Normalize artist names with MusicBrainz
Nothing is worse than an MP3 library with inconsistent artist spellings: the same artist shows up two or three times in the list. Thanks to MusicBrainz, the spelling can be normalized. This script looks up the artist of each of your files on MusicBrainz and tries to find the correct spelling. It outputs a batch file that you can review before running it.
Note this is mainly for singles. For albums, consider using the excellent software Picard.
Source Code
mp3-checkartist.py
#! /usr/bin/python
# (c) 2008 - Remi Peyronnet <[email protected]>
# coding = latin-1
# Directory handed to processPath() when the script is run directly.
SOURCE_PATH = "."
# NOTE(review): not referenced by the code visible in this script.
DEST_PATH = "_renamed"
# NOTE(review): defined but never consulted by the visible code.
EXT_FILTER = [
    '.mp3', '.wma', '.ogg',
    '.mpeg', '.avi', '.mpg',
]
# Directory holding the pickled lookup results and the ignore list.
CACHE_PATH = "/space/Musique/.mbcache"
IGNORE_PATH = "%s/ignore.txt" % CACHE_PATH
import os
import os.path
import shutil
import hashlib
from ID3 import *
import string
import time
import re
import sys
import pickle
from musicbrainz2.webservice import Query, ArtistFilter, WebServiceError
import sys
import codecs

# File-name pattern: "<date> - <type> - [<genre> - ]<artist>-<title>.mp3".
# The final dot is escaped so that only a literal ".mp3" suffix matches.
regex = re.compile(r'(?P<date>....-..-..) - (?P<type>Clip|CLIP|Live) - (?P<genre>Goldorama|Clubbing)?( - )?(?P<artist>[^-]*)-(?P<title>.*)\.mp3')

# Load the ignore list: one artist name per line. Artists found in this
# list are reported as ignored by processFile() instead of being looked up.
try:
    fignore = open(IGNORE_PATH, 'r')
except IOError:
    # No ignore file means nothing is ignored; warn on stderr so the
    # generated batch output on stdout stays clean.
    sys.stderr.write("# No ignore file at " + IGNORE_PATH + "\n")
    ignorefile = ""
    ignore = []
else:
    try:
        ignorefile = fignore.read()
    finally:
        fignore.close()
    # Tag values are decoded to unicode before being compared with this
    # list, so the entries must be unicode as well; otherwise names with
    # non-ASCII characters would never match.
    try:
        ignorefile = ignorefile.decode('utf-8')
    except UnicodeError:
        # Latin-1 maps every byte to a character, so this cannot fail.
        ignorefile = ignorefile.decode('latin-1')
    # Split on line breaks (not on the letter "n") and drop blank lines.
    ignore = [line for line in ignorefile.splitlines() if line]

# Everything printed from here on is encoded as UTF-8.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
def _decode_tag(raw):
    """Return the byte string ``raw`` as unicode: UTF-8 first, then Latin-1."""
    try:
        return raw.decode('utf-8')
    except UnicodeError:
        # Latin-1 maps every byte to a character, so this cannot fail.
        return raw.decode('latin-1')


def _shell_dquote(text):
    """Return ``text`` wrapped in double quotes, escaped for a POSIX shell."""
    # Backslash goes first so the escapes added for the other characters
    # are not escaped a second time.
    for special in (u'\\', u'"', u'$', u'`'):
        text = text.replace(special, u'\\' + special)
    return u'"' + text + u'"'


def _lookup_artist(artist_raw, artist_name):
    """Return the MusicBrainz results for an artist, using the disk cache.

    ``artist_raw`` is the undecoded tag value (it keys the cache file),
    ``artist_name`` its unicode form (it is sent to the web service).
    Returns None when the web service call fails.
    """
    cachefile = CACHE_PATH + "/artist" + hashlib.md5(artist_raw).hexdigest()
    if os.path.exists(cachefile):
        # NOTE(review): pickle.load executes whatever the file describes;
        # the cache directory must only ever contain files written here.
        with open(cachefile, 'rb') as cachefd:
            return pickle.load(cachefd)
    # Be sure not to ask too frequently
    time.sleep(1)
    try:
        results = Query().getArtists(ArtistFilter(name=artist_name, limit=5))
    except WebServiceError as e:
        # Emitted as a comment so the generated batch file stays runnable.
        print('# WS Error %s' % e)
        return None
    if results:
        # Empty answers are not cached, so they are retried on the next run.
        with open(cachefile, 'wb') as cachefd:
            pickle.dump(results, cachefd)
    return results


def processFile(pathfile, path, file):
    """Check one file's artist tag against MusicBrainz.

    Reads the ID3 tag of ``pathfile``. If none of the (at most five)
    MusicBrainz matches carries exactly the tagged artist name, prints a
    comment line followed by an ``id3v2 -a`` command that would set the
    artist to the first result. Files without a readable tag or without
    an artist are skipped silently; artists present in the module-level
    ``ignore`` list only produce a "# Ignoring" comment line.

    pathfile -- path of the file to inspect
    path     -- directory containing the file (unused)
    file     -- base name of the file (unused)

    Returns None.
    """
    try:
        id3info = ID3(pathfile)
    except Exception:
        # Best effort: a file whose tag cannot be read is simply skipped.
        # Unlike a bare "except:", Ctrl-C still interrupts the scan.
        return
    if not id3info or not id3info.artist:
        return

    artist_id3 = _decode_tag(id3info.artist)
    if artist_id3 in ignore:
        print("# Ignoring " + artist_id3)
        return

    artistResults = _lookup_artist(id3info.artist, artist_id3)
    if not artistResults:
        return
    if any(result.artist.name == artist_id3 for result in artistResults):
        # The current spelling is one MusicBrainz knows: nothing to do.
        return

    # The first result is the one proposed as replacement.
    best = artistResults[0]
    print(u"# Musicbrainz knows %s instead of %s ( %s %%)"
          % (best.artist.name, artist_id3, best.score))
    # Quote both arguments so that names or paths containing ", $, ` or \
    # cannot break or alter the generated command. A path that is not valid
    # UTF-8 is decoded as Latin-1 instead of aborting the scan; since stdout
    # re-encodes as UTF-8, such a command line should be checked by hand.
    print(u"id3v2 -a " + _shell_dquote(best.artist.name)
          + u" " + _shell_dquote(_decode_tag(pathfile)))
def processPath(path):
    """Recursively run processFile() on every regular file below ``path``.

    Entries are visited in sorted name order and sub-directories are
    descended into as they are met. A directory that cannot be listed is
    reported on stderr and skipped instead of aborting the whole scan.

    Always returns an empty string.
    """
    try:
        names = sorted(os.listdir(path))
    except OSError as err:
        sys.stderr.write("# Cannot read %s: %s\n" % (path, err))
        return ""
    for name in names:
        pathfile = os.path.join(path, name)
        if os.path.isdir(pathfile):
            processPath(pathfile)
        elif os.path.isfile(pathfile):
            # NOTE(review): EXT_FILTER is defined at the top of the file but
            # is not applied here; every regular file is handed over.
            processFile(pathfile, path, name)
    return ""
# Script entry point: scan SOURCE_PATH when run directly, not on import.
if __name__ == "__main__":
    processPath(SOURCE_PATH)
Comments are powered by GitHub. You must authenticate to GitHub before commenting. If you don't have an account, creating one only takes a minute!
Les commentaires sont hébergés sur GitHub. Il vous faudra vous authentifier sur GitHub pour pouvoir commenter. Si vous n'avez pas de compte, une minute suffit à en créer un !