def _read_file(cwd, csv=None):
    '''Reads a JSON (or, as a fallback, pickle) file containing track metadata
    and annotations.

    The loaded data must be an iterable of dicts, one per track. Each dict
    must have a 'filepath' key; every remaining key is stored as a tag of that
    file. 'tracknum' defaults to 0 and 'compilation' to False when a record
    does not provide them. If a filepath is repeated, the last record wins.

    String values are post-processed:
      - a value naming an existing non-binary file is replaced by the content
        of that file (on failure the error is logged and the path is kept);
      - a value naming an existing binary file is left untouched;
      - any other string is converted to unicode (decoded as utf-8 if needed).
    Non-string values are stored unchanged.

    @return: a 2D dict in the form dict[<file-path>][<tag>]
    @param cwd: complete file-path (if no <csv> sent) or path to directory to work in
    @param csv: file-name (in <cwd> dir). Defaults to None
    @raise IOError: if the file cannot be opened (logged, then re-raised)
    @raise KeyError: if a record has no 'filepath' key

    @todo: values may include "\\embedded" to try getting it from the audio file.
    @todo: include other metadata formats (use tagpy?)'''

    if csv is None:
        filename = cwd
    else:
        filename = os.path.join(cwd, csv)

    # The outer try covers both attempts, so an open failure during the
    # pickle fallback is logged as well.
    try:
        try:
            with open(filename, 'r') as handle:
                data = json.load(handle)
        except ValueError:
            # Not valid JSON: fall back to pickle.
            # NOTE(review): pickle must never be loaded from untrusted files.
            # NOTE(review): mode 'r' is kept from the original; binary pickle
            # protocols need 'rb' on platforms that translate newlines --
            # confirm how these files are written before changing it.
            with open(filename, 'r') as handle:
                data = pickle.load(handle)
    except EnvironmentError:
        log.error(" Couldn't open '%s'", filename)
        raise

    tags = dict()
    for line_num, line in enumerate(data):
        # save title, artist, album, tracknum, compilation in tags[<file-name>]
        filepath = line.pop('filepath')
        # this deletes previous lines if filepath is repeated ...
        entry = {'tracknum': 0, 'compilation': False}
        entry.update(line)
        tags[filepath] = entry

        # Iterate over a snapshot because values are rewritten inside the loop.
        for k, value in list(entry.items()):
            if not isinstance(value, (str, unicode)):
                continue

            if os.path.isfile(value):
                if is_binary(value):
                    # Binary files are not imported; the stored value stays
                    # as it was loaded.
                    log.debug('%s file %s at line %d appears to be binary, '
                              'not importing', k, value, line_num)
                    continue
                try:
                    with open(value) as txt:
                        entry[k] = unicode(txt.read())
                except (EnvironmentError, UnicodeError):
                    log.error('Error opening %s file %s at line %d',
                              k, value, line_num)
                    entry[k] = unicode(value)
            else:
                try:
                    entry[k] = u'%s' % value
                except UnicodeDecodeError:
                    entry[k] = value.decode('utf-8')

    return tags
def _store_annotations(audiofile, track, all_md=False):
    """Collects annotations for an audio file and appends them to its track.

    Two sources are used:
      - ID3 tags (only when <all_md> is True and the file is a valid MP3):
        each tag returned by id3.getAllTags becomes an annotation named after
        the tag;
      - text files sharing the audio file's base-name within the same folder
        (any extension, or none): each non-binary file becomes an annotation
        named after its extension (without the dot) holding the file content.

    The appended annotations are then saved by calling commit().

    @param audiofile: the file (should be previously verified as an actual audio file)
    @param track: previously stored track record in the database, represented
        by a gordon.db.model.Track class (SQL Alchemy)
    @param all_md: use True to extract all tags from the audio-file (defaults to False)

    @return: number of annotations (0 to *) stored"""

    annots = 0

    # ID3 tags are extracted only on request and only from valid MP3 files.
    if all_md and id3.isValidMP3(audiofile):
        # skipTrackFields=True skips the 4 basic tags already in track
        for tag in id3.getAllTags(audiofile, skipTrackFields=True):
            # todo: value=unicode(tag[1])
            track.annotations.append(
                Annotation(name=unicode(tag[0]), value=tag[1]))
            annots += 1
    # future todo: apply tagpy or other method to extract more metadata formats

    if annots == 0:
        log.debug(' No ID3 metadata found.')

    # Candidate text annotations: the extension-less base path itself (if it
    # exists) plus every file matching "<base>.*".
    pathandbase = os.path.splitext(audiofile)[0]
    simfiles = []
    if os.path.exists(pathandbase):
        simfiles.append(pathandbase)
    simfiles.extend(glob(pathandbase + '.*'))

    for simfile in simfiles:
        # The audio file itself also matches the pattern; it is skipped here
        # as long as is_binary reports it as binary.
        if is_binary(simfile):
            continue

        # The context manager closes the handle even if reading fails.
        with open(simfile) as txt:
            content = unicode(txt.read())
        ext = os.path.splitext(simfile)[1]
        track.annotations.append(
            Annotation(name=unicode(ext[1:]), value=content))
        annots += 1

    commit()  # saves all appended annotations in the track

    log.debug(' Stored %s annotations overall', annots)
    return annots
def _read_csv_tags(cwd, csv=None):
    '''Reads a csv file containing track metadata and annotations (v 2.0)

    # may use py comments in metadata .csv file. The first valid line must be
    the header:
    filepath, title, artist, album, tracknum, compilation, [optional1], [optional2]...
    followed by the corresponding values per line. Lines with fewer than 6
    columns, with an empty first column, or starting with '#' are skipped.

    Per column: 'tracknum' is stored as an int (0 if it cannot be parsed);
    'compilation' is stored as a bool (True for 'true', case-insensitive, or
    '1'); a value naming an existing non-binary file is replaced by that
    file's content; any other value is stored as unicode.

    NOTE(review): a second definition of _read_csv_tags appears later in this
    source; if both live in the same module the later one replaces this one.

    @return: a 2D dict in the form dict[<file-path>][<tag>] or False if the
        header is incorrect
    @param cwd: complete csv file-path (if no <csv> sent) or path to directory to work in
    @param csv: csv file-name (in <cwd> dir). Defaults to None
    @raise IOError: if the csv file cannot be opened (logged, then re-raised)

    @todo: csv values (after header) may include "\\embedded" to try getting it
        from the audio file. Currently only ID3 tags names understood by
        gordon.io.mp3_eyeD3.id3v2_getval_sub are usable in this manner.
    @todo: include other metadata formats (use tagpy?)'''

    # open csv file
    if csv is None:
        filename = cwd
    else:
        filename = os.path.join(cwd, csv)

    try:
        csv_handle = open(filename)
    except IOError:
        log.error(" Couldn't open '%s'", filename)
        raise

    tags = dict()
    headers = False

    # The reader is lazy, so the file must stay open for the whole loop; the
    # context manager closes it on every exit path, including the early return.
    with csv_handle:
        csvfile = reader(csv_handle)
        for line in csvfile:  # each record (file rows)
            if len(line) < 6:
                continue  # skip bad lines (blank or too short)
            line[0] = line[0].strip()
            if not line[0] or line[0][0] == '#':
                continue  # skip if filepath empty or comment line

            # read and validate header
            if not headers:  # first valid line is the header
                line = [l.strip() for l in line]
                if line[:6] != ['filepath', 'title', 'artist', 'album',
                                'tracknum', 'compilation']:
                    log.error('CSV headers are incorrect at line %d.',
                              csvfile.line_num)
                    return False
                headers = [unicode(x) for x in line]
                continue

            # save title, artist, album, tracknum, compilation in tags[<file-name>]
            filepath = line[0]
            # this deletes previous lines if filepath is repeated ...
            row_tags = tags[filepath] = dict()

            # col 0 is 'filepath' so skip it; zip stops at the shorter of
            # header and row, so extra or missing columns are ignored.
            for name, raw in zip(headers[1:], line[1:]):
                value = raw.strip()

                if name == u'tracknum':  # prepare for smallint in the DB
                    try:
                        row_tags[u'tracknum'] = int(value)
                    except ValueError:
                        row_tags[u'tracknum'] = 0

                elif name == u'compilation':  # prepare for bool in the DB
                    row_tags[u'compilation'] = (
                        value.lower() == 'true' or value == '1')

                elif os.path.isfile(value):
                    if is_binary(value):
                        log.debug('%s file %s at line %d appears to be binary, '
                                  'not importing', name, value,
                                  csvfile.line_num)
                        row_tags[name] = unicode(value)
                    else:
                        try:
                            with open(value) as txt:
                                row_tags[name] = unicode(txt.read())
                        except (EnvironmentError, UnicodeError):
                            log.error('Error opening %s file %s at line %d',
                                      name, value, csvfile.line_num)
                            row_tags[name] = unicode(value)

                else:
                    try:
                        row_tags[name] = u'%s' % value
                    except UnicodeDecodeError:
                        row_tags[name] = value.decode("utf-8")

    return tags
def _read_csv_tags(cwd, csv=None):
    '''Reads a csv file containing track metadata and annotations (v 2.0)

    # may use py comments in metadata .csv file. The first valid line must be
    the header:
    filepath, title, artist, album, tracknum, compilation, [optional1], [optional2]...
    followed by the corresponding values per line. Lines with fewer than 6
    columns, with an empty first column, or starting with '#' are skipped.

    Per column: 'tracknum' is stored as an int (0 if it cannot be parsed);
    'compilation' is stored as a bool (True for 'true', case-insensitive, or
    '1'); a value naming an existing non-binary file is replaced by that
    file's content; any other value is stored as unicode.

    NOTE(review): an earlier definition of _read_csv_tags appears in this
    source; if both live in the same module this one replaces it.

    @return: a 2D dict in the form dict[<file-path>][<tag>] or False if the
        header is incorrect
    @param cwd: complete csv file-path (if no <csv> sent) or path to directory to work in
    @param csv: csv file-name (in <cwd> dir). Defaults to None
    @raise IOError: if the csv file cannot be opened (logged, then re-raised)

    @todo: csv values (after header) may include "\\embedded" to try getting it
        from the audio file. Currently only ID3 tags names understood by
        gordon.io.mp3_eyeD3.id3v2_getval_sub are usable in this manner.
    @todo: include other metadata formats (use tagpy?)'''

    # open csv file
    if csv is None:
        filename = cwd
    else:
        filename = os.path.join(cwd, csv)

    try:
        csv_handle = open(filename)
    except IOError:
        log.error(" Couldn't open '%s'", filename)
        raise

    tags = dict()
    headers = False

    # The reader is lazy, so the file must stay open for the whole loop; the
    # context manager closes it on every exit path, including the early return.
    with csv_handle:
        csvfile = reader(csv_handle)
        for line in csvfile:  # each record (file rows)
            if len(line) < 6:
                continue  # skip bad lines (blank or too short)
            line[0] = line[0].strip()
            if not line[0] or line[0][0] == '#':
                continue  # skip if filepath empty or comment line

            # read and validate header
            if not headers:  # first valid line is the header
                line = [l.strip() for l in line]
                if line[:6] != ['filepath', 'title', 'artist', 'album',
                                'tracknum', 'compilation']:
                    log.error('CSV headers are incorrect at line %d.',
                              csvfile.line_num)
                    return False
                headers = [unicode(x) for x in line]
                continue

            # save title, artist, album, tracknum, compilation in tags[<file-name>]
            filepath = line[0]
            # this deletes previous lines if filepath is repeated ...
            row_tags = tags[filepath] = dict()

            # col 0 is 'filepath' so skip it; zip stops at the shorter of
            # header and row, so extra or missing columns are ignored.
            for name, raw in zip(headers[1:], line[1:]):
                value = raw.strip()

                if name == u'tracknum':  # prepare for smallint in the DB
                    try:
                        row_tags[u'tracknum'] = int(value)
                    except ValueError:
                        row_tags[u'tracknum'] = 0

                elif name == u'compilation':  # prepare for bool in the DB
                    row_tags[u'compilation'] = (
                        value.lower() == 'true' or value == '1')

                elif os.path.isfile(value):
                    if is_binary(value):
                        log.debug('%s file %s at line %d appears to be binary, '
                                  'not importing', name, value,
                                  csvfile.line_num)
                        row_tags[name] = unicode(value)
                    else:
                        try:
                            with open(value) as txt:
                                row_tags[name] = unicode(txt.read())
                        except (EnvironmentError, UnicodeError):
                            log.error('Error opening %s file %s at line %d',
                                      name, value, csvfile.line_num)
                            row_tags[name] = unicode(value)

                else:
                    try:
                        row_tags[name] = u'%s' % value
                    except UnicodeDecodeError:
                        row_tags[name] = value.decode("utf-8")

    return tags