def process_simple_tag(self, simple_tag_elem, tags_dict):
    """
    Parse one SimpleTag element and file the result in tags_dict.

    The children of simple_tag_elem are read via self.process_one_level().
    The tag name is lower-cased and used as the key in tags_dict.  The
    stored value is a core.Tag built from the string or binary payload and
    the language.  When nested SimpleTag elements yielded child tags, the
    stored value is instead the core.Tags holding those children, with the
    payload and language attached as its ``value`` and ``langcode``
    attributes.

    If tags_dict already has an entry for the name, that entry is turned
    into a list (unless it already is one) and the new value is appended.

    A SimpleTag without a TagName child is ignored, because there is no
    meaningful key to store it under.

    Nothing is returned; tags_dict is modified in place.
    """
    name = lang = value = children = None
    binary = False

    for elem in self.process_one_level(simple_tag_elem):
        elem_id = elem.get_id()
        if elem_id == MATROSKA_TAG_NAME_ID:
            name = elem.get_utf8().lower()
        elif elem_id == MATROSKA_TAG_STRING_ID:
            value = elem.get_utf8()
        elif elem_id == MATROSKA_TAG_BINARY_ID:
            value = elem.get_data()
            binary = True
        elif elem_id == MATROSKA_TAG_LANGUAGE_ID:
            lang = elem.get_utf8()
        elif elem_id == MATROSKA_SIMPLE_TAG_ID:
            # Nested SimpleTag: gather it into a Tags container of our own.
            if children is None:
                children = core.Tags()
            self.process_simple_tag(elem, children)

    if name is None:
        # No TagName child was seen.  Filing the tag under the key None
        # would only pollute tags_dict, so drop it.
        return

    if children:
        # Convert ourselves to a Tags object.
        children.value = value
        children.langcode = lang
        value = children
    else:
        # XXX: Python datetime objects have no way to express partial dates
        # (e.g. only year), which the Matroska spec allows.  Until there is
        # a proper way to express such dates, 'date_*' tag values are
        # deliberately passed through unconverted.
        value = core.Tag(value, lang, binary)

    if name not in tags_dict:
        tags_dict[name] = value
        return

    # Multiple items of this tag name: keep them all in a list.
    if not isinstance(tags_dict[name], list):
        tags_dict[name] = [tags_dict[name]]
    tags_dict[name].append(value)
def process_tags(self, tags):
    """
    Walk a Tags element and attach the tags it carries to their targets.

    Each child of ``tags`` (a Tag element) is read with
    self.process_one_level().  Its SimpleTag children are collected into a
    fresh core.Tags via self.process_simple_tag(), and the uid children of
    its Targets element (track, edition, chapter and attachment uids) are
    gathered into a list.

    If any uids were listed, the collected tags are merged into the ``tags``
    attribute of each matching object in self.objects_by_uid and passed to
    self.tags_to_attributes() for that object; a KeyError raised while doing
    so is logged as an unknown/unsupported target uid.  If no uids were
    listed, the tags are treated as global and applied to ``self`` instead.
    """
    # Tags spec: http://www.matroska.org/technical/specs/tagging/index.html
    uid_element_ids = (
        MATRSOKA_TAGS_TRACK_UID_ID,
        MATRSOKA_TAGS_EDITION_UID_ID,
        MATRSOKA_TAGS_CHAPTER_UID_ID,
        MATRSOKA_TAGS_ATTACHMENT_UID_ID,
    )

    for tag_elem in self.process_one_level(tags):
        # Every SimpleTag found under this Tag element ends up in here.
        collected = core.Tags()
        # Uids this Tag element applies to; an empty list means "global".
        target_uids = []

        for child in self.process_one_level(tag_elem):
            child_id = child.get_id()
            if child_id == MATROSKA_SIMPLE_TAG_ID:
                self.process_simple_tag(child, collected)
            elif child_id == MATROSKA_TARGETS_ID:
                # Only uid children are of interest.  Other children, such
                # as the target type value, are not supported for now
                # (unclear how they would fit with kaa.metadata) and are
                # skipped.
                target_uids.extend(
                    target.get_value()
                    for target in self.process_one_level(child)
                    if target.get_id() in uid_element_ids
                )

        if not target_uids:
            # No uid given: the tags apply to the whole file, i.e. to the
            # top-level object itself.
            self.tags.update(collected)
            self.tags_to_attributes(self, collected)
            continue

        # Assign tags to all listed uids.
        for uid in target_uids:
            try:
                target_obj = self.objects_by_uid[uid]
                target_obj.tags.update(collected)
                self.tags_to_attributes(target_obj, collected)
            except KeyError:
                log.warning(
                    u'Tags assigned to unknown/unsupported target uid %d',
                    uid)
def process_simple_tag(self, simple_tag_elem, tags_dict):
    """
    Parse one SimpleTag element and file the result in tags_dict.

    The children of simple_tag_elem are read via self.process_one_level().
    The tag name is lower-cased and used as the key in tags_dict.  The
    stored value is a core.Tag built from the string or binary payload and
    the language; text payloads of tags whose name starts with 'date_' are
    first passed through matroska_date_to_datetime().  When nested SimpleTag
    elements yielded child tags, the stored value is instead the core.Tags
    holding those children, with the payload and language attached as its
    ``value`` and ``langcode`` attributes.

    If tags_dict already has an entry for the name, that entry is turned
    into a list (unless it already is one) and the new value is appended.

    A SimpleTag without a TagName child is ignored, because there is no
    meaningful key to store it under.

    Nothing is returned; tags_dict is modified in place.
    """
    name = lang = value = children = None
    binary = False

    for elem in self.process_one_level(simple_tag_elem):
        elem_id = elem.get_id()
        if elem_id == MATROSKA_TAG_NAME_ID:
            name = elem.get_utf8().lower()
        elif elem_id == MATROSKA_TAG_STRING_ID:
            value = elem.get_utf8()
        elif elem_id == MATROSKA_TAG_BINARY_ID:
            value = elem.get_data()
            binary = True
        elif elem_id == MATROSKA_TAG_LANGUAGE_ID:
            lang = elem.get_utf8()
        elif elem_id == MATROSKA_SIMPLE_TAG_ID:
            # Nested SimpleTag: gather it into a Tags container of our own.
            if children is None:
                children = core.Tags()
            self.process_simple_tag(elem, children)

    if name is None:
        # No TagName child was seen.  Without this guard the startswith()
        # call below would raise AttributeError on None, and the tag could
        # only be filed under the key None anyway, so drop it.
        return

    if children:
        # Convert ourselves to a Tags object.
        children.value = value
        children.langcode = lang
        value = children
    else:
        # Try to convert date to a datetime object.  Binary or missing
        # payloads are left alone.
        # NOTE(review): assumes matroska_date_to_datetime() expects a text
        # string -- confirm against the helper.
        if name.startswith('date_') and not binary and value is not None:
            value = matroska_date_to_datetime(value)
        value = core.Tag(value, lang, binary)

    if name not in tags_dict:
        tags_dict[name] = value
        return

    # Multiple items of this tag name: keep them all in a list.
    if not isinstance(tags_dict[name], list):
        tags_dict[name] = [tags_dict[name]]
    tags_dict[name].append(value)
def __init__(self, file, tagVersion=eyeD3_tag.ID3_ANY_VERSION): core.Music.__init__(self) self.fileName = file.name self.codec = 0x0055 # fourcc code of mp3 self.mime = 'audio/mpeg' #if not eyeD3_tag.isMp3File(file.name): # raise core.ParseError() id3 = None try: id3 = eyeD3_tag.Mp3AudioFile(file.name) except eyeD3_tag.InvalidAudioFormatException: # File is not an MP3 raise core.ParseError() except eyeD3_tag.TagException: # The MP3 tag decoder crashed, assume the file is still # MP3 and try to play it anyway if log.level < 30: log.exception('mp3 tag parsing %s failed!' % file.name) except Exception: # The MP3 tag decoder crashed, assume the file is still # MP3 and try to play it anyway if log.level < 30: log.exception('mp3 tag parsing %s failed!' % file.name) if not id3: # let's take a look at the header s = file.read(4096) if not s[:3] == 'ID3': # no id3 tag header, not good if not re.compile(r'0*\xFF\xFB\xB0\x04$').search(s): # again, not good if not re.compile(r'0*\xFF\xFA\xB0\x04$').search(s): # that's it, it is no mp3 at all raise core.ParseError() try: if id3 and id3.tag: self.tags = core.Tags() log.debug(id3.tag.frames) # Grip unicode bug workaround: Grip stores text data as UTF-8 # and flags it as latin-1. This workaround tries to decode # these strings as utf-8 instead. 
# http://sourceforge.net/tracker/index.php?func=detail&aid=1196919&group_id=3714&atid=103714 for frame in id3.tag.frames['COMM']: if "created by grip" not in frame.comment.lower(): continue for frame in id3.tag.frames: if hasattr(frame, "text") and isinstance( frame.text, unicode): try: frame.text = frame.text.encode( 'latin-1').decode('utf-8') except UnicodeError: pass for k, var in MP3_INFO_TABLE.items(): if id3.tag.frames[k]: self._set(var, id3.tag.frames[k][0].text) if id3.tag.frames['APIC']: pic = id3.tag.frames['APIC'][0] if pic.imageData: self.thumbnail = pic.imageData if id3.tag.getYear(): self.userdate = id3.tag.getYear() tab = {} for f in id3.tag.frames: tag = core.Tag() if f.__class__ is eyeD3_frames.TextFrame: tab[f.header.id] = f.text tag.value = f.text elif f.__class__ is eyeD3_frames.UserTextFrame: #userTextFrames : debug: id starts with _ self._set('_' + f.description, f.text) tab['_' + f.description] = f.text tag.value = f.text elif f.__class__ is eyeD3_frames.DateFrame: tab[f.header.id] = f.date_str tag.value = f.date_str elif f.__class__ is eyeD3_frames.CommentFrame: tab[f.header.id] = f.comment self.comment = py3_str(f.comment) tag.value = f.comment elif f.__class__ is eyeD3_frames.URLFrame: tab[f.header.id] = f.url tag.value = f.url elif f.__class__ is eyeD3_frames.UserURLFrame: tab[f.header.id] = f.url tag.value = f.url elif f.__class__ is eyeD3_frames.ImageFrame: tab[f.header.id] = f if f.imageData: tag.binary = True tag.value = f.imageData else: log.debug(f.__class__) if f.header.id in ID3_TAGS_MAP and tag.value: tagname, filter = ID3_TAGS_MAP[f.header.id] try: if filter: tag.value = filter(tag.value) except Exception, e: log.warning('skipping tag %s: %s', tagname, e) else: self.tags[tagname] = tag self._appendtable('id3v2', tab) if id3.tag.frames['TCON']: genre = None tcon = id3.tag.frames['TCON'][0].text # TODO: could handle id3v2 genre refinements. try: # Assume integer. 
genre = int(tcon) except ValueError: # Nope, maybe it's in '(N)' format. try: genre = int(tcon[1:tcon.find(')')]) except ValueError: # Nope. Treat as a string. self.genre = py3_str(tcon) if genre is not None: try: self.genre = ID3.GENRE_LIST[genre] except KeyError: # Numeric genre specified but not one of the known genres, # use 'Unknown' as per ID3v1. self.genre = u'Unknown' self.tags[u'genre'] = core.Tag(self.genre) # and some tools store it as trackno/trackof in TRCK if not self.trackof and self.trackno and \ self.trackno.find('/') > 0: self.trackof = self.trackno[self.trackno.find('/') + 1:] self.trackno = self.trackno[:self.trackno.find('/')] if id3: self.length = id3.getPlayTime()