def clean(self):
    """Return a cleaned copy of these comments plus the fixes applied.

    Comment strings of the form ``KEY=value`` whose upper-cased key is
    listed in ``ATTRIBUTE_MAP`` (or is one of its ``ALIASES``) are
    checked as follows:

    * a value that is empty or only whitespace drops the whole comment
    * trailing, then leading, whitespace is stripped from the value
    * ``track_number``/``album_number`` values also lose whitespace
      around a ``/`` and leading zeroes on both sides of it
    * ``track_total``/``album_total`` values also lose leading zeroes

    A number made only of zeroes keeps a single ``0`` so that cleaning
    never turns a populated comment into an empty ``KEY=`` one.
    Comment strings without ``=`` and those with unrecognized keys are
    copied through untouched.

    Returns a ``(metadata, fixes_performed)`` tuple: ``metadata`` is a
    new instance of this object's class built from the cleaned comment
    strings and the current ``vendor_string``; ``fixes_performed`` is
    the list of messages describing each fix, in the order applied.
    """
    # imported locally so the method works even when the enclosing
    # module does not import "re" at the top level
    import re
    from audiotools.text import (CLEAN_REMOVE_TRAILING_WHITESPACE,
                                 CLEAN_REMOVE_LEADING_WHITESPACE,
                                 CLEAN_REMOVE_EMPTY_TAG,
                                 CLEAN_REMOVE_LEADING_WHITESPACE_ZEROES,
                                 CLEAN_REMOVE_LEADING_ZEROES)

    def strip_zeroes(number):
        # "number[:1]" puts back one "0" when the text was all zeroes
        # and stays empty when the text itself was empty
        return number.lstrip(u"0") or number[:1]

    slashed_fields = ("track_number", "album_number")
    total_fields = ("track_total", "album_total")

    # map every known key, and every alias of it, back to its attribute
    reverse_attr_map = {}
    for (attr, key) in self.ATTRIBUTE_MAP.items():
        reverse_attr_map[key] = attr
        if key in self.ALIASES:
            for alias in self.ALIASES[key]:
                reverse_attr_map[alias] = attr

    fixes_performed = []
    cleaned_fields = []

    for comment_string in self.comment_strings:
        if u"=" not in comment_string:
            # not a KEY=value pair, so there is nothing to clean
            cleaned_fields.append(comment_string)
            continue

        (key, value) = comment_string.split(u"=", 1)
        if key.upper() not in reverse_attr_map:
            # unrecognized keys are passed through verbatim
            cleaned_fields.append(comment_string)
            continue

        attr = reverse_attr_map[key.upper()]

        if len(value.strip()) == 0:
            # empty tags are dropped from the output entirely
            fixes_performed.append(CLEAN_REMOVE_EMPTY_TAG.format(key))
            continue

        # all text fields are stripped of surrounding whitespace
        fix1 = value.rstrip()
        if fix1 != value:
            fixes_performed.append(
                CLEAN_REMOVE_TRAILING_WHITESPACE.format(key))

        fix2 = fix1.lstrip()
        if fix2 != fix1:
            fixes_performed.append(
                CLEAN_REMOVE_LEADING_WHITESPACE.format(key))

        # integer fields also strip leading zeroes
        if attr in slashed_fields:
            match = re.match(r'(.*?)\s*/\s*(.*)', fix2)
            if match is not None:
                # fix whitespace/zeroes on either side of slash
                fix3 = u"{}/{}".format(strip_zeroes(match.group(1)),
                                       strip_zeroes(match.group(2)))
                message = CLEAN_REMOVE_LEADING_WHITESPACE_ZEROES
            else:
                # fix zeroes only
                fix3 = strip_zeroes(fix2)
                message = CLEAN_REMOVE_LEADING_ZEROES

            if fix3 != fix2:
                fixes_performed.append(message.format(key))
        elif attr in total_fields:
            fix3 = strip_zeroes(fix2)
            if fix3 != fix2:
                fixes_performed.append(
                    CLEAN_REMOVE_LEADING_ZEROES.format(key))
        else:
            fix3 = fix2

        cleaned_fields.append(u"{}={}".format(key, fix3))

    return (self.__class__(cleaned_fields, self.vendor_string),
            fixes_performed)
示例#2
0
    def clean(self):
        """Return a cleaned copy of these comments plus the fixes applied.

        Comment strings of the form ``KEY=value`` whose upper-cased key
        is listed in ``ATTRIBUTE_MAP`` (or is one of its ``ALIASES``)
        are checked as follows:

        * a value that is empty or only whitespace drops the comment
        * trailing, then leading, whitespace is stripped from the value
        * ``track_number``/``album_number`` values also lose whitespace
          around a ``/`` and leading zeroes on both sides of it
        * ``track_total``/``album_total`` values also lose leading zeroes

        A number made only of zeroes keeps a single ``0`` so that
        cleaning never turns a populated comment into an empty ``KEY=``
        one.  Comment strings without ``=`` and those with unrecognized
        keys are copied through untouched.

        Returns a ``(metadata, fixes_performed)`` tuple: ``metadata`` is
        a new instance of this object's class built from the cleaned
        comment strings and the current ``vendor_string``;
        ``fixes_performed`` is the list of messages describing each fix,
        in the order applied.
        """
        # imported locally so the method works even when the enclosing
        # module does not import "re" at the top level
        import re
        from audiotools.text import (CLEAN_REMOVE_TRAILING_WHITESPACE,
                                     CLEAN_REMOVE_LEADING_WHITESPACE,
                                     CLEAN_REMOVE_EMPTY_TAG,
                                     CLEAN_REMOVE_LEADING_WHITESPACE_ZEROES,
                                     CLEAN_REMOVE_LEADING_ZEROES)

        def strip_zeroes(number):
            # "number[:1]" puts back one "0" when the text was all
            # zeroes and stays empty when the text itself was empty
            return number.lstrip(u"0") or number[:1]

        slashed_fields = ("track_number", "album_number")
        total_fields = ("track_total", "album_total")

        # map every known key, and every alias of it, to its attribute
        reverse_attr_map = {}
        for (attr, key) in self.ATTRIBUTE_MAP.items():
            reverse_attr_map[key] = attr
            if key in self.ALIASES:
                for alias in self.ALIASES[key]:
                    reverse_attr_map[alias] = attr

        fixes_performed = []
        cleaned_fields = []

        for comment_string in self.comment_strings:
            if u"=" not in comment_string:
                # not a KEY=value pair, so there is nothing to clean
                cleaned_fields.append(comment_string)
                continue

            (key, value) = comment_string.split(u"=", 1)
            if key.upper() not in reverse_attr_map:
                # unrecognized keys are passed through verbatim
                cleaned_fields.append(comment_string)
                continue

            attr = reverse_attr_map[key.upper()]

            if len(value.strip()) == 0:
                # empty tags are dropped from the output entirely
                fixes_performed.append(CLEAN_REMOVE_EMPTY_TAG.format(key))
                continue

            # all text fields are stripped of surrounding whitespace
            fix1 = value.rstrip()
            if fix1 != value:
                fixes_performed.append(
                    CLEAN_REMOVE_TRAILING_WHITESPACE.format(key))

            fix2 = fix1.lstrip()
            if fix2 != fix1:
                fixes_performed.append(
                    CLEAN_REMOVE_LEADING_WHITESPACE.format(key))

            # integer fields also strip leading zeroes
            if attr in slashed_fields:
                match = re.match(r'(.*?)\s*/\s*(.*)', fix2)
                if match is not None:
                    # fix whitespace/zeroes on either side of slash
                    fix3 = u"{}/{}".format(strip_zeroes(match.group(1)),
                                           strip_zeroes(match.group(2)))
                    message = CLEAN_REMOVE_LEADING_WHITESPACE_ZEROES
                else:
                    # fix zeroes only
                    fix3 = strip_zeroes(fix2)
                    message = CLEAN_REMOVE_LEADING_ZEROES

                if fix3 != fix2:
                    fixes_performed.append(message.format(key))
            elif attr in total_fields:
                fix3 = strip_zeroes(fix2)
                if fix3 != fix2:
                    fixes_performed.append(
                        CLEAN_REMOVE_LEADING_ZEROES.format(key))
            else:
                fix3 = fix2

            cleaned_fields.append(u"{}={}".format(key, fix3))

        return (self.__class__(cleaned_fields,
                               self.vendor_string), fixes_performed)
示例#3
0
    def clean(self):
        """Return ``(cleaned_tag, fixes_performed)`` for this tag.

        Walks ``self.tags`` in order and builds a new item list:

        * an item whose upper-cased key was already seen is dropped
        * items whose ``type`` is not 0 are kept unchanged
        * type 0 items have trailing, then leading, whitespace stripped
          from their unicode value; when the key is in
          ``INTEGER_ITEMS`` the value is also rewritten as a plain
          integer or ``current/total`` pair; values that end up empty
          are dropped

        ``cleaned_tag`` is a new instance of this object's class built
        from the kept items and the current ``contains_header`` and
        ``contains_footer`` values.  ``fixes_performed`` is the list of
        messages describing each fix, in the order applied.
        """
        import re
        from audiotools.text import (CLEAN_REMOVE_DUPLICATE_TAG,
                                     CLEAN_REMOVE_TRAILING_WHITESPACE,
                                     CLEAN_REMOVE_LEADING_WHITESPACE,
                                     CLEAN_FIX_TAG_FORMATTING,
                                     CLEAN_REMOVE_EMPTY_TAG)

        fixes_performed = []

        def report(template, item):
            # the key is decoded only when a message is actually needed
            fixes_performed.append(
                template.format(item.key.decode('ascii')))

        def normalize_integers(text):
            # without a slash the "total" half is empty and so never
            # contains an integer, which reduces to the unslashed case
            (current, _, total) = text.partition(u"/")
            current_int = re.search(r'\d+', current)
            total_int = re.search(r'\d+', total)

            if total_int is None:
                if current_int is None:
                    # no integer anywhere, so leave the text alone
                    # (some integer items may legitimately hold strings)
                    return text
                return u"{:d}".format(int(current_int.group(0)))

            # a total without a current number is written as 0/total
            if current_int is None:
                number = 0
            else:
                number = int(current_int.group(0))
            return u"{:d}/{:d}".format(number, int(total_int.group(0)))

        seen_keys = set()
        kept_items = []

        for tag in self.tags:
            folded_key = tag.key.upper()
            if folded_key in seen_keys:
                report(CLEAN_REMOVE_DUPLICATE_TAG, tag)
                continue
            seen_keys.add(folded_key)

            if tag.type != 0:
                # only type 0 items are cleaned; the rest pass through
                kept_items.append(tag)
                continue

            original_text = tag.__unicode__()

            # check trailing whitespace
            right_stripped = original_text.rstrip()
            if right_stripped != original_text:
                report(CLEAN_REMOVE_TRAILING_WHITESPACE, tag)

            # check leading whitespace
            stripped = right_stripped.lstrip()
            if stripped != right_stripped:
                report(CLEAN_REMOVE_LEADING_WHITESPACE, tag)

            if tag.key in self.INTEGER_ITEMS:
                final_text = normalize_integers(stripped)
                if final_text != stripped:
                    report(CLEAN_FIX_TAG_FORMATTING, tag)
            else:
                final_text = stripped

            if final_text:
                kept_items.append(ApeTagItem.string(tag.key, final_text))
            else:
                report(CLEAN_REMOVE_EMPTY_TAG, tag)

        cleaned_tag = self.__class__(kept_items,
                                     self.contains_header,
                                     self.contains_footer)
        return (cleaned_tag, fixes_performed)