def validate_release_title(self) -> Optional[str]:
    """Return the release title shared by every track, or None.

    The raw ``release_title`` tags are compared first. If they do not
    collapse to exactly one non-empty value, the comparison is repeated on
    the titles passed through ``normalize_release_title``.

    Returns:
        The single agreed title (raw if the raw tags agree, otherwise the
        normalized form), or None when neither pass yields exactly one
        non-empty title.
    """
    raw_titles = unique(
        [track.release_title for track in self.tracks.values()])
    if len(raw_titles) == 1 and raw_titles[0] != "":
        return raw_titles[0]

    # the raw tags disagree (or are blank): retry on normalized titles
    normalized_titles = unique([
        normalize_release_title(track.release_title)
        for track in self.tracks.values()
    ])
    if len(normalized_titles) != 1 or normalized_titles[0] == "":
        return None
    return normalized_titles[0]
def get_vbr_bitrate(self) -> Optional[float]:
    """Return the length-weighted average bitrate of a VBR release.

    Each track's ``stream_info.bitrate`` is weighted by its
    ``stream_info.length``.

    Returns:
        The weighted average bitrate, or None when the release's codec
        settings are not uniformly ``"VBR"`` or when the tracks have no
        total length to average over.
    """
    # get_codecs() already returns a de-duplicated list, so its length can
    # be tested directly.
    codecs = self.get_codecs()
    if len(codecs) != 1 or codecs[0] != "VBR":
        return None

    tracks = list(self.tracks.values())
    total_length = sum(track.stream_info.length for track in tracks)
    if not total_length:
        # zero-length streams would otherwise raise ZeroDivisionError
        return None

    weighted_bitrate = sum(
        track.stream_info.bitrate * track.stream_info.length
        for track in tracks)
    return weighted_bitrate / total_length
def extract_release_artist(self) -> List[str]:
    """Return the release artists, falling back to the track artists.

    The result of ``validate_release_artists()`` is used when it is
    non-empty. Otherwise, if every track carries the same ``artists``
    value, that value is returned.

    Returns:
        The agreed artist list, or an empty list when the tracks disagree.
    """
    validated = self.validate_release_artists()
    if validated:
        return validated

    per_track_artists = unique(
        [track.artists for track in self.tracks.values()])
    if len(per_track_artists) != 1:
        return []
    return per_track_artists[0]
def get_total_discs(self) -> Optional[int]:
    """Return the disc count implied by the tracks' disc numbers.

    Tracks whose ``disc_number`` is None are ignored. The remaining
    distinct numbers must start at 1 and increase by exactly one.

    Returns:
        The highest disc number, or None when there are no disc numbers,
        the lowest is not 1, or the sequence has a gap.
    """
    numbered = [
        track.disc_number
        for track in self.tracks.values()
        if track.disc_number is not None
    ]
    discs = sorted(unique(numbered))

    if len(discs) == 0 or discs[0] != 1:
        return None

    # every neighbouring pair must differ by exactly one
    for current, following in zip(discs, discs[1:]):
        if current != following - 1:
            return None

    return discs[-1]
def get_release_codec_setting(self, short=True) -> str:
    """Describe the release's codec setting as a single string.

    Args:
        short: when False, the result is prefixed with ``"MP3 "`` or
            ``"MP4 "`` for ID3 or MP4 tag types respectively.

    Returns:
        ``"CBR<kbps>"`` or ``"VBR<kbps>"`` for CBR/VBR releases, otherwise
        the tracks' shared codec setting string; each optionally prefixed.
        An empty string is returned when the tracks disagree on tag type,
        codec setting, or (for CBR) rounded bitrate.
    """
    tracks = list(self.tracks.values())

    # mismatched tag types
    tag_types = unique([track.stream_info.tag_type for track in tracks])
    if len(tag_types) != 1:
        return ""

    # mismatched codec settings
    settings = unique(
        [track.get_codec_setting_str(short=True) for track in tracks])
    if len(settings) != 1:
        return ""
    setting = settings[0]

    prefix = ""
    if not short:
        if tag_types[0] == TagType.ID3:
            prefix = "MP3 "
        elif tag_types[0] == TagType.MP4:
            prefix = "MP4 "

    if setting == "CBR":
        # bitrates are compared after rounding to the nearest 1000
        rounded_bitrates = unique(
            [round(track.stream_info.bitrate, -3) for track in tracks])
        if len(rounded_bitrates) != 1:
            return ""
        kbps = int(round(rounded_bitrates[0] / 1000))
        # odd values are bumped up to the next even number
        kbps += kbps % 2
        return "{0}CBR{1}".format(prefix, kbps)

    if setting == "VBR":
        # length-weighted mean bitrate, expressed in thousands
        weighted_total = sum([
            track.stream_info.bitrate * track.stream_info.length
            for track in tracks
        ])
        total_length = sum([track.stream_info.length for track in tracks])
        average_kbps = round(weighted_total / total_length / 1000)
        return "{0}VBR{1}".format(prefix, average_kbps)

    return "{0}{1}".format(prefix, setting)
def guess_category(self) -> None:
    """Populate ``self.category`` when it has not been set already.

    Does nothing if ``self.category`` is truthy. Otherwise the category is
    derived from the number of tracks and the number of distinct track
    artists that do not appear in the first track's ``release_artists``.
    """
    if self.category:
        return

    # clean release name, and category
    # NOTE(review): the category assigned here is unconditionally replaced
    # by the heuristic below - confirm whether a second early return was
    # intended when get_category_fix_name() finds a category.
    _, self.category = get_category_fix_name(self)

    # artists credited on tracks but absent from the first track's
    # release artists
    extra_artists = [
        artist
        for track in self.tracks.values()
        for artist in track.artists
        if artist not in self.tracks[next(iter(self.tracks))].release_artists
    ]
    extra_count = len(unique(extra_artists))
    track_count = len(self.tracks)

    if track_count < 4 and extra_count == 1:
        guessed = ReleaseCategory.SINGLE
    elif track_count < 6 and extra_count == 1:
        guessed = ReleaseCategory.EP
    elif extra_count > track_count / 2:
        guessed = ReleaseCategory.COMPILATION
    else:
        guessed = ReleaseCategory.ALBUM
    self.category = guessed
def validate_total_tracks(self) -> List[int]:
    """Return the disc numbers whose 'total tracks' tags are wrong.

    Tracks are grouped by ``disc_number`` in first-seen order. A disc is
    reported when its tracks do not all carry the same ``total_tracks``
    value, or when that value differs from the number of tracks found on
    the disc.

    Returns:
        The violating disc numbers, in first-seen order.
    """
    totals_by_disc = OrderedDict()
    for track in self.tracks.values():
        totals_by_disc.setdefault(track.disc_number,
                                  []).append(track.total_tracks)

    violating_discs = []
    for disc, totals in totals_by_disc.items():
        distinct_totals = unique(totals)
        if len(distinct_totals) == 1 and len(totals) == distinct_totals[0]:
            continue
        violating_discs.append(disc)
    return violating_discs
def strip_whitespace_genres(self) -> List[str]:
    """Return ``self.genres`` stripped of surrounding whitespace and de-duplicated."""
    stripped_genres = [genre.strip() for genre in self.genres]
    return unique(stripped_genres)
def get_cbr_bitrates(self) -> List[int]:
    """Return the distinct track bitrates of a CBR release.

    Returns:
        The de-duplicated ``stream_info.bitrate`` values when the release's
        only codec setting is ``"CBR"``; otherwise an empty list.
    """
    codecs = self.get_codecs()
    if len(codecs) != 1 or codecs[0] != "CBR":
        return []
    return unique(
        [track.stream_info.bitrate for track in self.tracks.values()])
def get_codecs(self) -> List[str]:
    """Return the distinct short codec-setting strings across all tracks."""
    settings = []
    for track in self.tracks.values():
        settings.append(track.get_codec_setting_str(short=True))
    return unique(settings)
def validate_release_date(self) -> Optional[str]:
    """Return the date shared by every track, or None.

    Returns:
        The single ``date`` value carried by all tracks when it is truthy;
        None when the tracks disagree or the shared date is blank. Callers
        only need to test the result for truthiness.
    """
    dates = unique([track.date for track in self.tracks.values()])
    if len(dates) == 1 and dates[0]:
        return dates[0]
    # made explicit: the original fell off the end despite a `-> str` hint
    return None
def validate_genres(self) -> List[str]:
    """Return the genre list shared by every track.

    Returns:
        The single ``genres`` value carried by all tracks, or an empty
        list when the tracks disagree (or there are no tracks).
    """
    genres = unique([track.genres for track in self.tracks.values()])
    if len(genres) != 1:
        return []
    return genres[0]
def validate_total_discs(self) -> bool:
    """Check that the 'total discs' tags match the highest disc number.

    Returns:
        True when every track carries the same ``total_discs`` value, at
        least one disc number exists, and that value equals the highest
        disc number; False otherwise.
    """
    disc_numbers = sorted(self.__get_disc_numbers_by_track())
    total_discs = unique(
        [track.total_discs for track in self.tracks.values()])
    # bool(...) keeps the declared return type: the original leaked the
    # integer 0 from len(disc_numbers) when there were no disc numbers.
    return (len(total_discs) == 1 and bool(disc_numbers)
            and total_discs[0] == disc_numbers[-1])
def validate_release_artists(self) -> List[str]:
    """Return the release artists shared by every track.

    Returns:
        The de-duplicated ``release_artists`` value when all tracks carry
        the same one; otherwise an empty list.
    """
    candidates = unique(
        [track.release_artists for track in self.tracks.values()])
    if len(candidates) != 1:
        return []
    return unique(candidates[0])
def strip_whitespace_artists(self) -> List[str]:
    """Return ``self.artists`` stripped of surrounding whitespace and de-duplicated."""
    stripped_artists = [artist.strip() for artist in self.artists]
    return unique(stripped_artists)
def get_tag_types(self) -> List[str]:
    """Return the distinct ``stream_info.tag_type`` values across all tracks."""
    tag_types = []
    for track in self.tracks.values():
        tag_types.append(track.stream_info.tag_type)
    return unique(tag_types)
def validate(self, release: Release) -> List[Violation]:
    """Run every tag, title and filename check against *release*.

    The checks run in a fixed order: whitespace in tags, blank or
    inconsistent date/artist/title tags, source or category names embedded
    in the release title, Last.fm cross-checks (only when ``self.lastfm``
    is set), track/disc numbering, tag-type and codec consistency,
    filenames, and forbidden comment substrings.

    Side effects:
        Sets ``release.num_violations`` to the number of distinct
        violations found.

    Args:
        release: the release whose tracks are inspected.

    Returns:
        The collected violations as a list, in the order they were added
        to the ``OrderedSet``.

    NOTE(review): this method was reconstructed from whitespace-collapsed
    text. The nesting of the "track artists" / "release artists" Last.fm
    loops (placed here directly under the ``if self.lastfm and ...`` block
    rather than under ``if lastfm_release:``) should be confirmed against
    the original file.
    """
    violations = OrderedSet()

    # leading/trailing whitespace: each tag is compared with its stripped
    # counterpart, one pass over the tracks per tag
    for filename, track in release.tracks.items():
        if track.artists != track.strip_whitespace_artists():
            violations.add(
                Violation(
                    ViolationType.ARTIST_WHITESPACE,
                    "File '{0}' has leading/trailing whitespace in its Artist(s)"
                    .format(filename)))

    for filename, track in release.tracks.items():
        if track.release_artists != track.strip_whitespace_release_artists(
        ):
            violations.add(
                Violation(
                    ViolationType.RELEASE_ARTIST_WHITESPACE,
                    "File '{0}' has leading/trailing whitespace in its Album/Release Artist(s)"
                    .format(filename)))

    for filename, track in release.tracks.items():
        if track.date != track.strip_whitespace_date():
            violations.add(
                Violation(
                    ViolationType.DATE_WHITESPACE,
                    "File '{0}' has leading/trailing whitespace in its Year/Date"
                    .format(filename)))

    for filename, track in release.tracks.items():
        if track.release_title != track.strip_whitespace_release_title():
            violations.add(
                Violation(
                    ViolationType.RELEASE_TITLE_WHITESPACE,
                    "File '{0}' has leading/trailing whitespace in its Album/Release Title"
                    .format(filename)))

    for filename, track in release.tracks.items():
        if track.track_title != track.strip_whitespace_track_title():
            violations.add(
                Violation(
                    ViolationType.TRACK_TITLE_WHITESPACE,
                    "File '{0}' has leading/trailing whitespace in its Track Title"
                    .format(filename)))

    for filename, track in release.tracks.items():
        if track.genres != track.strip_whitespace_genres():
            violations.add(
                Violation(
                    ViolationType.GENRE_WHITESPACE,
                    "File '{0}' has leading/trailing whitespace in its Genre(s)"
                    .format(filename)))

    # release date
    if not release.validate_release_date():
        violations.add(
            Violation(
                ViolationType.DATE_INCONSISTENT,
                "Release contains blank or inconsistent 'Date' tags"))

    # artists
    if release.blank_artists():
        violations.add(
            Violation(
                ViolationType.ARTIST_BLANK,
                "Release contains {0} tracks with missing 'Artist' tags".
                format(release.blank_artists())))

    # track titles
    if release.blank_track_titles():
        violations.add(
            Violation(
                ViolationType.TRACK_TITLE_BLANK,
                "Release contains {0} tracks with missing 'Track Title' tags"
                .format(release.blank_track_titles())))

    # release artist
    release_artists = release.validate_release_artists()
    if not release_artists:
        violations.add(
            Violation(
                ViolationType.RELEASE_ARTIST_INCONSISTENT,
                "Release contains blank or inconsistent 'Album/Release Artist' tags"
            ))

    # if the lastfmcache is present, validate the release artist
    # (only attempted when there is exactly one release artist; otherwise
    # the unvalidated list is carried forward as-is)
    validated_release_artists = release_artists
    if self.lastfm and len(release_artists) == 1:
        validated_release_artists = []
        for artist in release_artists:
            try:
                validated_release_artist = self.lastfm.get_artist(
                    artist.strip()).artist_name
                if validated_release_artist != artist:
                    violations.add(
                        Violation(
                            ViolationType.RELEASE_ARTIST_SPELLING,
                            "Incorrectly spelled Album/Release Artist '{0}' (should be '{1}')"
                            .format(artist, validated_release_artist)))
                validated_release_artists.append(validated_release_artist)
            except LastfmCache.ArtistNotFoundError:
                # a failed lookup leaves validated_release_artists empty,
                # which also disables the Last.fm release checks below
                violations.add(
                    Violation(
                        ViolationType.ARTIST_LOOKUP,
                        "Lookup failed of release artist '{release_artist}'"
                        .format(release_artist=artist.strip())))

    # release title
    release_title = release.validate_release_title()
    if not release_title:
        violations.add(
            Violation(
                ViolationType.RELEASE_TITLE_INCONSISTENT,
                "Release contains blank or inconsistent 'Album/Release Title' tags"
            ))

    bracket_pairs = [["[", "]"], ["(", ")"], ["{", "}"]]

    if release_title:
        # check if "[Source]" is contained in the release title
        # (case-insensitive, for each of the three bracket styles)
        for source in ReleaseSource:
            for brackets in bracket_pairs:
                curr_source = "{0}{1}{2}".format(brackets[0], source.value,
                                                 brackets[1])
                if curr_source.lower() in release_title.lower():
                    violations.add(
                        Violation(
                            ViolationType.RELEASE_TITLE_SOURCE,
                            "Release title contains source {0}".format(
                                curr_source)))

        # check if the release title ends with a space and a source name, without brackets
        for source in [x for x in ReleaseSource]:
            if release_title.lower().endswith(" {0}".format(
                    source.value.lower())):
                violations.add(
                    Violation(
                        ViolationType.RELEASE_TITLE_SOURCE,
                        "Release title ends with source {0}".format(
                            source.value)))

        # check if "[Category]" is contained in the release title
        for category in ReleaseCategory:
            for brackets in bracket_pairs:
                curr_category = "{0}{1}{2}".format(brackets[0],
                                                   category.value,
                                                   brackets[1])
                if curr_category.lower() in release_title.lower():
                    violations.add(
                        Violation(
                            ViolationType.RELEASE_TITLE_CATEGORY,
                            "Release title contains category {0}".format(
                                curr_category)))

        # check if the release title ends with a space and a category name, without brackets (except Album)
        for category in [
                x for x in ReleaseCategory if x is not ReleaseCategory.ALBUM
        ]:
            if release_title.lower().endswith(" {0}".format(
                    category.value.lower())):
                violations.add(
                    Violation(
                        ViolationType.RELEASE_TITLE_CATEGORY,
                        "Release title ends with category {0}".format(
                            category.value)))

    # lastfm artist validations
    if self.lastfm and release_title and len(validated_release_artists):
        # extract (edition info) from release titles
        # (release_title is rebound to the normalized, split title from
        # here on)
        release_title, _ = split_release_title(
            normalize_release_title(release_title))

        flattened_artist = flatten_artists(validated_release_artists)
        lastfm_release = None

        try:
            lastfm_release = self.lastfm.get_release(
                flattened_artist, release_title)
        except LastfmCache.ReleaseNotFoundError as e:
            # a missing release is logged, not reported as a violation
            logging.getLogger(__name__).error(e)

        if lastfm_release:
            # release title
            if lastfm_release.release_name != release_title and \
                    ReleaseValidator.__lastfm_can_fix_release_title(release_title, lastfm_release.release_name):
                violations.add(
                    Violation(
                        ViolationType.RELEASE_TITLE_SPELLING,
                        "Incorrectly spelled Album/Release name '{0}' (should be '{1}')"
                        .format(release_title,
                                lastfm_release.release_name)))

            # dates
            if lastfm_release.release_date:
                # only the first track's date is compared; a longer local
                # date than the Last.fm one is not reported
                date = next(iter(release.tracks.values())).date
                if lastfm_release.release_date != date and \
                        (not date or len(lastfm_release.release_date) >= len(date)):
                    violations.add(
                        Violation(
                            ViolationType.DATE_INCORRECT,
                            "Incorrect Release Date '{0}' (should be '{1}')"
                            .format(date, lastfm_release.release_date)))

            # tags/genres (only fail if 0-1 genres - i.e. lastfm tags have never been applied)
            release_genres = release.validate_genres()
            lastfm_tags = self.__get_lastfm_tags(
                release_title, validated_release_artists)
            if len(release_genres) < 2 <= len(lastfm_tags):
                violations.add(
                    Violation(
                        ViolationType.BAD_GENRES,
                        "Bad release genres: [{0}] (should be [{1}])".
                        format(", ".join(release_genres),
                               ", ".join(lastfm_tags))))

            # match and validate track titles (intersection only)
            if self.lastfm_track_title_validation:
                for track in release.tracks.values():
                    if track.track_number in lastfm_release.tracks:
                        lastfm_title = normalize_track_title(
                            lastfm_release.tracks[
                                track.track_number].track_name)
                        # comparison is case-insensitive; a blank local
                        # title always counts as incorrect
                        if not track.track_title or track.track_title.lower(
                        ) != lastfm_title.lower():
                            violations.add(
                                Violation(
                                    ViolationType.INCORRECT_TRACK_TITLE,
                                    "Incorrect track title '{0}' should be: '{1}'"
                                    .format(track.track_title,
                                            lastfm_title)))

        # track artists
        for track in release.tracks.values():
            for artist in track.artists:
                # retry loop: repeats the lookup after a 1 second pause for
                # as long as LastfmCacheError is raised
                while True:
                    try:
                        validated_artist = self.lastfm.get_artist(
                            normalize_artist_name(artist)).artist_name
                        if validated_artist != artist:
                            violations.add(
                                Violation(
                                    ViolationType.TRACK_ARTIST_SPELLING,
                                    "Incorrectly spelled Track Artist '{0}' (should be '{1}')"
                                    .format(artist, validated_artist)))
                        break
                    except LastfmCache.ArtistNotFoundError:
                        # an unknown track artist is skipped without
                        # recording a violation
                        break
                    except LastfmCache.LastfmCacheError:
                        time.sleep(1)

        # release artists
        for track in release.tracks.values():
            for artist in track.release_artists:
                # same retry loop as for track artists above
                while True:
                    try:
                        validated_artist = self.lastfm.get_artist(
                            normalize_artist_name(artist)).artist_name
                        if validated_artist != artist:
                            violations.add(
                                Violation(
                                    ViolationType.RELEASE_ARTIST_SPELLING,
                                    "Incorrectly spelled Release Artist '{0}' (should be '{1}')"
                                    .format(artist, validated_artist)))
                        break
                    except LastfmCache.ArtistNotFoundError:
                        # an unknown release artist is skipped without
                        # recording a violation
                        break
                    except LastfmCache.LastfmCacheError:
                        time.sleep(1)

    # track numbers: one combined violation listing, per disc, the values
    # returned by validate_track_numbers()
    validated_track_numbers = release.validate_track_numbers()
    if validated_track_numbers:
        flattened_track_nums = []
        for disc in validated_track_numbers:
            flattened_track_nums.append(
                "\nDisc " + str(disc) + ": " +
                ",".join(str(i) for i in validated_track_numbers[disc]))
        violations.add(
            Violation(
                ViolationType.MISSING_TRACKS,
                "Release does not have a full set of tracks:{0}".format(
                    "".join(flattened_track_nums))))

    # total tracks: one violation per disc reported by
    # validate_total_tracks()
    validated_total_tracks = release.validate_total_tracks()
    for disc in validated_total_tracks:
        violations.add(
            Violation(
                ViolationType.TOTAL_TRACKS_INCONSISTENT,
                "Release disc {0} has blank, inconsistent or incorrect 'Total Tracks' tags"
                .format(disc)))

    # disc number
    validated_disc_numbers = release.validate_disc_numbers()
    if validated_disc_numbers:
        violations.add(
            Violation(
                ViolationType.MISSING_DISCS,
                "Release does not have a full set of discs: {0}".format(
                    ", ".join(str(i) for i in validated_disc_numbers))))

    # total discs
    if not release.validate_total_discs():
        violations.add(
            Violation(ViolationType.TOTAL_DISCS_INCONSISTENT,
                      "Release has incorrect 'Total Discs' tags"))

    # file type
    if len(release.get_tag_types()) != 1:
        violations.add(
            Violation(
                ViolationType.TAG_TYPES_INCONSISTENT,
                "Release has inconsistent tag types: {0}".format(", ".join(
                    [str(x) for x in release.get_tag_types()]))))

    # bitrate - CBR/VBR/Vx/APS/APE
    if len(release.get_codecs()) != 1:
        violations.add(
            Violation(
                ViolationType.CODECS_INCONSISTENT,
                "Release has inconsistent codecs: [{0}]".format(", ".join(
                    release.get_codecs()))))

    # CBR bitrates are compared after truncating to whole thousands
    if len(unique([int(x / 1000)
                   for x in release.get_cbr_bitrates()])) > 1:
        violations.add(
            Violation(
                ViolationType.CBR_INCONSISTENT,
                "Release has inconsistent CBR bitrates: {0}".format(
                    ", ".join([str(x)
                               for x in release.get_cbr_bitrates()]))))

    # filenames: each key of release.tracks is compared with the name
    # produced by the track's get_filename()
    for filename in release.tracks:
        correct_filename = release.tracks[filename].get_filename(
            release.is_va())
        if correct_filename and filename != correct_filename:
            violations.add(
                Violation(
                    ViolationType.FILENAME,
                    "Invalid filename: {0} - should be '{1}'".format(
                        filename, correct_filename)))

    # forbidden comment substrings
    # (matched against the lower-cased comment)
    for track in release.tracks.values():
        if not track.comment:
            continue
        for substr in self.forbidden_comment_substrings:
            if substr in track.comment.lower():
                violations.add(
                    Violation(
                        ViolationType.COMMENT_SUBSTRING,
                        "Invalid comment: contains forbidden substring '{0}'"
                        .format(substr)))

    release.num_violations = len(violations)

    return list(violations)
def validate_codec(self) -> Optional[str]:
    """Return the codec shared by every track, or None when they differ."""
    codecs = unique([track.get_codec() for track in self.tracks.values()])
    return codecs[0] if len(codecs) == 1 else None