def merge_hints(query_track, consensus_track, part, do_quick_match):
    """Build a Media.Track combining a query track with consensus album data.

    The title and index come from query_track; album, artist, disc, year,
    thumb URLs and GUIDs come from consensus_track. The given part is
    appended to the new track's parts.

    Args:
        query_track: Track whose name and index are kept.
        consensus_track: Track supplying the album-level metadata.
        part: Media part (file) to attach; also used to read the title tag.
        do_quick_match: When true, the title is refreshed via
            improve_from_tag() before use.

    Returns:
        The newly created Media.Track.
    """
    # If we did a quick match, read tags, as it may have much better tags.
    track_title = query_track.name
    if do_quick_match:
        track_title = improve_from_tag(track_title, part, 'title')

    # A missing or non-numeric index maps to -1 instead of letting int() raise.
    raw_index = query_track.index
    if raw_index is not None and str(raw_index).isdigit():
        index = int(raw_index)
    else:
        index = -1

    merged_track = Media.Track(
        index=index,
        album=toBytes(consensus_track.album),
        artist=toBytes(consensus_track.artist),
        title=toBytes(track_title),
        disc=toBytes(consensus_track.disc),
        album_thumb_url=toBytes(consensus_track.album_thumb_url),
        artist_thumb_url=toBytes(consensus_track.artist_thumb_url),
        year=toBytes(consensus_track.year),
        album_guid=toBytes(consensus_track.album_guid),
        artist_guid=toBytes(consensus_track.artist_guid))
    merged_track.parts.append(part)
    return merged_track
def merge_hints(query_track, consensus_track, part, do_quick_match):
    """Build a Media.Track combining a query track with consensus album data.

    The title and index come from query_track, the disc number from the
    file's 'discnumber' tag (defaulting to '1'), and the remaining album
    metadata from consensus_track. The given part is appended to the new
    track's parts.

    Args:
        query_track: Track whose name and index are kept.
        consensus_track: Track supplying album, artist, year, thumbs, GUIDs.
        part: Media part (file) to attach; also used to read tags.
        do_quick_match: When true, the title is refreshed via
            improve_from_tag() before use.

    Returns:
        The newly created Media.Track.
    """
    # If we did a quick match, read tags, as it may have much better tags.
    track_title = query_track.name
    if do_quick_match:
        track_title = improve_from_tag(track_title, part, 'title')

    # We don't want to use consensus disc numbers, since tags are more reliable.
    # It's common for bonus discs, etc. to get "split".
    try:
        disc_tag = improve_from_tag('1', part, 'discnumber')
        # Keep only the leading number of forms such as "1/2" or "1 of 2".
        disc = disc_tag.split('/')[0].split('of')[0].strip()
    except Exception:
        # Any tag-reading or parsing problem falls back to the first disc.
        disc = '1'

    # A missing or non-numeric index maps to -1.
    raw_index = query_track.index
    if raw_index is not None and str(raw_index).isdigit():
        index = int(raw_index)
    else:
        index = -1

    merged_track = Media.Track(
        index=index,
        album=toBytes(consensus_track.album),
        artist=toBytes(consensus_track.artist),
        title=toBytes(track_title),
        disc=disc,
        album_thumb_url=toBytes(consensus_track.album_thumb_url),
        artist_thumb_url=toBytes(consensus_track.artist_thumb_url),
        year=toBytes(consensus_track.year),
        album_guid=toBytes(consensus_track.album_guid),
        artist_guid=toBytes(consensus_track.artist_guid))
    merged_track.parts.append(part)
    return merged_track
def merge_hints(query_track, consensus_track, part, do_quick_match):
    """Merge a query track with consensus album metadata into a new Media.Track.

    Title and index are taken from query_track, the disc number from the
    file's own tags, and everything else from consensus_track. The part is
    appended to the resulting track's parts.

    Args:
        query_track: Track providing name and index.
        consensus_track: Track providing album, artist, year, thumbs, GUIDs.
        part: Media part (file) to attach and to read tags from.
        do_quick_match: When true, the title is re-read through
            improve_from_tag().

    Returns:
        The newly created Media.Track.
    """
    # If we did a quick match, read tags, as it may have much better tags.
    track_title = query_track.name
    if do_quick_match:
        track_title = improve_from_tag(track_title, part, 'title')

    # Only purely numeric indices are converted; anything else becomes -1.
    has_numeric_index = (query_track.index is not None
                         and str(query_track.index).isdigit())

    merged_track = Media.Track(
        index=int(query_track.index) if has_numeric_index else -1,
        album=toBytes(consensus_track.album),
        artist=toBytes(consensus_track.artist),
        title=toBytes(track_title),
        disc=_disc_from_tags(part),
        album_thumb_url=toBytes(consensus_track.album_thumb_url),
        artist_thumb_url=toBytes(consensus_track.artist_thumb_url),
        year=toBytes(consensus_track.year),
        album_guid=toBytes(consensus_track.album_guid),
        artist_guid=toBytes(consensus_track.artist_guid))
    merged_track.parts.append(part)
    return merged_track


def _disc_from_tags(part):
    """Return the disc number string from the file's 'discnumber' tag, or '1'."""
    # We don't want to use consensus disc numbers, since tags are more reliable.
    # It's common for bonus discs, etc. to get "split".
    try:
        raw = improve_from_tag('1', part, 'discnumber')
        # Reduce forms such as "1/2" or "1 of 2" to the leading number.
        return raw.split('/')[0].split('of')[0].strip()
    except Exception:
        return '1'
def improve_from_tag(existing, file, tag):
    """Return the file's value for `tag`, falling back to `existing`.

    Tags are read with mutagen.File(file, easy=True). If reading fails, or
    the tag is absent or has no values, `existing` is kept.

    Args:
        existing: Value to use when the tag cannot be read.
        file: File handed to mutagen.File().
        tag: Tag key to look up (e.g. 'title').

    Returns:
        The chosen value passed through toBytes().
    """
    tags = None
    try:
        tags = mutagen.File(file, easy=True)
    except Exception:
        # Best effort: a file whose tags cannot be read keeps the existing value.
        Log('There was an exception thrown reading tags.')

    if tags and tag in tags:
        values = tags[tag]
        # Guard against a present-but-empty value list.
        if values:
            existing = values[0]
    return toBytes(existing)
def lookup(query_list, result_list, language=None, fingerprint=False, mixed=False, multiple=False, do_quick_match=False):
    """Send the query tracks to the local Gracenote search service.

    Builds one request URL carrying path, userData and whatever title,
    artist, album, index and disc information each track has, then parses
    the XML response.

    NOTE(review): only the request-building and response-parsing portion of
    this function is visible here; the success return value is not shown.

    Args:
        query_list: Tracks to look up; each must have at least one part.
        result_list: Not used in the visible portion.
        language: Sent as the `lang` query parameter.
        fingerprint: Sent as the `fingerprint` flag.
        mixed: Sent as the `mixedContent` flag.
        multiple: Sent as the `multiple` flag.
        do_quick_match: Not used in the visible portion.

    Returns:
        (0, 0, 0) when query_list is empty or the request/parse fails.
    """
    # This shouldn't happen, but be safe.
    if len(query_list) == 0:
        return (0, 0, 0)

    # See if input looks like a sane album
    (sane_input_tracks, unique_input_albums, input_discs) = compute_input_sanity(query_list)

    # Build up the query with the contents of the query list.
    arg_chunks = []
    parts = {}

    Log('Running Gracenote match on %d tracks with fingerprinting: %d and mixedContent: %d and multiple: %d' % (len(query_list), fingerprint, mixed, multiple))
    for i, track in enumerate(query_list):

        # We need to pass at least a path and an identifier for each track that we know about.
        arg_chunks.append('&tracks[%d].path=%s' % (i, quote(track.parts[0], '')))
        arg_chunks.append('&tracks[%d].userData=%d' % (i, i))

        # Keep track of the identifier -> part mapping so we can reassemble later.
        parts[i] = track.parts[0]

        has_real_artist = track.artist and track.artist != 'Various Artists'

        if track.name:
            arg_chunks.append('&tracks[%d].title=%s' % (i, quote(toBytes(track.title or track.name), '')))
        if has_real_artist:
            arg_chunks.append('&tracks[%d].artist=%s' % (i, quote(toBytes(track.artist), '')))
        # Prefer an explicit album artist; otherwise reuse the track artist.
        if track.album_artist:
            arg_chunks.append('&tracks[%d].albumArtist=%s' % (i, quote(toBytes(track.album_artist), '')))
        elif has_real_artist:
            arg_chunks.append('&tracks[%d].albumArtist=%s' % (i, quote(toBytes(track.artist), '')))
        if track.album and track.album != '[Unknown Album]':
            arg_chunks.append('&tracks[%d].album=%s' % (i, quote(toBytes(track.album), '')))
        if track.index:
            arg_chunks.append('&tracks[%d].index=%s' % (i, track.index))
        if track.disc:
            arg_chunks.append('&tracks[%d].parentIndex=%s' % (i, track.disc))

        Log(' - %s/%s - %s/%s - %s' % (toBytes(track.artist), toBytes(track.album), toBytes(track.disc), toBytes(track.index), toBytes(track.name)))

    args = ''.join(arg_chunks)
    url = 'http://127.0.0.1:32400/services/gracenote/search?fingerprint=%d&mixedContent=%d&multiple=%d%s&lang=%s' % (fingerprint, mixed, multiple, args, language)
    try:
        response = urlopen(url)
        try:
            res = minidom.parse(response)
        finally:
            # Always release the connection, even if parsing fails.
            response.close()
    except Exception as e:
        Log('Error parsing Gracenote response: ' + str(e))
        return (0, 0, 0)
def improve_from_tag(existing, file, tag):
    """Return the file's value for `tag`, falling back to `existing`.

    Tags are read with mutagen.File(file, easy=True). A failure while
    reading is logged and treated as "no tags", so callers always get a
    value back.

    Args:
        existing: Value to use when the tag is unavailable.
        file: File handed to mutagen.File().
        tag: Tag key to look up (e.g. 'title').

    Returns:
        The chosen value passed through toBytes().
    """
    try:
        tags = mutagen.File(file, easy=True)
    except Exception:
        # Unreadable tags must not abort the caller; keep the existing value.
        Log('There was an exception thrown reading tags.')
        tags = None

    if tags and tag in tags and tags[tag]:
        existing = tags[tag][0]
    return toBytes(existing)
def Scan(path, files, media_list, subdirs, language=None, root=None, respect_tags=False):
    """Scan a directory for audio and match its tracks via run_queries().

    A cheap "quick match" is attempted first: track index and title are
    derived from file names and only a few files' tags are read to agree on
    artist and album. When the directory does not qualify, the query is
    built from AudioFiles.Process() instead. A final score below 50 discards
    the match results and falls back to tag data.

    Args:
        path: Directory being scanned; joined onto root for logging.
        files: Paths of the files in this directory.
        media_list: List updated in place with the resulting tracks.
        subdirs: Subdirectories of path; any entry disables quick matching.
        language: Passed to tag reading, Unicodize() and run_queries().
        root: Section root; files directly inside it are treated as mixed
            content.
        respect_tags: When true and match results were used, keep the
            tag-derived tracks and copy over only GUIDs, thumb URLs and
            missing artist/album names from the matched tracks.
    """
    # Scan for audio files.
    AudioFiles.Scan(path, files, media_list, subdirs, root)

    root_str = root or ''
    loc_str = os.path.join(root_str, path)
    Log('Scanning: ' + loc_str)
    Log('Files: ' + str(files))
    Log('Subdirs: ' + str(subdirs))

    # Look at the files and determine whether we can do a quick match (minimal tag parsing).
    do_quick_match = True
    mixed = False

    # Make sure we're looking at a leaf directory (no audio files below here).
    if len(subdirs) > 0:
        Log('Found directories below this one; won\'t attempt quick matching.')
        do_quick_match = False

    if files:

        # Make sure we're not sitting in the section root.
        parent_path = os.path.split(files[0])[0]
        if parent_path == root:
            Log('File(s) are in section root; doing expensive matching with mixed content.')
            do_quick_match = False
            mixed = True

        # Make sure we have reliable track indices for all files and there are no dupes.
        seen_indices = set()
        for f in files:
            try:
                index = re.search(r'^([0-9]{1,2})[^0-9].*', os.path.split(f)[-1]).groups(0)[0]
            except Exception:
                do_quick_match = False
                Log('Couldn\'t find track indices in all filenames; doing expensive matching.')
                break
            if index in seen_indices:
                do_quick_match = False
                mixed = True
                Log('Found duplicate track index: %s; doing expensive matching with mixed content.' % index)
                break
            seen_indices.add(index)

        # Read the first track's tags to check for multi-disc and VA.
        if do_quick_match:
            disc = album_artist = None
            try:
                (artist, album, title, track, disc, album_artist, compil) = AudioFiles.getInfoFromTag(files[0], language)
            except Exception:
                Log('Exception reading tags from first file; doing expensive matching.')
                do_quick_match = False

            # Make sure we are on the first disc.
            if disc is not None and disc > 1:
                Log('Skipping quick match because of non-first disc.')
                do_quick_match = False

            # We want to read all the tags for VA albums to pick up track artists.
            if album_artist is not None and album_artist == 'Various Artists':
                Log('Skipping quick match for Various Artists album.')
                do_quick_match = False

        artist = None
        album = None

        if do_quick_match:
            Log('Doing quick match')

            # See if we have some consensus on artist/album by reading a few tags.
            for tag_file in files[:3]:
                this_artist = this_album = tags = None
                try:
                    tags = mutagen.File(tag_file, easy=True)
                except Exception:
                    Log('There was an exception thrown reading tags.')

                if tags:
                    # See if there's an album artist tag.
                    album_artist_tags = [t for t in ['albumartist', 'TPE2', 'performer'] if t in tags]
                    if album_artist_tags:
                        this_artist = tags[album_artist_tags[0]][0]
                    elif 'artist' in tags:
                        this_artist = tags['artist'][0]
                    if 'album' in tags:
                        this_album = tags['album'][0]

                if artist and artist != this_artist:
                    Log('Found different artists in tags (%s vs. %s); doing expensive matching.' % (artist, this_artist))
                    do_quick_match = False
                    break

                if album and album != this_album:
                    # Report the album values that actually differed.
                    Log('Found different albums in tags (%s vs. %s); doing expensive matching.' % (album, this_album))
                    do_quick_match = False
                    break

                artist = this_artist
                album = this_album

            if not artist or not album:
                Log('Couldn\'t determine unique artist or album from tags; doing expensive matching.')
                do_quick_match = False

        query_list = []
        result_list = []
        fingerprint = False

        # Directory looks clean, let's build a query list directly from info gleaned from file names.
        if do_quick_match:
            Log('Building query list for quickmatch with artist: %s, album: %s' % (artist, album))

            # Determine if the artist and/or album appears in all filenames, since we'll want to strip these out for clean titles.
            lowered_names = [Unicodize(os.path.basename(f), language).lower() for f in files]
            strip_artist = all(artist.lower() in name for name in lowered_names)
            strip_album = all(album.lower() in name for name in lowered_names)

            for f in files:
                try:
                    filename = os.path.splitext(os.path.split(f)[1])[0]
                    (head, index, title) = re.split(r'^([0-9]{1,2})', filename)

                    # Replace underscores and dots with spaces.
                    title = re.sub(r'[_\. ]+', ' ', title)

                    # Things in parens seem to confuse Gracenote, so let's strip them out.
                    title = re.sub(r' ?\(.*\)', '', title)

                    # Remove artist name from title if it appears in all of them.
                    # The name is escaped so regex metacharacters in it are matched literally.
                    if strip_artist and len(files) > 2:
                        title = re.sub(r'(?i)' + re.escape(artist), '', title)

                    # Remove album title from title if it appears in all of them.
                    if strip_album and len(files) > 2:
                        title = re.sub(r'(?i)' + re.escape(album), '', title)

                    # Remove any remaining index-, artist-, and album-related cruft from the head of the track title.
                    title = re.sub(r'^[\W\-]+', '', title).strip()

                    # Last chance for artist or album prefix.
                    if not strip_artist and Unicodize(title, language).lower().find(artist.lower()) == 0:
                        title = title[len(artist):]

                    if not strip_album and Unicodize(title, language).lower().find(album.lower()) == 0:
                        title = title[len(album):]

                    t = Media.Track(artist=toBytes(artist), album=toBytes(album), title=toBytes(title), index=int(index))
                    t.parts.append(f)

                    Log(' - Adding: %s - %s' % (index, title))
                    query_list.append(t)

                except Exception as e:
                    Log('Error preparing tracks for quick matching: ' + str(e))

        # Otherwise, let's do old school directory crawling and tag reading.
        else:
            AudioFiles.Process(path, files, media_list, subdirs, root)
            query_list = list(media_list)

        # Try as-is first (ask for everything at once).
        discs = [query_list]
        final_match = run_queries(discs, result_list, language, fingerprint, mixed, do_quick_match)

        # If the match was still poor, and it looks like we have multiple discs, try splitting.
        if final_match < 75:
            discs = group_tracks_by_disc(query_list)
            if len(discs) > 1:
                Log('Result still looked bad, we will try splitting into separate per-disc queries.')
                other_result_list = []
                other_match = run_queries(discs, other_result_list, language, fingerprint, mixed, do_quick_match)

                if other_match > final_match:
                    Log('The split result was best, we will use it.')
                    result_list = other_result_list
                    final_match = other_match

        # If we have a bad match, don't use it.
        if final_match < 50.0:
            Log('That was terrible, let us not use it.')
            result_list = []

        # Finalize the results.
        used_tags = False
        del media_list[:]
        if len(result_list) > 0:
            # Gracenote results.
            for result in result_list:
                media_list.append(result)
        else:
            # We bailed during the GN lookup, fall back to tags.
            used_tags = True
            AudioFiles.Process(path, files, media_list, subdirs, root)

        # If we wanted to respect tags, then make sure we used tags.
        if not used_tags and respect_tags:

            # Let's grab tag results, and then set GUIDs we found.
            tag_media_list = []
            AudioFiles.Process(path, files, tag_media_list, subdirs, root)

            # Now pull the GN data out, keyed by each track's first part.
            path_map = {}
            for track in media_list:
                path_map[track.parts[0]] = track

            for track in tag_media_list:
                if track.parts[0] in path_map:
                    gn_track = path_map[track.parts[0]]
                    track.guid = gn_track.guid
                    track.album_guid = gn_track.album_guid
                    track.artist_guid = gn_track.artist_guid
                    track.album_thumb_url = gn_track.album_thumb_url
                    track.artist_thumb_url = gn_track.artist_thumb_url

                    # If the tags failed, fill in key data from Gracenote.
                    if track.album == '[Unknown Album]':
                        track.album = gn_track.album

                    if track.artist == '[Unknown Artist]':
                        track.artist = gn_track.artist

            media_list[:] = tag_media_list
average_album_ratio = total_album_ratio / len(query_list) # If we've got really excellent track matches on a good number of tracks, then it's likely # that the GN match is just calling the artist different (VA vs artist, etc.) Prefer the name # in the tag if we have one and it's consistent. # track_min_ratio = 0.88 if average_album_ratio > 0.90: track_min_ratio = 0.75 if average_album_ratio > 0.98 and number_of_matched_tracks == len(query_list): track_min_ratio = 0.50 Log('Track average lev ratio %f, album lev ratio %f, required track ratio: %f' % (average_track_ratio, average_album_ratio, track_min_ratio)) if len(query_list) >= 4 and average_track_ratio > track_min_ratio: if number_of_artists == 1: Log('Using override artist of %s' % toBytes(query_list[0].artist)) artist_override = query_list[0].artist elif len(query_list) < 4 or average_track_ratio < 0.75 or ratio < 0.20: return (0, 0, 0) # Check for Various Artists albums which come back matching to an artist, or movie name. number_of_album_artists = len(set([q.album_artist for q in query_list if q.album_artist])) if number_of_artists > 1 and number_of_album_artists == 1 and query_list[0].album_artist and LevenshteinRatio(query_list[0].album_artist, 'Various Artists') > 0.9: Log('Using override artist of Various Artists') artist_override = 'Various Artists' # Restore track artists from tags if necessary. for i, query_track in enumerate(query_list): if str(i) in matched_tracks: track = matched_tracks[str(i)] if query_track.artist and not track.getAttribute('originalTitle'):
def lookup(query_list, result_list, language=None, fingerprint=False, mixed=False, multiple=False, do_quick_match=False):
    """Query the local Gracenote search service for the given tracks.

    One request URL is assembled from every track's path, a numeric
    userData identifier and the metadata the track carries; the XML
    response is then parsed.

    NOTE(review): only the request-building and response-parsing portion of
    this function is visible here; the success return value is not shown.

    Args:
        query_list: Tracks to look up; each must have at least one part.
        result_list: Not used in the visible portion.
        language: Sent as the `lang` query parameter.
        fingerprint: Sent as the `fingerprint` flag.
        mixed: Sent as the `mixedContent` flag.
        multiple: Sent as the `multiple` flag.
        do_quick_match: Not used in the visible portion.

    Returns:
        (0, 0, 0) when query_list is empty or the request/parse fails.
    """
    # This shouldn't happen, but be safe.
    if len(query_list) == 0:
        return (0, 0, 0)

    # See if input looks like a sane album
    (sane_input_tracks, unique_input_albums, input_discs) = compute_input_sanity(query_list)

    Log('Running Gracenote match on %d tracks with fingerprinting: %d and mixedContent: %d and multiple: %d' % (len(query_list), fingerprint, mixed, multiple))

    # Build up the query with the contents of the query list.
    parts = {}
    pieces = []
    for i, track in enumerate(query_list):
        # Keep track of the identifier -> part mapping so we can reassemble later.
        parts[i] = track.parts[0]
        pieces.extend(_track_query_args(i, track))
        Log(' - %s/%s - %s/%s - %s' % (toBytes(track.artist), toBytes(track.album), toBytes(track.disc), toBytes(track.index), toBytes(track.name)))
    args = ''.join(pieces)

    url = 'http://127.0.0.1:32400/services/gracenote/search?fingerprint=%d&mixedContent=%d&multiple=%d%s&lang=%s' % (fingerprint, mixed, multiple, args, language)
    try:
        response = urlopen(url)
        try:
            res = minidom.parse(response)
        finally:
            # Release the connection whether or not the XML parsed.
            response.close()
    except Exception as e:
        Log('Error parsing Gracenote response: ' + str(e))
        return (0, 0, 0)


def _track_query_args(i, track):
    """Return the list of URL query fragments describing track number i."""
    prefix = '&tracks[%d]' % i

    # We need to pass at least a path and an identifier for each track that we know about.
    fragments = [
        '%s.path=%s' % (prefix, quote(track.parts[0], '')),
        '%s.userData=%d' % (prefix, i),
    ]

    has_real_artist = track.artist and track.artist != 'Various Artists'

    if track.name:
        fragments.append('%s.title=%s' % (prefix, quote(toBytes(track.title or track.name), '')))
    if has_real_artist:
        fragments.append('%s.artist=%s' % (prefix, quote(toBytes(track.artist), '')))
    # Prefer an explicit album artist; otherwise reuse the track artist.
    if track.album_artist:
        fragments.append('%s.albumArtist=%s' % (prefix, quote(toBytes(track.album_artist), '')))
    elif has_real_artist:
        fragments.append('%s.albumArtist=%s' % (prefix, quote(toBytes(track.artist), '')))
    if track.album and track.album != '[Unknown Album]':
        fragments.append('%s.album=%s' % (prefix, quote(toBytes(track.album), '')))
    if track.index:
        fragments.append('%s.index=%s' % (prefix, track.index))
    if track.disc:
        fragments.append('%s.parentIndex=%s' % (prefix, track.disc))
    return fragments
def Scan(path, files, media_list, subdirs, language=None, root=None, respect_tags=False):
    """Scan a directory for audio and match its tracks via run_queries().

    A cheap "quick match" is attempted first: track index and title are
    derived from file names and only a few files' tags are read to agree on
    artist and album. When the directory does not qualify, the query is
    built from AudioFiles.Process() instead. A final score below 50 discards
    the match results and falls back to tag data.

    Args:
        path: Directory being scanned; joined onto root for logging.
        files: Paths of the files in this directory.
        media_list: List updated in place with the resulting tracks.
        subdirs: Subdirectories of path; any entry disables quick matching.
        language: Passed to tag reading, Unicodize() and run_queries().
        root: Section root; files directly inside it are treated as mixed
            content.
        respect_tags: When true and match results were used, keep the
            tag-derived tracks and copy over only GUIDs, thumb URLs and
            missing artist/album names from the matched tracks.
    """
    # Scan for audio files.
    AudioFiles.Scan(path, files, media_list, subdirs, root)

    root_str = root or ''
    loc_str = os.path.join(root_str, path)
    Log('Scanning: ' + loc_str)
    Log('Files: ' + str(files))
    Log('Subdirs: ' + str(subdirs))

    # Look at the files and determine whether we can do a quick match (minimal tag parsing).
    do_quick_match = True
    mixed = False

    # Make sure we're looking at a leaf directory (no audio files below here).
    if len(subdirs) > 0:
        Log('Found directories below this one; won\'t attempt quick matching.')
        do_quick_match = False

    if files:

        # Make sure we're not sitting in the section root.
        parent_path = os.path.split(files[0])[0]
        if parent_path == root:
            Log('File(s) are in section root; doing expensive matching with mixed content.')
            do_quick_match = False
            mixed = True

        # Make sure we have reliable track indices for all files and there are no dupes.
        seen_indices = set()
        for f in files:
            try:
                index = re.search(r'^([0-9]{1,2})[^0-9].*', os.path.split(f)[-1]).groups(0)[0]
            except Exception:
                do_quick_match = False
                Log('Couldn\'t find track indices in all filenames; doing expensive matching.')
                break
            if index in seen_indices:
                do_quick_match = False
                mixed = True
                Log('Found duplicate track index: %s; doing expensive matching with mixed content.' % index)
                break
            seen_indices.add(index)

        # Read the first track's tags to check for multi-disc and VA.
        if do_quick_match:
            disc = album_artist = None
            try:
                (artist, album, title, track, disc, album_artist, compil) = AudioFiles.getInfoFromTag(files[0], language)
            except Exception:
                Log('Exception reading tags from first file; doing expensive matching.')
                do_quick_match = False

            # Make sure we are on the first disc.
            if disc is not None and disc > 1:
                Log('Skipping quick match because of non-first disc.')
                do_quick_match = False

            # We want to read all the tags for VA albums to pick up track artists.
            if album_artist is not None and album_artist == 'Various Artists':
                Log('Skipping quick match for Various Artists album.')
                do_quick_match = False

        artist = None
        album = None

        if do_quick_match:
            Log('Doing quick match')

            # See if we have some consensus on artist/album by reading a few tags.
            for tag_file in files[:3]:
                this_artist = this_album = tags = None
                try:
                    tags = mutagen.File(tag_file, easy=True)
                except Exception:
                    Log('There was an exception thrown reading tags.')

                if tags:
                    # See if there's an album artist tag.
                    album_artist_tags = [t for t in ['albumartist', 'TPE2', 'performer'] if t in tags]
                    if album_artist_tags:
                        this_artist = tags[album_artist_tags[0]][0]
                    elif 'artist' in tags:
                        this_artist = tags['artist'][0]
                    if 'album' in tags:
                        this_album = tags['album'][0]

                if artist and artist != this_artist:
                    Log('Found different artists in tags (%s vs. %s); doing expensive matching.' % (artist, this_artist))
                    do_quick_match = False
                    break

                if album and album != this_album:
                    Log('Found different albums in tags (%s vs. %s); doing expensive matching.' % (album, this_album))
                    do_quick_match = False
                    break

                artist = this_artist
                album = this_album

            if not artist or not album:
                Log('Couldn\'t determine unique artist or album from tags; doing expensive matching.')
                do_quick_match = False

        query_list = []
        result_list = []
        fingerprint = False

        # Directory looks clean, let's build a query list directly from info gleaned from file names.
        if do_quick_match:
            Log('Building query list for quickmatch with artist: %s, album: %s' % (artist, album))

            # Determine if the artist and/or album appears in all filenames, since we'll want to strip these out for clean titles.
            lowered_names = [Unicodize(os.path.basename(f), language).lower() for f in files]
            strip_artist = all(artist.lower() in name for name in lowered_names)
            strip_album = all(album.lower() in name for name in lowered_names)

            for f in files:
                try:
                    filename = os.path.splitext(os.path.split(f)[1])[0]
                    (head, index, title) = re.split(r'^([0-9]{1,2})', filename)

                    # Replace underscores and dots with spaces.
                    title = re.sub(r'[_\. ]+', ' ', title)

                    # Things in parens seem to confuse Gracenote, so let's strip them out.
                    title = re.sub(r' ?\(.*\)', '', title)

                    # Remove artist name from title if it appears in all of them.
                    # The name is escaped so regex metacharacters in it are matched literally.
                    if strip_artist and len(files) > 2:
                        title = re.sub(r'(?i)' + re.escape(artist), '', title)

                    # Remove album title from title if it appears in all of them.
                    if strip_album and len(files) > 2:
                        title = re.sub(r'(?i)' + re.escape(album), '', title)

                    # Remove any remaining index-, artist-, and album-related cruft from the head of the track title.
                    title = re.sub(r'^[\W\-]+', '', title).strip()

                    # Last chance for artist or album prefix.
                    if not strip_artist and Unicodize(title, language).lower().find(artist.lower()) == 0:
                        title = title[len(artist):]

                    if not strip_album and Unicodize(title, language).lower().find(album.lower()) == 0:
                        title = title[len(album):]

                    t = Media.Track(artist=toBytes(artist), album=toBytes(album), title=toBytes(title), index=int(index))
                    t.parts.append(f)

                    Log(' - Adding: %s - %s' % (index, title))
                    query_list.append(t)

                except Exception as e:
                    Log('Error preparing tracks for quick matching: ' + str(e))

        # Otherwise, let's do old school directory crawling and tag reading.
        else:
            AudioFiles.Process(path, files, media_list, subdirs, root)
            query_list = list(media_list)

        # Try as-is first (ask for everything at once).
        discs = [query_list]
        final_match = run_queries(discs, result_list, language, fingerprint, mixed, do_quick_match)

        # If the match was still poor, and it looks like we have multiple discs, try splitting.
        if final_match < 75:
            discs = group_tracks_by_disc(query_list)
            if len(discs) > 1:
                Log('Result still looked bad, we will try splitting into separate per-disc queries.')
                other_result_list = []
                other_match = run_queries(discs, other_result_list, language, fingerprint, mixed, do_quick_match)

                if other_match > final_match:
                    Log('The split result was best, we will use it.')
                    result_list = other_result_list
                    final_match = other_match

        # If we have a bad match, don't use it.
        if final_match < 50.0:
            Log('That was terrible, let us not use it.')
            result_list = []

        # Finalize the results.
        used_tags = False
        del media_list[:]
        if len(result_list) > 0:
            # Gracenote results.
            for result in result_list:
                media_list.append(result)
        else:
            # We bailed during the GN lookup, fall back to tags.
            used_tags = True
            AudioFiles.Process(path, files, media_list, subdirs, root)

        # If we wanted to respect tags, then make sure we used tags.
        if not used_tags and respect_tags:

            # Let's grab tag results, and then set GUIDs we found.
            tag_media_list = []
            AudioFiles.Process(path, files, tag_media_list, subdirs, root)

            # Now pull the GN data out, keyed by each track's first part.
            path_map = {}
            for track in media_list:
                path_map[track.parts[0]] = track

            for track in tag_media_list:
                if track.parts[0] in path_map:
                    gn_track = path_map[track.parts[0]]
                    track.guid = gn_track.guid
                    track.album_guid = gn_track.album_guid
                    track.artist_guid = gn_track.artist_guid
                    track.album_thumb_url = gn_track.album_thumb_url
                    track.artist_thumb_url = gn_track.artist_thumb_url

                    # If the tags failed, fill in key data from Gracenote.
                    if track.album == '[Unknown Album]':
                        track.album = gn_track.album

                    if track.artist == '[Unknown Artist]':
                        track.artist = gn_track.artist

            media_list[:] = tag_media_list
# that the GN match is just calling the artist different (VA vs artist, etc.) Prefer the name # in the tag if we have one and it's consistent. # track_min_ratio = 0.88 if average_album_ratio > 0.90: track_min_ratio = 0.75 if average_album_ratio > 0.98 and number_of_matched_tracks == len( query_list): track_min_ratio = 0.50 Log('Track average lev ratio %f, album lev ratio %f, required track ratio: %f' % (average_track_ratio, average_album_ratio, track_min_ratio)) if len(query_list) >= 4 and average_track_ratio > track_min_ratio: if number_of_artists == 1: Log('Using override artist of %s' % toBytes(query_list[0].artist)) artist_override = query_list[0].artist elif len(query_list ) < 4 or average_track_ratio < 0.75 or ratio < 0.20: return (0, 0, 0) # Check for Various Artists albums which come back matching to an artist, or movie name. number_of_album_artists = len( set([q.album_artist for q in query_list if q.album_artist])) if number_of_artists > 1 and number_of_album_artists == 1 and query_list[ 0].album_artist and LevenshteinRatio(query_list[0].album_artist, 'Various Artists') > 0.9: Log('Using override artist of Various Artists') artist_override = 'Various Artists' # Restore track artists from tags if necessary.
def Scan(path, files, mediaList, subdirs, exts, root=None):
    """Remove unwanted entries from `files` and `subdirs` in place.

    Files are dropped when their extension is not in `exts`, when they are
    missing, empty, unreadable or hidden, when they match a .plexignore file
    pattern, or when their MP4 codec looks DRM-protected. Directories are
    dropped when they match IGNORE_DIRS (plus ROOT_IGNORE_DIRS when `path`
    is empty) or a .plexignore glob.

    Args:
        path: Directory being scanned; an empty value enables the
            ROOT_IGNORE_DIRS patterns.
        files: List of file paths; filtered in place.
        mediaList: Unused here.
        subdirs: List of subdirectory paths; filtered in place.
        exts: Collection of allowed extensions, lowercase, without the dot.
        root: Section root, used to locate .plexignore files.
    """
    files_to_whack = []
    plexignore_files = []
    plexignore_dirs = []
    use_unicode = os.path.supports_unicode_filenames

    # Build a list of things to ignore based on a .plexignore file in this dir.
    if root and Utils.ContainsFile(files, '.plexignore'):
        ParsePlexIgnore(os.path.join(root, path, '.plexignore'), plexignore_files, plexignore_dirs)

    # Also look for a .plexignore in the 'root' for this source.
    if root and files and root != os.path.dirname(files[0]):
        if Utils.ContainsFile(os.listdir(root), '.plexignore'):
            ParsePlexIgnore(os.path.join(root, '.plexignore'), plexignore_files, plexignore_dirs)

    for f in files:
        # Only use unicode if it's supported, which it is on Windows and OS X,
        # but not Linux. This allows things to work with non-ASCII characters
        # without having to go through a bunch of work to ensure the Linux
        # filesystem is UTF-8 "clean".
        #
        if use_unicode:
            try:
                filename = unicode(f.decode('utf-8'))
            except Exception:
                # The name cannot be decoded: drop the file and skip the
                # remaining checks, which would otherwise use an unbound or
                # stale `filename`.
                files_to_whack.append(f)
                continue
        else:
            filename = f

        (basename, ext) = os.path.splitext(f)
        basename = os.path.basename(basename)

        # If extension is wrong, don't include.
        if ext.lower()[1:] not in exts:
            files_to_whack.append(f)

        # Broken symlinks and zero byte files need not apply.
        if not os.path.exists(filename) or os.path.getsize(filename) == 0:
            files_to_whack.append(f)

        # Remove unreadable files.
        if not os.access(filename, os.R_OK):
            # If access() claims the file is unreadable, try to read a byte just to be sure.
            try:
                with open(f, 'rb') as read_file:
                    read_file.read(1)
            except Exception:
                files_to_whack.append(f)

        # Remove hidden files.
        if len(basename) == 0 or basename[0] == '.':
            files_to_whack.append(f)

        # Remove .plexignore file regex matches.
        for rx in plexignore_files:
            if re.match(rx, os.path.basename(f), re.IGNORECASE):
                files_to_whack.append(f)

        # Remove files that look DRM'd.
        try:
            with open(f, 'rb') as mp4_file:
                codec = mp4.MP4Info(mp4.Atoms(mp4_file), mp4_file).codec
            if codec in ['drms', 'enca', 'encv']:
                Utils.Log('Skipping file %s because it looks DRM-protected (has codec: %s)' % (toBytes(f), codec))
                files_to_whack.append(f)
        except Exception:
            # Deliberately best-effort: files that cannot be opened or parsed
            # as MP4 are simply not treated as DRM-protected.
            pass

    # See what directories to ignore. Work on a copy so the module-level
    # IGNORE_DIRS is not extended in place on every root-level scan.
    ignore_dirs_total = list(IGNORE_DIRS)
    if len(path) == 0:
        ignore_dirs_total.extend(ROOT_IGNORE_DIRS)

    dirs_to_whack = []
    for subdir in subdirs:
        # See which directories to get rid of.
        base_dir = os.path.basename(subdir)
        for rx in ignore_dirs_total:
            if re.match(rx, base_dir, re.IGNORECASE):
                dirs_to_whack.append(subdir)
                break

    # Add glob matches from .plexignore before whacking.
    for pattern in plexignore_dirs:
        for match in glob.glob(pattern):
            if os.path.isdir(match):
                dirs_to_whack.append(os.path.dirname(match))
            else:
                files_to_whack.append(match)

    # Whack files.
    for f in set(files_to_whack):
        if f in files:
            files.remove(f)

    # Remove the directories.
    for d in set(dirs_to_whack):
        if d in subdirs:
            subdirs.remove(d)