def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint one audio file and store it, unless its hash is known.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name to store; when falsy, the name returned by
            ``decoder.path_to_songname(filepath)`` is used instead.

    Returns:
        None in every case.
    """
    derived_name = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    if not song_name:
        song_name = derived_name

    # Guard: a file whose hash is already registered is not processed again.
    if digest in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return

    worker_result = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name
    )
    song_name, hashes, file_hash, cdate = worker_result

    # Diagnostic output: configured fingerprint id and the worker's cdate.
    print(self.config['fingerprint']['id'])
    print(cdate)

    song_id = self.db.insert_song(song_name, file_hash, cdate)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    # Presumably refreshes self.songhashes_set -- confirm against its definition.
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, video_id, song_name=None, cached_hashes=None):
    """Fingerprint a single file and store it together with ``video_id``.

    Args:
        filepath: Path of the audio file to fingerprint.
        video_id: Identifier passed through to ``self.db.insert_song``.
        song_name: Optional name; falls back to the name derived from
            ``filepath`` when falsy.
        cached_hashes: Forwarded unchanged to ``_fingerprint_worker``.

    Returns:
        None when the file hash is already in ``self.songhashes_set``,
        otherwise ``{"song_length": length}`` with the length reported by
        the worker.
    """
    fallback_name = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    song_name = song_name if song_name else fallback_name

    # Already-known files are reported and skipped.
    if digest in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return None

    worker_result = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
        cached_hashes=cached_hashes)
    song_name, hashes, file_hash, length_in_seconds = worker_result

    song_id = self.db.insert_song(song_name, video_id, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()

    return {"song_length": length_in_seconds}
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every not-yet-known file under ``path`` using a process pool.

    Progress messages go both to ``dejavu.shared.UITEXTLOGGER`` and stdout.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: Extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes; when falsy the CPU count is
            used, and non-positive values are replaced by 1.

    Raises:
        Whatever the database calls raise; worker failures are reported and
        skipped. The pool is terminated before any exception propagates.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    pool = multiprocessing.Pool(nprocesses)
    try:
        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                dejavu.shared.UITEXTLOGGER.emit(
                    "%s already fingerprinted, continuing..." % filename)
                print("%s already fingerprinted, continuing..." % filename)
                continue
            filenames_to_fingerprint.append(filename)

        # Prepare _fingerprint_worker input: one (filename, limit) per file.
        worker_input = [(name, self.limit)
                        for name in filenames_to_fingerprint]

        # Send off our tasks
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them
        while True:
            try:
                # next() works on both Python 2 and 3, unlike iterator.next().
                song_name, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only real errors are swallowed here; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                dejavu.shared.UITEXTLOGGER.emit("Failed fingerprinting")
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here
                traceback.print_exc(file=sys.stdout)
            else:
                dejavu.shared.UITEXTLOGGER.emit(
                    "Saving finger prints to Database for %s" % song_name)
                print("Saving finger prints to Database for %s" % song_name)
                sid = self.db.insert_song(song_name, file_hash)
                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()
                dejavu.shared.UITEXTLOGGER.emit(
                    "Finished saving finger prints to Database for %s"
                    % song_name)
                print("Finished saving finger prints to Database for %s"
                      % song_name)

        pool.close()
    except BaseException:
        # Do not leave worker processes running when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()
def fingerprint_file(self, filepath, video_id, song_name=None, cached_hashes=None):
    """Fingerprint ``filepath`` and record it for ``video_id``.

    Args:
        filepath: Path of the audio file to fingerprint.
        video_id: Identifier stored alongside the song by
            ``self.db.insert_song``.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.
        cached_hashes: Passed straight to ``_fingerprint_worker``.

    Returns:
        None if the file was already fingerprinted; otherwise a dict with
        the single key ``"song_length"`` holding the worker-reported length.
    """
    name_from_path = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    if not song_name:
        song_name = name_from_path

    already_known = content_hash in self.songhashes_set
    if already_known:
        # Nothing to store for a file we have seen before.
        print("%s already fingerprinted, continuing..." % song_name)
        return None

    song_name, hashes, file_hash, length_in_seconds = _fingerprint_worker(
        filepath, self.limit,
        song_name=song_name, cached_hashes=cached_hashes)

    new_id = self.db.insert_song(song_name, video_id, file_hash)
    self.db.insert_hashes(new_id, hashes)
    self.db.set_song_fingerprinted(new_id)
    self.get_fingerprinted_songs()

    info = {"song_length": length_in_seconds}
    return info
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every not-yet-known file under ``path`` using a process pool.

    Messages are written to the ``'dejavu'`` logger.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: Extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes; when falsy the CPU count is
            used, and non-positive values are replaced by 1.

    Raises:
        Whatever the database calls raise; worker failures are logged with
        their traceback and skipped. The pool is terminated before any
        exception propagates.
    """
    log = logging.getLogger('dejavu')

    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    pool = multiprocessing.Pool(nprocesses)
    try:
        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                # Logger.warn is a deprecated alias; warning() is the
                # supported spelling.
                log.warning(
                    "%s already fingerprinted, continuing..." % filename)
                continue
            filenames_to_fingerprint.append(filename)

        # Prepare _fingerprint_worker input: one (filename, limit) per file.
        worker_input = [(name, self.limit)
                        for name in filenames_to_fingerprint]

        # Send off our tasks
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them
        while True:
            try:
                # next() works on both Python 2 and 3, unlike iterator.next().
                song_name, hashes, file_hash, audio_length = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only real errors are swallowed here; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                log.exception("Failed fingerprinting")
            else:
                log.debug("Inserting " + song_name + " in database")
                sid = self.db.insert_song(song_name, file_hash, audio_length)
                # Coerce the second element of every hash pair to a plain int
                # before handing the set to the database layer.
                self.db.insert_hashes(
                    sid, {(x[0], int(x[1])) for x in hashes})
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()
                log.info(song_name + " inserted in database")

        pool.close()
    except BaseException:
        # Do not leave worker processes running when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()
def fingerprint_directory(self, nprocesses=None, extensions=None):
    """Fingerprint every not-yet-known file in the configured folder.

    The folder is read from ``self.config['fingerprint'].get('folder')`` and
    each stored song is tagged with ``self.config['fingerprint']['id']``.

    Args:
        nprocesses: Number of worker processes; when falsy the CPU count is
            used, and non-positive values are replaced by 1.
        extensions: File extensions to look for. Defaults to ``[".mp3"]``,
            which was previously hard-coded.

    Raises:
        Whatever the database calls raise; worker failures are printed with
        their traceback and skipped. The pool is terminated before any
        exception propagates.
    """
    if extensions is None:
        extensions = [".mp3"]

    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    pool = multiprocessing.Pool(nprocesses)
    try:
        folder = self.config['fingerprint'].get('folder')
        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(folder, extensions):
            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                print("%s already fingerprinted, continuing..." % filename)
                continue
            filenames_to_fingerprint.append(filename)

        # Prepare _fingerprint_worker input: one (filename, limit) per file.
        worker_input = [(name, self.limit)
                        for name in filenames_to_fingerprint]

        # Send off our tasks
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them
        while True:
            try:
                # next() works on both Python 2 and 3, unlike iterator.next().
                song_name, hashes, file_hash, cdate = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only real errors are swallowed here; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here
                traceback.print_exc(file=sys.stdout)
            else:
                sid = self.db.insert_song(
                    song_name, file_hash, cdate,
                    self.config['fingerprint']['id'])
                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()

        pool.close()
    except BaseException:
        # Do not leave worker processes running when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()
def fingerprint_file(self, filepath, output_dir=None, song_name=None):
    """Fingerprint ``filepath`` and dump the hashes to a ``.fingerp`` text file.

    The output file is named ``<song_name>.fingerp`` and holds one line per
    hash pair, formatted as ``<pair[0]> - <pair[1]>``, ordered by the second
    element of each pair.

    Args:
        filepath: Path of the audio file to fingerprint.
        output_dir: Directory for the output file. ``None`` (the default)
            now means the current directory; it used to crash inside
            ``os.path.join``.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.
    """
    songname = decoder.path_to_songname(filepath)
    song_name = song_name or songname

    # The worker returns its own file hash, so the file is not hashed here.
    song_name, hashes, _file_hash = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)

    target_dir = os.curdir if output_dir is None else output_dir
    out_path = os.path.join(target_dir, song_name + '.fingerp')

    ordered = sorted(hashes, key=lambda pair: pair[1])

    # The context manager closes the file even if a write fails.
    with open(out_path, 'w') as out:
        out.writelines(
            str(pair[0]) + " - " + str(pair[1]) + "\n" for pair in ordered)
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every not-yet-known file under ``path`` using a process pool.

    A file counts as known when ``self.db.get_song_by_hash`` returns
    something other than None for its hash.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: Extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes; when falsy the CPU count is
            used, and non-positive values are replaced by 1.

    Raises:
        Whatever the database calls raise; worker failures are logged,
        printed with their traceback and skipped. The pool is terminated
        before any exception propagates.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    pool = multiprocessing.Pool(nprocesses)
    try:
        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files
            file_hash = decoder.unique_hash(filename)
            if self.db.get_song_by_hash(file_hash) is not None:
                logger.info(
                    "%s already fingerprinted, continuing..." % filename)
                continue
            filenames_to_fingerprint.append(filename)

        # Prepare _fingerprint_worker input: one (filename, limit) per file.
        worker_input = [(name, self.limit)
                        for name in filenames_to_fingerprint]

        # Send off our tasks
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them
        while True:
            try:
                song_name, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only real errors are swallowed here; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                logger.error("Failed fingerprinting")
                # Print traceback because we can't reraise it here
                traceback.print_exc(file=sys.stdout)
            else:
                sid = self.db.insert_song(song_name, file_hash)
                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)

        pool.close()
    except BaseException:
        # Do not leave worker processes running when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint one file and store it unless the database already has it.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.

    Returns:
        None in every case.
    """
    name_from_path = decoder.path_to_songname(filepath)
    file_hash = decoder.unique_hash(filepath)
    if not song_name:
        song_name = name_from_path

    # A non-None lookup result means this hash is already stored.
    existing = self.db.get_song_by_hash(file_hash)
    if existing is not None:
        logger.info("%s already fingerprinted, continuing..." % song_name)
        return

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name)
    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint one file and store it unless its hash is already known.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.

    Returns:
        None in every case.
    """
    derived_name = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    if not song_name:
        song_name = derived_name

    # Files whose hash is already registered are reported and skipped.
    if digest in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return

    worker_result = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)
    song_name, hashes, file_hash = worker_result

    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Store fingerprints for ``filepath`` if the file has not been seen yet.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; when falsy, the result of
            ``decoder.path_to_songname(filepath)`` is used.

    Returns:
        None in every case.
    """
    name_from_path = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name if song_name else name_from_path

    seen_before = content_hash in self.songhashes_set
    if seen_before:
        # Nothing is written for a file that is already registered.
        print("%s already fingerprinted, continuing..." % song_name)
        return

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name)

    new_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(new_id, hashes)
    self.db.set_song_fingerprinted(new_id)
    self.get_fingerprinted_songs()
def fingerprint_translation_record_file(self, filepath, song_name=None):
    """Fingerprint one file and store its hashes via ``insert_repeat_hashes``.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.

    Returns:
        None in every case.
    """
    derived_name = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    if not song_name:
        song_name = derived_name

    # Already-known files are reported on stdout and in the log, then skipped.
    if digest in self.songhashes_set:
        msg = "%s already fingerprinted, continuing..." % song_name
        print(msg)
        self.logger.info(msg)
        return

    worker_result = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)
    song_name, hashes, file_hash = worker_result

    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_repeat_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint one file and store it with title, artist and genre tags.

    Tags come from ``self.get_song_metadata``. A falsy title falls back to
    the filename returned by the worker; falsy artist or genre become "".

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name handed to the worker; the name derived from
            ``filepath`` is used when this is falsy.

    Returns:
        None in every case.
    """
    derived_name = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    if not song_name:
        song_name = derived_name

    # Files whose hash is already registered are reported and skipped.
    if digest in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return

    worker_result = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)
    filename, song_name, hashes, file_hash = worker_result

    tags = self.get_song_metadata(filename)
    title = tags.title if tags.title else filename
    artist = tags.artist if tags.artist else ""
    genre = tags.genre if tags.genre else ""

    song_id = self.db.insert_song(title, artist, genre, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint one file and store it unless its hash is already known.

    Messages are written to the ``'dejavu'`` logger.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.

    Returns:
        None in every case.
    """
    log = logging.getLogger('dejavu')

    songname = decoder.path_to_songname(filepath)
    song_hash = decoder.unique_hash(filepath)
    song_name = song_name or songname

    # don't refingerprint already fingerprinted files
    if song_hash in self.songhashes_set:
        # Logger.warn is a deprecated alias; warning() is the supported
        # spelling.
        log.warning("%s already fingerprinted, continuing..." % song_name)
        return

    song_name, hashes, file_hash, audio_length = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)

    log.debug("Inserting " + song_name + " in database")
    sid = self.db.insert_song(song_name, file_hash, audio_length)
    # Coerce the second element of every hash pair to a plain int before
    # insertion, the same normalisation the directory-level routine applies;
    # it is a no-op for values that are already ints.
    self.db.insert_hashes(sid, {(x[0], int(x[1])) for x in hashes})
    self.db.set_song_fingerprinted(sid)
    self.get_fingerprinted_songs()
    log.info(song_name + " inserted in database")
def fingerprint_file(self, filepath, song_name=None, creative_id=None):
    """Fingerprint one file and store it with its duration and creative id.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.
        creative_id: Passed through to ``self.db.insert_song``.

    Raises:
        Exception: If the file's hash is already in ``self.songhashes_set``.
    """
    songname = decoder.path_to_songname(filepath)
    song_hash = decoder.unique_hash(filepath)
    song_name = song_name or songname

    # get duration in seconds; the context manager closes the sound file
    # handle, which used to stay open.
    # NOTE(review): if both operands are ints, `/` truncates under Python 2
    # -- confirm which interpreter this targets.
    with sf.SoundFile(filepath) as f:
        duration = '{}'.format(len(f) / f.samplerate)

    # don't refingerprint already fingerprinted files
    if song_hash in self.songhashes_set:
        raise Exception("%s already fingerprinted." % song_name)

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)
    sid = self.db.insert_song(song_name, file_hash, duration, creative_id)
    self.db.insert_hashes(sid, hashes)
    self.db.set_song_fingerprinted(sid)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint one file and store it unless its hash is already known.

    The file hash is always printed to stdout before the duplicate check.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.

    Returns:
        None in every case.
    """
    derived_name = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    if not song_name:
        song_name = derived_name
    print(digest)

    # Already-known files are reported on stdout and in the log, then skipped.
    if digest in self.songhashes_set:
        msg = "%s already fingerprinted, continuing..." % song_name
        print(msg)
        self.logger.info(msg)
        return

    worker_result = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)
    song_name, hashes, file_hash = worker_result

    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint one file and store it unless its hash is already known.

    Every progress message is sent to ``dejavu.shared.UITEXTLOGGER`` and
    also printed to stdout.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.

    Returns:
        None in every case.
    """
    derived_name = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    if not song_name:
        song_name = derived_name

    # Files whose hash is already registered are reported and skipped.
    if digest in self.songhashes_set:
        skipped = "%s already fingerprinted, continuing..." % song_name
        dejavu.shared.UITEXTLOGGER.emit(skipped)
        print(skipped)
        return

    worker_result = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)
    song_name, hashes, file_hash = worker_result

    saving = "Saving finger prints to Database for %s" % song_name
    dejavu.shared.UITEXTLOGGER.emit(saving)
    print(saving)

    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()

    finished = "Finished saving finger prints to Database for %s" % song_name
    dejavu.shared.UITEXTLOGGER.emit(finished)
    print(finished)
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint one file, store it, and report the outcome as a status.

    Args:
        filepath: Path of the audio file to fingerprint.
        song_name: Optional name; the name derived from ``filepath`` is used
            when this is falsy.

    Returns:
        ``Dejavu.FINGERPRINT_STATUS_FILE_EXISTED`` when the file's hash is
        already in ``self.songhashes_set``, otherwise
        ``Dejavu.FINGERPRINT_STATUS_SUCCESS``.
    """
    derived_name = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    if not song_name:
        song_name = derived_name

    # Files whose hash is already registered are reported and skipped.
    if digest in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return Dejavu.FINGERPRINT_STATUS_FILE_EXISTED

    worker_result = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name
    )
    song_name, hashes, file_hash, audio_length = worker_result

    song_id = self.db.insert_song(song_name, file_hash, audio_length)
    # Workaround for https://github.com/worldveil/dejavu/issues/142: the
    # hashes go through _convert_hashes before insertion.
    self.db.insert_hashes(song_id, _convert_hashes(hashes))
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
    return Dejavu.FINGERPRINT_STATUS_SUCCESS
def _fingerprint_worker(filename, limit=None, song_name=None):
    """Fingerprint every channel of one audio file.

    Args:
        filename: Path of the audio file, or a ``(filename, limit)`` pair as
            delivered by ``Pool.imap``.
        limit: Passed to ``decoder.read``; overridden by the pair's second
            element when ``filename`` is a pair.
        song_name: Optional name; defaults to the file's base name without
            its extension.

    Returns:
        ``(song_name, hashes, file_hash)`` where ``hashes`` is the set union
        of the fingerprints of all channels.

    Raises:
        ValueError: If ``filename`` is a tuple or list that does not hold
            exactly two items.
    """
    # Pool.imap sends arguments as tuples so we have to unpack them
    # ourselves. An explicit type check replaces the old try/unpack, which
    # also split any plain two-character path string into two characters.
    if isinstance(filename, (tuple, list)):
        filename, limit = filename

    base_name = os.path.splitext(os.path.basename(filename))[0]
    song_name = song_name or base_name

    file_hash = decoder.unique_hash(filename)
    channels, Fs = decoder.read(filename, limit)

    result = set()
    channel_amount = len(channels)

    for channeln, channel in enumerate(channels):
        logger.info(("Fingerprinting channel %d/%d for %s" %
                     (channeln + 1, channel_amount, filename)))
        hashes = fingerprint.fingerprint(channel, Fs=Fs)
        logger.info(("Finished channel %d/%d for %s" %
                     (channeln + 1, channel_amount, filename)))
        result |= set(hashes)

    return song_name, result, file_hash
def check_if_fingerprinted(self):
    """Return True when this object's file hash is in the known-hash set.

    Returns:
        Whether ``unique_hash(self.filename)`` is in
        ``self.djv.songhashes_set``.
    """
    current_hash = unique_hash(self.filename)
    # Called for its side effect only: per the original note it sets
    # self.djv.songhashes_set. Its return value is not needed here.
    self.djv.get_fingerprinted_songs()
    known_hashes = self.djv.songhashes_set
    return current_hash in known_hashes
def fingerprint_translation_record_directory(self, path, extensions, nprocesses=None):
    """Fingerprint translation-record files under ``path``, then delete them.

    Files whose base name contains ``"_recording"`` are ignored. Files whose
    hash is already known are reported and left on disk. Every other file is
    fingerprinted in a process pool, stored via ``insert_repeat_hashes``,
    and removed once the whole batch has been processed.

    Args:
        path: Directory handed to ``decoder.find_files``.
        extensions: Extensions handed to ``decoder.find_files``.
        nprocesses: Number of worker processes; when falsy the CPU count is
            used, and non-positive values are replaced by 1.

    Raises:
        Whatever the database calls or ``os.remove`` raise; worker failures
        are reported and skipped. The pool is terminated before any
        exception propagates.
    """
    msg = "Starting fingerprint translation record directory"
    print(msg)
    self.logger.info(msg)

    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    pool = multiprocessing.Pool(nprocesses)
    try:
        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            basename = os.path.basename(filename)
            if "_recording" in basename:
                continue

            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                msg = "%s already fingerprinted, continuing..." % filename
                print(msg)
                self.logger.info(msg)
                continue

            filenames_to_fingerprint.append(filename)

        # Prepare _fingerprint_worker input: one (filename, limit) per file.
        worker_input = [(name, self.limit)
                        for name in filenames_to_fingerprint]

        # Send off our tasks
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them
        while True:
            try:
                # next() works on both Python 2 and 3, unlike iterator.next().
                song_name, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only real errors are swallowed here; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                msg = "Failed fingerprinting"
                print(msg)
                self.logger.info(msg)
                # Print traceback because we can't reraise it here
                traceback.print_exc(file=sys.stdout)
            else:
                sid = self.db.insert_song(song_name, file_hash)
                self.db.insert_repeat_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()

        # NOTE(review): this also deletes files whose fingerprinting failed
        # above, so they are never stored -- confirm that is intended.
        for filename in filenames_to_fingerprint:
            os.remove(filename)

        pool.close()
    except BaseException:
        # Do not leave worker processes running when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint the audio files under ``path`` and store them in the database.

    Args:
        path: Directory containing the audio files.
        extensions: File extensions to look for.
        nprocesses: Number of worker processes; when falsy the CPU count is
            used, and non-positive values are replaced by 1.

    Raises:
        Whatever the database calls raise; worker failures are printed with
        their traceback and skipped. The pool is terminated before any
        exception propagates.
    """
    # Try to use the maximum amount of processes if not given.
    try:
        # cpu_count() reports how many CPUs the system has.
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    pool = multiprocessing.Pool(nprocesses)  # process pool
    try:
        # Paths of the audio files that still need fingerprinting.
        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files: hash the file
            # (SHA-1 per the original author's note -- confirm in decoder)
            # and skip it when that hash is already among the known ones.
            if decoder.unique_hash(filename) in self.songhashes_set:
                print("%s already fingerprinted, continuing..." % filename)
                continue
            filenames_to_fingerprint.append(filename)

        # Prepare _fingerprint_worker input: one (filename, limit) per file.
        worker_input = [(name, self.limit)
                        for name in filenames_to_fingerprint]

        # Send off our tasks. imap_unordered is like map: it applies the
        # function to every item of the iterable. Each worker handles one
        # file at a time, taking the next file when it is free.
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them
        while True:
            try:
                # Result of _fingerprint_worker() for one audio file.
                # next() works on both Python 2 and 3, unlike iterator.next().
                song_name, hashes, file_hash = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # Only real errors are swallowed here; KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here
                traceback.print_exc(file=sys.stdout)
            else:
                # NOTE (from the original author): song_name, file_hash and
                # hashes could be written to a text file here for a
                # colleague, pending a look at how his recognition works.

                # Store the name and file hash; per the original note this
                # goes into the song table.
                sid = self.db.insert_song(song_name, file_hash)
                # Store the hashes under the new song id; per the original
                # note sid is a foreign key in the fingerprint table.
                self.db.insert_hashes(sid, hashes)
                # Mark the song as fully fingerprinted (the original note
                # says this sets the flag column to 1).
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()

        pool.close()
    except BaseException:
        # Do not leave worker processes running when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()