def fingerprint_file(self, filepath, video_id, song_name=None, cached_hashes=None):
    """Fingerprint a single file and store it, unless it is already known.

    The file is identified by ``decoder.unique_hash(filepath)``. If that
    hash is already in ``self.songhashes_set`` a notice is printed and
    nothing is stored.

    :param filepath: path of the audio file.
    :param video_id: passed through to ``self.db.insert_song``.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    :param cached_hashes: forwarded unchanged to ``_fingerprint_worker``.
    :returns: ``None`` when the file was already fingerprinted, otherwise
        ``{"song_length": length_in_seconds}`` as reported by the worker.
    """
    derived_name = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name or derived_name

    # Guard clause: don't refingerprint already fingerprinted files.
    if content_hash in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return None

    song_name, hashes, file_hash, length_in_seconds = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
        cached_hashes=cached_hashes,
    )

    song_id = self.db.insert_song(song_name, video_id, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()

    return {"song_length": length_in_seconds}
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint a single file and store it together with its ``cdate``.

    Files whose ``decoder.unique_hash`` is already in
    ``self.songhashes_set`` are skipped with a printed notice.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    derived_name = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name or derived_name

    # Guard clause: don't refingerprint already fingerprinted files.
    if content_hash in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return

    # The worker's fourth value is stored with the song as its ``cdate``.
    song_name, hashes, file_hash, cdate = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
    )

    # Diagnostic output kept from the original implementation.
    print(self.config['fingerprint']['id'])
    print(cdate)

    song_id = self.db.insert_song(song_name, file_hash, cdate)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, video_id, song_name=None, cached_hashes=None):
    """
    Store fingerprints for ``filepath`` if its content hash is new.

    Returns None if the file has already been fingerprinted; otherwise
    returns a dict with the single key "song_length" holding the length
    value reported by ``_fingerprint_worker``.
    """
    name_from_path = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    song_name = song_name or name_from_path

    if digest not in self.songhashes_set:
        worker_result = _fingerprint_worker(
            filepath, self.limit,
            song_name=song_name,
            cached_hashes=cached_hashes)
        song_name, hashes, file_hash, length_in_seconds = worker_result

        # video_id is stored alongside the song row.
        sid = self.db.insert_song(song_name, video_id, file_hash)
        self.db.insert_hashes(sid, hashes)
        self.db.set_song_fingerprinted(sid)
        self.get_fingerprinted_songs()

        info = {"song_length": length_in_seconds}
        return info

    # don't refingerprint already fingerprinted files
    print("%s already fingerprinted, continuing..." % song_name)
    return None
def fingerprint_directory(self, path, extensions, nprocesses=None, splited=False, splited_song_name=""):
    """Fingerprint every matching file under ``path`` using a process pool.

    :param path: directory handed to ``decoder.find_files``.
    :param extensions: file extensions handed to ``decoder.find_files``.
    :param nprocesses: number of worker processes; defaults to the CPU
        count, or 1 when that cannot be determined or is not positive.
    :param splited: when true *and* ``splited_song_name`` is non-empty,
        the hashes of all files are stored under one shared song row.
    :param splited_song_name: name of that shared song row.

    If ``splited`` is true but ``splited_song_name`` is empty, each file
    gets its own song row (previously this raised ``UnboundLocalError``).
    """
    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        # don't refingerprint already fingerprinted files
        if decoder.path_to_songname(filename) in self.songnames_set:
            print("%s already fingerprinted, continuing..." % filename)
            continue
        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit) for filename in filenames_to_fingerprint]

    use_shared_song = bool(splited and splited_song_name)

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        shared_sid = None
        if use_shared_song:
            shared_sid = self.db.insert_song(splited_song_name)

        # Loop till we have all of them
        while True:
            try:
                song_name, hashes = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # A worker failed; report it and carry on with the rest.
                # KeyboardInterrupt / SystemExit are deliberately NOT caught.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here
                traceback.print_exc(file=sys.stdout)
            else:
                if use_shared_song:
                    sid = shared_sid
                else:
                    sid = self.db.insert_song(song_name)
                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()

        pool.close()
    except BaseException:
        # Don't leave worker processes behind when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()
def fingerprint_file(self, filepath, song_name=None):
    """Decode ``filepath`` and store the fingerprints of every channel.

    :param filepath: path of the audio file, read via ``decoder.read``.
    :param song_name: optional name; when empty, the name is derived from
        ``filepath`` and printed.
    """
    channels, Fs = decoder.read(filepath)

    if not song_name:
        # Derive the name first, then report it. The original printed
        # before assigning and therefore always showed the empty value.
        song_name = decoder.path_to_songname(filepath)
        print("Song name: %s" % song_name)

    song_id = self.db.insert_song(song_name)

    # Each channel is fingerprinted separately, all under the same song id.
    for data in channels:
        hashes = fingerprint.fingerprint(data, Fs=Fs)
        self.db.insert_hashes(song_id, hashes)
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every matching file under ``path`` using a process pool.

    Files whose derived song name is already in ``self.songnames_set``
    are skipped. A failure in one worker is printed with its traceback
    and does not stop the remaining files.

    :param path: directory handed to ``decoder.find_files``.
    :param extensions: file extensions handed to ``decoder.find_files``.
    :param nprocesses: number of worker processes; defaults to the CPU
        count, or 1 when that cannot be determined or is not positive.
    """
    # Local imports: the original only imported these lazily as well.
    import sys
    import traceback

    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    filenames_to_fingerprint = []
    for filename, _ in decoder.find_files(path, extensions):
        # don't refingerprint already fingerprinted files
        if decoder.path_to_songname(filename) in self.songnames_set:
            print("%s already fingerprinted, continuing..." % filename)
            continue
        filenames_to_fingerprint.append(filename)

    # Prepare _fingerprint_worker input: one (filename, limit) pair per file.
    worker_input = [(filename, self.limit) for filename in filenames_to_fingerprint]

    pool = multiprocessing.Pool(nprocesses)
    try:
        # Send off our tasks
        iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

        # Loop till we have all of them
        while True:
            try:
                song_name, hashes = next(iterator)
            except multiprocessing.TimeoutError:
                continue
            except StopIteration:
                break
            except Exception:
                # KeyboardInterrupt / SystemExit are deliberately NOT caught,
                # so the run can still be aborted by the user.
                print("Failed fingerprinting")
                # Print traceback because we can't reraise it here
                traceback.print_exc(file=sys.stdout)
            else:
                sid = self.db.insert_song(song_name)
                self.db.insert_hashes(sid, hashes)
                self.db.set_song_fingerprinted(sid)
                self.get_fingerprinted_songs()

        pool.close()
    except BaseException:
        # Don't leave worker processes behind when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` unless the database already knows its hash.

    The duplicate check asks the database directly
    (``self.db.get_song_by_hash``) rather than using an in-memory set.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    name_from_path = decoder.path_to_songname(filepath)
    file_hash = decoder.unique_hash(filepath)
    song_name = song_name or name_from_path

    # don't refingerprint already fingerprinted files
    existing = self.db.get_song_by_hash(file_hash)
    if existing is not None:
        logger.info("%s already fingerprinted, continuing..." % song_name)
        return

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
    )
    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
def fingerprint_file(self, filepath, output_dir=None, song_name=None):
    """Fingerprint ``filepath`` and dump the hashes to a ``.fingerp`` file.

    The output file is ``<output_dir>/<song_name>.fingerp`` and contains
    one ``"<t[0]> - <t[1]>"`` line per hash tuple, sorted by the tuple's
    second element.

    :param filepath: path of the audio file.
    :param output_dir: directory for the output file. ``None`` (the
        default) now means the current directory; previously it made
        ``os.path.join`` fail.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    songname = decoder.path_to_songname(filepath)
    song_name = song_name or songname

    # The worker's file hash is not needed here, so the separate
    # decoder.unique_hash() call of the original (whose result was unused)
    # has been dropped.
    song_name, hashes, _ = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)

    target = os.path.join(output_dir or os.curdir, song_name + '.fingerp')

    # presumably (hash, offset) pairs -- TODO confirm against fingerprint()
    sorted_hashes = sorted(hashes, key=lambda t: t[1])

    # ``with`` guarantees the handle is flushed and closed even if a
    # write fails part-way through.
    with open(target, 'w') as out:
        out.writelines(
            str(t[0]) + " - " + str(t[1]) + "\n" for t in sorted_hashes
        )
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` and store it unless its hash is known.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    derived_name = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name or derived_name

    # Guard clause: don't refingerprint already fingerprinted files.
    if content_hash in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
    )
    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """
    Fingerprint one file and persist the result.

    Nothing is stored (only a notice is printed) when the file's unique
    hash is already contained in ``self.songhashes_set``.
    """
    name_from_path = decoder.path_to_songname(filepath)
    digest = decoder.unique_hash(filepath)
    song_name = song_name or name_from_path

    is_new = digest not in self.songhashes_set
    if is_new:
        worker_result = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)
        song_name, hashes, file_hash = worker_result

        sid = self.db.insert_song(song_name, file_hash)
        self.db.insert_hashes(sid, hashes)
        self.db.set_song_fingerprinted(sid)
        self.get_fingerprinted_songs()
    else:
        # don't refingerprint already fingerprinted files
        print("%s already fingerprinted, continuing..." % song_name)
def fingerprint_translation_record_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` and store its hashes via ``insert_repeat_hashes``.

    Files whose unique hash is already in ``self.songhashes_set`` are
    skipped; the notice is both printed and sent to ``self.logger``.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    derived_name = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name or derived_name

    if content_hash in self.songhashes_set:
        notice = "%s already fingerprinted, continuing..." % song_name
        print(notice)
        self.logger.info(notice)
        return

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
    )
    song_id = self.db.insert_song(song_name, file_hash)
    # Unlike fingerprint_file, hashes go through insert_repeat_hashes.
    self.db.insert_repeat_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` and store it with title/artist/genre tags.

    The tags come from ``self.get_song_metadata``; a missing title falls
    back to the filename returned by the worker, missing artist/genre to
    the empty string.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    derived_name = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name or derived_name

    # Guard clause: don't refingerprint already fingerprinted files.
    if content_hash in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return

    filename, song_name, hashes, file_hash = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
    )

    tags = self.get_song_metadata(filename)
    song_id = self.db.insert_song(
        tags.title or filename,
        tags.artist or "",
        tags.genre or "",
        file_hash,
    )
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` and store it, logging progress to 'dejavu'.

    Files whose unique hash is already in ``self.songhashes_set`` are
    skipped with a warning.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    log = logging.getLogger('dejavu')

    songname = decoder.path_to_songname(filepath)
    song_hash = decoder.unique_hash(filepath)
    song_name = song_name or songname

    # don't refingerprint already fingerprinted files
    if song_hash in self.songhashes_set:
        # Logger.warn is a deprecated alias; Logger.warning is the
        # supported spelling. Arguments are passed lazily so the message
        # is only formatted when the record is actually emitted.
        log.warning("%s already fingerprinted, continuing...", song_name)
        return

    song_name, hashes, file_hash, audio_length = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)

    log.debug("Inserting %s in database", song_name)
    sid = self.db.insert_song(song_name, file_hash, audio_length)
    self.db.insert_hashes(sid, hashes)
    self.db.set_song_fingerprinted(sid)
    self.get_fingerprinted_songs()
    log.info("%s inserted in database", song_name)
def fingerprint_file(self, filepath, song_name=None, creative_id=None):
    """Fingerprint ``filepath`` and store it with its duration.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    :param creative_id: passed through to ``self.db.insert_song``.
    :raises Exception: if the file's unique hash is already in
        ``self.songhashes_set``.
    """
    songname = decoder.path_to_songname(filepath)
    song_hash = decoder.unique_hash(filepath)
    song_name = song_name or songname

    # don't refingerprint already fingerprinted files; checked first so a
    # duplicate is rejected without opening the audio file again.
    if song_hash in self.songhashes_set:
        raise Exception("%s already fingerprinted." % song_name)

    # get duration in seconds; the context manager closes the SoundFile
    # handle, which the original left open.
    # NOTE(review): under Python 2 this ``/`` is integer division -- confirm
    # which interpreter this runs on.
    with sf.SoundFile(filepath) as f:
        duration = '{}'.format(len(f) / f.samplerate)

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)
    sid = self.db.insert_song(song_name, file_hash, duration, creative_id)
    self.db.insert_hashes(sid, hashes)
    self.db.set_song_fingerprinted(sid)
    self.get_fingerprinted_songs()
def fingerprint_directory(self, path, extensions, nprocesses=None):
    """Fingerprint every matching file under ``path`` using a process pool.

    Each file is submitted with ``apply_async``; the worker receives
    ``(filename, self.db, self.limit)``. Results are polled until every
    task has either finished or failed; failures are printed with their
    traceback and otherwise ignored.

    :param path: directory handed to ``decoder.find_files``.
    :param extensions: file extensions handed to ``decoder.find_files``.
    :param nprocesses: number of worker processes; defaults to the CPU
        count, or 1 when that cannot be determined or is not positive.
    """
    # Local imports: the original only imported these lazily as well.
    import sys
    import traceback

    # Try to use the maximum amount of processes if not given.
    try:
        nprocesses = nprocesses or multiprocessing.cpu_count()
    except NotImplementedError:
        nprocesses = 1
    else:
        nprocesses = 1 if nprocesses <= 0 else nprocesses

    pool = multiprocessing.Pool(nprocesses)
    try:
        results = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files
            if decoder.path_to_songname(filename) in self.songnames_set:
                print("%s already fingerprinted, continuing..." % filename)
                continue

            result = pool.apply_async(_fingerprint_worker,
                                      (filename, self.db, self.limit))
            results.append(result)

        while results:
            # Iterate over a copy because finished entries are removed.
            for result in results[:]:
                # TODO: Handle errors gracefully and return them to the
                # callee in some way.
                try:
                    result.get(timeout=2)
                except multiprocessing.TimeoutError:
                    # Still running; poll it again on the next pass.
                    continue
                except Exception:
                    # KeyboardInterrupt / SystemExit are deliberately NOT
                    # caught, so the run can still be aborted.
                    traceback.print_exc(file=sys.stdout)
                results.remove(result)

        pool.close()
    except BaseException:
        # Don't leave worker processes behind when we bail out early.
        pool.terminate()
        raise
    finally:
        pool.join()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` and store it unless its hash is known.

    The file's unique hash is always printed. For an already known file
    the notice is printed and also sent to ``self.logger``.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    derived_name = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name or derived_name

    # Diagnostic output kept from the original implementation.
    print(content_hash)

    # Guard clause: don't refingerprint already fingerprinted files.
    if content_hash in self.songhashes_set:
        notice = "%s already fingerprinted, continuing..." % song_name
        print(notice)
        self.logger.info(notice)
        return

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
    )
    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None, id_in_filename=False):
    """Fingerprint ``filepath``, optionally taking the song id from its name.

    Duplicates are detected by song *name* (``self.songnames_set``).

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    :param id_in_filename: when true, the part of the basename before the
        first ``.`` is parsed as the integer song id and no song row is
        inserted; if it is not an integer, a song row is inserted as usual.
    """
    derived_name = decoder.path_to_songname(filepath)
    song_name = song_name or derived_name

    # Guard clause: don't refingerprint already fingerprinted files.
    if song_name in self.songnames_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return

    song_name, hashes = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)

    song_id = None
    if id_in_filename:
        try:
            song_id = int(os.path.basename(filepath).split('.')[0])
        except ValueError:
            # Basename does not start with an integer id; insert below.
            song_id = None
    if song_id is None:
        song_id = self.db.insert_song(song_name)

    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` unless it is already recognised.

    Instead of comparing names, the file is first run through
    ``self.recognize(FileRecognizer, filepath)``; a match with a
    confidence above 100 is treated as "already fingerprinted".

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    songname = decoder.path_to_songname(filepath)
    song_name = song_name or songname

    # don't refingerprint already fingerprinted files
    # (the previous implementation checked ``song_name in self.songnames_set``)
    song = self.recognize(FileRecognizer, filepath)
    if song is not None and song['confidence'] > 100:
        print("%s already fingerprinted, continuing..."
              % song_name.encode("ascii", errors='replace'))
        return

    song_name, hashes = _fingerprint_worker(
        filepath, self.limit, song_name=song_name)
    sid = self.db.insert_song(song_name)
    self.db.insert_hashes(sid, hashes)
    self.db.set_song_fingerprinted(sid)
    self.get_fingerprinted_songs()
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` and store it, reporting progress to the UI.

    Every status message is emitted on ``dejavu.shared.UITEXTLOGGER`` and
    then printed.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    """
    derived_name = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name or derived_name

    # Guard clause: don't refingerprint already fingerprinted files.
    if content_hash in self.songhashes_set:
        status = "%s already fingerprinted, continuing..." % song_name
        dejavu.shared.UITEXTLOGGER.emit(status)
        print(status)
        return

    song_name, hashes, file_hash = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
    )

    status = "Saving finger prints to Database for %s" % song_name
    dejavu.shared.UITEXTLOGGER.emit(status)
    print(status)

    song_id = self.db.insert_song(song_name, file_hash)
    self.db.insert_hashes(song_id, hashes)
    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()

    status = "Finished saving finger prints to Database for %s" % song_name
    dejavu.shared.UITEXTLOGGER.emit(status)
    print(status)
def fingerprint_file(self, filepath, song_name=None):
    """Fingerprint ``filepath`` and report the outcome as a status code.

    :param filepath: path of the audio file.
    :param song_name: optional name; defaults to the name derived from
        ``filepath``.
    :returns: ``Dejavu.FINGERPRINT_STATUS_FILE_EXISTED`` when the file's
        unique hash is already in ``self.songhashes_set``, otherwise
        ``Dejavu.FINGERPRINT_STATUS_SUCCESS`` after storing it.
    """
    derived_name = decoder.path_to_songname(filepath)
    content_hash = decoder.unique_hash(filepath)
    song_name = song_name or derived_name

    # Guard clause: don't refingerprint already fingerprinted files.
    if content_hash in self.songhashes_set:
        print("%s already fingerprinted, continuing..." % song_name)
        return Dejavu.FINGERPRINT_STATUS_FILE_EXISTED

    song_name, hashes, file_hash, audio_length = _fingerprint_worker(
        filepath,
        self.limit,
        song_name=song_name,
    )
    song_id = self.db.insert_song(song_name, file_hash, audio_length)

    # Added by 9nehS as a workaround for issue
    # https://github.com/worldveil/dejavu/issues/142
    converted = _convert_hashes(hashes)
    self.db.insert_hashes(song_id, converted)

    self.db.set_song_fingerprinted(song_id)
    self.get_fingerprinted_songs()
    return Dejavu.FINGERPRINT_STATUS_SUCCESS
def begin(self):
    """Run every test file through the ``dejavu.py`` CLI and record results.

    For each entry of ``self.test_files`` the recogniser is started in a
    subprocess and its printed result is parsed. The outcome is written to
    the ``[line][col]`` cell of ``result_match`` ('no' / 'invalid' / 'yes'),
    ``result_matching_times``, ``result_query_duration`` and
    ``result_match_confidence``.
    """
    for f in self.test_files:
        log_msg('--------------------------------------------------')
        log_msg('file: %s' % f)

        # Column comes from the "<N>sec" part of the file name.
        col = self.get_column_id(re.findall("[0-9]*sec", f)[0])

        # File name format: XXXX_offset_length.mp3 -> row comes from XXXX.
        song = path_to_songname(f).split("_")[0]
        line = self.get_line_id(song)

        command = ["python", "dejavu.py", '-r', 'file',
                   self.test_folder + "/" + f]
        output = subprocess.check_output(command).strip()

        if output == "None":
            log_msg('No match')
            self.result_match[line][col] = 'no'
            for table in (self.result_matching_times,
                          self.result_query_duration,
                          self.result_match_confidence):
                table[line][col] = 0
        else:
            # Rewrite single-quoted keys/values before evaluating the
            # printed dict literal.
            for old, new in ((" \'", ' "'), ("{\'", '{"'),
                             ("\':", '":'), ("\',", '",')):
                output = output.replace(old, new)

            # which song did we predict?
            result = ast.literal_eval(output)
            song_result = result["song_name"]
            log_msg('song: %s' % song)
            log_msg('song_result: %s' % song_result)

            if song_result == song:
                log_msg('correct match')
                print(self.result_match)
                self.result_match[line][col] = 'yes'
                self.result_query_duration[line][col] = round(
                    result[Dejavu.MATCH_TIME], 3)
                self.result_match_confidence[line][col] = result[
                    Dejavu.CONFIDENCE]

                # Expected start time: first "_"-prefixed chunk of the name.
                song_start_time = re.findall("\_[^\_]+", f)[0].lstrip("_ ")

                scaled_offset = (result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE
                                 * DEFAULT_OVERLAP_RATIO)
                result_start_time = round(scaled_offset / (DEFAULT_FS), 0)

                times_row = self.result_matching_times[line]
                times_row[col] = int(result_start_time) - int(song_start_time)
                # A difference of exactly one is treated as no difference.
                if abs(times_row[col]) == 1:
                    times_row[col] = 0

                log_msg('query duration: %s' % round(
                    result[Dejavu.MATCH_TIME], 3))
                log_msg('confidence: %s' % result[Dejavu.CONFIDENCE])
                log_msg('song start_time: %s' % song_start_time)
                log_msg('result start time: %s' % result_start_time)

                if times_row[col] == 0:
                    log_msg('accurate match')
                else:
                    log_msg('inaccurate match')
            else:
                log_msg('invalid match')
                self.result_match[line][col] = 'invalid'
                for table in (self.result_matching_times,
                              self.result_query_duration,
                              self.result_match_confidence):
                    table[line][col] = 0

        log_msg('--------------------------------------------------\n')
def begin(self):
    """Recognise every test file in-process and record the results.

    A ``Dejavu`` instance is built from the ``DATABASE_URL`` environment
    variable. For each entry of ``self.test_files`` the outcome is written
    to the ``[line][col]`` cell of ``result_match`` ('no' / 'invalid' /
    'yes'), ``result_matching_times``, ``result_query_duration`` and
    ``result_match_confidence``.
    """
    djv = Dejavu(dburl=os.environ['DATABASE_URL'])

    for f in self.test_files:
        log_msg('--------------------------------------------------')
        log_msg('file: %s' % f)

        # Column comes from the "<N>sec" part of the file name.
        col = self.get_column_id(re.findall("[0-9]*sec", f)[0])

        # File name format: XXXX_offset_length.mp3 -> row comes from XXXX.
        song = path_to_songname(f).split("_")[0]
        line = self.get_line_id(song)

        result = djv.recognize(FileRecognizer, self.test_folder + "/" + f)

        if not result:
            log_msg('No match')
            self.result_match[line][col] = 'no'
            for table in (self.result_matching_times,
                          self.result_query_duration,
                          self.result_match_confidence):
                table[line][col] = 0
            # No closing separator is logged for this case.
            continue

        # which song did we predict?
        song_result = result['song_name']
        log_msg('song: %s' % song)
        log_msg('song_result: %s' % song_result)

        if song_result == song:
            log_msg('correct match')
            print(self.result_match)
            self.result_match[line][col] = 'yes'
            self.result_query_duration[line][col] = round(
                result[Dejavu.MATCH_TIME], 3)
            self.result_match_confidence[line][col] = result[
                Dejavu.CONFIDENCE]

            # Expected start time: first "_"-prefixed chunk of the name.
            song_start_time = re.findall("\_[^\_]+", f)[0].lstrip("_ ")

            scaled_offset = (result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE
                             * DEFAULT_OVERLAP_RATIO)
            result_start_time = round(scaled_offset / (DEFAULT_FS), 0)

            times_row = self.result_matching_times[line]
            times_row[col] = int(result_start_time) - int(song_start_time)
            # A difference of exactly one is treated as no difference.
            if abs(times_row[col]) == 1:
                times_row[col] = 0

            log_msg('query duration: %s' % round(
                result[Dejavu.MATCH_TIME], 3))
            log_msg('confidence: %s' % result[Dejavu.CONFIDENCE])
            log_msg('song start_time: %s' % song_start_time)
            log_msg('result start time: %s' % result_start_time)

            if times_row[col] == 0:
                log_msg('accurate match')
            else:
                log_msg('inaccurate match')
        else:
            log_msg('invalid match')
            self.result_match[line][col] = 'invalid'
            for table in (self.result_matching_times,
                          self.result_query_duration,
                          self.result_match_confidence):
                table[line][col] = 0

        log_msg('--------------------------------------------------\n')
def begin(self):
    """
    Execute the recognition test for each file in ``self.test_files``.

    The recogniser is run as ``python dejavu.py -r file <path>`` and its
    stdout is parsed. Depending on the outcome the ``[line][col]`` cells
    of the four result tables are set to 'no' / 'invalid' / 'yes' plus the
    measured timing, duration and confidence values.
    """
    zeroed_tables = (
        self.result_matching_times,
        self.result_query_duration,
        self.result_match_confidence,
    )

    for f in self.test_files:
        log_msg('--------------------------------------------------')
        log_msg('file: %s' % f)

        # get column
        duration_tag = re.findall("[0-9]*sec", f)[0]
        col = self.get_column_id(duration_tag)

        # format: XXXX_offset_length.mp3
        song = path_to_songname(f).split("_")[0]
        line = self.get_line_id(song)

        raw = subprocess.check_output([
            "python", "dejavu.py", '-r', 'file',
            self.test_folder + "/" + f])
        text = raw.strip()

        if text == "None":
            log_msg('No match')
            self.result_match[line][col] = 'no'
            for table in zeroed_tables:
                table[line][col] = 0
        else:
            # Rewrite single-quoted keys/values before evaluating the
            # printed dict literal.
            text = text.replace(" \'", ' "')
            text = text.replace("{\'", '{"')
            text = text.replace("\':", '":')
            text = text.replace("\',", '",')

            # which song did we predict?
            result = ast.literal_eval(text)
            song_result = result["song_name"]
            log_msg('song: %s' % song)
            log_msg('song_result: %s' % song_result)

            if song_result != song:
                log_msg('invalid match')
                self.result_match[line][col] = 'invalid'
                for table in zeroed_tables:
                    table[line][col] = 0
            else:
                log_msg('correct match')
                print(self.result_match)
                self.result_match[line][col] = 'yes'
                self.result_query_duration[line][col] = round(
                    result[Dejavu.MATCH_TIME], 3)
                self.result_match_confidence[line][col] = result[
                    Dejavu.CONFIDENCE]

                # Expected start time: first "_"-prefixed chunk of the name.
                start_chunks = re.findall("\_[^\_]+", f)
                song_start_time = start_chunks[0].lstrip("_ ")

                result_start_time = round(
                    (result[Dejavu.OFFSET] * DEFAULT_WINDOW_SIZE
                     * DEFAULT_OVERLAP_RATIO) / (DEFAULT_FS), 0)

                difference = int(result_start_time) - int(song_start_time)
                self.result_matching_times[line][col] = difference
                # A difference of exactly one is treated as no difference.
                if abs(self.result_matching_times[line][col]) == 1:
                    self.result_matching_times[line][col] = 0

                log_msg('query duration: %s' % round(
                    result[Dejavu.MATCH_TIME], 3))
                log_msg('confidence: %s' % result[Dejavu.CONFIDENCE])
                log_msg('song start_time: %s' % song_start_time)
                log_msg('result start time: %s' % result_start_time)

                if self.result_matching_times[line][col] == 0:
                    log_msg('accurate match')
                else:
                    log_msg('inaccurate match')

        log_msg('--------------------------------------------------\n')