Пример #1
0
    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint a single file and store it unless its hash is known.

        Args:
            filepath: path of the audio file to fingerprint.
            song_name: optional name to store; when falsy, the name returned
                by ``decoder.path_to_songname(filepath)`` is used instead.

        Returns:
            None in every case.
        """
        derived_name = decoder.path_to_songname(filepath)
        known_hash = decoder.unique_hash(filepath)
        if not song_name:
            song_name = derived_name

        # Guard clause: never refingerprint a file whose hash is already known.
        if known_hash in self.songhashes_set:
            print("%s already fingerprinted, continuing..." % song_name)
            return

        song_name, fingerprints, file_hash, cdate = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)

        # Diagnostic output: configured fingerprint id and the worker's cdate.
        print(self.config['fingerprint']['id'])
        print(cdate)

        song_id = self.db.insert_song(song_name, file_hash, cdate)
        self.db.insert_hashes(song_id, fingerprints)
        self.db.set_song_fingerprinted(song_id)
        # Presumably refreshes self.songhashes_set -- confirm in the class.
        self.get_fingerprinted_songs()
Пример #2
0
    def fingerprint_file(self,
                         filepath,
                         video_id,
                         song_name=None,
                         cached_hashes=None):
        """Fingerprint one file and record it together with its video id.

        Args:
            filepath: path of the file to fingerprint.
            video_id: identifier forwarded to ``self.db.insert_song``.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.
            cached_hashes: forwarded unchanged to ``_fingerprint_worker``.

        Returns:
            None when the file hash is already in ``self.songhashes_set``,
            otherwise ``{"song_length": <value returned by the worker>}``.
        """
        default_name = decoder.path_to_songname(filepath)
        digest = decoder.unique_hash(filepath)
        if not song_name:
            song_name = default_name

        # Already fingerprinted: report and bail out early.
        if digest in self.songhashes_set:
            print("%s already fingerprinted, continuing..." % song_name)
            return None

        worker_result = _fingerprint_worker(filepath,
                                            self.limit,
                                            song_name=song_name,
                                            cached_hashes=cached_hashes)
        song_name, fingerprints, file_hash, length_in_seconds = worker_result

        song_id = self.db.insert_song(song_name, video_id, file_hash)
        self.db.insert_hashes(song_id, fingerprints)
        self.db.set_song_fingerprinted(song_id)
        self.get_fingerprinted_songs()

        return {"song_length": length_in_seconds}
Пример #3
0
    def fingerprint_directory(self, path, extensions, nprocesses=None):
        """Fingerprint every not-yet-known file under ``path`` in a process pool.

        Files yielded by ``decoder.find_files(path, extensions)`` whose
        ``decoder.unique_hash`` is already in ``self.songhashes_set`` are
        skipped. The remaining files are handed to ``_fingerprint_worker``
        through ``multiprocessing.Pool.imap_unordered`` and every result is
        written to ``self.db`` as soon as it arrives.

        Args:
            path: directory handed to ``decoder.find_files``.
            extensions: extensions handed to ``decoder.find_files``.
            nprocesses: number of worker processes. A falsy value selects
                ``multiprocessing.cpu_count()``; a negative value becomes 1.
        """
        # Try to use the maximum amount of processes if not given.
        try:
            nprocesses = nprocesses or multiprocessing.cpu_count()
        except NotImplementedError:
            nprocesses = 1
        else:
            nprocesses = 1 if nprocesses <= 0 else nprocesses

        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                message = "%s already fingerprinted, continuing..." % filename
                dejavu.shared.UITEXTLOGGER.emit(message)
                print(message)
                continue

            filenames_to_fingerprint.append(filename)

        # Each worker call receives one (filename, limit) tuple.
        worker_input = zip(filenames_to_fingerprint,
                           [self.limit] * len(filenames_to_fingerprint))

        # The pool is created only once the work list is known, and is always
        # shut down: terminated on error, closed on success, joined either way.
        pool = multiprocessing.Pool(nprocesses)
        try:
            iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

            # Loop till we have all of them
            while True:
                try:
                    song_name, hashes, file_hash = next(iterator)
                except multiprocessing.TimeoutError:
                    continue
                except StopIteration:
                    break
                except Exception:
                    # Only real errors are reported and skipped;
                    # KeyboardInterrupt/SystemExit propagate so the run can
                    # actually be aborted.
                    dejavu.shared.UITEXTLOGGER.emit("Failed fingerprinting")
                    print("Failed fingerprinting")
                    # Print traceback because we can't reraise it here
                    traceback.print_exc(file=sys.stdout)
                else:
                    message = ("Saving finger prints to Database for %s" %
                               song_name)
                    dejavu.shared.UITEXTLOGGER.emit(message)
                    print(message)
                    sid = self.db.insert_song(song_name, file_hash)

                    self.db.insert_hashes(sid, hashes)
                    self.db.set_song_fingerprinted(sid)
                    self.get_fingerprinted_songs()
                    message = (
                        "Finished saving finger prints to Database for %s" %
                        song_name)
                    dejavu.shared.UITEXTLOGGER.emit(message)
                    print(message)
        except BaseException:
            # Do not leave worker processes behind when aborting.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
Пример #4
0
    def fingerprint_file(self, filepath, video_id, song_name=None, cached_hashes=None):
        """Fingerprint ``filepath`` and store it under ``video_id``.

        Returns None if the file's hash is already present in
        ``self.songhashes_set``; otherwise returns a dict whose
        ``"song_length"`` entry is the length value reported by
        ``_fingerprint_worker``.
        """
        name_from_path = decoder.path_to_songname(filepath)
        content_hash = decoder.unique_hash(filepath)
        song_name = song_name if song_name else name_from_path

        already_known = content_hash in self.songhashes_set
        if already_known:
            # don't refingerprint already fingerprinted files
            print("%s already fingerprinted, continuing..." % song_name)
            return None

        song_name, hash_set, file_hash, length_in_seconds = _fingerprint_worker(
            filepath, self.limit, song_name=song_name, cached_hashes=cached_hashes)

        new_sid = self.db.insert_song(song_name, video_id, file_hash)
        self.db.insert_hashes(new_sid, hash_set)
        self.db.set_song_fingerprinted(new_sid)
        self.get_fingerprinted_songs()

        info = {}
        info["song_length"] = length_in_seconds
        return info
Пример #5
0
    def fingerprint_directory(self, path, extensions, nprocesses=None):
        """Fingerprint every not-yet-known file under ``path`` in a process pool.

        Files whose ``decoder.unique_hash`` is already in
        ``self.songhashes_set`` are skipped with a warning. Results from
        ``_fingerprint_worker`` are stored through ``self.db`` as they arrive;
        each hash entry is normalised to ``(x[0], int(x[1]))`` before insert.

        Args:
            path: directory handed to ``decoder.find_files``.
            extensions: extensions handed to ``decoder.find_files``.
            nprocesses: number of worker processes. A falsy value selects
                ``multiprocessing.cpu_count()``; a negative value becomes 1.
        """
        log = logging.getLogger('dejavu')

        # Try to use the maximum amount of processes if not given.
        try:
            nprocesses = nprocesses or multiprocessing.cpu_count()
        except NotImplementedError:
            nprocesses = 1
        else:
            nprocesses = 1 if nprocesses <= 0 else nprocesses

        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                # Logger.warn is deprecated (removed in Python 3.13).
                log.warning("%s already fingerprinted, continuing...",
                            filename)
                continue

            filenames_to_fingerprint.append(filename)

        # Each worker call receives one (filename, limit) tuple.
        worker_input = zip(filenames_to_fingerprint,
                           [self.limit] * len(filenames_to_fingerprint))

        # The pool is always shut down: terminated on error, closed on
        # success, joined either way.
        pool = multiprocessing.Pool(nprocesses)
        try:
            iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

            # Loop till we have all of them
            while True:
                try:
                    song_name, hashes, file_hash, audio_length = next(iterator)
                except multiprocessing.TimeoutError:
                    continue
                except StopIteration:
                    break
                except Exception:
                    # KeyboardInterrupt/SystemExit are deliberately not
                    # swallowed so the run can be aborted.
                    log.exception("Failed fingerprinting")
                else:
                    log.debug("Inserting %s in database", song_name)
                    sid = self.db.insert_song(song_name, file_hash,
                                              audio_length)

                    self.db.insert_hashes(
                        sid, {(x[0], int(x[1])) for x in hashes})
                    self.db.set_song_fingerprinted(sid)
                    self.get_fingerprinted_songs()
                    log.info("%s inserted in database", song_name)
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
Пример #6
0
    def fingerprint_directory(self, nprocesses=None):
        """Fingerprint all new ``.mp3`` files in the configured folder.

        The folder is read from ``self.config['fingerprint'].get('folder')``.
        Files whose ``decoder.unique_hash`` is already in
        ``self.songhashes_set`` are skipped. Each worker result is stored
        with ``self.config['fingerprint']['id']`` as the last argument of
        ``self.db.insert_song``.

        Args:
            nprocesses: number of worker processes. A falsy value selects
                ``multiprocessing.cpu_count()``; a negative value becomes 1.
        """
        extensions = [".mp3"]

        # Try to use the maximum amount of processes if not given.
        try:
            nprocesses = nprocesses or multiprocessing.cpu_count()
        except NotImplementedError:
            nprocesses = 1
        else:
            nprocesses = 1 if nprocesses <= 0 else nprocesses

        folder = self.config['fingerprint'].get('folder')
        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(folder, extensions):
            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                print("%s already fingerprinted, continuing..." % filename)
                continue

            filenames_to_fingerprint.append(filename)

        # Each worker call receives one (filename, limit) tuple.
        worker_input = zip(filenames_to_fingerprint,
                           [self.limit] * len(filenames_to_fingerprint))

        # The pool is always shut down: terminated on error, closed on
        # success, joined either way.
        pool = multiprocessing.Pool(nprocesses)
        try:
            iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

            # Loop till we have all of them
            while True:
                try:
                    song_name, hashes, file_hash, cdate = next(iterator)
                except multiprocessing.TimeoutError:
                    continue
                except StopIteration:
                    break
                except Exception:
                    # KeyboardInterrupt/SystemExit are deliberately not
                    # swallowed so the run can be aborted.
                    print("Failed fingerprinting")
                    # Print traceback because we can't reraise it here
                    traceback.print_exc(file=sys.stdout)
                else:
                    sid = self.db.insert_song(song_name, file_hash, cdate,
                                              self.config['fingerprint']['id'])
                    self.db.insert_hashes(sid, hashes)
                    self.db.set_song_fingerprinted(sid)
                    self.get_fingerprinted_songs()
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
Пример #7
0
    def fingerprint_file(self, filepath, output_dir=None, song_name=None):
        """Fingerprint ``filepath`` and dump the hashes to a ``.fingerp`` file.

        The output file is named ``<song_name>.fingerp`` and contains one
        ``"<t[0]> - <t[1]>"`` line per hash tuple, sorted by ``t[1]``.

        Args:
            filepath: path of the audio file to fingerprint.
            output_dir: directory for the output file. ``None`` (the default)
                now means the current directory; previously the default
                crashed inside ``os.path.join``.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.
        """
        songname = decoder.path_to_songname(filepath)
        song_name = song_name or songname
        # The worker's file hash is not needed for the text dump.
        song_name, hashes, _file_hash = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)

        target_dir = os.curdir if output_dir is None else output_dir
        out_path = os.path.join(target_dir, song_name + '.fingerp')

        # ``with`` closes (and flushes) the file even if a write fails, so no
        # per-line flush is required.
        with open(out_path, 'w') as out:
            for entry in sorted(hashes, key=lambda t: t[1]):
                out.write(str(entry[0]) + " - " + str(entry[1]) + "\n")
Пример #8
0
    def fingerprint_directory(self, path, extensions, nprocesses=None):
        """Fingerprint every not-yet-stored file under ``path`` in a process pool.

        A file is skipped when ``self.db.get_song_by_hash`` returns something
        other than None for its ``decoder.unique_hash``. Results from
        ``_fingerprint_worker`` are written to ``self.db`` as they arrive.

        Args:
            path: directory handed to ``decoder.find_files``.
            extensions: extensions handed to ``decoder.find_files``.
            nprocesses: number of worker processes. A falsy value selects
                ``multiprocessing.cpu_count()``; a negative value becomes 1.
        """
        # Try to use the maximum amount of processes if not given.
        try:
            nprocesses = nprocesses or multiprocessing.cpu_count()
        except NotImplementedError:
            nprocesses = 1
        else:
            nprocesses = 1 if nprocesses <= 0 else nprocesses

        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files
            file_digest = decoder.unique_hash(filename)
            if self.db.get_song_by_hash(file_digest) is not None:
                logger.info("%s already fingerprinted, continuing...",
                            filename)
                continue

            filenames_to_fingerprint.append(filename)

        # Each worker call receives one (filename, limit) tuple.
        worker_input = list(
            zip(filenames_to_fingerprint,
                [self.limit] * len(filenames_to_fingerprint)))

        # The pool is always shut down: terminated on error, closed on
        # success, joined either way.
        pool = multiprocessing.Pool(nprocesses)
        try:
            iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

            # Loop till we have all of them
            while True:
                try:
                    song_name, hashes, file_hash = next(iterator)
                except multiprocessing.TimeoutError:
                    continue
                except StopIteration:
                    break
                except Exception:
                    # KeyboardInterrupt/SystemExit are deliberately not
                    # swallowed so the run can be aborted.
                    logger.error("Failed fingerprinting")
                    # Print traceback because we can't reraise it here
                    traceback.print_exc(file=sys.stdout)
                else:
                    sid = self.db.insert_song(song_name, file_hash)

                    self.db.insert_hashes(sid, hashes)
                    self.db.set_song_fingerprinted(sid)
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
Пример #9
0
    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint ``filepath`` unless the database already has its hash.

        Args:
            filepath: path of the audio file to fingerprint.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.
        """
        fallback_name = decoder.path_to_songname(filepath)
        file_hash = decoder.unique_hash(filepath)
        if not song_name:
            song_name = fallback_name

        # A non-None lookup result means this hash was stored before.
        if self.db.get_song_by_hash(file_hash) is not None:
            logger.info("%s already fingerprinted, continuing..." % song_name)
            return

        song_name, fingerprints, file_hash = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)

        song_id = self.db.insert_song(song_name, file_hash)
        self.db.insert_hashes(song_id, fingerprints)
        self.db.set_song_fingerprinted(song_id)
Пример #10
0
    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint ``filepath`` and store it unless its hash is known.

        ``song_name`` falls back to ``decoder.path_to_songname(filepath)``
        when it is falsy. Nothing is returned.
        """
        path_name = decoder.path_to_songname(filepath)
        digest = decoder.unique_hash(filepath)
        song_name = song_name if song_name else path_name

        # Skip files whose hash is already in the known set.
        if digest in self.songhashes_set:
            print("%s already fingerprinted, continuing..." % song_name)
            return

        worker_output = _fingerprint_worker(filepath, self.limit, song_name=song_name)
        song_name, fingerprints, file_hash = worker_output

        song_id = self.db.insert_song(song_name, file_hash)
        self.db.insert_hashes(song_id, fingerprints)
        self.db.set_song_fingerprinted(song_id)
        self.get_fingerprinted_songs()
Пример #11
0
    def fingerprint_file(self, filepath, song_name=None):
        """Store fingerprints for ``filepath`` when its hash is not yet known.

        Args:
            filepath: path of the audio file.
            song_name: name to store; defaults to the name returned by
                ``decoder.path_to_songname(filepath)`` when falsy.
        """
        inferred_name = decoder.path_to_songname(filepath)
        hash_of_file = decoder.unique_hash(filepath)
        if not song_name:
            song_name = inferred_name

        is_new = hash_of_file not in self.songhashes_set
        if not is_new:
            # Nothing to do for an already fingerprinted file.
            print("%s already fingerprinted, continuing..." % song_name)
            return

        song_name, hash_set, file_hash = _fingerprint_worker(
            filepath,
            self.limit,
            song_name=song_name,
        )
        row_id = self.db.insert_song(song_name, file_hash)

        self.db.insert_hashes(row_id, hash_set)
        self.db.set_song_fingerprinted(row_id)
        self.get_fingerprinted_songs()
Пример #12
0
    def fingerprint_translation_record_file(self, filepath, song_name=None):
        """Fingerprint one file and store its hashes via ``insert_repeat_hashes``.

        A file whose ``decoder.unique_hash`` is already in
        ``self.songhashes_set`` is only reported (printed and logged).

        Args:
            filepath: path of the audio file.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.
        """
        name_from_path = decoder.path_to_songname(filepath)
        digest = decoder.unique_hash(filepath)
        if not song_name:
            song_name = name_from_path

        if digest in self.songhashes_set:
            notice = "%s already fingerprinted, continuing..." % song_name
            print(notice)
            self.logger.info(notice)
            return

        song_name, fingerprints, file_hash = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)

        song_id = self.db.insert_song(song_name, file_hash)
        self.db.insert_repeat_hashes(song_id, fingerprints)
        self.db.set_song_fingerprinted(song_id)
        self.get_fingerprinted_songs()
Пример #13
0
    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint ``filepath`` and store it with title/artist/genre tags.

        The tags come from ``self.get_song_metadata`` called with the
        filename returned by ``_fingerprint_worker``; a falsy title falls
        back to that filename, falsy artist/genre to the empty string.

        Args:
            filepath: path of the audio file.
            song_name: optional name passed to the worker; when falsy the
                name returned by ``decoder.path_to_songname`` is used.
        """
        path_name = decoder.path_to_songname(filepath)
        digest = decoder.unique_hash(filepath)
        if not song_name:
            song_name = path_name

        # don't refingerprint already fingerprinted files
        if digest in self.songhashes_set:
            print("%s already fingerprinted, continuing..." % song_name)
            return

        filename, song_name, fingerprints, file_hash = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)

        tags = self.get_song_metadata(filename)
        title = tags.title if tags.title else filename
        artist = tags.artist if tags.artist else ""
        genre = tags.genre if tags.genre else ""

        song_id = self.db.insert_song(title, artist, genre, file_hash)
        self.db.insert_hashes(song_id, fingerprints)
        self.db.set_song_fingerprinted(song_id)
        self.get_fingerprinted_songs()
Пример #14
0
    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint ``filepath`` and store it, logging progress to 'dejavu'.

        Args:
            filepath: path of the audio file.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.
        """
        log = logging.getLogger('dejavu')

        songname = decoder.path_to_songname(filepath)
        song_hash = decoder.unique_hash(filepath)
        song_name = song_name or songname

        # don't refingerprint already fingerprinted files
        if song_hash in self.songhashes_set:
            # Logger.warn is deprecated (removed in Python 3.13); warning()
            # is the supported spelling on both Python 2 and 3.
            log.warning("%s already fingerprinted, continuing...", song_name)
            return

        song_name, hashes, file_hash, audio_length = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)

        # Lazy %-style arguments: formatting happens only if the record is
        # emitted, and a non-string song_name no longer raises TypeError.
        log.debug("Inserting %s in database", song_name)
        sid = self.db.insert_song(song_name, file_hash, audio_length)

        self.db.insert_hashes(sid, hashes)
        self.db.set_song_fingerprinted(sid)
        self.get_fingerprinted_songs()
        log.info("%s inserted in database", song_name)
Пример #15
0
    def fingerprint_file(self, filepath, song_name=None, creative_id=None):
        """Fingerprint ``filepath`` and store it with duration and creative id.

        Args:
            filepath: path of the audio file.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.
            creative_id: forwarded unchanged to ``self.db.insert_song``.

        Raises:
            Exception: if the file's hash is already in
                ``self.songhashes_set``.
        """
        songname = decoder.path_to_songname(filepath)
        song_hash = decoder.unique_hash(filepath)
        song_name = song_name or songname

        # get duration in seconds (frame count divided by sample rate).
        # The handle is closed by the ``with`` block; it used to be leaked.
        # NOTE(review): on Python 2 without ``from __future__ import division``
        # this is an integer division -- confirm which is intended.
        with sf.SoundFile(filepath) as audio:
            duration = '{}'.format(len(audio) / audio.samplerate)

        # don't refingerprint already fingerprinted files
        if song_hash in self.songhashes_set:
            raise Exception("%s already fingerprinted." % song_name)

        song_name, hashes, file_hash = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)
        sid = self.db.insert_song(song_name, file_hash, duration,
                                  creative_id)

        self.db.insert_hashes(sid, hashes)
        self.db.set_song_fingerprinted(sid)
        self.get_fingerprinted_songs()
Пример #16
0
    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint ``filepath`` and store it unless its hash is known.

        The file hash is always printed. An already known file is reported
        through both ``print`` and ``self.logger``.

        Args:
            filepath: path of the audio file.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.
        """
        name_from_path = decoder.path_to_songname(filepath)
        digest = decoder.unique_hash(filepath)
        if not song_name:
            song_name = name_from_path
        print(digest)

        # don't refingerprint already fingerprinted files
        if digest in self.songhashes_set:
            notice = "%s already fingerprinted, continuing..." % song_name
            print(notice)
            self.logger.info(notice)
            return

        song_name, fingerprints, file_hash = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)

        song_id = self.db.insert_song(song_name, file_hash)
        self.db.insert_hashes(song_id, fingerprints)
        self.db.set_song_fingerprinted(song_id)
        self.get_fingerprinted_songs()
Пример #17
0
    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint ``filepath``, reporting progress to the UI and stdout.

        Every status message is sent to ``dejavu.shared.UITEXTLOGGER.emit``
        and then printed.

        Args:
            filepath: path of the audio file.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.
        """
        name_from_path = decoder.path_to_songname(filepath)
        digest = decoder.unique_hash(filepath)
        if not song_name:
            song_name = name_from_path

        # don't refingerprint already fingerprinted files
        if digest in self.songhashes_set:
            skipped = "%s already fingerprinted, continuing..." % song_name
            dejavu.shared.UITEXTLOGGER.emit(skipped)
            print(skipped)
            return

        song_name, fingerprints, file_hash = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)

        saving = "Saving finger prints to Database for %s" % song_name
        dejavu.shared.UITEXTLOGGER.emit(saving)
        print(saving)

        song_id = self.db.insert_song(song_name, file_hash)
        self.db.insert_hashes(song_id, fingerprints)
        self.db.set_song_fingerprinted(song_id)
        self.get_fingerprinted_songs()

        finished = "Finished saving finger prints to Database for %s" % song_name
        dejavu.shared.UITEXTLOGGER.emit(finished)
        print(finished)
Пример #18
0
    def fingerprint_file(self, filepath, song_name=None):
        """Fingerprint ``filepath`` and report the outcome as a status code.

        Args:
            filepath: path of the audio file.
            song_name: optional name; when falsy the name returned by
                ``decoder.path_to_songname(filepath)`` is used.

        Returns:
            ``Dejavu.FINGERPRINT_STATUS_FILE_EXISTED`` when the file's hash
            is already in ``self.songhashes_set``; otherwise
            ``Dejavu.FINGERPRINT_STATUS_SUCCESS`` after storing the file.
        """
        name_from_path = decoder.path_to_songname(filepath)
        digest = decoder.unique_hash(filepath)
        if not song_name:
            song_name = name_from_path

        # don't refingerprint already fingerprinted files
        if digest in self.songhashes_set:
            print("%s already fingerprinted, continuing..." % song_name)
            return Dejavu.FINGERPRINT_STATUS_FILE_EXISTED

        song_name, raw_hashes, file_hash, audio_length = _fingerprint_worker(
            filepath, self.limit, song_name=song_name)
        song_id = self.db.insert_song(song_name, file_hash, audio_length)

        # Workaround (by 9nehS) for
        # https://github.com/worldveil/dejavu/issues/142
        converted = _convert_hashes(raw_hashes)

        self.db.insert_hashes(song_id, converted)
        self.db.set_song_fingerprinted(song_id)
        self.get_fingerprinted_songs()

        return Dejavu.FINGERPRINT_STATUS_SUCCESS
Пример #19
0
def _fingerprint_worker(filename, limit=None, song_name=None):
    """Fingerprint every channel of one audio file.

    Args:
        filename: path of the audio file, or a ``(filename, limit)``
            tuple/list as delivered by ``Pool.imap``-style calls.
        limit: passed to ``decoder.read`` as its second argument.
        song_name: name to return; when falsy, the file's base name without
            extension is used.

    Returns:
        A ``(song_name, hashes, file_hash)`` tuple, where ``hashes`` is the
        set union of ``fingerprint.fingerprint`` results over all channels
        and ``file_hash`` is ``decoder.unique_hash(filename)``.
    """
    # Pool.imap sends arguments as a single tuple, so unpack it ourselves.
    # Only real tuples/lists are unpacked: the previous try/except form also
    # split any two-character filename string into (filename, limit).
    if isinstance(filename, (tuple, list)):
        filename, limit = filename

    songname = os.path.splitext(os.path.basename(filename))[0]
    song_name = song_name or songname
    file_hash = decoder.unique_hash(filename)
    channels, Fs = decoder.read(filename, limit)
    result = set()
    channel_amount = len(channels)

    # Channels are numbered from 1 in the log messages.
    for channel_number, channel in enumerate(channels, 1):
        logger.info("Fingerprinting channel %d/%d for %s",
                    channel_number, channel_amount, filename)
        hashes = fingerprint.fingerprint(channel, Fs=Fs)
        logger.info("Finished channel %d/%d for %s",
                    channel_number, channel_amount, filename)
        result.update(hashes)

    return song_name, result, file_hash
Пример #20
0
 def check_if_fingerprinted(self):
     """Return whether the hash of ``self.filename`` is in the known-hash set."""
     current_hash = unique_hash(self.filename)
     # Called for its side effect only: per the original comment it sets
     # self.djv.songhashes_set. The return value is not used.
     self.djv.get_fingerprinted_songs()
     known_hashes = self.djv.songhashes_set
     return current_hash in known_hashes
Пример #21
0
 def check_if_fingerprinted(self):
     """Tell whether this file's hash is already among the fingerprinted songs.

     Returns:
         True if ``unique_hash(self.filename)`` is in
         ``self.djv.songhashes_set``, else False.
     """
     file_digest = unique_hash(self.filename)
     # The original comment states this call sets self.djv.songhashes_set;
     # its return value is intentionally ignored.
     self.djv.get_fingerprinted_songs()
     return file_digest in self.djv.songhashes_set
Пример #22
0
    def fingerprint_translation_record_directory(self,
                                                 path,
                                                 extensions,
                                                 nprocesses=None):
        """Fingerprint new files under ``path`` and then delete them.

        Files whose base name contains ``"_recording"`` are ignored, and
        files whose ``decoder.unique_hash`` is already in
        ``self.songhashes_set`` are skipped. The remaining files are
        fingerprinted in a process pool, stored via
        ``self.db.insert_repeat_hashes`` and finally removed from disk.

        Args:
            path: directory handed to ``decoder.find_files``.
            extensions: extensions handed to ``decoder.find_files``.
            nprocesses: number of worker processes. A falsy value selects
                ``multiprocessing.cpu_count()``; a negative value becomes 1.
        """
        msg = "Starting fingerprint translation record directory"
        print(msg)
        self.logger.info(msg)

        # Try to use the maximum amount of processes if not given.
        try:
            nprocesses = nprocesses or multiprocessing.cpu_count()
        except NotImplementedError:
            nprocesses = 1
        else:
            nprocesses = 1 if nprocesses <= 0 else nprocesses

        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            if "_recording" in os.path.basename(filename):
                continue
            # don't refingerprint already fingerprinted files
            if decoder.unique_hash(filename) in self.songhashes_set:
                msg = "%s already fingerprinted, continuing..." % filename
                print(msg)
                self.logger.info(msg)
                continue

            filenames_to_fingerprint.append(filename)

        # Each worker call receives one (filename, limit) tuple.
        worker_input = zip(filenames_to_fingerprint,
                           [self.limit] * len(filenames_to_fingerprint))

        # The pool is always shut down: terminated on error, closed on
        # success, joined either way.
        pool = multiprocessing.Pool(nprocesses)
        try:
            iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

            # Loop till we have all of them
            while True:
                try:
                    song_name, hashes, file_hash = next(iterator)
                except multiprocessing.TimeoutError:
                    continue
                except StopIteration:
                    break
                except Exception:
                    # KeyboardInterrupt/SystemExit are deliberately not
                    # swallowed so the run can be aborted.
                    msg = "Failed fingerprinting"
                    print(msg)
                    self.logger.info(msg)
                    # Print traceback because we can't reraise it here
                    traceback.print_exc(file=sys.stdout)
                else:
                    sid = self.db.insert_song(song_name, file_hash)

                    self.db.insert_repeat_hashes(sid, hashes)
                    self.db.set_song_fingerprinted(sid)
                    self.get_fingerprinted_songs()
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

        # NOTE(review): this also deletes files whose fingerprinting failed
        # above, because results cannot be mapped back to filenames here.
        # Confirm that losing those files is acceptable.
        for filename in filenames_to_fingerprint:
            os.remove(filename)
Пример #23
0
    def fingerprint_directory(self, path, extensions, nprocesses=None):
        """Fingerprint the audio files under ``path`` and store them in the DB.

        Per the original author's notes, each audio file is first split into
        channels and each channel is then encoded section by section (that
        work happens inside ``_fingerprint_worker``).

        Args:
            path: path of the audio files, handed to ``decoder.find_files``.
            extensions: file extensions, handed to ``decoder.find_files``.
            nprocesses: number of worker processes. A falsy value selects
                ``multiprocessing.cpu_count()``; a negative value becomes 1.
        """
        # Try to use the maximum amount of processes if not given.
        try:
            # cpu_count() returns the number of CPUs in the system.
            nprocesses = nprocesses or multiprocessing.cpu_count()
        except NotImplementedError:
            nprocesses = 1
        else:
            nprocesses = 1 if nprocesses <= 0 else nprocesses

        # Paths of the audio files that still need fingerprinting.
        filenames_to_fingerprint = []
        for filename, _ in decoder.find_files(path, extensions):
            # don't refingerprint already fingerprinted files: the file's
            # hash (sha1, per the original note) is compared against the
            # hashes of all known songs.
            if decoder.unique_hash(filename) in self.songhashes_set:
                print("%s already fingerprinted, continuing..." % filename)
                continue

            filenames_to_fingerprint.append(filename)

        # Prepare _fingerprint_worker input: one (filename, limit) tuple per
        # file. Note that [None] * 2 == [None, None].
        worker_input = zip(filenames_to_fingerprint,
                           [self.limit] * len(filenames_to_fingerprint))

        # Process pool. It is always shut down: terminated on error, closed
        # on success, joined either way.
        pool = multiprocessing.Pool(nprocesses)
        try:
            # Send off our tasks. imap_unordered is similar to map: the
            # function is applied to every item of the iterable, and a free
            # worker picks up the next file as soon as it finishes one.
            iterator = pool.imap_unordered(_fingerprint_worker, worker_input)

            # Loop till we have all of them
            while True:
                try:
                    # Result of _fingerprint_worker() for one audio file.
                    song_name, hashes, file_hash = next(iterator)
                except multiprocessing.TimeoutError:
                    continue
                except StopIteration:
                    break
                except Exception:
                    # KeyboardInterrupt/SystemExit are deliberately not
                    # swallowed so the run can be aborted.
                    print("Failed fingerprinting")
                    # Print traceback because we can't reraise it here
                    traceback.print_exc(file=sys.stdout)
                else:
                    # Original author's note: song_name, file_hash and
                    # hashes could be dumped to a text file here for a
                    # colleague, pending a look at how his song recognition
                    # works.
                    # Per the original notes: the song name and file hash go
                    # into the song table, ...
                    sid = self.db.insert_song(song_name, file_hash)

                    # ... the returned id and the hashes go into the
                    # fingerprint table (sid acting as the foreign key), ...
                    self.db.insert_hashes(sid, hashes)
                    # ... and the song's "fingerprinted" flag is set to 1.
                    self.db.set_song_fingerprinted(sid)
                    self.get_fingerprinted_songs()
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()