示例#1
0
    def __init__(self):
        """Set up the analyzer, hash table and matcher from ``config_dict``.

        The options are read from the module-level ``config_dict``.  The
        database named by its ``--dbase`` entry is loaded, and the resulting
        objects are stored on the instance as ``args``, ``analyzer``,
        ``matcher``, ``hash_table`` and ``precomp_type``.
        """
        options = config_dict
        fingerprinter = setup_analyzer(options)

        # Load the hash table file named by the --dbase option.
        table = hash_table.HashTable(options['--dbase'])

        # Only a message is emitted when the stored sample rate differs from
        # the analyzer's target rate; neither object is modified here.
        if fingerprinter:
            if 'samplerate' in table.params:
                if table.params['samplerate'] != fingerprinter.target_sr:
                    print("db samplerate overridden to ",
                          fingerprinter.target_sr)

        query_matcher = setup_matcher(options)

        # Publish everything on the instance in one go, after all the
        # set-up steps above have succeeded.
        self.args = options
        self.analyzer = fingerprinter
        self.matcher = query_matcher
        self.hash_table = table
        self.precomp_type = 'hashes'
示例#2
0
def local_tester():
    """Smoke test: ingest one hard-coded WAV file and save the table.

    A default-constructed HashTable and Analyzer are used; the result is
    written to 'httest.pklz' in the current working directory.
    """
    table = hash_table.HashTable()
    fingerprinter = Analyzer()
    fingerprinter.ingest(table, '/Users/dpwe/Downloads/carol11k.wav')
    table.save('httest.pklz')
示例#3
0
def make_ht_from_list(analyzer, filelist, hashbits, depth, maxtime, pipe=None):
    """Fill a brand-new hash table with the hashes of every file in a list.

    Intended as the worker target when the file list is split across
    processes.

    Args:
        analyzer: Object providing ``wavfile2hashes(filename)``.
        filelist: Iterable of file names to fingerprint.
        hashbits, depth, maxtime: Forwarded unchanged to ``HashTable``.
        pipe: Optional object with a ``send`` method.  When truthy, the
            finished table is sent through it and nothing is returned.

    Returns:
        The populated hash table when no ``pipe`` is given, else ``None``.
    """
    table = hash_table.HashTable(hashbits=hashbits, depth=depth,
                                 maxtime=maxtime)
    for path in filelist:
        table.store(path, analyzer.wavfile2hashes(path))

    # Without a pipe the caller wants the table handed back directly.
    if not pipe:
        return table
    pipe.send(table)
示例#4
0
def glob2hashtable(pattern, density=20.0):
    """Build a hash table from the files matching a glob pattern.

    The module-level ``g2h_analyzer`` is created on first use with the given
    ``density`` and reused afterwards, so ``density`` only takes effect on
    the call that creates it.

    Args:
        pattern: Glob pattern passed to ``glob.glob``.
        density: ``density`` argument for the lazily created ``Analyzer``.

    Returns:
        A new ``hash_table.HashTable`` into which every matching file has
        been ingested (left as constructed when nothing matches).
    """
    global g2h_analyzer
    if g2h_analyzer is None:
        g2h_analyzer = Analyzer(density=density)

    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and only fall back to clock() on interpreters that lack it.
    clock = getattr(time, "perf_counter", None) or time.clock

    ht = hash_table.HashTable()
    filelist = glob.glob(pattern)
    initticks = clock()
    totdur = 0.0
    tothashes = 0
    for ix, file_ in enumerate(filelist):
        print(time.ctime(), "ingesting #", ix, ":", file_, "...")
        dur, nhash = g2h_analyzer.ingest(ht, file_)
        totdur += dur
        tothashes += nhash
    elapsedtime = clock() - initticks
    if totdur > 0:
        print("Added", tothashes, "(", tothashes / float(totdur),
              "hashes/sec) at ", elapsedtime / totdur, "x RT")
    else:
        # No files matched (or they had no duration): the rates are
        # undefined, so report the count instead of dividing by zero.
        print("Added", tothashes, "hashes (no audio duration ingested)")
    return ht
示例#5
0
def main(argv):
    """Run the audfprint command-line interface.

    Parses ``argv`` with docopt, determines which single command was
    requested, builds the analyzer / hash table / matcher that command
    needs, runs it on one or several cores, and finally saves the hash
    table if it was modified.

    Args:
        argv: Full argument vector; element 0 is skipped and ``argv[1:]``
            is handed to docopt.

    Raises:
        ValueError: If zero or more than one command is selected, or if a
            command other than ``precompute`` is given no ``--dbase``.
    """
    # Other globals set from command line
    args = docopt.docopt(USAGE, version=__version__, argv=argv[1:])

    # Figure which command was chosen
    poss_cmds = [
        'new', 'add', 'precompute', 'merge', 'newmerge', 'match', 'list',
        'remove'
    ]
    cmdlist = [cmdname for cmdname in poss_cmds if args[cmdname]]
    if len(cmdlist) != 1:
        raise ValueError("must specify exactly one command")
    # The actual command as a str
    cmd = cmdlist[0]

    # These commands only manipulate existing tables: they need no analyzer
    # and always run in a single process.  Strings are compared by value
    # (== / in); identity comparison against literals is not guaranteed.
    table_only_cmds = ("merge", "newmerge", "list", "remove")
    uses_analyzer = cmd not in table_only_cmds

    # Setup output function
    report = setup_reporter(args)

    # Keep track of wall time.  time.clock() was removed in Python 3.8, so
    # use perf_counter() where available and fall back to clock() otherwise.
    clock = getattr(time, "perf_counter", None) or time.clock
    initticks = clock()

    # Command line sanity.
    if args["--maxtimebits"]:
        args["--maxtimebits"] = int(args["--maxtimebits"])
    else:
        args["--maxtimebits"] = hash_table._bitsfor(int(args["--maxtime"]))

    # Setup the analyzer if we're using one
    analyzer = setup_analyzer(args) if uses_analyzer else None

    precomp_type = 'hashes'

    # Set up the hash table, if we're using one (i.e., unless "precompute")
    if cmd != "precompute":
        # For everything other than precompute, we need a database name
        dbasename = args['--dbase']
        if not dbasename:
            raise ValueError("dbase name must be provided if not precompute")
        if cmd in ("new", "newmerge"):
            # Check that the output directory can be created before we start
            ensure_dir(os.path.split(dbasename)[0])
            # Create a new hash table
            hash_tab = hash_table.HashTable(
                hashbits=int(args['--hashbits']),
                depth=int(args['--bucketsize']),
                maxtime=(1 << int(args['--maxtimebits'])))
            # Set its samplerate param
            if analyzer:
                hash_tab.params['samplerate'] = analyzer.target_sr

        else:
            # Load existing hash table file (add, match, merge)
            if args['--verbose']:
                report([time.ctime() + " Reading hash table " + dbasename])
            hash_tab = hash_table.HashTable(dbasename)
            # Only a message is printed on a sample-rate mismatch; the
            # analyzer's target rate is left as configured.
            if analyzer and 'samplerate' in hash_tab.params \
                   and hash_tab.params['samplerate'] != analyzer.target_sr:
                print("db samplerate overridden to ", analyzer.target_sr)
    else:
        # The command IS precompute
        # dummy empty hash table
        hash_tab = None
        if args['--precompute-peaks']:
            precomp_type = 'peaks'

    # Create a matcher
    matcher = setup_matcher(args) if cmd == 'match' else None

    filename_iter = filename_list_iterator(args['<file>'], args['--wavdir'],
                                           args['--wavext'], args['--list'])

    #######################
    # Run the main command
    #######################

    # How many processors to use (multiprocessing)
    ncores = int(args['--ncores'])
    if ncores > 1 and uses_analyzer:
        # merge/newmerge/list/remove are always single-thread processes
        do_cmd_multiproc(cmd,
                         analyzer,
                         hash_tab,
                         filename_iter,
                         matcher,
                         args['--precompdir'],
                         precomp_type,
                         report,
                         skip_existing=args['--skip-existing'],
                         strip_prefix=args['--wavdir'],
                         ncores=ncores)
    else:
        do_cmd(cmd,
               analyzer,
               hash_tab,
               filename_iter,
               matcher,
               args['--precompdir'],
               precomp_type,
               report,
               skip_existing=args['--skip-existing'],
               strip_prefix=args['--wavdir'])

    elapsedtime = clock() - initticks
    if analyzer and analyzer.soundfiletotaldur > 0.:
        log_format = "Processed {} files ({} s total dur) in {} s sec = {} x RT"
        print(
            log_format.format(analyzer.soundfilecount,
                              analyzer.soundfiletotaldur, elapsedtime,
                              (elapsedtime / analyzer.soundfiletotaldur)))

    # Save the hash table file if it has been modified
    if hash_tab and hash_tab.dirty:
        # We already created the directory, if "new".
        hash_tab.save(dbasename)
示例#6
0
def do_cmd(cmd,
           analyzer,
           hash_tab,
           filename_iter,
           matcher,
           outdir,
           type,
           report,
           skip_existing=False,
           strip_prefix=None):
    """Run one audfprint command in a single process.

    Args:
        cmd: Command name: 'merge', 'newmerge', 'precompute', 'match',
            'new', 'add', 'remove' or 'list'.
        analyzer: Fingerprint analyzer; used by precompute, match, new, add.
        hash_tab: Hash table operated on (unused by precompute).
        filename_iter: Iterable of input file names (for merge/newmerge
            these are other hash table files).
        matcher: Matcher object; used only by 'match'.
        outdir: Output directory handed to ``file_precompute``.
        type: Precompute type handed to ``file_precompute``.  The name
            shadows the builtin but is kept for caller compatibility.
        report: Callable that receives the output messages.
        skip_existing: Forwarded to ``file_precompute``.
        strip_prefix: Forwarded to ``file_precompute``.

    Raises:
        ValueError: If ``cmd`` is not one of the recognized commands.
        AssertionError: If tables being merged disagree on sample rate.
    """
    if cmd in ('merge', 'newmerge'):
        # files are other hash tables, merge them in
        for filename in filename_iter:
            hash_tab2 = hash_table.HashTable(filename)
            if "samplerate" in hash_tab.params:
                assert (hash_tab.params["samplerate"] ==
                        hash_tab2.params["samplerate"]), \
                    "samplerate mismatch while merging " + str(filename)
            else:
                # "newmerge" fails to setup the samplerate param
                hash_tab.params["samplerate"] = hash_tab2.params["samplerate"]
            hash_tab.merge(hash_tab2)

    elif cmd == 'precompute':
        # just precompute fingerprints, single core
        for filename in filename_iter:
            report(
                file_precompute(analyzer,
                                filename,
                                outdir,
                                type,
                                skip_existing=skip_existing,
                                strip_prefix=strip_prefix))

    elif cmd == 'match':
        # Running query, single-core mode
        for num, filename in enumerate(filename_iter):
            msgs = matcher.file_match_to_msgs(analyzer, hash_tab, filename,
                                              num)
            report(msgs.message_list)

    elif cmd in ('new', 'add'):
        # Adding files
        tothashes = 0
        for ix, filename in enumerate(filename_iter):
            report([
                time.ctime() + " ingesting #" + str(ix) + ": " + filename +
                " ..."
            ])
            _, nhash = analyzer.ingest(hash_tab, filename)
            tothashes += nhash

        # With no files (or only zero-length audio) the total duration is 0;
        # report a rate of 0.0 instead of raising ZeroDivisionError, which
        # would abort before the caller gets a chance to save the table.
        totdur = float(analyzer.soundfiletotaldur)
        rate = tothashes / totdur if totdur else 0.0
        report([
            "Added " + str(tothashes) + " hashes " + "(%.1f" % rate +
            " hashes/sec)"
        ])

    elif cmd == 'remove':
        # Removing files from hash table.
        for filename in filename_iter:
            hash_tab.remove(filename)

    elif cmd == 'list':
        # Each listed entry is reported as its own one-item message list.
        hash_tab.list(lambda x: report([x]))

    else:
        raise ValueError("unrecognized command: " + cmd)
示例#7
0
# Script fragment: prepares the hash table, matcher, file iterator and core
# count for a command.  `args`, `cmd`, `afp`, `hash_table` and `time` are
# defined outside this fragment.

# Keep track of wall time
# NOTE(review): time.clock() was removed in Python 3.8; on newer interpreters
# this needs time.perf_counter() (or similar) instead.
initticks = time.clock()

precomp_type = 'hashes'
# Value of the --dbase option; handed to HashTable() below for every command
# other than new/newmerge.
dbasename = args['--dbase']

# Command line sanity.
# Normalise --maxtimebits to an int: take the explicit value when given,
# otherwise derive it from --maxtime via hash_table._bitsfor().
if args["--maxtimebits"]:
    args["--maxtimebits"] = int(args["--maxtimebits"])
else:
    args["--maxtimebits"] = hash_table._bitsfor(int(args["--maxtime"]))

# new/newmerge build a table from the size options; every other command
# constructs the table from dbasename.
if cmd in ["new", "newmerge"]:
    hash_tab = hash_table.HashTable(hashbits=int(args['--hashbits']),
                                    depth=int(args['--bucketsize']),
                                    maxtime=(1 << int(args['--maxtimebits'])))
else:
    hash_tab = hash_table.HashTable(dbasename)

# Create a matcher (done unconditionally here, whatever the command is)
matcher = afp.setup_matcher(args)
# Iterator over the input files, built from the <file>, --wavdir, --wavext
# and --list options.
filename_iter = afp.filename_list_iterator(args['<file>'], args['--wavdir'],
                                           args['--wavext'], args['--list'])

# How many processors to use (multiprocessing)
ncores = int(args['--ncores'])
if ncores > 1:  # not for merge, list and remove
    # merge/newmerge/list/remove are always single-thread processes
    afp.do_cmd_multiproc(cmd,
                         analyzer,
示例#8
0
 def new_hashtable(self):
     """Construct a HashTable sized from the options stored on the instance.

     ``--hashbits``, ``--bucketsize`` and ``--maxtime`` are read from
     ``self.args``, converted to int and passed as ``hashbits``, ``depth``
     and ``maxtime`` respectively.
     """
     options = self.args
     return hash_table.HashTable(
         hashbits=int(options['--hashbits']),
         depth=int(options['--bucketsize']),
         maxtime=int(options['--maxtime']),
     )