def test():
    """Exercise the Process/Manager map path against a synthetic database.

    Builds db_size haystacks (each holding a copy of button.wav's data),
    partitions them into num_split_db sub-databases, starts one Process per
    (needle, sub-database) pair, then reduces the collected emissions and
    prints elapsed-time checkpoints.
    """
    button_wavsound = wavsound("button.wav")
    haystackss = []  # database split into a list of smaller databases
    keynames = []
    db_size = 300  # Set Database Size
    num_split_db = 2  # Set number of split databases
    size_split_db = db_size // num_split_db
    for _ in range(num_split_db):
        haystackss.append([])
    for i in range(db_size):
        # i // size_split_db selects which sub-database receives haystack i
        # (the original tracked a separate `counter` that always equaled i).
        split_db_key = i // size_split_db
        keynames.append(i)
        haystackss[split_db_key].append(haystack(i, button_wavsound.get_data()))

    button_needle_factory = needlestorage(button_wavsound, 1000, 50)
    print("USING MAP PROCESS and Manager")
    needles = button_needle_factory.get_needles()
    print(needles[0])

    manager = Manager()
    return_emissions = manager.dict()  # shared dict: process number -> emissions
    jobs = []
    pnum = 0  # process number (number of needles, not size of each needle)
    len_needles = len(needles)
    print("Number of Needles: ", len_needles)

    start_time = time.time()
    for needle in needles:
        for haystacks in haystackss:
            p = Process(
                target=calltomapper,
                args=(haystacks, needle, pnum,
                      len_needles * num_split_db, return_emissions),
            )
            jobs.append(p)
            p.start()
            pnum += 1
    print(time.time() - start_time)

    for proc in jobs:
        proc.join()  # wait for each process to end completely
    print(time.time() - start_time)

    # Flatten the per-process emission lists and reduce.
    emissions_list = sum(return_emissions.values(), [])
    print("Reduce Result:")
    print(haystackreducer(emissions_list, keynames))
    print("Done")
    print(time.time() - start_time)
    # NOTE(review): a Pool-based implementation previously sat here inside a
    # triple-quoted block (kept because it measured slower than the Process
    # approach); it and a trailing UNMATCHED triple-quote — which made the
    # module unparseable — have been removed.
def test():
    """Run the Process/Manager map path over a synthetic split database.

    Duplicate of the earlier test() (this file concatenates two versions of
    the module); reconstructed and fixed independently: creates db_size
    haystacks from button.wav, splits them across num_split_db sub-databases,
    maps every needle against every sub-database in its own Process, then
    reduces and reports timings.
    """
    button_wavsound = wavsound('button.wav')
    haystackss = []  # split database into list of smaller databases
    keynames = []
    db_size = 300  # Set Database Size
    num_split_db = 2  # Set number of split databases
    size_split_db = db_size // num_split_db
    for _ in range(num_split_db):
        haystackss.append([])
    for i in range(db_size):
        keynames.append(i)
        # The original kept a `counter` that mirrored i exactly; use i directly.
        haystackss[i // size_split_db].append(
            haystack(i, button_wavsound.get_data()))

    button_needle_factory = needlestorage(button_wavsound, 1000, 50)
    print("USING MAP PROCESS and Manager")
    needles = button_needle_factory.get_needles()
    print(needles[0])

    manager = Manager()
    return_emissions = manager.dict()  # shared dict: process number -> emissions
    jobs = []
    pnum = 0  # process number (number of needles, not size of each needle)
    len_needles = len(needles)
    print("Number of Needles: ", len_needles)

    start_time = time.time()
    for needle in needles:
        for haystacks in haystackss:
            p = Process(target=calltomapper,
                        args=(haystacks, needle, pnum,
                              len_needles * num_split_db, return_emissions))
            jobs.append(p)
            p.start()
            pnum += 1
    print(time.time() - start_time)

    for proc in jobs:
        proc.join()  # wait for each process to end completely
    print(time.time() - start_time)

    emissions_list = sum(return_emissions.values(), [])
    print("Reduce Result:")
    print(haystackreducer(emissions_list, keynames))
    print("Done")
    print(time.time() - start_time)
    # NOTE(review): the commented-out Pool variant (slower than Process per
    # the original author) and a dangling unmatched triple-quote that broke
    # parsing were removed from the end of this function.
def run(): """ run runs the database search taking three user inputs, the query wav file, number of partitions, and number of partition samples""" good_file = 0 while (good_file == 0): query = raw_input( "Submit .wav file to search against database (Example: button.wav): " ) if (os.path.isfile(query)): good_file = 1 #Instantiate Wavsound objects from the wav files t_wavsounds = {} query_wavsound = wavsound(query) print( "\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n" ) partition = raw_input("Set number of partitions of the query from 1 to " + str(int(len(query_wavsound.get_data()) / 3)) + ": ") samples = raw_input("Set number of samples of partitions from 1 to " + partition + " (Recommend < 50): ") # Database Structure haystacks = [] # Database look up directory rootdir = 'db' for subdir, __, files in os.walk(rootdir): for file in files: # for debug print (subdir+"/"+file) t_wavsounds[subdir + "/" + file] = wavsound(subdir + "/" + file) # for debug print(t_wavsounds[subdir+"/"+file]) haystacks.append( haystack(subdir + "/" + file, t_wavsounds[subdir + "/" + file].get_data())) query_needle_factory = needlestorage(query_wavsound, int(partition), int(samples)) haystackmap = haystackmapper(haystacks) needles = query_needle_factory.get_needles() len_needles = len(needles) len_needle = len(needles[0]) # size is the same for all needles manager = Manager() # Map processes emit key-value pairs to emissions return_emissions = manager.dict() # Job is a list of processes jobs = [] # Process number pnum = 0 print "Number of Needles: ", len(needles) # Database query time start_time = time.time() #Distribute processes using multiprocessor for needle in needles: p = Process(target=calltomapper, args=(haystackmap, needle, pnum, len_needles, return_emissions)) jobs.append(p) p.start() pnum += 1 for proc in jobs: proc.join() # flatten return_emissions into a list emissions_list = 
sum(return_emissions.values(), []) print "Search Result:" result_dict = haystackreducer(emissions_list) # Tabulate % match (wav files with 0% match are excluded from the result) for key in result_dict: print str(key), ": ", (25 - len(str(key))) * " ", str("{0:.2f}".format( int(result_dict[key]) / len(needles) * 100)), "% match" # Show search time timelapse_parallel = time.time() - start_time print timelapse_parallel, "seconds"
def calltoreducer(emissions, key, join):
    """Reduce one emission list and record the result under ``key`` in ``join``.

    Thin wrapper so the reduce step can be dispatched to a worker process:
    the reduced value is written into the shared mapping rather than returned.
    """
    reduced = haystackreducer(emissions)
    join[key] = reduced
# --- Demo / smoke test of the map-reduce plumbing on a tiny in-memory DB ---
haystacks = []
haystacks.append(haystack("0", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
haystacks.append(haystack("1", [3, 2, 2, 3, 4, 3, 6, 7, 8, 9]))
haystacks.append(haystack("2", [1, 6, -1, 0, 4, 0, 6, 7, 8, 9]))
haystacks.append(haystack("3", [3, 3, 3]))
haystackmap = haystackmapper(haystacks)

emissions = []
print("USING MAP POOL")
pool = Pool(2)  # if it is a quad-core machine it can be set to 4
emissions = pool.map(haystackmap.mapper, [[2], [3]])
print(emissions)
print(haystackreducer(sum(emissions, [])))
emissions = []

print("USING MAP PROCESS")
# BUG FIX: the original rebound `p` for each Process and called join() only
# on the last one, so the first two workers were started but never waited
# on. Collect every process and join them all.
procs = []
for arg in (2, 3, 4):
    p = Process(target=simplefunction, args=(1, arg))
    p.start()
    procs.append(p)
for p in procs:
    p.join()

print("USING MAP PROCESS WITH MANAGER")
needles = [2, 3]
manager = Manager()
return_emissions = manager.dict()  # shared dict for worker emissions
def run(): """ run runs the database search taking three user inputs, the query wav file, number of partitions, and number of partition samples""" good_file = 0 while (good_file == 0): query = raw_input("Submit .wav file to search against database (Example: button.wav): ") if (os.path.isfile(query)): good_file = 1 #Instantiate Wavsound objects from the wav files t_wavsounds = {} query_wavsound = wavsound(query) print("\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n") partition = raw_input("Set number of partitions of the query from 1 to " + str(int(len(query_wavsound.get_data())/3))+": ") samples = raw_input("Set number of samples of partitions from 1 to " + partition + " (Recommend < 50): ") # Database Structure haystacks = [] # Database look up directory rootdir = 'db' for subdir, __, files in os.walk(rootdir): for file in files: # for debug print (subdir+"/"+file) t_wavsounds[subdir+"/"+file] = wavsound(subdir+"/"+file) # for debug print(t_wavsounds[subdir+"/"+file]) haystacks.append(haystack(subdir+"/"+file,t_wavsounds[subdir+"/"+file].get_data())) query_needle_factory = needlestorage(query_wavsound,int(partition),int(samples)) haystackmap = haystackmapper(haystacks) needles = query_needle_factory.get_needles() len_needles = len(needles) len_needle = len(needles[0]) # size is the same for all needles manager = Manager() # Map processes emit key-value pairs to emissions return_emissions = manager.dict() # Job is a list of processes jobs = [] # Process number pnum = 0 print "Number of Needles: ", len(needles) # Database query time start_time = time.time() #Distribute processes using multiprocessor for needle in needles: p = Process(target=calltomapper, args=(haystackmap,needle,pnum,len_needles,return_emissions)) jobs.append(p) p.start() pnum += 1 for proc in jobs: proc.join() # flatten return_emissions into a list emissions_list = sum(return_emissions.values(),[]) print "Search 
Result:" result_dict = haystackreducer(emissions_list) # Tabulate % match (wav files with 0% match are excluded from the result) for key in result_dict: print str(key),": ",(25-len(str(key)))*" ",str("{0:.2f}".format(int(result_dict[key])/len(needles)*100)),"% match" # Show search time timelapse_parallel = time.time() - start_time print timelapse_parallel, "seconds"