def test():
    """Exercise the Process/Manager map path against a synthetic database.

    Builds db_size haystacks (each holding a copy of button.wav's data),
    partitions them into num_split_db sub-databases, starts one Process per
    (needle, sub-database) pair, then reduces the collected emissions and
    prints elapsed-time checkpoints.
    """
    button_wavsound = wavsound("button.wav")
    haystackss = []  # database split into a list of smaller databases
    keynames = []
    db_size = 300  # Set Database Size
    num_split_db = 2  # Set number of split databases
    size_split_db = db_size // num_split_db
    for _ in range(num_split_db):
        haystackss.append([])
    for i in range(db_size):
        # i // size_split_db selects which sub-database receives haystack i
        # (the original tracked a separate `counter` that always equaled i).
        split_db_key = i // size_split_db
        keynames.append(i)
        haystackss[split_db_key].append(haystack(i, button_wavsound.get_data()))

    button_needle_factory = needlestorage(button_wavsound, 1000, 50)
    print("USING MAP PROCESS and Manager")
    needles = button_needle_factory.get_needles()
    print(needles[0])

    manager = Manager()
    return_emissions = manager.dict()  # shared dict: process number -> emissions
    jobs = []
    pnum = 0  # process number (number of needles, not size of each needle)
    len_needles = len(needles)
    print("Number of Needles: ", len_needles)

    start_time = time.time()
    for needle in needles:
        for haystacks in haystackss:
            p = Process(
                target=calltomapper,
                args=(haystacks, needle, pnum,
                      len_needles * num_split_db, return_emissions),
            )
            jobs.append(p)
            p.start()
            pnum += 1
    print(time.time() - start_time)

    for proc in jobs:
        proc.join()  # wait for each process to end completely
    print(time.time() - start_time)

    # Flatten the per-process emission lists and reduce.
    emissions_list = sum(return_emissions.values(), [])
    print("Reduce Result:")
    print(haystackreducer(emissions_list, keynames))
    print("Done")
    print(time.time() - start_time)
    # NOTE(review): a Pool-based implementation previously sat here inside a
    # triple-quoted block (kept because it measured slower than the Process
    # approach); it and a trailing UNMATCHED triple-quote — which made the
    # module unparseable — have been removed.
def test():
    """Run the Process/Manager map path over a synthetic split database.

    Duplicate of the earlier test() (this file concatenates two versions of
    the module); reconstructed and fixed independently: creates db_size
    haystacks from button.wav, splits them across num_split_db sub-databases,
    maps every needle against every sub-database in its own Process, then
    reduces and reports timings.
    """
    button_wavsound = wavsound('button.wav')
    haystackss = []  # split database into list of smaller databases
    keynames = []
    db_size = 300  # Set Database Size
    num_split_db = 2  # Set number of split databases
    size_split_db = db_size // num_split_db
    for _ in range(num_split_db):
        haystackss.append([])
    for i in range(db_size):
        keynames.append(i)
        # The original kept a `counter` that mirrored i exactly; use i directly.
        haystackss[i // size_split_db].append(
            haystack(i, button_wavsound.get_data()))

    button_needle_factory = needlestorage(button_wavsound, 1000, 50)
    print("USING MAP PROCESS and Manager")
    needles = button_needle_factory.get_needles()
    print(needles[0])

    manager = Manager()
    return_emissions = manager.dict()  # shared dict: process number -> emissions
    jobs = []
    pnum = 0  # process number (number of needles, not size of each needle)
    len_needles = len(needles)
    print("Number of Needles: ", len_needles)

    start_time = time.time()
    for needle in needles:
        for haystacks in haystackss:
            p = Process(target=calltomapper,
                        args=(haystacks, needle, pnum,
                              len_needles * num_split_db, return_emissions))
            jobs.append(p)
            p.start()
            pnum += 1
    print(time.time() - start_time)

    for proc in jobs:
        proc.join()  # wait for each process to end completely
    print(time.time() - start_time)

    emissions_list = sum(return_emissions.values(), [])
    print("Reduce Result:")
    print(haystackreducer(emissions_list, keynames))
    print("Done")
    print(time.time() - start_time)
    # NOTE(review): the commented-out Pool variant (slower than Process per
    # the original author) and a dangling unmatched triple-quote that broke
    # parsing were removed from the end of this function.
def run(): """ run runs the database search taking three user inputs, the query wav file, number of partitions, and number of partition samples""" good_file = 0 while (good_file == 0): query = raw_input( "Submit .wav file to search against database (Example: button.wav): " ) if (os.path.isfile(query)): good_file = 1 #Instantiate Wavsound objects from the wav files t_wavsounds = {} query_wavsound = wavsound(query) print( "\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n" ) partition = raw_input("Set number of partitions of the query from 1 to " + str(int(len(query_wavsound.get_data()) / 3)) + ": ") samples = raw_input("Set number of samples of partitions from 1 to " + partition + " (Recommend < 50): ") # Database Structure haystacks = [] # Database look up directory rootdir = 'db' for subdir, __, files in os.walk(rootdir): for file in files: # for debug print (subdir+"/"+file) t_wavsounds[subdir + "/" + file] = wavsound(subdir + "/" + file) # for debug print(t_wavsounds[subdir+"/"+file]) haystacks.append( haystack(subdir + "/" + file, t_wavsounds[subdir + "/" + file].get_data())) query_needle_factory = needlestorage(query_wavsound, int(partition), int(samples)) haystackmap = haystackmapper(haystacks) needles = query_needle_factory.get_needles() len_needles = len(needles) len_needle = len(needles[0]) # size is the same for all needles manager = Manager() # Map processes emit key-value pairs to emissions return_emissions = manager.dict() # Job is a list of processes jobs = [] # Process number pnum = 0 print "Number of Needles: ", len(needles) # Database query time start_time = time.time() #Distribute processes using multiprocessor for needle in needles: p = Process(target=calltomapper, args=(haystackmap, needle, pnum, len_needles, return_emissions)) jobs.append(p) p.start() pnum += 1 for proc in jobs: proc.join() # flatten return_emissions into a list emissions_list = 
sum(return_emissions.values(), []) print "Search Result:" result_dict = haystackreducer(emissions_list) # Tabulate % match (wav files with 0% match are excluded from the result) for key in result_dict: print str(key), ": ", (25 - len(str(key))) * " ", str("{0:.2f}".format( int(result_dict[key]) / len(needles) * 100)), "% match" # Show search time timelapse_parallel = time.time() - start_time print timelapse_parallel, "seconds"
def calltoreducer(emissions, key, join):
    """Reduce one emission list and record the result under ``key`` in ``join``.

    Thin wrapper so the reduce step can be dispatched to a worker process:
    the reduced value is written into the shared mapping rather than returned.
    """
    reduced = haystackreducer(emissions)
    join[key] = reduced
# --- Demo / smoke test of the map-reduce plumbing on a tiny in-memory DB ---
haystacks = []
haystacks.append(haystack("0", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
haystacks.append(haystack("1", [3, 2, 2, 3, 4, 3, 6, 7, 8, 9]))
haystacks.append(haystack("2", [1, 6, -1, 0, 4, 0, 6, 7, 8, 9]))
haystacks.append(haystack("3", [3, 3, 3]))
haystackmap = haystackmapper(haystacks)

emissions = []
print("USING MAP POOL")
pool = Pool(2)  # if it is a quad-core machine it can be set to 4
emissions = pool.map(haystackmap.mapper, [[2], [3]])
print(emissions)
print(haystackreducer(sum(emissions, [])))
emissions = []

print("USING MAP PROCESS")
# BUG FIX: the original rebound `p` for each Process and called join() only
# on the last one, so the first two workers were started but never waited
# on. Collect every process and join them all.
procs = []
for arg in (2, 3, 4):
    p = Process(target=simplefunction, args=(1, arg))
    p.start()
    procs.append(p)
for p in procs:
    p.join()

print("USING MAP PROCESS WITH MANAGER")
needles = [2, 3]
manager = Manager()
return_emissions = manager.dict()  # shared dict for worker emissions
def run(): """ run runs the database search taking three user inputs, the query wav file, number of partitions, and number of partition samples""" good_file = 0 while (good_file == 0): query = raw_input("Submit .wav file to search against database (Example: button.wav): ") if (os.path.isfile(query)): good_file = 1 #Instantiate Wavsound objects from the wav files t_wavsounds = {} query_wavsound = wavsound(query) print("\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n") partition = raw_input("Set number of partitions of the query from 1 to " + str(int(len(query_wavsound.get_data())/3))+": ") samples = raw_input("Set number of samples of partitions from 1 to " + partition + " (Recommend < 50): ") # Database Structure haystacks = [] # Database look up directory rootdir = 'db' for subdir, __, files in os.walk(rootdir): for file in files: # for debug print (subdir+"/"+file) t_wavsounds[subdir+"/"+file] = wavsound(subdir+"/"+file) # for debug print(t_wavsounds[subdir+"/"+file]) haystacks.append(haystack(subdir+"/"+file,t_wavsounds[subdir+"/"+file].get_data())) query_needle_factory = needlestorage(query_wavsound,int(partition),int(samples)) haystackmap = haystackmapper(haystacks) needles = query_needle_factory.get_needles() len_needles = len(needles) len_needle = len(needles[0]) # size is the same for all needles manager = Manager() # Map processes emit key-value pairs to emissions return_emissions = manager.dict() # Job is a list of processes jobs = [] # Process number pnum = 0 print "Number of Needles: ", len(needles) # Database query time start_time = time.time() #Distribute processes using multiprocessor for needle in needles: p = Process(target=calltomapper, args=(haystackmap,needle,pnum,len_needles,return_emissions)) jobs.append(p) p.start() pnum += 1 for proc in jobs: proc.join() # flatten return_emissions into a list emissions_list = sum(return_emissions.values(),[]) print "Search 
Result:" result_dict = haystackreducer(emissions_list) # Tabulate % match (wav files with 0% match are excluded from the result) for key in result_dict: print str(key),": ",(25-len(str(key)))*" ",str("{0:.2f}".format(int(result_dict[key])/len(needles)*100)),"% match" # Show search time timelapse_parallel = time.time() - start_time print timelapse_parallel, "seconds"