def funcDoubleMapReduce(l):
    """Run the same map-reduce twice over ``l`` and compare the outcomes.

    ``func4`` is mapped over ``l`` and the mapped values are combined with
    ``operator.add``; this is done two times in a row.

    Returns:
        True when both reductions produced equal results, False otherwise.
    """
    first_pass = futures.mapReduce(func4, operator.add, l)
    second_pass = futures.mapReduce(func4, operator.add, l)
    # NOTE(review): the info socket is pumped after both reductions and
    # before the comparison; what that does is not visible from this
    # function -- confirm against the _control module.
    _control.execQueue.socket.pumpInfoSocket()
    return first_pass == second_pass
def CompareMapReduce():
    """Time ``futures.mapReduce`` against ``sum(map(...))`` and print both.

    The workload is ``simulateWorkload`` applied to the lists
    ``[a] * a`` for ``a`` in ``range(1000)``, with the mapped values added
    together. For each variant the elapsed wall-clock time and the reduced
    result are printed. Nothing is returned.
    """
    # Parallel variant. The input list is built inside the timed region,
    # exactly as in the serial variant below, so both timings include it.
    scoop_elapsed = time.time()
    res = futures.mapReduce(
        simulateWorkload,
        operator.add,
        [[a] * a for a in range(1000)],
    )
    scoop_elapsed = time.time() - scoop_elapsed
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, which leaked source whitespace into the message.
    print(
        "futures.map in SCOOP executed in {0:.3f}s "
        "with result:{1}".format(scoop_elapsed, res)
    )

    # Serial variant with the built-in map and sum.
    python_elapsed = time.time()
    res = sum(map(simulateWorkload, [[a] * a for a in range(1000)]))
    python_elapsed = time.time() - python_elapsed
    print(
        "map Python executed in: {0:.3f}s "
        "with result: {1}".format(python_elapsed, res)
    )
def test_sum():
    """Check that ``futures.mapReduce`` agrees with a serial ``sum(map(...))``.

    100000 random integers in [-1000, 1000] are generated, the sum of their
    absolute values is computed serially and through
    ``futures.mapReduce(abs, operator.add, data)``, and the elapsed time of
    each variant is printed.

    Raises:
        AssertionError: if the two sums differ.
    """
    bound = 100000
    data = [random.randint(-1000, 1000) for _ in range(bound)]

    # Python's standard serial function
    start = time.time()
    serialSum = sum(map(abs, data))
    # Elapsed time is "now - start"; the reverse order printed a negative.
    print(time.time() - start)

    # SCOOP's parallel function
    start = time.time()
    parallelSum = futures.mapReduce(abs, operator.add, data)
    print(time.time() - start)

    assert serialSum == parallelSum
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with SCOOP. If not, see <http://www.gnu.org/licenses/>.
#
"""
Sums the multiples of 3 and 5 below 1000000
"""
from time import time
from scoop import futures
from operator import add


def multiples(n):
    """Return the set of multiples of ``n`` found in ``range(0, 1000000)``."""
    return set(range(0, 1000000, n))


if __name__ == '__main__':
    # Serial reference: union the per-factor sets, then sum the members.
    serial_start = time()
    serial_result = sum(set.union(*map(multiples, [3, 5])))
    serial_time = time() - serial_start

    # Same computation, with the mapping and the set-union reduction
    # delegated to futures.mapReduce.
    parallel_start = time()
    parallel_result = sum(futures.mapReduce(multiples, set.union, [3, 5]))
    parallel_reduce_time = time() - parallel_start

    # Both routes must yield the same total.
    assert serial_result == parallel_result
    print(
        "Serial time: {0:.4f} s\nParallel time: {1:.4f} s"
        "".format(serial_time, parallel_reduce_time)
    )
values are equal to the sum of values for each key""" # explicitly copy the dictionary, as otherwise # we risk modifying 'dict1' combined = {} for key in dict1.keys(): combined[key] = dict1[key] for key in dict2.keys(): if key in combined: combined[key] += dict2[key] else: combined[key] = dict2[key] return combined if __name__ == "__main__": # need to call it using python countwords.py ../shakespeare/* files = sys.argv[1:] total_dict = futures.mapReduce(count_words, reduce_dicts, files) words = sorted([k for k, v in total_dict.iteritems() if v > 2000]) def print_line(word, count): print word, ' = ', count map(print_line, words, [total_dict[x] for x in words]) # use Counter here?
from scoop import futures
from functools import reduce
import numpy as np


def prod_modulo(a, b, c, d):
    """Return the product of the four arguments, each first reduced modulo 7."""
    product = a % 7
    for operand in (b, c, d):
        product = product * (operand % 7)
    return product


def subtract(x, y):
    """Return ``x - y``."""
    difference = x - y
    return difference


if __name__ == "__main__":
    # Four independent draws of 100 integers from [-10, 10), bound to
    # a, b, c and d in that order.
    a, b, c, d = (np.random.randint(-10, 10, size=(100, )) for _ in range(4))
    # prod_modulo is the map function and subtract the reduction function.
    out = futures.mapReduce(prod_modulo, subtract, a, b, c, d)
    print("Result: ", out)
def RSS(index):
    """Return the sum of squared absolute differences for one data window.

    The window covers positions ``index`` up to (but excluding)
    ``index + PARALLEL_SIZE`` of the module-level ``leftSignal`` and
    ``rightSignal`` sequences.
    """
    window = slice(index, index + PARALLEL_SIZE)
    total = 0
    for left, right in zip(leftSignal[window], rightSignal[window]):
        total = total + abs(left - right) ** 2
    return total


if __name__ == "__main__":
    # --- Parallel computation with a reduction step ---
    parallel_start = time.time()
    # One starting offset per window of PARALLEL_SIZE samples.
    indexes = range(0, len(leftSignal), PARALLEL_SIZE)
    # Map RSS over the offsets and add the partial sums together.
    presult = futures.mapReduce(RSS, operator.add, indexes)
    ptime = time.time() - parallel_start
    # NOTE(review): "{0:03f}" is a zero-padded width of 3, not three
    # decimals; "{0:.3f}" may have been intended -- confirm before changing.
    print("mapReduce result obtained in {0:03f}s".format(ptime))

    # --- Serial reference over the whole signals ---
    serial_start = time.time()
    sresult = sum(abs(a - b)**2 for a, b in zip(leftSignal, rightSignal))
    stime = time.time() - serial_start
    print("Serial result obtained in {0:03f}s".format(stime))

    # The windowed and the serial totals are expected to be equal.
    assert presult == sresult
import time from scoop import futures def manipulateData(inData, chose=None): # Simulate a 10ms workload on every tasks time.sleep(0.01) return sum(inData) if __name__ == '__main__': scoopTime = time.time() res = futures.mapReduce( manipulateData, operator.add, list([a] * a for a in range(1000)), ) scoopTime = time.time() - scoopTime print("Executed parallely in: {0:.3f}s with result: {1}".format( scoopTime, res ) ) serialTime = time.time() res = sum( map( manipulateData, list([a] * a for a in range(1000)) )
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with SCOOP. If not, see <http://www.gnu.org/licenses/>.
#
"""
Sums the multiples of 3 and 5 below 1000000
"""
from time import time
from scoop import futures
from operator import add


def multiples(n):
    """Return every multiple of ``n`` in ``range(0, 1000000)`` as a set."""
    return set(range(0, 1000000, n))


if __name__ == '__main__':
    # Serial baseline: build one set per factor, union them, sum the union.
    begin = time()
    serial_result = sum(set.union(*map(multiples, [3, 5])))
    serial_time = time() - begin

    # futures.mapReduce performs the mapping and the set.union reduction;
    # the final sum is still taken locally.
    begin = time()
    parallel_result = sum(futures.mapReduce(multiples, set.union, [3, 5]))
    parallel_reduce_time = time() - begin

    # The two totals must match.
    assert serial_result == parallel_result
    print(
        "Serial time: {0:.4f} s\nParallel time: {1:.4f} s"
        "".format(serial_time, parallel_reduce_time)
    )
data = zip(leftSignal[index:index + PARALLEL_SIZE], rightSignal[index:index + PARALLEL_SIZE]) return sum(abs(y - x)**2 for y, x in data) if __name__ == "__main__": # Parallel with reduction call # Take a beginning timestamp ts = time.time() # Generate indexes to pass to futures indexes = range( 0, len(leftSignal), PARALLEL_SIZE, ) # Execute the RSS computation parallely presult = futures.mapReduce( RSS, operator.add, indexes, ) ptime = time.time() - ts print("mapReduce result obtained in {0:03f}s".format(ptime)) # Serial ts = time.time() sresult = sum(abs(a - b)**2 for a, b in zip(leftSignal, rightSignal)) stime = time.time() - ts print("Serial result obtained in {0:03f}s".format(stime)) assert presult == sresult
from scoop import futures


def product(x, y):
    """Return the product of the arguments"""
    # Fixed: this previously returned x + y, which made it identical to
    # sum() below and contradicted both this docstring and the printed text.
    return x * y


def sum(x, y):
    """Return the sum of the arguments"""
    # NOTE: this name shadows the built-in sum(); it is kept unchanged so
    # existing references to this module-level function keep working.
    return x + y


if __name__ == "__main__":
    a = range(1, 101)
    b = range(101, 201)
    # product is the map function over a and b; sum is the reduction.
    total = futures.mapReduce(product, sum, a, b)
    print("Sum of the products equals %d" % total)
spike_time_dict = ep.loadSpikeTimeDict(mouse_name, cell_ids, cell_info, mat_dir) pairs = np.array(list(combinations(cell_ids, 2))) chunked_pairs = np.array_split(pairs, args.num_chunks) bin_widths = ep.selected_bin_widths if args.bin_width_type == 'selected' else ep.bin_widths for bin_width in bin_widths: print(dt.datetime.now().isoformat() + ' INFO: ' + 'Processing bin width ' + str(bin_width) + '...') spike_count_dict = ep.getSpikeCountDict(spike_time_dict, bin_width, spon_start_time) if args.save_firing_rate_frame: firing_rate_frame = ep.getFiringRateFrameFromSpikeCountDict(spike_count_dict, bin_width) save_file = os.path.join(npy_dir, 'firing_rate_frames', mouse_name + '_' + str(bin_width).replace('.', 'p') + '_' + 'firing.npy') firing_rate_frame.to_pickle(save_file) print(dt.datetime.now().isoformat() + ' INFO: ' + save_file + ' saved.') if args.save_analysis_frame: save_file = os.path.join(csv_dir, 'analysis_frames', mouse_name + '_' + str(bin_width).replace('.', 'p') + '_' + 'analysis.csv') removed = os.remove(save_file) if os.path.exists(save_file) else None for i,pair_chunk in enumerate(chunked_pairs): print(dt.datetime.now().isoformat() + ' INFO: ' + 'Processing chunk number ' + str(i) + '...') analysis_frame = pd.DataFrame.from_dict(futures.mapReduce(getAnalysisDictForPair, reduceAnalysisDicts, constructMapFuncArgs(pair_chunk, spike_count_dict))) analysis_frame['bin_width'] = bin_width saveAnalysisFrame(analysis_frame, i, save_file) print(dt.datetime.now().isoformat() + ' INFO: ' + save_file + ' saved.') if args.save_conditional_correlations: print(dt.datetime.now().isoformat() + ' INFO: ' + 'Processing conditional correlations...') cond_analysis_frame, linear_model_frame, exp_cond_cov, cov_of_cond_expectations = getConditionalAnalysisFrame(mouse_face, spike_count_dict, ep.getBinsForSpikeCounts(spike_time_dict, bin_width, spon_start_time)) saveCondFramesMatrices(cond_analysis_frame, linear_model_frame, exp_cond_cov, cov_of_cond_expectations, mouse_name, 
bin_width) print(dt.datetime.now().isoformat() + ' INFO: Conditional analysis frames and matrices saved.') if args.save_spike_count_frame: save_file, spike_count_frame = saveSpikeCountFrame(cell_ids, bin_width, spike_time_dict, spon_start_time, mouse_name) print(dt.datetime.now().isoformat() + ' INFO: ' + save_file + ' saved.') print(dt.datetime.now().isoformat() + ' INFO: ' + 'Done.')