import multiprocessing as mp  # lithops.multiprocessing offers the same interface


# Minimal worker functions so the examples below are runnable
def single_arg(arg):
    print(arg)


def multiple_args(arg1, arg2, arg3):
    print(arg1, arg2, arg3)


if __name__ == '__main__':
    # Processes unpack the iterable passed to args as arguments. Usually tuples are used.
    p = mp.Process(target=multiple_args, args=(1, 2, 3))
    p.start()
    p.join()

    # Other iterables, such as strings or lists, are also allowed, as long as
    # their length matches the number of arguments.
    p = mp.Process(target=multiple_args, args='ABC')
    p.start()
    p.join()

    # To pass a single argument, wrap it in a 1-element tuple.
    p = mp.Process(target=single_arg, args=('hello',))
    p.start()
    p.join()

    pool = mp.Pool()

    # The function passed to Pool.map must take exactly one argument.
    pool.map(single_arg, [1, 2, 3])
    pool.map(single_arg, (1, 2, 3))

    # Elements of the iterable may themselves be tuples, but Pool.map does not
    # implicitly unpack them: the function receives each tuple whole.
    pool.map(single_arg, [(1, 'a', 'one'), (2, 'b', 'two'), (3, 'c', 'three')])

    # A function with more than one required positional argument cannot be
    # used with Pool.map.
    try:
        pool.map(multiple_args, [(1, 'a', 'one'), (2, 'b', 'two'), (3, 'c', 'three')])
    except Exception as e:
        print(e)
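    # If the worker genuinely takes several positional arguments, Pool.starmap
    # unpacks each tuple of the iterable into separate arguments. starmap is
    # part of the standard-library Pool API (Python 3.3+), which
    # lithops.multiprocessing aims to mirror. A brief sketch continuing the
    # example above:
    pool.starmap(multiple_args, [(1, 'a', 'one'), (2, 'b', 'two'), (3, 'c', 'three')])

    pool.close()
    pool.join()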
def main(datapoints_per_file, dimensions, clusters, parallelism,
         number_of_iterations, threshold, local, localhost=False):
    manager = mplths.Manager()

    global lock
    lock = manager.Lock()
    global barrier
    barrier = manager.Barrier(parallelism)
    global global_centroids
    global_centroids = manager.dict()
    global global_delta
    global_delta = manager.dict({
        "delta": 1,
        "delta_c": 0,
        "delta_temp": 0,
        "delta_st": 0
    })
    global worker_stats
    worker_stats = manager.list()  # in seconds

    run_tests = False  # flip to True for quick sanity checks of the shared globals
    if run_tests:
        # Test centroids
        centroids = GlobalCentroids(2, 2)
        centroids.random_init(4)
        print(centroids.get_centroids())
        centroids.update(np.array([[1.2, 1, 1, 1], [2, 2, 2, 2]]), [2, 2])
        centroids.update(np.array([[2, 2, 2, 2.2], [1, 1, 1, 1]]), [2, 2])
        print(centroids.get_centroids())

        # Test delta
        delta = GlobalDelta(2)
        delta.init()
        print(delta.get_delta())
        delta.update(1, 2)
        delta.update(0, 2)
        print(delta.get_delta())
        return

    # Initialize global objects
    centroids = GlobalCentroids(clusters, parallelism)
    centroids.random_init(dimensions)
    delta = GlobalDelta(parallelism)
    delta.init()

    start_time = time.time()

    iterator = list(
        itertools.product(range(parallelism), [datapoints_per_file],
                          [dimensions], [clusters], [parallelism],
                          [number_of_iterations], [threshold]))

    if local:
        pool = mp.Pool(processes=parallelism)
    else:
        # initargs must be a tuple: note the trailing comma
        backend = 'localhost' if localhost else 'serverless'
        pool = mplths.Pool(processes=parallelism, initargs=(backend,))
    pool.imap(run, iterator)
    pool.close()
    pool.join()

    # Parse results
    times = []
    for b in worker_stats:
        # Iteration time spans from the second breakdown timestamp to the last
        times.append(b[-1] - b[2])
    avg_time = sum(times) / len(times)

    print(f"Total k-means time: {time.time() - start_time} s")
    print(f"Average iterations time: {avg_time} s")
    print(global_centroids.items())

    # import matplotlib.pyplot as plt
    # for k, v in global_centroids.items():
    #     plt.scatter(v['centroids'][0], v['centroids'][1], alpha=0.6, s=10)
    # plt.show()

    if local:
        where = 'local'
    elif localhost:
        where = 'localhost'
    else:
        where = 'serverless'

    with open('time_break_globals_' + where + '_' + str(datapoints_per_file)
              + '_' + str(dimensions) + '_' + str(parallelism) + '_'
              + str(clusters) + '_' + str(number_of_iterations) + '.txt',
              'a+') as f:
        f.write(f"{time.time() - start_time}\n")
        f.write(f"{avg_time}\n")
        for item in worker_stats:
            f.write(f"{item}\n")
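# A minimal, self-contained sketch of the synchronization pattern that main()
# sets up above, using the standard library (lithops mirrors this API). The
# real run() worker is not shown in this excerpt; work() below is a
# hypothetical illustration of the lock/barrier usage, not the actual
# implementation.
import multiprocessing as mp


def work(worker_id, lock, barrier, shared):
    with lock:
        # Manager dict proxies need an explicit lock for read-modify-write
        shared['delta_temp'] = shared['delta_temp'] + 1
    barrier.wait()  # every worker finishes the step before any continues


if __name__ == '__main__':
    n = 4
    manager = mp.Manager()
    lock = manager.Lock()
    barrier = manager.Barrier(n)
    shared = manager.dict({'delta_temp': 0})
    workers = [mp.Process(target=work, args=(i, lock, barrier, shared))
               for i in range(n)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(shared['delta_temp'])  # prints 4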
def main(datapoints_per_file, dimensions, clusters, parallelism,
         number_of_iterations, threshold, local, localhost=False):
    lock = mplths.Lock()
    barrier = mplths.Barrier(parallelism)
    global_centroids = mplths.Array('d', clusters * dimensions, lock=lock)
    global_counters = mplths.Array('i', clusters, lock=lock)
    global_centroids_temp = mplths.Array('d', clusters * dimensions, lock=lock)
    global_sizes_temp = mplths.Array('i', clusters, lock=lock)
    global_delta = mplths.Array('i', 4, lock=lock)
    worker_stats = mplths.Array('d', parallelism, lock=lock)  # in seconds

    run_tests = False  # flip to True for quick sanity checks of the shared arrays
    if run_tests:
        # Test centroids
        centroids = GlobalCentroids(2, 2)
        centroids.random_init(4, global_centroids, global_counters, lock)
        print(centroids.get_centroids())
        centroids.update(np.array([[1.2, 1, 1, 1], [2, 2, 2, 2]]), [2, 2])
        centroids.update(np.array([[2, 2, 2, 2.2], [1, 1, 1, 1]]), [2, 2])
        print(centroids.get_centroids())

        # Test delta
        delta = GlobalDelta(global_delta, 2)
        delta.init()
        print(delta.get_delta())
        delta.update(1, 2)
        delta.update(0, 2)
        print(delta.get_delta())
        return

    # Initialize global objects
    centroids = GlobalCentroids(clusters, parallelism)
    centroids.random_init(dimensions, global_centroids, global_counters, lock)
    delta = GlobalDelta(parallelism)
    delta.init(global_delta)

    start_time = time.time()

    iterator = list(
        itertools.product(range(parallelism), [global_centroids],
                          [global_counters], [global_centroids_temp],
                          [global_sizes_temp], [global_delta], [worker_stats],
                          [lock], [barrier], [datapoints_per_file],
                          [dimensions], [clusters], [parallelism],
                          [number_of_iterations], [threshold]))

    if local:
        pool = mp.Pool(processes=parallelism)
    else:
        # initargs must be a tuple: note the trailing comma
        backend = 'localhost' if localhost else 'serverless'
        pool = mplths.Pool(processes=parallelism, initargs=(backend,))
    pool.imap(run, iterator)
    pool.close()
    pool.join()

    # Parse results: unlike the manager version, worker_stats is a flat array
    # of doubles, so each worker stores a single iterations time in its slot
    times = list(worker_stats[:])
    avg_time = sum(times) / len(times)

    print(f"Total k-means time: {time.time() - start_time} s")
    print(f"Average iterations time: {avg_time} s")
    print(global_centroids[:])
    print(global_counters[:])
    print(global_centroids_temp[:])
    print(global_sizes_temp[:])

    if local:
        where = 'local'
    elif localhost:
        where = 'localhost'
    else:
        where = 'serverless'

    with open('time_break_ctypes_' + where + '_' + str(datapoints_per_file)
              + '_' + str(dimensions) + '_' + str(parallelism) + '_'
              + str(clusters) + '_' + str(number_of_iterations) + '.txt',
              'w+') as f:
        f.write(f"{time.time() - start_time}\n")
        f.write(f"{avg_time}\n")
        for item in worker_stats[:]:
            f.write(f"{item}\n")
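# A minimal, self-contained sketch of the shared-ctypes pattern used above,
# with the standard library (lithops mirrors this API). Synchronized arrays
# cannot be pickled into Pool.map arguments, so the usual approach is to hand
# them to workers through the pool initializer. work() is a hypothetical
# stand-in for the k-means worker, not the actual implementation.
import multiprocessing as mp
import time

stats = None  # set in each worker by the initializer


def init_worker(shared_stats):
    global stats
    stats = shared_stats


def work(worker_id):
    t0 = time.time()
    time.sleep(0.01)                    # stand-in for the k-means iterations
    stats[worker_id] = time.time() - t0


if __name__ == '__main__':
    n = 4
    shared = mp.Array('d', n)           # synchronized: lock=True by default
    with mp.Pool(n, initializer=init_worker, initargs=(shared,)) as pool:
        pool.map(work, range(n))
    print(shared[:])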
import lithops.multiprocessing as mp
from lithops.multiprocessing import config as mp_config


def my_map_function(x):
    return x + 7


if __name__ == "__main__":
    iterdata = [1, 2, 3, 4]

    # Tell lithops where to export the execution details of each job
    mp_config.set_parameter(mp_config.EXPORT_EXECUTION_DETAILS, '.')

    with mp.Pool() as pool:
        results = pool.map(my_map_function, iterdata)

    print(results)
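# The same example runs unchanged on a single machine with the standard
# library by swapping the import; the mp_config call is lithops-specific and
# is simply dropped. A sketch:
import multiprocessing as mp


def my_map_function(x):
    return x + 7


if __name__ == "__main__":
    with mp.Pool() as pool:
        print(pool.map(my_map_function, [1, 2, 3, 4]))  # [8, 9, 10, 11]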
# import multiprocessing as mp
from lithops import multiprocessing as mp


def work(num):
    global param1, param2
    return param1, param2


def initializer_function(arg1, arg2):
    # Runs once in every worker process before it starts consuming tasks
    global param1, param2
    param1 = arg1
    param2 = arg2


if __name__ == '__main__':
    with mp.Pool(initializer=initializer_function,
                 initargs=('important global arg', 123456)) as p:
        res = p.map(work, [0] * 3)
        print(res)
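# A common use of the initializer pattern (a hedged sketch; load_model and
# 'model.bin' are hypothetical stand-ins, not part of the original example):
# set up an expensive resource once per worker instead of once per task.
from lithops import multiprocessing as mp


def load_model(path):
    # Hypothetical helper: stands in for any costly, hard-to-pickle setup
    return {'path': path}


def init_worker(model_path):
    global model
    model = load_model(model_path)


def predict(x):
    return model['path'], x


if __name__ == '__main__':
    with mp.Pool(initializer=init_worker, initargs=('model.bin',)) as p:
        print(p.map(predict, [1, 2, 3]))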