Example #1
import multiprocessing as mp  # assumed import; the original snippet omits it


# Worker functions assumed for illustration (the original snippet does not define them)
def multiple_args(a, b, c):
    print(a, b, c)


def single_arg(a):
    print(a)


if __name__ == '__main__':
    # Process unpacks the iterable passed to args as positional arguments; a tuple is the usual choice
    p = mp.Process(target=multiple_args, args=(1, 2, 3))
    p.start()
    p.join()

    # Other iterables such as a string or a list also work, as long as their length matches the number of parameters
    p = mp.Process(target=multiple_args, args='ABC')
    p.start()
    p.join()

    # To pass a single argument, wrap it in a one-element tuple
    p = mp.Process(target=single_arg, args=('hello',))
    p.start()
    p.join()

    pool = mp.Pool()

    # The function given to Pool.map must take exactly one argument
    pool.map(single_arg, [1, 2, 3])
    pool.map(single_arg, (1, 2, 3))

    # You can still hand several values to a single-argument function by packing them into tuples;
    # Pool.map does not unpack them, so each tuple arrives as one argument
    pool.map(single_arg, [(1, 'a', 'one'), (2, 'b', 'two'), (3, 'c', 'three')])

    # A function that requires more than one positional argument cannot be used with Pool.map
    # (see the Pool.starmap sketch after this example)
    try:
        pool.map(multiple_args, [(1, 'a', 'one'), (2, 'b', 'two'), (3, 'c', 'three')])
    except Exception as e:
        print(e)

    pool.close()
    pool.join()
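When the worker genuinely needs several positional arguments, the standard library's Pool.starmap unpacks each tuple of the iterable into those arguments. A minimal sketch, assuming the same multiple_args function as above:

import multiprocessing as mp


def multiple_args(a, b, c):
    print(a, b, c)


if __name__ == '__main__':
    with mp.Pool() as pool:
        # starmap unpacks each tuple into positional arguments
        pool.starmap(multiple_args, [(1, 'a', 'one'), (2, 'b', 'two'), (3, 'c', 'three')])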
Example #2
# Imports assumed by this snippet (the original omits them); mplths is taken here to be lithops.multiprocessing
import itertools
import time

import numpy as np

import multiprocessing as mp
import lithops.multiprocessing as mplths

# GlobalCentroids, GlobalDelta and run are defined elsewhere in the original script


def main(datapoints_per_file,
         dimensions,
         clusters,
         parallelism,
         number_of_iterations,
         threshold,
         local,
         localhost=False):

    manager = mplths.Manager()
    global lock
    lock = manager.Lock()
    global barrier
    barrier = manager.Barrier(parallelism)
    global global_centroids
    global_centroids = manager.dict()
    global global_delta
    global_delta = manager.dict({
        "delta": 1,
        "delta_c": 0,
        "delta_temp": 0,
        "delta_st": 0
    })
    global worker_stats
    worker_stats = manager.list()  # in seconds

    if False:
        # TEST Centroids
        centroids = GlobalCentroids(2, 2)
        centroids.random_init(4)
        print(centroids.get_centroids())
        centroids.update(np.array([[1.2, 1, 1, 1], [2, 2, 2, 2]]), [2, 2])
        centroids.update(np.array([[2, 2, 2, 2.2], [1, 1, 1, 1]]), [2, 2])
        print(centroids.get_centroids())
        # TEST Delta
        delta = GlobalDelta(2)
        delta.init()
        print(delta.get_delta())
        delta.update(1, 2)
        delta.update(0, 2)
        print(delta.get_delta())
        return

    # Initialize global objects
    centroids = GlobalCentroids(clusters, parallelism)
    centroids.random_init(dimensions)
    delta = GlobalDelta(parallelism)
    delta.init()

    start_time = time.time()
    # Build one argument tuple per worker id; every list after range(parallelism) holds a
    # single constant, so the product has exactly `parallelism` tuples
    # (see the short itertools.product illustration after this example)
    iterator = list(
        itertools.product(range(parallelism), [datapoints_per_file],
                          [dimensions], [clusters], [parallelism],
                          [number_of_iterations], [threshold]))

    if local:
        pool = mp.Pool(processes=parallelism)
    elif localhost:
        # Note the trailing comma: initargs must be a tuple, not a bare string
        pool = mplths.Pool(processes=parallelism, initargs=('localhost',))
    else:
        pool = mplths.Pool(processes=parallelism, initargs=('serverless',))
    pool.imap(run, iterator)
    pool.close()
    pool.join()
    # Parse results
    times = []
    for b in worker_stats:
        # Iteration time: last breakdown entry minus the third (index 2)
        times.append(b[-1] - b[2])

    avg_time = sum(times) / len(times)
    print(f"Total k-means time: {time.time() - start_time} s")
    print(f"Average iterations time: {avg_time} s")
    print(global_centroids.items())
    #import matplotlib.pyplot as plt
    #for k,v in global_centroids.items():
    #    plt.scatter(v['centroids'][0], v['centroids'][1], alpha = 0.6, s=10)
    #plt.show()

    if local:
        where = 'local'
    elif localhost:
        where = 'localhost'
    else:
        where = 'serverless'

    with open(
            'time_break_globals_' + where + '_' + str(datapoints_per_file) +
            '_' + str(dimensions) + '_' + str(parallelism) + '_' +
            str(clusters) + '_' + str(number_of_iterations) + '.txt',
            'a+') as f:
        f.write(f"{time.time() - start_time}\n")
        f.write(f"{avg_time}\n")
        for item in worker_stats:
            f.write(f"{item}\n")
Example #3
# Imports assumed by this snippet (the original omits them); mplths is taken here to be lithops.multiprocessing
import itertools
import time

import numpy as np

import multiprocessing as mp
import lithops.multiprocessing as mplths

# GlobalCentroids, GlobalDelta and run are defined elsewhere in the original script


def main(datapoints_per_file,
         dimensions,
         clusters,
         parallelism,
         number_of_iterations,
         threshold,
         local,
         localhost=False):

    lock = mplths.Lock()
    barrier = mplths.Barrier(parallelism)

    # Shared ctypes arrays ('d' = double, 'i' = int), all guarded by the same lock;
    # see the short Array() illustration after this example
    global_centroids = mplths.Array('d', clusters * dimensions, lock=lock)
    global_counters = mplths.Array('i', clusters, lock=lock)
    global_centroids_temp = mplths.Array('d', clusters * dimensions, lock=lock)
    global_sizes_temp = mplths.Array('i', clusters, lock=lock)
    global_delta = mplths.Array('i', 4, lock=lock)
    worker_stats = mplths.Array('d', parallelism, lock=lock)  # in seconds

    if False:
        # TEST Centroids
        centroids = GlobalCentroids(2, 2)
        centroids.random_init(4, global_centroids, global_counters, lock)
        print(centroids.get_centroids())
        centroids.update(np.array([[1.2, 1, 1, 1], [2, 2, 2, 2]]), [2, 2])
        centroids.update(np.array([[2, 2, 2, 2.2], [1, 1, 1, 1]]), [2, 2])
        print(centroids.get_centroids())
        # TEST Delta
        delta = GlobalDelta(global_delta, 2)
        delta.init()
        print(delta.get_delta())
        delta.update(1, 2)
        delta.update(0, 2)
        print(delta.get_delta())
        return

    # Initialize global objects
    centroids = GlobalCentroids(clusters, parallelism)
    centroids.random_init(dimensions, global_centroids, global_counters, lock)
    delta = GlobalDelta(parallelism)
    delta.init(global_delta)

    start_time = time.time()
    iterator = list(
        itertools.product(range(parallelism), [global_centroids],
                          [global_counters], [global_centroids_temp],
                          [global_sizes_temp], [global_delta], [worker_stats],
                          [lock], [barrier], [datapoints_per_file],
                          [dimensions], [clusters], [parallelism],
                          [number_of_iterations], [threshold]))

    if local:
        pool = mp.Pool(processes=parallelism)
    elif localhost:
        # Note the trailing comma: initargs must be a tuple, not a bare string
        pool = mplths.Pool(processes=parallelism, initargs=('localhost',))
    else:
        pool = mplths.Pool(processes=parallelism, initargs=('serverless',))
    pool.imap(run, iterator)
    pool.close()
    pool.join()
    # Parse results
    times = []
    for b in worker_stats[:]:
        # Iteration time: last breakdown entry minus the third (index 2)
        times.append(b[-1] - b[2])

    avg_time = sum(times) / len(times)
    print(f"Total k-means time: {time.time() - start_time} s")
    print(f"Average iterations time: {avg_time} s")
    print(global_centroids[:])
    print(global_counters[:])
    print(global_centroids_temp[:])
    print(global_sizes_temp[:])
    if local:
        where = 'local'
    elif localhost:
        where = 'localhost'
    else:
        where = 'serverless'

    with open(
            'time_break_ctypes_' + where + '_' + str(datapoints_per_file) +
            '_' + str(dimensions) + '_' + str(parallelism) + '_' +
            str(clusters) + '_' + str(number_of_iterations) + '.txt',
            'w+') as f:
        f.write(f"{time.time() - start_time}\n")
        f.write(f"{avg_time}\n")
        for item in worker_stats[:]:
            f.write(f"{item}\n")
Example #4
import lithops.multiprocessing as mp
from lithops.multiprocessing import config as mp_config


def my_map_function(x):
    return x + 7


if __name__ == "__main__":
    iterdata = [1, 2, 3, 4]

    mp_config.set_parameter(mp_config.EXPORT_EXECUTION_DETAILS, '.')

    with mp.Pool() as pool:
        results = pool.map(my_map_function, iterdata)

    print(results)
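With iterdata = [1, 2, 3, 4] and my_map_function adding 7 to each element, the final print should show [8, 9, 10, 11]; the mp_config.set_parameter call, as its name suggests, only exports execution details to the current directory ('.') and should not change the computed results.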
Example #5
# import multiprocessing as mp
from lithops import multiprocessing as mp


def work(num):
    global param1, param2
    return param1, param2


def initializer_function(arg1, arg2):
    # Runs once in each worker process when the pool starts; sets the globals that work() reads
    global param1, param2
    param1 = arg1
    param2 = arg2


if __name__ == '__main__':
    with mp.Pool(initializer=initializer_function,
                 initargs=('important global arg', 123456)) as p:
        res = p.map(work, [0] * 3)
        print(res)
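The commented-out import at the top hints that this initializer/initargs pattern is the standard Pool API and works unchanged with the built-in multiprocessing module. A minimal sketch under that assumption, additionally returning each worker's PID (an illustrative addition, not in the original) to show that the initializer runs once per worker process:

import os
import multiprocessing as mp


def initializer_function(arg1, arg2):
    # Runs once in every worker process when the pool starts
    global param1, param2
    param1 = arg1
    param2 = arg2


def work(num):
    # Each task sees the globals set by the initializer of whichever worker runs it
    return os.getpid(), param1, param2


if __name__ == '__main__':
    with mp.Pool(processes=2,
                 initializer=initializer_function,
                 initargs=('important global arg', 123456)) as p:
        print(p.map(work, range(4)))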