Пример #1
0
def big_main(args):
    """Set up the pool and input data, time the serial, tree, and flat
    implementations, check that their answers agree, and report speedups.

    :param args: parsed CLI namespace; reads args.scale (log2 of the input
        length), args.procs (worker count), and args.tree (whether to also
        run the tree-based method).
    :returns: tuple (scale, NP, serial time, flat time)
    """
    # Setup
    timer = timedict()
    scale = args.scale
    NP = args.procs
    par_name = 'Tree'+str(NP)
    flat_name = 'Flat'+str(NP)
    timer.tic('spawning pool')
    pool = multiprocessing.Pool(processes=NP)
    timer.toc('spawning pool')
    #SEQ = random.random(2**scale)
    # NOTE(review): random_integers is deprecated in modern numpy in favor
    # of randint(low, high+1, size) — kept for compatibility with the rest
    # of this file.
    SEQ = random.random_integers(0,10,2**scale)
    print(SEQ)


    # Testing

    timer.tic(0)
    count = serial_main(SEQ)
    timer.toc(0)
    print('serial')

    if args.tree:
        timer.tic(par_name)
        tree_count = tree_main(pool, SEQ, NP)[0]
        timer.toc(par_name)
        print('tree')

    timer.tic(flat_name)
    np_count = flat_main(pool, SEQ, NP)
    timer.toc(flat_name)
    print('flat')

    # Reporting
    print('sum:%s,%s' % (count, np_count))

    eps = .00001
    # Use the absolute difference: the original one-sided check would pass
    # whenever np_count exceeded count by any amount.
    assert abs(count - np_count) < eps, \
        "we got the wrong answer %f != %f" % (count, np_count)
    if args.tree:
        print('sum:%s,%s' % (count, tree_count))
        assert count == tree_count

    print(repr(timer.ends))
    if args.tree:
        print('Tree speedup: %f' %
                (timer.ends[0]/timer.ends[par_name]))
    print('Flat speedup: %f' %
            (timer.ends[0]/timer.ends[flat_name]))
    pool.close()
    pool.join()
    return (scale, NP, timer[0], timer[flat_name])
Пример #2
0
def big_main(args):
    """Does the setup and teardown to test both methods

    """
    # --- Setup: timer, worker pool, and random input data ---
    timer = timedict()
    n_procs = args.procs
    scale = args.scale
    if args.verbose and args.tree:
        print('no tree method for inner_product because they are too slow')
    flat_name = 'Flat' + str(n_procs)
    timer.tic('spawning pool')
    pool = multiprocessing.Pool(processes=n_procs)
    timer.toc('spawning pool')
    SEQ = np.arange(0, 2 ** scale)
    mask = random.random_integers(0, 1, 2 ** scale)
    if args.verbose:
        vprint("SEQ", SEQ)
        vprint("mask", mask)

    # --- Testing: serial reference, then the flat parallel version ---
    timer.tic(0)
    arr = serial_main(SEQ, mask)
    timer.toc(0)
    if args.verbose:
        print('serial done')

    timer.tic(flat_name)
    par_arr = flat_main(pool, SEQ, mask, n_procs)
    timer.toc(flat_name)

    # --- Reporting: sanity-check the serial answer and compute speedup ---
    assert (SEQ * mask).sum() == arr, "we did not get the right answer"
    flat_speedup = timer.ends[0] / timer.ends[flat_name]
    pool.close()
    pool.join()
    ans = (scale, n_procs, timer[0], timer[flat_name])
    return ans
Пример #3
0
def big_main(args):
    """Does the setup and teardown to test both methods

    """
    # Setup phase: timer, worker pool, and random input data.
    timer = timedict()
    nworkers = args.procs
    scale = args.scale
    if args.verbose and args.tree:
        print('no tree method for scans because they are too slow')
    flat_name = 'Flat' + str(nworkers)
    timer.tic('spawning pool')
    pool = multiprocessing.Pool(processes=nworkers)
    timer.toc('spawning pool')
    SEQ = np.arange(0, 2 ** scale)
    mask = random.random_integers(0, 1, 2 ** scale)
    if args.verbose:
        print(SEQ)

    # Time the serial reference implementation.
    timer.tic(0)
    arr = serial_main(SEQ, mask)
    timer.toc(0)
    print('serial')

    # Time the flat parallel version; the concat is inside the timed region.
    timer.tic(flat_name)
    chunk_results = flat_main(pool, SEQ, mask, nworkers)
    par_arr = parutils.serial_concat(chunk_results)
    timer.toc(flat_name)
    print('flat')

    # Reporting: compare the two answers and compute the speedup.
    print('Same result: %s' % (par_arr == arr))
    print(repr(timer.ends))
    flat_speedup = timer.ends[0] / timer.ends[flat_name]
    print('Flat speedup: %f' % (flat_speedup))
    pool.close()
    pool.join()
    return (scale, nworkers, timer[0], timer[flat_name])
Пример #4
0
def big_main(args):
    """Set up the pool and input data, time the serial and flat parallel
    implementations, verify they agree element-wise, and report speedup.

    :param args: parsed CLI namespace; reads args.scale (log2 of the input
        length), args.procs (worker count), args.verbose, and args.tree.
    :returns: tuple (scale, NP, serial time, flat time)
    """
    timer = timedict()
    scale = args.scale
    NP = args.procs
    if args.verbose:
        if args.tree:
            print('no tree method for scans because they are too slow')
    flat_name = 'Flat'+str(NP)
    timer.tic('spawning pool')
    pool = multiprocessing.Pool(processes=NP)
    timer.toc('spawning pool')
    SEQ = random.random_integers(0,1,2**scale)
    if args.verbose:
        print(SEQ)


    # Testing

    timer.tic(0)
    sans = serial_main(SEQ)
    timer.toc(0)
    print('serial')

    timer.tic(flat_name)
    pans = flat_main(pool, SEQ, NP)
    timer.toc(flat_name)
    # unpartition is outside the timed region: it is bookkeeping,
    # not part of the parallel kernel being measured.
    pans = unpartition(pans)
    print('flat')

    # Reporting
    # zip() silently truncates to the shorter sequence, so a too-short
    # parallel answer would previously pass the element-wise check;
    # verify the lengths first.
    assert len(sans) == len(pans), 'we got the wrong answer'
    assert all(sa == pa for sa, pa in zip(sans, pans)), 'we got the wrong answer'
    print('Same result: %s' % (sans == pans))
    print(repr(timer.ends))
    print('Flat speedup: %f' %
            (timer.ends[0]/timer.ends[flat_name]))
    pool.close()
    pool.join()
    return (scale,NP, timer[0], timer[flat_name])
Пример #5
0
    TIMEINDEX = pd.Index(np.arange(INIT_SAMPLE, END_SAMPLE, STRIDE))
    NTARGETS = 8  #number of series for the plots
    BINCOUNT = 50  #how many bins for histograms
    TARGETSV = []
    '''TARGETSV = [688773,
                756680,
                984640,
                1067645,
                3030528,
                3035516,
                ]'''
    #[3784858, 2357671, 2975930, 359724, 2124973, 3732925,]
    #vertices that I picked by hand
    KERNEL_NAME = "betweenness_centrality"
    KERNEL_NAMES = ['bc', 'communities', 'components', 'triangles']
    timer = td.timedict()

    #run_lognormal_analysis(DATA_DIR, NSAMPLES, KERNEL_NAME)

    timer.tic('loading data')
    # figure out where data should be found
    df, names = get_me_data()
    print('data loaded')
    timer.toc('loading data')
    t = 98
    lf = np.log1p(df)
    if args.summary:
        timer.tic('summary')
        print("summarizing vertices")
        whitf = ka.summarize_vertices(lf, pos_filter=True, whiten=True)
        whitf = whitf.join(names)
Пример #6
0
    #answers = np.concatenate(answers, axis=0)
    answers = packed_reduction(pool, answers, num_procs)
    return answers

def serial_main():
    """Does the main without any parallel overhead
    :returns: the sum of the matvec

    """
    # One matrix-vector product on the module-level MAT/SEQ globals,
    # reduced to a scalar with a single numpy sum.
    return np.sum(MAT.dot(SEQ))

if __name__ == '__main__':
    timer = timedict()
    scale = 14
    SEQ = np.random.random(2**scale)
    MAT = np.random.rand(2**scale, 2**scale)
    #print(MAT)
    print(SEQ)
    MAX_PROCS = 4
    NP = 4
    pool = multiprocessing.Pool(processes=NP)
    par_name = 'MP'+str(MAX_PROCS)
    npname = 'NumPy'+str(MAX_PROCS)
    timer.tic(0)
    count = serial_main()
    timer.toc(0)
    print('serial')
    TIMEINDEX = pd.Index(np.arange(INIT_SAMPLE,END_SAMPLE,STRIDE))
    NTARGETS = 8 #number of series for the plots
    BINCOUNT = 50 #how many bins for histograms
    TARGETSV = []
    '''TARGETSV = [688773,
                756680,
                984640,
                1067645,
                3030528,
                3035516,
                ]'''
    #[3784858, 2357671, 2975930, 359724, 2124973, 3732925,]
    #vertices that I picked by hand
    KERNEL_NAME = "betweenness_centrality"
    KERNEL_NAMES = ['bc', 'communities', 'components', 'triangles']
    timer = td.timedict()

    #run_lognormal_analysis(DATA_DIR, NSAMPLES, KERNEL_NAME)

    timer.tic('loading data')
    # figure out where data should be found
    df, names = get_me_data()
    print('data loaded')
    timer.toc('loading data')
    t = 98
    lf = np.log1p(df)
    if args.summary:
        timer.tic('summary')
        print("summarizing vertices")
        whitf = ka.summarize_vertices(lf, pos_filter=True, whiten=True)
        whitf = whitf.join(names)