Example #1
def main():
    targets = []
    proc = int(sys.argv[1])
    start_time = time.time()
    result = {'joomla': [], 'wordpress': []}
    p = Pool(proc)
    with open('file.txt', 'r') as f:
        lines = f.readlines()
        for line in lines:
            url = str(line.replace('\n', ''))
            targets.append(url)
    all_results = p.map(search_for, targets)
    for res in all_results:
        if res.get("joomla") != []:
            result["joomla"].append(res.get("joomla"))
        elif res.get("wordpress") != []:
            result["wordpress"].append(res.get("wordpress"))

    print("#################################################")
    print(" Number of charged urls : " + str(len(lines)))
    print("#################################################")
    print(" Number of joomla urls : " + str(len(result["joomla"])))
    print(" Number of wordpress urls : " + str(len(result["wordpress"])))
    print(" Finished in : " + str(int(time.time() - start_time)) + "s")
    print("#################################################")
    print("Joomla links")
    print("#################################################")
    for item in result["joomla"]:
        print(item)
    print("#################################################")
    print("Wordpress links")
    print("#################################################")
    for item in result["wordpress"]:
        print(item)
Example #2
def get_array_chans_multi_proc(lambs, K, sep, params_list, n_processes, norm):
    '''
    helper function to speed up generation of channels based on parameters.
    multiple processors are used and their results are combined.
    '''
    p = Pool(processes=n_processes)
    num_chans = len(params_list)
    # ceil division so the trailing channels are not dropped when num_chans
    # is not an exact multiple of n_processes
    num_chans_per_proc = -(-num_chans // n_processes)
    results = []
    for i in range(n_processes):
        start = i * num_chans_per_proc
        end = min(start + num_chans_per_proc, num_chans)
        sub_params_list = params_list[start:end]
        results.append(
            p.apply_async(get_array_chans,
                          args=(lambs, K, sep, sub_params_list, norm)))
    p.close()
    p.join()

    output = [p.get() for p in results]
    X = None
    for i in range(0, len(output)):
        x = output[i]
        if i == 0:
            X = x
        else:
            X = np.vstack([X, x])
    del results, output
    return X
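A self-contained sketch of the same chunk / apply_async / vstack pattern; _make_rows and chunked_pool_vstack are illustrative stand-ins, not names from the original project:

import numpy as np
from multiprocessing import Pool


def _make_rows(param_chunk):
    # stand-in for the real per-chunk worker: one row per parameter value
    return np.array([[p, p * 2.0] for p in param_chunk])


def chunked_pool_vstack(params_list, n_processes=4):
    # ceil division so the last chunk picks up the remainder instead of dropping it
    chunk_size = -(-len(params_list) // n_processes)
    chunks = [params_list[i:i + chunk_size]
              for i in range(0, len(params_list), chunk_size)]
    with Pool(processes=n_processes) as pool:
        async_results = [pool.apply_async(_make_rows, args=(chunk,)) for chunk in chunks]
        outputs = [r.get() for r in async_results]
    return np.vstack(outputs)


if __name__ == '__main__':
    X = chunked_pool_vstack(list(range(10)), n_processes=3)
    print(X.shape)  # (10, 2)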
Example #3
def main():
    ''' Main program starts here '''
    global opub, odown, orep, info
    # somefile starts as False and turns True if at least one file is found
    somefile = False
    # read inputs and assign constraints
    assign_constraint()
    fdown = outfile + '_to_download.csv'
    frep = outfile + '_replica.csv'
    fpub = outfile + '_not_published.csv'
    # test reading inputs
    print var0
    print exp0
    print mod0
    print fdown
    print frep
    print fpub
    # if one of the output files exists, issue a warning and exit
    if opath.isfile(fdown) or opath.isfile(frep) or opath.isfile(fpub):
        print "Warning: one of the output files exists, exit to not overwrite!"
        sys.exit()
    info = {}
    # loop through experiments, 1st create a wget request for exp, then parse_file
    for exp in exp0:
        wgetfile = "wget_" + exp + ".out"
        result = parse_file(wgetfile, var0, mod0, exp)
        # if found any files matching constraints, process them one by one
        # using multiprocessing Pool to parallelise process_file
        if result:
            async_results = Pool(1).map_async(process_file, result)
            for dinfo in async_results.get():
                info.update(dinfo)
            somefile = True
        print "Finished checksum for existing files"
# if it couldn't find any file for any experiment then exit
    if not somefile:
        sys.exit("No files found for any of the experiments, exiting!")
# open not published file
    opub = open(fpub, "w")
    opub.write("var_mip-table, model, experiment\n")
    # build all requested combinations and compare to files found
    nopub_set = compare_query(var0, mod0, exp0)
    # write replica and download output files
    # open output files and write header
    odown = open(fdown, "w")
    odown.write(
        "var, mip_table, model, experiment, ensemble, version, file url\n")
    orep = open(frep, "w")
    orep.write(
        "var, mip_table, model, experiment, ensemble, version, filepath\n")
    write_file()
    # close all the output files
    odown.close()
    orep.close()
    opub.close()
    print "Finished to write output files"
    # if table option create/open spreadsheet
    # if table option write summary table in csv file
    if table:
        write_table(nopub_set)
Example #4
def shortest_path_4_one_node(start_node, g, out_folder):
    node_idx = g.vs.select(name_eq=int(start_node))
    if len(node_idx) == 0:
        print('error: start_node is not in graph!')
        return
    idx = [v.index for v in node_idx][0]
    sh_path = g.get_all_shortest_paths(v=idx, weights=g.es['weight'], mode=OUT)
    print(sh_path[0:10])
    """
    then calculate the weights sum of each path
    and sort them write to file for each node
    """

    # sum_path_list = worker(sh_path,  start_node,  g)

    size = len(sh_path)

    P_NUM = 10
    p = Pool(P_NUM)
    rest_list = []
    if size % P_NUM != 0:
        tail = size % P_NUM
        # the evenly sized chunks below cover the first size - tail paths,
        # so the remainder is exactly the last `tail` entries
        tail_list = sh_path[-tail:]
        rest_list = worker(
            tail_list,
            start_node,
            g,
        )
        print('tail end')

    main_list = [
        p.apply_async(worker,
                      args=(
                          sh_path[size // P_NUM * i:size // P_NUM * (i + 1)],
                          start_node,
                          g,
                      )) for i in range(P_NUM)
    ]
    p.close()
    p.join()

    output = [p.get() for p in main_list]
    flat_list = [item for sublist in output for item in sublist]
    sum_path_list = flat_list + rest_list
    sum_path_list = sorted(sum_path_list, key=lambda x: x[2])
    print(sum_path_list[0:10])
    """
    write start node to all the nodes' shortest path sum to one csv file
    """
    df_out = pd.DataFrame(sum_path_list,
                          columns=[
                              'start_index', 'destination_index',
                              'shortest_path_distance', 'shortest_path_seq',
                              'weight_list'
                          ])
    df_out.to_csv(out_folder + 'path_dist_index_' + str(start_node) + '.csv',
                  sep=',',
                  index=False)
Example #5
def main():
    ''' Main program starts here '''
    global opub, odown, orep, info
# somefile starts as False and turns True if at least one file is found
    somefile=False
# read inputs and assign constraints
    assign_constraint()
    fdown = outfile + '_to_download.csv'
    frep = outfile + '_replica.csv'
    fpub = outfile + '_not_published.csv'
# test reading inputs
    print var0
    print exp0
    print mod0
    print fdown
    print frep
    print fpub
# if one of the output files exists, issue a warning and exit
    if opath.isfile(fdown) or opath.isfile(frep) or opath.isfile(fpub):
       print "Warning: one of the output files exists, exit to not overwrite!"
       sys.exit() 
    info={}
# loop through experiments, 1st create a wget request for exp, then parse_file 
    for exp in exp0:
        wgetfile = "wget_" + exp + ".out"
        result=parse_file(wgetfile,var0,mod0,exp)
# if found any files matching constraints, process them one by one
# using multiprocessing Pool to parallelise process_file 
        if result:
           async_results = Pool(1).map_async(process_file, result)
           for dinfo in async_results.get():
               info.update(dinfo)
           somefile=True
        print "Finished checksum for existing files" 
# if it couldn't find any file for any experiment then exit
    if not somefile: 
     sys.exit("No files found for any of the experiments, exiting!") 
# open not published file
    opub=open(fpub, "w")
    opub.write("var_mip-table, model, experiment\n")
# build all requested combinations and compare to files found
    nopub_set = compare_query(var0,mod0,exp0)
# write replica and download output files
# open output files and write header
    odown=open(fdown, "w")
    odown.write("var, mip_table, model, experiment, ensemble, version, file url\n")
    orep=open(frep, "w")
    orep.write("var, mip_table, model, experiment, ensemble, version, filepath\n")
    write_file()
# close all the output files
    odown.close()
    orep.close()
    opub.close()
    print "Finished to write output files" 
# if table option create/open spreadsheet
# if table option write summary table in csv file
    if table: 
       write_table(nopub_set)
Example #6
def test1():
    t = time.time()
    p = Pool(4)
    kk = 10
    results = []
    for x in range(1, 7):
        results.append(p.apply_async(f, args=(x, kk)))
    output = [p.get() for p in results]
    print output
    print time.time() - t
Example #7
def test1():
    t = time.time()
    p = Pool(4)
    kk = 10
    results = []
    for x in range(1, 7):
        results.append(p.apply_async(f, args=(x, kk)))
    output = [p.get() for p in results]
    print output
    print time.time() - t
Example #8
def main():
    process_list = []

    p = Pool(3)
    for r in r_list:
        result = p.apply_async(circle_area, args=(r, ))
        process_list.append(result)

    for p in process_list:
        print(p.get())
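This snippet assumes circle_area and r_list are defined elsewhere in the module; a self-contained sketch of the same apply_async pattern, with illustrative definitions, could look like this:

import math
from multiprocessing import Pool

r_list = [1.0, 2.5, 4.0]  # illustrative radii


def circle_area(r):
    # assumed shape of the worker: area of a circle of radius r
    return math.pi * r ** 2


def main():
    with Pool(3) as p:
        async_results = [p.apply_async(circle_area, args=(r,)) for r in r_list]
        for res in async_results:
            print(res.get())


if __name__ == '__main__':
    main()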
Example #9
 def fit(self, data_indices=None):
     '''Uses .fit() method on each model
     operates on models in parallel'''
     p = Pool(self.processes)
     # map_async returns an AsyncResult; the Pool itself has no .get().
     # NB: multiprocessing must pickle the mapped callable, so this lambda will
     # fail in practice; a module-level worker function is needed. Each zipped
     # (model, kwargs) pair arrives as a single tuple argument.
     async_result = p.map_async(
         lambda task: task[0].fit(self.df[self.vars_of_interest],
                                  self.df[[self.y]], **task[1]),
         zip(self.models, self.fit_kwarg_dicts))
     out = async_result.get()
     self.fitted = True
     p.close()
     return out
Example #10
 def fit(self, data_indices=None):
     """Uses .fit() method on each model
     operates on models in parallel"""
     p = Pool(self.processes)
     # map_async returns an AsyncResult; the Pool itself has no .get().
     # NB: multiprocessing must pickle the mapped callable, so this lambda will
     # fail in practice; a module-level worker is needed (see the sketch below).
     # Each zipped (model, kwargs) pair arrives as a single tuple argument.
     async_result = p.map_async(
         lambda task: task[0].fit(self.df[self.vars_of_interest], self.df[[self.y]], **task[1]),
         zip(self.models, self.fit_kwarg_dicts),
     )
     out = async_result.get()
     self.fitted = True
     p.close()
     return out
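Both versions of this fit() snippet share the same pitfalls: map_async returns an AsyncResult that is discarded (the Pool object has no .get()), a lambda cannot be pickled by multiprocessing, and the lambda's two-parameter signature does not match the single tuple each zipped item arrives as. A minimal sketch of the intended pattern with a picklable module-level worker; MeanModel, _fit_one and fit_all are illustrative stand-ins, not the original project's objects:

from multiprocessing import Pool


class MeanModel:
    """Illustrative stand-in for the models being fitted."""
    def fit(self, xs, scale=1.0):
        self.mean_ = scale * sum(xs) / len(xs)
        return self


def _fit_one(task):
    # module-level worker so it can be pickled and shipped to a pool process
    model, xs, kwargs = task
    return model.fit(xs, **kwargs)


def fit_all(models, xs, fit_kwarg_dicts, processes=2):
    tasks = [(m, xs, kw) for m, kw in zip(models, fit_kwarg_dicts)]
    with Pool(processes) as p:
        async_result = p.map_async(_fit_one, tasks)
        fitted = async_result.get()  # .get() lives on the AsyncResult, not the Pool
    return fitted


if __name__ == '__main__':
    out = fit_all([MeanModel(), MeanModel()], [1.0, 2.0, 3.0], [{}, {'scale': 2.0}])
    print([m.mean_ for m in out])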
Example #11
def get_area_multi_processes():
    process_list = []
    logging.debug('multi process....')
    p = Pool(3)
    start = time.time()
    for r in r_list:
        result = p.apply_async(circle_area, args=(r, ))
        process_list.append(result)

    for p in process_list:
        print(p.get())

    print('multi processes time:', (time.time() - start) * 1000)
Example #12
def main():
    args = get_args()

    start = time()
    with open(args.output, 'w') as f:
        wids = [line.strip() for line in
                open(args.input).readlines()[:args.number]]
        mapped = Pool(processes=8).map_async(identify_worker, wids)
        mapped.wait()
        print >> f, '\n'.join([x.encode('utf-8') for x in mapped.get()])
    end = time()
    total = end - start
    print '%d seconds elapsed' % total
Example #13
def filter_seqs(seqs, good_cols, ncpus):
    nseqs, lseqs = len(seqs), len(seqs[0])

    # each job should not exceed 50M character to avoid transferring too much data
    diff = _best_chunk_size(nseqs, lseqs, ncpus)

    p = Pool(ncpus)
    procs = []
    for i in xrange(0, len(seqs), diff):
        procs.append(
            p.apply_async(_sub_filter_seqs, (seqs[i:i + diff], good_cols)))

    p.close()
    p.join()
    return [s for p in procs for s in p.get()]
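_best_chunk_size is referenced in several of these snippets but never shown; judging by the comment about keeping each job under roughly 50M characters, it presumably decides how many sequences each worker receives. A purely illustrative guess at such a helper, not the project's actual implementation:

def _best_chunk_size(nseqs, lseqs, ncpus, max_chars_per_job=50000000):
    # illustrative guess: cap the chunk by the transfer budget (characters per job)
    by_transfer = max(1, max_chars_per_job // max(1, lseqs))
    # and try to give every CPU at least one chunk (ceil division)
    by_cpu = max(1, -(-nseqs // ncpus))
    return min(by_transfer, by_cpu)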
Example #14
def updateDb():
    initDB()
    start_time = time.time()
    course_numbers = sorted(getNumberOfCoursesList())
    course_numbers_length = len(course_numbers)
    # a single pool of 4 workers handles all course lookups
    pool = Pool(processes=4)
    results = [
        pool.apply_async(getCourseInfo, args=(course_number, ))
        for course_number in course_numbers
    ]
    output = [res.get() for res in results]
    for course in output:
        print(course.number)
        dbAddCourse(course)
    print("--- %.2f seconds ---" % (time.time() - start_time))
Example #15
def parallel_to_corpus(dat,
                       worker_num=NCPU,
                       partition_num=100,
                       index_to_token=INDEX_TO_TOKENS,
                       nlp_sep=NLP_SEP):
    sub_data_list = chunkIt(dat, num=partition_num)
    p = Pool(processes=worker_num)
    data = [
        p.apply_async(func=to_corpus, args=(x, index_to_token, nlp_sep))
        for x in sub_data_list
    ]
    p.close()
    flat_data = [p.get() for p in data]

    seq_len = [size for chunk, word_list in flat_data for size in word_list]
    flat_data = [_ for chunk, word_list in flat_data for _ in chunk]
    return flat_data, seq_len
Example #16
def complexity_filtering(seqs, chars, chi2_cut, ncpus):
    nseqs, lseqs = len(seqs), len(seqs[0])

    # each job should not exceed 50M character to avoid transferring too much data
    diff = _best_chunk_size(nseqs, lseqs, ncpus)

    p = Pool(ncpus)
    procs = []
    for i in xrange(0, nseqs, diff):
        procs.append(
            p.apply_async(_sub_complexity_filtering,
                          (seqs[i:i + diff], chars)))
    p.close()
    p.join()

    counts = procs.pop(0).get()
    for p in procs:
        counts = [[counts[i][j] + v for j, v in enumerate(l)]
                  for i, l in enumerate(p.get())]

    expected = {}
    total = float(sum(sum(c) for c in counts))
    for i, c in enumerate(chars):
        expected[c] = sum(c[i] for c in counts) / total

    printime('   * proportion of each site type: ' +
             ', '.join(['%s: %.4f' % (c.upper(), expected[c]) for c in chars]))

    good_cols = []
    mean = 1. / len(expected)
    sums = []
    prop = []
    stds = []
    chi2 = []
    for i in xrange(lseqs):
        count = counts[i]
        total = float(sum(count)) or 1  # in case total is equal to 0
        std = sum((c / total - mean)**2 for c in count)
        stds.append(std**0.5)
        sums.append(total)
        av = [total * expected[c] for c in chars]
        chi2.append(sum((c - av[i])**2 / av[i] for i, c in enumerate(count)))
        if chi2[-1] > chi2_cut:
            good_cols.append(i)
        prop.append([c / total for c in count])
    return sums, prop, stds, chi2, good_cols, expected
Example #17
def to_adops_xls(args, wid_to_topics):
    my_workbook = xlwt.Workbook()
    ids_worksheet = my_workbook.add_sheet("Wikis to Topics")
    ids_worksheet.write(0, 0, 'Wiki')
    ids_worksheet.write(0, 1, 'URL')
    ids_worksheet.write(0, 2, 'Topic')
    ids_worksheet.write(0, 3, 'Rank')

    ids = wid_to_topics.keys()
    r = Pool(processes=16).map_async(wiki_data_for_ids, [ids[i:i+20] for i in range(0, len(ids), 20)])
    wiki_data = {}
    map(wiki_data.update, r.get())

    row = 1
    for wid, topics in wid_to_topics.items():
        top_five = sorted(wid_to_topics[wid].keys(), key=lambda x: wid_to_topics[wid][x], reverse=True)[:5]
        for counter, topic in enumerate(top_five):
            ids_worksheet.write(row, 0, wid)
            ids_worksheet.write(row, 1, wiki_data.get(wid, {}).get('url', '?'))
            ids_worksheet.write(row, 2, int(topic)+1)
            ids_worksheet.write(row, 3, counter)
            row += 1

    urls_worksheet = my_workbook.add_sheet("Topic Data")
    urls_worksheet.write(0, 0, 'Topic')
    urls_worksheet.write(0, 1, 'Phrase')
    urls_worksheet.write(0, 2, 'Weight')
    urls_worksheet.write(0, 3, 'Rank')

    row = 1
    for topic, line in enumerate(args.features_file):
        words = line.decode('utf8').split(u' + ')
        for rank, word_data in enumerate(words):
            weight, word = word_data.split('*')
            urls_worksheet.write(row, 0, topic+1)
            urls_worksheet.write(row, 1, word)
            urls_worksheet.write(row, 2, weight)
            urls_worksheet.write(row, 3, rank+1)
            row += 1

    my_workbook.save(args.topics_file.name.replace('.csv', '-adops-report.xls'))
    print args.topics_file.name.replace('.csv', '-adops-report.xls')
Example #18
def get_denser_columns(seqs, cutoff, ncpus):
    nseqs, lseqs = len(seqs), len(seqs[0])

    col_cutoff = nseqs - cutoff

    ##  search for dense columns
    p = Pool(ncpus)
    # each job should not exceed 50M character to avoid transferring too much data
    diff = _best_chunk_size(nseqs, lseqs, ncpus)
    procs = []
    for i in xrange(0, len(seqs), diff):
        procs.append(p.apply_async(_sub_get_dense_cols, (seqs[i:i + diff], )))

    p.close()
    p.join()

    good_cols = procs.pop(0).get()
    for p in procs:
        good_cols = [good_cols[i] + v for i, v in enumerate(p.get())]

    return [i for i, col in enumerate(good_cols) if col < col_cutoff]
Example #19
    calibration_cmd = "python ../../tools/modifiedRouteSampler.py -r ./route_files/ai15_run%s.rou.xml --edgedata-files ./calibration/weekday_survey_2019.xml --optimize 20 --optimize-input -a 'type=\"nhbCar\"' --write-route-ids --mismatch-output ./logs/ai15_run%s_calibration_logs.txt -o ./route_files/ai15_run%s_weekday_survey_calibrated.rou.xml" % (run_number, run_number,  run_number)
    # the command is built as a single shell string, so it needs shell=True
    subprocess.call(calibration_cmd, shell=True)


if __name__ == '__main__':
    od_sets = ['hbnwauto15',
                'hbwauto15',
                'nhbauto15',
                'truck15',
                # 'hbnwauto25',  # do not include RISM25 demand since RISM15 is used for base calibration
                # 'hbwauto25',
                # 'nhbauto25',
                # 'truck25',
                ]
    random_seed = 6789
    number_of_runs = 10
    i = 0

    p = Pool(4)

    for od_name in od_sets:
        random_seed += i
        print(od_name)

        results = [p.apply_async(generate_trips, args=(od_name, random_seed, run)) for run in range(0, number_of_runs)]
        out = [p.get() for p in results]
        i += 1

    results = [p.apply_async(generate_routes, args=(run,)) for run in range(0, number_of_runs)]
    out = [p.get() for p in results]
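On POSIX, passing the whole command as one string to subprocess.call requires shell=True; with shell=False the command should be an argument list, which shlex.split can build (it also respects the embedded quotes used in the calibration command above). A small sketch with a harmless placeholder command:

import shlex
import subprocess

cmd = "python --version"  # placeholder; a real command string is split the same way
subprocess.call(shlex.split(cmd), shell=False)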
Example #20
File: main.py Project: voidmm/PNG
if __name__ == "__main__":
    start = time.time()

    p = Pool()
    size_of_game_batch = number_of_repeats / CPU_COUNT
    all_parallel_games = [int(size_of_game_batch) for i in range(CPU_COUNT)]

    output = [
        p.apply_async(games, args=(x, word_counter_container))
        for x in all_parallel_games
    ]
    p.close()
    p.join()

    history = [p.get()[0] for p in output]
    history = np.reshape(np.ravel(history, order='A'),
                         (number_of_repeats, number_of_rounds))
    mean_number_of_different_words = np.mean(
        [p.get()[1]['different words of all games'] for p in output],
        axis=0,
        dtype=int)[0]
    mean_number_of_total_words = np.mean(
        [p.get()[1]['total number of words of all games'] for p in output],
        axis=0,
        dtype=int)[0]

    end = time.time()
    print('Total time in seconds: ' + str(end - start))

    Plot = Plot()
Example #21
def main():
    # collect args
    command = ' '.join(sys.argv[0:])
    myparser = argparse.ArgumentParser()
    myparser.add_argument('-i', '--input_csv', required=True)
    myparser.add_argument('-b', '--bias', required=True, type=float)
    myparser.add_argument('-o', '--output_dir', required=True)
    myparser.add_argument('-cov',
                          '--covariate_dir',
                          required=False,
                          default=None)
    myparser.add_argument('-boot',
                          '--bootstrap_dir',
                          required=False,
                          default=None)
    myparser.add_argument('-cut',
                          '--bootstrap_cutoff',
                          required=False,
                          type=float,
                          default=0.95)
    myparser.add_argument('-do_att',
                          '--do_att',
                          required=False,
                          default=False,
                          type=bool)
    myparser.add_argument('-multi',
                          '--multi',
                          required=False,
                          default=1,
                          type=int)
    args = myparser.parse_args()

    # plot single sensitivity without bootstrap
    main_plot_coords, main_variable_coords = do_single_sensitivity(
        args.input_csv, args.bias, args.output_dir, args.covariate_dir,
        args.do_att, command)

    # if bootstrap values provided
    if args.bootstrap_dir is not None:
        # extract names of input_csv, covariate dir and output dir
        input_name = os.path.basename(os.path.normpath(args.input_csv))
        if args.covariate_dir is not None:
            covariate_dir_name = os.path.basename(
                os.path.normpath(args.covariate_dir))
        else:
            covariate_dir_name = None
        output_dir_name = os.path.basename(os.path.normpath(args.output_dir))

        # create paths of input subdirectories
        subdir_list = [
            os.path.join(args.bootstrap_dir, subdir)
            for subdir in os.listdir(args.bootstrap_dir)
        ]
        print('Calculating outputs for individual bootstrapped datasets')
        # Create pool for multiprocessing
        pool = Pool(args.multi)
        # Create progress bar
        pbar = tqdm(total=len(subdir_list))

        # Calculate bootstrap co-ordinates for all provided bootstrap values and combine them into a list of dataframes
        res = [
            pool.apply_async(do_bootstrap_sensitivity,
                             args=(n, subdir_list),
                             kwds={
                                 'input_name': input_name,
                                 'covariate_dir_name': covariate_dir_name,
                                 'output_dir_name': output_dir_name,
                                 'command': command,
                                 'args': args
                             },
                             callback=lambda _: pbar.update(1))
            for n in range(len(subdir_list))
        ]
        boot_plot_coords, boot_variable_coords = list(
            zip(*[pool.get() for pool in res]))
        pbar.close()
        boot_plot_coords = list(boot_plot_coords)
        boot_variable_coords = list(boot_variable_coords)

        # which variables to plot?
        if args.covariate_dir is None:
            plot_variables = 'none'
        else:
            if has_none(boot_variable_coords):
                plot_variables = 'main'
            else:
                plot_variables = 'both'
        print('Plotting combined bootstrap graph')

        p, plot_coords_out, variable_coords_out = plot_bootstrap_sensitivity_graph(
            main_plot_coords, main_variable_coords, boot_plot_coords,
            boot_variable_coords, plot_variables, args.bootstrap_cutoff,
            args.bias)

        # Save bootstrap output files
        p.save(os.path.join(args.output_dir, 'austen_plot_bootstrap.png'),
               dpi=500,
               verbose=False)

        with open(os.path.join(args.output_dir,
                               'austen_plot_coordinates_bootstrap.csv'),
                  'w+',
                  newline='\n') as file:
            file.write(f'#{command}\n')
            plot_coords_out.to_csv(file, index=False)
        if variable_coords_out is not None:
            with open(os.path.join(args.output_dir,
                                   'variable_importances_bootstrap.csv'),
                      'w+',
                      newline='\n') as file:
                file.write(f'#{command}\n')
                variable_coords_out.to_csv(file, index=False)

    return None
Example #22
    pr.start()
    ## wait for pr to finish:
    pr.join()
    print
    print "all data write and read done"
    print
    print "Process Pool example...\n"
    ## Note: Queue objects cannot be shared between the parent process and pool workers;
    ## to use a queue with a process pool, go through multiprocessing's Manager class, as below:
    manager = multiprocessing.Manager()
    # the parent process creates the Queue and passes it to each child process:
    q = manager.Queue()
    lock = manager.Lock()  ## a queue lock so that only one process operates on the queue at a time
    p = Pool(processes=5)  ## at most 5 worker processes run at the same time
    p_list = []
    # submit 20 tasks at once
    for i in range(20):
        pw = p.apply_async(write, args=(i, q, lock))
        p_list.append(pw)
    for p in p_list:
        p.get()

    p = Pool()
    time.sleep(0.5)
    pr = p.apply_async(read, args=(q,))
    p.close()  # close() must be called before join(); after close() no new tasks can be added
    p.join()


    print
    print 'all data write and read done'
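As the comments above note, a plain multiprocessing.Queue cannot be passed to Pool workers, but a Manager-backed queue can. A condensed, self-contained sketch of the same write/read split; the write and read workers here are illustrative stand-ins:

import multiprocessing
from multiprocessing import Pool


def write(i, q, lock):
    with lock:  # only one writer touches the queue at a time
        q.put('item-%d' % i)


def read(q):
    items = []
    while not q.empty():
        items.append(q.get())
    return items


if __name__ == '__main__':
    manager = multiprocessing.Manager()
    q = manager.Queue()
    lock = manager.Lock()

    with Pool(processes=5) as p:
        writers = [p.apply_async(write, args=(i, q, lock)) for i in range(20)]
        for w in writers:
            w.get()  # wait for every writer before reading
        print(p.apply_async(read, args=(q,)).get())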
Example #23
# for more info look at pyesgf module documentation
    esgfargs=constraints
    if 'mip' in constraints.keys():
        esgfargs['cmor_table']=esgfargs.pop('mip')
    if 'exp0' in locals():
        esgfargs['query']=exp0+"%"
    esgfargs['replica']=False
    esgf.search_node(**esgfargs)
    print("Found ",esgf.ds_count(),"simulations for constraints")
# loop returned DatasetResult objects
# using multiprocessing Pool to parallelise process_file
# using 8 here as it is the number of VCPUs on the VDI
    if esgf.ds_count()>=1:
        results=esgf.get_ds()
        async_results = Pool(1).map_async(retrieve_ds, results)
        for ds_info in async_results.get():
            esgf_results.append(ds_info)

# append to results list of version dictionaries containing useful info 
# NB search should return only one latest, not replica version if any
        
# compare local to remote info
    print("Finished to retrieve remote data")
    if esgf_results==[]:
        if db_results!=[]:
            print("Found local version but none is currently available on ESGF nodes for constraints:\n",constraints)
        else: 
            print("Nothing currently available on ESGF nodes and no local version exists for constraints:\n",constraints)
    else:
        print(esgf.ds_count(),"instances were found on ESGF and ",outputs.count()," on the local database")
        if sys.version_info < ( 3, 0 ):
Example #24
File: main.py Project: voidmm/MNG
                                                            initial_word_transitions,reward)
        aggregated_history.append(history)
    return aggregated_history, word_frequencies


if __name__ == "__main__":
    start = time.time()

    p = Pool()
    size_of_game_batch = number_of_repeats / CPU_COUNT
    all_parallel_games = [int(size_of_game_batch) for i in range(CPU_COUNT)]
    output = [p.apply_async(games, args=(x, word_frequencies)) for x in all_parallel_games]
    p.close()
    p.join()

    history = [p.get()[0] for p in output]
    history = np.reshape(np.ravel(history, order='A'), (number_of_repeats, number_of_rounds))

    word_frequencies = np.sum([p.get()[1]['word frequencies'] for p in output], axis=0)
    word_associations = [p.get()[1]['word associations'] for p in output]
    word_game_counts = [p.get()[1]['word game counts'] for p in output]
    word_game_counts = np.reshape(word_game_counts, (number_of_repeats, len(initial_word_memory)))

    end = time.time()
    print('Required time, in seconds: '+ str(end - start))

    Plot = Plot()
    Plot.plot_sigmoid(history)
    Plot.plot_box_word_probabilities([i[0] for i in initial_word_memory], word_game_counts)
    Plot.plot_word_frequency([i[0] for i in initial_word_memory], word_frequencies)
Example #25
def build_graph(
    students,
    num_partitions=3,
    friendship_weight=30,
    classmate_weight=3,
    schoolmate_weight=1,
    teacher_multiplier=100,
    node_weights_to_ubvec=None,
):
    G = nx.Graph()
    G.graph['edge_weight_attr'] = 'weight'
    G.graph['node_weight_attr'] = list(node_weights_to_ubvec.keys())

    # helper vars
    name_to_index = {}
    class_to_indices = {}
    school_to_indices = {}
    i = 0

    # add students as nodes
    for row in students:
        name = row['Name']

        if not name:
            continue

        origin_class = row['School'] + ' ' + row['Class']
        origin_school = row['School']
        gender = row.get('Gender')

        G.add_node(
            i,
            label=name + "\n" + origin_class,
            origin_school=origin_school,
            origin_class=origin_class,
            preferences=row['Preferences'],

            # styling
            shape='hexagon' if gender == 'm' else 'ellipse',
            penwidth=3,

            # node attributes
            total=1,  # used to balance total number of students
            gender=1 if gender == 'f' else 0,
            **{
                attribute: int(row[attribute.capitalize()])
                for attribute in node_weights_to_ubvec.keys()
                if attribute not in ('gender', 'total')
            },
        )

        # fill helper vars for preference parsing later on
        name_to_index[name] = i
        if origin_class not in class_to_indices:
            class_to_indices[origin_class] = set()
        class_to_indices[origin_class].add(i)

        if origin_school not in school_to_indices:
            school_to_indices[origin_school] = set()
        school_to_indices[origin_school].add(i)

        i += 1

    # add preferences as edges
    for student, node in G.nodes.items():
        items = node['preferences'].split(',')

        for item in items:
            item = item.strip()  # remove whitespace

            if not item:
                continue  # skip empty items

            # parse negation symbol
            multiplier = 1
            if item.startswith('!'):
                multiplier = -1
                item = item[1:]

            # parse teacher symbol
            if item.startswith('*'):
                multiplier = multiplier * teacher_multiplier
                item = item[1:]

            # parse filter
            (filter_attr, filter_value) = (None, None)
            if item.startswith('['):
                item_parts = item[1:].split(']')
                filter_parts = item_parts[0].split('=')
                assert len(filter_parts) == 2, 'Invalid filter: ' + item
                (filter_attr, filter_value) = filter_parts
                item = item_parts[1]

            schoolmates = school_to_indices.get(item, None)
            classmates = class_to_indices.get(item, None)
            friend = name_to_index.get(item, None)

            # set targets and base weight
            if schoolmates is not None:
                targets = schoolmates
                base_weight = schoolmate_weight
            elif classmates is not None:
                targets = classmates
                base_weight = classmate_weight
            elif friend is not None:
                targets = [friend]
                base_weight = friendship_weight
            else:
                raise ValueError('Could not parse preference: ' + item)

            # apply filter
            if filter_attr:
                targets = [
                    i for i, node in G.nodes.items()
                    if i in targets and node[filter_attr] == filter_value
                ]

            # actually add edges
            for target in targets:
                if student != target:
                    add_edge(G,
                             student,
                             target,
                             weight=base_weight * multiplier)

    # partition graph in async process because METIS will sometimes throw
    # a segmentation fault which we want to raise properly
    try:
        promise = Pool().apply_async(
            metis.part_graph,
            args=(),
            kwds={
                'graph': G,
                'nparts': num_partitions,
                'tpwgts': [
                    tuple(1 / num_partitions
                          for i in range(0, len(node_weights_to_ubvec)))
                    for i in range(0, num_partitions)
                ],
                'ubvec': list(node_weights_to_ubvec.values()),
                'objtype': 'cut',
                # 'ctype': 'shem',
                'seed': 111,
            })
        (total_volume, parts) = promise.get(timeout=3)
    except TimeoutError as e:
        raise ValueError(
            'METIS failed to partition graph. Try fewer edges or partitions.'
        ) from e

    # assign colors according to assigned partition
    colors = get_color_list(num_partitions)
    for i, p in enumerate(parts):
        G.node[i]['color'] = colors[p]

    # remove low-weight and negative edges to clean up the painted graph
    edges = [e for e in G.edges.data()]
    for edge_obj in edges:
        edge_from, edge_to, edge_data = edge_obj
        if edge_data['weight'] < 30:
            G.remove_edge(edge_from, edge_to)

    # convert to pydot
    P = nx.drawing.nx_pydot.to_pydot(G)

    # create subgraphs to cluster students in same class together
    for color in colors:
        subgraph = pydot.Cluster(color, label='test 1', nodesep=7, ranksep=4)
        for node in P.get_node_list():
            if node.get_attributes()['color'] == color:
                subgraph.add_node(node)
        num_students = len(subgraph.get_node_list())
        subgraph.set_label(str(num_students) + ' students')
        P.add_subgraph(subgraph)

    return total_volume, P
Example #26
    fftCepstre = fft2(cepstre)
    fftCepstre = np.exp(fftCepstre)
    return fftCepstre


#################################
#################################
#################################
#################################

start = time.time()

resultH = Pool(6).map_async(repFreqConduitVocal, hann(x))
resultE = Pool(2).map_async(fftCourtTerm, hann(e))

H = resultH.get()
E = resultE.get()

print time.time() - start

#################################
#################################
#################################
#################################

V = np.ndarray(shape=(nbFenetre, N, 2), dtype=np.complex128)
for m in range(nbFenetre):
    V[m] = E[m] * H[m]

#################################
#################################
Example #27
p = Pool(cpu_count())
print(f'Num Core: {cpu_count()}')
param_dict = {
    'task1': list(range(10, 30000000)),
    'task2': list(range(30000000, 60000000)),
    'task3': list(range(60000000, 90000000)),
    'task4': list(range(90000000, 120000000)),
    'task5': list(range(120000000, 150000000)),
    'task6': list(range(150000000, 180000000)),
    'task7': list(range(180000000, 210000000)),
    'task8': list(range(210000000, 240000000))
}
mg = Manager()
managed_locker = mg.Lock()
managed_dict = mg.dict()

results = []
for name, param in param_dict.items():
    results.append(
        p.apply_async(train_on_parameter,
                      args=(name, param, managed_dict, managed_locker)))

# results = [p.apply_async(train_on_parameter, args=(name, param, managed_dict, managed_locker)) for name, param in param_dict.items()]
results = [p.get() for p in results]
print(managed_dict)

end1 = datetime.datetime.now()
print(f'Duration: {end1 - start1}')

## REF: https://zhuanlan.zhihu.com/p/93305921
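train_on_parameter and start1 are not defined in this fragment; an illustrative stand-in that matches the call signature (name, param, managed_dict, managed_locker) and the surrounding timing code might look like this:

import datetime
from multiprocessing import Manager, Pool, cpu_count


def train_on_parameter(name, param, managed_dict, managed_locker):
    # illustrative stand-in: "training" is just summing the parameter list
    result = sum(param)
    with managed_locker:  # serialize writes to the shared dict
        managed_dict[name] = result
    return name


if __name__ == '__main__':
    start1 = datetime.datetime.now()
    mg = Manager()
    managed_dict = mg.dict()
    managed_locker = mg.Lock()
    with Pool(cpu_count()) as p:
        jobs = [p.apply_async(train_on_parameter,
                              args=('task%d' % i, list(range(1000)),
                                    managed_dict, managed_locker))
                for i in range(4)]
        done = [j.get() for j in jobs]
    print(dict(managed_dict))
    print('Duration:', datetime.datetime.now() - start1)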
Example #28
def run_cohort(cohort, created_cohorts, mutation_input_files,
               mutations_cohorts_dir, motif_name_index, f_score_index,
               motif_breaking_score_index, chromatin_cat_index,
               background_window, background_window_size, filter_on_qval,
               sig_category, sig_thresh, sim_sig_thresh, sim_output_extension,
               filter_cond, operation_on_unify, output_extension,
               distance_to_merge, merged_mut_sig_threshold,
               local_domain_window, chr_lengths_file, sig_elements_output_file,
               sig_tfs_file, sig_tfpos_file, tmp_dir, n_cores_fscore,
               p_value_on_score, active_driver_script_dir,
               active_driver_min_mut, n_cores):

    "get the cohort name to use for output file names"
    cohort_full_name = created_cohorts[cohort][0].split('_')[0]
    if '/' in created_cohorts[cohort][0]:
        cohort_full_name = '/'.join(
            created_cohorts[cohort][0].split('/')[0:-1]
        ) + "/" + created_cohorts[cohort][0].split('/')[-1].split('_')[0]

    print('Processing: ', cohort_full_name)
    '''Calculate std, nummotifs and mean of the scores per TF-motif in the simulation sets
       The first file in each created_cohorts[cohort] is the observed set, so skip it
    '''
    #dict_simulated_mean_sd_per_TF_motif_output_file = cohort_full_name + "_meansdrand{}sets.dict".format(len(mutation_input_files)-1)
    #print(dict_simulated_mean_sd_per_TF_motif_output_file)
    '''As background consider simulated mutations in provided bacground window size around mutation
    '''

    #     if background_window:
    #         #print(background_window_size)
    #         dict_type_mean_std_scores = Utilities.get_simulated_mean_sd_per_TF_motif_background_window(
    #             cohort_full_name = cohort_full_name,
    #             annotated_input_file = created_cohorts[cohort][0],
    #             simulated_annotated_input_files=created_cohorts[cohort][1:],
    #             mutations_cohorts_dir = mutations_cohorts_dir,
    #             cohort_mean_sd_per_tf_overall_output_dict_file= dict_simulated_mean_sd_per_TF_motif_output_file,
    #             chr_lengths_file = chr_lengths_file,
    #             background_window_size = background_window_size,
    #             motif_name_index = motif_name_index, f_score_index = f_score_index,
    #             motif_breaking_score_index = motif_breaking_score_index,
    #             chromatin_cat_index = chromatin_cat_index, tmp_dir = tmp_dir, n_cores_fscore=n_cores_fscore)
    #     else:
    #         '''As background consider whole genome
    #         '''
    #         dict_type_mean_std_scores = Utilities.get_simulated_mean_sd_per_TF_motif(
    #             simulated_annotated_input_files=created_cohorts[cohort][1:],
    #             cohort_mean_sd_per_tf_overall_output_dict_file= dict_simulated_mean_sd_per_TF_motif_output_file,
    #             motif_name_index = motif_name_index, f_score_index = f_score_index,
    #             motif_breaking_score_index = motif_breaking_score_index)
    #
    #     '''For each mutation in the observed set created_cohorts[cohort][0]
    #        calculate pval and qval by comparing its score to the std and mean
    #        scores in the corresponding TF motif.
    #        Filter out mutations that don't have a sig score or dont' pass other filters
    #     '''
    #     muts_sig_per_TF_file = Utilities.get_muts_sig_per_TF(
    #         annoted_input_file=created_cohorts[cohort][0],
    #         dict_type_mean_std_scores=dict_type_mean_std_scores,
    #         annoted_output_file_extension="_rand{}setsTF".format(len(mutation_input_files)-1),
    #         annoted_output_file_extension_onlysig="_rand{}setsTFsigQval{}".format(
    #             len(mutation_input_files)-1, sig_thresh),
    #         background_window = background_window,
    #         motif_name_index = motif_name_index, f_score_index = f_score_index,
    #         motif_breaking_score_index = motif_breaking_score_index,
    #         filter_on_qval=filter_on_qval, sig_cat=sig_category,
    #         sig_thresh=sig_thresh,
    #         filter_on_signal = True, dnase_index = 24, fantom_index = 25, num_other_tfs_index = 27)
    #     sig_muts_per_tf_mutation_input_files = [muts_sig_per_TF_file]
    #
    #     '''repeat the same process to keep only sig muts from the simulated, but use sim_sig_thresh
    #     if sim_sig_thresh >=1.0 no sig is performed and all muts are written to the output'''
    #     for mutations_input_file in created_cohorts[cohort][1:]:
    #         muts_sig_per_TF_file = Utilities.get_muts_sig_per_TF(
    #             annoted_input_file=mutations_input_file,
    #             dict_type_mean_std_scores=dict_type_mean_std_scores,
    #             annoted_output_file_extension="_rand{}setsTF".format(len(mutation_input_files)-1),
    #             annoted_output_file_extension_onlysig=sim_output_extension,
    #             background_window = background_window,
    #             motif_name_index = motif_name_index, f_score_index = f_score_index,
    #             motif_breaking_score_index = motif_breaking_score_index,
    #             filter_on_qval=filter_on_qval, sig_cat=sig_category,
    #             sig_thresh=sim_sig_thresh
    #             )
    #         sig_muts_per_tf_mutation_input_files.append(muts_sig_per_TF_file)
    #
    '''Based on the mutations that have a significant score (specify if qval should be used)
       Count number of mutations per TF motif and per TF motif position
       For each, calculate a p-value and q-value based on the number of mutations
       in the corresponding motif in the simulated sets
    '''
    #sig_tfs_file, sig_tfpos_file = Utilities.get_tf_pval(
    #    cohort, sig_muts_per_tf_mutation_input_files, p_value_on_score, motif_name_index,
    #            f_score_index, motif_breaking_score_index,
    #            filter_cond, fsep='\t', sig_tfs_file=sig_tfs_file,
    #            sig_tfpos_file=sig_tfpos_file,
    #            filter_on_signal = True, dnase_index = 24, fantom_index = 25,
    #             num_other_tfs_index = 27)

    #
    '''replace an observed regulatory mutation file with an all annotated observed mutation file
    '''
    #muts_per_tf_mutation_input_files  = [created_cohorts[cohort][0]+"_rand{}setsTF".format(len(mutation_input_files)-1)]

    #muts_per_tf_mutation_input_files.extend(sig_muts_per_tf_mutation_input_files[1:])
    #print(muts_per_tf_mutation_input_files)
    #print(len(muts_per_tf_mutation_input_files))
    '''Combine nearby mutations across the cohort into one element'''
    '''Unify the mutations that have significant scores across the cohorts
       Make one record for mutations that overlap multiple motifs
    '''

    unified_muts_files_obj = []
    p = Pool(15)
    for mutations_input_file in created_cohorts[cohort]:
        unified_muts_file = p.apply_async(
            Utilities.unify_muts,
            args=(mutations_input_file, output_extension, True, filter_cond,
                  operation_on_unify))
        unified_muts_files_obj.append(unified_muts_file)
    p.close()
    p.join()
    unified_muts_files = [p.get() for p in unified_muts_files_obj]

    unified_mutation_input_files_obj = []
    p = Pool(15)
    for unified_muts_file in unified_muts_files:
        unified_muts_file_wihtmotifinfo = p.apply_async(
            Utilities.get_max_motif_in_grouped_muts,
            args=(unified_muts_file, ))
        unified_mutation_input_files_obj.append(
            unified_muts_file_wihtmotifinfo)

    p.close()
    p.join()
    unified_mutation_input_files = [
        p.get() for p in unified_mutation_input_files_obj
    ]

    #for unified_muts_file in unified_muts_files:
    #        if os.path.exists(unified_muts_file):
    #            os.remove(unified_muts_file)

    #     unified_mutation_input_files = []
    #     for mutations_input_file in created_cohorts[cohort]:
    #         unified_muts_file = mutations_input_file + output_extension + "_groupedbymut"
    #         unified_muts_file_wihtmotifinfo = unified_muts_file+"withmotifinfo"
    #         if not os.path.exists(unified_muts_file_wihtmotifinfo):
    #             print("Unifying: ", mutations_input_file)
    #             Utilities.unify_muts(mutations_input_file, unified_muts_file_ext,
    #                                  filter_mut_motifs=True, filter_cond=filter_cond,
    #                                  operation_on_unify=operation_on_unify)
    #             Utilities.get_max_motif_in_grouped_muts(
    #                 annotated_mutations_grouped_file=unified_muts_file,
    #                 annotated_mutations_grouped_output_file=unified_muts_file_wihtmotifinfo)
    #             os.remove(unified_muts_file)
    #         unified_mutation_input_files.append(unified_muts_file_wihtmotifinfo)
    #    print('Unified mutations input files: ', unified_mutation_input_files)
    '''   Evaluate the significance of each element based on: 
       - the element score (sum of the score of its mutations)
       - number of mutations in the element 
    '''
    get_sig_merged_elements(
        unified_mutation_input_files,
        cohort_full_name,
        output_extension,
        #sim_output_extension+output_extension,
        distance_to_merge,
        merged_mut_sig_threshold,
        local_domain_window,
        chr_lengths_file,
        sig_elements_output_file,
        sim_sig_thresh,
        p_value_on_score=p_value_on_score)

    #ActiveDriverWGS

    active_driver_results = sig_elements_output_file + '_ActiveDriver_results'
    active_driver_results_sig = active_driver_results + '_sig'
    active_driver_output_file = sig_elements_output_file + '_ActiveDriver'
    active_driver_output_file_sig = active_driver_output_file + '_sig'

    if not os.path.exists(active_driver_output_file_sig):
        print([
            'Rscript', active_driver_script_dir, sig_elements_output_file,
            created_cohorts[cohort][0], active_driver_min_mut,
            active_driver_output_file, active_driver_output_file_sig,
            active_driver_results, n_cores
        ])

        try:
            subprocess.call([
                'Rscript', active_driver_script_dir, sig_elements_output_file,
                created_cohorts[cohort][0],
                str(active_driver_min_mut), active_driver_output_file,
                active_driver_output_file_sig, active_driver_results,
                str(n_cores)
            ])
        except KeyError:
            open(active_driver_output_file_sig, 'a').close()

    sig_muts_file = created_cohorts[cohort][0] + "_sig"

    sig_muts_file = Utilities.get_sig_muts(
        elements_input_file=active_driver_output_file_sig,
        mutations_input_file=created_cohorts[cohort][0],
        sig_muts_file=sig_muts_file,
        motif_breaking_score_index=motif_breaking_score_index,
        tf_binding_index=30,
        dnase_index=24)

    sig_muts_per_tf_mutation_input_files = []
    sig_muts_per_tf_mutation_input_files = [sig_muts_file]
    for mutations_input_file in created_cohorts[cohort][1:]:
        sig_muts_per_tf_mutation_input_files.append(mutations_input_file)

    sig_tfs_file, sig_tfpos_file = Utilities.get_tf_pval(
        cohort,
        sig_muts_per_tf_mutation_input_files,
        p_value_on_score,
        motif_name_index,
        f_score_index,
        motif_breaking_score_index,
        filter_cond,
        fsep='\t',
        sig_tfs_file=sig_tfs_file,
        sig_tfpos_file=sig_tfpos_file,
        filter_on_signal=True,
        dnase_index=24,
        fantom_index=25,
        num_other_tfs_index=27)

    return sig_elements_output_file, active_driver_output_file_sig, sig_tfs_file, sig_tfpos_file
Example #29
    url = details.get('url')
    lang = details.get('lang')
    #print url
    #doc_ids = ListDocIdsService().get_value(wid)
    doc_ids = map(lambda x: x.split('_')[1],
                  filter(lambda y: '_' in y,
                         #ListDocIdsService().get_value(wid)))[:100]
                         ListDocIdsService().get_value(wid)))
    #pprint(doc_ids); sys.exit(0)
    #for n in range(0, len(doc_ids), step):
    ##for n in range(0, 20, step):
    #    print 'n = %d' % n
    #    doc_ids_subset = doc_ids[n:n+step]
    r = Pool(processes=8).map_async(get_fields, chunks(doc_ids, step))
    r.wait()
    pprint(r.get())
    print '*'*80
    #for k in r.get():  # DEBUG
    #    print k
    fields = []
    m = map(lambda x: fields.extend(x), r.get())
    #pprint(fields)
    indexed = dict(fields)

pprint(indexed)  # DEBUG

#for doc_id in doc_ids_to_heads:
#    entity_response = doc_ids_to_entities.get(
#        doc_id, {'titles': [], 'redirects': {}})
#    doc_ids_combined[doc_id] = map(preprocess,
#                                   indexed.get(doc_id, []) +
Example #30
    #print url
    #doc_ids = ListDocIdsService().get_value(wid)
    doc_ids = map(
        lambda x: x.split('_')[1],
        filter(
            lambda y: '_' in y,
            #ListDocIdsService().get_value(wid)))[:100]
            ListDocIdsService().get_value(wid)))
    #pprint(doc_ids); sys.exit(0)
    #for n in range(0, len(doc_ids), step):
    ##for n in range(0, 20, step):
    #    print 'n = %d' % n
    #    doc_ids_subset = doc_ids[n:n+step]
    r = Pool(processes=8).map_async(get_fields, chunks(doc_ids, step))
    r.wait()
    pprint(r.get())
    print '*' * 80
    #for k in r.get():  # DEBUG
    #    print k
    fields = []
    m = map(lambda x: fields.extend(x), r.get())
    #pprint(fields)
    indexed = dict(fields)

pprint(indexed)  # DEBUG

#for doc_id in doc_ids_to_heads:
#    entity_response = doc_ids_to_entities.get(
#        doc_id, {'titles': [], 'redirects': {}})
#    doc_ids_combined[doc_id] = map(preprocess,
#                                   indexed.get(doc_id, []) +
Example #31
import numpy as np
from graspy.inference import SemiparametricTest
from graspy.embed import AdjacencySpectralEmbed, select_dimension
import warnings
# from graspy.simulations import sbm
from graspy.utils import symmetrize
import time
import sys
import getopt
import pickle
from multiprocessing import Pool, cpu_count


def junk(seed, ):
    np.random.seed(seed)
    msg = np.random.normal(0, 1)
    return msg
    # outputs = [p.get() for p in tests]
    # epsilon_outputs.append(outputs)


if __name__ == '__main__':
    p = Pool()
    n_sims = 100
    seeds = [np.random.randint(1, 100000000) for _ in range(n_sims)]
    print(seeds)
    jobs = [p.apply_async(junk, args=(seeds[i], )) for i in range(n_sims)]
    outputs = [p.get() for p in jobs]
    print(outputs)