import time
import multiprocessing as mp
from datetime import datetime
from threading import Thread

import numpy as np
from scipy import stats

# NOTE: progbar, mongodb, and helpers such as get_vrc01_class_mutations(),
# regroup(), remove_temp_files(), and logger are defined elsewhere in the
# surrounding package and are not part of this excerpt.


def simulate(probabilities, n_mutations=1, n_sequences=50000):
    '''
    Simulates mutation of antibody sequences, given a list of mutations
    and their probabilities.

    Inputs
    ------
    probabilities: a dictionary containing mutations as keys and
        probabilities as values.
    n_mutations: number of mutations in each sequence. If n_mutations is
        greater than 1, mutations are selected from the pool of supplied
        mutations without replacement. Default is 1.
    n_sequences: number of mutated sequences to generate. Default is 50,000.

    Returns
    -------
    Mean number of VRC01-class mutations (float) and the 95% confidence
    interval (tuple of floats).
    '''
    vrc01_muts = get_vrc01_class_mutations()
    sim_muts = []
    muts, probs = list(zip(*list(probabilities.items())))
    start = datetime.now()
    for i in range(n_sequences):
        m = np.random.choice(muts, size=n_mutations, replace=False, p=probs)
        sim_muts.append(m)
        if (i + 1) % 100 == 0:
            progress_bar(i + 1, n_sequences, start)
    # count the VRC01-class mutations in each simulated sequence
    mut_counts = [len([x for x in sublist if x in vrc01_muts]) for sublist in sim_muts]
    # calculate mean and 95% confidence interval
    n, min_max, mean, var, skew, kurt = stats.describe(mut_counts)
    std = np.sqrt(var)
    R = stats.norm.interval(0.95, loc=mean, scale=std / np.sqrt(len(mut_counts)))
    return mean, R
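# Usage sketch (not part of the original module): the mutation names and
# frequencies below are hypothetical; the only real requirements are that the
# dictionary keys use the same naming scheme as get_vrc01_class_mutations()
# output and that the probabilities sum to 1, as np.random.choice() requires.
def example_simulation():
    observed_freqs = {'G54W': 0.02, 'N58T': 0.015, 'R71V': 0.01, 'Q61R': 0.005}
    total = sum(observed_freqs.values())
    probabilities = {mut: freq / total for mut, freq in observed_freqs.items()}
    mean, (ci_low, ci_high) = simulate(probabilities, n_mutations=2, n_sequences=10000)
    print('mean VRC01-class mutations: {:.3f} (95% CI: {:.3f}-{:.3f})'.format(
        mean, ci_low, ci_high))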
def monitor_update(results):
    finished = 0
    jobs = len(results)
    while finished < jobs:
        time.sleep(1)
        finished = len([r for r in results if r.ready()])
        progbar.progress_bar(finished, jobs)
    # final redraw so the bar always shows the completed state
    progbar.progress_bar(finished, jobs)
def monitor_mp_jobs(results):
    finished = 0
    jobs = len(results)
    while finished < jobs:
        time.sleep(1)
        ready = [ar for ar in results if ar.ready()]
        finished = len(ready)
        progbar.progress_bar(finished, jobs)
    print('')
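# Neither progress_bar() (used in simulate) nor progbar.progress_bar() is
# defined in this excerpt. This is a minimal sketch of a compatible helper,
# assuming the optional third argument is the loop's start time, used to
# report elapsed time; the real progbar module may differ.
import sys


def progress_bar(finished, total, start_time=None, width=50):
    '''Draws a simple in-place terminal progress bar.'''
    frac = finished / float(total)
    filled = int(round(width * frac))
    bar = '|' + '#' * filled + '-' * (width - filled) + '|'
    elapsed = ''
    if start_time is not None:
        elapsed = '  {:.1f}s elapsed'.format((datetime.now() - start_time).total_seconds())
    sys.stdout.write('\r{} {:.1f}%{}'.format(bar, 100 * frac, elapsed))
    sys.stdout.flush()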
def multiprocess_mongoimport(jsons, db, coll, args):
    progbar.progress_bar(0, len(jsons))
    async_results = []
    p = mp.Pool()
    for j in jsons:
        async_results.append(p.apply_async(do_mongoimport,
                                           args=(j, args.ip, args.port, db, coll,
                                                 args.user, args.password)))
    # the original called the undefined monitor_results(); monitor_mp_jobs()
    # is the matching monitor for multiprocessing AsyncResults
    monitor_mp_jobs(async_results)
    p.close()
    p.join()
    remove_temp_files(args)
    print('')
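# do_mongoimport() is not defined in this excerpt. A plausible sketch that
# shells out to the mongoimport CLI: the flags below are real mongoimport
# options, but the helper itself (name, signature, behavior) is an assumption.
import subprocess


def do_mongoimport(json_file, ip, port, db, coll, user=None, password=None):
    '''Imports a single JSON file into MongoDB via the mongoimport CLI.'''
    cmd = ['mongoimport',
           '--host', ip,
           '--port', str(port),
           '--db', db,
           '--collection', coll,
           '--file', json_file]
    if user and password:
        cmd += ['--username', user, '--password', password]
    return subprocess.check_output(cmd, stderr=subprocess.STDOUT)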
def monitor_celery_jobs(results):
    finished = 0
    jobs = len(results)
    while finished < jobs:
        time.sleep(1)
        succeeded = [ar for ar in results if ar.successful()]
        failed = [ar for ar in results if ar.failed()]
        finished = len(succeeded) + len(failed)
        progbar.progress_bar(finished, jobs)
    print('')
def update_db(db, standard, scores, collection, args):
    # the passed-in db handle is replaced with a fresh connection
    db = mongodb.get_db(args.db, args.ip, args.port, args.user, args.password)
    print_index_info()
    mongodb.index(db, collection, ['seq_id'])
    print_update_info()
    start = time.time()
    conn = mongodb.get_connection(args.ip, args.port, args.user, args.password)
    mongo_version = conn.server_info()['version']
    # MongoDB field names can't contain dots
    standard = standard.replace('.', '_')
    g = scores.groupby('identity')
    groups = regroup(g.groups)
    # process groups in batches of args.update_threads concurrent threads
    for i in range(0, len(groups), args.update_threads):
        tlist = []
        for group in groups[i:i + args.update_threads]:
            t = Thread(target=update,
                       args=(db, collection, group, standard, mongo_version, args))
            t.start()
            tlist.append(t)
        for t in tlist:
            t.join()
        progbar.progress_bar(min(i + args.update_threads, len(groups)), len(groups))
    # if platform.system().lower() == 'darwin' or args.debug or args.single_process_update:
    #     for i, group in enumerate(groups):
    #         update(db, collection, group, standard, mongo_version, args)
    #         progbar.progress_bar(i, len(groups))
    # else:
    #     p = mp.Pool(processes=25)
    #     async_results = []
    #     for group in groups:
    #         async_results.append(p.apply_async(update, args=(db, collection, group, standard, mongo_version, args)))
    #     monitor_update(async_results)
    #     p.close()
    #     p.join()
    print('')
    run_time = time.time() - start
    logger.info('Updating took {} seconds. ({} sequences per second)'.format(
        round(run_time, 2), round(len(scores) / run_time, 1)))
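# update() and regroup() are defined elsewhere in the package. As a rough
# sketch of what the per-group worker likely does, assuming each group is an
# (identity_score, [seq_ids]) pair: the field layout here is an assumption,
# and the version check mirrors the split between the legacy
# Collection.update() (removed in pymongo 4.x) and the modern update_many().
def update(db, collection, group, standard, mongo_version, args):
    identity, seq_ids = group
    field = 'identity.{}'.format(standard)
    query = {'seq_id': {'$in': seq_ids}}
    update_doc = {'$set': {field: identity}}
    if int(mongo_version.split('.')[0]) >= 3:
        db[collection].update_many(query, update_doc)
    else:
        db[collection].update(query, update_doc, multi=True)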