def test_usearch_supported_version(self):
    """usearch is in path and version is supported.

    Runs ``usearch --version`` through the shell, treats the text that
    follows the first ``v`` in its output as a dotted version string and
    asserts that the parsed version is one of ``acceptable_version``.
    Output that cannot be parsed fails the assertion (with the raw output
    quoted in the message) instead of raising.
    """
    acceptable_version = [(5, 2, 236), (5, 2, 236)]
    self.assertTrue(
        which('usearch'),
        "usearch not found. This may or may not be a problem depending on " +
        "which components of QIIME you plan to use.")
    command = "usearch --version"
    proc = Popen(command, shell=True, universal_newlines=True,
                 stdout=PIPE, stderr=STDOUT)
    # communicate() reads all output, closes the pipe and reaps the child.
    stdout, _ = proc.communicate()
    try:
        # IndexError: no 'v' in the output; ValueError: non-numeric field.
        version_string = stdout.split('v')[1]
        version = tuple(map(int, version_string.split('.')))
        pass_test = version in acceptable_version
    except (IndexError, ValueError):
        pass_test = False
        version_string = stdout

    # Render each supported version as "5.2.236" (once), rather than joining
    # the tuples' reprs with dots.
    supported = []
    for candidate in acceptable_version:
        dotted = '.'.join(map(str, candidate))
        if dotted not in supported:
            supported.append(dotted)

    self.assertTrue(
        pass_test,
        "Unsupported usearch version. %s is required, but running %s."
        % (' or '.join(supported), version_string))
def run_pair_alignment (seq, blast_db, num_threads, e_value_min, bitscore_cutoff, ids_to_recs):
    """Core alignment routine.

    1) Takes a single sequence and acquires multiple BLASTp alignments to
       the swissprot enzyme database.
    2) Canonical sequences of the results from (1) are retrieved from a
       dictionary of ids to swissprot records derived from a swissprot fasta.
    3) The original query is globally aligned versus the sequences from (2).

    Parameters:
        seq: query object; ``seq.data`` is piped to blastp, ``seq.fasta()``
            is piped to needle and ``seq.name()`` is used in log messages.
        blast_db: value passed to blastp's ``-db`` option.
        num_threads: value passed to blastp's ``-num_threads`` option.
        e_value_min: value passed to blastp's ``-evalue`` option.
        bitscore_cutoff: hits must score strictly above this to be kept.
        ids_to_recs: mapping from subject id to the record written to the
            ``blast_hits`` fasta file.

    Returns:
        An empty list when no hit passes the cutoff, otherwise whatever
        ``parse_needle_results`` returns for needle's output.
    """
    # First pass cutoff with BLAST alignments
    if verbose:
        print("[DETECT]: Running BLASTp for {} ...".format(seq.name()))
    p = subprocess.Popen(
        ("blastp", "-query", "-", "-out", "-", "-db", blast_db,
         "-outfmt", "6 sseqid bitscore", "-max_target_seqs", "100000",
         "-num_threads", str(num_threads), "-evalue", str(e_value_min)),
        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _ = p.communicate(seq.data)

    blast_hit_list = []
    for line in stdout.split("\n"):
        if not line.strip():
            # skip blank / whitespace-only lines (e.g. the trailing one)
            continue
        swissprot_id, bitscore = line.split("\t")
        if float(bitscore) > bitscore_cutoff:
            blast_hit_list.append(swissprot_id)

    # The same subject may be reported several times; keep each id once.
    unique_hit_ids = frozenset(blast_hit_list)

    if verbose:
        # Report the number of distinct hits, not the length of a joined string.
        print("[DETECT]: Found {} hits for {} ...".format(len(unique_hit_ids), seq.name()))

    # stop if nothing found
    if not unique_hit_ids:
        return list()

    # The file is closed (and therefore flushed) before needle reads it.
    with open("blast_hits", "w") as blast_hits:
        SeqIO.write((ids_to_recs[hid] for hid in unique_hit_ids), blast_hits, "fasta")

    if verbose:
        print("[DETECT]: Running Needleman-Wunch alignments for {} ...".format(seq.name()))

    # Run Needleman-Wunsch alignment on the results of the BLAST search
    p = subprocess.Popen(
        ("needle", "-filter", "-bsequence", "blast_hits",
         "-gapopen", "10", "-gapextend", "0.5",
         "-sprotein", "Y", "-aformat_outfile", "score"),
        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _ = p.communicate(seq.fasta())

    return parse_needle_results(stdout)
def remote_list_apps(env_id: str, login_info: str, remote_home: str):
    """List the entries of the remote ``.COMPSsApps/<env_id>`` directory.

    Two commands are sent through ``utils.ssh_run_commands``: a test that
    echoes ``NO_APPS`` when the directory under ``remote_home`` is missing,
    followed by an ``ls`` of the directory.

    Returns an empty list when the combined output contains ``NO_APPS`` or
    is empty after stripping; otherwise the stripped output split on
    newlines.
    """
    missing_dir_probe = f'[ ! -d "{remote_home}/.COMPSsApps/{env_id}" ] && echo "NO_APPS"'
    list_dir = f'ls ~/.COMPSsApps/{env_id}/'

    output = utils.ssh_run_commands(login_info, [missing_dir_probe, list_dir]).strip()

    # Either the marker was echoed or nothing at all was printed.
    if 'NO_APPS' in output or not output:
        return []
    return output.split('\n')
def main():
    """Play every pair of engines against each other and report progress.

    Command-line options:
        -e/--engine        engine command (repeatable, required)
        -n/--game-pairs    passed to the first engine as ``-g <n>`` to
                           obtain the initial states (default 10)
        -r/--repeat-pairs  how many times the list of initial states is
                           repeated (default 1)
        -w/--results-file  binary file that receives the pickled results

    Each matchup is played twice with the engines' roles swapped. After
    every game the running statistics are printed and, if a results file
    was given, ``results`` is re-pickled from the start of that file.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-e', '--engine', action='append', required=True)
    cli.add_argument('-n', '--game-pairs', type=int, default=10)
    cli.add_argument('-r', '--repeat-pairs', type=int, default=1)
    cli.add_argument('-w', '--results-file', type=argparse.FileType('wb'))
    args = cli.parse_args()

    if len(args.engine) == 1:
        print('Warning: no games to play with only one engine')

    engines = [Engine(engine_cmd) for engine_cmd in args.engine]

    # The first engine's output is split on whitespace into initial states.
    stdout, _ = engines[0].run(f'-g {args.game_pairs}')
    initial_states = list(stdout.split())

    engine_pairs = list(combinations(engines, 2))
    matchups = list(product(engine_pairs, initial_states * args.repeat_pairs))
    total_games = 2 * len(matchups)
    played_games = 0
    shuffle(matchups)

    start_time = time.perf_counter()
    for (engine1, engine2), initial_state in matchups:
        # Same initial state, once with each engine in each seat.
        for p1, p2 in ((engine1, engine2), (engine2, engine1)):
            play_game(p1, p2, initial_state)
            played_games += 1

            # `results` is defined outside this function; presumably
            # play_game appends to it -- confirm against its definition.
            percentage = 100 * played_games / total_games
            mean_turns = mean([result.turns for result in results])
            mean_time = mean([result.time for result in results])
            remaining_games = total_games - played_games
            elapsed = timedelta(seconds=int(time.perf_counter() - start_time))
            etr = timedelta(seconds=int(mean_time * remaining_games))

            print()
            print_results()
            print(
                f'{played_games}/{total_games}\t{percentage:.0f}%\tavg. game {mean_turns:.0f}t/{mean_time:.1f}s\t{elapsed} elapsed\t\tapprox. {etr} remaining'
            )

            if args.results_file:
                # Rewind so each dump overwrites the previous one.
                args.results_file.seek(0)
                pickle.dump(results, args.results_file)
def curl(url, outfile=None):
    """Call curl.

    Without ``outfile`` curl runs as ``curl -sl <url>`` and its standard
    output is returned split on whitespace. With ``outfile`` curl runs as
    ``curl -# --output <outfile> <url>`` and nothing is returned.

    Raises:
        ValueError: if curl exits with a non-zero status. A negative status
            means the process was terminated by a signal; positive values
            are curl's own exit codes.
    """
    if outfile:
        args = ['curl', '-#', '--output', outfile, url]
    else:
        args = ['curl', '-sl', url]

    # stderr is deliberately not piped so curl's progress bar stays visible.
    p = subprocess.Popen(args, stdout=subprocess.PIPE)
    stdout, _ = p.communicate()

    # curl signals its own failures with positive exit codes, so testing
    # only for negative values would let every curl error through.
    if p.returncode != 0:
        raise ValueError("curl error: exit status {}".format(p.returncode))

    if not outfile:
        return stdout.split()
    return None
def test_usearch_supported_version(self):
    """usearch is in path and version is supported.

    Runs ``usearch --version`` through the shell, treats the text that
    follows the first ``v`` in its output as a dotted version string and
    asserts that the parsed version is one of ``acceptable_version``.
    Output that cannot be parsed fails the assertion (with the raw output
    quoted in the message) instead of raising.
    """
    acceptable_version = [(5, 2, 236), (5, 2, 236)]
    self.assertTrue(which('usearch'),
                    "usearch not found. This may or may not be a problem depending on " +
                    "which components of QIIME you plan to use.")
    command = "usearch --version"
    proc = Popen(command, shell=True, universal_newlines=True,
                 stdout=PIPE, stderr=STDOUT)
    # communicate() reads all output, closes the pipe and reaps the child.
    stdout, _ = proc.communicate()
    try:
        # IndexError: no 'v' in the output; ValueError: non-numeric field.
        version_string = stdout.split('v')[1]
        version = tuple(map(int, version_string.split('.')))
        pass_test = version in acceptable_version
    except (IndexError, ValueError):
        pass_test = False
        version_string = stdout

    # Render each supported version as "5.2.236" (once), rather than joining
    # the tuples' reprs with dots.
    supported = []
    for candidate in acceptable_version:
        dotted = '.'.join(map(str, candidate))
        if dotted not in supported:
            supported.append(dotted)

    self.assertTrue(pass_test,
                    "Unsupported usearch version. %s is required, but running %s."
                    % (' or '.join(supported), version_string))
def curl(url, outfile=None):
    """Call curl.

    Without ``outfile`` curl runs as ``curl -sl <url>`` and its standard
    output is returned split on whitespace. With ``outfile`` curl runs as
    ``curl -# --output <outfile> <url>`` and nothing is returned.

    Raises:
        ValueError: if curl exits with a non-zero status. A negative status
            means the process was terminated by a signal; positive values
            are curl's own exit codes.
    """
    if not outfile:
        args = [
            'curl',
            '-sl',
            url,
        ]
    else:
        args = ['curl', '-#', '--output', outfile, url]

    # stderr is deliberately not piped so curl's progress bar stays visible.
    p = subprocess.Popen(args, stdout=subprocess.PIPE)
    stdout, _ = p.communicate()

    # curl signals its own failures with positive exit codes, so testing
    # only for negative values would let every curl error through.
    if p.returncode != 0:
        raise ValueError("curl error: exit status {}".format(p.returncode))

    if not outfile:
        return stdout.split()
    return None
def run_pair_alignment (seq, blast_db, num_threads, e_value_min, bitscore_cutoff):
    """Core alignment routine.

    1) Takes a single sequence and acquires multiple BLASTp alignments to
       the swissprot enzyme database.
    2) Canonical sequences of the results from (1) are retrieved with
       blastdbcmd.
    3) The original query is globally aligned versus the sequences from (2).

    Parameters:
        seq: query object; ``seq.data`` is piped to blastp, ``seq.fasta()``
            is piped to needle and ``seq.name()`` names the temporary
            ``blast_hits_<name>`` file and appears in log messages.
        blast_db: value passed to ``-db`` for both blastp and blastdbcmd.
        num_threads: value passed to blastp's ``-num_threads`` option.
        e_value_min: value passed to blastp's ``-evalue`` option.
        bitscore_cutoff: hits must score strictly above this to be kept.

    Returns:
        An empty list when no hit passes the cutoff, otherwise whatever
        ``parse_needle_results`` returns for needle's output.
    """
    # First pass cutoff with BLAST alignments
    if verbose:
        print("[DETECT]: Running BLASTp for {} ...".format(seq.name()))
    p = subprocess.Popen(
        ("blastp", "-query", "-", "-out", "-", "-db", blast_db,
         "-outfmt", "6 sseqid bitscore", "-max_target_seqs", "100000",
         "-num_threads", str(num_threads), "-evalue", str(e_value_min)),
        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _ = p.communicate(seq.data)

    blast_hit_list = []
    for line in stdout.split("\n"):
        if not line.strip():
            # skip blank / whitespace-only lines (e.g. the trailing one)
            continue
        swissprot_id, bitscore = line.split("\t")
        # sprot identifiers are sp|<ID>|<extra>
        seq_id = swissprot_id.split("|")[1]
        if float(bitscore) > bitscore_cutoff:
            blast_hit_list.append(seq_id)

    if verbose:
        # Report the number of hits, not the length of the joined id string.
        print("[DETECT]: Found {} hits for {} ...".format(len(blast_hit_list), seq.name()))

    # stop if nothing found (before any temporary file is created)
    if not blast_hit_list:
        return list()

    blast_hit_ids = "\n".join(blast_hit_list)
    p = subprocess.Popen(
        ("blastdbcmd", "-db", blast_db, "-entry_batch", "-"),
        stdout=subprocess.PIPE, stdin=subprocess.PIPE)
    stdout, _ = p.communicate(blast_hit_ids)

    # seq.name is a method: it has to be called to obtain the name, and the
    # same path must be used for writing, for needle and for clean-up.
    blast_hits_path = "blast_hits_" + seq.name()
    with open(blast_hits_path, "w") as blast_hits:
        blast_hits.write(stdout)

    try:
        if verbose:
            print("[DETECT]: Running Needleman-Wunch alignments for {} ...".format(seq.name()))

        # Run Needleman-Wunsch alignment on the results of the BLAST search
        p = subprocess.Popen(
            ("needle", "-filter", "-bsequence", blast_hits_path,
             "-gapopen", "10", "-gapextend", "0.5",
             "-aformat_outfile", "score"),
            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        stdout, _ = p.communicate(seq.fasta())
    finally:
        # Always remove the temporary fasta, even if needle could not run.
        os.remove(blast_hits_path)

    return parse_needle_results(stdout)
# Wait for the beeline shell # to gracefully exit p.wait() # Close the tunnel ssh.terminate() ssh.wait() devNull.close() theLog.write("OUT:" + stdout + "\n") theLog.flush() theLog.write("ERR:" + stderr + "\n") theLog.flush() result = stdout.split("\n")[-3].split()[1] # Archive it as well -- TODO relocate this functionality to # earlier in the ETL process, entire ZIP archives will be created # rather than per-set TXT archives # On further testing, the "content_md5" is only for header rather # than the actual blob content - have to wait for these APIs to mature #try: # azureStorage.put_block_blob_from_path(archiveContainer, # targetArchiveFullPath, # fullFilePath, # #content_md5=md5Checksum.encode('base64').strip(), # max_connections=5) #except AzureHttpError as e: # if result is not None:
def run_pair_alignment(seq, blast_db, num_threads, e_value_min,
                       bitscore_cutoff, uniprot_df, blastp, needle, dump_dir):
    """Core alignment routine.

    1) Takes a single sequence and acquires multiple BLASTp alignments to
       the swissprot enzyme database.
    2) Canonical sequences of the results from (1) are selected from
       ``uniprot_df`` and written to a temporary file.
    3) The original query is globally aligned versus the sequences from (2).

    Parameters:
        seq: query object; ``seq.data`` is piped to blastp, ``seq.fasta()``
            is piped to needle and ``seq.name()`` names the temporary file.
        blast_db: value passed to blastp's ``-db`` option.
        num_threads: value passed to blastp's ``-num_threads`` option.
        e_value_min: value passed to blastp's ``-evalue`` option.
        bitscore_cutoff: hits must score strictly above this to be kept.
        uniprot_df: DataFrame whose index is matched against
            ``">" + subject id``; the matching rows are dumped to the
            temporary file read by the aligner.
        blastp: blastp executable to run.
        needle: aligner executable to run; when it ends with "stretcher"
            the gap-extension penalty is "2" instead of "0.5".
        dump_dir: prefix for the temporary file; it is concatenated as-is,
            so it must already end with a path separator.

    Returns:
        An empty list when no hit passes the cutoff or none of the hits is
        found in ``uniprot_df``; otherwise whatever
        ``parse_needle_results`` returns for the aligner's output.

    If either external program cannot be run, the error is printed and the
    process exits through ``sys.exit()``.
    """
    # First pass cutoff with BLAST alignments
    if verbose:
        print("[DETECT]: Running BLASTp for {} ...".format(seq.name()))

    # Characters that must not appear in the temporary file name. "/" is
    # included so that a sequence name cannot introduce directory components.
    invalid_chars = ["?", "<", ">", "\\", ":", "*", "|", "/"]
    valid_seq_name = seq.name()
    for char in invalid_chars:
        valid_seq_name = valid_seq_name.replace(char, "_")

    try:
        p = subprocess.Popen(
            (blastp, "-query", "-", "-out", "-", "-db", blast_db,
             "-outfmt", "6 sseqid bitscore", "-max_target_seqs", "100000",
             "-num_threads", str(num_threads), "-evalue", str(e_value_min)),
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, encoding='utf8')
        stdout, _ = p.communicate(seq.data)
    except Exception as e:
        print(dt.today(), "BLASTp FAILED", e)
        # NOTE(review): sys.exit() without an argument exits with status 0.
        sys.exit()

    blast_hits_path = dump_dir + "blast_hits_" + valid_seq_name

    blast_hit_list = []
    for line in stdout.split("\n"):
        if not line.strip():
            # skip blank / whitespace-only lines (e.g. the trailing one)
            continue
        swissprot_id, bitscore = line.split("\t")
        if float(bitscore) > bitscore_cutoff:
            # the ">" prefix matches how uniprot_df's index is looked up
            blast_hit_list.append(">" + str(swissprot_id))

    # don't continue if there's nothing from BLAST
    if not blast_hit_list:
        return list()

    blast_df = uniprot_df[uniprot_df.index.isin(blast_hit_list)]
    if blast_df.empty:
        print(blast_hits_path, "BLAST_DF is empty... \nERROR")
        return list()
    blast_df.to_csv(blast_hits_path, mode="w+", sep='\n', header=False,
                    quoting=3)

    # Run Needleman-Wunsch alignment on the results of the BLAST search
    gapextend_value = "2" if needle.endswith("stretcher") else "0.5"
    try:
        p = subprocess.Popen(
            (needle, "-filter", "-bsequence", blast_hits_path,
             "-gapopen", "10", "-gapextend", gapextend_value,
             "-sprotein", "Y", "-aformat_outfile", "score"),
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, encoding='utf8')
        stdout, _ = p.communicate(seq.fasta())
    except Exception as e:
        print(dt.today(), "NEEDLE FAILED:", e)
        sys.exit()
    finally:
        # Runs on success, on failure (before SystemExit propagates) and on
        # interruption, so the temporary file never outlives this call.
        os.remove(blast_hits_path)

    return parse_needle_results(stdout)