Пример #1
0
 def test_usearch_supported_version(self):
     """Check that usearch is on the PATH and reports a supported version.

     Runs ``usearch --version`` (stderr merged into stdout), takes the text
     after the first ``v`` in the output and parses it as a dotted integer
     version. The test fails if usearch is missing, if the output cannot
     be parsed, or if the parsed version is not in ``acceptable_version``.
     """
     acceptable_version = [(5, 2, 236)]
     self.assertTrue(
         which('usearch'),
         "usearch not found. This may or may not be a problem depending on "
         + "which components of QIIME you plan to use.")
     command = "usearch --version"
     proc = Popen(command,
                  shell=True,
                  universal_newlines=True,
                  stdout=PIPE,
                  stderr=STDOUT)
     # communicate() drains the pipe and waits for the child, so neither
     # the process nor its stdout handle is left dangling.
     stdout, _ = proc.communicate()
     try:
         version_string = stdout.split('v')[1].strip()
         version = tuple(map(int, version_string.split('.')))
         pass_test = version in acceptable_version
     except (IndexError, ValueError):
         # No 'v' marker, or a non-numeric component: report the raw
         # output instead of a half-parsed version.
         pass_test = False
         version_string = stdout
     # Render each accepted version as "5.2.236" rather than as the repr
     # of a tuple.
     required = ' or '.join(
         '.'.join(map(str, accepted)) for accepted in acceptable_version)
     self.assertTrue(
         pass_test,
         "Unsupported usearch version. %s is required, but running %s." %
         (required, version_string))
Пример #2
0
def run_pair_alignment(seq, blast_db, num_threads, e_value_min, bitscore_cutoff, ids_to_recs):
	"""Core alignment routine.

	1) Runs BLASTp for a single query sequence against ``blast_db`` and keeps
	   the subject ids whose bitscore is strictly above ``bitscore_cutoff``.
	2) Looks up the records for those ids in ``ids_to_recs`` and writes them,
	   in FASTA format, to the file ``blast_hits`` in the working directory.
	3) Globally aligns the original query against that file with ``needle``.

	Returns whatever ``parse_needle_results`` produces for the needle output,
	or an empty list when no BLAST hit passes the cutoff.

	NOTE(review): relies on the names ``verbose``, ``whitespace``, ``SeqIO``
	and ``parse_needle_results`` being defined elsewhere in the module.
	"""

	# First pass cutoff with BLAST alignments
	if verbose: print("[DETECT]: Running BLASTp for {} ...".format(seq.name()))
	p = subprocess.Popen(("blastp", "-query", "-",
					"-out", "-",
					"-db", blast_db,
					"-outfmt", "6 sseqid bitscore",
					"-max_target_seqs", "100000",
					"-num_threads", str(num_threads),
					"-evalue", str(e_value_min)),
				stdin=subprocess.PIPE,
				stdout=subprocess.PIPE)
	stdout, stderr = p.communicate(seq.data)

	blast_hit_list = list()
	for line in stdout.split("\n"):
		if line not in whitespace:
			swissprot_id, bitscore = line.split("\t")
			if float(bitscore) > bitscore_cutoff:
				blast_hit_list.append(swissprot_id)
	# De-duplicate: the same subject can be reported more than once.
	unique_hit_ids = frozenset(blast_hit_list)

	# Report the number of distinct hits, not the length of a joined string.
	if verbose: print("[DETECT]: Found {} hits for {} ...".format(len(unique_hit_ids), seq.name()))

	# Stop if nothing found
	if not unique_hit_ids:
		return list()

	with open("blast_hits", "w") as blast_hits:
		SeqIO.write((ids_to_recs[hid] for hid in unique_hit_ids), blast_hits, "fasta")

	if verbose: print("[DETECT]: Running Needleman-Wunch alignments for {} ...".format(seq.name()))

	# Run Needleman-Wunsch alignment on the results of the BLAST search
	p = subprocess.Popen(("needle", "-filter",
					"-bsequence", "blast_hits",
					"-gapopen", "10",
					"-gapextend", "0.5",
					"-sprotein", "Y",
					"-aformat_outfile", "score"),
				stdin=subprocess.PIPE,
				stdout=subprocess.PIPE)

	stdout, stderr = p.communicate(seq.fasta())

	return parse_needle_results(stdout)
Пример #3
0
def remote_list_apps(env_id: str, login_info: str, remote_home: str):
    """Return the entries found in the remote apps directory of an environment.

    Two shell commands are sent through ``utils.ssh_run_commands``: one that
    echoes ``NO_APPS`` when the environment's ``.COMPSsApps`` directory does
    not exist, and one that lists that directory. An empty list is returned
    when the marker appears anywhere in the output or when the output is
    empty; otherwise the output is returned split into lines.

    NOTE(review): the existence check uses ``remote_home`` while the listing
    uses ``~`` -- confirm these always refer to the same directory.
    """
    missing_dir_probe = (
        f'[ ! -d "{remote_home}/.COMPSsApps/{env_id}" ] && echo "NO_APPS"'
    )
    list_dir = f'ls ~/.COMPSsApps/{env_id}/'

    output = utils.ssh_run_commands(
        login_info, [missing_dir_probe, list_dir]).strip()

    if 'NO_APPS' in output:
        return []

    entries = output.split('\n')
    # Splitting an empty string yields [''], i.e. nothing was listed.
    if entries == ['']:
        return []
    return entries
Пример #4
0
def main():
    """Run a round-robin tournament between the engines given on the CLI.

    The first engine is asked (via ``-g <game_pairs>``) for a whitespace
    separated list of initial states. Every pair of engines then plays each
    initial state ``repeat_pairs`` times, once per seating order, in shuffled
    order. After every game a progress line is printed and, when a results
    file was given, the module-level ``results`` are pickled to it from
    offset 0.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-e', '--engine', action='append', required=True)
    cli.add_argument('-n', '--game-pairs', type=int, default=10)
    cli.add_argument('-r', '--repeat-pairs', type=int, default=1)
    cli.add_argument('-w', '--results-file', type=argparse.FileType('wb'))
    args = cli.parse_args()

    if len(args.engine) == 1:
        print('Warning: no games to play with only one engine')

    engines = [Engine(engine_cmd) for engine_cmd in args.engine]

    # The first engine generates the starting positions for everyone.
    generated, _ = engines[0].run(f'-g {args.game_pairs}')
    initial_states = generated.split()

    engine_pairs = list(combinations(engines, 2))
    matchups = list(product(engine_pairs, initial_states * args.repeat_pairs))
    # Each matchup is played twice, once per seating order.
    total_games = 2 * len(matchups)
    played_games = 0

    shuffle(matchups)
    start_time = time.perf_counter()
    for (engine1, engine2), initial_state in matchups:
        for first, second in ((engine1, engine2), (engine2, engine1)):
            play_game(first, second, initial_state)
            played_games += 1

            percentage = 100 * played_games / total_games
            mean_turns = mean([result.turns for result in results])
            mean_time = mean([result.time for result in results])
            remaining_games = total_games - played_games
            elapsed_seconds = int(time.perf_counter() - start_time)
            elapsed = timedelta(seconds=elapsed_seconds)
            # Estimated time remaining, extrapolated from the mean game time.
            etr = timedelta(seconds=int(mean_time * remaining_games))

            print()
            print_results()
            progress_line = f'{played_games}/{total_games}\t{percentage:.0f}%\tavg. game {mean_turns:.0f}t/{mean_time:.1f}s\t{elapsed} elapsed\t\tapprox. {etr} remaining'
            print(progress_line)

            if not args.results_file:
                continue
            # Overwrite the previous snapshot from the start of the file.
            args.results_file.seek(0)
            pickle.dump(results, args.results_file)
Пример #5
0
def curl(url, outfile=None):
	"""Call the ``curl`` executable on ``url``.

	Without ``outfile`` curl is run with ``-sl`` and its standard output is
	returned split on whitespace. With ``outfile`` curl is run with ``-#``
	and ``--output outfile``, and nothing is returned.

	Raises ValueError when curl exits with a non-zero status (including
	termination by a signal).
	"""

	if not outfile:
		args = ['curl', '-sl', url]
		# Capture stderr so a failure can be reported to the caller.
		stderr_target = subprocess.PIPE
	else:
		args = ['curl', '-#', '--output', outfile, url]
		# Leave stderr attached to the parent: '-#' draws its progress
		# bar there.
		stderr_target = None

	p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=stderr_target)
	stdout, stderr = p.communicate()

	# curl signals ordinary failures with positive exit codes; a negative
	# returncode only means the child was killed by a signal.
	if p.returncode != 0:
		raise ValueError("curl error (exit code {}): {}".format(
			p.returncode, stderr))

	if not outfile:
		return stdout.split()
Пример #6
0
 def test_usearch_supported_version(self):
     """Check that usearch is on the PATH and reports a supported version.

     Runs ``usearch --version`` (stderr merged into stdout), takes the text
     after the first ``v`` in the output and parses it as a dotted integer
     version. The test fails if usearch is missing, if the output cannot
     be parsed, or if the parsed version is not in ``acceptable_version``.
     """
     acceptable_version = [(5, 2, 236)]
     self.assertTrue(which('usearch'),
                     "usearch not found. This may or may not be a problem depending on " +
                     "which components of QIIME you plan to use.")
     command = "usearch --version"
     proc = Popen(command, shell=True, universal_newlines=True,
                  stdout=PIPE, stderr=STDOUT)
     # communicate() drains the pipe and waits for the child, so neither
     # the process nor its stdout handle is left dangling.
     stdout, _ = proc.communicate()
     try:
         version_string = stdout.split('v')[1].strip()
         version = tuple(map(int, version_string.split('.')))
         pass_test = version in acceptable_version
     except (IndexError, ValueError):
         # No 'v' marker, or a non-numeric component: report the raw
         # output instead of a half-parsed version.
         pass_test = False
         version_string = stdout
     # Render each accepted version as "5.2.236" rather than as the repr
     # of a tuple.
     required = ' or '.join('.'.join(map(str, accepted))
                            for accepted in acceptable_version)
     self.assertTrue(pass_test,
                     "Unsupported usearch version. %s is required, but running %s."
                     % (required, version_string))
Пример #7
0
def curl(url, outfile=None):
    """Call the ``curl`` executable on ``url``.

    Without ``outfile`` curl is run with ``-sl`` and its standard output is
    returned split on whitespace. With ``outfile`` curl is run with ``-#``
    and ``--output outfile``, and nothing is returned.

    Raises ValueError when curl exits with a non-zero status (including
    termination by a signal).
    """

    if not outfile:
        args = [
            'curl',
            '-sl',
            url,
        ]
        # Capture stderr so a failure can be reported to the caller.
        stderr_target = subprocess.PIPE
    else:
        args = ['curl', '-#', '--output', outfile, url]
        # Leave stderr attached to the parent: '-#' draws its progress
        # bar there.
        stderr_target = None

    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=stderr_target)
    stdout, stderr = p.communicate()

    # curl signals ordinary failures with positive exit codes; a negative
    # returncode only means the child was killed by a signal.
    if p.returncode != 0:
        raise ValueError("curl error (exit code {}): {}".format(
            p.returncode, stderr))

    if not outfile:
        return stdout.split()
Пример #8
0
def run_pair_alignment(seq, blast_db, num_threads, e_value_min, bitscore_cutoff):
	"""Core alignment routine.

	1) Runs BLASTp for a single query sequence against ``blast_db`` and keeps
	   the subject ids whose bitscore is strictly above ``bitscore_cutoff``.
	2) Retrieves the sequences for those ids with ``blastdbcmd`` and stores
	   them in a per-query temporary file ``blast_hits_<seq.name()>``.
	3) Globally aligns the original query against that file with ``needle``.

	The temporary file is removed before returning. Returns whatever
	``parse_needle_results`` produces for the needle output, or an empty
	list when no BLAST hit passes the cutoff.

	NOTE(review): relies on the names ``verbose``, ``whitespace`` and
	``parse_needle_results`` being defined elsewhere in the module.
	"""

	# First pass cutoff with BLAST alignments
	if verbose: print("[DETECT]: Running BLASTp for {} ...".format(seq.name()))
	p = subprocess.Popen(("blastp", "-query", "-",
					"-out", "-",
					"-db", blast_db,
					"-outfmt", "6 sseqid bitscore",
					"-max_target_seqs", "100000",
					"-num_threads", str(num_threads),
					"-evalue", str(e_value_min)),
				stdin=subprocess.PIPE,
				stdout=subprocess.PIPE)
	stdout, stderr = p.communicate(seq.data)

	blast_hit_list = list()
	for line in stdout.split("\n"):
		if line not in whitespace:
			swissprot_id, bitscore = line.split("\t")
			# sprot identifiers are sp|<ID>|<extra>
			seq_id = swissprot_id.split("|")[1]
			if float(bitscore) > bitscore_cutoff:
				blast_hit_list.append(seq_id)

	# Report the number of hits, not the length of a joined string.
	if verbose: print("[DETECT]: Found {} hits for {} ...".format(len(blast_hit_list), seq.name()))

	# Stop if nothing found, before any temporary file is created
	if not blast_hit_list:
		return list()

	p = subprocess.Popen(("blastdbcmd", "-db", blast_db,
						"-entry_batch", "-"),
					stdout=subprocess.PIPE,
					stdin=subprocess.PIPE)
	hit_sequences, stderr = p.communicate("\n".join(blast_hit_list))

	# seq.name is a method; call it so the path is a string.
	blast_hits_path = "blast_hits_" + seq.name()
	try:
		with open(blast_hits_path, "w") as blast_hits:
			blast_hits.write(hit_sequences)

		if verbose: print("[DETECT]: Running Needleman-Wunch alignments for {} ...".format(seq.name()))

		# Run Needleman-Wunsch alignment on the results of the BLAST search,
		# reading the per-query file that was just written.
		p = subprocess.Popen(("needle", "-filter",
						"-bsequence", blast_hits_path,
						"-gapopen", "10",
						"-gapextend", "0.5",
						"-aformat_outfile", "score"),
					stdin=subprocess.PIPE,
					stdout=subprocess.PIPE)

		stdout, stderr = p.communicate(seq.fasta())
	finally:
		# Always clean up the temporary file, even if needle could not run.
		if os.path.exists(blast_hits_path):
			os.remove(blast_hits_path)
	return parse_needle_results(stdout)
Пример #9
0
    # Wait for the beeline shell
    # to gracefully exit
    p.wait()

    # Close the tunnel
    ssh.terminate()
    ssh.wait()

    devNull.close()

    theLog.write("OUT:" + stdout + "\n")
    theLog.flush()
    theLog.write("ERR:" + stderr + "\n")
    theLog.flush()

    result = stdout.split("\n")[-3].split()[1]

    # Archive it as well -- TODO relocate this functionality to
    # earlier in the ETL process, entire ZIP archives will be created
    # rather than per-set TXT archives

    # On further testing, the "content_md5" is only for header rather
    # than the actual blob content - have to wait for these APIs to mature
    #try:
    #    azureStorage.put_block_blob_from_path(archiveContainer,
    #                                          targetArchiveFullPath,
    #                                          fullFilePath,
    #                                          #content_md5=md5Checksum.encode('base64').strip(),
    #                                          max_connections=5)
    #except AzureHttpError as e:
    #    if result is not None:
Пример #10
0
def run_pair_alignment(seq, blast_db, num_threads, e_value_min,
                       bitscore_cutoff, uniprot_df, blastp, needle, dump_dir):
    """Core alignment routine.

    1) Runs the ``blastp`` executable for a single query sequence against
       ``blast_db`` and keeps the subject ids whose bitscore is strictly
       above ``bitscore_cutoff``.
    2) Selects the rows of ``uniprot_df`` whose index matches ``">" + id``
       for those ids and dumps them to a temporary file under ``dump_dir``.
    3) Globally aligns the original query against that file with the
       ``needle`` executable (``-gapextend`` is 2 when the executable name
       ends in "stretcher", 0.5 otherwise).

    The temporary file is removed before returning. Returns whatever
    ``parse_needle_results`` produces for the aligner output, or an empty
    list when no BLAST hit passes the cutoff or none is found in
    ``uniprot_df``. If either external program cannot be run, the error is
    printed and the process exits with status 1.

    NOTE(review): relies on the names ``verbose``, ``whitespace``, ``dt`` and
    ``parse_needle_results`` being defined elsewhere in the module, and
    ``dump_dir`` is concatenated as-is, so it presumably ends with a path
    separator -- confirm against callers.
    """

    # First pass cutoff with BLAST alignments
    if verbose: print("[DETECT]: Running BLASTp for {} ...".format(seq.name()))

    # Characters that must not appear in the temporary file name; "/" would
    # otherwise be read as a directory separator.
    invalid_chars = ["?", "<", ">", "\\", ":", "*", "|", "/", '"']
    valid_seq_name = seq.name()
    for char in invalid_chars:
        valid_seq_name = valid_seq_name.replace(char, "_")
    try:
        p = subprocess.Popen(
            (blastp, "-query", "-", "-out", "-", "-db", blast_db, "-outfmt",
             "6 sseqid bitscore", "-max_target_seqs", "100000", "-num_threads",
             str(num_threads), "-evalue", str(e_value_min)),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            encoding='utf8')
        stdout, stderr = p.communicate(seq.data)
    except Exception as e:
        print(dt.today(), "BLASTp FAILED", e)
        # Exit with a failure status so the parent does not see success.
        sys.exit(1)

    blast_hits_path = dump_dir + "blast_hits_" + valid_seq_name
    blast_hit_list = list()
    for line in stdout.split("\n"):
        if line in whitespace:
            continue
        swissprot_id, bitscore = line.split("\t")
        if float(bitscore) > bitscore_cutoff:
            # The index of uniprot_df is matched against ">"-prefixed ids.
            blast_hit_list.append(">" + str(swissprot_id))

    # Don't continue if there's nothing from BLAST
    if not blast_hit_list:
        return list()

    blast_df = uniprot_df[uniprot_df.index.isin(blast_hit_list)]
    if blast_df.empty:
        print(blast_hits_path, "BLAST_DF is empty... ERROR")
        return list()
    blast_df.to_csv(blast_hits_path,
                    mode="w+",
                    sep='\n',
                    header=False,
                    quoting=3)

    # Run Needleman-Wunsch alignment on the results of the BLAST search
    gapextend_value = "2" if needle.endswith("stretcher") else "0.5"

    try:
        p = subprocess.Popen(
            (
                needle,
                "-filter",
                "-bsequence",
                blast_hits_path,
                "-gapopen",
                "10",
                "-gapextend",
                gapextend_value,
                "-sprotein",
                "Y",
                "-aformat_outfile",
                "score"),
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            encoding='utf8')
        stdout, stderr = p.communicate(seq.fasta())
    except Exception as e:
        print(dt.today(), "NEEDLE FAILED:", e)
        # Exit with a failure status so the parent does not see success.
        sys.exit(1)
    finally:
        # Runs on success and on the exit path alike.
        os.remove(blast_hits_path)
    return parse_needle_results(stdout)