def test_bootstrap_ui(capsys, alb_resources):
    """Exercise the --bootstrap flag through the command-line UI."""
    test_in_args = deepcopy(in_args)

    # A falsy count generates a single bootstrap replicate per alignment
    test_in_args.bootstrap = [False]
    Alb.command_line_ui(test_in_args, alb_resources.get_one("m p s"), skip_exit=True)
    out, err = capsys.readouterr()
    tester = Alb.AlignBuddy(out)
    assert tester.lengths() == [481, 683]

    # An explicit count of 3 generates three replicates per alignment
    test_in_args.bootstrap = [3]
    Alb.command_line_ui(test_in_args, alb_resources.get_one("m p s"), skip_exit=True)
    out, err = capsys.readouterr()
    tester = Alb.AlignBuddy(out)
    assert tester.lengths() == [481, 481, 481, 683, 683, 683]
def test_phylip_sequential_read(alb_odd_resources, hf, capsys):
    """Test br.phylip_sequential_read() on well-formed and malformed sequential phylip input.

    Fix: input files were opened with bare open(...).read() and never closed;
    they are now read inside `with` blocks so the handles are released.
    """
    # Relaxed (default) sequential phylip
    with open("{0}Mnemiopsis_cds.physr".format(RESOURCE_PATH), "r", encoding="utf-8") as ifile:
        records = br.phylip_sequential_read(ifile.read())
    buddy = Alb.AlignBuddy(records, out_format="phylipsr")
    assert hf.buddy2hash(buddy) == "c5fb6a5ce437afa1a4004e4f8780ad68"

    # Strict sequential phylip (ids are fixed-width, relaxed=False)
    with open("{0}Mnemiopsis_cds.physs".format(RESOURCE_PATH), "r", encoding="utf-8") as ifile:
        records = br.phylip_sequential_read(ifile.read(), relaxed=False)
    buddy = Alb.AlignBuddy(records, out_format="phylipss")
    assert hf.buddy2hash(buddy) == "4c0c1c0c63298786e6fb3db1385af4d5"

    # Malformed: fewer columns than the header declares
    with open(alb_odd_resources['dna']['single']['phylipss_cols'], "r", encoding="utf-8") as ifile:
        records = ifile.read()
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records)
    assert "Malformed Phylip --> Less sequence found than expected" in str(err)

    # Malformed: fewer records than the header declares
    with open(alb_odd_resources['dna']['single']['phylipss_recs'], "r", encoding="utf-8") as ifile:
        records = ifile.read()
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records)
    assert "Malformed Phylip --> 9 sequences expected, 4 found." in str(err)
    capsys.readouterr()

    # Malformed: a sequence longer than the declared column count
    records = """ 3 15
Mle-Panxα4  M--VIE---------A
Mle-Panxα8  M--VLE---------A
Mle-Panxα6  M--LLE----------A
"""
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records)
    assert "Malformed Phylip --> Sequence Mle-Panxα4 has 16 columns, 15 expected." in str(err)

    # Malformed: repeated record id (relaxed mode)
    records = """ 3 15
Mle-Panxα4  M--VIE--------A
Mle-Panxα8  M--VLE--------A
Mle-Panxα8  M--LLE--------A
"""
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records)
    assert "Malformed Phylip --> Repeat ID Mle-Panxα8." in str(err)

    # Malformed: ids collide after strict 10-character truncation
    records = """ 3 15
Mle-Panxα4M--VIE--------A
Mle-Panxα8M--VLE--------A
Mle-Panxα8M--LLE--------A
"""
    with pytest.raises(br.PhylipError) as err:
        br.phylip_sequential_read(records, relaxed=False)
    assert "Malformed Phylip --> Repeat id 'Mle-Panxα8' after strict truncation. " in str(err)
def test_trimal2(alb_resources, hf):
    """Further trimal() coverage: 'all', 'clean', and 'gappyout' modes."""
    tester = Alb.trimal(alb_resources.get_one("o p n"), 'all')
    assert hf.buddy2hash(tester) == "8faaf09741ddb3137653cb77ee66974a"

    # 'clean' operates in place on a truncated alignment
    tester = alb_resources.get_one("o p n")
    tester.alignments[0]._records = tester.alignments[0]._records[:5]
    Alb.trimal(tester, 'clean')
    assert hf.buddy2hash(tester) == "93a2aa21e6baf5ca70eb2de52ae8dbea"

    tester = alb_resources.get_one("o p n")
    tester_dir = TEMPDIR.subdir()
    tester.write("%s%strimal" % (tester_dir, os.path.sep))
    assert hf.buddy2hash(Alb.trimal(tester, 'gappyout')) == "2877ecfb201fc35211a4625f34c7afdd"
    """ Probably not a good idea to be calling binaries like this...
    real_trimal = Popen("trimal -in %s%strimal -gappyout" % (tester_dir, os.path.sep),
                        stdout=PIPE, shell=True).communicate()
    real_trimal = real_trimal[0].decode()
    with open("%s%strimal" % (tester_dir, os.path.sep), "w") as ofile:
        ofile.write(real_trimal)
    tester = Alb.AlignBuddy("%s%strimal" % (tester_dir, os.path.sep))
    assert hf.buddy2hash(tester) == "2877ecfb201fc35211a4625f34c7afdd"
    """
    # gappyout on an all-gappy toy alignment removes every column
    records = [SeqRecord(Seq("A--G-")), SeqRecord(Seq("--T--")),
               SeqRecord(Seq("--TG-")), SeqRecord(Seq("A---C"))]
    tester = Alb.AlignBuddy([MultipleSeqAlignment(records)])
    Alb.trimal(tester, "gappyout")
    assert "".join([str(rec.seq) for rec in tester.records()]) == ""
def test_delete_invariant_sites_ui(capsys, hf, alb_odd_resources):
    """Smoke-test delete_invariant_sites through the command-line UI."""
    test_in_args = deepcopy(in_args)
    test_in_args.delete_invariant_sites = [[]]
    tester = Alb.AlignBuddy(alb_odd_resources['dna']['single']['ambiguous'])
    Alb.command_line_ui(test_in_args, tester, skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "27233a416437eabc72aa5d57cb695036"
def test_clean_seqs(alb_resources, hf):
    """clean_seq() on protein and ambiguous-DNA alignments."""
    # Test an amino acid file
    tester = Alb.clean_seq(alb_resources.get_one("m p py"))
    assert hf.buddy2hash(tester) == "07a861a1c80753e7f89f092602271072"

    # Ambiguous characters are replaced with rep_char when ambiguous=False
    tester = Alb.clean_seq(Alb.AlignBuddy("%sambiguous_dna_alignment.fa" % hf.resource_path),
                           ambiguous=False, rep_char="X")
    assert hf.buddy2hash(tester) == "6755ea1408eddd0e5f267349c287d989"
def __init__(self):
    """Build the registry of alignment resource files and pre-loaded AlignBuddy objects."""
    # Skeleton: molecule type -> number of alignments -> {format: path}
    base_dict_structure = {'dna': {'single': {}, 'multi': {}},
                           'rna': {'single': {}, 'multi': {}},
                           'pep': {'single': {}, 'multi': {}}}

    self.resources = deepcopy(base_dict_structure)
    self.resources['dna']['single'] = {
        file_format: name.format(path=RESOURCE_PATH) for file_format, name in [
            ("clustal", "{path}Mnemiopsis_cds.clus"),
            ("fasta", "{path}Mnemiopsis_cds_aln.fa"),
            ("gb", "{path}Mnemiopsis_cds_aln.gb"),
            ("nexus", "{path}Mnemiopsis_cds.nex"),
            ("phylip", "{path}Mnemiopsis_cds.phy"),
            ("phylipr", "{path}Mnemiopsis_cds.phyr"),
            ("phylipss", "{path}Mnemiopsis_cds.physs"),
            ("phylipsr", "{path}Mnemiopsis_cds.physr"),
            ("stockholm", "{path}Mnemiopsis_cds.stklm")]}
    self.resources['dna']['multi'] = {
        file_format: name.format(path=RESOURCE_PATH) for file_format, name in [
            ("clustal", "{path}Alignments_cds.clus"),
            ("phylip", "{path}Alignments_cds.phy"),
            ("phylipr", "{path}Alignments_cds.phyr"),
            ("phylipss", "{path}Alignments_cds.physs"),
            ("phylipsr", "{path}Alignments_cds.physr"),
            ("stockholm", "{path}Alignments_cds.stklm")]}
    self.resources['rna']['single'] = {"nexus": "{path}Mnemiopsis_rna.nex".format(path=RESOURCE_PATH)}
    self.resources['rna']['multi'] = {}
    self.resources['pep']['single'] = {
        file_format: name.format(path=RESOURCE_PATH) for file_format, name in [
            ("gb", "{path}Mnemiopsis_pep_aln.gb"),
            ("nexus", "{path}Mnemiopsis_pep.nex"),
            ("phylip", "{path}Mnemiopsis_pep.phy"),
            ("phylipr", "{path}Mnemiopsis_pep.phyr"),
            ("phylipss", "{path}Mnemiopsis_pep.physs"),
            ("phylipsr", "{path}Mnemiopsis_pep.physr"),
            ("stockholm", "{path}Mnemiopsis_pep.stklm")]}
    self.resources['pep']['multi'] = {
        file_format: name.format(path=RESOURCE_PATH) for file_format, name in [
            ("clustal", "{path}Alignments_pep.clus"),
            ("phylip", "{path}Alignments_pep.phy"),
            ("phylipr", "{path}Alignments_pep.phyr"),
            ("phylipss", "{path}Alignments_pep.physs"),
            ("phylipsr", "{path}Alignments_pep.physr"),
            ("stockholm", "{path}Alignments_pep.stklm")]}

    # Create new AlignBuddy objects for each resource file
    self.alb_objs = deepcopy(base_dict_structure)
    for mol in self.resources:
        for num in self.resources[mol]:
            for file_format in self.resources[mol][num]:
                self.alb_objs[mol][num][file_format] = Alb.AlignBuddy(self.resources[mol][num][file_format])

    # Map single-letter CLI shortcut codes onto the full keys used above
    self.code_dict = {"molecule": {"p": "pep", "d": "dna", "r": "rna"},
                      "num_aligns": {"o": "single", "m": "multi"},
                      "format": {"c": "clustal", "f": "fasta", "g": "gb", "n": "nexus",
                                 "py": "phylip", "pr": "phylipr", "pss": "phylipss",
                                 "psr": "phylipsr", "s": "stockholm"}}

    self.single_letter_codes = {"p": "pep", "d": "dna", "r": "rna",
                                "o": "single", "m": "multi",
                                "c": "clustal", "f": "fasta", "g": "gb", "n": "nexus",
                                "py": "phylip", "pr": "phylipr", "pss": "phylipss",
                                "psr": "phylipsr", "s": "stockholm"}
def test_faux_align_ui(capsys, alb_resources):
    """faux_align through the command-line UI pads sequences to a common length."""
    test_in_args = deepcopy(in_args)
    test_in_args.faux_align = [None]
    test_in_args.alignments = [alb_resources.get_one("o p g", "paths")]
    Alb.command_line_ui(test_in_args, Alb.AlignBuddy, skip_exit=True)
    out, err = capsys.readouterr()
    alignbuddy = Alb.AlignBuddy(out)
    assert len(alignbuddy.alignments[0][0]) == 625
def test_concat_alignments_ui(capsys, alb_resources, hf):
    """concat_alignments through the command-line UI with various grouping patterns.

    NOTE(review): a second function with this exact name appears later in the file
    (with different expected hashes); in a single module pytest only collects the
    last definition — confirm which copy is intended.
    """
    test_in_args = deepcopy(in_args)
    test_in_args.concat_alignments = [[]]

    tester = Sb.SeqBuddy("%s/Cnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(tester, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(tester))
    tester.alignments.append(tester.alignments[0])
    tester.set_format("genbank")

    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "86349e715f41e0bdd91bbd1dc0914769"

    test_in_args.concat_alignments = [["(.).(.)-Panx(.)"]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "cd2b6594b22c431aea67fa45899f933a"

    test_in_args.concat_alignments = [["...", "Panx.*"]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "e49b26f695c910a93f93d70563fd9dd9"

    # An integer group size behaves like a fixed-width prefix pattern
    test_in_args.concat_alignments = [[3, "Panx.*"]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "e49b26f695c910a93f93d70563fd9dd9"

    test_in_args.concat_alignments = [[-9, "Panx.*"]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "aaa9d9b717a5f79cfdf5d2666fb0f687"

    test_in_args.concat_alignments = [[3, 3]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "2f37a4e395162032bf43fab291c882f4"

    test_in_args.concat_alignments = [[3, -3]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "7fa8cd803df82414a5e1e190916456d8"

    # Error paths: single alignment, then an unmatchable pattern
    Alb.command_line_ui(test_in_args, alb_resources.get_one("p o g"), skip_exit=True)
    out, err = capsys.readouterr()
    assert "Please provide at least two alignments." in err

    test_in_args.concat_alignments = [["foo"]]
    Alb.command_line_ui(test_in_args, alb_resources.get_one("m p c"), skip_exit=True)
    out, err = capsys.readouterr()
    assert "No match found for record" in err
def test_clean_seqs_ui(capsys, alb_resources, alb_odd_resources, hf):
    """clean_seq through the command-line UI, default and strict modes."""
    test_in_args = deepcopy(in_args)
    test_in_args.clean_seq = [[None]]
    Alb.command_line_ui(test_in_args, alb_resources.get_one("m p pr"), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "73b5d11dd25dd100648870228ab10d3d"

    # Strict mode replaces ambiguous characters with 'X'
    test_in_args.clean_seq = [['strict', 'X']]
    Alb.command_line_ui(test_in_args, Alb.AlignBuddy(alb_odd_resources['dna']['single']['ambiguous']),
                        skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "6755ea1408eddd0e5f267349c287d989"
def test_concat_alignments_ui(capsys, alb_resources, hf):
    """concat_alignments through the command-line UI with various grouping patterns.

    NOTE(review): a function with this exact name also appears earlier in the file
    with different expected hashes; pytest only collects one definition per module —
    confirm which copy is intended.
    """
    test_in_args = deepcopy(in_args)
    test_in_args.concat_alignments = [[]]

    tester = Sb.SeqBuddy("%s/Cnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(tester, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(tester))
    tester.alignments.append(tester.alignments[0])
    tester.set_format("genbank")

    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "d21940f3dad2295dd647f632825d8541"

    test_in_args.concat_alignments = [["(.).(.)-Panx(.)"]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "5ac908ebf7918a45664a31da480fda58"

    test_in_args.concat_alignments = [["...", "Panx.*"]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "e754350b0397cf54f531421d1e85774f"

    # An integer group size behaves like a fixed-width prefix pattern
    test_in_args.concat_alignments = [[3, "Panx.*"]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "e754350b0397cf54f531421d1e85774f"

    test_in_args.concat_alignments = [[-9, "Panx.*"]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "9d2886afc640d35618754e05223032a2"

    test_in_args.concat_alignments = [[3, 3]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "4e4101f9b5a6d44d524a9783a8c4004b"

    test_in_args.concat_alignments = [[3, -3]]
    Alb.command_line_ui(test_in_args, Alb.make_copy(tester), skip_exit=True)
    out, err = capsys.readouterr()
    assert hf.string2hash(out) == "5d9d9ac8fae604be74c436e5f0b5b6db"

    # Error paths: single alignment, then an unmatchable pattern
    Alb.command_line_ui(test_in_args, alb_resources.get_one("p o g"), skip_exit=True)
    out, err = capsys.readouterr()
    assert "Please provide at least two alignments." in err

    test_in_args.concat_alignments = [["foo"]]
    Alb.command_line_ui(test_in_args, alb_resources.get_one("m p c"), skip_exit=True)
    out, err = capsys.readouterr()
    assert "No match found for record" in err
def create_all_by_all_scores(seqbuddy, quiet=False):
    """
    Generate a multiple sequence alignment and pull out all-by-all similarity graph
    :param seqbuddy: SeqBuddy object
    :param quiet: Suppress multicore output
    :return: (AlignBuddy, pandas.DataFrame) the alignment and the seq1/seq2/score table
    """
    if len(seqbuddy) == 1:
        # Nothing to compare a single sequence against -> empty score table
        alignment = Alb.AlignBuddy(str(seqbuddy))
        sim_scores = pd.DataFrame(data=None, columns=["seq1", "seq2", "score"])
        return alignment, sim_scores

    alignment = Alb.generate_msa(Sb.make_copy(seqbuddy), tool="mafft",
                                 params="--globalpair --thread -1", quiet=True)

    # Need to specify what columns the PsiPred files map to now that there are gaps.
    psi_pred_files = {}
    for rec in alignment.records_iter():
        ss_file = pd.read_csv("%s/psi_pred/%s.ss2" % (in_args.outdir, rec.id), comment="#",
                              header=None, delim_whitespace=True)
        ss_file.columns = ["indx", "aa", "ss", "coil_prob", "helix_prob", "sheet_prob"]
        ss_counter = 0
        for indx, residue in enumerate(rec.seq):
            if residue != "-":
                # DataFrame.set_value() was removed in pandas 1.0; .at is the replacement
                ss_file.at[ss_counter, "indx"] = indx
                ss_counter += 1
        psi_pred_files[rec.id] = ss_file

    alignment = Alb.trimal(alignment, "gappyout")

    # Re-update PsiPred files, now that some columns are removed
    for rec in alignment.records_iter():
        new_psi_pred = []
        for row in psi_pred_files[rec.id].itertuples():
            if alignment.alignments[0].position_map[int(row[1])][1]:
                new_psi_pred.append(list(row)[1:])
        psi_pred_files[rec.id] = pd.DataFrame(new_psi_pred, columns=["indx", "aa", "ss", "coil_prob",
                                                                     "helix_prob", "sheet_prob"])

    # Build every unique unordered pair of record ids
    ids1 = [rec.id for rec in alignment.records_iter()]
    ids2 = [rec.id for rec in alignment.records_iter()]
    all_by_all = []
    for rec1 in ids1:
        ids2.remove(rec1)
        for rec2 in ids2:
            all_by_all.append((rec1, rec2))

    outfile = MyFuncs.TempFile()
    outfile.write("seq1,seq2,score")
    printer.clear()
    MyFuncs.run_multicore_function(all_by_all, score_sequences, [alignment, psi_pred_files, outfile.path],
                                   quiet=quiet)
    sim_scores = pd.read_csv(outfile.path, index_col=False)
    return alignment, sim_scores
def test_delete_invariant_sites(alb_resources, hf, alb_odd_resources):
    """delete_invariant_sites() with and without ambiguous-character handling."""
    tester = Alb.AlignBuddy(alb_odd_resources['dna']['single']['ambiguous'])
    tester = Alb.delete_invariant_sites(tester)
    assert hf.buddy2hash(tester) == "27233a416437eabc72aa5d57cb695036"

    tester = alb_resources.get_one("o p py")
    tester = Alb.delete_invariant_sites(tester, consider_ambiguous=False)
    assert hf.buddy2hash(tester) == "f0b16bb8133bfc9e29ad43bdfc4ad2ee"

    tester = Alb.delete_invariant_sites(tester)
    assert hf.buddy2hash(tester) == "c13031016c1f7382e808bd4e68d8f406"

    tester.alignments.append([])  # Catch empty alignment
    tester = Alb.delete_invariant_sites(tester)
    assert hf.buddy2hash(tester) == "c13031016c1f7382e808bd4e68d8f406"
def test_concat_alignments(alb_resources, hf):
    """Direct API coverage of Alb.concat_alignments(), error paths first.

    NOTE(review): a second function with this exact name appears later in the file
    (different expected hashes); pytest only collects the last definition in a module.
    """
    # Error: only one alignment present
    with pytest.raises(AttributeError) as e:
        Alb.concat_alignments(alb_resources.get_one("p o g"), '.*')
    assert "Please provide at least two alignments." in str(e)

    tester = alb_resources.get_one("o p g")
    tester.alignments.append(alb_resources.get_one("o p g").alignments[0])

    # Error: pattern matches nothing, then pattern matches multiple records
    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'foo')
    assert "No match found for record" in str(e)
    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'Panx')
    assert "Replicate matches" in str(e)

    tester = Sb.SeqBuddy("%sCnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(tester, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(tester))
    tester.alignments.append(tester.alignments[0])
    assert hf.buddy2hash(Alb.concat_alignments(Alb.make_copy(tester))) == '32a507107b7dcd044ea7760c8812441c'

    tester.set_format("gb")
    tester2 = Alb.concat_alignments(Alb.make_copy(tester), "(.).(.)-Panx(.)")
    assert hf.buddy2hash(tester2) == '5ac908ebf7918a45664a31da480fda58'

    tester.set_format("gb")
    tester2 = Alb.concat_alignments(Alb.make_copy(tester), "(.).(.)-Panx(.)")
    assert hf.buddy2hash(tester2) == '5ac908ebf7918a45664a31da480fda58'

    tester.set_format("gb")
    tester2 = Alb.concat_alignments(Alb.make_copy(tester), "...", "Panx.*")
    assert hf.buddy2hash(tester2) == 'e754350b0397cf54f531421d1e85774f'

    tester.set_format("gb")
    tester2 = Alb.concat_alignments(Alb.make_copy(tester), "...", "(P)an(x)(.)")
    assert hf.buddy2hash(tester2) == '5c6653aec09489cadcbed68fbd2f7465'

    # Alignments of unequal record counts still concatenate
    shorten = Alb.delete_records(Alb.make_copy(tester), "Ccr")
    tester.alignments[1] = shorten.alignments[1]
    assert hf.buddy2hash(Alb.concat_alignments(Alb.make_copy(tester))) == 'f3ed9139ab6f97042a244d3f791228b6'
def test_concat_alignments(alb_resources, hf):
    """Direct API coverage of Alb.concat_alignments(), error paths first.

    NOTE(review): a function with this exact name also appears earlier in the file
    (different expected hashes); pytest only collects the last definition in a module.
    """
    # Error: only one alignment present
    with pytest.raises(AttributeError) as e:
        Alb.concat_alignments(alb_resources.get_one("p o g"), '.*')
    assert "Please provide at least two alignments." in str(e)

    tester = alb_resources.get_one("o p g")
    tester.alignments.append(alb_resources.get_one("o p g").alignments[0])

    # Error: pattern matches nothing, then pattern matches multiple records
    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'foo')
    assert "No match found for record" in str(e)
    with pytest.raises(ValueError) as e:
        Alb.concat_alignments(tester, 'Panx')
    assert "Replicate matches" in str(e)

    tester = Sb.SeqBuddy("%sCnidaria_pep.nexus" % hf.resource_path)
    Sb.pull_recs(tester, "Ccr|Cla|Hec")
    tester = Alb.AlignBuddy(str(tester))
    tester.alignments.append(tester.alignments[0])
    assert hf.buddy2hash(Alb.concat_alignments(Alb.make_copy(tester))) == '32a507107b7dcd044ea7760c8812441c'

    tester.set_format("gb")
    tester2 = Alb.concat_alignments(Alb.make_copy(tester), "(.).(.)-Panx(.)")
    assert hf.buddy2hash(tester2) == 'cd2b6594b22c431aea67fa45899f933a'

    tester.set_format("gb")
    tester2 = Alb.concat_alignments(Alb.make_copy(tester), "(.).(.)-Panx(.)")
    assert hf.buddy2hash(tester2) == 'cd2b6594b22c431aea67fa45899f933a'

    tester.set_format("gb")
    tester2 = Alb.concat_alignments(Alb.make_copy(tester), "...", "Panx.*")
    assert hf.buddy2hash(tester2) == 'e49b26f695c910a93f93d70563fd9dd9'

    tester.set_format("gb")
    tester2 = Alb.concat_alignments(Alb.make_copy(tester), "...", "(P)an(x)(.)")
    assert hf.buddy2hash(tester2) == '3abfdf2217050ac2170c0de27352a8c6'

    # Alignments of unequal record counts still concatenate
    shorten = Alb.delete_records(Alb.make_copy(tester), "Ccr")
    tester.alignments[1] = shorten.alignments[1]
    assert hf.buddy2hash(Alb.concat_alignments(Alb.make_copy(tester))) == '685f24ee1fc88860dd9465035040c91e'
def test_inplace(capsys, alb_resources, hf):
    """The -i/--in_place flag overwrites the input file; warn when input is not a file."""
    tmp_dir = br.TempDir()
    tester = alb_resources.get_one("o d f")
    tester.write("%s/align" % tmp_dir.path)

    test_in_args = deepcopy(in_args)
    test_in_args.transcribe = True
    test_in_args.in_place = True
    test_in_args.alignments = ["%s/align" % tmp_dir.path]

    Alb.command_line_ui(test_in_args, tester, skip_exit=True)
    out, err = capsys.readouterr()
    tester = Alb.AlignBuddy("%s/align" % tmp_dir.path)
    assert "File over-written at:" in err
    # The assert message expression dumps the buddy to disk, but only on failure (debug aid)
    assert hf.buddy2hash(tester) == "8f78e0c99e2d6d7d9b89b8d854e02bcd", tester.write("temp.del")

    test_in_args.alignments = ["I/do/not/exist"]
    Alb.command_line_ui(test_in_args, alb_resources.get_one("o d f"), skip_exit=True)
    out, err = capsys.readouterr()
    assert "Warning: The -i flag was passed in, but the positional argument doesn't seem to be a file." in err
def main(in_args):
    """Submit a sequence file to the TOPCONS web service, or retrieve results for a jobid.

    If in_args.input is an existing file it is submitted as a new job; otherwise it is
    treated as a jobid to check/retrieve. Returns 0 on success, 1 on failure.

    Fixes: the bare `except:` around the per-algorithm regex search (which swallowed
    everything, including KeyboardInterrupt) is narrowed to AttributeError — the error
    raised when re.search() finds no match and `.group()` is called on None. Two
    user-facing typos corrected ("You input" -> "Your input", "check you typing" ->
    "check your typing").
    """
    wsdl_url = "http://v2.topcons.net/pred/api_submitseq/?wsdl"
    fixtop = ""
    if os.path.isfile(in_args.input):
        # --- Submit a new job ---
        try:
            seqbuddy = Sb.SeqBuddy(in_args.input, out_format="fasta")
            Sb.clean_seq(seqbuddy)
            # Sb.hash_ids(seqbuddy)
        except br.GuessError:
            print("Unable to read the provided input file, is it a properly formatted sequence file?")
            return 1

        if len(str(seqbuddy)) >= MAX_FILESIZE:
            print("Your input seqfile is too large! Please split the file into chunks less than %d Mb."
                  % MAX_FILESIZE_IN_MB, file=sys.stderr)
            return 1

        # ***** Here's the meat ***** #
        myclient = Client(wsdl_url, cache=None)
        ret_value = myclient.service.submitjob(str(seqbuddy), fixtop, in_args.jobname, in_args.email)

        if len(ret_value) >= 1:
            jobid, result_url, numseq_str, errinfo, warninfo = ret_value[0][:5]
            if jobid != "None" and jobid != "":
                print("You have successfully submitted your job with %s sequences. jobid = %s"
                      % (numseq_str, jobid))
                if warninfo != "" and warninfo != "None":
                    print("Warning message:\n%s" % warninfo)
            else:
                print("Failed to submit job!")
                if errinfo != "" and errinfo != "None":
                    print("Error message:\n%s" % errinfo)
                if warninfo != "" and warninfo != "None":
                    print("Warning message:\n%s" % warninfo)
        else:
            print("Failed to submit job!")
            return 1
    else:
        # --- Treat in_args.input as a jobid: check status and retrieve results ---
        myclient = Client(wsdl_url, cache=None)
        ret_value = myclient.service.checkjob(in_args.input)
        if len(ret_value) >= 1:
            status, result_url, errinfo = ret_value[0][:3]
            if status == "Failed":
                print("Your job with jobid %s is failed!" % in_args.input)
                if errinfo != "" and errinfo != "None":
                    print("Error message:\n%s" % errinfo)
            elif status == "Finished":
                print("Your job with jobid %s is finished!" % in_args.input)
                if not os.path.exists(in_args.outpath):
                    try:
                        os.makedirs(in_args.outpath)
                    except OSError:
                        print("Failed to create the outpath %s" % in_args.outpath)
                        return 1

                outfile = "%s/%s.zip" % (in_args.outpath, in_args.input)
                if not os.path.exists(outfile):
                    print("Retrieving")
                    urllib.request.urlretrieve(result_url, outfile)
                    if os.path.exists(outfile):
                        print("The result file %s has been retrieved for jobid %s" % (outfile, in_args.input))
                    else:
                        print("Failed to retrieve result for jobid %s" % in_args.input)

                with zipfile.ZipFile(outfile) as zf:
                    zf.extractall(in_args.outpath)

                with open("%s/%s/query.result.txt" % (in_args.outpath, in_args.input), "r") as ifile:
                    topcons = ifile.read()
                # Records are delimited by long hash-rule separators; drop header/footer chunks
                topcons = topcons.split("#########################################################"
                                        "#####################")[2:-1]
                records = []
                for rec in topcons:
                    seq_id = re.search("Sequence name: (.*)", rec).group(1).strip()
                    seq = re.search("Sequence:\n([A-Z]+)", rec).group(1).strip()
                    alignment = ""
                    for algorithm in ["TOPCONS", "OCTOPUS", "Philius", "PolyPhobius", "SCAMPI", "SPOCTOPUS"]:
                        try:
                            top_file = re.search("%s predicted topology:\n([ioMS]+)" % algorithm,
                                                 rec).group(1).strip()
                            alignment += ">%s\n%s\n\n" % (algorithm, top_file)
                        except AttributeError:
                            # re.search() found no prediction for this algorithm; note it and move on
                            print("%s: %s" % (seq_id, algorithm))

                    alignment = Alb.AlignBuddy(alignment)
                    # print(alignment)
                    Alb.consensus_sequence(alignment)
                    cons_seq = Sb.SeqBuddy(">%s\n%s\n" % (seq_id, seq), out_format="genbank")
                    # Annotate each membrane-spanning run (M/X) as a TMD feature
                    counter = 1
                    for tmd in re.finditer("([MX]+)", str(alignment.records()[0].seq)):
                        Sb.annotate(cons_seq, "TMD%s" % counter, "%s-%s" % (tmd.start(), tmd.end()))
                        counter += 1
                    records.append(cons_seq.records[0])

                seqbuddy = Sb.SeqBuddy(records, out_format="genbank")
                seqbuddy.write("%s/%s.gb" % (in_args.outpath, in_args.input))
            elif status == "None":
                print("Your job with jobid %s does not exist! Please check your typing!" % in_args.input)
            else:
                print("Your job with jobid %s is not ready, status = %s" % (in_args.input, status))
        else:
            print("Failed to get job!")
            return 1
    return 0