Пример #1
0
def test_mafft_inputs(sb_resources, hf):
    """Align the "d f" resource with MAFFT (no extra parameters) and pin the result hash."""
    # FASTA input
    unaligned = sb_resources.get_one("d f")
    aligned = Alb.generate_msa(unaligned, 'mafft')
    digest = hf.buddy2hash(aligned)
    assert digest == 'f94e0fd591dad83bd94201f0af038904'
Пример #2
0
def test_mafft_outputs(sb_resources, hf):
    """Run MAFFT with '--clustalout' on the "d f" resource and pin the result hash."""
    # CLUSTAL output requested through the params string
    unaligned = sb_resources.get_one("d f")
    aligned = Alb.generate_msa(unaligned, 'mafft', '--clustalout')
    digest = hf.buddy2hash(aligned)
    assert digest == 'd6046c77e2bdb5683188e5de653affe5'
Пример #3
0
def test_clustalw_multi_param(sb_resources, hf):
    """Pass two options at once to the tool named by ``clustalw_bin`` and pin the result hash.

    ``clustalw_bin`` is defined outside this block.
    """
    unaligned = sb_resources.get_one("d f")
    multi_params = '-output=phylip -noweights'
    aligned = Alb.generate_msa(unaligned, clustalw_bin, multi_params)
    digest = hf.buddy2hash(aligned)
    assert digest == 'ae9126eb8c482a82d4060d175803c478'
Пример #4
0
def test_pagan(sb_resources, hf):
    """Exercise PAGAN through ``Alb.generate_msa`` with several format options.

    The first part aligns a fresh copy of the "d f" resource for each
    parameter set and checks that the result hash is one of the accepted
    values. The second part only checks that three calls on a record subset
    complete without raising.
    """
    # Each entry: (extra positional arguments after the tool name, accepted hashes).
    hashed_runs = (
        # FASTA (no params argument passed at all)
        ((), ['da1c6bb365e2da8cb4e7fad32d7dafdb', '1219647676b359a5ad0be6d9dda81c73']),
        # NEXUS
        (('-f nexus',), ['f93607e234441a2577fa7d8a387ef7ec', '42bfddd38fa4ed75a99841abf2112e54']),
        # PHYLIPI
        (('-f phylipi',), ['09dd492fde598670d7cfee61d4e2eab8', '438e1551b3f1c8526fc8a44eaf2a3dc1']),
        # PHYLIPS
        (('-f phylips',), ['249c88cb64d41c47388514c65bf8fff1', '6366e50da5a6b33d2d281d6ea13df0b7']),
        # Multi-param (a single accepted hash)
        (('-f nexus --translate',), ['dd140ec4eb895ce75d574498a58aa28a']),
    )
    for extra_args, accepted in hashed_runs:
        unaligned = sb_resources.get_one("d f")
        aligned = Alb.generate_msa(unaligned, 'pagan', *extra_args)
        assert hf.buddy2hash(aligned) in accepted

    # A few edge cases: no assertion on the output, the calls just have to complete.
    for format_flag in ("-f foo", "-f nexus", "-f phylipi"):
        unaligned = sb_resources.get_one("d f")
        subset = Sb.pull_recs(unaligned, "α[2345]")
        Alb.generate_msa(subset, "pagan", format_flag, quiet=True)
Пример #5
0
def test_muscle_outputs(sb_resources, hf):
    """Run MUSCLE with '-clw' on the "d f" resource and pin the result hash."""
    # NOTE(review): the previous comment here said "FASTA", but the call passes
    # '-clw' (presumably CLUSTALW-format output) - confirm which was intended.
    unaligned = sb_resources.get_one("d f")
    aligned = Alb.generate_msa(unaligned, 'muscle', '-clw')
    digest = hf.buddy2hash(aligned)
    assert digest == '91542667cef761ccaf39d8cb4e877944'
Пример #6
0
def test_muscle_multi_param(sb_resources, hf):
    """Pass two MUSCLE options in one params string and pin the result hash."""
    unaligned = sb_resources.get_one("d f")
    multi_params = '-clw -diags'
    aligned = Alb.generate_msa(unaligned, 'muscle', multi_params)
    digest = hf.buddy2hash(aligned)
    assert digest == '91542667cef761ccaf39d8cb4e877944'
Пример #7
0
def test_prank_inputs(sb_resources):
    """Align the 'α1' records of the "d f" resource with PRANK and check the output format."""
    # FASTA
    full_set = sb_resources.get_one("d f")
    subset = Sb.pull_recs(full_set, 'α1')
    aligned = Alb.generate_msa(subset, 'prank', '-once')
    assert aligned.out_format == 'fasta'
Пример #8
0
def test_prank_outputs1(sb_resources):
    """Ask PRANK for NEXUS output and check the resulting object's ``out_format``."""
    # NEXUS
    full_set = sb_resources.get_one("d f")
    subset = Sb.pull_recs(full_set, 'α1')
    aligned = Alb.generate_msa(subset, 'prank', '-f=nexus -once')
    assert aligned.out_format == 'nexus'
Пример #9
0
def test_generate_alignments_genbank(sb_resources, hf):
    """Align the "p g" resource with MAFFT and pin the result hash."""
    unaligned = sb_resources.get_one("p g")
    aligned = Alb.generate_msa(unaligned, "mafft")
    digest = hf.buddy2hash(aligned)
    assert digest == "a4ab6b2a2ddda38a4d04abc18c54d18b"
Пример #10
0
def test_generate_alignments_edges2(tool, params, sb_resources):
    """Check that aligning a record subset with the given tool/params does not raise.

    ``tool`` and ``params`` are supplied from outside this block
    (presumably via pytest parametrization - not visible here).
    """
    full_set = sb_resources.get_one("d f")
    subset = Sb.pull_recs(full_set, "α[2345]")
    # No assertion: the call only has to complete.
    Alb.generate_msa(subset, tool, params, quiet=True)
Пример #11
0
def test_mafft_multi_param(sb_resources, hf):
    """Pass two MAFFT options in one params string and pin the result hash."""
    unaligned = sb_resources.get_one("d f")
    multi_params = '--clustalout --noscore'
    aligned = Alb.generate_msa(unaligned, 'mafft', multi_params)
    digest = hf.buddy2hash(aligned)
    assert digest == 'd6046c77e2bdb5683188e5de653affe5'
Пример #12
0
def test_muscle_inputs(sb_resources, hf):
    """Align the "d f" resource with MUSCLE (no extra parameters) and pin the result hash."""
    # FASTA input
    unaligned = sb_resources.get_one("d f")
    aligned = Alb.generate_msa(unaligned, 'muscle')
    digest = hf.buddy2hash(aligned)
    assert digest == '5ec18f3e0c9f5cf96944a1abb130232f'
Пример #13
0
def test_prank_outputs3(sb_resources):
    """Ask PRANK for 'phylips' output and check ``out_format`` is reported as 'phylipsr'."""
    # PHYLIPS
    full_set = sb_resources.get_one("d f")
    subset = Sb.pull_recs(full_set, 'α1')
    aligned = Alb.generate_msa(subset, 'prank', params='-f=phylips -once')
    assert aligned.out_format == 'phylipsr'
Пример #14
0
def test_generate_alignments_genbank(sb_resources, hf):
    """Align the "p g" resource with MAFFT and pin the result hash."""
    unaligned = sb_resources.get_one("p g")
    aligned = Alb.generate_msa(unaligned, "mafft")
    digest = hf.buddy2hash(aligned)
    assert digest == "ff3d1e474b1b1b76fdda02ebcb225cff"
Пример #15
0
    # Create the output directory and its working sub-directories.
    # NOTE(review): os.makedirs is called without exist_ok, so it raises if a
    # directory already exists - presumably outdir is expected to be new; confirm.
    os.makedirs(in_args.outdir)
    os.makedirs("%s/alignments" % in_args.outdir)
    os.makedirs("%s/mcmcmc" % in_args.outdir)
    os.makedirs("%s/sim_scores" % in_args.outdir)
    os.makedirs("%s/psi_pred" % in_args.outdir)
    # If a psi_pred directory was supplied, copy every entry in it into
    # <outdir>/psi_pred.
    if in_args.psi_pred and os.path.isdir(in_args.psi_pred):
        files = os.listdir(in_args.psi_pred)
        for f in files:
            shutil.copyfile("%s/%s" % (in_args.psi_pred, f), "%s/psi_pred/%s" % (in_args.outdir, f))

    print("\nExecuting PSI-Pred")
    # Apply _psi_pred to each sequence record via the multicore helper.
    MyFuncs.run_multicore_function(sequences.records, _psi_pred)

    print("\nGenerating initial all-by-all")
    #alignbuddy, scores_data = create_all_by_all_scores(sequences)
    # Align a copy of the sequences with MAFFT and save the alignment as group_0.
    alignbuddy = Alb.generate_msa(Sb.make_copy(sequences), tool="mafft", params="--globalpair --thread -1", quiet=True)
    alignbuddy.write("%s/alignments/group_0.aln" % in_args.outdir)
    # NOTE(review): the scores are read from "temp_group0.csv" in the current
    # working directory; nothing visible here writes that file - presumably it
    # is produced by an earlier step. TODO confirm.
    scores_data = pd.read_csv("temp_group0.csv", index_col=False)
    scores_data.to_csv("%s/sim_scores/group_0.csv" % in_args.outdir, index=False)
    # Collect the distinct ids appearing in the seq1/seq2 columns (the index of
    # value_counts) and build the base Cluster from them plus the score table.
    group_0 = pd.concat([scores_data.seq1, scores_data.seq2])
    group_0 = group_0.value_counts()
    group_0 = Cluster([i for i in group_0.index], scores_data)

    print("\nScoring base cluster")
    group_0.score()

    #taxa_count = [x.split("-")[0] for x in master_cluster.seq_ids]
    #taxa_count = pd.Series(taxa_count)
    #taxa_count = taxa_count.value_counts()

    print("Creating clusters")