示例#1
0
def run_nrpspks_specific_hmmer(seq_record, withinclustergenes, pksnrpsvars):
    """Scan the given genes against three HMM libraries and store the hits.

    The multi-FASTA built from ``withinclustergenes`` is scanned against
    abmotifs.hmm, nrpspksdomains.hmm and ksdomains.hmm (all located next to
    this module). The parsed results are stored on ``pksnrpsvars`` as
    ``motifdict``, ``domaindict`` and ``ksdomaindict`` respectively.
    ``filter_nonterminal_docking_domains()`` is invoked once ``domaindict``
    has been populated and before the KS domain scan.
    """
    fasta = utils.get_specific_multifasta(withinclustergenes)

    # abMotifs: scanned with "-E 0.25"
    motif_hits = utils.run_hmmscan(
        utils.get_full_path(__file__, "abmotifs.hmm"),
        fasta,
        ["-E", "0.25"])
    motif_lengths = utils.hmmlengths(
        utils.get_full_path(__file__, "abmotifs.hmm"))
    pksnrpsvars.motifdict = parse_hmmscan_results(motif_hits, motif_lengths)

    # C/A/PCP/E/KS/AT/ATd/DH/KR/ER/ACP/TE/TD/COM/Docking/MT/CAL domains.
    # The same option list is reused for the KS domain scan further down.
    cutoff_opts = ["--cut_tc"]
    domain_hits = utils.run_hmmscan(
        utils.get_full_path(__file__, "nrpspksdomains.hmm"),
        fasta,
        cutoff_opts)
    domain_lengths = utils.hmmlengths(
        utils.get_full_path(__file__, "nrpspksdomains.hmm"))
    pksnrpsvars.domaindict = parse_hmmscan_results(domain_hits,
                                                   domain_lengths)
    filter_nonterminal_docking_domains(seq_record, pksnrpsvars)

    # KS domains, used together with the protein domain composition to
    # detect NRPS/PKS types
    ks_lengths = utils.hmmlengths(
        utils.get_full_path(__file__, "ksdomains.hmm"))
    ks_hits = utils.run_hmmscan(
        utils.get_full_path(__file__, "ksdomains.hmm"),
        fasta,
        cutoff_opts)
    pksnrpsvars.ksdomaindict = parse_hmmscan_results(ks_hits, ks_lengths)
示例#2
0
def run(seq_record, options):
    """Run hmmscan against Pfam-A for all CDS features and annotate the hits.

    If ``options`` has no ``pfamdir`` entry, it is set to this module's
    directory. When ``options.skip_cleanup`` is set, the raw hmmscan output
    is kept in ``fullhmmer.txt`` inside ``options.full_outputfolder_path``:
    an existing file is parsed instead of re-running the scan, otherwise the
    scan is run and asked to write that file.
    """
    if 'pfamdir' not in options:
        options.pfamdir = utils.get_full_path(__file__, '')

    query_fasta = utils.get_multifasta(seq_record)
    pfam_db = path.join(options.pfamdir, 'Pfam-A.hmm')

    logging.info('Running whole-genome pfam search')

    # Only keep a results file around when cleanup is being skipped.
    cached_output = None
    if options.skip_cleanup:
        cached_output = path.join(options.full_outputfolder_path,
                                  'fullhmmer.txt')

    if cached_output is None:
        results = utils.run_hmmscan(pfam_db, query_fasta)
    elif path.exists(cached_output):
        # Reuse the output of an earlier run.
        results = list(SearchIO.parse(cached_output, 'hmmer3-text'))
    else:
        results = utils.run_hmmscan(pfam_db, query_fasta,
                                    results_file=cached_output)

    _annotate(seq_record, options, results)
示例#3
0
def run(seq_record, options):
    """Run hmmscan against Pfam-A for all CDS features and annotate the hits.

    If ``options`` has no ``pfamdir`` entry, it is set to this module's
    directory before the Pfam-A.hmm path is built.
    """
    if 'pfamdir' not in options:
        options.pfamdir = utils.get_full_path(__file__, '')

    query_fasta = utils.get_multifasta(seq_record)
    pfam_db = path.join(options.pfamdir, 'Pfam-A.hmm')

    logging.info('Running whole-genome pfam search')
    hits = utils.run_hmmscan(pfam_db, query_fasta)
    _annotate(seq_record, options, hits)
示例#4
0
    def test_run_hmmscan(self):
        """Test utils.run_hmmscan().

        Both the external command execution and the result parser are
        mocked; the test checks the single mocked hit is returned and that
        the recorded call trace matches the expected one.
        """
        mock('Bio.SearchIO.parse', tracker=self.tt, returns=['mock result'])
        mock('utils.execute', tracker=self.tt, returns=('output', 'error', 0))

        results = utils.run_hmmscan('fake.hmm', ">testinput\nMADEUP")

        self.assertEqual(len(results), 1)
        only_hit = results.pop()
        self.assertEqual('mock result', only_hit)

        # Trace recorded by the mock tracker for the call above.
        expected_trace = r"""    Called utils.execute(
        ['hmmscan', '--cpu', '2', '--nobias', 'fake.hmm', '-'],
        input='>testinput\nMADEUP')
    Called Bio.SearchIO.parse(
        <cStringIO.StringI object at ...>,
        'hmmer3-text')"""
        assert_same_trace(self.tt, expected_trace)
示例#5
0
    def test_run_hmmscan_write_resultfile(self):
        """Test utils.run_hmmscan() writing a results file.

        Command execution and result parsing are mocked. Besides the
        returned hit and the call trace, the test checks that the mocked
        command output ('output') ends up in the requested results file.
        """
        mock('Bio.SearchIO.parse', tracker=self.tt, returns=['mock result'])
        mock('utils.execute', tracker=self.tt, returns=('output', 'error', 0))

        expected = r"""    Called utils.execute(
        ['hmmscan', '--cpu', '2', '--nobias', 'fake.hmm', '-'],
        input='>testinput\nMADEUP')
    Called Bio.SearchIO.parse(
        <cStringIO.StringI object at ...>,
        'hmmer3-text')"""

        results_file = path.join(self.tmpdir, 'fake_hmmscan_output.txt')
        hits = utils.run_hmmscan('fake.hmm',
                                 ">testinput\nMADEUP",
                                 results_file=results_file)
        self.assertEqual(len(hits), 1)

        hit = hits.pop()
        self.assertEqual('mock result', hit)
        assert_same_trace(self.tt, expected)
        self.assertTrue(path.exists(results_file))
        # Read through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(results_file) as handle:
            written = handle.read()
        self.assertEqual(written, 'output')
示例#6
0
def run_smcog_analysis(seq_record, options):
    """Run the smCOG analysis for all cluster genes of ``seq_record``.

    Steps, in order:

    1. hmmscan all within-cluster CDS features against smcogs.hmm
       (located next to this module) with ``-E 1E-6``.
    2. Write the hits of every gene that is not a PKS/NRPS core gene to
       ``smcogs.txt`` inside ``<options.outputfoldername>/smcogs``.
    3. Split those non-core genes into ``options.cpus`` slices and run
       ``smcog_analysis()`` on each slice in its own process, with a
       temporary directory as working directory.
    4. Register every file in the smcogs folder whose name contains
       ".png" in ``smcogtreedict``.
    5. Pass the cluster genes and the collected results to ``_annotate()``.

    Side effects: sets ``options.smcogsfolder`` (creating the folder if it
    does not exist) and temporarily changes the working directory. The
    original working directory is restored before ``_annotate()`` is
    called, and also when an error occurs.
    """
    logging.info('Running smCOG analysis')
    smcogvars = utils.Storage()
    smcogvars.smcogtreedict = {}
    smcogvars.smcogdict = {}
    geneclustergenes = utils.get_withincluster_cds_features(seq_record)
    pksnrpscoregenes = utils.get_pksnrps_cds_features(seq_record)
    logging.info("Performing smCOG analysis")
    smcogs_hmm = utils.get_full_path(__file__, "smcogs.hmm")
    smcogs_fasta = utils.get_specific_multifasta(geneclustergenes)
    smcogs_opts = ["-E", "1E-6"]
    smcogs_results = utils.run_hmmscan(smcogs_hmm, smcogs_fasta, smcogs_opts)
    hmmlengthsdict = utils.hmmlengths(smcogs_hmm)
    smcogvars.smcogdict = parse_hmmscan_results(smcogs_results, hmmlengthsdict)

    options.smcogsfolder = path.abspath(path.join(options.outputfoldername, "smcogs"))
    if not os.path.exists(options.smcogsfolder):
        os.mkdir(options.smcogsfolder)

    # PKS/NRPS core genes are excluded from both the hit table and the tree
    # building; collect the remaining genes (with their ids) once.
    core_gene_ids = set(utils.get_gene_id(feature) for feature in pksnrpscoregenes)
    noncore_genes = []
    for feature in geneclustergenes:
        gene_id = utils.get_gene_id(feature)
        if gene_id not in core_gene_ids:
            noncore_genes.append((gene_id, feature))

    originaldir = os.getcwd()
    os.chdir(options.smcogsfolder)
    try:
        # Write output
        with open("smcogs.txt", "w") as smcogfile:
            for gene_id, _ in noncore_genes:
                if gene_id not in smcogvars.smcogdict:
                    continue
                smcogfile.write(">> " + gene_id + "\n")
                smcogfile.write("name\tstart\tend\te-value\tscore\n")
                smcogfile.write("** smCOG hits **\n")
                for hit in smcogvars.smcogdict[gene_id]:
                    # Columns follow the header line written above.
                    smcogfile.write("\t".join(str(hit[col]) for col in range(5)) + "\n")
                smcogfile.write("\n\n")

        # smCOG phylogenetic tree construction
        logging.info("Calculating and drawing phylogenetic trees of cluster genes "
            "with smCOG members")
        with TemporaryDirectory(change=True):
            smcoganalysisgenes = [feature for _, feature in noncore_genes]
            equalpartsizes = len(smcoganalysisgenes) // options.cpus
            processes = []
            for index in range(options.cpus):
                start = index * equalpartsizes
                # The last slice is open-ended so it also takes the remainder.
                stop = None if index == options.cpus - 1 else start + equalpartsizes
                processes.append(Process(target=smcog_analysis,
                                         args=[smcoganalysisgenes[start:stop],
                                               index, seq_record,
                                               smcogvars.smcogdict,
                                               options.smcogsfolder]))
            for process in processes:
                process.start()
            # join() blocks until the process has exited, so no polling of
            # is_alive() is needed.
            for process in processes:
                process.join()

        # options.smcogsfolder is absolute, so it can be listed regardless of
        # the current working directory.
        for filename in os.listdir(options.smcogsfolder):
            if ".png" in filename:
                tag = filename.split(".png")[0]
                smcogvars.smcogtreedict[tag] = tag + ".png"
    finally:
        # Always hand the caller back its original working directory.
        os.chdir(originaldir)
    _annotate(geneclustergenes, smcogvars, options)