Exemplo n.º 1
0
def annotate_pksnrps(pksnrpsvars, seq_record, options):
    """Run the PKS/NRPS annotation steps for one record.

    Fetches the CDS features reported by
    ``utils.get_withincluster_cds_features``. When there are none, a debug
    message is logged and ``pksnrpsvars`` is returned unchanged. Otherwise
    ``run_nrpspks_specific_hmmer`` and ``name_nrpspks`` are called in that
    order, and the result of ``utils.get_pksnrps_cds_features`` is stored on
    ``pksnrpsvars.pksnrpscoregenes``.

    Returns the ``pksnrpsvars`` object that was passed in.
    """
    cluster_cds = utils.get_withincluster_cds_features(seq_record)

    # Guard clause: nothing to annotate when no gene lies within a cluster.
    if not cluster_cds:
        logging.debug('No genes within a sec_met cluster found for %r' %
                      seq_record.id)
        return pksnrpsvars

    run_nrpspks_specific_hmmer(seq_record, cluster_cds, pksnrpsvars)
    name_nrpspks(seq_record, pksnrpsvars, cluster_cds, options)

    core_features = utils.get_pksnrps_cds_features(seq_record)
    pksnrpsvars.pksnrpscoregenes = core_features
    return pksnrpsvars
Exemplo n.º 2
0
def retrieve_gene_cluster_annotations(seq_record, smcogdict, gtrcoglist,
                                      transportercoglist, geneclusternr):
    """Collect per-gene data for the cluster numbered ``geneclusternr``.

    Parameters:
        seq_record: record handed to the ``utils`` lookup helpers.
        smcogdict: mapping of gene id -> sequence of smCOG entries; only the
            first entry of each sequence is inspected here.
        gtrcoglist: container; a gene whose first smCOG entry is in it is
            reported in ``gtrs``.
        transportercoglist: container; a gene whose first smCOG entry is in
            it is reported in ``transporters``.
        geneclusternr: cluster number passed to ``utils.get_cluster_by_nr``.

    Returns an 11-tuple:
        clustergenes   - gene ids of the cluster's CDS features, in order
        clustertype    - result of ``utils.get_cluster_type`` for the cluster
        annotations    - dict gene id -> first ``product`` qualifier, or
                         'Unannotated gene' when the qualifier is absent
        colors         - "#810E15" for genes among the sec_met CDS features,
                         "grey" otherwise (parallel to clustergenes)
        starts, ends   - ``location.start`` / ``location.end`` per gene
        strands        - "-" when ``strand == -1``, "+" otherwise
        pksnrpsprots   - gene ids that are also PKS/NRPS CDS features
        gtrs           - gene ids whose first smCOG entry is in gtrcoglist
        transporters   - gene ids whose first smCOG entry is in
                         transportercoglist
        clustersize    - ``max(ends) - min(starts)``

    Raises:
        ValueError: if the cluster has no CDS features (``max``/``min`` of an
            empty list).
    """
    # Membership is tested once per gene, so use sets rather than lists.
    allcoregenes = set(
        utils.get_gene_id(cds)
        for cds in utils.get_secmet_cds_features(seq_record))
    pksnrpscoregenes = set(
        utils.get_gene_id(cds)
        for cds in utils.get_pksnrps_cds_features(seq_record))
    feature_by_id = utils.get_feature_dict(seq_record)

    # Look the cluster up once; it is needed for both the genes and the type.
    cluster = utils.get_cluster_by_nr(seq_record, geneclusternr)
    clustergenes = [
        utils.get_gene_id(cds)
        for cds in utils.get_cluster_cds_features(cluster, seq_record)
    ]
    clustertype = utils.get_cluster_type(cluster)

    annotations = {}
    colors = []
    starts = []
    ends = []
    strands = []
    pksnrpsprots = []
    gtrs = []
    transporters = []
    for gene_id in clustergenes:
        cdsfeature = feature_by_id[gene_id]
        # `in` works on both Python 2 and 3; dict.has_key() is Python 2 only.
        if 'product' in cdsfeature.qualifiers:
            annotations[gene_id] = cdsfeature.qualifiers['product'][0]
        else:
            annotations[gene_id] = 'Unannotated gene'
        starts.append(cdsfeature.location.start)
        ends.append(cdsfeature.location.end)
        strands.append("-" if cdsfeature.strand == -1 else "+")
        colors.append("#810E15" if gene_id in allcoregenes else "grey")
        if gene_id in pksnrpscoregenes:
            pksnrpsprots.append(gene_id)
        # A missing key and an empty hit list are both treated as "no hit".
        smcog_hits = smcogdict.get(gene_id)
        if smcog_hits:
            first_hit = smcog_hits[0]
            if first_hit in gtrcoglist:
                gtrs.append(gene_id)
            if first_hit in transportercoglist:
                transporters.append(gene_id)
    clustersize = max(ends) - min(starts)
    return (clustergenes, clustertype, annotations, colors, starts, ends,
            strands, pksnrpsprots, gtrs, transporters, clustersize)
Exemplo n.º 3
0
def retrieve_pksnrps_info(seq_record, geneclusternr, pksnrpsprots):
    """Gather domain layout and substrate predictions for PKS/NRPS genes.

    Parameters:
        seq_record: record handed to the ``utils`` lookup helpers.
        geneclusternr: cluster number used to fetch the structure prediction.
        pksnrpsprots: iterable of gene ids; each must be a key of the domain
            dict returned by ``utils.get_nrpspks_domain_dict``.

    Returns an 8-tuple:
        pksnrpsprotsnames           - gene ids of all PKS/NRPS CDS features
        pksnrpsdomains              - gene id -> [domlist, domsdetails], where
                                      domlist holds numbered domain names
                                      (e.g. "PKS_AT1") and domsdetails maps
                                      each name to [domain[1], domain[2]]
        substrspecnrpspredictordict - "<gene>_A<nr>" -> [code pred, svm pred]
        substrspecminowadict        - "<gene>_A|_AT|_CAL<nr>" -> Minowa pred
        substrspecpkssigdict        - "<gene>_AT<nr>" -> PKS code pred
        substrspecconsensusdict     - "<gene>_A|_AT|_CAL<nr>" -> consensus
        krpredictionsdict           - "<gene>_KR<nr>" -> [activity, stereo]
        structpred                  - ``utils.get_structure_pred`` result for
                                      the cluster

    Raises:
        KeyError: if a gene id is missing from the domain dict, or a
            prediction key is missing from one of the prediction mappings
            (assuming those are plain dicts).
    """
    pksnrpsprotsnames = [
        utils.get_gene_id(cds)
        for cds in utils.get_pksnrps_cds_features(seq_record)
    ]
    domaindict = utils.get_nrpspks_domain_dict(seq_record)
    preds = utils.get_nrpspks_substr_spec_preds(seq_record)
    pksnrpsdomains = {}
    substrspecnrpspredictordict = {}
    substrspecminowadict = {}
    substrspecpkssigdict = {}
    substrspecconsensusdict = {}
    krpredictionsdict = {}
    for gene_id in pksnrpsprots:
        domlist = []
        domsdetails = {}
        for domain in domaindict[gene_id]:
            # Number each domain by how often its name already occurred in
            # this gene: first "PKS_AT" -> "PKS_AT1", second -> "PKS_AT2", ...
            nr = 1
            while domain[0] + str(nr) in domlist:
                nr += 1
            domname = domain[0] + str(nr)
            domlist.append(domname)
            domsdetails[domname] = [domain[1], domain[2]]
            # NOTE(review): "AMP-binding" and "A-OX" are numbered separately
            # but share the "_A" key prefix, so a gene carrying both could
            # map two domains to the same key - confirm against how the
            # predictions are keyed upstream.
            if "AMP-binding" in domname or "A-OX" in domname:
                key = gene_id + "_A" + str(nr)
                substrspecminowadict[key] = preds.minowa_nrps_preds[key]
                substrspecnrpspredictordict[key] = [
                    preds.nrps_code_preds[key],
                    preds.nrps_svm_preds[key],
                ]
                substrspecconsensusdict[key] = preds.consensuspreds[key]
            if "PKS_AT" in domname:
                key = gene_id + "_AT" + str(nr)
                substrspecminowadict[key] = preds.minowa_pks_preds[key]
                substrspecpkssigdict[key] = preds.pks_code_preds[key]
                substrspecconsensusdict[key] = preds.consensuspreds[key]
            if "CAL_domain" in domname:
                key = gene_id + "_CAL" + str(nr)
                substrspecminowadict[key] = preds.minowa_cal_preds[key]
                substrspecconsensusdict[key] = preds.consensuspreds[key]
            if "PKS_KR" in domname:
                key = gene_id + "_KR" + str(nr)
                krpredictionsdict[key] = [
                    preds.kr_activity_preds[key],
                    preds.kr_stereo_preds[key],
                ]
        pksnrpsdomains[gene_id] = [domlist, domsdetails]
    structpred = utils.get_structure_pred(
        utils.get_cluster_by_nr(seq_record, geneclusternr))
    return (pksnrpsprotsnames, pksnrpsdomains, substrspecnrpspredictordict,
            substrspecminowadict, substrspecpkssigdict,
            substrspecconsensusdict, krpredictionsdict, structpred)
Exemplo n.º 4
0
def run_smcog_analysis(seq_record, options):
    """Run the smCOG hmmscan, write its hits, and build the smCOG trees.

    Steps, in order:
      1. hmmscan the within-cluster CDS features against ``smcogs.hmm`` and
         parse the results into ``smcogvars.smcogdict``.
      2. Set ``options.smcogsfolder`` to ``<outputfoldername>/smcogs``
         (created if missing) and write ``smcogs.txt`` there, listing the
         hits of every cluster gene that is not a PKS/NRPS core gene.
      3. Split those genes into ``options.cpus`` slices and run
         ``smcog_analysis`` on each slice in its own process, inside a
         temporary working directory.
      4. Register every file in the smcogs folder whose name contains
         ".png" in ``smcogvars.smcogtreedict``.
      5. Call ``_annotate`` with the cluster genes and the collected results.

    Parameters:
        seq_record: record to analyse.
        options: must provide ``outputfoldername`` and ``cpus``; gains the
            ``smcogsfolder`` attribute as a side effect.

    Returns None. The working directory is restored to its original value
    even if a step fails.

    Raises:
        ZeroDivisionError: if ``options.cpus`` is 0.
    """
    logging.info('Running smCOG analysis')
    smcogvars = utils.Storage()
    smcogvars.smcogtreedict = {}
    smcogvars.smcogdict = {}
    geneclustergenes = utils.get_withincluster_cds_features(seq_record)
    pksnrpscoregenes = utils.get_pksnrps_cds_features(seq_record)
    logging.info("Performing smCOG analysis")
    smcogs_fasta = utils.get_specific_multifasta(geneclustergenes)
    smcogs_opts = ["-E", "1E-6"]
    smcogs_hmm = utils.get_full_path(__file__, "smcogs.hmm")
    smcogs_results = utils.run_hmmscan(smcogs_hmm, smcogs_fasta, smcogs_opts)
    hmmlengthsdict = utils.hmmlengths(smcogs_hmm)
    smcogvars.smcogdict = parse_hmmscan_results(smcogs_results, hmmlengthsdict)

    # Write output
    options.smcogsfolder = path.abspath(path.join(options.outputfoldername, "smcogs"))
    if not os.path.exists(options.smcogsfolder):
        os.mkdir(options.smcogsfolder)
    originaldir = os.getcwd()
    os.chdir(options.smcogsfolder)
    try:
        # PKS/NRPS core genes are excluded from both the report and the trees.
        pksnrpscoregenenames = set(
            utils.get_gene_id(feature) for feature in pksnrpscoregenes)
        smcoganalysisgenes = [
            feature for feature in geneclustergenes
            if utils.get_gene_id(feature) not in pksnrpscoregenenames
        ]

        with open("smcogs.txt", "w") as smcogfile:
            for feature in smcoganalysisgenes:
                gene_id = utils.get_gene_id(feature)
                if gene_id not in smcogvars.smcogdict:
                    continue
                smcogfile.write(">> " + gene_id + "\n")
                smcogfile.write("name\tstart\tend\te-value\tscore\n")
                smcogfile.write("** smCOG hits **\n")
                for hit in smcogvars.smcogdict[gene_id]:
                    smcogfile.write(
                        "\t".join(str(hit[col]) for col in range(5)) + "\n")
                smcogfile.write("\n\n")

        # smCOG phylogenetic tree construction
        logging.info("Calculating and drawing phylogenetic trees of cluster genes "
            "with smCOG members")
        with TemporaryDirectory(change=True):
            # One slice per CPU; the last slice also takes the remainder.
            equalpartsizes = len(smcoganalysisgenes) // options.cpus
            last = options.cpus - 1
            smcogsets = [
                smcoganalysisgenes[i * equalpartsizes:(i + 1) * equalpartsizes]
                for i in range(last)
            ]
            smcogsets.append(smcoganalysisgenes[last * equalpartsizes:])

            processes = [
                Process(target=smcog_analysis,
                        args=[geneslist, index, seq_record,
                              smcogvars.smcogdict, options.smcogsfolder])
                for index, geneslist in enumerate(smcogsets)
            ]
            for process in processes:
                process.start()
            # join() blocks until the worker exits, so no polling is needed.
            for process in processes:
                process.join()

        os.chdir(options.smcogsfolder)
        for filename in os.listdir(os.getcwd()):
            if ".png" in filename:
                tag = filename.split(".png")[0]
                smcogvars.smcogtreedict[tag] = tag + ".png"
    finally:
        # Always hand the caller back its original working directory.
        os.chdir(originaldir)
    _annotate(geneclustergenes, smcogvars, options)
Exemplo n.º 5
0
 def test_get_pksnrps_cds_featuers(self):
     """utils.get_pksnrps_cds_features() returns only the tagged feature.

     Tags the fixture feature at index 3 with an "NRPS/PKS Domain: "
     sec_met qualifier and expects it to be the sole result.
     """
     tagged = self.features[3]
     tagged.qualifiers['sec_met'] = ["NRPS/PKS Domain: "]
     found = utils.get_pksnrps_cds_features(self.record)
     self.assertEqual([tagged], found)