示例#1
0
def prepare_visualization(options, seq_record):
    """Attach stored ClusterBlast-family results to a record for visualization.

    First makes sure ``seq_record`` has at least one 'source' feature,
    appending one that spans the whole sequence if none exists.

    If ``options`` contains an ``extrarecord`` entry, and that entry holds
    data for ``seq_record.id``, every ClusterBlastData, SubClusterBlastData
    or KnownClusterBlastData storage object found in its ``extradata`` is
    copied attribute by attribute onto ``seq_record``, and the matching
    ``options`` flag (``clusterblast``, ``subclusterblast``,
    ``knownclusterblast``) is set to True. ``load_genecluster_info`` is then
    called whenever ``extrarecord`` is present in ``options``, whether or not
    this record had stored data.

    Args:
        options: run configuration; must support ``'extrarecord' in options``
            and attribute assignment.
        seq_record: the record to prepare; modified in place.
    """
    # Check whether a source feature exists; create one covering the whole
    # sequence if it does not.
    if not utils.get_all_features_of_type(seq_record, 'source'):
        loc = FeatureLocation(0, len(seq_record.seq))
        seq_record.features.append(SeqFeature(loc, type="source"))

    if 'extrarecord' not in options:
        return

    # storage key -> (options flag to enable, attributes copied to seq_record)
    transfers = {
        'ClusterBlastData': ('clusterblast', (
            'internalhomologygroupsdict',
            'known_compound_dict',
            'nrhitgeneclusters',
            'qgeneclusterdata',
            'queryclusterdata',
            'pubchem_dict',
            'pubmed_dict',
        )),
        'SubClusterBlastData': ('subclusterblast', (
            'internalhomologygroupsdict',
            'sc_nrhitgeneclusters',
            'sc_queryclusterdata',
            'pubchem_dict',
            'pubmed_dict',
        )),
        'KnownClusterBlastData': ('knownclusterblast', (
            'internalhomologygroupsdict',
            'kc_nrhitgeneclusters',
            'kc_queryclusterdata',
            'pubchem_dict',
            'pubmed_dict',
        )),
    }

    # `in` instead of dict.has_key(): has_key() was removed in Python 3.
    if seq_record.id in options.extrarecord:
        extradata = options.extrarecord[seq_record.id].extradata
        # NOTE: internalhomologygroupsdict, pubchem_dict and pubmed_dict are
        # written by every storage type, so the last key iterated wins.
        for key in extradata.keys():
            if key not in transfers:
                continue
            flag, attributes = transfers[key]
            logging.debug("prepare_visualization: Found %s storage object", key)
            setattr(options, flag, True)

            results = extradata[key]
            for attribute in attributes:
                setattr(seq_record, attribute, getattr(results, attribute))

    load_genecluster_info(seq_record, options)
示例#2
0
文件: js.py 项目: chevrm/transPACT
def convert_clusters(record, annotations, options):
    """Convert cluster SeqFeatures to JSON

    Builds one dict per cluster feature of ``record`` with the keys
    ``start`` (1-based), ``end``, ``idx``, ``orfs``, ``borders``,
    ``tta_codons``, ``type``, ``knowncluster`` and ``BGCid``, plus
    ``probability`` when the cluster has that qualifier and ``unordered``
    when ``options.input_type`` is ``'prot'``.

    ``knowncluster`` and ``BGCid`` stay ``"-"`` unless the cluster has
    exactly one ``knownclusterblast`` qualifier entry starting with ``'1.'``
    and that entry has the form ``"<rank>. <id><TAB><description>"``.

    Args:
        record: sequence record holding the cluster features.
        annotations: passed through to ``convert_cds_features``.
        options: run configuration; ``input_type`` is read here.

    Returns:
        A list of dicts, one per cluster, in the order returned by
        ``utils.get_cluster_features``.
    """
    # The misc_feature list does not depend on the cluster: fetch it once
    # instead of once per cluster.
    all_misc_features = utils.get_all_features_of_type(record, 'misc_feature')

    js_clusters = []
    for cluster in utils.get_cluster_features(record):
        features = utils.get_cluster_cds_features(cluster, record)
        borders = utils.get_cluster_cluster_border_features(cluster, record)

        # Collect misc_features inside this cluster that carry a TTA codon note.
        tta_codons = []
        for feature in all_misc_features:
            if not utils.features_overlap(cluster, feature):
                continue
            if 'note' not in feature.qualifiers:
                continue
            if any(note.startswith('tta leucine codon')
                   for note in feature.qualifiers['note']):
                tta_codons.append(feature)

        js_cluster = {}
        # Feature locations are 0-based; the JSON start is reported 1-based.
        js_cluster['start'] = int(cluster.location.start) + 1
        js_cluster['end'] = int(cluster.location.end)
        js_cluster['idx'] = utils.get_cluster_number(cluster)
        js_cluster['orfs'] = convert_cds_features(record, features,
                                                  annotations, options)
        js_cluster['borders'] = convert_cluster_border_features(borders)
        js_cluster['tta_codons'] = convert_tta_codons(tta_codons)
        js_cluster['type'] = utils.get_cluster_type(cluster)
        if 'probability' in cluster.qualifiers:
            js_cluster['probability'] = cluster.qualifiers['probability'][0]
        if options.input_type == 'prot':
            js_cluster['unordered'] = True
        js_cluster['knowncluster'] = "-"
        js_cluster['BGCid'] = "-"

        if 'knownclusterblast' in cluster.qualifiers:
            knownclusters = cluster.qualifiers['knownclusterblast']
            bestcluster = [
                kcluster for kcluster in knownclusters
                if kcluster.startswith('1.')
            ]
            if len(bestcluster) != 1:
                logging.warning(
                    "Error parsing best knowncluster hit; knownclusters array = %s. Possibly no significant hits to known biosynthetic gene clusters.",
                    knownclusters)
            else:
                # Raw string: the pattern is unchanged but no longer relies
                # on invalid string escapes such as '\d'.
                hit = re.match(r'\d+\. (\S+)\t(.*)', bestcluster[0])
                if hit is None:
                    # A malformed entry used to raise AttributeError on
                    # None.group(); keep the "-" placeholders instead.
                    logging.warning(
                        "Best knowncluster hit %r does not match the expected "
                        "'<rank>. <id><TAB><description>' format; ignoring it.",
                        bestcluster[0])
                else:
                    js_cluster['knowncluster'] = hit.group(2)
                    js_cluster['BGCid'] = hit.group(1)
                    logging.debug(
                        'Found closest cluster "%s" for cluster no. %s',
                        js_cluster['knowncluster'],
                        utils.get_cluster_number(cluster))
        js_clusters.append(js_cluster)

    return js_clusters
示例#3
0
 def test_detect(self):
     """Check that tta.detect() adds two misc_features at the expected spots.

     The record starts with 3 features and must end with 5; the two
     misc_features must sit at 3-6 on strand 1 and 15-18 on strand -1.
     """
     record = self.record
     self.assertEqual(len(record.features), 3)
     tta.detect(record, None)
     self.assertEqual(len(record.features), 5)

     # Unpacking also enforces that exactly two misc_features exist.
     forward, reverse = utils.get_all_features_of_type(record, 'misc_feature')
     expectations = ((forward, 3, 6, 1), (reverse, 15, 18, -1))
     for feature, start, end, strand in expectations:
         self.assertEqual(feature.location.start, start)
         self.assertEqual(feature.location.end, end)
         self.assertEqual(feature.strand, strand)
示例#4
0
def seq_record_convert_nucl_to_prot(seq_records, options):
    """Build one protein SeqRecord per CDS feature of the first record.

    Only ``seq_records[0]`` is processed. For every CDS feature a protein
    record is created from its ``translation`` qualifier, with id and name
    taken from its ``product`` qualifier. The description is the
    "NRPS/PKS subtype: " value from the ``sec_met`` qualifier when present,
    otherwise "Unknown protein".

    Each protein record receives CDS_motif features from two sources:
    "NRPS/PKS Domain: " entries of the ``sec_met`` qualifier, and existing
    CDS_motif features whose start lies within the CDS, with coordinates
    converted from nucleotides to amino acids.

    Args:
        seq_records: sequence of nucleotide records; must not be empty.
        options: passed through to ``utils.fix_record_name_id``.

    Returns:
        A list of protein SeqRecords, one per CDS feature.
    """
    seq_record = seq_records[0]
    cdsfeatures = utils.get_cds_features(seq_record)
    cdsmotifs = utils.get_all_features_of_type(seq_record, ["CDS_motif"])

    # Find corresponding cdsmotifs for each cdsfeature, keyed by product name.
    cdsmotifdict = {}
    for cdsfeature in cdsfeatures:
        for cdsmotif in cdsmotifs:
            if cdsfeature.location.start <= cdsmotif.location.start <= cdsfeature.location.end:
                product = cdsfeature.qualifiers['product'][0]
                # setdefault replaces the Python-2-only has_key() branching.
                cdsmotifdict.setdefault(product, []).append(cdsmotif)

    # For each cdsfeature, write a protein SeqRecord with CDS_motif features
    # (abMotifs AND sec_met).
    prot_seq_records = []
    for cdsfeature in cdsfeatures:
        # Extract sec_met info from the feature.
        cds_description = "Unknown protein"
        cds_domains = []
        if 'sec_met' in cdsfeature.qualifiers:
            sec_met = cdsfeature.qualifiers['sec_met']
            subtypes = [qual for qual in sec_met if "NRPS/PKS subtype: " in qual]
            if subtypes:
                cds_description = subtypes[0].partition("NRPS/PKS subtype: ")[2]
            cds_domains = [qual for qual in sec_met if "NRPS/PKS Domain: " in qual]

        # Create the protein seq_record.
        product = cdsfeature.qualifiers['product'][0]
        prot_seq_record = SeqRecord(Seq(cdsfeature.qualifiers['translation'][0], IUPAC.protein),
                                    id=product, name=product,
                                    description=cds_description)
        utils.fix_record_name_id(prot_seq_record, options)

        # Add CDS_motif features based on NRPS/PKS domains. The coordinates
        # are parsed from the text between " (" and "). " as "<start>-<end>".
        cdsmotif_features = []
        for cds_domain in cds_domains:
            domainstart, domainend = cds_domain.partition(" (")[2].partition("). ")[0].split("-")
            domainlocation = FeatureLocation(int(domainstart), int(domainend))
            domain_feature = SeqFeature(domainlocation, type="CDS_motif")
            domain_feature.qualifiers['note'] = [cds_domain]
            cdsmotif_features.append(domain_feature)

        # Add CDS_motif features based on NRPS/PKS abMotifs.
        # NOTE: the motif objects are shared with the input record, so their
        # locations are rewritten in place there as well.
        for cdsmotif in cdsmotifdict.get(product, []):
            oldstart, oldend = cdsmotif.location.start, cdsmotif.location.end
            # Floor division keeps the amino-acid coordinates integral on
            # Python 3 as well; plain '/' would produce floats there.
            newstart = (oldstart - cdsfeature.location.start) // 3
            newend = (oldend - cdsfeature.location.start) // 3
            cdsmotif.location = FeatureLocation(newstart, newend)
            cdsmotif_features.append(cdsmotif)

        prot_seq_record.features.extend(cdsmotif_features)
        prot_seq_records.append(prot_seq_record)
    return prot_seq_records
示例#5
0
def write(seq_records, options):
    """Export antiSMASH information as txt tables.

    Does nothing for protein input (``options.input_type == 'prot'``).
    Otherwise creates a ``txt`` subdirectory under
    ``options.full_outputfolder_path`` if needed. For every record with at
    least one cluster feature it writes one ``<record id prefix>_<table>.txt``
    file per table by calling ``write_tables.write_<table>(handle, info,
    options)``. The record id prefix is the part of ``seq_record.id`` before
    the first ".".

    Args:
        seq_records: iterable of sequence records to export.
        options: run configuration; ``input_type`` and
            ``full_outputfolder_path`` are read here.
    """
    logging.debug("Exporting antiSMASH information as txt tables")
    # Don't store TXT tables for protein input.
    if options.input_type == 'prot':
        return
    # Localize output folder, create TXT subdirectory.
    txt_outfolder = options.full_outputfolder_path + os.sep + "txt"
    if not os.path.exists(txt_outfolder):
        os.mkdir(txt_outfolder)
    # Define table names; each needs a matching write_tables.write_<name>().
    tables = "genome", "BGC", "signature_gene_info", "gene", "NRPS_PKS", "smCOG", "RiPP", "transltable"
    # For each record with gene clusters, write out info to TXT files.
    for seq_record in seq_records:
        clusters = utils.get_cluster_features(seq_record)
        if len(clusters) == 0:
            continue

        record_prefix = seq_record.id.partition(".")[0]
        txt_files = {}
        try:
            # Open up TXT files inside the try block so that handles opened
            # before a failing open() are still closed.
            for table in tables:
                txt_files[table] = open(
                    path.join(txt_outfolder,
                              "%s_%s.txt" % (record_prefix, table)), "w")

            # Gather all information.
            info = utils.Storage()
            info.clustertypes, info.clustergenes, info.accessions, info.cdsmotifs, info.clusternrs = {}, {}, {}, {}, []
            for cluster in clusters:
                clusternr = utils.get_cluster_number(cluster)
                cluster_cds = utils.get_cluster_cds_features(cluster, seq_record)
                info.clusternrs.append(clusternr)
                info.clustertypes[clusternr] = utils.get_cluster_type(cluster)
                info.clustergenes[clusternr] = [
                    utils.get_gene_id(cds) for cds in cluster_cds
                ]
                info.accessions[clusternr] = [
                    utils.get_gene_acc(cds) for cds in cluster_cds
                ]
                # NOTE(review): this stores every CDS_motif of the record for
                # each cluster, not only those inside it - confirm intent.
                info.cdsmotifs[clusternr] = utils.get_all_features_of_type(
                    seq_record, ["CDS_motif"])
            info.seq_record = seq_record

            # Write information to tables.
            for table in tables:
                getattr(write_tables, 'write_' + table)(txt_files[table], info,
                                                        options)
        finally:
            # Close every handle even if a table writer raised.
            for handle in txt_files.values():
                handle.close()
示例#6
0
def _find_leader_peptides(cluster, seq_record):
    """Find CDS_motifs containing lantipeptide leader peptide annotations

    A motif is kept when it lies completely inside ``cluster`` (start not
    before the cluster start, end not after the cluster end) and its
    ``note`` qualifier contains ``'leader peptide'``.

    Args:
        cluster: cluster feature bounding the search region.
        seq_record: record whose CDS_motif features are searched.

    Returns:
        A list of matching CDS_motif features, in record order.
    """
    motifs = []
    for motif in utils.get_all_features_of_type(seq_record, 'CDS_motif'):
        # Skip motifs that extend beyond either cluster boundary.
        if motif.location.start < cluster.location.start or \
           motif.location.end > cluster.location.end:
            continue

        # `in` instead of dict.has_key(): has_key() was removed in Python 3.
        if 'note' not in motif.qualifiers:
            continue

        # NOTE(review): if the qualifier value is a list, this is an exact
        # element match rather than a substring search - confirm intent.
        if 'leader peptide' not in motif.qualifiers['note']:
            continue

        motifs.append(motif)

    return motifs
示例#7
0
 def test_get_pfam_features(self):
     """Test utils.get_pfam_features() against a PFAM_domain type query."""
     from_helper = utils.get_pfam_features(self.rec)
     from_type_query = utils.get_all_features_of_type(self.rec, "PFAM_domain")
     self.assertListEqual(from_helper, from_type_query)
示例#8
0
 def test_get_cluster_features(self):
     """Test utils.get_cluster_features() against a cluster type query."""
     from_helper = utils.get_cluster_features(self.rec)
     from_type_query = utils.get_all_features_of_type(self.rec, "cluster")
     self.assertListEqual(from_helper, from_type_query)
示例#9
0
 def test_get_cds_features(self):
     """Test utils.get_cds_features() against a CDS type query."""
     from_helper = utils.get_cds_features(self.rec)
     from_type_query = utils.get_all_features_of_type(self.rec, "CDS")
     self.assertListEqual(from_helper, from_type_query)
示例#10
0
 def test_get_all_features_of_type(self):
     """Test utils.get_all_features_of_type() returns the right count per type."""
     # Each distinct type must yield as many features as it occurs in self.types.
     for feature_type in set(self.types):
         found = utils.get_all_features_of_type(self.rec, feature_type)
         expected_count = self.types.count(feature_type)
         self.assertEqual(len(found), expected_count)