Exemplo n.º 1
0
def get_34_aa_signature(domain: AntismashDomain) -> str:
    """ Extract the 34 AA NRPS signature from an A domain

        The domain translation is aligned with muscle against the sequences in
        ADOMAINS_FILENAME, the 34 AA signature positions are mapped onto the
        aligned reference sequence, and the residues at those positions are
        read from the aligned query.

        Arguments:
            domain: the A domain to analyse; its name must not contain spaces
                    and its translation must pass verify_good_sequence

        Returns:
            the 34 AA signature as produced by extract()
    """
    name = domain.get_name()
    # kept as assertions so callers see the same exception type as before
    assert " " not in name
    assert verify_good_sequence(domain.translation)

    # Run muscle and collect the aligned query and reference sequences
    alignments = subprocessing.run_muscle_single(name, domain.translation,
                                                 ADOMAINS_FILENAME)
    domain_alignment = alignments[name]
    reference_alignment = alignments[REF_SEQUENCE]

    # The 10 AA signature that used to be built here (plus the fixed lysine)
    # was never returned or stored, so only the 34 AA positions are processed.
    positions = read_positions(A34_POSITIONS_FILENAME, START_POSITION)
    # Count residues in ref sequence and put positions in list
    poslist = build_position_list(positions, reference_alignment)

    # Extract positions from query sequence
    return extract(domain_alignment, poslist)
Exemplo n.º 2
0
    def test_angstrom(self):
        """ The 34 AA signature of the "query" sequence matches the known value """
        expected = "L--SFDASLFEMYLLTGGDRNMYGPTEATMCATW"

        domain = AntismashDomain(FeatureLocation(1, 2), "test")
        domain.domain_id = "query"
        aligned = self.aligns[domain.domain_id]
        # strip the gap characters so an unaligned sequence is handed over
        domain.translation = aligned.replace("-", "")

        assert nrps_predictor.get_34_aa_signature(domain) == expected
Exemplo n.º 3
0
def main():
    # Script entry point (excerpt): prints the 34 AA signature of a placeholder
    # domain, then reads a FASTA file and extracts a signature per sequence.
    # NOTE(review): no translation is assigned to this placeholder domain in
    # the visible code before it is passed on — confirm this is intended.
    domain = AntismashDomain(FeatureLocation(1, 3, 1), tool="test")
    print(get_34_aa_signature(domain))
    # NOTE(review): the next line is indented deeper than the line above it
    # without any block being opened, and fasta_dir, parent_folder and silent
    # are not defined in the visible code — lines appear to be missing from
    # this excerpt.
        out_file = fasta_dir

    # Reads in file
    try:
        create_domain_fa = fasta.read_fasta(out_file)
    # NOTE(review): a bare except also intercepts SystemExit and
    # KeyboardInterrupt; consider narrowing it to the errors read_fasta raises.
    except:
        print("Error: please check your file is a valid FASTA or GenBank file")
        sys.exit(1)

    # Create antiSMASH-like domain objects from AMP-binding hits
    domain_list = []

    # Only the index i is used; the name and sequence are fetched by position
    # from freshly built key/value lists on every iteration.
    for i, domain in enumerate(create_domain_fa):
        domain_list.append(
            AntismashDomain(
                FeatureLocation(1, 1, 1),
                tool="test"))  # arbitrary feature location for testing
        domain_list[i].domain_id = list(create_domain_fa.keys())[i]
        domain_list[i].translation = list(create_domain_fa.values())[i]

    # Extract the active site residues
    # res: signatures, nms: domain ids, sqs: a second copy of each signature
    res, nms, sqs = [], [], []
    # new_path is not used within the visible part of this function
    new_path = "%s/data/34_aa_xtracted.fasta" % parent_folder

    if not silent:
        print("##### Extracting active site residues... #####")
    # the tqdm progress bar is disabled when silent is truthy
    for i, domain in enumerate(tqdm.tqdm(domain_list, disable=silent)):
        res.append(get_34_aa_signature(domain))
        nms.append(domain.domain_id)
        # res[i] is the signature appended just above
        sqs.append(res[i])
Exemplo n.º 5
0
    def add_to_record(self, record) -> None:
        """ Build an AntismashDomain feature for every domain of every NRPS/PKS
            CDS feature in the record, fill in the specificity predictions that
            are available for it and add the new features to the record
        """
        # domain types that get a label and an id built from gene name + domain label
        labelled_types = ("AMP-binding", "PKS_AT", "CAL_domain", "PKS_KR")

        for cds in record.get_nrps_pks_cds_features():
            unlabelled_count = 0
            qualifier = cds.nrps_pks
            pending = []
            gene_id = cds.get_name()
            for domain in qualifier.domains:
                domain_type = domain.name
                first_aa = domain.start
                last_aa = domain.end
                evalue = domain.evalue
                bitscore = domain.bitscore

                # predictions are rebuilt from scratch on every call
                domain.predictions.clear()

                # turn amino acid offsets within the CDS into nucleotide coordinates
                if cds.location.strand == 1:
                    origin = cds.location.start
                    start = origin + (3 * first_aa)
                    end = origin + (3 * last_aa)
                else:
                    origin = cds.location.end
                    end = origin - (3 * first_aa)
                    start = origin - (3 * last_aa)

                # describe the domain as a feature of its own
                domain_feature = AntismashDomain(FeatureLocation(start, end, strand=cds.strand))
                domain_feature.domain_subtype = domain_type
                domain_feature.locus_tag = cds.locus_tag or gene_id
                domain_feature.detection = "hmmscan"
                domain_feature.database = "nrpspksdomains.hmm"
                domain_feature.evalue = evalue
                domain_feature.score = bitscore
                # fall back to translation table 1 when the CDS does not specify one
                table = cds.transl_table or 1
                nucleotides = domain_feature.extract(record.seq)
                domain_feature.translation = str(nucleotides.translate(table=table))

                domainname = gene_id + domain.label
                if domain_type in labelled_types:
                    domain_feature.label = domainname
                    domain_feature.domain_id = "nrpspksdomains_" + domainname
                else:
                    # every other domain type gets a numbered placeholder id
                    # TODO: unclear whether these domains should also be
                    #       collected into an updated qualifier
                    unlabelled_count += 1
                    gene_prefix = gene_id.partition(".")[0]
                    domain_feature.domain_id = ("nrpspksdomains_" + gene_prefix
                                                + "_Xdom" + '{:02d}'.format(unlabelled_count))

                if domain_type == "AMP-binding":
                    domain.predictions["consensus"] = "nrp"

                elif domain_type == "PKS_AT":
                    # trans-AT clusters have their own set of consensus predictions
                    if 'transatpks' in cds.cluster.products:
                        consensus = self.consensus_transat[domainname]
                    else:
                        consensus = self.consensus[domainname]
                    signature_hits = self.pks.method_results["signature"][domainname]
                    if signature_hits:
                        signature = signature_hits[0].name.rsplit("_", 1)[1]
                    else:
                        signature = _UNKNOWN
                    domain.predictions["PKS signature"] = signature
                    minowa = self.pks.method_results["minowa_at"][domainname][0][0]
                    domain.predictions["Minowa"] = LONG_TO_SHORT.get(minowa, minowa)
                    domain.predictions["consensus"] = consensus

                elif domain_type == "CAL_domain":
                    minowa = self.pks.method_results["minowa_cal"][domainname][0][0]
                    domain.predictions["Minowa"] = LONG_TO_SHORT.get(minowa, minowa)

                elif domain_type == "PKS_KR":
                    kr_results = self.pks.method_results
                    if kr_results["kr_activity"][domainname]:
                        activity = "active"
                    else:
                        activity = "inactive"
                    domain.predictions["KR activity"] = activity
                    domain.predictions["KR stereochemistry"] = \
                        kr_results["kr_stereochem"].get(domainname, _UNKNOWN)

                # copy each prediction onto the feature as "method: prediction"
                for method, prediction in domain.predictions.items():
                    domain_feature.specificity.append("%s: %s" % (method, prediction))

                mapping = DOMAIN_TYPE_MAPPING.get(domain_type)
                if mapping:
                    domain_feature.domain_subtype = domain_type
                    domain_feature.domain = mapping
                pending.append(domain_feature)

            # features are only added once the whole CDS has been processed
            for domain_feature in pending:
                record.add_feature(domain_feature)
Exemplo n.º 6
0
        with open(input_filename, "w") as handle:
            for sig, domain in zip(signatures, a_domains):
                handle.write("%s\t%s\n" % (sig, domain.get_name()))
        # Run NRPSPredictor2 SVM
        commands = [
            'java',
            '-Ddatadir=%s' % data_dir, '-cp', classpath,
            'org.roettig.NRPSpredictor2.NRPSpredictor2', '-i', input_filename,
            '-r', output_filename, '-s', '1', '-b', bacterial
        ]
        result = subprocessing.execute(commands)
        if not result.successful():
            raise RuntimeError("NRPSPredictor2 failed: %s" % result.stderr)

        with open(output_filename) as handle:
            lines = handle.read().splitlines()[1:]  # strip the header

    return read_output(lines)


# Read a FASTA file, wrap every sequence in a placeholder AntismashDomain and
# run NRPSPredictor2 over the resulting list.
# NOTE(review): the input is a hard-coded absolute path on a developer machine.
create_domain_fa = fasta.read_fasta(
    '/Users/robi0916/Documents/Wageningen_UR/github/sandpuma2_serina/flat/fullset20160624_cl.faa'
)
domain_list = []
# Walk the name/sequence pairs once; the previous version rebuilt full lists of
# keys and values on every iteration, which was quadratic in the entry count.
for domain_id, translation in create_domain_fa.items():
    entry = AntismashDomain(FeatureLocation(
        1, 1, 1), tool="test")  # arbitrary feature location
    entry.domain_id = domain_id
    entry.translation = translation
    domain_list.append(entry)
run_nrpspredictor(domain_list)