Пример #1
0
    def model_hsps(seq_id,
                   work_dir,
                   hsps,
                   refinement=REFINEMENT,
                   models_to_generate=MODELS_TO_GENERATE,
                   assessments=ASSESMENTS,
                   entries=None,
                   pdb_divided="/data/databases/pdb/divided/",
                   tmp_dir=None,
                   max_models=3):
        """Build models for the best-ranked HSPs of one query sequence.

        NOTE(review): there is no ``self`` parameter, so this is presumably
        declared as a ``@staticmethod``; the decorator is outside this view.

        :param seq_id: query identifier; used as the query name in every
            alignment and as the prefix of each model id
        :param work_dir: first argument handed to ``Modeller``
        :param hsps: iterable of HSP objects exposing ``aln``, ``hit.id``,
            ``query_start``, ``query_end``, ``hit_start`` and ``hit_end``
        :param refinement: stored on the modeller as ``_refinement``
        :param models_to_generate: stored on the modeller as ``model_count``;
            also the number of model paths listed for an existing model
        :param assessments: stored on the modeller as ``_assess_methods``
        :param entries: optional container mapping a PDB code to a sortable
            rank; alignments are sorted ascending by it and codes that are
            absent get rank 20. ``None`` behaves like an empty mapping.
        :param pdb_divided: root directory of the template files; a template
            is looked up at ``<root>/<pdb[1:3]>/pdb<pdb>.ent``
        :param tmp_dir: passed to both ``Modeller`` and ``ChainSplitter``
        :param max_models: only the first ``max_models`` sorted alignments
            are modelled
        :return: dict with ``"alns"`` (all alignments, sorted) and
            ``"models"`` (hit name -> models for that hit)
        :raises ValueError: when a modelled hit name does not split into
            exactly four ``_``-separated fields
        """
        # A None default avoids sharing one dict object between all calls.
        entries = {} if entries is None else entries
        result = {"models": defaultdict(dict)}

        alns = [
            Struct(aln_query=Struct(name=seq_id,
                                    seq=str(hsp.aln[0].seq),
                                    start=hsp.query_start,
                                    end=hsp.query_end),
                   aln_hit=Struct(name=hsp.hit.id,
                                  seq=str(hsp.aln[1].seq),
                                  start=hsp.hit_start,
                                  end=hsp.hit_end))
            for hsp in hsps
        ]

        modeler = Modeller(work_dir, tmp_dir)

        modeler._refinement = refinement
        modeler.model_count = models_to_generate
        modeler._assess_methods = assessments
        modeler.parallel_jobs = 1

        def pdb_fn(x):
            return x.aln_hit.name.split("_")[0]

        def rank(x):
            # Compute the PDB code once per alignment; unknown codes sort
            # with the fallback rank 20.
            pdb_code = pdb_fn(x)
            return entries[pdb_code] if pdb_code in entries else 20

        alns = sorted(alns, key=rank)
        result["alns"] = alns
        for aligment in alns[0:max_models]:
            hit_name = aligment.aln_hit.name
            pdb, chain, _, _ = hit_name.split("_")
            model_id = seq_id + "_" + hit_name
            if os.path.exists(modeler.pdb_path(model_id, seq_id)):
                # The model path already exists: list the expected model
                # paths instead of modelling again.
                models = [
                    modeler.pdb_path(model_id, seq_id, idx)
                    for idx in range(1, models_to_generate + 1)
                ]
            else:
                # os.path.join keeps the path valid whether or not
                # pdb_divided ends with a separator.
                base_model_path = os.path.join(pdb_divided, pdb[1:3],
                                               "pdb" + pdb + ".ent")
                ChainSplitter(tmp_dir).make_pdb(base_model_path,
                                                pdb,
                                                chain,
                                                overwrite=True)
                models = modeler.create_model(model_id, aligment)
            result["models"][hit_name] = models
        return result
Пример #2
0
    def run_dssp(self):
        """Run the external ``dssp`` program on ``self.pdb_path`` and parse it.

        The program writes to a temporary ``.dssp`` file. Every line after
        the ``"  #  RESIDUE AA"`` header is sliced by fixed column ranges and
        appended to ``self.dssp`` as a ``Struct`` with the fields ``res``,
        ``aa``, ``ss``, ``bbl1``, ``bbl2``, ``bp1``, ``bp2`` and ``bslabel``.
        The temporary file is removed before returning.

        :raises ValueError: when a numeric column of a residue line cannot
            be converted with ``int``
        """
        import os

        fd, out = tempfile.mkstemp(suffix=".dssp")
        # mkstemp returns an already-open OS-level descriptor; close it so it
        # is not leaked (the file is reopened by name below).
        os.close(fd)
        try:
            execute("dssp -i {pdb_path} -o {out}",
                    pdb_path=self.pdb_path,
                    out=out)

            with open(out) as h:
                in_table = False
                for line in h:
                    if not in_table:
                        # Everything up to and including the column header
                        # line is preamble.
                        in_table = line.startswith("  #  RESIDUE AA")
                        continue
                    if not line[5:10].strip():
                        # A blank residue-number field cannot be parsed by
                        # int(); skip the line instead of aborting.
                        # NOTE(review): presumably these are DSSP chain-break
                        # ("!") records or trailing blank lines - confirm.
                        continue
                    self.dssp.append(
                        Struct(res=int(line[5:10]),
                               aa=line[10:14].strip(),
                               ss=line[14:17].strip(),
                               bp1=int(line[25:29]),
                               bp2=int(line[29:33]),
                               bbl1=line[23:24],
                               bbl2=line[24:25],
                               bslabel=line[33:34]))
        finally:
            # The path is never stored anywhere, so the file is of no use
            # after parsing.
            if os.path.exists(out):
                os.remove(out)
Пример #3
0
 def residues_near_drug(drug_centroid, aa_residues):
     """Return the residues of ``aa_residues`` that lie close to a point.

     The atoms of each residue are visited in order. The residue is kept as
     soon as an atom is closer than 10 to ``drug_centroid``, and it is
     dropped as soon as an atom is farther than 20; atoms in between do not
     decide anything. The distance is whatever ``atom - Struct(coord=...)``
     evaluates to.

     :param drug_centroid: coordinate wrapped as ``Struct(coord=...)``
     :param aa_residues: iterable of residues, each iterable over its atoms
     :return: list of the selected residues, in input order
     """

     def _has_close_atom(residue):
         for atom in list(residue):
             gap = atom - Struct(coord=drug_centroid)
             if gap > 20:
                 # One far atom rules the whole residue out.
                 return False
             if gap < 10:
                 return True
         return False

     return [residue for residue in aa_residues if _has_close_atom(residue)]
Пример #4
0
    def query(self):
        """Run the search if needed and return an iterable over its results.

        Command form: ``hmmscan [-options] <hmmdb> <seqfile>``

        ``self._hmmscan()`` is only invoked when ``self.output_file`` is
        missing or empty, so an existing non-empty output file is reused.

        :return: ``Struct`` carrying ``_meta`` (taken from a query iterator)
            and an ``__iter__`` callable that returns the query iterator
        """
        self._format_database()
        self._check_input_file()

        output_ready = (os.path.exists(self.output_file)
                        and os.path.getsize(self.output_file) != 0)
        if not output_ready:
            self._hmmscan()

        hits = self.query_iterator()
        # A second, separate iterator is created only to read its metadata.
        hits_meta = self.query_iterator()._meta

        def _iterate():
            return hits

        return Struct(_meta=hits_meta, __iter__=_iterate)
Пример #5
0
def smart_parse(path, seqs=None, gz_input=None):
    """Open a sequence/annotation/search file and parse it by its extension.

    The parser is chosen from the file name, ignoring a trailing ``.gz``:

    * ``.fasta``, ``.faa``, ``.fna`` -> ``bpio.parse(..., "fasta")``
    * ``.gb``, ``.gbf``, ``.gbk``, ``.genebank``, ``.gbff`` -> ``"gb"``
    * ``.embl`` -> ``"embl"``; ``.fq``, ``.fastq`` -> ``"fastq"``
    * ``.gff``, ``.gff3`` -> ``BCBio.GFF.parse``
    * ``.hmm`` -> ``bpsio.parse(..., "hmmer3-text")``
    * ``.xml`` -> BLAST XML or UniProt XML, decided by the second line

    :param path: sequence path; surrounding whitespace is ignored
    :param seqs: dictionary of sequences. key=sequence name,
        value=Bio.Seq.Seq object. When given and non-empty, every parsed
        record whose ``id`` is a key gets its ``seq`` replaced by the value.
    :param gz_input: force gzip decompression even when the name does not
        end in ``.gz``
    :return: sequences iterator (wrapped in a ``Struct`` when ``seqs`` is
        used)
    :raises Exception: ``"invalid format"`` when no parser matches; errors
        raised while setting up a parser propagate unchanged
    """
    raw_path = path.strip()
    compressed = bool(gz_input) or raw_path.endswith(".gz")
    # Only strip the suffix when it is really there: gz_input may be set for
    # a compressed file whose name does not end in ".gz".
    path = raw_path[:-3] if raw_path.endswith(".gz") else raw_path

    def _open_text():
        if compressed:
            return gzip.open(raw_path, "rt")
        return open(raw_path, "r")

    # No suffix here is a suffix of another one, so at most one entry matches.
    seqio_formats = {
        ".fasta": "fasta",
        ".faa": "fasta",
        ".fna": "fasta",
        ".gb": "gb",
        ".gbf": "gb",
        ".gbk": "gb",
        ".genebank": "gb",
        ".gbff": "gb",
        ".embl": "embl",
        ".fq": "fastq",
        ".fastq": "fastq",
    }

    handle = _open_text()
    it = None
    try:
        for suffix, fmt in seqio_formats.items():
            if path.endswith(suffix):
                it = bpio.parse(handle, fmt)
                break

        if path.endswith((".gff", ".gff3")):
            from BCBio import GFF
            it = GFF.parse(handle)

        if path.endswith(".hmm"):
            it = bpsio.parse(handle, "hmmer3-text")

        if path.endswith(".xml"):
            # Sniff the flavour through a separate handle so the parser still
            # starts at the beginning of the file; open it the same way as
            # the main handle so compressed input is readable too.
            with _open_text() as sniff:
                sniff.readline()
                second_line = sniff.readline()
            if "BlastOutput" in second_line:
                it = add_blast_xml_props(
                    search_iterator(bpsio.parse(handle, "blast-xml")))
            if "<uniprot" in second_line:
                it = bpio.parse(handle, "uniprot-xml")
    except BaseException:
        # Do not leak the handle, and do not hide the real cause behind a
        # generic "invalid format" error.
        handle.close()
        raise

    if it is None:
        handle.close()
        raise Exception("invalid format")

    if not seqs:
        return it

    def witer():
        for record in it:
            if record.id in seqs:
                record.seq = seqs[record.id]
            yield record

    return Struct(__iter__=witer)
Пример #6
0
            os.makedirs(self.model_directory(model_id, query_id))


if __name__ == '__main__':
    from SNDG import init_log, Struct

    init_log()

    # Manual smoke run with hard-coded local paths: model one query against
    # chain B of PDB entry 3u52.
    workdir = "/media/eze/Data/data/organismos/Pext14-3B/analysis/struct/good"
    model_id = "PE143B_RS25640_3u52_B_6_498"
    modeler = Modeller(workdir, "/tmp")

    # Both aligned sequences are passed as-is; the query keeps its "-" gap
    # characters (no .ungap("-") is applied).
    query_side = Struct(
        name="PE143B_RS25640",
        seq="KKLNAKDKYRLLTRDLAWEPSYRTEEEIFPYIAYEGLKIHDWNKWEDPFRLTMDAYWKYQAEKERKFYAIIDAHAQNNGHLNITDARYLSALKIFLQAISPGEYAAHKGFARAGREFRGVGTQVACQMQAIDELRHAQTQIHALSNYNKFYNGFHAFADQRDRIWYTSVARSFFDDAMSAGPFEFMIAIGFSFEYVLTNLLFVPFMSGAAYNGDMATVTFGFSAQSDEARHMTLGLECIKFMLEQDPANLPIVQGWIDKWFWRGFRVLGLVSTMMDYMLPKRVMSWREAWEIYGAENGGALFKDLARYGIRPPKSWDDAEASIDHMSHQFMLGLYQWSFGTAFHAWIPSDDDMQWLSAKYPTTFDKYYRPRWEHIKKMEAAGTPFKNYGLAKLCQCCQLPTVFTEPDDPTLICHRQVQYKGDKYHFCSDHCMGIFNNEPEKYIQAWLPMPALFQAPTN-GDLGAWMD-WVSLKDGQDNGDFADSQDRRN",
        start=7,
        end=494)
    hit_side = Struct(
        name="3u52_B_6_498",
        seq="KKLNLKDKYQYLTRDMAWEPTYQDKKDIFPEEDFEGIKITDWSQWEDPFRLTMDAYWKYQAEKEKKLYAIFDAFAQNNGHQNISDARYVNALKLFISGISPLEHAAFQGYSKVGRQFSGAGARVACQMQAIDELRHSQTQQHAMSHYNKHFNGLHDGPHMHDRVWYLSVPKSFFDDARSAGPFEFLTAISFSFEYVLTNLLFVPFMSGAAYNGDMATVTFGFSAQSDEARHMTLGLEVIKFILEQHEDNVPIVQRWIDKWFWRGFRLLSLVSMMMDYMLPNKVMSWSEAWEVYYEQNGGALFKDLERYGIRPPKYQDVANDAKHHLSHQLWTTFYQYCQATNFHTWIPEKEEMDWMSEKYPDTFDKYYRPRYEYLAKEAAAGRRFYNNTLPQLCQVCQIPTIFTEKDAPTMLSHRQIEHEGERYHFCSDGCCDIFKHEPEKYIQAWLPVHQIYQGNCEGGDLETVVQKYYHINIGEDNFDYVGSPDQKH",
        start=0,
        end=489)
    alignment = Struct(aln_query=query_side, aln_hit=hit_side)

    # Write the template chain to /tmp/ before modelling.
    splitter = ChainSplitter("/tmp/")
    splitter.make_pdb(pdb_path="/data/databases/pdb/divided/u5/pdb3u52.ent",
                      pdb_id="3u52",
                      chain="B",
                      overwrite=True)
    models = modeler.create_model(model_id, alignment)
Пример #7
0
    def model_hsps(seq_id,
                   work_dir,
                   hsps,
                   refinement=REFINEMENT,
                   models_to_generate=MODELS_TO_GENERATE,
                   assessments=ASSESMENTS,
                   entries=None,
                   tmp_dir=None,
                   max_models=3):
        """Build models for the best-ranked HSPs of one query sequence.

        NOTE(review): there is no ``self`` parameter, so this is presumably
        declared as a ``@staticmethod``; the decorator is outside this view.

        :param seq_id: query identifier; used as the query name in every
            alignment and as the prefix of each model id
        :param work_dir: first argument handed to ``Modeller``
        :param hsps: iterable of HSP objects exposing ``aln``, ``hit.id``,
            ``query_start``, ``query_end``, ``hit_start`` and ``hit_end``
        :param refinement: stored on the modeller as ``_refinement``
        :param models_to_generate: stored on the modeller as ``model_count``;
            also the number of model paths listed for an existing model
        :param assessments: stored on the modeller as ``_assess_methods``
        :param entries: optional container mapping a PDB code to a sortable
            rank; alignments are sorted ascending by it and codes that are
            absent get rank 20. ``None`` behaves like an empty mapping.
        :param tmp_dir: second argument handed to ``Modeller``
        :param max_models: only the first ``max_models`` sorted alignments
            are modelled
        :return: dict with ``"alns"`` (all alignments, sorted), ``"models"``
            (hit name -> models) and ``"errors"`` (one message per hit whose
            modelling raised ``SequenceMismatchError`` or ``ModellerError``)
        :raises ValueError: when a hit name without ``;`` does not split
            into exactly four ``_``-separated fields
        """
        # A None default avoids sharing one dict object between all calls.
        entries = {} if entries is None else entries
        # Entries are only ever assigned, so a plain dict is enough.
        result = {"models": {}, "errors": []}

        alns = [
            Struct(aln_query=Struct(name=seq_id,
                                    seq=str(hsp.aln[0].seq),
                                    start=hsp.query_start,
                                    end=hsp.query_end),
                   aln_hit=Struct(name=hsp.hit.id,
                                  seq=str(hsp.aln[1].seq),
                                  start=hsp.hit_start,
                                  end=hsp.hit_end))
            for hsp in hsps
        ]

        modeler = Modeller(work_dir, tmp_dir)

        modeler._refinement = refinement
        modeler.model_count = models_to_generate
        modeler._assess_methods = assessments
        modeler.parallel_jobs = 1

        def pdb_fn(x):
            return x.aln_hit.name.split("_")[0]

        def rank(x):
            # Compute the PDB code once per alignment; unknown codes sort
            # with the fallback rank 20.
            pdb_code = pdb_fn(x)
            return entries[pdb_code] if pdb_code in entries else 20

        alns = sorted(alns, key=rank)
        result["alns"] = alns
        for aligment in alns[0:max_models]:
            hit_name = aligment.aln_hit.name
            if ";" in hit_name:
                # Names containing ";" carry the PDB code and chain in their
                # last five characters; the hit is renamed to "<pdb>_<chain>".
                pdb = hit_name[-5:-1]
                chain = hit_name[-1]
                aligment.aln_hit.name = f'{pdb}_{chain}'
            else:
                pdb, chain, _, _ = hit_name.split("_")
            modeller_pdb_id = f'{seq_id}_{pdb}_{chain}'

            if os.path.exists(modeler.pdb_path(modeller_pdb_id, seq_id)):
                # The model path already exists: list the expected model
                # paths instead of modelling again.
                models = [
                    modeler.pdb_path(modeller_pdb_id, seq_id, idx)
                    for idx in range(1, models_to_generate + 1)
                ]
            else:
                try:
                    models = modeler.create_model(modeller_pdb_id, aligment)
                except (SequenceMismatchError, ModellerError) as ex:
                    # Report which model failed: the id is interpolated
                    # rather than written out as the literal variable name.
                    result["errors"].append(f"{modeller_pdb_id} {str(ex)}\n")
                    continue
            result["models"][aligment.aln_hit.name] = models
        return result