Пример #1
0
 def read_mmCIF_file(structure_id, filename, hetatm=False, water=False):
     """
     Parse an mmCIF file with Biopython and convert it to a Structure instance.

     Argument:
         *structure_id*
             identifier handed to the Biopython parser
         *filename*
             name of the mmCIF file to read
         *hetatm*
             Boolean forwarded to the converter. Default is False.
         *water*
             Boolean forwarded to the converter. Default is False.

     Return:
         Whatever ``mmCIFParser._biommCIF_strcuture_to_TEMpy`` returns
         (a Structure instance).
     """
     # Imported lazily so Biopython is only required when this is called.
     from .Bio.PDB import MMCIFParser as MMCIFParserBiopy

     # The Biopython parser is created with its default settings.
     biopy_structure = MMCIFParserBiopy().get_structure(structure_id, filename)
     return mmCIFParser._biommCIF_strcuture_to_TEMpy(
         filename, biopy_structure, hetatm, water)
Пример #2
0
def structure_scanning(pdb, ligname, graph, model, edge_map, embed_dim):
    """
        Predict a fingerprint for every candidate residue of an mmCIF structure.

        Only the first model of the structure is scanned. A residue is a
        candidate when its name is one of 'A', 'U', 'C', 'G' or `ligname`.
        For each candidate:
            - the pocket graph around the residue is built from `graph`
            - the graph is converted with `nx_to_dgl` and fed to `model`
            - the prediction is thresholded at 0.5 into a boolean array
        :returns: `residue_preds` dictionary keyed by `(chain id, residue number)`
                  with the boolean fingerprint prediction as value.
    """
    from data_processor.build_dataset import get_pocket_graph

    first_model = MMCIFParser(QUIET=True).get_structure("", pdb)[0]
    accepted_names = ['A', 'U', 'C', 'G', ligname]

    residue_preds = {}
    for res in tqdm(list(first_model.get_residues())):
        if res.resname not in accepted_names:
            continue

        # Residue descriptor in the "_:<chain>:<resname>:<number>" form
        # expected by get_pocket_graph.
        descriptor_fields = [
            "_",
            res.get_parent().id,
            res.resname,
            str(res.id[1]),
        ]
        pocket = get_pocket_graph(pdb, ":".join(descriptor_fields), graph)
        _, dgl_pocket = nx_to_dgl(pocket, edge_map, embed_dim)
        _, raw_prediction = model(dgl_pocket)

        key = (res.get_parent().id, res.id[1])
        residue_preds[key] = raw_prediction.detach().numpy() > 0.5
    return residue_preds
Пример #3
0
 def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
     """DSSP built from the first model of PDB/1AS5.cif has 24 entries."""
     cif_path = "PDB/1AS5.cif"
     first_model = MMCIFParser().get_structure("1AS5", cif_path)[0]
     self.assertEqual(len(DSSP(first_model, cif_path)), 24)
Пример #4
0
    def load_test_structure(self, pdb_code):
        """Parse ``<pdb_code>.cif`` from the test directory.

        Returns a ``(structure, residue_ids)`` tuple, where ``residue_ids`` is
        built by ``BiopythonToMmcifResidueIds.create`` from the dictionary the
        parser already produced, so the file is not parsed a second time.
        """
        cif_path = self.TEST_STRUCTURE_DIR / f'{pdb_code}.cif'
        parser = MMCIFParser(QUIET=True)
        structure = parser.get_structure(pdb_code, cif_path)

        # Relies on the parser's private ``_mmcif_dict`` attribute.
        residue_ids = BiopythonToMmcifResidueIds.create(parser._mmcif_dict)
        return structure, residue_ids
Пример #5
0
 def fetch_mmCIF(structure_id, filename, hetatm=False, water=False):
     """
     Download an mmCIF file and create a Structure instance based upon it.

     Argument:
         *structure_id*
             structure_id code of the mmCIF file; also used to build the
             download URL
         *filename*
             local path the downloaded mmCIF file is written to
         *hetatm*
             Boolean forwarded to the converter. Default is False.
         *water*
             Boolean forwarded to the converter.
             Default and recommended is False.

     Return:
         Structure Instance
     """
     from Bio.PDB import MMCIFParser as MMCIFParserBiopy

     # ``urllib.urlretrieve`` only exists on Python 2; on Python 3 the
     # function lives in ``urllib.request``. Resolve whichever is available.
     try:
         from urllib.request import urlretrieve
     except ImportError:
         from urllib import urlretrieve

     p = MMCIFParserBiopy()
     url = 'http://www.rcsb.org/pdb/files/%s.cif' % structure_id
     urlretrieve(url, filename)
     structure = p.get_structure(structure_id, filename)
     return mmCIFParser._biommCIF_strcuture_to_TEMpy(
         filename, structure, hetatm, water)
Пример #6
0
 def test_dssp_with_mmcif_file(self):
     """DSSP built from the first model of PDB/2BEG.cif has 130 entries."""
     cif_path = "PDB/2BEG.cif"
     first_model = MMCIFParser().get_structure("2BEG", cif_path)[0]
     self.assertEqual(len(DSSP(first_model, cif_path)), 130)
Пример #7
0
 def cifReader(self, name, file):
     """Parse an mmCIF file and return the resulting structure.

     Any error raised while creating the parser or parsing is reported on
     stdout and ``None`` is returned instead of propagating the error.

     :param name: identifier given to the parsed structure
     :param file: path (or handle) of the mmCIF file
     :returns: the parsed structure, or ``None`` on failure
     """
     try:
         parser = MMCIFParser()
         return parser.get_structure(name, file)
     except Exception:
         # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
         # interpreter shutdown are not swallowed.
         # NOTE(review): the message says "File not found" for every kind of
         # failure, including parse errors.
         print("Something went wrong: File not found")
         return None
Пример #8
0
 def __get_structure__(self, file_path):
     """Parse a structure file, choosing the parser from the file extension.

     An extension containing 'cif' is read with ``MMCIFParser``; anything
     else is read with ``PDBParser``. The structure id is the file's base
     name without its extension.
     """
     stem, suffix = os.path.splitext(os.path.basename(file_path))
     parser_cls = MMCIFParser if 'cif' in suffix else PDBParser
     return parser_cls().get_structure(stem, file_path)
Пример #9
0
 def creator(parser=parser):
     """Parse ``dst`` into a structure, falling back to mmCIF on ValueError.

     The given parser is tried first. If it raises ``ValueError`` and
     ``PARSE_CIF`` is truthy, the file is re-read with a quiet
     ``MMCIFParser``; otherwise the error is re-raised.
     ``self.freemem()`` is called in every case before returning or raising.
     """
     try:
         structure = parser.get_structure(pid, file=dst)
     except ValueError as err:  # assume it's a .cif
         if not PARSE_CIF:
             raise err
         structure = MMCIFParser(QUIET=True).get_structure(pid, dst)
     finally:
         self.freemem()
     return structure
def load_pdb(path):
    """Parse the mmCIF file at *path* and return the structure.

    The structure id is always 'structure 1'.
    """
    # Despite the name this reads mmCIF; legacy PDB-format files would need
    # PDBParser(PERMISSIVE=1) here instead.
    return MMCIFParser().get_structure('structure 1', path)
Пример #11
0
    def test_cealigner_nucleic(self):
        """Aligning 1LCD onto itself gives an RMS of 0.0 (3 places)."""
        cif_path = "PDB/1LCD.cif"

        parser = MMCIFParser(QUIET=1)
        reference = parser.get_structure("1lcd_ref", cif_path)
        mobile = parser.get_structure("1lcd_mob", cif_path)

        aligner = CEAligner()
        aligner.set_reference(reference)
        aligner.align(mobile)

        self.assertAlmostEqual(aligner.rms, 0.0, places=3)
Пример #12
0
def print_input_file(structure_file, ss2_file=None):
    """Print a sequence / secondary-structure / coordinate summary of a structure.

    Output, in order:
      1. the one-letter sequence of the first model (hetero and water
         residues skipped),
      2. the secondary structure string taken from column 3 of *ss2_file*,
         or all "C" when no file is given,
      3. one line per residue with the N, CA, C and sidechain-centroid
         coordinates rounded to 3 decimals.

    The parser is chosen from the file extension: "cif"/"mmcif" use
    MMCIFParser, everything else PDBParser.
    """
    suffix = os.path.basename(structure_file).rsplit(".", 1)[-1].lower()
    if suffix in ("cif", "mmcif"):
        from Bio.PDB import MMCIFParser
        parser = MMCIFParser()
    else:
        from Bio.PDB import PDBParser
        parser = PDBParser()
    struc = parser.get_structure("", structure_file)

    backbone_names = ("N", "CA", "C", "O")
    letters = []
    rows = []
    for chain in struc[0]:
        for res in chain:
            # Skip hetero and water residues
            if res.id[0] != " ":
                continue
            letters.append(three_to_one_aas[res.get_resname()])
            if res.get_resname() == "GLY":
                # Extend vector of length 1 Å from Cα to act as fake centroid
                ca = res["CA"].get_coord()
                direction = ca - res["C"].get_coord() + ca - res["N"].get_coord()
                centroid = res["CA"].get_coord() + direction / np.linalg.norm(direction)
            else:
                # Centroid coordinates of sidechain heavy atoms
                sidechain = [
                    atom.get_coord()
                    for atom in res
                    if atom.get_name() not in backbone_names and atom.element != "H"
                ]
                centroid = np.array(sidechain).mean(0)
            rows.append([
                res["N"].get_coord(),
                res["CA"].get_coord(),
                res["C"].get_coord(),
                centroid,
            ])

    seq = "".join(letters)
    print(seq)
    if ss2_file:
        # Extract 3-state secondary structure prediction from PSIPRED ss2 output file
        with open(ss2_file) as f:
            ss_pred = "".join(
                line.split()[2]
                for line in f
                if len(line.rstrip()) > 0 and not line.startswith("#")
            )
        assert len(seq) == len(ss_pred), f"Sequence length is {len(seq)} but SS prediction length is {len(ss_pred)}"
        print(ss_pred)
    else:
        print("C" * len(seq))

    def coord_str(coord):
        return " ".join(str(round(value, 3)) for value in coord)

    for row in rows:
        print(" ".join(coord_str(point) for point in row))
Пример #13
0
def get_convert_cifs(url, cif_path, pdb_path):
    """Download an mmCIF file and write it back out in PDB format.

    The file at *url* is saved to *cif_path*, parsed, and written to
    *pdb_path*. Download errors and ``TypeError`` during conversion are
    reported on stdout; nothing is returned in any case.
    """
    try:
        url_req.urlretrieve(url, cif_path)
    except (url_err.URLError, url_err.HTTPError):
        print("!!!HTTP or URL error, couldn't get " + url + '.')
        return

    try:
        structure = MMCIFParser().get_structure('', cif_path)
        writer = PDBIO()
        writer.set_structure(structure)
        writer.save(pdb_path)
    except TypeError:
        print('Problem making pdb file')
    else:
        print('^^^SUCCESSFULLY CONVERTED CIF TO PDB')
Пример #14
0
def scanning_analyze():
    """
        Visualize results of scanning on PDB.

        For every annotated pocket file (named like
        `1fmn_#0.1:A:FMN:36.nx_annot.p`) whose RNA graph has at most 100 nodes:
            - run `structure_scanning` on the matching mmCIF file
            - for each scanned residue, compute its distance to the native
              ligand center and the `mse` between the true and predicted
              fingerprints
            - plot the two series against each other.
        Pockets for which scanning raises are reported and skipped.
    """
    from data_processor.build_dataset import find_residue, lig_center

    model, edge_map, embed_dim = load_model('small_no_rec_2',
                                            '../data/annotated/pockets_nx')
    for f in os.listdir("../data/annotated/pockets_nx"):
        pdbid = f.split("_")[0]
        _, chain, ligname, pos = f.replace(".nx_annot.p", "").split(":")
        pos = int(pos)
        print(chain, ligname, pos)

        # Context managers so the pickle files are closed promptly instead
        # of leaking one handle per pocket.
        with open(f'../data/RNA_Graphs/{pdbid}.pickle', 'rb') as graph_file:
            graph = pickle.load(graph_file)
        if len(graph.nodes()) > 100:
            continue

        cif_path = f'../data/all_rna_prot_lig_2019/{pdbid}.cif'
        try:
            fp_preds = structure_scanning(cif_path, ligname, graph, model,
                                          edge_map, embed_dim)
        except Exception as e:
            # Best effort: report the failure and move on to the next pocket.
            print(e)
            continue

        parser = MMCIFParser(QUIET=True)
        structure = parser.get_structure("", cif_path)[0]
        lig_res = find_residue(structure[chain], pos)
        lig_c = lig_center(lig_res.get_atoms())

        with open("../data/all_ligs_maccs.p", 'rb') as fp_file:
            fp_dict = pickle.load(fp_file)
        true_fp = fp_dict[ligname]

        # NOTE(review): the result is never used; the call is kept only in
        # case get_decoys() has side effects.
        decoys = get_decoys()

        dists = []
        fp_errors = []
        # Distinct names so the ligand's chain/pos above are not shadowed.
        for (res_chain, res_pos), fp in fp_preds.items():
            r = find_residue(structure[res_chain], res_pos)
            r_center = lig_center(r.get_atoms())
            dists.append(euclidean(r_center, lig_c))
            fp_errors.append(mse(true_fp, fp))

        plt.title(f)
        # NOTE(review): matplotlib.pyplot has no `distplot`; confirm what
        # `plt` is bound to in this module.
        plt.distplot(dists, fp_errors)
        plt.xlabel("dist to binding site")
        plt.ylabel("dist to fp")
        plt.show()
Пример #15
0
    def _biommCIF_strcuture_to_TEMpy(filename,
                                     structure,
                                     hetatm=False,
                                     water=False):
        """
        PRIVATE FUNCTION to convert an mmCIF file to a Structure Instance.

        filename = name of mmCIF file
        structure = ignored; the file is parsed again from *filename*
        hetatm = Boolean representing whether to add hetatm to the structure. Default and recommended is False.
        water = Boolean representing whether to add water to the structure. Default and recommended is False.

        Returns a BioPy_Structure built from the selected atoms, with empty
        header and footer.
        """
        # Imported if and when the function is executed.
        from Bio.PDB import MMCIFParser as MMCIFParserBiopy

        p = MMCIFParserBiopy()

        atomList = []
        hetatomList = []
        wateratomList = []
        footer = ''
        header = ''
        # Structure id is the file name without directory and without its
        # last four characters (e.g. "1FAT.cif" -> "1FAT"). Only "/" is
        # treated as a path separator.
        cif_code = filename.split("/")[-1]
        structure_id = "%s" % cif_code[:-4]
        structure = p.get_structure(structure_id, filename)

        for res in structure.get_residues():
            # The first character of the hetero field selects the bucket:
            # "H..." hetero residue, "W" water, anything else a regular one.
            hetfield = res.get_id()[0]
            if hetfield[0] == "H":
                bucket = hetatomList
            elif hetfield[0] == "W":
                bucket = wateratomList
            else:
                bucket = atomList
            # Each atom is wrapped once; the original built every wrapper
            # twice and discarded the first.
            for atom in res:
                bucket.append(BioPyAtom(atom))

        # NOTE(review): `append` is resolved from the enclosing module
        # (presumably numpy.append) — confirm.
        if hetatm:
            atomList = append(atomList, hetatomList)
        if water:
            atomList = append(atomList, wateratomList)

        return BioPy_Structure(atomList,
                               filename=filename,
                               header=header,
                               footer=footer)
Пример #16
0
    def Extract_coordinates_from_PDB(self, PDB_file, type):
        '''Return alpha-carbon coordinates of the standard amino acids in a structure file.

        The file is first read as PDB; if that raises, it is read as mmCIF.

        *type* selects how the coordinates are grouped:
            'models' -> one list of CA coordinates per model
            'chains' -> one list of CA coordinates per chain (all models)
            'all'    -> a single flat list of CA coordinates
        Any other value returns None.
        '''
        from Bio.PDB.PDBParser import PDBParser
        from Bio.PDB import MMCIFParser
        Name = ntpath.basename(PDB_file).split('.')[0]

        try:
            # Use the locally imported class: the previous `PDB.PDBParser()`
            # relied on a module-level `PDB` name, and a missing one would
            # have been silently hidden by the fallback below.
            structure = PDBParser().get_structure(Name, PDB_file)
        except Exception:
            structure = MMCIFParser().get_structure(Name, PDB_file)

        def ca_coords(residues):
            # CA coordinates of the standard amino acids among `residues`.
            return [
                residue['CA'].get_coord() for residue in residues
                if is_aa(residue.get_resname(), standard=True)
            ]

        if type == 'models':
            return [
                ca_coords(residue for chain in model for residue in chain)
                for model in structure
            ]
        if type == 'chains':
            return [ca_coords(chain) for model in structure for chain in model]
        if type == 'all':
            return ca_coords(residue for chain in structure.get_chains()
                             for residue in chain)
        return None
Пример #17
0
def call_mmcif(f):
    '''
    Call function for mmcif files.

    Parses the mmCIF file at path *f* (optionally gzip-compressed, detected
    by ".gz" in the path) and feeds the structure into a fresh MMTFEncoder.

    Returns ``(name, mmtf_encoder)`` where *name* is the upper-cased base
    name up to its first dot, or ``None`` when *f* does not contain ".cif".
    '''
    if ".cif" not in f:
        return None

    name = f.split('/')[-1].split('.')[0].upper()
    parser = MMCIFParser()
    if ".gz" in f:
        # Close the decompressed stream once parsing is done instead of
        # leaving the handle open.
        with gzip.open(f, 'rt') as handle:
            structure = parser.get_structure(name, handle)
    else:
        structure = parser.get_structure(name, f)

    mmtf_encoder = MMTFEncoder()
    pass_data_on(input_data=structure,
                 input_function=biopythonInputFunction,
                 output_data=mmtf_encoder)
    return (name, mmtf_encoder)
Пример #18
0
    def Write_PDB(self, initialPDB, Rotation, Translation, N):
        '''Apply a rotation and translation to every atom and write the result as PDB.

        The input is first read as PDB; if that raises, it is read as mmCIF.
        The transformed structure is saved in the current working directory
        as "<N>_<basename of initialPDB>".
        '''
        from Bio.PDB.PDBParser import PDBParser
        from Bio.PDB import MMCIFParser, PDBIO
        file_name = ntpath.basename(initialPDB)
        Name = file_name.split('.')[0]

        try:
            # Use the locally imported class: the previous `PDB.PDBParser()`
            # relied on a module-level `PDB` name, and a missing one would
            # have been silently hidden by the fallback below.
            structure = PDBParser().get_structure(Name, initialPDB)
        except Exception:
            structure = MMCIFParser().get_structure(Name, initialPDB)

        for atom in structure.get_atoms():
            atom.transform(Rotation, Translation)

        io = PDBIO()
        io.set_structure(structure)
        io.save("{}_{}".format(N, file_name))
Пример #19
0
    def test_cealigner_no_transform(self):
        """Aligning 7CFN on 6WQA with transform=False leaves 7CFN's coordinates untouched."""
        parser = MMCIFParser(QUIET=1)
        reference = parser.get_structure("6wqa", "PDB/6WQA.cif")
        mobile = parser.get_structure("7cfn", "PDB/7CFN.cif")

        def snapshot(structure):
            # Plain lists so the before/after comparison is by value.
            return [list(atom.coord) for atom in structure.get_atoms()]

        coords_before = snapshot(mobile)

        aligner = CEAligner()
        aligner.set_reference(reference)
        aligner.align(mobile, transform=False)
        coords_after = snapshot(mobile)

        self.assertAlmostEqual(aligner.rms, 3.83, places=2)
        self.assertEqual(coords_before, coords_after)
Пример #20
0
def process_structrure(pdb_file_chains, save_dir):
    """Save the encoded sequence and CA coordinates of selected chains as .npz files.

    *pdb_file_chains* is a ``(mmCIF path, chain ids)`` pair. For each
    distinct chain id of the first model, residues that have a CA atom are
    encoded through ``_aa3to1_dict`` / ``aa_codes`` and written, with their
    CA coordinates, to ``<save_dir>/<PROT>-<chain>.npz``.

    Returns None. Processing stops silently if the file cannot be built
    into a structure, or as soon as a requested chain is missing.
    """
    pdb_file, chain_ids = pdb_file_chains
    prot = os.path.split(pdb_file)[-1].split(".")[0].upper()

    parser = MMCIFParser()
    try:
        first_model = parser.get_structure(None, pdb_file)[0]
    except PDB.PDBExceptions.PDBConstructionException:
        return

    for wanted_id in set(chain_ids):
        try:
            chain = first_model[wanted_id]
        except KeyError:
            # NOTE(review): this aborts the remaining chains as well, it
            # does not just skip the missing one.
            return

        seq, coords = [], []
        for residue in chain.get_unpacked_list():
            if "CA" not in residue:
                continue
            ca_xyz = residue["CA"].get_coord()
            # Ignore residue if too close to the previous one.
            if coords and np.allclose(coords[-1], ca_xyz):
                continue
            one_letter = _aa3to1_dict.get(residue.get_resname(), "-")
            seq.append(aa_codes.get(one_letter, 0))
            coords.append(ca_xyz)

        if not seq:
            continue
        npz_filename = os.path.join(save_dir, f"{prot}-{chain.id}.npz")
        np.savez_compressed(npz_filename, seq=seq, coords=coords)
        print(f"{npz_filename} saved!")
Пример #21
0
class WriteTest(unittest.TestCase):
    """Round-trip tests for MMCIFIO: write to a temp file, parse it back, compare."""

    @classmethod
    def setUpClass(cls):
        cls.io = MMCIFIO()
        cls.mmcif_parser = MMCIFParser()
        cls.pdb_parser = PDBParser()

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            cls.structure = cls.pdb_parser.get_structure(
                "example", "PDB/1A8O.pdb")
        cls.mmcif_file = "PDB/1A8O.cif"
        cls.mmcif_multimodel_pdb_file = "PDB/1SSU_mod.pdb"
        cls.mmcif_multimodel_mmcif_file = "PDB/1SSU_mod.cif"

    def _roundtrip(self, structure_id, *save_args):
        """Save what is set on ``self.io`` to a temp file and parse it back.

        Extra positional arguments are forwarded to ``self.io.save`` after
        the file name. The temp file is always removed.
        """
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename, *save_args)
            return self.mmcif_parser.get_structure(structure_id, filename)
        finally:
            os.remove(filename)

    def test_mmcifio_write_structure(self):
        """Write a full structure using MMCIFIO."""
        self.io.set_structure(self.structure)
        struct2 = self._roundtrip("1a8o")
        nresidues = len(list(struct2.get_residues()))
        self.assertEqual(len(struct2), 1)
        self.assertEqual(nresidues, 158)

    def test_mmcifio_write_residue(self):
        """Write a single residue using MMCIFIO."""
        residue1 = list(self.structure.get_residues())[0]
        self.io.set_structure(residue1)
        struct2 = self._roundtrip("1a8o")
        nresidues = len(list(struct2.get_residues()))
        self.assertEqual(nresidues, 1)

    def test_mmcifio_write_residue_w_chain(self):
        """Write a single residue (chain id == X) using MMCIFIO."""
        struct1 = self.structure.copy()  # make copy so we can change it
        residue1 = list(struct1.get_residues())[0]

        # Modify parent id
        parent = residue1.parent
        parent.id = "X"

        self.io.set_structure(residue1)
        struct2 = self._roundtrip("1a8o")
        nresidues = len(list(struct2.get_residues()))
        self.assertEqual(nresidues, 1)

        # Assert chain remained the same
        chain_id = [c.id for c in struct2.get_chains()][0]
        self.assertEqual(chain_id, "X")

    def test_mmcifio_write_residue_wout_chain(self):
        """Write a single orphan residue using MMCIFIO."""
        # Work on a copy: detaching the residue must not alter the fixture
        # shared by the other tests of this class.
        struct1 = self.structure.copy()
        residue1 = list(struct1.get_residues())[0]

        residue1.parent = None  # detach residue

        self.io.set_structure(residue1)
        struct2 = self._roundtrip("1a8o")
        nresidues = len(list(struct2.get_residues()))
        self.assertEqual(nresidues, 1)

        # Assert chain is default: "A"
        chain_id = [c.id for c in struct2.get_chains()][0]
        self.assertEqual(chain_id, "A")

    def test_mmcifio_write_custom_residue(self):
        """Write a chainless residue using MMCIFIO."""
        res = Residue.Residue((" ", 1, " "), "DUM", "")
        atm = Atom.Atom("CA", [0.1, 0.1, 0.1], 1.0, 1.0, " ", "CA", 1, "C")
        res.add(atm)

        # Ensure that set_structure doesn't alter parent
        parent = res.parent
        self.io.set_structure(res)
        self.assertIs(parent, res.parent)

        struct2 = self._roundtrip("res")
        latoms = list(struct2.get_atoms())
        self.assertEqual(len(latoms), 1)
        self.assertEqual(latoms[0].name, "CA")
        self.assertEqual(latoms[0].parent.resname, "DUM")
        self.assertEqual(latoms[0].parent.parent.id, "A")

    def test_mmcifio_select(self):
        """Write a selection of the structure using a Select subclass."""

        # Selection class to filter all alpha carbons
        class CAonly(Select):
            """Accepts only CA atoms whose element is carbon."""
            def accept_atom(self, atom):
                return atom.name == "CA" and atom.element == "C"

        self.io.set_structure(self.structure)
        struct2 = self._roundtrip("1a8o", CAonly())
        nresidues = len(list(struct2.get_residues()))
        self.assertEqual(nresidues, 70)

    def test_mmcifio_write_dict(self):
        """Write an mmCIF dictionary out, read it in and compare them."""
        d1 = MMCIF2Dict(self.mmcif_file)

        # Write to temp file
        self.io.set_dict(d1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            d2 = MMCIF2Dict(filename)
            k1 = sorted(d1.keys())
            k2 = sorted(d2.keys())
            self.assertEqual(k1, k2)
            for key in k1:
                self.assertEqual(d1[key], d2[key])
        finally:
            os.remove(filename)

    def test_mmcifio_multimodel(self):
        """Write a multi-model, multi-chain mmCIF file."""
        pdb_struct = self.pdb_parser.get_structure(
            "1SSU_mod_pdb", self.mmcif_multimodel_pdb_file)
        mmcif_struct = self.mmcif_parser.get_structure(
            "1SSU_mod_mmcif", self.mmcif_multimodel_mmcif_file)
        for struct in [pdb_struct, mmcif_struct]:
            self.io.set_structure(struct)
            struct_in = self._roundtrip("1SSU_mod_in")
            self.assertEqual(len(struct_in), 2)
            self.assertEqual(len(struct_in[1]), 2)
            self.assertEqual(
                round(float(struct_in[1]["B"][1]["N"].get_coord()[0]), 3),
                6.259)
Пример #22
0
def mmcif2pdb(pdb, chain, model=0):
    """Yield one PDB file path per requested chain of an mmCIF structure.

    *pdb* is either the path of an existing mmCIF file or a 4-character
    code that is not an existing path; in the latter case the file is
    downloaded into ``tempdir`` (up to 5 attempts) and removed again when
    the generator finishes, is closed, or fails.

    *chain* is a chain id or a list/tuple of them. For each entry a file
    ``<name>_<chain>.pdb`` is written next to the mmCIF file and its path
    is yielded; a ``None`` entry yields ``None``. Only the first character
    of each chain id is used for the selection.

    Raises IOError when the file cannot be downloaded, parsed or written.
    """
    parser = MMCIFParser()
    writer = PDB.PDBIO()

    remove = False
    try:
        if not os.path.isfile(pdb) and len(pdb) == 4:
            pdb_code = pdb
            pdb = os.path.join(tempdir, "{}.cif".format(pdb.lower()))
            attempts = 0
            while not os.path.isfile(pdb) and attempts < 5:
                try:
                    pdb = PDB.PDBList().retrieve_pdb_file(pdb_code,
                                                          pdir=tempdir,
                                                          file_format="mmCif")
                except Exception:
                    # Best effort: a failed attempt is simply retried.
                    pass
                attempts += 1
                time.sleep(1)

            if not os.path.isfile(pdb):
                raise IOError("Cannot download file")
            remove = True

        name = os.path.splitext(pdb)[0]

        try:
            structure = parser.get_structure(name, pdb)
        except Exception:
            raise IOError("Unable to open pdb {}".format(pdb))

        try:
            writer.set_structure(structure)
        except Exception:
            raise IOError("Unable to save pdb {}".format(pdb))

        if not isinstance(chain, (list, tuple)):
            chain = [chain]

        for c in chain:
            if c is None:
                yield None
                continue

            # Make chain one character to fit in PDB format
            new_pdb = "{}_{}.pdb".format(name, c)
            new_chain = c[0]
            try:
                writer.save(new_pdb, select=SelectChain(new_chain, model))
            except Exception:
                raise IOError("Unable to save pdb {}".format(new_pdb))

            if not os.path.isfile(new_pdb):
                raise IOError("Cannot extract chain")

            yield new_pdb
    finally:
        # Runs on normal exhaustion, on an error, and when the consumer
        # stops iterating early, so the downloaded file is never left behind.
        if remove:
            try:
                os.remove(pdb)
            except OSError:
                pass
Пример #23
0
# Benchmark the parsing of a mmCIF file given as an argument

import sys
import time
from Bio.PDB import MMCIFParser

# Path of the mmCIF file to parse, taken from the first command-line argument.
mmcif_filepath = sys.argv[1]
parser = MMCIFParser()

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
parser.get_structure("", mmcif_filepath)
end = time.perf_counter()

# Elapsed parsing time in seconds.
print(end - start)
Пример #24
0
def align_proteingbs_seq(conn, xtalsdbpath):
    """Find cc/full start-stop registers for curated crystals and store them.

    For every CURATEDXTALS row that is not already complete, an alignment
    register is computed for the 'cc' and 'full' enzyme formats: first with
    the 'easy' method, then against the sequence read from the crystal's
    mmCIF file ('xtal'), then, if the row enables it, with 'fuzzy'.
    Successful registers are written back to CURATEDXTALS with the checksum
    of the PROTEINGBS sequence they were computed from.

    :param conn: open connection to the database holding PROTEINGBS and
        CURATEDXTALS. Rows are accessed by column name. No commit is issued
        here — presumably the caller commits.
    :param xtalsdbpath: path object of the crystals database; crystal files
        are resolved relative to its parent directory.
    :raises AssertionError: if *xtalsdbpath* does not exist.
    """
    # check PROTEINGBS exists then
    seqdbutils.check_tables_exist(conn, ['PROTEINGBS', 'CURATEDXTALS'])

    assert (xtalsdbpath.exists()
            ), f'not finding xtalsdbpath {xtalsdbpath}, check keyword args'
    xdbconn = seqdbutils.gracefuldbopen(xtalsdbpath)
    try:
        seqdbutils.check_tables_exist(xdbconn, ['XTALS'])
        xc = xdbconn.cursor()
        c = conn.cursor()
        mmcparser = MMCIFParser()
        ppb = PPBuilder()

        # (register, checksum of the sequence it was computed from) pairs.
        # The checksum is stored per register; previously every update used
        # whichever PROTEINGBS row happened to be fetched last.
        aln_results = []
        c.execute('''SELECT * FROM CURATEDXTALS''')
        for curatedrow in c.fetchall():

            c.execute('''SELECT * FROM PROTEINGBS WHERE acc=(?)''',
                      (curatedrow['acc'], ))
            pgbrow = c.fetchone()
            # NOTE(review): unpickling database content; only safe while the
            # database is trusted.
            pgbseq = pickle.loads(pgbrow['pklgbsr']).seq

            already_done = (
                pgbrow['seq_checksum'] == curatedrow['pgbsr_seqchecksum']
                and curatedrow['pgbsr_ccstart'] is not None
                and curatedrow['pgbsr_ccstop'] is not None
                and curatedrow['pgbsr_fullstart'] is not None
                and curatedrow['pgbsr_fullstop'] is not None)
            if already_done:
                print(f'skipping previously-completed {curatedrow["pdbid"]}')
                continue  # skip this one!

            print(f'now examining {curatedrow["pdbid"]}')
            for enzyme_format in ['cc', 'full']:
                aln_register = AlnRegister(enzyme_format, curatedrow['acc'],
                                           curatedrow['pdbid'])
                # just copy over cc values to full if possible---
                # NOTE(review): this compares nt positions with ct positions
                # within each format; confirm that is the intended test.
                if (enzyme_format == 'full'
                        and curatedrow['ntccpos'] == curatedrow['ctccpos']
                        and curatedrow['ntfullpos'] == curatedrow['ctfullpos']):
                    # The last stored register is this row's 'cc' one, which
                    # always goes through the else-branch and is appended.
                    cc_register = aln_results[-1][0]
                    aln_register.start = cc_register.start
                    aln_register.stop = cc_register.stop
                    aln_register.message = 'used cc values for full'
                    aln_register.success = cc_register.success
                else:
                    aln_register = do_register_aln(pgbseq, curatedrow,
                                                   enzyme_format, aln_register,
                                                   'easy')
                    if not aln_register.success:
                        # get sequence from pdb file
                        xc.execute('''SELECT * FROM XTALS WHERE pdbid=(?)''',
                                   (curatedrow['pdbid'], ))
                        xtalrow = xc.fetchone()
                        xtalfpath = xtalsdbpath.parent / xtalrow['relpath']
                        xtalstruct = mmcparser.get_structure(
                            curatedrow['pdbid'], xtalfpath)
                        # Only the first polypeptide of the whole structure
                        # is used.
                        xtalppchain = ppb.build_peptides(xtalstruct)
                        xtalseqstr = str(xtalppchain[0].get_sequence())
                        aln_register = do_register_aln(pgbseq,
                                                       curatedrow,
                                                       enzyme_format,
                                                       aln_register,
                                                       'xtal',
                                                       xtalseqstr=xtalseqstr)
                        if not aln_register.success:
                            if curatedrow['enable_fuzzy']:
                                aln_register = do_register_aln(
                                    pgbseq,
                                    curatedrow,
                                    enzyme_format,
                                    aln_register,
                                    'fuzzy',
                                    xtalseqstr=xtalseqstr)
                            else:
                                aln_register.message = '************ * * * *  * * *  *  *   *   * * *    *   *   *  **  * *    *********************\n'
                                aln_register.message += f'-------could not match. Consider setting enable_fuzzy column to yes for {curatedrow["pdbid"]}-------\n'
                                aln_register.message += '************  *  * *  *  * *  *  * *    *   *   * *   * *  * *   *  * *  *******************\n'
                # Store the register in both branches; the copied 'full'
                # register used to be built and then thrown away.
                aln_results.append((aln_register, pgbrow['seq_checksum']))

        for aln_register, seq_checksum in aln_results:
            print(aln_register.message)
            if aln_register.success:
                update_tuple = (aln_register.start, aln_register.stop,
                                seq_checksum, aln_register.acc)
                if aln_register.enzyme_format == 'cc':
                    c.execute(
                        'UPDATE CURATEDXTALS SET pgbsr_ccstart=(?), '
                        'pgbsr_ccstop=(?), pgbsr_seqchecksum=(?) '
                        'WHERE acc=(?)', update_tuple)
                if aln_register.enzyme_format == 'full':
                    c.execute(
                        'UPDATE CURATEDXTALS SET pgbsr_fullstart=(?), '
                        'pgbsr_fullstop=(?), pgbsr_seqchecksum=(?) '
                        'WHERE acc=(?)', update_tuple)
            else:
                print(
                    f'+++++------FAILED TO FIND {aln_register.enzyme_format} START,STOP FOR {aln_register.pdbid} ({aln_register.acc})------++++++\n'
                )
    finally:
        # Close the crystals database even when alignment or an update fails.
        xdbconn.close()