Exemplo n.º 1
0
 def read_mmCIF_file(structure_id, filename, hetatm=False, water=False):
     """
     Parse an mmCIF file with Biopython and convert it to a Structure instance.

     Arguments:
         *structure_id*
             identifier handed to the Biopython parser for this structure
         *filename*
             name of the mmCIF file to read
         *hetatm*
             Boolean forwarded to the converter (hetero atoms).
             Default and recommended is False.
         *water*
             Boolean forwarded to the converter (water atoms).
             Default and recommended is False.

     Return:
         Structure Instance, as produced by
         mmCIFParser._biommCIF_strcuture_to_TEMpy
     """
     from .Bio.PDB import MMCIFParser as MMCIFParserBiopy
     # Parser is created with its default options.
     biopy_parser = MMCIFParserBiopy()
     parsed = biopy_parser.get_structure(structure_id, filename)
     return mmCIFParser._biommCIF_strcuture_to_TEMpy(
         filename, parsed, hetatm, water)
Exemplo n.º 2
0
 def fetch_mmCIF(structure_id, filename, hetatm=False, water=False):
     """
     Download an mmCIF file and create a Structure instance based upon it.

     The file is retrieved from the URL built from *structure_id*, written to
     *filename*, parsed with Biopython and then converted.

     Arguments:
         *structure_id*
             structure_id code of the mmCIF file; also used to build the URL
         *filename*
             local file name the downloaded mmCIF file is saved to
         *hetatm*
             Boolean forwarded to the converter (hetero atoms).
             Default is False.
         *water*
             Boolean forwarded to the converter (water atoms).
             Default and recommended is False.

     Return:
         Structure Instance, as produced by
         mmCIFParser._biommCIF_strcuture_to_TEMpy
     """
     from Bio.PDB import MMCIFParser as MMCIFParserBiopy
     # urlretrieve lives in urllib.request on Python 3 and directly in urllib
     # on Python 2; the bare urllib.urlretrieve call failed on Python 3.
     try:
         from urllib.request import urlretrieve
     except ImportError:
         from urllib import urlretrieve

     p = MMCIFParserBiopy()
     # NOTE(review): legacy RCSB download path - confirm it is still served.
     url = 'http://www.rcsb.org/pdb/files/%s.cif' % structure_id
     urlretrieve(url, filename)
     structure = p.get_structure(structure_id, filename)
     return mmCIFParser._biommCIF_strcuture_to_TEMpy(
         filename, structure, hetatm, water)
Exemplo n.º 3
0
 def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
     """DSSP built from the 1AS5 mmCIF file should hold 24 entries."""
     cif_parser = MMCIFParser()
     cif_path = "PDB/1AS5.cif"
     # Only the first model of the parsed structure is handed to DSSP.
     first_model = cif_parser.get_structure("1AS5", cif_path)[0]
     result = DSSP(first_model, cif_path)
     self.assertEqual(len(result), 24)
Exemplo n.º 4
0
def structure_scanning(pdb, ligname, graph, model, edge_map, embed_dim):
    """
        Predict a fingerprint for every candidate residue of a structure.

        For each residue of the first model whose name is A, U, C, G or
        `ligname`:
            - build the pocket graph around it,
            - convert that graph with `nx_to_dgl`,
            - run `model` on it and threshold the prediction at 0.5.
        :returns: `residue_preds` dictionary keyed by (chain id, residue number)
                  with the thresholded fingerprint prediction as value.
    """
    from data_processor.build_dataset import get_pocket_graph

    first_model = MMCIFParser(QUIET=True).get_structure("", pdb)[0]
    accepted_names = ['A', 'U', 'C', 'G', ligname]

    residue_preds = {}
    for res in tqdm(list(first_model.get_residues())):
        # Anything that is neither a standard base nor the ligand is skipped.
        if res.resname not in accepted_names:
            continue
        chain_id = res.get_parent().id
        position = res.id[1]
        res_info = ":".join(["_", chain_id, res.resname, str(position)])
        pocket = get_pocket_graph(pdb, res_info, graph)
        _, dgl_pocket = nx_to_dgl(pocket, edge_map, embed_dim)
        _, raw_pred = model(dgl_pocket)
        residue_preds[(chain_id, position)] = raw_pred.detach().numpy() > 0.5
    return residue_preds
Exemplo n.º 5
0
 def test_dssp_with_mmcif_file(self):
     """DSSP built from the 2BEG mmCIF file should hold 130 entries."""
     cif_parser = MMCIFParser()
     cif_path = "PDB/2BEG.cif"
     # Only the first model of the parsed structure is handed to DSSP.
     first_model = cif_parser.get_structure("2BEG", cif_path)[0]
     result = DSSP(first_model, cif_path)
     self.assertEqual(len(result), 130)
Exemplo n.º 6
0
    def load_test_structure(self, pdb_code):
        """Parse ``<pdb_code>.cif`` from TEST_STRUCTURE_DIR.

        Returns a tuple of the parsed structure and the object built by
        ``BiopythonToMmcifResidueIds.create`` from the parser's mmCIF dict.
        """
        parser = MMCIFParser(QUIET=True)
        cif_path = self.TEST_STRUCTURE_DIR / f'{pdb_code}.cif'
        structure = parser.get_structure(pdb_code, cif_path)

        # The parser keeps the dictionary it just read, so it is reused here
        # instead of parsing the file a second time.
        residue_ids = BiopythonToMmcifResidueIds.create(parser._mmcif_dict)
        return structure, residue_ids
Exemplo n.º 7
0
 def cifReader(self, name, file):
     """Parse an mmCIF file and return the resulting structure.

     This is a best-effort reader: if anything goes wrong while parsing, a
     message is printed and ``None`` is returned instead of raising.

     name -- identifier given to the parsed structure
     file -- mmCIF file passed to ``MMCIFParser.get_structure``
     """
     try:
         return MMCIFParser().get_structure(name, file)
     except Exception:
         # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
         # are no longer swallowed. The message is printed for every failure,
         # not only for a missing file.
         print("Something went wrong: File not found")
         return None
Exemplo n.º 8
0
 def __get_structure__(self, file_path):
     """Parse *file_path* and return the structure.

     The structure is named after the file's base name without extension.
     MMCIFParser is used when the extension contains 'cif', PDBParser
     otherwise.
     """
     stem, suffix = os.path.splitext(os.path.basename(file_path))
     parser_cls = MMCIFParser if 'cif' in suffix else PDBParser
     return parser_cls().get_structure(stem, file_path)
def load_pdb(path):
    """Parse the mmCIF file at *path* and return the structure.

    The structure is always given the id 'structure 1'.
    """
    # For PDB-format input, PDBParser(PERMISSIVE=1) would be used instead.
    cif_parser = MMCIFParser()
    return cif_parser.get_structure('structure 1', path)
Exemplo n.º 10
0
    def setUpClass(self):
        """Create the shared writer, parsers, example structure and file names."""
        self.io = MMCIFIO()
        self.mmcif_parser = MMCIFParser()
        self.pdb_parser = PDBParser()

        # Construction warnings from the example PDB file are silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            self.structure = self.pdb_parser.get_structure(
                "example", "PDB/1A8O.pdb")

        # Paths of the fixture files.
        self.mmcif_file = "PDB/1A8O.cif"
        self.mmcif_multimodel_pdb_file = "PDB/1SSU_mod.pdb"
        self.mmcif_multimodel_mmcif_file = "PDB/1SSU_mod.cif"
Exemplo n.º 11
0
 def creator(parser=parser):
     """Parse ``dst`` into a structure with id ``pid``.

     The given parser is tried first. On ValueError the file is assumed to be
     mmCIF and, when PARSE_CIF is set, parsed again with MMCIFParser; otherwise
     the error is re-raised. ``self.freemem()`` is called in every case.
     """
     try:
         ret = parser.get_structure(pid, file=dst)
     except ValueError as e:  # assume it's a .cif
         if not PARSE_CIF:
             raise e
         cif_parser = MMCIFParser(QUIET=True)
         ret = cif_parser.get_structure(pid, dst)
     finally:
         self.freemem()
     return ret
Exemplo n.º 12
0
    def test_cealigner_nucleic(self):
        """Aligning 1LCD onto itself should give an RMS of zero."""
        cif_path = "PDB/1LCD.cif"

        cif_parser = MMCIFParser(QUIET=1)
        # The same file is parsed twice so reference and mobile are
        # separate structure objects.
        reference = cif_parser.get_structure("1lcd_ref", cif_path)
        mobile = cif_parser.get_structure("1lcd_mob", cif_path)

        ce = CEAligner()
        ce.set_reference(reference)
        ce.align(mobile)

        self.assertAlmostEqual(ce.rms, 0.0, places=3)
Exemplo n.º 13
0
def print_input_file(structure_file, ss2_file=None):
    """Print sequence, secondary structure and coordinates of a structure.

    Output, in order:
      1. the one-letter sequence of the non-hetero residues of the first model,
      2. the 3-state secondary structure read from *ss2_file*, or a string of
         "C" of the same length when no file is given,
      3. one line per residue with the N, CA, C and centroid coordinates,
         each value rounded to 3 decimals.

    The parser is chosen from the file extension: "cif"/"mmcif" use
    MMCIFParser, anything else uses PDBParser.
    """
    def coord_str(coord):
        return " ".join([str(round(c, 3)) for c in coord])

    file_ext = os.path.basename(structure_file).rsplit(".", 1)[-1].lower()
    if file_ext in ("cif", "mmcif"):
        from Bio.PDB import MMCIFParser
        parser = MMCIFParser()
    else:
        from Bio.PDB import PDBParser
        parser = PDBParser()
    struc = parser.get_structure("", structure_file)

    letters = []
    coords = []
    backbone_names = ("N", "CA", "C", "O")
    for chain in struc[0]:
        for res in chain:
            # Skip hetero and water residues
            if res.id[0] != " ":
                continue
            letters.append(three_to_one_aas[res.get_resname()])
            if res.get_resname() == "GLY":
                # Glycine has no sidechain: place a fake centroid 1 Å from Cα
                # along the direction pointing away from C and N.
                ca = res["CA"].get_coord()
                d = ca - res["C"].get_coord() + ca - res["N"].get_coord()
                coord_cent = ca + d / np.linalg.norm(d)
            else:
                # Centroid coordinates of sidechain heavy atoms
                sidechain = [
                    atom.get_coord() for atom in res
                    if atom.get_name() not in backbone_names
                    and atom.element != "H"
                ]
                coord_cent = np.array(sidechain).mean(0)
            coords.append([
                res["N"].get_coord(),
                res["CA"].get_coord(),
                res["C"].get_coord(),
                coord_cent,
            ])

    seq = "".join(letters)
    print(seq)
    if ss2_file:
        # Third column of every non-empty, non-comment line of the PSIPRED
        # ss2 file holds the 3-state prediction.
        with open(ss2_file) as f:
            ss_pred = "".join(
                line.split()[2] for line in f
                if line.rstrip() and not line.startswith("#")
            )
        assert len(seq) == len(ss_pred), f"Sequence length is {len(seq)} but SS prediction length is {len(ss_pred)}"
        print(ss_pred)
    else:
        print("C" * len(seq))

    for coord_n, coord_ca, coord_c, coord_cent in coords:
        print(f"{coord_str(coord_n)} {coord_str(coord_ca)} {coord_str(coord_c)} {coord_str(coord_cent)}")
Exemplo n.º 14
0
def get_convert_cifs(url, cif_path, pdb_path):
    """Download an mmCIF file and write it back out in PDB format.

    url      -- address the mmCIF file is retrieved from
    cif_path -- where the downloaded mmCIF file is stored
    pdb_path -- where the converted PDB file is written

    Download errors and a TypeError during conversion are reported with a
    printed message; nothing is returned in either case.
    """
    try:
        url_req.urlretrieve(url, cif_path)
    except (url_err.URLError, url_err.HTTPError):
        print("!!!HTTP or URL error, couldn't get " + url + '.')
        return

    try:
        structure = MMCIFParser().get_structure('', cif_path)
        writer = PDBIO()
        writer.set_structure(structure)
        writer.save(pdb_path)
    except TypeError:
        print('Problem making pdb file')
    else:
        print('^^^SUCCESSFULLY CONVERTED CIF TO PDB')
Exemplo n.º 15
0
def scanning_analyze():
    """
        Visualize results of scanning on PDB.

        For every annotated pocket file (named like
        `1fmn_#0.1:A:FMN:36.nx_annot.p`) the structure is scanned with
        `structure_scanning`, and for each predicted residue the distance to
        the ligand centre and the `mse` between the true and the predicted
        fingerprint are collected and plotted.

        Graphs with more than 100 nodes and structures whose scan raises are
        skipped.
    """
    from data_processor.build_dataset import find_residue, lig_center

    model, edge_map, embed_dim = load_model('small_no_rec_2',
                                            '../data/annotated/pockets_nx')
    for f in os.listdir("../data/annotated/pockets_nx"):
        pdbid = f.split("_")[0]
        _, chain, ligname, pos = f.replace(".nx_annot.p", "").split(":")
        pos = int(pos)
        print(chain, ligname, pos)
        # Files are opened in a `with` block so the handles are closed
        # (previously they were left open).
        # NOTE(review): pickle must only be used on trusted local data.
        with open(f'../data/RNA_Graphs/{pdbid}.pickle', 'rb') as graph_file:
            graph = pickle.load(graph_file)
        if len(graph.nodes()) > 100:
            continue
        cif_path = f'../data/all_rna_prot_lig_2019/{pdbid}.cif'
        try:
            fp_preds = structure_scanning(cif_path, ligname, graph, model,
                                          edge_map, embed_dim)
        except Exception as e:
            print(e)
            continue
        parser = MMCIFParser(QUIET=True)
        structure = parser.get_structure("", cif_path)[0]
        lig_res = find_residue(structure[chain], pos)
        lig_c = lig_center(lig_res.get_atoms())

        with open("../data/all_ligs_maccs.p", 'rb') as fp_file:
            fp_dict = pickle.load(fp_file)
        true_fp = fp_dict[ligname]
        dists = []
        jaccards = []
        # NOTE(review): the result is not used below; the call is kept in
        # case get_decoys has side effects - confirm and remove.
        decoys = get_decoys()
        for (res_chain, res_pos), fp in fp_preds.items():
            # Separate names so the ligand's chain/pos are not overwritten.
            r = find_residue(structure[res_chain], res_pos)
            r_center = lig_center(r.get_atoms())
            dists.append(euclidean(r_center, lig_c))
            jaccards.append(mse(true_fp, fp))
        plt.title(f)
        # NOTE(review): confirm `plt` really provides `distplot`.
        plt.distplot(dists, jaccards)
        plt.xlabel("dist to binding site")
        plt.ylabel("dist to fp")
        plt.show()
Exemplo n.º 16
0
    def setUpClass(cls):
        """Detect a DSSP executable and create the shared parsers.

        ``dssp --version`` is tried first; if that exits with an error,
        ``dssp -h`` is tried. If ``dssp`` cannot be started at all,
        ``mkdssp --version`` is tried. The version found is stored in
        ``cls.dssp_version`` (it stays "0.0.0" when not reported).
        Raises unittest.SkipTest when no executable is found.
        """
        cls.dssp_version = "0.0.0"
        version_pattern = r"\s*([\d.]+)"
        silenced = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
        found = False

        try:
            try:
                # Newer versions of DSSP
                reported = subprocess.check_output(
                    ["dssp", "--version"], universal_newlines=True)
                cls.dssp_version = re.search(version_pattern,
                                             reported).group(1)
                found = True
            except subprocess.CalledProcessError:
                # Older versions of DSSP
                subprocess.check_call(["dssp", "-h"], **silenced)
                found = True
        except OSError:
            # "dssp" could not be run; fall back to the "mkdssp" name.
            try:
                reported = subprocess.check_output(
                    ["mkdssp", "--version"], universal_newlines=True)
                cls.dssp_version = re.search(version_pattern,
                                             reported).group(1)
                found = True
            except OSError:
                pass

        if not found:
            raise unittest.SkipTest(
                "Install dssp if you want to use it from Biopython.")

        cls.pdbparser = PDBParser()
        cls.cifparser = MMCIFParser()
Exemplo n.º 17
0
def parse_structure(path):
    """
    Parse a structure file with Biopython's PDB or mmCIF parser.

    The parser is chosen from the file extension ('pdb'/'ent' -> PDBParser,
    'cif' -> MMCIFParser); any other extension raises IOError. A parsing
    failure is logged and re-raised wrapped in a plain Exception.

    Returns a tuple: (result of validate_structure, number of distinct
    chain ids, number of residues).
    """
    # setup logging
    logger = logging.getLogger('Prodigy')
    logger.info('[+] Reading structure file: {0}'.format(path))

    # Split "name.ext" at the last dot; without a dot the whole name is
    # treated as the extension and the structure name is empty.
    fname = os.path.basename(path)
    sname, _, s_ext = fname.rpartition('.')

    if s_ext not in ('pdb', 'ent', 'cif'):
        raise IOError(
            "[!] Structure format '{0}' is not supported. Use '.pdb' or '.cif'."
            .format(s_ext))

    if s_ext in ('pdb', 'ent'):
        sparser = PDBParser(QUIET=1)
    else:
        sparser = MMCIFParser()

    try:
        s = sparser.get_structure(sname, path)
    except Exception as exeption:
        logger.error("[!] Structure '{0}' could not be parsed".format(sname))
        raise Exception(exeption)

    # Validation runs first, before the chains and residues are counted.
    validated = validate_structure(s)
    n_chains = len({c.id for c in s.get_chains()})
    n_residues = len(list(s.get_residues()))
    return (validated, n_chains, n_residues)
Exemplo n.º 18
0
    def __init__(self, structure, name='structure', path='.'):
        """Wrap a structure given as a file name or as an Entity.

        structure -- file name ending in pdb/ent/cif (joined with *path*
                     and parsed), or an Entity that is used as-is
        name      -- id given to a parsed structure; stored as ``self.name``
        path      -- directory joined in front of a file name

        Raises ValueError for an unknown file extension or argument type.
        """
        if isinstance(structure, str):
            file_type = str(structure).split('.')[-1].lower()
            if file_type in ('pdb', 'ent'):
                # PDB format
                loader = PDBParser(PERMISSIVE=1, QUIET=True)
            elif file_type == 'cif':
                # mmCIF format
                loader = MMCIFParser(QUIET=True)
            else:
                raise ValueError(
                    "Unknown filetype for structure file name: {}".format(
                        structure))
            self.structure = loader.get_structure(
                name, os.path.join(path, structure))
        elif isinstance(structure, Entity):
            # already a structure object: keep it untouched
            self.structure = structure
        else:
            raise ValueError(
                "Unknown type for input argument 'structure': {}".format(
                    str(structure)))

        # properties
        self.name = name

        # cachable properties, filled in lazily elsewhere
        self.cache = {}
        self._atom_KDTree = None
        self._atom_list = None
        self._surface_residues = None
Exemplo n.º 19
0
    def _biommCIF_strcuture_to_TEMpy(filename,
                                     structure,
                                     hetatm=False,
                                     water=False):
        """
        PRIVATE FUNCTION to convert to Structure Instance
        filename = name of mmCIF file
        structure = parsed structure; see the review note below
        hetatm = Boolean representing whether to add hetatm to the structure. Default and recommended is False.
        water = Boolean representing whether to add water to the structure. Default and recommended is False.

        Returns a BioPy_Structure built from the collected atoms, with empty
        header and footer.
        """
        # Imported only when the function is executed.
        from Bio.PDB import MMCIFParser as MMCIFParserBiopy

        p = MMCIFParserBiopy()

        atomList = []
        hetatomList = []
        wateratomList = []
        footer = ''
        header = ''
        # Structure id is the file's base name without its last 4 characters.
        cif_code = filename.split("/")[-1]
        structure_id = "%s" % cif_code[:-4]
        # NOTE(review): the file is parsed again here, so the `structure`
        # argument passed by the caller is replaced - confirm this is wanted.
        structure = p.get_structure(structure_id, filename)
        for res in structure.get_residues():
            # First character of the hetero field selects the target list:
            # "H" -> hetero atoms, "W" -> water, anything else -> regular.
            flag = res.get_id()[0][0]
            if flag == "H":
                target = hetatomList
            elif flag == "W":
                target = wateratomList
            else:
                target = atomList
            # Each atom is wrapped once; the previous code built every
            # BioPyAtom twice and threw the first one away.
            for atom in res:
                target.append(BioPyAtom(atom))
        if hetatm:
            atomList = append(atomList, hetatomList)
        if water:
            atomList = append(atomList, wateratomList)

        return BioPy_Structure(atomList,
                               filename=filename,
                               header=header,
                               footer=footer)
Exemplo n.º 20
0
    def Extract_coordinates_from_PDB(self, PDB_file, type):
        '''Return the alpha carbon coordinates of the standard amino acids.

        PDB_file -- structure file; parsed with PDBParser first and with
                    MMCIFParser if that raises
        type     -- selects the grouping of the result:
                    'models' -> one list of coordinates per model
                    'chains' -> one list of coordinates per chain
                    'all'    -> a single flat list of coordinates
                    any other value returns None
        '''
        from Bio.PDB.PDBParser import PDBParser
        from Bio.PDB import MMCIFParser
        Name = ntpath.basename(PDB_file).split('.')[0]

        # Use the PDBParser imported above. The previous ``PDB.PDBParser()``
        # relied on an outer ``PDB`` name, and the bare ``except:`` hid a
        # NameError by silently switching to the mmCIF parser.
        try:
            structure = PDBParser().get_structure(Name, PDB_file)
        except Exception:
            structure = MMCIFParser().get_structure(Name, PDB_file)

        def ca_coords(residues):
            # CA coordinates of the standard amino acids among `residues`.
            return [
                residue['CA'].get_coord() for residue in residues
                if is_aa(residue.get_resname(), standard=True)
            ]

        if type == 'models':
            return [
                ca_coords(residue for chain in model for residue in chain)
                for model in structure
            ]
        if type == 'chains':
            return [
                ca_coords(chain) for model in structure for chain in model
            ]
        if type == 'all':
            return ca_coords(residue for chain in structure.get_chains()
                             for residue in chain)
        return None
Exemplo n.º 21
0
def call_mmcif(f):
    '''
    Call function for mmcif files.

    When the path *f* contains ".cif", the file is parsed (through gzip if the
    path also contains ".gz") and the structure is passed to ``pass_data_on``
    with a fresh ``MMTFEncoder``.

    Returns a tuple (name, encoder), where name is the upper-cased part of the
    file name before its first dot. Returns None for any other path.
    '''
    if ".cif" not in f:
        return None

    name = f.split('/')[-1].split('.')[0].upper()
    parser = MMCIFParser()
    if ".gz" in f:
        # Open gz files in text mode; the handle is closed once parsing is
        # done (it used to be left open).
        with gzip.open(f, 'rt') as handle:
            structure = parser.get_structure(name, handle)
    else:
        structure = parser.get_structure(name, f)
    mmtf_encoder = MMTFEncoder()
    pass_data_on(input_data=structure,
                 input_function=biopythonInputFunction,
                 output_data=mmtf_encoder)
    return (name, mmtf_encoder)
Exemplo n.º 22
0
    def Write_PDB(self, initialPDB, Rotation, Translation, N):
        '''Apply a rotation and translation to every atom and save as PDB.

        initialPDB  -- structure file; parsed with PDBParser first and with
                       MMCIFParser if that raises
        Rotation    -- rotation passed to ``atom.transform``
        Translation -- translation passed to ``atom.transform``
        N           -- prefix of the output file, which is written to the
                       current directory as "<N>_<basename of initialPDB>"
        '''
        from Bio.PDB.PDBParser import PDBParser
        from Bio.PDB import MMCIFParser, PDBIO
        Name = ntpath.basename(initialPDB).split('.')[0]

        # Use the PDBParser imported above. The previous ``PDB.PDBParser()``
        # relied on an outer ``PDB`` name, and the bare ``except:`` hid a
        # NameError by silently switching to the mmCIF parser.
        try:
            structure = PDBParser().get_structure(Name, initialPDB)
        except Exception:
            structure = MMCIFParser().get_structure(Name, initialPDB)

        for atom in structure.get_atoms():
            atom.transform(Rotation, Translation)
        io = PDBIO()
        io.set_structure(structure)
        io.save("{}_{}".format(N, ntpath.basename(initialPDB)))
Exemplo n.º 23
0
def StructureParser(pdbfile):
    """Return a quiet parser matching the extension of *pdbfile*.

    '.pdb' gives a PDBParser and '.cif' gives an MMCIFParser. For any other
    name an error message is printed and the process exits with status 1.
    """
    import sys

    if pdbfile.endswith('.pdb'):
        return PDBParser(QUIET=True)
    if pdbfile.endswith('.cif'):
        return MMCIFParser(QUIET=True)
    # print() with one argument behaves the same on Python 2 and 3; the old
    # print statement was a syntax error on Python 3.
    print('ERROR: a protein structure file shall end with either .pdb or .cif')
    # sys.exit is always available, unlike the site-provided exit().
    sys.exit(1)
Exemplo n.º 24
0
    def test_cealigner_no_transform(self):
        """Aligning 7CFN on 6WQA with transform=False must not move 7CFN."""
        cif_parser = MMCIFParser(QUIET=1)
        reference = cif_parser.get_structure("6wqa", "PDB/6WQA.cif")
        mobile = cif_parser.get_structure("7cfn", "PDB/7CFN.cif")

        # Snapshot of the coordinates before the alignment.
        coords_before = [list(atom.coord) for atom in mobile.get_atoms()]

        ce = CEAligner()
        ce.set_reference(reference)
        ce.align(mobile, transform=False)

        # Snapshot afterwards; it has to match the first one exactly.
        coords_after = [list(atom.coord) for atom in mobile.get_atoms()]

        self.assertAlmostEqual(ce.rms, 3.83, places=2)
        self.assertEqual(coords_before, coords_after)
Exemplo n.º 25
0
def set_parser(protein_file):
    '''
    Return an MMCIFParser when is_cif() accepts the file, a PDBParser otherwise.
    '''
    if is_cif(protein_file):
        return MMCIFParser()
    return PDBParser()
        def is_holo(ordinal, s: Dict):
            """Run the IsHolo analyzer on one structure description.

            *s* supplies the keys 'pdb_code', 'path' and 'chain_id';
            *ordinal* is only used in the log message. The analyzer is called
            with the first model and the selected chain of that model.
            """
            logger.info(f'processing {ordinal}-th structure {s["pdb_code"]}')

            analyzer = IsHolo()

            # All warnings raised while parsing are suppressed.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                parsed = MMCIFParser().get_structure(s['pdb_code'], s['path'])
                model = parsed[0]

            chain = model[s['chain_id']]
            return analyzer(model, chain)
    def setUpClass(cls):
        """Skip the tests when ``msms`` cannot be run, else create the parsers."""
        probe_cmd = ["msms", "-h"]
        try:
            # Only the ability to start the program matters; the output is
            # not inspected.
            subprocess.check_output(probe_cmd,
                                    universal_newlines=True,
                                    stderr=subprocess.STDOUT)
        except OSError:
            raise unittest.SkipTest(
                "Install MSMS if you want to use it from Biopython.")

        cls.pdbparser = PDBParser()
        cls.cifparser = MMCIFParser()
Exemplo n.º 28
0
def get_parser(file_):
    """ Get a parser appropriate to the file format.

    The format is taken from the file extension: ".pdb" gives a PDBParser and
    ".cif" gives an MMCIFParser.

    Raises ValueError for any other extension.
    """
    ext = os.path.splitext(file_)[1]
    # Only the parser that is needed gets constructed; previously both were
    # built on every call just to pick one from a dict.
    if ext == ".pdb":
        return PDBParser()
    if ext == ".cif":
        return MMCIFParser()
    raise ValueError("Unknown molecular file format: {}".format(ext))
Exemplo n.º 29
0
def process_structrure(pdb_file_chains, save_dir):
    """Save CA coordinates and encoded residues of selected chains as .npz.

    pdb_file_chains -- pair (mmCIF file path, iterable of chain ids)
    save_dir        -- directory receiving one "<PROT>-<chain>.npz" per chain,
                       where PROT is the upper-cased file name up to its
                       first dot

    Only the first model is used. The function returns early (without
    processing the remaining chains) when the file cannot be parsed or a
    requested chain id is missing.
    """
    pdb_file, chain_ids = pdb_file_chains
    prot = os.path.split(pdb_file)[-1].split(".")[0].upper()

    cif_parser = MMCIFParser()
    try:
        model = cif_parser.get_structure(None, pdb_file)[0]
    except PDB.PDBExceptions.PDBConstructionException:
        return

    for c_id in set(chain_ids):
        try:
            chain = model[c_id]
        except KeyError:
            return

        seq, coords = [], []
        for residue in chain.get_unpacked_list():
            if "CA" not in residue:
                continue
            xyz = residue["CA"].get_coord()
            # Ignore residue if too close to the previous one.
            if coords and np.allclose(coords[-1], xyz):
                continue
            # Unknown residue names map to "-", unknown letters to code 0.
            one_letter = _aa3to1_dict.get(residue.get_resname(), "-")
            seq.append(aa_codes.get(one_letter, 0))
            coords.append(xyz)

        if not seq:
            continue
        npz_filename = os.path.join(save_dir, f"{prot}-{chain.id}.npz")
        np.savez_compressed(npz_filename, seq=seq, coords=coords)
        print(f"{npz_filename} saved!")
Exemplo n.º 30
0
    def add_struc_path(self, struc_path):
        """Read a single-chain structure file and store its sequence.

        Sets ``self.struc_path``, ``self.seq_ix_mapping`` (1-based position in
        the extracted sequence -> residue number in the file) and
        ``self.struc_seq`` (a SeqRecord of the extracted sequence).

        Residues with a non-blank insertion code, hetero residues ("H_...")
        and waters ("W") are left out.

        Raises IOError for an extension other than .pdb/.cif or when the file
        does not contain exactly one chain.
        """
        from Bio.SeqRecord import SeqRecord
        from Bio.Seq import Seq
        from Bio.PDB import PDBParser, MMCIFParser
        from Bio.SeqUtils import seq1

        self.struc_path = struc_path
        extension = ntpath.splitext(self.struc_path)[1]
        if extension == ".pdb":
            parser = PDBParser()
        elif extension == ".cif":
            parser = MMCIFParser()
        else:
            raise IOError(
                "Unrecognized structure file type! Please use .pdb or .cif files!"
            )

        structure = parser.get_structure("none", self.struc_path)
        chains = list(structure.get_chains())
        if len(chains) != 1:
            raise IOError(
                "When using structure files, they need to have a single chain!"
            )

        kept = []
        for resi in chains[0].get_residues():
            het_flag, _, insertion_code = resi.get_id()
            if not re.match(r' ', insertion_code):
                continue
            if re.match(r'^H_', het_flag):
                continue
            if re.match(r'W', het_flag):
                continue
            kept.append(resi)

        names = [resi.get_resname().replace(' ', '') for resi in kept]
        sequence = ''.join(names)
        seq_ix_mapping = {
            position: int(resi.get_id()[1])
            for position, resi in enumerate(kept, start=1)
        }

        # Probe the first retained residue to decide whether the names can be
        # converted with seq1. The previous code used the residue *number* as
        # a list index, which raised IndexError/KeyError or probed an
        # unrelated residue.
        if names and len(seq1(names[0])) != 0:
            sequence = seq1(sequence)
        self.seq_ix_mapping = seq_ix_mapping
        self.struc_seq = SeqRecord(Seq(sequence))