def parse(self, protein_id=None, filename=None, Protein_model=None):
        """Parse a structure with ProDy's ``parseCIF``.

        Args:
            protein_id: Value handed to ``prody.parseCIF`` when ``filename``
                is ``None``.
            filename: Value handed to ``prody.parseCIF`` instead of
                ``protein_id`` whenever it is not ``None``.
            Protein_model: Forwarded unchanged as the ``model`` keyword of
                ``prody.parseCIF``.

        Returns:
            Whatever ``prody.parseCIF`` returns for the selected source.
        """
        # Use an identity test: ``== None`` dispatches to the argument's
        # ``__eq__`` and can misbehave for objects that overload equality.
        source = protein_id if filename is None else filename
        return prody.parseCIF(source, model=Protein_model)
Exemplo n.º 2
0
def get_structures(structure_chain_id_pairs: typing.List[typing.Tuple[str,
                                                                      str]]):
    """
    Parse one structure per distinct PDB ID found in the given pairs.

    The (pdb_id, chain) pairs are first folded into a mapping keyed by
    pdb_id, so when the same pdb_id occurs more than once only the chain of
    its last occurrence is parsed. Results follow the order in which each
    pdb_id first appears.

    Returns a list with the result of ``pd.parseCIF(pdb_id, chain=chain)``
    for every entry of that mapping.
    """
    chain_by_pdb = {}
    for pdb_id, chain_id in structure_chain_id_pairs:
        chain_by_pdb[pdb_id] = chain_id

    structures = []
    for pdb_id, chain_id in chain_by_pdb.items():
        structures.append(pd.parseCIF(pdb_id, chain=chain_id))
    return structures
Exemplo n.º 3
0
def get_chain_from_trainid(proteinnet_id):
    """
    Given a ProteinNet ID of a training or validation set item, this function returns the
    associated ProDy-parsed chain object, or an entry of ERRORS when loading fails.

    Three-field IDs such as "1A9U_2_A" are split on "_" into PDB ID, model ID and chain ID.
    If the PDB ID field contains "#", only the text after the first "#" is used.
    IDs that do not split into exactly three fields are retried as two-field ASTRAL IDs and
    delegated to get_chain_from_astral_id (with "-" in the ASTRAL part replaced by "_").

    Returns:
        The parsed chain on success; otherwise one of the ERRORS values
        (MISSING_ASTRAL_IDS, FAILED_ASTRAL_IDS, PARSING_ERROR_ATTRIBUTE, PARSING_ERROR,
        PARSING_ERROR_OSERROR, UNKNOWN_EXCEPTIONS, NONE_CHAINS, COORDSET_INDEX_ERROR).
    """
    # Try parsing the ID as a PDB ID. If it fails, assume it's an ASTRAL ID.
    try:
        pdbid, model_id, chid = proteinnet_id.split("_")
        if "#" in pdbid:
            pdbid = pdbid.split("#")[1]
    except ValueError:
        try:
            pdbid, astral_id = proteinnet_id.split("_")
            return get_chain_from_astral_id(astral_id.replace("-", "_"),
                                            ASTRAL_ID_MAPPING)
        except KeyError:
            return ERRORS["MISSING_ASTRAL_IDS"]
        except Exception:
            # Covers the malformed-ID ValueError as well as any other failure
            # of the ASTRAL lookup. Deliberately not a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            return ERRORS["FAILED_ASTRAL_IDS"]

    # Continue loading the chain, given the PDB ID
    try:
        chain = pr.parsePDB(pdbid, chain=chid)
    except Exception:
        # Any PDB parsing failure falls back to the CIF parser.
        try:
            chain = pr.parseCIF(pdbid, chain=chid)
        except AttributeError:
            return ERRORS["PARSING_ERROR_ATTRIBUTE"]
        except pr.proteins.pdbfile.PDBParseError:
            return ERRORS["PARSING_ERROR"]
        except OSError:
            return ERRORS["PARSING_ERROR_OSERROR"]
        except Exception:
            return ERRORS["UNKNOWN_EXCEPTIONS"]

    if chain is None:
        print(proteinnet_id)
        return ERRORS["NONE_CHAINS"]
    # Attempt to select a coordset
    try:
        if chain.numCoordsets() > 1:
            # NOTE(review): model_id is passed to setACSIndex unchanged; confirm
            # whether ProteinNet model numbers need an offset for this index.
            chain.setACSIndex(int(model_id))
    except IndexError:
        return ERRORS["COORDSET_INDEX_ERROR"]

    return chain
Exemplo n.º 4
0
def get_annotations_single(uniprot_id,
                           pdb_id,
                           residue_mapper: dict,
                           chain=None,
                           n_modes=6,
                           full_pdb_solvent_accessibility=True):
    """Build a ``StructureAnnotation`` for a single structure.

    The structure is loaded with ``pd.parseCIF(pdb_id, chain=chain)``; a GNM
    and an ANM are computed on it with ``n_modes`` modes each, and the derived
    quantities (perturbation effectiveness/sensitivity, per-mode hinge
    indices, ENM fluctuations, stiffness, relative solvent accessibility) are
    gathered into the returned ``StructureAnnotation``.

    Args:
        uniprot_id: Stored on the annotation as given.
        pdb_id: Passed to ``pd.parseCIF`` and to the solvent accessibility
            helper.
        residue_mapper: Forwarded to the solvent accessibility helper and
            stored on the annotation.
        chain: Chain selector forwarded to ``pd.parseCIF`` and to the solvent
            accessibility helper.
        n_modes: Number of modes requested from both models; also the number
            of modes for which hinge indices are collected.
        full_pdb_solvent_accessibility: Forwarded unchanged to
            ``get_relative_solvent_accessibility``.

    Returns:
        The constructed ``StructureAnnotation``.
    """
    atoms = pd.parseCIF(pdb_id, chain=chain)

    # Both calc* calls yield a (model, selection) pair; only the GNM's
    # selection is kept.
    gnm, calphas = pd.calcGNM(atoms, n_modes=n_modes)
    anm, _anm_selection = pd.calcANM(atoms, n_modes=n_modes)

    effectiveness, sensitivity = get_perturbations(anm, n_modes)

    hinge_sites = []
    for mode_index in range(n_modes):
        hinge_sites.append(get_hinge_indices(gnm, mode=mode_index))

    fluctuations = get_enm_fluctuations(anm, n_modes)
    stiffness = get_stiffness(anm, calphas, n_modes)
    relative_accessibility = get_relative_solvent_accessibility(
        pdb_id,
        residue_mapper,
        chain=chain,
        full_pdb_solvent_accessibility=full_pdb_solvent_accessibility)

    return StructureAnnotation(
        pdb_id, chain, atoms, calphas, uniprot_id, residue_mapper,
        fluctuations, effectiveness, sensitivity, stiffness,
        relative_accessibility, hinge_sites, anm, gnm)
Exemplo n.º 5
0
def get_chain_from_trainid(pnid):
    """Return a ProDy chain object for a ProteinNet ID. Assumes train/valid ID.

    Three-field IDs are split on "_" into PDB ID, model number and chain ID;
    when the PDB ID field contains "#", only the text after the first "#" is
    used. IDs that do not fit that shape (or whose model number is not an
    integer) are retried as two-field ASTRAL IDs and delegated to
    ``get_chain_from_astral_id``.

    The structure is parsed with ``pr.parsePDB`` first and with ``pr.parseCIF``
    when the PDB parse yields nothing or raises ``OSError``. If the requested
    model cannot be loaded, model 1 is tried instead.

    Args:
        pnid: ProteinNet ID

    Returns:
        One of:
          * the parsed chain, when the requested model loaded and
            ``contains_d_amino_acids`` reported nothing;
          * ``(chain, "MODIFIED_MODEL")`` when model 1 was substituted for the
            requested model (the D-amino-acid check is not run in this case);
          * ``(pnid, errors.ERRORS[...])`` describing why loading failed;
          * whatever ``get_chain_from_astral_id`` returns for ASTRAL IDs.
    """
    modified_model_number = False
    # Try parsing the ID as a PDB ID. If it fails, assume it's an ASTRAL ID.
    try:
        pdbid, chnum, chid = pnid.split("_")
        chnum = int(chnum)
        # If this is a validation set pnid, separate the annotation from the ID
        if "#" in pdbid:
            pdbid = pdbid.split("#")[1]
    except ValueError:
        try:
            pdbid, astral_id = pnid.split("_")
            return get_chain_from_astral_id(astral_id.replace("-", "_"),
                                            ASTRAL_ID_MAPPING)
        except KeyError:
            return pnid, errors.ERRORS["MISSING_ASTRAL_IDS"]
        except Exception:
            return pnid, errors.ERRORS["FAILED_ASTRAL_IDS"]

    # Continue loading the chain, given the PDB ID
    use_pdb = True
    try:
        chain = pr.parsePDB(pdbid, chain=chid, model=chnum)
        if not chain:
            # Record the parser switch *before* calling it, so that the
            # missing-model handler below retries with the parser that failed.
            use_pdb = False
            chain = pr.parseCIF(pdbid, chain=chid, model=chnum)
    # If the file is too large, then we can download the CIF instead
    except OSError:
        try:
            chain = pr.parseCIF(pdbid, chain=chid, model=chnum)
        except IndexError:
            # Requested model is missing from the CIF file: fall back to model 1.
            try:
                chain = pr.parseCIF(pdbid, chain=chid, model=1)
                modified_model_number = True
            except Exception as e:
                print(e)
                return pnid, errors.ERRORS["PARSING_ERROR_OSERROR"]
        except Exception as e:  # EOFERROR
            print(e)
            return pnid, errors.ERRORS["PARSING_ERROR_OSERROR"]
    except AttributeError:
        return pnid, errors.ERRORS["PARSING_ERROR_ATTRIBUTE"]
    except (pr.proteins.pdbfile.PDBParseError, IndexError):
        # For now, if the requested coordinate set doesn't exist, then we will
        # default to using the only (first) available coordinate set
        parse_structure = pr.parsePDB if use_pdb else pr.parseCIF
        try:
            # Probe without a model number to see whether the chain exists at
            # all; only then is substituting model 1 meaningful.
            struct = parse_structure(pdbid, chain=chid)
            if not struct or chnum <= 1:
                return pnid, errors.ERRORS["PARSING_ERROR"]
            chain = parse_structure(pdbid, chain=chid, model=1)
            modified_model_number = True
        except Exception:
            # Includes failures of the probe itself, which would otherwise
            # escape from inside this handler.
            return pnid, errors.ERRORS["PARSING_ERROR"]
    except Exception as e:
        print(e)
        return pnid, errors.ERRORS["UNKNOWN_EXCEPTIONS"]

    if chain is None:
        return pnid, errors.ERRORS["NONE_CHAINS"]

    if modified_model_number:
        return chain, "MODIFIED_MODEL"

    if contains_d_amino_acids(chain):
        return pnid, errors.ERRORS["D_AMINO_ACIDS"]

    return chain