示例#1
0
def fetch_protein(pdb_id: str) -> Tuple[List[str], np.ndarray]:
    """Download a PDB entry and extract its first peptide's sequence and angles.

    The entry is fetched from ``files.rcsb.org``, written to a temporary
    ``<pdb_id>.pdb`` file in the current working directory, parsed, and the
    temporary file is removed again (also when parsing fails).

    Args:
        pdb_id: Identifier used to build the download URL and file name.

    Returns:
        A tuple ``(aa_sequence, phi_psi_angles)``:

        * ``aa_sequence`` -- the characters of the first peptide's sequence.
        * ``phi_psi_angles`` -- array with one row of phi and one row of psi
          values in degrees; angles reported as ``None`` by the parser are
          replaced with 180.

    Raises:
        requests.HTTPError: If the download responds with an error status.
        IndexError: If no peptide could be built from the structure.
    """
    pdb_file = f"{pdb_id}.pdb"
    pdb_file_path = os.path.join(os.getcwd(), pdb_file)
    protein_url = f"https://files.rcsb.org/download/{pdb_file}"

    # Fail fast on HTTP errors instead of parsing an error page as PDB data;
    # the timeout keeps an unresponsive server from hanging the caller.
    req = requests.get(protein_url, timeout=30)
    req.raise_for_status()

    try:
        with open(pdb_file_path, "w") as f:
            f.write(req.text)

        # Parse the same absolute path that was just written.
        structure = PDBParser().get_structure(pdb_id, pdb_file_path)
        peptides = PPBuilder().build_peptides(structure)[0]
    finally:
        # Portable cleanup that also runs when writing or parsing raises.
        if os.path.exists(pdb_file_path):
            os.remove(pdb_file_path)

    aa_sequence = list(peptides.get_sequence())

    # Compare against None explicitly: an angle of exactly 0.0 rad is a valid
    # value and must not be mistaken for a missing one.
    phi_psi_angles = np.array([
        (
            180 if phi is None else np.rad2deg(phi),
            180 if psi is None else np.rad2deg(psi),
        )
        for phi, psi in peptides.get_phi_psi_list()
    ]).T

    return aa_sequence, phi_psi_angles
示例#2
0
def find_pdb_limits(pdb_path):
    """Return the numbering limits and sequence of a PDB file's first peptide.

    Args:
        pdb_path: Path of the PDB file handed to ``PDBParser``.

    Returns:
        A tuple ``(start, end, seq)`` where ``start`` and ``end`` are element 1
        of the id of the first and last residue of the peptide, and ``seq`` is
        the peptide's sequence as returned by ``get_sequence()``.
    """
    structure = PDBParser().get_structure('', pdb_path)

    # Only the first polypeptide is inspected; the file is assumed to
    # contain a single one.
    first_peptide = PPBuilder().build_peptides(structure)[0]

    head_residue = first_peptide[0]
    first_number = head_residue.get_id()[1]
    tail_residue = first_peptide[-1]
    last_number = tail_residue.get_id()[1]

    return (first_number, last_number, first_peptide.get_sequence())