示例#1
0
    def pdb_array_creator(self, structure=None, filename=None):
        ''' Build a nested OrderedDict of atoms for a structure: the outer keys are protein segment
            labels, the inner keys are generic numbers written with a '.' (e.g. '5.47'), and the
            values are lists of Bio.PDB atoms.

            Two code paths exist:
              * if StructureSeqNumOverwrite reports a non-empty pdb_wt_table, the atoms are taken
                from the stored rotamers of the TM1-TM7/H8 residues (this path also prints debug
                output for every residue and pretty-prints the result);
              * otherwise the PDB is run through the GenericNumbering sequence parser and the
                preferred chain is walked residue by residue.

            NOTE(review): `structure` is dereferenced unconditionally below
            (structure.protein_conformation, structure.pdb_code, ...), so calling this with
            structure=None and only a filename looks unsupported - confirm against callers.

            @param structure: Structure, Structure object of protein. When using structure, leave filename=None. \n
            @param filename: str, filename of pdb to be parsed. When given, it is passed to the
                   numbering parser instead of the PDB text stored on the structure.
            @return: OrderedDict {segment label: OrderedDict {generic number: [atoms]}}. In the
                     YCM/CSD fallback the inner key can be the plain sequence number as a string.
            @raise AssertionError: if the collected number and residue lists differ in length.
        '''
        # seq_nums_overwrite_cutoff_dict = {'4PHU':2000, '4LDL':1000, '4LDO':1000, '4QKX':1000, '5JQH':1000, '5TZY':2000, '5KW2':2000}
        # Use the PDB text stored on the structure unless a filename was supplied.
        if structure != None and filename == None:
            io = StringIO(structure.pdb_data.pdb)
        else:
            io = filename
        gn_array = []
        residue_array = []
        # pdb_struct = PDB.PDBParser(QUIET=True).get_structure(structure.pdb_code.index, io)[0]

        # Collect the generic numbers (with 'x' replaced by '.') of this conformation's residues.
        residues = Residue.objects.filter(
            protein_conformation=structure.protein_conformation)
        gn_list = []
        for i in residues:
            try:
                gn_list.append(
                    ggn(i.display_generic_number.label).replace('x', '.'))
            except:
                # Residues whose label cannot be converted are simply left out of gn_list.
                pass

        ssno = StructureSeqNumOverwrite(structure)
        ssno.seq_num_overwrite('pdb')
        # A non-empty pdb_wt_table selects the rotamer-based path.
        if len(ssno.pdb_wt_table) > 0:
            residues = residues.filter(protein_segment__slug__in=[
                'TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7', 'H8'
            ]).order_by('sequence_number')
            output = OrderedDict()
            for r in residues:
                # NOTE(review): debug output, emitted once per residue.
                print(r, r.display_generic_number.label,
                      r.protein_segment.slug)
                if r.protein_segment.slug == None:
                    continue
                if r.protein_segment.slug not in output:
                    output[r.protein_segment.slug] = OrderedDict()
                # Parse the PDB text of the rotamer chosen by right_rotamer_select.
                rotamer = Rotamer.objects.filter(residue=r)
                rotamer = self.right_rotamer_select(rotamer)
                rota_io = StringIO(rotamer.pdbdata.pdb)
                p = PDB.PDBParser()
                parsed_rota = p.get_structure('rota', rota_io)
                for chain in parsed_rota[0]:
                    for res in chain:
                        atom_list = []
                        for atom in res:
                            # Skip hydrogens
                            if atom.get_id().startswith('H'):
                                continue
                            if atom.get_id() == 'N':
                                # The N atom's B-factor is set to the label text before 'x'.
                                bw, gn = r.display_generic_number.label.split(
                                    'x')
                                atom.set_bfactor(bw)
                            elif atom.get_id() == 'CA':
                                # The CA atom's B-factor is set to "<segment>.<text after 'x'>";
                                # a three-character position is stored with a leading '-' and
                                # its last character dropped.
                                bw, gn = r.display_generic_number.label.split(
                                    'x')
                                gn = "{}.{}".format(bw.split('.')[0], gn)
                                if len(gn.split('.')[1]) == 3:
                                    gn = '-' + gn[:-1]
                                atom.set_bfactor(gn)
                            atom_list.append(atom)
                        output[r.protein_segment.slug][ggn(
                            r.display_generic_number.label).replace(
                                'x', '.')] = atom_list
            # NOTE(review): debug output of the whole result.
            pprint.pprint(output)
            return output
        else:
            # Let the sequence parser assign generic numbers to the PDB.
            assign_gn = as_gn.GenericNumbering(
                pdb_file=io,
                pdb_code=structure.pdb_code.index,
                sequence_parser=True)
            pdb_struct = assign_gn.assign_generic_numbers_with_sequence_parser(
            )
            pref_chain = structure.preferred_chain
            parent_prot_conf = ProteinConformation.objects.get(
                protein=structure.protein_conformation.protein.parent)
            parent_residues = Residue.objects.filter(
                protein_conformation=parent_prot_conf)
            # Sequence number of the last residue returned for the parent conformation.
            last_res = list(parent_residues)[-1].sequence_number
            # Only the first character of a multi-character preferred chain is used.
            if len(pref_chain) > 1:
                pref_chain = pref_chain[0]
            for residue in pdb_struct[pref_chain]:
                # Only residues with a CA atom whose B-factor lies strictly between -9.1 and 9.1
                # are considered; that B-factor is read as a generic number.
                if 'CA' in residue and -9.1 < residue['CA'].get_bfactor(
                ) < 9.1:
                    use_resid = False
                    gn = str(residue['CA'].get_bfactor())
                    # Pad a single decimal to two digits.
                    if len(gn.split('.')[1]) == 1:
                        gn = gn + '0'
                    # A negative value has its sign dropped and '1' appended.
                    if gn[0] == '-':
                        gn = gn[1:] + '1'
                    # Exceptions
                    if structure.pdb_code.index == '3PBL' and residue.get_id(
                    )[1] == 331:
                        use_resid = True
                    elif structure.pdb_code.index == '6QZH' and residue.get_id(
                    )[1] == 1434:
                        use_resid = True
                    #################################################
                    elif gn in gn_list:
                        gn_array.append(gn)
                        residue_array.append(residue.get_list())
                    else:
                        use_resid = True
                    # Fall back to the residue's sequence number (as a string) as the key.
                    if use_resid:
                        gn_array.append(str(residue.get_id()[1]))
                        residue_array.append(residue.get_list())
            # Pre-create one (possibly empty) inner dict per segment label.
            output = OrderedDict()
            for num, label in self.segment_coding.items():
                output[label] = OrderedDict()
            # Both lists are appended to in lockstep above; this is a sanity check.
            if len(gn_array) != len(residue_array):
                raise AssertionError()

            for gn, res in zip(gn_array, residue_array):
                if '.' in gn:
                    # Entry already carries a generic number: file it under its segment.
                    seg_num = int(gn.split('.')[0])
                    seg_label = self.segment_coding[seg_num]
                    # Segment-8 entries are dropped while 'TM7' has no entries.
                    if seg_num == 8 and len(output['TM7']) == 0:
                        continue
                    else:
                        output[seg_label][gn] = res
                else:
                    # Entry is keyed by sequence number: look the residue up to find its
                    # generic number.
                    try:
                        found_res, found_gn = None, None
                        try:
                            found_res = Residue.objects.get(
                                protein_conformation=structure.
                                protein_conformation,
                                sequence_number=gn)
                        except:
                            # Exception for res 317 in 5VEX, 5VEW
                            if structure.pdb_code.index in [
                                    '5VEX', '5VEW'
                            ] and gn == '317' and res[0].get_parent(
                            ).get_resname() == 'CYS':
                                found_res = Residue.objects.get(
                                    protein_conformation=parent_prot_conf,
                                    sequence_number=gn)
                            #####################################
                        # If no residue was found, found_res is still None here and the
                        # attribute access raises into the outer except below.
                        found_gn = str(
                            ggn(found_res.display_generic_number.label)
                        ).replace('x', '.')

                        # Exception for res 318 in 5VEX, 5VEW
                        if structure.pdb_code.index in [
                                '5VEX', '5VEW'
                        ] and gn == '318' and res[0].get_parent().get_resname(
                        ) == 'ILE' and found_gn == '5.47':
                            found_gn = '5.48'
                        #####################################
                        if -9.1 < float(found_gn) < 9.1:
                            # Skip residues that consist of a single atom.
                            if len(res) == 1:
                                continue
                            # Skip residues numbered beyond the parent's last residue.
                            if int(gn) > last_res:
                                continue
                            seg_label = self.segment_coding[int(
                                found_gn.split('.')[0])]
                            output[seg_label][found_gn] = res
                    except:
                        # Fallback for residues named YCM or CSD: look them up on the parent
                        # conformation and keep them only if their segment slug starts with
                        # 'T' or 'H'. Any other failure is silently ignored.
                        if res[0].get_parent().get_resname() == 'YCM' or res[
                                0].get_parent().get_resname() == 'CSD':
                            try:
                                found_res = Residue.objects.get(
                                    protein_conformation=parent_prot_conf,
                                    sequence_number=gn)
                            except:
                                continue
                            if found_res.protein_segment.slug[0] not in [
                                    'T', 'H'
                            ]:
                                continue
                            try:
                                found_gn = str(
                                    ggn(found_res.display_generic_number.label)
                                ).replace('x', '.')
                            except:
                                # No usable generic number: key by the sequence number.
                                found_gn = str(gn)
                            output[
                                found_res.protein_segment.slug][found_gn] = res
        return output
示例#2
0
    def get_simrna_ready(self, renumber_residues=True, ignore_op3=False):
        """Rebuild the structure in a SimRNA-ready form and reload ``self.lines``.

        The current structure is written to a scratch PDB file, re-read with
        Biopython and rebuilt residue by residue:

        - only the first model is kept,
        - AMBER/QRNA style residue names (``RC3``, ``G5``, ``RA`` ...) are
          mapped to ``C``/``U``/``G``/``A``,
        - residues are renumbered from 1 in every chain if
          ``renumber_residues`` is true,
        - if the first residue of a chain has no ``P`` atom, the phosphate
          group of ``data/PO3_inner.pdb`` (next to this module) is superimposed
          on the residue's O4', C4', C3' atoms and its P, OP1, OP2 and O5'
          atoms are added,
        - for G/A/U/C residues the expected heavy atoms are copied in a fixed
          order; residues with any other name are dropped (they still consume
          a residue number).

        Atoms that could not be found are listed on stdout as ``REMARK 000``
        lines. The result is saved to a second scratch file and read back
        through ``StrucFile`` to replace ``self.lines``.

        :param renumber_residues: renumber residues of each chain from 1.
        :param ignore_op3: if true, do not copy P, OP1 and OP2 of the first
            residue of each chain. The name was referenced by the original
            body without being defined, so it is now an explicit option.

        .. warning:: requires: Biopython"""
        try:
            from Bio import PDB
            from Bio.PDB import PDBIO
        except ImportError:
            sys.exit('Error: Install biopython to use this function (pip biopython)')

        import warnings

        warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
        warnings.filterwarnings('ignore', '.*with given element *',)

        # Atoms copied for each base, in output order.
        expected_atoms = {
            'G': "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split(),
            'A': "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split(),
            'U': "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split(),
            'C': "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split(),
        }
        # hack for amber/qrna: RC3, C3, RC5, C5, RC -> C (same for U, G, A)
        resname_aliases = {
            template % base: base
            for base in 'CUGA'
            for template in ('R%s3', '%s3', 'R%s5', '%s5', 'R%s')
        }
        leading_phosphate = ('P', 'OP1', 'OP2')

        # NOTE: fixed scratch paths are kept as-is; concurrent runs on the same
        # machine will overwrite each other's files.
        ftmp = '/tmp/out.pdb'
        self.write(ftmp, v=False)

        parser = PDB.PDBParser()
        struct = parser.get_structure('', ftmp)
        model = struct[0]

        s2 = PDB.Structure.Structure(struct.id)
        m2 = PDB.Model.Model(model.id)

        chains2 = []
        missing = []

        for chain in model.get_list():
            c2 = PDB.Chain.Chain(chain.id)

            c = 1  # new chain, goes from 1 if renumber True
            # Iterate over a snapshot because residues are modified in place.
            for r in list(chain):
                r.resname = r.resname.strip()
                r.resname = resname_aliases.get(r.resname, r.resname)

                r2 = PDB.Residue.Residue(r.id, r.resname, r.segid)
                if renumber_residues:
                    r2.id = (r2.id[0], c, r2.id[2])  # renumber residues

                # The first residue of a chain may lack its phosphate group;
                # graft one from the template so every residue is complete.
                if c == 1 and not any(a.id == 'P' for a in r):
                    currfn = __file__
                    if currfn == '':
                        path = '.'
                    else:
                        path = os.path.dirname(currfn)
                    if os.path.islink(currfn):
                        path = os.path.dirname(os.readlink(path + os.sep + os.path.basename(currfn)))

                    po3_struc = PDB.PDBParser().get_structure('', path + '/data/PO3_inner.pdb')
                    po3 = list(po3_struc[0].get_residues())[0]

                    r_atoms = [r["O4'"], r["C4'"], r["C3'"]]
                    po3_atoms = [po3["O4'"], po3["C4'"], po3["C3'"]]

                    sup = PDB.Superimposer()
                    sup.set_atoms(r_atoms, po3_atoms)
                    sup.apply(po3_struc.get_atoms())  # to all atoms of po3

                    r.add(po3['P'])
                    r.add(po3['OP1'])
                    r.add(po3['OP2'])
                    # The template's O5' replaces the residue's own, if any.
                    if "O5'" in r:
                        del r["O5'"]
                    r.add(po3["O5'"])

                atom_names = expected_atoms.get(r.resname)
                if atom_names is not None:
                    for an in atom_names:
                        if c == 1 and ignore_op3 and an in leading_phosphate:
                            continue
                        try:
                            r2.add(r[an])
                        except KeyError:
                            missing.append([an, chain.id, r, c])
                    c2.add(r2)

                c += 1
            chains2.append(c2)

        io = PDBIO()
        s2.add(m2)
        for chain2 in chains2:
            m2.add(chain2)
        io.set_structure(s2)
        fout = '/tmp/outout.pdb'  # hack
        io.save(fout)

        if missing:
            print('REMARK 000 Missing atoms:')
            for i in missing:
                print('REMARK 000  +', i[0], i[1], i[2], 'residue #', i[3])
        s = StrucFile(fout)
        self.lines = s.lines
示例#3
0
def calculate_adjecency(fname,
                        pdb_chain,
                        pdb_list,
                        seq,
                        get_angle=True,
                        include_nones=True,
                        **kw_args):
    '''
    Compute the C-alpha distance matrix (and optionally side-chain angles)
    for the residues listed in pdb_list.

    params:
        fname (str) path to .pdb structure file
        pdb_chain (str) chain name; used as the structure id, must not be None
        pdb_list (list) residue identifiers: residue number followed by the
            insertion code, if any (e.g. '12', '12A'), or None for a residue
            that is present in the sequence but not in the structure
        seq (str) chain residues sequence - used to match sizes
        get_angle (bool) also return the result of side_chains_angles
        include_nones (bool) keep None entries as NaN coordinates; when False
            they are skipped
        kw_args: accepted and ignored
    returns:
        distance (np.ndarray) when get_angle is False, otherwise the tuple
        (distance, angle_dist)
    raises:
        AssertionError if pdb_chain is None, fname is not a file, the
            structure does not contain exactly one model, or (with
            include_nones) the number of residues differs from len(seq)
        KeyError if an identifier from pdb_list is not in the structure or a
            residue has no CA atom
    '''
    # check inputs
    assert pdb_chain is not None
    assert os.path.isfile(fname), f'no such file {fname}'

    with open(fname, 'rt') as f:
        structure = bp.PDBParser().get_structure(pdb_chain, f)
    assert len(structure) == 1
    model = structure[0]

    # Map "<number><insertion code>" -> residue. Residues of all chains in the
    # model share this dict, so later chains overwrite equal identifiers.
    resid2res = {
        ''.join(str(part) for part in res.full_id[-1][1:]).strip(): res
        for res in model.get_residues()
    }
    coords_a = []
    coords_b = []
    for pdb_idx in pdb_list:
        if pdb_idx is None:
            # Residue present in sequence but *not* in structure
            if include_nones:
                # np.nan, not np.NaN: the capitalised alias is gone in NumPy 2.
                coords_a.append(np.array([np.nan, np.nan, np.nan]))
                coords_b.append(np.array([np.nan, np.nan, np.nan]))
            continue

        res = resid2res[pdb_idx]
        coords_a.append(res.child_dict['CA'].coord)

        if res.resname == 'GLY':
            # Glycine has no C-beta; build a virtual one.
            cb = generate_Cb(res)
        else:
            try:
                cb = res.child_dict['CB'].coord
            except KeyError:
                print(
                    f'CB atom missing for res {res.resname} in {pdb_chain}; dispatching monkeys to address this fatal issue'
                )
                cb = generate_Cb(res)
        coords_b.append(cb)

    if include_nones:
        assert len(coords_a) == len(coords_b) == len(seq)

    xyz_alpha = np.array(coords_a, dtype=np.float32)
    xyz_beta = np.array(coords_b, dtype=np.float32)
    #alpha_dist = prairwaise_vec(xyz_alpha)
    #beta_dist = prairwaise_vec(xyz_beta)

    # This spews errors because of the NaNs (an elegant solution, by the way!)
    #parallel_side_chains = (alpha_dist < beta_dist)*1
    distance = distance_matrix(xyz_alpha, xyz_alpha)
    if None in pdb_list and include_nones:
        # With missing residues present, force every pair of sequence
        # neighbours (first off-diagonals) to a distance of 5.
        size = distance.shape[0]
        lower = np.arange(1, size, 1, dtype=int)
        upper = np.arange(0, size - 1, 1, dtype=int)
        distance[lower, upper] = 5
        distance[upper, lower] = 5
    if get_angle:
        angle_dist = side_chains_angles(xyz_alpha, xyz_beta)
        return distance, angle_dist
    return distance
示例#4
0
def get_seq(path):
    """Describe every standard amino acid of the PDB file at *path*.

    Returns a dict keyed by ``get_num(residue)``. Each value is a dict with:

    - ``"name"``: ``convert_name_AA`` of the three-letter residue name,
    - ``"enfouissement"`` (burial): ``1 - d / d_max`` where ``d`` is the
      distance between the residue's atom barycentre and
      ``get_info(path)["baryres"]`` (presumably the barycentre of the whole
      structure - verify against ``get_info``) and ``d_max`` the largest such
      distance,
    - ``"struct"``: ``"H"`` inside a HELIX record range, ``"F"`` inside a SHEET
      record range, ``"V"`` otherwise,
    - ``"id"``: running index in insertion order.

    Residues of all models and chains share the dict, so a later residue with
    the same number replaces an earlier one; HELIX/SHEET ranges are applied by
    residue number only (the chain id is not checked).
    """
    # numpy is only needed for the explicit accumulator below.
    import numpy as np

    dico = get_info(path)

    parser = pdb.PDBParser()
    structure = parser.get_structure(path, path)

    seq = dict()
    maxenf = 1e-10  # tiny positive start value so the division below is safe

    AA = {
        'CYS', 'ASP', 'SER', 'GLN', 'LYS', 'ILE', 'PRO', 'THR', 'PHE', 'ASN',
        'GLY', 'HIS', 'LEU', 'ARG', 'TRP', 'ALA', 'VAL', 'GLU', 'TYR', 'MET'
    }

    for model in structure:
        for chain in model:
            for residue in chain:
                if residue.get_resname() not in AA:
                    continue

                # Barycentre of the residue's atoms.
                bary_res = np.zeros(3)
                num_atom = 0
                for atom in residue:
                    bary_res += atom.get_coord()
                    num_atom += 1
                if num_atom == 0:
                    continue
                bary_res /= num_atom

                aminoacid = dict()
                aminoacid["name"] = convert_name_AA(residue.get_resname())
                aminoacid["enfouissement"] = sum(
                    (bary_res - dico["baryres"])**2)**0.5
                if aminoacid["enfouissement"] > maxenf:
                    maxenf = aminoacid["enfouissement"]
                aminoacid["struct"] = "V"
                seq[get_num(residue)] = aminoacid

    # Secondary structure from the HELIX / SHEET records. Sequence numbers are
    # 4 characters wide: HELIX columns 22-25 and 34-37, SHEET columns 23-26
    # and 34-37 (1-based), hence the slices below.
    with open(path, "r") as handle:
        for line in handle:
            record = line[:6]
            if record == "HELIX ":
                start, end, code = int(line[21:25]), int(line[33:37]), "H"
            elif record == "SHEET ":
                start, end, code = int(line[22:26]), int(line[33:37]), "F"
            else:
                continue
            for i in range(start, end + 1):
                if i in seq:
                    seq[i]["struct"] = code

    # Turn distances into a burial score and number the residues.
    for idref, entry in enumerate(seq.values()):
        entry["enfouissement"] = 1 - entry["enfouissement"] / maxenf
        entry["id"] = idref

    ID = [entry["id"] for entry in seq.values()]
    # Warn when no id matches its position (in practice: nothing was parsed).
    if not any(ident == pos for pos, ident in enumerate(ID)):
        print("WARNING - problème id des séquences", path[4:-4], ID)

    return seq
示例#5
0
    def draw_bounding_boxes(self, bg, s):
        '''
        Add the bounding boxes of every residue pair of a stem to the scene.
        Nothing happens when no pdb file has been set.

        For each position in the stem and each of its two strands, the box
        edges are appended to self.boxes as a (corners, colour) tuple and two
        spheres are added at opposite corners: yellow shades for the first
        strand, purple shades for the second.

        @param bg: The BulgeGraph
        @param s: The name of the stem
        '''
        if self.pdb_file is None:
            return

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            parsed = bp.PDBParser().get_structure('temp', self.pdb_file)
        first_chain = list(parsed.get_chains())[0]

        # One three-character code per corner: for each axis (x, y, z) '0'
        # takes the coordinate from the first extent of the box and '1' from
        # the second. Consecutive pairs form the 12 edges of the box: four
        # along z, four along x, four along y.
        corner_codes = (
            '000 001 010 011 100 101 110 111 '
            '000 100 010 110 011 111 001 101 '
            '000 010 100 110 001 011 101 111'
        ).split()

        for position in range(bg.stem_length(s)):
            (origin, bases, bb) = ftug.bounding_boxes(bg, first_chain, s,
                                                      position)
            for strand in range(2):
                (low, high) = bb[strand]
                extents = (low, high)

                # Express every corner in the standard basis, relative to origin.
                transformed = []
                for code in corner_codes:
                    local = [extents[int(pick)][axis]
                             for axis, pick in enumerate(code)]
                    transformed.append(
                        origin + cuv.change_basis(np.array(local),
                                                  cuv.standard_basis,
                                                  bases[strand]))
                corners = np.array(transformed)

                if strand != 0:
                    self.add_sphere(corners[0], 'purple', 0.4, '',
                                    [238 / 255., 130 / 255., 238 / 255.])
                    self.add_sphere(corners[7], 'purple', 0.4, '',
                                    [208 / 255., 32 / 255., 144 / 255.])
                    self.boxes += [(corners, 'purple')]
                else:
                    self.boxes += [(corners, 'yellow')]
                    self.add_sphere(corners[0], 'yellow', 0.4, '',
                                    [238 / 255., 221 / 255., 130 / 255.])
                    self.add_sphere(corners[7], 'yellow', 0.4, '',
                                    [184 / 255., 134 / 255., 11 / 255.])
示例#6
0
def compute_dihedrals(pdbfilename):
    """Print phi, psi, omega and chi1-chi4 for the standard residues of a structure.

    The file may be a PDB or mmCIF file, optionally gzip-compressed (name
    ending in ``.gz``); the format is chosen from ``.pdb`` / ``.cif`` appearing
    in the name, with ``.cif`` taking precedence.

    For every model and chain, one line is printed per non-hetero residue, as
    long as the chain has at least two of them. Each line consists of
    right-aligned 8-character columns: file label (the name without ``.gz`` and
    without its last four characters), model id, chain id, residue number,
    residue name, phi, psi, omega, chi1, chi2, chi3, chi4. Angles that are not
    defined at a chain end (phi and omega of the first residue, psi of the last)
    are printed as ``999.0``.

    Hetero residues (``residue.id[0] != ' '``, which includes modified residues
    stored as HETATM) are skipped.

    :param pdbfilename: path of the structure file.
    :raises ValueError: if the name indicates neither a PDB nor an mmCIF file.
    """
    missing_angle = 999.00

    compressed = pdbfilename.lower().endswith('.gz')
    # The name without the compression suffix drives both format detection and
    # the label printed in the first column.
    base_name = pdbfilename[0:-3] if compressed else pdbfilename

    lowered = base_name.lower()
    if '.cif' in lowered:
        parser = PDB.MMCIFParser(QUIET=True)
    elif '.pdb' in lowered:
        parser = PDB.PDBParser(QUIET=True)
    else:
        # Fail before opening anything instead of hitting an unbound name later.
        raise ValueError(
            f"cannot tell structure format from file name: {pdbfilename!r}")

    opener = gzip.open if compressed else open
    # The parser reads the whole file inside get_structure, so the handle can
    # be closed right afterwards.
    with opener(pdbfilename, 'rt') as handle:
        structure = parser.get_structure("PDB", handle)

    label = base_name[0:-4]

    def print_row(model, chain, residue, phi, psi, omega):
        # Side-chain angles are computed here so every row gets them the same way.
        chis = [
            compute(structure, model, chain, residue)
            for compute in (compute_chi1, compute_chi2, compute_chi3,
                            compute_chi4)
        ]
        fields = [label, model.id, chain.id, residue.id[1], residue.resname,
                  phi, psi, omega] + chis
        print(''.join(str(field).rjust(8) for field in fields))

    for model in structure:
        for chain in model:
            # Sliding window over consecutive standard residues of the chain.
            prev_residue = None
            curr_residue = None
            for residue in chain:
                if residue.id[0] != ' ':
                    continue

                if prev_residue is None:
                    prev_residue = residue
                    continue

                if curr_residue is None:
                    # Second residue seen: the first one only has a psi angle.
                    curr_residue = residue
                    psi = compute_psi(structure, model, chain, prev_residue,
                                      curr_residue)
                    print_row(model, chain, prev_residue, missing_angle, psi,
                              missing_angle)
                    continue

                # Three residues available: report the middle one in full.
                phi = compute_phi(structure, model, chain, prev_residue,
                                  curr_residue)
                psi = compute_psi(structure, model, chain, curr_residue,
                                  residue)
                omega = compute_omega(structure, model, chain, prev_residue,
                                      curr_residue)
                print_row(model, chain, curr_residue, phi, psi, omega)

                prev_residue, curr_residue = curr_residue, residue

            if curr_residue is not None:
                # Last residue of the chain: no psi angle.
                phi = compute_phi(structure, model, chain, prev_residue,
                                  curr_residue)
                omega = compute_omega(structure, model, chain, prev_residue,
                                      curr_residue)
                print_row(model, chain, curr_residue, phi, missing_angle,
                          omega)
示例#7
0
def structure_from_pdb_string(pdb_string, name=''):
    '''Parse PDB-formatted text and return the structure, using *name* as its id.'''
    # Wrap the text in an in-memory file so the parser can read it like a file.
    return PDB.PDBParser().get_structure(name, io.StringIO(pdb_string))
示例#8
0
'''

Extract the protein sequence from a PDB chain.

-----------------------------------------------------------
(c) 2013 Allegra Via and Kristian Rother
    Licensed under the conditions of the Python License

    This code appears in section 21.4.2 of the book
    "Managing Biological Data with Python".
-----------------------------------------------------------
'''

from Bio import PDB
from Bio.PDB.Polypeptide import PPBuilder

# Parse the example entry and print the sequence of every polypeptide that
# PPBuilder assembles from it.
parser = PDB.PDBParser()
structure = parser.get_structure("2DN1", "dn/pdb2dn1.ent")
ppb = PPBuilder()
peptides = ppb.build_peptides(structure)
for pep in peptides:
    # A parenthesised single-argument print produces the same output on
    # Python 2 and Python 3; the bare print statement only parses on 2.
    print(pep.get_sequence())
    
示例#9
0
    def run_RMSD(self, file1, file2):
        ''' Calculates 4 RMSD values between two GPCR pdb files. It compares the two files using sequence numbers.
            1. overall all atoms RMSD
            2. overall backbone atoms RMSD
            3. 7TM all atoms RMSD
            4. 7TM backbone atoms RMSD

            @param file1: first pdb file, handed to PDBParser.get_structure. \n
            @param file2: second pdb file, handed to PDBParser.get_structure. \n
            @return: list [rmsd1, rmsd2, rmsd3, rmsd4] in the order listed above.
        '''
        parser = PDB.PDBParser(QUIET=True)
        pdb1 = parser.get_structure('struct1', file1)[0]
        pdb2 = parser.get_structure('struct2', file2)[0]

        assign_gn1 = as_gn.GenericNumbering(structure=pdb1)
        pdb1 = assign_gn1.assign_generic_numbers()
        assign_gn2 = as_gn.GenericNumbering(structure=pdb2)
        pdb2 = assign_gn2.assign_generic_numbers()

        # Prefer a chain id present in both models. The inner break only
        # leaves the inner loop, so when several ids are shared the last
        # shared chain of pdb1 is the one that ends up selected.
        chain1 = chain2 = None
        for i in pdb1:
            for j in pdb2:
                if i.get_id() == j.get_id():
                    chain1 = i.get_id()
                    chain2 = j.get_id()
                    break

        # No shared chain id: fall back to the first chain of each model.
        if chain1 is None:
            for i in pdb1.get_chains():
                chain1 = i.get_id()
                break
        if chain2 is None:
            for i in pdb2.get_chains():
                chain2 = i.get_id()
                break

        def collect(chain, all_residues, tm_residues):
            # Fill both dicts, keyed by integer sequence number, from the
            # non-hetero residues of one chain.
            for residue in chain:
                if residue.get_full_id()[3][0] != ' ':
                    continue
                seq_num = int(residue.get_id()[1])
                all_residues[seq_num] = residue
                try:
                    # NOTE(review): presumably assign_generic_numbers() stores
                    # the generic number in the CA b-factor, so this window
                    # selects the 7TM residues - confirm against as_gn.
                    if -8.1 < residue['CA'].get_bfactor() < 8.1:
                        tm_residues[seq_num] = residue
                except (KeyError, TypeError):
                    # Residue without a CA atom, or without a numeric
                    # b-factor: it simply does not count as a 7TM residue.
                    pass

        pdb_array1, pdb_array2 = OrderedDict(), OrderedDict()
        pdb_array3, pdb_array4 = OrderedDict(), OrderedDict()
        collect(pdb1[chain1], pdb_array1, pdb_array3)
        collect(pdb2[chain2], pdb_array2, pdb_array4)

        overall_all1, overall_all2, overall_backbone1, overall_backbone2, o_a, o_b = self.create_lists(
            pdb_array1, pdb_array2)
        TM_all1, TM_all2, TM_backbone1, TM_backbone2, t_a, t_b = self.create_lists(
            pdb_array3, pdb_array4)

        rmsd1 = self.calc_RMSD(overall_all1, overall_all2, o_a)
        rmsd2 = self.calc_RMSD(overall_backbone1, overall_backbone2, o_b)
        rmsd3 = self.calc_RMSD(TM_all1, TM_all2, t_a)
        rmsd4 = self.calc_RMSD(TM_backbone1, TM_backbone2, t_b)
        return [rmsd1, rmsd2, rmsd3, rmsd4]
示例#10
0
    def run_RMSD_list(self, files, seq_nums=None):
        ''' Calculates 4 RMSD values between a list of GPCR pdb files. It compares the files using sequence and generic
        numbers. First file in the list has to be the reference file.
            1. overall all atoms RMSD
            2. overall backbone atoms RMSD
            3. 7TM all atoms RMSD
            4. 7TM backbone atoms RMSD

            @param files: list of pdb files, each handed to PDBParser.get_structure; the first one is the reference. \n
            @param seq_nums: optional collection of sequence numbers; when given, only those residues enter the
            "overall" comparison.

            Nothing is returned. Results are written to self.number_of_residues_superposed,
            self.number_of_atoms_superposed and self.rmsds under the key 'reference' for the first file and
            'file<N>' (N = 2, 3, ...) for the others, each mapping the names in self.four_scores to a value.
        '''
        # One result slot per input file.
        for c, _ in enumerate(files, 1):
            key = 'reference' if c == 1 else 'file{}'.format(c)
            self.number_of_residues_superposed[key] = OrderedDict()
            self.number_of_atoms_superposed[key] = OrderedDict()
            self.rmsds[key] = OrderedDict()

        # Parse every file and run generic numbering on its first model.
        parser = PDB.PDBParser(QUIET=True)
        count = 0
        pdbs = []
        for f in files:
            count += 1
            pdb = parser.get_structure('struct{}'.format(count), f)[0]
            assign_gn = as_gn.GenericNumbering(structure=pdb)
            pdb = assign_gn.assign_generic_numbers()
            pdbs.append(pdb)

        # Chain ids of the non-reference models that also exist in the
        # reference model.
        chains = []
        for p in pdbs:
            this = []
            for c in p.get_chains():
                this.append(c.get_id())
            chains.append(this)
        usable_chains = []
        for m in chains[1:]:
            for c in m:
                if c in chains[0]:
                    usable_chains.append(c)

        # Per model: [residues for the overall comparison, 7TM residues],
        # both keyed by integer sequence number.
        arrays = []
        for p in pdbs:
            try:
                if pdbs.index(p) == 0 and len(usable_chains) == 0:
                    chain = [c.get_id() for c in pdbs[0].get_chains()][0]
                else:
                    chain = p[usable_chains[0]].get_id()
            except (KeyError, IndexError):
                # No usable shared chain in this model: try the blank chain
                # id, then chain 'A'.
                try:
                    chain = p[' '].get_id()
                except KeyError:
                    chain = p['A'].get_id()
            pdb_array1, pdb_array2 = OrderedDict(), OrderedDict()
            for residue in p[chain]:
                if residue.get_full_id()[3][0] != ' ':
                    continue
                seq_num = int(residue.get_id()[1])
                if seq_nums is None or seq_num in seq_nums:
                    pdb_array1[seq_num] = residue
                try:
                    # NOTE(review): presumably the CA b-factor carries the
                    # generic number written by assign_generic_numbers(), so
                    # this window selects 7TM residues - confirm against as_gn.
                    if -8.1 < residue['CA'].get_bfactor() < 8.1:
                        pdb_array2[seq_num] = residue
                except (KeyError, TypeError):
                    # No CA atom or no numeric b-factor: not a 7TM residue.
                    pass
            arrays.append([pdb_array1, pdb_array2])

        # A reference residue is deleted when at least one other model lacks
        # it, and kept only when at least one other model has it.
        all_deletes, TM_deletes = set(), set()
        all_keep, TM_keep = set(), set()
        for i in range(0, 2):
            for res in arrays[0][i]:
                for m in arrays[1:]:
                    if res not in m[i]:
                        if i == 0:
                            all_deletes.add(res)
                        else:
                            TM_deletes.add(res)
                    else:
                        if i == 0:
                            all_keep.add(res)
                        else:
                            TM_keep.add(res)
        deletes = [all_deletes, TM_deletes]
        keeps = [all_keep, TM_keep]

        # num_atoms[i][res] holds the smallest atom list seen for residue
        # `res` across models; residues whose names disagree are dropped.
        num_atoms1, num_atoms2 = OrderedDict(), OrderedDict()
        num_atoms = [num_atoms1, num_atoms2]
        mismatches = []
        for m in arrays:
            for i in range(0, 2):
                # Iterate over a snapshot of the keys: entries are deleted
                # inside the loop, and mutating an OrderedDict while
                # iterating it raises RuntimeError.
                for res in list(m[i]):
                    if res in deletes[i] or res not in keeps[i]:
                        del m[i][res]
                        continue
                    residue = m[i][res]
                    stored = num_atoms[i].get(res)
                    if stored and residue.get_resname() != stored[0].get_parent().get_resname():
                        # Different residue type at this position.
                        del num_atoms[i][res]
                        mismatches.append(res)
                    elif res not in mismatches:
                        atoms = list(residue)
                        if stored is None or len(atoms) < len(stored):
                            num_atoms[i][res] = atoms

        # Per model: [overall all, overall backbone, 7TM all, 7TM backbone]
        # atom lists, restricted to atom names present in the stored list.
        atom_lists = []
        for m in arrays:
            this_model = []
            for i in range(0, 2):
                this_list_all = []
                this_list_bb = []
                for res in m[i]:
                    if res in num_atoms[i]:
                        atoms = [a.get_id() for a in m[i][res].get_list()]
                        ref_atoms = [at.get_id() for at in num_atoms[i][res]]
                        for atom in sorted(atoms):
                            if atom in ref_atoms:
                                this_list_all.append(m[i][res][atom])
                                if atom in ['N', 'CA', 'C']:
                                    this_list_bb.append(m[i][res][atom])
                this_model.append(this_list_all)
                this_model.append(this_list_bb)
            atom_lists.append(this_model)
        TM_keys = list(num_atoms[1].keys())

        # Store counts for every model and RMSDs against the reference for
        # every model but the first.
        for c, m in enumerate(atom_lists, 1):
            key = 'reference' if c == 1 else 'file{}'.format(c)
            for i in range(0, 4):
                # Scores 0-1 use the overall residue set, 2-3 the 7TM set.
                j = 0 if i < 2 else 1
                score = self.four_scores[i]
                self.number_of_residues_superposed[key][score] = len(num_atoms[j])
                self.number_of_atoms_superposed[key][score] = len(m[i])
                if c > 1:
                    rmsd = self.calc_RMSD(atom_lists[0][i], m[i])  #, TM_keys)
                    self.rmsds[key][score] = rmsd
                else:
                    self.rmsds[key][score] = None
示例#11
0
 def _read_pdb(self):
     """Parse the PDB file at ``self.path`` and keep the resulting structure.

     The structure is created under the id ``self.receptor_name`` and stored
     on ``self.structure``.
     """
     self.structure = bp.PDBParser().get_structure(self.receptor_name,
                                                   self.path)
import Bio.PDB as bpdb
import numpy as np
import pandas as pd
import easygui as eg
import multiprocessing.dummy as mp

#from time import time
# Ask the user for a PDB file through an easygui file dialog and parse it.
parser = bpdb.PDBParser()
file = eg.fileopenbox(filetypes=['*.pdb'])
structure = parser.get_structure('4A_s2', file)

# Empty frame with four numeric column labels (1.0 .. 4.0).
angles_by_frame = pd.DataFrame(columns=np.linspace(1, 4, num=4))

frame = 1
clmns = []
rows = {}
# Column names: phi2, psi2, phi3, psi3.
for i in range(2):
    clmns.append('phi' f'{i+2}')
    clmns.append('psi' f'{i+2}')

# One entry per model in the structure.
model_list = bpdb.Selection.unfold_entities(structure, 'M')
# `mp` is multiprocessing.dummy, i.e. a thread-backed pool, which is why
# lambdas can be mapped here.
with mp.Pool(32) as pool:
    # Chain 'A' of every model.
    chain_list = pool.map(lambda x: x['A'], model_list)
    poly_list = pool.map(lambda x: bpdb.Polypeptide.Polypeptide(x), chain_list)
    angle_list = pool.map(lambda x: x.get_phi_psi_list(), poly_list)
    # Flatten each (phi, psi) list into one row, drop the first two and last
    # two values, and scale by 180/pi.
    # NOTE(review): assumes the angles are in radians and that the dropped
    # values are the terminal residues' entries - confirm.
    rowstuff = pool.map(
        lambda x: np.reshape(x, [1, len(x) * 2])[0][2:-2] * (180 / np.pi),
        angle_list)
    rowlist = list(rowstuff)

angles_by_frame = pd.DataFrame(rowlist,
示例#13
0
    def find_files(self):
        ''' Locates files of one receptor model that can be used as source.

            Scans self.zip_path (created if missing) for '.zip' archives whose name contains self.receptor and
            one of the other subtypes of self.sign_prot's subfamily. Every matching archive is extracted into a
            directory named after it. The first archive whose name marks a complex (4th '_' field is not a state
            name and 5th is not 'refined') is used: self.complex, self.revise_xtal and self.path_to_pdb are set.

            @return: None if self.sign_prot already has templates or no suitable archive exists; otherwise a
            tuple (model, templates, similarities) holding chain 'R' of the first model of the extracted pdb and
            the lines of the '.templates.csv' and '.template_similarities.csv' files.
        '''
        sf = SignprotFunctions()
        other_signprots = sf.get_other_subtypes_in_subfam(self.sign_prot)
        gprots_with_structure = sf.get_subtypes_with_templates()
        if self.sign_prot in gprots_with_structure:
            return None
        if not os.path.exists(self.zip_path):
            os.mkdir(self.zip_path)
        for f in os.listdir(self.zip_path):
            if not f.endswith('.zip'):
                continue
            modelname = f.split('.')[0]
            if self.receptor not in modelname:
                continue
            if not any(o in modelname for o in other_signprots):
                continue
            mod_dir = self.zip_path + modelname
            if not os.path.exists(mod_dir):
                os.mkdir(mod_dir)
            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(self.zip_path + f, 'r') as zip_mod:
                zip_mod.extractall(mod_dir)

            name_list = modelname.split('_')
            if name_list[3] in ['Inactive', 'Active', 'Intermediate'
                                ] or name_list[4] == 'refined':
                continue
            self.complex = True
            self.revise_xtal = False

            # NOTE(review): mod_dir is built by plain concatenation while the
            # paths below insert os.sep; both agree only when self.zip_path
            # ends with a path separator.
            base = os.sep.join([self.zip_path, modelname, modelname])
            self.path_to_pdb = base + '.pdb'
            p = PDB.PDBParser()
            model = p.get_structure('receptor', self.path_to_pdb)[0]['R']
            with open(base + '.templates.csv', 'r') as templates_file:
                templates = templates_file.readlines()
            with open(base + '.template_similarities.csv', 'r') as sim_file:
                similarities = sim_file.readlines()
            return model, templates, similarities
        return None
示例#14
0
                rama_pref_values[key][int(float(line.split()[1])) +
                                      180][int(float(line.split()[0])) +
                                           179] = float(line.split()[2])

# Per Ramachandran category: coordinate lists for normal points and outliers.
normals = {}
outliers = {}
for key, val in rama_preferences.items():
    normals[key] = {"x": [], "y": []}
    outliers[key] = {"x": [], "y": []}

# Calculate the torsion angle of the inputs
for inp in sys.argv[1:]:
    if not os.path.isfile(inp):
        print("{} not found!".format(inp))
        continue
    structure = PDB.PDBParser().get_structure('input_structure', inp)
    for model in structure:
        for chain in model:
            polypeptides = PDB.PPBuilder().build_peptides(chain)
            for poly_index, poly in enumerate(polypeptides):
                phi_psi = poly.get_phi_psi_list()
                for res_index, residue in enumerate(poly):
                    res_name = "{}".format(residue.resname)
                    res_num = residue.id[1]
                    phi, psi = phi_psi[res_index]
                    # Undefined angles are reported as None. Test for that
                    # explicitly: a truthiness test would also discard a
                    # residue whose angle is exactly 0.0.
                    if phi is not None and psi is not None:
                        aa_type = ""
                        # psi is defined, so a following residue exists in
                        # this polypeptide and the index below is valid.
                        if str(poly[res_index + 1].resname) == "PRO":
                            aa_type = "PRE-PRO"
                        elif res_name == "PRO":
                            aa_type = "PRO"
示例#15
0
def structure_from_pdb_file(file_path, name=''):
    """Parse the PDB file at ``file_path`` and return the structure.

    A new ``PDB.PDBParser`` is created for the call; ``name`` is passed
    through as the structure id.
    """
    return PDB.PDBParser().get_structure(name, file_path)
示例#16
0
            cofactors_dict[line[1]] = [line[3]]

pdbl = PDB.PDBList()
Error_out = open("microfolds_out.txt", "w")

for subdir, dirs, files in os.walk(rootdir):
    for file in files:

        try:
            line = file
            protein = line[3:7]
            #print ('pdb_code:'+protein)
            protein = protein.lower()
            Error_out.write('pdb_code:' + protein + '\n')

            parser = PDB.PDBParser(PERMISSIVE=1, get_header=1, QUIET=1)
            curdir = os.getcwd()
            filename = rootdir + protein[1:3] + '/' + file
            #print(filename)

            final_file = rootdir + protein[1:3] + '/pdb' + protein + '.ent'
            #print ('unziping')
            # unzipping gz file
            gz = gzip.open(filename, 'rb')
            with open(final_file, 'wb') as out:
                out.writelines(gz)
            gz.close()
            #print ('unziping done')
            #os.remove(filename)
            # openning pdb file
            structure = parser.get_structure(
示例#17
0
def main():
    """Measure how well a fitted stem model reproduces stem coordinates.

    For each of ``--iterations`` rounds a stem definition is taken from
    ``--from`` (or drawn at random from the stem statistics for the requested
    length), its fragment PDB is loaded and a ``StemModel`` is built from its
    mids and twists. A second stem (``--to`` or a random one of the same
    base-pair length) is then reconstructed onto that model, and the RMSD
    between the original and reconstructed chains is computed both without
    and with superimposition. One line per round is printed:
    ``stem_length superimposed unsuperimposed ratio``.

    The stem length comes from ``--length`` unless a single positional
    argument overrides it. With ``--output-pdb`` the chains are written to
    out1.pdb / out2.pdb / out3.pdb and a PyMOL file 'ss' is dumped.
    """
    # NOTE(review): `usage` is assembled but never handed to OptionParser,
    # and the two parts are joined without a separator.
    usage = './align_stems.py [stem_length]'
    usage += 'Do diagnostics on the stem model'
    parser = OptionParser()

    parser.add_option('-i',
                      '--iterations',
                      dest='iterations',
                      default=1,
                      help="The number of times to repeat the alignment",
                      type='int')
    parser.add_option('-l',
                      '--length',
                      dest='length',
                      default=2,
                      help="The length of the stem",
                      type='int')
    parser.add_option('-o',
                      '--output-pdb',
                      dest='output_pdb',
                      default=False,
                      help="Output the structures to pdb files",
                      action='store_true')
    parser.add_option(
        '-f',
        '--from',
        dest='from_file',
        default=None,
        help='Specify a file to align from. Invalidates the -l option.',
        type='str')
    parser.add_option(
        '-t',
        '--to',
        dest='to_file',
        default=None,
        help='Specify a file to align to. Invalidates the -l option.',
        type='str')
    parser.add_option(
        '-m',
        '--method',
        dest='method',
        default='e',
        help=
        'Specify which method to use for the helix fitting. e = estimate (original, least accurate method), a = align (better, more accurate method), t = template (best, most accurate method)'
    )
    parser.add_option(
        '-a',
        '--average-twist',
        dest='use_average_method',
        default=False,
        action='store_true',
        help='Use the average of the two twists to align the stems.')

    (options, args) = parser.parse_args()

    # NOTE(review): len(args) can never be negative, so this branch is dead.
    # The intended minimum argument count is unclear (stem_length is
    # optional), so the check is left as it was.
    if len(args) < 0:
        parser.print_help()
        sys.exit(1)

    stem_length = options.length
    if len(args) == 1:
        stem_length = int(args[0])

    # The stem statistics are only needed when at least one side of the
    # comparison is drawn at random.
    if options.from_file is None or options.to_file is None:
        sss = cbs.get_stem_stats(
            os.path.join(cbc.Configuration.base_dir,
                         'fess/stats/temp.1jj2.stats'))

    rmsds = []

    for i in range(options.iterations):
        if options.from_file is not None:
            filename = options.from_file
            stem_def = stem_def_from_filename(filename)
        else:
            stem_def = random.choice(sss[stem_length])
            filename = '%s_%s.pdb' % (stem_def.pdb_name, "_".join(
                map(str, stem_def.define)))

        pdb_file = os.path.join(cbc.Configuration.stem_fragment_dir, filename)

        # Extract the PDB coordinates of the original chain
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                chain = list(bpdb.PDBParser().get_structure(
                    'temp', pdb_file).get_chains())[0]
                chain = cbm.extract_stem_from_chain(chain, stem_def)
            except IOError as ie:
                # NOTE(review): after this handler `chain` may be unbound and
                # the code below would then fail with NameError.
                cud.pv('ie')

        # Convert the chain into a stem model
        # This is where the method for fitting a helix is applied
        #m = cbm.define_to_stem_model(chain, stem_def.define)
        stem = cbm.StemModel(name=stem_def.define)
        define = stem_def.define
        mids = cgg.get_mids(chain, define, options.method)

        stem.mids = tuple([m.get_array() for m in mids])
        stem.twists = cgg.get_twists(chain, define)
        m = stem

        # Create a new chain by aligning the stem from the sampled define
        # to the model created from the original stem
        new_chain = bpdbc.Chain(' ')
        try:
            if options.to_file is not None:
                new_stem_def = stem_def_from_filename(options.to_file)
            else:
                new_stem_def = random.choice(sss[stem_def.bp_length])

            cbm.reconstruct_stem_core(new_stem_def, stem_def.define, new_chain,
                                      dict(), m, options.use_average_method)
        except IOError as ie:
            cud.pv('ie')

        if options.output_pdb:
            rtor.output_chain(chain, 'out1.pdb')
            rtor.output_chain(new_chain, 'out3.pdb')

        unsuperimposed_rmsd = cup.pdb_rmsd(chain,
                                           new_chain,
                                           sidechains=False,
                                           superimpose=False)
        superimposed_rmsd = cup.pdb_rmsd(chain,
                                         new_chain,
                                         sidechains=False,
                                         superimpose=True,
                                         apply_sup=True)
        rmsds += [[superimposed_rmsd[1], unsuperimposed_rmsd[1]]]

        #cud.pv('(superimposed_rmsd, unsuperimposed_rmsd)')

        if options.output_pdb:
            rtor.output_chain(new_chain, 'out2.pdb')
            pp = cvp.PymolPrinter()
            (p, n) = m.mids
            pp.add_stem_like_core(m.mids, m.twists, stem_def.bp_length + 1, '')
            pp.stem_atoms(m.mids, m.twists, stem_def.bp_length + 1)
            pp.dump_pymol_file('ss')

        # %-formatting gives the same space-separated line as the Python 2
        # print statement did, and also runs on Python 3.
        print("%s %s %s %s" %
              (stem_length, superimposed_rmsd[1], unsuperimposed_rmsd[1],
               unsuperimposed_rmsd[1] / superimposed_rmsd[1]))
示例#18
0
 def setUp(self):
     """Attach a PDBList and a PDBParser to the instance.

     The PDBList is created with ``verbose=False`` and with ``obsolete_pdb``
     set to the directory returned by ``gettempdir()``.
     """
     pdb_list_options = {'verbose': False, 'obsolete_pdb': gettempdir()}
     self.pdb_list = biopdb.PDBList(**pdb_list_options)
     self.biopdb_parser = biopdb.PDBParser()
示例#19
0
 def __init__(self, ring, pdb, fragments, pdb_out, v):
     """Parse the two input structures and remember the remaining arguments.

     ``ring`` and ``pdb`` are each parsed with their own ``PDB.PDBParser``
     (structure ids 'ring' and 'pdb') and stored on ``self.ring`` and
     ``self.pdb``; ``fragments``, ``pdb_out`` and ``v`` are stored unchanged.
     """
     ring_structure = PDB.PDBParser().get_structure('ring', ring)
     pdb_structure = PDB.PDBParser().get_structure('pdb', pdb)
     self.fragments, self.pdb_out, self.v = fragments, pdb_out, v
     self.ring = ring_structure
     self.pdb = pdb_structure
示例#20
0
        if sym == 'HOH':
            if full:
                seq += '-'
            continue
        if sym in d1.keys():
            seq += d1[sym]
        elif len(seq) != 0:
            # TODO: convert to warning
            # h.write('ID: %s chain %s: unknown residue %s\n' % (pdbID, chainName, sym))
            seq += '-'
        # continue
    return seq


if __name__ == '__main__':
    # Write the id of every file found in ./pdb/ (the file name up to its
    # first '.') to chain_seq.txt, one per line.
    parser = pdb.PDBParser(QUIET=True)

    output = 'chain_seq.txt'
    start_dir = './pdb/'

    # The context manager guarantees the output is flushed and closed, which
    # the bare open() call never did.
    with open(output, 'w') as g:
        files = os.listdir(start_dir)
        # print(len(files))

        for filename in files:
            if filename in ('.', '..'):
                continue

            pdbID = filename.split('.')[0]

            g.write(str(pdbID) + '\n')
示例#21
0
    params.read(expandpath(args.iconf), "UTF-8")
    tmp = tempfile.mkstemp()[1]

    params["Protein"]["pdb"] = expandpath(params["Protein"]["pdb"])
    output_pdb_filepath = args.opdb
    os.system(f"{REDUCE} -Trim {params['Protein']['pdb']} > {tmp}.noH.pdb")
    os.system(
        f"""{GMX} trjconv -f {tmp}.noH.pdb -s {tmp}.noH.pdb -o {tmp}.center.pdb -center -boxcenter zero << EOF
Protein
System
EOF""")
    os.system(
        f"{GMX} editconf -f {tmp}.center.pdb -o {output_pdb_filepath} -resnr 1"
    )

    old_pdb = PDB.PDBParser().get_structure("old", params["Protein"]["pdb"])
    new_pdb = PDB.PDBParser().get_structure("new", output_pdb_filepath)
    old_to_new = gen_resis_table(old_pdb, new_pdb)

    length = len([res for res in new_pdb.get_residues() if res.id[0] == " "])
    params["Protein"]["resi_st"] = "1"
    params["Protein"]["resi_ed"] = f"{length}"
    params["Protein"]["pdb"] = output_pdb_filepath
    new_ssbonds = [
        old_to_new[int(s)] for s in params["Protein"]["ssbond"].split()
    ]
    params["Protein"]["ssbond"] = " ".join([str(s) for s in new_ssbonds])
    new_binding_residues = [
        old_to_new[int(s)]
        for s in params["Protein"]["binding_site_residues"].split()
    ]
示例#22
0
import sys
import re

def join_fasta(*fastas):
    """Return the concatenated text of the given FASTA files.

    Each positional argument is a path that is opened in text mode; the
    contents are joined in argument order without any separator added
    between files. Calling it with no arguments returns an empty string.
    """
    parts = []
    for fhand in fastas:
        with open(fhand) as f1:
            # Read each file in one go and join once at the end instead of
            # growing a string line by line.
            parts.append(f1.read())
    return ''.join(parts)

if __name__ == '__main__':
    directory = sys.argv[1] # Folder with the chain pdbs
    distance = float(sys.argv[2])
    directory_out = os.path.join(directory, 'pairs') # Folder where pairs will be saved
    PDBparser = PDB.PDBParser(QUIET=True)
    done = []
    for pdb_1 in list(filter(lambda x: x.endswith('.pdb'), os.listdir(directory))):
        for pdb_2 in list(filter(lambda x: x.endswith('.pdb'), os.listdir(directory))):
            if (pdb_1 == pdb_2):   
                continue # Not to duplicate itself
            elif not (pdb_1, pdb_2) in done or not (pdb_2, pdb_1) in done: # Not to duplicate pairs
                done.append((pdb_1, pdb_2))
                structure1 = PDBparser.get_structure(pdb_1[:-4], os.path.join(directory,pdb_1))
                structure2 = PDBparser.get_structure(pdb_2[:-4], os.path.join(directory,pdb_2))
                for chain in structure2.get_chains():
                    atoms = list(chain.get_atoms()) 
                    ns = PDB.NeighborSearch(atoms) # An object to search chains near an atom 
                    for target_atom in structure1.get_atoms():
                        near = ns.search(target_atom.coord, distance)
                        if(near and (pdb_1[-5:-4],pdb_2[-5:-4]) not in done and (pdb_2[-5:-4],pdb_1[-5:-4]) not in done): # If there's an atom near, they interact
示例#23
0
import glob
from Bio import PDB

# For every file in all_pdbs/, run DSSP on its first model and append two
# FASTA-like records to casp11.sec: the sequence and the secondary structure.
pdb_files = glob.iglob('all_pdbs/*')

# The context manager closes the output even if parsing or DSSP fails.
with open('casp11.sec', 'w') as file:
    for c, pdb in enumerate(pdb_files, 1):
        # Progress counter.
        print(c)

        p = PDB.PDBParser()
        structure = p.get_structure(pdb[:-4], pdb)
        model = structure[0]
        dssp = PDB.DSSP(model, pdb)

        # Index 1 of a DSSP record is appended to the sequence and index 2
        # to the secondary-structure string.
        seq_parts = []
        ss_parts = []
        for key in dssp.keys():
            record = dssp[key]
            ss_parts.append(record[2])
            seq_parts.append(record[1])
        seq = ''.join(seq_parts)
        ss = ''.join(ss_parts)

        file.write('>{}\n'.format(pdb))
        file.write('{}\n'.format(seq))
        file.write('>{}\n'.format(pdb))
        file.write('{}\n'.format(ss))
示例#24
0
# For every PDB id: read the full sequence from its FASTA file, rebuild the
# observed sequence from its PDB file, and globally align the two.
pdb_stats = []
for pdb_id in pdb_unique_ids:

    file_fasta = file_pref + pdb_id + ".fasta"
    file_pdb = file_pref + pdb_id + ".pdb"
    # Concatenate all FASTA records and count them.
    sequence_fasta = Seq("")
    num_monomers = 0
    for seq_record in SeqIO.parse(file_fasta, "fasta"):
        sequence_fasta += seq_record.seq
        num_monomers += 1
    # uncomment the following line to produce gc_stats.pkl
    #pdb_seqlength.append((pdb_id,len(sequence_fasta)/num_monomers))
    # i.e. all residues taken into account, not only ones seen in crystal
    # comment out calculation of the sequence from the pdb file
    ppb = PDB.PPBuilder()
    struct = PDB.PDBParser().get_structure(pdb_id, file_pdb)
    peptides = ppb.build_peptides(struct)
    # Sequence and residue objects of every polypeptide found in the file,
    # in build order.
    sequence_pdb = Seq("")
    ref_residues = []
    for peptide in peptides:
        sequence_pdb += peptide.get_sequence()
        for residue in peptide:
            ref_residues.append(residue)

    # Residues per FASTA record.
    # NOTE(review): raises ZeroDivisionError when the FASTA file has no
    # records.
    pdb_seqlength.append((pdb_id, len(ref_residues) / num_monomers))
    # Keep only the first alignment returned; `matrix`, `gap_open` and
    # `gap_extend` are defined outside this excerpt.
    alignment = pairwise2.align.globalds(sequence_fasta,
                                         sequence_pdb,
                                         matrix,
                                         gap_open,
                                         gap_extend,
                                         one_alignment_only=True)[0]
示例#25
0
    def get_rnapuzzle_ready(self, renumber_residues=True):
        """Get rnapuzzle ready structure.
        Submission format @http://ahsoka.u-strasbg.fr/rnapuzzles/

        Does:
        - keep only given atoms,
        - renumber residues from 1, if renumber_residues=True (by default)

        The structure is written to a temporary file, re-read with
        Biopython, rebuilt with only the listed atoms of G/A/C/U residues
        (residues with any other name are dropped), saved again and loaded
        back; ``self.lines`` is replaced by the lines of the result. Atoms
        that are expected but absent are reported on stdout as REMARK lines.
        Exits via ``sys.exit`` if Biopython cannot be imported.
        """
        try:
            from Bio import PDB
            from Bio.PDB import PDBIO
        except ImportError:
            sys.exit('Error: Install biopython to use this function (pip biopython)')

        # Atoms kept for each base, in output order.
        base_atoms = {
            'G': "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split(),
            'A': "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split(),
            'U': "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split(),
            'C': "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split(),
        }

        # hack for amber/qrna: RC3, C3, RC5, C5 and RC all mean C (and
        # likewise for U, G, A).
        resname_aliases = {}
        for base in 'CUGA':
            for pattern in ('R%s3', '%s3', 'R%s5', '%s5', 'R%s'):
                resname_aliases[pattern % base] = base

        # NOTE(review): fixed paths under /tmp are shared by concurrent runs;
        # kept unchanged in case other code reads them.
        ftmp = '/tmp/out.pdb'
        self.write(ftmp,v=False)

        parser = PDB.PDBParser()
        struct = parser.get_structure('', ftmp)
        model = struct[0]

        s2 = PDB.Structure.Structure(struct.id)
        m2 = PDB.Model.Model(model.id)

        chains2 = []

        missing = []
        for chain in model.get_list():
            c2 = PDB.Chain.Chain(chain.id)

            new_resi = 1  # new chain, goes from 1 !!!
            for r in list(chain):
                # Normalise the residue name on the source residue as well.
                name = r.resname.strip()
                name = resname_aliases.get(name, name)
                r.resname = name

                r2 = PDB.Residue.Residue(r.id, name, r.segid)
                if renumber_residues:
                    r2.id = (r2.id[0], new_resi, r2.id[2]) ## renumber residues

                wanted = base_atoms.get(name)
                if wanted is not None:
                    for an in wanted:
                        try:
                            r2.add(r[an])
                        except KeyError:
                            #print 'Missing:', an, r, ' new resi', new_resi
                            missing.append([an, chain.id, r, new_resi])
                    c2.add(r2)
                # The counter advances for dropped residues too.
                new_resi += 1
            chains2.append(c2)

        io = PDBIO()
        s2.add(m2)
        for chain2 in chains2:
            m2.add(chain2)
        io.set_structure(s2)
        #fout = fn.replace('.pdb', '_fx.pdb')
        fout = '/tmp/outout.pdb' # hack
        io.save(fout)

        if missing:
            print('REMARK 000 Missing atoms:')
            for i in missing:
                print('REMARK 000  +', i[0], i[1], i[2], 'residue #', i[3])
            #raise Exception('Missing atoms in %s' % self.fn)
        s = StrucFile(fout)
        self.lines = s.lines
示例#26
0
    def __init__(self, filepath):
        """Constructor of one pdb file : PDBFile.

        Parses the file with Bio.PDB, copies selected header entries onto the
        instance, then walks every polypeptide of the first model to fill
        ``self.seq`` (concatenated sequence), ``self.chains`` (one ``Chain``
        record per polypeptide) and ``self.angles`` (one ``Angle`` record per
        phi/psi pair).

        Arguments :
        ------------
        filepath : string
            path to the pdb file. The four characters that precede the last
            four characters of the path (``filepath[-8:-4]``) are stored as
            the id, e.g. ``1abc`` for ``.../1abc.pdb``.
        """
        # -----
        # save id extracted from path :
        self.id = filepath[-8:-4]

        # -----
        # init parser :
        parser = PDB.PDBParser()
        struct = parser.get_structure("", filepath)

        # -----
        # extract from header :
        header = struct.header
        self.keywords = header['keywords']
        self.name = header['name']
        self.head = header['head']
        self.deposition_date = header['deposition_date']
        self.release_date = header['release_date']
        self.structure_method = header['structure_method']
        self.resolution = header['resolution']
        self.structure_reference = str(header['structure_reference'])
        self.journal_reference = header['journal_reference']
        self.author = header['author']
        self.compound = str(header['compound'])

        # -----
        # Get the sequence and the angles

        # builder used to extract the polypeptides of each chain :
        ppb = PDB.CaPPBuilder()

        self.seq = ""
        residue_idx = 0  # running index over every phi/psi pair stored
        end = 0          # last position used so far in self.seq (1-based)

        # NOTE(review): self.chains and self.angles are appended to below but
        # never created in this constructor; presumably they are provided by
        # the class (class attribute / ORM relationship) - confirm.

        # Build the peptides chain by chain instead of zipping
        # build_peptides(struct) with struct.get_chains(): a chain with a gap
        # yields several polypeptides and a chain without amino acids yields
        # none, so the zip attached peptides to the wrong chain ids.
        # Only the first model is used, as build_peptides() does when it is
        # handed a whole structure.
        for chain in struct[0]:
            for pp in ppb.build_peptides(chain):
                print(pp)

                # The sequence is represented as a Biopython Seq object.
                seq = str(pp.get_sequence())
                print(seq)
                self.seq += seq

                # Boundaries of this peptide inside the concatenated
                # sequence (1-based, inclusive); each peptide starts right
                # after the previous one :
                # |-----------||-------------------|
                # sA        eA sB                  eB
                start = end + 1
                print(start)
                end = start + len(seq) - 1
                print(end)

                self.chains.append(Chain(chain.id, self.id, start, end))

                # Get phi psi angle
                angles = pp.get_phi_psi_list()
                # Some are None because :
                # - Some atoms are missing
                #   -> Phi/Psi cannot be calculated for some residue
                # - No phi for residue 0
                # - No psi for last residue
                print(angles)

                for phi, psi in angles:
                    residue_idx += 1
                    self.angles.append(
                        Angle(self.id, residue_idx, phi, psi))
示例#27
0
 def fetch_residues_from_pdb(self,
                             structure,
                             generic_numbers,
                             modify_bulges=False,
                             just_nums=False):
     ''' Fetches the atoms of specific residues of a structure by generic number (if generic
         number is not available then by residue number). Returns an OrderedDict() whose keys
         are the generic numbers with 'x' replaced by '.' (or the residue number as a string)
         and whose values are lists of the atoms parsed from the matching rotamer's pdb data.
         
         @param structure: Structure, Structure object where residues should be fetched from \n
         @param generic_numbers: list, list of generic numbers (containing 'x') and/or residue
         sequence numbers to be fetched \n
         @param modify_bulges: boolean, set it to true when used for bulge switching. E.g. you want a 5x461
         residue to be considered a 5x46 residue. \n
         @param just_nums: boolean, when true, entries given as sequence numbers are kept as
         numbers instead of being converted to a generic number for the output key. \n
         @raise IndexError: when no rotamer is found for an entry. \n
         @raise AttributeError: when several rotamers are found for an entry and none of them
         is on the preferred chain of the structure.
     '''
     output = OrderedDict()
     for gn in generic_numbers:
         if 'x' in str(gn):
             rotamer = list(
                 Rotamer.objects.filter(
                     structure__protein_conformation=structure.
                     protein_conformation,
                     residue__display_generic_number__label=dgn(
                         gn, structure.protein_conformation),
                     structure__preferred_chain=structure.preferred_chain))
         else:
             rotamer = list(
                 Rotamer.objects.filter(
                     structure__protein_conformation=structure.
                     protein_conformation,
                     residue__sequence_number=gn,
                     structure__preferred_chain=structure.preferred_chain))
             if not just_nums:
                 try:
                     gn = ggn(
                         Residue.objects.get(protein_conformation=structure.
                                             protein_conformation,
                                             sequence_number=gn).
                         display_generic_number.label)
                 except Exception:
                     # Best effort: when the residue has no usable generic
                     # number the sequence number is kept as the key.
                     pass

         if not rotamer:
             # Same exception type as the former rotamer[0] on an empty list,
             # but with a message that names the missing residue.
             raise IndexError(
                 'No rotamer found for {} in {}'.format(gn, structure))
         if len(rotamer) > 1:
             # Several candidates: take the first one whose pdb data does not
             # start with a COMPND record and whose character at index 21
             # (chain id column of a pdb ATOM line) is in the preferred chain.
             for candidate in rotamer:
                 pdb_text = candidate.pdbdata.pdb
                 if (not pdb_text.startswith('COMPND')
                         and pdb_text[21] in structure.preferred_chain):
                     rotamer = candidate
                     break
             else:
                 # Formerly this fell through with the list still in hand and
                 # failed on `.pdbdata`; the exception type is kept.
                 raise AttributeError(
                     'No rotamer for {} matches preferred chain {}'.format(
                         gn, structure.preferred_chain))
         else:
             rotamer = rotamer[0]

         # Output key: generic numbers use '.' instead of 'x'; anything that
         # is not a string (plain sequence number) is simply stringified.
         # Checking the type first avoids calling len() on an integer when
         # modify_bulges is set.
         if isinstance(gn, str):
             key = gn.replace('x', '.')
             if modify_bulges and len(gn) == 5:
                 # bulge label such as 5x461 -> 5.46
                 key = key[:-1]
         else:
             key = str(gn)

         pdb_handle = StringIO(rotamer.pdbdata.pdb)
         rota_struct = PDB.PDBParser(QUIET=True).get_structure(
             'structure', pdb_handle)[0]
         for chain in rota_struct:
             for residue in chain:
                 # If the rotamer holds more than one residue the last one
                 # wins, as before.
                 output[key] = list(residue)
     return output