Пример #1
0
 def iter_ca_hbonds_don(self, parsed_pdb):
     """Yield donor-side triplets whose 0-1-2 angle is greater than 90.

     Triplets come from ``self.iter_ca_triplets_don(parsed_pdb)`` and are
     accessed by index.  Judging by the names used for the distances,
     element 0 is presumably the acceptor, element 1 the hydrogen and
     element 2 the heavy (donor) atom -- TODO confirm against the triplet
     generator.

     Yields:
         ``(triplet, angle, distance_0_1, distance_0_2)`` for every triplet
         that passes the angle test.
     """
     for triplet in self.iter_ca_triplets_don(parsed_pdb):
         first, middle, last = triplet[0], triplet[1], triplet[2]
         theta = pr.calcAngle(first, middle, last)
         # Written as "not >" so a NaN angle is rejected exactly as before.
         if not theta > 90:
             continue
         dist_first_middle = pr.calcDistance(first, middle)
         dist_first_last = pr.calcDistance(first, last)
         yield triplet, theta, dist_first_middle, dist_first_last
Пример #2
0
 def iter_hbonds_acc(self, parsed_pdb):
     """Yield acceptor-side triplets that pass the geometric angle test.

     Triplets come from ``self.iter_triplets_acc(parsed_pdb)``.

     * If element 2 is not a water (``HOH``) the triplet is yielded when the
       0-1-2 angle is greater than 90, together with the 0-1 and 0-2
       distances.
     * If element 2 is a water, the H/D atoms found within 1.1 of element 0
       are inspected; the triplet is yielded only when none of them makes a
       0-H-2 angle greater than 90 (or when there are no such atoms at all).
       In that case the angle and the 0-1 distance are the placeholder
       values 360 and 99.

     Yields:
         ``(triplet, angle, distance_0_1, distance_0_2)``.
     """
     for triplet in self.iter_triplets_acc(parsed_pdb):
         if triplet[2].getResname() != 'HOH':
             theta = pr.calcAngle(triplet[0], triplet[1], triplet[2])
             if theta > 90:
                 dist_to_middle = pr.calcDistance(triplet[0], triplet[1])
                 dist_to_last = pr.calcDistance(triplet[0], triplet[2])
                 yield triplet, theta, dist_to_middle, dist_to_last
             continue

         # Water partner: look at the hydrogens attached to element 0.
         attached_h = parsed_pdb.contacts(
             1.1, triplet[0].getCoords()).select('element H D')
         if attached_h is not None:
             not_pointing = [
                 not (pr.calcAngle(triplet[0], h_atom, triplet[2]) > 90)
                 for h_atom in attached_h.iterAtoms()
             ]
             if not all(not_pointing):
                 continue

         # Placeholder geometry: no explicit hydrogen is assigned here.
         placeholder_distance = 99
         heavy_distance = pr.calcDistance(triplet[0], triplet[2])
         placeholder_angle = 360
         yield triplet, placeholder_angle, placeholder_distance, heavy_distance
def calculate_hbond(pdbfilename):
    '''
    Count backbone-backbone hydrogen bonds for every protein residue of a PDB file.

    Geometric criteria of hbond identification given by paper: Protein Eng. 15 (2002) 359.
    D2-D1-D-H...A-A1
        Rule 1: D-A     <= 3.9 A;
        Rule 2: H-A     <= 2.5 A;
        Rule 3: D-H-A   > 90.0 degree;
        Rule 4: A1-A-D  > 90.0 degree;
        Rule 5: A1-A-H  > 90.0 degree.
    Only backbone atoms are considered: N/HN as donor, O (with C as its
    antecedent) as acceptor.  The amide hydrogen is looked up by the atom
    name 'HN'; residues without an atom of that name never act as donors.
    At most 1 bond is counted for the N-H side and at most 2 for the C=O side.

    :param pdbfilename: path (or identifier) handed to ``prody.parsePDB``.
    :return: a 5-tuple
        ``(NHbond, np.where(NHbond == 0), CObond, np.where(CObond == 0), resTyp)``
        where ``NHbond`` / ``CObond`` are integer arrays with one entry per
        CA atom of the protein selection and ``resTyp`` is the list of
        residue names in iteration order.
    '''
    import prody
    structure = prody.parsePDB(pdbfilename)
    protein = structure.select('protein')
    hv = protein.getHierView()
    # Array length is the number of CA atoms, as in the original code.
    nres = protein.select('name CA').numAtoms()

    NHbond = np.zeros(nres, dtype=int)
    CObond = np.zeros(nres, dtype=int)
    resTyp = ['' for _ in range(nres)]

    # Run the (string-parsed) backbone selections once per residue instead of
    # once per residue pair.
    backbone = [
        (res.getResname(),
         res.select('name N'),
         res.select('name C'),
         res.select('name O'),
         res.select('name HN'))
        for res in hv.iterResidues()
    ]

    for i, (resname, N, C, O, HN) in enumerate(backbone):
        resTyp[i] = resname

        # N-HN of residue i as donor, O of every other residue as acceptor.
        if N is not None and HN is not None:
            for j, (_, _, AC, AO, _) in enumerate(backbone):
                # Skip the residue itself by position in this hierarchy view;
                # getResindex() is AtomGroup-wide and need not match ``i``.
                if j == i or AC is None or AO is None:
                    continue
                if (prody.calcDistance(N, AO) <= 3.9            # dist_N_O_check
                        and prody.calcDistance(HN, AO) <= 2.5   # dist_H_O_check
                        and prody.calcAngle(N, HN, AO) > 90     # angle_N_HN_O_check
                        and prody.calcAngle(AC, AO, N) > 90     # angle_C_O_N_check
                        and prody.calcAngle(AC, AO, HN) > 90):  # angle_C_O_HN_check
                    NHbond[i] += 1
                    break  # one bond is the maximum for the N-H side

        # C=O of residue i as acceptor, N-HN of every other residue as donor.
        if C is not None and O is not None:
            for j, (_, DN, _, _, DHN) in enumerate(backbone):
                if j == i or DN is None or DHN is None:
                    continue
                if (prody.calcDistance(DHN, O) <= 2.5           # dist_H_O_check
                        and prody.calcDistance(DN, O) <= 3.9    # dist_N_O_check
                        and prody.calcAngle(DN, DHN, O) > 90    # angle_N_HN_O_check
                        and prody.calcAngle(C, O, DN) > 90      # angle_C_O_N_check
                        and prody.calcAngle(C, O, DHN) > 90):   # angle_C_O_HN_check
                    CObond[i] += 1
                    if CObond[i] == 2:
                        break  # two bonds is the maximum for the C=O side

    return NHbond, np.where((NHbond == 0)), CObond, np.where((CObond == 0)), resTyp
Пример #4
0
def checkHBonds(appf,no_d, ox_a, ssindex):
    """Record one donor/acceptor pair in the module-level tables if it is an H-bond.

    The pair is tested against the module-level thresholds in this order and
    the function returns silently at the first failed test:

    1. donor-acceptor distance must be below ``DONOR_ACCEPTOR_MAXDISTANCE``;
    2. a hydrogen must be found for the donor (``getHforAtom``) and its
       distance to the acceptor must be below ``HYDROGEN_ACCEPTOR_MAXDISTANCE``;
    3. the donor-hydrogen-acceptor angle must be at least ``DHA_ANGLE``;
    4. (donor-acceptor-antecedent angle: this test is disabled here);
    5. an antecedent must be found for the acceptor (``getAntecedent``) and
       the hydrogen-acceptor-antecedent angle must be at least ``HAB_ANGLE``.

    On success ``NUM_H_BONDS[ssindex]`` is incremented and the two distances,
    two angles and the beta/gamma angles are appended to the ``COLUMN_*``
    lists at position ``ssindex``.

    :param appf: parsed structure passed through to the helper lookups.
    :param no_d: candidate donor atom.
    :param ox_a: candidate acceptor atom.
    :param ssindex: index of the table row/column bucket to update.
    :return: always ``None``; results are stored in module-level containers.
    """
    # Rule 1: donor-acceptor distance.
    da_dist = pr.calcDistance(no_d, ox_a)
    if da_dist >= DONOR_ACCEPTOR_MAXDISTANCE:
        return

    h_don = getHforAtom(appf, no_d)
    if h_don is None:
        return

    # Rule 2: hydrogen-acceptor distance.
    ha_dist = pr.calcDistance(h_don, ox_a)
    if ha_dist >= HYDROGEN_ACCEPTOR_MAXDISTANCE:
        return

    # Rule 3: donor-hydrogen-acceptor angle.
    dha_ang = pr.calcAngle(no_d, h_don, ox_a)
    if dha_ang < DHA_ANGLE:
        return

    # Rule 4 (donor-acceptor-antecedent angle vs DAB_ANGLE) is intentionally
    # not applied in this function.
    acc_ante = getAntecedent(appf, ox_a)
    if acc_ante is None:
        return

    # Rule 5: hydrogen-acceptor-antecedent angle.
    haa_ang = pr.calcAngle(h_don, ox_a, acc_ante)
    if haa_ang < HAB_ANGLE:
        return

    # Valid H-bond: compute the remaining descriptors and store everything.
    beta_ang = getBetaAngle(appf, acc_ante, ox_a, h_don)
    gamm_ang = getGammaAngle(appf, acc_ante, ox_a, h_don)

    NUM_H_BONDS[ssindex] += 1
    COLUMN_D_ON[ssindex].append(da_dist)
    COLUMN_D_OH[ssindex].append(ha_dist)
    COLUMN_A_NHO[ssindex].append(dha_ang)
    COLUMN_A_HOC[ssindex].append(haa_ang)
    COLUMN_BETA[ssindex].append(beta_ang)
    COLUMN_GAMMA[ssindex].append(gamm_ang)
Пример #5
0
def compute_distances_given_instruction(pdb_object, instruction, chain="L"):
    """Measure the distance between one ligand atom and one protein atom.

    :param pdb_object: parsed structure handed to the selection helpers.
    :param instruction: 4-item sequence
        ``(atom_name_ligand, atom_name_aa, resnum_aa, resname_aa)``.
    :param chain: chain identifier of the ligand (default ``"L"``).
    :return: ``(atom_name_ligand, atom_name_aa, resnum_aa, resname_aa, distance)``
        where ``distance`` is whatever ``prody.calcDistance`` returns for the
        two selected atoms.
    """
    ligand_resnum = get_resnum_of_chain(pdb_object, chain)
    atom_name_ligand, atom_name_aa, resnum_aa, resname_aa = instruction
    atom_of_ligand = select_atom_given_name_type_and_num(
        pdb_object, ligand_resnum, atom_name_ligand)
    # The protein atom must be looked up with the amino-acid residue number
    # and atom name; the previous code reused the ligand arguments and so
    # always measured the ligand atom against itself.
    # NOTE(review): the residue is identified by number only (resname_aa is
    # not part of the lookup) -- confirm this is unambiguous for the inputs.
    atom_of_protein = select_atom_given_name_type_and_num(
        pdb_object, resnum_aa, atom_name_aa)
    distance = prody.calcDistance(atom_of_ligand, atom_of_protein)
    return atom_name_ligand, atom_name_aa, resnum_aa, resname_aa, distance
Пример #6
0
def calcAllDist(coordset):
    """Return the all-against-all Euclidean distance matrix of a coordinate set.

    :param coordset: array of shape ``(n_sets, n_atoms, 3)``.  Only the first
        coordinate set (``coordset[0]``) is used.
    :return: ``(n_atoms, n_atoms)`` float array; column ``i`` holds the
        distance of every atom to atom ``i``.
    """
    n_atoms = coordset.shape[1]
    coords = coordset[0]
    distance = np.zeros((n_atoms, n_atoms))

    for i in range(n_atoms):
        # Broadcasting subtracts atom i from every atom; no tiling needed.
        delta = coords - coords[i]
        distance[:, i] = np.sqrt((delta * delta).sum(axis=-1))

    return distance
Пример #7
0
def calcAllDist(coordset):
    """Compute pairwise Euclidean distances between the atoms of a coordinate set.

    :param coordset: array of shape ``(n_sets, n_atoms, 3)``; the distances
        are taken from the first set, ``coordset[0]``.
    :return: square ``(n_atoms, n_atoms)`` float array in which entry
        ``[j, i]`` is the distance between atoms ``j`` and ``i``.
    """
    count = coordset.shape[1]
    reference = coordset[0]
    distance = np.zeros((count, count))

    for col in range(count):
        # Norm over the xyz axis of (every atom - atom ``col``).
        distance[:, col] = np.linalg.norm(reference - reference[col], axis=-1)

    return distance
Пример #8
0
def getHforAtom(appf, anAtom):
    """Return the hydrogen closest to ``anAtom`` among atoms sharing its resnum.

    Atoms are taken from ``appf.select('resnum <n>')``; only those whose
    element is ``'H'`` are considered.  When several hydrogens are equally
    close, the last one encountered wins.  Returns ``None`` when the residue
    number is below 1 or no hydrogen is found within the initial bound of
    100000.
    """
    resnum = anAtom.getResnum()
    if resnum < 1:
        return None

    closest_h = None
    closest_dist = 100000
    for candidate in appf.select('resnum ' + str(resnum)):
        if candidate.getElement() != 'H':
            continue
        separation = pr.calcDistance(candidate, anAtom)
        if separation <= closest_dist:
            closest_h, closest_dist = candidate, separation
    return closest_h
Пример #9
0
def main_calc(pep):
    """Build element, distance and bond matrices for the atoms of a peptide.

    The peptide is located with ``find_index(seq, pep)``; its atoms are
    gathered from the module-level ``file`` container through
    ``seq_to_pdb``.  All matrices are sized by ``mV[0]``.

    Returns a dict with:
        'sequence'  -- ``pep`` itself;
        'index'     -- ``(mV[0], 2)`` array: column 0 is the 1-based position
                       of the first character of the atom name in
                       ``ElementSymbols``; column 1 is 0 for value 6, 1 for
                       value 7 (only once an atom named 'N' has already been
                       seen), 2 for value 8, and 0 otherwise;
        'secondary' -- symmetric matrix of pairwise distances;
        'primary'   -- symmetric matrix holding the distance for pairs closer
                       than 1.9 that ``possible_bonds`` accepts, 0 elsewhere.
    """
    print(pep)
    seen_backbone_n = False
    ind = find_index(seq, pep)
    new_pep = [[file[j], i]
               for i in range(ind[0], ind[1])
               for j in seq_to_pdb[i]]

    lengths = np.zeros((mV[0], mV[0]))
    index = np.zeros((mV[0], 2))
    bonded = np.zeros((mV[0], mV[0]))

    for i, (atom_i, res_i) in enumerate(new_pep):
        name_i = atom_i.getName()
        ele = float(ElementSymbols.index(str(name_i[0]))) + 1
        if ele == 6.0:
            index[i, 1] = 0.0
        elif ele == 7.0 and seen_backbone_n:
            index[i, 1] = 1.0
        elif ele == 8.0:
            index[i, 1] = 2.0
        # The flag flips after the classification above, so the very first
        # 'N' atom itself is not tagged with 1.
        if not seen_backbone_n and name_i == 'N':
            seen_backbone_n = True
        index[i, 0] = ele

        for j, (atom_j, _) in enumerate(new_pep):
            if i != j and lengths[i, j] == 0:
                lengths[i, j] = float(pd.calcDistance(atom_i, atom_j))
                lengths[j, i] = lengths[i, j]
            if lengths[i, j] < 1.9 and atom_i != atom_j:
                if atom_j.getSequence() in possible_bonds(res_i, pep, ind):
                    bonded[i, j] = lengths[i, j]
                    bonded[j, i] = bonded[i, j]

    return {
        'sequence': pep,
        'index': index,
        'secondary': lengths,
        'primary': bonded,
    }
Пример #10
0
def CheckforGaps(structure, max_bond_distance):
    """
    This function checks all the residues in the protein and checks whether the N of the current residue and the
    C of the previous one are within peptidic bond distance (specified by the max_bond_distance variable)
    in order to check for gaps.

    Besides classifying residue pairs, the function renumbers hetero residues
    whose resnum is equal to or lower than the previous residue's, and aborts
    with ``sys.exit`` when a non-hetero residue has a lower resnum than its
    predecessor.

    :param structure: a prody object containing the structure to study.
    :param max_bond_distance: a float specifying the max bond distance to accept
    (values below 1.55 are raised to 1.55).
    :return: two dictionaries both containing the chain as key and the residues numbers (previous, current) involved
    in a bond as values, the first dictionary contains the information about the gaps and the second one contains
    the information about those not involved in gaps.
    """
    gaps = {}
    not_gaps = {}
    if max_bond_distance < 1.55:
        max_bond_distance = 1.55
    for chain in structure.iterChains():
        residues = [residue for residue in chain]
        chain_id = chain.getChid()
        previous_residue_number = None
        for i, residue in enumerate(chain.iterResidues()):
            if previous_residue_number is not None:
                if residue.getResname() in default_supported_aminoacids:
                    if residue.getResname() == 'ACE':
                        # An ACE cap always starts a new segment.
                        gaps.setdefault(chain_id, []).append(
                            [previous_residue_number, residue.getResnum()])
                    else:
                        current_residue_N = residue.getAtom("N")
                        previous_residue = chain.getResidue(
                            previous_residue_number)
                        if not previous_residue:
                            # Lookup by number failed: fall back to position.
                            previous_residue = residues[i - 1]
                        previous_residue_C = previous_residue.getAtom('C')
                        if current_residue_N is not None and previous_residue_C is not None:
                            distance = calcDistance(current_residue_N,
                                                    previous_residue_C)
                            if distance < max_bond_distance:
                                bucket = not_gaps
                            else:
                                bucket = gaps
                            bucket.setdefault(chain_id, []).append(
                                [previous_residue_number, residue.getResnum()])
                        elif current_residue_N is None:
                            print("   * There's a problem with residue {} {} {} it" \
                                  " doesn't have the N atom".format(residue.getResname(),
                                                                    residue.getResnum(),
                                                                    residue.getChid()))
                        else:
                            print(
                                "   * There's a problem with residue {} {} {} it doesn't have the C atom"
                                .format(previous_residue.getResname(),
                                        previous_residue.getResnum(),
                                        previous_residue.getChid()))
                if residue.getResnum() == previous_residue_number:
                    if residue.getIcode() == '':
                        if residue.hetero is not None:
                            residue.setResnum(previous_residue_number + 1)
                        else:
                            print('   * WARNING: There are two residues with the same resnum in the same chain.' \
                                  'The residue {} {} {} '.format(residue.getChid(),
                                                                 residue.getResnum(),
                                                                 residue.getResname()))
                elif residue.getResnum() < previous_residue_number + 1:
                    if residue.hetero is not None:
                        residue.setResnum(previous_residue_number + 1)
                    else:
                        # Report the residue that actually precedes this one.
                        # The name bound in the peptide-bond branch above may
                        # be unset or stale when this residue is ACE or not a
                        # supported amino acid.
                        preceding_residue = residues[i - 1]
                        print(
                            "   * WARNING! The next residue has a lower resnum than the previous one in the chain??"
                        )
                        print("    * CHAIN: {} RESIDUE: {}  {} {}".format(
                            residue.getChid(), residue.getResnum(),
                            residue.getChid(), residue.getResname()))
                        print("    * previous: RESIDUE {} {}  {}".format(
                            preceding_residue.getResname(),
                            preceding_residue.getResnum(),
                            preceding_residue.getChid()))
                        sys.exit(
                            'This should never happen, please review your structure.'
                        )
            # Read the number back after any renumbering done above.
            previous_residue_number = residue.getResnum()
    return gaps, not_gaps
Пример #11
0
def runThrough(pfile):
    """Scan chain A of a PDB file for H-bonds and print per-bucket averages.

    Every N/O atom is tried as donor against every O atom as acceptor, but
    only when ``getSSIndex`` gives both the same non-negative value.  A pair
    is accepted when it passes, in order:

    1. donor-acceptor distance below ``DONOR_ACCEPTOR_MAXDISTANCE``;
    2. hydrogen-acceptor distance below ``HYDROGEN_ACCEPTOR_MAXDISTANCE``;
    3. donor-hydrogen-acceptor angle at least ``DHA_ANGLE``;
    4. donor-acceptor-antecedent angle at least ``DAB_ANGLE``;
    5. hydrogen-acceptor-antecedent angle at least ``HAB_ANGLE``.

    Donors without a hydrogen and acceptors without an antecedent are
    skipped.  For each accepted pair the hydrogen's resnum and coordinates
    are written to ``old-H.txt`` and the measurements are appended to the
    module-level ``COLUMN_*`` lists.  Finally the mean of every non-empty
    column bucket is printed as a table.

    :param pfile: PDB file name/identifier passed to ``pr.parsePDB``.
    """
    appf = pr.parsePDB(pfile, model=1, secondary=True, chain='A', altLoc=False)
    # O can be a donor in rare cases; the paper uses this convention.
    nitrox_don = appf.select('element N O')
    oxygen_acc = appf.select('element O')

    # The context manager closes the file even if a measurement raises.
    with open('old-H.txt', 'wb') as fp:
        for no_d in nitrox_don:
            n_secStruct = getSSIndex(no_d)
            if n_secStruct < 0:
                continue

            # The hydrogen depends on the donor only, so look it up once.
            h_don = getHforAtom(appf, no_d)
            if h_don is None:
                continue

            for ox_a in oxygen_acc:
                if getSSIndex(ox_a) != n_secStruct:
                    continue

                # Rule 1: donor-acceptor distance.
                da_dist = pr.calcDistance(no_d, ox_a)
                if da_dist >= DONOR_ACCEPTOR_MAXDISTANCE:
                    continue

                # Rule 2: hydrogen-acceptor distance.
                ha_dist = pr.calcDistance(h_don, ox_a)
                if ha_dist >= HYDROGEN_ACCEPTOR_MAXDISTANCE:
                    continue

                # Rule 3: donor-hydrogen-acceptor angle.
                dha_ang = pr.calcAngle(no_d, h_don, ox_a)
                if dha_ang < DHA_ANGLE:
                    continue

                # Rule 4: donor-acceptor-antecedent angle.
                acc_ante = getAntecedent(appf, ox_a)
                if acc_ante is None:
                    continue
                daa_ang = pr.calcAngle(no_d, ox_a, acc_ante)
                if daa_ang < DAB_ANGLE:
                    continue

                # Rule 5: hydrogen-acceptor-antecedent angle.
                haa_ang = pr.calcAngle(h_don, ox_a, acc_ante)
                if haa_ang < HAB_ANGLE:
                    continue

                # Valid H-bond with no_d, ox_a, h_don.
                beta_ang = getBetaAngle(appf, acc_ante, ox_a, h_don)
                gamm_ang = getGammaAngle(appf, acc_ante, ox_a, h_don)

                # NOTE(review): a negative ssindex would index the columns
                # from the end -- confirm getSSIndex(acc_ante) cannot be < 0.
                ssindex = getSSIndex(acc_ante)

                towrite = ",".join([str(h_don.getResnum()), str(h_don.getCoords())])
                # The file is opened in binary mode; encode so the write
                # works on both Python 2 and Python 3.
                fp.write((towrite + "\r\n").encode('ascii'))
                COLUMN_D_ON[ssindex].append(da_dist)
                COLUMN_D_OH[ssindex].append(ha_dist)
                COLUMN_A_NHO[ssindex].append(dha_ang)
                COLUMN_A_HOC[ssindex].append(haa_ang)
                COLUMN_BETA[ssindex].append(beta_ang)
                COLUMN_GAMMA[ssindex].append(gamm_ang)

    COLUMN_D_ON_AV = [sum(x) / len(x) for x in COLUMN_D_ON if len(x) > 0]
    COLUMN_D_OH_AV = [sum(x) / len(x) for x in COLUMN_D_OH if len(x) > 0]
    COLUMN_A_NHO_AV = [sum(x) / len(x) for x in COLUMN_A_NHO if len(x) > 0]
    COLUMN_A_HOC_AV = [sum(x) / len(x) for x in COLUMN_A_HOC if len(x) > 0]
    COLUMN_BETA_AV = [sum(x) / len(x) for x in COLUMN_BETA if len(x) > 0]
    COLUMN_GAMMA_AV = [sum(x) / len(x) for x in COLUMN_GAMMA if len(x) > 0]

    TABLE = [COLUMN_D_ON_AV, COLUMN_D_OH_AV, COLUMN_A_NHO_AV, COLUMN_A_HOC_AV, COLUMN_BETA_AV, COLUMN_GAMMA_AV]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('        D_ON          D_OH      ANGLE(NHO)    ANGLE(HOC)        BETA         GAMMA   ')
    print(np.array(TABLE).T)
Пример #12
0
def CheckMetalsCoordination(structure):
    """
    A function to detect the metal atoms that could be coordinated with the protein.

    For every residue name in ``supported_metals`` that is present, each
    single-atom residue of that name is examined: the non-hydrogen,
    non-carbon atoms within 3 of it (excluding atoms with the metal's own
    resnum) are collected.  When consecutive candidates belong to the same
    residue (adjacent atom indices, or any two atoms of an ASP/GLU), only the
    one closer to the metal is kept.  The resulting geometry is then reported
    and checked with ``CheckConformation`` (tetrahedric for up to 4 atoms,
    octahedric for 5 or 6).

    :param structure: prody object in which metals are searched.
    :return: dict mapping each coordinated metal residue (taken from a copy
        of the metal selection) to the list of atoms coordinating it.
    """
    selection_pattern = "(within 3 of metal) and (not resnum {}) and (not hydrogen) and (not carbon)"
    coordinated_metals = {}
    for metal in supported_metals:
        metal_selection = structure.select('resname {}'.format(metal))
        if metal_selection is None:
            continue
        print("  * Checking the metals that can be coordinated. ("
              "a constraint should be used if they're really coordinated)")
        for metal_res in metal_selection.copy().iterResidues():
            if metal_res.numAtoms() != 1:
                continue
            coordinated_atoms = structure.select(
                selection_pattern.format(metal_res.getResnum()),
                metal=metal_res)
            if coordinated_atoms is None:
                # Fill the placeholder with the same identifier used in the
                # report below; it used to be printed as a literal "{}".
                print(
                    "     * The metal atom {} isn't coordinated with the protein. Are you sure it's necessary?"
                    .format("{} {} {}".format(metal_res.getResname(),
                                              metal_res.getChid(),
                                              metal_res.getResnum())))
                continue

            coordinated_atoms_list = []
            prev_atom = None
            for at in coordinated_atoms.iterAtoms():
                if prev_atom is not None and \
                        at.getResnum() == prev_atom.getResnum() and \
                        (at.getIndex() == prev_atom.getIndex() + 1 or at.getResname() in ['ASP', 'GLU']):
                    if calcDistance(at, metal_res) > calcDistance(
                            prev_atom, metal_res):
                        # The previous atom of this residue is closer: drop
                        # the current one and restart the pairing.
                        prev_atom = None
                        continue
                    # The current atom is at least as close: it replaces the
                    # previous one.
                    coordinated_atoms_list.pop(-1)
                coordinated_atoms_list.append(at)
                prev_atom = at
            coordinated_metals[metal_res] = coordinated_atoms_list

    # NOTE(review): this mapping is filled but never returned or read.
    coordinated_atoms_ids = {}
    if coordinated_metals:
        # .values()/.items() work on both Python 2 and Python 3.
        known_coordination_numbers = [
            x[1] for x in coordination_geometries.values()
        ]
        for metal, atoms_list in coordinated_metals.items():
            metal_id = "{} {} {}".format(metal.getResname(), metal.getChid(),
                                         metal.getResnum())
            atoms_ids = [[
                "{} {} {} {}".format(at.getResnum(), at.getResname(),
                                     at.getChid(), at.getName()),
                calcDistance(metal, at)[0]
            ] for at in atoms_list]
            if len(atoms_list) in known_coordination_numbers:
                coordinated_atoms_ids[metal_id] = atoms_ids
            print("     * The metal atom {0} has the following atoms within coordination "
                  "distance:\n{1}".format(metal_id, "\n".join(['       * {0}'.format(x[0]) for x in atoms_ids])))
            # Every unordered pair of coordinating atoms with its angle at
            # the metal.
            angles = [[at, metal, at2,
                       calcAngle(at, metal, at2)[0]]
                      for idx, at in enumerate(atoms_list)
                      for at2 in atoms_list[idx + 1:]]
            if len(atoms_list) <= 4:
                print(
                    "      * Checking for a tetrahedric coordination for the atom."
                )
                found_conformation = CheckConformation(angles, 'tetrahedric')
                if not found_conformation:
                    print(
                        "        * WARNING: The angles are too distorted to ascertain this configuration. CHECK IT manually"
                    )
            elif 4 < len(atoms_list) <= 6:
                print(
                    "      * WARNING: Checking for an octahedric coordination for the atom."
                )
                found_conformation = CheckConformation(angles, 'octahedric')
                if not found_conformation:
                    print(
                        "        * The angles are too distorted to ascertain this configuration. CHECK IT manually"
                    )
            else:
                print(
                    "      * The metal doesn't have a coordination we can validate."
                )
    else:
        print("  * There are no coordinated metals.")

    return coordinated_metals
Пример #13
0
def CheckClashes(mutated_protein,
                 mutation,
                 zmatrix,
                 initial_residue,
                 final_residue,
                 overlapfactor=0.7):
    """
    Check the clashes between a mutated residue and the rest of the structure.

    A first pass collects, for every non-fixed heavy atom of the mutated
    residue, the atoms of other residues lying within
    ``(atom VdW radius + largest VdW radius in zmatrix) * overlapfactor``.
    A second pass loads the template of each candidate atom's residue and
    keeps only those whose real distance is below
    ``(candidate VdW radius + atom VdW radius) * overlapfactor``.
    For a PRO residue its own N atom is never counted as clashing with CD.

    :param mutated_protein: prody AtomGroup with the structure to check.
    :param mutation: mapping with the keys "resnum" and "chain" identifying
        the mutated residue (an empty/false "chain" selects by resnum only).
    :param zmatrix: ZMATRIX of the mutated residue (``AtomNames`` and
        ``VdWRadius`` are read from it).
    :param initial_residue: residue number whose template gets the 'B'
        suffix when it is a supported amino acid.
    :param final_residue: residue number whose template gets the 'E' suffix.
    :param overlapfactor: float, by default 0.7; it scales the sum of the
        two VdW radii to obtain the minimum accepted distance.
    :rtype : dictionary
    :return: mapping from atom name of the mutated residue to the list of
        atom indices it clashes with; atoms without clashes are absent.
    :raises ValueError: if an atom of the mutated residue is not listed in
        ``zmatrix.AtomNames``.
    """
    max_vdw = max(zmatrix.VdWRadius)
    contacts_object = Contacts(mutated_protein)
    if mutation["chain"]:
        residue_selection = \
            "resnum {0[resnum]} and chain {0[chain]} and not hydrogen".format(
                mutation)
    else:
        residue_selection = "resnum {0[resnum]} and not hydrogen".format(
            mutation)
    mutated_residue = mutated_protein.select(residue_selection)
    # A set gives constant-time "is this one of our own atoms?" checks.
    own_indices = set(mutated_residue.getIndices().tolist())

    # First pass: generous cutoff based on the largest radius in the template.
    clashes2check = {}
    for at in mutated_residue.iterAtoms():
        atom_name = at.getName()
        if atom_name in fixed_atoms:
            continue
        try:
            at_vdw = zmatrix.VdWRadius[zmatrix.AtomNames.index(atom_name)]
        except ValueError:
            message = "The atom {} in the mutated residue doesn't agree with the Zmatrix.".format(
                atom_name)
            print(message)
            print(mutated_residue.getNames())
            print(zmatrix.AtomNames)
            raise ValueError(message)
        dist = (at_vdw + max_vdw) * overlapfactor
        at_contacts = contacts_object.select(dist, at.getCoords())
        if at_contacts is None:
            continue
        possible_clashes = [
            clashing_atom.getIndex() for clashing_atom in at_contacts
            if clashing_atom.getIndex() not in own_indices
        ]
        if possible_clashes:
            clashes2check[atom_name] = possible_clashes

    # In proline CD is bonded to the residue's own N: never a clash.
    if mutated_residue.getResnames()[0] == "PRO" and "CD" in clashes2check:
        n_atom_index = mutated_residue.select("name N").getIndices()[0]
        cd_atom_clashes = clashes2check["CD"]
        if n_atom_index in cd_atom_clashes:
            cd_atom_clashes.remove(n_atom_index)

    # Second pass: exact cutoff using the candidate atom's own radius.
    real_clashes = {}
    for key, indices in clashes2check.items():
        key_atom = mutated_residue.select('name {}'.format(key))
        key_vwd_radius = zmatrix.VdWRadius[zmatrix.AtomNames.index(key)]
        real_clashing_indices = []
        for ind in indices:
            atom2check = mutated_protein.select('index {}'.format(ind))
            atom2check_resname = atom2check.getResnames()[0]
            atom2check_resnum = atom2check.getResnums()[0]
            if atom2check_resnum == initial_residue and atom2check_resname in default_supported_aminoacids:
                atom2check_zmatrix = ZMATRIX(atom2check_resname + 'B')
            elif atom2check_resnum == final_residue:
                atom2check_zmatrix = ZMATRIX(atom2check_resname + 'E')
            else:
                atom2check_zmatrix = ZMATRIX(atom2check_resname)
            if atom2check_zmatrix.Name is None:
                print("Cannot load the zmatrix for the residue {} {}.\n"
                      "It won't be checked for clashes.".format(atom2check_resname, atom2check_resnum))
                continue
            atom2check_vdw_radius = atom2check_zmatrix.VdWRadius[
                atom2check_zmatrix.AtomNames.index(atom2check.getNames()[0])]
            min_distance = (atom2check_vdw_radius +
                            key_vwd_radius) * overlapfactor
            real_distance = calcDistance(key_atom, atom2check)
            if real_distance < min_distance:
                real_clashing_indices.append(ind)
        if real_clashing_indices:
            real_clashes[key] = real_clashing_indices

    return real_clashes
Пример #14
0
    def process(self, args=None):
        """Measure peptide/receptor geometry for every pose and store it in HDF5.

        Rank 0 creates the output file ``args['out']`` with a structured
        dataset named ``'out'``; after a barrier all ranks reopen it through
        the MPI-IO driver.  For each peptide in ``self.aplist`` the poses are
        obtained from ``PepExtractor.extract_result``; peptides for which
        extraction fails are reported and skipped.  For every pose and every
        atom named ``C`` (except in resnum 1) one record is written with:

        * ``dist``   -- distance from that C to the receptor atom selected by
                        ``'resnum 151 name SG'``;
        * ``hdist1`` -- minimum distance between the residue's O and the N
                        atoms of receptor residues 149-151;
        * ``hdist2`` -- minimum distance between the residue's N and the O of
                        receptor residue 168;
        * ``BD``     -- angle SG-C-O (presumably the Burgi-Dunitz angle --
                        TODO confirm);
        * ``FL``     -- dihedral SG-nC-C-O, where nC is C displaced by the
                        unit normal of the N-C-O plane (presumably the
                        Flippin-Lodge angle -- TODO confirm);
        * ``AT``     -- angle NE2(46)-SG-C;
        * ``dir``    -- 'F' when the dihedral NE2-SG-C-N(next residue) is
                        negative, otherwise 'R';
        * ``aln``    -- the capped sequence ('X' + peptide + 'Z'), reversed
                        for 'R', padded with '-' to a total width of 12.

        :param args: option mapping; ``self.args`` is used when it is empty
            or omitted.  The mapping is modified in place (key ``'mpi'``).
        """
        if not args:
            args = self.args

        args['mpi'] = self.mpi
        extractor = er.PepExtractor(**args)
        lM = len(self.aplist)

        dtype = np.dtype([
            ('name', 'S10'),
            ('pnum', '<i4'),
            ('rnum', '<i4'),
            ('dist', '<f8'),
            ('hdist1', '<f8'),
            ('hdist2', '<f8'),
            ('BD', '<f8'),
            ('FL', '<f8'),
            ('AT', '<f8'),
            ('dir', 'S3'),
            ('aln', 'S20')
        ])

        # Only one rank creates the file and dataset; the others wait at the
        # barrier and then open it collectively.
        if self.mpi.rank == 0:
            m = self.aplist[0]
            lm = len(m)
            S = extractor.extract_result(m)
            lS = S.numCoordsets()
            Sf = h5py.File(args['out'], 'w')
            Sf.create_dataset(
                'out', (len(self.plist) * lS * lm, ), dtype=dtype)
            Sf.close()

        self.mpi.comm.Barrier()

        Sf = h5py.File(args['out'], 'r+', driver='mpio', comm=self.mpi.comm)
        out = Sf['out']

        a = prody.parsePDB(args['receptor'])

        # Receptor reference atoms (the variable is called OG but the
        # selection picks the atom named SG).
        OG = a.select('resnum 151 name SG')
        Ob = a.select('resnum 168 and name O')
        ND1 = a.select('resnum 46 and name NE2')

        OXH = a.select("name N resnum 149 150 151")

        t0 = time.time()

        for cm in range(lM):
            m = self.aplist[cm]
            lm = len(m)

            t1 = time.time()
            dt = t1 - t0
            t0 = t1
            print('STEP: %d PERCENT: %.2f%% TIME: %s' % (
                cm, float(cm) / lM * 100, dt))

            try:
                S = extractor.extract_result(m)
            except Exception:
                # Best effort: report and move on, but let KeyboardInterrupt
                # and SystemExit propagate.
                print('ERROR: BAD PEPTIDE: %s' % m)
                continue

            lS = S.numCoordsets()

            for S_ in range(lS):

                S.setACSIndex(S_)

                tC = S.select('name C')

                for C in tC.iterAtoms():

                    rnum = C.getResnum()

                    if rnum == 1:
                        continue

                    O = S.select('resnum %i and name O' % rnum)
                    Nl = S.select('resnum %i and name N' % (rnum))
                    N = S.select('resnum %i and name N' % (rnum + 1))

                    C_ = C.getCoords()
                    O_ = O.getCoords()[0]
                    N_ = N.getCoords()[0]

                    dist = prody.calcDistance(C, OG)[0]
                    hdist1 = np.min(prody.calcDistance(OXH, O))
                    hdist2 = np.min(prody.calcDistance(Ob, Nl))

                    # Pseudo-atom one unit away from C along the normal of
                    # the N-C-O plane; used as reference for the FL dihedral.
                    nC_ = np.cross((C_ - N_), (O_ - C_))
                    nC_ /= np.linalg.norm(nC_)
                    nC_ = nC_ + C_
                    nC_ = nC_.reshape(1, 3)

                    nC = C.copy()
                    nC.setCoords(nC_)

                    BD = prody.calcAngle(OG, C, O)
                    FL = prody.calcDihedral(OG, nC, C, O)
                    AT = prody.calcAngle(ND1, OG, C)

                    angle_ = prody.calcDihedral(ND1, OG, C, N)

                    # Capped sequence, oriented by the sign of the dihedral.
                    s = 'X' + m + 'Z'
                    if angle_ < 0:
                        DIR = 'F'
                        seq = s
                        pref = 6 - rnum
                    else:
                        DIR = 'R'
                        seq = s[::-1]
                        pref = rnum

                    suf = 12 - (pref + len(seq))
                    seq = '-' * pref + seq + '-' * suf

                    outdata = (m, S_ + 1,
                               rnum, dist, hdist1, hdist2, BD, FL, AT,
                               DIR, seq)

                    # Flat record index: peptide block, then pose, then
                    # residue (rnum starts at 2 because resnum 1 is skipped).
                    ind = (self.tb + cm) * lS * lm + S_ * lm + rnum - 2
                    out[ind] = outdata

        self.database.close()
        Sf.close()
Пример #15
0
def calculate_distance_of_to_atoms(atom1, atom2):
    """Return the first distance ProDy computes between two atom objects.

    Both arguments must expose ``getCoords()``. The coordinates are handed
    to ``prody.calcDistance`` and element ``[0]`` of its result is returned,
    so the result must be indexable.
    NOTE(review): that presumably means single-atom selections rather than
    bare ``Atom`` objects - confirm against the callers.
    """
    return prody.calcDistance(atom1.getCoords(), atom2.getCoords())[0]
Пример #16
0
def CheckforGaps(structure):
    """Classify consecutive residue pairs of every chain as bonded or gapped.

    For each chain the residues are visited in iteration order. Every
    residue whose name is in ``supported_aminoacids`` is compared with the
    residue visited just before it:

    * an ``ACE`` residue is always recorded as a gap;
    * otherwise the distance between its ``N`` atom and the previous
      residue's ``C`` atom decides: below 2.5 the pair goes to ``not_gaps``,
      else to ``gaps``. If either atom is missing, a message is printed and
      the pair is recorded nowhere.

    Independently of the gap test, residue numbering is checked: a residue
    whose number repeats (with an empty insertion code) or goes backwards
    is renumbered to ``previous + 1`` when ``residue.hetero`` is not None.
    A non-hetero residue with a repeated number only triggers a warning;
    one with a lower number aborts the program through ``sys.exit``.

    :param structure: object exposing ``iterChains()``; chains expose
        ``iterResidues()`` and ``getChid()``.
    :return: ``(gaps, not_gaps)``, two dicts mapping chain id to a list of
        ``[previous_resnum, current_resnum]`` pairs.
    """
    gaps = {}
    not_gaps = {}
    for chain in structure.iterChains():
        # Keep the previous residue *object*, not just its number: looking
        # it up again by number ignores insertion codes, is unreliable after
        # setResnum(), and left ``previous_residue`` unbound on the code
        # paths that print the "lower resnum" diagnostics below.
        previous_residue = None
        for residue in chain.iterResidues():
            chain_id = chain.getChid()
            if previous_residue is not None:
                previous_residue_number = previous_residue.getResnum()
                if residue.getResname() in supported_aminoacids:
                    pair = [previous_residue_number, residue.getResnum()]
                    if residue.getResname() == 'ACE':
                        gaps.setdefault(chain_id, []).append(pair)
                    else:
                        current_residue_N = residue.getAtom("N")
                        previous_residue_C = previous_residue.getAtom('C')
                        if current_residue_N is not None and previous_residue_C is not None:
                            distance = calcDistance(current_residue_N, previous_residue_C)
                            if distance < 2.5:
                                not_gaps.setdefault(chain_id, []).append(pair)
                            else:
                                gaps.setdefault(chain_id, []).append(pair)
                        elif current_residue_N is None:
                            print("   * There's a problem with residue {} {} {} it"
                                  " doesn't have the N atom".format(residue.getResname(),
                                                                    residue.getResnum(),
                                                                    residue.getChid()))
                        else:
                            # Only reachable when the previous residue lacks its C atom.
                            print("   * There's a problem with residue {} {} {} it doesn't have the C atom".format(
                                previous_residue.getResname(),
                                previous_residue.getResnum(),
                                previous_residue.getChid()))
                if residue.getResnum() == previous_residue_number:
                    if residue.getIcode() == '':
                        if residue.hetero is not None:
                            residue.setResnum(previous_residue_number + 1)
                        else:
                            print('   * WARNING: There are two residues with the same resnum in the same chain.'
                                  'The residue {} {} {} '.format(residue.getChid(),
                                                                 residue.getResnum(),
                                                                 residue.getResname()))
                elif residue.getResnum() < previous_residue_number + 1:
                    if residue.hetero is not None:
                        residue.setResnum(previous_residue_number + 1)
                    else:
                        print("   * WARNING! The next residue has a lower resnum than the previous one in the chain??")
                        print("    * CHAIN: {} RESIDUE: {}  {} {}".format(residue.getChid(),
                                                                    residue.getResnum(),
                                                                    residue.getChid(),
                                                                    residue.getResname()))
                        print("    * previous: RESIDUE {} {}  {}".format(previous_residue.getResname(),
                                                                   previous_residue.getResnum(),
                                                                   previous_residue.getChid()))
                        sys.exit('This should never happen, please review your structure.')
            # Remembered after any renumbering above, so the next comparison
            # sees the residue's final number.
            previous_residue = residue
    return gaps, not_gaps
Пример #17
0
def SolveClashes(initial_structure, initial_clashes, mutation, zmatrix,
                 initial_residue_number, final_residue_number):
    """Try rotamer-library conformations of one residue to reduce clashes.

    The structure is split into the atoms before, at and after
    ``mutation["resnum"]``. Up to ``NumberOfDihedrals`` candidates are built
    with ``ChangeResidueCoordinates`` and scored with ``CheckClashes``. A
    candidate replaces the current best residue when it has fewer clashing
    keys, or the same number of keys but fewer clash partners in total.
    The search stops early when a candidate has no clashes at all.

    :param initial_structure: structure supporting ``select(...).copy()``
        and ``+`` concatenation of the copies.
    :param initial_clashes: dict of the starting clashes; the number of keys
        and the summed length of the values are the two scores.
    :param mutation: mapping providing ``"resnum"`` and ``"fin_resname"``.
    :param zmatrix: object providing ``ComputeDeltaFi()``; forwarded to the
        helpers.
    :param initial_residue_number: forwarded to ``CheckClashes``.
    :param final_residue_number: forwarded to ``CheckClashes``.
    :return: the structure rebuilt around the best residue found.
    """
    print("Trying to solve the clashes.")

    target_resnum = mutation["resnum"]
    leading_atoms = initial_structure.select(
        "resnum < {}".format(target_resnum)).copy()
    trailing_atoms = initial_structure.select(
        "resnum > {}".format(target_resnum)).copy()
    best_residue = initial_structure.select(
        "resnum {}".format(target_resnum)).copy()

    # Protonation-state variants share the rotamer library of the parent residue.
    library_aliases = {"HIE": "HIS", "HID": "HIS", "HIP": "HIS", "LYN": "LYS"}
    resname = best_residue.getResnames()[0]
    resname = library_aliases.get(resname, resname)

    rotamer_library = ROTAMERLIB(resname)
    trials_left = rotamer_library.NumberOfDihedrals

    best_total = sum(len(partners) for partners in initial_clashes.values())
    print("Starting number of heavy atoms clashes: {}".format(
        len(initial_clashes)))
    print("Starting number of total clashes: {}".format(best_total))

    zmatrix.ComputeDeltaFi()
    dihedral2check = 0
    while trials_left > 0:
        trials_left -= 1
        candidate = ChangeResidueCoordinates(best_residue.copy(), zmatrix,
                                             rotamer_library, dihedral2check)
        # Every trial consumes one dihedral index, whether or not it is kept.
        dihedral2check += 1

        if mutation["fin_resname"] == "PRO":
            ring_closure = calcDistance(candidate.select("name N"),
                                        candidate.select("name CD"))
            if ring_closure > 1.5:
                # N-CD too far apart: skip this candidate.
                continue

        rebuilt = leading_atoms + candidate + trailing_atoms
        candidate_clashes = CheckClashes(rebuilt, mutation, zmatrix,
                                         initial_residue_number,
                                         final_residue_number)
        candidate_total = sum(
            len(partners) for partners in candidate_clashes.values())

        if candidate_clashes == {}:
            best_residue = candidate
            break

        heavy_now = len(candidate_clashes)
        heavy_best = len(initial_clashes)
        if heavy_now < heavy_best:
            print(
                'The number of clashing heavy atoms in the residue now is: {} and clashes with {} atoms'
                .format(heavy_now, candidate_total))
            best_residue = candidate.copy()
            best_total = candidate_total
            initial_clashes = candidate_clashes
        elif heavy_now == heavy_best and candidate_total < best_total:
            print(
                "The number of clashing heavy atoms is still the same but now they clash with {} atoms."
                .format(candidate_total))
            best_residue = candidate.copy()
            best_total = candidate_total

    return leading_atoms + best_residue + trailing_atoms
Пример #18
0
def process(args):
    """Score every pose of every extracted peptide against fixed receptor atoms.

    ``args`` is passed as keyword arguments to ``er.PepExtractor`` and must
    also contain ``'receptor'`` (structure file parsed with ProDy) and
    ``'out'`` (path of the tab-separated report that is written).

    For each coordinate set of each peptide in ``extractor.plist`` one line
    is written containing: ``<peptide>_<pose>``, the smallest distance from
    any peptide ``C`` atom to the receptor reference atom, the angle
    (receptor residue 79 NE2, reference atom, that C), the distance from the
    averaged backbone N position of receptor residues 239-241 to the
    carbonyl O of the same peptide residue, the direction flag (``F``/``R``),
    the dash-padded sequence string and the peptide residue number.

    Peptides whose extraction fails are reported on stdout and skipped.
    """
    extractor = er.PepExtractor(**args)
    lM = len(extractor.plist)

    a = prody.parsePDB(args['receptor'])

    # Hard-coded receptor atoms used as geometric references.
    SG = a.select('resnum 241 name OG')
    O = a.select('resnum 262 and name O')
    ND1 = a.select('resnum 79 and name NE2')

    tHN_ = a.select("name N resnum 239 240 241")
    tHN = np.average(tHN_.getCoords(), axis=0)

    t0 = time.time()

    # The context manager closes the report even if a pose raises part-way.
    with open(args['out'], 'w') as out:
        for cm, m in enumerate(extractor.plist):
            t1 = time.time()
            dt = t1 - t0
            t0 = t1
            print('STEP: %d PERCENT: %.2f%% TIME: %s' % (
                cm, float(cm) / lM * 100, dt))

            try:
                S = extractor.extract_result(m)
            except Exception:
                # Best effort: skip peptides that cannot be extracted, but let
                # KeyboardInterrupt / SystemExit propagate.
                print('ERROR: BAD PEPTIDE: %s' % m)
                continue

            lS = S.numCoordsets()

            for S_ in range(lS):
                S.setACSIndex(S_)

                # Find the peptide carbonyl carbon closest to the reference atom.
                tC = S.select('name C')
                dist = np.inf
                for c in tC.iterAtoms():
                    dist_ = prody.calcDistance(c, SG)[0]
                    if dist_ < dist:
                        dist = dist_
                        rnum = c.getResnum()

                C = S.select('resnum %i and name C' % rnum)
                CO = S.select('resnum %i and name O' % rnum)
                N_ = S.select('resnum %i and name N' % (rnum + 1))

                angleAttack = prody.calcAngle(ND1, SG, C)

                # Direction: is the next residue's N farther from the receptor
                # O (residue 262) than this residue's C?
                delta = (prody.calcDistance(O, N_)[0]
                         - prody.calcDistance(O, C)[0])
                DIR = 'F' if delta > 0 else 'R'

                s = 'X' + m + 'Z'
                if DIR == 'F':
                    seq = s
                    pref = 5 - rnum
                else:
                    seq = s[::-1]
                    pref = rnum

                # Negative pad widths yield empty strings, i.e. no padding.
                suf = 10 - (pref + len(seq))
                seq = '-' * pref + seq + '-' * suf

                hdist = np.linalg.norm(tHN - CO.getCoords())

                outstr = "%-10s\t%6.2f\t%6.2f\t%6.2f\t%3s\t%12s\t%d\n" % (
                    '%s_%02d' % (m, S_ + 1),
                    dist, angleAttack, hdist,
                    DIR, seq, rnum)

                out.write(outstr)
Пример #19
0
def get_dist(vdm, ifg, comb):
    """Return the first ProDy distance between one vdM atom and one iFG atom.

    The vdM atom name is the first entry of the module-level ``dist`` table
    for ``vdm.resname``; the iFG atom name is
    ``comb.vandarotamer_dict['A1']``.
    """
    vdm_atom = vdm.ires_sele.select('name ' + dist[vdm.resname][0])
    ifg_atom = ifg.sele.select('name ' + comb.vandarotamer_dict['A1'])
    return pr.calcDistance(vdm_atom, ifg_atom)[0]
Пример #20
0
def generateTrainingSet(inputdict, distance, output=None, combineOutput=True):
    """Build per-residue feature tables for model structures of each target.

    For every ``target -> models`` entry of ``inputdict`` the target is
    parsed, NetSurfP and PSIPRED are run once on the target, and each model
    is aligned onto the target, annotated with STRIDE and DSSP, and turned
    into a ``pandas.DataFrame`` indexed by the matched target residue
    indices. The ``ClassLabel`` column is 1 where the distance between
    matched target and model atoms is below ``distance`` and 0 otherwise.

    :param inputdict: mapping of target structure file to an iterable of
        model structure files.
    :param distance: distance threshold for ``ClassLabel``; must be truthy.
    :param output: optional CSV path; only written when ``combineOutput`` is
        true, otherwise a warning is printed.
    :param combineOutput: if true return one concatenated DataFrame,
        otherwise a list with one DataFrame per model.
    :return: the DataFrame or list described above.
    :raises subprocess.CalledProcessError: if ``dssp``, ``stride``,
        ``netsurfp`` or ``runpsipred`` cannot be run successfully.
    :raises AssertionError: if ``distance`` is falsy.
    """
    # Fail early when one of the external tools is not available. The
    # commands are fixed strings, so running them through the shell is safe.
    with open(os.devnull, 'w') as devnull:
        for command in ('dssp --version', 'stride -h', 'netsurfp -h',
                        'runpsipred'):
            subprocess.check_call(command, shell=True, stdout=devnull,
                                  stderr=devnull)

    # Output column -> per-residue data label copied from the target.
    copied_columns = (
        ('NetSurfP_exp', 'netsurfp_exposure'),
        ('NetSurfP_asa', 'netsurfp_asa'),
        ('NetSurfP_rsa', 'netsurfp_rsa'),
        ('NetSurfP_alpha', 'netsurfp_alphascore'),
        ('NetSurfP_beta', 'netsurfp_betascore'),
        ('NetSurfP_coil', 'netsurfp_coilscore'),
        ('PSIPRED_ss', 'psipred_ss'),
        ('PSIPRED_coilscore', 'psipred_coilscore'),
        ('PSIPRED_helixscore', 'psipred_helixscore'),
        ('PSIPRED_strandscore', 'psipred_strandscore'),
    )

    finalData = pd.DataFrame() if combineOutput else []

    for target, models in inputdict.items():

        targetPDB = prody.parsePDB(target)
        assert distance, "Distance is not valid"

        tempdir = tempfile.mkdtemp()
        try:
            # NetSurfP and PSIPRED are computed once on the target and reused
            # for every model instead of being re-run per model.
            netsurfp.parseNetSurfP(
                netsurfp.execNetSurfP(target, outputdir=tempdir), targetPDB)
            psipred.parsePSIPRED(
                psipred.execPSIPRED(target, outputdir=tempdir), targetPDB)

            for modelFilename in models:
                datadict = {}
                modelPDB = prody.parsePDB(modelFilename)

                if not modelPDB:
                    print('Model file %s cannot be parsed, skipping...' %
                          modelFilename)
                    continue

                # If the model has no chain ID, assign one; that keeps the
                # STRIDE parser happy. Comparing the whole unique-ID array in
                # an ``if`` raised ValueError for multi-chain models, so test
                # for "exactly one ID and it is blank" explicitly.
                chids = np.unique(modelPDB.getChids())
                if len(chids) == 1 and chids[0] == ' ':
                    modelPDB.all.setChids('A')
                    modelFilename = os.path.join(
                        tempdir, os.path.basename(modelFilename))
                    modelFilename = prody.writePDB(modelFilename,
                                                   modelPDB,
                                                   autoext=False)

                # Superimpose the model onto the target structure.
                match = prody.matchAlign(modelPDB, targetPDB, tarsel='calpha',
                                         seqid=50, overlap=20)
                mapmodel = match[1]
                maptarget = match[2]
                model_index = mapmodel.getResindices()
                target_index = maptarget.getResindices()

                # Copy NetSurfP and PSIPRED data from target to model.
                copyDataFromTarget(targetPDB, modelPDB)

                # STRIDE: accessible area and secondary structure.
                prody.parseSTRIDE(
                    prody.execSTRIDE(modelFilename, outputdir=tempdir),
                    modelPDB)
                datadict['STRIDEarea'] = pd.Series(
                    modelPDB.ca.getData('stride_area')[model_index],
                    index=target_index)
                ss = pd.Series(mapmodel.getSecstrs(), index=target_index)
                ss[ss == ''] = '-'  # empty strings cause trouble in csv load/save
                datadict['STRIDEss'] = ss

                # DSSP: accessibility and secondary structure.
                prody.parseDSSP(
                    prody.execDSSP(modelFilename, outputdir=tempdir),
                    modelPDB)
                datadict['DSSPacc'] = pd.Series(
                    modelPDB.ca.getData('dssp_acc')[model_index],
                    index=target_index)
                ss = pd.Series(mapmodel.getSecstrs(), index=target_index)
                ss[ss == ''] = '-'  # empty strings cause trouble in csv load/save
                datadict['DSSPss'] = ss

                # NetSurfP and PSIPRED columns.
                for column, label in copied_columns:
                    datadict[column] = pd.Series(
                        modelPDB.ca.getData(label)[model_index],
                        index=target_index)

                # Class labels based on the distance argument.
                datadict['ClassLabel'] = pd.Series(
                    (np.abs(prody.calcDistance(maptarget.copy(),
                                               mapmodel.copy()))
                     < distance).astype(int),
                    index=target_index)

                if combineOutput:
                    finalData = pd.concat([finalData, pd.DataFrame(datadict)])
                else:
                    finalData.append(pd.DataFrame(datadict))
        finally:
            # Remove the temporary directory even when a tool or parser fails.
            shutil.rmtree(tempdir, ignore_errors=True)

    if output:
        if combineOutput:
            finalData.to_csv(output, index=False, quoting=csv.QUOTE_NONNUMERIC)
        else:
            print('Warning! Output must be combined to be saved into a CSV '
                  'file')

    return finalData
Пример #21
0
def _atom_names_for_selection(atom_token):
    """Return ``(is_multiple, names)`` for one atom field of an instruction.

    A field written as ``[A,B,C]`` becomes ``(True, "A B C")``; any other
    field is returned unchanged as ``(False, field)``.
    """
    if "[" in atom_token or "]" in atom_token:
        atom_list = atom_token.strip("[").strip("]").split(",")
        return True, ' '.join(atom_list)
    return False, atom_token


def _names_or_exit(selection, message):
    """Return ``selection.getNames()`` or exit with ``message`` if it cannot."""
    try:
        return selection.getNames()
    except AttributeError:
        raise SystemExit(message)


def compute_atom_distances(pdb_target, res_file, output_report, chain="L"):
    """
    Compute atom-atom distances between ligand atoms and residue atoms.

    The residue number and the atom names (for both ligand and residue) are
    read from ``res_file``. When more than one atom is selected on either
    side, the center returned by ``prody.calcCenter`` for that group is used
    as the point for the distance.

    :param pdb_target: input PDB file path
    :param res_file: instruction file. Each row has three whitespace-separated
        fields: RESNUM LIGAND_ATOM RESIDUE_ATOM. An atom field may be a single
        atom name or a bracketed list ``[ATOM1,ATOM2,ATOMN]``.
    :param output_report: path of the report file to write; nothing is
        written when it is falsy
    :param chain: chain identifier of the ligand
    :return: the report text, one line per instruction with the residue
        number, the selected ligand atom names, the selected residue atom
        names and the distance
    """
    # Load PDB files
    target = pdb2prody(pdb_target)
    ligand = target.select("chain {}".format(chain))
    print(ligand.getNames())
    # Reading instructions from file
    list_of_instructions = read_selecteds_from_file(res_file)
    report = []
    for line in list_of_instructions:
        resnum, atom_name_ref, atom_name_tar = line.split()

        # Ligand side. The selection is validated *before* its names are
        # printed; otherwise a failed selection crashes with AttributeError
        # and the explanatory exit message is never reached.
        multiple_ref, names_query_ref = _atom_names_for_selection(atom_name_ref)
        if multiple_ref:
            print("Multiple atom selection for the ligand")
        else:
            print("Single atom selection for the ligand")
        atom_ref_selected = ligand.select("name {}".format(names_query_ref))
        names_ref = _names_or_exit(
            atom_ref_selected,
            "None atoms where selected. Please, check if the selected atoms exists in the ligand in {}"
            .format(pdb_target))
        if multiple_ref:
            print("Selected atoms: {}".format(names_ref))
        else:
            print("Selected atom: {}".format(names_ref))

        # System (residue) side, same order of operations.
        multiple_tar, names_query_tar = _atom_names_for_selection(atom_name_tar)
        if multiple_tar:
            print("Multiple atom selection for the system")
        else:
            print("Single atom selection for the system")
        atom_tar_selected = select_atom_given_name_type_and_num(
            target, resnum, names_query_tar)
        names_tar = _names_or_exit(
            atom_tar_selected,
            "None atoms where selected. Please, check if the selected atoms exists in the residue {} in {}"
            .format(resnum, pdb_target))
        if multiple_tar:
            print("Selected atoms: {}".format(names_tar))
        else:
            print("Selected atom: {}".format(names_tar))

        number_of_selected_atoms_ref = len(names_ref)
        number_of_selected_atoms_tar = len(names_tar)

        # Four possibilities: one atom on both sides, several atoms on one
        # side, or several atoms on both. A group of atoms is replaced by its
        # center.
        if number_of_selected_atoms_ref <= 1 and number_of_selected_atoms_tar <= 1:
            distance = prody.calcDistance(atom_tar_selected, atom_ref_selected)
        elif number_of_selected_atoms_ref <= 1 and number_of_selected_atoms_tar > 1:
            center_tar = prody.calcCenter(atom_tar_selected)
            atom_coords = atom_ref_selected.getCoords()[0]
            distance = np.linalg.norm(center_tar - atom_coords)
        elif number_of_selected_atoms_ref > 1 and number_of_selected_atoms_tar <= 1:
            center_ref = prody.calcCenter(atom_ref_selected)
            atom_coords = atom_tar_selected.getCoords()[0]
            distance = np.linalg.norm(atom_coords - center_ref)
        else:
            center_tar = prody.calcCenter(atom_tar_selected)
            center_ref = prody.calcCenter(atom_ref_selected)
            distance = np.linalg.norm(center_tar - center_ref)

        # calcDistance on selections yields an array, which the float format
        # spec below cannot format; reduce every branch to a plain float.
        distance = float(np.ravel(distance)[0])

        report_line = "{:4} {:10} {:10} {:6.3f}\n".format(
            resnum, ''.join(names_ref), ''.join(names_tar), distance)
        report.append(report_line)

    report_final = ''.join(report)

    if output_report:
        with open(output_report, "w") as report_file:
            report_file.write(report_final)
    print(report_final)
    return report_final
Пример #22
0
    def main_calc(pep_info, seq_info, mV, file):
        """Build distance, bond and element matrices for one peptide.

        ``pep_info`` holds the peptide sequence at ``[0]`` and a start/end
        index pair at ``[1]``/``[2]``. For every sequence position in that
        half-open range, the keys in ``seq_info[position]`` are looked up in
        ``file`` to obtain atom objects.

        Returns a dict with:
          * ``'sequence'``     - the peptide sequence,
          * ``'ele_to_amino'`` - ``[atom name, atom sequence]`` per atom,
          * ``'index'``        - ``(len(ELEMENT_INDEX) + 1, mV)`` element matrix,
          * ``'secondary'``    - ``(mV, mV)`` pairwise distance matrix,
          * ``'primary'``      - ``(mV, mV)`` matrix holding the distance of
            atom pairs closer than 1.9 that ``possible_bonds`` allows.

        NOTE(review): the matrices are indexed by atom position, so ``mV``
        presumably has to be at least the number of collected atoms.
        """
        pep = pep_info[0]
        ind = [pep_info[1], pep_info[2]]

        # [atom object, sequence position] for every atom in the window.
        new_pep = [[file[atom_key], seq_pos]
                   for seq_pos in range(ind[0], ind[1])
                   for atom_key in seq_info[seq_pos]]

        lengths = np.zeros((mV, mV))
        index = np.zeros((len(ELEMENT_INDEX) + 1, mV))
        bonded = np.zeros((mV, mV))
        amino_acid_list = []
        # Becomes True once an atom named 'N' has been processed.
        Nterm = False

        for i, (atom_i, seq_pos_i) in enumerate(new_pep):
            # 1-based position of the element in ELEMENT_SYMBOLS, as a float.
            ele = float(ELEMENT_SYMBOLS.index(str(atom_i.getElement()))) + 1

            # Row 5 gets a per-element code for the values 6, 7 and 8; the
            # value 7 only counts after an 'N' atom has already been seen.
            if ele == 6.0:
                index[5][i] = 0.0
            elif ele == 7.0 and Nterm:
                index[5][i] = 1.0
            elif ele == 8.0:
                index[5][i] = 2.0

            if not Nterm and atom_i.getName() == 'N':
                Nterm = True

            # Flag the atom's element row.
            index[ELEMENT_INDEX.index(atom_i.getElement())][i] = 1

            amino_acid_list.append(
                [str(atom_i.getName()), str(atom_i.getSequence())])

            for j, (atom_j, _) in enumerate(new_pep):
                # Distances are filled in symmetrically, so a pair that has
                # already been computed is not recomputed.
                if i != j and lengths[i][j] == 0:
                    lengths[i][j] = float(pd.calcDistance(atom_i, atom_j))
                    lengths[j][i] = lengths[i][j]

                if (lengths[i][j] < 1.9
                        and atom_i != atom_j
                        and atom_j.getSequence() in possible_bonds(
                            seq_pos_i, pep, ind)):
                    bonded[i][j] = lengths[i][j]
                    bonded[j][i] = bonded[i][j]

        return {
            'sequence': pep,
            'ele_to_amino': amino_acid_list,
            'index': index,
            'secondary': lengths,
            'primary': bonded,
        }