Exemplo n.º 1
0
 def __init__(self, path, pdb_id='pdb', cif_id='cif'):
     """Read a structure file and store the parsed structure on the instance.

     Parameters
     ----------
     path : str
         Path to the structure file. The text after the last dot of the
         file name selects the parser: 'pdb' or 'cif' (case sensitive).
     pdb_id : str
         Identifier passed to ``get_structure`` when a PDB file is read.
     cif_id : str
         Identifier passed to ``get_structure`` when an mmCIF file is read.

     Attributes set: ``file_name``, ``file_sufix``, ``dir_path``, ``params``,
     ``chains``, ``models``, ``header``, ``structure`` and ``has_sequence``
     (False for PDB input, True for mmCIF input).

     Any other extension (including no extension at all) prints an error
     and terminates the program through ``sys.exit(1)``.
     """
     # copied from input_output.py _read_structure
     # splitext keeps names with several dots intact ('1abc.min.pdb' ->
     # 'pdb') and yields an empty suffix, instead of an IndexError, when
     # the file name has no extension.
     base_name, extension = os.path.splitext(os.path.basename(path))
     self.file_name = base_name
     self.file_sufix = extension[1:]
     self.dir_path = os.path.dirname(path)
     self.params = CP.read_charmm_FF()
     self.chains = []
     self.models = {}
     if self.file_sufix == 'pdb':
         self.header = struct.parse_pdb_header(path)
         self.structure = struct.PDBParser(QUIET=True).get_structure(pdb_id, path)
         self.has_sequence = False
     elif self.file_sufix == 'cif':
         # NOTE(review): MMCIF2Dict is called without the file path, so the
         # header is presumably not read from `path` -- confirm against the
         # API of the module bound to `struct`.
         self.header = struct.MMCIF2Dict()
         self.structure = struct.MMCIFParser().get_structure(cif_id, path)
         self.has_sequence = True
     else:
         print("ERROR: Unreognized file type " + self.file_sufix + " in " + self.file_name)
         sys.exit(1)
Exemplo n.º 2
0
def _residue_has_component(full_structure, aaid, ent_id, chain, component):
    """Return True when any row of the given residue has this 'component' value."""
    residue_rows = ((full_structure.aaid == aaid) &
                    (full_structure.ent_id == ent_id) &
                    (full_structure.chain == chain))
    return bool((full_structure.loc[residue_rows, 'component'] == component).any())


def main():
    """Command-line entry point: delete one residue or terminal from a Super Structure CSV.

    Reads the CSV given by --inp, deletes the residue/terminal described by
    --rem ("aa number,entity id,chain id,residue or terminal name"), and
    writes the result as a CSV and a PDB named after --out.

    When the edited residue is the lowest-numbered residue of its chain and
    still has rows whose component is 'ACETY', the ACE terminal is deleted
    as well; likewise 'CTERM' rows on the highest-numbered residue trigger
    deletion of CTER (unless CTER was the thing requested).

    Exits with status 1 on a missing input file, missing --rem/--out,
    a malformed --rem instruction, or an unsupported residue name.
    """
    usage = "usage: %prog [options] arg"
    d = "This program reads a CSV file that has been generated by Super_Structure.\
         One residue or terminal will be deleted at the time.\n \
         WARNING: Deleting residues will leave a 'hole' in the structure. Amino Acids will not be renumbered.\n \
         This program can only delete residues or terminals that are in the parameter file.\
         The program will create a new and modified CSV file with the name of the input file plus the entity number."
    opt_parser = optparse.OptionParser(usage,description=d)
    opt_parser.add_option("--rem", type="str",help="Enter Instruction for removing amino acid or terminal in hard \n \
                                                         quotes '\"'. Place: Amino Acid Number, Entity ID, Chain ID, \
                                                         Residue or Terminal to be deleted separated by comas.\n \
                                                         Example \"1,1,A,ACE\", \"1,1,A,CTER\" or \"20,2,A,LYS\". \n \
                                                         Chain ID or terminal name are case sensitive and \
                                                         do not need to go in quotes.\n\
                                                         For residues all atoms will be deleted. For terminals, only the \
                                                         atoms that correspond to the terminal will be deleted.")
    opt_parser.add_option("--inp", type="str",help="Path to CSV file for removing residue.")
    opt_parser.add_option("--out", type="str",help="Name of output CSV file after removal of amino acid or terminal.")
    opt_parser.add_option("--par", type="str",help="Path to charmm parameters folder.")
    options, args = opt_parser.parse_args()

    # optparse leaves omitted options as None; os.path.exists(None) would
    # raise a TypeError instead of showing the intended error message.
    if options.inp is None or not os.path.exists(options.inp):
        print("Error: File path for Super Structure CSV file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if options.rem is None or options.out is None:
        print("Error: Options --rem and --out are both required.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    params = CP.read_charmm_FF(options.par)
    insulin = SS.Super_Structure(params, options.inp,'add_linker')
    parse_list = options.rem.split(',')
    if len(parse_list) != 4:
        message = 'The number of entries in the instruction field, followed by --rem, is not right.\n'
        message += 'Type -h or --help for instructions\n'
        print(message)
        # Nothing was done, so report failure like the other error paths.
        sys.exit(1)

    try:
        amino_acid_number = int(parse_list[0])
        entity_number = int(parse_list[1])
    except ValueError:
        print('Error: Amino Acid Number and Entity ID in --rem must be integers.')
        print("Type -h or --help for description and options.")
        sys.exit(1)
    chain = str(parse_list[2]).upper()
    term_res = str(parse_list[3]).upper()

    # So far this only works with natural aminoacids and ACE and CTER
    supported = ('ILE','GLN','GLY','GLU','CYS','ASP','SER','HSD','HSE','PRO','HSP','ASN','VAL','THR','TRP','CTER',
                 'LYS','PHE','ALA','MET','ACE','LEU','ARG','TYR')
    if term_res not in supported:
        print('ERROR: del_residue.py only works with natural aminoacids and ACE and CTER terminals.')
        sys.exit(1)

    message = 'Deleting a '+term_res+' from '
    message += 'amino acid number '+str(amino_acid_number)+' in entity id '+str(entity_number)+' and chain '+chain+'.\n'
    insulin.delete_aa(amino_acid_number,entity_number,chain,term_res)

    # If amino acid that is at the protein terminal is deleted, the terminal must be deleted too.
    # The chain extremes are read after the deletion above, as before.
    chain_rows = (insulin.Full_Structure.ent_id == entity_number) & (insulin.Full_Structure.chain == chain)
    chain_aaids = insulin.Full_Structure.aaid[chain_rows]
    if len(chain_aaids) > 0:
        min_aa = min(chain_aaids)
        max_aa = max(chain_aaids)
    else:
        # Nothing left in this chain: there is no terminal residue to inspect.
        min_aa = max_aa = None

    if amino_acid_number == min_aa:
        # Remove leftover ACETYL atoms on the first residue, if any.
        if _residue_has_component(insulin.Full_Structure, amino_acid_number, entity_number, chain, 'ACETY'):
            term_res = 'ACE'
            insulin.delete_aa(amino_acid_number,entity_number,chain,term_res)
    if amino_acid_number == max_aa and term_res != 'CTER':
        # Remove leftover C-terminal atoms on the last residue, if any.
        if _residue_has_component(insulin.Full_Structure, amino_acid_number, entity_number, chain, 'CTERM'):
            term_res = 'CTER'
            insulin.delete_aa(amino_acid_number,entity_number,chain,term_res)

    file_name = os.path.basename(options.out).split('.')[0]
    dir_path = os.path.dirname(options.out)
    # Super Structure needs to know about models.
    # Floor division keeps the Python 2 integer result and also runs on Python 3.
    num_models = max((insulin.Full_Structure.shape[1] - 20) // 5, 0) + 1
    insulin.models = [str(i) for i in range(1, num_models + 1)]

    insulin.write_csv(dir_path,file_name)
    IO.write_pdb(insulin, dir_path, file_name, 'all')
    print(message)
Exemplo n.º 3
0
 def mmgbsa_CA_bindingMatrix(self, options):
     """Write per-component self and pair energy terms to text files.

     Reads the CRD and PSF files named by ``options.crd`` / ``options.psf``
     (relative to ``self.dirpath``), builds a Super Structure from them,
     attaches the columns 'gb_z0', 'gb_z500', 'sa_z0' and 'sa_z500' from
     the files named by ``options.gb``, ``options.gbz``, ``options.sa`` and
     ``options.saz``, and adds a 'z1_d' column via
     ``create_column_with_ztranslated('z1_d', 'z1', "A", 500.0)``.

     Two files are written into ``self.dirpath``, named after the CRD file:
       * ``<name>_MMGBSA.idx``  -- one line per component of the index.
       * ``<name>_MMGBSA.out``  -- differences (plain minus '_Z' value) per
         component and per component pair; or, when ``options.ver`` is
         true, ``<name>_MMGBSA_verbose.out`` with every term listed.

     Returns None.
     """
     ####################################      READ FILES     #######################################################
     crd_file = crd(self.dirpath + '/' + options.crd)
     psf_file = psf(self.dirpath + '/' + options.psf)
     out_file = os.path.basename(options.crd).split('.')[0]
     ###################### After reading files, Generate and Index and Super Structure  ##############################
     # NOTE(review): param_path is not defined inside this method; it is
     # presumably a module-level name -- confirm, otherwise this raises NameError.
     params = CP.read_charmm_FF(param_path)
     insu = SS.Super_Structure(params, self.dirpath, 'charmm_input')
     # At this point, a XPLOR psf could only have been created from a complete structure, so no worries of gaps.
     insu.create_super_structure_df_from_CRD_PSF(crd_file, psf_file)
     ################################################################################################################
     insu.add_column_to_super_structure_df_from_CRD_PSF('gb_z0', self.dirpath + '/' + options.gb)
     insu.add_column_to_super_structure_df_from_CRD_PSF('gb_z500', self.dirpath + '/' + options.gbz)
     insu.add_column_to_super_structure_df_from_CRD_PSF('sa_z0', self.dirpath + '/' + options.sa)
     insu.add_column_to_super_structure_df_from_CRD_PSF('sa_z500', self.dirpath + '/' + options.saz)
     insu.create_column_with_ztranslated('z1_d', 'z1', "A", float(500))
     ###################### After reading files, Generate and Index a Super Structure  ##############################
     idx_ss = SS.index_super_structure(insu.Full_Structure)
     idx_ss.generate_indexes_from_Full_Structure()
     idx_ss.sort_comp_index_by_aaid_within_chains()

     # Rebuild the row order. Entries of comp_indx are read as
     # [3] residue name, [4] component name, [5]/[6] first/last row (inclusive).
     new_index = []
     for i in range(len(idx_ss.comp_indx)):
         comp = idx_ss.comp_indx[i]
         rows = list(range(comp[5], comp[6] + 1))
         if comp[3] != 'PRO':
             new_index.extend(rows)
         elif comp[4] == 'AMIN':
             # Reorder the six rows of a PRO 'AMIN' component and relabel
             # the three rows moved to the end.
             rows = [rows[j] for j in [0, 4, 5, 1, 2, 3]]
             for j in rows[3:]:
                 insu.Full_Structure.loc[j, 'component'] = 'SIDE2'
             new_index.extend(rows)
         elif comp[4] in ('SIDE', 'CARB'):
             new_index.extend(rows)
         # Any other PRO component is left out, as in the original code.
     insu.Full_Structure = insu.Full_Structure.loc[new_index]

     # .loc assignment instead of chained `df.component[mask] = ...`, which
     # may operate on a temporary copy and not update the frame.
     is_pro = insu.Full_Structure.aa == 'PRO'
     insu.Full_Structure.loc[
         is_pro & insu.Full_Structure.atmtyp1.isin(['CB', 'HB1', 'HB2']), 'component'] = 'SIDE3'
     insu.Full_Structure.loc[
         is_pro & insu.Full_Structure.atmtyp1.isin(['CG', 'HG1', 'HG2']), 'component'] = 'SIDE4'
     insu.Full_Structure = insu.Full_Structure.reset_index(drop=True)
     idx_ss = SS.index_super_structure(insu.Full_Structure)
     idx_ss.generate_indexes_from_Full_Structure()
     idx_ss.sort_comp_index_by_aaid_within_chains()
     # CHARMM_Test/gbsw_ab_nbxmod5.dat
     # ELECB,ELECU, GBENB, GBENU, VDWAB, VDWAU, ASPB, ASPU
     # -379.654,-332.641,-495.252, -595.916, 5672.62, 2338.65, 37.9186, 48.284
     # Column positions in ch_AB: 0 charg, 1 epsilon, 2 rmin_half, 3-5 x1/y1/z1,
     # 6 gb_z0, 7 gb_z500, 8 z1_d, 9 sa_z0, 10 sa_z500, 11 atmtyp1, 12 chain,
     # 13 component, 14 aa, 15 aaid.
     ch_AB = insu.Full_Structure.loc[idx_ss.chn_indx[0][0]:idx_ss.chn_indx[-1][-1],['charg','epsilon',\
                   'rmin_half','x1','y1','z1','gb_z0','gb_z500','z1_d','sa_z0','sa_z500','atmtyp1','chain',\
                   'component','aa','aaid']].values
     EE_K = -327.90
     EE_K2 = -163.95
     #Surface tension coefficient        (sgamma) =   0.010 [kcal/mol/Angs**2]
     SA_K = 0.010
     l = len(idx_ss.comp_indx)
     if options.ver:
         out_path = self.dirpath + '/' + out_file + '_MMGBSA_verbose.out'
     else:
         out_path = self.dirpath + '/' + out_file + '_MMGBSA.out'
     tempGB = 0
     tempGB_Z = 0
     tempSA = 0
     tempSA_Z = 0
     tempEE = 0
     tempEE_Z = 0
     tempVDW = 0
     tempVDW_Z = 0
     pairGB = 0
     pairGB_Z = 0
     # Running totals; they are accumulated but not written anywhere in this method.
     DGB = 0.0
     DGB_Z = 0.0
     DEE = 0.0
     DEE_Z = 0.0
     DVW = 0.0
     DVW_Z = 0.0
     DSA = 0.0
     DSA_Z = 0.0
     # Context managers guarantee both files are flushed and closed, also on error.
     with open(self.dirpath + '/' + out_file + '_MMGBSA.idx', 'w') as idxFile, \
             open(out_path, 'w') as outFile:
         for i in range(0, l):
             comp_i = idx_ss.comp_indx[i]
             first_i = comp_i[5]
             last_i = comp_i[6]
             idxFile.write(str(i)+" "+str(comp_i[2])+" "+comp_i[0]+" "+comp_i[3]+" "+comp_i[4]+"\n")
             # Self terms of component i. The loop runs through the last row
             # inclusive, so a single-row component gets its own self term
             # (previously a stale/undefined `last` index was used for it).
             for k in range(first_i, last_i + 1):
                 temp5 = EE_K2 * ch_AB[k][0] * ch_AB[k][0] / ch_AB[k][6]
                 tempGB += temp5
                 temp5_Z = EE_K2 * ch_AB[k][0] * ch_AB[k][0] / ch_AB[k][7]
                 tempGB_Z += temp5_Z
                 tempSA += SA_K * ch_AB[k][9]
                 tempSA_Z += SA_K * ch_AB[k][10]
                 for kk in range(k + 1, last_i + 1):
                     d1 = np.linalg.norm(ch_AB[k][3:6] - ch_AB[kk][3:6])
                     r2 = d1 * d1
                     # Inside one component the '_Z' distance is computed from
                     # the same x1/y1/z1 coordinates, so it equals r2.
                     r2_Z = r2
                     temp3 = ch_AB[k][6] * ch_AB[kk][6]
                     temp4 = (-1 * r2) / (4 * temp3)
                     temp3_Z = ch_AB[k][7] * ch_AB[kk][7]
                     temp4_Z = (-1 * r2_Z) / (4 * temp3_Z)
                     denm = np.sqrt(r2 + (temp3 * np.exp(temp4)))
                     denm_Z = np.sqrt(r2_Z + (temp3_Z * np.exp(temp4_Z)))
                     Kqq = EE_K * ch_AB[k][0] * ch_AB[kk][0]
                     temp5 = Kqq / denm
                     temp5_Z = Kqq / denm_Z
                     tempGB += temp5
                     tempGB_Z += temp5_Z
             DGB += tempGB
             DGB_Z += tempGB_Z
             DSA += tempSA
             DSA_Z += tempSA_Z
             if options.ver:
                 # Output for comparison with GB_Comp_Analy.prl
                 outFile.write("SELF "+str(i+1)+" "+str(i+1)+" "+comp_i[0]+" "+str(comp_i[2])\
                               +" "+str(comp_i[3])+" "+str(comp_i[4])+"\n      GB "+\
                               str(tempGB)+"\n      GBZ "+str(tempGB_Z)+"\n      SA "+str(tempSA)+"\n      SAZ "+\
                               str(tempSA_Z)+"\n")
             else:
                 outFile.write(
                     str(i) + " " + str(i) + " " + str(tempGB - tempGB_Z) +
                     " " + str(tempSA - tempSA_Z) + "\n")
             tempSA = 0.0
             tempSA_Z = 0.0
             # Pair terms between component i and every later component j.
             for j in range(i + 1, l):
                 comp_j = idx_ss.comp_indx[j]
                 for m in range(first_i, last_i + 1):
                     for n in range(comp_j[5], comp_j[6] + 1):
                         r = np.linalg.norm(ch_AB[m][3:6] - ch_AB[n][3:6])
                         r2 = r * r
                         # '_Z' distance uses the z1_d column (index 8) instead of z1.
                         r_Z = np.linalg.norm(np.array((ch_AB[m][3],ch_AB[m][4],ch_AB[m][8]))-\
                               np.array((ch_AB[n][3],ch_AB[n][4],ch_AB[n][8])))
                         r2_Z = r_Z * r_Z
                         temp = (332.06 * ch_AB[m][0] * ch_AB[n][0]) / r
                         temp_Z = (332.06 * ch_AB[m][0] * ch_AB[n][0]) / r_Z
                         tempEE += temp
                         tempEE_Z += temp_Z
                         Eps = np.sqrt(ch_AB[m][1] * ch_AB[n][1])
                         Rmin = ch_AB[m][2] + ch_AB[n][2]
                         A = Rmin / r
                         A_Z = Rmin / r_Z
                         A2 = A * A
                         A2_Z = A_Z * A_Z
                         A6 = A2 * A2 * A2
                         A6_Z = A2_Z * A2_Z * A2_Z
                         A12 = A6 * A6
                         A12_Z = A6_Z * A6_Z
                         tempVDW += Eps * (A12 - (2 * A6))
                         tempVDW_Z += Eps * (A12_Z - (2 * A6_Z))
                         temp3 = ch_AB[m][6] * ch_AB[n][6]
                         temp4 = (-1 * r2) / (4 * temp3)
                         temp3_Z = ch_AB[m][7] * ch_AB[n][7]
                         temp4_Z = (-1 * r2_Z) / (4 * temp3_Z)
                         denm = np.sqrt(r2 + (temp3 * np.exp(temp4)))
                         denm_Z = np.sqrt(r2_Z + (temp3_Z * np.exp(temp4_Z)))
                         temp5 = (EE_K * ch_AB[m][0] * ch_AB[n][0]) / denm
                         temp5_Z = (EE_K * ch_AB[m][0] * ch_AB[n][0]) / denm_Z
                         pairGB += temp5
                         pairGB_Z += temp5_Z
                 DGB += pairGB
                 DGB_Z += pairGB_Z
                 DEE += tempEE
                 DEE_Z += tempEE_Z
                 DVW += tempVDW
                 DVW_Z += tempVDW_Z
                 if options.ver:
                     # Output for comparison with GB_Comp_Analy.prl
                     outFile.write("PAIR " + str(i + 1) + " " + str(j + 1) +
                                   " ")
                     outFile.write(comp_i[0]+" "+str(comp_i[2])+" "+str(comp_i[3])\
                                   +" "+str(comp_i[4])+" - "+comp_j[0]+" "+\
                                   str(comp_j[2])+" "+str(comp_j[3])+" "+\
                                   str(comp_j[4])+"\n      EE "+str(tempEE)+"\n      EEZ "+str(tempEE_Z)+\
                                   "\n      VDW "+str(tempVDW)+"\n      VDWZ "+str(tempVDW_Z)+"\n      pGB "+str(pairGB)\
                                   +"\n      pGBZ "+str(pairGB_Z)+"\n")
                 else:
                     outFile.write(str(i)+" "+str(j)+" "+str(tempEE-tempEE_Z)+" "+str(tempVDW-tempVDW_Z)+" "+\
                                   str(pairGB-pairGB_Z)+"\n")
                 pairGB = 0
                 pairGB_Z = 0
                 tempEE = 0
                 tempEE_Z = 0
                 tempVDW = 0
                 tempVDW_Z = 0
             tempGB = 0
             tempGB_Z = 0
             tempSA = 0
             tempSA_Z = 0
Exemplo n.º 4
0
def _single_model_id(structure, label):
    """Return the id of the only model in `structure`; print an error and exit otherwise."""
    model_ids = [model.id for model in structure.get_models()]
    if len(model_ids) != 1:
        print(
            "ERROR: Number of models cannot be different from 1. Models found:"
            + str(len(model_ids)))
        print("       Make sure " + label + " PDBs have only one model.")
        sys.exit(1)
    return model_ids[0]


def _rotate_atoms(structure, rotation_matrix):
    """Replace every atom coordinate of `structure` by np.dot(coordinate, rotation_matrix)."""
    for atom in structure.get_atoms():
        coord = atom.get_coord()
        atom.set_coord(np.dot([coord[0], coord[1], coord[2]], rotation_matrix))


def _rename_clashing_chains(s1, s2):
    """Give chains of s2[0] whose id also occurs in s1[0] an unused uppercase letter."""
    s1_ids = set(chain.id for chain in s1[0])
    # Reserve the ids of both structures up front so a renamed chain can never
    # receive an id that another chain of s2 already carries.
    used_ids = s1_ids | set(chain.id for chain in s2[0])
    for chain in s2[0]:
        if chain.id not in s1_ids:
            continue
        for letter in string.ascii_uppercase:
            if letter not in used_ids:
                chain.id = letter
                used_ids.add(letter)
                break
        else:
            # If the number of chains and identifiers exceeds letters in the alphabet,
            # it is necessary to modify the code. Until then, exit.
            print(
                "ERROR: Number of chains in both structures exceeds letters in the alphabet. No ID \
                           identifiers available. Program will exit without output. Fix the code. June 24, 2016"
            )
            sys.exit(1)


def main():
    """Command-line entry point: place a rotated structure around a centered one.

    Both PDB files (--center, --rotate) must contain exactly one model.
    Each structure is first translated using its center of mass; the rotated
    structure is then oriented along a set of unit vectors built from
    --angle, moved out by --distance, and one PDB file per orientation is
    written to the current directory as
    ``<id>_<angles>_<n><link label>.pdb``.

    With the defaults --join no / --link x the two structures are only
    combined into one ensemble (clashing chain ids of the rotated structure
    are renamed); otherwise ``join_chains`` builds the output structure.

    Exits with status 1 on invalid or missing options and on multi-model input.
    """
    usage = "usage: %prog [options]"
    d = "It takes two pdbs and aligns the structures in different orientations. A center-pdb is placed at (0,0,0),and \
    a rotate-pdb is place around center at given angle intervals and distances. You can choose to join chains to make \
    a fusion peptide, or, if no, you can just align proteins for searching protein-protein interactions."

    option_parser = optparse.OptionParser(usage, description=d)
    option_parser.add_option("--center", type="str", \
                             help="Path to pdb structure to place at (0,0,0) according to its center of mass.")
    option_parser.add_option("--rotate", type="str", \
                             help="Path to pdb structure to rotated around center structure at X degree intervals. \
                             Angle intervals must be multiple of 360.")
    option_parser.add_option("--angle", type="int", \
                             help="Angle intervals used to place rotated structured around center one.")
    option_parser.add_option("--distance", type="float",\
                             help="Center of mass distance between rotated and centered structures.")
    option_parser.add_option(
        '--id',
        type="str",
        action="store",
        default='s',
        help="A prefix id to identify the output structures. Default is s.")
    option_parser.add_option('--link', type="str",action = "store", default = 'x', \
                             help = "Option only works with a 'yes' value from the --join option. It will join chains \
                             in the following format, for example: 'A.r,B.f:B.f,A.f'. This --link option can be read as\
                             the centered structure's chain 'A' amino acid sequence numbering is reversed and joind to \
                             the rotated structure's chain B with amino acid sequenced numbers not reversed, or kept \
                             forward. The values after the column are read similarly. Default if missing: 'x'")
    option_parser.add_option('--join', type="str",action = "store", default = 'no', \
                             help = "Only two possible options 'CR' and 'RC'. In both options C stands for Center, and \
                             R for rotate. CR means that the numbering starts at the first amino acid of --center \
                             structure up to its end, and it is followed by the first amino acid of --rotate up to its \
                             end. This option might overide the directions (r or f) of amino acid numbering for each\
                             chain in --link option. This gives total manipulation flexibility for joining chains.")
    options, args = option_parser.parse_args()

    # optparse leaves omitted options as None; check before touching the file system.
    if options.center is None or not os.path.exists(options.center):
        print("Error: File path for molecule to be centered does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if options.rotate is None or not os.path.exists(options.rotate):
        print("Error: File path for molecule to be rotated does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if options.angle is None or options.angle <= 0 or options.distance is None:
        print("Error: --angle must be a positive integer and --distance must be given.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    # The original check read `options.map`, an option that is never defined
    # (AttributeError on every run). The join request lives in --join.
    if options.join.lower() != 'no' and options.link.lower() == 'x':
        print("Error: when option join is used, option link must be")
        print(
            "       a series of chains identifiers for association as described"
        )
        print("       in the link paramater help.")
        sys.exit(1)

    pdb_parser = PDBParser(QUIET=True)
    Angle = options.angle
    distance = options.distance
    filepath1 = options.center
    filepath2 = options.rotate
    join_o = options.join
    link_o = options.link
    # True when the structures are only placed next to each other, not joined.
    align_only = join_o.lower() == 'no' and link_o.lower() == 'x'
    ####################################################################################################################
    params = CP.read_charmm_FF()
    cmc = md.CenterOfMassCalculator(params)
    rig = MRM.Molecular_Rigid_Manipulation(params)
    ####################################################################################################################
    # Check that the structures only have one model, and Place the structures' center of mass at (0,0,0) to give an idea
    # of their location in the cartesian coordinate system
    s1 = pdb_parser.get_structure('Centered', filepath1)
    modelS1 = _single_model_id(s1, 'Centered')
    rig.translate_molecule(s1, modelS1,
                           rig.center_molecule(cmc.get_center_of_mass(s1)))
    s2 = pdb_parser.get_structure('Rotated', filepath2)
    modelS2 = _single_model_id(s2, 'Rotated')
    rig.translate_molecule(s2, modelS2,
                           rig.center_molecule(cmc.get_center_of_mass(s2)))
    ####################################################################################################################
    # TODO: This works only for angles between 0 and 90 not including 0, and 90 and will generate angles in all 8
    # quadrants of the cartesian coordinate system (I do not see why using an angle other than 45 for now.)
    # TODO: quaternions might work better.
    # The location list has a list of normalized vectors relative to (0,0,0) that will be used to place s2's in the
    # right orientation relative to s1
    locations = []
    angles = []
    elevations = (
        (0, 0),
        (np.cos(45 * np.pi / 180), 45),
        (-1 * np.cos(45 * np.pi / 180), 315),
    )
    for z, elevation_label in elevations:
        for i in range(0, 360, Angle):
            # Floor division keeps the Python 2 integer result on Python 3.
            for j in range(0, 90 // Angle - 1):
                angles.append(str(i) + "_" + str(elevation_label))
                locations.append(
                    [np.cos(i * np.pi / 180),
                     np.sin(i * np.pi / 180), z])
    locations.append([0, 0, 1])
    angles.append(str(0) + "_" + str(90))
    locations.append([0, 0, -1])
    angles.append(str(0) + "_" + str(270))
    for i in range(0, len(locations)):
        locations[i] = list(np.array(locations[i]) / np.linalg.norm(locations[i]))
    ####################################################################################################################
    # This works when you do not want to link two proteins but instead you want to place one around the other and check
    # protein-protein interaction's binding affinity.
    if align_only:
        # Chains of structure 2 that reuse an id of structure 1 are renamed, because repeated chain ids in one
        # structure would be considered one chain when they are actually separated.
        _rename_clashing_chains(s1, s2)
        s3 = copy.deepcopy(s1)
        s3.id = 'Ensamble'
        for i in s2.get_chains():
            s3[0].add(i)
    ###############################################################################
    # Without a join there is no link label; it previously stayed undefined
    # and made the save below fail with a NameError.
    lnk_label = ''
    last_direction = None
    for structure_id, i in enumerate(locations):
        if structure_id == 0:
            ccc = md.ChargeCalculator(params)
            cm = ccc.calculate_center_of_charge(s2)
            # Fix orientation of LZ here only the first time.
            cm = cm / np.linalg.norm(cm)
            _rotate_atoms(s2, rig.alignVectors(i, cm))
            # After aligning along centerofcharge/dipolemoment, the structure
            # is flipped to have the cterm closest to insulin. This only works with
            # LZ because it is a homodimer with both helices aligned in parallel.
            # For any other structure, this might not work.
            # TODO This Vector needs to be picked by the user from information from the pdb_cif.py --summary
        else:
            # FIX: Something is wrong with rig.alignVectors([0,0,1],[0,0,-1]) it could just be a trigonometry case that
            # gives some sort of singularity. The following code inside the if statement patch just avoids the problem
            # but does not explain it. Find out.
            if i == [0.0, 0.0, -1.0] and last_direction == [0.0, 0.0, 1.0]:
                _rotate_atoms(s2, rig.alignVectors([0.0, 1.0, 1.0], [0.0, 0.0, 1.0]))
                last_direction = [0.0, 1.0, 1.0]
            _rotate_atoms(s2, rig.alignVectors(i, last_direction))
        last_direction = i
        # Move the rotated structure out along the current direction.
        ii = [k * distance for k in i]
        rig.translate_molecule(s2, modelS2, ii)
        # We want to join the two structures into one structure, with one model
        # and the chains of structure 1 and 2 joined and named consistently.
        if not align_only:
            s3 = struct.StructureBuilder.Structure('newrot')
            s3.add(struct.Model.Model(0))
            lnk_label = join_chains(s1, s2, s3, join_o, link_o)
        io = PDBIO()
        io.set_structure(s3)
        # --id defaults to 's', the prefix that used to be hard-coded here.
        io.save(options.id + '_' + angles[structure_id] + "_" + str(structure_id) +
                lnk_label + '.pdb')
        # Bring the rotated structure back to the origin for the next orientation.
        rig.translate_molecule(s2, modelS2,
                               rig.center_molecule(cmc.get_center_of_mass(s2)))
Exemplo n.º 5
0
def main():
    """Command-line entry point: insert new residues into a Super Structure CSV.

    Steps performed:

    1. Parse the options and stop with exit status 1 when ``--inp`` does not
       exist or ``--apn``, ``--res``, ``--out`` or ``--pep`` is missing.
    2. Load the CHARMM parameters (``--par``), the Super Structure CSV
       (``--inp``) and the peptide PDB (``--pep``).
    3. Build a ``link`` DataFrame with one row per atom of every residue
       named in ``--res``.
    4. Splice ``link`` in front of the first row of the residue addressed by
       ``--apn`` and shift ``aaid`` of the following rows that belong to the
       same entity and chain.
    5. Call ``fit_coordinates`` for every model and added residue, then write
       the CSV and PDB outputs named by ``--out``.

    Only the ``Ndir`` direction is implemented; ``Cdir`` prints a warning and
    exits with status 1.
    """
    usage = "usage: %prog [options] arg"
    d = "This program reads a CSV file that has been generated by Super_Structure.\
         The file corresponds to a Super Structure of a Protein. \
         Multiple residues can be added at the time, No terminal will be added..\n \
         This program can only add residues or terminals that are in the parameter file."

    opt_parser = optparse.OptionParser(usage, description=d)
    opt_parser.add_option(
        "--apn",
        type="str",
        help="Enter Instruction for where to append residues in hard '\"'\n \
                                                         quotes. Place: Amino Acid Number, Entity ID, Chain ID and  \n \
                                                         the direction to add residues separated by comas. Add. The \
                                                         direction to add residues is either Ndir or Cdir. This means \
                                                         that if a residue is added in residue 10, it could be toward \
                                                         the N or C terminal. This is important so that the program \
                                                         knows if the new residue is placed before or after the residue.\
                                                         Example \"1,1,A,Ndir\" or \"20,2,A,Cdir\". \n \
                                                         Chain ID, amino acid or terminal name are not case sensitive \
                                                         and do not need to go in quotes.\n"
    )
    opt_parser.add_option(
        "-r",
        "--res",
        type="str",
        help="Enter list of amino acids to be added in hard quotes.'\"'\n\
                                                         Example: \"ALA,VAL,ASP,ASN,GLU\"."
    )
    opt_parser.add_option("--inp",
                          type="str",
                          help="Path to CSV file for adding residue.")
    opt_parser.add_option(
        "--out",
        type="str",
        help="Path and name to CSV and PDB outputs with added residues.")
    opt_parser.add_option("--pep", type="str", help="Path to peptide file.")
    opt_parser.add_option("--par",
                          type="str",
                          help="Path to Charmm parameter folder.")
    options, args = opt_parser.parse_args()
    # options.inp is None when --inp is omitted; test it before asking the
    # filesystem so the user gets the error message instead of a TypeError.
    if not options.inp or not os.path.exists(options.inp):
        print("Error: File path Super Structure CSV file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    # Every one of these is dereferenced unconditionally further down, so a
    # missing value is reported here rather than as a traceback later on.
    missing = [
        '--' + name for name in ('apn', 'res', 'out', 'pep')
        if getattr(options, name) is None
    ]
    if missing:
        print("Error: missing required option(s): " + ', '.join(missing))
        print("Type -h or --help for description and options.")
        sys.exit(1)
    ########################## Init Setup #####################################
    params = CP.read_charmm_FF(options.par)
    insulin = SS.Super_Structure(params, options.inp, 'add_linker')
    parse_list = options.apn.split(',')
    if options.res.find(',') == -1:
        # No comma: every character is looked up in residueDict1_1
        # (presumably one-letter -> three-letter residue codes; confirm).
        aa_add = [ut.utilities.residueDict1_1[i] for i in options.res]
    else:
        aa_add = options.res.split(',')
    parser2 = PDBParser()
    pep_file = parser2.get_structure('Peptides', options.pep)
    insulin.build_pep_and_anchers(pep_file)
    ############### Begin processing parse_list and aa_add ####################
    message = ''
    print(parse_list, len(parse_list))
    if len(parse_list) == 4 and len(aa_add) > 0:
        aaid_add = int(parse_list[0])
        ent_id_add = int(parse_list[1])
        chain_add = str(parse_list[2]).upper()
        # The help text and the error below both state that entries are not
        # case sensitive, so 'ndir'/'NDIR' are normalised to 'Ndir'.
        term_dir = str(parse_list[3]).strip().capitalize()
        # So far this only works with natural aminoacids and ACE and CTER
        if term_dir in ['Ndir', 'Cdir']:
            message += 'Adding residues ' + str(
                aa_add) + ' in th ' + term_dir + ' at amino acid ' + str(
                    aaid_add) + ', ' + 'entity '
            message += str(ent_id_add) + ' and direction ' + term_dir + '.'
            print(message)
            ###################################################################
            # So we now create the link dataframe and follow the process in
            # Super_Structures to populate its fields.
            link = pd.DataFrame()
            aa = []
            aaid = []
            entity_id = []
            chain_id = []
            atmtyp1 = []
            atmtyp2 = []
            charg = []
            component = []
            # snum numbers the added residues 1..len(aa_add); it is shifted
            # to the final aaid when the rows are spliced in below.
            snum = 1
            for res in aa_add:
                chrm = res
                pdbx = res
                if chrm in insulin.params.AA:
                    # comp counts the atom groups of the residue: the first
                    # group is labelled AMINO, later ones CARBO or SIDE<n>.
                    comp = 1
                    for k in insulin.params.AA[chrm].atoms:
                        for l in k:
                            aa.append(pdbx)
                            aaid.append(snum)
                            entity_id.append(ent_id_add)
                            chain_id.append(chain_add)
                            atmtyp1.append(insulin.corrections(chrm, l))
                            atmtyp2.append(insulin.params.AA[chrm].atom_type[
                                insulin.corrections(chrm, l)])
                            charg.append(insulin.params.AA[chrm].atom_chrg[
                                insulin.corrections(chrm, l)])
                            if comp == 1:
                                component.append('AMINO')
                            else:
                                if l in ['C', 'O']:
                                    component.append('CARBO')
                                else:
                                    component.append(('SIDE' + str(comp)))
                        comp += 1
                    snum += 1
                else:
                    print('Warning: Amino Acid identifier', chrm,
                          ' is not found in parameters.')
                    sys.exit(1)
            link['aa'] = pd.Series(aa)
            link['aaid'] = pd.Series(aaid)
            link['ent_id'] = pd.Series(entity_id)
            link['chain'] = pd.Series(chain_id)
            link['atmtyp1'] = pd.Series(atmtyp1)
            link['atmtyp2'] = pd.Series(atmtyp2)
            link['component'] = pd.Series(component)
            link['charg'] = pd.Series(charg)
            ###########################################################################
            # Add atomtyp, masses and atmNumber to each atom type
            mass = []
            atmNum = []
            atmtyp3 = []
            epsilon = []
            rmin_half = []
            atminfo = []
            aainfo = []
            for i in link['atmtyp2']:
                atmNum.append(params.am.MASS[i][0])
                mass.append(params.am.MASS[i][1])
                atmtyp3.append(params.am.MASS[i][2])
                epsilon.append(params.NONBONDED[i][1])
                rmin_half.append(params.NONBONDED[i][2])
                atminfo.append(True)
                aainfo.append(False)
            link['epsilon'] = pd.Series(epsilon)
            link['rmin_half'] = pd.Series(rmin_half)
            link['atmtyp3'] = pd.Series(atmtyp3)
            link['mass'] = pd.Series(mass)
            link['atmNum'] = pd.Series(atmNum)
            ###########################################################################
            # DF Type correction.
            link['aaid'] = link['aaid'].apply(int)
            link['ent_id'] = link['ent_id'].apply(int)
            link['mass'] = link['mass'].apply(float)
            link['epsilon'] = link['epsilon'].apply(float)
            link['rmin_half'] = link['rmin_half'].apply(float)
            link['atmNum'] = link['atmNum'].apply(int)
            # Give link every column of the full structure: aainfo* columns
            # are filled with False, atminfo* with True, the rest with nan.
            for i in insulin.Full_Structure.columns:
                if i not in list(link.columns):
                    if i[0:6] == 'aainfo':
                        link[i] = pd.Series(aainfo)
                    elif i[0:7] == 'atminfo':
                        link[i] = pd.Series(atminfo)
                    else:
                        link[i] = pd.Series(
                            [float('nan') for j in range(len(link))])
            # term_dir is guaranteed to be 'Ndir' or 'Cdir' at this point.
            if term_dir == 'Ndir':
                anchor_rows = insulin.Full_Structure.index[
                    (insulin.Full_Structure.aaid == aaid_add) &
                    (insulin.Full_Structure.ent_id == ent_id_add) &
                    (insulin.Full_Structure.chain == chain_add)]
                # min() of an empty index would raise a bare ValueError.
                if len(anchor_rows) == 0:
                    print('ERROR: no atoms found for amino acid ' +
                          str(aaid_add) + ', entity ' + str(ent_id_add) +
                          ' and chain ' + chain_add + '.')
                    sys.exit(1)
                beg_insert = min(anchor_rows)
                end_insert = beg_insert + link.shape[0]
            else:
                print(
                    'WARNING: The code has not been design and tested for insertions in the CTER.'
                )
                print('Exiting the program without finishing.')
                sys.exit(1)
            joint_df = pd.DataFrame(columns=link.columns)
            count = 0
            insert = True
            # When links are added , aaid needs to be fixed to reflect added residues
            aaid_offset = 0
            for i in insulin.Full_Structure.index:
                if (i >= beg_insert) and (i < end_insert):
                    if insert:
                        # Copy all link rows once, right before the anchor row.
                        for j in link.index:
                            joint_df.loc[count] = link.loc[j]
                            joint_df.loc[count, 'aaid'] = joint_df.loc[
                                count, 'aaid'] + aaid_offset
                            current_aaid = link.loc[j, 'aaid']
                            count += 1
                        insert = False
                        # current_aaid is the aaid of the last link row, i.e.
                        # the number of residues that were just inserted.
                        aaid_offset = aaid_offset + current_aaid
                joint_df.loc[count] = insulin.Full_Structure.loc[i]
                # So that only residues after the added link get increased in the given ent_id and chain
                # Any other entity or chain in the molecules is not fixed.
                if (joint_df.loc[count, 'ent_id'] == ent_id_add) & (
                        joint_df.loc[count, 'chain'] == chain_add):
                    joint_df.loc[count,
                                 'aaid'] = joint_df.loc[count,
                                                        'aaid'] + aaid_offset
                count += 1
            # After adding residues, it all gets copied back to original dataframe.
            for i in joint_df.index:
                insulin.Full_Structure.loc[i] = joint_df.loc[i]
            # The way to get number of models is very specific to the way this program
            # stores data in DataFrame. Be careful if the data frame column structure changes.
            # NOTE(review): 20 and 5 look like "fixed columns" and "columns
            # per model" - confirm against Super_Structure.
            # Floor division keeps the Python 2 result and works on Python 3.
            # TODO: missing atom coordinates are added manually. It needs to be automated more.
            num_models = max(
                (insulin.Full_Structure.shape[1] - 20) // 5, 0) + 1
            for i in range(1, num_models + 1):
                # Residues are fitted from the last added one back to the first.
                for j in range(len(aa_add), 0, -1):
                    insulin.fit_coordinates(term_dir, j, ent_id_add, chain_add,
                                            str(i), aa_add[j - 1])
            # NOTE: insulin.models are not in the Super Structure Class, but it is added here.
            #       This works, but it does not seem the best way to do it. should models be a field of super
            #       structures and be initialized there?
            insulin.models = [str(i) for i in range(1, num_models + 1)]
            ################   Write to outputs ####################
            file_name = os.path.basename(options.out).split('.')[0]
            dir_path = os.path.dirname(options.out)
            insulin.write_csv(dir_path, file_name)
            IO.write_pdb(insulin, dir_path, file_name, 'all')
        else:
            print("ERROR: only two directions to add residues, Ndir and Cdir.")
            print("       The entries are not case sensitive.")
    else:
        message += 'The number of entries in the instruction field, followed by -a or --apn, is not right.\n'
        message += 'Type -h or --help for instructions\n'
        # Printed only here: the success path has already printed its message.
        print(message)
Exemplo n.º 6
0
def main():
    """Command-line entry point: place a "rotate" PDB around a "center" PDB.

    Both structures must contain exactly one model.  Each one is translated
    with ``rig.translate_molecule`` by the vector that
    ``rig.center_molecule`` returns for its center of mass.  A list of unit
    direction vectors is generated from ``--angle``; for every direction the
    rotated structure is re-oriented with ``rig.alignVectors``, moved
    ``--distance`` along that direction, written to a PDB file in the current
    working directory and then moved back.

    Two modes exist:

    * ``--map no`` together with ``--link X`` (the defaults): the chains of
      the rotated structure are added to a copy of the centered structure,
      renaming any chain id that is already taken.
    * otherwise: for every ``rotated:centered`` chain pair in ``--link`` the
      chains are merged with ``join_chains`` into a fresh structure.

    Invalid or missing options are reported on stdout and end the program
    with exit status 1.
    """
    usage = "usage: %prog [options]"
    d = "It takes two pdbs and aligns the structures in different \
    orientations. A center-pdb is placed at (0,0,0),and a rotate-pdb \
    is place around at given angle intervals and distances. \
    The program assumes that the rotated protein's N-terminal will \
    become the N-Terminal when joined to the center protein. \
    WARNING: The programs needs to be extended to allow the reading \
    of the dipole vectors of the two proteins, and align them in any \
    way the user wants. As is, it centers the center-protein in whatever \
    orientation it is, and it alings the center of charge vector with \
    vector <0, 1, 0>. From that starting point, it rotates rotate-protein \
    by the angle given as input."

    option_parser = optparse.OptionParser(usage, description=d)
    option_parser.add_option("--center", type="str", \
                             help="Path to pdb structure to place at (0,0,0) \
                             according to its center of mass.")
    option_parser.add_option("--rotate", type="str", \
                             help="Path to pdb structure to rotated around \
                             center structure at X degree intervals. Angle \
                             intervals must be multiple of 360.")
    option_parser.add_option("--angle", type="int", \
                             help="Angle intervals used to place rotated \
                             structured around center one.")
    option_parser.add_option("--distance", type="float",\
                             help="Center of mass distance between rotated and \
                            centered structures.")
    option_parser.add_option('--id',
                             type="str",
                             action="store",
                             default='s',
                             help="A prefix id to identify the \
                             output structures. Default is s.")
    option_parser.add_option('--map',
                             type="str",
                             action="store",
                             default='no',
                             help="Optional arg if not present it \
                             is 'no', and it will add rotate with different chain \
                             identifiers. if it is 'yes' then it expects --link \
                             to be something different from 'X' and in the right format."
                             )
    option_parser.add_option('--link',
                             type="str",
                             action="store",
                             default="X",
                             help="Optional arg if not present it \
                             is 'X', and it will add \'rotate\' structure with different chain \
                             identifiers. If it is 'yes', it will add the \'rotate\' structure \
                             with the same identifiers as \'center\' structure in the following \
                             format: A:A,B:B or chain A in \'rotate\' will link with A in \'center\' \
                             and B to B respectively. When both proteins have two chains, the only \
                             other option is A:B,B:A. There can be as many chains linked separated \
                             by commas but be careful because the program does not checks the \
                             physicality of this connections.")
    option_parser.add_option("--par",
                             type="str",
                             help="Path to charmm parameters folder.")
    options, args = option_parser.parse_args()
    # A missing option is None; test for that first so the user sees the
    # error message instead of a TypeError from os.path.exists(None).
    if not options.center or not os.path.exists(options.center):
        print("Error: File path for molecule to be centered does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if not options.rotate or not os.path.exists(options.rotate):
        print("Error: File path for molecule to be rotated does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if options.map.lower() == 'yes' and options.link.lower() == 'x':
        print("Error: when option map is equal to yes, option link must be")
        print(
            "       a series of chains identifiers for association as described"
        )
        print("       in the link paramater help.")
        sys.exit(1)
    # range(0, 360, Angle) needs a positive step and the translation needs a
    # number; both options default to None when omitted.
    if options.angle is None or options.angle <= 0:
        print("Error: option angle must be a positive integer.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    if options.distance is None:
        print("Error: option distance is required.")
        print("Type -h or --help for description and options.")
        sys.exit(1)
    pdb_parser = PDBParser(QUIET=True)
    Angle = options.angle
    distance = options.distance
    param_path = options.par
    # Decided once from the raw option strings.  The original re-tested this
    # inside the output loop with lnk_o.lower(), but lnk_o is a list by then,
    # which raised AttributeError whenever --map was 'no'.
    place_only = (options.map.lower() == 'no'
                  and options.link.lower() == 'x')
    ####################################################################################################################
    # Process string that determines how the centered and rotated structures will be connected.
    # "A:A,B:B" -> [['A', 'A'], ['B', 'B']] and the file-name label "_aa_bb".
    lnk_o = [pair.split(':') for pair in options.link.split(',')]
    lnk_label = ''.join(
        '_' + ''.join(chain_id.lower() for chain_id in pair)
        for pair in lnk_o)
    if not place_only and any(len(pair) < 2 for pair in lnk_o):
        # Each pair is indexed as [rotated chain, centered chain] below.
        print("Error: option link must be a comma separated list of chain")
        print("       pairs in the format A:A,B:B.")
        sys.exit(1)
    params = CP.read_charmm_FF(param_path)
    cmc = md.CenterOfMassCalculator(params)
    rig = MRM.Molecular_Rigid_Manipulation(param_path)

    def load_single_model(label, path):
        """Parse *path*, require exactly one model and re-center it.

        Returns the structure and the id of its only model.
        """
        structure = pdb_parser.get_structure(label, path)
        model_ids = [model.id for model in structure.get_models()]
        if len(model_ids) != 1:
            print(
                "ERROR: Number of models cannot be different from 1. Models found:"
                + str(len(model_ids)))
            print("       Make sure " + label + " PDBs have only one model.")
            sys.exit(1)
        rig.translate_molecule(
            structure, model_ids[0],
            rig.center_molecule(cmc.get_center_of_mass(structure)))
        return structure, model_ids[0]

    ####################################################################################################################
    # Check that the structures only have one model, and Place the structures' center of mass at (0,0,0) to give an idea
    # of their location in the cartesian coordinate system
    s1, modelS1 = load_single_model('Centered', options.center)
    s2, modelS2 = load_single_model('Rotated', options.rotate)

    def rotate_s2(rotation):
        """Right-multiply the coordinates of every atom of s2 by *rotation*."""
        for atom in s2.get_atoms():
            coord = atom.get_coord()
            atom.set_coord(np.dot([coord[0], coord[1], coord[2]], rotation))

    ####################################################################################################################
    # TODO: This works only for angles between 0 and 90 not including 0, and 90 and will generate angles in all 8
    # quadrants of the cartesian coordinate system (I do not see why using an angle other than 45 for now.)
    # TODO: quaternions might work better.
    # The location list has a list of normalized vectors relative to (0,0,0) that will be use to place s2's in the
    # right orientation relative to s1
    locations = []
    angles = []
    cos45 = np.cos(45 * np.pi / 180)
    # (z component before normalisation, label used in the file name)
    for z, elevation in ((0, 0), (cos45, 45), (-1 * cos45, 315)):
        for azimuth in range(0, 360, Angle):
            # Floor division: same value as the Python 2 "/" on ints and
            # still an int (as range requires) on Python 3.
            for _ in range(0, 90 // Angle - 1):
                angles.append(str(azimuth) + "_" + str(elevation))
                locations.append([
                    np.cos(azimuth * np.pi / 180),
                    np.sin(azimuth * np.pi / 180), z
                ])
    locations.append([0, 0, 1])
    angles.append(str(0) + "_" + str(90))
    locations.append([0, 0, -1])
    angles.append(str(0) + "_" + str(270))
    locations = [
        list(np.asarray(vec, dtype=float) / np.linalg.norm(vec))
        for vec in locations
    ]
    ####################################################################################################################
    # This works when you do not want to link two proteins but instead you want to place one around the other and check
    # protein-protein interaction's binding affinity.
    if place_only:
        # A chain id repeated inside one structure would be treated as a
        # single chain, so every chain of s2 whose id is used by s1 gets the
        # first uppercase letter that neither structure uses.  Ids that s2
        # keeps are reserved too, so a renamed chain can never collide with
        # another chain of s2.
        s1_ids = set(chain.id for chain in s1[0])
        reserved = s1_ids | set(chain.id for chain in s2[0])
        for chain in s2[0]:
            if chain.id in s1_ids:
                free = [
                    letter for letter in string.ascii_uppercase
                    if letter not in reserved
                ]
                # If the number of chains and identifiers exceeds letters in the alphabet,
                # It is necessary to modify the code. Until then, let's check this won't happen by exiting.
                if not free:
                    print(
                        "ERROR: Number of chains in both structures exceeds letters in the alphabet. No ID \
                           identifiers available. Program will exit without output. Fix the code. June 24, 2016"
                    )
                    sys.exit(1)
                chain.id = free[0]
                reserved.add(free[0])
        # s3 holds a copy of s1 plus the very chain objects of s2, so moving
        # s2 in the loop below also moves those chains inside s3.
        s3 = copy.deepcopy(s1)
        s3.id = 'Ensamble'
        for chain in s2.get_chains():
            s3[0].add(chain)
    else:
        chain_info_s1 = get_chains_info(s1)
        chain_info_s2 = get_chains_info(s2)
    ###############################################################################
    last_direction = None
    for structure_id, direction in enumerate(locations):
        if structure_id == 0:
            # First placement only: align the normalised center-of-charge
            # vector of s2 with the first direction.
            ccc = md.ChargeCalculator(params)
            cm = ccc.calculate_center_of_charge(s2)
            cm = cm / np.linalg.norm(cm)
            rotate_s2(rig.alignVectors(direction, cm))
            # After aligning along centerofcharge/dipolemoment, the structure
            # is flipped to have the cterm closest to insulin. This only works with
            # LZ because it is a homodimer with both helices aligned in parallel.
            # For any other structure, this might not work.
            # TODO This Vector needs to be picked by the user from information from the pdb_cif.py --summary
        else:
            # FIX: Something is wrong with rig.alignVectors([0,0,1],[0,0,-1]) it could just be a trigonometry case that
            # gives some sort of singularity. The following code inside the if statement patch just avoids the problem
            # but does not explain it. Find out.
            if direction == [0.0, 0.0, -1.0] and last_direction == [0.0, 0.0, 1.0]:
                rotate_s2(rig.alignVectors([0.0, 1.0, 1.0], [0.0, 0.0, 1.0]))
                last_direction = [0.0, 1.0, 1.0]
            rotate_s2(rig.alignVectors(direction, last_direction))
        last_direction = direction
        # Move s2 out to the requested distance along the current direction.
        rig.translate_molecule(s2, modelS2,
                               [k * distance for k in direction])

        # We want to join the two structures into one structure, with one model
        # and the chains of structure 1 and 2 joined and named consistently.
        if not place_only:
            s3 = struct.StructureBuilder.Structure('newrot')
            s3.add(struct.Model.Model(0))
            # One join per requested pair (the original handled exactly two).
            for pair in lnk_o:
                join_range = [(chain_info_s2[pair[0]]['min_res'],
                               chain_info_s2[pair[0]]['max_res']),
                              (chain_info_s1[pair[1]]['min_res'],
                               chain_info_s1[pair[1]]['max_res'])]
                join_chains(s3, s2, s1, pair, join_range)
        io = PDBIO()
        io.set_structure(s3)
        # --id (default 's') is the documented output prefix.
        io.save(options.id + '_' + angles[structure_id] + "_" +
                str(structure_id) + lnk_label + '.pdb')
        # Bring s2 back before the next orientation is applied.
        rig.translate_molecule(s2, modelS2,
                               rig.center_molecule(cmc.get_center_of_mass(s2)))
Exemplo n.º 7
0
def _exit_unless_path_exists(path, label):
    """Terminate the program with status 1 unless *path* exists.

    Args:
        path: Path string taken from the command line, or None when the
            corresponding option was not supplied.
        label: Word inserted into the error message to identify which
            input is missing (e.g. "input", "PSF", "CRD").

    Raises:
        SystemExit: with code 1 when *path* is empty/None or does not exist.
    """
    # A missing option arrives as None; os.path.exists(None) would raise
    # TypeError instead of producing the intended user-facing message.
    if path and os.path.exists(path):
        return
    print("Error: File path for %s file does not exist." % label)
    print("Type -h or --help for description and options.")
    sys.exit(1)


def main():
    """Command-line entry point that writes a Super Structure CSV file.

    Two mutually exclusive modes are selected by flags:

    * ``--fromcif``: reads the CIF file given by ``-i/--cif``, builds a
      Super Structure from it, adds missing residues and writes a CSV and
      PDB output named after the CIF file's base name.
    * ``--frompsfcrd``: reads the PSF (``-f/--psf``) and CRD (``-d/--crd``)
      files and writes a CSV next to the CRD file.

    When neither flag is given the function parses the options and returns
    without doing anything.

    NOTE(review): the ``-o/--out1`` and ``-p/--pep`` options are accepted but
    never read in this function; the peptide file is always taken from the
    ``em`` package resources.

    Raises:
        SystemExit: code 2 (via ``OptionParser.error``) when both mode flags
            are given; code 1 when a required input path is missing or does
            not exist.
    """
    usage = "usage: %prog [options] arg"
    d = "This program reads a CIF file and checks that all residues in the file\
         are found in the CHARMM top_27 parameters. Residues found, but missing in \
         the structure, are added to the structure. The full structure is outputed \
         to a CSV file where Charmm, CIF and additional information is stored. \
         Added residues are copied from a peptide structure with all amino acids \
         present in the local CHARMM parameters files with fixed dihedral angles. \
         Info in the CSV file should be all there is to explore the conformational \
         space of added atoms."

    opt_parser = optparse.OptionParser(usage, description=d)

    group = optparse.OptionGroup(
        opt_parser,
        "Generates CSV and PDB files for each model from a CIF file.")
    group.add_option("--fromcif",
                     action="store_true",
                     help="Flag to generate a CSV frile from a CIF file.")
    group.add_option("-i", "--cif", type="str", help="Path to input cif file.")
    group.add_option("-o", "--out1", type="str", help="Path to output csv.")
    group.add_option("-p",
                     "--pep",
                     type="str",
                     help="Path to CHARMM peptide file.")
    opt_parser.add_option_group(group)

    group = optparse.OptionGroup(
        opt_parser, "Generates a CSV file from CRD and PSF files.")
    group.add_option(
        "--frompsfcrd",
        action="store_true",
        help="Flag to generates a CSV frile from a CRD and PSF file.")
    group.add_option("-f",
                     "--psf",
                     type="str",
                     help="Path to input PSF file in XPLOR format.")
    group.add_option("-d", "--crd", type="str", help="Path to input CRD file.")
    opt_parser.add_option_group(group)

    options, args = opt_parser.parse_args()

    # ------------------------- Options entered -------------------------
    # The two modes are mutually exclusive; OptionParser.error() prints the
    # message and exits with status 2.
    if options.fromcif and options.frompsfcrd:
        opt_parser.error(
            "Two option flags can't be selected at the same time. Enter -h for help."
        )

    # --------------------------- CIF -> CSV ----------------------------
    if options.fromcif:
        _exit_unless_path_exists(options.cif, "input")
        params = CP.read_charmm_FF()
        pep_file_path = pkg_resources.resource_filename(
            'em', 'params/' + 'peptides.pdb')
        p1 = PDBParser(QUIET=True).get_structure('Peptides', pep_file_path)

        # The peptide construct is built with CHARMM, so some atom names
        # must be corrected to PDB/Databank atom names (ILE CD -> CD1).
        # TODO: this might not be necessary as the correction and
        # inv_correction dictionary in Super Structure takes care of it.
        # Check before removing the correction here.
        for model in p1.get_models():
            for chain in model.get_chains():
                for residue in chain.get_residues():
                    if residue.get_resname() != 'ILE':
                        continue
                    # Iterate the residue directly instead of calling the
                    # deprecated Residue.get_atom() accessor.
                    for atom in residue:
                        if atom.get_id() == 'CD':
                            atom.name = 'CD1'
                            atom.id = 'CD1'

        # Create Super Structure
        myCIF = SS.Super_Structure(params, options.cif, 'setup')
        myCIF.build_pep_and_anchers(p1)
        myCIF.read_dict_into_dataframes()
        myCIF.check_models()
        myCIF.create_super_structure_df()

        # Find missing residues to add to the Super Structure. Missing
        # residues are grouped in lists of contiguous residues and added to
        # another list.
        myCIF.build_missing_aa()
        file_name = os.path.basename(options.cif).split('.')[0]
        myCIF.write_csv('', file_name)
        IO.write_pdb(myCIF, '', file_name, 'all')

    # ------------------------- CRD + PSF -> CSV -------------------------
    if options.frompsfcrd:
        _exit_unless_path_exists(options.psf, "PSF")
        _exit_unless_path_exists(options.crd, "CRD")
        directory, filename = os.path.split(options.crd)
        crd_file = IO.crd(options.crd)
        psf_file = IO.psf(options.psf)
        file_name = filename.split('.')[0]

        # After reading the files, generate and index a Super Structure.
        params = CP.read_charmm_FF()
        myCSV = SS.Super_Structure(params, directory, 'charmm_input')
        # At this point, an XPLOR psf could only have been created from a
        # complete structure, so no worries of gaps.
        myCSV.create_super_structure_df_from_CRD_PSF(crd_file, psf_file)
        myCSV.write_csv(directory, file_name)