def _build_link_frame(insulin, params, aa_add, ent_id_add, chain_add):
    """Build the per-atom DataFrame for the residues that will be inserted.

    One row is produced for every atom listed in ``insulin.params.AA[res].atoms``
    for each residue name in ``aa_add``.  The frame receives the same columns
    as ``insulin.Full_Structure``; columns that cannot be computed here are
    filled with placeholders (see the end of the function).

    Exits the program with status 1 if a residue name is not a key of
    ``insulin.params.AA``.
    """
    aa, aaid, entity_id, chain_id = [], [], [], []
    atmtyp1, atmtyp2, charg, component = [], [], [], []
    # snum numbers the added residues 1..len(aa_add).
    for snum, res in enumerate(aa_add, 1):
        if res not in insulin.params.AA:
            print('Warning: Amino Acid identifier {0} is not found in '
                  'parameters.'.format(res))
            sys.exit(1)
        topology = insulin.params.AA[res]
        # ``atoms`` is iterated as a sequence of atom groups; the first group
        # is tagged AMINO, later groups CARBO (C/O atoms) or SIDE<n>.
        for comp, group in enumerate(topology.atoms, 1):
            for atom in group:
                atom_name = insulin.corrections(res, atom)
                aa.append(res)
                aaid.append(snum)
                entity_id.append(ent_id_add)
                chain_id.append(chain_add)
                atmtyp1.append(atom_name)
                atmtyp2.append(topology.atom_type[atom_name])
                charg.append(topology.atom_chrg[atom_name])
                if comp == 1:
                    component.append('AMINO')
                elif atom in ['C', 'O']:
                    component.append('CARBO')
                else:
                    component.append('SIDE' + str(comp))

    link = pd.DataFrame()
    link['aa'] = pd.Series(aa)
    link['aaid'] = pd.Series(aaid)
    link['ent_id'] = pd.Series(entity_id)
    link['chain'] = pd.Series(chain_id)
    link['atmtyp1'] = pd.Series(atmtyp1)
    link['atmtyp2'] = pd.Series(atmtyp2)
    link['component'] = pd.Series(component)
    link['charg'] = pd.Series(charg)

    #######################################################################
    # Add atom type, mass, atom number and non-bonded terms for each atom.
    mass_table = params.am.MASS
    nonbonded = params.NONBONDED
    atom_types = list(link['atmtyp2'])
    link['epsilon'] = pd.Series([nonbonded[t][1] for t in atom_types])
    link['rmin_half'] = pd.Series([nonbonded[t][2] for t in atom_types])
    link['atmtyp3'] = pd.Series([mass_table[t][2] for t in atom_types])
    link['mass'] = pd.Series([mass_table[t][1] for t in atom_types])
    link['atmNum'] = pd.Series([mass_table[t][0] for t in atom_types])

    #######################################################################
    # DF type correction.
    link['aaid'] = link['aaid'].apply(int)
    link['ent_id'] = link['ent_id'].apply(int)
    link['mass'] = link['mass'].apply(float)
    link['epsilon'] = link['epsilon'].apply(float)
    link['rmin_half'] = link['rmin_half'].apply(float)
    link['atmNum'] = link['atmNum'].apply(int)

    # Fill the remaining Full_Structure columns: aainfo* -> False,
    # atminfo* -> True, everything else -> NaN.
    n_rows = len(link)
    for column in insulin.Full_Structure.columns:
        if column in list(link.columns):
            continue
        if column[0:6] == 'aainfo':
            link[column] = pd.Series([False] * n_rows)
        elif column[0:7] == 'atminfo':
            link[column] = pd.Series([True] * n_rows)
        else:
            link[column] = pd.Series([float('nan')] * n_rows)
    return link


def _splice_link_rows(full, link, beg_insert, end_insert, ent_id_add,
                      chain_add):
    """Return a new frame with the ``link`` rows placed before ``beg_insert``.

    Rows of ``full`` are copied in index order.  Once the link rows have been
    emitted, every following row that belongs to ``ent_id_add``/``chain_add``
    has its ``aaid`` increased by the last ``aaid`` found in ``link``.  Rows
    of other entities or chains are copied unchanged.
    """
    # NOTE(review): link rows keep aaid 1..n, so the numbering is only
    # collision-free when the anchor is the first residue of the chain;
    # confirm against Super_Structure.fit_coordinates before changing.
    # TODO: the row-by-row .loc assignment is slow for large structures.
    joint_df = pd.DataFrame(columns=link.columns)
    count = 0
    insert = True
    aaid_offset = 0
    for i in full.index:
        if insert and (i >= beg_insert) and (i < end_insert):
            for j in link.index:
                joint_df.loc[count] = link.loc[j]
                joint_df.loc[count, 'aaid'] = (
                    joint_df.loc[count, 'aaid'] + aaid_offset)
                current_aaid = link.loc[j, 'aaid']
                count += 1
            insert = False
            aaid_offset = aaid_offset + current_aaid
        joint_df.loc[count] = full.loc[i]
        # Only residues of the edited entity and chain are renumbered.
        if (joint_df.loc[count, 'ent_id'] == ent_id_add) & (
                joint_df.loc[count, 'chain'] == chain_add):
            joint_df.loc[count, 'aaid'] = (
                joint_df.loc[count, 'aaid'] + aaid_offset)
        count += 1
    return joint_df


def main():
    """Command-line entry point: add residues to a Super Structure CSV.

    Parses the options, validates the ``--apn`` instruction, loads the
    structure named by ``--inp``, inserts the residues listed in ``--res``
    before the anchor residue, fits coordinates for every model and writes
    the CSV and PDB outputs named by ``--out``.

    Exit status: 2 for missing/malformed options (``opt_parser.error``),
    1 for a missing input file, an unknown residue, an unknown anchor or the
    unimplemented ``Cdir`` direction.  A wrong number of instruction entries
    or an unknown direction prints a message and returns without exiting.
    """
    usage = "usage: %prog [options] arg"
    d = ("This program reads a CSV file that has been generated by "
         "Super_Structure. The file corresponds to a Super Structure of a "
         "Protein. Multiple residues can be added at a time; no terminal "
         "will be added. This program can only add residues or terminals "
         "that are in the parameter file.")
    opt_parser = optparse.OptionParser(usage, description=d)
    opt_parser.add_option(
        "--apn", type="str",
        help="Instruction for where to append residues, in double quotes. "
             "Place: Amino Acid Number, Entity ID, Chain ID and the "
             "direction to add residues, separated by commas. The direction "
             "is either Ndir or Cdir: a residue added at residue 10 can go "
             "toward the N or C terminal, which tells the program whether "
             "the new residue is placed before or after that residue. "
             "Example \"1,1,A,Ndir\" or \"20,2,A,Cdir\". Chain ID, amino "
             "acid names and direction are not case sensitive. Cdir is not "
             "implemented yet.")
    opt_parser.add_option(
        "-r", "--res", type="str",
        help="List of amino acids to be added, in double quotes. "
             "Example: \"ALA,VAL,ASP,ASN,GLU\". A value without commas is "
             "read one character at a time as residue codes.")
    opt_parser.add_option("--inp", type="str",
                          help="Path to CSV file for adding residue.")
    opt_parser.add_option(
        "--out", type="str",
        help="Path and name to CSV and PDB outputs with added residues.")
    opt_parser.add_option("--pep", type="str", help="Path to peptide file.")
    opt_parser.add_option("--par", type="str",
                          help="Path to Charmm parameter folder.")
    options, args = opt_parser.parse_args()

    # Options that are dereferenced unconditionally below; without them the
    # program would die with a TypeError/AttributeError traceback.
    for name in ('inp', 'out', 'pep', 'apn', 'res'):
        if not getattr(options, name):
            opt_parser.error(
                "option --{0} is required. Type -h or --help for "
                "description and options.".format(name))
    if not os.path.exists(options.inp):
        print("Error: File path Super Structure CSV file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    ############ Validate parse_list and aa_add before loading ############
    parse_list = options.apn.split(',')
    if options.res.find(',') == -1:
        # No comma: every character is looked up in residueDict1_1.
        aa_add = [ut.utilities.residueDict1_1[i] for i in options.res]
    else:
        aa_add = [res.strip().upper() for res in options.res.split(',')]

    print('{0} {1}'.format(parse_list, len(parse_list)))
    if len(parse_list) != 4 or len(aa_add) == 0:
        print('The number of entries in the instruction field, followed by '
              '--apn, is not right.\n'
              'Type -h or --help for instructions\n')
        return

    try:
        aaid_add = int(parse_list[0])
        ent_id_add = int(parse_list[1])
    except ValueError:
        opt_parser.error("Amino Acid Number and Entity ID in --apn must be "
                         "integers. Type -h or --help for instructions.")
    chain_add = str(parse_list[2]).strip().upper()
    # Accept any capitalisation, as the error text below promises, but keep
    # the canonical spelling that is handed to fit_coordinates().
    term_dir = str(parse_list[3]).strip()
    term_dir = {'NDIR': 'Ndir', 'CDIR': 'Cdir'}.get(term_dir.upper(), term_dir)
    if term_dir not in ('Ndir', 'Cdir'):
        print("ERROR: only two directions to add residues, Ndir and Cdir.")
        print("       The entries are not case sensitive.")
        return
    if term_dir == 'Cdir':
        print('WARNING: The code has not been design and tested for '
              'insertions in the CTER.')
        print('Exiting the program without finishing.')
        sys.exit(1)

    ########################## Init Setup #################################
    params = CP.read_charmm_FF(options.par)
    insulin = SS.Super_Structure(params, options.inp, 'add_linker')
    pep_file = PDBParser().get_structure('Peptides', options.pep)
    insulin.build_pep_and_anchers(pep_file)

    # So far this only works with natural aminoacids and ACE and CTER
    print('Adding residues {0} in the {1} direction at amino acid {2}, '
          'entity {3} and chain {4}.'.format(
              aa_add, term_dir, aaid_add, ent_id_add, chain_add))

    #######################################################################
    # Create the link dataframe and follow the process in Super_Structure
    # to populate its fields.
    link = _build_link_frame(insulin, params, aa_add, ent_id_add, chain_add)

    full = insulin.Full_Structure
    anchor_rows = full.index[(full.aaid == aaid_add) &
                             (full.ent_id == ent_id_add) &
                             (full.chain == chain_add)]
    if len(anchor_rows) == 0:
        print('ERROR: amino acid {0} in entity {1} and chain {2} was not '
              'found in the structure.'.format(aaid_add, ent_id_add,
                                               chain_add))
        sys.exit(1)
    beg_insert = min(anchor_rows)
    end_insert = beg_insert + link.shape[0]

    joint_df = _splice_link_rows(full, link, beg_insert, end_insert,
                                 ent_id_add, chain_add)
    # After adding residues, it all gets copied back to original dataframe.
    for i in joint_df.index:
        insulin.Full_Structure.loc[i] = joint_df.loc[i]

    # The way to get number of models is very specific to the way this
    # program stores data in the DataFrame. Be careful if the data frame
    # column structure changes.  Floor division keeps this an int.
    # TODO: missing atom coordinates are added manually. It needs to be
    # automated more.
    num_models = max((insulin.Full_Structure.shape[1] - 20) // 5, 0) + 1
    for i in range(1, num_models + 1):
        for j in range(len(aa_add), 0, -1):
            insulin.fit_coordinates(term_dir, j, ent_id_add, chain_add,
                                    str(i), aa_add[j - 1])
    # NOTE: insulin.models is not set by the Super Structure class, so it is
    # added here before writing.
    insulin.models = [str(i) for i in range(1, num_models + 1)]

    ################ Write to outputs ####################
    file_name = os.path.basename(options.out).split('.')[0]
    dir_path = os.path.dirname(options.out)
    insulin.write_csv(dir_path, file_name)
    IO.write_pdb(insulin, dir_path, file_name, 'all')
def _residue_has_component(insulin, amino_acid_number, entity_number, chain,
                           component):
    """Return True if any atom row of the given residue has ``component``.

    ``insulin.Full_Structure`` is read afresh on every call, so the answer
    reflects whatever earlier deletions left in the frame.
    """
    full = insulin.Full_Structure
    rows = full.index[(full.aaid == amino_acid_number) &
                      (full.ent_id == entity_number) &
                      (full.chain == chain)]
    return any(full.loc[ii, 'component'] == component for ii in rows)


def main():
    """Command-line entry point: delete one residue or terminal.

    Parses the options, validates the ``--rem`` instruction, loads the
    structure named by ``--inp``, calls ``delete_aa`` for the requested
    residue/terminal, removes ACE/CTER terminal atoms that are still attached
    to that residue number, and writes the CSV and PDB named by ``--out``.

    Exit status: 2 for missing/malformed options (``opt_parser.error``),
    1 for a missing input file or an unsupported residue name.  A wrong
    number of instruction entries prints a message and returns.
    """
    usage = "usage: %prog [options] arg"
    d = ("This program reads a CSV file that has been generated by "
         "Super_Structure. One residue or terminal will be deleted at a "
         "time. WARNING: Deleting residues will leave a 'hole' in the "
         "structure. Amino Acids will not be renumbered. This program can "
         "only delete residues or terminals that are in the parameter file. "
         "The modified structure is written to the CSV and PDB files named "
         "by --out.")
    opt_parser = optparse.OptionParser(usage, description=d)
    opt_parser.add_option(
        "--rem", type="str",
        help="Instruction for removing amino acid or terminal, in double "
             "quotes. Place: Amino Acid Number, Entity ID, Chain ID, Residue "
             "or Terminal to be deleted, separated by commas. Example "
             "\"1,1,A,ACE\", \"1,1,A,CTER\" or \"20,2,A,LYS\". Chain ID and "
             "residue or terminal name are not case sensitive. For residues "
             "all atoms will be deleted. For terminals, only the atoms that "
             "correspond to the terminal will be deleted.")
    opt_parser.add_option("--inp", type="str",
                          help="Path to CSV file for removing residue.")
    opt_parser.add_option(
        "--out", type="str",
        help="Name of output CSV file after removal of amino acid or "
             "terminal.")
    opt_parser.add_option("--par", type="str",
                          help="Path to charmm parameters folder.")
    options, args = opt_parser.parse_args()

    # Options that are dereferenced unconditionally below; without them the
    # program would die with a TypeError/AttributeError traceback.
    for name in ('inp', 'out', 'rem'):
        if not getattr(options, name):
            opt_parser.error(
                "option --{0} is required. Type -h or --help for "
                "description and options.".format(name))
    if not os.path.exists(options.inp):
        print("Error: File path for Super Structure CSV file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    ############## Validate the instruction before loading ################
    parse_list = options.rem.split(',')
    if len(parse_list) != 4:
        print('The number of entries in the instruction field, followed by '
              '--rem, is not right.\n'
              'Type -h or --help for instructions\n')
        return
    try:
        amino_acid_number = int(parse_list[0])
        entity_number = int(parse_list[1])
    except ValueError:
        opt_parser.error("Amino Acid Number and Entity ID in --rem must be "
                         "integers. Type -h or --help for instructions.")
    chain = str(parse_list[2]).strip().upper()
    term_res = str(parse_list[3]).strip().upper()
    # So far this only works with natural aminoacids and ACE and CTER
    supported = ('ILE', 'GLN', 'GLY', 'GLU', 'CYS', 'ASP', 'SER', 'HSD',
                 'HSE', 'PRO', 'HSP', 'ASN', 'VAL', 'THR', 'TRP', 'CTER',
                 'LYS', 'PHE', 'ALA', 'MET', 'ACE', 'LEU', 'ARG', 'TYR')
    if term_res not in supported:
        print('ERROR: del_residue.py only works with natural aminoacids and '
              'ACE and CTER terminals.')
        sys.exit(1)

    params = CP.read_charmm_FF(options.par)
    insulin = SS.Super_Structure(params, options.inp, 'add_linker')

    message = ('Deleting a ' + term_res + ' from amino acid number ' +
               str(amino_acid_number) + ' in entity id ' +
               str(entity_number) + ' and chain ' + chain + '.\n')
    insulin.delete_aa(amino_acid_number, entity_number, chain, term_res)

    # If the amino acid at a protein terminal is deleted, the terminal must
    # be deleted too.  The selection can be empty when nothing is left in the
    # chain, in which case there is no terminal to look for.
    full = insulin.Full_Structure
    remaining = full.aaid[(full.ent_id == entity_number) &
                          (full.chain == chain)]
    if len(remaining) > 0:
        min_aa = min(remaining)
        max_aa = max(remaining)
        # Atoms still carrying this residue number at the lowest position:
        # remove a leftover acetyl group.
        if amino_acid_number == min_aa and _residue_has_component(
                insulin, amino_acid_number, entity_number, chain, 'ACETY'):
            insulin.delete_aa(amino_acid_number, entity_number, chain, 'ACE')
        # Same for a leftover C-terminal, unless CTER was what was requested.
        if (amino_acid_number == max_aa and term_res != 'CTER' and
                _residue_has_component(insulin, amino_acid_number,
                                       entity_number, chain, 'CTERM')):
            insulin.delete_aa(amino_acid_number, entity_number, chain, 'CTER')

    file_name = os.path.basename(options.out).split('.')[0]
    dir_path = os.path.dirname(options.out)
    # Super Structure needs to know about models.  Floor division keeps the
    # model count an int.
    num_models = max((insulin.Full_Structure.shape[1] - 20) // 5, 0) + 1
    insulin.models = [str(i) for i in range(1, num_models + 1)]
    insulin.write_csv(dir_path, file_name)
    IO.write_pdb(insulin, dir_path, file_name, 'all')
    print(message)
def _exit_if_missing(path, what):
    """Print the standard error text and exit(1) if ``path`` does not exist."""
    if not os.path.exists(path):
        print("Error: File path for {0} does not exist.".format(what))
        print("Type -h or --help for description and options.")
        sys.exit(1)


def _csv_from_cif(opt_parser, options):
    """Build a Super Structure from a CIF file and write CSV and PDB files."""
    if not options.cif:
        opt_parser.error("--fromcif requires -i/--cif. Enter -h for help.")
    _exit_if_missing(options.cif, "input file")
    if options.pep:
        _exit_if_missing(options.pep, "peptide file")
        pep_file_path = options.pep
    else:
        # Default: the peptide construct shipped with the 'em' package.
        pep_file_path = pkg_resources.resource_filename(
            'em', 'params/' + 'peptides.pdb')
    params = CP.read_charmm_FF()
    parser2 = PDBParser(QUIET=True)
    p1 = parser2.get_structure('Peptides', pep_file_path)
    #######################################################################
    # The peptide construct is built with charmm so corrections for some
    # atom names to PDB/Databank atom types is needed.
    # TODO: this might not be necessary as the correction and inv_correction
    # dictionary in Super Structure takes care of it. Check before removing
    # the correction here.
    for model in p1.get_models():
        for chain in model.get_chains():
            for residue in chain.get_residues():
                # Iterating a residue yields its atoms.
                for atom in residue:
                    if residue.get_resname() == 'ILE' and atom.get_id() == 'CD':
                        atom.name = 'CD1'
                        atom.id = 'CD1'
    #######################################################################
    # Create Super Structure
    myCIF = SS.Super_Structure(params, options.cif, 'setup')
    myCIF.build_pep_and_anchers(p1)
    myCIF.read_dict_into_dataframes()
    myCIF.check_models()
    myCIF.create_super_structure_df()
    #######################################################################
    # Find missing residues to add to the Super Structure.
    myCIF.build_missing_aa()
    if options.out1:
        out_dir = os.path.dirname(options.out1)
        file_name = os.path.basename(options.out1).split('.')[0]
    else:
        # Default: CIF base name in the current directory.
        out_dir = ''
        file_name = os.path.basename(options.cif).split('.')[0]
    myCIF.write_csv(out_dir, file_name)
    IO.write_pdb(myCIF, out_dir, file_name, 'all')


def _csv_from_psf_crd(opt_parser, options):
    """Build a Super Structure from PSF and CRD files and write a CSV file."""
    if not options.psf or not options.crd:
        opt_parser.error("--frompsfcrd requires both -f/--psf and -d/--crd. "
                         "Enter -h for help.")
    _exit_if_missing(options.psf, "PSF file")
    _exit_if_missing(options.crd, "CRD file")
    directory, filename = os.path.split(options.crd)
    crd_file = IO.crd(options.crd)
    psf_file = IO.psf(options.psf)
    file_name = filename.split('.')[0]
    ###### After reading files, generate and index a Super Structure ######
    params = CP.read_charmm_FF()
    myCSV = SS.Super_Structure(params, directory, 'charmm_input')
    # At this point, a XPLOR psf could only have been created from a complete
    # structure, so no worries of gaps.
    myCSV.create_super_structure_df_from_CRD_PSF(crd_file, psf_file)
    # The CSV is written next to the CRD file, using the CRD base name.
    myCSV.write_csv(directory, file_name)


def main():
    """Command-line entry point: generate a Super Structure CSV.

    Exactly one mode is expected: ``--fromcif`` (CIF input, writes CSV and
    PDB) or ``--frompsfcrd`` (PSF + CRD input, writes CSV).

    Exit status: 2 when both modes are selected or a mode's required path
    option is missing (``opt_parser.error``), 1 when an input path does not
    exist.  With no mode selected the help text is printed and the function
    returns.
    """
    usage = "usage: %prog [options] arg"
    d = ("This program reads a CIF file and checks that all residues in the "
         "file are found in the CHARMM top_27 parameters. Residues found, "
         "but missing in the structure, are added to the structure. The "
         "full structure is outputed to a CSV file where Charmm, CIF and "
         "additional information is stored. Added residues are copied from "
         "a peptide structure with all amino acids present in the local "
         "CHARMM parameters files with fixed dihedral angles. Info in the "
         "CSV file should be all there is to explore the conformational "
         "space of added atoms.")
    opt_parser = optparse.OptionParser(usage, description=d)
    group = optparse.OptionGroup(
        opt_parser,
        "Generates CSV and PDB files for each model from a CIF file.")
    group.add_option("--fromcif", action="store_true",
                     help="Flag to generate a CSV file from a CIF file.")
    group.add_option("-i", "--cif", type="str", help="Path to input cif file.")
    group.add_option("-o", "--out1", type="str",
                     help="Path to output csv. Defaults to the CIF base name "
                          "in the current directory.")
    group.add_option("-p", "--pep", type="str",
                     help="Path to CHARMM peptide file. Defaults to the "
                          "peptides.pdb packaged with em.")
    opt_parser.add_option_group(group)
    group = optparse.OptionGroup(
        opt_parser, "Generates a CSV file from CRD and PSF files.")
    group.add_option(
        "--frompsfcrd", action="store_true",
        help="Flag to generate a CSV file from a CRD and PSF file.")
    group.add_option("-f", "--psf", type="str",
                     help="Path to input PSF file in XPLOR format.")
    group.add_option("-d", "--crd", type="str", help="Path to input CRD file.")
    opt_parser.add_option_group(group)
    options, args = opt_parser.parse_args()
    ########################## Options Entered ############################
    if options.fromcif and options.frompsfcrd:
        opt_parser.error(
            "Two option flags can't be selected at the same time. Enter -h for help."
        )
    if not (options.fromcif or options.frompsfcrd):
        # Nothing was requested; show the options instead of silently
        # doing nothing.
        opt_parser.print_help()
        return
    if options.fromcif:
        _csv_from_cif(opt_parser, options)
    else:
        _csv_from_psf_crd(opt_parser, options)