def run_nvt(input_file):
    """Run an NVT simulation on the merged de novo system.

    Writes ``nvt.mdp`` into the merged directory, preprocesses it with
    ``grompp`` using ``<input_file>.pdb`` as both the starting (``-c``) and
    the restraint reference (``-r``) structure, runs ``mdrun`` inside the
    ``NVT`` sub-directory and copies the resulting pdb to the final directory.

    Args:
        input_file: Path of the input structure without its ``.pdb`` suffix.
    """
    print('\nRunning NVT on de novo system', end='\r')
    merged = g_var.merged_directory
    os.chdir(merged)
    gen.mkdir_directory(merged + 'NVT')

    # Choose the preprocessor defines once, then write the mdp a single time.
    if g_var.user_at_input and g_var.args.disre and g_var.gmx_version:
        defines = '-DDISRES -DPOSRESCA'
    elif 'PROTEIN' in g_var.system:
        defines = '-DPOSRESCA'
    else:
        defines = ''
    # NOTE(review): 5000 and 0.001 are presumably the step count and the
    # timestep expected by write_steered_mdp -- confirm against its signature.
    write_steered_mdp(merged + 'nvt.mdp', defines, 5000, 0.001)

    run_name = 'merged_cg2at_de_novo_nvt'
    structure = input_file + '.pdb '
    grompp_cmd = ''.join([
        g_var.args.gmx,
        ' grompp',
        ' -po md_out-merged_cg2at_nvt',
        ' -f nvt.mdp',
        ' -p topol_final.top',
        ' -r ', structure,
        ' -c ', structure,
        ' -o NVT/', run_name,
        ' -maxwarn ', '2',
    ])
    gromacs([grompp_cmd, 'NVT/' + run_name + '.tpr'])

    os.chdir(merged + 'NVT')
    mdrun_cmd = ''.join([
        g_var.args.gmx,
        ' mdrun -v -nt ', str(g_var.args.ncpus),
        ' -pin on -deffnm ', run_name,
        ' -c ', run_name, '.pdb',
        ' -cpo ', run_name, '.cpt',
    ])
    gromacs([mdrun_cmd, run_name + '.pdb'])

    final_pdb = g_var.final_dir + 'final_cg2at_de_novo.pdb'
    gen.file_copy_and_check(run_name + '.pdb', final_pdb)
    print('Completed NVT, please find final de_novo system: \n' + final_pdb)
def create_aligned():
    """Build the aligned system, steer it and copy the result to the final directory.

    The aligned protein is merged and read back, and its per-chain RMSD is
    measured. If any chain is above 3 the full (gentle) steering ladder is
    used, otherwise a shorter one. When the first steering attempt returns a
    falsy value, steering is repeated with the full ladder. The resulting file
    is copied to ``final_cg2at_aligned.pdb`` in the final directory.
    """
    print('\nCreating aligned system')
    at_mod.merge_system_pdbs('_aligned')  ## create restraint positions for aligned system
    aligned_atoms, _ = read_in.read_in_atomistic(
        g_var.working_dir + 'PROTEIN/PROTEIN_aligned_merged.pdb')  ## reads in final pdb
    rmsd = at_mod_p.RMSD_measure_de_novo(aligned_atoms)  ## gets rmsd of de novo

    gentle_steer = ['very_low', 'low', 'mid', 'high', 'very_high', 'ultra']
    # any() keeps `steer` defined even when `rmsd` is empty; the original only
    # bound it inside the loop body and would raise NameError in that case.
    if any(rmsd[chain] > 3 for chain in rmsd):
        print('Your aligned structure is quite far from the CG, therefore running gentle steering\n')
        print_rmsd(rmsd)
        steer = list(gentle_steer)
    else:
        steer = ['low', 'high', 'ultra']

    steer_input = g_var.merged_directory + 'checked_ringed_lipid_de_novo'
    final_file = run_steer(steer, steer_input)
    if not final_file:
        # First attempt did not produce a file: retry with the full ladder.
        final_file = run_steer(list(gentle_steer), steer_input)

    aligned_pdb = g_var.final_dir + 'final_cg2at_aligned.pdb'
    gen.file_copy_and_check(final_file, aligned_pdb)  ## copy to final folder
    print('Completed alignment, please find final aligned system: \n' + aligned_pdb)
def run_parallel_pdb2gmx_min(res_type, sys_info):
    """Run pdb2gmx and minimisation for every chain of ``res_type`` in parallel.

    The forcefield folder and ``residuetypes.dat`` are copied into the residue
    working directory, the pdb2gmx selections are obtained once via
    ``ask_terminal`` and each chain is then dispatched to a worker pool.
    Any chain whose expected output files are missing afterwards is rerun
    serially in this process.

    Args:
        res_type: Residue type name; also the name of its working sub-directory
            and the key into ``g_var.system`` giving the number of chains.
        sys_info: Passed through unchanged to ``ask_terminal``.
    """
    # The manager is used as a context manager so its server process is shut
    # down explicitly rather than whenever the object is garbage collected.
    with mp.Pool(g_var.args.ncpus) as pool, mp.Manager() as manager:
        q = manager.Queue()
        res_dir = g_var.working_dir + res_type
        os.chdir(res_dir)
        make_min(res_type)
        gen.folder_copy_and_check(
            g_var.forcefield_location + g_var.forcefield,
            res_dir + '/' + g_var.forcefield + '/.')
        gen.file_copy_and_check(
            g_var.forcefield_location + '/residuetypes.dat',
            res_dir + '/residuetypes.dat')
        pdb2gmx_selections = ask_terminal(sys_info, res_type)

        n_chains = g_var.system[res_type]
        pool_process = pool.starmap_async(
            pdb2gmx_minimise,
            [(chain, pdb2gmx_selections, res_type, q) for chain in range(n_chains)])
        # NOTE(review): this polls ready() in a tight loop; any pacing has to
        # come from report_complete itself.
        while not pool_process.ready():
            report_complete('pdb2gmx/minimisation', q.qsize(), n_chains)

        # Fall back to a serial rerun for chains with missing output files.
        for chain in range(n_chains):
            chain_stem = res_type + '_de_novo_' + str(chain)
            gmx_pdb_missing = not os.path.exists(chain_stem + '_gmx.pdb')
            min_pdb_missing = not os.path.exists(res_dir + '/MIN/' + chain_stem + '.pdb')
            if gmx_pdb_missing or min_pdb_missing:
                print('For some reason parallisation of pdb2gmx failed on chain '
                      + str(chain) + ', now rerunning in serial.')
                pdb2gmx_minimise(chain, pdb2gmx_selections, res_type, q)

        print('{:<130}'.format(''), end='\r')
        print('\npdb2gmx/minimisation completed on residue type: ' + res_type + '\n')
def collect_input():
    """Stage the CG input and any atomistic inputs in the input directory.

    Exits if the CG file (``g_var.args.c``) or any atomistic file
    (``g_var.args.a``) is missing. Each atomistic file is copied and converted
    with ``editconf`` to ``AT_INPUT_<n>.pdb``; the CG file is copied and
    converted to ``CG_INPUT.pdb``. ``g_var.user_at_input`` is set to True once
    an atomistic input has been converted. The gmx version is also written to
    ``version.txt`` via the ``gromacs`` helper.
    """
    cg_path = g_var.args.c
    if not os.path.exists(cg_path):
        sys.exit('Cannot find CG input file: ' + cg_path)

    for directory in (g_var.working_dir, g_var.final_dir,
                      g_var.input_directory, g_var.merged_directory):
        gen.mkdir_directory(directory)

    input_dir = g_var.input_directory

    #### collates all input files in input directory
    at_files = g_var.args.a
    if at_files is not None:
        for file_num, file_name in enumerate(at_files):
            if not os.path.exists(file_name):
                sys.exit('cannot find atomistic input file: ' + file_name)
            leaf = gen.path_leaf(file_name)[1]
            gen.file_copy_and_check(file_name, input_dir + leaf)
            os.chdir(input_dir)
            at_pdb = input_dir + 'AT_INPUT_' + str(file_num) + '.pdb'
            gromacs([g_var.args.gmx + ' editconf -f ' + leaf + ' -resnr 0 -o ' + at_pdb,
                     at_pdb])
            if not os.path.exists(at_pdb):
                sys.exit('\nFailed to process atomistic input file')
            g_var.user_at_input = True
            # Return to the start directory so the next (possibly relative)
            # input path is resolved from where the user launched the run.
            os.chdir(g_var.start_dir)

    cg_leaf = gen.path_leaf(cg_path)[1]
    gen.file_copy_and_check(cg_path, input_dir + cg_leaf)
    os.chdir(input_dir)
    gromacs([g_var.args.gmx + ' -version', 'version.txt'])
    cg_pdb = input_dir + 'CG_INPUT.pdb'
    gromacs([g_var.args.gmx + ' editconf -f ' + cg_leaf + ' -resnr 0 -c -o ' + cg_pdb,
             cg_pdb])
    if not os.path.exists(cg_pdb):
        sys.exit('\nFailed to process coarsegrain input file')
def check_ringed_lipids(protein):
    """Look for abnormally long heavy-atom bonds in non-protein residues and fix them.

    Nothing is done when ``checked_ringed_lipid_de_novo.pdb`` already exists.
    Otherwise every bonded heavy-atom pair listed in ``g_var.heavy_bond`` is
    measured after placing the partner atom with ``read_in.brute_mic``; pairs
    whose distance lies strictly between 2 and 6 are written to
    ``threaded_lipids.dat`` and handed to ``fix_threaded_lipids``. When no
    such pair is found, the minimised de novo structure is copied to the
    checked file instead.

    Args:
        protein: Passed through unchanged to ``read_in_merged_pdbs``.
    """
    print('Checking for ringed lipids')
    merged_dir = g_var.merged_directory
    if os.path.exists(merged_dir + 'checked_ringed_lipid_de_novo.pdb'):
        return

    threaded_pdb = merged_dir + 'merged_cg2at_threaded.pdb'
    # Defaults for the resume path: when the threaded pdb already exists the
    # scan below is skipped, and these names were previously left unbound
    # (NameError on `if ringed or ...`).
    ringed = False
    lipid_atoms = []
    merge, merge_coords = [], []

    if not os.path.exists(threaded_pdb):
        os.chdir(merged_dir)
        merge, merge_coords = read_in_merged_pdbs([], [], protein)
        with open(merged_dir + 'threaded_lipids.dat', 'w') as ring_output:
            for at_val, atom in enumerate(merge):
                resname = get_np_resname(at_val)
                if resname not in g_var.np_residues:
                    continue
                offset = fetch_start_of_residue_np(at_val, resname)
                local_number = atom['atom_number'] - offset
                if local_number not in g_var.heavy_bond[resname]:
                    continue
                for at_bond in g_var.heavy_bond[resname][local_number]:
                    # heavy_bond entries are 1-based within the residue.
                    partner = at_bond - 1 + offset
                    # Only handle each bond once, from its lower-numbered atom.
                    if merge[partner]['atom_number'] <= merge[at_val]['atom_number']:
                        continue
                    merge[partner]['x'], merge[partner]['y'], merge[partner]['z'] = np.array(
                        read_in.brute_mic(merge_coords[at_val], merge_coords[partner]))
                    merge_coords[partner] = (merge[partner]['x'],
                                             merge[partner]['y'],
                                             merge[partner]['z'])
                    dist = gen.calculate_distance(merge_coords[at_val], merge_coords[partner])
                    if 2 < dist < 6:
                        midpoint = (np.array(merge_coords[at_val])
                                    + np.array(merge_coords[partner])) / 2
                        lipid_atoms.append([at_val, partner, midpoint])
                        ring_output.write(
                            '{0:6}{1:6}{2:2}{3:4}{4:2}{5:7}{6:5}{7:5}{8:5}{9:5}{10:5}{11:5}\n'.format(
                                'distance: ', str(np.round(dist, 2)), 'residue: ',
                                merge[at_val]['residue_name'], merge[at_val]['residue_id'],
                                ' atom_1: ', merge[at_val]['atom_name'],
                                'atom_2: ', merge[partner]['atom_name'],
                                'rough line num: ', at_val, partner))
                        ringed = True

    if ringed or os.path.exists(threaded_pdb):
        print('Found ' + str(len(lipid_atoms)) + ' abnormal bonds, now attempting to fix.')
        print('See this file for a complete list: ' + merged_dir + 'threaded_lipids.dat')
        fix_threaded_lipids(lipid_atoms, merge, merge_coords)
    else:
        gen.file_copy_and_check(merged_dir + 'MIN/merged_cg2at_de_novo_minimised.pdb',
                                merged_dir + 'checked_ringed_lipid_de_novo.pdb')
def collect_input(cg, at):
    """Copy the input files into the input directory and convert them to pdb.

    Args:
        cg: Path to the coarse-grained input file. A ``.tpr`` file is handed
            to ``input_sort``; anything else goes through ``editconf``.
        at: Path to an atomistic input file, or None if none was supplied.

    Returns:
        True when an atomistic input was supplied and converted, else False.
    """
    if not os.path.exists(cg):
        sys.exit('\ncannot find CG input file: ' + cg)

    gen.mkdir_directory(g_var.working_dir)
    gen.mkdir_directory(g_var.final_dir)
    gen.mkdir_directory(g_var.input_directory)
    if not g_var.at2cg:
        gen.mkdir_directory(g_var.merged_directory)

    #### collates all input files in input directory
    cg_name = cg.split('/')[-1]
    gen.file_copy_and_check(cg, g_var.input_directory + cg_name)

    has_atomistic = at is not None
    at_name = None
    if has_atomistic:
        if not os.path.exists(at):
            sys.exit('cannot find AT input file: ' + at)
        at_name = at.split('/')[-1]
        gen.file_copy_and_check(at, g_var.input_directory + at_name)

    #### converts input files into pdb files
    os.chdir(g_var.input_directory)
    if cg_name.endswith('.tpr'):
        input_sort(cg, 'conversion')
    else:
        gromacs([g_var.gmx + ' editconf -f ' + cg_name + ' -resnr 0 -o conversion_input.pdb -pbc',
                 'conversion_input.pdb'])

    if not has_atomistic:
        return False
    gromacs([g_var.gmx + ' editconf -f ' + at_name + ' -resnr 0 -o AT_input.pdb -pbc',
             'AT_input.pdb'])
    return True
def minimise_protein(protein, p_system, user_at_input):
    """Minimise every de novo protein chain, and the user-supplied ones if present.

    Args:
        protein: Number of protein chains to process.
        p_system: Passed through unchanged to ``ask_terminal``.
        user_at_input: When truthy, the user-supplied atomistic chain is
            minimised too, using the selections returned by
            ``histidine_protonation``.
    """
    protein_dir = g_var.working_dir + '/PROTEIN'
    os.chdir(protein_dir)
    gen.folder_copy_and_check(
        f_loc.forcefield_location + f_loc.forcefield,
        g_var.working_dir + 'PROTEIN/' + f_loc.forcefield + '/.')
    gen.file_copy_and_check(f_loc.forcefield_location + '/residuetypes.dat',
                            'residuetypes.dat')
    #### makes em.mdp for each chain
    make_min('PROTEIN')

    for chain in range(protein):
        selections = ask_terminal(chain, p_system)
        # Input fed to pdb2gmx on stdin for the de novo chain.
        stdin_block = '\n'.join([' << EOF ', '1', str(selections[0]), str(selections[1])])
        minimise_protein_chain(chain, 'novo_', stdin_block)
        selections = histidine_protonation(chain, 'novo_', selections)
        if user_at_input:
            minimise_protein_chain(chain, 'at_rep_user_supplied_', selections)

    os.chdir('..')
def fix_threaded_lipids(lipid_atoms, merge, merge_coords):
    """Push threaded bonds away from the neighbouring residue, then re-minimise.

    When ``merged_cg2at_threaded.pdb`` does not exist yet, each entry of
    ``lipid_atoms`` is processed: the first atom within radius 3 of the bond
    midpoint that belongs to a different residue identifies the neighbouring
    residue; the two heavy atoms of the bond and their hydrogens are shifted
    by 3 along the unit vector pointing from that residue's mean position
    (over atoms listed in ``g_var.res_top[...]['ATOMS']``) to the midpoint.
    The modified system is written out, minimised if needed, and the result
    copied to ``checked_ringed_lipid_de_novo.pdb``.

    Args:
        lipid_atoms: Iterable of ``[atom_index, bonded_atom_index, midpoint]``.
        merge: Sequence of atom dictionaries for the merged system (mutated).
        merge_coords: Coordinates indexed like ``merge`` (mutated).
    """
    merged_dir = g_var.merged_directory
    threaded_pdb = merged_dir + 'merged_cg2at_threaded.pdb'

    if not os.path.exists(threaded_pdb):
        tree = cKDTree(merge_coords)
        for threaded in lipid_atoms:
            first_atom, midpoint = threaded[0], threaded[2]
            resname = get_np_resname(first_atom)

            # Reset per bond: the previous `'P_count' not in locals()` test
            # only caught a miss on the first iteration and otherwise reused
            # the residue found for an earlier bond.
            P_count = None
            for at in tree.query_ball_point(midpoint, r=3):
                if merge[at]['residue_id'] != merge[first_atom]['residue_id']:
                    P_count = fetch_start_of_residue(at, merge)
                    break
            if P_count is None:
                sys.exit('There is an issue with the bond length detection')

            NP_count = fetch_start_of_residue_np(first_atom, resname)

            # Mean position of the neighbouring residue's listed atoms.
            bb = []
            for at in merge[P_count:]:
                if at['residue_id'] != merge[P_count]['residue_id']:
                    break
                if at['atom_name'] in g_var.res_top[at['residue_name']]['ATOMS']:
                    bb.append([at['x'], at['y'], at['z']])
            bb = np.mean(np.array(bb), axis=0)

            # Unit vector from that residue towards the bond midpoint.
            BB_M3 = (midpoint - bb) / np.linalg.norm(midpoint - bb)
            shift = BB_M3 * 3

            for heavy_atom in threaded[:2]:
                merge_coords[heavy_atom] += shift
                (merge[heavy_atom]['x'],
                 merge[heavy_atom]['y'],
                 merge[heavy_atom]['z']) = merge_coords[heavy_atom]
                # Hydrogen numbers are 1-based within the residue.
                for hydrogen in g_var.hydrogen[resname][heavy_atom - NP_count + 1]:
                    h_index = NP_count + hydrogen - 1
                    merge_coords[h_index] += shift
                    (merge[h_index]['x'],
                     merge[h_index]['y'],
                     merge[h_index]['z']) = merge_coords[h_index]

        coords, index_conversion = index_conversion_generate(merge, merge_coords)
        write_pdb(merge, coords, index_conversion, threaded_pdb)

    minimised_pdb = merged_dir + 'MIN/merged_cg2at_threaded_minimised.pdb'
    if not os.path.exists(minimised_pdb):
        gro.minimise_merged_pdbs('_threaded')
    gen.file_copy_and_check(minimised_pdb,
                            merged_dir + 'checked_ringed_lipid_de_novo.pdb')
def alchembed(system):
    """Run alchembed on each protein chain sequentially and copy the final output.

    For every chain an ``alchembed_<chain>.mdp`` is written (if missing), the
    run is preprocessed with ``grompp`` and executed with ``mdrun`` inside the
    ``alchembed`` directory. Chain 0 starts from the minimised merged
    structure; every later chain starts from the previous chain's output.

    Args:
        system: Number of protein chains to process.
    """
    os.chdir(g_var.working_dir + 'MERGED')
    gen.mkdir_directory('alchembed')

    run_stem = 'merged_cg2at_at_rep_user_supplied_alchembed_'
    minimised = 'min/merged_cg2at_at_rep_user_supplied_minimised.pdb '

    #### runs through each chain and run alchembed on each sequentially
    for chain in range(system):
        tag = str(chain)
        print('Running alchembed on chain: ' + tag)

        #### creates a alchembed mdp for each chain
        mdp_name = 'alchembed_' + tag + '.mdp'
        if not os.path.exists(mdp_name):
            mdp_text = ''.join([
                'define = -DPOSRES\nintegrator = sd\nnsteps = 500\ndt = 0.001\ncontinuation = no\nconstraint_algorithm = lincs',
                '\nconstraints = h-bonds\nns_type = grid\nnstlist = 25\nrlist = 1\nrcoulomb = 1\nrvdw = 1\ncoulombtype = PME',
                '\npme_order = 4\nfourierspacing = 0.16\ntc-grps = system\ntau_t = 0.1\nref_t = 310\npcoupl = no\ncutoff-scheme = Verlet',
                '\npbc = xyz\nDispCorr = no\ngen_vel = yes\ngen_temp = 310\ngen_seed = -1\nfree_energy = yes\ninit_lambda = 0.00',
                '\ndelta_lambda = 1e-3\nsc-alpha = 0.1000\nsc-power = 1\nsc-r-power = 6\ncouple-moltype = protein_' + tag,
                '\ncouple-lambda0 = none\ncouple-lambda1 = vdw',
            ])
            with open(mdp_name, 'w') as mdp_file:
                mdp_file.write(mdp_text)

        #### 1st chain uses the minimised structure, later chains the previous alchembed output
        if chain == 0:
            start_coords = minimised
        else:
            start_coords = 'alchembed/' + run_stem + str(chain - 1) + '.pdb '

        grompp_cmd = ''.join([
            g_var.gmx, ' grompp ',
            '-po md_out-merged_cg2at_alchembed_', tag, ' ',
            '-f ', mdp_name, ' ',
            '-p topol_final.top ',
            '-r ', minimised,
            '-c ', start_coords,
            '-o alchembed/', run_stem, tag, ' ',
            '-maxwarn 1',
        ])
        gromacs([grompp_cmd, 'alchembed/' + run_stem + tag + '.tpr'])

        #### run alchembed on the chain of interest
        os.chdir('alchembed')
        gromacs([g_var.gmx + ' mdrun -v -pin on -deffnm ' + run_stem + tag
                 + ' -c ' + run_stem + tag + '.pdb',
                 run_stem + tag + '.pdb'])
        os.chdir('..')

    #### copy final output to the FINAL folder
    gen.file_copy_and_check('alchembed/' + run_stem + str(chain) + '.pdb',
                            g_var.final_dir + 'final_cg2at_at_rep_user_supplied.pdb')
    gen.file_copy_and_check('merged_cg2at_no_steered.pdb',
                            g_var.final_dir + 'final_cg2at_no_steered.pdb')
def write_merged_topol(system, protein):
    """Write ``topol_final.top`` in the MERGED directory unless it already exists.

    The topology includes the forcefield, the water model (when 'SOL' is in
    ``system``), the ions, one itp per residue type and one itp per protein
    chain; the referenced itp files are copied into the MERGED directory under
    simplified names.

    Args:
        system: Mapping of residue type to its count; for 'PROTEIN' the count
            is the number of chains.
        protein: Suffix inserted into the source protein itp file names
            (``PROTEIN<protein>_<n>.itp``).
    """
    os.chdir(g_var.working_dir + 'MERGED')
    if os.path.exists('topol_final.top'):
        return

    forcefield_dir = g_var.final_dir + f_loc.forcefield
    protein_dir = g_var.working_dir + 'PROTEIN/'

    with open('topol_final.top', 'w') as topol_write:
        #### writes topology headers (will probably need updating with other forcefields)
        topol_write.write('; Include forcefield parameters\n#include \"'
                          + forcefield_dir + '/forcefield.itp\"\n')
        if 'SOL' in system:
            water_itp = f_loc.water + '.itp'
            gen.file_copy_and_check(f_loc.water_dir + water_itp, water_itp)
            topol_write.write('#include \"' + water_itp + '\"')
        topol_write.write('\n#include \"' + forcefield_dir + '/ions.itp\"\n\n')

        #### runs through residue types and copies to MERGED directory and simplifies the names
        for residue_type in system:
            if residue_type not in ['ION', 'SOL']:
                #### copies 1st itp file it comes across
                local_itp = residue_type + '.itp'
                for directory in f_loc.np_directories:
                    source_itp = directory[0] + residue_type + '/' + local_itp
                    if os.path.exists(source_itp):
                        topol_write.write('#include \"' + local_itp + '\"\n')
                        gen.file_copy_and_check(source_itp, local_itp)
                        break
            #### copies across protein itp files and simplifies the names
            if residue_type == 'PROTEIN':
                for protein_unit in range(system[residue_type]):
                    unit_name = 'PROTEIN_' + str(protein_unit)
                    source_stem = protein_dir + 'PROTEIN' + protein + '_' + str(protein_unit)
                    topol_write.write('#include \"' + unit_name + '.itp\"\n')
                    gen.file_copy_and_check(source_stem + '.itp', unit_name + '.itp')
                    gen.file_copy_and_check(protein_dir + unit_name + '_steered_posre.itp',
                                            unit_name + '_steered_posre.itp')
                    gen.file_copy_and_check(source_stem + '_posre.itp',
                                            unit_name + '_posre.itp')

        topol_write.write('[ system ]\n; Name\nSomething clever....\n\n[ molecules ]\n; Compound #mols\n')

        #### adds number of residues to the topology
        for residue_type in system:
            if residue_type == 'PROTEIN':
                #### adds monomers separately
                for protein_unit in range(system[residue_type]):
                    topol_write.write('PROTEIN_' + str(protein_unit) + ' 1\n')
            else:
                topol_write.write(residue_type + ' ' + str(system[residue_type]) + '\n')
### MERGES system g_var.tc['n_p_t'] = time.time() print('Merging all residue types to single file. (Or possibly tea)\n') gro.write_merged_topol() ## make final topology in merged directory #### copies all itp files and topologies from wherever they are stored into the FINAL folder for file_name in os.listdir(g_var.merged_directory): if not any(f in file_name for f in [ 'steered_posre.itp', 'low_posre.itp', 'mid_posre.itp', 'high_posre.itp' ]): if file_name.endswith('.itp') or file_name.endswith('final.top'): gen.file_copy_and_check(g_var.merged_directory + file_name, g_var.final_dir + file_name) #### merges provided atomistic protein and residues types into a single pdb file into merged directory if not os.path.exists(g_var.merged_directory + 'merged_cg2at_de_novo.pdb'): at_mod.merge_system_pdbs( '_de_novo') ## merge all minimised residues into a complete system ## minimise merged system if not os.path.exists(g_var.merged_directory + 'MIN/merged_cg2at_de_novo_minimised.pdb'): gro.make_min('merged_cg2at') gro.minimise_merged_pdbs('_de_novo') ## minimise system pdb g_var.tc['m_t'] = time.time() ## checks for threaded lipids, e.g. abnormal bonds lengths (not had a issue for a long time might delete) if not os.path.exists(g_var.merged_directory + 'checked_ringed_lipid_de_novo.pdb'):
def write_merged_topol():
    """Write ``topol_final.top`` for ``g_var.system`` in the MERGED directory.

    Topology and position-restraint itp files are copied into the MERGED
    directory under simplified names, the include lines are collected, and the
    topology is then written: forcefield include, optional
    ``extra_atomtypes.itp`` include, the collected includes, the ``[ system ]``
    and ``[ molecules ]`` sections, and a DISRES-guarded include of
    ``PROTEIN_disres.itp``. The file is always rewritten.
    """
    os.chdir(g_var.working_dir + 'MERGED')
    # if not os.path.exists('topol_final.top'):
    protein_posres_suffixes = [
        '_very_low_posre.itp', '_low_posre.itp', '_mid_posre.itp',
        '_high_posre.itp', '_very_high_posre.itp', '_ultra_posre.itp',
        '_ca_posre.itp', '_posre.itp',
    ]
    with open('topol_final.top', 'w') as topol_write:
        includes = []

        #### runs through residue types and copies to MERGED directory and simplifies the names
        for residue_type in g_var.system:
            #### copies 1st itp file it comes across
            for directory in g_var.np_directories + g_var.sol_directories + g_var.ion_directories:
                itp_name = gen.swap_to_solvent(residue_type)
                source_stem = directory[0] + residue_type + '/' + itp_name
                if not os.path.exists(source_stem + '.itp'):
                    continue
                # Only list each itp once in the topology.
                if not any(itp_name + '.itp' in entry for entry in includes):
                    includes.append('#include \"' + itp_name + '.itp\"\n')
                gen.file_copy_and_check(source_stem + '.itp', itp_name + '.itp')
                gen.file_copy_and_check(source_stem + '_posre.itp', itp_name + '_posre.itp')
                strip_atomtypes(itp_name + '.itp')
                break

            #### copies across protein itp files and simplifies the names
            if residue_type in ['PROTEIN', 'OTHER']:
                for unit in range(g_var.system[residue_type]):
                    unit_name = residue_type + '_' + str(unit)
                    includes.append('#include \"' + unit_name + '.itp\"\n')
                    gen.file_copy_and_check(
                        g_var.working_dir + residue_type + '/' + residue_type
                        + '_de_novo_' + str(unit) + '.itp',
                        unit_name + '.itp')
                    if residue_type in ['PROTEIN']:
                        for posres_type in protein_posres_suffixes:
                            gen.file_copy_and_check(
                                g_var.working_dir + 'PROTEIN/PROTEIN_' + str(unit) + posres_type,
                                'PROTEIN_' + str(unit) + posres_type)
                        gen.file_copy_and_check(
                            g_var.working_dir + 'PROTEIN/PROTEIN_disres.itp',
                            'PROTEIN_disres.itp')

        # The forcefield include is written either way; the extra atomtypes
        # include follows it only when that file is present.
        topol_write.write('; Include forcefield parameters\n#include \"'
                          + g_var.final_dir + g_var.forcefield + '/forcefield.itp\"\n')
        if os.path.exists('extra_atomtypes.itp'):
            topol_write.write('#include \"extra_atomtypes.itp\"\n')

        topol_write.writelines(includes)
        topol_write.write('[ system ]\n; Name\nSomething clever....\n\n[ molecules ]\n; Compound #mols\n')

        #### adds number of residues to the topology
        for residue_type in g_var.system:
            if residue_type in ['PROTEIN', 'OTHER']:
                #### adds monomers separately
                for unit in range(g_var.system[residue_type]):
                    topol_write.write(residue_type + '_' + str(unit) + ' 1\n')
            else:
                topol_write.write(gen.swap_to_solvent(residue_type) + ' '
                                  + str(g_var.system[residue_type]) + '\n')

        topol_write.write('\n#ifdef DISRES\n#include \"PROTEIN_disres.itp\"\n#endif')
print('\nMerging all residue types to single file. (Or possibly tea)\n') if len(system)>0: #### make final topology in merged directory gro.write_merged_topol(system, '_novo') #### make minimisation directory gro.make_min('merged_cg2at') #### merges provided atomistic protein and residues types into a single pdb file into merged directory if user_at_input and 'PROTEIN' in system: at_mod.merge_system_pdbs(system, '_no_steered', cg_residues, box_vec) at_mod.merge_system_pdbs(system, '_at_rep_user_supplied', cg_residues, box_vec) gro.minimise_merged_pdbs(system, '_at_rep_user_supplied') if len(system) > 1 and g_var.alchembed: gro.alchembed(system['PROTEIN']) else: gen.file_copy_and_check(g_var.working_dir+'MERGED/min/merged_cg2at_at_rep_user_supplied_minimised.pdb', g_var.final_dir+'final_cg2at_at_rep_user_supplied.pdb') gen.file_copy_and_check(g_var.working_dir+'MERGED/merged_cg2at_no_steered.pdb', g_var.final_dir+'final_cg2at_no_steered.pdb') #### merges de novo protein and residues types into a single pdb file into merged directory at_mod.merge_system_pdbs(system, '_novo', cg_residues, box_vec) gro.minimise_merged_pdbs(system, '_novo') gen.file_copy_and_check('merged_cg2at_novo_minimised.pdb', g_var.final_dir+'final_cg2at_de_novo.pdb') time_counter['m_t']=time.time() #### copies all itp files and topologies from whereever they are stored for file_name in os.listdir(g_var.working_dir+'MERGED'): if file_name.endswith('.itp') or file_name.endswith('final.top'): gen.file_copy_and_check(g_var.working_dir+'MERGED/'+file_name, g_var.final_dir+file_name) if 'PROTEIN' in cg_residues: #### creates mdp file if user wants to pull the structure to initial input
def CG2AT_run(user_at_input):
    """Drive the whole CG-to-atomistic conversion from input reading to the final report.

    Stages, in order: read the CG input and apply the optional box change and
    pbc fix; build and minimise the protein (plus the user-supplied atomistic
    protein with steered MD when ``user_at_input`` is truthy); build and
    minimise the non-protein residue types; merge everything, minimise the
    merged system and copy topologies to the final directory; report per-chain
    RMSD; optionally clean up; print the molecule summary and timings.

    Args:
        user_at_input: Truthy when a user-supplied atomistic structure
            (``AT_input.pdb`` in the input directory) should be used as well.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; in particular the placement of the ``time_counter``
    assignments relative to the enclosing ``if`` blocks should be confirmed.
    """
    gen.flags_used()
    # Wall-clock timestamps per stage, handed to gen.print_script_timings.
    time_counter = {}
    time_counter['i_t'] = time.time()
    print(
        '\nThis script is now hopefully doing the following (Good luck):\n\nReading in your CG representation\n'
    )
    #### reads in CG file and separates into residue types
    cg_residues, box_vec_initial = read_in.read_initial_cg_pdb()
    #### box size update
    if g_var.box != None:
        box_vec, box_shift = gen.new_box_vec(box_vec_initial, g_var.box)
    else:
        # No box override requested: keep the input box and apply no shift.
        box_vec = box_vec_initial
        box_shift = np.array([0, 0, 0])
    #### simple pbc fix and residue truncation if required
    cg_residues = read_in.fix_pbc(cg_residues, box_vec_initial, box_vec,
                                  box_shift)
    #### checks if fragment database is correct
    at_mod.sanity_check(cg_residues)
    time_counter['r_i_t'] = time.time()
    # Maps residue type -> count; filled in by the protein and non-protein stages.
    system = {}
    ### convert protein to atomistic representation
    if 'PROTEIN' in cg_residues:
        p_system, backbone_coords, final_coordinates_atomistic, sequence = at_mod_p.build_protein_atomistic_system(
            cg_residues['PROTEIN'], box_vec)
        system['PROTEIN'] = p_system['PROTEIN']
        time_counter['p_d_n_t'] = time.time()
        #### reads in user supplied atomistic structure
        if user_at_input and 'PROTEIN' in system:
            atomistic_protein_input = at_mod_p.read_in_atomistic(
                g_var.input_directory + 'AT_input.pdb', system['PROTEIN'],
                sequence, True)  ## reads in user structure
            atomistic_protein_centered, cg_com = at_mod_p.center_atomistic(
                atomistic_protein_input,
                backbone_coords)  ## centers each monomer by center of mass
            at_mod_p.rotate_protein_monomers(
                atomistic_protein_centered, final_coordinates_atomistic,
                backbone_coords, cg_com, box_vec)  ## rigid fits each monomer
        #### minimise each protein chain
        gro.minimise_protein(system['PROTEIN'], p_system, user_at_input)
        #### read in minimised de novo protein chains and merges chains
        merge_de_novo = at_mod_p.read_in_protein_pdbs(
            system['PROTEIN'], g_var.working_dir + 'PROTEIN/min/PROTEIN_novo',
            '.pdb')
        at_mod_p.write_merged_pdb(merge_de_novo, '_novo', box_vec)
        #### runs steered MD on user supplied protein chains
        if user_at_input and 'PROTEIN' in system:
            print('\tRunning steered MD on input atomistic structure\n')
            #### runs steered MD on atomistic structure on CA and CB atoms
            for chain in range(system['PROTEIN']):
                gro.steered_md_atomistic_to_cg_coord(chain)
            #### read in minimised user supplied protein chains and merges chains
            merge_at_user = at_mod_p.read_in_protein_pdbs(
                system['PROTEIN'], g_var.working_dir +
                'PROTEIN/steered_md/PROTEIN_at_rep_user_supplied', '.pdb')
            at_mod_p.write_merged_pdb(merge_at_user, '_at_rep_user_supplied',
                                      box_vec)
            # Also merge the user-supplied chains as they were before steering.
            merge_at_user_no_steer = at_mod_p.read_in_protein_pdbs(
                system['PROTEIN'],
                g_var.working_dir + 'PROTEIN/PROTEIN_at_rep_user_supplied',
                '_gmx.pdb')
            at_mod_p.write_merged_pdb(merge_at_user_no_steer, '_no_steered',
                                      box_vec)
        time_counter['f_p_t'] = time.time()
    #### converts non protein residues into atomistic (runs on all cores)
    if len([
            key for value, key in enumerate(cg_residues)
            if key not in ['PROTEIN']
    ]) > 0:
        np_system = {}
        pool = mp.Pool(mp.cpu_count())
        # .get() blocks until every residue type has been built.
        pool_process = pool.starmap_async(
            at_mod_np.build_atomistic_system,
            [(cg_residues, residue_type, box_vec) for residue_type in [
                key for value, key in enumerate(cg_residues)
                if key not in ['PROTEIN']
            ]]).get()  ## minimisation grompp parallised
        pool.close()
        # Each worker result is merged into np_system with dict.update.
        for residue_type in pool_process:
            np_system.update(residue_type)
        #### minimises each residue separately
        print('\nThis may take some time....(probably time for a coffee)\n')
        for residue_type in [
                key for value, key in enumerate(cg_residues)
                if key not in ['PROTEIN', 'ION']
        ]:
            print('Minimising individual residues: ' + residue_type)
            gro.non_protein_minimise(np_system[residue_type], residue_type)
            at_mod_np.merge_minimised(residue_type, np_system, box_vec)
            print('Minimising merged: ' + residue_type)
            gro.minimise_merged(residue_type, np_system)
        system.update(np_system)
        time_counter['b_n_p_t'] = time.time()
    time_counter['n_p_t'] = time.time()
    #### creates merged folder
    print('\nMerging all residue types to single file. (Or possibly tea)\n')
    if len(system) > 0:
        #### make final topology in merged directory
        gro.write_merged_topol(system, '_novo')
        #### make minimisation directory
        gro.make_min('merged_cg2at')
        #### merges provided atomistic protein and residues types into a single pdb file into merged directory
        if user_at_input and 'PROTEIN' in system:
            at_mod.merge_system_pdbs(system, '_no_steered', cg_residues,
                                     box_vec)
            at_mod.merge_system_pdbs(system, '_at_rep_user_supplied',
                                     cg_residues, box_vec)
            gro.minimise_merged_pdbs(system, '_at_rep_user_supplied')
            # alchembed only runs when more than one residue type is present
            # and the option is enabled; otherwise the minimised and unsteered
            # structures are copied to the final directory directly.
            if len(system) > 1 and g_var.alchembed:
                gro.alchembed(system['PROTEIN'])
            else:
                gen.file_copy_and_check(
                    g_var.working_dir +
                    'MERGED/min/merged_cg2at_at_rep_user_supplied_minimised.pdb',
                    g_var.final_dir + 'final_cg2at_at_rep_user_supplied.pdb')
                gen.file_copy_and_check(
                    g_var.working_dir + 'MERGED/merged_cg2at_no_steered.pdb',
                    g_var.final_dir + 'final_cg2at_no_steered.pdb')
        #### merges de novo protein and residues types into a single pdb file into merged directory
        at_mod.merge_system_pdbs(system, '_novo', cg_residues, box_vec)
        gro.minimise_merged_pdbs(system, '_novo')
        gen.file_copy_and_check('merged_cg2at_novo_minimised.pdb',
                                g_var.final_dir + 'final_cg2at_de_novo.pdb')
        time_counter['m_t'] = time.time()
        #### copies all itp files and topologies from wherever they are stored
        for file_name in os.listdir(g_var.working_dir + 'MERGED'):
            if file_name.endswith('.itp') or file_name.endswith('final.top'):
                gen.file_copy_and_check(
                    g_var.working_dir + 'MERGED/' + file_name,
                    g_var.final_dir + file_name)
    if 'PROTEIN' in cg_residues:
        #### creates mdp file if user wants to pull the structure to initial input
        if not os.path.exists(g_var.final_dir + 'steered_md.mdp'):
            with open(g_var.final_dir + 'steered_md.mdp', 'w') as steered_md:
                steered_md.write(
                    'define = -DPOSRES\nintegrator = md\nnsteps = 3000\ndt = 0.001\ncontinuation = no\nconstraint_algorithm = lincs\n'
                )
                steered_md.write(
                    'constraints = h-bonds\nns_type = grid\nnstlist = 25\nrlist = 1\nrcoulomb = 1\nrvdw = 1\ncoulombtype = PME\n'
                )
                steered_md.write(
                    'pme_order = 4\nfourierspacing = 0.16\ntcoupl = V-rescale\ntc-grps = system\ntau_t = 0.1\nref_t = 310\npcoupl = no\n'
                )
                steered_md.write(
                    'pbc = xyz\nDispCorr = no\ngen_vel = yes\ngen_temp = 310\ngen_seed = -1'
                )
        #### calculates final RMS
        # Keyed by output label, then by chain (see the print loop below).
        RMSD = {}
        de_novo_atoms = at_mod_p.read_in_atomistic(
            g_var.final_dir + 'final_cg2at_de_novo.pdb', system['PROTEIN'],
            sequence, False)
        RMSD['de novo '] = at_mod_p.RMSD_measure(de_novo_atoms, system,
                                                 backbone_coords)
        if user_at_input and 'PROTEIN' in system:
            at_input_atoms = at_mod_p.read_in_atomistic(
                g_var.final_dir + 'final_cg2at_at_rep_user_supplied.pdb',
                system['PROTEIN'], sequence, False)
            RMSD['at input'] = at_mod_p.RMSD_measure(at_input_atoms, system,
                                                     backbone_coords)
        # chr(197) is the angstrom sign used in the table header.
        print('\n{0:^10}{1:^25}{2:^10}'.format('output ', 'chain',
                                               'RMSD (' + chr(197) + ')'))
        print('{0:^10}{1:^25}{2:^10}'.format('-------', '-----', '---------'))
        for rmsd in RMSD:
            for chain in RMSD[rmsd]:
                print('{0:^10}{1:^25}{2:^10}'.format(
                    rmsd, str(chain), float(RMSD[rmsd][chain])))
    #### removes temp file from script, anything with temp in really
    if g_var.clean:
        gen.clean(cg_residues)
    time_counter['f_t'] = time.time()
    #### prints out system information
    print('\n{:-<100}'.format(''))
    print('{0:^100}'.format('Script has completed, time for a beer'))
    print('\n{0:^20}{1:^10}'.format('molecules', 'number'))
    print('{0:^20}{1:^10}'.format('---------', '------'))
    for section in system:
        print('{0:^20}{1:^10}'.format(section, system[section]))
    #### prints out script timings for each section
    if g_var.v >= 1:
        gen.print_script_timings(time_counter, system, user_at_input)