def main(): #takes name of pdb file without the extention args = sys.argv pdb_file = args[1] out_file = args[2] score_type = int(args[3]) #set up timer to figure out how long the code took to run t0=time() # Initialize Rosetta. init(extra_options='-mute basic -mute core -mute protocol -mute warn') # Constants PACK_RADIUS = 5 #Amino acids, notice there is no C AAs = ("A","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y") #Number of mutations to accept max_accept_mut = 2000 #Population size N = 1 #Beta (temp term) beta = 1 #Prepare data headers data = ['Variant,ChainA,ChainB,ChainC,InterfaceAB,InterfaceAC,"delta-delta-G",Probability,Generation\n'] initial_pose = pose_from_pdb(pdb_file) #Set up ScoreFunction sf = get_fa_scorefxn() #Set up MoveMap This is where you turn the bb and side chain flexibility on and off mm = MoveMap() mm.set_bb(False) #Get the init score of the struct to calc the threshold pre_pre_packing_score = sf(initial_pose) print(pre_pre_packing_score) min_mover = MinMover() min_mover.movemap(mm) min_mover.score_function(sf) min_mover.min_type('dfpmin_armijo_nonmonotone') cp_init_pdb = Pose() cp_init_pdb.assign(initial_pose) chains=cp_init_pdb.split_by_chain() #split up AB inter and AC inter initial_poseAB = Pose() initial_poseAB.assign(initial_pose) initial_poseAC = Pose() initial_poseAC.assign(initial_pose) init_chain_moverAB = SwitchChainOrderMover() init_chain_moverAB.chain_order("12") init_chain_moverAB.apply(initial_poseAB) init_chain_moverAC = SwitchChainOrderMover() init_chain_moverAC.chain_order("13") init_chain_moverAC.apply(initial_poseAC) #score the inital stabs of each chain wt_a=sf(chains[1]) wt_b=sf(chains[2]) wt_c=sf(chains[3]) #score the intial interfaces inter_AB=InterfaceEnergy_split(initial_poseAB) inter_AC=InterfaceEnergy_split(initial_poseAC) #init thresholds set to half of the init stabilities, if you want to do a different protein change these threshold_a=-138.41754752 threshold_b=-61.378619136 threshold_c=-61.378619136 threshold_inter_ab=-10.3726691079 threshold_inter_ac=-10.3726691079 data.append('WT,' + str(wt_a)+','+str(wt_b)+','+str(wt_c)+','+str(inter_AB)+','+str(inter_AC)+',0.0,0.0,0\n') #check the inital starting score init_score=score_all(initial_pose,sf,min_mover,beta,threshold_a, threshold_b, threshold_c,threshold_inter_ab,threshold_inter_ac,score_type) print(init_score) #number of residues to select from n_res = initial_pose.total_residue() print(n_res) #start sim i=0 gen=0 while i < max_accept_mut: #update the number of generations that have pased gen+=1 print 'accepts:', i #pick a place to mutate mut_location = random.randint(1, n_res) #mut_location = random.randint(1, 10) #get the amino acid at that position res = initial_pose.residue(mut_location) #don't mess with C, just choose again while(res.name1() == 'C'): mut_location = random.randint(1, n_res) #get the amino acid at that position res = initial_pose.residue(mut_location) #choose the amino acid to mutate to toname = res.name1() new_mut_key = random.randint(0,len(AAs)-1) proposed_res = AAs[new_mut_key] #don't bother mutating to the same amino acid it just takes more time while(proposed_res == res.name1()): new_mut_key = random.randint(0,len(AAs)-1) proposed_res = AAs[new_mut_key] #init mutant with current mutant_pose = Pose() mutant_pose.assign(initial_pose) #mutate mutant_pose=mutate_residue_chain(mutant_pose, mut_location, proposed_res, PACK_RADIUS, sf) #score mutant mut_score=score_all(mutant_pose,sf,min_mover,beta,threshold_a, threshold_b, threshold_c,threshold_inter_ab,threshold_inter_ac,score_type) #get the probability that the mutation will be accepted probability = calc_prob_scores(mut_score['score'], init_score['score'], N) rand = random.random() #test to see if mutation is accepted if float(rand) < float(probability): print "accepted" #make a name for the new mutant variant_name = str(toname) + str(initial_pose.pdb_info().number(mut_location)) + str(proposed_res) # Assuming some burn in phase, make this zero if you want to store everything if i>=0: #save name and energy change data.append(variant_name +',' + str(mut_score['a'])+','+str(mut_score['b'])+','+str(mut_score['c'])+','+str(mut_score['ab'])+','+str(mut_score['ac'])+',' + str(mut_score['score'] - init_score['score']) + "," + str(probability) + "," + str(gen) + "\n") #save the new accepted mutation pdb_name=str(i)+".pdb" mutant_pose.dump_pdb(pdb_name) #update the wildtype initial_pose = mutant_pose init_score = mut_score #update number of accepts i+=1
def main(): parser = argparse.ArgumentParser() parser.add_argument('pdb_filename', action="store", type=str) parser.add_argument('replicate_number', action="store", type=int) inputs = parser.parse_args() #takes name of pdb file without the extention pdb_file = inputs.pdb_filename prot_name = pdb_file.split('/')[-1].split('.')[0] #set up timer to figure out how long the code took to run t0 = time() fasta_file = pdb_file.replace('/structures/', '/fastas/').replace('.pdb', '.fasta') records = list(SeqIO.parse(fasta_file, 'fasta')) assert len(records) == 1 wt_seq = str(records[0].seq) # Initialize Rosetta. #init(extra_options='-mute basic -mute core') init(extra_options= '-mute basic -mute core -rebuild_disulf false -detect_disulf false') ######################## # Constants ######################## PACK_RADIUS = 12.0 #Amino acids AAs = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y") AAs_choice_dict = {} for aa in AAs: AAs_choice_dict[aa] = [other_aa for other_aa in AAs if other_aa != aa] #Number of mutations to accept max_accept_mut = 10 * len(wt_seq) #max_accept_mut = 2048 #Population size N = 1000 #Beta (temp term) beta = 1 #Fraction of the WT stability value to shoot for threshold_fraction = 0.5 ######################## ######################## #Prepare data headers data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n'] #Load a clean pdb file initial_pose = pose_from_pdb(pdb_file) if '.clean' in pdb_file: pdb_file = ''.join(pdb_file.split('.clean')) #Set up ScoreFunction sf = get_fa_scorefxn() #Set up MoveMap. mm = MoveMap() mm.set_bb(True) mm.set_chi(True) #Pack and minimize initial pose to remove clashes. pre_pre_packing_score = sf(initial_pose) task = standard_packer_task(initial_pose) task.restrict_to_repacking() task.or_include_current(True) pack_rotamers_mover = RotamerTrialsMover(sf, task) pack_rotamers_mover.apply(initial_pose) min_mover = MinMover() min_mover.movemap(mm) min_mover.score_function(sf) min_mover.min_type('dfpmin_armijo_nonmonotone') min_mover.apply(initial_pose) post_pre_packing_score = sf(initial_pose) #Threshold for selection threshold = post_pre_packing_score * threshold_fraction print 'threshold:', threshold data.append('WT,' + str(post_pre_packing_score) + ',0.0,0.0,0\n') #number of residues to select from n_res = initial_pose.total_residue() #start evolution i = 0 gen = 0 while i < max_accept_mut: #update the number of generations that have pased gen += 1 #print 'accepts:', i #pick a place to mutate mut_location = random.randint(1, n_res) #get the amino acid at that position res = initial_pose.residue(mut_location) #choose the amino acid to mutate to #new_mut_key = random.randint(0,len(AAs)-1) #proposed_res = AAs[new_mut_key] proposed_res = random.choice(AAs_choice_dict[res.name1()]) #make the mutation mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res, PACK_RADIUS, sf) #score mutant variant_score = sf(mutant_pose) #get the probability that the mutation will be accepted probability = calc_prob_mh(variant_score, post_pre_packing_score, N, beta, threshold) #test to see if mutation is accepted if random.random() < probability: #create a name for the mutant if its going to be kept variant_name = res.name1() + str(initial_pose.pdb_info().number( mut_location)) + str(proposed_res) #save name and energy change data.append(variant_name + "," + str(variant_score) + "," + str(variant_score - post_pre_packing_score) + "," + str(probability) + "," + str(gen) + "\n") # if i == (max_accept_mut - 1): # final_pdb_name=pdb_file.replace('.pdb', '_thresh={}_Neff={}_beta={}_i={}_nmut={}.pdb'.format(threshold_fraction, N, beta, inputs.replicate_number, i)) # mutant_pose.dump_pdb(final_pdb_name) #update the wildtype initial_pose = mutant_pose post_pre_packing_score = variant_score #update number of accepts i += 1 print '\nMutations and scoring complete.' t1 = time() # Output results. output_filename = '../Results/{}/{}_thresh={}_Neff={}_beta={}_i={}.csv'.format( prot_name, prot_name, threshold_fraction, N, beta, inputs.replicate_number) with open(output_filename, "w") as outfile: outfile.writelines(data) print 'Data written to:', output_filename print 'program takes %f' % (t1 - t0)
def main(): #read in the file made by the forward sim args = sys.argv inputfile = args[1] data = open(inputfile) first_line = data.readlines()[1] var_line=first_line.split(',') start_stab=var_line[1] #the first entry in the file is the wild type structure, calc the threshold using this threshold=float(start_stab)+10 print(threshold) # Initialize Rosetta. init(extra_options='-mute basic -mute core') # Constants PACK_RADIUS = 0 #Population size N = 100 #Beta (temp term) beta = .6 #Set up ScoreFunction sf = get_fa_scorefxn() #Set up MoveMap. mm = MoveMap() mm.set_bb(True) mm.set_chi(True) min_mover = MinMover() min_mover.movemap(mm) min_mover.score_function(sf) min_mover.min_type('dfpmin_armijo_nonmonotone') #Prepare data headers data = ['pdbfile_target,pdbfile_used,step,RevertTo,Change,Pos,From,OrgScore,RevScore,Change,Prob\n'] # Get the reversions file, the output file the score_mutant_pdb has made variant_scores=open(inputfile) #get just the mutation we want to revert to lines= variant_scores.readlines() var_line=lines[500] #gets the Nth line how ever long you want the burn to be print "staring here", var_line var_line=var_line.split(',')[0] var_loc=int(filter(str.isdigit, var_line)) var_rev=var_line[:1] gen=1 #get all the pdb files sort_list=sorted(glob.glob('*[0-9].pdb'), key=numericalSort) sort_list=sort_list[-1016:] #include the last 1000 and some pdbs, the 16 is because we want the ones that happened before the 500th mutation too. for i in range(1,len(sort_list)-30): step=-15 #calc reversion for next 15 moves for infile in sort_list[i:i+31]: #for each mutation var_line=lines[gen+500] #gets the Nth line how ever long you want the burn to be var_line=var_line.split(',')[0] print(var_line) var_loc=int(filter(str.isdigit, var_line)) var_rev="" old="" if(step<0): var_rev=var_line[len(var_line)-1:len(var_line)] old=var_line[:1] else: var_rev=var_line[:1] old=var_line[len(var_line)-1:len(var_line)] print "Current File Being Processed is: " + infile print "revering to:", var_rev print "at:", var_loc #get the pdb you want to revert and make the reversion initial_pose = pose_from_pdb(infile) mutant_pose = mutate_residue(initial_pose, var_loc , var_rev, PACK_RADIUS, sf) #repack mut task1 = standard_packer_task(mutant_pose) task1.restrict_to_repacking() task1.or_include_current(True) packer_rotamers_mover1 = RotamerTrialsMover(sf,task1) packer_rotamers_mover1.apply(mutant_pose) #repack init task2 = standard_packer_task(initial_pose) task2.restrict_to_repacking() task2.or_include_current(True) pack_rotamers_mover2 = RotamerTrialsMover(sf, task2) pack_rotamers_mover2.apply(initial_pose) #apply min mover min_mover.apply(mutant_pose) min_mover.apply(initial_pose) #get scores variant_score = sf(mutant_pose) initial_score = sf(initial_pose) #get prob probability = calc_prob_mh(variant_score, initial_score, N, beta, threshold) print(str(gen+499)+".pdb"+","+str(infile)+","+str(step)+","+ str(var_line) + ","+str(var_rev)+","+str(var_loc)+","+str(old)+"," +str(initial_score) + "," + str(variant _score) + "," + str(variant_score - initial_score)+ ","+ str(probability)+ "\n") data.append(str(gen+499)+".pdb"+","+str(infile)+","+str(step)+","+ str(var_line) + ","+str(var_rev)+","+str(var_loc)+","+str(old)+"," +str(initial_score) + "," + str(v ariant_score) + "," + str(variant_score - initial_score)+ ","+ str(probability)+ "\n") step=step+1 gen+=1 print '\nDONE' data_filename = 'premutate_rep1_bb_T_ch_T.csv' with open(data_filename, "w") as f: f.writelines(data)
def main(): #takes name of pdb file without the extention args = sys.argv pdb_file = args[1] #set up timer to figure out how long the code took to run t0 = time() # Initialize Rosetta. init(extra_options='-mute basic -mute core') # Constants PACK_RADIUS = 10.0 #Amino acids, notice there is no C AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y") #Number of mutations to accept max_accept_mut = 1500 #Population size N = 100 #Beta (temp term) beta = 1 #Prepare data headers data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n'] #Load and clean up pdb file name = pdb_file + ".pdb" cleanATOM(name) clean_name = pdb_file + ".clean.pdb" initial_pose = pose_from_pdb(clean_name) #Set up ScoreFunction sf = get_fa_scorefxn() #Set up MoveMap. mm = MoveMap() #change these for more or less flexability mm.set_bb(True) mm.set_chi(True) #Pack and minimize initial pose to remove clashes. pre_pre_packing_score = sf(initial_pose) task = standard_packer_task(initial_pose) task.restrict_to_repacking() task.or_include_current(True) pack_rotamers_mover = RotamerTrialsMover(sf, task) pack_rotamers_mover.apply(initial_pose) min_mover = MinMover() min_mover.movemap(mm) min_mover.score_function(sf) min_mover.min_type('dfpmin_armijo_nonmonotone') min_mover.apply(initial_pose) post_pre_packing_score = sf(initial_pose) #Set threshold for selection threshold = pre_pre_packing_score / 2 data.append('WT,' + str(post_pre_packing_score) + ',0.0 ,0.0,0\n') #number of residues to select from n_res = initial_pose.total_residue() #start sim i = 0 gen = 0 while i < max_accept_mut: #update the number of generations that have pased gen += 1 print 'accepts:', i #pick a place to mutate mut_location = random.randint(1, n_res) #get the amino acid at that position res = initial_pose.residue(mut_location) #don't mess with C, just choose again while (res.name1() == 'C'): mut_location = random.randint(1, n_res) #get the amino acid at that position res = initial_pose.residue(mut_location) #choose the amino acid to mutate to new_mut_key = random.randint(0, len(AAs) - 1) proposed_res = AAs[new_mut_key] #don't bother mutating to the same amino acid it just takes more time while (proposed_res == res.name1()): new_mut_key = random.randint(0, len(AAs) - 1) proposed_res = AAs[new_mut_key] #make the mutation #this is actually a really bad model, and probably shouldnt be used. In new version is repack the whole thing, then reminimize, I should also backrub it. mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res, PACK_RADIUS, sf) #score mutant variant_score = sf(mutant_pose) #get the probability that the mutation will be accepted probability = calc_prob_mh(variant_score, post_pre_packing_score, N, beta, threshold) #test to see if mutation is accepted if random.random() < probability: #create a name for the mutant if its going to be kept variant_name = res.name1() + str(initial_pose.pdb_info().number( mut_location)) + str(proposed_res) # Assuming 1000 burn in phase, take this if out if you want to store everything if i > 1000: #save name and energy change data.append(variant_name + "," + str(variant_score) + "," + str(variant_score - post_pre_packing_score) + "," + str(probability) + "," + str(gen) + "\n") pdb_name = str(i) + ".pdb" mutant_pose.dump_pdb(pdb_name) #update the wildtype initial_pose = mutant_pose post_pre_packing_score = variant_score #update number of accepts i += 1 print '\nMutations and scoring complete.' t1 = time() # Output results. data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv' with open(data_filename, "w") as f: f.writelines(data) print 'Data written to:', data_filename print 'program takes %f' % (t1 - t0)
# Set up MoveMap. mm = MoveMap() mm.set_bb(True) mm.set_chi(True) # Pack and minimize initial pose to remove clashes. pre_pre_packing_score = sf(initial_pose) task = standard_packer_task(initial_pose) task.restrict_to_repacking() task.or_include_current(True) pack_rotamers_mover = RotamerTrialsMover(sf, task) pack_rotamers_mover.apply(initial_pose) min_mover = MinMover() min_mover.movemap(mm) min_mover.score_function(sf) min_mover.min_type('linmin') if args.minimize: min_mover.apply(initial_pose) post_pre_packing_score = sf(initial_pose) print print 'Reference Protein:', args.pdb_filename print ' Score:' print ' Before pre-packing:', pre_pre_packing_score print ' After pre-packing:', post_pre_packing_score print data.append('WT,' + str(post_pre_packing_score) + ',0.0\n')
def main(): #takes name of pdb file without the extention args = sys.argv pdb_file = args[1] #set up timer to figure out how long the code took to run t0 = time() # Initialize Rosetta. init(extra_options='-mute basic -mute core') # Constants PACK_RADIUS = 10.0 #Amino acids, notice there is no C AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y") #Number of mutations to accept max_accept_mut = 5000 #Population size N = 100 #Beta (temp term) beta = 1 #Prepare data headers data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n'] #Load and clean up pdb file name = pdb_file + ".pdb" cleanATOM(name) clean_name = pdb_file + ".clean.pdb" initial_pose = pose_from_pdb(clean_name) #Set up ScoreFunction sf = get_fa_scorefxn() #Set up MoveMap. mm = MoveMap() mm.set_bb(True) mm.set_chi(True) #Pack and minimize initial pose to remove clashes. pre_pre_packing_score = sf(initial_pose) task = standard_packer_task(initial_pose) task.restrict_to_repacking() task.or_include_current(True) pack_rotamers_mover = RotamerTrialsMover(sf, task) pack_rotamers_mover.apply(initial_pose) min_mover = MinMover() min_mover.movemap(mm) min_mover.score_function(sf) min_mover.min_type('dfpmin_armijo_nonmonotone') min_mover.apply(initial_pose) post_pre_packing_score = sf(initial_pose) pdb_name = str(pdb_file) + "_min.pdb" initial_pose.dump_pdb(pdb_name) #Set threshold for selection #threshold = post_pre_packing_score/2 #threshold = post_pre_packing_score data.append(str(pdb_file) + str(post_pre_packing_score) + ',0.0,0.0,0\n') data_filename = pdb_file + '.score' with open(data_filename, "w") as f: f.writelines(data) print 'Data written to:', data_filename '''