示例#1
0
def main():
    """Run an accept/reject mutation simulation on a three-chain complex.

    Command-line arguments (read from ``sys.argv``):
        1. path of the PDB file to load
        2. path of the CSV file the trajectory is written to
        3. integer score type, passed through unchanged to ``score_all``

    Each generation proposes one random point mutation (cysteines are never
    mutated and never proposed), scores the mutant with ``score_all`` and
    accepts it with the probability returned by ``calc_prob_scores``.  Every
    accepted mutant is appended to the CSV data, dumped as ``<accepts>.pdb``
    in the working directory and becomes the new reference.  The loop stops
    after ``max_accept_mut`` accepted mutations, then the CSV is written.
    """
    args = sys.argv
    pdb_file = args[1]
    out_file = args[2]
    score_type = int(args[3])
    # Timer used to report how long the run took.
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core -mute protocol -mute warn')

    # Constants
    PACK_RADIUS = 5
    # Amino acids that may be proposed; note there is no C.
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    # Number of mutations to accept
    max_accept_mut = 2000
    # Population size
    N = 1
    # Beta (temp term)
    beta = 1
    # Accepted mutations before this count are not recorded; 0 stores everything.
    burn_in = 0

    # Prepare data headers
    data = ['Variant,ChainA,ChainB,ChainC,InterfaceAB,InterfaceAC,"delta-delta-G",Probability,Generation\n']

    initial_pose = pose_from_pdb(pdb_file)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap; this is where backbone / side-chain flexibility is
    # switched on and off.
    mm = MoveMap()
    mm.set_bb(False)

    # Score of the untouched input structure, printed for reference.
    pre_pre_packing_score = sf(initial_pose)
    print(pre_pre_packing_score)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    # Split a copy of the input into its chains for per-chain scoring.
    cp_init_pdb = Pose()
    cp_init_pdb.assign(initial_pose)
    chains = cp_init_pdb.split_by_chain()

    # Build the two sub-complexes used for the interface scores:
    # chains 1+2 ("AB") and chains 1+3 ("AC").
    initial_poseAB = Pose()
    initial_poseAB.assign(initial_pose)
    initial_poseAC = Pose()
    initial_poseAC.assign(initial_pose)

    init_chain_moverAB = SwitchChainOrderMover()
    init_chain_moverAB.chain_order("12")
    init_chain_moverAB.apply(initial_poseAB)

    init_chain_moverAC = SwitchChainOrderMover()
    init_chain_moverAC.chain_order("13")
    init_chain_moverAC.apply(initial_poseAC)

    # Score the initial stability of each chain.
    wt_a = sf(chains[1])
    wt_b = sf(chains[2])
    wt_c = sf(chains[3])

    # Score the initial interfaces.
    inter_AB = InterfaceEnergy_split(initial_poseAB)
    inter_AC = InterfaceEnergy_split(initial_poseAC)

    # Thresholds are hard-coded (per the original note: half of the initial
    # stabilities of the protein this was written for); change them when
    # running a different protein.
    threshold_a = -138.41754752
    threshold_b = -61.378619136
    threshold_c = -61.378619136
    threshold_inter_ab = -10.3726691079
    threshold_inter_ac = -10.3726691079

    data.append(','.join(['WT', str(wt_a), str(wt_b), str(wt_c),
                          str(inter_AB), str(inter_AC), '0.0', '0.0', '0'])
                + '\n')

    # Check the initial starting score.
    init_score = score_all(initial_pose, sf, min_mover, beta, threshold_a,
                           threshold_b, threshold_c, threshold_inter_ab,
                           threshold_inter_ac, score_type)
    print(init_score)

    # Number of residues to select from.
    n_res = initial_pose.total_residue()
    print(n_res)

    # Start the simulation: i counts accepted mutations, gen counts proposals.
    i = 0
    gen = 0
    while i < max_accept_mut:
        gen += 1

        print('accepts: %s' % i)

        # Pick a place to mutate and look up the residue currently there.
        mut_location = random.randint(1, n_res)
        res = initial_pose.residue(mut_location)

        # Don't touch C; just choose again.
        while res.name1() == 'C':
            mut_location = random.randint(1, n_res)
            res = initial_pose.residue(mut_location)

        # Choose the amino acid to mutate to.
        from_name = res.name1()
        new_mut_key = random.randint(0, len(AAs) - 1)
        proposed_res = AAs[new_mut_key]

        # Mutating to the same amino acid only costs time, so redraw.
        while proposed_res == res.name1():
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]

        # Start the mutant from a copy of the current reference pose.
        mutant_pose = Pose()
        mutant_pose.assign(initial_pose)

        # Mutate.
        mutant_pose = mutate_residue_chain(mutant_pose, mut_location,
                                           proposed_res, PACK_RADIUS, sf)

        # Score the mutant.
        mut_score = score_all(mutant_pose, sf, min_mover, beta, threshold_a,
                              threshold_b, threshold_c, threshold_inter_ab,
                              threshold_inter_ac, score_type)

        # Probability that the mutation will be accepted.
        probability = calc_prob_scores(mut_score['score'],
                                       init_score['score'], N)

        rand = random.random()

        # Test whether the mutation is accepted.
        if float(rand) < float(probability):
            print("accepted")

            # Name of the new mutant: <from><pdb residue number><to>.
            variant_name = (str(from_name)
                            + str(initial_pose.pdb_info().number(mut_location))
                            + str(proposed_res))

            if i >= burn_in:
                # Save name and energy change.
                data.append(','.join([
                    variant_name,
                    str(mut_score['a']),
                    str(mut_score['b']),
                    str(mut_score['c']),
                    str(mut_score['ab']),
                    str(mut_score['ac']),
                    str(mut_score['score'] - init_score['score']),
                    str(probability),
                    str(gen),
                ]) + "\n")

                # Save the structure of the newly accepted mutant.
                pdb_name = str(i) + ".pdb"
                mutant_pose.dump_pdb(pdb_name)

            # The accepted mutant becomes the new reference.
            initial_pose = mutant_pose
            init_score = mut_score

            # Update the number of accepts.
            i += 1

    print('\nMutations and scoring complete.')
    t1 = time()

    # Persist the collected trajectory to the file named on the command line.
    with open(out_file, "w") as outfile:
        outfile.writelines(data)

    print('Data written to: %s' % out_file)
    print('program takes %f' % (t1 - t0))
def main():
    """Evolve a protein under a stability threshold and write the trajectory.

    Command-line arguments (parsed with ``argparse``):
        pdb_filename      path of the PDB file to load; the matching FASTA is
                          looked up by replacing ``/structures/`` with
                          ``/fastas/`` and ``.pdb`` with ``.fasta``
        replicate_number  integer used only to label the output file

    The input pose is repacked and minimized once.  Each generation then
    proposes one random point mutation via ``mutate_residue``, scores it and
    accepts it with the probability returned by ``calc_prob_mh``.  The run
    ends after ``10 * len(wild-type sequence)`` accepted mutations and the
    collected rows are written to
    ``../Results/<prot>/<prot>_thresh=..._Neff=..._beta=..._i=<replicate>.csv``.

    Raises:
        ValueError: if the FASTA file does not contain exactly one record.
    """
    # Local import: only needed to prepare the results directory.
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('pdb_filename', action="store", type=str)
    parser.add_argument('replicate_number', action="store", type=int)

    inputs = parser.parse_args()
    pdb_file = inputs.pdb_filename
    prot_name = pdb_file.split('/')[-1].split('.')[0]
    # Timer used to report how long the run took.
    t0 = time()
    fasta_file = pdb_file.replace('/structures/',
                                  '/fastas/').replace('.pdb', '.fasta')
    records = list(SeqIO.parse(fasta_file, 'fasta'))
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if len(records) != 1:
        raise ValueError('expected exactly one record in %s, found %d' %
                         (fasta_file, len(records)))
    wt_seq = str(records[0].seq)

    # Initialize Rosetta.
    init(extra_options=
         '-mute basic -mute core -rebuild_disulf false -detect_disulf false')

    ########################
    # Constants
    ########################
    PACK_RADIUS = 12.0
    # Amino acids
    AAs = ("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P",
           "Q", "R", "S", "T", "V", "W", "Y")
    # For every amino acid, the list of the other 19 it may mutate to.
    AAs_choice_dict = {
        aa: [other_aa for other_aa in AAs if other_aa != aa]
        for aa in AAs
    }
    # Number of mutations to accept
    max_accept_mut = 10 * len(wt_seq)

    # Population size
    N = 1000
    # Beta (temp term)
    beta = 1
    # Fraction of the WT stability value to shoot for
    threshold_fraction = 0.5
    ########################
    ########################

    # Everything the output name depends on is known already, so make sure
    # the results directory exists *before* the long simulation instead of
    # failing at the very end and losing the whole run.
    output_filename = '../Results/{}/{}_thresh={}_Neff={}_beta={}_i={}.csv'.format(
        prot_name, prot_name, threshold_fraction, N, beta,
        inputs.replicate_number)
    output_dir = os.path.dirname(output_filename)
    try:
        os.makedirs(output_dir)
    except OSError:
        # Already present (possibly created by a parallel replicate) is fine;
        # anything else is a real error.
        if not os.path.isdir(output_dir):
            raise

    # Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    # Load a clean pdb file
    initial_pose = pose_from_pdb(pdb_file)

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap: backbone and side-chain torsions are both free.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    # Pack and minimize initial pose to remove clashes.
    # The pose is scored once before packing, as in the original protocol.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    # Threshold for selection
    threshold = post_pre_packing_score * threshold_fraction
    print('threshold: %s' % threshold)

    data.append('WT,' + str(post_pre_packing_score) + ',0.0,0.0,0\n')

    # Number of residues to select from
    n_res = initial_pose.total_residue()

    # Start evolution: i counts accepted mutations, gen counts proposals.
    i = 0
    gen = 0
    while i < max_accept_mut:
        gen += 1

        # Pick a place to mutate and look up the residue currently there.
        mut_location = random.randint(1, n_res)
        res = initial_pose.residue(mut_location)

        # Choose the amino acid to mutate to (never the current one).
        proposed_res = random.choice(AAs_choice_dict[res.name1()])

        # Make the mutation
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        # Score mutant
        variant_score = sf(mutant_pose)

        # Probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N,
                                   beta, threshold)

        # Test to see if mutation is accepted
        if random.random() < probability:

            # Name of the mutant: <from><pdb residue number><to>
            variant_name = res.name1() + str(initial_pose.pdb_info().number(
                mut_location)) + str(proposed_res)

            # Save name and energy change
            data.append(variant_name + "," + str(variant_score) + "," +
                        str(variant_score - post_pre_packing_score) + "," +
                        str(probability) + "," + str(gen) + "\n")

            # Structures are not dumped here; only the CSV trajectory is
            # written at the end of the run.

            # The accepted mutant becomes the new reference.
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            # Update number of accepts
            i += 1

    print('\nMutations and scoring complete.')
    t1 = time()
    # Output results.
    with open(output_filename, "w") as outfile:
        outfile.writelines(data)

    print('Data written to: %s' % output_filename)
    print('program takes %f' % (t1 - t0))
示例#3
0
def main():
    """Score reversions of mutations recorded by a forward simulation.

    ``sys.argv[1]`` is the CSV written by the forward simulation.  The
    threshold is the score in the second column of the file's second line
    plus 10.  For each recorded mutation after the burn-in, a window of 31
    consecutive ``*.pdb`` snapshots from the working directory is loaded; in
    each snapshot the mutated position is set to the mutation's target
    residue (steps -15..-1) or to its original residue (steps 0..15), both
    structures are repacked and minimized, and the score change plus the
    ``calc_prob_mh`` acceptance probability are recorded.  All rows are
    written to ``premutate_rep1_bb_T_ch_T.csv``.
    """
    inputfile = sys.argv[1]

    # Read the forward-simulation output once; the handle is closed at once.
    with open(inputfile) as variant_scores:
        lines = variant_scores.readlines()

    # Line index 1 is treated as the wild-type entry (presumably index 0 is
    # the CSV header); its second column is the starting stability.
    start_stab = lines[1].split(',')[1]
    threshold = float(start_stab) + 10
    print(threshold)

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 0
    # Population size
    N = 100
    # Beta (temp term)
    beta = .6
    # Number of leading lines of the input treated as burn-in.
    BURN_IN = 500
    # Snapshots scored on each side of the central one.
    HALF_WINDOW = 15

    # Set up ScoreFunction
    sf = get_fa_scorefxn()

    # Set up MoveMap: backbone and side-chain torsions are both free.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')

    # Prepare data headers
    data = ['pdbfile_target,pdbfile_used,step,RevertTo,Change,Pos,From,OrgScore,RevScore,Change,Prob\n']

    # Show the line at the end of the burn-in, where processing starts.
    print("staring here %s" % lines[BURN_IN])

    gen = 1
    # All numbered pdb snapshots, in numerical order.
    sort_list = sorted(glob.glob('*[0-9].pdb'), key=numericalSort)
    # Keep the last 1000 snapshots plus 16 extra, so that the snapshots
    # preceding the 500th mutation are available too.
    sort_list = sort_list[-1016:]

    for i in range(1, len(sort_list) - 2 * HALF_WINDOW):
        step = -HALF_WINDOW
        # Mutation for this window; the first CSV column is its name,
        # e.g. <from><position><to>.
        var_line = lines[gen + BURN_IN].split(',')[0]

        for infile in sort_list[i:i + 2 * HALF_WINDOW + 1]:
            print(var_line)
            # Position = all digits of the variant name.  Joining the digit
            # characters works on Python 2 and 3, unlike int(filter(...)).
            var_loc = int(''.join(ch for ch in var_line if ch.isdigit()))

            if step < 0:
                # Snapshots before the mutation: introduce the new residue.
                var_rev = var_line[-1:]
                old = var_line[:1]
            else:
                # Snapshots from the mutation onward: go back to the old one.
                var_rev = var_line[:1]
                old = var_line[-1:]

            print("Current File Being Processed is: " + infile)
            print("revering to: %s" % var_rev)
            print("at: %s" % var_loc)

            # Load the snapshot and make the substitution.
            initial_pose = pose_from_pdb(infile)
            mutant_pose = mutate_residue(initial_pose, var_loc, var_rev,
                                         PACK_RADIUS, sf)

            # Repack the mutant.
            task1 = standard_packer_task(mutant_pose)
            task1.restrict_to_repacking()
            task1.or_include_current(True)
            packer_rotamers_mover1 = RotamerTrialsMover(sf, task1)
            packer_rotamers_mover1.apply(mutant_pose)

            # Repack the unmodified snapshot the same way.
            task2 = standard_packer_task(initial_pose)
            task2.restrict_to_repacking()
            task2.or_include_current(True)
            pack_rotamers_mover2 = RotamerTrialsMover(sf, task2)
            pack_rotamers_mover2.apply(initial_pose)

            # Minimize both.
            min_mover.apply(mutant_pose)
            min_mover.apply(initial_pose)

            # Scores
            variant_score = sf(mutant_pose)
            initial_score = sf(initial_pose)

            # Acceptance probability of the substitution
            probability = calc_prob_mh(variant_score, initial_score, N, beta,
                                       threshold)

            # Build the CSV row once, then echo and store it.
            row = ",".join([
                str(gen + BURN_IN - 1) + ".pdb",
                str(infile),
                str(step),
                str(var_line),
                str(var_rev),
                str(var_loc),
                str(old),
                str(initial_score),
                str(variant_score),
                str(variant_score - initial_score),
                str(probability),
            ]) + "\n"
            print(row)
            data.append(row)
            step = step + 1
        gen += 1

    print('\nDONE')

    data_filename = 'premutate_rep1_bb_T_ch_T.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)
示例#4
0
def main():
    #takes name of pdb file without the extention
    args = sys.argv
    pdb_file = args[1]
    #set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    #Number of mutations to accept
    max_accept_mut = 1500
    #Population size
    N = 100
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    #change these for more or less flexability
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    #Set threshold for selection
    threshold = pre_pre_packing_score / 2

    data.append('WT,' + str(post_pre_packing_score) + ',0.0 ,0.0,0\n')

    #number of residues to select from
    n_res = initial_pose.total_residue()

    #start sim
    i = 0
    gen = 0
    while i < max_accept_mut:
        #update the number of generations that have pased
        gen += 1

        print 'accepts:', i

        #pick a place to mutate
        mut_location = random.randint(1, n_res)

        #get the amino acid at that position
        res = initial_pose.residue(mut_location)

        #don't mess with C, just choose again
        while (res.name1() == 'C'):
            mut_location = random.randint(1, n_res)
            #get the amino acid at that position
            res = initial_pose.residue(mut_location)

#choose the amino acid to mutate to
        new_mut_key = random.randint(0, len(AAs) - 1)

        proposed_res = AAs[new_mut_key]

        #don't bother mutating to the same amino acid it just takes more time
        while (proposed_res == res.name1()):
            new_mut_key = random.randint(0, len(AAs) - 1)
            proposed_res = AAs[new_mut_key]

#make the mutation
#this is actually a really bad model, and probably shouldnt be used. In new version is repack the whole thing, then reminimize, I should also backrub it.
        mutant_pose = mutate_residue(initial_pose, mut_location, proposed_res,
                                     PACK_RADIUS, sf)

        #score mutant
        variant_score = sf(mutant_pose)

        #get the probability that the mutation will be accepted
        probability = calc_prob_mh(variant_score, post_pre_packing_score, N,
                                   beta, threshold)

        #test to see if mutation is accepted
        if random.random() < probability:

            #create a name for the mutant if its going to be kept
            variant_name = res.name1() + str(initial_pose.pdb_info().number(
                mut_location)) + str(proposed_res)

            # Assuming 1000 burn in phase, take this if out if you want to store everything
            if i > 1000:
                #save name and energy change
                data.append(variant_name + "," + str(variant_score) + "," +
                            str(variant_score - post_pre_packing_score) + "," +
                            str(probability) + "," + str(gen) + "\n")

                pdb_name = str(i) + ".pdb"
                mutant_pose.dump_pdb(pdb_name)

            #update the wildtype
            initial_pose = mutant_pose
            post_pre_packing_score = variant_score

            #update number of accepts
            i += 1

    print '\nMutations and scoring complete.'
    t1 = time()
    # Output results.
    data_filename = pdb_file[:-5] + 'mh_1500_rep3.csv'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print 'Data written to:', data_filename
    print 'program takes %f' % (t1 - t0)
示例#5
0
# Pre-packing of the reference structure.
# This fragment relies on names created outside of it: `sf` (called on a pose
# to obtain its score), `initial_pose`, `args` (read for `.minimize` and
# `.pdb_filename`) and `data` (a list that rows are appended to).

# MoveMap with both backbone and chi (side-chain) movement switched on.
mm = MoveMap()
mm.set_bb(True)
mm.set_chi(True)

# Pack and minimize initial pose to remove clashes.
# Score of the pose as loaded, before any packing.
pre_pre_packing_score = sf(initial_pose)

# Packer task restricted to repacking, with the current rotamers included,
# applied to the pose in place through a RotamerTrialsMover.
task = standard_packer_task(initial_pose)
task.restrict_to_repacking()
task.or_include_current(True)
pack_rotamers_mover = RotamerTrialsMover(sf, task)
pack_rotamers_mover.apply(initial_pose)

# The minimizer is always configured (min type 'linmin'), but it is only
# applied to the pose when the `minimize` option is set.
min_mover = MinMover()
min_mover.movemap(mm)
min_mover.score_function(sf)
min_mover.min_type('linmin')
if args.minimize:
    min_mover.apply(initial_pose)

# Score after packing (and after minimization, if it was requested).
post_pre_packing_score = sf(initial_pose)

# Report both scores (Python 2 print statements; a bare `print` emits an
# empty line).
print
print 'Reference Protein:', args.pdb_filename
print '  Score:'
print '    Before pre-packing:', pre_pre_packing_score
print '    After pre-packing:', post_pre_packing_score
print

# Wild-type row: label, post-packing score, and 0.0 in the third column.
data.append('WT,' + str(post_pre_packing_score) + ',0.0\n')
示例#6
0
def main():
    #takes name of pdb file without the extention
    args = sys.argv
    pdb_file = args[1]
    #set up timer to figure out how long the code took to run
    t0 = time()

    # Initialize Rosetta.
    init(extra_options='-mute basic -mute core')

    # Constants
    PACK_RADIUS = 10.0
    #Amino acids, notice there is no C
    AAs = ("A", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q",
           "R", "S", "T", "V", "W", "Y")
    #Number of mutations to accept
    max_accept_mut = 5000
    #Population size
    N = 100
    #Beta (temp term)
    beta = 1

    #Prepare data headers
    data = ['Variant,Rosetta Score,"delta-delta-G",Probability,Generation\n']

    #Load and clean up pdb file
    name = pdb_file + ".pdb"
    cleanATOM(name)
    clean_name = pdb_file + ".clean.pdb"
    initial_pose = pose_from_pdb(clean_name)

    #Set up ScoreFunction
    sf = get_fa_scorefxn()

    #Set up MoveMap.
    mm = MoveMap()
    mm.set_bb(True)
    mm.set_chi(True)

    #Pack and minimize initial pose to remove clashes.
    pre_pre_packing_score = sf(initial_pose)

    task = standard_packer_task(initial_pose)
    task.restrict_to_repacking()
    task.or_include_current(True)
    pack_rotamers_mover = RotamerTrialsMover(sf, task)
    pack_rotamers_mover.apply(initial_pose)

    min_mover = MinMover()
    min_mover.movemap(mm)
    min_mover.score_function(sf)
    min_mover.min_type('dfpmin_armijo_nonmonotone')
    min_mover.apply(initial_pose)

    post_pre_packing_score = sf(initial_pose)

    pdb_name = str(pdb_file) + "_min.pdb"
    initial_pose.dump_pdb(pdb_name)

    #Set threshold for selection
    #threshold = post_pre_packing_score/2
    #threshold = post_pre_packing_score

    data.append(str(pdb_file) + str(post_pre_packing_score) + ',0.0,0.0,0\n')

    data_filename = pdb_file + '.score'
    with open(data_filename, "w") as f:
        f.writelines(data)

    print 'Data written to:', data_filename
    '''