def certify_pdb_pdb(pdbfile1, pdbfile2):
    '''
    Check that two pdb files exist and carry the same atom names.

    returns:
    --------
        (fileexist, value)
        fileexist: 1 when both paths are regular files, otherwise 0
        value:     1 when both files were read and the results of name()
                   compare equal, otherwise 0

    Any exception raised while checking or reading leaves value at 0.
    '''
    fileexist = 0
    value = 0
    try:
        found1 = os.path.isfile(pdbfile1)
        found2 = os.path.isfile(pdbfile2)
        if not (found1 and found2):
            return fileexist, value

        fileexist = 1
        first_mol = sasmol.SasMol(0)
        second_mol = sasmol.SasMol(1)
        try:
            first_mol.read_pdb(pdbfile1, fastread=True)
            second_mol.read_pdb(pdbfile2, fastread=True)
            if first_mol.name() == second_mol.name():
                value = 1
        except:
            # a failed read or comparison means "not certified"
            value = 0
    except:
        value = 0
    return fileexist, value
def replace_N_atoms():
    '''
    Rename the N1/N9 atoms in two hard-coded DNA pdb files.

    Reads '../dna1_right_seq.pdb' and '../dna2_right_seq.pdb', then passes
    each molecule and its output name ('../dna1_right.pdb',
    '../dna2_right.pdb') to replace_n1_n9, which does the renaming.

    Renaming rule as recorded by the original author (confirm against
    replace_n1_n9, which is not defined in this function):
        GUA or ADE   N1 -> N9
        CYT or THY   N1 -> N9
    '''
    # Original author's outline of the approach:
    #   Count the number of resids for each resid
    #   Loop over each resid with <13 atoms
    #     . use enumerate to get the indices for the atoms of the residue
    #     . iterate over those indices to find the N1 or N9 atom
    #     . depending on which base type it is, replace the atom name
    #   Store the names in the SasMol object
    #   Save the pdb
    file_pairs = (
        ('../dna1_right_seq.pdb', '../dna1_right.pdb'),
        ('../dna2_right_seq.pdb', '../dna2_right.pdb'),
    )

    # read every input first so nothing is written if a read fails
    pending = []
    for in_name, out_name in file_pairs:
        dna = sasmol.SasMol(0)
        dna.read_pdb(in_name)
        pending.append((dna, out_name))

    for dna, out_name in pending:
        replace_n1_n9(dna, out_name)

    return
def certify_pdb_dcd(pdbfile, dcdfile):
    '''
    This method checks that the number of atoms in the pdb file is equal to
    the number of atoms in the dcd file.

    The method assumes that the pdb and dcd files exist and are readable.

    returns:
    --------
        value: 1 when the atom counts match, 0 when they differ or when any
               step (reading the pdb, opening the dcd, ...) raises
    '''
    value = 0
    try:
        pdbmol = sasmol.SasMol(0)
        dcdmol = sasmol.SasMol(1)

        pdbmol.read_pdb(pdbfile, fastread=True)
        natoms_pdb = pdbmol.natoms()

        # open_dcd_read returns a sequence: [0] is the handle that
        # close_dcd_read takes, [1] is the atom count
        dcd_handle = dcdmol.open_dcd_read(dcdfile)
        try:
            natoms_dcd = dcd_handle[1]
        finally:
            # the original left the dcd open; release it once the header
            # has been inspected
            dcdmol.close_dcd_read(dcd_handle[0])

        if natoms_pdb == natoms_dcd:
            value = 1
    except:
        value = 0
    return value
def split_dcd(inputs): import sassie.sasmol.sasmol as sasmol import numpy as np inputs.out_dir = inputs.runname + '/crysol' if os.path.exists(inputs.out_dir): print 'WARNING: run folder exists (%s), moving it\n' % inputs.out_dir append_bk(inputs.out_dir) # print 'select one of the following (0/1/2): quit / move / replace' # folder = folder_exists() # folder.runname = inputs.out_dir # result = folder.cmdloop() else: print 'created new run folder: %s' % inputs.out_dir mkdir_p(inputs.out_dir) mol = sasmol.SasMol(0) mol.read_pdb(inputs.pdb) # mol.read_dcd(inputs.dcd) dcd_file = mol.open_dcd_read(inputs.dcd) total_frames = dcd_file[2] n_atoms = dcd_file[1] copy_mask = np.ones(n_atoms, dtype=np.int32) if inputs.ncpu < 0: print 'ncpu: %d < 0, using |%d| = %d instead' % ( inputs.ncpu, inputs.ncpu, abs(inputs.ncpu)) inputs.ncpu = abs(inputs.ncpu) n_frames_sub = total_frames / inputs.ncpu last_frame = 0 sub_dirs = [] dcd_file_names = [] for cpu in xrange(1, inputs.ncpu + 1): sub_dir = inputs.out_dir + '/sub' + str(cpu).zfill(2) + '/' sub_dirs.append(sub_dir) mkdir_p(sub_dir) os.system('cp %s %s' % (inputs.pdb, sub_dir)) sub_mol = sasmol.SasMol(0) mol.copy_molecule_using_mask(sub_mol, copy_mask, 0) with cd(sub_dir): if cpu == inputs.ncpu: n_frames_sub = n_frames_sub + total_frames % inputs.ncpu dcd_out_name = 'sub' + str(cpu).zfill(2) + '.dcd' dcd_file_names.append(dcd_out_name) first = last_frame last = last_frame + n_frames_sub dcd_out_file = sub_mol.open_dcd_write(dcd_out_name) for (i, frame) in enumerate(xrange(first, last)): sub_mol.read_dcd_step(dcd_file, frame) sub_mol.write_dcd_step(dcd_out_file, 0, i + 1) sub_mol.close_dcd_write(dcd_out_file) del sub_mol last_frame += n_frames_sub print return sub_dirs, dcd_file_names
def get_pdb_complex_stats(filename, segname, variables):
    '''
    Collect per-atom properties for one segment of a pdb file.

    inputs:
    -------
        filename:  pdb file to read
        segname:   segment name to select (surrounding whitespace stripped)
        variables: iterable of property names; recognized names are
                   'atom', 'index', 'name', 'loc', 'resname', 'chain',
                   'resid', 'rescode', 'x', 'y', 'z', 'occupancy', 'beta',
                   'segname', 'element', 'charge' and 'moltype'.
                   Unrecognized names are silently skipped.

    returns:
    --------
        (value, result)
        value:  1 on success, 0 if anything raised
        result: list with one entry per recognized name, in request order,
                or None on failure

    'x', 'y' and 'z' are taken from frame 0 of coor().  The original code
    indexed an undefined name `coor`, so those three requests always failed.
    '''
    accessor_names = ('atom', 'index', 'name', 'loc', 'resname', 'chain',
                      'resid', 'rescode', 'occupancy', 'beta', 'segname',
                      'element', 'charge', 'moltype')
    axis_names = ('x', 'y', 'z')

    value = 0
    result = None
    try:
        o = sasmol.SasMol(0)
        o.read_pdb(filename, fastread=True)

        seg_filter = 'segname[i] == "' + segname.strip() + '"'
        error, seg_mask = o.get_subset_mask(seg_filter)

        a = sasmol.SasMol(1)
        error = o.copy_molecule_using_mask(a, seg_mask, 0)

        collected = []
        for variable in variables:
            if variable in axis_names:
                # assumes coor() is indexed (frame, atom, axis) -- this is
                # the indexing the original expression was written for
                axis = axis_names.index(variable)
                collected.append(a.coor()[0, :, axis])
            elif variable in accessor_names:
                # each of these names is a SasMol accessor method
                collected.append(getattr(a, variable)())
        result = collected
        value = 1
    except:
        value = 0
        result = None
    return value, result
def align_mol(inputs):
    '''
    Align one molecule onto another using a subset of atoms from each.

    input:
    ------
        inputs: object that should contain the following attributes
            aa_goal:    goal sasmol object
            aa_move:    sasmol object to align
            goal_basis: goal basis for alignment
            move_basis: move basis for alignment

    returns:
    --------
        None: inputs.aa_move is modified in place (frame 0 is centered and
              then aligned); nothing is returned

    note: inputs.ref and inputs.move are typically the same pdb/dcd
    '''
    aa_goal = inputs.aa_goal
    aa_move = inputs.aa_move
    goal_basis = inputs.goal_basis
    move_basis = inputs.move_basis

    # create the SasMol objects that hold the basis atoms
    sub_goal = sasmol.SasMol(0)
    sub_move = sasmol.SasMol(0)

    error, goal_seg_mask = aa_goal.get_subset_mask(goal_basis)
    error, move_seg_mask = aa_move.get_subset_mask(move_basis)

    error = aa_goal.copy_molecule_using_mask(sub_goal, goal_seg_mask, 0)
    error = aa_move.copy_molecule_using_mask(sub_move, move_seg_mask, 0)

    # center of mass of the goal subset, then its centered coordinates
    com_sub_goal = sub_goal.calccom(0)
    sub_goal.center(0)
    coor_sub_goal = sub_goal.coor()[0]

    # move the full molecule to the origin and refresh the subset from it
    aa_move.center(0)
    # keep the coordinates in a local: the original assigned them to
    # sub_move.coor, which replaced the coor() method on that instance
    error, sub_move_coor = aa_move.get_coor_using_mask(0, move_seg_mask)
    sub_move.setCoor(sub_move_coor)

    # center of mass of the moving subset, then its centered coordinates
    com_sub_move = sub_move.calccom(0)
    sub_move.center(0)
    coor_sub_move = sub_move.coor()[0]

    # align aa_move using the transformation from sub_move to sub_goal
    aa_move.align(0, coor_sub_move, com_sub_move, coor_sub_goal, com_sub_goal)
def certify_dcd_psf(dcdfile, psffile):
    '''
    This method checks that the number of atoms in the psf file is equal to
    the number of atoms in the dcd file.

    Only the existence of the psf file is checked here; the dcd file is
    assumed to exist and be readable.

    returns:
    --------
        (fileexist, value)
        fileexist: 1 when psffile is a regular file, otherwise a false value
        value:     1 when the atom counts match, 0 when they differ or when
                   reading either file raises
    '''
    fileexist = 0
    value = 0
    try:
        fileexist = os.path.isfile(psffile)
        if not fileexist:
            return fileexist, value

        fileexist = 1
        try:
            natoms_psf, names_psf = read_psf_file(psffile)

            dcdmol = sasmol.SasMol(1)
            # open_dcd_read returns a sequence: [0] is the handle that
            # close_dcd_read takes, [1] is the atom count
            dcd_handle = dcdmol.open_dcd_read(dcdfile)
            try:
                natoms_dcd = dcd_handle[1]
            finally:
                # the original left the dcd open; release it here
                dcdmol.close_dcd_read(dcd_handle[0])

            if natoms_psf == natoms_dcd:
                value = 1
        except:
            value = 0
    except:
        value = 0
    return fileexist, value
def make_complex_groups(hybrid, residues_in_groups, segnames_in_groups): assert len(residues_in_groups) == len(segnames_in_groups), ( 'inputs do not match') frame = 0 resid = hybrid.resid() group_masks = [] groups = [] for (i, res_group) in enumerate(residues_in_groups): basis = '( ' for (j, resids) in enumerate(res_group): if j > 0: basis += ' or ' for (k, resid) in enumerate(resids): if k == 0: basis += '((segname[i] == "' + segnames_in_groups[i][j] + \ '") and (resid[i] == ' + str(resid) else: basis += ' or resid[i] == ' + str(resid) basis += ')) ' basis += ')' print '>> creating basis = ', basis error, mask = hybrid.get_subset_mask(basis) group_masks.append(mask) this_group = sasmol.SasMol(0) error = hybrid.copy_molecule_using_mask(this_group, mask, frame) groups.append(this_group) return groups, group_masks
def certify_pdb_psf(pdbfile, psffile):
    '''
    Check that a pdb file and a psf file hold the same number of atoms.

    Only the existence of the psf file is checked here.

    returns:
    --------
        (fileexist, value)
        fileexist: 1 when psffile is a regular file, otherwise a false value
        value:     1 when the atom counts match, 0 when they differ or when
                   reading either file raises

    The atom names are read from both files but are not compared.
    '''
    fileexist = 0
    value = 0
    try:
        fileexist = os.path.isfile(psffile)
        if not fileexist:
            return fileexist, value

        fileexist = 1
        try:
            natoms_psf, names_psf = read_psf_file(psffile)

            pdbmol = sasmol.SasMol(1)
            pdbmol.read_pdb(pdbfile, fastread=True)
            natoms_pdb = pdbmol.natoms()
            names_pdb = pdbmol.name()

            # a name-by-name comparison was disabled by the original author:
            # if((natoms_pdb == natoms_psf) and (names_pdb == names_psf)):
            if natoms_pdb == natoms_psf:
                value = 1
        except:
            value = 0
    except:
        value = 0
    return fileexist, value
def makeLongDNA(n_lp): print 'making DNA that is %d*lp long' % n_lp # 15 bp/bead or 51 A/bead (3.4 A/bp) lp = 530 # persistence length in A l = 2**(1. / 6.) * 46 # separation distance between beads = 51.6A longDNA = sasmol.SasMol(0) L = n_lp * lp N = int(L / l) natoms = N + 1 print 'natoms = ', natoms longDNA._L = L longDNA._natoms = natoms # initialize the long DNA coordinates longCoor = np.zeros((1, natoms, 3), np.float) # set the z-values to the index of the array longCoor[0][:, 2] = range(natoms) # scale the z-values to the right seperation longCoor *= l # print longCoor[-5:] longDNA.setCoor(longCoor) longDNA.setElement(['C'] * natoms) vecXYZ = np.zeros((natoms * 3, 3)) vecXYZ[0:natoms] = [1, 0, 0] vecXYZ[natoms:2 * natoms] = [0, 1, 0] vecXYZ[2 * natoms:3 * natoms] = [0, 0, 1] # n = L/l # number of times need to repeat the grain # print '(l, L, n)', (l, L, n) return (longDNA, vecXYZ)
def get_coords_from_argon(argon_file_name):
    '''
    Read a pdb file and return the coordinates of its first frame.

    inputs:
    -------
        argon_file_name: pdb file to read

    returns:
    --------
        frame 0 of the coordinates stored in the file
    '''
    argon_mol = sasmol.SasMol(0)
    argon_mol.read_pdb(argon_file_name)
    return argon_mol.coor()[0]
def pdb_get_sequence(pdbobj=None, outfile=None): ''' get the sequence of a sasmol object ''' if isinstance(pdbobj, basestring): pdbfile = pdbobj pdbobj = sasmol.SasMol(0) pdbobj.read_pdb(pdbfile) resname2seq = { 'ALA': 'A', # amino acids 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLU': 'E', 'GLN': 'Q', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V', 'HSE': 'H', 'G': 'G', # DNA 'A': 'A', 'T': 'T', 'C': 'C', 'DG': 'G', 'DA': 'A', 'DT': 'T', 'DC': 'C', 'GUA': 'G', 'ADE': 'A', 'THY': 'T', 'CYT': 'C' } resid_all = pdbobj.resid() idx_unique = np.nonzero(np.insert(resid_all[1:] - resid_all[0:-1], 0, 1)) idx_unique = idx_unique[0] # it appears to be a tuple resname_all = pdbobj.resname() sequence = map(lambda i: resname2seq[resname_all[i]], idx_unique) if outfile == None: print "Sequence: total {} residues".format(len(sequence)) print "".join(sequence) else: with open(outfile, 'w') as fileobj: fileobj.write("".join(sequence)) return sequence
def split_dcd(pdb_full_name, dcd_full_name, n_cpus, starting_dir):
    '''
    Split a dcd trajectory into n_cpus pieces under starting_dir.

    inputs:
    -------
        pdb_full_name: pdb file matching the dcd
        dcd_full_name: dcd file to split
        n_cpus:        number of pieces
        starting_dir:  directory that receives the subNN folders

    returns:
    --------
        sub_dirs:      list of the subNN directories
        sub_dcd_names: list of the dcd names (relative to each sub dir)
        first_last:    list of [first, last] frame bounds for each piece
                       (last is exclusive)

    The frames are divided evenly and the last piece also takes the
    remainder.  When n_cpus is 1 nothing is copied: subNN.dcd is made a
    symbolic link to '../../../' + dcd_full_name instead.
    '''
    mol = sasmol.SasMol(0)
    mol.read_pdb(pdb_full_name)

    dcd_file = mol.open_dcd_read(dcd_full_name)
    total_frames = dcd_file[2]

    _, copy_mask = mol.get_subset_mask('all')

    # Python 2 integer division; the remainder goes to the last piece
    frames_per_piece = total_frames / n_cpus
    leftover = total_frames % n_cpus

    next_frame = 0
    sub_dirs = []
    sub_dcd_names = []
    first_last = []
    for cpu in xrange(1, n_cpus + 1):
        tag = 'sub%s' % str(cpu).zfill(2)

        sub_dir = op.join(starting_dir, tag)
        sub_dirs.append(sub_dir)
        mkdir_p(sub_dir)

        sub_mol = sasmol.SasMol(0)
        mol.copy_molecule_using_mask(sub_mol, copy_mask, 0)

        with cd(sub_dir):
            if cpu == n_cpus:
                frames_per_piece = frames_per_piece + leftover

            dcd_out_name = tag + '.dcd'
            sub_dcd_names.append(dcd_out_name)

            first = next_frame
            last = first + frames_per_piece

            if n_cpus != 1:
                dcd_out_file = sub_mol.open_dcd_write(dcd_out_name)
                for step, frame in enumerate(xrange(first, last), 1):
                    sub_mol.read_dcd_step(dcd_file, frame)
                    sub_mol.write_dcd_step(dcd_out_file, 0, step)
                sub_mol.close_dcd_write(dcd_out_file)
            else:
                # a single piece is the whole trajectory: link, don't copy
                rel_dcd_name = '../../../%s' % dcd_full_name
                assert op.exists(rel_dcd_name), 'ERROR: did not find dcd file'
                subprocess.call(['ln', '-s', rel_dcd_name, dcd_out_name])

        first_last.append([first, last])
        next_frame += frames_per_piece

    mol.close_dcd_read(dcd_file[0])

    return sub_dirs, sub_dcd_names, first_last
def pdb_get_chains(pdbobj=None, outfile='seg_', segnames=None, chainids=None, get_seq=True): ''' get the sequence of a sasmol object ''' # filename is passed, read it if isinstance(pdbobj, basestring): pdbfile = pdbobj pdbobj = sasmol.SasMol(0) pdbobj.read_pdb(pdbfile) # set the filter prefix (chainid has priority) if segnames != None: filter_name = segnames filter_tmpl = "(segname[i] == '{}')" if chainids != None: filter_name = chainids filter_tmpl = "(chain[i] == '{}')" # a single string is passed, convert it to a list if isinstance(filter_name, basestring): filter_name = [filter_name] seg_mols = [] for eachfilter in filter_name: print "Filter pdb by: ", filter_tmpl.format(eachfilter) error, mask = pdbobj.get_subset_mask(filter_tmpl.format(eachfilter)) if error: print error eachfilter_mol = sasmol.SasMol(0) error = pdbobj.copy_molecule_using_mask(eachfilter_mol, mask, 0) if error: print error # eachfilter_mol.setSegname(eachfilter) eachfilter_mol.write_pdb(outfile + '.pdb', 0, 'w') seg_mols.append(eachfilter_mol) if get_seq: pdb_get_sequence.pdb_get_sequence(eachfilter_mol) print 'COMPLETE' return seg_mols
def main(): m1 = sasmol.SasMol(0) m1.read_pdb(ARGS.pdb) print ARGS.segnames segname1 = ARGS.segnames[0] segname2 = ARGS.segnames[1] print 'segname 1: ', segname1 print 'segname 2: ', segname2 names = m1.resname() ids = m1.resid() c = m1.segname() psfgenFile = ARGS.pdb[:-4] + '_patches.txt' outfile = open(psfgenFile, 'w') # open the file timestr = time.strftime("# created on %d %B %Y by 'pdb2psfgen.py'\n") outfile.write(timestr) outfile.write('# dna1: segname ' + segname1 + '\n') outfile.write('# dna2: segname ' + segname2 + '\n') pyr = ['C', 'T', 'DC', 'DT', 'CYT', 'THY'] pur = ['A', 'G', 'DA', 'DG', 'ADE', 'GUA'] pyrStr = 'patch DEO1 ' purStr = 'patch DEO2 ' n = 0 for (j, i) in enumerate(ids): # only want this to happend once for each residue if n != i: n = i # print 'adding line %d' % i if c[j] in segname1: dna = 'dna1:%d\n' % i elif c[j] in segname2: dna = 'dna2:%d\n' % i else: print 'Skipping residue from unspecified segname: ', c[j] break # s dna = 'protein:%d\n' % i if names[j] in pyr: outfile.write(pyrStr + dna) # print pyrStr + dna elif names[j] in pur: outfile.write(purStr + dna) # print purStr + dna else: print 'ERROR!!! unknown resname in specified segname: ', names[j] print '\n' outfile.close() print 'COMPLETE \m/ >.< \m/'
def get_linker_assign_coor(three_ball, ball_diameter, minimum_base_angle,
                           surface1_to_anchor_cord_length, linker_pdb):
    '''
    Read a linker pdb, lay it along the z-axis and attach it to three_ball.

    inputs:
    -------
        three_ball:         molecule passed on to build_hybrid
        ball_diameter:      ball diameter; half of it is part of the z
                            offset of the linker
        minimum_base_angle: passed on to build_hybrid
        surface1_to_anchor_cord_length: second part of the z offset
        linker_pdb:         pdb file holding the linker

    returns:
    --------
        hybrid: the result of build_hybrid(linker, three_ball,
                minimum_base_angle)

    side effects:
    -------------
        writes the repositioned linker to 'new_single_linker.pdb'

    The builtin float replaces the removed numpy.float alias; both name the
    same dtype.
    '''
    frame = 0
    linker = sasmol.SasMol(0)
    linker.read_pdb(linker_pdb)

    # move linker com to origin
    linker.moveto(frame, [0.0, 0.0, 0.0])

    # move the last atom of the linker to the origin
    # (not debugged for all directions of linker ...)
    last_c = linker.coor()[0][-1]
    linker.translate(frame, -last_c)

    first_n = linker.coor()[0][0]
    last_c = linker.coor()[0][-1]

    # end-point vector b and its length
    b = first_n - last_c
    linker_length = numpy.sqrt(numpy.sum(b**2.0))

    # axis to rotate to (z-axis here)
    a = numpy.zeros(3, float)
    a[2] = 1.0

    # align coordinates onto axis (z-axis from "a" above)
    R = get_alignment_rotation_matrix(b, a)
    rotate_coordinates(b, R, linker)

    # move linker so that its last atom is at the origin
    linker.translate(frame, -linker.coor()[0][-1])

    # move linker to anchor point
    anchor_z = ((ball_diameter / 2.0) + surface1_to_anchor_cord_length +
                (linker_length / 2.0))
    linker.moveto(frame, [0.0, 0.0, anchor_z])
    linker.write_pdb('new_single_linker.pdb', frame, 'w')

    # build hybrid molecule
    hybrid = build_hybrid(linker, three_ball, minimum_base_angle)
    return hybrid
def get_pdb_stats(filename, variables):
    '''
    Collect per-atom properties from a pdb file.

    inputs:
    -------
        filename:  pdb file to read
        variables: iterable of property names; recognized names are
                   'atom', 'index', 'name', 'loc', 'resname', 'chain',
                   'resid', 'rescode', 'x', 'y', 'z', 'occupancy', 'beta',
                   'segname', 'element', 'charge' and 'moltype'.
                   Unrecognized names are silently skipped.

    returns:
    --------
        (value, result)
        value:  1 on success, 0 if anything raised
        result: list with one entry per recognized name, in request order,
                or None on failure

    'x', 'y' and 'z' are taken from frame 0 of coor().  The original code
    indexed an undefined name `coor`, so those three requests always failed.
    '''
    accessor_names = ('atom', 'index', 'name', 'loc', 'resname', 'chain',
                      'resid', 'rescode', 'occupancy', 'beta', 'segname',
                      'element', 'charge', 'moltype')
    axis_names = ('x', 'y', 'z')

    value = 0
    result = None
    try:
        a = sasmol.SasMol(0)
        a.read_pdb(filename, fastread=True)

        collected = []
        for variable in variables:
            if variable in axis_names:
                # assumes coor() is indexed (frame, atom, axis) -- this is
                # the indexing the original expression was written for
                axis = axis_names.index(variable)
                collected.append(a.coor()[0, :, axis])
            elif variable in accessor_names:
                # each of these names is a SasMol accessor method
                collected.append(getattr(a, variable)())
        result = collected
        value = 1
    except:
        value = 0
        result = None
    return value, result
def replace_tail(sasmol, basis_to_python): # mv_A_to_E() mono_file = '../../1KX5_tailfold/1KX5tailfold_167bp.pdb' ncp = sasmol.SasMol(0) ncp.read_pdb(mono_file) replace_basis = basis_to_python.parse_basis('chain E and resid < 40') error, replace_mask = ncp.get_subset_mask(replace_basis) print sum(replace_mask) histone_file = '1KX5tailfold_A2E_2.pdb' new_histone = sasmol.SasMol(0) new_histone.read_pdb(histone_file) print new_histone.coor().shape part_histone = sasmol.SasMol(0) part_basis = basis_to_python.parse_basis('resid < 40') error, part_mask = new_histone.get_subset_mask(part_basis) new_histone.copy_molecule_using_mask(part_histone, part_mask, 0) ncp.set_coor_using_mask(part_histone, 0, replace_mask) ncp.write_pdb('1KX5tailfold_fxd2.pdb', 0, 'w')
def dna_main(pdb_file_name, number_of_groups, residues_in_groups, rotate_type,
             group_to_rotate, residue_to_rotate, angle, theta, backward,
             dna_segnames, dna_resids, bp_per_bead):
    '''
    Driver that applies 100 rotation steps of 5 degrees to one group.

    For rotate_type == 'ds_dna' a coarse-grained bead rotation is applied;
    for any other rotate_type the pdb is read, split with make_groups and
    rotate_a_group is called on groups[group_to_rotate].

    NOTE(review): the 'ds_dna' branch uses this_group, cg_dna, vecXYZ and
    ddmc, none of which is defined inside this function -- it only runs if
    they exist at module level; confirm before relying on it.
    dna_segnames, dna_resids and bp_per_bead are not used here.
    '''
    if rotate_type == 'ds_dna':
        # determine which bead has the 'residue_to_rotate' in it
        bead_to_rotate = 9  # hard-coded, not derived from residue_to_rotate
        for i in xrange(100):
            # treating this as thetaX
            theta = theta + (5.0 * numpy.pi / 180.0)
            thetaXYZ = [theta, 0, 0]
            # rotate that bead
            rotate_a_group_2(this_group, rotate_type, residue_to_rotate, angle,
                             theta, backward)
            # rotate every bead from bead_to_rotate on; the bead before it is
            # passed in as well
            (cg_dna.coor()[0][bead_to_rotate:], vecXYZ[:, bead_to_rotate:],
             dummy) = ddmc.beadRotate(cg_dna.coor()[0][bead_to_rotate - 1:],
                                      vecXYZ[:, bead_to_rotate - 1:], thetaXYZ,
                                      numpy.zeros((0, 3)))
            # s rotate_dna_group(this_group, rotate_type, residue_to_rotate,
            #                    angle, theta, backward)
    else:
        mol = sasmol.SasMol(0)
        mol.read_pdb(pdb_file_name)
        groups, group_masks = make_groups(mol, number_of_groups,
                                          residues_in_groups)
        this_group = groups[group_to_rotate]
        itheta = theta  # starting angle; not used afterwards
        for i in xrange(100):
            # advance the angle by 5 degrees (in radians) each step
            theta = theta + (5.0 * numpy.pi / 180.0)
            rotate_a_group(this_group, rotate_type, residue_to_rotate, angle,
                           theta, backward)
    return
def main(pdb_file_name, residues_in_groups, rotate_type, group_to_rotate, residue_to_rotate, angle, theta, backward, segnames_in_groups, seg_type_in_groups): txtOutput = multiprocessing.JoinableQueue() hybrid = sasmol.SasMol(0) hybrid.read_pdb(pdb_file_name) # this assumes that every residue is unique -> separate segs could have same resids # groups, group_masks = make_groups(hybrid, number_of_groups, residues_in_groups) groups, group_masks = make_complex_groups(hybrid, residues_in_groups, segnames_in_groups) this_group = groups[group_to_rotate] this_group.write_pdb('this_group.pdb', 0, 'w') dcd_out = this_group.open_dcd_write('this_group.dcd') for i in xrange(100): theta = theta + (5.0 * numpy.pi / 180.0) before = numpy.copy(this_group.coor()) if rotate_type == 'protein_backbone_dihedral': rotate_a_group(this_group, rotate_type, residue_to_rotate, angle, theta, backward) elif rotate_type == 'ds_nucleic': residues_in_group = residues_in_groups[group_to_rotate] segnames_in_group = segnames_in_groups[group_to_rotate] seg_type_in_group = seg_type_in_groups[group_to_rotate] rotate_a_nucleic_group(this_group, rotate_type, residue_to_rotate, angle, theta, backward, residues_in_group, segnames_in_group, seg_type_in_group, txtOutput) after = numpy.copy(this_group.coor()) diff = after - before print numpy.mean(diff), numpy.max(diff) this_group.write_dcd_step(dcd_out, 0, i + 1) this_group.close_dcd_write(dcd_out) return
def build_ball_coordinates(ball_diameter, fc_to_fab_vector_length, miniumum_base_angle): coor = numpy.zeros((1, 3, 3), numpy.float) coor_1 = numpy.zeros((1, 3), numpy.float) coor_2 = numpy.zeros((1, 3), numpy.float) coor_3 = numpy.zeros((1, 3), numpy.float) coor_2[0][2] = fc_to_fab_vector_length m1 = sasmol.SasMol(0) dum = numpy.copy(coor_2) m1.setCoor(dum) frame = 0 m1.rotate(frame, 'y', minimum_base_angle) coor_3 = m1.coor() print 'coor_1 = ', coor_1[0] print 'coor_2 = ', coor_2[0] print 'coor_3 = ', coor_3[0] dist_1_2 = numpy.sqrt(numpy.sum((coor_1[0] - coor_2[0])**2.0)) dist_1_3 = numpy.sqrt(numpy.sum((coor_1[0] - coor_3[0])**2.0)) dist_2_3 = numpy.sqrt(numpy.sum((coor_2[0] - coor_3[0])**2.0)) print 'dist_1_2 = ', dist_1_2 print 'dist_1_3 = ', dist_1_3 print 'dist_2_3 = ', dist_2_3 print 'ball_diameter = ', ball_diameter coor[0][0] = coor_1[0] coor[0][1] = coor_2[0] coor[0][2] = coor_3[0] get_pdb_values(m1, 3) m1.setCoor(coor) m1.write_pdb('three_ball.pdb', frame, 'w') return m1
def make_groups(hybrid, number_of_groups, residues_in_groups): frame = 0 resid = hybrid.resid() group_masks = [] groups = [] for i in xrange(number_of_groups): this_resids = residues_in_groups[i] for j in xrange(len(this_resids)): if (j == 0): basis = 'resid[i] == ' + str(this_resids[j]) + ' ' else: basis += ' or resid[i] == ' + str(this_resids[j]) print '>> creating basis = ', basis error, mask = hybrid.get_subset_mask(basis) group_masks.append(mask) this_group = sasmol.SasMol(0) error = hybrid.copy_molecule_using_mask(this_group, mask, frame) groups.append(this_group) return groups, group_masks
def combine_pdbs(all_pdbs, out_pdb=None):
    '''
    Combine a list of pdb files into one molecule.

    inputs:
    -------
        all_pdbs: list of pdb file names
        out_pdb:  optional file name to save the combined pdb to

    returns:
    --------
        combined_mol: the combined sasmol object

    see also: combine_sasmols
    '''
    loaded_mols = []
    for pdb_name in all_pdbs:
        mol = sasmol.SasMol(0)
        mol.read_pdb(pdb_name)
        loaded_mols.append(mol)

    combined_mol = combine_sasmols(loaded_mols)

    if out_pdb:
        combined_mol.write_pdb(out_pdb, 0, 'w')

    return combined_mol
def check_pdb_dcd(infile, filetype): fileexist = 0 value = 0 try: fileexist = os.path.isfile(infile) if (fileexist): binary = check_binary(infile) print 'binary = ', binary test_mol = sasmol.SasMol(0) fileexist = 1 if (filetype == 'pdb' and not binary): test_mol.read_pdb(infile, fastread=True) elif (filetype == 'dcd' and binary): test_mol.read_single_dcd_step(infile, 0) else: return fileexist, value value = 1 else: return fileexist, value except: value = 0 return fileexist, value
def align(variables, txtOutput):
    '''
    ALIGN is the function to read in variables from GUI input and
    overlap the molecules in a dcd/pdb file onto the coordinates of
    a reference pdb structure over a given basis.

    INPUT: variables, unpacked by unpack_variables into

        runname:    project name
        path:       input/output filepath
        pdbmol1:    reference pdb (mol 1)
        pdbmol2:    input pdb file (mol 2)
        infile:     input (pdb or dcd) filename (mol 2)
        basis1:     basis for molecule 1
        basis2:     basis for molecule 2
        lowres1:    low residue for overlap molecule 1
        highres1:   high residue for overlap molecule 1
        lowres2:    low residue for overlap molecule 2
        highres2:   high residue for overlap molecule 2

    txtOutput: queue-like object; status and summary text is sent to it
               with put()

    OUTPUT:

        files stored in "runname"/align directory

        ofile:          output filename
        ofile*.minmax:  text file with min & max dimensions

    Returns an empty tuple.
    '''
    runname, path, infile, pdbmol1, pdbmol2, basis1, lowres1, highres1, basis2, lowres2, highres2, ofile = unpack_variables(
        variables)

    # make sure the output directory exists
    alignpath = runname + '/align/'
    direxist = os.path.exists(alignpath)
    if (direxist == 0):
        os.system('mkdir -p ' + alignpath)

    print 'runname = ', runname

    dcd = []
    dcd.append(infile)
    ndcd = 1
    minmaxfile = ofile + '.minmax'
    # NOTE(review): this creates the .minmax file but nothing below writes
    # to it or closes it -- confirm whether the min/max summary was meant to
    # go here as well as to txtOutput
    mmfile = open(alignpath + minmaxfile, 'w')

    ttxt = time.ctime()

    # separator line of 60 '=' characters
    st = ''.join(['=' for x in xrange(60)])

    txtOutput.put("\n%s \n" % (st))
    txtOutput.put("DATA FROM RUN: %s \n\n" % (ttxt))

    m1 = sasmol.SasMol(0)
    m2 = sasmol.SasMol(1)

    m1.readpdb(path + pdbmol1)
    m2.readpdb(path + pdbmol2)

    # read the frames to align; the type is chosen by the file extension.
    # An extension other than 'dcd' or 'pdb' reads nothing and raises
    # nothing, so the failure message below is only sent when a read raises.
    try:
        if (infile[-3:] == 'dcd'):
            m2.readdcd(path + infile)

        elif (infile[-3:] == 'pdb'):
            m2.readpdb(path + infile)

    except:
        message = 'input filename is a PDB or DCD file but it must end with ".pdb" or ".dcd" '
        message += ' : stopping here'
        print_failure(message, txtOutput)

    nf2 = m2.number_of_frames()

    txtOutput.put("Total number of frames = %d\n\n" % (nf2))

    mass1 = m1.mass()
    mass2 = m2.mass()

    name1 = m1.name()
    name2 = m2.name()

    # selection text: atoms named basisN within the inclusive resid range
    basis_filter_1 = 'name[i] == "' + basis1 + '" and (resid[i] >= ' + str(
        lowres1) + ' and resid[i] <= ' + str(highres1) + ')'
    basis_filter_2 = 'name[i] == "' + basis2 + '" and (resid[i] >= ' + str(
        lowres2) + ' and resid[i] <= ' + str(highres2) + ')'

    error, mask1 = m1.get_subset_mask(basis_filter_1)
    error, mask2 = m2.get_subset_mask(basis_filter_2)

    print 'numpy.sum(mask1) = ', numpy.sum(mask1)
    print 'numpy.sum(mask2) = ', numpy.sum(mask2)

    sub_m1 = sasmol.SasMol(2)
    error = m1.copy_molecule_using_mask(sub_m1, mask1, 0)
    print 'error = ', error

    sub_m2 = sasmol.SasMol(3)
    error = m2.copy_molecule_using_mask(sub_m2, mask2, 0)
    print 'error = ', error

    # reference subset: center of mass, then centered coordinates
    com_sub_m1 = sub_m1.calccom(0)
    sub_m1.center(0)
    coor_sub_m1 = sub_m1.coor()[0]

    print 'com_sub_m1 = ', com_sub_m1

    for i in xrange(nf2):
        m2.center(i)

        # NOTE(review): this assignment replaces the coor() method of
        # sub_m2 with the coordinate array, which is why sub_m2.coor is
        # indexed without a call a few lines below
        error, sub_m2.coor = m2.get_coor_using_mask(i, mask2)
        sub_m2.setCoor(sub_m2.coor)
        com_sub_m2 = sub_m2.calccom(0)
        sub_m2.center(0)
        coor_sub_m2 = sub_m2.coor[0]

        m2.align(i, coor_sub_m2, com_sub_m2, coor_sub_m1, com_sub_m1)

        # report when (i + 1) is a multiple of nf2 / 10, or on every frame
        # when there are fewer than 10 frames
        if (((i + 1) % (float(nf2) / 10.0) == 0 or (nf2 < 10))):
            fraction_done = (float(i + 1) / float(nf2))
            progress_string = 'COMPLETED ' + \
                str(i + 1) + ' of ' + str(nf2) + ' : ' + \
                str(fraction_done * 100.0) + ' % done'
            print('%s\n' % progress_string)
            report_string = 'STATUS\t' + str(fraction_done)
            txtOutput.put(report_string)

    # write the aligned frames; the format is chosen by the extension of
    # ofile, and any other extension falls back to ofile + '.dcd'
    try:
        if (ofile[-3:] == 'dcd'):
            print ' writing DCD file'
            m2.writedcd(alignpath + ofile)

        elif (ofile[-3:] == 'pdb' and nf2 == 1):
            print ' writing PDB file'
            m2.writepdb(alignpath + ofile, 0, 'w')
        elif (ofile[-3:] == 'pdb' and nf2 > 1):
            print ' writing PDB file'
            # first frame creates the file, later frames are appended
            for i in xrange(nf2):
                if (i == 0):
                    m2.writepdb(alignpath + ofile, i, 'w')
                else:
                    m2.writepdb(alignpath + ofile, i, 'a')

        else:
            message = 'output filename ' + ofile + \
                ' needs to end in either ".pdb" (1 frame) or ".dcd" (1 or more frames)\n'
            message += ' : writing output file as a ' + ofile + '.dcd\n'
            print '\n\n', message, '\n\n'
            print ' writing DCD file'
            ofile = ofile + '.dcd'
            m2.writedcd(alignpath + ofile)

    except:
        message = 'Could not write output file'
        print_failure(message, txtOutput)

    total_min_array, total_max_array = m2.calcminmax()

    min_x = total_min_array[0]
    max_x = total_max_array[0]
    min_y = total_min_array[1]
    max_y = total_max_array[1]
    min_z = total_min_array[2]
    max_z = total_max_array[2]

    txtOutput.put(
        "minimum x = %lf\t maximum x = %lf -> range: %lf Angstroms\n" %
        (min_x, max_x, (max_x - min_x)))
    txtOutput.put(
        "minimum y = %lf\t maximum y = %lf -> range: %lf Angstroms\n" %
        (min_y, max_y, (max_y - min_y)))
    txtOutput.put(
        "minimum z = %lf\t maximum z = %lf -> range: %lf Angstroms\n\n" %
        (min_z, max_z, (max_z - min_z)))

    print 'Aligned data (nf=%i) were written to %s\n' % (nf2, './' + alignpath + ofile)
    txtOutput.put("\nAligned data (nf=%i) were written to %s\n\n" %
                  (nf2, './' + alignpath + ofile))
    txtOutput.put("\n%s \n" % (st))
    time.sleep(0.5)

    print 'ALIGN2 IS DONE'
    return ()
def main(inputs):
    '''
    Split a pdb into one pdb per segname and write each segment's sequence.

    inputs: object that should contain the following attributes
        pdb:      pdb file to read (its last four characters are dropped to
                  form the output prefix)
        segnames: segnames to extract

    For each segname the output name is prefix + '_seg_' + segname + '0'
    when the segname is all lower case, or + '1' otherwise.  The selected
    atoms are written to name + '.pdb' and the one-letter sequence to
    name + '.seq', with a line break after every 50 residues.

    NOTE(review): `residue` (called to build the residue records) is not
    defined in this function -- presumably a small record type with resid
    and resname attributes defined elsewhere in the module.
    '''
    # aa_pdb = '../1zbb_tetra_uncombined.pdb'
    # aa_pdb = '1zbb_original.pdb'
    aa = sasmol.SasMol(0)
    aa.read_pdb(inputs.pdb)

    segname_mols = []
    errors = []

    # three-letter residue name -> one-letter code
    amino_acids = {
        'ALA': 'A',
        'ARG': 'R',
        'ASN': 'N',
        'ASP': 'D',
        'CYS': 'C',
        'GLU': 'E',
        'GLN': 'Q',
        'GLY': 'G',
        'HIS': 'H',
        'ILE': 'I',
        'LEU': 'L',
        'LYS': 'K',
        'MET': 'M',
        'PHE': 'F',
        'PRO': 'P',
        'SER': 'S',
        'THR': 'T',
        'TRP': 'W',
        'TYR': 'Y',
        'VAL': 'V',
        'HSE': 'H'
    }

    # nucleotide residue name (several naming schemes) -> one-letter code
    dna = {
        'G': 'G',
        'A': 'A',
        'T': 'T',
        'C': 'C',
        'DG': 'G',
        'DA': 'A',
        'DT': 'T',
        'DC': 'C',
        'GUA': 'G',
        'ADE': 'A',
        'THY': 'T',
        'CYT': 'C'
    }

    # segnames = ['I','J']
    # print 'segnames =', segnames
    print 'inputs.segnames =', inputs.segnames
    for segname in inputs.segnames:
        # the 0/1 suffix records the case of the segname in the file name
        if segname.lower() == segname:
            segname_name = '_seg_' + segname + '0'
        else:
            segname_name = '_seg_' + segname + '1'
        segname_name = inputs.pdb[:-4] + segname_name

        basis_filter = "(segname[i] == '" + segname + "')"
        error, mask = aa.get_subset_mask(basis_filter)
        if error:
            print error
        segname_mol = sasmol.SasMol(0)
        error = aa.copy_molecule_using_mask(segname_mol, mask, 0)
        if error:
            print error
        segname_mol.write_pdb(segname_name + '.pdb', 0, 'w')
        segname_mols.append(segname_mol)

        # resids.sort()
        resA = 0
        res_min = np.min(segname_mol.resids())
        res_max = np.max(segname_mol.resids())
        print 'min resid:', res_min

        resA = 0
        residue_list = []

        # create a sorted list of the residues: one record for each run of
        # equal resids, taken from the first atom of the run
        for (i, resB) in enumerate(segname_mol.resid()):
            if resB != resA:
                # print 'segname_mol.resname()[i]:', segname_mol.resname()[i]
                # print 'segname_mol.resid()[i]:', segname_mol.resid()[i]
                residue_list.append(residue(resB, segname_mol.resname()[i]))
                resA = resB

        residue_sequence = sorted(residue_list,
                                  key=lambda residue: residue.resid)

        # with open(segname_name+'.txt', 'w') as outFile:
        #     for res in residue_sequence:
        #         outFile.write(str(res.resid) + '\t' + res.resname + '\n')

        # a segment labelled 'rna' is relabelled so that an otherwise empty
        # list of molecule types becomes ['dna']
        if 'rna' in segname_mol.moltypes():
            segname_mol.moltypes().remove('rna')
            print "removed 'rna' from moltypes"
        if len(segname_mol.moltypes()) == 0:
            segname_mol.moltypes().append('dna')
            print "appended 'dna' to moltypes"

        with open(segname_name + '.seq', 'w') as outFile:
            print outFile.closed
            if segname_mol.moltypes() == ['protein']:
                for (i, res) in enumerate(residue_sequence):
                    outFile.write(amino_acids[res.resname])
                    # line break after every 50 residues
                    if 0 == (i + 1) % 50:
                        outFile.write('\n')
            elif segname_mol.moltypes() == ['dna']:
                for (i, res) in enumerate(residue_sequence):
                    outFile.write(dna[res.resname])
                    # print 'printed', dna[res.resname], 'to', segname_name,
                    # '.seq'
                    if 0 == (i + 1) % 50:
                        outFile.write('\n')
            else:
                print 'ERROR, unexpected molecule type'

        # s for resB in resids:
        # s     if resB != resA + 1:
        # s         print 'missing residue/s, skipped segname', segname, 'btwn residues:', resA, resB
        # s     resA = resB

        print 'max resid:', res_max
        print 'finished segname', segname_name
        print outFile.closed

    print 'COMPLETE'
def _ca_overlap_basis(n_excluded, segment_names, reslow, numcont):
    """Return the CA-only overlap filter that skips n_excluded residue ranges."""
    if not n_excluded > 0:
        return "name[i] == 'CA'"

    clauses = []
    for k in range(n_excluded):
        clauses.append(
            "(name[i] == 'CA' and not (segname[i] == '%s' and "
            "( resid[i] >= %s and resid[i] <= %s)))"
            % (segment_names[k], reslow[k], reslow[k] + numcont[k]))
    # Every exclusion has to hold at once.  Joining with ' or ' (as done
    # previously) re-admits an atom excluded by one clause through any other
    # clause, so nothing was excluded when more than one range was given.
    return ' and '.join(clauses)


def _subset_mask_or_report(mol, basis_filter):
    """Return mol's subset mask for basis_filter, printing any reported error."""
    error, mask = mol.get_subset_mask(basis_filter)
    if error:
        print('ERROR: %s' % (error,))
    return mask


def _copy_best_effort(source, destination):
    """Copy source into destination, reporting (not raising) on failure."""
    import shutil

    try:
        shutil.copy(source, destination)
    except (IOError, OSError, shutil.Error) as copy_error:
        print('WARNING: could not copy %s to %s (%s)'
              % (source, destination, copy_error))


def two_body_grid(variables, txtOutput):
    """Prepare the inputs for a two-body docking run and start fft_docking.

    The two input PDB files are read, merged and written to
    ``<runname>/two_body_grid/<ofile>.pdb`` (the input PDBs are copied next
    to it), CA-based overlap masks are built for both molecules with the
    requested residue ranges excluded, optional distance constraints are
    converted to masks on the merged molecule, and everything is handed to
    ``fft_docking``.

    Parameters
    ----------
    variables
        Packed run settings; expanded by ``unpack_variables``.
    txtOutput
        Passed through unchanged to ``fft_docking``.
    """
    (runname, path, pdbmol1, pdbmol2, ofile, accpos, pos, trans, dtrans,
     theta, dtheta, basis, cutoff, lowrg, highrg, zflag, zcutoff, cflag,
     confile, nexsegments1, nsegments1, reslow1, numcont1, nexsegments2,
     nsegments2, reslow2, numcont2) = unpack_variables(variables)

    # endswith() also copes with an empty run name (runname[-1] did not)
    if runname.endswith('/'):
        runname = runname[:-1]

    genpath = runname + '/two_body_grid'
    genpaths = genpath + '/'
    # makedirs creates the run folder as well; no shell is involved, so
    # names containing spaces or shell metacharacters are handled
    if not os.path.exists(genpath):
        os.makedirs(genpath)

    pdb_path1 = path + '/' + pdbmol1
    pdb_path2 = path + '/' + pdbmol2

    m1 = sasmol.SasMol(0)
    m2 = sasmol.SasMol(1)
    m3 = sasmol.SasMol(2)

    m1.read_pdb(pdb_path1)
    m2.read_pdb(pdb_path2)

    error = m3.merge_two_molecules(m1, m2)
    if (error != []):
        print('ERROR:' + error[0])

    m3.write_pdb(genpaths + ofile + '.pdb', 0, 'w')

    _copy_best_effort(pdb_path1, genpaths)
    _copy_best_effort(pdb_path2, genpaths)

    # return values are not needed here; the calls are kept in case
    # calcminmax caches results on the molecules -- TODO confirm
    m1.calcminmax()
    m2.calcminmax()

    # set overlap basis for each molecule
    segment_names_1 = nsegments1.split(',')
    segment_names_2 = nsegments2.split(',')

    basis1st = _ca_overlap_basis(
        nexsegments1, segment_names_1, reslow1, numcont1)
    basis2st = _ca_overlap_basis(
        nexsegments2, segment_names_2, reslow2, numcont2)

    mask_array1 = _subset_mask_or_report(m1, basis1st)
    mask_array2 = _subset_mask_or_report(m2, basis2st)

    mask_a_array = []
    mask_b_array = []
    if (cflag == 1):
        filter_flag = 0
        (error, constraint_basis1_array, constraint_basis2_array,
         distance_array, type_array) = constraints.read_constraints(
            m3, confile, filter_flag)

        for (basis_a, basis_b, distance, constraint_type) in zip(
                constraint_basis1_array, constraint_basis2_array,
                distance_array, type_array):
            print(basis_a)
            print(basis_b)
            print(distance)
            print(constraint_type)

            mask_a_array.append(_subset_mask_or_report(m3, basis_a))
            mask_b_array.append(_subset_mask_or_report(m3, basis_b))
    else:
        distance_array = []
        type_array = []

    fft_docking(m1, m2, m3, ofile, genpaths, accpos, pos, trans, dtrans,
                theta, dtheta, cutoff, basis, mask_array1, mask_array2,
                zflag, zcutoff, cflag, mask_a_array, mask_b_array,
                distance_array, type_array, txtOutput)

    return
import os import os.path as op import subprocess import logging import sassie.sasmol.sasmol as sasmol import numpy as np # chain1_out = 'dimer/dna1_right_seq.pdb' # chain1_pdb = 'dimer/dna1_wrong_seq.pdb' # chain1 = sasmol.SasMol(0) # chain1.read_pdb(chain1_pdb) # sequenceFile = 'dimer/dimer_dna1_correct.seq' chain1_out = 'gH5_NCP_dna1.pdb' chain1_pdb = 'gH5_NCP_dna1bb.pdb' chain1 = sasmol.SasMol(0) chain1.read_pdb(chain1_pdb) sequenceFile = 'correct_dna1.seq' chain2_out = 'gH5_NCP_dna2.pdb' chain2_pdb = 'gH5_NCP_dna2bb.pdb' chain2 = sasmol.SasMol(0) chain2.read_pdb(chain2_pdb) with open(sequenceFile) as f: lines = f.read().splitlines() sequence = lines[0] reverse = sequence[::-1] amino_acids_pdb2seq = { 'ALA': 'A',
if __name__ == '__main__':
    # Manual smoke test: run parse_basis on a few sample selection strings,
    # then apply one parsed selection to 'min3.pdb' and print the number of
    # selected atoms.
    import numpy
    import sassie.sasmol.sasmol as sasmol

    sample_bases = [
        '(name CA and name NH) or resid > 43',
        '(name CA and name NH) or resid > 43 and resid < 57',
        'segname HC1 and (resid >= 210 and resid <=214)',
        'segname HC1 and resid < 210',
        '(resid > 23 and resid < 68) and name "CA"',
    ]

    # iterate the list itself so the loop cannot drift out of sync with the
    # number of samples
    for sample in sample_bases:
        print('#####')
        parse_basis(sample)
        print('')
        print('')

    m = sasmol.SasMol(0)
    m.read_pdb('min3.pdb')

    basis = '(resid > 23 and resid < 68) and name "CA"'
    python_basis = parse_basis(basis)

    error, mask = m.get_subset_mask(python_basis)
    if error:
        print('error =  %s' % (error,))

    print(numpy.sum(mask))
def driven_dna_mc(ARGS, cg_dna, aa_dna, cg_pro, aa_pro, vecXYZ, lp, trialbeads,
                  beadgroups, group_masks, all_beads, dna_bead_masks,
                  aa_pgroup_masks, cg_pgroup_masks, all_proteins, aa_all,
                  aa_pro_mask, aa_dna_mask, dna_type='b'):
    '''
    Perform ARGS.nsteps accepted Monte-Carlo moves on the coarse-grained
    DNA (cg_dna), only accepting moves for which the radius of gyration
    stays below 1.01 times that of the last accepted configuration.

    Each trial rotates the DNA (and any protein group attached to the
    chosen bead) about a randomly selected bead.  A trial is accepted when
    it passes the Rg test, the Metropolis test on the bend + WCA energy and
    the protein overlap tests.  Every ARGS.n_dcd_write accepted moves the
    all-atom structure is rebuilt and appended to the output dcd file.

    ARGS:             run options; reads pdb, nsteps, theta_max, n_soft,
                      goback, n_dcd_write and keep_unique
    cg_dna, cg_pro:   coarse-grained DNA / protein molecules (updated)
    aa_dna, aa_pro:   all-atom DNA / protein molecules (updated on write)
    vecXYZ:           the 3 orientation vectors of each DNA bead
    lp:               persistence length (divided by the bead spacing)
    trialbeads:       array of bead indices that may be chosen for a move
    beadgroups:       group index of each bead; selects ARGS.theta_max and
                      the entry of group_masks to move
    group_masks:      protein masks, one per bead group
    all_beads, dna_bead_masks, aa_pgroup_masks, cg_pgroup_masks,
    all_proteins:     passed on to the all-atom recovery routines
    aa_all:           complete all-atom molecule written to the output dcd
    aa_pro_mask, aa_dna_mask: masks of the protein / DNA atoms in aa_all
    dna_type:         'a', 'b' or 'z'; selects the chain width

    Output files are prefixed with a time stamp.  Nothing is returned.
    '''

    timestr = time.strftime("%y%m%d_%H%M%S_")  # prefix for output files
    all_dcd_name = timestr + ARGS.pdb[:-4] + '.dcd'
    aa_all_dcd_out = aa_all.open_dcd_write(all_dcd_name)

    # create the coarse-grained DNA and protein dcd and pdb files
    cg_dna_dcd_name = timestr + 'cg_dna.dcd'
    cg_pro_dcd_name = timestr + 'cg_pro.dcd'
    cg_dna_dcd_out = cg_dna.open_dcd_write(cg_dna_dcd_name)
    cg_pro_dcd_out = cg_pro.open_dcd_write(cg_pro_dcd_name)
    cg_dna.write_dcd_step(cg_dna_dcd_out, 0, 1)
    cg_pro.write_dcd_step(cg_pro_dcd_out, 0, 1)
    cg_dna.write_pdb(timestr + 'cg_dna.pdb', 0, 'w')
    cg_pro.write_pdb(timestr + 'cg_pro.pdb', 0, 'w')

    # create a dummy sasmol object for the 3 orientation vectors for each bead
    # will write these out to dcd files to store the coordinates along the way
    vecX_mol = sasmol.SasMol(0)
    vecY_mol = sasmol.SasMol(0)
    vecZ_mol = sasmol.SasMol(0)
    error, mask = cg_dna.get_subset_mask('(all)')
    cg_dna.copy_molecule_using_mask(vecX_mol, mask, 0)
    cg_dna.copy_molecule_using_mask(vecY_mol, mask, 0)
    cg_dna.copy_molecule_using_mask(vecZ_mol, mask, 0)
    # the np.array recast these so they do not update with vecXYZ
    vecX_mol.setCoor(np.array([vecXYZ[0]]))
    vecY_mol.setCoor(np.array([vecXYZ[1]]))
    vecZ_mol.setCoor(np.array([vecXYZ[2]]))
    vecX_dcd_name = timestr + 'vecX.dcd'
    vecY_dcd_name = timestr + 'vecY.dcd'
    vecZ_dcd_name = timestr + 'vecZ.dcd'
    vecX_dcd_out = vecX_mol.open_dcd_write(vecX_dcd_name)
    vecY_dcd_out = vecY_mol.open_dcd_write(vecY_dcd_name)
    vecZ_dcd_out = vecZ_mol.open_dcd_write(vecZ_dcd_name)
    vecX_mol.write_dcd_step(vecX_dcd_out, 0, 1)
    vecY_mol.write_dcd_step(vecY_dcd_out, 0, 1)
    vecZ_mol.write_dcd_step(vecZ_dcd_out, 0, 1)

    # initialize variables for each run
    steps_from_0 = np.zeros(ARGS.nsteps, dtype='int64')
    xyz = np.copy(vecXYZ)
    d_coor = np.copy(cg_dna.coor()[0])  # unique memory for each
    p_coor = np.copy(cg_pro.coor()[0])  # unique memory for each

    # vectors between beads u, and average distance l
    (u, l) = dna_move.checkU(d_coor)
    lpl = lp / l  # setup the persistence length parameter

    # yet to use a, and z type dna
    dna_energy_width = {'a': 0, 'b': 46., 'z': 0}
    w = dna_energy_width[dna_type.lower()]
    if w > l:
        w = np.floor(l)
        # one formatted string: a two-item print() shows a tuple on python 2
        print('>>> setting chain width (w) to %d '
              '(chain width < distance btwn beads)' % w)

    dna_bead_radius = 4.5
    pro_bead_radius = 1.0  # 2A min separation of CA atoms in database
    pro_pro_test = pro_bead_radius + pro_bead_radius
    dna_pro_test = dna_bead_radius + pro_bead_radius

    # calculate the energy of the starting positions
    wca0 = np.zeros((cg_dna.natoms(), cg_dna.natoms()))
    Ub0 = dna_move.energyBend(lpl, u, l)
    (Uwca0, wca0) = dna_move.f_energy_wca(w, d_coor, wca0, 0)
    U_T0 = Ub0 + Uwca0

    n_accept = 0  # total times configuration was accepted
    n_reject = 0  # total times configuration was rejected
    n_written = 0  # total times dcd write has been called
    fail_tally = 0  # number of times failed for particular iteration
    n_from_reload = 0  # number of steps since last reload
    n_reload = [0]  # list containing the i_goback values

    # this should not actually be >=, come back to this
    assert np.size(ARGS.theta_max) - 1 >= np.max(beadgroups), (
        'each group needs its own theta_max: %d < %d'
        % (np.size(ARGS.theta_max) - 1, np.max(beadgroups)))

    rg_old = cg_dna.calcrg(0)

    # Main MC loop #
    while n_accept < ARGS.nsteps:

        # Choose a bead to rotate
        trial_bead = trialbeads[int((trialbeads.size) * np.random.random())]

        # Determine rotation to perform
        theta_max = ARGS.theta_max[beadgroups[trial_bead]]
        # option to scale thetaZ separately; the factor 0 currently turns
        # the rotation about Z off (float() replaces the removed np.float)
        thetaZ_max = 0 * float(theta_max)
        thetaZ = 2 * thetaZ_max * np.random.random() - thetaZ_max
        thetaX = 2 * theta_max * np.random.random() - theta_max
        thetaY = 2 * theta_max * np.random.random() - theta_max
        thetaXYZ = [
            thetaX / ARGS.n_soft,
            thetaY / ARGS.n_soft,
            thetaZ / ARGS.n_soft
        ]

        if (len(group_masks) == 0
                or beadgroups[trial_bead] == len(group_masks)):
            # Only DNA will be moving, create place-holder dummy coordinates
            p_coor_rot = np.zeros((0, 3))
        else:
            p_mask = group_masks[beadgroups[trial_bead]]
            p_ind_rot = mask2ind(p_mask)
            p_ind_fix = mask2ind(-(p_mask - 1))
            p_coor_rot = p_coor[p_ind_rot]
            p_coor_fix = p_coor[p_ind_fix]

        # generate a newly rotated model
        (d_coor[trial_bead:], xyz[:, trial_bead:],
         p_coor_rot) = dna_move.beadRotate(d_coor[trial_bead - 1:],
                                           xyz[:, trial_bead - 1:],
                                           thetaXYZ, ARGS.n_soft, p_coor_rot)

        # store the rotated protein coordinates
        if beadgroups[trial_bead] < len(group_masks):
            p_coor[p_ind_rot] = p_coor_rot

        # verify the Rg_new < Rg_old * 1.01
        d_coor_old = np.copy(cg_dna.coor()[0])
        cg_dna.setCoor(np.array([(d_coor)]))  # update dna coordinates
        rg_new = cg_dna.calcrg(0)
        rg_limit = rg_old * 1.01
        rg_pass = rg_new < rg_limit
        # the messages now state the relation that actually holds
        if rg_pass:
            print('rg_old * 1.01 > rg_new: %f > %f' % (rg_limit, rg_new))
        else:
            print('rg_old * 1.01 <= rg_new: %f <= %f' % (rg_limit, rg_new))

        if rg_pass:
            # calculate the change in energy (dU) and the boltzman factor (p)
            (u, l) = dna_move.checkU(d_coor)
            Ub1 = dna_move.energyBend(lpl, u, l)

            # ~~~~ DNA interaction energy ~~~~~~#
            (Uwca1, wca1) = dna_move.f_energy_wca(w, d_coor, wca0,
                                                  trial_bead)

            U_T1 = Ub1 + Uwca1
            dU = U_T1 - U_T0

            with warnings.catch_warnings():
                warnings.filterwarnings('error')  # need this for np warnings
                try:
                    p = np.exp(-dU)
                except Warning:
                    if dU > 99:
                        p = 0
                    elif dU < 0:
                        p = 1
                    else:
                        print('Warning: ~~> unclear OverflowError <~~ dU =  '
                              '%s' % dU)
                        print('not sure where the error originated from')
                        # reject the move instead of reusing a stale (or
                        # undefined) probability
                        p = 0

            test = np.random.random()
            if p <= test:
                dna_pass = False
            else:
                dna_pass = True

                # now check for protein involved collisions
                if len(p_coor_rot) > 0:  # only if proteins were rotated
                    # ~~ Check for overlap, DNA-protein or protein-protein ~~#
                    # NOTE(review): beads trial_bead: are the ones that were
                    # rotated above, so these two names look swapped; left
                    # unchanged -- confirm the intended overlap tests
                    d_coor_fix = d_coor[trial_bead:]
                    d_coor_rot = d_coor[:trial_bead]

                    # check for protein-protein overlap
                    if 1 == f_overlap2(p_coor_rot, p_coor_fix, pro_pro_test):
                        print('Protein-Protein')
                        collisionless = False

                    # check for DNA-protein overlap
                    elif 1 == f_overlap2(p_coor_rot, d_coor_fix,
                                         dna_pro_test):
                        print('Potein-DNA (rot-fix)')
                        collisionless = False
                        print('ignoring this for now')

                    elif 1 == f_overlap2(p_coor_fix, d_coor_rot,
                                         dna_pro_test):
                        print('Potein-DNA (fix-rot)')
                        collisionless = False

                    else:
                        collisionless = True

                    if not collisionless:
                        print('failed because of collision')

                else:
                    collisionless = True  # no protein to collide with

        # dna_pass / collisionless are only evaluated when rg_pass is True
        if rg_pass and dna_pass and collisionless:
            rg_old = rg_new
            n_from_reload += 1
            steps_from_0[n_accept] = n_from_reload + n_reload[-1]
            n_accept += 1  # increment accept counter

            # np.array(...) gives the molecules their own copy of the data
            cg_pro.setCoor(np.array([(p_coor)]))  # update protein coordinates
            vecXYZ = np.copy(xyz)  # update dna orientations
            vecX_mol.setCoor(np.array([vecXYZ[0]]))  # independent of vecXYZ
            vecY_mol.setCoor(np.array([vecXYZ[1]]))  # independent of vecXYZ
            vecZ_mol.setCoor(np.array([vecXYZ[2]]))  # independent of vecXYZ

            wca0 = np.copy(wca1)  # update DNA WCA energy
            U_T0 = U_T1  # update total energy

            # print output regarding trial
            print("trial_bead(%3d) = %2d\t failed attempts = %2d" % (
                n_accept, trial_bead, fail_tally))
            fail_tally = 0  # reset fail_tally

            # print out the Rg
            print(cg_dna.calcrg(0))

            # write out the accepted configuration for go-back use
            if ARGS.goback > 0:
                # these are incremented by one because the original
                # coordinates are saved (that is not the case for aa_all)
                cg_dna.write_dcd_step(cg_dna_dcd_out, 0, n_written + 1)
                cg_pro.write_dcd_step(cg_pro_dcd_out, 0, n_written + 1)
                vecX_mol.write_dcd_step(vecX_dcd_out, 0, n_written + 1)
                vecY_mol.write_dcd_step(vecY_dcd_out, 0, n_written + 1)
                vecZ_mol.write_dcd_step(vecZ_dcd_out, 0, n_written + 1)

            # recover an all atom representation and save coordinates to a
            # dcd; this requires re-inserting the aa-coordinates which takes
            # added time so only do when designated
            if 0 == n_accept % ARGS.n_dcd_write:
                # ~~recover aa-DNA~~
                dna_move.recover_aaDNA_model(cg_dna, aa_dna, vecXYZ,
                                             all_beads, dna_bead_masks)

                # ~~recover aa-Protein~~
                dna_move.recover_aaPro_model(aa_pgroup_masks, cg_pgroup_masks,
                                             cg_pro, all_proteins, aa_pro)

                # ~~Combine aa Complete Structure~~
                aa_all.set_coor_using_mask(aa_pro, 0, aa_pro_mask)
                aa_all.set_coor_using_mask(aa_dna, 0, aa_dna_mask)

                # ~~Write DCD step~~
                n_written += 1
                aa_all.write_dcd_step(aa_all_dcd_out, 0, n_written)

        else:
            # default ARGS.goback is -1 so this returns FALSE without user
            # input
            if fail_tally == ARGS.goback:
                # NOTE(review): the argument order (vecX name/mol, then
                # vecY mol/name, vecZ mol/name) is kept as found -- confirm
                # against the signature of rewind
                i_goback = rewind(ARGS, n_accept, cg_dna_dcd_name,
                                  cg_dna, cg_pro_dcd_name, cg_pro,
                                  vecX_dcd_name, vecX_mol, vecY_mol,
                                  vecY_dcd_name, vecZ_mol, vecZ_dcd_name,
                                  vecXYZ)

                # revert dna coordinates
                # NOTE(review): this replaces whatever rewind loaded into
                # cg_dna with the pre-move coordinates -- confirm intended
                cg_dna.setCoor(np.array([(d_coor_old)]))
                d_coor = np.copy(cg_dna.coor()[0])  # reset dna coordinates

                # reset the reference energy (same dna_move routines that
                # are used for the starting energy)
                (u, l) = dna_move.checkU(d_coor)
                (Uwca0, wca0) = dna_move.f_energy_wca(w, d_coor, wca0, 0)
                Ub0 = dna_move.energyBend(lpl, u, l)
                U_T0 = Ub0 + Uwca0

                n_from_reload = 0
                n_reload.append(steps_from_0[i_goback - 1])
                fail_tally = 0  # reset the fail counter
            else:
                fail_tally += 1  # increment bead reject counter
                n_reject += 1  # increment total reject counter

                # revert dna coordinates
                cg_dna.setCoor(np.array([(d_coor_old)]))
                d_coor = np.copy(cg_dna.coor()[0])  # reset dna coordinates

            p_coor = np.copy(cg_pro.coor()[0])  # reset protein coordinates
            xyz = np.copy(vecXYZ)  # reset the dna orientations

            # save previous coordinates again
            if not ARGS.keep_unique:
                # ~~Write DCD step~~
                n_written += 1
                aa_all.write_dcd_step(aa_all_dcd_out, 0, n_written)
                # use the handles opened at the top of this function (the
                # '*_all_dcd_out' names used before were never defined)
                cg_dna.write_dcd_step(cg_dna_dcd_out, 0, n_written + 1)
                cg_pro.write_dcd_step(cg_pro_dcd_out, 0, n_written + 1)

    aa_all.close_dcd_write(aa_all_dcd_out)
    cg_dna.close_dcd_write(cg_dna_dcd_out)  # the cg dna files are kept

    # close the scratch files before deleting them
    cg_pro.close_dcd_write(cg_pro_dcd_out)
    vecX_mol.close_dcd_write(vecX_dcd_out)
    vecY_mol.close_dcd_write(vecY_dcd_out)
    vecZ_mol.close_dcd_write(vecZ_dcd_out)

    os.remove(timestr + 'cg_pro.pdb')  # remove/comment to keep cg pro coor
    os.remove(cg_pro_dcd_name)  # remove/comment to keep the cg pro coor
    os.remove(vecX_dcd_name)
    os.remove(vecY_dcd_name)
    os.remove(vecZ_dcd_name)

    if ARGS.goback > 0:
        np.savetxt(timestr + 'n_from_0.txt', steps_from_0, fmt='%d')

    print("accepted %d moves" % n_accept)
    print("rejected %d moves" % n_reject)