# Compute (or load) the full pairwise eRMSD matrix between all configurations
# in the trajectory, then mirror the half-matrix and zero the diagonal.
num_confs = cal_confs(traj_file)

import UTILS.base  # this needs to be imported after the model type is set

r2 = LorenzoReader2(traj_file, top_file)

# How do you want to get your eRMSDs?  Do you need to do the time-consuming
# calculation or is it done and you have a pickle?
if not parallel:
    r1 = LorenzoReader2(traj_file, top_file)
    eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)

if parallel:
    out = parallelize_lorenzo_onefile.fire_multiprocess(
        traj_file, top_file, get_eRMSDs, num_confs, n_cpus,
        r2, inputfile, traj_file, top_file, matrix=True)
    # BUG FIX: np.sum() cannot reduce a *generator* along an axis (it collapses
    # to a 0-d object array).  Materialize the per-process matrices first —
    # same pattern used by the interparticle-distance script in this codebase.
    eRMSDs = np.sum(np.array([i for i in out]), axis=0)
#eRMSDs = pickle.load(open('tmp_eRMSDs', 'rb'))

# The eRMSD matrix is actually only half a matrix: mirror every computed entry
# across the diagonal and force the diagonal itself to 0.
for ni, i in enumerate(eRMSDs):
    for nj, j in enumerate(i):
        eRMSDs[nj][ni] = j
        if ni == nj:
            eRMSDs[ni][nj] = 0

# Since the eRMSDs are so time-consuming to calculate we're gonna pickle the
# matrix so the DBSCAN can be iterated on later.
# Pull the topology/trajectory pair (and optional core count) off the parsed
# CLI arguments, then gather per-configuration internal coordinates.
top_file = args.topology[0]
traj_file = args.trajectory[0]
parallel = args.parallel
if parallel:
    n_cpus = args.parallel[0]

num_confs = cal_confs(traj_file)
r = LorenzoReader2(traj_file, top_file)

if parallel:
    out = parallelize_lorenzo_onefile.fire_multiprocess(
        traj_file, top_file, get_internal_coords, num_confs, n_cpus)
    # Out dims: [processor][0 = torsions | 1 = dihedrals][angle lists by conf]
    torsions = np.concatenate([out[i][0] for i in range(n_cpus)], axis=1)
    dihedrals = np.concatenate([out[i][1] for i in range(n_cpus)], axis=1)
else:
    torsions, dihedrals = get_internal_coords(r, num_confs)

# Average each angle over all configurations.
torsion_mean = np.mean(torsions, axis=1).tolist()
dihedral_mean = np.mean(dihedrals, axis=1).tolist()

# Make something akin to a Ramachandran plot for DNA origami??
import matplotlib.pyplot as plt

plt.scatter(torsion_mean[1:], dihedral_mean)
plt.xlabel("torsion_angle")
plt.ylabel("dihedral_angle")
plt.show()

# Pad the front of the torsion list so it lines up with the nucleotide count
# (presumably for downstream per-nucleotide output — TODO confirm).
torsion_mean.insert(0, torsion_mean[0])
torsion_mean.insert(0, torsion_mean[0])
# Build the deviations-from-mean matrix for every configuration, PCA it,
# and plot a scree of the component eigenvalues.

def fetch_np(conf):
    """Return the (n_nucleotides, 3) array of center-of-mass positions."""
    return np.array([n.cm_pos for n in conf._nucleotides])

with LorenzoReader2(mean_file, top_file) as reader:
    s = reader._get_system()
    align_conf = fetch_np(s)

# All structures must have the same center of mass, so shift the reference
# conformation to the origin.
cms = np.mean(align_conf, axis=0)
align_conf -= cms

# Compute the deviations
if not parallel:
    r = LorenzoReader2(traj_file, top_file)
    deviations_matrix = get_pca(r, align_conf, num_confs)

if parallel:
    out = parallelize_lorenzo_onefile.fire_multiprocess(
        traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
    deviations_matrix = np.concatenate([i for i in out])

# Now that we have the deviations matrix we're gonna get the covariance and PCA it.
# Note that in the future we might want a switch for covariance vs correlation
# matrix because correlation (cov/stdev so all diagonals are 1) is better for
# really floppy structures.
pca = PCA(n_components=3)
pca.fit(deviations_matrix)
transformed = pca.transform(deviations_matrix)

#THIS IS AS FAR AS I GOT

import matplotlib.pyplot as plt

# BUG FIX(review): `evalues` was referenced below but never defined anywhere
# in this chunk; the eigenvalues of the fitted PCA are the natural scree-plot
# quantity — confirm against the original author's intent.
evalues = pca.explained_variance_

print("INFO: Saving scree plot to scree.png", file=stderr)
plt.scatter(range(0, len(evalues)), evalues, s=25)
plt.xlabel("component")
plt.ylabel("eigenvalue")
# Get the mean distance from every particle to every other particle.
if parallel:
    print(
        "INFO: Computing interparticle distances of {} configurations using {} cores."
        .format(num_confs, n_cpus),
        file=stderr)
    out = parallelize_lorenzo_onefile.fire_multiprocess(
        traj_file, top_file, get_mean, num_confs, n_cpus)
    cartesian_distances = np.sum(np.array([i for i in out]), axis=0)
else:
    print(
        "INFO: Computing interparticle distances of {} configurations using 1 core."
        .format(num_confs),
        file=stderr)
    r = LorenzoReader2(traj_file, top_file)
    cartesian_distances = get_mean(r, num_confs)

# Normalize the summed distances into a per-configuration mean.
mean_distance_map = cartesian_distances * (1 / (num_confs))

# Making a new configuration file from scratch is hard, so we're just going to
# read one in and then overwrite the positional information.
r = LorenzoReader2(traj_file, top_file)
output_system = r._get_system()

# make heatmap of the summed distances
#make_heatmap(mean_distance_map)

# Keep only pairs closer than the cutoff; everything else is masked out.
masked_mean = np.ma.masked_array(mean_distance_map,
                                 ~(mean_distance_map < cutoff_distance))

# I tried to use DGSOL to analytically solve this, but origamis were too big
# Count native/mismatched base pairs per configuration against a design file
# and write per-pair occupancy fractions as JSON-like output.
num_confs = cal_confs(traj_file)

import UTILS.base  # this needs to be imported after the model type is set

with open(designfile, 'r') as pair_file:
    pairs = pair_file.readlines()

if not parallel:
    print("INFO: Computing base pairs in {} configurations using 1 core.".format(num_confs),
          file=stderr)
    r = LorenzoReader2(traj_file, top_file)
    tot_bonds, tot_missbonds, out_array, confid = bond_analysis(r, pairs, num_confs)

if parallel:
    print("INFO: Computing base pairs in {} configurations using {} cores.".format(num_confs, n_cpus),
          file=stderr)
    out = parallelize_lorenzo_onefile.fire_multiprocess(
        traj_file, top_file, bond_analysis, num_confs, n_cpus, pairs)
    # Reduce the per-process partial results.
    tot_bonds = sum((i[0] for i in out))
    tot_missbonds = sum((i[1] for i in out))
    out_array = sum((i[2] for i in out))
    confid = sum((i[3] for i in out))

# Hoist the repeated int() conversion used in every division below.
n_confs = int(confid)
print("\nSummary:\navg bonds: {}\navg_missbonds: {}".format(
    tot_bonds / n_confs, tot_missbonds / n_confs))

# CONSISTENCY FIX: every other INFO log line in these scripts goes to stderr,
# keeping stdout clean for the summary data.
print("INFO: Writing bond occupancy data to {}".format(outfile), file=stderr)
with open(outfile, "w+") as out_file:
    out_file.write("{\n\"occupancy\" : [")
    out_file.write(str(out_array[0] / n_confs))
    for n in out_array[1:]:
        out_file.write(", {}".format(n / n_confs))
    out_file.write("] \n}")
#launch find_angle using the appropriate number of threads to find all duplexes. if not parallel: print( "INFO: Fitting duplexes to {} configurations using 1 core.".format( num_confs), file=stderr) r = LorenzoReader2(traj_file, top_file) duplexes_at_step = find_angles(r, num_confs) if parallel: print("INFO: Fitting duplexes to {} configurations using {} cores.". format(num_confs, n_cpus), file=stderr) duplexes_at_step = [] out = parallelize_lorenzo_onefile.fire_multiprocess( traj_file, top_file, find_angles, num_confs, n_cpus) [duplexes_at_step.extend(i) for i in out] #print duplexes to a file print( "INFO: Writing duplex data to {}. Use axis_analysis_overlay.py to graph data" .format(outfile), file=stderr) output = open(outfile, 'w') output.write( "time\tduplex\tstart1\tend1\tstart2\tend2\taxisX\taxisY\taxisZ\thel_pos\n" ) for i in range(0, len(duplexes_at_step)): for j in range(0, len(duplexes_at_step[i])): line = '{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t[{},{},{}]\n'.format( duplexes_at_step[i][j].time, duplexes_at_step[i][j].index,