def pick_starting_configuration(traj_file, top_file, max_bound):
    """
    Pick a random configuration out of the trajectory file to use as the reference structure.

    We assume that any configuration is a reasonable alignment target; in our experience, the
    choice of reference configuration has very little impact on the mean structure.

    Parameters:
        traj_file (string): The name of the trajectory file
        top_file (string): The name of the topology file associated with the trajectory file
        max_bound (int): The reference configuration will be chosen at random from the first max_bound configurations in the trajectory file

    Returns:
        stop_at (int): The configuration ID of the reference configuration
        initial_structure (base.System): The oxDNA system representing the reference configuration
    """
    with LorenzoReader2(traj_file, top_file) as reader:
        if args.align:
            stop_at = int(args.align[0])
        else:
            stop_at = randint(0, max_bound-1)
        print("INFO: We chose {} as reference".format(stop_at), file=stderr)

        initial_structure = reader._get_system(N_skip=stop_at) #this is way faster than using next(), but doesn't automatically inbox the system
        if not initial_structure:
            print("ERROR: Couldn't read structure at conf num {0}. Something has gone weird".format(stop_at), file=stderr)
            exit(1)
        print("INFO: reference structure loaded", file=stderr)
        initial_structure.inbox()

    return stop_at, initial_structure
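#Usage sketch (hypothetical file names; note that pick_starting_configuration also reads
#the module-level `args` namespace, so it is meant to be called from within this script):
#
#    stop_at, ref_system = pick_starting_configuration("trajectory.dat", "topology.top", 100)
#    print("aligning against configuration {}".format(stop_at), file=stderr)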
def get_centroid(points, metric_name, num_confs, labs, traj_file, inputfile):
    """
    Takes the output from DBSCAN and produces the trajectory and centroid from each cluster.

    Parameters:
        points (numpy.array): The points fed to the clustering algorithm.
        metric_name (str): The type of data the points represent.
        num_confs (int): The number of configurations in the trajectory.
        labs (numpy.array): The cluster each point belongs to.
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
    """
    print("INFO: splitting clusters...", file=stderr)
    print("INFO: Will write cluster trajectories to traj_<cluster_number>.dat", file=stderr)
    print("cluster\tn\tavg_E\tE_dev\tavg_H\tH_dev\tcentroid_t")

    for cluster in set(labs):
        if metric_name == "precomputed":
            masked = points[labs == cluster]
            in_cluster_id = np.sum(masked, axis=1).argmin()

        in_cluster = list(labs).count(cluster)
        centroid_id = find_element(in_cluster_id, cluster, labs)
        top_file = get_input_parameter(inputfile, "topology")

        r = LorenzoReader2(traj_file, top_file)
        output = r._get_system(N_skip=centroid_id)
        filename = "centroid" + str(cluster)
        output.print_lorenzo_output(filename + ".dat", filename + ".top")

        make_heatmap(inputfile, output, filename)
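#Usage sketch (assumed inputs): with a precomputed distance matrix, labs would come from
#sklearn's DBSCAN, e.g. labs = DBSCAN(metric="precomputed").fit(distance_matrix).labels_
#
#    get_centroid(distance_matrix, "precomputed", num_confs, labs, "trajectory.dat", "input")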
def split_trajectory(traj_file, top_file, num_confs, n_cpus, confs_per_processor):
    """
    Splits a trajectory file into temporary files and attaches a reader to each file.

    Parameters:
        traj_file (str): Name of the trajectory file to split.
        top_file (str): Name of the topology file associated with the trajectory.
        num_confs (int): The number of configurations in the trajectory.
        n_cpus (int): The number of chunks to split the trajectory into.
        confs_per_processor (int): The number of configurations per chunk (equivalent to floor(num_confs/n_cpus)).

    Returns:
        readers (list of LorenzoReader2s): A list of readers, each attached to a unique chunk of the file.
        files (list of NamedTemporaryFiles): The temporary files backing each reader.
    """
    n_files = 0
    readers = []
    files = []
    rem = num_confs % n_cpus

    with open(traj_file, "rb") as f:
        it = blocks(f)
        chunk = next(it) # iterator producing 1 MB chunks of the trajectory
        last_conf_byte = 0

        #create a number of temporary files equal to the number of CPUs
        while n_files < n_cpus:
            out = NamedTemporaryFile(mode='w+b', delete=False)
            conf_count = 0

            #if there is a remainder after dividing the number of configurations by the number of CPUs,
            #add one extra configuration to each of the first rem files
            if n_files < rem:
                a = 1
            else:
                a = 0

            #find successive configuration start points and write them out to the tempfiles
            while conf_count < confs_per_processor + a:
                next_conf_byte = chunk.find(b"t", last_conf_byte + 1)
                if next_conf_byte == -1:
                    out.write(chunk[last_conf_byte:])
                    try:
                        chunk = next(it)
                    except StopIteration: #next() raises StopIteration if there isn't another chunk
                        break
                    last_conf_byte = 0
                else:
                    out.write(chunk[last_conf_byte:next_conf_byte])
                    conf_count += 1
                    last_conf_byte = next_conf_byte

            #create a reader from the newly created trajectory chunk
            readers.append(LorenzoReader2(out.name, top_file))
            files.append(out)
            n_files += 1

    return readers, files
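#Usage sketch (hypothetical file names): split a 1000-configuration trajectory across 4 CPUs.
#Because the tempfiles are created with delete=False, they should be removed once the
#readers are done with them.
#
#    readers, files = split_trajectory("trajectory.dat", "topology.top", 1000, 4, 1000 // 4)
#    ... #do per-chunk work with the readers
#    for f in files:
#        remove(f.name)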
def fire_multiprocess(traj_file, top_file, function, num_confs, n_cpus, *args, **kwargs):
    confs_per_processor = int(np.floor(num_confs / n_cpus))

    reader_pool = []
    processor_pool = pp.Pool(n_cpus)

    #for calculations on symmetric matrices (eRMSD),
    #we can't just hand an equal number of configurations to each worker
    if ("matrix", True) in kwargs.items():
        total_calculations = sum((num_confs - i) for i in range(1, num_confs))
        calcs_per_cpu = total_calculations / n_cpus
        split_ends = []
        i = 0
        while i < num_confs:
            e = 0
            calcs = 0
            while calcs < calcs_per_cpu:
                calcs += num_confs - i
                e += 1
                i += 1
                if i >= num_confs:
                    break
            split_ends.append(e)

    #define sizes of trajectory chunks
    else:
        split_ends = [confs_per_processor for _ in range(n_cpus)]
        split_ends[-1] += num_confs % n_cpus #last chunk gets all the leftovers

    #now figure out which configuration each chunk starts on
    split_starts = [0]
    for i in range(n_cpus):
        reader_pool.append(LorenzoReader2(traj_file, top_file))
        if i != 0:
            split_starts.append(split_starts[i - 1] + split_ends[i - 1])

    #staple everything together, send it out to the workers, and collect the results as a list
    lst = [(r, *args, num_confs, s, e) for r, s, e in zip(reader_pool, split_starts, split_ends)]
    results = processor_pool.starmap_async(function, lst).get()
    processor_pool.close()

    return results
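#Usage sketch (mirrors the get_pca driver below): the worker function must accept
#(reader, *args, num_confs, start, stop), matching the tuples stapled together above.
#
#    out = fire_multiprocess(traj_file, top_file, get_pca, num_confs, n_cpus, align_conf)
#    deviations_matrix = np.concatenate([i for i in out])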
def get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs, start=None, stop=None):
    """
    Computes the eRMSD between every pair of configurations in the trajectory.
    r1 and r2 are two independent readers on the same trajectory file.
    """
    if stop is None:
        stop = num_confs
    else:
        stop = int(stop)
    if start is None:
        start = 0
    else:
        start = int(start)
    confid = 0

    system1 = r1._get_system(N_skip=start)
    system2 = r2._get_system(N_skip=start + 1)
    eRMSDs = np.zeros((num_confs, num_confs))
    i = start
    j = start + 1

    #for each configuration in the outer loop...
    while system1 != False and confid < stop:
        print("working on configuration", i, "t =", system1._time)
        system1.inbox_system()
        mat1 = calc_matrix(system1, inputfile)

        #...compare against every subsequent configuration
        while system2:
            print("working on configuration", i, "compared to", j)
            system2.inbox_system()
            mat2 = calc_matrix(system2, inputfile)
            eRMSDs[i][j] = calc_eRMSD(mat1, mat2)
            system2 = r2._get_system()
            j += 1

        i += 1
        j = i + 1
        confid += 1
        system1 = r1._get_system()

        #restart the second reader at the configuration after the new system1
        r2 = LorenzoReader2(traj_file, top_file)
        system2 = r2._get_system(N_skip=j)

    return eRMSDs
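#Usage sketch (mirrors the driver code below): two readers on the same trajectory, the
#second one offset by one configuration inside get_eRMSDs itself.
#
#    r1 = LorenzoReader2(traj_file, top_file)
#    r2 = LorenzoReader2(traj_file, top_file)
#    eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)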
def main(): #doesn't actually do anything...
    import argparse
    from UTILS.readers import LorenzoReader2, get_input_parameter

    parser = argparse.ArgumentParser(description="A python wrapper for getting all vectors between nucleotides from a simulation")
    parser.add_argument('inputfile', type=str, nargs=1, help="The inputfile used to run the simulation")
    parser.add_argument('trajectory', type=str, nargs=1, help="The file containing the configurations for which the vectors are needed")
    args = parser.parse_args()

    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy"])

    inputfile = args.inputfile[0]
    traj_file = args.trajectory[0]
    top_file = get_input_parameter(inputfile, "topology")
    if "RNA" in get_input_parameter(inputfile, "interaction_type"):
        environ["OXRNA"] = "1"
    else:
        environ["OXRNA"] = "0"

    import UTILS.base #this needs to be imported after the model type is set

    r = LorenzoReader2(traj_file, top_file)
    system = r._get_system()

    while system:
        m = all_vectors(inputfile, system, True)
        system = r._get_system()

    print("well, it finished...")
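#Command-line usage sketch (the script name is a placeholder; the two positional
#arguments are defined by the parser above):
#
#    python all_vectors.py input trajectory.dat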
traj_file = args.trajectory[0]
inputfile = args.inputfile[0]
parallel = args.parallel
if parallel:
    n_cpus = args.parallel[0]

top_file = get_input_parameter(inputfile, "topology")
if "RNA" in get_input_parameter(inputfile, "interaction_type"):
    environ["OXRNA"] = "1"
else:
    environ["OXRNA"] = "0"
num_confs = cal_confs(traj_file)

import UTILS.base #this needs to be imported after the model type is set

r2 = LorenzoReader2(traj_file, top_file)

#how do you want to get your eRMSDs? Do you need to do the time-consuming calculation or is it done and you have a pickle?
if not parallel:
    r1 = LorenzoReader2(traj_file, top_file)
    eRMSDs = get_eRMSDs(r1, r2, inputfile, traj_file, top_file, num_confs)
if parallel:
    out = parallelize.fire_multiprocess(traj_file, top_file, get_eRMSDs, num_confs, n_cpus,
                                        r2, inputfile, traj_file, top_file, matrix=True) #remaining arguments inferred from get_eRMSDs' signature and fire_multiprocess's "matrix" option
else:
    outfile = "forces.txt"
    print("INFO: No outfile name provided, defaulting to \"{}\"".format(outfile), file=stderr)

if args.pairs:
    pairsfile = args.pairs[0]
else:
    pairsfile = False

#Get relevant parameters from the input file
top_file = get_input_parameter(inputfile, "topology")

#get base pairs
r = LorenzoReader2(conf_file, top_file)
mysystem = r._get_system()
out = output_bonds(inputfile, mysystem)
out = out.split('\n')

#Find the series of forming bonds
print("INFO: Analyzing the output...", file=stderr)
Bonded = {}
for i in out:
    if i == '' or i[0] == '#':
        continue
    splitline = i.split(' ')
    try:
        HB = float(splitline[6])
    except (IndexError, ValueError):
        continue
traj_file = args.trajectory[0]
inputfile = args.inputfile[0]
if args.outfile:
    outfile = args.outfile[0]
    visualize = True
else:
    visualize = False

top_file = get_input_parameter(inputfile, "topology")
if "RNA" in get_input_parameter(inputfile, "interaction_type"):
    environ["OXRNA"] = "1"
else:
    environ["OXRNA"] = "0"

import UTILS.base #this needs to be imported after the model type is set

myreader = LorenzoReader2(traj_file, top_file)
mysystem = myreader._get_system()
energies = np.zeros(mysystem.N)
count = 0

while mysystem != False:
    out = output_bonds(inputfile, mysystem)
    if visualize:
        for line in out.split('\n'):
            if line and not line.startswith('#'):
                line = [float(l) for l in line.split(' ')]
                energies[int(line[0])] += sum(line[2:])
                energies[int(line[1])] += sum(line[2:])
    else:
        print(out)
environ["OXRNA"] = "1" else: environ["OXRNA"] = "0" import UTILS.base #this needs to be imported after the model type is set num_confs = cal_confs(traj_file) if mean_file.split(".")[-1] == "json": with open(mean_file) as file: align_conf = load(file)['g_mean'] elif mean_file.split(".")[-1] == "dat": fetch_np = lambda conf: np.array([ n.cm_pos for n in conf._nucleotides ]) with LorenzoReader2(mean_file, top_file) as reader: s = reader._get_system() align_conf = fetch_np(s) cms = compute_cms(align_conf) #all structures must have the same center of mass align_conf -= cms #Compute the deviations if not parallel: r = LorenzoReader2(traj_file,top_file) deviations_matrix = get_pca(r, align_conf, num_confs) if parallel: out = parallelize.fire_multiprocess(traj_file, top_file, get_pca, num_confs, n_cpus, align_conf) deviations_matrix = np.concatenate([i for i in out])
def split_trajectory(traj_file, inputfile, labs, n_clusters):
    """
    Splits the trajectory into the clustered trajectories.

    Parameters:
        traj_file (str): The analyzed trajectory file.
        inputfile (str): The input file used to run the analyzed simulation.
        labs (numpy.array): The cluster each point belongs to.
        n_clusters (int): The number of clusters produced by the clustering algorithm.
    """
    top_file = get_input_parameter(inputfile, "topology")

    print("cluster\tmembers")

    #energies = []
    #H_counts = []

    for cluster in set(labs):
        in_cluster = list(labs).count(cluster)
        print("{}\t{}".format(cluster, in_cluster))

        #energies.append([])
        #H_counts.append([])

        #for making trajectories of each cluster, remove any stale output from previous runs
        try:
            remove("cluster_"+str(cluster)+".dat")
        except FileNotFoundError:
            pass

    confid = 0
    r1 = LorenzoReader2(traj_file, top_file)
    system = r1._get_system()

    print("INFO: splitting trajectory...", file=stderr)
    print("INFO: Will write cluster trajectories to cluster_<cluster number>.dat", file=stderr)

    while system != False:
        system.print_traj_output("cluster_"+str(labs[confid])+".dat", "/dev/null")

        ###########
        #If you want to get additional information about a cluster, add that code here
        #for example, if you want average energy and hydrogen bonds:
        '''
        energies[labs[confid]].append(0)
        H_counts[labs[confid]].append(0)
        system.map_nucleotides_to_strands()
        out = output_bonds(inputfile, system)

        for line in out.split('\n'):
            if line[0] != '#' and line[0] != '\n':
                line = line.split(" ")
                for m in line[2:9]:
                    energies[labs[confid]][-1] += float(m)
                if float(line[6]) != 0:
                    H_counts[labs[confid]][-1] += 1

        energies[labs[confid]][-1] /= len(system._nucleotides)
        '''
        ############

        confid += 1
        system = r1._get_system()

    #This is where you print the information about each cluster
    '''
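#Usage sketch (labs as produced by the DBSCAN step; n_clusters taken as the number of
#distinct labels):
#
#    split_trajectory("trajectory.dat", "input", labs, len(set(labs)))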
                    type=str,
                    nargs=1,
                    help="The reference configuration to superimpose to")
parser.add_argument('victims',
                    type=str,
                    nargs='+',
                    help="The configurations to superimpose on the reference")
args = parser.parse_args()

#Get the reference files
top_file = args.topology[0]
ref_dat = args.reference[0]

#Create list of configurations to superimpose
to_sup = []
r = LorenzoReader2(ref_dat, top_file)
ref = r._get_system()
ref.inbox()
ref_conf = fetch_np(ref)
for i in args.victims:
    r = LorenzoReader2(i, top_file)
    sys = r._get_system()
    to_sup.append(sys)

sup = SVDSuperimposer()

#Run the biopython superimposer on each configuration and rewrite its configuration file
for i, sys in enumerate(to_sup):
    cur_conf = fetch_np(sys)
    sup.set(ref_conf, cur_conf)
    sup.run()
    outfile), file=stderr)

#Get relevant parameters from the input file
top_file = get_input_parameter(inputfile, "topology")
if "RNA" in get_input_parameter(inputfile, "interaction_type"):
    environ["OXRNA"] = "1"
else:
    environ["OXRNA"] = "0"

import UTILS.base #this needs to be imported after the model type is set

#Calculate the number of configurations.
num_confs = cal_confs(traj_file)

r0 = LorenzoReader2(traj_file, top_file)
r0._get_system()

#launch find_angles using the appropriate number of threads to find all duplexes
if not parallel:
    print("INFO: Fitting duplexes to {} configurations using 1 core.".format(num_confs), file=stderr)
    r = LorenzoReader2(traj_file, top_file)
    duplexes_at_step = find_angles(r, num_confs)

if parallel:
    print("INFO: Fitting duplexes to {} configurations using {} cores.".format(num_confs, n_cpus), file=stderr)