def RNAstructure_sample_process(worker_num, in_file_prefix, output_dir, e, seed, wn_tag="", lock=None):
    """
    Process used in RNAstructure_sample. Called from RNAstructure_sample_process_helper.

    Runs one stochastic sampling call into a worker-specific temporary .ct
    file, reads the sampled structures back, removes the temporary file and
    returns every structure joined into a comma separated string.

    Options:
    worker_num     - worker index, combined with wn_tag to name the temporary file
    in_file_prefix - input prefix; ".pfs" is appended to it
    output_dir     - prefix that "<worker><wn_tag>temp.ct" is appended to
    e, seed        - passed through to runRNAstructure_stochastic
    wn_tag         - optional suffix added to the worker number
    lock           - optional lock held only while the sampling call runs

    Returns a list of strings, one per sampled structure.
    """
    wn = str(worker_num) + wn_tag
    temp_ct = output_dir + wn + "temp.ct"
    print("Worker num: " + wn)

    if lock is not None:
        lock.acquire()
    try:
        runRNAstructure_stochastic(in_file_prefix + ".pfs",
                                   temp_ct,
                                   e=e,
                                   seed=seed,
                                   parallel=False)
    finally:
        # Release even when sampling fails; otherwise every other worker
        # waiting on the same lock would block forever.
        if lock is not None:
            lock.release()

    structs = get_ct_structs(temp_ct)
    structs_str = [",".join(s) for s in structs]
    OSU.remove_file(temp_ct)
    return structs_str
def write_R2D2_output_to_files(reactivities_prefix, R2D2_pairs, R2D2_consensus, R2D2_consensus_ct, react_rhos, crystals_mat, crystals_ctfile, crystals_ct, cryst_seq):
    """
    Write out results of R2D2 iterations.

    Every output file name starts with reactivities_prefix. Written, in order:
    the pairs matrix (.txt), the consensus matrix (.txt), the benchmark
    statistics of the consensus against crystals_mat (.stats), the
    reactivities laid over the consensus ct, the consensus .ct file, its
    CircleCompare .ps against crystals_ctfile converted to .jpg, and the
    reactivities laid over the crystal ct.
    """

    def matrix_as_text(mat):
        # One tab separated line per matrix row, with a trailing newline.
        text_rows = []
        for row in mat.tolist():
            text_rows.append("\t".join([str(bp) for bp in row]))
        return "\n".join(text_rows) + "\n"

    consensus_pre = "%s_R2D2_consensus" % (reactivities_prefix)

    with open("%s_R2D2_pairs.txt" % (reactivities_prefix), "w") as f:
        f.write(matrix_as_text(R2D2_pairs))

    with open(consensus_pre + ".txt", "w") as f:
        f.write(matrix_as_text(R2D2_consensus))

    with open(consensus_pre + ".stats", "w") as f:
        consensus_stats = SU.calc_benchmark_statistics_matrix(
            R2D2_consensus, crystals_mat)
        f.write(str(consensus_stats))

    consensus_binary_ct = SU.binary_mat_to_binary_ct(R2D2_consensus)
    write_reactivities_in_ct(
        consensus_binary_ct, react_rhos,
        reactivities_prefix + "_R2D2_consensus_ct_react.txt")

    SU.ct_list_to_file(R2D2_consensus_ct, cryst_seq, consensus_pre + ".ct")
    SU.runRNAstructure_CircleCompare(consensus_pre + ".ct", crystals_ctfile,
                                     consensus_pre + ".ps")
    OSU.system_command("convert %s_R2D2_consensus.ps %s_R2D2_consensus.jpg" %
                       (reactivities_prefix, reactivities_prefix))

    write_reactivities_in_ct(crystals_ct, react_rhos,
                             reactivities_prefix + "_crystal_ct_react.txt")
def convert_center(image):
    """
    Centers image.

    The `convert` command writes its result to "<image>.temp", which is then
    renamed over the original image.
    """
    centered_copy = image + ".temp"
    command = "convert %s -background none -gravity Center %s.temp" % (image,
                                                                       image)
    OSU.system_command(command)
    os.rename(centered_copy, image)
def load_train_model(rho_midpoint, constrain_val, paired_weight, reactivities, crystals, sample_n, react_rhos, structs_pickle_dir, output_dir, out_stat_dir, outname, scaling_func, cap_rhos, shape_slope, shape_intercept):
    """
    Runs the analysis of a (rho_midpoint, constrain_val, paired_weight) triple by
    loading in the sampled structures from sampling with hard constraints and
    constrain_val.

    Options used by this function:
    reactivities       - dict; element [3] of each value is used as the initial
                         list of constrained folds when it is a list
    crystals, react_rhos, scaling_func, cap_rhos - passed to train_constraint_model
    structs_pickle_dir - prefix of the "constrained_folds_<constrain_val>.p" pickle
    out_stat_dir       - parent of the per-parameter directory created here, also
                         passed to train_constraint_model
    outname            - output name; str(rho_midpoint) is appended

    sample_n, output_dir, shape_slope and shape_intercept are accepted but not
    used in this function.

    Returns [[rho_midpoint, constrain_val, paired_weight, F_score]].
    """
    constrained_folds = {}
    # Obsolete since we use 1 reactivity at a time
    for k in reactivities:
        if isinstance(reactivities[k][3], list):
            constrained_folds[k] = reactivities[k][3]
        else:
            constrained_folds[k] = []

    param_label = "_".join(
        [str(constrain_val), str(rho_midpoint), str(paired_weight)])
    out_param_dir = "/".join([out_stat_dir, param_label]) + "/"
    OSU.create_directory(out_param_dir)

    if constrain_val != "no_constrained":
        # load sampled folds with hard constrain c
        pickle_path = "%sconstrained_folds_%s.p" % (structs_pickle_dir,
                                                    str(constrain_val))
        # NOTE: pickle.load executes arbitrary code from the file; only point
        # structs_pickle_dir at pickles this pipeline produced itself.
        with open(pickle_path, "rb") as pickle_file:
            constrained_structs_dict = pickle.load(pickle_file)

        # Obsolete since we use 1 reactivity at a time
        for k in reactivities:
            constrained_structs = set(constrained_structs_dict[k])
            constrained_structs.update(set(constrained_folds[k]))
            constrained_structs = set(
                SU.merge_labels(list(constrained_structs), to_string=False))
            constrained_folds[k] = [(s.split(","), l)
                                    for s, l in constrained_structs]

    # call training routine
    # NOTE(review): out_param_dir is created above but out_stat_dir is what is
    # passed on here - confirm that is intended.
    _stats_dict, _min_dist_ind_dict, F_score = train_constraint_model(
        crystals,
        constrained_folds,
        constrain_val,
        react_rhos,
        rho_midpoint,
        out_stat_dir,
        outname + str(rho_midpoint),
        weight=paired_weight,
        scaling_func=scaling_func,
        cap_rhos=cap_rhos)
    return [[rho_midpoint, constrain_val, paired_weight, F_score]]
def run_Fold(seqfile, reactivities_prefix, react_rhos, num_proc, crystals_mat, crystals_ctfile, output_suffix, shape_direct=False, shape_slope=1.1, shape_intercept=-0.3):
    """
    RNAstructure-Fold process
    Will handle both SHAPE-directed and not SHAPE-directed

    All outputs are named "<reactivities_prefix>_<output_suffix>" plus an
    extension: .ct (fold), .ps and .jpg (CircleCompare against
    crystals_ctfile), .stats (benchmark statistics against crystals_mat) and
    _ct_react.txt (reactivities laid over the folded ct).

    When shape_direct is True, "<reactivities_prefix>.rho" is passed to Fold
    as the shapefile.

    Returns (fold_shape_ct, fold_shape_react_mat): the first structure of the
    folded .ct file and its binary matrix.

    NOTE(review): `lock` is not a parameter; it is read from the enclosing
    module scope - confirm it is defined there.
    """
    out_prefix = "%s_%s" % (reactivities_prefix, output_suffix)
    ct_file = out_prefix + ".ct"

    fold_kwargs = {
        "p": num_proc,
        "shape_intercept": shape_intercept,
        "shape_slope": shape_slope,
    }
    if shape_direct:
        fold_kwargs["shapefile"] = reactivities_prefix + ".rho"

    if lock is not None:
        lock.acquire()
    try:
        SU.runRNAstructure_fold(seqfile, ct_file, **fold_kwargs)
        SU.runRNAstructure_CircleCompare(ct_file, crystals_ctfile,
                                         out_prefix + ".ps")
    finally:
        # Release even if RNAstructure fails so other processes sharing the
        # lock are not left blocked.
        if lock is not None:
            lock.release()

    OSU.system_command("convert %s.ps %s.jpg" % (out_prefix, out_prefix))

    fold_shape_ct = SU.get_ct_structs(ct_file)[0]
    fold_shape_react_mat = SU.ct_struct_to_binary_mat(fold_shape_ct)
    with open(out_prefix + ".stats", "w") as f:
        f.write(
            str(
                SU.calc_benchmark_statistics_matrix(fold_shape_react_mat,
                                                    crystals_mat)))

    write_reactivities_in_ct(fold_shape_ct, react_rhos,
                             out_prefix + "_ct_react.txt")
    return fold_shape_ct, fold_shape_react_mat
def convert_center_resize(image, res):
    """
    Centers image and resizes.

    First tries `convert` with "-alpha discrete -blur 0x1"; if that attempt
    raises, retries without those two options. In both cases the result is
    written to "<image>.temp" and renamed over the original image.

    Options:
    image - path of the image, modified in place
    res   - value given to convert's -extent option
    """
    temp_image = image + ".temp"
    try:
        command = "convert %s -alpha discrete -blur 0x1 -background none -gravity Center -extent %s %s.temp" % (
            image, res, image)
        print(command)
        OSU.system_command(command)
        os.rename(temp_image, image)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        command = "convert %s -background none -gravity Center -extent %s %s.temp" % (
            image, res, image)
        print(command)
        OSU.system_command(command)
        os.rename(temp_image, image)
def wait_for_jcoll(jcoll, out_dir, wait=30):
    """
    Makes current process wait until no jobs are left in a job collection jcoll

    Polls `jlist -jcoll <jcoll>` every `wait` seconds until its stdout
    contains "listempty". Each unsuccessful poll and the final local time are
    appended to "<out_dir>jcoll_waiting.txt".
    """
    jlist_command = ['/opt/voyager/nbs/bin/jlist', '-jcoll', jcoll]
    while True:
        poll = subprocess.Popen(jlist_command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = poll.communicate()
        if "listempty" in out:
            break
        OSU.system_command(
            "echo \"waiting\n%s\n%s\n\" >> %sjcoll_waiting.txt" %
            (out, err, out_dir))
        time.sleep(wait)

    OSU.system_command("echo \"%s\n\" >> %sjcoll_waiting.txt" %
                       (time.localtime(), out_dir))
def generate_DG_output(cotrans, start=-1, end=-1):
    """
    Generate DG state plots and .dump file

    Writes one tab separated row per structure of every length in
    cotrans.file_data to "<output_dir>/DG_state_plot.dump", then runs
    make_DG_state_plot.R on that file. start / end default to the smallest /
    largest length in cotrans.file_data when left at -1.
    """
    lengths = sorted(cotrans.file_data)
    if start == -1:
        start = lengths[0]
    if end == -1:
        end = lengths[-1]
    print("generate_DG_output: " + str(start) + " " + str(end))

    with open(cotrans.output_dir + "/DG_state_plot.dump", 'w') as dump:
        dump.write("nt\tDG\tmfe_flag\tbest_flag\tdistance\trc_flag\n")
        for length in lengths:
            DG = cotrans.file_data[length]["free_energies"]
            min_DG = min(DG)
            # list of struct_num of min_distance
            best = cotrans.file_data[length]["min_dist_indices"]
            rows = []
            for c, dg in enumerate(DG):
                fields = [
                    str(length),  # nt
                    str(dg),  # DG
                    str(int(min_DG == dg)),  # mfe_flag
                    str(int(c in best)),  # best_flag
                    str(cotrans.file_data[length]["distances"][c]),  # distance
                    str(cotrans.file_data[length]["rc_flag"]),  # rc_flag
                ]
                rows.append("\t".join(fields))
            dump.write("\n".join(rows))
            dump.write("\n")

    r_command = "R < make_DG_state_plot.R --no-save --args %s/DG_state_plot.pdf %s/DG_state_plot.dump %s %s" % (
        cotrans.output_dir, cotrans.output_dir, start, end)
    print(r_command)
    OSU.system_command(r_command)
    return
def output_train_model_stats(all_diffs_stats, min_dist_struct_indices, F_score, crystals, reactivities_structs, output_dir):
    """
    Output summary of the benchmarking

    Pickles the statistics and minimum distance indices, draws a CircleCompare
    diagram (.ct, .ps, .jpg) for every minimum distance structure, and writes
    "diffs_best_F.txt" with the F score, the labels of the selected structures
    and every entry of all_diffs_stats.
    """
    # Pickle benchmark statistics and indices of structures with the minimum distance
    to_pickle = [
        ("/save_all_diffs_stats.p", all_diffs_stats),
        ("/save_min_dist_struct_indices.p", min_dist_struct_indices),
    ]
    for suffix, payload in to_pickle:
        with open(output_dir + suffix, "wb") as f:
            pickle.dump(payload, f)

    react_keys = sorted(reactivities_structs.keys())

    # Generate circle compare diagrams of minimum distance structures
    for k in react_keys:
        # corresponding crystal key from the reactivity key
        ck = k.split('-')[0]
        for mdi in min_dist_struct_indices[k]:
            outpre = output_dir + "/" + k + str(mdi)
            ct_path = outpre + ".ct"
            SU.ct_list_to_file(reactivities_structs[k][mdi][0],
                               crystals[ck][1], ct_path)
            SU.runRNAstructure_CircleCompare(ct_path, crystals[ck][3],
                                             outpre + ".ps")
            OSU.system_command("convert %s.ps %s.jpg" % (outpre, outpre))

    # Write out information on highest F score structures
    with open(output_dir + "/diffs_best_F.txt", 'w') as f:
        f.write("Max avg F score: " + str(F_score) + "\n")
        for k in react_keys:
            reactivities_labels = []
            for x in min_dist_struct_indices[k]:
                reactivities_labels.append(reactivities_structs[k][x][1])
            f.write("Methods: " + str(k) + "\t" + str(reactivities_labels) +
                    "\n")
        for k, stat_i in all_diffs_stats.items():
            f.write("Structure key: %s\n" % (k))
            f.write(str(stat_i) + "\n")
    return
def generate_output(self):
    """
    Majority of Cotranscriptional SHAPE-Seq output is created here.
    This includes the DG plot, best structure images, and the movie of the
    best structures.
    """
    draw_dir = OSU.create_directory(self.output_dir + "/draw/")
    nn_dir = OSU.create_directory(self.output_dir + "/nn/")
    OSU.create_directory(nn_dir + "distances/")

    sorted_lengths = sorted(self.file_data)
    longest = sorted_lengths[-1]
    zero_padding = int(math.floor(math.log10(longest)) + 1)

    # Only lengths whose "rc_flag" entry is truthy get an image drawn.
    draw_struct_nums = []
    for length in sorted_lengths:
        if self.file_data[length]["rc_flag"]:
            draw_struct_nums.append(length)
    num_frames = len(draw_struct_nums)

    # Parallelized function calls to generate DG plot, distance matrices for
    # clustering of structures, and creating images of minimum distance structures
    draw_args_pool = zip([self.file_data[dsn] for dsn in draw_struct_nums],
                         draw_struct_nums,
                         repeat(longest),
                         range(1, num_frames + 1),
                         repeat(zero_padding),
                         repeat(draw_dir),
                         repeat(self.output_dir + "/ct/"),
                         repeat(self.draw_all),
                         repeat(self.most_count_tie_break))
    dg_task = (PCSU.generate_DG_output, (self, 1, longest))
    # list + zip(...) relies on zip returning a list
    args_pool = [dg_task] + zip(repeat(PCSU.generate_best_struct_images),
                                draw_args_pool)

    if self.p == 1:
        for task in args_pool:
            PCSU.calculate_function_helper(task)
    else:
        # moved this outside of multiprocessing because Quest has issues
        # running it in a pool
        PCSU.generate_DG_output(self, 1, longest)
        PCSU.run_output_multiprocessing_pool(PCSU.calculate_function_helper,
                                             args_pool[1:], self.p)

    # Weird error on quest that it will ignore this command if sample size is
    # very large
    if not OSU.check_file_exists(self.output_dir + "/DG_state_plot.pdf"):
        PCSU.generate_DG_output(self, 1, longest)

    # Use ffmpeg on varna_num.png's to create video of the minimum distance
    # folding pathway
    last_frame = draw_dir + str(num_frames).zfill(
        zero_padding) + "_structure.png"
    extra_frame = draw_dir + str(num_frames + 1).zfill(
        zero_padding) + "_structure.png"
    # ffmpeg needs a duplicate of the last frame
    OSU.make_symbolic_link(last_frame, extra_frame)
    VIU.generate_movie(draw_dir + "%%%dd_structure.png" % (zero_padding),
                       self.output_dir + "/movie.mp4")
    return
def read_all_dbn_dirs(dbn_dirs):
    """
    Read dbns in multiple directories to pair correct lengths together.
    Returns a dictionary of .png files name that should have been made by R2D2
    organized by length.

    Options:
    dbn_dirs - iterable of directory prefixes; "*.dbn" is appended directly,
               so each entry needs its own trailing separator

    The length of a .dbn file is the length of the first whitespace separated
    token on its last line. The image name is the .dbn path with its trailing
    "[_mult<N>].dbn" replaced by "_structure.png"; it is recorded only when
    that file exists and is not already listed for the length.

    Returns a defaultdict(list) mapping length -> list of image paths.
    """
    # Escaped dot and end anchor: only the real file extension is rewritten,
    # never ".dbn"-like text inside a directory name.
    dbn_suffix = re.compile(r'(_mult\d+)?\.dbn$')

    all_dbns = defaultdict(list)
    for dbn_dir in dbn_dirs:
        for dbn_f in glob.glob(dbn_dir + "*.dbn"):
            with open(dbn_f, "r") as f:
                lines = f.readlines()
            length = len(lines[-1].split()[0])
            image = dbn_suffix.sub('_structure.png', dbn_f)
            if image not in all_dbns[length] and OSU.check_file_exists(image):
                all_dbns[length].append(image)
    return all_dbns
def merge_labels(list_sl, to_string=True):
    """
    Merges labels of a list of tuples where the second element in the tuple is
    the label.

    list_sl   - iterable of (struct, labels) pairs; labels is either a single
                string or an iterable of comma separated label strings
    to_string - if True, the struct is joined into a comma separated string
                before being used as the key; otherwise it is used as given

    Returns a list of (struct, "label1,label2,...") tuples, one per distinct
    struct, with each label listed once.
    """
    merged = {}
    for entry in list_sl:
        raw_labels = entry[1]
        if isinstance(raw_labels, basestring):
            labels = [raw_labels]
        else:
            split_labels = [b.split(",") for b in raw_labels]
            labels = list(set(OSU.flatten_list(split_labels)))

        key = ",".join(entry[0]) if to_string else entry[0]

        for label in labels:
            seen = merged.setdefault(key, [])
            if label not in seen:
                seen.append(label)

    return [(key, ",".join(merged[key])) for key in merged]
def generate_movie(regex, outfile, size="1200x2800"):
    """
    Generate a movie with images as described by regex.

    Options:
    regex   - input image pattern handed to ffmpeg's -i option
    outfile - path of the movie to write
    size    - value for ffmpeg's -s option; "" omits the option

    With a size, the mpeg4 command is tried first and the libx264 command is
    run if that attempt raises. Every command is printed before it is run.
    """
    if size != "":
        try:
            command = "ffmpeg -r 1 -i " + regex + " -vcodec mpeg4 -b 800k -r 10 -s " + size + " -pix_fmt yuv420p " + outfile
            print(command)
            OSU.system_command(command)
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are allowed to propagate.
            command = "ffmpeg -framerate 1 -i " + regex + " -c:v libx264 -r 10 -s " + size + " -pix_fmt yuv420p " + outfile
            print(command)
            OSU.system_command(command)
    else:
        command = "ffmpeg -framerate 1 -i " + regex + " -vcodec mpeg4 -b 800k -r 10 -pix_fmt yuv420p " + outfile
        print(command)
        OSU.system_command(command)
def wait_jcoll_finish_any(jcoll, out_dir, max_jobs, wait=30):
    """
    Waits until any job in the job collection is done before returning the
    number of available jobs

    Polls count_jcoll_remaining every `wait` seconds until fewer than max_jobs
    remain, logging each poll and the final local time to
    "<out_dir>jcoll_waiting.txt". Returns max_jobs minus the last count.
    """
    while True:
        num_running = count_jcoll_remaining(jcoll, out_dir)
        if num_running < max_jobs:
            OSU.system_command(
                "echo \"jobs available\n%s\n%s\n\" >> %sjcoll_waiting.txt" %
                (num_running, max_jobs, out_dir))
            break
        OSU.system_command(
            "echo \"waiting jobs available\n%s\n%s\n\" >> %sjcoll_waiting.txt"
            % (num_running, max_jobs, out_dir))
        time.sleep(wait)

    OSU.system_command("echo \"%s\n\" >> %sjcoll_waiting.txt" %
                       (time.localtime(), out_dir))
    return max_jobs - num_running
Copyright (C) 2017 Julius B. Lucks, Angela M Yu. All rights reserved. Distributed under the terms of the GNU General Public License, see 'LICENSE'. """ import LucksLabUtils_config import OSU import SU import glob import re LucksLabUtils_config.config("Quest_R2D2") # set up environment # parse command line arguments opts = OSU.getopts("", [ "KineFold_dir=", "outdir=", "KineFold_times=", "seq_name=", "time=", "pseudoknots=", "entanglements=", "speed=", "sequence=" ]) print opts KineFold_dir = opts["--KineFold_dir"] outdir = opts["--outdir"] KF_times = int(opts["--KineFold_times"]) if "--KineFold_times" in opts else 0 seq_name = opts["--seq_name"] if "--seq_name" in opts else "test" time = opts["--time"] if "--time" in opts else 160000 pseudoknots = bool( opts["--pseudoknots"] == "True") if "--pseudoknots" in opts else False entanglements = bool( opts["--entanglements"] == "True") if "--entanglementss" in opts else False speed = opts["--speed"] if "--speed" in opts else 20 sequence = opts["--sequence"] if "--sequence" in opts else "" # create directories
Copyright (C) 2016, 2017 Julius B. Lucks, Angela M Yu. All rights reserved. Distributed under the terms of the GNU General Public License, see 'LICENSE'. """ import LucksLabUtils_config import VIU import OSU import SU import re LucksLabUtils_config.config("Quest_R2D2") # set up environment # parse command line arguments opts = OSU.getopts("", [ "seq=", "seq_start=", "seq_end=", "outdir=", "rhos_dir=", "SHAPE_direct=", "make_DG_dump=" ]) print opts seq = opts["--seq"] outdir = opts["--outdir"] seq_start = int(opts["--seq_start"]) if "--seq_start" in opts else -1 seq_end = int(opts["--seq_end"]) if "--seq_end" in opts else -1 rhos_dir = opts["--rhos_dir"] if "--rhos_dir" in opts else "" SHAPE_direct = bool( opts["--SHAPE_direct"] == "True") if "--SHAPE_direct" in opts else False make_DG_dump = bool( opts["--make_DG_dump"] == "True") if "--make_DG_dump" in opts else False # generate MFE movie VIU.generate_MFE_CoTrans_movie(seq, outdir, seq_start, seq_end, rhos_dir, SHAPE_direct)
Version: 0.0.1 Author: Angela M Yu, 2014-2017 Copyright (C) 2017 Julius B. Lucks and Angela M Yu. All rights reserved. Distributed under the terms of the GNU General Public License, see 'LICENSE'. """ import SU import OSU import LucksLabUtils_config import glob # setup environment variables LucksLabUtils_config.config("Quest_R2D2") opts = OSU.getopts("o:", ["dbn_dir=", "out_prefix="]) print opts output_dir = OSU.create_directory(opts['-o']) dbn_dir = opts['--dbn_dir'] out_prefix = opts['--out_prefix'] dbns = {} for dbnf in glob.glob(dbn_dir + "/*.dbn"): # read in each rho reactivitiy spectra with open(dbnf, "r") as f: dbn = f.readlines()[-1].strip() # last line with dotbracket SU.run_dot2ct(dbnf, output_dir + "temp.ct") SU.runRNAstructure_efn2(output_dir + "temp.ct", output_dir + "temp.efn2", parallel=False)
def run(self):
    """
    The main routine of R2D2.
    Parses reactivities output from Spats and controls the calls to PCSU.

    Every "*_reactivities.txt" file in self.input_dir is processed by
    PCSU.run_cotrans_length_helper, through a multiprocessing pool when
    self.p > 1 and serially otherwise. Per-length results are stored in
    self.file_data and self.struct_distances, the rho tables are written,
    intermediate files are moved into "<ext>_dir/" directories and
    self.generate_output() is called.
    """
    OSU.create_directory(self.output_dir)
    # JBL - extra // in these directory names
    # AMY - did this on purpose in case user forgets a trailing '/'
    ct_dir = OSU.create_directory(self.output_dir + "/ct/")
    pickle_dir = OSU.create_directory(self.output_dir + "/pickles/")
    infiles = glob.glob(self.input_dir + "/*_reactivities.txt")

    # Pre-processing all input reactivities files - trimming adapter,
    # recalculating thetas, calculating rhos
    max_best_states = 0  # max number of best states across the lengths
    rhos = {}
    rhos_cut = {}

    # JBL TODO - check for 3 input files
    # Set up and run parallelized calculations on each length
    args_pool = zip(infiles, repeat(self.output_dir), repeat(ct_dir),
                    repeat(pickle_dir), repeat(self.adapterseq),
                    repeat(self.endcut), repeat(self.pol_fp), repeat(self.e),
                    repeat(self.constrained_c), repeat(self.cap_rhos),
                    repeat(self.scale_rho_max), repeat(self.scaling_func),
                    repeat(self.weight_paired))
    print("run args_pool length: " + str(len(args_pool)))

    if self.p > 1:  # start pool if multithread
        # JBL TODO - check multithread with 3 input files
        pool = Pool(processes=self.p)
        results = pool.imap(PCSU.run_cotrans_length_helper, args_pool)
    else:  # no multiprocessing
        pool = None
        # Lazy generator: each length is processed only when the loop below
        # asks for it, as in a plain serial loop.
        results = (PCSU.run_cotrans_length_helper(args_slice)
                   for args_slice in args_pool)

    try:
        for (length_key, file_data_length_key, struct_distances_length,
             num_min_states, rho, rho_cut) in results:
            print("done length_key: " + str(length_key))
            if max_best_states < num_min_states:
                max_best_states = num_min_states
            self.file_data[length_key] = file_data_length_key
            self.struct_distances[length_key] = struct_distances_length
            rhos[length_key +
                 self.endcut] = "\t".join([str(r) for r in rho]) + "\n"
            rhos_cut[length_key +
                     self.endcut] = "\t".join([str(r)
                                               for r in rho_cut]) + "\n"
    finally:
        # The pool used to be left open; shut its workers down whether or
        # not processing succeeded.
        if pool is not None:
            pool.close()
            pool.join()

    # Output the rho reactivity matrix
    # NOTE(review): unlike the "/ct/" paths above, these names are appended to
    # self.output_dir without a separator - confirm output_dir always ends
    # with '/'.
    with open(self.output_dir + "rho_table.txt", 'w') as f:
        print("sorted(rhos): " + str(len(rhos.keys())))
        for key in sorted(rhos):
            f.write(rhos[key])

    with open(self.output_dir + "rho_table_cut.txt", 'w') as f:
        print("sorted(rhos): " + str(len(rhos_cut.keys())))
        for key in sorted(rhos_cut):
            f.write(rhos_cut[key])

    # organizing files into their respective directories
    for file_ext in ["rho", "theta", "seq", "pfs", "con", "efn2"]:
        OSU.create_directory(self.output_dir + file_ext + "_dir/")
        OSU.system_command(
            "mv %s/*%s %s/%s_dir/" %
            (self.output_dir, file_ext, self.output_dir, file_ext))

    self.generate_output()  # generate majority of output
all_dbns = defaultdict(list) for dbn_dir in dbn_dirs: dbns = glob.glob(dbn_dir + "*.dbn") for dbn_f in dbns: with open(dbn_f, "r") as f: lines = f.readlines() length = len(lines[-1].split()[0]) image = re.sub('(_mult\d+)?.dbn', '_structure.png', dbn_f) if image not in all_dbns[length] and OSU.check_file_exists(image): all_dbns[length].append(image) return all_dbns if __name__ == "__main__": # read in arguments opts = OSU.getopts("", ["dbn_dirs=", "output_dir="]) dbn_dirs = opts["--dbn_dirs"].split(",") width = 1200 * len(dbn_dirs) output_dir = OSU.create_directory(opts["--output_dir"]) # read dbns to pair correct lengths together all_dbns = read_all_dbn_dirs(dbn_dirs) # create images by horizontally concatenating previously made images from R2D2 output count = 0 zero_padding = int(math.floor(math.log10(len(all_dbns))) + 1) for len in sorted(all_dbns): count += 1 VIU.horizontal_image_concat( "%s/%s.png" % (output_dir, str(count).zfill(zero_padding)), all_dbns[len])
All rights reserved. Distributed under the terms of the GNU General Public License, see 'LICENSE'. """ import OSU import LucksLabUtils_config import VIU import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt from seaborn import color_palette import numpy as np LucksLabUtils_config.config("Quest_R2D2") opts = OSU.getopts("", [ "Y_first_sampled=", "MDS_ct_coords=", "MDS_mat_coords=", "PCA_coords=", "outfile_pre=", "output_dir=", "first_color_dict=" ]) print opts filename_Y_first_sampled = opts["--Y_first_sampled"] filename_MDS_ct_coords = opts["--MDS_ct_coords"] filename_MDS_mat_coords = opts["--MDS_mat_coords"] filename_PCA_coords = opts["--PCA_coords"] outfile = opts["--outfile_pre"] output_dir = OSU.create_directory(opts["--output_dir"]) filename_first_color_dict = opts["--first_color_dict"] # plotting setup plt.style.use('seaborn-whitegrid') fig = plt.figure(figsize=(3, 3), dpi=300) # load input
def horizontal_image_concat(outfile, images):
    """
    Horizontal concatenate images to a new image file.

    outfile - path of the image to create
    images  - list of image paths, joined with spaces on the command line
    """
    inputs = " ".join(images)
    command = "convert +append " + inputs + " " + outfile
    OSU.system_command(command)
def vertical_image_concat(outfile, images):
    """
    Vertical concatenate images to a new image file.

    outfile - path of the image to create
    images  - list of image paths, joined with spaces on the command line
    """
    inputs = " ".join(images)
    command = "convert -append " + inputs + " " + outfile
    OSU.system_command(command)
""" from __future__ import division # allows division of ints to output decimals import LucksLabUtils_config import OSU import SU import glob import re import numpy from collections import defaultdict LucksLabUtils_config.config("Quest_R2D2") # set up environment # parse command line arguments opts = OSU.getopts("", [ "KineFold_dir=", "out_dir=", "time_weight", "simulation_time_ms=", "last_structure" ]) print opts KineFold_dir = opts["--KineFold_dir"] outdir = OSU.create_directory(opts["--out_dir"]) time_weight = True if "--time_weight" in opts else False last_structure = True if "--last_structure" in opts else False simulation_time_ms = int( opts["--simulation_time_ms"]) if "--simulation_time_ms" in opts else -1 assert int(time_weight) + int(last_structure) <= 1, ( "Only can specify either time_weight OR last_structure") # From Paul Gasper's pairs_from_dbn_2.py def read_dbn(dbn_fn):
import re from itertools import repeat from multiprocessing import Pool, Lock import numpy from sys import maxsize from sklearn.decomposition import PCA from sklearn.manifold import MDS from sklearn.preprocessing import scale from sklearn.metrics import pairwise_distances import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt # setup environment variables LucksLabUtils_config.config("Quest_R2D2") opts = OSU.getopts("o:c:r:p:", ["shape_intercept=", "shape_slope="]) print opts numpy.set_printoptions(threshold=maxsize) plt.style.use('seaborn-whitegrid') fig = plt.figure(figsize=(8, 8)) reactivities_files = glob.glob(opts['-r']) crystal_files = glob.glob(opts['-c']) output_dir = OSU.create_directory(opts['-o']) R2D2_output_dir = OSU.create_directory(output_dir + "/R2D2_intermediate/") num_proc = int(opts['-p']) if '-p' in opts else 1 shape_intercept = float(opts["--shape_intercept"]) \ if "--shape_intercept" in opts else -0.3 shape_slope = float(opts["--shape_slope"]) if "--shape_slope" in opts else 1.1 reactivities = PAU.parse_input_panels(reactivities_files, output_dir)
Version: 0.0.1 Author: Angela M Yu, 2014-2016 Copyright (C) 2016 Julius B. Lucks and Angela M Yu. All rights reserved. Distributed under the terms of the GNU General Public License, see 'LICENSE'. """ import OSU import LucksLabUtils_config from collections import defaultdict # setup environment variables LucksLabUtils_config.config("Quest_R2D2") opts = OSU.getopts("o:", [ "3_times_dirs=", "50_times_dir=", "47_times_dir=", "100_times_dir=", "file_prefix=" ]) print opts output_dir = OSU.create_directory(opts['-o']) file_prefix = opts["--file_prefix"] if "--100_times_dir" in opts: times_dirs = [ "%s/%s%s/" % (opts["--100_times_dir"], file_prefix, i) for i in range(1, 101) ] elif "--3_times_dirs" in opts and "--50_times_dir" in opts and "--47_times_dir" in opts: times_dirs = opts["--3_times_dirs"].split(",") times_dirs += [ "%s/%s%s/" % (opts["--50_times_dir"], file_prefix, i)
def generate_MFE_CoTrans_movie(seq, outdir, seq_start=-1, seq_end=-1, rhos_dir="", SHAPE_direct=False):
    """
    Generate co-transcriptional MFE folding movie.
    Options to start and end at specific lengths, seq_start and seq_end
    respectively. Can overlay rho reactivities if given a directory with .rho
    files corresponding to the sequence.

    Options:
    seq          - full sequence; seq[seq_start:seqi] is folded for each length
    outdir       - output prefix; images, temporary files and the movie are
                   named by appending directly to it
    seq_start    - first position, -1 means 0
    seq_end      - last position, -1 means len(seq); otherwise incremented by 1
    rhos_dir     - directory of .rho files, keyed by their number of lines
    SHAPE_direct - if True, fold with the matching .rho file and skip lengths
                   that have none
    """
    OSU.create_directory(outdir)
    OSU.create_directory(outdir + "/seq/")
    OSU.create_directory(outdir + "/ct/")
    if seq_start == -1:
        seq_start = 0
    if seq_end == -1:
        seq_end = len(seq)
    else:
        seq_end += 1
    zero_padding = int(math.floor(math.log10(seq_end)) + 1)
    varna_num = 0

    rhos = {}
    if rhos_dir != "":
        # reads through .rho files found in rhos_dir
        for rf in glob.glob(rhos_dir + "/*.rho"):
            # read in each rho reactivity spectra
            with open(rf, "r") as f:
                rho = [line.split()[1] for line in f.readlines()]
            rhos[len(rho)] = [rho, rf]  # add in rho file here

    # The temporary files used to be created in the current working directory
    # but removed from outdir, so they were never cleaned up. They are now
    # created at the same paths that are removed below.
    temp_dbn = outdir + "temp.dbn"
    temp_ext_dbn = outdir + "temp_ext.dbn"

    for seqi in range(seq_start + 1, seq_end + 1):
        if seqi in rhos:
            rho_varna = "\"" + ";".join(rhos[seqi][0] +
                                        (["-1"] * (seq_end - seqi))) + "\""
        else:
            rho_varna = "\"" + ";".join(["-1"] * (seq_end)) + "\""
        seqf = outdir + "/seq/" + str(seqi) + ".seq"
        ctf = outdir + "/ct/" + str(seqi) + ".ct"
        NAU.make_seq(seq[seq_start:seqi], seqf)

        if SHAPE_direct and seqi in rhos:
            SU.runRNAstructure_fold(seqf, ctf, rhos[seqi][1])
        elif SHAPE_direct:
            continue
        else:
            SU.runRNAstructure_fold(seqf, ctf)

        SU.run_ct2dot(ctf, 0, temp_dbn)
        # Pad the last line of the dot-bracket file with "." up to seq_end.
        OSU.system_command("sed '$s/$/&%s/' %s > %s " %
                           ("." * (seq_end - seqi), temp_dbn, temp_ext_dbn))
        varna_num += 1
        image = outdir + str(varna_num).zfill(zero_padding) + "_structure.png"
        run_VARNA(temp_ext_dbn, image, rho_varna)
        convert_center_resize(image, "1440x2000")

    OSU.remove_file(temp_dbn)
    OSU.remove_file(temp_ext_dbn)
    generate_movie(outdir + "%%%dd_structure.png" % (zero_padding),
                   outdir + "/movie.mp4", "")
def R2D2_process(input_prefix, R2D2_output_dir, draw_dir, react_rhos, crystals_ck, rnum):
    """
    Slightly reduced version of a R2D2 process for this benchmarking code.
    # taking code from cotranscriptional case (PCSU.run_cotrans_length) which
    # has extraneous parts in this use case
    # few lines in PCSU.run_cotrans_length made it unable to be used for this
    # case. ex. length_key

    Options:
    input_prefix - full path plus input prefix of reactivities
    R2D2_output_dir - R2D2 output directory
    draw_dir - directory for .dbn output
    react_rhos - rho reactivities
    crystals_ck - crystal entry; elements [1], [2] and [3] are passed on as
                  the sequence for the .ct writers, the matrix for the
                  benchmark statistics and the .ct file for CircleCompare
    rnum - iteration number, names output files accordingly

    Returns (curr_stats, selected_react_mats): the statistics of the last
    minimum distance structure and the matrices of all of them.

    NOTE(review): `lock` is not a parameter; it is read from the enclosing
    module scope - confirm it is defined there.
    """
    e = 50000
    fname = re.findall("([^/]+)$", input_prefix)[0]
    output_prefix = "%s/%s_%s" % (R2D2_output_dir, fname, rnum)
    scaling_fns = {
        "D": SU.invert_scale_rho_vec,
        "U": SU.scale_vec_avg1,
        "K": SU.cap_rho_or_ct_list
    }
    scaling_func = "K"
    scale_rho_max = 1.0
    constrained_c = 3.5
    cap_rhos = True
    weight_paired = 0.8

    sampled_structs_count = defaultdict(int)
    sampled_structs = set()

    # Keyword arguments shared by the three sampling calls. As before, the
    # lock keyword is only passed when a lock exists.
    sample_kwargs = {"num_proc": 1, "wn_tag": "_%s" % (rnum)}
    if lock is not None:
        sample_kwargs["lock"] = lock

    # Vanilla Sampling
    structs, structs_labels = SU.RNAstructure_sample(input_prefix,
                                                     e,
                                                     R2D2_output_dir,
                                                     label="noshape",
                                                     **sample_kwargs)
    sampled_structs.update(structs_labels)
    OSU.increment_dict_counts(sampled_structs_count, structs)

    # Sampling with SHAPE constraints
    structs, structs_labels = SU.RNAstructure_sample(
        input_prefix,
        e,
        R2D2_output_dir,
        shapefile=input_prefix + ".rho",
        label="shape",
        **sample_kwargs)
    sampled_structs.update(structs_labels)
    OSU.increment_dict_counts(sampled_structs_count, structs)

    # Sampling with hard constraints
    # RNAstructure is 1-indexed
    XB = SU.get_indices_rho_gt_c(react_rhos, constrained_c, one_index=True)
    SU.make_constraint_file(output_prefix + ".con", [], XB, [], [], [], [], [],
                            [])
    structs, structs_labels = SU.RNAstructure_sample(
        input_prefix,
        e,
        R2D2_output_dir,
        constraintfile=output_prefix + ".con",
        label="constrained_" + str(constrained_c),
        **sample_kwargs)
    sampled_structs.update(structs_labels)
    OSU.increment_dict_counts(sampled_structs_count, structs)

    # Compressing sampled structures further by removing duplicates sampled by
    # multiple methods. Keeping track of this though.
    # Saving more than I need to in this use case... ex. energies
    sampled_structs = SU.merge_labels(sampled_structs, to_string=False)
    structs = [t[0].split(",") for t in sampled_structs]
    SU.cts_to_file(structs, crystals_ck[1], output_prefix + "_unique.ct")
    SU.runRNAstructure_efn2(output_prefix + "_unique.ct",
                            output_prefix + ".efn2")
    free_energies = SU.get_free_energy_efn2(output_prefix + ".efn2")

    if cap_rhos:
        scaled_rhos = scaling_fns[scaling_func](react_rhos, scale_rho_max)
    else:
        scaled_rhos = scaling_fns[scaling_func](react_rhos)
    with open(input_prefix + ".best_scaled_rho", "w") as f:
        f.write("\n".join([
            "\t".join([str(zi), str(zr)])
            for zi, zr in enumerate(scaled_rhos)
        ]))

    # Compute distances between scaled rhos and paired-vectors from drawn
    # structures
    binary_structs = SU.ct_struct_to_binary_vec(structs)
    distances = [
        SU.calc_bp_distance_vector_weighted(s,
                                            scaled_rhos,
                                            scaling_func=scaling_func,
                                            invert_struct="D" != scaling_func,
                                            paired_weight=weight_paired)
        for s in binary_structs
    ]
    min_distance = min(distances)
    min_dist_indices = [
        i for i, v in enumerate(distances) if v == min_distance
    ]

    # compare R2D2 against crystal structure
    selected_react_mats = []
    for mdi in min_dist_indices:
        react_mat = SU.ct_struct_to_binary_mat(structs[mdi])
        selected_react_mats.append(numpy.matrix(react_mat))
        curr_prefix = "%s_%s_R2D2" % (output_prefix, mdi)
        curr_stats = SU.calc_benchmark_statistics_matrix(
            react_mat, crystals_ck[2])
        with open(curr_prefix + ".stats", "w") as f:
            f.write(str(curr_stats))

        # make file
        SU.ct_list_to_file(structs[mdi], crystals_ck[1], curr_prefix + ".ct")
        SU.runRNAstructure_CircleCompare(curr_prefix + ".ct", crystals_ck[3],
                                         curr_prefix + ".ps")
        OSU.system_command("convert %s.ps %s.jpg" % (curr_prefix, curr_prefix))

        # saving R2D2 results
        # NOTE(review): placed inside the loop because it uses the
        # per-structure curr_prefix and react_mat - confirm against the
        # original indentation.
        R2D2_save = {}
        R2D2_save["structs"] = structs
        R2D2_save["distances"] = distances
        R2D2_save["min_dist_indices"] = min_dist_indices
        R2D2_save["min_distance"] = min_distance
        R2D2_save["scaled_rhos"] = scaled_rhos
        R2D2_save["react_mat"] = react_mat
        # Use a context manager so the pickle file is flushed and closed.
        with open(curr_prefix + ".p", "wb") as pickle_file:
            pickle.dump(R2D2_save, pickle_file)

    # output .dbn's like in normal R2D2 process
    # code taken from PCSU.generate_best_struct_images
    # PCSU.generate_best_struct_images contained some extraneous calls for
    # this use case. ex. draw_all = False, running VARNA
    iter_dbn_dir = OSU.create_directory("%s/%s" % (draw_dir, rnum))
    seen_snum = list(min_dist_indices)
    for sf, snum in enumerate(seen_snum):
        draw_outname_pre = "%s/%snt_%s" % (iter_dbn_dir, len(react_rhos), snum)
        if len(seen_snum) > 1:
            draw_outname_pre += "_mult" + str(sf)
        SU.run_ct2dot(output_prefix + "_unique.ct", snum,
                      draw_outname_pre + ".dbn")

    # return curr_stats and selected structures
    return curr_stats, selected_react_mats
See examples/run_CoTrans_example.sh for an example of how to use this code. Author: Angela M Yu, 2014-2016 Version: 0.0.1 Copyright (C) 2016 Julius B. Lucks and Angela M Yu. All rights reserved. Distributed under the terms of the GNU General Public License, see 'LICENSE'. """ import R2D2 import LucksLabUtils_config import OSU LucksLabUtils_config.config("Quest_R2D2") OSU.system_command("echo $PATH") OSU.system_command("echo $CLASSPATH") opts = OSU.getopts("", [ "in_dir=", "out_dir=", "adapter=", "p=", "e=", "endcut=", "constrained_c=", "scale_rho_max=", "draw_all=", "most_count_tie_break=", "scaling_func=", "weight_paired=", "cap_rhos=", "pol_fp=" ]) print opts # This specifically calls R2D2.R2D2() assuming the user has specified the arguments: # in_dir, out_dir, adapter, e, endcut, constrained_c, scale_rho_max, draw_all, most_count_tie_break, scaling_func, weight_paired, cap_rhos, pol_fp # Only in_dir, out_dir, and adapter are truly required to run R2D2.R2D2(). Default values for the other parameters are set within R2D2.py. cotrans = R2D2.R2D2( opts['--in_dir'],
import OSU import LucksLabUtils_config from collections import defaultdict, Counter import re import SU import VIU from itertools import combinations, chain import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt from seaborn import color_palette LucksLabUtils_config.config("Quest_R2D2") opts = OSU.getopts("", [ "sample_sizes=", "outfile_pre=", "output_dir=", "reactivities_files=", "linker_seq=", "pol_fp=", "processors=", "MDS_processors=" ]) print opts infiles = opts["--reactivities_files"].split(",") outfile = opts["--outfile_pre"] output_dir = OSU.create_directory(opts["--output_dir"]) reactivities_files = opts["--reactivities_files"].split(",") linker_seq = opts["--linker_seq"] sample_sizes = [int(s) for s in opts["--sample_sizes"].split(",")] pol_fp = int(opts["--pol_fp"]) if "--pol_fp" in opts else 0 p = int(opts["--processors"]) if "--processors" in opts else 1 MDS_p = int(opts["--MDS_processors"]) if "--MDS_processors" in opts else 1 # setup counters, scaling functions, and output file header unique_struct_nums = defaultdict(list) unique_struct_dists = defaultdict(list)
import glob import SU import OSU import NAU import re import LucksLabUtils_config import PAU from itertools import cycle from collections import namedtuple import cPickle as pickle # setup environment variables specific to the ICSE cluster at Cornell LucksLabUtils_config.config("ICSE") opts = OSU.getopts("o:c:r:n:p:", [ "noshape", "shape", "constrain", "scaling_func=", "cluster_flag=", "job_name=", "sub_proc=", "arg_slice=", "load_results=", "generate_structs=", "structs_pickle_dir=", "cap_rhos=", "shape_intercept=", "shape_slope=", "restart" ]) print opts reactivities_files = glob.glob(opts['-r']) crystal_files = glob.glob(opts['-c']) output_dir = opts['-o'] sample_n = int(opts['-n']) num_proc = int(opts['-p']) scaling_func = opts["--scaling_func"] cluster_flag = opts["--cluster_flag"] == "True" job_name = opts["--job_name"] sub_proc = opts["--sub_proc"] == "True" load_results = opts["--load_results"] == "True" generate_structs = opts["--generate_structs"] == "True"