def generate_output(self):
    """
    The majority of the Cotranscriptional SHAPE-Seq output is created here.
    This includes the DG plot, the best structure images, and the movie of
    the best structures.
    """
    draw_dir = OSU.create_directory(self.output_dir + "/draw/")
    nn_dir = OSU.create_directory(self.output_dir + "/nn/")
    OSU.create_directory(nn_dir + "distances/")
    sorted_lengths = sorted(self.file_data)
    zero_padding = int(math.floor(math.log10(sorted_lengths[-1])) + 1)

    # Parallelized function calls to generate the DG plot, the distance
    # matrices for clustering of structures, and the images of the minimum
    # distance structures
    draw_struct_nums = [length for length in sorted_lengths
                        if self.file_data[length]["rc_flag"]]
    draw_args_pool = zip([self.file_data[dsn] for dsn in draw_struct_nums],
                         draw_struct_nums,
                         repeat(sorted_lengths[-1]),
                         range(1, len(draw_struct_nums) + 1),
                         repeat(zero_padding),
                         repeat(draw_dir),
                         repeat(self.output_dir + "/ct/"),
                         repeat(self.draw_all),
                         repeat(self.most_count_tie_break))
    args_pool = ([(PCSU.generate_DG_output, (self, 1, sorted_lengths[-1]))] +
                 zip(repeat(PCSU.generate_best_struct_images), draw_args_pool))

    if self.p == 1:
        for i in range(len(args_pool)):
            PCSU.calculate_function_helper(args_pool[i])
    else:
        # generate_DG_output is run outside of the multiprocessing pool
        # because Quest has issues running it in a pool
        PCSU.generate_DG_output(self, 1, sorted_lengths[-1])
        PCSU.run_output_multiprocessing_pool(PCSU.calculate_function_helper,
                                             args_pool[1:], self.p)

    if not OSU.check_file_exists(self.output_dir + "/DG_state_plot.pdf"):
        # Quest will sometimes silently skip the command above when the
        # sample size is very large, so retry once here
        PCSU.generate_DG_output(self, 1, sorted_lengths[-1])

    # Use ffmpeg on the numbered _structure.png files to create a video of
    # the minimum distance folding pathway. ffmpeg needs a duplicate of the
    # last frame, so symlink it.
    OSU.make_symbolic_link(
        draw_dir + str(len(draw_struct_nums)).zfill(zero_padding) + "_structure.png",
        draw_dir + str(len(draw_struct_nums) + 1).zfill(zero_padding) + "_structure.png")
    # ffmpeg expands a %<N>d sequence pattern with zero padding, so this
    # matches the zfill()-ed frame names
    VIU.generate_movie(draw_dir + "%%%dd_structure.png" % (zero_padding),
                       self.output_dir + "/movie.mp4")
    return
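
# For reference, a minimal sketch of the ffmpeg invocation that
# VIU.generate_movie is assumed to wrap (the real implementation lives in
# VIU; the function name, fps default, and flags below are illustrative
# assumptions, not the pipeline's API).
def _generate_movie_sketch(frame_pattern, movie_path, fps=2):
    # frame_pattern is a printf-style sequence pattern such as
    # "draw/%3d_structure.png"; ffmpeg expands it with zero padding
    import subprocess
    subprocess.check_call(["ffmpeg", "-y", "-framerate", str(fps),
                           "-i", frame_pattern,
                           "-pix_fmt", "yuv420p", movie_path])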
def read_all_dbn_dirs(dbn_dirs):
    """
    Reads the .dbn files in multiple directories to pair correct lengths
    together. Returns a dictionary, keyed by length, of the .png file names
    that should have been made by R2D2.
    """
    all_dbns = defaultdict(list)
    for dbn_dir in dbn_dirs:
        dbns = glob.glob(dbn_dir + "*.dbn")
        for dbn_f in dbns:
            with open(dbn_f, "r") as f:
                lines = f.readlines()
            # the last line of a .dbn file holds the dot-bracket string, so
            # its length is the transcript length
            length = len(lines[-1].split()[0])
            image = re.sub(r'(_mult\d+)?\.dbn', '_structure.png', dbn_f)
            if image not in all_dbns[length] and OSU.check_file_exists(image):
                all_dbns[length].append(image)
    return all_dbns
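
# Usage sketch (hypothetical paths): pair the structure images that three
# R2D2 runs produced for each transcript length. The regex above maps both
# "075.dbn" and "075_mult2.dbn" onto the same "075_structure.png", so
# multiple suboptimal dbns collapse to one image per run.
if __name__ == "__main__":
    all_dbns = read_all_dbn_dirs(["run1/dbn/", "run2/dbn/", "run3/dbn/"])
    for length in sorted(all_dbns):
        print "%s nt: %s image(s)" % (length, len(all_dbns[length]))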
# generate MFE movie
VIU.generate_MFE_CoTrans_movie(seq, outdir, seq_start, seq_end, rhos_dir,
                               SHAPE_direct)

# generate DG dump file
if make_DG_dump:
    ct_dir = outdir + "/ct/"
    efn2_dir = OSU.create_directory(outdir + "/efn2/")
    name_nums = range(1, len(seq) + 1)
    if seq_start != -1:
        name_nums = name_nums[seq_start - 1:]
    if seq_end != -1:
        name_nums = name_nums[:seq_end - seq_start + 2]

    # write the dump file header; the dump is named after the output directory
    fname_dump = outdir + re.match(r".*\/(.*)$",
                                   outdir.rstrip("/")).group(1) + "_DG_state_plot.dump"
    with open(fname_dump, "w") as f:
        f.write("nt\tDG\n")

    # run efn2 on each length's .ct file and append its free energy
    for n in name_nums:
        ct_file = "%s%s.ct" % (ct_dir, n)
        if not OSU.check_file_exists(ct_file):
            continue
        energy_file = "%s%s.efn2" % (efn2_dir, n)
        SU.runRNAstructure_efn2(ct_file, energy_file)
        energy = SU.get_free_energy_efn2(energy_file)[0]
        with open(fname_dump, "a") as f:
            f.write("%s\t%s\n" % (n, energy))
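
# Illustrative companion (not part of the original script): read the
# two-column "nt\tDG" dump written above back into a {length: free energy}
# mapping; the helper name is hypothetical.
def read_DG_dump_sketch(fname_dump):
    energies = {}
    with open(fname_dump, "r") as f:
        f.readline()  # skip the "nt\tDG" header
        for line in f:
            nt, dg = line.split("\t")
            energies[int(nt)] = float(dg)
    return energies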
elif num_proc == 1 and cluster_flag and not load_results:
    # This case is the first executed for the parallel version that utilizes
    # the full cluster. Job execution is wrapped so that any subprocess that
    # does not finish to the pickling step is caught. This block also acts as
    # a limiter on the number of jobs that can be submitted to the queue at
    # once.
    max_jobs = 511
    jobs_available = min(max_jobs, len(rm_cv_w))
    params_submitted = []
    while len(rm_cv_w) > 0:
        sub_proc_dir = OSU.create_directory(output_dir + "sub_proc_out/")
        for param in rm_cv_w:  # loop through all parameter sets
            param_string = "_".join([str(s) for s in param])
            # job names can only be up to 31 characters long
            job_name_param = "_".join([job_name, param_string])[:31]

            # create the .sh for this parameter set if it does not already exist
            if not OSU.check_file_exists("%snbs_script_%s.sh" % (sub_proc_sh_dir, param_string)):
                header = ("##NBS-stdout:%s\n##NBS-stderr:%s\n##NBS-queue:batch\n"
                          "##NBS-name:\"%s\"\n##NBS-jcoll:\"%s\"\n\nrm %s %s\n") % (
                              sub_proc_dir + job_name_param + ".out",
                              sub_proc_dir + job_name_param + ".err",
                              job_name_param, job_name,
                              sub_proc_dir + job_name_param + ".out",
                              sub_proc_dir + job_name_param + ".err")
                OSU.system_command(
                    "echo \"%s/usr/bin/time /fs/home/amy35/tools/anaconda/bin/python "
                    "../find_parameters.py -r '%s' -c '%s' -o %s %s -n %s -p 1 "
                    "--scaling_func %s --cluster_flag False --sub_proc True "
                    "--arg_slice '%s' --job_name %s --load_results 'False' "
                    "--generate_structs 'False' --cap_rhos %s "
                    "--structs_pickle_dir %s\"> %snbs_script_%s.sh" % (
                        header, opts['-r'], opts['-c'], opts['-o'],
                        sampling_opts_string, opts['-n'], opts['--scaling_func'],
                        param, job_name_param, cap_rhos, structs_pickle_dir,
                        sub_proc_sh_dir, param_string))

            # submit the .sh to the queue only if it is not already running or
            # completed and a job slot is available
            if jobs_available > 0 and not PAU.check_job_on_queue(job_name_param) and not OSU.check_file_exists("".join([
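                    # For orientation: the script written by the echo above is the
                    # NBS header followed by the find_parameters.py command, roughly
                    # (angle brackets mark placeholders, "..." elides options):
                    #   ##NBS-stdout:<sub_proc_dir><job_name_param>.out
                    #   ##NBS-stderr:<sub_proc_dir><job_name_param>.err
                    #   ##NBS-queue:batch
                    #   ##NBS-name:"<job_name_param>"
                    #   ##NBS-jcoll:"<job_name>"
                    #
                    #   rm <...>.out <...>.err
                    #   /usr/bin/time .../python ../find_parameters.py -r '<rhos>' ... -p 1 --cluster_flag False --sub_proc True ...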
                      for i in range(1, 51)]
        times_dirs += ["%s/%s%s/" % (opts["--47_times_dir"], file_prefix, i)
                       for i in range(1, 48)]
    else:
        raise NotImplementedError("Needs --100_times_dir option or "
                                  "--3_times_dirs, --50_times_dir, and "
                                  "--47_times_dir")

    combined = defaultdict(set)
    for count, td in enumerate(times_dirs):
        dg_dump_file = td + "/DG_state_plot.dump"
        # unpack DG_state_plot.dump from the results tarball if it is missing
        if not OSU.check_file_exists(dg_dump_file):
            if OSU.check_file_exists(td + "results_except_draw.tgz"):
                print td + "results_except_draw.tgz: unpacking DG_state_plot.dump"
                OSU.system_command(
                    "tar -zxvf %sresults_except_draw.tgz -C %s ./DG_state_plot.dump"
                    % (td, td))
            else:
                raise IOError("results_except_draw.tgz not found in " + td)
        with open(dg_dump_file, "r") as f:
            print "Reading: " + dg_dump_file
            f.readline()  # throw away the header
            for line in f:
                vars = line.split()
                str_key = "%s,%s" % (vars[0], vars[1])
                # keep only rows with both 0/1 flag columns set; a standalone
                # sketch of this filtering follows below
                if vars[3] == "1" and vars[-1] == "1":
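
# A standalone sketch of the filtering above, with assumptions flagged: the
# dump rows are assumed to be whitespace-separated with the nucleotide length
# and DG in the first two columns and 0/1 flags in the fourth and last
# columns. The helper name is hypothetical and not part of the pipeline.
def _collect_flagged_states(dump_path):
    keys = set()
    with open(dump_path, "r") as f:
        f.readline()  # skip the header line
        for line in f:
            fields = line.split()
            if fields[3] == "1" and fields[-1] == "1":
                keys.add("%s,%s" % (fields[0], fields[1]))
    return keys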