def excise_precursors():
    global file_genome, parsed_arf, dir_tmp, stack_height_min, max_pres

    # excise precursors from the genome
    pprint("#excising precursors\n")
    print_stderr("#excising precursors\n")
    start()

    ret_excise_precursors = None
    if options.get('-a'):
        cmd = "excise_precursors.py {} {}/{}_parsed.arf {}/precursors.coords -a {} > {}/precursors.fa\n\n".format(
            file_genome, dir_tmp, parsed_arf, dir_tmp, stack_height_min, dir_tmp)
        print_stderr(cmd)
        ret_excise_precursors = os.popen(cmd).read()
    else:
        cmd = "excise_precursors_iterative_final.py {} {}/{}_parsed.arf {}/precursors.fa {}/precursors.coords {}\n".format(
            file_genome, dir_tmp, parsed_arf, dir_tmp, dir_tmp, max_pres)
        print_stderr(cmd)
        ret_excise_precursors = os.popen(cmd).read()

        # the iterative excision determines the minimum read-stack height and
        # writes it to precursors.fa_stack
        fname = '{}/precursors.fa_stack'.format(dir_tmp)
        OSS = open_or_die2(fname, 'rb')
        stack_height_min = OSS.readline().strip()
        OSS.close()

    end()

    fname = '{}/precursors.fa'.format(dir_tmp)
    # if (-z "$dir_tmp/precursors.fa" or not -f "$dir_tmp/precursors.fa"):
    if not file_s(fname) or not os.path.isfile(fname):
        # empty or not a regular plain file
        die("No precursors excised\n")

    return 0
def resolve_potential_precursor():
    '''
    Dissects the potential precursor into parts by filling hashes and tests
    whether it passes the initial filter and the scoring filter, i.e. whether
    the potential precursor is still viable.
    '''
    global hash_seq, db_old, hash_struct

    fill_structure()
    fill_pri()
    fill_mature()
    fill_star()
    fill_loop()

    # if db_old == 'CHROMOSOME_I_165':
    #     print_stderr(hash_comp, "\n\n")
    #     print_stderr(hash_query, "\n\n")
    #     print_stderr(hash_bp, '\n')

    if pass_filtering_initial():
        seq = hash_seq[db_old]
        struct = hash_struct[db_old]
        pprint("{}\n".format(db_old))

    reset_variables()
def perform_controls():
    global dir_tmp, _dir, file_mature_ref_other_species, ltime

    # run permuted controls:
    pprint("#running permuted controls\n")
    print_stderr("#running permuted controls\n")
    start()

    line = None
    if not re.search('none', file_mature_ref_other_species, re.IGNORECASE):
        line = "core_algorithm.py {}/signature.arf {}/precursors.str -s {}/{} -v -50".format(
            dir_tmp, dir_tmp, dir_tmp, file_mature_ref_other_species)
    else:
        line = "core_algorithm.py {}/signature.arf {}/precursors.str -v -50".format(
            dir_tmp, dir_tmp)

    if not (options.get('-c') == ''):
        line += " -y {}/precursors_for_randfold.rand".format(dir_tmp)

    cmd = "echo '{} > {}/output.mrd' > {}/command_line\n\n".format(
        line, _dir, dir_tmp)
    print_stderr(cmd)
    ret_command_line = os.system(cmd)

    cmd = "perform_controls.py {}/command_line {}/precursors.str 100 -a > {}/output_permuted.mrd 2>>error_{}.log\n\n".format(
        dir_tmp, dir_tmp, dir_tmp, ltime)
    print_stderr(cmd)
    ret_perform_controls = os.system(cmd)

    end()
def compute_randfold():
    global options, dir_tmp

    if options.get('-c') == '':
        return

    # compute randfold p-values for the subset of precursors which are
    # plausible Dicer substrates
    pprint("#computing randfold p-values\n")
    print_stderr("#computing randfold p-values\n")

    cmd = "select_for_randfold.py {}/signature.arf {}/precursors.str > {}/precursors_for_randfold.ids\n\n".format(
        dir_tmp, dir_tmp, dir_tmp)
    print_stderr(cmd)
    start()
    ret_select_for_randfold = os.system(cmd)
    end()

    start()
    cmd = "fastaselect.py {}/precursors.fa {}/precursors_for_randfold.ids > {}/precursors_for_randfold.fa\n\n".format(
        dir_tmp, dir_tmp, dir_tmp)
    print_stderr(cmd)
    ret_fasta_select = os.system(cmd)
    end()

    start()
    cmd = "randfold -s {}/precursors_for_randfold.fa 99 > {}/precursors_for_randfold.rand\n\n".format(
        dir_tmp, dir_tmp)
    print_stderr(cmd)
    ret_randfold = os.system(cmd)
    end()
def perform_controls(_dir, rounds, command_line, options):
    os.mkdir(_dir)

    _round = 1
    while _round <= rounds:
        if options.get('-a') == '':
            print_stderr('{}\r'.format(_round))

        cmd = 'permute_structure.py {} > {}/precursors_permuted.str 2> /dev/null'.format(
            file_structure, _dir)
        # print(cmd)
        os.system(cmd)

        pprint('permutation {}\n\n'.format(_round))

        cmd = '{} 2> /dev/null'.format(command_line)
        os.system(cmd)
        # ret = os.popen(cmd).read().strip()
        # pprint(ret)

        _round += 1

    shutil.rmtree(_dir)

    if options.get('-a') == '':
        print_stderr('controls performed\n\n')
def survey_known(score):
    '''
    summary statistics for mature miRNAs
    '''
    global hash_ref, hash_sig

    # mature miRNAs for the species in reference (miRBase) file
    matures_cnt = len(hash_ref)
    pprint('\t{}'.format(matures_cnt))

    # matures present in data
    _matures_present = len(hash_sig.keys())
    pprint("\t{}".format(_matures_present))

    # matures recovered
    matures_recov_cnt = 0
    matures_present = hash_sig.keys()
    for mature_present in matures_present:
        score_mature_present = hash_sig[mature_present]
        if score <= score_mature_present:
            matures_recov_cnt += 1

    pprint("\t{}".format(matures_recov_cnt))

    # matures recovered in percent
    percent = 100 * round_decimals(div(matures_recov_cnt, _matures_present), 2)
    pprint(" ({:.0f}%)".format(percent))
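
# Worked example for survey_known (hypothetical numbers): if 200 reference
# matures are present in the data and 120 of them are recovered at the current
# score cutoff, the last two columns read "120 (60%)", since
# 100 * round_decimals(div(120, 200), 2) == 60.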
def core_algorithm():
    '''
    run moRNA Finder core algorithm
    '''
    global _dir, dir_tmp, file_mature_ref_other_species, ltime

    pprint("#running moRNA Finder core algorithm\n")
    print_stderr("#running moRNA Finder core algorithm\n")

    line = None
    longest_id = 40
    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        longest_id = get_longest_id("{}/{}".format(
            dir_tmp, file_mature_ref_this_species))

    start()

    if not re.search('none', file_mature_ref_other_species, re.IGNORECASE):
        line = "core_algorithm.py {}/signature.arf {}/precursors.str -s {}/{} -v -50 -l {}".format(
            dir_tmp, dir_tmp, dir_tmp, file_mature_ref_other_species, longest_id)
    else:
        line = "core_algorithm.py {}/signature.arf {}/precursors.str -v -50 -l {}".format(
            dir_tmp, dir_tmp, longest_id)

    if not options.get('-c') == '':
        line += " -y {}/precursors_for_randfold.rand".format(dir_tmp)

    cmd = "{} > {}/output.mrd\n".format(line, _dir)
    print_stderr(cmd)
    ret_mor_core = os.system(cmd)

    if options.get('-E'):
        ret_mor_core = os.system('{} -t > {}/error.output.mrd'.format(line, _dir))

    end()

    # check if file is empty
    fname = "{}/output.mrd".format(_dir)
    if not file_s(fname):
        print_stderr("Error:\n\tFile {} is empty\n\n".format(fname))
        print_stderr(
            "Now running core_algorithm.py with option -t to see why all precursors were discarded\n")
        ret_mor_core = os.system('{} -t > error.output.mrd_{}'.format(line, ltime))
        print_stderr(
            "The debug file is called error.output.mrd_{}\n".format(ltime))
        die("\nExiting now\n\n")
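
# Illustrative example (hypothetical directory and file names) of the command
# that core_algorithm() composes when a related-species mature reference is
# supplied and randfold results exist (option -c not set):
#
#   core_algorithm.py dir_tmp/signature.arf dir_tmp/precursors.str \
#       -s dir_tmp/mature_other.fa -v -50 -l 25 \
#       -y dir_tmp/precursors_for_randfold.rand > dir_out/output.mrd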
def parse_mappings():
    global file_reads_vs_genome, parsed_arf, dir_tmp

    # parse mappings to retain only perfect mappings of reads 18 nt <= length
    # <= 25 nt that map perfectly to five loci or less
    pprint("#parsing genome mappings\n")
    print_stderr("#parsing genome mappings\n")

    cmd = "parse_mappings.py {} -a 0 -b 18 -c 25 -i 5 > {}/{}_parsed.arf\n\n".format(
        file_reads_vs_genome, dir_tmp, parsed_arf)
    print_stderr(cmd)
    start()
    ret_parse_mappings = os.popen(cmd).read()
    end()

    return 0
def parse_fasta(file_fasta):
    FASTA = open_or_die(file_fasta, 'rb', 'can not open {}\n'.format(file_fasta))

    while True:
        line = FASTA.readline()
        if not line:
            break

        m = re.match(r'^(>\S+)', line)
        if m:
            pprint('{}\n'.format(m.groups()[0]))
        else:
            # uppercase the sequence first so that lowercase 'u' is also
            # converted from RNA (U) to DNA (T)
            pprint('{}'.format(re.sub('U', 'T', line.upper())))

    FASTA.close()
    return
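
# Example (illustrative input): for the record
#   >seq1
#   acguACGU
# parse_fasta prints the identifier line followed by the uppercased,
# U-to-T converted sequence:
#   >seq1
#   ACGTACGT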
def survey_signal_to_noise(score):
    # total hairpins reported
    hairpins_total = hairpins_cnt("total", score)

    (hairpins_total_fp_mean, hairpins_total_fp_sd, estimated_total_true_mean,
     estimated_total_true_sd, percent_total_mean,
     percent_total_sd) = mean_sd("total", score, hairpins_total)

    hairpins_total_fp_mean_round = round_decimals(hairpins_total_fp_mean, 0)
    hairpins_total_fp_sd_round = round_decimals(hairpins_total_fp_sd, 0)
    # print "\t$hairpins_total_fp_mean_round +/- $hairpins_total_fp_sd_round";

    signal_to_noise_total = 0
    if hairpins_total_fp_mean == 0:
        signal_to_noise_total = 0
    else:
        signal_to_noise_total = round_decimals(
            div(hairpins_total, hairpins_total_fp_mean), 1)

    pprint("\t{}".format(signal_to_noise_total))
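
# Worked example (hypothetical numbers): if 40 hairpins are reported at a
# given score cutoff and the permuted control runs average 8 false positives
# at that cutoff, the reported signal-to-noise ratio is 40 / 8 = 5.0.
# A mean of 0 false positives is reported as 0 rather than dividing by zero.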
def fold_precursors():
    '''
    predicting RNA secondary structures with RNAfold
    '''
    global dir_tmp, ltime

    pprint("#folding precursors\n")
    print_stderr("#folding precursors\n")
    print_stderr(
        "RNAfold < {}/precursors.fa -noPS > {}/precursors.str\n\n".format(
            dir_tmp, dir_tmp))
    start()

    ret_fold_precursors = os.system(
        "RNAfold < {}/precursors.fa -noPS > {}/precursors.str 2>>error_{}.log".format(
            dir_tmp, dir_tmp, ltime))
    if ret_fold_precursors:
        # older ViennaRNA releases spell the flag -noPS, newer ones --noPS;
        # retry with the long form if the first call fails
        ret_fold_precursors = os.system(
            "RNAfold < {}/precursors.fa --noPS > {}/precursors.str".format(
                dir_tmp, dir_tmp))
        if ret_fold_precursors:
            die("Some RNAfold error occurred. Error {}\n".format(
                ret_fold_precursors))

    end()
def make_survey():
    # get overview of the output:
    global _dir, dir_tmp, file_mature_ref_this_species, stack_height_min

    pprint("#doing survey of accuracy\n")
    print_stderr("#doing survey of accuracy\n")

    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        cmd = "survey.py {}/output.mrd -a {}/output_permuted.mrd -b {}/{} -c {}/signature.arf -d {} > {}/survey.csv\n\n".format(
            _dir, dir_tmp, dir_tmp, file_mature_ref_this_species, dir_tmp,
            stack_height_min, _dir)
        print_stderr(cmd)
        start()
        ret_survey = os.system(cmd)
        end()
    else:
        cmd = "survey.py {}/output.mrd -a {}/output_permuted.mrd -d {} > {}/survey.csv\n\n".format(
            _dir, dir_tmp, stack_height_min, _dir)
        print_stderr(cmd)
        start()
        ret_survey = os.system(cmd)
        end()
def prepare_signature():
    '''
    prepare signature file
    '''
    global file_reads, dir_tmp, read_align_mismatches, file_mature_ref_this_species, ltime

    pprint("#preparing signature\n")
    print_stderr("#preparing signature\n")

    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        cmd = "prepare_signature.py {} {}/precursors.fa {} -a {}/{} -o {}/signature.arf 2>>error_{}.log\n\n".format(
            file_reads, dir_tmp, read_align_mismatches, dir_tmp,
            file_mature_ref_this_species, dir_tmp, ltime)
        print_stderr(cmd)
        start()
        ret_prepare_signature = os.popen(cmd).read()
        end()
    else:
        cmd = "prepare_signature.py {} {}/precursors.fa {} -o {}/signature.arf 2>>error_{}.log\n\n".format(
            file_reads, dir_tmp, read_align_mismatches, dir_tmp, ltime)
        print_stderr(cmd)
        start()
        ret_prepare_signature = os.popen(cmd).read()
        end()

    return 0
def survey(options):
    # print(hash_con)
    # print_stderr(hash_out, '\n')
    # print(hash_sig)
    # print(hash_out)

    for score in range(10, -11, -1):
        pprint(score)

        if options.get('-b'):
            survey_hairpins(score)

        survey_known(score)

        if options.get('-a'):
            survey_signal_to_noise(score)

        if options.get('-d'):
            read_stack_min = options.get('-d')
            pprint('\t{}'.format(read_stack_min))

        pprint('\n')
def survey_hairpins(score):
    '''
    summary statistics for miRNA hairpin precursors
    '''
    # partitioning of hairpins into known and novel
    hairpins_known = hairpins_cnt("known", score)
    hairpins_novel = hairpins_cnt("novel", score)
    # print_stderr(hairpins_novel, "\t")
    pprint('\t{}'.format(hairpins_novel))

    # estimation of false positives for the set of novel hairpins
    if options.get('-a'):
        (hairpins_fp_mean, hairpins_fp_sd, estimated_true_mean,
         estimated_true_sd, percent_mean, percent_sd) = (0, 0, 0, 0, 0, 0)

        if hairpins_novel:  # check if novel hairpins detected
            (hairpins_fp_mean, hairpins_fp_sd, estimated_true_mean,
             estimated_true_sd, percent_mean,
             percent_sd) = mean_sd("novel", score, hairpins_novel)

        hairpins_fp_mean_round = round_decimals(hairpins_fp_mean, 0)
        hairpins_fp_sd_round = round_decimals(hairpins_fp_sd, 0)
        pprint("\t{} +/- {}".format(hairpins_fp_mean_round,
                                    hairpins_fp_sd_round))

        estimated_true_mean_round = round_decimals(estimated_true_mean, 0)
        pprint("\t{}".format(estimated_true_mean_round))

        estimated_true_sd_round = round_decimals(estimated_true_sd, 0)
        pprint(" +/- {}".format(estimated_true_sd_round))

        percent_mean_round = round_decimals(percent_mean, 0)
        pprint(" ({}".format(percent_mean_round))

        percent_sd_round = round_decimals(percent_sd, 0)
        pprint(" +/- {}%)".format(percent_sd_round))
def print_header(options):
    pprint("moRNA Finder score")

    if options.get('-b'):
        pprint("\tnovel miRNAs reported by moRNA Finder")

    if options.get('-a'):
        pprint("\tnovel miRNAs, estimated false positives")
        pprint("\tnovel miRNAs, estimated true positives")

    pprint("\tknown miRNAs in species")
    pprint("\tknown miRNAs in data")
    pprint("\tknown miRNAs detected by moRNA Finder")

    if options.get('-a'):
        pprint("\testimated signal-to-noise")

    if options.get('-d'):
        pprint("\texcision gearing")

    pprint("\n")
def parse_file_arf(file_arf, options):
    global running, gscan, hash_edits

    FILE_ARF = open_or_die(file_arf, 'rb', 'can not open {}\n'.format(file_arf))
    while True:
        line = FILE_ARF.readline()
        if not line:
            break

        m = re.match(
            r'^(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)',
            line)
        if m:
            m = m.groups()
            query = m[0]
            query_map_lng = int(m[1])
            query_beg = m[2]
            query_end = int(m[3])
            query_seq = m[4]
            db = m[5]
            db_map_lng = int(m[6])
            db_beg = m[7]
            db_end = int(m[8])
            db_seq = m[9]
            strand = m[10]
            edits = int(m[11])
            edit_string = m[12]

            running += 1

            if options.get('-j') == '':
                (query_map_lng, query_end, query_seq, db_map_lng, db_end,
                 db_seq, edits, edit_string) = remove_trailing_nts(
                     query_map_lng, query_end, query_seq, db_map_lng, db_end,
                     db_seq, edits, edit_string)

            if '-a' in options.keys() and int(options.get('-a')) < edits:
                continue
            if options.get('-b') and query_map_lng < int(options.get('-b')):
                continue
            if options.get('-c') and int(options.get('-c')) < query_map_lng:
                continue
            if options.get('-d') and query not in hash_queries_incl.keys():
                continue
            if options.get('-e') and query in hash_queries_excl.keys():
                continue
            if options.get('-f') and db not in hash_dbs_incl.keys():
                continue
            if options.get('-g') and db in hash_dbs_excl.keys():
                continue

            if not (options.get('-h') == '' or options.get('-i')):
                pprint('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
                    query, query_map_lng, query_beg, query_end, query_seq, db,
                    db_map_lng, db_beg, db_end, db_seq, strand, edits,
                    edit_string))
                continue

            if gscan:
                create_hash_key_chain(hash_edits, 0, query, edits)
                hash_edits[query][edits] += 1
            else:
                evaluation = evaluate_query(query, edits, options)
                if evaluation:
                    pprint('{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n'.format(
                        query, query_map_lng, query_beg, query_end, query_seq,
                        db, db_map_lng, db_beg, db_end, db_seq, strand, edits,
                        edit_string))
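
# Example arf line (illustrative values) accepted by the regex above; the 13
# whitespace-separated fields correspond to: query ID, query length, query
# start, query end, query sequence, db ID, db length, db start, db end,
# db sequence, strand, number of edits, edit string:
#
#   read_1_x25  22  1  22  TCACCGGGTGTAAATCAGCTTG  chrI  22  1061  1082  TCACCGGGTGTAAATCAGCTTG  +  0  mmmmmmmmmmmmmmmmmmmmmm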
def test_input_files():
    global file_reads, file_reads_vs_genome, file_genome, file_precursors, minpreslen, file_mature_ref_other_species, file_mature_ref_this_species

    IN = open_or_die2(file_reads, 'rb')
    line = IN.readline().strip()
    if not re.search(r'^>\S+', line):
        printErr()
        die("The first line of file {} does not start with '>identifier'\nReads file {} is not a valid fasta file\n\n"
            .format(file_reads, file_reads))
    if re.search(r'\s', line):
        printErr()
        die('Reads file {} has not allowed whitespaces in its first identifier\n\n'
            .format(file_reads))
    line = IN.readline()
    if not re.search(r'^[ACGTUNacgtun]*$', line):
        printErr()
        die('File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nReads file {} is not a fasta file\n\n'
            .format(file_reads, file_reads))
    IN.close()

    IN = open_or_die2(file_genome, 'rb')
    line = IN.readline().strip()
    if not re.search(r'>\S+', line):
        printErr()
        die("The first line of file {} does not start with '>identifier'\nGenome file {} is not a fasta file\n\n"
            .format(file_genome, file_genome))
    if re.search(r'\s', line):
        printErr()
        die('Genome file {} has not allowed whitespaces in its first identifier\n\n'
            .format(file_genome))

    # get genome ids
    tmps = os.popen('grep ">" {}'.format(file_genome)).read().strip()
    genomeids = dict(map(lambda x: (x, 1), re.split("\n", tmps)))

    line = IN.readline()
    if not re.search(r'^[ACGTUNacgtun]*$', line):
        printErr()
        die('File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nGenome file {} is not a fasta file\n\n'
            .format(file_genome, file_genome))
    IN.close()

    IN = open_or_die2(file_reads_vs_genome, 'rb')
    line = IN.readline()
    if not re.search(
            r'^(\S+_x\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+([+-])\s+(\d+)\s*([mDIM]*)$',
            line):
        printErr()
        die('Mapping file {} is not in arf format\n\nEach line of the mapping file must consist of the following fields\nreadID_wo_whitespaces length start end read_sequence genomicID_wo_whitespaces length start end genomic_sequence strand #mismatches editstring\nThe editstring is optional and does not have to be contained\nThe readID must end with _xNumber and is not allowed to contain whitespaces.\nThe genomeID is not allowed to contain whitespaces.'
            .format(file_reads_vs_genome))
    IN.close()

    # get ids from arf file and compare them with ids from the genome file
    tmps = os.popen(
        'cut -f6 {}|sort -u'.format(file_reads_vs_genome)).read().strip()
    for s in re.split("\n", tmps):
        if not genomeids.get(">{}".format(s)):
            die("The mapped reference id {} from file {} is not an id of the genome file {}\n\n"
                .format(s, file_reads_vs_genome, file_genome))

    if not re.search('none', file_mature_ref_this_species):
        IN = open_or_die2(file_mature_ref_this_species, 'rb')
        line = IN.readline().strip()
        if not re.search(r'>\S+', line):
            printErr()
            die("The first line of file {} does not start with '>identifier'\nmiRNA reference this species file {} is not a fasta file\n\n"
                .format(file_mature_ref_this_species,
                        file_mature_ref_this_species))
        if re.search(r'\s', line):
            printErr()
            die("miRNA reference this species file {} has not allowed whitespaces in its first identifier\n\n"
                .format(file_mature_ref_this_species))
        line = IN.readline()
        if not re.search(r'^[ACGTUNacgtun]*$', line):
            printErr()
            die("File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nmiRNA reference this species file {} is not a fasta file\n\n"
                .format(file_mature_ref_this_species,
                        file_mature_ref_this_species))
        IN.close()

    if not re.search('none', file_mature_ref_other_species):
        IN = open_or_die2(file_mature_ref_other_species, 'rb')
        line = IN.readline().strip()
        if not re.search(r'>\S+', line):
            printErr()
            die("The first line of file {} does not start with '>identifier'\nmiRNA reference other species file {} is not a fasta file\n\n"
                .format(file_mature_ref_other_species,
                        file_mature_ref_other_species))
        if re.search(r'\s', line):
            printErr()
            die("miRNA reference other species file {} has not allowed whitespaces in its first identifier\n\n"
                .format(file_mature_ref_other_species))
        line = IN.readline()
        if not re.search(r'^[ACGTUNacgtun]*$', line):
            printErr()
            die("File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nmiRNA reference other species file {} is not a fasta file\n\n"
                .format(file_mature_ref_other_species,
                        file_mature_ref_other_species))
        IN.close()

    if not re.search('none', file_precursors):
        IN = open_or_die2(file_precursors, 'rb')
        line = IN.readline().strip()
        if not re.search(r'>\S+', line):
            printErr()
            die("The first line of file {} does not start with '>identifier'\nprecursor file {} is not a fasta file\n\n"
                .format(file_precursors, file_precursors))
        if re.search(r'\s', line):
            printErr()
            die("precursor file {} has not allowed whitespaces in its first identifier\n\n"
                .format(file_precursors))
        line = IN.readline()
        if not re.search(r'^[ACGTUNacgtun]*$', line):
            printErr()
            die("File {} contains not allowed characters in sequences\nAllowed characters are ACGTUN\nprecursor file {} is not a fasta file\n\n"
                .format(file_precursors, file_precursors))
        if len(line) < minpreslen:
            printErr()
            die("The precursor file {} does not contain sequences of at least {} nt\nPlease make sure that you provided the correct file and the correct parameter ordering when calling {}\nIf you have precursors with less than {} nt please use option -p <int> to specify this length\n"
                .format(file_precursors, minpreslen, sys.argv[0], minpreslen))
        IN.close()

    # #################################################
    # precheck finished
    # #################################################

    # do stringent testing of all input files
    pprint("#testing input files\n")
    print_stderr("#testing input files\n")

    if not re.search('none', file_mature_ref_this_species):
        start()
        cmd = "sanity_check_mature_ref.py {} 2>&1\n\n".format(
            file_mature_ref_this_species)
        print_stderr(cmd)
        ret_file_mature_ref_this_species = os.popen(cmd).read().strip()
        if ret_file_mature_ref_this_species:
            printErr()
            die("problem with {} {}\n".format(
                file_mature_ref_this_species,
                ret_file_mature_ref_this_species))
        end()

    if not re.search(r'none', file_mature_ref_other_species):
        start()
        cmd = "sanity_check_mature_ref.py {} 2>&1\n\n".format(
            file_mature_ref_other_species)
        print_stderr(cmd)
        ret_file_mature_ref_other_species = os.popen(cmd).read().strip()
        if ret_file_mature_ref_other_species:
            printErr()
            die("problem with {} {}\n".format(
                file_mature_ref_other_species,
                ret_file_mature_ref_other_species))
        end()

    cmd = "sanity_check_reads_ready_file.py {} 2>&1\n\n".format(file_reads)
    print_stderr(cmd)
    start()
    ret_test_file_reads = os.popen(cmd).read().strip()
    if ret_test_file_reads:
        printErr()
        die("problem with {} {}\n".format(file_reads, ret_test_file_reads))
    end()

    start()
    cmd = "sanity_check_genome.py {} 2>&1\n\n".format(file_genome)
    print_stderr(cmd)
    ret_test_file_genome = os.popen(cmd).read().strip()
    if ret_test_file_genome:
        printErr()
        die("problem with {} {}\n".format(file_genome, ret_test_file_genome))
    end()

    start()
    cmd = "sanity_check_mapping_file.py {} 2>&1".format(file_reads_vs_genome)
    print_stderr(cmd)
    ret_test_file_reads_genome = os.popen(cmd).read().strip()
    if ret_test_file_reads_genome:
        printErr()
        die("problem with {} {}\n".format(file_reads_vs_genome,
                                          ret_test_file_reads_genome))
    end()

    if not re.search('none', file_precursors):
        start()
        cmd = "sanity_check_mature_ref.py {} 2>&1".format(file_precursors)
        print_stderr(cmd)
        ret_file_precursors = os.popen(cmd).read().strip()
        if ret_file_precursors:
            printErr()
            die("problem with {} {}\n".format(file_precursors,
                                              ret_file_precursors))
        end()

        start()
        if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
            print_stderr("Quantitation of expressed miRNAs in data\n\n\n")
            species = ''
            if options.get('-t'):
                species = "-t {}".format(options.get('-t'))

            file_star = ''
            if options.get('-s'):
                if file_s(options.get('-s')):
                    file_star = "-s {}".format(options.get('-s'))
                else:
                    print_stderr(
                        "File {} specified by option -s is empty or not found\n"
                        .format(options.get('-s')))
                    options['-s'] = 0

            print("#Quantitation of known miRNAs in data\n")
            dopt = ""
            Popt = ""
            if options.get('-d') == '':
                dopt = "-d"
            if options.get('-P') == '':
                Popt = "-P"

            quant = "quantifier.py -p {} -m {} -r {} {} {} -y {} -k {} {}".format(
                file_precursors, file_mature_ref_this_species, file_reads,
                file_star, species, ltime, dopt, Popt)
            print_stderr(quant, "\n")
            os.system(quant)
            options['-q'] = "expression_analyses/expression_analyses_{}/miRBase.mrd".format(
                ltime)
            end()
        else:
            print_stderr(
                "Pre-quantitation is skipped caused by missing file with known miRNAs\n\n\n")
    else:
        print_stderr(
            "Pre-quantitation is skipped caused by missing file with known precursor miRNAs\n\n\n")
def output_results():
    '''
    making final results html file:
    '''
    global options, dir_tmp, ltime, version, scripts, file_mature_ref_this_species

    pprint("#producing graphic results\n")
    print_stderr("#producing graphic results\n")
    start()

    # sort aligned reads in pdf not by sample if option -o is given
    sort_by_sample = '-o'
    if options.get('-o'):
        sort_by_sample = ''

    line = None

    # choose file to use for counting miRNAs in data
    xopt = "{}/signature.arf".format(dir_tmp)
    if os.path.isfile(
            "expression_analyses/expression_analyses_{}/miRNA_expressed.csv".format(ltime)):
        xopt = "expression_analyses/expression_analyses_{}/miRNA_expressed.csv".format(
            ltime)

    sc = 0
    if options.get('-b'):
        sc = options.get('-b')

    OE = ""
    if options.get('-E') == '':
        OE = " -E"

    if not re.search('none', file_mature_ref_this_species, re.IGNORECASE):
        if options.get('-q'):
            line = "make_html.py -f {}/output.mrd -k {}/{} -p {}/precursors.coords -s {}/survey.csv -c -e -q {} -x {} -r {}Rfam_for_moR.fa -v {} -y {} {} {}".format(
                _dir, dir_tmp, file_mature_ref_this_species, dir_tmp, _dir,
                options.get('-q'), xopt, scripts, sc, ltime, sort_by_sample, OE)
        else:
            line = "make_html.py -f {}/output.mrd -k {}/{} -p {}/precursors.coords -s {}/survey.csv -c -e -r {}Rfam_for_moR.fa -v {} -y {} {} {}".format(
                _dir, dir_tmp, file_mature_ref_this_species, dir_tmp, _dir,
                scripts, sc, ltime, sort_by_sample, OE)
    else:
        if options.get('-q'):
            line = "make_html.py -f {}/output.mrd -p {}/precursors.coords -s {}/survey.csv -c -e -q {} -x {} -r {}Rfam_for_moR.fa -v {} -y {} {} {}".format(
                _dir, dir_tmp, _dir, options.get('-q'), xopt, scripts, sc,
                ltime, sort_by_sample, OE)
        else:
            line = "make_html.py -f {}/output.mrd -p {}/precursors.coords -v {} -s {}/survey.csv -c -e -r {}Rfam_for_moR.fa -y {} {} {}".format(
                _dir, dir_tmp, sc, _dir, scripts, ltime, sort_by_sample, OE)

    if options.get('-t'):
        line += " -t {}".format(options.get('-t'))

    dopt = ""
    if options.get('-d'):
        dopt = "-d"

    cmd = '{} -V {} {}\n\n'.format(line, version, dopt)
    print_stderr(cmd)
    ret_make_html = os.system(cmd)
    end()
if __name__ == '__main__':
    # measuring times
    (sTime, eTime, stime, etime, sTimeG, eTimeG, stimeG,
     etimeG) = (None, None, None, None, None, None, None, None)
    (second, minute, hour, dayOfMonth, month, yearOffset, dayOfWeek, dayOfYear,
     daylightSavings) = (None, None, None, None, None, None, None, None, None)

    (second, minute, hour, dayOfMonth, month, yearOffset, dayOfWeek, dayOfYear,
     daylightSavings) = localtime()

    if re.search(r'^\d$', str(second)):
        second = "0{}".format(second)

    sTimeG = "{}:{}:{}".format(hour, minute, second)
    stimeG = int(time.time())

    pprint("moRNA Finder started at {}\n\n\n".format(sTimeG))

    ctime = int(time.time())
    ltime = myTime(ctime)

    scripts = os.popen('which moR.py').read()
    # scripts = re.sub(r'moR.py', '', scripts, count=1)
    # scripts = re.sub(r'\s+', '', scripts)
    scripts = os.path.dirname(scripts) + '/'

    pprint('#Starting moRNA Finder\n')
    print_stderr('#Starting moRNA Finder\n{} {}\n\n'.format(
        sys.argv[0], ' '.join(sys.argv[1:])))
    print_stderr("moRNA Finder started at {}\n\n\n".format(sTimeG))

    test_first_argument()