def is_tRNA(clus_obj, out_dir, args):
    """
    Iterates through cluster precursors to predict sRNA types

    For every cluster in ``clus_obj[0]`` whose first locus satisfies
    ``loci[0][3] - loci[0][2] < 500``, the locus is written to a FASTA file
    inside a temporary directory, ``_run_tRNA_scan`` is run on it and the
    parsed summary is stored under ``cluster['predictions']['tRNA']``.
    The two files returned by ``_run_tRNA_scan`` are then moved into
    ``out_dir``.  Any other cluster gets "precursor too long" added to
    ``cluster['errors']``.

    :param clus_obj: indexable object whose element 0 maps cluster ids
        (strings) to cluster dictionaries containing a ``'loci'`` entry
    :param out_dir: directory receiving the scan output files
    :param args: object with a ``reference`` attribute (path handed to
        ``get_loci_fasta``)
    :returns: ``clus_obj``, with its clusters updated in place
    """
    ref = os.path.abspath(args.reference)
    # Resolve the destination before any chdir so the moves below can never
    # be interpreted relative to a temporary directory.
    out_dir = os.path.abspath(out_dir)
    utils.safe_dirs(out_dir)
    start_dir = os.getcwd()
    for nc in clus_obj[0]:
        c = clus_obj[0][nc]
        loci = c['loci']
        out_fa = "cluster_" + nc
        if loci[0][3] - loci[0][2] < 500:
            with make_temp_directory() as tmpdir:
                # The helpers work with file names relative to the cwd.
                os.chdir(tmpdir)
                try:
                    get_loci_fasta({loci[0][0]: [loci[0][0:5]]}, out_fa, ref)
                    summary_file, str_file = _run_tRNA_scan(out_fa)
                    # Parse the summary once; reuse it for storing and logging.
                    score = _read_tRNA_scan(summary_file)
                    if "predictions" not in c:
                        c['predictions'] = {}
                    c['predictions']['tRNA'] = score
                    logger.debug(score)
                    shutil.move(summary_file, op.join(out_dir, summary_file))
                    shutil.move(str_file, op.join(out_dir, str_file))
                finally:
                    # Never leave the process inside the temporary directory:
                    # callers resolve relative paths after this function.
                    os.chdir(start_dir)
        else:
            c['errors'].add("precursor too long")
        clus_obj[0][nc] = c
    return clus_obj
def make_predictions(clus_obj, out_dir, args):
    """
    Iterates through cluster precursors to predict sRNA types

    Each cluster in ``clus_obj[0]`` whose first locus satisfies
    ``loci[0][3] - loci[0][2] < 500`` is exported as FASTA into a temporary
    directory and scanned with ``_run_tRNA_scan``; the parsed summary is
    saved in ``cluster['predictions']['tRNA']`` and the two scan files are
    moved to ``out_dir``.  Clusters failing the length test get
    "precursor too long" added to ``cluster['errors']``.

    :param clus_obj: indexable object whose element 0 maps cluster ids
        (strings) to cluster dictionaries containing a ``'loci'`` entry
    :param out_dir: directory receiving the scan output files
    :param args: object with a ``reference`` attribute (path handed to
        ``get_loci_fasta``)
    :returns: ``clus_obj``, with its clusters updated in place
    """
    ref = os.path.abspath(args.reference)
    # Make the destination absolute up front: the loop below changes the
    # working directory, which would otherwise re-anchor a relative path.
    out_dir = os.path.abspath(out_dir)
    utils.safe_dirs(out_dir)
    original_cwd = os.getcwd()
    for nc in clus_obj[0]:
        c = clus_obj[0][nc]
        loci = c['loci']
        out_fa = "cluster_" + nc
        if loci[0][3] - loci[0][2] < 500:
            with make_temp_directory() as tmpdir:
                # The helpers work with file names relative to the cwd.
                os.chdir(tmpdir)
                try:
                    get_loci_fasta({loci[0][0]: [loci[0][0:5]]}, out_fa, ref)
                    summary_file, str_file = _run_tRNA_scan(out_fa)
                    # One parse is enough; the result is stored and logged.
                    score = _read_tRNA_scan(summary_file)
                    if "predictions" not in c:
                        c['predictions'] = {}
                    c['predictions']['tRNA'] = score
                    logger.debug(score)
                    shutil.move(summary_file, op.join(out_dir, summary_file))
                    shutil.move(str_file, op.join(out_dir, str_file))
                finally:
                    # Go back so the process is not left in the temp dir.
                    os.chdir(original_cwd)
        else:
            c['errors'].add("precursor too long")
        clus_obj[0][nc] = c
    return clus_obj
def make_profile(data, out_dir, args):
    """
    Make data report for each cluster

    For every cluster id in ``data[0]`` a sub-directory ``out_dir/<id>`` is
    created, ``_single_cluster`` is run (writing to ``maps.tsv`` in that
    directory) and the cluster entry is extended with:

    * ``'profile'``   -- third value returned by ``_single_cluster``
    * ``'precursor'`` -- dict holding ``'seq'`` (from
      ``precursor_sequence``), ``'colors'`` (from ``_parse``) and whatever
      keys ``run_rnafold`` returns

    :param data: indexable object whose element 0 maps cluster ids
        (strings) to cluster dictionaries containing a ``'loci'`` entry
    :param out_dir: base directory for the per-cluster output
    :param args: object with a ``ref`` attribute; also forwarded to
        ``_single_cluster``
    :returns: ``data``, with its clusters updated in place
    """
    safe_dirs(out_dir)
    n = len(data[0])
    bar = ProgressBar(maxval=n).start()
    bar.update(0)
    for itern, c in enumerate(data[0]):
        bar.update(itern)
        logger.debug("creating cluser: {}".format(c))
        cluster_dir = os.path.join(out_dir, c)
        safe_dirs(cluster_dir)
        # Only the third returned value is needed here.
        _, _, pos_structure = _single_cluster(
            c, data, os.path.join(cluster_dir, "maps.tsv"), args)
        cluster = data[0][c]
        cluster.update({'profile': pos_structure})
        loci = cluster['loci']
        logger.debug("precursor_sequence")
        seq = precursor_sequence(loci[0][0:5], args.ref)
        # Attach the dict first so a failure below still leaves the
        # sequence on the cluster, as before.
        precursor = {"seq": seq}
        cluster['precursor'] = precursor
        logger.debug("parse alignments")
        precursor["colors"] = list(_parse(cluster['profile'], seq))
        logger.debug("update rnafold")
        precursor.update(run_rnafold(seq))
    # Close the progress bar so it reaches its end state and later output
    # starts on a fresh line.
    bar.finish()
    return data
def make_profile(data, out_dir, args):
    """
    Make data report for each cluster

    For every cluster id in ``data[0]`` a sub-directory ``out_dir/<id>`` is
    created, ``_single_cluster`` is run (writing to ``maps.tsv`` in that
    directory) and the cluster entry is extended with:

    * ``'profile'``   -- third value returned by ``_single_cluster``
    * ``'precursor'`` -- dict holding ``'seq'`` (from
      ``precursor_sequence``), ``'colors'`` (from ``_parse``) and whatever
      keys ``run_rnafold`` returns

    :param data: indexable object whose element 0 maps cluster ids
        (strings) to cluster dictionaries containing a ``'loci'`` entry
    :param out_dir: base directory for the per-cluster output
    :param args: object with a ``ref`` attribute; also forwarded to
        ``_single_cluster``
    :returns: ``data``, with its clusters updated in place
    """
    safe_dirs(out_dir)
    n = len(data[0])
    bar = ProgressBar(maxval=n)
    bar.start()
    bar.update(0)
    for itern, c in enumerate(data[0]):
        bar.update(itern)
        logger.debug("creating cluser: {}".format(c))
        cluster_dir = os.path.join(out_dir, c)
        safe_dirs(cluster_dir)
        # Only the third returned value is needed here.
        _, _, pos_structure = _single_cluster(
            c, data, os.path.join(cluster_dir, "maps.tsv"), args)
        cluster = data[0][c]
        cluster.update({'profile': pos_structure})
        loci = cluster['loci']
        seq = precursor_sequence(loci[0][0:5], args.ref)
        # Attach the dict first so a failure below still leaves the
        # sequence on the cluster, as before.
        precursor = {"seq": seq}
        cluster['precursor'] = precursor
        # NOTE(review): the value is stored exactly as `_parse` returns it;
        # if that is a lazy iterator it can only be consumed once -- confirm.
        precursor["colors"] = _parse(cluster['profile'], seq)
        precursor.update(run_rnafold(seq))
    # Close the progress bar so it reaches its end state and later output
    # starts on a fresh line.
    bar.finish()
    return data
def run_coral(clus_obj, out_dir, args):
    """
    Run some CoRaL modules to predict small RNA function

    Everything is written below ``<args.out>/coral``: the BAM file is
    prepared there, regions are detected into its ``regions``
    sub-directory and features are created from them.

    :param clus_obj: not used by this function
    :param out_dir: not used; output always goes to ``<args.out>/coral``
    :param args: object with ``bed``, ``out``, ``bam`` and ``ref`` attributes
    :raises ValueError: when ``args.bed`` is empty or missing a value
    """
    if not args.bed:
        raise ValueError("This module needs the bed file output from cluster subcmd.")

    workdir = op.abspath(op.join(args.out, 'coral'))
    regions_dir = op.join(workdir, "regions")
    safe_dirs(workdir)

    # Make every input absolute before entering the working directory.
    bam_in = op.abspath(args.bam)
    bed_in = op.abspath(args.bed)
    reference = op.abspath(args.ref)

    with chdir(workdir):
        bam_clean = coral.prepare_bam(bam_in, bed_in)
        safe_dirs(regions_dir)
        loci_file = coral.detect_regions(
            bam_clean, bed_in, regions_dir, "seqcluster")
        coral.create_features(bam_clean, loci_file, reference, regions_dir)
def run_coral(clus_obj, out_dir, args):
    """
    Run some CoRaL modules to predict small RNA function

    The work happens inside ``<args.out>/coral``: ``coral.prepare_bam``,
    then ``coral.detect_regions`` and ``coral.create_features`` writing to
    the ``regions`` sub-directory.

    :param clus_obj: not used by this function
    :param out_dir: not used; output always goes to ``<args.out>/coral``
    :param args: object with ``bed``, ``out``, ``bam`` and ``ref`` attributes
    :raises ValueError: when ``args.bed`` is empty or missing a value
    """
    if not args.bed:
        message = "This module needs the bed file output from cluster subcmd."
        raise ValueError(message)

    coral_dir = op.abspath(op.join(args.out, 'coral'))
    safe_dirs(coral_dir)

    # Inputs are resolved while the original working directory is active.
    bam_path = op.abspath(args.bam)
    bed_path = op.abspath(args.bed)
    ref_path = op.abspath(args.ref)

    with chdir(coral_dir):
        cleaned_bam = coral.prepare_bam(bam_path, bed_path)
        regions_dir = op.join(coral_dir, "regions")
        safe_dirs(regions_dir)
        loci_file = coral.detect_regions(
            cleaned_bam, bed_path, regions_dir, "seqcluster")
        coral.create_features(cleaned_bam, loci_file, ref_path, regions_dir)
def predictions(args):
    """
    Create predictions of clusters

    Loads the clusters from ``args.json``, runs ``is_tRNA`` on them and,
    when ``args.coral`` is set, ``run_coral``.  The first element of the
    resulting data is written next to the input as
    ``<args.json without extension>_prediction.json``.

    :param args: object with ``json``, ``out`` and ``coral`` attributes,
        plus whatever ``is_tRNA`` and ``run_coral`` read from it
    """
    logger.info(args)
    logger.info("Reading sequences")
    out_file = os.path.abspath(
        os.path.splitext(args.json)[0] + "_prediction.json")
    data = load_data(args.json)
    out_dir = os.path.abspath(
        safe_dirs(os.path.join(args.out, "predictions")))
    logger.info("Make predictions")
    # is_tRNA moves into temporary directories with os.chdir(); return to
    # the starting directory afterwards so the relative paths that
    # run_coral resolves from `args` still point at the right files.
    start_dir = os.getcwd()
    try:
        data = is_tRNA(data, out_dir, args)
    finally:
        os.chdir(start_dir)
    if args.coral:
        logger.info("Make CoRaL predictions")
        run_coral(data, out_dir, args)
    write_data(data[0], out_file)
    logger.info("Done")
def predictions(args):
    """
    Create predictions of clusters

    Loads the clusters from ``args.json``, runs ``is_tRNA`` on them and,
    when ``args.coral`` is set, ``run_coral``.  The first element of the
    resulting data is written next to the input as
    ``<args.json without extension>_prediction.json``.

    :param args: object with ``json``, ``out`` and ``coral`` attributes,
        plus whatever ``is_tRNA`` and ``run_coral`` read from it
    """
    logger.info(args)
    logger.info("reading sequeces")
    out_file = os.path.abspath(
        os.path.splitext(args.json)[0] + "_prediction.json")
    data = load_data(args.json)
    out_dir = os.path.abspath(
        safe_dirs(os.path.join(args.out, "predictions")))
    logger.info("make predictions")
    # is_tRNA moves into temporary directories with os.chdir(); return to
    # the starting directory afterwards so the relative paths that
    # run_coral resolves from `args` still point at the right files.
    start_dir = os.getcwd()
    try:
        data = is_tRNA(data, out_dir, args)
    finally:
        os.chdir(start_dir)
    if args.coral:
        logger.info("make CoRaL predictions")
        run_coral(data, out_dir, args)
    write_data(data[0], out_file)
    logger.info("Done")
def make_database(data, name="seqcluster.db", out_dir="database"):
    """
    Create a database file from ``data``

    The directory is created through ``safe_dirs``, the database is opened
    with ``_create_db``, filled by ``_insert_data`` and closed with
    ``_close``.

    :param data: object handed unchanged to ``_insert_data``
    :param name: file name of the database inside ``out_dir``
    :param out_dir: directory that will contain the database
    """
    # The absolute path used to be computed and thrown away; keep it so the
    # database location does not depend on later working-directory changes.
    out_dir = op.abspath(safe_dirs(out_dir))
    con = _create_db(op.join(out_dir, name))
    try:
        _insert_data(con, data)
    finally:
        # Release the connection even when the insert fails.
        _close(con)