def report(args):
    """Build the HTML report for a seqcluster run.

    Loads the cluster JSON, renders the expression profile and the SQLite
    database, and drops the static CSS/JS assets next to the generated pages.
    """
    logger.info("reading sequeces")
    data = load_data(args.json)
    html_dir = os.path.join(args.out, "html")
    safe_dirs(html_dir)
    logger.info("create profile")
    data = make_profile(data, html_dir, args)
    logger.info("create database")
    make_database(data, "seqcluster.db", args.out)
    # Static assets ship alongside the templates package.
    tpl_dir = os.path.normpath(os.path.dirname(os.path.realpath(templates.__file__)))
    assets = ("info.css", "jquery.tablesorter.min.js")
    # Copy both assets only when the CSS is not already in place.
    if not os.path.exists(os.path.join(html_dir, assets[0])):
        for fname in assets:
            shutil.copy(os.path.join(tpl_dir, fname), os.path.join(html_dir, fname))
    logger.info("Done. Download https://github.com/lpantano/seqclusterViz/archive/master.zip to browse the output.")
def report(args):
    """Build the report data: expression profiles plus the SQLite database."""
    logger.info("reading sequeces")
    data = load_data(args.json)
    logger.info("create profile")
    profile_dir = os.path.join(args.out, "profiles")
    data = make_profile(data, profile_dir, args)
    logger.info("create database")
    make_database(data, "seqcluster.db", args.out)
    logger.info("Done. Download https://github.com/lpantano/seqclusterViz/archive/master.zip to browse the output.")
def predictions(args):
    """Run cluster-class predictions on a seqcluster JSON file."""
    logger.info("reading sequeces")
    data = load_data(args.json)
    pred_dir = os.path.join(args.out, "predictions")
    safe_dirs(pred_dir)
    logger.info("make predictions")
    # make_predictions(data, pred_dir, args)
    if args.coral:
        logger.info("make CoRaL predictions")
        run_coral(data, pred_dir, args)
    logger.info("Done")
def explore(args):
    """Map the sequences of two clusters against each other's precursor loci.

    ``args.names`` carries the two cluster ids as ``"id1,id2"``; the mapping
    table is written to ``<args.out>/map.tsv``.
    """
    logger.info("reading sequeces")
    data = load_data(args.json)
    logger.info("get sequences from json")
    # get_sequences_from_cluster()
    c1, c2 = args.names.split(",")
    seqs, names = get_sequences_from_cluster(c1, c2, data[0])
    loci = get_precursors_from_cluster(c1, c2, data[0])
    logger.info("map all sequences to all loci")
    # FIX: was a Python 2 print statement; use the print() function so the
    # module stays importable under Python 3 (the sibling copy of this
    # function already uses print()).
    print("%s" % (loci))
    map_to_precursors(seqs, names, loci, os.path.join(args.out, "map.tsv"), args)
    # map_sequences_w_bowtie(sequences, precursors)
    logger.info("plot sequences on loci")
    # get_matrix_position()
    # plot_sequences()
    logger.info("Done")
def predictions(args):
    """Predict cluster classes (tRNA-derived, optionally CoRaL) and save JSON."""
    logger.info(args)
    logger.info("reading sequeces")
    result_json = os.path.abspath(os.path.splitext(args.json)[0] + "_prediction.json")
    data = load_data(args.json)
    pred_dir = os.path.abspath(safe_dirs(os.path.join(args.out, "predictions")))
    logger.info("make predictions")
    data = is_tRNA(data, pred_dir, args)
    if args.coral:
        logger.info("make CoRaL predictions")
        run_coral(data, pred_dir, args)
    write_data(data[0], result_json)
    logger.info("Done")
def explore(args):
    """Map sequences from two clusters onto all of their precursor loci."""
    logger.info("reading sequeces")
    data = load_data(args.json)
    logger.info("get sequences from json")
    # get_sequences_from_cluster()
    first, second = args.names.split(",")
    seqs, names = get_sequences_from_cluster(first, second, data[0])
    loci = get_precursors_from_cluster(first, second, data[0])
    logger.info("map all sequences to all loci")
    print("%s" % (loci))
    map_to_precursors(seqs, names, loci, os.path.join(args.out, "map.tsv"), args)
    # map_sequences_w_bowtie(sequences, precursors)
    logger.info("plot sequences on loci")
    # get_matrix_position()
    # plot_sequences()
    logger.info("Done")
def report(args):
    """Render the HTML report: profile pages plus the static CSS/JS assets."""
    logger.info("reading sequeces")
    data = load_data(args.json)
    html_dir = os.path.join(args.out, "html")
    safe_dirs(html_dir)
    logger.info("create profile")
    make_profile(data, html_dir, args)
    # Static assets live next to the templates package.
    tpl_dir = os.path.normpath(os.path.dirname(os.path.realpath(templates.__file__)))
    assets = ("info.css", "jquery.tablesorter.min.js")
    # Both files are copied only when the CSS is missing from the output.
    if not os.path.exists(os.path.join(html_dir, assets[0])):
        for fname in assets:
            shutil.copy(os.path.join(tpl_dir, fname), os.path.join(html_dir, fname))
    logger.info("Done")
def report(args):
    """Create the full HTML report: profiles, SQLite database, static assets."""
    logger.info("reading sequeces")
    data = load_data(args.json)
    html_dir = os.path.join(args.out, "html")
    safe_dirs(html_dir)
    logger.info("create profile")
    data = make_profile(data, html_dir, args)
    logger.info("create database")
    make_database(data, "seqcluster.db", args.out)
    # Resolve the on-disk location of the bundled templates.
    tpl_dir = os.path.normpath(os.path.dirname(os.path.realpath(templates.__file__)))
    css_src = os.path.join(tpl_dir, "info.css")
    js_src = os.path.join(tpl_dir, "jquery.tablesorter.min.js")
    css_dst = os.path.join(html_dir, "info.css")
    js_dst = os.path.join(html_dir, "jquery.tablesorter.min.js")
    # Skip the copy entirely when the CSS is already present.
    if not os.path.exists(css_dst):
        shutil.copy(css_src, css_dst)
        shutil.copy(js_src, js_dst)
    logger.info("Done. Download https://github.com/lpantano/seqclusterViz/archive/master.zip to browse the output.")
def cluster(args):
    """Run the seqcluster clustering pipeline.

    Parses the aligned sequence matrix, builds clusters from the BAM file,
    resolves multi-mapped reads across the cluster network, annotates the
    result, and writes the JSON/count matrix (and optionally a database).
    A per-step read accounting is appended to ``read_stats.tsv``.
    """
    args = _check_args(args)
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    # Start the stats file fresh; every step below appends to it.
    if file_exists(read_stats_file):
        os.remove(read_stats_file)
    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
    seqL, y, l = parse_ma_file(seq_obj, args.ffile)
    # y, l = _total_counts(seqL.keys(), seqL)
    _dump_read_stats(read_stats_file, y, l, 'aligned')
    if len(seqL) < 10:
        logger.error("It seems you have low coverage. Please check your fastq files have enough sequences.")
        raise ValueError("So few sequences.")

    logger.info("Cleaning bam file")
    y, l = _total_counts(seqL.keys(), seqL)
    _dump_read_stats(read_stats_file, y, l, 'cleaned')

    clusL = _create_clusters(seqL, bam_file, args)
    y, l = _total_counts(clusL.seq.keys(), clusL.seq, aligned=True)
    _dump_read_stats(read_stats_file, y, l, 'clusters')

    logger.info("Solving multi-mapping events in the network of clusters")
    clusLred = _cleaning(clusL, args.dir_out)
    y, l = _total_counts(clusLred.clus, seqL)
    _dump_read_stats(read_stats_file, y, l, 'meta-cluster')

    logger.info("Clusters up to %s" % (len(clusLred.clus)))
    if args.show:
        logger.info("Creating sequences alignment to precursor")
        clusLred = show_seq(clusLred, args.index)
    clusLred = peak_calling(clusLred)
    clusLred = _annotate(args, clusLred)
    logger.info("Creating json and count matrix")
    json_file = _create_json(clusLred, args)
    logger.info("Output file in: %s" % args.dir_out)
    if args.db:
        name = args.db + ".db"
        logger.info("Create database: database/" + name)
        data = load_data(json_file)
        out_dir = op.join(args.dir_out, "database")
        make_database(data, name, out_dir)
    logger.info("Finished")


def _dump_read_stats(stats_file, y, l, step):
    """Log per-step totals and append one row per sample to the stats TSV.

    :param stats_file: path of the tab-separated read_stats file (append mode).
    :param y: dict of sample -> counts for the current step.
    :param l: number of distinct sequences after the current step.
    :param step: label written to the 'step' column ('aligned', 'cleaned', ...).
    """
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    # list() so Python 3 dict views are accepted by the DataFrame constructor.
    dt = pd.DataFrame({'sample': list(y.keys()), 'counts': list(y.values())})
    dt['step'] = step
    dt.to_csv(stats_file, sep="\t", index=False, header=False, mode='a')
def cluster(args):
    """Run the seqcluster clustering pipeline.

    Parses the aligned sequence matrix, builds clusters from the BAM file,
    resolves multi-mapped reads across the cluster network, annotates the
    result, and writes the JSON/count matrix (and optionally a database).
    A per-step read accounting is appended to ``read_stats.tsv``.
    """
    args = _check_args(args)
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    # Start the stats file fresh; every step below appends to it.
    if file_exists(read_stats_file):
        os.remove(read_stats_file)
    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
    seqL, y, l = parse_ma_file(seq_obj, args.ffile)
    # y, l = _total_counts(seqL.keys(), seqL)
    _append_step_stats(read_stats_file, y, l, 'aligned')
    if len(seqL) < 10:
        # FIX: message grammar ("have so low coverage") corrected to match
        # the other copy of this function.
        logger.error("It seems you have low coverage. Please check your fastq files have enough sequences.")
        raise ValueError("So few sequences.")

    logger.info("Cleaning bam file")
    y, l = _total_counts(seqL.keys(), seqL)
    _append_step_stats(read_stats_file, y, l, 'cleaned')

    clusL = _create_clusters(seqL, bam_file, args)
    y, l = _total_counts(clusL.seq.keys(), clusL.seq, aligned=True)
    _append_step_stats(read_stats_file, y, l, 'clusters')

    logger.info("Solving multi-mapping events in the network of clusters")
    clusLred = _cleaning(clusL, args.dir_out)
    y, l = _total_counts(clusLred.clus, seqL)
    _append_step_stats(read_stats_file, y, l, 'meta-cluster')

    logger.info("Clusters up to %s" % (len(clusLred.clus)))
    if args.show:
        logger.info("Creating sequences alignment to precursor")
        clusLred = show_seq(clusLred, args.index)
    clusLred = peak_calling(clusLred)
    clusLred = _annotate(args, clusLred)
    logger.info("Creating json and count matrix")
    json_file = _create_json(clusLred, args)
    logger.info("Output file in: %s" % args.dir_out)
    if args.db:
        name = args.db + ".db"
        logger.info("Create database: database/" + name)
        data = load_data(json_file)
        out_dir = op.join(args.dir_out, "database")
        make_database(data, name, out_dir)
    logger.info("Finished")


def _append_step_stats(stats_file, y, l, step):
    """Log per-step totals and append one row per sample to the stats TSV.

    :param stats_file: path of the tab-separated read_stats file (append mode).
    :param y: dict of sample -> counts for the current step.
    :param l: number of distinct sequences after the current step.
    :param step: label written to the 'step' column ('aligned', 'cleaned', ...).
    """
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    # list() so Python 3 dict views are accepted by the DataFrame constructor.
    dt = pd.DataFrame({'sample': list(y.keys()), 'counts': list(y.values())})
    dt['step'] = step
    dt.to_csv(stats_file, sep="\t", index=False, header=False, mode='a')