def clean_bam_file(bam_in, mask=None):
    """
    Filter an alignment file, keeping only reads with a long enough match.

    A read is kept when its first CIGAR match block (operation 0) spans at
    least 17 nt and its first soft-clip block (operation 4), if present,
    spans at most 3 nt. Kept reads are written to ``<bam_in>_rmlw.bam``.

    :param bam_in: path to the input BAM file
    :param mask: optional file passed to ``BedTool.intersect`` as ``b``
        with ``v=True``; the result is stored in ``<bam_in>_mask.bam``
        and used as input for the filtering step
    :returns: tuple ``(out_file, seq_obj)``; ``seq_obj`` maps the integer
        taken from the read name (``seq_<n>``) to a ``sequence`` object
        whose ``align`` attribute is the read's NH tag (1 when absent)
    :raises IOError: when no ``.bai`` index exists after ``samtools index``
    """
    seq_obj = defaultdict(int)
    if mask:
        mask_file = op.splitext(bam_in)[0] + "_mask.bam"
        if not file_exists(mask_file):
            # The original read an undefined ``bam_file`` here; the file to
            # mask is the function's own input.
            pybedtools.BedTool(bam_in).intersect(b=mask, v=True).saveas(mask_file)
        bam_in = mask_file
    out_file = op.splitext(bam_in)[0] + "_rmlw.bam"
    run("samtools index %s" % bam_in)
    if not file_exists(bam_in + ".bai"):
        raise IOError("Failed to created bam index of %s. Try to do it manually" % bam_in)
    # Both handles are context-managed so the input is closed as well.
    with pysam.AlignmentFile(bam_in, "rb") as bam_handle, \
            pysam.AlignmentFile(out_file, "wb", template=bam_handle) as out_handle:
        for read in bam_handle.fetch():
            seq_name = int(read.query_name.replace('seq_', ''))
            cigar = read.cigartuples or []
            match_size = [nts for oper, nts in cigar if oper == 0]
            subs_size = [nts for oper, nts in cigar if oper == 4]
            # A read without any match block cannot satisfy the 17 nt
            # minimum, so it is skipped instead of raising IndexError.
            if not match_size or match_size[0] < 17:
                continue
            if subs_size and subs_size[0] > 3:
                continue
            try:
                nh = read.get_tag('NH')
            except KeyError:
                nh = 1
            seq_obj[seq_name] = sequence(seq_name)
            seq_obj[seq_name].align = nh
            out_handle.write(read)
    return out_file, seq_obj
def miraligner(args):
    """
    Realign BAM hits to miRBAse to get better accuracy and annotation

    For every file in ``args.files`` the reads are loaded (BAM/SAM) or
    aligned (FASTA/FASTQ, with the external miraligner when
    ``args.miraligner`` is set, otherwise with ``pyMatch``), written as a
    ``<sample>.mirna`` table and a ``<sample>.vcf`` file under
    ``args.out``, and finally merged into a count matrix.

    :param args: parsed command-line namespace; reads ``files``, ``out``,
        ``sps``, ``gtf``, ``miraligner``, ``hairpin`` and ``mirna``
    :raises ValueError: when a file has an unsupported extension
    """
    # Use the paths returned by the helper; they equal args.hairpin and
    # args.mirna whenever those were supplied by the user.
    hairpin, mirna = _download_mirbase(args)
    precursors = _read_precursor(hairpin, args.sps)
    matures = _read_mature(mirna, args.sps)
    gtf = _read_gtf(args.gtf)
    out_dts = []
    for bam_fn in args.files:
        sample = op.splitext(op.basename(bam_fn))[0]
        if bam_fn.endswith(("bam", "sam")):
            logger.info("Reading %s" % bam_fn)
            bam_fn = _sam_to_bam(bam_fn)
            bam_sort_by_n = op.splitext(bam_fn)[0] + "_sort"
            pysam.sort("-n", bam_fn, bam_sort_by_n)
            reads = _read_bam(bam_sort_by_n + ".bam", precursors)
        elif bam_fn.endswith(("fasta", "fa", "fastq")):
            out_file = op.join(args.out, sample + ".premirna")
            bam_fn = _filter_seqs(bam_fn)
            if args.miraligner:
                # _cmd_miraligner takes the output directory as its fifth
                # positional argument.
                _cmd_miraligner(bam_fn, out_file, args.sps, hairpin, args.out)
                reads = _read_miraligner(out_file)
            else:
                if bam_fn.endswith("fastq"):
                    bam_fn = _convert_to_fasta(bam_fn)
                logger.info("Aligning %s" % bam_fn)
                if not file_exists(out_file):
                    pyMatch.Miraligner(hairpin, bam_fn, out_file, 1, 4)
                reads = _read_pyMatch(out_file, precursors)
        else:
            raise ValueError("Format not recognized.")

        if not args.miraligner:
            reads = _annotate(reads, matures, precursors)

        out_file = op.join(args.out, sample + ".mirna")
        out_file, dt, dt_pre = _tab_output(reads, out_file, sample)
        # VCF creation is best effort: problems are logged, not raised.
        try:
            vcf_file = op.join(args.out, sample + ".vcf")
            if not file_exists(vcf_file):
                create_vcf(dt_pre, matures, gtf, vcf_file)
            try:
                import vcf
                vcf.Reader(filename=vcf_file)
            except Exception as e:
                logger.warning(e.__doc__)
                # str(e) works on Python 2 and 3; e.message is Python 2 only.
                logger.warning(str(e))
        except Exception as e:
            logger.warning(e.__doc__)
            logger.warning(str(e))
        if isinstance(dt, pd.DataFrame):
            out_dts.append(dt)

    if out_dts:
        _create_counts(out_dts, args.out)
    else:
        print("No files analyzed!")
def _single_cluster(c, data, out_file, args):
    """
    Map sequences on precursors and create expression profile

    :param c: cluster key inside ``data[0]``
    :param data: container whose first element maps cluster keys to
        dicts with ``freq``, ``seqs`` and ``loci`` entries
    :param out_file: path of the mapping output; mapping is skipped when
        the file already exists
    :param args: namespace; reads ``razer`` and ``debug``
    :returns: tuple ``(valid, ann, df)``; ``(0, 0, {})`` when the first
        locus is longer than 500 nt
    """
    valid, ann = 0, 0
    raw_file = None
    cluster_data = data[0][c]
    freq = defaultdict()
    # Each item is used as a one-entry dict: only its first key/value is
    # taken. next(iter(...)) works on Python 2 and 3, whereas indexing
    # dict.keys() only works on Python 2.
    for item in cluster_data['freq']:
        name = next(iter(item))
        freq[name] = item[name]
    names = [next(iter(item)) for item in cluster_data['seqs']]
    seqs = [item[next(iter(item))] for item in cluster_data['seqs']]
    loci = cluster_data['loci']

    if loci[0][3] - loci[0][2] > 500:
        logger.info("locus bigger > 500 nt, skipping: %s" % loci)
        return valid, ann, {}
    if not file_exists(out_file):
        if args.razer:
            logger.debug("map with razer all sequences to all loci %s " % loci)
            map_to_precursors(seqs, names, {loci[0][0]: [loci[0][0:5]]}, out_file, args)
        else:
            logger.debug("map with C fn all sequences to all loci %s " % loci)
            if args.debug:
                raw_file = out_file
            out_file = map_to_precursors_on_fly(seqs, names, loci[0][0:5], args)
    logger.debug("plot sequences on loci")
    df = _convert_to_df(out_file, freq, raw_file)
    if df:
        valid, ann = _make(cluster_data)
    return valid, ann, df
def _download_mirbase(args, version="CURRENT"):
    """
    Download files from mirbase

    When both ``args.hairpin`` and ``args.mirna`` are set they are
    returned untouched. Otherwise ``hairpin.fa`` and ``miRNA.str`` are
    fetched into ``args.out`` (skipping files already present) and the
    paths of the decompressed files are returned.

    :param args: namespace; reads ``hairpin``, ``mirna`` and ``out``
    :param version: miRBase release directory on the FTP server
    :returns: tuple ``(hairpin_path, mirna_path)``
    """
    if args.hairpin and args.mirna:
        return args.hairpin, args.mirna

    logger.info("Working with version %s" % version)
    out_dir = op.abspath(args.out)
    paths = []
    for name, label in (("hairpin.fa", "download hairpin"),
                        ("miRNA.str", "download mirna")):
        target = op.join(out_dir, name)
        gz_target = target + ".gz"
        # Check the decompressed file: gunzip removes the .gz, so testing
        # for the archive would trigger a new download on every run.
        if not file_exists(target):
            # The archive path is spelled out twice because bash history
            # expansion ("!$") is not active in non-interactive shells.
            cmd = ("wget ftp://mirbase.org/pub/mirbase/%s/%s.gz -O %s "
                   "&& gunzip -f %s" % (version, name, gz_target, gz_target))
            do.run(cmd, label)
        paths.append(target)
    return paths[0], paths[1]
def _single_cluster(c, data, out_file, args):
    """
    Map the sequences of one cluster onto its first locus and build the
    expression profile.

    :param c: cluster key inside ``data[0]``
    :param data: container whose first element maps cluster keys to
        dicts with ``freq``, ``seqs`` and ``loci`` entries
    :param out_file: path of the mapping output; mapping is skipped when
        the file already exists
    :param args: namespace; reads ``razer`` and ``debug``
    :returns: tuple ``(valid, ann, df)``; ``(0, 0, {})`` when the first
        locus is longer than 500 nt
    """
    valid, ann = 0, 0
    raw_file = None
    entry = data[0][c]

    # Only the first key/value of every item is used.
    freq = defaultdict()
    for item in entry['freq']:
        freq.update({list(item.keys())[0]: list(item.values())[0]})
    names = []
    for item in entry['seqs']:
        names.append(list(item.keys())[0])
    seqs = []
    for item in entry['seqs']:
        seqs.append(list(item.values())[0])
    loci = entry['loci']
    first_locus = loci[0]

    if first_locus[3] - first_locus[2] > 500:
        logger.info("locus bigger > 500 nt, skipping: %s" % loci)
        return valid, ann, {}

    if not file_exists(out_file):
        if not args.razer:
            logger.debug("map with biopython fn all sequences to all loci %s " % loci)
            if args.debug:
                # Keep the requested path so the raw mapping can be saved.
                raw_file = out_file
            out_file = map_to_precursor_biopython(seqs, names, first_locus[0:5], args)
        else:
            logger.debug("map with razer all sequences to all loci %s " % loci)
            map_to_precursors(seqs, names, {first_locus[0]: [first_locus[0:5]]}, out_file, args)

    logger.debug("plot sequences on loci")
    df = _convert_to_df(out_file, freq, raw_file)
    if df:
        logger.debug("create html")
        valid, ann = _make(entry)
    logger.debug("done single cluster")
    return valid, ann, df
def _create_clusters(seqL, bam_file, args):
    """
    Cluster sequences and create metaclusters with multi-mappers.

    The result is cached in ``<args.out>/list_obj.pk``; when that pickle
    exists it is loaded instead of recomputing. Otherwise
    ``<args.out>/cluster.bed`` is produced with the bedtools command line
    (unless already present) and handed to ``detect_clusters``.

    :param seqL: sequence collection forwarded to ``detect_clusters``
    :param bam_file: path of the alignment file given to ``bamtobed``
    :param args: namespace; reads ``out``, ``min_seqs`` and ``non_un_gl``
    :returns: the cluster object (freshly built or loaded from the cache)
    """
    clus_obj = []
    cluster_file = op.join(args.out, "cluster.bed")
    cache_file = op.join(args.out, 'list_obj.pk')

    if os.path.exists(cache_file):
        logger.info("Loading previous clusters")
        # NOTE: the pickle is a cache written by this same function; it
        # must not come from an untrusted location.
        with open(cache_file, 'rb') as cache_handle:
            clus_obj = pickle.load(cache_handle)
    else:
        if not file_exists(cluster_file):
            logger.info("Parsing aligned file")
            logger.info("Merging sequences")
            # Prefer the bedtools next to the interpreter, else use PATH.
            local_bedtools = os.path.join(os.path.dirname(sys.executable), "bedtools")
            if os.path.exists(local_bedtools):
                bedtools = local_bedtools
            else:
                bedtools = "bedtools"
            parse_cmd = "awk '{i=i+1;print $1\"\\t\"$2\"\\t\"$3\"\\t\"$4\"\\t\"i\"\\t\"$6}'"
            cmd = "{bedtools} bamtobed -i {bam_file} | {parse_cmd} | {bedtools} cluster -s -d 20 -i - > {cluster_file}"
            do.run(cmd.format(bedtools=bedtools, bam_file=bam_file,
                              parse_cmd=parse_cmd, cluster_file=cluster_file))
        c = pybedtools.BedTool(cluster_file)
        logger.info("Creating clusters")
        clus_obj = detect_clusters(c, seqL, args.min_seqs, args.non_un_gl)
        with open(cache_file, 'wb') as cache_handle:
            pickle.dump(clus_obj, cache_handle, pickle.HIGHEST_PROTOCOL)

    logger.info("%s clusters found" % (len(clus_obj.clusid)))
    return clus_obj
def _create_clusters(seqL, bam_file, args):
    """
    Cluster sequences and create metaclusters with multi-mappers.

    The result is cached in ``<args.out>/list_obj.pk``; when that pickle
    exists it is loaded instead of recomputing. Otherwise the alignment
    is parsed, clustered through pybedtools (or re-read from
    ``<args.out>/cluster.bed`` when present) and given to
    ``detect_clusters``.

    :param seqL: sequence collection forwarded to ``detect_clusters``
    :param bam_file: alignment file given to ``parse_align_file``
    :param args: namespace; reads ``out``, ``min_seqs`` and ``non_un_gl``
    :returns: the cluster object (freshly built or loaded from the cache)
    """
    clus_obj = []
    logger.info("Parsing aligned file")
    cluster_file = op.join(args.out, "cluster.bed")
    cache_file = args.out + '/list_obj.pk'

    if os.path.exists(cache_file):
        logger.info("Loading previous clusters")
        # NOTE: the pickle is a cache written by this same function; it
        # must not come from an untrusted location.
        with open(cache_file, 'rb') as cache_handle:
            clus_obj = pickle.load(cache_handle)
    else:
        logger.info("Merging position")
        if file_exists(cluster_file):
            c = pybedtools.BedTool(cluster_file)
        else:
            aligned_bed = parse_align_file(bam_file)
            c = pybedtools.BedTool(aligned_bed, from_string=True).cluster(s=True, d=20)
            c.saveas(cluster_file)
        logger.info("Creating clusters")
        clus_obj = detect_clusters(c, seqL, args.min_seqs, args.non_un_gl)
        with open(cache_file, 'wb') as cache_handle:
            pickle.dump(clus_obj, cache_handle, pickle.HIGHEST_PROTOCOL)

    logger.info("%s clusters found" % (len(clus_obj.clusid)))
    return clus_obj
def _filter_seqs(fn):
    """
    Convert names of sequences to unique ids

    Reads a FASTA or FASTQ file and writes ``<fn without ext>_unique.fa``
    with the records whose sequence is shorter than 26 nt and whose
    frequency (from ``_get_freq``) is either 0 or greater than 1. Nothing
    is done when the output file already exists.

    :param fn: path to the input FASTA/FASTQ file
    :returns: path to the filtered FASTA file
    """
    out_file = op.splitext(fn)[0] + "_unique.fa"
    idx = 0
    if not file_exists(out_file):
        with open(out_file, 'w') as out_handle, open(fn) as in_handle:
            for line in in_handle:
                if not line.startswith(("@", ">")):
                    continue
                fixed_name = _make_unique(line.strip(), idx)
                # The builtin next() works on Python 2 and 3; file objects
                # have no .next() method on Python 3.
                seq_line = next(in_handle, None)
                if seq_line is None:
                    # Header at end of file without a sequence line.
                    break
                seq = seq_line.strip()
                counts = _get_freq(fixed_name)
                if len(seq) < 26 and (counts > 1 or counts == 0):
                    idx += 1
                    print(fixed_name, file=out_handle)
                    print(seq, file=out_handle)
                if line.startswith("@"):
                    # FASTQ record: skip the "+" separator and the quality
                    # line so a quality string starting with "@" is never
                    # read as a header. A truncated last record is
                    # tolerated.
                    next(in_handle, None)
                    next(in_handle, None)
    return out_file
def _download_mirbase(args, version="CURRENT"):
    """
    Download files from mirbase

    When both ``args.hairpin`` and ``args.mirna`` are set they are
    returned untouched. Otherwise ``hairpin.fa`` and ``miRNA.str`` are
    fetched into ``args.out`` (skipping files already present) and the
    paths of the decompressed files are returned.

    :param args: namespace; reads ``hairpin``, ``mirna`` and ``out``
    :param version: miRBase release directory on the FTP server
    :returns: tuple ``(hairpin_path, mirna_path)``
    """
    if args.hairpin and args.mirna:
        return args.hairpin, args.mirna

    logger.info("Working with version %s" % version)
    out_dir = op.abspath(args.out)
    paths = []
    for name, label in (("hairpin.fa", "download hairpin"),
                        ("miRNA.str", "download mirna")):
        target = op.join(out_dir, name)
        gz_target = target + ".gz"
        # Check the decompressed file: gunzip removes the .gz, so testing
        # for the archive would trigger a new download on every run.
        if not file_exists(target):
            # The archive path is spelled out twice because bash history
            # expansion ("!$") is not active in non-interactive shells.
            cmd = ("wget ftp://mirbase.org/pub/mirbase/%s/%s.gz -O %s "
                   "&& gunzip -f %s" % (version, name, gz_target, gz_target))
            do.run(cmd, label)
        paths.append(target)
    return paths[0], paths[1]
def _single_cluster(c, data, out_file, args):
    """
    Map sequences on precursors and create expression profile

    Besides mapping, this version plots the profile to a PNG and renders
    an HTML page; both paths are derived from ``out_file`` by replacing
    the ``.tsv`` suffix.

    :param c: cluster key inside ``data[0]``
    :param data: container whose first element maps cluster keys to
        dicts with ``freq``, ``seqs`` and ``loci`` entries
    :param out_file: path of the mapping output; mapping is skipped when
        the file already exists
    :param args: namespace; reads ``razer`` and ``debug``
    :returns: tuple ``(valid, ann, df)``; ``(0, 0, {})`` when the first
        locus is longer than 500 nt
    """
    valid, ann = 0, 0
    raw_file = None
    figure_file = out_file.replace(".tsv", ".png")
    html_file = out_file.replace(".tsv", ".html")
    prefix = os.path.dirname(out_file)
    cluster_data = data[0][c]
    freq = defaultdict()
    # Each item is used as a one-entry dict: only its first key/value is
    # taken. next(iter(...)) works on Python 2 and 3, whereas indexing
    # dict.keys() only works on Python 2.
    for item in cluster_data['freq']:
        name = next(iter(item))
        freq[name] = item[name]
    names = [next(iter(item)) for item in cluster_data['seqs']]
    seqs = [item[next(iter(item))] for item in cluster_data['seqs']]
    loci = cluster_data['loci']

    if loci[0][3] - loci[0][2] > 500:
        logger.info("locus bigger > 500 nt, skipping: %s" % loci)
        return valid, ann, {}
    if not file_exists(out_file):
        if args.razer:
            logger.debug("map with razer all sequences to all loci %s " % loci)
            map_to_precursors(seqs, names, {loci[0][0]: [loci[0][0:5]]}, out_file, args)
        else:
            logger.debug("map with C fn all sequences to all loci %s " % loci)
            if args.debug:
                raw_file = out_file
            out_file = map_to_precursors_on_fly(seqs, names, loci[0][0:5], args)
    logger.debug("plot sequences on loci")
    df = _convert_to_df(out_file, freq, raw_file)
    if df:
        if not file_exists(figure_file):
            fig = plt.figure()
            for s in df:
                # Materialise the dict views so plotting also works on
                # Python 3.
                plt.plot(list(df[s].keys()), list(df[s].values()))
            plt.ylabel('Normalized expression', fontsize=15)
            plt.xlabel('Position', fontsize=15)
            plt.savefig(figure_file)
            plt.close(fig)
        valid, ann = _make_html(cluster_data, html_file, figure_file, prefix)
    return valid, ann, df
def _cmd_miraligner(fn, out_file, species, hairpin, out):
    """
    Run miraligner for miRNA annotation

    The command is only executed when ``out_file`` does not exist yet;
    afterwards ``<out_file>.mirna`` is moved onto ``out_file``.

    :param fn: input sequence file
    :param out_file: output path given to miraligner with ``-o``
    :param species: species code given with ``-s``
    :param hairpin: hairpin file; its directory is given with ``-db``
    :param out: not used by this function
    :returns: ``out_file``
    """
    tool = _get_miraligner()
    path_db = op.dirname(op.abspath(hairpin))
    cmd = "{tool} -freq -i {fn} -o {out_file} -s {species} -db {path_db} -sub 1 -trim 3 -add 3"
    if file_exists(out_file):
        return out_file
    logger.info("Running miraligner with %s" % fn)
    command_line = cmd.format(tool=tool, fn=fn, out_file=out_file,
                              species=species, path_db=path_db)
    do.run(command_line, "miraligner with %s" % fn)
    shutil.move(out_file + ".mirna", out_file)
    return out_file
def _check_args(args):
    """
    check arguments before starting analysis.

    Creates the output folder when missing, derives ``dir_out``,
    ``samplename``, ``list_files`` and ``type_ann`` on ``args``, checks
    that the input and annotation files exist and copies the clustering
    options onto ``param``.

    :param args: parsed command-line namespace; reads ``out``, ``bed``,
        ``gtf``, ``debug``, ``ffile``, ``afile``, ``method``, ``similar``
        and ``min_seqs``
    :returns: the same ``args`` object with the derived attributes set
    :raises SyntaxError: when both ``bed`` and ``gtf`` are given
    :raises IOError: when an input or annotation file does not exist
    """
    logger.info("Checking parameters and files")
    args.dir_out = args.out
    args.samplename = "pro"
    if not os.path.isdir(args.out):
        logger.warning("the output folder doens't exists")
        # os.makedirs is the real API; os.mkdirs does not exist and
        # raised AttributeError whenever the folder was missing.
        os.makedirs(args.out)
    if args.bed and args.gtf:
        logger.error("cannot provide -b and -g at the same time")
        raise SyntaxError
    if args.debug:
        logger.info("DEBUG messages will be showed in file.")
    if args.bed:
        args.list_files = args.bed
        args.type_ann = "bed"
    if args.gtf:
        args.list_files = args.gtf
        args.type_ann = "gtf"
    logger.info("Output dir will be: %s" % args.dir_out)
    if not all([file_exists(args.ffile), file_exists(args.afile)]):
        logger.error("I/O error: Seqs.ma or Seqs.bam. ")
        raise IOError("Seqs.ma or/and Seqs.bam doesn't exists.")
    # list_files is only set above when an annotation option was given.
    if hasattr(args, 'list_files'):
        for filebed in args.list_files.split(","):
            if not file_exists(filebed):
                logger.error("I/O error: {0}".format(filebed))
                raise IOError("%s annotation files doesn't exist" % filebed)
    param.decision_cluster = args.method
    if args.similar:
        param.similar = float(args.similar)
    if args.min_seqs:
        param.min_seqs = int(args.min_seqs)
    return args
def detect_complexity(bam_in, genome):
    """
    Compute the genome coverage of the alignment and log the total.

    The coverage table is saved to ``<bam_in>_cov.tsv``. Nothing is done
    when no genome is given or when that file already exists.

    :param bam_in: path to the alignment file
    :param genome: genome path; ``<genome>.fai`` is passed to bedtools
    :returns: always ``None``
    """
    if not genome:
        logger.info("No genome given. skipping.")
        return None
    out_file = bam_in + "_cov.tsv"
    if file_exists(out_file):
        return None
    cov = pybedtools.BedTool(bam_in).genome_coverage(g=genome + ".fai", max=1)
    cov.saveas(out_file)
    # Add the fifth column of every "genome" row whose second column is
    # not zero.
    total = sum(float(region[4]) for region in cov
                if region[0] == "genome" and int(region[1]) != 0)
    logger.info("Total genome with sequences: %s " % total)
def _get_miraligner():
    """
    Build the command prefix used to launch miraligner.

    The ``miraligner`` executable is tried first through ``os.system``;
    a non-zero status is treated as "not installed", in which case
    ``miraligner.jar`` in the current directory is used (and downloaded
    when missing) through ``java -jar``.

    :returns: command string including the JVM memory options
    """
    opts = "-Xms750m -Xmx4g"
    tool = "miraligner"
    if os.system(tool) == 0:
        return "%s %s" % (tool, opts)

    jar = op.abspath("miraligner.jar")
    if not utils.file_exists(jar):
        url = "https://raw.githubusercontent.com/lpantano/seqbuster/miraligner/modules/miraligner/miraligner.jar"
        cmd = ["wget", "-O miraligner.jar", "--no-check-certificate", url]
        do.run(" ".join(cmd), "Download miraligner.")
    # Plain %-formatting: str.format() over a string holding a filesystem
    # path fails when the path contains braces.
    return "java -jar %s %s" % (opts, jar)
def _make_html(c, html_file, figure_file, prefix):
    """
    create html from template, adding figure, annotation and sequences counts

    :param c: cluster dict with ``loci``, ``ann``, ``valid``, ``seqs``
        and ``freq`` entries
    :param html_file: output path; nothing is written when it exists
    :param figure_file: image path; only its base name goes in the page
    :param prefix: not used by this function
    :returns: tuple ``(valid, ann_list)``
    """
    ann = defaultdict(list)
    seqs_table = []
    src_img = "<img src=\"%s\" width=\"800\" height=\"350\" />" % os.path.basename(figure_file)
    coor_list = [" ".join(map(str, l)) for l in c['loci']]
    for pos in c['ann']:
        for db in pos:
            ann[db] += list(pos[db])
    logger.debug(ann)
    valid = [l for l in c['valid']]
    ann_list = [", ".join(list(set(ann[feature]))) for feature in ann if feature in valid]

    # Each item is used as a one-entry dict: only its first value is
    # taken. next(iter(...)) replaces Python 2 only indexing of
    # dict.values().
    seqs = [next(iter(s.values())) for s in c['seqs']]
    freq = [[float(v) for v in next(iter(s.values())).values()] for s in c['freq']]
    header = ['seq'] + list(next(iter(c['freq'][0].values())))
    for s, f in zip(seqs, freq):
        # Real lists are built because list + map object fails on Python 3.
        seqs_table.append([s] + [str(round(v)) for v in f])

    if not file_exists(html_file):
        coor_html = HTML.list(coor_list)
        ann_html = HTML.list(ann_list)
        seqs_html = HTML.table(seqs_table,
                               header_row=header, attribs={'id': 'keywords'})
        html_template = os.path.normpath(os.path.join(os.path.dirname(os.path.realpath(templates.__file__)), "cluster"))
        # Context manager so the template handle is closed.
        with open(html_template) as template_handle:
            content = template_handle.read()
        data = {'profile': src_img,
                'loci': coor_html,
                'annotation': ann_html,
                'table': seqs_html}
        out_content = string.Template(content).safe_substitute(data)
        with open(html_file, 'w') as out_handle:
            # Same output as the Python 2 "print >>" statement.
            out_handle.write("%s\n" % out_content)

    return valid, ann_list
def _filter_seqs(fn):
    """
    Write the short, informative sequences of a FASTA/FASTQ file to a
    new FASTA file with unique names.

    A record is kept when its sequence is shorter than 26 nt and its
    frequency (from ``_get_freq``) is either 0 or greater than 1. Nothing
    is done when ``<fn without ext>_unique.fa`` already exists.

    :param fn: path to the input FASTA/FASTQ file
    :returns: path to the filtered FASTA file
    """
    out_file = op.splitext(fn)[0] + "_unique.fa"
    if file_exists(out_file):
        return out_file

    kept = 0
    with open(out_file, 'w') as out_handle, open(fn) as in_handle:
        # readline() returns "" at end of file, which stops the iterator.
        for line in iter(in_handle.readline, ""):
            is_fastq = line.startswith("@")
            if not (is_fastq or line.startswith(">")):
                continue
            fixed_name = _make_unique(line.strip(), kept)
            seq = in_handle.readline().strip()
            counts = _get_freq(fixed_name)
            if len(seq) < 26 and (counts > 1 or counts == 0):
                kept += 1
                print(fixed_name, file=out_handle)
                print(seq, file=out_handle)
            if is_fastq:
                # Skip the "+" separator and the quality line.
                in_handle.readline()
                in_handle.readline()
    return out_file
def _filter_seqs(fn):
    """
    Convert names of sequences to unique ids

    Reads a FASTA or FASTQ file and writes ``<fn without ext>_unique.fa``
    with the records whose sequence is shorter than 26 nt and whose
    frequency (from ``_get_freq``) is either 0 or greater than 1. Nothing
    is done when the output file already exists.

    :param fn: path to the input FASTA/FASTQ file
    :returns: path to the filtered FASTA file
    """
    out_file = op.splitext(fn)[0] + "_unique.fa"
    idx = 0
    if not file_exists(out_file):
        with open(out_file, 'w') as out_handle, open(fn) as in_handle:
            for line in in_handle:
                if not line.startswith(("@", ">")):
                    continue
                fixed_name = _make_unique(line.strip(), idx)
                # The builtin next() works on Python 2 and 3; file objects
                # have no .next() method on Python 3.
                seq_line = next(in_handle, None)
                if seq_line is None:
                    # Header at end of file without a sequence line.
                    break
                seq = seq_line.strip()
                counts = _get_freq(fixed_name)
                if len(seq) < 26 and (counts > 1 or counts == 0):
                    idx += 1
                    # write() replaces the Python 2 only "print >>" form
                    # and produces the same text.
                    out_handle.write("%s\n" % fixed_name)
                    out_handle.write("%s\n" % seq)
                if line.startswith("@"):
                    # FASTQ record: skip the "+" separator and the quality
                    # line; a truncated last record is tolerated.
                    next(in_handle, None)
                    next(in_handle, None)
    return out_file
def cluster(args):
    """
    Run the clustering pipeline: validate arguments, clean the
    alignment, parse the count matrix, build clusters, resolve
    multi-mapping events, annotate and write the JSON output (plus an
    optional database).

    After each stage the per-sample counts are appended to
    ``<args.dir_out>/read_stats.tsv``, which is recreated on every run.

    :param args: parsed command-line namespace
    :raises ValueError: when fewer than 10 sequences were parsed
    """
    args = _check_args(args)
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    if file_exists(read_stats_file):
        os.remove(read_stats_file)

    def _record_step(counts, n_seqs, step):
        # Log the totals and append one row per sample for this stage.
        logger.info("counts after: %s" % sum(counts.values()))
        logger.info("# sequences after: %s" % n_seqs)
        stats = pd.DataFrame({'sample': counts.keys(), 'counts': counts.values()})
        stats['step'] = step
        stats.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
    seqL, counts, n_seqs = parse_ma_file(seq_obj, args.ffile)
    _record_step(counts, n_seqs, 'aligned')

    if len(seqL.keys()) < 10:
        logger.error("It seems you have low coverage. Please check your fastq files have enough sequences.")
        raise ValueError("So few sequences.")

    logger.info("Cleaning bam file")
    counts, n_seqs = _total_counts(seqL.keys(), seqL)
    _record_step(counts, n_seqs, 'cleaned')

    clusL = _create_clusters(seqL, bam_file, args)
    counts, n_seqs = _total_counts(clusL.seq.keys(), clusL.seq, aligned=True)
    _record_step(counts, n_seqs, 'clusters')

    logger.info("Solving multi-mapping events in the network of clusters")
    clusLred = _cleaning(clusL, args.dir_out)
    counts, n_seqs = _total_counts(clusLred.clus, seqL)
    _record_step(counts, n_seqs, 'meta-cluster')
    logger.info("Clusters up to %s" % (len(clusLred.clus.keys())))

    if args.show:
        logger.info("Creating sequences alignment to precursor")
        clusLred = show_seq(clusLred, args.index)

    clusLred = peak_calling(clusLred)
    clusLred = _annotate(args, clusLred)

    logger.info("Creating json and count matrix")
    json_file = _create_json(clusLred, args)
    logger.info("Output file in: %s" % args.dir_out)

    if args.db:
        name = args.db + ".db"
        logger.info("Create database: database/" + name)
        data = load_data(json_file)
        out_dir = op.join(args.dir_out, "database")
        make_database(data, name, out_dir)
    logger.info("Finished")
def cluster(args):
    """
    Run the clustering pipeline: validate arguments, clean the
    alignment, parse the count matrix, build clusters, resolve
    multi-mapping events, annotate and write the JSON output (plus an
    optional database).

    After each stage the per-sample counts are appended to
    ``<args.dir_out>/read_stats.tsv``, which is recreated on every run.

    :param args: parsed command-line namespace
    :raises ValueError: when fewer than 10 sequences were parsed
    """
    args = _check_args(args)
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    if file_exists(read_stats_file):
        os.remove(read_stats_file)

    def _record_step(counts, n_seqs, step):
        # Log the totals and append one row per sample for this stage.
        logger.info("counts after: %s" % sum(counts.values()))
        logger.info("# sequences after: %s" % n_seqs)
        stats = pd.DataFrame({'sample': counts.keys(), 'counts': counts.values()})
        stats['step'] = step
        stats.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
    seqL, counts, n_seqs = parse_ma_file(seq_obj, args.ffile)
    _record_step(counts, n_seqs, 'aligned')

    if len(seqL.keys()) < 10:
        logger.error("It seems you have so low coverage. Please check your fastq files have enough sequences.")
        raise ValueError("So few sequences.")

    logger.info("Cleaning bam file")
    counts, n_seqs = _total_counts(seqL.keys(), seqL)
    _record_step(counts, n_seqs, 'cleaned')

    clusL = _create_clusters(seqL, bam_file, args)
    counts, n_seqs = _total_counts(clusL.seq.keys(), clusL.seq, aligned=True)
    _record_step(counts, n_seqs, 'clusters')

    logger.info("Solving multi-mapping events in the network of clusters")
    clusLred = _cleaning(clusL, args.dir_out)
    counts, n_seqs = _total_counts(clusLred.clus, seqL)
    _record_step(counts, n_seqs, 'meta-cluster')
    logger.info("Clusters up to %s" % (len(clusLred.clus.keys())))

    if args.show:
        logger.info("Creating sequences alignment to precursor")
        clusLred = show_seq(clusLred, args.index)

    clusLred = peak_calling(clusLred)
    clusLred = _annotate(args, clusLred)

    logger.info("Creating json and count matrix")
    json_file = _create_json(clusLred, args)
    logger.info("Output file in: %s" % args.dir_out)

    if args.db:
        name = args.db + ".db"
        logger.info("Create database: database/" + name)
        data = load_data(json_file)
        out_dir = op.join(args.dir_out, "database")
        make_database(data, name, out_dir)
    logger.info("Finished")
def miraligner(args):
    """
    Realign BAM hits to miRBAse to get better accuracy and annotation

    For every file in ``args.files`` the reads are loaded (BAM/SAM) or
    aligned with the external miraligner (FASTA/FASTQ when
    ``args.miraligner`` is set), written as a ``<sample>.mirna`` table
    and a ``<sample>.vcf`` file under ``args.out``, and finally merged
    into a count matrix.

    :param args: parsed command-line namespace; reads ``files``, ``out``,
        ``sps``, ``gtf``, ``collapse``, ``miraligner``, ``hairpin`` and
        ``mirna``
    :raises ValueError: when a file has an unsupported extension
    """
    # Use the paths returned by the helper; they equal args.hairpin and
    # args.mirna whenever those were supplied by the user.
    hairpin, mirna = _download_mirbase(args)
    precursors = _read_precursor(hairpin, args.sps)
    matures = _read_mature(mirna, args.sps)
    gtf = _read_gtf(args.gtf)
    out_dts = []
    out_files = []
    for bam_fn in args.files:
        sample = op.splitext(op.basename(bam_fn))[0]
        logger.info("Reading %s" % bam_fn)
        if bam_fn.endswith(("bam", "sam")):
            bam_fn = _sam_to_bam(bam_fn)
            bam_sort_by_n = op.splitext(bam_fn)[0] + "_sort"
            pysam.sort("-n", bam_fn, bam_sort_by_n)
            reads = _read_bam(bam_sort_by_n + ".bam", precursors)
        elif bam_fn.endswith(("fasta", "fa", "fastq")):
            if args.collapse:
                bam_fn = _collapse_fastq(bam_fn)
            out_file = op.join(args.out, sample + ".premirna")
            bam_fn = _filter_seqs(bam_fn)
            # NOTE(review): without args.miraligner nothing assigns
            # ``reads`` for FASTA/FASTQ input in this iteration; confirm
            # whether that combination is meant to be supported.
            if args.miraligner:
                _cmd_miraligner(bam_fn, out_file, args.sps, hairpin, args.out)
                reads = _read_miraligner(out_file)
                out_files.append(out_file)
        else:
            raise ValueError("Format not recognized.")

        if args.miraligner:
            _mirtop(out_files, hairpin, args.gtf, args.sps, args.out)

        if not args.miraligner:
            reads = _annotate(reads, matures, precursors)

        out_file = op.join(args.out, sample + ".mirna")
        out_file, dt, dt_pre = _tab_output(reads, out_file, sample)
        # VCF creation is best effort: problems are logged, not raised.
        try:
            vcf_file = op.join(args.out, sample + ".vcf")
            if not file_exists(vcf_file):
                create_vcf(dt_pre, matures, gtf, vcf_file)
            try:
                import vcf
                vcf.Reader(filename=vcf_file)
            except Exception as e:
                logger.warning(e.__doc__)
                # str(e) works on Python 2 and 3; e.message is Python 2 only.
                logger.warning(str(e))
        except Exception as e:
            logger.warning(e.__doc__)
            logger.warning(str(e))
        if isinstance(dt, pd.DataFrame):
            out_dts.append(dt)

    if out_dts:
        _create_counts(out_dts, args.out)
    else:
        print("No files analyzed!")