def _xsq_convert_region(filename, sample, region, tags, outname, suffix=None):
    """Write the reads of one region of an XSQ sample to a gzipped FASTQ file.

    Opens `filename` with XSQFile, iterates
    ``fetch_region(sample, region, tags)`` and writes every
    ``(name, seq, quals)`` triple it yields as a four-line FASTQ record to
    ``gzip.open(outname, 'w')``.

    Args:
        filename: path handed to XSQFile.
        sample, region, tags: passed through unchanged to ``fetch_region``.
        outname: path of the gzip file to create.
        suffix: optional string appended to each read name. It is an explicit
            trailing parameter because the body previously read a bare
            ``suffix`` name that was not defined in this function; the
            five-argument call form still works and means "no suffix".

    Returns:
        `region`, unchanged, so a caller can tell which region finished.
    """
    out = gzip.open(outname, 'w')
    try:
        xsq = XSQFile(filename)
        try:
            for name, seq, quals in xsq.fetch_region(sample, region, tags):
                if suffix:
                    name = '%s%s' % (name, suffix)
                # Each quality value q is written as the character chr(q + 33).
                qual_str = ''.join([chr(q + 33) for q in quals])
                out.write('@%s\n%s\n+\n%s\n' % (name, seq, qual_str))
        finally:
            # Release the reader even if a read or a write fails part-way.
            xsq.close()
    finally:
        out.close()
    return region
def xsq_list(filename, count=False, minreads=-1, total=False):
    """Print the tags and samples of an XSQ file to standard output.

    Args:
        filename: path handed to XSQFile.
        count: when true, also fetch each sample's read count; only samples
            whose count is strictly greater than `minreads` are listed.
        minreads: threshold used when `count` is true.
        total: when true (and `count` is true), print the sum of the listed
            counts, leaving out the sample named 'Unclassified'.

    A KeyboardInterrupt raised while listing samples stops the listing early;
    the footer is still printed. The XSQ file is closed on every exit path.
    """
    xsq = XSQFile(filename)
    try:
        print('Tags: ')
        for tag in xsq.tags:
            tag_info = xsq.tags[tag]
            if tag_info.is_colorspace:
                print(' %s[cs/%s]' % (tag, tag_info.prefix))
            else:
                print(' %s[nt]' % (tag,))
        print('')

        print('Samples: ')
        acc = 0
        try:
            for sample in xsq.get_samples():
                desc = xsq.get_sample_desc(sample).strip()
                if count:
                    readcount = xsq.get_read_count(sample)
                    if readcount > minreads:
                        pn = pretty_number(readcount)
                        # 'Unclassified' is listed but kept out of the total.
                        if sample != 'Unclassified':
                            acc += readcount
                        if desc:
                            print(' %s (%s) %s' % (sample, desc, pn))
                        else:
                            print(' %s %s' % (sample, pn))
                else:
                    if desc:
                        print(' %s (%s)' % (sample, desc))
                    else:
                        print(' %s' % (sample,))
        except KeyboardInterrupt:
            # Let the user cut a slow listing short without a traceback.
            pass

        if count and total:
            print('')
            print(' Total reads => %s' % pretty_number(acc))
        # NOTE(review): the closing blank line is assumed to be printed
        # unconditionally -- confirm against the intended output layout.
        print('')
    finally:
        xsq.close()
def xsq_convert_all(filename, tags=None, force=False, suffix=None, noz=False, usedesc=False, minreads=0, fsuffix=None, unclassified=False, procs=1, tmpdir=None):
    """Convert every eligible sample of an XSQ file to its own FASTQ file.

    A first pass over ``XSQFile.get_samples()`` decides which samples to
    convert and reports each decision on stderr; the file is then closed and
    ``xsq_convert`` is called once per selected sample.

    Args:
        filename: path of the XSQ file.
        tags, suffix, procs, tmpdir, noz: forwarded to ``xsq_convert``.
        force: convert even when the output file already exists.
        usedesc: name the output after the sample description, falling back
            to the sample name when the description is empty.
        minreads: samples with fewer reads than this are skipped.
        fsuffix: text inserted between the base name and the extension.
        unclassified: when false, the sample named 'Unclassified' is skipped.

    Output files are named ``<name><fsuffix>.fastq`` when `noz` is true and
    ``<name><fsuffix>.fastq.gz`` otherwise.
    """
    # Normalise once instead of on every loop iteration.
    if not fsuffix:
        fsuffix = ''
    if noz:
        name_template = '%s%s.fastq'
    else:
        name_template = '%s%s.fastq.gz'

    samples = []
    xsq = XSQFile(filename)
    try:
        for sample in xsq.get_samples():
            fname = sample
            if usedesc:
                fname = xsq.get_sample_desc(sample)
                if not fname:
                    fname = sample

            if fname == sample:
                sys.stderr.write('Sample: %s... ' % fname)
            else:
                sys.stderr.write('Sample: (%s) %s... ' % (sample, fname))

            outname = name_template % (fname, fsuffix)

            if force or not os.path.exists(outname):
                if sample == 'Unclassified' and not unclassified:
                    sys.stderr.write(' Skipping unclassified\n')
                    continue

                count = xsq.get_read_count(sample)
                if count < minreads:
                    sys.stderr.write(' Too few reads (%s)\n' % count)
                    continue

                samples.append((sample, outname))

            # Terminates the 'Sample: ...' line for queued and existing outputs.
            sys.stderr.write('\n')
    finally:
        # Close the reader before the conversions start, even on error.
        xsq.close()

    for sample, outname in samples:
        xsq_convert(filename, sample, tags, suffix, procs=procs, outname=outname, noz=noz, tmpdir=tmpdir)
def xsq_info(filename):
    """Dump the RunMetadata node of an XSQ file.

    Opens `filename` with XSQFile and passes ``xsq.hdf.root.RunMetadata`` to
    ``xsq.dump``. The file is closed even when the dump raises.
    """
    xsq = XSQFile(filename)
    try:
        xsq.dump(xsq.hdf.root.RunMetadata)
    finally:
        xsq.close()
def xsq_convert(filename, sample=None, tags=None, suffix=None, procs=1, outname='-', tmpdir=None, noz=False):
    """Convert one sample of an XSQ file to FASTQ, one worker task per region.

    Each region from ``XSQFile.get_regions(sample)`` is converted by
    ``_xsq_convert_region`` in a ``multiprocessing.Pool`` into its own gzipped
    temp file under `tmpdir`. The temp files are then copied, in region order,
    into the destination with ``_dump_stream`` and deleted.

    Args:
        filename: path of the XSQ file.
        sample: sample name given to the reader; also used in temp file names.
        tags: passed through unchanged to the region worker.
        suffix: optional string for the worker to append to read names.
        procs: number of worker processes; values below 1 mean
            ``multiprocessing.cpu_count()``.
        outname: destination path, or '-' to write to ``sys.stdout``.
        tmpdir: directory for temp files; None means the current directory.
        noz: when true, a file destination is opened with plain ``open``
            instead of ``gzip.open``.

    Raises:
        Whatever exception a worker raised, after the per-region temp files
        have been removed. A KeyboardInterrupt while waiting for the pool
        terminates the workers and exits with status 1.
    """
    sys.stderr.write("Converting: %s\n" % sample)

    if tmpdir is None:
        tmpdir = '.'

    if procs < 1:
        procs = multiprocessing.cpu_count()

    # The pool is created before the XSQ file is opened, as before.
    pool = multiprocessing.Pool(procs)

    xsq = XSQFile(filename)
    try:
        regions = list(xsq.get_regions(sample))
    finally:
        xsq.close()

    source_base = os.path.basename(filename)
    pid = os.getpid()
    tmpnames = [
        os.path.join(tmpdir, '.tmp.%s.%s.%s.fastq.gz.%s' % (source_base, sample, region, pid))
        for region in regions
    ]

    # NOTE(review): Callback looks like a progress reporter driven by the
    # module-level ETA flag -- confirm.
    callback = Callback(len(regions)) if ETA else None

    results = []
    for region, tmpname in zip(regions, tmpnames):
        args = (filename, sample, region, tags, tmpname)
        # Forward the read-name suffix only when one was requested, so the
        # default call keeps its five-argument form.
        # NOTE(review): requires _xsq_convert_region to accept a trailing
        # `suffix` argument -- confirm its signature.
        if suffix:
            args += (suffix,)
        results.append(pool.apply_async(_xsq_convert_region, args, callback=callback))

    pool.close()
    try:
        pool.join()
    except KeyboardInterrupt:
        pool.terminate()
        sys.exit(1)

    if callback:
        callback.done()

    # apply_async stores a worker's exception in its result object; without
    # this check a failed region would be merged as a truncated temp file.
    try:
        for result in results:
            result.get()
    except Exception:
        for tmp in tmpnames:
            if os.path.exists(tmp):
                os.unlink(tmp)
        raise

    sys.stderr.write("Merging temp files...\n")

    callback = Callback(len(regions)) if ETA else None

    to_stdout = (outname == '-')
    if to_stdout:
        tmpname = None
        out = sys.stdout
    else:
        # Write to a hidden temp file and move it into place at the end, so
        # `outname` is not created until the merge has finished.
        tmpname = os.path.join(tmpdir, '.tmp.%s.%s.%s' % (os.path.basename(outname), sample, os.getpid()))
        if noz:
            out = open(tmpname, 'w')
        else:
            out = gzip.open(tmpname, 'w')

    for tmp in tmpnames:
        src = gzip.open(tmp)
        try:
            _dump_stream(src, out)
        finally:
            src.close()
        os.unlink(tmp)
        if callback:
            callback()

    if not to_stdout:
        out.close()
        shutil.move(tmpname, outname)

    if callback:
        callback.done()