def flush(table, collect, name):
    # commit the changes
    if collect:
        table.append(collect)
        table.flush()

    # nicer information
    size = util.commify(len(table))
    logger.info("table=%s, contains %s rows" % (name, size))
def predict(inpname, outname, options):
    """
    Generate the peak predictions on a genome wide scale
    """
    if options.strand == TWOSTRAND:
        logger.info('operating in twostrand mode')

    if options.index:
        index = hdflib.PositionalData(fname='', index=inpname, nobuild=True,
                                      workdir=options.workdir)
    else:
        index = hdflib.PositionalData(fname=inpname, nobuild=True,
                                      workdir=options.workdir)

    fp = file(outname, 'wt')

    for label in index.labels:
        table = index.table(label)
        size = table.cols.idx[-1]
        info = util.commify(size)
        logger.info('predicting on %s of total size %s' % (label, info))
        lo = 0
        hi = min(size, options.maxsize)
        while True:
            if lo >= size:
                break
            perc = '%.1f%%' % (100.0 * lo / size)
            logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))

            # get the data
            res = index.query(start=lo, end=hi, label=label)

            # half-width of the exclusion zone
            w = options.exclude / 2

            # note: this inner helper shadows the module level name
            def predict(x, y):
                fx, fy = fitlib.gaussian_smoothing(x=x, y=y, sigma=options.sigma,
                                                   epsilon=options.level)
                peaks = fitlib.detect_peaks(x=fx, y=fy)
                if options.mode != 'all':
                    peaks = fitlib.select_peaks(peaks=peaks, exclusion=options.exclude,
                                                threshold=options.level)
                return peaks

            if options.strand == TWOSTRAND:
                # operates in two strand mode
                for yval, strand in [(res.fwd, '+'), (res.rev, '-')]:
                    logger.debug('processing strand %s' % strand)
                    peaks = predict(x=res.idx, y=yval)
                    output(stream=fp, peaks=peaks, chrom=label, w=w, strand=strand)
            else:
                # combine strands
                peaks = predict(x=res.idx, y=res.val)
                output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')

            # switching to a higher interval
            lo = hi
            hi += options.maxsize

    fp.close()
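A minimal driver sketch for the function above, assuming it lives in a standalone script. The flag names and defaults here are illustrative placeholders, not the project's actual command line; they only mirror the attributes that predict() reads off the options object.

import optparse

def option_parser():
    # hypothetical flags; they only mirror the options.* attributes used by predict()
    parser = optparse.OptionParser()
    parser.add_option("--strand", dest="strand", default="ALL")
    parser.add_option("--index", dest="index", action="store_true", default=False)
    parser.add_option("--workdir", dest="workdir", default=None)
    parser.add_option("--maxsize", dest="maxsize", type="int", default=10000000)
    parser.add_option("--exclude", dest="exclude", type="int", default=100)
    parser.add_option("--sigma", dest="sigma", type="float", default=20.0)
    parser.add_option("--level", dest="level", type="float", default=1.0)
    parser.add_option("--mode", dest="mode", default="all")
    return parser

if __name__ == '__main__':
    # e.g. python peakpred.py data.hdf peaks.txt (names illustrative)
    opts, args = option_parser().parse_args()
    predict(inpname=args[0], outname=args[1], options=opts)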
def build(self):
    "May be overridden to use different parsers and schemas"
    logger.info("file='%s'" % self.fname)
    logger.info("index='%s'" % self.index)

    # check file for existence
    if missing(self.fname):
        raise IOError("missing data %s" % self.fname)

    # provides timing information
    timer = util.Timer()

    # iterate over the file
    reader = csv.reader(file(self.fname, "rt"), delimiter="\t")

    # unwind the reader until it hits the header
    for row in reader:
        if row[0] == "chrom":
            break

    # helper function that flushes a table
    def flush(table, collect, name):
        # commit the changes
        if collect:
            table.append(collect)
            table.flush()

        # nicer information
        size = util.commify(len(table))
        logger.info("table=%s, contains %s rows" % (name, size))

    # print progress messages every CHUNK lines
    last_chrom = table = None
    db = openFile(self.index, mode="w", title="HDF index database")

    # continue on with reading, optimized for throughput
    # with minimal function calls
    collect = []
    for linec, row in izip(count(1), reader):

        # prints progress on processing, also flushes periodically
        if (linec % CHUNK) == 0:
            logger.info("... processed %s lines" % util.commify(linec))
            flush(table=table, collect=collect, name=last_chrom)
            collect = []

        # get the values from each row
        chrom, index, fwd, rev, value = row
        fwd, rev, value = float(fwd), float(rev), float(value)

        # flush when switching chromosomes
        if chrom != last_chrom:
            # table is None at the beginning
            if table is not None:
                flush(table=table, collect=collect, name=last_chrom)
                collect = []

            # creates the new HDF table here
            table = db.createTable("/", chrom, PositionalSchema, "label %s" % chrom)
            logger.info("creating table:%s" % chrom)
            last_chrom = chrom

        collect.append((index, fwd, rev, value))

    # flush for the last chromosome, report some timing information
    flush(table, collect, chrom)
    lineno = util.commify(linec)
    elapsed = timer.report()
    logger.info("finished inserting %s lines in %s" % (lineno, elapsed))

    # close the database
    db.close()
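build() relies on a PositionalSchema that is defined elsewhere. A plausible PyTables sketch, inferred from the (index, fwd, rev, value) tuples collected above and the table.cols.idx access in predict(); treat the exact column types as assumptions.

from tables import IsDescription, Int32Col, Float32Col

class PositionalSchema(IsDescription):
    # column types are assumptions; only the names follow from the code above
    idx   = Int32Col(pos=1)    # genomic position, queried as table.cols.idx
    fwd   = Float32Col(pos=2)  # forward strand signal
    rev   = Float32Col(pos=3)  # reverse strand signal
    value = Float32Col(pos=4)  # combined signal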
from itertools import starmap

from django import forms
from genetrack import logger, util

# needs a custom class to create a submit widget
class SubmitWidget(forms.widgets.Input):
    input_type = 'submit'

# custom widgets
ButtonWidget  = SubmitWidget(attrs={'class': 'nav_btn'})
FeatureWidget = forms.TextInput(attrs={'size': '10', 'id': 'feature'})
ImageWidget   = forms.TextInput(attrs={'size': '4'})
FloatWidget   = forms.TextInput(attrs={'size': '1'})

# generate zoom levels with user friendly numbers
ZOOM_LEVELS  = "50 100 250 500 1000 2500 5000 10000 25000 50000 100000 250000 500000 1000000".split()
ZOOM_CHOICES = map(lambda x: (x, util.commify(x)), ZOOM_LEVELS)

def zoom_change(value, step):
    """
    Gets the next zoom level, either up or down

    >>> levels = [(100, 1), (100, -1), (1000000, 1)]
    >>> it = starmap(zoom_change, levels)
    >>> list(it)
    [250, 50, 1000000]
    """
    global ZOOM_LEVELS
    try:
        index = ZOOM_LEVELS.index(str(value))
        index += step
        index = index if index > 0 else 0
        # the original listing breaks off here; the upper clamp and the
        # return are reconstructed to match the doctest above
        index = min(index, len(ZOOM_LEVELS) - 1)
        return int(ZOOM_LEVELS[index])
    except ValueError:
        # value not among the known levels; returning it unchanged is an
        # assumed fallback
        return int(value)
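A hypothetical form showing how the widgets and choices above would typically be wired together; NavbarForm and its field names are illustrative, not taken from the project.

class NavbarForm(forms.Form):
    # illustrative field names; only the widgets and choices come from above
    feature = forms.CharField(widget=FeatureWidget)
    zoom    = forms.ChoiceField(choices=ZOOM_CHOICES, initial='1000')

# stepping through the levels, clamped at both ends:
# zoom_change('1000', +1) -> 2500
# zoom_change('50', -1)   -> 50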
        if strand == '+':
            # on forward strand, 5' is at start
            idx = int(start) + shift
            fwd, rev, val = 1, 0, 1
        elif strand == '-':
            # on reverse strand, 5' is at end
            idx = int(end) - shift
            fwd, rev, val = 0, 1, 1
        else:
            # no strand specified, generate interval centers
            idx = (int(start) + int(end)) / 2
            fwd, rev, val = 0, 0, 1

        # it is essential to be able to sort the index as a string!
        fp.write('%s\t%012d\t%s\t%s\t%s\n' % (chrom, idx, fwd, rev, val))

    fp.close()

    linet = util.commify(linec)
    logger.debug("parsing %s lines finished in %s" % (linet, timer.report()))

    # if it is producing coverage then it will expand reads into full intervals

    # now let the sorting commence
    cmd = "sort %s > %s" % (flat, sorted)
    logger.debug("sorting into '%s'" % sorted)
    os.system(cmd)
    logger.debug("sorting finished in %s" % timer.report())

    logger.debug("consolidating into '%s'" % outname)
    consolidate(sorted, outname, format=format)
    logger.debug("consolidate finished in %s" % timer.report())

    logger.debug("output saved to '%s'" % outname)
    logger.debug("full conversion finished in %s" % full.report())
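The %012d zero padding is what makes the plain-text sort step above correct: sort compares the index column lexicographically, and padding makes string order agree with numeric order. A quick illustration:

>>> sorted(['chr1\t900', 'chr1\t1000'])   # unpadded strings sort wrong
['chr1\t1000', 'chr1\t900']
>>> sorted(['chr1\t%012d' % 900, 'chr1\t%012d' % 1000])  # zero padded sorts right
['chr1\t000000000900', 'chr1\t000000001000']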