def execute(limit=1):
    """
    Executes waiting jobs
    """
    # no locking here for now, TODO
    jobs = models.Job.objects.filter(status=status.WAITING).order_by('id')[0:limit]

    if not jobs:
        logger.info('no jobs are waiting')

    # mark the selected jobs as running
    for job in jobs:
        set_status(job, status=status.RUNNING)

    # run the selected jobs
    for job in jobs:
        logger.info('executing %s' % job)
        jobtype = job.json.get('type')
        data = get_data(job)
        try:
            if jobtype == status.INDEXING_JOB:
                jobdefs.indexing_job(data=data)
            else:
                raise Exception("unknown jobtype")
            job.delete()
        except Exception, exc:
            logger.error(exc)
            set_error(exc, (data, job))
def flush(table, collect, name):
    # commit the changes
    if collect:
        table.append(collect)
        table.flush()
    # nicer information
    size = util.commify(len(table))
    logger.info("table=%s, contains %s rows" % (name, size))
def display(self, msg, reps=1):
    total = (time.time() - self.start)
    if reps == 1:
        info("%s takes %.3f seconds" % (msg, total))
    else:
        # report a rate so that the message matches the computation
        value = reps / total
        info("%s performs at %.3f per second" % (msg, value))
    self.reset()
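# Illustrative sketch, not part of the original source: display() above expects to
# live on a timer object that stores its start time in self.start and offers a
# reset() method. The hypothetical host class below shows the assumed shape and
# reuses the method as-is.
class ExampleTimer(object):
    def __init__(self):
        self.reset()

    def reset(self):
        # remember when the current measurement started
        self.start = time.time()

    # attach the display() defined above
    display = display

# timer = ExampleTimer()
# ... perform the work being measured ...
# timer.display("work", reps=100)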
def generate(n=5):
    projects = []
    for i in range(1, n):
        name = random.choice(project_names) % i
        info = 'some info=%s' % i
        project = authorize.create_project(user=user, name=name, info=info)
        projects.append(project)
        logger.info('creating %s' % name)

    # data names
    data_names = ('short-good-input.gtrack', 'short-data.bed')

    # visualization names
    track_names = ('differential expression', 'HELA subtract', 'Default track')

    # a subset of projects get data, visualization and results added to them
    subset = projects[-1:]

    for project in subset:

        # create some tracks for this project
        for tname in track_names:
            json = dict()
            track = authorize.create_track(user=user, pid=project.id, name=tname, json=json)
            logger.info('creating track %s' % track.name)

        assert project.track_count() == len(track_names)

        # upload some data names
        for name in data_names:
            logger.info('uploading data %s' % name)
            stream = File(open(conf.testdata(name)))
            data = authorize.create_data(user=user, pid=project.id, stream=stream, name=name, info='test data')

        # create some results
        logger.info('adding results content and image')
        stream1 = File(open(conf.testdata('short-results.txt')))
        image1 = File(open(conf.testdata('readcounts.png'), 'rb'))
        result1 = authorize.create_result(user=user, data=data, content=stream1, image=image1)

        image2 = File(open(conf.testdata('shift.png'), 'rb'))
        result2 = authorize.create_result(user=user, data=data, content=None, image=image2)
def generate_coverage(func, path, *args, **kwds):
    """
    Generates code coverage for the function
    and places the results in the path
    """
    import figleaf
    from figleaf import annotate_html

    # Fix for figleaf misbehaving. It is adding a logger at root level
    # and that will add a handler to all subloggers (ours as well).
    # Needs to be fixed in figleaf.
    import logging
    root = logging.getLogger()
    # remove all root handlers
    for hand in root.handlers:
        root.removeHandler(hand)

    if os.path.isdir(path):
        shutil.rmtree(path)

    info("collecting coverage information")
    figleaf.start()

    # execute the function itself
    return_vals = func(*args, **kwds)

    figleaf.stop()

    info('generating coverage')
    coverage = figleaf.get_data().gather_files()
    annotate_html.prepare_reportdir(path)

    # skip python modules and the test modules
    regpatt = lambda patt: re.compile(patt, re.IGNORECASE)
    patterns = map(regpatt, ['python', 'tests', 'django', 'path*'])
    annotate_html.report_as_html(coverage, path, exclude_patterns=patterns, files_list='')

    return return_vals
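# Usage sketch (illustrative, not from the original source): generate_coverage()
# runs an arbitrary callable under figleaf and writes an HTML report into the
# given directory. The callable and the output directory below are hypothetical.
def coverage_example():
    def run_all_tests():
        pass  # stands in for the real test entry point

    generate_coverage(run_all_tests, path="coverage-report")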
def build(self):
    "May be overridden to use different parsers and schemas"
    logger.info("file='%s'" % self.fname)
    logger.info("index='%s'" % self.index)

    # check that the data file exists
    if missing(self.fname):
        raise IOError("missing data %s" % self.fname)

    # provides timing information
    timer = util.Timer()

    # iterate over the file
    reader = csv.reader(file(self.fname, "rt"), delimiter="\t")

    # unwind the reader until it hits the header
    for row in reader:
        if row[0] == "chrom":
            break

    # helper function that flushes a table
    def flush(table, collect, name):
        # commit the changes
        if collect:
            table.append(collect)
            table.flush()
        # nicer information
        size = util.commify(len(table))
        logger.info("table=%s, contains %s rows" % (name, size))

    # print messages at every CHUNK lines
    last_chrom = table = None
    db = openFile(self.index, mode="w", title="HDF index database")

    # continue on with reading, optimized for throughput
    # with minimal function calls
    collect = []
    for linec, row in izip(count(1), reader):

        # prints progress on processing, also flushes periodically
        if (linec % CHUNK) == 0:
            logger.info("... processed %s lines" % util.commify(linec))
            flush(table=table, collect=collect, name=last_chrom)
            collect = []

        # get the values from each row
        chrom, index, fwd, rev, value = row
        fwd, rev, value = float(fwd), float(rev), float(value)

        # flush when switching chromosomes
        if chrom != last_chrom:
            # table==None at the beginning
            if table is not None:
                flush(table=table, collect=collect, name=last_chrom)
                collect = []
            # creates the new HDF table here
            table = db.createTable("/", chrom, PositionalSchema, "label %s" % chrom)
            logger.info("creating table:%s" % chrom)
            last_chrom = chrom

        collect.append((index, fwd, rev, value))

    # flush for the last chromosome, report some timing information
    flush(table, collect, chrom)
    lineno = util.commify(linec)
    elapsed = timer.report()
    logger.info("finished inserting %s lines in %s" % (lineno, elapsed))

    # close database
    db.close()
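# The PositionalSchema referenced in build() is not shown in this section. Based on
# the columns the code touches (table.cols.idx, plus the fwd/rev/value tuple appended
# per row and the res.fwd/res.rev/res.val fields read later), a plausible PyTables
# definition might look like the sketch below; the real column names and types may differ.
from tables import IsDescription, IntCol, FloatCol

class PositionalSchema(IsDescription):
    idx = IntCol(pos=1)    # genomic position index
    fwd = FloatCol(pos=2)  # forward strand value
    rev = FloatCol(pos=3)  # reverse strand value
    val = FloatCol(pos=4)  # combined value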
def predict(inpname, outname, options):
    """
    Generate the peak predictions on a genome wide scale
    """
    if options.strand == TWOSTRAND:
        logger.info('operating in twostrand mode')

    if options.index:
        index = hdflib.PositionalData(fname='', index=inpname, nobuild=True, workdir=options.workdir)
    else:
        index = hdflib.PositionalData(fname=inpname, nobuild=True, workdir=options.workdir)

    fp = file(outname, 'wt')

    for label in index.labels:
        table = index.table(label)
        size = table.cols.idx[-1]
        info = util.commify(size)
        logger.info('predicting on %s of total size %s' % (label, info))

        lo = 0
        hi = min((size, options.maxsize))

        while True:
            if lo >= size:
                break
            perc = '%.1f%%' % (100.0 * lo / size)
            logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))

            # get the data
            res = index.query(start=lo, end=hi, label=label)

            # exclusion zone
            w = options.exclude / 2

            def predict(x, y):
                fx, fy = fitlib.gaussian_smoothing(x=x, y=y, sigma=options.sigma, epsilon=options.level)
                peaks = fitlib.detect_peaks(x=fx, y=fy)
                if options.mode != 'all':
                    peaks = fitlib.select_peaks(peaks=peaks, exclusion=options.exclude, threshold=options.level)
                return peaks

            if options.strand == TWOSTRAND:
                # operates in two strand mode
                for yval, strand in [(res.fwd, '+'), (res.rev, '-')]:
                    logger.debug('processing strand %s' % strand)
                    peaks = predict(x=res.idx, y=yval)
                    output(stream=fp, peaks=peaks, chrom=label, w=w, strand=strand)
            else:
                # combine strands
                peaks = predict(x=res.idx, y=res.val)
                output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')

            # switching to a higher interval
            lo = hi
            hi += options.maxsize

    fp.close()
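# Illustrative invocation sketch, not from the original source: predict() reads a
# handful of attributes off its options argument (strand, index, workdir, maxsize,
# exclude, sigma, level, mode). In the real tool these come from the command line
# parser; the optparse.Values stand-in and the specific values below are hypothetical.
def predict_example():
    from optparse import Values
    options = Values(dict(
        strand='+',        # or TWOSTRAND to process each strand separately
        index=False,       # True when inpname is already an HDF index
        workdir=None,
        maxsize=10 ** 7,   # size of the interval processed per query
        exclude=200,       # width of the peak exclusion zone
        sigma=20,          # smoothing parameter
        level=1,           # peak selection threshold
        mode='all',        # 'all' keeps every detected peak
    ))
    predict('input-data.bed', 'peaks.txt', options)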
dest="limit", type='int', default=1, help="how many jobs to run in parallel" ) # flushes all content away, drops all database content! parser.add_option( '--server', action="store_true", dest="server", default=False, help="runs as a server and invokes the jobrunner at every delay seconds", ) # parse the argument list options, args = parser.parse_args() logger.disable(options.verbosity) # missing file names if options.server and not options.delay: parser.print_help() else: if options.server: logger.info('server mode, delay=%ss' % options.delay) while 1: # this is used to start multiple jobs with cron (at every minute but # having them actually start up at smaller increments time.sleep(options.delay) execute(limit=options.limit) if not options.server: break else: logger.debug( 'jobserver waiting %ss' % options.delay)
tc.follow("Project view") self.delete_project(name) def get_suite(): "Returns the testsuite" return testlib.make_suite( [] ) def local_suite(): "Returns the testsuite" tests = [ BaseTest, ServerTest, ] return testlib.make_suite( tests ) def test_runner( suite, verbosity=0 ): "Runs the functional tests on a test database" from django.db import connection old_name = settings.DATABASE_NAME utils.setup_test_environment() connection.creation.create_test_db(verbosity=verbosity, autoclobber=True) result = unittest.TextTestRunner(verbosity=2).run(suite) connection.creation.destroy_test_db(old_name, verbosity) utils.teardown_test_environment() if __name__ == '__main__': logger.info("executing functional tests") suite = local_suite() test_runner( suite, verbosity=0)
def flush_database():
    "Deletes all entries"
    logger.info("flushing the database")
    #Data.objects.all().delete()
    call_command('flush')