示例#1
0
 def PATCH( self, request):
     """
     Insert or update the run identified by ``self.run_id``.

     The request is converted to a dict and filtered before it is written:
     ``run_id`` and ``date_created`` are always dropped, and ``status`` is
     dropped unless its value is 0 or 30.  When a run with this id already
     exists it is updated with the remaining fields; otherwise a new run
     is inserted with its status forced to ``run.CONFIG``.

     Returns a ``(msg, 200)`` tuple; ``msg['data']`` holds the cleaned
     result of the insert/update call.
     """
     self.app.logger.info("Run.PATCH()")
     req = self._req_to_dict( request )
     self.app.logger.debug( "request %r" % req )
     #make no distinction between insert and update
     #testing autopush
     req.pop('run_id', None)#remove run_id from dict, using self.run_id
     req.pop('date_created', None) #created is non-writeable
     if 'status' in req and req['status'] not in [0, 30]:
         #only initialize or abort
         req.pop('status') #status unwriteable from web
     exists = run.get_ANRun( self.run_id )
     if exists:
         self.app.logger.info("Update %s" % self.run_id )
         result = run.update_ANRun( self.run_id, **req )
     else:
         #all new runs start in the config status, whatever the request said
         self.app.logger.info("Insert %s" % self.run_id )
         req['status'] = run.CONFIG
         result = run.insert_ANRun( self.run_id, **req )
     result = self._clean_response( result )
     msg = {'status': 'complete',
             'data': result }
     return (msg, 200)
示例#2
0
 def _get_gpu_run(self):
     """
     Return the first run owned by this master whose status is
     ``ACTIVE_ALL_SENT`` or ``ACTIVE``, or None when there is no such run.
     """
     matching = (
         candidate
         for candidate in run_mdl.get_ANRun()
         # ownership is checked first; runs of other masters are skipped
         if candidate["master_name"] == self._master_name
         and candidate["status"] in [run_mdl.ACTIVE_ALL_SENT, run_mdl.ACTIVE]
     )
     return next(matching, None)
示例#3
0
 def GET( self ):
     """
     Look up the run for ``self.run_id`` and return it.

     The return value is a ``(msg, http_status)`` tuple.  The status is
     200 when the cleaned lookup result is truthy and 404 otherwise; the
     cleaned result is placed under ``msg['data']`` in both cases.
     """
     self.app.logger.info("Run.GET(%r)" % self.run_id)
     found = run.get_ANRun( self.run_id )
     self.app.logger.info("%r" % found)
     cleaned = self._clean_response( found )
     http_status = 200 if cleaned else 404
     return ({'status':'complete', 'data': cleaned}, http_status)
示例#4
0
 def handle_state(self):
     """
     Process pending responses, then act on the current status.

     The status is forced to TERMINATED first when the cluster is not
     active.  After that:
       INIT       -> send_init()
       WAITING    -> restart() when there is no run id, or when the run
                     is no longer ACTIVE (the run id is cleared first)
       TERMINATED -> delete_queues()
       RESTARTING -> hard_restart(), but only once the restart timeout
                     lies in the past
     Any other status is left alone.
     """
     self.get_responses()
     self.check_response()
     if not self.cluster_active:
         #cluster down
         self.set_status( svr_mdl.TERMINATED )

     current = self.status
     self.logger.debug("handle_state[%s]" % current)

     if current == svr_mdl.INIT:
         self.send_init()
         return

     if current == svr_mdl.WAITING:
         if self._run_id is None:
             self.restart()
             return
         run_status = run_mdl.get_ANRun( self._run_id )['status']
         if run_status != run_mdl.ACTIVE:
             # the run we were waiting on is not active any more
             self._run_id = None
             self.restart()
         return

     if current == svr_mdl.TERMINATED:
         self.delete_queues()
         return

     if current == svr_mdl.RESTARTING and self._restart_timeout < datetime.now():
         self.hard_restart()
示例#5
0
 def handle_heartbeat(self):
     """
     Drain ``self.status_queue`` and react to each heartbeat message.

     - A message whose "message" field is "terminated" sets the status
       to TERMINATED and stops processing immediately.
     - A non-zero "terminating" field marks this object as terminated.
     - A non-zero "source-q" field clears the completion timeout and
       resets the idle counter.
     - Otherwise (both fields are 0), unless the status is RESTARTING,
       the run is looked up; once it is COMPLETE a one-minute timeout is
       armed, and when that timeout has passed the run id is cleared and
       ``_restart()`` is called.
     """
     while len(self.status_queue) > 0:
         beat = self.status_queue.pop()
         if "message" in beat and beat["message"] == "terminated":
             self.set_status(svr_mdl.TERMINATED)
             return

         if beat["terminating"] != 0:
             self._terminated = True
             continue

         if beat["source-q"] != 0:
             self._complete_timeout = None
             self._idle = 0
             continue

         if self.status == svr_mdl.RESTARTING:
             continue

         current_run = run_mdl.get_ANRun(self._run_id)
         if current_run["status"] != run_mdl.COMPLETE:
             continue

         # arm the grace period the first time the completed run is seen
         if self._complete_timeout is None:
             self._complete_timeout = datetime.now() + timedelta(minutes=1)
         if self._complete_timeout < datetime.now():
             self._run_id = None
             self._restart()
示例#6
0
 def run_config(self):
     """
     Return the run record for ``self.run_id``.

     The record is fetched on first use, stored in ``self._run_config``
     and logged at debug level; later calls return the stored value.
     """
     if self._run_config is not None:
         return self._run_config
     self._run_config = run_mdl.get_ANRun( self.run_id )
     self.logger.debug("Run Config set %r" % self._run_config )
     return self._run_config
示例#7
0
from datadirac.aggregator.accumulator import Truthiness
from datadirac.aggregator.controller import AggManager
import masterdirac.models.run as run_mdl
from datadirac.aggregator.accumulator import Accumulator
import pandas

# Script section: dump the "truth" data of a single run to "<run_id>.csv",
# using the accumulator's networks as the row index.
#change this to current run id
run_id = 'b6-q50-nov28-3'
run_model = run_mdl.get_ANRun( run_id )
truth_obj = Truthiness( run_model )
# NOTE(review): _get_truth is a private method of Truthiness; its result is
# only used to build the DataFrame below.
truth = truth_obj._get_truth()
accum = Accumulator(run_model)
nets = accum.networks

df = pandas.DataFrame( truth )
# overwrite the default index with the networks; the assignment requires
# len(nets) to equal the number of rows
df.index = nets

# the file is written relative to the current working directory
df.to_csv("%s.csv" % run_id)


def createEVApackage( run_id, windows ):
    """
    Generate the files for EVA, run EVA once per window and collect results.

    Everything is written under a working directory named after the run
    (``<cwd>/<run_id>``): the cached expression dataframe, the gene name
    list, the network ``.gmt`` file and, per window, an expression table
    and a phenotype file (one ``0`` line per 'WT' sample followed by one
    ``1`` line per sample of the other allele).

    run_id  - id of the run, loaded through r_model.get_ANRun
    windows - iterable of (start, end) pairs; each pair is handed to
              window() together with the age-sorted sample lists

    Returns a dict mapping each (start, end) window to the value of
    parse_result() for that window's EVA result file.  When ``windows``
    is empty the dict is empty and no summary tables are saved.

    Raises Exception when 'WT' is not among the nominal alleles.
    """
    # The run directory doubles as working directory; create it once.
    working_dir = os.path.join( os.getcwd(), run_id )
    if not os.path.exists( working_dir ):
        os.makedirs( working_dir )
    run_model = r_model.get_ANRun( run_id )
    sd = run_model['source_data']
    net_config = run_model['network_config']
    #download source data
    ###DEBUG
    pandas_file = os.path.join( working_dir, "expression.pnd" )
    if not os.path.exists( pandas_file ):
        # build the expression dataframe only when no cached copy exists
        _get_source_data( working_dir , run_model )
        hdg =  hdp.HDDataGen( working_dir  )
        df, _ =  hdg.generate_dataframe( sd, net_config )
        df.save( pandas_file )
    sd_obj = dd.SourceData()
    sd_obj.load_dataframe( pandas_file )
    net_table = net_config['network_table']
    net_source = net_config['network_source']
    sd_obj.load_net_info(net_table, net_source )

    _, meta_file = os.path.split( sd['meta_file'] )
    mi = dd.MetaInfo( os.path.join( run_id, meta_file ) )
    strain = mi.get_strains()
    if len(strain) > 1:
        logging.warning("More than one strain, only getting first")
        logging.warning("Strains %r" % strain )
    alleles = mi.get_nominal_alleles()
    if len( alleles ) > 2:
        logging.warning("More than two alleles, only using 'WT' and other")
        logging.warning("Alleles %r" % alleles )
    if 'WT' not in alleles:
        raise Exception("Wild type not in alleles. Alleles = %r" % alleles)
    # first allele that is not the wild type is the comparison group
    second_allele = [allele for allele in alleles if allele != 'WT'][0]
    wt_samples = mi.get_sample_ids( strain=strain[0], allele='WT' )
    comp_samples = mi.get_sample_ids( strain=strain[0], allele = second_allele)
    assert len(wt_samples) > 0
    assert len( comp_samples ) > 0
    # (age, sample id) pairs, ordered by age
    wt_s_a = sorted( [(mi.get_age( sid), sid) for sid in wt_samples] )
    comp_s_a = sorted( [(mi.get_age( sid), sid) for sid in comp_samples] )
    comparisons = {}
    gene_names_fname = "gene_names.txt"
    with open(os.path.join(working_dir , gene_names_fname), 'w') as gnf:
        gnf.write('\n'.join(['"%s"' % gn for gn in sd_obj.source_dataframe.index]))

    logging.info("Wrote %s" % gene_names_fname )
    network_fname = "net.gmt"
    with open( os.path.join(working_dir, network_fname), 'w') as nf:
        for pw in sd_obj.get_pathways():
            nf.write( '\t'.join([pw, 'na'] + sd_obj.get_genes( pw )) + '\n' )
    logging.info("Wrote %s" % network_fname )

    for start, end in windows:
        comparisons[(start, end)] = ( window( start, end, wt_s_a), window( start, end, comp_s_a))
    result = {}
    # items() instead of iteritems(): same iteration, valid on Python 2 and 3
    for win, v in comparisons.items():
        window_pattern = "start%iend%i" % win
        wt_s, comp_s = v
        curr_df = sd_obj.get_expression( wt_s + comp_s )

        exp_table_fname = "%s.expression.tsv" % (window_pattern)
        curr_df.to_csv(  os.path.join(working_dir, exp_table_fname), index=False, header=False, sep='\t')
        pheno_fname = "%s.pheno" % ( window_pattern)
        with open(  os.path.join(working_dir, pheno_fname), 'w') as ph:
            for s in wt_s:
                ph.write('0\n')
            for s in comp_s:
                ph.write('1\n')
        params = ( exp_table_fname, gene_names_fname, network_fname, pheno_fname, "%s.%s.result.txt" %  (run_id, window_pattern ))
        # EVA receives paths relative to the current directory (run_id/...)
        params = tuple([ os.path.join(run_id,p) for p in params])
        fin, mess = EVA( *params )
        for m in mess:
            if len(m[1].strip()) > 0:
                logging.info("%s: %s" % (m[0], m[1]))
        result[win] = parse_result( params[-1] )
        #DEBUG
    if not result:
        # no windows were requested, so there is nothing to tabulate
        logging.warning("No windows for %s, no result tables written" % run_id)
        return result
    # Use the first window and its first entry to discover the value types;
    # list(...) keeps the indexing valid where keys() returns a view.
    t = list(result.keys())[0]
    n = list(result[t].keys())[0]

    for dt in result[t][n].keys():
        save_table( result, "%s.%s.csv" % (run_id, dt), val_type=dt )
    return result
    line = sc_p.stdout.readline().strip()
    while line != '':
        messages.append(('stdout', line))
        line = sc_p.stdout.readline().strip()
    line = sc_p.stderr.readline().strip()
    while line != '':
        messages.append(('stderr', line))
        line = sc_p.stderr.readline().strip()
    messages.append(('wrapper', 'Complete: returned[%i]' % cont))
    return (cont, messages)

if __name__ == "__main__":
    # Configure logging before anything else runs: logging.basicConfig() does
    # nothing once the root logger has a handler, so it must come first for
    # all messages to end up in megarun.log.
    logging.basicConfig(level=logging.DEBUG, filename="megarun.log")

    #get runs we've already completed
    complete =  get_complete_run_ids( 'eva-results' )

    #loops over runs
    for r in r_model.get_ANRun():
        if r['run_id'] in ['fvb-biocarta']:
            # Earlier, broader filter kept for reference (it is the only
            # consumer of `complete`):
            #if r['status'] == 20 and r['run_id'][:4] not in ['test', 'lab-', 'joc-']:
            #    if r['run_id'] in complete:
            #        logging.warning("Skipping %s.  Already exists" % (r['run_id'],))
            #        continue
            run_id = r['run_id']
            windows = [(i, i+5) for i in range(4,16)] + [(4,20), (4,12), (12,20)]
            try:
                eva_res = createEVApackage(run_id, windows)
            except Exception:
                # A failing run is logged with its traceback and the loop goes
                # on; KeyboardInterrupt/SystemExit are no longer swallowed.
                logging.exception("Error running Eva")
示例#10
0
from pynamodb.attributes import UnicodeAttribute
import os
import os.path

from multiprocessing import Pool

class NetworkInfo(Model):
    """
    Record of the ``net_info_table`` table.

    Records are addressed by ``src_id`` (hash key) together with ``pw_id``
    (range key); ``broad_url`` and ``gene_ids`` default to the empty string.
    """

    class Meta:
        table_name = 'net_info_table'

    # key attributes
    src_id = UnicodeAttribute(hash_key=True)
    pw_id = UnicodeAttribute(range_key=True)

    # payload attributes
    broad_url = UnicodeAttribute(default='')
    gene_ids = UnicodeAttribute(default='')


# Evaluated once at import time; get_ANRun() is called without a run id here.
runs = run_mdl.get_ANRun()

def writeit( r):
    try:
        s_run = r['run_id'].split('-')
        if 'trn' not in s_run or  r['status'] != run_mdl.COMPLETE:
            return
        if r['run_id'] in ['b6-q111-kegg', 'fvb-analysis1']:
            return 
        if r['status'] != run_mdl.COMPLETE:
            return
            print r['run_id']
        net_table = r['network_config']['network_table']
        net_source_id = r['network_config']['network_source']
        source_dataframe = r['dest_data']['dataframe_file']
        metadata_file = r['dest_data']['meta_file']
示例#11
0
 def get_run_model(self):
     """
     Log the lookup at info level and return the record that
     ``run_mdl.get_ANRun`` yields for ``self.run_id``.
     """
     message = "Getting run[%s] info" % self.run_id
     self.logger.info(message)
     run_model = run_mdl.get_ANRun( self.run_id )
     return run_model
示例#12
0
 def _get_active_run( self ):
     """
     Return the first run owned by this master whose status is ACTIVE,
     or None when no such run exists.
     """
     for candidate in run_mdl.get_ANRun():
         if candidate['master_name'] != self._master_name:
             # run belongs to a different master
             continue
         if candidate['status'] == run_mdl.ACTIVE:
             return candidate
     return None