def summarize_helper(model_path):
    """Load the pickled model under *model_path* and echo summary counts."""
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    pkl_file = os.path.join(model_path, 'model.pkl')
    inc_model = IncrementalModel(pkl_file)
    statements = inc_model.get_statements()
    click.echo('Number of statements: {}'.format(len(statements)))
    model_agents = inc_model.get_model_agents()
    click.echo('Number of agents: {}'.format(len(model_agents)))
def test_add_stmts_prior_all():
    """prior_all filter: only statements about prior genes survive preassembly."""
    model = IncrementalModel()
    model.stmts['prior'] = [stmts[0]]
    model.prior_genes = ['MAPK1', 'MAPK3']
    model.add_statements('12345', [stmts[1]])
    model.preassemble(filters=['prior_all'])
    assert len(model.assembled_stmts) == 1
def test_add_stmts_prior_all():
    """prior_all filter keeps only statements whose agents are all prior genes."""
    model = IncrementalModel()
    # Start out with MAPK1 and MAPK3
    model.stmts['prior'] = [stmts[0]]
    model.prior_genes = ['MAPK1', 'MAPK3']
    # Try to add MAP2K1 and MAPK3
    model.add_statements('12345', [stmts[1]])
    model.preassemble(filters=['prior_all'])
    assert len(model.assembled_stmts) == 1, model.assembled_stmts
def test_grounding_all():
    """A statement whose agents are all grounded passes the grounding filter."""
    model = IncrementalModel()
    grounded_cplx = Complex([Agent('A', db_refs={'UP': 'ABCD'}),
                             Agent('B', db_refs={'HGNC': '1234'})])
    model.add_statements('12345', [grounded_cplx])
    model.preassemble(filters=['grounding'])
    assert len(model.assembled_stmts) == 1
def run_machine(model_path, pmids, belief_threshold, search_genes=None,
                ndex_cred=None, twitter_cred=None):
    """Run one update cycle of the machine-reading model pipeline.

    Steps visible here: optionally submit the collected PMIDs for remote
    reading on AWS, load the pickled model from ``model_path``, preassemble,
    extend with new papers, preassemble again, save the model (locally or to
    S3), upload results to NDEx and optionally tweet a status message.

    Parameters
    ----------
    model_path : str
        Directory containing ``model.pkl``.
    pmids : dict
        Mapping whose values are lists of PMID strings (keys not inspected
        here — presumably search terms; verify against caller).
    belief_threshold : float
        Passed through to ``filter_db_highbelief``.
    search_genes : list or None
        If given, stored on the model as ``prior_genes``.
    ndex_cred : object or None
        NDEx credentials; upload is skipped when falsy.
    twitter_cred : object or None
        Twitter credentials; tweeting is skipped when falsy.
    """
    start_time_local = datetime.datetime.now(tzlocal.get_localzone())
    date_str = make_date_str()
    # Save PMIDs in file and send for remote reading
    # (aws_available is presumably a module-level flag — TODO confirm)
    if aws_available:
        pmid_fname = 'pmids-%s.txt' % date_str
        # Flatten and deduplicate all PMID lists
        all_pmids = []
        for v in pmids.values():
            all_pmids += v
        all_pmids = list(set(all_pmids))
        with open(pmid_fname, 'wt') as fh:
            for pmid in all_pmids:
                fh.write('%s\n' % pmid)
        # Submit reading
        job_list = submit_reading('rasmachine', pmid_fname, ['reach'])
        # Wait for reading to complete
        wait_for_complete('run_reach_queue', job_list, idle_log_timeout=600,
                          kill_on_log_timeout=True)
    # Load the model
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    inc_model_file = os.path.join(model_path, 'model.pkl')
    model = IncrementalModel(inc_model_file)
    # Include search genes as prior genes
    if search_genes:
        model.prior_genes = search_genes
    stats = {}
    logger.info(time.strftime('%c'))
    logger.info('Preassembling original model.')
    model.preassemble(filters=global_filters)
    logger.info(time.strftime('%c'))
    # Original statistics
    stats['orig_stmts'] = len(model.get_statements())
    stats['orig_assembled'] = len(model.assembled_stmts)
    orig_stmts = filter_db_highbelief(model.assembled_stmts,
                                      ['bel', 'biopax'], belief_threshold)
    orig_stmts = ac.filter_top_level(orig_stmts)
    stats['orig_final'] = len(orig_stmts)
    logger.info('%d final statements' % len(orig_stmts))
    # Extend the model with PMIDs
    logger.info('----------------')
    logger.info(time.strftime('%c'))
    logger.info('Extending model.')
    stats['new_papers'], stats['new_abstracts'], stats['existing'] = \
        extend_model(model_path, model, pmids, start_time_local)
    # Having added new statements, we preassemble the model
    model.preassemble(filters=global_filters)
    # New statistics
    stats['new_stmts'] = len(model.get_statements())
    stats['new_assembled'] = len(model.assembled_stmts)
    new_stmts = filter_db_highbelief(model.assembled_stmts,
                                     ['bel', 'biopax'], belief_threshold)
    new_stmts = ac.filter_top_level(new_stmts)
    stats['new_final'] = len(new_stmts)
    logger.info('%d final statements' % len(new_stmts))
    check_pmids(model.get_statements())
    # Save model
    logger.info(time.strftime('%c'))
    logger.info('Saving model')
    model.save(inc_model_file)
    logger.info(time.strftime('%c'))
    # Save a time stamped version of the pickle for backup/diagnostic purposes
    if not aws_available:
        inc_model_bkp_file = os.path.join(model_path,
                                          'model-%s.pkl' % date_str)
        model.save(inc_model_bkp_file)
    else:
        # On AWS, back the pickle up to S3 instead of the local disk
        key = 'rasmachine/%s/model-%s.pkl' % (model_path.replace('/', '_'),
                                              date_str)
        s3 = boto3.client('s3')
        s3.upload_file(inc_model_file, 'bigmech', key)
    # Upload the new, final statements to NDEx
    if ndex_cred:
        upload_new_ndex(model_path, new_stmts, ndex_cred)
    # Print and tweet the status message
    logger.info('--- Final statistics ---')
    for k, v in sorted(stats.items(), key=lambda x: x[0]):
        logger.info('%s: %s' % (k, v))
    logger.info('------------------------')
    msg_str = make_status_message(stats)
    if msg_str is not None:
        logger.info('Status message: %s' % msg_str)
        if twitter_cred:
            logger.info('Now tweeting: %s' % msg_str)
            twitter_client.update_status(msg_str, twitter_cred)
def test_add_stmts_blank_nofilter():
    """Preassembly with filters=None keeps all added statements."""
    model = IncrementalModel()
    model.add_statements('12345', stmts)
    model.preassemble(filters=None)
    assert len(model.assembled_stmts) == 2
def test_grounding_none_agent():
    """Grounding filter on stmts2 leaves exactly one assembled statement."""
    model = IncrementalModel()
    model.add_statements('12345', stmts2)
    model.preassemble(filters=['grounding'])
    assert len(model.assembled_stmts) == 1
def test_add_stmts_blank_emptyfilter():
    """An empty filter list behaves like no filtering at all."""
    model = IncrementalModel()
    model.add_statements('12345', stmts)
    model.preassemble(filters=[])
    assert len(model.assembled_stmts) == 2
def test_add_stmts_blank_noprior2():
    """prior_all with no prior genes defined does not drop anything."""
    model = IncrementalModel()
    model.add_statements('12345', stmts)
    model.preassemble(filters=['prior_all'])
    assert len(model.assembled_stmts) == 2
def test_grounding_none_agent():
    """Older API: filters passed to add_statements directly."""
    model = IncrementalModel()
    model.add_statements('12345', stmts2, filters=['grounding'])
    assert len(model.get_statements()) == 1
def load_model(model_path):
    """Return an IncrementalModel loaded from ``model.pkl`` in *model_path*."""
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    pkl_file = os.path.join(model_path, 'model.pkl')
    return IncrementalModel(pkl_file)
def test_add_stmts_model_all():
    """model_all filter rejects statements whose agents are not in the model."""
    model = IncrementalModel()
    model.add_statements('12345', [stmts[0]])
    model.add_statements('23456', [stmts[1]], filters=['model_all'])
    assert len(model.get_statements()) == 1
def test_preassemble_grounded_prior_all():
    """prior_all preassembly on a prior + new statement leaves one unique."""
    model = IncrementalModel()
    model.stmts['prior'] = [stmt3]
    model.stmts['12345'] = [stmt5]
    model.preassemble(filters=['prior_all'])
    assert len(model.unique_stmts) == 1
from indra.mechlinker import MechLinker
from indra.assemblers import EnglishAssembler


def print_linked_stmt(stmt):
    """Print and return an English question derived from a linked statement.

    The sources of *stmt* are assembled into numbered clauses, joined with
    ', and ', and followed by a question built from the inferred statement.
    """
    source_txts = [EnglishAssembler([src]).make_model()
                   for src in stmt.source_stmts]
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    final_txt = 'I know that '
    last_idx = len(source_txts) - 1
    for idx, txt in enumerate(source_txts):
        final_txt += '(%d) %s ' % (idx + 1, txt)
        if idx < last_idx:
            # Drop the trailing '. ' and connect clauses with ', and '
            final_txt = final_txt[:-2] + ', and '
    final_txt += 'Is it therefore true that ' + query_txt[:-1] + '?'
    print(final_txt)
    return final_txt


if __name__ == '__main__':
    fname = 'models/rasmachine/rem/model.pkl'
    model = IncrementalModel(fname)
    model.preassemble()
    stmts = model.assembled_stmts
    linked_stmts = MechLinker.infer_active_forms(stmts)
    linked_stmts += MechLinker.infer_modifications(stmts)
    linked_stmts += MechLinker.infer_activations(stmts)
    for stmt in linked_stmts:
        print_linked_stmt(stmt)
def test_add_stmts_blank_emptyfilter():
    """Older API: an empty filter list on add_statements keeps everything."""
    model = IncrementalModel()
    model.add_statements('12345', stmts, filters=[])
    assert len(model.get_statements()) == 2
def test_add_stmts_blank():
    """Adding statements with no filters keeps them through preassembly."""
    model = IncrementalModel()
    model.add_statements('12345', stmts)
    assert len(model.get_statements()) == 2
    model.preassemble()
    assert len(model.assembled_stmts) == 2
def test_add_stmts_blank_nofilter():
    """filters=None is a no-op: both statements are assembled."""
    model = IncrementalModel()
    model.add_statements('12345', stmts)
    model.preassemble(filters=None)
    assert len(model.assembled_stmts) == 2
def run_machine(model_path, pmids, belief_threshold, search_genes=None,
                ndex_cred=None, twitter_cred=None, grounding_map=None):
    """Run one update cycle of the machine-reading model pipeline.

    Steps visible here: optionally submit the collected PMIDs for remote
    reading on AWS, load the pickled model from ``model_path``, preassemble
    (with an optional grounding map), extend with new papers, preassemble
    again, save the model (locally or to S3), upload results to NDEx and
    optionally tweet a status message.

    Parameters
    ----------
    model_path : str
        Directory containing ``model.pkl``.
    pmids : dict
        Mapping whose values are lists of PMID strings (keys not inspected
        here — presumably search terms; verify against caller).
    belief_threshold : float
        Passed through to ``filter_db_highbelief``.
    search_genes : list or None
        If given, stored on the model as ``prior_genes``.
    ndex_cred : object or None
        NDEx credentials; upload is skipped when falsy.
    twitter_cred : object or None
        Twitter credentials; tweeting is skipped when falsy.
    grounding_map : dict or None
        Forwarded to ``model.preassemble`` — presumably a text-to-db_refs
        mapping; verify against IncrementalModel.preassemble.
    """
    start_time_local = datetime.datetime.now(tzlocal.get_localzone())
    date_str = make_date_str()
    # Save PMIDs in file and send for remote reading
    # (aws_available is presumably a module-level flag — TODO confirm)
    if aws_available:
        pmid_fname = 'pmids-%s.txt' % date_str
        # Flatten and deduplicate all PMID lists
        all_pmids = []
        for v in pmids.values():
            all_pmids += v
        all_pmids = list(set(all_pmids))
        with open(pmid_fname, 'wt') as fh:
            for pmid in all_pmids:
                fh.write('%s\n' % pmid)
        # Submit reading
        job_list = submit_reading('rasmachine', pmid_fname, ['reach'])
        # Wait for reading to complete
        wait_for_complete('run_reach_queue', job_list, idle_log_timeout=600,
                          kill_on_log_timeout=True)
    # Load the model
    logger.info(time.strftime('%c'))
    logger.info('Loading original model.')
    inc_model_file = os.path.join(model_path, 'model.pkl')
    model = IncrementalModel(inc_model_file)
    # Include search genes as prior genes
    if search_genes:
        model.prior_genes = search_genes
    stats = {}
    logger.info(time.strftime('%c'))
    logger.info('Preassembling original model.')
    model.preassemble(filters=global_filters, grounding_map=grounding_map)
    logger.info(time.strftime('%c'))
    # Original statistics
    stats['orig_stmts'] = len(model.get_statements())
    stats['orig_assembled'] = len(model.assembled_stmts)
    orig_stmts = filter_db_highbelief(model.assembled_stmts,
                                      ['bel', 'biopax'], belief_threshold)
    orig_stmts = ac.filter_top_level(orig_stmts)
    stats['orig_final'] = len(orig_stmts)
    logger.info('%d final statements' % len(orig_stmts))
    # Extend the model with PMIDs
    logger.info('----------------')
    logger.info(time.strftime('%c'))
    logger.info('Extending model.')
    stats['new_papers'], stats['new_abstracts'], stats['existing'] = \
        extend_model(model_path, model, pmids, start_time_local)
    # Having added new statements, we preassemble the model
    model.preassemble(filters=global_filters, grounding_map=grounding_map)
    # New statistics
    stats['new_stmts'] = len(model.get_statements())
    stats['new_assembled'] = len(model.assembled_stmts)
    new_stmts = filter_db_highbelief(model.assembled_stmts,
                                     ['bel', 'biopax'], belief_threshold)
    new_stmts = ac.filter_top_level(new_stmts)
    stats['new_final'] = len(new_stmts)
    logger.info('%d final statements' % len(new_stmts))
    check_pmids(model.get_statements())
    # Save model
    logger.info(time.strftime('%c'))
    logger.info('Saving model')
    model.save(inc_model_file)
    logger.info(time.strftime('%c'))
    # Save a time stamped version of the pickle for backup/diagnostic purposes
    if not aws_available:
        inc_model_bkp_file = os.path.join(model_path,
                                          'model-%s.pkl' % date_str)
        model.save(inc_model_bkp_file)
    else:
        # On AWS, back the pickle up to S3 instead of the local disk
        key = 'rasmachine/%s/model-%s.pkl' % (model_path.replace('/', '_'),
                                              date_str)
        s3 = boto3.client('s3')
        s3.upload_file(inc_model_file, 'bigmech', key)
    # Upload the new, final statements to NDEx
    if ndex_cred:
        upload_new_ndex(model_path, new_stmts, ndex_cred)
    # Print and tweet the status message
    logger.info('--- Final statistics ---')
    for k, v in sorted(stats.items(), key=lambda x: x[0]):
        logger.info('%s: %s' % (k, v))
    logger.info('------------------------')
    msg_str = make_status_message(stats)
    if msg_str is not None:
        logger.info('Status message: %s' % msg_str)
        if twitter_cred:
            logger.info('Now tweeting: %s' % msg_str)
            twitter_client.update_status(msg_str, twitter_cred)
def test_add_stmts_blank():
    """Older API: unfiltered add_statements stores both statements."""
    model = IncrementalModel()
    model.add_statements('12345', stmts)
    assert len(model.get_statements()) == 2
def test_add_stmts_blank_noprior2():
    """Older API: prior_all with no prior genes keeps both statements."""
    model = IncrementalModel()
    model.add_statements('12345', stmts, filters=['prior_all'])
    assert len(model.get_statements()) == 2
from indra.mechlinker import MechLinker
from indra.assemblers.english import EnglishAssembler


def print_linked_stmt(stmt):
    """Print and return an English question derived from a linked statement.

    The sources of *stmt* are assembled into numbered clauses, joined with
    ', and ', and followed by a question built from the inferred statement.
    """
    source_txts = [EnglishAssembler([src]).make_model()
                   for src in stmt.source_stmts]
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    final_txt = 'I know that '
    last_idx = len(source_txts) - 1
    for idx, txt in enumerate(source_txts):
        final_txt += '(%d) %s ' % (idx + 1, txt)
        if idx < last_idx:
            # Drop the trailing '. ' and connect clauses with ', and '
            final_txt = final_txt[:-2] + ', and '
    final_txt += 'Is it therefore true that ' + query_txt[:-1] + '?'
    print(final_txt)
    return final_txt


if __name__ == '__main__':
    fname = 'models/rasmachine/rem/model.pkl'
    model = IncrementalModel(fname)
    model.preassemble()
    stmts = model.assembled_stmts
    linked_stmts = MechLinker.infer_active_forms(stmts)
    linked_stmts += MechLinker.infer_modifications(stmts)
    linked_stmts += MechLinker.infer_activations(stmts)
    for stmt in linked_stmts:
        print_linked_stmt(stmt)
def test_add_stmts_prior_all():
    """Older API: prior_all on add_statements rejects non-prior statements."""
    model = IncrementalModel()
    model.stmts['prior'] = [stmts[0]]
    model.add_statements('12345', [stmts[1]], filters=['prior_all'])
    assert len(model.get_statements()) == 1
def test_human_only():
    """human_only filter drops non-human UP groundings, keeps HGNC/ungrounded."""
    model = IncrementalModel()
    human_up = Phosphorylation(None, Agent('BRAF', db_refs={'UP': 'P15056'}))
    mouse_up = Phosphorylation(None, Agent('BRAF', db_refs={'UP': 'P28028'}))
    hgnc_ref = Phosphorylation(None, Agent('BRAF', db_refs={'HGNC': '1097'}))
    no_refs = Phosphorylation(None, Agent('BRAF', db_refs={}))
    # Human UniProt grounding passes
    model.add_statements('12345', [human_up])
    model.preassemble(filters=['human_only'])
    assert len(model.assembled_stmts) == 1
    # Non-human UniProt grounding is filtered out
    model.add_statements('12346', [mouse_up])
    model.preassemble(filters=['human_only'])
    assert len(model.assembled_stmts) == 1
    # HGNC grounding duplicates the human statement
    model.add_statements('12346', [hgnc_ref])
    model.preassemble(filters=['human_only'])
    assert len(model.assembled_stmts) == 1
    # Ungrounded agent passes and is distinct
    model.add_statements('12346', [no_refs])
    model.preassemble(filters=['human_only'])
    assert len(model.assembled_stmts) == 2, \
        (model.assembled_stmts[0].sub.db_refs,
         model.assembled_stmts[1].sub.db_refs)
def test_grounding_not_all():
    """A statement with one ungrounded agent fails the grounding filter."""
    model = IncrementalModel()
    partial_cplx = Complex([Agent('A', db_refs={'UP': 'ABCD'}), Agent('B')])
    model.add_statements('12345', [partial_cplx], filters=['grounding'])
    assert len(model.get_statements()) == 0
def test_human_only():
    """Older API: human_only on add_statements drops non-human UP groundings."""
    model = IncrementalModel()
    human_up = Phosphorylation(None, Agent('BRAF', db_refs={'UP': 'P15056'}))
    mouse_up = Phosphorylation(None, Agent('BRAF', db_refs={'UP': 'P28028'}))
    # NOTE(review): 'BRAF' looks like a gene symbol rather than a numeric
    # HGNC id (the newer variant of this test uses '1097') — confirm whether
    # this was intentional for this API version.
    hgnc_ref = Phosphorylation(None, Agent('BRAF', db_refs={'HGNC': 'BRAF'}))
    no_refs = Phosphorylation(None, Agent('BRAF', db_refs={}))
    model.add_statements('12345', [human_up], filters=['human_only'])
    assert len(model.get_statements()) == 1
    model.add_statements('12346', [mouse_up], filters=['human_only'])
    assert len(model.get_statements()) == 1
    model.add_statements('12346', [hgnc_ref], filters=['human_only'])
    assert len(model.get_statements()) == 2
    model.add_statements('12346', [no_refs], filters=['human_only'])
    assert len(model.get_statements()) == 3
# are lagging behind and cannot be time-limited if not search_genes: logger.info('No search genes argument (search_genes) specified.') else: logger.info('Using search genes: %s' % ', '.join(search_genes)) pmids_gene = get_searchgenes_pmids(search_genes, num_days=5) num_pmids = sum([len(pm) for pm in pmids_gene.values()]) logger.info('Collected %d PMIDs from PubMed search_genes.' % num_pmids) pmids = _extend_dict(pmids, pmids_gene) ''' # Load the model logger.info(time.strftime('%c')) logger.info('Loading original model.') inc_model_file = os.path.join(model_path, model_name, 'model.pkl') model = IncrementalModel(inc_model_file) # Include search genes as prior genes model.prior_genes = search_genes stats = {} logger.info(time.strftime('%c')) logger.info('Preassembling original model.') model.preassemble(filters=global_filters) logger.info(time.strftime('%c')) # Original statistics stats['orig_stmts'] = len(model.get_statements()) stats['orig_assembled'] = len(model.assembled_stmts) db_stmts = ac.filter_evidence_source(model.assembled_stmts, ['biopax', 'bel'], policy='one') no_db_stmts = ac.filter_evidence_source(model.assembled_stmts,