Exemplo n.º 1
0
def test_api_load_from_s3():
    """Verify API helpers read model/test data from the mocked S3 bucket.

    Exercises availability lookups, latest-date resolution, model config
    loading, test-corpus discovery and model metadata assembly against a
    bucket populated with a model plus model and test statistics.
    """
    # Local imports are recommended when using moto-mocked S3
    from emmaa_service.api import is_available, get_latest_available_date, \
        _get_test_corpora, _get_model_meta_data, get_model_config
    # setup_bucket returns the S3 client, but these checks only inspect
    # the bucket through the API helpers, so the return value is unused
    setup_bucket(add_model=True,
                 add_model_stats=True,
                 add_test_stats=True)
    today = make_date_str()[:10]
    other_day = '2020-01-01'
    # Stats exist for 'simple_tests' today, but not for other corpora/days
    assert is_available('test', 'simple_tests', today, TEST_BUCKET_NAME)
    assert not is_available('test', 'large_corpus_tests', today,
                            TEST_BUCKET_NAME)
    assert not is_available('test', 'simple_tests', other_day,
                            TEST_BUCKET_NAME)
    assert get_latest_available_date('test',
                                     'simple_tests',
                                     bucket=TEST_BUCKET_NAME) == today
    config = get_model_config('test', TEST_BUCKET_NAME)
    assert config
    test_corpora = _get_test_corpora('test', TEST_BUCKET_NAME)
    assert test_corpora == {'simple_tests'}
    # Metadata is a list of (model_name, config) pairs, one per model
    metadata = _get_model_meta_data(bucket=TEST_BUCKET_NAME)
    assert len(metadata) == 1
    assert len(metadata[0]) == 2
    assert metadata[0][0] == 'test'
    assert metadata[0][1] == config
Exemplo n.º 2
0
def test_handler():
    """Test the lambda handler locally."""
    date_str = make_date_str()
    model_key = f'models/test/test_model_{date_str}.pkl'
    trigger_event = {'Records': [{'s3': {'object': {'key': model_key}}}]}
    # The handler does not use the context argument, so None suffices
    resp = lambda_handler(trigger_event, None)
    print(resp)
    assert resp['statusCode'] == 200, resp
    assert resp['result'] == 'SUCCESS', resp
    assert resp['job_id'], resp
    batch_job_id = resp['job_id']

    # Block until the submitted batch job finishes, collecting outcomes
    outcomes = {}
    wait_for_complete(QUEUE,
                      job_list=[{'jobId': batch_job_id}],
                      result_record=outcomes)
    print(outcomes)
    succeeded_ids = [job_def['jobId'] for job_def in outcomes['succeeded']]
    assert batch_job_id in succeeded_ids, outcomes['failed']

    # The job should have written results for today's date to S3
    s3 = get_s3_client()
    listing = s3.list_objects(Bucket='emmaa',
                              Prefix='results/test/' + date_str[:10])
    print(listing.keys())
    assert listing, listing
Exemplo n.º 3
0
 def answer_dynamic_query(self, query, bucket=EMMAA_BUCKET_NAME):
     """Answer a user's dynamic query by simulating the PySB model."""
     (pysb_model, use_kappa, time_limit,
      num_times, num_sim) = self._get_dynamic_components()
     checker = TRA(use_kappa=use_kappa)
     pattern = query.get_temporal_pattern(time_limit)
     try:
         sat_rate, num_sim, kpat, pat_obj, fig_path = \
             checker.check_property(pysb_model, pattern,
                                    num_times=num_times)
         if self.mode == 's3':
             # Upload the simulation figure and point at its public URL
             fig_name, ext = os.path.splitext(os.path.basename(fig_path))
             date_str = make_date_str()
             s3_key = (f'query_images/{self.model.name}/{fig_name}_'
                       f'{date_str}{ext}')
             s3_path = f'https://{bucket}.s3.amazonaws.com/{s3_key}'
             client = get_s3_client(unsigned=False)
             logger.info(f'Uploading image to {s3_path}')
             client.upload_file(fig_path, Bucket=bucket, Key=s3_key)
             fig_path = s3_path
         resp_json = {'sat_rate': sat_rate,
                      'num_sim': num_sim,
                      'kpat': kpat,
                      'fig_path': fig_path}
     except (MissingMonomerError, MissingMonomerSiteError):
         # The queried entities do not exist in the model
         resp_json = RESULT_CODES['QUERY_NOT_APPLICABLE']
     return [('pysb', self.hash_response_list(resp_json), resp_json)]
Exemplo n.º 4
0
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Create StatementCheckingTests from model statements."""
    stmts, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)
    # Optionally restrict the statements according to the model config
    make_tests = config.get('make_tests')
    if isinstance(make_tests, dict):
        filter_config = make_tests['filter']
        stmts = filter_indra_stmts_by_metadata(stmts,
                                               filter_config['conditions'],
                                               filter_config['evid_policy'])
    # A statement becomes a test only when all of its agents are present
    tests = []
    for stmt in stmts:
        if all(stmt.agent_list()):
            tests.append(StatementCheckingTest(stmt))
    date_str = make_date_str()
    test_description = (
        f'These tests were generated from the '
        f'{config.get("human_readable_name")} on {date_str[:10]}')
    test_name = f'{config.get("human_readable_name")} model test corpus'
    test_dict = {'test_data': {'description': test_description,
                               'name': test_name},
                 'tests': tests}
    if upload:
        save_tests_to_s3(test_dict, bucket,
                         f'tests/{model_name}_tests_{date_str}.pkl', 'pkl')
    return test_dict
Exemplo n.º 5
0
 def answer_intervention_query(self, query, bucket=EMMAA_BUCKET_NAME):
     """Answer a user intervention query by simulating a PySB model."""
     (pysb_model, use_kappa, time_limit,
      num_times, num_sim, _) = self._get_dynamic_components('intervention')
     checker = TRA(use_kappa=use_kappa)
     try:
         res, fig_path = checker.compare_conditions(
             pysb_model, query.condition_entity, query.target_entity,
             query.direction, time_limit, num_times)
     except (MissingMonomerError, MissingMonomerSiteError):
         # The queried entities do not exist in the model
         resp_json = RESULT_CODES['QUERY_NOT_APPLICABLE']
         return [('pysb', self.hash_response_list(resp_json), {
             'fail_reason': RESULT_CODES['QUERY_NOT_APPLICABLE']
         })]
     if self.mode == 's3':
         # Upload the comparison figure and point at its public URL
         fig_name, ext = os.path.splitext(os.path.basename(fig_path))
         date_str = make_date_str()
         s3_key = (f'query_images/{self.model.name}/{fig_name}_'
                   f'{date_str}{ext}')
         s3_path = f'https://{bucket}.s3.amazonaws.com/{s3_key}'
         client = get_s3_client(unsigned=False)
         logger.info(f'Uploading image to {s3_path}')
         client.upload_file(fig_path, Bucket=bucket, Key=s3_key)
         fig_path = s3_path
     resp_json = {'result': res, 'fig_path': fig_path}
     return [('pysb', self.hash_response_list(resp_json), resp_json)]
Exemplo n.º 6
0
 def __init__(self, model):
     """Assemble the model for each requested model-checker type."""
     self.model = model
     # Maps each mc_type to a triple of (assembly function, model-checker
     # class, path-to-statements converter)
     self.mc_mapping = {
         'pysb': (self.model.assemble_pysb,
                  PysbModelChecker,
                  stmts_from_pysb_path),
         'pybel': (self.model.assemble_pybel,
                   PybelModelChecker,
                   stmts_from_pybel_path),
         'signed_graph': (self.model.assemble_signed_graph,
                          SignedGraphModelChecker,
                          stmts_from_indranet_path),
         'unsigned_graph': (self.model.assemble_unsigned_graph,
                            UnsignedGraphModelChecker,
                            stmts_from_indranet_path)}
     self.mc_types = {}
     for mc_type in model.test_config.get('mc_types', ['pysb']):
         assemble, checker_cls, _ = self.mc_mapping[mc_type]
         assembled_model = assemble()
         self.mc_types[mc_type] = {
             'model': assembled_model,
             'model_checker': checker_cls(assembled_model),
             'test_results': []}
     self.entities = self.model.get_assembled_entities()
     self.applicable_tests = []
     self.date_str = make_date_str()
     self.path_stmt_counts = defaultdict(int)
Exemplo n.º 7
0
 def get_dates(self):
     """Return the previous stats dates extended with the current date.

     Returns
     -------
     list of str
         Dates recorded in the previous statistics round (empty if there
         is no previous round) plus a freshly generated date string.
     """
     if not self.previous_json_stats:
         previous_dates = []
     else:
         # Copy the stored list: appending directly would mutate the
         # loaded previous stats JSON in place
         previous_dates = list(
             self.previous_json_stats['changes_over_time']['dates'])
     previous_dates.append(make_date_str())
     return previous_dates
Exemplo n.º 8
0
 def save_to_s3(self, bucket=EMMAA_BUCKET_NAME):
     """Dump the model state to S3."""
     date_str = make_date_str()
     base_key = f'models/{self.name}/model_{date_str}'
     # Store the same statements twice, as pickle and as JSON, under a
     # shared, date-stamped key stem
     save_pickle_to_s3(self.stmts, bucket, key=base_key + '.pkl')
     save_json_to_s3(self.stmts, bucket, key=base_key + '.json')
Exemplo n.º 9
0
def format_results(results, query_type='path_property'):
    """Format db output to a standard json structure.

    Takes rows of (model, query, mc_type, response_json, date) and builds
    a dict keyed by query hash. For path queries each model type gets a
    [status, response] pair; for dynamic queries a single 'result' entry
    plus an 'image' path is produced.
    """
    model_types = ['pysb', 'pybel', 'signed_graph', 'unsigned_graph']
    formatted_results = {}
    for result in results:
        model = result[0]
        query = result[1]
        query_hash = query.get_hash_with_model(model)
        # First row seen for this query creates the shared header entry
        if query_hash not in formatted_results:
            formatted_results[query_hash] = {
                'query': query.to_english(),
                'model': model,
                'date': make_date_str(result[4])
            }
        mc_type = result[2]
        response_json = result[3]
        # A string value in the response JSON is an error/status message;
        # dict values are individual path results to be listed
        response = []
        for v in response_json.values():
            if isinstance(v, str):
                response = v
            elif isinstance(v, dict):
                response.append(v)
        if query_type == 'path_property':
            # Status is derived from exact string matches on the message
            if mc_type == '' and \
                    response == 'Query is not applicable for this model':
                for mt in model_types:
                    formatted_results[query_hash][mt] = ['n_a', response]
            elif isinstance(response, str) and \
                    response == 'Statement type not handled':
                formatted_results[query_hash][mc_type] = ['n_a', response]
            elif isinstance(response, str) and \
                    not response == 'Path found but exceeds search depth':
                # Any other message string counts as a failure; note that
                # 'Path found but exceeds search depth' falls through to
                # the Pass branch below
                formatted_results[query_hash][mc_type] = ['Fail', response]
            else:
                formatted_results[query_hash][mc_type] = ['Pass', response]
        elif query_type == 'dynamic_property':
            if response == 'Query is not applicable for this model':
                formatted_results[query_hash]['result'] = ['n_a', response]
            else:
                # Satisfaction rate as an integer percentage; > 50% passes
                res = int(response[0]['sat_rate'] * 100)
                expl = (f'Satisfaction rate is {res}% after '
                        f'{response[0]["num_sim"]} simulations.')
                if res > 50:
                    formatted_results[query_hash]['result'] = ['Pass', expl]
                else:
                    formatted_results[query_hash]['result'] = ['Fail', expl]
                formatted_results[query_hash]['image'] = response[0][
                    'fig_path']
    if query_type == 'path_property':
        # Loop through the results again to make sure all model types are there
        for qh in formatted_results:
            for mt in model_types:
                if mt not in formatted_results[qh]:
                    formatted_results[qh][mt] = [
                        'n_a', 'Model type not supported'
                    ]
    return formatted_results
Exemplo n.º 10
0
 def save_to_s3(self, bucket='emmaa'):
     """Upload the test round statistics JSON to S3.

     Parameters
     ----------
     bucket : Optional[str]
         Name of the S3 bucket to upload the statistics to.
         Default: 'emmaa' (preserves the previously hard-coded bucket).
     """
     json_stats_str = json.dumps(self.json_stats, indent=1)
     client = get_s3_client(unsigned=False)
     date_str = make_date_str()
     stats_key = f'stats/{self.model_name}/stats_{date_str}.json'
     logger.info(f'Uploading test round statistics to {stats_key}')
     client.put_object(Bucket=bucket,
                       Key=stats_key,
                       Body=json_stats_str.encode('utf8'))
Exemplo n.º 11
0
 def save_to_s3(self, bucket='emmaa'):
     """Dump the model state to S3 as both pickle and JSON.

     Parameters
     ----------
     bucket : Optional[str]
         Name of the S3 bucket to upload the model state to.
         Default: 'emmaa' (preserves the previously hard-coded bucket).
     """
     date_str = make_date_str()
     fname = f'models/{self.name}/model_{date_str}'
     client = get_s3_client(unsigned=False)
     # Dump as pickle
     client.put_object(Body=pickle.dumps(self.stmts),
                       Bucket=bucket,
                       Key=fname + '.pkl')
     # Dump as json (equivalent to the former str.encode(..., 'utf8'))
     client.put_object(Body=json.dumps(self.to_json()).encode('utf8'),
                       Bucket=bucket,
                       Key=fname + '.json')
Exemplo n.º 12
0
def format_results(results):
    """Format db output to a standard json structure."""
    formatted = []
    # Each row is indexed positionally:
    # (model, query, mc_type, response_json, date)
    for row in results:
        entry = {'model': row[0],
                 'query': _make_query_simple_dict(row[1]),
                 'mc_type': row[2],
                 'response': _process_result_to_html(row[3]),
                 'date': make_date_str(row[4])}
        formatted.append(entry)
    return formatted
Exemplo n.º 13
0
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Create StatementCheckingTests from model statements."""
    stmts, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)
    # A statement becomes a test only when all of its agents are present
    tests = [StatementCheckingTest(stmt)
             for stmt in stmts if all(stmt.agent_list())]
    date_str = make_date_str()
    readable_name = config.get("human_readable_name")
    test_description = (f'These tests were generated from the '
                        f'{readable_name} on {date_str[:10]}')
    test_name = f'{readable_name} model test corpus'
    test_dict = {
        'test_data': {'description': test_description, 'name': test_name},
        'tests': tests,
    }
    if upload:
        key = f'tests/{model_name}_tests_{date_str}.pkl'
        save_tests_to_s3(test_dict, bucket, key, 'pkl')
    return test_dict
Exemplo n.º 14
0
 def __init__(self, name, config, paper_ids=None):
     """Initialize empty model state, then load settings from config.

     Parameters
     ----------
     name : str
         Name of the model.
     config : dict
         Configuration parsed by _load_config into the per-section
         attributes initialized below.
     paper_ids : Optional[iterable]
         Paper identifiers to seed self.paper_ids with; stored as a set.
     """
     self.name = name
     self.stmts = []
     # Per-section configuration containers; defaults must exist before
     # _load_config runs, since it presumably fills them in — confirm
     self.assembly_config = {}
     self.test_config = {}
     self.reading_config = {}
     self.query_config = {}
     self.search_terms = []
     self.ndex_network = None
     self.human_readable_name = None
     self.export_formats = []
     self._load_config(config)
     self.assembled_stmts = []
     if paper_ids:
         self.paper_ids = set(paper_ids)
     else:
         self.paper_ids = set()
     # Date stamp identifying this model instance's round
     self.date_str = make_date_str()
Exemplo n.º 15
0
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Build StatementCheckingTests from an assembled EmmaaModel."""
    model = EmmaaModel.load_from_s3(model_name, bucket=bucket)
    model.run_assembly()
    # A statement becomes a test only when all of its agents are present
    tests = []
    for stmt in model.assembled_stmts:
        if all(stmt.agent_list()):
            tests.append(StatementCheckingTest(stmt))
    date_str = make_date_str()
    description = (f'These tests were generated from the '
                   f'{model.human_readable_name} on {date_str[:10]}')
    test_dict = {'test_data': {'description': description},
                 'tests': tests}
    if upload:
        save_pickle_to_s3(test_dict, bucket,
                          f'tests/{model_name}_tests_{date_str}.pkl')
    return test_dict
Exemplo n.º 16
0
def setup_bucket(add_model=False,
                 add_mm=False,
                 add_tests=False,
                 add_results=False,
                 add_model_stats=False,
                 add_test_stats=False):
    """
    This function creates a new (local) bucket mocking S3 bucket at each call.
    Then all calls to S3 are calling this bucket instead of real S3 bucket.
    Depending on the test we might or might not need the bucket to contain
    different files. For faster computation, only required files for the test
    are generated and stored in the bucket. Files can be added by setting
    corresponding arguments to True when calling this function.

    Returns
    -------
    The S3 client connected to the mock bucket.
    """
    # Local imports are recommended when using moto
    from emmaa.util import get_s3_client
    from emmaa.model import save_config_to_s3
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        StatementCheckingTest
    # Create a mock s3 bucket
    client = get_s3_client()
    bucket = client.create_bucket(Bucket=TEST_BUCKET_NAME, ACL='public-read')
    date_str = make_date_str()
    emmaa_model = None
    if add_model:
        # Put config and model files into empty bucket
        config_dict = {
            'ndex': {
                'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
            },
            'search_terms': [{
                'db_refs': {
                    'HGNC': '20974'
                },
                'name': 'MAPK1',
                'search_term': 'MAPK1',
                'type': 'gene'
            }],
            'test': {
                'test_corpus': 'simple_tests',
                'default_test_corpus': 'simple_tests'
            },
            'human_readable_name':
            'Test Model',
            'assembly': [{
                'function': 'filter_no_hypothesis'
            }, {
                'function': 'map_grounding'
            }, {
                'function': 'filter_grounded_only'
            }, {
                'function': 'filter_human_only'
            }, {
                'function': 'map_sequence'
            }, {
                'function': 'run_preassembly',
                'kwargs': {
                    'return_toplevel': False
                }
            }, {
                'function': 'filter_top_level'
            }]
        }
        save_config_to_s3('test', config_dict, bucket=TEST_BUCKET_NAME)
        emmaa_model = create_model()
        emmaa_model.save_to_s3(bucket=TEST_BUCKET_NAME)
    if add_mm:
        # Add a ModelManager to bucket
        if not emmaa_model:
            emmaa_model = create_model()
        mm = ModelManager(emmaa_model)
        # Pin the manager's date so test keys below stay consistent
        mm.date_str = date_str
        mm.save_assembled_statements(upload_to_db=False,
                                     bucket=TEST_BUCKET_NAME)
        save_model_manager_to_s3('test', mm, bucket=TEST_BUCKET_NAME)
    if add_tests:
        # Single Activation statement is enough for a test corpus
        tests = [
            StatementCheckingTest(Activation(Agent('BRAF'), Agent('MAPK1')))
        ]
        test_dict = {
            'test_data': {
                'description': 'Tests for functionality testing'
            },
            'tests': tests
        }
        client.put_object(Body=pickle.dumps(test_dict),
                          Bucket=TEST_BUCKET_NAME,
                          Key=f'tests/simple_tests.pkl')
    if add_results:
        # previous_results/previous_model_stats/previous_test_stats are
        # presumably module-level fixtures — defined outside this view
        client.put_object(
            Body=json.dumps(previous_results, indent=1),
            Bucket=TEST_BUCKET_NAME,
            Key=f'results/test/results_simple_tests_{date_str}.json')
    if add_model_stats:
        client.put_object(Body=json.dumps(previous_model_stats, indent=1),
                          Bucket=TEST_BUCKET_NAME,
                          Key=f'model_stats/test/model_stats_{date_str}.json')
    if add_test_stats:
        client.put_object(
            Body=json.dumps(previous_test_stats, indent=1),
            Bucket=TEST_BUCKET_NAME,
            Key=f'stats/test/test_stats_simple_tests_{date_str}.json')
    return client
Exemplo n.º 17
0
def format_results(results, query_type='path_property'):
    """Format db output to a standard json structure.

    Parameters
    ----------
    results : iterable of tuple
        Rows of (model, query, mc_type, response_json, delta, date).
    query_type : Optional[str]
        One of 'path_property', 'open_search_query',
        'simple_intervention_property' or 'dynamic_property'; controls
        how responses are summarized. Default: 'path_property'.

    Returns
    -------
    dict
        Mapping of query hash to a formatted result entry.
    """
    model_types = ['pysb', 'pybel', 'signed_graph', 'unsigned_graph']
    formatted_results = {}
    for result in results:
        model = result[0]
        query = result[1]
        query_hash = query.get_hash_with_model(model)
        # First row seen for this query creates the shared header entry
        if query_hash not in formatted_results:
            formatted_results[query_hash] = {
                'query': query.to_english(),
                'model': model,
                'date': make_date_str(result[5])
            }
        mc_type = result[2]
        response_json = result[3]
        delta = result[4]
        # A string value in the response JSON is an error/status message;
        # dict values are individual path results to be listed
        response = []
        for k, v in response_json.items():
            if isinstance(v, str):
                response = v
            elif isinstance(v, dict):
                if k in delta:
                    # Mark new-in-this-round paths without mutating the
                    # stored response
                    new_v = deepcopy(v)
                    new_v['path'] = ('new', new_v['path'])
                    response.append(new_v)
                else:
                    response.append(v)
        if query_type in ['path_property', 'open_search_query']:
            if mc_type == '' and \
                    response == 'Query is not applicable for this model':
                for mt in model_types:
                    formatted_results[query_hash][mt] = ['n_a', response]
            elif isinstance(response, str) and \
                    response == 'Statement type not handled':
                formatted_results[query_hash][mc_type] = ['n_a', response]
            elif isinstance(response, str) and \
                    not response == 'Path found but exceeds search depth':
                # Any other message string counts as a failure; 'Path
                # found but exceeds search depth' falls through to Pass
                formatted_results[query_hash][mc_type] = ['Fail', response]
            else:
                formatted_results[query_hash][mc_type] = ['Pass', response]
        elif query_type == 'simple_intervention_property':
            if response == 'Query is not applicable for this model':
                formatted_results[query_hash]['result'] = ['n_a', response]
            else:
                res = response[0]['result']
                if res == 'no_change':
                    action = 'did not change'
                elif res.endswith('increase'):
                    action = 'increased'
                elif res.endswith('decrease'):
                    action = 'decreased'
                else:
                    # Fallback so `action` is always bound: previously an
                    # unexpected result value raised NameError below
                    action = 'changed'
                if res.startswith('no'):
                    expl = f'No, the amount of target entity {action}.'
                    formatted_results[query_hash]['result'] = ['Fail', expl]
                else:
                    expl = f'Yes, the amount of target entity {action}.'
                    formatted_results[query_hash]['result'] = ['Pass', expl]
                formatted_results[query_hash]['image'] = (
                    response[0]['fig_path'])
        elif query_type == 'dynamic_property':
            if response == 'Query is not applicable for this model':
                formatted_results[query_hash]['result'] = ['n_a', response]
            else:
                # Satisfaction rate as an integer percentage; > 50% passes
                res = int(response[0]['sat_rate'] * 100)
                expl = (f'Satisfaction rate is {res}% after '
                        f'{response[0]["num_sim"]} simulations.')
                if res > 50:
                    formatted_results[query_hash]['result'] = ['Pass', expl]
                else:
                    formatted_results[query_hash]['result'] = ['Fail', expl]
                formatted_results[query_hash]['image'] = (
                    response[0]['fig_path'])
    if query_type in ['path_property', 'open_search_query']:
        # Loop through the results again to make sure all model types are there
        for qh in formatted_results:
            for mt in model_types:
                if mt not in formatted_results[qh]:
                    formatted_results[qh][mt] = [
                        'n_a', 'Model type not supported'
                    ]
    return formatted_results
Exemplo n.º 18
0
def run_model_tests_from_s3(model_name,
                            upload_mm=True,
                            upload_results=True,
                            upload_stats=True,
                            registered_queries=True,
                            db=None):
    """Run a given set of tests on a given model, both loaded from S3.

    After loading both the model and the set of tests, model/test overlap
    is determined using a ScopeTestConnector and tests are run.

    Parameters
    ----------
    model_name : str
        Name of EmmaaModel to load from S3.
    upload_mm : Optional[bool]
        Whether to upload a model manager instance to S3 as a pickle file.
        Default: True
    upload_results : Optional[bool]
        Whether to upload test results to S3 in JSON format. Can be set
        to False when running tests. Default: True
    upload_stats : Optional[bool]
        Whether to upload latest statistics about model and a test.
        Default: True
    registered_queries : Optional[bool]
        If True, registered queries are fetched from the database and
        executed, the results are then saved to the database. Default: True
    db : Optional[emmaa.db.manager.EmmaaDatabaseManager]
        If given over-rides the default primary database.

    Returns
    -------
    emmaa.model_tests.ModelManager
        Instance of ModelManager containing the model data, list of applied
        tests and the test results.
    emmaa.analyze_test_results.StatsGenerator
        Instance of StatsGenerator containing statistics about model and test.
    """
    # Load the model and its configured test corpus from S3
    model = EmmaaModel.load_from_s3(model_name)
    test_corpus = model.test_config.get('test_corpus',
                                        'large_corpus_tests.pkl')
    tests = load_tests_from_s3(test_corpus)
    mm = ModelManager(model)
    if upload_mm:
        save_model_manager_to_s3(model_name, mm)
    # Determine model/test overlap, then run all applicable tests
    tm = TestManager([mm], tests)
    tm.make_tests(ScopeTestConnector())
    tm.run_tests()
    results_json_dict = mm.results_to_json()
    results_json_str = json.dumps(results_json_dict, indent=1)
    # Optionally upload test results to S3
    if upload_results:
        client = get_s3_client(unsigned=False)
        date_str = make_date_str()
        result_key = f'results/{model_name}/results_{date_str}.json'
        logger.info(f'Uploading test results to {result_key}')
        client.put_object(Bucket='emmaa',
                          Key=result_key,
                          Body=results_json_str.encode('utf8'))
    # Generate statistics from this round of results
    tr = TestRound(results_json_dict)
    sg = StatsGenerator(model_name, latest_round=tr)
    sg.make_stats()

    # Optionally upload statistics to S3
    if upload_stats:
        sg.save_to_s3()
    # Optionally answer all registered queries against the fresh manager
    if registered_queries:
        qm = QueryManager(db=db, model_managers=[mm])
        qm.answer_registered_queries(model_name)
    return (mm, sg)