Пример #1
0
    def __init__(self, env):
        """Parse the shared request options and work out authorization.

        A copy of ``request.args`` is stored on ``self.web_query`` and the
        common options (paging, sorting, output format, flags) are read from
        it through ``self._pop``.

        Parameters
        ----------
        env :
            Stored unchanged on ``self._env``; it is not otherwise used here.
        """
        self.tracker = LogTracker()
        self.start_time = datetime.now()
        self._env = env

        self.web_query = request.args.copy()

        # Get the offset and limit. 'limit' takes precedence over the
        # 'max_stmts' spelling; either way the value is capped at MAX_STMTS.
        self.offs = self._pop('offset', type_cast=int)
        limit_key = 'limit' if 'limit' in self.web_query else 'max_stmts'
        self.limit = min(self._pop(limit_key, MAX_STMTS, int), MAX_STMTS)

        # Sort out the sorting: an explicit 'sort_by' wins, otherwise
        # 'best_first' selects sorting by evidence count.
        sort_by = self._pop('sort_by', None)
        best_first = self._pop('best_first', True, bool)
        if sort_by is not None:
            self.sort_by = sort_by
        elif best_first:
            self.sort_by = 'ev_count'
        else:
            self.sort_by = None

        # Gather other miscellaneous options
        self.fmt = self._pop('format', 'json')
        self.w_english = self._pop('with_english', False, bool)
        self.w_cur_counts = self._pop('with_cur_counts', False, bool)
        self.strict = self._pop('strict', False, bool)

        # Prime agent recorders.
        self.agent_dict = None
        self.agent_set = None

        # Figure out authorization.
        self.has = dict.fromkeys(['elsevier', 'medscan'], False)
        if not TESTING['status']:
            failure_reason = {}
            self.user, roles = resolve_auth(self.web_query, failure_reason)
            if failure_reason:
                logger.info(
                    f"Auth error ({failure_reason['auth_attempted']}): "
                    f"{failure_reason['reason']}")
            # A resource is granted as soon as any one role permits it.
            for role in roles:
                for resource in self.has.keys():
                    self.has[resource] |= role.permissions.get(resource, False)
            logger.info('Auths: %s' % str(self.has))
        else:
            # There is no resolved user in testing mode; bind the attribute
            # anyway so that later reads of ``self.user`` do not raise
            # AttributeError.
            self.user = None
            # Any key will do for testing: its mere presence grants access.
            has_key = self.web_query.pop('api_key', None) is not None
            self.has['elsevier'] = has_key
            self.has['medscan'] = has_key

        self.db_query = None
        self.ev_filter = None
        self.special = {}
Пример #2
0
def get_home():
    """Render ``index_template.html`` for the current (possibly anonymous) user."""
    user, _ = resolve_auth(dict(request.args))
    template_kwargs = {
        'model_data': _get_model_meta_data(),
        'link_list': link_list,
        # Anonymous visitors get an empty e-mail string.
        'user_email': user.email if user else "",
    }
    return render_template('index_template.html', **template_kwargs)
Пример #3
0
def get_model_dashboard(model):
    """Render ``model_template.html`` with the statistics of one model.

    Parameters
    ----------
    model :
        Identifier of the model; it is matched against the ids returned by
        ``_get_model_meta_data`` and passed to the stats helpers.
    """
    user, _ = resolve_auth(dict(request.args))
    model_meta_data = _get_model_meta_data()
    mod_link_list = [('.' + link[0], link[1]) for link in link_list]

    last_update = model_last_updated(model=model)

    # No ``break`` here: if several metadata entries match, the last one wins.
    ndex_id = 'None available'
    for model_id, meta in model_meta_data:
        if model_id == model:
            ndex_id = meta['ndex']['network']
    if ndex_id == 'None available':
        logger.warning(f'No ndex ID found for {model}')
    if not last_update:
        logger.warning(f'Could not get last update for {model}')
        last_update = 'Not available'

    ndex_url = f'http://www.ndexbio.org/#/network/{ndex_id}'
    model_info_contents = [
        [('', 'Last Updated'), ('', last_update)],
        [('', 'Network on Ndex'), (ndex_url, ndex_id)],
    ]

    model_stats = get_model_stats(model)
    round_summary = model_stats['test_round_summary']
    all_new_tests = list(round_summary['all_test_results'].items())
    current_model_types = [
        mt for mt in ALL_MODEL_TYPES if mt in round_summary
    ]

    model_summary = model_stats['model_summary']
    all_stmts = model_summary['all_stmts']
    # Only the first ten entries of the by-evidence listing are shown.
    most_supported = model_summary['stmts_by_evidence'][:10]
    top_stmts_counts = [(all_stmts[stmt_hash], ('', str(count)))
                        for stmt_hash, count in most_supported]
    added_hashes = \
        model_stats['model_delta']['statements_hashes_delta']['added']
    added_stmts = [[all_stmts[stmt_hash]] for stmt_hash in added_hashes]

    return render_template(
        'model_template.html',
        model=model,
        model_data=model_meta_data,
        model_stats_json=model_stats,
        link_list=mod_link_list,
        user_email=user.email if user else "",
        stmts_counts=top_stmts_counts,
        added_stmts=added_stmts,
        model_info_contents=model_info_contents,
        model_types=[
            "Test", *[FORMATTED_MODEL_NAMES[mt] for mt in current_model_types]
        ],
        new_applied_tests=_new_applied_tests(model_stats_json=model_stats,
                                             model_types=current_model_types,
                                             model_name=model),
        all_test_results=_format_table_array(tests_json=all_new_tests,
                                             model_types=current_model_types,
                                             model_name=model),
        new_passed_tests=_new_passed_tests(model, model_stats,
                                           current_model_types))
Пример #4
0
def auth_curation():
    """Resolve the requesting user and roles for curation access.

    Returns
    -------
    tuple
        ``(user, roles)``. In testing mode ``user`` is None and ``roles``
        holds a single mock role.

    Raises
    ------
    InvalidCredentials
        If authentication reports a failure, or (in testing mode) if no
        ``api_key`` argument was supplied.
    """
    if TESTING['status']:
        api_key = request.args.get('api_key', None)
        if api_key is None:  # any key will do for testing.
            raise InvalidCredentials("API key")

        class MockRole:
            # Only the special 'GET_CURATIONS' key grants the permission.
            permissions = {"get_curations": api_key == 'GET_CURATIONS'}

        return None, [MockRole()]

    failure_reason = {}
    user, roles = resolve_auth(request.args.copy(), failure_reason)
    if failure_reason:
        raise InvalidCredentials(failure_reason['auth_attempted'])
    return user, roles
Пример #5
0
def get_query_page():
    """Render ``query_template.html`` including the user's registered queries."""
    user, _ = resolve_auth(dict(request.args))
    if user:
        user_email, user_id = user.email, user.id
    else:
        user_email, user_id = "", None
    model_meta_data = _get_model_meta_data()
    stmt_types = get_queryable_stmt_types()

    # Registered queries are only looked up when an e-mail is known.
    if user_email:
        old_results = qm.get_registered_queries(user_email)
    else:
        old_results = []

    return render_template('query_template.html',
                           model_data=model_meta_data,
                           stmt_types=stmt_types,
                           old_results=old_results,
                           link_list=link_list,
                           user_email=user_email,
                           user_id=user_id,
                           model_names=FORMATTED_MODEL_NAMES)
Пример #6
0
def submit_curation_endpoint(hash_val, **kwargs):
    """Submit a curation for the statement identified by ``hash_val``.

    The curation details (``ev_hash``, ``source``, ``tag``, ``text`` and,
    when no user is resolved, ``email``) are read from the JSON body. If
    ``test`` is among the URL arguments nothing is submitted and a
    'test passed' result is returned.

    Parameters
    ----------
    hash_val :
        Hash of the statement being curated.
    **kwargs :
        Accepted but not used by this function.

    Returns
    -------
    A JSON response; on failure a ``(response, status)`` tuple with status
    401 (no credentials) or 400 (missing e-mail, or the reserved 'test' tag
    used on a non-test request).
    """
    user, roles = resolve_auth(dict(request.args))
    if not roles and not user:
        res_dict = {"result": "failure", "reason": "Invalid Credentials"}
        return jsonify(res_dict), 401

    if user:
        email = user.email
    else:
        email = request.json.get('email')
        if not email:
            res_dict = {
                "result": "failure",
                "reason": "POST with API key requires a user email."
            }
            return jsonify(res_dict), 400

    logger.info("Adding curation for statement %s." % hash_val)
    ev_hash = request.json.get('ev_hash')
    source_api = request.json.pop('source', 'EMMAA')
    tag = request.json.get('tag')
    ip = request.remote_addr
    text = request.json.get('text')
    is_test = 'test' in request.args
    if not is_test:
        # Compare by value: the former ``tag is not 'test'`` tested object
        # identity against a literal, which is not a reliable string
        # comparison, and being an ``assert`` it vanished under ``-O``.
        if tag == 'test':
            res_dict = {
                "result": "failure",
                "reason": "The 'test' tag is only allowed on test requests."
            }
            return jsonify(res_dict), 400
        try:
            dbid = submit_curation(hash_val, tag, email, ip, text, ev_hash,
                                   source_api)
        except BadHashError as e:
            abort(Response("Invalid hash: %s." % e.mk_hash, 400))
        res = {'result': 'success', 'ref': {'id': dbid}}
    else:
        res = {'result': 'test passed', 'ref': None}
    logger.info("Got result: %s" % str(res))
    return jsonify(res)
Пример #7
0
def expand_meta_row():
    """Expand one meta-result row into its more detailed entries.

    Reads ``agent_json``, ``stmt_type`` and ``hashes`` from the JSON body and
    ``with_cur_counts`` / ``sort_by`` / ``api_key`` from the URL arguments.
    Medscan counts are removed for callers without medscan permission, an
    English rendering is attached to every entry, and curation counts are
    added on request.

    Returns
    -------
    Response
        A JSON response whose ``relations`` key lists the expanded entries,
        or a 400 response when no ``agent_json`` was given.
    """
    start_time = datetime.now()

    # Get the agent_json and hashes
    agent_json = request.json.get('agent_json')
    if not agent_json:
        logger.error("No agent_json provided!")
        return Response("No agent_json in request!", 400)
    stmt_type = request.json.get('stmt_type')
    hashes = request.json.get('hashes')
    logger.info(
        f"Expanding on agent_json={agent_json}, stmt_type={stmt_type}, "
        f"hashes={hashes}")

    cur_counts_arg = request.args.get('with_cur_counts', 'False')
    w_cur_counts = cur_counts_arg.lower() == 'true'

    # Figure out authorization.
    if TESTING['status']:
        # In testing mode the presence of any api_key grants medscan access.
        has_medscan = request.args.get('api_key', None) is not None
    else:
        has_medscan = False
        _, roles = resolve_auth(request.args.copy())
        for role in roles:
            has_medscan |= role.permissions.get('medscan', False)
    logger.info(f'Auths for medscan: {has_medscan}')

    # Get the sorting parameter.
    sort_by = request.args.get('sort_by', 'ev_count')

    # Get the more detailed results.
    expander = AgentJsonExpander(agent_json, stmt_type=stmt_type,
                                 hashes=hashes)
    result = expander.expand(sort_by=sort_by)

    # Filter out any medscan content, and construct english.
    # Iterate over a copy because censored entries are removed on the fly.
    entries_by_hash = defaultdict(list)
    for key, entry in result.results.copy().items():
        # Filter medscan...
        if not has_medscan:
            medscan_count = entry['source_counts'].pop('medscan', 0)
            result.evidence_counts[key] -= medscan_count
            entry['total_count'] = result.evidence_counts[key]
            if not entry['source_counts']:
                # Nothing but medscan supported this entry: drop it entirely.
                logger.warning("Censored content present. Removing it.")
                result.results.pop(key)
                result.evidence_counts.pop(key)
                continue

        # Add english...
        english = _make_english_from_meta(entry)
        if not english:
            logger.warning(f"English not formed for {key}:\n" f"{entry}")
        entry['english'] = english

        # Prep curation counts (indexed by the un-stringified hashes).
        if w_cur_counts:
            if 'hashes' in entry:
                for stmt_hash in entry['hashes']:
                    entry['cur_count'] = 0
                    entries_by_hash[stmt_hash].append(entry)
            elif 'hash' in entry:
                entry['cur_count'] = 0
                entries_by_hash[entry['hash']].append(entry)
            else:
                assert False, "Entry has no hash info."

        # Stringify hashes for JavaScript
        if 'hashes' not in entry:
            entry['hash'] = str(entry['hash'])
        else:
            entry['hashes'] = [str(h) for h in entry['hashes']]

    if w_cur_counts:
        curations = get_curations(pa_hash=set(entries_by_hash.keys()))
        for curation in curations:
            for entry in entries_by_hash[curation['pa_hash']]:
                entry['cur_count'] += 1

    # Replace the keyed 'results' mapping by a plain 'relations' list.
    res_json = result.json()
    res_json['relations'] = list(res_json.pop('results').values())
    resp = Response(json.dumps(res_json), mimetype='application/json')
    logger.info(f"Returning expansion with {len(result.results)} meta results "
                f"that represent {result.total_evidence} total evidence. Size "
                f"is {sys.getsizeof(resp.data) / 1e6} MB after "
                f"{sec_since(start_time)} seconds.")
    return resp
Пример #8
0
def process_query():
    """Validate a submitted model query, run it, and return the outcome.

    The JSON body must provide ``register``, ``query`` and ``models``.
    Registering (subscribing to) a query requires a logged-in user. A body
    that contains a ``test`` key, or whose ``tag`` is 'test', is validated
    but not executed.

    Returns
    -------
    A JSON response holding either ``redirectURL`` or a 'test passed'
    result; a ``(response, 401)`` tuple when an anonymous user tries to
    register a query. Malformed requests are aborted with status 400.
    """
    # Print inputs.
    logger.info('Got model query')
    logger.info("Args -----------")
    logger.info(request.args)
    logger.info("Json -----------")
    logger.info(str(request.json))
    logger.info("------------------")

    user, roles = resolve_auth(dict(request.args))
    user_email = user.email if user else ""
    user_id = user.id if user else None

    # Extract info.
    expected_static_query_keys = {
        f'{pos}Selection'
        for pos in ['subject', 'object', 'type']
    }
    expected_dynamic_query_keys = {
        f'{pos}Selection'
        for pos in ['pattern', 'value', 'agent']
    }
    expected_models = {mid for mid, _ in _get_model_meta_data()}
    tab = 'static'
    try:
        # If user tries to register query without logging in, refuse query
        # with 401 (unauthorized)
        if request.json['register']:
            if user_email:
                # Logged in
                subscribe = request.json['register']
            else:
                # Not logged in
                logger.warning('User not logged in! Query handling aborted.')
                return jsonify({
                    'result': 'failure',
                    'reason': 'Invalid credentials'
                }), 401
        # Does not try to register
        else:
            subscribe = False
        query_json = request.json['query']
        query_keys = set(query_json.keys())
        # The condition and the message must be separate operands of
        # ``assert``: a parenthesized ``(condition, message)`` tuple is
        # always truthy, so the check could never fail.
        assert (query_keys == expected_static_query_keys
                or query_keys == expected_dynamic_query_keys), \
            f'Did not get expected query keys: got {query_keys} '
        models = set(request.json.get('models'))
        # ``<=`` rather than ``<``: asking for every available model is
        # valid; only names outside the known set are an error.
        assert models <= expected_models, \
            f'Got unexpected models: {models - expected_models}'
    except (KeyError, TypeError, AssertionError) as e:
        # TypeError covers a missing/null JSON body or 'models' value, which
        # would otherwise surface as an unhandled server error.
        logger.exception(e)
        logger.error("Invalid query!")
        abort(Response(f'Invalid request: {str(e)}', 400))
    try:
        query, tab = _make_query(query_json)
    except GroundingError as e:
        logger.exception(e)
        logger.error("Invalid grounding!")
        abort(Response(f'Invalid entity: {str(e)}', 400))

    is_test = 'test' in request.json or 'test' == request.json.get('tag')

    if is_test:
        logger.info('Test passed')
        res = {'result': 'test passed', 'ref': None}

    else:
        logger.info('Query submitted')
        try:
            result = qm.answer_immediate_query(user_email,
                                               user_id,
                                               query,
                                               models,
                                               subscribe,
                                               use_kappa=False)
        except Exception as e:
            # Log, then re-raise unchanged.
            logger.exception(e)
            raise
        logger.info('Answer to query received: rendering page, returning '
                    'redirect endpoint')
        redir_url = f'/query?tab={tab}'

        # Replace existing entry
        session['query_hashes'] = result
        res = {'redirectURL': redir_url}

    logger.info('Result: %s' % str(res))
    return Response(json.dumps(res), mimetype='application/json')
Пример #9
0
def get_query_page():
    """Render ``query_template.html`` with immediate and subscribed results.

    Immediate results come from the hashes stored in the session under
    ``query_hashes``; subscribed results are looked up by the e-mail of the
    logged-in user, if any.
    """
    user, _ = resolve_auth(dict(request.args))
    user_email = user.email if user else ""
    tab = request.args.get('tab', 'model')
    model_meta_data = _get_model_meta_data()
    stmt_types = get_queryable_stmt_types()

    # Queried results: placeholders used when the session holds nothing.
    immediate_table_headers = None
    dynamic_immediate_headers = None
    queried_results = 'Results for submitted queries'
    dynamic_results = 'Results for submitted queries'
    if session.get('query_hashes'):
        for query_type, query_hashes in session['query_hashes'].items():
            found = qm.retrieve_results_from_hashes(query_hashes, query_type)
            if query_type == 'path_property':
                if found:
                    # Columns follow ALL_MODEL_TYPES order, limited to the
                    # types present in the first result.
                    first_result = list(found.values())[0]
                    immediate_table_headers = ['Query', 'Model'] + [
                        FORMATTED_TYPE_NAMES[mt]
                        for mt in ALL_MODEL_TYPES if mt in first_result
                    ]
                    queried_results = _format_query_results(found)
                else:
                    immediate_table_headers = []
                    queried_results = \
                        'No stashed results for subscribed queries. Please ' \
                        're-run query to see latest result.'
            elif query_type == 'dynamic_property':
                dynamic_immediate_headers = [
                    'Query', 'Model', 'Result', 'Image'
                ]
                dynamic_results = _format_dynamic_query_results(found)

    # Subscribed results
    subscribed_path_headers = []
    subscribed_dyn_headers = []
    if not user_email:
        subscribed_path_results = 'Please log in to see your subscribed queries'
        subscribed_dyn_results = 'Please log in to see your subscribed queries'
    else:
        sub_path_res = qm.get_registered_queries(user_email, 'path_property')
        if not sub_path_res:
            subscribed_path_results = 'You have no subscribed queries'
        else:
            subscribed_path_results = _format_query_results(sub_path_res)
            # Here the columns follow the key order of the first result.
            first_subscribed = list(sub_path_res.values())[0]
            subscribed_path_headers = ['Query', 'Model'] + [
                FORMATTED_TYPE_NAMES[mt]
                for mt in first_subscribed if mt in ALL_MODEL_TYPES
            ]
        sub_dyn_res = qm.get_registered_queries(user_email, 'dynamic_property')
        if not sub_dyn_res:
            subscribed_dyn_results = 'You have no subscribed queries'
        else:
            subscribed_dyn_results = _format_dynamic_query_results(sub_dyn_res)
            subscribed_dyn_headers = ['Query', 'Model', 'Result', 'Image']

    return render_template('query_template.html',
                           immediate_table_headers=immediate_table_headers,
                           immediate_query_result=queried_results,
                           immediate_dynamic_results=dynamic_results,
                           dynamic_immediate_headers=dynamic_immediate_headers,
                           model_data=model_meta_data,
                           stmt_types=stmt_types,
                           subscribed_results=subscribed_path_results,
                           subscribed_headers=subscribed_path_headers,
                           subscribed_dynamic_headers=subscribed_dyn_headers,
                           subscribed_dynamic_results=subscribed_dyn_results,
                           link_list=link_list,
                           user_email=user_email,
                           tab=tab)
Пример #10
0
def get_model_dashboard(model):
    """Render ``model_template.html`` for a model, test corpus and date.

    The URL arguments ``test_corpus``, ``date`` and ``tab`` select what is
    shown; each has a default. The request is aborted with status 404 when
    no test corpus can be identified or when model/test statistics are not
    found.

    Parameters
    ----------
    model :
        Identifier of the model; it is matched against the ids returned by
        ``_get_model_meta_data`` and passed to the stats helpers.
    """
    # NOTE: the default expressions below are evaluated eagerly, i.e. the
    # helper calls run even when the argument is present in the request.
    test_corpus = request.args.get(
        'test_corpus', _default_test(model, get_model_config(model)))
    if not test_corpus:
        abort(Response('Could not identify test corpus', 404))
    date = request.args.get('date',
                            get_latest_available_date(model, test_corpus))
    tab = request.args.get('tab', 'model')
    user, roles = resolve_auth(dict(request.args))
    model_meta_data = _get_model_meta_data()
    model_stats, _ = get_model_stats(model, 'model', date=date)
    test_stats, _ = get_model_stats(model,
                                    'test',
                                    tests=test_corpus,
                                    date=date)
    if not model_stats or not test_stats:
        abort(
            Response(
                f'Data for {model} and {test_corpus} for {date} '
                f'was not found', 404))
    # Look up the metadata of this model; there is no ``break``, so the last
    # matching entry wins.
    ndex_id = 'None available'
    description = 'None available'
    for mid, mmd in model_meta_data:
        if mid == model:
            ndex_id = mmd['ndex']['network']
            description = mmd['description']
    if ndex_id == 'None available':
        logger.warning(f'No ndex ID found for {model}')
    available_tests = _get_test_corpora(model)
    latest_date = get_latest_available_date(model, test_corpus)
    # Rows of the model info table; every cell is a 3-tuple whose first
    # element is empty or a URL, second the displayed value and third an
    # empty string or a hint text.
    model_info_contents = [
        [('', 'Model Description', ''), ('', description, '')],
        [('', 'Latest Data Available', ''), ('', latest_date, '')],
        [('', 'Data Displayed', ''),
         ('', date, 'Click on the point on time graph to see earlier results')
         ],
        [('', 'Network on Ndex', ''),
         (f'http://www.ndexbio.org/#/network/{ndex_id}', ndex_id,
          'Click to see network on Ndex')]
    ]
    test_data = test_stats['test_round_summary'].get('test_data')
    test_info_contents = None
    if test_data:
        test_info_contents = [[('', k.capitalize(), ''), ('', v, '')]
                              for k, v in test_data.items()]
    # Model types that have an entry in the test round summary.
    current_model_types = [
        mt for mt in ALL_MODEL_TYPES if mt in test_stats['test_round_summary']
    ]
    # Get correct and incorrect curation hashes to pass it per stmt
    correct, incorrect = _label_curations()
    # Filter out rows with all tests == 'n_a'
    all_tests = []
    for k, v in test_stats['test_round_summary']['all_test_results'].items():
        cur = _set_curation(k, correct, incorrect)
        # Mutates the stats in place; the curation label is appended even to
        # rows that are skipped below.
        v['test'].append(cur)
        if all(v[mt][0].lower() == 'n_a' for mt in current_model_types):
            continue
        else:
            all_tests.append((k, v))

    # Rewrite every statement entry in place: element 0 becomes the evidence
    # page URL, element 2 the link message, and the curation label is
    # appended at the end.
    all_stmts = model_stats['model_summary']['all_stmts']
    for st_hash, st_value in all_stmts.items():
        url_param = parse.urlencode({
            'stmt_hash': st_hash,
            'source': 'model_statement',
            'model': model
        })
        st_value[0] = f'/evidence?{url_param}'
        st_value[2] = stmt_db_link_msg
        cur = _set_curation(st_hash, correct, incorrect)
        st_value.append(cur)
    # Only the first ten entries of the by-evidence listing are shown.
    most_supported = model_stats['model_summary']['stmts_by_evidence'][:10]
    top_stmts_counts = [((all_stmts[h]), ('', str(c), ''))
                        for h, c in most_supported]
    added_stmts_hashes = \
        model_stats['model_delta']['statements_hashes_delta']['added']
    # ``added_stmts`` is a list of 1-tuples, or a message string when
    # nothing was added.
    if len(added_stmts_hashes) > 0:
        added_stmts = [((all_stmts[h]), ) for h in added_stmts_hashes]
    else:
        added_stmts = 'No new statements were added'
    return render_template(
        'model_template.html',
        model=model,
        model_data=model_meta_data,
        model_stats_json=model_stats,
        test_stats_json=test_stats,
        test_corpus=test_corpus,
        available_tests=available_tests,
        link_list=link_list,
        user_email=user.email if user else "",
        stmts_counts=top_stmts_counts,
        added_stmts=added_stmts,
        model_info_contents=model_info_contents,
        test_info_contents=test_info_contents,
        model_types=[
            "Test", *[FORMATTED_TYPE_NAMES[mt] for mt in current_model_types]
        ],
        new_applied_tests=_new_applied_tests(test_stats, current_model_types,
                                             model, date, test_corpus),
        all_test_results=_format_table_array(all_tests, current_model_types,
                                             model, date, test_corpus),
        new_passed_tests=_new_passed_tests(model, test_stats,
                                           current_model_types, date,
                                           test_corpus),
        date=date,
        latest_date=latest_date,
        tab=tab)
Пример #11
0
    def decorator(*args, **kwargs):
        """Run ``get_db_query`` for the current request and build the response.

        Common options (``offset``, ``ev_limit``, ``best_first``,
        ``max_stmts``, ``format``, ``with_english``, ``with_cur_counts``) are
        popped from a copy of ``request.args``; the remainder is handed to
        ``get_db_query`` together with ``*args`` and ``**kwargs``.

        Returns
        -------
        Response
            The Response produced by ``get_db_query`` if it returned one,
            otherwise an HTML page (``format=html``) or a JSON document with
            the statements, after medscan filtering and elsevier redaction.

        Raises
        ------
        RuntimeError
            If ``get_db_query`` returns neither a Response nor a QueryCore.
        """
        tracker = LogTracker()
        start_time = datetime.now()
        logger.info("Got query for %s at %s!" %
                    (get_db_query.__name__, start_time))

        web_query = request.args.copy()
        offs = _pop(web_query, 'offset', type_cast=int)
        ev_lim = _pop(web_query, 'ev_limit', type_cast=int)
        best_first = _pop(web_query, 'best_first', True, bool)
        max_stmts = min(_pop(web_query, 'max_stmts', MAX_STATEMENTS, int),
                        MAX_STATEMENTS)
        fmt = _pop(web_query, 'format', 'json')
        w_english = _pop(web_query, 'with_english', False, bool)
        w_cur_counts = _pop(web_query, 'with_cur_counts', False, bool)

        # Figure out authorization.
        has = dict.fromkeys(['elsevier', 'medscan'], False)
        # Bind ``user`` on every path: the HTML branch below reads it, and it
        # used to be assigned only outside testing mode, so an HTML request
        # in testing mode failed with a NameError.
        user = None
        # NOTE(review): TESTING is used as a plain truth value here; confirm
        # that it is a flag and not a mapping at this point.
        if not TESTING:
            user, roles = resolve_auth(web_query)
            for role in roles:
                for resource in has.keys():
                    has[resource] |= role.permissions.get(resource, False)
            logger.info('Auths: %s' % str(has))
        else:
            web_query.pop('api_key', None)
            has['elsevier'] = False
            has['medscan'] = False

        # Actually run the function.
        logger.info("Running function %s after %s seconds." %
                    (get_db_query.__name__, sec_since(start_time)))
        db_query = get_db_query(web_query, *args, **kwargs)
        if isinstance(db_query, Response):
            return db_query
        elif not isinstance(db_query, QueryCore):
            raise RuntimeError("Result should be a child of QueryCore.")

        # Default evidence limit: much larger for single-statement lookups.
        if ev_lim is None:
            if get_db_query is get_statement_by_hash:
                ev_lim = 10000
            else:
                ev_lim = 10

        # Without medscan permission, exclude medscan-only statements and
        # filter the evidence accordingly.
        if not has['medscan']:
            minus_q = ~HasOnlySource('medscan')
            db_query &= minus_q
            ev_filter = minus_q.ev_filter()
        else:
            ev_filter = None

        result = db_query.get_statements(offset=offs,
                                         limit=max_stmts,
                                         ev_limit=ev_lim,
                                         best_first=best_first,
                                         evidence_filter=ev_filter)

        logger.info("Finished function %s after %s seconds." %
                    (get_db_query.__name__, sec_since(start_time)))

        # Handle any necessary redactions
        res_json = result.json()
        stmts_json = res_json.pop('results')
        elsevier_redactions = 0
        source_counts = result.source_counts
        if not all(has.values()) or fmt == 'json-js' or w_english:
            for h, stmt_json in stmts_json.copy().items():
                if w_english:
                    stmt = stmts_from_json([stmt_json])[0]
                    stmt_json['english'] = _format_stmt_text(stmt)
                    stmt_json['evidence'] = _format_evidence_text(stmt)

                if has['elsevier'] and fmt != 'json-js' and not w_english:
                    continue

                if not has['medscan']:
                    source_counts[h].pop('medscan', 0)

                for ev_json in stmt_json['evidence'][:]:
                    # JavaScript consumers get the source hash as a string.
                    if fmt == 'json-js':
                        ev_json['source_hash'] = str(ev_json['source_hash'])

                    # Check for elsevier and redact if necessary
                    if not has['elsevier'] and \
                            get_source(ev_json) == 'elsevier':
                        text = ev_json['text']
                        if len(text) > 200:
                            ev_json['text'] = text[:200] + REDACT_MESSAGE
                            elsevier_redactions += 1

        logger.info(f"Redacted {elsevier_redactions} pieces of elsevier "
                    f"evidence.")

        logger.info("Finished redacting evidence for %s after %s seconds." %
                    (get_db_query.__name__, sec_since(start_time)))

        # Get counts of the curations for the resulting statements.
        if w_cur_counts:
            curations = get_curations(pa_hash=set(stmts_json.keys()))
            logger.info("Found %d curations" % len(curations))
            cur_counts = {}
            for curation in curations:
                # Update the overall counts.
                if curation.pa_hash not in cur_counts:
                    cur_counts[curation.pa_hash] = 0
                cur_counts[curation.pa_hash] += 1

                # Work these counts into the evidence dict structure.
                for ev_json in stmts_json[curation.pa_hash]['evidence']:
                    if str(ev_json['source_hash']) == str(
                            curation.source_hash):
                        ev_json['num_curations'] = \
                            ev_json.get('num_curations', 0) + 1
                        break
            res_json['num_curations'] = cur_counts

        # Add derived values to the res_json.
        # NOTE(review): the two limit-derived values below use MAX_STATEMENTS
        # although the query ran with ``max_stmts``; confirm this is intended.
        res_json['offset'] = offs
        res_json['evidence_limit'] = ev_lim
        res_json['statement_limit'] = MAX_STATEMENTS
        res_json['statements_returned'] = len(stmts_json)
        res_json['end_of_statements'] = (len(stmts_json) < MAX_STATEMENTS)
        res_json['statements_removed'] = 0
        res_json['evidence_returned'] = result.returned_evidence

        if fmt == 'html':
            title = TITLE + ': ' + 'Results'
            ev_totals = res_json.pop('evidence_totals')
            stmts = stmts_from_json(stmts_json.values())
            html_assembler = HtmlAssembler(stmts,
                                           res_json,
                                           ev_totals,
                                           source_counts,
                                           title=title,
                                           db_rest_url=request.url_root[:-1])
            idbr_template = env.get_template('idbr_statements_view.html')
            identity = user.identity() if user else None
            content = html_assembler.make_model(idbr_template,
                                                identity=identity)
            # Surface the logged message counts per level on the page.
            if tracker.get_messages():
                level_stats = [
                    '%d %ss' % (n, lvl.lower())
                    for lvl, n in tracker.get_level_stats().items()
                ]
                msg = ' '.join(level_stats)
                content = html_assembler.append_warning(msg)
            mimetype = 'text/html'
        else:  # Return JSON for all other values of the format argument
            res_json.update(tracker.get_level_stats())
            res_json['statements'] = stmts_json
            res_json['source_counts'] = source_counts
            content = json.dumps(res_json)
            mimetype = 'application/json'

        resp = Response(content, mimetype=mimetype)
        logger.info("Exiting with %d statements with %d/%d evidence of size "
                    "%f MB after %s seconds." %
                    (res_json['statements_returned'],
                     res_json['evidence_returned'], res_json['total_evidence'],
                     sys.getsizeof(resp.data) / 1e6, sec_since(start_time)))
        return resp
Пример #12
0
def get_metadata(level):
    """Return metadata about matching content at the requested level.

    Parameters
    ----------
    level : str
        One of 'hashes', 'relations' or 'agents'; it selects which getter of
        the database query is used. Any other value aborts the request with
        status 400.

    Returns
    -------
    Response
        A JSON response whose ``relations`` key lists the entries, each with
        an ``english`` rendering and, if ``with_cur_counts`` was given, a
        ``cur_count``.
    """
    start = datetime.utcnow()
    query = request.args.copy()

    # Figure out authorization.
    has = dict.fromkeys(['elsevier', 'medscan'], False)
    user, roles = resolve_auth(query)
    for role in roles:
        for resource in has.keys():
            has[resource] |= role.permissions.get(resource, False)
    logger.info('Auths: %s' % str(has))

    w_curations = _pop(query, 'with_cur_counts', False)

    kwargs = dict(limit=_pop(query, 'limit', type_cast=int),
                  offset=_pop(query, 'offset', type_cast=int),
                  best_first=_pop(query, 'best_first', True))
    try:
        db_query = _db_query_from_web_query(query, {'HasAgent'}, True)
    except Exception as e:
        abort(Response(f'Problem forming query: {e}', 400))
        return

    # Without medscan permission, exclude medscan-only content.
    if not has['medscan']:
        db_query -= HasOnlySource('medscan')

    if level == 'hashes':
        res = db_query.get_interactions(**kwargs)
    elif level == 'relations':
        res = db_query.get_relations(with_hashes=w_curations, **kwargs)
    elif level == 'agents':
        res = db_query.get_agents(with_hashes=w_curations, **kwargs)
    else:
        # An unknown level is a client error; without an explicit status
        # the aborting response went out with the default 200.
        abort(Response(f'Invalid level: {level}', 400))
        return

    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Got %s results after %.2f." % (len(res.results), dt))

    ret = res.json()
    res_list = []
    for key, entry in ret.pop('results').items():
        # Filter medscan from source counts.
        if not has['medscan']:
            res.evidence_totals[key] -= entry['source_counts'].pop(
                'medscan', 0)
            entry['total_count'] = res.evidence_totals[key]
            if not entry['source_counts']:
                # Nothing but medscan supported this entry: leave it out.
                logger.warning("Censored content present.")
                continue

        # Create english
        if level == 'agents':
            # Builds "A", "A interacts with B", "A interacts with B, and C",
            # or "A interacts with B, C, ..., and Z".
            ag_dict = entry['agents']
            if len(ag_dict) == 0:
                eng = ''
            else:
                ag_list = list(ag_dict.values())
                eng = ag_list[0]
                if len(ag_dict) > 1:
                    eng += ' interacts with ' + ag_list[1]
                    if len(ag_dict) > 3:
                        eng += ', ' + ', '.join(ag_list[2:-1])
                    if len(ag_dict) > 2:
                        eng += ', and ' + ag_list[-1]
        else:
            eng = EnglishAssembler([stmt_from_interaction(entry)]).make_model()
        entry['english'] = eng

        res_list.append(entry)

    # Look up curations, if result with_curations was set.
    if w_curations:
        # Map every statement hash to the entry whose counter it feeds.
        rel_hash_lookup = {}
        if level == 'hashes':
            for rel in res_list:
                rel['cur_count'] = 0
                rel_hash_lookup[rel['hash']] = rel
        else:
            for rel in res_list:
                for h in rel['hashes']:
                    rel['cur_count'] = 0
                    rel_hash_lookup[h] = rel
        curations = get_curations(pa_hash=set(rel_hash_lookup.keys()))
        for cur in curations:
            rel_hash_lookup[cur.pa_hash]['cur_count'] += 1

    # Finish up the query.
    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Returning with %s results after %.2f seconds." %
                (len(res_list), dt))

    ret['relations'] = res_list
    resp = Response(json.dumps(ret), mimetype='application/json')

    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Result prepared after %.2f seconds." % dt)
    return resp