# Example #1 (score: 0)
def get_race_data(document_id):
    """Return race data for *document_id*, consulting the configured cache.

    Behavior by configuration:
      - Redis caching enabled: look up ``raceFinder:<id>``; on a miss,
        compute via ``get_race_for_doc``, store the JSON-encoded result,
        and return it.
      - Memory caching enabled: delegate to the memoized ``_get_race_data``.
      - No caching: call ``get_race_for_doc`` directly.
    """
    if util.use_redis_caching == "true":
        util.add_cache_query_count()
        cache_key = "raceFinder:" + str(document_id)
        cached = util.get_from_redis_cache(cache_key)
        if cached:
            return json.loads(cached)
        # Cache miss: compute, persist, then return.
        util.add_cache_compute_count()
        computed = get_race_for_doc(document_id)
        util.write_to_redis_cache(cache_key, json.dumps(computed))
        return computed
    if util.use_memory_caching == "true":
        util.add_cache_query_count()
        return _get_race_data(document_id)
    return get_race_for_doc(document_id)
def get_race_data(document_id):
    """Return race data for *document_id*, using Redis, memory, or no cache.

    NOTE(review): this is a verbatim re-definition of ``get_race_data``
    declared earlier in this file; in Python the later definition wins.
    Consider removing one copy.
    """
    use_redis = util.use_redis_caching == "true"
    use_memory = util.use_memory_caching == "true"

    if use_redis:
        util.add_cache_query_count()
        key = "raceFinder:" + str(document_id)
        serialized = util.get_from_redis_cache(key)
        if not serialized:
            # Nothing cached yet — compute and write through.
            util.add_cache_compute_count()
            value = get_race_for_doc(document_id)
            util.write_to_redis_cache(key, json.dumps(value))
            return value
        return json.loads(serialized)
    elif use_memory:
        util.add_cache_query_count()
        return _get_race_data(document_id)
    else:
        return get_race_for_doc(document_id)
# Example #3 (score: 0)
def get_document_by_id(document_id):
    """Fetch a Solr document by id, consulting the configured cache first.

    Behavior by configuration:
      - Redis caching enabled: look up ``doc:<id>``; on a miss, query Solr,
        cache the JSON-encoded document, and use the fresh result.
      - Memory caching enabled: delegate to the memoized
        ``_get_document_by_id``.
      - In every case, a falsy ``doc`` falls through to a direct Solr query.
    """
    doc = None

    if util.use_redis_caching == "true":
        util.add_cache_query_count()
        # Coerce to str so non-string ids (e.g. ints) do not raise a
        # TypeError on concatenation — consistent with the
        # "raceFinder:" + str(...) key used by get_race_data. Building the
        # key once also guarantees read and write use the same key.
        key = "doc:" + str(document_id)
        txt = util.get_from_redis_cache(key)
        if not txt:
            util.add_cache_compute_count()
            doc = solr_data.query_doc_by_id(document_id,
                                            solr_url=util.solr_url)
            util.write_to_redis_cache(key, json.dumps(doc))
        else:
            doc = json.loads(txt)
    elif util.use_memory_caching == "true":
        util.add_cache_query_count()
        doc = _get_document_by_id(document_id)

    # Caching disabled, or the cached/memoized value was falsy:
    # fall back to querying Solr directly.
    if not doc:
        return solr_data.query_doc_by_id(document_id, solr_url=util.solr_url)
    else:
        return doc
# Example #4 (score: 0)
def get_job_performance(job_ids: list, connection_string: str) -> dict:
    """Return per-job performance metrics for the given NLP job ids.

    For every id in *job_ids*, queries ``nlp.nlp_job`` for the job status
    and ``nlp.nlp_job_status`` for count updates, producing a dict of::

        {job_id: {"status": ..., "final_results": ..., "final_subjects": ...,
                  "intermediate_results": ..., "intermediate_subjects": ...}}

    Jobs that report COMPLETED but have no final counts recorded fall back
    to Redis-cached totals, and finally to ``phenotype_stats``.

    On any error a traceback is printed and whatever metrics were gathered
    so far are returned (best-effort, preserved from the original code).
    """
    if not job_ids:
        return dict()

    conn = psycopg2.connect(connection_string)
    cursor = conn.cursor()
    metrics = dict()

    try:
        # One "%s" placeholder per job id for the parameterized IN (...)
        # clauses (replaces a quadratic string-append loop).
        in_clause = ', '.join(['%s'] * len(job_ids))
        cursor.execute(
            """
            SELECT status, nlp_job_id from nlp.nlp_job 
            where nlp_job_id in ({})
            """.format(in_clause), job_ids)
        for status, job_id in cursor.fetchall():
            metrics[job_id] = {
                "status": status,
                "final_results": 0,
                "final_subjects": 0,
                "intermediate_results": 0,
                "intermediate_subjects": 0,
                "counts_found": 0
            }

        cursor.execute(
            """
            SELECT status, description, date_updated, nlp_job_id from nlp.nlp_job_status
            where nlp_job_id in  ({}) 
            order by date_updated
            """.format(in_clause), job_ids)

        for row in cursor.fetchall():
            status_name = row[0]
            status_value = row[1]
            job_id = row[3]

            performance = metrics[job_id]
            counts_found = performance.get('counts_found', 0)

            # Only FINAL counts contribute to counts_found, which later
            # decides whether we must fall back to cached/derived stats.
            if status_name == 'STATS_FINAL_SUBJECTS':
                performance['final_subjects'] = status_value
                counts_found += int(status_value)
            elif status_name == 'STATS_FINAL_RESULTS':
                performance['final_results'] = status_value
                counts_found += int(status_value)
            elif status_name == 'STATS_INTERMEDIATE_SUBJECTS':
                performance['intermediate_subjects'] = status_value
            elif status_name == 'STATS_INTERMEDIATE_RESULTS':
                performance['intermediate_results'] = status_value

            performance['counts_found'] = counts_found

        for k, performance in metrics.items():
            counts_found = performance.get('counts_found', 0)
            # BUG FIX: compare this job's own status; the original used the
            # `status` variable left over from the first query loop, i.e.
            # the status of whichever job happened to be fetched last.
            if counts_found == 0 and performance.get('status') == COMPLETED:
                # BUG FIX: read keys now match the write keys below —
                # the original read 'final_results{}' (no underscore) and
                # so could never hit the 'final_results_{}' entries.
                final_subjects = util.get_from_redis_cache(
                    'final_subjects_{}'.format(k))
                final_results = util.get_from_redis_cache(
                    'final_results_{}'.format(k))
                if final_results and final_subjects:
                    performance['final_subjects'] = final_subjects
                    performance['final_results'] = final_results
                else:
                    # BUG FIX: use k (the job this iteration is about);
                    # the original passed the stale `job_id` from the
                    # previous loop.
                    stats = phenotype_stats(str(k), True)

                    performance['final_subjects'] = stats["subjects"]
                    performance['final_results'] = stats["results"]

                    util.write_to_redis_cache('final_subjects_{}'.format(k),
                                              stats["subjects"])
                    util.write_to_redis_cache('final_results_{}'.format(k),
                                              stats["results"])

                # BUG FIX: same underscore fix for the intermediate keys.
                int_subjects = util.get_from_redis_cache(
                    'intermediate_subjects_{}'.format(k))
                int_results = util.get_from_redis_cache(
                    'intermediate_results_{}'.format(k))
                if int_subjects and int_results:
                    performance['intermediate_subjects'] = int_subjects
                    performance['intermediate_results'] = int_results
                else:
                    # BUG FIX: k, not the stale job_id (see above).
                    intermediate_stats = phenotype_stats(str(k), False)

                    performance['intermediate_subjects'] = intermediate_stats[
                        "subjects"]
                    performance['intermediate_results'] = intermediate_stats[
                        "results"]

                    util.write_to_redis_cache(
                        'intermediate_subjects_{}'.format(k),
                        intermediate_stats["subjects"])
                    util.write_to_redis_cache(
                        'intermediate_results_{}'.format(k),
                        intermediate_stats["results"])

            # Drop the bookkeeping counter from the returned payload.
            if 'counts_found' in performance:
                del performance['counts_found']
            # BUG FIX: write back under k — the original wrote to
            # metrics[job_id] with the stale job_id from the earlier loop.
            metrics[k] = performance
    except Exception:
        # Best-effort: log and return whatever was gathered (original
        # behavior preserved; consider proper logging / re-raising).
        traceback.print_exc(file=sys.stdout)
    finally:
        cursor.close()
        conn.close()

    return metrics