import json
import sys
import traceback

import psycopg2

# NOTE: `util`, `solr_data`, `phenotype_stats`, `COMPLETED`, `get_race_for_doc`,
# `_get_race_data`, and `_get_document_by_id` are project-local names expected to
# be imported or defined elsewhere in this module.


def get_race_data(document_id):
    """Return race-finder results for a document, using the configured cache."""
    if util.use_redis_caching == "true":
        # Redis-backed cache: look up by key, compute and store on a miss.
        util.add_cache_query_count()
        key = "raceFinder:" + str(document_id)
        res = util.get_from_redis_cache(key)
        if res:
            return json.loads(res)
        util.add_cache_compute_count()
        res2 = get_race_for_doc(document_id)
        util.write_to_redis_cache(key, json.dumps(res2))
        return res2
    elif util.use_memory_caching == "true":
        # In-memory (memoized) cache.
        util.add_cache_query_count()
        return _get_race_data(document_id)
    else:
        # No caching configured; compute directly.
        return get_race_for_doc(document_id)
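# Illustrative usage (a sketch, not part of the original module): the document id
# below is a placeholder, and the util caching flags / Redis connection are assumed
# to be configured by the surrounding application.
#
#     race = get_race_data("sample_document_id")
#     print(race)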
def get_document_by_id(document_id):
    """Fetch a Solr document by id, consulting the Redis or in-memory cache when enabled."""
    doc = None
    if util.use_redis_caching == "true":
        util.add_cache_query_count()
        txt = util.get_from_redis_cache("doc:" + document_id)
        if not txt:
            # Cache miss: query Solr and store the serialized document.
            util.add_cache_compute_count()
            doc = solr_data.query_doc_by_id(document_id, solr_url=util.solr_url)
            util.write_to_redis_cache("doc:" + document_id, json.dumps(doc))
        else:
            doc = json.loads(txt)
    elif util.use_memory_caching == "true":
        util.add_cache_query_count()
        doc = _get_document_by_id(document_id)

    if not doc:
        # Caching disabled, or the cached lookup returned nothing:
        # fall back to querying Solr directly.
        return solr_data.query_doc_by_id(document_id, solr_url=util.solr_url)
    return doc
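# Illustrative usage (a sketch, not part of the original module): assumes Solr is
# reachable at util.solr_url and that the caching flags are configured elsewhere.
#
#     doc = get_document_by_id("sample_document_id")
#     print(doc)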
def get_job_performance(job_ids: list, connection_string: str):
    """Return per-job status and result/subject counts for the given NLP job ids."""
    if not job_ids:
        return dict()

    conn = psycopg2.connect(connection_string)
    cursor = conn.cursor()
    metrics = dict()

    try:
        # Build a "%s, %s, ..." placeholder list so job_ids can be passed as
        # query parameters.
        in_clause = ', '.join(['%s'] * len(job_ids))

        cursor.execute(
            """
            SELECT status, nlp_job_id FROM nlp.nlp_job
            WHERE nlp_job_id IN ({})
            """.format(in_clause), job_ids)
        statuses = cursor.fetchall()
        for status, job_id in statuses:
            metrics[job_id] = {
                "status": status,
                "final_results": 0,
                "final_subjects": 0,
                "intermediate_results": 0,
                "intermediate_subjects": 0,
                "counts_found": 0
            }

        cursor.execute(
            """
            SELECT status, description, date_updated, nlp_job_id FROM nlp.nlp_job_status
            WHERE nlp_job_id IN ({}) ORDER BY date_updated
            """.format(in_clause), job_ids)
        updates = cursor.fetchall()
        for status_name, status_value, status_date, job_id in updates:  # status_date unused
            performance = metrics[job_id]
            counts_found = performance.get('counts_found', 0)

            if status_name == 'STATS_FINAL_SUBJECTS':
                performance['final_subjects'] = status_value
                counts_found += int(status_value)
            elif status_name == 'STATS_FINAL_RESULTS':
                performance['final_results'] = status_value
                counts_found += int(status_value)
            elif status_name == 'STATS_INTERMEDIATE_SUBJECTS':
                performance['intermediate_subjects'] = status_value
            elif status_name == 'STATS_INTERMEDIATE_RESULTS':
                performance['intermediate_results'] = status_value

            performance['counts_found'] = counts_found
            metrics[job_id] = performance

        for k in metrics.keys():
            performance = metrics[k]
            counts_found = performance.get('counts_found', 0)

            # No count rows were recorded for a completed job: fall back to the
            # Redis cache, then to recomputing the stats. Cache keys here mirror
            # the keys written below.
            if counts_found == 0 and performance['status'] == COMPLETED:
                final_subjects = util.get_from_redis_cache(
                    'final_subjects_{}'.format(k))
                final_results = util.get_from_redis_cache(
                    'final_results_{}'.format(k))
                if final_results and final_subjects:
                    performance['final_subjects'] = final_subjects
                    performance['final_results'] = final_results
                else:
                    stats = phenotype_stats(str(k), True)
                    performance['final_subjects'] = stats["subjects"]
                    performance['final_results'] = stats["results"]
                    util.write_to_redis_cache('final_subjects_{}'.format(k),
                                              stats["subjects"])
                    util.write_to_redis_cache('final_results_{}'.format(k),
                                              stats["results"])

                int_subjects = util.get_from_redis_cache(
                    'intermediate_subjects_{}'.format(k))
                int_results = util.get_from_redis_cache(
                    'intermediate_results_{}'.format(k))
                if int_subjects and int_results:
                    performance['intermediate_subjects'] = int_subjects
                    performance['intermediate_results'] = int_results
                else:
                    intermediate_stats = phenotype_stats(str(k), False)
                    performance['intermediate_subjects'] = intermediate_stats["subjects"]
                    performance['intermediate_results'] = intermediate_stats["results"]
                    util.write_to_redis_cache(
                        'intermediate_subjects_{}'.format(k),
                        intermediate_stats["subjects"])
                    util.write_to_redis_cache(
                        'intermediate_results_{}'.format(k),
                        intermediate_stats["results"])

            # counts_found is internal bookkeeping; drop it from the returned metrics.
            if 'counts_found' in performance:
                del performance['counts_found']
            metrics[k] = performance
    except Exception:
        traceback.print_exc(file=sys.stdout)
    finally:
        conn.close()

    return metrics
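# Illustrative usage (a sketch, not part of the original module): the job ids and
# the Postgres connection string below are placeholders.
#
#     conn_str = "host=localhost dbname=nlp user=clarity password=secret"
#     metrics = get_job_performance([101, 102], conn_str)
#     for job_id, stats in metrics.items():
#         print(job_id, stats["status"], stats["final_results"], stats["final_subjects"])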