def lookup_phenotype_results_by_id(id_list: list):
    """Fetch several phenotype result documents by their Mongo ``_id``.

    Args:
        id_list: Values that ``ObjectId`` accepts, one per document wanted.

    Returns:
        A dict with:
            ``results``: list of the matching documents, in cursor order.
            ``indexes``: maps ``str(_id)`` to that document's position in
                ``results``.
            ``success``: set to ``False`` only when the lookup raised; the
                key is absent otherwise.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]
    obj = {'results': [], 'indexes': {}}
    try:
        # Mongo shell equivalent:
        #   db.phenotype_results.find({"_id": {$in: [ObjectId("..."), ObjectId("...")]}})
        ids = [ObjectId(x) for x in id_list]
        obj['results'] = list(db.phenotype_results.find({"_id": {"$in": ids}}))
        for position, doc in enumerate(obj['results']):
            # NOTE(review): the mapped value is only used to read '_id'; it
            # replaces the stored document only if display_mapping mutates
            # its argument in place -- confirm.
            mapped = display_mapping(doc)
            obj['indexes'][str(mapped['_id'])] = position
    except Exception:
        traceback.print_exc(file=sys.stdout)
        obj['success'] = False
    finally:
        # Release the connection on both the success and the failure path.
        client.close()
    return obj
def writeResultFeedback(data):
    """Insert or update the feedback record for a single result.

    The record is keyed on ``result_id`` in the ``result_feedback``
    collection: a new document is inserted when none exists, otherwise the
    existing one is updated with ``$set`` using every field of ``data``.

    Args:
        data: Mapping holding at least ``job_id`` and ``result_id``.

    Returns:
        A ``Response`` with status 200 on success, or status 400 carrying the
        exception text when anything fails (including a missing key).
    """
    client = None
    try:
        # Both keys are read up front so that a malformed payload fails with
        # KeyError (-> 400) before any database work; job_id is not used
        # otherwise.
        job_id = data['job_id']
        result_id = data['result_id']

        client = util.mongo_client()
        db = client[util.mongo_db]
        # Indexing the database yields the collection handle.
        collection = db['result_feedback']

        query = {'result_id': result_id}
        if collection.find_one(query) is None:
            collection.insert_one(data)
        else:
            collection.update_one(query, {"$set": data})

        return Response("Successfully wrote result feedback",
                        status=200,
                        mimetype='application/json')
    except Exception as e:
        return Response(str(e), status=400, mimetype='application/json')
    finally:
        # The client may never have been created if parsing failed first.
        if client is not None:
            client.close()
def phenotype_subjects(job_id: str, phenotype_final: bool):
    """Count phenotype results per subject for one job.

    Args:
        job_id: Job identifier; converted with ``int()`` before querying.
        phenotype_final: Value the ``phenotype_final`` field must equal.

    Returns:
        A list of ``{"_id": <subject>, "count": <n>}`` documents sorted by
        ``count`` in descending order, or an empty list if the query failed.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]
    res = []
    # Mongo shell equivalent:
    #   db.phenotype_results.aggregate([
    #       {"$match": {"job_id": {"$eq": 10201}, "phenotype_final": {"$eq": true}}},
    #       {"$group": {_id: "$subject", count: {$sum: 1}}}])
    try:
        pipeline = [
            {
                "$match": {
                    "phenotype_final": {"$eq": phenotype_final},
                    "job_id": {"$eq": int(job_id)},
                }
            },
            {
                "$group": {
                    "_id": "$subject",
                    "count": {"$sum": 1},
                }
            },
        ]
        res = list(db.phenotype_results.aggregate(pipeline))
        res = sorted(res, key=lambda r: r['count'], reverse=True)
    except Exception:
        traceback.print_exc(file=sys.stdout)
    finally:
        # Always release the connection, even when the aggregation fails.
        client.close()
    return res
def phenotype_subject_results(job_id: str, phenotype_final: bool, subject: str):
    """Return one subject's phenotype results with NaN-valued fields removed.

    Args:
        job_id: Job identifier; converted with ``int()`` before querying.
        phenotype_final: Value the ``phenotype_final`` field must equal.
        subject: Value the ``subject`` field must equal.

    Returns:
        A list of copies of the matching documents in which every field whose
        value is a numeric NaN has been dropped; an empty list if the query
        failed.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]
    res = []
    try:
        query = {
            "job_id": int(job_id),
            "phenotype_final": phenotype_final,
            "subject": subject,
        }
        # Materialize first so that a cursor failure leaves `res` empty
        # rather than partially filled.
        documents = list(db["phenotype_results"].find(query))
        for document in documents:
            cleaned = document.copy()
            for key, val in document.items():
                if isinstance(val, (int, float)) and math.isnan(val):
                    del cleaned[key]
            res.append(cleaned)
    except Exception:
        traceback.print_exc(file=sys.stdout)
    finally:
        # Always release the connection, even when the query fails.
        client.close()
    return res
def delete_job(job_id: str, connection_string: str):
    """Delete a job's rows from Postgres and its phenotype results from Mongo.

    The two Postgres deletes are committed first; the Mongo delete runs
    afterwards, so a Mongo failure leaves the Postgres rows already removed.

    Args:
        job_id: Job identifier; must be convertible with ``int()``.
        connection_string: DSN handed to ``psycopg2.connect``.

    Returns:
        ``1`` when every step succeeded, ``-1`` otherwise.
    """
    conn = psycopg2.connect(connection_string)
    client = util.mongo_client()
    cursor = conn.cursor()
    flag = -1  # To determine whether the update was successful or not

    try:
        # Validate once, before touching either store, and bind the value as
        # a query parameter instead of concatenating it into the SQL text.
        job_key = int(job_id)
        cursor.execute(
            "DELETE FROM nlp.nlp_job_status WHERE nlp_job_id = %s", (job_key,))
        cursor.execute(
            "DELETE FROM nlp.nlp_job WHERE nlp_job_id = %s", (job_key,))
        conn.commit()

        db = client[util.mongo_db]
        # delete_many supersedes Collection.remove, which no longer exists
        # in PyMongo 4.
        db.phenotype_results.delete_many({"job_id": job_key})
        flag = 1
    except Exception:
        flag = -1
        traceback.print_exc(file=sys.stdout)
    finally:
        conn.close()
        client.close()

    return flag
def run(self):
    """Run one batch of this task.

    Steps, all executed while the task's output target is open for writing:
      1. load the pipeline configuration for ``self.pipeline``;
      2. query Solr for this batch's documents into ``self.docs``;
      3. copy each document into the in-memory and/or Redis cache when the
         corresponding ``util`` switch is the string ``"true"``;
      4. delegate to ``self.run_custom_task``.

    Job status is updated before each phase. Any exception is printed,
    recorded on the job with WARNING status, and swallowed; the Mongo
    client is always closed.
    """
    task_family_name = str(self.task_family)
    if self.task_name == "ClarityNLPLuigiTask":
        self.task_name = task_family_name
    client = util.mongo_client()

    try:
        with self.output().open('w') as temp_file:
            temp_file.write("start writing custom task")
            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.IN_PROGRESS,
                                   "Running Batch %s" % self.batch)

            self.pipeline_config = config.get_pipeline_config(
                self.pipeline, util.conn_string)

            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.IN_PROGRESS, "Running Solr query")
            self.docs = solr_data.query(
                self.solr_query,
                rows=util.row_count,
                start=self.start,
                solr_url=util.solr_url,
                tags=self.pipeline_config.report_tags,
                mapper_inst=util.report_mapper_inst,
                mapper_url=util.report_mapper_url,
                mapper_key=util.report_mapper_key,
                types=self.pipeline_config.report_types,
                sources=self.pipeline_config.sources,
                filter_query=self.pipeline_config.filter_query,
                cohort_ids=self.pipeline_config.cohort,
                job_results_filters=self.pipeline_config.job_results)

            for d in self.docs:
                doc_id = d[util.solr_report_id_field]
                # The caching switches are compared as strings, not booleans.
                if util.use_memory_caching == "true":
                    k = keys.hashkey(doc_id)
                    document_cache[k] = d
                if util.use_redis_caching == "true":
                    util.write_to_redis_cache("doc:" + doc_id, json.dumps(d))

            jobs.update_job_status(str(self.job), util.conn_string,
                                   jobs.IN_PROGRESS,
                                   "Running %s main task" % self.task_name)
            self.run_custom_task(temp_file, client)
            temp_file.write("Done writing custom task!")

        # Drop the batch's documents once the task is done with them.
        self.docs = list()
    except Exception as ex:
        traceback.print_exc(file=sys.stderr)
        # format_exc() captures the traceback of the exception being handled;
        # format_stack() would only describe how this handler was reached.
        jobs.update_job_status(str(self.job), util.conn_string, jobs.WARNING,
                               traceback.format_exc())
        print(ex)
    finally:
        client.close()
def paged_phenotype_results(job_id: str, phenotype_final: bool, last_id: str = ''):
    """Return one page of phenotype results for a job.

    Paging is keyed on ``_id``: an empty ``last_id`` requests the first page
    (and additionally reports the total ``count``); any other value returns
    documents whose ``_id`` is greater than ``ObjectId(last_id)``.

    NOTE(review): ``'-1'`` is not treated specially; like every non-empty
    value it is handed to ``ObjectId`` -- confirm that is intended.

    Args:
        job_id: Job identifier; converted with ``int()`` for the query.
        phenotype_final: Value the ``phenotype_final`` field must equal.
        last_id: ``_id`` of the last document of the previous page, or ``''``.

    Returns:
        A dict with ``success``; on success also ``results``, ``no_more``,
        ``new_last_id``, ``columns``, ``result_count`` and, for the first
        page only, ``count``.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]
    obj = dict()

    try:
        columns = sorted(get_columns(db, job_id, 'phenotype', phenotype_final))
        collection = db.phenotype_results

        if last_id == '':
            first_page_filter = {
                "job_id": int(job_id),
                "phenotype_final": phenotype_final
            }
            res = list(collection.find(first_page_filter).limit(page_size))
            obj['count'] = int(collection.find(first_page_filter).count())
        else:
            next_page_filter = {
                "_id": {"$gt": ObjectId(last_id)},
                "job_id": int(job_id),
                "phenotype_final": phenotype_final
            }
            res = list(collection.find(next_page_filter).limit(page_size))

        results_length = len(res)
        # A short page means there is nothing left to fetch.
        no_more = results_length < page_size
        new_last_id = res[-1]['_id'] if results_length > 0 else ''

        obj['results'] = [display_mapping(r) for r in res]
        obj['no_more'] = no_more
        obj['new_last_id'] = new_last_id
        obj['columns'] = columns
        obj['result_count'] = results_length
        obj['success'] = True
    except Exception:
        traceback.print_exc(file=sys.stdout)
        obj['success'] = False
    finally:
        client.close()

    return obj
def lookup_phenotype_result_by_id(id: str):
    """Fetch a single phenotype result by its Mongo ``_id``.

    Args:
        id: Value accepted by ``ObjectId``. (The name shadows the builtin but
            is part of the public signature.)

    Returns:
        The result of ``display_mapping`` applied to the document found. If
        anything raises, a dict with ``success`` set to ``False`` is returned
        instead: the partially processed document when one was fetched,
        otherwise a fresh dict.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]
    obj = dict()
    try:
        obj = db.phenotype_results.find_one({'_id': ObjectId(id)})
        obj = display_mapping(obj)
    except Exception:
        traceback.print_exc(file=sys.stdout)
        # find_one() returns None when nothing matches; without this guard
        # the assignment below would itself raise inside the handler.
        if not isinstance(obj, dict):
            obj = dict()
        obj['success'] = False
    finally:
        # Release the connection on both the success and the failure path.
        client.close()
    return obj
def phenotype_performance_results(jobs: list):
    """Summarize reviewer feedback for each of the given jobs.

    For every job the ``result_feedback`` collection is scanned. A feedback
    entry with a non-empty ``comments`` field counts as a comment; every
    other entry counts as an answer, and is correct when ``is_correct`` is
    the string ``'true'`` or ``'True'``.

    Args:
        jobs: Job ids as strings; each is stripped and converted with
            ``int()`` for the query.

    Returns:
        A dict mapping each job (as given) to a dict with ``total_answered``,
        ``total_correct``, ``total_incorrect``, ``accuracy_score`` and
        ``total_comments``. Empty when ``jobs`` is empty; jobs processed
        before a failure are kept.
    """
    metrics = dict()
    # Bail out before opening a connection so nothing is left unclosed.
    if not jobs:
        return metrics

    client = util.mongo_client()
    db = client[util.mongo_db]
    try:
        for job in jobs:
            query = {"job_id": int(job.strip())}

            has_comments = 0
            count = 0
            correct = 0
            # Feedback fields: ['comments', 'feature', 'is_correct', 'job_id',
            #                   'subject', 'report_id', 'result_id']
            for res in db['result_feedback'].find(query):
                if len(res['comments']) > 0:
                    has_comments += 1
                else:
                    count += 1
                    if res['is_correct'] in ('true', 'True'):
                        correct += 1

            metrics[job] = {
                'total_answered': count,
                'total_correct': correct,
                'total_incorrect': count - correct,
                # Guard against division by zero when nothing was answered.
                'accuracy_score': correct / count if count > 0 else 0.0,
                'total_comments': has_comments
            }
    except Exception as e:
        log(e, ERROR)
    finally:
        client.close()

    return metrics
def cleanup(self, pipeline_id, job, owner, pipeline_type, p_config):
    """Run the collector's custom cleanup step for a job.

    Marks the job as IN_PROGRESS, then delegates to ``self.custom_cleanup``
    with a Mongo client and database handle. Any exception is printed,
    recorded on the job with WARNING status, and swallowed.

    Args:
        pipeline_id, job, owner, pipeline_type, p_config: Passed through
            unchanged to ``self.custom_cleanup``; ``job`` is also used for
            the status updates.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]

    try:
        jobs.update_job_status(job, util.conn_string, jobs.IN_PROGRESS,
                               "Running Collector Cleanup")
        self.custom_cleanup(pipeline_id, job, owner, pipeline_type, p_config,
                            client, db)
    except Exception as ex:
        traceback.print_exc(file=sys.stderr)
        # format_exc() captures the traceback of the exception being handled;
        # format_stack() would only describe how this handler was reached.
        jobs.update_job_status(job, util.conn_string, jobs.WARNING,
                               traceback.format_exc())
        print(ex)
    finally:
        # Release the connection whether or not the cleanup succeeded.
        client.close()
def cleanup(self, pipeline_id, job, owner, pipeline_type, p_config):
    """Run the collector's custom cleanup step for a job.

    Marks the job as IN_PROGRESS, then delegates to ``self.custom_cleanup``
    with a Mongo client and database handle. Any exception is recorded on
    the job with WARNING status, logged at ERROR level, and swallowed; the
    client is always closed.

    Args:
        pipeline_id, job, owner, pipeline_type, p_config: Passed through
            unchanged to ``self.custom_cleanup``; ``job`` is also used for
            the status updates.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]

    try:
        jobs.update_job_status(job, util.conn_string, jobs.IN_PROGRESS,
                               "Running Collector Cleanup")
        self.custom_cleanup(pipeline_id, job, owner, pipeline_type, p_config,
                            client, db)
    except Exception as ex:
        # format_exc() captures the traceback of the exception being handled;
        # format_stack() would only describe how this handler was reached.
        jobs.update_job_status(job, util.conn_string, jobs.WARNING,
                               traceback.format_exc())
        log(ex, ERROR)
    finally:
        client.close()
def phenotype_results_by_context(context: str, query_filters: dict):
    """Return the context identifiers of phenotype results matching a filter.

    Args:
        context: ``'patient'`` or ``'subject'`` (case-insensitive) projects
            the ``subject`` field; any other value projects ``report_id``.
        query_filters: Mongo filter document passed to ``find`` unchanged.

    Returns:
        A list of the matching documents restricted by that projection, or
        an empty list if the query failed.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]
    res = []
    try:
        if context.lower() in ('patient', 'subject'):
            project = 'subject'
        else:
            project = 'report_id'
        res = list(db["phenotype_results"].find(query_filters, {project: 1}))
    except Exception:
        traceback.print_exc(file=sys.stdout)
    finally:
        # Always release the connection, even when the query fails.
        client.close()
    return res
def generic_results(job: str, job_type: str, phenotype_final: bool = False):
    """Export a job's results of the given type to a CSV file under /tmp.

    Rows come from the ``<job_type>_results`` collection; for the
    ``'phenotype'`` job type they are additionally filtered on
    ``phenotype_final``. Columns are the sorted output of ``get_columns``;
    a field missing from a document is written as an empty string. The
    header row is written only if there is at least one result.

    Args:
        job: Job identifier; converted with ``int()`` for the query.
        job_type: Prefix of the results collection to read.
        phenotype_final: Filter value, used only for phenotype jobs.

    Returns:
        The path of the CSV file. Errors are logged, not raised, so the path
        is returned even if the export failed part-way.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]
    today = datetime.today().strftime('%m_%d_%Y_%H%M')
    filename = '/tmp/job%s_%s_%s.csv' % (job, job_type, today)

    try:
        with open(filename, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile,
                                    delimiter=util.delimiter,
                                    quotechar=util.quote_character,
                                    quoting=csv.QUOTE_MINIMAL)

            query = {"job_id": int(job)}
            if job_type == 'phenotype':
                query["phenotype_final"] = phenotype_final

            query_results = db[job_type + "_results"].find(query)
            columns = sorted(get_columns(db, job, job_type, phenotype_final))

            wrote_header = False
            for record in query_results:
                if not wrote_header:
                    csv_writer.writerow(columns)
                    wrote_header = True
                # One cell per column; absent fields become empty strings.
                row = [record[col] if col in record else '' for col in columns]
                csv_writer.writerow(row)
    except Exception as e:
        log(e)
    finally:
        client.close()

    return filename
def phenotype_feature_results(job_id: str, feature: str, subject: str):
    """Return a subject's phenotype results for one NLPQL feature.

    Args:
        job_id: Job identifier; converted with ``int()`` before querying.
        feature: Value the ``nlpql_feature`` field must equal.
        subject: Value the ``subject`` field must equal.

    Returns:
        A list of the matching documents, or an empty list if the query
        failed.
    """
    client = util.mongo_client()
    db = client[util.mongo_db]
    res = []
    try:
        query = {
            "job_id": int(job_id),
            "nlpql_feature": feature,
            "subject": subject,
        }
        res = list(db["phenotype_results"].find(query))
    except Exception:
        traceback.print_exc(file=sys.stdout)
    finally:
        # Always release the connection, even when the query fails.
        client.close()
    return res
def pipeline_results(job: str):
    """Export a job's pipeline results to a CSV file under /tmp.

    The header starts with the names in ``pipeline_output_positions`` and is
    completed, in sorted order, with any additional keys found on the first
    result document. A field missing from a document is written as an empty
    string. Nothing but an empty file is produced when there are no results.

    Args:
        job: Job identifier; converted with ``int()`` for the query.

    Returns:
        The path of the CSV file. Errors are printed, not raised, so the
        path is returned even if the export failed part-way.
    """
    client = util.mongo_client()
    today = datetime.today().strftime('%m_%d_%Y_%H%M')
    filename = '/tmp/job%s_pipeline_%s.csv' % (job, today)
    db = client[util.mongo_db]

    try:
        with open(filename, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile,
                                    delimiter=util.delimiter,
                                    quotechar=util.quote_character,
                                    quoting=csv.QUOTE_MINIMAL)

            # Work on a copy: extending the shared module-level list would
            # leak this job's extra columns into every later export.
            header_values = list(pipeline_output_positions)
            header_written = False

            for res in db.pipeline_results.find({"job_id": int(job)}):
                if not header_written:
                    new_cols = sorted(k for k in res if k not in header_values)
                    header_values.extend(new_cols)
                    csv_writer.writerow(header_values)
                    header_written = True

                csv_writer.writerow(
                    [res[key] if key in res else '' for key in header_values])
    except Exception as e:
        print(e)
    finally:
        client.close()

    return filename
def phenotype_feedback_results(job: str):
    """Export a job's result feedback to a CSV file under /tmp.

    Rows come from the ``result_feedback`` collection. The columns are a
    fixed, alphabetically sorted set; a field missing from a document is
    written as an empty string. The header row is written only if there is
    at least one feedback entry.

    Args:
        job: Job identifier; converted with ``int()`` for the query.

    Returns:
        The path of the CSV file. Errors are logged, not raised, so the path
        is returned even if the export failed part-way.
    """
    job_type = 'annotations'
    client = util.mongo_client()
    db = client[util.mongo_db]
    today = datetime.today().strftime('%m_%d_%Y_%H%M')
    filename = '/tmp/job_feedback%s_%s_%s.csv' % (job, job_type, today)

    try:
        with open(filename, 'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile,
                                    delimiter=util.delimiter,
                                    quotechar=util.quote_character,
                                    quoting=csv.QUOTE_MINIMAL)

            feedback_cursor = db['result_feedback'].find({"job_id": int(job)})
            columns = sorted([
                'comments', 'feature', 'is_correct', 'job_id', 'subject',
                'report_id', 'result_id'
            ])

            wrote_header = False
            for entry in feedback_cursor:
                if not wrote_header:
                    csv_writer.writerow(columns)
                    wrote_header = True
                # One cell per column; absent fields become empty strings.
                row = [entry[col] if col in entry else '' for col in columns]
                csv_writer.writerow(row)
    except Exception as e:
        log(e)
    finally:
        client.close()

    return filename
def mongo_process_operations(expr_obj_list,
                             db,
                             job_id,
                             phenotype: PhenotypeModel,
                             phenotype_id,
                             phenotype_owner,
                             c: PhenotypeOperations,
                             final=False):
    """
    Use MongoDB aggregation to evaluate NLPQL expressions.

    Each expression object is evaluated against the ``phenotype_results``
    collection and the generated result documents are inserted back into
    that collection. Exceptions propagate to the caller; the Mongo client
    opened here is closed either way.

    ``db`` and ``final`` are accepted but not used by this function; the
    final flag is read from ``c['final']``.
    """

    print('mongo_process_operations expr_object_list: ')
    for expr_obj in expr_obj_list:
        print(expr_obj)

    context_var = phenotype.context.lower()
    if 'document' == context_var:
        # document IDs are in the report_id field
        context_field = 'report_id'
    else:
        # patient IDs are in the subject field
        context_field = 'subject'

    # setup access to the Mongo collection
    client = util.mongo_client()
    try:
        mongo_db_obj = client[util.mongo_db]
        mongo_collection_obj = mongo_db_obj['phenotype_results']

        is_final_save = c['final']

        for expr_obj in expr_obj_list:

            # the 'is_final' flag only applies to the last subexpression
            if expr_obj != expr_obj_list[-1]:
                is_final = False
            else:
                is_final = is_final_save

            # evaluate the (sub)expression in expr_obj
            eval_result = expr_eval.evaluate_expression(expr_obj,
                                                        job_id,
                                                        context_field,
                                                        mongo_collection_obj)

            # query MongoDB to get result docs
            cursor = mongo_collection_obj.find({'_id': {'$in': eval_result.doc_ids}})

            # initialize for MongoDB result document generation
            phenotype_info = expr_result.PhenotypeInfo(
                job_id=job_id,
                phenotype_id=phenotype_id,
                owner=phenotype_owner,
                context_field=context_field,
                is_final=is_final
            )

            # generate result documents
            if expr_eval.EXPR_TYPE_MATH == eval_result.expr_type:
                output_docs = expr_result.to_math_result_docs(eval_result,
                                                              phenotype_info,
                                                              cursor)
            else:
                assert expr_eval.EXPR_TYPE_LOGIC == eval_result.expr_type

                # flatten the result set into a set of Mongo documents
                doc_map, oid_list_of_lists = expr_eval.flatten_logical_result(
                    eval_result, mongo_collection_obj)

                output_docs = expr_result.to_logic_result_docs(eval_result,
                                                               phenotype_info,
                                                               doc_map,
                                                               oid_list_of_lists)

            if len(output_docs) > 0:
                mongo_collection_obj.insert_many(output_docs)
            else:
                print('mongo_process_operations ({0}): '
                      'no phenotype matches on "{1}".'.format(eval_result.expr_type,
                                                              eval_result.expr_text))
    finally:
        # Close the connection even when evaluation or insertion raises.
        client.close()
def mongo_process_operations(expr_obj_list,
                             db,
                             job_id,
                             phenotype: PhenotypeModel,
                             phenotype_id,
                             phenotype_owner,
                             c: PhenotypeOperations,
                             final=False):
    """
    Use MongoDB aggregation to evaluate NLPQL expressions.

    Each expression object is evaluated against the ``phenotype_results``
    collection and the generated result documents are inserted back into
    that collection. Failures during evaluation or insertion are logged and
    swallowed; a ``job_id`` that cannot be converted with ``int()`` raises
    to the caller before any connection is opened.

    ``db`` and ``final`` are accepted but not used by this function; the
    final flag is read from ``c['final']``.
    """

    log('mongo_process_operations expr_object_list: ')
    for expr_obj in expr_obj_list:
        log(expr_obj)

    context_var = phenotype.context.lower()
    if 'document' == context_var:
        # document IDs are in the report_id field
        context_field = 'report_id'
    else:
        # patient IDs are in the subject field
        context_field = 'subject'

    # ensure integer job_id; expression evaluator needs to lookup results
    # by job_id, but a job id of 42 is different from a job_id of '42'.
    # Done before connecting so a bad job_id cannot leave a client open.
    job_id = int(job_id)

    # setup access to the Mongo collection
    client = util.mongo_client()
    mongo_db_obj = client[util.mongo_db]
    mongo_collection_obj = mongo_db_obj['phenotype_results']

    try:
        is_final_save = c['final']

        for expr_obj in expr_obj_list:

            # the 'is_final' flag only applies to the last subexpression
            if expr_obj != expr_obj_list[-1]:
                is_final = False
            else:
                is_final = is_final_save

            # evaluate the (sub)expression in expr_obj
            eval_result = expr_eval.evaluate_expression(expr_obj,
                                                        job_id,
                                                        context_field,
                                                        mongo_collection_obj)

            # query MongoDB to get result docs
            cursor = mongo_collection_obj.find({'_id': {'$in': eval_result.doc_ids}})

            # initialize for MongoDB result document generation
            phenotype_info = expr_result.PhenotypeInfo(
                job_id=job_id,
                phenotype_id=phenotype_id,
                owner=phenotype_owner,
                context_field=context_field,
                is_final=is_final
            )

            # generate result documents
            if expr_eval.EXPR_TYPE_MATH == eval_result.expr_type:
                output_docs = expr_result.to_math_result_docs(eval_result,
                                                              phenotype_info,
                                                              cursor)
            else:
                assert expr_eval.EXPR_TYPE_LOGIC == eval_result.expr_type

                # flatten the result set into a set of Mongo documents
                doc_map, oid_list_of_lists = expr_eval.flatten_logical_result(
                    eval_result, mongo_collection_obj)

                output_docs = expr_result.to_logic_result_docs(eval_result,
                                                               phenotype_info,
                                                               doc_map,
                                                               oid_list_of_lists)

            if len(output_docs) > 0:
                log('***** mongo_process_operations: writing {0} '
                    'output_docs *****'.format(len(output_docs)))
                try:
                    mongo_collection_obj.insert_many(output_docs)
                except pymongo.errors.BulkWriteError as e:
                    # A failed bulk insert is logged and the loop moves on to
                    # the next expression.
                    log('****** mongo_process_operations: '
                        'mongo insert_many failure ******')
                    log(e)
            else:
                log('mongo_process_operations ({0}): '
                    'no phenotype matches on "{1}".'.format(eval_result.expr_type,
                                                            eval_result.expr_text))
    except Exception as exc:
        log(exc, ERROR)
    finally:
        client.close()