def mongo_writer(client, pipeline, job, batch, pipeline_config: config.PipelineConfig, val, doc, type):
    """Store one labelled text span as a pipeline result.

    Assembles a result record from the job identifiers, the report fields
    found in ``doc`` and the span attributes on ``val``, then passes it to
    ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle
            (presumably a pymongo client -- confirm against callers).
        pipeline: Stored under ``pipeline_id``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        pipeline_config: Supplies ``owner``, ``name`` and ``concept_code``.
        val: Object exposing ``sentence``, ``text``, ``start``, ``end``,
            ``label`` and ``description``.
        doc: Mapping providing ``report_type``, ``report_id``, ``subject``
            and ``report_date``.
        type: Stored under ``pipeline_type``. The name shadows the builtin
            but is kept because callers may pass it by keyword.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    db = client[util.mongo_db]

    # Keys are added in the same order as before so the stored document
    # keeps its field layout.
    record = dict(
        pipeline_type=type,
        pipeline_id=pipeline,
        job_id=job,
        batch=batch,
        owner=pipeline_config.owner,
        sentence=val.sentence,
        report_type=doc["report_type"],
        nlpql_feature=pipeline_config.name,
        inserted_date=datetime.datetime.now(),
        report_id=doc["report_id"],
        subject=doc["subject"],
        report_date=doc["report_date"],
        section="",
        concept_code=pipeline_config.concept_code,
    )
    # Span-specific payload; "term" and "text" both carry the span text.
    record.update(
        term=val.text,
        text=val.text,
        start=val.start,
        end=val.end,
        label=val.label,
        description=val.description,
        phenotype_final=False,
    )
    return config.insert_pipeline_results(pipeline_config, db, record)
def mongo_writer(client, pipeline, job, batch, pipeline_config, term, doc, type):
    """Store one matched term, with its context attributes, as a pipeline result.

    Assembles a result record from the job identifiers, the report fields
    found in ``doc`` and the attributes on ``term``, then passes it to
    ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle
            (presumably a pymongo client -- confirm against callers).
        pipeline: Stored under ``pipeline_id``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        pipeline_config: Supplies ``owner``, ``name`` and ``concept_code``.
        term: Object exposing ``sentence``, ``section``, ``term``, ``start``,
            ``end``, ``negex``, ``temporality`` and ``experiencer``.
        doc: Mapping providing ``report_type``, ``report_id``, ``subject``
            and ``report_date``.
        type: Stored under ``pipeline_type``. The name shadows the builtin
            but is kept because callers may pass it by keyword.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    db = client[util.mongo_db]

    # Keys are added in the same order as before so the stored document
    # keeps its field layout.
    record = dict(
        pipeline_type=type,
        pipeline_id=pipeline,
        job_id=job,
        batch=batch,
        owner=pipeline_config.owner,
        sentence=term.sentence,
        report_type=doc["report_type"],
        nlpql_feature=pipeline_config.name,
        inserted_date=datetime.datetime.now(),
        report_id=doc["report_id"],
        subject=doc["subject"],
        report_date=doc["report_date"],
    )
    # Term-specific payload; the term's ``negex`` attribute is stored
    # under the "negation" key.
    record.update(
        section=term.section,
        term=term.term,
        start=term.start,
        end=term.end,
        concept_code=pipeline_config.concept_code,
        negation=term.negex,
        temporality=term.temporality,
        experiencer=term.experiencer,
        phenotype_final=False,
    )
    return config.insert_pipeline_results(pipeline_config, db, record)
def mongo_writer(client, pipeline, job, batch, pipeline_config, val, doc, p_type):
    """Store one token and its linguistic attributes as a pipeline result.

    Assembles a result record from the job identifiers, the report fields
    found in ``doc`` and the token attributes on ``val``, then passes it to
    ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle
            (presumably a pymongo client -- confirm against callers).
        pipeline: Stored under ``pipeline_id``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        pipeline_config: Supplies ``owner``, ``name`` and ``concept_code``.
        val: Object exposing ``sentence``, ``text``, ``lemma``, ``pos``,
            ``tag``, ``dep``, ``shape``, ``is_alpha``, ``is_stop`` and
            ``description``.
        doc: Mapping providing ``report_type``, ``report_id``, ``subject``
            and ``report_date``.
        p_type: Stored under ``pipeline_type``.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    db = client[util.mongo_db]

    # Keys are added in the same order as before so the stored document
    # keeps its field layout.
    record = dict(
        pipeline_type=p_type,
        pipeline_id=pipeline,
        job_id=job,
        batch=batch,
        owner=pipeline_config.owner,
        sentence=val.sentence,
        report_type=doc["report_type"],
        nlpql_feature=pipeline_config.name,
        inserted_date=datetime.datetime.now(),
        report_id=doc["report_id"],
        subject=doc["subject"],
        report_date=doc["report_date"],
        section="",
        concept_code=pipeline_config.concept_code,
    )
    # Token-specific payload; "term" and "text" both carry the token text.
    record.update(
        term=val.text,
        text=val.text,
        lemma=val.lemma,
        pos=val.pos,
        tag=val.tag,
        dep=val.dep,
        shape=val.shape,
        is_alpha=val.is_alpha,
        is_stop=val.is_stop,
        description=val.description,
        phenotype_final=False,
    )
    return config.insert_pipeline_results(pipeline_config, db, record)
def pipeline_mongo_writer(client, pipeline_id, pipeline_type, job, batch, p_config: pipeline_config.PipelineConfig, doc, data_fields: dict, prefix: str = '', phenotype_final: bool = False):
    """Stamp ``data_fields`` with job/report metadata and insert it as a result.

    ``data_fields`` is modified in place: pipeline, job and config metadata
    are written into it, followed by either the report fields copied from
    ``doc`` or empty-string placeholders for every name in ``doc_fields``
    that is not already present.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline_id: Stored under ``pipeline_id``.
        pipeline_type: Stored under ``pipeline_type``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        p_config: Supplies ``owner``, ``name``, ``concept_code``,
            ``concept_code_system`` and ``final``.
        doc: Mapping keyed by the ``util.solr_*_field`` names, or a falsy
            value when no source document is available.
        data_fields: Task-specific result fields; must be non-empty.
        prefix: Prepended to ``p_config.name`` for ``nlpql_feature``.
        phenotype_final: OR-ed with ``p_config.final``.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns, or ``None``
        (after printing a message) when ``data_fields`` or ``p_config`` is
        falsy.
    """
    db = client[util.mongo_db]

    # Both inputs are mandatory; report the first one that is missing.
    for required, complaint in (
        (data_fields, 'must have additional data fields'),
        (p_config, 'must have pipeline config'),
    ):
        if not required:
            print(complaint)
            return None

    data_fields.update({
        "pipeline_type": pipeline_type,
        "pipeline_id": pipeline_id,
        "job_id": job,
        "batch": batch,
        "owner": p_config.owner,
        "nlpql_feature": (prefix + p_config.name),
        "inserted_date": datetime.datetime.now(),
        "concept_code": p_config.concept_code,
        "concept_code_system": p_config.concept_code_system,
        "phenotype_final": (phenotype_final or p_config.final),
    })

    if doc:
        # Copy the report fields one at a time, in a fixed order.
        solr_sources = (
            ("report_id", util.solr_report_id_field),
            ("subject", util.solr_subject_field),
            ("report_date", util.solr_report_date_field),
            ("report_type", util.solr_report_type_field),
            ("source", util.solr_source_field),
            ("solr_id", util.solr_id_field),
        )
        for target_key, solr_key in solr_sources:
            data_fields[target_key] = doc[solr_key]
    else:
        # No document: make sure every expected doc field exists.
        for name in doc_fields:
            data_fields.setdefault(name, '')

    return config.insert_pipeline_results(p_config, db, data_fields)
def mongo_writer(client, pipeline, job, batch, pipeline_config: config.PipelineConfig, meas: Measurement, doc, type):
    """Store one extracted measurement as a pipeline result.

    Assembles a result record from the job identifiers, the report fields
    found in ``doc`` and the measurement attributes on ``meas``, then passes
    it to ``config.insert_pipeline_results``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle
            (presumably a pymongo client -- confirm against callers).
        pipeline: Stored under ``pipeline_id``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        pipeline_config: Supplies ``owner``, ``name`` and ``concept_code``.
        meas: Measurement exposing ``sentence``, ``text``, ``start``,
            ``end``, ``subject``, ``X``, ``Y``, ``Z``, ``units``,
            ``location``, ``condition``, ``value1``, ``value2`` and
            ``temporality``; also read by subscript as ``meas['X']``.
        doc: Mapping providing ``report_type``, ``report_id``, ``subject``
            and ``report_date``.
        type: Stored under ``pipeline_type``. The name shadows the builtin
            but is kept because callers may pass it by keyword.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    db = client[util.mongo_db]

    # NOTE(review): this is the only subscript access on ``meas``; every
    # other field is read as an attribute. Confirm Measurement supports
    # ``__getitem__``.
    primary_value = meas['X']

    # Keys are added in the same order as before so the stored document
    # keeps its field layout.
    record = dict(
        pipeline_type=type,
        pipeline_id=pipeline,
        job_id=job,
        batch=batch,
        owner=pipeline_config.owner,
        sentence=meas.sentence,
        report_type=doc["report_type"],
        nlpql_feature=pipeline_config.name,
        inserted_date=datetime.datetime.now(),
        report_id=doc["report_id"],
        subject=doc["subject"],
        report_date=doc["report_date"],
        section="",
        concept_code=pipeline_config.concept_code,
    )
    # Measurement-specific payload; "term" holds the measurement's subject.
    record.update(
        text=meas.text,
        start=meas.start,
        value=primary_value,
        end=meas.end,
        term=meas.subject,
        dimension_X=meas.X,
        dimension_Y=meas.Y,
        dimension_Z=meas.Z,
        units=meas.units,
        location=meas.location,
        condition=meas.condition,
        value1=meas.value1,
        value2=meas.value2,
        temporality=meas.temporality,
        phenotype_final=False,
    )
    return config.insert_pipeline_results(pipeline_config, db, record)
def pipeline_mongo_writer(client, pipeline_id, pipeline_type, job, batch, p_config: pipeline_config.PipelineConfig, doc, data_fields: dict):
    """Stamp ``data_fields`` with job/report metadata and insert it as a result.

    ``data_fields`` is modified in place and then handed to
    ``config.insert_pipeline_results``. Unlike variants that accept a
    missing document, this one always indexes ``doc``.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline_id: Stored under ``pipeline_id``.
        pipeline_type: Stored under ``pipeline_type``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        p_config: Supplies ``owner``, ``name`` and ``concept_code``.
        doc: Mapping keyed by ``util.solr_report_id_field``,
            ``util.solr_subject_field`` and ``util.solr_report_date_field``.
        data_fields: Task-specific result fields to be augmented.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns.
    """
    db = client[util.mongo_db]

    data_fields.update({
        "pipeline_type": pipeline_type,
        "pipeline_id": pipeline_id,
        "job_id": job,
        "batch": batch,
        "owner": p_config.owner,
        "nlpql_feature": p_config.name,
        "inserted_date": datetime.datetime.now(),
    })

    # Copy the report fields one at a time, in a fixed order.
    solr_sources = (
        ("report_id", util.solr_report_id_field),
        ("subject", util.solr_subject_field),
        ("report_date", util.solr_report_date_field),
    )
    for target_key, solr_key in solr_sources:
        data_fields[target_key] = doc[solr_key]

    data_fields.update({
        "concept_code": p_config.concept_code,
        "phenotype_final": False,
    })

    return config.insert_pipeline_results(p_config, db, data_fields)
def _first_highlight(data_fields, candidate_fields):
    """Return ``[value]`` for the first non-empty candidate field, else ``[]``.

    Values that have no length (numbers, ``None``) are skipped instead of
    raising ``TypeError``, since fields such as ``value`` may be numeric.
    """
    for name in candidate_fields:
        candidate = data_fields.get(name, '')
        try:
            non_empty = len(candidate) > 0
        except TypeError:
            # Not a sized object, so it cannot serve as highlight text.
            continue
        if non_empty:
            return [candidate]
    return []


def pipeline_mongo_writer(client, pipeline_id, pipeline_type, job, batch, p_config: pipeline_config.PipelineConfig, doc, data_fields: dict, prefix: str = '', phenotype_final: bool = False):
    """Stamp ``data_fields`` with job/report metadata and insert it as a result.

    ``data_fields`` is modified in place: pipeline, job and config metadata
    are written into it, followed by either the report fields copied from
    ``doc`` or empty-string placeholders for every name in ``doc_fields``
    that is not already present. A ``result_display`` entry is created if
    absent; if present but without highlights, its ``highlights`` list is
    filled from the first non-empty highlight candidate field.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline_id: Converted with ``int()`` and stored under
            ``pipeline_id``.
        pipeline_type: Stored under ``pipeline_type``.
        job: Converted with ``int()`` and stored under ``job_id``.
        batch: Stored under ``batch``.
        p_config: Supplies ``owner``, ``name``, ``concept_code``,
            ``concept_code_system``, ``final`` and ``display_name``.
        doc: Mapping keyed by the ``util.solr_*_field`` names, or a falsy
            value when no source document is available.
        data_fields: Task-specific result fields; must be non-empty.
        prefix: Prepended to ``p_config.name`` for ``nlpql_feature``.
        phenotype_final: OR-ed with ``p_config.final``.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns, or ``None``
        (after logging an error) when ``data_fields`` or ``p_config`` is
        falsy.

    Raises:
        ValueError, TypeError: If ``pipeline_id`` or ``job`` cannot be
            converted by ``int()``.
    """
    db = client[util.mongo_db]

    if not data_fields:
        log('must have additional data fields', ERROR)
        return None
    if not p_config:
        log('must have pipeline config', ERROR)
        return None

    data_fields["pipeline_type"] = pipeline_type
    data_fields["pipeline_id"] = int(pipeline_id)
    data_fields["job_id"] = int(job)
    data_fields["batch"] = batch
    data_fields["owner"] = p_config.owner
    data_fields["nlpql_feature"] = (prefix + p_config.name)
    data_fields["inserted_date"] = datetime.datetime.now()
    data_fields["concept_code"] = p_config.concept_code
    data_fields["concept_code_system"] = p_config.concept_code_system
    data_fields["phenotype_final"] = (phenotype_final or p_config.final)
    data_fields["display_name"] = p_config.display_name

    if doc:
        data_fields["report_id"] = doc[util.solr_report_id_field]
        data_fields["subject"] = doc[util.solr_subject_field]
        data_fields["report_date"] = doc[util.solr_report_date_field]
        data_fields["report_type"] = doc[util.solr_report_type_field]
        data_fields["source"] = doc[util.solr_source_field]
        data_fields["solr_id"] = doc[util.solr_id_field]
    else:
        # No document: make sure every expected doc field exists.
        for df in doc_fields:
            if df not in data_fields:
                data_fields[df] = ''

    # Candidate fields for highlight text, in priority order.
    highlight_fields = [
        'term', 'text', 'value', 'units', 'word', 'highlight', 'highlights'
    ]

    if "result_display" not in data_fields:
        # Missing or falsy offsets are stored as 0.
        s = data_fields.get('start') or 0
        e = data_fields.get('end') or 0
        data_fields["result_display"] = {
            "date": data_fields.get('report_date'),
            "result_content": data_fields.get('sentence'),
            "highlights": _first_highlight(data_fields, highlight_fields),
            "sentence": data_fields.get('sentence'),
            'start': [s],
            'end': [e]
        }
    else:
        display = data_fields.get('result_display')
        highlights = display.get("highlights", list())
        # ``not highlights`` also covers an explicit None, which ``len()``
        # would reject.
        if not highlights:
            highlights = _first_highlight(data_fields, highlight_fields)
        data_fields['result_display']['highlights'] = highlights

    inserted = config.insert_pipeline_results(p_config, db, data_fields)
    log(
        '(job={}; pipeline={}) inserted into mongodb {}'.format(
            job, pipeline_id, repr(inserted.inserted_id)), DEBUG)
    return inserted
def pipeline_mongo_writer(client, pipeline_id, pipeline_type, job, batch, p_config: pipeline_config.PipelineConfig, doc, data_fields: dict, prefix: str = '', phenotype_final: bool = False):
    """Stamp ``data_fields`` with job/report metadata and insert it as a result.

    ``data_fields`` is modified in place: pipeline, job and config metadata
    are written into it, followed by either the report fields copied from
    ``doc`` or empty-string placeholders for every name in ``doc_fields``
    that is not already present. If no ``result_display`` entry exists, one
    is built from the ``report_date``, ``sentence``, ``start``, ``end`` and
    ``text`` entries.

    Args:
        client: Indexed with ``util.mongo_db`` to obtain the database handle.
        pipeline_id: Stored under ``pipeline_id``.
        pipeline_type: Stored under ``pipeline_type``.
        job: Stored under ``job_id``.
        batch: Stored under ``batch``.
        p_config: Supplies ``owner``, ``name``, ``concept_code``,
            ``concept_code_system``, ``final`` and ``display_name``.
        doc: Mapping keyed by the ``util.solr_*_field`` names, or a falsy
            value when no source document is available.
        data_fields: Task-specific result fields; must be non-empty.
        prefix: Prepended to ``p_config.name`` for ``nlpql_feature``.
        phenotype_final: OR-ed with ``p_config.final``.

    Returns:
        Whatever ``config.insert_pipeline_results`` returns, or ``None``
        (after printing a message) when ``data_fields`` or ``p_config`` is
        falsy.
    """
    db = client[util.mongo_db]

    # Both inputs are mandatory; report the first one that is missing.
    for required, complaint in (
        (data_fields, 'must have additional data fields'),
        (p_config, 'must have pipeline config'),
    ):
        if not required:
            print(complaint)
            return None

    data_fields.update({
        "pipeline_type": pipeline_type,
        "pipeline_id": pipeline_id,
        "job_id": job,
        "batch": batch,
        "owner": p_config.owner,
        "nlpql_feature": (prefix + p_config.name),
        "inserted_date": datetime.datetime.now(),
        "concept_code": p_config.concept_code,
        "concept_code_system": p_config.concept_code_system,
        "phenotype_final": (phenotype_final or p_config.final),
        "display_name": p_config.display_name,
    })

    if doc:
        # Copy the report fields one at a time, in a fixed order.
        solr_sources = (
            ("report_id", util.solr_report_id_field),
            ("subject", util.solr_subject_field),
            ("report_date", util.solr_report_date_field),
            ("report_type", util.solr_report_type_field),
            ("source", util.solr_source_field),
            ("solr_id", util.solr_id_field),
        )
        for target_key, solr_key in solr_sources:
            data_fields[target_key] = doc[solr_key]
    else:
        # No document: make sure every expected doc field exists.
        for name in doc_fields:
            data_fields.setdefault(name, '')

    if "result_display" not in data_fields:
        # Missing or falsy offsets are stored as 0.
        span_start = data_fields.get('start') or 0
        span_end = data_fields.get('end') or 0
        matched_text = data_fields.get('text')
        sentence = data_fields.get('sentence')
        data_fields["result_display"] = {
            "date": data_fields.get('report_date'),
            "result_content": sentence,
            "highlights": [matched_text] if matched_text else [],
            "sentence": sentence,
            'start': [span_start],
            'end': [span_end],
        }

    return config.insert_pipeline_results(p_config, db, data_fields)