def _set_transport(self): """Plug in the appropriate transport mechanism""" # Transport strategies differ for the different reports if self.criteria.reportable_region: self._transport = Distribute_client(zip_first=True) else: self._transport = PHINMS_client(zip_first=True)
def testUnZippedUpload(self): "Can we upload an unzipped copy" tmpfile = tempfile.NamedTemporaryFile('w') tmpfile.write(lorem_ipsum) tmpfile.seek(0) doc_id = document_store(tmpfile.name, document_type='essence', compress_with=None) self.assertTrue(doc_id) agent = PHINMS_client(zip_first=False) self.assertFalse(agent.transfer_file(doc_id))
class GenerateReport(object): """ Process options and generate the requested report. Optionally persists the file to the filesystem, and uploads to the DOH sftp server. """ __version__ = '0.2' config = Config() IGNORE_SITE = config.get('longitudinal', 'ignore_site', default='') # Order matters, create a tuple of paired values (reportColumn, # essenceColumn) - NB, the Diagnosis column is being bastardized. # Previously there was an SQL function to do the subselect, but it # ran way too slow. Now contains the foreign key to join w/ the # diagnosis for the respective visit. diagnosis_column_index = 7 patient_class_column_index = 11 columns = ( ('Hosp', 'hospital'), ('Reg Date', 'visit_date'), ('Time', 'visit_time'), ('Sex', 'gender'), ('Age', 'age'), ('Reason For Visit', 'chief_complaint'), ('Zip Code', 'zip'), ('Diagnosis', 'visit_pk'), ('Admit Status', 'gipse_disposition'), ('Medical Record No.', 'patient_id'), ('Visit Record No.', 'visit_id'), ('Service Area', 'patient_class'), ) assert (columns[diagnosis_column_index][1] == 'visit_pk') assert (columns[patient_class_column_index][1] == 'patient_class') def __init__(self, user=None, password=None, report_criteria=None, datefile=None): """Initialize report generation. :param user: database user :param password: database password :param report_criteria: ReportCriteria defining specifics :param datefile: useful for persistent walks through time """ self.user = user self.password = password self.criteria = report_criteria self.database = self.criteria.database if datefile: assert ((self.criteria.start_date, self.criteria.end_date) == datefile.get_date_range()) self.datePersistence = datefile self._diags = {} self._prepare_output_file() self._prepare_columns() self._set_transport() def _prepare_columns(self): # Don't include the patient_class column if splitting out by # patient_class if self.criteria.patient_class: len_b4 = len(self.columns) self.columns =\ self.columns[:self.patient_class_column_index] \ + self.columns[self.patient_class_column_index + 1:] assert (len(self.columns) + 1 == len_b4) def _set_transport(self): """Plug in the appropriate transport mechanism""" # Transport strategies differ for the different reports if self.criteria.reportable_region: self._transport = Distribute_client(zip_first=True) else: self._transport = PHINMS_client(zip_first=True) def _generate_output_filename(self, start_date=None, end_date=None): start_date = self.criteria.start_date if start_date is None\ else start_date end_date = self.criteria.end_date if end_date is None else end_date datestr = end_date.strftime('%Y%m%d') if start_date != end_date: datestr = '-'.join( (start_date.strftime('%Y%m%d'), end_date.strftime('%Y%m%d'))) filename = self.criteria.report_method + '-' + datestr + '.txt' config = Config() tmp_dir = config.get('general', 'tmp_dir', default='/tmp') filepath = os.path.join(tmp_dir, filename) return filepath def _prepare_output_file(self): """Open the local filesystem file for output""" filepath = self.\ _generate_output_filename(start_date=self.criteria.start_date, end_date=self.criteria.end_date) # watch for oversight errors; notify if like report exists - # unless it's size zero (from a previous failed run) if os.path.exists(filepath) and os.path.getsize(filepath): logging.warning("Found requested report file already "\ "exists - overwriting: '%s'"\ % filepath) self.output = open(filepath, 'w') self._output_filename = self.output.name @property def output_filename(self): if not hasattr(self, '_output_filename'): raise RuntimeError("prerequisite call to "\ "_prepare_output_file() "\ "didn't happen!") return self._output_filename def _header(self): if self.criteria.include_vitals: columns = [c[0] for c in self.columns] columns += ('Measured Temperature', 'O2 Saturation', 'Self-Reported Influenza Vaccine', 'Self-Reported H1N1 Vaccine') return '|'.join(columns) else: return '|'.join([c[0] for c in self.columns]) def _build_join_tables(self): """ Scope continues to grow, build all join tables necessary for the query. Some are only necessary with certain features on. """ # Always need the list of reportable visits self._build_visit_join_table() if self.criteria.include_vitals: self._build_vitals_join_table() def _build_visit_join_table(self): """ Helper in selection of visits for the report - this method builds a temporary table and populates it with the visit_pks that belong in the report. This should include all visit_pks with the matching admit_datetime as well as any that have received updates since the last like report was produced. """ # If include_vitals is on, we also need the visit_id to keep # the joins managable. vitals don't have a patient class, so # you can't join on the same values. sql = "CREATE TEMPORARY TABLE reportable_pks (pk "\ "integer not null unique)" selectCols = "fact_visit.pk" self._getConn() self.access.raw_query(sql) # If we're only selecting those facilites in a region, the SQL # is more complicated - build up the respective clauses. joinClause = regionClause = "" if self.criteria.reportable_region: joinClause = "JOIN internal_reportable_region ON "\ "internal_reportable_region.dim_facility_pk = "\ "fact_visit.dim_facility_pk" regionClause = "AND region_name = '%s'" %\ self.criteria.reportable_region # Another HACK! One site is not even wanted by the state DOH, # as it's being duplicated from another source, and ESSENCE # can't help but count them twice. Remove this one site # regardless else: joinClause = "JOIN internal_reportable_region ON "\ "internal_reportable_region.dim_facility_pk = "\ "fact_visit.dim_facility_pk" regionClause = "AND region_name = '%s'" % self.IGNORE_SITE # Limit by patient_class if requested. Note we may still end # up with visit ids that have changed patient classes, so more # pruning later is necessary. pc_limit = "" if self.criteria.patient_class: pc_limit = "AND patient_class = '%c'" %\ self.criteria.patient_class # Start with all visits for the requested date range sql = "INSERT INTO reportable_pks SELECT %s FROM "\ "fact_visit %s WHERE admit_datetime BETWEEN '%s' AND "\ "'%s' %s %s" %\ (selectCols, joinClause, self.criteria.start_date, self.criteria.end_date + timedelta(days=1), pc_limit, regionClause) self.access.raw_query(sql) if self.criteria.include_updates: # In this case, add all visits with updates since the # last run, but no newer than the requested date (in case # we're building reports forward from historical data) sql = "SELECT max(processed_datetime) FROM internal_report "\ "WHERE report_method = '%s'" % self.criteria.report_method cursor = self.access.raw_query(sql) last_report_generated = cursor.fetchall()[0][0] if last_report_generated is None: last_report_generated = '2009-01-01' # our epoch logging.debug("including updates, last_report_generated: "\ "%s", last_report_generated) sql = "INSERT INTO reportable_pks SELECT %(sel_cols)s FROM "\ "fact_visit %(join_clause)s LEFT JOIN reportable_pks ON "\ "reportable_pks.pk = fact_visit.pk WHERE "\ "last_updated > '%(last_report)s' AND admit_datetime "\ "< '%(date)s' AND reportable_pks.pk IS NULL "\ "%(pc_limit)s %(region_clause)s" %\ {'sel_cols': selectCols, 'last_report': last_report_generated, 'date': self.criteria.end_date + timedelta(days=1), 'pc_limit': pc_limit, 'join_clause': joinClause, 'region_clause': regionClause} self.access.raw_query(sql) cursor = self.access.raw_query("SELECT COUNT(*) FROM "\ "reportable_pks") logging.debug("%d visits to report on", cursor.fetchall()[0][0]) def _build_vitals_join_table(self): """When report is to include vitals - we use an additional temporary table (visit_loinc_data) to hold the data for more timely queries. Like the rest of the report, the list of interesting visits is limited to the rows in the reportable_pks - see _build_join_table() for details. """ raise ValueError('not ported yet') sql = """ CREATE TEMPORARY TABLE visit_loinc_data ( visit_id VARCHAR(255) not null, patient_class CHAR(1) default null, observation_id VARCHAR(255) not null, observation_result VARCHAR(255) not null) """ self._getConn() self.access.raw_query(sql) sql = """ INSERT INTO visit_loinc_data (visit_id, patient_class, observation_id, observation_result) SELECT visit.visit_id, visit.patient_class, observation_id, observation_result FROM visit JOIN hl7_visit ON visit.visit_id = hl7_visit.visit_id JOIN hl7_obx ON hl7_visit.hl7_msh_id = hl7_obx.hl7_msh_id JOIN reportable_pks ON reportable_pks.visit_id = visit.visit_id AND reportable_pks.patient_class = visit.patient_class WHERE observation_id in ('8310-5', '20564-1', '46077-4', '29544-4') """ self.access.raw_query(sql) def _select_from_essence_view(self): """Build up the SQL select statement to be used in gathering the data for this report. """ stmt = """SELECT %s FROM essence e JOIN reportable_pks ri ON e.visit_pk = ri.pk""" %\ (','.join(['e.' + c[1] for c in self.columns])) return stmt def _select_diagnosis(self): """ Need to pull in all the diagnosis data for this report. This is saved in an instance dictionary for use in self._diagnosis to generate the list of diagnoses for each respective visit. A list of unique diagnoses ordered by rank is required. """ # We order descending on dx_datetime as the most recent should # be best. Add any others as the persistence mechanism only # saves a unique icd9 dx that has changed status. stmt = "SELECT fact_visit_pk, rank, icd9 "\ "FROM assoc_visit_dx JOIN "\ "dim_dx ON dim_dx_pk = dim_dx.pk JOIN "\ "reportable_pks ON "\ "assoc_visit_dx.fact_visit_pk = reportable_pks.pk "\ "ORDER BY dx_datetime DESC" cursor = self.access.raw_query(stmt) for row in cursor.fetchall(): visit_pk = row[0] if visit_pk in self._diags: self._diags[visit_pk].add(row[0], row[1], row[2]) else: self._diags[visit_pk] = \ SortedDiagnosis(row[0], row[1], row[2]) def _diagnosis(self, visit_pk): if visit_pk in self._diags: return [ self._diags[visit_pk].__repr__(), ] else: return [ '', ] def _select_vitals(self): """ Need to pull in all the vitals data for this report. This is saved in an instance dictionary for use in self._vitals_for_visit to generate the list of vitals for each respective visit. This is an effective NOP when self.criteria.include_vitals = False """ if not self.criteria.include_vitals: return None self._vitals = {} stmt = """SELECT reportable_pks.visit_pk, observation_id, observation_result FROM visit_loinc_data JOIN reportable_pks ON reportable_pks.visit_id = visit_loinc_data.visit_id""" cursor = self.access.raw_query(stmt) for row in cursor.fetchall(): visit_pk = row[0] if visit_pk in self._vitals: self._vitals[visit_pk].add(row[1], row[2]) else: self._vitals[visit_pk] = \ Vitals(row[1], row[2]) def _vitals_for_visit(self, visit_pk): """Returns the list of vitals for the visit in question. This is an effective NOP when self.criteria.include_vitals = False """ if not self.criteria.include_vitals: return [] if visit_pk in self._vitals: return self._vitals[visit_pk].__repr__() else: return Vitals().__repr__() def _write_report(self, save_report=False): """ Write out and potentially store the results. Generate results via database queries and write the results to self.output. :param save_report: If set, persist the document and related metadata to the mbds archive. returns the document ID, the mbds archive key, if saved """ out = self.output print >> out, self._header() self._build_join_tables() self._select_diagnosis() self._select_vitals() cursor = self.access.raw_query(self._select_from_essence_view()) for row in cursor.fetchall(): # Each row is the colums up to the diagnosis + the # comma separated diagnosis + the rest of the columns # and finally with vitals if configured for such visit_pk = row[self.diagnosis_column_index] # yuck, but true print >> out,\ '|'.join([strSansNone(column) for column in row[:self.diagnosis_column_index]] + self._diagnosis(visit_pk) + [strSansNone(column) for column in row[self.diagnosis_column_index + 1:]] + self._vitals_for_visit(visit_pk)) # Close the file and persist to the document archive if # requested self.output.close() if save_report: metadata = { k: v for k, v in self.criteria._crit.items() if v is not None } # At this point, all documents are of 'essence' type return document_store(document=self.output.name, allow_duplicate_filename=True, document_type='essence', **metadata) def _record_report(self, report_oid): """Record the details from this report generation in the db""" if not report_oid: return report = Report(processed_datetime=datetime.now(), file_path=report_oid, report_method=self.criteria.report_method) alchemy = AlchemyAccess(database=self.database) alchemy.session.add(report) alchemy.session.commit() alchemy.disconnect() def _transmit_report(self, report): """Transmit report using self._transport()""" logging.info("initiate upload of %s", report) self._transport.transfer_file(report) def _transmit_differences(self, report): """Compute differences from yesterday's like report; transport""" # This option really only makes sense on date range reports, # as updates hit older data than just 'yesterday'. if self.criteria.start_date == self.criteria.end_date: raise ValueError("difference calculation not supported on "\ "single day reports") # See if we can find a similar report in the archive from # yesterday search_criteria = { 'report_method': self.criteria.report_method, 'start_date': self.criteria.start_date - timedelta(days=1), 'end_date': self.criteria.end_date - timedelta(days=1) } old_doc = document_find(search_criteria, limit=1) if old_doc is None: logging.info("No comparable report found for difference "\ "generation") self._transmit_report(report) else: target_filename = self.\ _generate_output_filename(start_date=self.criteria.start_date, end_date=self.criteria.end_date) # RemoveDuplicates not yet ported!! raise ValueError("RemoveDuplicates not ported") #from pheme.essence.remove_duplicates import RemoveDuplicates #rd = RemoveDuplicates(new_report=report, # old_report=old_doc, # out=target_filename) #rd.generate_report() #logging.info("initiate upload of difference %s", target_filename) #self._transport.transfer_file(target_filename) def _getConn(self): """ Local wrapper to get database connection """ if hasattr(self, 'access'): return self.access = DirectAccess(database=self.database, user=self.user, password=self.password) def _closeConn(self): """ Local wrapper to close database connection """ if hasattr(self, 'access'): self.access.close() def tearDown(self): "Public interface to clean up internals" self._closeConn() def execute(self, save_report=False, transmit_report=False, transmit_differences=False): """Execute the report generation """ logging.info("Initiate ESSENCE report generation [%s-%s] for %s", self.criteria.start_date, self.criteria.end_date, self.criteria.report_method) self._getConn() report_oid = self._write_report(save_report) self._record_report(report_oid) if transmit_report: self._transmit_report(report_oid) if transmit_differences: self._transmit_differences(report_oid) self._closeConn() if hasattr(self, 'datePersistence'): self.datePersistence.bump_date() logging.info("Completed ESSENCE report generation [%s-%s] for %s", self.criteria.start_date, self.criteria.end_date, self.criteria.report_method)
class GenerateReport(object): """ Process options and generate the requested report. Optionally persists the file to the filesystem, and uploads to the DOH sftp server. """ __version__ = '0.2' config = Config() IGNORE_SITE = config.get('longitudinal', 'ignore_site', default='') # Order matters, create a tuple of paired values (reportColumn, # essenceColumn) - NB, the Diagnosis column is being bastardized. # Previously there was an SQL function to do the subselect, but it # ran way too slow. Now contains the foreign key to join w/ the # diagnosis for the respective visit. diagnosis_column_index = 7 patient_class_column_index = 11 columns = (('Hosp', 'hospital'), ('Reg Date', 'visit_date'), ('Time', 'visit_time'), ('Sex', 'gender'), ('Age', 'age'), ('Reason For Visit', 'chief_complaint'), ('Zip Code', 'zip'), ('Diagnosis', 'visit_pk'), ('Admit Status', 'gipse_disposition'), ('Medical Record No.', 'patient_id'), ('Visit Record No.', 'visit_id'), ('Service Area', 'patient_class'),) assert(columns[diagnosis_column_index][1] == 'visit_pk') assert(columns[patient_class_column_index][1] == 'patient_class') def __init__(self, user=None, password=None, report_criteria=None, datefile=None): """Initialize report generation. :param user: database user :param password: database password :param report_criteria: ReportCriteria defining specifics :param datefile: useful for persistent walks through time """ self.user = user self.password = password self.criteria = report_criteria self.database = self.criteria.database if datefile: assert((self.criteria.start_date, self.criteria.end_date) == datefile.get_date_range()) self.datePersistence = datefile self._diags = {} self._prepare_output_file() self._prepare_columns() self._set_transport() def _prepare_columns(self): # Don't include the patient_class column if splitting out by # patient_class if self.criteria.patient_class: len_b4 = len(self.columns) self.columns =\ self.columns[:self.patient_class_column_index] \ + self.columns[self.patient_class_column_index + 1:] assert(len(self.columns) + 1 == len_b4) def _set_transport(self): """Plug in the appropriate transport mechanism""" # Transport strategies differ for the different reports if self.criteria.reportable_region: self._transport = Distribute_client(zip_first=True) else: self._transport = PHINMS_client(zip_first=True) def _generate_output_filename(self, start_date=None, end_date=None): start_date = self.criteria.start_date if start_date is None\ else start_date end_date = self.criteria.end_date if end_date is None else end_date datestr = end_date.strftime('%Y%m%d') if start_date != end_date: datestr = '-'.join((start_date.strftime('%Y%m%d'), end_date.strftime('%Y%m%d'))) filename = self.criteria.report_method + '-' + datestr + '.txt' config = Config() tmp_dir = config.get('general', 'tmp_dir', default='/tmp') filepath = os.path.join(tmp_dir, filename) return filepath def _prepare_output_file(self): """Open the local filesystem file for output""" filepath = self.\ _generate_output_filename(start_date=self.criteria.start_date, end_date=self.criteria.end_date) # watch for oversight errors; notify if like report exists - # unless it's size zero (from a previous failed run) if os.path.exists(filepath) and os.path.getsize(filepath): logging.warning("Found requested report file already "\ "exists - overwriting: '%s'"\ % filepath) self.output = open(filepath, 'w') self._output_filename = self.output.name @property def output_filename(self): if not hasattr(self, '_output_filename'): raise RuntimeError("prerequisite call to "\ "_prepare_output_file() "\ "didn't happen!") return self._output_filename def _header(self): if self.criteria.include_vitals: columns = [c[0] for c in self.columns] columns += ('Measured Temperature', 'O2 Saturation', 'Self-Reported Influenza Vaccine', 'Self-Reported H1N1 Vaccine') return '|'.join(columns) else: return '|'.join([c[0] for c in self.columns]) def _build_join_tables(self): """ Scope continues to grow, build all join tables necessary for the query. Some are only necessary with certain features on. """ # Always need the list of reportable visits self._build_visit_join_table() if self.criteria.include_vitals: self._build_vitals_join_table() def _build_visit_join_table(self): """ Helper in selection of visits for the report - this method builds a temporary table and populates it with the visit_pks that belong in the report. This should include all visit_pks with the matching admit_datetime as well as any that have received updates since the last like report was produced. """ # If include_vitals is on, we also need the visit_id to keep # the joins managable. vitals don't have a patient class, so # you can't join on the same values. sql = "CREATE TEMPORARY TABLE reportable_pks (pk "\ "integer not null unique)" selectCols = "fact_visit.pk" self._getConn() self.access.raw_query(sql) # If we're only selecting those facilites in a region, the SQL # is more complicated - build up the respective clauses. joinClause = regionClause = "" if self.criteria.reportable_region: joinClause = "JOIN internal_reportable_region ON "\ "internal_reportable_region.dim_facility_pk = "\ "fact_visit.dim_facility_pk" regionClause = "AND region_name = '%s'" %\ self.criteria.reportable_region # Another HACK! One site is not even wanted by the state DOH, # as it's being duplicated from another source, and ESSENCE # can't help but count them twice. Remove this one site # regardless else: joinClause = "JOIN internal_reportable_region ON "\ "internal_reportable_region.dim_facility_pk = "\ "fact_visit.dim_facility_pk" regionClause = "AND region_name = '%s'" % self.IGNORE_SITE # Limit by patient_class if requested. Note we may still end # up with visit ids that have changed patient classes, so more # pruning later is necessary. pc_limit = "" if self.criteria.patient_class: pc_limit = "AND patient_class = '%c'" %\ self.criteria.patient_class # Start with all visits for the requested date range sql = "INSERT INTO reportable_pks SELECT %s FROM "\ "fact_visit %s WHERE admit_datetime BETWEEN '%s' AND "\ "'%s' %s %s" %\ (selectCols, joinClause, self.criteria.start_date, self.criteria.end_date + timedelta(days=1), pc_limit, regionClause) self.access.raw_query(sql) if self.criteria.include_updates: # In this case, add all visits with updates since the # last run, but no newer than the requested date (in case # we're building reports forward from historical data) sql = "SELECT max(processed_datetime) FROM internal_report "\ "WHERE report_method = '%s'" % self.criteria.report_method cursor = self.access.raw_query(sql) last_report_generated = cursor.fetchall()[0][0] if last_report_generated is None: last_report_generated = '2009-01-01' # our epoch logging.debug("including updates, last_report_generated: "\ "%s", last_report_generated) sql = "INSERT INTO reportable_pks SELECT %(sel_cols)s FROM "\ "fact_visit %(join_clause)s LEFT JOIN reportable_pks ON "\ "reportable_pks.pk = fact_visit.pk WHERE "\ "last_updated > '%(last_report)s' AND admit_datetime "\ "< '%(date)s' AND reportable_pks.pk IS NULL "\ "%(pc_limit)s %(region_clause)s" %\ {'sel_cols': selectCols, 'last_report': last_report_generated, 'date': self.criteria.end_date + timedelta(days=1), 'pc_limit': pc_limit, 'join_clause': joinClause, 'region_clause': regionClause} self.access.raw_query(sql) cursor = self.access.raw_query("SELECT COUNT(*) FROM "\ "reportable_pks") logging.debug("%d visits to report on", cursor.fetchall()[0][0]) def _build_vitals_join_table(self): """When report is to include vitals - we use an additional temporary table (visit_loinc_data) to hold the data for more timely queries. Like the rest of the report, the list of interesting visits is limited to the rows in the reportable_pks - see _build_join_table() for details. """ raise ValueError('not ported yet') sql = """ CREATE TEMPORARY TABLE visit_loinc_data ( visit_id VARCHAR(255) not null, patient_class CHAR(1) default null, observation_id VARCHAR(255) not null, observation_result VARCHAR(255) not null) """ self._getConn() self.access.raw_query(sql) sql = """ INSERT INTO visit_loinc_data (visit_id, patient_class, observation_id, observation_result) SELECT visit.visit_id, visit.patient_class, observation_id, observation_result FROM visit JOIN hl7_visit ON visit.visit_id = hl7_visit.visit_id JOIN hl7_obx ON hl7_visit.hl7_msh_id = hl7_obx.hl7_msh_id JOIN reportable_pks ON reportable_pks.visit_id = visit.visit_id AND reportable_pks.patient_class = visit.patient_class WHERE observation_id in ('8310-5', '20564-1', '46077-4', '29544-4') """ self.access.raw_query(sql) def _select_from_essence_view(self): """Build up the SQL select statement to be used in gathering the data for this report. """ stmt = """SELECT %s FROM essence e JOIN reportable_pks ri ON e.visit_pk = ri.pk""" %\ (','.join(['e.' + c[1] for c in self.columns])) return stmt def _select_diagnosis(self): """ Need to pull in all the diagnosis data for this report. This is saved in an instance dictionary for use in self._diagnosis to generate the list of diagnoses for each respective visit. A list of unique diagnoses ordered by rank is required. """ # We order descending on dx_datetime as the most recent should # be best. Add any others as the persistence mechanism only # saves a unique icd9 dx that has changed status. stmt = "SELECT fact_visit_pk, rank, icd9 "\ "FROM assoc_visit_dx JOIN "\ "dim_dx ON dim_dx_pk = dim_dx.pk JOIN "\ "reportable_pks ON "\ "assoc_visit_dx.fact_visit_pk = reportable_pks.pk "\ "ORDER BY dx_datetime DESC" cursor = self.access.raw_query(stmt) for row in cursor.fetchall(): visit_pk = row[0] if visit_pk in self._diags: self._diags[visit_pk].add(row[0], row[1], row[2]) else: self._diags[visit_pk] = \ SortedDiagnosis(row[0], row[1], row[2]) def _diagnosis(self, visit_pk): if visit_pk in self._diags: return [self._diags[visit_pk].__repr__(), ] else: return ['', ] def _select_vitals(self): """ Need to pull in all the vitals data for this report. This is saved in an instance dictionary for use in self._vitals_for_visit to generate the list of vitals for each respective visit. This is an effective NOP when self.criteria.include_vitals = False """ if not self.criteria.include_vitals: return None self._vitals = {} stmt = """SELECT reportable_pks.visit_pk, observation_id, observation_result FROM visit_loinc_data JOIN reportable_pks ON reportable_pks.visit_id = visit_loinc_data.visit_id""" cursor = self.access.raw_query(stmt) for row in cursor.fetchall(): visit_pk = row[0] if visit_pk in self._vitals: self._vitals[visit_pk].add(row[1], row[2]) else: self._vitals[visit_pk] = \ Vitals(row[1], row[2]) def _vitals_for_visit(self, visit_pk): """Returns the list of vitals for the visit in question. This is an effective NOP when self.criteria.include_vitals = False """ if not self.criteria.include_vitals: return [] if visit_pk in self._vitals: return self._vitals[visit_pk].__repr__() else: return Vitals().__repr__() def _write_report(self, save_report=False): """ Write out and potentially store the results. Generate results via database queries and write the results to self.output. :param save_report: If set, persist the document and related metadata to the mbds archive. returns the document ID, the mbds archive key, if saved """ out = self.output print >> out, self._header() self._build_join_tables() self._select_diagnosis() self._select_vitals() cursor = self.access.raw_query(self._select_from_essence_view()) for row in cursor.fetchall(): # Each row is the colums up to the diagnosis + the # comma separated diagnosis + the rest of the columns # and finally with vitals if configured for such visit_pk = row[self.diagnosis_column_index] # yuck, but true print >> out,\ '|'.join([strSansNone(column) for column in row[:self.diagnosis_column_index]] + self._diagnosis(visit_pk) + [strSansNone(column) for column in row[self.diagnosis_column_index + 1:]] + self._vitals_for_visit(visit_pk)) # Close the file and persist to the document archive if # requested self.output.close() if save_report: metadata = {k: v for k, v in self.criteria._crit.items() if v is not None} # At this point, all documents are of 'essence' type return document_store(document=self.output.name, allow_duplicate_filename=True, document_type='essence', **metadata) def _record_report(self, report_oid): """Record the details from this report generation in the db""" if not report_oid: return report = Report(processed_datetime=datetime.now(), file_path=report_oid, report_method=self.criteria.report_method) alchemy = AlchemyAccess(database=self.database) alchemy.session.add(report) alchemy.session.commit() alchemy.disconnect() def _transmit_report(self, report): """Transmit report using self._transport()""" logging.info("initiate upload of %s", report) self._transport.transfer_file(report) def _transmit_differences(self, report): """Compute differences from yesterday's like report; transport""" # This option really only makes sense on date range reports, # as updates hit older data than just 'yesterday'. if self.criteria.start_date == self.criteria.end_date: raise ValueError("difference calculation not supported on "\ "single day reports") # See if we can find a similar report in the archive from # yesterday search_criteria = {'report_method': self.criteria.report_method, 'start_date': self.criteria.start_date - timedelta(days=1), 'end_date': self.criteria.end_date - timedelta(days=1)} old_doc = document_find(search_criteria, limit=1) if old_doc is None: logging.info("No comparable report found for difference "\ "generation") self._transmit_report(report) else: target_filename = self.\ _generate_output_filename(start_date=self.criteria.start_date, end_date=self.criteria.end_date) # RemoveDuplicates not yet ported!! raise ValueError("RemoveDuplicates not ported") #from pheme.essence.remove_duplicates import RemoveDuplicates #rd = RemoveDuplicates(new_report=report, # old_report=old_doc, # out=target_filename) #rd.generate_report() #logging.info("initiate upload of difference %s", target_filename) #self._transport.transfer_file(target_filename) def _getConn(self): """ Local wrapper to get database connection """ if hasattr(self, 'access'): return self.access = DirectAccess(database=self.database, user=self.user, password=self.password) def _closeConn(self): """ Local wrapper to close database connection """ if hasattr(self, 'access'): self.access.close() def tearDown(self): "Public interface to clean up internals" self._closeConn() def execute(self, save_report=False, transmit_report=False, transmit_differences=False): """Execute the report generation """ logging.info("Initiate ESSENCE report generation [%s-%s] for %s", self.criteria.start_date, self.criteria.end_date, self.criteria.report_method) self._getConn() report_oid = self._write_report(save_report) self._record_report(report_oid) if transmit_report: self._transmit_report(report_oid) if transmit_differences: self._transmit_differences(report_oid) self._closeConn() if hasattr(self, 'datePersistence'): self.datePersistence.bump_date() logging.info("Completed ESSENCE report generation [%s-%s] for %s", self.criteria.start_date, self.criteria.end_date, self.criteria.report_method)