def parse_nbhd(self, nbhd_object, date):
    """
    Build the "summary_location" and "summary_clinical" records for one
    neighborhood (community area) object.

    Args:
        nbhd_object (dict): object whose "properties" entry provides the
            "community", "value" and "population" values
        date: date stored on the "summary_clinical" record and used to
            derive its submitter ID

    Returns:
        (dict, dict): "summary_location" and "summary_clinical" records

    Raises:
        ZeroDivisionError: if the "population" value is 0
    """
    props = nbhd_object["properties"]
    community = props["community"]
    death_count = props["value"]
    residents = props["population"]

    location_id = format_submitter_id(
        "summary_location",
        {"country": self.country, "state": self.state, "nbhd": community},
    )
    location_record = {
        "submitter_id": location_id,
        "community_area": community,
        "projects": [{"code": self.project_code}],
    }

    clinical_id = derived_submitter_id(
        location_id, "summary_location", "summary_clinical", {"date": date}
    )
    # "value" is stored as the death count; the rate is expressed per
    # 10,000 residents and rounded to two decimals
    death_rate = round(10000 * death_count / residents, 2)
    clinical_record = {
        "submitter_id": clinical_id,
        "date": date,
        "deaths_per_10000": death_rate,
        "deaths": death_count,
        "summary_locations": [{"submitter_id": location_id}],
    }

    return location_record, clinical_record
def parse_row(self, row):
    """
    Convert one provider row into "summary_location" and
    "summary_clinical" records.

    Args:
        row: mapping-like row indexed by the source column names listed
            in the local column table

    Returns:
        (dict, dict): "summary_location" and "summary_clinical" records
    """
    # source column -> (target node, target property)
    column_targets = {
        "NPI": ("summary_location", "npi"),
        "Provider_First_Line_Business_Pra": (
            "summary_location",
            "first_line_address",
        ),
        "Provider_Second_Line_Business_Pr": (
            "summary_location",
            "second_line_address",
        ),
        "Provider_Business_Practice_City": ("summary_location", "city"),
        "Provider_Business_Practice_ST": ("summary_location", "province_state"),
        "TaxonomyCode": ("summary_clinical", "taxonomy_code"),
        "ProviderType": ("summary_clinical", "provider_type"),
        "ProviderSubtype": ("summary_clinical", "provider_subtype"),
        "DetailedSpecialty": ("summary_clinical", "detailed_specialty"),
    }

    npi_value = row["NPI"]
    state_code = row["Provider_Business_Practice_ST"]

    location_id = format_submitter_id(
        "summary_location",
        {"country": self.country, "state": state_code, "npi": npi_value},
    )
    clinical_id = derived_submitter_id(
        location_id, "summary_location", "summary_clinical", {}
    )

    records = {
        "summary_location": {
            "submitter_id": location_id,
            "projects": [{"code": self.project_code}],
        },
        "summary_clinical": {
            "submitter_id": clinical_id,
            "summary_locations": [{"submitter_id": location_id}],
        },
    }

    for column, (node, prop) in column_targets.items():
        raw = row[column]
        # the NPI is always stored as a string; other values are copied as-is
        records[node][prop] = str(raw) if prop == "npi" else raw

    return records["summary_location"], records["summary_clinical"]
def get_group_clinical_demographic_submitter_id(
        self, summary_clinical_submitter_id, key_dict):
    """
    Derive the "summary_group_demographic" submitter ID from a
    "summary_clinical" submitter ID.

    Args:
        summary_clinical_submitter_id: submitter ID of the parent
            "summary_clinical" record
        key_dict (dict): extra values forwarded to `derived_submitter_id`

    Returns:
        the value returned by `derived_submitter_id`
    """
    return derived_submitter_id(
        summary_clinical_submitter_id,
        "summary_clinical",
        "summary_group_demographic",
        key_dict,
    )
def parse_historical(self, utilization, summary_clinical_statewide_current):
    """
    Build the statewide "summary_clinical" record for one historical
    hospital-utilization entry.

    Args:
        utilization (dict): source entry; must contain "reportDate", and
            every one of its keys must be listed in the local mapping
        summary_clinical_statewide_current (dict): current statewide
            "summary_clinical" record; when its "submitter_id" equals the
            one derived here, its values are merged on top of the result

    Returns:
        dict: "summary_clinical" record

    Raises:
        KeyError: if `utilization` contains a key that is not mapped
    """
    # source key -> "summary_clinical" property
    source_to_property = {
        "reportDate": "date",
        "TotalBeds": "state_total_beds",
        "TotalOpenBeds": "total_open_beds",
        "TotalInUseBedsNonCOVID": "total_in_use_beds_non_covid",
        "TotalInUseBedsCOVID": "total_in_use_beds_covid",
        "ICUBeds": "icu_beds",
        "ICUOpenBeds": "icu_open_beds",
        "ICUInUseBedsNonCOVID": "icu_in_use_beds_non_covid",
        "ICUInUseBedsCOVID": "icu_in_use_beds_covid",
        "VentilatorCapacity": "ventilator_capacity",
        "VentilatorAvailable": "ventilator_available",
        "VentilatorInUseNonCOVID": "ventilator_in_use_non_covid",
        "VentilatorInUseCOVID": "ventilator_in_use_covid",
    }

    report_date = utilization["reportDate"]

    location_id = format_submitter_id(
        "summary_location",
        {"project": "idph_hospital", "country": self.country, "state": self.state},
    )
    clinical_id = derived_submitter_id(
        location_id,
        "summary_location",
        "summary_clinical",
        {"project": "idph_hospital", "date": report_date},
    )

    record = {
        "submitter_id": clinical_id,
        "date": report_date,
        "summary_locations": [{"submitter_id": location_id}],
    }
    record.update(
        (source_to_property[key], value) for key, value in utilization.items()
    )

    # same day as the current statewide record: its values take precedence
    if clinical_id == summary_clinical_statewide_current["submitter_id"]:
        record.update(summary_clinical_statewide_current)

    return record
def parse_facility(self, date, facility):
    """
    From facility-level data, generate the records we can submit via
    Sheepdog.

    Args:
        date: date stored on the "summary_clinical" record and used to
            derive its submitter ID
        facility (dict): facility entry with "County", "FacilityName",
            "confirmed_cases", "deaths" and, optionally, "status"

    Returns:
        (dict, dict): "summary_location" and "summary_clinical" records
    """
    county = facility["County"]
    name = facility["FacilityName"]
    confirmed = facility["confirmed_cases"]
    death_count = facility["deaths"]
    # "status" is optional in the source entry
    org_status = facility.get("status")

    location_id = format_submitter_id(
        "summary_location",
        {
            "country": self.country,
            "state": self.state,
            "facility_name": name,
            "reporting_org_status": org_status,
        },
    )
    location_record = {
        "country_region": self.country,
        "submitter_id": location_id,
        "projects": [{"code": self.project_code}],
        "province_state": self.state,
        "county": county,
        "reporting_org": name,
        "reporting_org_status": org_status,
    }

    clinical_id = derived_submitter_id(
        location_id, "summary_location", "summary_clinical", {"date": date}
    )
    clinical_record = {
        "confirmed": confirmed,
        "deaths": death_count,
        "submitter_id": clinical_id,
        "lastUpdateEt": date,
        "date": date,
        "summary_locations": [{"submitter_id": location_id}],
    }

    return location_record, clinical_record
def parse_region(self, date, hospital_region):
    """
    From hospital-region data, generate the records we can submit via
    Sheepdog.

    Args:
        date: date stored on the "summary_clinical" record and used to
            derive its submitter ID
        hospital_region (dict): region entry with "region",
            "region_description", "ICUAvail", "ICUCapacity",
            "VentsAvailable" and "VentsCapacity"

    Returns:
        (dict, dict): "summary_location" and "summary_clinical" records
    """
    region_id = hospital_region["region"]
    description = hospital_region["region_description"]

    location_id = format_submitter_id(
        "summary_location",
        {
            "project": "idph_hospital",
            "country": self.country,
            "state": self.state,
            "region": region_id,
        },
    )
    location_record = {
        "country_region": self.country,
        "submitter_id": location_id,
        "projects": [{"code": self.project_code}],
        "province_state": self.state,
        "state_hospital_region": region_id,
        # the description is stored after going through `strip_prefix`
        "state_region_description": strip_prefix(description),
    }

    clinical_id = derived_submitter_id(
        location_id,
        "summary_location",
        "summary_clinical",
        {"project": "idph_hospital", "date": date},
    )
    clinical_record = {
        "submitter_id": clinical_id,
        "date": date,
        "summary_locations": [{"submitter_id": location_id}],
        "region_icu_avail": hospital_region["ICUAvail"],
        "region_icu_capacity": hospital_region["ICUCapacity"],
        "region_vents_available": hospital_region["VentsAvailable"],
        "region_vents_capacity": hospital_region["VentsCapacity"],
    }

    return location_record, clinical_record
def get_location_and_clinical_submitter_id(self, county, date):
    """
    Compute the "summary_location" and "summary_clinical" submitter IDs
    for a county, or for the whole state when `county` is None.

    Args:
        county: county name, or None for the state-level location
        date: date used to derive the "summary_clinical" submitter ID

    Returns:
        tuple: ("summary_location" submitter ID,
            "summary_clinical" submitter ID)
    """
    location_keys = {"country": self.country, "state": self.state}
    if county is not None:
        location_keys["county"] = county

    location_id = format_submitter_id("summary_location", location_keys)
    clinical_id = derived_submitter_id(
        location_id, "summary_location", "summary_clinical", {"date": date}
    )
    return location_id, clinical_id
def parse_historical_data(self, illinois_data):
    """
    Parses historical state-level data.

    Only the "summary_clinical" record is built here; the location is
    referenced through the submitter ID computed for the "Illinois"
    county value.

    Args:
        illinois_data (dict): data JSON with "testDate" (formatted as
            MM/DD/YYYY), "total_tested", "confirmed_cases" and "deaths"

    Returns:
        dict: "summary_clinical" node for Sheepdog
    """
    test_day = datetime.datetime.strptime(illinois_data["testDate"], "%m/%d/%Y")
    # stored dates use the YYYY-MM-DD format
    iso_date = test_day.strftime("%Y-%m-%d")

    location_id = format_submitter_id(
        "summary_location",
        {"country": self.country, "state": self.state, "county": "Illinois"},
    )
    clinical_id = derived_submitter_id(
        location_id, "summary_location", "summary_clinical", {"date": iso_date}
    )

    return {
        "submitter_id": clinical_id,
        "date": iso_date,
        "confirmed": illinois_data["confirmed_cases"],
        "testing": illinois_data["total_tested"],
        "deaths": illinois_data["deaths"],
        "summary_locations": [{"submitter_id": location_id}],
    }
def parse_zipcode(self, date, zipcode_values):
    """
    From zipcode-level data, generate the records we can submit via
    Sheepdog.

    Args:
        date: date stored on the "summary_clinical" record and used to
            derive its submitter ID
        zipcode_values (dict): zipcode entry with "zip",
            "confirmed_cases" and, optionally, "demographics"

    Returns:
        (dict, dict): "summary_location" and "summary_clinical" records
    """
    zipcode = zipcode_values["zip"]

    location_id = format_submitter_id(
        "summary_location",
        {"country": self.country, "state": self.state, "zipcode": zipcode},
    )
    location_record = {
        "submitter_id": location_id,
        "country_region": self.country,
        "province_state": self.state,
        "zipcode": zipcode,
        "projects": [{"code": self.project_code}],
    }

    clinical_id = derived_submitter_id(
        location_id, "summary_location", "summary_clinical", {"date": date}
    )
    clinical_record = {
        "submitter_id": clinical_id,
        "date": date,
        "confirmed": zipcode_values["confirmed_cases"],
        "summary_locations": [{"submitter_id": location_id}],
    }

    if "demographics" not in zipcode_values:
        return location_record, clinical_record

    demographics = zipcode_values["demographics"]
    # `fields_mapping` gives, per demographic group, the item field that
    # names the category and the category -> property-prefix table
    for group_key, (category_field, prefixes) in fields_mapping.items():
        for item in demographics[group_key]:
            prefix = prefixes[item[category_field]]
            if not prefix:
                # categories mapped to a falsy prefix are not stored
                continue
            for metric in ("count", "tested"):
                if metric in item:
                    clinical_record["{}_{}".format(prefix, metric)] = item[metric]

    return location_record, clinical_record
def parse_file(self, latest_submitted_date, url):
    """
    Converts a JSON file to data we can submit via Sheepdog. Stores the
    records to submit in `self.summary_locations` and
    `self.summary_clinicals`, keyed by submitter ID.

    Nothing is stored when the date found in the data equals
    `latest_submitted_date`.

    Args:
        latest_submitted_date (date): the date of latest available
            "summary_clinical" for project
        url (str): URL at which the JSON file is available
    """
    print("Getting data from {}".format(url))
    with closing(requests.get(url, stream=True)) as r:
        data = r.json()
        date = idph_get_date(data["LastUpdateDate"])

        up_to_date = latest_submitted_date and (
            date == latest_submitted_date.strftime("%Y-%m-%d")
        )
        if up_to_date:
            print(
                "Nothing to submit: latest submitted date and date from data are the same."
            )
            return

        # state-level totals, when the file provides them
        if "LTC_Reported_Cases" in data:
            state_location_id = format_submitter_id(
                "summary_location",
                {"country": self.country, "state": self.state},
            )
            state_location = {
                "country_region": self.country,
                "submitter_id": state_location_id,
                "projects": [{"code": self.project_code}],
                "province_state": self.state,
            }

            state_clinical_id = derived_submitter_id(
                state_location_id,
                "summary_location",
                "summary_clinical",
                {"date": date},
            )
            reported = data["LTC_Reported_Cases"]
            state_clinical = {
                "confirmed": reported["confirmed_cases"],
                "deaths": reported["deaths"],
                "submitter_id": state_clinical_id,
                "lastUpdateEt": date,
                "date": date,
                "summary_locations": [{"submitter_id": state_location_id}],
            }

            self.summary_locations[state_location_id] = state_location
            self.summary_clinicals[state_clinical_id] = state_clinical

        for facility in data["FacilityValues"]:
            location, clinical = self.parse_facility(date, facility)
            location_id = location["submitter_id"]
            clinical_id = clinical["submitter_id"]

            self.summary_locations[location_id] = location

            # when a record with the same submitter ID was already stored,
            # keep the larger of the two values for each counter
            if clinical_id in self.summary_clinicals:
                previous = self.summary_clinicals[clinical_id]
                for counter in ("confirmed", "deaths"):
                    clinical[counter] = max(clinical[counter], previous[counter])

            self.summary_clinicals[clinical_id] = clinical
def parse_statewide_values(self, date, statewide_values):
    """
    Build the statewide "summary_location" and "summary_clinical" records
    from the statewide hospital values.

    Args:
        date: date stored on the "summary_clinical" record and used to
            derive its submitter ID
        statewide_values (dict): source values; every key must be listed
            in the local mapping

    Returns:
        (dict, dict): "summary_location" and "summary_clinical" records

    Raises:
        KeyError: if `statewide_values` contains a key that is not mapped
    """
    # source key -> "summary_clinical" property
    # NOTE(review): "state_Ventilators_open_pct" is the only value with a
    # capital letter - confirm it matches the data dictionary.
    source_to_property = {
        "ICUCapacity": "state_icu_capacity",
        "ICUCovidPatients": "state_icu_covid_patients",
        "VentCapacity": "state_vent_capacity",
        "VentCovidPatients": "state_vent_covid_patients",
        "ICUAvailable": "state_icu_available",
        "VentsAvailable": "state_vents_available",
        "TotalBeds": "state_total_beds",
        "TotalBedsAvailable": "state_total_beds_available",
        "TotalBedsUsed": "state_total_beds_used",
        "PctHospitalBedsAvailable": "state_pct_hospital_beds_available",
        "AdultICUCapacity": "state_adult_icu_capacity",
        "ICUOpenBeds": "state_icu_open_beds",
        "ICUBedsUsed": "state_icu_beds_used",
        "ICUOpenBedsPct": "state_icu_open_beds_pct",
        "COVIDPUIPatients": "state_covid_pui_patients",
        "COVIDPUIPatientsPct": "state_covid_pui_patients_pct",
        "COVIDPUIPatientsBedsInUsePct": "state_covid_pui_patients_beds_in_use_pct",
        "VentilatorCapacity": "state_ventilator_capacity",
        "VentilatorsOpen": "state_ventilators_open",
        "VentilatorsOpenPct": "state_Ventilators_open_pct",
        "VentilatorsInUse": "state_ventilators_in_use",
        "VentilatorsInUseCOVID": "state_ventilators_in_use_covid",
        "VentilatorsCOVIDPatientsPct": "state_ventilators_covid_patients_pct",
        "VentilatorsCOVIDPatientsInUsePct": "state_ventilators_covid_patients_in_use_pct",
        "CovidPatientsNonICU": "state_covid_patients_non_icu",
        "TotalCOVIDPUIInICU": "state_total_covid_pui_in_icu",
        "TotalCOVIDPUIInHospital": "state_total_covid_pui_in_hospital",
        "PctBedsCOVIDPUI": "state_pct_beds_covid_pui",
        "MedSurgBeds": "state_med_surg_beds",
        "MedSurgBedsOpen": "state_med_surg_beds_open",
        "MedSurgBedsOpenPct": "state_med_surg_beds_open_pct",
        "MedSurgBedsInUse": "state_med_surg_beds_in_use",
    }

    location_id = format_submitter_id(
        "summary_location",
        {"project": "idph_hospital", "country": self.country, "state": self.state},
    )
    location_record = {
        "submitter_id": location_id,
        "projects": [{"code": self.project_code}],
        "country_region": self.country,
        "province_state": self.state,
    }

    clinical_id = derived_submitter_id(
        location_id,
        "summary_location",
        "summary_clinical",
        {"project": "idph_hospital", "date": date},
    )
    clinical_record = {
        "submitter_id": clinical_id,
        "date": date,
        "summary_locations": [{"submitter_id": location_id}],
    }
    clinical_record.update(
        (source_to_property[key], value)
        for key, value in statewide_values.items()
    )

    return location_record, clinical_record
def parse_row(self, headers, row):
    """
    Convert one metadata row into the records to submit, keyed by node
    name.

    The "imaging_file" record is only produced when the file named in the
    row exists under `COXRAY_DATA_PATH`/images; otherwise a message is
    printed and the other records are still returned.

    Args:
        headers (list): column names, in the same order as `row`
        row (list): cell values for one row

    Returns:
        dict: node name -> record

    Raises:
        ValueError: if a required column is missing from `headers`
        AssertionError: if the file exists on disk but no record for it
            is found through `self.file_helper.find_by_name`
    """

    def cell(column):
        # value of `column` in this row (ValueError when the column is absent)
        return row[headers.index(column)]

    cmc_submitter_id = format_submitter_id("cmc_coxray", {})
    subject_submitter_id = format_submitter_id(
        "subject_coxray", {"patientid": cell("patientid")}
    )
    observation_submitter_id = derived_submitter_id(
        subject_submitter_id, "subject_coxray", "observation_coxray", {}
    )
    follow_up_submitter_id = derived_submitter_id(
        subject_submitter_id,
        "subject_coxray",
        "follow_up_coxray",
        {"offset": cell("offset")},
    )
    demographic_submitter_id = derived_submitter_id(
        subject_submitter_id, "subject_coxray", "demographic_coxray", {}
    )
    raw_filename = cell("filename")
    imaging_file_submitter_id = format_submitter_id(
        "imaging_file_coxray", {"filename": raw_filename}
    )
    study_submitter_id = format_submitter_id("study_coxray", {"doi": cell("doi")})

    filename = Path(raw_filename)
    filepath = Path(COXRAY_DATA_PATH).joinpath("images", filename)
    filepath_exist = filepath.exists()

    project_link = {"code": self.project_code}
    subject_link = {"submitter_id": subject_submitter_id}
    nodes = {
        "core_metadata_collection": {
            "submitter_id": cmc_submitter_id,
            "projects": [dict(project_link)],
        },
        "study": {
            "submitter_id": study_submitter_id,
            "projects": [dict(project_link)],
        },
        "subject": {
            "submitter_id": subject_submitter_id,
            "projects": [dict(project_link)],
            "studies": [{"submitter_id": study_submitter_id}],
        },
        "observation": {
            "submitter_id": observation_submitter_id,
            "subjects": [dict(subject_link)],
        },
        "follow_up": {
            "submitter_id": follow_up_submitter_id,
            "subjects": [dict(subject_link)],
        },
        "demographic": {
            "submitter_id": demographic_submitter_id,
            "subjects": [dict(subject_link)],
        },
    }

    if filepath_exist:
        data_type = "".join(filename.suffixes)
        did, rev, md5sum, filesize = self.file_helper.find_by_name(
            filename=filename)
        # Explicit raise instead of `assert`: assert statements are removed
        # when Python runs with -O, which would let a missing record reach
        # update_authz() and the submitted node. The exception type and
        # message are unchanged.
        if not did:
            raise AssertionError(
                f"file {filename} does not exist in the index, rerun COXRAY_FILE ETL"
            )
        self.file_helper.update_authz(did=did, rev=rev)
        nodes["imaging_file"] = {
            "submitter_id": imaging_file_submitter_id,
            "subjects": [dict(subject_link)],
            "follow_ups": [{"submitter_id": follow_up_submitter_id}],
            "core_metadata_collections": [{"submitter_id": cmc_submitter_id}],
            "data_type": data_type,
            "data_format": "Image File",
            "data_category": "X-Ray Image",
            "file_size": filesize,
            "md5sum": md5sum,
            "object_id": did,
        }
    else:
        print(
            f"subject references the file that doesn't exist as a file: {filepath}"
        )

    # copy the mapped columns onto their nodes; empty values and nodes that
    # were not created (e.g. "imaging_file") are skipped
    for column, (node, field, converter) in fields_mapping.items():
        value = cell(column)
        if node in nodes and value:
            nodes[node][field] = converter(value) if converter else value

    return nodes
def parse_input(self, row_data, date_mode=None):
    """
    Convert one input row into a "summary_location" record and a
    "statistical_summary_report" record, and store both in
    `self.records[<node type>][<submitter ID>]`.

    Args:
        row_data: mapping-like row indexed by the original property names
        date_mode: forwarded unchanged to `format_value`

    Raises:
        KeyError: if the row yields no "reporting_org" or "report_date"
            value (both are needed to build the submitter IDs)
    """
    # original properties that map to an integer property of the same
    # name on the "statistical_summary_report" node
    report_counters = (
        "num_COVID",
        "num_COVID_deaths",
        "num_outpatient",
        "num_admitted",
        "num_icu",
        "num_vent",
        "num_resp",
        "num_pneu",
        "num_diab",
        "num_asth",
        "num_obes",
        "num_card",
        "num_chf",
    )
    # (original property, (gen3 node, gen3 property, property type))
    mapping = [
        ("reportingOrg", ("summary_location", "reporting_org", str)),
        ("reportDate", ("statistical_summary_report", "report_date", str)),
    ]
    mapping.extend(
        (name, ("statistical_summary_report", name, int))
        for name in report_counters
    )

    # row_records = { <node type>: { <record data> } }
    # (there is only 1 record of each node type per row)
    row_records = defaultdict(dict)
    for source_prop, (node_type, prop_name, prop_type) in mapping:
        raw_value = row_data[source_prop]
        # NOTE(review): this truthiness check skips every falsy value, so a
        # numeric 0 is dropped along with empty cells - confirm intended.
        if raw_value:
            row_records[node_type][prop_name] = format_value(
                prop_name, raw_value, prop_type, date_mode)

    # add missing summary_location props
    location = row_records["summary_location"]
    location_id = format_submitter_id(
        "summary_location", {"reporting_org": location["reporting_org"]}
    )
    location.update({
        "type": "summary_location",
        "submitter_id": location_id,
        "projects": {"code": self.project_code},
        "country_region": self.country,
        "province_state": self.state,
    })

    # add missing statistical_summary_report props
    report = row_records["statistical_summary_report"]
    # NOTE(review): the node arguments passed here differ from the
    # ("summary_location", <derived node>) pattern - confirm against
    # `derived_submitter_id` before changing, since it defines stored IDs.
    report_id = derived_submitter_id(
        location_id,
        "statistical_summary_report",
        "ssr",
        {"report_date": report["report_date"]},
    )
    report.update({
        "type": "statistical_summary_report",
        "submitter_id": report_id,
        "summary_locations": {"submitter_id": location_id},
    })

    for node_type, record in row_records.items():
        self.records[node_type][record["submitter_id"]] = record
def parse_county(self, date, county_json, demographic):
    """
    From county-level data, generate the data we can submit via Sheepdog

    Args:
        date (date): date
        county_json (dict): JSON for county statistics
        demographic (dict): demographic groups, indexed by the keys of the
            module-level `fields_mapping`; only read for the "Illinois"
            entry and ignored when falsy

    Returns:
        (dict, dict): "summary_location" and "summary_clinical" records
    """
    county = county_json["County"]
    is_statewide = county == "Illinois"

    location_id = format_submitter_id(
        "summary_location",
        {"country": self.country, "state": self.state, "county": county},
    )
    location_record = {
        "submitter_id": location_id,
        "country_region": self.country,
        "province_state": self.state,
        "projects": [{"code": self.project_code}],
    }

    # the IDPH data use Illinois in "County" field for aggregated data
    # in Gen3 it would equal to location with "province_state" equal to "IL" and no "County" field
    if not is_statewide:
        location_record["county"] = county

    if county in self.county_dict:
        # coordinates from `self.county_dict` take precedence over the
        # ones carried by the county JSON
        known = self.county_dict[county]
        location_record["latitude"] = known["lat"]
        location_record["longitude"] = known["lon"]
    else:
        # a coordinate equal to 0 is not stored
        for source_key, prop in (("lat", "latitude"), ("lon", "longitude")):
            if county_json[source_key] != 0:
                location_record[prop] = str(county_json[source_key])

    clinical_id = derived_submitter_id(
        location_id, "summary_location", "summary_clinical", {"date": date}
    )
    clinical_record = {
        "submitter_id": clinical_id,
        "date": date,
        "confirmed": county_json["confirmed_cases"],
        "testing": county_json["total_tested"],
        "deaths": county_json["deaths"],
        "summary_locations": [{"submitter_id": location_id}],
    }

    if "negative" in county_json:
        clinical_record["negative"] = county_json["negative"]

    if is_statewide and demographic:
        # `fields_mapping` gives, per demographic group, the item field
        # that names the category and the category -> property-prefix table
        for group_key, (category_field, prefixes) in fields_mapping.items():
            for item in demographic[group_key]:
                prefix = prefixes[item[category_field]]
                if not prefix:
                    # categories mapped to a falsy prefix are not stored
                    continue
                for metric in ("count", "tested"):
                    if metric in item:
                        clinical_record["{}_{}".format(prefix, metric)] = item[metric]

    return location_record, clinical_record