예제 #1
0
    def parse_nbhd(self, nbhd_object, date):
        """
        Build the "summary_location" and "summary_clinical" records for one
        neighborhood entry.

        Args:
            nbhd_object (dict): entry whose "properties" mapping provides the
                "community", "value" and "population" keys
            date: value stored as the record date and appended to the
                "summary_clinical" submitter ID

        Returns:
            (dict, dict): "summary_location" and "summary_clinical" records
        """
        props = nbhd_object["properties"]
        community = props["community"]
        death_count = props["value"]
        residents = props["population"]

        location_id = format_submitter_id(
            "summary_location",
            {"country": self.country, "state": self.state, "nbhd": community},
        )
        location_record = {
            "submitter_id": location_id,
            "community_area": community,
            "projects": [{"code": self.project_code}],
        }

        clinical_id = derived_submitter_id(
            location_id, "summary_location", "summary_clinical", {"date": date}
        )
        # "value" is reported as the death count; normalize it per 10,000
        # residents and keep two decimals
        death_rate = round(10000 * death_count / residents, 2)
        clinical_record = {
            "submitter_id": clinical_id,
            "date": date,
            "deaths_per_10000": death_rate,
            "deaths": death_count,
            "summary_locations": [{"submitter_id": location_id}],
        }

        return location_record, clinical_record
예제 #2
0
    def parse_row(self, row):
        """
        Convert one provider row into "summary_location" and
        "summary_clinical" records.

        Args:
            row: row indexed by source column name; must provide every column
                listed in the mapping below

        Returns:
            (dict, dict): "summary_location" and "summary_clinical" records
        """
        # source column -> (target node, target property)
        column_targets = {
            "NPI": ("summary_location", "npi"),
            "Provider_First_Line_Business_Pra": (
                "summary_location",
                "first_line_address",
            ),
            "Provider_Second_Line_Business_Pr": (
                "summary_location",
                "second_line_address",
            ),
            "Provider_Business_Practice_City": ("summary_location", "city"),
            "Provider_Business_Practice_ST": (
                "summary_location",
                "province_state",
            ),
            "TaxonomyCode": ("summary_clinical", "taxonomy_code"),
            "ProviderType": ("summary_clinical", "provider_type"),
            "ProviderSubtype": ("summary_clinical", "provider_subtype"),
            "DetailedSpecialty": ("summary_clinical", "detailed_specialty"),
        }

        provider_npi = row["NPI"]
        provider_state = row["Provider_Business_Practice_ST"]

        location_id = format_submitter_id(
            "summary_location",
            {
                "country": self.country,
                "state": provider_state,
                "npi": provider_npi,
            },
        )
        # no extra key parts: the clinical ID is derived from the location ID only
        clinical_id = derived_submitter_id(
            location_id, "summary_location", "summary_clinical", {}
        )

        location_record = {
            "submitter_id": location_id,
            "projects": [{"code": self.project_code}],
        }
        clinical_record = {
            "submitter_id": clinical_id,
            "summary_locations": [{"submitter_id": location_id}],
        }
        records = {
            "summary_location": location_record,
            "summary_clinical": clinical_record,
        }

        for column, (node, prop) in column_targets.items():
            value = row[column]
            # the NPI is stored as a string; all other values are copied as-is
            records[node][prop] = str(value) if prop == "npi" else value

        return location_record, clinical_record
예제 #3
0
 def get_group_clinical_demographic_submitter_id(
         self, summary_clinical_submitter_id, key_dict):
     """
     Derive a "summary_group_demographic" submitter ID from a
     "summary_clinical" submitter ID.

     Args:
         summary_clinical_submitter_id: submitter ID of the parent
             "summary_clinical" record
         key_dict: extra key parts handed to `derived_submitter_id`

     Returns:
         the value returned by `derived_submitter_id`
     """
     return derived_submitter_id(
         summary_clinical_submitter_id,
         "summary_clinical",
         "summary_group_demographic",
         key_dict,
     )
예제 #4
0
    def parse_historical(self, utilization,
                         summary_clinical_statewide_current):
        """
        Build the state-level "summary_clinical" record for one historical
        utilization entry.

        Every key of `utilization` is renamed through a fixed mapping and
        copied onto the record; a key that is absent from the mapping raises
        KeyError. If the resulting submitter ID equals the one of
        `summary_clinical_statewide_current`, that record's values are merged
        on top of the historical ones.

        Args:
            utilization (dict): utilization entry; must contain "reportDate"
            summary_clinical_statewide_current (dict): "summary_clinical"
                record with a "submitter_id" key

        Returns:
            dict: "summary_clinical" record
        """
        # source key -> "summary_clinical" property
        renames = {
            "reportDate": "date",
            "TotalBeds": "state_total_beds",
            "TotalOpenBeds": "total_open_beds",
            "TotalInUseBedsNonCOVID": "total_in_use_beds_non_covid",
            "TotalInUseBedsCOVID": "total_in_use_beds_covid",
            "ICUBeds": "icu_beds",
            "ICUOpenBeds": "icu_open_beds",
            "ICUInUseBedsNonCOVID": "icu_in_use_beds_non_covid",
            "ICUInUseBedsCOVID": "icu_in_use_beds_covid",
            "VentilatorCapacity": "ventilator_capacity",
            "VentilatorAvailable": "ventilator_available",
            "VentilatorInUseNonCOVID": "ventilator_in_use_non_covid",
            "VentilatorInUseCOVID": "ventilator_in_use_covid",
        }
        report_date = utilization["reportDate"]

        location_id = format_submitter_id(
            "summary_location",
            {
                "project": "idph_hospital",
                "country": self.country,
                "state": self.state,
            },
        )
        clinical_id = derived_submitter_id(
            location_id,
            "summary_location",
            "summary_clinical",
            {"project": "idph_hospital", "date": report_date},
        )

        record = {
            "submitter_id": clinical_id,
            "date": report_date,
            "summary_locations": [{"submitter_id": location_id}],
        }
        # rename and copy every reported value ("reportDate" lands on "date")
        record.update(
            (renames[source_key], value)
            for source_key, value in utilization.items()
        )

        # same day as the current statewide record: its values take precedence
        current_id = summary_clinical_statewide_current["submitter_id"]
        if clinical_id == current_id:
            record.update(summary_clinical_statewide_current)

        return record
예제 #5
0
    def parse_facility(self, date, facility):
        """
        From facility-level data, generate the data we can submit via Sheepdog

        Args:
            date: value stored as "date" and "lastUpdateEt" and appended to
                the "summary_clinical" submitter ID
            facility (dict): facility entry with "County", "FacilityName",
                "confirmed_cases", "deaths" and optionally "status"

        Returns:
            (dict, dict): "summary_location" and "summary_clinical" records
        """
        county_name = facility["County"]
        name = facility["FacilityName"]
        confirmed = facility["confirmed_cases"]
        death_count = facility["deaths"]
        # optional: None when the entry carries no status
        org_status = facility.get("status")

        location_id = format_submitter_id(
            "summary_location",
            {
                "country": self.country,
                "state": self.state,
                "facility_name": name,
                "reporting_org_status": org_status,
            },
        )
        location_record = {
            "country_region": self.country,
            "submitter_id": location_id,
            "projects": [{"code": self.project_code}],
            "province_state": self.state,
            "county": county_name,
            "reporting_org": name,
            "reporting_org_status": org_status,
        }

        clinical_id = derived_submitter_id(
            location_id, "summary_location", "summary_clinical", {"date": date}
        )
        clinical_record = {
            "confirmed": confirmed,
            "deaths": death_count,
            "submitter_id": clinical_id,
            "lastUpdateEt": date,
            "date": date,
            "summary_locations": [{"submitter_id": location_id}],
        }

        return location_record, clinical_record
예제 #6
0
    def parse_region(self, date, hospital_region):
        """
        From hospital-region data, generate the data we can submit via Sheepdog

        Args:
            date: value stored as the record date and appended to the
                "summary_clinical" submitter ID
            hospital_region (dict): region entry with "region",
                "region_description", "ICUAvail", "ICUCapacity",
                "VentsAvailable" and "VentsCapacity"

        Returns:
            (dict, dict): "summary_location" and "summary_clinical" records
        """
        region_name = hospital_region["region"]
        description = hospital_region["region_description"]

        location_id = format_submitter_id(
            "summary_location",
            {
                "project": "idph_hospital",
                "country": self.country,
                "state": self.state,
                "region": region_name,
            },
        )
        location_record = {
            "country_region": self.country,
            "submitter_id": location_id,
            "projects": [{"code": self.project_code}],
            "province_state": self.state,
            "state_hospital_region": region_name,
            "state_region_description": strip_prefix(description),
        }

        clinical_id = derived_submitter_id(
            location_id,
            "summary_location",
            "summary_clinical",
            {"project": "idph_hospital", "date": date},
        )
        clinical_record = {
            "submitter_id": clinical_id,
            "date": date,
            "summary_locations": [{"submitter_id": location_id}],
            "region_icu_avail": hospital_region["ICUAvail"],
            "region_icu_capacity": hospital_region["ICUCapacity"],
            "region_vents_available": hospital_region["VentsAvailable"],
            "region_vents_capacity": hospital_region["VentsCapacity"],
        }

        return location_record, clinical_record
예제 #7
0
 def get_location_and_clinical_submitter_id(self, county, date):
     """
     Build the "summary_location" submitter ID and the "summary_clinical"
     submitter ID derived from it.

     Args:
         county: county name, or None to build the ID from country and
             state only
         date: value appended to the "summary_clinical" submitter ID

     Returns:
         tuple: ("summary_location" submitter ID,
             "summary_clinical" submitter ID)
     """
     location_key = {"country": self.country, "state": self.state}
     if county is not None:
         # the county part is only included when a county is given
         location_key["county"] = county

     location_id = format_submitter_id("summary_location", location_key)
     clinical_id = derived_submitter_id(
         location_id, "summary_location", "summary_clinical", {"date": date}
     )
     return location_id, clinical_id
예제 #8
0
    def parse_historical_data(self, illinois_data):
        """
        Parses one entry of historical state-level data into a
        "summary_clinical" record.

        Only the "summary_clinical" record is returned; the submitter ID of
        the linked "summary_location" is computed here, but that record is
        not built by this method.

        Args:
            illinois_data (dict): data JSON with "testDate" (formatted as
                month/day/year), "total_tested", "confirmed_cases" and
                "deaths"

        Returns:
            dict: "summary_clinical" node for Sheepdog
        """
        # state-level entries use "Illinois" as the county part of the ID
        state_as_county = "Illinois"

        test_day = datetime.datetime.strptime(illinois_data["testDate"],
                                              "%m/%d/%Y")
        iso_date = test_day.strftime("%Y-%m-%d")

        location_id = format_submitter_id(
            "summary_location",
            {
                "country": self.country,
                "state": self.state,
                "county": state_as_county,
            },
        )
        clinical_id = derived_submitter_id(
            location_id,
            "summary_location",
            "summary_clinical",
            {"date": iso_date},
        )

        return {
            "submitter_id": clinical_id,
            "date": iso_date,
            "confirmed": illinois_data["confirmed_cases"],
            "testing": illinois_data["total_tested"],
            "deaths": illinois_data["deaths"],
            "summary_locations": [{"submitter_id": location_id}],
        }
예제 #9
0
    def parse_zipcode(self, date, zipcode_values):
        """
        From zipcode-level data, generate the data we can submit via Sheepdog

        Args:
            date: value stored as the record date and appended to the
                "summary_clinical" submitter ID
            zipcode_values (dict): zipcode entry with "zip",
                "confirmed_cases" and optionally "demographics"

        Returns:
            (dict, dict): "summary_location" and "summary_clinical" records
        """
        zip_code = zipcode_values["zip"]

        location_id = format_submitter_id(
            "summary_location",
            {
                "country": self.country,
                "state": self.state,
                "zipcode": zip_code,
            },
        )
        location_record = {
            "submitter_id": location_id,
            "country_region": self.country,
            "province_state": self.state,
            "zipcode": zip_code,
            "projects": [{"code": self.project_code}],
        }

        clinical_id = derived_submitter_id(
            location_id, "summary_location", "summary_clinical", {"date": date}
        )
        clinical_record = {
            "submitter_id": clinical_id,
            "date": date,
            "confirmed": zipcode_values["confirmed_cases"],
            "summary_locations": [{"submitter_id": location_id}],
        }

        if "demographics" in zipcode_values:
            demographics = zipcode_values["demographics"]

            # fields_mapping: demographic group -> (item key, value -> prefix)
            for group_name, (item_key, prefixes) in fields_mapping.items():
                for entry in demographics[group_name]:
                    prefix = prefixes[entry[item_key]]
                    if not prefix:
                        # values mapped to a falsy prefix are not reported
                        continue
                    for metric in ("count", "tested"):
                        if metric in entry:
                            target = "{}_{}".format(prefix, metric)
                            clinical_record[target] = entry[metric]

        return location_record, clinical_record
예제 #10
0
    def parse_file(self, latest_submitted_date, url):
        """
        Converts a JSON file to data we can submit via Sheepdog. Stores the
        records to submit in `self.summary_locations` and
        `self.summary_clinicals`, keyed by submitter ID.

        Nothing is stored when the date found in the data equals
        `latest_submitted_date`. When two facilities produce the same
        "summary_clinical" submitter ID, the larger "confirmed" and "deaths"
        values are kept.

        Args:
            latest_submitted_date (date): the date of latest available
                "summary_clinical" for project; may be falsy
            url (str): URL at which the JSON file is available
        """
        print("Getting data from {}".format(url))
        with closing(requests.get(url, stream=True)) as response:
            payload = response.json()
            date = idph_get_date(payload["LastUpdateDate"])

            already_submitted = (
                latest_submitted_date
                and date == latest_submitted_date.strftime("%Y-%m-%d"))
            if already_submitted:
                print(
                    "Nothing to submit: latest submitted date and date from data are the same."
                )
                return

            # state-wide totals, when the file provides them
            if "LTC_Reported_Cases" in payload:
                state_location_id = format_submitter_id(
                    "summary_location",
                    {"country": self.country, "state": self.state},
                )
                state_location = {
                    "country_region": self.country,
                    "submitter_id": state_location_id,
                    "projects": [{"code": self.project_code}],
                    "province_state": self.state,
                }

                state_clinical_id = derived_submitter_id(
                    state_location_id,
                    "summary_location",
                    "summary_clinical",
                    {"date": date},
                )
                state_clinical = {
                    "confirmed":
                    payload["LTC_Reported_Cases"]["confirmed_cases"],
                    "deaths": payload["LTC_Reported_Cases"]["deaths"],
                    "submitter_id": state_clinical_id,
                    "lastUpdateEt": date,
                    "date": date,
                    "summary_locations": [{
                        "submitter_id": state_location_id
                    }],
                }
                self.summary_locations[state_location_id] = state_location
                self.summary_clinicals[state_clinical_id] = state_clinical

            for facility in payload["FacilityValues"]:
                location, clinical = self.parse_facility(date, facility)
                location_id = location["submitter_id"]
                clinical_id = clinical["submitter_id"]

                self.summary_locations[location_id] = location

                if clinical_id in self.summary_clinicals:
                    # same submitter ID seen before: keep the larger counters
                    previous = self.summary_clinicals[clinical_id]
                    for counter in ("confirmed", "deaths"):
                        clinical[counter] = max(clinical[counter],
                                                previous[counter])

                self.summary_clinicals[clinical_id] = clinical
예제 #11
0
    def parse_statewide_values(self, date, statewide_values):
        """
        Build the state-level "summary_location" and "summary_clinical"
        records from the statewide values.

        Every key of `statewide_values` is renamed through a fixed mapping
        and copied onto the "summary_clinical" record; a key that is absent
        from the mapping raises KeyError.

        Args:
            date: value stored as the record date and appended to the
                "summary_clinical" submitter ID
            statewide_values (dict): statewide values keyed by source name

        Returns:
            (dict, dict): "summary_location" and "summary_clinical" records
        """
        # source key -> "summary_clinical" property
        renames = {
            "ICUCapacity": "state_icu_capacity",
            "ICUCovidPatients": "state_icu_covid_patients",
            "VentCapacity": "state_vent_capacity",
            "VentCovidPatients": "state_vent_covid_patients",
            "ICUAvailable": "state_icu_available",
            "VentsAvailable": "state_vents_available",
            "TotalBeds": "state_total_beds",
            "TotalBedsAvailable": "state_total_beds_available",
            "TotalBedsUsed": "state_total_beds_used",
            "PctHospitalBedsAvailable": "state_pct_hospital_beds_available",
            "AdultICUCapacity": "state_adult_icu_capacity",
            "ICUOpenBeds": "state_icu_open_beds",
            "ICUBedsUsed": "state_icu_beds_used",
            "ICUOpenBedsPct": "state_icu_open_beds_pct",
            "COVIDPUIPatients": "state_covid_pui_patients",
            "COVIDPUIPatientsPct": "state_covid_pui_patients_pct",
            "COVIDPUIPatientsBedsInUsePct":
            "state_covid_pui_patients_beds_in_use_pct",
            "VentilatorCapacity": "state_ventilator_capacity",
            "VentilatorsOpen": "state_ventilators_open",
            # NOTE(review): capital "V" kept as-is; confirm it matches the
            # property name expected downstream
            "VentilatorsOpenPct": "state_Ventilators_open_pct",
            "VentilatorsInUse": "state_ventilators_in_use",
            "VentilatorsInUseCOVID": "state_ventilators_in_use_covid",
            "VentilatorsCOVIDPatientsPct":
            "state_ventilators_covid_patients_pct",
            "VentilatorsCOVIDPatientsInUsePct":
            "state_ventilators_covid_patients_in_use_pct",
            "CovidPatientsNonICU": "state_covid_patients_non_icu",
            "TotalCOVIDPUIInICU": "state_total_covid_pui_in_icu",
            "TotalCOVIDPUIInHospital": "state_total_covid_pui_in_hospital",
            "PctBedsCOVIDPUI": "state_pct_beds_covid_pui",
            "MedSurgBeds": "state_med_surg_beds",
            "MedSurgBedsOpen": "state_med_surg_beds_open",
            "MedSurgBedsOpenPct": "state_med_surg_beds_open_pct",
            "MedSurgBedsInUse": "state_med_surg_beds_in_use",
        }

        location_id = format_submitter_id(
            "summary_location",
            {
                "project": "idph_hospital",
                "country": self.country,
                "state": self.state,
            },
        )
        location_record = {
            "submitter_id": location_id,
            "projects": [{"code": self.project_code}],
            "country_region": self.country,
            "province_state": self.state,
        }

        clinical_id = derived_submitter_id(
            location_id,
            "summary_location",
            "summary_clinical",
            {"project": "idph_hospital", "date": date},
        )
        clinical_record = {
            "submitter_id": clinical_id,
            "date": date,
            "summary_locations": [{"submitter_id": location_id}],
        }
        # rename and copy every reported value
        clinical_record.update(
            (renames[source_key], value)
            for source_key, value in statewide_values.items()
        )

        return location_record, clinical_record
예제 #12
0
    def parse_row(self, headers, row):
        """
        Build the node records for one row of the metadata table.

        The "imaging_file" node is only created when the image named in the
        "filename" column exists under `COXRAY_DATA_PATH`/images; otherwise a
        message is printed and the node is left out. Remaining properties are
        filled from the module-level `fields_mapping`.

        Args:
            headers (list): column names, in the same order as `row`
            row (list): values of one row

        Returns:
            dict: node name -> record

        Raises:
            ValueError: if a needed column name is missing from `headers`
            AssertionError: if the image exists on disk but
                `self.file_helper.find_by_name` returns no `did` for it
        """

        def column(name):
            # value of the named column in the current row
            return row[headers.index(name)]

        cmc_submitter_id = format_submitter_id("cmc_coxray", {})
        subject_submitter_id = format_submitter_id(
            "subject_coxray", {"patientid": column("patientid")})
        observation_submitter_id = derived_submitter_id(
            subject_submitter_id, "subject_coxray", "observation_coxray", {})
        follow_up_submitter_id = derived_submitter_id(
            subject_submitter_id,
            "subject_coxray",
            "follow_up_coxray",
            {"offset": column("offset")},
        )
        demographic_submitter_id = derived_submitter_id(
            subject_submitter_id, "subject_coxray", "demographic_coxray", {})
        imaging_file_submitter_id = format_submitter_id(
            "imaging_file_coxray", {"filename": column("filename")})
        study_submitter_id = format_submitter_id(
            "study_coxray", {"doi": column("doi")})

        filename = Path(column("filename"))
        filepath = Path(COXRAY_DATA_PATH).joinpath("images", filename)
        filepath_exist = filepath.exists()

        project_link = [{"code": self.project_code}]
        subject_link = [{"submitter_id": subject_submitter_id}]

        nodes = {
            "core_metadata_collection": {
                "submitter_id": cmc_submitter_id,
                "projects": list(project_link),
            },
            "study": {
                "submitter_id": study_submitter_id,
                "projects": list(project_link),
            },
            "subject": {
                "submitter_id": subject_submitter_id,
                "projects": list(project_link),
                "studies": [{"submitter_id": study_submitter_id}],
            },
            "observation": {
                "submitter_id": observation_submitter_id,
                "subjects": list(subject_link),
            },
            "follow_up": {
                "submitter_id": follow_up_submitter_id,
                "subjects": list(subject_link),
            },
            "demographic": {
                "submitter_id": demographic_submitter_id,
                "subjects": list(subject_link),
            },
        }

        if filepath_exist:
            data_type = "".join(filename.suffixes)
            did, rev, md5sum, filesize = self.file_helper.find_by_name(
                filename=filename)
            if not did:
                # Explicit raise instead of `assert`: assert statements are
                # removed under `python -O`, which would let update_authz run
                # with a missing did. AssertionError is kept so existing
                # callers see the same exception type.
                raise AssertionError(
                    f"file {filename} does not exist in the index, rerun COXRAY_FILE ETL"
                )
            self.file_helper.update_authz(did=did, rev=rev)

            nodes["imaging_file"] = {
                "submitter_id": imaging_file_submitter_id,
                "subjects": list(subject_link),
                "follow_ups": [{"submitter_id": follow_up_submitter_id}],
                "core_metadata_collections": [{
                    "submitter_id": cmc_submitter_id
                }],
                "data_type": data_type,
                "data_format": "Image File",
                "data_category": "X-Ray Image",
                "file_size": filesize,
                "md5sum": md5sum,
                "object_id": did,
            }
        else:
            print(
                f"subject references the file that doesn't exist as a file: {filepath}"
            )

        # fields_mapping: column name -> (node, property, optional converter)
        for k, (node, field, converter) in fields_mapping.items():
            value = column(k)
            # skip empty values and nodes that were not created for this row
            if node in nodes and value:
                nodes[node][field] = converter(value) if converter else value

        return nodes
예제 #13
0
    def parse_input(self, row_data, date_mode=None):
        """
        Convert one input row into a "summary_location" record and a
        "statistical_summary_report" record, and store both in
        `self.records` under their node type and submitter ID.

        Falsy source values are skipped. A row without a truthy
        "reportingOrg" or "reportDate" raises KeyError, because those values
        are needed to build the submitter IDs.

        Args:
            row_data: row indexed by the original property names
            date_mode: passed through unchanged to `format_value`
        """
        # (original property, (gen3 node, gen3 property, property type))
        mapping = [
            ("reportingOrg", ("summary_location", "reporting_org", str)),
            ("reportDate", ("statistical_summary_report", "report_date", str)),
            ("num_COVID", ("statistical_summary_report", "num_COVID", int)),
            (
                "num_COVID_deaths",
                ("statistical_summary_report", "num_COVID_deaths", int),
            ),
            (
                "num_outpatient",
                ("statistical_summary_report", "num_outpatient", int),
            ),
            (
                "num_admitted",
                ("statistical_summary_report", "num_admitted", int),
            ),
            ("num_icu", ("statistical_summary_report", "num_icu", int)),
            ("num_vent", ("statistical_summary_report", "num_vent", int)),
            ("num_resp", ("statistical_summary_report", "num_resp", int)),
            ("num_pneu", ("statistical_summary_report", "num_pneu", int)),
            ("num_diab", ("statistical_summary_report", "num_diab", int)),
            ("num_asth", ("statistical_summary_report", "num_asth", int)),
            ("num_obes", ("statistical_summary_report", "num_obes", int)),
            ("num_card", ("statistical_summary_report", "num_card", int)),
            ("num_chf", ("statistical_summary_report", "num_chf", int)),
        ]

        # row_records = { <node type>: { <record data> } }
        # (there is only 1 record of each node type per row)
        row_records = defaultdict(dict)

        for source_name, (node_type, prop_name, prop_type) in mapping:
            raw_value = row_data[source_name]
            if not raw_value:
                continue
            row_records[node_type][prop_name] = format_value(
                prop_name, raw_value, prop_type, date_mode)

        # add missing summary_location props
        location = row_records["summary_location"]
        location_id = format_submitter_id(
            "summary_location",
            {"reporting_org": location["reporting_org"]},
        )
        location.update({
            "type": "summary_location",
            "submitter_id": location_id,
            "projects": {"code": self.project_code},
            "country_region": self.country,
            "province_state": self.state,
        })

        # add missing statistical_summary_report props
        report = row_records["statistical_summary_report"]
        # NOTE(review): the node names passed below are reproduced as-is;
        # confirm they are what derived_submitter_id expects
        report_id = derived_submitter_id(
            location_id,
            "statistical_summary_report",
            "ssr",
            {"report_date": report["report_date"]},
        )
        report.update({
            "type": "statistical_summary_report",
            "submitter_id": report_id,
            "summary_locations": {"submitter_id": location_id},
        })

        for node_type, record in row_records.items():
            self.records[node_type][record["submitter_id"]] = record
예제 #14
0
    def parse_county(self, date, county_json, demographic):
        """
        From county-level data, generate the data we can submit via Sheepdog

        Args:
            date: value stored as the record date and appended to the
                "summary_clinical" submitter ID
            county_json (dict): JSON for county statistics
            demographic (dict): demographic groups; only used for the
                "Illinois" entry, and ignored when falsy

        Returns:
            (dict, dict): "summary_location" and "summary_clinical" records
        """
        county_name = county_json["County"]
        is_state_total = county_name == "Illinois"

        location_id = format_submitter_id(
            "summary_location",
            {
                "country": self.country,
                "state": self.state,
                "county": county_name,
            },
        )
        location_record = {
            "submitter_id": location_id,
            "country_region": self.country,
            "province_state": self.state,
            "projects": [{"code": self.project_code}],
        }

        # the IDPH data use Illinois in "County" field for aggregated data
        # in Gen3 it would equal to location with "province_state" equal to "IL" and no "County" field
        if not is_state_total:
            location_record["county"] = county_name

        if county_name in self.county_dict:
            known = self.county_dict[county_name]
            location_record["latitude"] = known["lat"]
            location_record["longitude"] = known["lon"]
        else:
            # fall back to the coordinates in the data; a value of 0 is
            # treated as "not provided"
            for source_key, target in (("lat", "latitude"),
                                       ("lon", "longitude")):
                if county_json[source_key] != 0:
                    location_record[target] = str(county_json[source_key])

        clinical_id = derived_submitter_id(
            location_id, "summary_location", "summary_clinical", {"date": date}
        )
        clinical_record = {
            "submitter_id": clinical_id,
            "date": date,
            "confirmed": county_json["confirmed_cases"],
            "testing": county_json["total_tested"],
            "deaths": county_json["deaths"],
            "summary_locations": [{"submitter_id": location_id}],
        }

        if "negative" in county_json:
            clinical_record["negative"] = county_json["negative"]

        if is_state_total and demographic:
            # fields_mapping: demographic group -> (item key, value -> prefix)
            for group_name, (item_key, prefixes) in fields_mapping.items():
                for entry in demographic[group_name]:
                    prefix = prefixes[entry[item_key]]
                    if not prefix:
                        # values mapped to a falsy prefix are not reported
                        continue
                    for metric in ("count", "tested"):
                        if metric in entry:
                            target = "{}_{}".format(prefix, metric)
                            clinical_record[target] = entry[metric]

        return location_record, clinical_record