Exemplos de DataPointMerger.append em Python, exemplos de covid_db.datatypes.DatapointMerger.DataPointMerger.append em Python

Exemplo n.º 1

0

Exibir arquivo

    def __get_tests_datapoints(self, SOURCE_URL, path_tests):
        """Build per-postcode total-tests datapoints from a JSON file.

        The file at ``path_tests`` is parsed as JSON and every entry of its
        top-level ``'data'`` list becomes one ``DataTypes.TESTS_TOTAL``
        datapoint under the ``'AU-NSW'`` parent region.

        :param SOURCE_URL: URL stored as ``source_url`` on each datapoint
        :param path_tests: path of the JSON file to read; it is also handed
                           to ``__get_partial_date`` together with the item's
                           ``'Date'`` value to obtain ``date_updated``
        :return: a ``DataPointMerger`` containing the generated datapoints
        :raises KeyError: if an item lacks ``'POA_NAME16'``, ``'Date'`` or
                          ``'Number'``
        """
        r = DataPointMerger()

        with open(path_tests, 'r', encoding='utf-8') as f:
            items = json.load(f)['data']

        for item in items:
            raw_postcode = item['POA_NAME16']
            try:
                # Collapse numeric forms such as 2000.0 / "2000.0" to "2000"
                postcode = str(int(float(raw_postcode)))
            except (TypeError, ValueError):
                # ValueError: non-numeric or empty text.
                # TypeError: a JSON null (None) - previously this escaped the
                # handler and crashed before the 'Unknown' fallback below.
                postcode = raw_postcode

            date = self.__get_partial_date(path_tests, item['Date'])
            number = int(item['Number'])
            # recent = item['Recent']  # TODO: the 'Recent' field is not emitted yet
            if not postcode:
                postcode = 'Unknown'

            r.append(DataPoint(
                region_schema=Schemas.POSTCODE,
                region_parent='AU-NSW',
                region_child=postcode,
                datatype=DataTypes.TESTS_TOTAL,
                value=number,
                date_updated=date,
                source_url=SOURCE_URL,
                source_id=self.SOURCE_ID
            ))

        return r

Exemplo n.º 2

0

Exibir arquivo

    def __postcode_datapoints_to_lga(self, SOURCE_URL, postcode_to_lga, r, source_id):
        """Return ``r`` plus LGA-level datapoints summed from postcode data.

        Postcode datapoints in ``r`` are mapped to an LGA through
        ``postcode_to_lga`` (postcodes without an entry are grouped under
        ``'unknown'``) and their values are summed per
        (LGA, datatype, date_updated). Sums are only emitted for
        (LGA, datatype) pairs that ``r`` does not already cover with a
        datapoint of schema ``Schemas.LGA``.

        :param SOURCE_URL: URL stored as ``source_url`` on generated datapoints
        :param postcode_to_lga: container mapping postcode -> LGA name
        :param r: iterable of datapoints to convert
        :param source_id: value stored as ``source_id`` on generated datapoints
        :return: a ``DataPointMerger`` with everything from ``r`` followed by
                 the generated LGA datapoints
        """
        merged = DataPointMerger()
        direct_lga_keys = set()   # (lga, datatype) pairs supplied directly
        seen_postcode_keys = set()
        lga_totals = Counter()

        for dp in sorted(r, key=lambda i: i.date_updated):
            if dp.region_schema == Schemas.LGA:
                direct_lga_keys.add((dp.region_child, dp.datatype))
                continue
            if dp.region_schema != Schemas.POSTCODE:
                continue

            if dp.region_child in postcode_to_lga:
                lga_name = postcode_to_lga[dp.region_child]
            else:
                # Unmapped postcodes are still counted, under 'unknown'
                lga_name = 'unknown'
                if dp.region_child != 'unknown':
                    print("NOT FOUND:", dp.region_child)

            # Only the first datapoint per postcode/datatype/date contributes
            dedupe_key = (dp.region_child, dp.datatype, dp.date_updated)
            if dedupe_key in seen_postcode_keys:
                continue
            seen_postcode_keys.add(dedupe_key)

            lga_totals[lga_name, dp.datatype, dp.date_updated] += dp.value

        merged.extend(r)

        for total_key, total_value in lga_totals.items():
            lga_name, datatype, date_updated = total_key
            if (lga_name, datatype) not in direct_lga_keys:
                # Direct LGA data takes precedence over postcode-derived sums
                merged.append(DataPoint(
                    region_schema=Schemas.LGA,
                    region_parent='AU-NSW',
                    region_child=lga_name,
                    datatype=datatype,
                    value=total_value,
                    date_updated=date_updated,
                    source_url=SOURCE_URL,
                    source_id=source_id
                ))

        return merged

Exemplo n.º 3

0

Exibir arquivo

    def get_datapoints(self):
        """Collect website datapoints for every stored date plus the current one.

        The current date key is "now minus 20 hours 30 minutes" formatted as
        ``YYYY_MM_DD``. It is appended to the sorted listing of the
        ``nsw/open_data`` data directory when not already present, and it is
        the only key for which ``download=True`` is passed on.

        :return: a plain list of the merged datapoints
        """
        shifted_now = datetime.now() - timedelta(hours=20, minutes=30)
        current_key = shifted_now.strftime('%Y_%m_%d')

        date_keys = sorted(listdir(get_data_dir() / 'nsw' / 'open_data'))
        if current_key not in date_keys:
            date_keys.append(current_key)

        merged = DataPointMerger()
        for date_key in date_keys:
            is_current = date_key == current_key
            for datapoint in self.__get_website_datapoints(date_key, download=is_current):
                merged.append(datapoint)

        return list(merged)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: NSWJSONOpenData.py Projeto: mcyph/world_subnational_covid_crawler

    def get_datapoints(self):
        """Collect open-data datapoints for every stored date plus the current one.

        The current date key is "now minus 20 hours 30 minutes" formatted as
        ``YYYY_MM_DD``; only that key is processed with ``download=True``.

        :return: a plain list of the merged datapoints
        """
        current_key = (
            datetime.now() - timedelta(hours=20, minutes=30)
        ).strftime('%Y_%m_%d')

        date_keys = sorted(listdir(get_data_dir() / 'nsw' / 'open_data'))
        if current_key not in date_keys:
            date_keys.append(current_key)

        merged = DataPointMerger()  # source_id=self.SOURCE_ID

        for date_key in date_keys:  # merged.iter_unprocessed_dates(date_keys)
            datapoints = self.__get_open_datapoints(
                date_key, download=(date_key == current_key)
            )
            for datapoint in datapoints:
                merged.append(datapoint)

        # merged.save_state()

        result = []
        result.extend(merged)
        return result

Exemplo n.º 5

0

Exibir arquivo

Arquivo: QLDNews.py Projeto: mcyph/world_subnational_covid_crawler

    def get_datapoints(self):
        """Gather datapoints from every archived copy of the stats-by-region page.

        The page at ``STATS_BY_REGION_URL_2`` is fetched through a
        ``URLArchiver`` (the cache is bypassed when ``ALWAYS_DOWNLOAD_LISTING``
        is truthy). Each archived HTML file is then run through the
        ``_get_total_*`` extractors, and finally the datapoints of
        ``StateNewsBase.get_datapoints`` are added.

        :return: a ``DataPointMerger`` with all collected datapoints
        """
        merged = DataPointMerger()
        archiver = URLArchiver(f'{self.STATE_NAME}/current_statistics')
        archiver.get_url_data(self.STATS_BY_REGION_URL_2,
                              cache=not ALWAYS_DOWNLOAD_LISTING)

        # (extractor, adder) pairs, in the order they must run. Extractors
        # returning a collection use extend; single datapoints use append.
        steps = (
            (self._get_total_cases_by_region, merged.extend),
            (self._get_total_cases, merged.append),
            (self._get_total_new_cases, merged.append),
            (self._get_total_cases_tested, merged.append),
            (self._get_total_age_breakdown, merged.extend),
            (self._get_total_dhr, merged.extend),
            (self._get_total_source_of_infection, merged.extend),
        )

        for period in archiver.iter_periods():
            for _subperiod_id, subdir in archiver.iter_paths_for_period(period):
                archived_path = archiver.get_path(subdir)

                with open(archived_path, 'r', encoding='utf-8', errors='ignore') as f:
                    html = f.read()

                for extract, add in steps:
                    found = extract(self.STATS_BY_REGION_URL_2, html)
                    # Falsy results (None / empty) are skipped
                    if found:
                        add(found)

        merged.extend(StateNewsBase.get_datapoints(self))
        return merged

Exemplo n.º 6

0

Exibir arquivo

    def __get_totals_datapoints(self, SOURCE_URL, path_totals):
        """Build per-postcode case-total datapoints from a JSON file.

        Two item layouts are handled, chosen per item:

        * items with an ``'active_cases'`` key ("new format (2022)"): read
          ``'postcode'``, ``'date'``, ``'total_cases'`` and ``'active_cases'``
          and emit a ``TOTAL`` and a ``STATUS_ACTIVE`` datapoint;
        * all other items: read ``'POA_NAME16'``, ``'Date'`` and ``'Number'``
          and emit a single ``TOTAL`` datapoint.

        NOTE(review): the original author believed this may duplicate data
        produced elsewhere - confirm it will not double the results before
        wiring it into a new call site.

        :param SOURCE_URL: URL stored as ``source_url`` on each datapoint
        :param path_totals: path of the JSON file to read; for legacy items it
                            is also passed to ``__get_partial_date``
        :return: a ``DataPointMerger`` containing the generated datapoints
        """
        r = DataPointMerger()

        def normalise_postcode(raw):
            # Collapse numeric forms such as 2000.0 / "2000.0" to "2000".
            # TypeError covers a JSON null (None), which previously escaped
            # the ValueError-only handler and crashed; falsy results fall
            # back to 'Unknown'.
            try:
                raw = str(int(float(raw)))
            except (TypeError, ValueError):
                pass
            return raw if raw else 'Unknown'

        def add(postcode, datatype, value, date):
            r.append(DataPoint(
                region_schema=Schemas.POSTCODE,
                region_parent='AU-NSW',
                region_child=postcode,
                datatype=datatype,
                value=value,
                date_updated=date,
                source_url=SOURCE_URL,
                source_id=self.SOURCE_ID
            ))

        with open(path_totals, 'r', encoding='utf-8') as f:
            items = json.load(f)['data']

        for item in items:
            if 'active_cases' in item:
                # New format (2022): dates arrive with '-' separators
                postcode = normalise_postcode(item['postcode'])
                date = item['date'].replace('-', '_')
                total = int(item['total_cases'])
                active = int(item['active_cases'])

                add(postcode, DataTypes.TOTAL, total, date)
                add(postcode, DataTypes.STATUS_ACTIVE, active, date)
            else:
                postcode = normalise_postcode(item['POA_NAME16'])
                date = self.__get_partial_date(path_totals, item['Date'])
                number = int(item['Number'])

                add(postcode, DataTypes.TOTAL, number, date)

        return r

Exemplo n.º 7

0

Exibir arquivo

    def __get_active_deaths_datapoints(self, SOURCE_URL, path_active_deaths, active_data):
        """Build per-postcode total/active/recovered/deaths datapoints.

        For every item of the file's ``'data'`` list a ``TOTAL`` and a
        ``STATUS_DEATHS`` datapoint are emitted. In addition:

        * if the postcode is a key of ``active_data``, one value is popped
          from ``active_data[postcode]`` and used for ``STATUS_ACTIVE``, and
          ``STATUS_RECOVERED`` is derived as ``cases - active - deaths``;
        * otherwise, for dates up to and including ``'2020_06_12'``,
          ``STATUS_ACTIVE`` is derived as
          ``cases - recovered - deaths - censored``.

        :param SOURCE_URL: URL stored as ``source_url`` on each datapoint
        :param path_active_deaths: path of the JSON file to read; also passed
                                   to ``__get_partial_date``
        :param active_data: mapping postcode -> object with ``pop()``;
                            NOTE: it is mutated (one value popped per match)
        :return: a ``DataPointMerger`` containing the generated datapoints
        """
        r = DataPointMerger()

        def add(postcode, datatype, value, date):
            r.append(DataPoint(
                region_schema=Schemas.POSTCODE,
                region_parent='AU-NSW',
                region_child=postcode,
                datatype=datatype,
                value=value,
                date_updated=date,
                source_url=SOURCE_URL,
                source_id=self.SOURCE_ID
            ))

        with open(path_active_deaths, 'r', encoding='utf-8') as f:
            items = json.load(f)['data']

        for item in items:
            try:
                # Collapse numeric forms such as 2000.0 / "2000.0" to "2000"
                item['POA_NAME16'] = str(int(float(item['POA_NAME16'])))
            except (TypeError, ValueError):
                # TypeError covers a JSON null (None), which previously
                # escaped the handler and crashed before the 'Unknown'
                # fallback below could apply.
                pass

            date = self.__get_partial_date(path_active_deaths, item['Date'])
            # NOTE(review): debug output left in place to keep stdout unchanged
            print(item)
            recovered = int(item.get('Recovered', 0))
            deaths = int(item['Deaths'])
            cases = int(item['Cases'])
            # NOTE: From 12 June, this heatmap reporting of active cases has
            # changed. Cases that are not recorded as recovered or deceased
            # after six weeks are not included.
            censored = int(item.get('censored', 0))
            postcode = item['POA_NAME16'] if item['POA_NAME16'] else 'Unknown'

            add(postcode, DataTypes.TOTAL, cases, date)

            if postcode in active_data:
                num_active = active_data[postcode].pop()

                # TODO(review): both derived values below were flagged
                # "CHECK ME" by the original author
                add(postcode, DataTypes.STATUS_ACTIVE, num_active, date)
                add(postcode, DataTypes.STATUS_RECOVERED,
                    cases - num_active - deaths, date)
            elif date <= '2020_06_12':
                # Date keys are YYYY_MM_DD strings, so string comparison
                # orders them chronologically
                active = cases - recovered - deaths - censored
                add(postcode, DataTypes.STATUS_ACTIVE, active, date)

            # The raw 'Recovered' figure is intentionally not emitted as
            # STATUS_RECOVERED here (it was disabled in the original code).
            add(postcode, DataTypes.STATUS_DEATHS, deaths, date)

        return r

Exemplo n.º 8

0

Exibir arquivo

    def get_nsw_age_data(self, dir_, date, download=True):
        """Return NSW-wide case totals broken down by age group and sex.

        Ensures ``fatalitiesdata.json``, ``agedata.json`` and
        ``find-facts-about-covid-19.html`` exist in ``dir_`` (all three are
        downloaded when any one is missing and ``download`` is true), then
        reads ``agedata.json`` and emits, for each age group, a
        ``TOTAL_MALE``, a ``TOTAL_FEMALE`` and a combined ``TOTAL`` datapoint.
        Null counts in the file are treated as 0.

        :param dir_: directory object (must support ``/``) holding the files
        :param date: value stored as ``date_updated`` on every datapoint
        :param download: when false and any file is missing, nothing is
                         fetched and an empty list is returned
        :return: ``[]`` in the missing-files/no-download case, otherwise a
                 ``DataPointMerger`` with the generated datapoints
        """
        source_url = 'https://www.nsw.gov.au/covid-19/find-facts-about-covid-19'
        datafiles_base = 'https://nswdac-covid-19-postcode-heatmap.azurewebsites.net/datafiles/'

        path_fatalitiesdata = dir_ / 'fatalitiesdata.json'
        path_agedata = dir_ / 'agedata.json'
        path_listing = dir_ / 'find-facts-about-covid-19.html'

        downloads = (
            (datafiles_base + 'fatalitiesdata.json', path_fatalitiesdata),
            (datafiles_base + 'agedata.json', path_agedata),
            (source_url, path_listing),
        )

        if not all(exists(path) for _url, path in downloads):
            if not download:
                return []
            for url, path in downloads:
                urlretrieve(url, path)

        # The "Last updated" date on the listing page is deliberately not
        # parsed to override ``date``: per the original author, the age data
        # comes from an external web service and is suspected not to be
        # updated at the same time as the rest of the page, and the text is
        # not always present. The page is still archived above.
        #
        # fatalitiesdata.json is likewise archived but not turned into
        # datapoints (the STATUS_DEATHS_MALE/FEMALE extraction was disabled
        # in the original code).

        r = DataPointMerger()

        with open(path_agedata, 'r', encoding='utf-8') as f:
            # {"data":[{"ageGroup":"0-9","Males":null,"Females":null},
            agedata = json.load(f)

        for age_dict in agedata['data']:
            males = age_dict['Males'] or 0
            females = age_dict['Females'] or 0

            for datatype, value in (
                (DataTypes.TOTAL_MALE, males),
                (DataTypes.TOTAL_FEMALE, females),
                (DataTypes.TOTAL, females + males),
            ):
                r.append(DataPoint(
                    region_schema=Schemas.ADMIN_1,
                    region_parent='AU',
                    region_child='AU-NSW',
                    datatype=datatype,
                    value=value,
                    agerange=age_dict['ageGroup'],
                    date_updated=date,
                    source_url=source_url,
                    text_match=None,
                    source_id=self.SOURCE_ID
                ))

        return r