def get_vic_carto_datapoints():
    # [{"postcode": 3006, "_new": 0, "activedisp": "Five or fewer active cases",
    # "cases": 65, "ratedisp": 13, "population": 18811},
    #
    # {"rows":[{"cartodb_id":287,
    # "the_geom":"0101000020E6100000386744696F226240E10B93A982E942C0",
    # "the_geom_webmercator":"0101000020110F00008D3881B2A4CD6E41295C51BCE25F51C1",
    # "postcode":3126,"affected":0,"band":"None","lat":-37.8243,"lon":145.0761,
    # "suburbs":"Camberwell East, Canterbury","active":0,"rate":0,"total":2},

    # runs before ~8:30pm are filed under the previous day's report date
    date = (datetime.now() -
            timedelta(hours=20, minutes=30)).strftime('%Y_%m_%d')
    dir_ = get_data_dir() / 'vic' / 'newmap_postcode' / date
    if not exists(dir_):
        makedirs(dir_)

    postcode_json_path = dir_ / 'postcode.json'
    if not exists(postcode_json_path):
        urlretrieve(
            "https://dhhs.carto.com:443/api/v2/sql?q=select%20*%20from%20public.covid19_postcodes",
            postcode_json_path)

    r = DataPointMerger()
    dates = sorted(listdir(get_data_dir() / 'vic' / 'newmap_postcode'))
    if date not in dates:
        dates.append(date)

    for i_date in dates:
        path = get_data_dir() / 'vic' / 'newmap_postcode' / i_date / 'postcode.json'
        r.extend(_get_datapoints(i_date, path))
    return r
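# `_get_datapoints` is not shown in this example. A minimal sketch of what it
# plausibly does, based on the Carto payload documented in the comments above;
# the datatype pairing, source_url and source_id are assumptions, and the
# DataPoint kwargs simply mirror the other readers in this file:
def _get_datapoints(date, path):
    r = []
    with open(path, 'r', encoding='utf-8') as f:
        rows = json.loads(f.read())['rows']
    for row in rows:
        for datatype, value in ((DataTypes.STATUS_ACTIVE, row['active']),
                                (DataTypes.TOTAL, row['total'])):
            r.append(DataPoint(region_schema=Schemas.POSTCODE,
                               region_parent='AU-VIC',
                               region_child=str(row['postcode']),
                               datatype=datatype,
                               value=int(value),
                               date_updated=date,
                               source_url='https://dhhs.carto.com',  # assumed
                               source_id='au_vic_dhhs_carto'))       # assumed
    return r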
    def _get_postcode_datapoints(self, date):
        # postcode	population	active	cases	rate	new	band	data_date
        # 	3000	37979	18	119	47.4	0	2	29/08/2020
        # 	3001	0	0	1	0	0	0	29/08/2020
        # 	3002	4957	2	14	40.3	0	2	29/08/2020
        # 	3003	5516	3	36	54.4	0	3	29/08/2020
        # 	3004	9311	6	63	64.4	2	3	29/08/2020
        # 	3005	523	0	0	0	0	0	29/08/2020
        # 	3006	18811	1	64	5.3	0	1	29/08/2020
        # 	3008	10438	2	49	19.2	0	1	29/08/2020
        # 	3010	1595	0	0	0	0	0	29/08/2020
        # 	3011	21464	36	164	167.7	2	4	29/08/2020

        r = []
        print("PostCode:", get_data_dir() / 'vic' / 'csv_data' / date)

        with open(get_data_dir() / 'vic' / 'csv_data' / date / 'postcode.json',
                  'r',
                  encoding='utf-8') as f:
            for row in csv.DictReader(f):
                date_updated = self.convert_date(row['data_date'])

                for datatype, value in (
                        (DataTypes.STATUS_ACTIVE, row['active']),
                        (DataTypes.TOTAL, row['cases'])):
                    r.append(
                        DataPoint(region_schema=Schemas.POSTCODE,
                                  region_parent='AU-VIC',
                                  region_child=row['postcode'],
                                  datatype=datatype,
                                  value=int(value),
                                  date_updated=date_updated,
                                  source_url=self.SOURCE_URL,
                                  source_id=self.SOURCE_ID))
        return r
    def _get_lga_datapoints(self, date):
        # LGA	lga_pid	population	active	cases	rate	new	band	LGADisplay	data_date
        # 	Alpine (S)	VIC242	12814	0	1	0	0	0	Alpine	29/08/2020
        # 	Ararat (RC)	VIC220	11845	1	7	8.4	0	1	Ararat	29/08/2020
        # 	Ballarat (C)	VIC241	109505	6	61	5.5	0	1	Ballarat	29/08/2020
        # 	Banyule (C)	VIC188	131631	30	437	22.8	0	2	Banyule	29/08/2020
        # 	Bass Coast (S)	VIC173	36320	0	11	0	0	0	Bass Coast	29/08/2020
        # 	Baw Baw (S)	VIC194	53396	1	15	1.9	0	1	Baw Baw	29/08/2020
        # 	Bayside (C)	VIC182	106862	72	227	67.4	6	3	Bayside	29/08/2020
        # 	Benalla (RC)	VIC199	14037	0	3	0	0	0	Benalla	29/08/2020

        r = []
        print("LGA:", get_data_dir() / 'vic' / 'csv_data' / date)

        with open(get_data_dir() / 'vic' / 'csv_data' / date / 'lga.json',
                  'r',
                  encoding='utf-8') as f:
            for row in csv.DictReader(f):
                #print(row)
                date_updated = self.convert_date(row['data_date'])

                for datatype, value in (
                        (DataTypes.STATUS_ACTIVE, row['active']),
                        (DataTypes.TOTAL, row['cases'])):
                    r.append(
                        DataPoint(region_schema=Schemas.LGA,
                                  region_parent='AU-VIC',
                                  region_child=normalize_locality_name(
                                      row['LGA'].split('(')[0].strip()),
                                  datatype=datatype,
                                  value=int(value),
                                  date_updated=date_updated,
                                  source_url=self.SOURCE_URL,
                                  source_id=self.SOURCE_ID))
        return r
def get_vic_tableau_datapoints():
    r = []
    for date in listdir(get_data_dir() / 'vic' / 'tableau'):
        path = get_data_dir() / 'vic' / 'tableau' / date / 'output.json'
        with open(path, 'r', encoding='utf-8') as f:
            data = json.loads(f.read())
        r.extend(_get_agegroup(data['agegroup']))
        r.extend(_get_transmissions(
            data['transmissions_over_time']))  # Mix-up??
    return r
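# `_get_agegroup` is likewise not shown. A hedged sketch, assuming
# (hypothetically) that data['agegroup'] is a list of dicts carrying
# 'agegroup', 'value' and 'date' keys; the DataPoint kwargs mirror the
# age-group reader elsewhere in this file:
def _get_agegroup(agegroup_rows):
    r = []
    for row in agegroup_rows:
        r.append(DataPoint(region_schema=Schemas.ADMIN_1,
                           region_parent='AU',
                           region_child='AU-VIC',
                           datatype=DataTypes.TOTAL,
                           agerange=row['agegroup'],  # hypothetical key
                           value=int(row['value']),   # hypothetical key
                           date_updated=row['date'],  # hypothetical key
                           source_url='https://public.tableau.com/profile/vicdhhs',
                           source_id='au_vic_tableau'))  # assumed id
    return r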
Example #5
    def __init__(self):
        # Only raw_data4.json is currently being updated,
        # so won't download the others every day
        URLBase.__init__(
            self,
            output_dir=get_data_dir() / 'vic' / 'tableau_native',
            urls_dict={
                'agegroup.json':
                URL('https://public.tableau.com/workbooks/Agegroup_15982346382420.twb',
                    static_file=False),
                'genderagegroup.json':
                URL('https://public.tableau.com/workbooks/GenderAgeGroup.twb',
                    static_file=False),
                'transmissions.json':
                URL('https://public.tableau.com/workbooks/Transmissions.twb',
                    static_file=False),
                'transmissions_over_time.json':
                URL('https://public.tableau.com/workbooks/Transmissionsovertime.twb',
                    static_file=False),
                'active_cases.json':
                URL('https://public.tableau.com/workbooks/Activecases_15982341517530.twb',
                    static_file=False),
                'cases.json':
                URL('https://public.tableau.com/workbooks/Cases_15982342702770.twb',
                    static_file=False),

                # TODO: Support healthcare workers graphs!
                'hcw_source_infections.json':
                URL('https://public.tableau.com/workbooks/HCWSourceInfections.twb',
                    static_file=False),
                'hcw_cases.json':
                URL('https://public.tableau.com/workbooks/HCWCases.twb',
                    static_file=False),
            })
        self.update()
    def __init__(self):
        URLBase.__init__(
            self,
            output_dir=get_data_dir() / 'vic' / 'csv_data',
            urls_dict={
                'lga.json':
                URL('https://docs.google.com/spreadsheets/d/e/2PACX-1vQ9oKYNQhJ6v85dQ9qsybfMfc-eaJ9oKVDZKx-VGUr6szNoTbvsLTzpEaJ3oW_LZTklZbz70hDBUt-d/pub?gid=0&single=true&output=csv',
                    static_file=False),
                'postcode.json':
                URL('https://docs.google.com/spreadsheets/d/e/2PACX-1vTwXSqlP56q78lZKxc092o6UuIyi7VqOIQj6RM4QmlVPgtJZfbgzv0a3X7wQQkhNu8MFolhVwMy4VnF/pub?gid=0&single=true&output=csv',
                    static_file=False),
                'agegroup.csv':
                URL('https://www.dhhs.vic.gov.au/ncov-covid-cases-by-age-group-csv',
                    static_file=False),
                'all_lga.csv':
                URL('https://www.dhhs.vic.gov.au/ncov-covid-cases-by-lga-csv',
                    static_file=False),
                'all_lga_acquired_source':
                URL('https://www.dhhs.vic.gov.au/ncov-covid-cases-by-lga-source-csv',
                    static_file=False),
                'all_acquired_source':
                URL('https://www.dhhs.vic.gov.au/ncov-covid-cases-by-source-csv',
                    static_file=False)
            })
        self.update()
    def _get_all_lga_datapoints(self, date):
        r = []
        current_date = None
        by_lga = ExpiringCounter()

        with open(get_data_dir() / 'vic' / 'csv_data' / date / 'all_lga.csv',
                  'r',
                  encoding='utf-8') as f:
            for row in sorted(csv.DictReader(f), key=lambda x: x['diagnosis_date']) + \
                       [{'diagnosis_date': '1111-01-01', 'Localgovernmentarea': None}]:
                # the appended sentinel row forces a final flush of the
                # accumulated counts after the last real diagnosis date

                date_updated = self.convert_date(row['diagnosis_date'])

                if current_date != date_updated:
                    if current_date is not None:
                        for lga, value in by_lga.items():
                            r.append(
                                DataPoint(region_schema=Schemas.LGA,
                                          region_parent='AU-VIC',
                                          region_child=normalize_locality_name(
                                              lga.split('(')[0].strip()),
                                          datatype=DataTypes.TOTAL,
                                          value=int(value),
                                          date_updated=current_date,
                                          source_url=self.SOURCE_URL,
                                          source_id=self.SOURCE_ID))
                    current_date = date_updated

                if row['Localgovernmentarea']:
                    by_lga[row['Localgovernmentarea'].strip('_')] += 1
        return r
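# The accumulate-then-flush pattern above emits one cumulative snapshot per
# diagnosis date. A minimal standalone illustration with hypothetical rows
# (plain Counter stands in for the project's ExpiringCounter):
from collections import Counter

rows = [
    {'diagnosis_date': '2020-08-01', 'lga': 'Melbourne'},
    {'diagnosis_date': '2020-08-01', 'lga': 'Yarra'},
    {'diagnosis_date': '2020-08-02', 'lga': 'Melbourne'},
]
totals = Counter()
snapshots = []
current = None
for row in rows + [{'diagnosis_date': 'SENTINEL', 'lga': None}]:
    if current != row['diagnosis_date']:
        if current is not None:
            snapshots.append((current, dict(totals)))  # flush previous date
        current = row['diagnosis_date']
    if row['lga']:
        totals[row['lga']] += 1
# snapshots == [('2020-08-01', {'Melbourne': 1, 'Yarra': 1}),
#               ('2020-08-02', {'Melbourne': 2, 'Yarra': 1})]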
Example #8
    def get_datapoints(self):
        dir_ = get_data_dir() / 'vic' / 'google_sheets'
        path_data_page_31_Jul = dir_ / 'data_page_2020_07_31.csv'
        path_data_page_08_Aug = dir_ / 'data_page_2020_08_06.csv'
        path_source_page = dir_ / 'source_page.csv'

        if not exists(path_data_page_31_Jul):
            urlretrieve(
                URL_TEMPLATE % {
                    'long_id': '1oxJt0BBPzk-w2Gn1ImO4zASBCdqeeLJRwHEA4DASBFQ',
                    'short_id': '0'
                }, path_data_page_31_Jul)
            urlretrieve(
                URL_TEMPLATE % {
                    'long_id': '1oxJt0BBPzk-w2Gn1ImO4zASBCdqeeLJRwHEA4DASBFQ',
                    'short_id': '1919344323'
                }, path_data_page_08_Aug)
            urlretrieve(
                URL_TEMPLATE % {
                    'long_id': '1oxJt0BBPzk-w2Gn1ImO4zASBCdqeeLJRwHEA4DASBFQ',
                    'short_id': '1195577978'
                }, path_source_page)

        r = []
        r.extend(self._get_from_path(path_data_page_31_Jul, '2020_07_31'))
        r.extend(self._get_from_path(path_data_page_08_Aug, '2020_08_06'))
        return r
    def _get_agegroup_datapoints(self, date):
        r = []
        current_date = None
        by_agegroup = Counter()

        with open(get_data_dir() / 'vic' / 'csv_data' / date / 'agegroup.csv',
                  'r',
                  encoding='utf-8') as f:
            for row in sorted(csv.DictReader(f), key=lambda x: x['diagnosis_date']) + \
                       [{'diagnosis_date': '1111-01-01', 'agegroup': None}]:

                assert len(row['diagnosis_date']) in (9, 10), row['diagnosis_date']
                date_updated = self.convert_date(row['diagnosis_date'])

                if current_date != date_updated:
                    if current_date is not None:
                        for agerange, value in by_agegroup.items():
                            r.append(
                                DataPoint(region_schema=Schemas.ADMIN_1,
                                          region_parent='AU',
                                          region_child='AU-VIC',
                                          datatype=DataTypes.TOTAL,
                                          agerange=agerange,
                                          value=int(value),
                                          date_updated=current_date,
                                          source_url=self.SOURCE_URL,
                                          source_id=self.SOURCE_ID))
                    current_date = date_updated

                if row['agegroup']:
                    by_agegroup[row['agegroup'].strip('_')] += 1
        return r
    def get_datapoints(self):
        r = DataPointMerger()
        for date in r.iter_unprocessed_dates(
                sorted(listdir(get_data_dir() / 'vic' / 'csv_data'))):
            r.extend(self._get_postcode_datapoints(date))
            r.extend(self._get_lga_datapoints(date))

            #print(get_data_dir(), date)

            if (get_data_dir() / 'vic' / 'csv_data' / date /
                    'agegroup.csv').exists():
                r.extend(self._get_agegroup_datapoints(date))
            if (get_data_dir() / 'vic' / 'csv_data' / date /
                    'all_lga.csv').exists():
                r.extend(self._get_all_lga_datapoints(date))
            if (get_data_dir() / 'vic' / 'csv_data' / date /
                    'all_lga_acquired_source').exists():
                r.extend(self._get_all_lga_acquired_source_datapoints(date))
            if (get_data_dir() / 'vic' / 'csv_data' / date /
                    'all_acquired_source').exists():
                r.extend(self._get_all_acquired_source_datapoints(date))
        return r
Example #11
    def _get_new_dir(self):
        revision_id = 0
        while True:
            fmt = '%%y_%%m_%%d-%03d' % revision_id
            child_dir_name = datetime.now().strftime(fmt)
            path = get_data_dir() / self.STATE_NAME / 'case_locs' / child_dir_name

            if path.exists():
                revision_id += 1
                continue
            else:
                path.mkdir()
                return path
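# For example, three runs on 29 Aug 2020 would create sibling directories named
# 20_08_29-000, 20_08_29-001 and 20_08_29-002: '%y' gives the two-digit year,
# and the %03d revision suffix keeps same-day grabs from colliding.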
Example #12
    def __get_website_datapoints(self, date, download=True):
        dir_ = get_data_dir() / 'nsw' / 'open_data' / date
        if not exists(dir_):
            makedirs(dir_)

        postcode_to_lga, _ = NSWJSONOpenData().get_nsw_cases_data_postcode_to_lga(dir_, download=download)
        NSWJSONOpenData().get_nsw_tests_data(dir_, postcode_to_lga, download=download)

        # Add website data
        website_data = []
        website_data.extend(self.get_nsw_postcode_data(dir_, download=download))
        website_data.extend(self.__postcode_datapoints_to_lga('https://data.nsw.gov.au/nsw-covid-19-data',
                                                              postcode_to_lga, website_data,
                                                              source_id=self.SOURCE_ID))
        # Age distributions
        website_data.extend(self.get_nsw_age_data(dir_, date, download=download))
        return website_data
Example #13
    def get_datapoints(self):
        date = (
            datetime.now() - timedelta(hours=20, minutes=30)
        ).strftime('%Y_%m_%d')

        dates = sorted(listdir(get_data_dir() / 'nsw' / 'open_data'))
        if date not in dates:
            dates.append(date)

        website_data = DataPointMerger()
        for i_date in dates:
            download = i_date == date
            for datapoint in self.__get_website_datapoints(i_date, download=download):
                website_data.append(datapoint)

        r = []
        r.extend(website_data)
        return r
    def __get_open_datapoints(self, date, download=True):
        dir_ = get_data_dir() / 'nsw' / 'open_data' / date
        if not exists(dir_):
            makedirs(dir_)

        # Add open data
        open_data = []
        postcode_to_lga, datapoints = self.get_nsw_cases_data_postcode_to_lga(
            dir_, download=download)
        open_data.extend(datapoints)
        open_data.extend(
            self.get_nsw_tests_data(dir_, postcode_to_lga, download=download))
        open_data.extend(
            self.__postcode_datapoints_to_lga(
                'https://data.nsw.gov.au/nsw-covid-19-data',
                postcode_to_lga,
                open_data,
                source_id=self.SOURCE_ID))
        return open_data
    def get_datapoints(self):
        date = datetime.now() - timedelta(hours=20, minutes=30)
        date = date.strftime('%Y_%m_%d')

        dates = sorted(listdir(get_data_dir() / 'nsw' / 'open_data'))
        if date not in dates:
            dates.append(date)

        open_data = DataPointMerger()  # source_id=self.SOURCE_ID

        for i_date in dates:  # open_data.iter_unprocessed_dates(dates)
            download = i_date == date
            for datapoint in self.__get_open_datapoints(i_date,
                                                        download=download):
                open_data.append(datapoint)

        #open_data.save_state()

        r = []
        r.extend(open_data)
        return r
    def _get_all_acquired_source_datapoints(self, date):
        r = []
        current_date = None
        by_source = Counter()

        sources = {
            'Acquired in Australia, unknown source':
            DataTypes.SOURCE_COMMUNITY,
            'Contact with a confirmed case': DataTypes.SOURCE_CONFIRMED,
            'Travel overseas': DataTypes.SOURCE_OVERSEAS,
            'Under investigation': DataTypes.SOURCE_UNDER_INVESTIGATION
        }

        with open(get_data_dir() / 'vic' / 'csv_data' / date /
                  'all_acquired_source',
                  'r',
                  encoding='utf-8') as f:
            for row in sorted(csv.DictReader(f), key=lambda x: x['diagnosis_date']) + \
                       [{'diagnosis_date': '1111-01-01', 'acquired': None}]:

                date_updated = self.convert_date(row['diagnosis_date'])

                if current_date != date_updated:
                    if current_date is not None:
                        for source, value in by_source.items():
                            r.append(
                                DataPoint(region_schema=Schemas.ADMIN_1,
                                          region_parent='AU',
                                          region_child='AU-VIC',
                                          datatype=sources[source],
                                          value=int(value),
                                          date_updated=current_date,
                                          source_url=self.SOURCE_URL,
                                          source_id=self.SOURCE_ID))
                    current_date = date_updated

                if row['acquired']:
                    by_source[row['acquired'].strip('_')] += 1
        return r
import json
import datetime
from os import listdir

from _utility.URLArchiver import URLArchiver
from _utility.cache_by_date import cache_by_date
from _utility.get_package_dir import get_data_dir
from covid_db.datatypes.DataPoint import DataPoint
from covid_db.datatypes.enums import Schemas, DataTypes
from covid_db.datatypes.DatapointMerger import DataPointMerger


SA_MAP_DIR = get_data_dir() / 'sa' / 'custom_map'


class SARegionsReader:
    SOURCE_ID = 'au_sa_old_dashmap'
    SOURCE_URL = 'https://www.covid-19.sa.gov.au/home/dashboard'
    SOURCE_DESCRIPTION = ''

    def get_datapoints(self):
        SA_DASH_JSON_URL = 'https://www.covid-19.sa.gov.au/__data/assets/' \
                           'file/0004/145849/covid_19_daily.json'
        ua = URLArchiver('sa/dashboard')
        ua.get_url_data(SA_DASH_JSON_URL, cache=False)

        r = []
        dpm = DataPointMerger()
        for sub_dir in sorted(listdir(SA_MAP_DIR)):
            joined_dir = f'{SA_MAP_DIR}/{sub_dir}'
            r.extend(self._get_data(joined_dir, dpm))
Example #18
import re
import ssl
import unicodedata
import urllib.request
from urllib.request import urlretrieve, urlopen

from _utility.get_package_dir import get_data_dir

#ssl._create_default_https_context = lambda: ssl.create_default_context(cafile=certifi.where())
ssl._create_default_https_context = ssl._create_unverified_context

proxy = urllib.request.ProxyHandler({})
opener = urllib.request.build_opener(proxy)
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0'
)]
urllib.request.install_opener(opener)

BASE_PATH = get_data_dir()


def slugify(value):
    """
    Function from Django, under the 3-clause BSD
    https://docs.djangoproject.com/en/3.0/ref/utils/

    Converts to lowercase, removes non-word characters (alphanumerics and
    underscores) and converts spaces to hyphens. Also strips leading and
    trailing whitespace.
    """
    value = unicodedata.normalize('NFKD',
                                  value).encode('ascii',
                                                'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    return re.sub(r'[-\s]+', '-', value)
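# Example (hypothetical input):
#   slugify("Jack & Jill's COVID data")  ->  'jack-jills-covid-data'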
from os import makedirs, environ, pathsep, system
from os.path import expanduser
from sys import path  # used below for path.append(GECKO_BROWSER_DIR)
from browsermobproxy import Server
from selenium import webdriver
from _utility.get_package_dir import get_data_dir


BROWSER_MOB_PROXY_LOC = expanduser(
    '~/browsermob-proxy-2.1.4-bin/'
    'browsermob-proxy-2.1.4/bin/'
    'browsermob-proxy'
)
GECKO_BROWSER_DIR = expanduser(
    '~/geckodriver-v0.26.0-linux64/'
)
PATH_PREFIX = get_data_dir() / 'vic' / 'tableau'


JSON_URL_INCLUDES = '/sessions/'

TRANSMISSIONS_OVER_TIME_URL = 'https://public.tableau.com/profile/vicdhhs#!/vizhome/Transmissionsovertime/DashboardPage'
TRANSMISSIONS_URL = 'https://public.tableau.com/profile/vicdhhs#!/vizhome/Transmissions/dashpage'
AGEGROUP_URL = 'https://public.tableau.com/profile/vicdhhs#!/vizhome/Agegroup_15982346382420/DashboardPage'


class _VicTableau:
    def run_vic_tableau(self):
        self.output_dir = self._get_output_json_dir()

        path.append(GECKO_BROWSER_DIR)
        environ["PATH"] += pathsep + GECKO_BROWSER_DIR
WA_REGIONS_URL = (
    #'https://ww2.health.wa.gov.au/Articles/'
    #'A_E/Coronavirus/COVID19-statistics'
    'https://experience.arcgis.com/experience/359bca83a1264e3fb8d3b6f0a028d768'
)


URL_REGIONS = 'https://services.arcgis.com/Qxcws3oU4ypcnx4H/arcgis/rest/services/confirmed_cases_by_LGA_view_layer/FeatureServer/0/query'
URL_SOURCE_OF_INFECTION = 'https://services.arcgis.com/Qxcws3oU4ypcnx4H/arcgis/rest/services/Epidemic_curve_date_new_view_layer/FeatureServer/0/query?f=json&where=Total_Confirmed%20IS%20NOT%20NULL&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=Date%20asc&outSR=102100&resultOffset=0&resultRecordCount=32000&resultType=standard&cacheHint=true'
URL_OTHER_STATS = 'https://services.arcgis.com/Qxcws3oU4ypcnx4H/arcgis/rest/services/COVID19_Dashboard_Chart_ViewLayer/FeatureServer/0/query?f=json&where=new_cases%20IS%20NOT%20NULL&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=date%20asc&outSR=102100&resultOffset=0&resultRecordCount=32000&resultType=standard&cacheHint=true'
URL_MF_BALANCE = 'https://services.arcgis.com/Qxcws3oU4ypcnx4H/arcgis/rest/services/Age_sex_total_COVID19_Chart_view_layer/FeatureServer/0/query?f=json&where=Age_Group%3D%27Total%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&resultOffset=0&resultRecordCount=50&resultType=standard&cacheHint=true'
URL_AGE_BALANCE = 'https://services.arcgis.com/Qxcws3oU4ypcnx4H/arcgis/rest/services/Age_sex_total_COVID19_Chart_view_layer/FeatureServer/0/query?f=json&where=Age_Group%3C%3E%27Total%27&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&resultOffset=0&resultRecordCount=32000&resultType=standard&cacheHint=true'
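# A minimal sketch of consuming one of the FeatureServer queries above. ArcGIS
# f=json responses carry a 'features' list of {'attributes': {...}} dicts; the
# helper name here is an assumption, not part of this module:
def _fetch_arcgis_rows(query_url):
    import json
    from urllib.request import urlopen
    with urlopen(query_url) as f:
        data = json.loads(f.read())
    return [feature['attributes'] for feature in data['features']]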


PATH_PREFIX = get_data_dir() / 'wa' / 'custom_dash'


class _WADash:
    def run_wa_dash(self):
        self.output_dir = self._get_output_json_dir()

        path.append(GECKO_BROWSER_DIR)
        environ["PATH"] += pathsep + GECKO_BROWSER_DIR
        # Linux truncates process names to 15 chars, so this matches browsermob-proxy
        system('killall browsermob-prox')

        self.__grab()

    def _get_output_json_dir(self):
        time_format = datetime.datetime \
            .now() \
Example #21
import pprint
import json
from _utility.get_package_dir import get_data_dir
from covid_db.datatypes.DatapointMerger import DataPointMerger
from covid_db.datatypes.DataPoint import DataPoint
from covid_db.datatypes.enums import Schemas, DataTypes
from _utility.normalize_locality_name import normalize_locality_name


SA_TABLEAU_MAP_DIR = get_data_dir() / 'sa' / 'custom_map_tableau'


class SARegionsTableauReader:
    SOURCE_ID = 'au_sa_dashmap'
    SOURCE_URL = 'https://www.covid-19.sa.gov.au/home/dashboard'
    SOURCE_DESCRIPTION = ''

    def __init__(self):
        pass

    def get_datapoints(self):
        r = DataPointMerger()
        for path in SA_TABLEAU_MAP_DIR.iterdir():
            r.extend(self._get_datapoints(path))
        return r

    def _get_datapoints(self, path):
        date = path.name.split('-')[0]
        print(date)

        for path in path.iterdir():
Example #22
from os.path import exists
from datetime import datetime

from covid_db.datatypes.enums import Schemas, DataTypes
from covid_crawlers.oceania.au_data.PowerBIDataReader import PowerBIDataReader
from covid_crawlers.oceania.au_data.vic.deprecated.VicPowerBI import VicPowerBI, get_globals
from covid_db.datatypes.DataPoint import DataPoint
from _utility.get_package_dir import get_data_dir

BASE_PATH = get_data_dir() / 'vic' / 'powerbi'


class VicPowerBIReader(PowerBIDataReader):
    SOURCE_ID = 'au_vic_powerbi'
    SOURCE_URL = 'https://app.powerbi.com/view?r=' \
             'eyJrIjoiODBmMmE3NWQtZWNlNC00OWRkLTk1NjYtM' \
             'jM2YTY1MjI2NzdjIiwidCI6ImMwZTA2MDFmLTBmYW' \
             'MtNDQ5Yy05Yzg4LWExMDRjNGViOWYyOCJ9'
    SOURCE_DESCRIPTION = ''

    def __init__(self):
        base_path = VicPowerBI.PATH_PREFIX
        source_url = VicPowerBI.POWERBI_URL

        self.base_path = base_path
        self.source_url = source_url

        PowerBIDataReader.__init__(self, base_path, get_globals())

    def get_datapoints(self):
        # Use a fallback only if can't get from the source
Example #23
from os.path import expanduser
from urllib.request import urlopen
from browsermobproxy import Server
from selenium import webdriver
from _utility.get_package_dir import get_data_dir

BROWSER_MOB_PROXY_LOC = expanduser('~/browsermob-proxy-2.1.4-bin/'
                                   'browsermob-proxy-2.1.4/bin/'
                                   'browsermob-proxy')
GECKO_BROWSER_DIR = expanduser('~/geckodriver-v0.26.0-linux64/')
SA_REGIONS_URL = (
    #'https://dpc.geohub.sa.gov.au/portal/apps/View/index.html?appid=963e7887610146ec813e7889bb658805'
    #'https://dpc.geohub.sa.gov.au/portal/apps/View/index.html?appid=1ae1bf4b7b6a46bda4a65b48c2da9406'
    'https://public.tableau.com/views/COVID-19casesinSouthAustraliabyLocalGovernmentArea_15971074477850/MapLive?:embed=y&:showVizHome=no&:host_url=https%3A%2F%2Fpublic.tableau.com%2F&:embed_code_version=3&:tabs=no&:toolbar=no&:animate_transition=yes&:display_static_image=no&:display_spinner=no&:display_overlay=yes&:display_count=yes&:language=en&:loadOrderID=0'
)
PATH_PREFIX = get_data_dir() / 'sa' / 'custom_map_tableau'


class _SARegions:
    def run_wa_regions(self):
        self.output_dir = self._get_output_json_dir()

        path.append(GECKO_BROWSER_DIR)
        environ["PATH"] += pathsep + GECKO_BROWSER_DIR
        system('killall browsermob-prox')

        for xx, json_data in enumerate(self.__grab()):
            if not isinstance(json_data, bytes):
                with open(f"{self.output_dir}/json_output-{xx}.json",
                          'w',
                          encoding='utf-8') as f:
    def _get_all_lga_acquired_source_datapoints(self, date):
        r = []
        current_date = None
        by_postcode = {}
        by_lga = {}

        sources = {
            'Acquired in Australia, unknown source':
            DataTypes.SOURCE_COMMUNITY,
            'Contact with a confirmed case': DataTypes.SOURCE_CONFIRMED,
            'Travel overseas': DataTypes.SOURCE_OVERSEAS,
            'Under investigation': DataTypes.SOURCE_UNDER_INVESTIGATION
        }

        with open(get_data_dir() / 'vic' / 'csv_data' / date /
                  'all_lga_acquired_source',
                  'r',
                  encoding='utf-8') as f:
            for row in sorted(csv.DictReader(f), key=lambda x: x['diagnosis_date']) + \
                       [{'diagnosis_date': '1111-01-01',
                         'Postcode': None,
                         'Localgovernmentarea': None,
                         'acquired': None}]:

                date_updated = self.convert_date(row['diagnosis_date'])

                if current_date != date_updated:
                    if current_date is not None:
                        #for postcode, by_source in by_postcode.items():
                        #    for source, value in by_source.items():
                        #        r.append(DataPoint(
                        #            region_schema=Schemas.POSTCODE,
                        #            region_parent='AU-VIC',
                        #            region_child=postcode,
                        #            datatype=sources[source],
                        #            value=int(value),
                        #            date_updated=current_date,
                        #            source_url=self.SOURCE_URL,
                        #            source_id=self.SOURCE_ID
                        #        ))
                        for lga, by_source in by_lga.items():
                            for source, value in by_source.items():
                                r.append(
                                    DataPoint(
                                        region_schema=Schemas.LGA,
                                        region_parent='AU-VIC',
                                        region_child=normalize_locality_name(
                                            lga),
                                        datatype=sources[source],
                                        value=int(value),
                                        date_updated=current_date,
                                        source_url=self.SOURCE_URL,
                                        source_id=self.SOURCE_ID))
                    current_date = date_updated

                if row['Localgovernmentarea']:
                    by_lga.setdefault(
                        row['Localgovernmentarea'].split('(')[0].strip(),
                        ExpiringCounter())[row['acquired']] += 1
                if row['Postcode']:
                    by_postcode.setdefault(
                        row['Postcode'].strip('_'),
                        ExpiringCounter())[row['acquired']] += 1

        return r
    def get_latest_dir(self):
        return sorted(
            (get_data_dir() / self.STATE_NAME / self.SOURCE_ID).iterdir())[-1]