def __init__(self, standard=_list_standard[0]):
    """__init__ member function, with default definition of
    the used standard. To get the current default standard,
    see get_list_standard()[0].
    """
    verb("Init of GeoManager()")
    self.set_standard(standard)
    self._gr = GeoRegion()
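# Usage sketch for GeoManager (hedged: assumes this class is exported by the
# module and that 'iso2' appears in get_list_standard()):
#
#     gm = GeoManager()          # default standard, i.e. get_list_standard()[0]
#     gm = GeoManager('iso2')    # or pick a standard explicitly
#     gm.set_standard('name')    # the standard can also be changed afterwards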
def __init__(self, gm=0):
    """__init__ member function."""
    verb("Init of GeoInfo()")
    if gm != 0:
        self._gm = gm
    else:
        self._gm = GeoManager()
    self._grp = self._gm._gr.get_pandas()
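# Usage sketch for GeoInfo (hedged: GeoInfo builds its own GeoManager when
# none is given; passing an existing manager avoids re-initializing one):
#
#     gi = GeoInfo()           # default: creates a fresh GeoManager internally
#     gm = GeoManager('name')
#     gi = GeoInfo(gm)         # reuse an already-configured manager instead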
def __init__(self):
    """__init__ member function."""
    # if 'XK' in self._country_list:
    #     del self._country_list['XK']
    # (deleting 'XK' creates bugs in pycountry, and it is currently a
    # contested country)

    # --- get the UN M49 information and organize the data in _region_dict
    verb("Init of GeoRegion()")
    try:
        p_m49 = pd.read_html(self._source_dict["UN_M49"])[1]
    except Exception:
        raise CocoaConnectionError('Cannot connect to the UN_M49 '
                                   'wikipedia page. '
                                   'Please check your connection or '
                                   'availability of the page.')
    p_m49.columns = ['code', 'region_name']
    # suppress information in parentheses in the region name
    p_m49['region_name'] = [r.split('(')[0].rstrip()
                            for r in p_m49.region_name]
    self._region_dict.update(p_m49.to_dict('split')['data'])
    # add UE and a few other groupings for other analyses
    self._region_dict.update({
        "UE": "European Union",
        "G7": "G7",
        "G8": "G8",
        "G20": "G20",
        "OECD": "Oecd",
        "G77": "G77",
    })

    # --- get the United Nations GeoScheme and organize the data
    try:
        p_gs = pd.read_html(self._source_dict["GeoScheme"])[0]
    except Exception:
        raise CocoaConnectionError('Cannot connect to the UN GeoScheme '
                                   'wikipedia page. '
                                   'Please check your connection or '
                                   'availability of the page.')
    p_gs.columns = ['country', 'capital', 'iso2', 'iso3', 'num', 'm49']
    idx = []
    reg = []
    cap = []
    for index, row in p_gs.iterrows():
        if row.iso3 != '–':  # skip non-standard iso in the wikipedia UN GeoScheme
            for r in row.m49.replace(" ", "").split('<'):
                idx.append(row.iso3)
                reg.append(int(r))
                cap.append(row.capital)
    self._p_gs = pd.DataFrame({'iso3': idx, 'capital': cap, 'region': reg})
    self._p_gs = self._p_gs.merge(p_m49, how='left', left_on='region',
                                  right_on='code').drop(["code"], axis=1)
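# Usage sketch for GeoRegion (hedged: both lookups below depend on the
# wikipedia pages listed in _source_dict being reachable at init time):
#
#     gr = GeoRegion()
#     gr._region_dict["G20"]   # -> 'G20', one of the hand-added groupings
#     gr._p_gs.head()          # DataFrame with iso3 / capital / region /
#                              # region_name columns built from the UN GeoScheme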
def __init__(self, db=None):
    """__init__ member function."""
    verb("Init of CocoDisplay()")
    self.colors = itertools.cycle(Paired12)
    self.coco_circle = []
    self.coco_line = []
    self.base_fig = None
    self.hover_tool = None
    self.increment = 1
    if db is None:
        self.info = coge.GeoInfo()
    else:
        self.info = coge.GeoInfo(db.geo)
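# Usage sketch for CocoDisplay (hedged: `some_db` stands for any object
# exposing a `.geo` GeoManager, e.g. a covid19.DataBase instance):
#
#     cd = CocoDisplay()             # standalone: builds its own GeoInfo
#     cd = CocoDisplay(db=some_db)   # reuse the GeoManager attached to a database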
def __init__(self, db_name):
    """Fill the pandas_datase attribute for the requested database."""
    verb("Init of covid19.DataBase()")
    self.database_name = ['jhu', 'spf', 'owid', 'opencovid19']
    self.csv_url_parsed = []
    self.pandas_datase = {}
    self.available_keys_words = []
    self.dates = []
    self.dicos_countries = {}
    self.dict_current_days = {}
    self.dict_cumul_days = {}
    self.dict_diff_days = {}
    self.database_columns_not_computed = {}
    self.db = db_name
    if self.db not in ('spf', 'opencovid19'):
        self.geo = coge.GeoManager('name')

    if self.db not in self.database_name:
        raise CocoaDbError('Unknown ' + self.db + '. Available databases '
                           'so far in CoCoa are: ' + str(self.database_name))
    else:
        if self.db == 'jhu':
            info('JHU aka Johns Hopkins database selected ...')
            self.pandas_datase = self.parse_convert_jhu()
        elif self.db == 'spf':
            info('SPF aka Sante Publique France database selected ...')
            info('... three different databases from SPF will be parsed ...')
            # https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/
            # Parse and convert the SPF data structure to the JHU one,
            # for historical reasons.
            #   hosp  number of people currently hospitalized
            #   rea   number of people currently in resuscitation or critical care
            #   rad   total number of patients who returned home
            #   dc    total number of deaths at the hospital
            # 'sexe' == 0 means male + female
            cast = {'dep': 'string'}
            rename = {'jour': 'date', 'dep': 'location'}
            constraints = {'sexe': 0}
            url = "https://www.data.gouv.fr/fr/datasets/r/63352e38-d353-4b54-bfd1-f1b3ee1cabd7"
            spf1 = self.csv_to_pandas_index_location_date(
                url, rename_columns=rename, constraints=constraints, cast=cast)
            self.csv_url_parsed.append(url)
            # https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/
            #   incid_hosp  daily number of newly hospitalized people
            #   incid_rea   daily number of new intensive care admissions
            #   incid_dc    daily number of newly deceased people
            #   incid_rad   daily number of new returns home
            url = "https://www.data.gouv.fr/fr/datasets/r/6fadff46-9efd-4c53-942a-54aca783c30c"
            spf2 = self.csv_to_pandas_index_location_date(
                url, rename_columns=rename, cast=cast)
            self.csv_url_parsed.append(url)
            # https://www.data.gouv.fr/fr/datasets/donnees-relatives-aux-resultats-des-tests-virologiques-covid-19/
            #   T  number of tests performed
            #   P  number of positive tests
            constraints = {'cl_age90': 0}
            url = "https://www.data.gouv.fr/fr/datasets/r/406c6a23-e283-4300-9484-54e78c8ae675"
            spf3 = self.csv_to_pandas_index_location_date(
                url, rename_columns=rename, constraints=constraints, cast=cast)
            self.csv_url_parsed.append(url)
            # https://www.data.gouv.fr/fr/datasets/indicateurs-de-suivi-de-lepidemie-de-covid-19/#_
            # Hospital strain ('tension hospitaliere'):
            #   green:  occupation rate between 0 and 40%
            #   orange: occupation rate between 40 and 60%
            #   red:    occupation rate above 60%
            # R0:
            #   green:  R0 between 0 and 1
            #   orange: R0 between 1 and 1.5
            #   red:    R0 above 1.5
            cast = {'departement': 'string'}
            rename = {'extract_date': 'date', 'departement': 'location'}
            columns_skipped = ['region', 'libelle_reg', 'libelle_dep',
                               'tx_incid_couleur', 'R_couleur',
                               'taux_occupation_sae_couleur',
                               'tx_pos_couleur', 'nb_orange', 'nb_rouge']
            url = "https://www.data.gouv.fr/fr/datasets/r/4acad602-d8b1-4516-bc71-7d5574d5f33e"
            spf4 = self.csv_to_pandas_index_location_date(
                url, rename_columns=rename, separator=',',
                encoding="ISO-8859-1", cast=cast)
            self.csv_url_parsed.append(url)
            result = pd.concat([spf1, spf2, spf3, spf4], axis=1, sort=False)
            self.pandas_datase = self.pandas_index_location_date_to_jhu_format(
                result, columns_skipped=columns_skipped)
        elif self.db == 'opencovid19':
            info('OPENCOVID19 selected ...')
            rename = {'jour': 'date', 'maille_nom': 'location'}
            constraints = {'granularite': 'pays'}
            columns_skipped = [
                'maille_code', 'source_nom', 'source_url',
                'source_archive', 'source_type'
            ]
            url = 'https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/chiffres-cles.csv'
            opencovid19 = self.csv_to_pandas_index_location_date(
                url, constraints=constraints, rename_columns=rename,
                separator=',')
            self.csv_url_parsed.append(url)
            self.pandas_datase = self.pandas_index_location_date_to_jhu_format(
                opencovid19, columns_skipped=columns_skipped)
        elif self.db == 'owid':
            info('OWID aka "Our World in Data" database selected ...')
            columns_keeped = [
                'total_cases', 'new_cases', 'total_deaths', 'new_deaths',
                'total_cases_per_million', 'new_cases_per_million',
                'total_deaths_per_million', 'new_deaths_per_million',
                'total_tests', 'new_tests', 'total_tests_per_thousand',
                'new_tests_per_thousand', 'new_tests_smoothed',
                'new_tests_smoothed_per_thousand', 'stringency_index'
            ]
            drop_field = {'location': ['International', 'World']}
            url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
            owid = self.csv_to_pandas_index_location_date(
                url, separator=',', drop_field=drop_field)
            self.csv_url_parsed.append(url)
            self.pandas_datase = self.pandas_index_location_date_to_jhu_format(
                owid, columns_keeped=columns_keeped)
    self.fill_cocoa_field()
    info('Some information about the selected database: ', self.get_db())
    info('Available keywords: ', self.get_available_keys_words())
    if self.get_db() != 'opencovid19':
        info('Example locations: ',
             ', '.join(random.choices(self.get_locations(), k=5)), ' ...')
    else:
        info('Only available location: ', self.get_locations())
    info('Last date of data: ', self.get_dates()[-1])
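# Usage sketch for DataBase (hedged: only the four names listed in
# self.database_name are accepted; anything else raises CocoaDbError):
#
#     db = DataBase('jhu')
#     db.get_available_keys_words()   # keywords parsed from the source CSVs
#     db.get_locations()              # locations known to the database
#     db.get_dates()[-1]              # most recent date available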