Example #1
0
File: geo.py  Project: tjbtjbtjb/CoCoA
 def __init__(self,standard=_list_standard[0]):
     """Create a GeoManager bound to a naming standard.

     The default standard is the first entry of the standard list,
     i.e. the value reported by get_list_standard()[0].
     """
     verb("Init of GeoManager()")
     self.set_standard(standard)
     self._gr = GeoRegion()
Example #2
0
File: geo.py  Project: tjbtjbtjb/CoCoA
 def __init__(self,gm=0):
     """Create a GeoInfo.

     Parameters
     ----------
     gm : GeoManager, optional
         An existing GeoManager to reuse.  The historical "not
         provided" sentinel is 0 (kept as the default for backward
         compatibility); None is now accepted as well.
     """
     verb("Init of GeoInfo()")
     # BUGFIX: the original tested `gm != 0`, so passing gm=None would
     # store None and crash below on `None._gr`.  A truthiness test
     # keeps the old behaviour for 0 and for real GeoManager instances
     # (object default truthiness), while treating None as "absent".
     if gm:
         self._gm = gm
     else:
         self._gm = GeoManager()

     self._grp = self._gm._gr.get_pandas()
Example #3
0
File: geo.py  Project: tjbtjbtjb/CoCoA
    def __init__(self,):
        """Build the region tables used by GeoRegion.

        Downloads the UN M49 region-code table and the UN GeoScheme
        country table from wikipedia, filling:
          * self._region_dict : region code -> region name (plus a few
            hand-added country groups: UE, G7, G8, G20, OECD, G77);
          * self._p_gs : one row per (iso3, region) membership, with the
            capital and the region name.

        Raises
        ------
        CocoaConnectionError
            When either wikipedia page cannot be fetched or parsed.
        """
        #if 'XK' in self._country_list:
        #    del self._country_list['XK'] # creates bugs in pycountry and is currently a contested country as country


        # --- get the UN M49 information and organize the data in the _region_dict

        verb("Init of GeoRegion()")
        try:
            p_m49 = pd.read_html(self._source_dict["UN_M49"])[1]
        except Exception as e:
            # BUGFIX: was a bare `except:`, which also swallows
            # KeyboardInterrupt/SystemExit; chain the cause for debugging.
            raise CocoaConnectionError('Cannot connect to the UN_M49 '
                    'wikipedia page. '
                    'Please check your connection or availability of the page.') from e

        p_m49.columns = ['code', 'region_name']
        # suppress information in parenthesis in region name
        p_m49['region_name'] = [r.split('(')[0].rstrip() for r in p_m49.region_name]
        # NOTE: the original called p_m49.set_index('code') and discarded the
        # result (set_index is NOT in-place by default), so it was a no-op.
        # The call is removed; 'code' must stay a regular column anyway for
        # the merge at the end of this function.

        # dict.update accepts an iterable of [key, value] pairs, which is
        # exactly what to_dict('split')['data'] yields here.
        self._region_dict.update(p_m49.to_dict('split')['data'])
        self._region_dict.update({  "UE":"European Union",
                                    "G7":"G7",
                                    "G8":"G8",
                                    "G20":"G20",
                                    "OECD":"Oecd",
                                    "G77":"G77",
                                    })  # add UE for other analysis


        # --- get the UnitedNation GeoScheme and organize the data
        try:
            p_gs = pd.read_html(self._source_dict["GeoScheme"])[0]
        except Exception as e:
            raise CocoaConnectionError('Cannot connect to the UN GeoScheme '
                    'wikipedia page. '
                    'Please check your connection or availability of the page.') from e
        p_gs.columns = ['country', 'capital', 'iso2', 'iso3', 'num', 'm49']

        idx = []
        reg = []
        cap = []

        for index, row in p_gs.iterrows():
            if row.iso3 != '–':  # '–' marks a non standard iso in wikipedia UN GeoScheme
                # a country belongs to several nested regions, listed in the
                # m49 column separated by '<' -> one output row per region
                for r in row.m49.replace(" ", "").split('<'):
                    idx.append(row.iso3)
                    reg.append(int(r))
                    cap.append(row.capital)
        self._p_gs = pd.DataFrame({'iso3': idx, 'capital': cap, 'region': reg})
        self._p_gs = self._p_gs.merge(p_m49, how='left', left_on='region',
                                      right_on='code').drop(["code"], axis=1)
Example #4
0
 def __init__(self,db=None):
     """Create a CocoDisplay.

     Parameters
     ----------
     db : optional
         A database object carrying a `.geo` attribute (a GeoManager)
         to reuse; when None a default coge.GeoInfo is built.
     """
     verb("Init of CocoDisplay()")
     self.colors = itertools.cycle(Paired12)
     self.coco_circle = []
     self.coco_line = []
     self.base_fig = None
     self.hover_tool = None
     self.increment = 1
     # BUGFIX (idiom): compare to None with `is`, not `==` (PEP 8);
     # a db object could define __eq__ and break the `== None` test.
     if db is None:
         self.info = coge.GeoInfo()
     else:
         self.info = coge.GeoInfo(db.geo)
Example #5
0
    def __init__(self, db_name):
        '''
         Fill the pandas_datase for the requested database.

         Parameters
         ----------
         db_name : str
             One of 'jhu', 'spf', 'owid', 'opencovid19'.

         Raises
         ------
         CocoaDbError
             When db_name is not a known database.
        '''
        verb("Init of covid19.DataBase()")
        self.database_name = ['jhu', 'spf', 'owid', 'opencovid19']
        self.csv_url_parsed = []
        self.pandas_datase = {}
        self.available_keys_words = []
        self.dates = []
        self.dicos_countries = {}
        self.dict_current_days = {}
        self.dict_cumul_days = {}
        self.dict_diff_days = {}
        self.database_columns_not_computed = {}
        self.db = db_name

        # Validate first, before any (possibly network-bound) work.
        # BUGFIX: the original passed file=sys.stderr to the exception
        # constructor — a print()-style kwarg that would make the raise
        # itself fail with TypeError instead of raising CocoaDbError.
        if self.db not in self.database_name:
            raise CocoaDbError('Unknown ' + self.db +
                               '. Available database so far in CoCoa are : ' +
                               str(self.database_name))

        # NOTE(review): geo is only built for databases whose locations are
        # country names — confirm that spf/opencovid19 (French locations)
        # really never need it.
        if self.db not in ('spf', 'opencovid19'):
            self.geo = coge.GeoManager('name')

        if self.db == 'jhu':
            info('JHU aka Johns Hopkins database selected ...')
            self.pandas_datase = self.parse_convert_jhu()
        elif self.db == 'spf':
            self.pandas_datase = self._parse_spf()
        elif self.db == 'opencovid19':
            self.pandas_datase = self._parse_opencovid19()
        elif self.db == 'owid':
            self.pandas_datase = self._parse_owid()

        self.fill_cocoa_field()
        info('Few information concernant the selected database : ',
             self.get_db())
        info('Available which key-words for: ',
             self.get_available_keys_words())
        if self.get_db() != 'opencovid19':
            info('Example of location : ',
                 ', '.join(random.choices(self.get_locations(),
                                          k=5)), ' ...')
        else:
            info('Only available location: ', self.get_locations())
        info('Last date data ', self.get_dates()[-1])

    def _parse_spf(self):
        '''Parse the four SPF (Sante Publique France) csv sources and
        return them converted to the JHU-like pandas structure.'''
        info('SPF aka Sante Publique France database selected ...')
        info('... tree differents db from SPF will be parsed ...')
        # https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/
        # Parse and convert spf data structure to JHU one for historical raison
        # hosp Number of people currently hospitalized
        # rea  Number of people currently in resuscitation or critical care
        # rad      Total amount of patient that returned home
        # dc       Total amout of deaths at the hospital
        # 'sexe' == 0 male + female
        cast = {'dep': 'string'}
        rename = {'jour': 'date', 'dep': 'location'}
        constraints = {'sexe': 0}
        url = "https://www.data.gouv.fr/fr/datasets/r/63352e38-d353-4b54-bfd1-f1b3ee1cabd7"
        spf1 = self.csv_to_pandas_index_location_date(
            url,
            rename_columns=rename,
            constraints=constraints,
            cast=cast)
        self.csv_url_parsed.append(url)
        # https://www.data.gouv.fr/fr/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/
        # incid_hosp	string 	Nombre quotidien de personnes nouvellement hospitalisées
        # incid_rea	integer	Nombre quotidien de nouvelles admissions en réanimation
        # incid_dc	integer	Nombre quotidien de personnes nouvellement décédées
        # incid_rad	integer	Nombre quotidien de nouveaux retours à domicile
        url = "https://www.data.gouv.fr/fr/datasets/r/6fadff46-9efd-4c53-942a-54aca783c30c"
        spf2 = self.csv_to_pandas_index_location_date(
            url, rename_columns=rename, cast=cast)
        self.csv_url_parsed.append(url)
        # https://www.data.gouv.fr/fr/datasets/donnees-relatives-aux-resultats-des-tests-virologiques-covid-19/
        # T       Number of tests performed
        # P       Number of positive tests
        constraints = {'cl_age90': 0}
        url = "https://www.data.gouv.fr/fr/datasets/r/406c6a23-e283-4300-9484-54e78c8ae675"
        spf3 = self.csv_to_pandas_index_location_date(
            url,
            rename_columns=rename,
            constraints=constraints,
            cast=cast)
        self.csv_url_parsed.append(url)
        #https://www.data.gouv.fr/fr/datasets/indicateurs-de-suivi-de-lepidemie-de-covid-19/#_
        # tension hospitaliere ("hospital strain" indicator)
        # Vert : taux d’occupation compris entre 0 et 40% ;
        # Orange : taux d’occupation compris entre 40 et 60% ;
        # Rouge : taux d'occupation supérieur à 60%.
        # R0
        # vert : R0 entre 0 et 1 ;
        # Orange : R0 entre 1 et 1,5 ;
        # Rouge : R0 supérieur à 1,5.
        cast = {'departement': 'string'}
        rename = {'extract_date': 'date', 'departement': 'location'}
        columns_skipped = ['region', 'libelle_reg', 'libelle_dep',
                           'tx_incid_couleur', 'R_couleur',
                           'taux_occupation_sae_couleur', 'tx_pos_couleur',
                           'nb_orange', 'nb_rouge']
        url = "https://www.data.gouv.fr/fr/datasets/r/4acad602-d8b1-4516-bc71-7d5574d5f33e"
        spf4 = self.csv_to_pandas_index_location_date(
            url,
            rename_columns=rename,
            separator=',',
            encoding="ISO-8859-1",
            cast=cast)
        self.csv_url_parsed.append(url)
        result = pd.concat([spf1, spf2, spf3, spf4],
                           axis=1,
                           sort=False)
        return self.pandas_index_location_date_to_jhu_format(
            result, columns_skipped=columns_skipped)

    def _parse_opencovid19(self):
        '''Parse the opencovid19-fr country-level csv and return it
        converted to the JHU-like pandas structure.'''
        info('OPENCOVID19 selected ...')
        rename = {'jour': 'date', 'maille_nom': 'location'}
        constraints = {'granularite': 'pays'}
        columns_skipped = [
            'maille_code', 'source_nom', 'source_url',
            'source_archive', 'source_type'
        ]
        url = 'https://raw.githubusercontent.com/opencovid19-fr/data/master/dist/chiffres-cles.csv'
        opencovid19 = self.csv_to_pandas_index_location_date(
            url,
            constraints=constraints,
            rename_columns=rename,
            separator=',')
        self.csv_url_parsed.append(url)
        return self.pandas_index_location_date_to_jhu_format(
            opencovid19, columns_skipped=columns_skipped)

    def _parse_owid(self):
        '''Parse the "Our World in Data" csv and return it converted to
        the JHU-like pandas structure.'''
        info('OWID aka \"Our World in Data\" database selected ...')
        columns_keeped = [
            'total_cases', 'new_cases', 'total_deaths', 'new_deaths',
            'total_cases_per_million', 'new_cases_per_million',
            'total_deaths_per_million', 'new_deaths_per_million',
            'total_tests', 'new_tests', 'total_tests_per_thousand',
            'new_tests_per_thousand', 'new_tests_smoothed',
            'new_tests_smoothed_per_thousand', 'stringency_index'
        ]
        # aggregate rows would double-count real countries
        drop_field = {'location': ['International', 'World']}
        url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
        owid = self.csv_to_pandas_index_location_date(
            url, separator=',', drop_field=drop_field)
        self.csv_url_parsed.append(url)
        return self.pandas_index_location_date_to_jhu_format(
            owid, columns_keeped=columns_keeped)