def to_dataframe(self):
    """Load the Argo index and return it as a pandas dataframe.

    The index is read as csv from ``self.url`` through ``self.fs``, then
    post-processed:

    - ``date`` and ``date_update`` are converted to datetimes,
    - ``wmo`` is added as the integer found in the second ``/``-separated
      component of the ``file`` column,
    - the raw ``institution`` and ``profiler_type`` columns are renamed to
      ``institution_code`` and ``profiler_code``, while new ``institution``
      and ``profiler`` columns hold the values mapped through the
      'institutions' and 'profilers' dictionaries.

    Returns
    -------
    pandas.DataFrame
    """
    # erddap date format : 2019-03-21T00:00:35Z
    erddap_date_format = "%Y-%m-%dT%H:%M:%SZ"

    # Download data: get a csv and open it as a pandas dataframe
    # (the second line of the file, row index 1, is skipped).
    index = self.fs.read_csv(self.url, parse_dates=True, skiprows=[1])

    for column in ('date', 'date_update'):
        index[column] = pd.to_datetime(index[column], format=erddap_date_format)

    # The wmo number is the 2nd element of the file path.
    index['wmo'] = index['file'].apply(lambda path: int(path.split('/')[1]))

    # institution & profiler mapping: mapped values go into new columns first,
    # then the raw columns are renamed to their "*_code" counterparts.
    institutions = load_dict('institutions')
    index['tmp1'] = index['institution'].apply(
        lambda code: mapp_dict(institutions, code))

    profilers = load_dict('profilers')
    index['profiler'] = index['profiler_type'].apply(
        lambda code: mapp_dict(profilers, int(code)))

    return index.rename(columns={
        "institution": "institution_code",
        "tmp1": "institution",
        "profiler_type": "profiler_code",
    })
def to_dataframe(self):
    """Filter the local index file and return it as a pandas dataframe.

    The result of ``self.filter_index()`` is opened with
    ``self.fs.open_dataframe`` and post-processed:

    - ``wmo`` is added as the integer found in the second ``/``-separated
      component of the ``file`` column,
    - the raw ``institution`` and ``profiler_type`` columns are renamed to
      ``institution_code`` and ``profiler_code``, while new ``institution``
      and ``profiler`` columns hold the values mapped through the
      'institutions' and 'profilers' dictionaries.

    Returns
    -------
    pandas.DataFrame
    """
    filtered = self.filter_index()
    index = self.fs.open_dataframe(filtered)

    # Post-processing of the filtered index:
    index['wmo'] = index['file'].apply(lambda path: int(path.split('/')[1]))

    # institution & profiler mapping for all users
    # todo: may be we need to separate this for standard and expert users
    institutions = load_dict('institutions')
    index['tmp1'] = index['institution'].apply(
        lambda code: mapp_dict(institutions, code))

    profilers = load_dict('profilers')
    index['profiler'] = index['profiler_type'].apply(
        lambda code: mapp_dict(profilers, int(code)))

    # Raw columns become "*_code"; the temporary column takes the freed name.
    return index.rename(columns={
        "institution": "institution_code",
        "tmp1": "institution",
        "profiler_type": "profiler_code",
    })
def to_dataframe(self):
    """Filter the local index file and return it as a pandas dataframe.

    When ``self.cache`` is truthy and ``self.cachepath`` exists, the
    dataframe is read back from that csv file. Otherwise the index is
    filtered with ``self.filter_index()``, read from ``self.filtered_index``,
    post-processed and, when ``self.cache`` is truthy, saved to
    ``self.cachepath`` for later re-use.

    Post-processing:

    - ``date`` and ``date_update`` are converted to datetimes,
    - ``wmo`` is added as the integer found in the second ``/``-separated
      component of the ``file`` column,
    - ``institution`` and ``profiler_type`` are replaced by the values mapped
      through the 'institutions' and 'profilers' dictionaries. This step is
      best effort: on failure a warning is issued and the raw codes are kept.

    Returns
    -------
    pandas.DataFrame
        ``date`` and ``date_update`` are datetimes on both the cached and
        the computed path.
    """
    date_columns = ['date', 'date_update']

    # Try to load cached file if requested.
    if self.cache and os.path.exists(self.cachepath):
        # The cache is plain csv: dates must be parsed again, otherwise the
        # cached result would carry strings where the computed one carries
        # datetimes.
        return pd.read_csv(self.cachepath, parse_dates=date_columns)

    # No cache found or requested, so we compute:
    self.filter_index()
    df = pd.read_csv(self.filtered_index)

    # Create datetime & wmo fields.
    # local ftp date format: 20160513065300
    for column in date_columns:
        df[column] = pd.to_datetime(df[column], format="%Y%m%d%H%M%S")
    df['wmo'] = df['file'].apply(lambda path: int(path.split('/')[1]))

    # institution & profiler mapping (best effort).
    # Both mapped columns are computed before df is touched, so a failure
    # cannot leave a half-converted dataframe (e.g. a stray temporary column).
    try:
        institutions = load_dict('institutions')
        institution_names = df['institution'].apply(
            lambda code: mapp_dict(institutions, code))
        profilers = load_dict('profilers')
        profiler_names = df['profiler_type'].apply(
            lambda code: mapp_dict(profilers, code))
    except Exception as error:  # deliberately broad: the mapping is optional
        import warnings  # local import, only needed on this failure path
        warnings.warn(
            "Institution/profiler mapping failed, raw codes are kept "
            "(%s: %s)" % (type(error).__name__, error),
            stacklevel=2,
        )
    else:
        # Drop then re-add, so that the mapped columns end up last.
        df = df.drop(columns=['institution', 'profiler_type'])
        df['institution'] = institution_names
        df['profiler_type'] = profiler_names

    # Possibly save in cache for later re-use
    if self.cache:
        df.to_csv(self.cachepath, index=False)

    return df
def to_dataframe(self):
    """Load the Argo index and return it as a pandas dataframe.

    When ``self.cache`` is truthy and ``self.cachepath`` exists, the
    dataframe is read back from that csv file. Otherwise the csv found at
    ``self.url`` is downloaded, post-processed and, when ``self.cache`` is
    truthy, saved to ``self.cachepath`` for later re-use.

    Post-processing:

    - ``date`` and ``date_update`` are converted to datetimes,
    - ``wmo`` is added as the integer found in the second ``/``-separated
      component of the ``file`` column,
    - ``institution`` and ``profiler_type`` are replaced by the values mapped
      through the 'institutions' and 'profilers' dictionaries. This step is
      best effort: on failure a warning is issued and the raw codes are kept.

    Returns
    -------
    pandas.DataFrame
        ``date`` and ``date_update`` are datetimes on both the cached and
        the computed path.
    """
    date_columns = ['date', 'date_update']

    # Try to load cached file if requested.
    if self.cache and os.path.exists(self.cachepath):
        # The cache is plain csv: dates must be parsed again, otherwise the
        # cached result would carry strings where the computed one carries
        # datetimes.
        return pd.read_csv(self.cachepath, parse_dates=date_columns)

    # No cache found or requested, so we compute:
    # Download data: get a csv and open it as a pandas dataframe (the second
    # line of the file, row index 1, is skipped). The context manager closes
    # the response once the csv has been parsed.
    with urlopen(self.url) as response:
        df = pd.read_csv(response, parse_dates=True, skiprows=[1])

    # erddap date format : 2019-03-21T00:00:35Z
    for column in date_columns:
        df[column] = pd.to_datetime(df[column], format="%Y-%m-%dT%H:%M:%SZ")
    df['wmo'] = df['file'].apply(lambda path: int(path.split('/')[1]))

    # institution & profiler mapping (best effort).
    # Both mapped columns are computed before df is touched, so a failure
    # cannot leave a half-converted dataframe (e.g. a stray temporary column).
    try:
        institutions = load_dict('institutions')
        institution_names = df['institution'].apply(
            lambda code: mapp_dict(institutions, code))
        profilers = load_dict('profilers')
        profiler_names = df['profiler_type'].apply(
            lambda code: mapp_dict(profilers, code))
    except Exception as error:  # deliberately broad: the mapping is optional
        import warnings  # local import, only needed on this failure path
        warnings.warn(
            "Institution/profiler mapping failed, raw codes are kept "
            "(%s: %s)" % (type(error).__name__, error),
            stacklevel=2,
        )
    else:
        # Drop then re-add, so that the mapped columns end up last.
        df = df.drop(columns=['institution', 'profiler_type'])
        df['institution'] = institution_names
        df['profiler_type'] = profiler_names

    # Possibly save in cache for later re-use
    if self.cache:
        df.to_csv(self.cachepath, index=False)

    return df
def test_invalid_dictionnary_key():
    """A key missing from the 'profilers' dictionary must map to "Unknown"."""
    profilers = load_dict('profilers')
    label = mapp_dict(profilers, "invalid_key")
    assert label == "Unknown"