Example #1
def calc_climatology(Ser,
                     moving_avg_orig=5,
                     moving_avg_clim=30,
                     median=False,
                     timespan=None):
    '''
    Calculates the climatology of a data set

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex or julian date)

    moving_avg_orig : float, optional
        The size of the moving_average window [days] that will be applied on the
        input Series (gap filling, short-term rainfall correction)
        Default: 5

    moving_avg_clim : float, optional
        The size of the moving_average window [days] that will be applied on the
        calculated climatology (long-term event correction)
        Default: 30

    median : boolean, optional
        if set to True, the climatology will be based on the median conditions

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        Set this to calculate the climatology based on a subset of the input
        Series

    Returns
    -------
    climatology : pandas.Series
        Series containing the calculated climatology
    '''

    if timespan is not None:
        Ser = Ser.truncate(before=timespan[0], after=timespan[1])

    Ser = moving_average(Ser, window_size=moving_avg_orig)

    Ser = pd.DataFrame(Ser)

    if type(Ser.index) == pd.DatetimeIndex:

        doys = doy(Ser.index.month, Ser.index.day)

    else:
        year, month, day = julian2date(Ser.index.values)[0:3]
        doys = doy(month, day)

    Ser['doy'] = doys

    if median:
        clim = Ser.groupby('doy').median()
    else:
        clim = Ser.groupby('doy').mean()

    return moving_average(pd.Series(clim.values.flatten(), index=clim.index.values), window_size=moving_avg_clim)
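The heart of the function is the day-of-year groupby; the smoothing before and after is handled by moving_average. A minimal self-contained sketch of that grouping step with plain pandas (synthetic data, smoothing omitted, pandas' dayofyear used as a stand-in for the doy() helper):

# Sketch of the day-of-year grouping used by calc_climatology
# (synthetic data; the moving_average smoothing steps are omitted).
import numpy as np
import pandas as pd

idx = pd.date_range('2005-01-01', '2009-12-31', freq='D')
ser = pd.Series(np.random.rand(len(idx)), index=idx)

df = pd.DataFrame({'value': ser})
df['doy'] = df.index.dayofyear            # stand-in for the doy() helper
clim = df.groupby('doy')['value'].mean()  # or .median() when median=True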
Example #2
def calc_anomaly(Ser,
                 window_size=35,
                 climatology=None):
    '''
    Calculates the anomaly of a time series (Pandas series).
    Both climatology-based and moving-average-based anomalies can be
    calculated.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex)

    window_size : float, optional
        The window-size [days] of the moving-average window to calculate the
        anomaly reference (only used if climatology is not provided)
        Default: 35 (days)

    climatology : pandas.Series (index: 1-366), optional
        if provided, anomalies will be based on the climatology

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        If set, only a subset of the input Series is used

    Returns
    -------
    anomaly : pandas.Series
        Series containing the calculated anomalies
    '''

    if climatology is not None:

        if type(Ser.index) == pd.DatetimeIndex:

            doys = doy(Ser.index.month, Ser.index.day)

        else:
            year, month, day = julian2date(Ser.index.values)[0:3]
            doys = doy(month, day)

        df = pd.DataFrame()
        df['absolute'] = Ser
        df['doy'] = doys

        clim = pd.DataFrame(climatology, columns=['climatology'])

        df = df.join(clim, on='doy', how='left')

        anomaly = df['absolute'] - df['climatology']
        anomaly.index = df.index

    else:
        reference = moving_average(Ser, window_size=window_size)
        anomaly = Ser - reference

    return anomaly
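A brief usage sketch of the climatology-based branch (hypothetical: it assumes calc_climatology/calc_anomaly and their helpers moving_average and doy are available as defined above, and that sm_ser is a daily pandas.Series with a DatetimeIndex):

# Hypothetical usage: climatology-based anomalies.
clim = calc_climatology(sm_ser)                    # Series indexed by day of year
anomalies = calc_anomaly(sm_ser, climatology=clim)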
Example #3
def test_doy():
    day_of_year = doy(1, 28)
    assert day_of_year == 28
    day_of_year = doy(2, 29)
    assert day_of_year == 31 + 29
    day_of_year = doy(3, 1, year=2004)
    assert day_of_year == 31 + 29 + 1
    # test numpy arrays as input
    days = np.array([28, 29, 1], dtype=int)
    months = np.array([1, 2, 3])
    days_of_year = doy(months, days, year=np.array([2005, 2004, 2004]))
    nptest.assert_allclose(days_of_year, np.array([28, 31 + 29, 31 + 29 + 1]))

    days_of_year = doy(months, days, year=2004)
    nptest.assert_allclose(days_of_year, np.array([28, 31 + 29, 31 + 29 + 1]))
Example #4
def calc_climatology(Ser,
                     moving_avg_orig=5,
                     moving_avg_clim=30,
                     median=False,
                     timespan=None):
    '''
    Calculates the climatology of a data set
    
    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex)
    
    moving_avg_orig : float, optional
        The size of the moving_average window [days] that will be applied on the 
        input Series (gap filling, short-term rainfall correction)
        Default: 5
    
    moving_avg_clim : float, optional
        The size of the moving_average window [days] that will be applied on the 
        calculated climatology (long-term event correction)
        Default: 30
        
    median : boolean, optional
        if set to True, the climatology will be based on the median conditions
    
    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        Set this to calculate the climatology based on a subset of the input
        Series
        
    Returns
    -------
    climatology : pandas.Series
        Series containing the calculated climatology
    '''

    if timespan is not None:
        Ser = Ser.truncate(before=timespan[0], after=timespan[1])

    Ser = moving_average(Ser,
                         window_size=moving_avg_orig,
                         sample_to_days=True,
                         fast=True)

    Ser = pd.DataFrame(Ser)

    Ser['doy'] = doy(Ser.index.month, Ser.index.day)

    if median:
        clim = Ser.groupby('doy').median()
    else:
        clim = Ser.groupby('doy').mean()

    return moving_average(pd.Series(clim.values.flatten(),
                                    index=clim.index.values),
                          window_size=moving_avg_clim,
                          no_date=True)
Example #5
def test_doy():
    day_of_year = doy(1, 28)
    assert day_of_year == 28
    day_of_year = doy(2, 29)
    assert day_of_year == 31 + 29
    day_of_year = doy(3, 1, year=2004)
    assert day_of_year == 31 + 29 + 1
    # test numpy arrays as input
    days = np.array([28, 29, 1], dtype=int)
    months = np.array([1, 2, 3])
    days_of_year = doy(months, days, year=np.array([2005, 2004, 2004]))
    nptest.assert_allclose(days_of_year, np.array([28,
                                                   31 + 29,
                                                   31 + 29 + 1]))

    days_of_year = doy(months, days, year=2004)
    nptest.assert_allclose(days_of_year, np.array([28,
                                                   31 + 29,
                                                   31 + 29 + 1]))
Example #6
def calc_anomaly(Ser, window_size=35, climatology=None):
    '''
    Calculates the anomaly of a time series (Pandas series).
    Both climatology-based and moving-average-based anomalies can be
    calculated.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex)
    
    window_size : float, optional
        The window-size [days] of the moving-average window to calculate the
        anomaly reference (only used if climatology is not provided)
        Default: 35 (days)
    
    climatology : pandas.Series (index: 1-366), optional
        if provided, anomalies will be based on the climatology
    
    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        If set, only a subset of the input Series is used
        
    Returns
    -------
    anomaly : pandas.Series
        Series containing the calculated anomalies
    '''

    if climatology is not None:

        Ser = pd.DataFrame(Ser, columns=['absolute'])
        Ser['doy'] = doy(Ser.index.month, Ser.index.day)

        clim = pd.DataFrame(climatology, columns=['climatology'])

        Ser = Ser.join(clim, on='doy', how='left')

        anomaly = Ser['absolute'] - Ser['climatology']
        anomaly.index = Ser.index

    else:
        reference = moving_average(Ser, window_size=window_size, fast=True)
        anomaly = Ser - reference

    return anomaly
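When no climatology is supplied, the anomaly is simply the series minus a centered running mean. A self-contained sketch of that idea using only pandas (the 35-sample window mirrors the default window_size for daily data):

# Self-contained sketch: moving-average based anomalies with plain pandas.
import numpy as np
import pandas as pd

idx = pd.date_range('2015-01-01', periods=365, freq='D')
ser = pd.Series(np.sin(np.arange(365) / 58.0) + np.random.normal(0, 0.1, 365),
                index=idx)

# centered 35-day rolling mean as the anomaly reference
reference = ser.rolling(window=35, center=True, min_periods=1).mean()
anomaly = ser - reference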
Example #7
def calc_climatology(Ser,
                     moving_avg_orig=5,
                     moving_avg_clim=30,
                     median=False,
                     timespan=None,
                     fill=np.nan,
                     wraparound=False):
    '''
    Calculates the climatology of a data set.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex or julian date)

    moving_avg_orig : float, optional
        The size of the moving_average window [days] that will be applied on the
        input Series (gap filling, short-term rainfall correction)
        Default: 5

    moving_avg_clim : float, optional
        The size of the moving_average window [days] that will be applied on the
        calculated climatology (long-term event correction)
        Default: 30

    median : boolean, optional
        if set to True, the climatology will be based on the median conditions

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        Set this to calculate the climatology based on a subset of the input
        Series

    fill : float or int, optional
        Fill value to use for days on which no climatology exists

    wraparound : boolean, optional
        If set then the climatology is wrapped around at the edges before
        doing the second running average (long-term event correction)

    Returns
    -------
    climatology : pandas.Series
        Series containing the calculated climatology
        Always has 366 values behaving like a leap year
    '''

    if timespan is not None:
        Ser = Ser.truncate(before=timespan[0], after=timespan[1])

    Ser = moving_average(Ser, window_size=moving_avg_orig)

    Ser = pd.DataFrame(Ser)

    if type(Ser.index) == pd.DatetimeIndex:

        doys = doy(Ser.index.month, Ser.index.day)

    else:
        year, month, day = julian2date(Ser.index.values)[0:3]
        doys = doy(month, day)

    Ser['doy'] = doys

    if median:
        clim = Ser.groupby('doy').median()
    else:
        clim = Ser.groupby('doy').mean()

    clim_ser = pd.Series(clim.values.flatten(),
                         index=clim.index.values)

    if wraparound:
        index_old = clim_ser.index.copy()
        left_mirror = clim_ser.iloc[-moving_avg_clim:]
        right_mirror = clim_ser.iloc[:moving_avg_clim]
        # Shift index to start at 366 - index at -moving_avg_clim
        # to run over a whole year while keeping gaps the same size
        right_mirror.index = right_mirror.index + 366 * 2
        clim_ser.index = clim_ser.index + 366
        clim_ser = pd.concat([left_mirror,
                              clim_ser,
                              right_mirror])

        clim_ser = moving_average(clim_ser, window_size=moving_avg_clim)
        clim_ser = clim_ser.iloc[moving_avg_clim:-moving_avg_clim]
        clim_ser.index = index_old
    else:
        clim_ser = moving_average(clim_ser, window_size=moving_avg_clim)

    clim_ser = clim_ser.reindex(np.arange(366) + 1)
    clim_ser = clim_ser.fillna(fill)
    return clim_ser
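The wraparound branch pads the climatology with copies of its own edges before the second smoothing, so that day 1 and day 366 see a full window instead of a truncated one. A self-contained sketch of that padding idea with a plain rolling mean (the index bookkeeping of the example above is simplified here; the window size is illustrative):

# Self-contained sketch: wrap-around smoothing of a day-of-year series.
import numpy as np
import pandas as pd

window = 35
clim = pd.Series(np.random.rand(366), index=np.arange(1, 367))

# prepend the last `window` days and append the first `window` days,
# smooth, then cut the padding away again
padded = pd.concat([clim.iloc[-window:], clim, clim.iloc[:window]],
                   ignore_index=True)
smoothed = padded.rolling(window, center=True, min_periods=1).mean()
smoothed = smoothed.iloc[window:-window]
smoothed.index = clim.index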
Example #8
def calc_anomaly(Ser,
                 window_size=35,
                 climatology=None,
                 respect_leap_years=True,
                 return_clim=False):
    '''
    Calculates the anomaly of a time series (Pandas series).
    Both climatology-based and moving-average-based anomalies can be
    calculated.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex)

    window_size : float, optional
        The window-size [days] of the moving-average window to calculate the
        anomaly reference (only used if climatology is not provided)
        Default: 35 (days)

    climatology : pandas.Series (index: 1-366), optional
        if provided, anomalies will be based on the climatology

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        If set, only a subset of the input Series is used

    respect_leap_years : boolean, optional
        If set then leap years will be respected during matching of the climatology
        to the time series

    return_clim : boolean, optional
        if set to true the return argument will be a DataFrame which
        also contains the climatology time series.
        Only has an effect if climatology is used.

    Returns
    -------
    anomaly : pandas.Series
        Series containing the calculated anomalies
    '''

    if climatology is not None:

        if type(Ser.index) == pd.DatetimeIndex:

            year, month, day = (np.asarray(Ser.index.year),
                                np.asarray(Ser.index.month),
                                np.asarray(Ser.index.day))

        else:
            year, month, day = julian2date(Ser.index.values)[0:3]

        if respect_leap_years:
            doys = doy(month, day, year)
        else:
            doys = doy(month, day)

        df = pd.DataFrame()
        df['absolute'] = Ser
        df['doy'] = doys

        clim = pd.DataFrame({'climatology': climatology})

        df = df.join(clim, on='doy', how='left')

        anomaly = df['absolute'] - df['climatology']
        anomaly.index = df.index

        if return_clim:
            anomaly = pd.DataFrame({'anomaly': anomaly})
            anomaly['climatology'] = df['climatology']

    else:
        reference = moving_average(Ser, window_size=window_size)
        anomaly = Ser - reference

    return anomaly
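A brief usage sketch of the extended signature (hypothetical inputs as before: sm_ser is a daily Series with a DatetimeIndex, clim a climatology indexed 1-366):

# Hypothetical usage: return the matched climatology alongside the anomaly.
result = calc_anomaly(sm_ser,
                      climatology=clim,
                      respect_leap_years=True,
                      return_clim=True)
# result is a DataFrame with 'anomaly' and 'climatology' columns
# sharing the original DatetimeIndex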
Example #9
def calc_climatology(Ser,
                     moving_avg_orig=5,
                     moving_avg_clim=30,
                     median=False,
                     timespan=None,
                     fill=np.nan,
                     wraparound=False,
                     respect_leap_years=False,
                     interpolate_leapday=False,
                     fillna=True,
                     min_obs_orig=1,
                     min_obs_clim=1):
    '''
    Calculates the climatology of a data set.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex or julian date)

    moving_avg_orig : float, optional
        The size of the moving_average window [days] that will be applied on the
        input Series (gap filling, short-term rainfall correction)
        Default: 5

    moving_avg_clim : float, optional
        The size of the moving_average window [days] that will be applied on the
        calculated climatology (long-term event correction)
        Default: 30

    median : boolean, optional
        if set to True, the climatology will be based on the median conditions

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        Set this to calculate the climatology based on a subset of the input
        Series

    fill : float or int, optional
        Fill value to use for days on which no climatology exists

    wraparound : boolean, optional
        If set then the climatology is wrapped around at the edges before
        doing the second running average (long-term event correction)

    respect_leap_years : boolean, optional
        If set then leap years will be respected during the calculation of
        the climatology
        Default: False

    interpolate_leapday: boolean, optional
        If set, the value for the leap day (doy 60, or doy 366 if
        respect_leap_years is set) is interpolated from the neighbouring days
        Default: False

    fillna: boolean, optional
        If set, then the moving average used for the calculation of the
        climatology will be filled at the nan-values

    min_obs_orig: int
        Minimum observations required to give a valid output in the first
        moving average applied on the input series

    min_obs_clim: int
        Minimum observations required to give a valid output in the second
        moving average applied on the calculated climatology

    Returns
    -------
    climatology : pandas.Series
        Series containing the calculated climatology
        Always has 366 values behaving like a leap year
    '''

    if timespan is not None:
        Ser = Ser.truncate(before=timespan[0], after=timespan[1])

    Ser = moving_average(Ser, window_size=moving_avg_orig, fillna=fillna, min_obs=min_obs_orig)

    Ser = pd.DataFrame(Ser)

    if type(Ser.index) == pd.DatetimeIndex:
        year, month, day = (np.asarray(Ser.index.year),
                            np.asarray(Ser.index.month),
                            np.asarray(Ser.index.day))
    else:
        year, month, day = julian2date(Ser.index.values)[0:3]

    if respect_leap_years:
        doys = doy(month, day, year)
    else:
        doys = doy(month, day)

    Ser['doy'] = doys

    if median:
        clim = Ser.groupby('doy').median()
    else:
        clim = Ser.groupby('doy').mean()

    clim_ser = pd.Series(clim.values.flatten(),
                         index=clim.index.values)

    if interpolate_leapday and not respect_leap_years:
        clim_ser[60] = np.mean((clim_ser[59], clim_ser[61]))
    elif interpolate_leapday and respect_leap_years:
        clim_ser[366] = np.mean((clim_ser[365], clim_ser[1]))

    if wraparound:
        index_old = clim_ser.index.copy()
        left_mirror = clim_ser.iloc[-moving_avg_clim:]
        right_mirror = clim_ser.iloc[:moving_avg_clim]
        # Shift index to start at 366 - index at -moving_avg_clim
        # to run over a whole year while keeping gaps the same size
        right_mirror.index = right_mirror.index + 366 * 2
        clim_ser.index = clim_ser.index + 366
        clim_ser = pd.concat([left_mirror,
                              clim_ser,
                              right_mirror])

        clim_ser = moving_average(clim_ser, window_size=moving_avg_clim, fillna=fillna, min_obs=min_obs_clim)
        clim_ser = clim_ser.iloc[moving_avg_clim:-moving_avg_clim]
        clim_ser.index = index_old
    else:
        clim_ser = moving_average(clim_ser, window_size=moving_avg_clim, fillna=fillna, min_obs=min_obs_clim)

    clim_ser = clim_ser.reindex(np.arange(366) + 1)
    clim_ser = clim_ser.fillna(fill)
    return clim_ser
Example #10
def calc_anomaly(Ser,
                 window_size=35,
                 climatology=None,
                 respect_leap_years=True,
                 return_clim=False):
    '''
    Calculates the anomaly of a time series (Pandas series).
    Both climatology-based and moving-average-based anomalies can be
    calculated.

    Parameters
    ----------
    Ser : pandas.Series (index must be a DateTimeIndex)

    window_size : float, optional
        The window-size [days] of the moving-average window to calculate the
        anomaly reference (only used if climatology is not provided)
        Default: 35 (days)

    climatology : pandas.Series (index: 1-366), optional
        if provided, anomalies will be based on the climatology

    timespan : [timespan_from, timespan_to], datetime.datetime(y,m,d), optional
        If set, only a subset of the input Series is used

    respect_leap_years : boolean, optional
        If set then leap years will be respected during matching of the climatology
        to the time series

    return_clim : boolean, optional
        if set to true the return argument will be a DataFrame which
        also contains the climatology time series.
        Only has an effect if climatology is used.

    Returns
    -------
    anomaly : pandas.Series
        Series containing the calculated anomalies
    '''

    if climatology is not None:

        if type(Ser.index) == pd.DatetimeIndex:

            year, month, day = (np.asarray(Ser.index.year),
                                np.asarray(Ser.index.month),
                                np.asarray(Ser.index.day))

        else:
            year, month, day = julian2date(Ser.index.values)[0:3]

        if respect_leap_years:
            doys = doy(month, day, year)
        else:
            doys = doy(month, day)

        df = pd.DataFrame()
        df['absolute'] = Ser
        df['doy'] = doys

        clim = pd.DataFrame({'climatology': climatology})

        df = df.join(clim, on='doy', how='left')

        anomaly = df['absolute'] - df['climatology']
        anomaly.index = df.index

        if return_clim:
            anomaly = pd.DataFrame({'anomaly': anomaly})
            anomaly['climatology'] = df['climatology']

    else:
        reference = moving_average(Ser, window_size=window_size)
        anomaly = Ser - reference

    return anomaly
Example #11
    def _read_gp(self,gpi,**kwargs):
        """
        reads the time series of the given grid point index. Masks frozen and snow observations
        if keywords are present
        
        Parameters
        ----------
        gpi : long
            grid point index
        mask_frozen_prob : int,optional
            if included in kwargs then all observations taken when 
            frozen probability > mask_frozen_prob are removed from the result 
        mask_snow_prob : int,optional
            if included in kwargs then all observations taken when 
            snow probability > mask_snow_prob are removed from the result 
			
        Returns
        -------
        df : pandas.DataFrame
            containing all fields in the list self.include_in_df
            plus frozen_prob and snow_prob if a path to advisory flags was set
            during initialization
        gpi : long
            grid point index
        lon : float
            longitude
        lat : float
            latitude
        cell : int
            cell number
        """
        index = np.where(gpi == self.gpis)[0]
    
        cell = self.cells[index][0]
        
        gp_file = os.path.join(self.path,'%4d'%cell,self.gp_filename_template%gpi)
        
        if not os.path.exists(gp_file):
            print('first time reading from cell %4d unzipping ...' % cell)
            self.unzip_cell(cell)
        
        data = np.fromfile(gp_file,dtype=self.gp_filestruct)
        dates = data['DAT']
        
        datetime_parser = np.vectorize(self._datetime_arr)
        
        datetimes_correct = datetime_parser(dates)
    
        dict_df={}
    
        for into_df in self.include_in_df:
            d = np.ma.asarray(data[into_df],dtype=self.datatype[into_df])
            d = np.ma.masked_equal(d,self.nan_values[into_df])
            if into_df in self.scale_factor:
                d = d * self.scale_factor[into_df]
            dict_df[into_df] = d

        df = pd.DataFrame(dict_df, index=datetimes_correct)

        if self.include_advflags:
            adv_flags,topo,water = self.read_advisory_flags(gpi)
            
            if topo >= self.topo_threshold:
                warnings.warn("Warning gpi shows topographic complexity of %d %%. Data might not be usable."%topo)
            if water >= self.water_threshold:
                warnings.warn("Warning gpi shows water fraction of %d %%. Data might not be usable."%water)    
            
            df['doy'] = doy(df.index.month, df.index.day)
            df = df.join(adv_flags,on='doy',how='left')
            del df['doy']
            
            if 'mask_frozen_prob' in kwargs:
                mask_frozen = kwargs['mask_frozen_prob']
                df = df[df['frozen_prob'] <= mask_frozen]
            
            if 'mask_snow_prob' in kwargs:
                mask_snow = kwargs['mask_snow_prob']
                df = df[df['snow_prob'] <= mask_snow]

        lon, lat = self.gpi2lonlat(gpi)
        
        return df, gpi, lon, lat, cell
Example #12
    def _read_gp(self, gpi, **kwargs):
        """
        reads the time series of the given grid point index. Masks frozen and snow observations
        if keywords are present

        Parameters
        ----------
        gpi : long
            grid point index
        mask_frozen_prob : int,optional
            if included in kwargs then all observations taken when
            frozen probability > mask_frozen_prob are removed from the result
        mask_snow_prob : int,optional
            if included in kwargs then all observations taken when
            snow probability > mask_snow_prob are removed from the result
        absolute_values : boolean, optional
            if True soil porosities from HWSD and GLDAS will be used to
            derive absolute values which will be available in the
            pandas.DataFrame in the columns
            'sm_por_gldas','sm_noise_por_gldas',
            'sm_por_hwsd','sm_noise_por_hwsd'

        Returns
        -------
        df : pandas.DataFrame
            containing all fields in the list self.include_in_df
            plus frozen_prob and snow_prob if a path to advisory flags was set during
            initialization
        gpi : long
            grid point index
        lon : float
            longitude
        lat : float
            latitude
        cell : int
            cell number
        topo : int
            topographic complexity
        wetland : int
            wetland fraction
        porosity : dict
            porosity values for 'gldas' and 'hwsd'
        """
        if not self.grid_info_loaded:
            self._load_grid_info()
        cell = self.grid.gpi2cell(gpi)
        if self.prev_cell != cell:
            # new cell - means new file object and new read bulk if
            # set
            ncfile = netCDF4.Dataset(
                os.path.join(self.path, self.netcdftemplate % cell), 'r')
            self.units = ncfile.variables['time'].units
            if self.read_bulk:
                self.variables = {}
                for var in ncfile.variables:
                    self.variables[var] = ncfile.variables[var][:]

                ncfile.close()
                ncfile = self

        gpi_index = np.where(ncfile.variables[self.loc_id][:] == gpi)[0]
        time_series_length = ncfile.variables[self.obs_var][gpi_index]
        startindex = np.sum(ncfile.variables[self.obs_var][:gpi_index])
        endindex = startindex + time_series_length
        timestamps = netCDF4.num2date(ncfile.variables['time'][startindex:endindex],
                                      self.units)
        dict_df = {}
        for into_df in self.include_in_df:
            d = ncfile.variables[into_df][startindex:endindex]
            dict_df[into_df] = d

        df = pd.DataFrame(dict_df, index=timestamps)

        # read porosity values
        porosity = {}
        for por_source in ['gldas', 'hwsd']:
            porosity[por_source] = ncfile.variables[
                'por_%s' % por_source][gpi_index][0]

        if 'absolute_values' in kwargs:

            if kwargs['absolute_values']:
                for por_source in ['gldas', 'hwsd']:
                    for el in self.to_absolute:
                        df['%s_por_%s' % (el, por_source)] = (
                            df[el] / 100.0) * (porosity[por_source])

        topo = ncfile.variables[self.topo_var][gpi_index][0]
        wetland = ncfile.variables[self.wetland_var][gpi_index][0]

        snow = np.squeeze(ncfile.variables[self.snow_var][gpi_index, :])
        # if data is not valid assume no snow
        if type(snow) == np.ma.masked_array:
            warnings.warn('Snow probabilities not valid, assuming no snow')
            snow = snow.filled(0)

        frozen = np.squeeze(ncfile.variables[self.frozen_var][gpi_index, :])
        # if data is not valid assume no freezing
        if type(frozen) == np.ma.masked_array:
            warnings.warn(
                'Frozen probabilities not valid, assuming no freezing')
            frozen = frozen.filled(0)

        adv_flags = pd.DataFrame({'snow_prob': snow,
                                  'frozen_prob': frozen})

        if topo >= self.topo_threshold:
            warnings.warn(
                "Warning gpi shows topographic complexity of %d %%. Data might not be usable." % topo)
        if wetland >= self.wetland_threshold:
            warnings.warn(
                "Warning gpi shows wetland fraction of %d %%. Data might not be usable." % wetland)

        df['doy'] = doy(df.index.month, df.index.day)
        df = df.join(adv_flags, on='doy', how='left')
        del df['doy']

        if 'mask_frozen_prob' in kwargs:
            mask_frozen = kwargs['mask_frozen_prob']
            df = df[df['frozen_prob'] <= mask_frozen]

        if 'mask_snow_prob' in kwargs:
            mask_snow = kwargs['mask_snow_prob']
            df = df[df['snow_prob'] <= mask_snow]

        lon, lat = self.grid.gpi2lonlat(gpi)

        if not self.read_bulk:
            ncfile.close()

        return df, gpi, lon, lat, cell, topo, wetland, porosity
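Both _read_gp implementations attach the per-day-of-year advisory flags by adding a temporary 'doy' column and joining against a 366-row DataFrame. A self-contained sketch of that join-and-mask pattern (values are made up; a threshold of 50 stands in for the mask_snow_prob value):

# Self-contained sketch: join day-of-year flags onto a daily time series
# and mask observations, mirroring the mask_snow_prob handling above.
import numpy as np
import pandas as pd

idx = pd.date_range('2016-01-01', '2016-12-31', freq='D')
df = pd.DataFrame({'sm': np.random.rand(len(idx))}, index=idx)

# advisory flags indexed by day of year (1-366)
adv_flags = pd.DataFrame({'snow_prob': np.random.randint(0, 100, 366),
                          'frozen_prob': np.random.randint(0, 100, 366)},
                         index=np.arange(1, 367))

df['doy'] = df.index.dayofyear
df = df.join(adv_flags, on='doy', how='left')
del df['doy']

df = df[df['snow_prob'] <= 50]  # drop likely-snow observations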