Example #1
import collections.abc

import netCDF4 as nc

# "rv" and "agg" are project-local modules (their module names are not shown
# in the source): rv supplies ReduceVar and monthly_aggregator, agg supplies
# NetCDFTemplate, and REDUCE_MONTHLY is a module-level constant defined
# alongside this function.


def aggregate(infile, outfile, reduction, variables=None,
              agg_methods=rv.ReduceVar.REDUCE_MEAN,
              agg_dim='days'):
    """copy named variables and aggregate in the specified manner
    
    Copy infile to outfile, aggregating the named variables by the specified
    "reduction factor" using agg_methods to produce the representative value
    in "outfile"."""
    in_ds = nc.Dataset(infile)
    
    # if the user did not specify which variables to reduce, 
    # guess that they want everything except coordinate variables.
    if variables is None:
        variables = list(in_ds.variables.keys())
        for d in in_ds.dimensions.keys():
            variables.remove(d)
        if 'nav_lat' in variables:
            variables.remove('nav_lat')
        if 'nav_lon' in variables:
            variables.remove('nav_lon')
            
    # set up the "ReduceVar" aggregator, assuming all variables
    # share the same dimensions
    v = in_ds.variables[variables[0]]
    variable_shape = v.shape
    variable_dims = v.dimensions
    i_agg = variable_dims.index(agg_dim)
    if reduction == REDUCE_MONTHLY:
        aggregator = rv.monthly_aggregator(variable_shape, i_agg)
    else:
        aggregator = rv.ReduceVar(variable_shape, i_agg, reduction)
        
    # figure out the shape of the output array 
    output_shape = list(variable_shape)
    output_shape[i_agg] = aggregator.reduced
    
    # create the output file
    out_agg = agg.NetCDFTemplate(infile, outfile)
    
    # create the aggregated dimension at its reduced length first, so the
    # template does not copy the original full-length dimension
    out_agg.createDimension(agg_dim, aggregator.reduced)
    
    # copy the "navigation" variables
    out_agg.copyVariable('nav_lat')
    out_agg.copyVariable('nav_lon')
    
    # expand agg_methods if necessary: a single method applies to all variables
    if not isinstance(agg_methods, collections.abc.Sequence):
        agg_methods = [agg_methods] * len(variables)

    # prepare an index to write the output
    out_slice = [slice(None)] * len(variable_shape)
    
    # loop over the variables
    for varname, agg_method in zip(variables, agg_methods):
        v = in_ds.variables[varname]
        fill_value = getattr(v, '_FillValue', None)
        out_v = out_agg.create_variable(varname, v.dimensions,
                                        v.dtype, fill=fill_value)

        # loop over each reduced index
        for reduced_i in range(aggregator.reduced):
            out_slice[i_agg] = reduced_i
            # index with a tuple: NumPy rejects a plain list of slices
            out_v[tuple(out_slice)] = aggregator.reduce(agg_method, reduced_i, v)
            
    out_agg.close()
    in_ds.close()
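A minimal usage sketch, with hypothetical file and variable names; the
reduction factor and the REDUCE_MONTHLY constant are the ones discussed in
the import note above:

# collapse each run of 10 days into its mean for two named variables
aggregate('daily.nc', 'ten_day.nc', 10, variables=['t_max', 'precip'])

# monthly means of every non-coordinate variable
aggregate('daily.nc', 'monthly.nc', REDUCE_MONTHLY)

ReduceVar itself is project-local and not shown here. As a rough mental model
(a sketch under that assumption, not the project's implementation), reducing
by a fixed factor along one axis behaves like a pure-NumPy block mean:

import numpy as np

def block_mean(arr, factor, axis=0):
    """Collapse every `factor` consecutive samples along `axis` to their mean."""
    arr = np.moveaxis(np.asarray(arr), axis, 0)
    n_blocks = arr.shape[0] // factor      # whole blocks only
    trimmed = arr[:n_blocks * factor]      # drop any ragged tail
    blocked = trimmed.reshape((n_blocks, factor) + trimmed.shape[1:])
    return np.moveaxis(blocked.mean(axis=1), 0, axis)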
Example #2
import netCDF4 as nc
import numpy as np
import numpy.ma as ma
import pandas as pd

# As above, "rv" (ReduceVar, monthly_aggregator) and "trend" (CompressedAxes)
# are project-local modules, and REDUCE_MONTHLY is a module-level constant.


def ba_compare_year(indicesfile, bafile, outfile=None, indicesvarnames=None,
                    support=None, reduction=None):
    """Collate the various indices with burned-area counts."""

    # interesting variables from the indicesfile
    last_val_reduce = []
    if indicesvarnames is None:
        indicesvarnames = ['gsi_avg', 'fm1000', 'fm100', 'fm10', 'fm1',
                           'dd', 't_max']
    if 'dd' in indicesvarnames:
        last_val_reduce = ['dd']

    indices = nc.Dataset(indicesfile)
    indicesvars = [indices.variables[v] for v in indicesvarnames]
    ba = nc.Dataset(bafile)
    count = ba.variables['count']

    if support is not None:
        s = nc.Dataset(support)
        supportvars = list(s.variables.keys())
        supportvars.remove('land')
        indicesvarnames.extend(supportvars)
        indicesvars.extend([s.variables[v] for v in supportvars])
        last_val_reduce.extend(supportvars)
    
    # workaround: a bug in the index calculator leaves the last day
    # uncomputed; the first day is skipped as well.
    time_samples = range(1, len(ba.dimensions['days']) - 1)
    if reduction is not None:
        if reduction == REDUCE_MONTHLY:
            grid_reducer = rv.monthly_aggregator(count.shape, 3)
            cmp_reducer = rv.monthly_aggregator(indicesvars[0].shape, 0)
            grid_reducer.cutpoints[0] = 1
            cmp_reducer.cutpoints[1] = 1
        else:
            grid_reducer = rv.ReduceVar(count.shape, 3, reduction)
            cmp_reducer = rv.ReduceVar(indicesvars[0].shape, 0, reduction)
        time_samples = range(grid_reducer.reduced)

    ca = trend.CompressedAxes(indices, 'land')

    alldata = []
    days = [] 

    for i_time in time_samples:
        day_data = []
        active_lc = []

        if reduction is None:
            count_slice = count[..., i_time]
        else:
            count_slice = np.array(grid_reducer.sum(i_time, count))
        for lc in range(len(ba.dimensions['landcover'])):

            # compress the 2D grid of counts onto the 1D "land" axis
            lc_count = ca.compress(count_slice[:, :, lc])

            # find nonzero counts
            i_nonzero = ma.nonzero(lc_count)

            if len(i_nonzero[0]) > 0:
                # construct a dataframe for this landcover code
                lc_data = {"BA Count": lc_count[i_nonzero]}

                for n, v in zip(indicesvarnames, indicesvars):
                    # reduce the variable if necessary
                    if reduction is None:
                        day_v = v[i_time, :]
                    else:
                        # the last value of the dry-day sequence is
                        # representative of the reduced time period
                        if n in last_val_reduce:
                            day_v = cmp_reducer.last_val(i_time, v)
                        else:
                            day_v = cmp_reducer.mean(i_time, v)

                    # add a column for the current index
                    lc_data[n] = day_v[i_nonzero]

                day_data.append(pd.DataFrame(lc_data))
                active_lc.append(ba.variables['landcover'][lc])
        if len(day_data) > 0:
            alldata.append(pd.concat(day_data, keys=active_lc))
            days.append(i_time)

    all_data_frame = pd.concat(alldata, keys=days)

    if outfile is not None:
        all_data_frame.to_csv(outfile)

    return all_data_frame
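A minimal usage sketch (the file names are hypothetical; the indices file and
the burned-area file are assumed to share the same 'land' compression and
'days' axis):

# daily comparison over the default index variables
df = ba_compare_year('indices_2004.nc', 'ba_2004.nc')

# monthly aggregation, written to CSV as well as returned
df = ba_compare_year('indices_2004.nc', 'ba_2004.nc',
                     outfile='ba_vs_indices.csv', reduction=REDUCE_MONTHLY)

The returned DataFrame is hierarchically indexed by time sample and landcover
code, with one row per compressed land cell that has a nonzero burned-area
count, a "BA Count" column, and one column per index variable.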