def dict_toxray(data, ds=None, **kwargs):
    """
    Converts a dictionary with keys as variable names to an xray.Dataset
    object.

    The dictionary keys should correspond with variable names in
    ncmetadata.yaml.

    **kwargs are passed directly to xray.DataArray().
    """
    if ds is None:
        ds = {}
    for vv in list(data.keys()):
        if vv in ncmeta:
            attrs = ncmeta[vv]['attributes']
        else:
            print('Warning variable: %s not in ncmetadata.yaml. '
                  'Dataset will have no attrs' % vv)
            attrs = {}

        da = xray.DataArray(data[vv], attrs=attrs, **kwargs)

        ds.update({vv: da})

    return xray.Dataset(ds)
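# Usage sketch (the variable name is hypothetical; assumes the module-level
# `ncmeta` mapping has already been loaded from ncmetadata.yaml, and that
# extra keyword arguments are forwarded to xray.DataArray):
#
#     import numpy as np
#     data = {'watertemp': np.random.rand(24)}
#     ds = dict_toxray(data, dims=['time'])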
def _generate_fake_points(components, statevar_dict, energy_limit, output,
                          maximum_internal_dof):
    """
    Generate points for a fictitious hyperplane used as a starting point
    for energy minimization.
    """
    coordinate_dict = {'component': components}
    coordinate_dict.update(
        {str(key): value for key, value in statevar_dict.items()})
    largest_energy = float(energy_limit)
    if largest_energy < 0:
        largest_energy *= 0.99
    else:
        largest_energy *= 1.01
    output_columns = [str(x) for x in statevar_dict.keys()] + ['points']
    statevar_shape = tuple(
        len(np.atleast_1d(x)) for x in statevar_dict.values())
    # The internal dof for the fake points are all NaNs
    expanded_points = np.full(
        statevar_shape + (len(components), maximum_internal_dof), np.nan)
    data_arrays = {
        'X': (output_columns + ['component'],
              broadcast_to(np.eye(len(components)),
                           statevar_shape + (len(components),
                                             len(components)))),
        'Y': (output_columns + ['internal_dof'], expanded_points),
        'Phase': (output_columns,
                  np.full(statevar_shape + (len(components), ), '_FAKE_',
                          dtype='S6')),
        output: (output_columns,
                 np.full(statevar_shape + (len(components), ),
                         largest_energy))
    }
    return xray.Dataset(data_arrays, coords=coordinate_dict)
def write(self, filename):
    dim_names_appended = map(lambda x: x, self.array.dims)
    values_dict = {'array': (tuple(dim_names_appended), self.array.values)}

    # axes
    coords_dict = {
        'axis_names': list(self.array.dims),
        '__version__': self._writer_version,
        '__writerclass__': self.__class__.__name__
    }

    for dim_name in self.array.dims:
        if self.array[dim_name].dtype.fields is None:
            # indicates simple type, not a recarray
            coords_dict['__axis__' + dim_name] = self.array[dim_name].values
        else:
            for field, dtype_tuple in \
                    self.array[dim_name].dtype.fields.items():
                if self.array[dim_name].values[field].dtype.char == 'O':
                    print('WE ARE NOT STORING VARIABLES OF TYPE=OBJECT '
                          'IN THIS WRITER')
                    continue
                dtype = dtype_tuple[0]
                coords_dict['__axis__' + dim_name + '__' + field] = \
                    self.array[dim_name].values[field]

    ds = xray.Dataset(values_dict, coords=coords_dict)
    ds.to_netcdf(filename)
def group_mean(ds_in, grouping_var, groups):
    '''Function taking the mean across a grouping variable, leaving other
    dimensions untouched.
    '''
    gr_ds = xray.Dataset()
    for gr in groups:
        gr_ds[gr] = ds_in.loc[{grouping_var: gr}].mean(grouping_var)
    return gr_ds
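# A minimal runnable sketch, assuming the legacy `xray` package and a
# DataArray indexed by a coordinate with repeated labels (the toy 'season'
# axis here is an illustration, not from the original code):
import numpy as np
import xray

toy = xray.DataArray(np.arange(6.),
                     coords=[('season', ['DJF', 'DJF', 'MAM',
                                         'MAM', 'JJA', 'JJA'])])
season_means = group_mean(toy, 'season', ['DJF', 'MAM', 'JJA'])
# season_means.DJF == 0.5, season_means.MAM == 2.5, season_means.JJA == 4.5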
def write(self, filename, varname=None):
    """Write a :class:`Field` to a netcdf file

    :param filename: Basename of the file
    :param varname: Name of the field, to be appended to the filename"""
    filepath = str(path.local('%s%s.nc' % (filename, self.name)))
    if varname is None:
        varname = self.name
    # Derive name of 'depth' variable for NEMO convention
    vname_depth = 'depth%s' % self.name.lower()

    # Create DataArray objects for file I/O
    t, d, x, y = (self.time.size, self.depth.size,
                  self.lon.size, self.lat.size)
    nav_lon = xray.DataArray(self.lon + np.zeros((y, x), dtype=np.float32),
                             coords=[('y', self.lat), ('x', self.lon)])
    nav_lat = xray.DataArray(self.lat.reshape(y, 1)
                             + np.zeros(x, dtype=np.float32),
                             coords=[('y', self.lat), ('x', self.lon)])
    vardata = xray.DataArray(self.data.reshape((t, d, y, x)),
                             coords=[('time_counter', self.time),
                                     (vname_depth, self.depth),
                                     ('y', self.lat), ('x', self.lon)])
    # Create xray Dataset and output to netCDF format
    dset = xray.Dataset({varname: vardata},
                        coords={'nav_lon': nav_lon, 'nav_lat': nav_lat})
    dset.to_netcdf(filepath)
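# Hypothetical usage sketch (the Field instance and basename are
# placeholders): for a Field named 'U', the first call writes
# 'simulation_outU.nc' with a variable named 'U'; the second overrides
# the variable name only.
#
#     field.write('simulation_out')
#     field.write('simulation_out', varname='uvel')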
def combinevars(ds_in, dat_vars, new_dim_name='new_dim',
                combinevarname='new_var'):
    ds_out = xray.concat([ds_in[dv] for dv in dat_vars], dim='new_dim')
    ds_out = ds_out.rename({'new_dim': new_dim_name})
    ds_out.coords[new_dim_name] = dat_vars
    ds_out.name = combinevarname
    return ds_out
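# A minimal runnable sketch, assuming the legacy `xray` package (toy data;
# the variable names 'u'/'v' and the 'component' label are illustrative):
import numpy as np
import xray

toy = xray.Dataset({'u': ('time', np.zeros(3)),
                    'v': ('time', np.ones(3))})
wind = combinevars(toy, ['u', 'v'],
                   new_dim_name='component', combinevarname='wind')
# wind has dims ('component', 'time') with component labels ['u', 'v']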
def test_unary(self):
    args = [0,
            np.zeros(2),
            xray.Variable(['x'], [0, 0]),
            xray.DataArray([0, 0], dims='x'),
            xray.Dataset({'y': ('x', [0, 0])})]
    for a in args:
        self.assertIdentical(a + 1, xu.cos(a))
def download(self, i1, i2, j1, j2):
    uvel = self.xdataset_persist.u.isel(X=slice(i1, i2), Y=slice(j1, j2),
                                        Depth=0)
    vvel = self.ydataset_persist.v.isel(X=slice(i1, i2), Y=slice(j1, j2),
                                        Depth=0)
    ds = xray.Dataset()
    ds['u'] = uvel
    ds['v'] = vvel
def to_xray(self):
    import xray
    das = {}
    for varname, unit in fields:
        x, t, val = self.xtargs(varname)
        das[varname] = xray.DataArray(val, coords=(t, x),
                                      dims=('time', 'x'))
    return xray.Dataset(das)
def __init__(self, obj, group_coord, squeeze=True):
    """Create a GroupBy object

    Parameters
    ----------
    obj : Dataset or DataArray
        Object to group.
    group_coord : DataArray
        1-dimensional array with the group values.
    squeeze : boolean, optional
        If "group" is a coordinate of object, `squeeze` controls whether
        the subarrays have a dimension of length 1 along that coordinate
        or if the dimension is squeezed out.
    """
    if group_coord.ndim != 1:
        # TODO: remove this limitation?
        raise ValueError('`group_coord` must be 1 dimensional')

    self.obj = obj
    self.group_coord = group_coord
    self.group_dim, = group_coord.dimensions

    from .dataset import as_dataset
    expected_size = as_dataset(obj).dimensions[self.group_dim]
    if group_coord.size != expected_size:
        raise ValueError('the group variable\'s length does not '
                         'match the length of this variable along its '
                         'dimension')

    if group_coord.name in obj.dimensions:
        # assume that group_coord already has sorted, unique values
        if group_coord.dimensions != (group_coord.name, ):
            raise ValueError('`group_coord` is required to be a '
                             'coordinate variable if `group_coord.name` '
                             'is a dimension in `obj`')
        group_indices = np.arange(group_coord.size)
        if not squeeze:
            # group_indices = group_indices.reshape(-1, 1)
            # use slices to do views instead of fancy indexing
            group_indices = [slice(i, i + 1) for i in group_indices]
        unique_coord = group_coord
    else:
        # look through group_coord to find the unique values
        unique_values, group_indices = unique_value_groups(group_coord)
        # TODO: switch this to using the new DataArray constructor when we
        # get around to writing it:
        # unique_coord = xray.DataArray(unique_values, name=group_coord.name)
        variables = {group_coord.name: (group_coord.name, unique_values)}
        unique_coord = xray.Dataset(variables)[group_coord.name]

    self.group_indices = group_indices
    self.unique_coord = unique_coord
    self._groups = None
def test_binary(self):
    args = [0,
            np.zeros(2),
            xray.Variable(['x'], [0, 0]),
            xray.DataArray([0, 0], dims='x'),
            xray.Dataset({'y': ('x', [0, 0])})]
    for n, t1 in enumerate(args):
        for t2 in args[n:]:
            self.assertIdentical(t2 + 1, xu.maximum(t1, t2 + 1))
            self.assertIdentical(t2 + 1, xu.maximum(t2, t1 + 1))
            self.assertIdentical(t2 + 1, xu.maximum(t1 + 1, t2))
            self.assertIdentical(t2 + 1, xu.maximum(t2 + 1, t1))
def main(inargs):
    """Run the program."""

    # Read the data
    dset_in = xray.open_dataset(inargs.infile)
    #gio.check_xrayDataset(dset_in, inargs.var)

    subset_dict = gio.get_subset_kwargs(inargs)
    darray = dset_in[inargs.var].sel(**subset_dict)

    assert darray.dims == ('time', 'latitude', 'longitude'), \
        "Order of the data must be time, latitude, longitude"

    # Generate datetime list
    dt_list, dt_list_metadata = get_datetimes(darray, inargs.date_file)

    # Calculate the composites
    if not inargs.date_file:
        inargs.no_sig = True
    cmeans, cmean_atts, pvals, pval_atts = calc_composites(
        darray, dt_list, sig_test=not inargs.no_sig)

    # Write the output file
    d = {}
    d['latitude'] = darray['latitude']
    d['longitude'] = darray['longitude']
    for season in season_months.keys():
        d[inargs.var + '_' + season] = (['latitude', 'longitude'],
                                        cmeans[season])
        if not inargs.no_sig:
            d['p_' + season] = (['latitude', 'longitude'], pvals[season])

    dset_out = xray.Dataset(d)

    for season in season_months.keys():
        dset_out[inargs.var + '_' + season].attrs = cmean_atts[season]
        if not inargs.no_sig:
            dset_out['p_' + season].attrs = pval_atts[season]

    output_metadata = {inargs.infile: dset_in.attrs['history']}
    if inargs.date_file:
        output_metadata[inargs.date_file] = dt_list_metadata
    gio.set_global_atts(dset_out, dset_in.attrs, output_metadata)
    dset_out.to_netcdf(inargs.outfile, format='NETCDF3_CLASSIC')
def to_xray(self, attrs=None):
    """
    Converts both time series to an xray dataset object

    attrs = global attribute dictionary
    """
    if attrs is None:
        attrs = {}
    OBS = self.TSobs.to_xray()
    MOD = self.TSmod.to_xray()

    varobs = '%s_obs' % self.varname
    varmod = '%s_mod' % self.varname

    return xray.Dataset({varobs: OBS, varmod: MOD}, attrs=attrs)
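# Usage sketch (the comparison object `tscompare` is hypothetical): for a
# pair of matched series of variable 'temp', this yields a Dataset with
# variables 'temp_obs' and 'temp_mod'.
#
#     ds = tscompare.to_xray(attrs={'title': 'obs vs model'})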
def main(inargs):
    """Run the program."""

    # Read the data
    dset_in_u = xray.open_dataset(inargs.infileu)
    gio.check_xrayDataset(dset_in_u, inargs.varu)
    dset_in_v = xray.open_dataset(inargs.infilev)
    gio.check_xrayDataset(dset_in_v, inargs.varv)

    subset_dict = gio.get_subset_kwargs(inargs)
    darray_u = dset_in_u[inargs.varu].sel(**subset_dict)
    darray_v = dset_in_v[inargs.varv].sel(**subset_dict)

    lat_axis = darray_u['latitude'].values
    lon_axis = darray_u['longitude'].values
    axis_order = axis_letters(darray_u.dims)

    # Calculate the desired quantity
    data_out = calc_quantity(darray_u.values, darray_v.values,
                             inargs.quantity, lat_axis, lon_axis, axis_order)

    # Write the output file
    d = {}
    for dim in darray_u.dims:
        d[dim] = darray_u[dim]
    for var in data_out.keys():
        d[var] = (darray_u.dims, data_out[var])

    dset_out = xray.Dataset(d)
    for var in data_out.keys():
        dset_out[var].attrs = var_atts[var]

    outfile_metadata = {inargs.infileu: dset_in_u.attrs['history'],
                        inargs.infilev: dset_in_v.attrs['history']}
    gio.set_global_atts(dset_out, dset_in_u.attrs, outfile_metadata)
    dset_out.to_netcdf(inargs.outfile, format='NETCDF3_CLASSIC')
def test_groupby(self):
    ds = xray.Dataset({'a': ('x', [0, 0, 0])}, {'c': ('x', [0, 0, 1])})
    ds_grouped = ds.groupby('c')
    group_mean = ds_grouped.mean('x')
    arr_grouped = ds['a'].groupby('c')

    self.assertIdentical(ds, xu.maximum(ds_grouped, group_mean))
    self.assertIdentical(ds, xu.maximum(group_mean, ds_grouped))

    self.assertIdentical(ds, xu.maximum(arr_grouped, group_mean))
    self.assertIdentical(ds, xu.maximum(group_mean, arr_grouped))

    self.assertIdentical(ds, xu.maximum(ds_grouped, group_mean['a']))
    self.assertIdentical(ds, xu.maximum(group_mean['a'], ds_grouped))

    self.assertIdentical(ds.a, xu.maximum(arr_grouped, group_mean.a))
    self.assertIdentical(ds.a, xu.maximum(group_mean.a, arr_grouped))

    with self.assertRaisesRegexp(TypeError, 'only support binary ops'):
        xu.maximum(ds.a.variable, ds_grouped)
def read_mds(host, tree, shot, signals=None):
    """
    Connect to host and read the signals from the MDS tree of the
    specified shot.
    """
    assert isinstance(host, str)
    assert isinstance(tree, str)
    assert isinstance(shot, int)
    signals = signals if signals else _SIGNALS
    conn = mds.Connection(host)
    conn.openTree(tree, shot)
    darrays = {}
    for signal in signals:
        darrays[_name(signal)] = _read_one_signal(conn, signal)
    conn.closeTree(tree, shot)
    return xray.Dataset(darrays)
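# Hypothetical call (server, tree, and shot number are placeholders; the
# default signal list comes from the module-level _SIGNALS):
#
#     ds = read_mds('mds.example.org', 'my_tree', 12345)
#     ds.to_netcdf('shot_12345.nc')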
month = int(s[5][4:6])
day = int(s[5][6:8])
hdf = SD(fpath, SDC.READ)
sm = hdf.select("A_Soil_Moisture")
tp = hdf.select("A_Land_Surface_Temp")
if year != prev_year:
    if len(sm_data) > 0:
        sm_data = np.concatenate(sm_data, axis=2).astype('float32')
        tp_data = np.concatenate(tp_data, axis=2).astype('float32')
        sm_data[(sm_data == 9999) | (sm_data == -9999)] = np.nan
        tp_data[(tp_data == 9999) | (tp_data == -9999)] = np.nan
        dr_sm = xr.DataArray(sm_data, coords=[lats, lons, times],
                             dims=['lat', 'lon', 'time'])
        dr_tp = xr.DataArray(tp_data, coords=[lats, lons, times],
                             dims=['lat', 'lon', 'time'])
        ds = xr.Dataset(dict(temperature=dr_tp, soil_moisture=dr_sm))
        ds.to_netcdf(os.path.join(netcdf_dir,
                                  "amsr_daily_%i.nc" % prev_year))
    sm_data = []
    tp_data = []
    times = []
sm_data += [sm[:][:, :, np.newaxis]]
tp_data += [tp[:][:, :, np.newaxis]]
times += [dt.datetime(year, month, day)]
prev_year = year

# After the loop, concatenate and clean the final year's accumulated data
sm_data = np.concatenate(sm_data, axis=2).astype('float32')
tp_data = np.concatenate(tp_data, axis=2).astype('float32')
sm_data[(sm_data == 9999) | (sm_data == -9999)] = np.nan
tp_data[(tp_data == 9999) | (tp_data == -9999)] = np.nan
def run(met_data, prof_data, param_kwds=None, overwrite=True,
        diagnostics=False, suffix='', save_plots=False):
    #TODO: move this to the helper file
    """
    This is the main controller function for the model. The flow of the
    algorithm is as follows:

        1) Set model parameters (see set_params function in PWP_helper.py).
        2) Read in forcing and initial profile data.
        3) Prepare forcing and profile data for model run (see prep_data in
           PWP_helper.py).
            3.1) Interpolate forcing data to prescribed time increments.
            3.2) Interpolate profile data to prescribed depth increments.
            3.3) Initialize model output variables.
        4) Iterate the PWP model over the specified time interval:
            4.1) apply heat and salt fluxes
            4.2) rotate, adjust to wind, rotate
            4.3) apply bulk Richardson number mixing
            4.4) apply gradient Richardson number mixing
            4.5) apply drag associated with internal wave dissipation
            4.6) apply diapycnal diffusion
        5) Save results to output file.

    Input:

    met_data - path to netCDF file containing forcing/meteorological data.
        This file must be in the input_data/ directory. The data fields
        should include 'time', 'sw', 'lw', 'qlat', 'qsens', 'tx', 'ty', and
        'precip'. These fields should store 1-D time series of the same
        length. The model expects positive heat flux values to represent
        ocean warming. The time data field should contain a 1-D array
        representing fraction of day. For example, for 6-hourly data,
        met_data['time'] should contain a number series that increases in
        steps of 0.25, such as np.array([1.0, 1.25, 1.75, 2.0, 2.25...]).

        See https://github.com/earlew/pwp_python#input-data for more info
        about the expected input data.

    prof_data - path to netCDF file containing initial profile data. This
        must be in the input_data/ directory. The fields of this dataset
        should include: ['z', 't', 's', 'lat']. These represent 1-D
        vertical profiles of temperature, salinity and density. 'lat' is
        expected to be a length=1 array-like object, e.g.
        prof_data['lat'] = [25.0].

        See https://github.com/earlew/pwp_python#input-data for more info
        about the expected input data.

    overwrite - controls the naming of the output file. If True, the same
        filename is used for every model run. If False, a unique time stamp
        is generated and appended to the file name. Default is True.

    diagnostics - if True, the code will generate live plots of mixed layer
        properties at each time step.

    suffix - string to add to the end of filenames, e.g. suffix='nodiff'
        leads to 'pwp_out_nodiff.nc'. Default is an empty string ''.

    save_plots - this gets passed on to the makeSomePlots() function in
        PWP_helper. If True, the code saves the generated plots. Default is
        False.

    param_kwds - dict containing keyword arguments for the set_params
        function. See PWP_helper.set_params() for more details. If None,
        default parameters are used. Default is None.

    Output:

    forcing, pwp_out = PWP.run()

    forcing: a dictionary containing the interpolated surface forcing.
    pwp_out: a dictionary containing the solutions generated by the model.

    This script also saves the following to file:

    'pwp_output.nc' - a netCDF file containing the output generated by the
        model.
    'pwp_output.p' - a pickle file containing the output generated by the
        model.
    'forcing.p' - a pickle file containing the (interpolated) forcing used
        for the model run.

    If overwrite is set to False, a timestamp will be added to these file
    names.

    ------------------------------------------------------------------------

    There are two ways to run the model:

    1. You can run the model by typing "python PWP.py" from the bash
       command line. This will initiate this function with the set
       defaults. Typing "%run PWP" from the ipython command line will do
       the same thing.

    2. You can also import the module then call the run() function
       specifically. For example,

       >> import PWP
       >> forcing, pwp_out = PWP.run()

       Alternatively, if you want to change the defaults...

       >> forcing, pwp_out = PWP.run(met_data='new_forcing.nc',
                                     overwrite=False, diagnostics=False)

       This is a more interactive approach as it provides direct access to
       all of the model's subfunctions.
    """

    # close all figures
    plt.close('all')

    # start timer
    t0 = timeit.default_timer()

    ## Get surface forcing and profile data
    # These are x-ray datasets, but you can treat them as dicts.
    # Do met_dset.keys() to explore the data fields
    met_dset = xray.open_dataset('input_data/%s' % met_data)
    prof_dset = xray.open_dataset('input_data/%s' % prof_data)

    ## get model parameters and constants (read docs for set_params function)
    lat = prof_dset['lat']  # needed to compute internal wave dissipation
    if param_kwds is None:
        params = phf.set_params(lat=lat)
    else:
        param_kwds['lat'] = lat
        params = phf.set_params(**param_kwds)

    ## prep forcing and initial profile data for model run
    ## (see prep_data function for more details)
    forcing, pwp_out, params = phf.prep_data(met_dset, prof_dset, params)

    ## run the model
    pwp_out = pwpgo(forcing, params, pwp_out, diagnostics)

    # check timer
    tnow = timeit.default_timer()
    t_elapsed = (tnow - t0)
    print("Time elapsed: %i minutes and %i seconds" %
          (np.floor(t_elapsed / 60), t_elapsed % 60))

    ## write output to disk
    if overwrite:
        time_stamp = ''
    else:
        # use unique time stamp
        time_stamp = datetime.now().strftime("_%Y%m%d_%H%M")

    if len(suffix) > 0 and suffix[0] != '_':
        suffix = '_%s' % suffix

    # save output as netCDF file
    pwp_out_ds = xray.Dataset(
        {
            'temp': (['z', 'time'], pwp_out['temp']),
            'sal': (['z', 'time'], pwp_out['sal']),
            'uvel': (['z', 'time'], pwp_out['uvel']),
            'vvel': (['z', 'time'], pwp_out['vvel']),
            'dens': (['z', 'time'], pwp_out['dens']),
            'mld': (['time'], pwp_out['mld'])
        },
        coords={'z': pwp_out['z'], 'time': pwp_out['time']})
    pwp_out_ds.to_netcdf("output/pwp_output%s%s.nc" % (suffix, time_stamp))

    # also output forcing and pwp_out as pickle files
    pickle.dump(forcing,
                open("output/forcing%s%s.p" % (suffix, time_stamp), "wb"))
    pickle.dump(pwp_out,
                open("output/pwp_out%s%s.p" % (suffix, time_stamp), "wb"))

    ## do analysis of the results
    phf.makeSomePlots(forcing, pwp_out, suffix=suffix, save_plots=save_plots)

    return forcing, pwp_out
def bias_correction(self, obs, modeled, obs_var, modeled_var, njobs=1):
    """
    Parameters
    ---------------------------------------------------------------
    obs: :py:class:`~xarray.DataArray`, required
        A baseline gridded low resolution observed dataset. This should
        include high quality gridded observations. lat and lon are
        expected as dimensions.
    modeled: :py:class:`~xarray.DataArray`, required
        A gridded low resolution climate variable to be bias corrected.
        This may include reanalysis or GCM datasets. It is recommended
        that the lat and lon dimensions match or are very similar to obs.
    obs_var: str, required
        The variable name in dataset obs which to model
    modeled_var: str, required
        The variable name in Dataset modeled which to bias correct
    njobs: int, optional
        The number of processes to execute in parallel
    """
    # Select intersecting time periods
    d1 = obs.time.values
    d2 = modeled.time.values
    intersection = np.intersect1d(d1, d2)
    obs = obs.loc[dict(time=intersection)]
    modeled = modeled.loc[dict(time=intersection)]
    dayofyear = obs['time.dayofyear']
    lat_vals = modeled.lat.values
    lon_vals = modeled.lon.values

    # initialize the output data array
    mapped_data = np.zeros(shape=(intersection.shape[0],
                                  lat_vals.shape[0], lon_vals.shape[0]))

    # loop through each day of the year, 1 to 366
    for day in np.unique(dayofyear.values):
        if day % 100 == 0:
            print("Day = %i" % day)
        # select days +- pool
        dayrange = (np.arange(day - self.pool, day + self.pool + 1)
                    + 366) % 366 + 1
        days = np.in1d(dayofyear, dayrange)
        subobs = obs.loc[dict(time=days)]
        submodeled = modeled.loc[dict(time=days)]

        # which rows correspond to these days
        sub_curr_day_rows = np.where(
            day == subobs['time.dayofyear'].values)[0]
        curr_day_rows = np.where(day == obs['time.dayofyear'].values)[0]
        train_num = np.where(
            subobs['time.year'] <= self.max_train_year)[0][-1]
        mapped_times = subobs['time'].values[sub_curr_day_rows]

        jobs = []  # list to collect jobs
        for i, lat in enumerate(lat_vals):
            X_lat = subobs.sel(lat=lat, lon=lon_vals,
                               method='nearest')[obs_var].values
            Y_lat = submodeled.sel(lat=lat, lon=lon_vals)[modeled_var].values
            jobs.append(delayed(mapper)(X_lat, Y_lat, train_num, self.step))

        if day % 100 == 0:
            print("Running jobs", len(jobs))

        # select only those days which correspond to the current day
        # of the year
        day_mapped = np.asarray(
            Parallel(n_jobs=njobs)(jobs))[:, sub_curr_day_rows]
        day_mapped = np.swapaxes(day_mapped, 0, 1)
        mapped_data[curr_day_rows, :, :] = day_mapped

    # put data into a data array
    dr = xray.DataArray(mapped_data,
                        coords=[obs['time'].values, lat_vals, lon_vals],
                        dims=['time', 'lat', 'lon'])
    dr.attrs['gridtype'] = 'latlon'
    ds = xray.Dataset({'bias_corrected': dr})
    ds = ds.reindex_like(modeled)
    # merging aids in preserving netcdf structure
    modeled = modeled.merge(ds)
    # delete modeled variable to save space
    del modeled[modeled_var]
    return modeled
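# Hypothetical call (the instance `bc` and variable names are placeholders;
# obs_ds and gcm_ds are daily Datasets sharing overlapping time, lat, and
# lon axes):
#
#     corrected = bc.bias_correction(obs_ds, gcm_ds,
#                                    obs_var='tmax', modeled_var='tasmax',
#                                    njobs=4)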
def equilibrium(dbf, comps, phases, conditions, **kwargs):
    """
    Calculate the equilibrium state of a system containing the specified
    components and phases, under the specified conditions.
    Model parameters are taken from 'dbf'.

    Parameters
    ----------
    dbf : Database
        Thermodynamic database containing the relevant parameters.
    comps : list
        Names of components to consider in the calculation.
    phases : list or dict
        Names of phases to consider in the calculation.
    conditions : dict or (list of dict)
        StateVariables and their corresponding value.
    verbose : bool, optional (Default: True)
        Show progress of calculations.
    grid_opts : dict, optional
        Keyword arguments to pass to the initial grid routine.

    Returns
    -------
    Structured equilibrium calculation.

    Examples
    --------
    None yet.
    """
    active_phases = unpack_phases(phases) or sorted(dbf.phases.keys())
    comps = sorted(comps)
    indep_vars = ['T', 'P']
    grid_opts = kwargs.pop('grid_opts', dict())
    verbose = kwargs.pop('verbose', True)
    phase_records = dict()
    callable_dict = kwargs.pop('callables', dict())
    grad_callable_dict = kwargs.pop('grad_callables', dict())
    hess_callable_dict = kwargs.pop('hess_callables', dict())
    points_dict = dict()
    maximum_internal_dof = 0
    conds = OrderedDict((key, unpack_condition(value))
                        for key, value in sorted(conditions.items(), key=str))
    str_conds = OrderedDict((str(key), value) for key, value in conds.items())
    indep_vals = list([float(x) for x in np.atleast_1d(val)]
                      for key, val in str_conds.items() if key in indep_vars)
    components = [x for x in sorted(comps) if not x.startswith('VA')]
    # Construct models for each phase; prioritize user models
    models = unpack_kwarg(kwargs.pop('model', Model), default_arg=Model)
    if verbose:
        print('Components:', ' '.join(comps))
        print('Phases:', end=' ')
    for name in active_phases:
        mod = models[name]
        if isinstance(mod, type):
            models[name] = mod = mod(dbf, comps, name)
        variables = sorted(mod.energy.atoms(v.StateVariable).union(
            {key for key in conditions.keys() if key in [v.T, v.P]}), key=str)
        site_fracs = sorted(mod.energy.atoms(v.SiteFraction), key=str)
        maximum_internal_dof = max(maximum_internal_dof, len(site_fracs))
        # Extra factor '1e-100...' is to work around an annoying
        # broadcasting bug for zero gradient entries
        #models[name].models['_broadcaster'] = 1e-100 * Mul(*variables) ** 3
        out = models[name].energy
        undefs = list(out.atoms(Symbol) - out.atoms(v.StateVariable))
        for undef in undefs:
            out = out.xreplace({undef: float(0)})
        callable_dict[name], grad_callable_dict[name], hess_callable_dict[name] = \
            build_functions(out, [v.P, v.T] + site_fracs)

        # Adjust gradient by the approximate chemical potentials
        hyperplane = Add(*[v.MU(i) * mole_fraction(dbf.phases[name], comps, i)
                           for i in comps if i != 'VA'])
        plane_obj, plane_grad, plane_hess = build_functions(
            hyperplane, [v.MU(i) for i in comps if i != 'VA'] + site_fracs)
        phase_records[name.upper()] = PhaseRecord(
            variables=variables,
            grad=grad_callable_dict[name],
            hess=hess_callable_dict[name],
            plane_grad=plane_grad,
            plane_hess=plane_hess)
        if verbose:
            print(name, end=' ')
    if verbose:
        print('[done]', end='\n')

    # 'calculate' accepts conditions through its keyword arguments
    grid_opts.update({key: value for key, value in str_conds.items()
                      if key in indep_vars})
    if 'pdens' not in grid_opts:
        grid_opts['pdens'] = 100

    coord_dict = str_conds.copy()
    coord_dict['vertex'] = np.arange(len(components))
    grid_shape = np.meshgrid(*coord_dict.values(),
                             indexing='ij', sparse=False)[0].shape
    coord_dict['component'] = components
    if verbose:
        print('Computing initial grid', end=' ')

    grid = calculate(dbf, comps, active_phases, output='GM', model=models,
                     callables=callable_dict, fake_points=True, **grid_opts)

    if verbose:
        print('[{0} points, {1}]'.format(len(grid.points),
                                         sizeof_fmt(grid.nbytes)), end='\n')

    properties = xray.Dataset(
        {
            'NP': (list(str_conds.keys()) + ['vertex'], np.empty(grid_shape)),
            'GM': (list(str_conds.keys()), np.empty(grid_shape[:-1])),
            'MU': (list(str_conds.keys()) + ['component'],
                   np.empty(grid_shape)),
            'points': (list(str_conds.keys()) + ['vertex'],
                       np.empty(grid_shape, dtype=np.int))
        },
        coords=coord_dict,
        attrs={'iterations': 1},
    )
    # Store the potentials from the previous iteration
    current_potentials = properties.MU.copy()

    for iteration in range(MAX_ITERATIONS):
        if verbose:
            print('Computing convex hull [iteration {}]'.format(
                properties.attrs['iterations']))
        # lower_convex_hull will modify properties
        lower_convex_hull(grid, properties)
        progress = np.abs(current_potentials - properties.MU).values
        converged = (progress < MIN_PROGRESS).all(axis=-1)
        if verbose:
            print('progress', progress.max(),
                  '[{} conditions updated]'.format(np.sum(~converged)))
        if progress.max() < MIN_PROGRESS:
            if verbose:
                print('Convergence achieved')
            break
        current_potentials[...] = properties.MU.values
        if verbose:
            print('Refining convex hull')
        # Insert extra dimensions for non-T,P conditions so GM broadcasts
        # correctly
        energy_broadcast_shape = grid.GM.values.shape[:len(indep_vals)] + \
            (1,) * (len(str_conds) - len(indep_vals)) + \
            (grid.GM.values.shape[-1],)
        driving_forces = np.einsum(
            '...i,...i',
            properties.MU.values[..., np.newaxis, :].astype(np.float),
            grid.X.values[np.index_exp[...] +
                          (np.newaxis,) * (len(str_conds) - len(indep_vals)) +
                          np.index_exp[:, :]].astype(np.float)) - \
            grid.GM.values.view().reshape(energy_broadcast_shape)

        for name in active_phases:
            dof = len(models[name].energy.atoms(v.SiteFraction))
            current_phase_indices = (grid.Phase.values == name).reshape(
                energy_broadcast_shape[:-1] + (-1,))
            # Broadcast to capture all conditions
            current_phase_indices = np.broadcast_arrays(
                current_phase_indices, np.empty(driving_forces.shape))[0]
            # This reshape is safe as long as phases have the same number of
            # points at all indep. conditions
            current_phase_driving_forces = driving_forces[
                current_phase_indices].reshape(
                    current_phase_indices.shape[:-1] + (-1,))
            # Note: This works as long as all points are in the same phase
            # order for all T, P
            current_site_fractions = grid.Y.values[
                ..., current_phase_indices[(0,) * len(str_conds)], :]
            if np.sum(current_site_fractions[
                    (0,) * len(indep_vals)][..., :dof]) == dof:
                # All site fractions are 1, aka zero internal degrees of
                # freedom. Impossible to refine these points, so skip phase
                points_dict[name] = current_site_fractions[
                    (0,) * len(indep_vals)][..., :dof]
                continue
            # Find the N points with largest driving force for a given set
            # of conditions
            # Remember that driving force has a sign, so we want the
            # "most positive" values
            # N is the number of components, in this context
            # N points define a 'best simplex' for every set of conditions
            # We also need to restrict ourselves to one phase at a time
            trial_indices = np.argpartition(current_phase_driving_forces,
                                            -len(components),
                                            axis=-1)[..., -len(components):]
            trial_indices = trial_indices.ravel()
            statevar_indices = np.unravel_index(
                np.arange(np.multiply.reduce(
                    properties.GM.values.shape + (len(components),))),
                properties.GM.values.shape +
                (len(components),))[:len(indep_vals)]
            points = current_site_fractions[
                np.index_exp[statevar_indices + (trial_indices,)]]
            points.shape = properties.points.shape[:-1] + \
                (-1, maximum_internal_dof)
            # The Y arrays have been padded, so we should slice off the
            # padding
            points = points[..., :dof]
            #print('Starting points shape: ', points.shape)
            #print(points)
            if len(points) == 0:
                if name in points_dict:
                    del points_dict[name]
                # No nearly stable points: skip this phase
                continue

            num_vars = len(phase_records[name].variables)
            plane_grad = phase_records[name].plane_grad
            plane_hess = phase_records[name].plane_hess
            statevar_grid = np.meshgrid(*itertools.chain(indep_vals),
                                        sparse=True, indexing='ij')
            # TODO: A more sophisticated treatment of constraints
            num_constraints = len(dbf.phases[name].sublattices)
            constraint_jac = np.zeros((num_constraints,
                                       num_vars - len(indep_vars)))
            # Independent variables are always fixed (in this limited
            # implementation)
            #for idx in range(len(indep_vals)):
            #    constraint_jac[idx, idx] = 1
            # This is for site fraction balance constraints
            var_idx = 0  #len(indep_vals)
            for idx in range(len(dbf.phases[name].sublattices)):
                active_in_subl = set(
                    dbf.phases[name].constituents[idx]).intersection(comps)
                constraint_jac[idx, var_idx:var_idx + len(active_in_subl)] = 1
                var_idx += len(active_in_subl)

            newton_iteration = 0
            while newton_iteration < MAX_NEWTON_ITERATIONS:
                flattened_points = points.reshape(
                    points.shape[:len(indep_vals)] + (-1, points.shape[-1]))
                grad_args = itertools.chain(
                    [i[..., None] for i in statevar_grid],
                    [flattened_points[..., i]
                     for i in range(flattened_points.shape[-1])])
                grad = np.array(phase_records[name].grad(*grad_args),
                                dtype=np.float)
                # Remove derivatives wrt T,P
                grad = grad[..., len(indep_vars):]
                grad.shape = points.shape
                # This is necessary for gradients on the edge of space
                grad[np.isnan(grad).any(axis=-1)] = 0
                hess_args = itertools.chain(
                    [i[..., None] for i in statevar_grid],
                    [flattened_points[..., i]
                     for i in range(flattened_points.shape[-1])])
                hess = np.array(phase_records[name].hess(*hess_args),
                                dtype=np.float)
                # Remove derivatives wrt T,P
                hess = hess[..., len(indep_vars):, len(indep_vars):]
                hess.shape = points.shape + (hess.shape[-1],)
                hess[np.isnan(hess).any(axis=(-2, -1))] = \
                    np.eye(hess.shape[-1])
                plane_args = itertools.chain(
                    [properties.MU.values[..., i][..., None]
                     for i in range(properties.MU.shape[-1])],
                    [points[..., i] for i in range(points.shape[-1])])
                cast_grad = np.array(plane_grad(*plane_args), dtype=np.float)
                # Remove derivatives wrt chemical potentials
                cast_grad = cast_grad[..., properties.MU.shape[-1]:]
                grad = grad - cast_grad
                plane_args = itertools.chain(
                    [properties.MU.values[..., i][..., None]
                     for i in range(properties.MU.shape[-1])],
                    [points[..., i] for i in range(points.shape[-1])])
                cast_hess = np.array(plane_hess(*plane_args), dtype=np.float)
                # Remove derivatives wrt chemical potentials
                cast_hess = cast_hess[..., properties.MU.shape[-1]:,
                                      properties.MU.shape[-1]:]
                cast_hess = -cast_hess + hess
                hess = cast_hess.astype(np.float, copy=False)
                try:
                    e_matrix = np.linalg.inv(hess)
                except np.linalg.LinAlgError:
                    print(hess)
                    raise
                current = calculate(
                    dbf, comps, name, output='GM', model=models,
                    callables=callable_dict, fake_points=False,
                    points=points.reshape(points.shape[:len(indep_vals)] +
                                          (-1, points.shape[-1])),
                    **grid_opts)
                current_plane = np.multiply(
                    current.X.values.reshape(points.shape[:-1] +
                                             (len(components),)),
                    properties.MU.values[..., np.newaxis, :]).sum(axis=-1)
                current_df = current.GM.values.reshape(
                    points.shape[:-1]) - current_plane
                #print('Inv hess check: ', np.isnan(e_matrix).any())
                #print('grad check: ', np.isnan(grad).any())
                dy_unconstrained = -np.einsum('...ij,...j->...i',
                                              e_matrix, grad)
                #print('dy_unconstrained check: ', np.isnan(dy_unconstrained).any())
                proj_matrix = np.dot(e_matrix, constraint_jac.T)
                inv_matrix = np.rollaxis(np.dot(constraint_jac, proj_matrix),
                                         0, -1)
                inv_term = np.linalg.inv(inv_matrix)
                #print('inv_term check: ', np.isnan(inv_term).any())
                first_term = np.einsum('...ij,...jk->...ik',
                                       proj_matrix, inv_term)
                #print('first_term check: ', np.isnan(first_term).any())
                # Normally a term for the residual here
                # We only choose starting points which obey the constraints,
                # so r = 0
                cons_summation = np.einsum('...i,...ji->...j',
                                           dy_unconstrained, constraint_jac)
                #print('cons_summation check: ', np.isnan(cons_summation).any())
                cons_correction = np.einsum('...ij,...j->...i',
                                            first_term, cons_summation)
                #print('cons_correction check: ', np.isnan(cons_correction).any())
                dy_constrained = dy_unconstrained - cons_correction
                #print('dy_constrained check: ', np.isnan(dy_constrained).any())
                # TODO: Support for adaptive changing independent variable
                # steps
                new_direction = dy_constrained
                #print('new_direction', new_direction)
                #print('points', points)
                # Backtracking line search
                if np.isnan(new_direction).any():
                    print('new_direction', new_direction)
                #print('Convergence angle:', -(grad*new_direction).sum(axis=-1) / (np.linalg.norm(grad, axis=-1) * np.linalg.norm(new_direction, axis=-1)))
                new_points = points + INITIAL_STEP_SIZE * new_direction
                alpha = np.full(new_points.shape[:-1], INITIAL_STEP_SIZE,
                                dtype=np.float)
                alpha[np.all(np.linalg.norm(new_direction, axis=-1) <
                             MIN_DIRECTION_NORM, axis=-1)] = 0
                negative_points = np.any(new_points < 0., axis=-1)
                while np.any(negative_points):
                    alpha[negative_points] *= 0.5
                    new_points = points + \
                        alpha[..., np.newaxis] * new_direction
                    negative_points = np.any(new_points < 0., axis=-1)
                # Backtracking line search
                # alpha now contains maximum possible values that keep us
                # inside the space, but we don't just want to take the
                # biggest step; we want the biggest step which reduces energy
                new_points = new_points.reshape(
                    new_points.shape[:len(indep_vals)] +
                    (-1, new_points.shape[-1]))
                candidates = calculate(dbf, comps, name, output='GM',
                                       model=models, callables=callable_dict,
                                       fake_points=False, points=new_points,
                                       **grid_opts)
                candidate_plane = np.multiply(
                    candidates.X.values.reshape(points.shape[:-1] +
                                                (len(components),)),
                    properties.MU.values[..., np.newaxis, :]).sum(axis=-1)
                energy_diff = (candidates.GM.values.reshape(
                    new_direction.shape[:-1]) - candidate_plane) - current_df
                new_points.shape = new_direction.shape
                bad_steps = energy_diff > alpha * 1e-4 * \
                    (new_direction * grad).sum(axis=-1)
                backtracking_iterations = 0
                while np.any(bad_steps):
                    alpha[bad_steps] *= 0.5
                    new_points = points + \
                        alpha[..., np.newaxis] * new_direction
                    #print('new_points', new_points)
                    #print('bad_steps', bad_steps)
                    new_points = new_points.reshape(
                        new_points.shape[:len(indep_vals)] +
                        (-1, new_points.shape[-1]))
                    candidates = calculate(dbf, comps, name, output='GM',
                                           model=models,
                                           callables=callable_dict,
                                           fake_points=False,
                                           points=new_points, **grid_opts)
                    candidate_plane = np.multiply(
                        candidates.X.values.reshape(points.shape[:-1] +
                                                    (len(components),)),
                        properties.MU.values[..., np.newaxis, :]).sum(axis=-1)
                    energy_diff = (candidates.GM.values.reshape(
                        new_direction.shape[:-1]) - candidate_plane) - \
                        current_df
                    #print('energy_diff', energy_diff)
                    new_points.shape = new_direction.shape
                    bad_steps = energy_diff > alpha * 1e-4 * \
                        (new_direction * grad).sum(axis=-1)
                    backtracking_iterations += 1
                    if backtracking_iterations > MAX_BACKTRACKING:
                        break
                biggest_step = np.max(
                    np.linalg.norm(new_points - points, axis=-1))
                if biggest_step < 1e-2:
                    if verbose:
                        print('N-R convergence on mini-iteration',
                              newton_iteration, '[{}]'.format(name))
                    points = new_points
                    break
                if verbose:
                    #print('Biggest step:', biggest_step)
                    #print('points', points)
                    #print('grad of points', grad)
                    #print('new_direction', new_direction)
                    #print('alpha', alpha)
                    #print('new_points', new_points)
                    pass
                points = new_points
                newton_iteration += 1
            new_points = points.reshape(points.shape[:len(indep_vals)] +
                                        (-1, points.shape[-1]))
            new_points = np.concatenate(
                (current_site_fractions[..., :dof], new_points), axis=-2)
            points_dict[name] = new_points

        if verbose:
            print('Rebuilding grid', end=' ')
        grid = calculate(dbf, comps, active_phases, output='GM',
                         model=models, callables=callable_dict,
                         fake_points=True, points=points_dict, **grid_opts)
        if verbose:
            print('[{0} points, {1}]'.format(len(grid.points),
                                             sizeof_fmt(grid.nbytes)),
                  end='\n')
        properties.attrs['iterations'] += 1

    # One last call to ensure 'properties' and 'grid' are consistent with
    # one another
    lower_convex_hull(grid, properties)
    ravelled_X_view = grid['X'].values.view().reshape(
        -1, grid['X'].values.shape[-1])
    ravelled_Y_view = grid['Y'].values.view().reshape(
        -1, grid['Y'].values.shape[-1])
    ravelled_Phase_view = grid['Phase'].values.view().reshape(-1)
    # Copy final point values from the grid and drop the index array
    # For some reason direct construction doesn't work. We have to create
    # empty and then assign.
    properties['X'] = xray.DataArray(
        np.empty_like(ravelled_X_view[properties['points'].values]),
        dims=properties['points'].dims + ('component',))
    properties['X'].values[...] = \
        ravelled_X_view[properties['points'].values]
    properties['Y'] = xray.DataArray(
        np.empty_like(ravelled_Y_view[properties['points'].values]),
        dims=properties['points'].dims + ('internal_dof',))
    properties['Y'].values[...] = \
        ravelled_Y_view[properties['points'].values]
    # TODO: What about invariant reactions? We should perform a final
    # driving force calculation here.
    # We can handle that in the same post-processing step where we identify
    # single-phase regions.
    properties['Phase'] = xray.DataArray(
        np.empty_like(ravelled_Phase_view[properties['points'].values]),
        dims=properties['points'].dims)
    properties['Phase'].values[...] = \
        ravelled_Phase_view[properties['points'].values]
    del properties['points']
    return properties
rhmax = constrain_dataset(rhmax, operator.le, 100, 100)

print("entering iteration loop")
tmois, fm1000_rh[day, :, :], fm100_rh[day, :, :], bv = calc_fm100_fm1000(
    x, pptdur.isel(time=day), rhmax, rhmin,
    kelvin_to_fahrenheit(tmax['air_temp_max'].isel(time=day)),
    kelvin_to_fahrenheit(tmin['air_temp_min'].isel(time=day)),
    lats, tmois, bv, julians[day], ymc)
ymc = fm100_rh[day, :, :]
print(day)
print("finished iteration loop")

# CONSTRUCT DATASET
ds = xray.Dataset()
lon_da = xray.DataArray(tmax.lon,
                        dims=('longitude', ),
                        name='longitude',
                        attrs={'long_name': 'longitude coordinate'})
lat_da = xray.DataArray(tmax.lat,
                        dims=('latitude', ),
                        name='latitude',
                        attrs={'long_name': 'latitude coordinate'})
ds['fm100'] = xray.DataArray(fm100_rh,
                             dims=('latitude', 'longitude'),
                             name='fm100',
                             coords={'latitude': lat_da,
                                     'longitude': lon_da},
df['year'] = s_storage.index.year
df['month'] = s_storage.index.month
df['day'] = s_storage.index.day
df['storage_acre_ft'] = s_storage.values
df[['year', 'month', 'day', 'storage_acre_ft']].\
    to_csv('{}.storage.dam{}.txt'.format(cfg['OUTPUT']['out_flow_basepath'],
                                         dam_number),
           sep='\t', index=False)

#====================================================================#
# Save modified streamflow to netCDF file
#====================================================================#
#=== Save modified streamflow ===#
ds_flow_new = xray.Dataset(
    {'streamflow': (['time', 'lat', 'lon'], da_flow.values)},
    coords={'lat': (['lat'], ds_rvic['lat'].values),
            'lon': (['lon'], ds_rvic['lon'].values),
            'time': (['time'], da_flow['time'].values)})
ds_flow_new['streamflow'].attrs['units'] = 'cfs'
ds_flow_new['streamflow'].attrs['long_name'] = \
    'Simulated regulated streamflow'
ds_flow_new.to_netcdf(
    '{}.modified_flow.nc'.format(cfg['OUTPUT']['out_flow_basepath']),
    format='NETCDF4_CLASSIC')
ds_flow_new.close()

#=== Save flow change before and after reservoir operation ===#
ds_flow_delta = xray.Dataset(
    {'flow_delta': (['time', 'lat', 'lon'],
                    da_flow.values - da_rvic_flow.values)},
    coords={'lat': (['lat'], ds_rvic['lat'].values),
            'lon': (['lon'], ds_rvic['lon'].values),
            'time': (['time'], da_flow['time'].values)})
ds_flow_delta['flow_delta'].attrs['units'] = 'cfs'
uvindir = '/home/disk/eos4/rachel/Obs/ERAI/uv'
startyr = 1998
endyr = 2015

for iyear in range(startyr, endyr):
    uvfile = xr.open_mfdataset(uvindir + '/interim_daily_' + str(iyear) +
                               '*.grb')
    ulev, vlev = uvfile['u'], uvfile['v']
    # NOTE: udash/vdash are computed but unused in this block; EKEall
    # below is built from the full winds.
    udash = ulev - ulev.mean(dim='longitude')
    vdash = vlev - vlev.mean(dim='longitude')

    EKEall = 0.5 * ((ulev * ulev) + (vlev * vlev))
    EKEyears = EKEall.groupby('time.month').sum(dim='time')
    EKEyears = EKEyears.rename({'month': 'time'})
    EKEyears = EKEyears.rename({'latitude': 'lat'})
    EKEyears = EKEyears.rename({'longitude': 'lon'})

    EKEds = xr.Dataset({'EKE': EKEyears})
    EKEds.to_netcdf(uvindir + '/EKE_' + str(iyear) + '.nc', mode='w')

uvindir = '/home/disk/eos4/rachel/Obs/ERAI/uv'
startyr = 1998
endyr = 2015

for iyear in range(startyr, endyr):
    for imonth in range(0, 12):
        uvfile = xr.open_mfdataset(uvindir + '/interim_daily_' +
                                   str(iyear) +
                                   '{:02d}'.format(imonth + 1) + '.grb')
        ulev, vlev = uvfile['u'], uvfile['v']
        udash = ulev - ulev.mean(dim='time')
        vdash = vlev - vlev.mean(dim='time')

        EKEall = 0.5 * ((udash * udash) + (vdash * vdash))
        EKEmonth = EKEall.mean(dim='time')
        EKEmonth = EKEmonth.rename({'latitude': 'lat'})
def write_parcel_output(filename=None, format=None, parcel=None,
                        parcel_df=None, aerosol_dfs=None, other_dfs=None):
    """ Write model output to disk.

    Wrapper for methods to write parcel model output to disk.

    Parameters
    ----------
    filename : str
        Full filename to write output; if not supplied, will default to
        the current timestamp
    format : str
        Format to use from ``OUTPUT_FORMATS``; must be supplied if no
        filename is provided
    parcel : ParcelModel
        A ParcelModel which has already been integrated at least once
    parcel_df : DataFrame
        Model thermodynamic history
    aerosol_dfs : Panel
        Aerosol size history
    other_dfs : list of DataFrames
        Additional DataFrames to include in output; must have the same
        index as the parcel's results when transformed to a DataFrame!
    """
    if not filename:
        if not format:
            raise ParcelModelError("Must supply either a filename or "
                                   "format.")
        if not (format in OUTPUT_FORMATS):
            raise ParcelModelError("Please supply a format from %r" %
                                   OUTPUT_FORMATS)
        basename = get_timestamp()
        extension = format
    else:
        basename, extension = os.path.splitext(filename)
        extension = extension[1:]  # strip '.'
        if not (extension in OUTPUT_FORMATS):
            extension = format = 'obj'
        else:
            format = extension

    if parcel.console:
        print()
        print("Saving output to %s format with base filename %s" %
              (extension, basename))
        print()
    #filename = "%s.%s" % (basename, extension)

    # Sanity - check, either we need the dataframes themselves or
    # we need the model
    if (parcel_df is None) and (aerosol_dfs is None):
        if parcel is None:
            raise ValueError("Need to supply either dataframes or model")
        else:
            parcel_df, aerosol_dfs = parcel_to_dataframes(parcel)
    # Concatenate on the additional dataframes supplied by the user
    if not (other_dfs is None):
        for df in other_dfs:
            parcel_df = pd.concat([parcel_df, df], axis=1)

    # 1) csv
    if format == 'csv':
        # Write parcel data
        parcel_df.to_csv("%s_%s.%s" % (basename, 'parcel', extension))
        # Write aerosol data
        for species, data in list(aerosol_dfs.items()):
            data.to_csv("%s_%s.%s" % (basename, species, extension))

    # 2) nc
    elif format == 'nc':
        print("doing netcdf")
        if not _XRAY:
            raise ValueError("Module `xray` must be installed to output"
                             " to netcdf!")

        ## Construct xray datastructure to write to netCDF
        ds = xray.Dataset(attrs={
            'Conventions': "CF-1.0",
            'source': "parcel_model v%s" % ver,
        })
        ds.coords['time'] = ('time', parcel.time,
                             {'units': 'seconds',
                              'long_name': 'simulation time'})

        ## Aerosol coordinates and basic data
        for aerosol in parcel.aerosols:
            if parcel.console:
                print(aerosol)
            nr = aerosol.nr
            r_drys = aerosol.r_drys * 1e6
            kappas = [aerosol.kappa, ] * nr
            Nis = aerosol.Nis * 1e-6
            species = aerosol.species

            aer_coord = '%s_bins' % species

            ds.coords[aer_coord] = (
                aer_coord,
                np.array(list(range(1, aerosol.nr + 1)), dtype=np.int32),
                {'long_name': '%s size bin number' % species})
            ds['%s_rdry' % species] = (
                (aer_coord, ), r_drys,
                {'units': 'micron',
                 'long_name': '%s bin dry radii' % species})
            ds['%s_kappas' % species] = (
                (aer_coord, ), kappas,
                {'long_name': '%s bin kappa-kohler hygroscopicity' % species})
            ds['%s_Nis' % species] = (
                (aer_coord, ), Nis,
                {'units': 'cm-3',
                 'long_name': '%s bin number concentration' % species})

            size_data = aerosol_dfs[species].values * 1e6
            ds['%s_size' % species] = (
                ('time', aer_coord), size_data,
                {'units': 'micron',
                 'long_name': '%s bin wet radii' % species})

        ## Parcel data
        ds['S'] = (('time', ), parcel_df['S'] * 100.,
                   {'units': "%", 'long_name': "Supersaturation"})
        ds['T'] = (('time', ), parcel_df['T'],
                   {'units': "K", "long_name": "Temperature"})
        ds['P'] = (('time', ), parcel_df['P'],
                   {'units': 'Pa', 'long_name': "Pressure"})
        ds['wv'] = (('time', ), parcel_df['wv'],
                    {'units': 'kg/kg',
                     'long_name': "Water vapor mixing ratio"})
        ds['wc'] = (('time', ), parcel_df['wc'],
                    {'units': 'kg/kg',
                     'long_name': "Liquid water mixing ratio"})
        ds['wi'] = (('time', ), parcel_df['wi'],
                    {'units': 'kg/kg',
                     'long_name': "Ice water mixing ratio"})
        ds['height'] = (('time', ), parcel_df['z'],
                        {'units': "meters",
                         'long_name': "Parcel height above start"})
        ds['rho'] = (('time', ), parcel_df['rho'],
                     {'units': "kg/m3", 'long_name': "Air density"})
        ds['wtot'] = (('time', ), parcel_df['wv'] + parcel_df['wc'],
                      {'units': 'kg/kg',
                       'long_name': "Total water mixing ratio"})
        if 'alpha' in parcel_df:
            ds['alpha'] = (('time', ), parcel_df['alpha'],
                           {'long_name': "ratio of Nkn/Neq"})
        if 'phi' in parcel_df:
            ds['phi'] = (('time', ), parcel_df['phi'],
                         {'long_name': "fraction of not-strictly activated "
                                       "drops in Nkn"})
        if 'eq' in parcel_df:
            ds['eq'] = (('time', ), parcel_df['eq'],
                        {'long_name':
                         "Equilibrium Kohler-activated fraction"})
        if 'kn' in parcel_df:
            ds['kn'] = (('time', ), parcel_df['kn'],
                        {'long_name': "Kinetic activated fraction"})

        ## Save to disk
        ds.to_netcdf(basename + ".nc")

    # 3) obj (pickle)
    else:
        assert parcel
        # open in binary mode so pickle.dump also works under Python 3
        with open(basename + ".obj", 'wb') as f:
            pickle.dump(parcel, f)
plt.plot(ubar.lat, ubar)

# We can also apply operations to all the data variables in a dataset with
# one command, and using named dimensions.
print('Zonal mean')
dsbar = ds.mean(dim='lon')
print(dsbar)
print('Boom!')
ubar2 = dsbar['u'][k]
plt.figure()
plt.plot(ubar2.lat, ubar2)

# ----------------------------------------------------------------------
# Create a new dataset object and save to netcdf file

ds2 = xray.Dataset()
ds2.attrs['title'] = 'My Dataset'
ds2.attrs['source'] = 'Grumpy Cat'
ds2['ps'] = (('lat', 'lon'), ps)
ds2.coords['lat'] = ('lat', lat)
ds2.coords['lon'] = ('lon', lon)
print(ds2)

# Save to netcdf
outfile = 'data/out.nc'
ds2.to_netcdf(outfile, mode='w')

# ----------------------------------------------------------------------
# Reading OPenDAP data files

remote_file = 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods'
                                   return_both=True)

# casting data DataArray into TimeSeriesXray
base_ev_data_xray = TimeSeriesXray(base_ev_data_xray)

# baseline_corrected test
corrected_base_ev_data_ptsa = base_ev_data_ptsa.baseline_corrected((0.0, 0.2))
corrected_base_ev_data_xray = base_ev_data_xray.baseline_corrected((0.0, 0.2))

# remove buffer test
no_buffer_base_ev_data_ptsa = base_ev_data_ptsa.remove_buffer(1.0)
no_buffer_base_ev_data_xray = base_ev_data_xray.remove_buffer(1.0)

no_buffer_dataset = xray.Dataset({'no_buffer': no_buffer_base_ev_data_xray})

from ptsa.data.writers.NetCDF4XrayWriter import NetCDF4XrayWriter

nc4_writer = NetCDF4XrayWriter(no_buffer_base_ev_data_xray)
nc4_writer.write('no_buffer_base_ev_data_xray_new.nc')

from ptsa.data.readers.NetCDF4XrayReader import NetCDF4XrayReader

nc4_reader = NetCDF4XrayReader()
array = nc4_reader.read('no_buffer_base_ev_data_xray_new.nc')

print(array['events'])

sys.exit()
def Get_data(m, sta_id, Vars_ext, StartDate, EndDate):
    # INPUT
    # m           MESOWEST token variable
    # sta_id      list of station ids requested
    # Vars_ext    list of variables requested
    # StartDate   start of data
    # EndDate     end of data

    #### Function for combining xray data variables into a single array
    #### with new labeled dimension
    # Creator Karl Lapo
    def combinevars(ds_in, dat_vars, new_dim_name='new_dim',
                    combinevarname='new_var'):
        ds_out = xray.Dataset()
        ds_out = xray.concat([ds_in[dv] for dv in dat_vars], dim='new_dim')
        ds_out = ds_out.rename({'new_dim': new_dim_name})
        ds_out.coords[new_dim_name] = dat_vars
        ds_out.name = combinevarname
        return ds_out

    # Grab all time series data from all stations for a given date range
    print('Grabbed all station data')
    allstationdata = m.timeseries(stid=sta_id, start=StartDate, end=EndDate)

    # Get Station Info
    N_sta = allstationdata['SUMMARY']['NUMBER_OF_OBJECTS']
    print('Found ', N_sta, ' Stations')
    Elev = [ast.literal_eval(
                json.dumps(allstationdata['STATION'][cs]['ELEVATION']))
            for cs in range(0, N_sta)]
    Lat = [ast.literal_eval(
               json.dumps(allstationdata['STATION'][cs]['LATITUDE']))
           for cs in range(0, N_sta)]
    Lon = [ast.literal_eval(
               json.dumps(allstationdata['STATION'][cs]['LONGITUDE']))
           for cs in range(0, N_sta)]
    NAME = [ast.literal_eval(
                json.dumps(allstationdata['STATION'][cs]['NAME']))
            for cs in range(0, N_sta)]
    ID = [ast.literal_eval(
              json.dumps(allstationdata['STATION'][cs]['STID']))
          for cs in range(0, N_sta)]
    print('Got all station info')
    if N_sta == 0:
        # No stations were found for this period
        return None

    # Get timestamp timeseries for all stations (may be different lengths
    # and different time steps)
    timestamp = []
    [timestamp.append(ob['OBSERVATIONS']['date_time'])
     for ob in allstationdata['STATION']]
    print('Got timestamps for each station')

    # Loop through each variable to extract
    print('Looping through each station to extract data')
    DS_list = []  # Empty list of each dataset containing one variable
    for Vn, cVar in enumerate(Vars_ext):
        print('Current variable is ', cVar)
        # Get timeseries of data for all stations
        temp_list = []
        for Sn, ob in enumerate(allstationdata['STATION']):
            #print ob['NAME']
            # Not all stations have all variables, which will throw an error
            # If station has this Variable
            try:
                temp_list.append(ob['OBSERVATIONS'][cVar])
            # Else add missing values as padding (so xray can handle it)
            except:
                # Create empty array of -9999
                temp_vals = np.empty(np.size(timestamp[Sn]))
                temp_vals[:] = np.NAN
                temp_list.append(temp_vals)
                print('Station ', ob['NAME'], ' is missing ', [cVar],
                      'Padding with -9999s')
        print('Got ', cVar, 'data from', len(temp_list), 'stations')

        # Make dictionary of site and xray data array
        # Warning, must cast returned types to float64
        print('.....Converting to a dictionary list of xray.DataArrays')
        dict1 = {}
        for csta in range(0, len(temp_list)):
            c_t = [datetime.strptime(
                       ast.literal_eval(json.dumps(timestamp[csta][cd])),
                       '%Y-%m-%dT%H:%M:%SZ')
                   for cd in range(len(timestamp[csta]))]
            dict1[ID[csta]] = xray.DataArray(
                np.array(temp_list[csta], dtype='float64'),
                coords=[c_t], dims=['time'], name=ID[csta])
        #print dict1

        # Make it a dataset
        print('.....Converting to a xray.Dataset')
        ds_temp_Var = xray.Dataset(dict1)
        #print ds_temp_Var

        # Resample to common time step as Data contains mix of 15, 10,
        # and 5 min data
        # For some variables we want to sample
        if cVar == 'wind_direction_set_1':
            print('.....Resampling to 1 hour time step. Using Median!!!! '
                  'Timestamp out is END of period!!!!')
            ds_temp_Var_1hr = ds_temp_Var.resample(freq='H', dim='time',
                                                   how='median',
                                                   label='right')
            #print ds_temp_Var_1hr
        else:
            print('.....Resampling to 1 hour time step. Using mean!!!! '
                  'Timestamp out is END of period!!!!')
            ds_temp_Var_1hr = ds_temp_Var.resample(freq='H', dim='time',
                                                   how='mean',
                                                   label='right')
            #print ds_temp_Var_1hr

        # Combine stations
        print('.....Combining stations')
        DS_list.append(combinevars(ds_temp_Var_1hr,
                                   ds_temp_Var_1hr.data_vars,
                                   new_dim_name='site',
                                   combinevarname=cVar))

    # Make dictionary list
    DIC1 = dict(zip([cv.name for cv in DS_list], DS_list))

    # Combine all Datasets
    print('Combine all datasets (if multiple variables requested)')
    ds_ALL = xray.Dataset(DIC1)
    #print ds_ALL

    print('Update coords')
    # Fill in descriptive variables
    ds_ALL.coords['lat'] = ('site', [float(x) for x in Lat])
    ds_ALL.coords['lon'] = ('site', [float(x) for x in Lon])
    ds_ALL.coords['elev'] = ('site', [float(x) for x in Elev])
    ds_ALL.coords['sta_name'] = ('site', NAME)
    return ds_ALL
def _compute_phase_values(phase_obj, components, variables, statevar_dict,
                          points, func, output, maximum_internal_dof):
    """
    Calculate output values for a particular phase.

    Parameters
    ----------
    phase_obj : Phase
        Phase object from a thermodynamic database.
    components : list
        Names of components to consider in the calculation.
    variables : list
        Names of variables in the phase's internal degrees of freedom.
    statevar_dict : OrderedDict {str -> float or sequence}
        Mapping of state variables to desired values. This will broadcast
        if necessary.
    points : ndarray
        Inputs to 'func', except state variables.
        Columns should be in 'variables' order.
    func : callable
        Function of state variables and 'variables'.
        See 'make_callable' docstring for details.
    output : string
        Desired name of the output result in the Dataset.
    maximum_internal_dof : int
        Largest number of internal degrees of freedom of any phase. This is
        used to guarantee different phase's Datasets can be concatenated.

    Returns
    -------
    xray.Dataset of the output attribute as a function of state variables

    Examples
    --------
    None yet.
    """
    # Broadcast compositions and state variables along orthogonal axes
    # This lets us eliminate an expensive Python loop
    statevar_grid = np.meshgrid(
        *itertools.chain(statevar_dict.values(),
                         [np.empty(points.shape[-2])]),
        sparse=True, indexing='ij')[:-1]
    points = broadcast_to(
        points,
        tuple(len(np.atleast_1d(x)) for x in statevar_dict.values()) +
        points.shape[-2:])
    phase_output = func(
        *itertools.chain(statevar_grid, np.rollaxis(points, -1, start=0)))

    # Map the internal degrees of freedom to global coordinates
    # Normalize site ratios by the sum of site ratios times a factor
    # related to the site fraction of vacancies
    site_ratio_normalization = np.zeros(points.shape[:-1])
    for idx, sublattice in enumerate(phase_obj.constituents):
        vacancy_column = np.ones(points.shape[:-1])
        if 'VA' in set(sublattice):
            var_idx = variables.index(
                v.SiteFraction(phase_obj.name, idx, 'VA'))
            vacancy_column -= points[..., :, var_idx]
        site_ratio_normalization += \
            phase_obj.sublattices[idx] * vacancy_column

    phase_compositions = np.empty(points.shape[:-1] + (len(components), ))
    for col, comp in enumerate(components):
        avector = [float(vxx.species == comp) *
                   phase_obj.sublattices[vxx.sublattice_index]
                   for vxx in variables]
        phase_compositions[..., :, col] = np.divide(
            np.dot(points[..., :, :], avector), site_ratio_normalization)

    coordinate_dict = {'component': components}
    coordinate_dict.update(
        {key: np.atleast_1d(value) for key, value in statevar_dict.items()})
    output_columns = [str(x) for x in statevar_dict.keys()] + ['points']
    # Resize 'points' so it has the same number of columns as the maximum
    # number of internal degrees of freedom of any phase in the calculation.
    # We do this so that everything is aligned for concat.
    # Waste of memory? Yes, but the alternatives are unclear.
    expanded_points = np.full(points.shape[:-1] + (maximum_internal_dof, ),
                              np.nan)
    expanded_points[..., :points.shape[-1]] = points
    data_arrays = {
        'X': (output_columns + ['component'], phase_compositions),
        'Phase': (output_columns,
                  np.full(points.shape[:-1], phase_obj.name,
                          dtype='U' + str(len(phase_obj.name)))),
        'Y': (output_columns + ['internal_dof'], expanded_points),
        output: (['dim_' + str(i)
                  for i in range(len(phase_output.shape) -
                                 len(output_columns))] + output_columns,
                 phase_output)
    }
    return xray.Dataset(data_arrays, coords=coordinate_dict)
    creates the dataset
    """
    d = {}
    d['time'] = ('time', date)
    d['lat'] = ('lat', lats)
    d['lon'] = ('lon', lons)
    d['cams'] = (['time', 'lat', 'lon'], data[0, ...][np.newaxis, ...])
    d['camsn'] = (['time', 'lat', 'lon'], data[1, ...][np.newaxis, ...])
    d['opi'] = (['time', 'lat', 'lon'], data[2, ...][np.newaxis, ...])
    d['comb'] = (['time', 'lat', 'lon'], data[3, ...][np.newaxis, ...])
    d['xxxx'] = (['time', 'lat', 'lon'], data[4, ...][np.newaxis, ...])
    d['comba'] = (['time', 'lat', 'lon'], data[5, ...][np.newaxis, ...])
    d['gam'] = (['time', 'lat', 'lon'], data[6, ...][np.newaxis, ...])

    dset = xray.Dataset(d)

    """
    defines some attributes
    """
    dset.lon.attrs['long_name'] = 'Longitude'
    dset.lat.attrs['long_name'] = 'Latitude'
    dset.lon.attrs['standard_name'] = 'longitude'
    dset.lat.attrs['standard_name'] = 'latitude'
    dset.lat.attrs['units'] = 'degrees_north'
    dset.lon.attrs['units'] = 'degrees_east'

    filepath = os.path.join(dpath,
                            'nc/cams_opi_merged.{}.nc'.format(datestring))
def equilibrium(dbf, comps, phases, conditions, **kwargs):
    """
    Calculate the equilibrium state of a system containing the specified
    components and phases, under the specified conditions.
    Model parameters are taken from 'dbf'.

    Parameters
    ----------
    dbf : Database
        Thermodynamic database containing the relevant parameters.
    comps : list
        Names of components to consider in the calculation.
    phases : list or dict
        Names of phases to consider in the calculation.
    conditions : dict or (list of dict)
        StateVariables and their corresponding value.
    verbose : bool, optional (Default: True)
        Show progress of calculations.
    grid_opts : dict, optional
        Keyword arguments to pass to the initial grid routine.

    Returns
    -------
    Structured equilibrium calculation.

    Examples
    --------
    None yet.
    """
    active_phases = unpack_phases(phases) or sorted(dbf.phases.keys())
    comps = sorted(comps)
    indep_vars = ['T', 'P']
    grid_opts = kwargs.pop('grid_opts', dict())
    verbose = kwargs.pop('verbose', True)
    phase_records = dict()
    callable_dict = kwargs.pop('callables', dict())
    grad_callable_dict = kwargs.pop('grad_callables', dict())
    points_dict = dict()
    maximum_internal_dof = 0
    # Construct models for each phase; prioritize user models
    models = unpack_kwarg(kwargs.pop('model', Model), default_arg=Model)
    if verbose:
        print('Components:', ' '.join(comps))
        print('Phases:', end=' ')
    for name in active_phases:
        mod = models[name]
        if isinstance(mod, type):
            models[name] = mod = mod(dbf, comps, name)
        variables = sorted(mod.energy.atoms(v.StateVariable).union(
            {key for key in conditions.keys() if key in [v.T, v.P]}), key=str)
        site_fracs = sorted(mod.energy.atoms(v.SiteFraction), key=str)
        maximum_internal_dof = max(maximum_internal_dof, len(site_fracs))
        # Extra factor '1e-100...' is to work around an annoying
        # broadcasting bug for zero gradient entries
        models[name].models['_broadcaster'] = 1e-100 * Mul(*variables)**3
        out = models[name].energy
        if name not in callable_dict:
            undefs = list(out.atoms(Symbol) - out.atoms(v.StateVariable))
            for undef in undefs:
                out = out.xreplace({undef: float(0)})
            # callable_dict takes variables in a different order due to
            # calculate() pecularities
            callable_dict[name] = make_callable(
                out,
                sorted((key for key in conditions.keys()
                        if key in [v.T, v.P]), key=str) + site_fracs)
        if name not in grad_callable_dict:
            grad_func = make_callable(Matrix([out]).jacobian(variables),
                                      variables)
        else:
            grad_func = grad_callable_dict[name]
        # Adjust gradient by the approximate chemical potentials
        plane_vars = sorted(models[name].energy.atoms(v.SiteFraction),
                            key=str)
        hyperplane = Add(*[v.MU(i) * mole_fraction(dbf.phases[name], comps, i)
                           for i in comps if i != 'VA'])
        # Workaround an annoying bug with zero gradient entries
        # This forces numerically zero entries to broadcast correctly
        hyperplane += 1e-100 * Mul(*([v.MU(i) for i in comps if i != 'VA'] +
                                     plane_vars + [v.T, v.P]))**3

        plane_grad = make_callable(
            Matrix([hyperplane]).jacobian(variables),
            [v.MU(i) for i in comps if i != 'VA'] + plane_vars + [v.T, v.P])
        plane_hess = make_callable(
            hessian(hyperplane, variables),
            [v.MU(i) for i in comps if i != 'VA'] + plane_vars + [v.T, v.P])
        phase_records[name.upper()] = PhaseRecord(variables=variables,
                                                  grad=grad_func,
                                                  plane_grad=plane_grad,
                                                  plane_hess=plane_hess)
        if verbose:
            print(name, end=' ')
    if verbose:
        print('[done]', end='\n')

    conds = OrderedDict((key, unpack_condition(value))
                        for key, value in sorted(conditions.items(), key=str))
    str_conds = OrderedDict((str(key), value) for key, value in conds.items())
    indep_vals = list([float(x) for x in np.atleast_1d(val)]
                      for key, val in str_conds.items() if key in indep_vars)
    components = [x for x in sorted(comps) if not x.startswith('VA')]
    # 'calculate' accepts conditions through its keyword arguments
    grid_opts.update({key: value for key, value in str_conds.items()
                      if key in indep_vars})
    if 'pdens' not in grid_opts:
        grid_opts['pdens'] = 10

    coord_dict = str_conds.copy()
    coord_dict['vertex'] = np.arange(len(components))
    grid_shape = np.meshgrid(*coord_dict.values(),
                             indexing='ij', sparse=False)[0].shape
    coord_dict['component'] = components
    if verbose:
        print('Computing initial grid', end=' ')

    grid = calculate(dbf, comps, active_phases, output='GM', model=models,
                     callables=callable_dict, fake_points=True, **grid_opts)

    if verbose:
        print('[{0} points, {1}]'.format(len(grid.points),
                                         sizeof_fmt(grid.nbytes)), end='\n')

    properties = xray.Dataset(
        {
            'NP': (list(str_conds.keys()) + ['vertex'], np.empty(grid_shape)),
            'GM': (list(str_conds.keys()), np.empty(grid_shape[:-1])),
            'MU': (list(str_conds.keys()) + ['component'],
                   np.empty(grid_shape)),
            'points': (list(str_conds.keys()) + ['vertex'],
                       np.empty(grid_shape, dtype=np.int))
        },
        coords=coord_dict,
        attrs={'iterations': 1},
    )
    # Store the potentials from the previous iteration
    current_potentials = properties.MU.copy()

    for iteration in range(MAX_ITERATIONS):
        if verbose:
            print('Computing convex hull [iteration {}]'.format(
                properties.attrs['iterations']))
        # lower_convex_hull will modify properties
        lower_convex_hull(grid, properties)
        progress = np.abs(current_potentials - properties.MU).max().values
        if verbose:
            print('progress', progress)
        if progress < MIN_PROGRESS:
            if verbose:
                print('Convergence achieved')
            break
        current_potentials[...] = properties.MU.values
        if verbose:
            print('Refining convex hull')
        # Insert extra dimensions for non-T,P conditions so GM broadcasts
        # correctly
        energy_broadcast_shape = grid.GM.values.shape[:len(indep_vals)] + \
            (1,) * (len(str_conds) - len(indep_vals)) + \
            (grid.GM.values.shape[-1],)
        driving_forces = np.einsum(
            '...i,...i',
            properties.MU.values[..., np.newaxis, :],
            grid.X.values[np.index_exp[...] +
                          (np.newaxis,) * (len(str_conds) - len(indep_vals)) +
                          np.index_exp[:, :]]) - \
            grid.GM.values.view().reshape(energy_broadcast_shape)

        for name in active_phases:
            dof = len(models[name].energy.atoms(v.SiteFraction))
            current_phase_indices = (grid.Phase.values == name).reshape(
                energy_broadcast_shape[:-1] + (-1,))
            # Broadcast to capture all conditions
            current_phase_indices = np.broadcast_arrays(
                current_phase_indices, np.empty(driving_forces.shape))[0]
            # This reshape is safe as long as phases have the same number of
            # points at all indep. conditions
            current_phase_driving_forces = driving_forces[
                current_phase_indices].reshape(
                    current_phase_indices.shape[:-1] + (-1,))
            # Note: This works as long as all points are in the same phase
            # order for all T, P
            current_site_fractions = grid.Y.values[
                ..., current_phase_indices[(0,) * len(str_conds)], :]
            if np.sum(current_site_fractions[
                    (0,) * len(indep_vals)][..., :dof]) == dof:
                # All site fractions are 1, aka zero internal degrees of
                # freedom. Impossible to refine these points, so skip phase
                points_dict[name] = current_site_fractions[
                    (0,) * len(indep_vals)][..., :dof]
                continue
            # Find the N points with largest driving force for a given set
            # of conditions
            # Remember that driving force has a sign, so we want the
            # "most positive" values
            # N is the number of components, in this context
            # N points define a 'best simplex' for every set of conditions
            # We also need to restrict ourselves to one phase at a time
            trial_indices = np.argpartition(current_phase_driving_forces,
                                            -len(components),
                                            axis=-1)[..., -len(components):]
            trial_indices = trial_indices.ravel()
            statevar_indices = np.unravel_index(
                np.arange(np.multiply.reduce(
                    properties.GM.values.shape + (len(components),))),
                properties.GM.values.shape +
                (len(components),))[:len(indep_vals)]
            points = current_site_fractions[
                np.index_exp[statevar_indices + (trial_indices,)]]
            points.shape = properties.points.shape[:-1] + \
                (-1, maximum_internal_dof)
            # The Y arrays have been padded, so we should slice off the
            # padding
            points = points[..., :dof]
            # Workaround for derivative issues at endmembers
            points[points == 0.] = MIN_SITE_FRACTION
            if len(points) == 0:
                if name in points_dict:
                    del points_dict[name]
                # No nearly stable points: skip this phase
                continue

            num_vars = len(phase_records[name].variables)
            plane_grad = phase_records[name].plane_grad
            plane_hess = phase_records[name].plane_hess
            statevar_grid = np.meshgrid(*itertools.chain(indep_vals),
                                        sparse=True, indexing='xy')
            # TODO: A more sophisticated treatment of constraints
            num_constraints = len(indep_vals) + \
                len(dbf.phases[name].sublattices)
            constraint_jac = np.zeros((num_constraints, num_vars))
            # Independent variables are always fixed (in this limited
            # implementation)
            for idx in range(len(indep_vals)):
                constraint_jac[idx, idx] = 1
            # This is for site fraction balance constraints
            var_idx = len(indep_vals)
            for idx in range(len(dbf.phases[name].sublattices)):
                active_in_subl = set(
                    dbf.phases[name].constituents[idx]).intersection(comps)
                constraint_jac[len(indep_vals) + idx,
                               var_idx:var_idx + len(active_in_subl)] = 1
                var_idx += len(active_in_subl)

            grad = phase_records[name].grad(
                *itertools.chain(statevar_grid, points.T))
            if grad.dtype == 'object':
                # Workaround a bug in zero gradient entries
                grad_zeros = np.zeros(points.T.shape[1:], dtype=np.float)
                for i in np.arange(grad.shape[0]):
                    if isinstance(grad[i], int):
                        grad[i] = grad_zeros
                grad = np.array(grad.tolist(), dtype=np.float)
            bcasts = np.broadcast_arrays(
                *itertools.chain(properties.MU.values.T, points.T))
            cast_grad = -plane_grad(*itertools.chain(bcasts, [0], [0]))
            cast_grad = cast_grad.T + grad.T
            grad = cast_grad
            grad.shape = grad.shape[:-1]  # Remove extraneous dimension
            # This Hessian is an approximation updated using the BFGS method
            # See Nocedal and Wright, ch.3, p.
198 # Initialize as identity matrix hess = broadcast_to(np.eye(num_vars), grad.shape + (grad.shape[-1], )).copy() newton_iteration = 0 while newton_iteration < MAX_NEWTON_ITERATIONS: e_matrix = np.linalg.inv(hess) dy_unconstrained = -np.einsum('...ij,...j->...i', e_matrix, grad) proj_matrix = np.dot(e_matrix, constraint_jac.T) inv_matrix = np.rollaxis(np.dot(constraint_jac, proj_matrix), 0, -1) inv_term = np.linalg.inv(inv_matrix) first_term = np.einsum('...ij,...jk->...ik', proj_matrix, inv_term) # Normally a term for the residual here # We only choose starting points which obey the constraints, so r = 0 cons_summation = np.einsum('...i,...ji->...j', dy_unconstrained, constraint_jac) cons_correction = np.einsum('...ij,...j->...i', first_term, cons_summation) dy_constrained = dy_unconstrained - cons_correction # TODO: Support for adaptive changing independent variable steps new_direction = dy_constrained[..., len(indep_vals):] # Backtracking line search new_points = points + INITIAL_STEP_SIZE * new_direction alpha = np.full(new_points.shape[:-1], INITIAL_STEP_SIZE, dtype=np.float) negative_points = np.any(new_points < 0., axis=-1) while np.any(negative_points): alpha[negative_points] *= 0.1 new_points = points + alpha[..., np.newaxis] * new_direction negative_points = np.any(new_points < 0., axis=-1) # If we made "near" zero progress on any points, don't update the Hessian until # we've rebuilt the convex hull # Nocedal and Wright recommend against skipping Hessian updates # They recommend using a damped update approach, pp. 538-539 of their book # TODO: Check the projected gradient norm, not the step length if np.any( np.max(np.abs(alpha[..., np.newaxis] * new_direction), axis=-1) < MIN_STEP_LENGTH): break # Workaround for derivative issues at endmembers new_points[new_points == 0.] = 1e-16 # BFGS update to Hessian new_grad = phase_records[name].grad( *itertools.chain(statevar_grid, new_points.T)) if new_grad.dtype == 'object': # Workaround a bug in zero gradient entries grad_zeros = np.zeros(new_points.T.shape[1:], dtype=np.float) for i in np.arange(new_grad.shape[0]): if isinstance(new_grad[i], int): new_grad[i] = grad_zeros new_grad = np.array(new_grad.tolist(), dtype=np.float) bcasts = np.broadcast_arrays( *itertools.chain(properties.MU.values.T, new_points.T)) cast_grad = -plane_grad(*itertools.chain(bcasts, [0], [0])) cast_grad = cast_grad.T + new_grad.T new_grad = cast_grad new_grad.shape = new_grad.shape[: -1] # Remove extraneous dimension # Notation used here consistent with Nocedal and Wright s_k = np.empty(points.shape[:-1] + (points.shape[-1] + len(indep_vals), )) # Zero out independent variable changes for now s_k[..., :len(indep_vals)] = 0 s_k[..., len(indep_vals):] = new_points - points y_k = new_grad - grad s_s_term = np.einsum('...j,...k->...jk', s_k, s_k) s_b_s_term = np.einsum('...i,...ij,...j', s_k, hess, s_k) y_y_y_s_term = np.einsum('...j,...k->...jk', y_k, y_k) / \ np.einsum('...i,...i', y_k, s_k)[..., np.newaxis, np.newaxis] update = np.einsum('...ij,...jk,...kl->...il', hess, s_s_term, hess) / \ s_b_s_term[..., np.newaxis, np.newaxis] + y_y_y_s_term hess = hess - update cast_hess = -plane_hess( *itertools.chain(bcasts, [0], [0])).T + hess hess = -cast_hess #TODO: Why does this fix things? 
# TODO: Verify that the chosen step lengths reduce the energy points = new_points grad = new_grad newton_iteration += 1 new_points = new_points.reshape( new_points.shape[:len(indep_vals)] + (-1, new_points.shape[-1])) new_points = np.concatenate( (current_site_fractions[..., :dof], new_points), axis=-2) points_dict[name] = new_points if verbose: print('Rebuilding grid', end=' ') grid = calculate(dbf, comps, active_phases, output='GM', model=models, callables=callable_dict, fake_points=True, points=points_dict, **grid_opts) if verbose: print('[{0} points, {1}]'.format(len(grid.points), sizeof_fmt(grid.nbytes)), end='\n') properties.attrs['iterations'] += 1 # One last call to ensure 'properties' and 'grid' are consistent with one another lower_convex_hull(grid, properties) ravelled_X_view = grid['X'].values.view().reshape( -1, grid['X'].values.shape[-1]) ravelled_Y_view = grid['Y'].values.view().reshape( -1, grid['Y'].values.shape[-1]) ravelled_Phase_view = grid['Phase'].values.view().reshape(-1) # Copy final point values from the grid and drop the index array # For some reason direct construction doesn't work. We have to create empty and then assign. properties['X'] = xray.DataArray( np.empty_like(ravelled_X_view[properties['points'].values]), dims=properties['points'].dims + ('component', )) properties['X'].values[...] = ravelled_X_view[properties['points'].values] properties['Y'] = xray.DataArray( np.empty_like(ravelled_Y_view[properties['points'].values]), dims=properties['points'].dims + ('internal_dof', )) properties['Y'].values[...] = ravelled_Y_view[properties['points'].values] # TODO: What about invariant reactions? We should perform a final driving force calculation here. # We can handle that in the same post-processing step where we identify single-phase regions. properties['Phase'] = xray.DataArray(np.empty_like( ravelled_Phase_view[properties['points'].values]), dims=properties['points'].dims) properties['Phase'].values[...] = ravelled_Phase_view[ properties['points'].values] del properties['points'] return properties
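# --- A hedged usage sketch for equilibrium() above. ---
# The database file, components, phases, and conditions below are placeholders
# (assumptions for illustration), not values taken from this module; only the
# call shape follows the docstring. Database and the `v` StateVariable module
# are assumed to come from pycalphad.
def _example_equilibrium_usage(tdb_path='alni.tdb'):
    from pycalphad import Database
    import pycalphad.variables as v

    dbf = Database(tdb_path)  # hypothetical thermodynamic database file
    # 'VA' (vacancies) is included by convention; the phase names must exist in dbf
    eq = equilibrium(dbf, ['AL', 'NI', 'VA'], ['LIQUID', 'FCC_A1'],
                     {v.X('AL'): 0.3, v.T: 1000.0, v.P: 101325.0},
                     verbose=False)
    # eq is an xray.Dataset holding GM, MU, NP, Phase, X and Y over the condition grid
    return eq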