Example #1
def obsoper(self,
            inputs,
            mode,
            run_id=0,
            datei=datetime.datetime(1979, 1, 1),
            datef=datetime.datetime(2100, 1, 1),
            workdir='./',
            reload_results=False,
            **kwargs):
    """The observation operator.
    This function maps information from the control space to the observation
    space.

    This version of 'obsoper' was developed for use with FLEXPART
    backward runs.

    Gets concentrations/mixing ratios from FLEXPART flux sensitivities.

    For now, assumes FLEXPART runs are stored in the directory structure
    used by FLEXINVERT+, i.e., /station_id/YYYYMM/
    
    if FWD
    Turns model equivalents back into observation space
    Hx(native) -> Hx(obs)

    Generates departures and multiplies by R^(-1)
    -> Hx - y0, R^(-1).(Hx - y0)

    Calculates the gradient in observation space
        Jo'(p) = H^T R^(-1) (H(p) - y)

    Args:
        inputs: can be a control vector or an observation vector, depending
                on the mode
        mode (str): the running mode; always 'fwd' for the flexpart version
        run_id (int): the ID number of the current run; is used to define
                the current sub-directory
        datei, datef (datetime.datetime): start and end dates of the
                simulation window
        workdir (str): the parent directory of the simulation to run
        reload_results (bool): look for results from pre-computed simulation
                if any
    
    Returns:
        observation or control vector depending on the running mode
    """

    # Check that inputs are consistent with the mode to run
    check_inputs(inputs, mode)

    # If true, do not run simulations at this point
    read_only = getattr(self, 'read_only', True)
    print("read_only", read_only)

    # Initializing modules and variables from the setup
    model = self.model
    controlvect = self.controlvect
    obsvect = self.obsvect
    subsimu_dates = model.subsimu_dates

    if mode == 'fwd':
        controlvect = inputs
        obsvect.datastore.loc[:, 'sim'] = np.nan

    # Various initializations

    # Get flexpart headers
    subdir = subsimu_dates[0].strftime("%Y%m")

    fp_header_glob = model.utils.flexpart_header.Flexpartheader()
    fp_header_glob.read_header(
        os.path.join(model.run_dir_glob,
                     obsvect.datastore.head(1)['station'][0].upper(), subdir,
                     'header'))

    fp_header_nest = None
    if model.plugin.nested:
        fp_header_nest = model.utils.flexpart_header.Flexpartheader()
        fp_header_nest.read_header(
            os.path.join(model.run_dir_nest,
                         obsvect.datastore.head(1)['station'][0].upper(),
                         subdir, 'header_nest'))

    # Get the domain definition
    species = getattr(controlvect.components,
                      'fluxes').parameters.attributes[0]
    tracer = getattr(
        getattr(controlvect.components, 'fluxes').parameters, species)

    if tracer.hresol == 'regions':
        nbox = tracer.nregions

        # TODO: this will change once initial conditions are added (ciniopt)
        npvar = tracer.ndates * nbox
        ndvar = nbox
        nvar = npvar
        grad_o = np.zeros(nvar)

        ix1 = model.domain.ix1
        ix2 = model.domain.ix2
        iy1 = model.domain.iy1
        iy2 = model.domain.iy2
    else:
        raise Exception(
            "For FLEXPART, only hresol:regions are implemented in controlvect")

    # Loop through model periods and read model output
    self.missingperiod = False

    for di, df in zip(subsimu_dates[:-1], subsimu_dates[1:]):

        subdir = di.strftime("%Y%m")

        # Save to datastore for debugging purposes
        obs_ghg = np.empty(len(obsvect.datastore))
        obs_ghg[:] = np.nan
        obs_bkg = np.empty(len(obsvect.datastore))
        obs_bkg[:] = np.nan
        obs_sim = np.empty(len(obsvect.datastore))
        obs_sim[:] = np.nan
        obs_model = np.empty(len(obsvect.datastore))
        obs_model[:] = np.nan
        obs_check = np.empty(len(obsvect.datastore))
        obs_check[:] = np.nan
        obs_bkgerr = np.empty(len(obsvect.datastore))
        obs_bkgerr[:] = np.nan
        obs_err = np.empty(len(obsvect.datastore))
        obs_err[:] = np.nan

        # Loop over observation dates
        print("di, df", di, df, datetime.datetime.now())

        for obs_i, row in enumerate(obsvect.datastore.itertuples()):
            # For debugging
            obs_check[obs_i] = obs_i

            station = row.station

            runsubdir_nest = os.path.join(model.run_dir_nest, station.upper(),
                                          subdir)
            runsubdir_glob = os.path.join(model.run_dir_glob, station.upper(),
                                          subdir)

            file_date = row.Index.strftime('%Y%m%d%H%M%S')

            # Read nested grids
            file_name = 'grid_time_nest_' + file_date + '_001'

            if not os.path.isfile(os.path.join(runsubdir_nest, file_name)):
                continue

            grid_nest, gtime, ngrid = model.utils.read.read_flexpart_grid(
                runsubdir_nest,
                file_name,
                fp_header_nest,
                numscale=model.numscale)

            grid_nest *= model.coeff * model.mmair / model.molarmass

            # Read global footprints
            # TODO read correction factor dry air
            file_name = 'grid_time_' + file_date + '_001'

            if not os.path.isfile(os.path.join(runsubdir_glob, file_name)):
                continue

            grid_glob, gtime_glob, ngrid_glob = \
                model.utils.read.read_flexpart_grid(
                    runsubdir_glob, file_name, fp_header_glob,
                    numscale=model.numscale)

            grid_glob *= model.coeff * model.mmair / model.molarmass

            # Array for global transport background
            hbkg = np.sum(grid_glob[:, :, 0:ngrid - 1], axis=2)
            hbkg[ix1:ix2, iy1:iy2] = 0.0
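            # hbkg: global sensitivities summed over all footprint times,
            # with the nested/inversion domain zeroed out so that only the
            # background (outside-domain) contribution remains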

            # Index to state vector
            ind = np.argmin(np.abs(tracer.dates[0::-1] - gtime_glob[0]))
            obs_bkg[obs_i] = np.sum(hbkg[:, :].T *
                                    controlvect.flx_bkg[ind, :, :])
            obs_bkgerr[obs_i] = obs_bkg[obs_i] * tracer.err

            # Transport for boxes in regions
            hbox = np.zeros((nbox * ngrid), np.float64)
            mask_reg = tracer.regions > 0
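            # Vectorised aggregation of nested-grid sensitivities into
            # regions via np.add.at (equivalent to the commented double
            # loop below)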
            for n in range(ngrid):
                inds = n * nbox + tracer.regions[mask_reg] - 1
                np.add.at(hbox, inds, grid_nest.T[n, mask_reg])

                # for jy in range(fp_header_nest.numy):
                #     for ix in range(fp_header_nest.numx):
                #         if tracer.regions[jy, ix] > 0:
                #             hbox[n*nbox + tracer.regions[jy, ix] - 1] += \
                #             grid_nest[ix, jy, n]

            # TODO: account for possible difference nested grid/inversion domain
            hnest = grid_nest

            istate = np.zeros(ngrid, dtype=int) - 1
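            # istate[i] = -1 flags footprint times outside the inversion
            # window; other entries index the inversion time intervals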

            # Calculate indices to state vector
            for i, j in enumerate(gtime):
                if j > df:
                    istate[i] = -1
                else:
                    # Discard 1st tracer date in the comparison
                    mask = j - tracer.dates[1::] <= datetime.timedelta(0)
                    istate[i] = int(np.argmax(mask))

            if np.max(istate) < 0:
                continue

            # ntstep: number of inversion intervals covered by the footprints
            ntstep = int(
                np.max(istate[istate > -1]) - np.min(istate[istate > -1]) + 1)

            hx = np.zeros(ntstep * nbox)
            px = np.zeros(ntstep * nbox)

            for i in range(ngrid):
                if istate[i] == -1:
                    continue

                ns = istate[i] - np.min(istate[istate > -1]) + 1

                px[(ns - 1) * nbox:ns * nbox] = \
                    controlvect.x[istate[i] * nbox:(istate[i] + 1) * nbox]
                hx[(ns - 1) * nbox:ns * nbox] += hbox[i * nbox:(i + 1) * nbox]

            obs_model[obs_i] = np.dot(hx, px)

            # Change in mixing ratio from best guess estimate
            obs_ghg[obs_i] = 0.
            for i, itim in enumerate(gtime):
                ind = np.argmin(np.abs(tracer.dates[0::-1] - itim))
                obs_ghg[obs_i] += np.sum(hnest.T[i, :, :] *
                                         controlvect.flxall[ind, :, :])

            if getattr(tracer, 'offsets', False):
                # Optimize offsets
                obs_sim[obs_i] = obs_model[obs_i] \
                                 + obs_ghg[obs_i] + obs_bkg[obs_i]
            else:
                # Optimize fluxes
                obs_sim[obs_i] = obs_model[obs_i] + obs_bkg[obs_i]

            # calculate gradient in observation space
            # Jo'(p) = H^TR^-1(H(p) - y)
            # calculate as: Jo'(p) = sum( H_i^T*ydel_i*R_i )

            # Contribution to observation gradient from obs_i
            departure = obs_sim[obs_i] - row.obs
            istate_uni = np.unique(istate).astype(int)

            for n in range(ntstep):
                if istate_uni[n] > -1:
                    grad_o[istate_uni[n] * ndvar:
                           (istate_uni[n] + 1) * ndvar] += \
                        hx[n * ndvar:(n + 1) * ndvar] \
                        * departure / (row.obserror ** 2
                                       + obs_bkgerr[obs_i] ** 2)

            print(obs_i, row.obs, obs_sim[obs_i])

        obsvect.dx = grad_o

        # Add the different components to datastore
        obsvect.datastore['obs_bkgerr'] = obs_bkgerr
        obsvect.datastore['sim'] = obs_sim
        obsvect.datastore['obs_ghg'] = obs_ghg
        obsvect.datastore['obs_bkg'] = obs_bkg
        obsvect.datastore['obs_model'] = obs_model
        obsvect.datastore['obs_sim'] = obs_sim
        obsvect.datastore['obs_check'] = obs_check
        obsvect.datastore['obs_err'] = np.sqrt(
            obsvect.datastore['obs_bkgerr']**2 +
            obsvect.datastore['obserror']**2)

        # Save grad_o for inspection
        rundir = "{}/obsoperator".format(workdir)
        file_grado = '{}/grad_o_{}.txt'.format(rundir, run_id)
        np.savetxt(file_grado, grad_o, fmt='%.8e')

        model.output_read = True

        created = os.path.isdir(runsubdir_nest)

        # If the sub-directory was already created,
        # the observation operator considers that the sub-simulation
        # was already properly run, thus passing to next sub-periods
        # Compute the sub-simulation anyway if some previous periods
        # were missing (as stored in self.missingperiod)
        do_simu = (created or not getattr(self, 'autorestart', False)
                   or self.missingperiod) and not read_only
        self.missingperiod = do_simu

        # Some verbose
        # verbose("Running {} for the period {} to {}"
        #         .format(model.plugin.name, di, df))
        # verbose("Running mode: {}".format(mode))
        # verbose("Sub-directory: {}".format(runsubdir_nest))
        # print "do_simu", do_simu
        # print "read_only", read_only

        # Prepare observations for the model
        if not read_only:
            model.dataobs = obsvect.obsvect2native(di, df, mode,
                                                   runsubdir_nest, workdir)

        # If only initializing inputs, continue to next sub-period
        if getattr(self, 'onlyinit', False):
            continue

        model.chain = min(di, df)

    # If only initializing inputs, exit
    if getattr(self, 'onlyinit', False):
        verbose("The run was correctly initialized")
        return

    # Re-initializing the chain argument
    if hasattr(model, 'chain'):
        del model.chain

    if mode in ['fwd']:
        controlvect.dump("{}/controlvect_{}".format(rundir, run_id),
                         to_netcdf=getattr(controlvect, 'save_out_netcdf',
                                           True),
                         dir_netcdf='{}/controlvect/'.format(rundir),
                         run_id=run_id)

    rundir = "{}/obsoperator/{}".format(workdir, mode)
    path.init_dir(rundir)
    dump_type = obsvect.dump_type
    dump_datastore(obsvect.datastore,
                   file_monit='{}/monitor_{}.{}'.format(
                       rundir, run_id, dump_type),
                   mode='w',
                   dump_type=dump_type,
                   col2dump=[
                       'obs_ghg', 'obs_bkg', 'obs_model', 'obs_sim',
                       'obs_check', 'obs_bkgerr', 'obs_err', 'obs_hx'
                   ])

    # Returning obsvect to the simulator
    if mode == 'fwd':
        return obsvect
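
# A minimal sketch (not pycif code) of the gradient accumulation above,
# assuming a dense footprint matrix H and a diagonal observation-error
# covariance R; 'obsoper' builds the same quantity footprint by footprint
# because each FLEXPART grid file only covers a few state time steps.
import numpy as np

nobs, nvar = 5, 6                      # toy dimensions
rng = np.random.default_rng(0)

H = rng.random((nobs, nvar))           # footprint (sensitivity) matrix
x = rng.random(nvar)                   # control vector
y = rng.random(nobs)                   # observations
r = 0.1 + rng.random(nobs)             # observation error std (diag of R)

hx = H @ x                             # model equivalents, Hx
departure = hx - y                     # Hx - y0
grad_o = H.T @ (departure / r**2)      # Jo'(x) = H^T R^-1 (Hx - y)
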
Example #2
def parse_tracers(self, datei, datef, file_monitor="", workdir="", **kwargs):
    """Parses all observation files related to the tracers specified as
    inputs

    Args:
        self (Measurement): dictionary of tracers as defined in the Yaml
                file
        datei (datetime.datetime): initial date for the inversion window
        datef (datetime.datetime): end date for the inversion window
        file_monitor (str): file with pre-compiled observations, if it
                exists
        workdir (str): working directory
        logfile (str): path to the log file for verbose instances
        **kwargs (dictionary): any additional argument that might be useful
                for extracting observations. Default contains config_dict

    Returns:
        dictionary : dictionary with all observations

    Notes:
        The data that are kept in the datastore are also saved in a
        monit_standard.nc file for debugging
    """

    # Dump type: default is nc
    self.dump_type = getattr(self, "dump_type", "nc")

    # If file_monitor is defined, tries reading it
    if hasattr(self, "file_monitor"):
        file_monitor = self.file_monitor

        try:
            info("Extracting measurements from {}".format(file_monitor))
            return dump.read_datastore(file_monitor,
                                       dump_type=self.dump_type,
                                       **kwargs)

        except IOError as e:
            info(str(e))
            info("Could not find a monitor file, reading observations")

        except Exception as e:
            info(e)
            info("Could not read the specified monitor file: {}", file_monitor)
            raise e

    # Otherwise, create the monitor from observations
    if hasattr(self, "workdir"):
        workdir = self.workdir

    file_monitor = workdir + "/obs/monit_standard.nc"

    # If the measurement definition is empty in the Yaml,
    # return an empty datastore
    if not hasattr(self, "species"):
        return init_empty()

    # Loops through tracers if monitor not found
    path.init_dir(workdir + "/obs/")
    if os.path.isfile(file_monitor):
        os.remove(file_monitor)

    datastore = {}

    # Merging species datastores into one datastore
    for spec in self.species.attributes:
        specattr = getattr(self.species, spec)

        if hasattr(specattr, "format") and hasattr(specattr, "provider"):
            info("Extracting measurements for {}"
                 " with a single provider {}".format(spec, specattr.provider))

            parser = ObsParser.get_parser(specattr)
            datastore[spec] = parser.parse_multiple_files(spec=spec)
            datastore[spec] = datastore[spec].loc[str(datei):str(datef)]
            continue

        else:
            info("Extracting measurements for {} from multiple providers".
                 format(spec))

        # Looping over providers
        dataspec = {}
        for provider in specattr.attributes:
            # Get the observation parser
            providattr = getattr(specattr, provider)
            parser = ObsParser.get_parser(providattr)

            # Reading all files from provider
            dataspec[provider] = parser.parse_multiple_files(spec=spec)

            # Cropping to the inversion window
            dataspec[provider] = dataspec[provider].loc[str(datei):str(datef)]

        datastore[spec] = pd.concat(list(dataspec.values()))

    # Grouping species into a single datastore
    datastore = pd.concat(list(datastore.values()))

    # Dumping
    dump.dump_datastore(datastore,
                        file_monitor,
                        workdir,
                        dump_type=self.dump_type,
                        **kwargs)

    return datastore
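
# A small sketch (toy data, not pycif) of the merge-and-crop pattern used
# above: each provider's dataframe is cropped to the inversion window with
# label-based .loc slicing on its DatetimeIndex, then merged with pd.concat.
import pandas as pd

datei, datef = "2012-01-01", "2012-01-31"

prov_a = pd.DataFrame({"obs": [1.0, 2.0]},
                      index=pd.to_datetime(["2012-01-10", "2012-02-05"]))
prov_b = pd.DataFrame({"obs": [3.0]},
                      index=pd.to_datetime(["2012-01-20"]))

dataspec = {prov: df.loc[str(datei):str(datef)]
            for prov, df in [("a", prov_a), ("b", prov_b)]}
datastore = pd.concat(list(dataspec.values()))   # 2 rows kept, 1 cropped
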
Example #3
def init_y0(obsvect, measurements, **kwargs):
    """Initializes the observation vector. In most cases the observation
    vector is similar to the measurement vector but there is no reason
    for it to be the same, especially when super-observations are used
    (e.g. daily or afternoon averages, gradients, etc.)

    Args:
        obsvect (Plugin): the observation vector with all its attributes
        measurements (Plugin): the pre-loaded measurements

    Returns:
        obsvect, updated with horizontal and vertical coordinates,
        as well as model time steps

    """

    # Saves initialized observation vector to workdir/obsvect/monitor
    # Try linking file_obsvect to workdir/obsvect if existing
    # Otherwise, define it and generate it later on
    path.init_dir("{}/obsvect".format(obsvect.workdir))
    file_monitor = "{}/obsvect/monitor.{}".format(obsvect.workdir,
                                                  obsvect.dump_type)
    if os.path.isfile(file_monitor):
        os.remove(file_monitor)

    if os.path.isfile(obsvect.file_obsvect):
        path.link(obsvect.file_obsvect, file_monitor)

    # From here, no interaction with files outside the working directory
    obsvect.file_obsvect = file_monitor

    # Try reading file
    allcorrect, ok_hcoord, ok_vcoord, do_tstep = False, False, False, True
    exclude_stations = getattr(obsvect, "exclude_stat", [])
    try:
        # Reading a monitor file if available
        info("Try opening {}".format(file_monitor))
        obsvect.datastore = read_datastore(file_monitor,
                                           dump_type=obsvect.dump_type)

        # Check that the monitor is consistent with the simulation to be run
        # If True, returns directly the observation as is
        allcorrect, ok_hcoord, ok_vcoord, do_tstep = obsvect.check_monitor()
        if allcorrect and len(exclude_stations) == 0:
            return obsvect

        # Otherwise, recompute necessary items (i, j, lev, tstep)
        # of the observation vector
        raise PluginError

    # If the monitor file could not be opened, start back from measurements
    except IOError:

        info("Couldn't open the observation file")
        info("Generating observation vector from measurements")

        # Copying attributes from measurements
        for key in measurements.attributes:
            setattr(obsvect, key, getattr(measurements, key))

        # Copying datastore from measurements if existing and not empty
        obsvect.datastore = getattr(measurements, "datastore", init_empty())

    except PluginError:
        info("Warning! The prescribed monitor file is not consistent with "
             "the current simulation. Re-analysing it")

    # Remove unwanted stations
    if len(exclude_stations) > 0:
        mask = ~(obsvect.datastore["station"].isin(exclude_stations))
        obsvect.datastore = obsvect.datastore.loc[mask]

    # Crops y0 to the simulation period
    obsvect.datastore = crop_monitor(obsvect.datastore, obsvect.datei,
                                     obsvect.datef, **kwargs)

    # Computes grid cells where observations fall
    if not ok_hcoord:
        obsvect = hcoord(obsvect, **kwargs)

    # Computes model layer where observations fall
    if not ok_vcoord:
        obsvect = vcoord(obsvect, **kwargs)

    # Compute time steps corresponding to the observations
    if do_tstep:
        obsvect = tstep(obsvect, **kwargs)

    # Dumps the datastore
    # Attributes to keep in the monitor if NetCDF format (recommended!)
    nc_attributes = {
        "datei": obsvect.datei.strftime("%d-%m-%Y %H:%M:%S"),
        "datef": obsvect.datef.strftime("%d-%m-%Y %H:%M:%S"),
        "model name": obsvect.model.plugin.name,
        "model version": obsvect.model.plugin.version,
        "domain nlon": str(obsvect.model.domain.nlon),
        "domain nlat": str(obsvect.model.domain.nlat),
    }

    if getattr(obsvect, "dump", True):
        dump_datastore(
            obsvect.datastore,
            file_monit=obsvect.file_obsvect,
            dump_type=obsvect.dump_type,
            nc_attributes=nc_attributes,
            mode="w",
        )

    return obsvect
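
# A toy sketch of the station-exclusion and window-cropping steps above;
# crop_monitor is replaced here by an explicit boolean mask, which is an
# assumption about its behaviour, not its actual implementation.
import pandas as pd

datastore = pd.DataFrame(
    {"station": ["MHD", "JFJ", "MHD"], "obs": [1.0, 2.0, 3.0]},
    index=pd.to_datetime(["2012-01-05", "2012-01-15", "2012-03-01"]),
)

# Remove unwanted stations (same isin/mask pattern as init_y0)
exclude_stations = ["JFJ"]
datastore = datastore.loc[~datastore["station"].isin(exclude_stations)]

# Crop to the simulation window
datei, datef = pd.Timestamp("2012-01-01"), pd.Timestamp("2012-02-01")
datastore = datastore.loc[(datastore.index >= datei)
                          & (datastore.index < datef)]   # 1 row left
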
Example #4
def native2obsvect(
    transf,
    xmod,
    mapper,
    mod_input,
    di,
    df,
    mode,
    runsubdir,
    workdir,
    trans_mode,
    **kwargs
):
    """Aggregate simulations at the grid scale to total columns.
    Re-interpolate the model pressure levels to the satellite averaging kernel
    levels. Average using the averaging kernel formula

    """

    ddi = min(di, df)
    datastore = xmod
    ref_indexes = ~xmod.duplicated(subset=["indorig"])
    y0 = xmod.loc[ref_indexes]

    # Building the extended dataframe
    iq1 = y0["iq1"]
    nblinfo = y0["nblinfo"]
    list_satIDs = iq1.loc[y0["is_satellite"]].unique()

    ds_p = datastore.set_index("indorig")[["pressure", "dp", "airm", "hlay"]]
    for satID in list_satIDs:
        satmask = iq1 == satID
        nobs = np.sum(satmask)
        nblloc = nblinfo.loc[satmask].values - 1
        print("satID", satID)
        # Stacking output datastore into levels * nobs
        native_ind_stack = (
            np.flatnonzero(ref_indexes)[satmask]
            + np.arange(transf.model.domain.nlev)[:, np.newaxis]
        )
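        # native_ind_stack[l, k] is the row in `datastore` holding level l
        # of observation k (model levels occupy consecutive rows per obs)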

        # If sim is all NaNs, the species was not simulated; skip this
        # satellite
        sim = datastore.loc[:, "sim"].values[native_ind_stack]
        if not np.any(~np.isnan(sim)):
            continue

        # Grouping all data from this satellite
        datasim = xr.Dataset(
            {
                "pressure": (
                    ["level", "index"],
                    np.log(ds_p["pressure"].values[native_ind_stack]),
                ),
                "dp": (
                    ["level", "index"],
                    ds_p["dp"].values[native_ind_stack],
                ),
                "airm": (
                    ["level", "index"],
                    ds_p["airm"].values[native_ind_stack],
                ),
                "hlay": (
                    ["level", "index"],
                    ds_p["hlay"].values[native_ind_stack],
                ),
                "sim": (["level", "index"], sim),
            },
            coords={
                "index": nblloc,
                "level": np.arange(transf.model.domain.nlev),
            },
        )

        if mode == "tl":
            datasim["sim_tl"] = (
                ["level", "index"],
                datastore.loc[:, "sim_tl"].values[native_ind_stack],
            )

        # convert CHIMERE fields to the correct unit
        # from ppb to molec.cm-2 if the satellite product is a column
        if getattr(transf.available_satellites, satID).product == "column":
            keys = ["sim"] + (["sim_tl"] if mode == "tl" else [])
            for k in keys:
                datasim[k] *= datasim["hlay"] / (1e9 / datasim["airm"])

        # Check whether there is some ak
        file_aks = ddi.strftime(
            "{}/obsvect/satellites/infos_{}%Y%m%d%H%M.nc".format(
                transf.workdir, satID
            )
        )
        print("infos file", file_aks)

        # NB: this read used to be wrapped in a try/except IOError;
        # the commented fallback below is kept for reference
        sat_aks = read_datastore(file_aks).to_xarray()

        # except IOError:
        # Assumes total columns?
        # datastore['qdp'] = datastore['sim'] * datastore['dp']
        # groups = datastore.groupby(['indorig'])
        # y0.loc[:, 'sim'] += \
        #     groups['qdp'].sum().values / groups['dp'].sum().values
        #
        # if 'sim_tl' in datastore:
        #     datastore['qdp'] = datastore['sim_tl'] * datastore['dp']
        #     groups = datastore.groupby(['indorig'])
        #     y0.loc[:, 'sim_tl'] += \
        #         groups['qdp'].sum().values / groups['dp'].sum().values
        #    continue

        aks = sat_aks["ak"][nblloc, -2::-1].T

        if getattr(transf.available_satellites, satID).pressure == "Pa":
            pavgs = sat_aks["pavg"][nblloc, ::-1].T
        else:
            pavgs = 100 * sat_aks["pavg"][nblloc, ::-1].T

        pavgs = xr.DataArray(
            pavgs,
            coords={"index": nblloc, "level": np.arange(aks.level.size + 1)},
            dims=("level", "index"),
        )
        pavgs_mid = xr.DataArray(
            np.log(0.5 * (pavgs[:-1].values + pavgs[1:].values)),
            coords={"index": nblloc, "level": np.arange(aks.level.size)},
            dims=("level", "index"),
        )
        dpavgs = xr.DataArray(
            np.diff(-pavgs, axis=0),
            coords={"index": nblloc, "level": np.arange(aks.level.size)},
            dims=("level", "index"),
        )
        qa0lus = sat_aks["qa0lu"][nblloc, -2::-1].T

        # Interpolating simulated values to averaging kernel pressures
        # Doing it by chunks to speed up the process
        # A single chunk overloads the memory,
        # while too many chunks do not take advantage
        # of scipy automatic parallelisation
        # 50 chunks seems to be fairly efficient
        sim_ak = 0.0 * pavgs_mid
        sim_ak_tl = 0.0 * pavgs_mid

        nchunks = getattr(transf, "nchunks", 50)
        chunks = np.linspace(0, nobs, num=nchunks, dtype=int)
        cropstrato = getattr(
            getattr(transf.available_satellites, satID), "cropstrato", 0
        )
        for k1, k2 in zip(chunks[:-1], chunks[1:]):
            info("Compute chunk for satellite {}: {}-{}".format(satID, k1, k2))

            # Vertical interpolation
            xlow, xhigh, alphalow, alphahigh = vertical_interp(
                datasim["pressure"][:, k1:k2].values,
                pavgs_mid[:, k1:k2].values,
                cropstrato,
            )

            # Applying coefficients
            meshout = np.array(pavgs_mid.shape[0] * [list(range(k2 - k1))])
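            # meshout holds the matching observation (column) indices so
            # that sim[xlow, meshout] / sim[xhigh, meshout] pick the
            # bracketing input levels for each output level and obs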
            sim = datasim["sim"][:, k1:k2].values
            sim_ak[:, k1:k2] = (
                alphalow * sim[xlow, meshout] + alphahigh * sim[xhigh, meshout]
            )

            if mode == "tl":
                sim_tl = datasim["sim_tl"][:, k1:k2].values
                sim_ak_tl[:, k1:k2] = (
                    alphalow * sim_tl[xlow, meshout]
                    + alphahigh * sim_tl[xhigh, meshout]
                )

        # # Correction with the pressure thickness
        # # WARNING: there is an inconsistency in the number of levels
        # sim_ak *= (datasim['dp'] * datasim['sim']).sum(axis=0).values \
        #           / (dpavgs * sim_ak).sum(axis=0).values \
        #           * dpavgs.sum(axis=0).values / datasim['dp'].sum(
        # axis=0).values
        # if 'sim_tl' in datasim:
        #     sim_ak_tl *= \
        #         (datasim['dp'] * datasim['sim_tl']).sum(axis=0).values \
        #         / (dpavgs * sim_ak_tl).sum(axis=0).values \
        #         * dpavgs.sum(axis=0).values / datasim['dp'].sum(axis=0).values

        # Applying aks
        nbformula = getattr(transf.available_satellites, satID).formula
        chosenlevel = getattr(
            getattr(transf.available_satellites, satID), "chosenlev", 0
        )

        print("product:", getattr(transf.available_satellites, satID).product)
        print("nbformula:", nbformula)
        print("chosenlev:", chosenlevel)
        y0.loc[satmask, "sim"] = apply_ak(
            sim_ak, dpavgs, aks.values, nbformula, qa0lus.values, chosenlevel
        )

        if mode == "tl":
            y0.loc[satmask, "sim_tl"] = apply_ak_tl(
                sim_ak_tl,
                dpavgs,
                aks.values,
                nbformula,
                qa0lus.values,
                chosenlevel,
                sim_ak,
            )

    # Save forward datastore for later use by adjoint
    file_monit = ddi.strftime(
        "{}/chain/monit_%Y%m%d%H%M.nc".format(transf.model.adj_refdir)
    )
    dump_datastore(
        datastore,
        file_monit=file_monit,
        dump_default=False,
        col2dump=["pressure", "dp", "indorig", "hlay", "airm"],
        mode="w",
    )

    return y0
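
# A single-profile sketch of the interpolation and kernel steps above,
# assuming the simplest column formula y = sum_l ak_l * sim_l; the real
# apply_ak supports several formulas selected by 'nbformula' and a prior
# term qa0lu, which are omitted here.
import numpy as np

p_mod = np.array([1000e2, 850e2, 700e2, 500e2, 300e2])   # model levels (Pa)
sim_mod = np.array([1900., 1880., 1860., 1830., 1800.])  # simulated profile

p_ak = np.array([950e2, 750e2, 400e2])   # retrieval mid-level pressures
ak = np.array([0.2, 0.5, 0.3])           # averaging kernel

# Linear interpolation in log-pressure, as in vertical_interp;
# np.interp needs an increasing abscissa, hence the reversals
sim_ak = np.interp(np.log(p_ak[::-1]), np.log(p_mod[::-1]),
                   sim_mod[::-1])[::-1]

y_sim = np.dot(ak, sim_ak)               # one model column per sounding
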
Example #5
def parse_tracers(self, datei, datef, file_monitor="", workdir="", **kwargs):
    """Generate random observations at random locations in the domain"""

    # If file_monitor is defined, tries reading it
    if hasattr(self, "file_monitor"):
        file_monitor = self.file_monitor

        try:
            info("Extracting measurements from {}".format(file_monitor))
            return read_datastore(file_monitor,
                                  dump_type=getattr(self, "dump_type", "nc"),
                                  **kwargs)

        except IOError as e:
            info(str(e))
            info("Could not find a monitor file, reading observations")

        except Exception as e:
            info(e)
            info("Could not read the specified monitor file: {}", file_monitor)
            raise e

    # Otherwise, create the monitor from observations
    else:
        if hasattr(self, "workdir"):
            workdir = self.workdir

        if file_monitor == "" or file_monitor is None:
            file_monitor = workdir + "/obs/monit_standard.nc"

    # Initialize empty datastore
    ds = init_empty()

    # Loop over species
    for trcr in self.species.attributes:
        tracer = getattr(self.species, trcr)

        # Make date range
        drange = pd.date_range(datei,
                               datef,
                               freq=getattr(tracer, "frequency", "1H"))
        ndates = drange.size

        # Pick random locations for x and y within the reference domain
        xmin = self.domain.zlon.min()
        xmax = self.domain.zlon.max()
        ymin = self.domain.zlat.min()
        ymax = self.domain.zlat.max()
        zmax = getattr(tracer, "zmax", 100)

        nstat = tracer.nstations

        statx = np.random.uniform(low=xmin, high=xmax, size=nstat)
        staty = np.random.uniform(low=ymin, high=ymax, size=nstat)
        statz = np.random.uniform(low=1, high=zmax, size=nstat)

        # Put locations into a monitor
        duration = pd.to_timedelta(getattr(tracer, "duration", "1H"))
        duration_hours = duration.total_seconds() / 3600.0

        df = pd.DataFrame({
            "alt": np.array(ndates * list(statz)),
            "lat": np.array(ndates * list(staty)),
            "lon": np.array(ndates * list(statx)),
            "station": ndates * list(range(nstat)),
            "parameter": trcr,
            "duration": seconds_duration,
        })

        df.index = (np.array(nstat * list(drange)).reshape(
            (ndates, nstat), order="F").flatten())
        if getattr(tracer, "random_subperiod_shift", False):
            df.index += np.random.uniform(0, 1, size=ndates * nstat) * duration

        # Appending datastore
        ds = pd.concat((ds, df), sort=False)

    # Dumping the datastore for later use by pycif
    dump_datastore(ds, file_monit=file_monitor, dump_type="nc")

    return ds
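
# The reshape/flatten above interleaves stations and dates; a tiny check
# (toy sizes, no pycif dependency) showing it is equivalent to np.repeat
# on the date range.
import numpy as np
import pandas as pd

drange = pd.date_range("2012-01-01", periods=3, freq="1H")
nstat, ndates = 2, drange.size

idx = (np.array(nstat * list(drange))
       .reshape((ndates, nstat), order="F")
       .flatten())

assert (pd.DatetimeIndex(idx) == np.repeat(drange.values, nstat)).all()
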
Example #6
def obsoper(self,
            inputs,
            mode,
            run_id=0,
            datei=datetime.datetime(1979, 1, 1),
            datef=datetime.datetime(2100, 1, 1),
            workdir="./",
            reload_results=False,
            **kwargs):
    """The observation operator.
    This function maps information from the control space to the observation
    space and conversely depending on the running mode.

    Generates model inputs from the control vector
    inputs(x)

    Turns observations into model compatible extraction points
    i.e. y0 (fwd) or dy* (adj)

    Runs the model to get model extractions
    i.e. Hx (fwd) or H^T.dy* (adj)

    if FWD
    Turns back model-equivalents into observation space
    Hx(native) -> Hx(obs)

    Generates departures and multiplies by R^(-1)
    -> Hx - y0, R^(-1).(Hx - y0)

    if ADJ
    Turns native increments to control space increments
    H^T.dy* (native) -> H^T.dy* (control)

    Translates control increments to chi values
    -> B^(1/2) . H^T.dy*

    Args:
        inputs: can be a control vector or an observation vector, depending
                on the mode
        mode (str): the running mode; should be one of 'fwd', 'tl' or 'adj'
        run_id (int): the ID number of the current run; is used to define
                the current sub-directory
        datei, datef (datetime.datetime): start and end dates of the
                simulation window
        workdir (str): the parent directory of the simulation to run
        reload_results (bool): look for results from pre-computed simulation
                if any

    Returns:
        observation or control vector depending on the running mode
    """

    # Check that inputs are consistent with the mode to run
    check_inputs(inputs, mode)

    # Create a sub-working directory for the present run
    rundir = "{}/obsoperator/{}_{:04d}/".format(workdir, mode, run_id)
    path.init_dir(rundir)

    # Create save directory for chaining sub-simulations
    path.init_dir("{}/chain/".format(rundir, run_id))

    # Return results from a previous run if they exist
    if reload_results:
        if mode in ["fwd", "tl"]:
            try:
                # Saving the directory for possible later use by the adjoint
                self.model.adj_refdir = rundir

                # Trying reading the monitor if any
                obsvect = self.obsvect
                obsvect.datastore = read_datastore(
                    "{}/monitor.nc".format(rundir))
                return obsvect

            except IOError:
                info("There is no monitor file to be recovered. "
                     "Compute the full forward simulation")

        elif mode == "adj":
            try:
                statevect = self.statevect
                statevect.load("{}/statevect.pickle".format(rundir))
                return statevect

            except IOError:
                info("There is no statevect file to be recovered. "
                     "Compute the full adjoint simulation.")

    # Initializing modules and variables from the setup
    model = self.model
    statevect = self.statevect
    obsvect = self.obsvect
    subsimu_dates = model.subsimu_dates

    if mode in ["fwd", "tl"]:
        obsvect.datastore.loc[:, "sim"] = 0.0
        obsvect.datastore.loc[:, "sim_tl"] = 0.0

        # Dumps control vector in forward and tl modes
        statevect.dump(
            "{}/statevect.pickle".format(rundir),
            to_netcdf=getattr(statevect, "save_out_netcdf", False),
            dir_netcdf="{}/statevect/".format(rundir),
        )

    elif mode == "adj":
        obsvect = inputs
        statevect.dx = 0 * statevect.x
        subsimu_dates = subsimu_dates[::-1]

    # Loop through model periods and runs the model
    self.missingperiod = False
    for di, df in zip(subsimu_dates[:-1], subsimu_dates[1:]):
        # Create a sub-path for each period
        runsubdir = rundir + min(di, df).strftime("%Y-%m-%d_%H-%M")
        _, created = path.init_dir(runsubdir)

        # If the sub-directory was already created,
        # the observation operator considers that the sub-simulation
        # was already properly run, thus passing to next sub-periods
        # Compute the sub-simulation anyway if some previous periods
        # were missing (as stored in self.missingperiod)
        do_simu = (created or not getattr(self, "autorestart", False)
                   or self.missingperiod)
        self.missingperiod = do_simu

        # Some verbose
        info("Running {} for the period {} to {}".format(
            model.plugin.name, di, df))
        info("Running mode: {}".format(mode))
        info("Sub-directory: {}".format(runsubdir))

        # Prepare observation vector at model resolution
        model.dataobs = obsvect.obsvect2native(di, df, mode, runsubdir,
                                               workdir)
        model.dataobs = self.do_transforms(
            obsvect.transform,
            model.dataobs,
            obsvect.transform.mapper,
            "obs",
            di,
            df,
            mode,
            runsubdir,
            workdir,
            trans_mode="fwd",
        )

        # If the simulation was already carried out, pass to next steps
        # If a sub-period was missing, following ones will be recomputed even
        # if available
        if do_simu:
            # Writing observations for on-line model extraction if any
            model.native2inputs(model.dataobs, "obs", di, df, runsubdir, mode)

        # Prepare inputs for the model and carry out on the fly transformations
        mapper = statevect.transform.mapper
        self.do_transforms(
            statevect.transform,
            statevect,
            mapper,
            "state",
            di,
            df,
            mode,
            runsubdir,
            workdir,
            trans_mode="fwd",
            do_simu=do_simu,
            onlyinit=getattr(self, "onlyinit", False),
        )

        # If only initializing inputs, continue to next sub-period
        if getattr(self, "onlyinit", False):
            continue

        # Update observation vector if necessary
        if mode in ["fwd", "tl"] and obsvect.datastore.size > 0:
            # Read outputs
            model.outputs2native({}, "obs", di, df, runsubdir, mode)
            # Apply obs transformation and update obs datastore
            model.dataobs = self.do_transforms(
                obsvect.transform,
                model.dataobs,
                obsvect.transform.mapper,
                "obs",
                di,
                df,
                mode,
                runsubdir,
                workdir,
                trans_mode="inv",
            )
            obsvect.native2obsvect(model.dataobs, di, df, runsubdir, workdir)

        # Update state vector if necessary
        elif mode == "adj":
            mapper = statevect.transform.mapper
            self.do_transforms(
                statevect.transform,
                statevect,
                mapper,
                "state",
                di,
                df,
                mode,
                runsubdir,
                workdir,
                trans_mode="inv",
            )

        # Keep in memory the fact that it is (or not) a chained simulation
        model.chain = min(di, df)

    # If only initializing inputs, exit
    if getattr(self, "onlyinit", False):
        info("The run was correctly initialized")
        return

    # Re-initializing the chain argument
    if hasattr(model, "chain"):
        del model.chain

    # Dump observation vector for later use in fwd and tl modes
    # Otherwise dumps the control vector
    if mode in ["fwd", "tl"]:
        dump_type = obsvect.dump_type
        dump_datastore(
            obsvect.datastore,
            file_monit="{}/monitor.{}".format(rundir, dump_type),
            mode="w",
            dump_type=dump_type,
        )

    elif mode == "adj":
        statevect.dump("{}/statevect.pickle".format(rundir))

    # Cleaning unnecessary files
    if getattr(model, "autoflush", False):
        info("Flushing unnecessary files in {}".format(rundir))
        model.flushrun(rundir, mode)

    # Returning the output object depending on the running mode
    if mode in ["fwd", "tl"]:
        return obsvect

    if mode == "adj":
        return statevect
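
# An illustrative driver for this operator (the wiring below is an
# assumption about usage, not pycif API): a 'fwd' call fills the 'sim'
# column of the observation datastore, an 'adj' call maps departures
# back to the state vector, whose .dx then holds H^T R^-1 (Hx - y).
def cost_and_gradient(oper, statevect, obsvect):
    obsvect = oper.obsoper(statevect, "fwd")
    dep = obsvect.datastore["sim"] - obsvect.datastore["obs"]
    r2 = obsvect.datastore["obserror"] ** 2
    j_o = 0.5 * float((dep ** 2 / r2).sum())   # observation cost Jo
    statevect = oper.obsoper(obsvect, "adj")
    return j_o, statevect.dx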