Пример #1
0
def test_ensemble_prediction_is_stable_after_saving(model_name, fitted_model):
    """A save/load round trip must leave the model's predictions unchanged."""
    params_file = 'model_params.json'

    # Baseline: the in-memory model, called without any new data.
    before = fitted_model.predict()

    fitted_model.save_params(params_file, overwrite=True)
    restored = utils.load_saved_model(params_file)

    # The restored model is given the observations and predictors explicitly.
    after = restored.predict(to_predict=obs, predictors=predictors)

    assert np.all(before == after)
Пример #2
0
def test_invalid_saved_model_type():
    """Handing the loader an integer must raise TypeError."""
    not_a_saved_model = 123
    with pytest.raises(TypeError):
        utils.load_saved_model(not_a_saved_model)
Пример #3
0
def test_ensemble_save_load(model_name, fitted_model):
    """A reloaded model returns one prediction per observation."""
    params_file = 'model_params.json'
    fitted_model.save_params(params_file, overwrite=True)

    restored = utils.load_saved_model(params_file)
    predicted = restored.predict(obs, predictors)

    assert len(predicted) == len(obs)
Пример #4
0
def test_ensemble_do_not_predict_without_data(model_name, fitted_model):
    """A model restored from file must refuse to predict when given no data."""
    params_file = 'model_params.json'
    fitted_model.save_params(params_file, overwrite=True)

    # The restored model is called with no arguments at all.
    restored = utils.load_saved_model(params_file)
    with pytest.raises(TypeError):
        restored.predict()
Пример #5
0
def test_save_and_load_model_universal_loader(model_name, fitted_model):
    """utils.load_saved_model restores a model with identical parameters."""
    params_file = 'model_params.json'
    fitted_model.save_params(params_file, overwrite=True)
    restored = utils.load_saved_model(params_file)

    original_params = fitted_model.get_params()
    restored_params = restored.get_params()
    assert original_params == restored_params
Пример #6
0
def test_ensemble_parameters(model_name, fitted_model):
    """Saving and reloading a model must not alter its parameters."""
    params_file = 'model_params.json'
    fitted_model.save_params(params_file, overwrite=True)
    restored = utils.load_saved_model(params_file)

    restored_params = restored.get_params()
    original_params = fitted_model.get_params()
    assert restored_params == original_params
Пример #7
0
                forecast_metadata.to_dict('records')):

            time_elapsed = np.round((time.time() - start_time) / 60, 0)
            print(
                'Hindcast date {d}/{D}, species {s}/{S}, elapsed time {t} minutes'
                .format(d=date_i,
                        D=total_dates,
                        s=species_i,
                        S=total_species,
                        t=time_elapsed))

            species = forecast_info['species']
            Phenophase_ID = forecast_info['Phenophase_ID']
            model_file = config['phenology_model_folder'] + forecast_info[
                'model_file']
            model = utils.load_saved_model(model_file)

            # only make predictions where this species is located.
            sites_for_this_species = species_sites.query(
                'species == @species & Phenophase_ID == @Phenophase_ID')
            site_info_for_this_species = site_info_for_prediction[
                site_info_for_prediction.site_id.isin(
                    sites_for_this_species.site_id)]
            site_temp_for_this_species = site_temp[site_temp.site_id.isin(
                sites_for_this_species.site_id)]
            #print('{s} - {p}'.format(s=species, p=Phenophase_ID))
            #print('################## site info ###################')
            #print(site_info_for_this_species)
            #print('################## site temp ###################')
            #print(site_temp_for_this_species)
            hindcasts_to_compute.append(
Пример #8
0
def run(climate_forecast_folder=None,
        phenology_forecast_folder=None,
        species_list=None):
    """Apply saved phenology models to the current climate forecasts.

    For every in-season species/phenophase that has a range mask, the saved
    model is loaded, run over all climate forecast files, and the resulting
    mean and standard deviation are merged into one dataset which is written
    to a compressed netCDF file.

    climate_forecast_folder
        Folder searched for ``*.nc`` climate forecast files. The pattern is
        appended directly to this string, so it must end with a path
        separator. Defaults to ``config['current_forecast_folder']``.

    phenology_forecast_folder
        Folder the final forecast file is written to (the filename is
        appended directly, so include a trailing separator). Defaults to
        ``config['phenology_forecast_folder']``.

    species_list
        Table (pandas DataFrame) with the columns ``species``,
        ``Phenophase_ID``, ``current_forecast_version``,
        ``season_start_doy`` and ``season_end_doy``. Defaults to the file
        named by ``config['species_list_file']``.

    returns
        The filename of the netCDF forecast file that was written.

    raises
        RuntimeError if no species is currently in season, or if none of
        the in-season species has a range mask (nothing to write).
    """
    divider = '#' * 90

    config = tools.load_config()

    current_season = tools.current_growing_season(config)
    current_season_doy_0 = str(int(current_season)) + '0101'
    current_season_doy_0 = tools.string_to_date(current_season_doy_0,
                                                h=False).date()
    today = datetime.datetime.today().date()

    current_doy = today.timetuple().tm_yday
    season_first_date = str(
        int(current_season) -
        1) + config['season_month_begin'] + config['season_day_begin']
    season_first_date = tools.string_to_date(season_first_date, h=False).date()

    # Once the season has started but before Jan 1 of the season year, shift
    # the doy to be zero/negative so it is relative to the coming Jan 1.
    # NOTE(review): the fixed 365 offset is off by one day in leap years.
    if season_first_date <= today < current_season_doy_0:
        current_doy -= 365

    print(divider)
    print('Applying phenology models - ' + str(today))

    range_masks = xr.open_dataset(config['species_range_file'])

    doy_0 = np.datetime64(current_season_doy_0)

    # Default location of climate forecasts
    if not climate_forecast_folder:
        climate_forecast_folder = config['current_forecast_folder']

    current_climate_forecast_files = glob.glob(climate_forecast_folder +
                                               '*.nc')

    print(
        str(len(current_climate_forecast_files)) +
        ' current climate forecast files: \n' +
        str(current_climate_forecast_files))

    # Load default species list if no special one was passed.
    # ``not species_list`` raises ValueError for a DataFrame, so None is
    # tested explicitly; other falsy values (e.g. an empty list) still fall
    # back to the default as before.
    if species_list is None or (not isinstance(species_list, pd.DataFrame)
                                and not species_list):
        species_list = pd.read_csv(config['species_list_file'])
        species_list = species_list[[
            'species', 'Phenophase_ID', 'current_forecast_version',
            'season_start_doy', 'season_end_doy'
        ]]

    # Only forecast species and phenophases in the current season
    species_list = species_list[(current_doy >= species_list.season_start_doy)
                                & (current_doy <= species_list.season_end_doy)]

    if len(species_list) == 0:
        raise RuntimeError(
            'No species currenly in season, which is roughly Dec. 1 - Nov. 1')

    phenology_model_metadata = pd.read_csv(
        config['phenology_model_metadata_file'])

    forecast_metadata = species_list.merge(
        phenology_model_metadata,
        left_on=['species', 'Phenophase_ID', 'current_forecast_version'],
        right_on=['species', 'Phenophase_ID', 'forecast_version'],
        how='left')

    # Default location to write phenology forecasts
    if not phenology_forecast_folder:
        phenology_forecast_folder = config['phenology_forecast_folder']

    print(divider)

    species_with_masks = range_masks.species.values
    tmp_forecast_file = config['tmp_folder'] + 'forecast_tmp.nc'

    # None until the first species is actually processed. Keying on this
    # (rather than on the loop index) keeps things working when the first
    # record is skipped for lacking a range mask.
    all_species_forecasts = None
    num_species_processed = 0
    for i, forecast_info in enumerate(forecast_metadata.to_dict('records')):
        species = forecast_info['species']
        phenophase = forecast_info['Phenophase_ID']

        print(divider)
        if species not in species_with_masks:
            print('Skipping {s} {p}, no range mask'.format(s=species,
                                                           p=phenophase))
            continue

        print('Apply model for {s} {p}'.format(s=species, p=phenophase))
        print(
            'forecast attempt {i} of {n} potential species. {n2} processed succesfully so far.'
            .format(i=i,
                    n=len(forecast_metadata),
                    n2=num_species_processed))
        species_range = range_masks.sel(species=species)

        # Only load the model once we know the species will be forecast.
        model_file = config['phenology_model_folder'] + forecast_info[
            'model_file']
        model = utils.load_saved_model(model_file)

        prediction, prediction_sd = predict_phenology_from_climate(
            model,
            current_climate_forecast_files,
            post_process='automated',
            doy_0=doy_0,
            species_range=species_range,
            n_jobs=config['n_jobs'])

        species_forecast = xr.Dataset(
            data_vars={
                'doy_prediction':
                (('species', 'phenophase', 'lat', 'lon'), prediction),
                'doy_sd':
                (('species', 'phenophase', 'lat', 'lon'), prediction_sd)
            },
            coords={
                'species': [species],
                'phenophase': [phenophase],
                'lat': species_range.lat,
                'lon': species_range.lon
            })

        if all_species_forecasts is None:
            all_species_forecasts = species_forecast
        else:
            merge_start_time = time.time()
            all_species_forecasts = xr.merge(
                [all_species_forecasts, species_forecast])
            print('merge time {s} sec'.format(
                s=round(time.time() - merge_start_time, 0)))
        num_species_processed += 1

        # Merging this files over and over slows things down more and more
        # Saving it every few iterations seems to speed things up.
        if num_species_processed % 5 == 0:
            all_species_forecasts.to_netcdf(tmp_forecast_file)
            all_species_forecasts = xr.open_dataset(tmp_forecast_file)
            all_species_forecasts.load()
            all_species_forecasts.close()

    if all_species_forecasts is None:
        # Every in-season species was skipped; fail with a clear message
        # instead of an unbound-variable error further down.
        raise RuntimeError(
            'No phenology forecasts were made, no in-season species has a '
            'range mask')

    print(divider)
    print('phenology forecast final processing')

    provenance_note = \
    """Forecasts for plant phenology of select species flowering and/or leaf out
    times for the {s} season. Made on {t} from NOAA CFSv2
    forecasts downscaled using PRISM climate data.
    Plant phenology models made using National Phenology Network data. 
    """.format(s=current_season, t=today)

    all_species_forecasts.attrs['note'] = provenance_note
    all_species_forecasts.attrs['issue_date'] = str(today)
    all_species_forecasts.attrs['crs'] = '+init=epsg:4269'
    # TODO: add some more metadata (common names?)

    # Honor the folder argument; previously the config default was always
    # used here even when a folder had been passed in.
    forecast_filename = (phenology_forecast_folder + 'phenology_forecast_' +
                         str(today) + '.nc')

    # Both variables are written with the same compressed integer encoding.
    doy_encoding = {
        'zlib': True,
        'complevel': 4,
        'dtype': 'int32',
        'scale_factor': 0.001,
        '_FillValue': -9999
    }

    all_species_forecasts = all_species_forecasts.chunk({'lat': 50, 'lon': 50})
    all_species_forecasts.to_netcdf(forecast_filename,
                                    encoding={
                                        'doy_prediction': dict(doy_encoding),
                                        'doy_sd': dict(doy_encoding)
                                    })

    # Return filename of final forecast file for use by primary script
    return forecast_filename
def predict_phenology_from_climate(model,
                                   climate_forecast_files,
                                   post_process,
                                   doy_0,
                                   species_range=None,
                                   n_jobs=1):
    """Predict a phenology model over a climate forecast ensemble.

    model
        A fitted model object (anything exposing ``get_params``), or a
        saved model file which is loaded with ``utils.load_saved_model``.

    climate_forecast_files
        Iterable of climate forecast files, one per ensemble member. Each
        is opened with ``xr.open_dataset`` and must provide ``time`` and
        ``tmean``.

    post_process
        How to deal with multiple forecasts and/or bootstraps
        'automated': for the automated website forecasts. Returns
                    a tuple (prediction_doy, prediction_sd), the mean and
                    standard deviation over the ensemble, each with two
                    leading length-1 axes added in front of (lat, lon).
        'hindcast': for hindcasting where all the bootstrap +
                    climate ensembles are wanted. Returns a single array
                    prediction_doy of shape
                    (n_ensemble, n_bootstrap, lat, lon) for bootstrap
                    models.

    doy_0
        A timestamp for doy_0, usually Jan 1 of the year in question.
        It is subtracted from the forecast times to get day-of-year values.

    species_range
        xarray object from the species range code representing 1 species.
        Cells where its boolean ``range`` variable is False are set to NaN.
        No masking is done when None.

    n_jobs
        Passed through to ``model.predict``.

    returns
        See ``post_process``.

    raises
        ValueError if ``post_process`` is not a known routine.
    """
    if post_process not in ('automated', 'hindcast'):
        raise ValueError('Uknown post-processing routine: ' +
                         str(post_process))

    # If not a pre-fitted model then assume it's a saved file to load.
    # Only the "no such method" case triggers the load, so genuine errors
    # raised by a model's own get_params() are no longer hidden.
    try:
        model.get_params()
    except AttributeError:
        model = utils.load_saved_model(model)

    # When using a bootstrap model in hindcasting we want *all*
    # the predictions. Otherwise just the mean will do.
    predict_kwargs = {'n_jobs': n_jobs}
    if type(model).__name__ == 'BootstrapModel' and post_process == 'hindcast':
        predict_kwargs['aggregation'] = 'none'

    species_ensemble = []
    for climate_file in climate_forecast_files:
        # The context manager closes the file once its values are read.
        with xr.open_dataset(climate_file) as climate:
            doy_series = pd.TimedeltaIndex(climate.time.values - doy_0,
                                           freq='D').days.values
            predictors = {
                'temperature': climate.tmean.values,
                'doy_series': doy_series
            }
        species_ensemble.append(
            model.predict(predictors=predictors, **predict_kwargs))

    # The ``np.float`` alias was removed from NumPy; builtin float is the
    # same dtype.
    species_ensemble = np.array(species_ensemble).astype(float)
    # apply nan to non predictions
    species_ensemble[species_ensemble == 999] = np.nan

    # Keep only values in the range. The ellipsis applies the (lat, lon)
    # mask to the trailing two axes regardless of how many ensemble and
    # bootstrap axes precede them.
    if species_range is not None:
        species_ensemble[..., ~species_range.range.values] = np.nan

    if post_process == 'hindcast':
        return species_ensemble

    # 'automated': collapse the ensemble axis, then extend by two leading
    # axes to match the (species, phenophase, lat, lon) xarray creation.
    prediction_doy = np.nanmean(species_ensemble, axis=0)
    prediction_sd = np.nanstd(species_ensemble, axis=0)

    prediction_doy = prediction_doy[np.newaxis, np.newaxis]
    prediction_sd = prediction_sd[np.newaxis, np.newaxis]

    return prediction_doy, prediction_sd
Пример #10
0
    model.fit(species_obs, predictor_data)

    ######################################################
    # Save the model parameters
    time.sleep(1)
    model_hash = str(uuid.uuid1())
    model_filename = '{s}_{p}_{h}.json'.format(
        s=species_info['species'].replace(' ', '_'),
        p=species_info['Phenophase_ID'],
        h=model_hash)

    model.save_params(config['phenology_model_folder'] + model_filename)

    # reload the model to clear the fitting data
    # Otherwise the prediction below balks.
    model = utils.load_saved_model(config['phenology_model_folder'] +
                                   model_filename)
    #######################################################
    # make entry for this specifc model in model metadata file
    # forecast version is -1 cause the models themselves will
    # never be used in the automated stuff.
    model_note = """Naive model using doy ~ latitude, Variation from bootstrapping"""

    all_model_metadata.append({
        'species': species,
        'Phenophase_ID': phenophase,
        'base_model': 'Naive',
        'forecast_version': -1,
        'model_file': model_filename,
        'build_date': str(today),
        'n_observations': len(species_obs),
        'percent_test': 0,
            'species == @this_spp & \
                                                            Phenophase_ID == @this_phenophase & \
                                                            forecast_version == @forecast_version_to_use'
        ).to_dict('records')
        n_model_entries = len(species_model_info)
        if n_model_entries == 0:
            print('No model found: {s} - {p} - forecast version {f}'.format(
                s=this_spp, p=this_phenophase, f=forecast_version_to_use))
            continue
        if n_model_entries > 1:
            raise RuntimeError('{n} models found for {s}-{p}-v{f}'.format(
                s=this_spp, p=this_phenophase, f=forecast_version_to_use))

        try:
            pheno_model = utils.load_saved_model(
                config['phenology_model_folder'] +
                species_model_info[0]['model_file'])
        except:
            continue

        # Make the weighted ensemble unweighted
        pheno_model.weights[:] = 1 / len(pheno_model.weights)

        ################
        # Make the prediction
        prediction = pheno_model.predict(predictors={
            'temperature':
            this_year_temperature.tmean.values,
            'doy_series':
            doy_series
        },