def evaluate_design_matrix(self):
    """Produce output design matrices with optional within-grid-cell sampling."""

    component = self.component
    centre_latitudes = self.centre_latitudes
    centre_longitudes = self.centre_longitudes

    time_index = self.time_index
    corresponding_datetime = self.corresponding_datetime

    latitude_resolution = float(self.grid_resolution[0])
    longitude_resolution = float(self.grid_resolution[1])

    latitude_delta = latitude_resolution / float(self.cell_sampling[0])
    longitude_delta = longitude_resolution / float(self.cell_sampling[1])

    # Loop over within-cell sample points to accumulate the cell-averaging design matrix
    design_matrix = None
    weight_normalisation_array = None
    for latitude_index in range(self.cell_sampling[0]):
        for longitude_index in range(self.cell_sampling[1]):

            # Offset each sample point from the cell centre
            point_latitudes = centre_latitudes - latitude_resolution / 2.0 + (0.5 + latitude_index) * latitude_delta
            point_longitudes = centre_longitudes - longitude_resolution / 2.0 + (0.5 + longitude_index) * longitude_delta

            projectionstructure = OutputRectilinearGridStructure(time_index, corresponding_datetime,
                                                                 point_latitudes, point_longitudes)

            block_model_matrix = component.storage.element_read().element_design(projectionstructure).design_matrix()
            block_weight_array = self.weight_array(projectionstructure.location_polar_coordinates()[:, 0])

            if design_matrix is None:
                design_matrix = scipy.sparse.diags(block_weight_array).dot(block_model_matrix)
                weight_normalisation_array = block_weight_array
            else:
                design_matrix += scipy.sparse.diags(block_weight_array).dot(block_model_matrix)
                weight_normalisation_array += block_weight_array

    # Normalise the accumulated design matrix by the summed weights
    self.design_matrix = scipy.sparse.diags(1.0 / weight_normalisation_array).dot(design_matrix)
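# A minimal, self-contained sketch (not part of the class above) of the weighted
# cell-averaging idea used by evaluate_design_matrix: each within-cell sample point
# contributes a weighted copy of its design matrix block, and the accumulated matrix
# is normalised by the summed weights. All names here are illustrative only.
import numpy
import scipy.sparse

def averaged_design(block_matrices, block_weights):
    """Combine per-sample-point design matrices into one weight-normalised average.

    block_matrices: list of (n_cells x n_state) sparse matrices, one per sample offset
    block_weights: list of length-n_cells weight arrays (e.g. cos(latitude))
    """
    design = None
    normalisation = None
    for matrix, weights in zip(block_matrices, block_weights):
        weighted = scipy.sparse.diags(weights).dot(matrix)
        design = weighted if design is None else design + weighted
        normalisation = weights.copy() if normalisation is None else normalisation + weights
    return scipy.sparse.diags(1.0 / normalisation).dot(design)

# Example: two sample points per cell, three cells, two state variables
blocks = [scipy.sparse.csr_matrix(numpy.ones((3, 2))), scipy.sparse.csr_matrix(2.0 * numpy.ones((3, 2)))]
weights = [numpy.array([1.0, 1.0, 1.0]), numpy.array([1.0, 3.0, 1.0])]
print(averaged_design(blocks, weights).toarray())  # each row is the weighted average of 1 and 2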
def test_init(self):
    A = OutputRectilinearGridStructure('A', 'B', 'C', 'D')
    self.assertEqual('A', A.time_index_number)
    self.assertEqual('B', A.corresponding_datetime)
    self.assertEqual('C', A.latitudes)
    self.assertEqual('D', A.longitudes)
def test_mini_world_local(self):

    # Local component
    local_component = SpatialComponent(
        ComponentStorage_InMemory(
            LocalElement(n_triangulation_divisions=1),
            LocalHyperparameters(log_sigma=0.0, log_rho=numpy.log(1.0))),
        SpatialComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED')

    # Analysis system using the specified components, for the Tmean observable
    analysis_system = AnalysisSystem([local_component], ObservationSource.TMEAN, log=StringIO())

    # Simulated inputs
    simulated_input_loader = SimulatedInputLoader()

    # Simulate evaluation of this time index
    simulated_time_indices = [0]

    # Update with data
    analysis_system.update([simulated_input_loader], simulated_time_indices)

    # Check state vector directly
    statevector = analysis_system.components[0].solutionstorage.partial_state_read(0).ravel()

    # These are the nodes where observations were put (see SimulatedObservationSource above)
    # - check they correspond to within 3 times the stated noise level
    self.assertAlmostEqual(20.0, statevector[12], delta=0.3)
    self.assertAlmostEqual(-15.0, statevector[17], delta=0.3)
    self.assertAlmostEqual(5.0, statevector[41], delta=0.3)

    # Also check entire state vector within outer bounds set by obs
    self.assertTrue(all(statevector < 20.0))
    self.assertTrue(all(statevector > -15.0))

    # And check output corresponds too
    # (evaluate result on output structure same as input)
    simulated_output_structure = SimulatedObservationStructure(0)
    result = analysis_system.evaluate_expected_value('MAP', simulated_output_structure, flag='POINTWISE')
    numpy.testing.assert_almost_equal(statevector[[12, 17, 41]], result)

    # Test output gridding, pointwise limit
    outputstructure = OutputRectilinearGridStructure(
        2, epoch_plus_days(2),
        latitudes=numpy.linspace(-89.875, 89.875, num=10),
        longitudes=numpy.linspace(-179.875, 179.875, num=20))
    pointwise_result = analysis_system.evaluate_expected_value('MAP', outputstructure, 'POINTWISE')
    pointwise_limit_result = analysis_system.evaluate_expected_value('MAP', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 3)
    numpy.testing.assert_array_almost_equal(pointwise_result, pointwise_limit_result)
def output_grid(storage_climatology, storage_large_scale, storage_local,
                outputfile, processdate, time_index,
                covariates_descriptor, insitu_biases, breakpoints_file,
                global_biases, global_biases_group_list,
                compute_uncertainties, method, compute_sample, sample_size):

    print 'VERSION: {0}'.format(get_revision_id_for_module(eustace))

    # Build analysis system
    analysissystem = AnalysisSystem_EUSTACE(storage_climatology, storage_large_scale, storage_local,
                                            covariates_descriptor, insitu_biases, breakpoints_file,
                                            global_biases, global_biases_group_list,
                                            compute_uncertainties, method, compute_sample, sample_size)

    # Configure output grid
    outputstructure = OutputRectilinearGridStructure(
        time_index, processdate,
        latitudes=numpy.linspace(-90. + definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                 90. - definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                 num=definitions.GLOBAL_FIELD_SHAPE[1]),
        longitudes=numpy.linspace(-180. + definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                  180. - definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                  num=definitions.GLOBAL_FIELD_SHAPE[2]))

    # Evaluate expected value and uncertainty at these locations, each field once
    print 'Evaluating: MAP'
    result_expected_value = analysissystem.evaluate_expected_value('MAP', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)
    print 'Evaluating: post_STD'
    result_expected_uncertainties = analysissystem.evaluate_expected_value('post_STD', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)

    print 'Evaluating: climatology fraction'
    climatology_fraction = analysissystem.evaluate_climatology_fraction(outputstructure, [1, 1], 1000)

    print 'Evaluating: the sample'
    sample = analysissystem.evaluate_projected_sample(outputstructure)

    # Save results
    filebuilder = FileBuilderGlobalField(
        outputfile,
        eustace.timeutils.epoch.days_since_epoch(processdate),
        'Infilling Example',
        get_revision_id_for_module(eustace),
        definitions.TAS.name,
        '',
        'Example data only',
        __name__,
        '')
    filebuilder.add_global_field(definitions.TAS, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TASUNCERTAINTY, result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TAS_CLIMATOLOGY_FRACTION, climatology_fraction.reshape(definitions.GLOBAL_FIELD_SHAPE))

    for index in range(definitions.GLOBAL_SAMPLE_SHAPE[3]):
        variable = copy.deepcopy(definitions.TASENSEMBLE)
        variable.name = variable.name + '_' + str(index)
        selected_sample = sample[:, index].ravel() + result_expected_value
        filebuilder.add_global_field(variable, selected_sample.reshape(definitions.GLOBAL_FIELD_SHAPE))

    filebuilder.save_and_close()
def test_compute_gridded_expected_value_harmonic_projection(self):
    # Compute cell grid area averages, with projection equalling the cosine of the latitude of each point
    new_structure = OutputRectilinearGridStructure(
        1, None, numpy.array([-60., 0., 60.]), numpy.array([-90., -60., 60., 90.]))
    A = Regridder(new_structure, [2, 3], blocking=2)
    component_solution = TestRegridder.HarmonicComponentSolution()

    # Check it raises an exception if wrong field flags are given
    self.assertRaises(ValueError, A.compute_gridded_expected_value, 'MMP', component_solution)

    my_expected_grid = numpy.array([[-75., -45., -75., -45., -75., -45.],
                                    [-75., -45., -75., -45., -75., -45.],
                                    [-75., -45., -75., -45., -75., -45.],
                                    [-75., -45., -75., -45., -75., -45.],
                                    [-15., 15., -15., 15., -15., 15.],
                                    [-15., 15., -15., 15., -15., 15.],
                                    [-15., 15., -15., 15., -15., 15.],
                                    [-15., 15., -15., 15., -15., 15.],
                                    [45., 75., 45., 75., 45., 75.],
                                    [45., 75., 45., 75., 45., 75.],
                                    [45., 75., 45., 75., 45., 75.],
                                    [45., 75., 45., 75., 45., 75.]])
    expected_MAP_result = (A.weighting_factors(my_expected_grid) * numpy.cos(numpy.radians(my_expected_grid))).sum(axis=1)
    expected_post_STD_result = (A.weighting_factors(my_expected_grid) * numpy.sin(numpy.radians(my_expected_grid))).sum(axis=1)
    expected_prior_STD_result = (A.weighting_factors(my_expected_grid) * numpy.square(numpy.sin(numpy.radians(my_expected_grid)))).sum(axis=1)

    for field, array in zip(GLOBAL_FIELD_OUTPUT_FLAGS,
                            [expected_MAP_result, expected_post_STD_result, expected_prior_STD_result]):
        numpy.testing.assert_array_equal(array, A.compute_gridded_expected_value(field, component_solution))
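# A minimal sketch of cosine-latitude area weighting, the idea behind the
# weighting factors exercised above: sample points within a cell are weighted
# by cos(latitude) so the cell average approximates a true area average.
# This standalone function is illustrative; it is not the Regridder API.
import numpy

def area_weighted_cell_average(sample_latitudes, sample_values):
    """Average values sampled within one cell, weighting by cos(latitude)."""
    weights = numpy.cos(numpy.radians(sample_latitudes))
    return (weights * sample_values).sum() / weights.sum()

# Two sample latitudes inside a cell spanning 30N-60N: the higher-latitude
# sample carries less weight because parallels shorten towards the pole
print(area_weighted_cell_average(numpy.array([37.5, 52.5]), numpy.array([1.0, 2.0])))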
def output_month(outputfile, time_index, processdate):

    print 'Saving: ', processdate
    print 'Output: ', outputfile

    # Configure output grid
    outputstructure = OutputRectilinearGridStructure(
        time_index, processdate,
        latitudes=numpy.linspace(-87.5, 87.5, num=36),
        longitudes=numpy.linspace(-177.5, 177.5, num=72))

    # Evaluate expected value at these locations
    result_expected_value = AnalysisSystem_HadCRUT4_InMemory().evaluate_expected_value(outputstructure)

    # Save results
    filebuilder = FileBuilderHadCRUT4ExampleOutput(outputfile, outputstructure)
    filebuilder.add_global_field(TAS_ANOMALY, result_expected_value.reshape(1, 36, 72))
    filebuilder.save_and_close()
def test_location_polar_coordinates(self):
    A = OutputRectilinearGridStructure('A', 'B', numpy.array([1, 2, 3]), numpy.array([.1, .3]))
    expected_array = numpy.array([[1, .1], [1, .3],
                                  [2, .1], [2, .3],
                                  [3, .1], [3, .3]])
    numpy.testing.assert_array_equal(expected_array, A.location_polar_coordinates())

    A = OutputRectilinearGridStructure('A', 'B', numpy.array([1, 2]), numpy.array([.1, .3, .2]))
    expected_array = numpy.array([[1, .1], [1, .3], [1, .2],
                                  [2, .1], [2, .3], [2, .2]])
    numpy.testing.assert_array_equal(expected_array, A.location_polar_coordinates())
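# A standalone sketch (assumed equivalent, not the actual implementation) of the
# lat/lon pairing behaviour checked above: every latitude is combined with every
# longitude in row-major order, which numpy.repeat/numpy.tile reproduce directly.
import numpy

def polar_coordinate_pairs(latitudes, longitudes):
    """Return an (n_lat * n_lon, 2) array of [latitude, longitude] pairs."""
    return numpy.column_stack([numpy.repeat(latitudes, len(longitudes)),
                               numpy.tile(longitudes, len(latitudes))])

print(polar_coordinate_pairs(numpy.array([1, 2, 3]), numpy.array([.1, .3])))
# [[1.  0.1] [1.  0.3] [2.  0.1] [2.  0.3] [3.  0.1] [3.  0.3]]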
def setUp(self):
    """Set up mock objects to be used for testing the Regridder class functionalities"""

    self.structure = OutputRectilinearGridStructure(
        1, None, numpy.array([1, 2, 3, 4]), numpy.array([.2, .3, .4]))

    # Example latitude and longitude values for different sets of points
    self.points = [
        numpy.array([[-180.], [-90.], [0.]]),
        numpy.array([[-180., -90.], [0., 90.], [-45, 45]]),
        numpy.array([[-180., -90., 0., 90., -45, 45]])
    ]

    # Expected harmonic factors
    self.expected_arrays = [
        numpy.array([[-1], [0.], [1.]]),
        numpy.array([[-1, 0.], [1., 0.], [numpy.sqrt(2.) / 2., numpy.sqrt(2.) / 2.]]),
        numpy.array([[-1, 0., 1., 0., numpy.sqrt(2.) / 2., numpy.sqrt(2.) / 2.]])
    ]
    self.expected_normalizations = [
        numpy.array([[-1], [0.], [1.]]),
        numpy.array([[-1], [1.], [numpy.sqrt(2.)]]),
        numpy.array([[numpy.sqrt(2.)]])
    ]

    # We cannot divide by zero, so we discard the first normalization factor
    self.expected_weighting_factors = []
    for index in range(1, len(self.expected_arrays)):
        self.expected_weighting_factors.append(
            self.expected_arrays[index] / self.expected_normalizations[index])

    self.cell_dimensions = [[1, 1], [2, 1], [6, 1]]
def output_grid_component(storage_climatology, storage_large_scale, storage_local,
                          outputfile, processdate, time_index,
                          covariates_descriptor, insitu_biases, breakpoints_file,
                          global_biases, global_biases_group_list,
                          compute_uncertainties, method):

    print 'VERSION: {0}'.format(get_revision_id_for_module(eustace))

    # Build analysis system
    analysissystem = AnalysisSystem_EUSTACE(storage_climatology, storage_large_scale, storage_local,
                                            covariates_descriptor, insitu_biases, breakpoints_file,
                                            global_biases, global_biases_group_list,
                                            compute_uncertainties, method)

    # Configure output grid
    outputstructure = OutputRectilinearGridStructure(
        time_index, processdate,
        latitudes=numpy.linspace(-89.875, 89.875, num=definitions.GLOBAL_FIELD_SHAPE[1]),
        longitudes=numpy.linspace(-179.875, 179.875, num=definitions.GLOBAL_FIELD_SHAPE[2]))

    # Evaluate expected value and uncertainty at these locations, each field once
    print 'Evaluating: MAP'
    result_expected_value = analysissystem.evaluate_expected_value('MAP', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)
    print 'Evaluating: post_STD'
    result_expected_uncertainties = analysissystem.evaluate_expected_value('post_STD', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)

    # Save results
    filebuilder = FileBuilderGlobalField(
        outputfile,
        eustace.timeutils.epoch.days_since_epoch(processdate),
        'Infilling Example',
        get_revision_id_for_module(eustace),
        definitions.TAS.name,
        '',
        'Example data only',
        __name__,
        '')
    filebuilder.add_global_field(definitions.TAS, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TASUNCERTAINTY, result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.save_and_close()
def latent_variable_flag(input_directory, output_directory, iteration, processing_dates):

    # Manually set up the analysis model for the R1413 run.
    # Warning: the eustace svn revision must be correct for the global bias model
    # interpretation to match that run's analysis.
    storage_climatology = SpaceTimeComponentSolutionStorageBatched_Files(
        statefilename_read='/work/scratch/cmorice/advanced_standard/climatology_solution_9/climatology_solution_9.pickle',
        sample_filename_read='/work/scratch/cmorice/advanced_standard/climatology_solution_sample_9/climatology_solution_sample_9.pickle',
        prior_sample_filename_read='/work/scratch/cmorice/advanced_standard/climatology_solution_prior_sample_9/climatology_solution_prior_sample_9.pickle',
        keep_in_memory=True)
    storage_large_scale = SpaceTimeComponentSolutionStorageBatched_Files(
        statefilename_read='/work/scratch/cmorice/advanced_standard/large_scale_solution_9/large_scale_solution_9.pickle',
        sample_filename_read='/work/scratch/cmorice/advanced_standard/large_scale_solution_sample_9/large_scale_solution_sample_9.pickle',
        prior_sample_filename_read='/work/scratch/cmorice/advanced_standard/large_scale_solution_prior_sample_9/large_scale_solution_prior_sample_9.pickle',
        keep_in_memory=True)
    storage_local = eustace.analysis.advanced_standard.components.storage_files_batch.SpatialComponentSolutionStorageIndexed_Files()

    covariates_descriptor = "/gws/nopw/j04/eustace/data/internal/climatology_covariates/covariates.json"
    insitu_biases = True
    breakpoints_file = "/gws/nopw/j04/eustace/data/internal/D1.7/daily/eustace_stations_global_R001127_daily_status.nc"
    global_biases = True
    global_biases_group_list = ["surfaceairmodel_ice_global", "surfaceairmodel_land_global", "surfaceairmodel_ocean_global"]
    compute_uncertainties = False
    method = 'EXACT'
    compute_sample = False
    sample_size = definitions.GLOBAL_SAMPLE_SHAPE[3]
    compute_prior_sample = False

    print 'VERSION: {0}'.format(get_revision_id_for_module(eustace))

    # Build analysis system
    analysissystem = AnalysisSystem_EUSTACE(storage_climatology, storage_large_scale, storage_local,
                                            covariates_descriptor, insitu_biases, breakpoints_file,
                                            global_biases, global_biases_group_list,
                                            compute_uncertainties, method)

    grid_resolution = [180. / definitions.GLOBAL_FIELD_SHAPE[1], 360. / definitions.GLOBAL_FIELD_SHAPE[2]]
    latitudes = numpy.linspace(-90. + grid_resolution[0] / 2., 90. - grid_resolution[0] / 2., num=definitions.GLOBAL_FIELD_SHAPE[1])
    longitudes = numpy.linspace(-180. + grid_resolution[1] / 2., 180. - grid_resolution[1] / 2., num=definitions.GLOBAL_FIELD_SHAPE[2])

    #timebase = TimeBaseDays(eustace.timeutils.epoch.EPOCH)
    #processdates = [timebase.number_to_datetime(daynumber) for daynumber in time_indices]

    # Get times as understood by the analysis system
    time_indices = [eustace.timeutils.epoch.days_since_epoch(t) for t in processing_dates]

    cell_sampling = [1, 1]
    blocking = 10

    # Thinned set of sample indices for inclusion in output product
    sample_indices = range(definitions.GLOBAL_SAMPLE_SHAPE[3])

    climatology_projector = None
    large_scale_projector = None
    local_projector = None

    for (inner_index, time_index, processdate) in zip(range(len(time_indices)), time_indices, processing_dates):
        print time_index

        # Initialise flags
        flag_values = numpy.zeros(definitions.GLOBAL_FIELD_SHAPE[1:], FLAG_TYPE)

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(time_index, processdate,
                                                         latitudes=latitudes,
                                                         longitudes=longitudes)

        # Climatology component
        print 'Evaluating: climatology'
        if climatology_projector is None:
            climatology_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
            climatology_projector.set_component(analysissystem.components[0])
            latent_climatology_constraint = evaluate_latent_variable_constraint(climatology_projector)

        climatology_projector.update_time_index(time_index, keep_design=False)
        climatology_projector.evaluate_design_matrix()
        climatology_statistic = evaluate_constraint_statistic(climatology_projector,
                                                              latent_climatology_constraint,
                                                              CONSTRAINT_THRESHOLD).reshape(definitions.GLOBAL_FIELD_SHAPE[1:])
        flag_values[climatology_statistic] = flag_values[climatology_statistic] | CLIMATOLOGY_LATENT_FLAG

        # Large scale component
        print 'Evaluating: large-scale'
        if large_scale_projector is None:
            large_scale_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
            large_scale_projector.set_component(analysissystem.components[1])
            latent_large_scale_constraint = evaluate_latent_variable_constraint(large_scale_projector)

        large_scale_projector.update_time_index(time_index, keep_design=False)
        large_scale_projector.evaluate_design_matrix()
        large_scale_statistic = evaluate_constraint_statistic(large_scale_projector,
                                                              latent_large_scale_constraint,
                                                              CONSTRAINT_THRESHOLD).reshape(definitions.GLOBAL_FIELD_SHAPE[1:])
        flag_values[large_scale_statistic] = flag_values[large_scale_statistic] | LARGE_SCALE_LATENT_FLAG

        outputfile = os.path.join(output_directory,
                                  '{:04d}'.format(processdate.year),
                                  'eustace_analysis_{:d}_qc_flags_{:04d}{:02d}{:02d}.nc'.format(
                                      iteration, processdate.year, processdate.month, processdate.day))
        save_flag_file(flag_values, processdate, outputfile)
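# A small standalone sketch of the bitwise QC flagging pattern used above: each
# component sets its own bit in a shared flag array, so a single uint8 field can
# record several independent test outcomes. The flag values here are illustrative.
import numpy

FLAG_TYPE = numpy.uint8
CLIMATOLOGY_LATENT_FLAG = FLAG_TYPE(1)   # bit 0
LARGE_SCALE_LATENT_FLAG = FLAG_TYPE(2)   # bit 1

flag_values = numpy.zeros((2, 3), FLAG_TYPE)
climatology_statistic = numpy.array([[True, False, True], [False, False, False]])
large_scale_statistic = numpy.array([[True, True, False], [False, False, False]])

# OR each component's flag bit into the cells where its statistic triggered
flag_values[climatology_statistic] |= CLIMATOLOGY_LATENT_FLAG
flag_values[large_scale_statistic] |= LARGE_SCALE_LATENT_FLAG
print(flag_values)  # [[3 2 1] [0 0 0]]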
def main():

    print 'Advanced standard example using a few days of EUSTACE data'
    parser = argparse.ArgumentParser(description='Advanced standard example using a few days of EUSTACE data')
    parser.add_argument('outpath', help='directory where the output should be redirected')
    parser.add_argument('--json_descriptor', default=None,
                        help='a json descriptor containing the covariates to include in the climatology model')
    parser.add_argument('--land_biases', action='store_true',
                        help='include insitu land homogenization bias terms')
    parser.add_argument('--global_biases', action='store_true',
                        help='include global satellite bias terms')
    parser.add_argument('--n_iterations', type=int, default=5,
                        help='number of solving iterations')
    args = parser.parse_args()

    # Input data path
    basepath = os.path.join('/work/scratch/eustace/rawbinary3')

    # Days to process
    #time_indices = range(int(days_since_epoch(datetime(2006, 2, 1))), int(days_since_epoch(datetime(2006, 2, 2))))
    #time_indices = range(int(days_since_epoch(datetime(1906, 2, 1))), int(days_since_epoch(datetime(1906, 2, 2))))
    date_list = [datetime(2006, 1, 1) + relativedelta(days=k) for k in range(3)]
    #backwards_list = [date_list[i] for i in range(11, -1, -1)]
    #date_list = backwards_list
    time_indices = [int(days_since_epoch(date)) for date in date_list]

    # Sources to use
    sources = ['surfaceairmodel_land', 'surfaceairmodel_ocean', 'surfaceairmodel_ice', 'insitu_land', 'insitu_ocean']
    sources = ['insitu_land', 'insitu_ocean']
    #sources = ['surfaceairmodel_land']

    # CLIMATOLOGY COMPONENT: combining the seasonal core along with latitude harmonics, altitude and coastal effects
    if args.json_descriptor is not None:
        loader = LoadCovariateElement(args.json_descriptor)
        loader.check_keys()
        covariate_elements, covariate_hyperparameters = loader.load_covariates_and_hyperparameters()
        print('The following fields have been added as covariates of the climatology model')
        print(loader.data.keys())
    else:
        covariate_elements, covariate_hyperparameters = [], []

    #climatology_element = CombinationElement([SeasonalElement(n_triangulation_divisions=2, n_harmonics=2, include_local_mean=False), GrandMeanElement()] + covariate_elements)
    #climatology_hyperparameters = CombinationHyperparameters([SeasonalHyperparameters(n_spatial_components=2, common_log_sigma=0.0, common_log_rho=0.0), CovariateHyperparameters(numpy.log(15.0))] + covariate_hyperparameters)
    climatology_element = CombinationElement([GrandMeanElement()] + covariate_elements)
    climatology_hyperparameters = CombinationHyperparameters([CovariateHyperparameters(numpy.log(15.0))] + covariate_hyperparameters)
    #climatology_element = SeasonalElement(n_triangulation_divisions=2, n_harmonics=2, include_local_mean=False)
    #climatology_hyperparameters = SeasonalHyperparameters(n_spatial_components=2, common_log_sigma=0.0, common_log_rho=0.0)

    climatology_component = SpaceTimeComponent(
        ComponentStorage_InMemory(climatology_element, climatology_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED')

    # LARGE SCALE (kronecker product) COMPONENT: combining large scale trends with bias terms accounting for homogenization effects
    if args.land_biases:
        bias_element, bias_hyperparameters = [InsituLandBiasElement(BREAKPOINTS_FILE)], [CovariateHyperparameters(numpy.log(.9))]
        print('Adding bias terms for insitu land homogenization')
    else:
        bias_element, bias_hyperparameters = [], []

    large_scale_element = CombinationElement(
        [SpaceTimeKroneckerElement(n_triangulation_divisions=2,
                                   alpha=2,
                                   starttime=-30,
                                   endtime=365 * 1 + 30,
                                   n_nodes=12 * 1 + 2,
                                   overlap_factor=2.5,
                                   H=1)] + bias_element)
    large_scale_hyperparameters = CombinationHyperparameters(
        [SpaceTimeSPDEHyperparameters(space_log_sigma=0.0,
                                      space_log_rho=numpy.log(numpy.radians(15.0)),
                                      time_log_rho=numpy.log(15.0))] + bias_hyperparameters)
    large_scale_component = SpaceTimeComponent(
        ComponentStorage_InMemory(large_scale_element, large_scale_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED')

    # LOCAL COMPONENT: combining local scale variations with global satellite bias terms
    if args.global_biases:
        bias_elements = [BiasElement(groupname, 1) for groupname in GLOBAL_BIASES_GROUP_LIST]
        bias_hyperparameters = [CovariateHyperparameters(numpy.log(15.0)) for index in range(3)]
        print('Adding global bias terms for all the surfaces')
    else:
        bias_elements, bias_hyperparameters = [], []

    n_triangulation_divisions_local = 7
    local_log_sigma = numpy.log(5)
    local_log_rho = numpy.log(numpy.radians(5.0))
    local_element = NonStationaryLocal(n_triangulation_divisions=n_triangulation_divisions_local)
    n_local_nodes = local_element.spde.n_latent_variables()
    local_scale_element = CombinationElement([local_element] + bias_elements)
    local_hyperparameters = ExpandedLocalHyperparameters(log_sigma=numpy.repeat(local_log_sigma, n_local_nodes),
                                                         log_rho=numpy.repeat(local_log_rho, n_local_nodes))
    local_scale_hyperparameters = CombinationHyperparameters([local_hyperparameters] + bias_hyperparameters)
    local_component = DelayedSpatialComponent(
        ComponentStorage_InMemory(local_scale_element, local_scale_hyperparameters),
        SpatialComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED')

    print "hyperparameter storage:", local_component.storage.hyperparameters
    print 'Analysing inputs'

    # Analysis system using the specified components, for the Tmean observable
    ##analysis_system = AnalysisSystem(
    ##    [climatology_component, large_scale_component, local_component],
    ##    ObservationSource.TMEAN)
    analysis_system = OptimizationSystem([climatology_component, local_component], ObservationSource.TMEAN)

    # Object to load raw binary inputs at time indices
    inputloaders = [AnalysisSystemInputLoaderRawBinary_Sources(basepath, source, time_indices) for source in sources]

    for iteration in range(args.n_iterations):

        message = 'Iteration {}'.format(iteration)
        print(message)

        # Update with data
        analysis_system.update(inputloaders, time_indices)

    ##################################################
    # Optimize local model hyperparameters:
    # loop over local regions, generate optimization systems, fit hyperparameters and save

    # Split spde and bias models for the local component into two components
    global_spde_sub_component_definition = ComponentStorage_InMemory(
        CombinationElement([local_element]), CombinationHyperparameters([local_hyperparameters]))
    global_spde_sub_component_storage_solution = SpatialComponentSolutionStorage_InMemory()
    global_spde_sub_component = DelayedSpatialComponent(
        global_spde_sub_component_definition, global_spde_sub_component_storage_solution)

    bias_sub_component_definition = ComponentStorage_InMemory(
        CombinationElement(bias_elements), CombinationHyperparameters(bias_hyperparameters))
    bias_sub_component_storage_solution = SpatialComponentSolutionStorage_InMemory()
    bias_sub_component = DelayedSpatialComponent(bias_sub_component_definition, bias_sub_component_storage_solution)

    element_optimisation_flags = [True, False, False, False]  # one spde, three biases

    for time_key in time_indices:
        split_states_time(local_component, global_spde_sub_component, bias_sub_component,
                          element_optimisation_flags, time_key)

    # Define subregions and extract their states
    neighbourhood_level = 1
    n_subregions = global_spde_sub_component.storage.element_read().combination[0].spde.n_triangles_at_level(neighbourhood_level)

    hyperparameter_file_template = "local_hyperparameters.%i.%i.%i.npy"
    fit_hyperparameters = True
    optimization_component_index = 2
    if fit_hyperparameters:
        for region_index in range(n_subregions):

            # Setup model for local subregion of neighbours with super triangle
            view_flags = [True, ]
            region_element = CombinationElement(
                [LocalSubRegion(n_triangulation_divisions_local, neighbourhood_level, region_index)])
            region_hyperparameters = ExtendedCombinationHyperparameters(
                [LocalHyperparameters(log_sigma=local_log_sigma, log_rho=local_log_rho)])
            region_component_storage_solution = SpatialComponentSolutionStorage_InMemory()
            region_sub_component = DelayedSpatialComponent(
                ComponentStorage_InMemory(region_element, region_hyperparameters),
                region_component_storage_solution)

            for time_key in time_indices:
                print "region_index, time_key:", region_index, time_key
                extract_local_view_states_time(global_spde_sub_component, region_sub_component, view_flags, time_key)

            print "running optimization for region:", region_index
            region_optimization_system = OptimizationSystem(
                [climatology_component, bias_sub_component, region_sub_component],
                ObservationSource.TMEAN)

            for time_key in time_indices:
                region_optimization_system.update_component_time(inputloaders, optimization_component_index, time_key)

            # Commented version that works for few-days inputs:
            #region_optimization_system.components[optimization_component_index].component_solution().optimize()
            #region_optimization_system.components[optimization_component_index].storage.hyperparameters.get_array()
            #hyperparameter_file = os.path.join(args.outpath, hyperparameter_file_template % (n_triangulation_divisions_local, neighbourhood_level, region_index))
            #region_sub_component.storage.hyperparameters.values_to_npy_savefile(hyperparameter_file)

            # Replaced with a version for full processing based on a json dump of input files
            # - need to generate the input_descriptor dict
            hyperparameter_file = os.path.join(
                args.outpath,
                hyperparameter_file_template % (n_triangulation_divisions_local, neighbourhood_level, region_index))
            region_optimization_system.process_inputs(input_descriptor, optimization_component_index, time_indices)
            region_optimization_system.optimize_component(optimization_component_index,
                                                          hyperparameter_storage_file=hyperparameter_file)

            fitted_hyperparameters_converted = region_sub_component.storage.hyperparameters.get_array()
            fitted_hyperparameters_converted[0] = numpy.exp(fitted_hyperparameters_converted[0])
            fitted_hyperparameters_converted[1] = numpy.exp(fitted_hyperparameters_converted[1]) * 180.0 / numpy.pi
            print 'fitted_hyperparameters_converted:', fitted_hyperparameters_converted

    # Setup model for the super triangle without neighbours for hyperparameter merging
    region_spdes = []
    region_hyperparameter_values = []
    for region_index in range(n_subregions):

        # Redefine the region sub component as a supertriangle rather than a neighbourhood
        region_element = CombinationElement(
            [LocalSuperTriangle(n_triangulation_divisions_local, neighbourhood_level, region_index)])
        region_hyperparameters = ExtendedCombinationHyperparameters(
            [LocalHyperparameters(log_sigma=local_log_sigma, log_rho=local_log_rho)])
        region_component_storage_solution = SpatialComponentSolutionStorage_InMemory()
        region_sub_component = DelayedSpatialComponent(
            ComponentStorage_InMemory(region_element, region_hyperparameters),
            region_component_storage_solution)

        # Read the optimized hyperparameters
        hyperparameter_file = os.path.join(
            args.outpath,
            hyperparameter_file_template % (n_triangulation_divisions_local, neighbourhood_level, region_index))
        region_sub_component.storage.hyperparameters.values_from_npy_savefile(hyperparameter_file)

        # Append the spde model and hyperparameters to their lists for merging
        region_spdes.append(region_element.combination[0].spde)
        region_hyperparameter_values.append(region_sub_component.storage.hyperparameters.get_array())

    # Merge and save hyperparameters
    full_spde = local_element.spde
    new_hyperparameter_values, global_sigma_design, global_rho_design = full_spde.merge_local_parameterisations(
        region_spdes, region_hyperparameter_values, merge_method='exp_average')

    local_hyperparameters.set_array(new_hyperparameter_values)
    hyperparameter_file_merged = "merged_hyperparameters.%i.%i.npy" % (n_triangulation_divisions_local, neighbourhood_level)
    local_hyperparameters.values_to_npy_savefile(os.path.join(args.outpath, hyperparameter_file_merged))

    # Refit local model with the optimized hyperparameters
    analysis_system.update_component(inputloaders, 1, time_indices)

    ##################################################

    print 'Computing outputs'

    # Produce an output for each time index
    for time_index in time_indices:

        # Get date for output
        outputdate = inputloaders[0].datetime_at_time_index(time_index)
        print 'Evaluating output grid: ', outputdate

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(
            time_index, outputdate,
            latitudes=numpy.linspace(-89.875, 89.875, num=definitions.GLOBAL_FIELD_SHAPE[1]),
            longitudes=numpy.linspace(-179.875, 179.875, num=definitions.GLOBAL_FIELD_SHAPE[2]))
        # print 'Size of grid: ', outputstructure.number_of_observations()

        # Evaluate expected value at these locations
        result_expected_value = analysis_system.evaluate_expected_value('MAP', outputstructure, 'POINTWISE')
        result_expected_uncertainties = analysis_system.evaluate_expected_value('post_STD', outputstructure, 'POINTWISE')

        # Make output filename
        pathname = 'eustace_example_output_{0:04d}{1:02d}{2:02d}.nc'.format(outputdate.year, outputdate.month, outputdate.day)
        pathname = os.path.join(args.outpath, pathname)
        print 'Saving: ', pathname

        # Save results
        filebuilder = FileBuilderGlobalField(
            pathname, time_index,
            'Infilling Example',
            'UNVERSIONED',
            definitions.TAS.name,
            '',
            'Example data only',
            'eustace.analysis.advanced_standard.examples.example_eustace_few_days',
            '')
        filebuilder.add_global_field(definitions.TAS, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.save_and_close()

    print 'Complete'
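# A quick standalone check of the hyperparameter conversion printed above: the
# optimizer works in log space, so sigma is recovered with exp() and the log
# correlation range (stored in radians) is converted to degrees.
import numpy

log_sigma = numpy.log(5.0)
log_rho = numpy.log(numpy.radians(5.0))
fitted = numpy.array([log_sigma, log_rho])

sigma = numpy.exp(fitted[0])
rho_degrees = numpy.exp(fitted[1]) * 180.0 / numpy.pi
print(sigma)        # approximately 5.0
print(rho_degrees)  # approximately 5.0 (round trip of the initial values)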
def test_mini_world_altitude_with_latitude(self):
    """Testing using altitude as a covariate"""

    # GENERATING OBSERVATIONS
    # Simulated locations: they sit exactly on the grid points of the covariate datafile
    DEM = Dataset(self.altitude_datafile)
    latitude = DEM.variables['lat'][:]
    longitude = DEM.variables['lon'][:]
    altitude = DEM.variables['dem'][:]

    indices = numpy.stack((numpy.array([1, 3, 5, 7, 8, 9, 10, 11]),
                           numpy.array([0, 0, 0, 0, 0, 0, 0, 0])), axis=1)

    selected_location = []
    altitude_observations = []
    for couple in indices:
        selected_location.append([latitude[couple[0], couple[1]], longitude[couple[0], couple[1]]])
        altitude_observations.append(altitude[couple[0], couple[1]])
    DEM.close()

    locations = numpy.array(selected_location)

    # Simulated model is y = z + a*cos(2x) + c*cos(4x) + b*sin(2x) + d*sin(4x),
    # with z = altitude, x = latitude, a = b = c = d = 0
    slope = 1e-3
    measurement = slope * numpy.array(altitude_observations)

    # Simulated errors
    uncorrelatederror = 0.1 * numpy.ones(measurement.shape)

    # Simulated inputs
    simulated_input_loader = SimulatedInputLoader(locations, measurement, uncorrelatederror)

    # Simulate evaluation of this time index
    simulated_time_indices = [0]

    # GENERATING THE MODEL
    # Local component
    geography_covariate_element = GeographyBasedElement(self.altitude_datafile, 'lat', 'lon', 'dem', 1.0)
    geography_covariate_element.load()
    combined_element = CombinationElement([geography_covariate_element, LatitudeHarmonicsElement()])
    combined_hyperparamters = CombinationHyperparameters([
        CovariateHyperparameters(-0.5 * numpy.log(10.)),
        CombinationHyperparameters([CovariateHyperparameters(-0.5 * numpy.log(p)) for p in [10.0, 10.0, 10.0, 10.0]])
    ])
    combined_component = SpatialComponent(
        ComponentStorage_InMemory(combined_element, combined_hyperparamters),
        SpatialComponentSolutionStorage_InMemory())

    # GENERATING THE ANALYSIS
    # Analysis system using the specified components, for the Tmean observable
    analysis_system = AnalysisSystem([combined_component], ObservationSource.TMEAN, log=StringIO())

    # Update with data
    analysis_system.update([simulated_input_loader], simulated_time_indices)

    # Check state vector directly
    statevector = analysis_system.components[0].solutionstorage.partial_state_read(0).ravel()

    # These are the nodes where observations were put (see SimulatedObservationSource above)
    # - check they correspond to within 3 times the stated noise level
    self.assertAlmostEqual(slope, statevector[0], delta=0.3)
    self.assertAlmostEqual(0., statevector[1], delta=0.3)
    self.assertAlmostEqual(0., statevector[2], delta=0.3)
    self.assertAlmostEqual(0., statevector[3], delta=0.3)
    self.assertAlmostEqual(0., statevector[4], delta=0.3)

    # And check output corresponds too
    # (evaluate result on output structure same as input)
    simulated_output_structure = SimulatedObservationStructure(0, locations, None, None)
    result = analysis_system.evaluate_expected_value('MAP', simulated_output_structure, flag='POINTWISE')
    expected = statevector[0] * numpy.array(altitude_observations) \
        + statevector[1] * LatitudeFunction(numpy.cos, 2.0).compute(locations[:, 0]).ravel() \
        + statevector[2] * LatitudeFunction(numpy.sin, 2.0).compute(locations[:, 0]).ravel() \
        + statevector[3] * LatitudeFunction(numpy.cos, 4.0).compute(locations[:, 0]).ravel() \
        + statevector[4] * LatitudeFunction(numpy.sin, 4.0).compute(locations[:, 0]).ravel()
    numpy.testing.assert_almost_equal(expected, result)

    # test output gridding, pointwise limit
    outputstructure = OutputRectilinearGridStructure(
        2, epoch_plus_days(2),
        latitudes=numpy.linspace(-60., 60., num=5),
        longitudes=numpy.linspace(-90., 90., num=10))
    pointwise_result = analysis_system.evaluate_expected_value('MAP', outputstructure, 'POINTWISE')
    pointwise_limit_result = analysis_system.evaluate_expected_value('MAP', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 10)
    numpy.testing.assert_array_almost_equal(pointwise_result, pointwise_limit_result)
def main():

    print 'Advanced standard example using a few days of EUSTACE data'
    parser = argparse.ArgumentParser(description='Advanced standard example using a few days of EUSTACE data')
    parser.add_argument('outpath', help='directory where the output should be redirected')
    parser.add_argument('--json_descriptor', default=None,
                        help='a json descriptor containing the covariates to include in the climatology model')
    parser.add_argument('--land_biases', action='store_true',
                        help='include insitu land homogenization bias terms')
    parser.add_argument('--global_biases', action='store_true',
                        help='include global satellite bias terms')
    parser.add_argument('--n_iterations', type=int, default=5,
                        help='number of solving iterations')
    args = parser.parse_args()

    # Input data path
    basepath = os.path.join('/work/scratch/eustace/rawbinary3')

    # Days to process
    time_indices = range(int(days_since_epoch(datetime(2006, 2, 1))), int(days_since_epoch(datetime(2006, 2, 2))))

    # Sources to use
    sources = ['surfaceairmodel_land', 'surfaceairmodel_ocean', 'surfaceairmodel_ice', 'insitu_land', 'insitu_ocean']

    # SETUP
    # Setup for the seasonal core; climatology covariates setup read from file
    seasonal_setup = {'n_triangulation_divisions': 5,
                      'n_harmonics': 4,
                      'n_spatial_components': 6,
                      'amplitude': 2.,
                      'space_length_scale': 5.,  # length scale in units of degrees
                      }
    grandmean_amplitude = 15.0

    # Setup for the large scale component
    spacetime_setup = {'n_triangulation_divisions': 2,
                       'alpha': 2,
                       'starttime': 0,
                       'endtime': 10.,
                       'n_nodes': 2,
                       'overlap_factor': 2.5,
                       'H': 1,
                       'amplitude': 1.,
                       'space_length_scale': 15.0,  # length scale in units of degrees
                       'time_length_scale': 15.0,   # length scale in units of days
                       }
    bias_amplitude = .9

    # Setup for the local component
    local_setup = {'n_triangulation_divisions': 6,
                   'amplitude': 2.,
                   'space_length_scale': 2.,  # length scale in units of degrees
                   }
    globalbias_amplitude = 15.0

    # CLIMATOLOGY COMPONENT: combining the seasonal core along with latitude harmonics, altitude and coastal effects
    if args.json_descriptor is not None:
        loader = LoadCovariateElement(args.json_descriptor)
        loader.check_keys()
        covariate_elements, covariate_hyperparameters = loader.load_covariates_and_hyperparameters()
        print('The following fields have been added as covariates of the climatology model')
        print(loader.data.keys())
    else:
        covariate_elements, covariate_hyperparameters = [], []

    climatology_element = CombinationElement(
        [SeasonalElement(n_triangulation_divisions=seasonal_setup['n_triangulation_divisions'],
                         n_harmonics=seasonal_setup['n_harmonics'],
                         include_local_mean=True),
         GrandMeanElement()] + covariate_elements)
    climatology_hyperparameters = CombinationHyperparameters(
        [SeasonalHyperparameters(n_spatial_components=seasonal_setup['n_spatial_components'],
                                 common_log_sigma=numpy.log(seasonal_setup['amplitude']),
                                 common_log_rho=numpy.log(numpy.radians(seasonal_setup['space_length_scale']))),
         CovariateHyperparameters(numpy.log(grandmean_amplitude))] + covariate_hyperparameters)

    climatology_component = SpaceTimeComponent(
        ComponentStorage_InMemory(climatology_element, climatology_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED',
        compute_sample=True,
        sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])

    # LARGE SCALE (kronecker product) COMPONENT: combining large scale trends with bias terms accounting for homogenization effects
    if args.land_biases:
        bias_element, bias_hyperparameters = [InsituLandBiasElement(BREAKPOINTS_FILE)], [CovariateHyperparameters(numpy.log(bias_amplitude))]
        print('Adding bias terms for insitu land homogenization')
    else:
        bias_element, bias_hyperparameters = [], []

    large_scale_element = CombinationElement(
        [SpaceTimeKroneckerElement(n_triangulation_divisions=spacetime_setup['n_triangulation_divisions'],
                                   alpha=spacetime_setup['alpha'],
                                   starttime=spacetime_setup['starttime'],
                                   endtime=spacetime_setup['endtime'],
                                   n_nodes=spacetime_setup['n_nodes'],
                                   overlap_factor=spacetime_setup['overlap_factor'],
                                   H=spacetime_setup['H'])] + bias_element)
    large_scale_hyperparameters = CombinationHyperparameters(
        [SpaceTimeSPDEHyperparameters(space_log_sigma=numpy.log(spacetime_setup['amplitude']),
                                      space_log_rho=numpy.log(numpy.radians(spacetime_setup['space_length_scale'])),
                                      time_log_rho=numpy.log(spacetime_setup['time_length_scale']))] + bias_hyperparameters)

    large_scale_component = SpaceTimeComponent(
        ComponentStorage_InMemory(large_scale_element, large_scale_hyperparameters),
        SpaceTimeComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED',
        compute_sample=True,
        sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])

    # LOCAL COMPONENT: combining local scale variations with global satellite bias terms
    if args.global_biases:
        bias_elements = [BiasElement(groupname, 1) for groupname in GLOBAL_BIASES_GROUP_LIST]
        bias_hyperparameters = [CovariateHyperparameters(numpy.log(globalbias_amplitude)) for index in range(len(GLOBAL_BIASES_GROUP_LIST))]
        print('Adding global bias terms for all the surfaces')
    else:
        bias_elements, bias_hyperparameters = [], []

    local_scale_element = CombinationElement(
        [LocalElement(n_triangulation_divisions=local_setup['n_triangulation_divisions'])] + bias_elements)
    local_scale_hyperparameters = CombinationHyperparameters(
        [LocalHyperparameters(log_sigma=numpy.log(local_setup['amplitude']),
                              log_rho=numpy.log(numpy.radians(local_setup['space_length_scale'])))] + bias_hyperparameters)

    local_component = SpatialComponent(
        ComponentStorage_InMemory(local_scale_element, local_scale_hyperparameters),
        SpatialComponentSolutionStorage_InMemory(),
        compute_uncertainties=True,
        method='APPROXIMATED',
        compute_sample=True,
        sample_size=definitions.GLOBAL_SAMPLE_SHAPE[3])

    # Analysis system using the specified components, for the Tmean observable
    print 'Analysing inputs'
    analysis_system = AnalysisSystem(
        [climatology_component, large_scale_component, local_component],
        ObservationSource.TMEAN)

    # Object to load raw binary inputs at time indices
    inputloaders = [AnalysisSystemInputLoaderRawBinary_Sources(basepath, source, time_indices) for source in sources]

    for iteration in range(args.n_iterations):

        message = 'Iteration {}'.format(iteration)
        print(message)

        # Update with data
        analysis_system.update(inputloaders, time_indices)

    print 'Computing outputs'

    # Produce an output for each time index
    for time_index in time_indices:

        # Get date for output
        outputdate = inputloaders[0].datetime_at_time_index(time_index)
        print 'Evaluating output grid: ', outputdate

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(
            time_index, outputdate,
            latitudes=numpy.linspace(-90. + definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                     90. - definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                     num=definitions.GLOBAL_FIELD_SHAPE[1]),
            longitudes=numpy.linspace(-180. + definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                      180. - definitions.GLOBAL_FIELD_RESOLUTION / 2.,
                                      num=definitions.GLOBAL_FIELD_SHAPE[2]))

        # Evaluate expected value and uncertainty at these locations, each field once
        print 'Evaluating: MAP'
        result_expected_value = analysis_system.evaluate_expected_value('MAP', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)
        print 'Evaluating: post_STD'
        result_expected_uncertainties = analysis_system.evaluate_expected_value('post_STD', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 1000)

        print 'Evaluating: climatology fraction'
        climatology_fraction = analysis_system.evaluate_climatology_fraction(outputstructure, [1, 1], 1000)

        print 'Evaluating: the sample'
        sample = analysis_system.evaluate_projected_sample(outputstructure)

        # Make output filename
        pathname = 'eustace_example_output_{0:04d}{1:02d}{2:02d}.nc'.format(outputdate.year, outputdate.month, outputdate.day)
        pathname = os.path.join(args.outpath, pathname)
        print 'Saving: ', pathname

        # Save results
        filebuilder = FileBuilderGlobalField(
            pathname, time_index,
            'Infilling Example',
            'UNVERSIONED',
            definitions.TAS.name,
            '',
            'Example data only',
            'eustace.analysis.advanced_standard.examples.example_eustace_few_days',
            '')
        filebuilder.add_global_field(definitions.TAS, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TAS_CLIMATOLOGY_FRACTION, climatology_fraction.reshape(definitions.GLOBAL_FIELD_SHAPE))

        for index in range(definitions.GLOBAL_SAMPLE_SHAPE[3]):
            variable = copy.deepcopy(definitions.TASENSEMBLE)
            variable.name = variable.name + '_' + str(index)
            selected_sample = sample[:, index].ravel() + result_expected_value
            filebuilder.add_global_field(variable, selected_sample.reshape(definitions.GLOBAL_FIELD_SHAPE))

        filebuilder.save_and_close()

    print 'Complete'
def output_grid_batch(storage_climatology, storage_large_scale, storage_local,
                      outputfiles, climatologyfiles, largescalefiles, localfiles,
                      time_indices,
                      covariates_descriptor, insitu_biases, breakpoints_file,
                      global_biases, global_biases_group_list,
                      compute_uncertainties, method,
                      compute_sample, sample_size, compute_prior_sample):

    from eustace.analysis.advanced_standard.fileio.output_projector import Projector

    variance_ratio_upper_bound = 1.0

    print 'VERSION: {0}'.format(get_revision_id_for_module(eustace))

    # Build analysis system
    analysissystem = AnalysisSystem_EUSTACE(storage_climatology, storage_large_scale, storage_local,
                                            covariates_descriptor, insitu_biases, breakpoints_file,
                                            global_biases, global_biases_group_list,
                                            compute_uncertainties, method)

    grid_resolution = [180. / definitions.GLOBAL_FIELD_SHAPE[1], 360. / definitions.GLOBAL_FIELD_SHAPE[2]]
    latitudes = numpy.linspace(-90. + grid_resolution[0] / 2., 90. - grid_resolution[0] / 2., num=definitions.GLOBAL_FIELD_SHAPE[1])
    longitudes = numpy.linspace(-180. + grid_resolution[1] / 2., 180. - grid_resolution[1] / 2., num=definitions.GLOBAL_FIELD_SHAPE[2])

    timebase = TimeBaseDays(eustace.timeutils.epoch.EPOCH)
    #processdates = [datetime_numeric.build(timebase.number_to_datetime(daynumber)) for daynumber in time_indices]
    processdates = [timebase.number_to_datetime(daynumber) for daynumber in time_indices]

    cell_sampling = [1, 1]
    blocking = 10

    # Thinned set of sample indices for inclusion in output product
    sample_indices = range(definitions.GLOBAL_SAMPLE_SHAPE[3])

    climatology_projector = None
    large_scale_projector = None
    local_projector = None

    for (inner_index, time_index, processdate) in zip(range(len(time_indices)), time_indices, processdates):
        print time_index

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(time_index, processdate,
                                                         latitudes=latitudes,
                                                         longitudes=longitudes)

        # Climatology component
        print 'Evaluating: climatology'
        if climatology_projector is None:
            climatology_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
            climatology_projector.set_component(analysissystem.components[0])

        climatology_projector.update_time_index(time_index, keep_design=False)
        climatology_projector.evaluate_design_matrix()

        climatology_expected_value = climatology_projector.project_expected_value().reshape((-1, 1))
        climatology_uncertainties = climatology_projector.project_sample_deviation()
        climatology_samples = climatology_projector.project_sample_values(sample_indices=sample_indices) + climatology_expected_value
        climatology_unconstraint = numpy.minimum(
            climatology_uncertainties**2 / climatology_projector.project_sample_deviation(prior=True)**2,
            variance_ratio_upper_bound)

        # Large scale component
        print 'Evaluating: large-scale'
        if large_scale_projector is None:
            large_scale_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
            large_scale_projector.set_component(analysissystem.components[1])

        large_scale_projector.update_time_index(time_index, keep_design=False)
        large_scale_projector.evaluate_design_matrix()

        large_scale_expected_value = large_scale_projector.project_expected_value().reshape((-1, 1))
        large_scale_uncertainties = large_scale_projector.project_sample_deviation()
        large_scale_samples = large_scale_projector.project_sample_values(sample_indices=sample_indices) + large_scale_expected_value
        large_scale_unconstraint = numpy.minimum(
            large_scale_uncertainties**2 / large_scale_projector.project_sample_deviation(prior=True)**2,
            variance_ratio_upper_bound)

        # Local component - time handling updates state to the new time but does not recompute the design matrix
        print 'Evaluating: local'
        if local_projector is None:
            local_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
            local_projector.set_component(analysissystem.components[2])
            local_projector.evaluate_design_matrix()
        else:
            local_projector.update_time_index(time_index, keep_design=True)
            local_projector.set_component(analysissystem.components[2], keep_design=True)

        print analysissystem.components

        local_expected_value = local_projector.project_expected_value().reshape((-1, 1))
        local_uncertainties = local_projector.project_sample_deviation()
        local_samples = local_projector.project_sample_values(sample_indices=sample_indices) + local_expected_value
        local_unconstraint = numpy.minimum(
            local_uncertainties**2 / local_projector.project_sample_deviation(prior=True)**2,
            variance_ratio_upper_bound)

        # Save results
        outputfile = outputfiles[inner_index]
        print outputfile

        # Main merged product output files
        filebuilder = FileBuilderGlobalField(
            outputfile,
            eustace.timeutils.epoch.days_since_epoch(processdate),
            'EUSTACE Analysis',
            get_revision_id_for_module(eustace),
            definitions.TAS.name,
            '',
            'Provisional output',
            __name__,
            '')

        result_expected_value = climatology_expected_value + large_scale_expected_value + local_expected_value
        result_expected_uncertainties = numpy.sqrt(climatology_uncertainties**2 + large_scale_uncertainties**2 + local_uncertainties**2)
        # Climatology fraction defined as ratio of posterior to prior variance in the local component
        climatology_fraction = local_unconstraint

        filebuilder.add_global_field(definitions.TAS, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TAS_OBSERVATION_INFLUENCE, 1.0 - climatology_fraction.reshape(definitions.GLOBAL_FIELD_SHAPE))

        for index in range(definitions.GLOBAL_SAMPLE_SHAPE[3]):
            variable = copy.deepcopy(definitions.TASENSEMBLE)
            variable.name = variable.name + '_' + str(index)
            selected_sample = (climatology_samples[:, index] + large_scale_samples[:, index] + local_samples[:, index]).ravel()
            filebuilder.add_global_field(variable, selected_sample.reshape(definitions.GLOBAL_FIELD_SHAPE))

        filebuilder.save_and_close()

        # Climatology-only output
        climatologyfile = climatologyfiles[inner_index]
        filebuilder = FileBuilderGlobalField(
            climatologyfile,
            eustace.timeutils.epoch.days_since_epoch(processdate),
            'EUSTACE Analysis',
            get_revision_id_for_module(eustace),
            definitions.TAS.name,
            '',
            'Provisional component output - climatology',
            __name__,
            '')
        filebuilder.add_global_field(definitions.TAS, climatology_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, climatology_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TAS_OBSERVATION_INFLUENCE, 1.0 - climatology_unconstraint.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.save_and_close()

        # Large-scale-only output
        largescalefile = largescalefiles[inner_index]
        filebuilder = FileBuilderGlobalField(
            largescalefile,
            eustace.timeutils.epoch.days_since_epoch(processdate),
            'EUSTACE Analysis',
            get_revision_id_for_module(eustace),
            definitions.TAS.name,
            '',
            'Provisional component output - large scale',
            __name__,
            '')
        filebuilder.add_global_field(definitions.TASPERTURBATION, large_scale_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, large_scale_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TAS_OBSERVATION_INFLUENCE, 1.0 - large_scale_unconstraint.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.save_and_close()

        # Local-only output
        localfile = localfiles[inner_index]
        filebuilder = FileBuilderGlobalField(
            localfile,
            eustace.timeutils.epoch.days_since_epoch(processdate),
            'EUSTACE Analysis',
            get_revision_id_for_module(eustace),
            definitions.TAS.name,
            '',
            'Provisional component output - local',
            __name__,
            '')
        filebuilder.add_global_field(definitions.TASPERTURBATION, local_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TASUNCERTAINTY, local_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(definitions.TAS_OBSERVATION_INFLUENCE, 1.0 - local_unconstraint.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.save_and_close()

        print "Memory usage (MB):", psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)
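# A standalone sketch of how the merged product's uncertainty is formed above:
# the three components are treated as independent, so their standard deviations
# combine in quadrature while their expected values simply add.
import numpy

climatology_std = numpy.array([0.3, 0.4])
large_scale_std = numpy.array([0.1, 0.2])
local_std = numpy.array([0.5, 0.2])

total_std = numpy.sqrt(climatology_std**2 + large_scale_std**2 + local_std**2)
print(total_std)  # elementwise quadrature sum, e.g. sqrt(0.09 + 0.01 + 0.25) ~ 0.59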
def early_look_grid_batch(storage_climatology, storage_large_scale, storage_local,
                          outputfiles, time_indices,
                          covariates_descriptor, insitu_biases, breakpoints_file,
                          global_biases, global_biases_group_list,
                          compute_uncertainties, method,
                          compute_sample, sample_size, compute_prior_sample):
    """Produce 'early look' NetCDF output files without loading or gridding uncertainty information.

    For inspection of analysis output prior to the final gridding step.
    """

    from eustace.analysis.advanced_standard.fileio.output_projector import Projector

    print 'VERSION: {0}'.format(get_revision_id_for_module(eustace))

    # Build analysis system
    analysissystem = AnalysisSystem_EUSTACE(storage_climatology, storage_large_scale, storage_local,
                                            covariates_descriptor, insitu_biases, breakpoints_file,
                                            global_biases, global_biases_group_list,
                                            compute_uncertainties, method)

    grid_resolution = [180. / definitions.GLOBAL_FIELD_SHAPE[1], 360. / definitions.GLOBAL_FIELD_SHAPE[2]]
    latitudes = numpy.linspace(-90. + grid_resolution[0] / 2., 90. - grid_resolution[0] / 2., num=definitions.GLOBAL_FIELD_SHAPE[1])
    longitudes = numpy.linspace(-180. + grid_resolution[1] / 2., 180. - grid_resolution[1] / 2., num=definitions.GLOBAL_FIELD_SHAPE[2])

    timebase = TimeBaseDays(eustace.timeutils.epoch.EPOCH)
    processdates = [timebase.number_to_datetime(daynumber) for daynumber in time_indices]

    cell_sampling = [1, 1]
    blocking = 10

    climatology_projector = None
    large_scale_projector = None
    local_projector = None

    for (inner_index, time_index, processdate) in zip(range(len(time_indices)), time_indices, processdates):
        print time_index

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(time_index, processdate,
                                                         latitudes=latitudes,
                                                         longitudes=longitudes)

        # Climatology component
        print 'Evaluating: climatology'
        if climatology_projector is None:
            climatology_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
            climatology_projector.set_component(analysissystem.components[0])

        climatology_projector.update_time_index(time_index, keep_design=False)
        climatology_projector.evaluate_design_matrix()
        climatology_expected_value = climatology_projector.project_expected_value().reshape((-1, 1))

        # Large scale component
        print 'Evaluating: large-scale'
        if large_scale_projector is None:
            large_scale_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
            large_scale_projector.set_component(analysissystem.components[1])

        large_scale_projector.update_time_index(time_index, keep_design=False)
        large_scale_projector.evaluate_design_matrix()
        large_scale_expected_value = large_scale_projector.project_expected_value().reshape((-1, 1))

        # Local component - time handling updates state to the new time but does not recompute the design matrix
        print 'Evaluating: local'
        if local_projector is None:
            local_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
            local_projector.set_component(analysissystem.components[2])
            local_projector.evaluate_design_matrix()
        else:
            local_projector.update_time_index(time_index, keep_design=True)
            local_projector.set_component(analysissystem.components[2], keep_design=True)

        print analysissystem.components
        local_expected_value = local_projector.project_expected_value().reshape((-1, 1))

        # Save results
        outputfile = outputfiles[inner_index]
        print outputfile

        # Main merged product output files
        filebuilder = FileBuilderGlobalField(
            outputfile,
            eustace.timeutils.epoch.days_since_epoch(processdate),
            'EUSTACE Analysis',
            get_revision_id_for_module(eustace),
            definitions.TAS.name,
            '',
            'Provisional output',
            __name__,
            '')

        field_definition_tas = definitions.OutputVariable.from_template(
            definitions.TEMPLATE_TEMPERATURE, 'tas', quantity='average', cell_methods='time: mean')
        field_definition_tas_climatology = definitions.OutputVariable.from_template(
            definitions.TEMPLATE_TEMPERATURE, 'tas_climatology', quantity='average', cell_methods='time: mean')
        field_definition_tas_large_scale = definitions.OutputVariable.from_template(
            definitions.TEMPLATE_PERTURBATION, 'tas_large_scale', quantity='average', cell_methods='time: mean')
        field_definition_tas_daily_local = definitions.OutputVariable.from_template(
            definitions.TEMPLATE_PERTURBATION, 'tas_daily_local', quantity='average', cell_methods='time: mean')

        result_expected_value = climatology_expected_value + large_scale_expected_value + local_expected_value

        filebuilder.add_global_field(field_definition_tas, result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(field_definition_tas_climatology, climatology_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(field_definition_tas_large_scale, large_scale_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.add_global_field(field_definition_tas_daily_local, local_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
        filebuilder.save_and_close()

        print "Memory usage (MB):", psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)
def output_grid(storage_climatology, storage_large_scale, storage_local,
                outputfile, climatologyfile, largescalefile, localfile,
                processdate, time_index,
                covariates_descriptor, insitu_biases, breakpoints_file,
                global_biases, global_biases_group_list,
                compute_uncertainties, method,
                compute_sample, sample_size, compute_prior_sample):
    """Produce final gridded NetCDF output: the merged product plus per-component files,
    including uncertainty and ensemble sample fields."""

    from eustace.analysis.advanced_standard.fileio.output_projector import Projector

    print 'VERSION: {0}'.format(get_revision_id_for_module(eustace))

    # Build analysis system
    analysissystem = AnalysisSystem_EUSTACE(storage_climatology, storage_large_scale, storage_local,
                                            covariates_descriptor, insitu_biases, breakpoints_file,
                                            global_biases, global_biases_group_list,
                                            compute_uncertainties, method)

    grid_resolution = [180. / definitions.GLOBAL_FIELD_SHAPE[1],
                       360. / definitions.GLOBAL_FIELD_SHAPE[2]]

    latitudes = numpy.linspace(-90. + grid_resolution[0] / 2.,
                               90. - grid_resolution[0] / 2.,
                               num=definitions.GLOBAL_FIELD_SHAPE[1])
    longitudes = numpy.linspace(-180. + grid_resolution[1] / 2.,
                                180. - grid_resolution[1] / 2.,
                                num=definitions.GLOBAL_FIELD_SHAPE[2])

    cell_sampling = [1, 1]
    blocking = 10

    # Configure output grid
    outputstructure = OutputRectilinearGridStructure(time_index, processdate,
                                                     latitudes=latitudes,
                                                     longitudes=longitudes)

    # thinned set of sample indices for inclusion in output product
    sample_indices = range(definitions.GLOBAL_SAMPLE_SHAPE[3])

    # climatology component
    print 'Evaluating: climatology'
    climatology_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
    climatology_projector.set_component(analysissystem.components[0])
    climatology_projector.evaluate_design_matrix()

    climatology_expected_value = climatology_projector.project_expected_value().reshape((-1, 1))
    climatology_uncertainties = climatology_projector.project_sample_deviation()
    climatology_samples = climatology_projector.project_sample_values(sample_indices=sample_indices) + climatology_expected_value
    climatology_unconstraint = climatology_uncertainties**2 / climatology_projector.project_sample_deviation(prior=True)**2

    climatology_projector = None  # clear projector from memory
    print climatology_expected_value.shape, climatology_uncertainties.shape, climatology_samples.shape

    # large scale component
    print 'Evaluating: large-scale'
    large_scale_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
    large_scale_projector.set_component(analysissystem.components[1])
    large_scale_projector.evaluate_design_matrix()

    large_scale_expected_value = large_scale_projector.project_expected_value().reshape((-1, 1))
    large_scale_uncertainties = large_scale_projector.project_sample_deviation()
    large_scale_samples = large_scale_projector.project_sample_values(sample_indices=sample_indices) + large_scale_expected_value
    large_scale_unconstraint = large_scale_uncertainties**2 / large_scale_projector.project_sample_deviation(prior=True)**2

    large_scale_projector = None  # clear projector from memory
    print large_scale_expected_value.shape, large_scale_uncertainties.shape, large_scale_samples.shape

    # local component
    print 'Evaluating: local'
    local_projector = Projector(latitudes, longitudes, grid_resolution, time_index, cell_sampling, blocking)
    local_projector.set_component(analysissystem.components[2])
    local_projector.evaluate_design_matrix()

    local_expected_value = local_projector.project_expected_value().reshape((-1, 1))
    local_uncertainties = local_projector.project_sample_deviation()
    local_samples = local_projector.project_sample_values(sample_indices=sample_indices) + local_expected_value
    local_unconstraint = local_uncertainties**2 / local_projector.project_sample_deviation(prior=True)**2

    local_projector = None  # clear projector from memory
    print local_expected_value.shape, local_uncertainties.shape, local_samples.shape

    # Save results
    print outputfile

    # main merged product output files
    filebuilder = FileBuilderGlobalField(outputfile,
                                         eustace.timeutils.epoch.days_since_epoch(processdate),
                                         'EUSTACE Analysis',
                                         get_revision_id_for_module(eustace),
                                         definitions.TAS.name,
                                         '',
                                         'Provisional output',
                                         __name__,
                                         '')

    # defined as ratio of posterior to prior variance in local component
    climatology_fraction = local_unconstraint

    result_expected_value = climatology_expected_value + large_scale_expected_value + local_expected_value
    result_expected_uncertainties = numpy.sqrt(climatology_uncertainties**2 +
                                               large_scale_uncertainties**2 +
                                               local_uncertainties**2)

    filebuilder.add_global_field(definitions.TAS,
                                 result_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TASUNCERTAINTY,
                                 result_expected_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TAS_CLIMATOLOGY_FRACTION,
                                 climatology_fraction.reshape(definitions.GLOBAL_FIELD_SHAPE))

    for index in range(definitions.GLOBAL_SAMPLE_SHAPE[3]):
        variable = copy.deepcopy(definitions.TASENSEMBLE)
        variable.name = variable.name + '_' + str(index)
        selected_sample = (climatology_samples[:, index] +
                           large_scale_samples[:, index] +
                           local_samples[:, index]).ravel()
        filebuilder.add_global_field(variable, selected_sample.reshape(definitions.GLOBAL_FIELD_SHAPE))

    filebuilder.save_and_close()

    # climatology only output
    filebuilder = FileBuilderGlobalField(climatologyfile,
                                         eustace.timeutils.epoch.days_since_epoch(processdate),
                                         'EUSTACE Analysis',
                                         get_revision_id_for_module(eustace),
                                         definitions.TAS.name,
                                         '',
                                         'Provisional component output - climatology',
                                         __name__,
                                         '')

    filebuilder.add_global_field(definitions.TAS,
                                 climatology_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TASUNCERTAINTY,
                                 climatology_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TAS_CLIMATOLOGY_FRACTION,
                                 climatology_unconstraint.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.save_and_close()

    # large scale only output
    filebuilder = FileBuilderGlobalField(largescalefile,
                                         eustace.timeutils.epoch.days_since_epoch(processdate),
                                         'EUSTACE Analysis',
                                         get_revision_id_for_module(eustace),
                                         definitions.TAS.name,
                                         '',
                                         'Provisional component output - large scale',
                                         __name__,
                                         '')

    filebuilder.add_global_field(definitions.TASPERTURBATION,
                                 large_scale_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TASUNCERTAINTY,
                                 large_scale_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TAS_CLIMATOLOGY_FRACTION,
                                 large_scale_unconstraint.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.save_and_close()

    # local only output
    filebuilder = FileBuilderGlobalField(localfile,
                                         eustace.timeutils.epoch.days_since_epoch(processdate),
                                         'EUSTACE Analysis',
                                         get_revision_id_for_module(eustace),
                                         definitions.TAS.name,
                                         '',
                                         'Provisional component output - local',
                                         __name__,
                                         '')

    filebuilder.add_global_field(definitions.TASPERTURBATION,
                                 local_expected_value.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TASUNCERTAINTY,
                                 local_uncertainties.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.add_global_field(definitions.TAS_CLIMATOLOGY_FRACTION,
                                 local_unconstraint.reshape(definitions.GLOBAL_FIELD_SHAPE))
    filebuilder.save_and_close()
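# Illustrative note on the 'unconstraint' fields written above: each is the
# ratio of posterior to prior variance for one component (the merged product
# reuses the local component's ratio under TAS_CLIMATOLOGY_FRACTION). Values
# near zero mean observations strongly constrain that component; values near
# one mean the prior dominates. A minimal numerical sketch with made-up numbers:
def example_unconstraint():
    import numpy
    posterior_deviation = numpy.array([0.05, 0.5, 1.0])  # as from project_sample_deviation()
    prior_deviation = numpy.array([1.0, 1.0, 1.0])       # as from project_sample_deviation(prior=True)
    return posterior_deviation**2 / prior_deviation**2   # -> [0.0025, 0.25, 1.0]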
def test_mini_world_noiseless(self):

    number_of_simulated_time_steps = 1

    # Build system
    element = SeasonalElement(n_triangulation_divisions=3, n_harmonics=5, include_local_mean=True)
    hyperparameters = SeasonalHyperparameters(n_spatial_components=6, common_log_sigma=0.0, common_log_rho=0.0)

    component = SpaceTimeComponent(ComponentStorage_InMemory(element, hyperparameters),
                                   SpaceTimeComponentSolutionStorage_InMemory())
    analysis_system = AnalysisSystem([component], ObservationSource.TMEAN, log=StringIO())

    # use fixed locations from icosahedron
    fixed_locations = cartesian_to_polar2d(MeshIcosahedronSubdivision.build(3).points)

    # random measurement at each location
    numpy.random.seed(8976)
    field_basis = numpy.random.randn(fixed_locations.shape[0])

    # some time function that varies over a year
    time_basis = numpy.cos(numpy.linspace(0.1, 1.75 * numpy.pi, number_of_simulated_time_steps))

    # kronecker product of the two
    measurement = numpy.kron(field_basis, numpy.expand_dims(time_basis, 1))

    # Simulated inputs
    simulated_input_loader = SimulatedInputLoader(fixed_locations, measurement, 0.0001)

    # Simulate evaluation of this time index
    simulated_time_indices = range(number_of_simulated_time_steps)

    # Iterate
    for iteration in range(5):
        analysis_system.update([simulated_input_loader], simulated_time_indices)

    # Get all results
    result = numpy.zeros(measurement.shape)
    for t in range(number_of_simulated_time_steps):
        result[t, :] = analysis_system.evaluate_expected_value(
            'MAP', SimulatedObservationStructure(t, fixed_locations, None, None), flag='POINTWISE')

    # Should be very close to original because specified noise is low
    numpy.testing.assert_almost_equal(result, measurement)
    max_disparity = (numpy.abs(result - measurement)).ravel().max()
    self.assertTrue(max_disparity < 1E-5)

    # test output gridding, pointwise limit
    outputstructure = OutputRectilinearGridStructure(
        2, epoch_plus_days(2),
        latitudes=numpy.linspace(-60., 60., num=5),
        longitudes=numpy.linspace(-90., 90., num=10))
    pointwise_result = analysis_system.evaluate_expected_value(
        'MAP', outputstructure, 'POINTWISE')
    pointwise_limit_result = analysis_system.evaluate_expected_value(
        'MAP', outputstructure, 'GRID_CELL_AREA_AVERAGE', [1, 1], 10)
    numpy.testing.assert_array_almost_equal(pointwise_result, pointwise_limit_result)
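# Note on the pointwise-limit assertion above: with cell_sampling of [1, 1]
# the grid-cell-average design matrix samples each cell only at its centre and
# the area weights cancel in the normalisation, so GRID_CELL_AREA_AVERAGE
# reduces exactly to the POINTWISE evaluation.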
def setUp(self):
    """Set up mock objects to be used for testing the Regridder class functionalities"""

    self.structure = OutputRectilinearGridStructure(1, None,
                                                    numpy.array([1, 2, 3, 4]),
                                                    numpy.array([.2, .3, .4]))
def main():

    print 'EUSTACE example using HadCRUT4 monthly data'

    # Input data path
    input_basepath = os.path.join(WORKSPACE_PATH, 'data/incoming/HadCRUT4.5.0.0')

    # Input filenames
    input_filenames = [
        'hadcrut4_median_netcdf.nc',
        'hadcrut4_uncorrelated_supplementary.nc',
        'hadcrut4_blended_uncorrelated.nc'
    ]

    # Months to process
    time_indices = range(2)

    # Climatology component
    climatology_component = SpaceTimeComponent(
        ComponentStorage_InMemory(
            SeasonalElement(n_triangulation_divisions=5, n_harmonics=5, include_local_mean=True),
            SeasonalHyperparameters(n_spatial_components=6, common_log_sigma=1.0, common_log_rho=0.0)),
        SpaceTimeComponentSolutionStorage_InMemory())

    # Number of factors for large scale (factor analysis) component and initial hyperparameters
    n_factors = 5
    factors = []
    factor_hyperparameters = []
    for factor_index in range(n_factors):

        factor_hyperparameters.append(SpaceTimeSPDEHyperparameters(
            space_log_sigma=0.0,
            space_log_rho=numpy.log(10.0 * numpy.pi / 180 + 25.0 * numpy.pi / 180 * (n_factors - factor_index) / n_factors),
            time_log_rho=numpy.log(1.0 / 12.0 + 6.0 / 12.0 * (n_factors - factor_index) / n_factors)))

        factors.append(SpaceTimeFactorElement(n_triangulation_divisions=5, alpha=2,
                                              starttime=0, endtime=36, overlap_factor=2.5, H=1))

    # Large scale (factor analysis) component
    large_scale_component = SpaceTimeComponent(
        ComponentStorage_InMemory(CombinationElement(factors), CombinationHyperparameters(factor_hyperparameters)),
        SpaceTimeComponentSolutionStorage_InMemory())

    # Local component
    local_component = SpatialComponent(
        ComponentStorage_InMemory(
            LocalElement(n_triangulation_divisions=4),
            LocalHyperparameters(log_sigma=0.0, log_rho=numpy.log(10.0 * numpy.pi / 180))),
        SpatialComponentSolutionStorage_InMemory())

    print 'Analysing inputs'

    # Analysis system using the specified components, for the Tmean observable
    analysis_system = AnalysisSystem(
        [climatology_component, large_scale_component, local_component],
        ObservationSource.TMEAN)

    # Make filelist
    input_filelist = [os.path.join(input_basepath, filename) for filename in input_filenames]

    # Object to load HadCRUT4 inputs at time indices
    inputloader = AnalysisSystemInputLoaderHadCRUT4(input_filelist)

    # Update with data
    analysis_system.update([inputloader], time_indices)

    print 'Computing outputs'

    # Produce an output for each time index
    for time_index in time_indices:

        # Make output filename
        outputdate = inputloader.datetime_at_time_index(time_index)
        pathname = 'example_output_{0:04d}{1:02d}.nc'.format(outputdate.year, outputdate.month)
        print 'Saving: ', pathname

        # Configure output grid
        outputstructure = OutputRectilinearGridStructure(
            time_index, outputdate,
            latitudes=numpy.linspace(-87.5, 87.5, num=36),
            longitudes=numpy.linspace(-177.5, 177.5, num=72))

        # Evaluate expected value at these locations
        result_expected_value = analysis_system.evaluate_expected_value(outputstructure)

        # Save results
        filebuilder = FileBuilderHadCRUT4ExampleOutput(pathname, outputstructure)
        filebuilder.add_global_field(TAS_ANOMALY, result_expected_value.reshape(1, 36, 72))
        filebuilder.save_and_close()

    print 'Complete'
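# Conventional entry point, assuming this example is run as a standalone
# script (hypothetical here; the original invocation context is not shown).
if __name__ == '__main__':
    main()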
def test_number_of_observations(self):
    A = OutputRectilinearGridStructure('A', 'B',
                                       numpy.array([1, 2, 3]),
                                       numpy.array([.1, .3]))
    self.assertEqual(6, A.number_of_observations())