def compute_indicator(args):
    """
    Fit SPI/gamma for each longitude of a chunk at a single latitude.

    :param args: two-element sequence (lon_index_start, lat_index);
                 lon_index_start is used only for log messages, lat_index
                 selects the latitude column being processed

    Reads the module-level shared input array and writes one fitted series
    per month scale into the shared output array.
    """
    lon_index_start, lat_index = args[0], args[1]

    # view the shared memory blocks as correctly-shaped numpy arrays
    input_data = np.ctypeslib.as_array(input_shared_array).reshape(input_data_shape)
    output_data = np.ctypeslib.as_array(output_shared_array).reshape(output_data_shape)

    for lon_index in range(lons_per_chunk):
        series = input_data[:, lon_index, lat_index]

        # skip empty grid cells: fully masked, all-NaN, or no positive values
        fully_masked = isinstance(series, np.ma.MaskedArray) and series.mask.all()
        if fully_masked or np.isnan(series).all() or (series <= 0).all():
            # logger.info('No input_data at lon/lat: {0}/{1}'.format(lon_index_start + lon_index, lat_index))
            continue

        # we have some valid values to work with
        logger.info('Processing longitude/latitude: {}/{}'.format(lon_index_start + lon_index, lat_index))

        # fit the cell's time series to gamma at every requested month scale
        for scale_index, month_scale in enumerate(month_scales):
            output_data[scale_index, :, lon_index, lat_index] = indices.spi_gamma(series,
                                                                                  month_scale,
                                                                                  valid_min,
                                                                                  valid_max)
def compute_indicator_by_lons(input_dataset,
                              output_dataset,
                              input_var_name,
                              output_var_name,
                              month_scale,
                              valid_min,
                              valid_max,
                              dim1_index,   # typically lon, for example with gridded datasets
                              dim2_index):  # typically lat, for example with gridded datasets
    """
    Compute SPI (gamma fit) for every dim2 point of one dim1 (longitude) slice.

    :param input_dataset: open NetCDF dataset holding the input variable,
                          expected dimensions (times, dim1, dim2)
    :param output_dataset: open NetCDF dataset receiving the computed values
    :param input_var_name: name of the input variable
    :param output_var_name: name of the output variable
    :param month_scale: SPI month scale, passed through to indices.spi_gamma
    :param valid_min: minimum valid output value, passed through
    :param valid_max: maximum valid output value, passed through
    :param dim1_index: index of the slice along dimension 1 (typically lon)
    :param dim2_index: UNUSED -- the original code overwrote this parameter
                       with its own loop variable, so the passed-in value was
                       never read; it is retained only so existing callers
                       keep working
    """
    # slice out the full period of record for this dim1 (longitude) index
    data = input_dataset.variables[input_var_name][:, dim1_index, :]

    # keep the original data shape, we'll use this to reshape later
    original_shape = input_dataset.variables[input_var_name].shape

    # loop variable renamed from dim2_index so it no longer shadows the parameter
    for point_index in range(input_dataset.variables[input_var_name].shape[2]):
        # only process non-empty grid cells, i.e. cells with at least some unmasked values
        if isinstance(data[:, point_index], np.ma.MaskedArray) and data[:, point_index].mask.all():
            continue

        # we have some valid values to work with
        logger.info('Processing x/y {}/{}'.format(dim1_index, point_index))

        # perform a fitting to gamma
        data[:, point_index] = indices.spi_gamma(data[:, point_index],
                                                 month_scale,
                                                 valid_min,
                                                 valid_max)

    # assign values for the full period of record back into the longitude slice
    output_dataset.variables[output_var_name][:, dim1_index, :] = \
        np.reshape(data, (original_shape[0], 1, original_shape[2]))
def compute_indicator_by_lons(input_dataset,
                              output_dataset,
                              input_var_name,
                              output_var_name,
                              month_scale,
                              valid_min,
                              valid_max,
                              dim1_index,   # typically lon, for example with gridded datasets
                              dim2_index):  # typically lat, for example with gridded datasets
    """
    Compute SPI (gamma fit) for every dim2 point of one dim1 (longitude) slice,
    serializing the NetCDF reads/writes behind the module-level lock.

    :param input_dataset: open NetCDF dataset holding the input variable,
                          expected dimensions (times, dim1, dim2)
    :param output_dataset: open NetCDF dataset receiving the computed values
    :param input_var_name: name of the input variable
    :param output_var_name: name of the output variable
    :param month_scale: SPI month scale, passed through to indices.spi_gamma
    :param valid_min: minimum valid output value, passed through
    :param valid_max: maximum valid output value, passed through
    :param dim1_index: index of the slice along dimension 1 (typically lon)
    :param dim2_index: UNUSED -- the original code overwrote this parameter
                       with its own loop variable; retained only so existing
                       callers keep working
    """
    # hold the lock only for the NetCDF read; 'with' guarantees the lock is
    # released even if the read raises (the original acquire()/release() pair
    # would leak the lock on an exception and deadlock the other workers)
    with lock:
        # slice out the period of record for this dim1 (longitude) index
        data = input_dataset.variables[input_var_name][:, dim1_index, :]

    # keep the original data shape, we'll use this to reshape later
    original_shape = input_dataset.variables[input_var_name].shape

    # loop variable renamed from dim2_index so it no longer shadows the parameter
    for point_index in range(input_dataset.variables[input_var_name].shape[2]):
        # only process non-empty grid cells, i.e. cells with at least some unmasked values
        if isinstance(data[:, point_index], np.ma.MaskedArray) and data[:, point_index].mask.all():
            continue

        # we have some valid values to work with
        logger.info('Processing x/y {}/{}'.format(dim1_index, point_index))

        # perform a fitting to gamma
        data[:, point_index] = indices.spi_gamma(data[:, point_index],
                                                 month_scale,
                                                 valid_min,
                                                 valid_max)

    # reacquire the lock for the NetCDF write; again released automatically
    with lock:
        output_dataset.variables[output_var_name][:, dim1_index, :] = \
            np.reshape(data, (original_shape[0], 1, original_shape[2]))
def compute_indicator(self):
    """
    Worker loop: repeatedly pull (index, month_scale, valid_min, valid_max)
    tuples from self.queue, fit the indexed column of the shared array to
    gamma, and write the fitted values back in place.

    A None item is treated as the shutdown sentinel and ends the loop.
    Every queue item -- including the sentinel -- is acknowledged with
    task_done() so that queue.join() cannot hang.
    """
    while True:
        # get one work item from the queue
        arguments = self.queue.get()

        # a None item is the shutdown sentinel: acknowledge it and stop
        # (the original broke out WITHOUT task_done(), which deadlocks
        # any caller blocked in queue.join())
        if arguments is None:
            self.queue.task_done()
            break

        try:
            # unpack the work item
            index = arguments[0]
            month_scale = arguments[1]
            valid_min = arguments[2]
            valid_max = arguments[3]

            # view the shared memory as a correctly-shaped numpy array
            data = np.ctypeslib.as_array(self.shared_array)
            data = data.reshape(self.data_shape)

            series = data[:, index]

            # only process non-empty grid cells: fully masked, all-NaN, or
            # all-negative series are skipped
            # NOTE(review): this uses '< 0' where sibling workers use '<= 0'
            # (an all-zero series is NOT skipped here) -- confirm intended
            if (isinstance(series, np.ma.MaskedArray) and series.mask.all()) \
                    or np.isnan(series).all() or (series < 0).all():
                pass
            else:
                # we have some valid values to work with
                logger.info('Processing latitude: {}'.format(index))

                # perform a fitting to gamma
                fitted_values = indices.spi_gamma(series, month_scale, valid_min, valid_max)

                # update the shared array
                data[:, index] = fitted_values
        finally:
            # always acknowledge the task, even if the fit raised, so that
            # queue.join() sees it as completed
            self.queue.task_done()
def compute_indicator(args):
    """
    Fit one latitude's time series to gamma (SPI) within the shared array.

    :param args: sequence whose first element is the latitude index

    Relies on module-level shared_array, data_shape, month_scale,
    valid_min and valid_max; writes the fitted values back in place.
    """
    index = args[0]

    # expose the shared memory block as a correctly-shaped numpy array
    data = np.ctypeslib.as_array(shared_array).reshape(data_shape)
    series = data[:, index]

    # nothing to do for empty cells: fully masked, all-NaN, or no positive values
    fully_masked = isinstance(series, np.ma.MaskedArray) and series.mask.all()
    if fully_masked or np.isnan(series).all() or (series <= 0).all():
        return

    # we have some valid values to work with
    logger.info('Processing latitude: {}'.format(index))

    # fit to gamma and write the result back into the shared array
    data[:, index] = indices.spi_gamma(series, month_scale, valid_min, valid_max)
spi_gamma_datasets[variable_name_spi_gamma] = spi_gamma_dataset # loop over the grid cells for x in range(precip_dataset.variables[x_dim_name].size): for y in range(precip_dataset.variables[y_dim_name].size): logger.info('Processing x/y {}/{}'.format(x, y)) # slice out the period of record for the x/y point precip_data = precip_dataset.variables[precip_var_name][:, x, y] # only process non-empty grid cells, i.e. the data array contains at least some non-NaN values if (isinstance(precip_data, np.ma.MaskedArray)) and precip_data.mask.all(): continue else: # we have some valid values to work with for month_scale_index, month_scale_var_name in enumerate(sorted(spi_gamma_datasets.keys())): # perform the SPI computation (fit to the Gamma distribution) and assign the values into the dataset spi_gamma_datasets[month_scale_var_name].variables[month_scale_var_name][:, x, y] = \ indices.spi_gamma(precip_data, month_scales[month_scale_index], valid_min, valid_max) except Exception, e: logger.error('Failed to complete', exc_info=True) raise
import sys

sys.path.insert(0, "/Users/marcoventurini/Downloads/indices_rc1")

import compute
import indices as ind
import pandas as pd
import json
import numpy as np
from scipy.stats import norm

# load the monthly precipitation series for a single grid point
df = pd.read_csv(
    '/Users/marcoventurini/Documents/spark-2.0.0-bin-hadoop2.7/data/MonthlyPrp_lat-14_lon35.csv'
)
precip = np.asarray(df.PrpSummed)

# compute SPI at the 3- and 12-month scales and attach each as a column;
# direct column assignment replaces the original per-row iterrows()/df.loc
# loops, which performed n individual pandas lookups for the same result
df['SPI3'] = ind.spi_gamma(precip, 3)
df['SPI12'] = ind.spi_gamma(precip, 12)

# write only the identifying and indicator columns
df.to_csv(
    '/Users/marcoventurini/Documents/spark-2.0.0-bin-hadoop2.7/SPIshort.csv',
    columns=['Year', 'Month', 'SPI3', 'SPI12'],
    index=False)
def compute_worker(args):
    # Worker entry point: compute SPI and/or SPEI (gamma and/or Pearson III
    # fits) for a single latitude of a longitude slice held in the
    # module-level shared array, writing results back in place.
    #
    # extract the arguments
    lat_index = args[0]

    # turn the shared array into a numpy array
    data = np.ctypeslib.as_array(shared_array)
    data = data.reshape(data_shape)

    # data now expected to be in shape: (indicators, distributions, month_scales, times, lats)
    #
    # with indicator (spi: 0, spei: 1)
    # distribution (gamma: 0, pearson: 1)
    # month_scales (0, month_scales)
    #
    # with data[0, 0, 0] indicating the longitude slice with shape: (times, lats) with values for precipitation
    # with data[1, 0, 0] indicating the longitude slice with shape: (times, lats) with values for temperature
    #
    # NOTE(review): the SPEI calls below read temperature from
    # data[0, 0, 1, :, lat_index], but the shape description above places
    # temperature at data[1, 0, 0] -- one of the two must be wrong; confirm
    # against the code that populates the shared array before trusting SPEI
    # output.

    # only process non-empty grid cells, i.e. data array contains at least some non-NaN values
    # (the emptiness test inspects only the precipitation input at month scale 0)
    if (isinstance(data[0, 0, 0, :, lat_index], np.ma.MaskedArray) and data[0, 0, 0, :, lat_index].mask.all()) \
            or np.isnan(data[0, 0, 0, :, lat_index]).all() or (data[0, 0, 0, :, lat_index] <= 0).all():
        pass
    else:
        # we have some valid values to work with
        logger.info('Processing latitude: {}'.format(lat_index))

        for month_index, month_scale in enumerate(month_scales):
            # only process month scales after 0 since month_scale = 0 is reserved for the input data
            if month_index > 0:
                # loop over all specified indicators
                for indicator in indicators:
                    # loop over all specified distributions
                    for distribution in distributions:
                        if indicator == 'spi':
                            if distribution == 'gamma':
                                # perform a fitting to gamma
                                data[0, 0, month_index, :, lat_index] = indices.spi_gamma(data[0, 0, 0, :, lat_index],
                                                                                          month_scale,
                                                                                          valid_min,
                                                                                          valid_max)
                            elif distribution == 'pearson':
                                # perform a fitting to Pearson type III
                                # (NOTE(review): unlike the SPEI branch there is no
                                # else raising on an unknown distribution here, so an
                                # invalid distribution is silently skipped for SPI)
                                data[0, 1, month_index, :, lat_index] = indices.spi_pearson(data[0, 0, 0, :, lat_index],
                                                                                            month_scale,
                                                                                            valid_min,
                                                                                            valid_max,
                                                                                            data_start_year,
                                                                                            data_end_year,
                                                                                            calibration_start_year,
                                                                                            calibration_end_year)
                        elif indicator == 'spei':
                            if distribution == 'gamma':
                                # perform a fitting to gamma
                                data[1, 0, month_index, :, lat_index] = indices.spei_gamma(data[0, 0, 0, :, lat_index],
                                                                                           data[0, 0, 1, :, lat_index],
                                                                                           data_start_year,
                                                                                           lats_array[lat_index],
                                                                                           month_scale,
                                                                                           valid_min,
                                                                                           valid_max)
                            elif distribution == 'pearson':
                                # perform a fitting to Pearson type III
                                data[1, 1, month_index, :, lat_index] = indices.spei_pearson(data[0, 0, 0, :, lat_index],
                                                                                             data[0, 0, 1, :, lat_index],
                                                                                             month_scale,
                                                                                             lats_array[lat_index],
                                                                                             valid_min,
                                                                                             valid_max,
                                                                                             data_start_year,
                                                                                             data_end_year,
                                                                                             calibration_start_year,
                                                                                             calibration_end_year)
                            else:
                                raise ValueError('Invalid distribution specified: {}'.format(distribution))
                        else:
                            raise ValueError('Invalid indicator specified: {}'.format(indicator))
calibration_end_year) # perform the SPEI computation (fit to the Gamma distribution) and assign the values into the dataset spei_gamma_dataset.variables[variable_name_spei_gamma][:, x, y] = indices.spei_gamma(precip_data, temp_data, data_start_date.year, latitude, month_scale, valid_min, valid_max) # perform the SPI computation (fit to the Pearson distribution) and assign the values into the dataset spi_pearson_dataset[variable_name_spi_pearson][:, x, y] = indices.spi_pearson(precip_data, month_scale, valid_min, valid_max, data_start_date.year, data_end_date.year, calibration_start_year, calibration_end_year) # perform the SPI computation (fit to the Gamma distribution) and assign the values into the dataset spi_gamma_dataset.variables[variable_name_spi_gamma][:, x, y] = indices.spi_gamma(precip_data, month_scale, valid_min, valid_max) except Exception, e: logger.error('Failed to complete', exc_info=True) raise
# slice out the period of record for the x/y point precip_data = precip_dataset.variables[precip_var_name][:, x, y] # only process non-empty grid cells, i.e. data array contains at least some non-NaN values if (isinstance(precip_data, np.ma.MaskedArray)) and precip_data.mask.all(): continue else: # we have some valid values to work with logger.info('Processing x/y {}/{}'.format(x, y)) # perform the SPI computation (fit to the Gamma distribution) and assign the values into the dataset data = indices.spi_gamma(precip_data, month_scale, valid_min, valid_max) output_dataset.variables[variable_name][:, x, y] = data # output_dataset.variables[variable_name][:, x, y] = indices.spi_gamma(precip_data, # month_scale, # valid_min, # valid_max) # report on the elapsed time end_datetime = datetime.now() logger.info("End time: {}".format(end_datetime, '%x')) elapsed = end_datetime - start_datetime logger.info("Elapsed time: {}".format(elapsed, '%x')) except Exception, e: logger.error('Failed to complete', exc_info=True)