def linear_trans_opt(param, *args):
    """
    Objective function for the outer optimization over a linear transformation of the data set.

    The scatter points are transformed with the candidate matrix, binned into a 20 x 20 grid of quadrats
    spanning the transformed point cloud, and the kernel hyper-parameters are then fitted by an inner
    Nelder-Mead run on short_log_integrand_data.

    :param param: transform_mat - the matrix variables being optimized, handed to fn.transform_array
    :param args: x coordinates of the scatter points, y coordinates of the scatter points, center passed to
    fn.transform_array, kernel passed through to short_log_integrand_data
    :return: the value of the inner objective at the end of the hyper-parameter optimization (solution.fun)
    """
    x_scatter, y_scatter, c, kernel = args[0], args[1], args[2], args[3]

    # Move the scatter points into the transformed space
    points = np.vstack((x_scatter, y_scatter))
    points_trans = fn.transform_array(param, points, c)
    x_trans = points_trans[0]
    y_trans = points_trans[1]

    # The regression window is the bounding box of the transformed points
    x_bounds = [min(x_trans), max(x_trans)]
    y_bounds = [min(y_trans), max(y_trans)]

    # ChangeParam - number of quads along each dimension; x and y quad lengths will generally differ
    n_side = 20
    # y is binned first so the count array is indexed [y, x], the same layout as the default meshgrid
    counts, y_edges, x_edges = np.histogram2d(y_trans, x_trans, bins=n_side,
                                              range=[y_bounds, x_bounds])

    # Drop the last edge in each dimension so the coordinate meshes match the shape of the count array
    x_grid, y_grid = np.meshgrid(x_edges, y_edges)
    quad_coords = np.vstack((fn.row_create(x_grid[:-1, :-1]),
                             fn.row_create(y_grid[:-1, :-1])))
    quad_counts = fn.row_create(counts)

    # An optimization process is embedded within another optimization process: fit the kernel
    # hyper-parameters from an arbitrary starting point for this particular transformation
    inner_options = {'xatol': 1, 'fatol': 100, 'disp': True, 'maxfev': 500}
    start_hyperparameters = np.array([1, 1, 1, 1])
    solution = scopt.minimize(fun=short_log_integrand_data,
                              args=(quad_coords, quad_counts, kernel),
                              x0=start_hyperparameters,
                              method='Nelder-Mead',
                              options=inner_options)
    print('Last function evaluation is ', solution.fun)
    return solution.fun
def linear_trans_skinny_opt(param, *args):
    """
    Computes the average objective value per selected quadrat using standard GP regression, by first
    performing transformation of the data set.

    The scatter points and the polygon vertices are transformed with the same matrix about the same center.
    The transformed points are binned into a 20 x 20 grid, only the quadrats whose coordinates fall inside the
    transformed polygon are kept, and the kernel hyper-parameters are fitted on those quadrats by an inner
    Nelder-Mead run. Dividing by the number of selected quadrats lets the result adapt to how many quadrats
    remain in the regression window after transformation.

    :param param: transform_mat - matrix variables to be optimized
    :param args: xy coordinates of the scatter points (x in row 0, y in row 1), center, kernel type, and array
    containing the polygon vertices in the right order in the original mathematical space
    :return: solution.fun of the inner optimization divided by the number of selected quadrats
    """
    # Define original required arguments
    xy_scatter = args[0]
    c = args[1]
    kernel = args[2]
    vertex_array = args[3]  # Have to be in the right order in the original mathematical space

    # Define parameters to be optimized - the matrix variables
    transform_mat = param

    # Begin transformation of the regression window
    xy_scatter_transformed = fn.transform_array(transform_mat, xy_scatter, c)
    x_points_trans = xy_scatter_transformed[0]
    y_points_trans = xy_scatter_transformed[1]

    # 1. Obtain the maximum range in x and y in the transformed space - to define the regression window
    x_down = min(x_points_trans)
    x_up = max(x_points_trans)
    y_down = min(y_points_trans)
    y_up = max(y_points_trans)

    # Conduct binning into transformed space - the x and y quad lengths will be different
    # ChangeParam
    quads_on_side = 20  # define the number of quads along each dimension
    # y is binned first so that k_mesh is indexed [y, x], matching the default meshgrid layout below
    k_mesh, y_edges, x_edges = np.histogram2d(y_points_trans, x_points_trans, bins=quads_on_side,
                                              range=[[y_down, y_up], [x_down, x_up]])
    x_mesh_plot, y_mesh_plot = np.meshgrid(x_edges, y_edges)  # creating mesh-grid for use
    x_mesh = x_mesh_plot[:-1, :-1]  # Removing extra rows and columns due to edges
    y_mesh = y_mesh_plot[:-1, :-1]
    x_quad = fn.row_create(x_mesh)  # Creating the rows from the mesh
    y_quad = fn.row_create(y_mesh)
    xy_quad = np.vstack((x_quad, y_quad))
    k_quad = fn.row_create(k_mesh)

    # Selection of quadrats that fall inside the polygon
    # Transform the vertices using the same transformation matrix AND the same center as the scatter points.
    # The center is the local c unpacked from args; the previous code referenced a name "center" that is
    # never defined in this function.
    transformed_vertices = fn.transform_array(transform_mat, vertex_array, c)

    # Create polygon and the boolean indicator of the quadrats lying inside it
    polygon = mpath.Path(np.transpose(transformed_vertices))
    polygon_indicator = polygon.contains_points(np.transpose(xy_quad), transform=None, radius=1.0)
    x_quad_polygon = x_quad[polygon_indicator]
    y_quad_polygon = y_quad[polygon_indicator]
    xy_quad_polygon = np.vstack((x_quad_polygon, y_quad_polygon))
    k_quad_polygon = k_quad[polygon_indicator]

    # Begin Optimization using selected quadrats
    arguments = (xy_quad_polygon, k_quad_polygon, kernel)

    # Initialise kernel hyper-parameters - arbitrary value but should be as close to actual value as possible
    initial_hyperparameters = np.array([1, 1, 1, 1])

    # An optimization process is embedded within another optimization process
    solution = scopt.minimize(fun=short_log_integrand_data, args=arguments, x0=initial_hyperparameters,
                              method='Nelder-Mead',
                              options={'xatol': 1, 'fatol': 100, 'disp': True, 'maxfev': 1000})

    # NOTE(review): the sign convention of short_log_integrand_data is not visible here - the value is
    # passed through unchanged and only normalised by the quadrat count
    positive_log_likelihood = solution.fun
    selected_quadrats_n = k_quad_polygon.size
    avg_positive_log_likelihood = positive_log_likelihood / selected_quadrats_n
    print('Last function evaluation is ', solution.fun)
    print('The number of selected quadrats inside polygon is', selected_quadrats_n)
    return avg_positive_log_likelihood
y = np.ravel(df.values[1])

"""Specify rotation matrix for data set"""
theta = 0 * np.pi  # Specify degree of rotation in a clockwise direction in radians
mat_transform = np.matrix([[np.cos(theta), np.sin(theta)],
                           [-np.sin(theta), np.cos(theta)]])
df_matrix = np.vstack((x, y))
df_transform = mat_transform * df_matrix  # mat_transform is an np.matrix, so * is the matrix product
x_transform = np.ravel(df_transform[0])
y_transform = np.ravel(df_transform[1])

"""Bin point process data"""
# np.histogram2d bins its FIRST argument along axis 0, whereas the default np.meshgrid(x_edges, y_edges)
# varies x along axis 1. Binning (y, x) makes histo indexed [y, x], so that after flattening every count
# lines up with the coordinates of its own quadrat instead of the transposed quadrat.
# (assumes fn.row_create flattens every mesh in the same element order - TODO confirm)
histo, y_edges, x_edges = np.histogram2d(y_transform, x_transform, bins=10)
xv_trans_data, yv_trans_data = np.meshgrid(x_edges, y_edges)
xv_trans_data = xv_trans_data[:-1, :-1]  # Removing the last bin edge to make dimensions consistent
yv_trans_data = yv_trans_data[:-1, :-1]  # Contains a square matrix
xv_trans_row = fn.row_create(xv_trans_data)  # Creates a row from the square matrix
yv_trans_row = fn.row_create(yv_trans_data)
histo = fn.row_create(histo)
# Quadrats with a zero count are kept - no filtering on histo is applied here
xy_data_coord = np.vstack((xv_trans_row, yv_trans_row))  # location of all the data points

"""
Data point coordinates are now at bottom-left hand corner, coordinates of data points have to be centralised
to the centre of each quadrat
"""
print(xy_data_coord.shape)

"""Note the above relates to obtaining the data set first"""

"""Generate Gaussian Surface which forms the basis of the latent intensity function"""
y_down = min(transformed_vertices[1])
y_up = max(transformed_vertices[1])

# ChangeParam - create histogram in transformed space before quadrat selection
quads_on_side = 20  # define the number of quads along each dimension
# y is binned first so that k_mesh is indexed [y, x], matching the default meshgrid layout below
k_mesh, y_edges, x_edges = np.histogram2d(y_points_trans, x_points_trans, bins=quads_on_side,
                                          range=[[y_down, y_up], [x_down, x_up]])
x_mesh_plot, y_mesh_plot = np.meshgrid(x_edges, y_edges)  # creating mesh-grid for use
x_mesh = x_mesh_plot[:-1, :-1]  # Removing extra rows and columns due to edges
y_mesh = y_mesh_plot[:-1, :-1]
x_quad = fn.row_create(x_mesh)  # Creating the rows from the mesh
y_quad = fn.row_create(y_mesh)
k_quad = fn.row_create(k_mesh)

# Realign the quad coordinates to the centers - shift centers by half a quad length on either dimension.
# The quad length must come from the range that was actually binned (x_down..x_up, y_down..y_up); the
# x_lower/x_upper/y_lower/y_upper names used previously are not the bounds given to the histogram above.
quad_length_x = (x_up - x_down) / quads_on_side
quad_length_y = (y_up - y_down) / quads_on_side
x_quad = x_quad + (0.5 * quad_length_x)
y_quad = y_quad + (0.5 * quad_length_y)
xy_quad = np.vstack((x_quad, y_quad))

# Create Polygon using the transformed_vertices
polygon = mpath.Path(np.transpose(transformed_vertices))

# Create Boolean array which is the polygon indicator
y = np.ravel(df.values[1]) """Specify rotation matrix for data set""" theta = 0 # Specify degree of rotation mat_transform = np.matrix([[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]]) df_matrix = np.vstack((x, y)) df_transform = mat_transform * df_matrix x_transform = np.ravel(df_transform[0]) y_transform = np.ravel(df_transform[1]) """Bin point process data""" histo, x_edges, y_edges = np.histogram2d(x_transform, y_transform, bins=10) xv_transform, yv_transform = np.meshgrid(x_edges, y_edges) xv_transform = xv_transform[:-1, : -1] # Removing the last bin edge and zero points to make dimensions consistent yv_transform = yv_transform[:-1, :-1] # Contains a square matrix xv_transform_row = fn.row_create( xv_transform) # Creates a row from the square matrix yv_transform_row = fn.row_create(yv_transform) histo = fn.row_create(histo) xv_transform_row = xv_transform_row[ histo != 0] # Remove data point at histogram equal 0 yv_transform_row = yv_transform_row[histo != 0] xy_data_coord = np.vstack((xv_transform_row, yv_transform_row)) histo = histo[histo != 0] # This is after putting them into rows """Calculate optimal hyper-parameters""" xyz_data = (xy_data_coord, histo) initial_param = np.array( [10, 2, 3]) # sigma, length and noise - find a good one to reduce iterations # No bounds needed for Nelder-Mead solution = scopt.minimize(fun=log_model_evidence, args=xyz_data,
# ------------------------------------------ Start of Histogram Generation from Box
# ChangeParam - number of quads along each dimension
quads_on_side = 20

# The binning range is the regression box itself, so both the bin edges and the quad lengths below follow
# directly from x_lower / x_upper / y_lower / y_upper
box_range = [[y_lower, y_upper], [x_lower, x_upper]]
k_mesh, y_edges, x_edges = np.histogram2d(y_within_window, x_within_window,
                                          bins=quads_on_side, range=box_range)

# Mesh of bin edges; the last row and column are dropped so the meshes match the shape of k_mesh
x_mesh_plot, y_mesh_plot = np.meshgrid(x_edges, y_edges)
x_mesh = x_mesh_plot[:-1, :-1]
y_mesh = y_mesh_plot[:-1, :-1]
# ------------------------------------------ End of Histogram Generation from Box

# ------------------------------------------ Start of Realignment of Quad Centers
# Measure the quad length in each dimension
quad_length_x = (x_upper - x_lower) / quads_on_side
quad_length_y = (y_upper - y_lower) / quads_on_side

# Flatten the meshes into rows and move each coordinate from the quad corner up to the quad centre,
# i.e. by half a quad length
x_quad = fn.row_create(x_mesh) + (0.5 * quad_length_x)
y_quad = fn.row_create(y_mesh) + (0.5 * quad_length_y)

# Stack x and y coordinates together - the box version is not used
xy_quad_box = np.vstack((x_quad, y_quad))

# Generate Histogram Array - Histo is in a mesh form
aedes_brazil_2014 = aedes_df[brazil & year_2014]
aedes_brazil_2013 = aedes_df[brazil & year_2013]
aedes_brazil_2012 = aedes_df[brazil & year_2012]

# Column 5 is read as x and column 4 as y
x_2014 = aedes_brazil_2014.values[:, 5].astype('float64')
y_2014 = aedes_brazil_2014.values[:, 4].astype('float64')
x_2013 = aedes_brazil_2013.values[:, 5].astype('float64')
y_2013 = aedes_brazil_2013.values[:, 4].astype('float64')

# First conduct a regression on the 2014 data set
# NOTE(review): the histogram below is built from the 2013 arrays, not 2014 - confirm which year is intended
quads_on_side = 10  # define the number of quads along each dimension
# y is binned first so that histo is indexed [y, x], matching the default meshgrid layout below
histo, y_edges, x_edges = np.histogram2d(y_2013, x_2013, bins=quads_on_side)
x_mesh, y_mesh = np.meshgrid(x_edges, y_edges)  # creating mesh-grid for use
x_mesh = x_mesh[:-1, :-1]  # Removing extra rows and columns due to edges
y_mesh = y_mesh[:-1, :-1]
x_quad = fn.row_create(x_mesh)  # Creating the rows from the mesh
y_quad = fn.row_create(y_mesh)

# *** Centralising the coordinates to be at the centre of the quads
# Note that the quads will not be of equal length, depending on the data set
# The quad length is the full edge span divided by the number of quads. x_quad / y_quad no longer contain
# the last edge, so their first-to-last span covers only quads_on_side - 1 quads and must not be used here.
quad_length_x = (x_edges[-1] - x_edges[0]) / quads_on_side
quad_length_y = (y_edges[-1] - y_edges[0]) / quads_on_side
x_quad = x_quad + 0.5 * quad_length_x
y_quad = y_quad + 0.5 * quad_length_y
xy_quad = np.vstack((x_quad, y_quad))  # stacking the x and y coordinates vertically together
k_quad = fn.row_create(histo)  # histogram array

"""Generate auto-covariance matrix with noise - using arbitrary hyper-parameters first"""
sigma_arb = 3
length_arb = 5
noise_arb = 2
y_window = (y_values > y_lower) & (y_values < y_upper)

# Keep only the scatter points strictly inside the regression window in both dimensions
x_within_window = x_values[x_window & y_window]
y_within_window = y_values[x_window & y_window]
print('Number of scatter points = ', x_within_window.shape)
print('Number of scatter points = ', y_within_window.shape)

# First conduct a regression on the 2014 data set
# ChangeParam
quads_on_side = 20  # define the number of quads along each dimension
# y is binned first so that histo is indexed [y, x], matching the default meshgrid layout below.
# No range is given, so the bin edges span the min..max of the selected points.
histo, y_edges, x_edges = np.histogram2d(y_within_window, x_within_window, bins=quads_on_side)
x_mesh, y_mesh = np.meshgrid(x_edges, y_edges)  # creating mesh-grid for use
x_mesh = x_mesh[:-1, :-1]  # Removing extra rows and columns due to edges
y_mesh = y_mesh[:-1, :-1]
x_quad_all = fn.row_create(x_mesh)  # Creating the rows from the mesh
y_quad_all = fn.row_create(y_mesh)

# *** Centralising the coordinates to be at the centre of the quads
# Note that the quads will not be of equal length, depending on the data set
# The quad length is the full edge span divided by the number of quads. x_quad_all / y_quad_all no longer
# contain the last edge, so their first-to-last span covers only quads_on_side - 1 quads.
quad_length_x = (x_edges[-1] - x_edges[0]) / quads_on_side
quad_length_y = (y_edges[-1] - y_edges[0]) / quads_on_side
x_quad_all = x_quad_all + 0.5 * quad_length_x
y_quad_all = y_quad_all + 0.5 * quad_length_y
xy_quad_all = np.vstack((x_quad_all, y_quad_all))  # stacking the x and y coordinates vertically together
k_quad_all = fn.row_create(histo)  # histogram array

# For graphical plotting
x_mesh_centralise_all = x_quad_all.reshape(x_mesh.shape)
y_mesh_centralise_all = y_quad_all.reshape(y_mesh.shape)
df_transform = mat_transform * df_matrix x_transform = np.ravel(df_transform[0]) y_transform = np.ravel(df_transform[1]) print(type(x_transform[1])) """Bin point process data""" bins_number = 10 histo, x_edges, y_edges = np.histogram2d(x_transform, y_transform, bins=bins_number) xv_trans_data, yv_trans_data = np.meshgrid(x_edges, y_edges) xv_trans_data = xv_trans_data[:-1, : -1] # Removing the last bin edge and zero points to make dimensions consistent yv_trans_data = yv_trans_data[:-1, :-1] # Contains a square matrix xv_trans_row = fn.row_create( xv_trans_data) # Creates a row from the square matrix yv_trans_row = fn.row_create(yv_trans_data) histo_k_array = fn.row_create(histo) # this is the k array # xv_transform_row = xv_transform_row[histo != 0] # Remove data point at histogram equal 0 # yv_transform_row = yv_transform_row[histo != 0] # histo = histo[histo != 0] # This is after putting them into rows """ Data point coordinates are now at bottom-left hand corner, coordinates of data points have to be centralised to the centre of each quadrat """ # Centralizing coordinates for each quadrat - taking the last minus first value xv_trans_row = xv_trans_row + 0.5 * ((x_edges[-1] - x_edges[0]) / bins_number) yv_trans_row = yv_trans_row + 0.5 * ((y_edges[-1] - y_edges[0]) / bins_number) # Stack into 2 rows of many columns xy_data_coord = np.vstack(
print('y range is', y_max - y_min) # Display regression window size at each iteration # ChangeParam quads_on_side = 10 # define the number of quads along each dimension k_mesh, y_edges, x_edges = np.histogram2d(y_points_trans, x_points_trans, bins=quads_on_side, range=[[y_min, y_max], [x_min, x_max]]) x_mesh_plot, y_mesh_plot = np.meshgrid( x_edges, y_edges) # creating mesh-grid for use x_mesh = x_mesh_plot[:-1, : -1] # Removing extra rows and columns due to edges y_mesh = y_mesh_plot[:-1, :-1] x_quad = fn.row_create(x_mesh) # Creating the rows from the mesh y_quad = fn.row_create(y_mesh) xy_quad = np.vstack((x_quad, y_quad)) k_quad = fn.row_create(k_mesh) # Kernel Optimization ker = 'matern1' # Start Optimization arguments = (xy_quad, k_quad, ker) # Check time taken for the optimization start_opt = time.clock() solution = scopt.minimize(fun=short_log_integrand_data,
"""Specify rotation matrix for data set""" theta = 0 * np.pi # Specify degree of rotation in a clockwise direction in radians mat_transform = np.matrix([[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]]) df_matrix = np.vstack((x, y)) df_transform = mat_transform * df_matrix x_transform = np.ravel(df_transform[0]) y_transform = np.ravel(df_transform[1]) """Bin point process data""" histo, x_edges, y_edges = np.histogram2d(x_transform, y_transform, bins=10) xv_transform, yv_transform = np.meshgrid(x_edges, y_edges) xv_transform = xv_transform[:-1, :-1] # Removing the last bin edge and zero points to make dimensions consistent yv_transform = yv_transform[:-1, :-1] # Contains a square matrix xv_transform_row = fn.row_create(xv_transform) # Creates a row from the square matrix yv_transform_row = fn.row_create(yv_transform) histo = fn.row_create(histo) xv_transform_row = xv_transform_row[histo != 0] # Remove data point at histogram equal 0 yv_transform_row = yv_transform_row[histo != 0] xy_data_coord = np.vstack((xv_transform_row, yv_transform_row)) histo = histo[histo != 0] # This is after putting them into rows """Optimization using Latin Hypercube Sampling - Differential Evolution""" matern_v = 3/2 # Define matern_v xyz_data = (xy_data_coord, histo, matern_v) # No initial parameters needed as latin hypercube sampling is used - to get the global optimum boundary = [(0, 30), (0, 3), (0, 3)] solution = scopt.differential_evolution(func=log_model_evidence, bounds=boundary, args=xyz_data, init='latinhypercube') sigma_optimal = solution.x[0]
def rotation_likelihood_opt(param, *args):
    """
    Objective is to find the angle of rotation that gives the greatest log-likelihood, based on a standard GP
    regression. It would be a safe assumption that the same optimal angle will be obtained using both standard
    GP regression and the LGCP. Over here, we do not need to tabulate the posterior so that saves time.
    We are taking the xy_data which is already boxed and a single year will be taken.

    The points inside the regression window are rotated about the center, the points that remain inside the
    window after rotation are binned, and the kernel hyper-parameters are fitted by an inner Nelder-Mead run.

    :param param: angle of rotation in degrees - note there is only one parameter to optimize. Accepts a
    scalar or a one-element array (the form in which scipy.optimize supplies it)
    :param args: center, kernel form, number of quads per side, xy coordinates (x in row 0, y in row 1),
    regression window (x_upper, x_lower, y_upper, y_lower)
    :return: solution.fun of the inner hyper-parameter optimization on short_log_integrand_data
    """
    # Normalise the angle to a plain float so that scalar and one-element-array inputs behave the same
    angle = float(np.ravel(param)[0])

    # convert angle to radians
    radians = (angle / 180) * np.pi

    # Unpack Param Tuple
    center = args[0]
    kernel = args[1]
    n_quads = args[2]
    xy_coordinates = args[3]  # Make this a tuple, so it will be a tuple within a tuple
    regression_window = args[4]  # This is an array - x_upper, x_lower, y_upper and y_lower

    # Define regression window
    x_upper_box = regression_window[0]
    x_lower_box = regression_window[1]
    y_upper_box = regression_window[2]
    y_lower_box = regression_window[3]

    # Break up xy_coordinates into x and y
    x_coordinates = xy_coordinates[0]
    y_coordinates = xy_coordinates[1]

    # Define Boolean Variable for Scatter Points Selection
    x_range_box = (x_coordinates > x_lower_box) & (x_coordinates < x_upper_box)
    y_range_box = (y_coordinates > y_lower_box) & (y_coordinates < y_upper_box)

    # Obtain data points within the regression window
    x_coordinates = x_coordinates[x_range_box & y_range_box]
    y_coordinates = y_coordinates[x_range_box & y_range_box]

    # Stack x and y coordinates
    xy_within_box = np.vstack((x_coordinates, y_coordinates))

    # Build the 2 x 2 rotation matrix explicitly.
    # The previous code stacked the two rows of [[cos, -sin], [sin, cos]] with np.hstack. For a scalar angle
    # that gives a length-4 vector, which np.matmul cannot multiply with the (2, N) coordinates. For a
    # one-element array angle it happened to produce [[cos, sin], [-sin, cos]]. That effective matrix
    # (a clockwise rotation) is kept so that results for existing callers do not change.
    cos_a = np.cos(radians)
    sin_a = np.sin(radians)
    rotation_mat = np.array([[cos_a, sin_a],
                             [-sin_a, cos_a]])

    # Rotate about the center: shift to the origin, rotate, shift back
    x_within_box = xy_within_box[0] - center[0]
    y_within_box = xy_within_box[1] - center[1]
    xy_within_box = np.vstack((x_within_box, y_within_box))
    xy_within_box = np.matmul(rotation_mat, xy_within_box)
    rotated_x = xy_within_box[0] + center[0]
    rotated_y = xy_within_box[1] + center[1]

    # Create boolean variable - keep only the rotated points that are still inside the regression window
    x_window_w = (rotated_x > x_lower_box) & (rotated_x < x_upper_box)
    y_window_w = (rotated_y > y_lower_box) & (rotated_y < y_upper_box)
    x_window = rotated_x[x_window_w & y_window_w]
    y_window = rotated_y[x_window_w & y_window_w]

    # ChangeParam
    # y is binned first so that histo_f is indexed [y, x], matching the default meshgrid layout below
    histo_f, y_edges_f, x_edges_f = np.histogram2d(y_window, x_window, bins=n_quads)
    x_mesh_plot_f, y_mesh_plot_f = np.meshgrid(x_edges_f, y_edges_f)  # creating mesh-grid for use
    x_mesh_f = x_mesh_plot_f[:-1, :-1]  # Removing extra rows and columns due to edges
    y_mesh_f = y_mesh_plot_f[:-1, :-1]
    x_quad_f = fn.row_create(x_mesh_f)  # Creating the rows from the mesh
    y_quad_f = fn.row_create(y_mesh_f)

    # Note that over here, we do not have to consider the alignment of quad centers
    # Stack x and y coordinates together
    xy_quad = np.vstack((x_quad_f, y_quad_f))

    # Create histogram array
    k_quad = fn.row_create(histo_f)

    # Begin tabulating log marginal likelihood after optimizing for kernel hyper-parameters
    initial_hyperparam = np.array([3, 2, 1, 1])  # Note that this initial condition should be close to actual

    # Set up tuple for arguments
    args_hyperparam = (xy_quad, k_quad, kernel)

    # Start Optimization Algorithm
    hyperparam_solution = scopt.minimize(fun=short_log_integrand_data, args=args_hyperparam,
                                         x0=initial_hyperparam, method='Nelder-Mead',
                                         options={'xatol': 1, 'fatol': 1, 'disp': True, 'maxfev': 10000})

    # Extract Log_likelihood value
    neg_log_likelihood = hyperparam_solution.fun

    # Eventually, we will have to minimize the negative log likelihood
    # Hence, this is actually an optimization nested within another optimization algorithm
    return neg_log_likelihood
print('The number of scatter points is', x_taiwan_selected.size)

# Create array for 3-D histogram clustering - rows are x, y and t
xy_taiwan_selected = np.vstack((x_taiwan_selected, y_taiwan_selected))
xyt_taiwan_selected = np.vstack((xy_taiwan_selected, t_taiwan_selected))

vox_on_side = 10
# histogramdd expects one sample per row, hence the transpose; k_mesh is indexed [x_bin, y_bin, t_bin]
k_mesh, xyt_edges = np.histogramdd(
    np.transpose(xyt_taiwan_selected),
    bins=(vox_on_side, vox_on_side, vox_on_side),
    range=((x_lower, x_upper), (y_lower, y_upper), (year_lower, year_upper)))

# Lower edge of every voxel in each dimension (the last edge is dropped)
x_edges = xyt_edges[0][:-1]
y_edges = xyt_edges[1][:-1]
t_edges = xyt_edges[2][:-1]

# indexing='ij' keeps the coordinate meshes in the same [x, y, t] axis order as k_mesh. The default 'xy'
# indexing swaps the first two axes, which would pair each count with the coordinates of the voxel mirrored
# across the x-y diagonal once everything is flattened.
# (assumes fn.row_create flattens every mesh in the same element order - TODO confirm)
x_mesh, y_mesh, t_mesh = np.meshgrid(x_edges, y_edges, t_edges, indexing='ij')
x_vox = fn.row_create(x_mesh)
y_vox = fn.row_create(y_mesh)
t_vox = fn.row_create(t_mesh)
k_vox = fn.row_create(k_mesh)

print('k_vox shape is', k_vox.shape)
print("Initial Data Points are ", k_vox)

# Initialise arguments and parameters for optimization
# Arbitrary vector for optimization using the Newton-CG optimization algorithm
initial_p_array = np.ones_like(k_vox)

# Choose appropriate starting point for the optimization
# Initialise array for optimization start
initial_v_scalar = np.arange(0, 10, 1)
initial_v_array = fn.log_special(k_vox)