def rNeighbours2dPlot(X, y, r=0.5, res=100, dist_scale='normalize',
                      im_kws=None, reg_kws=None, ax=None):
    """Draw the prediction surface of a radius-neighbours regressor in 2-D.

    A ``RadiusNeighborsRegressor`` is fitted on the (optionally rescaled)
    two-column feature matrix and evaluated on a ``res`` x ``res`` grid that
    spans the data range; the grid of predictions is shown with ``imshow``.

    Parameters
    ----------
    X : array-like or pandas.DataFrame with two columns
        Feature matrix; only columns 0 and 1 are used.
    y : array-like
        Regression targets passed to ``fit``.
    r : float
        Neighbourhood radius (in rescaled units when ``dist_scale`` is set).
    res : int
        Number of grid points along each axis.
    dist_scale : 'normalize', None, or number/array
        ``'normalize'`` divides each column by its range, ``None`` leaves the
        data untouched, anything else is used as a divisor for ``X``.
    im_kws : dict, optional
        Extra keyword arguments for ``imshow``. ``origin``, ``extent`` and
        ``aspect`` are filled in only when the caller did not supply them.
    reg_kws : dict, optional
        Extra keyword arguments for ``RadiusNeighborsRegressor``.
    ax : matplotlib axes, optional
        Axes to draw on; ``plt.imshow`` is used when omitted.

    Returns
    -------
    The image object returned by ``imshow``.
    """
    # Copy so that neither the caller's dicts nor a shared default is mutated.
    im_kws = dict(im_kws) if im_kws else {}
    reg_kws = dict(reg_kws) if reg_kws else {}

    if isinstance(X, pd.core.frame.DataFrame):
        X = X.values

    # Image defaults are expressed in the ORIGINAL data units, so they are
    # computed before any rescaling of X.  The original tested ``reg_kws``
    # for 'origin', which clobbered a caller-supplied value.
    if 'origin' not in im_kws:
        im_kws['origin'] = 'lower'
    if 'extent' not in im_kws:
        im_kws['extent'] = (X[:, 0].min(), X[:, 0].max(),
                            X[:, 1].min(), X[:, 1].max())
    if 'aspect' not in im_kws:
        im_kws['aspect'] = ((X[:, 0].max() - X[:, 0].min())
                            / (X[:, 1].max() - X[:, 1].min()))

    if dist_scale is not None:
        # isinstance guard: comparing an array scale with a string would give
        # an element-wise result whose truth value is ambiguous.
        if isinstance(dist_scale, str) and dist_scale == 'normalize':
            X = X / (X.max(axis=0) - X.min(axis=0))
        else:
            X = X / dist_scale

    kneighbours = RadiusNeighborsRegressor(radius=r, **reg_kws)
    kneighbours.fit(X, y)

    xx, yy = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), res),
                         np.linspace(X[:, 1].min(), X[:, 1].max(), res))
    X_grid = np.vstack([xx.ravel(), yy.ravel()]).T
    Y_hat = kneighbours.predict(X_grid).reshape((res, res))

    target = plt if ax is None else ax
    return target.imshow(Y_hat, **im_kws)
def get_best_rnn_radius(low, high, step):
    """
    Scan radii from `low` to `high` in increments of `step`, score a
    distance-weighted radius-neighbours regressor for each one on the
    module-level train/test frames, save the error curve to 'rnn_param.png'
    and return the radius with the lowest mean absolute error.
    """
    feature_cols = ['temperatura', 'vacuo']

    def _mae_for(radius):
        # Fit on the training frame, score on the test frame.
        model = RadiusNeighborsRegressor(radius=radius, weights='distance')
        model.fit(train_df[feature_cols], train_df[['energia']])
        predicted = model.predict(test_df[feature_cols])
        return metrics.mean_absolute_error(test_df['energia'], predicted)

    radii = list(np.arange(low, high + step, step))
    mae_rnn = [_mae_for(radius) for radius in radii]
    best_radius = radii[np.argmin(mae_rnn)]

    fig, ax = plt.subplots()
    ax.set_title('Parameter evaluation for RNN')
    ax.set_xlabel('Radius')
    ax.set_ylabel('Mean absolute error')
    ax.set_xlim(low, high)
    # Add an extra tick at the winning radius.
    ticks = list(ax.get_xticks())
    ticks.append(best_radius)
    ax.set_xticks(ticks)
    ax.plot(radii, mae_rnn, c='orange', linewidth=2)
    fig.savefig('rnn_param.png')
    return best_radius
def _check_coords_for_distance_weighting(self, coords, check_radius,
                                         check_weights, X, y_mean):
    """
    Checks that coords won't break the distance weighting function.

    A probe regressor is fitted on (X, y_mean) and asked to predict each
    coordinate on its own; a coordinate is kept unless that prediction
    raises ZeroDivisionError.

    Returns the list of positions in `coords` that predicted cleanly.
    """
    valid_inds = []
    if len(coords) == 0:
        # Nothing to probe; the original never fitted in this case either.
        return valid_inds

    # The fit does not depend on the coordinate being probed, so do it once
    # instead of once per coordinate.
    probe = RadiusNeighborsRegressor(radius=check_radius,
                                     weights=check_weights)
    probe.fit(X, y_mean)

    # Positional indexing is kept (rather than iterating `coords`) so that
    # containers whose iteration differs from integer indexing behave as
    # before.  `range` replaces the Python-2-only `xrange`.
    for index in range(len(coords)):
        try:
            probe.predict([coords[index]])
        except ZeroDivisionError:
            continue
        valid_inds.append(index)
    return valid_inds
def sampling_fix(df, name, start, stop, radius, medianFilter, plot):
    """Fit a radius-neighbours regressor of column `name` against depth.

    Rows are limited to depths strictly between `start` and `stop` and to
    finite values of `name`. The target is a rolling median (window
    `medianFilter`) of `name`, back-filled to remove the leading NaNs.

    Returns the fitted regressor when `plot == 0`; otherwise shows a chart of
    the filtered data and the regression curve and returns None.
    """
    depth_col = 'Measured Depth m'

    # Keep only the requested depth window, then drop non-finite samples.
    in_window = (df[depth_col] > start) & (df[depth_col] < stop)
    df = df[in_window]
    df = df[np.isfinite(df[name])]

    # The regressor expects a 2-D feature matrix with one column.
    depth = df[depth_col]
    X = depth.values.reshape(depth.shape[0], 1)

    from sklearn.neighbors import RadiusNeighborsRegressor
    reg = RadiusNeighborsRegressor(radius=radius, weights='uniform')

    # Median-filter the target; bfill removes the NaNs the window leaves at
    # the start of the series.
    y = df[name].rolling(medianFilter).median().bfill()
    reg.fit(X, y)

    if plot == 0:
        return reg

    import matplotlib.pyplot as plt
    # Filtered data as points, regression as a red line.
    plt.scatter(X, y, label=name)
    plt.plot(X, reg.predict(X), c='r', label="prediction")
    plt.legend()
    plt.show()
def _check_coords_for_distance_weighting(self, coords, check_radius,
                                         check_weights, X, y_mean):
    """
    Checks that coords won't break the distance weighting function.

    A probe regressor is fitted on (X, y_mean) and asked to predict each
    coordinate on its own; a coordinate is kept unless that prediction
    raises ZeroDivisionError.

    Returns the list of positions in `coords` that predicted cleanly.
    """
    valid_inds = []
    if len(coords) == 0:
        # Nothing to probe; the original never fitted in this case either.
        return valid_inds

    # The fit does not depend on the coordinate being probed, so do it once
    # instead of once per coordinate.
    probe = RadiusNeighborsRegressor(radius=check_radius,
                                     weights=check_weights)
    probe.fit(X, y_mean)

    # Positional indexing is kept (rather than iterating `coords`) so that
    # containers whose iteration differs from integer indexing behave as
    # before.  `range` replaces the Python-2-only `xrange`.
    for index in range(len(coords)):
        try:
            probe.predict([coords[index]])
        except ZeroDivisionError:
            continue
        valid_inds.append(index)
    return valid_inds
def plot_std_dev(folder):
    """Show per-channel noise diagnostics for the image stack in `folder`.

    For every channel (last axis of the loaded data) two figures are shown:
    a 2x2 overview (mean, std, mean/std, value histogram) and a scatter of
    per-pixel std against mean with a radius-neighbours curve (red) and a
    degree-1 polynomial fit (orange).
    """
    data = load_images(folder)
    for channel in range(data.shape[3]):
        channel_stack = data[:, :, :, channel]
        std_dev_img = np.std(channel_stack, axis=0)
        mean_img = np.mean(channel_stack, axis=0)

        # --- figure 1: overview panels -------------------------------
        panels = (
            (mean_img, 'mean'),
            (std_dev_img, 'std'),
            (mean_img / std_dev_img, 'mean / std'),
        )
        for slot, (image, title) in enumerate(panels, start=1):
            plt.subplot(2, 2, slot)
            display_image(image, z=1)
            plt.title(title)

        plt.subplot(2, 2, 4)
        bins = np.arange(np.min(channel_stack), np.max(channel_stack) + 1)
        plt.hist(channel_stack.flatten(), bins=bins)
        plt.grid(True)
        plt.show()

        # --- figure 2: std as a function of mean ---------------------
        flat_mean = mean_img.flatten()
        flat_std = std_dev_img.flatten()

        rnr = RadiusNeighborsRegressor(radius=10, weights='distance')
        rnr.fit(flat_mean[:, np.newaxis], flat_std)
        line_x = np.arange(np.min(mean_img), np.max(mean_img) + 1)
        line_y = rnr.predict(line_x[:, np.newaxis])

        fit = np.polyfit(flat_mean, flat_std, deg=1)
        linear_y = np.polyval(fit, line_x)

        plt.scatter(flat_mean, flat_std, alpha=0.1, color='black', s=1)
        plt.plot(line_x, line_y, 'r')
        plt.plot(line_x, linear_y, 'orange')
        plt.grid(True)
        plt.show()
class KNNDynamicsResidual:
    """Radius-neighbours model that predicts zeros where it has no data.

    Until `fit` has been called, and for query rows with no training point
    inside the radius, predictions are all-zero rows.
    """

    def __init__(self, args, env_params):
        self.args = args
        self.env_params = env_params
        # Uniformly weighted neighbours within args.neighbor_radius.
        self.knn_model = RadiusNeighborsRegressor(
            radius=args.neighbor_radius, weights='uniform')
        # Becomes True after the first call to fit().
        self.is_fit = False

    def fit(self, X, y):
        '''
        Fit the neighbour model and return the training loss.

        X: N x d data matrix; each row is a 4D vector of object pos and
           gripper pos
        y: N x d target matrix; each row is a 4D vector of next object pos
           and next gripper pos
        '''
        self.knn_model.fit(X, y)
        self.is_fit = True
        return self.loss(X, y)

    def predict(self, X):
        '''
        Predict targets for X (N x d, rows of object pos and gripper pos).

        Rows without any neighbour inside the radius keep a zero prediction.
        '''
        ypred = np.zeros(X.shape)
        if not self.is_fit:
            # Nothing learned yet.
            return ypred

        _, neighbor_indices = self.knn_model.radius_neighbors(X)
        has_neighbors = np.array(
            [len(idx) != 0 for idx in neighbor_indices], dtype=bool)
        if not has_neighbors.any():
            # No query row has a neighbour.
            return ypred

        # Only query the model for rows that do have neighbours.
        ypred[has_neighbors] = self.knn_model.predict(X[has_neighbors])
        return ypred

    def get_num_neighbors(self, X):
        """Return, per row of X, how many training points lie in the radius."""
        if not self.is_fit:
            return np.zeros(X.shape[0])
        _, neighbor_indices = self.knn_model.radius_neighbors(X)
        return np.array([len(idx) for idx in neighbor_indices])

    def loss(self, X, y):
        """Mean Euclidean distance between predictions for X and targets y."""
        residual = self.predict(X) - y
        return np.linalg.norm(residual, axis=1).mean()
class _RadiusNeighborsRegressorImpl:
    """Thin wrapper that forwards fit/predict to an `Op` instance."""

    def __init__(self, **hyperparams):
        # Keep the hyperparameters and build the wrapped model from them.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model (with or without targets); return self."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for X."""
        model = self._wrapped_model
        return model.predict(X)
def predict(self):
    """
    Fit a scikit-learn RadiusNeighborsRegressor (https://scikit-learn.org)
    on the stored training split, using the radius from the one-hot-encoder
    configuration, predict the stored test features, and score those
    predictions against the stored test targets.

    The score is stored on `self.acc` and returned.
    """
    radius = get_ohe_config().rnr_radius
    regressor = RadiusNeighborsRegressor(radius=radius)
    regressor.fit(self.X_train, self.y_train)

    predicted = list(regressor.predict(self.X_test))
    self.acc = OneHotPredictor.get_accuracy(predicted, self.y_test)
    return self.acc
def compare_multiple_stacks(folder):
    """Compare mean vs sigma-clipped mean across the stacks in `folder`.

    Every sub-folder is loaded as one image stack. For each of the three
    channels, one figure is shown with one smoothed curve per stack: the
    relative difference (in parts per thousand) between the plain mean and
    the 2-sigma-clipped mean, as a function of the mean pixel value.
    """
    subfolders = os.listdir(folder)
    all_data = np.array([
        load_images(os.path.join(folder, subfolder))
        for subfolder in tqdm.tqdm(subfolders)
    ])
    print(all_data.shape)

    step = 1  # sub-sampling stride for the flattened pixels
    for channel in range(3):
        for subfolder_index, stack in enumerate(all_data):
            channel_stack = stack[:, :, :, channel]
            img_mean = np.mean(channel_stack, axis=0)
            clipped = astropy.stats.sigma_clip(channel_stack, sigma=2, axis=0)
            img_sigma_clip = np.mean(clipped, axis=0)
            img_sigma_ratio = (img_mean / img_sigma_clip - 1) * 1E3

            flat_ratios = img_sigma_ratio.flatten()[::step]
            mean_values = img_mean.flatten()[::step]

            # Smooth the ratio as a function of brightness.
            rnr = RadiusNeighborsRegressor(radius=50, weights='uniform')
            rnr.fit(mean_values[:, np.newaxis], flat_ratios.flatten())

            # Evaluate away from both ends of the brightness range.
            lo = np.min(mean_values) + 200
            hi = np.max(mean_values) + 1 - 200
            x = np.arange(lo, hi, 10)
            line_y = rnr.predict(x[:, np.newaxis])
            plt.plot(x, line_y, label=str(subfolder_index))

        plt.legend()
        plt.grid(True)
        plt.show()
def compare_error_vs_brightness(folder):
    """Plot sigma-clip error against brightness for each channel of a stack.

    Per channel, one figure shows: the per-pixel relative difference (parts
    per thousand) between the plain mean and the 2-sigma-clipped mean as a
    black scatter, two red reference curves computed from a fixed offset
    (128 for channel 1, 256 otherwise), and a green distance-weighted
    radius-neighbours smoothing of the scatter.
    """
    data = load_images(folder)
    for channel in range(data.shape[3]):
        channel_stack = data[:, :, :, channel]
        n_frames = channel_stack.shape[0]

        img_mean = np.mean(channel_stack, axis=0)
        clipped = astropy.stats.sigma_clip(channel_stack, sigma=2, axis=0)
        img_sigma_clip = np.mean(clipped, axis=0)
        img_sigma_ratio = (img_mean / img_sigma_clip - 1) * 1E3

        x = np.arange(np.min(img_mean), np.max(img_mean) + 1)

        # Reference envelope for a fixed change in the summed value.
        if channel == 1:
            bit_flip_change = 128
        else:
            bit_flip_change = 256
        y_top = ((n_frames * x) / (n_frames * x - bit_flip_change) - 1) * 1E3
        y_bottom = ((n_frames * x) / (n_frames * x + bit_flip_change) - 1) * 1E3
        plt.plot(x, y_top, 'r')
        plt.plot(x, y_bottom, 'r')

        flat_mean = img_mean.flatten()
        flat_ratio = img_sigma_ratio.flatten()
        plt.scatter(flat_mean, flat_ratio, alpha=0.1, color='black', s=1)

        rnr = RadiusNeighborsRegressor(radius=50, weights='distance')
        rnr.fit(flat_mean[:, np.newaxis], flat_ratio)
        line_y = rnr.predict(x[:, np.newaxis])
        plt.plot(x, line_y, 'g')

        plt.grid(True)
        plt.show()
def powerproduction():
    """Handle a POST carrying a `speed` form field and return a prediction.

    The power curve is read from "powerproduction.csv", rows with zero power
    are dropped, and a distance-weighted radius-neighbours regressor
    (radius 1.7, Euclidean) fitted on speed -> power predicts the value for
    the submitted speed. Non-POST requests fall through and return None.
    """
    if fl.request.method != "POST":
        return None

    speed = float(fl.request.form['speed'])

    # Load the data, discard zero-power rows and order by speed.
    df = pd.read_csv("powerproduction.csv")
    df = df[df.power != 0].sort_values('speed')

    speeds = df['speed'].to_numpy()
    powers = df['power'].to_numpy()

    neigh_radius = RadiusNeighborsRegressor(radius=1.7, weights='distance', p=2)
    neigh_radius.fit(speeds.reshape(-1, 1), powers)

    p_pred = neigh_radius.predict([[speed]])
    return {'value': p_pred[0]}
def grid_points_2d(mesh, cell_size=10):
    """Build a regular 2-D grid surface whose elevation follows `mesh`.

    The mesh is voxelised with `vtk_Voxel.from_mesh`; each cell centre gets
    an elevation from a distance-weighted radius-neighbours regression of
    the mesh points' z on their (x, y). The search radius starts at
    ``0.75 * cell_size`` and grows by 1.5x per pass until every cell has a
    value.

    Parameters
    ----------
    mesh : object exposing ``points`` as an (n, 3) array
    cell_size : number
        Grid cell size passed to the voxeliser; also seeds the radius.

    Returns
    -------
    The extracted surface, with point z set from the 'Elevation' array.
    """
    grid = vtk_Voxel.from_mesh(mesh, cell_size, 2)
    cells = grid.cell_centers().points
    radius = cell_size * 0.5
    tmat = np.full(cells.shape[0], np.nan)
    print("sample min", np.min(mesh.points[:, 2]),
          "max", np.max(mesh.points[:, 2]))

    missing = np.isnan(tmat)
    while np.any(missing):
        # keep increasing radius until all cells have values
        radius *= 1.5
        print("RadiusNeighborsRegressor =", radius, "m")
        # Keyword arguments: `weights` is keyword-only in current
        # scikit-learn, so the positional form raises TypeError there.
        neigh = RadiusNeighborsRegressor(radius=radius, weights='distance')
        neigh.fit(mesh.points[:, :2], mesh.points[:, 2])
        # Each query is independent, so only the still-empty cells need to
        # be predicted; cells filled in an earlier pass keep their value.
        tmat[missing] = neigh.predict(cells[missing, :2])
        missing = np.isnan(tmat)

    print("regression min", np.min(tmat), "max", np.max(tmat))
    grid.cell_arrays['Elevation'] = tmat
    surf = grid.extract_surface()
    surf = surf.ctp()
    surf.points[:, 2] = surf.point_arrays['Elevation']
    return surf
# reg = GradientBoostingRegressor() # reg = HistGradientBoostingRegressor() # kernel = DotProduct() + WhiteKernel() # reg = GaussianProcessRegressor(kernel=kernel, random_state=0) #awful # reg = LogisticRegression() # reg = Ridge(alpha=1.0) # not good # reg = BayesianRidge() # not good # reg = PoissonRegressor() # not good # reg = TweedieRegressor() # not good # reg = GammaRegressor() # not good # reg = MLPRegressor(random_state=0, max_iter=500) # not good # reg = DecisionTreeRegressor() # not too great # reg = KNeighborsRegressor(n_neighbors=5, algorithm="auto", weights="uniform", leaf_size=30) reg = RadiusNeighborsRegressor(radius=4.3) # reg = SVR(C=60, gamma='auto') # print(cross_val_score(reg, X, y, cv=10)) reg.fit(X_train, y_train) # accuracy = reg.score(X_test, y_test) # print(accuracy) predictions = reg.predict(X) df['Prediction'] = predictions df = df.loc["2015-03-13"] df['Total_Feeder'].plot() df['Prediction'].plot() plt.show()
def process_data(input_true, input_reco, segment_label, group_label):
    """
    Match reconstructed voxels to true voxels and build training rows.

    arguments are Nx5 from processing data
    input_true: energy depositions
    input_reco: charge depositions
    segment_label: fivetypes label
    group_label: particle instance
    Columns 0-2 are used as the voxel position, column 3 (== column -2) as
    the batch id and the last column as the value (energy / charge / label).

    The purpose is to find the M reco voxels that also exist in input_true
    and set target energies for them: optionally (ADD_MISSING_ENERGY) the
    energy of unmatched true voxels is added to the nearest matched voxel,
    and optionally (BLUR_ENERGY) energies are smoothed within each group.

    Returns None when no reco voxel matches a true voxel, otherwise a tuple:
        element 0: input_reco rows (5) + one-hot segment label + target
                   energy (1). As implemented the one-hot block has 5
                   columns, giving M x 11 -- NOTE(review): an earlier
                   description said 6 classes / M x 12; confirm.
        element 1: [size M] group label of each matched voxel
        element 2: [size M] indices into input_true of the matched voxels
        element 3: [size Mx5] matched input_true rows, last column holding
                   the adjusted energy
    """
    chosen_indices = []
    chosen_reco_indices = []
    current_batch = 0
    current_batch_selection = np.where(input_true[:, -2] == current_batch)[0]
    current_input_true = input_true[current_batch_selection]
    for r in range(len(input_reco)):
        row = input_reco[r]
        b = row[-2]
        if b != current_batch:
            current_batch = b
            current_batch_selection = np.where(
                input_true[:, -2] == current_batch)[0]
            # Refresh the per-batch view as well; without this, voxels of
            # later batches were searched in the first batch's rows while
            # being mapped through the new batch's index list.
            current_input_true = input_true[current_batch_selection]
        pos = row[:3]
        # Cheap pre-filter on x and y before the exact 3-D comparison.
        region_selection = np.where(
            (current_input_true[:, 0] == pos[0]) &
            (current_input_true[:, 1] == pos[1]))[0]
        input_true_region = current_input_true[region_selection]
        for i in range(len(input_true_region)):
            row2 = input_true_region[i]
            pos2 = row2[:3]
            if np.array_equal(pos, pos2):
                chosen_indices.append(
                    current_batch_selection[region_selection[i]])
                chosen_reco_indices.append(r)
                break
    if len(chosen_indices) == 0:
        return None

    chosen_indices = np.array(chosen_indices)
    chosen_reco_indices = np.array(chosen_reco_indices)
    lost_data = np.delete(input_true, chosen_indices, axis=0)
    found_data = input_true[chosen_indices]

    # find where the chosen indices are in the group data
    # (-1 rows mark "no entry"; .shape[1] also works when a set is empty)
    lost_group_data = -np.ones((len(lost_data), lost_data.shape[1]))
    ungrouped_data = -np.ones((len(lost_data), lost_data.shape[1]))
    found_group_data = -np.ones((len(found_data), found_data.shape[1]))
    for i in range(len(lost_data)):
        row = lost_data[i]
        filter0 = group_label[np.where(group_label[:, -2] == row[-2])]
        filter1 = filter0[np.where(filter0[:, 0] == row[0])]
        filter2 = filter1[np.where(filter1[:, 1] == row[1])]
        filter3 = filter2[np.where(filter2[:, 2] == row[2])]
        if len(filter3) == 0:
            ungrouped_data[i] = row
        else:
            g = filter3[0]
            lost_group_data[i] = g
    for i in range(len(found_data)):
        row = found_data[i]
        filter0 = group_label[np.where(group_label[:, -2] == row[-2])]
        filter1 = filter0[np.where(filter0[:, 0] == row[0])]
        filter2 = filter1[np.where(filter1[:, 1] == row[1])]
        filter3 = filter2[np.where(filter2[:, 2] == row[2])]
        g = filter3[0]
        found_group_data[i] = g

    if ADD_MISSING_ENERGY:
        batches = np.unique(input_true[:, 3])
        for b in batches:
            # nearest neighbor assignment within group
            found_groups = np.unique(
                found_group_data[np.where(found_group_data[:, 3] == b)][:, -1])
            lost_batch_mask = lost_group_data[:, 3] == b
            found_batch_mask = found_group_data[:, 3] == b
            for g in found_groups:
                lost_selection = np.where(
                    lost_batch_mask & (lost_group_data[:, -1] == g))[0]
                found_selection = np.where(
                    found_batch_mask & (found_group_data[:, -1] == g))[0]
                ldata = lost_data[lost_selection]
                fdata = found_data[found_selection]
                lost_positions = ldata[:, :3]
                found_positions = fdata[:, :3]
                distances = distance_matrix(lost_positions, found_positions)
                closest_points = np.argmin(distances, axis=1)
                closest_energies = ldata[:, -1]
                for i in range(len(closest_points)):
                    found_data[found_selection[
                        closest_points[i]]][-1] += closest_energies[i]

            # associated ungrouped voxels with nearest voxels, regardless of group
            lost_ungrouped = np.where((ungrouped_data[:, 3] == b))[0]
            if len(lost_ungrouped) > 0:
                found_selection = np.where(found_batch_mask)[0]
                ldata = lost_data[lost_ungrouped]
                fdata = found_data[found_selection]
                lost_positions = ldata[:, :3]
                found_positions = fdata[:, :3]
                distances = distance_matrix(lost_positions, found_positions)
                closest_points = np.argmin(distances, axis=1)
                closest_energies = ldata[:, -1]
                for i in range(len(closest_points)):
                    found_data[found_selection[
                        closest_points[i]]][-1] += closest_energies[i]

    if BLUR_ENERGY:
        blur_kernel = 3
        for g in np.unique(found_group_data[:, -1]):
            inds = np.where(found_group_data[:, -1] == g)
            selection = found_data[inds]
            total_energy = np.sum(selection[:, -1])
            coords = selection[:, :3]
            energies = selection[:, -1]
            neigh = RadiusNeighborsRegressor(radius=blur_kernel)
            neigh.fit(coords, energies)
            selection[:, -1] = neigh.predict(coords)
            # Rescale so the group's total energy is unchanged by the blur.
            selection[:, -1] *= total_energy / np.sum(selection[:, -1])
            found_data[inds, -1] = selection[:, -1]

    segment_indices = segment_label[chosen_indices, -1].astype(int)
    segment_one_hot = np.zeros((len(segment_indices), 5))
    segment_one_hot[np.arange(len(segment_indices)), segment_indices] = 1
    out = np.concatenate((input_reco[chosen_reco_indices], segment_one_hot,
                          np.expand_dims(found_data[:, -1], axis=1)), axis=1)
    return np.array(out), found_group_data[:, -1], chosen_indices, found_data
knn_regressor = KNeighborsRegressor(n_neighbors=get_best_knn_n_neighbors( 1, 100), weights='distance') knn_regressor.fit(train_df[['temperatura', 'vacuo']], train_df[['energia']]) rnn_regressor = RadiusNeighborsRegressor(radius=get_best_rnn_radius( 1.7, 3.0, 0.05), weights='distance') rnn_regressor.fit(train_df[['temperatura', 'vacuo']], train_df[['energia']]) lr_regressor = LinearRegression() lr_regressor.fit(train_df[['temperatura', 'vacuo']], train_df[['energia']]) energia_knn = knn_regressor.predict(test_df[['temperatura', 'vacuo']]) energia_rnn = rnn_regressor.predict(test_df[['temperatura', 'vacuo']]) energia_lr = lr_regressor.predict(test_df[['temperatura', 'vacuo']]) fig, ax = plt.subplots() ax.set_title('Evaluation of regression algorithms') ax.set_ylabel('Mean absolute error') ax.set_ylim(0, 5) ax.set_yticks(np.arange(0, 5, 1.5)) rects = ax.bar(x=['kNN', 'rNN', 'LR'], height=[ metrics.mean_absolute_error(test_df['energia'], energia_knn), metrics.mean_absolute_error(test_df['energia'], energia_rnn), metrics.mean_absolute_error(test_df['energia'], energia_lr),
# CV Loop
# NOTE(review): `kf` is iterated directly, which presumably matches the
# legacy KFold object that yields (train_index, test_index) pairs -- confirm.
for train_index, test_index in kf:
    # for each iteration of the for loop we'll do a test train split
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit the scaler on the training fold only, then reuse it for the
    # held-out fold.
    t = StandardScaler()
    X_train = t.fit_transform(X_train)
    clf.fit(X_train, y_train)  # Train clf on the training fold
    X_test = t.transform(X_test)
    y_pred[test_index] = clf.predict(X_test)  # Store out-of-fold predictions

r2_score(y, y_pred)  # value is not stored or printed
rmse = sqrt(mean_squared_error(y, y_pred))
# Single-argument print works as a statement and as a function; the text is
# the same as the original two-item print statement produced.
print("RadiusNeighborsRegressor RMSE:  %s" % (rmse,))

### Prediction ###
# The model is trained on standardized features, so the unseen features must
# go through the same transformation. Refit scaler and model on all of X
# rather than reusing whatever the last fold left behind.
final_scaler = StandardScaler()
clf.fit(final_scaler.fit_transform(X), y)
result = clf.predict(final_scaler.transform(test_feats))
result = np.asarray(result)
np.savetxt("result.csv", result, delimiter=",")
def process_data(data):
    """
    Match reconstructed voxels to true voxels and build training rows.

    data: mapping with keys 'input_true', 'input_reco', 'segment_label' and
          'group_label'. Each array uses columns 0-2 as the voxel position,
          column 3 (== column -2) as the batch id and the last column as the
          value (energy / charge / label).

    Optionally (ADD_MISSING_ENERGY) the energy of unmatched true voxels is
    added to the nearest matched voxel, and optionally (BLUR_ENERGY)
    energies are smoothed within each group.

    Returns None when no reco voxel matches a true voxel, otherwise a tuple:
        element 0: matched input_reco rows + 5 one-hot segment columns +
                   adjusted energy
        element 1: group label of each matched voxel
    """
    input_true = data['input_true']
    input_reco = data['input_reco']
    segment_label = data['segment_label']
    group_label = data['group_label']

    chosen_indices = []
    chosen_reco_indices = []
    current_batch = 0
    current_batch_selection = np.where(input_true[:, -2] == current_batch)[0]
    current_input_true = input_true[current_batch_selection]
    for r in range(len(input_reco)):
        row = input_reco[r]
        b = row[-2]
        if b != current_batch:
            current_batch = b
            current_batch_selection = np.where(
                input_true[:, -2] == current_batch)[0]
            # Refresh the per-batch view as well; without this, voxels of
            # later batches were searched in the first batch's rows while
            # being mapped through the new batch's index list.
            current_input_true = input_true[current_batch_selection]
        pos = row[:3]
        # Cheap pre-filter on x and y before the exact 3-D comparison.
        region_selection = np.where(
            (current_input_true[:, 0] == pos[0]) &
            (current_input_true[:, 1] == pos[1]))[0]
        input_true_region = current_input_true[region_selection]
        for i in range(len(input_true_region)):
            row2 = input_true_region[i]
            pos2 = row2[:3]
            if np.array_equal(pos, pos2):
                chosen_indices.append(
                    current_batch_selection[region_selection[i]])
                chosen_reco_indices.append(r)
                break
    if len(chosen_indices) == 0:
        return None

    chosen_indices = np.array(chosen_indices)
    chosen_reco_indices = np.array(chosen_reco_indices)
    lost_data = np.delete(input_true, chosen_indices, axis=0)
    found_data = input_true[chosen_indices]

    # find where the chosen indices are in the group data
    # (-1 rows mark "no entry"; .shape[1] also works when a set is empty)
    lost_group_data = -np.ones((len(lost_data), lost_data.shape[1]))
    ungrouped_data = -np.ones((len(lost_data), lost_data.shape[1]))
    found_group_data = -np.ones((len(found_data), found_data.shape[1]))
    for i in range(len(lost_data)):
        row = lost_data[i]
        filter0 = group_label[np.where(group_label[:, -2] == row[-2])]
        filter1 = filter0[np.where(filter0[:, 0] == row[0])]
        filter2 = filter1[np.where(filter1[:, 1] == row[1])]
        filter3 = filter2[np.where(filter2[:, 2] == row[2])]
        if len(filter3) == 0:
            ungrouped_data[i] = row
        else:
            g = filter3[0]
            lost_group_data[i] = g
    for i in range(len(found_data)):
        row = found_data[i]
        filter0 = group_label[np.where(group_label[:, -2] == row[-2])]
        filter1 = filter0[np.where(filter0[:, 0] == row[0])]
        filter2 = filter1[np.where(filter1[:, 1] == row[1])]
        filter3 = filter2[np.where(filter2[:, 2] == row[2])]
        g = filter3[0]
        found_group_data[i] = g
    # lost_group_data = np.delete(group_label, chosen_indices, axis=0)
    # found_group_data = group_label[chosen_indices]

    if ADD_MISSING_ENERGY:
        batches = np.unique(input_true[:, 3])
        for b in batches:
            # nearest neighbor assignment within group
            found_groups = np.unique(
                found_group_data[np.where(found_group_data[:, 3] == b)][:, -1])
            lost_batch_mask = lost_group_data[:, 3] == b
            found_batch_mask = found_group_data[:, 3] == b
            for g in found_groups:
                lost_selection = np.where(
                    lost_batch_mask & (lost_group_data[:, -1] == g))[0]
                found_selection = np.where(
                    found_batch_mask & (found_group_data[:, -1] == g))[0]
                ldata = lost_data[lost_selection]
                fdata = found_data[found_selection]
                lost_positions = ldata[:, :3]
                found_positions = fdata[:, :3]
                distances = distance_matrix(lost_positions, found_positions)
                closest_points = np.argmin(distances, axis=1)
                closest_energies = ldata[:, -1]
                for i in range(len(closest_points)):
                    found_data[found_selection[
                        closest_points[i]]][-1] += closest_energies[i]

            # associated ungrouped voxels with nearest voxels, regardless of group
            lost_ungrouped = np.where((ungrouped_data[:, 3] == b))[0]
            if len(lost_ungrouped) > 0:
                found_selection = np.where(found_batch_mask)[0]
                ldata = lost_data[lost_ungrouped]
                fdata = found_data[found_selection]
                lost_positions = ldata[:, :3]
                found_positions = fdata[:, :3]
                distances = distance_matrix(lost_positions, found_positions)
                closest_points = np.argmin(distances, axis=1)
                closest_energies = ldata[:, -1]
                for i in range(len(closest_points)):
                    found_data[found_selection[
                        closest_points[i]]][-1] += closest_energies[i]

    if BLUR_ENERGY:
        blur_kernel = 3
        for g in np.unique(found_group_data[:, -1]):
            inds = np.where(found_group_data[:, -1] == g)
            selection = found_data[inds]
            total_energy = np.sum(selection[:, -1])
            coords = selection[:, :3]
            energies = selection[:, -1]
            neigh = RadiusNeighborsRegressor(radius=blur_kernel)
            neigh.fit(coords, energies)
            selection[:, -1] = neigh.predict(coords)
            # Rescale so the group's total energy is unchanged by the blur.
            selection[:, -1] *= total_energy / np.sum(selection[:, -1])
            found_data[inds, -1] = selection[:, -1]

    segment_indices = segment_label[chosen_indices, -1].astype(int)
    segment_one_hot = np.zeros((len(segment_indices), 5))
    segment_one_hot[np.arange(len(segment_indices)), segment_indices] = 1
    out = np.concatenate((input_reco[chosen_reco_indices], segment_one_hot,
                          np.expand_dims(found_data[:, -1], axis=1)), axis=1)
    return np.array(out), found_group_data[:, -1]
# k-nearest-neighbours baseline: 6 neighbours, uniform weights.
KNN_reg = KNeighborsRegressor(n_neighbors=6, weights='uniform')
KNN_reg.fit(x_train, y_train)
y_predict_knn = KNN_reg.predict(x_test)
y_predict_knn[0:10]  # bare expression: only displays in an interactive session

from sklearn.neighbors import RadiusNeighborsRegressor

# Radius-neighbours model using the spread of the training features as radius.
# NOTE(review): if x_train is a DataFrame, .std() is per-column rather than a
# single number -- confirm that a scalar radius is what reaches the model.
RNN_reg = RadiusNeighborsRegressor(radius=x_train.std())
RNN_reg.fit(x_train, y_train)
y_predict_rnn = RNN_reg.predict(x_test)
y_predict_rnn[0:10]  # bare expression: only displays in an interactive session

# Same model with the library's default radius; this rebinds RNN_reg and the
# prediction is not stored.
RNN_reg = RadiusNeighborsRegressor()
RNN_reg.fit(x_train, y_train)
RNN_reg.predict(x_test)

from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error of the k-NN predictions only; results are not stored.
mean_absolute_error(y_test, y_predict_knn)
mean_squared_error(y_test, y_predict_knn)
# Fit the radius-neighbours model on the training data prepared above.
neigh = RadiusNeighborsRegressor(radius=KNN_RADIUS)
neigh.fit(X, Y)

# Read Test dataset: every directory entry whose name contains 'test'.
testFiles = [file for file in os.listdir(TEST_DATASET_DIRECTORY)
             if 'test' in str(file)]
# Single-argument print calls behave the same as a statement and as a
# function, and emit the same text as the original print statements.
print('Number of test files: %s' % (len(testFiles),))

TEST_Y_ALL = np.array([])
TEST_Y_ALL_PREDICTED = np.array([])
for file in testFiles:
    # read from the first line (the files have no header row)
    df = pd.read_csv(os.path.join(TEST_DATASET_DIRECTORY, file), header=None)
    # With header=None the column labels are integers, so label-based .loc
    # selects exactly what the removed .ix indexer did (slice end inclusive).
    df_y = df.loc[:, columns - 1]
    df_x = df.loc[:, :columns - 2]
    X = np.array(df_x)
    Y = np.array(df_y)
    predictedY = neigh.predict(X)
    predictedY = np.nan_to_num(predictedY)  # important to prevent nan error
    TEST_Y_ALL = np.append(TEST_Y_ALL, Y)
    TEST_Y_ALL_PREDICTED = np.append(TEST_Y_ALL_PREDICTED, predictedY)

print('TEST_Y_ALL size: %s' % ("{:,}".format(len(TEST_Y_ALL)),))
ERROR = abs(TEST_Y_ALL - TEST_Y_ALL_PREDICTED)
print('Method: KNN for Radius= %s' % (KNN_RADIUS,))
mean = ERROR.mean()
print('Mean error: %s' % (mean,))
# Rank the teams by their position in the current ordering, then by the
# 2013 projection.  DataFrame.sort was removed from pandas; sort_values is
# the replacement with the same arguments.
df2012['rnk_2012'] = range(1, df2012.shape[0] + 1)
df2012.sort_values('f2013', ascending=False, inplace=True)
df2012['rnk_2013'] = range(1, df2012.shape[0] + 1)
#df2012.to_csv('f2013_projection_3yrs.csv', header=True, index=True)

##########
### PROJECTIONS - 2014
##########

# Get 2011/12/13 stats for 2014 projection
y = 2014
y3 = [y - 1, y - 2, y - 3]
# Only teams present in both the most recent and the oldest of the 3 years.
tms_include = np.intersect1d(df[df.Year == y3[0]].Team.values,
                             df[df.Year == y3[2]].Team.values)
# Left: 3-year per-team average; right: most recent year only.
df2013 = pd.merge(
    df[(df.Year.isin(y3)) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(),
    df[(df.Year == y3[0]) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(),
    how='left', left_index=True, right_index=True,
    suffixes=['_3yr_avg', '_yr3'])
df2013['f2014'] = lin3.predict(df2013.values)
df2013.sort_values('f_yr3', ascending=False, inplace=True)
df2013['rnk_2013'] = range(1, df2013.shape[0] + 1)
df2013.sort_values('f2014', ascending=False, inplace=True)
df2013['rnk_2014'] = range(1, df2013.shape[0] + 1)
#df2013.to_csv('f2014_projection_3yrs.csv', header=True, index=True)

# Get 2011/12/13 stats for 2014 projection (same inputs, `rn3` model)
y = 2014
y3 = [y - 1, y - 2, y - 3]
tms_include = np.intersect1d(df[df.Year == y3[0]].Team.values,
                             df[df.Year == y3[2]].Team.values)
df2013 = pd.merge(
    df[(df.Year.isin(y3)) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(),
    df[(df.Year == y3[0]) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(),
    how='left', left_index=True, right_index=True,
    suffixes=['_3yr_avg', '_yr3'])
df2013['f2014'] = rn3.predict(df2013.values)
df2013.sort_values('f_yr3', ascending=False, inplace=True)
df2013['rnk_2013'] = range(1, df2013.shape[0] + 1)
df2013.sort_values('f2014', ascending=False, inplace=True)
df2013['rnk_2014'] = range(1, df2013.shape[0] + 1)
# The keyword is `header`; `headers` is not a to_csv parameter.
df2013.to_csv('f2014_projection_3yrs_rn.csv', header=True, index=True)
# Radius-neighbours baseline: fit on the training split, report the
# validation score and write a submission file.
# NOTE(review): train_x, train_y, validation_x, validation_y, test, col, np
# and RMSLE presumably come from the star import below -- confirm.
from data.transformed_data import *
from data.raw_data import data_dir
from sklearn.neighbors import RadiusNeighborsRegressor

# Library-default hyperparameters.
regressor = RadiusNeighborsRegressor()
regressor.fit(train_x, train_y)
print('RadiusNeighborsRegressor rmse:{}'.format(
    RMSLE(validation_y, regressor.predict(validation_x))))

predict = regressor.predict(test[col])
# expm1 is applied to the prediction before clipping at zero.
# NOTE(review): this presumably undoes a log1p transform of the target.
test['visitors'] = np.expm1(predict)
test['visitors'] = test['visitors'].clip(lower=0.)
test[['id', 'visitors']].to_csv(
    data_dir + 'submission_radius_neighbors_regressor.csv', index=False)
# Leave-one-out step for row `i`: train on every other row, predict row `i`.
# NOTE(review): `i`, `row_count` and `predictedY` are not defined in this
# fragment; presumably it runs once per row of an enclosing loop -- confirm.

# Training features: all rows except row i.
df_removed_one_x = df_all_x.drop(df_all_x.index[[i]])
X = np.array(df_removed_one_x)
#print('X')
#print(X)

# The held-out row is the single test sample.
df_x_test = df_all_x.iloc[i]
X_TEST = np.array(df_x_test)
#print('X_TEST')
#print(X_TEST)

# Training targets: all rows except row i.
df_removed_one_y = df_all_y.drop(df_all_y.index[[i]])
Y = np.array(df_removed_one_y)
#print('Y')
#print(Y)

neigh = RadiusNeighborsRegressor(radius=KNN_RADIUS)
neigh.fit(X, Y)
predicted_one_y = neigh.predict([X_TEST])
# .item() extracts the single element explicitly; calling float() directly
# on a one-element array is deprecated in recent NumPy.
predicted_one_y_2 = float(np.asarray(predicted_one_y).item())
predictedY.append(predicted_one_y_2)
print(repr(i + 1) + ' / ' + repr(row_count))
#print(predictedY)
np.savetxt("predicted_Y_KNN_RADIUS_2.csv", predictedY, delimiter=",", fmt='%10.10f')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error

from functions import errors

# Load the data and drop the two columns named 'month' and 'day'.
data = pd.read_csv("forestfires.csv")
data = data.drop(labels=['month', 'day'], axis=1)

# Target is the 'area' column; every remaining column is a feature.
y = data.area
x = data.drop(labels=['area'], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=10)

# Radius-neighbours regressor with library-default hyperparameters.
reg = RadiusNeighborsRegressor()
reg.fit(x_train, y_train)
y_predict = reg.predict(x_test)

# Print each prediction next to the true value.  Iterating the pair directly
# replaces the index loop, and `.values` replaces the deprecated
# Series.ravel() while yielding the same elements.
for predicted, actual in zip(y_predict, y_test.values):
    print("pred %s act %s" % (predicted, actual))

errors(y_test, y_predict)
import sys

import pandas as pd
import numpy as np
from sklearn.neighbors import RadiusNeighborsRegressor
# NOTE(review): not used below, and this module no longer exists in current
# scikit-learn releases -- confirm whether the import can be dropped.
from sklearn import cross_validation

# Usage: <script> TRAIN.h5 TEST.h5 OUTPUT.csv
# `sys` was used for the arguments without being imported.

# Read the training and test data
df = pd.read_hdf(sys.argv[1])
tdf = pd.read_hdf(sys.argv[2])

# Convert to the numpy arrays that scikit-learn works with.
# `.values` replaces DataFrame/Series.as_matrix, which was removed from pandas.
X_train = df[['lat', 'lon']].values
y_train = (df.length.values) * 15
X_test = tdf[['lat', 'lon']].values
y_test = (tdf.length.values) * 15
id_test = tdf.index.to_series().values

# Initialise the model
model = RadiusNeighborsRegressor(radius=0.0005, weights='distance')

# Training
model.fit(X_train, y_train)

# Prediction
y_try = model.predict(X_test)

# Write the results
resdf = pd.DataFrame({'idx': id_test, 'predict': (y_try), 'actual': (y_test)}).set_index('idx')
resdf.to_csv(sys.argv[3])
def mydist(x, y):
    """Count in how many of the first three positions x and y differ.

    Each of positions 0, 1 and 2 contributes 0.0 when the two samples agree
    there and 1.0 otherwise, so the result is a float between 0.0 and 3.0.
    NOTE(review): the positions presumably correspond to ASS_ID, WEEK_DAY
    and TIME, in that order -- confirm against the preprocessed columns.
    """
    def _mismatch(position):
        if x[position] == y[position]:
            return 0.
        return 1.

    # Same summation order as before: position 0, then 2, then 1.
    return _mismatch(0) + _mismatch(2) + _mismatch(1)


#dist = neighbors.DistanceMetric.get_metric('pyfunc', func=distance)

# Preprocess, then keep only the first 1000 rows for this experiment.
preprocessing = fp.feature_preprocessing()
preprocessing.full_preprocess(
    used_columns=['ASS_ID', 'WEEK_DAY', 'TIME', 'CSPL_RECEIVED_CALLS'])
data = preprocessing.data[:1000]

# The received-calls column is the target; the rest are features.
Y = data['CSPL_RECEIVED_CALLS']
X = data.drop(['CSPL_RECEIVED_CALLS'], axis=1)
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, Y, test_size=0.1, random_state=0)

# Radius 0.5 with the 0/1-per-feature distance above: only samples at
# distance 0.0 can fall inside the radius.
neigh = RadiusNeighborsRegressor(
    radius=0.5, metric='pyfunc', func=mydist, algorithm='auto')
print('fitting...')
neigh.fit(X_train, y_train)
print('fitted')
#error = neigh.score(X_test, y_test)
#print(error)
y_pred = neigh.predict(X_test)
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt

# 1-nearest-neighbour regressor, scored on the same data it was fitted on
# (training error only).
print("KNN ...{}".format(""))
knnreg = KNeighborsRegressor(n_neighbors=1)
knnreg.fit(data, Y)
y_KNNpred = knnreg.predict(data)
trainrms = sqrt(mean_squared_error(Y, y_KNNpred))
print("KNN PCA : trainrms {}".format(trainrms))

# Distance-weighted radius-neighbours regressor, also scored on its own
# training data; `trainrms` is overwritten.
print("Rad ...{}".format(""))
from sklearn.neighbors import RadiusNeighborsRegressor
radreg = RadiusNeighborsRegressor(weights='distance', radius=10.3)
radreg.fit(data, Y)
y_radpred = radreg.predict(data)
trainrms = sqrt(mean_squared_error(Y, y_radpred))
print("Rad PCA : trainrms {}".format(trainrms))

#=============================================================================
# end Feature selection
#=============================================================================

# Instantiate a random forest model (the Gaussian Process kernel below is
# left over from an earlier experiment).
#kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
print("RF ...{}".format(""))
RFregr = RandomForestRegressor(n_estimators=301, random_state=0, oob_score=True)
# The model is created here; the fit itself is not part of this fragment.
# Load the label files (semicolon-separated).
train_labels = np.array(pd.read_csv('train_labels.csv', sep= ';'))
test_labels = np.array(pd.read_csv('test_labels.csv', sep= ';'))

# Start timing fit + predict.
inicio = time.time()

# import the regression model (radius neighbours)
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn import preprocessing

# NOTE(review): the training labels are label-encoded before fitting, but
# the metrics below compare predictions with the raw test labels -- confirm
# that both are on the same scale.
lab_enc = preprocessing.LabelEncoder()
training_scores_encoded = lab_enc.fit_transform(train_labels)

# train the model on the dataset
regression = RadiusNeighborsRegressor(radius=1.0).fit(train_data, training_scores_encoded)

# predict
predictions_labels = regression.predict(test_data)

fim = time.time()

# Write the elapsed time to 'time_<name_folder>' in the output directory.
df_time = pd.DataFrame({'Execution Time:' : [fim-inicio]})
output_path = os.path.join('/home/isadorasalles/Documents/Regressao/radius_neighbors', 'time_'+name_folder)
df_time.to_csv(output_path, sep=';')

from sklearn import metrics

# Write MAE, MSE, RMSE and R2 to 'metrics_<name_folder>'.
df_metrics = pd.DataFrame({'Mean Absolute Error' : [metrics.mean_absolute_error(test_labels, predictions_labels)],
                           'Mean Squared Error' : [metrics.mean_squared_error(test_labels, predictions_labels)],
                           'Root Mean Squared Error': [np.sqrt(metrics.mean_squared_error(test_labels, predictions_labels))],
                           'R2 Score': [metrics.r2_score(test_labels, predictions_labels)]})
output_path = os.path.join('/home/isadorasalles/Documents/Regressao/radius_neighbors', 'metrics_'+name_folder)
df_metrics.to_csv(output_path, sep=';')
# Remove module-level names that are no longer needed before training.
del globals()['profilesDF']
del globals()['profiles']
del globals()['profilesLSo']
del globals()['profilesLS']
del globals()['row']
del globals()['tmpLS']
del globals()['tmpAGE']
del globals()['profsTOlikes']
del globals()['i']
del globals()['tmpIND']

# Seed NumPy's global random state; np.random.seed returns None, so that is
# what ends up in myRand.
seed = 7
myRand = np.random.seed(seed)

# Hold out 1500 samples for testing.
X_train, X_test, y_train, y_test = train_test_split(likesMAT, consARR, test_size=1500)

# The neighbourhood radius comes from the first command-line argument.
myRAD = float(sys.argv[1])
radNN = RadiusNeighborsRegressor(radius=myRAD)
#radNN.fit(likesMAT, consARR)
radNN.fit(X_train, y_train)
y_pred = radNN.predict(X_test)

import math
# Root mean squared error on the held-out samples.
myRMSE = math.sqrt(metrics.mean_squared_error(y_test, y_pred))
print("cons, Radius neighbors: ", str(myRAD), " ", myRMSE)
# joblib.dump(radNN, "/Users/jamster/radNN-A-cons.xz", compress=9)
# impRadNN = joblib.load("/Users/jamster/radNN-A-cons.xz")
# Mean squared error is the tree's default split criterion, so it is left
# implicit: the explicit spelling 'mse' is rejected by current scikit-learn
# and its replacement name is unknown to older releases.
dtr = DecisionTreeRegressor()
abr = AdaBoostRegressor(n_estimators=50)
rfr = RandomForestRegressor(n_estimators=50)

# Fit every model on the same data, in the original order.
for _model in (svr_rbf, svr_lin, svr_poly, knng, rng, dtr, abr, rfr):
    _model.fit(X, y)

# Support vector regression
y_rbf = svr_rbf.predict(X)
y_lin = svr_lin.predict(X)
y_poly = svr_poly.predict(X)
# Nearest-neighbour regression (k-neighbours and radius-neighbours)
y_knng = knng.predict(X)
y_rng = rng.predict(X)
# Decision tree regression
y_dtr = dtr.predict(X)
# ensemble
y_abr = abr.predict(X)
y_rfr = rfr.predict(X)

# Results: one colour and one display name per model, in matching order.
sns.set(style='whitegrid')
colors = sns.color_palette('Set2', 8)
names = ['RBF model', 'Linear model', 'Polynomial model', 'KNR', 'RNR', 'DTR', 'ABR', 'RFR']
data_pred = [y_rbf, y_lin, y_poly, y_knng, y_rng, y_dtr, y_abr, y_rfr]
plt.figure(1)
plt.scatter(X, y, color='red', label='data')