Пример #1
0
def rNeighbours2dPlot(X,y,r=0.5,res=100,dist_scale='normalize',im_kws={},reg_kws={},ax=None):
    """Draw a radius-neighbours regression of ``y`` over two features as an image.

    A ``RadiusNeighborsRegressor`` is fitted on the (optionally rescaled) ``X``
    and evaluated on a ``res`` x ``res`` grid spanning the range of the rescaled
    columns 0 and 1. The grid of predictions is drawn with ``imshow``.

    Parameters
    ----------
    X : array-like or pandas.DataFrame
        Feature matrix; columns 0 and 1 define the image axes.
    y : array-like
        Regression targets passed to ``fit``.
    r : float
        Neighbourhood radius (measured in the rescaled feature space).
    res : int
        Number of grid points along each axis.
    dist_scale : 'normalize', None, or number/array
        ``'normalize'`` divides each column by its range, ``None`` leaves ``X``
        untouched, anything else is used directly as the divisor.
    im_kws : dict
        Extra keyword arguments for ``imshow``. ``origin``, ``extent`` and
        ``aspect`` are filled in only when the caller did not provide them.
    reg_kws : dict
        Extra keyword arguments for ``RadiusNeighborsRegressor``.
    ax : matplotlib axes or None
        Target axes; ``plt.imshow`` is used when ``None``.

    Returns
    -------
    Whatever ``imshow`` returns.
    """
    if isinstance(X, pd.DataFrame):
        X = X.values

    # Work on a copy: the default ``{}`` is a single shared object, so writing
    # into it would leak extent/aspect from one call into the next (and would
    # also silently modify a dict owned by the caller).
    im_kws = dict(im_kws)

    x0_min, x0_max = X[:, 0].min(), X[:, 0].max()
    x1_min, x1_max = X[:, 1].min(), X[:, 1].max()

    # These are imshow options, so they must be looked up in im_kws
    # (the original checked reg_kws for 'origin' and always overwrote it).
    if 'origin' not in im_kws:
        im_kws['origin'] = 'lower'

    if 'extent' not in im_kws:
        im_kws['extent'] = (x0_min, x0_max, x1_min, x1_max)

    if 'aspect' not in im_kws:
        im_kws['aspect'] = (x0_max - x0_min) / (x1_max - x1_min)

    if dist_scale is not None:
        # isinstance guard keeps array-valued divisors from being compared
        # element-wise against the string.
        if isinstance(dist_scale, str) and dist_scale == 'normalize':
            X = X / (X.max(axis=0) - X.min(axis=0))
        else:
            X = X / dist_scale

    kneighbours = RadiusNeighborsRegressor(radius=r, **reg_kws)
    kneighbours.fit(X, y)

    # The grid lives in the rescaled space; extent (above) maps it back to
    # the original data coordinates for display.
    xx, yy = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), res),
                         np.linspace(X[:, 1].min(), X[:, 1].max(), res))
    X_grid = np.vstack([xx.ravel(), yy.ravel()]).T

    Y_hat = kneighbours.predict(X_grid).reshape((res, res))

    if ax is None:
        return plt.imshow(Y_hat, **im_kws)
    return ax.imshow(Y_hat, **im_kws)
Пример #2
0
def get_best_rnn_radius(low, high, step):
    """Pick the radius in ``[low, high]`` (stepped by ``step``) with the lowest MAE.

    For every candidate radius a distance-weighted ``RadiusNeighborsRegressor``
    is fitted on the module-level ``train_df`` and scored with mean absolute
    error on the module-level ``test_df``. The error curve is saved to
    ``rnn_param.png`` and the best radius is returned.
    """
    feature_cols = ['temperatura', 'vacuo']
    candidates = []
    errors = []

    for radius in np.arange(low, high + step, step):
        model = RadiusNeighborsRegressor(radius=radius, weights='distance')
        model.fit(train_df[feature_cols], train_df[['energia']])
        predicted = model.predict(test_df[feature_cols])

        candidates.append(radius)
        errors.append(
            metrics.mean_absolute_error(test_df['energia'], predicted))

    # Radius whose error is the smallest of all candidates tried.
    best_radius = candidates[np.argmin(errors)]

    figure, axes = plt.subplots()
    axes.set_title('Parameter evaluation for RNN')
    axes.set_xlabel('Radius')
    axes.set_ylabel('Mean absolute error')
    axes.set_xlim(low, high)
    # Add an extra tick exactly at the winning radius.
    axes.set_xticks(list(axes.get_xticks()) + [best_radius])
    axes.plot(candidates, errors, c='orange', linewidth=2)
    figure.savefig('rnn_param.png')

    return best_radius
Пример #3
0
    def _check_coords_for_distance_weighting(self, coords, check_radius, check_weights, X, y_mean):
        """
        Return the indices of ``coords`` that can be predicted safely.

        A ``RadiusNeighborsRegressor`` with the given radius and weighting is
        fitted on ``(X, y_mean)`` and each coordinate is predicted on its own.
        Coordinates whose prediction raises ``ZeroDivisionError`` are left out.

        Returns a list of integer positions into ``coords``.
        """
        n_coords = len(coords)
        if n_coords == 0:
            # Nothing to probe; also avoids fitting a model for no reason.
            return []

        # The fitted model does not depend on the coordinate being checked,
        # so build and fit it once instead of once per coordinate.
        probe = RadiusNeighborsRegressor(radius=check_radius, weights=check_weights)
        probe.fit(X, y_mean)

        valid_inds = []
        # range() works on both Python 2 and 3 (xrange is Python 2 only).
        for index in range(n_coords):
            try:
                probe.predict([coords[index]])
            except ZeroDivisionError:
                continue
            valid_inds.append(index)
        return valid_inds
Пример #4
0
def sampling_fix(df, name, start, stop, radius, medianFilter, plot):
    """Fit a radius-neighbours regressor of column ``name`` against measured depth.

    Rows are restricted to ``start < 'Measured Depth m' < stop`` and to finite
    values of ``name``. The target is the rolling median of ``name`` over
    ``medianFilter`` samples, back-filled so the leading NaNs produced by the
    window are replaced.

    Returns the fitted regressor when ``plot == 0``; otherwise shows a chart
    of the filtered data and the regression curve and returns ``None``.
    """
    depth_column = 'Measured Depth m'

    # Keep only the requested depth interval (both bounds exclusive).
    inside_window = (df[depth_column] > start) & (df[depth_column] < stop)
    df = df[inside_window]
    # Drop rows where the target is NaN/inf.
    df = df[np.isfinite(df[name])]

    # The regressor expects a 2-D feature matrix: one column of depths.
    depth = df[depth_column]
    X = depth.values.reshape(depth.shape[0], 1)

    from sklearn.neighbors import RadiusNeighborsRegressor
    reg = RadiusNeighborsRegressor(radius=radius, weights='uniform')

    # Median-filter the target; bfill removes the NaNs at the start of the window.
    y = df[name].rolling(medianFilter).median().bfill()

    reg.fit(X, y)

    if plot == 0:
        return reg

    import matplotlib.pyplot as plt
    # Filtered data as points, regression output as a red line.
    plt.scatter(X, y, label=name)
    plt.plot(X, reg.predict(X), c='r', label="prediction")
    plt.legend()
    plt.show()
Пример #5
0
    def _check_coords_for_distance_weighting(self, coords, check_radius,
                                             check_weights, X, y_mean):
        """
        Return the indices of ``coords`` that can be predicted safely.

        A ``RadiusNeighborsRegressor`` with the given radius and weighting is
        fitted on ``(X, y_mean)`` and each coordinate is predicted on its own.
        Coordinates whose prediction raises ``ZeroDivisionError`` are left out.

        Returns a list of integer positions into ``coords``.
        """
        n_coords = len(coords)
        if n_coords == 0:
            # Nothing to probe; also avoids fitting a model for no reason.
            return []

        # The fitted model does not depend on the coordinate being checked,
        # so build and fit it once instead of once per coordinate.
        probe = RadiusNeighborsRegressor(radius=check_radius,
                                         weights=check_weights)
        probe.fit(X, y_mean)

        valid_inds = []
        # range() works on both Python 2 and 3 (xrange is Python 2 only).
        for index in range(n_coords):
            try:
                probe.predict([coords[index]])
            except ZeroDivisionError:
                continue
            valid_inds.append(index)
        return valid_inds
Пример #6
0
def plot_std_dev(folder):
    """Show per-channel mean/standard-deviation statistics of an image stack.

    The stack comes from ``load_images(folder)``; its last axis is iterated as
    the channel axis and statistics are taken along axis 0. For every channel
    two figures are shown:

    1. a 2x2 overview: mean image, std image, mean/std ratio, value histogram;
    2. a scatter of std against mean with a distance-weighted radius-neighbours
       fit (red) and a first-degree polynomial fit (orange).
    """
    data = load_images(folder)

    for channel in range(data.shape[3]):
        stack = data[:, :, :, channel]
        sigma_img = np.std(stack, axis=0)
        mu_img = np.mean(stack, axis=0)

        # --- Figure 1: overview panels -------------------------------------
        image_panels = (
            (1, mu_img, 'mean'),
            (2, sigma_img, 'std'),
            (3, mu_img / sigma_img, 'mean / std'),
        )
        for slot, image, title in image_panels:
            plt.subplot(2, 2, slot)
            display_image(image, z=1)
            plt.title(title)

        plt.subplot(2, 2, 4)
        # One bin per unit step between the smallest and largest sample.
        edges = np.arange(np.min(stack), np.max(stack) + 1)
        plt.hist(stack.flatten(), bins=edges)
        plt.grid(True)
        plt.show()

        # --- Figure 2: std as a function of mean ---------------------------
        mu_flat = mu_img.flatten()
        sigma_flat = sigma_img.flatten()

        smoother = RadiusNeighborsRegressor(radius=10, weights='distance')
        smoother.fit(np.expand_dims(mu_flat, axis=1), sigma_flat)

        line_x = np.arange(np.min(mu_img), np.max(mu_img) + 1)
        line_y = smoother.predict(np.expand_dims(line_x, axis=1))

        coefficients = np.polyfit(mu_flat, sigma_flat, deg=1)
        linear_y = np.polyval(coefficients, line_x)

        plt.scatter(mu_flat, sigma_flat, alpha=0.1, color='black', s=1)
        plt.plot(line_x, line_y, 'r')
        plt.plot(line_x, linear_y, 'orange')
        plt.grid(True)
        plt.show()
Пример #7
0
class KNNDynamicsResidual:
    """Radius-neighbours model that predicts zeros wherever it has no data.

    Wraps a uniformly weighted ``RadiusNeighborsRegressor`` whose radius is
    taken from ``args.neighbor_radius``. Until ``fit`` has been called, and for
    query rows without any training point inside the radius, the prediction is
    a row of zeros.
    """

    def __init__(self, args, env_params):
        self.args = args
        self.env_params = env_params
        self.knn_model = RadiusNeighborsRegressor(
            radius=args.neighbor_radius, weights='uniform')
        # Set to True by fit(); guards every use of knn_model.
        self.is_fit = False

    def fit(self, X, y):
        '''
        Fit the neighbours model on ``(X, y)`` and return the training loss.

        Per the original notes, X is an N x d matrix whose rows are 4D
        vectors (object pos and gripper pos) and y is the matching N x d
        matrix of next object pos and next gripper pos.
        '''
        self.knn_model.fit(X, y)
        self.is_fit = True
        return self.loss(X, y)

    def predict(self, X):
        '''
        Predict targets for the rows of ``X``.

        The result has the same shape as ``X``. Rows with at least one
        neighbour inside the radius get the model's prediction; all other
        rows (and every row when the model is unfitted) stay zero.
        '''
        prediction = np.zeros(X.shape)
        if not self.is_fit:
            return prediction

        neighbor_indices = self.knn_model.radius_neighbors(X)[1]
        # True where the query row has at least one neighbour.
        has_neighbors = [found.shape[0] != 0 for found in neighbor_indices]
        if not any(has_neighbors):
            return prediction

        prediction[has_neighbors] = self.knn_model.predict(X[has_neighbors])
        return prediction

    def get_num_neighbors(self, X):
        '''Return, per row of ``X``, how many training points lie within the radius.'''
        if not self.is_fit:
            return np.zeros(X.shape[0])
        neighbor_indices = self.knn_model.radius_neighbors(X)[1]
        return np.array([found.shape[0] for found in neighbor_indices])

    def loss(self, X, y):
        '''Mean Euclidean distance between ``predict(X)`` and ``y`` (row-wise).'''
        residual = self.predict(X) - y
        return np.linalg.norm(residual, axis=1).mean()
Пример #8
0
class _RadiusNeighborsRegressorImpl:
    """Thin adapter that forwards ``fit``/``predict`` to a wrapped ``Op`` instance."""

    def __init__(self, **hyperparams):
        # Keep the raw hyperparameters and build the wrapped model from them.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model (``y`` is forwarded only when given); return ``self``."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's prediction for ``X``."""
        prediction = self._wrapped_model.predict(X)
        return prediction
Пример #9
0
    def predict(self):
        """
        Train a RadiusNeighborsRegressor and score it on the held-out data.

        The regressor (scikit-learn, https://scikit-learn.org) is created with
        the radius from ``get_ohe_config().rnr_radius``, fitted on
        ``self.X_train`` / ``self.y_train`` and used to predict ``self.X_test``.
        The predictions are compared with ``self.y_test`` through
        ``OneHotPredictor.get_accuracy``; the result is stored in ``self.acc``
        and returned.
        """
        radius = get_ohe_config().rnr_radius
        algorithm = RadiusNeighborsRegressor(radius=radius)
        algorithm.fit(self.X_train, self.y_train)

        predicted = algorithm.predict(self.X_test)
        y_pred = list(predicted)

        self.acc = OneHotPredictor.get_accuracy(y_pred, self.y_test)
        return self.acc
Пример #10
0
def compare_multiple_stacks(folder):
    """Compare mean vs sigma-clipped mean across the image stacks found in ``folder``.

    Every entry of ``folder`` is loaded with ``load_images``. For each of the
    first three channels one figure is shown with one curve per stack: the
    relative difference between the plain mean and the 2-sigma-clipped mean
    (scaled by 1e3), smoothed over mean brightness with a uniform
    radius-neighbours regressor.
    """
    all_data = np.array([
        load_images(os.path.join(folder, entry))
        for entry in tqdm.tqdm(os.listdir(folder))
    ])
    print(all_data.shape)

    skip = 1  # sub-sampling stride for the flattened pixels
    for channel in range(3):
        for stack_index in range(all_data.shape[0]):
            stack = all_data[stack_index][:, :, :, channel]

            plain_mean = np.mean(stack, axis=0)
            clipped = astropy.stats.sigma_clip(stack, sigma=2, axis=0)
            clipped_mean = np.mean(clipped, axis=0)

            # Relative deviation of the plain mean from the clipped mean, x1000.
            ratio = (plain_mean / clipped_mean - 1) * 1E3
            ratio_samples = ratio.flatten()[::skip]
            brightness = plain_mean.flatten()[::skip]

            smoother = RadiusNeighborsRegressor(radius=50, weights='uniform')
            smoother.fit(np.expand_dims(brightness, axis=1), ratio_samples)

            # Evaluate every 10 units, staying 200 units inside the data range.
            x = np.arange(np.min(brightness) + 200,
                          np.max(brightness) + 1 - 200,
                          10)
            line_y = smoother.predict(np.expand_dims(x, axis=1))
            plt.plot(x, line_y, label=str(stack_index))

        plt.legend()
        plt.grid(True)
        plt.show()
Пример #11
0
def compare_error_vs_brightness(folder):
    """Plot mean vs sigma-clipped-mean deviation against brightness, per channel.

    For each channel of the stack returned by ``load_images(folder)`` the
    figure contains: two red reference curves derived from a fixed
    ``bit_flip_change`` offset (128 for channel 1, 256 otherwise), a scatter
    of the per-pixel deviation (x1000), and a green distance-weighted
    radius-neighbours fit of that deviation.
    """
    data = load_images(folder)

    for channel in range(data.shape[3]):
        stack = data[:, :, :, channel]
        n_frames = stack.shape[0]

        plain_mean = np.mean(stack, axis=0)
        clipped = astropy.stats.sigma_clip(stack, sigma=2, axis=0)
        clipped_mean = np.mean(clipped, axis=0)

        # Relative deviation of the plain mean from the clipped mean, x1000.
        ratio = (plain_mean / clipped_mean - 1) * 1E3

        # Same brightness axis is used for the reference curves and the fit.
        x = np.arange(np.min(plain_mean), np.max(plain_mean) + 1)

        if channel == 1:
            bit_flip_change = 128
        else:
            bit_flip_change = 256
        y_top = ((n_frames * x) / (n_frames * x - bit_flip_change) - 1) * 1E3
        y_bottom = ((n_frames * x) / (n_frames * x + bit_flip_change) - 1) * 1E3
        plt.plot(x, y_top, 'r')
        plt.plot(x, y_bottom, 'r')

        mean_flat = plain_mean.flatten()
        ratio_flat = ratio.flatten()
        plt.scatter(mean_flat, ratio_flat, alpha=0.1, color='black', s=1)

        smoother = RadiusNeighborsRegressor(radius=50, weights='distance')
        smoother.fit(np.expand_dims(mean_flat, axis=1), ratio_flat)

        line_y = smoother.predict(np.expand_dims(x, axis=1))
        plt.plot(x, line_y, 'g')

        plt.grid(True)
        plt.show()
def powerproduction():
    """Predict power output for the wind speed submitted in a POST form.

    Reads ``speed`` from ``fl.request.form``, fits a distance-weighted
    radius-neighbours regressor (radius 1.7, Euclidean metric) on the non-zero
    rows of ``powerproduction.csv`` and returns ``{'value': <prediction>}``.
    For any other request method nothing is returned.
    """
    if fl.request.method != "POST":
        return None

    speed = float(fl.request.form['speed'])

    # Load the training data, drop zero-power rows and order by speed.
    frame = pd.read_csv("powerproduction.csv")
    frame = frame[frame.power != 0]
    frame = frame.sort_values('speed')

    speeds = frame['speed'].to_numpy()
    powers = frame['power'].to_numpy()

    model = RadiusNeighborsRegressor(radius=1.7, weights='distance', p = 2)
    # scikit-learn expects a 2-D feature matrix: one column of speeds.
    model.fit(speeds.reshape(-1, 1), powers)

    predicted = model.predict([[speed]])
    return {'value': predicted[0]}
def grid_points_2d(mesh, cell_size=10):
    """Build a surface whose elevation is interpolated from ``mesh`` onto a regular grid.

    A voxel grid is created from ``mesh`` via ``vtk_Voxel.from_mesh``. The z
    value of each cell centre is estimated from the mesh points with a
    distance-weighted radius-neighbours regression on the x/y coordinates.
    The search radius starts at ``0.75 * cell_size`` and grows by a factor of
    1.5 until every cell has a value; cells that already have a value keep it.

    Parameters
    ----------
    mesh : object exposing ``points`` (N x 3 array, z in column 2)
    cell_size : number
        Grid cell size passed to ``vtk_Voxel.from_mesh``; also seeds the radius.

    Returns
    -------
    The extracted surface, with its point z coordinates replaced by the
    interpolated ``'Elevation'`` values.
    """
    grid = vtk_Voxel.from_mesh(mesh, cell_size, 2)

    cells = grid.cell_centers().points

    radius = cell_size * 0.5
    # NaN marks cells that have not received an elevation yet.
    tmat = np.full(cells.shape[0], np.nan)
    print("sample min", np.min(mesh.points[:, 2]), "max",
          np.max(mesh.points[:, 2]))
    while np.any(np.isnan(tmat)):
        # keep increasing radius until all cells have values
        radius *= 1.5
        print("RadiusNeighborsRegressor =", radius, "m")
        # Keyword arguments: ``weights`` is keyword-only in current
        # scikit-learn, so the positional form raises TypeError there.
        neigh = RadiusNeighborsRegressor(radius=radius, weights='distance')
        neigh.fit(mesh.points[:, :2], mesh.points[:, 2])
        rmat = neigh.predict(cells[:, :2])
        # Fill only the cells that are still missing.
        np.putmask(tmat, np.isnan(tmat), rmat)
    print("regression min", np.min(tmat), "max", np.max(tmat))
    grid.cell_arrays['Elevation'] = tmat
    surf = grid.extract_surface()
    # presumably cell-data -> point-data conversion; confirm against the mesh library
    surf = surf.ctp()
    surf.points[:, 2] = surf.point_arrays['Elevation']

    return surf
Пример #14
0
# Model-selection scratchpad: alternative regressors that were tried are kept
# below as commented-out lines, with the author's verdicts. Exactly one `reg`
# assignment is active at a time.
# reg = GradientBoostingRegressor()
# reg = HistGradientBoostingRegressor()
# kernel = DotProduct() + WhiteKernel()
# reg = GaussianProcessRegressor(kernel=kernel, random_state=0) #awful
# reg = LogisticRegression()
# reg = Ridge(alpha=1.0)    # not good
# reg = BayesianRidge()     # not good
# reg = PoissonRegressor()  # not good
# reg = TweedieRegressor()  # not good
# reg = GammaRegressor()    # not good
# reg = MLPRegressor(random_state=0, max_iter=500)  # not good
# reg = DecisionTreeRegressor() # not too great

# reg = KNeighborsRegressor(n_neighbors=5, algorithm="auto", weights="uniform", leaf_size=30)
# Active model: radius-neighbours regression with a fixed radius of 4.3.
reg = RadiusNeighborsRegressor(radius=4.3)

# reg = SVR(C=60, gamma='auto')
# print(cross_val_score(reg, X, y, cv=10))

# Fit on the training split only.
reg.fit(X_train, y_train)
# accuracy = reg.score(X_test, y_test)
# print(accuracy)

# Predict for every row of X (not just the test split) so the result can be
# stored as a column of df.
predictions = reg.predict(X)

df['Prediction'] = predictions
# Keep only the rows labelled "2015-03-13" (presumably df has a datetime
# index, making this a single day -- TODO confirm) and plot actual vs predicted.
df = df.loc["2015-03-13"]
df['Total_Feeder'].plot()
df['Prediction'].plot()
plt.show()
Пример #15
0
def process_data(input_true, input_reco, segment_label, group_label):
    """
    Match reconstructed voxels to true voxels and build blurred energy targets.

    Per the original notes, all arguments are Nx5 arrays produced by the
    data-processing step:

    input_true: energy depositions (value in the last column)
    input_reco: charge depositions
    segment_label: fivetypes label (class index in the last column)
    group_label: particle instance (group id in the last column)

    Columns 0-2 are used as the voxel position and column -2 as the batch id.

    For every row of input_reco the true voxel with the same batch id and
    position is looked up; the M matched voxels are kept. Optionally (module
    flags ADD_MISSING_ENERGY / BLUR_ENERGY) the energy of unmatched true
    voxels is added to the nearest matched voxel and the energies are
    smoothed within each group while preserving the group's total.

    Returns None when no reco voxel matches a true voxel, otherwise a tuple:
    element 0: input_reco rows of the matched voxels + 5 one-hot segment
               columns + 1 energy-target column
    element 1: [size M] group label of each matched voxel
    element 2: [size M] indices in input_true of the matched voxels
    element 3: rows of input_true for the matched voxels, with the last
               column replaced by the adjusted energy
    """
    chosen_indices = []
    chosen_reco_indices = []

    current_batch = 0
    current_batch_selection = np.where(input_true[:, -2] == current_batch)[0]
    current_input_true = input_true[current_batch_selection]
    for r in range(len(input_reco)):
        row = input_reco[r]
        b = row[-2]
        if b != current_batch:
            current_batch = b
            current_batch_selection = np.where(
                input_true[:, -2] == current_batch)[0]
            # Refresh the per-batch slice together with its index table;
            # otherwise later batches are matched against the first batch's
            # voxels and mapped through the wrong indices.
            current_input_true = input_true[current_batch_selection]
        pos = row[:3]
        # Cheap pre-filter on x/y before the exact 3-D comparison below.
        region_selection = np.where((current_input_true[:, 0] == pos[0])
                                    & (current_input_true[:, 1] == pos[1]))[0]
        input_true_region = current_input_true[region_selection]
        for i in range(len(input_true_region)):
            row2 = input_true_region[i]
            pos2 = row2[:3]
            if np.array_equal(pos, pos2):
                # Translate the position inside the batch slice back to an
                # index into the full input_true array.
                chosen_indices.append(
                    current_batch_selection[region_selection[i]])
                chosen_reco_indices.append(r)
                break

    if len(chosen_indices) == 0:
        return None

    chosen_indices = np.array(chosen_indices)
    chosen_reco_indices = np.array(chosen_reco_indices)

    # "lost" = true voxels without a reco match, "found" = matched ones.
    lost_data = np.delete(input_true, chosen_indices, axis=0)
    found_data = input_true[chosen_indices]

    # find where the chosen indices are in the group data
    # (rows stay filled with -1 when they do not apply)
    lost_group_data = -np.ones((len(lost_data), len(lost_data[0])))
    ungrouped_data = -np.ones((len(lost_data), len(lost_data[0])))
    found_group_data = -np.ones((len(found_data), len(found_data[0])))
    for i in range(len(lost_data)):
        row = lost_data[i]
        filter0 = group_label[np.where(group_label[:, -2] == row[-2])]
        filter1 = filter0[np.where(filter0[:, 0] == row[0])]
        filter2 = filter1[np.where(filter1[:, 1] == row[1])]
        filter3 = filter2[np.where(filter2[:, 2] == row[2])]
        if len(filter3) == 0:
            ungrouped_data[i] = row
        else:
            g = filter3[0]
            lost_group_data[i] = g
    for i in range(len(found_data)):
        row = found_data[i]
        filter0 = group_label[np.where(group_label[:, -2] == row[-2])]
        filter1 = filter0[np.where(filter0[:, 0] == row[0])]
        filter2 = filter1[np.where(filter1[:, 1] == row[1])]
        filter3 = filter2[np.where(filter2[:, 2] == row[2])]
        # NOTE(review): raises IndexError if a matched voxel has no group
        # label entry -- assumes every found voxel is grouped.
        g = filter3[0]
        found_group_data[i] = g

    if ADD_MISSING_ENERGY:
        batches = np.unique(input_true[:, 3])
        for b in batches:
            # nearest neighbor assignment within group
            found_groups = np.unique(
                found_group_data[np.where(found_group_data[:, 3] == b)][:, -1])
            lost_batch_mask = lost_group_data[:, 3] == b
            found_batch_mask = found_group_data[:, 3] == b
            for g in found_groups:
                lost_selection = np.where(lost_batch_mask
                                          & (lost_group_data[:, -1] == g))[0]
                found_selection = np.where(found_batch_mask
                                           & (found_group_data[:, -1] == g))[0]
                ldata = lost_data[lost_selection]
                fdata = found_data[found_selection]
                lost_positions = ldata[:, :3]
                found_positions = fdata[:, :3]
                distances = distance_matrix(lost_positions, found_positions)
                closest_points = np.argmin(distances, axis=1)
                closest_energies = ldata[:, -1]
                # Add each lost voxel's energy to its nearest found voxel.
                for i in range(len(closest_points)):
                    found_data[found_selection[
                        closest_points[i]]][-1] += closest_energies[i]

            # associated ungrouped voxels with nearest voxels, regardless of group
            lost_ungrouped = np.where((ungrouped_data[:, 3] == b))[0]
            if len(lost_ungrouped) > 0:
                found_selection = np.where(found_batch_mask)[0]
                ldata = lost_data[lost_ungrouped]
                fdata = found_data[found_selection]
                lost_positions = ldata[:, :3]
                found_positions = fdata[:, :3]
                distances = distance_matrix(lost_positions, found_positions)
                closest_points = np.argmin(distances, axis=1)
                closest_energies = ldata[:, -1]
                for i in range(len(closest_points)):
                    found_data[found_selection[
                        closest_points[i]]][-1] += closest_energies[i]

    if BLUR_ENERGY:
        blur_kernel = 3
        for g in np.unique(found_group_data[:, -1]):
            inds = np.where(found_group_data[:, -1] == g)
            selection = found_data[inds]
            total_energy = np.sum(selection[:, -1])

            coords = selection[:, :3]
            energies = selection[:, -1]
            # Replace each energy by the average over neighbours within
            # blur_kernel, then rescale so the group total is unchanged.
            neigh = RadiusNeighborsRegressor(radius=blur_kernel)
            neigh.fit(coords, energies)
            selection[:, -1] = neigh.predict(coords)
            selection[:, -1] *= total_energy / np.sum(selection[:, -1])
            found_data[inds, -1] = selection[:, -1]

    segment_indices = segment_label[chosen_indices, -1].astype(int)
    segment_one_hot = np.zeros((len(segment_indices), 5))
    segment_one_hot[np.arange(len(segment_indices)), segment_indices] = 1
    out = np.concatenate((input_reco[chosen_reco_indices], segment_one_hot,
                          np.expand_dims(found_data[:, -1], axis=1)),
                         axis=1)
    return np.array(out), found_group_data[:, -1], chosen_indices, found_data
Пример #16
0
# Fit three regressors on the same two features ('temperatura', 'vacuo') to
# predict 'energia'. The kNN neighbour count and the rNN radius are chosen by
# the get_best_* helpers over the given search ranges.
knn_regressor = KNeighborsRegressor(n_neighbors=get_best_knn_n_neighbors(
    1, 100),
                                    weights='distance')
knn_regressor.fit(train_df[['temperatura', 'vacuo']], train_df[['energia']])

rnn_regressor = RadiusNeighborsRegressor(radius=get_best_rnn_radius(
    1.7, 3.0, 0.05),
                                         weights='distance')
rnn_regressor.fit(train_df[['temperatura', 'vacuo']], train_df[['energia']])

# Plain linear regression, used as the third model in the comparison.
lr_regressor = LinearRegression()
lr_regressor.fit(train_df[['temperatura', 'vacuo']], train_df[['energia']])

# Predictions of each model on the held-out frame.
energia_knn = knn_regressor.predict(test_df[['temperatura', 'vacuo']])
energia_rnn = rnn_regressor.predict(test_df[['temperatura', 'vacuo']])
energia_lr = lr_regressor.predict(test_df[['temperatura', 'vacuo']])

# Figure set-up for the error comparison: fixed y-range of 0..5.
fig, ax = plt.subplots()
ax.set_title('Evaluation of regression algorithms')
ax.set_ylabel('Mean absolute error')
ax.set_ylim(0, 5)
ax.set_yticks(np.arange(0, 5, 1.5))

rects = ax.bar(x=['kNN', 'rNN', 'LR'],
               height=[
                   metrics.mean_absolute_error(test_df['energia'],
                                               energia_knn),
                   metrics.mean_absolute_error(test_df['energia'],
                                               energia_rnn),
                   metrics.mean_absolute_error(test_df['energia'], energia_lr),
Пример #17
0



# Cross-validation loop (Python 2 script): out-of-fold predictions are
# collected in y_pred so the whole data set can be scored at once afterwards.
for train_index, test_index in kf:
    # for each iteration of the for loop we'll do a test train split
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit the scaler on the training fold only, then reuse it for the test fold.
    t = StandardScaler()
    X_train = t.fit_transform(X_train)
    clf.fit(X_train, y_train) # Train clf on the scaled training fold

    X_test = t.transform(X_test)
    y_pred[test_index] = clf.predict(X_test) # Store the predictions for this fold's test rows

# NOTE(review): the R^2 value computed here is discarded.
r2_score(y, y_pred)
rmse = sqrt(mean_squared_error(y, y_pred))

print "RadiusNeighborsRegressor RMSE: " , rmse


### Prediction ###
# NOTE(review): clf is the model from the last fold and test_feats is passed
# in as-is here -- confirm whether it was scaled beforehand.
result = clf.predict(test_feats)
result = np.asarray(result)
np.savetxt("result.csv", result, delimiter=",")



Пример #18
0
def process_data(data):
    """
    Match reconstructed voxels to true voxels and build blurred energy targets.

    data: mapping with the keys 'input_true', 'input_reco', 'segment_label'
          and 'group_label'. In each array columns 0-2 are used as the voxel
          position, column -2 as the batch id and the last column as the
          value (energy, charge, class index or group id respectively).

    For every row of input_reco the true voxel with the same batch id and
    position is looked up; the M matched voxels are kept. Optionally (module
    flags ADD_MISSING_ENERGY / BLUR_ENERGY) the energy of unmatched true
    voxels is added to the nearest matched voxel and the energies are
    smoothed within each group while preserving the group's total.

    returns: None when nothing matches, otherwise (input, groups)
        input: input_reco rows of the matched voxels + 5 one-hot segment
               columns + 1 adjusted-energy column
        groups: [size M] group label of each matched voxel
    """
    input_true = data['input_true']
    input_reco = data['input_reco']
    segment_label = data['segment_label']
    group_label = data['group_label']

    chosen_indices = []
    chosen_reco_indices = []

    current_batch = 0
    current_batch_selection = np.where(input_true[:, -2] == current_batch)[0]
    current_input_true = input_true[current_batch_selection]
    for r in range(len(input_reco)):
        row = input_reco[r]
        b = row[-2]
        if b != current_batch:
            current_batch = b
            current_batch_selection = np.where(
                input_true[:, -2] == current_batch)[0]
            # Refresh the per-batch slice together with its index table;
            # otherwise later batches are matched against the first batch's
            # voxels and mapped through the wrong indices.
            current_input_true = input_true[current_batch_selection]
        pos = row[:3]
        # Cheap pre-filter on x/y before the exact 3-D comparison below.
        region_selection = np.where((current_input_true[:, 0] == pos[0])
                                    & (current_input_true[:, 1] == pos[1]))[0]
        input_true_region = current_input_true[region_selection]
        for i in range(len(input_true_region)):
            row2 = input_true_region[i]
            pos2 = row2[:3]
            if np.array_equal(pos, pos2):
                # Translate the position inside the batch slice back to an
                # index into the full input_true array.
                chosen_indices.append(
                    current_batch_selection[region_selection[i]])
                chosen_reco_indices.append(r)
                break

    if len(chosen_indices) == 0:
        return None

    chosen_indices = np.array(chosen_indices)
    chosen_reco_indices = np.array(chosen_reco_indices)

    # "lost" = true voxels without a reco match, "found" = matched ones.
    lost_data = np.delete(input_true, chosen_indices, axis=0)
    found_data = input_true[chosen_indices]

    # find where the chosen indices are in the group data
    # (rows stay filled with -1 when they do not apply)
    lost_group_data = -np.ones((len(lost_data), len(lost_data[0])))
    ungrouped_data = -np.ones((len(lost_data), len(lost_data[0])))
    found_group_data = -np.ones((len(found_data), len(found_data[0])))
    for i in range(len(lost_data)):
        row = lost_data[i]
        filter0 = group_label[np.where(group_label[:, -2] == row[-2])]
        filter1 = filter0[np.where(filter0[:, 0] == row[0])]
        filter2 = filter1[np.where(filter1[:, 1] == row[1])]
        filter3 = filter2[np.where(filter2[:, 2] == row[2])]
        if len(filter3) == 0:
            ungrouped_data[i] = row
        else:
            g = filter3[0]
            lost_group_data[i] = g
    for i in range(len(found_data)):
        row = found_data[i]
        filter0 = group_label[np.where(group_label[:, -2] == row[-2])]
        filter1 = filter0[np.where(filter0[:, 0] == row[0])]
        filter2 = filter1[np.where(filter1[:, 1] == row[1])]
        filter3 = filter2[np.where(filter2[:, 2] == row[2])]
        # NOTE(review): raises IndexError if a matched voxel has no group
        # label entry -- assumes every found voxel is grouped.
        g = filter3[0]
        found_group_data[i] = g

#     lost_group_data = np.delete(group_label, chosen_indices, axis=0)
#     found_group_data = group_label[chosen_indices]

    if ADD_MISSING_ENERGY:
        batches = np.unique(input_true[:, 3])
        for b in batches:
            # nearest neighbor assignment within group
            found_groups = np.unique(
                found_group_data[np.where(found_group_data[:, 3] == b)][:, -1])
            lost_batch_mask = lost_group_data[:, 3] == b
            found_batch_mask = found_group_data[:, 3] == b
            for g in found_groups:
                lost_selection = np.where(lost_batch_mask
                                          & (lost_group_data[:, -1] == g))[0]
                found_selection = np.where(found_batch_mask
                                           & (found_group_data[:, -1] == g))[0]
                ldata = lost_data[lost_selection]
                fdata = found_data[found_selection]
                lost_positions = ldata[:, :3]
                found_positions = fdata[:, :3]
                distances = distance_matrix(lost_positions, found_positions)
                closest_points = np.argmin(distances, axis=1)
                closest_energies = ldata[:, -1]
                # Add each lost voxel's energy to its nearest found voxel.
                for i in range(len(closest_points)):
                    found_data[found_selection[
                        closest_points[i]]][-1] += closest_energies[i]

            # associated ungrouped voxels with nearest voxels, regardless of group
            lost_ungrouped = np.where((ungrouped_data[:, 3] == b))[0]
            if len(lost_ungrouped) > 0:
                found_selection = np.where(found_batch_mask)[0]
                ldata = lost_data[lost_ungrouped]
                fdata = found_data[found_selection]
                lost_positions = ldata[:, :3]
                found_positions = fdata[:, :3]
                distances = distance_matrix(lost_positions, found_positions)
                closest_points = np.argmin(distances, axis=1)
                closest_energies = ldata[:, -1]
                for i in range(len(closest_points)):
                    found_data[found_selection[
                        closest_points[i]]][-1] += closest_energies[i]

    if BLUR_ENERGY:
        blur_kernel = 3
        for g in np.unique(found_group_data[:, -1]):
            inds = np.where(found_group_data[:, -1] == g)
            selection = found_data[inds]
            total_energy = np.sum(selection[:, -1])

            coords = selection[:, :3]
            energies = selection[:, -1]
            # Replace each energy by the average over neighbours within
            # blur_kernel, then rescale so the group total is unchanged.
            neigh = RadiusNeighborsRegressor(radius=blur_kernel)
            neigh.fit(coords, energies)
            selection[:, -1] = neigh.predict(coords)
            selection[:, -1] *= total_energy / np.sum(selection[:, -1])
            found_data[inds, -1] = selection[:, -1]

    segment_indices = segment_label[chosen_indices, -1].astype(int)
    segment_one_hot = np.zeros((len(segment_indices), 5))
    segment_one_hot[np.arange(len(segment_indices)), segment_indices] = 1
    out = np.concatenate((input_reco[chosen_reco_indices], segment_one_hot,
                          np.expand_dims(found_data[:, -1], axis=1)),
                         axis=1)
    return np.array(out), found_group_data[:, -1]
Пример #19
0
# Notebook-style comparison of k-nearest-neighbours and radius-neighbours
# regression on the same train/test split. Bare expressions such as
# `y_predict_knn[0:10]` only display a value in an interactive session.
KNN_reg = KNeighborsRegressor(n_neighbors=6, weights='uniform')

KNN_reg.fit(x_train, y_train)

y_predict_knn = KNN_reg.predict(x_test)

# Peek at the first ten kNN predictions.
y_predict_knn[0:10]

from sklearn.neighbors import RadiusNeighborsRegressor

# First attempt: use the result of x_train.std() as the neighbourhood radius.
RNN_reg = RadiusNeighborsRegressor(radius=x_train.std())

RNN_reg.fit(x_train, y_train)

y_predict_rnn = RNN_reg.predict(x_test)

# Peek at the first ten radius-neighbours predictions.
y_predict_rnn[0:10]

# Second attempt: library-default parameters. This rebinds RNN_reg; the
# prediction below is not stored.
RNN_reg = RadiusNeighborsRegressor()

RNN_reg.fit(x_train, y_train)

RNN_reg.predict(x_test)

from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error metrics are evaluated for the kNN predictions only.
mean_absolute_error(y_test, y_predict_knn)

mean_squared_error(y_test, y_predict_knn)
Пример #20
0
# Python 2 script: fit a radius-neighbours regressor on the training data
# already held in X / Y, then measure the mean absolute error over all test files.
neigh = RadiusNeighborsRegressor(radius = KNN_RADIUS)
neigh.fit(X, Y)

# Read test dataset: every directory entry whose name contains 'test'
testFiles = [file for file in os.listdir(TEST_DATASET_DIRECTORY) if str(file).find('test') >= 0]
print 'Number of test files:', len(testFiles)

# Ground truth and predictions accumulated across all test files.
TEST_Y_ALL = np.array([])
TEST_Y_ALL_PREDICTED = np.array([])
for file in testFiles:
	df = pd.read_csv(TEST_DATASET_DIRECTORY + '/' + file, header=None)		# header=None: the first line is data, not column names
	# Target is column `columns-1`; features are the columns selected by
	# `:columns-2` (presumably label-based, inclusive -- TODO confirm).
	df_y = df.ix[:,columns-1]
	df_x = df.ix[:,:columns-2]
	
	# NOTE: X and Y are rebound here; the training arrays are no longer
	# reachable under these names after the first iteration.
	X = np.array(df_x)
	Y = np.array(df_y)
	
	predictedY = neigh.predict(X)
	predictedY = np.nan_to_num(predictedY)	# replace NaN predictions with 0 so the error statistics below stay finite
	
	TEST_Y_ALL = np.append(TEST_Y_ALL, Y)
	TEST_Y_ALL_PREDICTED = np.append(TEST_Y_ALL_PREDICTED, predictedY)
		
print 'TEST_Y_ALL size:', "{:,}".format(len(TEST_Y_ALL))

# Absolute error per sample, then its mean.
ERROR = abs(TEST_Y_ALL - TEST_Y_ALL_PREDICTED)
print 'Method: KNN for Radius=', KNN_RADIUS
mean = ERROR.mean()
print 'Mean error:',mean
Пример #21
0
# Rank teams by the current row order (1 = first row), then re-sort by the
# 2013 forecast and rank again.
# NOTE(review): DataFrame.sort and the `headers=` keyword of to_csv are not
# part of the current pandas API (sort_values / `header=`) -- confirm the
# pandas version this script targets.
df2012['rnk_2012'] = range(1,df2012.shape[0]+1)
df2012.sort('f2013', ascending=False, inplace=True)
df2012['rnk_2013'] = range(1,df2012.shape[0]+1)
#df2012.to_csv('f2013_projection_3yrs.csv', headers=True,index=True)

##########
### PROJECTIONS - 2014
##########
# Get 2011/12/13 stats for 2014 projection
y=2014
y3 = [y-1,y-2,y-3]
# Only teams present in both the most recent and the oldest of the three years.
tms_include = np.intersect1d(df[df.Year == y3[0]].Team.values, df[df.Year == y3[2]].Team.values)
# Per team: mean of Xvar over the three years (suffix _3yr_avg) joined with
# the mean over the most recent year alone (suffix _yr3).
df2013 = pd.merge(df[(df.Year.isin(y3)) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(), df[(df.Year == y3[0]) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(), how='left',left_index=True, right_index=True, suffixes=['_3yr_avg','_yr3'])
# Forecast with the model bound to lin3.
df2013['f2014'] = lin3.predict(df2013.values)
df2013.sort('f_yr3', ascending=False, inplace=True)
df2013['rnk_2013'] = range(1,df2013.shape[0]+1)
df2013.sort('f2014', ascending=False, inplace=True)
df2013['rnk_2014'] = range(1,df2013.shape[0]+1)
#df2013.to_csv('f2014_projection_3yrs.csv', headers=True,index=True)

# Get 2011/12/13 stats for 2014 projection
# Same pipeline as above, rebuilt from scratch, but forecasting with the
# model bound to rn3; this variant is the one written to disk.
y=2014
y3 = [y-1,y-2,y-3]
tms_include = np.intersect1d(df[df.Year == y3[0]].Team.values, df[df.Year == y3[2]].Team.values)
df2013 = pd.merge(df[(df.Year.isin(y3)) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(), df[(df.Year == y3[0]) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(), how='left',left_index=True, right_index=True, suffixes=['_3yr_avg','_yr3'])
df2013['f2014'] = rn3.predict(df2013.values)
df2013.sort('f_yr3', ascending=False, inplace=True)
df2013['rnk_2013'] = range(1,df2013.shape[0]+1)
df2013.sort('f2014', ascending=False, inplace=True)
df2013['rnk_2014'] = range(1,df2013.shape[0]+1)
df2013.to_csv('f2014_projection_3yrs_rn.csv', headers=True,index=True)
from data.transformed_data import *
from data.raw_data import data_dir
from sklearn.neighbors import RadiusNeighborsRegressor

# Fit a radius-neighbours regressor with its default settings.
regressor = RadiusNeighborsRegressor()
regressor.fit(train_x, train_y)

# Score the model on the validation split with the project's RMSLE helper.
validation_prediction = regressor.predict(validation_x)
validation_score = RMSLE(validation_y, validation_prediction)
print('RadiusNeighborsRegressor rmse:{}'.format(validation_score))

# Predict the test set, undo the log1p scale with expm1 and floor at zero.
predict = regressor.predict(test[col])
test['visitors'] = np.expm1(predict)
test['visitors'] = test['visitors'].clip(lower=0.)

# Write the submission file (id + predicted visitors, no index column).
submission_path = data_dir + 'submission_radius_neighbors_regressor.csv'
submission_columns = ['id', 'visitors']
test[submission_columns].to_csv(submission_path, index=False)
Пример #23
0
    df_removed_one_x = df_all_x
    df_removed_one_x = df_removed_one_x.drop(df_removed_one_x.index[[i]])
    X = np.array(df_removed_one_x)
    #print('X')
    #print(X)

    df_x_test = df_all_x.iloc[i]
    X_TEST = np.array(df_x_test)
    #print('X_TEST')
    #print(X_TEST)

    df_removed_one_y = df_all_y
    df_removed_one_y = df_removed_one_y.drop(df_removed_one_y.index[[i]])
    Y = np.array(df_removed_one_y)
    #print('Y')
    #print(Y)

    neigh = RadiusNeighborsRegressor(radius=KNN_RADIUS)
    neigh.fit(X, Y)

    predicted_one_y = neigh.predict([X_TEST])
    predicted_one_y_2 = float(np.asarray(predicted_one_y))
    predictedY.append(predicted_one_y_2)

    print(repr(i + 1) + ' / ' + repr(row_count))
#print(predictedY)
np.savetxt("predicted_Y_KNN_RADIUS_2.csv",
           predictedY,
           delimiter=",",
           fmt='%10.10f')
Пример #24
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error
from functions import errors

# Load the data set and discard the two columns named 'month' and 'day'.
data = pd.read_csv("forestfires.csv")
data = data.drop(labels=['month', 'day'], axis=1)

# Target is the 'area' column; every remaining column is a feature.
y = data.area
x = data.drop(labels=['area'], axis=1)

# Fixed random_state makes the train/test split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=10)

# NOTE(review): the regressor runs with its default radius on unscaled
# features; test samples with no training neighbour inside that radius get
# NaN predictions -- confirm this is acceptable for `errors`.
reg = RadiusNeighborsRegressor()
reg.fit(x_train, y_train)
y_predict = reg.predict(x_test)

# Print predictions next to the true values.  The target array is taken once
# via .values instead of calling the deprecated Series.ravel() on every
# iteration of an index loop.
for predicted, actual in zip(y_predict, y_test.values):
    print("pred %s act %s" % (predicted, actual))

# Project helper (from `functions`) that reports the error metrics.
errors(y_test, y_predict)
Пример #25
0
import pandas as pd
import numpy as np
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn import cross_validation

# sys.argv is read below, but the imports of this script do not provide sys.
import sys

# Command-line arguments:
#   sys.argv[1] -- HDF file with the training data
#   sys.argv[2] -- HDF file with the test data
#   sys.argv[3] -- path of the CSV file the results are written to

# Read the training and test data
df = pd.read_hdf(sys.argv[1])
tdf = pd.read_hdf(sys.argv[2])

# Convert to the NumPy arrays used by scikit-learn.
# (.values replaces DataFrame/Series.as_matrix(), which was removed from
# pandas.)
X_train = df[['lat', 'lon']].values
y_train = df.length.values * 15
X_test = tdf[['lat', 'lon']].values
y_test = tdf.length.values * 15

id_test = tdf.index.to_series().values

# Initialise the model: neighbours within the radius, weighted by distance
model = RadiusNeighborsRegressor(radius=0.0005, weights='distance')

# Training
model.fit(X_train, y_train)

# Prediction
y_try = model.predict(X_test)

# Write the results: one row per test sample, indexed by its original index
resdf = pd.DataFrame({'idx': id_test, 'predict': y_try, 'actual': y_test}).set_index('idx')

resdf.to_csv(sys.argv[3])
Пример #26
0
def mydist(x, y):
    """Return how many of the first three components of x and y differ.

    Only indices 0, 1 and 2 are compared; any further components are
    ignored.  Each differing position adds 1.0, so the result is a float
    in {0.0, 1.0, 2.0, 3.0}.

    Presumably the three positions are the assignment id, the week day and
    the time of a sample -- verify against the caller's column order.
    """
    mismatches = 0.
    for position in (0, 1, 2):
        if not (x[position] == y[position]):
            mismatches += 1.
    return mismatches

#dist = neighbors.DistanceMetric.get_metric('pyfunc', func=distance)

# sklearn.cross_validation was removed from scikit-learn; train_test_split
# lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Run the project's preprocessing pipeline on the four columns of interest
# and keep only the first 1000 rows.
preprocessing = fp.feature_preprocessing()
preprocessing.full_preprocess(used_columns=['ASS_ID', 'WEEK_DAY', 'TIME', 'CSPL_RECEIVED_CALLS'])
data = preprocessing.data[:1000]

# Target: number of received calls; features: every remaining column.
Y = data['CSPL_RECEIVED_CALLS']
X = data.drop(['CSPL_RECEIVED_CALLS'], axis=1)

# Hold out 10% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=0)

# A user-defined distance is passed directly as a callable `metric`; the old
# metric='pyfunc', func=... spelling is not accepted by the estimator's
# constructor.  mydist only yields whole-number distances, so radius=0.5
# admits exact matches (distance 0) only.
neigh = RadiusNeighborsRegressor(radius=0.5, metric=mydist, algorithm='auto')
print('fitting...')
neigh.fit(X_train, y_train)
print('fitted')
#error = neigh.score(X_test, y_test)

#print(error)

y_pred = neigh.predict(X_test)

Пример #27
0
from math import sqrt

from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import RadiusNeighborsRegressor

# --- 1-nearest-neighbour regressor, scored on its own training data -------
print("KNN ...{}".format(""))
knnreg = KNeighborsRegressor(n_neighbors=1)
knnreg.fit(data, Y)
y_KNNpred = knnreg.predict(data)
knn_train_mse = mean_squared_error(Y, y_KNNpred)
trainrms = sqrt(knn_train_mse)
print("KNN PCA : trainrms {}".format(trainrms))

# --- Distance-weighted radius-neighbours regressor, same evaluation -------
print("Rad ...{}".format(""))
radreg = RadiusNeighborsRegressor(weights='distance', radius=10.3)
radreg.fit(data, Y)
y_radpred = radreg.predict(data)
rad_train_mse = mean_squared_error(Y, y_radpred)
trainrms = sqrt(rad_train_mse)
print("Rad PCA : trainrms {}".format(trainrms))

#=============================================================================
# end Feature selection
#=============================================================================

# Build (but do not yet fit) a random forest regressor with out-of-bag
# scoring enabled and a fixed seed.
print("RF ...{}".format(""))
rf_settings = dict(n_estimators=301, random_state=0, oob_score=True)
RFregr = RandomForestRegressor(**rf_settings)

# Fit to data using Maximum Likelihood Estimation of the parameters
Пример #28
0
	# Load the training and test targets from ';'-separated CSV files.
	train_labels = np.array(pd.read_csv('train_labels.csv', sep= ';'))
	test_labels = np.array(pd.read_csv('test_labels.csv', sep= ';'))

	# Start of the timed section: it covers the imports, the label encoding,
	# the fit and the prediction below.
	inicio = time.time()

	# import the regression model (radius neighbours)
	from sklearn.neighbors import RadiusNeighborsRegressor 
	from sklearn import preprocessing

	# NOTE(review): the training targets are passed through LabelEncoder, so
	# the model is fitted on encoded integer labels, while the metrics further
	# down compare its predictions against the raw test_labels -- confirm
	# this is intended.
	lab_enc = preprocessing.LabelEncoder()
	training_scores_encoded = lab_enc.fit_transform(train_labels)
	# train the model on the dataset
	regression = RadiusNeighborsRegressor(radius=1.0).fit(train_data, training_scores_encoded)

	# predict
	predictions_labels = regression.predict(test_data)

	# End of the timed section; store the elapsed seconds in a one-row table.
	fim = time.time()
	df_time = pd.DataFrame({'Execution Time:' : [fim-inicio]})

	# Write the timing to '<output dir>/time_<name_folder>' (';'-separated).
	output_path = os.path.join('/home/isadorasalles/Documents/Regressao/radius_neighbors', 'time_'+name_folder)
	df_time.to_csv(output_path, sep=';')

	from sklearn import metrics

	# One-row table with MAE, MSE, RMSE and R2 of the predictions against
	# test_labels.
	df_metrics = pd.DataFrame({'Mean Absolute Error' : [metrics.mean_absolute_error(test_labels, predictions_labels)], 'Mean Squared Error' : [metrics.mean_squared_error(test_labels, predictions_labels)],  
		'Root Mean Squared Error': [np.sqrt(metrics.mean_squared_error(test_labels, predictions_labels))], 'R2 Score': [metrics.r2_score(test_labels, predictions_labels)]})

	# Write the metrics to '<output dir>/metrics_<name_folder>' (';'-separated).
	output_path = os.path.join('/home/isadorasalles/Documents/Regressao/radius_neighbors', 'metrics_'+name_folder)
	df_metrics.to_csv(output_path, sep=';')
Пример #29
0
import math

# Remove these names from the module's global namespace (presumably to
# release the objects they refer to before training).  A missing name raises
# KeyError, exactly as the individual `del` statements would.
for _stale_name in ('profilesDF', 'profiles', 'profilesLSo', 'profilesLS',
                    'row', 'tmpLS', 'tmpAGE', 'profsTOlikes', 'i', 'tmpIND'):
    del globals()[_stale_name]
del _stale_name

# Seed NumPy's global random generator.
# np.random.seed() returns None, so myRand is always None.
seed = 7
myRand = np.random.seed(seed)

# Hold out 1500 samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    likesMAT, consARR, test_size=1500)

# The neighbourhood radius comes from the first command-line argument.
myRAD = float(sys.argv[1])
radNN = RadiusNeighborsRegressor(radius=myRAD)
radNN.fit(X_train, y_train)

# Report the root-mean-squared error on the held-out samples.
y_pred = radNN.predict(X_test)
myRMSE = math.sqrt(
    metrics.mean_squared_error(y_test, y_pred))
print("cons, Radius neighbors:  ", str(myRAD), " ", myRMSE)

# joblib.dump(radNN, "/Users/jamster/radNN-A-cons.xz", compress=9)

# impRadNN = joblib.load("/Users/jamster/radNN-A-cons.xz")
Пример #30
0
# Tree-based and ensemble regressors.  The SVR models (svr_rbf, svr_lin,
# svr_poly) and the neighbour models (knng, rng) used below are created
# outside this excerpt.
# NOTE(review): recent scikit-learn releases renamed criterion 'mse' to
# 'squared_error' -- confirm against the installed version.
dtr = DecisionTreeRegressor(criterion='mse')
abr = AdaBoostRegressor(n_estimators=50)
rfr = RandomForestRegressor(n_estimators=50)

# Fit every model on the same (X, y).  Each line is a tuple expression whose
# value is discarded; only the side effect of fit() matters.
svr_rbf.fit(X, y), svr_lin.fit(X, y), svr_poly.fit(X, y)
knng.fit(X, y), rng.fit(X, y), dtr.fit(X, y)
abr.fit(X, y), rfr.fit(X, y)

# All predictions below are made on the training inputs X themselves.

# Support vector regression
y_rbf = svr_rbf.predict(X)
y_lin = svr_lin.predict(X)
y_poly = svr_poly.predict(X)

# KNN regression
y_knng = knng.predict(X)
y_rng = rng.predict(X)

# Decision tree regression
y_dtr = dtr.predict(X)

# ensemble
y_abr = abr.predict(X)
y_rfr = rfr.predict(X)

# Results
sns.set(style='whitegrid')
colors = sns.color_palette('Set2', 8)
# `names` and `data_pred` are parallel lists: names[k] labels data_pred[k].
names = ['RBF model', 'Linear model', 'Polynomial model', 'KNR', 'RNR', 'DTR', 'ABR', 'RFR']
data_pred = [y_rbf, y_lin, y_poly, y_knng, y_rng, y_dtr, y_abr, y_rfr]
plt.figure(1)
# Scatter plot of the raw data points in red.
plt.scatter(X, y, color='red', label='data')