def compute_n_crystal_found(filename, test_range=range(0, 101, 10)):
    X, y = read_data(filename)
    scores = []
    for t in test_range:
        # index of the last experiment to include once t points have been sampled
        ind = N_INIT_POINTS - 1 + t
        # number of crystal-class experiments found after the initial design
        scores.append(np.sum(y[N_INIT_POINTS:ind] == CRYSTAL_CLASS))
    return test_range, scores
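
# Usage sketch for compute_n_crystal_found(): plot how many crystal-class hits a run
# accumulates as more experiments are sampled. The result file path below is a
# placeholder, not one of the repository's actual output files.
import matplotlib.pyplot as plt

t_range, n_crystals = compute_n_crystal_found('results/run_0.csv')
plt.plot(list(t_range), n_crystals, 'o-')
plt.xlabel('Number of sampled experiments')
plt.ylabel('Crystal-class experiments found')
plt.show()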
def compute_volume_convex_hull(filename, test_range=range(0, 101, 10)):
    X, y = read_data(filename)
    scores = []
    for t in test_range:
        ind = N_INIT_POINTS - 1 + t
        X_train = X[0:ind]
        y_train = y[0:ind]
        # volume of the convex hull spanned by the crystal-class points seen so far
        points = X_train[y_train == CRYSTAL_CLASS, :]
        hull = ConvexHull(points)
        scores.append(hull.volume)
    return test_range, scores
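
# Quick sanity check of the hull metric above: for the eight corners of the unit
# cube, scipy's ConvexHull reports a volume of 1.0. This is a standalone
# illustration of the hull.volume attribute, not part of the analysis pipeline.
import itertools

import numpy as np
from scipy.spatial import ConvexHull

cube_corners = np.array(list(itertools.product([0.0, 1.0], repeat=3)))
print(ConvexHull(cube_corners).volume)  # prints 1.0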
def compute_volume_neighbors(filename, radius, test_range=range(0, 101, 10)):
    X, y = read_data(filename)
    scores = []
    for t in test_range:
        ind = N_INIT_POINTS - 1 + t
        X_train = X[0:ind]
        y_train = y[0:ind]
        points = X_train[y_train == CRYSTAL_CLASS, :]
        # average number of crystal-class neighbours within `radius` of each crystal point
        tree = BallTree(points)
        score, _ = average_n_neighbors(tree, points, radius)
        scores.append(score)
    return test_range, scores
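
# average_n_neighbors() is defined elsewhere in the repository and is not shown in
# this excerpt. The sketch below is an assumption about its behaviour: return the
# mean neighbour count within `radius` together with the per-point counts (each
# point also counts itself here).
import numpy as np
from sklearn.neighbors import BallTree

def average_n_neighbors_sketch(tree, points, radius):
    # count_only=True makes BallTree.query_radius return neighbour counts directly
    counts = tree.query_radius(points, r=radius, count_only=True)
    return np.mean(counts), counts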
if len(sys.argv) != 2:
    print('Please specify a root folder as argument')
    sys.exit(1)

# seed the random number generators from the folder name for reproducibility
seed = int(sys.argv[1])
random.seed(seed)
np.random.seed(seed)

# output folder
root_folder = os.path.join(HERE_PATH, sys.argv[1])
filetools.ensure_dir(root_folder)

# load data
current_datafile = os.path.join(HERE_PATH, 'init_data.csv')
X, y = read_data(current_datafile)

# check everything is fine: each row of dispensed volumes must sum to the total volume
np.testing.assert_array_almost_equal(np.sum(X, axis=1), TOTAL_VOLUME_IN_ML,
                                     decimal=N_DECIMAL_EQUAL)

# draw random compositions and rescale each row to the total volume
X_selected = np.random.rand(N_GENERATED, X.shape[1])
X_selected = proba_normalize_row(X_selected)
X_selected = TOTAL_VOLUME_IN_ML * X_selected

# save new csv
# xout
X_out = np.vstack((X, X_selected))
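
# proba_normalize_row() is imported from elsewhere in the repository and is not
# shown in this excerpt. The sketch below only illustrates the behaviour the script
# relies on (an assumption): rescale each row so it sums to 1 before multiplying by
# TOTAL_VOLUME_IN_ML.
import numpy as np

def proba_normalize_row_sketch(X):
    # divide every row by its own sum so rows become composition fractions
    row_sums = np.sum(X, axis=1, keepdims=True)
    return X / row_sums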
def get_data_xp(xp_key):
    return read_data(FILENAMES[xp_key])
def get_new_data(filename):
    X, y = read_data(filename)
    return X[N_INIT_POINTS:, :], y[N_INIT_POINTS:]
def get_init_data(filename):
    X, y = read_data(filename)
    return X[:N_INIT_POINTS, :], y[:N_INIT_POINTS]
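
# Usage sketch: split one result file into the shared initial design and the points
# selected afterwards by the sampling strategy. The file path is a placeholder.
X_init, y_init = get_init_data('results/run_0.csv')
X_new, y_new = get_new_data('results/run_0.csv')
print(X_init.shape, X_new.shape)
print(np.unique(y_new, return_counts=True))  # class balance of the newly sampled points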
def get_min_sample_per_class(y):
    # smallest number of samples observed for any class, used to build balanced test sets
    min_sample_per_class = np.inf
    for class_number in np.unique(y):
        n_sample = np.sum(y == class_number)
        if n_sample < min_sample_per_class:
            min_sample_per_class = n_sample
    return min_sample_per_class


def compute_learning_curve(filename, test_data, blank_clf, test_range=range(0, 101, 10)):
    # held-out test set used to score each retrained classifier
    X_test, y_test = test_data
    X, y = read_data(filename)
    scores = []
    confusions = []
    for t in test_range:
        ind = N_INIT_POINTS - 1 + t
        X_train = X[0:ind]
        y_train = y[0:ind]
        # retrain a fresh copy of the blank classifier on the first `ind` experiments
        clf = clone(blank_clf)
        clf.fit(X_train, y_train)
        prediction_accuracy = clf.score(X_test, y_test)
        scores.append(prediction_accuracy)