def nn_analysis(self, X_train, X_test, y_train, y_test, data_set_name,
                analysis_name='Neural Network'):
    clf = MLPClassifier(activation='relu', learning_rate='constant',
                        shuffle=True, solver='adam', random_state=0,
                        max_iter=1000, batch_size=60)

    # time the fit and predict calls, appending to the shared timing log
    with open(self.nn_time_filename, 'a+') as text_file:
        t0 = time()
        clf.fit(X_train, y_train)
        text_file.write(analysis_name.lower() + ' fit time: %0.3f seconds\n' % (time() - t0))

        t0 = time()
        y_pred = clf.predict(X_test)
        text_file.write(analysis_name.lower() + ' predict time: %0.3f seconds\n' % (time() - t0))

    cv = StratifiedShuffleSplit(n_splits=100, test_size=0.2, random_state=0)

    title = 'Learning Curve (' + analysis_name + ') for ' + data_set_name
    name = data_set_name.lower() + '_' + analysis_name.lower() + '_nn.png'
    filename = './' + self.out_dir + '/' + name

    ##
    ## Plots
    ##
    ph = plot_helper()

    ##
    ## Learning Curve
    ##
    ph.plot_learning_curve(clf, title, X_train, y_train, ylim=None, cv=cv,
                           n_jobs=-1, filename=filename)
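##
## plot_helper.plot_learning_curve is defined elsewhere in this repo; the
## sketch below is NOT that implementation, just a minimal equivalent built
## on sklearn's learning_curve so the call above is easier to follow.
## Assumptions: matplotlib is available and the estimator supports fit/score.
##
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve

def plot_learning_curve_sketch(estimator, title, X, y, cv, filename):
    # score the estimator on growing fractions of the training set
    sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=-1,
        train_sizes=np.linspace(0.1, 1.0, 5))

    plt.figure()
    plt.title(title)
    plt.xlabel('Training examples')
    plt.ylabel('Score')
    plt.plot(sizes, train_scores.mean(axis=1), 'o-', label='training score')
    plt.plot(sizes, test_scores.mean(axis=1), 'o-', label='cross-validation score')
    plt.legend(loc='best')
    plt.savefig(filename)
    plt.close()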
def pca_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ##
    ## PCA
    ##
    pca = PCA(n_components=X_train_scl.shape[1], svd_solver='full')
    X_pca = pca.fit_transform(X_train_scl)

    ##
    ## Plots
    ##
    ph = plot_helper()

    ##
    ## Explained Variance Plot
    ##
    title = 'Explained Variance (PCA) for ' + data_set_name
    name = data_set_name.lower() + '_pca_evar_err'
    filename = './' + self.out_dir + '/' + name + '.png'
    self.plot_explained_variance(pca, title, filename)

    ##
    ## Reconstruction Error
    ##
    all_mses, rng = self.reconstruction_error(X_train_scl, PCA)

    title = 'Reconstruction Error (PCA) for ' + data_set_name
    name = data_set_name.lower() + '_pca_rec_err'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(rng, [all_mses.mean(0)], [all_mses.std(0)], ['mse'],
                   ['red'], ['o'], title, 'Number of Features',
                   'Reconstruction Error', filename)

    ##
    ## Manually compute eigenvalues (should match pca.explained_variance_)
    ##
    cov_mat = np.cov(X_train_scl.T)
    eigen_values, eigen_vectors = np.linalg.eig(cov_mat)
    print(eigen_values)

    sorted_eigen_values = sorted(eigen_values, reverse=True)

    title = 'Eigen Values (PCA) for ' + data_set_name
    name = data_set_name.lower() + '_pca_eigen'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_simple_bar(np.arange(1, len(sorted_eigen_values) + 1, 1),
                       sorted_eigen_values,
                       np.arange(1, len(sorted_eigen_values) + 1, 1).astype('str'),
                       'Principal Components', 'Eigenvalue', title, filename)

    ## TODO Factor this out to new method
    ##
    ## Scatter
    ##
    # (commented-out scatter-plot code elided)
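##
## reconstruction_error is a helper defined elsewhere in this class and is
## shared by the PCA, ICA, and RP analyses. The sketch below shows one
## plausible implementation: fit the given algorithm at each component count,
## map the data back to the original space, and record the MSE. The
## pseudo-inverse branch is an assumption for GaussianRandomProjection,
## which has no inverse_transform.
##
def reconstruction_error_sketch(X, algo_class, n_runs=5):
    rng = range(1, X.shape[1] + 1)
    all_mses = np.zeros((n_runs, X.shape[1]))
    for run in range(n_runs):
        for j, n in enumerate(rng):
            model = algo_class(n_components=n)
            X_reduced = model.fit_transform(X)
            if hasattr(model, 'inverse_transform'):
                X_rec = model.inverse_transform(X_reduced)
            else:
                # random projections: reconstruct via the pseudo-inverse
                # of the (n_components, n_features) projection matrix
                X_rec = X_reduced.dot(np.linalg.pinv(model.components_.T))
            all_mses[run, j] = np.mean((X - X_rec) ** 2)
    return all_mses, rng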
def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ##
    ## Plots
    ##
    ph = plot_helper()

    scores = []
    train_scores = []
    rng = range(1, X_train_scl.shape[1] + 1)
    for i in rng:
        # NOTE: LDA yields at most min(n_classes - 1, n_features) components;
        # newer scikit-learn raises if n_components exceeds that bound
        lda = LinearDiscriminantAnalysis(n_components=i)

        # cross validation (modern KFold API: n_splits, then split())
        cv = KFold(n_splits=3, shuffle=True)
        cv_scores = []
        for train, test in cv.split(X_train_scl):
            lda.fit(X_train_scl[train], y_train[train])
            score = lda.score(X_train_scl[test], y_train[test])
            cv_scores.append(score)

        mean_score = np.mean(cv_scores)
        scores.append(mean_score)

        # train score
        lda = LinearDiscriminantAnalysis(n_components=i)
        lda.fit(X_train_scl, y_train)
        train_score = lda.score(X_train_scl, y_train)
        train_scores.append(train_score)

        print(i, mean_score)

    ##
    ## Score Plot
    ##
    title = 'Score Summary Plot (LDA) for ' + data_set_name
    name = data_set_name.lower() + '_lda_score'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(rng, [scores, train_scores], [None, None],
                   ['cross validation score', 'training score'],
                   cm.viridis(np.linspace(0, 1, 2)), ['o', '*'],
                   title, 'n_components', 'Score', filename)
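##
## Sketch only: the hand-rolled CV loop above mirrors what sklearn's
## cross_val_score does, written out compactly under the same capping
## assumption on n_components noted above.
##
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import cross_val_score

def lda_cv_scores_sketch(X, y):
    n_max = min(len(np.unique(y)) - 1, X.shape[1])
    return [cross_val_score(LinearDiscriminantAnalysis(n_components=i),
                            X, y, cv=3).mean()
            for i in range(1, n_max + 1)]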
def run_policy_iteration_and_plot(self, grid, k, d, discount, epsilon=0.001,
                                  max_iter=10000, plot=True):
    ## policy iteration
    T, R, start, goals, r_temp = self.__convert_grid_to_mdp(grid, k, d)

    # pi = mdptoolbox.mdp.PolicyIteration(T, R, discount, max_iter=1000000)
    pi = mdptoolbox.mdp.PolicyIterationModified(T, R, discount,
                                                epsilon=epsilon,
                                                max_iter=max_iter)

    with open('./output/policyiter.txt', 'a') as text_file:
        t0 = time()
        pi.run()
        final_time = time() - t0
        if plot:
            text_file.write('PolicyIteration: %0.3f seconds. Iters: %i\n'
                            % (final_time, pi.iter))

    p = np.array(pi.policy)
    p.shape = grid.shape

    v = np.array(pi.V)
    v.shape = grid.shape

    mean_v = v.mean()

    if d:
        d_str = 'dir'
    else:
        d_str = 'non-dir'

    if plot:
        ph = plot_helper()

        title = (str(grid.shape[0]) + 'x' + str(grid.shape[1]) + ' Grid r: '
                 + str(k) + '(' + d_str + '), discount: ' + str(discount))
        fn = ('./output/' + str(grid.shape[0]) + 'x' + str(grid.shape[1])
              + 'policyiter_' + str(k) + '_' + d_str + '_' + str(discount)
              + '.png')

        # ph.plot_heatmap(v, grid, p, title, fn)
        ph.plot_results2(v, grid, p, title, fn)

    return final_time, mean_v, pi.iter, pi.policy
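##
## __convert_grid_to_mdp is a private helper defined elsewhere; what matters
## for the mdptoolbox calls above is the layout of its output: T has shape
## (A, S, S) with each T[a] row-stochastic, and R has shape (S, A). A minimal
## deterministic 4-action sketch under those assumptions (the real helper
## also handles the k/d movement-noise options and start/goal extraction):
##
def grid_to_mdp_sketch(grid):
    rows, cols = grid.shape
    n_states, n_actions = rows * cols, 4
    T = np.zeros((n_actions, n_states, n_states))
    R = np.zeros((n_states, n_actions))
    moves = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # up, down, left, right
    for r in range(rows):
        for c in range(cols):
            s = r * cols + c
            for a, (dr, dc) in enumerate(moves):
                nr, nc = r + dr, c + dc
                if not (0 <= nr < rows and 0 <= nc < cols):
                    nr, nc = r, c  # hitting the boundary leaves the agent in place
                T[a, s, nr * cols + nc] = 1.0
                R[s, a] = grid[nr, nc]  # reward of the landing cell
    return T, R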
def run2(self):
    fn = './input/grid1.csv'
    grid = pd.read_csv(fn, header=None).values

    # note: r, start, goals are computed but unused below, since the AIMA
    # GridMDP is hardcoded
    r = self.__create_reward_grid(grid)
    start, goals = self.__get_grid_terminals(grid)

    mdp = GridMDP([[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
                   [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
                   [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
                   [-1, -100, -100, -100, -100, -100, -100, -100, -100, -100],
                   [-1, -1, -1, -1, -1, -1, -1, -1, -1, 1],
                   [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
                   [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
                   [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
                   [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
                   [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]],
                  terminals=[(9, 5)], init=(0, 5))

    '''
    mdp = GridMDP([[-0.01, -0.01, -0.01, +1.00],
                   [-0.01, None,  -0.01, -0.01],
                   [-0.01, -0.01, -0.01, -0.01]],
                  terminals=[(3, 1)])

    mdp = GridMDP([[-1.0, +1.0]], terminals=[(0, 0), (1, 0)])
    '''

    vi = value_iteration(mdp, 0.01)
    print(vi)

    vi_grid = np.ndarray((10, 10))
    for k in vi.keys():
        vi_grid[k] = vi[k]

    ph = plot_helper()
    ph.plot_heatmap(vi_grid, None, None, 'title', 'value_iter_mdp.png')

    pi = policy_iteration(mdp)
    print(pi)

    b = best_policy(mdp, vi)
    print(b)
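##
## value_iteration / policy_iteration / best_policy come from the AIMA
## textbook code. For reference, value iteration repeats the Bellman backup
## below until the largest update is small. This is a sketch assuming the
## AIMA MDP interface: mdp.states, mdp.gamma, mdp.R(s), mdp.actions(s), and
## mdp.T(s, a) returning a list of (probability, next_state) pairs.
##
def value_iteration_sketch(mdp, epsilon=0.001):
    U = {s: 0 for s in mdp.states}
    while True:
        U_prev = U.copy()
        delta = 0
        for s in mdp.states:
            # Bellman backup: reward plus best expected discounted utility
            U[s] = mdp.R(s) + mdp.gamma * max(
                sum(p * U_prev[s2] for (p, s2) in mdp.T(s, a))
                for a in mdp.actions(s))
            delta = max(delta, abs(U[s] - U_prev[s]))
        if delta < epsilon * (1 - mdp.gamma) / mdp.gamma:
            return U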
def cluster_3d_plot(self, df, k, cls_type, data_set_name, analysis_name=None):
    # one plot per ordered feature triple (permutations, so each unordered
    # triple appears several times with the axes swapped)
    p = list(itertools.permutations(range(df.shape[1]), 3))
    print(p)

    for u in p:
        f1 = df.columns[u[0]]
        f2 = df.columns[u[1]]
        f3 = df.columns[u[2]]
        print('Feature1: ', f1, ', Feature2: ', f2, ', Feature3: ', f3)

        X_train = df.values[:, (u[0], u[1], u[2])]
        X_train_scl = RobustScaler().fit_transform(X_train)

        ##
        ## Cluster Routine
        ##
        if 'KMeans' in cls_type:
            cls = KMeans(n_clusters=k, algorithm='full')
        elif 'GaussianMixture' in cls_type:
            cls = GaussianMixture(n_components=k, covariance_type='full')
        else:
            raise AttributeError('cls_type: ' + cls_type + ' not supported.')

        cls.fit(X_train_scl)
        y_pred = cls.predict(X_train_scl)

        ##
        ## Plots
        ##
        ph = plot_helper()

        ##
        ## 3d Scatter Plot
        ##
        title = (cls_type + ' Clusters 3D: ' + str(f1) + '\nvs ' + str(f2)
                 + ' vs ' + str(f3) + ', k=' + str(k))

        if analysis_name is not None:
            name = analysis_name.lower()
        else:
            name = cls_type.lower()

        name = data_set_name.lower() + '_' + name + '3d_cluster'
        filename = ('./' + self.out_dir + '/' + str(f1).lower() + '_'
                    + str(f2).lower() + '_' + str(f3).lower() + '_' + str(k)
                    + '_' + name + '_' + data_set_name + '_cluster.png')

        ph.plot_3d_scatter(X_train_scl[:, 0], X_train_scl[:, 1],
                           X_train_scl[:, 2], y_pred, f1, f2, f3,
                           title, filename)
def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)

    # average the per-feature kurtosis over many projections, since each
    # GaussianRandomProjection draws a different random matrix
    ks = []
    for i in range(1000):
        ##
        ## Random Projection
        ##
        rp = GaussianRandomProjection(n_components=X_train_scl.shape[1])
        rp.fit(X_train_scl)
        X_train_rp = rp.transform(X_train_scl)

        ks.append(kurtosis(X_train_rp))

    mean_k = np.mean(ks, 0)

    ##
    ## Plots
    ##
    ph = plot_helper()

    title = 'Kurtosis (Randomized Projection) for ' + data_set_name
    name = data_set_name.lower() + '_rp_kurt'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_simple_bar(np.arange(1, len(mean_k) + 1, 1), mean_k,
                       np.arange(1, len(mean_k) + 1, 1).astype('str'),
                       'Feature Index', 'Kurtosis', title, filename)

    ##
    ## Reconstruction Error
    ##
    all_mses, rng = self.reconstruction_error(X_train_scl, GaussianRandomProjection)

    title = 'Reconstruction Error (RP) for ' + data_set_name
    name = data_set_name.lower() + '_rp_rec_err'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(rng, [all_mses.mean(0)], [all_mses.std(0)], ['mse'],
                   ['red'], ['o'], title, 'Number of Features',
                   'Reconstruction Error', filename)
def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ##
    ## ICA
    ##
    ica = FastICA(n_components=X_train_scl.shape[1])
    X_ica = ica.fit_transform(X_train_scl)

    ##
    ## Plots
    ##
    ph = plot_helper()

    kurt = kurtosis(X_ica)
    print(kurt)

    title = 'Kurtosis (FastICA) for ' + data_set_name
    name = data_set_name.lower() + '_ica_kurt'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_simple_bar(np.arange(1, len(kurt) + 1, 1), kurt,
                       np.arange(1, len(kurt) + 1, 1).astype('str'),
                       'Feature Index', 'Kurtosis', title, filename)

    ##
    ## Reconstruction Error
    ##
    all_mses, rng = self.reconstruction_error(X_train_scl, FastICA)

    title = 'Reconstruction Error (ICA) for ' + data_set_name
    name = data_set_name.lower() + '_ica_rec_err'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(rng, [all_mses.mean(0)], [all_mses.std(0)], ['mse'],
                   ['red'], ['o'], title, 'Number of Features',
                   'Reconstruction Error', filename)
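##
## Why kurtosis: FastICA recovers maximally non-Gaussian components, and
## excess kurtosis (scipy's default Fisher definition) is ~0 for a Gaussian,
## so components with large values are the interesting ones. A quick
## standalone illustration (not part of the analysis above):
##
from scipy.stats import kurtosis
demo_rng = np.random.RandomState(0)
print(kurtosis(demo_rng.normal(size=100000)))   # ~0: Gaussian
print(kurtosis(demo_rng.laplace(size=100000)))  # ~3: heavy-tailed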
def run_and_plot_qlearner(self, grid, d, k, alpha, gamma, rar, rard=0.99,
                          n_restarts=1000, n_iter=100000):
    T, R, start, goals = self.__convert_grid_to_mdp(grid, k, d)

    q = QLearningEx(T, R, grid, start, goals, n_restarts=n_restarts,
                    alpha=alpha, gamma=gamma, rar=rar, radr=rard,
                    n_iter=n_iter)
    # q = mdptoolbox.mdp.QLearning(T, R, 0.9)
    q.run()
    print(q.Q)

    p = np.array(q.policy)
    p.shape = grid.shape

    v = np.array(q.V)
    v.shape = grid.shape

    if d:
        d_str = 'dir'
    else:
        d_str = 'non-dir'

    ph = plot_helper()

    # common suffixes shared by every title and filename below
    shape_str = str(grid.shape[0]) + 'x' + str(grid.shape[1])
    param_title = ('a: ' + str(q.alpha) + ', g: ' + str(q.gamma) + ', d: '
                   + str(q.orig_rar) + '@' + str(q.radr) + ', r: ' + str(k)
                   + '(' + d_str + ')')
    param_fn = (str(q.alpha) + '_' + str(q.gamma) + '_' + str(q.orig_rar)
                + '_' + str(q.radr) + '_' + str(k) + '_' + d_str + '.png')

    title = shape_str + ' Tracker\n' + param_title
    fn = './output/' + shape_str + 'tracker_' + param_fn
    # tracker = normalize(q.tracker[::-1], axis=1, norm='l1')
    ph.plot_heatmap_simple(q.tracker[::-1], title, fn)

    title = shape_str + ' Iterations\n' + param_title
    fn = './output/' + shape_str + 'iterations_' + param_fn
    ph.plot_series(range(len(q.episode_iterations)), [q.episode_iterations],
                   [None], ['iterations'],
                   # cm.viridis(np.linspace(0, 1, 1)),
                   ['black'], [''],
                   title, 'Episodes', 'Iterations', fn)

    title = shape_str + ' Rewards/Iterations\n' + param_title
    fn = './output/' + shape_str + 'rewards_iterations_' + param_fn
    ph.plot_series(range(len(q.episode_reward)),
                   [q.episode_reward, q.episode_iterations], [None, None],
                   ['rewards', 'iterations'],
                   # cm.viridis(np.linspace(0, 1, 1)),
                   ['black', 'blue'], ['', ''],
                   title, 'Episodes', 'Rewards/Iterations', fn)

    title = shape_str + ' Rewards\n' + param_title
    fn = './output/' + shape_str + 'rewards_' + param_fn
    ph.plot_series(range(len(q.episode_reward)), [q.episode_reward], [None],
                   ['rewards'],
                   # cm.viridis(np.linspace(0, 1, 1)),
                   ['black'], [''],
                   title, 'Episodes', 'Rewards', fn)

    title = shape_str + ' Timing\n' + param_title
    fn = './output/' + shape_str + 'timing_' + param_fn
    ph.plot_series(range(len(q.episode_times)), [q.episode_times], [None],
                   ['seconds'],
                   # cm.viridis(np.linspace(0, 1, 1)),
                   ['black'], [''],
                   title, 'Episodes', 'Time in seconds', fn)

    title = shape_str + ' Grid\n' + param_title
    fn = './output/' + shape_str + 'qlearn_' + param_fn
    # ph.plot_heatmap(v, grid[::-1], p, title, fn)
    ph.plot_results2(v, grid, p, title, fn)
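##
## QLearningEx is a custom learner defined elsewhere in this repo. The plots
## above only make sense given its core loop, so here is the standard tabular
## update it is assumed to perform, with rar (random action rate) decayed by
## radr after each step. Sketch only, under those naming assumptions:
##
def q_update_sketch(Q, s, a, s_next, reward, alpha, gamma, rar, radr):
    # one-step temporal-difference update toward the greedy successor value
    Q[s, a] += alpha * (reward + gamma * Q[s_next].max() - Q[s, a])
    rar *= radr  # decay exploration over time
    return Q, rar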
def gmm_analysis(self, X_train, X_test, y_train, y_test, data_set_name,
                 max_clusters, analysis_name='GMM'):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    em_bic = []
    em_aic = []
    em_completeness_score = []
    em_homogeneity_score = []
    em_measure_score = []
    em_adjusted_rand_score = []
    em_adjusted_mutual_info_score = []

    cluster_range = np.arange(2, max_clusters + 1, 1)
    for k in cluster_range:
        print('K Clusters: ', k)

        ##
        ## Expectation Maximization
        ##
        em = GaussianMixture(n_components=k, covariance_type='full')
        em.fit(X_train_scl)
        em_pred = em.predict(X_train_scl)

        em_bic.append(em.bic(X_train_scl))
        em_aic.append(em.aic(X_train_scl))

        # cluster-label agreement metrics against the true labels
        y_train_score = y_train.reshape(y_train.shape[0],)
        em_homogeneity_score.append(homogeneity_score(y_train_score, em_pred))
        em_completeness_score.append(completeness_score(y_train_score, em_pred))
        em_measure_score.append(v_measure_score(y_train_score, em_pred))
        em_adjusted_rand_score.append(adjusted_rand_score(y_train_score, em_pred))
        em_adjusted_mutual_info_score.append(adjusted_mutual_info_score(y_train_score, em_pred))

    ##
    ## Plots
    ##
    ph = plot_helper()

    ##
    ## BIC/AIC Plot
    ##
    title = 'Information Criterion Plot (' + analysis_name + ') for ' + data_set_name
    name = data_set_name.lower() + '_' + analysis_name.lower() + '_ic'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(cluster_range, [em_bic, em_aic], [None, None],
                   ['bic', 'aic'], cm.viridis(np.linspace(0, 1, 2)),
                   ['o', '*'], title, 'Number of Clusters',
                   'Information Criterion', filename)

    ##
    ## Score Plot
    ##
    title = 'Score Summary Plot (' + analysis_name + ') for ' + data_set_name
    name = data_set_name.lower() + '_' + analysis_name.lower() + '_score'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(cluster_range,
                   [em_homogeneity_score, em_completeness_score,
                    em_measure_score, em_adjusted_rand_score,
                    em_adjusted_mutual_info_score],
                   [None, None, None, None, None],
                   ['homogeneity', 'completeness', 'measure',
                    'adjusted_rand', 'adjusted_mutual_info'],
                   cm.viridis(np.linspace(0, 1, 5)),
                   ['o', '^', 'v', '>', '<'],
                   title, 'Number of Clusters', 'Score', filename)
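##
## Reading the BIC/AIC plot: the conventional choice is the component count
## minimizing BIC. A one-liner sketch, assuming em_bic is aligned with
## cluster_range as in the loop above:
##
def bic_optimal_k_sketch(cluster_range, em_bic):
    return cluster_range[int(np.argmin(em_bic))]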
def kmeans_analysis(self, X_train, X_test, y_train, y_test, data_set_name,
                    max_clusters, analysis_name='K-Means'):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    km_inertias = []
    km_completeness_score = []
    km_homogeneity_score = []
    km_measure_score = []
    km_adjusted_rand_score = []
    km_adjusted_mutual_info_score = []

    cluster_range = np.arange(2, max_clusters + 1, 1)
    for k in cluster_range:
        print('K Clusters: ', k)

        ##
        ## KMeans
        ##
        # NOTE: algorithm='full' and n_jobs are older scikit-learn arguments;
        # newer releases use algorithm='lloyd' and drop n_jobs
        km = KMeans(n_clusters=k, algorithm='full', n_jobs=-1)
        km.fit(X_train_scl)

        # inertia is the sum of squared distances from each point to its center
        km_inertias.append(km.inertia_)

        # cluster-label agreement metrics against the true labels
        y_train_score = y_train.reshape(y_train.shape[0],)
        km_homogeneity_score.append(homogeneity_score(y_train_score, km.labels_))
        km_completeness_score.append(completeness_score(y_train_score, km.labels_))
        km_measure_score.append(v_measure_score(y_train_score, km.labels_))
        km_adjusted_rand_score.append(adjusted_rand_score(y_train_score, km.labels_))
        km_adjusted_mutual_info_score.append(adjusted_mutual_info_score(y_train_score, km.labels_))

        ##
        ## Silhouette Plot
        ##
        title = 'Silhouette Plot (' + analysis_name + ', k=' + str(k) + ') for ' + data_set_name
        name = data_set_name.lower() + '_' + analysis_name.lower() + '_silhouette_' + str(k)
        filename = './' + self.out_dir + '/' + name + '.png'
        self.silhouette_plot(X_train_scl, km.labels_, title, filename)

    ##
    ## Plots
    ##
    ph = plot_helper()

    ##
    ## Elbow Plot
    ##
    title = 'Elbow Plot (' + analysis_name + ') for ' + data_set_name
    name = data_set_name.lower() + '_' + analysis_name.lower() + '_elbow'
    filename = './' + self.out_dir + '/' + name + '.png'

    # line to help visualize the elbow
    lin = ph.extended_line_from_first_two_points(km_inertias, 0, 2)

    ph.plot_series(cluster_range, [km_inertias, lin], [None, None],
                   ['inertia', 'projected'],
                   cm.viridis(np.linspace(0, 1, 2)), ['o', ''],
                   title, 'Number of Clusters', 'Inertia', filename)

    ##
    ## Score Plot
    ##
    title = 'Score Summary Plot (' + analysis_name + ') for ' + data_set_name
    name = data_set_name.lower() + '_' + analysis_name.lower() + '_score'
    filename = './' + self.out_dir + '/' + name + '.png'
    ph.plot_series(cluster_range,
                   [km_homogeneity_score, km_completeness_score,
                    km_measure_score, km_adjusted_rand_score,
                    km_adjusted_mutual_info_score],
                   [None, None, None, None, None],
                   ['homogeneity', 'completeness', 'measure',
                    'adjusted_rand', 'adjusted_mutual_info'],
                   cm.viridis(np.linspace(0, 1, 5)),
                   ['o', '^', 'v', '>', '<'],
                   title, 'Number of Clusters', 'Score', filename)
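##
## silhouette_plot is defined elsewhere in this class; a minimal sketch of
## the standard construction using sklearn's silhouette_samples (matplotlib
## assumed), stacking one sorted band of per-sample coefficients per cluster:
##
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_samples, silhouette_score

def silhouette_plot_sketch(X, labels, title, filename):
    sample_values = silhouette_samples(X, labels)
    plt.figure()
    y_lower = 10
    for i in np.unique(labels):
        vals = np.sort(sample_values[labels == i])
        plt.fill_betweenx(np.arange(y_lower, y_lower + len(vals)), 0, vals)
        y_lower += len(vals) + 10
    # dashed line marks the overall mean silhouette score
    plt.axvline(silhouette_score(X, labels), color='red', linestyle='--')
    plt.title(title)
    plt.xlabel('Silhouette coefficient')
    plt.ylabel('Cluster')
    plt.savefig(filename)
    plt.close()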
def run(self):
    print('Running part 1')

    # self.__test_movement()

    for grid_file in ['./input/grid1.csv', './input/grid2.csv']:
        if grid_file == './input/grid1.csv':
            grid_name = 'small_grid'
        else:
            grid_name = 'large_grid'

        grid = pd.read_csv(grid_file, header=None).values

        ph = plot_helper()
        title = str(grid.shape[0]) + 'x' + str(grid.shape[1]) + ' Grid Layout'
        fn = './output/' + str(grid.shape[0]) + 'x' + str(grid.shape[1]) + '_layout.png'
        ph.plot_layout(grid, title, fn)

        self.run_value_iteration_and_plot(grid, k=0.8, d=True, discount=0.95, epsilon=0.00001)
        self.run_policy_iteration_and_plot(grid, k=0.8, d=True, discount=0.95, epsilon=0.00001)

        self.run_value_iteration_and_plot(grid, k=1.0, d=True, discount=0.95, epsilon=0.00001)
        self.run_value_iteration_and_plot(grid, k=0.8, d=True, discount=0.95, epsilon=0.00001)
        self.run_value_iteration_and_plot(grid, k=0.7, d=True, discount=0.95, epsilon=0.00001)

    print("Start qlearner")

    grid_file = './input/grid1.csv'
    grid = pd.read_csv(grid_file, header=None).values
    self.run_and_plot_qlearner(grid, d=True, k=0.8, alpha=0.2, gamma=0.8,
                               rar=0.99, rard=0.99, n_restarts=100, n_iter=1000000)

    grid_file = './input/grid2.csv'
    grid = pd.read_csv(grid_file, header=None).values
    self.run_and_plot_qlearner(grid, d=True, k=0.8, alpha=0.2, gamma=0.8,
                               rar=0.99, rard=0.99, n_restarts=300, n_iter=1000000)
    self.run_and_plot_qlearner(grid, d=True, k=0.8, alpha=0.2, gamma=0.8,
                               rar=0.99, rard=0.999999, n_restarts=300, n_iter=1000000)