Exemplo n.º 1
0
    def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Fit FastICA on the robust-scaled training data and save a bar
        chart of the kurtosis of each independent component.
        """
        scaler = RobustScaler()
        train_scaled = scaler.fit_transform(X_train)
        test_scaled = scaler.transform(X_test)  # kept for parity; unused below

        # Independent Component Analysis, keeping every feature dimension.
        ica = FastICA(n_components=train_scaled.shape[1])
        components = ica.fit_transform(train_scaled)

        ph = plot_helper()

        # High kurtosis indicates strongly non-Gaussian components.
        kurt = kurtosis(components)
        print(kurt)

        title = 'Kurtosis (FastICA) for ' + data_set_name
        name = data_set_name.lower() + '_ica_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'

        indices = np.arange(1, len(kurt) + 1, 1)
        ph.plot_simple_bar(indices, kurt, indices.astype('str'),
                           'Feature Index', 'Kurtosis', title, filename)
Exemplo n.º 2
0
 def nn_analysis(self, X_train, X_test, y_train, y_test, data_set_name, analysis_name='Neural Network'):
     """Train an MLP classifier, log fit/predict wall-clock times, and save
     a learning-curve plot for the given data set.
     """
     clf = MLPClassifier(activation='relu', learning_rate='constant',
                         shuffle=True, solver='adam', random_state=0,
                         max_iter=1000, batch_size=60)

     with open(self.nn_time_filename, 'a+') as text_file:
         started = time()
         clf.fit(X_train, y_train)
         text_file.write(analysis_name.lower() + ' fit time: %0.3f seconds\n' % (time() - started))

         started = time()
         y_pred = clf.predict(X_test)
         text_file.write(analysis_name.lower() + ' predict time: %0.3f seconds\n' % (time() - started))

     # 100 stratified shuffle splits for a smooth learning curve.
     cv = StratifiedShuffleSplit(n_splits=100, test_size=0.2, random_state=0)

     title = 'Learning Curve (' + analysis_name + ') for ' + data_set_name
     name = data_set_name.lower() + '_' + analysis_name.lower() + '_nn.png'
     filename = './' + self.out_dir + '/' + name

     ph = plot_helper()
     ph.plot_learning_curve(clf, title, X_train, y_train, ylim=None, cv=cv, n_jobs=-1, filename=filename)
Exemplo n.º 3
0
    def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Average per-feature kurtosis over 1000 Gaussian random
        projections and save the means as a bar chart.
        """
        train_scaled = RobustScaler().fit_transform(X_train)

        kurtoses = []
        for _ in range(1000):
            # Fresh random projection each trial, same dimensionality.
            projector = GaussianRandomProjection(n_components=train_scaled.shape[1])
            projector.fit(train_scaled)
            kurtoses.append(kurtosis(projector.transform(train_scaled)))

        mean_k = np.mean(kurtoses, 0)

        ph = plot_helper()

        title = 'Kurtosis (Randomized Projection) for ' + data_set_name
        name = data_set_name.lower() + '_rp_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'

        indices = np.arange(1, len(mean_k) + 1, 1)
        ph.plot_simple_bar(indices, mean_k, indices.astype('str'),
                           'Feature Index', 'Kurtosis', title, filename)
Exemplo n.º 4
0
    def pca_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Run full-rank PCA on the robust-scaled training data and save
        three plots: explained variance, reconstruction error vs. number of
        components, and the sorted eigenvalues of the covariance matrix.
        """
        scl = RobustScaler()
        X_train_scl = scl.fit_transform(X_train)
        X_test_scl = scl.transform(X_test)  # NOTE(review): not used below

        ##
        ## PCA
        ##
        # Keep every component (full rank) so all variance is accounted for.
        pca = PCA(n_components=X_train_scl.shape[1], svd_solver='full')
        X_pca = pca.fit_transform(X_train_scl)

        ##
        ## Plots
        ##
        ph = plot_helper()

        ##
        ## Explained Variance Plot
        ##
        title = 'Explained Variance (PCA) for ' + data_set_name
        name = data_set_name.lower() + '_pca_evar_err'
        filename = './' + self.out_dir + '/' + name + '.png'
        self.plot_explained_variance(pca, title, filename)

        ##
        ## Reconstruction Error
        ##
        all_mses, rng = self.reconstruction_error(X_train_scl, PCA)

        title = 'Reconstruction Error (PCA) for ' + data_set_name
        name = data_set_name.lower() + '_pca_rec_err'
        filename = './' + self.out_dir + '/' + name + '.png'
        ph.plot_series(rng, [all_mses.mean(0)], [all_mses.std(0)], ['mse'],
                       ['red'], ['o'], title, 'Number of Features',
                       'Reconstruction Error', filename)

        ##
        ## Manually compute eigenvalues
        ##
        # Eigen-decomposition of the feature covariance; the sorted
        # eigenvalues should mirror PCA's explained variances.
        cov_mat = np.cov(X_train_scl.T)
        eigen_values, eigen_vectors = np.linalg.eig(cov_mat)
        print(eigen_values)
        sorted_eigen_values = sorted(eigen_values, reverse=True)

        title = 'Eigen Values (PCA) for ' + data_set_name
        name = data_set_name.lower() + '_pca_eigen'
        filename = './' + self.out_dir + '/' + name + '.png'

        ph.plot_simple_bar(
            np.arange(1,
                      len(sorted_eigen_values) + 1, 1), sorted_eigen_values,
            np.arange(1,
                      len(sorted_eigen_values) + 1, 1).astype('str'),
            'Principal Components', 'Eigenvalue', title, filename)

        ## TODO Factor this out to new method
        ##
        ## Scatter
        ##
        '''
Exemplo n.º 5
0
 def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
     """Average kurtosis over 1000 Gaussian random projections of the
     scaled training data and save the means as a bar chart.
     """
     train_scaled = RobustScaler().fit_transform(X_train)

     kurtoses = []
     for _ in range(1000):
         # New random projection per trial, keeping all dimensions.
         projector = GaussianRandomProjection(n_components=train_scaled.shape[1])
         projector.fit(train_scaled)
         kurtoses.append(kurtosis(projector.transform(train_scaled)))

     mean_k = np.mean(kurtoses, 0)

     ph = plot_helper()

     title = 'Kurtosis (Randomized Projection) for ' + data_set_name
     name = data_set_name.lower() + '_rp_kurt'
     filename = './' + self.out_dir + '/' + name + '.png'

     indices = np.arange(1, len(mean_k) + 1, 1)
     ph.plot_simple_bar(indices,
                        mean_k,
                        indices.astype('str'),
                        'Feature Index',
                        'Kurtosis',
                        title,
                        filename)
Exemplo n.º 6
0
    def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Fit FastICA at full dimensionality and bar-plot the kurtosis of
        every independent component.
        """
        scaler = RobustScaler()
        train_scaled = scaler.fit_transform(X_train)
        test_scaled = scaler.transform(X_test)  # kept for parity; unused below

        ica = FastICA(n_components=train_scaled.shape[1])
        components = ica.fit_transform(train_scaled)

        ph = plot_helper()

        kurt = kurtosis(components)
        print(kurt)

        title = 'Kurtosis (FastICA) for ' + data_set_name
        name = data_set_name.lower() + '_ica_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'

        labels = np.arange(1, len(kurt) + 1, 1)
        ph.plot_simple_bar(labels,
                           kurt,
                           labels.astype('str'),
                           'Feature Index',
                           'Kurtosis',
                           title,
                           filename)
Exemplo n.º 7
0
 def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
     """Sweep LDA n_components from 1 to the feature count, recording
     3-fold cross-validation and full-training scores, then plot both
     series against the component count.
     """
     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     X_test_scl = scl.transform(X_test)

     ph = plot_helper()

     cv_means = []
     fit_scores = []
     component_range = range(1, X_train_scl.shape[1] + 1)
     for n in component_range:
         model = LinearDiscriminantAnalysis(n_components=n)
         folds = KFold(X_train_scl.shape[0], 3, shuffle=True)

         # Mean accuracy across the three folds.
         fold_scores = []
         for train_idx, test_idx in folds:
             model.fit(X_train_scl[train_idx], y_train[train_idx])
             fold_scores.append(model.score(X_train_scl[test_idx], y_train[test_idx]))

         mean_score = np.mean(fold_scores)
         cv_means.append(mean_score)

         # Training score from a fresh estimator on the full training set.
         model = LinearDiscriminantAnalysis(n_components=n)
         model.fit(X_train_scl, y_train)
         fit_scores.append(model.score(X_train_scl, y_train))

         print(n, mean_score)

     title = 'Score Summary Plot (LDA) for ' + data_set_name
     name = data_set_name.lower() + '_lda_score'
     filename = './' + self.out_dir + '/' + name + '.png'

     ph.plot_series(component_range,
                    [cv_means, fit_scores],
                    [None, None],
                    ['cross validation score', 'training score'],
                    cm.viridis(np.linspace(0, 1, 2)),
                    ['o', '*'],
                    title,
                    'n_components',
                    'Score',
                    filename)
Exemplo n.º 8
0
    def run_policy_iteration_and_plot(self,
                                      grid,
                                      k,
                                      d,
                                      discount,
                                      epsilon=0.001,
                                      max_iter=10000,
                                      plot=True):
        """Run modified policy iteration on the grid MDP, optionally plot the
        value function and policy, and return run statistics.

        Returns:
            (final_time, mean_v, iterations, policy) tuple.
        """
        T, R, start, goals, r_temp = self.__convert_grid_to_mdp(grid, k, d)
        pi = mdptoolbox.mdp.PolicyIterationModified(T,
                                                    R,
                                                    discount,
                                                    epsilon=epsilon,
                                                    max_iter=max_iter)
        with open('./output/policyiter.txt', 'a') as text_file:
            t0 = time()
            pi.run()
            final_time = time() - t0
            if plot:
                text_file.write('PolicyIteration: %0.3f seconds. Iters: %i\n' %
                                (final_time, pi.iter))

        # Reshape the flat policy/value vectors back onto the grid.
        p = np.array(pi.policy)
        p.shape = grid.shape

        v = np.array(pi.V)
        v.shape = grid.shape
        mean_v = v.mean()

        d_str = 'dir' if d else 'non-dir'

        if plot:
            ph = plot_helper()
            # BUG FIX: the title previously ended with a comma before
            # str(discount), making it a tuple instead of a string.
            title = (str(grid.shape[0]) + 'x' + str(grid.shape[1]) +
                     ' Grid r: ' + str(k) + '(' + d_str + '), discount: ' +
                     str(discount))
            fn = ('./output/' + str(grid.shape[0]) + 'x' + str(grid.shape[1]) +
                  'policyiter_' + str(k) + '_' + d_str + '_' + str(discount) +
                  '.png')
            ph.plot_results2(v, grid, p, title, fn)
        return final_time, mean_v, pi.iter, pi.policy
Exemplo n.º 9
0
    def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Sweep LDA n_components, recording 3-fold CV and training scores,
        then plot both score series against the component count.
        """
        scl = RobustScaler()
        X_train_scl = scl.fit_transform(X_train)
        X_test_scl = scl.transform(X_test)

        ph = plot_helper()

        cv_means = []
        fit_scores = []
        component_range = range(1, X_train_scl.shape[1] + 1)
        for n in component_range:
            model = LinearDiscriminantAnalysis(n_components=n)
            folds = KFold(X_train_scl.shape[0], 3, shuffle=True)

            # Mean accuracy across the three folds.
            fold_scores = []
            for train_idx, test_idx in folds:
                model.fit(X_train_scl[train_idx], y_train[train_idx])
                fold_scores.append(
                    model.score(X_train_scl[test_idx], y_train[test_idx]))

            mean_score = np.mean(fold_scores)
            cv_means.append(mean_score)

            # Training score from a fresh estimator on the whole set.
            model = LinearDiscriminantAnalysis(n_components=n)
            model.fit(X_train_scl, y_train)
            fit_scores.append(model.score(X_train_scl, y_train))

            print(n, mean_score)

        title = 'Score Summary Plot (LDA) for ' + data_set_name
        name = data_set_name.lower() + '_lda_score'
        filename = './' + self.out_dir + '/' + name + '.png'

        ph.plot_series(component_range, [cv_means, fit_scores], [None, None],
                       ['cross validation score', 'training score'],
                       cm.viridis(np.linspace(0, 1, 2)), ['o', '*'], title,
                       'n_components', 'Score', filename)
        '''
Exemplo n.º 10
0
    def run2(self):
        """Build a 10x10 GridMDP, solve it with value and policy iteration,
        and plot the value-iteration result as a heatmap.
        """
        fn = './input/grid1.csv'
        grid = pd.read_csv(fn, header=None).values

        r = self.__create_reward_grid(grid)
        start, goals = self.__get_grid_terminals(grid)

        # Step cost -1 everywhere, a -100 barrier row, one +1 goal cell.
        barrier_row = [-1] + [-100] * 9
        goal_row = [-1] * 9 + [1]
        layout = [[-1] * 10 for _ in range(10)]
        layout[3] = barrier_row
        layout[4] = goal_row
        mdp = GridMDP(layout, terminals=[(9, 5)], init=(0, 5))
        '''
        mdp = GridMDP([[-0.01, -0.01, -0.01, +1.00],
                       [-0.01, None,  -0.01, -0.01],
                       [-0.01, -0.01, -0.01, -0.01]],
                      terminals=[(3, 1)])

        mdp = GridMDP([[-1.0, +1.0]],
                      terminals=[(0,0),(1,0)])
        '''

        vi = value_iteration(mdp, .01)
        print(vi)

        # Scatter the state-value dict into a dense 10x10 array.
        vi_grid = np.ndarray((10, 10))
        for state in vi.keys():
            vi_grid[state] = vi[state]

        ph = plot_helper()
        ph.plot_heatmap(vi_grid, None, None, 'title', 'value_iter_mdp.png')

        pi = policy_iteration(mdp)
        print(pi)

        b = best_policy(mdp, vi)
        print(b)
Exemplo n.º 11
0
    def cluster_3d_plot(self, df, k, cls_type, data_set_name, analysis_name=None):
        """For every ordered triple of features in ``df``, fit the requested
        clusterer on the robust-scaled triple and save a 3-D scatter plot
        colored by the predicted cluster.

        Raises:
            AttributeError: if ``cls_type`` names an unsupported clusterer.
        """
        p = list(itertools.permutations(range(df.shape[1]), 3))

        print(p)

        for u in p:
            f1 = df.columns[u[0]]
            f2 = df.columns[u[1]]
            f3 = df.columns[u[2]]

            print('Feature1: ', f1, ', Feature2: ', f2, ', Feature3: ', f3)
            X_train = df.values[:, (u[0], u[1], u[2])]
            X_train_scl = RobustScaler().fit_transform(X_train)

            ##
            ## Cluster Routine
            ##
            if 'KMeans' in cls_type:
                cls = KMeans(n_clusters=k, algorithm='full')
            elif 'GaussianMixture' in cls_type:
                cls = GaussianMixture(n_components=k, covariance_type='full')
            else:
                raise AttributeError('cls_type: ' + cls_type + ' not supported.')

            cls.fit(X_train_scl)
            y_pred = cls.predict(X_train_scl)

            ##
            ## Plots
            ##
            ph = plot_helper()

            ##
            ## 3d Scatter Plot
            ##
            title = cls_type + ' Clusters 3D: ' + str(f1) + '\nvs ' + str(f2) + ' vs ' + str(f3) + ', k=' + str(k)

            # FIX: identity comparison with None (was ``!= None``).
            if analysis_name is not None:
                name = analysis_name.lower()
            else:
                name = cls_type.lower()

            name = data_set_name.lower() + '_' + name + '3d_cluster'
            filename = './' + self.out_dir + '/' + str(f1).lower() + '_' + str(f2).lower() + '_' + str(f3).lower() + '_' + str(k) + '_' + name + '_' + data_set_name + '_cluster.png'

            ph.plot_3d_scatter(X_train_scl[:, 0], X_train_scl[:, 1], X_train_scl[:, 2], y_pred, f1, f2, f3, title, filename)
Exemplo n.º 12
0
    def run_policy_iteration_and_plot(self,
                                      grid,
                                      k,
                                      d,
                                      discount,
                                      epsilon=0.001):
        """Run modified policy iteration on the grid MDP, log timing to
        ./output/policyiter.txt, and plot the value function and policy.
        """
        T, R, start, goals = self.__convert_grid_to_mdp(grid, k, d)
        pi = mdptoolbox.mdp.PolicyIterationModified(T,
                                                    R,
                                                    discount,
                                                    epsilon=epsilon,
                                                    max_iter=1000000)

        with open('./output/policyiter.txt', 'a') as text_file:
            t0 = time()
            pi.run()
            text_file.write('PolicyIteration: %0.3f seconds. Iters: %i\n' %
                            (time() - t0, pi.iter))

        # Reshape the flat policy/value vectors back onto the grid.
        p = np.array(pi.policy)
        p.shape = grid.shape

        v = np.array(pi.V)
        v.shape = grid.shape

        d_str = 'dir' if d else 'non-dir'

        ph = plot_helper()

        # BUG FIX: the title previously ended with a comma before
        # str(discount), producing a tuple instead of a string.
        title = (str(grid.shape[0]) + 'x' + str(grid.shape[1]) +
                 ' Grid r: ' + str(k) + '(' + d_str + '), discount: ' +
                 str(discount))
        fn = ('./output/' + str(grid.shape[0]) + 'x' + str(grid.shape[1]) +
              'policyiter_' + str(k) + '_' + d_str + '_' + str(discount) +
              '.png')
        ph.plot_results2(v, grid, p, title, fn)

        print('done')
Exemplo n.º 13
0
    def run2(self):
        """Load grid1, construct a 10x10 GridMDP, run value and policy
        iteration, and plot the value-iteration heatmap.
        """
        fn = './input/grid1.csv'
        grid = pd.read_csv(fn, header=None).values

        r = self.__create_reward_grid(grid)
        start, goals = self.__get_grid_terminals(grid)

        # -1 step cost, a -100 barrier across row 3, a single +1 goal.
        rows = [[-1] * 10 for _ in range(10)]
        rows[3] = [-1] + [-100] * 9
        rows[4] = [-1] * 9 + [1]
        mdp = GridMDP(rows, terminals=[(9, 5)], init=(0, 5))
        '''
        mdp = GridMDP([[-0.01, -0.01, -0.01, +1.00],
                       [-0.01, None,  -0.01, -0.01],
                       [-0.01, -0.01, -0.01, -0.01]],
                      terminals=[(3, 1)])

        mdp = GridMDP([[-1.0, +1.0]],
                      terminals=[(0,0),(1,0)])
        '''

        vi = value_iteration(mdp, .01)
        print(vi)

        # Scatter the state->value mapping into a dense 10x10 array.
        vi_grid = np.ndarray((10, 10))
        for state, value in vi.items():
            vi_grid[state] = value

        ph = plot_helper()
        ph.plot_heatmap(vi_grid, None, None, 'title', 'value_iter_mdp.png')

        pi = policy_iteration(mdp)
        print(pi)

        b = best_policy(mdp, vi)
        print(b)
Exemplo n.º 14
0
    def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Plot the mean kurtosis over 1000 random projections, then the
        reconstruction error as a function of projection dimensionality.
        """
        train_scaled = RobustScaler().fit_transform(X_train)

        kurtoses = []
        for _ in range(1000):
            # Fresh random projection each trial (full dimensionality).
            projector = GaussianRandomProjection(n_components=train_scaled.shape[1])
            projector.fit(train_scaled)
            kurtoses.append(kurtosis(projector.transform(train_scaled)))

        mean_k = np.mean(kurtoses, 0)

        ph = plot_helper()

        title = 'Kurtosis (Randomized Projection) for ' + data_set_name
        name = data_set_name.lower() + '_rp_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'

        indices = np.arange(1, len(mean_k) + 1, 1)
        ph.plot_simple_bar(indices, mean_k, indices.astype('str'),
                           'Feature Index', 'Kurtosis', title, filename)

        # Reconstruction error across candidate component counts.
        all_mses, rng = self.reconstruction_error(train_scaled,
                                                  GaussianRandomProjection)

        title = 'Reconstruction Error (RP) for ' + data_set_name
        name = data_set_name.lower() + '_rp_rec_err'
        filename = './' + self.out_dir + '/' + name + '.png'
        ph.plot_series(rng, [all_mses.mean(0)], [all_mses.std(0)], ['mse'],
                       ['red'], ['o'], title, 'Number of Features',
                       'Reconstruction Error', filename)
Exemplo n.º 15
0
    def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Plot per-component kurtosis from a full-rank FastICA fit, then
        the reconstruction error as a function of component count.
        """
        scaler = RobustScaler()
        train_scaled = scaler.fit_transform(X_train)
        test_scaled = scaler.transform(X_test)  # kept for parity; unused below

        ica = FastICA(n_components=train_scaled.shape[1])
        components = ica.fit_transform(train_scaled)

        ph = plot_helper()

        kurt = kurtosis(components)
        print(kurt)

        title = 'Kurtosis (FastICA) for ' + data_set_name
        name = data_set_name.lower() + '_ica_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'

        indices = np.arange(1, len(kurt) + 1, 1)
        ph.plot_simple_bar(indices, kurt, indices.astype('str'),
                           'Feature Index', 'Kurtosis', title, filename)

        # Reconstruction error across candidate component counts.
        all_mses, rng = self.reconstruction_error(train_scaled, FastICA)

        title = 'Reconstruction Error (ICA) for ' + data_set_name
        name = data_set_name.lower() + '_ica_rec_err'
        filename = './' + self.out_dir + '/' + name + '.png'
        ph.plot_series(rng, [all_mses.mean(0)], [all_mses.std(0)], ['mse'],
                       ['red'], ['o'], title, 'Number of Features',
                       'Reconstruction Error', filename)
Exemplo n.º 16
0
    def nn_analysis(self, X_train, X_test, y_train, y_test, data_set_name, analysis_name="Neural Network"):
        """Fit an MLP classifier, record fit/predict timings to the shared
        log file, and save a learning-curve plot.
        """
        clf = MLPClassifier(activation="relu",
                            learning_rate="constant",
                            shuffle=True,
                            solver="adam",
                            random_state=0,
                            max_iter=1000,
                            batch_size=60)

        with open(self.nn_time_filename, "a+") as text_file:
            started = time()
            clf.fit(X_train, y_train)
            text_file.write(analysis_name.lower() + " fit time: %0.3f seconds\n" % (time() - started))

            started = time()
            y_pred = clf.predict(X_test)
            text_file.write(analysis_name.lower() + " predict time: %0.3f seconds\n" % (time() - started))

        # 100 stratified shuffle splits smooth out the learning curve.
        cv = StratifiedShuffleSplit(n_splits=100, test_size=0.2, random_state=0)

        title = "Learning Curve (" + analysis_name + ") for " + data_set_name
        name = data_set_name.lower() + "_" + analysis_name.lower() + "_nn.png"
        filename = "./" + self.out_dir + "/" + name

        ph = plot_helper()
        ph.plot_learning_curve(clf, title, X_train, y_train, ylim=None, cv=cv, n_jobs=-1, filename=filename)
Exemplo n.º 17
0
    def run(self):
        """Part-1 driver: plot each input grid's layout, run value- and
        policy-iteration sweeps at two reward scalings, then run the
        Q-learner on both input grids with several hyperparameter settings.
        """
        print('Running part 1')
        '''
        grid = self.__generate_random_grid(2, 2, 0., 0.)
        print(grid)

        T, R = self.__convert_grid_to_mdp(grid)
        pi = mdptoolbox.mdp.PolicyIteration(T, R, 0.9)
        pi.run()
        print(pi.policy)

        vi = mdptoolbox.mdp.ValueIteration(T, R, 0.9)
        vi.run()
        print(vi.V)
        print(vi.policy)
        print(vi.iter)
        '''

        # self.__test_movement()

        # Layout plot plus VI/PI sweeps for each configured grid file.
        for grid_file in ['./input/grid2.csv']:
            # fn = './input/grid1.csv'
            grid = pd.read_csv(grid_file, header=None).values
            ph = plot_helper()

            title = str(grid.shape[0]) + 'x' + str(
                grid.shape[1]) + ' Grid Layout'
            fn = './output/' + str(grid.shape[0]) + 'x' + str(
                grid.shape[1]) + '_layout.png'
            ph.plot_layout(grid, title, fn)

            #self.run_value_iteration_and_plot(grid, k=1.0, d=True, discount=0.9, epsilon=0.00001)
            # self.run_policy_iteration_and_plot(grid, k=1.0, d=True, discount=0.9, epsilon=0.00001)

            #self.run_value_iteration_and_plot(grid, k=1.0, d=True, discount=0.9, epsilon=0.00001)
            # Value and policy iteration at reward scalings 0.9 and 0.8.
            self.run_value_iteration_and_plot(grid,
                                              k=0.9,
                                              d=True,
                                              discount=0.9,
                                              epsilon=0.00001)
            self.run_value_iteration_and_plot(grid,
                                              k=0.8,
                                              d=True,
                                              discount=0.9,
                                              epsilon=0.00001)
            self.run_policy_iteration_and_plot(grid,
                                               k=0.9,
                                               d=True,
                                               discount=0.9,
                                               epsilon=0.00001)
            self.run_policy_iteration_and_plot(grid,
                                               k=0.8,
                                               d=True,
                                               discount=0.9,
                                               epsilon=0.00001)

            # self.run_and_plot_qlearner(grid, d=True, k=1.0, alpha=0.2, gamma=0.8, rar=0.99, rard=0.999999, n_restarts=5000, n_iter=1000000)
            '''
            for k in [1.00, 0.90, 0.85, 0.80, 0.75]:
                for d in [False, True]:
                    for discount in [0.9, 0.8, 0.7, 0.6]:
                        self.run_value_iteration_and_plot(grid, k=k, d=d, discount=discount)

            for k in [1.00, 0.90, 0.85, 0.80, 0.75]:
                for d in [False, True]:
                    for discount in [0.9, 0.8, 0.7, 0.6]:
                        self.run_policy_iteration_and_plot(grid, k=k, d=d, discount=discount)
            '''
            '''
            for k in [1.00, 0.90, 0.85, 0.80, 0.75]:
                for d in [False, True]:
                    for alpha in [0.1, 0.3, 0.5, 0.7, 0.9]:
                        for gamma in [1.0, 0.8, 0.6, 0.4, 0.2]:
                            for rard in [0.99, 0.9999, 0.999999]:
                                self.run_and_plot_qlearner(grid, d, k, alpha, gamma, rar=0.99, rard=rard)
            '''
            print('done qlearner')

        # Q-learner runs on grid1, then grid2 with several settings.
        grid_file = './input/grid1.csv'
        grid = pd.read_csv(grid_file, header=None).values

        self.run_and_plot_qlearner(grid,
                                   d=True,
                                   k=0.8,
                                   alpha=0.2,
                                   gamma=0.8,
                                   rar=0.99,
                                   rard=0.99,
                                   n_restarts=100,
                                   n_iter=1000000)

        grid_file = './input/grid2.csv'
        grid = pd.read_csv(grid_file, header=None).values

        self.run_and_plot_qlearner(grid,
                                   d=True,
                                   k=0.8,
                                   alpha=0.2,
                                   gamma=0.8,
                                   rar=0.99,
                                   rard=0.99,
                                   n_restarts=300,
                                   n_iter=1000000)

        self.run_and_plot_qlearner(grid,
                                   d=True,
                                   k=0.8,
                                   alpha=0.2,
                                   gamma=0.8,
                                   rar=0.99,
                                   rard=0.999999,
                                   n_restarts=5000,
                                   n_iter=1000000)
        self.run_and_plot_qlearner(grid,
                                   d=True,
                                   k=0.8,
                                   alpha=0.1,
                                   gamma=0.8,
                                   rar=0.99,
                                   rard=0.99,
                                   n_restarts=300,
                                   n_iter=1000000)

        self.run_and_plot_qlearner(grid,
                                   d=True,
                                   k=0.8,
                                   alpha=0.1,
                                   gamma=0.8,
                                   rar=0.99,
                                   rard=0.999999,
                                   n_restarts=5000,
                                   n_iter=1000000)
        '''
Exemplo n.º 18
0
    def run_and_plot_qlearner(self,
                              grid,
                              d,
                              k,
                              alpha,
                              gamma,
                              rar,
                              rard=0.99,
                              n_restarts=1000,
                              n_iter=100000):
        T, R, start, goals = self.__convert_grid_to_mdp(grid, k, d)

        q = QLearningEx(T,
                        R,
                        grid,
                        start,
                        goals,
                        n_restarts=n_restarts,
                        alpha=alpha,
                        gamma=gamma,
                        rar=rar,
                        radr=rard,
                        n_iter=n_iter)
        # q = mdptoolbox.mdp.QLearning(T, R, 0.9)
        q.run()
        print(q.Q)

        p = np.array(q.policy)
        p.shape = grid.shape
        p = p

        v = np.array(q.V)
        v.shape = grid.shape
        v = v
        if d:
            d_str = 'dir'
        else:
            d_str = 'non-dir'

        ph = plot_helper()

        title = str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + ' Tracker\na: ' + str(q.alpha) + ', g: ' + str(
                q.gamma) + ', d: ' + str(q.orig_rar) + '@' + str(
                    q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
        fn = './output/' + str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + 'tracker_' + str(q.alpha) + '_' + str(
                q.gamma) + '_' + str(q.orig_rar) + '_' + str(
                    q.radr) + '_' + str(k) + '_' + d_str + '.png'
        # tracker = normalize(q.tracker[::-1], axis=1, norm='l1')
        ph.plot_heatmap_simple(q.tracker[::-1], title, fn)

        title = str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + ' Iterations\na: ' + str(q.alpha) + ', g: ' + str(
                q.gamma) + ', d: ' + str(q.orig_rar) + '@' + str(
                    q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
        fn = './output/' + str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + 'iterations_' + str(q.alpha) + '_' + str(
                q.gamma) + '_' + str(q.orig_rar) + '_' + str(
                    q.radr) + '_' + str(k) + '_' + d_str + '.png'
        ph.plot_series(
            range(len(q.episode_iterations)),
            [q.episode_iterations],
            [None],
            ['iterations'],
            # cm.viridis(np.linspace(0, 1, 1)),
            ['black'],
            [''],
            title,
            'Episodes',
            'Iterations',
            fn)

        title = str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + ' Rewards/Iterations\na: ' + str(
                q.alpha) + ', g: ' + str(q.gamma) + ', d: ' + str(
                    q.orig_rar) + '@' + str(
                        q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
        fn = './output/' + str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + 'rewards_iterations_' + str(q.alpha) + '_' + str(
                q.gamma) + '_' + str(q.orig_rar) + '_' + str(
                    q.radr) + '_' + str(k) + '_' + d_str + '.png'

        ph.plot_series(
            range(len(q.episode_reward)),
            [q.episode_reward, q.episode_iterations],
            [None, None],
            ['rewards', 'iterations'],
            # cm.viridis(np.linspace(0, 1, 1)),
            ['black', 'blue'],
            ['', ''],
            title,
            'Episodes',
            'Rewards/Iterations',
            fn)

        title = str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + ' Rewards\na: ' + str(q.alpha) + ', g: ' + str(
                q.gamma) + ', d: ' + str(q.orig_rar) + '@' + str(
                    q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
        fn = './output/' + str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + 'rewards_' + str(q.alpha) + '_' + str(
                q.gamma) + '_' + str(q.orig_rar) + '_' + str(
                    q.radr) + '_' + str(k) + '_' + d_str + '.png'
        ph.plot_series(
            range(len(q.episode_reward)),
            [q.episode_reward],
            [None],
            ['rewards'],
            # cm.viridis(np.linspace(0, 1, 1)),
            ['black'],
            [''],
            title,
            'Episodes',
            'Rewards',
            fn)

        title = str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + ' Timing\na: ' + str(q.alpha) + ', g: ' + str(
                q.gamma) + ', d: ' + str(q.orig_rar) + '@' + str(
                    q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
        fn = './output/' + str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + 'timing_' + str(q.alpha) + '_' + str(
                q.gamma) + '_' + str(q.orig_rar) + '_' + str(
                    q.radr) + '_' + str(k) + '_' + d_str + '.png'
        ph.plot_series(
            range(len(q.episode_times)),
            [q.episode_times],
            [None],
            ['seconds'],
            # cm.viridis(np.linspace(0, 1, 1)),
            ['black'],
            [''],
            title,
            'Episodes',
            'Time in seconds',
            fn)

        title = str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + ' Grid\na: ' + str(q.alpha) + ', g: ' + str(
                q.gamma) + ', d: ' + str(q.orig_rar) + '@' + str(
                    q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
        fn = './output/' + str(grid.shape[0]) + 'x' + str(
            grid.shape[1]) + 'qlearn_' + str(q.alpha) + '_' + str(
                q.gamma) + '_' + str(q.orig_rar) + '_' + str(
                    q.radr) + '_' + str(k) + '_' + d_str + '.png'
        # ph.plot_heatmap(v, grid[::-1], p, title, fn)
        ph.plot_results2(v, grid, p, title, fn)
        '''
Exemplo n.º 19
0
 def gmm_analysis(self, X_train, X_test, y_train, y_test, data_set_name, max_clusters, analysis_name='GMM'):
     """Run EM (Gaussian mixture) clustering for k = 2..max_clusters.
     
     For each k, fits a full-covariance GaussianMixture on the scaled
     training data and records BIC/AIC plus label-agreement scores
     (homogeneity, completeness, v-measure, adjusted rand, adjusted
     mutual info) against y_train.  Saves an information-criterion plot
     and a score-summary plot under self.out_dir.
     """
     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     # NOTE(review): X_test_scl is computed but never used below; kept
     # for parity with the other *_analysis methods.
     X_test_scl = scl.transform(X_test)
     
     em_bic = []
     em_aic = []
     em_completeness_score = []
     em_homogeneity_score = []
     em_measure_score = []
     em_adjusted_rand_score = []
     em_adjusted_mutual_info_score = []
     
     cluster_range = np.arange(2, max_clusters+1, 1)
     for k in cluster_range:
         print('K Clusters: ', k)
         
         ##
         ## Expectation Maximization
         ##
         em = GaussianMixture(n_components=k, covariance_type='full')
         em.fit(X_train_scl)
         em_pred = em.predict(X_train_scl)
         
         em_bic.append(em.bic(X_train_scl))
         em_aic.append(em.aic(X_train_scl))        
     
         # metrics need a flat 1-D label vector
         y_train_score = y_train.reshape(y_train.shape[0],)
         
         em_homogeneity_score.append(homogeneity_score(y_train_score, em_pred))
         em_completeness_score.append(completeness_score(y_train_score, em_pred))
         em_measure_score.append(v_measure_score(y_train_score, em_pred))
         em_adjusted_rand_score.append(adjusted_rand_score(y_train_score, em_pred))
         em_adjusted_mutual_info_score.append(adjusted_mutual_info_score(y_train_score, em_pred))
         
     
     ##
     ## Plots
     ##
     ph = plot_helper()
     
     ##
     ## BIC/AIC Plot
     ##
     title = 'Information Criterion Plot (' + analysis_name + ') for ' + data_set_name
     name = data_set_name.lower() + '_' + analysis_name.lower() + '_ic'
     filename = './' + self.out_dir + '/' + name + '.png'
     
     ph.plot_series(cluster_range,
                 [em_bic, em_aic],
                 [None, None],
                 ['bic', 'aic'],
                 cm.viridis(np.linspace(0, 1, 2)),
                 ['o', '*'],
                 title,
                 'Number of Clusters',
                 'Information Criterion',
                 filename)
     
     ##
     ## Score Plot
     ##
     title = 'Score Summary Plot (' + analysis_name + ') for ' + data_set_name
     name = data_set_name.lower() + '_' + analysis_name.lower() + '_score'
     filename = './' + self.out_dir + '/' + name + '.png'
                 
     # Five series are plotted, so the error and marker lists must also
     # carry exactly five entries (the original had a stale sixth entry).
     ph.plot_series(cluster_range,
                 [em_homogeneity_score, em_completeness_score, em_measure_score, em_adjusted_rand_score, em_adjusted_mutual_info_score],
                 [None, None, None, None, None],
                 ['homogeneity', 'completeness', 'measure', 'adjusted_rand', 'adjusted_mutual_info'],
                 cm.viridis(np.linspace(0, 1, 5)),
                 ['o', '^', 'v', '>', '<'],
                 title,
                 'Number of Clusters',
                 'Score',
                 filename)
Exemplo n.º 20
0
 def run(self):
     """Sweep Q-learning hyper-parameters over every input grid.
     
     For each grid file, plots the raw layout, then for every combination
     of reward scale k, directional flag d, alpha, gamma and initial
     random-action rate rar, runs QLearningEx and saves a state-visit
     tracker heatmap, an iterations-per-episode series and a value/policy
     heatmap under ./output/.
     """
     print('Running part 1')
     
     for grid_file in ['./input/grid1.csv', './input/grid2.csv']:
         grid = pd.read_csv(grid_file, header=None).values
         ph = plot_helper()
         
         title = str(grid.shape[0]) + 'x' + str(grid.shape[1]) + ' Grid Layout'
         fn = './output/' + str(grid.shape[0]) + 'x' + str(grid.shape[1]) + '_layout.png'        
         ph.plot_layout(grid, title, fn)
         
         for k in [1.00, 0.90, 0.85, 0.80, 0.75]:
             for d in [False, True]:
                 for alpha in [0.1, 0.2, 0.3]:
                     for gamma in [1.0, 0.9, 0.8]:
                         for rar in [1.0, 0.9, 0.8, 0.7]:
                             T, R, start, goals = self.__convert_grid_to_mdp(grid, k, d)
                             
                             q = QLearningEx(T, R, grid, start, goals, n_restarts=1000, alpha=alpha, gamma=gamma, rar=rar, radr=0.99, n_iter=100000)
                             q.run()
                             print(q.Q)
                             
                             # Reshape the flat learned policy onto the grid.
                             p = np.array(q.policy)
                             p.shape = grid.shape
                             
                             # V is reversed before reshaping so the heatmap's
                             # row order matches the plotted grid orientation.
                             v = np.array(q.V)[::-1]
                             v.shape = grid.shape
                             
                             d_str = 'dir' if d else 'non-dir'
                             
                             title = str(grid.shape[0]) + 'x' + str(grid.shape[1]) + ' Tracker\na: ' + str(q.alpha) + ', g: ' + str(q.gamma) + ', d: ' + str(q.orig_rar) + '@' + str(q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
                             fn = './output/' + str(grid.shape[0]) + 'x' + str(grid.shape[1]) + 'tracker_' + str(q.alpha) + '_' + str(q.gamma) + '_' + str(q.orig_rar) + '_' + str(q.radr) + '_' + str(k) + '_' + d_str + '.png'
                             ph.plot_heatmap_simple(q.tracker[::-1], title, fn)
                             
                             title = str(grid.shape[0]) + 'x' + str(grid.shape[1]) + ' Iterations\na: ' + str(q.alpha) + ', g: ' + str(q.gamma) + ', d: ' + str(q.orig_rar) + '@' + str(q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
                             fn = './output/' + str(grid.shape[0]) + 'x' + str(grid.shape[1]) + 'iterations_' + str(q.alpha) + '_' + str(q.gamma) + '_' + str(q.orig_rar) + '_' + str(q.radr) + '_' + str(k) + '_' + d_str + '.png'
                             # x-axis is the episode index, so the labels are
                             # 'Episodes'/'Iterations' (they were swapped in
                             # the original call).
                             ph.plot_series(range(len(q.episode_iterations)),
                                         [q.episode_iterations],
                                         [None],
                                         ['iterations'],
                                         cm.viridis(np.linspace(0, 1, 1)),
                                         [''],
                                         title,
                                         'Episodes',
                                         'Iterations',
                                         fn)
                             
                             title = str(grid.shape[0]) + 'x' + str(grid.shape[1]) + ' Grid\na: ' + str(q.alpha) + ', g: ' + str(q.gamma) + ', d: ' + str(q.orig_rar) + '@' + str(q.radr) + ', r: ' + str(k) + '(' + d_str + ')'
                             fn = './output/' + str(grid.shape[0]) + 'x' + str(grid.shape[1]) + 'qlearn_' + str(q.alpha) + '_' + str(q.gamma) + '_' + str(q.orig_rar) + '_' + str(q.radr) + '_' + str(k) + '_' + d_str + '.png'
                             ph.plot_heatmap(v, grid[::-1], p, title, fn)
                         
         print('done qlearner')
     
     '''
Exemplo n.º 21
0
    def gmm_analysis(self,
                     X_train,
                     X_test,
                     y_train,
                     y_test,
                     data_set_name,
                     max_clusters,
                     analysis_name='GMM'):
        """Run EM (Gaussian mixture) clustering for k = 2..max_clusters.

        For each k, fits a full-covariance GaussianMixture on the scaled
        training data and records BIC/AIC plus label-agreement scores
        (homogeneity, completeness, v-measure, adjusted rand, adjusted
        mutual info) against y_train.  Saves an information-criterion
        plot and a score-summary plot under self.out_dir.
        """
        scl = RobustScaler()
        X_train_scl = scl.fit_transform(X_train)
        # NOTE(review): X_test_scl is computed but never used below; kept
        # for parity with the other *_analysis methods.
        X_test_scl = scl.transform(X_test)

        em_bic = []
        em_aic = []
        em_completeness_score = []
        em_homogeneity_score = []
        em_measure_score = []
        em_adjusted_rand_score = []
        em_adjusted_mutual_info_score = []

        cluster_range = np.arange(2, max_clusters + 1, 1)
        for k in cluster_range:
            print('K Clusters: ', k)

            ##
            ## Expectation Maximization
            ##
            em = GaussianMixture(n_components=k, covariance_type='full')
            em.fit(X_train_scl)
            em_pred = em.predict(X_train_scl)

            em_bic.append(em.bic(X_train_scl))
            em_aic.append(em.aic(X_train_scl))

            # metrics need a flat 1-D label vector
            y_train_score = y_train.reshape(y_train.shape[0], )

            em_homogeneity_score.append(
                homogeneity_score(y_train_score, em_pred))
            em_completeness_score.append(
                completeness_score(y_train_score, em_pred))
            em_measure_score.append(v_measure_score(y_train_score, em_pred))
            em_adjusted_rand_score.append(
                adjusted_rand_score(y_train_score, em_pred))
            em_adjusted_mutual_info_score.append(
                adjusted_mutual_info_score(y_train_score, em_pred))

        ##
        ## Plots
        ##
        ph = plot_helper()

        ##
        ## BIC/AIC Plot
        ##
        title = 'Information Criterion Plot (' + analysis_name + ') for ' + data_set_name
        name = data_set_name.lower() + '_' + analysis_name.lower() + '_ic'
        filename = './' + self.out_dir + '/' + name + '.png'

        ph.plot_series(cluster_range, [em_bic, em_aic],
                       [None, None], ['bic', 'aic'],
                       cm.viridis(np.linspace(0, 1, 2)), ['o', '*'], title,
                       'Number of Clusters', 'Information Criterion', filename)

        ##
        ## Score Plot
        ##
        title = 'Score Summary Plot (' + analysis_name + ') for ' + data_set_name
        name = data_set_name.lower() + '_' + analysis_name.lower() + '_score'
        filename = './' + self.out_dir + '/' + name + '.png'

        # Five series are plotted, so the error and marker lists must also
        # carry exactly five entries (the original had a stale sixth entry).
        ph.plot_series(cluster_range, [
            em_homogeneity_score, em_completeness_score, em_measure_score,
            em_adjusted_rand_score, em_adjusted_mutual_info_score
        ], [None, None, None, None, None], [
            'homogeneity', 'completeness', 'measure', 'adjusted_rand',
            'adjusted_mutual_info'
        ], cm.viridis(np.linspace(0, 1, 5)), ['o', '^', 'v', '>', '<'],
                       title, 'Number of Clusters', 'Score', filename)
Exemplo n.º 22
0
    def kmeans_analysis(self,
                        X_train,
                        X_test,
                        y_train,
                        y_test,
                        data_set_name,
                        max_clusters,
                        analysis_name='K-Means'):
        """Run K-Means clustering for k = 2..max_clusters and plot diagnostics.

        For each k, fits KMeans on the scaled training data, records
        inertia plus label-agreement scores against y_train, and saves a
        silhouette plot.  Afterwards saves an elbow plot and a
        score-summary plot under self.out_dir.
        """
        scl = RobustScaler()
        X_train_scl = scl.fit_transform(X_train)
        # NOTE(review): X_test_scl is computed but never used below; kept
        # for parity with the other *_analysis methods.
        X_test_scl = scl.transform(X_test)

        km_inertias = []
        km_completeness_score = []
        km_homogeneity_score = []
        km_measure_score = []
        km_adjusted_rand_score = []
        km_adjusted_mutual_info_score = []

        cluster_range = np.arange(2, max_clusters + 1, 1)
        for k in cluster_range:
            print('K Clusters: ', k)
            ##
            ## KMeans
            ##
            # NOTE(review): algorithm='full' and n_jobs were deprecated and
            # later removed in newer scikit-learn releases — confirm the
            # project's pinned scikit-learn version.
            km = KMeans(n_clusters=k, algorithm='full', n_jobs=-1)
            km.fit(X_train_scl)

            # inertia is the sum of distances from each point to its center
            km_inertias.append(km.inertia_)

            # metrics need a flat 1-D label vector
            y_train_score = y_train.reshape(y_train.shape[0], )

            km_homogeneity_score.append(
                homogeneity_score(y_train_score, km.labels_))
            km_completeness_score.append(
                completeness_score(y_train_score, km.labels_))
            km_measure_score.append(v_measure_score(y_train_score, km.labels_))
            km_adjusted_rand_score.append(
                adjusted_rand_score(y_train_score, km.labels_))
            km_adjusted_mutual_info_score.append(
                adjusted_mutual_info_score(y_train_score, km.labels_))

            ##
            ## Silhouette Plot
            ##
            title = 'Silhouette Plot (' + analysis_name + ', k=' + str(
                k) + ') for ' + data_set_name
            name = data_set_name.lower() + '_' + analysis_name.lower(
            ) + '_silhouette_' + str(k)
            filename = './' + self.out_dir + '/' + name + '.png'

            self.silhouette_plot(X_train_scl, km.labels_, title, filename)

        ##
        ## Plots
        ##
        ph = plot_helper()

        ##
        ## Elbow Plot
        ##
        title = 'Elbow Plot (' + analysis_name + ') for ' + data_set_name
        name = data_set_name.lower() + '_' + analysis_name.lower() + '_elbow'
        filename = './' + self.out_dir + '/' + name + '.png'

        # line to help visualize the elbow
        lin = ph.extended_line_from_first_two_points(km_inertias, 0, 2)

        ph.plot_series(cluster_range, [km_inertias, lin], [None, None],
                       ['inertia', 'projected'],
                       cm.viridis(np.linspace(0, 1, 2)), ['o', ''], title,
                       'Number of Clusters', 'Inertia', filename)

        ##
        ## Score Plot
        ##
        title = 'Score Summary Plot (' + analysis_name + ') for ' + data_set_name
        name = data_set_name.lower() + '_' + analysis_name.lower() + '_score'
        filename = './' + self.out_dir + '/' + name + '.png'

        # Five series are plotted, so the error and marker lists must also
        # carry exactly five entries (the original had a stale sixth entry).
        ph.plot_series(cluster_range, [
            km_homogeneity_score, km_completeness_score, km_measure_score,
            km_adjusted_rand_score, km_adjusted_mutual_info_score
        ], [None, None, None, None, None], [
            'homogeneity', 'completeness', 'measure', 'adjusted_rand',
            'adjusted_mutual_info'
        ], cm.viridis(np.linspace(0, 1, 5)), ['o', '^', 'v', '>', '<'],
                       title, 'Number of Clusters', 'Score', filename)
Exemplo n.º 23
0
    def run(self):
        """Run part 1: VI/PI sweeps on both grids, then Q-learning runs.

        For each input grid the layout is plotted, policy iteration is
        run once at k=0.8, and value iteration is run for k in
        {1.0, 0.8, 0.7}.  (The original issued the k=0.8 value-iteration
        call twice with identical arguments, regenerating and overwriting
        the same plot file; the duplicate was removed.)  Finally the
        Q-learner is run on grid1 and, with two different exploration
        decay settings, on grid2.
        """
        print('Running part 1')

        #self.__test_movement()

        for grid_file in ['./input/grid1.csv', './input/grid2.csv']:
            grid = pd.read_csv(grid_file, header=None).values
            ph = plot_helper()

            title = str(grid.shape[0]) + 'x' + str(
                grid.shape[1]) + ' Grid Layout'
            fn = './output/' + str(grid.shape[0]) + 'x' + str(
                grid.shape[1]) + '_layout.png'
            ph.plot_layout(grid, title, fn)

            self.run_policy_iteration_and_plot(grid,
                                               k=0.8,
                                               d=True,
                                               discount=0.95,
                                               epsilon=0.00001)
            # Value-iteration sweep over reward scales.
            for k in (1.0, 0.8, 0.7):
                self.run_value_iteration_and_plot(grid,
                                                  k=k,
                                                  d=True,
                                                  discount=0.95,
                                                  epsilon=0.00001)

        print("Start qlearner")
        grid = pd.read_csv('./input/grid1.csv', header=None).values
        self.run_and_plot_qlearner(grid,
                                   d=True,
                                   k=0.8,
                                   alpha=0.2,
                                   gamma=0.8,
                                   rar=0.99,
                                   rard=0.99,
                                   n_restarts=100,
                                   n_iter=1000000)

        grid = pd.read_csv('./input/grid2.csv', header=None).values
        # The larger grid gets more restarts; the second run uses a much
        # slower exploration decay (rard) for comparison.
        self.run_and_plot_qlearner(grid,
                                   d=True,
                                   k=0.8,
                                   alpha=0.2,
                                   gamma=0.8,
                                   rar=0.99,
                                   rard=0.99,
                                   n_restarts=300,
                                   n_iter=1000000)
        self.run_and_plot_qlearner(grid,
                                   d=True,
                                   k=0.8,
                                   alpha=0.2,
                                   gamma=0.8,
                                   rar=0.99,
                                   rard=0.999999,
                                   n_restarts=300,
                                   n_iter=1000000)
Exemplo n.º 24
0
 def kmeans_analysis(self, X_train, X_test, y_train, y_test, data_set_name, max_clusters, analysis_name='K-Means'):
     """Run K-Means clustering for k = 2..max_clusters and plot diagnostics.
     
     For each k, fits KMeans on the scaled training data, records inertia
     plus label-agreement scores against y_train, and saves a silhouette
     plot.  Afterwards saves an elbow plot and a score-summary plot under
     self.out_dir.
     """
     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     # NOTE(review): X_test_scl is computed but never used below; kept
     # for parity with the other *_analysis methods.
     X_test_scl = scl.transform(X_test)
     
     km_inertias = []
     km_completeness_score = []
     km_homogeneity_score = []
     km_measure_score = []
     km_adjusted_rand_score = []
     km_adjusted_mutual_info_score = []
     
     cluster_range = np.arange(2, max_clusters+1, 1)
     for k in cluster_range:
         print('K Clusters: ', k)
         ##
         ## KMeans
         ##
         # NOTE(review): algorithm='full' and n_jobs were deprecated and
         # later removed in newer scikit-learn releases — confirm the
         # project's pinned scikit-learn version.
         km = KMeans(n_clusters=k, algorithm='full', n_jobs=-1)
         km.fit(X_train_scl)
         
         # inertia is the sum of distances from each point to its center   
         km_inertias.append(km.inertia_)
         
         # metrics need a flat 1-D label vector
         y_train_score = y_train.reshape(y_train.shape[0],)
         
         km_homogeneity_score.append(homogeneity_score(y_train_score, km.labels_))
         km_completeness_score.append(completeness_score(y_train_score, km.labels_))
         km_measure_score.append(v_measure_score(y_train_score, km.labels_))
         km_adjusted_rand_score.append(adjusted_rand_score(y_train_score, km.labels_))
         km_adjusted_mutual_info_score.append(adjusted_mutual_info_score(y_train_score, km.labels_))
         
         ##
         ## Silhouette Plot
         ##
         title = 'Silhouette Plot (' + analysis_name + ', k=' + str(k) + ') for ' + data_set_name
         name = data_set_name.lower() + '_' + analysis_name.lower() + '_silhouette_' + str(k)
         filename = './' + self.out_dir + '/' + name + '.png'
         
         self.silhouette_plot(X_train_scl, km.labels_, title, filename)
         
     ##
     ## Plots
     ##
     ph = plot_helper()
     
     ##
     ## Elbow Plot
     ##
     title = 'Elbow Plot (' + analysis_name + ') for ' + data_set_name
     name = data_set_name.lower() + '_' + analysis_name.lower() + '_elbow'
     filename = './' + self.out_dir + '/' + name + '.png'
     
     # line to help visualize the elbow
     lin = ph.extended_line_from_first_two_points(km_inertias, 0, 2)
     
     ph.plot_series(cluster_range,
                 [km_inertias, lin],
                 [None, None],
                 ['inertia', 'projected'],
                 cm.viridis(np.linspace(0, 1, 2)),
                 ['o', ''],
                 title,
                 'Number of Clusters',
                 'Inertia',
                 filename)
     
     ##
     ## Score Plot
     ##
     title = 'Score Summary Plot (' + analysis_name + ') for ' + data_set_name
     name = data_set_name.lower() + '_' + analysis_name.lower() + '_score'
     filename = './' + self.out_dir + '/' + name + '.png'
                 
     # Five series are plotted, so the error and marker lists must also
     # carry exactly five entries (the original had a stale sixth entry).
     ph.plot_series(cluster_range,
                 [km_homogeneity_score, km_completeness_score, km_measure_score, km_adjusted_rand_score, km_adjusted_mutual_info_score],
                 [None, None, None, None, None],
                 ['homogeneity', 'completeness', 'measure', 'adjusted_rand', 'adjusted_mutual_info'],
                 cm.viridis(np.linspace(0, 1, 5)),
                 ['o', '^', 'v', '>', '<'],
                 title,
                 'Number of Clusters',
                 'Score',
                 filename)
Exemplo n.º 25
0
    def pca_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        scl = RobustScaler()
        X_train_scl = scl.fit_transform(X_train)
        X_test_scl = scl.transform(X_test)
        
        ##
        ## PCA
        ##
        pca = PCA(n_components=X_train_scl.shape[1], svd_solver='full')
        X_pca = pca.fit_transform(X_train_scl)
        
        ##
        ## Plots
        ##
        ph = plot_helper()
        
        ##
        ## Explained Variance Plot
        ##
        title = 'Explained Variance (PCA) for ' + data_set_name
        name = data_set_name.lower() + '_pca_evar_err'
        filename = './' + self.out_dir + '/' + name + '.png'        
        self.plot_explained_variance(pca, title, filename)

        ##
        ## Reconstruction Error
        ##
        all_mses, rng = self.reconstruction_error(X_train_scl, PCA)
        
        title = 'Reconstruction Error (PCA) for ' + data_set_name
        name = data_set_name.lower() + '_pca_rec_err'
        filename = './' + self.out_dir + '/' + name + '.png'
        ph.plot_series(rng,
                    [all_mses.mean(0)],
                    [all_mses.std(0)],
                    ['mse'],
                    ['red'],
                    ['o'],
                    title,
                    'Number of Features',
                    'Mean Squared Error',
                    filename)
        
        
        ##
        ## Manually compute eigenvalues
        ## 
        cov_mat = np.cov(X_train_scl.T)
        eigen_values, eigen_vectors = np.linalg.eig(cov_mat)
        print(eigen_values)
        sorted_eigen_values = sorted(eigen_values, reverse=True)

        title = 'Eigen Values (PCA) for ' + data_set_name
        name = data_set_name.lower() + '_pca_eigen'
        filename = './' + self.out_dir + '/' + name + '.png'
        
        ph.plot_simple_bar(np.arange(1, len(sorted_eigen_values)+1, 1),
                           sorted_eigen_values,
                           np.arange(1, len(sorted_eigen_values)+1, 1).astype('str'),
                           'Principal Components',
                           'Eigenvalue',
                           title,
                           filename)
        
        ## TODO Factor this out to new method
        ##
        ## Scatter
        ##
        '''