示例#1
0
    def test_variance_k_best_random_tree_k_fold(self):
        """Evaluate RandomForest_scikit with 10-fold stratified cross-validation.

        The samples and responses returned by ``open_model("models.obj")`` are
        converted to NumPy arrays, reduced to the columns reported by a
        ``VarianceThreshold(threshold=0.00)`` selector and then passed through
        ``preprocessing.scale``.  For every fold a fresh forest is trained on
        the training indices and tested on the held-out indices; the
        off-diagonal entries of the table returned by ``test`` are accumulated
        over all folds and printed as "Wrong Cases".  Finally one more forest
        is trained and tested on the complete data set.
        """
        features, labels = open_model("models.obj")
        features = np.array(features)
        labels = np.array(labels)

        # Local switch: set to False to skip the variance-based column filter.
        use_feature_selection = True
        if use_feature_selection:
            selector = VarianceThreshold(threshold=0.00)
            selector.fit(features)
            kept_columns = selector.get_support(indices=True)
            features = features[:, kept_columns]

        features = preprocessing.scale(features)

        wrong_cases = 0
        folds = StratifiedKFold(labels, n_folds=10)
        for fold_number, (train_idx, test_idx) in enumerate(folds):
            print('Case %d' % fold_number)
            forest = RandomForest_scikit()

            # The timer spans both training and testing of this fold.
            started = time()
            forest.train(features[train_idx, :], labels[train_idx])
            _, fold_table = forest.test(features[test_idx, :], labels[test_idx], True)
            print('Time: %0.3fs' % (time() - started))

            # Everything off the main diagonal is counted as a wrong case
            # (presumably the table is a confusion matrix -- confirm against
            # RandomForest_scikit.test).  Seeding sum() with the running total
            # keeps the additions in the same order as a plain nested loop.
            wrong_cases = sum(
                (cell
                 for row_no, row in enumerate(fold_table)
                 for col_no, cell in enumerate(row)
                 if row_no != col_no),
                wrong_cases,
            )

        print("Wrong Cases: " + str(wrong_cases))
        print(' Full Case ')
        full_forest = RandomForest_scikit()
        full_forest.train(features, labels)
        full_forest.test(features, labels, True)
示例#2
0
    def upper(self):
        """Compare a forest trained on importance-selected features with one trained on all features.

        Steps:

        1. Load ``(samples, responses)`` from the pickled file "models.obj"
           (element 0 and element 1 of the unpickled object).
        2. Fit a ``FeatureSelectionScikit`` model and read its per-variable
           importances; keep every variable whose importance is strictly
           greater than the minimum importance.
        3. Plot the full importance curve (figure 1) and the curve of the
           kept importances (figure 2).
        4. Train and test one ``RandomForest_scikit`` on the reduced samples
           and another on the original samples (each is tested on the same
           data it was trained on), timing both runs.
        5. Print both timings, their ratio, the off-diagonal totals of the
           two tables returned by ``test`` and the ratio of those totals,
           then show the figures.

        Returns nothing; all results are printed or plotted.
        """
        model = FeatureSelectionScikit()
        rdmForestPre = RandomForest_scikit()
        rdmForest = RandomForest_scikit()

        # Pickle data is binary, so the file is opened in 'rb' mode, and the
        # ``with`` block guarantees the handle is closed.
        # NOTE(review): unpickling can execute arbitrary code -- "models.obj"
        # must come from a trusted source.
        with open("models.obj", 'rb') as model_file:
            models = pickle.load(model_file)
        samples = models[0]
        responses = models[1]

        # Scaling is currently disabled: the raw samples are used as they are.
        samplesScaled = samples

        model.fit(samplesScaled, responses)
        variablesImportance = model.importance()

        fig1 = plt.figure(1, figsize=(4, 3))
        ax1 = fig1.add_subplot(111)
        ax1.plot(variablesImportance, linewidth=2)

        # Keep the variables whose importance is strictly above the minimum.
        minimo = min(variablesImportance)
        indices = [i for i, value in enumerate(variablesImportance)
                   if value > minimo]
        basicPre = [variablesImportance[i] for i in indices]

        print('Escogi %d' % (len(basicPre)))

        fig2 = plt.figure(2, figsize=(4, 3))
        ax2 = fig2.add_subplot(111)
        ax2.plot(basicPre, linewidth=2)

        # ``indices`` holds column positions, so each row is indexed with it.
        # Zipping the index list against the row would only pair positions
        # with the leading values and would not pick the selected columns.
        newSample = [[fila[i] for i in indices] for fila in samplesScaled]

        t0 = time()
        rdmForestPre.train(newSample, responses)
        _, confusionPre = rdmForestPre.test(newSample, responses, True)
        preTiempo = (time() - t0)
        print("With Preprocessing %0.3fs" % (preTiempo))

        # Off-diagonal entries are counted as errors (presumably the table is
        # a confusion matrix -- confirm against RandomForest_scikit.test).
        sumPre = sum(
            entrada
            for idx, fila in enumerate(confusionPre)
            for jdx, entrada in enumerate(fila)
            if idx != jdx
        )

        t0 = time()
        rdmForest.train(samples, responses)
        _, confusion = rdmForest.test(samples, responses, True)
        Tiempo = time() - t0
        print("Without Preprocessing %0.3fs" % (Tiempo))
        print("Preprocessing/Without = %0.3fs" % (1.0 * preTiempo / Tiempo))

        sumAll = sum(
            entrada
            for idx, fila in enumerate(confusion)
            for jdx, entrada in enumerate(fila)
            if idx != jdx
        )

        # The full-feature forest may make no errors at all (it is tested on
        # its own training data), so guard the ratio against a zero divisor.
        if sumAll:
            errorRatio = float(1.0 * sumPre / sumAll)
        elif sumPre:
            errorRatio = float('inf')
        else:
            errorRatio = float('nan')

        print(str(sumPre), str(sumAll), errorRatio)

        plt.show()