def test_variance_k_best_random_tree_k_fold(self):
    """Evaluate a random forest under stratified 10-fold cross-validation.

    Loads (samples, responses) from ``models.obj``, optionally drops
    zero-variance features, scales the data, then trains/tests a
    ``RandomForest_scikit`` per fold, accumulating misclassifications
    (off-diagonal confusion-matrix entries). Finally trains and tests
    once on the full data set.

    Side effects: prints per-fold timing and the running count of
    wrong cases; no return value.
    """
    # Load the pickled model data (helper defined elsewhere in the project).
    samples, responses = open_model("models.obj")
    samples = np.array(samples)
    responses = np.array(responses)

    feature_selection = True  # toggle for the variance-threshold step
    if feature_selection:
        # threshold=0.0 removes only features that are constant across samples.
        selection = VarianceThreshold(threshold=0.00)
        selection.fit(samples)
        idxs = selection.get_support(indices=True)
        samples = samples[:, idxs]

    samples = preprocessing.scale(samples)

    # Stratified cross-validation (old scikit-learn API: y first, n_folds kw).
    scv = StratifiedKFold(responses, n_folds=10)
    wrong_cases = 0  # renamed from `sum`, which shadowed the builtin
    for i, (train, test) in enumerate(scv):
        print('Case %d' % (i))
        # Fresh model per fold so folds do not leak state into each other.
        rdm_forest = RandomForest_scikit()
        init = time()
        rdm_forest.train(samples[train, :], responses[train])
        a, confusion_pre = rdm_forest.test(samples[test, :],
                                           responses[test], True)
        print('Time: %0.3fs' % (time() - init))
        # Off-diagonal entries of the confusion matrix are the errors.
        for idx, fila in enumerate(confusion_pre):
            for jdx, entrada in enumerate(fila):
                if idx != jdx:
                    wrong_cases += entrada

    # NOTE(review): original indentation was lost; the running total is
    # printed once after all folds (cumulative across folds, as the
    # accumulator is never reset).
    print("Wrong Cases: " + str(wrong_cases))

    print(' Full Case ')
    rdm_forest = RandomForest_scikit()
    rdm_forest.train(samples, responses)
    rdm_forest.test(samples, responses, True)
def upper(self):
    """Compare a random forest trained on importance-selected features
    against one trained on all features.

    Loads pickled (samples, responses) from ``models.obj``, ranks feature
    importances with ``FeatureSelectionScikit``, keeps every feature whose
    importance exceeds the minimum, then trains/tests a forest on the
    reduced data and another on the full data, printing timings and
    misclassification counts for both.

    Side effects: prints diagnostics and shows two matplotlib figures;
    no return value.
    """
    model = FeatureSelectionScikit()
    rdm_forest_pre = RandomForest_scikit()
    rdm_forest = RandomForest_scikit()

    # Binary mode is required for pickle; the context manager fixes the
    # leaked file handle in the original code.
    with open("models.obj", 'rb') as fh:
        models = pickle.load(fh)
    samples = models[0]
    responses = models[1]

    # Scaling is currently disabled; kept as a one-line toggle.
    # samples_scaled = preprocessing.scale(samples)
    samples_scaled = samples

    model.fit(samples_scaled, responses)
    importances = model.importance()

    fig1 = plt.figure(1, figsize=(4, 3))
    ax1 = fig1.add_subplot(111)
    ax1.plot(importances, linewidth=2)

    # Keep every feature strictly above the minimum importance.
    minimum = min(importances)
    selected_values = []
    selected_indices = []
    for i, value in enumerate(importances):
        if value > minimum:
            selected_values.append(value)
            selected_indices.append(i)
    print('Escogi %d' % (len(selected_values)))

    fig2 = plt.figure(2, figsize=(4, 3))
    ax2 = fig2.add_subplot(111)
    ax2.plot(selected_values, linewidth=2)

    # BUG FIX: the original used
    #   [val for is_good, val in izip(indices, fila) if is_good]
    # which pairs each *feature index* with the row's leading columns and
    # filters on the index's truthiness — i.e. it kept the first
    # len(indices) columns (dropping the one paired with index 0), not the
    # selected features. Select the chosen columns explicitly instead.
    new_sample = [[fila[i] for i in selected_indices]
                  for fila in samples_scaled]

    t0 = time()
    rdm_forest_pre.train(new_sample, responses)
    a, confusion_pre = rdm_forest_pre.test(new_sample, responses, True)
    pre_time = (time() - t0)
    print("With Preprocessing %0.3fs" % (pre_time))
    wrong_pre = 0  # renamed from `sumPre` for consistency
    for idx, fila in enumerate(confusion_pre):
        for jdx, entrada in enumerate(fila):
            if idx != jdx:
                wrong_pre += entrada

    t0 = time()
    rdm_forest.train(samples, responses)
    a, confusion = rdm_forest.test(samples, responses, True)
    full_time = time() - t0
    print("Without Preprocessing %0.3fs" % (full_time))
    print("Preprocessing/Without = %0.3fs" % (1.0 * pre_time / full_time))
    wrong_full = 0  # renamed from `sum`, which shadowed the builtin
    for idx, fila in enumerate(confusion):
        for jdx, entrada in enumerate(fila):
            if idx != jdx:
                wrong_full += entrada

    # Guard the ratio: a perfect full-data fit (zero errors) previously
    # raised ZeroDivisionError.
    ratio = float(wrong_pre) / wrong_full if wrong_full else float('nan')
    print(str(wrong_pre), str(wrong_full), ratio)
    plt.show()