def _mean_and_std(values):
    """Return ``(mean, sample standard deviation)`` for a non-empty sequence.

    The deviation uses the ``n - 1`` denominator.  It is undefined for a
    single value, so ``nan`` is returned in that case instead of dividing
    by zero.
    """
    count = len(values)
    mean = sum(values) / float(count)
    if count < 2:
        return mean, float("nan")
    squared_error = sum((value - mean) ** 2 for value in values)
    return mean, (squared_error / float(count - 1)) ** 0.5


def _report_pertinence(prefix, score_columns):
    """Print mean and standard deviation for the min/average/product scores.

    ``score_columns`` holds three sequences, in that order; ``prefix`` is the
    text printed before the criterion name.
    """
    for label, column in zip(("min", "average", "product"), score_columns):
        mean, deviation = _mean_and_std(column)
        print(prefix + label + ": " + str(mean)
              + " standard deviation: " + str(deviation))


def runTest(size, trees, features, test):
    """Train a random forest and score it with and without EVT rejection.

    Reads the module-level names ``X_train``, ``y_train``, ``X_test``,
    ``y_test``, ``train_classes``, ``validate_classes``, ``n_classes``,
    ``min_threshold``, ``average_threshold`` and ``product_threshold``.

    Args:
        size: Unused; kept so existing callers keep working.
        trees: Passed to the forest as ``n_estimators``.
        features: Passed to the forest as ``max_features``.
        test: How many of the unknown class ids (ids in ``range(10)`` that
            are in neither ``train_classes`` nor ``validate_classes``) are
            added to the evaluation set.

    Returns:
        ``(og_score, EVT_min_score, EVT_average_score, EVT_product_score)``:
        the plain ``model.score`` result followed by the fraction of
        evaluation samples handled correctly under each threshold.
    """
    model = RandomForestClassifier(n_estimators=trees, max_features=features,
                                   min_samples_leaf=5, oob_score=False)
    model.fit(X_train, y_train)
    # Project helper defined elsewhere; presumably prepares the model for
    # evt_predict -- TODO confirm against its definition.
    fit(model, n_classes)

    # Labels seen during training; anything else counts as "unknown".
    known_classes = set(y_train)

    unknown = [label for label in range(10)
               if label not in train_classes and label not in validate_classes]
    eval_classes = train_classes + unknown[:test]

    print(train_classes)
    print(validate_classes)
    print(unknown)
    print(eval_classes)

    # Stack the per-class test data into one evaluation set.
    X_tests = None
    y_tests = None
    for label in eval_classes:
        # "is None", not "== None": once X_tests is an array, "==" compares
        # elementwise and the result cannot be used as a condition.
        if X_tests is None:
            X_tests = X_test[label]
            y_tests = y_test[label]
        else:
            X_tests = np.vstack((X_tests, X_test[label]))
            y_tests = np.append(y_tests, y_test[label])

    og_score = model.score(X_tests, y_tests)
    print("random test: " + str(og_score))

    # pertinence[i][0..2] are compared with the min / average / product
    # thresholds respectively.
    predictions, pertinence = model.evt_predict(X_tests)
    thresholds = (min_threshold, average_threshold, product_threshold)

    total = len(predictions)
    correct = [0, 0, 0]
    known_scores = ([], [], [])
    unknown_scores = ([], [], [])

    for idx, predicted in enumerate(predictions):
        actual = y_tests[idx]
        is_known = actual in known_classes
        bucket = known_scores if is_known else unknown_scores
        for k, threshold in enumerate(thresholds):
            score = pertinence[idx][k]
            bucket[k].append(score)
            if score > threshold:
                # Accepted: correct only if the label matches.
                if predicted == actual:
                    correct[k] += 1
            elif not is_known:
                # Rejected: correct only if the true class was never trained.
                correct[k] += 1

    # Each group's deviation is computed over its own samples.
    if known_scores[0]:
        _report_pertinence("average kown classes with ", known_scores)
    if unknown_scores[0]:
        _report_pertinence(" average unknown classes with ", unknown_scores)

    EVT_min_score = float(correct[0]) / total
    EVT_average_score = float(correct[1]) / total
    EVT_product_score = float(correct[2]) / total
    print(n_classes)
    print("evt score: " + str(EVT_min_score))
    return (og_score, EVT_min_score, EVT_average_score, EVT_product_score)