def _learn_model(scenario_name):
    '''
    Learns a classifier model for the specified scenario if one does not
    already exist.

    The scenario is looked up in the module-level _scenarios mapping and
    must provide the keys 'model' (path of the saved model file),
    'classifier' (either 'rf' or 'svm') and 'training' (dataset passed to
    datasets.csv2numpy).

    Nothing is done if the model file already exists. Otherwise the
    classifier is fit on the training dataset, its decision-function
    output on that same dataset is reported through
    utility.print_stats_cutoff, and the model is saved to
    scenario['model'].

    Raises ValueError if scenario['classifier'] is neither 'rf' nor 'svm'.
    '''
    scenario = _scenarios[scenario_name]
    if path.exists(scenario['model']):
        # A model was already saved for this scenario; keep it.
        return

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('Training the model for scenario {}...'.format(scenario_name))

    # Decide on classifier. Unknown types are rejected here, before the
    # dataset is loaded, instead of failing later with an obscure
    # AttributeError on a placeholder value.
    classifier_type = scenario['classifier']
    if classifier_type == 'rf':
        classifier = RandomForest()
        sys.stdout.write('TRAINING RANDOM FOREST\n')
        # Cutoffs 0.1, 0.2, ..., 0.9 used when reporting training stats.
        cutoff = [c * 0.1 for c in range(1, 10)]
    elif classifier_type == 'svm':
        classifier = sklearn_SVC(kernel='rbf', C=10, gamma=0.01)
        sys.stdout.write('TRAINING SVM\n')
        # A single cutoff at 0.0 for the SVM decision values.
        cutoff = [0.0]
    else:
        raise ValueError(
            'Unknown classifier {!r} for scenario {!r}; '
            "expected 'rf' or 'svm'".format(classifier_type, scenario_name))

    # Load the required dataset and train the model
    X, y, _ = datasets.csv2numpy(scenario['training'])
    classifier.fit(X, y)

    # Evaluate the model on the training dataset
    y_pred = classifier.decision_function(X)
    sys.stdout.write('Performance on training data:\n')
    utility.print_stats_cutoff(y, y_pred, cutoff)

    # Save the model in the corresponding file
    classifier.save_model(scenario['model'])
def _learn_model(scenario_name):
    '''
    Learns a classifier model for the specified scenario if one does not
    already exist.

    The scenario is looked up in the module-level _scenarios mapping and
    must provide the keys 'model' (path of the saved model file),
    'classifier' (either 'rf' or 'svm') and 'training' (dataset passed to
    datasets.csv2numpy).

    Nothing is done if the model file already exists. Otherwise the
    classifier is fit on the training dataset, its decision-function
    output on that same dataset is reported through
    utility.print_stats_cutoff, and the model is saved to
    scenario['model'].

    Raises ValueError if scenario['classifier'] is neither 'rf' nor 'svm'.
    '''
    scenario = _scenarios[scenario_name]
    if path.exists(scenario['model']):
        # A model was already saved for this scenario; keep it.
        return

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('Training the model for scenario {}...'.format(scenario_name))

    # Decide on classifier. Unknown types are rejected here, before the
    # dataset is loaded, instead of failing later with an obscure
    # AttributeError on a placeholder value.
    classifier_type = scenario['classifier']
    if classifier_type == 'rf':
        classifier = RandomForest()
        sys.stdout.write('TRAINING RANDOM FOREST\n')
        # Cutoffs 0.1, 0.2, ..., 0.9 used when reporting training stats.
        cutoff = [c * 0.1 for c in range(1, 10)]
    elif classifier_type == 'svm':
        classifier = sklearn_SVC(kernel='rbf', C=10, gamma=0.01)
        sys.stdout.write('TRAINING SVM\n')
        # A single cutoff at 0.0 for the SVM decision values.
        cutoff = [0.0]
    else:
        raise ValueError(
            'Unknown classifier {!r} for scenario {!r}; '
            "expected 'rf' or 'svm'".format(classifier_type, scenario_name))

    # Load the required dataset and train the model
    X, y, _ = datasets.csv2numpy(scenario['training'])
    classifier.fit(X, y)

    # Evaluate the model on the training dataset
    y_pred = classifier.decision_function(X)
    sys.stdout.write('Performance on training data:\n')
    utility.print_stats_cutoff(y, y_pred, cutoff)

    # Save the model in the corresponding file
    classifier.save_model(scenario['model'])
def evaluate_classifier(data, labels, test_data, test_labels):
    '''
    Trains a RandomForest classifier on (data, labels) and scores it on
    every test set.

    test_data and test_labels are parallel sequences that are paired up
    with zip, so surplus entries in the longer one are ignored. Returns a
    list with one accuracy_score value per pair, in the order the pairs
    were given.
    '''
    forest = RandomForest()
    forest.fit(data, labels)

    # One accuracy per (features, true labels) test pair.
    return [
        accuracy_score(truth, forest.predict(features))
        for features, truth in zip(test_data, test_labels)
    ]
def evaluate_classifier(data, labels, test_data, test_labels):
    '''
    Fits a RandomForest classifier on (data, labels) and returns its
    classification accuracy on each test set.

    The i-th element of test_data is evaluated against the i-th element
    of test_labels (pairs are formed with zip). The result is a list of
    accuracy_score values, one per pair, in input order.
    '''
    model = RandomForest()
    model.fit(data, labels)

    def _accuracy(samples, expected):
        # Accuracy of the fitted model on a single test set.
        predicted = model.predict(samples)
        return accuracy_score(expected, predicted)

    scores = []
    for samples, expected in zip(test_data, test_labels):
        scores.append(_accuracy(samples, expected))
    return scores