def main():
    data_folder = 'data/sources/wikipedia'
    models_folder = 'classifier/models'
    save_loc = '/usr/share/nginx/html/wiki'
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)
    if next(os.walk(data_folder))[1]:
        retrain = True
        if retrain:
            input, target, classes = data.sample(data_folder)
            model = classifier.build(input.shape, target.shape)
            classifier.train(model, input, target)
            classifier.save(models_folder, model, classes)
        else:
            model, classes = classifier.load(models_folder, sorted(os.listdir(models_folder))[-1])
        for root, dirs, files in os.walk(data_folder):
            for file in files:
                if not file.startswith('.'):
                    with open(root + '/' + file) as f:
                        input = data.str2mat(f.read())
                    output = classifier.run(model, input)
                    data.backtest(save_loc + '/' + file, classes, input, output)
    else:
        print("""\nNo data found.\nPut subfolders of files by class, within the 'data' folder.""")
def perform_experiment(percentages, model_config, loader_config, method):
    accuracy_list = []
    for percentage in percentages:
        # print(f"Training of model based on {percentage*100}% deletion of pixels.")
        model_config.set_model('VGG-11')
        model_config.set_optimizer()
        data_dir = f'dataset/roar_{method}/'
        datasetname = f'cifar-{model_config.num_classes}-{percentage*100}%-removed/'
        percentage_loader = DataLoaderConfiguration(path=loader_config.path, data_dir=data_dir, datasetname=datasetname)
        model_config.model_dir = f'saved-models/VGG-11-ROAR-{method}-{percentage*100}.pth'
        if not os.path.exists(model_config.model_dir):
            print(f"Model for {percentage*100}% will be trained now.")
            train(model_config, percentage_loader)
        else:
            model_config.load_model()
        eval_accuracy = parse_epoch(percentage_loader.testloader, model_config.model, model_config.optimizer,
                                    model_config.criterion, model_config.device, train=False)
        accuracy_list.append(eval_accuracy)
        # print("Eval accur:", eval_accuracy)
        # print("----------------------------------------------")
    return accuracy_list
def experiment(model_config, loader_config, percentages=[0.1, 0.3, 0.5, 0.7, 0.9]):
    if not os.path.exists(model_config.model_dir):
        print("Cifar-10 model will be trained which is used for data preparation.")
        train(model_config, loader_config)
    # If the adjusted data has not been created yet, create it.
    if not os.path.exists(loader_config.path + 'dataset/roar_full_grad/'):
        print("The data for full grad is not found in dataset/roar_full_grad")
        print("Creating it can take a long time, please abort this run and download it from github")
        create_data(percentages, model_config, loader_config, salience_method="full_grad")
    if not os.path.exists(loader_config.path + 'dataset/roar_input_grad/'):
        print("The data for input grad is not found in dataset/roar_input_grad")
        print("Creating it can take a long time, please abort this run and download it from github")
        create_data(percentages, model_config, loader_config, salience_method="input_grad")
    if not os.path.exists(loader_config.path + 'dataset/roar_random/'):
        print("The data for random is not found in dataset/roar_random")
        print("Creating it can take a long time, please abort this run and download it from github")
        create_data(percentages, model_config, loader_config, salience_method="random")
    # Train models on each variant of the adjusted data
    accuracy_list = []
    accuracy_list.append(perform_experiment(percentages, model_config, loader_config, "full_grad"))
    accuracy_list.append(perform_experiment(percentages, model_config, loader_config, "input_grad"))
    accuracy_list.append(perform_experiment(percentages, model_config, loader_config, "random"))
    return accuracy_list
def main():
    # expect exactly one argument: the name of the net to train
    if len(sys.argv) != 2:
        usage()
    net = sys.argv[1]
    if net == "classifier":
        classifier.train()
    else:
        print("unknown net")
def evaluate(train_set, test_set, classifier):
    classifier.train(train_set)
    predictions_list = classifier.predict(test_set)
    acc = get_acc(test_set, predictions_list)
    sens = get_sensivity(test_set, predictions_list)
    spec = get_specifity(test_set, predictions_list)
    prec = get_precision(test_set, predictions_list)
    fmeas = get_fmeas(test_set, predictions_list)
    return acc, sens, spec, prec, fmeas
def add_buttons(self):
    # self.add_button('Deploy', self.OnDeploy)
    # self.add_button('Save Team', self.OnSave1)
    # self.add_button('Load Team', self.OnLoad1)
    self.add_button('First Person Agent', self.OnFPS)
    self.add_button('Snapshot', self.OnSnapshot)
    from classifier import ObjectClassifier
    classifier = ObjectClassifier()
    classifier.train()
def retrain(model_folder_name, model_type):
    model_dir = os.path.join(Globals.model_path, model_folder_name)
    processed_dir = os.path.join(model_dir, "data")
    classifier.train(data_dir=processed_dir, session=MyGraph(),
                     classifier_filename=os.path.join(model_dir, "classifier.pkl"),
                     model_type=model_type)
    return True, ""
def main(transductive: bool = False):
    try:
        from classifier import pretrain
    except ImportError:
        part2xy = load_dataset_fast('FILIMDB', parts=SCORED_PARTS)
        train_ids, train_texts, train_labels = part2xy['train']
        print('\nTraining classifier on %d examples from train set ...' % len(train_texts))
        st = time()
        params = train(train_texts, train_labels)
        print('Classifier trained in %.2fs' % (time() - st))
    else:
        part2xy = load_dataset_fast('FILIMDB', parts=SCORED_PARTS + ('train_unlabeled',))
        train_ids, train_texts, train_labels = part2xy['train']
        _, train_unlabeled_texts, _ = part2xy['train_unlabeled']
        st = time()
        if transductive:
            all_texts = list(text for _, text, _ in part2xy.values())
        else:
            all_texts = [train_texts, train_unlabeled_texts]
        total_texts = sum(len(text) for text in all_texts)
        print('\nPretraining classifier on %d examples' % total_texts)
        params = pretrain(all_texts)
        print('Classifier pretrained in %.2fs' % (time() - st))
        print('\nTraining classifier on %d examples from train set ...' % len(train_texts))
        st = time()
        params = train(train_texts, train_labels, params)
        print('Classifier trained in %.2fs' % (time() - st))
        del part2xy["train_unlabeled"]
    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        preds = classify(x, params)
        print('%s set classified in %.2fs' % (part, time() - st))
        allpreds.extend(zip(ids, preds))
        if y is None:
            print('no labels for %s set' % part)
        else:
            score(preds, y)
    save_preds(allpreds, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
def test_classify_test_dataset():
    """ This will attempt to classify the test dataset """
    start_time = time.time()
    classifier.train()
    number_of_reviews = classifier.negative_review_count + classifier.positive_review_count
    results = classifier.predict_reviews()
    print(results)
    print(str(results["correct_predictions"] / number_of_reviews * 100) + "% is the accuracy ")
    final_time = time.time() - start_time
    print("It took: " f'{final_time:.2f}' " seconds to run\n")
def test_classify_train_dataset_with_testing_data_with_stopwords():
    """
    This will attempt to classify the training dataset,
    using the testing dataset to train - with stop-words
    """
    start_time = time.time()
    classifier.train(use_testing_data=True)
    number_of_reviews = classifier.negative_review_count + classifier.positive_review_count
    results = classifier.predict_reviews(use_stop_words=True, classify_training_data=True)
    print(results)
    print(str(results["correct_predictions"] / number_of_reviews * 100) + "% is the accuracy ")
    final_time = time.time() - start_time
    print("It took: " f'{final_time:.2f}' " seconds to run\n")
def test_predict_test_dataset_with_stopwords():
    """ This test will attempt to classify the test dataset while using stopwords """
    start_time = time.time()
    use_stop_words = True
    predict.train()
    number_of_reviews = predict.negative_review_count + predict.positive_review_count
    results = predict.predict_test_reviews(use_stop_words=use_stop_words)
    print(results)
    print(str(results["correct_predictions"] / number_of_reviews * 100) + "% is the accuracy ")
    final_time = time.time() - start_time
    print("It took: " f'{final_time:.2f}' " seconds to run\n")
def train(input_folder_path, model_folder_name, model_type):
    print("Input Folder Path:", input_folder_path)
    print("Model Folder Name:", model_folder_name)
    print("Checking Directories...")
    if not os.path.exists(input_folder_path):
        return False, "Invalid input folder!"
    model_dir = os.path.join(Globals.model_path, model_folder_name)
    if os.path.exists(model_dir):
        return False, "Model already exists!"
    print("Aligning faces...")
    processed_dir = os.path.join(model_dir, "data")
    my_graph = MyGraph()
    align.align_faces(AlignOptions(input_folder_path, processed_dir, my_graph))
    directories = os.listdir(processed_dir)
    # SVCs don't seem to be able to handle only having 1 image for training, so create a duplicate
    if model_type == "svc":
        for d in directories:
            subdir = os.path.join(processed_dir, d)
            if os.path.isdir(subdir):
                files = os.listdir(subdir)
                if len(files) == 1:
                    file_name_split = os.path.splitext(files[0])
                    file_path_from = os.path.join(subdir, files[0])
                    file_path_to = os.path.join(subdir, file_name_split[0] + "_2" + file_name_split[1])
                    print("Only 1 image found for training... Duplicating ", file_path_from)
                    copyfile(file_path_from, file_path_to)
    print("Training...")
    classifier.train(data_dir=processed_dir, session=my_graph,
                     classifier_filename=os.path.join(model_dir, "classifier.pkl"),
                     model_type=model_type)
    return True, ""
def main():
    part2xy = load_dataset_fast('FILIMDB')
    train_ids, train_texts, train_labels = part2xy['train']
    print('\nTraining classifier on %d examples from train set ...' % len(train_texts))
    st = time()
    params = train(train_texts, train_labels)
    print('Classifier trained in %.2fs' % (time() - st))
    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        preds = classify(x, params)
        print('%s set classified in %.2fs' % (part, time() - st))
        allpreds.extend(zip(ids, preds))
        if y is None:
            print('no labels for %s set' % part)
        else:
            score(preds, y)
    save_preds(allpreds, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
def main():
    try:
        from classifier import pretrain
    except ImportError:
        part2xy = load_dataset_fast('FILIMDB')
        train_ids, train_texts, train_labels = part2xy['train']
        print('\nTraining classifier on %d examples from train set ...' % len(train_texts))
        st = time()
        params = train(train_texts, train_labels)
        print('Classifier trained in %.2fs' % (time() - st))
    else:
        part2xy = load_dataset_fast('FILIMDB', parts=('train', 'dev', 'test', 'train_unlabeled'))
        train_ids, train_texts, train_labels = part2xy['train']
        _, train_unlabeled_texts, _ = part2xy['train_unlabeled']
        all_texts = train_texts + train_unlabeled_texts
        print('\nPretraining classifier on %d examples' % len(all_texts))
        st = time()
        params = pretrain(all_texts)
        print('Classifier pretrained in %.2fs' % (time() - st))
        print('\nTraining classifier on %d examples from train set ...' % len(train_texts))
        st = time()
        params = train(train_texts, train_labels, params)
        print('Classifier trained in %.2fs' % (time() - st))
        del part2xy["train_unlabeled"]
    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        preds = classify(x, params)
        print('%s set classified in %.2fs' % (part, time() - st))
        allpreds.extend(zip(ids, preds))
        if y is None:
            print('no labels for %s set' % part)
        else:
            score(preds, y)
    save_preds(allpreds, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
def train(classifier=c):
    print("training classifier, please wait\n")
    trainingData = parsedata.readAllGames(False, exclusions)
    counter = 0
    # lazy iterator since trainingData can be HUUUUUUUUUGE
    for k in iter(trainingData):
        counter += 1
        # print(k)
        if len(k.split(',')) != 7:
            continue
        for score in trainingData[k]:
            classifier.train(k, score > 0)
        if counter % 25000 == 0:
            print("{}, ".format(counter), end='', flush=True)
        if counter % 100000 == 0:
            print()
    print("\n")
def main():
    # Instantiate the console arguments function
    args = arg_parser()
    print("GPU setting: {}".format(args.gpu))
    # Define normalization for transforms
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    # Define transformations for training, validation and test sets
    data_transforms = create_transforms(30, 224, 256, normalize)
    # Load the datasets from the image folders
    datasets = image_datasets(data_transforms)
    # Define the dataloaders using the image datasets
    loaders = data_loaders(datasets, 32)
    # Instantiate a new model
    model = create_model(arch=args.arch)
    output_units = len(datasets['training'].classes)
    # Create new classifier
    model.classifier = create_classifier(model, args.hidden_layers, output_units, args.dropout)
    device = check_gpu(args.gpu)
    print(device)
    model.to(device)
    learning_rate = args.learning_rate
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    epochs = args.epochs
    print_every = args.print_every
    steps = 0
    trainloader = loaders['training']
    validloader = loaders['validation']
    # trained_model = train(model, epochs, learning_rate, criterion, optimizer, loaders['training'], loaders['validation'], device)
    trained_model = train(model, trainloader, validloader, device, criterion, optimizer, epochs, print_every, steps)
    print("Training has completed")
    test_model(trained_model, loaders['testing'], device)
    initial_checkpoint(trained_model, args.checkpoint_dir, datasets['training'])
def train_classifier():
    # 1. prepare data
    print "-- Prepare Data"
    train_sentences = conll2002.iob_sents('esp.train')
    test_sentences = conll2002.iob_sents('esp.testa')
    # 2. extract features
    train_df, test_df = extract_classif_features(train_sentences, test_sentences)
    # 3. train
    clf = classifier.train(train_df, test_df)
def standalone():
    auc1, t1, TP1, FP1, TN1, FN1 = classifier.train(X_train, Y_train, X_test, Y_test, "svm")
    auc2, t2, TP2, FP2, TN2, FN2 = classifier.train(X_train, Y_train, X_test, Y_test, "pct")
    auc3, t3, TP3, FP3, TN3, FN3 = classifier.train(X_train, Y_train, X_test, Y_test, "nct")
    auc4, t4, TP4, FP4, TN4, FN4 = classifier.train(X_train, Y_train, X_test, Y_test, "lr")
    data_save = np.asarray([data_index, input_dim, balance_rate,
                            auc1, 1000 * t1, TP1, FP1, TN1, FN1,
                            auc2, 1000 * t2, TP2, FP2, TN2, FN2,
                            auc3, 1000 * t3, TP3, FP3, TN3, FN3,
                            auc4, 1000 * t4, TP4, FP4, TN4, FN4])
    data_save = np.reshape(data_save, (-1, 27))
    if os.path.isfile("Results/RF_AUC_DIF/AUC_Input.csv"):
        # append the new row to the existing results file
        auc = np.genfromtxt('Results/RF_AUC_DIF/AUC_Input.csv', delimiter=',')
        auc = np.reshape(auc, (-1, 27))
        data_save = np.concatenate((auc, data_save), axis=0)
        np.savetxt("Results/RF_AUC_DIF/AUC_Input.csv", data_save, delimiter=",", fmt="%f")
    else:
        np.savetxt("Results/RF_AUC_DIF/AUC_Input.csv", data_save, delimiter=",", fmt="%f")
def main(): """ """ parser = argparse.ArgumentParser() parser.add_argument("-v", "--verbose", dest="verbose", help="Increase verbosity", action='store_true') parser.add_argument("-d", "--postset", dest="postset", help="Dataset file path. A CSV file is required.", type=str) parser.add_argument("-min", "--ngrammin", dest="ngram_min", help="Minimum number of ngrams", type=int) parser.add_argument("-max", "--ngrammax", dest="ngram_max", help="Maximum number of ngrams", type=int) parser.add_argument("-s", "--save", dest="save", help="Save", action='store_true') options = parser.parse_args() if not options.verbose: warnings.filterwarnings("ignore") with open(options.postset) as f: postset = json.load(f) df = pd.read_json(options.postset, orient='columns') df.columns = ['samples'] posts = preprocess(df['samples']) scores = cls.train(posts, options.ngram_min, options.ngram_max, save=options.save)
def get_model(car_features, non_car_features, filename):
    if os.path.exists(filename):
        with open(filename, mode='rb') as f:
            data = pickle.load(f)
        clf = data['clf']
        scaler = data['scaler']
        return clf, scaler
    clf, scaler = train(car_features, non_car_features)
    with open(filename, mode='wb') as f:
        pickle.dump({
            'clf': clf,
            'scaler': scaler,
        }, f)
    return clf, scaler
def trainModels(dataDict, models=[]):
    notesMdls = []
    velMdls = []
    timeMdls = []
    if len(models) == 0:
        notesMdl = train(dataDict['dataNotes'], dataDict['targetNotes'])
        velMdl = train(dataDict['dataNotes'], dataDict['targetVelocity'])
        timeMdl = train(dataDict['dataNotes'], dataDict['targetTime'])
        notesMdls.append(notesMdl)
        velMdls.append(velMdl)
        timeMdls.append(timeMdl)
    elif len(models) == 1:
        notesMdl = train(dataDict['dataNotes'], dataDict['targetNotes'], model=models[0])
        velMdl = train(dataDict['dataNotes'], dataDict['targetVelocity'], model=models[0])
        timeMdl = train(dataDict['dataNotes'], dataDict['targetTime'], model=models[0])
        notesMdls.append(notesMdl)
        velMdls.append(velMdl)
        timeMdls.append(timeMdl)
    else:
        for model in models:
            print("Training notes model for " + model)
            notesMdl = train(dataDict['dataNotes'], dataDict['targetNotes'], model=model)
            print("Training velocity model for " + model)
            velMdl = train(dataDict['dataNotes'], dataDict['targetVelocity'], model=model)
            print("Training time model for " + model)
            timeMdl = train(dataDict['dataNotes'], dataDict['targetTime'], model=model)
            notesMdls.append(notesMdl)
            velMdls.append(velMdl)
            timeMdls.append(timeMdl)
    return notesMdls, velMdls, timeMdls
def get_data_and_retrain():
    global user_data_path
    # if user training data is unavailable, throw an error; it means the user didn't train anything yet
    if not os.path.exists(user_data_path + user + ".csv"):
        print("NO TRAINING DATA AVAILABLE. PLEASE TRAIN SOME CLASSES")
        return
    # refresh user data
    classifier.get_data_from(user_data_path + user + ".csv")
    # block training from happening --> the svm breaks with a single class
    if len(classifier.c_classes) <= 1:
        print("Please train more classes: at least 2 classes required")
        return
    # use normalization (required for the svm)
    classifier.use_normalization_and_normalize_training_data()
    # set classifier type
    classifier.set_classifier(classifier.c_svm_rbf)
    # train classifier (no inputs, no offsets/biases)
    classifier.train()
def sensitive_transparency(model_config, data_config):
    saliency_dir = data_config.path + 'dataset/saliency/'
    dataset = datasets.ImageFolder(root=saliency_dir, transform=data_config.transform)
    saliencyloader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=2)
    # model_config.set_model()
    if not os.path.exists(model_config.model_dir):
        train(model_config, data_config)
    else:
        model_config.load_model()
    simple_fullgrad = SimpleFullGrad(model_config.model)  # model
    # fullgrad = FullGrad(model_config.model, im_size=(1, 3, 32, 32), device=model_config.device)
    if os.path.exists(saliency_dir):
        compute_save_fullgrad_saliency(saliencyloader, data_config.unnormalize, data_config.save_path,
                                       model_config.device, simple_fullgrad)
    else:
        print("Add pictures to: " + saliency_dir)
        print("Saliency maps will be shown")
    csv_dir = data_config.data_dir + '/' + data_config.dataset_name + '/test/PPB-2017-metadata.csv'
    etnic_acc(data_config.testloader, model_config.model, model_config.optimizer,
              model_config.criterion, model_config.device, csv_dir)
def main():
    s = time.time()
    train = pd.read_csv("train.csv")
    test = pd.read_csv("test.csv")
    clean(train)
    clean(test)
    print("Data processed after " + str(time.time() - s) + " sec")
    # ----------- Predictor -------------------------------
    tfidf_bag = bag(train.tweet)
    predictor.predict(tfidf_bag, train.label)
    # ----------- Clustering ------------------------------
    hateful = train.copy(deep=True)
    get_hateful(hateful)
    kmeans_model, vectorizer = classifier.train(hateful, False)
    print("Clusters Found after " + str(time.time() - s) + " sec")
def train():
    classfier_name = request.forms.get('classfier_name')
    classfier_type = request.forms.get('classfier_type')
    classfier_params = request.forms.get('classfier_params')
    cross_validation_type = request.forms.get('cross_validation_type')
    learning_curve_params = request.forms.get('learning_curve_params')
    train_size = request.forms.get('train_size')
    clf = classifier.configure_classifier(classfier_type, classfier_params)
    cv = classifier.configure_cross_validation(cross_validation_type, classfier_params)
    features_train, labels_train = wtf.getArrays()
    clf, train_sizes, train_scores, test_scores = classifier.train(clf,
                                                                   train_sizes=np.linspace(.1, 1.0, train_size),
                                                                   cv=cv, params=" ",
                                                                   features=features_train, labels=labels_train)
    data = classfier_to_send(classfier_name, clf, train_sizes, train_scores, test_scores)
    post.send("http://naos-software.com/dataprocessing/rest-api", "/classifiers", "", data)
    return data
def test_features2(features, num_rounds, file):
    """ Tests the (homemade) classifier on a set of features, returning its precision """
    correct = {}
    for a in authors:
        correct[a] = 0
    runs = 0
    print "aantal features:" + str(len(features))
    print "testfeature:" + str(features)
    for i in range(0, num_rounds):
        start = time()
        data = split_train_test_data(authors, corp, 45)
        testdata = data["test"]
        traindata = data["train"]
        if file == "":
            trained_model = train(traindata, authors, features)
            print "model trained in:" + str(time() - start) + "seconds"
        else:
            trained_model = getfromfile(file)[1]
            writetofile((features, trained_model), "classifier2.c")
            print "trained model extracted from" + file
        print "number of runs:" + str(len(testdata))
        winsound.Beep(2000, 500)
        print "starting with classifications..."
        for j in range(0, len(testdata)):
            start = time()
            if classify(testdata[j][0], trained_model, features, authors, traindata) == testdata[j][1]:
                correct[testdata[j][1]] += 1
                runs += 1
            else:
                runs += 1
            print "runtime:" + str(time() - start)
            print "runs:" + str(runs)
    totalcorrect = 0
    for a in authors:
        totalcorrect += correct[a]
    print "correct:" + str(totalcorrect)
    return float(totalcorrect) / runs
def old_user():
    if request.method == 'POST':
        id = request.form['id']
        print id
        UPLOAD_FOLDER = '/home/ankush/openface/training-images/' + id
        app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
        file = request.files['file']
        filename = secure_filename(file.filename)
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        for i in range(20):
            shutil.copy(os.path.join(app.config['UPLOAD_FOLDER'], filename),
                        os.path.join(app.config['UPLOAD_FOLDER'], filename.split('.')[0] + str(i) + '.jpg'))
        import alignImages
        output = alignImages.alignMain("align")
        import creatingcsv
        creatingcsv.csv()
        import classifier
        output = classifier.train('/home/ankush/openface/generated-embeddings')
        return jsonify(output)
def training_image():
    if request.method == 'POST':
        user = request.form['user']
        print user
        cursor = db.cursor()
        sql = "INSERT INTO data (user_name) VALUES ('%s')" % (user)
        sql1 = "SELECT user_id FROM data WHERE user_name = ('%s')" % (user)
        cursor.execute(sql)
        db.commit()
        cursor.execute(sql1)
        results = cursor.fetchall()
        for row in results:
            ids = int(row[0])
        print ids
        path = os.makedirs('/home/ankush/openface/training-images/' + str(ids))
        UPLOAD_FOLDER = '/home/ankush/openface/training-images/' + str(ids)
        app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
        file = request.files['file']
        filename = secure_filename(file.filename)
        file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        for i in range(30):
            shutil.copy(os.path.join(app.config['UPLOAD_FOLDER'], filename),
                        os.path.join(app.config['UPLOAD_FOLDER'], filename.split('.')[0] + str(i) + '.jpg'))
        import alignImages
        output = alignImages.alignMain("align")
        import creatingcsv
        creatingcsv.csv()
        import classifier
        output = classifier.train('/home/ankush/openface/generated-embeddings')
        return jsonify(output)
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.externals import joblib

# features_train, labels_train, = wtf.getArrays()
features_train, labels_train, features_test, labels_test = prep_terrain_data.makeTerrainData()
# for (i, feature) in enumerate(features_test):
#     print(i, len(feature))
# labels_train[0] = 1

# fit the model
cv = cross_validation.ShuffleSplit(len(features_train), n_iter=50, test_size=0.1, random_state=0)
clf = train("svm", train_sizes=np.linspace(.1, 1.0, 20), cv=cv, params=" ",
            features=features_train, labels=labels_train)
# pred, accuracy, recall, precision = test(clf, features_test, labels_test)
plt.show()
# clf.fit(features_train, labels_train)
# pred = clf.predict(features_test)
from sklearn.metrics import accuracy_score
# acc = accuracy_score(pred, labels_test)
# print(" accuracy: ", acc)
# print(classification_report(labels_test, pred))
import numpy as np
import matplotlib.pyplot as pyplot
import h5py
import scipy
from PIL import Image
from scipy import ndimage
import cv2
import random
import json

# importing classifier resources
import classifier

# fetch data from model after training
result = classifier.train()


def format_results():
    # static array variants
    APPLE = [1, 0, 0]
    ORANGE = [0, 1, 0]
    BANANA = [0, 0, 1]
    # misc printing data
    correct_counter = 0
    show_prediction = True
    for i in range(result["prediction_training_labels"].shape[1]):
        guess_array = result["prediction_training_labels"][:, i].astype(int)
        correct_label = result["original_training_labels"][i]
                        dest='non_overlap_chunk_size', action='store', default=10)
    parser.add_argument('-umm', '--use-min-max', dest='use_min_max', action='store_true', default=False)
    parser.add_argument('features_dir', action='store', type=str,
                        help='Path to directory where feature files are stored')
    parser.add_argument('output_dir', action='store', type=str,
                        help='Path to directory where output files will be stored')
    parser.add_argument('fold_num', action='store', type=int,
                        help='Fold ordinal to train/test with')
    return vars(parser.parse_args())


if __name__ == '__main__':
    args = parse_arguments()
    train(**args)
def learn():
    ds_x, ds_y = dataset.load_dataset()
    clsfr = classifier.create_classifier(verbose=True, layer_sizes=(100, 25))
    classifier.train(clsfr, ds_x[:9000], ds_y[:9000])
    print(classifier.rate(clsfr, ds_x[9000:], ds_y[9000:]))
    classifier.dump_classifier(clsfr, '100x25')
def train():
    callback = request.GET.get('callback')
    classifier_name = request.GET.get('classifier_name')
    classifier_id = request.GET.get('classifier_id')
    user_id = request.GET.get('user_id')
    classifier_type = request.GET.get('classifier_type')
    classifier_params = request.GET.get('classifier_params')
    cross_validation_type = request.GET.get('cross_validation_type')
    cross_validation_params = request.GET.get('cross_validation_params')
    result_test_classifiers_id = request.GET.get('result_test_classifiers_id')
    collection_id = request.GET.get('collection_id')
    vectorized_document_collection_id = request.GET.get('vectorized_document_collection_id')
    train_size = request.GET.get('train_size')
    # data = classifier_to_send(user_id, classifier_name, classifier_params, "", "", 1)
    # post.send("http://localhost:8080/dataprocessing/rest-api/classifiers/", data)
    print("Params :")
    print(classifier_name)
    print(classifier_type)
    print(classifier_params)
    print(cross_validation_type)
    print(cross_validation_params)
    print(collection_id)
    print(train_size)
    clf = classifier.configure_classifier(classifier_type, classifier_params)
    # features_train, labels_train = makeTerrainData(n_points=200)
    features_train, labels_train, features_test, labels_test = makeTerrainData()
    # print(features_train)
    if cross_validation_type == 'None':
        cross_validation_type = None
    cv = classifier.configure_cross_validation(cross_validation_type, cross_validation_params, n=len(features_train))
    print("features_train :")
    print(len(features_train))
    print("labels_train :")
    print(len(labels_train))
    clf.fit(features_train, labels_train)
    fig = classifier.train(clf, train_sizes=np.linspace(.1, 1.0, train_size), cv=cv, params=" ",
                           features=features_train, labels=labels_train)
    imgdata = StringIO()
    fig.savefig(imgdata, format='svg')
    imgdata.seek(0)  # rewind the data
    svg_dta = imgdata.getvalue()  # this is svg data
    import pickle
    s = pickle.dumps(clf)
    print("classifier dump:")
    # print(s)
    data = classifier_to_send(user_id=user_id, name=classifier_name,
                              vectorizedDocumentCollectionId=vectorized_document_collection_id,
                              parameter=classifier_params, learningCurve=svg_dta, content=s, flag=1)
    put.send("http://localhost:8080/dataprocessing/rest-api/classifiers/", classifier_id, data)
    pred = clf.predict(features_train)
    from sklearn.metrics import accuracy_score
    acc = accuracy_score(labels_train, pred)
    precision = precision_score(labels_train, pred)
    recall = recall_score(labels_train, pred)
    print("result_test_classifiers_id: " + result_test_classifiers_id)
    data = test_data_to_send(id_result_test_classifier=result_test_classifiers_id, user_id=user_id,
                             classifierId=classifier_id,
                             vectorizedDocumentCollectionId=vectorized_document_collection_id,
                             parameter=" ", precision=precision, accuracy=acc, recall=recall)
    print(data)
    put.send("http://localhost:8080/dataprocessing/rest-api/resultTestClassifiers/", result_test_classifiers_id, data)
    return '{0}({1})'.format(callback, {'a': 1, 'b': 2})
def main(args):
    """Calls training and other functions. Needs 5 arguments."""
    word_file = args[0]
    training_file = args[1]
    validation_file = args[2]
    test_file = args[3]
    out_path = args[4]
    words = {}
    with open(word_file, "r") as f:
        i = 0
        for line in f:
            line = line.split()
            words[i] = line[0]
            i += 1
    dimension = len(words)
    training_set = parse_input(training_file, dimension)
    validation_set = parse_input(validation_file, dimension)
    test_set = parse_input(test_file, dimension)
    # first part
    w, w0, error = train(training_set, dimension, 0.)
    emp_error = calc_num_wrong(w, w0, training_set, dimension)
    print_weights(out_path + "tezine1.dat", w, w0, error, emp_error)
    # second part
    l = [0.1, 1., 5., 10., 100., 1000.]
    with open(out_path + "optimizacija.dat", "w") as f:
        best_error = error
        best_w = w
        best_w0 = w0
        num_wrong = calc_num_wrong(w, w0, validation_set, dimension)
        optimal = 0.
        f.write("\u03BB" + " = " + str(0) + ", " + str(num_wrong) + "\n")
        for lambda_ in l:
            w, w0, error = train(training_set, dimension, lambda_)
            num = calc_num_wrong(w, w0, validation_set, dimension)
            # if lambda_ == 1.:
            #     output_predictions(out_path + "pred_proba.dat", validation_set, w, w0)
            #     top_five = w.argsort()[-5:][::-1]
            #     with open(out_path + "rijeci_proba.txt", "w") as f2:
            #         for x in top_five:
            #             f2.write(words[x] + "\n")
            f.write("\u03BB" + " = " + str(lambda_) + ", " + str(num) + "\n")
            if num <= num_wrong:
                num_wrong = num
                optimal = lambda_
        f.write("optimalno: " + "\u03BB = " + str(optimal) + "\n")
    # third part
    training_set.extend(validation_set)
    w, w0, error = train(training_set, dimension, optimal)
    emp_error = calc_num_wrong(w, w0, training_set, dimension)
    print_weights(out_path + "tezine2.dat", w, w0, error, emp_error)
    top_twenty = w.argsort()[-20:][::-1]
    with open(out_path + "rijeci.txt", "w") as f:
        for x in top_twenty:
            f.write(words[x] + "\n")
    output_predictions(out_path + "ispitni-predikcije.dat", test_set, w, w0)
import classifier as c
import operator
import math

nb, vocab = c.train()
prob = nb[1]
wordprob = [[], []]
wordprob[0] = sorted(prob[0].iteritems(), key=operator.itemgetter(1))
wordprob[1] = sorted(prob[1].iteritems(), key=operator.itemgetter(1))
wordprob[0].reverse()
wordprob[1].reverse()
# for i in range(0, 2):
#     print "------------"
#     for j in range(1, 20):
#         print wordprob[i][j]
logratio = [{}, {}]
for word in vocab:
    for i in range(0, 2):
        logratio[0][word] = math.log(prob[0][word]) - math.log(prob[1][word])
        logratio[1][word] = math.log(prob[1][word]) - math.log(prob[0][word])
logrank = [[], []]
logrank[0] = sorted(logratio[0].iteritems(), key=operator.itemgetter(1))
logrank[1] = sorted(logratio[1].iteritems(), key=operator.itemgetter(1))
for i in range(0, 2):
    print "--------------"
    logrank[i].reverse()
    for j in range(1, 21):
        print logrank[i][j]
def main(train_timeout=5 * 60, eval_timeout=5 * 60):
    results = {}
    try:
        import classifier
        importlib.reload(classifier)
    except Exception as e:
        print(e)
        results["exception"] = str(e)
        if sys.modules.get("classifier"):
            del sys.modules['classifier']
        return results
    part2xy = load_dataset_fast('FILIMDB_hidden', SCORED_PARTS)
    train_ids, train_texts, train_labels = part2xy['train']
    print('\nTraining classifier on %d examples from train set ...' % len(train_texts))
    st = time()
    try:
        with time_limit(train_timeout):
            params = classifier.train(train_texts, train_labels)
    except (TimeoutException, ValueError, Exception) as e:
        del sys.modules['classifier']
        print(e)
        if isinstance(e, TimeoutException):
            results["train_time"] = train_timeout
        results["exception"] = str(e)
        return results
    train_time = time() - st
    results["train_time"] = train_time
    print('Classifier trained in %.2fs' % train_time)
    allpreds = []
    for part, (ids, x, y) in part2xy.items():
        print('\nClassifying %s set with %d examples ...' % (part, len(x)))
        st = time()
        try:
            with time_limit(eval_timeout):
                preds = classifier.classify(x, params)
        except (TimeoutException, ValueError) as e:
            del sys.modules['classifier']
            if isinstance(e, TimeoutException):
                print("Timeout on evaluating %s set!" % part)
                results["eval_on_%s_set_time" % part] = eval_timeout
            else:
                print(e)
                results["exception"] = str(e)
            return results
        eval_time = time() - st
        results["eval_on_%s_set_time" % part] = eval_time
        print('%s set classified in %.2fs' % (part, eval_time))
        allpreds.extend(zip(ids, preds))
        if y is None:
            print('no labels for %s set' % part)
        else:
            acc = score(preds, y)
            results["eval_on_%s_set_acc" % part] = acc
    del sys.modules['classifier']
    return results
def command(command):
    """
    This function will take a command and execute the command given,
    or else it will tell the user the command doesn't exist
    :param command: a string with your command
    """
    help = """
    Available commands are:
    predict - Attempts to predict a review by the user
    run - allow you to run different parts of the program
    exit - exits the program
    clear - clears the window
    help - shows the different commands available
    wordcount - Will show how many times a word shows up in the trainingdata, type in the word in the next input
    stopwords - learn more about stopwords
    setpath - allows you to set the path to the directory that contains the data
    topwords - this will list the most common positive or negative words
    candidates - list the people that contributed to the assignment and how they contributed
    """
    stop_words_info = "Stop words are words that don't have any negative or positive meaning.\n"\
        "It can be helpful to use stopwords to remove data that shouldn't impact the prediction.\n"\
        "It can help performance and has an impact on the result."
    stop_word_commands = """
    Available commands are:
    help / commands - lists the commands
    info - lists info about stopwords
    back - go back to previous section
    clear - clear the window
    listwords - lists the stopwords
    """
    if command == "exit" or command == "close" or command == "stop":
        quit()
    elif command == "topwords":
        is_a_number = False
        number_of_words = None
        while not is_a_number:
            number_of_words = input("How many words do you want to see?\n")
            try:
                number_of_words = int(number_of_words)
                is_a_number = True
            except Exception:
                print("Please enter a number.")
        classifier.train()
        common_pos_words = data_handler.get_common_words(classifier.pos_words_dict, number_of_words)
        print("\nPositive words...")
        for item in common_pos_words:
            print(item)
        print("\nNegative words...")
        common_neg_words = data_handler.get_common_words(classifier.neg_words_dict, number_of_words)
        for item in common_neg_words:
            print(item)
    elif command == "wordcount":
        done = False
        while not done:
            # You can keep trying different words until you type back
            word = input("Type in the word: ")
            if word == "back":
                done = True
                return  # return to "main menu"
            data = classifier.train()
            pos_fr = data["pos_words_dict"]
            neg_fr = data["neg_words_dict"]
            print(word, " was found ", data_handler.get_specific_word(pos_fr, word),
                  " times in the positive reviews\n")
            print(word, " was found ", data_handler.get_specific_word(neg_fr, word),
                  " times in the negative reviews\n")
    elif command == "setpath":
        main.set_path()
    elif command == "run":
        done = False
        clear_window()
        while not done:
            # TODO finish commands
            user_input = input(
                "Which function do you want to run? Use the numbers to select. Type back to return\n"
                "1 - train - This will attempt to load the preprocessed training data from the file, if it can't it will process it and save it as a file\n"
                "2 - load test data - this will load the test data from the file test.data if possible, if it can't it will process the test data and save it as test.data\n"
                "3 - predict the test reviews - This will attempt to predict the test reviews\n"
                "4 - Predict test review with stopwords - This will attempt to predict the test reviews while using stopwords\n"
                "5 - cleanup - This will remove all files created by this program\n"
                "back - Return back to main menu\n")
            user_input = user_input.lower()
            print("Running ", user_input)
            if user_input == "1":
                classifier.train()
                print("Classifier is ready.")
            elif user_input == "2":
                classifier.load_test_dataset()
                print("Test data is ready.")
            elif user_input == "3":
                testing.test_predict_test_dataset()
            elif user_input == "4":
                testing.test_predict_test_dataset_with_stopwords()
            elif user_input == "5":
                data_handler.cleanup_files()
            elif user_input == "back":
                done = True
                print("Returning to previous section...")
            else:
                print("Couldn't run ", user_input, " Maybe you spelled it wrong?\n")
    elif command == "predict":
        done = False
        while not done:
            user_input = input("Enter your review or back to return: ")
            if user_input.lower() == "back":
                done = True
                return
            print("Attempting to predict...")
            print("Your input was: " + user_input + "\n")
            result = classifier.predict_input(user_input)
            print(result[0])
            print(result[1])
            print(result[2])
    elif command == "help":
        print(help)
    elif command == "stopwords":
        done = False
        print(stop_word_commands)
        while not done:
            user_input = input("Type a command. Type help for a list of options: ")
            if user_input.lower() == "info":
                print(stop_words_info)
            elif user_input == "back":
                print("Going back...")
                done = True
            elif user_input == "help" or user_input == "commands":
                print(stop_word_commands)
            elif user_input == "clear":
                clear_window()
            elif user_input == "listwords":
                stop_words = get_stop_words('english')
                for word in stop_words:
                    print(word)
            else:
                print("Did not recognize that command, type help to show a list of commands")
    elif command == "clear":
        clear_window()
    elif command == "candidates":
        print("The candidates are:\n")
        print("110 - wrote all the code")
        print("21 - minor testing of the code")
    else:
        # if a command that doesn't exist is typed in
        print(help)
def train(file_path, classification):
    for line in lines(file_path):
        classifier.train(line, classification)
4 : "Left", 5 : "Up", 6 : "Down", 7 : "Clockwise Circle", 8 : "Counter Circle" } NOTES_LOOKUP = { 3 : G, 5 : C, 6 : A, 7 : B, 8 : D } classy = classifier.train() sock.settimeout(.5) def handle_data(data,log): float_dat = map(lambda x: float(x), data.split(",")) return [float_dat[0],float_dat[1],-float_dat[2]] log_data = False data_stream_x = [] data_stream_y = [] data_stream_z = [] count_less_than_theta = 0 print "SERVER IS LISTENING" midiout = rtmidi.MidiOut() midiout.open_virtual_port("test1") while True:
        'data/lists/train_list.mat')
    data_frame = create_dataframe(filename_list, labels_list, annotation_list, 'data/annotation/')
    pickle_file(data_frame=data_frame, file_to_save='train_data.pickle')
    x_train = to_numpy_array(data_frame, image_shape=(224, 224), data_path='data/images/')
    y_train = labels_to_logical(labels_list)
    print('COMPILE and TRAIN MODEL')
    model = Models()
    model = model.TransferFine(top_layers=True)
    history, model = train(model, x_train, y_train, split=0.8, early_stopping=True, epochs=5)
    print('Saving: Model Architecture and Weights')
    model.save('save_architecture.h5')
    model.save_weights('save_model_weights.h5')
    print('TEST DATA PREP')
    filename_list, labels_list, annotation_list = load_matfile(
        'data/lists/test_list.mat')
    data_frame = create_dataframe(filename_list, labels_list, annotation_list, 'data/annotation/')
    pickle_file(data_frame=data_frame, file_to_save='test_data.pickle')
    x_test = to_numpy_array(data_frame,
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn import svm
from sklearn.externals import joblib

# features_train, labels_train, = wtf.getArrays()
features_train, labels_train, features_test, labels_test = prep_terrain_data.makeTerrainData()
# for (i, feature) in enumerate(features_test):
#     print(i, len(feature))
# labels_train[0] = 1

# fit the model
cv = cross_validation.ShuffleSplit(len(features_train), n_iter=50, test_size=0.1, random_state=0)
train(svm.SVC(), train_sizes=np.linspace(.1, 1.0, 10), cv=cv, params=" ",
      features=features_train, labels=labels_train)
# pred, accuracy, recall, precision = test(clf, features_test, labels_test)
plt.show()
# clf.fit(features_train, labels_train)
# pred = clf.predict(features_test)
from sklearn.metrics import accuracy_score
# acc = accuracy_score(pred, labels_test)
# print(" accuracy: ", acc)
# print(classification_report(labels_test, pred))
def command(command):
    """
    This function will take a command and execute the command given,
    or else it will tell the user the command doesn't exist
    :param command: a string with your command
    """
    commands = """
    Available commands are:
    classify - Attempts to classify a review that is written by the user
    run - allow you to run different parts of the program
    exit - exits the program
    clear - clears the window
    commands / help - shows the different commands available
    wordcount - Will show how many times a word shows up in the trainingdata, type in the word in the next input
    stopwords - learn more about stopwords
    setpath - allows you to set the path to the directory that contains the data
    topwords - this will list the most common positive or negative words
    candidates - list the people that contributed to the assignment and how they contributed
    """
    stop_words_info = "\nStop words are words that don't have any negative or positive meaning.\n"\
        "These words can have a negative impact on the accuracy if they are used more in one of the types of reviews\n"\
        "The way we handle stop-words is to just skip words that are found in the list of stop-words.\n"
    stop_word_commands = """
    Available commands are:
    commands / help - lists the commands
    info - lists info about stopwords
    back - go back to previous section
    clear - clear the window
    listwords - lists the stopwords
    """
    if command == "exit" or command == "close" or command == "stop":
        quit()
    elif command == "topwords":
        is_a_number = False
        number_of_words = None
        while not is_a_number:
            number_of_words = input("How many words do you want to see?\n")
            try:
                number_of_words = int(number_of_words)
                is_a_number = True
            except Exception:
                print("Please enter a number.")
        classifier.train()
        common_pos_words = data_handler.get_common_words(classifier.pos_words_dict, number_of_words)
        print("\nPositive words...")
        for item in common_pos_words:
            print(item)
        print("\nNegative words...")
        common_neg_words = data_handler.get_common_words(classifier.neg_words_dict, number_of_words)
        for item in common_neg_words:
            print(item)
    elif command == "wordcount":
        data = classifier.train()
        pos_fr = data["pos_words_dict"]
        neg_fr = data["neg_words_dict"]
        while True:
            # You can keep trying different words until you type back
            word = input("Type in the word: ")
            if word == "back":
                return  # return to "main menu"
            print(word, " was found ", data_handler.get_specific_word(pos_fr, word),
                  " times in the positive reviews\n")
            print(word, " was found ", data_handler.get_specific_word(neg_fr, word),
                  " times in the negative reviews\n")
    elif command == "setpath":
        main.set_path()
    elif command == "run":
        done = False
        clear_window()
        while not done:
            user_input = input(
                "Which function do you want to run? Use the numbers to select. Type back to return\n"
                "1 - classify the test reviews - This will classify the test reviews\n"
                "2 - classify test review with stopwords - This will classify the test reviews while using stopwords\n"
                "3 - cleanup - This will remove all files created by this program\n"
                "4 - classify training reviews - This will classify the training data\n"
                "5 - classify training reviews with stopwords - This will classify the training data with stopwords\n"
                "6 - classify training reviews with testing dataset - This will classify the training reviews, using the testing data for the classifier\n"
                "7 - classify training reviews with testing dataset, using stopwords - This will classify the training reviews, using the testing data for the classifier and stopwords\n"
                "8 - classify testing reviews with testing dataset - This will classify the testing reviews, using the testing data for the classifier\n"
                "9 - classify testing reviews with testing dataset, using stopwords - This will classify the testing reviews, using the testing data for the classifier and stop-words\n"
                "10 - all - This will run all the tests\n"
                "back - Return back to main menu\n")
            user_input = user_input.lower()
            print("Running ", user_input)
            if user_input == "1":
                print("Attempting to classify the test reviews. This may take a while.")
                testing.test_classify_test_dataset()
            elif user_input == "2":
                print("Attempting to classify the test reviews while using stop-words. This may take a while.")
                testing.test_classify_test_dataset_with_stopwords()
            elif user_input == "3":
                data_handler.cleanup_files()
            elif user_input == "4":
                print("Attempting to classify the training reviews. This may take a while.")
                testing.test_classify_train_dataset()
            elif user_input == "5":
                print("Attempting to classify the training reviews with stopwords. This may take a while.")
                testing.test_classify_train_dataset_with_stopwords()
            elif user_input == "6":
                print("Attempting to classify training dataset while using the testing dataset for the classifier. This may take a while.")
                testing.test_classify_train_dataset_with_testing_data()
            elif user_input == "7":
                print("Attempting to classify training dataset while using the testing dataset for the classifier, while using stop-words. This may take a while.")
                testing.test_classify_train_dataset_with_testing_data_with_stopwords()
            elif user_input == "8":
                print("Attempting to classify testing dataset while using the testing dataset for the classifier. This may take a while.")
                testing.test_classify_test_dataset_with_testing_data()
            elif user_input == "9":
                print("Attempting to classify testing dataset while using the testing dataset for the classifier, while using stop-words. This may take a while.")
                testing.test_classify_test_dataset_with_testing_data_using_stopwords()
            elif user_input == "10":
                line = "_____________________________________________________________________________________________________________________________"
                print("Attempting to classify the test reviews. This may take a while.")
                testing.test_classify_test_dataset()
                print(line)
                print("Attempting to classify the test reviews while using stop-words. This may take a while.")
                testing.test_classify_test_dataset_with_stopwords()
                print(line)
                print("Attempting to classify the training reviews. This may take a while.")
                testing.test_classify_train_dataset()
                print(line)
                print("Attempting to classify the training reviews with stopwords. This may take a while.")
                testing.test_classify_train_dataset_with_stopwords()
                print(line)
                print("Attempting to classify training dataset while using the testing dataset for the classifier. This may take a while.")
                testing.test_classify_train_dataset_with_testing_data()
                print(line)
                print("Attempting to classify training dataset while using the testing dataset for the classifier, while using stop-words. This may take a while.")
                testing.test_classify_train_dataset_with_testing_data_with_stopwords()
                print(line)
                print("Attempting to classify testing dataset while using the testing dataset for the classifier. This may take a while.")
                testing.test_classify_test_dataset_with_testing_data()
                print(line)
                print("Attempting to classify testing dataset while using the testing dataset for the classifier, while using stop-words. This may take a while.")
                testing.test_classify_test_dataset_with_testing_data_using_stopwords()
                print(line)
            elif user_input == "back":
                done = True
                print("Returning to previous section...")
            else:
                print("Couldn't run ", user_input, " Maybe you spelled it wrong?\n")
    elif command == "classify":
        classifier.train()  # prepare the classifier
        while True:
            user_input = input("\nEnter your review or back to return: ")
            if user_input.lower() == "back":
                return
            print("Attempting to classify the review: " + user_input + "\n")
            result = classifier.predict_input(user_input)
            print(result[0])
            print(result[1])
            print(result[2])
    elif command == "commands" or command == "help":
        print(commands)
    elif command == "stopwords":
        done = False
        print(stop_word_commands)
        while not done:
            user_input = input("Type a command. Type help for a list of options: ")
            if user_input.lower() == "info":
                print(stop_words_info)
            elif user_input == "back":
                print("Going back...")
                done = True
            elif user_input == "commands" or user_input == "help":
                print(stop_word_commands)
            elif user_input == "clear":
                clear_window()
            elif user_input == "listwords":
                stop_words = get_stop_words('english')
                for word in stop_words:
                    print(word)
            else:
                print("Did not recognize that command, type commands to show a list of commands")
    elif command == "clear":
        clear_window()
    elif command == "candidates":
        print("The candidates are:\n")
        print("110 - wrote all the code and worked on the report")
        print("21 - testing of the code and worked on the report")
    else:
        # if a command that doesn't exist is typed in
        print(commands)
import classifier
import neuralpy
import grapher

net = neuralpy.Network(2, 8, 1)
uris = ["miller_xml/" + str(i) + ".xml" for i in range(1, 13)]
epochs = 200
learning_rate = 0.05
validation_percentage = .32
ps = []
classifier.stand = "L"
for i in range(0, 10):
    net.randomize_parameters()
    p = classifier.train(net, uris, epochs, learning_rate, validation_percentage,
                         save_file='results/miller_' + str(i) + '.txt')
    neuralpy.output(p)
    ps.append(p)
i = ps.index(max(ps))
neuralpy.output("\n\n" + str(max(ps)) + " at " + str(i))
grapher.graph(filepath='results/miller_' + str(i) + '.txt')
# grapher.graph(filepath='results/miller_4.txt')