def pedestrians(data_root):
    '''
    Return a list of bounding boxes in the format
    frame, bb_id, x, y, dx, dy
    '''
    # return [[1, 1, 617, 128, 20, 50]]

    # If memory allows, the following single call can replace training
    # hog, lbp, and luv separately:
    # train_kmeans(save_path='./Models')
    print('Training k-means HoG...')
    train_kmeans_hog(save_path='./Models')
    print('Training k-means LBP...')
    train_kmeans_lbp(save_path='./Models')
    print('Training k-means LUV...')
    train_kmeans_luv(save_path='./Models')
    print('Building dataset...')
    build_dataset(save_path='./Dataset/', kmeans_path='./Models/')
    print('Training classifier...')
    train_classifier(save_path='./Models', data_path='./Dataset')
    print('Detecting...')
    sol = detect(save_path='./gt', model_path='./Models/', image_path=data_root)
    return sol
import os
import operator as op

# util and predict are assumed to be project-local modules (their imports
# were not shown in this fragment).
import util
import predict


def opt_C(cvals, model_name, feature_name, classifier_type, set_name,
          train_batch_num, do_norm):
    import train_classifier

    val_batches = range(16, 20)
    C_res = {}
    for C in cvals:
        props = dict()
        props['C'] = C
        clf_fn_c = util.get_classifier_filename(
            model_name, feature_name, classifier_type,
            range(train_batch_num), set_name=set_name, do_norm=do_norm, C=C)
        if os.path.isfile(clf_fn_c):
            # Reuse a previously trained classifier and just score it.
            norm_fn = util.get_norm_filename(model_name, feature_name, 0,
                                             set_name)
            C_res[str(C)] = predict.get_accuracy_for_sets(
                model_name, feature_name, set_name, val_batches, norm_fn,
                clf_fn_c, do_norm)
            print 'Acc %2.2f in %s' % (C_res[str(C)] * 100, clf_fn_c)
        else:
            C_res[str(C)] = train_classifier.train_classifier(
                model_name, feature_name, classifier_type, props, set_name,
                train_batch_num, do_norm, val_batches)
    best_C = int(max(C_res.iteritems(), key=op.itemgetter(1))[0])
    print 'Optimal C value: ' + str(best_C) + ', ' + str(
        C_res[str(best_C)]) + ' accuracy'
    return best_C
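# Hypothetical usage sketch (the grid and argument values below are
# placeholders, not from the original project):
# best_C = opt_C([0.01, 0.1, 1, 10, 100], 'model_a', 'hog', 'linear_svm',
#                'train', train_batch_num=16, do_norm=True)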
def main():
    messages = get_messages(min_word_length=2, remove_stopwords=True,
                            adjust_self=True)
    train_classifier.compute_train_test(messages, top_features=5000,
                                        only_ngrams=False)
    classifier = train_classifier.train_classifier()
    dump_classifier(classifier)
    dump_word_features(train_classifier.word_features)
    print_classifier_stats(classifier)
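# Standard entry-point guard; an addition, since the original fragment did
# not show how main() is invoked.
if __name__ == '__main__':
    main()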
    'max_iter': 200,
    'shuffle': True,      # only used when solver='sgd' or 'adam'
    'warm_start': False,
    'momentum': 0.9,      # only used when solver='sgd'
    'max_fun': 15000,     # only used when solver='lbfgs'
}

##############
# Processing #
##############

# Document vectors and labels
vectors, labels = writer2vec(train_data, train_labels,
                             outfiles=embedding_file, **writer2vec_params)

# Fraction select (50% of 90% for positive, 25% of 90% for each negative)
vectors, labels = split_combine_data(vectors, labels, seed=seed)

# Flatten data
train_vectors = flatten(vectors)
train_labels = flatten(labels)

# Train
mlp = train_classifier(train_vectors, train_labels, train_test_params,
                       **mlp_params)

# Save classifier model
if mlp_file:
    save_pickle(mlp, mlp_file)
m = y.shape[0]

# add a column of ones (bias term) to X
X0 = np.ones((X.shape[0], 1))
X = np.hstack((X0, X))

# get number of labels and set lambda for the regularization term
num_labels = y.max()
L = 1

# initialize learning params
alpha = 0.01
iterations = 5000

# get trained Theta matrix
Theta, J_histories = train_classifier(X, y, num_labels, L, iterations, alpha)

# plot J_histories to make sure gradient descent converged
for i in range(0, num_labels):
    plt.figure('J_history ' + str(i + 1))
    plt.plot(range(iterations), J_histories[i])

# run predictions
z = X.dot(Theta.T)
h = sigmoid(z)

# calculate accuracy of the classifier; plus one because argmax returns
# zero-indexed positions but classes start with 1
predicted_classes = h.argmax(axis=1) + 1
err = np.count_nonzero(predicted_classes - y)
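# Report accuracy from the error count above; an added step, not in the
# original fragment, assuming y has shape (m,).
accuracy = 1.0 - err / float(m)
print('Training accuracy: %.2f%%' % (accuracy * 100))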
################################ MultiLabel Classifier ###################################
# Multiple tags per sample: transform the labels into binary form so the
# prediction is a mask of 0s and 1s. MultiLabelBinarizer from sklearn is
# convenient for this purpose.
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer(classes=sorted(tags_counts.keys()))
# mlb.fit_transform(data) generates one vector per sample whose length equals
# the number of classes; each 0/1 entry marks whether the sample has that tag.
y_train = mlb.fit_transform(y_train)
# Use transform (not fit_transform) on the validation labels so both sets
# share the binarization fitted on the training labels.
y_validation = mlb.transform(y_validation)
# print(mlb)

######################################## Train Classifier ###########################################
import train_classifier

classifier_bag = train_classifier.train_classifier(x_train_bag, y_train)
classifier_tfidf = train_classifier.train_classifier(x_train_tfidf, y_train)

########### Apply the classifier on the validation data to predict and get the score ###########
y_val_predicted_labels_bag = classifier_bag.predict(x_validation_bag)
y_val_predicted_scores_bag = classifier_bag.decision_function(x_validation_bag)
y_val_predicted_labels_tfidf = classifier_tfidf.predict(x_validation_tfidf)
y_val_predicted_scores_tfidf = classifier_tfidf.decision_function(
    x_validation_tfidf)

# Take a look at the performance of the classifier.
# Transform the 0/1 vectors back to tags so they can be compared directly.
y_val_pred_inversed = mlb.inverse_transform(y_val_predicted_labels_tfidf)
y_val_inversed = mlb.inverse_transform(y_validation)
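# Hedged evaluation sketch (an addition, not in the original): score the
# tf-idf classifier with standard multi-label metrics from sklearn;
# 'weighted' averaging is an arbitrary choice here.
from sklearn.metrics import accuracy_score, f1_score

print(accuracy_score(y_validation, y_val_predicted_labels_tfidf))
print(f1_score(y_validation, y_val_predicted_labels_tfidf, average='weighted'))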
from collections import defaultdict

from count_trigrams import count_trigrams
from train_classifier import train_classifier

# We train the classifier here: per-language trigram counts.
default_lang_counts = train_classifier('train.csv')


def score_document(document, lang_counts=default_lang_counts):
    # Score the document against each language via the dot product of its
    # trigram counts with that language's trained trigram counts.
    lines = ''.join([line for line in document])
    ans = count_trigrams(lines)
    cross_product = defaultdict(float)
    for lan in lang_counts:
        for key in ans:
            cross_product[lan] += ans[key] * lang_counts[lan][key]
    return cross_product
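# Usage sketch (an assumption, not in the original; 'sample.txt' is a
# hypothetical input file): the highest-scoring language wins.
if __name__ == '__main__':
    with open('sample.txt') as doc:
        scores = score_document(doc)
    print(max(scores, key=scores.get))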
import numpy as np  # assumed import; np is used below but not shown in the fragment
import warnings

from classify import classify
from eval_classification import eval_classification
from eval_classification import plot_confusion_matrix

warnings.filterwarnings("ignore")

# Extract the parameters
params = get_params()

# Build the database
params['split'] = 'train'
build_database(params)
params['split'] = 'val'
build_database(params)

# Extract the features
get_features(params)

# Train a classification model
train_classifier(params)

# Classification
classify(params)

# Evaluate the classification
f1, precision, recall, accuracy, cm, labels = eval_classification(params)

print "Measures:\n"
print f1
print "-F1:", np.mean(f1)
print "-Precision:", np.mean(precision)
print "-Recall:", np.mean(recall)
print "-Accuracy:", accuracy
print "-Confusion matrix:\n", cm

plot_confusion_matrix(cm, labels, normalize=True)
def train(detector, traindata, target):
    train_classifier(detector, traindata, target)
    '--hidden_units',
    nargs=3,
    help='Number of neurons in the three hidden layers of the classifier',
    default=[4096, 2048, 1024],
    type=int)
parser.add_argument('--epochs', default=30, type=int)
parser.add_argument('--gpu', action="store_true", default=False)
args = parser.parse_args()

state_dict_checkpoint = 'state_dict_checkpoint.pt'

image_datasets = get_image_datasets(args.data_directory)
dataloaders = get_dataloaders(image_datasets)

model_arch = get_model_arch_from_model_name(args.arch)
model = get_model(model_arch, args.hidden_units)

device = 'cuda' if torch.cuda.is_available() and args.gpu else 'cpu'
print("Running on {}".format(device))

train_classifier(model,
                 device,
                 dataloaders,
                 lr=args.learning_rate,
                 epochs=args.epochs,
                 model_checkpoint=state_dict_checkpoint)

model.load_state_dict(torch.load(state_dict_checkpoint))
model.class_to_idx = image_datasets['train'].class_to_idx
store_checkpoint(model, model_arch, checkpoint_path=args.save_dir)
# classify incoming tweet
import train_classifier as tc
import date_time_ex as dte
import read
import relevant_term as rt
import location_tagger as lt

# Run classifier
classpath = "CSV/classsified.csv"
classifying = tc.train_classifier(classpath)

# Extract new tweet
include_columns = [1, 6, 7]
filepath = "CSV/u_NYC_DOT_t_1449992629_TMLineTwt.csv"
tweets = read.file_list(filepath, include_columns)

# Tokenize - find features - classify - extract if relevant
working_tweet = tc.tweet_Tokenizer(tweets[1][1])
relevance = classifying[0].classify(tc.find_features(working_tweet,
                                                     classifying[1]))
if relevance == 'Relevant':
    date_time = dte.date_time_extract(tweets[1][0:2])
    # summary = rt.ExtractRelWordfromText(tweets[1])
    locations = lt.tweet_location(tweets[1])

## tweet = []
## for i in range(0, len(tweetR)):
    return 0


def printOutput():
    dict_data = getFormattedOutput()
    for dt in dict_data:
        rt.TRACE(dt)
        event_details = dict_data[dt].split(" ; ")
        for evnt in event_details:
            temp = evnt.split("|")
            rt.TRACE("Location : ", temp[1])
            rt.TRACE("Time : ", temp[2])
            rt.TRACE("Event Tag : ", temp[0])


# Run classifier
classpath = "classified.csv"
classifying = tc.train_classifier(classpath)

# Extract new tweets
include_columns = [1, 6, 7]
filepath = "u_NYC_DOT_t_1449992629_TMLineTwt.csv"
tweets = read.file_list(filepath, include_columns)

# Tokenize - find features - classify - extract if relevant
with open('final2.csv', 'wb') as f:
    writer = csv.writer(f)
    for row in tweets:
        # rt.TRACE(row[0])
        # rt.TRACE(row[1])
        # rt.TRACE(row[2])
        working_tweet = tc.tweet_Tokenizer(row[1])
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__author__ = 'policecar'

import train_classifier as train
import predict_class as predict

train.train_classifier(recompute_feats=True)
predict.predict_class(test_file='test/private_leaderboard.csv',
                      recompute_feats=True)
def main(): """Run main training/test pipeline.""" data_path = "/projects/katefgroup/viewpredseg/art/" if not osp.exists(data_path): data_path = 'data/' # or change this if you work locally # Parse arguments argparser = argparse.ArgumentParser() argparser.add_argument("--im_path", default=osp.join(data_path, "wikiart/")) argparser.add_argument("--checkpoint_path", default=osp.join(data_path, "checkpoints/")) argparser.add_argument("--checkpoint", default="classifier.pt") argparser.add_argument("--epochs", default=50, type=int) argparser.add_argument("--batch_size", default=128, type=int) argparser.add_argument("--lr", default=1e-3, type=float) argparser.add_argument("--wd", default=1e-5, type=float) argparser.add_argument("--langevin_steps", default=20, type=int) argparser.add_argument("--langevin_step_size", default=10, type=float) argparser.add_argument("--ebm_log_fps", default=6, type=int) argparser.add_argument("--run_bin_classifier", action='store_true') argparser.add_argument("--run_classifier", action='store_true') argparser.add_argument("--run_generator", action='store_true') argparser.add_argument("--run_manipulator", action='store_true') argparser.add_argument("--run_transformations", action='store_true') argparser.add_argument("--run_colorizer", action='store_true') argparser.add_argument("--emot_label", default=None) args = argparser.parse_args() args.classifier_ckpnt = osp.join(args.checkpoint_path, args.checkpoint) args.device = 'cuda:0' if torch.cuda.is_available() else 'cpu' os.makedirs(args.checkpoint_path, exist_ok=True) # Data loaders for classification data_loaders = { mode: DataLoader(ArtEmisDataset(mode, args.im_path, emot_label=args.emot_label, im_size=224 if args.run_classifier else 64), batch_size=args.batch_size, shuffle=mode == 'train', drop_last=mode == 'train', num_workers=0) for mode in ('train', 'test') } # Train classifier # Emotion labels # {'amusement': 0, 'anger': 1, 'awe': 2, 'contentment': 3, 'disgust': 4, # 'excitement': 5, 'fear': 6, 'sadness': 7, 'something else': 8} if args.run_classifier: model = ResNetClassifier(num_classes=len( data_loaders['train'].dataset.emotions), pretrained=True, freeze_backbone=True, layers=34) model = train_classifier(model.to(args.device), data_loaders, args) eval_writer = SummaryWriter('runs/classifier_eval') eval_classifier(model, data_loaders['test'], args, eval_writer) # Train binary classifier if args.run_bin_classifier: model = ResNetClassifier(num_classes=1, pretrained=True, freeze_backbone=True, layers=18) model = train_bin_classifier(model.to(args.device), data_loaders, args) eval_writer = SummaryWriter('runs/bin_classifier_eval') eval_bin_classifier(model, data_loaders['test'], args, eval_writer) # Train generator if args.run_generator: model = ResNetEBM(pretrained=False, freeze_backbone=False, layers=18) model = train_generator(model.to(args.device), data_loaders, args) eval_generator(model.to(args.device), data_loaders['test'], args) # Train manipulator if args.run_manipulator: model = ResNetEBM(pretrained=False, freeze_backbone=False, layers=34) model = train_manipulator(model.to(args.device), data_loaders, args) eval_manipulator(model.to(args.device), data_loaders['test'], args) # Train transformations if args.run_transformations: model = ResNetEBM(pretrained=True, freeze_backbone=False, layers=18) model = train_transformations(model.to(args.device), data_loaders, args) eval_transformations(model.to(args.device), data_loaders['test'], args) # Train colorizer if args.run_colorizer: model = 
ResNetEBM(pretrained=True, freeze_backbone=False, layers=18) model = train_colorizer(model.to(args.device), data_loaders, args) eval_colorizer(model.to(args.device), data_loaders['test'], args)
pix_per_cell = 8         # HOG pixels per cell
cell_per_block = 2       # HOG cells per block
hog_channel = "ALL"      # Can be 0, 1, 2, or "ALL"
spatial_size = (32, 32)  # Spatial binning dimensions
hist_bins = 32           # Number of histogram bins
spatial_feat = True      # Spatial features on or off
hist_feat = True         # Histogram features on or off
hog_feat = True          # HOG features on or off
ystart = 400             # Min and max in y to search in find_cars()
ystop = 656
scale = 1.5

################################### TRAIN MODEL ###################################
train_classifier(color_space, spatial_size, hist_bins, orient, pix_per_cell,
                 cell_per_block, hog_channel, spatial_feat, hist_feat,
                 hog_feat)

################################### MULTI-DETECTION SEARCH ###################################
img = mpimg.imread('test_images/test6.jpg')

with open('model.p', 'rb') as f:
    model = pickle.load(f)

svc = model['svc']
X_scaler = model['X_scaler']


def process_image(img):
    image_copy = np.copy(img)
print "Error:", error.text print "Mission running ", agent.run(agent_host, learner) print "Traveled:", agent.loops - 1 agent.clear() print "Mission ended\n" time.sleep(2) print "Done" def create_agent(): agent = VisualAgent() return agent if __name__ == '__main__': sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0) # flush print output immediately my_agent_host = create_def_objs() agent = create_agent() choice = raw_input('input lr for saving a model and ld to load a model: ') if choice == "lr": learner = train_classifier.train_classifier() filename = 'finalized_model_10.sav' joblib.dump(learner, filename) elif choice == "ld": learner = joblib.load('finalized_model_10.sav') start_mission(my_agent_host, agent, learner)