def main(dataset_path, working_dir, testing_path, testing_working_dir,
         dimensions, batch_size, number_classes, epochs):
    """Train an image-recognition model and evaluate it on a test set.

    Parameters
    ----------
    dataset_path, working_dir : training data location and scratch directory.
    testing_path, testing_working_dir : test data location and scratch dir.
    dimensions : input image dimensions expected by the model.
    batch_size : samples per training batch.
    number_classes : number of target classes (unused while a saved model is
        loaded instead of building a fresh alexnet -- see commented line).
    epochs : number of training epochs.
    """

    def generator_augmented():
        # Yields each raw batch concatenated with a randomly-augmented copy,
        # doubling the effective batch size.  Currently unused; kept as an
        # alternative to generator().
        while True:
            while not dataset_loader.done():
                x, y = dataset_loader.get_training_batch()
                gen = datagen.flow(x, y, batch_size=batch_size)
                x_augmented, y_augmented = next(gen)
                yield np.concatenate((x, x_augmented), axis=0), np.concatenate(
                    (y, y_augmented), axis=0)
            dataset_loader.reset()

    def generator():
        # Plain pass-through generator over the training batches; restarts
        # the loader at the end of each epoch.
        while True:
            while not dataset_loader.done():
                x, y = dataset_loader.get_training_batch()
                yield x, y
            dataset_loader.reset()

    # model = alexnet(dimensions, number_classes).get_model()
    model = load_model()
    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5,
                                  min_lr=0.001)
    sgd_optimizer = optimizers.SGD(lr=0.01, momentum=0.9, decay=0.0005)
    dataset_loader = imagerecognition.dataset_loader(dataset_path, working_dir,
                                                     dimensions, batch_size)
    datagen = ImageDataGenerator(rotation_range=20, width_shift_range=0.2,
                                 height_shift_range=0.2, horizontal_flip=True,
                                 vertical_flip=True)
    model.compile(sgd_optimizer, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # BUGFIX: use integer division -- steps_per_epoch must be an integer
    # (true division produced a float); matches the sibling one-shot main().
    model.fit_generator(generator(),
                        steps_per_epoch=dataset_loader.length // batch_size,
                        epochs=epochs,
                        callbacks=[reduce_lr])
    dataset_loader.delete_from_disk()
    save_model(model)

    # Re-point the loader at the test set and evaluate on all of it at once.
    dataset_loader = imagerecognition.dataset_loader(
        testing_path, testing_working_dir, dimensions, batch_size)
    print('Testing...')
    consolidated_images, consolidated_labels = dataset_loader.get_training_batch()
    while not dataset_loader.done():
        temp_images, temp_labels = dataset_loader.get_training_batch()
        consolidated_images = np.concatenate(
            (consolidated_images, temp_images), axis=0)
        consolidated_labels = np.concatenate(
            (consolidated_labels, temp_labels), axis=0)
    print(model.evaluate(consolidated_images, consolidated_labels))
    dataset_loader.delete_from_disk()
def step1(data, args): print '### STEP 1: Train for classification task' pretrained_snapshot_fname = 'model_best_accuracy.th' train_loader, val_loader, test_loader = data n_samples_train = len(train_loader.dataset) n_samples_val = len(val_loader.dataset) n_samples_test = len(test_loader.dataset) num_classes = len(set(val_loader.dataset.target_tensor)) model = eval(args.modelArch)(num_classes=num_classes) best_val_acc = None test_acc = None # try to load pretrained model if step 1 has already been executed saved_model = load_model(model, pretrained_snapshot_fname, args) if saved_model is not None: print 'Loading pretrained model:', pretrained_snapshot_fname model = saved_model model.cuda() else: # else train a new model print 'Training a new model ...' logfile = open(os.path.join(args.workDir, 'log.txt'), 'wb') model.cuda() optimizer = torch.optim.Adam(model.parameters(), args.learningRate) since = time.time() for epoch in trange(1, args.nEpochs + 1, desc='Epochs'): avg_loss = train(model, optimizer, epoch, train_loader, logfile, args) val_loss, val_acc, n_correct = evaluate(model, val_loader, args) if best_val_acc is None or best_val_acc < val_acc: best_val_acc = val_acc tqdm.write('Snapshotting best model: ' + pretrained_snapshot_fname) save_model(model, pretrained_snapshot_fname, args) logline = 'Epoch {:3d}/{}] train_avg_loss = {:.4f}, val_avg_loss = {:.4f}, val_accuracy = {}/{} ({:.2f}%, Best: {:.2f}%)' tqdm.write(logline.format(epoch, args.nEpochs, avg_loss, val_loss, n_correct, n_samples_val, val_acc, best_val_acc)) time_elapsed = time.time() - since print 'Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60) model = load_model(model, pretrained_snapshot_fname, args) # TESTING ----------------- if not args.skipTest: test_loss, test_acc, n_correct = evaluate(model, test_loader, args) logline = 'TEST] test_avg_loss = {:.4f}, test_accuracy = {}/{} ({:.2f}%)' print logline.format(test_loss, n_correct, n_samples_test, test_acc) 
return model, {'BestValAccuracy': best_val_acc, 'TestAccuracy': test_acc}
def train(args, data, model):
    """Train an NER model over `data.train_Ids`, evaluating after every
    epoch and snapshotting to `args.output_dir` whenever eval_f1 improves.

    NOTE(review): the `model` parameter is immediately shadowed by the
    model returned from create_model() below -- the argument is never used.
    """
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    config = config_model(args)
    with tf.Session(config=tf_config) as sess:
        best_f1 = 0
        model = create_model(sess, NERModel, args.output_dir, config, data, logger)
        logger.info("start training")
        for epoch in range(1, 1 + args.epochs):
            loss = []
            # Shuffle in place each epoch so batch composition varies.
            random.shuffle(data.train_Ids)
            batch_size = args.batch_size
            train_num = len(data.train_Ids)
            # +1 so a final partial batch is included; when train_num divides
            # evenly the extra empty slice is skipped by the guard below.
            total_batch = train_num // batch_size + 1
            for batch_id in range(total_batch):
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > train_num:
                    end = train_num
                instance = data.train_Ids[start:end]  # train_Ids
                if not instance:
                    continue
                # batchify_with_label
                #gazs, word_seq_tensor, word_seq_lengths, biword_seq_tensor, word_seq_lengths, label_seq_tensor, layer_gaz_tensor, gaz_count_tensor, gaz_mask_tensor, mask
                _, batch_word, batch_biword, batch_wordlen, batch_label, layer_gaz, gaz_count, gaz_mask, mask = batchify_with_label(
                    instance)
                # Final True flags training mode for run_step.
                batch = (batch_word, batch_biword, batch_wordlen, batch_label,
                         layer_gaz, gaz_count, gaz_mask, mask, True)
                step, batch_loss = model.run_step(sess, batch, True)
                # print(step)
                loss.append(batch_loss)
            train_log = {'loss': np.mean(loss)}
            loss = []
            eval_log, class_info = evaluate(sess, args, model, data)
            logs = dict(train_log, **eval_log)
            show_info = f'\nEpoch: {epoch} - ' + "-".join(
                [f' {key}: {value:.4f} ' for key, value in logs.items()])
            logger.info(show_info)
            if logs['eval_f1'] > best_f1:
                logger.info(
                    f"\nEpoch {epoch}: eval_f1 improved from {best_f1} to {logs['eval_f1']}"
                )
                logger.info("save model to disk.")
                best_f1 = logs['eval_f1']
                save_model(sess, model, args.output_dir, logger)
            # Per-class entity metrics from the most recent evaluation.
            print("Eval Entity Score: ")
            for key, value in class_info.items():
                info = f"Subject: {key} - Acc: {value['acc']} - Recall: {value['recall']} - F1: {value['f1']}"
                logger.info(info)
def main(argv):
    """Train a linear SVM that separates positive from negative CoNLL-U
    documents, persist classifier + vectorizer to `args.model`, return 0."""
    args = argparser().parse_args(argv[1:])
    pos_feats = featurize_documents(load_conllu(args.positive))
    neg_feats = featurize_documents(load_conllu(args.negative))
    vectorizer = DictVectorizer()
    features = vectorizer.fit_transform(pos_feats + neg_feats)
    labels = ['pos'] * len(pos_feats) + ['neg'] * len(neg_feats)
    classifier = LinearSVC(C=1.0)
    classifier.fit(features, labels)
    save_model(args.model, classifier, vectorizer)
    return 0
def train(args, NERModel, processor):
    """Train NERModel on batches from BatchManager, evaluating after every
    epoch and snapshotting to `args.output_dir` whenever eval_f1 improves."""
    train_dataset = load_and_cache_examples(args, processor, data_type='train')
    train_manager = BatchManager(data=train_dataset,
                                 batch_size=args.batch_size,
                                 vocab=processor.vocab,
                                 label2id=args.label2id,
                                 shuffle=True)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    config = config_model(args)
    config['vocab_size'] = len(processor.vocab)
    # Accumulates per-batch losses; emptied after each epoch's mean is logged.
    loss = []
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, NERModel, args.output_dir, config, logger)
        logger.info("start training")
        best_f1 = 0
        for epoch in range(1, 1 + args.epochs):
            train_manager.reset()
            for batch in train_manager.iter_batch(shuffle=True):
                # True flags training mode for run_step.
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
            train_log = {'loss': np.mean(loss)}
            loss = []
            eval_log, class_info = evaluate(sess, args, model, processor)  #!
            logs = dict(train_log, **eval_log)
            show_info = f'\nEpoch: {epoch} - ' + "-".join(
                [f' {key}: {value:.4f} ' for key, value in logs.items()])
            logger.info(show_info)
            # scheduler.epoch_step(logs['eval_f1'], epoch)
            if logs['eval_f1'] > best_f1:
                logger.info(
                    f"\nEpoch {epoch}: eval_f1 improved from {best_f1} to {logs['eval_f1']}"
                )
                logger.info("save model to disk.")
                best_f1 = logs['eval_f1']
                save_model(sess, model, args.output_dir, logger)
            # Per-class entity metrics from the most recent evaluation.
            print("Eval Entity Score: ")
            for key, value in class_info.items():
                info = f"Subject: {key} - Acc: {value['acc']} - Recall: {value['recall']} - F1: {value['f1']}"
                logger.info(info)
def main(argv):
    """Fit a word-ngram TF-IDF + LinearSVC text classifier on the examples
    in `args.data`, persist it to `args.model`, and return 0."""
    args = argparser().parse_args(argv[1:])
    examples = load_examples(args.data)
    vectorizer = TfidfVectorizer(analyzer='word', token_pattern=r'\S+',
                                 lowercase=False, ngram_range=(1, 3))
    # Vectorize the raw texts; labels come from each example's class_ field.
    X = vectorizer.fit_transform([e.text for e in examples])
    Y = [e.class_ for e in examples]
    classifier = LinearSVC(C=1.0)
    classifier.fit(X, Y)
    save_model(args.model, classifier, vectorizer)
    return 0
acc_score = metrics.accuracy_score(y_eval, pred) print('Accuracy =', acc_score * 100) reca_score = metrics.recall_score(y_eval, pred) print('Recall =', reca_score * 100) prec_score = metrics.precision_score(y_eval, pred) print('Precision =', prec_score * 100) f1_score = metrics.f1_score(y_eval, pred) print('F-score =', f1_score) # write files print('===== write files =====') print('saving model "{}"...'.format(os.path.basename(options.model_path))) common.save_model(model=model, path=options.model_path) print('saving epochs log "{}"...'.format(os.path.basename(options.log_path))) common.save_log(fit_result=result, path=options.log_path) epochs = len(result.history['val_acc']) best_epoch = result.history['val_acc'].index(max( result.history['val_acc'])) + 1 print('saving history statistics "{}"...'.format( os.path.basename(options.statistics_path))) common.save_statistics( ann_name=ANN_NAME, path=options.statistics_path, entries={ 'step_size': options.step_size,
'reg': reg, 'batchsize': batch_size, } model_file, loss_file, time_file = make_filenames( path, [optimizer, loss_type], params, ) with TimeThis(time_file, params): w, perf_logger = SGD(X, y, Xv, yv, Xt, yt, epochs, batch_size, optimizer, loss_type, params) # Save model and loss data save_model(model_file, w) perf_logger.save(loss_file) elif optimizer == 'adam': for alpha, reg, beta1, beta2, epsilon in itertools.product( args.alpha, args.reg, args.beta1, args.beta2, args.epsilon ): params = { 'alpha': alpha, 'reg': reg, 'beta1': beta1, 'beta2': beta2, 'epsilon': epsilon, 'batchsize': batch_size, }
# Driver: load the configured classifier class, then run whichever
# analysis / validation / training actions the CLI options requested.
Classifier = load_classifier_class(options.Classifier)
print 'classifier=%s' % Classifier.__dict__['__module__']
tweets = get_labelled_tweets()
random.shuffle(tweets)
if options.limit > 0:
    # cap the working set for quick experiments
    tweets = tweets[:options.limit]
# NOTE(review): do_filter is assigned here but not used in this fragment --
# presumably consumed by code outside this view; verify.
do_filter = options.filter
if options.optimize:
    optimize_params(tweets)
if options.ngrams:
    show_ngrams(tweets)
if options.self_validate:
    show_self_validation(tweets)
if options.cross_validate or options.show_errors:
    show_cross_validation(tweets, options.show_errors)
if options.test_string:
    show_classification_details(options.test_string)
if options.model:
    # train on the full tweet set and persist the fitted model
    model = Classifier(tweets)
    common.save_model(model)
def process(params, with_predict=True, with_eval=True):
    """End-to-end pipeline: build a model from `params`, load (meta)data,
    train with early stopping, evaluate, and optionally predict / run
    external evaluation.

    Parameters
    ----------
    params : nested dict of 'cnn', 'dataset' and 'training' settings.
    with_predict : also produce predictions for the test index file.
    with_eval : also run do_eval() on those predictions.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
    params['cnn']['n_out'] = int(params['dataset']['dim'])
    #params['cnn']['n_frames'] = int(params['dataset']['window'] * SR / float(HR))
    with_metadata = params['dataset']['with_metadata']
    only_metadata = params['dataset']['only_metadata']
    metadata_source = params['dataset']['meta-suffix']

    # ---- Load metadata features (up to four sources) and record their
    # ---- widths in params['cnn'] so the network can size its inputs.
    if with_metadata:
        if 'w2v' in metadata_source:
            # Word-embedding metadata: trim sequences to sequence_length.
            X_meta = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (metadata_source,params['dataset']['dataset']))[:,:int(params['cnn']['sequence_length'])]
            params['cnn']['n_metafeatures'] = len(X_meta[0])
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix2'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = len(X_meta2[0])
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix3'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures3'] = len(X_meta3[0])
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix4'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures4'] = len(X_meta4[0])
        elif 'model' in metadata_source or not params['dataset']['sparse']:
            # Dense .npy metadata.
            X_meta = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (metadata_source,params['dataset']['dataset']))
            params['cnn']['n_metafeatures'] = len(X_meta[0])
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix2'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = len(X_meta2[0])
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix3'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures3'] = len(X_meta3[0])
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix4'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures4'] = len(X_meta4[0])
        else:
            # Sparse .npz metadata; primary matrix is densified.
            X_meta = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (metadata_source,params['dataset']['dataset'])).todense()
            params['cnn']['n_metafeatures'] = X_meta.shape[1]
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix2'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = X_meta2.shape[1]
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix3'],params['dataset']['dataset']))
                # BUGFIX: was len(X_meta3[0]) -- len() on a scipy sparse
                # matrix raises TypeError; use .shape[1] like meta-suffix2.
                params['cnn']['n_metafeatures3'] = X_meta3.shape[1]
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix4'],params['dataset']['dataset']))
                # BUGFIX: was overwriting n_metafeatures3 with len(X_meta4[0])
                # (copy-paste error); record the fourth source's width.
                params['cnn']['n_metafeatures4'] = X_meta4.shape[1]
        print(X_meta.shape)
    else:
        X_meta = None

    # ---- Build the network and persist its architecture.
    config = Config(params)
    model_dir = os.path.join(common.MODELS_DIR, config.model_id)
    common.ensure_dir(common.MODELS_DIR)
    common.ensure_dir(model_dir)
    model_file = os.path.join(model_dir, config.model_id + common.MODEL_EXT)
    logging.debug("Building Network...")
    #model = build_model(config)
    model = build_model(config)
    print(model.summary())
    #plot(model, to_file='model2.png', show_shapes=True)
    trained_model = config.get_dict()
    # Save model
    #plot(model, to_file=os.path.join(model_dir, config.model_id + PLOT_EXT))
    common.save_model(model, model_file)
    logging.debug(trained_model["model_id"])

    # ---- Load training/validation/test data.
    logging.debug("Loading Data...")
    with_generator = True
    if only_metadata:
        X_train, Y_train, X_val, Y_val, X_test, Y_test = \
            load_data_preprocesed(params, config.x_path, config.y_path,
                                  params['dataset']['dataset'],
                                  config.training_params["validation"],
                                  config.training_params["test"],
                                  config.dataset_settings["nsamples"],
                                  with_metadata, only_metadata, metadata_source)
        if 'meta-suffix2' in params['dataset']:
            X_train2, Y_train2, X_val2, Y_val2, X_test2, Y_test2 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix2'])
            # Multi-input model: inputs become lists of arrays.
            X_train = [X_train,X_train2]
            X_val = [X_val,X_val2]
            X_test = [X_test,X_test2]
            print("X_train bi", len(X_train))
        if 'meta-suffix3' in params['dataset']:
            X_train3, Y_train3, X_val3, Y_val3, X_test3, Y_test3 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix3'])
            X_train.append(X_train3)
            X_val.append(X_val3)
            X_test.append(X_test3)
            print("X_train tri", len(X_train))
        if 'meta-suffix4' in params['dataset']:
            X_train4, Y_train4, X_val4, Y_val4, X_test4, Y_test4 = \
                load_data_preprocesed(params, config.x_path, config.y_path,
                                      params['dataset']['dataset'],
                                      config.training_params["validation"],
                                      config.training_params["test"],
                                      config.dataset_settings["nsamples"],
                                      with_metadata, only_metadata,
                                      params['dataset']['meta-suffix4'])
            X_train.append(X_train4)
            X_val.append(X_val4)
            X_test.append(X_test4)
            print("X_train four", len(X_train))
    else:
        if with_generator:
            # Stream audio patches from HDF5; build an id -> ground-truth map
            # so the generator can pair items with targets.
            id2gt = dict()
            factors = np.load(common.DATASETS_DIR+'/y_train_'+config.y_path+'.npy')
            index_factors = open(common.DATASETS_DIR+'/items_index_train_'+params['dataset']['dataset']+'.tsv').read().splitlines()
            id2gt = dict((index,factor) for (index,factor) in zip(index_factors,factors))
            X_val, Y_val, X_test, Y_test, N_train = load_data_hf5_memory(params,config.training_params["validation"],config.training_params["test"],config.y_path,id2gt,X_meta,config.training_params["val_from_file"])
            if params['dataset']['nsamples'] != 'all':
                N_train = min(N_train,params['dataset']['nsamples'])
        else:
            X_train, Y_train, X_val, Y_val, X_test, Y_test, N_train = load_data_hf5(params,config.training_params["validation"],config.training_params["test"])
    trained_model["whiten_scaler"] = common.TRAINDATA_DIR+'/scaler_%s.pk' % config.x_path

    # ---- Train with early stopping on the appropriate validation metric.
    logging.debug("Training...")
    if config.model_arch["final_activation"] == 'softmax':
        monitor_metric = 'val_categorical_accuracy'
    else:
        monitor_metric = 'val_loss'
    early_stopping = EarlyStopping(monitor=monitor_metric, patience=4)
    if only_metadata:
        epochs = model.fit(X_train, Y_train,
                           batch_size=config.training_params["n_minibatch"],
                           #shuffle='batch',
                           nb_epoch=config.training_params["n_epochs"],
                           verbose=2,
                           validation_data=(X_val, Y_val),
                           callbacks=[early_stopping])
    else:
        if with_generator:
            print(N_train)
            # samples_per_epoch rounded down to a whole number of minibatches.
            epochs = model.fit_generator(batch_block_generator(params,config.y_path,N_train,id2gt,X_meta,config.training_params["val_from_file"]),
                                         samples_per_epoch = N_train-(N_train % config.training_params["n_minibatch"]),
                                         nb_epoch = config.training_params["n_epochs"],
                                         verbose=2,
                                         validation_data = (X_val, Y_val),
                                         callbacks=[early_stopping])
        else:
            epochs = model.fit(X_train, Y_train,
                               batch_size=config.training_params["n_minibatch"],
                               shuffle='batch',
                               nb_epoch=config.training_params["n_epochs"],
                               verbose=2,
                               validation_data=(X_val, Y_val),
                               callbacks=[early_stopping])
    model.save_weights(os.path.join(model_dir, config.model_id + common.WEIGHTS_EXT))
    logging.debug("Saving trained model %s in %s..."
                  % (trained_model["model_id"], common.DEFAULT_TRAINED_MODELS_FILE))
    common.save_trained_model(common.DEFAULT_TRAINED_MODELS_FILE, trained_model)

    # ---- Evaluate on the test split.
    logging.debug("Evaluating...")
    print(X_test[0].shape,X_test[1].shape)
    preds=model.predict(X_test)
    print(preds.shape)
    if params["dataset"]["evaluation"] in ['binary','multiclass']:
        y_pred = (preds > 0.5).astype('int32')
        acc = accuracy_score(Y_test,y_pred)
        prec = precision_score(Y_test,y_pred,average='macro')
        recall = recall_score(Y_test,y_pred,average='macro')
        f1 = f1_score(Y_test,y_pred,average='macro')
        print('Accuracy', acc)
        print("%.3f\t%.3f\t%.3f" % (prec,recall,f1))
    if params["dataset"]["fact"] == 'class':
        # Restrict AUC computation to classes present in the test labels.
        good_classes = np.nonzero(Y_test.sum(0))[0]
        print(Y_test.shape,preds.shape)
        roc_auc=roc_auc_score(Y_test[:,good_classes],preds[:,good_classes])
        logging.debug('ROC-AUC '+str(roc_auc))
        pr_auc = average_precision_score(Y_test[:,good_classes],preds[:,good_classes])
        print('PR-AUC',pr_auc)
        r2 = roc_auc
    elif params["dataset"]["evaluation"] not in ['binary','multiclass','multilabel']:
        # Regression: mean per-sample R2.
        r2s = []
        for i,pred in enumerate(preds):
            r2 = r2_score(Y_test[i],pred)
            r2s.append(r2)
        r2 = np.asarray(r2s).mean()
        logging.debug('R2 avg '+str(r2))
    # Batch prediction
    if X_test[1].shape == Y_test[1].shape:
        score = model.evaluate(X_test, Y_test, verbose=0)
        logging.debug(score)
        logging.debug(model.metrics_names)
        print(score)
        trained_model["loss_score"] = score[0]
        trained_model["mse"] = score[1]
        if params["dataset"]["evaluation"] not in ['binary','multiclass','multilabel']:
            trained_model["r2"] = r2
        # Append a human-readable results record.
        fw=open(common.DATA_DIR+'/results/train_results.txt','a')
        fw.write(trained_model["model_id"]+'\n')
        if params["training"]["loss_func"] == 'binary_crossentropy':
            fw.write('ROC-AUC: '+str(roc_auc)+'\n')
            print('ROC-AUC: '+str(roc_auc))
            fw.write('Loss: '+str(score[0])+' ('+config.training_params["loss_func"]+')\n')
            fw.write('MSE: '+str(score[1])+'\n')
        elif params["dataset"]["evaluation"] not in ['binary','multiclass','multilabel']:
            fw.write('R2 avg: '+str(r2)+'\n')
            print('R2 avg: '+str(r2))
            fw.write('Loss: '+str(score[0])+' ('+config.training_params["loss_func"]+')\n')
            fw.write('MSE: '+str(score[1])+'\n')
        fw.write(json.dumps(epochs.history)+"\n\n")
        fw.close()

    # ---- Optional prediction + external evaluation.
    if with_predict:
        trained_models = pd.read_csv(common.DEFAULT_TRAINED_MODELS_FILE, sep='\t')
        model_config = trained_models[trained_models["model_id"] == trained_model["model_id"]]
        model_config = model_config.to_dict(orient="list")
        testset = open(common.DATASETS_DIR+'/items_index_test_%s.tsv' % (config.dataset_settings["dataset"])).read().splitlines()
        if config.training_params["val_from_file"] and not only_metadata:
            predictions, predictions_index = obtain_predictions(model_config, testset, trained_model["model_id"], config.predicting_params["trim_coeff"], model=model, with_metadata=with_metadata, only_metadata=only_metadata, metadata_source=metadata_source, with_patches=True)
        else:
            predictions, predictions_index = obtain_predictions(model_config, testset, trained_model["model_id"], config.predicting_params["trim_coeff"], model=model, with_metadata=with_metadata, only_metadata=only_metadata, metadata_source=metadata_source)
        print("Predictions created")
    if with_eval:
        do_eval(trained_model["model_id"],get_roc=True,get_map=True,get_p=True,predictions=predictions,predictions_index=predictions_index)
def step2(model, data, args): print '### STEP 2: Train for ordinal regression task' pretrained_snapshot_fname = 'model_best_loss.th' train_loader, val_loader, test_loader = to_ordinal_data(data, args) n_samples_train = len(train_loader.dataset) n_samples_val = len(val_loader.dataset) n_samples_test = len(test_loader.dataset) best_val_acc = None test_acc = None model.to_ordinal() saved_model = load_model(model, pretrained_snapshot_fname, args, subdir='snapshots_2') if saved_model is not None: print 'Loading pretrained model:', pretrained_snapshot_fname model = saved_model model.cuda() else: logfile = open(os.path.join(args.workDir, 'log_2.txt'), 'wb') model.cuda() optimizer = torch.optim.Adam(model.parameters(), args.learningRate2) since = time.time() for epoch in trange(args.nEpochs2 + 1, desc='Epochs'): avg_loss = train(model, optimizer, epoch, train_loader, logfile, args) val_loss, val_acc, n_correct = evaluate(model, val_loader, args) train_loader.dataset.sample_tuples() val_loader.dataset.sample_tuples() if best_val_acc is None or best_val_acc < val_acc: best_val_acc = val_acc tqdm.write('Snapshotting best model: ' + pretrained_snapshot_fname) save_model(model, pretrained_snapshot_fname, args, subdir='snapshots_2') logline = 'Epoch {:3d}/{}] train_avg_loss = {:.4f}, val_avg_loss = {:.4f}, val_accuracy = {}/{} ({:.2f}%, Best: {:.2f}%)' tqdm.write( logline.format(epoch, args.nEpochs2, avg_loss, val_loss, n_correct, n_samples_val, val_acc, best_val_acc)) time_elapsed = time.time() - since print 'Training complete in {:.0f}m {:.0f}s'.format( time_elapsed // 60, time_elapsed % 60) model = load_model(model, pretrained_snapshot_fname, args, subdir='snapshots_2') model.cuda() # RANK TESTING ------------ if not args.skipTest: test_loss, test_acc, n_correct = evaluate(model, test_loader, args) logline = 'TEST] test_avg_loss = {:.4f}, test_accuracy = {}/{} ({:.2f}%)' print logline.format(test_loss, n_correct, n_samples_test, test_acc) return model, { 'Best Val Rank 
Accuracy': best_val_acc, 'Test Rank Accuracy': test_acc }
if bst_model_id < 0: print("execute the training") # run the training model = run_training(df_train, copy.deepcopy(trainParams)) if train_validation: # training self evaluation train_eval_score = cv_evaluate(model, df_train, selected_iteration=None) # logging trainParams.pop('metric', None) common.save_model(model, trainParams, \ best_score=train_eval_score.mean() if train_validation else 0.0,\ best_iteration=model.current_iteration(), notes='lgb_version='+str(lgb.__version__)) detail_result = { 'best_iteration': model.best_iteration, 'best_score': train_eval_score.mean() if train_validation else 0.0, 'current_iteration': model.current_iteration(), 'current_score': train_eval_score.mean() if train_validation else 0.0, 'lgb_version': lgb.__version__ } common.logging_dict(logger, detail_result, 'train result') else: print("load the trained model") model, model_params = common.load_lgb_model(model_id=bst_model_id)
def main(dataset_path, working_dir, testing_path, testing_working_dir,
         dimensions, batch_size, epochs):
    """Train a one-shot (siamese) model on pairs of images and evaluate it.

    Parameters
    ----------
    dataset_path, working_dir : training data location and scratch directory.
    testing_path, testing_working_dir : test data location and scratch dir.
    dimensions : input image dimensions expected by the model.
    batch_size : samples per training batch.
    epochs : number of training epochs.
    """

    def generator_augmented():
        # Augmented variant: yields each batch doubled with randomly
        # transformed copies.  Currently unused; generator() is used below.
        datagen = ImageDataGenerator(rotation_range=20,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     horizontal_flip=True,
                                     vertical_flip=True)
        while True:
            while not dataset_loader.done():
                # x/y are the left/right image batches, z the pair labels.
                x, y, z = dataset_loader.get_training_batch()
                gen = datagen.flow(x, y, batch_size=batch_size)
                x_augmented, y_augmented = next(gen)
                yield ({
                    'left_input': np.concatenate((x, x_augmented), axis=0),
                    'right_input': np.concatenate((y, y_augmented), axis=0)
                }, {
                    'main_output': np.concatenate((z, z), axis=0)
                })
            dataset_loader.reset()

    def generator():
        # Plain pass-through generator feeding the siamese model's named
        # inputs; restarts the loader at the end of each epoch.
        while True:
            while not dataset_loader.done():
                x, y, z = dataset_loader.get_training_batch()
                yield ({'left_input': x, 'right_input': y}, {'main_output': z})
            dataset_loader.reset()

    model = default_oneshot(dimensions).get_model()
    optimizer = Adam(0.00006)
    dataset_loader = oneshot.dataset_loader(dataset_path, working_dir,
                                            dimensions, batch_size)
    model.compile(optimizer=optimizer, loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit_generator(generator(),
                        steps_per_epoch=dataset_loader.length // batch_size,
                        epochs=epochs)
    dataset_loader.delete_from_disk()
    save_model(model)

    # Re-point the loader at the test set and evaluate on all of it at once.
    dataset_loader = oneshot.dataset_loader(testing_path, testing_working_dir,
                                            dimensions, batch_size)
    consolidated_left, consolidated_right, consolidated_label = dataset_loader.get_training_batch(
    )
    while not dataset_loader.done():
        left, right, label = dataset_loader.get_training_batch()
        consolidated_left = np.concatenate((consolidated_left, left), axis=0)
        consolidated_right = np.concatenate((consolidated_right, right),
                                            axis=0)
        consolidated_label = np.concatenate((consolidated_label, label),
                                            axis=0)
    print("Testing...")
    print(
        model.evaluate(x={
            'left_input': consolidated_left,
            'right_input': consolidated_right
        }, y=consolidated_label))
    dataset_loader.delete_from_disk()