Exemplo n.º 1
0
def main(dataset_path, working_dir, testing_path, testing_working_dir,
         dimensions, batch_size, number_classes, epochs):
    """Train an image classifier from a disk-backed dataset, then evaluate it.

    Streams training batches through a generator, saves the trained model,
    rebuilds the loader over the testing data and evaluates on the
    consolidated testing arrays.
    """
    def generator_augmented():
        # Alternative batch source that doubles each batch with augmented
        # copies. NOTE(review): defined but unused below -- fit_generator
        # consumes generator(); kept for parity with the original.
        while True:
            while not dataset_loader.done():
                x, y = dataset_loader.get_training_batch()
                gen = datagen.flow(x, y, batch_size=batch_size)
                x_augmented, y_augmented = next(gen)
                yield np.concatenate((x, x_augmented), axis=0), np.concatenate(
                    (y, y_augmented), axis=0)
            dataset_loader.reset()

    def generator():
        # Endless batch source; rewinds the loader after each full pass.
        while True:
            while not dataset_loader.done():
                x, y = dataset_loader.get_training_batch()
                yield x, y
            dataset_loader.reset()

    # model = alexnet(dimensions, number_classes).get_model()
    model = load_model()
    reduce_lr = ReduceLROnPlateau(monitor='loss',
                                  factor=0.1,
                                  patience=5,
                                  min_lr=0.001)
    sgd_optimizer = optimizers.SGD(lr=0.01, momentum=0.9, decay=0.0005)
    dataset_loader = imagerecognition.dataset_loader(dataset_path, working_dir,
                                                     dimensions, batch_size)
    datagen = ImageDataGenerator(rotation_range=20,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 horizontal_flip=True,
                                 vertical_flip=True)

    model.compile(sgd_optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # FIX: steps_per_epoch must be an integer; '/' produced a float (the
    # sibling one-shot trainer in this file already uses '//').
    model.fit_generator(generator(),
                        steps_per_epoch=dataset_loader.length // batch_size,
                        epochs=epochs,
                        callbacks=[reduce_lr])
    dataset_loader.delete_from_disk()
    save_model(model)

    # Concatenate every testing batch into one array pair so evaluate() can
    # run in a single call.
    dataset_loader = imagerecognition.dataset_loader(testing_path,
                                                     testing_working_dir,
                                                     dimensions, batch_size)
    print('Testing...')
    consolidated_images, consolidated_labels = dataset_loader.get_training_batch(
    )
    while not dataset_loader.done():
        temp_images, temp_labels = dataset_loader.get_training_batch()
        consolidated_images = np.concatenate(
            (consolidated_images, temp_images), axis=0)
        consolidated_labels = np.concatenate(
            (consolidated_labels, temp_labels), axis=0)
    print(model.evaluate(consolidated_images, consolidated_labels))
    dataset_loader.delete_from_disk()
Exemplo n.º 2
0
def step1(data, args):
    """STEP 1: train (or reload) a classification model; optionally test it.

    data: (train_loader, val_loader, test_loader) triple of dataset loaders.
    args: parsed options; reads modelArch, workDir, learningRate, nEpochs,
          skipTest.
    Returns (model, {'BestValAccuracy': ..., 'TestAccuracy': ...}).
    """
    print '### STEP 1: Train for classification task'

    pretrained_snapshot_fname = 'model_best_accuracy.th'

    train_loader, val_loader, test_loader = data

    n_samples_train = len(train_loader.dataset)  # NOTE(review): unused here
    n_samples_val = len(val_loader.dataset)
    n_samples_test = len(test_loader.dataset)

    # Number of distinct target values determines the classifier output size.
    num_classes = len(set(val_loader.dataset.target_tensor))

    # NOTE(review): eval() on a command-line string executes arbitrary code --
    # acceptable only for trusted, local experiment scripts.
    model = eval(args.modelArch)(num_classes=num_classes)

    best_val_acc = None
    test_acc = None

    # try to load pretrained model if step 1 has already been executed
    saved_model = load_model(model, pretrained_snapshot_fname, args)
    if saved_model is not None:
        print 'Loading pretrained model:', pretrained_snapshot_fname
        model = saved_model
        model.cuda()
    else:
        # else train a new model
        print 'Training a new model ...'
        # NOTE(review): logfile is never closed in this function.
        logfile = open(os.path.join(args.workDir, 'log.txt'), 'wb')

        model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), args.learningRate)

        since = time.time()
        for epoch in trange(1, args.nEpochs + 1, desc='Epochs'):
            avg_loss = train(model, optimizer, epoch, train_loader, logfile, args)
            val_loss, val_acc, n_correct = evaluate(model, val_loader, args)

            # Snapshot whenever validation accuracy improves on the best seen.
            if best_val_acc is None or best_val_acc < val_acc:
                best_val_acc = val_acc
                tqdm.write('Snapshotting best model: ' + pretrained_snapshot_fname)
                save_model(model, pretrained_snapshot_fname, args)

            logline = 'Epoch {:3d}/{}] train_avg_loss = {:.4f}, val_avg_loss = {:.4f}, val_accuracy = {}/{} ({:.2f}%, Best: {:.2f}%)'
            tqdm.write(logline.format(epoch, args.nEpochs, avg_loss, val_loss, n_correct, n_samples_val, val_acc, best_val_acc))

        time_elapsed = time.time() - since
        print 'Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60)
        # Reload the best snapshot so testing uses the best model, not the last.
        model = load_model(model, pretrained_snapshot_fname, args)

    # TESTING -----------------
    if not args.skipTest:
        test_loss, test_acc, n_correct = evaluate(model, test_loader, args)
        logline = 'TEST] test_avg_loss = {:.4f}, test_accuracy = {}/{} ({:.2f}%)'
        print logline.format(test_loss, n_correct, n_samples_test, test_acc)

    return model, {'BestValAccuracy': best_val_acc, 'TestAccuracy': test_acc}
Exemplo n.º 3
0
def train(args, data, model):
    """Train an NER model in a TF1 session, checkpointing on best eval F1.

    args: reads epochs, batch_size, output_dir.
    data: dataset exposing train_Ids; shuffled and sliced into minibatches.
    model: NOTE(review): immediately rebound by create_model() below, so the
        incoming value is unused -- confirm intent with the caller.
    """
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    config = config_model(args)
    with tf.Session(config=tf_config) as sess:
        best_f1 = 0
        model = create_model(sess, NERModel, args.output_dir, config, data,
                             logger)
        logger.info("start training")
        for epoch in range(1, 1 + args.epochs):
            loss = []
            random.shuffle(data.train_Ids)
            batch_size = args.batch_size
            train_num = len(data.train_Ids)
            # One extra slot covers the final partial batch; empty slices are
            # skipped below.
            total_batch = train_num // batch_size + 1
            for batch_id in range(total_batch):
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > train_num:
                    end = train_num
                instance = data.train_Ids[start:end]  # train_Ids
                if not instance:
                    continue
                # batchify_with_label
                #gazs, word_seq_tensor, word_seq_lengths, biword_seq_tensor, word_seq_lengths, label_seq_tensor, layer_gaz_tensor, gaz_count_tensor, gaz_mask_tensor, mask
                _, batch_word, batch_biword, batch_wordlen, batch_label, layer_gaz, gaz_count, gaz_mask, mask = batchify_with_label(
                    instance)
                batch = (batch_word, batch_biword, batch_wordlen, batch_label,
                         layer_gaz, gaz_count, gaz_mask, mask, True)
                step, batch_loss = model.run_step(sess, batch, True)
                # print(step)
                loss.append(batch_loss)
            # Mean training loss over this epoch's batches.
            train_log = {'loss': np.mean(loss)}
            loss = []
            eval_log, class_info = evaluate(sess, args, model, data)
            logs = dict(train_log, **eval_log)
            show_info = f'\nEpoch: {epoch} - ' + "-".join(
                [f' {key}: {value:.4f} ' for key, value in logs.items()])
            logger.info(show_info)
            # Save to disk only when eval F1 improves on the best so far.
            if logs['eval_f1'] > best_f1:
                logger.info(
                    f"\nEpoch {epoch}: eval_f1 improved from {best_f1} to {logs['eval_f1']}"
                )
                logger.info("save model to disk.")
                best_f1 = logs['eval_f1']
                save_model(sess, model, args.output_dir, logger)
                print("Eval Entity Score: ")
                for key, value in class_info.items():
                    info = f"Subject: {key} - Acc: {value['acc']} - Recall: {value['recall']} - F1: {value['f1']}"
                    logger.info(info)
Exemplo n.º 4
0
def main(argv):
    """Train a pos/neg document classifier from CoNLL-U files and save it."""
    args = argparser().parse_args(argv[1:])

    # Featurize each corpus; positive documents are loaded first.
    pos_feats = featurize_documents(load_conllu(args.positive))
    neg_feats = featurize_documents(load_conllu(args.negative))

    vectorizer = DictVectorizer()
    features = vectorizer.fit_transform(pos_feats + neg_feats)
    labels = ['pos'] * len(pos_feats) + ['neg'] * len(neg_feats)

    classifier = LinearSVC(C=1.0)
    classifier.fit(features, labels)

    # Persist both the classifier and its vectorizer together.
    save_model(args.model, classifier, vectorizer)
    return 0
Exemplo n.º 5
0
def train(args, NERModel, processor):
    """Train an NER model with BatchManager batches in a TF1 session.

    args: reads batch_size, label2id, epochs, output_dir.
    NERModel: model class handed to create_model.
    processor: provides vocab and is passed through to evaluate().
    Saves the model to args.output_dir whenever eval F1 improves.
    """
    train_dataset = load_and_cache_examples(args, processor, data_type='train')
    train_manager = BatchManager(data=train_dataset,
                                 batch_size=args.batch_size,
                                 vocab=processor.vocab,
                                 label2id=args.label2id,
                                 shuffle=True)
    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    config = config_model(args)
    config['vocab_size'] = len(processor.vocab)
    loss = []
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, NERModel, args.output_dir, config, logger)
        logger.info("start training")
        best_f1 = 0
        for epoch in range(1, 1 + args.epochs):
            train_manager.reset()

            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
            train_log = {'loss': np.mean(loss)}
            # Reset the accumulator so each epoch's mean covers only itself.
            loss = []

            eval_log, class_info = evaluate(sess, args, model, processor)  #!
            logs = dict(train_log, **eval_log)
            show_info = f'\nEpoch: {epoch} - ' + "-".join(
                [f' {key}: {value:.4f} ' for key, value in logs.items()])
            logger.info(show_info)
            # scheduler.epoch_step(logs['eval_f1'], epoch)
            # Save to disk only when eval F1 improves on the best so far.
            if logs['eval_f1'] > best_f1:
                logger.info(
                    f"\nEpoch {epoch}: eval_f1 improved from {best_f1} to {logs['eval_f1']}"
                )
                logger.info("save model to disk.")
                best_f1 = logs['eval_f1']
                save_model(sess, model, args.output_dir, logger)
                print("Eval Entity Score: ")
                for key, value in class_info.items():
                    info = f"Subject: {key} - Acc: {value['acc']} - Recall: {value['recall']} - F1: {value['f1']}"
                    logger.info(info)
Exemplo n.º 6
0
def main(argv):
    """Fit a TF-IDF + linear SVM text classifier and persist it to disk."""
    args = argparser().parse_args(argv[1:])
    examples = load_examples(args.data)

    # Whitespace-delimited tokens, case kept, uni- to tri-grams.
    vectorizer = TfidfVectorizer(analyzer='word',
                                 token_pattern=r'\S+',
                                 lowercase=False,
                                 ngram_range=(1, 3))

    features = vectorizer.fit_transform([e.text for e in examples])
    labels = [e.class_ for e in examples]

    classifier = LinearSVC(C=1.0)
    classifier.fit(features, labels)

    # Persist both the classifier and its vectorizer together.
    save_model(args.model, classifier, vectorizer)

    return 0
Exemplo n.º 7
0
# Post-training report fragment: print evaluation metrics (scaled to
# percentages, except the F-score which is printed raw), then save the model,
# the fit history and summary statistics.
acc_score = metrics.accuracy_score(y_eval, pred)
print('Accuracy =', acc_score * 100)

reca_score = metrics.recall_score(y_eval, pred)
print('Recall =', reca_score * 100)

prec_score = metrics.precision_score(y_eval, pred)
print('Precision =', prec_score * 100)

f1_score = metrics.f1_score(y_eval, pred)
print('F-score =', f1_score)

# write files
print('===== write files =====')
print('saving model "{}"...'.format(os.path.basename(options.model_path)))
common.save_model(model=model, path=options.model_path)

print('saving epochs log "{}"...'.format(os.path.basename(options.log_path)))
common.save_log(fit_result=result, path=options.log_path)

# Best epoch = 1-based index of the highest validation accuracy in history.
epochs = len(result.history['val_acc'])
best_epoch = result.history['val_acc'].index(max(
    result.history['val_acc'])) + 1

print('saving history statistics "{}"...'.format(
    os.path.basename(options.statistics_path)))
common.save_statistics(
    ann_name=ANN_NAME,
    path=options.statistics_path,
    entries={
        'step_size': options.step_size,
Exemplo n.º 8
0
                # Hyper-parameter sweep fragment: finish this combination's
                # params dict, run SGD, and persist model + loss + timing.
                'reg': reg,
                'batchsize': batch_size,
            }

            # Output filenames encode optimizer, loss type and the params.
            model_file, loss_file, time_file = make_filenames(
                path,
                [optimizer, loss_type],
                params,
            )

            # Time the whole SGD run; the context manager writes time_file.
            with TimeThis(time_file, params):
                w, perf_logger = SGD(X, y, Xv, yv, Xt, yt,
                                    epochs, batch_size, optimizer, loss_type, params)

            # Save model and loss data
            save_model(model_file, w)
            perf_logger.save(loss_file)


    elif optimizer == 'adam':
        # Adam sweeps the cross-product of its own hyper-parameters.
        for alpha, reg, beta1, beta2, epsilon in itertools.product(
            args.alpha, args.reg, args.beta1, args.beta2, args.epsilon
        ):
            params = {
                'alpha': alpha,
                'reg': reg,
                'beta1': beta1,
                'beta2': beta2,
                'epsilon': epsilon,
                'batchsize': batch_size,
            }
Exemplo n.º 9
0
    # CLI driver fragment: dispatch tweet-classification utilities according
    # to the parsed command-line options.
    Classifier = load_classifier_class(options.Classifier)
    print 'classifier=%s' % Classifier.__dict__['__module__']

    tweets = get_labelled_tweets()
    random.shuffle(tweets)

    # Optionally cap the dataset size for quick experiments.
    if options.limit > 0:
        tweets = tweets[:options.limit]

    do_filter = options.filter  # NOTE(review): assigned but unused in this fragment

    if options.optimize:
        optimize_params(tweets)

    if options.ngrams:
        show_ngrams(tweets)

    if options.self_validate:
        show_self_validation(tweets)

    if options.cross_validate or options.show_errors:
        show_cross_validation(tweets, options.show_errors)

    if options.test_string:
        show_classification_details(options.test_string)

    # Finally, optionally train and persist a model on all loaded tweets.
    if options.model:
        model = Classifier(tweets)
        common.save_model(model)
Exemplo n.º 10
0
def process(params,with_predict=True,with_eval=True):
    """Build, train, evaluate and (optionally) predict with a CNN.

    params: nested configuration dict with 'dataset', 'cnn' and 'training'
        sections; mutated in place with derived sizes (n_out, n_metafeatures*).
    with_predict: when True, compute predictions over the test index file.
    with_eval: when True, run do_eval on those predictions.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
    params['cnn']['n_out'] = int(params['dataset']['dim'])
    #params['cnn']['n_frames'] =  int(params['dataset']['window'] * SR / float(HR))
    with_metadata = params['dataset']['with_metadata']
    only_metadata = params['dataset']['only_metadata']
    metadata_source = params['dataset']['meta-suffix']
    # Metadata loading: dense .npy for w2v/model sources, sparse .npz
    # otherwise. Each optional meta-suffixN adds another feature block.
    if with_metadata:
        if 'w2v' in metadata_source:
            X_meta = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (metadata_source,params['dataset']['dataset']))[:,:int(params['cnn']['sequence_length'])]
            params['cnn']['n_metafeatures'] = len(X_meta[0])
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix2'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = len(X_meta2[0])
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix3'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures3'] = len(X_meta3[0])
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix4'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures4'] = len(X_meta4[0])
        elif 'model' in metadata_source or not params['dataset']['sparse']:
            X_meta = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (metadata_source,params['dataset']['dataset']))
            params['cnn']['n_metafeatures'] = len(X_meta[0])
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix2'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = len(X_meta2[0])
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix3'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures3'] = len(X_meta3[0])
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = np.load(common.TRAINDATA_DIR+'/X_train_%s_%s.npy' % (params['dataset']['meta-suffix4'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures4'] = len(X_meta4[0])
        else:
            X_meta = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (metadata_source,params['dataset']['dataset'])).todense()
            params['cnn']['n_metafeatures'] = X_meta.shape[1]
            if 'meta-suffix2' in params['dataset']:
                X_meta2 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix2'],params['dataset']['dataset']))
                params['cnn']['n_metafeatures2'] = X_meta2.shape[1]
            if 'meta-suffix3' in params['dataset']:
                X_meta3 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix3'],params['dataset']['dataset']))
                # FIX: len() on a scipy sparse matrix raises TypeError; use
                # shape[1] like the meta-suffix2 branch above.
                params['cnn']['n_metafeatures3'] = X_meta3.shape[1]
            if 'meta-suffix4' in params['dataset']:
                X_meta4 = load_sparse_csr(common.TRAINDATA_DIR+'/X_train_%s_%s.npz' % (params['dataset']['meta-suffix4'],params['dataset']['dataset']))
                # FIX: copy-paste bug -- this assigned 'n_metafeatures3' from
                # len(X_meta4[0]); set 'n_metafeatures4' from shape[1].
                params['cnn']['n_metafeatures4'] = X_meta4.shape[1]
        print(X_meta.shape)
    else:
        X_meta = None

    # Model construction and snapshot directory setup.
    config = Config(params)
    model_dir = os.path.join(common.MODELS_DIR, config.model_id)
    common.ensure_dir(common.MODELS_DIR)
    common.ensure_dir(model_dir)
    model_file = os.path.join(model_dir, config.model_id + common.MODEL_EXT)
    logging.debug("Building Network...")
    model = build_model(config)
    print(model.summary())
    #plot(model, to_file='model2.png', show_shapes=True)
    trained_model = config.get_dict()

    # Save model
    #plot(model, to_file=os.path.join(model_dir, config.model_id + PLOT_EXT))
    common.save_model(model, model_file)

    logging.debug(trained_model["model_id"])

    logging.debug("Loading Data...")

    with_generator = True

    if only_metadata:
        X_train, Y_train, X_val, Y_val, X_test, Y_test = \
            load_data_preprocesed(params, config.x_path, config.y_path, params['dataset']['dataset'], config.training_params["validation"],
                      config.training_params["test"], config.dataset_settings["nsamples"], with_metadata, only_metadata, metadata_source)
        if 'meta-suffix2' in params['dataset']:
            X_train2, Y_train2, X_val2, Y_val2, X_test2, Y_test2 = \
                load_data_preprocesed(params, config.x_path, config.y_path, params['dataset']['dataset'], config.training_params["validation"],
                          config.training_params["test"], config.dataset_settings["nsamples"], with_metadata, only_metadata, params['dataset']['meta-suffix2'])
            X_train = [X_train,X_train2]
            X_val = [X_val,X_val2]
            X_test = [X_test,X_test2]
            print("X_train bi", len(X_train))
        if 'meta-suffix3' in params['dataset']:
            X_train3, Y_train3, X_val3, Y_val3, X_test3, Y_test3 = \
                load_data_preprocesed(params, config.x_path, config.y_path, params['dataset']['dataset'], config.training_params["validation"],
                          config.training_params["test"], config.dataset_settings["nsamples"], with_metadata, only_metadata, params['dataset']['meta-suffix3'])
            X_train.append(X_train3)
            X_val.append(X_val3)
            X_test.append(X_test3)
            print("X_train tri", len(X_train))
        if 'meta-suffix4' in params['dataset']:
            X_train4, Y_train4, X_val4, Y_val4, X_test4, Y_test4 = \
                load_data_preprocesed(params, config.x_path, config.y_path, params['dataset']['dataset'], config.training_params["validation"],
                          config.training_params["test"], config.dataset_settings["nsamples"], with_metadata, only_metadata, params['dataset']['meta-suffix4'])
            X_train.append(X_train4)
            X_val.append(X_val4)
            X_test.append(X_test4)
            print("X_train four", len(X_train))
    else:
        if with_generator:
            # Stream training data from HDF5; only val/test are held in memory.
            id2gt = dict()
            factors = np.load(common.DATASETS_DIR+'/y_train_'+config.y_path+'.npy')
            index_factors = open(common.DATASETS_DIR+'/items_index_train_'+params['dataset']['dataset']+'.tsv').read().splitlines()
            id2gt = dict((index,factor) for (index,factor) in zip(index_factors,factors))
            X_val, Y_val, X_test, Y_test, N_train = load_data_hf5_memory(params,config.training_params["validation"],config.training_params["test"],config.y_path,id2gt,X_meta,config.training_params["val_from_file"])
            if params['dataset']['nsamples'] != 'all':
                N_train = min(N_train,params['dataset']['nsamples'])

        else:
            X_train, Y_train, X_val, Y_val, X_test, Y_test, N_train = load_data_hf5(params,config.training_params["validation"],config.training_params["test"])

    trained_model["whiten_scaler"] = common.TRAINDATA_DIR+'/scaler_%s.pk' % config.x_path
    logging.debug("Training...")

    if config.model_arch["final_activation"] == 'softmax':
        monitor_metric = 'val_categorical_accuracy'
    else:
        monitor_metric = 'val_loss'
    early_stopping = EarlyStopping(monitor=monitor_metric, patience=4)

    if only_metadata:
        epochs = model.fit(X_train, Y_train,
                  batch_size=config.training_params["n_minibatch"],
                  #shuffle='batch',
                  nb_epoch=config.training_params["n_epochs"],
                  verbose=2, validation_data=(X_val, Y_val),
                  callbacks=[early_stopping])
    else:
        if with_generator:
            print(N_train)
            # Truncate to a whole number of minibatches per epoch.
            epochs = model.fit_generator(batch_block_generator(params,config.y_path,N_train,id2gt,X_meta,config.training_params["val_from_file"]),
                        samples_per_epoch = N_train-(N_train % config.training_params["n_minibatch"]),
                        nb_epoch = config.training_params["n_epochs"],
                        verbose=2,
                        validation_data = (X_val, Y_val),
                        callbacks=[early_stopping])
        else:
            epochs = model.fit(X_train, Y_train,
                      batch_size=config.training_params["n_minibatch"],
                      shuffle='batch',
                      nb_epoch=config.training_params["n_epochs"],
                      verbose=2,
                      validation_data=(X_val, Y_val),
                      callbacks=[early_stopping])

    model.save_weights(os.path.join(model_dir, config.model_id + common.WEIGHTS_EXT))
    logging.debug("Saving trained model %s in %s..." %
                  (trained_model["model_id"], common.DEFAULT_TRAINED_MODELS_FILE))
    common.save_trained_model(common.DEFAULT_TRAINED_MODELS_FILE, trained_model)

    logging.debug("Evaluating...")

    print(X_test[0].shape,X_test[1].shape)
    preds=model.predict(X_test)
    print(preds.shape)
    if params["dataset"]["evaluation"] in ['binary','multiclass']:
        y_pred = (preds > 0.5).astype('int32')
        acc = accuracy_score(Y_test,y_pred)
        prec = precision_score(Y_test,y_pred,average='macro')
        recall = recall_score(Y_test,y_pred,average='macro')
        f1 = f1_score(Y_test,y_pred,average='macro')
        print('Accuracy', acc)
        print("%.3f\t%.3f\t%.3f" % (prec,recall,f1))
    if params["dataset"]["fact"] == 'class':
        # Restrict AUC metrics to classes that appear in the test labels.
        good_classes = np.nonzero(Y_test.sum(0))[0]
        print(Y_test.shape,preds.shape)
        roc_auc=roc_auc_score(Y_test[:,good_classes],preds[:,good_classes])
        logging.debug('ROC-AUC '+str(roc_auc))
        pr_auc = average_precision_score(Y_test[:,good_classes],preds[:,good_classes])
        print('PR-AUC',pr_auc)
        r2 = roc_auc
    elif params["dataset"]["evaluation"] not in ['binary','multiclass','multilabel']:
        r2s = []
        for i,pred in enumerate(preds):
            r2 = r2_score(Y_test[i],pred)
            r2s.append(r2)
        r2 = np.asarray(r2s).mean()
        logging.debug('R2 avg '+str(r2))
    # Batch prediction
    # NOTE(review): compares shapes of the *second* sample and label --
    # presumably a guard that inputs and targets line up; confirm intent.
    if X_test[1].shape == Y_test[1].shape:
        score = model.evaluate(X_test, Y_test, verbose=0)
        logging.debug(score)
        logging.debug(model.metrics_names)
        print(score)
        trained_model["loss_score"] = score[0]
        trained_model["mse"] = score[1]
        if params["dataset"]["evaluation"] not in ['binary','multiclass','multilabel']:
            trained_model["r2"] = r2

        fw=open(common.DATA_DIR+'/results/train_results.txt','a')
        fw.write(trained_model["model_id"]+'\n')
        if params["training"]["loss_func"] == 'binary_crossentropy':
            fw.write('ROC-AUC: '+str(roc_auc)+'\n')
            print('ROC-AUC: '+str(roc_auc))
            fw.write('Loss: '+str(score[0])+' ('+config.training_params["loss_func"]+')\n')
            fw.write('MSE: '+str(score[1])+'\n')
        elif params["dataset"]["evaluation"] not in ['binary','multiclass','multilabel']:
            fw.write('R2 avg: '+str(r2)+'\n')
            print('R2 avg: '+str(r2))
            fw.write('Loss: '+str(score[0])+' ('+config.training_params["loss_func"]+')\n')
            fw.write('MSE: '+str(score[1])+'\n')
        fw.write(json.dumps(epochs.history)+"\n\n")
        fw.close()

    if with_predict:
        trained_models = pd.read_csv(common.DEFAULT_TRAINED_MODELS_FILE, sep='\t')
        model_config = trained_models[trained_models["model_id"] == trained_model["model_id"]]
        model_config = model_config.to_dict(orient="list")
        testset = open(common.DATASETS_DIR+'/items_index_test_%s.tsv' % (config.dataset_settings["dataset"])).read().splitlines()
        if config.training_params["val_from_file"] and not only_metadata:
            predictions, predictions_index = obtain_predictions(model_config, testset, trained_model["model_id"], config.predicting_params["trim_coeff"], model=model, with_metadata=with_metadata, only_metadata=only_metadata, metadata_source=metadata_source, with_patches=True)
        else:
            predictions, predictions_index = obtain_predictions(model_config, testset, trained_model["model_id"], config.predicting_params["trim_coeff"], model=model, with_metadata=with_metadata, only_metadata=only_metadata, metadata_source=metadata_source)
        print("Predictions created")

    if with_eval:
        do_eval(trained_model["model_id"],get_roc=True,get_map=True,get_p=True,predictions=predictions,predictions_index=predictions_index)
Exemplo n.º 11
0
def step2(model, data, args):
    print '### STEP 2: Train for ordinal regression task'

    pretrained_snapshot_fname = 'model_best_loss.th'

    train_loader, val_loader, test_loader = to_ordinal_data(data, args)

    n_samples_train = len(train_loader.dataset)
    n_samples_val = len(val_loader.dataset)
    n_samples_test = len(test_loader.dataset)

    best_val_acc = None
    test_acc = None

    model.to_ordinal()
    saved_model = load_model(model,
                             pretrained_snapshot_fname,
                             args,
                             subdir='snapshots_2')
    if saved_model is not None:
        print 'Loading pretrained model:', pretrained_snapshot_fname
        model = saved_model
        model.cuda()
    else:
        logfile = open(os.path.join(args.workDir, 'log_2.txt'), 'wb')
        model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), args.learningRate2)

        since = time.time()
        for epoch in trange(args.nEpochs2 + 1, desc='Epochs'):
            avg_loss = train(model, optimizer, epoch, train_loader, logfile,
                             args)
            val_loss, val_acc, n_correct = evaluate(model, val_loader, args)
            train_loader.dataset.sample_tuples()
            val_loader.dataset.sample_tuples()

            if best_val_acc is None or best_val_acc < val_acc:
                best_val_acc = val_acc
                tqdm.write('Snapshotting best model: ' +
                           pretrained_snapshot_fname)
                save_model(model,
                           pretrained_snapshot_fname,
                           args,
                           subdir='snapshots_2')

            logline = 'Epoch {:3d}/{}] train_avg_loss = {:.4f}, val_avg_loss = {:.4f}, val_accuracy = {}/{} ({:.2f}%, Best: {:.2f}%)'
            tqdm.write(
                logline.format(epoch, args.nEpochs2, avg_loss, val_loss,
                               n_correct, n_samples_val, val_acc,
                               best_val_acc))

        time_elapsed = time.time() - since
        print 'Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60)
        model = load_model(model,
                           pretrained_snapshot_fname,
                           args,
                           subdir='snapshots_2')
        model.cuda()

    # RANK TESTING ------------
    if not args.skipTest:
        test_loss, test_acc, n_correct = evaluate(model, test_loader, args)
        logline = 'TEST] test_avg_loss = {:.4f}, test_accuracy = {}/{} ({:.2f}%)'
        print logline.format(test_loss, n_correct, n_samples_test, test_acc)

    return model, {
        'Best Val Rank Accuracy': best_val_acc,
        'Test Rank Accuracy': test_acc
    }
Exemplo n.º 12
0
    # Train-or-load fragment: a negative bst_model_id means no saved model
    # was selected, so train from scratch; otherwise reload by id.
    if bst_model_id < 0:
        print("execute the training")
        # run the training
        model = run_training(df_train, copy.deepcopy(trainParams))

        if train_validation:
            # training self evaluation
            train_eval_score = cv_evaluate(model,
                                           df_train,
                                           selected_iteration=None)

        # logging
        # 'metric' is stripped from the params before they are persisted.
        trainParams.pop('metric', None)
        common.save_model(model, trainParams, \
                          best_score=train_eval_score.mean() if train_validation else 0.0,\
                          best_iteration=model.current_iteration(),
                          notes='lgb_version='+str(lgb.__version__))

        detail_result = {
            'best_iteration': model.best_iteration,
            'best_score': train_eval_score.mean() if train_validation else 0.0,
            'current_iteration': model.current_iteration(),
            'current_score':
            train_eval_score.mean() if train_validation else 0.0,
            'lgb_version': lgb.__version__
        }
        common.logging_dict(logger, detail_result, 'train result')
    else:
        print("load the trained model")
        model, model_params = common.load_lgb_model(model_id=bst_model_id)
Exemplo n.º 13
0
    # CLI driver fragment (duplicate of the earlier snippet): dispatch
    # tweet-classification utilities according to the parsed options.
    Classifier = load_classifier_class(options.Classifier)
    print 'classifier=%s' % Classifier.__dict__['__module__']

    tweets = get_labelled_tweets()
    random.shuffle(tweets)

    # Optionally cap the dataset size for quick experiments.
    if options.limit > 0:
        tweets = tweets[:options.limit]

    do_filter = options.filter  # NOTE(review): assigned but unused in this fragment

    if options.optimize:
        optimize_params(tweets)

    if options.ngrams:
        show_ngrams(tweets)

    if options.self_validate:
        show_self_validation(tweets)

    if options.cross_validate or options.show_errors:
        show_cross_validation(tweets, options.show_errors)

    if options.test_string:
        show_classification_details(options.test_string)

    # Finally, optionally train and persist a model on all loaded tweets.
    if options.model:
        model = Classifier(tweets)
        common.save_model(model)

        
Exemplo n.º 14
0
def main(dataset_path, working_dir, testing_path, testing_working_dir,
         dimensions, batch_size, epochs):
    """Train a one-shot siamese model and evaluate it on the testing data."""

    def generator_augmented():
        # Alternative batch source that doubles each batch with augmented
        # copies of the image pairs.
        augmenter = ImageDataGenerator(rotation_range=20,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       horizontal_flip=True,
                                       vertical_flip=True)
        while True:
            while not dataset_loader.done():
                left, right, target = dataset_loader.get_training_batch()
                flow = augmenter.flow(left, right, batch_size=batch_size)
                left_aug, right_aug = next(flow)
                inputs = {
                    'left_input': np.concatenate((left, left_aug), axis=0),
                    'right_input': np.concatenate((right, right_aug), axis=0)
                }
                targets = {
                    'main_output': np.concatenate((target, target), axis=0)
                }
                yield (inputs, targets)
            dataset_loader.reset()

    def generator():
        # Endless batch source; rewinds the loader after each full pass.
        while True:
            while not dataset_loader.done():
                left, right, target = dataset_loader.get_training_batch()
                yield ({'left_input': left, 'right_input': right},
                       {'main_output': target})
            dataset_loader.reset()

    model = default_oneshot(dimensions).get_model()
    optimizer = Adam(0.00006)
    dataset_loader = oneshot.dataset_loader(dataset_path, working_dir,
                                            dimensions, batch_size)

    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit_generator(generator(),
                        steps_per_epoch=dataset_loader.length // batch_size,
                        epochs=epochs)
    dataset_loader.delete_from_disk()
    save_model(model)

    # Concatenate every testing batch into three consolidated arrays so
    # evaluate() can run in a single call.
    dataset_loader = oneshot.dataset_loader(testing_path, testing_working_dir,
                                            dimensions, batch_size)
    all_left, all_right, all_labels = dataset_loader.get_training_batch()
    while not dataset_loader.done():
        left, right, label = dataset_loader.get_training_batch()
        all_left = np.concatenate((all_left, left), axis=0)
        all_right = np.concatenate((all_right, right), axis=0)
        all_labels = np.concatenate((all_labels, label), axis=0)

    print("Testing...")
    print(model.evaluate(x={'left_input': all_left,
                            'right_input': all_right},
                         y=all_labels))
    dataset_loader.delete_from_disk()