Example #1
    def test_maxent_iteration(self):
        print("MaxEntClassifier iteration")
        print("---" * 45)
        print("Train num = %s" % self.train_num)
        print("Test num = %s" % self.test_num)
        print("maxiter = %s" % self.max_iter)

        from classifiers import MaxEntClassifier

        m = MaxEntClassifier(self.max_iter)
        iter_results = m.test(self.train_data, self.train_labels, self.best_words, self.test_data)

        filepath = "f_runout/MaxEnt-iteration-%s-train-%d-test-%d-f-%d-maxiter-%d-%s.xls" % \
                   (self.type,
                    self.train_num,
                    self.test_num,
                    self.feature_num,
                    self.max_iter,
                    datetime.datetime.now().strftime(
                        "%Y-%m-%d-%H-%M-%S"))

        results = []
        for iter_result in iter_results:
            try:
                results.append(get_accuracy(self.test_labels, iter_result, self.parameters))
            except ZeroDivisionError:
                print("ZeroDivisionError")

        Write2File.write_contents(filepath, results)
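Every example on this page calls a project-specific get_accuracy, and the signature differs between repositories. As a rough mental model only, here is a minimal hypothetical sketch that returns rows of (name, value) pairs, the shape implied by the results[i][1] indexing in Example #2 below:

# Hypothetical sketch -- not the implementation from any of these repositories.
def get_accuracy(test_labels, classify_labels, parameters=None):
    """Compare predictions with ground truth; return (name, value) rows."""
    if len(test_labels) != len(classify_labels):
        raise ValueError("label lists must be the same length")
    correct = sum(1 for t, p in zip(test_labels, classify_labels) if t == p)
    accuracy = correct / len(test_labels)  # ZeroDivisionError on empty input
    return [("accuracy", accuracy * 100)]  # a percentage, matching Example #2's /100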
Example #2
    def write(self, filepath, classify_labels, i=-1):
        results = get_accuracy(self.test_labels, classify_labels, self.parameters)
        if i >= 0:
            # Rows 10 and 7 of the results table hold percentage-valued
            # metrics; store them as fractions. (Row indices are project-specific.)
            self.precisions[i][0] = results[10][1] / 100
            self.precisions[i][1] = results[7][1] / 100

        Write2File.write_contents(filepath, results)
Example #3
        def on_epoch_end(self, epoch, logs=None):
            logs = logs if logs is not None else {}  # avoid a mutable default argument
            metrics_names = ['acc', 'loss', 'eer', 'auc']
            if int(epoch) % 10 != 0:
                # Run the full evaluation only every 10th epoch.
                self.verbose = 0
                results = np.asarray((np.inf, np.inf, np.inf, np.inf))

            else:
                self.test_model.set_weights(self.model.get_weights())
                # x and batch_size are both None: this assumes a TF1-style
                # model fed from in-graph tensors, so only `steps` is needed.
                y_pred = self.test_model.predict(None,
                                                 None,
                                                 steps=int(self.num_steps),
                                                 verbose=self.verbose)
                acc = get_accuracy(self.label, y_pred)
                pred_loss = get_loss(self.label, y_pred)
                equal_error_rate, _ = get_eer(self.label, y_pred)
                auc = get_auc(self.label, y_pred)
                results = [acc, pred_loss, equal_error_rate, auc]

            metrics_str = ' '
            for result, name in zip(results, metrics_names):
                metric_name = self.metrics_prefix + '_' + name
                logs[metric_name] = result
                if self.verbose > 0:
                    metrics_str = metric_name + ': ' + str(
                        result) + ' ' + metrics_str
            if self.verbose > 0:
                print(metrics_str)
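A callback like this only runs if it is registered at fit time. A minimal sketch of wiring one up (the class and argument names here are placeholders, not from the source):

import tensorflow as tf

class PeriodicEvalCallback(tf.keras.callbacks.Callback):
    # Placeholder callback: records a metric into `logs` each epoch.
    def __init__(self, metrics_prefix='test'):
        super().__init__()
        self.metrics_prefix = metrics_prefix

    def on_epoch_end(self, epoch, logs=None):
        logs = logs if logs is not None else {}
        logs[self.metrics_prefix + '_acc'] = 0.0  # replace with a real evaluation

# model.fit(x_train, y_train, epochs=100, callbacks=[PeriodicEvalCallback()])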
Example #4
def process(k):
    filepath = ""
    classify_labels = []

    if isinstance(k, list):
        knn = KNNClassifier(train_data, train_labels, k=k, best_words=best_words)
        for data in test_data:
            classify_labels.append(knn.multiple_k_classify(data))

        filepath = "f_runout/KNNClassifier-pos_train-%d-neg_train-%d-pos_test-%d-neg_test-%d-feature-%d-multiple_k-%s" \
                   "-%s.xls" % (pos_train_num, neg_train_num, pos_test_num, neg_test_num, feature_num,
                                "_".join([str(i) for i in k]), datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    elif isinstance(k, int):
        knn = KNNClassifier(train_data, train_labels, k=k, best_words=best_words)
        for data in test_data:
            classify_labels.append(knn.single_k_classify(data))

        filepath = "f_runout/KNNClassifier-pos_train-%d-neg_train-%d-pos_test-%d-neg_test-%d-feature-%d-k-%d-" \
                   "%s.xls" % (pos_train_num, neg_train_num, pos_test_num, neg_test_num, feature_num, k,
                               datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))

    get_accuracy(test_labels, classify_labels, parameters, filepath)
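process dispatches on the type of k. A short usage sketch (it relies on module-level globals such as train_data being prepared by the caller):

# Assumes train_data, train_labels, test_data, test_labels, best_words,
# the *_num counters and parameters are already defined at module level.
process(5)           # a single k  -> knn.single_k_classify
process([3, 5, 7])   # several ks  -> knn.multiple_k_classify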
Example #5
                    for id_subject in id_test:
                        time_test.append(timestamps[id_subject])

                    predict_labels, proba = model.test_model(data_test)

                    for i in range(len(predict_labels)):
                        conf_mat = tools.compute_confusion_matrix(
                            predict_labels[i], labels_test[i], list_states)
                        confusion_matrix += conf_mat
                        MCC += tools.compute_MCC_score(
                            predict_labels[i], labels_test[i],
                            list_states) / len(predict_labels)

                prec_total, recall_total, F1_score = tools.compute_score(
                    confusion_matrix)
                acc = tools.get_accuracy(confusion_matrix)
                F1_S = F1_score
                # F1_S = MCC/nbr_cross_val
                if not score:
                    score.append(F1_S)
                    best_features.append(sub_list_features)
                else:
                    for num in range(len(score)):
                        if F1_S > score[num]:
                            score.insert(num, F1_S)
                            best_features.insert(num, sub_list_features)
                            break

                        if num == len(score) - 1:
                            score.append(F1_S)
                            # Append the features too, so both lists stay in sync.
                            best_features.append(sub_list_features)
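The insertion loop above keeps score sorted in descending order with best_features in step. The standard library's bisect module can express the same ranking more compactly; a sketch (the function name is mine, not the source's):

import bisect

def rank_result(scores, features_ranked, f1, features):
    """Insert (f1, features) keeping both lists sorted by descending f1."""
    pos = bisect.bisect_left(scores, -f1)  # scores stores negated f1 values
    scores.insert(pos, -f1)
    features_ranked.insert(pos, features)

scores, features_ranked = [], []
rank_result(scores, features_ranked, 0.81, ['len', 'speed'])
rank_result(scores, features_ranked, 0.93, ['speed'])
# features_ranked is now [['speed'], ['len', 'speed']] -- best first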
Example #6
# -*- coding:utf-8 -*-
def PIPELINE(estimator):
    print(get_accuracy(estimator))
    write_to_file(estimator)
Example #7
def main(args):
    if args.test and args.saved_state is None:
        print('--test requires --saved_state to specify the model weights')
        sys.exit(1)  # non-zero exit status on invalid arguments

    # Select device
    cuda_device = 'cuda:%d' % args.gpu
    device = torch.device(cuda_device if torch.cuda.is_available() else 'cpu')

    # Load parameters from yaml file.
    param_config = load_yaml(args.param_file)

    # Assign parameters
    modality = args.modality
    modality_config = param_config.get('modalities').get(modality)
    selected_dataset = getattr(datasets,
                               param_config.get('dataset').get('class_name'))
    transforms, test_transforms = get_transforms_from_config(
        modality_config.get('transforms'))
    batch_size = modality_config.get(
        'batch_size') if args.bs is None else args.bs
    num_epochs = modality_config.get(
        'num_epochs') if args.epochs is None else args.epochs
    shuffle = param_config.get('dataset').get('shuffle')
    model_class_name = modality_config.get('model').get('class_name')
    criterion = modality_config.get('criterion').get('class_name')
    criterion_from = modality_config.get('criterion').get('from_module')
    optimizer = modality_config.get('optimizer').get('class_name')
    optimizer_from = modality_config.get('optimizer').get('from_module')
    optimizer_kwargs = modality_config.get('optimizer').get('kwargs')
    if args.lr:
        optimizer_kwargs['lr'] = args.lr
    train_dataset_kwargs = param_config.get('dataset').get('train_kwargs')
    validation_dataset_kwargs = param_config.get('dataset').get(
        'validation_kwargs')
    test_dataset_kwargs = param_config.get('dataset').get('test_kwargs')

    # Load Data
    train_dataset = selected_dataset(modality=modality,
                                     transform=transforms,
                                     **train_dataset_kwargs)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=shuffle)
    validation_dataset = selected_dataset(modality=modality,
                                          transform=test_transforms,
                                          **validation_dataset_kwargs)
    validation_loader = DataLoader(dataset=validation_dataset,
                                   batch_size=batch_size,
                                   shuffle=shuffle)
    test_dataset = selected_dataset(modality=modality,
                                    transform=test_transforms,
                                    **test_dataset_kwargs)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             shuffle=shuffle)

    # Initiate the model
    model_kwargs = modality_config.get('model').get('kwargs')
    if args.dr is not None:
        model_kwargs['dropout_rate'] = args.dr
    model = getattr(models, model_class_name)(
        *modality_config.get('model').get('args'), **model_kwargs)
    if args.test:
        model.load_state_dict(torch.load(args.saved_state))
    model = model.to(device)

    # Loss and optimizer
    criterion = getattr(importlib.import_module(criterion_from), criterion)()
    optimizer = getattr(importlib.import_module(optimizer_from),
                        optimizer)(model.parameters(), **optimizer_kwargs)

    # Training procedure
    max_val_acc = -1
    max_train_acc = -1
    min_train_loss = -1
    min_val_loss = -1

    if not args.test:
        # Initiate Tensorboard writer with the given experiment name or generate an automatic one
        experiment = '%s_%s_%s_%s' % (
            selected_dataset.__name__, modality,
            args.param_file.split('/')[-1],
            time.strftime("%Y%m%d_%H%M", time.localtime())
        ) if args.experiment is None else args.experiment
        writer_name = '../logs/%s' % experiment
        writer = SummaryWriter(writer_name)

        # Print parameters
        print_table({
            'param_file': args.param_file,
            'experiment': experiment,
            'tensorboard_folder': writer_name,
            'dataset': selected_dataset.__name__,
            'criterion': type(criterion).__name__,
            'optimizer': type(optimizer).__name__,
            'modality': modality,
            'model': model.name,
            'learning_rate': optimizer_kwargs['lr'],
            'batch_size': batch_size,
            'num_epochs': num_epochs,
        })

        # Start training
        train_accs, val_accs, train_losses, val_losses = train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            train_loader=train_loader,
            validation_loader=validation_loader,
            num_epochs=num_epochs,
            device=device,
            experiment=experiment,
            writer=writer)

        # Save last state of model
        save_model(model, '%s_last_state.pt' % experiment)

        max_val_acc = max(val_accs) if len(val_accs) > 0 else max_val_acc
        max_train_acc = max(
            train_accs) if len(train_accs) > 0 else max_train_acc
        # Keep the lowest (best) losses observed.
        min_train_loss = min(
            train_losses) if len(train_losses) > 0 else min_train_loss
        min_val_loss = min(val_losses) if len(val_losses) > 0 else min_val_loss

        cm_image_train = plot_confusion_matrix(
            cm=get_confusion_matrix(train_loader, model, device),
            title='Confusion Matrix - Training',
            normalize=False,
            save=False,
            classes=train_dataset.get_class_names(),
            show_figure=False)
        cm_image_validation = plot_confusion_matrix(
            cm=get_confusion_matrix(validation_loader, model, device),
            title='Confusion Matrix - Validation',
            normalize=False,
            save=False,
            classes=validation_dataset.get_class_names(),
            show_figure=False)
        cm_image_test = plot_confusion_matrix(
            cm=get_confusion_matrix(test_loader, model, device),
            title='Confusion Matrix - Test',
            normalize=False,
            save=False,
            classes=test_dataset.get_class_names(),
            show_figure=False)

        # Add confusion matrices for each dataset, mark it for the last step which is num_epochs - 1
        writer.add_images('ConfusionMatrix/Train',
                          cm_image_train,
                          dataformats='CHW',
                          global_step=num_epochs - 1)
        writer.add_images('ConfusionMatrix/Validation',
                          cm_image_validation,
                          dataformats='CHW',
                          global_step=num_epochs - 1)
        writer.add_images('ConfusionMatrix/Test',
                          cm_image_test,
                          dataformats='CHW',
                          global_step=num_epochs - 1)
        print('Best validation accuracy: %f' % max_val_acc)

        writer.add_text('config', json.dumps(param_config, indent=2))
        writer.add_text('args', json.dumps(args.__dict__, indent=2))
        writer.flush()
        writer.close()

    test_accuracy = get_accuracy(test_loader, model, device)
    print('Test accuracy (not based on val): %f' % test_accuracy)

    return {
        'test_acc': test_accuracy,
        'max_train_acc': max_train_acc,
        'max_val_acc': max_val_acc,
        'min_train_loss': min_train_loss,
        'min_val_loss': min_val_loss
    }
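main expects an argparse-style namespace. The flag set can be reconstructed from the attributes the function reads; a sketch with assumed defaults:

import argparse

def parse_args():
    # Flags reconstructed from the attributes main() accesses; defaults are guesses.
    p = argparse.ArgumentParser()
    p.add_argument('--param_file', required=True, help='YAML parameter file')
    p.add_argument('--modality', required=True)
    p.add_argument('--gpu', type=int, default=0)
    p.add_argument('--bs', type=int, default=None, help='override batch size')
    p.add_argument('--epochs', type=int, default=None)
    p.add_argument('--lr', type=float, default=None)
    p.add_argument('--dr', type=float, default=None, help='override dropout rate')
    p.add_argument('--experiment', default=None)
    p.add_argument('--saved_state', default=None, help='path to model weights')
    p.add_argument('--test', action='store_true')
    return p.parse_args()

# main(parse_args())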
Example #8
model = getattr(models, model_class_name)(
    *param_config.get('modalities').get(modality).get('model').get('args'),
    **param_config.get('modalities').get(modality).get('model').get('kwargs'))
model = model.to(device)
model.load_state_dict(torch.load(args.saved_state))

if args.knn:
    # Use the test transforms for train_dataset too, so feature extraction
    # sees no random augmentation.
    train_dataset = SelectedDataset(modality=modality,
                                    transform=test_transforms,
                                    **train_dataset_kwargs)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=shuffle)
    cm, test_accuracy = get_predictions_with_knn(n_neighbors=args.n_neighbors,
                                                 train_loader=train_loader,
                                                 test_loader=test_loader,
                                                 model=model,
                                                 device=device)
else:
    cm = get_confusion_matrix(test_loader, model, device)
    test_accuracy = get_accuracy(test_loader, model, device)

print('Test Accuracy: %f' % test_accuracy)
plot_confusion_matrix(cm=cm,
                      title='Confusion Matrix - Percentage % - Test Loader',
                      normalize=True,
                      save=False,
                      show_figure=True,
                      classes=test_dataset.get_class_names())
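get_predictions_with_knn is project code; presumably it classifies test samples by their nearest neighbours in the model's feature space. A rough equivalent with scikit-learn (every name below is illustrative, not the project's actual helper):

# Illustrative only: kNN over model embeddings using scikit-learn.
import numpy as np
import torch
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

def knn_eval(model, train_loader, test_loader, device, n_neighbors=5):
    def embed(loader):
        feats, labels = [], []
        model.eval()
        with torch.no_grad():
            for x, y in loader:
                feats.append(model(x.to(device)).cpu().numpy())
                labels.append(y.numpy())
        return np.concatenate(feats), np.concatenate(labels)

    train_x, train_y = embed(train_loader)
    test_x, test_y = embed(test_loader)
    knn = KNeighborsClassifier(n_neighbors=n_neighbors).fit(train_x, train_y)
    pred = knn.predict(test_x)
    return confusion_matrix(test_y, pred), accuracy_score(test_y, pred)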