def evaluate_model(model_weights_path, test_list):
    # load model weights
    if 'baseline' in model_weights_path:
        model = LFI_conv3D.build_model()
        model.load_weights(model_weights_path)
    elif 'CLSTM' in model_weights_path:
        model = LFV_conv3D_STCLSTM.build_model()
        model.load_weights(model_weights_path)
    else:
        raise ValueError(f'unrecognized model weights path: {model_weights_path}')
    model.summary()

    # path settings
    save_dir = Path(model_weights_path).parent / 'evaluated'
    save_dir.mkdir(parents=True, exist_ok=True)
    now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M")
    csv_path = save_dir / f'_metrics_{now}.csv'
    with open(csv_path, 'a', newline='') as f:
        writer = csv.writer(f)
        metrics_dict = calc_metrics(pred=None, true=None)
        writer.writerow(['scene_name', *metrics_dict.keys()])

    # evaluate
    test_gen = test_generator(test_list)
    preds = []
    gts = []
    scene = {'name':'', 'frame_n':0}
    for inputs, gt, scenes in test_gen.flow_from_directory():
        pred = model.predict(inputs)
        for i, scene_name in enumerate(scenes):
            preds.append(pred[i])
            gts.append(gt[i])
            if len(preds) == x_patch_n*y_patch_n:
                fullmap_pred = create_fullmap(preds)
                fullmap_gt = create_fullmap(gts)
                # npz_save
                if not scene['name'] == scene_name:
                    scene['frame_n'] = 0
                scene['name'] = scene_name
                save_name = save_dir / f"{scene['name']}_{scene['frame_n']:03}.npz"
                np.savez_compressed(save_name, pred=fullmap_pred, gt=fullmap_gt)
                print('saved:', save_name)
                scene['frame_n'] += 1
                preds = []
                gts = []
                # metrics output
                metrics_dict = calc_metrics(fullmap_pred, fullmap_gt)
                with open(csv_path, 'a', newline='') as f:
                    writer = csv.writer(f)
                    writer.writerow([scene_name, *metrics_dict.values()])
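In the example above, calc_metrics(pred=None, true=None) is called once just to recover the metric names for the CSV header, which implies the function returns a dict keyed by metric name even when no data is passed. A minimal sketch of a compatible implementation is shown below; the metric names and formulas are assumptions for illustration, not the repository's actual code.

import numpy as np

def calc_metrics(pred=None, true=None):
    # Hypothetical stand-in: keep the dict keys stable so callers can build
    # a CSV header from .keys() even when called with pred=true=None.
    keys = ('mae', 'rmse', 'psnr')
    if pred is None or true is None:
        return {k: None for k in keys}
    pred = np.asarray(pred, dtype=np.float64)
    true = np.asarray(true, dtype=np.float64)
    mae = float(np.mean(np.abs(pred - true)))
    rmse = float(np.sqrt(np.mean((pred - true) ** 2)))
    psnr = float(20 * np.log10(true.max() / rmse)) if rmse > 0 and true.max() > 0 else float('inf')
    return dict(zip(keys, (mae, rmse, psnr)))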
Example #2
def main():
    # set GPU ID
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    cudnn.benchmark = True

    # check save path
    save_path = args.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # make dataloader
    train_loader, test_loader, \
    test_onehot, test_label = dataset.get_loader(args.data,
                                                 args.data_path,
                                                 args.batch_size)

    # set num_class
    if args.data == 'cifar100':
        num_class = 100
    else:
        num_class = 10

    # set num_classes
    model_dict = {
        "num_classes": num_class,
    }

    # set network
    if args.model == 'res':
        model = resnet.resnet110(**model_dict).cuda()
    elif args.model == 'dense':
        model = densenet_BC.DenseNet3(depth=100,
                                      num_classes=num_class,
                                      growth_rate=12,
                                      reduction=0.5,
                                      bottleneck=True,
                                      dropRate=0.0).cuda()
    elif args.model == 'vgg':
        model = vgg.vgg16(**model_dict).cuda()

    # set criterion
    cls_criterion = nn.CrossEntropyLoss().cuda()

    # make logger
    result_logger = utils.Logger(os.path.join(save_path, 'result.log'))

    # load pretrained model
    model_state_dict = torch.load(os.path.join(args.save_path,
                                               '{0}.pth'.format(args.file_name)))
    model.load_state_dict(model_state_dict)

    # calc measure
    acc, aurc, eaurc, aupr, fpr, ece, nll, brier = metrics.calc_metrics(test_loader,
                                                                        test_label,
                                                                        test_onehot,
                                                                        model,
                                                                        cls_criterion)
    # result write
    result_logger.write([acc,aurc*1000,eaurc*1000,aupr*100,fpr*100,ece*100,nll*10,brier*100])
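Example #2 reports calibration-oriented measures (accuracy, AURC, E-AURC, AUPR, FPR, ECE, NLL, Brier) from metrics.calc_metrics. As an illustration of one of them, below is a self-contained sketch of expected calibration error over softmax confidences; the 15-bin equal-width scheme is an assumption and may differ from the repository's definition.

import numpy as np

def expected_calibration_error(confidences, correct, n_bins=15):
    # ECE: bin predictions by confidence, then take the bin-size-weighted
    # average of |accuracy - mean confidence| per bin.
    confidences = np.asarray(confidences, dtype=np.float64)
    correct = np.asarray(correct, dtype=np.float64)
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(edges[:-1], edges[1:]):
        in_bin = (confidences > lo) & (confidences <= hi)
        if in_bin.any():
            ece += in_bin.mean() * abs(correct[in_bin].mean() - confidences[in_bin].mean())
    return ece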
Example #3
def main():
    # set GPU ID
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    cudnn.benchmark = True

    # check save path
    save_path = args.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # make dataloader
    train_loader, test_loader, \
    test_onehot, test_label = dataset.get_loader(args.data,
                                                 args.data_path,
                                                 args.batch_size)

    # set num_class
    if args.data == 'cifar100':
        num_class = 100
    else:
        num_class = 10

    # set num_classes
    model_dict = {
        "num_classes": num_class,
    }

    # set model
    if args.model == 'res':
        model = resnet.resnet110(**model_dict).cuda()
    elif args.model == 'dense':
        model = densenet_BC.DenseNet3(depth=100,
                                      num_classes=num_class,
                                      growth_rate=12,
                                      reduction=0.5,
                                      bottleneck=True,
                                      dropRate=0.0).cuda()
    elif args.model == 'vgg':
        model = vgg.vgg16(**model_dict).cuda()

    # set criterion
    cls_criterion = nn.CrossEntropyLoss().cuda()
    ranking_criterion = nn.MarginRankingLoss(margin=0.0).cuda()

    # set optimizer (default:sgd)
    optimizer = optim.SGD(model.parameters(),
                          lr=0.1,
                          momentum=0.9,
                          weight_decay=0.0001,
                          nesterov=False)

    # set scheduler
    scheduler = MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

    # make logger
    train_logger = utils_orig.Logger(os.path.join(save_path, 'train.log'))
    result_logger = utils_orig.Logger(os.path.join(save_path, 'result.log'))

    # make History Class
    correctness_history = crl_utils.History(len(train_loader.dataset))

    # start Train
    for epoch in range(1, args.epochs + 1):
        scheduler.step()
        train.train(train_loader, model, cls_criterion, ranking_criterion,
                    optimizer, epoch, correctness_history, train_logger, args)

        # save model
        if epoch == args.epochs:
            torch.save(model.state_dict(),
                       os.path.join(save_path, 'model.pth'))
    # finish train

    # calc measure
    acc, aurc, eaurc, aupr, fpr, ece, nll, brier = metrics.calc_metrics(
        test_loader, test_label, test_onehot, model, cls_criterion)
    # result write
    result_logger.write([
        acc, aurc * 1000, eaurc * 1000, aupr * 100, fpr * 100, ece * 100,
        nll * 10, brier * 100
    ])
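Example #3 differs from Example #2 mainly in that it trains the network with an extra nn.MarginRankingLoss and a crl_utils.History of per-sample correctness (a correctness-ranking signal). The sketch below shows how such a ranking target could be formed for one pair of samples; the exact pairing and normalization done inside train.train are not shown in this excerpt, so treat this as an assumption.

import torch
import torch.nn as nn

ranking_criterion = nn.MarginRankingLoss(margin=0.0)

def correctness_ranking_loss(conf_a, conf_b, correct_count_a, correct_count_b):
    # Encourage the sample that has been classified correctly more often so far
    # to also receive the higher confidence; ties yield a zero target.
    target = torch.sign(correct_count_a - correct_count_b)
    return ranking_criterion(conf_a, conf_b, target)

# Example: sample a was correct 9 times so far, sample b only 3 times.
loss = correctness_ranking_loss(torch.tensor([0.6]), torch.tensor([0.8]),
                                torch.tensor([9.0]), torch.tensor([3.0]))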
Example #4
def main():
    parser = argparse.ArgumentParser()
    # Experiment
    parser.add_argument("--experiment_name", default="default")
    parser.add_argument("--idd_name", default="skeletal-age")
    parser.add_argument(
        "--mode",
        type=str,
        default='devries',
        choices=['baseline', 'devries', 'devries_odin', 'energy', 'oe'])
    parser.add_argument("--outlier_name",
                        type=str,
                        nargs='+',
                        default=['mura', 'mimic-crx'])
    parser.add_argument("--ood_name", type=str, nargs='+', default=['retina'])
    parser.add_argument('--use_xent',
                        '-x',
                        action='store_true',
                        help='Use cross entropy scoring instead of the MSP.')

    parser.add_argument("--network", type=str, default="resnet")
    # Hyper params
    parser.add_argument("--num_epochs", type=int, default=300)
    parser.add_argument("--hint_rate", type=float, default=0.5)
    parser.add_argument("--beta", type=float, default=0.3)
    parser.add_argument("--lmbda", type=float, default=0.1)
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument(
        '--m_in',
        type=float,
        default=-25.,
        help='margin for in-distribution; above this value will be penalized')
    parser.add_argument(
        '--m_out',
        type=float,
        default=-7.,
        help='margin for out-distribution; below this value will be penalized')
    # Training params
    parser.add_argument("--use_scheduler", type=bool, default=False)
    parser.add_argument("--lr", type=int, default=1e-3)
    parser.add_argument('--losses',
                        nargs='+',
                        default=["boneage_mad", "accuracy"])
    parser.add_argument('--early_stop_metric',
                        type=str,
                        default="fpr_at_95_tpr")
    parser.add_argument('--early_stop', type=int, default=10)
    parser.add_argument('--eval_start', type=int, default=1)
    # Misc
    parser.add_argument("--checkpoint", default="")
    parser.add_argument("--load_memory",
                        type=bool,
                        default=False,
                        help="Load images into CPU")

    args = parser.parse_args()
    args = utils.compute_args(args)

    # Create dataloader according to experiments
    loader_args = {
        'name': args.idd_name,
        'mode': 'idd',
        'root_dir': args.root[args.idd_name],
        'csv_file': 'train.csv',
        'load_memory': args.load_memory
    }

    train_loader = DataLoader(LarsonDataset(**loader_args),
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=4)

    test_true_loader = DataLoader(
        LarsonDataset(**dict(loader_args, **{'csv_file': 'test.csv'})),
        batch_size=16,
        shuffle=False,
        num_workers=4)

    outlier_set = ConcatDataset([
        LarsonDataset(
            **{
                'name': outlier_name,
                'mode': 'idd',
                'root_dir': args.root[outlier_name],
                'csv_file': 'train.csv',
                'load_memory': False
            }) for outlier_name in args.outlier_name
    ])

    assert len(outlier_set) >= len(train_loader.dataset)

    test_false_loaders = {}
    for ood_name in args.ood_name:
        test_false_loaders[ood_name] = DataLoader(LarsonDataset(
            **{
                'name': ood_name,
                'mode': 'ood',
                'root_dir': args.root[ood_name],
                'csv_file': 'test.csv',
                'load_memory': False
            }),
                                                  batch_size=16,
                                                  shuffle=False,
                                                  num_workers=4)

    model_config = DeVriesLarsonModelConfig(args=args,
                                            hint_rate=args.hint_rate,
                                            lmbda=args.lmbda,
                                            beta=args.beta)
    net = model_config.net
    loss_plots = Plot(idd_name=args.idd_name,
                      early_stop_metric=args.early_stop_metric)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)

    print(
        "Network", args.network, 'mode', args.mode, "\nLambda", args.lmbda,
        "beta", args.beta, "hint_rate", args.hint_rate,
        "\nTotal number of parameters : " +
        str(sum([p.numel() for p in net.parameters()]) / 1e6) + "M")

    init_epoch = 0
    checkpoints_folder = f"checkpoints/{args.experiment_name}"
    early_stop = 0

    best_early_stop_value = args.early_stop_start  # -inf or +inf
    for epoch in range(init_epoch, init_epoch + args.num_epochs):
        train_start = time.time()
        # Train phases
        net.train()

        # Shuffling outlier set
        outlier_loader = DataLoader(outlier_set,
                                    batch_size=16,
                                    shuffle=True,
                                    num_workers=4)

        for train_iter, (in_set, out_set) in enumerate(
                zip(train_loader, outlier_loader)):

            data = torch.cat((in_set[0], out_set[0]), dim=0)
            target = in_set[1]

            data, target = data.cuda(), target.cuda()

            # forward
            pred, _ = net(data)

            # backward
            model_config.optimizer.zero_grad()

            task_loss = F.cross_entropy(pred[:len(in_set[0])], target)
            # cross-entropy from softmax distribution to uniform distribution
            if args.mode == 'energy':
                Ec_out = -torch.logsumexp(pred[len(in_set[0]):], dim=1)
                Ec_in = -torch.logsumexp(pred[:len(in_set[0])], dim=1)
                oe_loss = 0.1 * (
                    torch.pow(F.relu(Ec_in - args.m_in), 2).mean() +
                    torch.pow(F.relu(args.m_out - Ec_out), 2).mean())
            elif args.mode == 'oe':
                oe_loss = args.beta * -(pred[len(in_set[0]):].mean(
                    1) - torch.logsumexp(pred[len(in_set[0]):], dim=1)).mean()
            else:
                raise NotImplementedError

            total_loss = task_loss + oe_loss
            total_loss.backward()
            model_config.optimizer.step()

            print(
                "\r[Epoch {}][Step {}/{}] Loss: {:.2f} [Task: {:.2f}, Energy: {:.2f}], Lr: {:.2e}, ES: {}, {:.2f} m remaining"
                .format(
                    epoch + 1,
                    train_iter,
                    int(len(train_loader.dataset) / args.batch_size),
                    total_loss.cpu().data.numpy(),
                    task_loss.cpu().data.numpy(),
                    oe_loss.cpu().data.numpy(),
                    *[
                        group['lr']
                        for group in model_config.optimizer.param_groups
                    ],
                    early_stop,
                    ((time.time() - train_start) / (train_iter + 1)) *
                    ((len(train_loader.dataset) / args.batch_size) -
                     train_iter) / 60,
                ),
                end='          ')

        # Eval phase
        if epoch + 1 >= args.eval_start:
            net.eval()

            def evaluate(data_loader, mode="confidence", idd=False):
                confidences = []
                idd_metrics = defaultdict(list)

                for test_iter, sample in enumerate(data_loader, 0):
                    images, labels, _ = sample

                    # Reassigns inputs_batch and label_batch to cuda
                    pred, confidence = net(images.cuda())
                    labels = labels.data.cpu().numpy()

                    # manage in domain metric
                    if idd:
                        loss_dict = {}
                        task_predictions = torch.argmax(
                            pred, dim=-1).data.cpu().numpy()
                        if "accuracy" in args.losses:
                            loss_dict['accuracy'] = list(
                                task_predictions == labels)
                        elif "boneage_mad" in args.losses:
                            loss_dict['boneage_mad'] = list(
                                abs(task_predictions - labels))
                        else:
                            raise NotImplementedError

                        for k, v in loss_dict.items():
                            idd_metrics[k].extend(v)

                    # Get confidence in prediction
                    confidences.extend(
                        get_confidence(net, images, pred, confidence, args))

                confidences = np.array(confidences)
                if idd:
                    # Plot accuracy
                    if 'accuracy' in idd_metrics:
                        plot_classification(
                            idd_metrics['accuracy'],
                            confidences,
                            checkpoints_folder,
                            name=str(args.idd_name) + ' - ' + str(
                                round(float(np.mean(idd_metrics['accuracy'])),
                                      4)) + ' - ' + 'Epoch ' + str(epoch + 1) +
                            '.jpg')

                    # Average metric over valset
                    for k, v in idd_metrics.items():
                        idd_metrics[k] = round(float(np.mean(v)), 4)

                return confidences, idd_metrics

            # In domain evaluation
            ind_confs, ind_metrics = evaluate(test_true_loader, idd=True)
            ind_labels = np.ones(ind_confs.shape[0])

            ind_metrics["IDD name"] = args.idd_name
            print(str(ind_metrics))

            # Out of domain evaluation
            early_stop_metric_value = 0
            ood_metric_dicts = []
            for ood_name, test_false_loader in test_false_loaders.items():
                ood_confs, _ = evaluate(test_false_loader, idd=False)
                ood_labels = np.zeros(ood_confs.shape[0])

                labels = np.concatenate([ind_labels, ood_labels])
                scores = np.concatenate([ind_confs, ood_confs])

                ood_metrics = calc_metrics(scores, labels)
                ood_metrics['OOD Name'] = ood_name
                print(str(ood_metrics))
                ood_metric_dicts.append(ood_metrics)

                # Plot metrics
                plot_metrics(scores,
                             labels,
                             ind_confs,
                             ood_confs,
                             checkpoints_folder,
                             name=str(ood_name) + ' - ' + 'Epoch ' +
                             str(epoch + 1))

                # fetch early stop value (might be an in-distribution metric)
                early_stop_metric_value += {
                    **ind_metrics,
                    **ood_metrics
                }[args.early_stop_metric]

            early_stop_metric_value = early_stop_metric_value / len(
                test_false_loaders)
            early_stop += 1
            loss_plots.update(epoch + 1, ind_metrics, ood_metric_dicts)
            # Save model + early stop
            # Early_stop_operator is min or max
            if args.early_stop_operator(
                    early_stop_metric_value,
                    best_early_stop_value) != best_early_stop_value:
                early_stop = 0
                best_early_stop_value = early_stop_metric_value
                utils.save_checkpoint(checkpoints_folder, {
                    "init_epoch":
                    epoch + 1,
                    "net":
                    net.state_dict(),
                    "optimizer":
                    model_config.optimizer.state_dict(),
                    "scheduler":
                    model_config.scheduler.state_dict()
                    if args.use_scheduler else None,
                    "ood_metrics":
                    ood_metric_dicts,
                    "ind_metrics":
                    ind_metrics,
                    "best_early_stop_value":
                    best_early_stop_value,
                    "args":
                    args,
                },
                                      keep_n_best=1)

                print('Early stop metric ' + str(args.early_stop_metric) +
                      ' beaten. Now ' + str(best_early_stop_value))

        if args.use_scheduler:
            model_config.scheduler.step(ind_metrics['accuracy'])
        if early_stop == args.early_stop:
            loss_plots.draw(checkpoints_folder)
            print("early_stop reached")
            break

    loss_plots.draw(checkpoints_folder)
    print('Done')
    return
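In Example #4, calc_metrics(scores, labels) receives concatenated confidences with label 1 for in-distribution and 0 for out-of-distribution samples, and its output is expected to contain the early-stop metric (fpr_at_95_tpr by default). A compatible sketch using scikit-learn is shown below; the exact metric set and definitions in the repository may differ.

import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

def calc_metrics(scores, labels):
    # labels: 1 = in-distribution, 0 = OOD; scores: higher means more confident in-distribution.
    auroc = roc_auc_score(labels, scores)
    fpr, tpr, _ = roc_curve(labels, scores)
    idx = int(np.searchsorted(tpr, 0.95))  # first threshold where TPR reaches 95%
    return {'auroc': auroc,
            'fpr_at_95_tpr': float(fpr[min(idx, len(fpr) - 1)])}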
Example #5
def main(*kargs, **kwargs):
    get_kwargs(kwargs)
    train_fname = kwargs['train']
    test_fname = kwargs['test']
    result_fname = kwargs['output']
    embeds_fname = kwargs['embeds']
    logger_fname = kwargs['logger']
    swear_words_fname = kwargs['swear_words']
    wrong_words_fname = kwargs['wrong_words']
    warm_start = kwargs['warm_start']
    format_embeds = kwargs['format_embeds']

    cnn_model_file = 'data/cnn.h5'
    lstm_model_file = 'data/lstm.h5'
    concat_model_file = 'data/concat.h5'
    lr_model_file = 'data/{}_logreg.bin'
    meta_catboost_model_file = 'data/{}_meta_catboost.bin'

    # ====Create logger====
    logger = Logger(logging.getLogger(), logger_fname)

    # ====Load data====
    logger.info('Loading data...')
    train_df = load_data(train_fname)
    test_df = load_data(test_fname)

    target_labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    num_classes = len(target_labels)

    # ====Load additional data====
    logger.info('Loading additional data...')
    swear_words = load_data(swear_words_fname, func=lambda x: set(x.T[0]), header=None)
    wrong_words_dict = load_data(wrong_words_fname, func=lambda x: {val[0] : val[1] for val in x})

    tokinizer = RegexpTokenizer(r'\w+')
    regexps = [re.compile("([a-zA-Z]+)([0-9]+)"), re.compile("([0-9]+)([a-zA-Z]+)")]

    # ====Load word vectors====
    logger.info('Loading embeddings...')
    embed_dim = 300
    embeds = Embeds(embeds_fname, 'fasttext', format=format_embeds)

    # ====Clean texts====
    logger.info('Cleaning text...')
    if warm_start:
        logger.info('Use warm start...')
    else:
        train_df['comment_text_clear'] = clean_text(train_df['comment_text'], tokinizer, wrong_words_dict, swear_words, regexps)
        test_df['comment_text_clear'] = clean_text(test_df['comment_text'], tokinizer, wrong_words_dict, swear_words, regexps)
        train_df.to_csv(train_clear, index=False)
        test_df.to_csv(test_clear, index=False)

    # ====Calculate maximum seq length====
    logger.info('Calc text length...')
    train_df.fillna('unknown', inplace=True)
    test_df.fillna('unknown', inplace=True)
    train_df['text_len'] = train_df['comment_text_clear'].apply(lambda words: len(words.split()))
    test_df['text_len'] = test_df['comment_text_clear'].apply(lambda words: len(words.split()))
    max_seq_len = np.round(train_df['text_len'].mean() + 3*train_df['text_len'].std()).astype(int)
    logger.debug('Max seq length = {}'.format(max_seq_len))

    # ====Prepare data to NN====
    logger.info('Converting texts to sequences...')
    max_words = 100000

    train_df['comment_seq'], test_df['comment_seq'], word_index = convert_text2seq(train_df['comment_text_clear'].tolist(), test_df['comment_text_clear'].tolist(), max_words, max_seq_len, lower=True, char_level=False)
    logger.debug('Dictionary size = {}'.format(len(word_index)))
    logger.info('Preparing embedding matrix...')
    embedding_matrix, words_not_found = get_embedding_matrix(embed_dim, embeds, max_words, word_index)
    logger.debug('Embedding matrix shape = {}'.format(np.shape(embedding_matrix)))
    logger.debug('Number of null word embeddings = {}'.format(np.sum(np.sum(embedding_matrix, axis=1) == 0)))

    # ====Train/test split data====
    x = np.array(train_df['comment_seq'].tolist())
    y = np.array(train_df[target_labels].values)
    x_train_nn, x_test_nn, y_train_nn, y_test_nn, train_idxs, test_idxs = split_data(x, y, test_size=0.2, shuffle=True, random_state=42)
    test_df_seq = np.array(test_df['comment_seq'].tolist())

    # ====Train models====

    # CNN
    logger.info("training CNN ...")
    cnn = get_cnn(embedding_matrix, num_classes, embed_dim, max_seq_len, num_filters=64, l2_weight_decay=0.0001, dropout_val=0.5, dense_dim=32, add_sigmoid=True)
    cnn_hist = train(x_train_nn, y_train_nn, cnn, batch_size=256, num_epochs=100, learning_rate=0.005, early_stopping_delta=0.0001, early_stopping_epochs=3, use_lr_stratagy=True, lr_drop_koef=0.66, epochs_to_drop=2, logger=logger)
    y_cnn = cnn.predict(x_test_nn)
    save_predictions(test_df, cnn.predict(test_df_seq), target_labels, 'cnn')
    metrics_cnn = get_metrics(y_test_nn, y_cnn, target_labels, hist=cnn_hist, plot=False)
    logger.debug('CNN metrics:\n{}'.format(metrics_cnn))
    cnn.save(cnn_model_file)

    # LSTM
    logger.info("training LSTM ...")
    lstm = get_lstm(embedding_matrix, num_classes, embed_dim, max_seq_len, l2_weight_decay=0.0001, lstm_dim=50, dropout_val=0.3, dense_dim=32, add_sigmoid=True)
    lstm_hist = train(x_train_nn, y_train_nn, lstm, batch_size=256, num_epochs=100, learning_rate=0.005, early_stopping_delta=0.0001, early_stopping_epochs=3, use_lr_stratagy=True, lr_drop_koef=0.66, epochs_to_drop=2, logger=logger)
    y_lstm = lstm.predict(x_test_nn)
    save_predictions(test_df, lstm.predict(test_df_seq), target_labels, 'lstm')
    metrics_lstm = get_metrics(y_test_nn, y_lstm, target_labels, hist=lstm_hist, plot=False)
    logger.debug('LSTM metrics:\n{}'.format(metrics_lstm))
    lstm.save(lstm_model_file)

    # CONCAT
    logger.info("training Concat NN (LSTM + CNN) ...")
    concat = get_concat_model(embedding_matrix, num_classes, embed_dim, max_seq_len, num_filters=64, l2_weight_decay=0.0001, lstm_dim=50, dropout_val=0.5, dense_dim=32, add_sigmoid=True)
    concat_hist = train([x_train_nn, x_train_nn], y_train_nn, concat, batch_size=256, num_epochs=100, learning_rate=0.005, early_stopping_delta=0.0001, early_stopping_epochs=4, use_lr_stratagy=True, lr_drop_koef=0.66, epochs_to_drop=3, logger=logger)
    y_concat = concat.predict([x_test_nn, x_test_nn])
    save_predictions(test_df, concat.predict([test_df_seq, test_df_seq]), target_labels, 'concat')
    metrics_concat = get_metrics(y_test_nn, y_concat, target_labels, hist=concat_hist, plot=False)
    logger.debug('Concat_NN metrics:\n{}'.format(metrics_concat))
    concat.save(concat_model_file)

    # TFIDF + LogReg
    logger.info('training LogReg over tfidf...')
    train_tfidf, val_tfidf, test_tfidf, word_tfidf, char_tfidf = get_tfidf(train_df['comment_text_clear'].values[train_idxs],
                                                    train_df['comment_text_clear'].values[test_idxs],
                                                    test_df['comment_text_clear'].values)

    models_lr = []
    metrics_lr = {}
    y_tfidf = []
    for i, label in enumerate(target_labels):
        model = NbSvmClassifier(C=4.0, solver='sag', max_iter=1000)
        model.fit(train_tfidf, y_train_nn[:, i])
        y_tfidf.append(model.predict_proba(val_tfidf)[:,1])
        test_df['tfidf_{}'.format(label)] = model.predict_proba(test_tfidf)[:,1]
        metrics_lr[label] = calc_metrics(y_test_nn[:, i], y_tfidf[-1])
        models_lr.append(model)
        joblib.dump(model, lr_model_file.format(label))
    metrics_lr['Avg logloss'] = np.mean([metric[0] for label, metric in metrics_lr.items()])
    logger.debug('LogReg(TFIDF) metrics:\n{}'.format(metrics_lr))

    # Bow for catboost
    top_pos_words = []
    top_neg_words = []
    for i in range(num_classes):
        top_pos_words.append([])
        top_neg_words.append([])
        top_pos_words[-1], top_neg_words[-1] = get_most_informative_features([word_tfidf, char_tfidf], models_lr[i], n=100)

    top_pos_words = list(set(np.concatenate([[val for score, val in top] for top in top_pos_words])))
    top_neg_words = list(set(np.concatenate([[val for score, val in top] for top in top_neg_words])))
    top = list(set(np.concatenate([top_pos_words, top_neg_words])))
    train_bow = get_bow(train_df['comment_text_clear'].values[train_idxs], top)
    val_bow = get_bow(train_df['comment_text_clear'].values[test_idxs], top)
    test_bow = get_bow(test_df['comment_text_clear'].values, top)
    logger.debug('Count bow words = {}'.format(len(top)))

    # Meta catboost
    logger.info('training catboost as metamodel...')
    train_df['text_unique_len'] = train_df['comment_text_clear'].apply(calc_text_uniq_words)
    test_df['text_unique_len'] = test_df['comment_text_clear'].apply(calc_text_uniq_words)

    train_df['text_unique_koef'] = train_df['text_unique_len'] / train_df['text_len']
    test_df['text_unique_koef'] = test_df['text_unique_len'] / test_df['text_len']

    text_len_features = train_df[['text_len', 'text_unique_len', 'text_unique_koef']].values[test_idxs]

    x_train_catboost = []
    y_train_catboost = y_test_nn
    for len_f, y_hat_cnn, y_hat_lstm, y_hat_concat, y_hat_tfidf, bow in zip(text_len_features, y_cnn, y_lstm, y_concat, np.array(y_tfidf).T, val_bow):
        x_train_catboost.append(np.concatenate([len_f, y_hat_cnn, y_hat_lstm, y_hat_concat, y_hat_tfidf, bow]))

    models_cb = []
    metrics_cb = {}
    x_train_cb, x_val_cb, y_train_cb, y_val_cb = train_test_split(x_train_catboost, y_train_catboost, test_size=0.20, random_state=42)
    for i, label in enumerate(target_labels):
        model = CatBoostClassifier(loss_function='Logloss', iterations=1000, depth=6, rsm=1, learning_rate=0.01)
        model.fit(x_train_cb, y_train_cb[:, i], plot=True, eval_set=(x_val_cb, y_val_cb[:, i]), use_best_model=True)
        y_hat_cb = model.predict_proba(x_val_cb)
        metrics_cb[label] = calc_metrics(y_val_cb[:, i], y_hat_cb[:, 1])
        models_cb.append(model)
        joblib.dump(model, meta_catboost_model_file.format(label))
    metrics_cb['Avg logloss'] = np.mean([metric[0] for label,metric in metrics_cb.items()])
    logger.debug('CatBoost metrics:\n{}'.format(metrics_cb))

    # ====Predict====
    logger.info('Applying models...')
    text_len_features = test_df[['text_len', 'text_unique_len', 'text_unique_koef']].values
    y_cnn_test = test_df[['cnn_{}'.format(label) for label in target_labels]].values
    y_lstm_test = test_df[['lstm_{}'.format(label) for label in target_labels]].values
    y_concat_test = test_df[['concat_{}'.format(label) for label in target_labels]].values
    y_tfidf_test = test_df[['tfidf_{}'.format(label) for label in target_labels]].values
    x_test_cb = []
    for len_f, y_hat_cnn, y_hat_lstm, y_hat_concat, y_hat_tfidf, bow in tqdm(zip(text_len_features, y_cnn_test, y_lstm_test, y_concat_test, y_tfidf_test, test_bow)):
        x_test_cb.append(np.concatenate([len_f, y_hat_cnn, y_hat_lstm, y_hat_concat, y_hat_tfidf, bow]))

    for label, model in zip(target_labels, models_cb):
        pred = model.predict_proba(x_test_cb)
        test_df[label] = np.array(list(pred))[:, 1]

    # ====Save results====
    logger.info('Saving results...')
    test_df[['id', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']].to_csv(result_fname, index=False, header=True)
Example #6
        X_test[interv] = np.array(X_test[interv])
        y_train[interv] = np.array(y_train[interv])
        y_test[interv] = np.array(y_test[interv])
        size = len(y_test[interv])

        print "\t", "Model compute for size = " + str(size)
        print "\t", "computing model..."
        start = datetime.datetime.now()
        text_clf = model.fit(X_train[interv], y_train[interv])
        stop = datetime.datetime.now()
        final_time = (stop-start)

        print "\t", "Running = ", final_time

        print "\t", "predicting..."
        predicted = text_clf.predict(X_test[interv])

        parameters = {
            "filename_model_results":"results/RandomForest" + "_" + interv + ".txt",
            "filename_confusion_matrix":None,
            "model_interval" : [interval[interv][0],interval[interv][1]],
            "model_type":"RandomForest Model",
            "model":text_clf,
            "expected":y_test[interv],
            "predicted":predicted,
            "target_names":target_names}

        metrics.calc_metrics(parameters, final_time)

    else:
        print "\t","No data for this interval"
Example #7
def calculate_alg(singletons, net, uw, ud, g_type, alg, metric):

    communities = "/home/amaury/communities_hashmap/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    output = str(output_dir) + str(metric) + "/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"
    graphs = "/home/amaury/graphs_hashmap/" + str(net) + "/" + str(
        g_type) + "/"

    if not os.path.exists(communities):
        print("Diretório com as comunidades não encontrado: " +
              str(communities) + "\n")

    else:
        print
        print(
            "######################################################################"
        )
        print("Os arquivos serão armazenados em: " + str(output))
        print(
            "######################################################################"
        )

        for threshold in os.listdir(communities):
            if not os.path.isdir(str(communities) + str(threshold) + "/"):
                print("Threshold para a rede " + str(net) +
                      " não encontrado: " + str(threshold))

            else:
                print("Salvando dados em: " + str(output) + str(threshold) +
                      ".json")
                if not os.path.exists(output):
                    os.makedirs(output)

                if os.path.exists(str(output) + str(threshold) + ".json"):
                    print("Arquivo de destino já existe: " + str(output) +
                          str(threshold) + ".json")

                else:
                    print(
                        "######################################################################"
                    )

                    result = []

                    i = 0  # pointer to the ego
                    for file in os.listdir(
                            str(communities) + str(threshold) + "/"):
                        if os.path.isfile(
                                str(communities) + str(threshold) + "/" +
                                file):
                            ego_id = file.split(".txt")
                            ego_id = long(ego_id[0])
                            i += 1

                            if not os.path.isfile(
                                    str(graphs) + str(ego_id) + ".edge_list"):
                                print(
                                    "ERROR - EGO: " + str(i) +
                                    " - Arquivo com lista de arestas não encontrado:"
                                    + str(graphs) + str(ego_id) + ".edge_list")

                            else:
                                with open(
                                        str(communities) + str(threshold) +
                                        "/" + file, 'r') as community_file:
                                    if ud is False:
                                        G = snap.LoadEdgeList(
                                            snap.PNGraph,
                                            str(graphs) + str(ego_id) +
                                            ".edge_list", 0, 1
                                        )  # load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
                                    else:
                                        G = snap.LoadEdgeList(
                                            snap.PUNGraph,
                                            str(graphs) + str(ego_id) +
                                            ".edge_list", 0, 1
                                        )  # load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')

                                    print(
                                        str(g_type) + " - " + str(alg) +
                                        " - " + str(singletons) + " - Rede: " +
                                        str(net) + " - THRESHOLD: " +
                                        str(threshold) + " - ego(" + str(i) +
                                        "): " + str(file))

                                    communities_dict = prepare_communities(
                                        community_file
                                    )  # function that returns a dictionary with the communities

                                    avg = metrics.calc_metrics(
                                        communities_dict, G, ud,
                                        metric)  # compute the metrics
                                    result.append(
                                        avg['media'])  # store the metric

                                    print metric, result[i - 1]
                                    print

                    print(
                        "######################################################################"
                    )

                    with open(str(output) + str(threshold) + ".json",
                              "w") as f:
                        f.write(
                            json.dumps(result, separators=(',', ':')) + "\n")

    print(
        "######################################################################"
    )
Example #8
    def on_epoch_end(self, epoch, logs=None):
        output_path_name = os.path.join(self.output_path, f'epoch_{epoch+1}')
        os.makedirs(output_path_name, exist_ok=True)
        metrics_log_name = os.path.join(output_path_name, "metrics.txt")
        if os.path.exists(metrics_log_name):
            os.remove(metrics_log_name)
        metrics_log = open(metrics_log_name, "a+")

        self.lrs.append(K.eval(self.model.optimizer.lr))

        predicted = []
        all_metrics = ['iou']
        metrics_values = {i: 0 for i in all_metrics}
        tmp_metrics_per_cls_res = {i: [] for i in range(self.n_classes)}
        t1 = time()
        ii = 0
        for image, mask in zip(self.images, self.masks):
            s = f'Testing on {ii+1} of {self.images.shape[0]}'
            print(s)
            metrics_log.write('\n' + s + '\n')
            pred = self.predict_image(image)
            predicted.append(pred)

            metrics = calc_metrics(
                mask[self.offset:-self.offset, self.offset:-self.offset, ...],
                pred, all_metrics, self.n_classes)
            for metric in metrics:
                s = 'Metrics for each class:'
                print(s)
                metrics_log.write(s + '\n')
                i = 0
                for value in metric[1]:
                    s = f"{metric[0]} for {classes_mask[i]} : {value}"
                    print(s)
                    metrics_log.write(s + '\n')
                    tmp_metrics_per_cls_res[i].append(value)
                    i += 1
                metrics_values[metric[0]] += sum(metric[1]) / len(metric[1])
            ii += 1

        for i in range(self.n_classes):
            self.metrics_per_cls_res[i].append(mean(
                tmp_metrics_per_cls_res[i]))

        s = 'Average metrics values:'
        print(s)
        metrics_log.write('\n' + s + '\n')
        for metrics_name in metrics_values.keys():
            self.metrics_results[metrics_name][
                epoch +
                1] = metrics_values[metrics_name] / self.images.shape[0]
            s = f'{metrics_name} : {(metrics_values[metrics_name] / self.images.shape[0])}'
            print(s)
            metrics_log.write(s + '\n')

        t2 = time()
        print(f'Prediction completed in {t2-t1} seconds')

        print('Processing visualization:')
        visualize_segmentation_result(np.array([
            i[self.offset:-self.offset, self.offset:-self.offset, ...]
            for i in self.images
        ]), [
            np.argmax(i[self.offset:-self.offset, self.offset:-self.offset,
                        ...],
                      axis=2) for i in self.masks
        ], [np.argmax(i, axis=2) for i in predicted],
                                      names=self.names,
                                      n_classes=self.n_classes,
                                      output_path=self.output_path,
                                      epoch=epoch)

        visualize_pred_heatmaps(predicted, self.n_classes, self.output_path,
                                epoch)
        plot_metrics_history(self.metrics_results, self.output_path)
        plot_per_class_history(self.metrics_per_cls_res, self.output_path)
        plot_lrs(self.lrs, self.output_path)
        t3 = time()
        print(f'Visualization completed in {t3-t2} seconds')
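Example #8 asks calc_metrics for 'iou' and then averages the returned per-class values. A minimal per-class IoU sketch for (H, W, n_classes) masks is given below; the channel-last layout is inferred from the callback's np.argmax(..., axis=2) calls and is otherwise an assumption.

import numpy as np

def per_class_iou(true_mask, pred_mask, n_classes):
    # Collapse one-hot / probability maps to label maps, then compute
    # intersection-over-union separately for every class.
    true_lbl = np.argmax(true_mask, axis=2)
    pred_lbl = np.argmax(pred_mask, axis=2)
    ious = []
    for c in range(n_classes):
        inter = np.logical_and(true_lbl == c, pred_lbl == c).sum()
        union = np.logical_or(true_lbl == c, pred_lbl == c).sum()
        ious.append(inter / union if union else float('nan'))
    return ious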
Example #9
# #### evaluate the model

# In[12]:

model, _ = get_model(params)
model.load_state_dict(torch.load(f"./models/{model.params.name}.pth"))
model.eval().to(device)

# In[13]:

model.eval()
_, all_preds, all_labels = validate(model, valid_dl, custom_loss)

# In[14]:

calc_metrics(all_preds, all_labels)

# #### plot results

# In[15]:

# for convenience, we can pass an integer instead of the full string
int2key = {
    0: 'red_light',
    1: 'hazard_stop',
    2: 'speed_sign',
    3: 'relative_angle',
    4: 'center_distance',
    5: 'veh_distance'
}
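A small usage note for the mapping above: downstream calls (not shown in this excerpt) can then accept either form, for instance via a helper like the hypothetical resolve_key below.

def resolve_key(task):
    # Accept either an integer index or the full string key.
    return int2key[task] if isinstance(task, int) else task

assert resolve_key(0) == resolve_key('red_light') == 'red_light'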
Example #10
                           type=int,
                           help='How many times to repeat an experiment')

    metrics_parser = subparsers.add_parser(
        'metric',
        help=
        'Metric calculation based on the snapshots produced by a blocking aggression minimization simulation experiment'
    )
    metrics_parser.add_argument(
        'aggression_threshold',
        type=float,
        help=
        "The threshold in [0,1] that determines when a user becomes aggressive. An aggression score larger than the threshold means that the user is aggressive. Eg. = 0.4"
    )
    metrics_parser.add_argument(
        'metric_type',
        type=str,
        choices=['similarity', 'aggression'],
        help=
        "The type of metric to calculate, similarity metrics or aggression. 'aggression' is suggested for blocking aggression minimization experiment"
    )

    args = parser.parse_args()
    print()
    print(args)

    if args.mode == 'simulation':
        experiment(args, False)
    elif args.mode == 'metric':
        calc_metrics(args, 'blocking')
Example #11
def main():
    parser = argparse.ArgumentParser()
    # Experiment
    parser.add_argument("--experiment_name", default="default")
    parser.add_argument("--idd_name", default="skeletal-age")
    parser.add_argument(
        "--mode",
        type=str,
        default='devries',
        choices=['baseline', 'devries', 'devries_odin', 'energy', 'oe'])
    parser.add_argument("--ood_name",
                        type=str,
                        nargs='+',
                        default=['retina', 'mura', 'mimic-crx'])
    parser.add_argument("--network", type=str, default="resnet")
    # Hyper params
    parser.add_argument("--num_epochs", type=int, default=300)
    parser.add_argument("--beta", type=float, default=0.3)
    parser.add_argument("--lmbda", type=float, default=0.1)
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--hint", type=bool, default=False)
    parser.add_argument("--hint_rate", type=float, default=0.5)
    parser.add_argument("--use_budget", type=bool, default=False)
    # Training params
    parser.add_argument("--use_scheduler", type=bool, default=False)
    parser.add_argument("--lr", type=int, default=1e-3)
    parser.add_argument('--losses',
                        nargs='+',
                        default=["boneage_mad", "accuracy"])
    parser.add_argument('--early_stop_metric',
                        type=str,
                        default="fpr_at_95_tpr")
    parser.add_argument('--early_stop', type=int, default=10)
    parser.add_argument('--eval_start', type=int, default=1)
    # Misc
    parser.add_argument("--checkpoint", default="")
    parser.add_argument("--load_memory",
                        type=bool,
                        default=False,
                        help="Load images into CPU")

    args = parser.parse_args()
    args = utils.compute_args(args)
    # Create dataloader according to experiments
    loader_args = {
        'name': args.idd_name,
        'mode': 'idd',
        'root_dir': args.root[args.idd_name],
        'csv_file': 'train.csv',
        'load_memory': args.load_memory
    }
    import torchvision.transforms as trn
    train_data = dset.MNIST(
        'MNIST',
        train=True,
        transform=trn.ToTensor(),
        download=True,
    )
    test_data = dset.MNIST(
        'MNIST',
        train=False,
        transform=trn.ToTensor(),
        download=True,
    )
    num_classes = 10

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    test_true_loader = torch.utils.data.DataLoader(test_data,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=4,
                                                   pin_memory=True)
    print(len(train_loader))
    print(len(test_true_loader))
    test_false_loaders = {}

    # /////////////// gaussian Noise ///////////////

    dummy_targets = torch.ones(5000)
    ood_data = torch.from_numpy(
        np.clip(
            np.random.normal(size=(5000, 1, 28, 28), loc=0.5,
                             scale=0.5).astype(np.float32), 0, 1))
    ood_data = torch.utils.data.TensorDataset(ood_data, dummy_targets)
    ood_loader = torch.utils.data.DataLoader(ood_data,
                                             batch_size=16,
                                             shuffle=True)
    test_false_loaders["gaussian"] = ood_loader

    # /////////////// Bernoulli Noise ///////////////

    dummy_targets = torch.ones(5000)
    ood_data = torch.from_numpy(
        np.random.binomial(n=1, p=0.5,
                           size=(5000, 1, 28, 28)).astype(np.float32))
    ood_data = torch.utils.data.TensorDataset(ood_data, dummy_targets)
    ood_loader = torch.utils.data.DataLoader(ood_data,
                                             batch_size=16,
                                             shuffle=True)

    test_false_loaders["Bernoulli"] = ood_loader

    # /////////////// CIFAR data ///////////////

    ood_data = dset.CIFAR10(
        'cifar',
        train=False,
        download=True,
        transform=trn.Compose([
            trn.Resize(28),
            trn.Lambda(lambda x: x.convert('L', (0.2989, 0.5870, 0.1140, 0))),
            trn.ToTensor()
        ]))
    ood_loader = torch.utils.data.DataLoader(ood_data,
                                             batch_size=16,
                                             shuffle=True,
                                             num_workers=4,
                                             pin_memory=True)
    test_false_loaders["CIFAR"] = ood_loader

    model_config = DeVriesLarsonModelConfig(args=args,
                                            hint_rate=args.hint_rate,
                                            lmbda=args.lmbda,
                                            beta=args.beta)

    def gelu(x):
        return torch.sigmoid(1.702 * x) * x
        # return 0.5 * x * (1 + torch.tanh(x * 0.7978845608 * (1 + 0.044715 * x * x)))

    class ConvNet(nn.Module):
        def __init__(self):
            super(ConvNet, self).__init__()
            self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
            self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
            self.conv2_drop = nn.Dropout2d()
            self.fc1 = nn.Linear(320, 50)
            self.fc2 = nn.Linear(50, 10)
            self.fc3 = nn.Linear(50, 1)

        def forward(self, x):
            import torch.nn.functional as F

            x = gelu(F.max_pool2d(self.conv1(x), 2))
            x = gelu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 320)
            x = gelu(self.fc1(x))
            # x = F.dropout(x)
            return self.fc2(x), self.fc3(x)

    net = ConvNet().cuda()
    import torch.optim as optim

    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_plots = Plot(idd_name=args.idd_name,
                      early_stop_metric=args.early_stop_metric)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)

    print(
        "Network", args.network, 'mode', args.mode, "\nLambda", args.lmbda,
        "beta", args.beta, "hint_rate", args.hint_rate,
        "\nTotal number of parameters : " +
        str(sum([p.numel() for p in net.parameters()]) / 1e6) + "M")

    init_epoch = 0
    checkpoints_folder = f"checkpoints/{args.experiment_name}"
    early_stop = 0

    best_early_stop_value = args.early_stop_start  # -inf or +inf
    for epoch in range(init_epoch, init_epoch + args.num_epochs):
        train_start = time.time()
        # Train phases
        net.train()
        for train_iter, (data, labels) in enumerate(train_loader, 0):
            optimizer.zero_grad()
            data, labels = data.cuda(), labels.cuda()
            output_batches = net(data.cuda())
            total_loss, task_loss, confidence_loss = model_config.criterion(
                output_batches, labels.cuda())
            total_loss.backward()
            optimizer.step()

            print(
                "\r[Epoch {}][Step {}/{}] Loss: {:.2f} [Task: {:.2f}, Confidence: {:.2f}, lambda: {:.2f}], Lr: {:.2e}, ES: {}, {:.2f} m remaining"
                .format(
                    epoch + 1,
                    train_iter,
                    int(len(train_loader.dataset) / args.batch_size),
                    total_loss.cpu().data.numpy(),
                    task_loss.cpu().data.numpy(),
                    confidence_loss.cpu().data.numpy(),
                    model_config.criterion.lmbda,
                    *[group['lr'] for group in optimizer.param_groups],
                    early_stop,
                    ((time.time() - train_start) / (train_iter + 1)) *
                    ((len(train_loader.dataset) / args.batch_size) -
                     train_iter) / 60,
                ),
                end='          ')

        # Eval phase
        if epoch + 1 >= args.eval_start:
            net.eval()

            def evaluate(data_loader, mode="confidence", idd=False):
                confidences = []
                idd_metrics = defaultdict(list)

                for test_iter, (data, labels) in enumerate(data_loader, 0):
                    data = data.view(-1, 1, 28, 28).cuda()

                    # Reassigns inputs_batch and label_batch to cuda
                    pred, confidence = net(data.cuda())
                    labels = labels.data.cpu().numpy()

                    # manage in domain metric
                    if idd:
                        loss_dict = {}
                        task_predictions = torch.argmax(
                            pred, dim=-1).data.cpu().numpy()
                        if "accuracy" in args.losses:
                            loss_dict['accuracy'] = list(
                                task_predictions == labels)
                        elif "boneage_mad" in args.losses:
                            loss_dict['boneage_mad'] = list(
                                abs(task_predictions - labels))
                        else:
                            raise NotImplementedError

                        for k, v in loss_dict.items():
                            idd_metrics[k].extend(v)

                    # Get confidence in prediction
                    confidences.extend(
                        get_confidence(net, data, pred, confidence, args))

                confidences = np.array(confidences)
                if idd:
                    # Plot accuracy
                    if 'accuracy' in idd_metrics:
                        plot_classification(
                            idd_metrics['accuracy'],
                            confidences,
                            checkpoints_folder,
                            name=str(args.idd_name) + ' - ' + str(
                                round(float(np.mean(idd_metrics['accuracy'])),
                                      4)) + ' - ' + 'Epoch ' + str(epoch + 1) +
                            '.jpg')

                    # Average metric over valset
                    for k, v in idd_metrics.items():
                        idd_metrics[k] = round(float(np.mean(v)), 4)

                return confidences, idd_metrics

            # In domain evaluation
            ind_confs, ind_metrics = evaluate(test_true_loader, idd=True)
            ind_labels = np.ones(ind_confs.shape[0])

            ind_metrics["IDD name"] = args.idd_name
            print(str(ind_metrics))

            # Out of domain evaluation
            early_stop_metric_value = 0
            ood_metric_dicts = []
            for ood_name, test_false_loader in test_false_loaders.items():
                ood_confs, _ = evaluate(test_false_loader, idd=False)
                ood_labels = np.zeros(ood_confs.shape[0])

                labels = np.concatenate([ind_labels, ood_labels])
                scores = np.concatenate([ind_confs, ood_confs])

                ood_metrics = calc_metrics(scores, labels)
                ood_metrics['OOD Name'] = ood_name
                print(str(ood_metrics))
                ood_metric_dicts.append(ood_metrics)

                # Plot metrics
                plot_metrics(scores,
                             labels,
                             ind_confs,
                             ood_confs,
                             checkpoints_folder,
                             name=str(ood_name) + ' - ' + 'Epoch ' +
                             str(epoch + 1))

                # fetch early stop value (might be an in-distribution metric)
                early_stop_metric_value += {
                    **ind_metrics,
                    **ood_metrics
                }[args.early_stop_metric]

            early_stop_metric_value = early_stop_metric_value / len(
                test_false_loaders)
            early_stop += 1
            loss_plots.update(epoch + 1, ind_metrics, ood_metric_dicts)
            # Save model + early stop
            # Early_stop_operator is min or max
            if args.early_stop_operator(
                    early_stop_metric_value,
                    best_early_stop_value) != best_early_stop_value:
                early_stop = 0
                best_early_stop_value = early_stop_metric_value
                utils.save_checkpoint(checkpoints_folder, {
                    "init_epoch":
                    epoch + 1,
                    "net":
                    net.state_dict(),
                    "optimizer":
                    optimizer.state_dict(),
                    "scheduler":
                    model_config.scheduler.state_dict()
                    if args.use_scheduler else None,
                    "ood_metrics":
                    ood_metric_dicts,
                    "ind_metrics":
                    ind_metrics,
                    "best_early_stop_value":
                    best_early_stop_value,
                    "args":
                    args,
                },
                                      keep_n_best=1)

                print('Early stop metric ' + str(args.early_stop_metric) +
                      ' beaten. Now ' + str(best_early_stop_value))

        if args.use_scheduler:
            model_config.scheduler.step(ind_metrics['accuracy'])
        if early_stop == args.early_stop:
            loss_plots.draw(checkpoints_folder)
            print("early_stop reached")
            break

    loss_plots.draw(checkpoints_folder)
    print('Done')
    return
Example #12
def run(results_dir,
        model_dir,
        base_dir,
        file_names,
        data_type,
        use_multi_scale=False,
        exported_disc_path=None,
        use_3dconv=False,
        compute_metrics=False,
        min_distance=3.,
        max_distance=150.,
        show_result=False):
    sess = tf.Session()
    in_image = tf.placeholder(tf.float32, [None, None, None, 3])

    gt_image = tf.placeholder(tf.float32, [None, None, None, 1])
    gt_mask = None
    if data_type == 'real':
        gt_mask = tf.placeholder(tf.float32, [None, None, None, 1])
        model = lsgan.build_model(in_image,
                                  gt_image,
                                  data_type=data_type,
                                  gt_mask=gt_mask,
                                  smooth_weight=0.1,
                                  adv_weight=0.0001,
                                  discriminator_ckpt=exported_disc_path,
                                  use_multi_scale=use_multi_scale,
                                  use_3dconv=use_3dconv)
        min_eval_distance = min_distance
        max_eval_distance = 80.
    else:
        model = lsgan.build_model(in_image,
                                  gt_image,
                                  data_type=data_type,
                                  gt_mask=gt_mask,
                                  smooth_weight=1e-4,
                                  adv_weight=0.0001,
                                  use_multi_scale=use_multi_scale,
                                  use_3dconv=use_3dconv)
        min_eval_distance = min_distance
        max_eval_distance = max_distance

    out_image = model['out_image']

    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, model_dir)

    per_image_metrics = []
    #mae = []

    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)

    results_folder = ['gated2depth', 'gated2depth_img', 'all']
    for result_folder in results_folder:
        if not os.path.exists(os.path.join(results_dir, result_folder)):
            os.makedirs(os.path.join(results_dir, result_folder))

    for ind in range(len(file_names)):
        # get the path from image id
        train_fn = file_names[ind]
        img_id = train_fn
        gta_pass = ''

        in_img = dsutil.read_gated_image(base_dir, gta_pass, img_id, data_type)

        input_patch = in_img
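        # Forward pass: the network output is assumed to be depth normalized to [0, 1],
        # so it is rescaled by max_distance and clipped to the valid evaluation range.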
        output = sess.run(out_image, feed_dict={in_image: input_patch})
        output = np.clip(output * max_distance, min_distance, max_distance)

        gt_patch, _ = dsutil.read_gt_image(base_dir,
                                           gta_pass,
                                           img_id,
                                           data_type,
                                           raw_values_only=True,
                                           min_distance=min_distance,
                                           max_distance=max_distance)

        if compute_metrics:
            #if data_type != 'real':
            #curr_mae = np.mean(np.abs(output - gt_patch), dtype=np.float64)
            curr_metrics = calc_metrics(output[0, :, :, 0],
                                        gt_patch,
                                        min_distance=min_eval_distance,
                                        max_distance=max_eval_distance)
            per_image_metrics.append(curr_metrics)
            #mae.append(curr_mae)

        # else:
        #depth_lidar1, _ = dsutil.read_gt_image(base_dir, gta_pass, img_id, data_type, raw_values_only=True, min_distance=min_distance, max_distance=max_distance)
        #curr_metrics = calc_metrics(output[0, :, :, 0], gt_patch, min_distance=min_eval_distance,
        #                            max_distance=max_eval_distance)
        #per_image_metrics.append(curr_metrics)
        #mae.append(curr_metrics)

        np.savez_compressed(
            os.path.join(results_dir, 'gated2depth', '{}'.format(img_id)),
            output)


        if data_type != 'real':
            #print(depth_lidar1.shape)
            depth_lidar1_color = visualize2D.colorize_depth(
                gt_patch,
                min_distance=min_eval_distance,
                max_distance=max_eval_distance)
        else:
            #print(depth_lidar1.shape)
            depth_lidar1_color = visualize2D.colorize_pointcloud(
                gt_patch,
                min_distance=min_eval_distance,
                max_distance=max_eval_distance,
                radius=3)

        depth_map_color = visualize2D.colorize_depth(
            output[0, :, :, 0],
            min_distance=min_eval_distance,
            max_distance=max_eval_distance)

        in_out_shape = (int(depth_map_color.shape[0] +
                            depth_map_color.shape[0] / 3. + gt_patch.shape[0]),
                        depth_map_color.shape[1], 3)

        input_output = np.zeros(shape=in_out_shape)
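        # Composite visualization: the three gated input slices (downscaled to 1/3)
        # side by side on top, the predicted depth map in the middle, and the
        # ground-truth visualization at the bottom.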
        scaled_input = cv2.resize(input_patch[0, :, :, :],
                                  dsize=(int(input_patch.shape[2] / 3),
                                         int(input_patch.shape[1] / 3)),
                                  interpolation=cv2.INTER_AREA) * 255

        for i in range(3):
            input_output[:scaled_input.shape[0], :scaled_input.shape[1],
                         i] = scaled_input[:, :, 0]
            input_output[:scaled_input.shape[0],
                         scaled_input.shape[1]:2 * scaled_input.shape[1],
                         i] = scaled_input[:, :, 1]
            input_output[:scaled_input.shape[0],
                         scaled_input.shape[1] * 2:scaled_input.shape[1] * 3,
                         i] = scaled_input[:, :, 2]

        input_output[scaled_input.shape[0]:scaled_input.shape[0] +
                     depth_map_color.shape[0], :, :] = depth_map_color
        input_output[scaled_input.shape[0] +
                     depth_map_color.shape[0]:, :, :] = depth_lidar1_color
        cv2.imwrite(
            os.path.join(results_dir, 'gated2depth_img',
                         '{}.jpg'.format(img_id)),
            depth_map_color.astype(np.uint8))
        cv2.imwrite(os.path.join(results_dir, 'all', '{}.jpg'.format(img_id)),
                    input_output.astype(np.uint8))

        if show_result:
            import matplotlib.pyplot as plt
            plt.imshow(
                cv2.cvtColor(input_output.astype(np.uint8), cv2.COLOR_BGR2RGB))
            plt.show()

    if compute_metrics:
        res = np.mean(per_image_metrics, axis=0)
        res_str = ''
        for i in range(res.shape[0]):
            res_str += '{}={:.2f} \n'.format(metric_str[i], res[i])
        print(res_str)
        with open(os.path.join(results_dir, 'results.txt'), 'w') as f:
            f.write(res_str)
        with open(os.path.join(results_dir, 'results.tex'), 'w') as f:
            f.write(' & '.join(metric_str) + '\n')
            f.write(' & '.join(['{:.2f}'.format(r) for r in res]))
Example #13
def main():
    # set GPU ID
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    cudnn.benchmark = True

    # check save path
    save_path = args.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # set num_class
    if args.data == 'cifar100':
        num_class = 100
    else:
        num_class = 10

    # set num_classes
    model_dict = {
        "num_classes": num_class,
    }

    _, test_loader, \
        test_onehot, test_label = dataset.get_loader(args.data,
                                        args.data_path,
                                        args.batch_size)

    train_set = dataset.get_dataset(args.data, args.data_path, mode='train')
    unlabeled_pool = dataset.get_dataset(args.data,
                                         args.data_path,
                                         mode='unlabeled')
    num_train = len(train_set)

    indices = list(range(num_train))
    random.shuffle(indices)

    labeled_set = indices[:args.initial_budget]
    unlabeled_set = indices[args.initial_budget:]

    labeled_dataloader = DataLoader(train_set,
                                    sampler=SubsetRandomSampler(labeled_set),
                                    batch_size=args.batch_size,
                                    drop_last=True)

    now = datetime.datetime.now()
    formatedDate = now.strftime('%Y%m%d_%H_%M_')
    result_logger = utils.Logger(
        os.path.join(args.save_path, formatedDate + 'result.log'))

    arguments = []
    for key, val in (args.__dict__.items()):
        arguments.append("{} : {}\n".format(key, val))
    result_logger.write(arguments)
    # make logger
    train_logger = utils.Logger(
        os.path.join(save_path, formatedDate + 'train.log'))
    test_epoch_logger = utils.Logger(
        os.path.join(save_path, formatedDate + 'test_epoch.log'))

    current_train = len(labeled_set)
    while (current_train < args.max_budget + 1):
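        # Active-learning round: retrain a fresh model on the current labeled pool,
        # evaluate it, then grow the labeled set with the lowest-confidence samples
        # from the unlabeled pool (acquisition step below).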
        # set model
        if args.model == 'res':
            model = resnet.ResNet152(**model_dict).cuda()
        elif args.model == 'dense':
            model = densenet_BC.DenseNet3(depth=100,
                                          num_classes=num_class,
                                          growth_rate=12,
                                          reduction=0.5,
                                          bottleneck=True,
                                          dropRate=0.0).cuda()
        elif args.model == 'vgg':
            model = vgg.vgg16(**model_dict).cuda()

        # set criterion
        cls_criterion = nn.CrossEntropyLoss().cuda()
        ranking_criterion = nn.MarginRankingLoss(margin=0.0).cuda()

        # set optimizer (default:sgd)
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              weight_decay=0.0005,
                              nesterov=False)

        # set scheduler
        scheduler = MultiStepLR(optimizer, milestones=[120, 160], gamma=0.1)

        # make History Class
        correctness_history = crl_utils.History(len(
            labeled_dataloader.dataset))

        # start Train
        for epoch in range(1, args.epochs + 1):
            train.train(labeled_dataloader, model, cls_criterion,
                        ranking_criterion, optimizer, epoch,
                        correctness_history, train_logger, args)
            test_acc, test_loss = metrics.evaluate(test_loader, model,
                                                   cls_criterion, args.budget,
                                                   epoch, test_epoch_logger)
            scheduler.step()
            # save model
            if epoch == args.epochs:
                torch.save(model.state_dict(),
                           os.path.join(save_path, 'model.pth'))
        # finish train

        # calc measure
        acc, aurc, eaurc, aupr, fpr, ece, nll, brier = metrics.calc_metrics(
            test_loader, test_label, test_onehot, model, cls_criterion)
        # result write
        result_logger.write([
            current_train, test_acc, aurc * 1000, eaurc * 1000, aupr * 100,
            fpr * 100, ece * 100, nll * 10, brier * 100
        ])
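        # Acquisition: score a random subset of the unlabeled pool and move the
        # args.budget lowest-confidence samples into the labeled set.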
        random.shuffle(unlabeled_set)
        subset = unlabeled_set[:args.subset]
        unlabeled_poolloader = DataLoader(
            unlabeled_pool,
            sampler=SubsetSequentialSampler(subset),
            batch_size=args.batch_size,
            drop_last=False)
        all_confidence = get_confidence(model, unlabeled_poolloader)
        print(len(all_confidence))
        arg = np.argsort(all_confidence)
        labeled_set = list(
            set(labeled_set) | set(np.array(unlabeled_set)[arg][:args.budget]))
        unlabeled_set = list(set(unlabeled_set) - set(labeled_set))
        current_train = len(labeled_set)

        #unlabeled_set = list(torch.tensor(unlabeled_set)[arg][args.budget:].numpy()) \
        #                            + unlabeled_set[args.subset:]
        print("after acquistiion")
        print('current labeled :', len(labeled_set))
        print('current unlabeled :', len(unlabeled_set))

        labeled_dataloader = DataLoader(
            train_set,
            sampler=SubsetRandomSampler(labeled_set),
            batch_size=args.batch_size,
            drop_last=True)
def calculate_alg(singletons, net, uw, ud, g_type, alg):
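    # For every threshold directory under `communities`, load each ego's community
    # file together with its SNAP edge list, compute the community quality metrics
    # (average degree, conductance, cut ratio, density, expansion, normalized cut,
    # separability) and write one JSON list per metric.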

    communities = "/home/amaury/communities_hashmap/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    graphs = "/home/amaury/graphs_hashmap/" + str(net) + "/" + str(
        g_type) + "/"

    out_ad = str(output_dir) + "average_degree/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_c = str(output_dir) + "conductance/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_cut_r = str(output_dir) + "cut_ratio/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_d = str(output_dir) + "density/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_e = str(output_dir) + "expansion/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_normal_cut = str(output_dir) + "normalized_cut/" + str(
        g_type) + "/" + str(alg) + "/" + str(singletons) + "/" + str(net) + "/"
    out_s = str(output_dir) + "separability/" + str(g_type) + "/" + str(
        alg) + "/" + str(singletons) + "/" + str(net) + "/"

    if not os.path.exists(communities):
        print("Diretório com as comunidades não encontrado: " +
              str(communities) + "\n")

    else:
        print(
            "\n######################################################################"
        )

        for threshold in os.listdir(communities):
            if not os.path.isdir(str(communities) + str(threshold) + "/"):
                print("Threshold para a rede " + str(net) +
                      " não encontrado: " + str(threshold))

            else:
                create_dirs(out_ad, out_c, out_cut_r, out_d, out_e,
                            out_normal_cut, out_s)

                if os.path.exists(
                        str(out_ad) + str(threshold) +
                        ".json") and os.path.exists(
                            str(out_c) + str(threshold) +
                            ".json") and os.path.exists(
                                str(out_cut_r) + str(threshold) +
                                ".json") and os.path.exists(
                                    str(out_d) + str(threshold) +
                                    ".json") and os.path.exists(
                                        str(out_e) + str(threshold) +
                                        ".json") and os.path.exists(
                                            str(out_normal_cut) +
                                            str(threshold) +
                                            ".json") and os.path.exists(
                                                str(out_s) + str(threshold) +
                                                ".json"):
                    print("Arquivo de destino já existe: " + str(threshold) +
                          ".json")

                else:
                    print(
                        "######################################################################"
                    )

                    result_ad = []
                    result_c = []
                    result_cut_r = []
                    result_d = []
                    result_e = []
                    result_normal_cut = []
                    result_s = []

                    i = 0  # pointer to the current ego
                    for file in os.listdir(
                            str(communities) + str(threshold) + "/"):
                        if os.path.isfile(
                                str(communities) + str(threshold) + "/" +
                                file):
                            ego_id = file.split(".txt")
                            ego_id = long(ego_id[0])
                            i += 1

                            if not os.path.isfile(
                                    str(graphs) + str(ego_id) + ".edge_list"):
                                print(
                                    "ERROR - EGO: " + str(i) +
                                    " - Arquivo com lista de arestas não encontrado:"
                                    + str(graphs) + str(ego_id) + ".edge_list")

                            else:
                                with open(
                                        str(communities) + str(threshold) +
                                        "/" + file, 'r') as community_file:
                                    if ud is False:
                                        G = snap.LoadEdgeList(
                                            snap.PNGraph,
                                            str(graphs) + str(ego_id) +
                                            ".edge_list", 0, 1
                                        )  # load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')
                                    else:
                                        G = snap.LoadEdgeList(
                                            snap.PUNGraph,
                                            str(graphs) + str(ego_id) +
                                            ".edge_list", 0, 1
                                        )  # load from a text file - may require a separator: snap.LoadEdgeList(snap.PNGraph, file, 0, 1, '\t')

                                    print(
                                        str(g_type) + " - " + str(alg) +
                                        " - " + str(singletons) + " - Rede: " +
                                        str(net) + " - THRESHOLD: " +
                                        str(threshold) + " - ego(" + str(i) +
                                        "): " + str(file))

                                    communities_dict = prepare_communities(
                                        community_file
                                    )  # returns a dictionary with the communities

                                    avg_ad, avg_c, avg_cut_r, avg_d, avg_e, avg_normal_cut, avg_s = metrics.calc_metrics(
                                        communities_dict, G,
                                        ud)  # compute the metrics

                                    result_ad.append(
                                        avg_ad['media'])  # save metric
                                    result_c.append(
                                        avg_c['media'])  # save metric
                                    result_cut_r.append(
                                        avg_cut_r['media'])  # save metric
                                    result_d.append(
                                        avg_d['media'])  # save metric
                                    result_e.append(
                                        avg_e['media'])  # save metric
                                    result_normal_cut.append(
                                        avg_normal_cut['media']
                                    )  # save metric
                                    result_s.append(
                                        avg_s['media'])  # save metric

                                    print("Average Degree: " +
                                          str(result_ad[i - 1]) +
                                          " - Conductance: " +
                                          str(result_c[i - 1]) +
                                          " - Cut Ratio: " +
                                          str(result_cut_r[i - 1]) +
                                          " - Density: " +
                                          str(result_d[i - 1]))
                                    print("Expansion: " +
                                          str(result_e[i - 1]) +
                                          " - Normalized Cut: " +
                                          str(result_normal_cut[i - 1]) +
                                          " - Separability: " +
                                          str(result_s[i - 1]))
                                    print
                    print(
                        "######################################################################"
                    )

                    with open(str(out_ad) + str(threshold) + ".json",
                              "w") as f:
                        f.write(
                            json.dumps(result_ad, separators=(',', ':')) +
                            "\n")

                    with open(str(out_c) + str(threshold) + ".json", "w") as f:
                        f.write(
                            json.dumps(result_c, separators=(',', ':')) + "\n")

                    with open(str(out_cut_r) + str(threshold) + ".json",
                              "w") as f:
                        f.write(
                            json.dumps(result_cut_r, separators=(',', ':')) +
                            "\n")

                    with open(str(out_d) + str(threshold) + ".json", "w") as f:
                        f.write(
                            json.dumps(result_d, separators=(',', ':')) + "\n")

                    with open(str(out_e) + str(threshold) + ".json", "w") as f:
                        f.write(
                            json.dumps(result_e, separators=(',', ':')) + "\n")

                    with open(
                            str(out_normal_cut) + str(threshold) + ".json",
                            "w") as f:
                        f.write(
                            json.dumps(result_normal_cut,
                                       separators=(',', ':')) + "\n")

                    with open(str(out_s) + str(threshold) + ".json", "w") as f:
                        f.write(
                            json.dumps(result_s, separators=(',', ':')) + "\n")

    print(
        "######################################################################"
    )
Example #15
def main(*kargs, **kwargs):
    get_kwargs(kwargs)
    train_fname = kwargs['train']
    test_fname = kwargs['test']
    result_fname = kwargs['output']
    embeds_fname = kwargs['embeds']
    logger_fname = kwargs['logger']
    swear_words_fname = kwargs['swear_words']
    wrong_words_fname = kwargs['wrong_words']
    warm_start = kwargs['warm_start']
    format_embeds = kwargs['format_embeds']
    config = kwargs['config']
    train_clear = kwargs['train_clear']
    test_clear = kwargs['test_clear']
    output_dir = kwargs['output_dir']
    norm_prob = kwargs['norm_prob']
    norm_prob_koef = kwargs['norm_prob_koef']
    gpus = kwargs['gpus']

    model_file = {
        'dense': os.path.join(output_dir, 'dense.h5'),
        'cnn': os.path.join(output_dir, 'cnn.h5'),
        'lstm': os.path.join(output_dir, 'lstm.h5'),
        'concat': os.path.join(output_dir, 'concat.h5'),
        'lr': os.path.join(output_dir, '{}_logreg.bin'),
        'catboost': os.path.join(output_dir, '{}_catboost.bin')
    }

    # ====Create logger====
    logger = Logger(logging.getLogger(), logger_fname)

    # ====Detect GPUs====
    logger.debug(device_lib.list_local_devices())

    # ====Load data====
    logger.info('Loading data...')
    train_df = load_data(train_fname)
    test_df = load_data(test_fname)

    target_labels = [
        'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
    ]
    num_classes = len(target_labels)

    # ====Load additional data====
    logger.info('Loading additional data...')
    swear_words = load_data(swear_words_fname,
                            func=lambda x: set(x.T[0]),
                            header=None)
    wrong_words_dict = load_data(wrong_words_fname,
                                 func=lambda x: {val[0]: val[1]
                                                 for val in x})

    tokinizer = RegexpTokenizer(r'\w+')
    regexps = [
        re.compile("([a-zA-Z]+)([0-9]+)"),
        re.compile("([0-9]+)([a-zA-Z]+)")
    ]

    # ====Load word vectors====
    logger.info('Loading embeddings...')
    embed_dim = 300
    embeds = Embeds(embeds_fname, 'fasttext', format=format_embeds)

    # ====Clean texts====
    logger.info('Cleaning text...')
    if warm_start:
        logger.info('Use warm start...')
    else:
        train_df['comment_text_clear'] = clean_text(train_df['comment_text'],
                                                    tokinizer,
                                                    wrong_words_dict,
                                                    swear_words, regexps)
        test_df['comment_text_clear'] = clean_text(test_df['comment_text'],
                                                   tokinizer, wrong_words_dict,
                                                   swear_words, regexps)
        train_df.to_csv(train_clear, index=False)
        test_df.to_csv(test_clear, index=False)

    # ====Calculate maximum seq length====
    logger.info('Calc text length...')
    train_df.fillna('unknown', inplace=True)
    test_df.fillna('unknown', inplace=True)
    train_df['text_len'] = train_df['comment_text_clear'].apply(
        lambda words: len(words.split()))
    test_df['text_len'] = test_df['comment_text_clear'].apply(
        lambda words: len(words.split()))
    max_seq_len = np.round(train_df['text_len'].mean() +
                           3 * train_df['text_len'].std()).astype(int)
    logger.debug('Max seq length = {}'.format(max_seq_len))

    # ====Prepare data to NN====
    logger.info('Converting texts to sequences...')
    max_words = 100000

    train_df['comment_seq'], test_df[
        'comment_seq'], word_index = convert_text2seq(
            train_df['comment_text_clear'].tolist(),
            test_df['comment_text_clear'].tolist(),
            max_words,
            max_seq_len,
            lower=True,
            char_level=False,
            uniq=True)
    logger.debug('Dictionary size = {}'.format(len(word_index)))

    logger.info('Preparing embedding matrix...')
    embedding_matrix, words_not_found = get_embedding_matrix(
        embed_dim, embeds, max_words, word_index)
    logger.debug('Embedding matrix shape = {}'.format(
        np.shape(embedding_matrix)))
    logger.debug('Number of null word embeddings = {}'.format(
        np.sum(np.sum(embedding_matrix, axis=1) == 0)))

    logger.info('Deleting unknown words from seq...')
    train_df['comment_seq'] = clean_seq(train_df['comment_seq'],
                                        embedding_matrix, max_seq_len)
    test_df['comment_seq'] = clean_seq(test_df['comment_seq'],
                                       embedding_matrix, max_seq_len)

    # ====Train/test split data====
    x = np.array(train_df['comment_seq'].tolist())
    y = np.array(train_df[target_labels].values)
    x_train_nn, x_test_nn, y_train_nn, y_test_nn, train_idxs, test_idxs = split_data(
        x, y, test_size=0.2, shuffle=True, random_state=42)
    test_df_seq = np.array(test_df['comment_seq'].tolist())
    y_nn = []
    logger.debug('X shape = {}'.format(np.shape(x_train_nn)))

    # ====Train models====

    params = Params(config)

    cnn = get_cnn(embedding_matrix,
                  num_classes,
                  max_seq_len,
                  num_filters=params.get('cnn').get('num_filters'),
                  l2_weight_decay=params.get('cnn').get('l2_weight_decay'),
                  dropout_val=params.get('cnn').get('dropout_val'),
                  dense_dim=params.get('cnn').get('dense_dim'),
                  add_sigmoid=True,
                  train_embeds=params.get('cnn').get('train_embeds'),
                  gpus=gpus)
    lstm = get_lstm(embedding_matrix,
                    num_classes,
                    max_seq_len,
                    l2_weight_decay=params.get('lstm').get('l2_weight_decay'),
                    lstm_dim=params.get('lstm').get('lstm_dim'),
                    dropout_val=params.get('lstm').get('dropout_val'),
                    dense_dim=params.get('lstm').get('dense_dim'),
                    add_sigmoid=True,
                    train_embeds=params.get('lstm').get('train_embeds'),
                    gpus=gpus)
    concat = get_concat_model(
        embedding_matrix,
        num_classes,
        max_seq_len,
        n_layers=params.get('concat').get('n_layers'),
        concat=params.get('concat').get('concat'),
        pool=params.get('concat').get('pool'),
        num_filters=params.get('concat').get('num_filters'),
        l2_weight_decay=params.get('concat').get('l2_weight_decay'),
        lstm_dim=params.get('concat').get('lstm_dim'),
        dropout_val=params.get('concat').get('dropout_val'),
        dense_dim=params.get('concat').get('dense_dim'),
        add_sigmoid=True,
        train_embeds=params.get('concat').get('train_embeds'),
        gpus=gpus)

    models = []
    for model_label in params.get('models'):
        if model_label == 'cnn':
            models.append([model_label, cnn])
        elif model_label == 'dense':
            models.append([model_label, dense])
        elif model_label == 'lstm':
            models.append([model_label, lstm])
        elif model_label == 'concat':
            models.append([model_label, concat])
        else:
            raise ValueError(
                'Invalid model {}. Model is not defined.'.format(model_label))

    for i in range(len(models)):
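        # Train (or warm-start) this model; keep its validation predictions in y_nn
        # for the CatBoost meta-model and save its per-label test predictions.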
        model_label, model = models[i]
        logger.info("training {} ...".format(model_label))
        if params.get(model_label).get('warm_start') and os.path.exists(
                params.get(model_label).get('model_file')):
            logger.info('{} warm starting...'.format(model_label))
            model = load_model(params.get(model_label).get('model_file'))
            models[i][1] = model
        else:
            hist = train(
                x_train_nn,
                y_train_nn,
                model,
                batch_size=params.get(model_label).get('batch_size'),
                num_epochs=params.get(model_label).get('num_epochs'),
                learning_rate=params.get(model_label).get('learning_rate'),
                early_stopping_delta=params.get(model_label).get(
                    'early_stopping_delta'),
                early_stopping_epochs=params.get(model_label).get(
                    'early_stopping_epochs'),
                use_lr_strategy=params.get(model_label).get('use_lr_strategy'),
                lr_drop_koef=params.get(model_label).get('lr_drop_koef'),
                epochs_to_drop=params.get(model_label).get('epochs_to_drop'),
                logger=logger)
        y_nn.append(model.predict(x_test_nn))
        save_predictions(test_df, model.predict(test_df_seq), target_labels,
                         model_label)
        metrics = get_metrics(y_test_nn, y_nn[-1], target_labels)
        logger.debug('{} metrics:\n{}'.format(model_label,
                                              print_metrics(metrics)))
        logger.debug('Model path = {}'.format(model_file[model_label]))
        model.save(model_file[model_label])

    # TFIDF + LogReg
    logger.info('training LogReg over tfidf...')
    train_tfidf, val_tfidf, test_tfidf, word_tfidf, char_tfidf = get_tfidf(
        train_df['comment_text_clear'].values[train_idxs],
        train_df['comment_text_clear'].values[test_idxs],
        test_df['comment_text_clear'].values)

    models_lr = []
    metrics_lr = {}
    y_tfidf = []
    for i, label in enumerate(target_labels):
        model = LogisticRegression(C=4.0,
                                   solver='sag',
                                   max_iter=1000,
                                   n_jobs=16)
        model.fit(train_tfidf, y_train_nn[:, i])
        y_tfidf.append(model.predict_proba(val_tfidf)[:, 1])
        test_df['tfidf_{}'.format(label)] = model.predict_proba(test_tfidf)[:,
                                                                            1]
        metrics_lr[label] = calc_metrics(y_test_nn[:, i], y_tfidf[-1])
        models_lr.append(model)
        joblib.dump(model, model_file['lr'].format(label))
    metrics_lr['Avg'] = {
        'Logloss':
        np.mean([metric['Logloss'] for label, metric in metrics_lr.items()])
    }
    logger.debug('LogReg(TFIDF) metrics:\n{}'.format(
        print_metrics(metrics_lr)))

    # Bow for catboost
    if params.get('catboost').get('add_bow'):
        top_pos_words = []
        top_neg_words = []
        for i in range(num_classes):
            top_pos_words.append([])
            top_neg_words.append([])
            top_pos_words[-1], top_neg_words[
                -1] = get_most_informative_features(
                    [word_tfidf, char_tfidf],
                    models_lr[i],
                    n=params.get('catboost').get('bow_top'))

        top_pos_words = list(
            set(
                np.concatenate([[val for score, val in top]
                                for top in top_pos_words])))
        top_neg_words = list(
            set(
                np.concatenate([[val for score, val in top]
                                for top in top_neg_words])))
        top = list(set(np.concatenate([top_pos_words, top_neg_words])))
        train_bow = get_bow(train_df['comment_text_clear'].values[train_idxs],
                            top)
        val_bow = get_bow(train_df['comment_text_clear'].values[test_idxs],
                          top)
        test_bow = get_bow(test_df['comment_text_clear'].values, top)
        logger.debug('Count bow words = {}'.format(len(top)))

    # Meta catboost
    logger.info('training catboost as metamodel...')
    train_df['text_unique_len'] = train_df['comment_text_clear'].apply(
        calc_text_uniq_words)
    test_df['text_unique_len'] = test_df['comment_text_clear'].apply(
        calc_text_uniq_words)

    train_df['text_unique_koef'] = train_df['text_unique_len'] / train_df[
        'text_len']
    test_df[
        'text_unique_koef'] = test_df['text_unique_len'] / test_df['text_len']

    text_len_features = train_df[[
        'text_len', 'text_unique_len', 'text_unique_koef'
    ]].values[test_idxs]
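    # Meta-features for CatBoost: per-label validation predictions of every NN,
    # text-length statistics, TF-IDF LogReg probabilities and, optionally, the
    # bag-of-words vectors, concatenated per sample.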

    x_train_catboost = []
    y_train_catboost = y_test_nn
    features = y_nn
    features.extend([text_len_features, np.array(y_tfidf).T])
    if params.get('catboost').get('add_bow'):
        features.append(val_bow)
    for feature in zip(*features):
        x_train_catboost.append(np.concatenate(feature))

    models_cb = []
    metrics_cb = {}
    x_train_cb, x_val_cb, y_train_cb, y_val_cb = train_test_split(
        x_train_catboost, y_train_catboost, test_size=0.20, random_state=42)
    for i, label in enumerate(target_labels):
        model = CatBoostClassifier(
            loss_function='Logloss',
            iterations=params.get('catboost').get('iterations'),
            depth=params.get('catboost').get('depth'),
            rsm=params.get('catboost').get('rsm'),
            learning_rate=params.get('catboost').get('learning_rate'),
            device_config=params.get('catboost').get('device_config'))
        model.fit(x_train_cb,
                  y_train_cb[:, i],
                  eval_set=(x_val_cb, y_val_cb[:, i]),
                  use_best_model=True)
        y_hat_cb = model.predict_proba(x_val_cb)
        metrics_cb[label] = calc_metrics(y_val_cb[:, i], y_hat_cb[:, 1])
        models_cb.append(model)
        joblib.dump(model, model_file['catboost'].format(label))
    metrics_cb['Avg'] = {
        'Logloss':
        np.mean([metric['Logloss'] for label, metric in metrics_cb.items()])
    }
    logger.debug('CatBoost metrics:\n{}'.format(print_metrics(metrics_cb)))

    # ====Predict====
    logger.info('Applying models...')
    text_len_features = test_df[[
        'text_len', 'text_unique_len', 'text_unique_koef'
    ]].values
    y_tfidf_test = test_df[[
        'tfidf_{}'.format(label) for label in target_labels
    ]].values
    x_test_cb = []
    features = []
    for model_label, _ in models:
        features.append(test_df[[
            '{}_{}'.format(model_label, label) for label in target_labels
        ]].values)
    features.extend([text_len_features, y_tfidf_test])
    if params.get('catboost').get('add_bow'):
        features.append(test_bow)
    for feature in tqdm(zip(*features)):
        x_test_cb.append(np.concatenate(feature))

    for label, model in zip(target_labels, models_cb):
        pred = model.predict_proba(x_test_cb)
        test_df[label] = np.array(list(pred))[:, 1]

    # ====Normalize probabilities====
    if norm_prob:
        for label in target_labels:
            test_df[label] = norm_prob_koef * test_df[label]

    # ====Save results====
    logger.info('Saving results...')
    test_df[[
        'id', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult',
        'identity_hate'
    ]].to_csv(result_fname, index=False, header=True)
Example #16
        " An aggression score larger than the threshold means that the user is aggressive. Eg. = 0.4"
    )
    metrics_parser.add_argument(
        '--metric_type',
        default='similarity',
        type=str,
        choices=['similarity', 'aggression'],
        help=
        "The type of metric to calculate, similarity metrics or aggression. "
        "'similarity' is suggested for aggression modeling experiment")
    metrics_parser.add_argument(
        "--seedsize",
        default=None,
        type=int,
        help=
        "Seed set size. It is the number of initial infected nodes. 5594 is the total number of aggressive users"
    )
    metrics_parser.add_argument(
        "--configuration",
        default=None,
        type=str,
        help="Specifies a single experiment to run metrics for")
    args = parser.parse_args()
    print()
    print(args)

    if args.mode == 'simulation':
        experiment(args, True)
    elif args.mode == 'metric':
        calc_metrics(args, 'modeling')
        type=int,
        help='How many times to repeat an experiment')

    metrics_parser = subparsers.add_parser(
        'metric',
        help=
        'Metric calculation based on the snapshots produced by a competitive aggression minimization simulation experiment'
    )
    metrics_parser.add_argument(
        'aggression_threshold',
        type=float,
        help=
        "The threshold in [0,1] that determines when a user becomes aggressive. An aggression score larger than the threshold means that the user is aggressive. Eg. = 0.4"
    )
    metrics_parser.add_argument(
        'metric_type',
        type=str,
        choices=['similarity', 'aggression'],
        help=
        "The type of metric to calculate, similarity metrics or aggression. 'aggression' is suggested for competitive aggression minimization experiment"
    )

    args = parser.parse_args()
    print()
    print(args)

    if args.mode == 'simulation':
        experiment(args, True)
    elif args.mode == 'metric':
        calc_metrics(args, 'minimization')
    print "\t", "Converting to numpy array..."

    X_all[interv] = np.array(X_all[interv])
    X_all_test[interv] = np.array(X_all_test[interv])
    y_all[interv] = np.array(y_all[interv])
    y_all_test[interv] = np.array(y_all_test[interv])
    size = len(y_all[interv])

    print "\t", "Model compute for size = " + str(size)
    print "\t", "computing model..."
    model.fit(X_all[interv], y_all[interv])

    print "\t", "predicting..."
    expected = y_all_test[interv]
    predicted = model.predict(X_all_test[interv])
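    # Bundle everything metrics.calc_metrics needs: the result/confusion-matrix
    # output paths, the model interval, the fitted model, and the expected vs.
    # predicted labels.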

    parameters = {
        "filename_model_results": "results/SVM" + "_" + interv + ".txt",
        "filename_confusion_matrix":
        "results/SVM" + "_" + interv + "_Confusion_Matrix.csv",
        "model_interval": [interval[interv][0], interval[interv][1]],
        "model_type": "SVM Model",
        "model": model,
        "expected": y_all_test[interv],
        "predicted": predicted,
        "target_names": target_names
    }

    metrics.calc_metrics(parameters)
Example #19
def main():
    file_name = "./flood_graph/150_250/128/500/ji_sort/1_conf/sample-wised/default/{}/".format(
        args.b)
    start = time.time()
    # set GPU ID
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    cudnn.benchmark = True

    # check save path
    save_path = file_name
    # save_path = args.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # make dataloader
    if args.valid == True:
        train_loader, valid_loader, test_loader, test_onehot, test_label = dataset.get_valid_loader(
            args.data, args.data_path, args.batch_size)

    else:
        train_loader, train_onehot, train_label, test_loader, test_onehot, test_label = dataset.get_loader(
            args.data, args.data_path, args.batch_size)

    # set num_class
    if args.data == 'cifar100':
        num_class = 100
    else:
        num_class = 10

    # set num_classes
    model_dict = {
        "num_classes": num_class,
    }

    # set model
    if args.model == 'res':
        model = resnet.resnet110(**model_dict).cuda()
    elif args.model == 'dense':
        model = densenet_BC.DenseNet3(depth=100,
                                      num_classes=num_class,
                                      growth_rate=12,
                                      reduction=0.5,
                                      bottleneck=True,
                                      dropRate=0.0).cuda()
    elif args.model == 'vgg':
        model = vgg.vgg16(**model_dict).cuda()

    # set criterion
    if args.loss == 'MS':
        cls_criterion = losses.MultiSimilarityLoss().cuda()
    elif args.loss == 'Contrastive':
        cls_criterion = losses.ContrastiveLoss().cuda()
    elif args.loss == 'Triplet':
        cls_criterion = losses.TripletLoss().cuda()
    elif args.loss == 'NPair':
        cls_criterion = losses.NPairLoss().cuda()
    elif args.loss == 'Focal':
        cls_criterion = losses.FocalLoss(gamma=3.0).cuda()
    else:
        if args.mode == 0:
            cls_criterion = nn.CrossEntropyLoss().cuda()
        else:
            cls_criterion = nn.CrossEntropyLoss(reduction="none").cuda()

    ranking_criterion = nn.MarginRankingLoss(margin=0.0).cuda()

    # set optimizer (default:sgd)
    optimizer = optim.SGD(
        model.parameters(),
        lr=0.1,
        momentum=0.9,
        weight_decay=5e-4,
        # weight_decay=0.0001,
        nesterov=False)

    # optimizer = optim.SGD(model.parameters(),
    #                       lr=float(args.lr),
    #                       momentum=0.9,
    #                       weight_decay=args.weight_decay,
    #                       nesterov=False)

    # set scheduler
    # scheduler = MultiStepLR(optimizer,
    #                         milestones=[500, 750],
    #                         gamma=0.1)

    scheduler = MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1)

    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_decay_step, gamma=args.lr_decay_gamma)

    # make logger
    train_logger = utils.Logger(os.path.join(save_path, 'train.log'))
    result_logger = utils.Logger(os.path.join(save_path, 'result.log'))

    # make History Class
    correctness_history = crl_utils.History(len(train_loader.dataset))

    ## define matrix
    if args.data == 'cifar10' or args.data == 'cifar100':  # CIFAR train sets have 50,000 samples; SVHN has 73,257
        matrix_idx_confidence = [[_] for _ in range(50000)]
        matrix_idx_iscorrect = [[_] for _ in range(50000)]
    else:
        matrix_idx_confidence = [[_] for _ in range(73257)]
        matrix_idx_iscorrect = [[_] for _ in range(73257)]

    # write csv
    #'''
    import csv
    f = open('{}/logs_{}_{}.txt'.format(file_name, args.b, args.epochs),
             'w',
             newline='')
    f.write("location = {}\n\n".format(file_name) + str(args))

    f0 = open('{}/Test_confidence_{}_{}.csv'.format(file_name, args.b,
                                                    args.epochs),
              'w',
              newline='')
    # f0 = open('./baseline_graph/150_250/128/500/Test_confidence_{}_{}.csv'.format(args.b, args.epochs), 'w', newline='')
    # f0 = open('./CRL_graph/150_250/Test_confidence_{}_{}.csv'.format(args.b, args.epochs), 'w', newline='')

    wr_conf_test = csv.writer(f0)
    header = [_ for _ in range(args.epochs + 1)]
    header[0] = 'Epoch'
    wr_conf_test.writerows([header])

    f1 = open('{}/Train_confidence_{}_{}.csv'.format(file_name, args.b,
                                                     args.epochs),
              'w',
              newline='')
    # f1 = open('./baseline_graph/150_250/128/500/Train_confidence_{}_{}.csv'.format(args.b, args.epochs), 'w', newline='')
    # f1 = open('./CRL_graph/150_250/Train_confidence_{}_{}.csv'.format(args.b, args.epochs), 'w', newline='')

    wr = csv.writer(f1)
    header = [_ for _ in range(args.epochs + 1)]
    header[0] = 'Epoch'
    wr.writerows([header])

    f2 = open('{}/Train_Flood_{}_{}_{}.csv'.format(file_name, args.data,
                                                   args.b, args.epochs),
              'w',
              newline='')
    # f2 = open('./baseline_graph/150_250/128/500/Train_Base_{}_{}_{}.csv'.format(args.data, args.b, args.epochs), 'w', newline='')
    # f2 = open('./CRL_graph/150_250/Train_Flood_{}_{}_{}.csv'.format(args.data, args.b, args.epochs), 'w', newline='')

    wr_train = csv.writer(f2)
    header = [_ for _ in range(args.epochs + 1)]
    header[0] = 'Epoch'
    wr_train.writerows([header])

    f3 = open('{}/Test_Flood_{}_{}_{}.csv'.format(file_name, args.data, args.b,
                                                  args.epochs),
              'w',
              newline='')
    # f3 = open('./baseline_graph/150_250/128/500/Test_Base_{}_{}_{}.csv'.format(args.data, args.b, args.epochs), 'w', newline='')
    # f3 = open('./CRL_graph/150_250/Test_Flood_{}_{}_{}.csv'.format(args.data, args.b, args.epochs), 'w', newline='')

    wr_test = csv.writer(f3)
    header = [_ for _ in range(args.epochs + 1)]
    header[0] = 'Epoch'
    wr_test.writerows([header])
    #'''

    # start Train
    best_valid_acc = 0
    test_ece_report = []
    test_acc_report = []
    test_nll_report = []
    test_over_con99_report = []
    test_e99_report = []
    test_cls_loss_report = []

    train_ece_report = []
    train_acc_report = []
    train_nll_report = []
    train_over_con99_report = []
    train_e99_report = []
    train_cls_loss_report = []
    train_rank_loss_report = []
    train_total_loss_report = []
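    # Per-epoch loop: train while logging per-sample confidence/correctness, optionally
    # validate, compute calibration metrics (ECE, NLL, E99, over-confidence counts) on
    # both the train and test sets, and, if args.sort is set, rebuild the train loader
    # with the sorted ordering at epoch 260.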

    for epoch in range(1, args.epochs + 1):
        scheduler.step()

        matrix_idx_confidence, matrix_idx_iscorrect, idx, iscorrect, confidence, target, cls_loss_tr, rank_loss_tr, batch_correctness, total_confidence, total_correctness = \
            train.train(matrix_idx_confidence, matrix_idx_iscorrect, train_loader,
                    model,
                    wr,
                    cls_criterion,
                    ranking_criterion,
                    optimizer,
                    epoch,
                    correctness_history,
                    train_logger,
                    args)

        if args.rank_weight != 0.0:
            print("RANK ", rank_loss_tr)
            total_loss_tr = cls_loss_tr + rank_loss_tr

        if args.valid == True:
            idx, iscorrect, confidence, target, cls_loss_val, acc = train.valid(
                valid_loader, model, cls_criterion, ranking_criterion,
                optimizer, epoch, correctness_history, train_logger, args)
            if acc > best_valid_acc:
                best_valid_acc = acc
                print("*** Update Best Acc ***")

        # save model
        if epoch == args.epochs:
            torch.save(model.state_dict(),
                       os.path.join(save_path, 'model.pth'))

        print("########### Train ###########")
        acc_tr, aurc_tr, eaurc_tr, aupr_tr, fpr_tr, ece_tr, nll_tr, brier_tr, E99_tr, over_99_tr, cls_loss_tr = metrics.calc_metrics(
            train_loader, train_label, train_onehot, model, cls_criterion,
            args)

        if args.sort == True and epoch == 260:
            #if args.sort == True:
            train_loader = dataset.sort_get_loader(
                args.data, args.data_path, args.batch_size, idx,
                np.array(target), iscorrect,
                batch_correctness, total_confidence, total_correctness,
                np.array(confidence), epoch, args)

        train_acc_report.append(acc_tr)
        train_nll_report.append(nll_tr * 10)
        train_ece_report.append(ece_tr)
        train_over_con99_report.append(over_99_tr)
        train_e99_report.append(E99_tr)
        train_cls_loss_report.append(cls_loss_tr)

        if args.rank_weight != 0.0:
            train_total_loss_report.append(total_loss_tr)
            train_rank_loss_report.append(rank_loss_tr)
        print("CLS ", cls_loss_tr)

        # finish train
        print("########### Test ###########")
        # calc measure
        acc_te, aurc_te, eaurc_te, aupr_te, fpr_te, ece_te, nll_te, brier_te, E99_te, over_99_te, cls_loss_te = metrics.calc_metrics(
            test_loader, test_label, test_onehot, model, cls_criterion, args)
        test_ece_report.append(ece_te)
        test_acc_report.append(acc_te)
        test_nll_report.append(nll_te * 10)
        test_over_con99_report.append(over_99_te)
        test_e99_report.append(E99_te)
        test_cls_loss_report.append(cls_loss_te)

        print("CLS ", cls_loss_te)
        print("############################")

    # for idx in matrix_idx_confidence:
    #     wr.writerow(idx)

    #'''
    # draw graph
    df = pd.DataFrame()
    df['epoch'] = [i for i in range(1, args.epochs + 1)]
    df['test_ece'] = test_ece_report
    df['train_ece'] = train_ece_report
    fig_loss = plt.figure(figsize=(35, 35))
    fig_loss.set_facecolor('white')
    ax = fig_loss.add_subplot()

    ax.plot(df['epoch'],
            df['test_ece'],
            df['epoch'],
            df['train_ece'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] ECE per epoch', fontsize=80)
    # plt.title('[BASE] ECE per epoch', fontsize=80)
    # plt.title('[CRL] ECE per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('ECE', fontsize=70)
    plt.ylim([0, 1])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_ECE_lr_{}.png'.format(file_name, args.model, args.b,
                                                args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_ECE_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_ECE_lr_{}.png'.format(args.model, args.b, args.epochs))

    df2 = pd.DataFrame()
    df2['epoch'] = [i for i in range(1, args.epochs + 1)]
    df2['test_acc'] = test_acc_report
    df2['train_acc'] = train_acc_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()

    ax.plot(df2['epoch'],
            df2['test_acc'],
            df2['epoch'],
            df2['train_acc'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] Accuracy per epoch', fontsize=80)
    # plt.title('[BASE] Accuracy per epoch', fontsize=80)
    # plt.title('[CRL] Accuracy per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('Accuracy', fontsize=70)
    plt.ylim([0, 100])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_acc_lr_{}.png'.format(file_name, args.model, args.b,
                                                args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_acc_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_acc_lr_{}.png'.format(args.model, args.b, args.epochs))

    df3 = pd.DataFrame()
    df3['epoch'] = [i for i in range(1, args.epochs + 1)]
    df3['test_nll'] = test_nll_report
    df3['train_nll'] = train_nll_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()

    ax.plot(df3['epoch'],
            df3['test_nll'],
            df3['epoch'],
            df3['train_nll'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] NLL per epoch', fontsize=80)
    # plt.title('[BASE] NLL per epoch', fontsize=80)
    # plt.title('[CRL] NLL per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('NLL', fontsize=70)
    plt.ylim([0, 45])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_nll_lr_{}.png'.format(file_name, args.model, args.b,
                                                args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_nll_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_nll_lr_{}.png'.format(args.model, args.b, args.epochs))

    df4 = pd.DataFrame()
    df4['epoch'] = [i for i in range(1, args.epochs + 1)]
    df4['test_over_con99'] = test_over_con99_report
    df4['train_over_con99'] = train_over_con99_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()

    ax.plot(df4['epoch'],
            df4['test_over_con99'],
            df4['epoch'],
            df4['train_over_con99'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] Over conf99 per epoch', fontsize=80)
    # plt.title('[BASE] Over conf99 per epoch', fontsize=80)
    # plt.title('[CRL] Over conf99 per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('Over con99', fontsize=70)
    if args.data == 'cifar10' or args.data == 'cifar100':
        plt.ylim([0, 50000])
    else:
        plt.ylim([0, 73257])

    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_over_conf99_lr_{}.png'.format(
        file_name, args.model, args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_over_conf99_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_over_conf99_lr_{}.png'.format(args.model, args.b, args.epochs))

    df5 = pd.DataFrame()
    df5['epoch'] = [i for i in range(1, args.epochs + 1)]
    df5['test_e99'] = test_e99_report
    df5['train_e99'] = train_e99_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()

    ax.plot(df5['epoch'],
            df5['test_e99'],
            df5['epoch'],
            df5['train_e99'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] E99 per epoch', fontsize=80)
    # plt.title('[BASE] E99 per epoch', fontsize=80)
    # plt.title('[CRL] E99 per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('E99', fontsize=70)
    plt.ylim([0, 0.2])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_E99_flood_lr_{}.png'.format(file_name, args.model,
                                                      args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_E99_flood_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_E99_flood_lr_{}.png'.format(args.model, args.b, args.epochs))

    df5 = pd.DataFrame()
    df5['epoch'] = [i for i in range(1, args.epochs + 1)]
    df5['test_cls_loss'] = test_cls_loss_report
    df5['train_cls_loss'] = train_cls_loss_report
    fig_acc = plt.figure(figsize=(35, 35))
    fig_acc.set_facecolor('white')
    ax = fig_acc.add_subplot()

    ax.plot(df5['epoch'],
            df5['test_cls_loss'],
            df5['epoch'],
            df5['train_cls_loss'],
            linewidth=10)
    ax.legend(['Test', 'Train'], loc=2, prop={'size': 60})
    plt.title('[FL] CLS_loss per epoch', fontsize=80)
    # plt.title('[BASE] CLS_loss per epoch', fontsize=80)
    # plt.title('[CRL] CLS_loss per epoch', fontsize=80)
    plt.xlabel('Epoch', fontsize=70)
    plt.ylabel('Loss', fontsize=70)
    plt.ylim([0, 5])
    plt.setp(ax.get_xticklabels(), fontsize=30)
    plt.setp(ax.get_yticklabels(), fontsize=30)
    plt.savefig('{}/{}_{}_cls_loss_flood_lr_{}.png'.format(
        file_name, args.model, args.b, args.epochs))
    # plt.savefig('./baseline_graph/150_250/128/500/{}_{}_cls_loss_flood_lr_{}.png'.format(args.model, args.b, args.epochs))
    # plt.savefig('./CRL_graph/150_250/{}_{}_cls_loss_flood_lr_{}.png'.format(args.model, args.b, args.epochs))

    if args.rank_weight != 0.0:
        df6 = pd.DataFrame()
        df6['epoch'] = [i for i in range(1, args.epochs + 1)]
        df6['train_cls_loss'] = train_cls_loss_report
        df6['train_rank_loss'] = train_rank_loss_report
        df6['train_total_loss'] = train_total_loss_report
        fig_acc = plt.figure(figsize=(35, 35))
        fig_acc.set_facecolor('white')
        ax = fig_acc.add_subplot()

        ax.plot(df6['epoch'],
                df6['train_cls_loss'],
                df6['epoch'],
                df6['train_rank_loss'],
                df6['epoch'],
                df6['train_total_loss'],
                linewidth=10)
        ax.legend(['CLS', 'Rank', 'Total'], loc=2, prop={'size': 60})
        plt.title('[FL] CLS_loss per epoch', fontsize=80)
        plt.xlabel('Epoch', fontsize=70)
        plt.ylabel('Loss', fontsize=70)
        # plt.ylim([0, 5])
        plt.setp(ax.get_xticklabels(), fontsize=30)
        plt.setp(ax.get_yticklabels(), fontsize=30)
        plt.savefig(
            './CRL_graph/150_250/{}_{}_cls_loss_flood_lr_{}.png'.format(
                args.model, args.b, args.epochs))

    test_acc_report.insert(0, 'ACC')
    test_ece_report.insert(0, 'ECE')
    test_nll_report.insert(0, 'NLL')
    test_over_con99_report.insert(0, 'Over_conf99')
    test_e99_report.insert(0, 'E99')
    test_cls_loss_report.insert(0, 'CLS')
    wr_test.writerow(test_acc_report)
    wr_test.writerow(test_ece_report)
    wr_test.writerow(test_nll_report)
    wr_test.writerow(test_over_con99_report)
    wr_test.writerow(test_e99_report)
    wr_test.writerow(test_cls_loss_report)

    train_acc_report.insert(0, 'ACC')
    train_ece_report.insert(0, 'ECE')
    train_nll_report.insert(0, 'NLL')
    train_over_con99_report.insert(0, 'Over_conf99')
    train_e99_report.insert(0, 'E99')
    train_cls_loss_report.insert(0, 'CLS')

    wr_train.writerow(train_acc_report)
    wr_train.writerow(train_ece_report)
    wr_train.writerow(train_nll_report)
    wr_train.writerow(train_over_con99_report)
    wr_train.writerow(train_e99_report)
    wr_train.writerow(train_cls_loss_report)

    if args.rank_weight != 0.0:
        train_rank_loss_report.insert(0, 'Rank')
        train_total_loss_report.insert(0, 'Total')
        wr_train.writerow(train_rank_loss_report)
        wr_train.writerow(train_total_loss_report)

    #'''

    # result write
    result_logger.write([
        acc_te, aurc_te * 1000, eaurc_te * 1000, aupr_te * 100, fpr_te * 100,
        ece_te * 100, nll_te * 10, brier_te * 100, E99_te * 100
    ])
    if args.valid == True:
        print("Best Valid Acc : {}".format(acc))
    print("Flood Level: {}".format(args.b))
    print("Sort : {}".format(args.sort))
    print("Sort Mode : {}".format(args.sort_mode))
    print("TIME : ", time.time() - start)