예제 #1
0
    def run(self, data, gt):
        """Evaluate single-linkage clustering at every configured threshold.

        For each threshold in ``self.thresholds`` the hard cluster labels
        and a flat score vector are written to ``self.target_dir``; the
        result dict with the lowest (best) CREMI score is returned.
        """
        labels = obtain_hard_clusters(data, self.thresholds)

        results_list = []
        for idx, threshold in enumerate(self.thresholds):
            threshold_labels = labels[:, idx, ...]

            # persist the label volume for this threshold
            np.save(
                os.path.join(
                    self.target_dir,
                    "single_linkage_labels_threshold_{}.npy".format(
                        threshold)), threshold_labels)

            scores = compute_metrics(threshold_labels, gt.copy())
            entry = {"parameters": {"threshold": threshold},
                     "scores": scores}

            # persist [CREMI, arand, voi_split, voi_merge] as a flat array
            np.save(
                os.path.join(
                    self.target_dir,
                    "single_linkage_scores_threshold_{}".format(threshold)),
                np.array([
                    scores["CREMI_score"], scores["arand"],
                    scores["voi"][0], scores["voi"][1]
                ]))
            results_list.append(entry)

        # lower CREMI score is better
        return min(results_list, key=lambda r: r["scores"]["CREMI_score"])
예제 #2
0
    def run(self, data, gt):
        """Grid-search HDBSCAN over min_samples and min_cluster_size.

        Each batch of `data` is clustered independently; noise points are
        re-assigned via ``self.assign_noise``.  Labels and score vectors
        are saved to ``self.target_dir`` and the result dict with the
        lowest (best) CREMI score is returned.
        """
        results_list = []
        for min_samples_name in tqdm(self.min_samples_list,
                                     desc="Processing min_samples",
                                     leave=False):
            for min_cluster_size in tqdm(self.min_clusters_size_list,
                                         desc="Processing min_cluster_size",
                                         leave=False):
                # "same" ties min_samples to the current min_cluster_size
                if min_samples_name == "same":
                    min_samples = min_cluster_size
                else:
                    min_samples = min_samples_name

                clusterer = hdbscan.HDBSCAN(min_samples=min_samples,
                                            min_cluster_size=min_cluster_size,
                                            approx_min_span_tree=False)
                labels_list = []
                for batch_id in tqdm(range(data.shape[0]),
                                     desc="Processing batch",
                                     leave=False):
                    clusterer.fit(data[batch_id].reshape(-1, data.shape[-1]))
                    labels_list.append(
                        self.assign_noise(
                            data[batch_id].reshape(-1, data.shape[-1]),
                            clusterer.labels_).reshape(data.shape[1:-1]))
                    tqdm.write(
                        "Done with clustering batch {}".format(batch_id))

                labels = np.array(labels_list)

                self.save_labels(labels, min_cluster_size, min_samples)

                # compute metrics
                results = {
                    "parameters": {
                        "min_samples": min_samples,
                        "min_cluster_size": min_cluster_size
                    },
                    "scores": compute_metrics(labels, gt.copy())
                }
                # save results
                # BUG FIX: the format arguments were swapped, so the file
                # name reported min_cluster_size under the "min_samples"
                # slot and vice versa, mislabeling (and for asymmetric
                # pairs, overwriting) the saved score files.
                np.save(
                    os.path.join(
                        self.target_dir,
                        "hdbscan_scores_min_samples_{}_min_cluster_size_{}".
                        format(min_samples, min_cluster_size)),
                    np.array([
                        results["scores"]["CREMI_score"],
                        results["scores"]["arand"],
                        results["scores"]["voi"][0],
                        results["scores"]["voi"][1]
                    ]))
                results_list.append(results)
                tqdm.write(
                    "Done with min_samples {} and min_cluster_size {}".format(
                        min_samples, min_cluster_size))

        # lower CREMI score is better -> return the best configuration
        return sorted(results_list,
                      key=lambda x: x["scores"]["CREMI_score"])[0]
예제 #3
0
    def run(self, data, gt):
        """Evaluate complete-linkage clustering at several cut thresholds.

        The linkage tree is built once per batch and cut with ``fcluster``
        at every threshold in ``self.thresholds``.  Labels and score
        vectors are saved to ``self.target_dir``; the result dict with the
        lowest (best) CREMI score is returned.
        """
        results_list = []
        # one linkage tree per batch, reused for every threshold cut
        all_labels = []
        for batch_id in tqdm(range(data.shape[0]),
                             desc="Processing batch",
                             leave=False):
            flat = data[batch_id].reshape(-1, data.shape[-1])
            tree = complete(flat)
            per_threshold = []
            for threshold in tqdm(self.thresholds,
                                  desc="Cutting at thresholds",
                                  leave=False):
                cut = fcluster(tree, t=threshold, criterion="distance")
                per_threshold.append(cut.reshape(*data.shape[1:-1]))
            all_labels.append(per_threshold)
            tqdm.write("Done with clustering batch {}".format(batch_id))

        labels = np.array(all_labels)

        # score and persist the labelling obtained at each threshold
        for idx, threshold in enumerate(self.thresholds):
            threshold_labels = labels[:, idx, ...]
            np.save(
                os.path.join(
                    self.target_dir,
                    "complete_linkage_labels_threshold_{}.npy".format(
                        threshold)), threshold_labels)

            scores = compute_metrics(threshold_labels, gt.copy())
            # persist [CREMI, arand, voi_split, voi_merge] as a flat array
            np.save(
                os.path.join(
                    self.target_dir,
                    "complete_linkage_scores_threshold_{}".format(threshold)),
                np.array([
                    scores["CREMI_score"], scores["arand"],
                    scores["voi"][0], scores["voi"][1]
                ]))
            results_list.append({"parameters": {"threshold": threshold},
                                 "scores": scores})

        # lower CREMI score is better
        return min(results_list, key=lambda r: r["scores"]["CREMI_score"])
예제 #4
0
def main(args):
    """Compute and print verb/noun/action accuracies for a results file.

    Loads ground-truth labels, model outputs and the tail-class /
    unseen-participant splits, derives action scores from the top-100
    verb/noun combinations, computes top-k accuracies per split and task,
    and prints them (scaled to percentages) as YAML.
    """
    labels: pd.DataFrame = pd.read_pickle(args.labels)
    if "narration_id" in labels.columns:
        labels.set_index("narration_id", inplace=True)
    labels = add_action_class_column(labels)

    def _index_values(csv_path, index_col):
        # these CSVs carry the ids/classes in their index column
        return pd.read_csv(csv_path, index_col=index_col).index.values

    unseen_participants: np.ndarray = _index_values(
        args.unseen_participant_ids_csv, "participant_id")
    tail_verb_classes: np.ndarray = _index_values(
        args.tail_verb_classes_csv, "verb")
    tail_noun_classes: np.ndarray = _index_values(
        args.tail_noun_classes_csv, "noun")

    results = load_results(args.results)
    narration_ids = results["narration_id"]
    scores = {
        "verb": results["verb_output"],
        "noun": results["noun_output"],
    }
    (verbs, nouns), _scores = compute_action_scores(
        scores["verb"], scores["noun"], top_n=100
    )
    # per segment: {action_id: score} for the top-100 verb/noun pairs
    scores["action"] = [
        {
            action_id_from_verb_noun(verb, noun): score
            for verb, noun, score in zip(segment_verbs, segment_nouns, segment_score)
        }
        for segment_verbs, segment_nouns, segment_score in zip(verbs, nouns, _scores)
    ]
    accuracies = compute_metrics(
        labels.loc[narration_ids],
        scores,
        tail_verb_classes,
        tail_noun_classes,
        unseen_participants,
    )

    # flatten to "<split>_<task>_accuracy_at_<k>" -> percentage
    display_metrics = {
        f"{split}_{task}_accuracy_at_{k}": float(task_accuracy * 100)
        for split in accuracies.keys()
        for task in ["verb", "noun", "action"]
        for k, task_accuracy in zip((1, 5), accuracies[split][task])
    }

    print(yaml.dump(display_metrics))
def computeAverageMetrics(imfeats, recipefeats, k, t, forceorder=False):
    """Computes retrieval metrics for two sets of features

    Parameters
    ----------
    imfeats : np.ndarray [n x d]
        The image features.
    recipefeats : np.ndarray [n x d]
        The recipe features.
    k : int
        Ranking size.
    t : int
        Number of evaluations to run (function returns the average).
    forceorder : bool
        Whether to force a particular order instead of picking random samples

    Returns
    -------
    dict
        Dictionary mapping metric name -> list of values, one per run.

    """

    glob_metrics = {}
    i = 0
    for _ in range(t):

        if forceorder:
            # pick the same samples in the same order for evaluation
            # forceorder is only True when the function is used during training
            sub_ids = np.array(range(i, i + k))
            i += k
        else:
            sub_ids = random.sample(range(0, len(imfeats)), k)
        imfeats_sub = imfeats[sub_ids, :]
        recipefeats_sub = recipefeats[sub_ids, :]

        metrics = compute_metrics(imfeats_sub,
                                  recipefeats_sub,
                                  recall_klist=(1, 5, 10))

        # setdefault replaces the manual "init list on first use" check
        for metric_name, metric_value in metrics.items():
            glob_metrics.setdefault(metric_name, []).append(metric_value)
    return glob_metrics
예제 #6
0
    def test(self, which_dataset, condition_target):
        """Evaluate segmentation on the source or target test loader.

        Restores the generator G and segmenter S, runs them in eval mode
        over the chosen loader with the given domain condition, and prints
        the resulting loss / confusion-matrix-derived metrics.
        """
        loader = (self.source_loader
                  if which_dataset == 'source' else self.target_loader)

        # Restore the trained generator and segmenter weights.
        self.restore_model(self.G, 'G', self.log_dir)
        self.restore_model(self.S, 'S', self.log_dir)

        # Switch both networks to evaluation mode.
        self.G.eval()
        self.S.eval()

        # Running segmentation loss and a 2x2 confusion matrix on GPU.
        metrics = {'loss_segm': 0, 'iou': 0, 'accuracy': 0}
        cm = torch.zeros(2, 2).float().cuda()

        with torch.no_grad():
            for x, gt in loader:
                # Prepare input images and target masks.
                x = x.to(self.device)
                gt = gt.to(self.device)

                # Condition flag: 1 for source domain, 0 for target.
                condition = 1. if condition_target == 'source' else 0.
                cond_vec = condition * torch.ones(x.size(0), 1).to(self.device)
                _, h = self.G(x, cond_vec)

                s = self.S(h)
                metrics['loss_segm'] += self.segm_criterion(s, gt).item()

                # Accumulate the confusion matrix.
                cm = update_cm(cm, s, gt)

        metrics['loss_segm'] /= len(loader)

        # Derive iou / accuracy from the confusion matrix.
        metrics = compute_metrics(cm, metrics)

        print_metrics('TEST ' + which_dataset + ': ', metrics)
예제 #7
0
    def val(self, epoch, model_path, val_loader, val_log, cfg):
        """Validate detection on `val_loader` and append a CSV line to `val_log`.

        Writes "epoch,mean_precision,mean_recall".  Samples without
        ground-truth boxes contribute zero to the sums but are still
        counted in the denominator — presumably intentional; TODO confirm.
        """
        detecter = Detector(model_path, cfg)
        mean_precision = 0
        mean_recall = 0
        sample_count = val_loader.num_samples
        for i in range(sample_count):
            image_path, gt_bboxes = val_loader.getitem(i)
            results = detecter.run(image_path)
            pre_bboxes = results[1]
            if len(gt_bboxes) > 0:
                precision, recall = compute_metrics(pre_bboxes, gt_bboxes)
                mean_precision += precision
                mean_recall += recall

        # BUG FIX: guard against an empty validation set, which previously
        # raised ZeroDivisionError.
        denom = max(sample_count, 1)
        log_str = "{},{:.6f},{:.6f}\n".format(epoch,
                                              mean_precision / denom,
                                              mean_recall / denom)
        val_log.write(log_str)
        val_log.flush()
예제 #8
0
    def validation(self, epoch):
        """Run one validation pass and return the early-stopping loss.

        Iterates the mix/source/target validation loaders in lockstep,
        accumulating generator and segmentation losses plus a 2x2
        confusion matrix; logs averaged scalars and sample images to
        tensorboard and returns ``metrics['G/loss_es']``.
        """
        # Put all networks in evaluation mode (no weight loading here).
        self.G.eval()
        self.D.eval()
        if self.Df is not None:
            self.Df.eval()
        self.S.eval()

        mix_iter = iter(self.mix_loader_val)
        source_iter = iter(self.source_loader_val)
        target_iter = iter(self.target_loader_val)

        # Evaluate segmentation
        # iou/accuracy are lists (filled by compute_metrics); the loss
        # entries are running sums averaged at the end.
        metrics = {
            'S/loss_segm': 0,
            'iou': [],
            'accuracy': [],
            'G/loss': 0,
            'G/loss_fake': 0,
            'G/loss_cycle': 0,
            'G/loss_cls': 0,
            'G/loss_id': 0,
            'Ge/loss_fdom': 0,
            'Ge/loss_frf': 0,
            'Ge/loss_ffeat': 0
        }
        # 2x2 confusion matrix accumulated on GPU.
        cm = torch.zeros(2, 2).float().cuda()
        i = 0  # number of processed batches (used for averaging)
        with torch.no_grad():
            while True:

                # =================================================================================== #
                #                                 1. Preprocessing                                    #
                # =================================================================================== #
                # Fetch real images and labels.
                # NOTE(review): bare except; if mix_iter is exhausted here,
                # x_real/c_org/gt_real stay unbound and the code below raises
                # NameError — assumes the mix loader has at least as many
                # batches as source/target; confirm.
                try:
                    x_real, c_org, gt_real = next(mix_iter)
                except:
                    print(
                        "mix_iter shouldn't have raised this exception in validation"
                    )

                # Fetch source images and masks
                # Exhaustion (or a short final batch) ends the validation loop.
                try:
                    x, gt = next(source_iter)
                    if x.size(0) < self.batch_size:
                        raise Exception
                    x_source, gt_source = x, gt
                except:
                    break

                # Fetch target images and masks
                # Same exit condition as above for the target loader.
                try:
                    x, gt = next(target_iter)
                    if x.size(0) < self.batch_size:
                        raise Exception
                    x_target, gt_target = x, gt
                except:
                    break

                x_real = x_real.to(self.device)  # Input images.
                x_source = x_source.to(self.device)
                x_target = x_target.to(self.device)
                gt_source = gt_source.to(self.device)
                gt_target = gt_target.to(self.device)
                gt_real = gt_real.to(self.device)
                c_org = c_org.to(self.device)  # Original domain labels.

                # =================================================================================== #
                #                               4. Generator                                #
                # =================================================================================== #
                _, s_source, loss_log = self.G_losses(x_real, c_org, gt_real,
                                                      x_source, gt_source,
                                                      x_target, gt_target)

                cm = update_cm(cm, s_source, gt_source)

                # =================================================================================== #
                #                                 5. Miscellaneous                                    #
                # =================================================================================== #
                # Accumulate every logged loss for later averaging.
                for k in loss_log:
                    metrics[k] += loss_log[k]
                i += 1
        # Fill iou/accuracy from the confusion matrix.
        metrics = compute_metrics(cm, metrics)
        # Early-stopping loss: generator loss minus the weighted adversarial
        # and feature-domain terms.
        metrics['G/loss_es'] = metrics['G/loss'] - self.lambda_g * metrics[
            'G/loss_fake'] - self.lambda_fdom * metrics[
                'Ge/loss_fdom'] - self.lambda_frf * metrics['Ge/loss_frf']
        # Average every metric over the i batches, except the
        # 'iou'/'accuracy' lists (excluded via the negative lookahead).
        pattern = re.compile("(?!iou|accuracy).*")
        metrics.update(
            {k: v / i
             for k, v in metrics.items() if pattern.match(k)})

        # Log metrics
        self.logger.scalar_summary(mode='val', epoch=epoch, **metrics)

        # Log visualization
        x_target = x_target.to(self.device)
        self.tb_images(x_target,
                       torch.zeros(x_target.size(0), 1).to(self.device), epoch,
                       'val')

        return metrics['G/loss_es']
예제 #9
0
def main(args):
    """Train the recipe-generation model end to end.

    Sets up logging/checkpoint directories, train/val data loaders and
    the model with its optimizer (optionally resuming, or transferring
    the CNN encoder from another run), then runs the epoch loop with
    optional CNN fine-tuning, learning-rate decay, tensorboard logging,
    checkpointing, and early stopping on ``args.es_metric`` with
    patience ``args.patience``.
    """

    # Create model directory & other aux folders for logging
    where_to_save = os.path.join(args.save_dir, args.project_name,
                                 args.model_name)
    checkpoints_dir = os.path.join(where_to_save, 'checkpoints')
    logs_dir = os.path.join(where_to_save, 'logs')
    tb_logs = os.path.join(args.save_dir, args.project_name, 'tb_logs',
                           args.model_name)
    make_dir(where_to_save)
    make_dir(logs_dir)
    make_dir(checkpoints_dir)
    make_dir(tb_logs)
    if args.tensorboard:
        logger = Visualizer(tb_logs, name='visual_results')

    # check if we want to resume from last checkpoint of current model
    if args.resume:
        args = pickle.load(
            open(os.path.join(checkpoints_dir, 'args.pkl'), 'rb'))
        args.resume = True

    # logs to disk
    if not args.log_term:
        print("Training logs will be saved to:",
              os.path.join(logs_dir, 'train.log'))
        sys.stdout = open(os.path.join(logs_dir, 'train.log'), 'w')
        sys.stderr = open(os.path.join(logs_dir, 'train.err'), 'w')

    print(args)
    pickle.dump(args, open(os.path.join(checkpoints_dir, 'args.pkl'), 'wb'))

    # patience init
    curr_pat = 0

    # Build data loader
    data_loaders = {}
    datasets = {}

    data_dir = args.recipe1m_dir
    for split in ['train', 'val']:

        transforms_list = [transforms.Resize((args.image_size))]

        if split == 'train':
            # Image preprocessing, normalization for the pretrained resnet
            transforms_list.append(transforms.RandomHorizontalFlip())
            transforms_list.append(
                transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)))
            transforms_list.append(transforms.RandomCrop(args.crop_size))

        else:
            transforms_list.append(transforms.CenterCrop(args.crop_size))
        transforms_list.append(transforms.ToTensor())
        transforms_list.append(
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)))

        transform = transforms.Compose(transforms_list)
        max_num_samples = max(args.max_eval,
                              args.batch_size) if split == 'val' else -1
        data_loaders[split], datasets[split] = get_loader(
            data_dir,
            args.aux_data_dir,
            split,
            args.maxseqlen,
            args.maxnuminstrs,
            args.maxnumlabels,
            args.maxnumims,
            transform,
            args.batch_size,
            shuffle=split == 'train',
            num_workers=args.num_workers,
            drop_last=True,
            max_num_samples=max_num_samples,
            use_lmdb=args.use_lmdb,
            suff=args.suff)

    # vocab sizes are identical across splits, so reading them from the
    # last-built dataset ('val') is fine
    ingr_vocab_size = datasets[split].get_ingrs_vocab_size()
    instrs_vocab_size = datasets[split].get_instrs_vocab_size()

    # Build the model
    model = get_model(args, ingr_vocab_size, instrs_vocab_size)
    keep_cnn_gradients = False

    decay_factor = 1.0

    # add model parameters
    if args.ingrs_only:
        params = list(model.ingredient_decoder.parameters()) + list(
            model.ingredient_encoder.parameters())
    elif args.recipe_only:
        params = list(model.recipe_decoder.parameters()) + list(
            model.ingredient_encoder.parameters())
    else:
        params = list(model.recipe_decoder.parameters()) + list(model.ingredient_decoder.parameters()) \
                 + list(model.ingredient_encoder.parameters())

    # only train the linear layer in the encoder if we are not transfering from another model
    if args.transfer_from == '':
        params += list(model.image_encoder.linear.parameters())
    params_cnn = list(model.image_encoder.resnet.parameters())

    print("CNN params:", sum(p.numel() for p in params_cnn if p.requires_grad))
    print("decoder params:", sum(p.numel() for p in params if p.requires_grad))
    # start optimizing cnn from the beginning
    if params_cnn is not None and args.finetune_after == 0:
        optimizer = torch.optim.Adam(
            [{
                'params': params
            }, {
                'params': params_cnn,
                'lr': args.learning_rate * args.scale_learning_rate_cnn
            }],
            lr=args.learning_rate,
            weight_decay=args.weight_decay)
        keep_cnn_gradients = True
        print("Fine tuning resnet")
    else:
        optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    if args.resume:
        model_path = os.path.join(args.save_dir, args.project_name,
                                  args.model_name, 'checkpoints', 'model.ckpt')
        optim_path = os.path.join(args.save_dir, args.project_name,
                                  args.model_name, 'checkpoints', 'optim.ckpt')
        optimizer.load_state_dict(torch.load(optim_path, map_location=map_loc))
        # move the restored optimizer state onto the training device
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)
        model.load_state_dict(torch.load(model_path, map_location=map_loc))

    if args.transfer_from != '':
        # loads CNN encoder from transfer_from model
        model_path = os.path.join(args.save_dir, args.project_name,
                                  args.transfer_from, 'checkpoints',
                                  'modelbest.ckpt')
        pretrained_dict = torch.load(model_path, map_location=map_loc)
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if 'encoder' in k
        }
        model.load_state_dict(pretrained_dict, strict=False)
        args, model = merge_models(args, model, ingr_vocab_size,
                                   instrs_vocab_size)

    if device != 'cpu' and torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    model = model.to(device)
    cudnn.benchmark = True

    if not hasattr(args, 'current_epoch'):
        args.current_epoch = 0

    es_best = 10000 if args.es_metric == 'loss' else 0
    # Train the model
    start = args.current_epoch
    for epoch in range(start, args.num_epochs):

        # save current epoch for resuming
        if args.tensorboard:
            logger.reset()

        args.current_epoch = epoch
        # increase / decrase values for moving params
        if args.decay_lr:
            frac = epoch // args.lr_decay_every
            decay_factor = args.lr_decay_rate**frac
            new_lr = args.learning_rate * decay_factor
            print('Epoch %d. lr: %.5f' % (epoch, new_lr))
            set_lr(optimizer, decay_factor)

        if args.finetune_after != -1 and args.finetune_after < epoch \
                and not keep_cnn_gradients and params_cnn is not None:

            print("Starting to fine tune CNN")
            # start with learning rates as they were (if decayed during training)
            optimizer = torch.optim.Adam([{
                'params': params
            }, {
                'params':
                params_cnn,
                'lr':
                decay_factor * args.learning_rate *
                args.scale_learning_rate_cnn
            }],
                                         lr=decay_factor * args.learning_rate)
            keep_cnn_gradients = True

        for split in ['train', 'val']:

            if split == 'train':
                model.train()
            else:
                model.eval()
            total_step = len(data_loaders[split])
            loader = iter(data_loaders[split])

            total_loss_dict = {
                'recipe_loss': [],
                'ingr_loss': [],
                'eos_loss': [],
                'loss': [],
                'iou': [],
                'perplexity': [],
                'iou_sample': [],
                'f1': [],
                'card_penalty': []
            }

            error_types = {
                'tp_i': 0,
                'fp_i': 0,
                'fn_i': 0,
                'tn_i': 0,
                'tp_all': 0,
                'fp_all': 0,
                'fn_all': 0
            }

            torch.cuda.synchronize()
            start = time.time()

            for i in range(total_step):

                # BUG FIX: the Py2-style `loader.next()` raises
                # AttributeError on Python 3 / recent PyTorch iterators;
                # use the builtin next() instead.
                img_inputs, captions, ingr_gt, img_ids, paths = next(loader)

                ingr_gt = ingr_gt.to(device)
                img_inputs = img_inputs.to(device)
                captions = captions.to(device)
                true_caps_batch = captions.clone()[:, 1:].contiguous()
                loss_dict = {}

                if split == 'val':
                    with torch.no_grad():
                        losses = model(img_inputs, captions, ingr_gt)

                        if not args.recipe_only:
                            outputs = model(img_inputs,
                                            captions,
                                            ingr_gt,
                                            sample=True)

                            ingr_ids_greedy = outputs['ingr_ids']

                            mask = mask_from_eos(ingr_ids_greedy,
                                                 eos_value=0,
                                                 mult_before=False)
                            ingr_ids_greedy[mask == 0] = ingr_vocab_size - 1
                            pred_one_hot = label2onehot(
                                ingr_ids_greedy, ingr_vocab_size - 1)
                            target_one_hot = label2onehot(
                                ingr_gt, ingr_vocab_size - 1)
                            iou_sample = softIoU(pred_one_hot, target_one_hot)
                            iou_sample = iou_sample.sum() / (
                                torch.nonzero(iou_sample.data).size(0) + 1e-6)
                            loss_dict['iou_sample'] = iou_sample.item()

                            update_error_types(error_types, pred_one_hot,
                                               target_one_hot)

                            del outputs, pred_one_hot, target_one_hot, iou_sample

                else:
                    losses = model(img_inputs,
                                   captions,
                                   ingr_gt,
                                   keep_cnn_gradients=keep_cnn_gradients)

                if not args.ingrs_only:
                    recipe_loss = losses['recipe_loss']

                    recipe_loss = recipe_loss.view(true_caps_batch.size())
                    non_pad_mask = true_caps_batch.ne(instrs_vocab_size -
                                                      1).float()

                    recipe_loss = torch.sum(recipe_loss * non_pad_mask,
                                            dim=-1) / torch.sum(non_pad_mask,
                                                                dim=-1)
                    perplexity = torch.exp(recipe_loss)

                    recipe_loss = recipe_loss.mean()
                    perplexity = perplexity.mean()

                    loss_dict['recipe_loss'] = recipe_loss.item()
                    loss_dict['perplexity'] = perplexity.item()
                else:
                    recipe_loss = 0

                if not args.recipe_only:

                    ingr_loss = losses['ingr_loss']
                    ingr_loss = ingr_loss.mean()
                    loss_dict['ingr_loss'] = ingr_loss.item()

                    eos_loss = losses['eos_loss']
                    eos_loss = eos_loss.mean()
                    loss_dict['eos_loss'] = eos_loss.item()

                    iou_seq = losses['iou']
                    iou_seq = iou_seq.mean()
                    loss_dict['iou'] = iou_seq.item()

                    card_penalty = losses['card_penalty'].mean()
                    loss_dict['card_penalty'] = card_penalty.item()
                else:
                    ingr_loss, eos_loss, card_penalty = 0, 0, 0

                loss = args.loss_weight[0] * recipe_loss + args.loss_weight[1] * ingr_loss \
                       + args.loss_weight[2]*eos_loss + args.loss_weight[3]*card_penalty

                loss_dict['loss'] = loss.item()

                for key in loss_dict.keys():
                    total_loss_dict[key].append(loss_dict[key])

                if split == 'train':
                    model.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Print log info
                if args.log_step != -1 and i % args.log_step == 0:
                    elapsed_time = time.time() - start
                    lossesstr = ""
                    for k in total_loss_dict.keys():
                        if len(total_loss_dict[k]) == 0:
                            continue
                        this_one = "%s: %.4f" % (
                            k, np.mean(total_loss_dict[k][-args.log_step:]))
                        lossesstr += this_one + ', '
                    # this only displays nll loss on captions, the rest of losses will be in tensorboard logs
                    strtoprint = 'Split: %s, Epoch [%d/%d], Step [%d/%d], Losses: %sTime: %.4f' % (
                        split, epoch, args.num_epochs, i, total_step,
                        lossesstr, elapsed_time)
                    print(strtoprint)

                    if args.tensorboard:
                        # logger.histo_summary(model=model, step=total_step * epoch + i)
                        logger.scalar_summary(
                            mode=split + '_iter',
                            epoch=total_step * epoch + i,
                            **{
                                k: np.mean(v[-args.log_step:])
                                for k, v in total_loss_dict.items() if v
                            })

                    torch.cuda.synchronize()
                    start = time.time()
                del loss, losses, captions, img_inputs

            if split == 'val' and not args.recipe_only:
                ret_metrics = {
                    'accuracy': [],
                    'f1': [],
                    'jaccard': [],
                    'f1_ingredients': [],
                    'dice': []
                }
                compute_metrics(
                    ret_metrics,
                    error_types,
                    ['accuracy', 'f1', 'jaccard', 'f1_ingredients', 'dice'],
                    eps=1e-10,
                    weights=None)

                total_loss_dict['f1'] = ret_metrics['f1']
            if args.tensorboard:
                # 1. Log scalar values (scalar summary)
                logger.scalar_summary(
                    mode=split,
                    epoch=epoch,
                    **{k: np.mean(v)
                       for k, v in total_loss_dict.items() if v})

        # Save the model's best checkpoint if performance was improved
        es_value = np.mean(total_loss_dict[args.es_metric])

        # save current model as well
        save_model(model, optimizer, checkpoints_dir, suff='')
        if (args.es_metric == 'loss'
                and es_value < es_best) or (args.es_metric == 'iou_sample'
                                            and es_value > es_best):
            es_best = es_value
            save_model(model, optimizer, checkpoints_dir, suff='best')
            pickle.dump(args,
                        open(os.path.join(checkpoints_dir, 'args.pkl'), 'wb'))
            curr_pat = 0
            print('Saved checkpoint.')
        else:
            curr_pat += 1

        # early stopping once patience is exceeded
        if curr_pat > args.patience:
            break

    if args.tensorboard:
        logger.close()
def train(args):
    """Train a binary Segmenter on the source domain, early-stop on the
    validation loss, then evaluate the best checkpoint on the source and
    target test splits (domain-shift evaluation).

    Args:
        args: parsed CLI namespace. Fields used here include log_dir,
            exp_name, log_term, batch_size, source, target, num_workers,
            mnist_dir, mnist_m_dir, mnist_thin_dir, criterion, conv_dim,
            repeat_num, num_down, drop, lr, lr_decay, num_epochs,
            patience and use_tensorboard.

    Raises:
        ValueError: if args.criterion is neither 'softiou' nor
            'crossentropy'.

    Side effects: creates <log_dir>/<exp_name>, optionally redirects
    stdout/stderr to log files, saves args.pkl and model/optimizer
    checkpoints, and prints test metrics for both domains.
    """
    # Create directories if not exist.
    model_dir = os.path.join(args.log_dir, args.exp_name)
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # logs to disk
    if not args.log_term:
        print("Training logs will be saved to:", os.path.join(model_dir, 'train.log'))
        sys.stdout = open(os.path.join(model_dir, 'train.log'), 'w')
        sys.stderr = open(os.path.join(model_dir, 'train.err'), 'w')
    # save args
    pickle.dump(args, open(os.path.join(model_dir, 'args.pkl'), 'wb'))

    # epochs since the last improvement (early-stopping patience counter)
    curr_pat = 0

    # Data loaders: source train/val/test plus the target-domain test set
    source_train = get_loader(args.batch_size, args.source, 'train', args.num_workers,
                              mnistpath=args.mnist_dir, mnistmpath=args.mnist_m_dir,
                              mnistthinpath=args.mnist_thin_dir, source=args.source, colors=False)
    source_val = get_loader(args.batch_size, args.source, 'val', args.num_workers,
                            mnistpath=args.mnist_dir, mnistmpath=args.mnist_m_dir,
                            mnistthinpath=args.mnist_thin_dir, source=args.source, colors=False)
    source_test = get_loader(args.batch_size, args.source, 'test', args.num_workers,
                             mnistpath=args.mnist_dir, mnistmpath=args.mnist_m_dir,
                             mnistthinpath=args.mnist_thin_dir, source=args.source, colors=False)
    target_test = get_loader(args.batch_size, args.target, 'test', args.num_workers,
                             mnistpath=args.mnist_dir, mnistmpath=args.mnist_m_dir,
                             mnistthinpath=args.mnist_thin_dir, source=args.target, colors=False)

    # Training criterion
    if args.criterion == 'softiou':
        criterion = softIoULoss()
    elif args.criterion == 'crossentropy':
        criterion = nn.CrossEntropyLoss()
    else:
        raise ValueError('Unknown loss')

    # Build model
    model = Segmenter(conv_dim=args.conv_dim, repeat_num=args.repeat_num, num_down=args.num_down, bias=True,
                      n_classes=2, drop=args.drop)

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # gpus
    model = model.cuda()
    cudnn.benchmark = True

    # Visualizer.
    # BUGFIX: `visualizer` was referenced unconditionally below, raising
    # NameError whenever args.use_tensorboard was False; create it
    # conditionally and guard every use.
    visualizer = Visualizer(model_dir, name='visual_results') if args.use_tensorboard else None

    # Train the model
    for epoch in range(0, args.num_epochs):
        # reset visualizer
        if visualizer is not None:
            visualizer.reset()

        # increase / decrease values for moving params
        set_lr(optimizer, args.lr_decay)

        # split loop
        for split in ['train', 'val']:

            if split == 'train':
                loader = source_train
                model.train()
            else:
                loader = source_val
                model.eval()

            metrics = {'loss': 0, 'iou': [], 'accuracy': []}
            # 2x2 confusion matrix for the binary segmentation task
            cm = torch.from_numpy(np.zeros((2, 2))).float().cuda()

            total_step = len(loader)
            torch.cuda.synchronize()
            start = time.time()

            # minibatch loop
            for i, (images, gts) in enumerate(loader):
                global_iter = total_step * epoch + i

                # send to cuda
                images = images.cuda()
                gts = gts.cuda()

                loss_dict = {}

                # disable autograd during validation to save memory
                if split == 'val':
                    with torch.no_grad():
                        outputs = model(images)
                else:
                    outputs = model(images)

                # loss computation
                loss = criterion(outputs, gts)

                # update confusion matrix
                cm = update_cm(cm, outputs, gts)

                # update dicts
                loss_dict['loss'] = loss.data
                metrics['loss'] += loss_dict['loss']

                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    if visualizer is not None:
                        visualizer.scalar_summary(mode=split, epoch=global_iter, **loss_dict)

            # end of epoch
            metrics['loss'] /= total_step
            str_endepoch = 'total epoch %d; split: %s; loss: %.4f; time: %s' % (
            epoch, split, metrics['loss'], time.time() - start)
            print(str_endepoch)

            torch.cuda.synchronize()
            start = time.time()

            # compute metrics and visualize them
            metrics = compute_metrics(cm, metrics)

            if visualizer is not None and split == 'train':
                visualizer.scalar_summary(mode=split, epoch=epoch,
                                          **{k: v for k, v in metrics.items() if v and k != 'loss'})

            if visualizer is not None and split == 'val':
                visualizer.scalar_summary(mode=split, epoch=epoch, **metrics)

        # Save the model checkpoints if performance was improved.
        # es_best is first assigned at epoch 0; the short-circuit keeps the
        # comparison safe on the first iteration.
        if epoch == 0 or metrics['loss'] < es_best:
            es_best = metrics['loss']
            torch.save(model.state_dict(), os.path.join(
                model_dir, 'model.ckpt'))
            torch.save(optimizer.state_dict(), os.path.join(
                model_dir, 'optim.ckpt'))

            curr_pat = 0
        else:
            curr_pat += 1

        if curr_pat > args.patience:
            break

    if visualizer is not None:
        visualizer.close()

    # restore model
    model = restore_model(model, model_dir)

    # test on source domain
    model.eval()
    cm = torch.from_numpy(np.zeros((2, 2))).float().cuda()
    metrics = {'loss': 0, 'iou': [], 'accuracy': []}
    for i, (images, gts) in enumerate(source_test):
        # send to cuda
        images = images.cuda()
        gts = gts.cuda()

        with torch.no_grad():
            outputs = model(images)

        # loss computation
        loss = criterion(outputs, gts)

        # update confusion matrix
        cm = update_cm(cm, outputs, gts)

        # update dicts
        metrics['loss'] += loss.data

    # compute metrics and visualize them
    metrics['loss'] /= len(source_test)
    metrics = compute_metrics(cm, metrics)

    print_metrics('TEST SOURCE: ', metrics)

    # test on target domain
    model.eval()
    cm = torch.from_numpy(np.zeros((2, 2))).float().cuda()
    metrics = {'loss': 0, 'iou': [], 'accuracy': []}
    for i, (images, gts) in enumerate(target_test):
        # send to cuda
        images = images.cuda()
        gts = gts.cuda()

        with torch.no_grad():
            outputs = model(images)

        # loss computation
        loss = criterion(outputs, gts)

        # update confusion matrix
        cm = update_cm(cm, outputs, gts)

        # update dicts
        metrics['loss'] += loss.data

    # compute metrics and visualize them
    metrics['loss'] /= len(target_test)
    metrics = compute_metrics(cm, metrics)

    print_metrics('TEST TARGET: ', metrics)
예제 #11
0
def validate_model(Net,
                   seed,
                   mini_batch_size=100,
                   optimizer=optim.Adam,
                   criterion=None,
                   n_epochs=40,
                   eta=1e-3,
                   lambda_l2=0,
                   alpha=0.5,
                   beta=0.5,
                   plot=True,
                   rotate=False,
                   translate=False,
                   swap_channel=False,
                   GPU=False):
    """Train a network (described by a dictionary from the ``Nets`` class)
    with a fixed seed and report its performance on the test set.

    The seed controls both the weight initialization and the data
    generation (train/validation/test via the prologue), so runs are
    reproducible. Records train/validation accuracy and loss per epoch
    and can display the learning curves.

    Args:
        Net: network dictionary from the ``Nets`` class ('net_type',
            'net' constructor, tuned hyper-parameters, 'learning rate').
        seed: seed for weight initialization and data loading.
        mini_batch_size, optimizer, criterion, n_epochs, eta, lambda_l2,
            alpha, beta: training hyper-parameters — see training.py.
            `criterion` defaults to a fresh nn.CrossEntropyLoss() built
            per call (not a module shared across calls at import time).
        plot: if True, plot the learning-curve evolution over the epochs.
        rotate, translate, swap_channel: data-augmentation flags — see
            loader.py.
        GPU: if True and CUDA is available, run on the GPU.

    Raises:
        ValueError: if Net['net_type'] is not one of the four supported
            architectures.

    Output: prints the test loss and accuracy; optionally shows curves.
    """
    # BUGFIX: the criterion used to be a default argument instantiated
    # once at import time; build it per call instead.
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    # set the pytorch seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # set the seed for random splitting of the dataset in training and validation
    random.seed(0)

    # create the dataset
    data = PairSetMNIST()
    train_data = Training_set(data)
    test_data = Test_set(data)
    train_data_split = Training_set_split(train_data, rotate, translate,
                                          swap_channel)
    validation_data = Validation_set(train_data)

    # construct the requested architecture with its tuned parameters.
    # BUGFIX: the original used independent `if`s and crashed later with
    # NameError on `model` for an unknown net_type; use elif + fail fast.
    if Net['net_type'] == 'Net2c':
        model = Net['net'](nb_hidden=Net['hidden_layers'],
                           dropout_prob=Net['drop_prob'])
    elif Net['net_type'] == 'LeNet_sharing':
        model = Net['net'](nb_hidden=Net['hidden_layers'],
                           dropout_ws=Net['drop_prob_ws'],
                           dropout_comp=Net['drop_prob_comp'])
    elif Net['net_type'] == 'LeNet_sharing_aux':
        # with data augmentation the tuned dropout probabilities and the
        # learning rate differ, so pick the matching hyper-parameter set
        if (rotate == False and translate == False and swap_channel == False):
            model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                               nbhidden_comp=Net['hidden_layers_comp'],
                               drop_prob_aux=Net['drop_prob_aux'],
                               drop_prob_comp=Net['drop_prob_comp'])
        else:
            # NOTE: mutates the caller's Net dict (kept from the original)
            Net['learning rate'] = Net['learning rate augm']
            model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                               nbhidden_comp=Net['hidden_layers_comp'],
                               drop_prob_aux=Net['drop_prob_aux_augm'],
                               drop_prob_comp=Net['drop_prob_comp_augm'])
    elif Net['net_type'] == 'Google_Net':
        model = Net['net'](channels_1x1=Net['channels_1x1'],
                           channels_3x3=Net['channels_3x3'],
                           channels_5x5=Net['channels_5x5'],
                           pool_channels=Net['pool_channels'],
                           nhidden=Net['hidden_layers'],
                           drop_prob_comp=Net['drop_prob_comp'],
                           drop_prob_aux=Net['drop_prob_aux'])
    else:
        raise ValueError("Unknown net_type: {}".format(Net['net_type']))

    if GPU and cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    model = model.to(device)

    # train the model on the train set and validate at each epoch
    train_losses, train_acc, valid_losses, valid_acc = train_model(
        model, train_data_split, validation_data, device, mini_batch_size,
        optimizer, criterion, n_epochs, Net['learning rate'], lambda_l2, alpha,
        beta)

    if plot:
        learning_curve(train_losses, train_acc, valid_losses, valid_acc)

    # loss and accuracy of the network on the test set
    test_loss, test_accuracy = compute_metrics(model, test_data, device)

    print('\nTest Set | Loss: {:.4f} | Accuracy: {:.2f}%\n'.format(
        test_loss, test_accuracy))
예제 #12
0
    def run(self, data, gt):
        """Sweep mean-shift bandwidths and clustering thresholds, saving
        convergence points, hard labels and scores for every combination.

        Args:
            data: embedding array whose first axis is the batch and last
                axis is the embedding dimension; it is flattened to
                (batch, n_points, emb_dim) for mean shift.
            gt: ground-truth segmentation matching data's layout.

        Returns:
            The result dict ({'parameters', 'scores'}) with the lowest
            CREMI score over all (bandwidth, threshold) pairs.
        """
        results_list = []
        for bandwidth in tqdm(self.bandwidths,
                              desc="Processing bandwidth",
                              leave=False):
            MeanShifter = MeanShift(n_iter=self.n_iter,
                                    bandwidth=bandwidth,
                                    kernel=self.kernel,
                                    blurring=self.blurring,
                                    use_keops=self.keops)

            convergence_points = MeanShifter(
                torch.tensor(
                    data.reshape(data.shape[0], -1,
                                 data.shape[-1]))).detach().cpu().numpy()
            # BUGFIX: ndarray.reshape returns a new array; the original
            # discarded the result, leaving convergence_points flattened.
            convergence_points = convergence_points.reshape(*data.shape)

            np.save(
                os.path.join(
                    self.target_dir,
                    "mean_shift_conv_points_bandwidth_{}.npy".format(
                        bandwidth)), convergence_points)

            tqdm.write("Obtaining hard clustering")
            # a threshold of "same" means: reuse the bandwidth as threshold
            labels = obtain_hard_clusters(convergence_points, [
                threshold if not (threshold == "same") else bandwidth
                for threshold in self.thresholds
            ])

            # compute scores, save labels and scores for each threshold
            for i, threshold in enumerate(self.thresholds):
                np.save(
                    os.path.join(
                        self.target_dir,
                        "mean_shift_labels_bandwidth_{}_threshold_{}.npy".
                        format(bandwidth, threshold)), labels[:, i, ...])

                # compute metrics
                results = {
                    "parameters": {
                        "bandwidth": bandwidth,
                        "threshold": threshold
                    },
                    "scores": compute_metrics(labels[:, i, ...], gt.copy())
                }
                # save results (CREMI, arand, voi-split, voi-merge)
                np.save(
                    os.path.join(
                        self.target_dir,
                        "mean_shift_scores_bandwidth_{}_threshold_{}".format(
                            bandwidth, threshold)),
                    np.array([
                        results["scores"]["CREMI_score"],
                        results["scores"]["arand"],
                        results["scores"]["voi"][0],
                        results["scores"]["voi"][1]
                    ]))
                results_list.append(results)
            tqdm.write("Done with bandwidth {}".format(bandwidth))
        # best parameter set = lowest CREMI score
        return sorted(results_list,
                      key=lambda x: x["scores"]["CREMI_score"])[0]
예제 #13
0
def main():
    """Entry point: sample images from a trained Generator, or train a
    DCGAN/SNGAN, depending on the YAML config returned by parseyaml().

    Raises:
        ValueError: for an unsupported 'image_size' or 'arch' value
            (previously these fell through and crashed later with a
            NameError on netG/netD).
    """

    params = parseyaml()

    if params['arch'] == 'Generator':
        # Inference-only mode: load generator weights and dump samples.
        device = to_gpu(ngpu=params['n_gpu'])

        if params['image_size'] == 64:
            netG = Generator(ngpu=0, nz=256,
                             ngf=64, nc=64).to(device)
        elif params['image_size'] == 128:
            netG = Generator_128(ngpu=0, nz=256,
                                 ngf=64, nc=64).to(device)
        elif params['image_size'] == 256:
            netG = Generator_256(ngpu=0, nz=256,
                                 ngf=64, nc=64).to(device)
        else:
            raise ValueError('Unsupported image_size: {}'.format(
                params['image_size']))

        netG.apply(weights_init)
        netG.load_state_dict(torch.load(params['path']))

        for i in range(params['quantity']):
            fixed_noise = torch.randn(64, 256, 1, 1, device=device)
            # sampling only: no autograd graph needed (grad-requiring
            # tensors can also break save_image's .numpy() conversion)
            with torch.no_grad():
                fakes = netG(fixed_noise)

            for j in range(len(fakes)):
                save_image(fakes[j], params['out'] + params['run'] +
                           '_' + str(i) + '_' + str(j) + '_img.png')

    else:
        # Training mode: build G and D for the requested architecture/size.
        dataloader = dataLoader(
            path=params['path'], image_size=params['image_size'], batch_size=params['batch_size'],
            workers=params['loader_workers'])

        device = to_gpu(ngpu=params['n_gpu'])

        if params['arch'] == 'DCGAN':

            if params['image_size'] == 64:
                netG = Generator(ngpu=params['n_gpu'], nz=params['latent_vector'],
                                 ngf=params['gen_feature_maps'], nc=params['number_channels']).to(device)
                netD = Discriminator(params['n_gpu'], nc=params['number_channels'],
                                     ndf=params['dis_feature_maps']).to(device)
            elif params['image_size'] == 128:
                netG = Generator_128(ngpu=params['n_gpu'], nz=params['latent_vector'],
                                     ngf=params['gen_feature_maps'], nc=params['number_channels']).to(device)
                netD = Discriminator_128(params['n_gpu'], nc=params['number_channels'],
                                         ndf=params['dis_feature_maps']).to(device)
            elif params['image_size'] == 256:
                netG = Generator_256(ngpu=params['n_gpu'], nz=params['latent_vector'],
                                     ngf=params['gen_feature_maps'], nc=params['number_channels']).to(device)
                netD = Discriminator_256(params['n_gpu'], nc=params['number_channels'],
                                         ndf=params['dis_feature_maps']).to(device)
            else:
                raise ValueError('Unsupported image_size: {}'.format(
                    params['image_size']))

        elif params['arch'] == 'SNGAN':
            # same generators as DCGAN, spectral-norm discriminators
            if params['image_size'] == 64:
                netG = Generator(ngpu=params['n_gpu'], nz=params['latent_vector'],
                                 ngf=params['gen_feature_maps'], nc=params['number_channels']).to(device)
                netD = Discriminator_SN(params['n_gpu'], nc=params['number_channels'],
                                        ndf=params['dis_feature_maps']).to(device)
            elif params['image_size'] == 128:
                netG = Generator_128(ngpu=params['n_gpu'], nz=params['latent_vector'],
                                     ngf=params['gen_feature_maps'], nc=params['number_channels']).to(device)
                netD = Discriminator_SN_128(params['n_gpu'], nc=params['number_channels'],
                                            ndf=params['dis_feature_maps']).to(device)
            elif params['image_size'] == 256:
                netG = Generator_256(ngpu=params['n_gpu'], nz=params['latent_vector'],
                                     ngf=params['gen_feature_maps'], nc=params['number_channels']).to(device)
                netD = Discriminator_SN_256(params['n_gpu'], nc=params['number_channels'],
                                            ndf=params['dis_feature_maps']).to(device)
            else:
                raise ValueError('Unsupported image_size: {}'.format(
                    params['image_size']))

        else:
            raise ValueError('Unknown arch: {}'.format(params['arch']))

        # wrap both networks for multi-GPU training when requested
        if (device.type == 'cuda') and (params['n_gpu'] > 1):
            netG = nn.DataParallel(netG, list(range(params['n_gpu'])))
            netD = nn.DataParallel(netD, list(range(params['n_gpu'])))

        netG.apply(weights_init)
        netD.apply(weights_init)

        print(netG)
        print(netD)

        criterion = nn.BCELoss()

        # fixed noise reused across epochs so sample grids are comparable
        fixed_noise = torch.randn(params['image_size'],
                                  params['latent_vector'], 1, 1, device=device)

        if params['learning_rate'] >= 1:
            # values >= 1 act as a multiplier on the default discriminator
            # LR (0.0002); the generator keeps the default LR
            optimizerD = optim.Adam(netD.parameters(), lr=0.0002 * params['learning_rate'], betas=(
                params['beta_adam'], 0.999))
            optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(
                params['beta_adam'], 0.999))
        else:
            optimizerD = optim.Adam(netD.parameters(), lr=params['learning_rate'], betas=(
                params['beta_adam'], 0.999))
            optimizerG = optim.Adam(netG.parameters(), lr=params['learning_rate'], betas=(
                params['beta_adam'], 0.999))

        G_losses, D_losses, img_list, img_list_only = training_loop(num_epochs=params['num_epochs'], dataloader=dataloader,
                                                                    netG=netG, netD=netD, device=device, criterion=criterion, nz=params[
                                                                        'latent_vector'],
                                                                    optimizerG=optimizerG, optimizerD=optimizerD, fixed_noise=fixed_noise, out=params['out'] + params['run'] + '_')

        loss_plot(G_losses=G_losses, D_losses=D_losses, out=params['out'] + params['run'] + '_')

        image_grid(dataloader=dataloader, img_list=img_list,
                   device=device, out=params['out'] + params['run'] + '_')

        compute_metrics(real=next(iter(dataloader)), fakes=img_list_only,
                        size=params['image_size'], out=params['out'] + params['run'] + '_')
예제 #14
0
def main(args):
    """Evaluate a trained image-to-recipe model on one data split.

    Two modes:
      * args.get_perplexity: teacher-forced forward passes; prints the
        mean per-recipe perplexity over the split.
      * otherwise: samples ingredients and/or recipes, accumulates
        ingredient error counts, prints metrics and dumps all generated
        outputs to <checkpoints>/<split>_<suffix>_gencaps.pkl.
    """

    # directory layout: <save_dir>/<project>/<model>/{checkpoints, logs}
    where_to_save = os.path.join(args.save_dir, args.project_name,
                                 args.model_name)
    checkpoints_dir = os.path.join(where_to_save, 'checkpoints')
    logs_dir = os.path.join(where_to_save, 'logs')

    # redirect stdout/stderr to log files unless logging to the terminal
    if not args.log_term:
        print("Eval logs will be saved to:",
              os.path.join(logs_dir, 'eval.log'))
        sys.stdout = open(os.path.join(logs_dir, 'eval.log'), 'w')
        sys.stderr = open(os.path.join(logs_dir, 'eval.err'), 'w')

    # CLI flags that must survive the (currently disabled) reload of the
    # training-time args from args.pkl
    vars_to_replace = [
        'greedy', 'recipe_only', 'ingrs_only', 'temperature', 'batch_size',
        'maxseqlen', 'get_perplexity', 'use_true_ingrs', 'eval_split',
        'save_dir', 'aux_data_dir', 'recipe1m_dir', 'project_name', 'use_lmdb',
        'beam'
    ]
    store_dict = {}
    for var in vars_to_replace:
        store_dict[var] = getattr(args, var)
    #args = pickle.load(open(os.path.join(checkpoints_dir, 'args.pkl'), 'rb'))
    for var in vars_to_replace:
        setattr(args, var, store_dict[var])
    print(args)

    transforms_list = []
    transforms_list.append(transforms.Resize((args.crop_size)))
    transforms_list.append(transforms.CenterCrop(args.crop_size))
    transforms_list.append(transforms.ToTensor())
    # ImageNet mean/std normalization
    transforms_list.append(
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)))
    # Image preprocessing
    transform = transforms.Compose(transforms_list)

    # data loader
    data_dir = args.recipe1m_dir
    data_loader, dataset = get_loader(data_dir,
                                      args.aux_data_dir,
                                      args.eval_split,
                                      args.maxseqlen,
                                      args.maxnuminstrs,
                                      args.maxnumlabels,
                                      args.maxnumims,
                                      transform,
                                      args.batch_size,
                                      shuffle=False,
                                      num_workers=args.num_workers,
                                      drop_last=False,
                                      max_num_samples=-1,
                                      use_lmdb=args.use_lmdb,
                                      suff=args.suff)

    ingr_vocab_size = dataset.get_ingrs_vocab_size()
    instrs_vocab_size = dataset.get_instrs_vocab_size()

    # force a single generation pass per image
    args.numgens = 1

    # Build the model
    model = get_model(args, ingr_vocab_size, instrs_vocab_size)
    model_path = os.path.join(args.save_dir, args.project_name,
                              args.model_name, 'checkpoints', 'modelbest.ckpt')

    # overwrite flags for inference
    model.recipe_only = args.recipe_only
    model.ingrs_only = args.ingrs_only

    # Load the trained model parameters
    model.load_state_dict(torch.load(model_path, map_location=map_loc))

    model.eval()
    model = model.to(device)
    results_dict = {'recipes': {}, 'ingrs': {}, 'ingr_iou': {}}
    # NOTE(review): `captions` is never written to below, so the
    # `imgid[j] not in captions.keys()` check further down is always True —
    # confirm whether it was meant to track results_dict['recipes'] instead.
    captions = {}
    iou = []
    # ingredient confusion counts ('_i' = per-item, '_all' = aggregated)
    error_types = {
        'tp_i': 0,
        'fp_i': 0,
        'fn_i': 0,
        'tn_i': 0,
        'tp_all': 0,
        'fp_all': 0,
        'fn_all': 0
    }
    perplexity_list = []
    # n_rep counts samples whose repetition score falls below threshold th
    n_rep, th = 0, 0.3

    for i, (img_inputs, true_caps_batch, ingr_gt, imgid,
            impath) in tqdm(enumerate(data_loader)):

        ingr_gt = ingr_gt.to(device)
        true_caps_batch = true_caps_batch.to(device)

        # targets shifted by one token for next-token prediction
        true_caps_shift = true_caps_batch.clone()[:, 1:].contiguous()
        img_inputs = img_inputs.to(device)

        true_ingrs = ingr_gt if args.use_true_ingrs else None
        for gens in range(args.numgens):
            with torch.no_grad():

                if args.get_perplexity:

                    losses = model(img_inputs,
                                   true_caps_batch,
                                   ingr_gt,
                                   keep_cnn_gradients=False)
                    recipe_loss = losses['recipe_loss']
                    recipe_loss = recipe_loss.view(true_caps_shift.size())
                    # mask out padding tokens (pad id == vocab_size - 1)
                    non_pad_mask = true_caps_shift.ne(instrs_vocab_size -
                                                      1).float()
                    recipe_loss = torch.sum(recipe_loss * non_pad_mask,
                                            dim=-1) / torch.sum(non_pad_mask,
                                                                dim=-1)
                    # perplexity = exp(mean per-token loss)
                    perplexity = torch.exp(recipe_loss)

                    perplexity = perplexity.detach().cpu().numpy().tolist()
                    perplexity_list.extend(perplexity)

                else:

                    outputs = model.sample(img_inputs, args.greedy,
                                           args.temperature, args.beam,
                                           true_ingrs)

                    if not args.recipe_only:
                        fake_ingrs = outputs['ingr_ids']
                        pred_one_hot = label2onehot(fake_ingrs,
                                                    ingr_vocab_size - 1)
                        target_one_hot = label2onehot(ingr_gt,
                                                      ingr_vocab_size - 1)
                        # batch-mean ingredient IoU
                        iou_item = torch.mean(
                            softIoU(pred_one_hot, target_one_hot)).item()
                        iou.append(iou_item)

                        update_error_types(error_types, pred_one_hot,
                                           target_one_hot)

                        fake_ingrs = fake_ingrs.detach().cpu().numpy()

                        # per-image ingredient predictions and IoU
                        for ingr_idx, fake_ingr in enumerate(fake_ingrs):

                            iou_item = softIoU(
                                pred_one_hot[ingr_idx].unsqueeze(0),
                                target_one_hot[ingr_idx].unsqueeze(0)).item()
                            results_dict['ingrs'][imgid[ingr_idx]] = []
                            results_dict['ingrs'][imgid[ingr_idx]].append(
                                fake_ingr)
                            results_dict['ingr_iou'][
                                imgid[ingr_idx]] = iou_item

                    if not args.ingrs_only:
                        sampled_ids_batch = outputs['recipe_ids']
                        sampled_ids_batch = sampled_ids_batch.cpu().detach(
                        ).numpy()

                        for j, sampled_ids in enumerate(sampled_ids_batch):
                            # a low score flags excessive token repetition
                            score = compute_score(sampled_ids)
                            if score < th:
                                n_rep += 1
                            if imgid[j] not in captions.keys():
                                results_dict['recipes'][imgid[j]] = []
                                results_dict['recipes'][imgid[j]].append(
                                    sampled_ids)
    if args.get_perplexity:
        print(len(perplexity_list))
        print(np.mean(perplexity_list))
    else:

        if not args.recipe_only:
            ret_metrics = {
                'accuracy': [],
                'f1': [],
                'jaccard': [],
                'f1_ingredients': []
            }
            compute_metrics(ret_metrics,
                            error_types,
                            ['accuracy', 'f1', 'jaccard', 'f1_ingredients'],
                            eps=1e-10,
                            weights=None)

            for k, v in ret_metrics.items():
                print(k, np.mean(v))

        # output-file suffix encodes the decoding strategy used
        if args.greedy:
            suff = 'greedy'
        else:
            if args.beam != -1:
                suff = 'beam_' + str(args.beam)
            else:
                suff = 'temp_' + str(args.temperature)

        results_file = os.path.join(
            args.save_dir, args.project_name, args.model_name, 'checkpoints',
            args.eval_split + '_' + suff + '_gencaps.pkl')
        print(results_file)
        pickle.dump(results_dict, open(results_file, 'wb'))

        print("Number of samples with excessive repetitions:", n_rep)
예제 #15
0
def main(args):
    """Evaluate every matching checkpoint in args.models_path and save a
    CSV of F1 scores.

    For each checkpoint whose filename contains args.dataset: load its
    saved training args, rebuild the data loader and model, run inference
    on args.eval_split, and compute overall F1, per-class F1 and
    per-image F1. Results go to <save_results_path>/results.csv.
    """
    # Get models to test from models_path
    models_to_test = glob.glob(args.models_path + '/*.ckpt')

    # BUGFIX: the score arrays used to be sized from the *unfiltered*
    # glob list while the loop ran over the dataset-filtered list, so
    # pd.DataFrame below raised on mismatched column lengths whenever a
    # checkpoint was filtered out. Filter first, then allocate.
    models_to_test = [m for m in models_to_test if args.dataset in m]
    model_names = [re.split(r'[/]', m)[-1] for m in models_to_test]

    # To store results (one slot per evaluated model)
    mat_f1 = np.zeros((len(models_to_test), ))
    mat_f1_c = np.zeros((len(models_to_test), ))
    mat_f1_i = np.zeros((len(models_to_test), ))

    if not os.path.exists(args.save_results_path):
        os.makedirs(args.save_results_path)

    print('Results will be saved here: ' + args.save_results_path)

    # Iterate over models to test
    for k, m in enumerate(models_to_test):
        print('---------------------------------------------')
        print('Evaluating ' + model_names[k])

        # Load checkpoint (contains both weights and the training args)
        checkpoint = torch.load(m, map_location=map_loc)
        model_args = checkpoint['args']

        # Image pre-processing (ImageNet normalization)
        transforms_list = []
        transforms_list.append(transforms.Resize(model_args.image_size))
        transforms_list.append(transforms.CenterCrop(model_args.crop_size))
        transforms_list.append(transforms.ToTensor())
        transforms_list.append(
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)))
        transform = transforms.Compose(transforms_list)

        # Load data
        datapaths = json.load(open('../configs/datapaths.json'))
        dataset_root = datapaths[model_args.dataset]
        data_loader, dataset = get_loader(
            dataset=model_args.dataset,
            dataset_root=dataset_root,
            split=args.eval_split,
            transform=transform,
            batch_size=args.batch_size,
            include_eos=(model_args.decoder != 'ff'),
            shuffle=False,
            num_workers=8,
            drop_last=False,
            shuffle_labels=False)

        vocab_size = len(dataset.get_vocab())
        print('Vocabulary size is {}'.format(vocab_size))
        print('Dataset {} split contains {} images'.format(
            args.eval_split, len(dataset)))

        # Load model
        model = get_model(model_args, vocab_size)
        model.load_state_dict(checkpoint['state_dict'])

        # Eval
        model.eval()
        model = model.to(device)
        total_step = len(data_loader)
        print('Number of iterations is {}'.format(total_step))

        # per-class ('_c') and aggregated ('_all') error counts
        overall_error_counts = {
            'tp_c': 0,
            'fp_c': 0,
            'fn_c': 0,
            'tn_c': 0,
            'tp_all': 0,
            'fp_all': 0,
            'fn_all': 0
        }
        f1s_image = []

        for img_inputs, target in tqdm(data_loader):

            img_inputs = img_inputs.to(device)

            with torch.no_grad():
                # get model predictions
                # predictions format can either be a matrix of size batch_size x maxnumlabels, where
                # each row contains the integer labels of an image, followed by pad_value
                # or a list of sublists, where each sublist contains the integer labels of an image
                # and len(list) = batch_size and len(sublist) is variable
                _, predictions = model(img_inputs,
                                       maxnumlabels=model_args.maxnumlabels,
                                       compute_predictions=True)
                # convert model predictions and targets to k-hots
                pred_k_hots = label2_k_hots(
                    predictions,
                    vocab_size - 1,
                    remove_eos=(model_args.decoder != 'ff'))
                target_k_hots = label2_k_hots(
                    target,
                    vocab_size - 1,
                    remove_eos=(model_args.decoder != 'ff'))
                # update overall and per class error counts
                update_error_counts(overall_error_counts, pred_k_hots,
                                    target_k_hots)

                # get per-image error counts
                for i in range(pred_k_hots.size(0)):
                    # compute per image metrics
                    image_error_counts = {
                        'tp_c': 0,
                        'fp_c': 0,
                        'fn_c': 0,
                        'tn_c': 0,
                        'tp_all': 0,
                        'fp_all': 0,
                        'fn_all': 0
                    }
                    update_error_counts(image_error_counts,
                                        pred_k_hots[i].unsqueeze(0),
                                        target_k_hots[i].unsqueeze(0))

                    image_metrics = compute_metrics(image_error_counts,
                                                    which_metrics=['f1'])
                    f1s_image.append(image_metrics['f1'])

        # compute overall and per class metrics
        overall_metrics = compute_metrics(overall_error_counts, ['f1', 'c_f1'],
                                          weights=None)
        overall_metrics['f1_i'] = np.mean(f1s_image)
        print(overall_metrics)

        # save results
        mat_f1[k] = overall_metrics['f1']
        mat_f1_c[k] = overall_metrics['c_f1']
        mat_f1_i[k] = overall_metrics['f1_i']

    print('Saving results...')
    data = {
        'Model': model_names,
        'f1': mat_f1,
        'f1_c': mat_f1_c,
        'f1_i': mat_f1_i
    }
    df = pd.DataFrame(data)
    df.to_csv(os.path.join(args.save_results_path, 'results.csv'))
예제 #16
0
def train_model(model,
                train_data,
                validation_data,
                device,
                mini_batch_size=100,
                optimizer=optim.Adam,
                criterion=nn.CrossEntropyLoss(),
                n_epochs=40,
                eta=1e-3,
                lambda_l2=0,
                alpha=0.5,
                beta=0.5):
    """
    Train a neural network model and record its train/validation history.

    Input:

        - model : neural network instance to train
        - train_data : dataset yielding (input, target, classes) used for training
        - validation_data : unseen dataset used to evaluate the network after each epoch
        - device : torch device the model and the batches are moved to
        - mini_batch_size : batch size used by the data loader -> default 100
        - optimizer : optimizer *class* used to optimize the network -> default Adam
        - criterion : loss function to minimize -> default cross-entropy loss
        - n_epochs : number of epochs to optimize the network -> default 40
        - eta : learning rate used by the optimizer -> default 1e-3
        - lambda_l2 : weight penalty term (weight decay) -> default 0
        - alpha : weight of the binary-classification loss in the overall loss -> default 0.5
        - beta : weight of the auxiliary losses in the overall loss -> default 0.5

    Output:

        - list of the train losses at each epoch
        - list of the train accuracies at each epoch
        - list of the validation losses at each epoch
        - list of the validation accuracies at each epoch
    """
    # Accuracy and loss history of the train and validation data
    train_acc = []
    train_losses = []
    valid_acc = []
    valid_losses = []

    # the optimizer class is instantiated here with the model parameters
    optimizer = optimizer(model.parameters(), lr=eta, weight_decay=lambda_l2)
    # data loader
    train_loader = DataLoader(train_data,
                              batch_size=mini_batch_size,
                              shuffle=True)

    for e in range(n_epochs):
        epoch_loss = 0
        # set the model to train mode
        model.train(True)
        for i, data in enumerate(train_loader, 0):

            # get the data from the batch
            input_, target_, classes_ = data

            input_ = input_.to(device)
            target_ = target_.to(device)
            classes_ = classes_.to(device)

            # models with auxiliary heads return per-digit class logits too
            if (model.__class__.__name__ == 'LeNet_sharing_aux'
                    or model.__class__.__name__ == 'Google_Net'):
                # get model output
                class_1, class_2, out = model(input_)
                # compute the auxiliary (per-digit) and main losses
                aux_loss1 = criterion(class_1, classes_[:, 0])
                aux_loss2 = criterion(class_2, classes_[:, 1])
                out_loss = criterion(out, target_)
                # Overall loss to minimize
                net_loss = (alpha * (out_loss) + beta *
                            (aux_loss1 + aux_loss2))
            else:
                # get the model output
                out = model(input_)
                # Compute the overall loss to minimize
                net_loss = criterion(out, target_)

            # accumulate as a plain float: accumulating the tensor itself
            # would keep the autograd graph of every batch alive for the
            # whole epoch and waste memory
            epoch_loss += net_loss.item()

            # backward
            optimizer.zero_grad()
            net_loss.backward()
            # gradient step
            optimizer.step()

        # compute the loss and accuracy on the whole training set for the epoch
        tr_loss, tr_acc = compute_metrics(model, train_data, device)
        # compute the loss and accuracy on the validation set for the epoch
        val_loss, val_acc = compute_metrics(model, validation_data, device)

        # Save the metrics in the list
        train_losses.append(tr_loss)
        train_acc.append(tr_acc)
        valid_acc.append(val_acc)
        valid_losses.append(val_loss)

    return train_losses, train_acc, valid_losses, valid_acc
예제 #17
0
def evaluate(args, model, prefix="", save_preds=False):
    """Evaluate `model` on the dev split of `args.task_name`.

    Runs a standard evaluation pass (loss + predictions) and, when
    ``args.eval_domain`` is set, a second pass over the auxiliary dataset
    with ``aux=True`` to accumulate the domain loss.  Metrics from
    ``compute_metrics`` plus the losses are appended to ``eval_results.txt``
    in each output directory.

    Args:
        args: run configuration (task_name, output_dir, aux_name, device,
            n_gpu, per_gpu_eval_batch_size, model_type, output_mode,
            local_rank, eval_domain, ...).
        model: model to evaluate; wrapped in DataParallel when n_gpu > 1.
        prefix: label used in logs and as a subdirectory for result files.
        save_preds: when True, append per-example (pred, label) rows to
            ``preds.tsv`` in the output directory.

    Returns:
        Dict of metric name -> value, merged over all evaluated tasks.
    """
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = ("mnli", "mnli-mm") if args.task_name == "mnli" else (
        args.task_name, )
    eval_outputs_dirs = (args.output_dir, args.output_dir +
                         "-MM") if args.task_name == "mnli" else (
                             args.output_dir, )

    results = {}
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        # main-task dev examples; the auxiliary dataset is loaded further down
        eval_dataset, _ = load_after_examples(args,
                                              eval_task,
                                              args.aux_name,
                                              mode="dev")

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        # multi-gpu eval
        if args.n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
            model = torch.nn.DataParallel(model)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss, dom_loss = 0.0, 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "labels": batch[3]
                }
                if args.model_type != "distilbert":
                    inputs["token_type_ids"] = (
                        batch[2] if args.model_type
                        in ["bert", "xlnet", "albert"] else None
                    )  # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids
                outputs = model(**inputs, aux=False)
                tmp_eval_loss, tmp_dom_loss, logits = outputs[:3]

                eval_loss += tmp_eval_loss.mean().item()
                dom_loss += tmp_dom_loss.mean().item()
            nb_eval_steps += 1
            # accumulate predictions and gold labels batch by batch
            if preds is None:
                preds = logits.detach().cpu().numpy()
                # labels is 2-D here and only column 0 is kept
                # NOTE(review): presumably the remaining columns are auxiliary
                # labels — confirm against load_after_examples
                out_label_ids = inputs["labels"].detach().cpu().numpy()[:, 0]
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs["labels"].detach().cpu().numpy()[:, 0],
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps

        if args.eval_domain:
            # second pass over the auxiliary (domain) dataset with aux=True
            _, eval_aux_dataset = load_after_examples(args,
                                                      eval_task,
                                                      args.aux_name,
                                                      mode="dev")

            if not os.path.exists(eval_output_dir) and args.local_rank in [
                    -1, 0
            ]:
                os.makedirs(eval_output_dir)

            args.eval_batch_size = args.per_gpu_eval_batch_size * max(
                1, args.n_gpu)
            # Note that DistributedSampler samples randomly
            eval_aux_sampler = SequentialSampler(eval_aux_dataset)
            eval_aux_dataloader = DataLoader(eval_aux_dataset,
                                             sampler=eval_aux_sampler,
                                             batch_size=args.eval_batch_size)

            # multi-gpu eval
            if args.n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
                model = torch.nn.DataParallel(model)

            # Eval!
            logger.info(
                "***** Running auxiliary evaluation {} *****".format(prefix))
            logger.info("  Num examples = %d", len(eval_aux_dataset))
            logger.info("  Batch size = %d", args.eval_batch_size)

            for batch in tqdm(eval_aux_dataloader, desc="Evaluating"):
                model.eval()
                batch = tuple(t.to(args.device) for t in batch)

                with torch.no_grad():
                    inputs = {
                        "input_ids": batch[0],
                        "attention_mask": batch[1],
                        "labels": batch[3]
                    }
                    if args.model_type != "distilbert":
                        inputs["token_type_ids"] = (
                            batch[2] if args.model_type
                            in ["bert", "xlnet", "albert"] else None
                        )  # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids
                    outputs = model(**inputs, aux=True)
                    _, tmp_dom_loss, logits = outputs[:3]

                    dom_loss += tmp_dom_loss.mean().item()
                nb_eval_steps += 1

            # NOTE(review): nb_eval_steps now counts batches from BOTH loops,
            # and dom_loss includes the main-loop contributions — confirm
            # this averaging is intended
            dom_loss = dom_loss / nb_eval_steps

        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)

        if save_preds:
            # 1 where the prediction matches the gold label, 0 otherwise
            metrics = np.array(preds == out_label_ids, dtype=int)
            # 'a+' appends, so repeated runs accumulate rows (and headers)
            with open(eval_output_dir + '/preds.tsv', 'a+') as out_file:
                tsv_writer = csv.writer(out_file, delimiter='\t')
                tsv_writer.writerow(["preds", "labels"])
                # NOTE(review): `metric` is unpacked but never written — only
                # pred/label are emitted; confirm whether the correctness
                # column was meant to be saved too
                for pred, label, metric in zip(preds, out_label_ids, metrics):
                    tsv_writer.writerow([pred, label])

        result = compute_metrics(eval_task, preds, out_label_ids)
        # AfterBERT
        result.update({"loss": eval_loss})
        if args.eval_domain:
            result.update({"dom loss": dom_loss})
        results.update(result)

        output_eval_file = os.path.join(eval_output_dir, prefix,
                                        "eval_results.txt")
        # append mode: results from successive evaluations pile up in the file
        with open(output_eval_file, "a+") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            for key in sorted(result.keys()):
                writer.write("%s = %s\n" % (key, str(result[key])))

    return results
예제 #18
0
def grid_search_aux(lrs,
                    drop_prob_aux,
                    drop_prob_comp,
                    seeds,
                    mini_batch_size=100,
                    optimizer=optim.Adam,
                    criterion=nn.CrossEntropyLoss(),
                    n_epochs=40,
                    lambda_l2=0,
                    alpha=0.5,
                    beta=0.5,
                    rotate=False,
                    translate=False,
                    swap_channel=False,
                    GPU=False):
    """
     General : Iterate over combinations of the parameter lists and repeat a
               training/validation procedure for every seed at each combination
               -> select the combination with the highest mean validation
               accuracy for a LeNet_sharing_aux network.

               => only called in the <Nets> class by the Tune_LeNet_sharing_aux function

     Input :

         - lrs : list of learning rates
         - drop_prob_aux : list of dropout rates for the CNN auxiliary part
         - drop_prob_comp : list of dropout rates for the FC binary-classification part
         - seeds : list of seeds for statistics
         -> mini_batch_size, optimizer, criterion, n_epochs, lambda_l2, alpha, beta : see training.py
         -> rotate, translate, swap_channel : data augmentation, see loader.py
         - GPU : run on CUDA when available -> default False

     Output :

         - train_results : a (len(lrs), len(drop_prob_aux), len(drop_prob_comp), len(seeds), 4, n_epochs) tensor
                           len() -> number of parameters or seeds
                           4 -> train loss, train accuracy, validation loss, validation accuracy
                           n_epochs -> evolution during training
         - test_losses : tensor of shape (len(lrs), len(drop_prob_aux), len(drop_prob_comp), len(seeds)) with the test loss per run
         - test_accuracies : same shape, with the test accuracy per run
         - opt_lr : tuned value for the learning rate
         - opt_prob_aux : tuned value for drop_prob_aux
         - opt_prob_comp : tuned value for drop_prob_comp
    """
    # tensors to record the metrics
    train_results = torch.empty(len(lrs), len(drop_prob_aux),
                                len(drop_prob_comp), len(seeds), 4, n_epochs)
    test_losses = torch.empty(len(lrs), len(drop_prob_aux),
                              len(drop_prob_comp), len(seeds))
    test_accuracies = torch.empty(len(lrs), len(drop_prob_aux),
                                  len(drop_prob_comp), len(seeds))

    # iterate over the parameter combinations for each seed in seeds
    for idz, eta in enumerate(lrs):
        for idx, prob_aux in enumerate(drop_prob_aux):
            for idy, prob_comp in enumerate(drop_prob_comp):
                for n, seed in enumerate(seeds):
                    print(
                        ' lr : {:.4f}, prob aux : {:.2f}, prob comp : {:.2f} (n= {:d})'
                        .format(eta, prob_aux, prob_comp, n))

                    # set the torch seeds for reproducibility
                    torch.manual_seed(seed)
                    torch.cuda.manual_seed(seed)

                    # fixed random seed so the train/validation split is
                    # identical for every parameter combination
                    random.seed(0)

                    # create the data
                    data = PairSetMNIST()
                    train_data = Training_set(data)
                    test_data = Test_set(data)
                    train_data_split = Training_set_split(
                        train_data, rotate, translate, swap_channel)
                    validation_data = Validation_set(train_data)

                    # create the network
                    model = LeNet_sharing_aux(drop_prob_aux=prob_aux,
                                              drop_prob_comp=prob_comp)

                    if GPU and cuda.is_available():
                        device = torch.device('cuda')
                    else:
                        device = torch.device('cpu')

                    model = model.to(device)

                    # train the network
                    train_losses, train_acc, valid_losses, valid_acc = train_model(
                        model, train_data_split, validation_data, device,
                        mini_batch_size, optimizer, criterion, n_epochs, eta,
                        lambda_l2, alpha, beta)

                    # store train and test results
                    train_results[idz, idx, idy, n] = torch.tensor(
                        [train_losses, train_acc, valid_losses, valid_acc])
                    test_loss, test_acc = compute_metrics(
                        model, test_data, device)
                    test_losses[idz, idx, idy, n] = test_loss
                    test_accuracies[idz, idx, idy, n] = test_acc

    # index of the last training epoch (was hard-coded to 39, which silently
    # broke the selection for any n_epochs != 40)
    last_epoch = n_epochs - 1

    # mean and standard deviation of the validation accuracy over the seeds
    validation_grid_mean_acc = torch.mean(
        train_results[:, :, :, :, 3, last_epoch], dim=3)
    validation_grid_std_acc = torch.std(
        train_results[:, :, :, :, 3, last_epoch], dim=3)

    # mean and standard deviation of the train accuracy over the seeds
    train_grid_mean_acc = torch.mean(train_results[:, :, :, :, 1, last_epoch],
                                     dim=3)
    train_grid_std_acc = torch.std(train_results[:, :, :, :, 1, last_epoch],
                                   dim=3)

    # get the indices of the parameters with the highest mean validation accuracy
    idx = torch.where(
        validation_grid_mean_acc == validation_grid_mean_acc.max())

    # several combinations can tie for the maximum: keep the FIRST one per
    # dimension so the indexing below stays valid (the old `idx = idx[0]`
    # mixed up dimensions and could raise IndexError)
    if len(idx[0]) >= 2:
        idx = tuple(dim_indices[:1] for dim_indices in idx)

    # get the tuned parameters
    opt_lr = lrs[idx[0].item()]
    opt_prob_aux = drop_prob_aux[idx[1].item()]
    opt_prob_comp = drop_prob_comp[idx[2].item()]

    print(
        'Best mean validation accuracy on {:d} seeds : {:.2f}%, std = {:.2f} with: learning rate = {:.4f}  dropout rate aux = {:.2f} and dropout rate comp = {:.2f}'
        .format(
            len(seeds), validation_grid_mean_acc[idx[0].item(), idx[1].item(),
                                                 idx[2].item()],
            validation_grid_std_acc[idx[0].item(), idx[1].item(),
                                    idx[2].item()], opt_lr, opt_prob_aux,
            opt_prob_comp))

    return train_results, test_losses, test_accuracies, opt_lr, opt_prob_aux, opt_prob_comp


###########################################################################################################################################
예제 #19
0
def evaluate(args, model, tokenizer, mode="", prefix=""):
    """Evaluate `model` on the `mode` split of `args.task_name`.

    Averages the eval loss over batches, converts the accumulated logits to
    final predictions according to ``args.output_mode``, optionally writes
    the predictions to disk, and logs/writes the metrics returned by
    ``compute_metrics``.

    Args:
        args: run configuration (task_name, output_dir, device, n_gpu,
            per_gpu_eval_batch_size, model_type, output_mode, write_preds,
            local_rank, ...).
        model: model to evaluate; wrapped in DataParallel when n_gpu > 1.
        tokenizer: tokenizer passed through to load_and_cache_examples.
        mode: dataset split selector forwarded to load_and_cache_examples.
        prefix: label used in logs and as a subdirectory for result files.

    Returns:
        Dict of metric name -> value, merged over all evaluated tasks.
    """
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = (("mnli", "mnli-mm") if args.task_name == "mnli" else
                       (args.task_name, ))
    eval_outputs_dirs = ((args.output_dir, args.output_dir +
                          "-MM") if args.task_name == "mnli" else
                         (args.output_dir, ))

    results = {}
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset = load_and_cache_examples(args, eval_task, tokenizer,
                                               mode)

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        # multi-gpu eval
        if args.n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
            model = torch.nn.DataParallel(model)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "labels": batch[3]
                }
                # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use
                # segment_ids
                if args.model_type != "distilbert":
                    inputs["token_type_ids"] = (batch[2] if args.model_type
                                                in ["bert", "xlnet", "albert"
                                                    ] else None)
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            # accumulate logits and gold labels batch by batch
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs["labels"].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        # argmax for classification, squeeze for regression
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)

        if args.write_preds:
            output_path_file = os.path.join(eval_output_dir, prefix,
                                            "predictions.txt")
            logger.info("***** Writing Predictions to "
                        "{} *****".format(output_path_file))
            write_predictions(output_path_file, eval_task, preds)

        result = compute_metrics(eval_task, preds, out_label_ids)
        results.update(result)

        output_eval_file = os.path.join(eval_output_dir, prefix,
                                        "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    return results
예제 #20
0
def evaluate(args, model, tokenizer, prefix=""):
    """Evaluate `model` on the eval split of `args.task_name`.

    Supports both plain classification/regression and sequence tagging
    (``args.output_mode == "sequencetagging"``); for the latter the decoded
    paths are additionally written out as a CoNLL file via
    ``tokenizer.batch_to_conll``.

    Args:
        args: run configuration (task_name, output_dir, data_dir, device,
            n_gpu, per_gpu_eval_batch_size, model_type, output_mode,
            local_rank, ...).
        model: model to evaluate; wrapped in DataParallel when n_gpu > 1.
        tokenizer: tokenizer; also provides batch_to_conll for tagging.
        prefix: label used in logs and as a subdirectory for result files.

    Returns:
        List with one metrics dict per evaluated task.
    """
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_task_names = (args.task_name, )
    eval_outputs_dirs = (args.output_dir, )

    results = []
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset = load_and_cache_examples(args,
                                               eval_task,
                                               tokenizer,
                                               evaluate=True)

        if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(eval_output_dir)

        args.eval_batch_size = args.per_gpu_eval_batch_size * max(
            1, args.n_gpu)
        # Note that DistributedSampler samples randomly
        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset,
                                     sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        # multi-gpu eval
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        processor = processors[args.task_name]()

        # output file for sequence tagging only; batch_to_conll appends, so
        # remove any stale file from a previous run first
        data_name = args.data_dir.split('/')[-2]
        output_file = os.path.join(
            os.path.join(eval_output_dir, prefix),
            "sequence_tagging_predictions_" + data_name + ".conll")
        if os.path.isfile(output_file):
            os.remove(output_file)
            logger.info("  Deleted existing evaluation file!")

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)

            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "labels": batch[3]
                }
                if args.model_type != "distilbert":
                    inputs["token_type_ids"] = (
                        batch[2] if args.model_type
                        in ["bert", "xlnet", "albert"] else None
                    )  # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids

                outputs = model(**inputs)

                if args.output_mode == "sequencetagging":
                    tmp_eval_loss, emissions, path = outputs[:3]
                    logits_tensor = path.detach().cpu().numpy()
                    labels_tensor = inputs["labels"].detach().cpu().numpy()
                    # Write output as conll file
                    tokenizer.batch_to_conll(inputs["input_ids"],
                                             logits_tensor, labels_tensor,
                                             processor, output_file)
                    # flatten so per-token predictions/labels line up 1-D
                    logits = logits_tensor.flatten()
                    labels = labels_tensor.flatten()

                else:
                    tmp_eval_loss, logits = outputs[:2]
                    logits = logits.detach().cpu().numpy()
                    labels = inputs["labels"].detach().cpu().numpy()

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = logits
                out_label_ids = labels
            else:
                preds = np.append(preds, logits, axis=0)
                out_label_ids = np.append(out_label_ids, labels, axis=0)

        eval_loss = eval_loss / nb_eval_steps
        if args.output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif args.output_mode == "regression":
            preds = np.squeeze(preds)
        result = compute_metrics(eval_task, preds, out_label_ids)

        results.append(result)

        output_eval_file = os.path.join(eval_output_dir, prefix,
                                        "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            logger.info(results[0])
            # file.write() requires a str; results[0] is a dict of metrics
            # (passing the dict directly raised TypeError)
            writer.write(str(results[0]))

    return results
예제 #21
0
def evaluate(args, model, tokenizer, prefix=""):
    """Evaluate `model` on the eval split of `args.task_name`.

    Computes the average eval loss and the task metrics, then writes a
    submission CSV containing the positive-class probability for every
    example plus an eval-results text file.  In cross-validation mode the
    files are written per fold under ``cross_validation_results/``.

    Args:
        args: run configuration (task_name, output_dir, device, n_gpu,
            per_gpu_eval_batch_size, model_type, output_mode,
            cross_validation, fold_number, local_rank, ...).
        model: model to evaluate; wrapped in DataParallel when n_gpu > 1.
        tokenizer: tokenizer passed through to load_and_cache_examples.
        prefix: label used in logs and as a subdirectory for result files.

    Returns:
        Dict of metric name -> value from compute_metrics.
    """
    eval_task = args.task_name
    results = {}

    # Change output_dir with hyper parameters.
    hyper_param_str = get_eval_folder_name(args)
    eval_output_dir = os.path.join(args.output_dir, hyper_param_str)
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    eval_dataset = load_and_cache_examples(args, eval_task, tokenizer, evaluate=True)
    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # multi-gpu eval
    if args.n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    all_guids = None
    all_probs = None
    preds = None
    out_label_ids = None
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        # batch layout: (guid, input_ids, attention_mask, token_type_ids, label)
        guids = batch[0]
        with torch.no_grad():
            inputs = {"input_ids": batch[1], "attention_mask": batch[2], "labels": batch[4]}
            if args.model_type != "distilbert" and args.model_type != "bart":
                inputs["token_type_ids"] = (
                    batch[3] if args.model_type in ["bert", "xlnet", "albert"] else None
                )  # XLM, DistilBERT, RoBERTa, and XLM-RoBERTa don't use segment_ids
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if preds is None:
            all_guids = guids.detach().cpu().numpy()
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            all_guids = np.append(all_guids, guids.detach().cpu().numpy(), axis=0)
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)

    # probability of the positive class, used for the submission file
    all_probs = softmax(preds, axis=1)[:, 1]
    eval_loss = eval_loss / nb_eval_steps
    if args.output_mode == "classification":
        preds = np.argmax(preds, axis=1)
    elif args.output_mode == "regression":
        preds = np.squeeze(preds)

    result = compute_metrics(eval_task, preds, out_label_ids)
    results.update(result)

    # Choose output locations; the submission writing itself is identical in
    # both modes, so only the paths differ (previously duplicated verbatim).
    if args.cross_validation:
        cross_validation_results_dir = os.path.join(eval_output_dir, "cross_validation_results")
        if not os.path.exists(cross_validation_results_dir) and args.local_rank in [-1, 0]:
            os.makedirs(cross_validation_results_dir)
        output_eval_file = os.path.join(cross_validation_results_dir, "eval_results_fold_" + str(args.fold_number) + ".txt")
        output_pred_file = os.path.join(cross_validation_results_dir, "submission_fold_" + str(args.fold_number) + ".csv")
    else:
        output_eval_file = os.path.join(eval_output_dir, prefix, "eval_results.txt")
        output_pred_file = os.path.join(eval_output_dir, prefix, "submission.csv")

    # Write per-example positive-class probabilities keyed by sentence id.
    submission = pd.DataFrame()
    submission['sentenceID'] = all_guids.tolist()
    submission['pred_prob'] = all_probs.tolist()
    submission.to_csv(output_pred_file, index=False)

    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return results
예제 #22
0
def main(args):
    """Train a label-prediction model with validation, checkpointing and early stopping.

    Sets up the output/log/checkpoint directory tree, stdout/stderr loggers and
    (optionally) a tensorboard visualizer, builds the train/val data loaders and
    the model (optionally resuming from the latest checkpoint), then runs the
    train/val loop for ``args.num_epochs`` epochs.  The best checkpoint per
    metric is saved, patience-based early stopping is applied, and a HALT file
    containing the best metric values is written as a job-completion marker.

    Args:
        args: parsed command-line namespace; the fields read here (save_dir,
            dataset, model_name, num_epochs, learning_rate, ...) are defined by
            the project's argument parser — see that module for their semantics.
    """
    global HALT_filename, CHECKPOINT_tempfile

    # Create model directory & other aux folders for logging
    where_to_save = os.path.join(args.save_dir, args.dataset, args.model_name,
                                 args.image_model, args.experiment_name)
    checkpoints_dir = os.path.join(where_to_save, 'checkpoints')
    suffix = '_'.join([args.dataset, args.model_name, str(args.seed)])
    checkpoint_filename = os.path.join(checkpoints_dir,
                                       '_'.join([suffix, 'checkpoint']))
    print(checkpoint_filename)
    logs_dir = os.path.join(where_to_save, 'logs')
    tb_logs = os.path.join(where_to_save, 'tb_logs', args.dataset,
                           args.model_name + '_' + str(args.seed))
    make_dir(where_to_save)
    make_dir(logs_dir)
    make_dir(checkpoints_dir)
    make_dir(tb_logs)

    # Create loggers
    # stdout logger: mirrors messages to a per-run log file and to stdout
    stdout_logger = logging.getLogger('STDOUT')
    stdout_logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(threadName)s - %(levelname)s: %(message)s')
    fh_out = logging.FileHandler(
        os.path.join(logs_dir, 'train_{}.log'.format(suffix)))
    fh_out.setFormatter(formatter)
    stdout_logger.addHandler(fh_out)
    ch = logging.StreamHandler(stream=sys.stdout)
    ch.setFormatter(formatter)
    stdout_logger.addHandler(ch)
    # stderr logger: sys.stderr is redirected so tracebacks land in the .err file
    stderr_logger = logging.getLogger('STDERR')
    fh_err = logging.FileHandler(os.path.join(logs_dir,
                                              'train_{}.err'.format(suffix)),
                                 mode='w')
    fh_err.setFormatter(formatter)
    stderr_logger.addHandler(fh_err)
    sl_stderr = StreamToLogger(stderr_logger, logging.ERROR)
    sys.stderr = sl_stderr

    # HALT file is used as a sign of job completion.
    # Check if no HALT file left from previous runs.
    HALT_filename = os.path.join(where_to_save, 'HALT_{}'.format(suffix))
    if os.path.isfile(HALT_filename):
        os.remove(HALT_filename)

    # Remove CHECKPOINT_tempfile left over from an interrupted save
    CHECKPOINT_tempfile = checkpoint_filename + '.tmp.ckpt'
    if os.path.isfile(CHECKPOINT_tempfile):
        os.remove(CHECKPOINT_tempfile)

    # Create tensorboard visualizer
    if args.tensorboard:
        logger = Visualizer(tb_logs, name='visual_results', resume=args.resume)

    # Check if we want to resume from last checkpoint of current model.
    # NOTE: args is replaced by the checkpointed args, keeping only the
    # (possibly updated) num_epochs from the current invocation.
    checkpoint = None
    if args.resume:
        if os.path.isfile(checkpoint_filename + '.ckpt'):
            checkpoint = torch.load(checkpoint_filename + '.ckpt',
                                    map_location=map_loc)
            num_epochs = args.num_epochs
            args = checkpoint['args']
            args.num_epochs = num_epochs

    # Build data loader
    data_loaders = {}
    datasets = {}
    for split in ['train', 'val']:

        transforms_list = [transforms.Resize(args.image_size)]

        # Image pre-processing: augment only the training split
        if split == 'train':
            transforms_list.append(transforms.RandomHorizontalFlip())
            transforms_list.append(
                transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)))
            transforms_list.append(transforms.RandomCrop(args.crop_size))

        else:
            transforms_list.append(transforms.CenterCrop(args.crop_size))
        transforms_list.append(transforms.ToTensor())
        transforms_list.append(
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)))
        transform = transforms.Compose(transforms_list)

        # Load dataset path
        datapaths = json.load(open('../configs/datapaths.json'))
        dataset_root = datapaths[args.dataset]
        data_loaders[split], datasets[split] = get_loader(
            dataset=args.dataset,
            dataset_root=dataset_root,
            split=split,
            transform=transform,
            batch_size=args.batch_size,
            include_eos=(args.decoder != 'ff'),
            shuffle=(split == 'train'),
            num_workers=args.num_workers,
            drop_last=(split == 'train'),
            shuffle_labels=args.shuffle_labels,
            seed=args.seed,
            checkpoint=checkpoint)
        stdout_logger.info('Dataset {} split contains {} images'.format(
            split, len(datasets[split])))

    # both splits share one vocabulary; read it off the last split built
    vocab_size = len(datasets[split].get_vocab())
    stdout_logger.info('Vocabulary size is {}'.format(vocab_size))

    # Build the model
    model = get_model(args, vocab_size)

    # add model parameters
    if model.image_encoder.last_module is not None:
        params = list(model.decoder.parameters()) + list(
            model.image_encoder.last_module.parameters())
    else:
        params = list(model.decoder.parameters())
    params_cnn = list(model.image_encoder.pretrained_net.parameters())

    n_p_cnn = sum(p.numel() for p in params_cnn if p.requires_grad)
    n_p = sum(p.numel() for p in params if p.requires_grad)
    total = n_p + n_p_cnn
    stdout_logger.info("CNN params: {}".format(n_p_cnn))
    stdout_logger.info("decoder params: {}".format(n_p))
    stdout_logger.info("total params: {}".format(total))

    # encoder and decoder optimizers: fine-tune the CNN from the start only
    # when finetune_after == 0, with a scaled learning rate for the CNN params
    if params_cnn is not None and args.finetune_after == 0:
        optimizer = torch.optim.Adam(
            [{
                'params': params
            }, {
                'params': params_cnn,
                'lr': args.learning_rate * args.scale_learning_rate_cnn
            }],
            lr=args.learning_rate,
            weight_decay=args.weight_decay)
        keep_cnn_gradients = True
        stdout_logger.info("Fine tuning image encoder")
    else:
        optimizer = torch.optim.Adam(params, lr=args.learning_rate)
        keep_cnn_gradients = False
        stdout_logger.info("Freezing image encoder")

    # early stopping and checkpoint state (best value and epoch per metric)
    es_best = {'o_f1': 0, 'c_f1': 0, 'i_f1': 0, 'average': 0}
    epoch_best = {'o_f1': -1, 'c_f1': -1, 'i_f1': -1, 'average': -1}
    if checkpoint is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])
        # move optimizer state tensors to the target device after loading
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)
        model.load_state_dict(checkpoint['state_dict'])
        es_best = checkpoint['es_best']
        epoch_best = checkpoint['epoch_best']

    if device != 'cpu' and torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    model = model.to(device)
    cudnn.benchmark = True

    if not hasattr(args, 'current_epoch'):
        args.current_epoch = 0

    # Train the model
    decay_factor = 1.0
    start_step = 0 if checkpoint is None else checkpoint['current_step']
    curr_pat = 0 if checkpoint is None else checkpoint['current_pat']
    for epoch in range(args.current_epoch, args.num_epochs):

        # save current epoch for resuming
        if args.tensorboard:
            logger.reset()

        # increase / decrease values for moving params
        if args.decay_lr:
            frac = epoch // args.lr_decay_every
            decay_factor = args.lr_decay_rate**frac
            new_lr = args.learning_rate * decay_factor
            stdout_logger.info('Epoch %d. lr: %.5f' % (epoch, new_lr))
            set_lr(optimizer, decay_factor)

        # switch on CNN fine-tuning once the configured epoch is passed
        if args.finetune_after != -1 and args.finetune_after < epoch \
                and not keep_cnn_gradients and params_cnn is not None:

            stdout_logger.info("Starting to fine tune CNN")
            # start with learning rates as they were (if decayed during training)
            optimizer = torch.optim.Adam([{
                'params': params
            }, {
                'params':
                params_cnn,
                'lr':
                decay_factor * args.learning_rate *
                args.scale_learning_rate_cnn
            }],
                                         lr=decay_factor * args.learning_rate)
            keep_cnn_gradients = True

        for split in ['train', 'val']:

            if split == 'train':
                model.train()
            else:
                model.eval()
            total_step = len(data_loaders[split])
            loader = iter(data_loaders[split])

            total_loss_dict = {
                'label_loss': [],
                'eos_loss': [],
                'cardinality_loss': [],
                'loss': [],
                'o_f1': [],
                'c_f1': [],
                'i_f1': [],
            }

            torch.cuda.synchronize()
            start = time.time()

            overall_error_counts = {
                'tp_c': 0,
                'fp_c': 0,
                'fn_c': 0,
                'tn_c': 0,
                'tp_all': 0,
                'fp_all': 0,
                'fn_all': 0
            }

            # i is the step counter within this split's epoch (used for
            # log_step reporting); training resumes mid-epoch at start_step
            i = 0 if split == 'val' else start_step
            for info in loader:
                img_inputs, gt = info

                # adapt gts by adding pad_value to match maxnumlabel length
                gt = [
                    sublist + [vocab_size - 1] *
                    (args.maxnumlabels - len(sublist)) for sublist in gt
                ]
                gt = torch.LongTensor(gt)

                # move to device
                img_inputs = img_inputs.to(device)
                gt = gt.to(device)

                loss_dict = {}

                if split == 'val':
                    with torch.no_grad():
                        # get losses and label predictions
                        _, predictions = model(img_inputs,
                                               maxnumlabels=args.maxnumlabels,
                                               compute_losses=False,
                                               compute_predictions=True)

                        # convert model predictions and targets to k-hots
                        pred_k_hots = label2_k_hots(
                            predictions,
                            vocab_size - 1,
                            remove_eos=(args.decoder != 'ff'))
                        target_k_hots = label2_k_hots(
                            gt,
                            vocab_size - 1,
                            remove_eos=(args.decoder != 'ff'))

                        # update overall and per class error types
                        update_error_counts(overall_error_counts, pred_k_hots,
                                            target_k_hots)

                        # update per image error types
                        # (BUGFIX: the per-image loop previously reused `i`,
                        # clobbering the outer step counter during validation)
                        i_f1s = []
                        for img_idx in range(pred_k_hots.size(0)):
                            image_error_counts = {
                                'tp_c': 0,
                                'fp_c': 0,
                                'fn_c': 0,
                                'tn_c': 0,
                                'tp_all': 0,
                                'fp_all': 0,
                                'fn_all': 0
                            }
                            update_error_counts(
                                image_error_counts,
                                pred_k_hots[img_idx].unsqueeze(0),
                                target_k_hots[img_idx].unsqueeze(0))
                            image_metrics = compute_metrics(
                                image_error_counts, which_metrics=['f1'])
                            i_f1s.append(image_metrics['f1'])

                        loss_dict['i_f1'] = np.mean(i_f1s)
                        del predictions, pred_k_hots, target_k_hots, image_metrics

                else:
                    losses, _ = model(img_inputs,
                                      gt,
                                      maxnumlabels=args.maxnumlabels,
                                      keep_cnn_gradients=keep_cnn_gradients,
                                      compute_losses=True)

                    # label loss
                    label_loss = losses['label_loss']
                    label_loss = label_loss.mean()
                    loss_dict['label_loss'] = label_loss.item()

                    # cardinality loss
                    if args.pred_cardinality != 'none':
                        cardinality_loss = losses['cardinality_loss']
                        cardinality_loss = cardinality_loss.mean()
                        loss_dict['cardinality_loss'] = cardinality_loss.item()
                    else:
                        cardinality_loss = 0

                    # eos loss
                    if args.perminv:
                        eos_loss = losses['eos_loss']
                        eos_loss = eos_loss.mean()
                        loss_dict['eos_loss'] = eos_loss.item()
                    else:
                        eos_loss = 0

                    # total loss: weighted sum of the three components
                    loss = args.loss_weight[0] * label_loss \
                           + args.loss_weight[1]*cardinality_loss + \
                           args.loss_weight[2]*eos_loss
                    loss_dict['loss'] = loss.item()

                    # optimizer step
                    model.zero_grad()
                    loss.backward()
                    optimizer.step()

                    del loss, losses
                del img_inputs

                for key in loss_dict.keys():
                    total_loss_dict[key].append(loss_dict[key])

                # Print log info
                if args.log_step != -1 and i % args.log_step == 0:
                    elapsed_time = time.time() - start
                    lossesstr = ""
                    for k in total_loss_dict.keys():
                        if len(total_loss_dict[k]) == 0:
                            continue
                        this_one = "%s: %.4f" % (
                            k, np.mean(total_loss_dict[k][-args.log_step:]))
                        lossesstr += this_one + ', '
                    # this only displays nll loss on captions, the rest of losses will
                    # be in tensorboard logs
                    strtoprint = 'Split: %s, Epoch [%d/%d], Step [%d/%d], Losses: %sTime: %.4f' % (
                        split, epoch, args.num_epochs, i, total_step,
                        lossesstr, elapsed_time)
                    stdout_logger.info(strtoprint)
                    if args.tensorboard and split == 'train':
                        logger.scalar_summary(
                            mode=split + '_iter',
                            epoch=total_step * epoch + i,
                            **{
                                k: np.mean(v[-args.log_step:])
                                for k, v in total_loss_dict.items() if v
                            })

                    torch.cuda.synchronize()
                    start = time.time()

                i += 1

            if split == 'train':
                increase_loader_epoch(data_loaders['train'])
                start_step = 0

            if split == 'val':
                overal_metrics = compute_metrics(overall_error_counts,
                                                 ['f1', 'c_f1'],
                                                 weights=None)

                total_loss_dict['o_f1'] = overal_metrics['f1']
                total_loss_dict['c_f1'] = overal_metrics['c_f1']

                if args.tensorboard:
                    # 1. Log scalar values (scalar summary)
                    logger.scalar_summary(
                        mode=split,
                        epoch=epoch,
                        **{
                            k: np.mean(v)
                            for k, v in total_loss_dict.items() if v
                        })

        # early stopping: track the checkpointing metric (or the average of
        # the three F1 metrics) and save a ".best.<metric>" checkpoint
        metric_average = 0
        best_at_checkpoint_metric = False
        if args.metric_to_checkpoint != 'average':
            es_value = np.mean(total_loss_dict[args.metric_to_checkpoint])
            if es_value > es_best[args.metric_to_checkpoint]:
                es_best[args.metric_to_checkpoint] = es_value
                epoch_best[args.metric_to_checkpoint] = epoch
                best_at_checkpoint_metric = True
                save_checkpoint(
                    model, optimizer, args, es_best, epoch_best, 0, 0,
                    '{}.best.{}'.format(checkpoint_filename,
                                        args.metric_to_checkpoint))
        else:
            for metric in ['o_f1', 'c_f1', 'i_f1']:
                es_value = np.mean(total_loss_dict[metric])
                metric_average += es_value
            metric_average /= 3
            if metric_average > es_best['average']:
                es_best['average'] = metric_average
                epoch_best['average'] = epoch
                if 'average' == args.metric_to_checkpoint:
                    best_at_checkpoint_metric = True
                    save_checkpoint(
                        model, optimizer, args, es_best, epoch_best, 0, 0,
                        '{}.best.average'.format(checkpoint_filename))

        # patience counter for early stopping
        if best_at_checkpoint_metric:
            curr_pat = 0
        else:
            curr_pat += 1

        args.current_epoch = epoch + 1  # Save the epoch at which the model needs to start
        save_checkpoint(model, optimizer, args, es_best, epoch_best, 0,
                        curr_pat, checkpoint_filename)
        stdout_logger.info('Saved checkpoint for epoch {}.'.format(epoch))

        if curr_pat > args.patience:
            break

    # Mark job as finished: write best metric values into the HALT file
    with open(HALT_filename, 'w') as f:
        for metric in es_best.keys():
            f.write('{}:{}\n'.format(metric, es_best[metric]))

    if args.tensorboard:
        logger.close()
예제 #23
0
def evaluate_model(Net,
                   seeds,
                   mini_batch_size=100,
                   optimizer=optim.Adam,
                   criterion=None,
                   n_epochs=40,
                   eta=1e-3,
                   lambda_l2=0,
                   alpha=0.5,
                   beta=0.5,
                   plot=True,
                   statistics=True,
                   rotate=False,
                   translate=False,
                   swap_channel=False,
                   GPU=False):
    """
    General : len(seeds) rounds of network training / validation with statistics

         - Repeat the training/validation procedure once per seed in seeds
             1) At every seed -> reinitializes a network and a dataset : train, validation and test
             2) Weights initialization and data loading are using the seed
             3) Record the train and validation accuracy and loss and can display their evolution curve
             4) Compute the statistics at the end of each training for performance evaluation
                 i)  Mean training accuracy for each seed -> value at the end of the last epoch
                 ii) Mean validation accuracy for each seed -> value at the end of the last epoch
                 iii) Mean test accuracy for each seed -> compute the accuracy on the test after each training
                 -> display a boxplot of the statistics if statistics is true and print the mean and standard deviation

     Input :

         - Net : A network dictionnary from the <Nets> class
         - seeds : a list of seeds to iterate over for the pseudo random number generator used in weight initialization and data loading
         -> mini_batch_size, optimizer, criterion, n_epochs, eta, lambda_l2, alpha, beta see training.py
            (criterion defaults to a fresh nn.CrossEntropyLoss() when None, so no loss instance is shared across calls)
         - plot : if true plot the learning curve evolution over the epochs -> default true
         - statistics : if true display the boxplot of the train, validation and test accuracies and print the mean and standard deviation
                        statistics
         -> rotate, translate and swap_channel -> data augmentation see loader.py

     Output :

         - train_results : A (len(seeds) x 4 x n_epochs) tensor
                             dim 0 -> seed
                             dim 1 -> train loss, train accuracy, validation loss, validation accuracy
                             dim 2 -> evolution during training
         - test_losses : A tensor of shape (len(seeds),) containing the test loss at each seed
         - test_accuracies : A tensor of shape (len(seeds),) containing the test accuracy at each seed

    """
    # avoid a shared module instance as a default argument: build one per call
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    # tensor initialization to store the metrics
    train_results = torch.empty(len(seeds), 4, n_epochs)
    test_losses = []
    test_accuracies = []

    for n, seed in enumerate(seeds):

        # set the pytorch seed
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

        # set the seed for random splitting of the dataset in training and validation
        random.seed(0)

        # load the dataset train, validation and test
        data = PairSetMNIST()
        train_data = Training_set(data)
        test_data = Test_set(data)
        train_data_split = Training_set_split(train_data, rotate, translate,
                                              swap_channel)
        validation_data = Validation_set(train_data)

        # construct the net type with default parameter
        if (Net['net_type'] == 'Net2c'):
            model = Net['net'](nb_hidden=Net['hidden_layers'],
                               dropout_prob=Net['drop_prob'])
        if (Net['net_type'] == 'LeNet_sharing'):
            model = Net['net'](nb_hidden=Net['hidden_layers'],
                               dropout_ws=Net['drop_prob_ws'],
                               dropout_comp=Net['drop_prob_comp'])
        if (Net['net_type'] == 'LeNet_sharing_aux'):
            # check if any data augmentation has been called
            # if none construct with tuned parameters without data augmentation
            # if yes construct with tuned parameters with data augmentation
            if (rotate == False and translate == False
                    and swap_channel == False):
                model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                                   nbhidden_comp=Net['hidden_layers_comp'],
                                   drop_prob_aux=Net['drop_prob_aux'],
                                   drop_prob_comp=Net['drop_prob_comp'])
            else:
                Net['learning rate'] = Net['learning rate augm']
                model = Net['net'](nbhidden_aux=Net['hidden_layers_aux'],
                                   nbhidden_comp=Net['hidden_layers_comp'],
                                   drop_prob_aux=Net['drop_prob_aux_augm'],
                                   drop_prob_comp=Net['drop_prob_comp_augm'])
        if (Net['net_type'] == 'Google_Net'):
            model = Net['net'](channels_1x1=Net['channels_1x1'],
                               channels_3x3=Net['channels_3x3'],
                               channels_5x5=Net['channels_5x5'],
                               pool_channels=Net['pool_channels'],
                               nhidden=Net['hidden_layers'],
                               drop_prob_comp=Net['drop_prob_comp'],
                               drop_prob_aux=Net['drop_prob_aux'])

        if GPU and cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

        model = model.to(device)

        # train the model on the train set and validate at each epoch
        train_losses, train_acc, valid_losses, valid_acc = train_model(
            model, train_data_split, validation_data, device, mini_batch_size,
            optimizer, criterion, n_epochs, Net['learning rate'], lambda_l2,
            alpha, beta)
        # store the training and validation accuracies and losses during the training
        train_results[n, ] = torch.tensor(
            [train_losses, train_acc, valid_losses, valid_acc])
        # compute the loss and accuracy of the model on the test set
        test_loss, test_acc = compute_metrics(model, test_data, device)
        # store the test metrics in the list
        test_losses.append(test_loss)
        test_accuracies.append(test_acc)

        # learning curve
        if plot:
            learning_curve(train_losses, train_acc, valid_losses, valid_acc)

        print(
            'Seed {:d} | Test Loss: {:.4f} | Test Accuracy: {:.2f}%\n'.format(
                n, test_loss, test_acc))

    # store the train, validation and test accuracies in a tensor for the boxplot
    data = torch.stack([
        train_results[:, 1, (n_epochs - 1)], train_results[:, 3,
                                                           (n_epochs - 1)],
        torch.tensor(test_accuracies)
    ])
    # BUGFIX: reshape by the actual number of seeds instead of a hard-coded 10,
    # which raised a RuntimeError whenever len(seeds) != 10
    data = data.view(1, 3, len(seeds))
    # boxplot
    if statistics:
        Title = " Models accuracies"
        models = [Net['net_type']]
        boxplot(data, Title, models, True)

    return train_results, torch.tensor(test_losses), torch.tensor(
        test_accuracies)