Example #1
    def loss_fn(self, out, annot):
        # Build the target tensor from the annotations, then compute the four
        # separate error terms (localisation, width/height, confidence, class)
        # and return them individually.
        tar_vector = Losses.get_tar_vector(annot)
        loss_loc = Losses.get_loc_error(out, tar_vector)
        loss_wh = Losses.get_w_h_error(out, tar_vector)
        loss_conf = Losses.get_confidence_error(out, tar_vector)
        loss_cls = Losses.get_class_error(out, tar_vector)
        return loss_loc, loss_wh, loss_conf, loss_cls
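The method returns the four terms separately rather than a single scalar. As a hedged illustration, a caller could weight and sum them before calling backward(); the weights and the total_loss helper below are hypothetical, not taken from this project.

# A minimal sketch (not from the project) of how a caller might collapse the
# four returned terms into one scalar for backpropagation; the weighting
# factors below are hypothetical hyperparameters.
LAMBDA_COORD, LAMBDA_CONF, LAMBDA_CLS = 5.0, 1.0, 1.0

def total_loss(loss_loc, loss_wh, loss_conf, loss_cls):
    # Weight the coordinate terms more heavily, then sum everything so a
    # single .backward() call can be made on the result.
    return (LAMBDA_COORD * (loss_loc + loss_wh)
            + LAMBDA_CONF * loss_conf
            + LAMBDA_CLS * loss_cls)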
Example #2
def transfer(model, decoder, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0, data1, vocab.word2id,
                                          args.batch_size)

    data0_tsf, data1_tsf = [], []
    losses = Losses(len(batches))
    for batch in batches:
        ori, tsf = decoder.rewrite(batch)
        half = batch['size'] // 2  # integer division: the slice bounds must be ints
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]

        loss, loss_g, loss_d, loss_d0, loss_d1 = sess.run(
            [
                model.loss, model.loss_g, model.loss_d, model.loss_d0,
                model.loss_d1
            ],
            feed_dict=feed_dictionary(model, batch, args.rho, args.gamma_min))
        losses.add(loss, loss_g, loss_d, loss_d0, loss_d1)

    n0, n1 = len(data0), len(data1)
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        write_sent(data0_tsf, out_path + '.0' + '.tsf')
        write_sent(data1_tsf, out_path + '.1' + '.tsf')

    return losses
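The `Losses(len(batches))` accumulator used above is not shown in the example. A minimal sketch of what such a helper might look like, assuming it only keeps running averages of the five values over the given number of batches; the class body below is a guess for illustration, not the project's actual implementation.

class Losses(object):
    """Illustrative stand-in: running averages of the five transfer losses."""

    def __init__(self, n):
        self.n = n                      # number of batches to average over
        self.loss = self.g = self.d = self.d0 = self.d1 = 0.0

    def add(self, loss, loss_g, loss_d, loss_d0, loss_d1):
        # Accumulate each value scaled by 1/n so the fields end up as means.
        self.loss += loss / self.n
        self.g += loss_g / self.n
        self.d += loss_d / self.n
        self.d0 += loss_d0 / self.n
        self.d1 += loss_d1 / self.n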
Example #3
    def __init__(self,
                 model,
                 optimizer,
                 train_loader,
                 test_loader,
                 args,
                 epoch=-1,
                 global_step=0,
                 test_mode=False):

        if args.fp16:
            try:
                # Declare the name global before importing so the module-level
                # `amp` binding is visible outside this method.
                global amp
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )

        self.model = model
        self.args = args
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.epoch = epoch
        self.module = model.module if hasattr(
            model, 'module') else model  # for data parallel
        self.masking_policies = [
            'random', 'seen_noun', 'seen_verb',
            'seen_combo_seen_noun_seen_verb', 'new_noun', 'new_verb',
            'new_combo_seen_noun_seen_verb', 'new_combo_new_noun_new_verb',
            'seen_combo_seen_noun_seen_verb_merge',
            'new_combo_seen_noun_seen_verb_merge',
            'new_combo_new_noun_new_verb_merge'
        ]
        if test_mode and not args.pointing:
            self.masker = TestMasker(annotation_root=args.annotation_root,
                                     masking_policy=args.test_masking_policy,
                                     tok=self.train_loader.dataset.tokenizer,
                                     p_mask_img=args.p_mask_img,
                                     p_mask_txt=args.p_mask_txt)
        else:
            self.masker = Masker(self.train_loader.dataset.tokenizer,
                                 **vars(args))
        self.losses = Losses(self.module.cfg, args, **vars(args))
        self.global_step = global_step
Example #4
def transfer(model, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0, data1, vocab.word2id,
                                          args.batch_size)

    data0_tsf, data1_tsf = [], []
    losses = Losses(len(batches))
    for batch in batches:
        ori, tsf, loss, loss_g, loss_d, loss_d0, loss_d1 = rewrite(
            model, sess, args, vocab, batch)
        half = batch['size'] // 2  # integer division: the slice bounds must be ints
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]
        losses.add(loss, loss_g, loss_d, loss_d0, loss_d1)

    n0, n1 = len(data0), len(data1)
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        write_sent(data0_tsf, out_path + '.0' + '.tsf')
        write_sent(data1_tsf, out_path + '.1' + '.tsf')

    return losses
Example #5
class Trainer:
    """ Class implementing the trainer for the project """
    def __init__(self,
                 model,
                 optimizer,
                 train_loader,
                 test_loader,
                 args,
                 epoch=-1,
                 global_step=0,
                 test_mode=False):

        if args.fp16:
            try:
                # Declare the name global before importing so the module-level
                # `amp` binding is visible outside this method.
                global amp
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )

        self.model = model
        self.args = args
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.epoch = epoch
        self.module = model.module if hasattr(
            model, 'module') else model  # for data parallel
        self.masking_policies = [
            'random', 'seen_noun', 'seen_verb',
            'seen_combo_seen_noun_seen_verb', 'new_noun', 'new_verb',
            'new_combo_seen_noun_seen_verb', 'new_combo_new_noun_new_verb',
            'seen_combo_seen_noun_seen_verb_merge',
            'new_combo_seen_noun_seen_verb_merge',
            'new_combo_new_noun_new_verb_merge'
        ]
        if test_mode and not args.pointing:
            self.masker = TestMasker(annotation_root=args.annotation_root,
                                     masking_policy=args.test_masking_policy,
                                     tok=self.train_loader.dataset.tokenizer,
                                     p_mask_img=args.p_mask_img,
                                     p_mask_txt=args.p_mask_txt)
        else:
            self.masker = Masker(self.train_loader.dataset.tokenizer,
                                 **vars(args))
        self.losses = Losses(self.module.cfg, args, **vars(args))
        self.global_step = global_step

    def train(self):

        best_eval = 0
        try:
            for epoch in trange(self.epoch + 1,
                                self.args.num_train_epochs,
                                desc='Training model'):
                if self.args.local_rank != -1:
                    self.train_loader.sampler.set_epoch(epoch)

                self.run_epoch(epoch)

                # Evaluate on validation set
                # The last one is the one that we take into account for the checkpoints
                val_score = self.run_epoch(epoch, train=False)
                # Remember best eval score and save checkpoint
                is_best = val_score > best_eval
                best_eval = max(val_score, best_eval)
                if self.args.local_rank <= 0 and not self.args.debug:
                    print('Saving checkpoint')
                    utils.save_checkpoint(self.model,
                                          self.optimizer,
                                          self.train_loader.dataset.tokenizer,
                                          is_best,
                                          epoch,
                                          self.args.checkpoint_dir,
                                          amp=amp,
                                          global_step=self.global_step,
                                          args=self.args)

        except KeyboardInterrupt:
            if self.args.local_rank <= 0:
                print(f'You decided to finish the training at epoch {epoch}')

    def run_epoch(self, epoch, train=True):
        """
        During the training loop, we find the following arrays:
        - text_mask_locs:
        Tensor of size B x T, T being the maximum of all the B T's. Each element contains a boolean tensor that contains
        True if the token at that position MUST be masked. This will depend on the `target_token_ids` and whether or not
        the token at position belongs to the target sequence. This masking means that the specific token will be
        predicted (true?) in all the text losses (language model, pointing, episodic), but will not necessarily be
        substituted by a [MASK] token, as this is random and sometimes it stays the same or is substituted by a random
        word.
        - text_no_mask_locs:
        Tensor of size B x T, each element containing a boolean tensor that contains True if in that position the token
        CANNOT be masked.
        - img_no_mask_locs similarly.
        """
        torch.cuda.synchronize()

        # Initialize meters
        avg_batch_time = utils.AverageMeter()
        avg_data_time = utils.AverageMeter()

        list_losses = ['total', 'lm', 'vm']
        list_losses.extend(['pointing'] if self.args.pointing else [])
        list_losses.extend(
            ['input_pointing'] if self.args.input_pointing else [])

        average_meters = defaultdict(lambda: utils.AverageMeter())

        if not train:
            avg_lm_top1 = utils.AverageMeter()
            avg_lm_top5 = utils.AverageMeter()
            avg_pointing_acc = utils.AverageMeter()
            avg_input_pointing_acc = utils.AverageMeter()

        # Switch to train mode
        if train:
            self.model.train()
        else:
            self.model.eval()

        end = time.time()

        with torch.set_grad_enabled(train), \
             tqdm(self.train_loader if train else self.test_loader,
                  desc=f'Training epoch {epoch}' if train else f'Validating {f"epoch {epoch}" if epoch else ""}',
                  disable=self.args.local_rank > 0) as t:
            for batch_idx, data in enumerate(t):
                # Measure data loading time
                avg_data_time.update(time.time() - end)

                # -------------- Organize inputs ------------- #

                img_no_mask_locs = None
                text_no_mask_locs = None
                text_mask_locs = None
                with torch.no_grad():
                    if self.args.pointing:
                        text_mask_locs, text_no_mask_locs = self.masker.gen_pointing_text_mask_locs(
                            data)

                imgs, vm_labels, neg_vm_labels = self.masker.mask_imgs(
                    data['imgs'].cuda(), no_mask_locs=img_no_mask_locs)

                # Note that this does not mask sep tokens
                text, lm_labels, input_pointing_labels = \
                    self.masker.mask_text(data['text'].cuda(), self.args.input_pointing, no_mask_locs=text_no_mask_locs,
                                          mask_locs=text_mask_locs, **data)
                img_bboxes = data['img_bboxes'].cuda()
                imgs_len = data['imgs_len'].cuda()
                text_len = data['text_len'].cuda()

                img_locs = txt_locs = None

                if self.args.pointing:
                    attn_mask, img_locs, txt_locs = self.masker.attn_mask_pointing(
                        imgs_len, text_len, data['seq_type'],
                        data['num_seqs'].cuda(), self.args.attn_masking)
                    # The input to the model is:
                    # imgs = [[img0, img1, ..., imgN1, PAD, ..., PAD], [...], [[img0, img1, ..., imgNk, PAD, ..., PAD]]]
                    # where the padding is such that all K in the batch have the same total length (minimal padding)
                    # The N images include all the images from all the sequences, concatenated. Only padding at the end

                else:
                    img_attn_mask = \
                        torch.arange(self.args.max_img_seq_len, device=imgs.device)[None, :] < imgs_len[:, None]
                    text_attn_mask = \
                        torch.arange(self.args.max_txt_seq_len, device=imgs.device)[None, :] < text_len[:, None]
                    attn_mask = torch.cat(
                        (text_attn_mask[:, :1], img_attn_mask,
                         text_attn_mask[:, 1:]),
                        dim=1)

                # text starts with [IMG] token that gets moved to beginning of input in forward pass

                # -------------- Forward pass ---------------- #

                lm_preds, vm_preds, input_pointing_pred, hidden_states, *_ = \
                    self.model(imgs, text, img_bboxes, attention_mask=attn_mask, img_lens=imgs_len,
                               txt_lens=text_len, img_locs=img_locs, txt_locs=txt_locs)

                # -------------- Compute losses -------------- #

                loss_values = {}
                if self.args.pointing:
                    non_padding_text = (torch.arange(
                        text.shape[1], device=text.device)[None, :] <
                                        text_len.cumsum(dim=1)[:, -1][:, None])
                    non_padding_imgs = (torch.arange(
                        imgs.shape[1], device=imgs.device)[None, :] <
                                        imgs_len.cumsum(dim=1)[:, -1][:, None])
                    loss_values['lm'] = self.losses.lm_loss(
                        lm_preds, lm_labels[non_padding_text])
                    loss_values['vm'] = self.losses.vm_loss(
                        vm_preds,
                        vm_labels[non_padding_imgs],
                        neg_vm_labels[non_padding_imgs],
                        embedder=self.module.embeddings.img_embeddings)
                else:
                    loss_values['lm'] = self.losses.lm_loss(
                        lm_preds, lm_labels)
                    loss_values['vm'] = self.losses.vm_loss(
                        vm_preds,
                        vm_labels,
                        neg_vm_labels,
                        embedder=self.module.embeddings.img_embeddings)
                loss = self.args.lm_loss_lambda * loss_values[
                    'lm'] + self.args.vm_loss_lambda * loss_values['vm']

                if self.args.pointing:
                    pointing_loss, (pointing_acc, pointing_cnt) = \
                        self.losses.pointing_loss(data, hidden_states, lm_labels, text, text_len, txt_locs)
                    loss_values['pointing'] = pointing_loss
                    loss += self.args.pointing_loss_lambda * loss_values[
                        'pointing']

                if self.args.input_pointing:
                    input_pointing_loss, (input_pointing_acc, input_pointing_cnt), *_ = \
                        self.losses.input_pointing_pointing_loss(
                            input_pointing_pred[0], input_pointing_pred[1],
                            input_pointing_labels, txt_locs, lm_labels,
                            data=data, log=True)
                    loss_values['input_pointing'] = input_pointing_loss
                    loss += self.args.input_pointing_loss_lambda * loss_values[
                        'input_pointing']

                if self.args.n_gpu > 1:
                    loss = loss.mean()
                loss_values['total'] = loss

                # --------------- Update model -------------- #

                if train:
                    if self.args.fp16:
                        with amp.scale_loss(loss,
                                            self.optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        (loss /
                         self.args.gradient_accumulation_steps).backward()

                if (batch_idx +
                        1) % self.args.gradient_accumulation_steps == 0:
                    for loss_name in list_losses:  # Record losses
                        average_meters[loss_name].update(
                            loss_values[loss_name].item() /
                            self.args.gradient_accumulation_steps,
                            imgs.size(0))

                    if train:
                        if self.args.fp16:
                            torch.nn.utils.clip_grad_norm_(
                                amp.master_params(self.optimizer),
                                self.args.max_grad_norm)
                        else:
                            torch.nn.utils.clip_grad_norm_(
                                self.model.parameters(),
                                self.args.max_grad_norm)
                        self.optimizer.step()
                        # scheduler.step()  # no scheduler for now
                        self.model.zero_grad()

                # Measure elapsed time
                avg_batch_time.update(time.time() - end)
                end = time.time()

                # ------------- Show information ------------ #

                postfix_kwargs = {}

                if not train:
                    if self.args.pointing:
                        lm_labels = lm_labels[non_padding_text]
                        avg_pointing_acc.update(pointing_acc, pointing_cnt)
                        postfix_kwargs['PointingAcc'] = avg_pointing_acc.avg
                        if self.args.input_pointing:
                            avg_input_pointing_acc.update(
                                input_pointing_acc, input_pointing_cnt)
                            postfix_kwargs[
                                'input_pointingAcc'] = avg_input_pointing_acc.avg
                    results = tests.accuracy(lm_preds, lm_labels, topk=(1, 5))
                    avg_lm_top1.update(*results['top1'])
                    avg_lm_top5.update(*results['top5'])
                    postfix_kwargs['LMTop1'] = avg_lm_top1.avg
                    postfix_kwargs['LMTop5'] = avg_lm_top5.avg

                for loss_name in list_losses:
                    postfix_kwargs[loss_name] = average_meters[loss_name].avg

                t.set_postfix(DataTime=avg_data_time.avg,
                              BatchTime=avg_batch_time.avg,
                              **postfix_kwargs)

                if train:
                    if self.global_step % self.args.print_freq == 0 and self.args.writer and not self.args.debug:
                        self.args.writer.add_scalars(
                            'train/loss', {**postfix_kwargs},
                            self.global_step * self.args.train_batch_size *
                            self.args.step_n_gpus)

                self.global_step += 1

        if not train:
            cnt = average_meters['total'].count

            if epoch is not None:
                loss_scalars = {}
                for loss_name in list_losses:
                    loss_scalars[loss_name] = utils.gather_score(
                        average_meters[loss_name].avg, cnt)

                acc_scalars = {
                    'lm_top1': utils.gather_score(avg_lm_top1.avg, cnt),
                    'lm_top5': utils.gather_score(avg_lm_top5.avg, cnt)
                }
                if self.args.pointing:
                    acc_scalars['pointing_acc'] = utils.gather_score(
                        avg_pointing_acc.avg, cnt)
                if self.args.input_pointing:
                    acc_scalars['input_pointing_acc'] = utils.gather_score(
                        avg_input_pointing_acc.avg, cnt)
                if self.args.writer and not self.args.debug:
                    self.args.writer.add_scalars('val/loss', loss_scalars,
                                                 epoch)
                    self.args.writer.add_scalars('val/acc', acc_scalars, epoch)

            return utils.gather_score(avg_lm_top5.avg, cnt)

    def test(self, masking_policy=None):
        torch.cuda.synchronize()
        if masking_policy == 'all_acc_tests':
            for p in self.masking_policies:
                self.test(p)
        else:
            tests.test_accuracy(self, masking_policy)
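For context, a hedged sketch of the glue code that could drive this class; every name constructed here (args, model, optimizer and the two loaders) is a placeholder for project-specific setup, not the project's real entry point.

def main(args, model, optimizer, train_loader, test_loader):
    # Hypothetical driver: all arguments are assumed to be built elsewhere.
    trainer = Trainer(model,
                      optimizer,
                      train_loader,
                      test_loader,
                      args,
                      epoch=-1,
                      global_step=0,
                      test_mode=False)
    trainer.train()                      # training + validation epochs, checkpoints
    # trainer.test('all_acc_tests')      # optionally evaluate every masking policy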
Example #6
    with tf.Session(config=config) as sess:
        model = create_model(sess, args, vocab)

        if args.beam > 1:
            decoder = beam_search.Decoder(sess, args, vocab, model)
        else:
            decoder = greedy_decoding.Decoder(sess, args, vocab, model)

        if args.train:
            batches, _, _ = get_batches(train0, train1, vocab.word2id,
                                        args.batch_size)
            random.shuffle(batches)

            start_time = time.time()
            step = 0
            losses = Losses(args.steps_per_checkpoint)
            best_dev = float('inf')
            learning_rate = args.learning_rate
            rho = args.rho
            gamma = args.gamma_init
            dropout = args.dropout_keep_prob

            for epoch in range(1, 1 + args.max_epochs):
                print('--------------------epoch %d--------------------' % epoch)
                print('learning_rate:', learning_rate, '  gamma:', gamma)

                for batch in batches:
                    feed_dict = feed_dictionary(model, batch, rho, gamma,
                                                dropout, learning_rate)

                    loss_d0, _ = sess.run([model.loss_d0, model.optimizer_d0],
Example #7
some_target_for_loc_error = {
    (3, 3):
    [[0, [1, 1], [torch.Tensor([0.1]).cuda(),
                  torch.Tensor([0.1]).cuda()]]]
}
some_predictions_for_loc_error = torch.zeros((1, cell_num, cell_num, 30))
for y in range(cell_num):
    for x in range(cell_num):
        some_predictions_for_loc_error[0, y, x, :] = torch.Tensor([
            0.2, 0.2, 0, 0, 0, 0.2, 0.2, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        ])
unittest.TestCase().assertAlmostEqual(
    0.01,
    Losses.get_loc_error(some_predictions_for_loc_error,
                         some_target_for_loc_error).cpu().detach().numpy()[0],
    2)

some_targets_class_prob_exist = {
    (3, 3): [[1, [0, 0], [torch.Tensor([0]).cuda(),
                          torch.Tensor([0]).cuda()]]]
}
some_predictions_class_prob_exist = torch.zeros((1, cell_num, cell_num, 30))
for y in range(cell_num):
    for x in range(cell_num):
        if y == 3 and x == 3:
            some_predictions_class_prob_exist[0, y, x, :] = torch.Tensor([
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.2, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            ])
        else:
Example #8
File: train.py Project: xuqy1981/ViP
def train(**args):
    """
    Train selected model
    Args:
        rerun        (Int):        Integer indicating number of repetitions for the select experiment 
        seed         (Int):        Integer indicating set seed for random state
        save_dir     (String):     Top level directory to generate results folder
        model        (String):     Name of selected model 
        dataset      (String):     Name of selected dataset  
        exp          (String):     Name of experiment 
        debug        (Int):        Debug state to avoid saving variables 
        load_type    (String):     Keyword indicator to evaluate the testing or validation set
        pretrained   (Int/String): Int/String indicating loading of random, pretrained or saved weights
        opt          (String):     Name of optimizer to use ('sgd' or 'adam')
        lr           (Float):      Learning rate 
        momentum     (Float):      Momentum in optimizer 
        weight_decay (Float):      Weight_decay value 
        final_shape  ([Int, Int]): Shape of data when passed into network
        
    Return:
        None
    """

    print(
        "\n############################################################################\n"
    )
    print("Experimental Setup: ", args)
    print(
        "\n############################################################################\n"
    )

    for total_iteration in range(args['rerun']):

        # Generate Results Directory
        d = datetime.datetime.today()
        date = d.strftime('%Y%m%d-%H%M%S')
        result_dir = os.path.join(
            args['save_dir'], args['model'], '_'.join(
                (args['dataset'], args['exp'], date)))
        log_dir = os.path.join(result_dir, 'logs')
        save_dir = os.path.join(result_dir, 'checkpoints')

        if not args['debug']:
            os.makedirs(result_dir, exist_ok=True)
            os.makedirs(log_dir, exist_ok=True)
            os.makedirs(save_dir, exist_ok=True)

            # Save copy of config file
            with open(os.path.join(result_dir, 'config.yaml'), 'w') as outfile:
                yaml.dump(args, outfile, default_flow_style=False)

            # Tensorboard Element
            writer = SummaryWriter(log_dir)

        # Check if GPU is available (CUDA)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        # Load Network
        model = create_model_object(**args).to(device)

        # Load Data
        loader = data_loader(model_obj=model, **args)

        if args['load_type'] == 'train':
            train_loader = loader['train']
            valid_loader = loader[
                'train']  # Run accuracy on train data if only `train` selected

        elif args['load_type'] == 'train_val':
            train_loader = loader['train']
            valid_loader = loader['valid']

        else:
            sys.exit('Invalid environment selection for training, exiting')

        # END IF

        # Training Setup
        params = [p for p in model.parameters() if p.requires_grad]

        if args['opt'] == 'sgd':
            optimizer = optim.SGD(params,
                                  lr=args['lr'],
                                  momentum=args['momentum'],
                                  weight_decay=args['weight_decay'])

        elif args['opt'] == 'adam':
            optimizer = optim.Adam(params,
                                   lr=args['lr'],
                                   weight_decay=args['weight_decay'])

        else:
            sys.exit('Unsupported optimizer selected. Exiting')

        # END IF

        scheduler = MultiStepLR(optimizer,
                                milestones=args['milestones'],
                                gamma=args['gamma'])

        if isinstance(args['pretrained'], str):
            ckpt = load_checkpoint(args['pretrained'])
            model.load_state_dict(ckpt)
            start_epoch = load_checkpoint(args['pretrained'],
                                          key_name='epoch') + 1
            optimizer.load_state_dict(
                load_checkpoint(args['pretrained'], key_name='optimizer'))

            for quick_looper in range(start_epoch):
                scheduler.step()

            # END FOR

        else:
            start_epoch = 0

        # END IF

        model_loss = Losses(device=device, **args)
        acc_metric = Metrics(**args)
        best_val_acc = 0.0

        ############################################################################################################################################################################

        # Start: Training Loop
        for epoch in range(start_epoch, args['epoch']):
            running_loss = 0.0
            print('Epoch: ', epoch)

            # Setup Model To Train
            model.train()

            # Start: Epoch
            for step, data in enumerate(train_loader):
                if step % args['pseudo_batch_loop'] == 0:
                    loss = 0.0
                    optimizer.zero_grad()

                # END IF

                x_input = data['data'].to(device)
                annotations = data['annots']

                assert args['final_shape'] == list(x_input.size(
                )[-2:]), "Input to model does not match final_shape argument"
                outputs = model(x_input)
                loss = model_loss.loss(outputs, annotations)
                loss = loss * args['batch_size']
                loss.backward()

                running_loss += loss.item()

                if np.isnan(running_loss):
                    import pdb
                    pdb.set_trace()

                # END IF

                if not args['debug']:
                    # Add Learning Rate Element
                    for param_group in optimizer.param_groups:
                        writer.add_scalar(
                            args['dataset'] + '/' + args['model'] +
                            '/learning_rate', param_group['lr'],
                            epoch * len(train_loader) + step)

                    # END FOR

                    # Add Loss Element
                    writer.add_scalar(
                        args['dataset'] + '/' + args['model'] +
                        '/minibatch_loss',
                        loss.item() / args['batch_size'],
                        epoch * len(train_loader) + step)

                # END IF

                if ((epoch * len(train_loader) + step + 1) % 100 == 0):
                    print('Epoch: {}/{}, step: {}/{} | train loss: {:.4f}'.
                          format(
                              epoch, args['epoch'], step + 1,
                              len(train_loader), running_loss /
                              float(step + 1) / args['batch_size']))

                # END IF

                if (epoch * len(train_loader) +
                    (step + 1)) % args['pseudo_batch_loop'] == 0 and step > 0:
                    # Apply large mini-batch normalization
                    for param in model.parameters():
                        param.grad *= 1. / float(
                            args['pseudo_batch_loop'] * args['batch_size'])
                    optimizer.step()

                # END IF

            # END FOR: Epoch

            if not args['debug']:
                # Save Current Model
                save_path = os.path.join(
                    save_dir, args['dataset'] + '_epoch' + str(epoch) + '.pkl')
                save_checkpoint(epoch, step, model, optimizer, save_path)

            # END IF: Debug

            scheduler.step(epoch=epoch)
            print('Scheduler lr: %f' % scheduler.get_lr()[0])

            ## START FOR: Validation Accuracy
            running_acc = []
            running_acc = valid(valid_loader, running_acc, model, device,
                                acc_metric)
            if not args['debug']:
                writer.add_scalar(
                    args['dataset'] + '/' + args['model'] +
                    '/validation_accuracy', 100. * running_acc[-1],
                    epoch * len(valid_loader) + step)
            print('Accuracy of the network on the validation set: %f %%\n' %
                  (100. * running_acc[-1]))

            # Save Best Validation Accuracy Model Separately
            if best_val_acc < running_acc[-1]:
                best_val_acc = running_acc[-1]

                if not args['debug']:
                    # Save Current Model
                    save_path = os.path.join(
                        save_dir, args['dataset'] + '_best_model.pkl')
                    save_checkpoint(epoch, step, model, optimizer, save_path)

                # END IF

            # END IF

        # END FOR: Training Loop

    ############################################################################################################################################################################

        if not args['debug']:
            # Close Tensorboard Element
            writer.close()
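A hedged sketch of how this script-style train() might be invoked; the keys below are the ones the function reads directly, and every value is a placeholder rather than the project's defaults (create_model_object and data_loader may require additional keys).

# Hypothetical invocation; all values are placeholders.
config = dict(rerun=1, seed=0, save_dir='./results', model='yolo', dataset='voc',
              exp='baseline', debug=1, load_type='train_val', pretrained=0,
              opt='sgd', lr=1e-3, momentum=0.9, weight_decay=5e-4,
              final_shape=[448, 448], batch_size=8, epoch=30,
              milestones=[20, 25], gamma=0.1, pseudo_batch_loop=1)
train(**config)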
Example #9
    def __init__(
            self,
            seed=None,
            optimizer=Adam,
            optimizer_kwargs={},
            learning_rate_init=0.04,
            gamma=0.995,  # learning rate decay factor
            considered_groups=list(
                range(12)),  ## group layers to be considered from start
            sample_variance_threshold=0.002,
            weight_loss_sample_variance=0,  # 10.
            evaluation_steps=250,  # number of batches between loss tracking
            N_batches_test=1,  # number of batches considered for evaluation
    ):
        super(ImageClassifier,
              self).__init__(considered_groups=considered_groups)
        if seed is not None:
            torch.manual_seed(seed)

        #'''
        resnet = models.resnet18(pretrained=False)
        self.conv = Sequential(
            *(list(resnet.children())[:-1]),
            Flatten(),
        )
        '''  architecture used by Dielemann et al 2015
        self.conv = Sequential(
#            Conv2dUntiedBias(41, 41, 3, 32, kernel_size=6),
            Conv2d(3,32, kernel_size=6),
            ReLU(),
            MaxPool2d(2),
#            Conv2dUntiedBias(16, 16, 32, 64, kernel_size=5),
            Conv2d(32, 64, kernel_size=5),
            ReLU(),
            MaxPool2d(2),
#            Conv2dUntiedBias(6, 6, 64, 128, kernel_size=3),
            Conv2d(64, 128, kernel_size=3),
            ReLU(),
#            Conv2dUntiedBias(4, 4, 128, 128, kernel_size=3), #weight_std=0.1),
            Conv2d(128, 128, kernel_size=3),
            ReLU(),
            MaxPool2d(2),
            Flatten(),
        )
        #'''
        self.dense1 = MaxOut(8192, 2048, bias=0.01)
        self.dense2 = MaxOut(2048, 2048, bias=0.01)
        self.dense3 = Sequential(
            MaxOut(2048, 37, bias=0.1),
            #            LeakyReLU(negative_slope=1e-7),
            ALReLU(negative_slope=1e-2),
        )
        self.dropout = Dropout(p=0.5)

        self.augment = Compose([
            Lambda(lambda img: torch.cat([img, hflip(img)], 0)),
            Lambda(lambda img: torch.cat([img, rotate(img, 45)], 0)),
            FiveCrop(45),
            Lambda(lambda crops: torch.cat([
                rotate(crop, ang)
                for crop, ang in zip(crops, (0, 90, 270, 180))
            ], 0)),
        ])
        self.N_augmentations = 16
        self.N_conv_outputs = 512

        self.set_optimizer(optimizer,
                           lr=learning_rate_init,
                           **optimizer_kwargs)
        #        self.scheduler = ExponentialLR(self.optimizer, gamma=gamma)
        self.scheduler = MultiStepLR(self.optimizer,
                                     milestones=[292, 373],
                                     gamma=gamma)

        self.make_labels_hierarchical = False  # if True, output probabilities are renormalized to fit the hierarchical label structure
        self.N_batches_test = N_batches_test
        self.evaluation_steps = evaluation_steps  # number of batches between loss tracking
        self.weight_loss_sample_variance = weight_loss_sample_variance
        self.sample_variance_threshold = sample_variance_threshold

        self.iteration = 0
        self.epoch = 0
        self.losses_train = Losses("loss", "train")
        self.losses_valid = Losses("loss", "valid")
        self.sample_variances_train = Losses("sample variance", "train")
        self.sample_variances_valid = Losses("sample variance", "valid")
        for g in range(1, 12):
            setattr(self, f"accuracies_Q{g}_train",
                    Accuracies("accuracy train", f"Q{g}"))
            setattr(self, f"accuracies_Q{g}_valid",
                    Accuracies("accuracy valid", f"Q{g}"))
        self.losses_regression = Losses("loss", "regression")
        self.losses_variance = Losses("loss", "sample variance")

        ## return to random seed
        if seed is not None:
            sd = np.random.random() * 10000
            torch.manual_seed(sd)
Example #10
class ImageClassifier(ClassifierBase):
    """ model for morphological classification of galaxy images

    Usage
    -----

    to use pretrained model, do
    >>> classifier = ImageClassifier()
    >>> classifier.load()
    >>> classifier.eval()
    >>> classifier.use_label_hierarchy()
    >>> labels = classifier(images)

    """
    def __init__(
            self,
            seed=None,
            optimizer=Adam,
            optimizer_kwargs={},
            learning_rate_init=0.04,
            gamma=0.995,  # learning rate decay factor
            considered_groups=list(
                range(12)),  ## group layers to be considered from start
            sample_variance_threshold=0.002,
            weight_loss_sample_variance=0,  # 10.
            evaluation_steps=250,  # number of batches between loss tracking
            N_batches_test=1,  # number of batches considered for evaluation
    ):
        super(ImageClassifier,
              self).__init__(considered_groups=considered_groups)
        if seed is not None:
            torch.manual_seed(seed)

        #'''
        resnet = models.resnet18(pretrained=False)
        self.conv = Sequential(
            *(list(resnet.children())[:-1]),
            Flatten(),
        )
        '''  architecture used by Dielemann et al 2015
        self.conv = Sequential(
#            Conv2dUntiedBias(41, 41, 3, 32, kernel_size=6),
            Conv2d(3,32, kernel_size=6),
            ReLU(),
            MaxPool2d(2),
#            Conv2dUntiedBias(16, 16, 32, 64, kernel_size=5),
            Conv2d(32, 64, kernel_size=5),
            ReLU(),
            MaxPool2d(2),
#            Conv2dUntiedBias(6, 6, 64, 128, kernel_size=3),
            Conv2d(64, 128, kernel_size=3),
            ReLU(),
#            Conv2dUntiedBias(4, 4, 128, 128, kernel_size=3), #weight_std=0.1),
            Conv2d(128, 128, kernel_size=3),
            ReLU(),
            MaxPool2d(2),
            Flatten(),
        )
        #'''
        self.dense1 = MaxOut(8192, 2048, bias=0.01)
        self.dense2 = MaxOut(2048, 2048, bias=0.01)
        self.dense3 = Sequential(
            MaxOut(2048, 37, bias=0.1),
            #            LeakyReLU(negative_slope=1e-7),
            ALReLU(negative_slope=1e-2),
        )
        self.dropout = Dropout(p=0.5)

        self.augment = Compose([
            Lambda(lambda img: torch.cat([img, hflip(img)], 0)),
            Lambda(lambda img: torch.cat([img, rotate(img, 45)], 0)),
            FiveCrop(45),
            Lambda(lambda crops: torch.cat([
                rotate(crop, ang)
                for crop, ang in zip(crops, (0, 90, 270, 180))
            ], 0)),
        ])
        self.N_augmentations = 16
        self.N_conv_outputs = 512

        self.set_optimizer(optimizer,
                           lr=learning_rate_init,
                           **optimizer_kwargs)
        #        self.scheduler = ExponentialLR(self.optimizer, gamma=gamma)
        self.scheduler = MultiStepLR(self.optimizer,
                                     milestones=[292, 373],
                                     gamma=gamma)

        self.make_labels_hierarchical = False  # if True, output probabilities are renormalized to fit the hierarchical label structure
        self.N_batches_test = N_batches_test
        self.evaluation_steps = evaluation_steps  # number of batches between loss tracking
        self.weight_loss_sample_variance = weight_loss_sample_variance
        self.sample_variance_threshold = sample_variance_threshold

        self.iteration = 0
        self.epoch = 0
        self.losses_train = Losses("loss", "train")
        self.losses_valid = Losses("loss", "valid")
        self.sample_variances_train = Losses("sample variance", "train")
        self.sample_variances_valid = Losses("sample variance", "valid")
        for g in range(1, 12):
            setattr(self, f"accuracies_Q{g}_train",
                    Accuracies("accuracy train", f"Q{g}"))
            setattr(self, f"accuracies_Q{g}_valid",
                    Accuracies("accuracy valid", f"Q{g}"))
        self.losses_regression = Losses("loss", "regression")
        self.losses_variance = Losses("loss", "sample variance")

        ## return to random seed
        if seed is not None:
            sd = np.random.random() * 10000
            torch.manual_seed(sd)

    def update_optimizer(self, optimizer, **kwargs) -> None:
        # Re-create the optimizer of this classifier with the given optimizer
        # class and keyword arguments.
        self.set_optimizer(optimizer, **kwargs)

    def update_optimizer_learningrate(self, learning_rate) -> None:
        print("update lr", learning_rate)
        for i in range(len(self.optimizer.param_groups)):
            self.optimizer.param_groups[i]['lr'] = learning_rate

    def use_label_hierarchy(self) -> None:
        self.make_labels_hierarchical = True

    def forward(self, x: torch.Tensor, train=False) -> torch.Tensor:
        x = self.augment(x)
        x = self.conv(x)

        x = self.recombine_augmentation(x)

        x = self.dropout(x)
        x = self.dense1(x)
        x = self.dropout(x)
        x = self.dense2(x)
        x = self.dropout(x)
        x = self.dense3(x)
        #        x += 1e-4  ## use only with LeakyReLU to prevent values < 0
        if self.make_labels_hierarchical:
            x = make_galaxy_labels_hierarchical(x)
        return x

    def recombine_augmentation(self, x) -> torch.Tensor:
        """ recombine results of augmented views to single vector """
        batch_size = x.size(0) // self.N_augmentations
        x = x.reshape(self.N_augmentations, batch_size, self.N_conv_outputs)
        x = x.permute(1, 0, 2)
        x = x.reshape(batch_size, self.N_augmentations * self.N_conv_outputs)
        return x

    def train_step(self, images: torch.tensor, labels: torch.tensor) -> float:
        self.train()
        labels_pred = self.forward(images, train=True)
        loss_regression = mse(labels_pred[:, self.considered_label_indices],
                              labels[:, self.considered_label_indices])
        loss_variance = self.weight_loss_sample_variance * \
            loss_sample_variance(labels_pred[:,self.considered_label_indices],
                                 threshold=self.sample_variance_threshold)
        loss = loss_regression + loss_variance
        self.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1)
        self.optimizer.step()
        self.iteration += 1
        return loss.item()

    def train_epoch(
        self,
        data_loader_train: torch.utils.data.DataLoader,
        data_loader_valid: torch.utils.data.DataLoader,
        track: bool = False,
    ) -> None:
        for images, labels in tqdm(data_loader_train,
                                   desc=f"epoch {self.epoch}"):
            images = images.to(device)
            labels = labels.to(device)
            loss = self.train_step(images, labels)
            if np.isnan(loss):
                from pdb import set_trace
                set_trace()
                loss = self.train_step(images, labels)
                raise Exception("loss is NaN")
            if not self.iteration % self.evaluation_steps - 1:
                loss_regression_train, loss_variance_train, accs_train, variance_train = self.evaluate_batch(
                    images, labels, print_labels=False)
                loss_train = loss_regression_train + loss_variance_train * self.weight_loss_sample_variance
                self.losses_regression.append(self.iteration,
                                              loss_regression_train)
                self.losses_variance.append(self.iteration,
                                            loss_variance_train)
                self.losses_train.append(self.iteration, loss_train)
                self.sample_variances_train.append(self.iteration,
                                                   variance_train)
                for group, acc in accs_train.items():
                    getattr(self, f"accuracies_Q{group}_train").append(
                        self.iteration, acc)
                for images, labels in data_loader_valid:
                    images = images.to(device)
                    labels = labels.to(device)
                    break
                loss_regression_valid, loss_variance_valid, accs_valid, variance_valid = self.evaluate_batch(
                    images, labels)
                loss_valid = loss_regression_valid + loss_variance_valid * self.weight_loss_sample_variance
                self.losses_valid.append(self.iteration, loss_valid)
                self.sample_variances_valid.append(self.iteration,
                                                   variance_valid)
                for group, acc in accs_valid.items():
                    getattr(self, f"accuracies_Q{group}_valid").append(
                        self.iteration, acc)
                if track:
                    import wandb
                    logs = {
                        "loss_regression_train": loss_regression_train,
                        "loss_variance_train": loss_variance_train,
                        "loss_train": loss_train,
                        "variance_train": variance_train,
                        "loss_regression_valid": loss_regression_valid,
                        "loss_variance_valid": loss_variance_valid,
                        "loss_valid": loss_valid,
                        "variance_valid": variance_valid,
                    }
                    logs.update({
                        f"accuracy_Q{group}_train": acc
                        for group, acc in accs_train.items()
                    })
                    logs.update({
                        f"accuracy_Q{group}_valid": acc
                        for group, acc in accs_valid.items()
                    })
                    wandb.log(logs)

        self.epoch += 1
        self.scheduler.step()
        self.save()

    def predict(self, images: torch.tensor) -> torch.Tensor:
        self.eval()
        return self(images)

    def evaluate_batches(self,
                         data_loader: torch.utils.data.DataLoader) -> list:
        with torch.no_grad():
            loss = 0
            accs = Counter({group: 0 for group in range(1, 12)})
            variance = 0
            for N_test, (images, labels) in enumerate(data_loader):
                images = images.to(device)
                labels = labels.to(device)
                if N_test >= self.N_batches_test:
                    break
                # evaluate_batch returns (regression loss, variance loss,
                # accuracies, sample variance); fold the two loss terms together.
                loss_regression_, loss_variance_, accs_, variance_ = \
                    self.evaluate_batch(images, labels)
                loss += loss_regression_ + loss_variance_
                accs.update(accs_)
                variance += variance_
            loss /= N_test + 1
            variance /= N_test + 1
            for group in accs.keys():
                accs[group] /= N_test + 1
        return loss, accs, variance

    def evaluate_batch(self,
                       images: torch.tensor,
                       labels: torch.tensor,
                       print_labels=False) -> tuple:
        """ evaluations for batch """
        self.eval()
        with torch.no_grad():
            labels_pred = self.forward(images)
            if print_labels:
                for i, (prediction,
                        target) in enumerate(zip(labels_pred, labels)):
                    print(
                        "target\t\t",
                        np.around(target[self.considered_label_indices].cpu(),
                                  3))
                    print("\033[1mprediction\t",
                          np.around(
                              prediction[self.considered_label_indices].cpu(),
                              3),
                          end="\033[0m\n")
                    if i >= 2:
                        break
                print(
                    "<target>\t",
                    np.around(
                        torch.mean(labels[:, self.considered_label_indices],
                                   dim=0).cpu(), 3))
                print(
                    "<target>\t",
                    np.around(
                        torch.std(labels[:, self.considered_label_indices],
                                  dim=0).cpu(), 3))
                print("\033[1m<prediction>\t",
                      np.around(
                          torch.mean(
                              labels_pred[:, self.considered_label_indices],
                              dim=0).cpu(), 3),
                      end="\033[0m\n")
                print("\033[1m<prediction>\t",
                      np.around(
                          torch.std(labels_pred[:,
                                                self.considered_label_indices],
                                    dim=0).cpu(), 3),
                      end="\033[0m\n")
            loss_regression = torch.sqrt(
                mse(labels_pred[:, self.considered_label_indices],
                    labels[:, self.considered_label_indices])).item()
            loss_variance = self.weight_loss_sample_variance * \
                    loss_sample_variance(labels_pred[:,self.considered_label_indices],
                                         threshold=self.sample_variance_threshold
                                        ).item()
            accs = measure_accuracy_classifier(
                labels_pred,
                labels,
                considered_groups=self.considered_groups.considered_groups)
            variance = get_sample_variance(
                labels_pred[:, self.considered_label_indices]).item()
        return loss_regression, loss_variance, accs, variance

    def plot_losses(self, save=False):
        self.losses_train.plot()
        self.losses_valid.plot()
        self.losses_regression.plot(linestyle=":")
        self.losses_variance.plot(linestyle=":")
        if save:
            plt.savefig(folder_results + "loss.png")
            plt.close()
        else:
            plt.show()

    def plot_sample_variances(self, save=False):
        self.sample_variances_train.plot()
        self.sample_variances_valid.plot()
        if save:
            plt.savefig(folder_results + "variances.png")
            plt.close()
        else:
            plt.show()

    def plot_accuracy(self, save=False):
        for group in range(1, 12):
            if not group in self.considered_groups.considered_groups:
                continue
            getattr(self, f"accuracies_Q{group}_train").plot()
        if save:
            plt.savefig(folder_results + "accuracy_train.png")
            plt.close()
        else:
            plt.show()

    def plot_test_accuracy(self, save=False):
        for group in range(1, 12):
            if not group in self.considered_groups.considered_groups:
                continue
            getattr(self, f"accuracies_Q{group}_valid").plot()
        if save:
            plt.savefig(folder_results + "accuracy_valid.png")
            plt.close()
        else:
            plt.show()
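The class docstring above only covers inference; a minimal sketch, under the assumption that training is driven externally, of how the methods could be combined (the loaders and epoch count are placeholders):

def fit(classifier, loader_train, loader_valid, n_epochs=10):
    # Hypothetical driver; loader_train / loader_valid / n_epochs are placeholders.
    for _ in range(n_epochs):
        # train_epoch steps the scheduler, saves a checkpoint and records
        # train/valid losses every `evaluation_steps` batches.
        classifier.train_epoch(loader_train, loader_valid, track=False)
    classifier.plot_losses(save=True)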
Example #11
def unet_model_fn(features, labels, mode, params):
    tf.local_variables_initializer()
    loss, train_op = None, None
    eval_metric_ops, training_hooks, evaluation_hooks = None, None, None
    predictions_dict = None
    unet = Unet(params=params)
    logits = unet.model(input_tensor=features['image'])
    y_pred = tf.math.softmax(logits, axis=-1)
    output_img = tf.expand_dims(tf.cast(tf.math.argmax(y_pred, axis=-1) * 255, dtype=tf.uint8), axis=-1)

    if mode in (estimator.ModeKeys.TRAIN, estimator.ModeKeys.EVAL):

        with tf.name_scope('Loss_Calculation'):
            loss = Losses(logits=logits, labels=labels['label'])
            loss = loss.custom_loss()

        with tf.name_scope('Dice_Score_Calculation'):
            dice = f1(labels=labels['label'], predictions=y_pred)

        with tf.name_scope('Images_{}'.format(mode)):
            with tf.name_scope('Reformat_Outputs'):
                label = tf.expand_dims(tf.cast(tf.argmax(labels['label'], -1) * 255, dtype=tf.uint8), axis=-1)
                # Min-max normalise the input image to [0, 1] for the image summary.
                image = tf.math.divide(features['image'] - tf.reduce_min(features['image'], [0, 1, 2]),
                                       tf.reduce_max(features['image'], [0, 1, 2]) - tf.reduce_min(features['image'],
                                                                                                   [0, 1, 2]))
            summary.image('1_Medical_Image', image, max_outputs=1)
            summary.image('2_Output', output_img, max_outputs=1)
            summary.image('3_Output_pred', tf.expand_dims(y_pred[:, :, :, 1], -1), max_outputs=1)
            summary.image('4_Output_label', label, max_outputs=1)

    if mode == estimator.ModeKeys.TRAIN:
        with tf.name_scope('Learning_Rate'):
            global_step = tf.compat.v1.train.get_or_create_global_step()
            learning_rate = tf.compat.v1.train.exponential_decay(params['lr'], global_step=global_step,
                                                                 decay_steps=params['decay_steps'],
                                                                 decay_rate=params['decay_rate'], staircase=False)
        with tf.name_scope('Optimizer_conf'):
            train_op = Adam(learning_rate=learning_rate).minimize(loss=loss, global_step=global_step)

        with tf.name_scope('Metrics'):
            summary.scalar('Output_DSC', dice[1])
            summary.scalar('Learning_Rate', learning_rate)

    if mode == estimator.ModeKeys.EVAL:
        eval_metric_ops = {'Metrics/Output_DSC': dice}
        eval_summary_hook = tf.estimator.SummarySaverHook(output_dir=params['eval_path'],
                                                          summary_op=summary.merge_all(),
                                                          save_steps=params['eval_steps'])
        evaluation_hooks = [eval_summary_hook]

    if mode == estimator.ModeKeys.PREDICT:
        predictions_dict = {'image': features['image'],
                            'y_preds': y_pred[:, :, :, 1],
                            'output_img': output_img,
                            'path': features['path']}

    return estimator.EstimatorSpec(mode,
                                   predictions=predictions_dict,
                                   loss=loss,
                                   train_op=train_op,
                                   eval_metric_ops=eval_metric_ops,
                                   training_hooks=training_hooks,
                                   evaluation_hooks=evaluation_hooks)
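To close the loop, a hedged sketch of wiring this model_fn into tf.estimator.Estimator; the params values, model_dir and the input_fn are placeholders, not the project's configuration.

# Hypothetical wiring; every value below is a placeholder.
params = {'lr': 1e-4, 'decay_steps': 1000, 'decay_rate': 0.9,
          'eval_path': '/tmp/unet_eval', 'eval_steps': 100}

unet_estimator = tf.estimator.Estimator(model_fn=unet_model_fn,
                                        model_dir='/tmp/unet_model',
                                        params=params)
# train_input_fn is assumed to yield ({'image': ...}, {'label': ...}) batches.
# unet_estimator.train(input_fn=train_input_fn, steps=10000)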