Пример #1
0
def decode(encoded, modeldict, wflen=MAXWFLEN):
    """
    Greedily decode a sequence of character indices from an encoded vector.

    At every step the previously chosen index is embedded, concatenated with
    ``encoded`` and passed through ``modeldict['dec']``; the index with the
    highest score in the resulting distribution is fed back as the next input.

    :param encoded: tensor that is viewed as ``(1, 2 * LSTMDIM)`` at each step.
    :param modeldict: mapping providing ``'cencoder'``, ``'ench0'``,
        ``'encc0'``, ``'cembedding'``, ``'dec'``, ``'pred'`` and ``'sm'``.
    :param wflen: length bound; the loop runs ``min(wflen + 1, MAXWFLEN)`` steps.
    :return: tuple of (per-step distributions concatenated along dim 0,
        list of chosen indices as Python ints).
    """
    prev_char = modeldict['cencoder'][BD]
    hidden = Variable(modeldict['ench0'], requires_grad=1)
    cell = Variable(modeldict['encc0'], requires_grad=1)

    step_distrs, char_ids = [], []
    n_steps = min(wflen + 1, MAXWFLEN)
    for _ in range(n_steps):
        prev_var = Variable(LTYPE([prev_char]), requires_grad=0)
        embedded = modeldict['cembedding'](prev_var)
        step_input = cat([embedded, encoded.view(1, 2 * LSTMDIM)], dim=1)

        # Only the returned states are used; the first output is discarded.
        _, (hidden, cell) = modeldict['dec'](step_input, (hidden, cell))
        cell = cell.view(1, LSTMDIM)

        # The prediction is made from the cell state, not the hidden state.
        distr = modeldict['sm'](modeldict['pred'](cell))
        step_distrs.append(distr)

        _, best = tmax(distr, 1)
        prev_char = int(best.data.numpy()[0])
        char_ids.append(prev_char)

    return cat(step_distrs, dim=0), char_ids
Пример #2
0
def add_noise(image, noise, normalize):
    """
    Add scaled random noise to an image tensor.

    :param image: tensor to perturb; it is not modified in place.
    :param noise: scale factor multiplied onto the samples drawn by ``randn``.
    :param normalize: if truthy, divide the noisy result by its largest
        element (there is no guard against that maximum being zero).
    :return: new tensor ``image + randn(...) * noise``, optionally normalized.
    """
    # Sample on the image's own device: a default (CPU) sample cannot be
    # added to a CUDA image. For CPU images the drawn values are unchanged.
    perturbation = randn(image.size(), device=image.device) * noise
    noisy_img = image + perturbation
    if normalize:
        noisy_img = noisy_img / tmax(noisy_img)
    return noisy_img
Пример #3
0
 def mergeWeights(self, weights):
     """
     Collapse ``weights`` to the maximum of each slice along dimension 1.

     ``weights`` is transposed on its first two dimensions and iterated, so
     one maximum is taken per index of the original dimension 1. The maxima
     are collected through a NumPy array and returned as a tensor.

     :param weights: tensor with at least two dimensions.
     :return: tensor holding one maximum per slice.
     """
     slice_maxima = [tmax(piece) for piece in weights.transpose(0, 1)]
     return from_numpy(np.array(slice_maxima))
Пример #4
0
def compute_nb_errors(model, data, targets):
    """
    Count how many samples the model misclassifies.

    The predicted class of each sample is the index of the largest value
    along dimension 1 of ``model(data)``.

    :param model: callable applied to ``data``.
    :param data: input passed to ``model``.
    :param targets: iterable of expected labels, paired with the predictions.
    :return: number of positions where prediction and target differ.
    :raises AssertionError: if a prediction or target is neither 0 nor 1.
    """
    predicted = tmax(model(data).data, 1).indices

    mismatches = 0
    for guess, truth in zip(predicted, targets):
        # Only binary labels are accepted on either side.
        assert guess in [0, 1] and truth in [0, 1]
        if guess != truth:
            mismatches += 1
    return mismatches
def validate(val_loader, encoder, decoder, criterion):
    """
    Performs one epoch's validation.

    Puts the models in eval mode, runs every batch through the (optional)
    encoder and the decoder with gradient tracking disabled, accumulates loss
    and top-1/5/10/20 accuracy, and scores the predictions against the
    reference captions once, after all batches have been processed.

    :param val_loader: DataLoader for validation data, yielding
        ``(imgs, caps, caplens, allcaps)`` batches.
    :param encoder: encoder model, or ``None`` to pass ``imgs`` to the
        decoder unchanged.
    :param decoder: decoder model; must return ``(scores, caps_sorted,
        decode_lengths, alphas, sort_ind)``.
    :param criterion: loss layer, applied to the packed scores and targets.
    :return: corpus BLEU score computed with the module-level
        ``bleu_weights`` (reported as BLEU-2 in the log output).
    """
    # Function-scope import so the block does not rely on a file-level name.
    from torch import no_grad

    decoder.eval()  # eval mode (no dropout or batchnorm)
    if encoder is not None:
        encoder.eval()

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1accs = AverageMeter()
    top5accs = AverageMeter()
    top10accs = AverageMeter()
    top20accs = AverageMeter()

    start = time()

    references = []  # references (true captions) for calculating the BLEU score
    hypotheses = []  # hypotheses (predictions)

    # Token ids stripped from the references; looked up once, not per word.
    ignored_tokens = {word2index['<SOS>'], word2index['<pad>']}

    # Nothing is back-propagated during validation, so skip building the graph.
    with no_grad():
        # Batches
        for i, (imgs, caps, caplens, allcaps) in enumerate(val_loader):

            # Move to device, if available
            imgs = imgs.to(device)
            caps = caps.to(device)
            caplens = caplens.to(device)

            # Forward prop.
            if encoder is not None:
                imgs = encoder(imgs)
            scores, caps_sorted, decode_lengths, alphas, sort_ind = decoder(
                imgs, caps, caplens)

            # Since we decoded starting with <start>, the targets are all
            # words after <start>, up to <end>
            targets = caps_sorted[:, 1:]

            # Keep the unpacked scores: they are needed per caption below.
            scores_copy = scores.clone()

            # Remove timesteps that we didn't decode at, or are pads
            # pack_padded_sequence is an easy trick to do this
            scores, _ = pack_padded_sequence(scores,
                                             decode_lengths,
                                             batch_first=True)
            targets, _ = pack_padded_sequence(targets,
                                              decode_lengths,
                                              batch_first=True)

            # Calculate loss
            loss = criterion(scores, targets)

            # Add doubly stochastic attention regularization
            loss += alpha_c * ((1. - alphas.sum(dim=1))**2).mean()

            # Keep track of metrics, weighted by the number of decoded tokens
            n_tokens = sum(decode_lengths)
            losses.update(loss.item(), n_tokens)
            top1accs.update(accuracy(scores, targets, 1), n_tokens)
            top5accs.update(accuracy(scores, targets, 5), n_tokens)
            top10accs.update(accuracy(scores, targets, 10), n_tokens)
            top20accs.update(accuracy(scores, targets, 20), n_tokens)

            batch_time.update(time() - start)

            start = time()

            if i % print_freq == 0:
                print('Validation: [{0}/{1}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Top-1 Accuracy {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})\t'
                      'Top-10 Accuracy {top10.val:.3f} ({top10.avg:.3f})\t'
                      'Top-20 Accuracy {top20.val:.3f} ({top20.avg:.3f})\t'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1accs,
                          top5=top5accs,
                          top10=top10accs,
                          top20=top20accs))

            # Store references (true captions), and hypothesis (prediction) for each image
            # If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
            # references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]

            # References, reordered with sort_ind to line up with the decoder output
            for img_caps in allcaps[sort_ind].tolist():
                references.append(
                    [[w for w in cap if w not in ignored_tokens]
                     for cap in img_caps])

            # Hypotheses: highest-scoring word per timestep, cut to the decoded length
            _, preds = tmax(scores_copy, dim=2)
            hypotheses.extend(
                pred[:length]
                for pred, length in zip(preds.tolist(), decode_lengths))
            assert len(references) == len(hypotheses)

    # Score the whole corpus once. Doing this inside the batch loop rescored
    # every accumulated caption, and printed the summary, on each batch.
    bleu2 = corpus_bleu(references, hypotheses, weights=bleu_weights)

    print(
        '\n * LOSS - {loss.avg:.3f}, TOP-1 ACCURACY - {top1.avg:.3f}, TOP-5 ACCURACY - {top5.avg:.3f}, TOP-10 ACCURACY - {top10.avg:.3f}, TOP-20 ACCURACY - {top20.avg:.3f}, BLEU-2 - {bleu}\n'
        .format(loss=losses,
                top1=top1accs,
                top5=top5accs,
                top10=top10accs,
                top20=top20accs,
                bleu=bleu2))
    # NOTE(review): start_time is not defined in this function; presumably a
    # module-level timestamp set when the run began - confirm.
    print('    Total training time:', time() - start_time)
    return bleu2
Пример #6
0
def probs_to_category(probs):
    """
    Map a tensor of scores to a category and its index.

    The index of the maximum along dimension 1 is computed, and its
    ``[0][0]`` element selects the entry of the module-level ``categories``.

    :param probs: tensor of scores reduced along dimension 1.
    :return: tuple ``(category, index)`` where ``index`` is the ``[0][0]``
        element of the index tensor.
    """
    _, best = tmax(probs, 1)
    top = best.data[0][0]
    return categories[top], top
Пример #7
0
        data = Variable(torch.from_numpy(data))
        labels = Variable(torch.from_numpy(labels))

        if use_gpu:
            data, labels = data.float().cuda(), labels.long().cuda()

        optimizer.zero_grad()

        #clear out the hidden state of the LSTM, detaching it from its history on the last instance.
        net.hidden = net.init_hidden(cfg.batchsize_train)

        #forward pass
        out = net(data)

        #compute accuracy
        _, predicted = tmax(out.data, 1)
        acc = float(
            sum(labels.cpu().data.numpy() == predicted.cpu().numpy()) /
            float(labels.size(0)))

        #backprop
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()

        #save checkpoint
        if iter_total % cfg.train_checkpoint_freq == 0:
            save_checkpoint(cfg.checkpoint_path, epoch, iter_total,
                            net.state_dict(), optimizer.state_dict())

        #print result and save for tensorboard