Example #1
    def init_hidden(self, batch_size):
        # NOTE: an LSTM needs two states: the hidden state h and the cell state c
        hidden = [
            Variable(torch.zeros(self.n_layers, batch_size, self.d_inner_hid)),
            Variable(torch.zeros(self.n_layers, batch_size, self.d_inner_hid))
        ]
        hidden[0] = check_cuda(hidden[0], self.use_cuda)
        hidden[1] = check_cuda(hidden[1], self.use_cuda)
        return hidden
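The check_cuda helper these examples rely on is not shown anywhere on this page. Below is a minimal sketch of what a helper with the signature used in Examples #1-#4 (an object plus a use_cuda flag) could look like; the body is an assumption, not any project's actual implementation.

import torch

def check_cuda(obj, use_cuda):
    # Hypothetical helper: move a tensor/Variable/module to the GPU only when
    # the caller asked for CUDA and a CUDA device is actually available.
    if use_cuda and torch.cuda.is_available():
        return obj.cuda()
    return obj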
Example #2
def get_batch_label(data, label, index, batch_size, testing=False):
    tensor = torch.from_numpy(data[index:index + batch_size]).type(
        torch.LongTensor)
    input_data = Variable(tensor, volatile=testing, requires_grad=False)
    input_data = check_cuda(input_data, use_cuda)
    label_tensor = torch.from_numpy(label[index:index + batch_size]).type(
        torch.LongTensor)
    output_data = Variable(label_tensor, volatile=testing, requires_grad=False)
    output_data = check_cuda(output_data, use_cuda)
    return input_data, output_data
Example #3
    def _sample_latent(self, enc_hidden):
        mu = self._enc_mu(enc_hidden)
        log_sigma = self._enc_log_sigma(enc_hidden)
        sigma = torch.exp(log_sigma)
        std_z = torch.from_numpy(np.random.normal(0, 1,
                                                  size=sigma.size())).float()

        self.z_mean = mu
        self.z_sigma = sigma

        std_z_var = Variable(std_z, requires_grad=False)
        std_z_var = check_cuda(std_z_var, self.use_cuda)
        return mu + sigma * std_z_var
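Example #3 implements the VAE reparameterization trick (z = mu + sigma * eps with eps ~ N(0, 1)) via a NumPy round-trip. For reference, a minimal sketch of the same step in current PyTorch, which needs neither Variable nor NumPy (a hypothetical rewrite, not the project's code):

import torch

def sample_latent(mu, log_sigma):
    # Reparameterization trick: z = mu + sigma * eps, eps ~ N(0, 1).
    sigma = torch.exp(log_sigma)
    eps = torch.randn_like(sigma)  # same shape, dtype and device as sigma
    return mu + sigma * eps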
Example #4
    def forward(self, input_sentence, is_softmax=False, dont_pass_emb=False):
        if dont_pass_emb:
            emb_sentence = input_sentence
        else:
            emb_sentence = self.src_word_emb(input_sentence)
        relu1 = F.relu(self.conv1(emb_sentence))
        layer1 = F.max_pool1d(relu1, 3)
        relu2 = F.relu(self.conv2(layer1))
        layer2 = F.max_pool1d(relu2, 3)
        layer3 = F.max_pool1d(F.relu(self.conv2(layer2)), 10)
        # NOTE: only layer3's batch dimension is used below; layer2 is what gets flattened
        flatten = self.drop(layer2.view(layer3.size()[0], -1))
        if not hasattr(self, 'linear'):
            # Lazily build the classifier once the flattened size is known
            self.linear = nn.Linear(flatten.size()[1], 2)
            self.linear = check_cuda(self.linear, self.use_cuda)
        logit = self.linear(flatten)
        if is_softmax:
            logit = self.softmax(logit)
        return logit
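Example #4 builds self.linear lazily inside forward() so that in_features can be read off the flattened activations. In recent PyTorch (1.8+) the same effect can be had with nn.LazyLinear, which infers in_features on the first forward pass; a minimal sketch (hypothetical, not the example's code):

import torch.nn as nn

class LazyHead(nn.Module):
    def __init__(self, n_classes=2):
        super().__init__()
        # in_features is inferred from the first batch that passes through.
        self.linear = nn.LazyLinear(n_classes)

    def forward(self, flatten):
        return self.linear(flatten)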
Example #5
def main():
    """!
    @brief Main function for predicting the image class(es) using a
        trained model.
    """
    args = parse_arguments()
    test_on_gpu = (args.gpu and check_cuda())
    cat_to_name = get_label_mapping(input_json=args.cat_to_name)

    # Load checkpoint
    model, ckpt_dict = load_checkpoint(args.checkpoint,
                                       train_on_gpu=test_on_gpu)
    idx_to_class = {
        idx: cat_to_name[c]
        for c, idx in ckpt_dict['class_to_idx'].items()
    }

    # Pre-process image
    image = process_image(args.image_path)
    image = torch.unsqueeze(image, 0)
    if test_on_gpu:
        image = image.cuda()

    # Get actual label
    label = os.path.basename(os.path.dirname(args.image_path))
    label = cat_to_name[label]

    # Predictions - top K classes
    prob_k, ind_k = predict(image, model, topk=args.top_k)
    classes_k = map_classes(ind_k, idx_to_class)
    print("True label: '{}'".format(label))
    print("")
    print_results(classes_k, prob_k)

    # Plot image and predictions
    if args.plot:
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(5, 10))
        imshow(torch.squeeze(image.cpu()), ax=ax1, title=label)
        plot_predictions(prob_k, classes_k, ax=ax2, topk=args.top_k)
        plt.show()
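The predict() call above returns the top-k probabilities and class indices but is not defined in the snippet. A plausible sketch, under the assumption that it simply runs the model, applies softmax, and takes the top-k entries (hypothetical, not the project's code):

import torch

def predict(image, model, topk=5):
    # Hypothetical sketch: forward pass, softmax, then top-k probabilities/indices.
    model.eval()
    with torch.no_grad():
        logits = model(image)                    # shape: (1, n_classes)
        probs = torch.softmax(logits, dim=1)
        prob_k, ind_k = probs.topk(topk, dim=1)
    return prob_k.squeeze(0).tolist(), ind_k.squeeze(0).tolist()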
Example #6
def test(net, test_dataset):
    """
    Test the model on test data, and print statistics
    :param net: nn.Module
    :param test_dataset: torch.utils.data.Dataset
    :return:
    """
    logging.info("Started predicting testing data...")
    computing_device, extra = check_cuda()
    test_loader = DataLoader(test_dataset,
                             batch_size=SETTINGS["BATCH_SIZE"],
                             shuffle=False)
    with torch.no_grad():
        net.eval()
        all_predictions = []
        all_labels = []
        for images, labels in test_loader:  # Remember they come in batches
            images, labels = images.to(computing_device), labels.to(
                computing_device)

            # Since we are not going through the criterion here, we must apply softmax ourselves
            outputs = func.softmax(net(images), dim=1)
            _, predicted = torch.max(outputs.data, 1)

            predicted = func.one_hot(predicted,
                                     num_classes=SETTINGS['NUM_CLASSES']).type(
                                         torch.FloatTensor)
            labels = func.one_hot(labels,
                                  num_classes=SETTINGS['NUM_CLASSES']).type(
                                      torch.FloatTensor)
            all_predictions.append(predicted)
            all_labels.append(labels)

        all_predictions = torch.cat(all_predictions)
        all_labels = torch.cat(all_labels)
        logging.info("Evaluating test results...")
        evaluate(all_predictions, all_labels, net, SETTINGS)
        sklearn_acc_per_class(all_labels, all_predictions)
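Examples #6 and #9 call a zero-argument check_cuda() that returns a compute device plus an extra value. The helper itself is not shown; one common shape for such a helper, returning the device and extra DataLoader keyword arguments, is sketched below (an assumption, not the project's code):

import torch

def check_cuda():
    # Hypothetical: pick the device and suggest DataLoader kwargs for it.
    if torch.cuda.is_available():
        return torch.device("cuda"), {"num_workers": 4, "pin_memory": True}
    return torch.device("cpu"), {}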
Example #7
# Make instances
encoder = Encoder(
    n_src_vocab=max_features,
    use_cuda=use_cuda,
)
decoder = Generator(
    n_target_vocab=max_features,
    c_dim=c_dim,
    use_cuda=use_cuda,
)
discriminator = Discriminator(
    n_src_vocab=max_features,
    maxlen=maxlen,
    use_cuda=use_cuda,
)
encoder = check_cuda(encoder, use_cuda)
decoder = check_cuda(decoder, use_cuda)
discriminator = check_cuda(discriminator, use_cuda)
criterion = torch.nn.CrossEntropyLoss()
vae_parameters = list(encoder.parameters()) + list(decoder.parameters())
vae_opt = Adam(vae_parameters)
e_opt = Adam(encoder.parameters())
g_opt = Adam(decoder.parameters())
d_opt = Adam(discriminator.parameters())


def train_discriminator(discriminator):
    # TODO: empirical Shannon entropy
    print_epoch = 0
    for epoch_index in range(epoch):
        for batch, index in enumerate(range(0, len(x_train) - 1, batch_size)):
Example #8
def main():
    """!
    @brief Main function for model training and evaluation.
    """
    args = parse_arguments()

    loader_tr, loader_val, class2idx = \
        load_data(args.input,
                  batch_size=args.batch_size,
                  n_workers=args.n_workers)
    train_on_gpu = (args.gpu and check_cuda())
    num_classes = len(class2idx)

    if args.checkpoint:
        print("Loading model checkpoint...")
        model, ckpt_dict = load_checkpoint(args.checkpoint,
                                           train_on_gpu=train_on_gpu)
        loss_val_min = ckpt_dict['loss']
        epoch1 = ckpt_dict['epoch'] + 1
        args.pretrained = ckpt_dict['pretrained']
        class2idx = ckpt_dict['class_to_idx']
    else:
        epoch1 = 0
        # Build model architecture
        model = build_model(num_classes,
                            pretrained=args.pretrained,
                            train_on_gpu=train_on_gpu)
        loss_val_min = np.Inf

    # Specify optimizer and learning rate
    if args.pretrained[:6] == "resnet":
        params = model.parameters()
    else:
        params = model.classifier.parameters()
    optimizer = optim.SGD(params,
                          lr=args.learning_rate,
                          momentum=0.9)
    if args.checkpoint:
        optimizer.load_state_dict(ckpt_dict['optimizer_state'])
    # Specify loss function (categorical cross-entropy)
    criterion = nn.CrossEntropyLoss()

    # Train model
    for epoch in range(epoch1, args.n_epochs):
        loss_tr = train_model(model,
                              loader_tr,
                              criterion,
                              optimizer,
                              train_on_gpu=train_on_gpu)
        loss_val, acc_val = evaluate_model(model,
                                           loader_val,
                                           criterion,
                                           n_classes=num_classes,
                                           train_on_gpu=train_on_gpu)
        # Print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} '
              '\tValidation Loss: {:.6f}'
              '\tValidation Accuracy: {:.2f}'
              .format(epoch + 1,
                      loss_tr,
                      loss_val,
                      acc_val))

        # Save model if validation loss has decreased
        if loss_val <= loss_val_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}). '
                  'Saving model ...'.format(loss_val_min,
                                            loss_val))
            save_checkpoint(args.output,
                            model,
                            optimizer,
                            loss_val,
                            epoch,
                            class2idx,
                            pretrained=args.pretrained)
            loss_val_min = loss_val
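Example #8 saves a checkpoint with save_checkpoint() and later reads back 'loss', 'epoch', 'pretrained', 'class_to_idx' and 'optimizer_state' from it. A minimal sketch consistent with those keys (hypothetical; the real helper may store more):

import torch

def save_checkpoint(path, model, optimizer, loss, epoch, class_to_idx,
                    pretrained=None):
    # Hypothetical sketch matching the keys read back in Example #8.
    torch.save({
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "loss": loss,
        "epoch": epoch,
        "class_to_idx": class_to_idx,
        "pretrained": pretrained,
    }, path)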
Example #9
def train(dataset):
    computing_device, extra = check_cuda()

    # Save all the k models to compare
    nnets = []
    batch_size = SETTINGS['BATCH_SIZE']

    # Get a lists of train-val-split for k folds
    if SETTINGS['K-FOLD']:
        indices = get_k_fold_indecies(dataset,
                                      SETTINGS['RANDOM_SEED'],
                                      k=SETTINGS['K-FOLD-NUMBER'])
    else:
        indices = list(range(len(dataset)))
        validation_split = .1
        split = int(np.floor(validation_split * len(dataset)))
        np.random.seed(SETTINGS['RANDOM_SEED'])
        np.random.shuffle(indices)
        train_indices, val_indices = indices[split:], indices[:split]
        indices = [(train_indices, val_indices)]

    for k, (train_indices, val_indices) in enumerate(indices):
        logging.info("#" * 20)
        logging.info("Training Model {}".format(k))

        # Load data for this fold
        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(val_indices)
        train_loader = DataLoader(dataset,
                                  batch_size=batch_size,
                                  sampler=train_sampler,
                                  num_workers=10)
        validation_loader = DataLoader(dataset,
                                       batch_size=batch_size,
                                       sampler=valid_sampler,
                                       num_workers=10)

        # Initialize CNN
        if SETTINGS['NNET'] is None:
            net = models.resnet152(pretrained=True)

            # Freeze parameters so gradients are not computed for them
            for param in net.parameters():
                param.requires_grad = False

            net.fc = nn.Linear(net.fc.in_features, SETTINGS['NUM_CLASSES'])
            net = net.to(computing_device)
            net.train_epoch_losses = []
            net.val_epoch_losses = []
        else:
            net = SETTINGS['NNET'](
                SETTINGS['NUM_CLASSES']).to(computing_device)
            net.apply(weights_init)

        # Initialize optimizer and criterion
        if SETTINGS["WLOSS"]:
            criterion = nn.CrossEntropyLoss(
                weight=dataset.get_class_weights().to(computing_device))
        else:
            criterion = nn.CrossEntropyLoss()

        parameters_to_learn = []
        if SETTINGS['NNET'] is None:
            for name, param in net.named_parameters():
                if param.requires_grad:
                    parameters_to_learn.append(param)
        else:
            parameters_to_learn = net.parameters()

        optimizer = optim.Adam(parameters_to_learn,
                               lr=SETTINGS["LR"],
                               weight_decay=SETTINGS["DECAY"])

        # Fit and save model to file
        if SETTINGS['K-FOLD']:
            save_path = "./{}_model{}_{}.pth".format(net.__class__.__name__, k,
                                                     TIME)
        else:
            save_path = "./{}_model_{}.pth".format(net.__class__.__name__,
                                                   TIME)

        fit_model(computing_device,
                  net,
                  criterion,
                  optimizer,
                  train_loader,
                  validation_loader,
                  save_path=save_path)
        nnets.append(net)

    best_net = None
    for nnet in nnets:
        if best_net is None or min(nnet.val_epoch_losses) < min(
                best_net.val_epoch_losses):
            best_net = nnet
    return best_net
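Example #9 expects get_k_fold_indecies() to yield one (train_indices, val_indices) pair per fold. The helper is not shown; a minimal sketch using sklearn's KFold, keeping the project's spelling of the name (an assumption about its behaviour):

import numpy as np
from sklearn.model_selection import KFold

def get_k_fold_indecies(dataset, seed, k=5):
    # Hypothetical: one (train_indices, val_indices) pair per fold.
    kf = KFold(n_splits=k, shuffle=True, random_state=seed)
    return list(kf.split(np.arange(len(dataset))))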
Example #10
    def init_hidden_c_for_lstm(self, batch_size):
        hidden = Variable(
            torch.zeros(self.n_layers, batch_size, self.d_inner_hid))
        hidden = check_cuda(hidden, self.use_cuda)
        return hidden
Example #11
    model = models.create_model(args, place)

    Example = namedtuple("Example", ["src", "data_id"])
    context = []
    start_info = "Enter [EXIT] to quit the interaction, [NEXT] to start a new conversation."
    cprint(start_info, "yellow", attrs=["bold"])
    while True:
        user_utt = input(colored("[Human]: ", "red", attrs=["bold"])).strip()
        if user_utt == "[EXIT]":
            break
        elif user_utt == "[NEXT]":
            context = []
            cprint(start_info, "yellow", attrs=["bold"])
        else:
            context.append(user_utt)
            example = Example(src=" [SEP] ".join(context), data_id=0)
            record = task.reader._convert_example_to_record(example, is_infer=True)
            data = task.reader._pad_batch_records([record], is_infer=True)
            pred = task.infer_step(model, data)[0]
            bot_response = pred["response"]
            print(colored("[Bot]:", "blue", attrs=["bold"]), colored(bot_response, attrs=["bold"]))
            context.append(bot_response)

    return


if __name__ == "__main__":
    args = setup_args()
    check_cuda(True)
    interact(args)
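In Examples #11, #16 and #17 check_cuda takes a boolean and is used with PaddlePaddle, where such a helper typically verifies that the installed paddle build supports CUDA before the GPU is requested. A minimal sketch of that kind of guard (hypothetical; the actual helper lives in the project's utils):

import sys
import paddle

def check_cuda(use_cuda):
    # Hypothetical guard: abort early if GPU was requested but this paddle
    # build was compiled without CUDA support.
    if use_cuda and not paddle.is_compiled_with_cuda():
        print("Paddle was not compiled with CUDA; rerun with use_cuda=False "
              "or install the GPU build.")
        sys.exit(1)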
Example #12
def train_vae(encoder, decoder):
    encoder.train()
    decoder.train()
    for epoch_index in range(epoch):
        for batch, index in enumerate(range(0, len(x_train) - 1, batch_size)):
            total_loss = 0
            input_data, output_data = get_batch(x_train, index, batch_size)
            encoder.zero_grad()
            decoder.zero_grad()
            vae_opt.zero_grad()

            # The last batch may contain fewer samples than batch_size,
            # so init the hidden state with len(input_data) instead of batch_size
            enc_hidden = encoder.init_hidden(len(input_data))
            # The encoder takes a whole batch of sequences at once.
            enc_hidden = encoder(input_data, enc_hidden)

            # Generate a random one-hot array from the prior p(c)
            # NOTE: assume a uniform prior for now
            random_one_dim = np.random.randint(c_dim, size=len(input_data))
            one_hot_array = np.zeros((len(input_data), c_dim))
            one_hot_array[np.arange(len(input_data)), random_one_dim] = 1

            c = torch.from_numpy(one_hot_array).float()
            var_c = Variable(c, requires_grad=False)
            var_c = check_cuda(var_c, use_cuda)
            # TODO: use iteration along first dim.
            cat_hidden = (torch.cat([enc_hidden[0][0], var_c],
                                    dim=1).unsqueeze(0),
                          torch.cat([
                              decoder.init_hidden_c_for_lstm(
                                  len(input_data))[0], var_c
                          ],
                                    dim=1).unsqueeze(0))

            # Transpose output_data from (batch_size, seq_len) to (seq_len, batch_size)
            output_data = output_data.permute(1, 0)
            # The decoder is fed the batch word by word.
            for index, word in enumerate(output_data):
                if index == len(output_data) - 1:
                    break
                output, cat_hidden = decoder(word, cat_hidden)
                next_word = output_data[index + 1]
                total_loss += criterion(output.view(-1, max_features),
                                        next_word)
            # Train
            avg_loss = total_loss.data[0] / maxlen
            ll = latent_loss(encoder.z_mean, encoder.z_sigma)
            total_loss += ll
            total_loss.backward()
            vae_opt.step()

            if batch % 25 == 0:
                print(
                    "[VAE] Epoch {} batch {}'s average language loss: {}, latent loss: {}"
                    .format(
                        epoch_index,
                        batch,
                        avg_loss,
                        ll.data[0],
                    ))
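Examples #12 and #13 add latent_loss(encoder.z_mean, encoder.z_sigma) to the reconstruction loss, but the function is not shown. For a VAE this is normally the KL divergence between N(mu, sigma^2) and the standard normal prior, KL = 0.5 * (mu^2 + sigma^2 - log(sigma^2) - 1), averaged over the batch; a minimal sketch consistent with that formula (an assumption, not the project's code):

import torch

def latent_loss(z_mean, z_sigma):
    # Hypothetical: KL( N(z_mean, z_sigma^2) || N(0, 1) ), averaged over the batch.
    mean_sq = z_mean * z_mean
    sigma_sq = z_sigma * z_sigma
    return 0.5 * torch.mean(mean_sq + sigma_sq - torch.log(sigma_sq) - 1)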
Example #13
def train_vae_with_attr_loss(encoder, decoder, discriminator):
    for epoch_index in range(epoch):
        for batch, index in enumerate(range(0, len(x_train) - 1, batch_size)):
            encoder.zero_grad()
            decoder.zero_grad()
            e_opt.zero_grad()
            g_opt.zero_grad()
            vae_loss = 0
            ll = 0

            input_data, output_data = get_batch_label(x_train, y_train, index,
                                                      batch_size)

            enc_hidden = encoder.init_hidden(len(input_data))
            enc_hidden = encoder(input_data, enc_hidden)

            target = np.array([output_data.cpu().data.numpy()]).reshape(-1)
            one_hot_array = np.eye(c_dim)[target]
            c = torch.from_numpy(one_hot_array).float()
            var_c = Variable(c, requires_grad=False)
            var_c = check_cuda(var_c, use_cuda)
            # TODO: use iteration along first dim.
            cat_hidden = (torch.cat([enc_hidden[0][0], var_c],
                                    dim=1).unsqueeze(0),
                          torch.cat([
                              decoder.init_hidden_c_for_lstm(
                                  len(input_data))[0], var_c
                          ],
                                    dim=1).unsqueeze(0))

            batch_init_word = np.zeros((batch_size, max_features))
            batch_init_word[np.arange(batch_size), Constants.BOS] = 1
            batch_init_word = Variable(torch.from_numpy(batch_init_word),
                                       requires_grad=False).float()
            batch_init_word = check_cuda(batch_init_word, use_cuda)

            input_data = input_data.permute(1, 0)
            for index in range(maxlen - 1):
                if 'next_word' in locals():
                    word = next_word.squeeze(1)
                    word = check_cuda(word, use_cuda)
                    output, cat_hidden, pre_soft = decoder(word,
                                                           cat_hidden,
                                                           low_temp=True,
                                                           one_hot_input=True)
                else:
                    word = batch_init_word
                    word = check_cuda(word, use_cuda)
                    output, cat_hidden, pre_soft = decoder(word,
                                                           cat_hidden,
                                                           low_temp=True,
                                                           one_hot_input=True)
                # From one-hot to word embedding
                next_word = output
                correct_word = input_data[index + 1]
                vae_loss += criterion(pre_soft.view(-1, max_features),
                                      correct_word)
                if len(batch_init_word.size()) == 2:
                    batch_init_word = batch_init_word.unsqueeze(1)
                if len(next_word.size()) == 2:
                    next_word = next_word.unsqueeze(1)
                batch_init_word = torch.cat([batch_init_word, next_word],
                                            dim=1)
            # NOTE Latent loss
            ll = latent_loss(encoder.z_mean, encoder.z_sigma)
            # NOTE L_attr_c loss
            generated_sentence = batch_init_word
            discriminator.eval()
            logit = discriminator(generated_sentence, dont_pass_emb=True)
            l_attr_c = criterion(logit, output_data)
            # NOTE L_attr_z loss
            encoder.eval()
            generated_sentence = decoder.one_hot_to_word_emb(
                generated_sentence)
            encoded_gen = encoder.init_hidden(len(generated_sentence))
            encoded_gen = encoder(generated_sentence,
                                  encoded_gen,
                                  dont_pass_emb=True)
            l_attr_z = latent_loss(encoder.z_mean, encoder.z_sigma)

            avg_loss = vae_loss.data[0] / maxlen

            total_vae_loss = vae_loss + ll
            extra_decoder_loss = lambda_c * l_attr_c + lambda_z * l_attr_z
            total_vae_loss.backward()
            #e_opt.step()
            #extra_decoder_loss.backward()
            #g_opt.step()
            vae_opt.step()

            if batch % 25 == 0:
                print(
                    "[Attr] Epoch {} batch {}'s average language loss: {}, latent loss: {}"
                    .format(
                        epoch_index,
                        batch,
                        avg_loss,
                        ll.data[0],
                    ))
                print("l_attr_c loss: {}, l_attr_z loss: {}".format(
                    l_attr_c.data[0],
                    l_attr_z.data[0],
                ))
Example #14
import torch
import torch.nn as nn
import pdb
from utils import check_cuda

device = check_cuda()

## Credit https://github.com/fastai/fastai/blob/master/fastai/layers.py#L285


def trunc_normal_(x, mean: float = 0., std: float = 1.):
    "Truncated normal initialization."
    return x.normal_().fmod_(2).mul_(std).add_(mean)


def embedding(ni, nf, padding_idx=None):
    "Create an embedding layer."
    emb = nn.Embedding(ni, nf, padding_idx)
    # See https://arxiv.org/abs/1711.09160
    with torch.no_grad():
        trunc_normal_(emb.weight, std=0.01)
    return emb


class RecurLayer(nn.Module):
    """Multiple LSTM Layers with skip input connections.
        Gives all layer outsputs and states"""
    def __init__(self, dims=10, num_layers=1):
        super().__init__()
        self.num_layers = num_layers
        self.rnns = nn.ModuleList([nn.LSTM(dims, dims, batch_first=True)])
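Example #14 imports a check_cuda() variant that returns the torch.device to place modules and tensors on. A one-line sketch of that convention (hypothetical):

import torch

def check_cuda():
    # Hypothetical: return the device everything should be moved to.
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")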
Example #15
        pcff_str = '' if samplerate <= 0.0 else f'pcff_s{samplerate}'
        drop_str = '' if droprate <= 0.0 else f'_p{droprate}'
        return f'dense_2_1024_{activname}{pcff_str}{drop_str}'.replace('.', '')

    log_file = 'drop_{}_{}_{}_{}.log'.format(
        get_model_name(), '-'.join([
            str(i) for i in (train_batch, val_batch, test_batch)]),
        dataset_flavor, timestamp_run)
    log_title = log_file[:-4]
    logger = Log(log_dir + log_file)
    logger.start(log_title)
    logger.start_intercept()

    # check CUDA availability when needed
    if use_cuda:
        check_cuda()

    # set up mnist dataset image size (c, h, w) = (1, 28, 28)
    if dataset_flavor in AVAILABLE_FLAVORS:
        ((train_loader, val_loader, test_loader),
         (nb_train, nb_val, nb_test)) = get_mnist_dataloaders(
            data_dir, train_batch, val_batch, test_batch,
            train_val_split, use_cuda, dataset_flavor,
            keep_shape=False)
    else:
        raise Exception('Unknown dataset: {}'.format(dataset_flavor))
    print('dataset: {}, location: {}'.format(dataset_flavor, data_dir))
    print('sample / batch number for training:  ',
          nb_train, len(train_loader))
    print('sample / batch number for validation:',
          nb_val, len(val_loader))
Example #16
    Example = namedtuple("Example", ["src", "data_id"])
    context = []
    start_info = "Enter [EXIT] to quit the interaction, [NEXT] to start a new conversation."
    cprint(start_info, "yellow", attrs=["bold"])
    while True:
        user_utt = input(colored("[Human]: ", "red", attrs=["bold"])).strip()
        if user_utt == "[EXIT]":
            break
        elif user_utt == "[NEXT]":
            context = []
            cprint(start_info, "yellow", attrs=["bold"])
        else:
            context.append(user_utt)
            example = Example(src=" [SEP] ".join(context), data_id=0)
            record = task.reader._convert_example_to_record(example,
                                                            is_infer=True)
            data = task.reader._pad_batch_records([record], is_infer=True)
            pred = task.infer_step(model, data)[0]
            bot_response = pred["response"]
            print(colored("[Bot]:", "blue", attrs=["bold"]),
                  colored(bot_response, attrs=["bold"]))
            context.append(bot_response)

    return


if __name__ == "__main__":
    args = setup_args()
    check_cuda(False)
    interact(args)
Example #17
        train_pyreader.decorate_paddle_reader(train_reader)
        valid_pyreader.decorate_paddle_reader(valid_reader)
        train_with_pyreader(exe,
                            train_prog,
                            train_exe,
                            train_pyreader,
                            train_fetch_list,
                            train_metrics,
                            epochs=epochs,
                            log_interval=args.log_interval,
                            valid_interval=args.valid_interval,
                            save_dir=args.save_dir,
                            save_model_name=args.model_name,
                            enable_ce=args.enable_ce,
                            test_exe=valid_exe,
                            test_pyreader=valid_pyreader,
                            test_fetch_list=valid_fetch_list,
                            test_metrics=valid_metrics)


if __name__ == "__main__":
    args = parse_args()
    # check whether the installed paddle is compiled with GPU
    check_cuda(args.use_gpu)
    logger.info(args)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    train(args)


        print("Final test result:")
        fetch_list = [test_net["probs"].name, test_net["labels"].name]
        evaluate(test_exe, test_prog, test_pyreader,
                 fetch_list,
                 "test",
                 True)

    # infer
    if args.do_infer:
        print("Final infer result:")
        fetch_list = [infer_net["probs"].name]
        infer(test_exe, test_prog, infer_pyreader,
              fetch_list,
              "infer")


def get_cards():
    num = 0
    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    if cards != '':
        num = len(cards.split(","))
    return num


if __name__ == "__main__":
    args = PDConfig('config.json')
    args.build()
    #args.print_arguments()
    utils.check_cuda(args.use_cuda)
    main(args)