Example #1
    def setup(self):
        (txt_file, seq_length, gen_length, lstm_num_hidden, lstm_num_layers,
         batch_size, learning_rate, learning_rate_decay, learning_rate_step,
         dropout_keep_prob, train_steps, max_norm, summary_path, print_every,
         sample_every, wanted_device, temperature) = \
            itemgetter(*flags)(vars(self.config))

        # Initialize the device which to run the model on
        self.device = torch.device(wanted_device)

        # Initialize the dataset and data loader (note the +1)
        self.dataset = TextDataset(txt_file, seq_length)
        self.data_loader = DataLoader(self.dataset, batch_size, num_workers=1)

        self.vocabulary_size = self.dataset.vocab_size
        # Initialize the model that we are going to use
        dropout = 1 - dropout_keep_prob
        model_path = txt_file + '.pt'
        if os.path.exists(model_path):
            self.model = torch.load(model_path)
        else:
            self.model = TextGenerationModel(batch_size, seq_length, self.vocabulary_size,
                                             lstm_num_hidden, lstm_num_layers, self.device)
            # dropout is computed above but not passed to the model constructor here
        self.model.to(self.device)

        # Setup the loss and optimizer
        # cross-entropy loss can be computed by averaging over all timesteps using the target labels y(t)
        self.criterion = torch.nn.CrossEntropyLoss(reduction='mean')
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        gamma = 1 - learning_rate_decay
        self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=learning_rate_step, gamma=gamma)
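
All of the examples in this collection assume a `TextGenerationModel` class defined elsewhere. As a point of reference only, a minimal sketch consistent with the constructor call above (one-hot character inputs, logits plus recurrent state as output) could look like the following; the actual models behind these snippets differ in details such as embeddings, dropout, and extra helpers like `step`, `reset_stepper`, or `generate_sentence`.

import torch.nn as nn

class TextGenerationModel(nn.Module):
    # Minimal sketch only: a character-level LSTM with a linear read-out layer.
    def __init__(self, batch_size, seq_length, vocabulary_size,
                 lstm_num_hidden=256, lstm_num_layers=2, device='cpu'):
        super().__init__()
        self.lstm = nn.LSTM(vocabulary_size, lstm_num_hidden,
                            lstm_num_layers, batch_first=True)
        self.classifier = nn.Linear(lstm_num_hidden, vocabulary_size)
        self.device = device

    def forward(self, x, state=None):
        # x: one-hot input of shape (batch, seq_length, vocabulary_size)
        out, state = self.lstm(x, state)
        logits = self.classifier(out)  # (batch, seq_length, vocabulary_size)
        return logits, state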
Example #2
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(...)  # fixme
    data_loader = DataLoader(dataset, config.batch_size)

    # Initialize the model that we are going to use
    model = TextGenerationModel(...)  # FIXME

    # Setup the loss and optimizer
    criterion = None  # FIXME
    optimizer = None  # FIXME

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        loss = np.inf   # fixme
        accuracy = 0.0  # fixme

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if (step + 1) % config.print_every == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                    Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
                    ))

        if (step + 1) % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            pass

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error,
            # check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
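
The FIXME placeholders above are left as they appear in the original skeleton. One way to fill them, following the pattern used by the completed examples below (and assuming the same `config` object and imports), is sketched here:

    # Hypothetical completion of the placeholders, mirroring the other examples
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device).to(device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)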
Example #3
def infer():
    flags = parse_flags()
    flag_keys = ['checkpoint_path', 'txt_file', 'length', 'device', 'temperature']
    (checkpoint_path, txt_file, length, wanted_device, temperature) = itemgetter(*flag_keys)(vars(flags))

    # init with dummy values to load in checkpoint
    dataset = TextDataset(txt_file, 1)
    device = torch.device(wanted_device)
    model = torch.load(checkpoint_path)

    while True:
        start = input('Please input your text to complete:\n')
        chars = [dataset._char_to_ix[char] for char in start]
        s = generate(model, dataset, length, device, chars, temperature)
        print(s)
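
The `generate` helper called above is not shown in this snippet. A rough sketch of what it might do, assuming a model that returns logits plus a recurrent state (as in the sketch after Example #1) and following the convention used elsewhere in these examples of multiplying the logits by the temperature before the softmax, is:

import torch
import torch.nn.functional as F

def generate(model, dataset, length, device, chars, temperature=1.0):
    # Hypothetical sketch: warm the model up on the seed characters,
    # then sample the continuation one character at a time.
    model.eval()
    generated = list(chars)
    state = None
    with torch.no_grad():
        seed = F.one_hot(torch.tensor(generated, device=device),
                         num_classes=dataset.vocab_size).float().unsqueeze(0)
        logits, state = model(seed, state)
        for _ in range(length):
            probs = F.softmax(temperature * logits[0, -1], dim=0)
            next_ix = torch.multinomial(probs, 1).item()
            generated.append(next_ix)
            step_input = F.one_hot(torch.tensor([[next_ix]], device=device),
                                   num_classes=dataset.vocab_size).float()
            logits, state = model(step_input, state)
    return dataset.convert_to_string(generated)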
Example #4
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), config.learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=config.learning_rate_step,
                                          gamma=config.learning_rate_decay)

    accuracy_train = []
    loss_train = []

    if config.samples_out_file != "STDOUT":
        samples_out_file = open(config.samples_out_file, 'w')

    epochs = config.train_steps // len(data_loader) + 1

    print(
        "Will train on {} batches in {} epochs, max {} batches/epoch.".format(
            config.train_steps, epochs, len(data_loader)))

    for epoch in range(epochs):
        data_loader_iter = iter(data_loader)

        if epoch == config.train_steps // len(data_loader):
            batches = config.train_steps % len(data_loader)
        else:
            batches = len(data_loader)

        for step in range(batches):
            batch_inputs, batch_targets = next(data_loader_iter)
            model.zero_grad()

            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = F.one_hot(
                batch_inputs,
                num_classes=dataset.vocab_size,
            ).float().to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()

            pred, _ = model.forward(batch_inputs)
            loss = criterion(pred.transpose(2, 1), batch_targets)
            accuracy = acc(
                pred.transpose(2, 1),
                F.one_hot(batch_targets,
                          num_classes=dataset.vocab_size).float(),
                dataset.vocab_size)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            scheduler.step()

            if (epoch * len(data_loader) + step + 1) % config.seval_every == 0:
                accuracy_train.append(accuracy)
                loss_train.append(loss.item())

            if (epoch * len(data_loader) + step + 1) % config.print_every == 0:
                print(
                    "[{}] Epoch: {:04d}/{:04d}, Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), epoch + 1,
                        epochs, (epoch * len(data_loader) + step + 1),
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss))

            if (epoch * len(data_loader) + step +
                    1) % config.sample_every == 0:
                with torch.no_grad():
                    codes = []

                    input_tensor = torch.zeros((1, 1, dataset.vocab_size),
                                               device=device)
                    input_tensor[0, 0,
                                 np.random.randint(0, dataset.vocab_size)] = 1

                    for i in range(config.seq_length - 1):
                        response = model.step(input_tensor)
                        logits = F.log_softmax(config.temp * response, dim=1)
                        dist = torch.distributions.one_hot_categorical.OneHotCategorical(
                            logits=logits)
                        code = dist.sample().argmax().item()
                        input_tensor *= 0
                        input_tensor[0, 0, code] = 1
                        codes.append(code)

                    string = dataset.convert_to_string(codes)
                    model.reset_stepper()

                    if config.samples_out_file != "STDOUT":
                        samples_out_file.write("Step {}: ".format(
                            epoch * len(data_loader) + step + 1) + string +
                                               "\n")
                    else:
                        print(string)

    if config.samples_out_file != "STDOUT":
        samples_out_file.close()

    if config.model_out_file is not None:
        torch.save(model, config.model_out_file)

    if config.curves_out_file is not None:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        fig.suptitle(
            'Training curves for Pytorch 2-layer LSTM.\nFinal loss: {:.4f}. Final accuracy: {:.4f}\nSequence length: {}, Hidden units: {}, LSTM layers: {}, Learning rate: {:.4f}'
            .format(loss_train[-1], accuracy_train[-1], config.seq_length,
                    config.lstm_num_hidden, config.lstm_num_layers,
                    config.learning_rate))
        plt.subplots_adjust(top=0.8)

        ax[0].set_title('Loss')
        ax[0].set_ylabel('Loss value')
        ax[0].set_xlabel('No of batches seen x{}'.format(config.seval_every))
        ax[0].plot(loss_train, label='Train')
        ax[0].legend()

        ax[1].set_title('Accuracy')
        ax[1].set_ylabel('Accuracy value')
        ax[1].set_xlabel('No of batches seen x{}'.format(config.seval_every))
        ax[1].plot(accuracy_train, label='Train')
        ax[1].legend()

        plt.savefig(config.curves_out_file)

    print('Done training.')
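
The `acc` helper above is also defined elsewhere. Judging from the call site (class-first predictions of shape (batch, vocab_size, seq_length) and one-hot targets of shape (batch, seq_length, vocab_size)), a plausible sketch is:

def acc(predictions, targets_one_hot, vocab_size):
    # Hypothetical sketch: fraction of characters whose argmax prediction matches
    # the target class. vocab_size is accepted only to match the call above.
    pred_classes = predictions.argmax(dim=1)        # (batch, seq_length)
    target_classes = targets_one_hot.argmax(dim=2)  # (batch, seq_length)
    return (pred_classes == target_classes).float().mean().item()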
Example #5
def train(_run):
    config = argparse.Namespace(**_run.config)

    # Initialize the device
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    total_samples = int(config.train_steps * config.batch_size)
    sampler = RandomSampler(dataset,
                            replacement=True,
                            num_samples=total_samples)
    data_sampler = BatchSampler(sampler, config.batch_size, drop_last=False)
    data_loader = DataLoader(dataset,
                             num_workers=1,
                             batch_sampler=data_sampler)

    # Initialize the model that we are going to use
    model = TextGenerationModel(dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers).to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Prepare data
        batch_inputs = torch.stack(batch_inputs).to(device)
        batch_targets = torch.stack(batch_targets).t().to(device)

        # Forward, backward, optimize
        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            accuracy = eval_accuracy(logits, batch_targets)
            loss = batch_loss.item()
            log_str = ("[{}] Train Step {:04d}/{:04d}, "
                       "Batch Size = {}, Examples/Sec = {:.2f}, "
                       "Accuracy = {:.2f}, Loss = {:.3f}")
            print(
                log_str.format(datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                               config.train_steps, config.batch_size,
                               examples_per_second, accuracy, loss))

            _run.log_scalar('loss', loss, step)
            _run.log_scalar('acc', accuracy, step)

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            print('-' * (config.sample_length + 1))
            x0 = torch.randint(low=0, high=dataset.vocab_size, size=(1, 5))
            samples = model.sample(x0, config.sample_length).detach().cpu()
            samples = samples.numpy()

            for sample in samples:
                print(dataset.convert_to_string(sample))

            print('-' * (config.sample_length + 1))

        if step == config.train_steps:
            break

    print('Done training.')
    ckpt_path = os.path.join(SAVE_PATH, str(config.timestamp) + '.pt')
    torch.save(
        {
            'state_dict': model.state_dict(),
            'hparams': model.hparams,
            'ix_to_char': dataset.ix_to_char
        }, ckpt_path)
    print('Saved checkpoint to {}'.format(ckpt_path))
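
The checkpoint written above bundles the weights, the hyperparameters, and the index-to-character mapping. Assuming `model.hparams` is a dict of `TextGenerationModel` constructor keyword arguments (that attribute is not shown in this snippet), restoring the checkpoint could look roughly like:

def load_checkpoint(ckpt_path, device='cpu'):
    # Sketch under the assumption that 'hparams' holds TextGenerationModel kwargs
    ckpt = torch.load(ckpt_path, map_location=device)
    model = TextGenerationModel(**ckpt['hparams']).to(device)
    model.load_state_dict(ckpt['state_dict'])
    model.eval()
    return model, ckpt['ix_to_char']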
Example #6
def train(config):

    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        model_out = model.forward(batch_inputs)
        loss = criterion(model_out, batch_targets)
        optimizer.zero_grad()
        loss.backward()
        #######################################################

        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        optimizer.step()

        loss = loss.item()

        accuracy = (model_out.argmax(dim=1) == batch_targets).float().mean().item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            pass

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
Example #7
def train(config):

    # Initialize the device which to run the model on
    if torch.cuda.is_available():
        dev = "cuda:0"
    else:
        dev = "cpu"
    # Initialize the device which to run the model on
    device = torch.device(dev)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers,
                                device).to(device)  # fixme

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)  # fixme

    total_steps = 0

    training_summary = [['Total steps', 'Accuracy', 'Loss']]
    sampling_summary = [['Total steps', 'Sentence']]

    while config.train_steps > total_steps:

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            total_steps += 1

            if total_steps > config.train_steps: break

            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            batch_inputs = torch.nn.functional.one_hot(batch_inputs,
                                                       dataset.vocab_size)
            optimizer.zero_grad()
            output = model(batch_inputs)

            loss = 0.0
            for i in range(len(output[0])):
                pred = output[:, i, :]
                target = batch_targets[:, i]

                loss += criterion.forward(pred, target) / len(output[0])

            loss.backward()

            # Clip gradients only after backward() has populated them
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            with torch.no_grad():
                accuracy = 0.0

                total_size = 0
                correct = 0

                for i in range(len(output[0])):
                    pred = torch.nn.functional.softmax(output[:, i, :], dim=1)
                    pred = torch.max(pred, 1)[1]

                    correct += pred.eq(batch_targets[:, i]).sum().item()
                    total_size += len(pred)

                accuracy = correct / total_size

                # Just for time measurement
                t2 = time.time()
                examples_per_second = config.batch_size / float(t2 - t1)

                if total_steps % config.print_every == 0:
                    training_summary.append(
                        [total_steps, accuracy,
                         loss.item()])
                    print(
                        "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                        "Accuracy = {:.2f}, Loss = {:.3f}".format(
                            datetime.now().strftime("%Y-%m-%d %H:%M"),
                            total_steps, int(config.train_steps),
                            config.batch_size, examples_per_second, accuracy,
                            loss))

                if total_steps % config.sample_every == 0:
                    # Generate some sentences by sampling from the model
                    text = torch.zeros(
                        (1, 1)).long().random_(0,
                                               dataset.vocab_size).to(device)
                    text = torch.nn.functional.one_hot(text,
                                                       dataset.vocab_size)

                    temperature = config.temprature if config.temprature is not None else 1
                    for i in range(config.seq_length - 1):
                        prediction = model.forward(text)
                        pred = torch.nn.functional.softmax(temperature *
                                                           prediction[:, i, :],
                                                           dim=1)

                        if config.temprature is not None:
                            m = torch.distributions.categorical.Categorical(
                                pred)
                            pred = m.sample()
                        else:
                            pred = torch.max(pred, 1)[1]
                        pred = torch.nn.functional.one_hot(
                            pred, dataset.vocab_size)
                        pred = pred.unsqueeze(0)
                        text = torch.cat((text, pred), 1)
                        stuff = torch.argmax(text[0], 1)
                        sentence = dataset.convert_to_string(stuff.tolist())
                    print(sentence)
                    sampling_summary.append([total_steps, sentence])

                if total_steps == config.train_steps:
                    # If you receive a PyTorch data-loader error, check this bug report:
                    # https://github.com/pytorch/pytorch/pull/9655
                    break

    print('Done training.')
    print('Storing data')

    if not os.path.exists(config.summary_path):
        os.makedirs(config.summary_path)

    training_summary = pd.DataFrame(training_summary)
    sampling_summary = pd.DataFrame(sampling_summary)

    training_summary.to_csv(config.summary_path + "training_summary.csv",
                            header=False,
                            index=False,
                            sep=';')
    sampling_summary.to_csv(config.summary_path + "sampling_summary.csv",
                            header=False,
                            index=False,
                            sep=';')
    print('Finished')
Example #8
def train(config):
    # Initialize the device which to run the model on
    # device = torch.device(config.device)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    dataset = TextDataset(filename=config.txt_file,
                          seq_length=config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    VOCAB_SIZE = dataset.vocab_size
    CHAR2IDX = dataset._char_to_ix
    IDX2CHAR = dataset._ix_to_char

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=VOCAB_SIZE,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    scheduler = scheduler_lib.StepLR(optimizer=optimizer,
                                     step_size=config.learning_rate_step,
                                     gamma=config.learning_rate_decay)

    # Resume from a hard-coded intermediate checkpoint (always enabled here)
    if True:
        model.load_state_dict(
            torch.load('grimm-results/intermediate-model-epoch-30-step-0.pth',
                       map_location='cpu'))
        optimizer.load_state_dict(
            torch.load("grimm-results/intermediate-optim-epoch-30-step-0.pth",
                       map_location='cpu'))

        print("Loaded it!")

    model = model.to(device)

    EPOCHS = 50

    for epoch in range(EPOCHS):
        # initialization of state that's given to the forward pass
        # reset every epoch
        h, c = model.reset_lstm(config.batch_size)
        h = h.to(device)
        c = c.to(device)

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            model.train()

            optimizer.zero_grad()

            x = torch.stack(batch_inputs, dim=1).to(device)

            if x.size()[0] != config.batch_size:
                print("We're breaking because something is wrong")
                print("Current batch is of size {}".format(x.size()[0]))
                print("Supposed batch size is {}".format(config.batch_size))
                break

            y = torch.stack(batch_targets, dim=1).to(device)

            x = one_hot_encode(x, VOCAB_SIZE)

            output, (h, c) = model(x=x, prev_state=(h, c))

            loss = criterion(output.transpose(1, 2), y)

            accuracy = calculate_accuracy(output, y)
            h = h.detach()
            c = c.detach()
            loss.backward()
            # add clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()
            scheduler.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                #TODO FIX THIS PRINTING
                print(
                    f"Epoch {epoch} Train Step {step}/{config.train_steps}, Examples/Sec = {examples_per_second}, Accuracy = {accuracy}, Loss = {loss}"
                )
                #
                # print("[{}]".format(datetime.now().strftime("%Y-%m-%d %H:%M")))
                # print("[{}] Train Step {:04f}/{:04f}, Batch Size = {}, Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                #     datetime.now().strftime("%Y-%m-%d %H:%M"), step, config.train_steps, config.batch_size, examples_per_second, accuracy, loss
                # ))

                # print(loss)

            if step % config.sample_every == 0:
                FIRST_CHAR = 'I'  # Is randomized within the prediction, actually
                predict(device, model, FIRST_CHAR, VOCAB_SIZE, IDX2CHAR,
                        CHAR2IDX)
                # Generate some sentences by sampling from the model
                path_model = 'intermediate-model-epoch-{}-step-{}.pth'.format(
                    epoch, step)
                path_optimizer = 'intermediate-optim-epoch-{}-step-{}.pth'.format(
                    epoch, step)
                torch.save(model.state_dict(), path_model)
                torch.save(optimizer.state_dict(), path_optimizer)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')
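
Example #8 relies on `one_hot_encode` and `calculate_accuracy` helpers that are defined elsewhere. Plausible sketches, given how they are called above, are:

import torch.nn.functional as F

def one_hot_encode(x, vocab_size):
    # (batch, seq_length) index tensor -> (batch, seq_length, vocab_size) float one-hot
    return F.one_hot(x, num_classes=vocab_size).float()

def calculate_accuracy(output, targets):
    # Mean fraction of correctly predicted characters,
    # with output of shape (batch, seq_length, vocab_size)
    return (output.argmax(dim=-1) == targets).float().mean().item()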
Example #9
def train(config):
    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    writer = SummaryWriter()

    seq_length = config.seq_length
    batch_size = config.batch_size
    lstm_num_hidden = config.lstm_num_hidden
    lstm_num_layers = config.lstm_num_layers
    dropout_keep_prob = config.dropout_keep_prob

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, seq_length)
    data_loader = DataLoader(dataset, batch_size, num_workers=1)

    vocab_size = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size, seq_length, vocab_size,
                                lstm_num_hidden, lstm_num_layers,
                                dropout_keep_prob, device)
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             config.learning_rate_step,
                                             config.learning_rate_decay)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        # Inputs can be one-hot encoded or fed as indices to an embedding layer;
        # this model uses an embedding, so the raw indices are passed through.
        # batch_inputs = F.one_hot(batch_inputs, vocab_size).type(torch.FloatTensor).to(device)
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        train_output, _ = model.forward(batch_inputs)

        loss = criterion(train_output, batch_targets)
        accuracy = torch.sum(
            torch.eq(torch.argmax(train_output, dim=1),
                     batch_targets)).item() / (batch_targets.size(0) *
                                               batch_targets.size(1))

        writer.add_scalar('Loss/train', loss.item(), step)
        writer.add_scalar('Accuracy/train', accuracy, step)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    int(config.train_steps), config.batch_size,
                    examples_per_second, accuracy, loss))

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            sample_from_model(config, step, model, dataset)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    torch.save(model, "trained_model_part2.pth")
    writer.close()
Example #10
def train(config):
    
    def acc(predictions, targets):
        accuracy = (predictions.argmax(dim=2) == targets).float().mean()
        return accuracy

    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    print("Device", device)
    print("book:", config.txt_file)
    
    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset._vocab_size,
                 config.lstm_num_hidden, config.lstm_num_layers, device).to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    gen_lengths = [20, 30, 100, 200]
    print("temperature:", config.temperature_int)

    all_accuracies = []
    all_losses = []
    all_train_steps = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        batch_inputs = (torch.arange(dataset._vocab_size) == batch_inputs[..., None])  # create one-hot

        # Only for time measurement of step through network
        t1 = time.time()

        # set the data to device
        batch_inputs = batch_inputs.float().to(device)
        batch_targets = batch_targets.to(device)

        out, _ = model.forward(batch_inputs)  # forward pass

        loss = criterion(out.permute(0, 2, 1), batch_targets)  # calculate the loss
        accuracy = acc(out, batch_targets)  # calculate the accuracy

        optimizer.zero_grad() # throw away previous grads

        loss.backward() # calculate new gradients

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)  # make sure the gradients do not explode

        optimizer.step() # update the weights

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % config.print_every == 0:

            print("[{}] Train Step {:04d}/{:04f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss
            ))

            all_accuracies.append(accuracy.item())
            all_losses.append(loss.item())
            all_train_steps.append(step)


        if step % config.sample_every == 0:

            for gen_length in gen_lengths:
                print("Generated sentence with length of {}".format(gen_length))

                previous = random.randint(0, dataset._vocab_size - 1)  # get the first random letter
                letters = [previous]
                cell = None

                for i in range(gen_length):
                    # create input
                    input = torch.zeros(1, 1, dataset._vocab_size).to(device)
                    input[0, 0, previous] = 1

                    # do a forward pass
                    out, cell = model.forward(input, cell)

                    # get the next letter
                    out = out.squeeze()
                    if config.temperature is True:
                        out *= config.temperature_int
                        out = torch.softmax(out, dim=0)
                        previous = torch.multinomial(out, 1)[0].item()

                    else:
                        previous = out.argmax().item()

                    letters.append(previous)

                # convert to sentence
                sentence = dataset.convert_to_string(letters)
                print(sentence)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    with open("acc_loss_T_{}.txt".format(config.temperature_int), "w") as output:
        output.write("accuracies \n")
        output.write(str(all_accuracies) + "\n")
        output.write("losses \n")
        output.write(str(all_losses) + "\n")
        output.write("train steps \n")
        output.write(str(all_train_steps) + "\n")


    print('Done training.')
Example #11
def train(config):

    # Initialize the device which to run the model on
    #device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # print(dataset._char_to_ix)  # the vocabulary order can change between runs,
    # but with the seeds set earlier the batches contain the same example sentences

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, config.device)  # fixme

    device = model.device
    model = model.to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)
    print("Len dataset:", len(dataset))
    print("Amount of steps for dataset:", len(dataset) / config.batch_size)

    current_step = 0
    not_max = True

    list_train_acc = []
    list_train_loss = []
    acc_average = []
    loss_average = []

    file = open("sentences.txt", 'w', encoding='utf-8')
    '''
    file_greedy = open("sentences_greedy.txt",'w',encoding='utf-8')
    file_tmp_05 = open("sentences_tmp_05.txt", 'w', encoding='utf-8')
    file_tmp_1 = open("sentences_tmp_1.txt", 'w', encoding='utf-8')
    file_tmp_2 = open("sentences_tmp_2.txt", 'w', encoding='utf-8')
    '''

    while not_max:

        for (batch_inputs, batch_targets) in data_loader:

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...

            # The batch contains character indices; look them up in the model's
            # embedding layer (shape: dataset.vocab_size x config.lstm_num_hidden)
            embed = model.embed

            #Preprocess input to embeddings to give to LSTM all at once
            all_embed = []
            #sentence = []
            for batch_letter in batch_inputs:
                batch_letter_to = batch_letter.to(
                    device)  #torch.tensor(batch_letter,device = device)
                embedding = embed(batch_letter_to)
                all_embed.append(embedding)

                #sentence.append(batch_letter_to[0].item())
            all_embed = torch.stack(all_embed)

            #Print first example sentence of batch along with target
            #print(dataset.convert_to_string(sentence))
            #sentence = []
            #for batch_letter in batch_targets:
            #    sentence.append(batch_letter[0].item())
            #print(dataset.convert_to_string(sentence))

            all_embed = all_embed.to(device)
            # Output shape: (seq_length, batch_size, vocab_size); 87 for the fairy tales corpus
            outputs = model(all_embed)

            #######################################################

            #loss = np.inf   # fixme
            #accuracy = 0.0  # fixme

            # For the loss, predictions must be (batch_size, vocab_size, seq_length)
            # and targets (batch_size, seq_length)
            batch_first_output = outputs.transpose(0, 1).transpose(1, 2)
            batch_targets = torch.stack(batch_targets).to(device)
            loss = criterion(batch_first_output, torch.t(batch_targets))

            #Backpropagate
            model.zero_grad()
            loss.backward()
            loss = loss.item()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            #Accuracy
            number_predictions = torch.argmax(outputs, dim=2)
            result = number_predictions == batch_targets
            accuracy = result.sum().item() / (batch_targets.shape[0] *
                                              batch_targets.shape[1])
            ''''
            #Generate sentences for all settings on every step
            sentence_id = model.generate_sentence(config.gsen_length, -1)
            sentence = dataset.convert_to_string(sentence_id)
            #print(sentence)
            file_greedy.write( (str(current_step) + ": " + sentence + "\n"))

            sentence_id = model.generate_sentence(config.gsen_length, 0.5)
            sentence = dataset.convert_to_string(sentence_id)
            #print(sentence)
            file_tmp_05.write( (str(current_step) + ": " + sentence + "\n"))

            sentence_id = model.generate_sentence(config.gsen_length, 1)
            sentence = dataset.convert_to_string(sentence_id)
            #print(sentence)
            file_tmp_1.write( (str(current_step) + ": " + sentence + "\n"))

            sentence_id = model.generate_sentence(config.gsen_length, 2)
            sentence = dataset.convert_to_string(sentence_id)
            #print(sentence)
            file_tmp_2.write( (str(current_step) + ": " + sentence + "\n"))
            '''

            if config.measure_type == 2:
                acc_average.append(accuracy)
                loss_average.append(loss)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if current_step % config.print_every == 0:

                # Average accuracy and loss over the last print every step (5 by default)
                if config.measure_type == 2:
                    accuracy = sum(acc_average) / len(acc_average)
                    loss = sum(loss_average) / len(loss_average)
                    acc_average = []
                    loss_average = []

                # Either accuracy and loss on the print every interval or the average of that interval as stated above
                list_train_acc.append(accuracy)
                list_train_loss.append(loss)

                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"),
                        current_step, config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss))
            elif config.measure_type == 0:
                # Track accuracy and loss for every step
                list_train_acc.append(accuracy)
                list_train_loss.append(loss)

            if current_step % config.sample_every == 0:
                # Generate sentence
                sentence_id = model.generate_sentence(config.gsen_length,
                                                      config.temperature)
                sentence = dataset.convert_to_string(sentence_id)
                print(sentence)
                file.write((str(current_step) + ": " + sentence + "\n"))

            if current_step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                not_max = False
                break

            current_step += 1

    # Close the file and make sure the sentences and measurements are saved
    file.close()
    pickle.dump((list_train_acc, list_train_loss),
                open("loss_and_train.p", "wb"))

    #Plot
    print(len(list_train_acc))

    if config.measure_type == 0:
        eval_steps = list(range(config.train_steps + 1))  # Every step Acc
    else:  #
        eval_steps = list(
            range(0, config.train_steps + config.print_every,
                  config.print_every))

    if config.measure_type == 2:
        plt.plot(eval_steps[:-1], list_train_acc[1:], label="Train accuracy")
    else:
        plt.plot(eval_steps, list_train_acc, label="Train accuracy")

    plt.xlabel("Step")
    plt.ylabel("Accuracy")
    plt.title("Training accuracy LSTM", fontsize=18, fontweight="bold")
    plt.legend()
    # plt.savefig('accuracies.png', bbox_inches='tight')
    plt.show()

    if config.measure_type == 2:
        plt.plot(eval_steps[:-1], list_train_loss[1:], label="Train loss")
    else:
        plt.plot(eval_steps, list_train_loss, label="Train loss")
    plt.xlabel("Step")
    plt.ylabel("Loss")
    plt.title("Training loss LSTM", fontsize=18, fontweight="bold")
    plt.legend()
    # plt.savefig('loss.png', bbox_inches='tight')
    plt.show()
    print('Done training.')
Example #12
                        type=str,
                        default="./summaries/",
                        help='Output path for summaries')
    parser.add_argument('--print_every',
                        type=int,
                        default=5,
                        help='How often to print training progress')
    parser.add_argument('--sample_every',
                        type=int,
                        default=100,
                        help='How often to sample from the model')

    config = parser.parse_args()

    dataset = TextDataset(
        config.txt_file,
        config.seq_length)  #'./part2/een_klein_heldendicht.txt', 10)

    # get a couple of sequence examples from batches
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # for step, (batch_inputs, batch_targets) in enumerate(data_loader):
    X_iterable = enumerate(data_loader)
    step, (X_transposed, y_transposed) = next(X_iterable)
    X_batch = torch.stack(X_transposed).t()
    Y_batch = torch.stack(y_transposed).t()

    # one-hot encode
    X = torch.zeros(len(X_batch), config.seq_length,
                    dataset.vocab_size).scatter_(2, X_batch.unsqueeze(2), 1)
Example #13
def sample(config):
    dataset = TextDataset(config.base_txt, config.seq_length)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    model = torch.load(config.model_file, map_location=device)

    if config.samples_out_file != "STDOUT":
        samples_out_file = open(config.samples_out_file, 'w')

    if config.no_random is not None:
        with torch.no_grad():
            codes = []

            for k in range(config.no_random):
                input_tensor = torch.zeros((1, 1, dataset.vocab_size),
                                           device=device)
                input_tensor[0, 0,
                             np.random.randint(0, dataset.vocab_size)] = 1

                for i in range(config.seq_length - 1):
                    response = model.step(input_tensor)
                    logits = F.log_softmax(config.temp * response, dim=1)
                    dist = torch.distributions.one_hot_categorical.OneHotCategorical(
                        logits=logits)
                    code = dist.sample().argmax().item()
                    input_tensor *= 0
                    input_tensor[0, 0, code] = 1
                    codes.append(code)

                string = dataset.convert_to_string(codes)
                model.reset_stepper()

                if config.samples_out_file != "STDOUT":
                    samples_out_file.write("Sample {}: ".format(k) + string +
                                           "\n")
                else:
                    print("Sample {}: ".format(k) + string)
                string = ''
                codes = []
    elif config.sentence is not None:
        with torch.no_grad():
            codes = []
            for char in config.sentence:
                codes.append(dataset._char_to_ix[char])
            input_tensor = torch.zeros((1, len(codes), dataset.vocab_size),
                                       device=device)
            input_tensor[0, np.arange(0, len(codes), 1), codes] = 1

            chars_to_gen = config.seq_length - len(codes)

            for i in range(len(codes)):
                response = model.step(input_tensor[:, i, :].view(
                    1, 1, dataset.vocab_size))

            input_tensor = torch.zeros((1, 1, dataset.vocab_size),
                                       device=device)
            for i in range(chars_to_gen):
                logits = F.log_softmax(config.temp * response, dim=1)
                dist = torch.distributions.one_hot_categorical.OneHotCategorical(
                    logits=logits)
                code = dist.sample().argmax().item()
                input_tensor *= 0
                input_tensor[0, 0, code] = 1
                codes.append(code)
                response = model.step(input_tensor)

            string = dataset.convert_to_string(codes)
            model.reset_stepper()

            if config.samples_out_file != "STDOUT":
                samples_out_file.write(string + "\n")
            else:
                print(string)
    else:
        with torch.no_grad():
            codes = []
            beams = []
            for k in range(config.beam_width):
                beam_dict = {}
                beam_dict['hidden_state'] = None
                beam_dict['logit'] = -np.log(config.beam_width)
                beam_dict['seq_codes'] = [
                    np.random.randint(0, dataset.vocab_size)
                ]
                beams.append(beam_dict)

            input_tensor = torch.zeros((1, 1, dataset.vocab_size),
                                       device=device)

            import copy

            for i in range(config.seq_length):
                new_beams = []

                for element in beams:
                    input_tensor *= 0
                    input_tensor[0, 0, element['seq_codes'][-1]] = 1.0
                    response, hid = model.forward(input_tensor,
                                                  element['hidden_state'])
                    logits = F.log_softmax(config.temp * response, dim=2)
                    for code, logit in enumerate(logits[0, 0, :]):
                        new_dict = copy.deepcopy(element)
                        new_dict['hidden_state'] = hid
                        new_dict['seq_codes'].append(code)
                        new_dict['logit'] += logit.item()
                        new_beams.append(new_dict)
                new_beams.sort(reverse=True, key=lambda dic: dic['logit'])
                beams = new_beams[:config.beam_width]
            for beam in beams:
                string = dataset.convert_to_string(beam['seq_codes'])
                if config.samples_out_file != "STDOUT":
                    samples_out_file.write(string + "\n")
                else:
                    print(string)
Example #14
def train(config):
    print(config.train_steps)
    device = torch.device(config.device)

    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)


    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers,
                                device)


    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    generated_text = []
    for epochs in range(10):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            x = torch.stack(batch_inputs, dim=1).to(device)

            # one hot
            encoded_size = list(x.shape)
            encoded_size.append(dataset.vocab_size)
            one_hot = torch.zeros(encoded_size, device=x.device)
            one_hot.scatter_(2, x.unsqueeze(-1), 1)

            targets = torch.stack(batch_targets, dim=1).to(device)

            #######################################################
            predictions = model.forward(one_hot)
            loss = criterion(predictions.transpose(2, 1), targets)
            loss.backward()
            #######################################################

            optimizer.step()
            optimizer.zero_grad()

            loss = loss.item()

            size = targets.shape[0] * targets.shape[1]
            accuracy = torch.sum(predictions.argmax(dim=2) == targets).to(torch.float32) / size
            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)

            if step % config.print_every == 0:

                print("examples per sec " + str(examples_per_second)+" step "+str(step)+" accuracy "+str(accuracy.item()) +" loss "+str(loss))
                # print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #       "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #         datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #         config.train_steps, config.batch_size, examples_per_second,
                #         accuracy, loss
                # ))

                # Generate some sentences by sampling from the model
                random_seed = torch.randint(low=0, high=dataset.vocab_size, size=(1, 1), dtype=torch.long, device=device)

                text_fifteen, text, temp_nine, temp_five, temp_one = generator(model=model, seed=random_seed, length=config.seq_length, dataset=dataset)

                generated_text.append(text_fifteen)
                generated_text.append(text)
                generated_text.append(temp_nine)
                generated_text.append(temp_five)
                generated_text.append(temp_one)

                print("temp 1.5: " + generated_text[-5])
                print("temp 1: " + generated_text[-4])
                print("temp 0.9: " + generated_text[-3])
                print("temp 0.5: " + generated_text[-2])
                print("temp 0.2: " + generated_text[-1])
                print("")

                file = open("generated.txt", "a")
                file.write("beta 1.5: " + generated_text[-5] + "\n")
                file.write("beta 1: " + generated_text[-4] + "\n")
                file.write("beta 0.9: " + generated_text[-3] + "\n")
                file.write("beta 0.5: " + generated_text[-2] + "\n")
                file.write("beta 0.2: " + generated_text[-1] + "\n")
                file.write("")
                file.close()

            if step == config.sample_every:
                # Generate some sentences by sampling from the model
                pass

            if step == 30000:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break


    print('Done training.')
Example #15
def train(config):
    """
    """

    # some additional vars
    learning_rate = config.learning_rate

    # TODO: Initialize the device which to run the model on
    device = 'cpu'
    device = torch.device(device)

    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(vocabulary_size=dataset.vocab_size, device='cpu', **config.__dict__)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

    # evaluation
    loss_list = list()
    accuracy_list = list()

    mean_loss_list = list()
    mean_accuracy_list = list()

    step = 0
    epoch = 0
    steps_total = 0

    text_greedy_generated = dict()
    text_random_generated = dict()

    while steps_total < config.train_steps:
        epoch += 1
        for step, (X_transposed, y_transposed) in enumerate(data_loader):
            steps_total = (epoch - 1) * len(data_loader) + step
            # Only for time measurement of step through network
            t1 = time.time()

            X_batch = torch.stack(X_transposed).t()
            Y_batch = torch.stack(y_transposed).t()

            X = X_batch.to(device)
            y = Y_batch.to(device)

            X = torch.zeros(len(X), config.seq_length, dataset.vocab_size).scatter_(2, X.unsqueeze(2), 1)

            optimizer.zero_grad()
            outputs = model.forward(X).type(dtype)

            # Add more code here ...
            loss_current = criterion(outputs.transpose(2, 1), y)
            loss_current.backward(retain_graph=True)
            optimizer.step()

            # evaluation
            loss = loss_current.detach().item()
            accuracy = (outputs.argmax(dim=2) == y.long()).sum().float() / (float(y.shape[0]) * float(y.shape[1]))

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)

            loss_list.append(loss)
            accuracy_list.append(accuracy)

            if step % config.print_every == 0:

                mean_loss_list.append(np.mean(loss_list[-50:]))
                mean_accuracy_list.append(np.mean(accuracy_list[-50:]))

                print("[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), steps_total,
                        config.train_steps, config.batch_size, examples_per_second,
                        accuracy, loss
                ))

                # Text generation
                if step % config.sample_every == 0:
                    # Generate some sentences by sampling from the model
                    text_greedy, text_random = text_generator(model, config.seq_length, 0.2, dataset, device)
                    text_greedy_generated[len(mean_accuracy_list)] = text_greedy
                    text_random_generated[len(mean_accuracy_list)] = text_random
                    print(text_greedy, len(text_greedy))
                    print(text_random, len(text_random))

            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            if steps_total >= config.train_steps:
                break

    print('Done training.')
    return mean_loss_list, mean_accuracy_list, text_greedy_generated, text_random_generated
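
The `text_generator` helper used in Example #15 returns a greedy and a sampled completion side by side; it is not defined here, but a minimal sketch consistent with that usage (and with the model sketch after Example #1, using the standard convention of dividing logits by the temperature) might be:

import torch
import torch.nn.functional as F

def text_generator(model, seq_length, temperature, dataset, device):
    # Hypothetical sketch: generate one greedy (argmax) and one temperature-sampled
    # sequence of seq_length characters, both starting from the same random character.
    model.eval()
    start = torch.randint(dataset.vocab_size, (1,)).item()
    results = []
    with torch.no_grad():
        for greedy in (True, False):
            chars, state = [start], None
            for _ in range(seq_length - 1):
                x = F.one_hot(torch.tensor([[chars[-1]]], device=device),
                              num_classes=dataset.vocab_size).float()
                logits, state = model(x, state)
                if greedy:
                    nxt = logits[0, -1].argmax().item()
                else:
                    probs = F.softmax(logits[0, -1] / temperature, dim=0)
                    nxt = torch.multinomial(probs, 1).item()
                chars.append(nxt)
            results.append(dataset.convert_to_string(chars))
    return results[0], results[1]  # (text_greedy, text_random)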