Example #1
def train(config):

    if config.tensorboard:
        writer = SummaryWriter(config.summary +
                               datetime.now().strftime("%Y%m%d-%H%M%S"))
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)
    for epoch in range(config.epochs):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            optimizer.zero_grad()
            # one_hot returns a LongTensor; cast it to float so the LSTM forward pass accepts it
            batch_inputs = torch.nn.functional.one_hot(
                batch_inputs,
                num_classes=dataset.vocab_size).float().to(device)

            batch_targets = batch_targets.to(device)
            out, _ = model.forward(batch_inputs)

            # CrossEntropyLoss expects (batch, vocab_size, seq_length), so permute the (batch, seq_length, vocab_size) output
            loss = criterion(out.permute(0, 2, 1), batch_targets)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            predictions = out.argmax(dim=-1)
            accuracy = (predictions == batch_targets).float().mean()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print(
                    "[{}] Train Step {:04d}/{:04d}, Epoch {:d} Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        int(config.train_steps), epoch, config.batch_size,
                        examples_per_second, accuracy, loss))
                if config.tensorboard:
                    writer.add_scalar('training_loss', loss, step)
                    writer.add_scalar('accuracy', accuracy, step)

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                # print(f'shape state {state[1].shape}')
                # sys.exit(0)
                generate_sentence(step, model, config, dataset)
                # pass

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                print('Done training.')
                break
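The loop above calls a generate_sentence helper that is not shown in this example. Below is a minimal sketch of what such a sampler might look like, assuming the model takes a one-hot (batch, seq_len, vocab_size) float tensor and returns (logits, state) exactly as in the training loop, and that the dataset exposes vocab_size and convert_to_string; the body is illustrative only, not the original implementation.

import torch
import torch.nn.functional as F

def generate_sentence(step, model, config, dataset, length=60, temperature=1.0):
    # Illustrative sampler: repeatedly re-feed the growing sequence and draw the
    # next character from the temperature-scaled softmax of the last time step.
    device = next(model.parameters()).device
    with torch.no_grad():
        seq = [torch.randint(dataset.vocab_size, (1,)).item()]  # random start character
        for _ in range(length):
            inp = F.one_hot(torch.tensor([seq]), num_classes=dataset.vocab_size)
            out, _ = model(inp.float().to(device))               # (1, len(seq), vocab_size)
            probs = torch.softmax(out[0, -1] / temperature, dim=0)
            seq.append(torch.multinomial(probs, 1).item())
    print("Step {}: {}".format(step, dataset.convert_to_string(seq)))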
Example #2
def train(config):

    # Initialize the device which to run the model on
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    #path to save the model
    path = "results/"

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    # print("Data file:", dataset._data[0:5])
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset,
                                config.lstm_num_hidden, config.lstm_num_layers,
                                device)
    # model = torch.load("results/book_EN_grimms_fairy_tails_final_model.pt")

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Store Accuracy and losses:
    results = {'accuracy': [], 'loss': []}

    # Training:
    total_steps = 0
    while total_steps <= config.train_steps:

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()
            optimizer.zero_grad()

            # Stacking and One-hot encoding:
            batch_inputs = torch.stack(batch_inputs, dim=1).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(device)
            # print("Inputs and targets:", x_onehot.size(), batch_targets.size())

            # forward inputs to the model:
            pred_targets, _ = model.forward(
                index_to_onehot(batch_inputs, dataset.vocab_size))
            # print("pred_targets trans shape:", pred_targets.transpose(2,1).size())
            loss = criterion(pred_targets.transpose(2, 1), batch_targets)

            #Backward pass
            loss.backward(retain_graph=True)
            optimizer.step()

            #Accuracy
            # argmax along the vocab dimension
            accuracy = (pred_targets.argmax(
                dim=2) == batch_targets).float().mean().item()

            #Update the accuracy and losses for visualization:
            results['accuracy'].append(accuracy)
            results['loss'].append(loss.item())

            # Just for time measurement
            t2 = time.time()
            # examples_per_second = config.batch_size/float(t2-t1)
            total_steps += 1

            if step % config.print_every == 0:

                # print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #       "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #         datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #         config.train_steps, config.batch_size, examples_per_second,
                #         accuracy, loss
                # ))
                print("[{}] Train Step {:07d}/{:07d}, Batch Size = {}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          total_steps, config.batch_size,
                          results['accuracy'][-1], results['loss'][-1]))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                print('GENERATED NO TEMP:')
                print(model.generate_sentence(100))
                print('__________________')
                print('GENERATED 0.5 TEMP:')
                print(model.generate_sentence(100, 0.5))
                print('__________________')
                print('GENERATED 1 TEMP:')
                print(model.generate_sentence(100, 1))
                print('__________________')
                print('GENERATED 2 TEMP:')
                print(model.generate_sentence(100, 2))
                # save model for individual timesteps
                torch.save(
                    model, path + config.txt_file.split('/')[1].split('.')[0] +
                    str(step) + "_model.pt")

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        print('Done training.')
        #Save the final model

        torch.save(
            model, path + config.txt_file.split('/')[1].split('.')[0] +
            "_final_model.pt")
        print("saving results in folder...")
        np.save(path + "loss_train", results['loss'])
        np.save(path + "accuracy_train", results['accuracy'])
Example #3
def train(config):
    # empty file to write generated text to
    with open('generated.txt', 'w'): pass

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, \
                                lstm_num_hidden=config.lstm_num_hidden, device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = torch.stack(batch_inputs)
        embedding = one_hot(batch_inputs, dataset._vocab_size)

        batch_targets = torch.stack(batch_targets)

        h_0 = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden)
        c_0 = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden)
        output = model.forward(embedding, h_0, c_0)
        optimizer.zero_grad()

        losses, accuracies = [], []
        for i, out in enumerate(output):
            label = batch_targets[i,:]

            loss = criterion(out, label)
            losses.append(loss)

            accuracy = (torch.max(out, 1)[1] == label).float().mean()
            accuracies.append(accuracy)

        loss = sum(losses) / len(losses)
        accuracy = sum(accuracies) / len(accuracies)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()


        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % config.print_every == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step, int(config.train_steps),
                    config.batch_size, examples_per_second, accuracy, loss
            ))

        if step % config.sample_every == 0:
            sample(dataset, model)
            #sample2(dataset, model)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
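The sample(dataset, model) call above is not defined in this example. A rough sketch of a greedy sampler matching the model interface used in the loop (one-hot input plus explicit h_0/c_0, per-step logits out) is given below; the layer sizes are placeholders and a batch size of 1 is assumed to be accepted, neither of which the original code confirms.

import torch
import torch.nn.functional as F

def sample(dataset, model, length=30, num_layers=2, num_hidden=256):
    # Illustrative greedy sampler: re-feed the growing sequence with fresh zero
    # states each step, so no hidden state needs to be threaded through.
    with torch.no_grad():
        seq = [torch.randint(dataset.vocab_size, (1,)).item()]
        for _ in range(length):
            inp = torch.tensor(seq).unsqueeze(1)                       # (len(seq), 1)
            emb = F.one_hot(inp, num_classes=dataset.vocab_size).float()
            h_0 = torch.zeros(num_layers, 1, num_hidden)
            c_0 = torch.zeros(num_layers, 1, num_hidden)
            out = model.forward(emb, h_0, c_0)                         # (len(seq), 1, vocab_size)
            seq.append(out[-1, 0].argmax().item())
        print(dataset.convert_to_string(seq))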
Example #4
    offset = 2380
    temperature = 1
    policy = 'greedy'
    for e in range(epoch):
        #torch.save(model.state_dict(), str(e+1) + 'model.pt')
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            #lr_optim.step()
            optimizer.zero_grad()
            t1 = time.time()
            inputs = torch.stack([*batch_inputs], dim=1)
            targets = torch.stack([*batch_targets], dim=1)
            inputs = inputs.to(device)
            targets = targets.to(device)
            out = model.forward(inputs)[0]
            out = out.permute(0, 2, 1)
            loss = criterion(out, targets)
            accuracy = acc(out, targets)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)

            if step % config.print_every == 0:

                print('accuracy, loss, step: \n',
                      accuracy, loss.item(), step)
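The acc helper used above is not included in this fragment. Since out is permuted to (batch, vocab_size, seq_length) before being passed to both the criterion and acc, a plausible minimal implementation is the following sketch:

def acc(out, targets):
    # out: (batch, vocab_size, seq_length) logits; targets: (batch, seq_length) indices
    return (out.argmax(dim=1) == targets).float().mean().item()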
Example #5
temp_list = [0.5, 1., 2.]
policy_list = ['greedy', 'temp']
seq_length = 111
alice_string = list('Alice')

# Generate some sentences by sampling from the model
for policy in policy_list:
    for temperature in temp_list:
        char_list = []
        hidden = None
        for alice in alice_string:
            idx = dataset.convert_to_idx(alice)
            char_list.append(idx)
            generator = torch.tensor([idx]).unsqueeze(-1)
            generator = generator.to(device)
            generator, hidden = model.forward(generator, hidden)

        for _ in range(seq_length):
            if policy == 'greedy':
                idx = torch.argmax(generator).item()
            else:
                temp = generator.squeeze() / temperature
                soft = torch.softmax(temp, dim=0)
                idx = torch.multinomial(soft, 1).item()
            generator = torch.tensor([idx]).unsqueeze(-1)
            generator = generator.to(device)
            generator, hidden = model.forward(generator, hidden)
            char_list.append(idx)
        char = dataset.convert_to_string(char_list)
        with open("BonusTemp" + str(int(np.floor(temperature))) + "Book.txt", "w+") as text_file:
            print(policy + ': ', temperature, '\n Output: ', char, file=text_file)
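As a side note, the effect of the temperature divisor used above can be checked with a tiny standalone experiment (the logits are arbitrary): lower temperatures sharpen the softmax towards the greedy choice, higher temperatures flatten it.

import torch

logits = torch.tensor([2.0, 1.0, 0.1])
for T in (0.5, 1.0, 2.0):
    print(T, torch.softmax(logits / T, dim=0))
# T=0.5 concentrates almost all mass on the largest logit,
# while T=2.0 spreads the probability much more evenly.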
Example #6
def train(config):

    # Initialize the device which to run the model on
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print("cuda")
        device = torch.device('cuda:0')
    else:
        print("no cuda")
        device = torch.device('cpu')

    # Text generation options
    generate_text = True
    generated_text_size = 1500
    fixed_output_samples = False
    fixed_random_samples = True

    #    device = torch.device(device)
    dtype = torch.cuda.LongTensor if use_cuda else torch.LongTensor
    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    if config.load_model == "none":
        model = TextGenerationModel(config.batch_size, config.seq_length,
                                    dataset.vocab_size).to(device)  # fixme
        print(model)
    else:
        print("load model")
        model = TextGenerationModel(config.batch_size, config.seq_length,
                                    dataset.vocab_size).to(device)

        if use_cuda:
            model.load_state_dict(torch.load("model.pt"))
        else:
            trained_model = torch.load(
                "model.pt", map_location=lambda storage, loc: storage)
            model.load_state_dict(trained_model)
        print(model)
    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # fixme
    optimizer = optim.RMSprop(model.parameters(),
                              config.learning_rate)  # fixme
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        model_output = model.forward(batch_inputs, use_cuda, config.temp)

        out_max = torch.argmax(model_output, dim=2)

        batch_targets = torch.stack(batch_targets)

        optimizer.zero_grad()

        accuracy = 0.0

        model_output = model_output.view(-1, model_output.shape[2])

        batch_targets = batch_targets.view(-1).type(
            torch.LongTensor).type(dtype)

        loss = criterion(model_output, batch_targets)

        accuracy = accuracy_(model_output, batch_targets)

        loss.backward()

        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    int(config.train_steps), config.batch_size,
                    examples_per_second, accuracy, loss))

        text = torch.stack(batch_inputs)
        sentece1 = text[:, 0].view(text[:, 0].shape[0], 1)
        #        print(dataset.convert_to_string(sentece1))

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            text = torch.stack(batch_inputs)
            if generate_text:

                sentece = text[:, 0].view(text[:, 0].shape[0], 1)
                text = model.create_text(sentece, generated_text_size,
                                         use_cuda, config.temp)
                print("Generated Text : ", dataset.convert_to_string(text),
                      " : end")

            if fixed_output_samples:

                sentece1 = text[:, 0].view(text[:, 0].shape[0], 1)
                gen_sentence1 = out_max[:, 0].view(out_max[:, 0].shape[0], 1)
                print("Original Text : ", dataset.convert_to_string(sentece1),
                      " Generated Text : ",
                      dataset.convert_to_string(gen_sentence1))
                sentece2 = text[:, 1].view(text[:, 1].shape[0], 1)
                gen_sentence2 = out_max[:, 1].view(out_max[:, 1].shape[0], 1)
                print("Original Text : ", dataset.convert_to_string(sentece2),
                      " Generated Text : ",
                      dataset.convert_to_string(gen_sentence2))
                sentece3 = text[:, 2].view(text[:, 2].shape[0], 1)
                gen_sentence3 = out_max[:, 2].view(out_max[:, 2].shape[0], 1)
                print("Original Text : ", dataset.convert_to_string(sentece3),
                      " Generated Text : ",
                      dataset.convert_to_string(gen_sentence3))

            if fixed_random_samples:

                text = model.random_sampling(config.seq_length, use_cuda,
                                             config.temp)
                print("Generated Text : ", dataset.convert_to_string(text),
                      " : end")

            print("Saving model...")
            torch.save(model.state_dict(), "model.pt")

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
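The accuracy_ helper called above is not part of this example. Because both arguments are flattened before the call (logits to (N, vocab_size), targets to (N,)), a minimal consistent sketch is:

def accuracy_(model_output, batch_targets):
    # model_output: (N, vocab_size) logits; batch_targets: (N,) character indices
    predictions = model_output.argmax(dim=1)
    return (predictions == batch_targets).float().mean().item()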
Example #7
File: train.py Project: frank/dl
def train():
    # Torch settings
    device = torch.device(config.device)
    if device.type == 'cpu':
        torch.set_default_tensor_type(torch.FloatTensor)
    elif device.type == 'cuda':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    dtype = torch.float

    # Tensorboard summary writer
    if config.tensorboard:
        run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_"
                                         + config.model_type.lower()
                                         + '_' + str(config.input_length))
        log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
        writer = SummaryWriter(log_dir=log_dir)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Model parameters
    lr = config.learning_rate
    lr_decay = config.learning_rate_decay
    lr_step = config.learning_rate_step
    dropout = 1.0 - config.dropout_keep_prob
    temp = [0.5, 1., 2.]
    assert config.sample_num % 3 == 0

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                dropout,
                                device).to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Characters used to start sentences (closing characters such as ')', '.' or others were removed)
    start_characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9',
                        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
                        'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
                        'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
                        'w', 'x', 'y', 'z',
                        '(', '[', '*', '-', '‘', '“']
    start_characters = list(set(start_characters) & set(dataset.vocab))

    # Store all generated sentences
    sentences = {}

    # Load model, if there's any model to load
    model, optimizer, sentences, start_step = load_model(model, optimizer, sentences, step=0)

    try:
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # If the model has been loaded, regulate step number accordingly
            step += start_step

            # Only for time measurement of step through network
            t1 = time.time()

            # Get batches as tensors of size (batch_size x seq_length)
            batch_inputs = torch.stack(batch_inputs).permute((1, 0))
            batch_targets = torch.stack(batch_targets).permute((1, 0)).to(device)

            # Convert batches to one-hot representation (batch_size x seq_length x vocab_size)
            batch_inputs = get_one_hot(batch_inputs,
                                       config.batch_size,
                                       config.seq_length,
                                       dataset.vocab_size).to(device)

            # Forward pass
            model.train()
            optimizer.zero_grad()
            predictions = model.forward(batch_inputs)

            # Compute loss
            loss = criterion(predictions.permute(0, 2, 1), batch_targets)

            # Backward pass
            loss.backward()

            # Clipping gradients to avoid exploding gradient problem
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

            # Update weights
            optimizer.step()

            # Compute accuracy
            accuracy = get_accuracy(predictions, batch_targets)

            # Add accuracy and loss to the writer
            if config.tensorboard:
                writer.add_scalars('Accuracy_and_Loss', {'accuracy': accuracy, 'loss': loss}, step)
                writer.add_scalar('Learning_Rate', lr, step)

            # Update learning rate
            if (step % lr_step == 0) and step != 0:
                lr *= lr_decay
                for group in optimizer.param_groups:
                    group['lr'] = lr

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                                                                step,
                                                                int(config.train_steps),
                                                                config.batch_size,
                                                                examples_per_second,
                                                                accuracy,
                                                                loss))

            if step % config.sample_every == 0:
                model.eval()

                # Store sentences for this step
                step_sentences = {temp[0]: [], temp[1]: [], temp[2]: []}

                # Get 6 random starter characters
                sample = random.sample(start_characters, config.sample_num)

                print()
                for idx, c in enumerate(sample):
                    # Temperature parameter
                    t = temp[int(idx / 2)]

                    # Character's one-hot representation
                    c_oh = torch.tensor(dataset.convert_to_one_hot(c), dtype=dtype).to(device)

                    # Returns a sentence of indexes and length 30
                    sentence = dataset.convert_to_string(model.generate(c_oh, t))
                    print("[t={:.1f}] {}".format(t, sentence.replace('\n', '\\n ')))
                    step_sentences[t].append(sentence)
                print()
                sentences[step] = step_sentences

            if (step % config.save_every == 0) and step != 0:
                save_model(model, optimizer, sentences, step)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        if config.tensorboard:
            writer.close()

        print('Done training.')

    except (KeyboardInterrupt, BrokenPipeError):
        if config.tensorboard:
            writer.close()
        print("\n" + random.choice(quit_msgs))
Example #8
def train(config):
    # determine the filename (to be used for saving results, checkpoints, models, etc.)
    filename = Path(config.txt_file).stem

    # Initialize the device which to run the model on
    if config.device == 'cuda':
        if torch.cuda.is_available():
            device = torch.device(config.device)
        else:
            device = torch.device('cpu')
    else:
        device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(
        filename=config.txt_file,
        seq_length=config.seq_length
    )
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # get the vocabulary size and int2char and char2int dictionaries for use later
    VOCAB_SIZE = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=VOCAB_SIZE,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers,
        device=device,
        batch_first=config.batch_first,
        dropout=1.0-config.dropout_keep_prob
    )

    # Setup the loss and optimizer and learning rate scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(),
        config.learning_rate
    )

    # Load the latest checkpoint, if any exist
    checkpoints = list(CHECKPOINTS_DIR.glob(f'{model.__class__.__name__}_{filename}_checkpoint_*.pt'))
    if len(checkpoints) > 0:
        # load the latest checkpoint
        checkpoints.sort(key=os.path.getctime)
        latest_checkpoint_path = checkpoints[-1]
        start_step, results, sequences = load_checkpoint(latest_checkpoint_path, model, optimizer)
    else:
        # initialize the start step, results and sequences
        start_step = 0
        results = {
            'step': [],
            'accuracy': [],
            'loss': [],
        }
        sequences = {
            'step': [],
            't': [],
            'temperature': [],
            'sequence': []
        }

    for step in range(start_step, int(config.train_steps)):
        # reinitialize the data_loader iterator if we have iterated over all available mini-batches
        if step % len(data_loader) == 0 or step == start_step:
            data_iter = iter(data_loader)
        
        # get the mini-batch
        batch_inputs, batch_targets = next(data_iter)

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        # put the model in training mode
        model.train()

        # convert the data and send to device
        X = torch.stack(batch_inputs, dim=1)
        X = X.to(device)

        Y = torch.stack(batch_targets, dim=1)
        Y = Y.to(device)

        # forward pass the mini-batch
        Y_out, _ = model.forward(X)
        Y_pred = Y_out.argmax(dim=-1)

        # (re)set the optimizer gradient to 0
        optimizer.zero_grad()

        # compute the accuracy and the loss
        accuracy = get_accuracy(Y_pred, Y)
        loss = criterion.forward(Y_out.transpose(2, 1), Y)

        # backpropagate the loss
        loss.backward()

        # clip the gradients (to prevent them from exploding)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

        # tune the model parameters
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % config.print_every == 0:
            print(f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}], Train Step {step:04d}/{int(config.train_steps):04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}')

            # append the accuracy and loss to the results
            results['step'].append(step)
            results['accuracy'].append(accuracy.item())
            results['loss'].append(loss.item())

        if step % config.sample_every == 0:
            for T in [20, 30, 60, 120]:
                for temperature in [0.0, 0.5, 1.0, 2.0]:
                    # Generate some sentences by sampling from the model
                    sequence = sample_sequence(
                        model=model,
                        vocab_size=VOCAB_SIZE,
                        T=T,
                        char=None,
                        temperature=temperature,
                        device=device
                    )
                    sequence_str = dataset.convert_to_string(sequence)
                    print(f'Generated sample sequence (T={T}, temp={temperature}): {sequence_str}')

                    # append the generated sequence to the sequences
                    sequences['step'].append(step)
                    sequences['t'].append(T)
                    sequences['temperature'].append(temperature)
                    sequences['sequence'].append(sequence_str)

        if step % config.checkpoint_every == 0:
            # create a checkpoint
            create_checkpoint(CHECKPOINTS_DIR, filename, step, model, optimizer, results, sequences)

            # save the results
            save_results(RESULTS_DIR, filename, results, sequences, model)

            # save the model
            save_model(MODELS_DIR, filename, model)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
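sample_sequence is called above but not defined in this example. The sketch below matches the call signature and the fact that the result is later passed to dataset.convert_to_string; it assumes the model accepts a (1, seq_len) LongTensor of indices and returns (logits, state), which the snippet does not confirm.

import torch

def sample_sequence(model, vocab_size, T, char=None, temperature=0.0, device='cpu'):
    # Illustrative sampler: generate T character indices, greedily when
    # temperature == 0, otherwise from the temperature-scaled softmax.
    model.eval()
    with torch.no_grad():
        seq = [char if char is not None else torch.randint(vocab_size, (1,)).item()]
        for _ in range(T - 1):
            inp = torch.tensor(seq, device=device).unsqueeze(0)     # (1, len(seq))
            logits, _ = model(inp)
            last = logits[0, -1]
            if temperature == 0.0:
                idx = last.argmax().item()
            else:
                probs = torch.softmax(last / temperature, dim=0)
                idx = torch.multinomial(probs, 1).item()
            seq.append(idx)
    model.train()
    return seq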
Example #9
def train(config):

    # Initialize the device which to run the model on
    config.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(config.device)

    # Initialize the model that we are going to use


    dataset = TextDataset(config.txt_file, config.seq_length)

    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    vocab_size = dataset.vocab_size
    config.vocab_size = vocab_size

    model = TextGenerationModel(config.batch_size, config.seq_length, vocab_size,
                 config.lstm_num_hidden, config.lstm_num_layers, config.device)
    model = model.to(device)
    # Initialize the dataset and data loader (note the +1)


    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    # criterion = nn.NLLLoss()


    # optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    writer = SummaryWriter(comment=config.txt_file)
    writer_iteration = 0

    for epoch in range(50):
        print("\n\n\n EPOCH: {}".format(epoch))
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################


            batch_inputs = torch.stack(batch_inputs).to(device)
            # print(batch_inputs.shape)
            # batch_inputs = F.one_hot(batch_inputs, vocab_size)

            one_hot = torch.FloatTensor(batch_inputs.size(0),
                                             batch_inputs.size(1),
                                             vocab_size).zero_().to(config.device)
            one_hot.scatter_(2, batch_inputs.unsqueeze(-1), 1)

            # make batch first dim
            batch_targets = torch.stack(batch_targets, dim = 1).to(device)

            out, _ = model.forward(one_hot)

            # The model output is (seq_len, batch, vocab_size), e.g. (30, 64, 87).
            # CrossEntropyLoss expects the class (vocab) dimension second, so the
            # output is transposed below; the targets stay as character indices.
            loss = criterion(out.transpose(2,1), batch_targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)
            if step % config.print_every == 0:

                compare = (out.argmax(2) == batch_targets)
                summed = compare.sum().item()
                accuracy = summed/compare.numel()

                writer.add_scalar('loss', loss, writer_iteration)
                writer.add_scalar('accuracy', accuracy, writer_iteration)
                writer_iteration +=1

                print("[{}] Train Step {:04d}/{:d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), int(step),
                        int(config.train_steps), config.batch_size, examples_per_second,
                        accuracy, loss
                ))

            if step % config.sample_every == 0:
                # sleeping_beauty(dataset, config, model)
                random_int_sentence(dataset, config, model)


            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    # str.strip() removes a set of characters, not a suffix, so use replace() instead
    torch.save(model, config.txt_file.replace('.txt', '') + ".pt")
    print('Done training.')
Example #10
def train(config):

    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)

    # Setup the loss and optimizer
    criterion = CrossEntropyLoss()
    optimizer = RMSprop(model.parameters(), lr=config.learning_rate)

    realsteps = 0
    for epoch in range(1000):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            realsteps += 1
            step = realsteps
            t1 = time.time()

            batch_targets = torch.stack(batch_targets).to(device)
            optimizer.zero_grad()
            # skip incomplete final batches
            if len(batch_inputs[0]) < config.batch_size:
                continue
            probs = model.forward(batch_inputs)

            loss = 0
            accuracy = 0
            for prob, target in zip(probs, batch_targets):
                # prediction = torch.argmax(prob, dim=1).float()
                loss += criterion.forward(prob, target)
                predictions = prob.argmax(dim=1).float()
                accuracy += float(torch.sum(
                    predictions == target.float())) / config.batch_size
            loss = loss / config.seq_length
            loss.backward()
            optimizer.step()
            accuracy = accuracy / config.seq_length
            writer.add_scalar('Train/Loss', loss, realsteps)
            writer.add_scalar('Train/Accuracy', accuracy, realsteps)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % 10000 == 0:
                torch.save(model, './' + str(step))
            if step % config.print_every == 0:

                print(
                    "[{}] Train Step {:04d}/{:04f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss))

            # if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            # greedy_sampling_model(model, dataset)
            if realsteps > config.train_steps:
                break

        if realsteps > config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
Example #11
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    dataset = TextDataset(filename=config.txt_file,
                          seq_length=config.seq_length)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device).to(
                                    config.device)  # fixme

    # Initialize the dataset and data loader (note the +1)

    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=config.learning_rate)  # fixme

    # if the required number of steps exceeds one pass over the data, more than one epoch is needed, hence the outer loop
    steps_in_epoch = int(dataset.__len__() / config.batch_size) + 1
    epochs = int(config.train_steps / steps_in_epoch) + 1
    print('EPOCHS ', epochs)
    print('STEPS IN EPOCH ', steps_in_epoch)
    print('TOTAL NUMBER OF STEPS  ', config.train_steps)
    #print('MAX POSSIBLE NUMBER OF STEPS  ', dataset.__len__(), '  TOTAL NUMBER OF STEPS  ', config.train_steps)

    #save_model and save_model1 are lists with the number of steps for which I save the model
    save_model = [int(h * 0.2 * config.train_steps) for h in range(5)]
    save_model1 = [100, 500, 1500]
    accuracy_dict = {}
    loss_dict = {}

    for j in range(epochs):
        print('EPOCH ', j)

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################

            batch_targets = torch.stack(batch_targets).to(config.device)
            y_pred = model.forward(batch_inputs).transpose(0, 2)

            optimizer.zero_grad()
            batch_targets = batch_targets.transpose(0, 1)
            loss = criterion(y_pred, batch_targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()
            accuracy = acc(y_pred, batch_targets)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if int(step + j * steps_in_epoch) % config.print_every == 0:

                accuracy_dict[int(step + j * steps_in_epoch)] = accuracy
                loss_dict[int(step + j * steps_in_epoch)] = float(loss)

                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"),
                        int(step + j * steps_in_epoch),
                        int(config.train_steps), config.batch_size,
                        examples_per_second, accuracy, loss))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                pass

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

            if int(step + j * steps_in_epoch) in save_model:
                name_model = 'model_' + str(
                    int(step + j * steps_in_epoch)) + '.pickle'
                torch.save(model.state_dict(), name_model)

            if int(step + j * steps_in_epoch) in save_model1:
                name_model = 'model_' + str(
                    int(step + j * steps_in_epoch)) + '.pickle'
                torch.save(model.state_dict(), name_model)

    torch.save(model.state_dict(), 'model_final.pickle')
    f1 = open("accuracy.txt", "w")
    f1.write(str(accuracy_dict))
    f1.close()

    f2 = open("loss.txt", "w")
    f2.write(str(loss_dict))
    f2.close()

    print('Done training.')
Example #12
def train(config):

    if torch.cuda.is_available():
        dev = "cuda:0"
    else:
        dev = "cpu"
    # Initialize the device which to run the model on
    device = torch.device(dev)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers,
                                device).to(device)  # fixme

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # fixme
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=config.learning_rate,
        weight_decay=config.learning_rate_decay)  # fixme

    # added for vscode debug functionality
    multiprocessing.set_start_method('spawn', True)

    total_steps = 0

    while config.train_steps > total_steps:

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            total_steps += 1

            if total_steps > config.train_steps: break

            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            # one_hot returns a LongTensor; the LSTM forward pass needs floats
            batch_inputs = torch.nn.functional.one_hot(
                batch_inputs, dataset.vocab_size).float()
            optimizer.zero_grad()
            output = model.forward(batch_inputs)

            loss = 0.0
            for i in range(len(output[0])):
                pred = output[:, i, :]
                target = batch_targets[:, i]
                loss += criterion.forward(pred, target) / len(output[0])

            loss.backward()

            optimizer.step()
            with torch.no_grad():
                accuracy = 0.0

                total_size = 0
                correct = 0

                for i in range(len(output[0])):
                    pred = torch.nn.functional.softmax(output[:, i, :], dim=1)
                    pred = torch.max(pred, 1)[1]

                    correct += pred.eq(batch_targets[:, i]).sum().item()
                    total_size += len(pred)

                accuracy = correct / total_size

                # Just for time measurement
                t2 = time.time()
                examples_per_second = config.batch_size / float(t2 - t1)

                if total_steps % config.print_every == 0:

                    print(
                        "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                        "Accuracy = {:.2f}, Loss = {:.3f}".format(
                            datetime.now().strftime("%Y-%m-%d %H:%M"),
                            total_steps, int(config.train_steps),
                            config.batch_size, examples_per_second, accuracy,
                            loss))

                if total_steps % config.sample_every == 0:
                    # Generate some sentences by sampling from the model
                    text = torch.zeros(
                        (1, 1)).long().random_(0,
                                               dataset.vocab_size).to(device)
                    text = torch.nn.functional.one_hot(text,
                                                       dataset.vocab_size)
                    for i in range(config.seq_length - 1):
                        prediction = model.forward(text)
                        pred = torch.nn.functional.softmax(prediction[:, i, :],
                                                           dim=1)
                        pred = torch.max(pred, 1)[1]
                        pred = torch.nn.functional.one_hot(
                            pred, dataset.vocab_size)
                        pred = pred.unsqueeze(0)
                        text = torch.cat((text, pred), 1)
                        stuff = torch.argmax(text[0], 1)
                        sentence = dataset.convert_to_string(stuff.tolist())
                    print(sentence)

                if total_steps == config.train_steps:
                    # If you receive a PyTorch data-loader error, check this bug report:
                    # https://github.com/pytorch/pytorch/pull/9655
                    break

    print('Done training.')
Example #13
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use

    dataset = TextDataset(config.txt_file, config.seq_length)
    torch.save(dataset, config.txt_file + '.dataset')

    model = TextGenerationModel(dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, config.device,
                                1. - config.dropout_keep_prob)

    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config.learning_rate_step,
        gamma=config.learning_rate_decay)
    accuracies = [0, 1]
    losses = [0, 1]

    for step in range(int(config.train_steps)):
        if step % len(data_loader) == 0:
            data_iter = iter(data_loader)
        batch_inputs, batch_targets = next(data_iter)

        # Only for time measurement of step through network
        t1 = time.time()

        device_inputs = torch.stack(batch_inputs, dim=0).to(device)
        device_targets = torch.stack(batch_targets, dim=1).to(device)

        out, _ = model.forward(device_inputs)
        outt = out.transpose(0, 1).transpose(1, 2)
        optimizer.zero_grad()
        loss = criterion.forward(outt, device_targets)
        losses.append(loss.item())
        accuracy = (outt.argmax(dim=1) == device_targets).float().mean()
        accuracies.append(accuracy)

        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}, LR = {}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    int(config.train_steps), config.batch_size,
                    examples_per_second, accuracies[-1], losses[-1],
                    optimizer.param_groups[-1]['lr']))

        if step % config.sample_every == 0:
            torch.save(model, config.txt_file + '.model')
            with torch.no_grad(), open(config.txt_file + '.generated',
                                       'a') as fp:
                for length, temp in product([20, 30, 50, 120],
                                            [0, 0.5, 1.0, 2.0]):
                    text = seq_sampling(model, dataset, length, temp, device)
                    fp.write("{};{};{};{}\n".format(step, length, temp, text))

    print('Done training.')
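The seq_sampling helper used in the generation block above is not shown. One possible sketch, assuming the model accepts a (seq_len, 1) LongTensor of indices and returns (logits, state) with logits shaped (seq_len, 1, vocab_size), and treating temp == 0 as greedy decoding:

import torch

def seq_sampling(model, dataset, length, temp, device):
    # Illustrative sampler: re-feed the growing index sequence each step.
    with torch.no_grad():
        seq = [torch.randint(dataset.vocab_size, (1,)).item()]
        for _ in range(length - 1):
            inp = torch.tensor(seq, device=device).unsqueeze(1)     # (len(seq), 1)
            out, _ = model(inp)
            last = out[-1, 0]
            if temp == 0:
                idx = last.argmax().item()
            else:
                probs = torch.softmax(last / temp, dim=0)
                idx = torch.multinomial(probs, 1).item()
            seq.append(idx)
    return dataset.convert_to_string(seq)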
Example #14
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file,
                          config.seq_length)  # should we do +1??
    torch.save(dataset, config.save_dataset)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers,
                                1 - config.dropout_keep_prob, device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    losses = []
    accuracies = []

    # run through the dataset several times till u reach max_steps
    step = 0
    while step < config.train_steps:
        for (batch_inputs, batch_targets) in data_loader:
            step += 1
            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.stack(batch_inputs).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(
                device)  #dim=1 to avoid transposing

            batch_predictions, (_, _) = model.forward(batch_inputs)
            batch_predictions = batch_predictions.permute(1, 2, 0)
            loss = criterion(batch_predictions, batch_targets)
            losses.append(loss.item())
            model.zero_grad()  # should we do this??
            loss.backward()

            torch.nn.utils.clip_grad_norm_(
                model.parameters(),
                max_norm=config.max_norm)  # guards against exploding gradients

            optimizer.step()

            accuracy = accuracy_(batch_predictions, batch_targets)
            accuracies.append(accuracy)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print(
                    "[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), int(step),
                        int(config.train_steps), config.batch_size,
                        examples_per_second, accuracy, loss))

            if step % config.sample_every == 0:

                for temperature in [0]:
                    for length in [30, 60, 90, 120]:
                        sentence = generate_sentence(model, dataset,
                                                     temperature, length,
                                                     device)
                        with open(config.save_generated_text,
                                  'a',
                                  encoding='utf-8') as file:
                            file.write("{};{};{};{}\n".format(
                                step, temperature, length, sentence))

            if step % config.save_every == 0:
                torch.save(model.state_dict(), config.save_model)

            if step == config.train_steps:
                # save only the model parameters
                torch.save(model.state_dict(), config.save_model)
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    # revive the model
    # model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size(),
    #                                 config.lstm_num_hidden, config.lstm_num_layers, device)
    # model.load_state_dict(torch.load(config.save_model))

    print('Done training.')
Example #15
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                config.dropout_prob,
                                config.lstm_num_hidden,
                                config.lstm_num_layers,
                                device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    model.to(device)

    train_loss = []
    train_acc = []
    t_loss = []
    t_acc = []
    texts = []

    #Convergence condition
    eps = 1e-6

    for epoch in range(20):

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Clear stored gradient
            model.zero_grad()

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...

            # #Convert list of tensors into one tensor for inputs and labels
            # x = torch.stack(batch_inputs).to(device)
            # y = torch.stack(batch_targets).to(device)
            #
            # print(x.shape)

            x = (batch_inputs.to(device)).t()
            y = (batch_targets.to(device)).t()

            # print(x.shape)

            #Convert input to one-hot vectors
            x = idx_2_onehot(
                x, dataset.vocab_size
            )  #x = (sentence length, batch_size, one_hot vec(char))

            #Forward pass
            pred, _ = model.forward(
                x)  #pred = (sentence length, score of each char ,batch_size)

            # print(pred.shape)  # debug

            loss = criterion(pred, y)
            train_loss.append(loss.item())
            optimizer.zero_grad()

            #Backward pass
            loss.backward()
            optimizer.step()

            accuracy = get_accuracy(pred, y, config.batch_size,
                                    config.seq_length)
            train_acc.append(accuracy.item())

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            # if step % config.print_every == 0:

            # print("[{}] Train Step {:04}/{:04}, Batch Size = {}, Examples/Sec = {:.2f}, "
            #       "Accuracy = {:.2f}, Loss = {:.3f}".format(
            #         datetime.now().strftime("%Y-%m-%d %H:%M"), step,
            #         config.train_steps, config.batch_size, examples_per_second,
            #         accuracy, loss
            # ))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                #get text in int format
                text = text_gen(model,
                                config.seq_length,
                                dataset.vocab_size,
                                temperature=None)
                #convert text to string
                text = dataset.convert_to_string(text)
                print(
                    '\nEpoch ', epoch + 1, '/ 20, Training Step ', step, '/',
                    int(config.train_steps), ', Training Accuracy = ',
                    accuracy.item(), ', Training Loss = ', loss.item(),
                    '\n-----------------------------------------------\nGenerated text: ',
                    text)

                #Get loss and accuracy averages over 100 steps
                t_loss.append(np.mean(train_loss))
                t_acc.append(np.mean(train_acc))
                train_loss = []
                train_acc = []
                texts.append(text)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

            if epoch != 0:
                if step == 0:
                    #save current model at the start of every epoch
                    torch.save(model, "epoch_" + str(epoch - 1) + "_model")

                    #save current train accuracy, loss and text
                    np.save("epoch_" + str(epoch + 1) + "_accuracy", t_acc)
                    np.save("epoch_" + str(epoch + 1) + "_loss", t_loss)
                    np.save("epoch_" + str(epoch + 1) + "_texts", texts)

        if len(t_loss) > 1 and abs(t_loss[-1] - t_loss[-2]) < eps:
            break

    print('Done training.')

    #save final model
    torch.save(model, "final_model")
Example #16
0
def train(config):
    def acc(predictions, targets):
        hotvec = predictions.argmax(-2) == targets
        accuracy = torch.mean(hotvec.float())
        return accuracy

    # Initialize the device which to run the model on
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)
    print('batch', config.batch_size)

    vocabulary_size = dataset.vocab_size
    print('vocab', vocabulary_size)
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                vocabulary_size=vocabulary_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout=1 - config.dropout_keep_prob,
                                device=device)
    model = model.to(device)
    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate,
                                 weight_decay=1e-5)
    gamma = 1 - config.learning_rate_decay
    lr_optim = torch.optim.lr_scheduler.StepLR(optimizer,
                                               config.learning_rate_step,
                                               gamma=gamma,
                                               last_epoch=-1)
    print('Hi')
    acc_list = []
    loss_list = []
    step_list = []
    text_list = []
    epoch = 100
    offset = 2380
    temperature = 1
    policy = 'greedy'
    for e in range(epoch):
        torch.save(model.state_dict(), str(e + 1) + 'tunedmodel.pt')
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            optimizer.zero_grad()
            inputs = torch.stack([*batch_inputs], dim=1)
            targets = torch.stack([*batch_targets], dim=1)
            inputs = inputs.to(device)
            targets = targets.to(device)
            out = model.forward(inputs)[0]
            out = out.permute(0, 2, 1)
            loss = criterion(out, targets)
            accuracy = acc(out, targets)

            loss.backward()
            # clip gradients after backward() (clip_grad_norm is deprecated)
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()
            lr_optim.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print('accuracy, loss, step: \n',
                      np.around(accuracy.item(), 4), np.around(loss.item(),
                                                               4), step, '\n')
                acc_list.append(accuracy.item())
                loss_list.append(loss.item())

                step_list.append(step + offset * e)

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                generator = torch.randint(low=0,
                                          high=vocabulary_size,
                                          size=(1, 1)).to(device)
                hidden = None
                char_list = []
                for _ in range(config.seq_length):
                    generator, hidden = model.forward(generator, hidden)
                    if policy == 'greedy':
                        idx = torch.argmax(generator).item()
                    else:
                        # temperature sampling, mirroring the bonus loop below
                        soft = torch.softmax(generator.squeeze() / temperature,
                                             dim=0)
                        idx = torch.multinomial(soft, 1)[-1].item()
                    generator = torch.tensor([idx]).unsqueeze(-1)
                    generator = generator.to(device)
                    char_list.append(idx)
                char = dataset.convert_to_string(char_list)
                with open("MyTunedBook.txt", "a") as text_file:
                    print('Epoch. ',
                          e,
                          'Step: ',
                          step,
                          '\n Output: ',
                          char,
                          file=text_file)

                print('Epoch. ', e, 'Step: ', step, '\n Output: ', char)
                text_list.append((str((step + offset * e)) + '\n' + char))

                pass

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')

    with open('FinalTunedBook.txt', 'w+') as f:
        for item in text_list:
            f.write("%s\n" % item)

    # save with pandas
    header = ['accuracy', 'length', 'loss', 'step']
    savefiles = zip(acc_list, [config.seq_length] * len(acc_list), loss_list,
                    step_list)
    df = pd.DataFrame(list(savefiles), columns=header)
    df.to_csv('GEN' + str(config.seq_length) + 'tunedlstm.csv')

    print('I am Loaded')

    temp_list = [0., 0.5, 1., 2.]
    policy_list = ['greedy', 'temp']
    seq_length = 111
    alice_string = list('Alice')

    # Generate some sentences by sampling from the model
    for policy in policy_list:
        for temperature in temp_list:
            char_list = []
            hidden = None
            for alice in alice_string:
                idx = dataset.convert_to_idx(alice)
                char_list.append(idx)
                generator = torch.tensor([idx]).unsqueeze(-1)
                generator = generator.to(device)
                generator, hidden = model.forward(generator, hidden)

            for _ in range(seq_length):
                if policy == 'greedy':
                    idx = torch.argmax(generator).item()
                else:
                    temp = generator.squeeze() / temperature
                    soft = torch.softmax(temp, dim=0)
                    idx = torch.multinomial(soft, 1)[-1].item()
                generator = torch.tensor([idx]).unsqueeze(-1)
                generator = generator.to(device)
                generator, hidden = model.forward(generator, hidden)
                char_list.append(idx)
            char = dataset.convert_to_string(char_list)
            with open(
                    "BonusTemp" + str(int(np.floor(temperature))) + "Book.txt",
                    "w+") as text_file:
                print(policy + ': ',
                      temperature,
                      '\n Output: ',
                      char,
                      file=text_file)

            print(policy + ': ', temperature, '\n Output: ', char)
    print('Finito!')
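
The greedy and temperature branches in Example #16 can be read as one sampling rule. A small sketch (hypothetical name sample_idx) capturing it, assuming logits is the 1-D vector of unnormalised scores for the next character:

# Hedged sketch: next-character selection behind the greedy/temperature policies above.
import torch

def sample_idx(logits, policy='greedy', temperature=1.0):
    if policy == 'greedy' or temperature == 0:
        return int(torch.argmax(logits))        # most likely character
    # higher temperature flattens the distribution, lower sharpens it
    probs = torch.softmax(logits / temperature, dim=0)
    return int(torch.multinomial(probs, 1))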
Example #17
0
def train(config):
    # Initialise custom summary writer
    writer = Writer(config.summary_path)

    # Initialize the dataset and data loader
    writer.log("Loading dataset from: " + config.txt_file)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the device which to run the model on
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    writer.log("Device: " + device)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.lstm_num_embed, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device)
    if config.checkpoint_path is not None:
        model = torch.load(config.checkpoint_path).to(device)
    writer.log("Model:\n" + str(model))

    # Setup the loss and optimizer
    criterion = F.cross_entropy
    learning_rate = config.learning_rate
    optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

    epoch = 0
    total_step = 0
    while True:

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.LongTensor([x.tolist()
                                             for x in batch_inputs]).to(device)
            batch_targets = torch.LongTensor(
                [x.tolist() for x in batch_targets]).to(device)

            # Forward pass
            logits = model.forward(batch_inputs)

            # backprop
            optimizer.zero_grad()
            loss = criterion(logits.transpose(1, 2), batch_targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            with torch.no_grad():
                # accuracy = fraction of characters predicted correctly
                accuracy = (logits.argmax(dim=2) == batch_targets).to(
                    dtype=torch.float).mean()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            # Learning rate decay
            if step % config.learning_rate_step == 0 and step != 0:
                learning_rate *= config.learning_rate_decay
                writer.log("Reduced learning rate: {}".format(learning_rate))
                for g in optimizer.param_groups:
                    g['lr'] = learning_rate

            # Metrics and samples
            if step % config.print_every == 0:
                writer.log(
                    "[{}] Epoch {:02d}, Train Step {:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), epoch, step,
                        config.batch_size, examples_per_second, accuracy,
                        loss))
                writer.write(
                    'metrics', '{},{},{},{}'.format(total_step, accuracy, loss,
                                                    learning_rate))

            if step % config.sample_every == 0:
                writer.log("Generating sentences")
                writer.write('samples', 'ITER{}'.format(step))
                for temp in [0, .5, 1, 2]:
                    writer.log("\nTemperature: {}".format(temp))
                    writer.write('samples', 'T{}'.format(temp))
                    for i in np.random.choice(dataset.vocab_size, size=5):
                        text = dataset.convert_to_string(
                            model.predict([i], 100,
                                          temp)).replace("\n", "<br>")
                        writer.log(text)
                        writer.write('samples', text)
                    for string in [
                            "1:1. In the beginning God created",
                            "1:5. And he called the light Day, and the darkness",
                            "7:1. And the Lord said to him:",
                            "Genesis Chapter 7"
                    ]:
                        text = dataset.convert_to_string(
                            model.predict(dataset.convert_to_id(string), 100,
                                          temp)).replace("\n", "<br>")
                        writer.log(text)
                        writer.write('samples', text)

            if step % config.checkpoint_every == 0:
                writer.save_model(model, step)

            # if step == config.train_steps:
            #     # If you receive a PyTorch data-loader error, check this bug report:
            #     # https://github.com/pytorch/pytorch/pull/9655
            #     break

            total_step += 1
        epoch += 1

    writer.log('Done training.')
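
Example #17 samples through model.predict(seed_ids, length, temp), which is not shown. A sketch of what such a method could look like, assuming the model embeds character indices and that its forward can optionally take and return the recurrent state (the training loop above only uses the stateless form); everything beyond the call signature is an assumption:

# Hedged sketch of a predict method matching the calls in Example #17.
import torch

def predict(self, seed_ids, length, temp):
    device = next(self.parameters()).device
    ids = [int(i) for i in seed_ids]
    with torch.no_grad():
        # warm up the recurrent state on the seed characters
        x = torch.tensor([ids], dtype=torch.long, device=device)
        logits, state = self.forward(x, None)
        for _ in range(length):
            last = logits[0, -1]
            if temp == 0:
                nxt = int(torch.argmax(last))               # greedy
            else:
                probs = torch.softmax(last / temp, dim=-1)
                nxt = int(torch.multinomial(probs, 1))      # temperature sampling
            ids.append(nxt)
            x = torch.tensor([[nxt]], dtype=torch.long, device=device)
            logits, state = self.forward(x, state)
    return ids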
Example #18
0
def train(config, lr):

    # Initialize the device which to run the model on
    #device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=dataset.vocab_size,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers)  # fixme

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print('Currently using: ', device)

    model = model.to(device)
    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()  # fixme
    #optimizer = torch.optim.Adam(model.parameters(), lr = config.learning_rate, amsgrad = True)  # fixme
    #optimizer = torch.optim.Adam(model.parameters(), lr = lr, amsgrad = True)
    acc_list = []
    loss_list = []

    test_batches_in = []
    test_batches_ta = []

    test_acc = []

    best_accuracy = 0

    ### Flag for temperature
    temp = True
    temp_value = 2

    for runs in range(3):
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            if step % config.print_every != 0 or step == 0:

                t1 = time.time()
                #print(type(step))

                #model.train()

                #######################################################

                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                output, _ = model.forward(zerox)  #.to(device)

                targets = torch.stack(batch_targets).to(device)

                output_indices = torch.argmax(output, dim=2).to(device)

                output = output.transpose(0, 1).transpose(1, 2).to(device)

                #print(output.shape, targets.shape)
                #return 'a'

                #print(output.transpose(0,2).shape, targets.t().shape)
                #return 'a'
                loss_for_backward = criterion(output.transpose(0, 2),
                                              targets.t()).to(device)

                optimizer.zero_grad()
                loss_for_backward.backward()
                # clip gradients after backward() so there is something to clip
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               max_norm=config.max_norm)
                optimizer.step()

                correct_indices = output_indices == targets.transpose(
                    0, 1).to(device)

                #return correct_indices
                #######################################################

                #loss = criterion.forward(output, targets)

                #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]*
                #correct_indices.shape[1])
                #print(type(accuracy),type(loss))
                # Just for time measurement
                t2 = time.time()
                examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0 and step != 0:
                #model.eval()

                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                output, _ = model.forward(zerox)

                output_indices = torch.argmax(output, dim=2).to(device)

                output = output.transpose(0, 1).transpose(1, 2).to(device)
                targets = torch.stack(batch_targets).to(device)

                #loss_for_backward = criterion(output,targets).to(device)
                loss_for_backward = criterion(output.transpose(0, 2),
                                              targets.t()).to(device)
                correct_indices = output_indices == targets.transpose(
                    0, 1)  #.to(device)
                #return output_indices, targets.transpose(0,1)

                #print(correct_indices.shape)
                #accuracy = sum(acc_list) / len(acc_list)
                #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.numel())
                accuracy = np.array(correct_indices.detach().cpu()).mean()

                #print("[{}] Train Step {:04d}/{:f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #        config.train_steps, config.batch_size, examples_per_second,
                #        accuracy,
                #        loss_for_backward
                #))
                acc_list.append(accuracy)
                loss_list.append(float(loss_for_backward))

                if accuracy > best_accuracy:
                    torch.save(
                        {
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict()
                        }, 'model.pth')

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                ## Generate a good sample instead of the same one over and over again
                #model.eval()

                ### Append every modulo batch to a list of test batches and run
                ### over that list to test

                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                test_batches_in.append(zerox)

                targets = torch.stack(batch_targets).to(device)

                test_batches_ta.append(targets)

                batch_inputz = torch.stack(batch_inputs).to(device)
                batch_input = batch_inputz.transpose(1, 0).to(device)

                output, _ = model.forward(zerox)  #.to(device)
                output_indices = torch.argmax(output, dim=2).to(device)
                output = output.transpose(0, 1).transpose(1, 2).to(device)

                loss_for_backward = criterion(output, targets).to(device)
                correct_indices = output_indices == targets.transpose(
                    0, 1).to(device)

                best_sample = np.argmax(
                    np.asarray(sum(correct_indices.t().detach().cpu())))
                print(
                    'Real: ',
                    dataset.convert_to_string(
                        np.asarray(batch_input[best_sample].cpu())))
                output, _ = model.forward(zerox)  #.to(device)
                output_indices = torch.argmax(output, dim=2).to(device)
                print(
                    'prediction: ',
                    dataset.convert_to_string(
                        np.asarray(output_indices[best_sample].cpu())))

                bc = int(sum(correct_indices.t().detach().cpu())
                         [best_sample]) / config.seq_length
                print('This sample had:', bc, 'characters right')

                output = np.random.randint(dataset.vocab_size)
                letters = [output]

                greedy_output = np.random.randint(dataset.vocab_size)
                greedy_letters = [greedy_output]

                Temperature_time(runs, step, dataset, device, model)
                for i in range(config.seq_length - 1):

                    #if temp:
                    # =============================================================================
                    #
                    #                         soft = torch.nn.Softmax(dim=2)
                    #
                    #
                    #
                    #
                    #                         zerol = torch.zeros([1,1,dataset.vocab_size])
                    #                         one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
                    #                         zerol.scatter_(2,one_hot_letter,1)
                    #                         zerol = zerol.to(device)
                    #                         if i == 0:
                    #                             output, h = model.forward(zerol)
                    #
                    #                         else:
                    #                             output, h = model.forward(zerol, h)
                    #
                    #                         tempered = soft(output/temp_value)
                    #                         #print(tempered)
                    #                         output = int(torch.multinomial(tempered[0][0],1).detach().cpu())
                    #                         #print(output)
                    #                         letters.append(output)
                    # =============================================================================

                    greedy_zerol = torch.zeros([1, 1, dataset.vocab_size])
                    greedy_one_hot_letter = torch.tensor(
                        greedy_output).unsqueeze(-1).unsqueeze(-1).unsqueeze(
                            -1)
                    greedy_zerol.scatter_(2, greedy_one_hot_letter, 1)
                    greedy_zerol = greedy_zerol.to(device)

                    if i == 0:
                        greedy_output, greedy_h = model.forward(greedy_zerol)
                    else:
                        greedy_output, greedy_h = model.forward(
                            greedy_zerol, greedy_h)

                    greedy_output = int(
                        torch.argmax(greedy_output, dim=2).detach().cpu())
                    greedy_letters.append(greedy_output)

                print('Greedy Generation ',
                      dataset.convert_to_string(greedy_letters))
                abs_step = (runs * 10000) + step
                line = ' '.join(('Step:', str(abs_step),
                                 dataset.convert_to_string(greedy_letters)))

                with open('GreedyGeneration.txt', 'a') as file:
                    file.write(line + '\n')

    # =============================================================================
    #         if step % (config.sample_every*1000) ==0:
    #             avg = []
    #             print('Testing over ', len(test_batches_in), 'batches')
    #             for z in range(len(test_batches_in)):
    #                 ##OUTPUT
    #                 output,_ = model.forward(test_batches_in[z])
    #                 output_indices = torch.argmax(output, dim=2).to(device)
    #                 output =  output.transpose(0,1).transpose(1,2).to(device)
    #
    #                 ##LOSS AND ACCURACY
    #                 loss_for_backward = criterion(output,targets).to(device)
    #                 correct_indices = output_indices == test_batches_ta[z].transpose(0,1).to(device)
    #
    #                 accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]*
    #                               correct_indices.shape[1])
    #
    #                 avg.append(accuracy)
    #
    #             this_test_acc = sum(avg)/len(avg)
    #             print('The test accuracy over ',len(test_batches_in), 'is: ', this_test_acc)
    #             test_acc.append(this_test_acc)
    #             #if bc > 0.8:
    #             #    print(bc)
    #             #    #return correct_indices
    #
    # =============================================================================
            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
        print('Done training.')
        line = ' '.join(
            ('Test accuracy:', str(test_acc), 'Learning rate:', str(lr),
             'Accuracy:', str(acc_list), 'Loss:', str(loss_list)))
        with open('textresults.txt', 'a') as file:
            file.write(line + '\n')

        #hiddenstates = [None]*30
        output = np.random.randint(dataset.vocab_size)
        letters = [output]
        for i in range(400):
            zerol = torch.zeros([1, 1, dataset.vocab_size])
            one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(
                -1).unsqueeze(-1)
            zerol.scatter_(2, one_hot_letter, 1)
            zerol = zerol.to(device)
            if i == 0:
                output, h = model.forward(zerol)

                output = int(torch.argmax(output, dim=2).detach().cpu())

                letters.append(output)
                #hiddenstates[i] = h
            else:
                output, h = model.forward(zerol, h)

                output = int(torch.argmax(output, dim=2).detach().cpu())

                letters.append(output)
                #hiddenstates[i % 30] = h
        print('Final generation: ', dataset.convert_to_string(letters))
    line = ' '.join(('Accuracy:', str(acc_list), 'Loss', str(loss_list)))
    with open('PrideAndPrejudice2.txt', 'a') as file:
        file.write(line + '\n')
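
Example #18 builds its one-hot inputs with create_zerox, which is not defined in the snippet. A minimal sketch consistent with how it is used there (batch_inputs is the list of per-position index tensors from the data loader; the result feeds model.forward as a (batch, seq_len, vocab_size) float tensor); the name and argument order come from the example, the body is an assumption:

# Hedged sketch of the create_zerox helper used throughout Example #18.
import torch

def create_zerox(batch_inputs, vocab_size, device):
    idx = torch.stack(batch_inputs).t().to(device)   # (seq_len, batch) -> (batch, seq_len)
    return torch.nn.functional.one_hot(idx, num_classes=vocab_size).float()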
Example #19
0
# Prime the model on the prefix `pre` to obtain an initial output and hidden state
output, h = string_generator(pre, zerol, model)

soft = torch.nn.Softmax(dim=2)
letters = []  # generated character indices (initialisation missing from the fragment)

# The enclosing loop was lost from this fragment; the body clearly belongs inside a
# character-generation loop (the bound below is assumed).
for i in range(config.seq_length):
    zerol = torch.zeros([1, 1, dataset.vocab_size])
    one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
    zerol.scatter_(2, one_hot_letter, 1)
    zerol = zerol.to(device)

    if i == 0:
        output, h = model.forward(zerol)
    else:
        output, h = model.forward(zerol, h)

    # sample the next character from the temperature-softened distribution
    tempered = soft(output / temp_value)
    output = int(torch.multinomial(tempered[0][0], 1).detach())
    letters.append(output)

the_string = dataset.convert_to_string(letters)
abs_step = 1

line = ' '.join(('Step:', str(abs_step), 'Temperature:',
                 str(temp_value), 'Text:', the_string))
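
Example #19 primes the model with string_generator(pre, zerol, model), which is not defined in the fragment. A sketch of what it presumably does (feed the priming index sequence pre through the model one character at a time, returning the last predicted index and the hidden state); the name and argument order come from the fragment, everything else is an assumption:

# Hedged sketch of the string_generator priming helper used in Example #19.
import torch

def string_generator(pre, zerol, model):
    h = None
    output = None
    for i, idx in enumerate(pre):
        onehot = torch.zeros_like(zerol)                      # (1, 1, vocab_size)
        index = torch.tensor(idx, device=onehot.device).view(1, 1, 1)
        onehot.scatter_(2, index, 1)
        if i == 0:
            output, h = model.forward(onehot)
        else:
            output, h = model.forward(onehot, h)
    # index of the character predicted after the last priming character
    return int(torch.argmax(output, dim=2)), h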