示例#1
0
def get_model(model_name, device):
    """Build (or reload) the sequence classifier named by ``model_name``.

    Known names: "LSTM", "BiLSTM", "GRU", "BiGRU", "NN".  If a saved
    checkpoint ``<WEIGHTS_DIR>/<model_name>.pt`` exists its weights are
    loaded; otherwise the fresh model is initialised with ``init_weights``.

    Raises:
        ValueError: for an unrecognised ``model_name``.  (Previously an
        unknown name fell through all branches and crashed later with an
        UnboundLocalError on ``model``.)
    """
    if model_name == "LSTM":
        model = LSTM(input_size=NUM_FEAT,
                     hidden_size=500,
                     output_size=len(classes),
                     num_layers=2,
                     bi=False).to(device)
    elif model_name == "BiLSTM":
        model = LSTM(input_size=NUM_FEAT,
                     hidden_size=500,
                     output_size=len(classes),
                     num_layers=2,
                     bi=True).to(device)
    elif model_name == "GRU":
        model = GRU(input_size=NUM_FEAT,
                    hidden_size=500,
                    output_size=len(classes),
                    num_layers=2,
                    bi=False).to(device)
    elif model_name == "BiGRU":
        model = GRU(input_size=NUM_FEAT,
                    hidden_size=500,
                    output_size=len(classes),
                    num_layers=2,
                    bi=True).to(device)
    elif model_name == "NN":
        # The feed-forward net consumes the flattened sequence.
        model = NN(input_size=NUM_FEAT * SEQ_LENGTH,
                   output_size=len(classes)).to(device)
    else:
        raise ValueError("Unknown model name: {}".format(model_name))

    # Build the checkpoint path once; os.path.join is portable, unlike
    # the original os.sep.join.
    weights_file = os.path.join(WEIGHTS_DIR, model_name + ".pt")
    if os.path.exists(weights_file):
        model.load_state_dict(torch.load(weights_file))
    else:
        model.apply(init_weights)
    return model
示例#2
0
    def __init__(self, trajectories, model_dir=None, **kwargs):
        """Set up a GRU-based agent from a saved training run.

        trajectories: forwarded to the parent class.
        model_dir: directory of a trained run whose last three path
            components encode ``<data>/<agent>/<seed>``; defaults to a
            hard-coded ml-1m GRU run under OUT_PATH.
        """
        super().__init__(trajectories, **kwargs)

        DATA_PATH = Path(
            os.getenv("DATA_PATH", "/home/stud/grimmalex/datasets/"))
        OUT_PATH = Path(
            os.getenv("OUT_PATH", "/home/stud/grimmalex/thesis/output/"))
        if model_dir is None:
            model_dir = OUT_PATH / "gru-sim/first/ml-1m/gru/2"
            print("model dir is {}".format(model_dir))
        # Fix: callers may pass a plain string; normalize to Path so the
        # `model_dir / "hyperparameters.yaml"` arithmetic below works.
        model_dir = Path(model_dir)
        # Run layout encodes dataset/agent/seed in the last three components.
        data, agent, seed = str(model_dir).split("/")[-3:]
        if "ml" in data:
            data = "ml/{}".format(data)
        data_dir = DATA_PATH / data

        trajectory_file = data_dir / "test.csv"

        config = get_base_config(trajectory_file, Path(model_dir), 1)
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # acceptable only because this file is a locally produced artifact.
        with open(model_dir / "hyperparameters.yaml", "r") as f:
            hyperparameters = yaml.load(f, yaml.Loader)
        main_key = list(hyperparameters.keys())[0]
        config.hyperparameters = hyperparameters[main_key]

        # Rewrite the stored word2vec path if it points at another machine.
        w2v_path = config.hyperparameters["Embedding"]["w2v_context_path"]
        if str(DATA_PATH) not in w2v_path:
            end_path = str(w2v_path).split("datasets/")[-1]
            w2v_path = DATA_PATH / end_path
            config.hyperparameters["Embedding"]["w2v_context_path"] = w2v_path
        agent = GRU(config)
        path = agent.model_saver.get_last_checkpoint_path()
        agent.load_pretrained_models(path)
        self.agent = agent

        # "list" rewards: presumably one reward per recommended list — confirm.
        self.reward_type = "list"
示例#3
0
def generate_sequences(id_2_word, num_samples, model_type, emb_size, hidden_size, seq_len, batch_size, num_layers, dp_keep_prob, vocab_size, path):
    """Sample ``num_samples`` sequences of length ``seq_len`` from a saved model.

    model_type: 'RNN' selects the RNN class; anything else the GRU.
    path: checkpoint file loaded via torch.load.
    Writes one space-separated sequence per line to
    '<model_type>_generated_sequences.txt'.
    """
    if model_type == 'RNN':
        model = RNN(emb_size=emb_size, hidden_size=hidden_size,
                    seq_len=seq_len, batch_size=batch_size,
                    vocab_size=vocab_size, num_layers=num_layers,
                    dp_keep_prob=dp_keep_prob)
    else:
        model = GRU(emb_size=emb_size, hidden_size=hidden_size,
                    seq_len=seq_len, batch_size=batch_size,
                    vocab_size=vocab_size, num_layers=num_layers,
                    dp_keep_prob=dp_keep_prob)

    model.load_state_dict(torch.load(path))
    model = model.to(device)
    # Zero initial hidden state, one column per sample.  NOTE(review): the
    # nn.Parameter wrapper looks unnecessary for generation — kept as-is.
    hidden = nn.Parameter(torch.zeros(num_layers, num_samples, hidden_size)).to(device)
    # Equal weights over the 10000-word vocabulary; multinomial normalizes
    # them, so this simply draws uniform start tokens.
    token_weights = torch.ones(10000) * 1 / 1000
    start_tokens = torch.multinomial(token_weights, num_samples).to(device)
    output = model.generate(start_tokens, hidden, seq_len)

    # Fix: context manager guarantees the file is closed even if a
    # vocabulary lookup raises mid-write (original leaked the handle).
    with open(model_type + '_generated_sequences' + '.txt', 'w') as out_file:
        for i in range(num_samples):
            for j in range(seq_len):
                out_file.write(id_2_word.get(output[j, i].item()) + ' ')
            out_file.write('\n')
示例#4
0
File: q5.py  Project: lebrice/IFT6135
def get_best_model(model_type: str) -> nn.Module:
    """Rebuild the best-performing model of the given type and load its weights.

    model_type: 'RNN', 'GRU' or 'TRANSFORMER'.  Any other value returns
    None (unchanged behaviour).
    """
    model: nn.Module = None
    if model_type == 'RNN':
        model = RNN(emb_size=200,
                    hidden_size=1500,
                    seq_len=35,
                    batch_size=20,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
        model.load_state_dict(
            torch.load('./4_1_a/best_params.pt', map_location=device))
    elif model_type == 'GRU':
        model = GRU(emb_size=200,
                    hidden_size=1500,
                    seq_len=35,
                    batch_size=20,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
        model.load_state_dict(
            torch.load('./4_1_b/best_params.pt', map_location=device))
    elif model_type == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=512,
                            n_blocks=6,
                            dropout=1. - 0.9)
        # Presumably only read by the epoch-runner, not the model — confirm.
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
        # Fix: pass map_location like the other branches, so a GPU-saved
        # checkpoint also loads on CPU-only machines.
        model.load_state_dict(
            torch.load('./4_1_c/best_params.pt', map_location=device))
    return model
示例#5
0
def _load_model(model_type):
    """Rebuild the saved language model and load its best checkpoint.

    model_type: 'RNN' selects the RNN run directory; anything else the GRU.
    Returns the model in eval mode, moved to GPU when one is available.
    """
    emb_size = 200
    hidden_size = 1500
    seq_len = 35  # 70
    batch_size = 20
    vocab_size = 10000
    num_layers = 2
    dp_keep_prob = 0.35

    # Load model (Change to RNN if you want RNN to predict)
    if model_type == 'RNN':
        model = RNN(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
        PATH = os.path.join("RNN_ADAM_0", "best_params.pt")
    else:
        model = GRU(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
        PATH = os.path.join("GRU_SGD_LR_SCHEDULE_0", "best_params.pt")

    if torch.cuda.is_available():
        # Fix: load_state_dict() returns a missing/unexpected-keys report,
        # not the model, so the original `...load_state_dict(...).cuda()`
        # raised AttributeError.  Load first, then move the model.
        model.load_state_dict(torch.load(PATH))
        model.cuda()
    else:
        model.load_state_dict(torch.load(PATH, map_location='cpu'))
    model.eval()
    return model
示例#6
0
def train():
    """Train the Senta sentiment classifier selected by ``args.model_type``.

    Builds the data generators, instantiates the network, and runs the
    high-level ``model.fit`` loop, checkpointing to ``args.checkpoints``.

    Raises:
        ValueError: for an unrecognised ``args.model_type`` (previously an
        unknown type fell through and crashed later with UnboundLocalError).
    """
    fluid.enable_dygraph(device)
    processor = SentaProcessor(data_dir=args.data_dir,
                               vocab_path=args.vocab_path,
                               random_seed=args.random_seed)
    num_labels = len(processor.get_labels())  # (currently unused)

    num_train_examples = processor.get_num_examples(phase="train")

    # (currently unused) steps per run, per device
    max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

    # NOTE(review): shuffle=False for the *training* set — confirm intentional.
    train_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='train',
        epoch=args.epoch,
        shuffle=False)

    eval_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='dev',
        epoch=args.epoch,
        shuffle=False)
    if args.model_type == 'cnn_net':
        model = CNN(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model = BOW(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model = GRU(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
    else:
        raise ValueError("Unknown model type: {}".format(args.model_type))

    optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr,
                                        parameter_list=model.parameters())

    inputs = [Input([None, None], 'int64', name='doc')]
    labels = [Input([None, 1], 'int64', name='label')]

    model.prepare(optimizer,
                  CrossEntropy(),
                  Accuracy(topk=(1, )),
                  inputs,
                  labels,
                  device=device)

    model.fit(train_data=train_data_generator,
              eval_data=eval_data_generator,
              batch_size=args.batch_size,
              epochs=args.epoch,
              save_dir=args.checkpoints,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq)
示例#7
0
class RIM(nn.Module):
    """MLP encoder -> GRU core -> MLP decoder, with its own Adam optimizer."""

    def __init__(self,
                 input_size,
                 st_size,
                 hidden_size,
                 output_size,
                 bounded=-1,
                 lr=.001):
        super().__init__()

        # Remember dimensions and hyperparameters.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.st_size = st_size
        self.lr = lr

        # Two-layer encoder, recurrent core, two-layer decoder.
        self.fc0_layer = nn.Linear(input_size, hidden_size)
        self.fc1_layer = nn.Linear(hidden_size, hidden_size)
        self.rnn_layer = GRU(hidden_size, st_size)
        self.fc3_layer = nn.Linear(st_size, hidden_size)
        self.fc4_layer = nn.Linear(hidden_size, output_size)

        # Built after the layers so self.parameters() already sees them.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)

        self.bounded = bounded

    def forward(self, xt, st):
        """One step: input xt + previous state st -> (output, new state)."""
        hidden = f.relu(self.fc0_layer.forward(xt))
        hidden = f.relu(self.fc1_layer.forward(hidden))
        new_state = self.rnn_layer.forward(hidden, st)
        hidden = f.relu(self.fc3_layer.forward(new_state))
        out = self.fc4_layer.forward(hidden)
        if self.bounded > 0:
            # A positive bound clamps the output into [-bounded, bounded].
            out = torch.clamp(out, -self.bounded, self.bounded)
        return out, new_state

    def backprop(self, loss):
        """Backpropagate, apply one optimizer step, then clear gradients."""
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()

    def loss(self, theta, list_psi_t):
        """Weighted loss; loss_func/weight_func are expected from a subclass."""
        return self.weight_func(self.loss_func(theta, list_psi_t))

    def init_hidden(self, batch_dim=1):
        """Zero recurrent state of shape (batch_dim, st_size)."""
        return torch.zeros((batch_dim, self.st_size))
示例#8
0
def create_model():
    """Instantiate the sentiment network named by ``args.model_type``."""
    kind = args.model_type
    if kind == 'bigru_net':
        # BiGRU additionally needs the batch size.
        return BiGRU(args.vocab_size, args.batch_size, args.padding_size)
    # The remaining models share a (vocab_size, padding_size) constructor.
    constructors = {
        'cnn_net': CNN,
        'bow_net': BOW,
        'lstm_net': LSTM,
        'gru_net': GRU,
    }
    if kind not in constructors:
        raise ValueError("Unknown model type!")
    return constructors[kind](args.vocab_size, args.padding_size)
示例#9
0
    def __init__(self,
                 input_size,
                 st_size,
                 hidden_size,
                 output_size,
                 bounded=-1,
                 lr=.001):
        """Build the encoder/GRU/decoder stack and its Adam optimizer."""
        super().__init__()

        # Remember layer dimensions and training hyperparameters.
        self.input_size = input_size
        self.st_size = st_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lr = lr
        self.bounded = bounded

        # Two-layer encoder, recurrent core, two-layer decoder.
        self.fc0_layer = nn.Linear(input_size, hidden_size)
        self.fc1_layer = nn.Linear(hidden_size, hidden_size)
        self.rnn_layer = GRU(hidden_size, st_size)
        self.fc3_layer = nn.Linear(st_size, hidden_size)
        self.fc4_layer = nn.Linear(hidden_size, output_size)

        # Built after the layers so self.parameters() already sees them.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
示例#10
0
def infer():
    """Run inference with the checkpointed Senta model and save predictions.

    Rebuilds the network named by ``args.model_type``, loads weights from
    ``args.checkpoints``, predicts on the 'infer' split, and (optionally)
    writes one JSON record per example to ``args.output_dir``.

    Raises:
        ValueError: for an unrecognised ``args.model_type`` (previously an
        unknown type fell through and crashed later with UnboundLocalError).
    """
    fluid.enable_dygraph(device)
    processor = SentaProcessor(data_dir=args.data_dir,
                               vocab_path=args.vocab_path,
                               random_seed=args.random_seed)

    infer_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='infer',
        epoch=1,
        shuffle=False)
    if args.model_type == 'cnn_net':
        model_infer = CNN(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model_infer = BOW(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model_infer = GRU(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model_infer = BiGRU(args.vocab_size, args.batch_size,
                            args.padding_size)
    else:
        raise ValueError("Unknown model type: {}".format(args.model_type))

    print('Do inferring ...... ')
    inputs = [Input([None, None], 'int64', name='doc')]
    model_infer.prepare(None,
                        CrossEntropy(),
                        Accuracy(topk=(1, )),
                        inputs,
                        device=device)
    model_infer.load(args.checkpoints, reset_optimizer=True)
    preds = model_infer.predict(test_data=infer_data_generator)
    # Flatten batch outputs into an (N, 2) probability array.
    preds = np.array(preds[0]).reshape((-1, 2))

    if args.output_dir:
        with open(os.path.join(args.output_dir, 'predictions.json'), 'w') as w:

            for p in range(len(preds)):
                label = np.argmax(preds[p])
                result = json.dumps({
                    'index': p,
                    'label': label,
                    'probs': preds[p].tolist()
                })
                w.write(result + '\n')
        print('Predictions saved at ' +
              os.path.join(args.output_dir, 'predictions.json'))
def _load_model(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                num_layers, dp_keep_prob, PATH, model_type):
    """Rebuild a language model and load the checkpoint at ``PATH``.

    model_type: 'RNN' selects the RNN class; anything else the GRU.
    Returns the model in eval mode, moved to GPU when one is available.
    """
    # Load model (Change to RNN if you want RNN to predict)
    if model_type == 'RNN':
        model = RNN(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
    else:
        model = GRU(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)

    if torch.cuda.is_available():
        # Fix: load_state_dict() returns a missing/unexpected-keys report,
        # not the model, so the original `...load_state_dict(...).cuda()`
        # raised AttributeError.  Load first, then move the model.
        model.load_state_dict(torch.load(PATH))
        model.cuda()
    else:
        model.load_state_dict(torch.load(PATH, map_location='cpu'))
    model.eval()
    return model
示例#12
0
def make_my_model(model_name, device, seq_len=35, batch_size=20, pt=None):
    """Build one of the assignment models and optionally load a checkpoint.

    Reference training settings for these fixed hyperparameters:
      --model=RNN --optimizer=ADAM --initial_lr=0.0001 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
      --model=GRU --optimizer=SGD_LR_SCHEDULE --initial_lr=10 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
      --model=TRANSFORMER --optimizer=SGD_LR_SCHEDULE --initial_lr=20 --batch_size=128 --seq_len=35 --hidden_size=512 --num_layers=6 --dp_keep_prob=0.9 --save_best

    Returns the model on ``device`` (weights from ``pt`` when given), or
    None for an unrecognised ``model_name``.
    """
    if model_name in ('RNN', 'GRU'):
        # Both recurrent models share one constructor signature.
        recurrent_cls = RNN if model_name == 'RNN' else GRU
        model = recurrent_cls(emb_size=200,
                              hidden_size=1500,
                              seq_len=seq_len,
                              batch_size=batch_size,
                              vocab_size=vocab_size,
                              num_layers=2,
                              dp_keep_prob=0.35)
    elif model_name == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=512,
                            n_blocks=6,
                            dropout=1. - 0.9)
        # These 3 attributes don't affect the Transformer's computations;
        # they are only used in run_epoch.
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
    else:
        print("ERROR: Model type not recognized.")
        return

    # Move to the requested device, then load weights if a path was given.
    model = model.to(device)
    if pt is not None:
        model.load_state_dict(torch.load(pt, map_location=device))
    return model
def load_model(model_info,
               device,
               vocab_size,
               emb_size=200,
               load_on_device=True):
    """Reconstruct the model described by ``model_info`` and load its weights.

    model_info supplies the architecture name plus its hyperparameters and
    the checkpoint location; anything other than 'RNN'/'GRU' is treated as
    the Transformer.
    """
    params_path = model_info.get_params_path()

    kind = model_info.model
    if kind in ('RNN', 'GRU'):
        # The two recurrent models share one constructor signature.
        recurrent_cls = RNN if kind == 'RNN' else GRU
        model = recurrent_cls(emb_size=emb_size,
                              hidden_size=model_info.hidden_size,
                              seq_len=model_info.seq_len,
                              batch_size=model_info.batch_size,
                              vocab_size=vocab_size,
                              num_layers=model_info.num_layers,
                              dp_keep_prob=model_info.dp_keep_prob)
    else:
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=model_info.hidden_size,
                            n_blocks=model_info.num_layers,
                            dropout=1. - model_info.dp_keep_prob)
        # Bookkeeping attributes read outside the model's own computation.
        model.batch_size = model_info.batch_size
        model.seq_len = model_info.seq_len
        model.vocab_size = vocab_size

    if load_on_device:
        model = model.to(device)
    model.load_state_dict(torch.load(params_path, map_location=device))
    return model
示例#14
0
def main():
    """Train or evaluate a time-series forecasting network.

    Mode, network choice and every hyperparameter come from the
    module-level ``config`` namespace; weights and plots are written to
    the paths it names.  NOTE(review): the device id is hard-coded to 12
    below ("BOON added") and shadows the ``id`` builtin inside this
    function.
    """
    #print the config args
    print(config.transfer_learning)
    print(config.mode)
    print(config.input_size)

    # Fix Seed for Reproducibility #
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(config.seed)

    # Samples, Weights, and Plots Path #
    paths = [config.weights_path, config.plots_path, config.numpy_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = load_data(config.combined_path, config.which_data, config.preprocess, config.resample)
    # id = config.which_data.split('_')[0]
    # NOTE(review): hard-coded device id (shadows builtin `id`) — the
    # commented line above suggests it used to come from the filename.
    id = 12 #BOON added
    print("Data of {} is successfully Loaded!".format(config.which_data))
    print(type(data))
    print(data.shape)

    # Plot Time-series Data #
    if config.plot:
        plot_full(config.plots_path, data, id, config.feature)
        plot_split(config.plots_path, data, id, config.valid_start, config.test_start, config.feature)

    # Min-Max Scaler #
    # Scales in place; the same `scaler` is reused in test mode to invert.
    scaler = MinMaxScaler()
    data.iloc[:,:] = scaler.fit_transform(data)
    print(type(data))

    # Split the Dataset #
    train_X, train_Y, val_X, val_Y, test_X, test_Y, test_shifted = \
        get_time_series_data_(data, config.valid_start, config.test_start, config.feature, config.label, config.window)

    print(train_X.shape)
    print(train_Y.shape)

    # Get Data Loader #
    train_loader, val_loader, test_loader = \
        get_data_loader(train_X, train_Y, val_X, val_Y, test_X, test_Y, config.batch_size)

    # Constants #
    # best_val_loss starts at a large sentinel; any real loss below it wins.
    best_val_loss = 100
    best_val_improv = 0

    # Lists #
    # NOTE(review): these lists are never cleared between epochs, so the
    # per-epoch prints below are averages over ALL batches seen so far.
    train_losses, val_losses = list(), list()
    val_maes, val_mses, val_rmses, val_mapes, val_mpes, val_r2s = list(), list(), list(), list(), list(), list()

    # Prepare Network #
    if config.network == 'dnn':
        model = DNN(config.window, config.hidden_size, config.output_size).to(device)
    elif config.network == 'cnn':
        model = CNN(config.window, config.hidden_size, config.output_size).to(device)
    elif config.network == 'rnn':
        model = RNN(config.input_size, config.hidden_size, config.num_layers, config.output_size).to(device)
    elif config.network == 'lstm':
        model = LSTM(config.input_size, config.hidden_size, config.num_layers, config.output_size, config.bidirectional).to(device)
    elif config.network == 'gru':
        model = GRU(config.input_size, config.hidden_size, config.num_layers, config.output_size).to(device)
    elif config.network == 'recursive':
        model = RecursiveLSTM(config.input_size, config.hidden_size, config.num_layers, config.output_size).to(device)
    elif config.network == 'attentional':
        model = AttentionalLSTM(config.input_size, config.key, config.query, config.value, config.hidden_size, config.num_layers, config.output_size, config.bidirectional).to(device)
    else:
        raise NotImplementedError

    if config.mode == 'train':

        # If fine-tuning #
        # Warm-start from the device-12 baseline and leave all layers trainable.
        print('config.TL = {}'.format(config.transfer_learning))
        if config.transfer_learning:
            print('config.TL = {}'.format(config.transfer_learning))
            print('TL: True')
            model.load_state_dict(torch.load(os.path.join(config.weights_path, 'BEST_{}_Device_ID_12.pkl'.format(config.network))))

            for param in model.parameters():
                param.requires_grad = True

        # Loss Function #
        criterion = torch.nn.MSELoss()

        # Optimizer #
        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr, betas=(0.5, 0.999))
        optimizer_scheduler = get_lr_scheduler(config.lr_scheduler, optimizer, config)

        # Train and Validation #
        print("Training {} started with total epoch of {} using Driver ID of {}.".format(config.network, config.num_epochs, id))
        for epoch in range(config.num_epochs):

            # Train #
            for i, (data, label) in enumerate(train_loader):

                # Data Preparation #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred = model(data)

                # Calculate Loss #
                train_loss = criterion(pred, label)

                # Back Propagation and Update #
                optimizer.zero_grad()
                train_loss.backward()
                optimizer.step()

                # Add items to Lists #
                train_losses.append(train_loss.item())

            print("Epoch [{}/{}]".format(epoch+1, config.num_epochs))
            print("Train")
            print("Loss : {:.4f}".format(np.average(train_losses)))

            optimizer_scheduler.step()

            # Validation #
            with torch.no_grad():
                for i, (data, label) in enumerate(val_loader):

                    # Data Preparation #
                    data = data.to(device, dtype=torch.float32)
                    label = label.to(device, dtype=torch.float32)

                    # Forward Data #
                    pred_val = model(data)

                    # Calculate Loss #
                    val_loss = criterion(pred_val, label)
                    val_mae = mean_absolute_error(label.cpu(), pred_val.cpu())
                    val_mse = mean_squared_error(label.cpu(), pred_val.cpu(), squared=True)
                    val_rmse = mean_squared_error(label.cpu(), pred_val.cpu(), squared=False)
                    val_mpe = mean_percentage_error(label.cpu(), pred_val.cpu())
                    val_mape = mean_absolute_percentage_error(label.cpu(), pred_val.cpu())
                    val_r2 = r2_score(label.cpu(), pred_val.cpu())

                    # Add item to Lists #
                    val_losses.append(val_loss.item())
                    val_maes.append(val_mae.item())
                    val_mses.append(val_mse.item())
                    val_rmses.append(val_rmse.item())
                    val_mpes.append(val_mpe.item())
                    val_mapes.append(val_mape.item())
                    val_r2s.append(val_r2.item())

                # Print Statistics #
                print("Validation")
                print("Loss : {:.4f}".format(np.average(val_losses)))
                print(" MAE : {:.4f}".format(np.average(val_maes)))
                print(" MSE : {:.4f}".format(np.average(val_mses)))
                print("RMSE : {:.4f}".format(np.average(val_rmses)))
                print(" MPE : {:.4f}".format(np.average(val_mpes)))
                print("MAPE : {:.4f}".format(np.average(val_mapes)))
                print(" R^2 : {:.4f}".format(np.average(val_r2s)))

                # Save the model only if validation loss decreased #
                # NOTE(review): this is the cumulative average (see lists
                # note above), not this epoch's loss alone.
                curr_val_loss = np.average(val_losses)

                if curr_val_loss < best_val_loss:
                    best_val_loss = min(curr_val_loss, best_val_loss)

                    # if config.transfer_learning:
                    #     torch.save(model.state_dict(), os.path.join(config.weights_path, 'BEST_{}_Device_ID_{}_transfer.pkl'.format(config.network, id)))
                    # else:
                    #     torch.save(model.state_dict(), os.path.join(config.weights_path, 'BEST_{}_Device_ID_{}.pkl'.format(config.network, id)))

                    if config.transfer_learning:
                        torch.save(model.state_dict(), os.path.join(config.weights_path, 'BEST_{}_Device_ID_{}_transfer_BOON_reshaped.pkl'.format(config.network, id)))
                    else:
                        torch.save(model.state_dict(), os.path.join(config.weights_path, 'BEST_{}_Device_ID_{}_BOON_reshaped.pkl'.format(config.network, id)))

                    print("Best model is saved!\n")
                    best_val_improv = 0

                elif curr_val_loss >= best_val_loss:
                    best_val_improv += 1
                    print("Best Validation has not improved for {} epochs.\n".format(best_val_improv))

                    # Early stopping after 10 epochs without improvement.
                    if best_val_improv == 10:
                        break

    elif config.mode == 'test':

        # Prepare Network #
        if config.transfer_learning:
            model.load_state_dict(torch.load(os.path.join(config.weights_path, 'BEST_{}_Device_ID_{}_transfer_BOON_reshaped.pkl'.format(config.network, id))))
        else:
            model.load_state_dict(torch.load(os.path.join(config.weights_path, 'BEST_{}_Device_ID_{}_BOON_reshaped.pkl'.format(config.network, id))))

        print("{} for Device ID {} is successfully loaded!".format((config.network).upper(), id))

        with torch.no_grad():

            pred_test, labels = list(), list()

            for i, (data, label) in enumerate(test_loader):

                # Data Preparation #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred = model(data)

                # Add items to Lists #
                # += extends the lists with the per-sample rows of each batch.
                pred_test += pred
                labels += label

            # Derive Metric and Plot #
            if config.transfer_learning:
                pred, actual = test(config.plots_path, id, config.network, scaler, pred_test, labels, test_shifted, transfer_learning=True)
            else:
                pred, actual = test(config.plots_path, id, config.network, scaler, pred_test, labels, test_shifted)
示例#15
0
# This is where your model code will be called. You may modify this code
# if required for your implementation, but it should not typically be necessary,
# and you must let the TAs know if you do so.
if args.model in ('RNN', 'GRU'):
    # The two recurrent models share one constructor signature.
    recurrent_cls = RNN if args.model == 'RNN' else GRU
    model = recurrent_cls(emb_size=args.emb_size,
                          hidden_size=args.hidden_size,
                          seq_len=args.seq_len,
                          batch_size=args.batch_size,
                          vocab_size=vocab_size,
                          num_layers=args.num_layers,
                          dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # num_layers and hidden_size mean slightly different things for the
        # Transformer than for the RNNs, and it has further hyperparameters
        # (e.g. the number of attention heads) that change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=args.hidden_size,
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
示例#16
0
File: ptb-lm.py  Project: lebrice/IFT6135
# This is where your model code will be called. You may modify this code
# if required for your implementation, but it should not typically be necessary,
# and you must let the TAs know if you do so.
# NOTE(review): if args.model is none of 'RNN'/'GRU'/'TRANSFORMER', `model`
# is never bound and any later use raises NameError.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # Also, the Transformer also has other hyperparameters
        # (such as the number of attention heads) which can change it's behavior.
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=args.hidden_size,
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
示例#17
0
File: main.py  Project: ml-lab/SRU-1
    clip = 2925.4042227640757
elif model_name == 'lstm':
    hidden_size = 200
    num_layers  = 1
    init_forget_bias = 1
    lr = 0.00016654418947982137
    weight_decay = 7.040822706204121e-05
    dropout = 0.18404592540409914
    clip = 4389.748805208904

# Instantiate the model selected by model_name.
if model_name == 'sru':
    model = SRU(input_size, phi_size, r_size, cell_out_size, output_size, dropout=dropout, gpu=gpu)
    model.initWeight()
elif model_name == 'gru':
    model = GRU(input_size, hidden_size, output_size, num_layers, dropout, gpu=gpu)
    model.initWeight(init_forget_bias)
elif model_name == 'lstm':
    model = LSTM(input_size, hidden_size, output_size, num_layers, dropout, gpu=gpu)
    model.initWeight(init_forget_bias)
# Fix: test truthiness instead of `gpu == True`.
if gpu:
    model.cuda()

# Define the loss and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)


''' Training '''

n_epochs = 400
def main(config):
    """Train or evaluate a time-series forecasting network.

    In ``config.mode == 'train'`` this runs the training loop with periodic
    validation, saving the model weights whenever the running validation loss
    improves. In ``config.mode == 'test'`` it loads the best checkpoint and
    reports test metrics in the original (inverse-scaled) value range.

    Relies on module-level names defined elsewhere in this file: ``device``,
    the network classes (DNN/CNN/RNN/LSTM/GRU/RecursiveLSTM/AttentionLSTM)
    and helpers (``load_data``, ``data_loader``, ``make_dirs``,
    ``get_lr_scheduler``, metric functions, ``plot_*``).
    """

    # Fix Seed #
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)

    # Weights and Plots Path #
    paths = [config.weights_path, config.plots_path]

    for path in paths:
        make_dirs(path)

    # Prepare Data #
    # Keep only the single target feature column.
    data = load_data(config.which_data)[[config.feature]]
    data = data.copy()

    # Plot Time-Series Data #
    if config.plot_full:
        plot_full(config.plots_path, data, config.feature)

    # Scale to [0, 1]; the same scaler is reused to invert test predictions.
    scaler = MinMaxScaler()
    data[config.feature] = scaler.fit_transform(data)

    train_loader, val_loader, test_loader = \
        data_loader(data, config.seq_length, config.train_split, config.test_split, config.batch_size)

    # Lists #
    # NOTE(review): these lists are never reset between epochs, so the
    # "averages" printed below are running averages over all epochs so far,
    # not per-epoch values — confirm this smoothing is intended.
    train_losses, val_losses = list(), list()
    val_maes, val_mses, val_rmses, val_mapes, val_mpes, val_r2s = list(), list(), list(), list(), list(), list()
    test_maes, test_mses, test_rmses, test_mapes, test_mpes, test_r2s = list(), list(), list(), list(), list(), list()

    # Constants #
    best_val_loss = 100  # assumes MSE on scaled data stays well below 100
    best_val_improv = 0

    # Prepare Network #
    if config.network == 'dnn':
        model = DNN(config.seq_length, config.hidden_size, config.output_size).to(device)
    elif config.network == 'cnn':
        model = CNN(config.seq_length, config.batch_size).to(device)
    elif config.network == 'rnn':
        model = RNN(config.input_size, config.hidden_size, config.num_layers, config.output_size).to(device)
    elif config.network == 'lstm':
        model = LSTM(config.input_size, config.hidden_size, config.num_layers, config.output_size, config.bidirectional).to(device)
    elif config.network == 'gru':
        model = GRU(config.input_size, config.hidden_size, config.num_layers, config.output_size).to(device)
    elif config.network == 'recursive':
        model = RecursiveLSTM(config.input_size, config.hidden_size, config.num_layers, config.output_size).to(device)
    elif config.network == 'attention':
        model = AttentionLSTM(config.input_size, config.key, config.query, config.value, config.hidden_size, config.num_layers, config.output_size, config.bidirectional).to(device)
    else:
        raise NotImplementedError

    # Loss Function #
    criterion = torch.nn.MSELoss()

    # Optimizer #
    optim = torch.optim.Adam(model.parameters(), lr=config.lr, betas=(0.5, 0.999))
    optim_scheduler = get_lr_scheduler(config.lr_scheduler, optim)

    # Train and Validation #
    if config.mode == 'train':

        # Train #
        print("Training {} started with total epoch of {}.".format(model.__class__.__name__, config.num_epochs))

        for epoch in range(config.num_epochs):
            # NOTE(review): the loop variable `data` rebinds the DataFrame
            # name from above; harmless here because only `scaler` is needed
            # later, but worth renaming eventually.
            for i, (data, label) in enumerate(train_loader):

                # Prepare Data #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred = model(data)

                # Calculate Loss #
                train_loss = criterion(pred, label)

                # Initialize Optimizer, Back Propagation and Update #
                optim.zero_grad()
                train_loss.backward()
                optim.step()

                # Add item to Lists #
                train_losses.append(train_loss.item())

            # Print Statistics #
            if (epoch+1) % config.print_every == 0:
                print("Epoch [{}/{}]".format(epoch+1, config.num_epochs))
                print("Train Loss {:.4f}".format(np.average(train_losses)))

            # Learning Rate Scheduler #
            optim_scheduler.step()

            # Validation #
            with torch.no_grad():
                for i, (data, label) in enumerate(val_loader):

                    # Prepare Data #
                    data = data.to(device, dtype=torch.float32)
                    label = label.to(device, dtype=torch.float32)

                    # Forward Data #
                    pred_val = model(data)

                    # Calculate Loss #
                    val_loss = criterion(pred_val, label)
                    val_mae = mean_absolute_error(label.cpu(), pred_val.cpu())
                    val_mse = mean_squared_error(label.cpu(), pred_val.cpu(), squared=True)
                    val_rmse = mean_squared_error(label.cpu(), pred_val.cpu(), squared=False)
                    val_mpe = mean_percentage_error(label.cpu(), pred_val.cpu())
                    val_mape = mean_absolute_percentage_error(label.cpu(), pred_val.cpu())
                    val_r2 = r2_score(label.cpu(), pred_val.cpu())

                    # Add item to Lists #
                    val_losses.append(val_loss.item())
                    val_maes.append(val_mae.item())
                    val_mses.append(val_mse.item())
                    val_rmses.append(val_rmse.item())
                    val_mpes.append(val_mpe.item())
                    val_mapes.append(val_mape.item())
                    val_r2s.append(val_r2.item())

            # NOTE(review): checkpointing only happens on print epochs, so the
            # best model can be missed between prints — confirm intended.
            if (epoch + 1) % config.print_every == 0:

                # Print Statistics #
                print("Val Loss {:.4f}".format(np.average(val_losses)))
                print("Val  MAE : {:.4f}".format(np.average(val_maes)))
                print("Val  MSE : {:.4f}".format(np.average(val_mses)))
                print("Val RMSE : {:.4f}".format(np.average(val_rmses)))
                print("Val  MPE : {:.4f}".format(np.average(val_mpes)))
                print("Val MAPE : {:.4f}".format(np.average(val_mapes)))
                print("Val  R^2 : {:.4f}".format(np.average(val_r2s)))

                # Save the model Only if validation loss decreased #
                curr_val_loss = np.average(val_losses)

                if curr_val_loss < best_val_loss:
                    # min() is redundant here (curr < best already holds).
                    best_val_loss = min(curr_val_loss, best_val_loss)
                    torch.save(model.state_dict(), os.path.join(config.weights_path, 'BEST_{}.pkl'.format(model.__class__.__name__)))

                    print("Best model is saved!\n")
                    best_val_improv = 0

                elif curr_val_loss >= best_val_loss:
                    best_val_improv += 1
                    print("Best Validation has not improved for {} epochs.\n".format(best_val_improv))

    elif config.mode == 'test':

        # Load the Model Weight #
        model.load_state_dict(torch.load(os.path.join(config.weights_path, 'BEST_{}.pkl'.format(model.__class__.__name__))))

        # Test #
        with torch.no_grad():
            for i, (data, label) in enumerate(test_loader):

                # Prepare Data #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred_test = model(data)

                # Convert to Original Value Range #
                pred_test = pred_test.data.cpu().numpy()
                label = label.data.cpu().numpy().reshape(-1, 1)

                pred_test = scaler.inverse_transform(pred_test)
                label = scaler.inverse_transform(label)

                # Calculate Loss #
                test_mae = mean_absolute_error(label, pred_test)
                test_mse = mean_squared_error(label, pred_test, squared=True)
                test_rmse = mean_squared_error(label, pred_test, squared=False)
                test_mpe = mean_percentage_error(label, pred_test)
                test_mape = mean_absolute_percentage_error(label, pred_test)
                test_r2 = r2_score(label, pred_test)

                # Add item to Lists #
                test_maes.append(test_mae.item())
                test_mses.append(test_mse.item())
                test_rmses.append(test_rmse.item())
                test_mpes.append(test_mpe.item())
                test_mapes.append(test_mape.item())
                test_r2s.append(test_r2.item())

            # Print Statistics #
            print("Test {}".format(model.__class__.__name__))
            print("Test  MAE : {:.4f}".format(np.average(test_maes)))
            print("Test  MSE : {:.4f}".format(np.average(test_mses)))
            print("Test RMSE : {:.4f}".format(np.average(test_rmses)))
            print("Test  MPE : {:.4f}".format(np.average(test_mpes)))
            print("Test MAPE : {:.4f}".format(np.average(test_mapes)))
            print("Test  R^2 : {:.4f}".format(np.average(test_r2s)))

            # Plot Figure #
            # Note: pred_test/label here are only the last test batch.
            plot_pred_test(pred_test, label, config.plots_path, config.feature, model)
 # MODEL SETUP
 #
 ###############################################################################
 if args["model"] == 'RNN':
     model = RNN(emb_size=args["emb_size"],
                 hidden_size=args["hidden_size"],
                 seq_len=args["seq_len"],
                 batch_size=args["batch_size"],
                 vocab_size=vocab_size,
                 num_layers=args["num_layers"],
                 dp_keep_prob=args["dp_keep_prob"])
 elif args["model"] == 'GRU':
     model = GRU(emb_size=args["emb_size"],
                 hidden_size=args["hidden_size"],
                 seq_len=args["seq_len"],
                 batch_size=args["batch_size"],
                 vocab_size=vocab_size,
                 num_layers=args["num_layers"],
                 dp_keep_prob=args["dp_keep_prob"])
 elif args["model"] == 'TRANSFORMER':
     if args["debug"]:  # use a very small model
         model = TRANSFORMER(vocab_size=vocab_size,
                             n_units=16,
                             n_blocks=2)
     else:
         # Note that we're using num_layers and hidden_size to mean slightly
         # different things here than in the RNNs.
         # Also, the Transformer also has other hyperparameters
         # (such as the number of attention heads) which can change it's behavior.
         model = TRANSFORMER(vocab_size=vocab_size,
                             n_units=args["hidden_size"],
示例#20
0
def objective(args):
    """Hyperopt objective: train a model with the sampled hyper-parameters.

    Trains the selected architecture (SRU/GRU/LSTM) on MNIST-as-sequences
    and returns the negative best test accuracy so hyperopt can minimize it.

    Presumably relies on module-level globals set elsewhere in this file:
    ``count``, ``mode``, ``model_name``, ``gpu``, ``batch_size``,
    ``n_epochs``, ``seed`` and the helpers ``load_mnist``, ``train``,
    ``test``, ``checkpoint``, ``timeSince`` — TODO confirm.
    """
    global count
    count += 1
    print(
        '-------------------------------------------------------------------')
    print('%d回目' % count)
    print(args)

    # Hyper-parameters sampled by hyperopt for this trial.
    lr = args['l_rate']
    weight_decay = args['weight_decay']
    dropout = args['dropout']
    clip = args['clip']
    # 'full' mode also searches architecture sizes; 'limited' fixes them.
    if mode == 'full':
        if model_name == 'sru':
            phi_size = int(args['phi_size'])
            r_size = int(args['r_size'])
            cell_out_size = int(args['cell_out_size'])
        elif model_name in ['gru', 'lstm']:
            hidden_size = int(args['hidden_size'])
            num_layers = int(args['num_layers'])
            init_forget_bias = args['init_forget_bias']
    elif mode == 'limited':
        if model_name == 'sru':
            phi_size = 200
            r_size = 60
            cell_out_size = 200
        elif model_name in ['gru', 'lstm']:
            hidden_size = 200
            num_layers = 1
            init_forget_bias = 1

    train_X, test_X, train_y, test_y = load_mnist()
    input_size = train_X.shape[2]
    output_size = np.unique(train_y).size

    # Create the model instance for the selected architecture.
    if model_name == 'sru':
        model = SRU(input_size,
                    phi_size,
                    r_size,
                    cell_out_size,
                    output_size,
                    dropout=dropout,
                    gpu=gpu)
        model.initWeight()
    elif model_name == 'gru':
        model = GRU(input_size,
                    hidden_size,
                    output_size,
                    num_layers,
                    dropout,
                    gpu=gpu)
        model.initWeight(init_forget_bias)
    elif model_name == 'lstm':
        model = LSTM(input_size,
                     hidden_size,
                     output_size,
                     num_layers,
                     dropout,
                     gpu=gpu)
        model.initWeight(init_forget_bias)
    if gpu == True:
        model.cuda()

    # Define the loss function and optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=lr,
                           weight_decay=weight_decay)
    ''' 訓練 '''
    # Training loop.
    n_batches = train_X.shape[0] // batch_size
    n_batches_test = test_X.shape[0] // batch_size
    all_cost, all_acc = [], []
    start_time = time.time()
    stop_count = 0

    for epoch in range(n_epochs):
        train_cost, test_cost, train_acc, test_acc = 0, 0, 0, 0
        train_X, train_y = shuffle(train_X, train_y, random_state=seed)

        # Training phase.
        model.train()
        train_X_t = np.transpose(
            train_X,
            (1, 0, 2))  # convert X.shape => (seq_len, n_samples, n_features)
        for i in range(n_batches):
            start = i * batch_size
            end = start + batch_size
            inputs, labels = train_X_t[:, start:end, :], train_y[start:end]
            inputs, labels = Variable(torch.from_numpy(inputs)), Variable(
                torch.from_numpy(labels))
            if gpu == True:
                inputs, labels = inputs.cuda(), labels.cuda()
            cost, accuracy = train(model, inputs, labels, optimizer, criterion,
                                   clip)
            train_cost += cost / n_batches
            train_acc += accuracy / n_batches

        # Evaluation phase.
        model.eval()
        test_X_t = np.transpose(test_X, (1, 0, 2))
        for i in range(n_batches_test):
            start = i * batch_size
            end = start + batch_size
            inputs, labels = test_X_t[:, start:end, :], test_y[start:end]
            inputs, labels = Variable(torch.from_numpy(inputs)), Variable(
                torch.from_numpy(labels))
            if gpu == True:
                inputs, labels = inputs.cuda(), labels.cuda()
            cost, accuracy = test(model, inputs, labels, criterion)
            test_cost += cost / n_batches_test
            test_acc += accuracy / n_batches_test

        print(
            'EPOCH:: %i, (%s) train_cost: %.3f, test_cost: %.3f, train_acc: %.3f, test_acc: %.3f'
            % (epoch + 1, timeSince(start_time), train_cost, test_cost,
               train_acc, test_acc))

        # Abort training when the cost explodes (test_cost != test_cost
        # detects NaN, since NaN != NaN).
        if test_cost != test_cost or test_cost > 100000:
            print('Stop learning due to the extremely high cost')
            all_acc.append(test_acc)
            break

        # Early stopping when test_cost has not decreased for 5 epochs in a row.
        if len(all_cost) > 0 and test_cost >= all_cost[-1]:
            stop_count += 1
        else:
            stop_count = 0
        if stop_count == 5:
            print('Early stopping observing no learning')
            all_acc.append(test_acc)
            break

        # Save the model only when test_acc beats every previous epoch.
        if len(all_acc) == 0 or test_acc > max(all_acc):
            checkpoint(model, optimizer, test_acc * 10000)

        all_cost.append(test_cost)
        all_acc.append(test_acc)

    print('max test_acc: %.3f' % max(all_acc))

    # Return the negated best test accuracy for hyperopt to minimize.
    return -max(all_acc)
示例#21
0
def main(args):
    """Train or evaluate a single-/multi-step time-series forecaster.

    ``args.mode == 'train'`` runs the train/validation loop and checkpoints
    the model whenever the running validation loss improves;
    ``args.mode == 'test'`` loads the best checkpoint, evaluates on the test
    split in the original value range, plots predictions and saves them as
    numpy arrays. Multi-step targets are averaged over the step axis before
    computing metrics.

    Relies on module-level names defined elsewhere in this file: ``device``,
    the network classes (DNN/CNN/RNN/LSTM/GRU/AttentionalLSTM) and helpers
    (``load_data``, ``split_sequence_*``, ``data_loader``, ``make_dirs``,
    ``get_lr_scheduler``, metric functions, ``plot_*``).
    """

    # Fix Seed #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Weights and Plots Path #
    paths = [args.weights_path, args.plots_path, args.numpy_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    # Keep only the single target feature column.
    data = load_data(args.which_data)[[args.feature]]
    data = data.copy()

    # Plot Time-Series Data #
    if args.plot_full:
        plot_full(args.plots_path, data, args.feature)

    # Scale to [0, 1]; the same scaler inverts test predictions later.
    scaler = MinMaxScaler()
    data[args.feature] = scaler.fit_transform(data)

    # Split the Dataset #
    copied_data = data.copy().values

    # `step` tags checkpoint/plot/numpy filenames with the target type.
    if args.multi_step:
        X, y = split_sequence_multi_step(copied_data, args.seq_length,
                                         args.output_size)
        step = 'MultiStep'
    else:
        X, y = split_sequence_uni_step(copied_data, args.seq_length)
        step = 'SingleStep'

    train_loader, val_loader, test_loader = data_loader(
        X, y, args.train_split, args.test_split, args.batch_size)

    # Lists #
    # NOTE(review): these lists are never reset between epochs, so the
    # printed "averages" are running averages over all epochs so far.
    train_losses, val_losses = list(), list()
    val_maes, val_mses, val_rmses, val_mapes, val_mpes, val_r2s = list(), list(
    ), list(), list(), list(), list()
    test_maes, test_mses, test_rmses, test_mapes, test_mpes, test_r2s = list(
    ), list(), list(), list(), list(), list()
    pred_tests, labels = list(), list()

    # Constants #
    best_val_loss = 100  # assumes MSE on scaled data stays well below 100
    best_val_improv = 0

    # Prepare Network #
    if args.model == 'dnn':
        model = DNN(args.seq_length, args.hidden_size,
                    args.output_size).to(device)
    elif args.model == 'cnn':
        model = CNN(args.seq_length, args.batch_size,
                    args.output_size).to(device)
    elif args.model == 'rnn':
        model = RNN(args.input_size, args.hidden_size, args.num_layers,
                    args.output_size).to(device)
    elif args.model == 'lstm':
        model = LSTM(args.input_size, args.hidden_size, args.num_layers,
                     args.output_size, args.bidirectional).to(device)
    elif args.model == 'gru':
        model = GRU(args.input_size, args.hidden_size, args.num_layers,
                    args.output_size).to(device)
    elif args.model == 'attentional':
        model = AttentionalLSTM(args.input_size, args.qkv, args.hidden_size,
                                args.num_layers, args.output_size,
                                args.bidirectional).to(device)
    else:
        raise NotImplementedError

    # Loss Function #
    criterion = torch.nn.MSELoss()

    # Optimizer #
    optim = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             betas=(0.5, 0.999))
    optim_scheduler = get_lr_scheduler(args.lr_scheduler, optim)

    # Train and Validation #
    if args.mode == 'train':

        # Train #
        print("Training {} using {} started with total epoch of {}.".format(
            model.__class__.__name__, step, args.num_epochs))

        for epoch in range(args.num_epochs):
            # NOTE(review): loop variable `data` rebinds the DataFrame name
            # from above; harmless since only `scaler` is needed afterwards.
            for i, (data, label) in enumerate(train_loader):

                # Prepare Data #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred = model(data)

                # Calculate Loss #
                train_loss = criterion(pred, label)

                # Initialize Optimizer, Back Propagation and Update #
                optim.zero_grad()
                train_loss.backward()
                optim.step()

                # Add item to Lists #
                train_losses.append(train_loss.item())

            # Print Statistics #
            if (epoch + 1) % args.print_every == 0:
                print("Epoch [{}/{}]".format(epoch + 1, args.num_epochs))
                print("Train Loss {:.4f}".format(np.average(train_losses)))

            # Learning Rate Scheduler #
            optim_scheduler.step()

            # Validation #
            with torch.no_grad():
                for i, (data, label) in enumerate(val_loader):

                    # Prepare Data #
                    data = data.to(device, dtype=torch.float32)
                    label = label.to(device, dtype=torch.float32)

                    # Forward Data #
                    pred_val = model(data)

                    # Calculate Loss #
                    val_loss = criterion(pred_val, label)

                    # Multi-step outputs are averaged over the step axis
                    # before computing sklearn metrics.
                    if args.multi_step:
                        pred_val = np.mean(pred_val.detach().cpu().numpy(),
                                           axis=1)
                        label = np.mean(label.detach().cpu().numpy(), axis=1)
                    else:
                        pred_val, label = pred_val.cpu(), label.cpu()

                    # Calculate Metrics #
                    val_mae = mean_absolute_error(label, pred_val)
                    val_mse = mean_squared_error(label, pred_val, squared=True)
                    val_rmse = mean_squared_error(label,
                                                  pred_val,
                                                  squared=False)
                    val_mpe = mean_percentage_error(label, pred_val)
                    val_mape = mean_absolute_percentage_error(label, pred_val)
                    val_r2 = r2_score(label, pred_val)

                    # Add item to Lists #
                    val_losses.append(val_loss.item())
                    val_maes.append(val_mae.item())
                    val_mses.append(val_mse.item())
                    val_rmses.append(val_rmse.item())
                    val_mpes.append(val_mpe.item())
                    val_mapes.append(val_mape.item())
                    val_r2s.append(val_r2.item())

            # NOTE(review): checkpointing only happens on print epochs, so
            # the best model can be missed between prints — confirm intended.
            if (epoch + 1) % args.print_every == 0:

                # Print Statistics #
                print("Val Loss {:.4f}".format(np.average(val_losses)))
                print(" MAE : {:.4f}".format(np.average(val_maes)))
                print(" MSE : {:.4f}".format(np.average(val_mses)))
                print("RMSE : {:.4f}".format(np.average(val_rmses)))
                print(" MPE : {:.4f}".format(np.average(val_mpes)))
                print("MAPE : {:.4f}".format(np.average(val_mapes)))
                print(" R^2 : {:.4f}".format(np.average(val_r2s)))

                # Save the model only if validation loss decreased #
                curr_val_loss = np.average(val_losses)

                if curr_val_loss < best_val_loss:
                    # min() is redundant here (curr < best already holds).
                    best_val_loss = min(curr_val_loss, best_val_loss)
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            args.weights_path, 'BEST_{}_using_{}.pkl'.format(
                                model.__class__.__name__, step)))

                    print("Best model is saved!\n")
                    best_val_improv = 0

                elif curr_val_loss >= best_val_loss:
                    best_val_improv += 1
                    print("Best Validation has not improved for {} epochs.\n".
                          format(best_val_improv))

    elif args.mode == 'test':

        # Load the Model Weight #
        model.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path,
                    'BEST_{}_using_{}.pkl'.format(model.__class__.__name__,
                                                  step))))

        # Test #
        with torch.no_grad():
            for i, (data, label) in enumerate(test_loader):

                # Prepare Data #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred_test = model(data)

                # Convert to Original Value Range #
                pred_test, label = pred_test.detach().cpu().numpy(
                ), label.detach().cpu().numpy()

                pred_test = scaler.inverse_transform(pred_test)
                label = scaler.inverse_transform(label)

                if args.multi_step:
                    pred_test = np.mean(pred_test, axis=1)
                    label = np.mean(label, axis=1)

                # Accumulate full test-set predictions for plotting/saving.
                pred_tests += pred_test.tolist()
                labels += label.tolist()

                # Calculate Loss #
                test_mae = mean_absolute_error(label, pred_test)
                test_mse = mean_squared_error(label, pred_test, squared=True)
                test_rmse = mean_squared_error(label, pred_test, squared=False)
                test_mpe = mean_percentage_error(label, pred_test)
                test_mape = mean_absolute_percentage_error(label, pred_test)
                test_r2 = r2_score(label, pred_test)

                # Add item to Lists #
                test_maes.append(test_mae.item())
                test_mses.append(test_mse.item())
                test_rmses.append(test_rmse.item())
                test_mpes.append(test_mpe.item())
                test_mapes.append(test_mape.item())
                test_r2s.append(test_r2.item())

            # Print Statistics #
            print("Test {} using {}".format(model.__class__.__name__, step))
            print(" MAE : {:.4f}".format(np.average(test_maes)))
            print(" MSE : {:.4f}".format(np.average(test_mses)))
            print("RMSE : {:.4f}".format(np.average(test_rmses)))
            print(" MPE : {:.4f}".format(np.average(test_mpes)))
            print("MAPE : {:.4f}".format(np.average(test_mapes)))
            print(" R^2 : {:.4f}".format(np.average(test_r2s)))

            # Plot Figure #
            plot_pred_test(pred_tests[:args.time_plot],
                           labels[:args.time_plot], args.plots_path,
                           args.feature, model, step)

            # Save Numpy files #
            np.save(
                os.path.join(
                    args.numpy_path,
                    '{}_using_{}_TestSet.npy'.format(model.__class__.__name__,
                                                     step)),
                np.asarray(pred_tests))
            np.save(
                os.path.join(args.numpy_path,
                             'TestSet_using_{}.npy'.format(step)),
                np.asarray(labels))

    else:
        raise NotImplementedError
示例#22
0
# Load the I/Q signal datasets and wrap them in DataLoaders.
training_set = SignalDataset_iq(path, train=True)
train_loader = torch.utils.data.DataLoader(training_set, **params_dataloader)

test_set = SignalDataset_iq(path, train=False)
test_loader = torch.utils.data.DataLoader(test_set, **params_dataloader)

# The number of output classes is determined by the training data.
num_classes = training_set.num_classes

# Build the requested architecture via a name -> class dispatch table.
_arch_table = {"rnn": RNN, "gru": GRU, "lstm": LSTM}
if arch not in _arch_table:
    raise Exception(
        "Only 'rnn', 'gru', and 'lstm' are available model options.")
model = _arch_table[arch](**params_model,
                          output_size=num_classes).to(device=device)

print("Model size: {0}".format(count_parameters(model)))

# Negative log-likelihood loss, SGD, and plateau-based LR halving.
criterion = nn.NLLLoss()
op = torch.optim.SGD(model.parameters(), **params_op)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    op, patience=4, factor=0.5, verbose=True)
示例#23
0
# Build the vocabulary from the training corpus; its size fixes the
# models' output dimension.
word_to_id, id_2_word = _build_vocab(train_path)
vocab_size = len(word_to_id)

# Instantiate the RNN and GRU generators. Both share sequence/batch/vocab
# settings; dropout is disabled (keep prob = 1) for generation.
shared_kwargs = dict(seq_len=argsdict["seq_len"],
                     batch_size=argsdict["batch_size"],
                     vocab_size=vocab_size,
                     dp_keep_prob=1)
rnn = RNN(emb_size=argsdict["RNN_emb_size"],
          hidden_size=argsdict["RNN_hidden_size"],
          num_layers=argsdict["RNN_num_layers"],
          **shared_kwargs)
gru = GRU(emb_size=argsdict["GRU_emb_size"],
          hidden_size=argsdict["GRU_hidden_size"],
          num_layers=argsdict["GRU_num_layers"],
          **shared_kwargs)

# Restore the trained weights and switch both models to inference mode.
for net, ckpt_path in ((rnn, args.RNN_path), (gru, args.GRU_path)):
    net.load_state_dict(torch.load(ckpt_path))
    net.eval()

# Fresh hidden states for both models.
hidden = [rnn.init_hidden(), gru.init_hidden()]

# Set the random seed manually for reproducibility.
示例#24
0
def main(args):
    """Train or evaluate a single-feature time-series forecasting model.

    Loads one device's series, min-max scales it, windows it into
    supervised (X, y) pairs, then either trains the selected network with
    early stopping on validation loss (mode 'train') or evaluates the best
    saved checkpoint on the test split (mode 'test').

    Args:
        args: parsed CLI namespace (network/mode selection, data and output
            paths, window/split sizes, optimizer settings, etc.).
    """

    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Weights, and Plots Path #
    for path in [args.weights_path, args.plots_path, args.numpy_path]:
        make_dirs(path)

    # Prepare Data #
    data = load_data(args.combined_path, args.which_data, args.preprocess,
                     args.resample)[[args.feature]]
    # Device identifier embedded in the data filename ("12_xxx" -> "12").
    # Renamed from `id`, which shadowed the builtin.
    device_id = args.which_data.split('_')[0]
    print("Data of {} is successfully Loaded!".format(args.which_data))

    # Plot Time-series Data #
    if args.plot:
        plot_full(args.plots_path, data, device_id, args.feature)
        plot_split(args.plots_path, data, device_id, args.valid_start,
                   args.test_start, args.feature)

    # Min-Max Scaler #
    scaler = MinMaxScaler()
    data[args.feature] = scaler.fit_transform(data)

    # Split the Dataset #
    copied_data = data.copy()

    if args.multi_step:
        X, y = split_sequence_multi_step(copied_data, args.window,
                                         args.output_size)
    else:
        X, y = split_sequence_uni_step(copied_data, args.window)

    # Get Data Loader #
    train_loader, val_loader, test_loader = get_data_loader(
        X, y, args.train_split, args.test_split, args.batch_size)

    # Early-stopping state.  float('inf') (was a hard-coded 100) guarantees
    # the first validation pass always produces a checkpoint.
    best_val_loss = float('inf')
    best_val_improv = 0

    # Metric histories (accumulated over all epochs/batches).
    train_losses, val_losses = [], []
    val_maes, val_mses, val_rmses = [], [], []
    val_mapes, val_mpes, val_r2s = [], [], []
    test_maes, test_mses, test_rmses = [], [], []
    test_mapes, test_mpes, test_r2s = [], [], []

    # Prepare Network #
    if args.network == 'dnn':
        model = DNN(args.window, args.hidden_size, args.output_size).to(device)
    elif args.network == 'cnn':
        model = CNN(args.window, args.hidden_size, args.output_size).to(device)
    elif args.network == 'rnn':
        model = RNN(args.input_size, args.hidden_size, args.num_layers,
                    args.output_size).to(device)
    elif args.network == 'lstm':
        model = LSTM(args.input_size, args.hidden_size, args.num_layers,
                     args.output_size, args.bidirectional).to(device)
    elif args.network == 'gru':
        model = GRU(args.input_size, args.hidden_size, args.num_layers,
                    args.output_size).to(device)
    elif args.network == 'recursive':
        model = RecursiveLSTM(args.input_size, args.hidden_size,
                              args.num_layers, args.output_size).to(device)
    elif args.network == 'attentional':
        model = AttentionalLSTM(args.input_size, args.qkv, args.hidden_size,
                                args.num_layers, args.output_size,
                                args.bidirectional).to(device)
    else:
        raise NotImplementedError

    if args.mode == 'train':

        # If fine-tuning, start from the source model's weights.
        # NOTE(review): the transfer source is hard-coded to Device ID 12 —
        # confirm this is the intended source checkpoint.
        if args.transfer_learning:
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        args.weights_path, 'BEST_{}_Device_ID_12.pkl'.format(
                            model.__class__.__name__))))

            for param in model.parameters():
                param.requires_grad = True

        # Loss Function #
        criterion = torch.nn.MSELoss()

        # Optimizer #
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     betas=(0.5, 0.999))
        optimizer_scheduler = get_lr_scheduler(optimizer, args)

        # Train and Validation #
        print(
            "Training {} started with total epoch of {} using Driver ID of {}."
            .format(model.__class__.__name__, args.num_epochs, device_id))

        for epoch in range(args.num_epochs):

            # Train #
            for batch, label in train_loader:

                # Data Preparation #
                batch = batch.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred = model(batch)

                # Calculate Loss #
                train_loss = criterion(pred, label)

                # Back Propagation and Update #
                optimizer.zero_grad()
                train_loss.backward()
                optimizer.step()

                # Add items to Lists #
                train_losses.append(train_loss.item())

            print("Epoch [{}/{}]".format(epoch + 1, args.num_epochs))
            print("Train")
            print("Loss : {:.4f}".format(np.average(train_losses)))

            optimizer_scheduler.step()

            # Validation #
            with torch.no_grad():
                for batch, label in val_loader:

                    # Data Preparation #
                    batch = batch.to(device, dtype=torch.float32)
                    label = label.to(device, dtype=torch.float32)

                    # Forward Data #
                    pred_val = model(batch)

                    # Calculate Loss #
                    val_loss = criterion(pred_val, label)
                    val_mae = mean_absolute_error(label.cpu(), pred_val.cpu())
                    val_mse = mean_squared_error(label.cpu(),
                                                 pred_val.cpu(),
                                                 squared=True)
                    val_rmse = mean_squared_error(label.cpu(),
                                                  pred_val.cpu(),
                                                  squared=False)
                    # val_mpe = mean_percentage_error(label.cpu(), pred_val.cpu())
                    # val_mape = mean_absolute_percentage_error(label.cpu(), pred_val.cpu())
                    val_r2 = r2_score(label.cpu(), pred_val.cpu())

                    # Add item to Lists #
                    val_losses.append(val_loss.item())
                    val_maes.append(val_mae.item())
                    val_mses.append(val_mse.item())
                    val_rmses.append(val_rmse.item())
                    # val_mpes.append(val_mpe.item())
                    # val_mapes.append(val_mape.item())
                    val_r2s.append(val_r2.item())

                # Print Statistics #
                print("Validation")
                print("Loss : {:.4f}".format(np.average(val_losses)))
                print(" MAE : {:.4f}".format(np.average(val_maes)))
                print(" MSE : {:.4f}".format(np.average(val_mses)))
                print("RMSE : {:.4f}".format(np.average(val_rmses)))
                # print(" MPE : {:.4f}".format(np.average(val_mpes)))
                # print("MAPE : {:.4f}".format(np.average(val_mapes)))
                print(" R^2 : {:.4f}".format(np.average(val_r2s)))

                # NOTE(review): val_losses accumulates across epochs, so this
                # is a running average over ALL epochs rather than this
                # epoch's loss — confirm before changing the criterion.
                curr_val_loss = np.average(val_losses)

                # Save the model only if validation loss decreased #
                if curr_val_loss < best_val_loss:
                    best_val_loss = min(curr_val_loss, best_val_loss)

                    if args.transfer_learning:
                        torch.save(
                            model.state_dict(),
                            os.path.join(
                                args.weights_path,
                                'BEST_{}_Device_ID_{}_transfer.pkl'.format(
                                    model.__class__.__name__, device_id)))
                    else:
                        torch.save(
                            model.state_dict(),
                            os.path.join(
                                args.weights_path,
                                'BEST_{}_Device_ID_{}.pkl'.format(
                                    model.__class__.__name__, device_id)))

                    print("Best model is saved!\n")
                    best_val_improv = 0

                elif curr_val_loss >= best_val_loss:
                    best_val_improv += 1
                    print("Best Validation has not improved for {} epochs.\n".
                          format(best_val_improv))

                    # Early stopping after 10 epochs without improvement.
                    if best_val_improv == 10:
                        break

    elif args.mode == 'test':

        # Load the checkpoint written during training.  FIX: the
        # transfer-learning branch previously loaded the same non-transfer
        # file as the else branch; it now loads the fine-tuned weights that
        # the train branch saves as '..._transfer.pkl'.
        if args.transfer_learning:
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        args.weights_path,
                        'BEST_{}_Device_ID_{}_transfer.pkl'.format(
                            model.__class__.__name__, device_id))))
        else:
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        args.weights_path, 'BEST_{}_Device_ID_{}.pkl'.format(
                            model.__class__.__name__, device_id))))

        print("{} for Device ID {} is successfully loaded!".format(
            model.__class__.__name__, device_id))

        with torch.no_grad():

            for batch, label in test_loader:

                # Data Preparation #
                batch = batch.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred_test = model(batch)

                # Convert to Original Value Range #
                pred_test = pred_test.data.cpu().numpy()
                label = label.data.cpu().numpy()

                if not args.multi_step:
                    label = label.reshape(-1, 1)

                pred_test = scaler.inverse_transform(pred_test)
                label = scaler.inverse_transform(label)

                # Calculate Loss #
                test_mae = mean_absolute_error(label, pred_test)
                test_mse = mean_squared_error(label, pred_test, squared=True)
                test_rmse = mean_squared_error(label, pred_test, squared=False)
                # test_mpe = mean_percentage_error(label, pred_test)
                # test_mape = mean_absolute_percentage_error(label, pred_test)
                test_r2 = r2_score(label, pred_test)

                # Add item to Lists #
                test_maes.append(test_mae.item())
                test_mses.append(test_mse.item())
                test_rmses.append(test_rmse.item())
                # test_mpes.append(test_mpe.item())
                # test_mapes.append(test_mape.item())
                test_r2s.append(test_r2.item())

            # Print Statistics #
            print("Test {}".format(model.__class__.__name__))
            print(" MAE : {:.4f}".format(np.average(test_maes)))
            print(" MSE : {:.4f}".format(np.average(test_mses)))
            print("RMSE : {:.4f}".format(np.average(test_rmses)))
            # print(" MPE : {:.4f}".format(np.average(test_mpes)))
            # print("MAPE : {:.4f}".format(np.average(test_mapes)))
            print(" R^2 : {:.4f}".format(np.average(test_r2s)))

            # Plot the last test batch.  FIX: both branches previously passed
            # transfer_learning=False; the fine-tuned run now records True.
            if args.transfer_learning:
                test_plot(pred_test,
                          label,
                          args.plots_path,
                          args.feature,
                          device_id,
                          model,
                          transfer_learning=True)
            else:
                test_plot(pred_test,
                          label,
                          args.plots_path,
                          args.feature,
                          device_id,
                          model,
                          transfer_learning=False)
                                     train_dataloader,
                                     valid_dataloader,
                                     learning_rate=learning_rate,
                                     patience=5)
    # Evaluate the previous section's model on the test split and persist
    # its results.
    test_result = TestModel(model, test_dataloader, max_speed)
    StoreData(result_dict, model_name, train_result, test_result, directory,
              model, random_seed, save_model)

    # GRU
    # Reload the modules so that any code edits are picked up between runs.
    importlib.reload(models)
    from models import GRU
    importlib.reload(utils)
    from utils import TrainModel, TestModel
    model_name = 'GRU'
    print(model_name)
    # A.shape[0] sizes the GRU; presumably A is the graph adjacency matrix
    # — TODO(review): confirm against models.GRU.
    gru = GRU(A.shape[0])
    gru, train_result = TrainModel(gru,
                                   train_dataloader,
                                   valid_dataloader,
                                   learning_rate=learning_rate,
                                   patience=5)
    test_result = TestModel(gru, test_dataloader, max_speed)
    # NOTE(review): this passes `model` (the previous section's network) to
    # StoreData while the results belong to `gru` — looks like a copy-paste
    # slip; confirm whether `gru` should be stored instead.
    StoreData(result_dict, model_name, train_result, test_result, directory,
              model, random_seed, save_model)

    # GRU-I
    importlib.reload(models)
    from models import GRU
    importlib.reload(utils)
    from utils import TrainModel, TestModel
    model_name = 'GRUI'
示例#26
0
# Number of frames per clip (dimension 1 of each video tensor).
video_lengths = [video.shape[1] for video in videos]
''' set models '''

# Model hyper-parameters.  NOTE(review): several values are carried over
# from DCGAN or are admitted guesses — confirm against the trained weights.
img_size = 96
nc = 3  # image channels (RGB)
ndf = 64  # from dcgan
ngf = 64  # generator feature maps
d_E = 10  # size of the per-frame random epsilon fed to the GRU
hidden_size = 100  # guess
d_C = 50  # content-code size
d_M = d_E  # motion-code size matches epsilon size
nz = d_C + d_M  # total latent size per frame
criterion = nn.BCELoss()

gen_i = Generator_I(nc, ngf, nz, ngpu=ngpu)
gru = GRU(d_E, hidden_size, gpu=cuda)
gru.initWeight()
''' prepare for test '''

# Reusable label tensor for the GAN losses; filled elsewhere.
label = torch.FloatTensor()


def save_video(fake_video, epoch):
    """Write one generated video to result_videos/ as an .mp4.

    `fake_video` is expected in [0, 1]; it is rescaled to uint8 before
    being handed to scikit-video.
    """
    frames = (fake_video * 255).astype(np.uint8)
    out_dir = os.path.join(current_path, 'result_videos')
    out_file = os.path.join(out_dir, 'Video_epoch-%d.mp4' % epoch)
    skvideo.io.vwrite(out_file, frames)


trained_path = os.path.join(current_path, 'trained_models')
示例#27
0
def main():
    """Entry point: train MoCoGAN (image/video discriminators, image
    generator, and a GRU motion model).

    Parses CLI arguments, prepares the video dataset, builds the models,
    optionally resumes from a saved state, runs the adversarial training
    loop with periodic checkpointing and sample-video dumps, and finally
    plots the loss curves to plot.png.
    """
    parser = argparse.ArgumentParser(description='Start trainning MoCoGAN.....')
    parser.add_argument('--batch-size', type=int, default=16,
                        help='set batch_size')
    parser.add_argument('--epochs', type=int, default=60000,
                        help='set num of iterations')
    parser.add_argument('--pre-train', type=int, default=-1,
                        help='set 1 when you use pre-trained models')
    parser.add_argument('--img_size', type=int, default=96,
                        help='set the input image size of frame')
    parser.add_argument('--data', type=str, default='data',
                        help='set the path for the direcotry containing dataset')
    parser.add_argument('--channel', type=int, default=3,
                        help='set the no. of channel of the frame')
    parser.add_argument('--hidden', type=int, default=100,
                        help='set the hidden layer size for gru')
    parser.add_argument('--dc', type=int, default=50,
                        help='set the size of motion vector')
    parser.add_argument('--de', type=int, default=10,
                        help='set the size of randomly generated epsilon')
    # FIX: --lr and --beta previously declared type=int with float defaults,
    # so passing e.g. "--lr 0.0002" raised ValueError; they are floats.
    parser.add_argument('--lr', type=float, default=0.0002,
                        help='set the learning rate')
    parser.add_argument('--beta', type=float, default=0.5,
                        help='set the beta for the optimizer')
    parser.add_argument('--trained_path', type=str, default='trained_models',
                        help='set the path were to trained models are saved')
    parser.add_argument('--T', type=int, default=16,
                        help='set the no. of frames to be selected')

    args = parser.parse_args()

    batch_size = args.batch_size
    pre_train = args.pre_train
    img_size = args.img_size
    channel = args.channel
    d_E = args.de  # per-frame epsilon (noise) size fed to the GRU
    hidden_size = args.hidden
    d_C = args.dc  # content-code size
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    args.device = torch.device('cuda:0') if torch.cuda.is_available() else 'cpu'
    cuda = 1 if torch.cuda.is_available() else -1  # 1/-1 flag, not a bool

    # Making required folder
    if not os.path.exists('./generated_videos'):
        os.makedirs('./generated_videos')
    if not os.path.exists('./trained_models'):
        os.makedirs('./trained_models')
    if not os.path.exists('./resized_data'):
        os.makedirs('./resized_data')

    T = args.T
    start_epoch = 1
    seed = 0
    np.random.seed(seed)
    torch.manual_seed(seed)  # seed torch too, for reproducible init/noise
    if cuda == 1:
        torch.cuda.set_device(0)

    videos, current_path = preprocess(args)
    num_vid = len(videos)
    d_M = d_E  # motion-code size matches epsilon size
    nz = d_C + d_M  # total latent size per frame
    criterion = nn.BCELoss()

    # setup model #
    dis_i = Image_Discriminator(channel)
    dis_v = Video_Discriminator()
    gen_i = Generator(channel, nz)
    gru = GRU(d_E, hidden_size, gpu=cuda)
    gru.initWeight()

    # setup optimizer #
    lr = args.lr
    beta = args.beta
    optim_Di = optim.Adam(dis_i.parameters(), lr=lr, betas=(beta, 0.999))
    optim_Dv = optim.Adam(dis_v.parameters(), lr=lr, betas=(beta, 0.999))
    optim_Gi = optim.Adam(gen_i.parameters(), lr=lr, betas=(beta, 0.999))
    optim_GRU = optim.Adam(gru.parameters(), lr=lr, betas=(beta, 0.999))

    if cuda == 1:
        dis_i.cuda()
        dis_v.cuda()
        gen_i.cuda()
        gru.cuda()
        criterion.cuda()

    trained_path = os.path.join(current_path, args.trained_path)
    video_lengths = [video.shape[1] for video in videos]  # frames per clip

    # Loss histories.  FIX: previously only initialized when pre_train == -1,
    # so any value other than -1/1 crashed with NameError inside checkpoint().
    Gi_loss = []
    Gv_loss = []
    Di_loss = []
    Dv_loss = []

    if pre_train == 1:
        # Resume: restore histories, epoch counter, weights and optimizers.
        # (renamed from `checkpoint`, which collided with the helper below)
        saved = torch.load(trained_path + '/last_state')
        start_epoch = saved['epoch']
        Gi_loss = saved['Gi']
        Gv_loss = saved['Gv']
        Dv_loss = saved['Dv']
        Di_loss = saved['Di']
        dis_i.load_state_dict(torch.load(trained_path + '/Image_Discriminator.model'))
        dis_v.load_state_dict(torch.load(trained_path + '/Video_Discriminator.model'))
        gen_i.load_state_dict(torch.load(trained_path + '/Generator.model'))
        gru.load_state_dict(torch.load(trained_path + '/GRU.model'))
        optim_Di.load_state_dict(torch.load(trained_path + '/Image_Discriminator.state'))
        optim_Dv.load_state_dict(torch.load(trained_path + '/Video_Discriminator.state'))
        optim_Gi.load_state_dict(torch.load(trained_path + '/Generator.state'))
        optim_GRU.load_state_dict(torch.load(trained_path + '/GRU.state'))
        print("Using Pre-trained model")

    def checkpoint(model, optimizer, epoch):
        """Persist one model/optimizer pair plus the shared loss history."""
        state = {'epoch': epoch + 1, 'Gi': Gi_loss, 'Gv': Gv_loss,
                 'Dv': Dv_loss, 'Di': Di_loss}
        torch.save(state, os.path.join(trained_path, 'last_state'))
        filename = os.path.join(trained_path, '%s' % (model.__class__.__name__))
        torch.save(model.state_dict(), filename + '.model')
        torch.save(optimizer.state_dict(), filename + '.state')

    def generate_z(num_frame):
        """Return a (batch_size, num_frame, nz) latent: GRU motion codes
        concatenated with a content code repeated across frames."""
        eps = Variable(torch.randn(batch_size, d_E))
        z_c = Variable(torch.randn(batch_size, 1, d_C))
        z_c = z_c.repeat(1, num_frame, 1)
        if cuda == 1:
            z_c, eps = z_c.cuda(), eps.cuda()
        # Initialising the hidden var for GRU
        gru.initHidden(batch_size)
        z_m = gru(eps, num_frame).transpose(1, 0)
        z = torch.cat((z_m, z_c), 2)  # (batch_size, num_frame, nz)
        return z

    for epoch in range(start_epoch, args.epochs + 1):
        start_time = time.time()
        # (batch_size, channel, T, img_size, img_size)
        real_videos = Variable(randomVideo(videos, batch_size, T))
        if cuda == 1:
            real_videos = real_videos.cuda()
        real_imgs = real_videos[:, :, np.random.randint(0, T), :, :]

        # Pick a clip length at random so the motion codes vary in length.
        num_frame = video_lengths[np.random.randint(0, num_vid)]
        # Generate Z having num_frame no. of frames

        Z = generate_z(num_frame).view(batch_size, num_frame, nz, 1, 1)
        # So that conv layers map (nz, 1, 1) noise to
        # (channel, img_size, img_size) image frames.
        Z = sample(Z, T).contiguous().view(batch_size * T, nz, 1, 1)
        fake_vid = gen_i(Z).view(batch_size, T, channel, img_size, img_size)
        fake_vid = fake_vid.transpose(2, 1)
        # sample a fake image from fake_vid frames
        fake_img = fake_vid[:, :, np.random.randint(0, T), :, :]

        # One-sided label smoothing: real targets are 0.9, fakes are 0.0.
        r_label = Variable(torch.FloatTensor(batch_size, 1).fill_(0.9)).to(args.device)
        f_label = Variable(torch.FloatTensor(batch_size, 1).fill_(0.0)).to(args.device)

        # Training Discriminators
        # Video Discriminator
        dis_v.zero_grad()
        outputs = dis_v(real_videos)
        loss = criterion(outputs, r_label)
        loss.backward()
        real_loss = loss
        outputs = dis_v(fake_vid.detach())
        loss = criterion(outputs, f_label)
        loss.backward()
        fake_loss = loss
        dv_loss = real_loss + fake_loss

        optim_Dv.step()

        # Image Discriminator
        dis_i.zero_grad()
        r_outputs = dis_i(real_imgs)
        lossi = criterion(r_outputs, r_label)
        lossi.backward()
        real_lossi = lossi
        f_outputs = dis_i(fake_img.detach())
        fake_lossi = criterion(f_outputs, f_label)
        fake_lossi.backward()
        di_loss = real_lossi + fake_lossi
        optim_Di.step()

        # Training Generator and GRU
        gen_i.zero_grad()
        gru.zero_grad()
        gen_outputs = dis_v(fake_vid)
        gv_loss = criterion(gen_outputs, r_label)
        # retain_graph so the image-discriminator loss can reuse the graph.
        gv_loss.backward(retain_graph=True)
        gen_out = dis_i(fake_img)
        gi_loss = criterion(gen_out, r_label)
        gi_loss.backward()
        optim_Gi.step()
        optim_GRU.step()

        Gi_loss.append(gi_loss.item())
        Gv_loss.append(gv_loss.item())
        Dv_loss.append(dv_loss.item())
        Di_loss.append(di_loss.item())

        end_time = time.time()

        if epoch % 100 == 0:
            print('[%d/%d] Time_taken: %f || Gi loss: %.3f || Gv loss: %.3f || Di loss: %.3f || Dv loss: %.3f'
                  % (epoch, args.epochs, end_time - start_time, gi_loss, gv_loss, di_loss, dv_loss))

        if epoch % 5000 == 0:
            checkpoint(dis_i, optim_Di, epoch)
            checkpoint(dis_v, optim_Dv, epoch)
            checkpoint(gen_i, optim_Gi, epoch)
            checkpoint(gru, optim_GRU, epoch)

        if epoch % 1000 == 0:
            # NOTE(review): save_video is called with three arguments here;
            # confirm its signature accepts current_path.
            save_video(fake_vid[0].data.cpu().numpy().transpose(1, 2, 3, 0),
                       epoch, current_path)

    # Plot the loss curves.
    plt.plot(Gi_loss, label='Image Generator')
    plt.plot(Gv_loss, label='Video Generator')
    plt.plot(Di_loss, label='Image Discriminator')
    plt.plot(Dv_loss, label='Video Discriminator')
    plt.legend()
    plt.savefig("plot.png")
for idx in range(len(model_types)):
    print("\n########## Running Main Loop ##########################")
    # Per-model bookkeeping: perplexity/loss histories and timings.
    train_ppls = []
    train_losses = []
    val_ppls = []
    val_losses = []
    best_val_so_far = np.inf
    times = []

    # Rebuild the architecture that matches this checkpoint.
    mtype = model_types[idx]
    if mtype == 'RNN':
        model = RNN(emb_size=embSize[idx], hidden_size=hiddenSize[idx],
                    seq_len=seqLen[idx], batch_size=batchSize[idx],
                    vocab_size=vocab_size, num_layers=numLayers[idx],
                    dp_keep_prob=dropOut[idx])
    elif mtype == 'GRU':
        model = GRU(emb_size=embSize[idx], hidden_size=hiddenSize[idx],
                    seq_len=seqLen[idx], batch_size=batchSize[idx],
                    vocab_size=vocab_size, num_layers=numLayers[idx],
                    dp_keep_prob=dropOut[idx])
    else:
        model = TRANSFORMER(vocab_size=vocab_size, n_units=hiddenSize[idx],
                            n_blocks=numLayers[idx], dropout=1 - dropOut[idx])

    # Restore the checkpoint and sync the eval-time dimensions.
    model.load_state_dict(torch.load(path[idx]))
    model.batch_size = batchSize[idx]
    model.seq_len = seqLen[idx]
    model.vocab_size = vocab_size
    model = model.to(device)

    # MAIN LOOP: one validation pass recording per-timestep loss.
    val_loss = run_epoch(model, valid_data, model_types[idx])
    total_loss[idx, :] = val_loss
    time = np.arange(1, seqLen[idx] + 1)
    print('Plotting graph...')
示例#29
0
# Device string used by the non-default branch below; the default branch
# manages devices through ngpu instead.
device = 'cuda'

# Video discriminator is shared by both model variants.
dis_v = Discriminator_V(nc, ndf, T=T, ngpu=ngpu)
if args.model == 'default':
    # Original MoCoGAN-style image discriminator/generator.
    dis_i = Discriminator_I(nc, ndf, ngpu=ngpu)
    gen_i = Generator_I(nc, ngf, nz, ngpu=ngpu)
else:
    # Alternative image models; latent/n_mlp presumably follow the
    # StyleGAN2 reference config — TODO(review): confirm.
    args.latent = 512
    args.n_mlp = 8
    dis_i = Discriminator(
        args.size, channel_multiplier=args.channel_multiplier).to(device)
    gen_i = Generator(args.size,
                      args.latent,
                      args.n_mlp,
                      channel_multiplier=args.channel_multiplier).to(device)
# Motion model producing per-frame latent codes.
gru = GRU(d_E, hidden_size, gpu=cuda)
gru.initWeight()
''' prepare for train '''
# Reusable label tensor for the GAN losses; filled by the training loop.
label = torch.FloatTensor()


def timeSince(since):
    """Format the wall-clock time elapsed since *since* (a time.time()
    timestamp) as 'Dd Hh Mm Ss'."""
    elapsed = time.time() - since
    days, remainder = divmod(elapsed, (60**2) * 24)
    hours, remainder = divmod(remainder, 60**2)
    minutes, seconds = divmod(remainder, 60)
    return '%dd %dh %dm %ds' % (days, hours, minutes, seconds)

    print("Generation:")
    raw_data = ptb_raw_data(data_path=DATAPATH)
    train_data, valid_data, test_data, word_to_id, id_2_word = raw_data
    vocab_size = len(word_to_id)
    RNN = RNN(emb_size=200,
              hidden_size=1500,
              seq_len=0,
              batch_size=20,
              num_layers=2,
              vocab_size=vocab_size,
              dp_keep_prob=0.35).to(device)
    GRU = GRU(emb_size=200,
              hidden_size=1500,
              seq_len=0,
              batch_size=20,
              num_layers=2,
              vocab_size=vocab_size,
              dp_keep_prob=0.35).to(device)
    for seq_len in seq_lens:
        print("Sequence length: ", seq_len)
        #RNN output
        #Load "Best params model"
        RNN.seq_len = seq_len
        RNN.load_state_dict(
            torch.load(RNN_bestparams_path, map_location=device))
        RNN_generation = generation(RNN, train_data, valid_data, test_data,
                                    word_to_id, id_2_word, seq_len, BatchSize)
        #         print("RNN generated:")
        #         print(RNN_generation)
        with open(os.path.join(OUTPUTPATH, 'RNN_%s_samples.txt' % (seq_len)),