Example #1
def main():
    model = LSTM(settings.vocab_size, settings.word_embedding_size,
                 settings.hidden_size, settings.num_layers, settings.out_dim, settings.drop_out)
    # initialize the word embedding layer with pre-trained vectors
    dataset = Dataset(args.data)
    model.word_embed.weight = nn.Parameter(torch.from_numpy(dataset.get_wordembedding()))
    if torch.cuda.is_available():
        torch.cuda.manual_seed(settings.seed)
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=settings.lr, weight_decay=1e-5)
    criteria = nn.CrossEntropyLoss()
    best_dev_acc = 0.0
    best_test_acc = 0.0

    for i in range(dataset.size // settings.batch_size * settings.max_epochs):
        batch_data = dataset.get_batch()
        loss = train(model, batch_data, optimizer, criteria)
        if (i + 1) % settings.validate_freq == 0:
            print("validating...")
            dev_acc = test(model, dataset.dev_data)
            test_acc = test(model, dataset.test_data)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                best_test_acc = test_acc
                torch.save(model, os.path.join(args.model_dir, "sa_{}.model".format(best_dev_acc)))
            epoch = i * settings.batch_size / float(dataset.size)
            log_line = ("epoch: {}, dev acc: {}, test acc: {}, "
                        "batch loss: {}, best dev acc: {}, best test acc: {}".format(
                            epoch, dev_acc, test_acc, loss.item(), best_dev_acc, best_test_acc))
            with open(os.path.join(args.model_dir, "log.txt"), "a") as logger:
                logger.write(log_line + "\n")
            print(log_line)
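Example #1 calls a `train` helper that is not shown. A minimal sketch of one training step, assuming `batch_data` unpacks into an input tensor and a label tensor (that layout is an assumption, not from the source):

def train(model, batch_data, optimizer, criteria):
    # One optimization step over a single batch (batch layout assumed).
    inputs, labels = batch_data
    if torch.cuda.is_available():
        inputs, labels = inputs.cuda(), labels.cuda()
    optimizer.zero_grad()            # clear accumulated gradients
    logits = model(inputs)           # forward pass
    loss = criteria(logits, labels)  # cross-entropy against gold labels
    loss.backward()                  # backpropagate
    optimizer.step()                 # update parameters
    return loss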
Example #2
def main(args):
    if args.model == 'base':
        postprocessing = None
    elif args.model == 'jump':
        postprocessing = pick_fix_length(400, PAD_TOKEN)
    TEXT = data.Field(lower=True,
                      postprocessing=postprocessing,
                      pad_token=PAD_TOKEN,
                      include_lengths=True)
    LABEL = data.Field(sequential=False, pad_token=None, unk_token=None)

    train, test = datasets.IMDB.splits(TEXT, LABEL)

    TEXT.build_vocab(train)
    LABEL.build_vocab(train)

    train_iter, test_iter = data.BucketIterator.splits(
        (train, test),
        batch_sizes=(args.batch, args.batch * 4),
        device=args.gpu,
        repeat=False,
        sort_within_batch=True)

    if args.model == 'base':
        model = LSTM(len(TEXT.vocab), 300, 128, len(LABEL.vocab))
    elif args.model == 'jump':
        model = LSTMJump(len(TEXT.vocab), 300, 128, len(LABEL.vocab), args.R,
                         args.K, args.N, 80, 8)
    model.load_pretrained_embedding(
        get_word2vec(TEXT.vocab.itos,
                     '.vector_cache/GoogleNews-vectors-negative300.bin'))
    model.cuda(args.gpu)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    max_accuracy = 0
    for i in range(args.epoch):
        print('Epoch: {}'.format(i + 1))
        sum_loss = 0
        model.train()
        for batch in train_iter:
            optimizer.zero_grad()
            xs, lengths = batch.text
            loss = model(xs, lengths, batch.label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            optimizer.step()
            sum_loss += loss.item()
        print(f'Loss: {sum_loss / len(train_iter)}')
        sum_correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for batch in test_iter:
                y = model.inference(*batch.text)
                sum_correct += y.eq(batch.label).sum().float()
                total += batch.label.size(0)
        accuracy = (sum_correct / total).item()
        max_accuracy = max(accuracy, max_accuracy)
        print(f'Accuracy: {accuracy}')
    print(f'Max Accuracy: {max_accuracy}')
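Example #2's `pick_fix_length` postprocessor is not shown. In legacy torchtext a Field's `postprocessing` runs on numericalized examples before they become a tensor; a minimal sketch under that assumption (the `(batch, vocab)` signature and the pad-or-truncate behavior are my assumptions):

def pick_fix_length(length, pad_token):
    # Pad or truncate every numericalized example to exactly `length` tokens.
    def postprocess(batch, vocab):
        pad_idx = vocab.stoi[pad_token]
        return [(ex + [pad_idx] * (length - len(ex)))[:length] for ex in batch]
    return postprocess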
Example #3
def main(opt):
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize,
                                   shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node
    opt.n_existing_node = all_node_num

    net = LSTM(opt, hidden_state=opt.state_dim*5)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=1, eps=1e-6)

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch': list(range(1, len(train_loss_ls) + 1)),
                       'train_loss': train_loss_ls,
                       'valid_loss': valid_loss_ls,
                       'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
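Example #3 relies on an `EarlyStopping` helper that is not shown. A minimal sketch consistent with how it is called here, tracking the best validation loss, writing `checkpoint.pt` into the output directory, and raising `early_stop` after `patience` epochs without improvement (the internals are assumptions):

class EarlyStopping:
    def __init__(self, patience=7, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.best_loss = float('inf')
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss, model, output_dir):
        if val_loss < self.best_loss:
            # Improvement: reset the counter and checkpoint the model.
            self.best_loss = val_loss
            self.counter = 0
            torch.save(model.state_dict(), output_dir + '/checkpoint.pt')
            if self.verbose:
                print('Validation loss improved; checkpoint saved.')
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True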
Example #4
def create_model():
    model = LSTM(input_size=input_size,
                 num_classes=num_classes,
                 hidden=args.hidden_unit,
                 num_layers=args.num_layers,
                 mean_after_fc=args.mean_after_fc,
                 mask_empty_frame=args.mask_empty_frame)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    return (model, optimizer)
Example #5
def load_model():
    print("==> loading existing lstm model")
    model_info = torch.load(model_path)
    model = LSTM(input_size=input_size,
                 num_classes=model_info['num_classes'],
                 hidden=model_info['hidden'],
                 num_layers=model_info['num_layers'],
                 mean_after_fc=model_info['mean_after_fc'],
                 mask_empty_frame=model_info['mask_empty_frame'])
    model.cuda()
    model.load_state_dict(model_info['state_dict'])
    best_acc = model_info['best_acc']
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    optimizer.load_state_dict(model_info['optimizer'])
    return (model, optimizer)
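Examples #4 and #5 share a checkpoint format: `load_model` reads fixed keys out of `model_info`. A matching save routine, sketched purely from the keys consumed above (the `best_acc` argument and the reuse of `model_path` are assumptions):

def save_model(model, optimizer, best_acc):
    # Keys mirror exactly what load_model above reads back.
    model_info = {
        'num_classes': num_classes,
        'hidden': args.hidden_unit,
        'num_layers': args.num_layers,
        'mean_after_fc': args.mean_after_fc,
        'mask_empty_frame': args.mask_empty_frame,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'best_acc': best_acc,
    }
    torch.save(model_info, model_path)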
Example #6
def test(test, feature, model, hidden, layer, output, index2char, index2phone, phone_map, phone2index):
	ans = open(output,'w')
	ans.write('id,phone_sequence\n')
	test_set = Feature_Dataset(feature,'test')
	if feature == 'mfcc':
		feature_dim = 39
	elif feature == 'fbank':
		feature_dim = 69
	elif feature == 'all':
		feature_dim = 108
	
	if model == 'LSTM':
		test_model = LSTM(feature_dim, hidden, layer)
	elif model == 'BiLSTM':
		test_model = LSTM(feature_dim,hidden,layer,bi = True)
	elif model == 'C_RNN':
		group_size = 5
		test_model = C_RNN(group_size, feature_dim, hidden, layer)    
	
	checkpoint = torch.load(test)
	test_model.load_state_dict(checkpoint['model'])
	test_model.eval()
	if USE_CUDA:
		test_model = test_model.cuda()		
	for i in tqdm(range(1,len(test_set)+1)):
		data = test_set[i-1]
		speaker = data[0]
		test_feature = Variable(data[1].float())
		test_hidden = test_model.init_hidden()
		pred = torch.max(test_model(test_feature, test_hidden), 1)[1]
		result = test_trim(index2char, index2phone, phone_map, phone2index, pred.data.cpu().numpy())
		ans.write('{},{}\n'.format(speaker,result))
	ans.close()
Example #7
def main():
    global args, best_prec1
    best_prec1 = 1e6
    args = parser.parse_args()
    args.original_lr = 1e-6
    args.lr = 1e-6
    args.momentum = 0.95
    args.decay = 5 * 1e-4
    args.start_epoch = 0
    args.epochs = 5000
    args.steps = [-1, 1, 100, 150]
    args.scales = [1, 1, 1, 1]
    args.workers = 4
    args.seed = time.time()
    args.print_freq = 30
    args.feature_size = 100
    args.lSeq = 5
    wandb.config.update(args)
    wandb.run.name = f"Default_{wandb.run.name}" if (args.task == wandb.run.name) else f"{args.task}_{wandb.run.name}"

    conf = configparser.ConfigParser()
    conf.read(args.config)
    # print(conf)
    TRAIN_DIR = conf.get("lstm", "train")
    VALID_DIR = conf.get("lstm", "valid")
    TEST_DIR = conf.get("lstm", "test")
    LOG_DIR = conf.get("lstm", "log")
    create_dir_not_exist(LOG_DIR)
    # TODO: train_list to train_file
    train_list = [os.path.join(TRAIN_DIR, item) for item in os.listdir(TRAIN_DIR)]
    val_list = [os.path.join(VALID_DIR, item) for item in os.listdir(VALID_DIR)]
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(int(args.seed))
    model = LSTM(args.feature_size, args.feature_size, args.feature_size)
    model = model.cuda()
    criterion = nn.MSELoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=args.decay)
    model = DataParallel_withLoss(model, criterion)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion, epoch)
        with open(os.path.join(LOG_DIR, args.task + ".txt"), "a") as f:
            f.write("epoch " + str(epoch) + "  MSELoss: " + str(float(prec1)))
            f.write("\n")
        wandb.save(os.path.join(LOG_DIR, args.task + ".txt"))
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MSELoss {MSELoss:.3f} '.format(MSELoss=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.task, epoch=epoch, path=os.path.join(LOG_DIR, args.task))
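Example #7 calls an `adjust_learning_rate` helper driven by the `args.steps`/`args.scales` pair set up at the top. A sketch of the step-and-scale schedule those fields imply (this is an inference from the argument names, not the original helper):

def adjust_learning_rate(optimizer, epoch):
    # Scale the base lr once for every step boundary the epoch has passed.
    lr = args.original_lr
    for i, step in enumerate(args.steps):
        if epoch >= step:
            scale = args.scales[i] if i < len(args.scales) else 1
            lr = lr * scale
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr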
Example #8
from Parameters import Parameter
import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.optim as optim
from DataProcessing import DataProcessing
from model import LSTM
import numpy as np
import time

models = LSTM().double()
models = models.cuda()
loss_function = nn.BCELoss(reduction='mean')
optimizer = optim.Adam(models.parameters())

# Fetch (waveform, label) pairs from the data pipeline and train on them below.

DataObject = DataProcessing()

for epoch in range(300):  # normally you would not train for 300 epochs; this is toy data
    print("Beginning epoch {}".format(epoch))
    StepsOfEpoch = 0
    DataMethodObject = DataObject.FetchInputsAndLabels()
    for wav, label in DataMethodObject:
        then = time.time()
        StepsOfEpoch += 1
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
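The excerpt breaks off inside the batch loop. A typical continuation for this setup (a sketch, not the original code; the `.double()` casts and the model emitting probabilities for BCELoss are assumptions):

        models.zero_grad()                    # clear accumulated gradients
        wav = wav.double().cuda()             # model was moved to double/cuda above
        label = label.double().cuda()
        scores = models(wav)                  # forward pass; BCELoss expects probabilities
        loss = loss_function(scores, label)
        loss.backward()                       # backpropagate
        optimizer.step()                      # update parameters
        print("step {} took {:.3f}s".format(StepsOfEpoch, time.time() - then))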
Example #9
class dl_model():

    def __init__(self, mode):

        # read the config file which contains the parameters
        self.config_file = read_yaml()
        self.mode = mode

        arch_name = '_'.join(
            [self.config_file['rnn'], str(self.config_file['num_layers']), str(self.config_file['hidden_dim'])])
        self.config_file['dir']['models'] = self.config_file['dir']['models'].split('/')[0] + '_' + arch_name + '/'
        self.config_file['dir']['plots'] = self.config_file['dir']['plots'].split('/')[0] + '_' + arch_name + '/'

        #if not os.path.exists(self.config_file['dir']['models']):
        #    os.mkdir(self.config_file['dir']['models'])
        #if not os.path.exists(self.config_file['dir']['plots']):
        #    os.mkdir(self.config_file['dir']['plots'])

        if self.config_file['rnn'] == 'LSTM':
            from model import LSTM as Model
        elif self.config_file['rnn'] == 'GRU':
            from model import GRU as Model
        else:
            print("Model not implemented")
            exit(0)

        self.cuda = (self.config_file['cuda'] and torch.cuda.is_available())
        self.output_dim = self.config_file['num_phones']

        if mode == 'train' or mode == 'test':

            self.plots_dir = self.config_file['dir']['plots']
            # store hyperparameters
            self.total_epochs = self.config_file['train']['epochs']
            self.test_every = self.config_file['train']['test_every_epoch']
            self.test_per = self.config_file['train']['test_per_epoch']
            self.print_per = self.config_file['train']['print_per_epoch']
            self.save_every = self.config_file['train']['save_every']
            self.plot_every = self.config_file['train']['plot_every']
            # dataloader which returns batches of data
            self.train_loader = timit_loader('train', self.config_file)
            self.test_loader = timit_loader('test', self.config_file)

            self.start_epoch = 1
            self.test_acc = []
            self.train_losses, self.test_losses = [], []
            # declare model
            self.model = Model(self.config_file, weights=self.train_loader.weights)

        else:

            self.model = Model(self.config_file, weights=None)

        if self.cuda:
            self.model.cuda()

        # resume training from some stored model
        if self.mode == 'train' and self.config_file['train']['resume']:
            self.start_epoch, self.train_losses, self.test_losses, self.test_acc = self.model.load_model(
                mode, self.config_file['rnn'], self.model.num_layers, self.model.hidden_dim)
            self.start_epoch += 1

        # load best model for testing/feature extraction
        elif self.mode == 'test' or mode == 'test_one':
            self.model.load_model(mode, self.config_file['rnn'], self.model.num_layers, self.model.hidden_dim)

        self.replacement = {'aa': ['ao'], 'ah': ['ax', 'ax-h'], 'er': ['axr'], 'hh': ['hv'], 'ih': ['ix'],
                            'l': ['el'], 'm': ['em'], 'n': ['en', 'nx'], 'ng': ['eng'], 'sh': ['zh'],
                            'pau': ['pcl', 'tcl', 'kcl', 'bcl', 'dcl', 'gcl', 'h#', 'epi', 'q'],
                            'uw': ['ux']}
Example #10
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Build the data loader
    dataset, targets = load_dataset()
    print('\nThe data are loaded')

    # Build the models
    lstm = LSTM(args.input_size, args.output_size)
    print('The model is built')
    print(lstm)

    if torch.cuda.is_available():
        lstm.cuda()

    # Loss and Optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(lstm.parameters(), lr=args.learning_rate)

    # Train the Models
    total_time = 0
    sm = 50  # start saving models after 50 epochs

    for epoch in range(args.num_epochs):
        print('\nepoch ' + str(epoch) + ':')
        avg_loss = 0
        start = time.time()

        for i in range(0, len(dataset), args.batch_size):
            lstm.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = bi.view(-1, 1, 32)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = lstm(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.item()
            loss.backward()
            optimizer.step()

        epoch_avg_loss = avg_loss / (len(dataset) / args.batch_size)
        print('--average loss:', epoch_avg_loss)

        end = time.time()
        epoch_time = end - start
        total_time = total_time + epoch_time
        print('time per epoch:', epoch_time)

        # save the data into csv
        data = [epoch_avg_loss]
        with open(os.path.join(args.model_path, 'lstm_loss.csv'), 'a+') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(data)

        if epoch == sm:
            model_path = 'lstm_' + str(sm) + '.pkl'
            torch.save(lstm.state_dict(),
                       os.path.join(args.model_path, model_path))
            sm = sm + args.save_step

    model_path = 'lstm_final.pkl'
    torch.save(lstm.state_dict(), os.path.join(args.model_path, model_path))
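The `get_input` helper used in the loop above is not shown. A sketch consistent with its call site, assuming `dataset` and `targets` are index-aligned sequences of array-like examples:

def get_input(i, dataset, targets, batch_size):
    # Slice one mini-batch starting at index i and convert it to tensors.
    bi = torch.stack([torch.as_tensor(x, dtype=torch.float32)
                      for x in dataset[i:i + batch_size]])
    bt = torch.stack([torch.as_tensor(t, dtype=torch.float32)
                      for t in targets[i:i + batch_size]])
    return bi, bt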
Example #11
def main():
    '''
    Main purpose: compute the error rate on the test data.
    '''
    parser = argparse.ArgumentParser()
    # Observed length of the trajectory parameter
    parser.add_argument('--obs_length',
                        type=int,
                        default=240,
                        help='Observed length of the trajectory')
    # Predicted length of the trajectory parameter
    # parser.add_argument('--pred_length', type=int, default=378-60-1,
    #                     help='Predicted length of the trajectory')

    parser.add_argument('--pred_length',
                        type=int,
                        default=240,
                        help='Predicted length of the trajectory')
    # Model to be loaded
    parser.add_argument('--epoch',
                        type=int,
                        default=199,
                        help='Epoch of model to be loaded')
    # cuda support
    parser.add_argument('--use_cuda',
                        action="store_true",
                        default=True,
                        help='Use GPU or not')

    # gru model
    parser.add_argument('--gru',
                        action="store_true",
                        default=False,
                        help='True : GRU cell, False: LSTM cell')
    # method selection
    parser.add_argument(
        '--method',
        type=int,
        default=1,
        help=
        'Method of lstm will be used (1 = social lstm, 2 = obstacle lstm, 3 = vanilla lstm)'
    )

    # Parse the parameters
    sample_args = parser.parse_args()

    # for drive run
    prefix = ''
    f_prefix = '.'

    method_name = "VANILLALSTM"
    model_name = "LSTM"
    save_tar_name = method_name + "_lstm_model_"
    if sample_args.gru:
        model_name = "GRU"
        save_tar_name = method_name + "_gru_model_"

    print("Selected method name: ", method_name, " model name: ", model_name)

    # Save directory
    save_directory = os.path.join(f_prefix, 'model/', method_name, model_name)
    # plot directory for plotting in the future
    plot_directory = os.path.join(f_prefix, 'plot/', method_name, model_name)

    result_directory = os.path.join(f_prefix, 'result/', method_name)
    plot_test_file_directory = 'test'

    # Define the path for the config file for saved args
    with open(os.path.join(save_directory, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)

    seq_length = sample_args.pred_length + sample_args.obs_length

    # Create the DataLoader object
    dataloader = DataLoader(f_prefix,
                            1,
                            sample_args.pred_length + sample_args.obs_length,
                            forcePreProcess=True,
                            infer=True)
    create_directories(os.path.join(result_directory, model_name),
                       dataloader.get_all_directory_namelist())
    create_directories(plot_directory, [plot_test_file_directory])
    dataloader.reset_batch_pointer(valid=False)

    dataset_pointer_ins = dataloader.dataset_pointer

    smallest_err = 100000
    smallest_err_iter_num = -1
    origin = (0, 0)
    reference_point = (0, 1)

    submission_store = []  # store submission data points (txt)
    result_store = []  # store points for plotting

    # Initialize net
    net = LSTM(saved_args, True)

    if sample_args.use_cuda:
        net = net.cuda()

    # Get the checkpoint path
    checkpoint_path = os.path.join(
        save_directory, save_tar_name + str(sample_args.epoch) + '.tar')
    if os.path.isfile(checkpoint_path):
        print('Loading checkpoint')
        checkpoint = torch.load(checkpoint_path)
        model_epoch = checkpoint['epoch']
        net.load_state_dict(checkpoint['state_dict'])
        print('Loaded checkpoint at epoch', model_epoch)

    results_it = []
    for iterator in range(50):
        x_seq_arr = []
        ret_x_seq_arr = []
        error_arr = []
        expected_day_arr = []
        predicted_day_arr = []

        total_error = 0

        for batch in range(dataloader.num_batches):
            # Get data
            x, y, d = dataloader.next_batch(randomUpdate=False)

            # Get the sequence
            x_seq, y_seq, d_seq = x[0], y[0], d[0]
            x_seq = np.array(x_seq)
            '''
            x_seq = dataloader.inverse_transform_MinMaxScaler(x_seq)
            print('{}/{}'.format(batch, dataloader.num_batches))
            x_seq[sample_args.obs_length:,-2]= 17
            x_seq[sample_args.obs_length:,-1]= 28
            x_seq = dataloader.fit_transform_MinMaxScaler(x_seq)
            '''
            x_seq = Variable(torch.from_numpy(x_seq).float())

            temp = x_seq[:, -2:]
            # x_seq = x_seq[:,:-2]

            if sample_args.use_cuda:
                x_seq = x_seq.cuda()
                temp = temp.cuda()

            obs_data = x_seq[:sample_args.obs_length]

            ret_x_seq = sample(sample_args, x_seq, temp, net)

            error = get_mean_error(x_seq[sample_args.obs_length:, :-2],
                                   ret_x_seq[sample_args.obs_length:, :-2],
                                   False)
            total_error += error

            # display the prediction
            # x_seq = result[0]
            x_seq = x_seq.data.cpu().numpy()
            # print(x_seq.size())
            # x_seq = np.reshape(x_seq,(x_seq.shape[0], saved_args.input_size))
            x_seq = dataloader.inverse_transform_MinMaxScaler(x_seq)
            # ret_x_seq = result[1]
            ret_x_seq = ret_x_seq.data.cpu().numpy()
            # ret_x_seq = np.reshape(ret_x_seq,(ret_x_seq.shape[0], saved_args.input_size))
            ret_x_seq = dataloader.inverse_transform_MinMaxScaler(ret_x_seq)

            gt = (x_seq[:, 0] - x_seq[:, 2]) / (x_seq[:, 1] - x_seq[:, 0])
            pred = (ret_x_seq[:, 0] - ret_x_seq[:, 2]) / (ret_x_seq[:, 1] -
                                                          ret_x_seq[:, 0])

            gt2 = gt[sample_args.obs_length:]
            pred2 = pred[sample_args.obs_length:]
            expected_day = np.mean(gt2)
            predicted_day = np.mean(pred2)
            # print(expected_day, predicted_day, expected_day - predicted_day)
            # print('Error: ', error)

            x_seq_arr.append(x_seq)
            ret_x_seq_arr.append(ret_x_seq)
            error_arr.append(error.data.cpu().numpy())
            expected_day_arr.append(expected_day)
            predicted_day_arr.append(predicted_day)

            # fig, axs = plt.subplots(6, 1)
            # axs[0].plot(ret_x_seq[:,0], color = 'blue' , label = 'Predict h1', linestyle='--', marker='^')
            # axs[0].plot(x_seq[:,0], color = 'red', label = 'Real h1', linestyle='-', marker='.')
            # axs[1].plot(ret_x_seq[:,1], color = 'blue' , label = 'Predict h2', linestyle='--', marker='^')
            # axs[1].plot(x_seq[:,1], color = 'red', label = 'Real h2', linestyle='-', marker='.')
            # axs[2].plot(ret_x_seq[:,2], color = 'blue' , label = 'Predict h3', linestyle='--', marker='^')
            # axs[2].plot(x_seq[:,2], color = 'red', label = 'Real h3', linestyle='-', marker='.')
            # axs[3].plot(pred, color = 'blue' , label = 'Predict h3', linestyle='--', marker='^')
            # axs[3].plot(gt, color = 'red', label = 'Real h3', linestyle='-', marker='.')

            # axs[4].plot(ret_x_seq[:,-2], color = 'blue' , label = 'Predict Tevwi', linestyle='--', marker='^')
            # axs[4].plot(x_seq[:,-2], color = 'red', label = 'Real Tevwi', linestyle='-', marker='.')

            # axs[5].plot(ret_x_seq[:,-1], color = 'blue' , label = 'Predict Tcdwi', linestyle='--', marker='^')
            # axs[5].plot(x_seq[:,-1], color = 'red', label = 'Real Tcdwi', linestyle='-', marker='.')

            # for ax in axs:
            #     ax.legend()
            #     ax.grid()
            # plt.show()

        total_error = total_error / dataloader.num_batches
        if total_error < smallest_err:
            print("**********************************************************")
            print('Best iteration has been changed. Previous best iteration: ',
                  smallest_err_iter_num, 'Error: ', smallest_err)
            print('New best iteration : ', iterator, 'Error: ', total_error)
            smallest_err_iter_num = iterator
            smallest_err = total_error

        results_it.append((sample_args.pred_length, sample_args.obs_length,
                           x_seq_arr, ret_x_seq_arr, error_arr))

    dataloader.write_to_plot_file([results_it[smallest_err_iter_num]],
                                  os.path.join(plot_directory,
                                               plot_test_file_directory))
Example #12
def ToVariable(tmp):
    # Reconstructed header: the helper is called as ToVariable(...) in train() below.
    return Variable(tmp)


use_gpu = torch.cuda.is_available()
# print(use_gpu)
input_size = 900
output_size = 900
hidden_dim = 2000
num_layer = 4
model = LSTM(input_size, hidden_dim, num_layer, output_size)
loss_function = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha=0.9)

if use_gpu:
    model = model.cuda()

root_path = "dataset"
data_num = 100
time_step = 5
datalist = create_datalist(root_path)
train_data, test_data = create_dataset(data_num, datalist, time_step)
# print(len(train_data)) #17*80
# print(len(test_data))  #17*20


def train(epoch):
    for step, input_data in enumerate(train_data, 1):
        seq = ToVariable(input_data[0])
        outs = ToVariable(input_data[1])
        if use_gpu:
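The `train` function is cut off after the `use_gpu` check. A plausible continuation (a sketch under the assumption that the model maps `seq` to a prediction scored against `outs` with the MSE loss defined above):

            seq, outs = seq.cuda(), outs.cuda()
        optimizer.zero_grad()       # clear accumulated gradients
        pred = model(seq)           # forward pass
        loss = loss_function(pred, outs)
        loss.backward()             # backpropagate
        optimizer.step()            # update parameters
        if step % 10 == 0:
            print('epoch {}, step {}, loss {:.6f}'.format(epoch, step, loss.item()))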
Example #13
from tqdm import tqdm
import os
import sys
import random
from collections import deque
import pandas as pd
from yahoo_fin.stock_info import get_day_gainers

batch_size = 3
input_sizes = [1, 6, 20]
hidden_size = 300
num_layers = 2
dropout = 0.5
output_size = 5
lr = 0.0001
seq_length = 20
epochs = 100000
model = LSTM(input_sizes, hidden_size, num_layers, dropout, output_size)
model.cuda()
csv = pd.read_csv("nasdaq.csv")
stocks = {}
num_models = 15
hidden = {}
ppo = {}
reward_list = {}
last_profit = {}
for i in range(num_models):
    hidden[i] = model.init_state(batch_size)
    stocks[i] = random.choices(csv["Symbol"], k=batch_size)
    ppo[i] = DQN(model, lr, stocks[i], output_size, hidden[i], batch_size)
    reward_list[i] = deque(maxlen=100)
    last_profit[i] = 0
rewards = {}
for e in tqdm(range(epochs)):
Example #14
    #                           train = False, debug=False)
    # lafan_loader_test = DataLoader(lafan_data_test, \
    #                                batch_size=opt['train']['batch_size'], \
    #                                shuffle=True, num_workers=opt['data']['num_workers'])

    ## initialize model ##

    state_encoder = StateEncoder(in_dim=opt['model']['state_input_dim'])
    state_encoder = state_encoder.cuda()
    offset_encoder = OffsetEncoder(in_dim=opt['model']['offset_input_dim'])
    offset_encoder = offset_encoder.cuda()
    target_encoder = TargetEncoder(in_dim=opt['model']['target_input_dim'])
    target_encoder = target_encoder.cuda()
    lstm = LSTM(in_dim=opt['model']['lstm_dim'],
                hidden_dim=opt['model']['lstm_dim'] * 2)
    lstm = lstm.cuda()
    decoder = Decoder(in_dim=opt['model']['lstm_dim'] * 2,
                      out_dim=opt['model']['state_input_dim'])
    decoder = decoder.cuda()
    if len(opt['train']['pretrained']) > 0:
        state_encoder.load_state_dict(
            torch.load(
                os.path.join(opt['train']['pretrained'], 'state_encoder.pkl')))
        offset_encoder.load_state_dict(
            torch.load(
                os.path.join(opt['train']['pretrained'],
                             'offset_encoder.pkl')))
        target_encoder.load_state_dict(
            torch.load(
                os.path.join(opt['train']['pretrained'],
                             'target_encoder.pkl')))
Example #15
def train(feature, label, epochs, model, layer, hidden, save, postfix, index2char, index2phone, phone_map, phone2index):
	dataset = Feature_Dataset(feature,'train')
	train_size = int(0.9*len(dataset))
	if feature == 'mfcc':
		feature_dim = 39
	elif feature == 'fbank':
		feature_dim = 69
	elif feature == 'all':
		feature_dim = 108

	print("Building model and optimizer...")
	if model == 'LSTM':
		train_model = LSTM(feature_dim,hidden,layer)
	elif model == 'C_RNN':
		group_size = 5 
		train_model = C_RNN(group_size,feature_dim,hidden,layer)
	elif model == 'BiLSTM':
		train_model = LSTM(feature_dim, hidden, layer, bi = True)
	
	if USE_CUDA:
		train_model = train_model.cuda()
	optimizer = optim.Adam(train_model.parameters(), lr = 0.005)
	#optimizer = optim.SGD(train_model.parameters(),lr = 0.1)
	criterion = nn.NLLLoss()
	if USE_CUDA:
		criterion = criterion.cuda() 

	for epoch in range(1,epochs+1):
		print("Epoch {}".format(epoch))
		epoch_loss = 0
		epoch_edit = 0
		for i in tqdm(range(1,train_size+1)):
			data = dataset[i-1]
			speaker = data[0]
		
			train_model.zero_grad()
			input_hidden = train_model.init_hidden()
			
			train_feature = Variable(data[1].float())
			output =  train_model(train_feature,input_hidden)
			
			output_seq = test_trim(index2char, index2phone, phone_map, phone2index, torch.max(output,1)[1].data.cpu().numpy())
			target_seq = trim_and_map(index2char,index2phone, phone_map, phone2index, [[int(l)] for l in label[speaker]])
			
			target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
			target = target.cuda() if USE_CUDA else target
			
			loss = criterion(output,target)
			edit = editdistance.eval(output_seq,target_seq)

			epoch_loss += loss.item() / train_size
			epoch_edit += edit / train_size
		
			loss.backward()
			optimizer.step()

		print("Negative log-likelihood: {}".format(epoch_loss))
		print("Edit distance: {} ".format(epoch_edit))
		val_loss = 0
		val_edit = 0
		for i in tqdm(range(train_size+1,len(dataset)+1)):
			data = dataset[i-1]
			speaker = data[0]
			val_feature = Variable(data[1].float())
			
			output = train_model(val_feature,train_model.init_hidden())
			target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
			target = target.cuda() if USE_CUDA else target
			
			val_loss += criterion(output, target).item()
			output_seq = test_trim(index2char,index2phone, phone_map, phone2index,torch.max(output,1)[1].data.cpu().numpy())
			target_seq = trim_and_map(index2char,index2phone, phone_map, phone2index,[[int(l)] for l in label[speaker]])
				
			val_edit += editdistance.eval(output_seq,target_seq)
		print("Validation loss: {}".format(val_loss/(len(dataset)-train_size)))
		print("Validation edit distance: {}".format(val_edit/(len(dataset)-train_size)))

		if epoch%save == 0:
			directory = os.path.join(SAVE_DIR, feature, model, '{}-{}{}'.format(layer,hidden,postfix))
			if not os.path.exists(directory):
				os.makedirs(directory)
			torch.save({
				'model': train_model.state_dict(),
				'opt': optimizer.state_dict(),
				'val_loss': val_loss / (len(dataset) - train_size),
				'val_edit': val_edit / (len(dataset) - train_size),
				}, os.path.join(directory, '{}.tar'.format(epoch)))
	print("Finish training")
Example #16
class dl_model():
    def __init__(self, mode):

        # read the config file which contains the parameters
        self.config_file = read_yaml()
        self.mode = mode

        arch_name = '_'.join([
            self.config_file['rnn'],
            str(self.config_file['num_layers']),
            str(self.config_file['hidden_dim'])
        ])
        self.config_file['dir']['models'] = self.config_file['dir'][
            'models'].split('/')[0] + '_' + arch_name + '/'
        self.config_file['dir']['plots'] = self.config_file['dir'][
            'plots'].split('/')[0] + '_' + arch_name + '/'

        #if not os.path.exists(self.config_file['dir']['models']):
        #    os.mkdir(self.config_file['dir']['models'])
        #if not os.path.exists(self.config_file['dir']['plots']):
        #    os.mkdir(self.config_file['dir']['plots'])

        if self.config_file['rnn'] == 'LSTM':
            from model import LSTM as Model
        elif self.config_file['rnn'] == 'GRU':
            from model import GRU as Model
        else:
            print("Model not implemented")
            exit(0)

        self.cuda = (self.config_file['cuda'] and torch.cuda.is_available())
        self.output_dim = self.config_file['num_phones']

        if mode == 'train' or mode == 'test':

            self.plots_dir = self.config_file['dir']['plots']
            # store hyperparameters
            self.total_epochs = self.config_file['train']['epochs']
            self.test_every = self.config_file['train']['test_every_epoch']
            self.test_per = self.config_file['train']['test_per_epoch']
            self.print_per = self.config_file['train']['print_per_epoch']
            self.save_every = self.config_file['train']['save_every']
            self.plot_every = self.config_file['train']['plot_every']
            # dataloader which returns batches of data
            self.train_loader = timit_loader('train', self.config_file)
            self.test_loader = timit_loader('test', self.config_file)

            self.start_epoch = 1
            self.test_acc = []
            self.train_losses, self.test_losses = [], []
            # declare model
            self.model = Model(self.config_file,
                               weights=self.train_loader.weights)

        else:

            self.model = Model(self.config_file, weights=None)

        if self.cuda:
            self.model.cuda()

        # resume training from some stored model
        if self.mode == 'train' and self.config_file['train']['resume']:
            self.start_epoch, self.train_losses, self.test_losses, self.test_acc = self.model.load_model(
                mode, self.config_file['rnn'], self.model.num_layers,
                self.model.hidden_dim)
            self.start_epoch += 1

        # load best model for testing/feature extraction
        elif self.mode == 'test' or mode == 'test_one':
            self.model.load_model(mode, self.config_file['rnn'],
                                  self.model.num_layers, self.model.hidden_dim)

        self.replacement = {
            'aa': ['ao'],
            'ah': ['ax', 'ax-h'],
            'er': ['axr'],
            'hh': ['hv'],
            'ih': ['ix'],
            'l': ['el'],
            'm': ['em'],
            'n': ['en', 'nx'],
            'ng': ['eng'],
            'sh': ['zh'],
            'pau':
            ['pcl', 'tcl', 'kcl', 'bcl', 'dcl', 'gcl', 'h#', 'epi', 'q'],
            'uw': ['ux']
        }

    def train(self):

        print("Starting training at t =", datetime.datetime.now())
        print('Batches per epoch:', len(self.train_loader))
        self.model.train()

        # when to print losses during the epoch
        print_range = list(
            np.linspace(0,
                        len(self.train_loader),
                        self.print_per + 2,
                        dtype=np.uint32)[1:-1])

        if self.test_per == 0:
            test_range = []
        else:
            test_range = list(
                np.linspace(0,
                            len(self.train_loader),
                            self.test_per + 2,
                            dtype=np.uint32)[1:-1])

        for epoch in range(self.start_epoch, self.total_epochs + 1):

            print("Epoch:", str(epoch))
            epoch_loss = 0.0
            i = 0

            while True:

                i += 1

                inputs, labels, lens, status = self.train_loader.return_batch()
                inputs = torch.from_numpy(np.array(inputs)).float()
                labels = torch.from_numpy(np.array(labels)).long()
                lens = torch.from_numpy(np.array(lens)).long()

                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    lens = lens.cuda()

                # zero the parameter gradients
                self.model.optimizer.zero_grad()
                # forward + backward + optimize
                outputs = self.model(inputs, lens)
                loss = self.model.calculate_loss(outputs, labels, lens)
                loss.backward()

                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config_file['grad_clip'])
                self.model.optimizer.step()

                # store loss
                epoch_loss += loss.item()

                if i in print_range:
                    try:
                        print(
                            'After %i batches, Current Loss = %.7f, Avg. Loss = %.7f'
                            % (i + 1, epoch_loss / (i + 1),
                               np.mean([x[0] for x in self.train_losses])))
                    except:
                        pass

                if i in test_range:
                    self.test(epoch)
                    self.model.train()

                if status == 1:
                    break

            self.train_losses.append(
                (epoch_loss / len(self.train_loader), epoch))

            # test every 5 epochs in the beginning and then every fixed no of epochs specified in config file
            # useful to see how loss stabilises in the beginning
            if epoch % 5 == 0 and epoch < self.test_every:
                self.test(epoch)
                self.model.train()
            elif epoch % self.test_every == 0:
                self.test(epoch)
                self.model.train()
            # plot loss and accuracy
            if epoch % self.plot_every == 0:
                self.plot_loss_acc(epoch)

            # save model
            if epoch % self.save_every == 0:
                self.model.save_model(False, epoch, self.train_losses,
                                      self.test_losses, self.test_acc,
                                      self.config_file['rnn'],
                                      self.model.num_layers,
                                      self.model.hidden_dim)

    def test(self, epoch=None):

        self.model.eval()
        correct = 0
        total = 0
        correct_nopause = 0
        total_nopause = 0
        pause_id = 27
        # confusion matrix data is stored in this matrix
        matrix = np.zeros((self.output_dim, self.output_dim))
        pad_id = self.output_dim

        print("Testing...")
        print('Total batches:', len(self.test_loader))
        test_loss = 0

        with torch.no_grad():

            while True:

                inputs, labels, lens, status = self.test_loader.return_batch()
                inputs = torch.from_numpy(np.array(inputs)).float()
                labels = torch.from_numpy(np.array(labels)).long()
                lens = torch.from_numpy(np.array(lens)).long()
                # print(inputs.shape, labels.shape, lens)
                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    lens = lens.cuda()

                self.model.optimizer.zero_grad()
                # forward pass and loss only; no backward under torch.no_grad()
                outputs = self.model(inputs, lens)
                loss = self.model.calculate_loss(outputs, labels, lens)
                test_loss += loss.item()

                outputs = outputs.cpu().numpy()
                # remove extra padding from the current batch
                labels = labels.cpu().numpy()[:, :outputs.shape[1]]
                # drop the blank token column before reshaping
                outputs = np.reshape(outputs[:, :, :-1], (-1, self.output_dim))
                labels = np.reshape(labels, (-1))
                total_pad_tokens = np.sum(labels == pad_id)
                argmaxed = np.argmax(outputs, 1)

                # total number of correct phone predictions
                for i in range(len(labels)):
                    if labels[i] != pause_id and labels[i] != pad_id:  # neither pause nor pad
                        if argmaxed[i] == labels[i]:
                            correct_nopause += 1
                        total_nopause += 1
                correct += np.sum(argmaxed == labels)
                total += len(argmaxed) - total_pad_tokens

                # matrix[i][j] denotes the no of examples classified by model as class j but have ground truth label i
                for k in range(argmaxed.shape[0]):
                    if labels[k] == pad_id:
                        continue
                    matrix[labels[k]][argmaxed[k]] += 1

                if status == 1:
                    break

        for i in range(self.output_dim):
            matrix[i] /= sum(matrix[i])

        acc_all = correct / total
        acc_nopause = correct_nopause / total_nopause
        print(acc_all, acc_nopause)

        test_loss /= len(self.test_loader)

        # plot confusion matrix
        if epoch is not None:
            filename = self.plots_dir + 'confmat_epoch_acc_' + str(
                epoch) + '_' + str(int(100 * acc_all)) + '.png'
            plt.clf()
            plt.imshow(matrix, cmap='hot', interpolation='none')
            plt.gca().invert_yaxis()
            plt.xlabel("Predicted Label ID")
            plt.ylabel("True Label ID")
            plt.colorbar()
            plt.savefig(filename)

        print("Testing accuracy: All - %.4f, No Pause - %.4f , Loss: %.7f" %
              (acc_all, acc_nopause, test_loss))

        self.test_acc.append((acc_all, epoch))
        self.test_losses.append((test_loss, epoch))

        # if testing loss is minimum, store it as the 'best.pth' model, which is used for feature extraction
        if test_loss == min([x[0] for x in self.test_losses]):
            print("Best new model found!")
            self.model.save_model(True, epoch, self.train_losses,
                                  self.test_losses, self.test_acc,
                                  self.config_file['rnn'],
                                  self.model.num_layers, self.model.hidden_dim)

        return acc_all

    # Called during feature extraction. Takes log mel filterbank energies as input and outputs the phone predictions
    def test_one(self, file_path):

        (rate, sig) = wav.read(file_path)
        assert rate == 16000
        # sig ranges from -32768 to +32768 AND NOT -1 to +1
        feat, energy = fbank(sig,
                             samplerate=rate,
                             nfilt=self.config_file['feat_dim'],
                             winfunc=np.hamming)
        tsteps, hidden_dim = feat.shape
        # calculate log mel filterbank energies for complete file
        feat_log_full = np.reshape(np.log(feat), (1, tsteps, hidden_dim))
        lens = np.array([tsteps])
        inputs = torch.from_numpy(np.array(feat_log_full)).float()
        lens = torch.from_numpy(np.array(lens)).long()
        id_to_phone = {v[0]: k for k, v in self.model.phone_to_id.items()}

        self.model.eval()

        with torch.no_grad():
            if self.cuda:
                inputs = inputs.cuda()
                lens = lens.cuda()

            # Pass through model
            a = time.time()

            outputs = self.model(inputs, lens).cpu().numpy()
            print(time.time() - a)
            # Since only one example per batch and ignore blank token
            outputs = outputs[0, :, :-1]
            softmax = np.exp(outputs) / np.sum(np.exp(outputs), axis=1)[:, None]

        return softmax, id_to_phone

    # Test for each wav file in the folder and also compare with ground truth
    def test_folder(self, test_folder, top_n=1, show_graphs=False):

        accs = []

        for wav_file in sorted(os.listdir(test_folder)):

            # Read input test file
            wav_path = os.path.join(test_folder, wav_file)
            dump_path = wav_path[:-4] + '_pred.txt'

            # Read only wav
            if wav_file == '.DS_Store' or wav_file.split('.')[-1] != 'wav':  # or os.path.exists(dump_path)
                continue

            (rate, sig) = wav.read(wav_path)
            assert rate == 16000
            # sig ranges from -32768 to +32768 AND NOT -1 to +1
            feat, energy = fbank(sig,
                                 samplerate=rate,
                                 nfilt=self.config_file['feat_dim'],
                                 winfunc=np.hamming)
            tsteps, hidden_dim = feat.shape
            # calculate log mel filterbank energies for complete file
            feat_log_full = np.reshape(np.log(feat), (1, tsteps, hidden_dim))
            lens = np.array([tsteps])
            inputs = torch.from_numpy(np.array(feat_log_full)).float()
            lens = torch.from_numpy(np.array(lens)).long()
            id_to_phone = {v[0]: k for k, v in self.model.phone_to_id.items()}

            self.model.eval()

            with torch.no_grad():

                if self.cuda:
                    inputs = inputs.cuda()
                    lens = lens.cuda()

                # Pass through model
                outputs = self.model(inputs, lens).cpu().numpy()
                # Since only one example per batch and ignore blank token
                outputs = outputs[0, :, :-1]
                softmax = np.exp(outputs) / np.sum(np.exp(outputs), axis=1)[:, None]
                softmax_probs = np.max(softmax, axis=1)
                # print(softmax)
                # Take argmax ot generate final string
                argmaxed = np.argmax(outputs, axis=1)
                final_str = [id_to_phone[a] for a in argmaxed]
                # Generate dumpable format of phone, start time and end time
                ans = compress_seq(final_str)
                print("Predicted:", ans)

            phone_path = wav_path[:-3] + 'PHN'

            # If .PHN file exists, report accuracy
            if os.path.exists(phone_path):
                ground_truth = read_phones(phone_path, self.replacement)
                print("Ground truth:", ground_truth)

                unrolled_truth = []
                for elem in ground_truth:
                    unrolled_truth += [elem[0]] * (elem[2] - elem[1] + 1)

                truth_softmax = []
                top_n_softmax = [[] for x in range(top_n)]
                # Check for top-n
                correct, total = 0, 0
                for i in range(min(len(unrolled_truth), len(final_str))):

                    truth_softmax.append(softmax[i][self.model.phone_to_id[
                        unrolled_truth[i]][0]])

                    indices = list(range(len(final_str)))
                    zipped = zip(indices, outputs[i])
                    desc = sorted(zipped, key=lambda x: x[1], reverse=True)
                    cur_frame_res = [id_to_phone[x[0]] for x in desc][:top_n]

                    for k in range(top_n):
                        top_n_softmax[k].append(softmax[i][
                            self.model.phone_to_id[cur_frame_res[k]][0]])

                    if unrolled_truth[i] in cur_frame_res:
                        # print truth softmax
                        # if unrolled_truth[i] != cur_frame_res[0]:
                        # print(i, truth_softmax[-1])
                        correct += 1

                    total += 1

                accs.append(correct / total)

                if show_graphs:
                    # Plot actual softmax and predicted softmax
                    for i in range(top_n):
                        plt.plot(top_n_softmax[i], label=str(i + 1) + ' prob.')
                    print(top_n_softmax)
                    plt.plot(truth_softmax,
                             label='Ground Truth prob',
                             alpha=0.6)
                    plt.xlabel("Frame number")
                    plt.ylabel("Prob")
                    plt.legend()
                    plt.show()

                with open(dump_path, 'w') as f:
                    f.write('Predicted:\n')
                    for t in ans:
                        f.write(' '.join(str(s) for s in t) + '\n')
                    f.write('\nGround Truth:\n')
                    for t in ground_truth:
                        f.write(' '.join(str(s) for s in t) + '\n')
                    f.write('\nTop-' + str(top_n) + ' accuracy is ' +
                            str(correct / total))
            else:
                with open(dump_path, 'w') as f:
                    f.write('Predicted:\n')
                    for t in ans:
                        f.write(' '.join(str(s) for s in t) + '\n')
        print(accs)

    # take train/test loss and test accuracy input and plot it over time
    def plot_loss_acc(self, epoch):

        plt.clf()
        plt.plot([x[1] for x in self.train_losses],
                 [x[0] for x in self.train_losses],
                 c='r',
                 label='Train')
        plt.plot([x[1] for x in self.test_losses],
                 [x[0] for x in self.test_losses],
                 c='b',
                 label='Test')
        plt.title("Train/Test loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.grid(True)

        filename = self.plots_dir + 'loss' + '_' + str(epoch) + '.png'
        plt.savefig(filename)

        plt.clf()
        plt.plot([x[1] for x in self.test_acc],
                 [100 * x[0] for x in self.test_acc],
                 c='r')
        plt.title("Test accuracy")
        plt.xlabel("Epochs")
        plt.ylabel("Accuracy in %%")
        plt.grid(True)

        filename = self.plots_dir + 'test_acc' + '_' + str(epoch) + '.png'
        plt.savefig(filename)

        print("Saved plots")
def train(args):
    prefix = ''
    f_prefix = '.'
    
    if not os.path.isdir("log/"):
        print("Directory creation script is running...")
        subprocess.call([f_prefix+'/make_directories.sh'])

    args.freq_validation = np.clip(args.freq_validation, 0, args.num_epochs)
    validation_epoch_list = list(range(args.freq_validation, args.num_epochs+1, args.freq_validation))
    validation_epoch_list[-1] -= 1


    # Create the data loader object. This object would preprocess the data in terms of
    # batches each of size args.batch_size, of length args.seq_length
    dataloader = DataLoader(f_prefix, args.batch_size, args.seq_length, args.num_validation, forcePreProcess=True)

    method_name = "VANILLALSTM"
    model_name = "LSTM"
    save_tar_name = method_name+"_lstm_model_"
    if args.gru:
        model_name = "GRU"
        save_tar_name = method_name+"_gru_model_"

    # Log directory
    log_directory = os.path.join(prefix, 'log/')
    plot_directory = os.path.join(prefix, 'plot/', method_name, model_name)
    plot_train_file_directory = 'validation'



    # Logging files
    log_file_curve = open(os.path.join(log_directory, method_name, model_name,'log_curve.txt'), 'w+')
    log_file = open(os.path.join(log_directory, method_name, model_name, 'val.txt'), 'w+')

    # model directory
    save_directory = os.path.join(f_prefix, 'model')
    
    # Save the arguments int the config file
    with open(os.path.join(save_directory, method_name, model_name,'config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Path to store the checkpoint file
    def checkpoint_path(x):
        return os.path.join(save_directory, method_name, model_name, save_tar_name+str(x)+'.tar')
    
    # model creation
    net = LSTM(args)
    if args.use_cuda:
        net = net.cuda()

    # optimizer = torch.optim.Adagrad(net.parameters(), weight_decay=args.lambda_param)
    optimizer = torch.optim.RMSprop(net.parameters(), lr=args.learning_rate)
    loss_f = torch.nn.MSELoss()
    learning_rate = args.learning_rate

    best_val_loss = 100
    best_val_data_loss = 100

    smallest_err_val = 100000
    smallest_err_val_data = 100000


    best_epoch_val = 0
    best_epoch_val_data = 0

    best_err_epoch_val = 0
    best_err_epoch_val_data = 0

    all_epoch_results = []
    grids = []
    num_batch = 0

    # Training
    for epoch in range(args.num_epochs):
        print('****************Training epoch beginning******************')
        if dataloader.additional_validation and (epoch-1) in validation_epoch_list:
            dataloader.switch_to_dataset_type(True)
        dataloader.reset_batch_pointer(valid=False)
        loss_epoch = 0
        # For each batch
        # num_batches: how many batches the data splits into, i.e. how many iterations per epoch
        
        for batch in range(dataloader.num_batches):
            start = time.time()

            # print(dataloader.num_batches, dataloader.batch_size)
            
            # Get batch data
            x, y, d = dataloader.next_batch(randomUpdate=False)
            
            loss_batch = 0

            # x_cat = Variable(torch.from_numpy(np.array(x[0])).float())
            x_seq = np.array(x)
            y_seq = np.array(y)
            x_seq = Variable(torch.from_numpy(x_seq).float())
            y_seq = Variable(torch.from_numpy(y_seq).float())
            temp = x_seq[:,:,-2:]
            x_seq = x_seq[:,:,:-2]
            y_seq = y_seq[:,:,:3]
            
            hidden_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size))
            cell_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size))

            if args.use_cuda:                  
                x_seq = x_seq.cuda()
                y_seq = y_seq.cuda()
                temp = temp.cuda()
                hidden_states = hidden_states.cuda()
                cell_states = cell_states.cuda()

            # Zero out gradients
            net.zero_grad()
            optimizer.zero_grad()
            
            outputs, _, _ = net(x_seq, temp, hidden_states, cell_states)

            loss = loss_f(outputs, y_seq)
            loss_batch = loss.detach().item()

            # Compute gradients
            loss.backward()

            # Clip gradients
            torch.nn.utils.clip_grad_norm_(net.parameters(), args.grad_clip)

            # Update parameters
            optimizer.step()

            end = time.time()
            loss_epoch += loss_batch

            print('{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}'.format((batch+1) * dataloader.batch_size,
                                                                                    dataloader.num_batches * dataloader.batch_size,
                                                                                    epoch,
                                                                                    loss_batch, end - start))
        loss_epoch /= dataloader.num_batches
        print("Training epoch: "+str(epoch)+" loss: "+str(loss_epoch))
        #Log loss values
        log_file_curve.write("Training epoch: "+str(epoch)+" loss: "+str(loss_epoch)+'\n')


        # Validation dataset
        if dataloader.additional_validation and (epoch) in validation_epoch_list:
            dataloader.switch_to_dataset_type()
            print('****************Validation with dataset epoch beginning******************')
            dataloader.reset_batch_pointer(valid=False)
            dataset_pointer_ins = dataloader.dataset_pointer
            validation_dataset_executed = True

            loss_epoch = 0
            err_epoch = 0
            num_of_batch = 0
            smallest_err = 100000

            #results of one epoch for all validation datasets
            epoch_result = []
            #results of one validation dataset
            results = []

            # For each batch
            for batch in range(dataloader.num_batches):
                # Get batch data
                x, y, d = dataloader.next_batch(randomUpdate=False)

                # Loss for this batch
                loss_batch = 0
                err_batch = 0

                # For each sequence
                for sequence in range(len(x)):
                    # Get the sequence
                    x_seq = x[sequence]
                    y_seq = y[sequence]
                    x_seq = np.array(x_seq)
                    y_seq = np.array(y_seq)[:, :3]
                    x_seq = Variable(torch.from_numpy(x_seq).float())
                    y_seq = Variable(torch.from_numpy(y_seq).float())

                    # Same split as in training: last two columns are the
                    # auxiliary input
                    temp = x_seq[:, -2:]
                    x_seq = x_seq[:, :-2]

                    if args.use_cuda:
                        x_seq = x_seq.cuda()
                        y_seq = y_seq.cuda()
                        temp = temp.cuda()

                    # y_seq is the ground truth used for the error calculation below

                    # print(x_seq.size(), args.seq_length)

                    with torch.no_grad():
                        hidden_states = Variable(torch.zeros(1, args.rnn_size))
                        cell_states = Variable(torch.zeros(1, args.rnn_size))
                        ret_x_seq = Variable(torch.zeros(args.seq_length, net.input_size))
                        # all_outputs = Variable(torch.zeros(1, args.seq_length, net.input_size))

                        # Initialize the return data structure
                        if args.use_cuda:
                            ret_x_seq = ret_x_seq.cuda()
                            hidden_states = hidden_states.cuda()
                            cell_states = cell_states.cuda()

                        total_loss = 0
                        # For the observed part of the trajectory
                        for tstep in range(args.seq_length):
                            outputs, hidden_states, cell_states = net(x_seq[tstep].view(1, 1, net.input_size), temp[tstep].view(1, 1, temp.size()[-1]), hidden_states, cell_states)
                            ret_x_seq[tstep, :3] = outputs[0, 0, :3]
                            loss = loss_f(outputs, y_seq[tstep].view(1, 1, y_seq.size()[1]))
                            total_loss += loss

                        total_loss = total_loss / args.seq_length
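                        # The ground-truth input is fed at every timestep
                        # (teacher forcing); ret_x_seq collects the one-step
                        # predictions used for the error computation below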

                    #get mean and final error
                    # print(ret_x_seq.size(), y_seq.size())
                    err = get_mean_error(ret_x_seq.data, y_seq.data, args.use_cuda)

                    loss_batch += total_loss.item()
                    err_batch += err
                    print('Batch: ', batch+1, ' Sequence: ', sequence+1, ' Sequence mean error: ', err, ' valid_loss: ', total_loss.item())
                    results.append((y_seq.data.cpu().numpy(), ret_x_seq.data.cpu().numpy()))

                loss_batch = loss_batch / dataloader.batch_size
                err_batch = err_batch / dataloader.batch_size
                num_of_batch += 1
                loss_epoch += loss_batch
                err_epoch += err_batch

            epoch_result.append(results)
            all_epoch_results.append(epoch_result)

            if dataloader.num_batches != 0:            
                loss_epoch = loss_epoch / dataloader.num_batches
                err_epoch = err_epoch / dataloader.num_batches
                # avarage_err = (err_epoch + f_err_epoch)/2

                # Update best validation loss until now
                if loss_epoch < best_val_data_loss:
                    best_val_data_loss = loss_epoch
                    best_epoch_val_data = epoch

                if err_epoch<smallest_err_val_data:
                    # Save the model after each epoch
                    print('Saving model')
                    torch.save({
                        'epoch': epoch,
                        'state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, checkpoint_path(epoch))
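                    # Restoring later (sketch, assuming the same LSTM(args) setup):
                    #   ckpt = torch.load(checkpoint_path(epoch))
                    #   net.load_state_dict(ckpt['state_dict'])
                    #   optimizer.load_state_dict(ckpt['optimizer_state_dict'])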

                    smallest_err_val_data = err_epoch
                    best_err_epoch_val_data = epoch

                print('(epoch {}), valid_loss = {:.3f}, valid_mean_err = {:.3f}'.format(epoch, loss_epoch, err_epoch))
                print('Best epoch', best_epoch_val_data, 'Best validation loss', best_val_data_loss, 'Best error epoch',best_err_epoch_val_data, 'Best error', smallest_err_val_data)
                log_file_curve.write("Validation dataset epoch: "+str(epoch)+" loss: "+str(loss_epoch)+" mean_err: "+str(err_epoch.data.cpu().numpy())+'\n')
            


        optimizer = time_lr_scheduler(optimizer, epoch, lr_decay_epoch = args.freq_optimizer)
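    # time_lr_scheduler is assumed to step-decay the learning rate every
    # args.freq_optimizer epochs; a torch-native equivalent (sketch) would be
    # torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.freq_optimizer)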

    if dataloader.valid_num_batches != 0:
        print('Best epoch', best_epoch_val, 'Best validation Loss', best_val_loss, 'Best error epoch', best_err_epoch_val, 'Best error', smallest_err_val)
        # Log the best epoch and best validation loss
        log_file.write('Validation Best epoch: '+str(best_epoch_val)+', Best validation Loss: '+str(best_val_loss)+'\n')

    if dataloader.additional_validation:
        print('Best epoch according to validation dataset', best_epoch_val_data, 'Best validation Loss', best_val_data_loss, 'Best error epoch', best_err_epoch_val_data, 'Best error', smallest_err_val_data)
        log_file.write("Validation dataset Best epoch: "+str(best_epoch_val_data)+', Best validation Loss: '+str(best_val_data_loss)+', Best error epoch: '+str(best_err_epoch_val_data)+'\n')
        #dataloader.write_to_plot_file(all_epoch_results[best_epoch_val_data], plot_directory)
        #dataloader.write_to_plot_file(all_epoch_results[best_epoch_val_data], plot_directory)

    #elif dataloader.valid_num_batches != 0:
    #    dataloader.write_to_plot_file(all_epoch_results[best_epoch_val], plot_directory)

    if validation_dataset_executed:
        dataloader.switch_to_dataset_type(load_data=False)
        create_directories(plot_directory, [plot_train_file_directory])
        dataloader.write_to_plot_file(all_epoch_results[len(all_epoch_results)-1], os.path.join(plot_directory, plot_train_file_directory))

    # Close logging files
    log_file.close()
    log_file_curve.close()
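
# For reference: get_mean_error (used in the validation loop above) is assumed
# to return the mean Euclidean distance between predicted and ground-truth
# points. A minimal sketch under that assumption; the project's real helper
# may differ:
def get_mean_error_sketch(pred_seq, target_seq, use_cuda=False):
    # pred_seq: (seq_length, input_size) predictions whose first three columns
    # are the predicted coordinates; target_seq: (seq_length, 3) ground truth
    diff = pred_seq[:, :3] - target_seq[:, :3]
    return torch.norm(diff, dim=1).mean()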