Example 1
def main(args):
    if args.model == 'base':
        postprocessing = None
    elif args.model == 'jump':
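        # pick_fix_length presumably pads or truncates every example to 400 tokens (assumption)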
        postprocessing = pick_fix_length(400, PAD_TOKEN)
    TEXT = data.Field(lower=True,
                      postprocessing=postprocessing,
                      pad_token=PAD_TOKEN,
                      include_lengths=True)
    LABEL = data.Field(sequential=False, pad_token=None, unk_token=None)

    train, test = datasets.IMDB.splits(TEXT, LABEL)

    TEXT.build_vocab(train)
    LABEL.build_vocab(train)

    train_iter, test_iter = data.BucketIterator.splits(
        (train, test),
        batch_sizes=(args.batch, args.batch * 4),
        device=args.gpu,
        repeat=False,
        sort_within_batch=True)

    if args.model == 'base':
        model = LSTM(len(TEXT.vocab), 300, 128, len(LABEL.vocab))
    elif args.model == 'jump':
        model = LSTMJump(len(TEXT.vocab), 300, 128, len(LABEL.vocab), args.R,
                         args.K, args.N, 80, 8)
    model.load_pretrained_embedding(
        get_word2vec(TEXT.vocab.itos,
                     '.vector_cache/GoogleNews-vectors-negative300.bin'))
    model.cuda(args.gpu)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    max_accuracy = 0
    for i in range(args.epoch):
        print('Epoch: {}'.format(i + 1))
        sum_loss = 0
        model.train()
        for batch in train_iter:
            optimizer.zero_grad()
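            # with include_lengths=True, batch.text is a (token_ids, lengths) pair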
            xs, lengths = batch.text
            loss = model(xs, lengths, batch.label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            optimizer.step()
            sum_loss += loss.item()
        print(f'Loss: {sum_loss / len(train_iter)}')
        sum_correct = 0
        total = 0
        model.eval()
        for batch in test_iter:
            y = model.inference(*batch.text)
            sum_correct += y.eq(batch.label).sum().float()
            total += batch.label.size(0)
        accuracy = (sum_correct / total).item()
        max_accuracy = max(accuracy, max_accuracy)
        print(f'Accuracy: {accuracy}')
    print(f'Max Accuracy: {max_accuracy}')
Example 2
def train():
    int_to_vocab, vocab_to_int, n_vocab, in_text = get_data_from_file(flags.batch_size, flags.seq_size)
    x_batch, y_batch = create_batch(in_text, flags.batch_size, flags.seq_size)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = LSTM(n_vocab, flags.seq_size, flags.embedding_size, flags.lstm_size).to(device)

    #optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.7)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_function = nn.CrossEntropyLoss()

    for e in range(flags.num_epochs):
        print(f'epoch #{e}: ',end="")
        batches = get_batches(x_batch,y_batch,flags.batch_size, flags.seq_size)
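        # zero_state presumably returns the initial (h, c) pairs for two LSTM layers (assumption)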
        (state_h_1, state_c_1),(state_h_2, state_c_2) = model.zero_state(flags.batch_size)
        state_h_1 = state_h_1.to(device)
        state_c_1 = state_c_1.to(device)
        state_h_2 = state_h_2.to(device)
        state_c_2 = state_c_2.to(device)
        
        for i,(x, y) in enumerate(batches):
            model.train()
            optimizer.zero_grad()
 

            x = torch.tensor(x , dtype=torch.int64).to(device)
            #print("x shape {} ".format(np.shape(x)))
            
            # CrossEntropyLoss expects integer class indices as targets,
            # so y is used directly instead of a one-hot encoding
            y = torch.tensor(y, dtype=torch.int64).to(device)
            logits, (state_h_1, state_c_1),(state_h_2, state_c_2) = model(x, (state_h_1, state_c_1),(state_h_2, state_c_2))
            #print("logits shape {} , y shape {}".format(np.shape(logits),np.shape(y)))
            loss = loss_function(logits, y)

            state_h_1 = state_h_1.detach()
            state_c_1 = state_c_1.detach()
            state_h_2 = state_h_2.detach()
            state_c_2 = state_c_2.detach()

            loss_value = loss.item()

            loss.backward()
            _ = torch.nn.utils.clip_grad_norm_(model.parameters(), flags.gradients_norm)
            optimizer.step()
        print(f'batch #{i}:\tloss={loss.item():.10f}')
    return model 
Example 3
def main():
    model = LSTM(settings.vocab_size, settings.word_embedding_size,
                 settings.hidden_size, settings.num_layers, settings.out_dim, settings.drop_out)
    '''     pre-train word embedding init    '''
    dataset = Dataset(args.data)
    model.word_embed.weight = nn.Parameter(torch.from_numpy(dataset.get_wordembedding()))
    if torch.cuda.is_available():
        torch.cuda.manual_seed(settings.seed)
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=settings.lr, weight_decay=1e-5)
    criteria = nn.CrossEntropyLoss()
    best_dev_acc = 0.0
    best_test_acc = 0.0

    for i in range(dataset.size // settings.batch_size * settings.max_epochs):
        batch_data = dataset.get_batch()
        loss = train(model, batch_data, optimizer, criteria)
        if (i+1) % settings.validate_freq == 0:
            print "validating..."
            dev_acc = test(model, dataset.dev_data)
            test_acc = test(model, dataset.test_data)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                best_test_acc = test_acc
                torch.save(model, os.path.join(args.model_dir, "sa_{}.model".format(best_dev_acc)))
            with open(os.path.join(args.model_dir, "log.txt"), "a") as logger:
                logger.write("epoch: {}, dev acc: {}, test acc: {}, "
                             "batch loss: {}, best dev acc: {}, best test acc: {}\n".format(
                                 i * settings.batch_size / float(dataset.size),
                                 dev_acc, test_acc, loss.item(), best_dev_acc, best_test_acc))
            print("epoch: {}, dev acc: {}, test acc: {}, "
                  "batch loss: {}, best dev acc: {}, best test acc: {}".format(
                      i * settings.batch_size / float(dataset.size),
                      dev_acc, test_acc, loss.item(), best_dev_acc, best_test_acc))
Example 4
def main(opt):
    model = LSTM(opt, batch_first=True, dropout=opt.dropout)
    if opt.pre_train:
        model.load_state_dict(torch.load(opt.save_path))
    optimizer = optim.Adam(model.parameters(), opt.learning_rate)
    mseloss = nn.MSELoss()

    dataset = PowerDataset(opt,
                           prepocess_path=opt.prepocess_path,
                           transform=transforms.Compose(
                               [transforms.ToTensor()]))
    train_dataset = data.Subset(dataset, indices=range(8664))
    test_dataset = data.Subset(dataset, indices=range(8664, len(dataset)))
    train_dataloader = data.dataloader.DataLoader(train_dataset,
                                                  num_workers=opt.n_threads,
                                                  batch_size=opt.batch_size,
                                                  shuffle=True)
    test_sampler = data.SequentialSampler(test_dataset)
    test_dataloader = data.dataloader.DataLoader(
        test_dataset,
        num_workers=opt.n_threads,
        batch_size=opt.test_batch_size,
        shuffle=False,
        sampler=test_sampler)

    for e in range(opt.epochs):
        if opt.test_only:
            test(model, test_dataloader)
            break
        print('epoch: ', e)
        train(model, mseloss, optimizer, train_dataloader)
        test(model, test_dataloader)
        torch.save(model.state_dict(), opt.save_path)
Example 5
def main(opt):
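    # the three boolean flags appear to select the train / valid / test split (assumption)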
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize, \
                                      shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize, \
                                     shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize, \
                                     shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize, \
                                     shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node
    opt.n_existing_node = all_node_num

    net = LSTM(opt, hidden_state=opt.state_dim*5)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=1, eps=1e-6)

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch':[i for i in range(1, len(train_loss_ls)+1)], 'train_loss': train_loss_ls, 'valid_loss': valid_loss_ls, 'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
Example 6
def main():
    global args, best_prec1
    best_prec1 = 1e6
    args = parser.parse_args()
    args.original_lr = 1e-6
    args.lr = 1e-6
    args.momentum = 0.95
    args.decay = 5 * 1e-4
    args.start_epoch = 0
    args.epochs = 5000
    args.steps = [-1, 1, 100, 150]
    args.scales = [1, 1, 1, 1]
    args.workers = 4
    args.seed = time.time()
    args.print_freq = 30
    args.feature_size = 100
    args.lSeq=5
    wandb.config.update(args)
    wandb.run.name = f"Default_{wandb.run.name}" if (args.task == wandb.run.name) else f"{args.task}_{wandb.run.name}"

    conf = configparser.ConfigParser()
    conf.read(args.config)
    # print(conf)
    TRAIN_DIR = conf.get("lstm", "train")
    VALID_DIR = conf.get("lstm", "valid")
    TEST_DIR = conf.get("lstm", "test")
    LOG_DIR = conf.get("lstm", "log")
    create_dir_not_exist(LOG_DIR)
    # TODO: train_list to train_file
    train_list = [os.path.join(TRAIN_DIR, item) for item in os.listdir(TRAIN_DIR)]
    val_list = [os.path.join(VALID_DIR, item) for item in os.listdir(VALID_DIR)]
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(int(args.seed))
    model = LSTM(args.feature_size, args.feature_size, args.feature_size)
    model = model.cuda()
    criterion = nn.MSELoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=args.decay)
    model = DataParallel_withLoss(model, criterion)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion, epoch)
        with open(os.path.join(LOG_DIR, args.task + ".txt"), "a") as f:
            f.write("epoch " + str(epoch) + "  MSELoss: " + str(float(prec1)))
            f.write("\n")
        wandb.save(os.path.join(LOG_DIR, args.task + ".txt"))
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MSELoss {MSELoss:.3f} '.format(MSELoss=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.task, epoch=epoch, path=os.path.join(LOG_DIR, args.task))
Example 7
def create_model():
    model = LSTM(input_size=input_size,
                 num_classes=num_classes,
                 hidden=args.hidden_unit,
                 num_layers=args.num_layers,
                 mean_after_fc=args.mean_after_fc,
                 mask_empty_frame=args.mask_empty_frame)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    return (model, optimizer)
Example 8
def load_model():
    print("==> loading existing lstm model")
    model_info = torch.load(model_path)
    model = LSTM(input_size=input_size,
                 num_classes=model_info['num_classes'],
                 hidden=model_info['hidden'],
                 num_layers=model_info['num_layers'],
                 mean_after_fc=model_info['mean_after_fc'],
                 mask_empty_frame=model_info['mask_empty_frame'])
    model.cuda()
    model.load_state_dict(model_info['state_dict'])
    best_acc = model_info['best_acc']
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    optimizer.load_state_dict(model_info['optimizer'])
    return (model, optimizer)
Example 9
def train():
    train_writer = SummaryWriter(
        os.path.join(LOG_DIR, 'train7-64-LSTM-Doppler'))
    test_writer = SummaryWriter(os.path.join(LOG_DIR, 'test7-64-LSTM-Doppler'))

    train_loader, test_loader = load_data(TRAIN_DIR, TEST_DIR)

    lstm = LSTM().to(DEVICE)
    optimizer = torch.optim.Adam(lstm.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss().to(DEVICE)

    for epoch in range(MAX_EPOCH):
        log_string('**** EPOCH %3d ****' % (epoch))
        sys.stdout.flush()

        train_one_epoch(epoch, train_writer, train_loader, lstm, loss_func,
                        optimizer)
        eval_one_epoch(epoch, test_writer, test_loader, lstm, loss_func)

    # save model parameters to files
    torch.save(lstm.state_dict(), MODEL_DIR)
Example 10
def main():
    names_str = read_csv(filname='data/names/names.csv')
    all_char_str = set([char for name in names_str for char in name])
    char2idx = {char: i for i, char in enumerate(all_char_str)}
    char2idx['EOS'] = len(char2idx)
    # save char dictionary
    cPickle.dump(char2idx, open("dic.p", "wb"))

    names_idx = [[char2idx[char_str] for char_str in name_str]
                 for name_str in names_str]

    # build model
    model = LSTM(input_dim=len(char2idx), embed_dim=100, hidden_dim=128)

    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters())

    n_iters = 5

    for iter in range(1, n_iters + 1):

        # data shuffle
        random.shuffle(names_idx)

        total_loss = 0

        for i, name_idx in enumerate(names_idx):
            input = inputTensor(name_idx)
            target = targetTensor(name_idx, char2idx)

            loss = train(model, criterion, input, target)
            total_loss += loss

            optimizer.step()

        print(iter, "/", n_iters)
        print("loss {:.4}".format(float(total_loss / len(names_idx))))

        # save trained model
        torch.save(model.state_dict(), "model.pt")
Example 11
def train_initialization(domain, classifier_name, all_data, data_type):
    train_data, test_data, Final_test, Final_test_original, Final_test_gt, unique_vocab_dict, unique_vocab_list = all_data
    output_size = 2
    batch_size = 32
    pre_train = True
    embedding_tune = True
    if data_type == 'train':
        epoch_num = 10 if domain == 'captions' else 4
    else:  # 'dev'
        epoch_num = 3  # sample test
    embedding_length = 300 if domain != 'captions' else 50
    hidden_size = 256 if domain != 'captions' else 32

    learning_rate = collections.defaultdict(dict)
    learning_rate['amazon'] = {'LSTM': 0.001, 'LSTMAtten': 0.0002, 'RNN': 0.001, 'RCNN': 0.001, 'SelfAttention': 0.001, 'CNN': 0.001}
    learning_rate['yelp'] = {'LSTM': 0.002, 'LSTMAtten': 0.0002, 'RNN': 0.0001, 'RCNN': 0.001, 'SelfAttention': 0.0001, 'CNN': 0.001}
    learning_rate['captions'] = {'LSTM': 0.005, 'LSTMAtten': 0.005, 'RNN': 0.01, 'RCNN': 0.01, 'SelfAttention': 0.005, 'CNN': 0.001}

    TEXT, vocab_size, word_embeddings, train_iter, test_iter, Final_test_iter, Final_test_original_iter, Final_test_gt_iter = load_dataset(train_data, test_data, Final_test, Final_test_original, Final_test_gt, embedding_length, batch_size)
    if classifier_name == 'LSTM':
        model = LSTM(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'LSTMAtten':
        model = LSTM_AttentionModel(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'RNN':
        model = RNN(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'RCNN':
        model = RCNN(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'SelfAttention':
        model = SelfAttention(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune)
    elif classifier_name == 'CNN':
        model = CNN(batch_size, output_size, 1, 32, [2,4,6], 1, 0, 0.6, vocab_size, embedding_length, word_embeddings, pre_train, embedding_tune)
    else:
        raise ValueError('Not a valid classifier_name!!!')
    loss_fn = F.cross_entropy
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate[domain][classifier_name])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 2, gamma=0.1)
    return train_iter, test_iter, Final_test_iter, Final_test_original_iter, Final_test_gt_iter, epoch_num, model, loss_fn, optimizer, scheduler
Example 12
def main():
    global device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    notes = loade_data('./notes.json')['notes']
    validation = loade_data('./validation.json')['notes']
    test = loade_data('./test.json')['notes']
    int_to_sign = loade_data('./int2sign.json')
    sign_to_int = loade_data('./sign2int.json')
    seq_length = 100

    #refactor this, we only need a one-hot for the input
    #select a sequence or whatever here, use predefined for now (testing)

    learning_rate = 0.001

    network = LSTM(hidden_size=64, input_size=90, output_size=90)
    criterion = nn.CrossEntropyLoss()
    network.to(device)
    optimizer = optim.Adam(network.parameters(), learning_rate)
    scheduler = optim.lr_scheduler.CyclicLR(optimizer,
                                            base_lr=0.0001,
                                            max_lr=0.001,
                                            cycle_momentum=False)
    # move network to GPU

    print(device)
    #network, _, losses, best_net = trainLoop(network, criterion, notes, optimizer, 3, seq_length, sign_to_int, scheduler)
    best_net = network
    """plt.plot(losses)
    plt.savefig('losses.png')
    plt.close('all')"""
    print('saving network....')
    #save_network(best_net, "net.pth")
    print('evaluating on test data...')
    evaluateAccuracy(test, best_net, seq_length, sign_to_int)
    print("eval done!")
Example 13
class Train():
    def __init__(self, difficulty):
        self.data_path = "../data"
        self.model_path = "../models"
        self.output_path = "../outputs"
        self.difficulty = difficulty
        self.timestamp = str(int(time.time()))
        self.model_name = "lstm_" + self.difficulty
        self.data = Data(difficulty=self.difficulty, data_path=self.data_path)
        (self.img_features, self.w2i, self.i2w, self.nwords, self.UNK, self.PAD) = self.data()
        self.train = list(self.data.get_train_data())
        self.dev = list(self.data.get_validation_data())
        self.test = list(self.data.get_test_data())
        self.image_feature_size = 2048
        self.output_vector_size = 10

    def __call__(self, number_of_iterations = 2, learning_rate = 0.005, embedding_size = 300, hidden_size=100, batch_size=100):
        print("Starting 'Image Retrieval' in 'LSTM' mode with '" + self.difficulty + "' data")

        self.model_full_path = self.model_path + "/" + self.model_name + "_" + self.timestamp + "_" + str(learning_rate) + "_" + str(embedding_size) + ".pty"
        self.output_file_name = self.output_path + "/" + self.model_name + "_" + self.timestamp + "_" + str(learning_rate) + "_" + str(embedding_size) + ".csv"

        self.number_of_iterations = number_of_iterations
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.model = LSTM(self.nwords, self.embedding_size, self.image_feature_size, self.output_vector_size, self.hidden_size, self.batch_size)
        self.criterion = nn.CrossEntropyLoss()

        self.evaluate = Evaluate(self.model, self.img_features, self.minibatch, self.preprocess, self.image_feature_size, self.output_vector_size)
        print(self.model)

        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)

        self.train_loss_values = []

        self.magic()

        self.save_model()

        self.save_data()

    def minibatch(self, data, batch_size = 50):
        for i in range(0, len(data), batch_size):
            yield data[i:i+batch_size]

    def preprocess(self, batch):
        """Helper function for functional batches"""
        correct_indexes = [observation[2] for observation in batch]
        img_ids = [observation[1] for observation in batch]
        text_features = [observation[0] for observation in batch]
        last_words = [len(dialog) for dialog in text_features]

        #Add Padding to max len of sentence in batch
        max_length = max(map(len, text_features))
        text_features = [txt + [self.PAD] * (max_length - len(txt)) for txt in text_features]

        #return in "stacked" format, added last_words for excluding padding effects on LSTM
        return text_features, img_ids, correct_indexes, last_words

    def magic(self):
        for ITER in range(self.number_of_iterations):

            random.shuffle(self.train)
            train_loss = 0.0
            start = time.time()
            iteration = 0

            for batch in self.minibatch(self.train, self.batch_size):
                self.model.zero_grad()
                self.optimizer.zero_grad()
                self.model.hidden = self.model.init_hidden()

                #Load data for model
                text_features, h5_ids, correct_index, last_words = self.preprocess(batch)
                lookup_text_tensor = Variable(torch.LongTensor([text_features])).squeeze()

                full_img_batch = np.empty([len(batch), self.output_vector_size, self.image_feature_size])

                for obs, img_ids in enumerate(h5_ids):
                    for index, h5_id in enumerate(img_ids):
                        full_img_batch[obs, index] = self.img_features[h5_id]
            
                full_img_batch = Variable(torch.from_numpy(full_img_batch).type(torch.FloatTensor))

                #Target
                target = Variable(torch.LongTensor([correct_index])).squeeze()
                #Vector for excluding padding effects
                last_words = Variable(torch.LongTensor(last_words))

                #Run model and calculate loss
                prediction = self.model(lookup_text_tensor, full_img_batch, last_words)
                loss = self.criterion(prediction, target)
                train_loss += loss.item()

                iteration += self.batch_size
                print(iteration)
        
                loss.backward()
                self.optimizer.step()

            print("ITERATION %r: train loss/sent=%.4f, time=%.2fs" % (ITER+1, train_loss/len(self.train), time.time() - start))
            self.train_loss_values.append(train_loss/len(self.train))

    def save_model(self):
        #Save model
        torch.save(self.model, self.model_full_path)
        print("Saved model has test score", self.evaluate(self.test, self.batch_size))

    def plot(self):
        plt.plot(self.train_loss_values, label = "Train loss")
        plt.legend(loc='best')
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title(self.model_name + " - has loss with lr = %.4f, embedding size = %r" % (self.learning_rate, self.embedding_size))
        plt.show()

    def save_data(self):
        file = open(self.output_file_name, "w")
        file.write(", ".join(map(str, self.train_loss_values)))
        file.write("\n")
        file.write(str(self.evaluate(self.test, self.batch_size)))
        file.write("\n")
        file.close()
Example 14
                  batch_size=cfg['model']['batch_size'],
                  output_dim=cfg['model']['data_dim'],
                  num_layers_lstm=cfg['model']['lstm_layers'],
                  inference=False).cuda()

print('-> READ DATA')
dataset = MusicDataset(cfg['data']['processed_numpy_file'],
                       cfg['hyperparams']['sequence_length'],
                       cfg['data']['data_augmentation'])
dataloader = DataLoader(dataset,
                        batch_size=cfg['model']['batch_size'],
                        shuffle=False)

print('-> START TRAINING')
if cfg['hyperparams']['optimiser'] == 'adam':
    optimiser = torch.optim.Adam(lstm_model.parameters(),
                                 lr=cfg['hyperparams']['learning_rate'])

for batch_idx, batch_data in enumerate(dataloader):
    # zero grad model
    optimiser.zero_grad()

    # re-init hidden states
    lstm_model.hidden = lstm_model.init_hidden()

    # sort batch based on sequence length
    sort_batch(batch_data)

    # put batch on GPU
    batch_data = to_cuda(batch_data)
Example 15
def ToVariable(x):
    tmp = torch.FloatTensor(x)
    return Variable(tmp)


use_gpu = torch.cuda.is_available()
# print(use_gpu)
input_size = 900
output_size = 900
hidden_dim = 2000
num_layer = 4
model = LSTM(input_size, hidden_dim, num_layer, output_size)
loss_function = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha=0.9)

if use_gpu:
    model = model.cuda()

root_path = "dataset"
data_num = 100
time_step = 5
datalist = create_datalist(root_path)
train_data, test_data = create_dataset(data_num, datalist, time_step)
# print(len(train_data)) #17*80
# print(len(test_data))  #17*20


def train(epoch):
    for step, input_data in enumerate(train_data, 1):
Example 16
        seq_len = TrainX.shape[1]

        net = LSTM(input_dim,
                   output_dim,
                   seq_len,
                   n_hidden,
                   n_layers,
                   fixed_pt_quantize=fixed_pt_quantize)

        lossfunc = nn.MSELoss()

        lr = 0.002
        if fixed_pt_quantize:
            lr = 0.003

        optimizer = torch.optim.Adamax(net.parameters(), lr=lr)

        ##############################################PRUNING###########################################################################
        if pruning:
            print(
                "Pruning============================================================================"
            )
            figure_name = "/Subject_" + str(Idx_subject) + "_Finger_" + str(
                Finger) + "_pruning"

            PATH_pre_trained = checkpoint_path + '/s' + str(
                Idx_subject) + '_f' + str(Finger) + '_trained_model'
            net.load_state_dict(torch.load(PATH_pre_trained))
            net.train()
            net.threshold_pruning()
            #train the prunned model:
Example 17
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Build the data loader
    dataset, targets = load_dataset()
    print('\nThe data are loaded')

    # Build the models
    lstm = LSTM(args.input_size, args.output_size)
    print('The model is built')
    print(lstm)

    if torch.cuda.is_available():
        lstm.cuda()

    # Loss and Optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(lstm.parameters(), lr=args.learning_rate)

    # Train the Models
    total_time = 0
    sm = 50  # start saving models after 50 epochs

    for epoch in range(args.num_epochs):
        print('\nepoch ' + str(epoch) + ':')
        avg_loss = 0
        start = time.time()

        for i in range(0, len(dataset), args.batch_size):
            lstm.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = bi.view(-1, 1, 32)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = lstm(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.item()
            loss.backward()
            optimizer.step()

        epoch_avg_loss = avg_loss / (len(dataset) / args.batch_size)
        print('--average loss:', epoch_avg_loss)

        end = time.time()
        epoch_time = end - start
        total_time = total_time + epoch_time
        print('time of per epoch:', epoch_time)

        # save the data into csv
        data = [epoch_avg_loss]
        with open(args.model_path + 'lstm_loss.csv', 'a+') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(data)

        if epoch == sm:
            model_path = 'lstm_' + str(sm) + '.pkl'
            torch.save(lstm.state_dict(),
                       os.path.join(args.model_path, model_path))
            sm = sm + args.save_step

    model_path = 'lstm_final.pkl'
    torch.save(lstm.state_dict(), os.path.join(args.model_path, model_path))
Example 18
def main(trial_num):
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_type = "lstm"

    # Hyper-parameters
    sequence_length = 28
    input_size = 28
    num_layers = 1
    hidden_size = 128
    num_classes = 10
    batch_size = 100
    num_epochs = 20
    learning_rate = 0.01
    num_trials = 100
    a_range = [1.0, 3.0]
    # a_s = [1.5, 2.0, 2.2, 2.5, 2.7, 3.0]

    # just for testing
    # num_trials = 1
    # num_epochs = 20
    # a_s = [1.0]

    # for a in a_s:
    trials = {}
    for num_trial in range(num_trials):
        a = random.random() * (a_range[1] - a_range[0]) + a_range[0]
        print('trial Num: ', trial_num,  "a: ", a, "num_trial: ", num_trial)
        trial = {}
        trial['a'] = a
        # define model
        if model_type == 'lstm':
            model = LSTM(input_size, hidden_size, num_layers, num_classes, a, device).to(device)
        elif model_type == 'gru':
            model = GRU(input_size, hidden_size, num_layers, num_classes, a, device).to(device)

        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        train_dataloader = MNIST_dataloader(batch_size, train=True)
        test_dataloader = MNIST_dataloader(batch_size, train=False)
        # Train the model
        total_step = len(train_dataloader.dataloader)

        total = 0
        total_loss = 0
        for epoch in range(num_epochs):
            model.train()
            for i, (images, labels) in enumerate(train_dataloader.dataloader):
                images = images.reshape(-1, sequence_length, input_size).to(device)
                labels = labels.to(device)

                # Forward pass
                outputs, hts = model(images)
                loss = criterion(outputs, labels)
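                # accumulate loss weighted by batch size so it can later be averaged per sample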
                total_loss += loss * labels.size(0)
                total += labels.size(0)
                # print(LEs, rvals)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # if (i + 1) % 300 == 0:
                #     print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                #           .format(epoch + 1, num_epochs, i + 1, total_step, total_loss / total))

            # for i, (name, param) in enumerate(model.named_parameters()):
            #     if i == 3:
            #         print(name, param)
            # Test the model
            model.eval()
            with torch.no_grad():
                correct = 0
                total = 0
                total_loss = 0
                for i, (images, labels) in enumerate(test_dataloader.dataloader):
                    images = images.reshape(-1, sequence_length, input_size).to(device)
                    labels = labels.to(device)
                    outputs, _ = model(images)

                    # h = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                    # c = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                    # params = (images, (h, c))
                    # if i == 0:
                    #     LEs, rvals = calc_LEs_an(*params, model=model)

                    loss = criterion(outputs, labels)

                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    total_loss += loss * labels.size(0)
                if epoch == (num_epochs - 1):
                    print('Epoch [{}/{}] Loss: {}, Test Accuracy: {} %'.format(epoch + 1, num_epochs, total_loss / total, 100 * correct / total))
            saved_model = copy.deepcopy(model)
            trial[epoch] = {"model": saved_model, "accuracy": 100 * correct / total, "loss": total_loss / total}
            del saved_model
        trials[num_trial] = trial
        pickle.dump(trials, open('trials/{}/models/{}_{}_trials_{}.pickle'.format(model_type, model_type, hidden_size, trial_num), 'wb'))
Example 19
def train(args):
    prefix = ''
    f_prefix = '.'
    
    if not os.path.isdir("log/"):
        print("Directory creation script is running...")
        subprocess.call([f_prefix+'/make_directories.sh'])

    args.freq_validation = np.clip(args.freq_validation, 0, args.num_epochs)
    validation_epoch_list = list(range(args.freq_validation, args.num_epochs+1, args.freq_validation))
    validation_epoch_list[-1]-=1


    # Create the data loader object. This object would preprocess the data in terms of
    # batches each of size args.batch_size, of length args.seq_length
    dataloader = DataLoader(f_prefix, args.batch_size, args.seq_length, args.num_validation, forcePreProcess=True)

    method_name = "VANILLALSTM"
    model_name = "LSTM"
    save_tar_name = method_name+"_lstm_model_"
    if args.gru:
        model_name = "GRU"
        save_tar_name = method_name+"_gru_model_"

    # Log directory
    log_directory = os.path.join(prefix, 'log/')
    plot_directory = os.path.join(prefix, 'plot/', method_name, model_name)
    plot_train_file_directory = 'validation'



    # Logging files
    log_file_curve = open(os.path.join(log_directory, method_name, model_name,'log_curve.txt'), 'w+')
    log_file = open(os.path.join(log_directory, method_name, model_name, 'val.txt'), 'w+')

    # model directory
    save_directory = os.path.join(f_prefix, 'model')
    
    # Save the arguments in the config file
    with open(os.path.join(save_directory, method_name, model_name,'config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Path to store the checkpoint file
    def checkpoint_path(x):
        return os.path.join(save_directory, method_name, model_name, save_tar_name+str(x)+'.tar')
    
    # model creation
    net = LSTM(args)
    if args.use_cuda:
        net = net.cuda()

    # optimizer = torch.optim.Adagrad(net.parameters(), weight_decay=args.lambda_param)
    optimizer = torch.optim.RMSprop(net.parameters(), lr=args.learning_rate)
    loss_f = torch.nn.MSELoss()
    learning_rate = args.learning_rate

    best_val_loss = 100
    best_val_data_loss = 100

    smallest_err_val = 100000
    smallest_err_val_data = 100000


    best_epoch_val = 0
    best_epoch_val_data = 0

    best_err_epoch_val = 0
    best_err_epoch_val_data = 0

    all_epoch_results = []
    grids = []
    num_batch = 0
    validation_dataset_executed = False

    # Training
    for epoch in range(args.num_epochs):
        print('****************Training epoch beginning******************')
        if dataloader.additional_validation and (epoch-1) in validation_epoch_list:
            dataloader.switch_to_dataset_type(True)
        dataloader.reset_batch_pointer(valid=False)
        loss_epoch = 0
        # For each batch
        # num_batches: how many batches the data is split into, i.e. how many iterations to run
        
        for batch in range(dataloader.num_batches):
            start = time.time()

            # print(dataloader.num_batches, dataloader.batch_size)
            
            # Get batch data
            x, y, d = dataloader.next_batch(randomUpdate=False)
            
            loss_batch = 0

            # x_cat = Variable(torch.from_numpy(np.array(x[0])).float())
            x_seq = np.array(x)
            y_seq = np.array(y)
            x_seq = Variable(torch.from_numpy(x_seq).float())
            y_seq = Variable(torch.from_numpy(y_seq).float())
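            # the last two feature columns are split off into temp and passed to the model separately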
            temp = x_seq[:,:,-2:]
            x_seq = x_seq[:,:,:-2]
            y_seq = y_seq[:,:,:3]
            
            hidden_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size))
            cell_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size))

            if args.use_cuda:                  
                x_seq = x_seq.cuda()
                y_seq = y_seq.cuda()
                temp = temp.cuda()
                hidden_states = hidden_states.cuda()
                cell_states = cell_states.cuda()

            # Zero out gradients
            net.zero_grad()
            optimizer.zero_grad()
            
            outputs, _, _ = net(x_seq, temp, hidden_states, cell_states)

            loss = loss_f(outputs, y_seq)
            loss_batch = loss.detach().item()

            # Compute gradients
            loss.backward()

            # Clip gradients
            torch.nn.utils.clip_grad_norm_(net.parameters(), args.grad_clip)

            # Update parameters
            optimizer.step()

            end = time.time()
            loss_epoch += loss_batch

            print('{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}'.format((batch+1) * dataloader.batch_size,
                                                                                    dataloader.num_batches * dataloader.batch_size,
                                                                                    epoch,
                                                                                    loss_batch, end - start))
        loss_epoch /= dataloader.num_batches
        print("Training epoch: "+str(epoch)+" loss: "+str(loss_epoch))
        #Log loss values
        log_file_curve.write("Training epoch: "+str(epoch)+" loss: "+str(loss_epoch)+'\n')


        # Validation dataset
        if dataloader.additional_validation and (epoch) in validation_epoch_list:
            dataloader.switch_to_dataset_type()
            print('****************Validation with dataset epoch beginning******************')
            dataloader.reset_batch_pointer(valid=False)
            dataset_pointer_ins = dataloader.dataset_pointer
            validation_dataset_executed = True

            loss_epoch = 0
            err_epoch = 0
            num_of_batch = 0
            smallest_err = 100000

            #results of one epoch for all validation datasets
            epoch_result = []
            #results of one validation dataset
            results = []

            # For each batch
            for batch in range(dataloader.num_batches):
                # Get batch data
                x, y, d = dataloader.next_batch(randomUpdate=False)

                # Loss for this batch
                loss_batch = 0
                err_batch = 0

                # For each sequence
                for sequence in range(len(x)):
                    # Get the sequence
                    x_seq = x[sequence]
                    y_seq = y[sequence]
                    x_seq= np.array(x_seq)
                    y_seq= np.array(y_seq)[:,:3]
                    x_seq = Variable(torch.from_numpy(x_seq).float())
                    y_seq = Variable(torch.from_numpy(y_seq).float())

                    temp = x_seq[:,-2:]
                    x_seq = x_seq[:,:-2]
                    y_seq = y_seq[:,:3]

                    if args.use_cuda:
                        x_seq = x_seq.cuda()
                        y_seq = y_seq.cuda()
                        temp = temp.cuda()

                    #will be used for error calculation
                    orig_x_seq = y_seq.clone() 

                    # print(x_seq.size(), args.seq_length)

                    with torch.no_grad():
                        hidden_states = Variable(torch.zeros(1, args.rnn_size))
                        cell_states = Variable(torch.zeros(1, args.rnn_size))
                        ret_x_seq = Variable(torch.zeros(args.seq_length, net.input_size))
                        # all_outputs = Variable(torch.zeros(1, args.seq_length, net.input_size))

                        # Initialize the return data structure
                        if args.use_cuda:
                            ret_x_seq = ret_x_seq.cuda()
                            hidden_states = hidden_states.cuda()
                            cell_states = cell_states.cuda()

                        total_loss = 0
                        # For the observed part of the trajectory
                        for tstep in range(args.seq_length):
                            outputs, hidden_states, cell_states = net(x_seq[tstep].view(1, 1, net.input_size), temp[tstep].view(1, 1, temp.size()[-1]), hidden_states, cell_states)
                            ret_x_seq[tstep, 0] = outputs[0,0,0]
                            ret_x_seq[tstep, 1] = outputs[0,0,1]
                            ret_x_seq[tstep, 2] = outputs[0,0,2]
                            print(outputs.size(), )
                            loss = loss_f(outputs, y_seq[tstep].view(1, 1, y_seq.size()[1]))
                            total_loss += loss

                        total_loss = total_loss / args.seq_length

                    #get mean and final error
                    # print(ret_x_seq.size(), y_seq.size())
                    err = get_mean_error(ret_x_seq.data, y_seq.data, args.use_cuda)

                    loss_batch += total_loss.item()
                    err_batch += err
                    print('Current file : ',' Batch : ', batch+1, ' Sequence: ', sequence+1, ' Sequence mean error: ', err, 'valid_loss: ',total_loss.item())
                    results.append((y_seq.data.cpu().numpy(), ret_x_seq.data.cpu().numpy()))

                loss_batch = loss_batch / dataloader.batch_size
                err_batch = err_batch / dataloader.batch_size
                num_of_batch += 1
                loss_epoch += loss_batch
                err_epoch += err_batch

            epoch_result.append(results)
            all_epoch_results.append(epoch_result)

            if dataloader.num_batches != 0:            
                loss_epoch = loss_epoch / dataloader.num_batches
                err_epoch = err_epoch / dataloader.num_batches
                # avarage_err = (err_epoch + f_err_epoch)/2

                # Update best validation loss until now
                if loss_epoch < best_val_data_loss:
                    best_val_data_loss = loss_epoch
                    best_epoch_val_data = epoch

                if err_epoch<smallest_err_val_data:
                    # Save the model after each epoch
                    print('Saving model')
                    torch.save({
                        'epoch': epoch,
                        'state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, checkpoint_path(epoch))

                    smallest_err_val_data = err_epoch
                    best_err_epoch_val_data = epoch

                print('(epoch {}), valid_loss = {:.3f}, valid_mean_err = {:.3f}'.format(epoch, loss_epoch, err_epoch))
                print('Best epoch', best_epoch_val_data, 'Best validation loss', best_val_data_loss, 'Best error epoch',best_err_epoch_val_data, 'Best error', smallest_err_val_data)
                log_file_curve.write("Validation dataset epoch: "+str(epoch)+" loss: "+str(loss_epoch)+" mean_err: "+str(err_epoch.data.cpu().numpy())+'\n')
            


        optimizer = time_lr_scheduler(optimizer, epoch, lr_decay_epoch = args.freq_optimizer)

    if dataloader.valid_num_batches != 0:        
        print('Best epoch', best_epoch_val, 'Best validation Loss', best_val_loss, 'Best error epoch',best_err_epoch_val, 'Best error', smallest_err_val)
        # Log the best epoch and best validation loss
        log_file.write('Validation Best epoch:'+str(best_epoch_val_data)+','+' Best validation Loss: '+str(best_val_data_loss))

    if dataloader.additional_validation:
        print('Best epoch according to validation dataset', best_epoch_val_data, 'Best validation Loss', best_val_data_loss, 'Best error epoch',best_err_epoch_val_data, 'Best error', smallest_err_val_data)
        log_file.write("Validation dataset Best epoch: "+str(best_epoch_val_data)+','+' Best validation Loss: '+str(best_val_data_loss)+' Best error epoch: '+str(best_err_epoch_val_data)+'\n')
        #dataloader.write_to_plot_file(all_epoch_results[best_epoch_val_data], plot_directory)

    #elif dataloader.valid_num_batches != 0:
    #    dataloader.write_to_plot_file(all_epoch_results[best_epoch_val], plot_directory)

    #else:
    if validation_dataset_executed:
        dataloader.switch_to_dataset_type(load_data=False)
        create_directories(plot_directory, [plot_train_file_directory])
        dataloader.write_to_plot_file(all_epoch_results[len(all_epoch_results)-1], os.path.join(plot_directory, plot_train_file_directory))

    # Close logging files
    log_file.close()
    log_file_curve.close()
Example 20
seed_everything(seed)

df = get_df(DATA_PATH, columns).reset_index(drop=True)
price = df['Closing_price']
train_len = len(df) - valid_len

_, train, valid, scaler = get_data(DATA_PATH, columns, valid_len)
train_loader = generate_dataset(train, seq_length)
valid_loader = generate_dataset(valid, seq_length)

test_inputs = train[-seq_length:].tolist()

model = LSTM()
model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=10, cooldown=5)


def main():
    train_losses = []
    valid_losses = []
    best_loss = np.inf
    for i in range(epochs):
        train_loss = 0
        valid_loss = 0
        if i % 10 == 0:
            print('-----------------------')
            print(f'epoch: {i+1} / {epochs}')
            print('-----------------------')
        for seq, label in (train_loader):
Example 21
    for shop in tqdm(range(_SHOP_NUM)):
        if X_train is None:
            X_train, y_train = np.load(_TITLE_TRAIN_FEATURES.format(shop)), np.load(_TITLE_TRAIN_LABELS.format(shop))
        else:
            X, y = np.load(_TITLE_TRAIN_FEATURES.format(shop)), np.load(_TITLE_TRAIN_LABELS.format(shop))
            X_train, y_train = np.vstack([X_train, X]), np.vstack([y_train, y])

    X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32)

    print(X_train.shape, y_train.shape)

    train_data = TensorDataset(X_train, y_train)
    trainloader = DataLoader(train_data, batch_size=64, shuffle=True)

    net = LSTM()
    optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
    criterion = nn.MSELoss()
    running_loss = 0.0

    print('Start training...')
    for epoch in range(EPOCHS):
        print('{} epoch begins...'.format(epoch + 1))
        for i, data in enumerate(trainloader):
            net.zero_grad()
            X, y = data
            X = X.view(-1, 34, 3)
            outputs = net(X)
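            # the target is the second-to-last column of y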
            labels = y[:, -2:-1]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
Example 22
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    idx_to_word, word_to_idx, vocab_size, in_text, out_text = read_file(
        train_file, batch_size, seq_size)

    num_batches, _ = in_text.shape

    val_index = np.random.choice(np.arange(num_batches),
                                 int(num_batches * val_data_proportion),
                                 replace=False)
    train_index = np.delete(np.arange(num_batches), val_index)

    train_in_text = in_text[train_index, :]
    train_out_text = out_text[train_index, :]

    val_in_text = in_text[val_index, :]
    val_out_text = out_text[val_index, :]

    # print(num_batches)
    # print(train_in_text.shape)
    # print(val_in_text.shape)
    # print(vocab_size)

    lstm_model = LSTM(vocab_size, seq_size, emb_size, hidden_size)
    lstm_model = lstm_model.to(device)

    lstm_optim = optim.Adam(lstm_model.parameters(), lr=l_rate)
    loss_function = torch.nn.CrossEntropyLoss()

    train_set_loss = []
    val_set_loss = []

    for i in range(epoch):
        train_batches = generate_batch(train_in_text, train_out_text,
                                       batch_size, seq_size)
        val_batches = generate_batch(val_in_text, val_out_text, batch_size,
                                     seq_size)
        h0, c0 = lstm_model.initial_state(batch_size)
        h0 = h0.to(device)
        c0 = c0.to(device)
        total_loss, iterations, val_loss, val_iterations = 0, 0, 0, 0

        # training_batch
        for x, y in train_batches:
            iterations += 1
            lstm_model.train()
            # shape of x is (batch_size, seq_size)
            x = torch.tensor(x).to(device)
            y = torch.tensor(y).to(device)

            lstm_optim.zero_grad()
            logits, (h0, c0) = lstm_model(x, (h0, c0))
            _, _, n_cat = logits.shape
            loss = loss_function(logits.view(-1, n_cat), y.view(-1))
            total_loss += loss.item()
            loss.backward()

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            h0 = h0.detach()
            c0 = c0.detach()

            _ = torch.nn.utils.clip_grad_norm_(lstm_model.parameters(),
                                               gradients_norm)
            lstm_optim.step()
            # break

        for x_val, y_val in val_batches:

            val_iterations += 1
            lstm_model.eval()

            x_val = torch.tensor(x_val).to(device)
            y_val = torch.tensor(y_val).to(device)

            logits, (h0, c0) = lstm_model(x_val, (h0, c0))

            _, _, n_cat = logits.shape
            loss = loss_function(logits.view(-1, n_cat), y_val.view(-1))
            val_loss += loss.item()

        avg_loss = total_loss / iterations
        val_avg_loss = val_loss / val_iterations
        train_set_loss.append(avg_loss)
        val_set_loss.append(val_avg_loss)

        print('Epoch: {}'.format(i), 'Loss: {}'.format(avg_loss),
              'Validation Loss: {}'.format(val_avg_loss))
        # if i % 10 == 0:
        # 	torch.save(lstm_model.state_dict(),'checkpoint_pt/model-{}.pth'.format(i))
    _ = predict(device,
                lstm_model,
                vocab_size,
                word_to_idx,
                idx_to_word,
                top_k=predict_top_k)
    return train_set_loss, val_set_loss
Example 23
class StockPrediction():
    def __init__(self, stock, time_window, batch_size, learning_rate=0.001):

        self.stock = stock
        self.time_window = time_window
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.input_size = 4
        self.output_size = 1
        self.nb_neurons = 200

        self.prepare_data()
        self.output = "/Users/baptiste/Desktop/training"

    def validate(self):

        self.lstm_model.eval()
        error = []
        loss_function = nn.MSELoss()
        it = iter(self.real_data_dataloader)
        real_data = next(it)
        loss = []
        for i, (x, _) in enumerate(self.testing_dataloader):
            try:
                with torch.no_grad():
                    pred = self.lstm_model(x.float())
                    pred = self.data.unnormalizeData(pred)
                    real_data = real_data.view(-1, 1)
                    error = self.compute_error(error, pred, real_data)
                real_data = next(it)
            except:
                pass
        error_mean = np.mean(error) * 100
        print("Mean error percentage : ", error_mean)
        self.lstm_model.train()

    def compute_error(self, error, pred, target):

        for i in range(self.batch_size):
            error.append(abs(pred[i, 0] - target[i, 0]) / target[i, 0])
        return (error)

    def prepare_data(self):

        validation_split = 0
        test_split = 0.1
        train_split = 1 - validation_split - test_split

        self.data = Data(self.stock)
        df = self.data.getData()
        df_normalized = self.data.normalizeData(df)
        df_normalized = torch.FloatTensor(df_normalized.to_numpy())

        train_split = int(train_split * df.shape[0])
        validation_split = int(validation_split * df.shape[0])
        test_split = int(test_split * df.shape[0])

        training_split = df_normalized[:train_split, :]

        training_data = Dataset(training_split, self.time_window)
        self.training_dataloader = DataLoader(training_data,
                                              batch_size=self.batch_size)

        #testing_data
        real_data_tensor = torch.FloatTensor(df.to_numpy())
        self.real_data_test = torch.FloatTensor(
            real_data_tensor[-test_split:-self.time_window, 3])
        testing_dataset = Dataset(df_normalized[-test_split:, :],
                                  self.time_window)
        self.testing_dataloader = DataLoader(testing_dataset,
                                             batch_size=self.batch_size)
        self.real_data_dataloader = DataLoader(self.real_data_test,
                                               batch_size=self.batch_size)

    def train(self):

        #Model
        self.lstm_model = LSTM(self.input_size, self.output_size,
                               self.nb_neurons)
        self.lstm_model.load_state_dict(
            torch.load("/Users/baptiste/Desktop/training/AAPL_36.pth"))
        loss_function = nn.MSELoss()
        optimizer = torch.optim.Adam(self.lstm_model.parameters(),
                                     lr=self.learning_rate)
        print("Start training")
        for epoch in range(nb_epochs):

            for (x, y) in self.training_dataloader:

                optimizer.zero_grad()
                self.lstm_model.hidden_cell = (torch.zeros(
                    1, self.batch_size, self.lstm_model.nb_neurons),
                                               torch.zeros(
                                                   1, self.batch_size,
                                                   self.lstm_model.nb_neurons))
                pred = self.lstm_model(x.float())
                y = y.view(self.batch_size, 1)
                loss = loss_function(pred, y)
                loss.backward()
                optimizer.step()

            print("epoch n°%s : loss = %s" % (epoch, loss.item()))
            self.validate()
            if epoch % 5 == 1:
                model_name = "%s_%s.pth" % (self.stock, epoch)
                torch.save(self.lstm_model.state_dict(),
                           os.path.join(self.output, model_name))

    def show_result(self):

        files = os.listdir(self.output)
        for file in files:
            if ".pth" in file:
                path = os.path.join(self.output, file)
                lstm_model = LSTM(self.input_size, self.output_size,
                                  self.nb_neurons)
                lstm_model.load_state_dict(torch.load(path))
                lstm_model.eval()
                print("model : %s loaded" % path)
                predictions = []

                for (x, _) in self.testing_dataloader:
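                    # skip the final partial batch: the hidden state below is sized for a full batch_size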
                    if x.shape[0] == self.batch_size:
                        with torch.no_grad():
                            lstm_model.hidden_cell = (
                                torch.zeros(1, self.batch_size,
                                            lstm_model.nb_neurons),
                                torch.zeros(1, self.batch_size,
                                            lstm_model.nb_neurons))
                            output = lstm_model(x.float())
                            output = self.data.unnormalizeData(
                                output).squeeze()
                            predictions += output.tolist()

                plt.plot(predictions, label="prediction")
                plt.plot(self.real_data_test, label="target")
                plt.title(file)
                plt.legend()
                plt.show()
Exemplo n.º 24
0
epochs = 10

data, targets = get_data()
data = torch.FloatTensor(data)
data = data.unsqueeze(-1)
data = data.permute(1, 2, 0)
targets = torch.FloatTensor(targets).unsqueeze(-1).unsqueeze(-1)
# L1 Norm, sum to 1
# s_targets = data[1:, :, :-1]
# data = f.normalize(data, p=1, dim=2)
# targets = data[1:, :, :-1]

l = LSTM()

# optimizer = optim.SGD(l.parameters(), lr=0.001)
optimizer = optim.Adam(l.parameters(), lr=0.02)
loss_function = nn.MSELoss()
# loss_function = nn.L1Loss()
import random

sequence = 10


def sample(data, targets, s):
    # pick a random window of length s + 1 from the sequence
    n = random.randint(0, len(data) - (s + 5))
    return data[n: n + s + 1], targets[n: n + s + 1]


for _ in range(1, epochs + 1):
    epoch_loss = 0
    l.reset_hidden()
Exemplo n.º 25
0
from model import Resnet50
from model import LSTM
from model import Loss

# loading features extracted by the pretrained model
train_features = torch.load('train_features.pt')  # list of tensors
valid_features = torch.load('valid_features.pt')  # list of tensors
train_vals = torch.load('train_vals.pt')  # list of tensors
valid_vals = torch.load('valid_vals.pt')  # list of tensors

# model, optimizer, loss function
feature_size = 2048
learning_rate = 0.0001
model = LSTM(feature_size).cuda()
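# NOTE: the next line re-binds `model` to the object loaded from disk, discarding the LSTM constructed above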
model = torch.load("../problem2/best_rnnbased.pth")
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='min',
                                                       factor=0.5,
                                                       patience=5,
                                                       verbose=True)
loss_function = Loss()

for param_group in optimizer.param_groups:
    param_group['lr'] = learning_rate

# some training parameters
BATCH_SIZE = 32
EPOCH_NUM = 500
datalen = len(train_features)
datalen_valid = len(valid_features)
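
# The snippet above sets up a ReduceLROnPlateau scheduler but is cut off before
# the training loop that would drive it. A minimal, hypothetical continuation is
# sketched below; the batching over train_features and the exact call signatures
# of `model` and `Loss` are assumptions, not part of the original code.
for epoch in range(EPOCH_NUM):
    model.train()
    # ... iterate over train_features / train_vals in chunks of BATCH_SIZE,
    #     compute loss_function(...), call loss.backward() and optimizer.step() ...

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for feat, val in zip(valid_features, valid_vals):
            pred = model(feat.cuda().unsqueeze(0))  # assumed forward signature
            val_loss += loss_function(pred, val.cuda().unsqueeze(0)).item()
    val_loss /= datalen_valid
    # ReduceLROnPlateau monitors this metric and halves the lr (factor=0.5)
    # after `patience` epochs without improvement.
    scheduler.step(val_loss)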
Exemplo n.º 26
0
def train(feature, label, epochs, model, layer, hidden, save, postfix, index2char, index2phone, phone_map, phone2index):
	dataset = Feature_Dataset(feature,'train')
	train_size = int(0.9*len(dataset))
	if feature == 'mfcc':
		feature_dim = 39
	elif feature == 'fbank':
		feature_dim = 69
	elif feature == 'all':
		feature_dim = 108

	print("Building model and optimizer...")
	if model == 'LSTM':
		train_model = LSTM(feature_dim,hidden,layer)
	elif model == 'C_RNN':
		group_size = 5 
		train_model = C_RNN(group_size,feature_dim,hidden,layer)
	elif model == 'BiLSTM':
		train_model = LSTM(feature_dim, hidden, layer, bi = True)
	
	if USE_CUDA:
		train_model = train_model.cuda()
	optimizer = optim.Adam(train_model.parameters(), lr = 0.005)
	#optimizer = optim.SGD(train_model.parameters(),lr = 0.1)
	criterion = nn.NLLLoss()
	if USE_CUDA:
		criterion = criterion.cuda() 

	for epoch in range(1,epochs+1):
		print("Epoch {}".format(epoch))
		epoch_loss = 0
		epoch_edit = 0
		for i in tqdm(range(1,train_size+1)):
			data = dataset[i-1]
			speaker = data[0]
		
			train_model.zero_grad()
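			# start each utterance with a fresh hidden state (no state carried across speakers)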
			input_hidden = train_model.init_hidden()
			
			train_feature = Variable(data[1].float())
			output =  train_model(train_feature,input_hidden)
			
			output_seq = test_trim(index2char, index2phone, phone_map, phone2index, torch.max(output,1)[1].data.cpu().numpy())
			target_seq = trim_and_map(index2char,index2phone, phone_map, phone2index, [[int(l)] for l in label[speaker]])
			
			target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
			target = target.cuda() if USE_CUDA else target
			
			loss = criterion(output,target)
			edit = editdistance.eval(output_seq,target_seq)

			epoch_loss += loss.data[0]/train_size
			epoch_edit += edit/train_size
		
			loss.backward()
			optimizer.step()

		print("Negative log-likelihood: {}".format(epoch_loss))
		print("Edit distance: {} ".format(epoch_edit))
		val_loss = 0
		val_edit = 0
		for i in tqdm(range(train_size+1,len(dataset)+1)):
			data = dataset[i-1]
			speaker = data[0]
			val_feature = Variable(data[1].float())
			
			output = train_model(val_feature,train_model.init_hidden())
			target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
			target = target.cuda() if USE_CUDA else target
			
			val_loss += criterion(output,target).data[0]		
			output_seq = test_trim(index2char,index2phone, phone_map, phone2index,torch.max(output,1)[1].data.cpu().numpy())
			target_seq = trim_and_map(index2char,index2phone, phone_map, phone2index,[[int(l)] for l in label[speaker]])
				
			val_edit += editdistance.eval(output_seq,target_seq)
		print("Validation loss: {}".format(val_loss/(len(dataset)-train_size)))
		print("Validation edit distance: {}".format(val_edit/(len(dataset)-train_size)))

		if epoch%save == 0:
			directory = os.path.join(SAVE_DIR, feature, model, '{}-{}{}'.format(layer,hidden,postfix))
			if not os.path.exists(directory):
				os.makedirs(directory)
			torch.save({
				'model': train_model.state_dict(),
				'opt': optimizer.state_dict(),
				'val_loss': val_loss/(len(dataset)-train_size),
				'val_edit': val_edit/(len(dataset)-train_size),
				}, os.path.join(directory, '{}.tar'.format(epoch)))
	print("Finish training")
Exemplo n.º 27
0
LABEL.build_vocab(train)

train_iter, val_iter = data.BucketIterator.splits((train, val),
                                                  batch_size=BATCH_SIZE,
                                                  device='cuda')
train_iter.repeat = False
val_iter.repeat = False

# Instantiate the classifier
net = LSTM(layer_dim=N_LAYERS, hidden_dim=100, vocab_size=len(TEXT.vocab),
           embedding_dim=EMBEDDING_DIM, output_dim=N_CLASSES,
           dropout_proba=0.2).cuda()

# Define loss function and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-08)

# train model
epochs = 5

for epoch in range(epochs):
    train_loss = 0
    val_loss = 0

    net.train()
    train_correct = 0
    for batch in tqdm(train_iter):
        optimizer.zero_grad()
        text, target = batch.text, batch.label
        output = net(text)
Exemplo n.º 28
0
                    os.path.join(opt['train']['pretrained'],
                                 'short_discriminator.pkl')))
            long_discriminator.load_state_dict(
                torch.load(
                    os.path.join(opt['train']['pretrained'],
                                 'long_discriminator.pkl')))
            print('discriminator model loaded')

    # print('ztta:', ztta.size())
    # assert 0

    ## initialize optimizer_g ##
    optimizer_g = optim.Adam(lr = opt['train']['lr'], params = list(state_encoder.parameters()) +\
                                             list(offset_encoder.parameters()) +\
                                             list(target_encoder.parameters()) +\
                                             list(lstm.parameters()) +\
                                             list(decoder.parameters()), \
                                             betas = (opt['train']['beta1'], opt['train']['beta2']), \
                                             weight_decay = opt['train']['weight_decay'])
    if len(opt['train']['pretrained']) > 0:
        optimizer_g.load_state_dict(
            torch.load(
                os.path.join(opt['train']['pretrained'], 'optimizer_g.pkl')))
        print('optimizer_g model loaded')
    ## initialize optimizer_d ##
    if opt['train']['use_adv']:
        optimizer_d = optim.Adam(lr = opt['train']['lr'] * 0.1, params = list(short_discriminator.parameters()) +\
                                             list(long_discriminator.parameters()), \
                                             betas = (opt['train']['beta1'], opt['train']['beta2']), \
                                             weight_decay = opt['train']['weight_decay'])
        if len(opt['train']['pretrained']) > 0:
Exemplo n.º 29
0
from Parameters import Parameter
import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.optim as optim
from DataProcessing import DataProcessing
from model import LSTM
import numpy as np
import time

models = LSTM().double()
models = models.cuda()
loss_function = nn.BCELoss(size_average=True, reduce=True)
optimizer = optim.Adam(models.parameters())

# See what the scores are before training
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()

DataObject = DataProcessing()

for epoch in range(
        300):  # again, normally you would NOT do 300 epochs, it is toy data
    print("Beginning as a batch")
    StepsOfEpoch = 0
    DataMethodObject = DataObject.FetchInputsAndLabels()
    for wav, label in DataMethodObject:
        then = time.time()
        StepsOfEpoch += 1
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
Exemplo n.º 30
0
def train(config, start_epoch=1, best_validation_loss=np.inf):
    """Trains AWD-LSTM model using parameters from config."""
    print(f'Training for {config.epochs} epochs using the {config.dataset}',
          f'dataset with lambda value of {config.encoding_lmbd}')

    device = torch.device(config.device)
    dataLoader = DataLoader(config.dataset, config.batch_size, device,
                            config.bptt)
    model = LSTM(embedding_size=config.embedding_size,
                 hidden_size=config.hidden_size,
                 lstm_num_layers=config.lstm_num_layers,
                 vocab_size=len(dataLoader.corpus.dictionary),
                 batch_size=config.batch_size,
                 dropoute=config.dropoute,
                 dropouti=config.dropouti,
                 dropouth=config.dropouth,
                 dropouto=config.dropouto,
                 weight_drop=config.weight_drop,
                 tie_weights=config.tie_weights,
                 device=device)

    # D is the set of gendered words, N the set of gender-neutral words (not entirely correct, but close enough)
    D, N = get_gendered_words(config.dataset, dataLoader.corpus)

    criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.learning_rate,
                                weight_decay=config.weight_decay)

    def using_asgd():
        """Checks if optimizer is using ASGD"""
        return 't0' in optimizer.param_groups[0]

    if not config.overwrite and check_model_exists(config):
        print("Loading model from precious state")
        model, optimizer, start_epoch, best_validation_loss = load_current_state(
            model, optimizer, config)
        if using_asgd():
            temp = torch.optim.ASGD(model.parameters(),
                                    lr=config.learning_rate,
                                    t0=0,
                                    lambd=0.,
                                    weight_decay=config.weight_decay)
            temp.load_state_dict(optimizer.state_dict())
            optimizer = temp
        print("start epoch", start_epoch)

    params = list(model.parameters()) + list(criterion.parameters())

    val_losses = deque(maxlen=config.nonmono)

    for e in range(start_epoch, config.epochs + 1):
        epoch_done = False
        model.train()
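        # reset the recurrent state so activations from the previous epoch do not leak in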
        model.initialize_hidden()

        epoch_loss = 0  # Loss over the epoch
        n_batch = 0  # Number of batches that have been done
        t_start = time.time()
        print(f"starting epoch {e}/{config.epochs}")

        while not epoch_done:
            lr = optimizer.param_groups[0]['lr']

            # tr_batch, tr_labels are matrices with horizontal sequences.
            # seq_len is the sequence length in this iteration of the epoch,
            # see the OpenReview paper mentioned in the dataloader file
            tr_batch, tr_labels, seq_len, epoch_done = dataLoader.get_train_minibatch(
            )

            # Rescale learning rate for sequence length
            optimizer.param_groups[0]['lr'] = lr * seq_len / config.bptt

            n_batch += 1
            model.detach_hidden()  # Need to prevent improper backprop
            optimizer.zero_grad()

            out, _, lstm_raw_out, lstm_drop_out = model(tr_batch,
                                                        return_out=True)
            loss = criterion(out.permute(0, 2, 1), tr_labels.t())

            # AR (activation regularisation) optimisation
            if config.alpha:
                loss += config.alpha * lstm_drop_out.pow(2).mean()

            # TAR (temporal activation regularisation) optimisation
            if config.beta:
                loss += config.beta * (lstm_raw_out[1:] -
                                       lstm_raw_out[:-1]).pow(2).mean()

            # Encoding bias regularization
            if config.encoding_lmbd > 0:
                loss += bias_regularization_term(model.embed.weight, D, N,
                                                 config.bias_variation,
                                                 config.encoding_lmbd)

            # Decoding bias regularization
            if config.decoding_lmbd > 0:
                loss += bias_regularization_term(model.decoder.weight, D, N,
                                                 config.bias_variation,
                                                 config.decoding_lmbd)

            loss.backward()

            # Gradient clipping added to see effects. Turned off by default
            if config.clip: torch.nn.utils.clip_grad_norm_(params, config.clip)

            optimizer.step()

            # Add current loss to epoch loss
            epoch_loss += loss.item()

            # Return learning rate to default
            optimizer.param_groups[0]['lr'] = lr

            # Evaluate the training
            if n_batch % config.batch_interval == 0:
                cur_loss = epoch_loss / n_batch
                elapsed = float(time.time() - t_start)
                examples_per_second = n_batch / elapsed
                print(
                    '| epoch {:3d} | {:5d} batch | lr {:05.5f} | batch/s {:5.2f} | '
                    'train loss {:5.2f} | perplexity {:5.2f} |'.format(
                        e, n_batch, optimizer.param_groups[0]['lr'],
                        examples_per_second, cur_loss, np.exp(cur_loss)))

        print("Saving current model")
        save_current_state(model, optimizer, e, best_validation_loss, config)

        # Evaluate the model on the validation set for early stopping
        if e % config.eval_interval == 0:
            print('Evaluating on validation for early stopping criterion')
            test_done = False
            model.initialize_hidden()
            model.eval()
            epoch_loss = 0
            n_batch = 0
            tot_seq_len = 0
            while not test_done:
                n_batch += 1
                va_batch, va_labels, seq_len, test_done = dataLoader.get_validation_minibatch(
                )
                tot_seq_len += seq_len
                out, _ = model(va_batch)
                model.detach_hidden()
                loss = criterion(out.permute(0, 2, 1), va_labels.t())
                epoch_loss += loss.item()
            cur_loss = epoch_loss / n_batch

            if best_validation_loss > cur_loss:
                print("best_validation_loss > cur_loss")
                best_validation_loss = cur_loss
                val_losses.append(cur_loss)
                save_for_early_stopping(model, config, best_validation_loss)

            print(
                '| epoch {:3d} | lr {:05.5f} | validation loss {:5.2f} | perplexity {:5.2f} |'
                .format(e, optimizer.param_groups[0]['lr'], cur_loss,
                        np.exp(cur_loss)))

            if not config.no_asgd and not using_asgd() and (
                    len(val_losses) == val_losses.maxlen
                    and cur_loss > min(val_losses)):
                print('Switching to ASGD')
                optimizer = torch.optim.ASGD(model.parameters(),
                                             lr=config.learning_rate,
                                             t0=0,
                                             lambd=0.,
                                             weight_decay=config.weight_decay)

        # Evaluate the model on the test set
        if e % config.eval_interval == 0:
            print('Evaluating on test')
            test_done = False
            model.eval()
            model.initialize_hidden()
            epoch_loss = 0
            n_batch = 0
            while not test_done:
                n_batch += 1
                te_batch, te_labels, seq_len, test_done = dataLoader.get_test_minibatch(
                )
                out, _ = model(te_batch)
                model.detach_hidden()
                loss = criterion(out.permute(0, 2, 1), te_labels.t())
                epoch_loss += loss.item()
            cur_loss = epoch_loss / n_batch

            print(
                '| epoch {:3d} | lr {:05.5f} | test loss {:5.2f} | perplexity {:5.2f} |'
                .format(e, optimizer.param_groups[0]['lr'], cur_loss,
                        np.exp(cur_loss)))

    print(
        f'Training is done. Best validation loss: {best_validation_loss}, validation perplexity: {np.exp(best_validation_loss)}'
    )
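
# The bias_regularization_term used above is not shown in this snippet. A
# hypothetical sketch of what such an encoding/decoding bias regularizer might
# compute is given below (in the spirit of Bordia & Bowman, 2019); the actual
# implementation referenced by this code may differ in its details.
def bias_regularization_sketch(embedding, D, N, var_ratio, lmbd):
    # embedding: (vocab, dim) weight matrix; D: list of (word_a, word_b) index
    # pairs defining the gender direction; N: indices of gender-neutral words.
    diffs = torch.stack([embedding[a] - embedding[b] for a, b in D])
    # Keep enough right singular vectors to explain `var_ratio` of the variance
    # of the difference vectors; they span an approximate gender subspace.
    _, s, v = torch.svd(diffs)
    explained = torch.cumsum(s.pow(2) / s.pow(2).sum(), dim=0)
    k = int((explained < var_ratio).sum().item()) + 1
    B = v[:, :k]  # (dim, k) gender-subspace basis
    # Penalize the squared projection of neutral-word embeddings onto that subspace.
    proj = embedding[N] @ B  # (|N|, k)
    return lmbd * proj.pow(2).sum()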