Example #1
    def run(self):
        starttime = time.time()
        rootName = self.rootDir

        if os.path.exists(rootName):
            root = loadModel(rootName)
        else:
            dictName = self.dictDir
            word_freq = loadWords(dictName)
            root = TrieNode('*', word_freq)
            saveModel(root, rootName)

        # Load the new document
        fileName = self.demoDir
        data = self.loadData(fileName, self.stopwords)
        # Insert the new document into the trie root
        self.loadData2Root(root, data)

        # Take the top N = 5 candidates
        N = 5
        result, add_word = root.wordFind(N)
        # To debug or choose a different threshold, print `result` and adjust
        print("\n----\n", '增加了 %d 个新词, 词语和得分分别为: \n' % len(add_word))
        print('#############################')
        for word, score in add_word.items():
            print(word + ' ---->  ', score)
        print('#############################\n')

        for word, score in add_word.items():
            jieba.add_word(word)

        print("互信息、信息熵:")
        print("".join([(x + '/ ') for x in jieba.cut(self.test_text, cut_all=False) if x not in self.stopwords]))
        endtime = time.time()
        print('time cost:' + str(round((endtime - starttime), 4)) + ' seconds.\n')
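The scoring inside TrieNode.wordFind is not shown in this snippet; new-word discovery of this kind typically ranks candidates by pointwise mutual information plus the entropy of the neighbouring characters (the "互信息、信息熵" printed above). A rough scoring sketch under that assumption, with hypothetical names:

import math

def candidateScoreSketch(freq, left, right, neighbours, total):
    # Hypothetical scoring: PMI of the two halves plus the entropy of the
    # candidate's neighbouring characters; higher means more word-like.
    p_joint = freq.get(left + right, 1) / total
    p_left = freq.get(left, 1) / total
    p_right = freq.get(right, 1) / total
    pmi = math.log(p_joint / (p_left * p_right), 2)
    counts = list(neighbours.values())
    probs = [c / sum(counts) for c in counts] if counts else []
    entropy = -sum(p * math.log(p, 2) for p in probs if p > 0)
    return pmi + entropy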
Example #2
def runNetwork(model, batch_size, epochs):
    """
    Compiles and runs the network with ImageDataGenerator.

    Args:
        model: a neural network model structure
        batch_size: number of training examples per iteration
        epochs: number of passes over the training data

    Returns:
        None; prints the model's test accuracy and saves the model
    """
    x_train, y_train, x_test, y_test = prepareData()

    # Compile the network
    model.compile(loss="categorical_crossentropy",  # cross-entropy over the predicted class probabilities
                  optimizer="rmsprop",
                  metrics=["accuracy"])             # report accuracy during training

    # Training
    model.fit_generator(createGenerator().flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size, epochs=epochs, verbose=1,
                        validation_data=(x_test, y_test))

    # Evaluate performance. If training accuracy >> test accuracy, the model is overfitting.
    test_loss, test_acc = model.evaluate(x_test, y_test)
    print("Accuracy: ", str(test_acc))
    utils.saveModel(model)
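A minimal, hypothetical call of runNetwork; the Sequential architecture and input shape below are illustrative assumptions, not taken from the source project (prepareData and createGenerator supply the real data and augmentation):

from tensorflow.keras import layers, models

# Illustrative architecture only; any Keras model accepted by compile()/fit_generator() works.
model = models.Sequential([
    layers.Flatten(input_shape=(32, 32, 3)),   # assumed input shape
    layers.Dense(128, activation="relu"),
    layers.Dense(10, activation="softmax"),    # one-hot targets for categorical_crossentropy
])
runNetwork(model, batch_size=64, epochs=20)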
Example #3
def trainingLoopAE(obj, model, num_epoch, train_dataloader, criterion,
                   optimizer, loss_dict, path, device):
    """
    A universal training loop to optimize any loss function

    Args:
    
    obj       : object (e.g. point cloud) the model is trained on
    model     : neural network architecture (the autoencoder)
    num_epoch : total number of training epochs
    criterion : loss function to optimize
    train_dataloader : DataLoader over the training set
    optimizer : optimizer used to minimize the loss
    loss_dict : dictionary keeping track of the running loss per model
    path      : directory where checkpoints are stored
    device    : GPU or CPU device
    """

    # Check whether the checkpoint directory exists; if not, create it
    if not os.path.exists(path):
        os.makedirs(path)

    curr_epoch = currEpoch(path)
    epochs_left = num_epoch - curr_epoch

    # Load the latest saved checkpoint, if one exists
    if curr_epoch != -1:
        model.load_state_dict(torch.load(path + '/' + str(curr_epoch)))
        # print(model)
        model.eval()

    for epoch in range(epochs_left):  # loop over the dataset multiple times
        running_loss = 0.0

        for data in train_dataloader:
            data = data.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(data)
            loss = criterion(outputs, data)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

        loss_dict[model.name].append(running_loss)

        if epoch % 50 == 0:  # plot and checkpoint every 50 epochs
            plotLoss(loss_dict, model.name)
            plotPointCloud(obj, model)
            saveModel(path, model, epoch)
            print(epoch)
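A hypothetical invocation of trainingLoopAE; the toy autoencoder, random data, and hyper-parameters below are assumptions for illustration only (currEpoch, plotLoss, plotPointCloud, and saveModel come from the surrounding project):

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ae = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 3)).to(device)
ae.name = 'ToyAE'  # the loop indexes loss_dict by model.name
loader = DataLoader(torch.randn(256, 3), batch_size=32)
trainingLoopAE(obj=torch.randn(1024, 3),  # placeholder object; the project passes its point cloud here
               model=ae, num_epoch=200, train_dataloader=loader,
               criterion=nn.MSELoss(),
               optimizer=torch.optim.Adam(ae.parameters(), lr=1e-3),
               loss_dict={'ToyAE': []}, path='./checkpoints', device=device)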
Example #4
    def train(self, epoch=5):
        self.iteration = 0
        best_dev_loss = self.eval()

        for ep in range(epoch):
            epoch_tic = time.time()
            for batch_idx, (data, target, imageName) in enumerate(self.loader):
                self.model.train()  # set training mode
                self.optimizer.zero_grad()
                #self.model.zero_grad()
                self.model.hidden = self.model.init_hidden(data[0].shape[0])

                if self.device is not None:
                    data = (data[0].cuda(self.device),
                            data[1].cuda(self.device),
                            data[2].cuda(self.device))
                    target = target.cuda(self.device)
                    self.model.hidden = (self.model.hidden[0].cuda(
                        self.device), self.model.hidden[1].cuda(self.device))

                # forward pass
                output = self.model(data)
                loss = self.loss_fn(torch.squeeze(output), target.float())

                # backward pass
                loss.backward()

                # weight update
                self.optimizer.step()

                if self.iteration % self.log_interval == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                          format(ep, batch_idx * len(data),
                                 len(self.loader.dataset),
                                 100. * batch_idx / len(self.loader),
                                 loss.item()))
                    self.train_logger.writeLoss(self.iteration, loss.item())

                if self.iteration != 0 and self.iteration % self.eval_interval == 0:
                    dev_loss = self.eval()
                    if dev_loss < best_dev_loss:
                        best_dev_loss = dev_loss
                        saveModel('%s/hp-best.pth' % self.exp_path, self.model,
                                  self.optimizer)
                #if self.iteration % self.save_interval == 0:
                #    self.saveModel()

                self.iteration += 1
            epoch_toc = time.time()
            print('End of epoch %i. Seconds took: %.2f s.' %
                  (ep, epoch_toc - epoch_tic))
Example #5
def train(model,
          x_train,
          y_train,
          x_test,
          y_test,
          loss_op,
          optimization,
          epochs,
          model_save_path='',
          model_name='model'):
    train_loss, train_accuracy, test_loss, test_accuracy = getTrainAndTestMetrics()
    train_summary_writer, test_summary_writer = trainLogging.getTrainAndTestSummaryWriters()
    n_batches = len(x_train)

    best_accuracy = 0

    for epoch in range(epochs):
        n_batch = 0
        for x, y in zip(x_train, y_train):
            n_batch += 1

            trainLogging.printTrainingBatchProgress(epoch + 1, epochs, n_batch,
                                                    n_batches, train_loss,
                                                    train_accuracy)
            trainStep(model, x, y, loss_op, optimization, train_loss,
                      train_accuracy)

        testStep(model, x_test, y_test, loss_op, test_loss, test_accuracy)
        trainLogging.printTrainingEpochProgress(epoch + 1, epochs, n_batch,
                                                n_batches, train_loss,
                                                train_accuracy, test_loss,
                                                test_accuracy)

        if test_accuracy.result() > best_accuracy:
            best_accuracy = test_accuracy.result()
            utils.saveModel(model, model_save_path, model_name)
            print(
                'Saved new best model with accuracy: {}'.format(best_accuracy))

        # Reset the metrics for the next epoch
        train_loss, train_accuracy, test_loss, test_accuracy = resetMetrics(
            train_loss, train_accuracy, test_loss, test_accuracy)

    return model
Example #6
    def train(self, epoch=5):
        self.model.train()  # set training mode
        self.iteration = 0
        best_dev_loss = self.eval()
        for ep in range(epoch):
            epoch_tic = time.time()
            for batch_idx, (data, target) in enumerate(self.loader):
                if self.device is not None:
                    data, target = data.cuda(self.device), target.cuda(
                        self.device)
                self.optimizer.zero_grad()

                # forward pass
                output = self.model(data)
                output = func.log_softmax(output, dim=1)
                loss = func.nll_loss(output, target)

                # backward pass
                loss.backward()

                # weight update
                self.optimizer.step()

                if self.iteration % self.log_interval == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                          format(ep, batch_idx * len(data),
                                 len(self.loader.dataset),
                                 100. * batch_idx / len(self.loader),
                                 loss.item()))
                    self.train_logger.writeLoss(self.iteration, loss.item())

                if self.iteration != 0 and self.iteration % self.eval_interval == 0:
                    dev_loss = self.eval()
                    if dev_loss < best_dev_loss:
                        best_dev_loss = dev_loss
                        saveModel('%s/lm-best.pth' % self.exp_path, self.model,
                                  self.optimizer)
                #if self.iteration % self.save_interval == 0:
                #    self.saveModel()

                self.iteration += 1
            epoch_toc = time.time()
            print('End of epoch %i. Seconds took: %.2f s.' %
                  (ep, epoch_toc - epoch_tic))
Example #7
def runNetwork(model, batch_size, epochs):
    x_train, y_train, x_test, y_test = prepareData()
    # Compile the network
    model.compile(
        loss="categorical_crossentropy",  # cross-entropy over the predicted class probabilities
        optimizer="rmsprop",
        metrics=["accuracy"])  # report accuracy during training

    # Training
    model.fit_generator(createGenerator().flow(x_train,
                                               y_train,
                                               batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_test, y_test))

    test_loss, test_acc = model.evaluate(x_test, y_test)
    print("Accuracy: ", str(test_acc))
    utils.saveModel(model)
Example #8
        r2 = sklearn.metrics.r2_score(x1.cpu().data, y1.cpu().data)
        print('        Test R² = %.2f' % (r2), '\n')

    avg_time_per_epoch += (timer() - start)

    # Early stopping
    if arg.patience > 0:  # When 0 or smaller, run until end of epochs
        if test_score > prev_test_score:
            early_stop_count += 1
            if early_stop_count == arg.patience:
                print('Early stopping condition reached')
                last_epoch = epoch + 1
                break
        else:
            early_stop_count = 0
            saveModel(model, save_path)
            prev_test_score = test_score
            best_epoch = epoch + 1

avg_time_per_epoch = avg_time_per_epoch / last_epoch
print('Average time per epoch: %.2fs\n' % avg_time_per_epoch)

# ---- REVERT TO BEST MODEL ----
if arg.patience > 0:
    print('Load best known configuration (epoch %d)\n' % best_epoch)
    model.load_state_dict(torch.load(
        '%s/PoNDeR.pth' % (save_path)))  # Load best known configuration

# ---- PLOTTING ----
print('Running final eval on test set...')
test_score, x1, y1 = evaluateModel(model,
Example #9
def train(model,
          old_model,
          epoch,
          lr,
          tempature,
          lamda,
          train_loader,
          test_loader,
          modelPath,
          checkPoint,
          useCuda=True,
          adjustLR=False,
          earlyStop=False,
          tolearnce=4):

    tolerance_cnt = 0
    step = 0
    best_acc = 0

    if useCuda:
        model = model.cuda()
        old_model = old_model.cuda()

    ceriation = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=5e-4)

    # train
    for epoch_index in range(1, epoch + 1):

        sum_loss = 0
        sum_dist_loss = 0

        model.train()
        old_model.eval()
        old_model.freeze_weight()

        if adjustLR:  # use LR adjustment
            cifar100_adjust_lr(optimizer, lr, epoch_index)

        for batch_idx, (x, target) in enumerate(train_loader):

            optimizer.zero_grad()

            if useCuda:  # use GPU
                x, target = x.cuda(), target.cuda()

            x, target = Variable(x), Variable(target)
            logits = model(x)
            cls_loss = ceriation(logits, target)

            dist_loss = Variable(torch.zeros(1).cuda())
            if i > 1:  # distill loss is not used in first class batch
                dist_target = old_model(x)
                logits_dist = logits[:, :-CLASS_NUM_IN_BATCH]
                dist_loss = MultiClassCrossEntropy(logits_dist, dist_target,
                                                   tempature)
                loss = cls_loss + lamda * dist_loss
            else:
                loss = cls_loss

            sum_loss += loss.data[0]
            sum_dist_loss += dist_loss.data[0]

            loss.backward()
            optimizer.step()

            step += 1

            if (batch_idx + 1) % checkPoint == 0 or (batch_idx +
                                                     1) == len(train_loader):
                print(
                    '==>>> epoch: {}, batch index: {}, step: {}, train loss: {:.6f}, dist loss:{:.6f}'
                    .format(epoch_index, batch_idx + 1, step,
                            sum_loss / (batch_idx + 1),
                            sum_dist_loss / (batch_idx + 1)))

        acc = inference(model, test_loader, useCuda=True, k=1)

        # early stopping
        if earlyStop:
            if acc < best_acc:
                tolerance_cnt += 1
            else:
                best_acc = acc
                tolerance_cnt = 0
                saveModel(model, epoch_index, best_acc, modelPath)

            if tolerance_cnt >= tolearnce:
                print("early stopping training....")
                saveModel(model, epoch_index, best_acc, modelPath)
                return model
        else:
            if best_acc < acc:
                saveModel(model, epoch_index, acc, modelPath)
                best_acc = acc

    print("best acc:", best_acc)
Example #10
def trainingLoopGAN(obj,
                    training_generator,
                    generator,
                    discriminator,
                    model_name,
                    num_epoch,
                    optimizer_g,
                    optimizer_d,
                    loss_dict,
                    path,
                    device,
                    ae=None,
                    mu=0,
                    sigma=0.2,
                    discriminator_boost=5,
                    lambda_gp=10):
    """
    A universal training loop to optimize any loss function

    Args:

    obj       : Object on which model is to be trained
    training_generator  : Training Set Generator
    generator           : Architecture of the generator
    discriminator       : Architecture of the discriminator
    num_epoch           : number of iterations
    optimizer_g         : optimize the loss for generator using this optimizer
    optimizer_d         : optimize the loss for discriminator using this optimizer
    loss_dict           : A dictionary to keep track of loss
    path                : location where to store model
    device              : GPU or CPU device
    ae                  : Autoencoder Model
    mu, sigma           : Mean and Standard Deviation for Normal Distribution 
    discriminator_boost : For every training iteration of generator train the 
                            critic this many times
    lambda_gp           : Regularizing factor for Gradient Penalty
    """

    # Check whether the checkpoint directory exists; if not, create it
    if not os.path.exists(path):
        os.makedirs(path)

    for epoch in range(num_epoch):  # loop over the dataset multiple times
        running_loss_g = 0.0
        running_loss_d = 0.0

        for data in training_generator:
            data = data.to(device)
            if ae is not None:
                data = ae(data)
            for _ in range(discriminator_boost):
                # zero the parameter gradients
                optimizer_d.zero_grad()

                # forward + backward + optimize
                noise = noiseFunc(mu, sigma, data.shape[0], device)
                outputs = generator(noise)
                loss_d_fake = discriminator(outputs).mean()
                loss_d_fake.backward()

                loss_d_real = discriminator(data).mean()
                loss_d = loss_d_real + loss_d_fake

                # Gradient Penalty for Latent GAN
                if 'Latent' in generator.name:
                    grad_penal = compute_gradient_penalty(
                        discriminator, data, outputs, device)
                    loss_d = loss_d + lambda_gp * grad_penal
                optimizer_d.step()
                running_loss_d += loss_d

            optimizer_g.zero_grad()
            noise = torch.randn((50, 128)).to(device)
            outputs = generator(noise)
            loss_g = discriminator(outputs).mean()
            loss_g.backward()
            optimizer_g.step()
            running_loss_g += loss_g

        loss_dict[generator.name].append(running_loss_g)
        loss_dict[discriminator.name].append(running_loss_d)

        if epoch % 50 == 0:  # plot and checkpoint every 50 epochs
            plotLoss(loss_dict, model_name)
            plotPointCloud(obj, generator)
            saveModel(path + 'Gen ', generator, epoch)
            saveModel(path + 'Dis ', discriminator, epoch)
            print(epoch)
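compute_gradient_penalty is referenced but not included in the snippet; for a WGAN-GP style critic it is usually the penalty on the gradient norm of the discriminator at points interpolated between real and generated samples. A sketch under that assumption:

import torch

def computeGradientPenaltySketch(discriminator, real, fake, device):
    # Standard WGAN-GP penalty ((||grad D(x_hat)||_2 - 1)^2).mean() at interpolated
    # points x_hat; assumed implementation, the project's helper may differ.
    alpha = torch.rand(real.size(0), *([1] * (real.dim() - 1)), device=device)
    x_hat = (alpha * real + (1 - alpha) * fake.detach()).requires_grad_(True)
    d_out = discriminator(x_hat)
    grads = torch.autograd.grad(outputs=d_out, inputs=x_hat,
                                grad_outputs=torch.ones_like(d_out),
                                create_graph=True)[0]
    grads = grads.view(grads.size(0), -1)
    return ((grads.norm(2, dim=1) - 1) ** 2).mean()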
Example #11
File: LM.py Project: lovygit/LMRC
def train(model, head_index, epoch, lr, output_dim, train_loader, test_loader, label_dict,
          modelPath, checkPoint, useCuda=True, adjustLR=False, earlyStop=False, tolearnce=4):

    tolerance_cnt = 0
    step = 0
    best_acc, best_new_acc, best_old_acc = 0, 0, 0

    old_model = copy.deepcopy(model)  # copy the old model

    if useCuda:
        model = model.cuda()
        old_model = old_model.cuda()

    ceriation = CosineLoss(output_dim)
    # optimizer = optim.Adam(net.parameters(), lr=lr)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

    # train
    for epoch_index in range(1, epoch+1):

        sum_loss = 0

        model.train()
        old_model.eval()
        # old_model.freeze_weight()

        if adjustLR:  # use LR adjustment
            ImgaeNet200_adjust_lr(optimizer, lr, epoch_index)

        for batch_idx, (x, target) in enumerate(train_loader):

            optimizer.zero_grad()

            #  get label vector
            y_vec_train_np = labels2Vec(target, label_dict, output_dim)
            y_vec_train = torch.from_numpy(y_vec_train_np)

            if useCuda:  # use GPU
                x, y_vec_train = x.cuda(), y_vec_train.cuda()
            x, y_vec_train = Variable(x), Variable(y_vec_train)

            # only calculate new loss
            out = model(x, head_index)
            loss = ceriation(out, y_vec_train)
            sum_loss += loss.data[0]

            loss.backward()
            optimizer.step()

            step += 1

            if (batch_idx + 1) % checkPoint == 0 or (batch_idx + 1) == len(train_loader):
                print('==>>> epoch: {}, batch index: {}, step: {}, train loss: {:.6f},'.
                      format(epoch_index, batch_idx + 1, step, sum_loss/(batch_idx+1),))

        acc = inference(model, test_loader, label_dict, useCuda=True, k=5)

        # observe old and new class acc
        new_testLoader, new_test_classes = load_ImageNet200_online([test_root],
                                                                category_indexs=class_index[i:i + CLASS_NUM_IN_BATCH],
                                                                batchSize=batch_size, train=False)
        print("new test classes")
        new_acc = inference(model, new_testLoader, label_dict, useCuda=True, k=5)

        old_acc = 0
        if i != 0:
            old_testLoader, old_test_classes = load_ImageNet200_online([test_root],
                                                                    category_indexs=class_index[:i],
                                                                    batchSize=batch_size, train=False)
            print("old test classes")
            old_acc = inference(model, old_testLoader, label_dict, useCuda=True, k=5)

        # early stopping
        if earlyStop:
            if acc < best_acc:
                tolerance_cnt += 1
            else:
                best_acc = acc
                tolerance_cnt = 0
                saveModel(model, epoch_index, best_acc, modelPath)

            if tolerance_cnt >= tolearnce:
                print("early stopping training....")
                saveModel(model, epoch_index, best_acc, modelPath)
                return model
        else:
            if best_acc < acc:
                saveModel(model, epoch_index, acc, modelPath)
                best_acc = acc
                best_new_acc = new_acc
                best_old_acc = old_acc

    print("best acc:", best_acc)
    print("best new acc:", best_new_acc)
    print("best old acc:", best_old_acc)
Example #12
def train(model,
          head_index,
          lamda,
          epoch,
          lr,
          train_loader,
          test_loader,
          T,
          modelPath,
          checkPoint,
          useCuda=True,
          adjustLR=False,
          earlyStop=False,
          tolearnce=4):

    tolerance_cnt = 0
    step = 0
    best_acc = 0

    old_model = copy.deepcopy(model)  # copy old model

    if useCuda:
        model = model.cuda()
        old_model = old_model.cuda()

    ceriation = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)

    # train
    for epoch_index in range(1, epoch + 1):

        sum_loss = 0
        old_sum_loss = 0
        new_sum_loss = 0

        model.train()
        old_model.eval()
        old_model.freeze_weight()

        if adjustLR:  # use LR adjustment
            ImgaeNet200_adjust_lr(optimizer, lr, epoch_index)

        for batch_idx, (x, target) in enumerate(train_loader):

            optimizer.zero_grad()

            if useCuda:  # use GPU
                x, target = x.cuda(), target.cuda()
            test_x = Variable(x, volatile=True)
            x, target = Variable(x), Variable(target)

            # get response from old heads
            old_outputs = []
            old_mid_out = old_model.get_middle_output(test_x)
            for head_idx in range(len(model.head_list) - 1):
                old_output = old_model.get_output(old_mid_out, head_idx)
                old_output = old_output.cpu().data.numpy()
                old_output = Variable(torch.from_numpy(old_output).cuda())
                old_outputs.append(old_output)

            # distilling loss
            old_loss = Variable(torch.zeros(1).cuda())
            new_mid_out = model.get_middle_output(x)
            for idx in range(len(old_outputs)):
                out = model.get_output(new_mid_out, idx)
                old_loss += MultiClassCrossEntropy(out, old_outputs[idx], T=T)

            # calculate new loss
            out = model.get_output(new_mid_out, head_index)
            target -= CLASS_NUM_IN_BATCH * head_index  # transform the class labels
            new_loss = ceriation(out, target)

            loss = new_loss + lamda * old_loss

            sum_loss += loss.data[0]
            old_sum_loss += old_loss.data[0]
            new_sum_loss += new_loss.data[0]

            loss.backward()
            optimizer.step()

            step += 1

            if (batch_idx + 1) % checkPoint == 0 or (batch_idx +
                                                     1) == len(train_loader):
                print(
                    '==>>> epoch: {}, batch index: {}, step: {}, train loss: {:.6f},'
                    ' new loss: {:.6f}, old loss: {:.6f}'.format(
                        epoch_index, batch_idx + 1, step,
                        sum_loss / (batch_idx + 1),
                        new_sum_loss / (batch_idx + 1),
                        old_sum_loss / (batch_idx + 1)))

        acc = inference(model, test_loader, useCuda=True, k=5)

        #  observe new and old classes acc
        new_testLoader, new_test_classes = load_ImageNet200_online(
            [test_root],
            category_indexs=class_index[i:i + CLASS_NUM_IN_BATCH],
            batchSize=batch_size,
            train=False)
        print("new test classes")
        new_acc = inference(model, new_testLoader, useCuda=True, k=5)
        if i != 0:
            old_testLoader, old_test_classes = load_ImageNet200_online(
                [test_root],
                category_indexs=class_index[:i],
                batchSize=batch_size,
                train=False)
            print("old test classes")
            old_acc = inference(model, old_testLoader, useCuda=True, k=5)

        # early stopping
        if earlyStop:
            if acc < best_acc:
                tolerance_cnt += 1
            else:
                best_acc = acc
                tolerance_cnt = 0
                saveModel(model, epoch_index, best_acc, modelPath)

            if tolerance_cnt >= tolearnce:
                print("early stopping training....")
                saveModel(model, epoch_index, best_acc, modelPath)
                return model
        else:
            if best_acc < acc:
                saveModel(model, epoch_index, acc, modelPath)
                best_acc = acc

    print("best acc:", best_acc)
Example #13
def train(model,
          batchSize,
          epoch,
          checkPoint,
          savePoint,
          modelPath,
          curEpoch=0,
          best_acc=0,
          useCuda=True,
          adjustLR=True,
          earlyStop=True,
          tolearnce=4):

    tolerance_cnt = 0
    step = 0

    if useCuda:
        model = model.cuda()

    ceriation = nn.CrossEntropyLoss()
    # optimizer = optim.Adam(net.parameters(), lr=args.lr)
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=0.9,
                          weight_decay=5e-4)
    trainLoader, testLoader = loadCIFAR10(batchSize=batchSize)

    for i in range(curEpoch, curEpoch + epoch):

        model.train()

        # training
        sum_loss = 0

        for batch_idx, (x, target) in enumerate(trainLoader):

            optimizer.zero_grad()
            if adjustLR:
                adjust_lr(optimizer, epoch)

            if useCuda:
                x, target = x.cuda(), target.cuda()

            x, target = Variable(x), Variable(target)
            out = model(x)

            loss = ceriation(out, target)
            sum_loss += loss.item()

            loss.backward()
            optimizer.step()

            step += 1

            if (batch_idx + 1) % checkPoint == 0 or (batch_idx +
                                                     1) == len(trainLoader):
                print(
                    '==>>> epoch: {}, batch index: {}, step: {}, train loss: {:.6f}'
                    .format(i, batch_idx + 1, step,
                            sum_loss / (batch_idx + 1)))

            # save model every savepoint steps
            # if (step + 1) % savePoint == 0:
            #     saveModel(model, i, best_acc, modelPath)
            #     print("----------save finish----------------")

        acc = test(net, testLoader, useCuda=True)

        # early stopping
        if earlyStop:
            if acc < best_acc:
                tolerance_cnt += 1
            else:
                best_acc = acc
                tolerance_cnt = 0
                saveModel(model, epoch, best_acc, modelPath)

            if tolerance_cnt >= tolearnce:
                print("early stopping training....")
                saveModel(model, epoch, best_acc, modelPath)
                return
        else:
            if best_acc < acc:
                saveModel(model, epoch, acc, modelPath)
                best_acc = acc
Example #14
import preprocess
from config import config
import modelCNN
from utils import saveModel

if __name__ == '__main__':
    raw_data = preprocess.readData(config)
    sms_text, sms_label = preprocess.cleanData(raw_data)
    x_train, y_train, x_val, y_val = preprocess.categorical(
        sms_text, sms_label, config)
    embedding_layer = preprocess.train_dic(sms_text, config)
    clf = modelCNN.trainCNN(x_train, y_train, x_val, y_val, embedding_layer)
    saveModel(clf, "CNN", config)
Example #15
import preprocess
from config import config
import models
from utils import saveModel, logging

if __name__ == '__main__':
    raw_data = preprocess.readData(config)
    sms_text, sms_label = preprocess.cleanData(raw_data)

    x_train, y_train, x_test, y_test = preprocess.vectorize(
        sms_text, sms_label, config)

    clf = models.trainNB(x_train, y_train, x_test, y_test)
    saveModel(clf, "贝叶斯", config)
    clf = models.trainTree(x_train, y_train, x_test, y_test)
    saveModel(clf, "决策树", config)
    clf = models.trainRandomForest(x_train, y_train, x_test, y_test)
    saveModel(clf, "随机森林", config)
    clf = models.trainLinearSVC(x_train, y_train, x_test, y_test)
    saveModel(clf, "线性SVC", config)
    clf = models.trainSGDSVM(x_train, y_train, x_test, y_test)
    saveModel(clf, "SGDSVM", config)
    clf = models.trainSGDLog(x_train, y_train, x_test, y_test)
    saveModel(clf, "SDD逻辑回归", config)
Example #16
def train(epoch):
    generator_loss = []
    discriminator_loss = []
    discriminator_acc = []

    for i, (imgs, labels) in enumerate(dataloader, 1):
        batch_size = imgs.shape[0]

        # Adversarial ground truths
        valid = FloatTensor(batch_size, 1).fill_(1.0)
        fake = FloatTensor(batch_size, 1).fill_(0.0)

        # Configure input
        real_imgs = imgs.type(FloatTensor)
        labels = labels.type(FloatTensor)
        # print("Label.shape: \t{}".format(labels.shape))

        # --------------------- #
        #  Train Generator      #
        # --------------------- #
        optimizer_G.zero_grad()

        # Sample noise and labels as generator input
        z = FloatTensor(
            np.random.normal(0, 1, size=(batch_size, opt.latent_dim)))
        gen_labels = FloatTensor(np.random.randint(0, 2, size=(batch_size, 1)))
        gen_labels = torch.cat((1 - gen_labels, gen_labels), dim=1)

        # Generate a batch of images
        gen_imgs = generator(z, gen_labels)

        # Loss measures generator's ability to fool the discriminator
        validity, pred_label = discriminator(gen_imgs)
        g_loss = adversarial_loss(validity, valid) + auxiliary_loss(
            pred_label, gen_labels)

        # Comment:
        # Adversarial loss:
        #   Measures how easily the discriminator spots G(z, class) as fake;
        #   the generator minimizes it so its samples pass as real.
        # Auxiliary loss:
        #   Measures P(class | G(z, class)); the generator maximizes it, i.e. minimizes
        #   the gap between the class predicted for G(z, class) and the requested class.

        g_loss.backward()
        optimizer_G.step()

        generator_loss.append(g_loss.item())

        # --------------------- #
        #  Train Discriminator  #
        # --------------------- #
        optimizer_D.zero_grad()

        # Loss for real images
        # real_pred, real_aux = discriminator(real_imgs)
        # d_real_loss = (adversarial_loss(real_pred, valid) + auxiliary_loss(real_aux, labels))

        # Loss for fake images
        # fake_pred, fake_aux = discriminator(gen_imgs.detach())
        # d_fake_loss = (adversarial_loss(fake_pred, fake) + auxiliary_loss(fake_aux, gen_labels))

        # Loss for Adversarial loss
        real_adv, real_aux = discriminator(real_imgs)
        fake_adv, fake_aux = discriminator(gen_imgs.detach())

        d_loss_adv = adversarial_loss(real_adv, valid) + adversarial_loss(
            fake_adv, fake)
        d_loss_aux = auxiliary_loss(real_aux, labels)

        # Total discriminator loss
        # d_loss = 0.5 * (d_real_loss + d_fake_loss)
        d_loss = d_loss_adv + d_loss_aux

        # Comment:
        # Adversarial loss:
        #   Measures how well the discriminator separates the two sources,
        #   classifying G(z, class) as fake and X as real.
        # Auxiliary loss:
        #   Measures P(class | X); the discriminator maximizes it, i.e. minimizes
        #   the gap between the class it predicts for X and the true label.

        # Calculate discriminator class accuracy
        # pred = np.concatenate([real_aux.data.cpu().numpy(), fake_aux.data.cpu().numpy()], axis=0)
        # gt = np.concatenate([labels.data.cpu().numpy(), gen_labels.data.cpu().numpy()], axis=0)
        # d_acc = np.mean(np.argmax(pred, axis=1) == gt)
        cls_pred = real_aux.data.cpu().numpy()
        gt = labels.data.cpu().numpy()
        d_acc_real = np.mean(
            np.argmax(cls_pred, axis=1) == np.argmax(gt, axis=1))

        cls_pred = fake_aux.data.cpu().numpy()
        gt = gen_labels.data.cpu().numpy()
        d_acc_fake = np.mean(
            np.argmax(cls_pred, axis=1) == np.argmax(gt, axis=1))

        d_loss.backward()
        optimizer_D.step()

        discriminator_loss.append(d_loss.item())
        discriminator_acc.append((d_acc_real + d_acc_fake) / 2)

        batches_done = (epoch - 1) * len(dataloader) + i

        # 1. Logging
        if batches_done % opt.log_interval == 0:
            print(
                "[Epoch %d] [Batch %d/%d] [D loss: %f, accR: %d%%, accF: %d%%] [G loss: %f]"
                % (epoch, i, len(dataloader), d_loss.item(), 100 * d_acc_real,
                   100 * d_acc_fake, g_loss.item()))

        # 2. Sampling
        if batches_done % opt.sample_interval == 0:
            number = batches_done // opt.sample_interval
            sample_image(number)

        # 3. Saving
        if batches_done % opt.save_interval == 0:
            number = batches_done // opt.save_interval

            savepath = "./models/acgan/{}-{}".format(opt.tag, feature)
            utils.saveModel(
                os.path.join(savepath, "generator_{}.pth".format(number)),
                generator)

            print("Model saved to: {}, iteration: {}".format(savepath, number))

    return generator_loss, discriminator_loss, discriminator_acc
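adversarial_loss, auxiliary_loss, and FloatTensor are defined outside this snippet; a setup consistent with the float one-hot labels used above could look like the following, though the project's actual definitions are not shown and may differ:

import torch
import torch.nn as nn

# Assumed globals for the snippet above; illustrative only.
adversarial_loss = nn.BCELoss()  # validity head: real vs. fake
auxiliary_loss = nn.BCELoss()    # class head, compared against one-hot float labels
FloatTensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor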
Example #17
from Models.bilstm import BiLSTM
from torch.optim import Adamax

import torch
import torch.nn.functional as F
trainWordLists,trainTagLists,word2id,tag2id=utils.create('train.txt',make_vocab=True)
devWordLists,devTagList=utils.create('dev.txt',make_vocab=False)
# Hidden Markov model training
print('HMM************************')
if os.path.exists('ckpts/hmm.pkl'):
    hmm=utils.loadModel('ckpts/hmm.pkl')
    predictTags = hmm.test(devWordLists, word2id, tag2id)
else:
    hmm=HMM(len(tag2id),len(word2id))
    hmm.train(trainWordLists,trainTagLists,tag2id,word2id)
    utils.saveModel('ckpts/hmm.pkl',hmm)
    predictTags=hmm.test(devWordLists,word2id,tag2id)
accuracy=metric.accuracy(predictTags,devTagList)
print('accuracy: ',accuracy)
print('CRF****************************')
# Conditional random field model training
if os.path.exists('ckpts/crf.pkl'):
    crf=utils.loadModel('ckpts/crf.pkl')
    print(crf)
    predictTags=crf.test(devWordLists)
else:
    crf=CRFModel()
    crf.train(trainWordLists,trainTagLists)
    utils.saveModel('ckpts/crf.pkl',crf)
    predictTags=crf.test(devWordLists)
accuracy=metric.accuracy(predictTags,devTagList)
Example #18
        if (it + 1) % 1000 == 0:
            save_path = saver.save(
                sess, '%s/Epoch_(%d)_(%dof%d).ckpt' %
                (ckpt_dir, epoch, it_epoch, batch_epoch))
            print('Model saved in file: % s' % save_path)

        # sample
        if (it + 1) % 100 == 0:

            f_sample_opt = sess.run(f_sample, feed_dict={z: z_ipt_sample})
            f_sample_opt = (f_sample_opt + 1) * 0.5
            f_sample_opt = np.round(f_sample_opt, decimals=0)

            save_dir = './sample_images_while_training/3dganFix'
            utils.mkdir(save_dir + '/')
            utils.saveModel(f_sample_opt, save_dir, sample_batch_size, it)

#        # sample
#        if (it + 1) % 1 == 0:
##            np.set_printoptions(threshold=np.inf)
##            f_sample_opt = sess.run(real, feed_dict={real: real_ipt})
##            print(f_sample_opt.shape)
##            f_sample_opt = (f_sample_opt+1)*0.5
##            f_sample_opt = np.round(f_sample_opt, decimals=0)
#            output_data = np.load("3dDataIntFake.npy")
##            np.savetxt("./data.txt",output_data[0,:,:,:,0])
#            print("play1")
#            save_dir = './sample_images_while_training/3dganFakeSlim1'
#            utils.mkdir(save_dir + '/')
#            utils.saveModel(output_data,save_dir,100,it)
#            print("play2")
Example #19
import numpy as np
import binvox_rw

import utils


def saveModel(data, save_dir, sample_batch_size, it):
    file_path = "./1a0c94a2e3e67e4a2e4877b52b3fca7.binvox"
    with open(file_path, 'rb') as f:
        model = binvox_rw.read_as_3d_array(f)
        divide = 4
        model.dims = [
            model.dims[0] // divide, model.dims[1] // divide,
            model.dims[2] // divide
        ]
        model.scale = model.scale / divide
        data = data.astype("bool")
        data = np.reshape(data, [sample_batch_size] + model.dims)
        #        print(data[0])
        for i in range(sample_batch_size):
            filename = str(it) + '_' + str(i) + '.binvox'
            print(data[i].shape)

            model.data = data[i]
            print(model.data.shape)
            with open(save_dir + '/' + filename, mode='w') as f_test:
                model.write(f_test)


data = np.load("3dDataInt.npy")
save_dir = './data_view'
utils.mkdir(save_dir + '/')
utils.saveModel(data, save_dir, 100, 1)
Example #20
            #                                                 QNet_moving_input:minibatch_s_next,
            #                                                 QNet_moving_a_input:minibatch_a})

            #train_writer.add_summary(summary, step_tot)


            # 5. Overwite 'target' with 'moving' Q-network
            if(step_tot % Qnetwork_update_frequency == 0):
                merge = tf.summary.merge_all()
                summary = sess.run(merge, feed_dict={QNet_moving_target_term:target_term,
                                                             QNet_moving_input:minibatch_s_next,
                                                             QNet_moving_a_input:minibatch_a})
                train_writer.add_summary(summary, step_tot)
                updateTargetNetwork(QNet_target_update_op_list, sess)              


            S = s_next


            if done:
                print("ep {:d} | steps {:d} | ep_r {:.0f} | exp {:.2f}".format(ep,step_tot,ep_r,random_action_chance))
                break

        # Save
        if(ep % save_ep_interval == 0):
            saveModel(ep, save_path, tf_saver, sess, save_model)


    # Final save
    saveModel(ep, save_path, tf_saver, sess, save_model)
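updateTargetNetwork and QNet_target_update_op_list come from the surrounding project; in a DQN setup the target-network update is typically a list of TF1 assign ops that copy the moving Q-network's weights into the target network. A sketch under that assumption:

import tensorflow as tf  # TF1-style graph API, matching the snippet above

def buildTargetUpdateOps(moving_scope, target_scope):
    # One assign op per variable, copying the moving Q-network's weights into the
    # target network; the returned list is run with sess.run every
    # Qnetwork_update_frequency steps. Assumed helper, not the project's code.
    moving_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=moving_scope)
    target_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=target_scope)
    return [t.assign(m) for m, t in zip(moving_vars, target_vars)]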
Example #21
                           ascending=False).to_csv(os.path.join(
                               output_DIR, output_filename),
                                                   index=False,
                                                   header=True,
                                                   sep=';',
                                                   encoding='utf-8')
    df_results.set_index('Model', inplace=True)
    print(df_results)
    print(
        df_results.drop(['Pipeline'],
                        axis=1).sort_values(by=['F1score'],
                                            ascending=False).iloc[:, 7:])
    print(
        df_results.drop(['Pipeline'],
                        axis=1).sort_values(by=['eval_F1score'],
                                            ascending=False).iloc[:, :7])

    # --------------------------#
    #     SAVE Best MODEL       #
    # --------------------------#
    # -- Just run the script with the best model obtained (comment unwanted ones)
    Makedir(model_DIR)
    print(f'[BEST MODEL] {Best_model_name}  pipeline saved as best model')
    print(f'\n {Best_model}')
    saveModel(
        Best_model,
        os.path.join(
            model_DIR,
            Best_model_name.replace(' ', '').replace('+', '-') + '_' +
            saved_model_suffix_filename))
Example #22
    logger.info(f"Loaded {savedModel}")

logger.info("Model defined")

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, net.parameters()), 1e-3)
#Defining Learning Rate scheduler
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

print("Reached here")

#### TRAINING PROCESS

net = freezeLayers(net)
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, net.parameters()), 1e-3)

net, val_acc = trainModel(model=net,
                          traindl=trainLoader,
                          criterion=criterion,
                          optimizer=optimizer,
                          scheduler=exp_lr_scheduler,
                          logger=logger,
                          num_epochs=5,
                          validdl=validLoader,
                          test_valsubset=False)

saveModel(net, modelFolder, modelNo, 5, val_acc)
Example #23

def printTopics(model):
    predicted_topics = model.print_topics(num_topics=5, num_words=5)
    for i, topics in predicted_topics:
        print('Words in Topic {}:\n {}'.format(i + 1, topics))


if __name__ == '__main__':
    arguments = parseArgs()
    dataset = utils.loadDataset(arguments.reprocessDataset,
                                classification=False,
                                splitWords=True)

    # Creating dictionary from dataset, where each unique term is assigned an index
    dictionary = corpora.Dictionary(dataset)

    # Converting list of documents into Bag of Words using dictionary
    doc_term_matrix = [dictionary.doc2bow(doc) for doc in dataset]

    # Training models on the document term matrix
    modelList = [
        LdaModel(doc_term_matrix, num_topics=10, id2word=dictionary, passes=2),
        LsiModel(doc_term_matrix, num_topics=10, id2word=dictionary)
    ]

    for model in modelList:
        print('Topic Modelling using %s' % utils.getClassName(model))
        printTopics(model)
        utils.saveModel(model)
Example #24
            # loss = sess.run(cost, feed_dict={Z: xs})
            # log = '%s  batch: %10d  cost: %.8e nmi-1: %.8f nmi-2: %.8f nmi-3: %.8f' % (
            #     dt.datetime.now(), i, loss, 0, 0, 0)
            # print(log)

        if not epoch % 1:
            loss, dys = sess.run([cost, Y], feed_dict={Z: X})

            nmi1 = 0
            nmi2 = 0
            nmi3 = 0

            log = '%s  epoch: %10d  cost: %.8e nmi-1: %.8f nmi-2: %.8f nmi-3: %.8f' % (
                dt.datetime.now(), epoch, loss, nmi1, nmi2, nmi3)
            utils.writeLog('../log', name, log)
            print(log)

            # dhs = np.histogram(dys, bins=outdim)[0]

            # print(khs)
            # print(dhs)

        if not epoch % 100:
            utils.saveModel(sess, '../model', name, epoch)

        epoch += 1

        flag = False
        if flag: break
    print("optimization is finished! ")
Example #25
def train(model,
          epoch,
          lr,
          train_loader,
          test_loader,
          modelPath,
          checkPoint,
          useCuda=True,
          adjustLR=False,
          earlyStop=False,
          tolearnce=4):

    tolerance_cnt = 0
    step = 0
    best_acc = 0

    if useCuda:
        model = model.cuda()

    # optimizer = optim.Adam(net.parameters(), lr=lr)
    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=5e-4)

    # training
    for i in range(1, epoch + 1):

        sum_loss = 0
        model.train()

        # class_weight = get_class_weight(trainLoader, train_classes)
        # class_weight = torch.FloatTensor(class_weight).cuda()

        if adjustLR:  # use LR adjustment
            ImgaeNet200_adjust_lr(optimizer, lr, i)

        for batch_idx, (x, target) in enumerate(train_loader):

            optimizer.zero_grad()

            if useCuda:  # use GPU
                x, target = x.cuda(), target.cuda()

            x, target = Variable(x), Variable(target)
            out = model(x)

            loss = nn.CrossEntropyLoss()(out, target)
            sum_loss += loss.data[0]

            loss.backward()
            optimizer.step()

            step += 1

            if (batch_idx + 1) % checkPoint == 0 or (batch_idx +
                                                     1) == len(train_loader):
                print(
                    '==>>> epoch: {}, batch index: {}, step: {}, train loss: {:.6f}'
                    .format(i, batch_idx + 1, step,
                            sum_loss / (batch_idx + 1)))

        acc = inference(model, test_loader, useCuda=True, k=5)

        # early stopping
        if earlyStop:
            if acc < best_acc:
                tolerance_cnt += 1
            else:
                best_acc = acc
                tolerance_cnt = 0
                saveModel(model, i, best_acc, modelPath)

            if tolerance_cnt >= tolearnce:
                print("early stopping training....")
                saveModel(model, i, best_acc, modelPath)
                return model
        else:
            if best_acc < acc:
                saveModel(model, i, acc, modelPath)
                best_acc = acc

    print("best acc:", best_acc)
Example #26
    def save(self, outFile):
        cu.saveModel(self, outFile)
Example #27
        linear_model.LogisticRegression(solver='saga', multi_class='auto'),
        ensemble.RandomForestClassifier(n_estimators=25),
        xgboost.XGBClassifier()
    ]

    for xEncoder, yEncoder in dataEncodersList:
        print('Using {} and {} for encoding xData and yData'.format(
            utils.getClassName(xEncoder), utils.getClassName(yEncoder)))

        # fit the encoders on the dataset
        xEncoder.fit(xData)
        yEncoder.fit(yData)

        print('Encoding and splitting xData, yData')
        xDataEncoded, yDataEncoded = xEncoder.transform(
            xData), yEncoder.transform(yData)
        xTrain, xValid, yTrain, yValid = model_selection.train_test_split(
            xDataEncoded, yDataEncoded)

        for model in modelsList:
            print('Training model:', utils.getClassName(model))
            trainedModel, accuracy = trainModel(model, xTrain, yTrain, xValid,
                                                yValid)
            print('Accuracy:', accuracy)

            if arguments.printMetrics:
                printMetrics(trainedModel, xValid, yValid)

            filePrefix = utils.getClassName(xEncoder) + '_'
            utils.saveModel(trainedModel, filePrefix=filePrefix)
Example #28
def train(model,
          epoch,
          lr,
          train_loader,
          test_loader,
          modelPath,
          checkPoint=10,
          lamda=15,
          useCuda=True,
          adjustLR=False,
          earlyStop=False,
          tolearnce=4):

    tolerance_cnt = 0
    step = 0
    best_acc = 0

    if useCuda:
        model = model.cuda()

    ceriation = nn.CrossEntropyLoss()
    # optimizer = optim.Adam(net.parameters(), lr=lr)
    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=5e-4)

    # train
    for epoch_idx in range(1, epoch + 1):

        sum_loss = 0
        sum_ewc_loss = 0
        model.train()

        if adjustLR:  # use LR adjustment
            ImgaeNet200_adjust_lr(optimizer, lr, epoch_idx)

        for batch_idx, (x, target) in enumerate(train_loader):

            optimizer.zero_grad()

            if useCuda:
                x, target = x.cuda(), target.cuda()

            x, target = Variable(x), Variable(target)
            out = model(x)

            objective_loss = ceriation(out, target)

            ewc_loss = model.ewc_loss(lamda, cuda=useCuda)
            loss = objective_loss + ewc_loss

            sum_ewc_loss += ewc_loss.data[0]
            sum_loss += loss.data[0]

            loss.backward()
            optimizer.step()

            step += 1

            if (batch_idx + 1) % checkPoint == 0 or (batch_idx +
                                                     1) == len(train_loader):
                print(
                    '==>>> epoch: {}, batch index: {}, step: {}, train loss: {:.6f}, ewc loss: {:.6f}'
                    .format(epoch_idx, batch_idx + 1, step,
                            sum_loss / (batch_idx + 1),
                            sum_ewc_loss / (batch_idx + 1)))

        acc = inference(model, test_loader, useCuda=True, k=5)

        # early stopping
        if earlyStop:
            if acc < best_acc:
                tolerance_cnt += 1
            else:
                best_acc = acc
                tolerance_cnt = 0
                saveModel(model, epoch_idx, best_acc, modelPath)

            if tolerance_cnt >= tolearnce:
                print("early stopping training....")
                saveModel(model, epoch_idx, best_acc, modelPath)
                return model
        else:
            if best_acc < acc:
                saveModel(model, epoch_idx, acc, modelPath)
                best_acc = acc

    print("best acc:", best_acc)
Example #29
            root.add(d)
    print('------> 插入成功')


if __name__ == "__main__":
    # Load stopwords
    stopwords = getStopwords()

    rootName = ("data/root.pkl")
    if os.path.exists(rootName):
        root = loadModel(rootName)
    else:
        dictName = 'data/dict.txt'
        word_freq = loadWords(dictName)
        root = TrieNode('*', word_freq)
        saveModel(root, rootName)

    # Load the new document
    fileName = 'data/demo.txt'
    data = loadDate(fileName, stopwords)
    # Insert the new document into the trie root
    loadDate2Root(data)

    # Take the top N = 5 candidates
    N = 5
    result, add_word = root.wordFind(N)
    # To debug or choose a different threshold, print `result` and adjust
    # print("\n----\n", result)
    print("\n----\n", '增加了 %d 个新词, 词语和得分分别为: \n' % len(add_word))
    print('#############################')
    for word, score in add_word.items():
Example #30
decoderInputs = [attrCodeTensor, childrenCodeTensor]

# Apply decoder on encoder outputs.
childrenCodeActivated = childrenCodec.decode(decoderInputs)
attrsCodeActivated = attributeCodec.decode(decoderInputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
trainer_model = Model(
    [*attributesEncoderInputs, *childrenEncoderInputs, *decoderInputs],
    [childrenCodeActivated, attrsCodeActivated])

# Get inputs.
inputData = get_inputs(modelArgs)

plot_model(trainer_model, to_file="plot.png")

# Run training
trainer_model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
trainer_model.fit([inputData.encoder_input_data, inputData.decoder_input_data],
                  inputData.decoder_target_data,
                  batch_size=modelArgs.batch_size,
                  epochs=modelArgs.epochs,
                  validation_split=0.2)

# Save model
saveModel(trainer_model, 't_s2s.json')
with open("modelArgs.json", "w") as fp:
    json.dump(modelArgs, fp)
Example #31
    def save(self, outFile):
        cu.saveModel(self, outFile)