Example #1
def run(params):
    """Run training

    Args:
        params: Parameters for training

    Returns: None

    """

    logging.info(f'params: {params}')
    strategy = distribution_utils.get_distribution_strategy(
        params.get('tpu_address'))
    batch_size = distribution_utils.update_batch_size(strategy,
                                                      params['batch_size'])

    with strategy.scope():
        model = model_builder.build_model()
    input_image_size = model.input_shape[1]

    # Build dataset
    train_image_paths, train_scores = data_loader.read_csv(params['train_csv'],
                                                           params['image_dir'],
                                                           is_training=True)
    validation_image_paths, validation_scores = data_loader.read_csv(
        params['validation_csv'], params['image_dir'], is_training=False)

    train_dataset = data_loader.build_dataset(train_image_paths,
                                              train_scores,
                                              is_training=True,
                                              batch_size=batch_size,
                                              target_size=input_image_size)

    validation_dataset = data_loader.build_dataset(
        validation_image_paths,
        validation_scores,
        is_training=False,
        batch_size=batch_size,
        target_size=input_image_size)

    train_dataset = strategy.experimental_distribute_dataset(train_dataset)
    validation_dataset = strategy.experimental_distribute_dataset(
        validation_dataset)

    loss_fn = loss_builder.build_loss_fn(
        loss_name=params['loss'],
        trainable_variables=model.trainable_variables)

    train(model=model,
          loss_fn=loss_fn,
          strategy=strategy,
          epochs=params['epochs'],
          batch_size=batch_size,
          train_dataset=train_dataset,
          validation_dataset=validation_dataset,
          checkpoint_dir=params['checkpoint_dir'],
          log_dir=params['log_dir'])
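
Example #1 reads a handful of keys from the params mapping. The call below is a minimal usage sketch, not part of the original source; every value (paths, loss name, sizes) is a placeholder, and only the key names match what run() actually looks up.

if __name__ == '__main__':
    # Placeholder values for illustration only; run() reads exactly these keys.
    params = {
        'tpu_address': None,           # None falls back to the default strategy
        'batch_size': 32,
        'train_csv': 'data/train.csv',
        'validation_csv': 'data/valid.csv',
        'image_dir': 'data/images',
        'loss': 'mse',                 # hypothetical loss name for loss_builder
        'epochs': 10,
        'checkpoint_dir': 'checkpoints',
        'log_dir': 'logs',
    }
    run(params)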
Example #2
def train(config):

    set_manual_seed(10)
    """ 1: 文本清洗和分词,构建词表 """
    print("Preparing the batch data ... \n")
    corpus_x, corpus_y, vocab = build_dataset(config)
    """ 2:计算类别权重,缓解类别不平衡问题 """
    class_weights = calcu_class_weights(corpus_y, config)
    config.class_weights = class_weights
    """ 3:加载预训练的词向量 """
    embed_matrix = load_embed_matrix(vocab, config)
    config.embed_matrix = embed_matrix
    """ 4: 划分数据集和生成batch迭代器 """
    train_iter, valid_iter, test_iter = batch_generator(
        corpus_x, corpus_y, 0.15, config)
    """ 5:模型初始化 """
    print("Building the textcnn model ... \n")
    model = TextCNN(config)
    print(f'The model has {count_params(model):,} trainable parameters\n')

    model.to(config.device)
    """ 6:开始训练模型 """
    print("Start the training ... \n")
    init_network(model)
    train_model(config, model, train_iter, valid_iter, test_iter)
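
Step 2 above relies on calcu_class_weights, which is not shown in the snippet. As a rough sketch of the idea it implements (inverse-frequency weighting, the same formula scikit-learn uses for class_weight='balanced'; the function name and the exact scheme used in the original project are assumptions):

import numpy as np

def inverse_frequency_weights(labels, num_classes):
    # Weight each class by n_samples / (n_classes * class_count), so that
    # under-represented classes contribute more to the loss.
    labels = np.asarray(labels)
    counts = np.bincount(labels, minlength=num_classes).astype(np.float64)
    counts = np.maximum(counts, 1.0)  # avoid division by zero for empty classes
    return len(labels) / (num_classes * counts)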
Example #3
def train(config):

    set_manual_seed(10)
    """ 1: 划分数据集并保存 """
    print("Preparing the batch data ... \n")
    build_dataset(config)
    """ 2:计算类别权重,缓解类别不平衡问题 """
    class_weights = calcu_class_weights(config)
    config.class_weights = class_weights
    """ 3: 划分数据集和生成batch迭代器 """
    train_iter, valid_iter, test_iter = batch_generator(config)
    """ 5:模型初始化 """
    print("Building the textcnn model ... \n")
    model = TextCNN(config)
    print(f'The model has {count_params(model):,} trainable parameters\n')

    model.to(config.device)
    """ 6:开始训练模型 """
    print("Start the training ... \n")
    init_network(model)
    train_model(config, model, train_iter, valid_iter, test_iter)
Example #4
    def update(self, gameid):
        # remove prev_round for updating (so it's the history *at the prev round*)
        prev_round = game_histories[gameid].pop()
        prev_agent = load_agent(prev_round)
        prev_agent.update_model(prev_round['roundNum'], prev_round['cap'])

        # add it back to history for future rounds
        game_histories[gameid].append(prev_round)

        # precompute history captions so we don't have to do it again on every step
        for reduced_cap in build_dataset(prev_round['cap'],
                                         prev_agent.dataset_type):
            orig_captions[prev_agent.gameid].append(
                (prev_round['target'], reduced_cap))
Example #5
    def update_model(self, round_num, caption):
        # Remove <start> and <end> if they're part of caption
        if caption[:7] == '<start>':
            caption = caption[8:-6]

        # don't update if caption is empty
        if len(caption.split()) < 1:
            return

        combined_loss = CombinedLoss(self)
        data_loader = get_reduction_loader(
            self.raw_image, self.vocab, self.batch_size, caption, self.dataset_type,
            shuffle=True, num_workers=self.num_workers
        )
        
        # define optimizer
        params = list(self.decoder.parameters())
        optimizer = torch.optim.Adam(params, lr=self.learning_rate)

        # Keep training until we hit specified number of gradient steps
        steps = 0
        while True:
            for i, batch in enumerate(data_loader):
                # print('num reductions for reduction', self.dataset_type, ':', batch[1].size())
                if steps == self.num_steps:
                    break
                loss = combined_loss.compute(batch, steps)
                self.decoder.zero_grad()
                loss.backward()
                optimizer.step()
                steps += 1

            if steps == self.num_steps:
                break

        # After adaptation, add current trial's data to 'memory' for future rounds
        self.history.append({'target': self.raw_image, 'cap': caption})

        # precompute history captions so we don't have to do it again on every step
        for reduced_cap in build_dataset(caption, self.dataset_type):
            self.orig_captions.append((self.raw_image, reduced_cap))

        # Save the model checkpoints
        if self.checkpoint:
            ckpt_loc = 'decoder-{}.ckpt'.format(self.gameid)
            torch.save(self.decoder.state_dict(),
                       os.path.join(self.model_path, ckpt_loc))
Example #6
import config
import datetime
import math

import tensorflow as tf
from matplotlib import pyplot as plt

# inception_v3 and build_dataset come from project-local modules
# (their imports are not shown in the original snippet)

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

# Load model
model = inception_v3.InceptionV3(num_class=config.classes)
model.build(input_shape=(None, config.image_size, config.image_size, config.image_channels))

# Load dataset
train_ds, train_len, test_ds, test_len, valid_ds, valid_len = build_dataset()

# Loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizers = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='train_acc')

valid_loss = tf.keras.metrics.Mean(name='valid_loss')
valid_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_acc')

# checkpoint
checkpoints_dir = 'checkpoints/'
checkpoint = tf.train.Checkpoint(model=model, optimizers=optimizers)
checkpoint_manager = tf.train.CheckpointManager(checkpoint=checkpoint, directory=checkpoints_dir, max_to_keep=1)
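
Example #6 stops after building the model, loss, optimizer, metrics and checkpoint manager; the training step itself is not included in the snippet. Below is a minimal sketch of the step that would typically follow, written against the objects defined above (standard tf.GradientTape pattern, not the original author's code):

@tf.function
def train_step(images, labels):
    # Forward pass and loss computation inside the gradient tape
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    # Backward pass and weight update
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizers.apply_gradients(zip(gradients, model.trainable_variables))
    # Accumulate the running metrics
    train_loss(loss)
    train_acc(labels, predictions)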
Example #7
def meta_train():
    args = cli_def().parse_args()
    print(args)
    network = args.network
    dataset = args.dataset
    batch_size = args.batch_size
    lr = args.lr
    num_epoch = args.num_epoch

    if not os.path.isdir('result'):
        os.mkdir('result')
    save_path = './result/meta-train_' + network + '_' + dataset
    tr_loss = []
    t_loss = []
    tr_acc = []
    t_acc = []
    lr_save = []

    # We are using cuda for training - no point trying out on CPU for ResNet
    device = torch.device("cuda")

    if dataset == 'cifar10':
        num_classes = 10
    elif dataset == 'cifar100':
        num_classes = 100
    else:
        raise ValueError('Unsupported dataset: {}'.format(dataset))

    model = build_network(network, num_classes)
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.params()])))
    model.to(device).apply(init_weights)

    mlr_snet = MLRSNet(1, 50).to(device)
    print(mlr_snet)

    # assign argparse parameters
    criterion = nn.CrossEntropyLoss().to(device)
    best_val_accuracy = 0.0

    num_meta = 1000
    train_data, meta_data, test_data = build_dataset(dataset, num_meta,
                                                     batch_size)
    print(len(train_data), len(meta_data), len(test_data))

    train_loss, train_acc = compute_loss_accuracy(model, train_data, criterion,
                                                  device)
    print('Initial training loss is %.3f' % train_loss)

    gamma = (train_loss**0.5 * np.log(train_loss * num_classes) /
             num_classes**0.25) / 4
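    # Equivalently: gamma = sqrt(L0) * ln(L0 * C) / (4 * C ** 0.25),
    # where L0 is the initial training loss and C is num_classes.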
    print('Gamma is %.3f' % gamma)

    optimizer_vnet = torch.optim.Adam(mlr_snet.params(),
                                      lr=lr,
                                      weight_decay=1e-4)
    optimizer = optim.SGD(model.params(),
                          lr=1,
                          momentum=args.momentum,
                          weight_decay=args.wd)

    meta_data_iter = iter(meta_data)
    for epoch in range(num_epoch):
        train_correct = 0
        train_loss = 0

        for i, (inputs, labels) in enumerate(train_data):
            model.train()
            mlr_snet.reset_lstm(keep_states=(epoch + i) > 0, device=device)
            inputs, labels = inputs.to(device), labels.to(device)
            if (i + 1) % args.t_val == 0:

                meta_model = build_network(network, num_classes)
                meta_model.to(device)
                meta_model.load_state_dict(model.state_dict())
                meta_model.train()

                outputs = meta_model(inputs)
                loss = criterion(outputs, labels)
                loss = loss.unsqueeze(0)

                meta_model.zero_grad()
                grads = torch.autograd.grad(loss, (meta_model.params()),
                                            create_graph=True)
                lr_ = mlr_snet(loss.unsqueeze(0))

                optimizer_metamodel = MetaSGD(meta_model)
                optimizer_metamodel.load_state_dict(optimizer.state_dict())
                optimizer_metamodel.step(lr=lr_ * gamma, grad=grads)

                del grads

                try:
                    inputs_val, targets_val = next(meta_data_iter)
                except StopIteration:
                    meta_data_iter = iter(meta_data)
                    inputs_val, targets_val = next(meta_data_iter)
                inputs_val, targets_val = inputs_val.to(
                    device), targets_val.to(device)
                y_g_hat = meta_model(inputs_val)
                l_g_meta = criterion(y_g_hat, targets_val.long())

                optimizer_vnet.zero_grad()
                l_g_meta.backward()
                optimizer_vnet.step()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            with torch.no_grad():
                new_lr = mlr_snet(loss.unsqueeze(0))

            new_lr = float(new_lr.data) * gamma
            lr_save.append(new_lr)

            for group in optimizer.param_groups:
                group['lr'] = new_lr

            train_loss += loss.item() * labels.size(0)

            train_pred = outputs.argmax(1)
            train_correct += train_pred.eq(labels).sum().item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        train_acc = 100.0 * (train_correct / len(train_data.dataset))
        val_loss, val_acc = compute_loss_accuracy(model, test_data, criterion,
                                                  device)

        tr_loss.append(train_loss / len(train_data.dataset))
        t_loss.append(val_loss)
        tr_acc.append(train_acc)
        t_acc.append(val_acc)
        torch.save(
            {
                'train_acc': tr_acc,
                'test_acc': t_acc,
                'train_loss': tr_loss,
                'test_loss': t_loss,
                'lr': lr_save
            }, save_path)
        print('train loss is : %.4f' % (train_loss / len(train_data.dataset)))
        print('test loss is: %.4f' % val_loss)

        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc

        torch.save(mlr_snet.state_dict(),
                   './result/mlr_snet %d.pth' % (epoch + 1))

        print('train_accuracy at epoch :{} is : {}'.format(epoch, train_acc))
        print('val_accuracy at epoch :{} is : {}'.format(epoch, val_acc))
        print('best val_accuracy is : {}'.format(best_val_accuracy))

        cur_lr = 0.0
        for param_group in optimizer.param_groups:
            cur_lr = param_group['lr']
        print('learning_rate after epoch :{} is : {}'.format(epoch, cur_lr))
Example #8
def train():
    
    """ 1: 加载数据集,把样本和标签都转化为id"""
    if os.path.isfile(config.data_proc_file):
        
        with open(config.data_proc_file, "rb") as f:
            train_data,dev_data,test_data = pickle.load(f)
            char_to_id,id_to_char,tag_to_id,id_to_tag = pickle.load(f)
            emb_matrix = pickle.load(f)
            
        logger.info("%i / %i / %i sentences in train / dev / test." % (len(train_data), len(dev_data), len(test_data)))
            
    else:
        
        train_data,dev_data,test_data, char_to_id, tag_to_id, id_to_tag, emb_matrix = build_dataset()
        
    """ 2: 产生batch训练数据 """
    train_manager = BatchManager(train_data, config.batch_size)
    dev_manager = BatchManager(dev_data, config.batch_size)
    test_manager = BatchManager(test_data, config.batch_size) 
    
    model = NERLSTM_CRF(config, char_to_id, tag_to_id, emb_matrix)
    model.train()
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    
    """ 3: 用early stop 防止过拟合 """
    total_batch = 0  
    dev_best_f1 = float('-inf')
    last_improve = 0  
    flag = False     
    
    start_time = time.time()
    logger.info(" 开始训练模型 ...... ")
    for epoch in range(config.max_epoch):
        
        logger.info('Epoch [{}/{}]'.format(epoch + 1, config.max_epoch))
        
        for index, batch in enumerate(train_manager.iter_batch(shuffle=True)):
            
            optimizer.zero_grad()
            
            """ 计算损失和反向传播 """
            _, char_ids, seg_ids, tag_ids, mask = batch
            loss = model.log_likelihood(char_ids,seg_ids,tag_ids, mask)
            loss.backward()
            
            """ 梯度截断,最大梯度为5 """
            nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=config.clip)
            optimizer.step()
            
            if total_batch % config.steps_check == 0:
                
                model.eval()
                dev_f1,dev_loss = evaluate(model, dev_manager, id_to_tag)
                
                """ 以f1作为early stop的监控指标 """
                if dev_f1 > dev_best_f1:
                    
                    evaluate(model, test_manager, id_to_tag, test=True)
                    dev_best_f1 = dev_f1
                    torch.save(model, os.path.join(config.save_dir,"medical_ner.ckpt"))
                    improve = '*'
                    last_improve = total_batch
                else:
                    improve = ''
                    
                time_dif = get_time_dif(start_time)
                msg = 'Iter: {} | Dev Loss: {:.4f} | Dev F1-macro: {:.4f} | Time: {} | {}'
                logger.info(msg.format(total_batch, dev_loss, dev_f1, time_dif, improve))  
                
                model.train()
                
            total_batch += 1
            if total_batch - last_improve > config.require_improve:
                """ 验证集f1超过5000batch没上升,结束训练 """
                logger.info("No optimization for a long time, auto-stopping...")
                flag = True
                break
        if flag:
            break