Example #1
def train():
    # load data sets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    #update_tag_scheme(train_sentences, FLAGS.tag_schema)
    #update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = bert_tag_mapping(train_sentences)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_bert_dataset(train_sentences, FLAGS.max_seq_len,
                                      tag_to_id, FLAGS.lower)
    dev_data = prepare_bert_dataset(dev_sentences, FLAGS.max_seq_len,
                                    tag_to_id, FLAGS.lower)
    test_data = prepare_bert_dataset(test_sentences, FLAGS.max_seq_len,
                                     tag_to_id, FLAGS.lower)
    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), 0, len(test_data)))

    train_manager = BertBatchManager(train_data, FLAGS.batch_size)
    dev_manager = BertBatchManager(dev_data, FLAGS.batch_size)
    test_manager = BertBatchManager(test_data, FLAGS.batch_size)
    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_bert_model(sess, Model, FLAGS.ckpt_path, config, logger)

        logger.info("start training")
        loss = []
        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)

                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    loss = []

            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, FLAGS.ckpt_path, logger)
                #save_model(sess, model, FLAGS.ckpt_path, logger, global_steps=step)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)
Example #2
def train():
    # load data sets
    train_sentences = load_sentences(args.train_file, args.lower, args.zeros)
    dev_sentences = load_sentences(args.dev_file, args.lower, args.zeros)
    test_sentences = load_sentences(args.test_file, args.lower, args.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    # Check and normalize the tag scheme of the datasets
    update_tag_scheme(train_sentences, args.tag_schema)
    update_tag_scheme(test_sentences, args.tag_schema)
    update_tag_scheme(dev_sentences, args.tag_schema)

    # create maps if not exist
    # Build the char_to_id, id_to_char, tag_to_id, id_to_tag dictionaries from the dataset and store them as a pkl file
    if not os.path.isfile(args.map_file):
        # create dictionary for word
        if args.pre_emb:
            dico_chars_train = char_mapping(train_sentences, args.lower)[0]
            # Augment (extend) the character dictionary with the pretrained embedding vocabulary, then return the char/index mappings
            dico_chars, char_to_id, id_to_char = augment_with_pretrained(
                dico_chars_train.copy(), args.emb_file,
                list(
                    itertools.chain.from_iterable([[w[0] for w in s]
                                                   for s in test_sentences])))
        else:
            _c, char_to_id, id_to_char = char_mapping(train_sentences,
                                                      args.lower)

        # Create a dictionary and a mapping for tags
        # Get the tag-to-index mappings
        tag_to_id, id_to_tag, intent_to_id, id_to_intent = tag_mapping(
            train_sentences)

        with open(args.map_file, "wb") as f:
            pickle.dump([
                char_to_id, id_to_char, tag_to_id, id_to_tag, intent_to_id,
                id_to_intent
            ], f)
    else:
        with open(args.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag, intent_to_id, id_to_intent = pickle.load(
                f)

    # Extract sentence features
    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 intent_to_id, args.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               intent_to_id, args.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                intent_to_id, args.lower)

    # code.interact(local=locals())

    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    # Build batches of data for model training
    train_manager = BatchManager(train_data, args.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)

    # make path for store log and model if not exist
    make_path(args)
    if os.path.isfile(args.config_file):
        config = load_config(args.config_file)
    else:
        config = config_model(char_to_id, tag_to_id, intent_to_id)
        save_config(config, args.config_file)
    make_path(args)

    logger = get_logger(args.log_file)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    # Number of iterations needed for one full pass over the training set
    steps_per_epoch = train_manager.len_data

    with tf.Session(config=tf_config) as sess:
        # The model creation here is the core code of the project
        model = create_model(sess, Model, args.ckpt_path, load_word2vec,
                             config, id_to_char, logger)
        logger.info("start training")
        loss_slot = []
        loss_intent = []

        # with tf.device("/gpu:0"):
        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss_slot, batch_loss_intent = model.run_step(
                    sess, True, batch)
                loss_slot.append(batch_loss_slot)
                loss_intent.append(batch_loss_intent)

                if step % args.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "INTENT loss:{:>9.6f}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss_intent),
                                    np.mean(loss_slot)))
                    loss_slot = []
                    loss_intent = []

            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                # if i%7 == 0:
                save_model(sess, model, args.ckpt_path, logger)
        evaluate(sess, model, "test", test_manager, id_to_tag, logger)
Example #3
import tensorflow as tf
import numpy as np
from sklearn.utils import shuffle

import matplotlib.pyplot as plt
from tqdm import tqdm
import time
import datetime

# project-local modules used below; the import layout is assumed from the other examples on this page
import config
from data import preprocess
from utils import utils


for tc in range(10):
    # Automatic GPU allocation (log device placement)
    tf.debugging.set_log_device_placement(True)

    # Save config
    utils.save_config("sunwoo_test", config)

    # Load image paths and captions
    # img_paths, captions = preprocess.get_path_caption(config.dataset_file_path)
    # img_paths = [config.img_file_path + path for path in img_paths]

    # Split the full dataset and save it
    # img_name_train, img_name_val, token_train, token_val = preprocess.dataset_split_save(img_paths, captions)

    # Load the saved datasets
    tr_img_paths, tr_captions = preprocess.get_data_file(config.train_dataset_path)
    val_img_paths, val_captions = preprocess.get_data_file(config.validation_dataset_path)


    # sklearn's shuffle returns shuffled copies, so the result must be assigned
    tr_img_paths, tr_captions = shuffle(tr_img_paths, tr_captions)
    tr_img_paths, tr_captions = tr_img_paths[:10000], tr_captions[:10000]
Example #4
File: solver_er.py Project: kiminh/MIR
                  Test Acc:{metrics.acc_task(tid_done)},\
                  Test Forgetting:{metrics.forgetting_task(tid_done)}')


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--clean_run', type=bool, default=True)
    parser.add_argument('--config_file', type=str, default="")
    parser.add_argument("opts", default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    set_seed(cfg)
    log_dir, chkpt_dir = save_config(cfg.SYSTEM.SAVE_DIR, cfg, args.clean_run)
    logger = setup_logger(
        cfg.SYSTEM.EXP_NAME,
        os.path.join(cfg.SYSTEM.SAVE_DIR, cfg.SYSTEM.EXP_NAME), 0)
    writer = SummaryWriter(log_dir)
    metrics = Metrics(cfg.SOLVER.NUM_TASKS)
    if cfg.DATA.TYPE == 'mnist':
        model = get_model('mlp',
                          input_size=cfg.MODEL.MLP.INPUT_SIZE,
                          hidden_size=cfg.MODEL.MLP.HIDDEN_SIZE,
                          out_size=cfg.MODEL.MLP.OUTPUT_SIZE)
    elif cfg.DATA.TYPE == 'cifar':
        model = get_model('resnet', n_cls=cfg.DATA.NUM_CLASSES)
    model.to(device)
    mem = Buffer(cfg)
    for tid in range(cfg.SOLVER.NUM_TASKS):
Example #5
File: train.py Project: micts/acgcn
def prepare_training(cfg, args):

    if cfg.use_gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = cfg.device_list

    if cfg.use_gpu and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    if cfg.model_name == 'baseline':  # initialize baseline model
        model = baseline_model.BaseNet(cfg)
    elif cfg.model_name == 'gcn':  # initialize gcn model
        model = gcn_model.GCNNet(cfg)
    else:
        assert (
            False
        ), 'Variable model_name should be either \'baseline\' or \'gcn\'.'

    model = model.to(device=device)
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=1,
                          momentum=cfg.momentum,
                          weight_decay=cfg.weight_decay)

    if cfg.resume_training:
        cfg.filename = 'checkpoint_' + cfg.checkpoint_path.split('/')[-2]
        model, optimizer, start_epoch = load_checkpoint(
            model, optimizer, cfg.checkpoint_path)
        cfg.start_epoch = start_epoch
    else:
        cfg.filename = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())

    utils.make_dirs(
        os.path.join(cfg.results_path, cfg.model_name, cfg.filename))
    utils.save_config(cfg)
    utils.print_config(cfg)

    # build the datasets first; the score directories below use their split names
    training_set = dataset.return_dataset(args.data_path, args.annot_path, cfg,
                                          'training')
    validation_set = dataset.return_dataset(args.data_path, args.annot_path,
                                            cfg, 'validation')
    datasets = {'train': training_set, 'val': validation_set}

    if cfg.save_scores:
        utils.make_dirs(
            os.path.join(cfg.scores_path, cfg.model_name, cfg.filename,
                         datasets['train'].split))
        utils.make_dirs(
            os.path.join(cfg.scores_path, cfg.model_name, cfg.filename,
                         datasets['val'].split))
    if cfg.plot_grad_flow:
        utils.make_dirs(
            os.path.join(cfg.results_path, cfg.model_name, cfg.filename,
                         'grad_flow'))

    #utils.save_config(cfg)
    #utils.print_config(cfg)

    training_loader = data.DataLoader(training_set,
                                      batch_size=cfg.training_batch_size,
                                      shuffle=True,
                                      num_workers=4,
                                      worker_init_fn=worker_init_fn)
    validation_loader = data.DataLoader(validation_set,
                                        batch_size=cfg.validation_batch_size,
                                        shuffle=False,
                                        num_workers=4)

    dataloaders = {'train': training_loader, 'val': validation_loader}

    return dataloaders, datasets, model, device, optimizer
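Example #6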
def train(arguments):

    # Parse input arguments
    json_filename = arguments.config
    network_debug = arguments.debug

    # Load options
    json_opts = json_file_to_pyobj(json_filename)
    train_opts = json_opts.training

    # Architecture type
    arch_type = train_opts.arch_type

    # Setup Dataset and Augmentation
    ds_class = get_dataset(arch_type)
    ds_path = get_dataset_path(arch_type, json_opts.data_path)
    ds_transform = get_dataset_transformation(
        arch_type,
        opts=json_opts.augmentation,
        max_output_channels=json_opts.model.output_nc,
        verbose=json_opts.training.verbose)

    # Setup channels
    channels = json_opts.data_opts.channels
    if len(channels) != json_opts.model.input_nc \
            or len(channels) != getattr(json_opts.augmentation, arch_type).scale_size[-1]:
        raise Exception(
            'Number of data channels must match number of model channels, and patch and scale size dimensions'
        )

    # Setup the NN Model
    model = get_model(json_opts.model)
    if network_debug:
        print('# of pars: ', model.get_number_parameters())
        print('fp time: {0:.3f} sec\tbp time: {1:.3f} sec per sample'.format(
            *model.get_fp_bp_time()))
        exit()

    # Setup Data Loader
    split_opts = json_opts.data_split
    train_dataset = ds_class(ds_path,
                             split='train',
                             transform=ds_transform['train'],
                             preload_data=train_opts.preloadData,
                             train_size=split_opts.train_size,
                             test_size=split_opts.test_size,
                             valid_size=split_opts.validation_size,
                             split_seed=split_opts.seed,
                             channels=channels)
    valid_dataset = ds_class(ds_path,
                             split='validation',
                             transform=ds_transform['valid'],
                             preload_data=train_opts.preloadData,
                             train_size=split_opts.train_size,
                             test_size=split_opts.test_size,
                             valid_size=split_opts.validation_size,
                             split_seed=split_opts.seed,
                             channels=channels)
    test_dataset = ds_class(ds_path,
                            split='test',
                            transform=ds_transform['valid'],
                            preload_data=train_opts.preloadData,
                            train_size=split_opts.train_size,
                            test_size=split_opts.test_size,
                            valid_size=split_opts.validation_size,
                            split_seed=split_opts.seed,
                            channels=channels)
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=16,
                              batch_size=train_opts.batchSize,
                              shuffle=True)
    valid_loader = DataLoader(dataset=valid_dataset,
                              num_workers=16,
                              batch_size=train_opts.batchSize,
                              shuffle=False)
    test_loader = DataLoader(dataset=test_dataset,
                             num_workers=16,
                             batch_size=train_opts.batchSize,
                             shuffle=False)

    # Visualisation Parameters
    visualizer = Visualiser(json_opts.visualisation, save_dir=model.save_dir)
    error_logger = ErrorLogger()

    # Training Function
    model.set_scheduler(train_opts)
    # Setup Early Stopping
    early_stopper = EarlyStopper(json_opts.training.early_stopping,
                                 verbose=json_opts.training.verbose)
    for epoch in range(model.which_epoch, train_opts.n_epochs):
        print('(epoch: %d, total # iters: %d)' % (epoch, len(train_loader)))
        train_volumes = []
        validation_volumes = []

        # Training Iterations
        for epoch_iter, (images, labels,
                         indices) in tqdm(enumerate(train_loader, 1),
                                          total=len(train_loader)):
            # Make a training update
            model.set_input(images, labels)
            model.optimize_parameters()
            #model.optimize_parameters_accumulate_grd(epoch_iter)

            # Error visualisation
            errors = model.get_current_errors()
            error_logger.update(errors, split='train')

            ids = train_dataset.get_ids(indices)
            volumes = model.get_current_volumes()
            visualizer.display_current_volumes(volumes, ids, 'train', epoch)
            train_volumes.append(volumes)

        # Validation and Testing Iterations
        for loader, split, dataset in zip([valid_loader, test_loader],
                                          ['validation', 'test'],
                                          [valid_dataset, test_dataset]):
            for epoch_iter, (images, labels,
                             indices) in tqdm(enumerate(loader, 1),
                                              total=len(loader)):
                ids = dataset.get_ids(indices)

                # Make a forward pass with the model
                model.set_input(images, labels)
                model.validate()

                # Error visualisation
                errors = model.get_current_errors()
                stats = model.get_segmentation_stats()
                error_logger.update({**errors, **stats}, split=split)

                if split == 'validation':  # do not look at testing
                    # Visualise predictions
                    volumes = model.get_current_volumes()
                    visualizer.display_current_volumes(volumes, ids, split,
                                                       epoch)
                    validation_volumes.append(volumes)

                    # Track validation loss values
                    early_stopper.update({**errors, **stats})

        # Update the plots
        for split in ['train', 'validation', 'test']:
            visualizer.plot_current_errors(epoch,
                                           error_logger.get_errors(split),
                                           split_name=split)
            visualizer.print_current_errors(epoch,
                                            error_logger.get_errors(split),
                                            split_name=split)
        visualizer.save_plots(epoch, save_frequency=5)
        error_logger.reset()

        # Save the model parameters
        if early_stopper.is_improving is not False:
            model.save(json_opts.model.model_type, epoch)
            save_config(json_opts, json_filename, model, epoch)

        # Update the model learning rate
        model.update_learning_rate(
            metric=early_stopper.get_current_validation_loss())

        if early_stopper.interrogate(epoch):
            break
Example #7
import config
from data import preprocess 
from utils import utils


# Save config
utils.save_config()


# 3-1 Load image paths and captions
dataset_path = preprocess.get_path_caption()

# 3-2 Split the full dataset and save it
train_dataset_path, val_dataset_path = preprocess.dataset_split_save(dataset_path)


# 3-3 Load the saved dataset
# Skipped since the data was already obtained in 3-2
# img_paths, caption = preprocess.get_data_file()


# 3-4 Data sampling
sample_rate = 70  # given as a percentage
dataset_origin = train_dataset_path
if config.do_sampling:
    dataset_sampled = preprocess.sampling_data(sample_rate, dataset_origin)
else:
    dataset_sampled = dataset_origin  # no sampling: use the full dataset below


# 4-1 Visualize images and captions
target_idx = 1
utils.visualize_img_caption(dataset_sampled, target_idx)
Example #8
    def eval(self):
        with tqdm(total=len(self.val_loader),
                  miniters=1,
                  desc='Val Epoch: [{}/{}]'.format(self.epoch,
                                                   self.nEpochs)) as t1:
            psnr_list, ssim_list = [], []
            for iteration, batch in enumerate(self.val_loader, 1):

                ms_image, lms_image, pan_image, bms_image, file = (
                    Variable(batch[0]), Variable(batch[1]), Variable(batch[2]),
                    Variable(batch[3]), batch[4])
                if self.cuda:
                    gpu = self.gpu_ids[0]
                    ms_image = ms_image.cuda(gpu)
                    lms_image = lms_image.cuda(gpu)
                    pan_image = pan_image.cuda(gpu)
                    bms_image = bms_image.cuda(gpu)

                self.model.eval()
                with torch.no_grad():
                    y = self.model(lms_image, bms_image, pan_image)
                    loss = self.loss(y, ms_image)

                batch_psnr, batch_ssim = [], []
                y = y[:, 0:3, :, :]
                ms_image = ms_image[:, 0:3, :, :]
                for c in range(y.shape[0]):
                    # convert to HWC numpy arrays before computing metrics
                    pred = y[c, ...].cpu().numpy().transpose((1, 2, 0))
                    ground = ms_image[c, ...].cpu().numpy().transpose((1, 2, 0))
                    if not self.cfg['data']['normalize']:
                        predict_y = pred * 255
                        ground_truth = ground * 255
                    else:
                        predict_y = (pred + 1) * 127.5
                        ground_truth = (ground + 1) * 127.5
                    psnr = calculate_psnr(predict_y, ground_truth, 255)
                    ssim = calculate_ssim(predict_y, ground_truth, 255)
                    batch_psnr.append(psnr)
                    batch_ssim.append(ssim)
                avg_psnr = np.array(batch_psnr).mean()
                avg_ssim = np.array(batch_ssim).mean()
                psnr_list.extend(batch_psnr)
                ssim_list.extend(batch_ssim)
                t1.set_postfix_str(
                    'Batch loss: {:.4f}, PSNR: {:.4f}, SSIM: {:.4f}'.format(
                        loss.item(), avg_psnr, avg_ssim))
                t1.update()
            self.records['Epoch'].append(self.epoch)
            self.records['PSNR'].append(np.array(psnr_list).mean())
            self.records['SSIM'].append(np.array(ssim_list).mean())

            save_config(
                self.log_name, 'Val Epoch {}: PSNR={:.4f}, SSIM={:.4f}'.format(
                    self.epoch, self.records['PSNR'][-1],
                    self.records['SSIM'][-1]))
            self.writer.add_scalar('PSNR_epoch', self.records['PSNR'][-1],
                                   self.epoch)
            self.writer.add_scalar('SSIM_epoch', self.records['SSIM'][-1],
                                   self.epoch)
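Example #8 uses save_config(self.log_name, message) as a lightweight text logger rather than a config writer. The following is a rough sketch consistent with that call, assuming it appends timestamped lines to a log file; the project's actual helper may format or locate the file differently.

import os
import time


def save_config(log_name, message):
    # hypothetical logger-style variant: append a timestamped line to log/<log_name>.txt
    os.makedirs('log', exist_ok=True)
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    with open(os.path.join('log', '{}.txt'.format(log_name)), 'a') as f:
        f.write('{} {}\n'.format(stamp, message))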
Example #9
from config import config
from data import preprocess
from utils import utils

# Save config
utils.save_config(config)

# Load image paths and captions
img_paths, captions = preprocess.get_path_caption()
# print("Image paths: " + str(img_paths))
# print("Captions: " + str(captions))

# Split the full dataset and save it
train_dataset_path, test_dataset_path = preprocess.dataset_split_save(captions)

# Load the saved datasets
img_paths, caption = preprocess.get_data_file(config.data, train_dataset_path,
                                              test_dataset_path)

# Data sampling
if config.do_sampling:
    img_paths, caption = preprocess.sampling_data(img_paths, caption,
                                                  config.do_sampling)

# Visualize images and captions
utils.visualize_img_caption(img_paths, caption)
Example #10
from config import config
from data import preprocess
from utils import utils

# Save config
utils.save_config(config())

# Load image paths and captions
img_paths, captions = preprocess.get_path_caption(config())

# Split the full dataset and save it
train_dataset_path, val_dataset_path = preprocess.dataset_split_save(config())

# Load the saved dataset
img_paths, caption = preprocess.get_data_file(train_dataset_path)

# Visualize images and captions
utils.visualize_img_caption(img_paths[0], caption[0])
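Examples #7, #9, and #10 call utils.save_config to record the run configuration before preprocessing; note that the call signature varies between them (no argument, a config object, a config() instance). Below is a minimal sketch of such a helper, assuming the config's public attributes are dumped as JSON; the actual utils module in these projects may store it differently.

import json


def save_config(config, path='config_saved.json'):
    # hypothetical helper: dump the config object's public attributes as JSON
    attrs = {k: v for k, v in vars(config).items() if not k.startswith('_')}
    with open(path, 'w', encoding='utf8') as f:
        json.dump(attrs, f, ensure_ascii=False, indent=2, default=str)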
Example #11
def train():
    # load data sets
    train_sentences = load_sentences(FLAGS.train_file, FLAGS.lower,
                                     FLAGS.zeros)
    dev_sentences = load_sentences(FLAGS.dev_file, FLAGS.lower, FLAGS.zeros)
    test_sentences = load_sentences(FLAGS.test_file, FLAGS.lower, FLAGS.zeros)

    # Use selected tagging scheme (IOB / IOBES)
    #update_tag_scheme(train_sentences, FLAGS.tag_schema)
    #update_tag_scheme(test_sentences, FLAGS.tag_schema)

    # create maps if not exist
    if not os.path.isfile(FLAGS.map_file):
        # create dictionary for word
        _c, char_to_id, id_to_char = char_mapping(train_sentences, FLAGS.lower)

        # Create a dictionary and a mapping for tags
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        os.makedirs('%s' % FLAGS.save_path)
        with open(FLAGS.map_file, "wb") as f:
            pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
    else:
        with open(FLAGS.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_padding_dataset(train_sentences, FLAGS.max_seq_len,
                                         char_to_id, tag_to_id, FLAGS.lower)
    dev_data = prepare_padding_dataset(dev_sentences, FLAGS.max_seq_len,
                                       char_to_id, tag_to_id, FLAGS.lower)
    test_data = prepare_padding_dataset(test_sentences, FLAGS.max_seq_len,
                                        char_to_id, tag_to_id, FLAGS.lower)

    print("%i / %i / %i sentences in train / dev / test." %
          (len(train_data), len(dev_data), len(test_data)))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, 100)
    test_manager = BatchManager(test_data, 100)
    """
    batch = train_manager.batch_data[0]
    strings, chars, segs, tags = batch
    for chrs in chars:
        print(chrs)
    for chrs in segs:
        print(chrs)
    print(tag_to_id)
    """
    # make path for store log and model if not exist
    make_path(FLAGS)
    if os.path.isfile(FLAGS.config_file):
        config = load_config(FLAGS.config_file)
    else:
        config = config_model(char_to_id, tag_to_id)
        save_config(config, FLAGS.config_file)
    make_path(FLAGS)

    log_path = os.path.join(FLAGS.save_path, "log", FLAGS.log_file)
    logger = get_logger(log_path)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = TransformerCRFModel(config, is_training=True)
        sess.run(tf.global_variables_initializer())
        logger.info("start training")
        loss = []
        # best dev/test F1 tracked across epochs
        best_dev_f1 = 0.0
        best_test_f1 = 0.0
        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    loss = []

            predict_lists = []
            source_tag = []
            for batch in dev_manager.iter_batch(shuffle=False):
                lengths, logits = model.run_step(sess, False, batch)
                _, chars, segs, tags = batch
                transition = model.transition.eval(session=sess)
                pre_seq = model.predict(logits, transition, lengths)
                pre_label = recover_label(pre_seq, lengths, id_to_tag)
                """
                for p in range(len(pre_label)):
                    print(chars[p])
                    print(pre_label[p])
                """
                source_label = recover_label(tags, lengths, id_to_tag)
                predict_lists.extend(pre_label)
                source_tag.extend(source_label)
            train_loss_v = np.round(float(np.mean(loss)), 4)
            print('****************************************************')
            acc, p, r, f = get_ner_fmeasure(source_tag, predict_lists,
                                            config["tag_schema"])
            logger.info('epoch:\t{}\ttrain loss:\t{}\t'.format(
                i + 1, train_loss_v))
            logger.info('dev acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}'.format(
                acc, p, r, f))

            # evaluate on the test set with fresh prediction/label lists
            predict_lists = []
            source_tag = []
            for batch in test_manager.iter_batch(shuffle=False):
                lengths, logits = model.run_step(sess, False, batch)
                _, chars, segs, tags = batch
                transition = model.transition.eval(session=sess)
                pre_seq = model.predict(logits, transition, lengths)
                pre_label = recover_label(pre_seq, lengths, id_to_tag)
                source_label = recover_label(tags, lengths, id_to_tag)
                predict_lists.extend(pre_label)
                source_tag.extend(source_label)

            acc_t, p_t, r_t, f_t = get_ner_fmeasure(source_tag, predict_lists,
                                                    config["tag_schema"])
            logger.info('test acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}'.format(
                acc_t, p_t, r_t, f_t))
            if f > best_dev_f1:
                save_model(sess, model, FLAGS.ckpt_path, logger)
                best_dev_f1 = f
                best_test_f1 = f_t
                logger.info(
                    'save epoch:\t{} model with best dev f1-score'.format(i +
                                                                          1))

            print('****************************************************\n\n')