Example #1
    parser.add_argument('--encoding',
                        default='utf8',
                        help="The encoding for input and output file.")

    args = parser.parse_args()
    dataset_cfg = DatasetCfg(args.data_dir)
    exp_cfg = ExperimentCfg(args.exp_dir)
    inputs_file = args.inputs_file
    outputs_file = args.outputs_file
    restore_checkpoint = args.restore_checkpoint
    encoding = args.encoding

    msg = "Inputs file not exists: {}"
    assert os.path.isfile(inputs_file), msg.format(inputs_file)

    logger = Logger.set(os.path.join(exp_cfg.experiment_dir(), 
                                     'predict.log'))

    checkpoint = Checkpoint(
        checkpoint_dir=exp_cfg.experiment_dir(),
        filename=exp_cfg.checkpoint_filename(),
        best_checkpoint=exp_cfg.best_checkpoint(),
        latest_checkpoint=exp_cfg.latest_checkpoint(),
        logger=logger)

    # load vocabularies and params
    word_vocab = Vocab(words_file)
    tag_vocab = Vocab(tags_file)

    params = Params(exp_cfg.params_file())
    params.update(Params(dataset_cfg.params_file()))
    params.set('cuda', torch.cuda.is_available())
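
All four snippets configure themselves through a Params object: it is constructed from a file path, merged with params.update(...), mutated with params.set(...), and indexed like a dict in Example #4. The class itself is not part of these snippets; a minimal JSON-backed sketch with that interface (the JSON file format is an assumption) could look like this:

import json


class Params:
    """Hypothetical JSON-backed hyperparameter container (sketch only)."""

    def __init__(self, json_path):
        # read the parameter file into a plain dict
        with open(json_path, 'r', encoding='utf8') as f:
            self._params = json.load(f)

    def update(self, other):
        # merge another Params object into this one
        self._params.update(other._params)

    def set(self, key, value):
        self._params[key] = value

    def __getitem__(self, key):
        # supports params['test_size']-style lookups as in Example #4
        return self._params[key]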
Example #2
    # load parser
    dataset_cfg = DatasetCfg(config.data_dir)
    exp_cfg = ExperimentCfg(config.base_model_dir)
    parser = get_parser(data_dir=dataset_cfg.data_dir(),
                        exp_dir=exp_cfg.experiment_dir(),
                        restore_checkpoint=None)

    # parse command line arguments
    args = parser.parse_args()
    restore_checkpoint = args.restore_checkpoint
    dataset_cfg.set_data_dir(args.data_dir)
    exp_cfg.set_experiment_dir(args.exp_dir)

    # set logger
    # Note: log file will be stored in the `exp_dir` directory
    logger = Logger.set(exp_cfg.train_log())

    # load experiment configuration
    logger.info("Loading the experiment configurations...")
    params = Params(exp_cfg.params_file())
    logger.info("- done.")

    # set params
    params.set('cuda', torch.cuda.is_available())

    # load datasets
    logger.info("Loading the datasets...")
    # add dataset parameters into params
    params.update(Params(dataset_cfg.params_file()))
    trainloader, valloader = load_data(params,
                                       dataset_cfg.data_dir(),
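
Every script obtains its command-line interface from a get_parser helper that is not included in these snippets. Judging only from the keyword arguments it receives and the attributes later read from args, a hedged sketch of such a helper (option names, defaults and help texts are assumptions) might be:

import argparse


def get_parser(data_dir, exp_dir, restore_checkpoint=None, dataset_name=None):
    """Hypothetical builder of the shared command-line parser (sketch only)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default=data_dir,
                        help="Directory containing the prepared dataset.")
    parser.add_argument('--exp_dir', default=exp_dir,
                        help="Experiment directory with params, checkpoints and logs.")
    parser.add_argument('--restore_checkpoint', default=restore_checkpoint,
                        help="Checkpoint file to restore model weights from.")
    if dataset_name is not None:
        parser.add_argument('--dataset_name', default=dataset_name,
                            help="Name of the dataset split to load.")
    return parser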
Example #3
    args = parser.parse_args()

    msg = 'Data file {} not found.'
    assert os.path.isfile(args.data_file), msg.format(args.data_file)
    msg = '{} directory not found. Please create it first.'
    assert os.path.isdir(args.data_dir), msg.format(args.data_dir)
    msg = 'the proportion of the dataset to build must be in (0.0, 1.0]'
    assert (args.data_factor > 0.0) and (args.data_factor <= 1.0), msg
    msg = 'train factor + val factor + test factor must be equal to 1.0'
    total = args.train_factor + args.val_factor + args.test_factor
    # compare with a tolerance: exact float equality rejects splits such as 0.7 + 0.2 + 0.1
    assert abs(total - 1.0) < 1e-9, msg

    dataset_cfg.set_data_dir(args.data_dir)

    # set and get logger
    logger = Logger.set(dataset_cfg.log_file())

    # build, load and dump datasets
    builder = Builder(data_factor=args.data_factor,
                      train_factor=args.train_factor,
                      val_factor=args.val_factor,
                      test_factor=args.test_factor,
                      train_name=args.train_name,
                      val_name=args.val_name,
                      test_name=args.test_name,
                      logger=logger)
    builder.load(args.data_file, encoding='windows-1252')
    builder.dump(dataset_cfg.data_dir(),
                 dataset_cfg.params_file(),
                 min_count_word=args.min_count_word,
                 min_count_tag=args.min_count_tag,
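
The Builder used above is also defined outside these snippets; its factors describe how much of the loaded data to keep (data_factor) and how to divide the kept portion into train/validation/test splits. A minimal sketch of that splitting step under those assumptions (the function name and the shuffling are illustrative) could be:

import random


def split_dataset(examples, data_factor, train_factor, val_factor, test_factor, seed=42):
    """Hypothetical proportional train/val/test split (sketch only)."""
    rng = random.Random(seed)
    examples = list(examples)
    rng.shuffle(examples)
    # keep only the requested fraction of the full dataset
    kept = examples[:int(len(examples) * data_factor)]
    # divide the kept examples according to the three factors (which sum to 1.0)
    n_train = int(len(kept) * train_factor)
    n_val = int(len(kept) * val_factor)
    return kept[:n_train], kept[n_train:n_train + n_val], kept[n_train + n_val:]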
Example #4
if __name__ == '__main__':
    dataset_cfg = DatasetCfg(config.data_dir)
    exp_cfg = ExperimentCfg(config.base_model_dir)
    parser = get_parser(data_dir=dataset_cfg.data_dir(),
                        exp_dir=exp_cfg.experiment_dir(),
                        restore_checkpoint=exp_cfg.best_checkpoint(),
                        dataset_name=dataset_cfg.test_name())

    args = parser.parse_args()
    dataset_name = args.dataset_name
    restore_checkpoint = args.restore_checkpoint
    dataset_cfg.set_data_dir(args.data_dir)
    exp_cfg.set_experiment_dir(args.exp_dir)

    # set logger
    logger = Logger.set(exp_cfg.evaluate_log())

    # load experiment configuration
    logger.info("Loading the experiment configurations...")
    params = Params(exp_cfg.params_file())
    # cuda flag
    params.set('cuda', torch.cuda.is_available())
    logger.info("- done.")

    # load dataset
    logger.info("Loading the {} dataset...".format(dataset_name))
    # add dataset parameters into params
    params.update(Params(dataset_cfg.params_file()))
    dataset = load_data(params, dataset_cfg.data_dir(), dataset_name,
                        params['{}_size'.format(dataset_name)])
    logger.info("- done.")