Example #1
    # 5.1.1 encoders for the new model
    logging.info('creating and loading data loaders')
    data_encoder = OrderedDict()
    data_encoder[WordEncoder.FEATURE_NAME] = WordEncoder(os.path.join(args.data_dir, 'feats'),
                                                         dim=new_params.embedding_dim)
    label_encoder = OrderedDict()
    label_encoder[ClassEncoder.FEATURE_NAME] = ClassEncoder(os.path.join(args.data_dir, 'feats'))
    # record the feature names (the OrderedDict keys) on the params object
    new_params.data_feats = list(data_encoder)
    new_params.label_feats = list(label_encoder)

    # 5.1.2 encoders for the c1 model
    c1_data_encoder = utils.load_obj(os.path.join(c1_model_dir, 'data_encoder.pkl'))
    c1_label_encoder = utils.load_obj(os.path.join(c1_model_dir, 'label_encoder.pkl'))
    c1_params.data_feats = list(c1_data_encoder)
    c1_params.label_feats = list(c1_label_encoder)

    # 5.1.3 encoders for the c2 model
    c2_data_encoder = utils.load_obj(os.path.join(c2_model_dir, 'data_encoder.pkl'))
    c2_label_encoder = utils.load_obj(os.path.join(c2_model_dir, 'label_encoder.pkl'))
    c2_params.data_feats = list(c2_data_encoder)
    c2_params.label_feats = list(c2_label_encoder)
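
Note: these snippets assume encoder classes that expose a FEATURE_NAME class attribute (used as the OrderedDict key) and are constructed from a 'feats' directory. Their implementation is not shown on this page; the following is a hypothetical minimal sketch of that interface. The vocabulary file names and the id scheme are assumptions, not the actual code behind WordEncoder and ClassEncoder.

import os


class WordEncoder:
    # Hypothetical sketch of the word-feature encoder assumed above.
    FEATURE_NAME = 'word'

    def __init__(self, feats_dir, dim=100):
        self.dim = dim
        # Assumption: one token per line in a vocabulary file under feats_dir.
        with open(os.path.join(feats_dir, 'words.txt'), encoding='utf-8') as f:
            tokens = [line.strip() for line in f if line.strip()]
        self.token2id = {tok: i for i, tok in enumerate(tokens)}

    def encode(self, token):
        # Unknown tokens map to a reserved id just past the vocabulary.
        return self.token2id.get(token, len(self.token2id))


class ClassEncoder:
    # Hypothetical sketch of the label encoder assumed above.
    FEATURE_NAME = 'class'

    def __init__(self, feats_dir):
        with open(os.path.join(feats_dir, 'labels.txt'), encoding='utf-8') as f:
            labels = [line.strip() for line in f if line.strip()]
        self.label2id = {lab: i for i, lab in enumerate(labels)}

    def encode(self, label):
        return self.label2id[label]
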
Example #2
    params.cuda = torch.cuda.is_available()

    # 3. Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)
    np.random.seed(0)

    # 4. Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # 5. Create the input data pipeline
    logging.info("Loading the datasets...")
    # 5.1 specify features
    from collections import OrderedDict

    data_encoder = utils.load_obj(
        os.path.join(pretrained_model_dir, 'data_encoder.pkl'))
    pretrained_label_encoder = utils.load_obj(
        os.path.join(pretrained_model_dir, 'label_encoder.pkl'))
    label_encoder = OrderedDict()
    label_encoder[ClassEncoder.FEATURE_NAME] = ClassEncoder(
        os.path.join(args.data_dir, 'feats'))

    # 5.2 load data

    # data split options: k_fold enables cross-validation when set;
    # the two flags select how the train and dev splits are used
    k_fold = None
    combine_train_dev = False
    train_on_dev = False

    data_loader = DataLoader(params, args.data_dir, data_encoder,
                             label_encoder)
    if k_fold:
        pass  # k-fold cross-validation branch (body not shown in this snippet)
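
Note: utils.load_obj restores the pickled encoders saved alongside each trained model; its implementation is not shown in these snippets. A plausible minimal sketch, assuming it is a thin pickle wrapper (save_obj is included for symmetry and is likewise an assumption):

import pickle


def save_obj(obj, path):
    # Serialize an arbitrary Python object (e.g. an encoder OrderedDict) to disk.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def load_obj(path):
    # Restore an object previously written with save_obj.
    with open(path, 'rb') as f:
        return pickle.load(f)
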
Example #3

    params.cuda = torch.cuda.is_available()

    # 3. Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)
    np.random.seed(0)

    # 4. Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # 5. Create the input data pipeline
    logging.info("Loading the datasets...")
    # 5.1 specify features
    from collections import OrderedDict

    data_encoder = utils.load_obj(
        os.path.join(args.model_dir, 'data_encoder.pkl'))
    label_encoder = utils.load_obj(
        os.path.join(args.model_dir, 'label_encoder.pkl'))
    # 5.2 load data
    data_loader = DataLoader(params, args.data_dir, data_encoder,
                             label_encoder)
    data = data_loader.load_data(['test'])
    test_data = data['test']
    # 5.3 specify the train and val dataset sizes
    params.test_size = test_data['size']
    test_data_iterator = data_loader.batch_iterator(test_data,
                                                    params,
                                                    shuffle=False)
    logging.info("- done.")

    # 6. Modeling
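
Note: the snippet ends just before the modeling step. For context, here is a minimal sketch of how an iterator like test_data_iterator is typically consumed; evaluate, the (inputs, labels) batch layout, and the model call are illustrative assumptions, not this repository's actual API:

import torch


def evaluate(model, data_iterator, num_steps):
    # Accumulate accuracy over num_steps batches drawn from the iterator.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for _ in range(num_steps):
            inputs, labels = next(data_iterator)
            logits = model(inputs)
            preds = logits.argmax(dim=-1)
            correct += (preds == labels).sum().item()
            total += labels.numel()
    return correct / max(total, 1)

# Example usage (num_steps derived from the test size and batch size):
# num_steps = (params.test_size + params.batch_size - 1) // params.batch_size
# acc = evaluate(model, test_data_iterator, num_steps)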