Example #1
import warnings

# guard reconstructed from the warning text; the original condition is
# truncated in this snippet
if not model_parameters:
    warnings.warn('model_parameters is not set. '
                  'Randomly initialized model will be used for inference.')

get_pretrained = True

# keyword arguments for gluonnlp's model.get_model; ctxs is the mx.Context
# (or list of contexts) set up earlier in the script
get_model_params = {
    'name': args.model_name,
    'dataset_name': args.dataset,
    'pretrained': get_pretrained,
    'ctx': ctxs,
    'use_decoder': False,
    'dropout': args.dropout,
    'attention_dropout': args.attention_dropout
}

xlnet_base, vocab, tokenizer = model.get_model(**get_model_params)
# initializer for the parameters that the pretrained checkpoint does not
# cover (the new classifier head added below)
initializer = mx.init.Normal(0.02)
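# a minimal sketch (assumed API, following similar GluonNLP fine-tuning
# scripts): the initializer above would be applied only to the new head
# after the classifier is built, e.g.
#   model.classifier.initialize(init=initializer, ctx=ctxs)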

do_regression = not task.class_labels
if do_regression:
    num_classes = 1
    loss_function = gluon.loss.L2Loss()
else:
    num_classes = len(task.class_labels)
    loss_function = gluon.loss.SoftmaxCELoss()
# reuse the XLNetClassifier class with num_classes=1 for regression
# (note: this rebinds the name `model`, shadowing the module used above)
model = XLNetClassifier(xlnet_base,
                        units=xlnet_base._net._units,
                        dropout=0.1,
                        num_classes=num_classes)
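For context, here is a minimal sketch of how the model and loss above would combine in one fine-tuning step. The trainer, its learning rate, the train_step helper, and the XLNetClassifier call signature (inputs, token types, valid lengths) are assumptions modeled on similar GluonNLP scripts, not part of this snippet.

from mxnet import autograd, gluon

# hypothetical: optimizer over all parameters of the classifier built above
trainer = gluon.Trainer(model.collect_params(), 'adam',
                        {'learning_rate': 3e-5})

def train_step(inputs, token_types, valid_length, labels):
    with autograd.record():
        # call signature assumed; returns (batch, num_classes) scores
        out = model(inputs, token_types, valid_length)
        # L2Loss for regression (num_classes == 1), SoftmaxCELoss otherwise
        loss = loss_function(out, labels).mean()
    loss.backward()
    trainer.step(1)
    return loss.asscalar()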
Example #2
    # the snippet starts mid-call: the '--batch-size' flag name is assumed;
    # module-level imports (argparse, time, mxnet as mx, gluonnlp as nlp)
    # are expected earlier in the script
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        help='Batch size for evaluation.')
    parser.add_argument('--gpu', type=int, help='GPU id')
    args = parser.parse_args()

    start_time = time.time()

    # Model
    from transformer.model import get_model
    with open(args.vocab_file, 'r') as f:
        vocab = nlp.Vocab.from_json(f.read())

    ctx = mx.gpu(args.gpu) if args.gpu is not None else mx.cpu()
    # get_model returns both the model and the vocab it was built with
    model, vocab = get_model('transformerxl',
                             vocab=vocab,
                             dataset_name=args.dataset,
                             clamp_len=args.clamp_len)
    model.initialize(ctx=ctx)
    # load the trained weights, overwriting the random initialization
    model.load_parameters(args.parameter_file, ignore_extra=False)
    model.hybridize()
    print(model)

    # Data
    if args.dataset == 'wt103':
        val_dataset, test_dataset = [
            nlp.data.WikiText103(segment=segment,
                                 skip_empty=False,
                                 bos=vocab.bos_token,
                                 eos=vocab.eos_token)
            for segment in ['val', 'test']
        ]
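The snippet ends here. A hedged sketch of how evaluation could continue, using GluonNLP's LanguageModelDataset.bptt_batchify and the '--batch-size' flag completed above; the BPTT segment length and the perplexity note are illustrative assumptions, not taken from the original script.

    # hypothetical continuation: fixed-length (data, target) batch streams
    bptt = 64  # assumed segment length
    val_data, test_data = [
        dataset.bptt_batchify(vocab, bptt, args.batch_size,
                              last_batch='discard')
        for dataset in (val_dataset, test_dataset)
    ]
    # the evaluation loop accumulating total_loss and n_tokens over these
    # batches is omitted; the final metric would be
    # perplexity = exp(total_loss / n_tokens)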