'Randomly initialized model will be used for inference.') get_pretrained = not (pretrained_bert_parameters is not None or model_parameters is not None) bert, vocabulary = get_model(name=model_name, dataset_name=dataset, pretrained=get_pretrained, ctx=ctx, use_pooler=True, use_decoder=False, use_classifier=False) if not task.class_labels: # STS-B is a regression task. # STSBTask().class_labels returns None model = BERTRegression(bert, dropout=0.1) if not model_parameters: model.regression.initialize(init=mx.init.Normal(0.02), ctx=ctx) loss_function = gluon.loss.L2Loss() else: model = BERTClassifier(bert, dropout=0.1, num_classes=len(task.class_labels)) if not model_parameters: model.classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx) loss_function = gluon.loss.SoftmaxCELoss() # load checkpointing output_dir = args.output_dir if pretrained_bert_parameters: logging.info('loading bert params from %s', pretrained_bert_parameters)
dataset_name=args.dataset_name, pretrained=False, use_pooler=True, use_decoder=False, use_classifier=False, seq_length=args.seq_length) net = BERTClassifier(bert, num_classes=2, dropout=args.dropout) elif args.task == 'regression': bert, _ = get_model(name=args.model_name, dataset_name=args.dataset_name, pretrained=False, use_pooler=True, use_decoder=False, use_classifier=False, seq_length=args.seq_length) net = BERTRegression(bert, dropout=args.dropout) elif args.task == 'question_answering': bert, _ = get_model(name=args.model_name, dataset_name=args.dataset_name, pretrained=False, use_pooler=False, use_decoder=False, use_classifier=False, seq_length=args.seq_length) net = BertForQA(bert) else: raise ValueError('unknown task: %s' % args.task) if args.model_parameters: net.load_parameters(args.model_parameters) else: