nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32')) berttoken = nlp.data.BERTTokenizer(vocab=vocab, lower=lower) net = BertForQA(bert=bert) if pretrained_bert_parameters and not model_parameters: bert.load_parameters(pretrained_bert_parameters, ctx=ctx, ignore_extra=True) if not model_parameters: net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx) else: net.load_parameters(model_parameters, ctx=ctx) net.hybridize(static_alloc=True) loss_function = BertForQALoss() loss_function.hybridize(static_alloc=True) def train(): """Training function.""" segment = 'train' if not args.test_mode else 'dev' log.info('Loading %s data...', segment) if version_2: train_data = SQuAD(segment, version='2.0') else: train_data = SQuAD(segment, version='1.1') log.info('Number of records in Train data:{}'.format(len(train_data)))
nlp.data.batchify.Stack(), nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]), nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32'), nlp.data.batchify.Stack('float32')) BERT_DIM = {'bert_12_768_12': 768, 'bert_24_1024_16': 1024} net = BertForQA(bert=bert, \ n_rnn_layers = args.n_rnn_layers, apply_coattention=args.apply_coattention, bert_out_dim=BERT_DIM[args.bert_model], \ remove_special_token=args.remove_special_token, mask_output=args.mask_output) if model_parameters: # load complete BertForQA parameters net.load_parameters(model_parameters, ctx=ctx, cast_dtype=True) elif pretrained_bert_parameters: # only load BertModel parameters bert.load_parameters(pretrained_bert_parameters, ctx=ctx, ignore_extra=True, cast_dtype=True) net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx) elif pretrained: # only load BertModel parameters net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx) else: # no checkpoint is loaded net.initialize(init=mx.init.Normal(0.02), ctx=ctx) if args.apply_coattention: