    and not pretrained_bert_parameters,
    ctx=ctx, use_pooler=False, use_decoder=False, use_classifier=False)

batchify_fn = nlp.data.batchify.Tuple(
    nlp.data.batchify.Stack(),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'))

berttoken = nlp.data.BERTTokenizer(vocab=vocab, lower=lower)

net = BertForQA(bert=bert)
if pretrained_bert_parameters and not model_parameters:
    # only load the BertModel backbone; the span classifier is initialized below
    bert.load_parameters(pretrained_bert_parameters, ctx=ctx, ignore_extra=True)
if not model_parameters:
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
else:
    # load complete BertForQA parameters from a checkpoint
    net.load_parameters(model_parameters, ctx=ctx)
net.hybridize(static_alloc=True)

loss_function = BertForQALoss()
loss_function.hybridize(static_alloc=True)


def train():
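# --- Hedged sketch (not part of the original script): how `batchify_fn` pads and
# stacks per-example tuples into a batch through the Gluon DataLoader. The dummy
# examples below are stand-ins for the real SQuAD features produced by `berttoken`
# plus the script's preprocessing; only `batchify_fn` itself comes from the code above.
import mxnet as mx

dummy_examples = [
    # (example_id, token_ids, segment_ids, valid_length, start_position, end_position)
    (0, [2, 7, 8, 3], [0, 0, 1, 1], 4, 2, 2),
    (1, [2, 7, 8, 9, 3], [0, 0, 1, 1, 1], 5, 3, 3),
]
dummy_loader = mx.gluon.data.DataLoader(
    mx.gluon.data.SimpleDataset(dummy_examples),
    batch_size=2, batchify_fn=batchify_fn)
for ids, token_ids, segment_ids, valid_len, starts, ends in dummy_loader:
    print(token_ids.shape)  # (2, 5): the shorter example is padded with the vocab's padding id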
if args.sentencepiece:
    tokenizer = nlp.data.BERTSPTokenizer(args.sentencepiece, vocab, lower=lower)
else:
    tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower)

batchify_fn = nlp.data.batchify.Tuple(
    nlp.data.batchify.Stack(),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'))

net = BertForQA(bert=bert)
if model_parameters:
    # load complete BertForQA parameters
    nlp.utils.load_parameters(net, model_parameters, ctx=ctx, cast_dtype=True)
elif pretrained_bert_parameters:
    # only load BertModel parameters
    nlp.utils.load_parameters(bert, pretrained_bert_parameters, ctx=ctx,
                              ignore_extra=True, cast_dtype=True)
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
elif pretrained:
    # the BertModel backbone already holds pretrained weights (loaded via
    # get_model); only the span classifier needs initialization
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
else:
        dataset_name=args.dataset_name,
        pretrained=False,
        use_pooler=True,
        use_decoder=False,
        use_classifier=False,
        seq_length=args.seq_length)
    net = BERTRegression(bert, dropout=args.dropout)
elif args.task == 'question_answering':
    bert, _ = get_model(
        name=args.model_name,
        dataset_name=args.dataset_name,
        pretrained=False,
        use_pooler=False,
        use_decoder=False,
        use_classifier=False,
        seq_length=args.seq_length)
    net = BertForQA(bert)
else:
    raise ValueError('unknown task: %s' % args.task)

if args.model_parameters:
    net.load_parameters(args.model_parameters)
else:
    net.initialize()
    warnings.warn('--model_parameters is not provided. The parameter checkpoint (.params) '
                  'file will be created based on default parameter initialization.')

net.hybridize(static_alloc=True, static_shape=True)

###############################################################################
# Prepare dummy input data
#
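# --- Hedged sketch of the dummy batch a static-shape export typically needs; this is
# not the original script's exact code. `test_batch_size` is an assumed name, and the
# shapes simply follow the (token_ids, token_types, valid_length) signature that the
# BERT-based models above expect.
test_batch_size = 1
inputs = mx.nd.ones(shape=(test_batch_size, args.seq_length))        # token ids
token_types = mx.nd.zeros(shape=(test_batch_size, args.seq_length))  # segment ids
valid_length = mx.nd.ones(shape=(test_batch_size,))                  # real sequence lengths

# one forward pass builds the static graph, after which symbol + params can be exported
net(inputs, token_types, valid_length)
net.export('exported_model')  # writes exported_model-symbol.json / exported_model-0000.params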
                                         lower=lower)
else:
    tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower)

batchify_fn = nlp.data.batchify.Tuple(
    nlp.data.batchify.Stack(),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'))

BERT_DIM = {'bert_12_768_12': 768, 'bert_24_1024_16': 1024}
net = BertForQA(bert=bert,
                n_rnn_layers=args.n_rnn_layers,
                apply_coattention=args.apply_coattention,
                bert_out_dim=BERT_DIM[args.bert_model],
                remove_special_token=args.remove_special_token,
                mask_output=args.mask_output)
if model_parameters:
    # load complete BertForQA parameters
    net.load_parameters(model_parameters, ctx=ctx, cast_dtype=True)
elif pretrained_bert_parameters:
    # only load BertModel parameters
    bert.load_parameters(pretrained_bert_parameters, ctx=ctx,
                         ignore_extra=True, cast_dtype=True)
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
elif pretrained:
    # the BertModel backbone already holds pretrained weights (loaded via
    # get_model); only the span classifier needs initialization
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower)

batchify_fn = nlp.data.batchify.Tuple(
    nlp.data.batchify.Stack(),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'))

BERT_DIM = {'bert_12_768_12': 768, 'bert_24_1024_16': 1024}
net = BertForQA(bert=bert,
                add_query=args.add_query,
                apply_coattention=args.apply_coattention,
                bert_out_dim=BERT_DIM[args.bert_model],
                apply_self_attention=args.apply_self_attention,
                apply_transformer=args.apply_transformer,
                qanet_style_out=args.qanet_style_out,
                bidaf_style_out=args.bidaf_style_out,
                n_rnn_layers=args.n_rnn_layers,
                remove_special_token=args.remove_special_token)
# print(net)
# exit(0)

if args.apply_coattention and (args.qanet_style_out or args.bidaf_style_out):
    additional_params = None
else:
    additional_params = net.span_classifier.collect_params()

if model_parameters:
    # load complete BertForQA parameters
    net.load_parameters(model_parameters, ctx=ctx, cast_dtype=True)
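# --- Hedged sketch: one plausible way `additional_params` could be consumed later.
# The fork's actual training loop is not shown here; this only illustrates giving the
# span-classifier parameters their own Trainer. The name `classifier_trainer` and the
# hyperparameters are assumptions, not the script's real settings.
if additional_params is not None:
    classifier_trainer = mx.gluon.Trainer(additional_params, 'adam',
                                          {'learning_rate': 1e-3})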
                                         vocab, lower=lower)
else:
    tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower)

batchify_fn = nlp.data.batchify.Tuple(
    nlp.data.batchify.Stack(),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'))

BERT_DIM = {'bert_12_768_12': 768, 'bert_24_1024_16': 1024}
net = BertForQA(bert=bert,
                add_query=args.add_query,
                apply_coattention=args.apply_coattention,
                bert_out_dim=BERT_DIM[args.bert_model],
                apply_self_attention=args.apply_self_attention)
if model_parameters:
    # load complete BertForQA parameters
    net.load_parameters(model_parameters, ctx=ctx, cast_dtype=True)
elif pretrained_bert_parameters:
    # only load BertModel parameters
    bert.load_parameters(pretrained_bert_parameters, ctx=ctx,
                         ignore_extra=True, cast_dtype=True)
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
elif pretrained:
    # the BertModel backbone already holds pretrained weights (loaded via
    # get_model); only the span classifier needs initialization
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
else:
if args.sentencepiece:
    tokenizer = nlp.data.BERTSPTokenizer(args.sentencepiece, vocab, lower=lower)
else:
    tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=lower)

batchify_fn = nlp.data.batchify.Tuple(
    nlp.data.batchify.Stack(),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Pad(axis=0, pad_val=vocab[vocab.padding_token]),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'),
    nlp.data.batchify.Stack('float32'))

net = BertForQA(bert=bert)
if model_parameters:
    # load complete BertForQA parameters
    net.load_parameters(model_parameters, ctx=ctx, cast_dtype=True)
elif pretrained_bert_parameters:
    # only load BertModel parameters
    bert.load_parameters(pretrained_bert_parameters, ctx=ctx,
                         ignore_extra=True, cast_dtype=True)
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
elif pretrained:
    # the BertModel backbone already holds pretrained weights (loaded via
    # get_model); only the span classifier needs initialization
    net.span_classifier.initialize(init=mx.init.Normal(0.02), ctx=ctx)
else:
    # no checkpoint is loaded
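    # --- Hedged sketch of the usual continuation (modeled on GluonNLP's
    # finetune_squad.py; the exact lines in this file may differ): when no
    # checkpoint of any kind is available, initialize the whole network, then
    # hybridize the model and its loss.
    net.initialize(init=mx.init.Normal(0.02), ctx=ctx)

net.hybridize(static_alloc=True)
loss_function = BertForQALoss()
loss_function.hybridize(static_alloc=True)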