def __init__(self, weight=None, batch_axis=0, customize_loss=False, **kwargs):  # pylint: disable=unused-argument
    """Initialize the QA loss wrapper.

    Parameters
    ----------
    weight : accepted for API compatibility but deliberately overridden
        with ``None`` in the super call (hence the pylint disable).
    batch_axis : likewise overridden with ``0``.
    customize_loss : bool
        When True, use dense (non-sparse) labels for the softmax CE loss.
    """
    super(BertForQALoss, self).__init__(weight=None, batch_axis=0, **kwargs)
    self.customize_loss = customize_loss
    # Dense soft labels when customized; default sparse integer labels otherwise.
    self.loss = (loss.SoftmaxCELoss(sparse_label=False)
                 if customize_loss else loss.SoftmaxCELoss())
def __init__(self, emb_size, vocab_size, hidden_size=256, num_layers=2,
             dropout=.2, bidir=True, latent_size=64, **kwargs):
    """Build the VAE-LSTM: encoder, decoder, CE reconstruction loss and a
    KL term over the latent Gaussian.

    Parameters mirror the encoder/decoder constructors; ``latent_size`` is
    the dimensionality of the latent code.
    """
    super(VAE_LSTM, self).__init__(**kwargs)
    with self.name_scope():
        self.latent_size = latent_size

        # NOTE(review): this evaluates +0.5 * sum(1 + logv - mu^2 - e^logv),
        # i.e. the NEGATIVE of the usual KL(q||p) term — presumably the
        # caller subtracts (or negates) it when assembling the ELBO; confirm
        # against the training loop before changing the sign.
        def _kl(mean, logv):
            return 0.5 * nd.sum(1 + logv - mean.square() - logv.exp())

        self.kl_div = _kl
        # Token-level reconstruction loss.
        self.ce_loss = loss.SoftmaxCELoss()

        self.encoder = VAEEncoder(vocab_size=vocab_size, emb_size=emb_size,
                                  hidden_size=hidden_size, num_layers=num_layers,
                                  dropout=dropout, bidir=bidir)
        self.decoder = VAEDecoder(vocab_size=vocab_size, emb_size=emb_size,
                                  hidden_size=hidden_size, num_layers=num_layers,
                                  dropout=dropout, bidir=bidir)
def __init__(self, xlnet_base, start_top_n=None, end_top_n=None,
             is_eval=False, units=768, prefix=None, params=None):
    """Wire the XLNet QA model: backbone, span heads, and answerability head.

    Parameters
    ----------
    xlnet_base : the pretrained XLNet backbone block.
    start_top_n, end_top_n : beam widths for start/end position search
        (used at evaluation time — TODO confirm against forward()).
    is_eval : bool, switches the end-logits head into evaluation mode.
    units : hidden size fed to the pooler heads.
    """
    super(XLNetForQA, self).__init__(prefix=prefix, params=params)
    with self.name_scope():
        # Backbone and configuration flags.
        self.xlnet = xlnet_base
        self.start_top_n = start_top_n
        self.end_top_n = end_top_n
        self.eval = is_eval
        # Span prediction: softmax CE over positions, plus the two heads.
        self.loss = loss.SoftmaxCELoss()
        self.start_logits = PoolerStartLogits()
        self.end_logits = PoolerEndLogits(units=units, is_eval=is_eval)
        # Answerability classification head with its binary CE loss.
        self.answer_class = XLNetPoolerAnswerClass(units=units)
        self.cls_loss = loss.SigmoidBinaryCrossEntropyLoss()
def main(args):
    """Entry point: set up logging/context, build the BERT classifier, then
    either train (``args.istrain``) or run prediction.

    ``args`` is an argparse-style namespace; all paths, hyper-parameters and
    the train/predict switch come from it.
    """
    # init some setting
    # config logging
    log_path = os.path.join(args.log_root, '{}.log'.format(args.model_name))
    logger = config_logger(log_path)
    # Device selection: args.gpu is 1-based; 0/None/falsy means CPU.
    gpu_idx = args.gpu
    if not gpu_idx:
        ctx = mx.cpu()
    else:
        ctx = mx.gpu(gpu_idx - 1)
    logger.info("Using ctx: {}".format(ctx))
    # Loading vocab and model — pretrained Chinese BERT without the
    # pooler/decoder/classifier heads (we attach our own dense head).
    ch_bert, ch_vocab = gluonnlp.model.get_model(
        args.bert_model, dataset_name=args.ch_bert_dataset, pretrained=True,
        ctx=ctx, use_pooler=False, use_decoder=False, use_classifier=False)
    model = BertClass(bert=ch_bert, max_seq_len=args.max_seq_len, ctx=ctx)
    logger.info("Model Creating Completed.")
    # init or load params for model: fresh Xavier init for the new output
    # head when training, otherwise restore a full saved checkpoint.
    if args.istrain:
        model.output_dense.initialize(init.Xavier(), ctx)
    else:
        model.load_parameters(args.model_params_path, ctx=ctx)
    logger.info("Parameters Initing and Loading Completed")
    model.hybridize()
    if args.istrain:
        # Loading dataloader (90/10 train/dev split of the training file).
        assiant = DatasetAssiantTransformer(ch_vocab=ch_vocab,
                                            max_seq_len=args.max_seq_len)
        dataset = ClassDataset(args.train_file_path)
        train_dataset, dev_dataset = train_valid_split(dataset,
                                                       valid_ratio=0.1)
        train_dataiter = ClassDataLoader(train_dataset,
                                         batch_size=args.batch_size,
                                         assiant=assiant,
                                         shuffle=True).dataiter
        dev_dataiter = ClassDataLoader(dev_dataset,
                                       batch_size=args.batch_size,
                                       assiant=assiant,
                                       shuffle=True).dataiter
        logger.info("Data Loading Completed")
    else:
        assiant = DatasetAssiantTransformer(ch_vocab=ch_vocab,
                                            max_seq_len=args.max_seq_len,
                                            istrain=args.istrain)
        test_dataset = ClassTestDataset(args.test_file_path)
        # NOTE(review): the test loader also shuffles — confirm prediction
        # output order does not need to match the input file order.
        test_dataiter = ClassDataLoader(test_dataset,
                                        batch_size=args.batch_size,
                                        assiant=assiant,
                                        shuffle=True).dataiter
    # build trainer — two optimizers: a small LR to fine-tune the BERT body,
    # a larger LR for the freshly-initialized dense head only ("dense*").
    finetune_trainer = gluon.Trainer(ch_bert.collect_params(),
                                     args.optimizer,
                                     {"learning_rate": args.finetune_lr})
    trainer = gluon.Trainer(model.collect_params("dense*"), args.optimizer,
                            {"learning_rate": args.train_lr})
    loss_func = gloss.SoftmaxCELoss()
    if args.istrain:
        logger.info("## Trainning Start ##")
        train_and_valid(ch_bert=ch_bert, model=model, ch_vocab=ch_vocab,
                        train_dataiter=train_dataiter,
                        dev_dataiter=dev_dataiter, trainer=trainer,
                        finetune_trainer=finetune_trainer,
                        epochs=args.epochs, loss_func=loss_func, ctx=ctx,
                        lr=args.train_lr, batch_size=args.batch_size,
                        params_save_step=args.params_save_step,
                        params_save_path_root=args.params_save_path_root,
                        eval_step=args.eval_step, log_step=args.log_step,
                        check_step=args.check_step, logger=logger,
                        num_train_examples=len(train_dataset),
                        warmup_ratio=args.warmup_ratio)
    else:
        predict(ch_bert=ch_bert, model=model, ch_vocab=ch_vocab,
                test_dataiter=test_dataiter, logger=logger, ctx=ctx)
def __init__(self):
    """Hold two sub-losses: softmax CE and sigmoid BCE.

    How the two are combined is presumably defined in the (not shown)
    forward method.
    """
    super(MyLoss3, self).__init__()
    self.loss1, self.loss2 = loss.SoftmaxCELoss(), loss.SigmoidBCELoss()
def __init__(self):
    """Wrap a single standard softmax cross-entropy loss."""
    super(MyLoss2, self).__init__()
    # Single sub-loss; applied in the (not shown) forward method.
    self.loss = loss.SoftmaxCELoss()
def __init__(self, weight=None, batch_axis=0, **kwargs):  # pylint: disable=unused-argument
    """Softmax cross-entropy loss wrapper for BERT QA.

    ``weight`` and ``batch_axis`` are accepted for interface compatibility
    but deliberately overridden with ``None`` / ``0`` in the super call
    (hence the pylint unused-argument disable).
    """
    super(BertForQALoss, self).__init__(weight=None, batch_axis=0, **kwargs)
    # Sparse-label softmax CE applied per position head.
    self.loss = loss.SoftmaxCELoss()
dataset = gdata.ArrayDataset(features, labels) # 随机读取小批量 train_iter = gdata.DataLoader(dataset, batch_size, shuffle=True) test_iter = gdata.DataLoader(dataset, batch_size, shuffle=True) for X, y in train_iter: print(X, y) break # sigmoid relu net = nn.Sequential() # net.add(nn.Dense(5, activation="sigmoid"), nn.Dropout(0.1),nn.Dense(3)) net.add(nn.Dense(5, activation="relu"), nn.Dense(3)) net.initialize(init.Normal(sigma=0.5)) loss = gloss.SoftmaxCELoss() # 平方损失又称L2范数损失 trainer = gluon.Trainer(net.collect_params(), "sgd", { "learning_rate": 0.015, 'wd': 1 }) def evaluate_accuracy(data_iter, net): acc_sum, n = 0.0, 0 for X, y in data_iter: y = y.astype("float32") acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar() n += y.size return acc_sum / n
# Softmax-regression baseline on Fashion-MNIST using d2lzh helpers:
# train a single Dense(10) layer, then visualize predictions on one batch.
import d2lzh as d2l
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Single linear layer: 784 pixels -> 10 class scores.
net = nn.Sequential()
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))

loss = gloss.SoftmaxCELoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, trainer)

# Grab one test batch and show true vs. predicted labels side by side.
for x, y in test_iter:
    break
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
pred_labels = d2l.get_fashion_mnist_labels(net(x).argmax(axis=1).asnumpy())
titles = [actual + '\n' + guessed
          for actual, guessed in zip(true_labels, pred_labels)]
d2l.show_fashion_mnist(x[0:9], titles[0:9])