def __init__(self, node_weight=1.0, edge_weight=1.0, ignore=0):
    """Build weighted cross-entropy criteria for node and edge labels.

    Args:
        node_weight: multiplier applied to the node loss term.
        edge_weight: multiplier applied to the edge loss term.
        ignore: label value excluded from the node loss.
    """
    super().__init__()
    self.ignore = ignore
    self.node_weight = node_weight
    self.edge_weight = edge_weight
    # Node labels honor the configurable ignore index; edge labels
    # always treat -1 as the padding/ignore value.
    self.loss_node = nn.CrossEntropyLoss(ignore_index=ignore)
    self.loss_edge = nn.CrossEntropyLoss(ignore_index=-1)
def forward(self,
            input_ids,
            token_type_ids=None,
            attention_mask=None,
            masked_lm_labels=None,
            next_sentence_label=None):
    """Run the NeZha pretraining heads.

    Args:
        input_ids: token id tensor fed to the encoder.
        token_type_ids: optional segment ids.
        attention_mask: optional attention mask.
        masked_lm_labels: MLM targets; -1 entries are ignored by the loss.
        next_sentence_label: NSP targets; -1 entries are ignored.

    Returns:
        Total loss when ``masked_lm_labels`` is given (plus the NSP loss when
        ``next_sentence_label`` is also given); otherwise the raw
        ``(prediction_scores, seq_relationship_score)`` pair.
    """
    sequence_output, pooled_output = self.nezha(input_ids, token_type_ids,
                                                attention_mask)
    prediction_scores, seq_relationship_score = self.cls(sequence_output,
                                                         pooled_output)
    # Inference path: no labels, return raw scores.
    if masked_lm_labels is None:
        return prediction_scores, seq_relationship_score
    # The original duplicated this MLM-loss computation in two branches;
    # compute it once and add the NSP term only when its label is present.
    loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
    masked_lm_loss = loss_fct(
        prediction_scores.reshape((-1, self.nezha.config["vocab_size"])),
        masked_lm_labels.reshape((-1, )))
    total_loss = masked_lm_loss
    if next_sentence_label is not None:
        next_sentence_loss = loss_fct(
            seq_relationship_score.reshape((-1, 2)),
            next_sentence_label.reshape((-1, )))
        total_loss = masked_lm_loss + next_sentence_loss
    return total_loss
def __init__(self, model_config, compound_encoder):
    """Set up one prediction head per enabled geometry pretraining task."""
    super(GeoPredModel, self).__init__()
    self.compound_encoder = compound_encoder
    self.hidden_size = model_config['hidden_size']
    self.dropout_rate = model_config['dropout_rate']
    self.act = model_config['act']
    self.pretrain_tasks = model_config['pretrain_tasks']
    embed_dim = compound_encoder.embed_dim

    # Context-mask head: classify masked contexts over Cm_vocab (+3 extras).
    if 'Cm' in self.pretrain_tasks:
        self.Cm_vocab = model_config['Cm_vocab']
        self.Cm_linear = nn.Linear(embed_dim, self.Cm_vocab + 3)
        self.Cm_loss = nn.CrossEntropyLoss()
    # Functional-group head is always built (multi-label prediction).
    self.Fg_linear = nn.Linear(embed_dim, model_config['Fg_size'])  # 494
    self.Fg_loss = nn.BCEWithLogitsLoss()
    # Bond-angle regression over triples of atom embeddings.
    if 'Bar' in self.pretrain_tasks:
        self.Bar_mlp = MLP(2,
                           hidden_size=self.hidden_size,
                           act=self.act,
                           in_size=embed_dim * 3,
                           out_size=1,
                           dropout_rate=self.dropout_rate)
        self.Bar_loss = nn.SmoothL1Loss()
    # Bond-length regression over pairs of atom embeddings.
    if 'Blr' in self.pretrain_tasks:
        self.Blr_mlp = MLP(2,
                           hidden_size=self.hidden_size,
                           act=self.act,
                           in_size=embed_dim * 2,
                           out_size=1,
                           dropout_rate=self.dropout_rate)
        self.Blr_loss = nn.SmoothL1Loss()
    # Atom-distance classification into Adc_vocab (+3 extra) bins.
    if 'Adc' in self.pretrain_tasks:
        self.Adc_vocab = model_config['Adc_vocab']
        self.Adc_mlp = MLP(2,
                           hidden_size=self.hidden_size,
                           in_size=self.compound_encoder.embed_dim * 2,
                           act=self.act,
                           out_size=self.Adc_vocab + 3,
                           dropout_rate=self.dropout_rate)
        self.Adc_loss = nn.CrossEntropyLoss()
    print('[GeoPredModel] pretrain_tasks:%s' % str(self.pretrain_tasks))
def train():
    """Train LinearNet with the legacy manual data-parallel API."""
    # Enable dygraph mode and the parallel environment.
    paddle.disable_static()
    dist.init_parallel_env()

    # Create the network and wrap it for data-parallel training.
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())
    # print(core._get_device_properties(dist.ParallelEnv().device_id))

    # Create the data loader.
    # loader = paddle.io.DataLoader.from_generator(capacity=5, use_multiprocess=True)
    loader = paddle.io.DataLoader.from_generator(capacity=5)
    loader.set_batch_generator(random_batch_reader())

    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            # BUG FIX: the forward pass must go through the DataParallel
            # wrapper (the original called the bare `layer`), matching the
            # scale_loss/apply_collective_grads usage below.
            out = dp_layer(image)
            loss = loss_fn(out, label)

            # Legacy manual gradient scaling and cross-trainer allreduce.
            loss = dp_layer.scale_loss(loss)
            loss.backward()
            dp_layer.apply_collective_grads()

            adam.step()
            adam.clear_grad()
            print("Epoch {} batch {}: loss = {}".format(
                epoch_id, batch_id, np.mean(loss.numpy())))
def train():
    """Train LinearNet with paddle.DataParallel; only rank 0 logs."""
    # Initialize the parallel environment.
    dist.init_parallel_env()

    # Create the network and wrap it for data-parallel training.
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # Create the data loader.
    dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
    loader = paddle.io.DataLoader(dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  drop_last=True,
                                  num_workers=1)
    # Train.
    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            # BUG FIX: forward must go through the DataParallel wrapper so
            # gradients are synchronized across trainers during backward();
            # the original called the bare `layer`.
            out = dp_layer(image)
            loss = loss_fn(out, label)

            loss.backward()
            adam.step()
            adam.clear_grad()

            if dist.get_rank() == 0:
                print("Epoch {} batch {}: loss = {}".format(
                    epoch_id, batch_id, np.mean(loss.numpy())))
def finetune(args):
    """Fine-tune an ELMo bag-of-words classifier on the rt-polarity data."""
    paddle.set_device(args.device)
    if dist.get_world_size() > 1:
        dist.init_parallel_env()

    # Load and split the sentence-polarity corpus (10% held out for test).
    pos_file = os.path.join(args.data_dir, 'rt-polarity.pos')
    neg_file = os.path.join(args.data_dir, 'rt-polarity.neg')
    x_text, y = load_data_and_labels(pos_file, neg_file)
    x_train, x_test, y_train, y_test = train_test_split(
        x_text, y, test_size=0.1, random_state=args.seed)

    # A pretrained checkpoint is mandatory for fine-tuning.
    if not args.init_from_ckpt:
        raise ValueError('`init_from_ckpt` should be set.')
    model = ELMoBowTextClassification(args.init_from_ckpt, args.batch_size,
                                      args.sent_embedding_dim, args.dropout,
                                      args.num_classes)
    if dist.get_world_size() > 1:
        model = paddle.DataParallel(model)
    model.train()

    adam = paddle.optimizer.Adam(parameters=model.parameters(),
                                 learning_rate=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss()

    # Build vocabulary-backed datasets and loaders (lambda wrapper around
    # generate_batch dropped — the function is the collate_fn itself).
    vocab = load_vocab()
    train_dataset = SentencePolarityDatasetV1(x_train, y_train, vocab,
                                              args.max_seq_len)
    test_dataset = SentencePolarityDatasetV1(x_test, y_test, vocab,
                                             args.max_seq_len)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              return_list=True,
                              shuffle=True,
                              collate_fn=generate_batch)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             return_list=True,
                             shuffle=False,
                             collate_fn=generate_batch)

    for epoch in range(args.epochs):
        print('Epoch {}/{}'.format(epoch + 1, args.epochs))
        for step, batch_data in enumerate(train_loader, start=1):
            ids, ids_reverse, label = batch_data

            output = model((ids, ids_reverse))
            loss = criterion(output, label)
            loss.backward()
            adam.step()
            adam.clear_grad()

            if step % args.logging_step == 0:
                print('step {}, loss {}'.format(step, loss.numpy()[0]))

        acc = test(model, test_loader)
        print('\ntest acc {}\n'.format(acc))
def main():
    """Fine-tune BigBird for sequence classification, then evaluate."""
    # Set up the device and seed everything for reproducibility.
    paddle.set_device(args.device)
    set_seed(args)

    # In the finetune task, BigBird performs better with dropout disabled,
    # so both dropout rates come from the CLI arguments.
    model = BigBirdForSequenceClassification.from_pretrained(
        args.model_name_or_path,
        attn_dropout=args.attn_dropout,
        hidden_dropout_prob=args.hidden_dropout_prob)
    criterion = nn.CrossEntropyLoss()
    metric = paddle.metric.Accuracy()

    # Tokenizer plus train/test dataloaders built from the model config.
    tokenizer = BigBirdTokenizer.from_pretrained(args.model_name_or_path)
    model_config = getattr(
        model, BigBirdForSequenceClassification.base_model_prefix).config
    train_data_loader, test_data_loader = create_dataloader(
        args.batch_size, args.max_encoder_length, tokenizer, model_config)

    # Adam optimizer for fine-tuning.
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.learning_rate,
                                      epsilon=1e-6)

    # Fine-tune, then evaluate the resulting classifier.
    do_train(model, criterion, metric, optimizer, train_data_loader, tokenizer)
    do_evalute(model, criterion, metric, test_data_loader)
def main():
    """Fine-tune BigBird for sequence classification and evaluate it."""
    # Validate the requested device before configuring paddle.
    assert args.device in [
        "cpu", "gpu", "xpu"
    ], "Invalid device! Available device should be cpu, gpu, or xpu."
    paddle.set_device(args.device)
    set_seed(args)

    # Model, loss, and accuracy metric.
    model = BigBirdForSequenceClassification.from_pretrained(
        args.model_name_or_path)
    criterion = nn.CrossEntropyLoss()
    metric = paddle.metric.Accuracy()

    # Tokenizer and dataloaders; `config` is published at module scope
    # because other helpers read it.
    tokenizer = BigBirdTokenizer.from_pretrained(args.model_name_or_path)
    global config
    config = getattr(
        model, BigBirdForSequenceClassification.base_model_prefix).config
    train_data_loader, test_data_loader = create_dataloader(
        args.batch_size, args.max_encoder_length, tokenizer)

    # Adam optimizer for fine-tuning.
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.learning_rate,
                                      epsilon=1e-6)

    # Fine-tune, then evaluate.
    do_train(model, criterion, metric, optimizer, train_data_loader, tokenizer)
    do_evalute(model, criterion, metric, test_data_loader)
def setUp(self):
    """Train a LinearNet deterministically on CPU and save it with jit."""
    # Run in dygraph mode on CPU.
    cpu = paddle.CPUPlace()
    paddle.disable_static(cpu)

    # Fix all seeds so the trained weights are reproducible.
    paddle.seed(SEED)
    paddle.framework.random._manual_program_seed(SEED)

    # Network, loss, and optimizer.
    self.layer = LinearNet()
    self.loss_fn = nn.CrossEntropyLoss()
    self.sgd = opt.SGD(learning_rate=0.001,
                       parameters=self.layer.parameters())

    # Random-data loader for training.
    dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
    self.loader = paddle.io.DataLoader(dataset,
                                       places=cpu,
                                       batch_size=BATCH_SIZE,
                                       shuffle=True,
                                       drop_last=True,
                                       num_workers=0)

    # Train, then persist the model for the tests to load.
    train(self.layer, self.loader, self.loss_fn, self.sgd)
    self.model_path = "linear.example.model"
    paddle.jit.save(self.layer, self.model_path)
def main():
    """Fine-tune BigBird for sequence classification and evaluate it."""
    # Device and seeding.
    paddle.set_device(args.device)
    set_seed(args)

    # Model, loss, and accuracy metric.
    model = BigBirdForSequenceClassification.from_pretrained(
        args.model_name_or_path)
    criterion = nn.CrossEntropyLoss()
    metric = paddle.metric.Accuracy()

    # Tokenizer and dataloaders; `config` is published at module scope
    # because other helpers read it.
    tokenizer = BigBirdTokenizer.from_pretrained(args.model_name_or_path)
    global config
    config = BigBirdModel.pretrained_init_configuration[
        args.model_name_or_path]
    train_data_loader, test_data_loader = create_dataloader(
        args.batch_size, args.max_encoder_length, tokenizer)

    # Adam optimizer for fine-tuning.
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.learning_rate,
                                      epsilon=1e-6)

    # Fine-tune, then evaluate.
    do_train(model, criterion, metric, optimizer, train_data_loader,
             test_data_loader)
    do_evalute(model, criterion, metric, test_data_loader)
def get_paddle_model(model_path):
    """Train a small _LinearNet on random data and jit-save it to disk.

    Args:
        model_path: directory in which the traced model is written
            (under the name 'model').
    """

    def _run_training(net, batches, criterion, optimizer):
        # Single-epoch loop over the loader; one optimizer step per batch.
        for _ in range(1):
            for _, (image, label) in enumerate(batches()):
                logits = net(image)
                step_loss = criterion(logits, label)
                step_loss.backward()
                optimizer.step()
                optimizer.clear_grad()

    paddle.disable_static()

    net = _LinearNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = opt.Adam(learning_rate=0.001, parameters=net.parameters())

    dataset = _RandomDataset(64)
    batches = paddle.io.DataLoader(dataset,
                                   batch_size=16,
                                   shuffle=True,
                                   drop_last=True,
                                   num_workers=2)

    _run_training(net, batches, criterion, optimizer)

    # Persist the trained network with a static input signature.
    paddle.jit.save(layer=net,
                    path=os.path.join(model_path, 'model'),
                    input_spec=[InputSpec(shape=[None, 784],
                                          dtype='float32')])
def loss(self, embeds):
    """
    Computes the softmax loss according the section 2.1 of GE2E.

    :param embeds: the embeddings as a tensor of shape (speakers_per_batch,
    utterances_per_speaker, embedding_size)
    :return: the loss and the EER for this batch of embeddings.
    """
    speakers_per_batch, utterances_per_speaker = embeds.shape[:2]

    # Loss: each utterance's similarity row is classified against its
    # speaker index.
    sim_matrix, *_ = self.similarity_matrix(embeds)
    sim_matrix = sim_matrix.reshape(
        [speakers_per_batch * utterances_per_speaker, speakers_per_batch])
    target = paddle.arange(0, speakers_per_batch,
                           dtype="int64").unsqueeze(-1)
    target = paddle.expand(target,
                           [speakers_per_batch, utterances_per_speaker])
    target = paddle.reshape(target, [-1])

    loss = nn.CrossEntropyLoss()(sim_matrix, target)

    # EER (not backpropagated)
    with paddle.no_grad():
        ground_truth = target.numpy()
        # BUG FIX: `np.int` was deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin `int` is the documented replacement.
        inv_argmax = lambda i: np.eye(1, speakers_per_batch, i,
                                      dtype=int)[0]
        labels = np.array([inv_argmax(i) for i in ground_truth])
        preds = sim_matrix.numpy()

        # Snippet from https://yangcha.github.io/EER-ROC/
        fpr, tpr, thresholds = roc_curve(labels.flatten(), preds.flatten())
        eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)
    return loss, eer
def __init__(self, opt):
    """Build the multi-task MOT loss (detection + re-ID) from `opt`.

    Configures the heatmap, box-regression, width/height, and identity
    classification criteria, plus the learnable task-balancing weights.
    """
    super(MotLoss, self).__init__()
    # Heatmap criterion: plain MSE or focal loss, per config.
    self.crit = paddle.nn.MSELoss() if opt.mse_loss else FocalLoss()
    # Box regression: L1, smooth-L1, or disabled.
    self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
        RegLoss() if opt.reg_loss == 'sl1' else None
    # Width/height criterion; falls back to the box-regression criterion.
    self.crit_wh = paddle.nn.L1Loss(reduction='sum') if opt.dense_wh else \
        NormRegL1Loss() if opt.norm_wh else \
        RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg
    self.opt = opt
    self.emb_dim = opt.reid_dim  # re-ID embedding dimensionality
    self.nID = opt.nID           # number of distinct identities
    # param_attr = paddle.ParamAttr(initializer=KaimingUniform())
    # bound = 1 / math.sqrt(self.emb_dim)
    # bias_attr = paddle.ParamAttr(initializer=Uniform(-bound, bound))
    # self.classifier = nn.Linear(self.emb_dim, self.nID, weight_attr=param_attr, bias_attr=bias_attr)
    self.classifier = nn.Linear(self.emb_dim, self.nID, bias_attr=True)
    if opt.id_loss == 'focal':
        # Generally unused branch (translated from the original comment).
        # Re-creates the classifier with focal-loss-friendly init: normal
        # weights and a bias seeded from the prior probability.
        # torch.nn.init.normal_(self.classifier.weight, std=0.01)
        prior_prob = 0.01
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        # torch.nn.init.constant_(self.classifier.bias, bias_value)
        weight_attr = paddle.framework.ParamAttr(initializer=nn.initializer.Normal(std=0.01))
        bias_attr = paddle.framework.ParamAttr(initializer=nn.initializer.Constant(bias_value))
        self.classifier = nn.Linear(self.emb_dim, self.nID, weight_attr=weight_attr, bias_attr=bias_attr)
    # Identity loss ignores the -1 (unlabeled) targets.
    self.IDLoss = nn.CrossEntropyLoss(ignore_index=-1)
    self.emb_scale = math.sqrt(2) * math.log(self.nID - 1)
    # Learnable log-variance weights balancing detection vs id losses —
    # NOTE(review): presumably uncertainty weighting; confirm against the
    # loss combination in forward().
    # self.s_det = nn.Parameter(-1.85 * torch.ones(1))
    # self.s_id = nn.Parameter(-1.05 * torch.ones(1))
    self.s_det = paddle.create_parameter([1], dtype='float32', default_initializer = nn.initializer.Constant(value=-1.85))
    self.s_id = paddle.create_parameter([1], dtype='float32', default_initializer = nn.initializer.Constant(value=-1.05))
def __init__(self, **kwargs):
    """Wrap a soft-label cross-entropy criterion.

    Per-element losses are kept (reduction='none'); index 0 is treated as
    padding and excluded; the class dimension is the last axis.
    """
    super().__init__()
    self.loss_func = nn.CrossEntropyLoss(weight=None,
                                         ignore_index=0,
                                         reduction='none',
                                         soft_label=True,
                                         axis=-1)
def __init__(self, model_config, compound_encoder):
    """Attach an atom-type classification head to a compound encoder.

    The head predicts the atomic number of masked atoms, with three extra
    classes beyond the known atomic-number vocabulary.
    """
    super(AttrmaskModel, self).__init__()
    self.compound_encoder = compound_encoder

    num_classes = CompoundKit.get_atom_feature_size('atomic_num') + 3
    self.linear = nn.Linear(compound_encoder.node_dim, num_classes)
    self.criterion = nn.CrossEntropyLoss()
def __init__(self, vocab_size, gen_weight, disc_weight):
    """Set up the ELECTRA pretraining criterion.

    Args:
        vocab_size: generator vocabulary size.
        gen_weight: weight of the generator (MLM) loss term.
        disc_weight: weight of the discriminator (RTD) loss term.
    """
    super(ElectraPretrainingCriterion, self).__init__()
    self.vocab_size = vocab_size
    self.gen_weight = gen_weight
    self.disc_weight = disc_weight
    # Both criteria are unreduced so the caller can mask and average.
    self.gen_loss_fct = nn.CrossEntropyLoss(reduction='none')
    self.disc_loss_fct = nn.BCEWithLogitsLoss(reduction='none')
def _softmax_cross_entropy_with_logits(logits, labels):
    """Per-element cross entropy for channel-last logits vs one-hot labels.

    Args:
        logits: tensor of shape [N, ..., C] with the class dim last.
        labels: one-hot tensor of the same shape as ``logits``.

    Returns:
        Unreduced loss tensor (reduction='none').
    """
    # CrossEntropyLoss expects the class dim at axis 1: [N, ..., C] -> [N, C, ...].
    ndim = len(logits.shape)
    perm = [0, ndim - 1] + list(range(1, ndim - 1))
    logits = logits.transpose(perm)
    # One-hot labels are collapsed to class indices along the last axis.
    loss_ftor = nn.CrossEntropyLoss(reduction="none")
    loss = loss_ftor(logits, paddle.argmax(labels, axis=-1))
    return loss
def __init__(self, config):
    """Build the GNN encoder, graph-level classifier, and pretrain heads.

    Reads all hyperparameters from `config` (layer counts, embedding size,
    pooling type, classifier depth, and comma-separated pretrain tasks).
    """
    super(GNN, self).__init__()
    log.info("model_type is %s" % self.__class__.__name__)
    self.config = config
    # Pretrain tasks arrive as a comma-separated string, e.g. "Con,Ba".
    self.pretrain_tasks = config.pretrain_tasks.split(',')
    self.num_layers = config.num_layers
    self.drop_ratio = config.drop_ratio
    self.JK = config.JK
    self.block_num = config.block_num
    self.emb_dim = config.emb_dim
    self.num_tasks = config.num_tasks
    self.residual = config.residual
    self.graph_pooling = config.graph_pooling

    if self.num_layers < 2:
        raise ValueError("Number of GNN layers must be greater than 1.")

    ### GNN to generate node embeddings
    # One conv block per block_num; the block class is looked up by name.
    self.gnn_blocks = paddle.nn.LayerList()
    for i in range(self.config.block_num):
        self.gnn_blocks.append(getattr(CONV, self.config.gnn_type)(config))

    # Blocks are concatenated, so downstream width scales with block_num.
    hidden_size = self.emb_dim * self.block_num

    ### Pooling function to generate whole-graph embeddings
    # NOTE(review): the "bisop" branch leaves self.pool unset — presumably
    # handled elsewhere for that pooling type; confirm in forward().
    if self.config.graph_pooling == "bisop":
        pass
    else:
        self.pool = MeanGlobalPool()

    # Graph-level prediction head: 3-, 2-, or 1-layer MLP per config.
    if self.config.clf_layers == 3:
        log.info("clf_layers is 3")
        self.graph_pred_linear = nn.Sequential(
            L.Linear(hidden_size, hidden_size // 2),
            L.batch_norm_1d(hidden_size // 2), nn.Swish(),
            L.Linear(hidden_size // 2, hidden_size // 4),
            L.batch_norm_1d(hidden_size // 4), nn.Swish(),
            L.Linear(hidden_size // 4, self.num_tasks))
    elif self.config.clf_layers == 2:
        log.info("clf_layers is 2")
        self.graph_pred_linear = nn.Sequential(
            L.Linear(hidden_size, hidden_size // 2),
            L.batch_norm_1d(hidden_size // 2), nn.Swish(),
            L.Linear(hidden_size // 2, self.num_tasks))
    else:
        self.graph_pred_linear = L.Linear(hidden_size, self.num_tasks)

    # Optional pretraining heads keyed by task code.
    if 'Con' in self.pretrain_tasks:
        # Context prediction over a fixed 5000-way vocabulary.
        self.context_loss = nn.CrossEntropyLoss()
        self.contextmlp = nn.Sequential(
            L.Linear(self.emb_dim, self.emb_dim // 2),
            L.batch_norm_1d(self.emb_dim // 2), nn.Swish(),
            L.Linear(self.emb_dim // 2, 5000))
    if 'Ba' in self.pretrain_tasks:
        self.pretrain_bond_angle = PretrainBondAngle(config)
    if 'Bl' in self.pretrain_tasks:
        self.pretrain_bond_length = PretrainBondLength(config)
def train_single_epoch(model: MemN2N, lr, data, config):
    """Train the model for a single epoch on randomly sampled windows.

    Args:
        model (MemN2N): model to be trained.
        lr (float): learning rate for this epoch.
        data: flat token sequence; memory windows are sampled from it.
        config: run configuration (batch_size, mem_size, max_grad_norm, show).

    Returns:
        float: average per-sample loss over the epoch.
    """
    model.train()
    num_batches = int(math.ceil(len(data) / config.batch_size))

    # SGD with global-norm gradient clipping, rebuilt each epoch so the
    # per-epoch learning rate takes effect.
    clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=config.max_grad_norm)
    optimizer = paddle.optimizer.SGD(learning_rate=lr,
                                     parameters=model.parameters(),
                                     grad_clip=clip)
    lossfn = nn.CrossEntropyLoss(reduction='sum')
    total_loss = 0

    bar = None
    if config.show:
        ProgressBar = getattr(import_module('utils'), 'ProgressBar')
        bar = ProgressBar('Train', max=num_batches)

    for _ in range(num_batches):
        if config.show:
            bar.next()
        optimizer.clear_grad()

        # Sample batch_size random memory windows; the token immediately
        # after each window is its prediction target.
        context = np.ndarray([config.batch_size, config.mem_size],
                             dtype=np.int64)
        target = np.ndarray([config.batch_size], dtype=np.int64)
        for i in range(config.batch_size):
            m = random.randrange(config.mem_size, len(data))
            target[i] = data[m]
            context[i, :] = data[m - config.mem_size:m]

        pred = model(paddle.to_tensor(context))
        loss = lossfn(pred, paddle.to_tensor(target))
        loss.backward()
        optimizer.step()
        total_loss += loss

    if config.show:
        bar.finish()
    return total_loss / num_batches / config.batch_size
def __init__(self,
             structure_weight,
             loc_weight,
             use_giou=False,
             giou_weight=1.0,
             **kwargs):
    """Configure the table-attention loss.

    Args:
        structure_weight: weight of the structure (token) loss term.
        loc_weight: weight of the cell-location loss term.
        use_giou: whether to add a GIoU term for locations.
        giou_weight: weight of the GIoU term when enabled.
    """
    super(TableAttentionLoss, self).__init__()
    # Unreduced cross entropy; the caller masks/averages it.
    self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none')
    self.structure_weight = structure_weight
    self.loc_weight = loc_weight
    self.use_giou = use_giou
    self.giou_weight = giou_weight
def eval(model: MemN2N, data, config, mode="Test"):
    """Evaluate the model by sliding a memory window over `data`.

    Args:
        model (MemN2N): the model to evaluate.
        data: flat token sequence walked sequentially.
        config: run configuration (batch_size, mem_size, show).
        mode: progress-bar label, "Valid" or "Test".

    Returns:
        Average per-sample loss.
    """
    model.eval()
    lossfn = nn.CrossEntropyLoss(reduction='sum')
    num_batches = int(math.ceil(len(data) / config.batch_size))
    total_loss = 0

    context = np.ndarray([config.batch_size, config.mem_size],
                         dtype=np.int64)
    target = np.ndarray([config.batch_size], dtype=np.int64)

    bar = None
    if config.show:
        ProgressBar = getattr(import_module('utils'), 'ProgressBar')
        bar = ProgressBar(mode, max=num_batches - 1)

    # Walk the data sequentially: each position m supplies one target and
    # the mem_size tokens before it as context.
    m = config.mem_size
    for _ in range(num_batches):
        if config.show:
            bar.next()
        for i in range(config.batch_size):
            if m >= len(data):
                break
            target[i] = data[m]
            context[i, :] = data[m - config.mem_size:m]
            m += 1
        if m >= len(data):
            break

        pred = model(paddle.to_tensor(context))
        total_loss += lossfn(pred, paddle.to_tensor(target))

    if config.show:
        bar.finish()
    return total_loss / num_batches / config.batch_size
def __init__(self, with_avg_pool=False, in_channels=2048, num_classes=1000):
    """Build a linear classification head with optional global pooling.

    Args:
        with_avg_pool: if True, apply adaptive average pooling to 1x1
            before the linear classifier.
        in_channels: feature dimensionality fed to the classifier.
        num_classes: number of output classes.
    """
    super(ClasHead, self).__init__()
    self.with_avg_pool = with_avg_pool
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.criterion = nn.CrossEntropyLoss()

    if self.with_avg_pool:
        self.avg_pool = nn.AdaptiveAvgPool2D((1, 1))
    self.fc_cls = nn.Linear(in_channels, num_classes)
    # Custom weight initialization for the classifier layer.
    reset_parameters(self.fc_cls)
def build_and_train_model(self):
    """Construct a LinearNet, train it, and return (layer, optimizer)."""
    # Network, loss, and optimizer.
    layer = LinearNet()
    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())

    # TODO: using new DataLoader cause unknown Timeout on windows, replace it
    loader = random_batch_reader()

    train(layer, loader, loss_fn, adam)
    return layer, adam
def __init__(self,
             weight=None,
             size_average=True,
             ignore_index=-100,
             sequence_normalize=False,
             sample_normalize=True,
             **kwargs):
    """Configure the ASTER recognition loss.

    Combines an unreduced cross-entropy recognition loss with a cosine
    embedding loss for the semantic branch.
    """
    super(AsterLoss, self).__init__()
    self.weight = weight
    self.size_average = size_average
    self.ignore_index = ignore_index
    self.sequence_normalize = sequence_normalize
    self.sample_normalize = sample_normalize
    # Semantic branch uses a cosine-similarity-based loss.
    self.loss_sem = CosineEmbeddingLoss()
    self.is_cosin_loss = True
    # Recognition branch: unreduced CE so normalization happens in forward.
    self.loss_func_rec = nn.CrossEntropyLoss(weight=None, reduction='none')
def forward(
        self,
        input_ids=None,
        bbox=None,
        image=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        labels=None, ):
    """Token classification over LayoutXLM outputs.

    Returns `(logits,)` when `labels` is None, else `(loss, logits)`.
    Only the text part of the multimodal sequence is classified; image
    positions are sliced off before the classifier.
    """
    outputs = self.layoutxlm(
        input_ids=input_ids,
        bbox=bbox,
        image=image,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
        head_mask=head_mask, )
    seq_length = input_ids.shape[1]
    # sequence out and image out: the encoder concatenates text and image
    # positions, so split at the text length.
    sequence_output, image_output = outputs[0][:, :seq_length], outputs[
        0][:, seq_length:]
    sequence_output = self.dropout(sequence_output)
    logits = self.classifier(sequence_output)

    outputs = logits,

    if labels is not None:
        loss_fct = nn.CrossEntropyLoss()
        if attention_mask is not None:
            # Score only the non-padding positions.
            active_loss = attention_mask.reshape([-1, ]) == 1
            active_logits = logits.reshape(
                [-1, self.num_classes])[active_loss]
            active_labels = labels.reshape([-1, ])[active_loss]
            loss = loss_fct(active_logits, active_labels)
        else:
            loss = loss_fct(
                logits.reshape([-1, self.num_classes]),
                labels.reshape([-1, ]))

        outputs = (loss, ) + outputs

    return outputs
def __init__(self, vocab, hidden_size, latent_size, depthT, depthG):
    """Build the junction-tree VAE: tree/graph encoders, decoder, heads.

    Note: the latent space is split in half between the tree and graph
    branches, so each branch uses latent_size // 2 dimensions.
    """
    super(JTNNVAE, self).__init__()
    self.vocab = vocab
    self.hidden_size = hidden_size
    # Halve the latent size: one half for the tree code, one for the graph.
    self.latent_size = latent_size = int(latent_size / 2)

    # Tree-side encoder/decoder share embedding shape but not weights.
    self.jtnn = JTNNEncoder(hidden_size, depthT,
                            nn.Embedding(vocab.size(), hidden_size))
    self.decoder = JTNNDecoder(vocab, hidden_size, latent_size,
                               nn.Embedding(vocab.size(), hidden_size))

    # Graph-side message passing networks.
    self.jtmpn = JTMPN(hidden_size, depthG)
    self.mpn = MPN(hidden_size, depthG)

    # Assembly scoring head and its summed cross-entropy criterion.
    self.A_assm = nn.Linear(latent_size, hidden_size, bias_attr=False)
    self.assm_loss = nn.CrossEntropyLoss(reduction='sum')

    # Gaussian posterior parameters for both branches.
    self.T_mean = nn.Linear(hidden_size, latent_size)
    self.T_var = nn.Linear(hidden_size, latent_size)
    self.G_mean = nn.Linear(hidden_size, latent_size)
    self.G_var = nn.Linear(hidden_size, latent_size)
def __init__(self,
             balance_loss=True,
             main_loss_type='DiceLoss',
             negative_ratio=3,
             return_origin=False,
             eps=1e-6,
             **kwargs):
    """
    The BalanceLoss for Differentiable Binarization text detection
    args:
        balance_loss (bool): whether balance loss or not, default is True
        main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
            'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
        negative_ratio (int|float): float, default is 3.
        return_origin (bool): whether return unbalanced loss or not, default is False.
        eps (float): default is 1e-6.
    Raises:
        ValueError: if `main_loss_type` is not one of the supported names.
    """
    super(BalanceLoss, self).__init__()
    self.balance_loss = balance_loss
    self.main_loss_type = main_loss_type
    self.negative_ratio = negative_ratio
    self.return_origin = return_origin
    self.eps = eps

    if self.main_loss_type == "CrossEntropy":
        self.loss = nn.CrossEntropyLoss()
    elif self.main_loss_type == "Euclidean":
        self.loss = nn.MSELoss()
    elif self.main_loss_type == "DiceLoss":
        self.loss = DiceLoss(self.eps)
    elif self.main_loss_type == "BCELoss":
        self.loss = BCELoss(reduction='none')
    elif self.main_loss_type == "MaskL1Loss":
        self.loss = MaskL1Loss(self.eps)
    else:
        loss_type = [
            'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
        ]
        # FIX: raise ValueError (idiomatic for a bad argument value) instead
        # of a bare Exception; backward-compatible since ValueError is an
        # Exception subclass.
        raise ValueError(
            "main_loss_type in BalanceLoss() can only be one of {}".format(
                loss_type))
def __init__(self, vocab, hidden_size, latent_size, embedding):
    """Build the junction-tree decoder: GRU gates, scoring heads, losses."""
    super(JTNNDecoder, self).__init__()
    self.hidden_size = hidden_size
    self.vocab_size = vocab.size()
    self.vocab = vocab
    self.embedding = embedding
    latent_size = int(latent_size)

    # GRU-style message update gates.
    self.W_z = nn.Linear(2 * hidden_size, hidden_size)
    self.U_r = nn.Linear(hidden_size, hidden_size, bias_attr=False)
    self.W_r = nn.Linear(hidden_size, hidden_size)
    self.W_h = nn.Linear(2 * hidden_size, hidden_size)

    # Feature aggregation: word prediction (W) and topology/stop (U).
    self.W = nn.Linear(hidden_size + latent_size, hidden_size)
    self.U = nn.Linear(hidden_size + latent_size, hidden_size)
    self.U_i = nn.Linear(2 * hidden_size, hidden_size)

    # Output heads: label over the vocabulary, scalar stop score.
    self.W_o = nn.Linear(hidden_size, self.vocab_size)
    self.U_o = nn.Linear(hidden_size, 1)

    # Summed losses; normalization is handled by the caller.
    self.pred_loss = nn.CrossEntropyLoss(reduction='sum')
    self.stop_loss = nn.BCEWithLogitsLoss(reduction='sum')
def train_ch6(net, train_iter, test_iter, batch_size, optimi, num_epochs):
    """Train `net` with cross entropy and report per-epoch metrics.

    Prints the running training loss, training accuracy, test accuracy,
    and wall-clock time after every epoch.
    """
    loss = nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        start = time.time()
        train_l_sum = 0.0
        train_acc_sum = 0.0
        n = 0
        for _, (X, y) in enumerate(train_iter):
            y_hat = net(X)
            l = loss(y_hat, y)
            optimi.clear_grad()
            l.backward()
            optimi.step()
            # Accumulate loss and correct-prediction counts for reporting.
            train_l_sum += l.numpy()[0]
            correct = (y_hat.argmax(axis=1) == y.flatten()).astype('float32')
            train_acc_sum += correct.sum().numpy()[0]
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print(
            'epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
            % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n,
               test_acc, time.time() - start))
def __init__(self, num_classes=10, **kwargs):
    """Describe an AlexNet-style pipeline-parallel model.

    Builds the layer-descriptor list consumed by the pipeline base class
    and wires in a cross-entropy loss.
    """
    self.num_classes = num_classes
    # NOTE(review): activations mix LayerDesc(nn.ReLU) and bare F.relu
    # entries — presumably both forms are accepted by the pipeline layer
    # machinery; confirm against the base class.
    decs = [
        LayerDesc(nn.Conv2D, 1, 64, kernel_size=11, stride=4, padding=5),
        LayerDesc(nn.ReLU),
        LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
        LayerDesc(nn.Conv2D, 64, 192, kernel_size=5, padding=2),
        F.relu,
        LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
        LayerDesc(nn.Conv2D, 192, 384, kernel_size=3, padding=1),
        F.relu,
        LayerDesc(nn.Conv2D, 384, 256, kernel_size=3, padding=1),
        F.relu,
        LayerDesc(nn.Conv2D, 256, 256, kernel_size=3, padding=1),
        F.relu,
        LayerDesc(nn.MaxPool2D, kernel_size=2, stride=2),
        # Flatten the 256-channel feature map before the classifier.
        LayerDesc(ReshapeHelp, shape=[-1, 256]),
        LayerDesc(nn.Linear, 256, self.num_classes),  # classifier
    ]
    super(AlexNetPipeDesc, self).__init__(layers=decs,
                                          loss_fn=nn.CrossEntropyLoss(),
                                          **kwargs)