def __init__(self, bert: BERTLM, optimizer: Optimizer, with_cuda: bool = True, log_freq: int = 10):
    # `is_compiled_with_cuda` is a function: it must be called, otherwise the
    # assert always passes on the (truthy) function object.
    assert paddle.is_compiled_with_cuda(), 'this version of paddle is not compiled with CUDA'
    assert with_cuda is True, 'warning: do you want to train bert using cpu? are you kidding me?'
    self.model = bert
    self.opt = optimizer
    # Sentence-order loss over all positions; the token loss ignores padding index 0.
    self.criterion_sentence = nn.NLLLoss()
    self.criterion_tokens = nn.NLLLoss(ignore_index=0)
    self.log_freq = log_freq
    # Count trainable parameters by multiplying out each tensor's shape.
    list_n_elements = []
    for p in self.model.parameters():
        n = 1
        for i in p.shape:
            n *= i
        list_n_elements.append(n)
    print('Total Parameters:', sum(list_n_elements))
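# A minimal usage sketch for the trainer constructor above. The trainer class
# name, the BERTLM constructor arguments, and the Adam settings are all
# hypothetical placeholders for illustration, not the original configuration.
import paddle

model = BERTLM(vocab_size=30000, hidden=768, n_layers=12, attn_heads=12)  # hypothetical signature
optimizer = paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=1e-4)
trainer = BERTTrainer(model, optimizer, with_cuda=True, log_freq=10)  # class name assumed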
def __init__(self, feature_dim: int, n_classes: int, eps: float = 1e-5, margin: float = 0.3, scale: float = 30.0):
    super(AMSoftmaxLoss, self).__init__()
    # Learnable class-weight matrix; its columns act as class centers on the hypersphere.
    self.w = paddle.create_parameter((feature_dim, n_classes), 'float32')
    self.eps = eps
    self.scale = scale
    self.margin = margin
    self.nll_loss = nn.NLLLoss()
    self.n_classes = n_classes
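# A minimal sketch of the forward pass this constructor sets up, following the
# standard AM-Softmax formulation: normalize features and class weights,
# subtract the margin from the target-class cosine, scale, then NLLLoss on
# log-probabilities. The original forward may differ in detail.
import paddle
import paddle.nn.functional as F

def am_softmax_forward(self, x, label):
    # Cosine similarity between L2-normalized features and class weights.
    x_norm = F.normalize(x, axis=1, epsilon=self.eps)
    w_norm = F.normalize(self.w, axis=0, epsilon=self.eps)
    cosine = paddle.matmul(x_norm, w_norm)          # [batch, n_classes]
    # Subtract the additive margin only at the target class.
    one_hot = F.one_hot(label, self.n_classes)
    logits = self.scale * (cosine - one_hot * self.margin)
    return self.nll_loss(F.log_softmax(logits, axis=1), label)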
def __init__(self, margin=0.0, scale=1.0, feature_dim=256, n_classes=1000, easy_margin=False):
    super(AdditiveAngularMargin, self).__init__(margin, scale)
    self.easy_margin = easy_margin
    self.w = paddle.create_parameter((feature_dim, n_classes), 'float32')
    # Precompute the constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
    self.cos_m = math.cos(self.margin)
    self.sin_m = math.sin(self.margin)
    # Threshold and fallback term for the case where theta + m would exceed pi.
    self.th = math.cos(math.pi - self.margin)
    self.mm = math.sin(math.pi - self.margin) * self.margin
    self.nll_loss = nn.NLLLoss()
    self.n_classes = n_classes
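# A minimal sketch of how these precomputed constants are typically used in an
# additive-angular-margin (ArcFace-style) forward pass; self.scale is assumed
# to be set by the parent class, and the original forward may differ.
import paddle
import paddle.nn.functional as F

def arcface_forward(self, x, label):
    cosine = paddle.matmul(F.normalize(x, axis=1), F.normalize(self.w, axis=0))
    sine = paddle.sqrt(paddle.clip(1.0 - cosine * cosine, min=0.0))
    # cos(theta + m) via the angle-addition identity.
    phi = cosine * self.cos_m - sine * self.sin_m
    if self.easy_margin:
        # Apply the margin only where cos(theta) > 0.
        phi = paddle.where(cosine > 0, phi, cosine)
    else:
        # Keep the logit monotonic when theta + m would pass pi.
        phi = paddle.where(cosine > self.th, phi, cosine - self.mm)
    one_hot = F.one_hot(label, self.n_classes)
    logits = self.scale * (one_hot * phi + (1.0 - one_hot) * cosine)
    return self.nll_loss(F.log_softmax(logits, axis=1), label)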
def loss(self, logit_dict, label_dict, loss_func_dict=None):
    if loss_func_dict is None:
        loss_func_dict = defaultdict(list)
        loss_func_dict['glance'].append(nn.NLLLoss())
        loss_func_dict['focus'].append(MRSD())
        loss_func_dict['cm'].append(MRSD())
        loss_func_dict['err'].append(paddleseg.models.MSELoss())
        loss_func_dict['refine'].append(paddleseg.models.L1Loss())

    loss = {}

    # Glance loss: derive the 3-class glance label from the trimap
    # (0 foreground, 1 transition, 2 background), then apply NLLLoss on
    # log-probabilities.
    glance_label = F.interpolate(
        label_dict['trimap'],
        logit_dict['glance'].shape[2:],
        mode='nearest',
        align_corners=False)
    glance_label_trans = (glance_label == 128).astype('int64')
    glance_label_bg = (glance_label == 0).astype('int64')
    glance_label = glance_label_trans + glance_label_bg * 2
    loss_glance = loss_func_dict['glance'][0](
        paddle.log(logit_dict['glance'] + 1e-6), glance_label.squeeze(1))
    loss['glance'] = loss_glance

    # Focus loss: MRSD on the alpha matte, restricted to the transition region.
    focus_label = F.interpolate(
        label_dict['alpha'],
        logit_dict['focus'].shape[2:],
        mode='bilinear',
        align_corners=False)
    loss_focus = loss_func_dict['focus'][0](logit_dict['focus'], focus_label,
                                            glance_label_trans)
    loss['focus'] = loss_focus

    # Collaborative matting loss on the fused (glance + focus) prediction.
    loss_cm_func = loss_func_dict['cm']
    loss_cm = loss_cm_func[0](logit_dict['fusion'], focus_label)
    loss['cm'] = loss_cm

    # Error loss: the error head should predict |fusion - ground-truth alpha|.
    err = F.interpolate(
        logit_dict['error'],
        label_dict['alpha'].shape[2:],
        mode='bilinear',
        align_corners=False)
    err_label = (F.interpolate(
        logit_dict['fusion'],
        label_dict['alpha'].shape[2:],
        mode='bilinear',
        align_corners=False) - label_dict['alpha']).abs()
    loss_err = loss_func_dict['err'][0](err, err_label)
    loss['err'] = loss_err

    loss_all = 0.25 * loss_glance + 0.25 * loss_focus + 0.25 * loss_cm + loss_err

    # Refine loss: L1 against the ground-truth alpha when the refiner is enabled.
    if self.if_refine:
        loss_refine = loss_func_dict['refine'][0](logit_dict['refine'],
                                                  label_dict['alpha'])
        loss['refine'] = loss_refine
        loss_all = loss_all + loss_refine

    loss['all'] = loss_all
    return loss
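# A minimal sketch of the dictionaries this loss expects, with hypothetical
# batch and spatial sizes. The glance head is assumed to output per-pixel
# probabilities (hence the paddle.log before NLLLoss above); if the model was
# built with refinement enabled, logit_dict also needs a 'refine' entry.
import numpy as np
import paddle
import paddle.nn.functional as F

n, h, w = 2, 256, 256
logit_dict = {
    'glance': F.softmax(paddle.randn([n, 3, h, w]), axis=1),
    'focus': paddle.rand([n, 1, h, w]),
    'fusion': paddle.rand([n, 1, h, w]),
    'error': paddle.rand([n, 1, h, w]),
}
label_dict = {
    # Trimap values: 0 background, 128 transition, 255 foreground.
    'trimap': paddle.to_tensor(
        np.random.choice([0, 128, 255], (n, 1, h, w)).astype('float32')),
    'alpha': paddle.rand([n, 1, h, w]),
}
loss = model.loss(logit_dict, label_dict)  # `model` stands for the matting network instance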
def __init__(self, s=20.0, eps=1e-8):
    super(ProtoTypical, self).__init__()
    self.nll_loss = nn.NLLLoss()
    self.eps = eps
    self.s = s
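# A minimal sketch of a prototypical forward pass consistent with this
# constructor: scaled cosine similarity between queries and class prototypes,
# then NLLLoss. The input layout [n_speakers, n_utterances, dim] and the
# prototype/query split are assumptions for illustration.
import paddle
import paddle.nn.functional as F

def prototypical_forward(self, embeddings):
    # Prototype: mean of all but the last utterance per speaker; query: the last one.
    proto = embeddings[:, :-1, :].mean(axis=1)      # [n_spk, dim]
    query = embeddings[:, -1, :]                    # [n_spk, dim]
    # Scaled cosine similarity between every query and every prototype.
    sim = self.s * paddle.matmul(
        F.normalize(query, axis=1, epsilon=self.eps),
        F.normalize(proto, axis=1, epsilon=self.eps),
        transpose_y=True)                           # [n_spk, n_spk]
    # Each query's positive class is its own speaker index.
    label = paddle.arange(sim.shape[0], dtype='int64')
    return self.nll_loss(F.log_softmax(sim, axis=1), label)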
def forward(self, logit, label, semantic_weights=None):
    # log_softmax + NLLLoss is equivalent to a class-weighted cross-entropy here.
    temp = F.log_softmax(logit, axis=1)
    loss_func = nn.NLLLoss(
        ignore_index=self.ignore_index,
        weight=paddle.to_tensor(self.weights))
    loss = loss_func(temp, label)
    return loss
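# A quick check (with made-up shapes) that the log_softmax + NLLLoss pairing
# above matches paddle's built-in cross-entropy for the same class weights.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

logit = paddle.randn([4, 3])
label = paddle.randint(0, 3, [4])
weight = paddle.to_tensor([1.0, 2.0, 0.5])

a = nn.NLLLoss(weight=weight)(F.log_softmax(logit, axis=1), label)
b = F.cross_entropy(logit, label, weight=weight)
print(float(a), float(b))  # the two values should agree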
def main(args):
    if args.seed is not None:
        # paddle.seed replaces the legacy fluid.Program.random_seed attribute,
        # which did not reliably seed the default program.
        paddle.seed(args.seed)
        np.random.seed(args.seed)
    if args.gpu < 0:
        device = "cpu"
    else:
        device = f"cuda:{args.gpu}"  # the original string was missing the f-prefix

    # Load dataset
    # data = load_data(device, args)
    # g, labels, num_classes, train_nid, val_nid, test_nid = data
    labels = np.load("./data/lables.npy")
    num_classes = np.load("./data/num_classes.npy")
    train_nid = np.load("./data/train_nid.npy")
    val_nid = np.load("./data/val_nid.npy")
    test_nid = np.load("./data/test_nid.npy")
    evaluator = get_evaluator(args.dataset)

    # Preprocess neighbor-averaged features over sampled relation subgraphs
    rel_subsets = []
    with paddle.no_grad():
        feats = []
        for i in range(args.R + 1):
            # Download the dataset from the OGB website and build the training
            # set following the official tutorial, or look up data88697 on AI Studio.
            feature = np.load(f'../data/data88697/feat{i}.npy')
            feats.append(paddle.to_tensor(feature))
        # feats = preprocess_features(g, rel_subsets, args, device)
        print("Done preprocessing")
    # labels = labels.to(device)
    # Release the graph since we are not going to use it later
    g = None

    # Set up logging
    logging.basicConfig(format='[%(levelname)s] %(message)s', level=logging.INFO)
    logging.info(str(args))

    _, num_feats, in_feats = feats[0].shape
    logging.info("new input size: {} {}".format(num_feats, in_feats))

    # Create model
    num_hops = args.R + 1  # include self feature hop 0
    model = nn.Sequential(
        WeightedAggregator(num_feats, in_feats, num_hops),
        SIGN(in_feats, args.num_hidden, num_classes, num_hops, args.ff_layer,
             args.dropout, args.input_dropout))

    if len(labels.shape) == 1:
        # single-label multi-class
        loss_fcn = nn.NLLLoss()
    else:
        # multi-label multi-class
        loss_fcn = nn.KLDivLoss(reduction='batchmean')
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      weight_decay=args.weight_decay)
    # optimizer = paddle.optimizer.Adam(parameters=model.parameters())

    # Start training
    best_epoch = 0
    best_val = 0
    f = open('log.txt', 'w+')
    for epoch in range(1, args.num_epochs + 1):
        start = time.time()
        print(epoch)
        train(model, feats, labels, train_nid, loss_fcn, optimizer,
              args.batch_size)
        if epoch % args.eval_every == 0:
            with paddle.no_grad():
                train_res, val_res, test_res = test(
                    model, feats, labels, train_nid, val_nid, test_nid,
                    evaluator, args.eval_batch_size)
            end = time.time()
            val_acc = val_res[0]
            log = "Epoch {}, Times(s): {:.4f}".format(epoch, end - start)
            log += ", Accuracy: Train {:.4f}, Val {:.4f}".format(
                train_res[0], val_res[0])
            log += f", best_acc:{best_val}"
            logging.info(log)
            print(log, file=f, flush=True)
            if val_acc > best_val:
                best_val = val_acc
                best_epoch = epoch
    f.close()
    logging.info("Best Epoch {}, Val {:.4f}".format(best_epoch, best_val))
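# main() calls train(...) and test(...) helpers that are not shown here. Below
# is a minimal sketch of what a compatible train step could look like, assuming
# feats is a list of per-hop tensors indexed by node id, labels is a numpy
# array, and the model's final layer already emits log-probabilities (which is
# what NLLLoss expects). This is an illustration, not the original helper.
import numpy as np
import paddle

def train(model, feats, labels, train_nid, loss_fcn, optimizer, batch_size):
    model.train()
    perm = np.random.permutation(train_nid)
    for s in range(0, len(perm), batch_size):
        batch_nid = perm[s:s + batch_size]
        nid = paddle.to_tensor(batch_nid, dtype='int64')
        # Gather this mini-batch's rows from every per-hop feature tensor.
        batch_feats = [paddle.gather(x, nid) for x in feats]
        y = paddle.to_tensor(labels[batch_nid], dtype='int64')
        loss = loss_fcn(model(batch_feats), y)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()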