def mm_backwardcorrect_sparse_embed(sparseX: FloatTensor, denseY: paddle.fluid.dygraph.core.VarBase):
    # Sparse @ dense matmul implemented with an embedding lookup so that
    # gradients flow back to denseY correctly.
    update_inds = sparseX.indices[0]
    # Gather the rows of denseY addressed by the sparse column indices and
    # scale each gathered row by the corresponding nonzero value.
    updates = paddle.nn.functional.embedding(
        sparseX.indices[1], denseY, sparse=False) * sparseX.values.view(-1, 1)
    # Scatter-add the scaled rows into the output rows given by the sparse row indices.
    ret_Mat2 = paddle.scatter_nd(paddle.reshape(update_inds, (-1, 1)), updates,
                                 (sparseX.shape[0], denseY.shape[1]))
    return paddorch.convertTensor(ret_Mat2)
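# Reference sketch (pure NumPy/SciPy, illustrative helper name, not part of the
# paddorch API) of what the gather + scatter-add above computes: sparseX @ denseY
# for a COO sparse matrix.
import numpy as np
from scipy import sparse as sp


def sparse_dense_mm_reference(rows, cols, vals, dense, n_rows):
    # rows, cols, vals: COO representation of the sparse matrix, each of shape (nnz,)
    # dense: (n_cols, hidden) dense matrix
    out = np.zeros((n_rows, dense.shape[1]), dtype=dense.dtype)
    gathered = dense[cols] * vals[:, None]   # the "embedding" + scaling step
    np.add.at(out, rows, gathered)           # the "scatter_nd" accumulation step
    return out


rows = np.array([0, 0, 2]); cols = np.array([1, 3, 2]); vals = np.array([1.0, 2.0, 3.0])
dense = np.arange(8, dtype=np.float64).reshape(4, 2)
expected = sp.coo_matrix((vals, (rows, cols)), shape=(3, 4)).dot(dense)
assert np.allclose(sparse_dense_mm_reference(rows, cols, vals, dense, 3), expected)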
def linear(input, weight, bias=None):
    # If the weight arrives in torch's (out_features, in_features) layout,
    # transpose it to Paddle's (in_features, out_features) layout before use.
    if input.shape[-1] != weight.shape[0]:
        weight = paddle.transpose(weight, [1, 0])
    layer_obj = fluid.dygraph.Linear(input.shape[-1], weight.shape[1])
    fluid.layers.assign(weight, layer_obj.weight)
    if bias is not None:
        fluid.layers.assign(bias, layer_obj.bias)
    return paddorch.convertTensor(layer_obj(input.astype("float32")))
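# Reference sketch (pure NumPy, illustrative names only) of the contract the
# wrapper above follows: with a torch-style (out_features, in_features) weight,
# the result is x @ W.T + b, i.e. the same as torch.nn.functional.linear.
import numpy as np


def linear_reference(x, weight, bias=None):
    if x.shape[-1] != weight.shape[0]:      # weight came in (out, in) layout
        weight = weight.T                   # -> (in, out), as the wrapper does
    out = x @ weight
    return out + bias if bias is not None else out


x = np.random.randn(4, 3).astype("float32")
W = np.random.randn(5, 3).astype("float32")     # (out_features, in_features)
b = np.random.randn(5).astype("float32")
assert np.allclose(linear_reference(x, W, b), x @ W.T + b, atol=1e-6)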
def _add_undirected_graph_positional_embedding(g, hidden_size, retry=10):
    # We use eigenvectors of the normalized graph Laplacian as vertex features.
    # This can be viewed as a generalization of the positional embedding in the
    # "Attention Is All You Need" paper: recall that the eigenvectors of the
    # normalized Laplacian of a line graph are cos/sin functions.
    # See section 2.4 of http://www.cs.yale.edu/homes/spielman/561/2009/lect02-09.pdf
    n = g.number_of_nodes()
    adj = g.adjacency_matrix_scipy(transpose=False, return_edge_ids=False).astype(float)
    norm = sparse.diags(dgl.backend.asnumpy(g.in_degrees()).clip(1)**-0.5, dtype=float)
    laplacian = norm * adj * norm
    k = min(n - 2, hidden_size)
    x = eigen_decomposision(n, k, laplacian, hidden_size, retry)
    g.ndata["pos_undirected"] = torch.convertTensor(x)
    return g
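# Sketch of one way the eigen_decomposision step could be implemented: take the
# k leading eigenvectors of the sparse symmetric normalized matrix with SciPy's
# eigensolver and zero-pad to hidden_size. This is an assumption for
# illustration; the actual helper in the codebase may differ (e.g. in how the
# `retry` argument handles eigensolver convergence failures).
import numpy as np
import scipy.sparse.linalg as spla


def eigen_decomposition_sketch(n, k, laplacian, hidden_size):
    # k leading eigenvectors (largest algebraic eigenvalues); requires k < n.
    _, vecs = spla.eigsh(laplacian.astype("float64"), k=k, which="LA")
    x = np.zeros((n, hidden_size), dtype="float32")
    x[:, :k] = vecs.astype("float32")        # zero-pad when k < hidden_size
    return x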
def from_dlpack(dlpack):
    tensor_from_dlpack = fluid.core.from_dlpack(dlpack)
    place = tensor_from_dlpack._place()
    if True:  # "win" in platform:  # CPU env
        if "int32" in str(tensor_from_dlpack):
            return paddorch.convertTensor(
                paddle.to_tensor(np.array(tensor_from_dlpack), dtype="int32"))
        else:
            return paddorch.Tensor(
                paddle.to_tensor(np.array(tensor_from_dlpack)))
    else:
        with paddle.fluid.dygraph.guard(place=place):
            tensor_from_dlpack.__class__ = paddle.fluid.LoDTensor
            ret = paddle.Tensor(tensor_from_dlpack)
            if "int32" in str(tensor_from_dlpack):
                ret = paddle.to_tensor(ret, dtype="int32")
            tensor_from_dlpack.__class__ = paddle.fluid.core_avx.Tensor
            return ret
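# Hedged usage sketch: round-trip a Paddle tensor through a DLPack capsule and
# back through the from_dlpack() wrapper above. This assumes a Paddle release
# that ships paddle.utils.dlpack.to_dlpack; older fluid-only builds expose the
# capsule differently, in which case this example does not apply as written.
import numpy as np
import paddle

src = paddle.to_tensor(np.arange(6, dtype="float32").reshape(2, 3))
capsule = paddle.utils.dlpack.to_dlpack(src)   # export as a DLPack capsule
dst = from_dlpack(capsule)                     # re-import via the wrapper above
assert np.allclose(dst.numpy(), src.numpy())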
def test_finetune(epoch, valid_loader, model, output_layer, criterion, sw, opt):
    n_batch = len(valid_loader)
    model.eval()
    output_layer.eval()

    epoch_loss_meter = AverageMeter()
    epoch_f1_meter = AverageMeter()

    for idx, batch in enumerate(valid_loader):
        graph_q, y = batch
        bsz = graph_q.batch_size

        # ===================forward=====================
        with torch.no_grad():
            feat_q = model(graph_q)
            assert feat_q.shape == (graph_q.batch_size, opt.hidden_size)
            out = output_layer(feat_q)
            loss = torch.convertTensor(criterion(out, y))

        preds = out.argmax(dim=1)
        f1 = f1_score(y.cpu().numpy(), preds.cpu().numpy(), average="micro")

        # ===================meters=====================
        epoch_loss_meter.update(loss.item(), bsz)
        epoch_f1_meter.update(f1, bsz)

    global_step = (epoch + 1) * n_batch
    sw.add_scalar("ft_loss/valid", epoch_loss_meter.avg, global_step)
    sw.add_scalar("ft_f1/valid", epoch_f1_meter.avg, global_step)
    print(opt.model_folder)
    print(
        f"Epoch {epoch}, loss {epoch_loss_meter.avg:.3f}, f1 {epoch_f1_meter.avg:.3f}"
    )
    return epoch_loss_meter.avg, epoch_f1_meter.avg
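# AverageMeter is used by the fine-tuning loops but is not defined in this
# excerpt. The sketch below is the standard running-average helper found in
# MoCo-style codebases and is assumed (not guaranteed) to match the one used here.
class AverageMeter(object):
    """Tracks the latest value and the count-weighted running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count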
def normalize(input, p=2, dim=1, eps=1e-12, out=None):
    # eps and out are accepted for torch API compatibility but are not used here.
    return torch.convertTensor(
        input / paddle.norm(input, p, axis=dim, keepdim=True))
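# Hedged usage sketch for normalize() above, reusing only paddorch calls that
# appear elsewhere in this file (arange, convertTensor, view). The module path
# paddorch.nn.functional.normalize is an assumption; adjust it to wherever this
# normalize() is actually exposed.
import numpy as np
import paddorch

x = paddorch.convertTensor(1.0 / paddorch.arange(1, 13)).view(4, 3)
y = paddorch.nn.functional.normalize(x, p=2, dim=1)
x_np = x.numpy()
expected = x_np / np.linalg.norm(x_np, ord=2, axis=1, keepdims=True)
assert np.allclose(y.numpy(), expected, atol=1e-5)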
import paddorch

arr = paddorch.convertTensor(1.0 / paddorch.arange(1, 30))
print(arr)
arr[arr > 0.1] = 0.0
print(arr)
arr[paddorch.isfinite(arr)] = 1.0
print(arr)
arr[paddorch.isinf(arr)] = 2.0
print(arr)

arr = paddorch.convertTensor(1.0 / paddorch.arange(1, 31)).view(-1, 5, 2)
print("before", arr)
arr[:, 2, :] = 999
print("after", arr)
def train_finetune(
    epoch,
    train_loader,
    model,
    output_layer,
    criterion,
    optimizer,
    output_layer_optimizer,
    sw,
    opt,
):
    """
    one epoch training for moco
    """
    n_batch = len(train_loader)
    model.train()
    output_layer.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    loss_meter = AverageMeter()
    f1_meter = AverageMeter()
    epoch_loss_meter = AverageMeter()
    epoch_f1_meter = AverageMeter()
    prob_meter = AverageMeter()
    graph_size = AverageMeter()
    max_num_nodes = 0
    max_num_edges = 0

    end = time.time()
    for idx, batch in enumerate(train_loader):
        data_time.update(time.time() - end)
        graph_q, y = batch
        graph_q.to(torch.device(opt.gpu))
        y = y.to(torch.device(opt.gpu))
        bsz = graph_q.batch_size

        # ===================forward=====================
        feat_q = model(graph_q)

        assert feat_q.shape == (graph_q.batch_size, opt.hidden_size)
        out = output_layer(feat_q)

        loss = torch.convertTensor(criterion(out, y))

        # ===================backward=====================
        optimizer.zero_grad()
        output_layer_optimizer.zero_grad()
        loss.backward()
        # torch.nn.utils.clip_grad_value_(model.parameters(), 1)
        # torch.nn.utils.clip_grad_value_(output_layer.parameters(), 1)
        global_step = epoch * n_batch + idx
        lr_this_step = opt.learning_rate * warmup_linear(
            global_step / (opt.epochs * n_batch), 0.1)
        # if lr_this_step is not None:
        #     optimizer.set_lr(lr_this_step)
        #     output_layer_optimizer.set_lr(lr_this_step)
        optimizer.step()
        output_layer_optimizer.step()

        preds = out.argmax(dim=1)
        f1 = f1_score(y.cpu().numpy(), preds.cpu().numpy(), average="micro")

        # ===================meters=====================
        f1_meter.update(f1, bsz)
        epoch_f1_meter.update(f1, bsz)
        loss_meter.update(loss.item(), bsz)
        epoch_loss_meter.update(loss.item(), bsz)
        graph_size.update(graph_q.number_of_nodes() / bsz, bsz)
        max_num_nodes = max(max_num_nodes, graph_q.number_of_nodes())
        max_num_edges = max(max_num_edges, graph_q.number_of_edges())

        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            mem = psutil.virtual_memory()
            # print(f'{idx:8} - {mem.percent:5} - {mem.free/1024**3:10.2f} - {mem.available/1024**3:10.2f} - {mem.used/1024**3:10.2f}')
            # mem_used.append(mem.used/1024**3)
            print("Train: [{0}][{1}/{2}]\t"
                  "BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                  "DT {data_time.val:.3f} ({data_time.avg:.3f})\t"
                  "loss {loss.val:.3f} ({loss.avg:.3f})\t"
                  "f1 {f1.val:.3f} ({f1.avg:.3f})\t"
                  "GS {graph_size.val:.3f} ({graph_size.avg:.3f})\t"
                  "mem {mem:.3f}".format(
                      epoch,
                      idx + 1,
                      n_batch,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=loss_meter,
                      f1=f1_meter,
                      graph_size=graph_size,
                      mem=mem.used / 1024**3,
                  ))
            # print(out[0].abs().max())

        # tensorboard logger
        if (idx + 1) % opt.tb_freq == 0:
            sw.add_scalar("ft_loss", loss_meter.avg, global_step)
            sw.add_scalar("ft_f1", f1_meter.avg, global_step)
            sw.add_scalar("graph_size", graph_size.avg, global_step)
            sw.add_scalar("lr", lr_this_step, global_step)
            sw.add_scalar("graph_size/max", max_num_nodes, global_step)
            sw.add_scalar("graph_size/max_edges", max_num_edges, global_step)
            # sw.add_scalar(
            #     "learning_rate", optimizer.param_groups[0]["lr"], global_step
            # )
            loss_meter.reset()
            f1_meter.reset()
            graph_size.reset()
            max_num_nodes, max_num_edges = 0, 0

    return epoch_loss_meter.avg, epoch_f1_meter.avg
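# warmup_linear() is referenced in train_finetune() but not defined in this
# excerpt. The sketch below is the BERT-style triangular schedule commonly used
# with this code path (linear warmup to 1.0 over the first `warmup` fraction of
# training, then linear decay to 0); it is an assumption about the actual helper.
def warmup_linear(x, warmup=0.002):
    # x is the fraction of total training steps completed, in [0, 1].
    if x < warmup:
        return x / warmup
    return max((x - 1.0) / (warmup - 1.0), 0.0)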