def __init__(self, d_model, dropout=0.1, max_len=5000):
    super(PositionalEncoding, self).__init__()
    self.dropout = nn.Dropout(p=dropout)
    pe = torch.zeros(max_len, d_model)
    position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
    div_term = torch.exp(
        torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
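# The snippet above stops after computing div_term. A minimal self-contained
# sketch of the full module it appears to be building, following the standard
# sinusoidal encoding from the PyTorch Transformer tutorial (an assumption
# about the missing tail, not necessarily this author's exact code):
import math
import torch
import torch.nn as nn

class PositionalEncodingSketch(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)  # even dims: sine
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dims: cosine
        self.register_buffer('pe', pe.unsqueeze(0))   # [1, max_len, d_model]

    def forward(self, x):
        # add the fixed (non-learned) encoding to the embeddings, then dropout
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)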
def train(model, data, num_epochs=30):
    for epoch in range(num_epochs):
        model.train()
        total_num_words = num_words = total_loss = 0.
        for it, (mb_x, mb_x_len, mb_y, mb_y_len) in enumerate(data):
            mb_x = torch.from_numpy(mb_x).to(device).long()
            mb_x_len = torch.from_numpy(mb_x_len).to(device).long()
            mb_input = torch.from_numpy(mb_y[:, :-1]).to(device).long()
            mb_output = torch.from_numpy(mb_y[:, 1:]).to(device).long()
            mb_y_len = torch.from_numpy(mb_y_len - 1).to(device).long()
            mb_y_len[mb_y_len <= 0] = 1
            mb_pred, attn = model(mb_x, mb_x_len, mb_input, mb_y_len)
            mb_out_mask = torch.arange(
                mb_y_len.max().item(), device=device)[None, :] < mb_y_len[:, None]
            mb_out_mask = mb_out_mask.float()
            loss = loss_fn(mb_pred, mb_output, mb_out_mask)
            num_words = torch.sum(mb_y_len).item()
            total_loss += loss.item() * num_words
            total_num_words += num_words
            # update the model
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)
            optimizer.step()
            if it % 100 == 0:
                print("Epoch", epoch, "iteration", it, "loss", loss.item())
        print("Epoch", epoch, "Training loss", total_loss / total_num_words)
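# train() above assumes a masked token-level loss `loss_fn(pred, target, mask)`
# but never defines it. A minimal sketch of one common definition (an
# assumption: it presumes the model emits log-probabilities of shape
# [batch, seq_len, vocab]; the class name here is illustrative):
import torch
import torch.nn as nn

class LanguageModelCriterion(nn.Module):
    def forward(self, input, target, mask):
        # flatten to [batch*seq_len, vocab] and gather the gold log-probs
        input = input.contiguous().view(-1, input.size(2))
        target = target.contiguous().view(-1, 1)
        mask = mask.contiguous().view(-1, 1)
        output = -input.gather(1, target) * mask    # masked NLL per token
        return torch.sum(output) / torch.sum(mask)  # average over real tokens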
def compute_mean_hidden(self, x):
    if self.rnn.bidirectional:
        direction_size = x.size(-1) // 2
        x_front = x[..., :direction_size]
        # reverse the feature order of the backward direction
        x_back = x[..., torch.arange(direction_size * 2 - 1, direction_size - 1, -1)]
        x_ = torch.cat((x_front, x_back), dim=2)
        return x_.mean(dim=1)
    return x.mean(dim=1)
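# A quick sanity check for compute_mean_hidden, assuming x is the output of a
# bidirectional RNN with shape [batch, seq_len, 2 * hidden_size] (the sizes
# and the GRU here are illustrative, not from the original code):
import torch
import torch.nn as nn

rnn = nn.GRU(input_size=8, hidden_size=16, batch_first=True, bidirectional=True)
x, _ = rnn(torch.randn(4, 10, 8))        # x: [4, 10, 32]
direction_size = x.size(-1) // 2
x_back = x[..., torch.arange(direction_size * 2 - 1, direction_size - 1, -1)]
pooled = torch.cat((x[..., :direction_size], x_back), dim=2).mean(dim=1)
print(pooled.shape)                      # torch.Size([4, 32])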
def forward(self, x, enc_out, src_mask, trg_mask):
    N, seq_length = x.shape
    positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
    x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
    for layer in self.layers:
        x = layer(x, enc_out, enc_out, src_mask, trg_mask)
    out = self.fc_out(x)
    return out
def forward(self, x, mask):
    N, seq_length = x.shape
    positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
    out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
    for layer in self.layers:
        out = layer(out, out, out, mask)
    return out
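# The decoder and encoder forward() methods above consume src_mask and
# trg_mask but never build them. A common construction for this style of
# Transformer (a sketch assuming src_pad_idx marks padding tokens; the
# function names are illustrative):
import torch

def make_src_mask(src, src_pad_idx):
    # [N, 1, 1, src_len]: 1 where the token is real, 0 where it is padding
    return (src != src_pad_idx).unsqueeze(1).unsqueeze(2)

def make_trg_mask(trg):
    # [N, 1, trg_len, trg_len]: lower-triangular causal mask so position i
    # can only attend to positions <= i
    N, trg_len = trg.shape
    return torch.tril(torch.ones(trg_len, trg_len)).expand(N, 1, trg_len, trg_len)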
def collect_neighbor_v2(representation, positions):
    # representation: [batch_size, num_nodes, feature_dim]
    # positions: [batch_size, num_nodes, num_neighbors]
    batch_size = positions.size(0)
    node_num = positions.size(1)
    neigh_num = positions.size(2)
    rids = torch.arange(0, batch_size)  # [batch]
    rids = rids.reshape([-1, 1, 1])  # [batch, 1, 1]
    rids = rids.repeat(1, node_num, neigh_num)  # [batch, nodes, neighbors]
    indices = torch.stack((rids, positions), 3)  # [batch, nodes, neighbors, 2]
    return representation[indices[:, :, :, 0], indices[:, :, :, 1], :]
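# Small usage check for collect_neighbor_v2 (shapes chosen for illustration):
# it gathers, for every node, the feature vectors of its listed neighbors.
import torch

rep = torch.randn(2, 5, 8)            # [batch, nodes, feature_dim]
pos = torch.randint(0, 5, (2, 5, 3))  # [batch, nodes, neighbors], long indices
out = collect_neighbor_v2(rep, pos)
print(out.shape)                      # torch.Size([2, 5, 3, 8])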
def main():
    ### assigning the variables of get_loader with respective values
    image_dir = "cocoapi/images/train2014/"
    caption_path = "./cocoapi/annotations/captions_train2014.json"
    crop_size = [224, 224]
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    batch_size = 20
    num_workers = 1
    train = "train"
    test = "test"
    #### loading the data loaders
    train_loader = get_loader(image_dir, caption_path, train, transform,
                              batch_size, shuffle=True, num_workers=num_workers)
    test_loader = get_loader(image_dir, caption_path, test, transform,
                             batch_size, shuffle=True, num_workers=num_workers)
    print(len(train_loader))
    print(len(test_loader))
    #### assigning device
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)
    #### assigning model
    model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
    #### assigning model weights
    if device == "cpu":
        model.float()
    else:
        clip.model.convert_weights(model)
    #### defining the image/text losses and optimizer
    loss_img = nn.CrossEntropyLoss()
    loss_txt = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-6, betas=(0.9, 0.98),
                           eps=1e-7, weight_decay=0.5)
    epochs = 20
    for epoch in range(epochs):
        ##### batch training
        i = 0
        batch_loss = 0
        for batch in train_loader:
            i = i + 1
            images, texts = batch
            ## preprocessing images and texts
            images2 = torch.stack([transform(img) for img in images], dim=0)
            texts2 = clip.tokenize(texts)
            optimizer.zero_grad()
            images2 = images2.cuda()
            texts2 = texts2.cuda()
            ### tensorizing the labels (the diagonal entry is the matching pair)
            ground_truth = torch.arange(len(images)).long().to(device)
            logits_per_image, logits_per_text = model(images2, texts2)
            total_loss = (loss_img(logits_per_image, ground_truth)
                          + loss_txt(logits_per_text, ground_truth)) / 2
            print(i, total_loss)
            batch_loss = batch_loss + total_loss.item()
            total_loss.backward()
            if device == "cpu":
                optimizer.step()
            else:
                # mixed-precision round trip: step in fp32, store back in fp16
                convert_models_to_fp32(model)
                optimizer.step()
                clip.model.convert_weights(model)
        ### logging using wandb
        batch_loss = batch_loss / i
        wandb.log({"epoch loss": batch_loss}, step=epoch)
        num = 0
        for batch in test_loader:
            images, texts = batch
            ## preprocessing images and texts
            images2 = torch.stack([transform(img) for img in images], dim=0)
            texts2 = clip.tokenize(texts)
            images2 = images2.cuda()
            texts2 = texts2.cuda()
            logits_per_image, logits_per_text = model(images2, texts2)
            # a prediction counts as correct when the matching caption
            # (the diagonal logit) is the row maximum
            for j in range(len(images)):
                if max(logits_per_image[j]) == logits_per_image[j][j]:
                    num = num + 1
        print(num)
        print("accuracy", (num / 10000) * 100)  # assumes a 10,000-sample test split
        accuracy = num / 100
        wandb.log({"epoch testing accuracy": accuracy}, step=epoch)
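# convert_models_to_fp32 is called in the training loops above but never
# defined in this section. A commonly used helper for CLIP fine-tuning
# (a sketch; the None-check on grad is an added safety assumption):
def convert_models_to_fp32(model):
    for p in model.parameters():
        p.data = p.data.float()
        if p.grad is not None:
            p.grad.data = p.grad.data.float()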
def main():
    ### assigning the variables of get_loader with respective values
    image_dir = "cocoapi/images/train2014/"
    caption_path = "./cocoapi/annotations/captions_train2014.json"
    crop_size = [224, 224]
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    batch_size = 20
    num_workers = 1
    #### loading a data_loader
    data_loader = get_loader(image_dir, caption_path, transform,
                             batch_size, shuffle=True, num_workers=num_workers)
    #### assigning device
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)
    mode = "test"
    if mode == "train":
        #### assigning model
        model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
        #### assigning model weights
        if device == "cpu":
            model.float()
        else:
            clip.model.convert_weights(model)
        #### defining the image/text losses and optimizer
        loss_img = nn.CrossEntropyLoss()
        loss_txt = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=5e-5, betas=(0.9, 0.98),
                               eps=1e-6, weight_decay=0.2)
        print(len(data_loader))
        epochs = 2
        i = 0
        for epoch in range(epochs):
            ##### batch training
            for batch in data_loader:
                images, texts = batch
                i = i + 1
                ## preprocessing images and texts
                images2 = torch.stack([transform(img) for img in images], dim=0)
                texts2 = clip.tokenize(texts)
                optimizer.zero_grad()
                images2 = images2.cuda()
                texts2 = texts2.cuda()
                ### tensorizing the labels
                ground_truth = torch.arange(len(images)).long().to(device)
                logits_per_image, logits_per_text = model(images2, texts2)
                total_loss = (loss_img(logits_per_image, ground_truth)
                              + loss_txt(logits_per_text, ground_truth)) / 2
                print(i, total_loss)
                ### logging using wandb
                wandb.log({"batch loss": total_loss})
                total_loss.backward()
                if device == "cpu":
                    optimizer.step()
                else:
                    convert_models_to_fp32(model)
                    optimizer.step()
                    clip.model.convert_weights(model)
            wandb.log({"epoch loss": total_loss})
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': total_loss,
            }, f"./model_10.pt")  # just change to your preferred folder/filename
    elif mode == "test":
        model, preprocess = clip.load("ViT-B/32", device=device, jit=False)  # must set jit=False for training
        checkpoint = torch.load("./model_1_epoch.pt")
        # Use these 3 lines if you use the default model setting (not the
        # training setting) of CLIP. For example, if you set context_length
        # to 100 during training because your strings are very long, then
        # assign 100 to checkpoint['model_state_dict']["context_length"].
        #checkpoint['model_state_dict']["input_resolution"] = model.input_resolution  # default is 224
        #checkpoint['model_state_dict']["context_length"] = model.context_length  # default is 77
        #checkpoint['model_state_dict']["vocab_size"] = model.vocab_size
        model.load_state_dict(checkpoint['model_state_dict'])
        i = 0
        num = 0
        for batch in data_loader:
            images, texts = batch
            i = i + 1
            ## preprocessing images and texts
            images2 = torch.stack([transform(img) for img in images], dim=0)
            texts2 = clip.tokenize(texts)
            images2 = images2.cuda()
            texts2 = texts2.cuda()
            logits_per_image, logits_per_text = model(images2, texts2)
            # correct when the diagonal (matching) logit is the row maximum
            for j in range(len(images)):
                if max(logits_per_image[j]) == logits_per_image[j][j]:
                    num = num + 1
            print(i)
            if i == 100:
                break
        print(num)
        print("accuracy", (num / 2000) * 100)  # 100 batches of 20 images
print(len(params))
# print(params)
# The inputs and outputs of the forward function are all Tensors.
for name, parameters in net.named_parameters():
    print(name, ':', parameters.size())
input = t.randn(1, 1, 32, 32)
out = net(input)
print(out.size())
net.zero_grad()
out.backward(t.ones(1, 10))  # non-scalar output needs an explicit gradient argument
'''
output = net(input)
target = t.arange(0, 10, dtype=t.float).view(1, 10)
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)
net.zero_grad()
print('conv1.bias gradient before backprop')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias gradient after backprop')
print(net.conv1.bias.grad)
optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
# compute the loss
output = net(input)
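# The fragment above stops right after recomputing the output. The standard
# continuation of this optimizer pattern (a sketch; it assumes the criterion,
# target, and optimizer defined in the preceding lines) finishes one SGD update:
loss = criterion(output, target)
loss.backward()    # accumulate fresh gradients
optimizer.step()   # apply the parameter update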
def main():
    ### assigning the variables of get_loader with respective values
    image_dir = "cocoapi/images/train2014/"
    caption_path = "./cocoapi/annotations/captions_train2014.json"
    crop_size = [224, 224]
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    batch_size = 100
    num_workers = 1
    #### loading a data_loader
    data_loader = get_loader(image_dir, caption_path, transform,
                             batch_size, shuffle=True, num_workers=num_workers)
    #### assigning device
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)
    #### assigning model
    model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
    #### assigning model weights
    if device == "cpu":
        model.float()
    else:
        clip.model.convert_weights(model)
    #### defining the image/text losses and optimizer
    loss_img = nn.CrossEntropyLoss()
    loss_txt = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=5e-5, betas=(0.9, 0.98),
                           eps=1e-6, weight_decay=0.2)
    epochs = 2
    for epoch in range(epochs):
        ##### batch training
        for batch in data_loader:
            images, texts = batch
            ## preprocessing images and texts
            images2 = torch.stack([transform(img) for img in images], dim=0)
            texts2 = clip.tokenize(texts)
            optimizer.zero_grad()
            images2 = images2.cuda()
            texts2 = texts2.cuda()
            ### tensorizing the labels
            ground_truth = torch.arange(len(images)).long().to(device)
            logits_per_image, logits_per_text = model(images2, texts2)
            total_loss = (loss_img(logits_per_image, ground_truth)
                          + loss_txt(logits_per_text, ground_truth)) / 2
            print(total_loss)
            ### logging using wandb
            wandb.log({"batch loss": total_loss})
            total_loss.backward()
            if device == "cpu":
                optimizer.step()
            else:
                convert_models_to_fp32(model)
                optimizer.step()
                clip.model.convert_weights(model)
        wandb.log({"epoch loss": total_loss})
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(out)
net.zero_grad()
out.backward(torch.randn(1, 10))
# Recap
# torch.Tensor - a multi-dimensional array
# autograd.Variable - wraps a Tensor and records the history of operations applied to it
# nn.Module - neural network module
# autograd.Function - implements forward and backward definitions of an autograd operation

# Loss Function
output = net(input)
target = Variable(torch.arange(1, 11).float().view(1, 10))  # dummy target 1..10
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)
# Following a few backward steps
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU
# Backprop
# Backpropagate the error by loss.backward()
# Clear the existing gradients or else they accumulate
net.zero_grad()
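# Completing the backprop step the recap describes (a sketch; it assumes
# `net` has a first conv layer named conv1, as in the classic LeNet tutorial):
print('conv1.bias.grad before backward:')
print(net.conv1.bias.grad)   # None (or zeros) before the backward pass
loss.backward()
print('conv1.bias.grad after backward:')
print(net.conv1.bias.grad)   # now populated with dE/db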
def forward(self, x, targets=None):
    nA = self.num_anchors
    nB = x.size(0)
    nG = x.size(2)
    stride = self.image_dim / nG
    # tensor types for CUDA support
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
    LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
    ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
    prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()
    # get outputs
    x = torch.sigmoid(prediction[..., 0])  # center x
    y = torch.sigmoid(prediction[..., 1])  # center y
    w = prediction[..., 2]
    h = prediction[..., 3]
    pred_conf = torch.sigmoid(prediction[..., 4])  # objectness
    pred_cls = torch.sigmoid(prediction[..., 5:])  # class scores
    # calculate offsets for each grid cell
    grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
    grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
    scale_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
    anchor_w = scale_anchors[:, 0:1].view((1, nA, 1, 1))
    anchor_h = scale_anchors[:, 1:2].view((1, nA, 1, 1))
    # add offsets and scale with anchors
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + grid_x
    pred_boxes[..., 1] = y.data + grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * anchor_h
    # training
    if targets is not None:
        if x.is_cuda:
            self.mse_loss = self.mse_loss.cuda()
            self.bce_loss = self.bce_loss.cuda()
            self.ce_loss = self.ce_loss.cuda()
        nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes=pred_boxes.cpu().data,
            pred_conf=pred_conf.cpu().data,
            pred_cls=pred_cls.cpu().data,
            target=targets.cpu().data,
            anchors=scale_anchors.cpu().data,
            num_anchors=nA,
            num_classes=self.num_classes,
            grid_size=nG,
            ignore_thres=self.ignore_thres,
            img_dim=self.image_dim,
        )
        nProposals = int((pred_conf > 0.5).sum().item())
        recall = float(nCorrect / nGT) if nGT else 1
        precision = float(nCorrect / nProposals)
        # handle masks
        mask = Variable(mask.type(ByteTensor))
        conf_mask = Variable(conf_mask.type(ByteTensor))
        # handle target variables
        tx = Variable(tx.type(FloatTensor), requires_grad=False)
        ty = Variable(ty.type(FloatTensor), requires_grad=False)
        tw = Variable(tw.type(FloatTensor), requires_grad=False)
        th = Variable(th.type(FloatTensor), requires_grad=False)
        tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
        tcls = Variable(tcls.type(FloatTensor), requires_grad=False)
        # conf mask: cells with a ground truth vs. cells with none
        conf_mask_true = mask
        conf_mask_false = conf_mask - mask
        # mask outputs to ignore non-existing objects
        loss_x = self.mse_loss(x[mask], tx[mask])
        loss_y = self.mse_loss(y[mask], ty[mask])
        loss_w = self.mse_loss(w[mask], tw[mask])
        loss_h = self.mse_loss(h[mask], th[mask])
        loss_conf = (self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false])
                     + self.bce_loss(pred_conf[conf_mask_true], tconf[conf_mask_true]))
        loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        return (
            loss,
            loss_x.item(),
            loss_y.item(),
            loss_w.item(),
            loss_h.item(),
            loss_conf.item(),
            loss_cls.item(),
            recall,
            precision,
        )
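# As written, forward() falls through and returns None when targets is None.
# A helper showing the usual YOLOv3-style inference output (an assumption
# about the missing else-branch, following common implementations; the
# function name is illustrative):
import torch

def decode_inference(pred_boxes, pred_conf, pred_cls, stride, nB, num_classes):
    # concatenate boxes (rescaled to image pixels), objectness, and class scores
    return torch.cat(
        (pred_boxes.view(nB, -1, 4) * stride,
         pred_conf.view(nB, -1, 1),
         pred_cls.view(nB, -1, num_classes)),
        -1,
    )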