Example #1
 def __init__(self, d_model, dropout=0.1, max_len=5000):
     super(PositionalEncoding, self).__init__()
     self.dropout = nn.Dropout(p=dropout)
     # precompute the sinusoidal position-encoding table
     pe = torch.zeros(max_len, d_model)
     position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
     div_term = torch.exp(
         torch.arange(0, d_model, 2).float() *
         (-math.log(10000.0) / d_model))
     pe[:, 0::2] = torch.sin(position * div_term)
     pe[:, 1::2] = torch.cos(position * div_term)
     pe = pe.unsqueeze(0).transpose(0, 1)
     self.register_buffer('pe', pe)
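A quick sanity check of the even-index arange used for div_term above (an illustrative sketch, not part of the original snippet):

import math
import torch

d_model = 8
# even indices 0, 2, 4, 6 -> one frequency per sin/cos pair
div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                     (-math.log(10000.0) / d_model))
print(div_term.shape)  # torch.Size([4])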
Example #2
def train(model, data, num_epochs=30):
    for epoch in range(num_epochs):
        model.train()
        total_num_words = num_words = total_loss = 0.
        for it, (mb_x, mb_x_len, mb_y, mb_y_len) in enumerate(data):
            mb_x = torch.from_numpy(mb_x).to(device).long()
            mb_x_len = torch.from_numpy(mb_x_len).to(device).long()
            mb_input = torch.from_numpy(mb_y[:, :-1]).to(device).long()
            mb_output = torch.from_numpy(mb_y[:, 1:]).to(device).long()
            mb_y_len = torch.from_numpy(mb_y_len - 1).to(device).long()
            mb_y_len[mb_y_len <= 0] = 1

            mb_pred, attn = model(mb_x, mb_x_len, mb_input, mb_y_len)

            mb_out_mask = torch.arange(
                mb_y_len.max().item(),
                device=device)[None, :] < mb_y_len[:, None]
            mb_out_mask = mb_out_mask.float()

            loss = loss_fn(mb_pred, mb_output, mb_out_mask)
            num_words = torch.sum(mb_y_len).item()
            total_loss += loss.item() * num_words
            total_num_words += num_words

            # update the model
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)
            optimizer.step()

            if it % 100 == 0:
                print("Epoch", epoch, "iteration", it, "loss", loss.item())
        print("Epoch", epoch, "Training loss", total_loss / total_num_words)
Example #3
 def compute_mean_hidden(self, x):
     if self.rnn.bidirectional:
         direction_size = x.size(-1) // 2
         x_front = x[..., :direction_size]
     x_back = x[..., torch.arange(direction_size * 2 - 1,
                                  direction_size - 1, -1)]  # back half, reversed
     x_ = torch.cat((x_front, x_back), dim=2)
         return x_.mean(dim=1)
     return x.mean(dim=1)
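A toy illustration of the reversed back-direction slice (hypothetical shapes, not from the original code):

import torch

x = torch.randn(4, 7, 12)  # [batch, seq_len, 2 * direction_size]
d = x.size(-1) // 2
x_back = x[..., torch.arange(2 * d - 1, d - 1, -1)]  # back half, reversed
print(x_back.shape)  # torch.Size([4, 7, 6])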
Example #4
    def forward(self, x, enc_out, src_mask, trg_mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout((self.word_embedding(x) + self.position_embedding(positions)))

        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)

        out = self.fc_out(x)  
        return out
Example #5
    def forward(self, x, mask):
        N, seq_length = x.shape 
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)

        out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))

        for layer in self.layers:
            out = layer(out, out, out, mask)

        return out
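The src_mask and trg_mask consumed by the two forward passes above are typically built along these lines (a sketch assuming a padding index and standard causal masking; the mask builders are not shown in the original code):

import torch

def make_src_mask(src, pad_idx=0):
    # [N, 1, 1, src_len]: True where the token is not padding
    return (src != pad_idx).unsqueeze(1).unsqueeze(2)

def make_trg_mask(trg):
    N, trg_len = trg.shape
    # lower-triangular causal mask, [N, 1, trg_len, trg_len]
    return torch.tril(torch.ones(trg_len, trg_len)).expand(N, 1, trg_len, trg_len)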
Example #6
def collect_neighbor_v2(representation, positions):
    # representation: [batch_size, num_nodes, feature_dim]
    # positions: [batch_size, num_nodes, num_neighbors]
    batch_size = positions.size(0)
    node_num = positions.size(1)
    neigh_num = positions.size(2)
    rids = torch.arange(0, batch_size)  # [batch]
    rids = rids.reshape([-1, 1, 1])  # [batch, 1, 1]
    rids = rids.repeat(1, node_num, neigh_num)  # [batch, nodes, neighbors]
    indices = torch.stack((rids, positions), 3)  # [batch, nodes, neighbors, 2]
    return representation[indices[:, :, :, 0], indices[:, :, :, 1], :]
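Quick shape check with toy inputs (illustrative only):

import torch

rep = torch.randn(2, 5, 3)            # [batch, nodes, feature_dim]
pos = torch.randint(0, 5, (2, 5, 4))  # [batch, nodes, neighbors]
out = collect_neighbor_v2(rep, pos)
print(out.shape)                      # torch.Size([2, 5, 4, 3])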
Example #7
def main():
    ###assigning the variables of get_loader with respective values
    image_dir = "cocoapi/images/train2014/"
    caption_path = "./cocoapi/annotations/captions_train2014.json"
    crop_size = [224, 224]
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    batch_size = 20
    num_workers = 1
    train = "train"
    test = "test"
    ####loading a data_loader
    train_loader = get_loader(image_dir,
                              caption_path,
                              train,
                              transform,
                              batch_size,
                              shuffle=True,
                              num_workers=num_workers)
    test_loader = get_loader(image_dir,
                             caption_path,
                             test,
                             transform,
                             batch_size,
                             shuffle=True,
                             num_workers=num_workers)
    print(len(train_loader))
    print(len(test_loader))

    ####assigning device
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)
    ####assigning model
    model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
    ####assigning model weights
    if device == "cpu":
        model.float()
    else:
        clip.model.convert_weights(model)
    ####defining the image,text losses and optimizer
    loss_img = nn.CrossEntropyLoss()
    loss_txt = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-6,
                           betas=(0.9, 0.98),
                           eps=1e-7,
                           weight_decay=0.5)
    print(len(train_loader))
    epochs = 20
    for epoch in range(0, epochs):
        #####batch training
        i = 0
        batch_loss = 0
        for batch in train_loader:
            i = i + 1
            images, texts = batch
            ##preprocessing images and texts
            images2 = torch.stack([transform(img) for img in images], dim=0)
            texts2 = clip.tokenize(texts)
            optimizer.zero_grad()
            images2 = images2.cuda()
            texts2 = texts2.cuda()
            ###tensorizing the labels
            if device == "cpu":
                ground_truth = torch.arange(len(images)).long().to(device)
            else:
                ground_truth = torch.arange(len(images)).long().to(device)
            logits_per_image, logits_per_text = model(images2, texts2)
            #print(logits_per_image)
            #print(logits_per_text)
            total_loss = (loss_img(logits_per_image, ground_truth) +
                          loss_txt(logits_per_text, ground_truth)) / 2
            print(i, total_loss)
            #loss=total_loss.numpy()
            batch_loss = batch_loss + total_loss.item()
            ###########################################logging using wandb####################################
            #wandb.log({"loss":total_loss},step=i)
            total_loss.backward()
            if device == "cpu":
                optimizer.step()
            else:
                convert_models_to_fp32(model)
                optimizer.step()
                clip.model.convert_weights(model)
        batch_loss = batch_loss / i
        wandb.log({"epoch loss": batch_loss}, step=epoch)
        num = 0
        for batch in test_loader:
            images, texts = batch
            ##preprocessing images and texts
            images2 = torch.stack([transform(img) for img in images], dim=0)
            texts2 = clip.tokenize(texts)
            images2 = images2.cuda()
            texts2 = texts2.cuda()
            logits_per_image, logits_per_text = model(images2, texts2)
            #probs=logits_per_image.softmax(dim=-1).cuda.numpy()
            #print(probs)
            #print(logits_per_text)

            for j in range(0, len(images)):
                if (max(logits_per_image[j]) == logits_per_image[j][j]):
                    num = num + 1
        print(num)
        print("accuracy", (num / 10000) * 100)
        accuracy = num / 100
        wandb.log({"epoch testing accuracy": accuracy}, step=epoch)
Example #8
def main():
    ###assigning the variables of get_loader with respective values
    image_dir="cocoapi/images/train2014/"
    caption_path="./cocoapi/annotations/captions_train2014.json"
    crop_size=[224,224]
    transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomCrop(crop_size),
            transforms.RandomHorizontalFlip(), 
            transforms.ToTensor(),
            ])
    batch_size=20
    num_workers=1
    ####loading a data_loader 
    data_loader = get_loader(image_dir, caption_path, 
                             transform, batch_size,
                             shuffle=True, num_workers=num_workers) 
    ####assigning device
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)
    mode="test"
    if(mode=="train"):
        ####assigning model
        model,preprocess=clip.load("ViT-B/32",device=device,jit=False)
        ####assigning model weights
        if device == "cpu":
            model.float()
        else :
            clip.model.convert_weights(model)
        ####defining the image,text losses and optimizer
        loss_img=nn.CrossEntropyLoss()
        loss_txt=nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=5e-5,betas=(0.9,0.98),eps=1e-6,weight_decay=0.2)
        print(len(data_loader)) 
        epochs=2
        i=0
        for epoch in range(0,epochs):
            #####batch training
            for batch in data_loader:
                images,texts=batch
                i=i+1
                ##preprocessing images and texts
                images2= torch.stack([transform(img) for img in images],dim=0)
                texts2 = clip.tokenize(texts)
                optimizer.zero_grad()
                images2=images2.cuda()
                texts2=texts2.cuda()
                ###tensorizing the labels
                if device == "cpu":
                    ground_truth = torch.arange(len(images)).long().to(device)
                else:
                    ground_truth = torch.arange(len(images)).long().to(device)
                logits_per_image, logits_per_text = model(images2, texts2)
                #print(logits_per_image)
                #print(logits_per_text)
                total_loss = (loss_img(logits_per_image,ground_truth) + loss_txt(logits_per_text,ground_truth))/2
                print(i,total_loss)
                ###########################################logging using wandb####################################
                wandb.log({"batch loss":total_loss})
                total_loss.backward()
                if device == "cpu":
                    optimizer.step()
                else :                                          
                    convert_models_to_fp32(model)
                    optimizer.step()
                    clip.model.convert_weights(model)
            wandb.log({"epoch loss":total_loss})
            torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': total_loss,
            }, f"./model_10.pt") #just change to your preferred folder/filename
    elif(mode=="test"):
         #model,preprocess=clip.load("ViT-B/32",device=device)
         model, preprocess = clip.load("ViT-B/32",device=device,jit=False) #Must set jit=False for training
         checkpoint = torch.load("./model_1_epoch.pt")

# Use these 3 lines if you use default model setting(not training setting) of the clip. For example, if you set context_length to 100 since your string is very long during training, then assign 100 to checkpoint['model_state_dict']["context_length"] 
         #checkpoint['model_state_dict']["input_resolution"] = model.input_resolution #default is 224
         #checkpoint['model_state_dict']["context_length"] = model.context_length # default is 77
         #checkpoint['model_state_dict']["vocab_size"] = model.vocab_size 

         model.load_state_dict(checkpoint['model_state_dict'])
         i=0
         num=0
         for batch in data_loader:
                images,texts=batch
                i=i+1
                ##preprocessing images and texts
                images2= torch.stack([transform(img) for img in images],dim=0)
                texts2 = clip.tokenize(texts)
                images2=images2.cuda()
                texts2=texts2.cuda()
                logits_per_image, logits_per_text = model(images2, texts2)
                #probs=logits_per_image.softmax(dim=-1).cuda.numpy()
                #print(probs)
                #print(logits_per_text)

                for j in range(0,len(images)):
                    if(max(logits_per_image[j])==logits_per_image[j][j]):
                        num=num+1
                print(i)
                if(i==100):
                    break
         print(num)
         print("accuracy",(num/2000)*100)
Example #9
print(len(params))
# print(params)

# The forward function's inputs and outputs are both Tensors.
for name, parameters in net.named_parameters():
    print(name, ':', parameters.size())

input = t.randn(1, 1, 32, 32)
out = net(input)
print(out.size())

net.zero_grad()
out.backward(t.ones(1, 10))  # ???
output = net(input)
target = t.arange(0, 10).view(1, 10).float()
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

net.zero_grad()
print('conv1.bias gradient before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias gradient after backward')
print(net.conv1.bias.grad)

optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
# compute the loss
output = net(input)
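The snippet breaks off right after the forward pass; the standard continuation of this optimizer block would be:

loss = criterion(output, target)
loss.backward()
optimizer.step()  # apply the SGD update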
Example #10
def main():
    ###assigning the variables of get_loader with respective values
    image_dir="cocoapi/images/train2014/"
    caption_path="./cocoapi/annotations/captions_train2014.json"
    crop_size=[224,224]
    transform = transforms.Compose([
            transforms.Resize(256),
            transforms.RandomCrop(crop_size),
            transforms.RandomHorizontalFlip(), 
            transforms.ToTensor(),
            ])
    batch_size=100
    num_workers=1
    ####loading a data_loader 
    data_loader = get_loader(image_dir, caption_path, 
                             transform, batch_size,
                             shuffle=True, num_workers=num_workers) 
    ####assigning device
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print(device)
    ####assigning model
    model,preprocess=clip.load("ViT-B/32",device=device,jit=False)
    ####assigning model weights
    if device == "cpu":
        model.float()
    else :
        clip.model.convert_weights(model)
    ####defining the image,text losses and optimizer
    loss_img=nn.CrossEntropyLoss()
    loss_txt=nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=5e-5,betas=(0.9,0.98),eps=1e-6,weight_decay=0.2)
    
    epochs=2
    for epoch in range(epochs):
        #####batch training
        for batch in data_loader:
            images,texts=batch
            ##preprocessing images and texts
            images2= torch.stack([transform(img) for img in images],dim=0)
            texts2 = clip.tokenize(texts)
            optimizer.zero_grad()
            images2=images2.cuda()
            texts2=texts2.cuda()
            ###tensorizing the labels
            if device == "cpu":
                ground_truth = torch.arange(len(images)).long().to(device)
            else:
                ground_truth = torch.arange(len(images)).long().to(device)
            logits_per_image, logits_per_text = model(images2, texts2)
            #print(logits_per_image)
            #print(logits_per_text)
            total_loss = (loss_img(logits_per_image,ground_truth) + loss_txt(logits_per_text,ground_truth))/2
            print(total_loss)
            ###########################################logging using wandb####################################
            wandb.log({"batch loss":total_loss})
            total_loss.backward()
            if device == "cpu":
                optimizer.step()
            else :                                          
                convert_models_to_fp32(model)
                optimizer.step()
                clip.model.convert_weights(model)
        wandb.log({"epoch loss":total_loss})
Example #11
input = Variable(torch.randn(1,1,32,32))
out = net(input)
print(out)

net.zero_grad()
out.backward(torch.randn(1,10))

#Recap
# torch.Tensor - a multi-dimensional array
# autograd.Variable - wraps a Tensor and records the history of operations applied to it
# nn.Module - Neural network module
# autograd.Function - implements forward and backward definitions of an autograd operation

# Loss Function
output = net(input)
target = Variable(torch.arange(1, 11).float())  # dummy target: values 1..10
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

#Following a few backward steps
print(loss.grad_fn)  #MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU

#Backprop
# Backpropagate the error by loss.backward()
# Clear the existing gradients or else they accumulate
net.zero_grad()
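The update this recap leads into, in its simplest manual form (the tutorial's hand-rolled SGD step):

loss.backward()
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)  # weight = weight - lr * gradient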
Example #12
    def forward(self,x,targets = None):
        nA = self.num_anchors
        nB = x.size(0)
        nG = x.size(2)
        stride = self.image_dim / nG

        #tensors for cuda support
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

        prediction = x.view(nB,nA,self.bbox_attrs,nG,nG).permute(0,1,3,4,2).contiguous()

        #get output
        x = torch.sigmoid(prediction[...,0]) #center x
        y = torch.sigmoid(prediction[...,1]) #center y
        w = prediction[...,2]
        h = prediction[...,3]
        pred_conf = torch.sigmoid(prediction[...,4])
        pred_cls = torch.sigmoid(prediction[...,5:]) #class predictions

        #calculate offsets for each grid
        grid_x = torch.arange(nG).repeat(nG,1).view([1,1,nG,nG]).type(FloatTensor)
        grid_y = torch.arange(nG).repeat(nG,1).t().view([1,1,nG,nG]).type(FloatTensor)
        scale_anchors = FloatTensor([(a_w/stride ,a_h/stride) for a_w,a_h in self.anchors])
        anchor_w = scale_anchors[:,0:1].view((1,nA,1,1))
        anchor_h = scale_anchors[:,1:2].view((1,nA,1,1))

        #add offsets and scale with anchors
        pred_boxes = FloatTensor(prediction[...,:4].shape)
        pred_boxes[...,0] = x.data + grid_x
        pred_boxes[...,1] = y.data + grid_y
        pred_boxes[...,2] = torch.exp(w.data) * anchor_w
        pred_boxes[...,3] = torch.exp(h.data) * anchor_h

        # training
        if targets is not None:
            if x.is_cuda:
                self.mse_loss = self.mse_loss.cuda()
                self.bce_loss = self.bce_loss.cuda()
                self.ce_loss = self.ce_loss.cuda()

            nGT,nCorrect,mask,conf_mask,tx,ty,tw,th,tconf,tcls = build_targets(
                pred_boxes = pred_boxes.cpu().data,
                pred_conf = pred_conf.cpu().data,
                pred_cls = pred_cls.cpu().data,
                target = targets.cpu().data,
                anchors = scale_anchors.cpu().data,
                num_anchors =nA,
                num_classes = self.num_classes,
                grid_size = nG,
                ignore_thres = self.ignore_thres,
                img_dim = self.image_dim,
            )

            nProposals = int((pred_conf > 0.5).sum().item())
            recall = float(nCorrect / nGT) if nGT else 1
            precision = float(nCorrect / nProposals) if nProposals else 0

            #handle masks
            mask = Variable(mask.type(ByteTensor))
            conf_mask = Variable(conf_mask.type(ByteTensor))

            #handle target variable
            tx = Variable(tx.type(FloatTensor),requires_grad = False)
            ty = Variable(ty.type(FloatTensor),requires_grad = False)
            tw = Variable(tw.type(FloatTensor),requires_grad = False)
            th = Variable(th.type(FloatTensor),requires_grad = False)
            tconf = Variable(tconf.type(FloatTensor),requires_grad = False)
            tcls = Variable(tcls.type(FloatTensor),requires_grad = False)

            #get conf mask where gt and where there is no gt
            conf_mask_true = mask
            conf_mask_false = conf_mask - mask

            #Mask outputs to ignore non-existing objects
            loss_x = self.mse_loss(x[mask],tx[mask])
            loss_y = self.mse_loss(y[mask],ty[mask])
            loss_w = self.mse_loss(w[mask],tw[mask])
            loss_h = self.mse_loss(h[mask],th[mask])
            loss_conf = (self.bce_loss(pred_conf[conf_mask_false],tconf[conf_mask_false]) +
                         self.bce_loss(pred_conf[conf_mask_true],tconf[conf_mask_true]))
            loss_cls = (1/nB) * self.ce_loss(pred_cls[mask],torch.argmax(tcls[mask],1))
            loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

            return(
                loss,
                loss_x.item(),
                loss_y.item(),
                loss_w.item(),
                loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                recall,
                precision,
            )
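Note: recent PyTorch versions prefer boolean masks over ByteTensor for indexing, so the mask handling above would read (an equivalent sketch):

mask = mask.bool()
conf_mask = conf_mask.bool()
conf_mask_true = mask
conf_mask_false = conf_mask & ~mask  # replaces the byte subtraction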