def __init__(self, num_embeddings, num_classes):
        """Two-branch VQA head: GoogLeNet image features fused with a linear
        question embedding, then a single linear classifier.

        Args:
            num_embeddings: size of the bag-of-words question vector.
            num_classes: number of answer classes.
        """
        super().__init__()

        feat_dim = 1024
        # Image branch: pretrained GoogLeNet with its classifier removed,
        # leaving 1024-d pooled features.
        self.gnet = googlenet(pretrained=True, remove_fc=True)
        # Question branch: project the question vector into the same 1024-d space.
        self.embed = nn.Linear(num_embeddings, feat_dim)
        # Classifier over the concatenated [image | question] features.
        self.fc = nn.Linear(feat_dim * 2, num_classes)
# --- Example 2 ---
    def __init__(self):
        """VQA baseline with two parallel extractors and a joint classifier."""
        super().__init__()

        # Visual branch: ImageNet-pretrained GoogLeNet (1000-d output).
        self.ImageNet = googlenet(pretrained=True)
        # Textual branch: 5747-d bag-of-words vector projected to 1000-d.
        self.WordNet = nn.Linear(5747, 1000)
        # Joint classifier over concatenated features (1000 + 1000 = 2000 in).
        self.LinearLayer = nn.Linear(2000, 5217)
        # Softmax over the answer dimension.
        self.activation = nn.Softmax(dim=1)
 def __init__(self, num_question, num_answer, pretrained=True):
     """VQA head: GoogLeNet image features (N x 1024) concatenated with a
     linear question embedding (N x 2000), classified into answers.

     Args:
         num_question: size of the bag-of-words question vector.
         num_answer: number of answer classes.
         pretrained: load ImageNet-pretrained GoogLeNet weights.
     """
     super().__init__()
     # Bug fix: `pretrained` was accepted but ignored (hard-coded True);
     # forward it to the backbone so callers can disable pretraining.
     self.feature = googlenet(pretrained=pretrained)
     # Question branch: project the question vector to 2000-d.
     self.embedding = nn.Linear(num_question, 2000)
     # Classifier over concatenated image (1024) + question (2000) features.
     self.fc = nn.Linear(1024 + 2000, num_answer)
# --- Example 4 ---
    def __init__(self, vocab_size, embedding_size, ans_size):
        """Joint image-question classifier: GoogLeNet visual features plus a
        linear word embedding, combined by one linear layer and a log-softmax.

        Args:
            vocab_size: size of the bag-of-words question vector.
            embedding_size: dimensionality of the word-embedding projection.
            ans_size: number of answer classes.
        """
        super().__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.ans_size = ans_size

        img_feat_dim = 1000  # GoogLeNet's classifier output width

        # Visual branch: ImageNet-pretrained GoogLeNet.
        self.img_features = googlenet(pretrained=True)

        # Textual branch: project the question vector to embedding_size.
        self.embedding = nn.Linear(vocab_size, embedding_size)
        # Classifier over the concatenated [word | image] features.
        self.linear = nn.Linear(embedding_size + img_feat_dim, ans_size)
        self.softmax = nn.LogSoftmax(dim=1)
    def __init__(self):
        """Simple VQA baseline: GoogLeNet image features (1000-d) concatenated
        with a 1024-d linear word embedding, classified by one linear layer.

        Cleanup: removed the unused local `hid_dim` and the blocks of
        commented-out scaffolding (frozen-backbone loop, dropped MLP variant).
        """
        super().__init__()
        # Visual branch: ImageNet-pretrained GoogLeNet.
        self.googlenet = googlenet(pretrained=True)
        # Textual branch: project the 5747-d bag-of-words vector to 1024-d.
        self.word_embeddings = nn.Sequential(nn.Linear(5747, 1024))
        # Classifier over concatenated [word (1024) | image (1000)] features.
        # Kept as a Sequential so checkpoint keys (softmax_layer.0.*) stay stable.
        in_dim = 1024 + 1000
        out_dim = 5217
        self.softmax_layer = nn.Sequential(
            nn.Linear(in_dim, out_dim),
        )
    # Standard ImageNet preprocessing: resize, center-crop, convert to tensor,
    # and normalize with the usual ImageNet per-channel mean/std.
    transform_pipeline = transforms.Compose([transforms.Resize(resize_dim),
                                            transforms.CenterCrop(crop_dim),
                                            transforms.ToTensor(),
                                            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                std=[0.229, 0.224, 0.225])])

    coco_dataset = CocoDataset(src_dir, transform=transform_pipeline)

    # shuffle=False keeps the saved feature files aligned with dataset order;
    # pin_memory speeds up the host-to-GPU copies below.
    coco_loader = DataLoader(coco_dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=False,
                            pin_memory=True)

    # Make sure the output directory exists before writing features.
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    
    # Pretrained GoogLeNet in eval mode on the GPU, used purely as a frozen
    # feature extractor for every image in the dataset.
    model = googlenet(pretrained=True)
    model = model.cuda()
    model.eval()

    for i, (img_paths, images) in enumerate(coco_loader):
        images = images.cuda()
        # NOTE(review): model(images) appears to return a tuple and only the
        # first element is kept — confirm against the local googlenet
        # implementation (torchvision's returns a bare tensor in eval mode).
        output, _ = model(images)

        # Save one .npy feature file per image, mirroring the source filename.
        for j in range(len(img_paths)):
            feat_name = img_paths[j].replace('.jpg', '.npy')
            feat_name = join(dst_dir, feat_name)
            np.save(feat_name, output[j].cpu().data.numpy())
# --- Example 7 ---
 def __init__(self, n_ques, n_ans):
     """Two-branch VQA model: 1024-d GoogLeNet image features plus a 1024-d
     linear question embedding, fused by a single classifier layer.

     Args:
         n_ques: size of the bag-of-words question vector.
         n_ans: number of answer classes (one extra output slot is added).
     """
     super().__init__()
     branch_dim = 1024
     # Visual branch: ImageNet-pretrained GoogLeNet.
     self.img_feat = googlenet(pretrained=True)
     # Question branch: linear projection into the same 1024-d space.
     self.ques_feat = nn.Linear(n_ques, branch_dim)
     # 2048 = image (1024) + question (1024). The extra "+ 1" output is
     # presumably an out-of-vocabulary/unknown-answer slot — confirm against
     # how the training labels are indexed.
     self.fc = nn.Linear(branch_dim * 2, n_ans + 1)
# --- Example 8 ---
 def __init__(self, num_question, num_answer, pretrained=True):
     """GoogLeNet image features (1024-d) + linear question embedding
     (1500-d), concatenated and classified by one linear layer.

     Args:
         num_question: size of the bag-of-words question vector.
         num_answer: number of answer classes.
         pretrained: load ImageNet-pretrained GoogLeNet weights.
     """
     super().__init__()
     # Bug fix: forward the `pretrained` flag instead of hard-coding True,
     # matching the signature this method already advertises.
     self.feature = googlenet(pretrained=pretrained)
     self.embedding = nn.Linear(num_question, 1500)
     # 2524 = 1024 image features + 1500 question features.
     self.fc = nn.Linear(2524, num_answer)