def __init__(self, num_embeddings, num_classes):
    """Build a two-branch VQA model: GoogLeNet image features fused with a
    linearly-projected question vector, followed by a single classifier.

    Args:
        num_embeddings: dimensionality of the input question vector
            (presumably a bag-of-words encoding — confirm against caller).
        num_classes: number of answer classes produced by the final layer.
    """
    super().__init__()
    # Image branch; remove_fc=True presumably strips the ImageNet head so the
    # backbone emits 1024-d features — confirm against the googlenet impl.
    self.gnet = googlenet(pretrained=True, remove_fc=True)
    # Question branch: dense projection into the same 1024-d space.
    self.embed = nn.Linear(num_embeddings, 1024)
    # Classifier over the concatenated [image | question] feature vector.
    self.fc = nn.Linear(1024 + 1024, num_classes)
def __init__(self):
    """Fixed-size VQA model: pretrained GoogLeNet image branch plus a linear
    word branch, combined (elsewhere) into a 2000-d vector and classified
    over 5217 answers with a softmax.
    """
    super().__init__()
    # Image branch; pretrained GoogLeNet, presumably emitting 1000-d
    # ImageNet logits — confirm against the googlenet implementation.
    self.ImageNet = googlenet(pretrained=True)
    # Word branch: project the 5747-d question vector to 1000 dims.
    self.WordNet = nn.Linear(5747, 1000)
    # Classifier over concatenated image (1000) + word (1000) features.
    self.LinearLayer = nn.Linear(2000, 5217)
    # Normalizes classifier scores into per-sample probabilities.
    self.activation = nn.Softmax(dim=1)
def __init__(self, num_question, num_answer, pretrained=True):
    """VQA model: GoogLeNet image features (1024-d, per the fc sizing below)
    concatenated with a 2000-d linear question embedding.

    Args:
        num_question: dimensionality of the input question vector.
        num_answer: number of answer classes.
        pretrained: whether to load pretrained GoogLeNet weights.
    """
    super().__init__()
    # BUG FIX: the original hard-coded pretrained=True, silently ignoring
    # the constructor's `pretrained` flag; honor the caller's choice.
    self.feature = googlenet(pretrained=pretrained)
    # Question branch: dense projection to 2000 dims (a Linear over a
    # multi-hot vector, used here in place of nn.Embedding).
    self.embedding = nn.Linear(num_question, 2000)
    # Classifier over concatenated image (1024) + question (2000) features.
    self.fc = nn.Linear(1024 + 2000, num_answer)
def __init__(self, vocab_size, embedding_size, ans_size):
    """Two-branch VQA classifier with a log-softmax output head.

    Args:
        vocab_size: dimensionality of the input question vector.
        embedding_size: size of the projected question representation.
        ans_size: number of answer classes.
    """
    super().__init__()
    self.vocab_size = vocab_size
    self.embedding_size = embedding_size
    self.ans_size = ans_size
    # Image branch: pretrained GoogLeNet, presumably emitting 1000-d
    # features (matches the +1000 in the classifier below) — confirm.
    self.img_features = googlenet(pretrained=True)
    # Word branch: dense projection of the vocab-sized question vector.
    self.embedding = nn.Linear(self.vocab_size, self.embedding_size)
    # Classifier over concatenated word + image features.
    self.linear = nn.Linear(self.embedding_size + 1000, self.ans_size)
    # LogSoftmax pairs with NLLLoss downstream — presumably; verify training loop.
    self.softmax = nn.LogSoftmax(dim=1)
def __init__(self):
    """Fixed-size VQA model (assignment section 2.2): GoogLeNet image branch,
    1024-d word-embedding branch, and a single linear classifier over the
    concatenated features.
    """
    super().__init__()
    # Image branch; pretrained GoogLeNet, presumably 1000-d output
    # (matches the 1024 + 1000 classifier input below) — confirm.
    self.googlenet = googlenet(pretrained=True)
    # Word branch: project the 5747-d question vector to 1024 dims.
    # Wrapped in Sequential so extra layers can be slotted in later.
    self.word_embeddings = nn.Sequential(nn.Linear(5747, 1024))
    # Single-layer classifier over word (1024) + image (1000) features,
    # producing scores over 5217 answer classes.
    self.softmax_layer = nn.Sequential(nn.Linear(1024 + 1000, 5217))
# Extract GoogLeNet features for every COCO image and cache them as .npy
# files under dst_dir (mirroring the source .jpg filenames).
transform_pipeline = transforms.Compose([
    transforms.Resize(resize_dim),
    transforms.CenterCrop(crop_dim),
    transforms.ToTensor(),
    # Standard ImageNet normalization statistics, as expected by a
    # pretrained GoogLeNet.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
coco_dataset = CocoDataset(src_dir, transform=transform_pipeline)
# shuffle=False keeps feature files aligned with dataset order;
# pin_memory speeds up the host->GPU copies below.
coco_loader = DataLoader(coco_dataset, batch_size=batch_size,
                         num_workers=num_workers, shuffle=False,
                         pin_memory=True)

# exist_ok=True replaces the racy exists()/makedirs() pair (TOCTOU-safe).
os.makedirs(dst_dir, exist_ok=True)

model = googlenet(pretrained=True)
model = model.cuda()
model.eval()  # disable dropout/batch-norm updates for inference

for img_paths, images in coco_loader:
    images = images.cuda()
    # Model presumably returns (features, aux); only features are saved.
    output, _ = model(images)
    for j in range(len(img_paths)):
        feat_name = join(dst_dir, img_paths[j].replace('.jpg', '.npy'))
        # .detach() replaces the deprecated .data access before the
        # GPU->CPU copy and numpy conversion.
        np.save(feat_name, output[j].detach().cpu().numpy())
def __init__(self, n_ques, n_ans):
    """VQA classifier: GoogLeNet image branch plus a 1024-d linear question
    branch, classified over n_ans + 1 outputs.

    Args:
        n_ques: dimensionality of the input question vector.
        n_ans: number of answer classes (one extra output is added below).
    """
    super().__init__()
    # Image branch; presumably emits 1024-d features so that the
    # concatenation below is 2048-d — confirm against googlenet.
    self.img_feat = googlenet(pretrained=True)
    # Question branch: project the question vector into 1024 dims.
    self.ques_feat = nn.Linear(n_ques, 1024)
    # Classifier; the +1 output presumably reserves an "unknown answer"
    # slot — verify against the label encoding.
    self.fc = nn.Linear(2048, n_ans + 1)
def __init__(self, num_question, num_answer, pretrained=True):
    """VQA model: GoogLeNet image features concatenated with a 1500-d
    linear question embedding.

    Args:
        num_question: dimensionality of the input question vector.
        num_answer: number of answer classes.
        pretrained: whether to load pretrained GoogLeNet weights.
    """
    super().__init__()
    # BUG FIX: the original hard-coded pretrained=True, silently ignoring
    # the constructor's `pretrained` flag; honor the caller's choice.
    self.feature = googlenet(pretrained=pretrained)
    # Question branch: dense projection to 1500 dims.
    self.embedding = nn.Linear(num_question, 1500)
    # 1024 + 1500 == 2524 (the original magic constant): image features
    # concatenated with the question embedding.
    self.fc = nn.Linear(1024 + 1500, num_answer)