def __init__(self, args):
    action_space = args.action_space
    hidden_state_sz = args.hidden_state_sz
    super(MJOLNIR_O, self).__init__()

    # get and normalize adjacency matrix.
    np.seterr(divide='ignore')
    A_raw = torch.load("./data/gcn/adjmat.dat")
    A = normalize_adj(A_raw).tocsr().toarray()
    self.A = torch.nn.Parameter(torch.Tensor(A))
    n = int(A.shape[0])
    self.n = n

    self.embed_action = nn.Linear(action_space, 10)

    lstm_input_sz = 10 + n * 5 + 512

    self.hidden_state_sz = hidden_state_sz
    self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
    num_outputs = action_space
    self.critic_linear = nn.Linear(hidden_state_sz, 1)
    self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain("relu")
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)

    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)

    # glove embeddings for all the objs.
    with open("./data/gcn/objects.txt") as f:
        objects = f.readlines()
        self.objects = [o.strip() for o in objects]
    all_glove = torch.zeros(n, 300)
    glove = Glove(args.glove_file)
    for i in range(n):
        all_glove[i, :] = torch.Tensor(
            glove.glove_embeddings[self.objects[i]][:])

    self.all_glove = nn.Parameter(all_glove)
    self.all_glove.requires_grad = False

    self.W0 = nn.Linear(401, 401, bias=False)
    self.W1 = nn.Linear(401, 401, bias=False)
    self.W2 = nn.Linear(401, 5, bias=False)
    self.W3 = nn.Linear(10, 1, bias=False)
    self.final_mapping = nn.Linear(n, 512)
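
# normalize_adj is not defined in this file. Below is a minimal sketch of the
# symmetric normalization D^-1/2 A D^-1/2 that such a helper usually performs
# (the np.seterr(divide='ignore') call above hints at zero-degree rows). It
# returns a scipy sparse matrix so that .tocsr().toarray() works as used above.
# The name _normalize_adj_sketch and the scipy-based implementation are
# illustrative assumptions, not the project's own helper.
import scipy.sparse as sp

def _normalize_adj_sketch(adj):
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1)).flatten()       # node degrees
    d_inv_sqrt = np.power(rowsum, -0.5)           # D^-1/2, inf where degree is 0
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()
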
def __init__(self, args):
    action_space = args.action_space
    target_embedding_sz = args.target_dim
    resnet_embedding_sz = args.hidden_state_sz
    hidden_state_sz = args.hidden_state_sz
    super(BaseModel, self).__init__()

    self.conv1 = nn.Conv2d(
        resnet_embedding_sz, 64,
        1)  # Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
    self.maxp1 = nn.MaxPool2d(
        2, 2)  # MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    self.embed_glove = nn.Linear(
        target_embedding_sz,
        64)  # Linear(in_features=300, out_features=64, bias=True)
    self.embed_action = nn.Linear(
        action_space, 10)  # Linear(in_features=7, out_features=10, bias=True)

    pointwise_in_channels = 138
    self.pointwise = nn.Conv2d(
        pointwise_in_channels, 64, 1,
        1)  # Conv2d(138, 64, kernel_size=(1, 1), stride=(1, 1))

    lstm_input_sz = 7 * 7 * 64

    self.hidden_state_sz = hidden_state_sz
    self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)  # LSTMCell(3136, 512)
    num_outputs = action_space
    self.critic_linear = nn.Linear(
        hidden_state_sz, 1)  # Linear(in_features=512, out_features=1, bias=True)
    self.actor_linear = nn.Linear(
        hidden_state_sz,
        num_outputs)  # Linear(in_features=512, out_features=7, bias=True)

    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain("relu")
    self.conv1.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)

    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)
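
# Minimal instantiation sketch (an illustration, not part of the original
# file). The attribute names match how `args` is read above; the concrete
# values (7 actions, 300-d target embedding, 512-d hidden state, 0.25 dropout)
# and the helper name _build_demo_args are assumptions for demonstration only.
def _build_demo_args():
    from argparse import Namespace
    return Namespace(action_space=7, target_dim=300,
                     hidden_state_sz=512, dropout_rate=0.25)

# model = BaseModel(_build_demo_args())  # hypothetical usage
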
def __init__(self, args):
    action_space = args.action_space
    # target_embedding_sz = args.target_dim
    resnet_embedding_sz = args.hidden_state_sz
    hidden_state_sz = args.hidden_state_sz
    super(ProtoModel, self).__init__()

    # fusion_channels, match_block_kernel_size, action_channels and
    # feature_map_size are expected to be defined elsewhere (module-level
    # constants or configuration).
    # self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)  # Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
    self.conv1 = nn.Sequential(
        nn.Conv2d(resnet_embedding_sz, 64, kernel_size=1),
        nn.BatchNorm2d(64, momentum=1, affine=True),
        nn.ReLU())
    self.fusion_block = nn.Sequential(
        nn.Conv2d(fusion_channels, 64, kernel_size=match_block_kernel_size),
        nn.BatchNorm2d(64, momentum=1, affine=True),
        nn.ReLU())  # output: [64, 5, 5] or [64, 7, 7]
    self.embed_action = nn.Linear(
        action_space,
        action_channels)  # Linear(in_features=7, out_features=10, bias=True)

    # pointwise_in_channels = 64 + 10
    # self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)  # Conv2d(138, 64, kernel_size=(1, 1), stride=(1, 1))

    lstm_input_sz = feature_map_size * feature_map_size * 64

    self.hidden_state_sz = hidden_state_sz
    self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)  # LSTMCell(1600, 512)
    num_outputs = action_space
    self.critic_linear = nn.Linear(
        hidden_state_sz, 1)  # Linear(in_features=512, out_features=1, bias=True)
    self.actor_linear = nn.Linear(
        hidden_state_sz,
        num_outputs)  # Linear(in_features=512, out_features=7, bias=True)

    self.apply(weights_init)
    # relu_gain = nn.init.calculate_gain("relu")
    # self.conv1.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)

    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    # self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)
def __init__(self, args):
    action_space = args.action_space
    target_embedding_sz = args.glove_dim
    resnet_embedding_sz = args.hidden_state_sz
    hidden_state_sz = args.hidden_state_sz
    resnet_embedding_sz = 512
    hidden_state_sz = 512
    super(GcnBaseModel, self).__init__()

    self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.embed_glove = nn.Linear(target_embedding_sz, 64)
    self.embed_action = nn.Linear(action_space, 10)

    # GCN layer
    self.gcn_size = 64
    self.gcn = GCN()
    # Alternatively, the 512-d GCN output could be reshaped into a 3-D tensor
    # and fused with a pointwise convolution.
    self.gcn_embed = nn.Linear(512, self.gcn_size)

    pointwise_in_channels = 138 + self.gcn_size
    self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

    lstm_input_sz = 7 * 7 * 64

    self.hidden_state_sz = hidden_state_sz
    self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
    num_outputs = action_space
    self.critic_linear = nn.Linear(hidden_state_sz, 1)
    self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain("relu")
    self.conv1.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)

    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    # self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)
def __init__(self, args):
    action_space = args.action_space
    target_embedding_sz = args.glove_dim
    resnet_embedding_sz = 512
    hidden_state_sz = args.hidden_state_sz
    super(GCN_MLP, self).__init__()

    self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.embed_glove = nn.Linear(target_embedding_sz, 64)
    self.embed_action = nn.Linear(action_space, 10)

    pointwise_in_channels = 138
    self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

    lstm_input_sz = 7 * 7 * 64 + 512
    mlp_input_sz = lstm_input_sz

    self.hidden_state_sz = hidden_state_sz
    num_outputs = action_space
    self.critic_linear = nn.Linear(hidden_state_sz, 1)
    self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain("relu")
    self.conv1.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)

    self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)

    n = 83
    self.n = n

    # get and normalize adjacency matrix.
    A_raw = torch.load("./data/gcn/adjmat.dat")
    A = normalize_adj(A_raw).tocsr().toarray()
    self.A = torch.nn.Parameter(torch.Tensor(A))

    # last layer of resnet18.
    resnet18 = models.resnet18(pretrained=True)
    modules = list(resnet18.children())[-2:]
    self.resnet18 = nn.Sequential(*modules)
    for p in self.resnet18.parameters():
        p.requires_grad = False

    # glove embeddings for all the objs.
    objects = open("./data/gcn/objects.txt").readlines()
    objects = [o.strip() for o in objects]
    all_glove = torch.zeros(n, 300)
    glove = Glove(args.glove_file)
    for i in range(n):
        all_glove[i, :] = torch.Tensor(
            glove.glove_embeddings[objects[i]][:])

    self.all_glove = nn.Parameter(all_glove)
    self.all_glove.requires_grad = False

    self.get_word_embed = nn.Linear(300, 512)
    self.get_class_embed = nn.Linear(1000, 512)

    self.W0 = nn.Linear(1024, 1024, bias=False)
    self.W1 = nn.Linear(1024, 1024, bias=False)
    self.W2 = nn.Linear(1024, 1, bias=False)

    self.final_mapping = nn.Linear(n, 512)

    hidden_o = mlp_input_sz // 2
    self.W0m = nn.Linear(mlp_input_sz, 512, bias=False)
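
# Glove is not defined in this file. Based on how it is used above
# (Glove(args.glove_file).glove_embeddings[object_name] -> 300-d vector), the
# snippet below sketches one plausible loader for a plain-text GloVe file.
# The class name _GloveSketch and the whitespace-separated file format are
# assumptions for illustration; the project's own Glove class may differ.
class _GloveSketch:
    def __init__(self, glove_file):
        self.glove_embeddings = {}
        with open(glove_file, encoding="utf-8") as f:
            for line in f:
                parts = line.rstrip().split(" ")
                # word followed by its embedding values (e.g. 300 floats)
                self.glove_embeddings[parts[0]] = [float(v) for v in parts[1:]]
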
def __init__(self, args):
    action_space = args.action_space
    # target_embedding_sz = args.glove_dim
    target_embedding_sz = 95
    resnet_embedding_sz = args.hidden_state_sz
    hidden_state_sz = args.hidden_state_sz
    super(BaseModel, self).__init__()

    self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
    self.maxp1 = nn.MaxPool2d(2, 2)
    # self.embed_glove = nn.Linear(19 * 262, 64 * 7 * 7)
    # self.embed_glove = nn.Linear(target_embedding_sz, 64)
    # self.embed_glove = nn.Conv2d(256, 64, 1, 1)
    self.embed_action = nn.Linear(action_space, 10)
    # self.embed_action = nn.Linear(10, 10)

    self.detection_appearance_linear_1 = nn.Linear(512, 128)
    self.detection_other_info_linear_1 = nn.Linear(6, 19)
    self.detection_other_info_linear_2 = nn.Linear(19, 19)
    self.detection_appearance_linear_2 = nn.Linear(128, 49)
    # self.graph = nn.Linear(19, 19)

    # pointwise_in_channels = 138
    pointwise_in_channels = 93
    self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

    lstm_input_sz = 7 * 7 * 64

    self.hidden_state_sz = hidden_state_sz
    self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
    num_outputs = action_space
    # self.critic_linear = nn.Linear(hidden_state_sz, 1)
    self.critic_linear_1 = nn.Linear(hidden_state_sz, 64)
    # self.critic_linear_2 = nn.Linear(72, 1)
    self.critic_linear_2 = nn.Linear(64, 1)
    self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain("relu")
    self.conv1.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    # self.critic_linear.weight.data = norm_col_init(
    #     self.critic_linear.weight.data, 1.0)
    # self.critic_linear.bias.data.fill_(0)
    self.critic_linear_1.weight.data = norm_col_init(
        self.critic_linear_1.weight.data, 1.0)
    self.critic_linear_1.bias.data.fill_(0)
    self.critic_linear_2.weight.data = norm_col_init(
        self.critic_linear_2.weight.data, 1.0)
    self.critic_linear_2.bias.data.fill_(0)

    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)
def __init__(self, args):
    action_space = args.action_space
    target_embedding_sz = args.glove_dim
    resnet_embedding_sz = 512
    hidden_state_sz = args.hidden_state_sz
    super(MJOLNIR_R, self).__init__()

    self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.embed_glove = nn.Linear(target_embedding_sz, 64)
    self.embed_action = nn.Linear(action_space, 10)

    pointwise_in_channels = 64 + 64 + 10
    self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

    lstm_input_sz = 7 * 7 * 64 + 512

    self.hidden_state_sz = hidden_state_sz
    self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
    num_outputs = action_space
    self.critic_linear = nn.Linear(hidden_state_sz, 1)
    self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

    self.apply(weights_init)
    relu_gain = nn.init.calculate_gain("relu")
    self.conv1.weight.data.mul_(relu_gain)
    self.actor_linear.weight.data = norm_col_init(
        self.actor_linear.weight.data, 0.01)
    self.actor_linear.bias.data.fill_(0)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)

    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)
    self.dropout = nn.Dropout(p=args.dropout_rate)

    # get and normalize adjacency matrix.
    np.seterr(divide='ignore')
    A_raw = torch.load("./data/gcn/adjmat.dat")
    A = normalize_adj(A_raw).tocsr().toarray()
    self.A = torch.nn.Parameter(torch.Tensor(A))
    n = int(A.shape[0])
    self.n = n

    # last layer of resnet18.
    resnet18 = models.resnet18(pretrained=True)
    modules = list(resnet18.children())[-2:]
    self.resnet18 = nn.Sequential(*modules)
    for p in self.resnet18.parameters():
        p.requires_grad = False

    # glove embeddings for all the objs.
    with open("./data/gcn/objects.txt") as f:
        objects = f.readlines()
        self.objects = [o.strip() for o in objects]
    all_glove = torch.zeros(n, 300)
    glove = Glove(args.glove_file)
    for i in range(n):
        all_glove[i, :] = torch.Tensor(
            glove.glove_embeddings[self.objects[i]][:])

    self.all_glove = nn.Parameter(all_glove)
    self.all_glove.requires_grad = False

    self.W0 = nn.Linear(401, 401, bias=False)
    self.W1 = nn.Linear(401, 401, bias=False)
    self.W2 = nn.Linear(401, 5, bias=False)
    self.W3 = nn.Linear(10, 1, bias=False)
    self.final_mapping = nn.Linear(n, 512)
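
# The W0-W2 layers above have the shape of a three-step graph convolution over
# self.A (n x n) applied to per-node features of size 401 (e.g. the 300-d
# GloVe vectors concatenated with other per-object information). The sketch
# below shows that standard propagation pattern as an assumption about how
# these parameters could be used; the function name, the feature composition,
# and the final flattening are illustrative, not taken from this file.
def _gcn_propagate_sketch(model, node_feats):
    # node_feats: tensor of shape (model.n, 401)
    x = torch.relu(model.W0(torch.mm(model.A, node_feats)))  # (n, 401)
    x = torch.relu(model.W1(torch.mm(model.A, x)))           # (n, 401)
    x = torch.relu(model.W2(torch.mm(model.A, x)))           # (n, 5)
    return x.view(1, -1)                                     # 1 x (n * 5)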