Example #1
    def __init__(self, args):
        action_space = args.action_space
        hidden_state_sz = args.hidden_state_sz
        super(MJOLNIR_O, self).__init__()

        # get and normalize adjacency matrix.
        np.seterr(divide='ignore')
        A_raw = torch.load("./data/gcn/adjmat.dat")
        A = normalize_adj(A_raw).tocsr().toarray()
        self.A = torch.nn.Parameter(torch.Tensor(A))

        n = int(A.shape[0])
        self.n = n

        self.embed_action = nn.Linear(action_space, 10)

        lstm_input_sz = 10 + n * 5 + 512

        self.hidden_state_sz = hidden_state_sz
        self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)

        # glove embeddings for all the objs.
        with open("./data/gcn/objects.txt") as f:
            objects = f.readlines()
            self.objects = [o.strip() for o in objects]
        all_glove = torch.zeros(n, 300)
        glove = Glove(args.glove_file)
        for i in range(n):
            all_glove[i, :] = torch.Tensor(
                glove.glove_embeddings[self.objects[i]][:])

        self.all_glove = nn.Parameter(all_glove)
        self.all_glove.requires_grad = False

        self.W0 = nn.Linear(401, 401, bias=False)
        self.W1 = nn.Linear(401, 401, bias=False)
        self.W2 = nn.Linear(401, 5, bias=False)
        self.W3 = nn.Linear(10, 1, bias=False)

        self.final_mapping = nn.Linear(n, 512)
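The forward pass for the graph branch is not part of this snippet. Below is a minimal sketch, assuming the standard GCN propagation rule over the normalized adjacency self.A with 401-dimensional node features; the layer names come from the code above, while the helper function, its input, and the feature composition are assumptions:

import torch
import torch.nn.functional as F

def mjolnir_graph_sketch(model, node_feats):
    # node_feats: [n, 401] node features, e.g. the 300-d GloVe vector per object
    # concatenated with 101 extra per-object features (an assumption).
    x = F.relu(torch.mm(model.A, model.W0(node_feats)))  # [n, 401]
    x = F.relu(torch.mm(model.A, model.W1(x)))           # [n, 401]
    x = F.relu(torch.mm(model.A, model.W2(x)))           # [n, 5]
    return x.view(1, -1)  # [1, n * 5], the middle slice of lstm_input_sz = 10 + n * 5 + 512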
Example #2
    def __init__(self, args):
        action_space = args.action_space
        target_embedding_sz = args.target_dim
        resnet_embedding_sz = args.hidden_state_sz
        hidden_state_sz = args.hidden_state_sz
        super(BaseModel, self).__init__()

        self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)  # Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
        self.maxp1 = nn.MaxPool2d(2, 2)  # MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        self.embed_glove = nn.Linear(target_embedding_sz, 64)  # Linear(in_features=300, out_features=64, bias=True)
        self.embed_action = nn.Linear(action_space, 10)  # Linear(in_features=7, out_features=10, bias=True)

        pointwise_in_channels = 138

        self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)  # Conv2d(138, 64, kernel_size=(1, 1), stride=(1, 1))

        lstm_input_sz = 7 * 7 * 64

        self.hidden_state_sz = hidden_state_sz
        self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)  # LSTMCell(3136, 512)
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)  # Linear(in_features=512, out_features=1, bias=True)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)  # Linear(in_features=512, out_features=7, bias=True)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.conv1.weight.data.mul_(relu_gain)
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)
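The layer sizes above imply how the three inputs are fused before the LSTM: 64 image channels + 64 tiled GloVe channels + 10 tiled action channels give pointwise_in_channels = 138, and the 7 x 7 x 64 output of the pointwise convolution flattens to lstm_input_sz = 3136. A minimal sketch of that fusion, assuming this is how the forward pass consumes the layers (the helper and its input shapes are assumptions, not code from the source):

import torch
import torch.nn.functional as F

def fusion_sketch(model, resnet_feats, glove, action_onehot):
    # resnet_feats: [1, 512, 7, 7], glove: [1, 300], action_onehot: [1, action_space]
    img = F.relu(model.conv1(resnet_feats))          # [1, 64, 7, 7]
    tgt = F.relu(model.embed_glove(glove))           # [1, 64]
    tgt = tgt.view(1, 64, 1, 1).repeat(1, 1, 7, 7)   # tile over the 7x7 grid
    act = F.relu(model.embed_action(action_onehot))  # [1, 10]
    act = act.view(1, 10, 1, 1).repeat(1, 1, 7, 7)
    x = torch.cat((img, tgt, act), dim=1)            # [1, 138, 7, 7] -> pointwise_in_channels
    x = F.relu(model.pointwise(x))                   # [1, 64, 7, 7]
    return x.view(1, -1)                             # [1, 3136] == lstm_input_sz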
    def __init__(self, args):
        action_space = args.action_space
        # target_embedding_sz = args.target_dim
        resnet_embedding_sz = args.hidden_state_sz
        hidden_state_sz = args.hidden_state_sz
        super(ProtoModel, self).__init__()

        # self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1) # Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
        self.conv1 = nn.Sequential(
            nn.Conv2d(resnet_embedding_sz, 64, kernel_size=1),
            nn.BatchNorm2d(64, momentum=1, affine=True),
            nn.ReLU())
        self.fusion_block = nn.Sequential(
            nn.Conv2d(fusion_channels, 64, kernel_size=match_block_kernel_size),
            nn.BatchNorm2d(64, momentum=1, affine=True),
            nn.ReLU())  # output: [64, 5, 5] or [64, 7, 7]

        self.embed_action = nn.Linear(action_space, action_channels)  # Linear(in_features=7, out_features=10, bias=True)

        # pointwise_in_channels = 64 + 10
        #
        # self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1) # Conv2d(138, 64, kernel_size=(1, 1), stride=(1, 1))

        lstm_input_sz = feature_map_size * feature_map_size * 64

        self.hidden_state_sz = hidden_state_sz
        self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)  # LSTMCell(1600, 512)
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)  # Linear(in_features=512, out_features=1, bias=True)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)  # Linear(in_features=512, out_features=7, bias=True)

        self.apply(weights_init)
        # relu_gain = nn.init.calculate_gain("relu")
        # self.conv1.weight.data.mul_(relu_gain)
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        # self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)
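This excerpt references several module-level constants (fusion_channels, match_block_kernel_size, action_channels, feature_map_size) that are defined elsewhere in the source file. The values below are only guesses consistent with the inline comments (e.g. "LSTMCell(1600, 512)" implies feature_map_size = 5, since 5 * 5 * 64 = 1600), shown so the excerpt can be read stand-alone:

# Assumed values for the constants used above -- not taken from the source file.
action_channels = 10                     # matches the "out_features=10" comment
feature_map_size = 5                     # 5 * 5 * 64 = 1600, the commented LSTM input size
match_block_kernel_size = 3              # a 3x3 kernel shrinks a 7x7 map to 5x5
fusion_channels = 64 + action_channels   # assumed: image channels + tiled action channels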
Example #4
    def __init__(self, args):
        action_space = args.action_space
        target_embedding_sz = args.glove_dim
        resnet_embedding_sz = args.hidden_state_sz
        hidden_state_sz = args.hidden_state_sz
        # Hard-coded to 512, overriding the args-based values read above.
        resnet_embedding_sz = 512
        hidden_state_sz = 512
        super(GcnBaseModel, self).__init__()

        self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
        self.maxp1 = nn.MaxPool2d(2, 2)
        self.embed_glove = nn.Linear(target_embedding_sz, 64)
        self.embed_action = nn.Linear(action_space, 10)

        # GCN layer
        self.gcn_size = 64
        self.gcn = GCN()
        self.gcn_embed = nn.Linear(512, self.gcn_size)  # alternatively, reshape the 512-d vector into a 3-D tensor and apply a pointwise convolution

        pointwise_in_channels = 138 + self.gcn_size

        self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

        lstm_input_sz = 7 * 7 * 64

        self.hidden_state_sz = hidden_state_sz
        self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.conv1.weight.data.mul_(relu_gain)
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01
        )
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0
        )
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        #self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_sz)

        self.dropout = nn.Dropout(p=args.dropout_rate)
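Relative to the plain baseline, this model widens the pointwise fusion by gcn_size = 64 channels carrying the graph feature: pointwise_in_channels = 138 + 64 = 202. A minimal sketch of how that extra branch could be tiled and concatenated, assuming GCN() emits a 512-d vector (inferred from gcn_embed's input size); the helper and its inputs are assumptions:

import torch
import torch.nn.functional as F

def gcn_fusion_sketch(model, img64, tgt64, act10, gcn_feat):
    # img64: [1, 64, 7, 7], tgt64: [1, 64, 7, 7], act10: [1, 10, 7, 7] are the
    # already-embedded image/target/action maps; gcn_feat: [1, 512].
    g = F.relu(model.gcn_embed(gcn_feat))                   # [1, 64]
    g = g.view(1, model.gcn_size, 1, 1).repeat(1, 1, 7, 7)  # tile over the 7x7 grid
    x = torch.cat((img64, tgt64, act10, g), dim=1)          # [1, 202, 7, 7] = 138 + gcn_size
    x = F.relu(model.pointwise(x))                          # [1, 64, 7, 7]
    return x.view(1, -1)                                    # [1, 3136] == lstm_input_sz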
Example #5
    def __init__(self, args):
        action_space = args.action_space
        target_embedding_sz = args.glove_dim
        resnet_embedding_sz = 512
        hidden_state_sz = args.hidden_state_sz
        super(GCN_MLP, self).__init__()

        self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
        self.maxp1 = nn.MaxPool2d(2, 2)
        self.embed_glove = nn.Linear(target_embedding_sz, 64)
        self.embed_action = nn.Linear(action_space, 10)

        pointwise_in_channels = 138

        self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

        lstm_input_sz = 7 * 7 * 64 + 512
        mlp_input_sz = lstm_input_sz

        self.hidden_state_sz = hidden_state_sz
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.conv1.weight.data.mul_(relu_gain)
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)

        n = 83
        self.n = n

        # get and normalize adjacency matrix.
        A_raw = torch.load("./data/gcn/adjmat.dat")
        A = normalize_adj(A_raw).tocsr().toarray()
        self.A = torch.nn.Parameter(torch.Tensor(A))

        # last layer of resnet18.
        resnet18 = models.resnet18(pretrained=True)
        modules = list(resnet18.children())[-2:]
        self.resnet18 = nn.Sequential(*modules)
        for p in self.resnet18.parameters():
            p.requires_grad = False

        # glove embeddings for all the objs.
        with open("./data/gcn/objects.txt") as f:
            objects = [o.strip() for o in f.readlines()]
        all_glove = torch.zeros(n, 300)
        glove = Glove(args.glove_file)
        for i in range(n):
            all_glove[i, :] = torch.Tensor(
                glove.glove_embeddings[objects[i]][:])

        self.all_glove = nn.Parameter(all_glove)
        self.all_glove.requires_grad = False

        self.get_word_embed = nn.Linear(300, 512)
        self.get_class_embed = nn.Linear(1000, 512)

        self.W0 = nn.Linear(1024, 1024, bias=False)
        self.W1 = nn.Linear(1024, 1024, bias=False)
        self.W2 = nn.Linear(1024, 1, bias=False)

        self.final_mapping = nn.Linear(n, 512)
        hidden_o = mlp_input_sz // 2
        self.W0m = nn.Linear(mlp_input_sz, 512, bias=False)
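The graph layers at the end suggest a word-plus-class-score pipeline: each of the n = 83 objects gets a 512-d word embedding and a 512-d embedding of the frozen ResNet-18 class scores, the 1024-d concatenation is propagated through W0/W1/W2 over the adjacency A, and final_mapping turns the resulting n scores into the 512-d vector that matches the +512 in lstm_input_sz. A minimal sketch under those assumptions (the helper and the exact wiring are not part of the snippet):

import torch
import torch.nn.functional as F

def graph_branch_sketch(model, class_scores):
    # class_scores: [1, 1000] ImageNet logits from the frozen resnet18 head (assumed).
    word = model.get_word_embed(model.all_glove)                  # [n, 512]
    cls = model.get_class_embed(class_scores).repeat(model.n, 1)  # [n, 512]
    x = torch.cat((cls, word), dim=1)                             # [n, 1024]
    x = F.relu(torch.mm(model.A, model.W0(x)))                    # [n, 1024]
    x = F.relu(torch.mm(model.A, model.W1(x)))                    # [n, 1024]
    x = F.relu(torch.mm(model.A, model.W2(x)))                    # [n, 1]
    return model.final_mapping(x.view(1, -1))                     # [1, 512], the +512 in lstm_input_sz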
Example #6
    def __init__(self, args):
        action_space = args.action_space
        # target_embedding_sz = args.glove_dim
        target_embedding_sz = 95
        resnet_embedding_sz = args.hidden_state_sz
        hidden_state_sz = args.hidden_state_sz
        super(BaseModel, self).__init__()

        self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
        self.maxp1 = nn.MaxPool2d(2, 2)
        # self.embed_glove = nn.Linear(19*262, 64*7*7)
        # self.embed_glove = nn.Linear(target_embedding_sz, 64)
        # self.embed_glove = nn.Conv2d(256, 64, 1, 1)
        self.embed_action = nn.Linear(action_space, 10)
        # self.embed_action = nn.Linear(10, 10)

        self.detection_appearance_linear_1 = nn.Linear(512, 128)
        self.detection_other_info_linear_1 = nn.Linear(6, 19)
        self.detection_other_info_linear_2 = nn.Linear(19, 19)
        self.detection_appearance_linear_2 = nn.Linear(128, 49)
        # self.graph = nn.Linear(19, 19)

        # pointwise_in_channels = 138
        pointwise_in_channels = 93

        self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

        lstm_input_sz = 7 * 7 * 64

        self.hidden_state_sz = hidden_state_sz
        self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
        num_outputs = action_space
        # self.critic_linear = nn.Linear(hidden_state_sz, 1)
        self.critic_linear_1 = nn.Linear(hidden_state_sz, 64)
        # self.critic_linear_2 = nn.Linear(72, 1)
        self.critic_linear_2 = nn.Linear(64, 1)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.conv1.weight.data.mul_(relu_gain)
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)

        # self.critic_linear.weight.data = norm_col_init(
        #     self.critic_linear.weight.data, 1.0
        # )
        # self.critic_linear.bias.data.fill_(0)

        self.critic_linear_1.weight.data = norm_col_init(
            self.critic_linear_1.weight.data, 1.0)
        self.critic_linear_1.bias.data.fill_(0)
        self.critic_linear_2.weight.data = norm_col_init(
            self.critic_linear_2.weight.data, 1.0)
        self.critic_linear_2.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)
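Here pointwise_in_channels = 93, which presumably decomposes as 64 image channels + 10 action channels + 19 detection channels, matching the 19-way detection layers above. The sketch below shows one way those pieces could fit together; the input shapes and the mixing step are assumptions inferred from the layer sizes, not code from the source:

import torch
import torch.nn.functional as F

def detection_fusion_sketch(model, img64, act10, det_appearance, det_info):
    # img64: [1, 64, 7, 7], act10: [1, 10, 7, 7]; det_appearance: [19, 512] and
    # det_info: [19, 6] are per-class detector features (assumed shapes).
    app = F.relu(model.detection_appearance_linear_1(det_appearance))  # [19, 128]
    app = F.relu(model.detection_appearance_linear_2(app))             # [19, 49]
    info = F.relu(model.detection_other_info_linear_1(det_info))       # [19, 19]
    info = F.relu(model.detection_other_info_linear_2(info))           # [19, 19]
    det = torch.mm(info, app).view(1, 19, 7, 7)    # re-weight classes, reshape 49 -> 7x7 (assumed)
    x = torch.cat((img64, act10, det), dim=1)      # [1, 93, 7, 7] -> pointwise_in_channels
    return F.relu(model.pointwise(x)).view(1, -1)  # [1, 3136] == lstm_input_sz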
Example #7
    def __init__(self, args):
        action_space = args.action_space
        target_embedding_sz = args.glove_dim
        resnet_embedding_sz = 512
        hidden_state_sz = args.hidden_state_sz
        super(MJOLNIR_R, self).__init__()

        self.conv1 = nn.Conv2d(resnet_embedding_sz, 64, 1)
        self.maxp1 = nn.MaxPool2d(2, 2)
        self.embed_glove = nn.Linear(target_embedding_sz, 64)
        self.embed_action = nn.Linear(action_space, 10)

        pointwise_in_channels = 64 + 64 + 10

        self.pointwise = nn.Conv2d(pointwise_in_channels, 64, 1, 1)

        lstm_input_sz = 7 * 7 * 64 + 512

        self.hidden_state_sz = hidden_state_sz
        self.lstm = nn.LSTMCell(lstm_input_sz, hidden_state_sz)
        num_outputs = action_space
        self.critic_linear = nn.Linear(hidden_state_sz, 1)
        self.actor_linear = nn.Linear(hidden_state_sz, num_outputs)

        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain("relu")
        self.conv1.weight.data.mul_(relu_gain)
        self.actor_linear.weight.data = norm_col_init(
            self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.action_predict_linear = nn.Linear(2 * lstm_input_sz, action_space)

        self.dropout = nn.Dropout(p=args.dropout_rate)

        # get and normalize adjacency matrix.
        np.seterr(divide='ignore')
        A_raw = torch.load("./data/gcn/adjmat.dat")
        A = normalize_adj(A_raw).tocsr().toarray()
        self.A = torch.nn.Parameter(torch.Tensor(A))

        n = int(A.shape[0])
        self.n = n

        # last layer of resnet18.
        resnet18 = models.resnet18(pretrained=True)
        modules = list(resnet18.children())[-2:]
        self.resnet18 = nn.Sequential(*modules)
        for p in self.resnet18.parameters():
            p.requires_grad = False

        # glove embeddings for all the objs.
        with open("./data/gcn/objects.txt") as f:
            objects = f.readlines()
            self.objects = [o.strip() for o in objects]
        all_glove = torch.zeros(n, 300)
        glove = Glove(args.glove_file)
        for i in range(n):
            all_glove[i, :] = torch.Tensor(
                glove.glove_embeddings[self.objects[i]][:])

        self.all_glove = nn.Parameter(all_glove)
        self.all_glove.requires_grad = False

        self.W0 = nn.Linear(401, 401, bias=False)
        self.W1 = nn.Linear(401, 401, bias=False)
        self.W2 = nn.Linear(401, 5, bias=False)
        self.W3 = nn.Linear(10, 1, bias=False)

        self.final_mapping = nn.Linear(n, 512)
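For completeness, a hypothetical instantiation of the class above using an argparse-style namespace that carries only the fields read in __init__. All values are illustrative and the glove_file path is a placeholder, not the repo's actual file name:

from types import SimpleNamespace

args = SimpleNamespace(
    action_space=7,          # e.g. the 7-action space from the comments in the other examples
    glove_dim=300,
    hidden_state_sz=512,
    dropout_rate=0.25,       # illustrative value
    glove_file="path/to/glove_embeddings_file",  # placeholder path
)
model = MJOLNIR_R(args)
print(model.n, model.lstm)   # number of graph nodes and LSTMCell(3648, 512)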