def forward(self, indices):
    """Build the attention-weighted readout for one batch.

    Softmax-normalizes the incoming 1-channel mask over its spatial
    positions, tiles it across the input feature channels, multiplies it
    elementwise with the input features, reduces over the spatial axis,
    and finally adds a per-layout bias (looked up from ``indices``) to an
    inner-product projection into the answer vocabulary.

    Args:
        indices: per-example layout indices, fed to a Wordvec lookup that
            produces the bias term.
    """
    input_channels = self.apollo_net.blobs[self.input_name].shape[1]
    batch_size, mask_channels, width, height = self.apollo_net.blobs[
        self.incoming_names[0]].shape
    # The incoming attention mask must be single-channel.
    assert mask_channels == 1
    flat_shape = (batch_size, width * height)
    # Flatten spatial dims so Softmax normalizes over all positions at once.
    # TODO(jda) is this evil?  (in-place reshape of another layer's blob)
    self.apollo_net.blobs[self.incoming_names[0]].reshape(flat_shape)
    self.apollo_net.f(
        layers.Softmax(self.softmax_name, bottoms=[self.incoming_names[0]]))
    # Restore the 4-D layout so the mask can be tiled across channels.
    self.apollo_net.blobs[self.softmax_name].reshape(
        (batch_size, 1, width, height))
    self.apollo_net.f(
        layers.Tile(
            #self.tile_name, axis=1, tiles=self.hidden_size,
            self.tile_name, axis=1, tiles=input_channels,
            bottoms=[self.softmax_name]))
    # Alternative (disabled): 1x1 conv projection of the input features.
    #self.apollo_net.f(layers.Convolution(
    #    self.hidden_name, (1, 1), self.hidden_size, bottoms=[self.input_name]))
    # Elementwise product = attention-weight every input feature map.
    self.apollo_net.f(
        layers.Eltwise(self.attention_name,
                       bottoms=[self.tile_name, self.input_name],
                       operation="PROD"))
    # Reduce over axis 2 — presumably collapsing the spatial extent into a
    # per-channel attended feature vector; TODO confirm blob layout here.
    self.apollo_net.f(
        layers.Reduction(self.reduction_name, axis=2,
                         bottoms=[self.attention_name]))
    self.apollo_net.f(layers.NumpyData(self.indices_name, indices))
    # Per-layout answer bias: one len(ANSWER_INDEX)-dim row per layout index.
    self.apollo_net.f(
        layers.Wordvec(self.bias_name, len(ANSWER_INDEX), len(LAYOUT_INDEX),
                       bottoms=[self.indices_name]))
    self.apollo_net.f(
        layers.InnerProduct(self.ip_name, len(ANSWER_INDEX),
                            bottoms=[self.reduction_name]))
    # Final logits = projection + layout-specific bias.
    self.apollo_net.f(
        layers.Eltwise(self.sum_name,
                       bottoms=[self.bias_name, self.ip_name],
                       operation="SUM"))
def forward(self, tokens):
    """Encode a token sequence with an LSTM and merge with the incoming blob.

    Runs one LSTM unit per time step over word embeddings of ``tokens``
    (shape: examples x steps), projects the final hidden state into the
    answer vocabulary, rectifies it, and sums it elementwise with the
    incoming module's output.
    """
    net = self.apollo_net
    n_examples = tokens.shape[0]
    n_steps = tokens.shape[1]
    # Zero vector used as both the initial hidden state and memory cell.
    net.f(
        layers.NumpyData(self.seed_name,
                         np.zeros((n_examples, self.hidden_size))))
    for step in range(n_steps):
        word_name = self.word_name % step
        wordvec_name = self.wordvec_name % step
        concat_name = self.concat_name % step
        lstm_name = self.lstm_name % step
        hidden_name = self.hidden_name % step
        mem_name = self.mem_name % step
        # First step starts from the seed blob; later steps chain to t-1.
        prev_hidden = (self.seed_name if step == 0
                       else self.hidden_name % (step - 1))
        prev_mem = (self.seed_name if step == 0
                    else self.mem_name % (step - 1))
        net.f(layers.NumpyData(word_name, np.asarray(tokens[:, step])))
        net.f(
            layers.Wordvec(wordvec_name, self.hidden_size, len(STRING_INDEX),
                           bottoms=[word_name],
                           param_names=[self.wordvec_param_name],
                           param_lr_mults=[self.param_mult]))
        net.f(layers.Concat(concat_name,
                            bottoms=[prev_hidden, wordvec_name]))
        net.f(
            layers.LstmUnit(lstm_name,
                            bottoms=[concat_name, prev_mem],
                            param_names=[
                                self.input_value_param_name,
                                self.input_gate_param_name,
                                self.forget_gate_param_name,
                                self.output_gate_param_name
                            ],
                            param_lr_mults=[self.param_mult] * 4,
                            tops=[hidden_name, mem_name],
                            num_cells=self.hidden_size))
    # hidden_name still refers to the final time step's hidden state here.
    net.f(
        layers.InnerProduct(self.ip_name, len(ANSWER_INDEX),
                            bottoms=[hidden_name],
                            param_lr_mults=[self.param_mult] * 2))
    net.f(layers.ReLU(self.relu_name, bottoms=[self.ip_name]))
    net.f(
        layers.Eltwise(self.sum_name,
                       bottoms=[self.relu_name, self.incoming_name],
                       operation="SUM"))
def forward(self, tokens):
    """Bag-of-words encoding: sum per-position word embeddings.

    Embeds each token column of ``tokens`` (examples x steps) into the
    answer vocabulary space with a shared Wordvec parameter, then sums all
    embeddings together with the incoming module's output.
    """
    net = self.apollo_net
    n_steps = tokens.shape[1]
    for position in range(n_steps):
        net.f(layers.NumpyData(self.word_name % position,
                               np.asarray(tokens[:, position])))
        net.f(
            layers.Wordvec(self.wordvec_name % position,
                           len(ANSWER_INDEX), len(STRING_INDEX),
                           bottoms=[self.word_name % position],
                           param_names=[self.wordvec_param_name]))
    # All embeddings plus the incoming blob feed one elementwise sum.
    all_bottoms = [self.wordvec_name % position
                   for position in range(n_steps)]
    all_bottoms.append(self.incoming_name)
    net.f(layers.Eltwise(self.sum_name, bottoms=all_bottoms,
                         operation="SUM"))
def forward(self, indices):
    """Combine every incoming blob with an elementwise sum.

    NOTE(review): the output blob is named via ``min_name`` while the
    Eltwise operation is "SUM" — presumably intentional (a soft stand-in
    for min?), but worth confirming against the module's callers.
    ``indices`` is accepted for interface uniformity and unused here.
    """
    combine = layers.Eltwise(self.min_name,
                             operation="SUM",
                             bottoms=self.incoming_names)
    self.apollo_net.f(combine)
def net_proto(self):
    """Return the layer list for the two-pathway gaze/saliency network.

    Builds (1) a saliency pathway over the full image ("data"), (2) a gaze
    pathway over the face crop ("face") plus an eye-position grid
    ("eyes_grid"), multiplies the two via an importance map, and ends with
    five 5x5 "shifted grid" output heads.  NOTE(review): structure looks
    like the GazeFollow deploy net — confirm against deploy_demo.prototxt.
    """
    conv_weight_filler = layers.Filler("gaussian", 0.01)
    bias_filler0 = layers.Filler("constant", 0.0)
    bias_filler1 = layers.Filler("constant", 0.1)
    bias_filler5 = layers.Filler("constant", 0.5)

    # same deploy structure as in deploy_demo.prototxt
    net_layers = [
        # saliency path: AlexNet-style trunk over the full image.
        # conv1/conv2 are frozen (param_lr_mults=[0, 0]); conv3-5 fine-tune
        # at a reduced rate.
        layers.Convolution("conv1", bottoms=["data"],
                           param_lr_mults=[0, 0], param_decay_mults=[1, 0],
                           kernel_dim=(11, 11), stride=4,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler0, num_output=96),
        layers.ReLU(name="relu1", bottoms=["conv1"], tops=["conv1"]),
        layers.Pooling(name="pool1", bottoms=["conv1"], kernel_size=3,
                       stride=2),
        layers.LRN(name="norm1", bottoms=["pool1"], tops=["norm1"],
                   local_size=5, alpha=0.0001, beta=0.75),
        layers.Convolution(name="conv2", bottoms=["norm1"],
                           param_lr_mults=[0, 0], param_decay_mults=[1, 0],
                           kernel_dim=(5, 5), pad=2, group=2,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler1, num_output=256),
        layers.ReLU(name="relu2", bottoms=["conv2"], tops=["conv2"]),
        layers.Pooling(name="pool2", bottoms=["conv2"], kernel_size=3,
                       stride=2),
        layers.LRN(name="norm2", bottoms=["pool2"], tops=["norm2"],
                   local_size=5, alpha=0.0001, beta=0.75),
        layers.Convolution(name="conv3", bottoms=["norm2"],
                           param_lr_mults=[0.1, 0.2],
                           param_decay_mults=[1, 0],
                           kernel_dim=(3, 3), pad=1,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler0, num_output=384),
        layers.ReLU(name="relu3", bottoms=["conv3"], tops=["conv3"]),
        layers.Convolution(name="conv4", bottoms=["conv3"],
                           param_lr_mults=[0.1, 0.2],
                           param_decay_mults=[1, 0],
                           kernel_dim=(3, 3), pad=1, group=2,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler1, num_output=384),
        layers.ReLU(name="relu4", bottoms=["conv4"], tops=["conv4"]),
        layers.Convolution(name="conv5", bottoms=["conv4"],
                           param_lr_mults=[0.1, 0.2],
                           param_decay_mults=[1, 0],
                           kernel_dim=(3, 3), pad=1, group=2,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler1, num_output=256),
        layers.ReLU(name="relu5", bottoms=["conv5"], tops=["conv5"]),
        # 1x1 conv collapses conv5 to a single-channel saliency map.
        layers.Convolution(name="conv5_red", bottoms=["conv5"],
                           param_lr_mults=[1.0, 2.0],
                           param_decay_mults=[1, 0],
                           kernel_dim=(1, 1),
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler1, num_output=1),
        layers.ReLU(name="relu5_red", bottoms=["conv5_red"],
                    tops=["conv5_red"]),

        # gaze path: same trunk shape, over the face crop.
        layers.Convolution("conv1_face", bottoms=["face"],
                           param_lr_mults=[0, 0], param_decay_mults=[1, 0],
                           kernel_dim=(11, 11), stride=4,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler0, num_output=96),
        layers.ReLU(name="relu1_face", bottoms=["conv1_face"],
                    tops=["conv1_face"]),
        layers.Pooling(name="pool1_face", bottoms=["conv1_face"],
                       kernel_size=3, stride=2),
        layers.LRN(name="norm1_face", bottoms=["pool1_face"],
                   tops=["norm1_face"], local_size=5, alpha=0.0001,
                   beta=0.75),
        layers.Convolution(name="conv2_face", bottoms=["norm1_face"],
                           param_lr_mults=[0, 0], param_decay_mults=[1, 0],
                           kernel_dim=(5, 5), pad=2, group=2,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler1, num_output=256),
        layers.ReLU(name="relu2_face", bottoms=["conv2_face"],
                    tops=["conv2_face"]),
        layers.Pooling(name="pool2_face", bottoms=["conv2_face"],
                       kernel_size=3, stride=2),
        layers.LRN(name="norm2_face", bottoms=["pool2_face"],
                   tops=["norm2_face"], local_size=5, alpha=0.0001,
                   beta=0.75),
        layers.Convolution(name="conv3_face", bottoms=["norm2_face"],
                           param_lr_mults=[0.2, 0.4],
                           param_decay_mults=[1, 0],
                           kernel_dim=(3, 3), pad=1,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler0, num_output=384),
        layers.ReLU(name="relu3_face", bottoms=["conv3_face"],
                    tops=["conv3_face"]),
        layers.Convolution(name="conv4_face", bottoms=["conv3_face"],
                           param_lr_mults=[0.2, 0.4],
                           param_decay_mults=[1, 0],
                           kernel_dim=(3, 3), pad=1, group=2,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler1, num_output=384),
        layers.ReLU(name="relu4_face", bottoms=["conv4_face"],
                    tops=["conv4_face"]),
        layers.Convolution(name="conv5_face", bottoms=["conv4_face"],
                           param_lr_mults=[0.2, 0.4],
                           param_decay_mults=[1, 0],
                           kernel_dim=(3, 3), pad=1, group=2,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler1, num_output=256),
        layers.ReLU(name="relu5_face", bottoms=["conv5_face"],
                    tops=["conv5_face"]),
        layers.Pooling(name="pool5_face", bottoms=["conv5_face"],
                       kernel_size=3, stride=2),
        layers.InnerProduct(name="fc6_face", bottoms=["pool5_face"],
                            tops=["fc6_face"], param_lr_mults=[1, 2],
                            param_decay_mults=[1, 0],
                            weight_filler=layers.Filler("gaussian", 0.5),
                            bias_filler=bias_filler5, num_output=500),
        layers.ReLU(name="relu6_face", bottoms=["fc6_face"],
                    tops=["fc6_face"]),
        # Eye-position grid is flattened and rescaled (x24) before being
        # concatenated with the face features.
        layers.Flatten(name="eyes_grid_flat", bottoms=["eyes_grid"],
                       tops=["eyes_grid_flat"]),
        layers.Power(name="eyes_grid_mult", bottoms=["eyes_grid_flat"],
                     tops=["eyes_grid_mult"], power=1, scale=24, shift=0),
        layers.Concat(name="face_input",
                      bottoms=["fc6_face", "eyes_grid_mult"],
                      tops=["face_input"], axis=1),
        layers.InnerProduct(name="fc7_face", bottoms=["face_input"],
                            tops=["fc7_face"], param_lr_mults=[1, 2],
                            param_decay_mults=[1, 0],
                            weight_filler=layers.Filler("gaussian", 0.01),
                            bias_filler=bias_filler5, num_output=400),
        layers.ReLU(name="relu7_face", bottoms=["fc7_face"],
                    tops=["fc7_face"]),
        layers.InnerProduct(name="fc8_face", bottoms=["fc7_face"],
                            tops=["fc8_face"], param_lr_mults=[1, 2],
                            param_decay_mults=[1, 0],
                            weight_filler=layers.Filler("gaussian", 0.01),
                            bias_filler=bias_filler5, num_output=200),
        layers.ReLU(name="relu8_face", bottoms=["fc8_face"],
                    tops=["fc8_face"]),
        # 169 = 13x13 importance map, sigmoid-squashed then reshaped.
        layers.InnerProduct(name="importance_no_sigmoid",
                            bottoms=["fc8_face"],
                            tops=["importance_no_sigmoid"],
                            param_lr_mults=[0.2, 0.0],
                            param_decay_mults=[1.0, 0.0],
                            weight_filler=layers.Filler("gaussian", 0.01),
                            num_output=169),
        layers.Sigmoid(name="importance_map_prefilter",
                       bottoms=["importance_no_sigmoid"],
                       tops=["importance_map_prefilter"]),
        layers.Reshape('importance_map_reshape', (1, 1, 13, 13),
                       bottoms=['importance_map_prefilter'],
                       tops=["importance_map_reshape"]),
        # Frozen 3x3 smoothing conv over the importance map.
        layers.Convolution(name="importance_map",
                           bottoms=["importance_map_reshape"],
                           param_lr_mults=[0.0, 0.0],
                           param_decay_mults=[1.0, 0.0],
                           kernel_dim=(3, 3), pad=1, stride=1,
                           weight_filler=conv_weight_filler,
                           bias_filler=bias_filler1, num_output=1),
        # Fuse pathways: saliency map modulated by the gaze importance map.
        layers.Eltwise(name="fc_7",
                       bottoms=["conv5_red", "importance_map"],
                       tops=["fc_7"], operation="PROD"),

        # shifted grids: five 25-way (5x5) output heads over the fused map.
        layers.InnerProduct(name="fc_0_0", bottoms=["fc_7"],
                            tops=["fc_0_0"], param_lr_mults=[1, 2],
                            param_decay_mults=[1, 0],
                            weight_filler=layers.Filler("gaussian", 0.01),
                            bias_filler=bias_filler0, num_output=25),
        layers.InnerProduct(name="fc_1_0", bottoms=["fc_7"],
                            tops=["fc_1_0"], param_lr_mults=[1, 2],
                            param_decay_mults=[1, 0],
                            weight_filler=layers.Filler("gaussian", 0.01),
                            bias_filler=bias_filler0, num_output=25),
        layers.InnerProduct(name="fc_m1_0", bottoms=["fc_7"],
                            tops=["fc_m1_0"], param_lr_mults=[1, 2],
                            param_decay_mults=[1, 0],
                            weight_filler=layers.Filler("gaussian", 0.01),
                            bias_filler=bias_filler0, num_output=25),
        layers.InnerProduct(name="fc_0_1", bottoms=["fc_7"],
                            tops=["fc_0_1"], param_lr_mults=[1, 2],
                            param_decay_mults=[1, 0],
                            weight_filler=layers.Filler("gaussian", 0.01),
                            bias_filler=bias_filler0, num_output=25),
        layers.InnerProduct(name="fc_0_m1", bottoms=["fc_7"],
                            tops=["fc_0_m1"], param_lr_mults=[1, 2],
                            param_decay_mults=[1, 0],
                            weight_filler=layers.Filler("gaussian", 0.01),
                            bias_filler=bias_filler0, num_output=25),
        layers.Reshape('fc_0_0_reshape', (5, 5), bottoms=['fc_0_0'],
                       tops=["fc_0_0_reshape"]),
        layers.Reshape('fc_1_0_reshape', (5, 5), bottoms=['fc_1_0'],
                       tops=["fc_1_0_reshape"]),
        layers.Reshape('fc_m1_0_reshape', (5, 5), bottoms=['fc_m1_0'],
                       tops=["fc_m1_0_reshape"]),
        layers.Reshape('fc_0_1_reshape', (5, 5), bottoms=['fc_0_1'],
                       tops=["fc_0_1_reshape"]),
        layers.Reshape('fc_0_m1_reshape', (5, 5), bottoms=['fc_0_m1'],
                       tops=["fc_0_m1_reshape"])
    ]
    return net_layers