def sc(X, W, attr):
    # Decompose a grouped convolution into per-(output, input)-channel convolutions.
    xshp, wshp = X.shape, W.shape
    C, OC, IC = xshp[1], wshp[0], wshp[1]
    num_group = C // IC
    assert C >= IC and C % IC == 0 and num_group == eval(attr['num_group'])
    assert OC >= num_group and OC % num_group == 0

    xs = sym_slice(X, 1, 1)          # X split into single input channels
    ws = kernel_slice_2d(W)          # W split into per-output, per-input kernels
    OPG = OC // num_group            # output channels per group

    # Each single-channel convolution runs ungrouped with a single filter.
    nattr = attr.copy()
    nattr['num_group'] = '1'
    nattr['num_filter'] = '1'

    nodes = []
    for o in range(OC):
        nnodes = []
        j = (o // OPG) * IC          # first input channel of o's group
        for i in range(IC):
            xoi, woi = xs[i + j], ws[o][i]
            yoi = nd.Convolution(xoi, woi, **nattr)
            nnodes.append(yoi)
        zi = nd.add_n(*nnodes) if len(nnodes) > 1 else nnodes[0]
        nodes.append(zi)
    return nd.concat(*nodes, dim=1)
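# A minimal self-contained sketch (hypothetical helper, not part of the code above)
# of the identity sc() relies on: a grouped Convolution equals, per output channel,
# the sum of single-input-channel convolutions over that channel's group.
def _demo_sc_groupwise_decomposition():
    from mxnet import nd  # local import keeps the sketch self-contained

    X = nd.random.uniform(shape=(1, 4, 8, 8))   # N, C, H, W
    W = nd.random.uniform(shape=(6, 2, 3, 3))   # OC, IC, kH, kW -> num_group = C // IC = 2
    ref = nd.Convolution(X, W, num_filter=6, kernel=(3, 3), num_group=2, no_bias=True)

    C, OC, IC = X.shape[1], W.shape[0], W.shape[1]
    OPG = OC // (C // IC)                       # output channels per group
    outs = []
    for o in range(OC):
        base = (o // OPG) * IC                  # first input channel of o's group
        parts = [nd.Convolution(X[:, base + i:base + i + 1], W[o:o + 1, i:i + 1],
                                num_filter=1, kernel=(3, 3), num_group=1, no_bias=True)
                 for i in range(IC)]
        outs.append(nd.add_n(*parts) if len(parts) > 1 else parts[0])
    dec = nd.concat(*outs, dim=1)
    print(nd.abs(ref - dec).max())              # expect ~0 up to float error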
def get_global_norm(arrays):
    # Global L2 norm over a list of NDArrays: sqrt of the sum of all squared entries.
    ctx = arrays[0].context
    total_norm = nd.add_n(*[
        nd.dot(x, x).as_in_context(ctx)
        for x in (arr.reshape((-1,)) for arr in arrays)
    ])
    total_norm = nd.sqrt(total_norm).asscalar()
    return total_norm
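# A quick sanity check (hypothetical helper): the global norm above should match
# the L2 norm of all arrays flattened into one vector, computed with NumPy.
def _demo_get_global_norm():
    import numpy as np
    from mxnet import nd  # local imports keep the sketch self-contained

    arrays = [nd.random.normal(shape=(3, 4)), nd.random.normal(shape=(5,))]
    flat = np.concatenate([a.asnumpy().ravel() for a in arrays])
    print(get_global_norm(arrays), np.linalg.norm(flat))  # the two values should agree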
def sc(X, W, attr, ichannel, step):
    # Split X and W into matching blocks of `step` channels along axis `ichannel`,
    # convolve each pair, and sum the partial results.
    xshp = X.shape
    xs = sym_slice(X, ichannel, step)
    ws = sym_slice(W, ichannel, step)
    nodes = []
    for j, _ in enumerate(range(0, xshp[ichannel], step)):
        yi = nd.Convolution(xs[j], ws[j], **attr)
        nodes.append(yi)
    return nd.add_n(*nodes)
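# A self-contained sketch (hypothetical helper) of the identity this variant uses:
# a num_group=1 convolution is additive over input-channel blocks, so convolving
# matching channel slices of X and W and summing reproduces the full convolution.
def _demo_sc_channel_slices(step=2):
    from mxnet import nd  # local import keeps the sketch self-contained

    X = nd.random.uniform(shape=(1, 6, 8, 8))
    W = nd.random.uniform(shape=(4, 6, 3, 3))
    full = nd.Convolution(X, W, num_filter=4, kernel=(3, 3), no_bias=True)

    parts = [nd.Convolution(X[:, i:i + step], W[:, i:i + step],
                            num_filter=4, kernel=(3, 3), no_bias=True)
             for i in range(0, X.shape[1], step)]
    print(nd.abs(full - nd.add_n(*parts)).max())          # expect ~0 up to float error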
def forward(self, input_vec, loss=None):
    assert input_vec.shape[1] == self.input_dimension

    # Slice the flat input vector into per-slot segments plus the global segment.
    inputs = {}
    for slot in self.slots:
        inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
    input_global = []
    for seg in self.global_dimension:
        input_global.append(input_vec[:, seg[0]:seg[1]])
    inputs['global'] = nd.concat(*input_global, dim=1)

    layer = []
    # inputs -> first hidden layer
    if (not self.sort_input_vec) and self.state_feature != 'dip':
        layer.append([])
        for slot in self.slots:
            layer[0].append(self.input_trans[slot](inputs[slot]))
        layer[0].append(self.input_trans['global'](inputs['global']))
    elif self.state_feature == 'dip':
        sorted_inputs = []
        for slot in self.slots:
            sorted_inputs.append(inputs[slot])
        sorted_inputs.append(inputs['global'])
        layer.append(self.input_trans(sorted_inputs, loss))
    elif self.sort_input_vec:
        sorted_inputs = []
        for slot in self.slots:
            # Sort each slot's feature block descending, pad or truncate it to
            # width 20, and keep the last two columns unchanged.
            tmp = inputs[slot][:, :-2].sort(is_ascend=False)
            if tmp.shape[1] < 20:
                tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1)
            else:
                tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
            sorted_inputs.append(nd.concat(tmp, inputs[slot][:, -2:], dim=1))
        sorted_inputs.append(inputs['global'])
        layer.append(self.input_trans(sorted_inputs, loss))

    # hidden layers
    for i in range(self.hidden_layers - 1):
        if self.recurrent_mode is False:
            # equivalent to `layer.append(self.ma_trans[i](layer[-1], loss))`
            layer.append(self.ma_trans[i](layer[i], loss))
        else:
            layer.append(self.ma_trans(layer[i], loss))

    if self.share_last_layer is False:
        # dropout on the last hidden layer (per-slot, then global)
        for j in range(len(self.slots)):
            layer[-1][j] = self.local_out_drop_op(layer[-1][j])
        layer[-1][-1] = self.global_out_drop_op(layer[-1][-1])

    # last hidden layer -> outputs
    outputs = []
    slotv_probs = []
    slot_hidden_value = []
    tmp_ave = nd.zeros_like(layer[-1][0])
    for i in range(len(self.slots) + 1):
        if self.use_dueling is False:
            outputs.append(self.output_trans[i](layer[-1][i]))
        else:
            # Dueling heads: a per-slot probability head on the hidden state and a
            # value head fed by the (sorted) slot inputs; the global slot has its own heads.
            if i < len(self.slots):
                cur_slot_prob = self.output_trans_local_slotP(layer[-1][i])
                cur_slot_v = self.output_trans_local_value(sorted_inputs[i])
            else:
                cur_slot_prob = self.output_trans_global_slotP(layer[-1][i])
                cur_slot_v = self.output_trans_global_value(sorted_inputs[i])
            slotv_probs.append(cur_slot_prob)
            slot_hidden_value.append(cur_slot_v)

    batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))
    batch_value = nd.squeeze(self.output_trans_value(nd.add_n(*slot_hidden_value)))
    return batch_slotv_prob, batch_value
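# A standalone sketch (hypothetical helper) of the sort_input_vec preprocessing in
# forward(): sort a slot's feature block descending, pad with zeros (or truncate)
# to a fixed width of 20, then re-attach the last two columns unchanged. CTX here
# stands in for the module-level context used above.
def _demo_sort_and_pad_slot(width=20):
    import mxnet as mx
    from mxnet import nd  # local imports keep the sketch self-contained

    CTX = mx.cpu()
    slot_feat = nd.random.uniform(shape=(2, 9))        # batch of 2: 7 features + 2 trailing flags
    tmp = slot_feat[:, :-2].sort(is_ascend=False)      # descending sort of the feature part
    if tmp.shape[1] < width:                           # pad with zeros up to the fixed width
        tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], width - tmp.shape[1]), ctx=CTX), dim=1)
    else:                                              # or keep only the `width` largest values
        tmp = nd.slice_axis(tmp, axis=1, begin=0, end=width)
    padded = nd.concat(tmp, slot_feat[:, -2:], dim=1)
    print(padded.shape)                                # (2, 22)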