def yolo2_decoder(x, num_class, anchor_scales):
    """Split a YOLOv2 conv head's channels apart and decode them into
    detection boxes.

    Per-box output layout: (class index, score, xmin, ymin, xmax, ymax).
    """
    stride = num_class + 5
    # (batch, C, H, W) -> (batch, H, W, anchors * stride) -> (batch, H, W, anchors, stride)
    feat = x.transpose((0, 2, 3, 1))
    feat = feat.reshape((0, 0, 0, -1, stride))
    # channel layout within each anchor: [xy(2), wh(2), objectness(1), classes]
    xy_pred = feat.slice_axis(begin=0, end=2, axis=-1)
    wh = feat.slice_axis(begin=2, end=4, axis=-1)
    score_pred = feat.slice_axis(begin=4, end=5, axis=-1)
    cls_pred = feat.slice_axis(begin=5, end=stride, axis=-1)
    xy = nd.sigmoid(xy_pred)
    cx, cy = transform_center(xy)
    w, h = transform_size(wh, anchor_scales)
    score = nd.sigmoid(score_pred)
    cid = nd.argmax(cls_pred, axis=-1, keepdims=True)
    # convert center format to clipped corner format
    half_w = w / 2
    half_h = h / 2
    left = nd.clip(cx - half_w, 0, 1)
    top = nd.clip(cy - half_h, 0, 1)
    right = nd.clip(cx + half_w, 0, 1)
    bottom = nd.clip(cy + half_h, 0, 1)
    output = nd.concat(cid, score, left, top, right, bottom, dim=4)
    return output, cls_pred, score, nd.concat(xy, wh, dim=4)
def yolo2_forward(x, num_class, anchor_scales):
    """Transpose/reshape/organize convolution outputs.

    Channel layout per anchor: [classes, objectness(1), xy(2), wh(2)].
    Returns (corner boxes with cid/score, raw class logits, score,
    raw xy/wh predictions).
    """
    stride = num_class + 5
    # move channels last, then split the last axis into (anchor, stride)
    feat = x.transpose((0, 2, 3, 1)).reshape((0, 0, 0, -1, stride))
    # class logits
    cls_pred = feat.slice_axis(begin=0, end=num_class, axis=-1)
    # objectness score
    score = nd.sigmoid(feat.slice_axis(begin=num_class, end=num_class + 1, axis=-1))
    # box center, squashed to (0, 1) within each grid cell
    xy = nd.sigmoid(feat.slice_axis(begin=num_class + 1, end=num_class + 3, axis=-1))
    # raw width/height predictions
    wh = feat.slice_axis(begin=num_class + 3, end=num_class + 5, axis=-1)
    # convert to positions/sizes relative to the whole image
    cx, cy = transform_center(xy)
    w, h = transform_size(wh, anchor_scales)
    # hard class assignment
    cid = nd.argmax(cls_pred, axis=-1, keepdims=True)
    # clipped corner-format box
    left = nd.clip(cx - w / 2, 0, 1)
    top = nd.clip(cy - h / 2, 0, 1)
    right = nd.clip(cx + w / 2, 0, 1)
    bottom = nd.clip(cy + h / 2, 0, 1)
    output = nd.concat(cid, score, left, top, right, bottom, dim=4)
    return output, cls_pred, score, nd.concat(xy, wh, dim=4)
def lstm(x, h, c, Wxi, Wxf, Wxo, Whi, Whf, Who, Wxc, Whc, bi, bf, bo, bc):
    """One LSTM step: returns the new (hidden, cell) state pair."""
    i_gate = nd.sigmoid(nd.dot(x, Wxi) + nd.dot(h, Whi) + bi)
    f_gate = nd.sigmoid(nd.dot(x, Wxf) + nd.dot(h, Whf) + bf)
    o_gate = nd.sigmoid(nd.dot(x, Wxo) + nd.dot(h, Who) + bo)
    c_cand = nd.tanh(nd.dot(x, Wxc) + nd.dot(h, Whc) + bc)
    c_new = f_gate * c + i_gate * c_cand
    h_new = o_gate * nd.tanh(c_new)
    return h_new, c_new
def generate(self, x: nd.NDArray = None, include_intermediate: bool = False, return_attn_params: bool = False) -> \
        Union[nd.NDArray, Tuple[nd.NDArray, nd.NDArray]]:
    """
    Generate a batch of samples from model. See Section 2.3 in paper.

    If x is None, this method generates unconditional samples from the model (as explained in Section 2.3 in the
    paper).

    If x is provided, this method reconstructs the input to generate the sample. This is not really a true sample
    from the model because the model looks at the image it is trying to generate. However, this is useful for seeing
    how the model generates a particular image. (I believe this is how the figures in the paper are generated.)

    :param x: Input to generate images from.
    :param include_intermediate: If True, samples from all timesteps (not only the last timestep) are returned.
    :param return_attn_params: If True, returns attention params along with generated samples.
    :return: n x input dim array of generated samples. If include_intermediate is True, then steps x n x input dim.
    """
    canvases = []     # sigmoid(canvas) snapshot per timestep
    attn_params = []  # write-attention params per timestep
    canvas = nd.zeros((self._batch_size, self._input_dim), ctx=self._ctx)
    # NOTE(review): broadcast_to with target size 0 appears to rely on MXNet's
    # "0 == keep source dimension" shape convention — confirm against the
    # shapes of the *_init parameters.
    h_dec = nd.broadcast_to(self._dec_rnn_h_init.data(), (self._batch_size, 0))
    c_dec = nd.broadcast_to(self._dec_rnn_c_init.data(), (self._batch_size, 0))
    if x is not None:
        # encoder state is only needed when reconstructing a given input
        h_enc = nd.broadcast_to(self._enc_rnn_h_init.data(), (self._batch_size, 0))
        c_enc = nd.broadcast_to(self._enc_rnn_c_init.data(), (self._batch_size, 0))
    for i in range(self._num_steps):
        # snapshot before this step's write
        canvases.append(nd.sigmoid(canvas))
        if x is not None:
            # reconstruction mode: encode the read patch + error, infer z
            err = x - nd.sigmoid(canvas)
            r, _ = self._read_layer(x, err, h_dec, c_dec)
            _, (h_enc, c_enc) = self._enc_rnn(nd.concat(r, h_dec, c_dec, dim=1), [h_enc, c_enc])
            q = self._enc_dense(h_enc)
            z = self._latent_layer(q)
        else:
            # unconditional mode: draw z from the standard-normal prior
            z = nd.random.normal(shape=(self._batch_size, self._latent_dim), ctx=self._ctx)
        _, (h_dec, c_dec) = self._dec_rnn(z, [h_dec, c_dec])
        w, attn_param = self._write_layer(h_dec, c_dec)
        attn_params.append(attn_param)
        canvas = canvas + w
    # final canvas after the last write
    canvases.append(nd.sigmoid(canvas))
    if include_intermediate:
        samples = nd.stack(*canvases, axis=0)
    else:
        samples = canvases[-1]
    if return_attn_params:
        return samples, nd.stack(*attn_params, axis=0)
    else:
        return samples
def generate(self, x: nd.NDArray = None, include_intermediate: bool = False, **kwargs) -> \
        Union[nd.NDArray, Tuple[nd.NDArray, nd.NDArray]]:
    """
    Generate a batch of samples from model. See Section 2.3 in paper.

    If x is None, this method generates unconditional samples from the model (as explained in Section 2.3 in the
    paper).

    If x is provided, this method reconstructs the input to generate the sample. This is not really a true sample
    from the model because the model looks at the image it is trying to generate.  However, this is useful for
    seeing how the model generates a particular image.

    :param x: Input to generate images from.
    :param include_intermediate: If True, samples from all timesteps (not only the last timestep) are returned.
    :return: n x *image_shape array of generated samples. If include_intermediate is True, then steps x n x
        *image_shape.
    """
    r = nd.zeros((self._batch_size, *self._input_shape), ctx=self._ctx)  # reconstruction
    h_dec = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
    c_dec = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
    if x is not None:
        # encoder state/features only needed in reconstruction mode
        h_enc = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
        c_enc = nd.zeros((self._batch_size, *self._rnn_hidden_shape), ctx=self._ctx)
        encoded_x = self._enc_nn(x)
    rs = []  # sample(s) over time
    for i in range(self._num_steps):
        # snapshot of the current reconstruction before this step's update
        rs.append(nd.sigmoid(r))
        encoded_r = self._enc_nn(rs[-1])
        if x is not None:
            # reconstruction mode: posterior over z from encoder RNN
            err = encoded_x - encoded_r
            _, (h_enc, c_enc) = self._enc_rnn(nd.concat(encoded_x, err, h_dec, c_dec, dim=1),
                                              [h_enc, c_enc])
            q = self._q_layer(h_enc)
            # convert NxCxHxW to NxF
            q = nd.reshape(q, (self._batch_size, -1))
            z = self._latent_layer(q)
        else:
            # sample from prior conditioned on decoder state
            p = self._p_layer(h_dec)
            p = nd.reshape(p, (self._batch_size, -1))
            z = self._latent_layer(p)
        # reshape flat latent back to feature maps for the conv decoder
        dec_z = nd.reshape(z, (self._batch_size, self._num_latent_maps, *self._encoder_output_shape[1:]))
        _, (h_dec, c_dec) = self._dec_rnn(nd.concat(dec_z, encoded_r, dim=1), [h_dec, c_dec])
        r = r + self._dec_nn(h_dec)
    # final reconstruction after the last update
    rs.append(nd.sigmoid(r))
    if include_intermediate:
        samples = nd.stack(*rs, axis=0)
    else:
        samples = rs[-1]
    return samples
def gru_rnn(inputs, H, *params):
    """Unroll a GRU over a sequence.

    inputs: num_steps arrays of shape (batch_size, vocab_size)
    H:      initial state of shape (batch_size, hidden_dim)
    Returns (outputs, H) where outputs is num_steps arrays of shape
    (batch_size, vocab_size).
    """
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hy, b_y = params
    outputs = []
    for step_input in inputs:
        update = nd.sigmoid(nd.dot(step_input, W_xz) + nd.dot(H, W_hz) + b_z)
        reset = nd.sigmoid(nd.dot(step_input, W_xr) + nd.dot(H, W_hr) + b_r)
        candidate = nd.tanh(nd.dot(step_input, W_xh) + reset * nd.dot(H, W_hh) + b_h)
        H = update * H + (1 - update) * candidate
        outputs.append(nd.dot(H, W_hy) + b_y)
    return (outputs, H)
def decode(self, z):
    """
    Given a latent vector, predict the input. z -> x

    :param z: Latent matrix (batch x features).
    :return: Sigmoid-activated decoder output.
    """
    logits = self._dec_nn(z)
    return nd.sigmoid(logits)
def check_KL(self):
    """KL divergence between the reference state distribution and the
    model's distribution over the enumerated states."""
    hidden_act = nd.dot(self.enum_states, self.W) + self.hb
    visible_term = nd.dot(self.enum_states, self.vb)
    # -log(sigmoid(-a)) == softplus(a), summed over hidden units
    hidden_term = nd.sum(-nd.log(nd.sigmoid(-hidden_act)), axis=1)
    model_probs = nd.softmax(visible_term + hidden_term)
    kl = nd.sum(self.prob_states * nd.log(self.prob_states / model_probs))
    return kl.asnumpy()[0]
def test_model_for_ml(self):
    """Smoke-test a serialized multi-label model on one sample image and
    print the predicted class names."""
    # load an exported symbol + params pair as a SymbolBlock
    net_path = os.path.join(
        DATA_DIR, 'model', 'epoch-3-0.48-20180920164709.params-symbol.json')
    params_path = os.path.join(
        DATA_DIR, 'model', 'epoch-3-0.48-20180920164709.params-0003.params')
    net = gluon.nn.SymbolBlock.imports(net_path, ['data'], params_path)
    im_path = os.path.join(DATA_DIR, 'imgs_data', 'd4YE10xHdvbwKJV5yBYsoJJke6K9b.jpg')
    img = image.imread(im_path)
    # plt.imshow(img.asnumpy())
    # plt.show()
    # standard ImageNet preprocessing (resize, center-crop, normalize)
    transform_fn = transforms.Compose([
        transforms.Resize(224, keep_ratio=True),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    img = transform_fn(img)
    img = nd.expand_dims(img, axis=0)  # add batch dimension
    res = net(img.as_in_context(self.ctx[0]))
    # sigmoid scores thresholded at 0.5 -> multi-hot prediction
    res = sigmoid(nd.squeeze(res)).asnumpy()
    res = np.where(res > 0.5, 1, 0)
    indexes, = np.where(res == 1)
    res_classes = [self.class_names[i] for i in indexes.tolist()]
    # print(indexes.tolist())
    print('测试类别: {}'.format(res_classes))
def residue_forward(self, x, conv_sigmoid, conv_tanh, skip_scale, residue_scale):
    """Gated residual unit: sigmoid/tanh gated conv, with separate skip
    and residual projections. Returns (residual output, skip output)."""
    gate = F.sigmoid(conv_sigmoid(x))
    filt = F.tanh(conv_tanh(x))
    gated = gate * filt
    skip = skip_scale(gated)
    residue = residue_scale(gated)
    # align the (possibly shorter) conv output with the tail of the input
    out = residue + x[:, :, -residue.shape[2]:]
    return out, skip
def observe_reward_value(
    self, 
    state_arr, 
    action_arr, 
    meta_data_arr=None,
):
    '''
    Compute the reward value.

    Args:
        state_arr:      Tensor of state.
        action_arr:     Tensor of action.
        meta_data_arr:  Meta data of actions.
    
    Returns:
        Reward value.
    '''
    # base reward: goal-reached flag per sample
    reward_arr = self.__check_goal_flag(action_arr)
    for i in range(action_arr.shape[0]):
        # position encoded as the single 1-entry of the action map
        _action_arr = action_arr[i, 0].asnumpy()
        x, y = np.where(_action_arr == 1)
        x, y = x[0], y[0]
        goal_x, goal_y = self.__goal_pos
        if x == goal_x and y == goal_y:
            distance = 0.0
        else:
            distance = np.sqrt(((x - goal_x) ** 2) + (y - goal_y) ** 2)
        if self.inferencing_mode is False:
            # during training, compare against the stored previous state
            state_arr = self.__state_arr
        if state_arr is not None:
            _state_arr = state_arr[i, 0].asnumpy()
            pre_x, pre_y = np.where(_state_arr == 1)
            if pre_x == goal_x and pre_y == goal_y:
                pre_distance = 0.0
            else:
                pre_distance = np.sqrt(((pre_x - goal_x) ** 2) + (pre_y - goal_y) ** 2)
            # NOTE(review): penalty is 1 only when the move did not change the
            # distance to the goal, 0 otherwise — confirm this is intended
            # (it ignores whether the agent moved closer or farther).
            distance_penalty = distance - pre_distance
            if distance_penalty == 0:
                distance_penalty = 1
            else:
                distance_penalty = 0
        # NOTE(review): max_distance is the *squared* diagonal distance while
        # `distance` is Euclidean — presumably only used as a rough scale for
        # the sigmoid normalization below; verify.
        max_distance = (goal_x ** 2) + (goal_y ** 2)
        reward_arr[i] = reward_arr[i] + (max_distance - distance) - distance_penalty
    # squash rewards into (0, 1)
    reward_arr = nd.ndarray.array(reward_arr, ctx=self.__ctx)
    reward_arr = nd.sigmoid(reward_arr / max_distance)
    return reward_arr
def format_net_output(self, Y):
    """Split the raw YOLO conv output into class logits, objectness and
    box (xy/wh) tensors.

    Last-axis layout per box: [classes, objectness(1), xy(2), wh(2)].
    """
    # channels last, then split last axis into (boxes-per-cell, fields)
    pred = nd.transpose(Y, (0, 2, 3, 1))
    pred = pred.reshape((0, 0, 0, self.box_per_cell, self.numClass + 5))
    n = self.numClass
    predCls = nd.slice_axis(pred, begin=0, end=n, axis=-1)
    predObj = nd.slice_axis(pred, begin=n, end=n + 1, axis=-1)
    # xy squashed to (0, 1) within each cell; wh left raw
    predXY = nd.sigmoid(nd.slice_axis(pred, begin=n + 1, end=n + 3, axis=-1))
    predWH = nd.slice_axis(pred, begin=n + 3, end=n + 5, axis=-1)
    XYWH = nd.concat(predXY, predWH, dim=-1)
    return predCls, predObj, XYWH
def lstm_rnn(inputs, state_h, state_c, *params):
    """Unroll an LSTM over a sequence.

    inputs:  num_steps arrays of shape (batch_size, vocab_size)
    state_h: initial hidden state, shape (batch_size, hidden_dim)
    state_c: initial cell state, shape (batch_size, hidden_dim)
    params:  the 14 LSTM parameters, in the order unpacked below.

    Returns (outputs, H, C) where outputs is num_steps arrays of shape
    (batch_size, vocab_size).

    BUG FIX: the original unpacked `W_xi, W_hi` twice (16 names), which
    raises ValueError for the standard 14-parameter list and silently
    discarded the first two entries otherwise. The unpack now matches
    the canonical layout used by the sibling `gru_rnn`/`lstm` helpers.
    """
    [W_xi, W_hi, b_i,
     W_xf, W_hf, b_f,
     W_xo, W_ho, b_o,
     W_xc, W_hc, b_c,
     W_hy, b_y] = params
    H = state_h
    C = state_c
    outputs = []
    for X in inputs:
        I = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
        F = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
        O = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
        C_tilda = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        C = F * C + I * C_tilda
        H = O * nd.tanh(C)
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return (outputs, H, C)
def forward(self, encoder_output: nd.NDArray, label=None, label_lengths=None):
    """Attention-based sequence decoder step-unrolled over time.

    Teacher-forcing when (label, label_lengths) are given; otherwise feeds
    back its own argmax predictions for up to self.max_len steps.
    Returns (predictions, alphas) stacked along a new time axis (dim 1).
    """
    no_label = label is None or label_lengths is None
    # flatten conv feature map NxHxWxC -> Nx(H*W)xC for attention
    encoder_output = nd.transpose(encoder_output, (0, 2, 3, 1))
    encoder_output = encoder_output.reshape(
        (encoder_output.shape[0], -1, encoder_output.shape[3]))
    batch_max_len = self.max_len if no_label else int(
        label_lengths.max().asscalar()) - 1
    # Initialize hidden states from the mean encoder feature
    encoder_output_mean = encoder_output.mean(axis=1)
    h = self.init_h(encoder_output_mean)
    c = self.init_c(encoder_output_mean)
    # Two tensors to store outputs
    predictions = []
    alphas = []
    if not no_label:
        label_embedded = self.embedding(label)
    else:
        # start token: embedding of index 0
        bs = encoder_output.shape[0]
        x_t = self.embedding(
            nd.zeros(shape=(bs, ), ctx=encoder_output.context))
    for t in range(batch_max_len):
        if not no_label:
            x_t = label_embedded[:, t]  # teacher forcing
        if self._use_current_state:
            # variant A: step the LSTM first, attend with the new state
            _, [h, c] = self.lstm_cell(x_t, [h, c])
            if self._use_adaptive_attention:
                atten_weights, alpha = self.attention(
                    encoder_output, h, x_t, c)
            else:
                atten_weights, alpha = self.attention(encoder_output, h)
            # gate the attention context by a sigmoid of the hidden state
            atten_weights = self.f_beta(h).sigmoid() * atten_weights
            inputs = nd.concat(atten_weights, h, dim=1)
            preds = self.out(self.dropout(inputs))
        else:
            # variant B: attend with the old state, then step the LSTM
            atten_weights, alpha = self.attention(encoder_output, h)
            atten_weights = nd.sigmoid(self.f_beta(h)) * atten_weights
            inputs = nd.concat(x_t, atten_weights, dim=1)
            _, [h, c] = self.lstm_cell(inputs, [h, c])
            preds = self.out(self.dropout(h))
        # feed back prediction (overwritten by teacher forcing next step)
        x_t = self.embedding(preds.argmax(axis=1))
        predictions.append(preds)
        alphas.append(alpha)
    predictions = nd.concat(*[x.expand_dims(axis=1) for x in predictions], dim=1)
    alphas = nd.concat(*[x.expand_dims(axis=1) for x in alphas], dim=1)
    return predictions, alphas
def detect_img_to_vector(self, img_path):
    """Run the transfer-learning net on one image file and return the
    sigmoid score vector as a numpy array."""
    raw = image.imread(img_path)
    # standard ImageNet preprocessing
    preprocess = transforms.Compose([
        transforms.Resize(224, keep_ratio=True),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    batch = nd.expand_dims(preprocess(raw), axis=0)
    logits = self.net_tl(batch.as_in_context(self.ctx))
    return sigmoid(logits).asnumpy()
def forward(self, x):
    """Run the backbone and collect per-scale class/location predictions.

    Heads are attached after conv_3..conv_7; edge-padding after
    conv_2..conv_5 grows the map before the next stride.
    Returns (cls_preds, loc_preds) concatenated across scales.
    """
    cls_preds = [None] * 5
    loc_preds = [None] * 5
    x = self.conv_0(x)
    for layer_idx in range(1, 8):
        x = getattr(self, 'conv_%d' % layer_idx)(x)
        if layer_idx in [3, 4, 5, 6, 7]:
            head = layer_idx - 3
            cls_x = getattr(self, 'cls_%d_conv' % head)(x)
            # NCHW -> N,(HW*anchors),2 with sigmoid scores
            cls_preds[head] = F.sigmoid(
                cls_x.transpose((0, 2, 3, 1)).reshape((0, -1, 2)))
            loc_x = getattr(self, 'loc_%d_conv' % head)(x)
            loc_preds[head] = loc_x.transpose((0, 2, 3, 1)).reshape((0, -1, 4))
        if layer_idx in [2, 3, 4, 5]:
            # pad bottom/right edge by one pixel before the next conv
            x = F.Pad(x, pad_width=(0, 0, 0, 0, 0, 1, 0, 1), mode='edge')
    return nd.concat(*cls_preds, dim=1), nd.concat(*loc_preds, dim=1)
def detect_img_to_class(self, img_path):
    """Classify one image file with the multi-label net; return the indices
    of classes whose sigmoid score exceeds 0.5."""
    raw = image.imread(img_path)
    # standard ImageNet preprocessing
    preprocess = transforms.Compose([
        transforms.Resize(224, keep_ratio=True),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    batch = nd.expand_dims(preprocess(raw), axis=0)
    logits = self.net_cl(batch.as_in_context(self.ctx))
    scores = sigmoid(nd.squeeze(logits)).asnumpy()
    multi_hot = np.where(scores > 0.5, 1, 0)
    indexes, = np.where(multi_hot == 1)
    return indexes
def get_batch_acc(outputs, labels):
    """
    Exact-match accuracy for multi-label prediction: a sample counts as
    correct only when *all* of its label values match.

    :param outputs: raw predictions (logits), shape (batch, classes)
    :param labels: ground truth, shape (batch, classes)
    :return: (accuracy, num correct, num samples)
    """
    # FIX: removed leftover debug prints of labels[0] / outputs[0]
    probs = sigmoid(outputs).asnumpy()
    truth = labels.asnumpy().astype('int')
    preds = np.where(probs > 0.5, 1, 0)  # class threshold 0.5
    # per-sample count of mismatched label positions
    mismatches = np.sum(np.where(truth == preds, 0, 1), axis=1)
    n_right = np.count_nonzero(mismatches == 0)  # zero mismatches == exact match
    return safe_div(n_right, len(truth)), n_right, len(truth)
def metric_of_rpf(y_pred, y_true):
    """Macro-averaged recall / precision / F1 over a batch of multi-label
    predictions (threshold 0.5 on sigmoid scores)."""
    total_r = 0
    total_p = 0
    total_f1 = 0
    for pred_row, true_row in zip(y_pred, y_true):
        truth = true_row.asnumpy().astype('int')
        scores = sigmoid(pred_row).asnumpy()
        pred_bin = np.where(scores > 0.5, 1, 0)
        idx_true = np.where(truth == 1)[0]
        idx_pred = np.where(pred_bin == 1)[0]
        hits = set(idx_true) & set(idx_pred)  # true positives
        recall = safe_div(len(hits), len(idx_true))
        precision = safe_div(len(hits), len(idx_pred))
        total_r += recall
        total_p += precision
        total_f1 += safe_div(2 * recall * precision, (recall + precision))
    n = len(y_pred)
    return total_r / n, total_p / n, total_f1 / n
def forward_non_local(self, inputs, loss, training):
    """Non-local (attention-style) message passing between `slots` local
    units and one global unit.

    inputs: slots+1 feature arrays (locals first, global last).
    loss:   optional list; a topology-entropy regularizer is appended when
            topology learning is active.
    Returns slots+1 transformed feature arrays.
    """
    results = []
    for i in range(self.slots):
        results.append(self.local_trans(inputs[i], training=training))
    results.append(self.global_trans(inputs[-1], training=training))
    # communication mask; defaults to fully connected
    comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
    if self.use_comm and self.topo_learning_mode:
        # concrete-distribution relaxation of a learned binary topology
        proba = nd.sigmoid(self.topo.data())
        if random.random() < 1e-2:
            print '---------------------------------------------'
            print proba.asnumpy()
            print '---------------------------------------------'
        u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
        comm_rate = nd.sigmoid(10. * (
            nd.log(proba) - nd.log(1. - proba) +
            nd.log(u_vec) - nd.log(1. - u_vec)
        ))
        if loss is not None:
            # entropy regularizer pushing proba towards 0/1
            loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))
    # NOTE(review): [[None]*n]*n aliases every row to the same list; the code
    # only works because each f[i][*] entry is rewritten before it is read
    # within the same iteration — confirm before refactoring.
    f = [[None] * (self.slots + 1)] * (self.slots + 1)
    if self.use_comm:
        # local: softmax-style attention over incoming messages per slot i
        for i in range(self.slots):
            norm_fac = None
            for j in range(self.slots):
                if i != j:
                    # unnormalized attention weight of message j -> i
                    f[i][j] = nd.sum(self.f_rec_local(inputs[i], training=training) * self.f_emit_local2local(inputs[j], training=training), axis=1)
                    f[i][j] = nd.exp(f[i][j]).reshape((f[i][j].shape[0], 1))
                    f[i][j] = f[i][j] * comm_rate[j][i]
                    if norm_fac is None:
                        norm_fac = nd.zeros_like(f[i][j])
                    norm_fac = norm_fac + f[i][j]
            # global -> local message weight
            f[i][-1] = nd.sum(self.f_rec_local(inputs[i], training=training) * self.f_emit_global2local(inputs[-1], training=training), axis=1)
            f[i][-1] = nd.exp(f[i][-1]).reshape((f[i][-1].shape[0], 1))
            f[i][-1] = f[i][-1] * comm_rate[-1][i]
            if norm_fac is None:
                norm_fac = nd.zeros_like(f[i][-1])
            norm_fac = norm_fac + f[i][-1]
            # weighted sum of transformed messages
            for j in range(self.slots):
                if i != j:
                    results[i] = results[i] + (1. / norm_fac) * f[i][j] * self.g_local2local(inputs[j], training=training)
            results[i] = results[i] + (1. / norm_fac) * f[i][-1] * self.g_global2local(inputs[-1], training=training)
        # global: attention over local -> global messages
        norm_fac = None
        for i in range(self.slots):
            f[-1][i] = nd.sum(self.f_rec_global(inputs[-1], training=training) * self.f_emit_local2global(inputs[i], training=training), axis=1)
            f[-1][i] = nd.exp(f[-1][i]).reshape((f[-1][i].shape[0], 1))
            f[-1][i] = f[-1][i] * comm_rate[i][-1]
            if norm_fac is None:
                norm_fac = nd.zeros_like(f[-1][i])
            norm_fac = norm_fac + f[-1][i]
        for i in range(self.slots):
            results[-1] = results[-1] + (1. / norm_fac) * f[-1][i] * self.g_local2global(inputs[i], training=training)
    if self.block_mode:
        # residual connection; requires matching in/out dims
        assert self.local_in_units == self.local_units
        assert self.global_in_units == self.global_units
        for i in range(self.slots):
            results[i] = self.yz_weight_local(results[i], training=training) + inputs[i]
        results[-1] = self.yz_weight_global(results[-1], training=training) + inputs[-1]
    return results
def reconstruct(self, v):
    """Mean-field reconstruction: visible -> hidden means -> visible
    probabilities (one RBM up-down pass)."""
    hidden_mean = nd.sigmoid(nd.dot(v, self.W) + self.hb)
    visible_prob = nd.sigmoid(nd.dot(hidden_mean, self.W.T) + self.vb)
    return visible_prob
def propup(self, v):
    """Probability of each hidden unit being active given visible vector v."""
    activation = nd.dot(v, self.W) + self.hb
    return nd.sigmoid(activation)
def sample_v_given_h_without_softmax(self, h0):
    """Bernoulli-sample visible units from their sigmoid probabilities
    given hidden state h0. Returns [probabilities, binary sample]."""
    probs = nd.sigmoid(self.propdown(h0))
    noise = nd.random_uniform(
        low=0.0, high=1.0, shape=probs.shape, ctx=self.ctx)
    sample = probs > noise
    return [probs, sample]
def gru(x, h, Wxr, Wxz, Whr, Whz, Wxh, Whh, br, bz, bh):
    """One GRU step: returns the new hidden state."""
    reset = nd.sigmoid(nd.dot(x, Wxr) + nd.dot(h, Whr) + br)
    update = nd.sigmoid(nd.dot(x, Wxz) + nd.dot(h, Whz) + bz)
    candidate = nd.tanh(nd.dot(x, Wxh) + reset * nd.dot(h, Whh) + bh)
    return update * h + (1 - update) * candidate
def forward(self, inputs, loss=None):
    """Message-passing forward over `slots` local units plus one global unit.

    Dispatches to the non-local / message-embedding variants when enabled;
    otherwise applies dropout, per-unit transforms, and (optionally) averaged
    communication between units, with an optional residual block mode.
    """
    assert len(inputs) == self.slots + 1
    if self.non_local_mode:
        return self.forward_multidims(inputs, loss)
    if self.message_embedding:
        return self.forward_message_embedding(inputs, loss)
    # the same dropout mask is shared across all local units
    local_drop_vec = nd.ones_like(inputs[0])
    local_drop_vec = self.local_dropout_op(local_drop_vec)
    for i in range(self.slots):
        inputs[i] = inputs[i] * local_drop_vec
    inputs[-1] = self.global_dropout_op(inputs[-1])
    # local_share_vec = []
    # local_private_vec = []
    # if self.concrete_share_rate:
    #     raise ValueError('no share_private!!!')
    #     for i in range(self.slots):
    #         proba = nd.sigmoid(data=self.share_rate[i].data())
    #         proba = nd.broadcast_axis(data=proba, axis=(0, 1), size=inputs[0].shape)
    #         u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=inputs[0].shape, ctx=CTX)
    #         local_share_vec.append(nd.sigmoid(10. * (
    #             nd.log(proba) - nd.log(1. - proba) +
    #             nd.log(u_vec) - nd.log(1. - u_vec)
    #         )))
    #         local_private_vec.append(1. - local_share_vec[i])
    #         # print 'proba:', proba
    #         # print 'dropout_regularizer:', self.dropout_regularizer
    #         if loss is not None:
    #             loss.append(
    #                 self.dropout_regularizer * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))
    #     if random.random() < 0.01:
    #         for i in range(self.slots):
    #             proba = nd.sigmoid(data=self.share_rate[i].data())
    #             print proba.asnumpy(),
    #         print ''
    # else:
    #     local_share_vec = [nd.ones_like(inputs[0]), ] * self.slots
    #     local_private_vec = [nd.zeros_like(inputs[0]), ] * self.slots
    # local_share_vec = (1. - self.private_rate) * nd.Dropout(
    #     nd.ones(shape=(inputs[0].shape[0], self.local_units)), p=self.private_rate, mode='always')
    # local_private_vec = 1. - local_share_vec
    # communication mask; defaults to fully connected
    comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
    if self.use_comm and self.topo_learning_mode:
        # concrete-distribution relaxation of a learned binary topology
        proba = nd.sigmoid(self.topo.data())
        if random.random() < 1e-2:
            print '---------------------------------------------'
            print proba.asnumpy()
            print '---------------------------------------------'
        u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
        comm_rate = nd.sigmoid(10. * (
            nd.log(proba) - nd.log(1. - proba) +
            nd.log(u_vec) - nd.log(1. - u_vec)
        ))
        if loss is not None:
            # entropy regularizer pushing proba towards 0/1
            loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))
    results = []
    for i in range(self.slots):
        results.append(self.local_share_trans(inputs[i]))
    results.append(self.global_trans(inputs[-1]))
    if self.use_comm:
        if self.topo_learning_mode:
            assert self.concrete_share_rate is False
            # weighted-average communication using the sampled topology mask
            for i in range(self.slots):
                tmp = nd.zeros_like(results[i])
                norm = nd.zeros_like(comm_rate[0][0])
                for j in range(self.slots):
                    if i != j:
                        tmp = tmp + self.local2local_share_comm(inputs[j]) * comm_rate[j][i]
                        norm = norm + comm_rate[j][i]
                # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                tmp = tmp + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                norm = norm + comm_rate[-1][i]
                if nd.sum(norm) > 1e-5:
                    results[i] = results[i] + tmp / norm
            tmp = nd.zeros_like(results[-1])
            norm = nd.zeros_like(comm_rate[0][0])
            for j in range(self.slots):
                tmp = tmp + self.local2global_comm(inputs[j]) * comm_rate[j][-1]
                norm = norm + comm_rate[j][-1]
            if nd.sum(norm) > 1e-5:
                results[-1] = results[-1] + tmp / norm
        else:
            # fixed fully-connected topology: plain average over messages
            for i in range(self.slots):
                tmp = nd.zeros_like(results[i])
                for j in range(self.slots):
                    if j != i:
                        tmp = tmp + self.local2local_share_comm(inputs[j])
                tmp = tmp + self.global2local_comm(inputs[-1])
                results[i] = results[i] + (tmp / float(self.slots))
            tmp = nd.zeros_like(results[-1])
            for i in range(self.slots):
                tmp = tmp + self.local2global_comm(inputs[i])
            results[-1] = results[-1] + (tmp / float(self.slots))
    if self.block_mode:
        # residual connection; requires matching in/out dims
        assert self.local_in_units == self.local_units
        assert self.global_in_units == self.global_units
        for i in range(self.slots):
            results[i] = self.yz_weight_local(results[i]) + inputs[i]
        results[-1] = self.yz_weight_global(results[-1]) + inputs[-1]
    return results
def forward_multidims(self, inputs, loss):
    """Multi-head ("multidims") non-local message passing between `slots`
    local units and one global unit; head outputs are summed at the end.

    inputs: slots+1 feature arrays (locals first, global last).
    loss:   optional list; a topology-entropy regularizer is appended when
            topology learning is active.
    """
    # one result list per attention head
    results = [[] for i in range(self.multidims)]
    for dim in range(self.multidims):
        for i in range(self.slots):
            results[dim].append(self.local_trans(inputs[i]))
        results[dim].append(self.global_trans(inputs[-1]))
    # communication mask; defaults to fully connected
    comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
    if self.use_comm and self.topo_learning_mode:
        # concrete-distribution relaxation of a learned binary topology
        proba = nd.sigmoid(self.topo.data())
        if random.random() < 1e-2:
            print '---------------------------------------------'
            print proba.asnumpy()
            print '---------------------------------------------'
        u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
        comm_rate = nd.sigmoid(10. * (
            nd.log(proba) - nd.log(1. - proba) +
            nd.log(u_vec) - nd.log(1. - u_vec)
        ))
        if loss is not None:
            # entropy regularizer pushing proba towards 0/1
            loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))
    # NOTE(review): [[None]*n]*n aliases every row to the same list; the code
    # only works because each f[i][*] entry is rewritten before it is read
    # within the same iteration — confirm before refactoring.
    f = [[None] * (self.slots + 1)] * (self.slots + 1)
    if self.use_comm:
        # local
        for i in range(self.slots):
            norm_fac = None
            for j in range(self.slots):
                if i != j:
                    # per-head unnormalized attention weight of message j -> i
                    f[i][j] = self.local_atten_out(self.local_atten_act(self.f_rec_local(inputs[i]) + self.f_emit_local2local(inputs[j])))
                    f[i][j] = nd.exp(f[i][j]).reshape((f[i][j].shape[0], self.multidims))
                    f[i][j] = f[i][j] * comm_rate[j][i]
                    if norm_fac is None:
                        norm_fac = nd.zeros_like(f[i][j])
                    norm_fac = norm_fac + f[i][j]
            # global -> local message weight
            f[i][-1] = self.local_atten_out(self.local_atten_act(self.f_rec_local(inputs[i]) + self.f_emit_global2local(inputs[-1])))
            # print '++++++++++++++++(*** x 3)++++++++++++++++++++++'
            # print f[i][-1].shape
            f[i][-1] = nd.exp(f[i][-1]).reshape((f[i][-1].shape[0], self.multidims))
            f[i][-1] = f[i][-1] * comm_rate[-1][i]
            if norm_fac is None:
                norm_fac = nd.zeros_like(f[i][-1])
            norm_fac = norm_fac + f[i][-1]
            # normalize weights across senders
            for j in range(self.slots):
                if i != j:
                    f[i][j] = (1. / norm_fac) * f[i][j]
            f[i][-1] = (1. / norm_fac) * f[i][-1]
            # split head axis into per-head weight arrays
            for j in range(self.slots):
                if i != j:
                    f[i][j] = mx.nd.split(f[i][j], axis=1, num_outputs=self.multidims)
            f[i][-1] = mx.nd.split(f[i][-1], axis=1, num_outputs=self.multidims)
            # print f[i][-1][0].shape
            # accumulate weighted messages per head
            for dim in range(self.multidims):
                for j in range(self.slots):
                    if i != j:
                        results[dim][i] = results[dim][i] + f[i][j][dim] * self.g_local2local(inputs[j])
                results[dim][i] = results[dim][i] + f[i][-1][dim] * self.g_global2local(inputs[-1])
        # global
        norm_fac = None
        for i in range(self.slots):
            f[-1][i] = self.global_atten_out(self.global_atten_act(self.f_rec_global(inputs[-1]) + self.f_emit_local2global(inputs[i])))
            f[-1][i] = nd.exp(f[-1][i]).reshape((f[-1][i].shape[0], self.multidims))
            f[-1][i] = f[-1][i] * comm_rate[i][-1]
            if norm_fac is None:
                norm_fac = nd.zeros_like(f[-1][i])
            norm_fac = norm_fac + f[-1][i]
        for i in range(self.slots):
            f[-1][i] = (1. / norm_fac) * f[-1][i]
            f[-1][i] = mx.nd.split(f[-1][i], axis=1, num_outputs=self.multidims)
        for dim in range(self.multidims):
            for i in range(self.slots):
                results[dim][-1] = results[dim][-1] + f[-1][i][dim] * self.g_local2global(inputs[i])
        # norm = [None] * (self.slots + 1)
        # for j in range(self.slots + 1):
        #     norm[j] = nd.zeros_like(f[j][0])
        #     for i in range(self.slots + 1):
        #         if i != j:
        #             norm[j] = norm[j] + f[j][i]
        # for i in range(self.slots + 1):
        #     for j in range(self.slots + 1):
        #         if i == j:
        #             print nd.zeros_like(f[j][i]).asnumpy(),
        #         else:
        #             print (f[j][i] / norm[j]).asnumpy(),
        #     print ''
        # print ''
    # sum the per-head outputs
    multidims_add_results = []
    for l in range(len(results[0])):
        tmp = nd.zeros_like(results[0][l])
        for dim in range(self.multidims):
            tmp = tmp + results[dim][l]
        multidims_add_results.append(tmp)
    if self.block_mode:
        # residual connection; requires matching in/out dims
        assert self.local_in_units == self.local_units
        assert self.global_in_units == self.global_units
        for i in range(self.slots):
            multidims_add_results[i] = self.yz_weight_local(multidims_add_results[i]) + inputs[i]
        multidims_add_results[-1] = self.yz_weight_global(multidims_add_results[-1]) + inputs[-1]
    return multidims_add_results
def forward(self, inputs, loss=None, training=True, commtype='average', topo='FC'):
    """Forward pass with a selectable communication topology and scheme.

    topo:     'FC' (fully connected), 'FUC' (no communication) or 'Master'
              (only local<->global links).
    commtype: 'average' (topology-weighted mean of messages) or
              'maxpooling' (element-wise max over messages).
    """
    assert len(inputs) == self.slots + 1
    # the same dropout mask is shared across all local units
    local_drop_vec = nd.ones_like(inputs[0])
    local_drop_vec = self.local_dropout_op(local_drop_vec)
    for i in range(self.slots):
        inputs[i] = inputs[i] * local_drop_vec
    inputs[-1] = self.global_dropout_op(inputs[-1])
    if topo == 'FC':
        comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
    elif topo == 'FUC':
        comm_rate = nd.zeros(shape=(self.slots + 1, self.slots + 1))
    elif topo == 'Master':
        # keep only links that involve the global (last) unit
        comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        for i in range(self.slots):
            for j in range(self.slots):
                comm_rate[i][j] = 0
    if self.use_comm and self.topo_learning_mode:
        # learned topology overrides the fixed one (concrete relaxation)
        proba = nd.sigmoid(self.topo.data())
        if random.random() < 1e-2:
            print '---------------------------------------------'
            print proba.asnumpy()
            print '---------------------------------------------'
        u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
        comm_rate = nd.sigmoid(10. * (
            nd.log(proba) - nd.log(1. - proba) +
            nd.log(u_vec) - nd.log(1. - u_vec)
        ))
        if loss is not None:
            # entropy regularizer pushing proba towards 0/1
            loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))
    results = []
    for i in range(self.slots):
        results.append(self.local_share_trans.forward(inputs[i], training=training))
    results.append(self.global_trans.forward(inputs[-1], training=training))
    if commtype == 'average':
        # topology-weighted average of incoming messages per unit
        for i in range(self.slots):
            tmp = nd.zeros_like(results[i])
            norm = nd.zeros_like(comm_rate[0][0])
            for j in range(self.slots):
                if i != j:
                    tmp = tmp + self.local2local_share_comm.forward(nd.concat(inputs[j], dim=1), training=training) * comm_rate[j][i]
                    norm = norm + comm_rate[j][i]
            # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
            tmp = tmp + self.global2local_comm.forward(nd.concat(inputs[-1], dim=1), training=training) * \
                comm_rate[-1][i]
            norm = norm + comm_rate[-1][i]
            if nd.sum(norm) > 1e-5:
                results[i] = results[i] + tmp / norm
        tmp = nd.zeros_like(results[-1])
        norm = nd.zeros_like(comm_rate[0][0])
        for j in range(self.slots):
            tmp = tmp + self.local2global_comm.forward(nd.concat(inputs[j], dim=1), training=training) * \
                comm_rate[j][-1]
            norm = norm + comm_rate[j][-1]
        if nd.sum(norm) > 1e-5:
            results[-1] = results[-1] + tmp / norm
    elif commtype == 'maxpooling':
        # element-wise max over incoming messages (ignores comm_rate)
        for i in range(self.slots):
            tmp = []
            for j in range(self.slots):
                if j != i:
                    tmp.append(self.local2local_share_comm.forward(inputs[j], training=training))
            tmp.append(self.global2local_comm.forward(inputs[-1], training=training))
            for k in range(len(tmp)):
                tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))
            tmp = nd.concat(*tmp, dim=1)
            maxcomm = nd.max(tmp, axis=1)
            results[i] = results[i] + maxcomm
        tmp = []
        for i in range(self.slots):
            tmp.append(self.local2global_comm.forward(inputs[i], training=training))
        for k in range(len(tmp)):
            tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))
        tmp = nd.concat(*tmp, dim=1)
        maxcomm = nd.max(tmp, axis=1)
        results[-1] = results[-1] + maxcomm
    return results
def forward(self, x, begin_states):
    """Encode the input, extract features given the initial RNN states,
    and return sigmoid-activated classifier output."""
    encoded = self.encoder(x)
    features = self.feat_extractor(encoded, begin_states)
    logits = self.fc(features)
    return f.sigmoid(logits)
def DCGAN(epoch=100, batch_size=128, save_period=10, load_period=100, optimizer="adam", beta1=0.5, learning_rate=0.0002, dataset="FashionMNIST", ctx=mx.gpu(0)):
    """Train a DCGAN on FashionMNIST or CIFAR10.

    Loads existing weights for `load_period` when present, alternates
    discriminator/generator updates with sigmoid BCE loss, saves weights
    every `save_period` epochs, and renders samples at the end.
    """
    # data selection
    if dataset == "CIFAR10":
        train_data, test_data = CIFAR10(batch_size)
        G_path = "weights/CIFAR10-G{}.params".format(load_period)
        D_path = "weights/CIFAR10-D{}.params".format(load_period)
    elif dataset == "FashionMNIST":
        train_data, test_data = FashionMNIST(batch_size)
        G_path = "weights/FashionMNIST-G{}.params".format(load_period)
        D_path = "weights/FashionMNIST-D{}.params".format(load_period)
    else:
        return "The dataset does not exist."
    # network
    generator = Generator()
    discriminator = Discriminator()
    # for faster learning
    generator.hybridize()
    discriminator.hybridize()
    if os.path.exists(D_path) and os.path.exists(G_path):
        print("loading weights")
        generator.load_params(filename=G_path, ctx=ctx)  # weights load
        discriminator.load_params(filename=D_path, ctx=ctx)  # weights load
    else:
        print("initializing weights")
        generator.collect_params().initialize(
            mx.init.Normal(sigma=0.02), ctx=ctx)  # weights initialization
        discriminator.collect_params().initialize(
            mx.init.Normal(sigma=0.02), ctx=ctx)  # weights initialization
        #net.initialize(mx.init.Normal(sigma=0.1),ctx=ctx) # weights initialization
    # optimizer
    G_trainer = gluon.Trainer(generator.collect_params(), optimizer, {
        "learning_rate": learning_rate,
        "beta1": beta1
    })
    D_trainer = gluon.Trainer(discriminator.collect_params(), optimizer, {
        "learning_rate": learning_rate,
        "beta1": beta1
    })
    '''The cross-entropy loss for binary classification. (alias: SigmoidBCELoss)

    BCE loss is useful when training logistic regression.

    .. math::
        loss(o, t) = - 1/n \sum_i (t[i] * log(o[i]) + (1 - t[i]) * log(1 - o[i]))

    Parameters
    ----------
    from_sigmoid : bool, default is `False`
        Whether the input is from the output of sigmoid. Set this to false will make
        the loss calculate sigmoid and then BCE, which is more numerically stable through
        log-sum-exp trick.
    weight : float or None
        Global scalar weight for loss.
    batch_axis : int, default 0
        The axis that represents mini-batch.
    '''
    SBCE = gluon.loss.SigmoidBCELoss()
    # learning
    start_time = time.time()
    # cost selection: labels for the discriminator's two targets
    real_label = nd.ones((batch_size, ), ctx=ctx)
    fake_label = nd.zeros((batch_size, ), ctx=ctx)
    for i in tqdm(range(1, epoch + 1, 1)):
        for data, label in train_data:
            print("\n<<D(X) , G(X)>")
            data = data.as_in_context(ctx)
            noise = Noise(batch_size=batch_size, ctx=ctx)
            #1. Discriminator : (1)maximize Log(D(x)) + (2)Log(1-D(G(z)))
            with autograd.record(train_mode=True):
                output = discriminator(data)
                print("real_D(X) : {}".format(
                    nd.mean(nd.sigmoid(output)).asscalar())),
                #(1)
                real = SBCE(output, real_label)
                #(2)
                fake_real = generator(noise)
                output = discriminator(fake_real)
                print("fake_real_D(X) : {}".format(
                    nd.mean(nd.sigmoid(output)).asscalar()))
                fake_real = SBCE(output, fake_label)
                # cost definition
                discriminator_cost = real + fake_real
            discriminator_cost.backward()
            D_trainer.step(batch_size, ignore_stale_grad=True)
            # 2. Generator : (3)maximize Log(D(G(z)))
            with autograd.record(train_mode=True):
                fake = generator(noise)
                output = discriminator(fake)
                print("fake_G(X) : {}".format(
                    nd.mean(nd.sigmoid(output)).asscalar()))
                #(3)
                Generator_cost = SBCE(output, real_label)
            Generator_cost.backward()
            G_trainer.step(batch_size, ignore_stale_grad=True)
        print(" epoch : {}".format(i))
        print("last batch Discriminator cost : {}".format(
            nd.mean(discriminator_cost).asscalar()))
        print("last batch Generator cost : {}".format(
            nd.mean(Generator_cost).asscalar()))
        if i % save_period == 0:
            end_time = time.time()
            print("-------------------------------------------------------")
            print("{}_learning time : {}".format(epoch, end_time - start_time))
            print("-------------------------------------------------------")
            if not os.path.exists("weights"):
                os.makedirs("weights")
            print("saving weights")
            if dataset == "FashionMNIST":
                generator.save_params(
                    "weights/FashionMNIST-G{}.params".format(i))
                discriminator.save_params(
                    "weights/FashionMNIST-D{}.params".format(i))
            elif dataset == "CIFAR10":
                generator.save_params("weights/CIFAR10-G{}.params".format(i))
                discriminator.save_params(
                    "weights/CIFAR10-D{}.params".format(i))
    #generate image
    generate_image(generator, ctx, dataset)
    return "optimization completed"