def function_set(self):
    def dropout(batch_X, drop_probability):
        keep_probability = 1 - drop_probability
        assert 0 <= keep_probability <= 1
        if keep_probability == 0:
            return batch_X.zeros_like()
        # Keep each unit's output for a sample only with probability keep_probability
        mask = nd.random_uniform(
            0, 1.0, batch_X.shape, ctx=batch_X.context) < keep_probability
        # Ensure E[dropout(batch_X)] == batch_X
        scale = 1 / keep_probability
        return mask * batch_X * scale

    # Dense layers need dropout; Conv layers generally don't, since their weights are already shared
    h1 = dropout(
        nd.relu(
            nd.dot(self.__batch_X.reshape(
                (-1, self.__num_inputs)), self.__W1) + self.__b1),
        self.__drop_prob1)
    h2 = dropout(nd.relu(nd.dot(h1, self.__W2) + self.__b2), self.__drop_prob2)
    return nd.dot(h2, self.__W3) + self.__b3
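# A quick numerical check (not from the original source; assumes `from mxnet import nd`)
# illustrating the inverted-dropout scaling above: surviving activations are multiplied
# by 1/keep_probability, so the expectation E[dropout(batch_X)] stays equal to batch_X.
X = nd.ones((1000, 100))
keep_probability = 0.5
mask = nd.random_uniform(0, 1.0, X.shape) < keep_probability
print((mask * X / keep_probability).mean())  # ~1.0, matching X.mean()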
def forward(self, x):
    out = nd.relu(self.bn1(self.conv1(x)))
    # print("in forward", out.shape)
    out = self.bn2(self.conv2(out))
    if not self.same_shape:
        x = self.conv3(x)
    return nd.relu(out + x)
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # First convolutional layer
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1,
                             kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type='max',
                    kernel=(2, 2), stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2,
                             kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(h2_activation, pool_type="max",
                    kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
def forward(self, x):
    x = nd.relu(self.bn1(self.conv1(x)))
    x = nd.relu(self.bn2(self.conv2(x)))
    x = nd.relu(self.bn3(self.conv3(x)))
    x = nd.relu(self.fc1(x))
    x = nd.relu(self.fc2(x))
    return self.out(x)
def function_set(self):
    # First convolutional layer
    # Convolution
    h1_conv = nd.Convolution(
        data=self.__batch_X, weight=self.__W1, bias=self.__b1,
        kernel=self.__W1.shape[2:], num_filter=self.__W1.shape[0])
    # Activation
    h1_activation = nd.relu(h1_conv)
    # Pooling
    h1 = nd.Pooling(data=h1_activation, pool_type="max",
                    kernel=(2, 2), stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(
        data=h1, weight=self.__W2, bias=self.__b2,
        kernel=self.__W2.shape[2:], num_filter=self.__W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max",
                    kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, self.__W3) + self.__b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, self.__W4) + self.__b4
    # print("1st conv block:", h1.shape)
    # print("2nd conv block:", h2.shape)
    # print("1st dense:", h3.shape)
    # print("2nd dense:", h4_linear.shape)
    # print("output:", h4_linear)
    return h4_linear
def postprocess(self, x):
    output = F.relu(x)
    output = self.conv_post_1(output)
    output = F.relu(output)
    output = self.conv_post_2(output)
    output = nd.reshape(output, (output.shape[1], output.shape[2]))
    output = F.transpose(output, axes=(1, 0))
    return output
def forward(self, pred, label):
    label = nd.one_hot(label, self.nclass)
    alpha_p = nd.relu(self.op - pred)
    alpha_n = nd.relu(pred - self.on)
    pred = (label * (alpha_p * (pred - self.delta_p)) +
            (1 - label) * (alpha_n * (pred - self.delta_n))) * self.scale
    return self.loss(pred, label)
def function_set(self):
    def batch_norm(X, gamma, beta, is_training, moving_mean, moving_variance,
                   eps=1e-5, moving_momentum=0.9):
        assert len(X.shape) in (2, 4)
        # Fully connected: batch_size x feature
        if len(X.shape) == 2:
            # Mean and variance of each input dimension over the batch
            mean = X.mean(axis=0)
            variance = ((X - mean) ** 2).mean(axis=0)
        # 2D convolution: batch_size x channel x height x width
        else:
            # Compute mean and variance per channel; keep the 4D shape so broadcasting works correctly
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            variance = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
            # Reshape so broadcasting works correctly
            moving_mean = moving_mean.reshape(mean.shape)
            moving_variance = moving_variance.reshape(mean.shape)
        # Normalize
        if is_training:
            X_hat = (X - mean) / nd.sqrt(variance + eps)
            # !!! Update the global mean and variance:
            # each batch uses 0.9 of the previous statistics and 0.1 of the current batch
            moving_mean[:] = moving_momentum * moving_mean + \
                (1.0 - moving_momentum) * mean
            moving_variance[:] = moving_momentum * moving_variance + \
                (1.0 - moving_momentum) * variance
        else:
            # !!! Use the global mean and variance at test time
            X_hat = (X - moving_mean) / nd.sqrt(moving_variance + eps)
        # Scale and shift
        return gamma.reshape(mean.shape) * X_hat + beta.reshape(mean.shape)

    # First convolutional layer
    h1_conv = nd.Convolution(
        data=self.__batch_X, weight=self.__W1, bias=self.__b1,
        kernel=(5, 5), num_filter=20)
    # First BN
    h1_bn = batch_norm(
        h1_conv, self.__gamma1, self.__beta1, self.__is_training,
        self.__moving_mean1, self.__moving_variance1)
    h1_activation = nd.relu(h1_bn)
    h1 = nd.Pooling(
        data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(
        data=h1, weight=self.__W2, bias=self.__b2,
        kernel=(3, 3), num_filter=50)
    # Second BN
    h2_bn = batch_norm(
        h2_conv, self.__gamma2, self.__beta2, self.__is_training,
        self.__moving_mean2, self.__moving_variance2)
    h2_activation = nd.relu(h2_bn)
    h2 = nd.Pooling(data=h2_activation, pool_type="max",
                    kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, self.__W3) + self.__b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, self.__W4) + self.__b4
    return h4_linear
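# A small sanity check (not from the original source) for the batch_norm helper above,
# assuming it has been lifted to module scope and that mxnet.ndarray is imported as nd.
# In training mode each channel should come out with roughly zero mean and unit variance
# before the gamma/beta affine transform.
X = nd.random_normal(loc=3.0, scale=2.0, shape=(8, 4, 16, 16))
gamma, beta = nd.ones(4), nd.zeros(4)
moving_mean, moving_variance = nd.zeros(4), nd.ones(4)
out = batch_norm(X, gamma, beta, is_training=True,
                 moving_mean=moving_mean, moving_variance=moving_variance)
print(out.mean(axis=(0, 2, 3)))        # ~0 per channel (beta)
print((out ** 2).mean(axis=(0, 2, 3))) # ~1 per channel (gamma^2)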
def forward(self, x):
    out = self.conv_1(nd.relu(self.bn_1(x)))
    out = nd.relu(self.bn_2(out))
    if self.is_dropout:
        out = self.dropout(out)
    out = self.conv_2(out)
    if not self.same_shape:
        x = self.conv_3(x)
    return out + x
def net(x, is_training=False):
    # w1, b1, w2, b2, w3, b3 = params = initParam(verbose=True)
    x = x.reshape(shape=(-1, num_input))  # (256, 784)
    # print(x.shape)
    x1 = nd.relu(nd.dot(x, w1) + b1)
    if is_training:
        x1 = dropout(x1, 0.8)
    x2 = nd.relu(nd.dot(x1, w2) + b2)
    if is_training:
        x2 = dropout(x2, 0.5)
    out = nd.dot(x2, w3) + b3
    return out
def net(X):
    X = X.reshape((-1, num_inputs))
    h1 = nd.dot(X, w1) + b1
    h1 = nd.relu(h1)
    h1 = dropout(h1, dropout_prob_1)
    h2 = nd.dot(h1, w2) + b2
    h2 = nd.relu(h2)
    h2 = dropout(h2, dropout_prob_2)
    y = nd.dot(h2, w3) + b3
    return y
def forward(self, x):
    x = self.pool1(F.relu(self.conv1(x)))
    x = self.pool2(F.relu(self.conv2(x)))
    # 0 means copy over size from corresponding dimension.
    # -1 means infer size from the rest of dimensions.
    x = x.reshape((0, -1))
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x
def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = x.reshape((-1, 192))
    x = self.dense(x)
    x = self.dense2(x)
    x = self.dense3(x)
    x = self.dense4(x)
    probs = self.action_pred(x)
    values = self.value_pred(x)
    return mx.ndarray.softmax(probs), values
def net(X):
    X = X.reshape((-1, num_inputs))
    # First fully connected layer.
    h1 = nd.relu(nd.dot(X, W1) + b1)
    # Add a dropout layer after the first fully connected layer.
    h1 = dropout(h1, drop_prob1)
    # Second fully connected layer.
    h2 = nd.relu(nd.dot(h1, W2) + b2)
    # Add a dropout layer after the second fully connected layer.
    h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, W3) + b3
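# The nets above call a dropout() helper that is not defined in these snippets. A minimal
# sketch of what it might look like (an assumption, mirroring the inverted-dropout version
# defined earlier; the second argument is taken to be the drop probability):
def dropout(X, drop_prob):
    keep_prob = 1 - drop_prob
    assert 0 <= keep_prob <= 1
    if keep_prob == 0:
        # Drop everything.
        return X.zeros_like()
    # Bernoulli mask: keep each element with probability keep_prob.
    mask = nd.random_uniform(0, 1.0, X.shape, ctx=X.context) < keep_prob
    # Scale by 1/keep_prob so that E[dropout(X)] == X.
    return mask * X / keep_prob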
def forward(self, x, z):
    bs = x.shape[0]
    x = F.relu(self.conv1(x))
    x = self.norm1(self.pool1(x))
    x = F.relu(self.conv2(x))
    x = self.norm2(self.pool2(x))
    x = F.relu(self.dense1(x))
    x = F.concat(x, z)
    status = self.encoder.begin_state(batch_size=bs)
    x, status = self.encoder(x, status)
    return x, status
def postprocess(self, x):
    """ Description : module for postprocess """
    output = F.relu(x)
    output = self.conv_post_1(output)
    output = F.relu(output)
    output = self.conv_post_2(output)
    output = nd.reshape(output, (output.shape[1], output.shape[2]))
    output = F.transpose(output, axes=(1, 0))
    return output
def forward(self, x):
    x = F.relu(self.conv1(x))
    x = self.pool2(F.relu(self.conv2(x)))
    x = self.drop2D(x)
    # 0 means copy over size from corresponding dimension.
    # -1 means infer size from the rest of dimensions.
    # Essentially flattens to 1D.
    x = x.reshape((0, -1))
    x = F.relu(self.fc1(x))
    x = self.drop1D(x)
    x = F.relu(self.fc2(x))
    x = F.softmax(x)
    return x
def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = x.reshape((-1, 192))
    x = x.reshape((1, 1, 192))
    # x = self.lstm(x)
    # x = self.dense(x)
    # x = self.dense2(x)
    # x = self.dense3(x)
    # x = self.dense4(x)
    probs = self.action_pred(x)
    values = self.value_pred(x)
    probs = probs.reshape((-1, self.available_actions_count))
    values = values.reshape((-1, 1))
    return mx.ndarray.softmax(probs, axis=1), values
def bayes_forward(self, x, dense, loss, activation_fn=None, is_target=False):
    weight = self.get_sample(mu=dense.weight_mu.data(),
                             rho=dense.weight_rho.data(),
                             is_target=is_target)
    bias = self.get_sample(mu=dense.bias_mu.data(),
                           rho=dense.bias_rho.data(),
                           is_target=is_target)
    # Variational posterior log-probability of the sampled parameters
    loss = loss + log_gaussian(x=weight, mu=dense.weight_mu.data(),
                               sigma=softplus(dense.weight_rho.data()))
    loss = loss + log_gaussian(x=bias, mu=dense.bias_mu.data(),
                               sigma=softplus(dense.bias_rho.data()))
    # Prior log-probability of the sampled parameters (subtracted)
    loss = loss - log_gaussian(x=weight, mu=0., sigma=self.sigma_prior)
    loss = loss - log_gaussian(x=bias, mu=0., sigma=self.sigma_prior)
    result = nd.dot(x, weight) + bias
    if activation_fn is None:
        return result
    elif activation_fn == 'relu':
        return nd.relu(result)
def forward(self, x):
    x = self.fc2(x)
    x = F.relu(x)
    x = F.Dropout(x)
    x = self.fc3(x)
    return x
def forward(self, inputs, is_target=False):
    result = None
    loss = 0.
    for _ in range(self.n_samples):
        tmp = inputs
        weights = []
        biases = []
        for i in range(len(self.weight_mus)):
            weights.append(self.get_sample(
                mu=self.weight_mus[i].data(),
                rho=self.weight_rhos[i].data(),
                is_target=is_target))
            biases.append(self.get_sample(mu=self.bias_mus[i].data(),
                                          rho=self.bias_rhos[i].data(),
                                          is_target=is_target))
            loss = loss + log_gaussian(
                x=weights[-1], mu=self.weight_mus[i].data(),
                sigma=softplus(self.weight_rhos[i].data()))
            loss = loss + log_gaussian(x=biases[-1],
                                       mu=self.bias_mus[i].data(),
                                       sigma=softplus(self.bias_rhos[i].data()))
            loss = loss - log_gaussian(x=weights[-1], mu=0.,
                                       sigma=self.sigma_prior)
            loss = loss - log_gaussian(x=biases[-1], mu=0.,
                                       sigma=self.sigma_prior)
        for i in range(len(weights)):
            tmp = nd.dot(tmp, weights[i]) + biases[i]
            if i != len(weights) - 1:
                tmp = nd.relu(tmp)
        if result is None:
            result = nd.zeros_like(tmp)
        result = result + tmp
    result = result / float(self.n_samples)
    loss = loss / float(self.n_samples)
    return result, loss
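# The two Bayesian snippets above call get_sample, softplus, and log_gaussian, which are
# not defined here. A minimal sketch of what they might look like (signatures inferred
# from the calls; treat these as assumptions rather than the original implementations):
import math
from mxnet import nd

def softplus(x):
    # sigma = log(1 + exp(rho)) keeps the standard deviation positive.
    return nd.log(1. + nd.exp(x))

def log_gaussian(x, mu, sigma):
    # Log-density of x under N(mu, sigma^2), summed over all elements.
    # mu and sigma may be scalars (e.g. the prior) or NDArrays.
    if not isinstance(sigma, nd.NDArray):
        sigma = nd.array([sigma])
    return nd.sum(-0.5 * math.log(2 * math.pi) - nd.log(sigma)
                  - (x - mu) ** 2 / (2 * sigma ** 2))

def get_sample(mu, rho, is_target=False):
    # Reparameterization trick: w = mu + softplus(rho) * eps with eps ~ N(0, 1).
    # Using the mean for a target network is a guess about the is_target flag.
    if is_target:
        return mu
    epsilon = nd.random_normal(loc=0., scale=1., shape=mu.shape)
    return mu + softplus(rho) * epsilon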
def forward(self, x, y):
    x = nd.relu(self.bn_z(self.dense_z(x)))
    y = nd.expand_dims(y, axis=2)
    y = nd.expand_dims(y, axis=2)
    y = nd.relu(self.bn_label(self.dense_label(y)))
    z = nd.concat(x, y, dim=1)
    z = z.reshape([z.shape[0], z.shape[1], 1, 1])
    x = nd.relu(self.bn2(self.deconv2(z)))
    x = nd.relu(self.bn3(self.deconv3(x)))
    x = nd.relu(self.bn4(self.deconv4(x)))
    # x = nd.sigmoid(self.out(z))
    return x
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # First convolutional layer
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1,
                             kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type="max",
                    kernel=(2, 2), stride=(2, 2))
    # h1_conv.shape: (256, 20, 24, 24)
    # h1.shape: (256, 20, 12, 12)
    # print('h1_conv.shape: ', h1_conv.shape)
    # print('h1.shape: ', h1.shape)
    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2,
                             kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max",
                    kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense block:', h3.shape)
        print('2nd dense block:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
def net(x, is_training=False, verbose=False):
    x = x.as_in_context(w1.context)
    h1_conv = nd.Convolution(data=x, weight=w1, bias=b1,
                             kernel=w1.shape[2:], num_filter=c1)
    h1_bn = utils.batch_norm(h1_conv, gamma1, beta1, is_training,
                             moving_mean1, moving_variance1)
    # Apply the activation to the batch-normalized output
    h1_activation = nd.relu(h1_bn)
    h1 = nd.Pooling(data=h1_activation, pool_type='max',
                    kernel=(2, 2), stride=(2, 2))
    h2_conv = nd.Convolution(data=h1, weight=w2, bias=b2,
                             kernel=w2.shape[2:], num_filter=c2)
    h2_bn = utils.batch_norm(h2_conv, gamma2, beta2, is_training,
                             moving_mean2, moving_variance2)
    h2_activation = nd.relu(h2_bn)
    h2 = nd.Pooling(data=h2_activation, pool_type='max',
                    kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    h3_linear = nd.dot(h2, w3) + b3
    h3 = nd.relu(h3_linear)
    h4_linear = nd.dot(h3, w4) + b4
    if verbose:
        print('h1 conv block: ', h1.shape)
        print('h2 conv block: ', h2.shape)
        print('h3 dense: ', h3.shape)
        print('h4 dense: ', h4_linear.shape)
        print('output: ', h4_linear)
    return h4_linear.as_in_context(ctx)
def demo(self, x_low, x_high):
    import mxnet.ndarray as F
    x_low = F.contrib.BilinearResize2D(x_low, height=x_high.shape[2],
                                       width=x_high.shape[3])
    x_low = self.conv_low(x_low)
    x_high = self.conv_hign(x_high)
    x = x_low + x_high
    x = F.relu(x)
    x_low_cls = self.conv_low_cls(x_low)
    return x, x_low_cls
def hybrid_forward(self, F, x_low, x_high):
    x_low = F.contrib.BilinearResize2D(x_low, height=self._up_kwargs['height'],
                                       width=self._up_kwargs['width'])
    x_low = self.conv_low(x_low)
    x_high = self.conv_hign(x_high)
    x = x_low + x_high
    x = F.relu(x)
    x_low_cls = self.conv_low_cls(x_low)
    return x, x_low_cls
def forward(self, x):
    """
    We separate the two feature sequences, feed them into two separate nets,
    and then stack them for the loss.
    """
    # input: (batch, seq_len, features) for 'nwc'
    # input: (batch, features, seq_len) for 'ncw'
    # pdb.set_trace()
    convi = self._convNet(x)  # O: (n, num_filter, w)
    if self._downsample is not None:  # differs from the original
        convi = self._downsample(convi)
    out = convi + x  # (n, c, w)
    return F.relu(out)
def forward(self, x, hidden):
    # convert NTC to TNC
    x = F.transpose(x, (1, 0, 2))
    output, hiddens = self.rnn(x, hidden)
    # print(output.shape)
    hidden = hiddens[-1]
    # print(hidden.shape)
    hidden = F.transpose(hidden, (1, 0, 2))
    output = self.fc(hidden)
    output = self.bn(output)
    output = F.relu(output)
    return output
def forward(self, x):
    # NCHW
    h, w = x.shape[2], x.shape[3]
    res = []
    for i in range(h):
        res.append(
            nd.stack(*self.hcell.unroll(w, x[:, :, i, :],
                                        layout='NCT')[0], axis=2))  # NCW
    for i in range(w):
        res.append(
            nd.stack(*self.vcell.unroll(h, x[:, :, :, i],
                                        layout='NCT')[0], axis=2))  # NCH
    res = nd.relu(nd.stack(*res[:h], axis=2) + nd.stack(*res[h:], axis=3))
    return nd.concat(x, res, dim=1)
def net_lenet(X, verbose=False):
    # First convolutional layer
    h1_conv = nd.Convolution(data=X, weight=lenet_W1, bias=lenet_b1,
                             kernel=lenet_W1.shape[2:],
                             num_filter=lenet_W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type="max",
                    kernel=(2, 2), stride=(2, 2))
    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=lenet_W2, bias=lenet_b2,
                             kernel=lenet_W2.shape[2:],
                             num_filter=lenet_W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(data=h2_activation, pool_type="max",
                    kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)
    # First fully connected layer
    h3_linear = nd.dot(h2, lenet_W3) + lenet_b3
    h3 = nd.relu(h3_linear)
    # Second fully connected layer
    h4_linear = nd.dot(h3, lenet_W4) + lenet_b4
    if verbose:
        print('1st conv block:', h1.shape)
        print('2nd conv block:', h2.shape)
        print('1st dense:', h3.shape)
        print('2nd dense:', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
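# The LeNet-style nets above assume globally defined parameters (W1..W4 / lenet_W1..lenet_W4)
# that are not shown in these snippets. A hedged sketch of how they might be initialized for
# 1x28x28 inputs (the exact kernel sizes, layer widths, and scale here are assumptions):
from mxnet import nd

weight_scale = 0.01
# conv 1: 20 filters, 5x5 kernel -> 24x24, max-pool 2x2 -> 12x12
lenet_W1 = nd.random_normal(shape=(20, 1, 5, 5), scale=weight_scale)
lenet_b1 = nd.zeros(20)
# conv 2: 50 filters, 3x3 kernel -> 10x10, max-pool 2x2 -> 5x5
lenet_W2 = nd.random_normal(shape=(50, 20, 3, 3), scale=weight_scale)
lenet_b2 = nd.zeros(50)
# dense 1: flatten(50 * 5 * 5) = 1250 -> 128
lenet_W3 = nd.random_normal(shape=(1250, 128), scale=weight_scale)
lenet_b3 = nd.zeros(128)
# dense 2: 128 -> 10 classes
lenet_W4 = nd.random_normal(shape=(128, 10), scale=weight_scale)
lenet_b4 = nd.zeros(10)
params = [lenet_W1, lenet_b1, lenet_W2, lenet_b2,
          lenet_W3, lenet_b3, lenet_W4, lenet_b4]
for param in params:
    param.attach_grad()  # needed if these nets are trained with autograd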
def forward(self, input, hidden, encoder_outputs):
    # input shape: (1,)
    embedded = self.embedding(input)
    if self.dropout_p > 0:
        embedded = self.dropout(embedded)
    attn_weights = F.softmax(
        self.attn(F.concat(embedded, hidden[0].flatten(), dim=1)))
    attn_applied = F.batch_dot(attn_weights.expand_dims(0),
                               encoder_outputs.expand_dims(0))
    output = F.concat(embedded.flatten(), attn_applied.flatten(), dim=1)
    output = self.attn_combine(output).expand_dims(0)
    for i in range(self.n_layers):
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
    output = self.out(output)
    return output, hidden, attn_weights