def generator_net(x, training, opts, name='Generator'):
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        sz = opts.img_size // 16

        # x is input. 1 x 1 x nz
        # state size. sz x sz x (ngf*8)
        y = deconv(x, opts.ngf*8, sz, 1, 'valid', 'deconv1')
        y = batch_norm(y, training)
        y = relu(y)

        # state size. (sz*2) x (sz*2) x (ngf*4)
        y = deconv(y, opts.ngf*4, 4, 2, 'same', 'deconv2')
        y = batch_norm(y, training)
        y = relu(y)

        # state size. (sz*4) x (sz*4) x (ngf*2)
        y = deconv(y, opts.ngf*2, 4, 2, 'same', 'deconv3')
        y = batch_norm(y, training)
        y = relu(y)

        # state size. (sz*8) x (sz*8) x ngf
        y = deconv(y, opts.ngf, 4, 2, 'same', 'deconv4')
        y = batch_norm(y, training)
        y = relu(y)

        # state size. (sz*16) x (sz*16) x nc
        y = deconv(y, opts.nc, 4, 2, 'same', 'deconv5')
        y = tf.nn.tanh(y)

        tf.logging.info("Generator output: {}".format(y))
        return y

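# A minimal usage sketch (not from the original repository), assuming TensorFlow 1.x,
# NHWC tensors, a latent size of 100, and that `deconv`, `batch_norm`, and `relu`
# are the repo's own layer wrappers; `opts` only needs the fields the generator reads.
import tensorflow as tf
from types import SimpleNamespace

opts = SimpleNamespace(img_size=64, ngf=64, nc=3)
z = tf.random_normal([16, 1, 1, 100])                # batch of latent vectors, 1 x 1 x nz
fake = generator_net(z, training=True, opts=opts)    # -> [16, 64, 64, 3], values in [-1, 1]
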
def build_cnn(states, reuse=False, init=None):
    """ Builds ACER network, returns the shared layer """
    #init = tf.initializers.random_normal(0,0.1)
    with tf.variable_scope("shared_policy_net"):
        with tf.variable_scope("conv_layer_1"):
            conv_l1 = relu(conv2d(states, 8, 4, 32, init))
            print(conv_l1)
        with tf.variable_scope("conv_layer_2"):
            conv_l2 = relu(conv2d(conv_l1, 4, 2, 64, init))
            print(conv_l2)
        with tf.variable_scope("conv_layer_3"):
            conv_l3 = relu(conv2d(conv_l2, 3, 1, 64, init))
            print(conv_l3)
        with tf.name_scope("flatten"):
            conv_l3_flat = tf.layers.flatten(conv_l3)
        with tf.variable_scope("shared_fully_connected"):
            shared_fc = tf.layers.dense(
                inputs=conv_l3_flat,
                units=512,
                activation=tf.nn.relu,
                kernel_initializer=init,
            )
    return shared_fc

def forwardPropagation(input, weights, bias, originalOutput, binarizedTruePrediction,
                       prediction, numberOfSamples, numberOfNeuronsInLayers, classes,
                       optimizer):
    # computing output of first hidden layer
    h1In = numpy.dot(input[:, :3], weights[0]) + numpy.repeat(
        numpy.array([bias[0]]), repeats=[numberOfSamples], axis=0)
    h1Output = utils.relu(h1In)

    # computing output of second hidden layer
    h2In = numpy.dot(h1Output, weights[1]) + numpy.repeat(
        numpy.array([bias[1]]), repeats=[numberOfSamples], axis=0)
    h2Output = utils.relu(h2In)

    # computing output of the output layer
    OIn = numpy.dot(h2Output, weights[2]) + numpy.repeat(
        numpy.array([bias[2]]), repeats=[numberOfSamples], axis=0)
    OOutput = utils.softmax(OIn)

    myPredictedValueListAsIntegers = numpy.argmax(OOutput, axis=1)

    # Computing overall error only for plotting graph
    if prediction == False:
        errorForGraph = utils.log_loss(binarizedTruePrediction, OOutput[:] + 0.00001)
        errorForPlottingGraphList.append(errorForGraph)
        accuracyScoreForGraph.append(
            utils.accuracy_score(originalOutput, myPredictedValueListAsIntegers))
        backPropagation(input, OOutput, originalOutput, binarizedTruePrediction,
                        h1Output, h2Output, numberOfNeuronsInLayers, classes,
                        h1In, h2In, OIn, optimizer)
    else:
        return myPredictedValueListAsIntegers

def forward(self, x):
    """Forward propagation pass"""
    # Layer_in
    a_in = np.matmul(x, self.w_in.T) + self.b_in
    z_in = relu(a_in)
    # Layer_rec
    a_rec = np.matmul(z_in, self.w_rec.T) + self.b_rec
    z_rec = relu(a_rec)
    # Layer_link
    a_link = np.matmul(np.matmul(x, self.w_reduce), self.w_link.T)
    z_link = identity(a_link)
    # Layer_out
    a_out = np.matmul(z_rec + z_link, self.w_out.T) + self.b_out
    y = relu(a_out)

    # Set internal state
    self._state.update({
        'x': x,
        'y': y,
        'a_out': a_out,
        'z_link': z_link,
        'a_link': a_link,
        'z_rec': z_rec,
        'a_rec': a_rec,
        'z_in': z_in,
        'a_in': a_in,
    })
    return y

def forward(self, x):
    # Layer_in forward pass
    self.x = x
    B_in = np.matmul(np.ones((BATCH_SIZE, 1)), self.b_in.reshape(1, P))  # [20, 75]
    self.a_in = np.matmul(self.x, self.w_in.transpose()) + B_in  # [20, 75]
    z_in_numpy = relu(self.a_in)

    # Layer_rec forward pass
    B_rec = np.matmul(np.ones((BATCH_SIZE, 1)), self.b_rec.reshape(1, P))  # [20, 75]
    self.a_rec = np.matmul(z_in_numpy, self.w_rec.transpose()) + B_rec  # [20, 75]
    self.z_rec = relu(self.a_rec)

    # Layer_link forward pass
    self.x_reduce = x[:, 0::3]
    a_link = np.matmul(self.x_reduce, self.w_link.transpose())
    self.z_link = identity_func(a_link)

    # Layer_out forward pass
    self.z_rec_link = self.z_rec + self.z_link
    B_out = np.matmul(np.ones((BATCH_SIZE, 1)), self.b_out.reshape(1, D))  # [20, 225]
    self.a_out = np.matmul(self.z_rec_link, self.w_out.transpose()) + B_out  # [20, 225]
    y = relu(self.a_out)
    return y

def branch1(self, x, numOut, s):
    # integer division keeps the channel count an int under Python 3
    with tf.variable_scope("conv1"):
        conv1 = utils.relu(utils.Bn(utils.conv2d(x, numOut // 4, d_h=s, d_w=s),
                                    training=self.is_training))
    with tf.variable_scope("conv2"):
        conv2 = utils.relu(utils.Bn(utils.conv2d(conv1, numOut // 4, 3, 3),
                                    training=self.is_training))
    with tf.variable_scope("conv3"):
        conv3 = utils.Bn(utils.conv2d(conv2, numOut), training=self.is_training)
    return conv3

def residual(self, x, numOut, stride=1, name='res'):
    with tf.variable_scope(name):
        block = self.branch1(x, numOut, stride)
        if x.get_shape().as_list()[3] == numOut:
            return utils.relu(tf.add_n([x, block]))
        else:
            skip = self.branch2(x, numOut, stride)
            return utils.relu(tf.add_n([block, skip]))

def gradient(x, y, w1, w2, w3, w4, b1, b2, b3, b4, learning_rate, iterr):
    for i in range(iterr):
        # forward pass
        z1 = np.dot(w1, x) + b1
        #print("w1 shape{fgh} x shape{yj}".format(fgh=w1.shape, yj=x.shape))
        a1 = relu(z1)
        z2 = np.dot(w2, a1) + b2
        a2 = relu(z2)
        z3 = np.dot(w3, a2) + b3
        a3 = relu(z3)
        z4 = np.dot(w4, a3) + b4
        a4 = sigmoid(z4)
        a4 = preds(a4)

        # backward pass
        dz4 = a4 - y
        dw4 = 1 / m * np.dot(dz4, a3.T)
        db4 = 1 / m * np.sum(dz4)
        da3 = np.dot(w4.T, dz4)
        dz3 = da3 * backward_relu(z3)
        dw3 = 1 / m * np.dot(dz3, a2.T)  # fixed: layer-3 weight gradient uses dz3, not dz4
        db3 = 1 / m * np.sum(dz3)
        da2 = np.dot(w3.T, dz3)
        dz2 = da2 * backward_relu(z2)
        dw2 = 1 / m * np.dot(dz2, a1.T)
        db2 = 1 / m * np.sum(dz2)
        da1 = np.dot(w2.T, dz2)
        dz1 = da1 * backward_relu(z1)
        dw1 = 1 / m * np.dot(dz1, x.T)
        db1 = 1 / m * np.sum(dz1)
        #print(db1, dw4, db4, dw1)

        # parameter update
        w1 = w1 - learning_rate * dw1
        b1 = b1 - learning_rate * db1
        w2 = w2 - learning_rate * dw2
        b2 = b2 - learning_rate * db2
        w3 = w3 - learning_rate * dw3
        b3 = b3 - learning_rate * db3
        w4 = w4 - learning_rate * dw4
        b4 = b4 - learning_rate * db4  # fixed: b4 was never updated

        if (i % 100) == 0:
            error = a4 - y
            print("Accuracy: " + str(np.sum((a4 == y) / m)))
            print("Error:" + str(np.mean(np.abs(error))))

    d = {
        'w1': w1, 'b1': b1,
        'w2': w2, 'b2': b2,
        'w3': w3, 'b3': b3,
        'w4': w4, 'b4': b4
    }
    return d

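# The `gradient` and `test` routines lean on helpers (`relu`, `backward_relu`,
# `sigmoid`, `preds`) and a module-level sample count `m` that are defined elsewhere
# in the source. A minimal sketch of what they are assumed to look like, with `preds`
# taken (as an assumption) to be a 0.5 threshold on the sigmoid output:
import numpy as np

def relu(z):
    # elementwise max(0, z)
    return np.maximum(0, z)

def backward_relu(z):
    # ReLU derivative: 1 where z > 0, else 0
    return (z > 0).astype(float)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def preds(a):
    # assumed binary decision rule: threshold sigmoid outputs at 0.5
    return (a > 0.5).astype(float)

m = 100  # number of training examples (a module-level global in the original code)
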
def f_O2(L, p_O2):
    l, a, b, c, du, dd = tuple(L)
    if p_O2 == 1:
        return relu(1 - rat_tdown(L))
    elif p_O2 == 2:
        return relu(1 - rat_tup(L))
    elif p_O2 == 3:
        return relu(1 - 2 / (1 / (rat_tdown(L) + 10**-6) + 1 / rat_tup(L)))
    else:
        return 1

def test(x, w1, w2, w3, w4, b1, b2, b3, b4):
    z1 = np.dot(w1, x) + b1
    a1 = relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = relu(z2)
    z3 = np.dot(w3, a2) + b3
    a3 = relu(z3)
    z4 = np.dot(w4, a3) + b4
    a4 = sigmoid(z4)
    a4 = preds(a4)
    return a4

def connvolution_process(img):
    '''----------------------Reading the image-------------------------------'''
    # Converting the 3D image into gray (2D).
    img = color.rgb2gray(img)
    # io.imshow(img)
    # plt.show()

    '''----------------------Preparing Filter-------------------------------'''
    l1_filter = numpy.zeros((2, 3, 3))
    # Vertical edge detector filter
    l1_filter[0, :, :] = numpy.array([[[-1, 0, 1],
                                       [-1, 0, 1],
                                       [-1, 0, 1]]])
    # Horizontal edge detector filter
    l1_filter[1, :, :] = numpy.array([[[1, 1, 1],
                                       [0, 0, 0],
                                       [-1, -1, -1]]])

    '''---------------------- Convolutional Layer 1 ---------------------------'''
    l1_feature_map = conv(img, l1_filter)
    l1_feature_map_relu = relu(l1_feature_map)
    l1_feature_map_relu_pool = pooling(l1_feature_map_relu, 2, 2)

    '''---------------------- Convolutional Layer 2 ---------------------------'''
    l2_filter = numpy.random.rand(3, 5, 5, l1_feature_map_relu_pool.shape[-1])
    l2_feature_map = conv(l1_feature_map_relu_pool, l2_filter)
    l2_feature_map_relu = relu(l2_feature_map)
    l2_feature_map_relu_pool = pooling(l2_feature_map_relu, 2, 2)

    '''---------------------- Convolutional Layer 3 ---------------------------'''
    l3_filter = numpy.random.rand(1, 7, 7, l2_feature_map_relu_pool.shape[-1])
    l3_feature_map = conv(l2_feature_map_relu_pool, l3_filter)
    l3_feature_map_relu = relu(l3_feature_map)
    l3_feature_map_relu_pool = pooling(l3_feature_map_relu, 2, 2)

    '''---------------------- Graphing results of convolution ---------------------------'''
    draw_layer(img, l1_feature_map, l1_feature_map_relu, l1_feature_map_relu_pool,
               l2_feature_map, l2_feature_map_relu, l2_feature_map_relu_pool,
               l3_feature_map, l3_feature_map_relu, l3_feature_map_relu_pool)

    '''---------------------- Fully Connected layer ---------------------------'''
    # Flatten the last pooled feature maps into a single vector
    # (convolutional layer to fully connected layer).
    fc = l3_feature_map_relu_pool.reshape(-1)
    return fc

def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    Forward propagation with dropout.
    """
    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    # First layer output
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)
    # Dropout: draw a random matrix, mark entries 0/1 against keep_prob,
    # drop part of A1, then rescale so the expected activation is unchanged.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    D1 = D1 < keep_prob
    A1 = np.multiply(A1, D1)
    A1 /= keep_prob

    # Second layer output
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = np.multiply(A2, D2)
    A2 /= keep_prob

    # Output layer
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)
    return A3, cache

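# The `A /= keep_prob` step above is inverted dropout: rescaling the surviving units
# by 1/keep_prob keeps the expected activation equal to the no-dropout value, so the
# test-time forward pass needs no correction. A small standalone check of that
# property (plain NumPy, independent of the helpers used above):
import numpy as np

rng = np.random.default_rng(0)
keep_prob = 0.5
A = np.ones((1000, 1000))             # stand-in activations, all equal to 1

D = rng.random(A.shape) < keep_prob   # keep each unit with probability keep_prob
A_drop = A * D / keep_prob            # inverted-dropout rescaling

print(A.mean(), A_drop.mean())        # both are ~1.0: the expectation is preserved
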
def predict(self, X, y, show_probs=True):
    """
    Run a forward pass and return class probabilities or hard predictions.
    """
    # forward prop through layers
    for layer_index in np.arange(1, len(self.structure)):
        layer = self.structure[layer_index]
        prev_layer = self.structure[layer_index - 1]
        if prev_layer['bias_node']:
            X = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)  # change 1. to self.bias?
        X = (np.dot(layer['weight_matrix'], X.T)).T

        # activation functions
        if layer['activation'] == 'relu':
            X = relu(X)
        elif layer['activation'] == 'softmax':
            X = softmax(X.T).T
        elif layer['activation'] == 'sigmoid':
            X = sigmoid(X)
        else:
            raise Exception('Activation function not recognised')

    # X now holds the output-layer probabilities
    predictions = np.argmax(X, axis=1)
    self.predict_cm = createConfusionMatrix(predictions=predictions,
                                            targets=np.argmax(y, axis=1))

    if show_probs:
        return X
    else:
        return predictions

def forward(self, X):
    if self.activationFunction == "relu":
        hiddenLayerOutput = relu(X.dot(self.W1) + self.b1)
    else:
        hiddenLayerOutput = tanh(X.dot(self.W1) + self.b1)
    return softmax(hiddenLayerOutput.dot(self.W2) + self.b2), hiddenLayerOutput

def lossAndGrads(self, batchAndTargets, computeGrads=True):
    nb = len(batchAndTargets)
    ctxtoks = [ctxtok for ctxtok, tgt in batchAndTargets]
    tgts = [tgt for ctxtok, tgt in batchAndTargets]

    context = self.wvec[ctxtoks, :].reshape((nb, -1))
    z = np.dot(context, self.Wz)
    a = relu(z)
    scores = np.dot(a, self.Wa)
    preds = np.argmax(scores, axis=1)

    loss, probs, dscores = softmaxLossAndGrads(scores, tgts)
    if not computeGrads:
        return preds, probs, loss

    dWa = np.dot(a.T, dscores)
    da = np.dot(dscores, self.Wa.T)
    dz = drelu(da, a)
    dWz = np.dot(context.T, dz)
    dcontext = np.dot(dz, self.Wz.T).reshape((nb, -1, self.wdim))
    dwvec = np.zeros_like(self.wvec)
    for b, ctxtok in enumerate(ctxtoks):
        for w, tok in enumerate(ctxtok):
            dwvec[tok] += dcontext[b, w]

    grads = {}
    grads['Wa'] = dWa + self.reg * self.Wa
    grads['Wz'] = dWz + self.reg * self.Wz
    grads['wvec'] = dwvec + self.reg * self.wvec * (dwvec != 0)
    return preds, probs, loss, grads

def backpropagate(self, x, y):
    gradients_b = [np.zeros(b.shape) for b in self.biases]
    gradients_w = [np.zeros(w.shape) for w in self.weights]

    # feedforward
    a = x
    a_list = [x]
    z_list = []
    for b, w in zip(self.biases[0:-1], self.weights[0:-1]):
        z = np.dot(w, a) + b
        a = utils.relu(z)
        a_list.append(a)
        z_list.append(z)
    z = np.dot(self.weights[-1], a_list[-1]) + self.biases[-1]
    a = utils.softmax(z)
    a_list.append(a)
    z_list.append(z)

    # backward
    # for softmax-cross-entropy layer: delta in last layer = result - ground truth
    delta = a_list[-1] - y
    # update b and w for the last layer L
    gradients_b[-1] = delta
    gradients_w[-1] = np.dot(delta, a_list[-2].transpose())
    # update b and w for the rest of layers L-1, L-2, ...
    for l in range(2, self.num_layers):
        z = z_list[-l]  # lth last layer of z
        r_derivative = utils.relu_derivative(z)
        # update delta based on delta(l) = transpose of w(l+1) * delta(l+1)
        delta = np.dot(self.weights[-l + 1].transpose(), delta) * r_derivative
        gradients_b[-l] = delta
        gradients_w[-l] = np.dot(delta, a_list[-l - 1].transpose())
    return (gradients_b, gradients_w)

def prediction_baysian_dropout(self, x, k=10):
    """
    Runs test time prediction k times to get a variance on the output.
    Essentially bayesian dropout.

    Input:
        x: A numpy array of input data, shape (N, D)

    Return:
        mean: The mean prediction from the dropout ensemble, shape (N, M)
        var: The variance on the prediction from the ensemble, shape (N, M)
    """
    N, D = x.shape
    outputs = np.zeros((k, N, self.M))
    for i in range(k):
        h = x  # Input into the next layer or previous hidden activation
        for l in range(self.n_hidden):
            w, b = self.params["w" + str(l)], self.params["b" + str(l)]
            h, _ = affine(h, w, b)           # Affine layer
            h, _ = relu(h)                   # Activation (ReLU)
            h, _ = dropout(h, self.dropout)  # Keep dropout on so the k passes differ
        # Output layer, simply an affine
        outputs[i], _ = affine(h, self.params["w_out"], self.params["b_out"])

    mean = np.mean(outputs, axis=0)  # average over the k stochastic passes -> (N, M)
    var = np.var(outputs, axis=0)    # per-output variance across the ensemble -> (N, M)
    return mean, var

def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python dictionary containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.sigmoid(Z)
    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache

def inLayerForward(self, x):
    B_in = np.ones((BATCH_SIZE, 1)) @ self.b_in  # [20, 1] * [1, 75]
    self.a_in = x @ self.w_in + B_in             # [20, 225] * [225, 75] + [20, 75]
    self.z_in = relu(self.a_in)                  # [20, 75]
    return self.z_in

def scalar_input_layer(self, x):
    # scalar-form method for the performance comparison
    z_in = np.zeros((x.shape[0], P))
    for i in range(x.shape[0]):
        a_in = np.dot(self.w_in, x[i, :].T) + self.b_in.T.flatten()
        z_in[i, :] = relu(a_in)
    return z_in

def forward_prop(self, data, parameters, layers):
    """
    Forward propagate values
    :param data: Input data (train and then test)
    :param parameters: Dictionary with initialized weights and biases
    :param layers: Number of layers and neurons
    :return: Output activations of the final layer and the updated parameter dictionary
    """
    # Z[l] = A[l-1] * W[l] + B[l]
    # A[l] = G[l](Z[l])

    # A0 initialized here and not in the init function, so we can run the
    # training data through forward prop
    parameters['A' + str(0)] = data
    for i in range(1, len(layers)):
        parameters['Z' + str(i)] = np.add(
            np.dot(parameters['A' + str(i - 1)], parameters['W' + str(i)]),
            parameters['B' + str(i)])
        if i != len(layers) - 1:
            parameters['A' + str(i)] = ut.relu(parameters['Z' + str(i)])
        else:
            # Final activation is softmax
            parameters['A' + str(i)] = ut.softmax(parameters['Z' + str(i)])
    return parameters['A' + str(len(layers) - 1)], parameters

def fprop(self, X):
    X = np.array([np.array([float(x) for x in j]) for j in X])
    X = X.transpose()
    self._ha = np.dot(self._w1, X) + np.repeat(self._b1, len(X[0]), axis=1)  # pre-activations between x and hidden
    self._hs = utils.relu(self._ha)  # hidden values
    self._oa = np.dot(self._w2, self._hs) + np.repeat(self._b2, len(X[0]), axis=1)  # pre-activations between hidden and output
    self._os = utils.softmax(self._oa)  # output values

def prediction(self, x, z):
    """
    Compute prediction for the fully-connected net at test time
    (without saving cache and with no dropout).

    Input:
        x: A numpy array of input data, shape (N, D)
        z: Diff between y-y_des for the func approx, (N, M) ((1, 2) in this case)

    Return:
        output: Output prediction/prediction of label, shape (N, M)
    """
    h = x  # Input into the next layer or previous hidden activation
    for l in range(self.n_hidden):
        l = str(l)
        w = self.params["w" + l]
        b = self.params["b" + l]
        h, _ = affine(h, w, b)  # Affine layer
        h, _ = relu(h)          # Activation (ReLU)

    # Output layer, simply an affine
    output, cache = affine(h, self.params["w_out"], self.params["b_out"])

    # Technically this is not the real z, but the 1/N term only scales z (we
    # can think of this as equivalent to scaling β by 1/N).
    # This is to match how dout works in NeuralNetOffline (see line: 190)
    N, D = x.shape
    z = z / N

    # The only trainable parameters in the adaptive case are the last layer weights,
    # so we only update the output layer weights (using e-mod)
    _, dw, db = self.w_hat_dot_e_mod(z, cache)

    # Update the weights
    self.params["w_out"] -= self.beta * dw
    self.params["b_out"] -= self.beta * db

    return output

def vector_input_layer(self, x):
    # vector-form method for the performance comparison
    B_in = np.matmul(np.ones((BATCH_SIZE, 1)), self.b_in.reshape(1, P))  # [20, 75]
    a_in = np.matmul(x, self.w_in.transpose()) + B_in  # [20, 75]
    z_in_numpy = relu(a_in)
    return z_in_numpy

def inference(self, x):
    with tf.variable_scope("conv0"):
        conv1 = utils.relu(utils.Bn(utils.conv2d(x, 64, 7, 7, 2, 2, bias=True),
                                    training=self.is_training))
    with tf.name_scope("pool1"):
        pool1 = utils.max_pool(conv1, 3, 3, 2, 2)
    with tf.variable_scope("group0"):
        res2a = self.residual(pool1, 256, name='block0')
        res2b = self.residual(res2a, 256, name='block1')
        res2c = self.residual(res2b, 256, name='block2')
    with tf.variable_scope("group1"):
        res3a = self.residual(res2c, 512, 2, name='block0')
        res3b = self.residual(res3a, 512, name='block1')
        res3c = self.residual(res3b, 512, name='block2')
        res3d = self.residual(res3c, 512, name='block3')
    with tf.variable_scope("group2"):
        res4a = self.residual(res3d, 1024, 2, name='block0')
        res4b = self.residual(res4a, 1024, name='block1')
        res4c = self.residual(res4b, 1024, name='block2')
        res4d = self.residual(res4c, 1024, name='block3')
        res4e = self.residual(res4d, 1024, name='block4')
        res4f = self.residual(res4e, 1024, name='block5')
    with tf.variable_scope("group3"):
        res5a = self.residual(res4f, 2048, 2, name='block0')
        res5b = self.residual(res5a, 2048, name='block1')
        res5c = self.residual(res5b, 2048, name='block2')
    with tf.name_scope("pool5"):
        pool5 = utils.global_pool(res5c)
    with tf.variable_scope("linear"):
        dropout = tf.nn.dropout(pool5, keep_prob=self.keep_prob)
        out = utils.linear(dropout, 1000)
    return out

def prediction_save_cache(self, x):
    """
    Compute prediction for the fully-connected net and save intermediate activations.
    N samples, D dims per sample, each sample is a row vec, M is the dims of y/prediction.

    Input:
        x: A numpy array of input data, shape (N, D)

    Return:
        output: Output prediction/prediction of label, shape (N, M)
        caches: Saved intermediate activations for use in backprop
    """
    caches = {}
    h = x  # Input into the next layer or previous hidden activation
    for l in range(self.n_hidden):
        l = str(l)
        w, b = self.params["w" + l], self.params["b" + l]
        h, caches["affine" + l] = affine(h, w, b)  # Affine layer
        h, caches["relu" + l] = relu(h)            # Activation (ReLU)
        # Dropout layer (train-time dropout)
        h, caches["dropout" + l] = dropout(h, self.dropout)

    # Output layer, simply an affine
    output, cache = affine(h, self.params["w_out"], self.params["b_out"])
    caches["affine_out"] = cache
    return output, caches

def forward_propagation(X, parameters):
    """Forward propagation."""
    cache = dict()
    L = len(parameters) // 2
    cache['A{}'.format(0)] = X  # A0 = X

    # Layers 1 to (L-1): LINEAR -> RELU -> LINEAR -> RELU ...
    for l in range(1, L):
        W = parameters['W{}'.format(l)]
        b = parameters['b{}'.format(l)]
        Z = np.dot(W, cache['A{}'.format(l - 1)]) + b  # Zl = Wl * A(l-1) + bl
        A = relu(Z)  # activation of this layer
        cache['W{}'.format(l)] = W
        cache['b{}'.format(l)] = b
        cache['Z{}'.format(l)] = Z
        cache['A{}'.format(l)] = A

    # Layer L: LINEAR -> SIGMOID
    WL = parameters['W{}'.format(L)]
    bL = parameters['b{}'.format(L)]
    ZL = np.dot(WL, cache['A{}'.format(L - 1)]) + bL
    AL = sigmoid(ZL)
    cache['W{}'.format(L)] = WL
    cache['b{}'.format(L)] = bL
    cache['Z{}'.format(L)] = ZL
    cache['A{}'.format(L)] = AL

    return AL, cache

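# A minimal usage sketch for the function above (not from the original source). The
# parameter layout ('W1'/'b1', 'W2'/'b2', column-vector biases, examples as columns)
# follows from how the function indexes `parameters`; `relu` and `sigmoid` are assumed
# to be plain elementwise NumPy helpers here.
import numpy as np

def relu(z):
    return np.maximum(0, z)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
# Two-layer net: 4 inputs -> 3 hidden units (ReLU) -> 1 output (sigmoid)
parameters = {
    'W1': rng.standard_normal((3, 4)) * 0.01, 'b1': np.zeros((3, 1)),
    'W2': rng.standard_normal((1, 3)) * 0.01, 'b2': np.zeros((1, 1)),
}
X = rng.standard_normal((4, 5))                  # 5 examples, one per column
AL, cache = forward_propagation(X, parameters)
print(AL.shape)                                  # (1, 5)
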
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
             params_W=None, params_b=None, mu=0, sigma=0.1, bias_val=0.5,
             activation_mode=0):
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # if params_W is not given, generate random params_W
    # (use `is None`: comparing a numpy array to None with == is ambiguous)
    if params_W is None:
        self.W = theano.shared(
            numpy.asarray(
                rng.normal(mu, sigma, filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
    else:
        self.W = theano.shared(
            numpy.asarray(params_W, dtype=theano.config.floatX),
            borrow=True)

    # if params_b is not given, generate constant params_b
    if params_b is None:
        b_values = bias_val * numpy.ones((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)
    else:
        self.b = theano.shared(
            numpy.asarray(params_b, dtype=theano.config.floatX),
            borrow=True
        )

    self.momentum_W = theano.shared(
        numpy.zeros(filter_shape, dtype=theano.config.floatX),
        borrow=True
    )
    self.momentum_b = theano.shared(
        numpy.zeros((filter_shape[0],), dtype=theano.config.floatX),
        borrow=True
    )

    # feature maps after convolution
    conv_out = conv.conv2d(
        input=input,
        filters=self.W,
        filter_shape=filter_shape,
        image_shape=image_shape
    )

    # feature maps after pooling
    pooled_out = downsample.max_pool_2d(
        input=conv_out,
        ds=poolsize,
        ignore_border=True
    )

    # output of layer, activated pooled feature maps
    if activation_mode == 0:
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
    elif activation_mode == 1:
        self.output = relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    self.params = [self.W, self.b]

def _process_layers(self, weights, data, learning=True):
    for W, mean, std in self._generate_layers(weights):
        data = normalize(data, mean, std)
        data = relu(data)
        if learning and self.dropout is not None:
            data = dropout(data, self.dropout)
        data = np.dot(data, W)
    return data

def fprop(self, X):
    X = np.array([[float(x)] for x in X])
    self._ha = np.dot(self._w1, X) + self._b1  # pre-activations between x and hidden
    self._hs = utils.relu(self._ha)            # hidden values
    self._oa = np.dot(self._w2, self._hs) + self._b2  # pre-activations between hidden and output
    self._os = utils.softmax(self._oa)         # output values

def linear_activation_forward(a_prev, w, b, activation):
    z, linear_cache = linear_forward(a_prev, w, b)
    if activation == "sigmoid":
        a, activation_cache = sigmoid(z)
    else:
        a, activation_cache = relu(z)
    cache = (linear_cache, activation_cache)
    return a, cache

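# Both versions of `linear_activation_forward` assume `sigmoid`/`relu` helpers that
# return the activation together with a cache of the pre-activation Z (the cache is
# what the matching backward pass consumes), plus a `linear_forward` that caches its
# inputs. A minimal sketch of those helpers under that assumption:
import numpy as np

def linear_forward(a_prev, w, b):
    # Z = W . A_prev + b; cache the inputs for the backward pass
    z = np.dot(w, a_prev) + b
    return z, (a_prev, w, b)

def sigmoid(z):
    a = 1.0 / (1.0 + np.exp(-z))
    return a, z   # cache Z for the sigmoid backward step

def relu(z):
    a = np.maximum(0, z)
    return a, z   # cache Z for the ReLU backward step
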
def predict(self, X, learning):
    hiddenLayerOutput = relu(X.dot(self.W1) + self.b1)
    output = hiddenLayerOutput.dot(self.W2) + self.b2
    if learning:
        probs = softmax(output)
        # sample an action according to the softmax probabilities and recover its index
        action = np.where(probs == np.random.choice(probs, 1, p=probs))
        return int(action[0])
    return np.argmax(output)

def f(X):
    n_samples = X.shape[0]
    input = X.reshape((n_samples, n_feats[0], image_row, image_col))
    conv_out = T.nnet.conv2d(input, P.W_input_conv)
    pool_out_ = max_pool_2d(conv_out, (pool_row, pool_col))
    pool_out = pool_out_.flatten(2) + P.b_pool_out
    hidden = relu(T.dot(pool_out, P.W_pool_out_hidden) + P.b_hidden)
    output = T.dot(hidden, P.W_hidden_output) + P.b_output
    return output.astype(theano.config.floatX)

def activate(self, activation):
    if self.visible_type == "SIGMOID":
        m_output = utils.sigmoid(activation)
    elif self.visible_type == "RELU":
        m_output = utils.relu(activation)
    elif self.visible_type == "LEAKY_RELU":
        m_output = utils.leaky_relu(activation)
    elif self.visible_type == "LINEAR":
        m_output = activation
    else:
        raise NotImplementedError("Unrecognised visible type")
    return m_output

def layers(x, window, dim_emb, dim_hidden, n_layers, activation=tanh):
    params = []
    zero = T.zeros((1, 1, dim_emb * window), dtype=theano.config.floatX)

    def zero_pad_gate(matrix):
        return T.neq(T.sum(T.eq(matrix, zero), 2, keepdims=True), dim_emb * window)

    for i in xrange(n_layers):
        if i == 0:
            W = theano.shared(sample_weights(dim_emb * window, dim_hidden))
            h = T.max(zero_pad_gate(x) * relu(T.dot(x, W)), 1)
            # h = T.max(T.dot(x, W), 1)
        else:
            W = theano.shared(sample_weights(dim_hidden, dim_hidden))
            h = activation(T.dot(h, W))
        params.append(W)

    return h, params

def train(self, data_train, initialmomentum, finalmomentum, learn_rate_w,
          learn_rate_visb, learn_rate_hidb, weightcost):
    learning_rate = learn_rate_w
    self.weightcost = weightcost
    N = np.shape(data_train)[0]
    num_visible = np.shape(data_train)[1]
    total_batches = np.int(np.ceil(N/self.mini_batch))

    m_visible_biases = np.zeros(num_visible)
    m_hidden_biases = np.zeros(self.hidden_nodes)
    m_weights = np.random.randn(num_visible, self.hidden_nodes) * 0.01

    momentum_weights = np.zeros_like(m_weights)
    momentum_visible_biases = np.zeros_like(m_visible_biases)
    momentum_hidden_biases = np.zeros_like(m_hidden_biases)
    momentum_rate = finalmomentum

    best_rmsd = None
    rmsd_logger = []

    for i in range(self.iterations):
        start = (i % total_batches) * self.mini_batch
        end = start + self.mini_batch
        if end >= N:
            end = N - 1
        #print start, end
        batch = data_train[start:end]

        weights, visible_biases, hidden_biases = self._CD1(batch, m_weights,
                                                           m_visible_biases, m_hidden_biases)

        momentum_weights = momentum_rate * momentum_weights + weights
        momentum_visible_biases = momentum_rate * momentum_visible_biases + visible_biases
        momentum_hidden_biases = momentum_rate * momentum_hidden_biases + hidden_biases

        m_weights += momentum_weights * learning_rate
        m_visible_biases += momentum_visible_biases * learning_rate
        m_hidden_biases += momentum_hidden_biases * learning_rate

        # Reconstruction error, no sampling done, using raw probability
        output = np.dot(data_train, m_weights) + np.tile(hidden_biases, (N, 1))
        if self.visible_type == "SIGMOID":
            hidden_state = u.sigmoid(output)
        elif self.visible_type == "RELU":
            hidden_state = u.relu(output)
        elif self.visible_type == "LINEAR":
            hidden_state = output

        reconstruction = np.dot(hidden_state, m_weights.T) + np.tile(visible_biases, (N, 1))
        if self.visible_type == "SIGMOID":
            reconstruction = u.sigmoid(reconstruction)
        elif self.visible_type == "RELU":
            reconstruction = u.relu(reconstruction)
        ##################################################

        err = (data_train - reconstruction) * (data_train - reconstruction)
        rmsd = np.sqrt(np.mean(err * err))
        rmsd_logger.append(rmsd)
        print "Epoch %4d/%4d, RMS deviation = %7.4f" % (i + 1, self.iterations, rmsd)

        if self.early_stop:
            if best_rmsd is None or (rmsd - best_rmsd) / best_rmsd < -1e-3:
                best_weights = m_weights
                best_hidden_biases = hidden_biases
                best_visible_biases = visible_biases
                best_rmsd = rmsd
                iter_since_best = 0
            else:
                iter_since_best += 1
                if iter_since_best >= self.max_epoch_without_improvement:
                    print "Early stop -- best epoch %d / %d, RMS deviation = %7.4f" % (
                        i + 1 - iter_since_best, self.iterations, best_rmsd)
                    break
        else:
            best_weights = m_weights
            best_hidden_biases = hidden_biases
            best_visible_biases = visible_biases
            best_rmsd = rmsd
            iter_since_best = 0
        #print ' rmsd = ', rmsd

    self.weights = best_weights
    self.hidden_biases = best_hidden_biases
    self.visible_biases = best_visible_biases

    rmsd_history = np.asarray(rmsd_logger)
    if iter_since_best > 0:
        self.train_history = rmsd_history[:-1*iter_since_best]
    else:
        self.train_history = rmsd_history

def train(self, data_train, initialmomentum, finalmomentum, learn_rate_w,
          learn_rate_visb, learn_rate_hidb, weightcost):
    Nd, numdims = np.shape(data_train)
    N = self.mini_batch
    n_tot_batches = np.int(np.ceil(Nd/self.mini_batch))

    vishid = 0.01 * np.random.randn(numdims, self.hidden_nodes)
    hidbiases = np.zeros(self.hidden_nodes)
    visbiases = np.zeros(numdims)

    vishidinc = np.zeros_like(vishid)
    hidbiasinc = np.zeros_like(hidbiases)
    visbiasinc = np.zeros_like(visbiases)
    """
    pos_hidprobs = np.zeros([N, self.hidden_nodes])
    neg_hidprobs = np.zeros_like(pos_hidprobs)
    pos_prods = np.zeros_like(vishid)
    neg_prods = np.zeros_like(vishid)
    """
    batchposidprobs = np.empty([N, self.hidden_nodes, n_tot_batches])

    rmsd_logger = []
    best_weights = np.zeros_like(vishid)
    best_hidden_biases = np.zeros_like(hidbiases)
    best_rmsd = None
    iter_since_best = 0

    for epoch in range(self.iterations):
        errsum = 0
        rmsd = 0
        ##print "Epoch %d / %d" % (epoch + 1, self.iterations)
        for batch in range(n_tot_batches):
            #print " epoch %d / %d -- batch %d / %d" % (epoch + 1, self.iterations,
            #    batch + 1, n_tot_batches)
            start = (batch % n_tot_batches) * self.mini_batch
            end = start + self.mini_batch
            if end >= Nd:
                end = Nd
            data = data_train[start:end]

            ## START POSITIVE PHASE ##################################################
            nw = np.dot(data, vishid) + np.tile(hidbiases, (N, 1))
            if self.hidden_type == "SIGMOID":
                pos_hidprobs = utils.sigmoid(nw)
            elif self.hidden_type == "RELU":
                pos_hidprobs = utils.relu(nw)
            elif self.hidden_type == "LINEAR":
                pos_hidprobs = nw

            if epoch >= self.iterations - 1:
                batchposidprobs[:, :, batch] = pos_hidprobs

            pos_prods = np.dot(data.T, pos_hidprobs)
            pos_hidact = np.sum(pos_hidprobs, 0)
            pos_visact = np.sum(data, 0)
            ## END OF POSITIVE PHASE ################################################

            if self.hidden_type == "SIGMOID" or self.hidden_type == "RELU":
                ran = np.random.rand(N, self.hidden_nodes)
                pos_hidstates = pos_hidprobs > ran
            elif self.hidden_type == "LINEAR":
                ran = np.random.randn(N, self.hidden_nodes)
                pos_hidstates = pos_hidprobs + ran

            ## START NEGATIVE PHASE #################################################
            nw = np.dot(pos_hidstates, vishid.T) + np.tile(visbiases, (N, 1))
            # TODO: Do this only if visible type is sigmoid see C++ line 262 and next
            if self.visible_type == "SIGMOID":
                neg_data = utils.sigmoid(nw)
            elif self.visible_type == "RELU":  # elif: a bare `if` here let the final
                neg_data = utils.relu(nw)      # `else` overwrite the sigmoid result
            else:
                neg_data = nw

            nw = np.dot(neg_data, vishid) + np.tile(hidbiases, (N, 1))
            if self.hidden_type == "SIGMOID":
                neg_hidprobs = utils.sigmoid(nw)
            elif self.hidden_type == "RELU":   # elif for the same reason as above
                neg_hidprobs = utils.relu(nw)
            else:
                neg_hidprobs = nw

            neg_prods = np.dot(neg_data.T, neg_hidprobs)
            neg_hidact = np.sum(neg_hidprobs, 0)
            neg_visact = np.sum(neg_data, 0)
            ## END OF NEGATIVE PHASE ################################################

            errsum += np.sum((data - neg_data) * (data - neg_data))
            rmsd += np.sqrt(np.mean((data - neg_data) * (data - neg_data)))
            #print rmsd; exit()

            if epoch > 5:
                momentum = finalmomentum
            else:
                momentum = initialmomentum

            ## UPDATE WEIGHTS AND BIASES ############################################
            vishidinc = momentum * vishidinc + learn_rate_w * \
                ((pos_prods - neg_prods)/N - weightcost * vishid)
            visbiasinc = momentum * visbiasinc + learn_rate_visb/N * (pos_visact - neg_visact)
            hidbiasinc = momentum * hidbiasinc + learn_rate_hidb/N * (pos_hidact - neg_hidact)

            vishid += vishidinc
            visbiases += visbiasinc
            hidbiases += hidbiasinc

        print "Epoch %4d/%4d, RMS deviation = %7.4f" % (epoch + 1, self.iterations, rmsd)
        rmsd_logger.append(rmsd)

        if self.early_stop:
            if best_rmsd is None or (rmsd - best_rmsd) / best_rmsd < -1e-3:
                best_weights = vishid
                best_hidden_biases = hidbiases
                best_visible_biases = visbiases
                best_rmsd = rmsd
                iter_since_best = 0
            else:
                iter_since_best += 1
                if iter_since_best >= self.max_epoch_without_improvement:
                    print "Early stop -- best epoch %d / %d, RMS deviation = %7.4f" % (
                        epoch + 1 - iter_since_best, self.iterations, best_rmsd)
                    break
        else:
            best_weights = vishid
            best_hidden_biases = hidbiases
            best_visible_biases = visbiases
            best_rmsd = rmsd
            iter_since_best = 0
        #print ' rmsd = ', rmsd

    self.weights = best_weights
    self.hidden_biases = best_hidden_biases
    self.visible_biases = best_visible_biases

    rmsd_history = np.asarray(rmsd_logger)
    if iter_since_best > 0:
        self.train_history = rmsd_history[:-1*iter_since_best]
    else:
        self.train_history = rmsd_history