def train(x1, rbm, learning_rate=0.1):
    # clamp the visible units to the data
    rbm.units[rbm.visible] = x1

    # sample hidden units given the data
    mask = rbm.hidden
    Qh1 = sigmoid(rbm.gaps[mask])
    h1 = Qh1 > numpy.random.random(len(Qh1))
    rbm.units[mask] = h1.copy()

    # reconstruct the visible units
    mask = rbm.visible
    Px2 = sigmoid(rbm.gaps[mask])
    x2 = Px2 > numpy.random.random(len(Px2))
    rbm.units[mask] = x2.copy()

    # resample the hidden units from the reconstruction
    mask = rbm.hidden
    Qh2 = sigmoid(rbm.gaps[mask])
    h2 = Qh2 > numpy.random.random(len(Qh2))
    rbm.units[mask] = h2.copy()

    adj = numpy.outer(h1, x1) - numpy.outer(Qh2, x2)
    for i, wi in enumerate(rbm.visible):
        for j, wj in enumerate(rbm.hidden):
            if rbm.weights[wi, wj] is None:
                continue
            rbm.weights[wi, wj] = rbm.weights[wi, wj] + learning_rate * adj[j, i]
    rbm.biases[rbm.visible] += learning_rate * (x1 - x2)
    rbm.biases[rbm.hidden] += learning_rate * (h1 - Qh2)

    print(numpy.linalg.norm(x2 - x1, 1))
    print(numpy.linalg.norm(h1 - Qh2, 1))
    print(numpy.linalg.norm(adj, 1))
def learn(self, data, num=1, rate=0.01):
    """Contrastive-divergence update: clamp the data, reconstruct `num` times,
    then adjust weights and biases toward the data statistics."""
    if num <= 0:
        raise ValueError('Need at least one reconstruction')
    nVisible = len(self._model.visible)
    nHidden = len(self._model.hidden)

    # positive phase: clamp the data and sample the hidden units
    self._model.units[self._model.visible] = data
    prob_hidden = sigmoid(self._model.gaps[self._model.hidden])
    self._model.units[self._model.hidden] = prob_hidden > numpy.random.random(nHidden)
    vh_data = self._model.units.copy()

    # negative phase: alternate Gibbs sampling `num` times
    for i in range(num):
        prob_visible = sigmoid(self._model.gaps[self._model.visible])
        self._model.units[self._model.visible] = prob_visible > numpy.random.random(nVisible)
        prob_hidden = sigmoid(self._model.gaps[self._model.hidden])
        self._model.units[self._model.hidden] = prob_hidden > numpy.random.random(nHidden)
    vh_model = self._model.units.copy()

    for v in self._model.visible:
        for h in self._model.hidden:
            dw = vh_data[v] * vh_data[h] - vh_model[v] * vh_model[h]
            self._model.weights[v, h] = self._model.weights[v, h] + rate * dw
    for v in self._model.visible:
        da = vh_data[v] - vh_model[v]
        self._model.biases[v] = self._model.biases[v] + rate * da
    for h in self._model.hidden:
        da = vh_data[h] - vh_model[h]
        self._model.biases[h] = self._model.biases[h] + rate * da
def negSamplingCostAndGradient(predicted, target, outputVectors, dataset, K=10):
    """ Negative sampling cost function for word2vec models

    Implement the cost and gradients for one predicted word vector
    and one target word vector as a building block for word2vec
    models, using the negative sampling technique. K is the sample size.

    Note: See test_word2vec below for dataset's initialization.

    Arguments/Return Specifications: same as softmaxCostAndGradient
    """
    indices = [target]
    indices.extend(getNegativeSamples(target, dataset, K))

    grad = np.zeros(outputVectors.shape)
    gradPred = np.zeros(predicted.shape)
    cost = 0

    z = sigmoid(np.dot(outputVectors[target], predicted))
    cost -= np.log(z)
    grad[target] += predicted * (z - 1.0)
    gradPred += outputVectors[target] * (z - 1.0)

    for k in range(K):
        samp = indices[k + 1]
        z = sigmoid(np.dot(outputVectors[samp], predicted))
        cost -= np.log(1.0 - z)
        grad[samp] += predicted * z
        gradPred += outputVectors[samp] * z

    return cost, gradPred, grad
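# A small standalone check (hypothetical helper, assuming the standard logistic sigmoid)
# of the identity the cost above relies on: log(1 - sigmoid(x)) = log(sigmoid(-x)), so
# penalizing a negative sample with -log(1 - z) is the usual sigma(-u_k . v_c) term.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

x = np.linspace(-5, 5, 11)
assert np.allclose(1.0 - sigmoid(x), sigmoid(-x))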
def gradient(self, x, t):
    # fetch the network parameters from the parameter dictionary
    w1, b1 = self.params['W1'], self.params['b1']
    w2, b2 = self.params['W2'], self.params['b2']

    # store the gradient results
    grads = {}

    # forward
    a1 = np.dot(x, w1) + b1
    h1 = sigmoid(a1)
    a2 = np.dot(h1, w2) + b2
    output = softmax(a2)

    # backward
    dy = (output - t) / x.shape[0]
    grads['W2'] = np.dot(h1.T, dy)
    grads['b2'] = np.sum(dy, axis=0)
    """
    grads['b2'] = np.sum(dy, axis=0): why a sum?
    - The bias b has the same dimension as the layer output, independent of the number
      of samples. During the forward pass the bias vector is added to every sample,
      so its gradient is the sum of the per-sample errors:
          sample 1 contributes error dy1 due to b,
          sample 2 contributes error dy2,
          ...
      total error attributed to b: dy1 + dy2 + ...
    """
    da1 = np.dot(dy, w2.T)
    ha1 = sigmoid(a1)
    dz1 = (1.0 - ha1) * ha1 * da1
    grads['W1'] = np.dot(x.T, dz1)
    grads['b1'] = np.sum(dz1, axis=0)
    return grads
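# A minimal numeric sketch (standalone, with made-up values) of the point above: because
# the same bias is broadcast-added to every sample, its gradient is the column-wise sum
# of the per-sample errors dy.
import numpy as np

dy = np.array([[0.1, -0.2],   # error of sample 1
               [0.3,  0.4]])  # error of sample 2
db = np.sum(dy, axis=0)       # gradient w.r.t. the 2-dimensional bias
print(db)                     # [0.4 0.2] = dy1 + dy2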
def forward_pass(self, inputs):
    # declare variables used in the forward pass
    self.inputs = inputs
    self.n_inp = len(inputs)
    self.vr = []
    self.vz = []
    self.v_h = []
    self.vo = []
    self.r = []
    self.z = []
    self._h = []
    self.h = {}
    self.o = []
    self.h[-1] = np.zeros((self.h_size, 1))
    # performing the recursion over the input sequence
    for i in range(self.n_inp):
        # calculating reset gate value
        # self.vr.append(np.dot(self.w['ur'], inputs[i]) + np.dot(self.w['wr'], self.h[i-1]) + self.b['r'])
        # self.r.append(sigmoid(self.vr[i]))
        self.r.append(
            sigmoid(
                np.dot(self.w['ur'], inputs[i]) +
                np.dot(self.w['wr'], self.h[i - 1]) + self.b['r']))
        # calculating update gate value
        # self.vz.append(np.dot(self.w['uz'], inputs[i]) + np.dot(self.w['wz'], self.h[i-1]) + self.b['z'])
        # self.z.append(sigmoid(self.vz[i]))
        self.z.append(
            sigmoid(
                np.dot(self.w['uz'], inputs[i]) +
                np.dot(self.w['wz'], self.h[i - 1]) + self.b['z']))
        # applying reset gate value to form the candidate hidden state
        # self.v_h.append(np.dot(self.w['u_h'], inputs[i]) + np.dot(self.w['w_h'], np.multiply(self.h[i-1], self.r[i])) + self.b['_h'])
        # self._h.append(tanh(self.v_h[i]))
        self._h.append(
            tanh(
                np.dot(self.w['u_h'], inputs[i]) +
                np.dot(self.w['w_h'], np.multiply(self.h[i - 1], self.r[i])) +
                self.b['_h']))
        # applying update gate value
        self.h[i] = np.multiply(self.z[i], self.h[i - 1]) + np.multiply(
            1 - self.z[i], self._h[i])
        # calculating output
        # self.vo.append(np.dot(self.w['wo'], self.h[i]) + self.b['o'])
        # self.o.append(softmax(self.vo[i]))
        self.o.append(
            softmax(np.dot(self.w['wo'], self.h[i]) + self.b['o']))
    return self.o
def test(self, test_data, label):
    n_pos = 0.0
    for i in range(len(test_data)):
        if sigmoid(self.W, test_data[i]) >= 0.5 and label[i] == 1:
            n_pos += 1
        elif sigmoid(self.W, test_data[i]) < 0.5 and label[i] == 0:
            n_pos += 1
    print n_pos, len(test_data), n_pos / len(test_data)
    print self.W
def forward(self, inputs, return_activations=False):
    # inputs specifies one float for each neuron in the first layer
    # if return_activations is true, we return a list of activations at each layer
    # otherwise, we only return the output layer
    values = numpy.array([inputs], float).T  # values in current layer
    activations = [{'activations': values}]

    for layer in self.layers[1:]:
        if layer['type'] == 'sigmoid':
            # compute the weighted sum of neuron inputs, plus bias term (for each neuron in current layer)
            z_vector = numpy.dot(layer['weights'], values) + layer['bias']
            # apply sigmoid activation function
            values = util.sigmoid(z_vector)
            if return_activations:
                activations.append({'activations': values[:, 0]})
        elif layer['type'] == 'softmax':
            # compute the weighted sum of neuron inputs, plus bias term (for each neuron in current layer)
            z_vector = numpy.dot(layer['weights'], values) + layer['bias']
            # apply softmax
            values = numpy.exp(z_vector - numpy.max(z_vector))
            values = values / numpy.sum(values)
            if return_activations:
                activations.append({'activations': values[:, 0]})
        elif layer['type'] == 'convsample':
            # carry out convolution to get convolved values
            convolved_out = numpy.zeros(
                (layer['k'], layer['conv_count'], layer['conv_count']))
            for k in xrange(len(layer['weights'])):
                convolved_out[k] = scipy.signal.convolve2d(
                    values.reshape(layer['m'], layer['m']),
                    numpy.rot90(layer['weights'][k], 2), 'valid')
                convolved_out[k] = util.sigmoid(convolved_out[k] + layer['bias'][k])
            # pool the convolved features
            pooled = numpy.zeros((layer['k'], layer['o'], layer['o']))
            for k in xrange(layer['k']):
                for i in xrange(layer['o']):
                    for j in xrange(layer['o']):
                        pooled[k][i][j] = numpy.average(
                            convolved_out[k,
                                          (i * layer['p']):((i + 1) * layer['p']),
                                          (j * layer['p']):((j + 1) * layer['p'])])
            values = pooled.reshape(util.prod(pooled.shape), 1)
            if return_activations:
                activations.append({
                    'activations': values[:, 0],
                    'extra': convolved_out
                })

    if return_activations:
        return activations
    else:
        return values[:, 0]
def feedForward(self):
    for n in self.hiddenLayer:
        sum = n.bias
        for ni in self.inputLayer:
            sum += self.weights[(ni, n)] * ni.value
        n.value = util.sigmoid(sum)
    for n in self.outputLayer:
        sum = n.bias
        for nh in self.hiddenLayer:
            sum += self.weights[(nh, n)] * nh.value
        n.value = util.sigmoid(sum)
def getOutput(self, inputs):
    a_1 = [[
        util.sigmoid(x)
        for x in util.add(util.dot(inputs, self.ihWeights), self.ihBias)[0]
    ]]
    # a_2 = [[util.sigmoid(x) for x in util.add(util.dot(a_1, self.hhWeights), self.hhBias)[0]]]
    # uncomment and fix a_3 for 2 hidden layers
    a_3 = [[
        util.sigmoid(x)
        for x in util.add(util.dot(a_1, self.hoWeights), self.hoBias)[0]
    ]]
    return a_3[0]
def tree_lstm(input, g_in_left, g_in_right, r_in_left, r_in_right):
    # average the cell and hidden states coming from the two children
    g_in = (g_in_left + g_in_right) / 2.0
    r_in = (r_in_left + r_in_right) / 2.0
    f_t = sigmoid(concat_and_multiply(params['w_forget'], r_in, input))
    k_t_1 = sigmoid(concat_and_multiply(params['w_k_1'], r_in, input))
    r_t = np.tanh(concat_and_multiply(params['w_r'], r_in, input))
    k_t_2 = sigmoid(concat_and_multiply(params['w_k_2'], r_in, input))
    g_out = f_t * g_in + k_t_1 * r_t
    r_out = k_t_2 * np.tanh(g_out)
    return g_out, r_out
def update_backward_lstm(_input, hiddens, cells):
    change = np.tanh(concat_and_multiply(params['change_b'], _input, hiddens))
    forget = sigmoid(concat_and_multiply(params['forget_b'], _input, hiddens))
    ingate = sigmoid(concat_and_multiply(params['ingate_b'], _input, hiddens))
    outgate = sigmoid(concat_and_multiply(params['outgate_b'], _input, hiddens))
    cells = cells * forget + ingate * change
    hiddens = outgate * np.tanh(cells)
    return hiddens, cells
def fitness_notequal_proba(self, x, x1):
    feature_similarity_score = 1.0 - cdist(
        x.reshape(1, -1), x1.reshape(1, -1), metric=self.metric).ravel()[0]
    feature_similarity = sigmoid(feature_similarity_score)

    y = self.bb_predict_proba(x.reshape(1, -1))[0]
    y1 = self.bb_predict_proba(x1.reshape(1, -1))[0]

    # target_similarity_score = np.sum(np.abs(y - y1))
    target_similarity_score = 1.0 - cosine(y, y1)
    target_similarity = 1.0 - sigmoid(target_similarity_score)

    evaluation = self.alpha1 * feature_similarity + self.alpha2 * target_similarity
    return evaluation,
def fitness_notequal(self, x, x1):
    feature_similarity_score = 1.0 - cdist(
        x.reshape(1, -1), x1.reshape(1, -1), metric=self.metric).ravel()[0]
    # feature_similarity = feature_similarity_score if feature_similarity_score >= self.eta1 else 0.0
    feature_similarity = sigmoid(feature_similarity_score)

    y = self.bb_predict(x.reshape(1, -1))[0]
    y1 = self.bb_predict(x1.reshape(1, -1))[0]

    target_similarity_score = 1.0 - hamming(y, y1)
    # target_similarity = target_similarity_score if target_similarity_score < self.eta2 else 0.0
    target_similarity = 1.0 - sigmoid(target_similarity_score)

    evaluation = self.alpha1 * feature_similarity + self.alpha2 * target_similarity
    return evaluation,
def _backpropagation(self, x, y_true):
    deltas = list()
    activations = [x]
    for w, b in zip(self._weights, self._biases):
        a = sigmoid(w.dot(activations[-1]) + b)
        activations.append(a)

    output_delta = (activations[-1] - y_true) * activations[-1] * (1 - activations[-1])
    deltas.append(output_delta)

    for index in range(2, len(activations)):
        weight_index = 1 - index  # = -(index - 1)
        delta_index = index - 2
        tmp1 = self._weights[weight_index].T.dot(deltas[delta_index])
        tmp2 = activations[-index] * (1 - activations[-index])
        new_delta = tmp1 * tmp2
        deltas.append(new_delta)
    deltas = deltas[::-1]

    w_gradients = list()
    b_gradients = list()
    for index, delta in enumerate(deltas):
        grad_b = delta
        grad_w = np.outer(delta, activations[index])
        w_gradients.append(grad_w)
        b_gradients.append(grad_b)
    return w_gradients, b_gradients
def forward(self):
    temp = np.vstack((np.ones((1, self._input.shape[1])), self._input))
    self._forward_cache_acted = [temp]
    self._forward_cache_raw = [temp]
    if not self._constructed:
        print("use the build method before forwarding.")
        assert 0
    times = len(self._layers) - 1
    for i in range(times):
        # temp = np.vstack((np.ones((1, self._input.shape[1])), temp))
        temp = np.dot(self._weights[i], temp)
        self._forward_cache_raw.append(temp)
        if not self._activations[i]:
            pass
        elif self._activations[i].lower() == 'sigmoid':
            temp = util.sigmoid(temp)
        elif self._activations[i].lower() == 'tanh':
            temp = util.tanh(temp)
        elif self._activations[i].lower() == 'relu':
            temp = util.relu(temp)
        else:
            print("Activation function should be None, 'sigmoid', 'tanh' or 'relu'.")
            assert 0
        self._forward_cache_acted.append(temp)
    self._predictions = temp
    return temp
def loss(A, b, x):
    result = [A]
    layer_in = A
    for layer in x:
        layer_out = util.sigmoid(np.dot(layer_in, layer))
        layer_in = layer_out
    return np.sum(np.linalg.norm(layer_out - b, axis=1))
def target(self, x, t, x0):
    """A directed sigmoid function target.

    Parameters
    ----------
    x: state
    t: time
    x0: initial state

    Returns
    -------
    (x, u, a)
    """
    pose0 = self.model.model_parameters(q=x0[:self.model.nd])
    xt0 = self.task.x(pose0)
    t0 = 1.0
    A = 0.3
    B = 4
    o = np.asmatrix([[0], [0]])
    xd, ud, ad = sigmoid(t, t0, A, B)
    xd = rotate(o, np.asmatrix([[xd], [0]]), self.angle)
    ud = rotate(o, np.asmatrix([[ud], [0]]), self.angle)
    ad = rotate(o, np.asmatrix([[ad], [0]]), self.angle)
    return (np.asmatrix(xt0 + xd), np.asmatrix(ud), np.asmatrix(ad))
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python dictionary containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
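# A minimal sketch (an assumption, not the original helper module) of the sigmoid/relu
# activations that linear_activation_forward expects: each returns the activation value
# together with a cache of its input Z, so the backward pass can reuse it.
import numpy as np

def sigmoid(Z):
    A = 1.0 / (1.0 + np.exp(-Z))
    return A, Z   # cache Z for the backward pass

def relu(Z):
    A = np.maximum(0, Z)
    return A, Z   # cache Z for the backward pass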
def backprop(self, x, y):
    # Changes in weights and biases
    deltaWeights = [np.zeros(w.shape) for w in self.weights]
    deltaBiases = [np.zeros(b.shape) for b in self.biases]

    # feed forward
    params = zip(self.weights, self.biases)
    activation = x
    activations = [x]
    zs = []
    for w, b in params:
        z = np.matmul(w, activation) + b
        zs.append(z)
        activation = util.sigmoid(z)
        activations.append(activation)

    # backprop
    # delta is dc/da * da/dz = dc/dz for the last layer
    # because dz/db = 1, delta is also equal to dc/db
    delta = (activations[-1] - y) * util.sigmoid_derivative(zs[-1])
    deltaBiases[-1] = delta
    deltaWeights[-1] = np.matmul(delta, activations[-2].transpose())

    for layer in xrange(2, len(self.layerSizes)):
        z = zs[-layer]
        sp = util.sigmoid_derivative(z)
        delta = np.matmul(self.weights[-layer + 1].transpose(), delta) * sp
        deltaBiases[-layer] = delta
        deltaWeights[-layer] = np.matmul(delta, activations[-layer - 1].transpose())

    return (deltaWeights, deltaBiases, activations[-1])
def forwards(A, x):
    result = [A]
    layer_in = A
    for layer in x:
        layer_out = util.sigmoid(np.dot(layer_in, layer))
        result += [layer_out]
        layer_in = layer_out
    return result
def forward(self, X):
    Z = X.dot(self.W1) + self.b1
    Z = relu(Z)
    A = Z.dot(self.W2) + self.b2
    A = sigmoid(A)
    return A, Z
def train(self):
    self.W = np.random.rand(self.n_feature + 1)
    for iter in range(self.n_iter):
        g = [0.0 for i in range(len(self.W))]
        for index, d in enumerate(self.data):
            mu = sigmoid(self.W, d)
            # accumulate the gradient (mu - label) * x over the training set;
            # assumes the labels are stored on the instance as self.label, matching self.data
            g = [
                x + y
                for x, y in zip([(mu - self.label[index]) * z for z in d], g)
            ]
        self.W = [x - self.eta * y for x, y in zip(self.W, g)]
def forward(self, x_t):
    self.t += 1
    t = self.t
    h = self.h[t - 1]
    self.input_gate[t] = sigmoid(np.dot(self.W_hi, h) + np.dot(self.W_xi, x_t) + self.b_i)
    self.forget_gate[t] = sigmoid(np.dot(self.W_hf, h) + np.dot(self.W_xf, x_t) + self.b_f)
    self.output_gate[t] = sigmoid(np.dot(self.W_ho, h) + np.dot(self.W_xo, x_t) + self.b_o)
    self.cell_update[t] = tanh(np.dot(self.W_hj, h) + np.dot(self.W_xj, x_t) + self.b_j)
    self.c[t] = self.input_gate[t] * self.cell_update[t] + self.forget_gate[t] * self.c[t - 1]
    self.ct[t] = tanh(self.c[t])
    self.h[t] = self.output_gate[t] * self.ct[t]
    self.x[t] = x_t
    return self.h[t]
def forward(self, X):
    # Z = relu(X.dot(self.W1) + self.b1)
    Z = tanh(X.dot(self.W1) + self.b1)
    # print("Z.shape" + str(Z.shape))
    # print("self.W2.shape" + str(self.W2.shape))
    # print("self.b2.shape" + str(self.b2.shape))
    ret = sigmoid(Z.dot(self.W2) + self.b2)
    # print("ret.shape" + str(np.array(ret).shape))
    return ret, Z
def sample_v_given_h(h, W, b):
    """
    :param h: 2d np.ndarray, (N, n_hidden)
    :param W: 2d np.ndarray, (n_visible, n_hidden)
    :param b: 1d np.ndarray, (n_visible, )
    :return: sampled visible units, (N, n_visible)
    """
    proba = sigmoid(np.matmul(h, W.transpose()) + b)
    return sample_binomial(proba)
def feedforward(self):
    for i in xrange(self.num_layers + 1):
        prev_layer = self.layers[i]
        for neuron in self.layers[i + 1]:
            sum = neuron.bias
            for prev_neuron in prev_layer:
                sum += self.weights[(prev_neuron, neuron)] * prev_neuron.value
            neuron.value = util.sigmoid(sum)
def sample_h_given_v(v, W, c):
    """
    :param v: 2d np.ndarray, (N, n_visible)
    :param W: 2d np.ndarray, (n_visible, n_hidden)
    :param c: 1d np.ndarray, (n_hidden, )
    :return: sampled hidden units, (N, n_hidden)
    """
    proba = sigmoid(np.matmul(v, W) + c)
    return sample_binomial(proba)
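# A minimal sketch of the sample_binomial helper used above (an assumption, not the
# original implementation): draw each unit as an independent Bernoulli(p) sample.
import numpy as np

def sample_binomial(proba):
    # returns a 0/1 array with the same shape as proba
    return (np.random.random(proba.shape) < proba).astype(proba.dtype)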
def _forward_pass(self, x):
    if len(x) != self._architecture[0]:
        raise ValueError("Input dimensions don't correspond to input layer size")
    values = x
    for weights, bias in zip(self._weights, self._biases):
        z = weights.dot(values) + bias
        values = sigmoid(z)
    return values
def logistic_graddesc(X, Y, T=1000, learning_rate=10e-2):
    # Gradient descent for logistic regression (analytic gradient)
    # It guarantees convergence even when a subset of columns of X are multiples
    # https://deeplearningcourses.com/c/data-science-linear-regression-in-python
    # https://www.udemy.com/data-science-linear-regression-in-python
    w = np.random.randn(X.shape[1])
    Yh = sigmoid(X.dot(w))
    xe = []
    for _ in xrange(T):
        delta = Y - Yh
        # gradient descent weight update
        w += learning_rate * X.T.dot(delta)
        # recalculate Y
        Yh = sigmoid(X.dot(w))
        xe.append(xentropy(Y, Yh))
    return w, xe
def getFeatures(self, state, action):
    features = util.Counter()

    dir_vec = []
    if action == util.Dirs.LEFT:
        dir_vec = [0, -1]
    elif action == util.Dirs.RIGHT:
        dir_vec = [0, 1]
    elif action == util.Dirs.UP:
        dir_vec = [-1, 0]
    elif action == util.Dirs.DOWN:
        dir_vec = [1, 0]
    head = util.vectorAdd(state.snake[0], dir_vec)

    # performs BFS of 'search_size' number of positions to see if head is surrounded by walls
    search_size = min(pow(max(len(state.snake) / 4, 1), 2),
                      int((state.walls[0] * state.walls[1] - len(state.snake)) * 0.75))
    remaining_nodes = search_size
    oldest_bar = (-1, -1)
    oldest_bar_age = len(state.snake)
    if head in state.snake or util.outOfBounds(head, state.walls):
        remaining_nodes = 0
    else:
        head_t = (head[0], head[1])
        visited_coords = {head_t}
        q = [head_t]
        for i in range(0, search_size):
            if q:
                coord = q.pop(0)
            else:
                remaining_nodes = i
                break
            for neighbor in self.getNeighbors(coord):
                if state.board[neighbor[0]][neighbor[1]] > 0 and \
                        state.board[neighbor[0]][neighbor[1]] < oldest_bar_age:
                    oldest_bar_age = state.board[neighbor[0]][neighbor[1]]
                    oldest_bar = neighbor
                if util.outOfBounds(neighbor, state.walls) == False and \
                        state.board[neighbor[0]][neighbor[1]] < util.manhattanDist(neighbor, head) and \
                        neighbor not in visited_coords:
                    q.append(neighbor)
                    visited_coords.add(neighbor)

    len_bin = int(util.sigmoid(len(state.snake)))
    trapped = remaining_nodes < search_size

    # Whether or not snake is touching a wall or itself
    features["on-barrier"] = state.board[head[0]][head[1]] > 0 or util.outOfBounds(head, state.walls)
    # Distance to oldest snake segment; only active if snake is trapped by itself
    features["dist-oldest"] = 1.0 / pow(util.euclidDist(head, oldest_bar), 2) if trapped else 0
    # Distance to food
    features["dist-food"] = pow(util.manhattanDist(head, state.food) / float(state.walls[0] + state.walls[1]), .33)
    # Indicates whether the current "trapped" state is escapable under perfect circumstances
    features["trapped"] = oldest_bar_age > remaining_nodes if trapped else 0
    return features
def forward(self, inputs, targets, h_prev, c_prev):
    # Forward pass of lstm
    cache = defaultdict(lambda: defaultdict(np.float64))
    cache['h'][-1] = np.copy(h_prev)
    cache['c'][-1] = np.copy(c_prev)
    loss = 0
    for t in xrange(len(inputs)):
        cache['i'][t] = sigmoid(np.dot(self.Wix, inputs[t]) + np.dot(self.Wih, cache['h'][t - 1]) + self.bi)  # i(t)
        cache['o'][t] = sigmoid(np.dot(self.Wox, inputs[t]) + np.dot(self.Woh, cache['h'][t - 1]) + self.bo)  # o(t)
        cache['f'][t] = sigmoid(np.dot(self.Wfx, inputs[t]) + np.dot(self.Wfh, cache['h'][t - 1]) + self.bf)  # f(t)
        cache['g'][t] = np.tanh(np.dot(self.Wgx, inputs[t]) + np.dot(self.Wgh, cache['h'][t - 1]) + self.bg)  # g(t)
        cache['c'][t] = cache['g'][t] * cache['i'][t] + cache['c'][t - 1] * cache['f'][t]  # c(t)
        cache['h'][t] = cache['c'][t] * cache['o'][t]  # h(t)
        cache['y'][t] = np.dot(self.Why, cache['h'][t]) + self.by  # unnormalized log probabilities
        cache['p'][t] = np.exp(cache['y'][t]) / np.sum(np.exp(cache['y'][t]))  # softmax for prediction
        loss += -np.log(cache['p'][t][np.argmax(targets[t])])
    return cache, loss
def predict_one_vs_all(all_theta, _X):
    m = _X.shape[0]
    X = np.hstack((np.ones((m, 1)), _X))
    theta0 = all_theta[0]
    p = sigmoid(np.dot(X, theta0))
    print p.shape
    print p
    p = np.dot(X, all_theta.T)
    return np.array(map(max_index, p))
def forward(self, x):
    # fetch the network parameters from the parameter dictionary
    w1, b1 = self.params['W1'], self.params['b1']
    w2, b2 = self.params['W2'], self.params['b2']

    # first-layer computation
    z1 = np.dot(x, w1) + b1
    h1 = sigmoid(z1)

    # second-layer computation
    z2 = np.dot(h1, w2) + b2
    return softmax(z2)
def predict(self, x):
    """Make a prediction given new inputs x.

    Args:
        x: Inputs of shape (m, n).

    Returns:
        Outputs of shape (m,).
    """
    # *** START CODE HERE ***
    return util.sigmoid(x.dot(self.theta))
def cost_function(theta, X, y, lambda_=0):
    m = y.size
    o = sigmoid(np.dot(X, theta))
    J = (-1. / m) * (np.dot(y.T, np.log(o)) + np.dot((1. - y).T, np.log(1. - o))) \
        + (lambda_ / (2. * m)) * np.dot(theta[1:], theta[1:])
    grad = (1. / m) * np.dot(X.T, o - y)
    grad[1:] += (lambda_ / m) * theta[1:]
    return J, grad
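# A quick sanity check (a hypothetical usage sketch, assuming sigmoid is the standard
# logistic function): with theta = 0 every prediction is 0.5, so the unregularized cost
# should be log(2) ~= 0.693 regardless of the labels.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

X = np.array([[1.0, 0.5], [1.0, -1.5], [1.0, 2.0]])  # first column is the intercept
y = np.array([1.0, 0.0, 1.0])
theta = np.zeros(2)
J, grad = cost_function(theta, X, y)
print(J)     # ~0.6931
print(grad)  # (1/m) * X.T @ (0.5 - y)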
def forwardPass(self, x):
    """
    Given x, computes the outputs of the Network

    Parameters
    ----------
    x : example

    Returns
    -------
    nothing
    """
    layer = self.input_layer
    layer.xs[0] = 1  # bias term
    layer.xs[1:] = x
    for i in range(self.n_hidden_layers):
        next_layer = self.hidden_layers[i]
        next_layer.xs[0] = 1  # bias term
        next_layer.xs[1:] = sigmoid(np.dot(layer.weights.transpose(), layer.xs))
        layer = next_layer
    self.output_layer.xs = sigmoid(np.dot(layer.weights.transpose(), layer.xs))
def predict(self, x):
    """Make a prediction given new inputs x.

    Args:
        x: Inputs of shape (m, n).

    Returns:
        Outputs of shape (m,).
    """
    # *** START CODE HERE ***
    temp = util.sigmoid(np.dot(x, self.theta) + self.theta_0)
    return temp
def predict_proba(self, X):
    """
    Returns the probability of class being 1
    @params:
        X: observations to predict, with the features along the second axis (length m)
    """
    if self.add_bias:
        # add the bias term
        bias_term = np.ones((X.shape[0], 1))
        X = np.append(bias_term, X, axis=1)
    return sigmoid(np.dot(X, self.w_estimated))
def sample_data(self, ss, hps):
    """
    NOTE THIS ONLY SAMPLES FROM THE PARAM P
    and not from suffstats.
    """
    d = np.random.exponential(hps['mu_hp'])
    p = util.sigmoid(d, ss['mu'], ss['lambda'])
    p = p * (hps['p_max'] - hps['p_min']) + hps['p_min']
    link = np.random.rand() < p
    x = np.zeros(1, dtype=self.data_dtype())
    x[0]['distance'] = d
    x[0]['link'] = link
    return x[0]
def mlp_decode(z, phi, tanh_scale=10., sigmoid_output=True):
    nnet_params, ((W_mu, b_mu), (W_sigma, b_sigma)) = phi[:-2], phi[-2:]
    z = z if z.ndim == 3 else z[:, None, :]  # ensure z.shape == (T, K, n)

    nnet = compose(tanh_layer(W, b) for W, b in nnet_params)
    mu = linear_layer(W_mu, b_mu)
    log_sigmasq = linear_layer(W_sigma, b_sigma)

    nnet_outputs = nnet(np.reshape(z, (-1, z.shape[-1])))
    mu = sigmoid(mu(nnet_outputs)) if sigmoid_output else mu(nnet_outputs)
    log_sigmasq = tanh_scale * np.tanh(log_sigmasq(nnet_outputs) / tanh_scale)

    shape = z.shape[:-1] + (-1,)
    return mu.reshape(shape), log_sigmasq.reshape(shape)
def __init__(self, rng, data, W=None, b=None, filter_h=2, filter_num=50, k=300):
    """
    :param data: a 3D tensor (sentence number, sentence length, word vector size).
    :param W: a matrix (filter_num, word vector size)
    :param filter_h: convolution operation window size.
    :param filter_num: the feature map number of each convolution window size.
        So the total feature maps are `filter_num`, which is also the size of the
        new vector representation of the sentence.
    """
    if W is None:
        W = np.asarray(rng.uniform(size=(filter_num, k * filter_h)),
                       dtype=theano.config.floatX)
    self.W = theano.shared(value=W, name='W', borrow=True)

    # initialize the biases b (drawn uniformly, one per filter)
    if b is None:
        b = np.asarray(rng.uniform(size=(filter_num,)),
                       dtype=theano.config.floatX)
    self.b = theano.shared(value=b, name='b', borrow=True)

    X_h, X_w = data.shape[1], data.shape[2]
    idx_range = T.arange(X_h - filter_h + 1)
    self.window_results, updates = theano.scan(
        fn=lambda i, X, filter_h: T.flatten(data[:, i: i + filter_h], outdim=2),
        sequences=idx_range,
        outputs_info=None,
        non_sequences=[data, filter_h])
    self.window_results = T.transpose(self.window_results, axes=(1, 0, 2))

    c = sigmoid(T.dot(self.window_results, self.W.T) + self.b)
    # max pooling
    c_max = T.max(c, axis=1)
    self.c = c
    # c_max (sentence number, filter_num)
    self.c_max = c_max
    self.params = [self.W, self.b]
def cross_validation(X_train, y_train, params, X_test=None, verbose_eval=False):
    NUM_BOOST_ROUND = 1000
    best_iterations = []
    train_scores = []
    valid_scores = []
    y_preds = []

    kf = KFold(y_train.shape[0], n_folds=5, shuffle=True, random_state=12345)
    for train_index, valid_index in kf:
        _X_train, _X_valid = X_train.ix[train_index], X_train.ix[valid_index]
        _y_train, _y_valid = y_train[train_index], y_train[valid_index]

        dtrain = xgb.DMatrix(_X_train, _y_train)
        dvalid = xgb.DMatrix(_X_valid, _y_valid)
        watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
        bst = xgb.train(params, dtrain, NUM_BOOST_ROUND,
                        evals=watchlist, early_stopping_rounds=200,
                        verbose_eval=verbose_eval)

        # best iterations and valid score
        best_iterations.append(bst.best_iteration + 1)
        valid_scores.append(bst.best_score)

        if X_test is not None:
            dtest = xgb.DMatrix(X_test)
            y_pred = bst.predict(dtest, ntree_limit=bst.best_iteration)
            y_preds.append(y_pred)

    # average the fold predictions in logit space, then map back to probabilities
    y_pred = util.sigmoid(np.mean(util.logit(np.array(y_preds)), axis=0))

    result = {"best-iterations": best_iterations,
              "best-iteration": np.mean(best_iterations),
              "valid-score": np.mean(valid_scores),
              "valid-scores": valid_scores,
              "y_pred": y_pred,
              "y_preds": y_preds}
    return result
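# A minimal sketch of the util.logit / util.sigmoid pair assumed above (an assumption,
# not the project's util module): logit is the inverse of sigmoid, so averaging fold
# predictions in logit space and mapping back keeps the blend inside (0, 1).
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def logit(p):
    return np.log(p / (1.0 - p))

p_folds = np.array([[0.2, 0.9], [0.4, 0.8]])        # per-fold probabilities
blended = sigmoid(np.mean(logit(p_folds), axis=0))  # blend in logit space
print(blended)                                      # stays between the fold values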
def gaussian_mean(inputs, sigmoid_mean=False):
    mu_input, sigmasq_input = np.split(inputs, 2, axis=-1)
    mu = sigmoid(mu_input) if sigmoid_mean else mu_input
    sigmasq = log1pexp(sigmasq_input)
    return make_tuple(mu, sigmasq)
def forward_propagation(self): return util.sigmoid(np.dot(self.a, self.theta))
sentence = "the digit 1 is on the left of the digit 0 ."
y, words = sent2matrix(sentence, dictionary)
y = np.int32(np.repeat(y, 100, axis=0))
print y.shape

height = int(math.sqrt(dimX))
width = int(math.sqrt(dimX))
draw.height = height
draw.width = width

rvae = ReccurentAttentionVAE(dimY, dimLangRNN, dimAlign, dimX, dimReadAttent,
                             dimWriteAttent, dimRNNEnc, dimRNNDec, dimZ,
                             runSteps, batch_size, reduceLRAfter, data, labels,
                             pathToWeights=args.weights)

ct_s, write_attent_params, alphas, _, _ = rvae.sample_from_prior(runSteps, y)
ct_s = sigmoid(ct_s)

dimension = int(math.sqrt(dimX))
print dimension, dimension

most_used = np.float32(np.zeros((y.shape[1])))

for i in xrange(runSteps):
    total_image = np.zeros((dimension * 10, dimension * 10))
    for j in xrange(100):
        c = ct_s[i, j, :].reshape([dimension, dimension])
        row = j / 10
        column = j % 10
        total_image[(row * dimension):((row + 1) * dimension),
                    (column * dimension):((column + 1) * dimension)] = c[:][:]
def decode(X): return sigmoid(mu(nnet(X)))
def forward(X, W, b): return sigmoid(X.dot(W) + b)
def _sigmoid(self, gain):
    for i in xrange(len(gain)):
        gain[i] = sigmoid(gain[i], 1, 2, self._gain)
# Calculate cross-entropy error for random weights, and for the closed-form solution to the Bayes classifier
import numpy as np
from util import sigmoid, cross_entropy

N = 100
D = 2

means = np.array(((-2, -2), (2, 2)))
covar = np.eye(2)

# Artificially create 2 classes: center the first 50 points at (-2,-2), the last 50 at (2,2)
X = np.random.randn(N, D)
X[:N//2, :] = X[:N//2, :] + means[0] * np.ones((N//2, D))
X[N//2:, :] = X[N//2:, :] + means[1] * np.ones((N//2, D))
Xb = np.concatenate((np.ones((N, 1)), X), axis=1)

# Class labels: first 50 are 0, last 50 are 1
T = np.concatenate((np.zeros((N//2,)), np.ones((N//2,))))

# Random weights
w = np.random.randn(D + 1)
Y = sigmoid(Xb @ w)
print('Random weights:', cross_entropy(T, Y))

# Closed-form Bayes solution
w = ((means[1, None] - means[0, None]) @ np.linalg.inv(covar)).T
w = np.concatenate(((0,), w.reshape(D)))  # Add weight for bias
Y = sigmoid(Xb @ w)
print('Closed form solution:', cross_entropy(T, Y))
def forward(self, X):
    # Z = relu(X.dot(self.W1) + self.b1)
    Z = np.tanh(X.dot(self.W1) + self.b1)
    return sigmoid(Z.dot(self.W2) + self.b2), Z
def forward(self, X): return sigmoid(X.dot(self.W) + self.b)
def value(self, params, ex): return util.sigmoid(self.score_fxn.value(params, ex))
def _updateOnMask(self, mask):
    self._network.units[mask] = sigmoid(self._network.gaps[mask]) > numpy.random.random(len(mask))