def feedforward(self, s_inst, s_trans):
    # regular (instantaneous) branch
    y_r = act.sigmoid(s_inst, self.V_r)
    if self.H_r != 0:
        y_h_r = act.sigmoid(y_r, self.W_r)
    else:
        y_h_r = None

    # memory branch: leaky accumulation of the transient input
    y_m, self.cumulative_memory = act.sigmoid_acc_leaky(
        s_trans, self.V_m, self.cumulative_memory, self.memory_leak)
    if self.H_m != 0:
        y_h_m = act.sigmoid(y_m, self.W_m)
    else:
        y_h_m = None

    # concatenate whichever representations feed the output layer,
    # depending on which hidden layers are present
    if self.H_r != 0 and self.H_m != 0:
        y_tot = np.concatenate((y_h_r, y_h_m), axis=1)
        W_tot = np.concatenate((self.W_h_r, self.W_h_m), axis=0)
    elif self.H_r == 0 and self.H_m != 0:
        y_tot = np.concatenate((y_r, y_h_m), axis=1)
        W_tot = np.concatenate((self.W_r, self.W_h_m), axis=0)
    elif self.H_r != 0 and self.H_m == 0:
        y_tot = np.concatenate((y_h_r, y_m), axis=1)
        W_tot = np.concatenate((self.W_h_r, self.W_m), axis=0)
    else:
        y_tot = np.concatenate((y_r, y_m), axis=1)
        W_tot = np.concatenate((self.W_r, self.W_m), axis=0)

    Q = act.linear(y_tot, W_tot)
    return y_r, y_m, Q, y_h_r, y_h_m
def step(self, input_: np.ndarray) -> np.ndarray:
    # update gate
    update_gate_input = np.dot(self.params["Wz"], input_)
    update_gate_hidden = np.dot(self.params["Uz"], self.hidden_state)
    update_gate = sigmoid(update_gate_input + update_gate_hidden + self.params["bz"])

    # reset gate
    reset_gate_input = np.dot(self.params["Wr"], input_)
    reset_gate_hidden = np.dot(self.params["Ur"], self.hidden_state)
    reset_gate = sigmoid(reset_gate_input + reset_gate_hidden + self.params["br"])

    # hidden state proposal
    proposal_input = np.dot(self.params["Wh"], input_)
    proposal_hidden = np.dot(self.params["Uh"], (self.hidden_state * reset_gate))
    proposal = tanh(proposal_input + proposal_hidden + self.params["bh"])

    # new hidden state
    self.hidden_state = ((1 - update_gate) * proposal) + (update_gate * self.hidden_state)
    return self.hidden_state
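# A self-contained sketch of how a GRU step() like the one above can be wired up
# and driven over a sequence. Everything here (the GRUCell class, the sizes, the
# Gaussian initialisation and the sigmoid helper) is an assumption made for
# illustration; it is not the original project's code.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

class GRUCell:
    def __init__(self, input_dim, hidden_dim, seed=0):
        rng = np.random.default_rng(seed)
        init = lambda rows, cols: rng.normal(scale=0.1, size=(rows, cols))
        self.params = {
            "Wz": init(hidden_dim, input_dim), "Uz": init(hidden_dim, hidden_dim), "bz": np.zeros(hidden_dim),
            "Wr": init(hidden_dim, input_dim), "Ur": init(hidden_dim, hidden_dim), "br": np.zeros(hidden_dim),
            "Wh": init(hidden_dim, input_dim), "Uh": init(hidden_dim, hidden_dim), "bh": np.zeros(hidden_dim),
        }
        self.hidden_state = np.zeros(hidden_dim)

    def step(self, input_):
        p = self.params
        update_gate = sigmoid(p["Wz"] @ input_ + p["Uz"] @ self.hidden_state + p["bz"])
        reset_gate = sigmoid(p["Wr"] @ input_ + p["Ur"] @ self.hidden_state + p["br"])
        proposal = np.tanh(p["Wh"] @ input_ + p["Uh"] @ (self.hidden_state * reset_gate) + p["bh"])
        self.hidden_state = (1 - update_gate) * proposal + update_gate * self.hidden_state
        return self.hidden_state

cell = GRUCell(input_dim=4, hidden_dim=8)
sequence = np.random.default_rng(1).normal(size=(5, 4))   # 5 time steps of 4 features
for x_t in sequence:
    h_t = cell.step(x_t)
print(h_t.shape)   # (8,)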
def predict(layers, df):
    for idx, row in df.iterrows():
        # load the input layer with this row's pixel values
        for i, neuron in zip(range(row.count() - 1), layers[0]):
            neuron.set_val(RGB_to_int(*row[i]))

        # first hidden layer: weighted sum of the previous layer, squashed by sigmoid
        for neuron in layers[1]:
            weighted_sum = 0
            for n, weight in neuron.get_prev_layer().items():
                weighted_sum += n.get_val() * weight
            neuron.set_val(sigmoid(weighted_sum, 2))

        # second hidden layer
        for neuron in layers[2]:
            weighted_sum = 0
            for n, weight in neuron.get_prev_layer().items():
                weighted_sum += n.get_val() * weight
            neuron.set_val(sigmoid(weighted_sum, 2))

        # output layer: predict the index of the most activated neuron
        highest, val = -1, 0
        i = 0
        for neuron in layers[3]:
            weighted_sum = 0
            for n, weight in neuron.get_prev_layer().items():
                weighted_sum += n.get_val() * weight
            observed = sigmoid(weighted_sum, 2)
            if observed > highest:
                highest = observed
                val = i
            i += 1

        print("=========================")
        print("Prediction: " + str(val))
        print("Actual: " + str(row["dr"]))
def forward(self, inputs):
    # initial hidden state and cell state (column vectors of the hidden size)
    h = np.zeros((self.W_f.shape[0], 1))
    self.C_t = np.zeros((self.W_f.shape[0], 1))
    # store inputs for reference
    self.last_inputs = inputs
    # history of hidden states
    self.last_hs = {0: h}
    for i, x in enumerate(inputs):
        # stack the last hidden state on top of the input (both column vectors)
        z = np.vstack((h, x))
        f_t = sigmoid(self.W_f @ z + self.b_f)   # forget gate
        i_t = sigmoid(self.W_i @ z + self.b_i)   # input gate
        c_t = np.tanh(self.W_c @ z + self.b_c)   # candidate cell state
        self.C_t = f_t * self.C_t + i_t * c_t    # new cell state
        o_t = sigmoid(self.W_o @ z + self.b_o)   # output gate
        h = o_t * np.tanh(self.C_t)              # calculate new hidden state
        self.last_hs[i + 1] = h                  # save the hidden state
    # logits from the final hidden state
    y = self.W_y @ h + self.b_y
    return y, h
def compute_layer_and_cache(self, X):
    X = self._assert_numpy(X)
    # forward propagate, saving information as we go
    # forget gate
    F_arg = numpy.dot(self.W_f, X) + numpy.dot(self.U_f, self.h_t) + self.b_f
    F = af.sigmoid(F_arg)
    # input gate
    I_arg = numpy.dot(self.W_i, X) + numpy.dot(self.U_i, self.h_t) + self.b_i
    I = af.sigmoid(I_arg)
    # candidate cell state
    C_arg = numpy.dot(self.W_c, X) + numpy.dot(self.U_c, self.h_t) + self.b_c
    C = self.afuncs[0](C_arg)
    # update the internal (cell) state
    self.S = numpy.multiply(F, self.S) + numpy.multiply(I, C)
    # output gate and new hidden state
    Hf_arg = numpy.dot(self.W_hf, X) + numpy.dot(self.U_hf, self.h_t) + self.b_hf
    Hf = af.sigmoid(Hf_arg)
    self.h_t = numpy.multiply(Hf, self.afuncs[1](self.S))
    # layer output computed from the new hidden state
    O_arg = numpy.dot(self.W_o, self.h_t) + self.b_o
    O = self.afuncs[2](O_arg)
    return (F_arg, F, I_arg, I, C_arg, C, Hf_arg, Hf,
            numpy.array(self.h_t), O_arg, O, numpy.array(self.S))
def step(self, input_: np.ndarray) -> "tuple[np.ndarray, np.ndarray]":
    # forget gate
    forget_gate_input = np.dot(self.params["Wf"], input_)
    forget_gate_hidden = np.dot(self.params["Uf"], self.hidden_state)
    forget_gate = sigmoid(forget_gate_input + forget_gate_hidden + self.params["bf"])

    # input gate
    input_gate_input = np.dot(self.params["Wi"], input_)
    input_gate_hidden = np.dot(self.params["Ui"], self.hidden_state)
    input_gate = sigmoid(input_gate_input + input_gate_hidden + self.params["bi"])

    # output gate
    output_gate_input = np.dot(self.params["Wo"], input_)
    output_gate_hidden = np.dot(self.params["Uo"], self.hidden_state)
    output_gate = sigmoid(output_gate_input + output_gate_hidden + self.params["bo"])

    # candidate cell state
    cell_state_input = np.dot(self.params["Wc"], input_)
    cell_state_hidden = np.dot(self.params["Uc"], self.hidden_state)
    cell_state = tanh(cell_state_input + cell_state_hidden + self.params["bc"])

    # new cell state and hidden state
    self.cell_state = (forget_gate * self.cell_state) + (input_gate * cell_state)
    self.hidden_state = output_gate * tanh(self.cell_state)
    return self.hidden_state, self.cell_state
def test_sigmoid(self):
    self.assertEqual(sigmoid(0), 0.5)
    self.assertGreater(sigmoid(100), .99)
    self.assertLess(sigmoid(-100), .01)
    Z = np.array([1, 2, 3])
    expected = np.array([0.73105858, 0.88079708, 0.95257413])
    self.assertTrue(np.allclose(sigmoid(Z), expected))
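# For reference, a minimal NumPy sigmoid that satisfies the assertions in
# test_sigmoid above. This is an illustrative sketch; the project under test
# presumably ships its own implementation.
import numpy as np

def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + exp(-Z))."""
    return 1.0 / (1.0 + np.exp(-Z))

print(sigmoid(0))                        # 0.5
print(sigmoid(np.array([1, 2, 3])))      # [0.73105858 0.88079708 0.95257413]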
def forward(self, x, a_prev, c_prev):
    # stack the previous hidden state and the current input once
    concat = np.concatenate([a_prev, x])
    # forget, update and output gates
    self.gamma_f = sigmoid(np.dot(self.w_f, concat) + self.b_f)
    self.gamma_u = sigmoid(np.dot(self.w_u, concat) + self.b_u)
    self.gamma_o = sigmoid(np.dot(self.w_o, concat) + self.b_o)
    # candidate cell state (np.dot rather than element-wise *, to match the gates)
    self.c_ = np.tanh(np.dot(self.w_c, concat) + self.b_c)
    # new cell state and hidden state
    self.c = self.gamma_f * c_prev + self.gamma_u * self.c_
    self.a = self.gamma_o * np.tanh(self.c)
    # output distribution; the original call was left incomplete as
    # softmax(np.dot()), so output weights w_y / b_y are assumed here
    self.y = softmax(np.dot(self.w_y, self.a) + self.b_y)
def decode(self, y_m):
    y_h = act.sigmoid(y_m, self.hidden_weights)
    decode_output = act.sigmoid(y_h, self.W_dec)
    return decode_output, y_h
def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y
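# Hypothetical driver for predict() above. The layer sizes, the random
# initialisation and the sigmoid/softmax helpers below are assumptions made so
# the sketch runs on its own; they are not taken from the original project.
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def softmax(a):
    e = np.exp(a - np.max(a, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

rng = np.random.default_rng(0)
sizes = [784, 50, 100, 10]               # input -> two hidden layers -> 10 classes
network = {}
for k, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:]), start=1):
    network[f"W{k}"] = rng.normal(scale=0.01, size=(n_in, n_out))
    network[f"b{k}"] = np.zeros(n_out)

x = rng.normal(size=(1, 784))
y = predict(network, x)                  # shape (1, 10); rows sum to 1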
def linear_activation_forward_with_dropout(A_prev, W, b, activation, keep_prob=0.5):
    # Linear forward step
    Z, linear_cache = linear_forward(A_prev, W, b)

    # Activation forward step
    if activation == 'relu':
        A, activation_cache = relu(Z)
        # Implementing dropout
        D = np.random.rand(A.shape[0], A.shape[1])
        D = (D < keep_prob).astype(int)  # convert entries of D to 0 or 1 (using keep_prob as the threshold)
        A = A * D                        # shut down some neurons of A
        A = np.divide(A, keep_prob)      # scale the value of neurons that haven't been shut down
        cache = (linear_cache, activation_cache, D)
    elif activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
        cache = (linear_cache, activation_cache, None)

    return A, cache
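# Small numeric illustration of the inverted-dropout scaling used above: units
# that survive the mask are divided by keep_prob so the expected activation is
# unchanged. The array below is made up; relu/linear_forward are not needed here.
import numpy as np

np.random.seed(1)
A = np.ones((3, 4))
keep_prob = 0.5
D = (np.random.rand(*A.shape) < keep_prob).astype(int)
A_dropped = (A * D) / keep_prob     # surviving units scaled by 1 / keep_prob
print(A_dropped.mean())             # close to A.mean() == 1.0 in expectation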
def linear_activation_forward(A_prev, W, b, activation_function):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation_function -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python dictionary containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """
    if activation_function == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward_propagation(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation_function == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward_propagation(A_prev, W, b)
        A, activation_cache = relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
def memory_dynamics(self, k_r, k_w, w_r, w_u, w_w):
    r = np.zeros((self.M, self.n))

    # still from previous time step: build the least-usage vector w_lu,
    # flagging the n memory locations with the smallest usage weights
    w_u_threshold = np.sort(w_u, axis=None)[self.n - 1]
    if np.sum(np.where(w_u <= w_u_threshold, 1, 0)) == self.n:
        w_lu = np.where(w_u <= w_u_threshold, 1, 0)
    else:
        # ties at the threshold: take the strictly smaller entries, then fill
        # the remaining slots from the positions equal to the threshold
        w_lu = np.where(w_u < w_u_threshold, 1, 0)
        missed = self.n - np.sum(np.where(w_u < w_u_threshold, 1, 0))
        pos = np.where(w_u == w_u_threshold)
        for m in np.arange(missed):
            w_lu[pos[0][m]] = 1
    if np.sum(np.where(w_lu == 1, 1, 0)) != self.n:
        print('ERROR in least usage vector')

    # current time step
    interp_value = act.sigmoid(self.alpha)
    for head in np.arange(self.n):
        w_lu_head = np.zeros((self.N, 1))
        w_lu_head[np.where(w_lu == 1)[0][head], 0] = 1
        # write weights: interpolate between the previous read weights and the
        # least-used location assigned to this head
        w_w[:, head:(head + 1)] = (interp_value * w_r[:, head:(head + 1)]
                                   + (1 - interp_value) * w_lu_head)
        w_r[:, head:(head + 1)] = self.read_weights(k_r[:, head:(head + 1)])
        r[:, head:(head + 1)] = np.dot(np.transpose(self.MEMORY), w_r[:, head:(head + 1)])

    for head in np.arange(self.n):
        # write key to memory according to write weights
        self.MEMORY += np.dot(w_w[:, head:(head + 1)], np.transpose(k_w[:, head:(head + 1)]))

    # decay the usage weights and add this step's read and write contributions
    w_u = (self.gamma * w_u
           + np.reshape(np.sum(w_r, axis=1), (-1, 1))
           + np.reshape(np.sum(w_w, axis=1), (-1, 1)))

    return r, w_r, w_u, w_w
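# Tiny worked example of the least-usage selection at the top of
# memory_dynamics: with n = 2 heads, the n smallest usage weights are flagged
# in w_lu. The values below are made up for illustration.
import numpy as np

n = 2
w_u = np.array([[0.9], [0.1], [0.4], [0.1]])
w_u_threshold = np.sort(w_u, axis=None)[n - 1]      # 0.1
w_lu = np.where(w_u <= w_u_threshold, 1, 0)         # exactly n entries flagged
print(w_lu.ravel())                                 # [0 1 0 1]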
def output_gate(self, x: np.ndarray):
    """
    Output gate of the LSTM layer.

    :param x: input vector for the current time step
    :return: output-gate activation, sigmoid(W_output . x + b_output)
    """
    return sigmoid(np.dot(self.W_output.value, x) + self.b_output.value)
def forward(self, inputs):
    # inputs is expected to be a column vector
    assert inputs.ndim == 2 and inputs.shape[1] == 1
    # the weights and the activation name are assumed to be instance attributes
    s = np.matmul(np.transpose(self.weights), inputs)
    if self.activationf == 'Relu':
        self.activation = act.relu(s)
    else:
        self.activation = act.sigmoid(s)
def linear_activation_forward(A_prev, W, b, activation):
    '''
    Implements the Linear->Activation forward propagation for one layer.

    Arguments:
    A_prev -- activations from the previous layer, of shape (size of previous layer, number of examples)
    W -- weights of the current layer, of shape (size of current layer, size of previous layer)
    b -- biases of the current layer, of shape (size of current layer, 1)
    activation -- string with the name of the activation to use in this layer: "Relu", "Sigmoid"

    Returns:
    A -- activation of the current layer, of shape (size of current layer, number of examples)
    cache -- python tuple containing the linear and activation caches
    '''
    if activation == "Sigmoid":
        Z, linear_c = linear_forward(A_prev, W, b)
        A, activacion_c = sigmoid(Z)
    elif activation == "Relu":
        Z, linear_c = linear_forward(A_prev, W, b)
        A, activacion_c = relu(Z)

    cache = (linear_c, activacion_c)
    return A, cache
def _forward_prop(self, x):
    self._activations[0] = x
    for i in range(1, self.num_layers):
        self._zs[i] = (
            self.weights[i].dot(self._activations[i - 1]) + self.biases[i]
        )
        self._activations[i] = sigmoid(self._zs[i])
def _x_given_h(self, h):
    """
    Gives the probability vector of each x being 1 given h
    :param h: batch x hidden_dim: The observed h
    :return x: batch x in_dim: The probability of each x = 1 given h
    """
    return A.sigmoid(np.dot(h, self.W.data.transpose()) + self.c.data)
def _h_given_x(self, x):
    """
    Gives the probability vector of each hidden being 1 given x
    :param x: batch x in_dim: The observed x
    :return h: batch x hidden_dim: The probability of each h = 1 given x
    """
    return A.sigmoid(np.dot(x, self.W.data) + self.b.data)
def activation_forward(self, Z, activation="tanh"):
    # string comparison must use ==, not identity (is)
    if activation == "tanh":
        return [Z, tanh(Z)]
    if activation == "relu":
        return [Z, relu(Z)]
    if activation == "sigmoid":
        return [Z, sigmoid(Z)]
    raise ValueError("Unknown activation: " + activation)
def softmax(self, input):
    # squash the inputs first; self.sigmoid assumes sigmoid is a method of this
    # class (the original called sigmoid(self, input) unbound)
    input = self.sigmoid(input)
    result = []
    for i in input:
        result.append(np.exp(i) / np.sum(np.exp(i)))
    return np.array(result)
def forward_propagation(self):
    """
    Computes:
        Z1 - result of linear function with input X
        A1 - result of applying tanh activation function to Z1
        Z2 - result of linear function with input A1
        A2 - result of applying sigmoid activation function to Z2
    (Used for backward propagation/gradient descent)

    Return:
        cache - dict contains Z1, A1, Z2, A2
    """
    # Hidden Layer
    Z1 = np.dot(self.W1, self.X) + self.b1
    A1 = np.tanh(Z1)

    # Output Layer
    Z2 = np.dot(self.W2, A1) + self.b2
    A2 = sigmoid(Z2)

    # Check Dimensions
    assert Z1.shape == (self.n_h, self.m)
    assert A1.shape == (self.n_h, self.m)
    assert Z2.shape == (self.n_y, self.m)
    assert A2.shape == (self.n_y, self.m)

    # Cache dictionary
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return cache
def backprop(self, x, y):
    # initialize empty arrays to represent the change in the weights and biases
    delta_b = [np.zeros(b.shape) for b in self.biases]
    delta_w = [np.zeros(w.shape) for w in self.weights]

    # Feedforward
    a = x
    activations = [x]
    products = []
    for i in range(self.num_layers - 1):
        # z^l = w^l a^(l-1) + b^l
        z = np.dot(self.weights[i], a) + self.biases[i]
        products.append(z)
        a = act.sigmoid(z)
        activations.append(a)

    # Output Error
    delta = (activations[-1] - y) * act.sigmoid_derivative(products[-1])
    delta_b[-1] = delta
    delta_w[-1] = np.dot(delta, activations[-2].transpose())

    # Backprop Error: walk backwards through the hidden layers
    for l in range(self.num_layers - 3, -1, -1):
        delta = np.dot(self.weights[l + 1].transpose(), delta) * act.sigmoid_derivative(products[l])
        delta_b[l] = delta
        delta_w[l] = np.dot(delta, activations[l].transpose())

    return (delta_b, delta_w)
def linear_activation_forward(self, A_prev, W, b, activation):
    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        A, activation_cache = activations.sigmoid(Z)
    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        A, activation_cache = activations.relu(Z)
    elif activation == "softmax":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        A, activation_cache = activations.softmax(Z)
    elif activation == "euler":
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        A, activation_cache = activations.euler(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)
    return A, cache
def forget_gate(self, x: np.ndarray):
    """
    Forget gate of the LSTM layer.

    :param x: input vector for the current time step
    :return: forget-gate activation, sigmoid(W_forget . x + b_forget)
    """
    return sigmoid(np.dot(self.W_forget.value, x) + self.b_forget.value)
def partial_fit(self, x, y):
    # activations, starting with the input
    A = [x]

    # feedforward
    out = A[-1]
    for i in range(0, len(self.layers) - 1):
        out = sigmoid(out.dot(self.W[i]) + (self.B[i].T))
        A.append(out)

    # backpropagation (binary cross-entropy gradient at the output)
    dA = [-(y / A[-1] - (1 - y) / (1 - A[-1]))]
    dW = []
    dB = []
    for i in reversed(range(0, len(self.layers) - 1)):
        dw = A[i].T.dot(dA[-1] * sigmoid_derivative(A[i + 1]))
        db = (np.sum(dA[-1] * sigmoid_derivative(A[i + 1]), 0)).reshape(-1, 1)
        # the element-wise product must happen before the dot with W[i].T
        da = (dA[-1] * sigmoid_derivative(A[i + 1])).dot(self.W[i].T)
        dW.append(dw)
        dB.append(db)
        dA.append(da)

    # reverse so the gradients line up with the layer order
    dW = dW[::-1]
    dB = dB[::-1]

    # gradient descent
    for i in range(0, len(self.layers) - 1):
        self.W[i] -= self.alpha * dW[i]
        self.B[i] -= self.alpha * dB[i]
def feedforward(self, s_inst, s_trans):
    y_r = act.sigmoid(s_inst, self.V_r)

    # gating distribution over the memory levels, and the selected level
    g = act.softmax(s_inst, self.W_g, self.g_strength, self.level_bias)
    g = np.transpose(g)
    l_sel = self.select_level(g)

    y_m = np.zeros((self.L, 1, self.M))
    for l in np.arange(self.L):
        if l == l_sel:
            # only the selected level accumulates the new transient input
            y_m[l, :, :], self.cumulative_memory[l, :, :] = act.sigmoid_acc_leaky(
                s_trans, self.V_m[l, :, :], self.cumulative_memory[l, :, :],
                self.LEAK[l, 0, 0], g[l, 0])
        else:
            # the other levels only leak
            self.cumulative_memory[l, :, :] *= self.LEAK[l, 0, 0]
            y_m[l, :, :] = act.sigmoidal(self.cumulative_memory[l, :, :])
        print('\t\t\t\t MEMORY_LEVEL ', l, '\t ', y_m[l, :, :])

    # hidden layer input: accumulate the contribution of every memory level
    inp_h = np.zeros((1, self.H))
    for l in np.arange(self.L):
        inp_h += act.linear(y_m[l, :, :], self.W_m[l, :, :])
    y_h = act.sigmoidal(inp_h)

    Q = act.linear(y_r, self.W_r) + act.linear(y_h, self.W_h)
    return y_r, y_m, y_h, g, l_sel, Q
def predict_lg_labels(weights, data):
    """Generates class predictions given weights and a test data matrix."""
    y_pred = sigmoid(np.dot(data, weights))
    y_pred[np.where(y_pred <= 0.5)] = 0
    y_pred[np.where(y_pred > 0.5)] = 1
    return y_pred
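# Hypothetical usage of predict_lg_labels; the weights, data and the sigmoid
# helper below are made up purely to illustrate the expected shapes.
import numpy as np

def sigmoid(t):
    return 1.0 / (1.0 + np.exp(-t))

weights = np.array([2.0, -1.0])              # (n_features,)
data = np.array([[1.0, 0.5],                 # (n_samples, n_features)
                 [-1.0, 2.0]])
print(predict_lg_labels(weights, data))      # [1. 0.]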
def classify(self, X):
    prediction = np.dot(X, self.weights).T
    prediction = sigmoid(prediction)
    actual_predictions = np.zeros((1, X.shape[0]))
    for i in range(prediction.shape[1]):
        if prediction[0][i] > 0.5:
            actual_predictions[0][i] = 1
    return actual_predictions
def activationFunction(self, z):
    if self.activ == Activations.SIGMOID.value:
        return actvtn.sigmoid(z)
    elif self.activ == Activations.SOFTMAX.value:
        return actvtn.softmax(z)
    elif self.activ == Activations.TANH.value:
        return actvtn.tanh(z)
    else:
        return z
def backpropagate(self, z, train_out):
    z_list = []
    activations_list = []
    del_bias = [numpy.zeros(b.shape) for b in self.bias]
    del_weights = [numpy.zeros(w.shape) for w in self.weights]

    # feed forward, keeping every weighted input and activation
    activations_list.append(z)
    for w, b in zip(self.weights, self.bias):
        z = numpy.dot(w, z) + b
        z_list.append(z)
        activation = sigmoid(z)
        activations_list.append(activation)

    # output error
    gradient_c_wrt_a = activations_list[-1] - train_out
    rho = gradient_c_wrt_a * sigmoid_derivative(z_list[-1])
    del_bias[-1] = rho
    del_weights[-1] = numpy.dot(rho, activations_list[-2].transpose())

    # back propagate the error through the remaining layers
    for l in range(2, self.layers):
        k = z_list[-l]
        sd = sigmoid_derivative(k)
        rho = numpy.dot(self.weights[-l + 1].transpose(), rho) * sd
        del_bias[-l] = rho
        del_weights[-l] = numpy.dot(rho, activations_list[-l - 1].transpose())

    return del_bias, del_weights
def _forward(self, x_t, h_prev, activations=None):
    """
    activations - list of activations for each step (all three steps)
    """
    z = sigmoid(T.dot(x_t, self.Wh) + self.bh + T.dot(h_prev, self.Uh))
    r = sigmoid(T.dot(x_t, self.Wx) + self.bx + T.dot(h_prev, self.Ux))
    h = T.tanh(T.dot(x_t, self.Wz) + self.bz + T.dot(r * h_prev, self.Uz))
    return (1 - z) * h_prev + z * h
def feedingforward(self, A):
    for w, b in zip(self.weights, self.bias):
        A = numpy.dot(w, A) + b
        A = sigmoid(A)
    return A