def classifier(self, feature_vector, a, t):
    '''Two-class Gaussian classifier: combines the log-determinant term `a`
    with the Mahalanobis distances to the two class means and thresholds the
    sigmoid of the scaled score at t.'''
    # a = np.log(np.linalg.det(self.var_class1)) - np.log(np.linalg.det(self.var_class2))
    # -> Ideally want to compute this directly, but due to overflow the log
    #    difference is precomputed and passed in as `a`.
    norm_const = 1e9  # normalization constant to take care of overflow/underflow
    # Mahalanobis distance of the feature vector w.r.t. class 1
    b = np.matmul(
        np.transpose(feature_vector - self.mean_class1),
        np.linalg.inv(self.var_class1) @ (feature_vector - self.mean_class1))
    # Mahalanobis distance of the feature vector w.r.t. class 2
    c = np.matmul(
        np.transpose(feature_vector - self.mean_class2),
        np.linalg.inv(self.var_class2) @ (feature_vector - self.mean_class2))
    score = act.sigmoid((a + b - c) / norm_const)
    if score <= t:
        return 1, score
    else:
        return 0, score
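# A minimal, hypothetical sketch (not part of the original class) of the
# log-determinant term that the comment above says overflows when computed
# through np.linalg.det: np.linalg.slogdet works directly in log space, so the
# determinants are never formed explicitly. `var_class1` / `var_class2` stand
# in for the covariance matrices this classifier assumes.
import numpy as np

def log_det_difference(var_class1, var_class2):
    """Return log|var_class1| - log|var_class2| without forming the determinants."""
    sign1, logdet1 = np.linalg.slogdet(var_class1)
    sign2, logdet2 = np.linalg.slogdet(var_class2)
    # For valid (positive definite) covariance matrices both signs should be +1.
    return logdet1 - logdet2

# Example usage with small random symmetric positive-definite matrices:
# rng = np.random.default_rng(0)
# A = rng.standard_normal((5, 5)); B = rng.standard_normal((5, 5))
# a = log_det_difference(A @ A.T + np.eye(5), B @ B.T + np.eye(5))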
def MOGBinClassifier(x, mu1, mu2, sigma1, sigma2, prior1, prior2, t=0.5):
    '''
    Parameters:
        x: input data, a numpy array of shape (featurelength, datapoints);
           the model is evaluated on (1200, 1000)
        mu1, mu2: means of the classes to be classified, shape -> (1200, k), k = number of clusters
        sigma1, sigma2: covariances of the classes to be classified
        prior1, prior2: priors of the MOG for the classes to be classified
    Return:
        returns a list of predictions and a numpy array of scores
    '''
    # Per-component log joint probabilities: shape (number of data points, number of Gaussians)
    p1 = np.ndarray((x.shape[1], prior1.shape[1]))
    p2 = np.ndarray((x.shape[1], prior2.shape[1]))
    for i in range(prior1.shape[1]):
        p1[:, i] = np.nan_to_num(
            np.log(prior1[0, i] + realmin) +
            g.logGaussPdf(x, mu1[:, i].reshape(mu1.shape[0], 1),
                          sigma1[:, :, i])).reshape((p1.shape[0]))
    for i in range(prior2.shape[1]):
        p2[:, i] = np.nan_to_num(
            np.log(prior2[0, i] + realmin) +
            g.logGaussPdf(x, mu2[:, i].reshape(mu2.shape[0], 1),
                          sigma2[:, :, i])).reshape((p2.shape[0]))

    # Marginalize over the mixture components in log space
    p1 = np.nan_to_num(sc.logsumexp(p1, axis=1))
    p2 = np.nan_to_num(sc.logsumexp(p2, axis=1))

    P1 = p1 / (p1 + p2)
    P2 = p2 / (p1 + p2)
    pd.DataFrame(P1).to_csv('p1data.csv')
    pd.DataFrame(P2).to_csv('p2data.csv')

    P1 = P1.reshape((p1.shape[0]))
    P2 = P2.reshape((p2.shape[0]))
    pred = []
    for i in range(P1.shape[0]):
        if (P2[i] - P1[i]) > 0:
            pred.append(1)
        else:
            pred.append(0)
    sf = 100  # scale factor to spread the sigmoid scores
    return pred, act.sigmoid(sf * (P1 - P2))
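# Hedged side note (not part of the original function): after the logsumexp
# calls above, p1 and p2 are per-datapoint log-likelihoods. One common,
# numerically stable way to turn two log-likelihoods into a posterior for
# class 1 (assuming equal class priors) is a two-way softmax, which reduces to
# a sigmoid of the log-likelihood difference. The helper name below is
# hypothetical.
import numpy as np
from scipy.special import expit  # numerically stable sigmoid

def posterior_from_loglik(loglik1, loglik2):
    """P(class 1 | x) from per-sample log-likelihoods, assuming equal priors."""
    return expit(loglik1 - loglik2)

# Example: posterior_from_loglik(np.array([-10.0, -2.0]), np.array([-12.0, -1.0]))
# -> roughly [0.88, 0.27]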
def backprop(x, y, biases, weightsT, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_wT)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weightsT (list): list of biases and transposed weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_wT): tuple containing the gradient for all the biases
            and weightsT. nabla_b and nabla_wT should be the same shape as the
            input biases and weightsT.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations a_k and activations h_k of every layer
    h_k = x
    h_ks = [h_k]
    a_ks = []
    for b, wT in zip(biases, weightsT):
        a_k = np.dot(wT, h_k) + b
        a_ks.append(a_k)
        h_k = sigmoid(a_k)
        h_ks.append(h_k)

    # gradient of the error with respect to the output pre-activation
    delta = (cost).df_wrt_a(h_ks[-1], y) * sigmoid_prime(a_ks[-1])
    nabla_b[-1] = delta
    nabla_wT[-1] = np.dot(delta, h_ks[-2].T)

    # backward pass: propagate delta through the remaining layers
    for i in range(2, num_layers):
        delta = np.dot(weightsT[-i + 1].T, delta) * sigmoid_prime(a_ks[-i])
        nabla_b[-i] = delta
        nabla_wT[-i] = np.dot(delta, h_ks[-i - 1].T)

    return (nabla_b, nabla_wT)
def backprop(x, y, biases, weights, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_w)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weights (list): list of biases and weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_w): tuple containing the gradient for all the biases
            and weights. nabla_b and nabla_w should be the same shape as the
            input biases and weights.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store every pre-activation a[k] and activation h[k]
    h = [x]
    a = []
    for k in range(1, num_layers):
        a.append(np.dot(weights[k - 1], h[k - 1]) + biases[k - 1])
        h.append(sigmoid(a[k - 1]))

    # gradient of the error with respect to the output activations
    delta = (cost).delta(h[-1], y)

    # backward pass: output layer first, then propagate delta backwards
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, h[-2].transpose())
    for layer in range(2, num_layers):
        delta = np.dot(weights[-layer + 1].transpose(), delta) * sigmoid_prime(
            a[-layer])
        nabla_b[-layer] = delta
        nabla_w[-layer] = np.dot(delta, h[-layer - 1].transpose())

    return (nabla_b, nabla_w)
def backprop(x, y, biases, weights, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_w)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weights (list): list of biases and weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_w): tuple containing the gradient for all the biases
            and weights. nabla_b and nabla_w should be the same shape as the
            input biases and weights.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store all the activations and pre-activation values
    activations = [x]  # list to store the activation values
    zs = []            # list to store the pre-activations (matrix multiplications)
    val = x
    for i in range(num_layers - 1):
        val = np.dot(weights[i], val) + biases[i]
        zs.append(val)
        val = sigmoid(val)
        activations.append(val)

    # compute the gradient of the error with respect to the output
    # activations[-1] is the list of activations of the output layer
    delta = (cost).delta(activations[-1], y)

    # backward pass
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())
    for l in range(2, num_layers):
        z = zs[-l]
        delta = np.dot(weights[-l + 1].transpose(), delta) * sigmoid_prime(z)
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)
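# A self-contained gradient-check sketch (an assumption-laden add-on, not part
# of the assignment code): it evaluates the loss of a small sigmoid network
# with a quadratic cost and forms central finite differences, which can then
# be compared against the analytic gradients a backprop like the one above
# produces. All names here are local to the sketch.
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def _loss(weights, biases, x, y):
    """Quadratic loss of a feedforward sigmoid network on a single example."""
    a = x
    for w, b in zip(weights, biases):
        a = _sigmoid(np.dot(w, a) + b)
    return 0.5 * np.sum((a - y) ** 2)

def numeric_grad_w(weights, biases, x, y, layer, eps=1e-6):
    """Central-difference gradient of the loss w.r.t. weights[layer]."""
    g = np.zeros_like(weights[layer])
    for idx in np.ndindex(*weights[layer].shape):
        w_plus = [w.copy() for w in weights]
        w_minus = [w.copy() for w in weights]
        w_plus[layer][idx] += eps
        w_minus[layer][idx] -= eps
        g[idx] = (_loss(w_plus, biases, x, y) - _loss(w_minus, biases, x, y)) / (2 * eps)
    return g

# Usage idea: compare numeric_grad_w(...) entrywise against the nabla_w returned
# by the backprop above with a matching (quadratic) cost object; the two should
# agree to roughly 1e-6 relative error when the analytic gradients are correct.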
def backprop(x, y, biases, weights, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_w)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weights (list): list of biases and weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_w): tuple containing the gradient for all the biases
            and weights. nabla_b and nabla_w should be the same shape as the
            input biases and weights.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the pre-activations a[k] and activations of every layer
    a = [np.zeros(b.shape) for b in biases]
    h_s = [np.zeros(b.shape) for b in biases]
    activations = [x] + h_s
    for k in range(num_layers - 1):
        a[k] = biases[k] + np.dot(weights[k], activations[k])
        activations[k + 1] = sigmoid(a[k])

    # compute the gradient of the error with respect to the output
    # activations[-1] is the list of activations of the output layer
    delta = (cost).delta(activations[-1], y)
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())

    # backward pass
    for k in range(2, num_layers):
        delta = np.dot(weights[-k + 1].T, delta) * sigmoid_prime(a[-k])
        nabla_b[-k] = delta
        nabla_w[-k] = np.dot(delta, activations[-k - 1].T)

    return (nabla_b, nabla_w)
def backprop(x, y, biases, weightsT, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_wT)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weightsT (list): list of biases and transposed weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_wT): tuple containing the gradient for all the biases
            and weightsT. nabla_b and nabla_wT should be the same shape as the
            input biases and weightsT.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store all the activations and pre-activations of every layer
    activations = [x]
    a = [x]
    for i in range(0, num_layers - 1):
        a.append(biases[i] + np.dot(weightsT[i], activations[i]))
        activations.append(sigmoid(a[-1]))

    # compute the gradient of the error with respect to the output
    # activations[-1] is the list of activations of the output layer
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass: compute the gradient for each weight and bias
    G = delta
    for k in range(num_layers - 1, 0, -1):
        nabla_b[k - 1] = G
        nabla_wT[k - 1] = np.dot(G, np.transpose(activations[k - 1]))
        G = np.dot(np.transpose(weightsT[k - 1]), G)
        G = np.multiply(G, sigmoid_prime(a[k - 1]))

    return (nabla_b, nabla_wT)
def backprop(x, y, biases, weightsT, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_wT)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weightsT (list): list of biases and transposed weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_wT): tuple containing the gradient for all the biases
            and weightsT. nabla_b and nabla_wT should be the same shape as the
            input biases and weightsT.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the pre-activations a and activations inp of every layer
    inp = [x]
    a = []
    for k in range(1, num_layers):
        a.append(np.dot(weightsT[k - 1], inp[k - 1]) + biases[k - 1])
        inp.append(sigmoid(a[k - 1]))

    # compute the gradient of the error with respect to the output
    # inp[-1] holds the activations of the output layer
    delta = (cost).df_wrt_a(inp[-1], y)

    # backward pass: compute the gradient for each weight and bias
    nabla_b[-1] = delta
    nabla_wT[-1] = np.dot(delta, inp[-2].transpose())
    for layer in range(2, num_layers):
        delta = np.dot(weightsT[-layer + 1].transpose(), delta) * sigmoid_prime(
            a[-layer])
        nabla_b[-layer] = delta
        nabla_wT[-layer] = np.dot(delta, inp[-1 - layer].transpose())

    return (nabla_b, nabla_wT)
def test_sigmoid():
    z = np.arange(-10, 10, 0.1)
    y = act.sigmoid(z)
    y_p = act.sigmoid_prime(z)
    plt.figure()
    plt.subplot(1, 2, 1)
    plt.plot(z, y)
    plt.title('sigmoid')
    plt.subplot(1, 2, 2)
    plt.plot(z, y_p)
    plt.title('derivative sigmoid')
    plt.show()
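# A small hedged companion to test_sigmoid above: besides eyeballing the
# plots, the derivative can be checked numerically with a central difference.
# This sketch defines its own sigmoid so it does not depend on the act module.
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def test_sigmoid_prime_numeric(h=1e-5, tol=1e-8):
    z = np.arange(-10, 10, 0.1)
    analytic = _sigmoid(z) * (1.0 - _sigmoid(z))              # closed-form derivative
    numeric = (_sigmoid(z + h) - _sigmoid(z - h)) / (2 * h)   # central difference
    assert np.max(np.abs(analytic - numeric)) < tol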
def backprop(x, y, biases, weights, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_w)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weights (list): list of biases and weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_w): tuple containing the gradient for all the biases
            and weights. nabla_b and nabla_w should be the same shape as the
            input biases and weights.
    """
    # gradients of the biases and weights, built back to front
    nabla_b = []
    nabla_w = []

    # feedforward: store all activations and pre-activation vectors
    activations = [x]  # store activations
    h = []             # store the pre-activation (z) vectors
    for i in range(num_layers - 1):
        ai = biases[i] + np.dot(weights[i], activations[i])
        h.append(ai)
        activations.append(sigmoid(ai))

    # compute the gradient of the error with respect to the output
    # activations[-1] is the list of activations of the output layer
    g = (cost).delta(activations[-1], y)  # delta of the output layer for the cross-entropy cost
    nabla_w = [np.dot(g, np.transpose(activations[-2]))]
    nabla_b = [g]

    # backward pass: prepend the gradients of each earlier layer
    for i in range(2, num_layers):
        g = np.dot(weights[-i + 1].transpose(), g) * sigmoid_prime(h[-i])
        nabla_b = [g] + nabla_b
        nabla_w = [np.dot(g, np.transpose(activations[-i - 1]))] + nabla_w

    return (nabla_b, nabla_w)
def FAbinClassifier(x, mu1, mu2, sigma1, sigma2, phi1, phi2, t=0.5):
    '''
    Parameters:
        x: input data, a numpy array of shape (featurelength, number of datapoints);
           the model is evaluated on (1200, 1000)
        mu1, mu2: means of the classes to be classified, shape -> (featurelength,)
        sigma1, sigma2: diagonal noise variances of the classes to be classified;
            only the diagonal elements are passed, shape -> (1200,)
        phi1, phi2: factor matrices with shape (1200, 3)
    '''
    pred = []
    mu_Class1 = mu1.reshape(mu1.shape[0], 1)
    mu_Class2 = mu2.reshape(mu2.shape[0], 1)

    # Full factor-analysis covariances: phi @ phi.T + diag(sigma)
    sigmaclass1 = np.matmul(phi1, np.transpose(phi1)) + np.diag(sigma1)
    sigmaclass2 = np.matmul(phi2, np.transpose(phi2)) + np.diag(sigma2)

    # Log-likelihood of every data point under each class model
    p1 = g.logGaussPdf(x, mu_Class1, sigmaclass1)
    p2 = g.logGaussPdf(x, mu_Class2, sigmaclass2)

    P1 = p1 / (p1 + p2)
    P2 = p2 / (p1 + p2)
    pd.DataFrame(P1).to_csv('P1_FA_.csv')
    pd.DataFrame(P2).to_csv('P2_FA_.csv')

    P1 = P1.reshape(p1.shape[0])
    P2 = P2.reshape(p2.shape[0])
    for i in range(p1.shape[0]):
        if P2[i] - P1[i] < 0:
            pred.append(1)
        else:
            pred.append(0)
    return pred, act.sigmoid(1000 * (P2 - P1))
def backprop(x, y, biases, weightsT, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_wT)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weightsT (list): list of biases and transposed weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_wT): tuple containing the gradient for all the biases
            and weightsT. nabla_b and nabla_wT should be the same shape as the
            input biases and weightsT.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weightsT]

    # feedforward
    activation = x
    activations = [x]  # list to store all the activations, layer by layer
    zs = []            # list to store all the z vectors, layer by layer
    for b, w in zip(biases, weightsT):
        z = np.dot(w, activation) + b
        zs.append(z)
        activation = sigmoid(z)
        activations.append(activation)

    # backward pass
    delta = (cost).df_wrt_a(activations[-1], y) * sigmoid_prime(zs[-1])
    nabla_b[-1] = delta
    nabla_w[-1] = np.dot(delta, activations[-2].transpose())

    # Note that the variable l in the loop below is used a little differently
    # to the notation in Chapter 2 of the book. Here, l = 1 means the last
    # layer of neurons, l = 2 is the second-last layer, and so on. It's a
    # renumbering of the scheme in the book, used here to take advantage of
    # the fact that Python can use negative indices in lists.
    for l in range(2, num_layers):
        z = zs[-l]
        sp = sigmoid_prime(z)
        delta = np.dot(weightsT[-l + 1].transpose(), delta) * sp
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)
def backprop(x, y, biases, weightsT, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_wT)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weightsT (list): list of biases and transposed weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_wT): tuple containing the gradient for all the biases
            and weightsT. nabla_b and nabla_wT should be the same shape as the
            input biases and weightsT.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward, unit by unit: activations[layer][unit] holds the sigmoid
    # output of each unit (assumes a network with two weight layers)
    activations = [[], []]
    tempx = x
    for supercount in range(len(biases)):
        for counts in range(len(biases[supercount])):
            activations[supercount].append(
                sigmoid(
                    np.dot(weightsT[supercount][counts], tempx) +
                    biases[supercount][counts]))
        tempx = activations[supercount]

    # gradient of the error with respect to the output activations
    delta = (cost).df_wrt_a(activations[1], y)

    # backward pass
    for i in range(len(biases) - 1, -1, -1):
        activationsD = []
        for j in range(len(activations[i])):
            # sigmoid derivative expressed through the activation value
            activationsD.append(activations[i][j] * (1 - activations[i][j]))
        delta = np.multiply(delta, activationsD)
        nabla_b[i] = delta
        if i == 0:
            nabla_wT[i] = np.dot(nabla_b[i], np.transpose(x))
        else:
            nabla_wT[i] = np.dot(nabla_b[i], np.transpose(activations[i - 1]))
        delta = np.dot(np.transpose(weightsT[i]), delta)

    return (nabla_b, nabla_wT)
def StudnetT():
    trainface, testface, trainnonface, testnonface = load_data()
    pca_face = PCA(30)
    f_train_pca, array_trainf = data_pca(trainface)
    f_test_pca, array_testf = data_pca(testface)
    nf_train_pca, array_trainnf = data_pca(trainnonface)
    nf_test_pca, array_testnf = data_pca(testnonface)

    # Fit a Student-t distribution to the face and non-face training data
    (mu_f, sig_f, nu_f) = Stud.fit_t(f_train_pca, 0.01)
    (mu_nf, sig_nf, nu_nf) = Stud.fit_t(nf_train_pca, 0.01)

    # Likelihood of each test set under the face and non-face models
    px_face_pf = T.StudTpdf(f_test_pca, mu_f, sig_f, nu_f)
    px_nonface_pf = T.StudTpdf(nf_test_pca, mu_f, sig_f, nu_f)
    px_face_pnf = T.StudTpdf(f_test_pca, mu_nf, sig_nf, nu_nf)
    px_nonface_pnf = T.StudTpdf(nf_test_pca, mu_nf, sig_nf, nu_nf)

    Prob_face = np.concatenate((px_face_pf, px_nonface_pf))
    Prob_nonface = np.concatenate((px_face_pnf, px_nonface_pnf))
    df = pd.concat([testface, testnonface], ignore_index=True)

    pred = []
    for i in range(Prob_face.shape[0]):
        if Prob_face[i] > Prob_nonface[i]:
            pred.append(1)
        else:
            pred.append(0)

    Prob_faceact = Prob_face / (Prob_face + Prob_nonface)
    Prob_nonfaceact = Prob_nonface / (Prob_face + Prob_nonface)
    df['Predictions'] = pred
    df['Probability'] = act.sigmoid(5 * (Prob_nonfaceact - Prob_faceact))
    df.to_csv('studentT.csv')

    roc = ROC.ROC(1200)
    roc.Classifier_Details(df)
    roc.ROC_plotter(df, 'StudentT')
def backprop(x, y, biases, weightsT, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_wT)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weightsT (list): list of biases and transposed weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_wT): tuple containing the gradient for all the biases
            and weightsT. nabla_b and nabla_wT should be the same shape as the
            input biases and weightsT.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store the activations of every layer
    activations = [x]
    for k in range(0, num_layers - 1):
        activations.append(
            sigmoid(np.dot(weightsT[k], activations[k]) + biases[k]))

    # gradient of the error with respect to the output activations
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass, from the output layer down to the first weight layer
    for i in range(num_layers - 2, -1, -1):
        if i == num_layers - 2:
            nabla_b[i] = delta
            nabla_wT[i] = np.dot(delta, np.transpose(activations[-2]))
        else:
            delta = np.dot(np.transpose(weightsT[i + 1]), delta) * sigmoid_prime(
                np.dot(weightsT[i], activations[i]) + biases[i])
            nabla_b[i] = delta
            nabla_wT[i] = np.dot(delta, np.transpose(activations[i]))

    return (nabla_b, nabla_wT)
def backprop(x, y, biases, weights, cost, num_layers):
    """Return (nabla_b, nabla_w), the gradients of the cost with respect to all
    biases and weights, computed by a feedforward pass followed by
    backpropagation."""
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the activations and pre-activations of every layer
    activation = x
    activations = [x]
    before_activations = []
    for bias, weight in zip(biases, weights):
        before_activation = np.dot(weight, activation) + bias
        before_activations.append(before_activation)
        activation = sigmoid(before_activation)
        activations.append(activation)

    # backward pass
    delta = (cost).delta(activations[-1], y)
    for l in range(1, num_layers):
        if l != 1:
            delta = np.dot(weights[-l + 1].transpose(), delta) * sigmoid_prime(
                before_activations[-l])
        nabla_b[-l] = delta
        nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

    return (nabla_b, nabla_w)
def feedforward(self, a):
    """Return the output of the network if ``a`` is input."""
    for b, wT in zip(self.biases, self.weightsT):
        a = sigmoid(np.dot(wT, a) + b)
    return a
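# A hedged usage sketch for a feedforward pass of the same shape as the method
# above, written as a free function so it can run outside the network class.
# The layer sizes and random initialisation below are illustrative only.
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def feedforward_standalone(a, weightsT, biases):
    """Propagate the column vector a through lists of transposed weights and biases."""
    for b, wT in zip(biases, weightsT):
        a = _sigmoid(np.dot(wT, a) + b)
    return a

# Example: a 784-30-10 network on a random input column vector.
# rng = np.random.default_rng(0)
# sizes = [784, 30, 10]
# weightsT = [rng.standard_normal((n_out, n_in)) for n_in, n_out in zip(sizes[:-1], sizes[1:])]
# biases = [rng.standard_normal((n, 1)) for n in sizes[1:]]
# out = feedforward_standalone(rng.standard_normal((784, 1)), weightsT, biases)  # shape (10, 1)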
def backprop(x, y, biases, weightsT, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_wT)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weightsT (list): list of biases and transposed weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_wT): tuple containing the gradient for all the biases
            and weightsT. nabla_b and nabla_wT should be the same shape as the
            input biases and weightsT.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_wT = [np.zeros(wT.shape) for wT in weightsT]

    # feedforward: store all pre-activations (a^k) and activations (h^k)
    pre_act = [np.zeros(b.shape) for b in biases]
    pre_act.insert(0, np.nan)  # placeholder so indices line up with activations
    activations = [np.zeros(b.shape) for b in biases]
    activations.insert(0, x)
    for i in range(0, num_layers - 1):
        # use the previous layer's output as the current layer's input: h^(k-1) -> a^k
        pre_act[i + 1] = np.matmul(weightsT[i], activations[i]) + biases[i]
        # apply the sigmoid elementwise: g(a^k) -> h^k
        activations[i + 1] = sigmoid(pre_act[i + 1])

    # compute the gradient of the error with respect to the output
    # activations[-1] is the list of activations of the output layer
    delta = (cost).df_wrt_a(activations[-1], y)

    # backward pass: output layer first
    G = delta
    nabla_b[-1] = G
    nabla_wT[-1] = np.transpose(np.matmul(activations[-2], np.transpose(G)))
    G = np.matmul(np.transpose(weightsT[-1]), G)

    # then the remaining layers, back to front
    for n in reversed(range(0, num_layers - 2)):
        G = np.multiply(G, sigmoid_prime(pre_act[n + 1]))
        nabla_b[n] = G
        nabla_wT[n] = np.transpose(np.matmul(activations[n], np.transpose(G)))
        G = np.matmul(np.transpose(weightsT[n]), G)

    return (nabla_b, nabla_wT)
def backprop(x, y, biases, weights, cost, num_layers):
    """
    function of backpropagation
    Return a tuple ``(nabla_b, nabla_w)`` representing the
    gradient of all biases and weights.

    Args:
        x, y: input image x and label y
        biases, weights (list): list of biases and weights of entire network
        cost (CrossEntropyCost): object of cost computation
        num_layers (int): number of layers of the network

    Returns:
        (nabla_b, nabla_w): tuple containing the gradient for all the biases
            and weights. nabla_b and nabla_w should be the same shape as the
            input biases and weights.
    """
    # initial zero lists to store the gradients of the biases and weights
    nabla_b = [np.zeros(b.shape) for b in biases]
    nabla_w = [np.zeros(w.shape) for w in weights]

    # feedforward: store the activation of every layer
    index = 0
    activations = [np.zeros(b.shape) for b in biases]
    inp = x
    for b, w in zip(biases, weights):
        activations[index] = sigmoid(np.dot(w, x) + b)
        x = activations[index]
        index += 1

    # compute the gradient of the error with respect to the output
    # activations[-1] is the list of activations of the output layer
    delta = (cost).delta(activations[-1], y)
    # recover the gradient with respect to the output activations from the cost's delta
    gradlayer = delta / (activations[-1] * (1 - activations[-1]))

    # backward pass: compute the gradient for each weight and bias
    for j in range(0, len(activations)):
        # sigmoid_prime of the logit of the activation equals h * (1 - h)
        gradactivation = np.multiply(
            gradlayer,
            sigmoid_prime(
                np.log((activations[-1 - j]) / (1 - activations[-1 - j]))))
        nabla_b[-1 - j] = gradactivation
        if j == (len(activations) - 1):
            activation = inp
        else:
            activation = activations[-2 - j]
        nabla_w[-1 - j] = np.dot(gradactivation, activation.transpose())
        gradlayer = np.dot(weights[-1 - j].transpose(), gradactivation)

    return (nabla_b, nabla_w)