def train(training_data, labels, n_iter, n_classes, n_filter, learn_rate, print_acc=True):
    """Train the conv -> maxpool -> softmax network one sample at a time.

    For each of the first ``n_iter`` samples the image is passed through
    ``fun.convolute``, ``fun.maxpool`` and ``fun.softmax``; afterwards
    ``fun.backprop_softmax``, ``fun.backprop_maxpool`` and
    ``fun.backprop_conv`` are called in that order to update the softmax
    weights/bias and the convolution filters.

    Args:
        training_data: Indexable collection of images; ``training_data[0]``
            must expose ``.shape``. NOTE(review): the ``input_dim`` formula
            only uses ``shape[0]``, so square images are presumably
            expected -- confirm against the caller.
        labels: Indexable collection; ``labels[i]`` is compared with the
            arg-max of the predicted probabilities for sample ``i``.
        n_iter: Number of samples to process (indices ``0 .. n_iter - 1``).
        n_classes: Number of columns of the softmax weight matrix and length
            of the bias vector.
        n_filter: Number of 3x3 convolution filters.
        learn_rate: Passed unchanged to the backprop helpers.
        print_acc: When true, print the running accuracy after every 100
            processed samples.

    Returns:
        None. The trained parameters are local to this function and are not
        handed back to the caller.
    """
    # Length of the flattened max-pool output: a 3x3 convolution without
    # padding shrinks each side by 2, the division by 2 accounts for pooling.
    input_dim = int((((training_data[0].shape[0] - 3 + 1) / 2) ** 2) * n_filter)

    # The generator is deliberately re-seeded before each draw, exactly as in
    # the original code, so the initial parameters stay reproducible.
    np.random.seed(seed=30)
    own_filter_conv = np.random.randn(n_filter, 3, 3) / 9
    np.random.seed(seed=30)
    own_weight_soft = np.random.randn(input_dim, n_classes) / input_dim
    own_bias_soft = np.random.randn(n_classes)

    num_correct = 0
    for i in range(n_iter):
        # presumably rescales 8-bit pixel values to [-0.5, 0.5] -- TODO confirm
        image = training_data[i] / 255 - 0.5
        label = labels[i]

        # Forward pass.
        own_feature_map, own_filter_conv = fun.convolute(
            image=image, filter_matrix=own_filter_conv)
        own_maxpool_map = fun.maxpool(feature_map=own_feature_map)
        own_probs, own_inter_soft = fun.softmax(
            own_maxpool_map,
            weight_matrix=own_weight_soft,
            bias_vector=own_bias_soft)

        # Backward pass, last layer first.
        own_weight_soft, own_bias_soft, own_gradient_soft = fun.backprop_softmax(
            inter_soft=own_inter_soft,
            probabilities=own_probs,
            label=label,
            learn_rate=learn_rate)
        own_gradient_max = fun.backprop_maxpool(
            feature_map=own_feature_map, gradient=own_gradient_soft)
        own_filter_conv = fun.backprop_conv(
            image=image,
            filter_conv=own_filter_conv,
            gradient=own_gradient_max,
            learn_rate=learn_rate)

        prediction = np.argmax(own_probs)
        num_correct += 1 if prediction == label else 0

        # ``num_correct`` already includes sample ``i``, so the denominator
        # must be the number of samples processed so far (i + 1), not ``i``;
        # dividing by ``i`` could report an accuracy above 1.0.
        n_seen = i + 1
        if print_acc and n_seen % 100 == 0:
            accuracy = num_correct / n_seen
            print(f"accuracy for the first {n_seen} samples: {accuracy}")
            print(f"{num_correct} predictions for {n_seen} samples were correct")
    return None
# Scratch check of the softmax layer and of the softmax/max-pool backprop
# helpers, using an 8x2 weight matrix and a bias vector of length 2.
# NOTE(review): `fun`, `out_maxown`, `out_ownconv`, `index_maxown` and `label`
# are not defined in the visible part of the file; they must already exist
# when these statements run.
np.random.seed(seed=666); weight_soft = np.random.randn(8, 2) / 8
np.random.seed(seed=666); bias_soft = np.zeros(2)  # the bias is all zeros, so this re-seed does not influence it
probabilities, intermediates = fun.softmax(out_maxown.T, weight_soft, bias_soft)
# Bare expressions: they have no effect when run as a script and were
# presumably evaluated interactively to inspect the values.
out_maxown[0]
out_maxown.T[:, :, 0]

################################## backprop ######################
weight_soft.shape  # author's note: agrees, after transposing the input!
# Author's note: the weight matrix had to be transposed as well; now the
# gradients from softmax roughly agree.
back_soft = fun.backprop_softmax(intermediates, probabilities, label = label)[3]
# Author's note: the gradients from the max-pool backprop are not the same.
# Since the softmax gradients only roughly agree, retry with exactly the same
# gradients: the computed value above is overwritten with hard-coded numbers.
back_soft = np.array([ 0.02303161, 0.01477759, -0.02779495, 0.05881862, 0.09134293, 0.09521715, 0.10948755, 0.00828537])
# Author's note: the gradients from the softmax backprop are the same, but the
# indexing in the max-pool backprop probably no longer works, because the
# input to the softmax layer was transposed.
grad_max = fun.backprop_maxpool(out_ownconv, index_maxown, back_soft)
grad_max[0, :, : ]
# Author's note: hmm, the entries are non-zero at the right positions, but
# the updates have the wrong values.
grad_max[grad_max != 0]
# Backprop gradient = np.zeros(2) gradient[label] = -1 / out_soft[label] # since the flattened versions oft the output of the maxpool layer are different, # one would expect the gradients of softmax should be in different order as well, # and not the same! out_max.shape out_maxown.shape weight_soft.shape out_max.flatten() out_maxown.flatten() gradient_soft, weights, deltaL = softmax.backprop(gradient, 0.01) gradient_softown = fun.backprop_softmax(intermediates, out_maxown.shape, probabilities, label=label)[3] # gradient should not be the same, but seem to be very similar, my bc of random # weight_soft initialisations? # try with weight_soft * 10 gradients_soft_oldweights = gradient_soft gradient_softown.shape gradient_soft.flatten() == gradient_softown.flatten( ) # are the same, but format is wrong # not the same, yes! # no only remaining problem has to be in backprop_conv # maybe still problem in back_softmax,
def _report_match(own, blog, same_msg, diff_msg):
    """Print ``same_msg`` if ``own`` and ``blog`` are identical, else ``diff_msg``.

    Identical means equal shape and exactly equal elements
    (``np.array_equal``); no floating-point tolerance is applied.

    Returns:
        bool: ``True`` when the two arrays are identical.
    """
    is_same = bool(np.array_equal(own, blog))
    print(same_msg if is_same else diff_msg)
    return is_same


def debug_cnn(n_iter, version, learn_rate):
    """Run the own CNN and a blog CNN side by side and print where they differ.

    For each of the first ``n_iter`` samples of the module-level
    ``test_images`` / ``test_labels`` the own implementation (``functions``
    module) and the blog implementation (``Conv3x3``, ``MaxPool2``,
    ``Softmax``) each perform one forward and one backward pass. After every
    sample the feature maps, max-pool outputs, predicted probabilities,
    softmax gradients and max-pool gradients are compared for exact equality
    and the verdicts are printed.

    Args:
        n_iter: Number of samples to process (indices ``0 .. n_iter - 1``).
        version: ``"changed"`` or ``"original"``; selects the directory the
            blog modules are imported from.
        learn_rate: Learning rate handed to both implementations.

    Returns:
        None. All results are reported via ``print``.

    Raises:
        ValueError: If ``version`` is not one of the supported names.
            Previously an unknown value only failed later with an unbound
            name error when the blog layers were constructed.
    """
    path = "/home/konstantin/Documents/master_arbeit/"
    blog_versions = {
        "changed": (
            "changed blog version is being used",
            "/home/konstantin/Documents/master_arbeit/cnn_python/cnn-from-scratch-changed/",
        ),
        "original": (
            "original version is being used",
            "/home/konstantin/Documents/master_arbeit/cnn_python/original_blog/cnn-from-scratch",
        ),
    }
    if version not in blog_versions:
        raise ValueError(
            f"unknown version {version!r}; expected one of {sorted(blog_versions)}")

    if path not in sys.path:
        sys.path.append(path)
    import functions as fun

    banner, blog_dir = blog_versions[version]
    print(banner)
    # Both blog directories provide modules with the same names (conv,
    # maxpool, softmax). Drop any cached copies and put the requested
    # directory first on the search path; otherwise a second call with the
    # other version would silently reuse the classes imported first.
    for module_name in ("conv", "maxpool", "softmax"):
        sys.modules.pop(module_name, None)
    if blog_dir in sys.path:
        sys.path.remove(blog_dir)
    sys.path.insert(0, blog_dir)
    from conv import Conv3x3
    from maxpool import MaxPool2
    from softmax import Softmax

    num_filters = 8
    n_classes = 10
    dim_maxpool = 13 * 13 * num_filters

    # Parameters of the own implementation, drawn with fixed seeds.
    np.random.seed(seed=444)
    own_filter_conv = np.random.randn(num_filters, 3, 3) / 9
    # NOTE(review): rounding values of this magnitude presumably turns nearly
    # all filter entries into 0 -- confirm that this is intended.
    own_filter_conv = np.round(own_filter_conv)
    np.random.seed(seed=666)
    own_weight_soft = np.random.randn(dim_maxpool, n_classes) / dim_maxpool
    own_bias_soft = np.zeros(n_classes)

    # Layers of the blog implementation (constructed after the seeded draws
    # above, in the same order as before).
    conv = Conv3x3(num_filters)  # 28x28x1 -> 26x26x8
    pool = MaxPool2()  # 26x26x8 -> 13x13x8
    softmax = Softmax(dim_maxpool, n_classes)

    for i in range(n_iter):
        image = test_images[i] / 255 - 0.5
        label = test_labels[i]

        # Own implementation: forward pass, then backward pass.
        own_feature_map, own_filter_conv = fun.convolute(
            image=image, filter_matrix=own_filter_conv)
        own_maxpool_map = fun.maxpool(feature_map=own_feature_map)
        own_probs, own_inter_soft = fun.softmax(
            own_maxpool_map,
            weight_matrix=own_weight_soft,
            bias_vector=own_bias_soft)
        own_weight_soft, own_bias_soft, own_gradient_soft = fun.backprop_softmax(
            inter_soft=own_inter_soft,
            probabilities=own_probs,
            label=label,
            learn_rate=learn_rate)
        own_gradient_max = fun.backprop_maxpool(
            feature_map=own_feature_map, gradient=own_gradient_soft)
        own_filter_conv = fun.backprop_conv(
            image=image,
            filter_conv=own_filter_conv,
            gradient=own_gradient_max,
            learn_rate=learn_rate)

        # Blog implementation on the same sample.
        blog_out_conv = conv.forward(image)
        blog_out_max = pool.forward(blog_out_conv)
        blog_out_soft = softmax.forward(blog_out_max)
        # Initial gradient: only the entry of the true label is non-zero.
        gradient_L = np.zeros(n_classes)
        gradient_L[label] = -1 / blog_out_soft[label]
        blog_gradient_soft = softmax.backprop(gradient_L, learn_rate)
        blog_gradient_max = pool.backprop(blog_gradient_soft)
        conv.backprop(blog_gradient_max, learn_rate)

        # ---------------- compare feed-forward ----------------
        print("This is iteration", i)
        _report_match(own_feature_map, blog_out_conv,
                      "YEAAAH! FeatureMaps are the same",
                      "NOOOO! featuremaps are not the same")
        _report_match(own_maxpool_map, blog_out_max,
                      "YEAHHH! maxpool is the same",
                      "NOOOO! maxpool is not the same")
        probs_match = _report_match(
            own_probs, blog_out_soft,
            "YEAAAH! predicted probabilities are the same",
            "NOOOO! predicted probabilities are not the same")
        if not probs_match:
            # Dump both vectors so the mismatch can be inspected.
            print("Own probabilities")
            print(own_probs)
            print("Blog probabilities")
            print(blog_out_soft)

        # ---------------- compare backprop ----------------
        _report_match(own_gradient_soft, blog_gradient_soft,
                      "YEAHHHH! gradients softmax are the same",
                      "NOOOO! gradients softmax are not the same")
        _report_match(own_gradient_max, blog_gradient_max,
                      "YEAHHHH! gradients maxpool layer are the same",
                      "NOOOO! updated gradients maxpool are not the same")
        # NOTE: the updated softmax weights/bias and the updated conv filters
        # are not compared here, because the blog layers' updated parameters
        # are not captured by the calls above.

    # Open question from the author: after two runs the predicted
    # probabilities are already different -- why?
    return None
weight_matrix=weight_soft, bias_vector=bias_soft) if np.sum(out_soft == probabilities) == np.prod(out_soft.shape): print("Yeaaaah!") ##################### backprop # backprop softmax gradient_L = np.zeros(10) gradient_L[label] = -1 / out_soft[label] gradient_soft, dL_dw, weights_updated, biases_updated = softmax.backprop( gradient_L, 0.1) weights_updatedown, biases_updatedown, gradient_softown = fun.backprop_softmax( inter_soft=inter_soft, # maxpool_shape=out_maxown.shape, probabilities=probabilities, label=label, learn_rate=0.1) if np.sum(weights_updated == weights_updatedown) == np.prod( weights_updated.shape): print("Yeaaaah!") if np.sum(biases_updated == biases_updatedown) == np.prod( biases_updated.shape): print("Yeaaaah!") if np.sum(gradient_soft == gradient_softown) == np.prod(gradient_soft.shape): print("Yeaaaah!") # so gradients are the same, but they do not get copied to correct entries in