def train(training_data, labels, n_iter, n_classes, n_filter, learn_rate, print_acc=True):
    # 28x28 input -> 26x26 after a valid 3x3 conv -> 13x13 after 2x2 maxpool
    input_dim = int((((training_data[0].shape[0] - 3 + 1) / 2) ** 2) * n_filter)

    np.random.seed(seed=30)
    own_filter_conv = np.random.randn(n_filter, 3, 3) / 9
    np.random.seed(seed=30)
    own_weight_soft = np.random.randn(input_dim, n_classes) / input_dim
    own_bias_soft = np.random.randn(n_classes)

    num_correct = 0
    for i in range(n_iter):
        image = training_data[i] / 255 - 0.5
        label = labels[i]

        # forward pass
        own_feature_map, own_filter_conv = fun.convolute(
            image=image, filter_matrix=own_filter_conv)
        own_maxpool_map = fun.maxpool(feature_map=own_feature_map)
        own_probs, own_inter_soft = fun.softmax(
            own_maxpool_map, weight_matrix=own_weight_soft, bias_vector=own_bias_soft)

        # backward pass
        own_weight_soft, own_bias_soft, own_gradient_soft = fun.backprop_softmax(
            inter_soft=own_inter_soft, probabilities=own_probs,
            label=label, learn_rate=learn_rate)
        own_gradient_max = fun.backprop_maxpool(
            feature_map=own_feature_map, gradient=own_gradient_soft)
        own_filter_conv = fun.backprop_conv(
            image=image, filter_conv=own_filter_conv,
            gradient=own_gradient_max, learn_rate=learn_rate)

        prediction = np.argmax(own_probs)
        num_correct += 1 if prediction == label else 0
        if i % 100 == 0 and i != 0 and print_acc:
            accuracy = num_correct / i
            print(f"accuracy for the first {i} samples: {accuracy}")
            print(f"{num_correct} predictions for {i} samples were correct")
    return None
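# Minimal usage sketch for train(). Assumption: any MNIST loader that yields
# (N, 28, 28) uint8 image arrays works here; the `mnist` helper package is
# one option.
# import mnist
# train(mnist.train_images(), mnist.train_labels(),
#       n_iter=1000, n_classes=10, n_filter=8, learn_rate=0.005)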
test_conv[0, ind[0], ind[1]]  # if this works for image, wooooow!
test_conv.shape
ind[0]
ind[1]
test_image.shape
test_image

dConv = np.zeros(test_filter.shape)
dConv.shape
deltaL = np.random.randn(2).round()
deltaL[1] = 100
out_backmax = fun.backprop_maxpool(test_conv, index_maxpool, deltaL, 1)
np.sum(out_backmax)
test_conv
test_image

def backprop_conv(image, filter_conv, back_maxpool):
    # Filter gradient: accumulate the 3x3 image patch under each output
    # position, weighted by the incoming gradient from the maxpool layer.
    dConv = np.zeros(filter_conv.shape)
    for f in range(back_maxpool.shape[0]):
        for i in range(back_maxpool.shape[1]):
            for j in range(back_maxpool.shape[2]):
                dConv[f] += back_maxpool[f, i, j] * image[i:i + 3, j:j + 3]
    return dConv
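# Hedged sketch of a finite-difference check for backprop_conv. Assumptions:
# fun.convolute(image=..., filter_matrix=...) returns (feature_map, filters)
# as in train() above, and the probe loss is sum(feature_map * G), whose
# exact filter gradient is what backprop_conv computes for the gradient G.
def check_conv_gradient(image, filters, eps=1e-5):
    G = np.random.randn(*fun.convolute(image=image, filter_matrix=filters)[0].shape)
    analytic = backprop_conv(image, filters, G)
    numeric = np.zeros_like(filters)
    for idx in np.ndindex(*filters.shape):
        f_plus, f_minus = filters.copy(), filters.copy()
        f_plus[idx] += eps
        f_minus[idx] -= eps
        loss_plus = np.sum(fun.convolute(image=image, filter_matrix=f_plus)[0] * G)
        loss_minus = np.sum(fun.convolute(image=image, filter_matrix=f_minus)[0] * G)
        numeric[idx] = (loss_plus - loss_minus) / (2 * eps)
    # should be tiny (~1e-9) if the analytic gradient is correct
    return np.max(np.abs(analytic - numeric))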
weight_soft.shape  ### agrees, after transposing the input!
# needed to transpose the weight matrix as well; now the gradients from
# the softmax layer roughly agree
back_soft = fun.backprop_softmax(intermediates, probabilities, label=label)[3]
## gradients from backprop maxpool are not the same; try with exactly the
# same gradients from backprop_softmax, since they are only roughly the same
back_soft = np.array([0.02303161, 0.01477759, -0.02779495, 0.05881862,
                      0.09134293, 0.09521715, 0.10948755, 0.00828537])
# the gradients from the softmax backprop are the same,
# but the indexing in backprop_maxpool is probably broken now,
# since I transposed the input into the softmax layer
grad_max = fun.backprop_maxpool(out_ownconv, index_maxown, back_soft)
grad_max[0, :, :]
# hmmmmmm, the indices are nonzero in the right places, but
# the updates have the wrong values
grad_max[grad_max != 0]
out_ownconv[index_maxown]
## still disagree!
grad_max.shape
fun.backprop_max_conv(image, out_ownconv, index_maxown, back_soft, 0.01)
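# Illustration of why transposing before the softmax layer breaks the saved
# maxpool indices: flatten() is row-major, so a channel-last map and its
# channel-first transpose enumerate the same entries in a different order,
# and indices recorded for one layout point at the wrong positions in the other.
a = np.arange(2 * 2 * 3).reshape(2, 2, 3)
print(a.flatten()[:6])                     # [0 1 2 3 4 5]
print(a.transpose(2, 0, 1).flatten()[:6])  # [0 3 6 9 1 4]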
# weight_soft initialisations?
# try with weight_soft * 10
gradients_soft_oldweights = gradient_soft
gradient_softown.shape
gradient_soft.flatten() == gradient_softown.flatten()
# are the same, but the format is wrong
# not the same, yes!
# now the only remaining problem has to be in backprop_conv
# maybe there is still a problem in backprop_softmax
gradient_soft[:, :, 0]
gradient_softown[1]

gradient_max = pool.backprop(gradient_soft)
gradient_maxown = fun.backprop_maxpool(out_convown, index_maxown,
                                       gradient_softown.flatten())
gradient_maxown[1]
gradient_max[:, :, 1]
gradient_max.shape
gradient_maxown.shape

############################################################################
######################## backprop maxpool and conv #########################
############################################################################
## maybe just stop here and continue with the other backprops
# backprop_maxpool should work, I just pass the wrong(?) values
out = conv.forward(image)
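# Sketch: compare the two gradients across layouts instead of eyeballing
# slices. Assumption: the own arrays are channel-first, e.g. (8, 26, 26),
# while the blog's are channel-last, e.g. (26, 26, 8); np.moveaxis aligns
# them before the elementwise comparison.
def same_up_to_layout(own, blog, atol=1e-10):
    own_cl = np.moveaxis(own, 0, -1)  # channel-first -> channel-last
    return own_cl.shape == blog.shape and np.allclose(own_cl, blog, atol=atol)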
def debug_cnn(n_iter, version, learn_rate):
    # from importlib import reload
    path = "/home/konstantin/Documents/master_arbeit/"
    sys.path.append(path)
    import functions as fun
    # import os

    if version == "changed":
        print("changed blog version is being used")
        path_blog_changed = "/home/konstantin/Documents/master_arbeit/cnn_python/cnn-from-scratch-changed/"
        sys.path.append(path_blog_changed)
        # os.listdir(path_blog_changed)
        from conv import Conv3x3
        from maxpool import MaxPool2
        from softmax import Softmax
    if version == "original":
        print("original version is being used")
        path_blog_original = "/home/konstantin/Documents/master_arbeit/cnn_python/original_blog/cnn-from-scratch"
        sys.path.append(path_blog_original)
        from conv import Conv3x3
        from maxpool import MaxPool2
        from softmax import Softmax
    # Conv3x3 = reload(Conv3x3)
    # MaxPool2 = reload(MaxPool2)
    # Softmax = reload(Softmax)

    num_filters = 8
    np.random.seed(seed=444)
    own_filter_conv = np.random.randn(num_filters, 3, 3) / 9
    own_filter_conv = np.round(own_filter_conv)
    dim_maxpool = 13 * 13 * 8
    np.random.seed(seed=666)
    own_weight_soft = np.random.randn(dim_maxpool, 10) / dim_maxpool
    own_bias_soft = np.zeros(10)

    conv = Conv3x3(8)    # 28x28x1 -> 26x26x8
    pool = MaxPool2()    # 26x26x8 -> 13x13x8
    dim_maxpool = np.prod(13 * 13 * 8)
    softmax = Softmax(dim_maxpool, 10)

    for i in range(n_iter):
        image = test_images[i] / 255 - 0.5
        label = test_labels[i]

        # run own implementation
        own_feature_map, own_filter_conv = fun.convolute(
            image=image, filter_matrix=own_filter_conv)
        own_maxpool_map = fun.maxpool(feature_map=own_feature_map)
        own_probs, own_inter_soft = fun.softmax(
            own_maxpool_map, weight_matrix=own_weight_soft, bias_vector=own_bias_soft)
        own_weight_soft, own_bias_soft, own_gradient_soft = fun.backprop_softmax(
            inter_soft=own_inter_soft, probabilities=own_probs,
            label=label, learn_rate=learn_rate)
        own_gradient_max = fun.backprop_maxpool(
            feature_map=own_feature_map, gradient=own_gradient_soft)
        own_filter_conv = fun.backprop_conv(
            image=image, filter_conv=own_filter_conv,
            gradient=own_gradient_max, learn_rate=learn_rate)

        # run model from blog with same data
        blog_out_conv = conv.forward(image)
        # print(out_conv)
        blog_out_max = pool.forward(blog_out_conv)
        blog_out_soft = softmax.forward(blog_out_max)
        # print(blog_out_soft)
        gradient_L = np.zeros(10)
        gradient_L[label] = -1 / blog_out_soft[label]
        blog_gradient_soft = softmax.backprop(gradient_L, learn_rate)
        blog_gradient_max = pool.backprop(blog_gradient_soft)
        conv.backprop(blog_gradient_max, learn_rate)

        ######################## compare feedforward ###########################
        print("This is iteration", i)
        if np.sum(own_feature_map == blog_out_conv) == np.prod(own_feature_map.shape):
            print("YEAAAH! FeatureMaps are the same")
        else:
            print("NOOOO! featuremaps are not the same")
        # conv.filters == filter_conv
        # after the first iteration these are not the same anymore,
        # since they get updated
        if np.sum(own_maxpool_map == blog_out_max) == np.prod(blog_out_max.shape):
            print("YEAHHH! maxpool is the same")
        else:
            print("NOOOO! maxpool is not the same")
        if np.sum(own_probs == blog_out_soft) == np.prod(blog_out_soft.shape):
            print("YEAAAH! predicted probabilities are the same")
        else:
            print("NOOOO! predicted probabilities are not the same")
            print("Own probabilities")
            print(own_probs)
            print("Blog probabilities")
            print(blog_out_soft)
            # break

        ######################## compare backprop ##############################
        ## softmax gradients:
        if np.sum(own_gradient_soft == blog_gradient_soft) == np.prod(blog_gradient_soft.shape):
            print("YEAHHHH! gradients softmax are the same")
        else:
            print("NOOOO! gradients softmax are not the same")
        ## weight updates weight matrix softmax layer
        # if np.sum(own_weight_soft == blog_weights_updated) == np.prod(own_weight_soft.shape):
        #     print("Yeaaah! updated weight matrix softmax is the same")
        # else:
        #     print("updated weight matrix softmax is not the same")
        ## weight updates bias vector
        # if np.sum(own_bias_soft == blog_biases_updated) == np.prod(blog_biases_updated.shape):
        #     print("Yeaaah! updated bias vector softmax is the same")
        # else:
        #     print("updated bias vector is not the same")
        #### maxpool
        if np.sum(own_gradient_max == blog_gradient_max) == np.prod(blog_gradient_max.shape):
            print("YEAHHHH! gradients maxpool layer are the same")
        else:
            print("NOOOO! gradients maxpool are not the same")
        ## conv
        # if np.sum(own_filter_conv == blog_filter_update) == np.prod(own_filter_conv.shape):
        #     print("YEAAAHHH! updated filters conv layer are the same")
        # else:
        #     print("NOOOOO! updated filters conv layer are not the same")

    # So! After two runs the predicted probabilities are already different, why?
    return None
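# One likely cause of "after two runs the predicted probabilities are already
# different": the comparison only stays in lockstep if both implementations
# start from bit-identical parameters, and the init above (seeds 444/666,
# rounded filters) need not match the blog's. A hedged sketch, to be called
# before the training loop in debug_cnn (assumption: the blog classes expose
# .filters, .weights and .biases as attributes, as in the blog source):
def sync_params_with_blog(conv, softmax):
    own_filter_conv = conv.filters.copy()
    own_weight_soft = softmax.weights.copy()
    own_bias_soft = softmax.biases.copy()
    return own_filter_conv, own_weight_soft, own_bias_soft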
if np.sum(weights_updated == weights_updatedown) == np.prod(weights_updated.shape):
    print("Yeaaaah!")
if np.sum(biases_updated == biases_updatedown) == np.prod(biases_updated.shape):
    print("Yeaaaah!")
if np.sum(gradient_soft == gradient_softown) == np.prod(gradient_soft.shape):
    print("Yeaaaah!")
# so the gradients are the same, but they do not get copied to the correct
# entries in the feature map

# backprop maxpool
gradient_max = pool.backprop(gradient_soft)
# gradient_test = np.ones(shape=gradient_softown.shape)
gradient_maxown = fun.backprop_maxpool(out_convown, gradient_softown)
if np.sum(gradient_max == gradient_maxown) == np.prod(gradient_max.shape):
    print("Yeaaaah!")

gradient_conv, filter_update = conv.backprop(gradient_max, 0.01)
filter_updateown, gradient_convown = fun.backprop_conv(
    image, filter_conv, feature_gradient=gradient_maxown, learn_rate=0.01)
if np.sum(gradient_conv == gradient_convown) == np.prod(gradient_conv.shape):
    print("Yeaaaah!")
if np.sum(filter_update == filter_updateown) == np.prod(filter_update.shape):
    print("Yeaaaah!")
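# Note: once the two implementations apply floating-point updates in a
# different operation order, exact `==` checks can fail on last-bit noise
# even when the math agrees; np.allclose is the more robust comparison.
if np.allclose(filter_update, filter_updateown, rtol=1e-10, atol=1e-12):
    print("Yeaaaah! (equal within floating-point tolerance)")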