def person_follow(self):
    """Person-following control loop.

    Runs at 10 Hz until ROS shuts down. ``self.POI`` is indexed as
    ``POI[0]`` (compared against a stop distance of 0.5) and ``POI[1]``
    (compared against a heading tolerance of 0.1 and ``math.pi``).

    Returns:
        ``self.seeking`` as soon as ``self.POI[0]`` is ``None``; otherwise
        the loop only ends when ``rospy.is_shutdown()`` becomes true and the
        function returns ``None``.
    """
    cmd = Twist()
    rate = rospy.Rate(10)
    print('*********Person Following*********')

    while not rospy.is_shutdown():
        # No point of interest: hand control back to the caller.
        if self.POI[0] is None:
            return self.seeking

        if abs(self.POI[0]) <= .5:
            # Close enough to the person: stop.
            cmd.linear.x = 0
            cmd.angular.z = 0
        elif abs(self.POI[1]) > .1:
            # Heading is off: turn at a speed derived from the remaining
            # angle (in rads). Original author's open question kept:
            # "is it - self.POI?"
            if 0 < self.POI[1] <= math.pi:
                cmd.angular.z = sigmoid(self.POI[1]) * 0.6
            else:
                cmd.angular.z = -sigmoid(self.POI[1]) * 0.6
        else:
            # Heading is fine: drive straight, speed scaled by distance.
            cmd.linear.x = self.POI[0] * 0.5
            cmd.angular.z = 0

        self.vel_pub.publish(cmd)
        rate.sleep()
def forward_prop(Batch_Norm, param, x):
    """Forward pass through a network with two sigmoid hidden layers.

    Args:
        Batch_Norm: batch normalisation is applied to every pre-activation
            when this compares equal to ``True``.
        param: mapping holding ``'w1'``..``'w3'`` and ``'b1'``..``'b3'``.
        x: input fed to the first layer.

    Returns:
        ``(prob_scores, h1, h2)``: the softmax output and the two hidden
        activations. The ``param`` object handed back by
        ``BatchNorm.forward`` is used for later levels but is not returned.
    """
    # Read every parameter up front, in the same order as before, so a
    # missing key fails before any computation happens.
    weights = (param['w1'], param['w2'], param['w3'])
    biases = (param['b1'], param['b2'], param['b3'])
    apply_batch_norm = Batch_Norm == True  # equality kept on purpose

    hidden = []
    signal = x
    for level, (w, b) in enumerate(zip(weights, biases), start=1):
        # Pre-activation of this layer.
        pre_activation = np.dot(signal, w) + b
        if apply_batch_norm:
            pre_activation, param = BatchNorm.forward(
                pre_activation, param, level=level)
        if level == 3:
            break
        # Hidden layers use a sigmoid activation.
        signal = helper.sigmoid(pre_activation)
        hidden.append(signal)

    # Output layer activation resulting in probability scores.
    prob_scores = helper.softmax(pre_activation)
    return prob_scores, hidden[0], hidden[1]
def get_D_losses(self, obj='original'):
    """Build the per-discriminator losses and minimax objectives.

    Sets ``self.Df`` / ``self.Dr`` (sigmoid of the corresponding logits),
    ``self.D_losses`` and ``self.V_D``. ``obj`` is accepted but not read
    by this method.
    """
    # logits --> probabilities
    self.Df = list(map(sigmoid, self.Df_logits))
    self.Dr = list(map(sigmoid, self.Dr_logits))

    # Discriminator losses: -log D(real) - log(1 - D(fake)).
    losses = []
    for ind, real_prob in enumerate(self.Dr):
        fake_prob = self.Df[ind]
        losses.append(
            tf.reduce_mean(-tf.log(real_prob) - tf.log(1 - fake_prob)))
    self.D_losses = losses

    # Minimax objectives for discriminators: log D(real) + log(1 - D(fake)).
    objectives = []
    for ind, real_prob in enumerate(self.Dr):
        fake_prob = self.Df[ind]
        objectives.append(
            tf.reduce_mean(tf.log(real_prob) + tf.log(1 - fake_prob)))
    self.V_D = objectives
def compute_link_probabilities(is_dev=True, u_embed=None, v_embed=None,
                               test_edges=None):
    """Score each edge against one randomly drawn negative node.

    Adapted from CANE: https://github.com/thunlp/CANE/blob/master/code/auc.py

    :param is_dev: when true, edges are the pairs ``(i, i)`` built from the
        first dimension of ``u_embed`` / ``v_embed`` and ``test_edges`` is
        ignored; otherwise ``test_edges`` supplies the pairs.
    :param u_embed: embeddings indexed by the first node of an edge.
    :param v_embed: embeddings indexed by the second node of an edge and by
        the sampled negative node.
    :param test_edges: iterable of ``(u, v)`` pairs (non-dev mode only).
    :return: ``np.array`` of ``[pos_score, neg_score]`` rows, one per edge
        that was not skipped.
    """
    if is_dev:
        nodes = list(range(u_embed.shape[0]))
        test_edges = list(zip(range(u_embed.shape[0]), range(v_embed.shape[0])))
    else:
        nodes = list({n for edge in test_edges for n in edge})

    def _draw_negative(u, v, lookup=None):
        """Draw nodes until one differs from u and v (and is in lookup)."""
        while True:
            candidate = np.random.choice(nodes)
            if candidate == u or candidate == v:
                continue
            if lookup is None or candidate in lookup:
                return candidate

    def _score(left, right):
        """Sigmoid of the largest entry of the embedding product."""
        return helper.sigmoid(left.dot(right.transpose()).max())

    link_probabilities = []
    for i, edge in enumerate(test_edges):
        if is_dev:
            u = v = i
            j = _draw_negative(u=i, v=i)
        else:
            u, v = edge[0], edge[1]
            # NOTE(review): v is checked against u_embed but indexed in
            # v_embed below -- confirm both containers share their keys.
            if u not in u_embed or v not in u_embed:
                continue
            j = _draw_negative(u=u, v=v, lookup=v_embed)

        u_emb = u_embed[u]
        v_emb = v_embed[v]
        j_emb = v_embed[j]
        link_probabilities.append([_score(u_emb, v_emb), _score(u_emb, j_emb)])

    return np.array(link_probabilities)
def get_G_boosted_loss(self, boosting_variant, mixing, obj='original'):
    """Build the generator loss and minimax objective for boosted training.

    Args:
        boosting_variant: ``'boost_prediction'`` uses the booster's expected
            prediction (``self.Df_expected`` / ``self.Dr_expected``); any
            other value mixes the per-discriminator losses with
            ``mix_prediction``.
        mixing: forwarded to ``mix_prediction`` as ``mean_typ``.
        obj: ``'original'`` selects ``log(1 - D(fake))``; any other value
            selects ``-log(D(fake))``.

    Sets ``self.l`` (the ``lambda`` placeholder), ``self.G_loss`` and
    ``self.V_G`` (plus ``self.G_losses`` in the mixing branch), and registers
    a ``G_loss`` scalar summary.
    """
    # Define lambda placeholder
    self.l = tf.placeholder(tf.float32, name='lambda')

    # Boosting variants
    # boost_prediction: Use booster to predict probabilities
    # boost_training: Use boosting to train, but not predict probabilities
    if boosting_variant == 'boost_prediction':
        # Define generator loss
        if obj == 'original':
            self.G_loss = tf.reduce_mean(
                tf.log(1 - sigmoid(self.Df_expected)))
        else:
            self.G_loss = tf.reduce_mean(
                -tf.log(sigmoid(self.Df_expected)))

        # Define minimax objective for generator.
        # Dr_expected is a weighted sum of logits, exactly like Df_expected,
        # so it has to go through the sigmoid before the log; taking the log
        # of the raw value yields NaN whenever it is not positive.
        self.V_G = tf.reduce_mean(
            tf.log(sigmoid(self.Dr_expected))
            + tf.log(1 - sigmoid(self.Df_expected)))
    else:
        # Define generator loss
        if obj == 'original':
            self.G_losses = [
                tf.reduce_mean(tf.log(1 - self.Df[ind]))
                for ind in range(len(self.Df))
            ]
            sign = -1.
        else:
            self.G_losses = [
                tf.reduce_mean(-tf.log(self.Df[ind]))
                for ind in range(len(self.Df))
            ]
            sign = 1.

        # Stack the scalar losses into one vector for mixing.
        _G_losses = [tf.expand_dims(loss, 0) for loss in self.G_losses]
        _G_losses = tf.concat(axis=0, values=_G_losses)
        self.G_loss = mix_prediction(_G_losses, self.l, mean_typ=mixing,
                                     weight_typ=self.weight_type, sign=sign)

        # Define minimax objective for generator
        self.V_G = mix_prediction(self.V_D, self.l, mean_typ=mixing,
                                  weight_typ=self.weight_type, sign=sign)

    tf.summary.scalar('G_loss', self.G_loss)
def forward(self, X, W, b):
    '''
    Forward propagation for logistic regression.

    Returns sigmoid(X . W^T + b).
    '''
    weighted_sum = X.dot(W.transpose())
    return sigmoid(weighted_sum + b)
def classify(clf, test_data, threshold=0):
    """Label samples from the classifier's decision function.

    With ``threshold == 0`` the raw decision value is compared against 0;
    otherwise the sigmoid of the decision value is compared against
    ``threshold``. Values above the threshold map to ``clf.classes_[1]``,
    all others to ``clf.classes_[0]``.

    Returns:
        ``(pred, pred_confidence)``: the label list and the unmodified
        output of ``clf.decision_function``.
    """
    pred_confidence = clf.decision_function(test_data)
    use_raw_value = threshold == 0

    pred = []
    for val in pred_confidence:
        score = val if use_raw_value else sigmoid(val)
        if score > threshold:
            pred.append(clf.classes_[1])
        else:
            pred.append(clf.classes_[0])
    return pred, pred_confidence
def linear_activation_forward(A_prev, W, b, activation):
    """Compute one layer: linear step followed by the chosen activation.

    Args:
        A_prev: activations of the previous layer.
        W: weight matrix, applied as ``np.dot(W, A_prev)``.
        b: bias added to the linear result.
        activation: ``"relu"`` or ``"sigmoid"``.

    Returns:
        ``(A, cache)`` where ``A`` is the activated output and ``cache`` is
        the tuple ``(A_prev, Z, W, b)``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError`` on ``A``).
    """
    if activation not in ("relu", "sigmoid"):
        raise ValueError(
            "activation must be 'relu' or 'sigmoid', got %r" % (activation,))

    Z = np.dot(W, A_prev) + b
    if activation == "relu":
        A = relu(Z)
    else:
        A = sigmoid(Z)

    cache = (A_prev, Z, W, b)
    return A, cache
def forward_propagation(X, parameters):
    """Return sigmoid(W . X + b) using ``parameters["W"]`` and ``parameters["b"]``."""
    weights, bias = parameters["W"], parameters["b"]
    linear_output = np.dot(weights, X) + bias
    return sigmoid(linear_output)
def forward(self, X, W1, W2, b1, b2):
    '''
    Forward propagation for a network with one hidden layer.

    Returns the softmax output together with the hidden layer's
    pre-activation (X . W1 + b1), not its sigmoid.
    '''
    hidden_pre_activation = X.dot(W1) + b1
    hidden_activation = sigmoid(hidden_pre_activation)
    output_pre_activation = hidden_activation.dot(W2) + b2
    return softmax(output_pre_activation), hidden_pre_activation
def propagate(parameters, X, Y):
    """Compute the cross-entropy cost and its gradients for one pass.

    Args:
        parameters: mapping with ``"W"`` and ``"b"``.
        X: inputs; ``X.shape[1]`` is used as the sample count ``m``
            (assumes one example per column -- TODO confirm).
        Y: targets, broadcast against ``sigmoid(W . X + b)``.

    Returns:
        ``(grads, cost)`` where ``grads`` has keys ``"dW"`` and ``"db"``.
    """
    W = parameters["W"]
    b = parameters["b"]
    m = X.shape[1]

    A = sigmoid(np.dot(W, X) + b)

    # np.sum reduces over every axis in one vectorised call; the nested
    # builtin sum() iterated row by row in Python and failed on 1-D input.
    cost = -1 / m * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))

    error = A - Y
    grads = {"dW": np.mean(error * X, 1), "db": np.mean(error)}
    return grads, cost
def _forward_prop(self, x):
    """Propagate ``x`` through every layer, filling ``_inps`` and ``_outs``.

    ``self._outs[0]`` is set to the input; for each layer ``i`` from 1 to
    ``self.num_layers - 1`` the pre-activation goes to ``self._inps[i]`` and
    its sigmoid to ``self._outs[i]``. Nothing is returned.
    """
    # The input acts as the "output" of layer 0.
    self._outs[0] = x

    for layer in range(1, self.num_layers):
        previous_output = self._outs[layer - 1]
        # Pre-activation values of this layer's neurons.
        self._inps[layer] = (
            self.weights[layer].dot(previous_output) + self.biases[layer])
        # Activated values become this layer's outputs.
        self._outs[layer] = helper.sigmoid(self._inps[layer])
def predict(text):
    """Score ``text`` and label it by thresholding the score at 0.5.

    Returns a dict with ``"score"`` (sigmoid of the weighted features plus
    bias) and ``"gender"``: ``"Male"`` when the score exceeds 0.5,
    ``"Female"`` otherwise.
    """
    features = extract_features(text)
    score = sigmoid(np.dot(features, weights) + biases)
    gender = "Male" if score > 0.5 else "Female"
    return {"score": score, "gender": gender}
def forward_prop(param, x):
    """Forward pass through one sigmoid hidden layer and a softmax output.

    Args:
        param: mapping holding ``'w1'``, ``'w2'``, ``'b1'`` and ``'b2'``.
        x: input fed to the hidden layer.

    Returns:
        ``(prob_scores, h1)``: softmax output and hidden activation.
    """
    w1, w2, b1, b2 = (param[key] for key in ('w1', 'w2', 'b1', 'b2'))

    # Input -> hidden layer. The original listed helper.tanh and
    # helper.relu_activation as drop-in alternatives for this activation.
    hidden = helper.sigmoid(np.dot(x, w1) + b1)

    # Hidden -> output layer, turned into probability scores.
    prob_scores = helper.softmax(np.dot(hidden, w2) + b2)
    return prob_scores, hidden
def run(self):
    """Follow the point of interest and publish its marker at 10 Hz.

    Given a distance (``self.POI[0]``) and an angle (``self.POI[1]``) from
    the base_link frame, the neato turns toward the target and closes the
    gap. ``self.POI`` is re-read on every cycle, so it can be recalibrated
    mid-run. The loop ends when ROS shuts down.
    """
    rate = rospy.Rate(10)
    while not rospy.is_shutdown():
        # Polar -> Cartesian position of the person marker.
        x = self.POI[0] * math.cos(self.POI[1])
        y = self.POI[0] * math.sin(self.POI[1])
        self.person_marker.pose.position.x = x
        self.person_marker.pose.position.y = y
        self.marker_pub.publish(self.person_marker)

        if abs(self.POI[0]) <= .5:
            # Close enough to the person: stop.
            self.twist.linear.x = 0
            self.twist.angular.z = 0
        elif abs(self.POI[1]) > .1:
            # Heading is off: turn. The sigmoid scales the angular speed
            # to between 0 and 1 * 0.6.
            turn_speed = helper.sigmoid(self.POI[1]) * 0.6
            if 0 < self.POI[1] <= math.pi:
                self.twist.angular.z = turn_speed
            else:
                self.twist.angular.z = -turn_speed
        else:
            # Heading is fine: drive straight, speed scaled by distance.
            self.twist.linear.x = self.POI[0] * 0.5
            self.twist.angular.z = 0

        self.pub.publish(self.twist)
        rate.sleep()
def feed_forward(self, X, y):
    '''
    Feed-forward pass.

    For every layer index i in 1..len(self.layer_sizes)-1 the linear output
    is passed through ut.sigmoid when i == len(self.hidden_layer_sizes) and
    through ut.tanh otherwise; each call yields the activation (stored in
    self.A) and a second value stored in self.df.

    Returns (error, output) where output is
    self.A["A" + str(len(self.hidden_layer_sizes) + 1)] and error is
    ut.entropy_loss(output, y).
    '''
    hidden_count = len(self.hidden_layer_sizes)
    layer_input = X

    for i in range(1, len(self.layer_sizes)):
        suffix = str(i)
        linear_output = (np.dot(self.weights["W" + suffix], layer_input)
                         + self.bias["b" + suffix])
        # NOTE(review): sigmoid is applied at index hidden_count while the
        # value returned below is taken from index hidden_count + 1 --
        # confirm this matches how layer_sizes is built.
        activation_fn = ut.sigmoid if i == hidden_count else ut.tanh
        self.A["A" + suffix], self.df["df" + suffix] = activation_fn(linear_output)
        layer_input = self.A["A" + suffix]

    output_key = "A" + str(hidden_count + 1)
    error = ut.entropy_loss(self.A[output_key], y)
    return error, self.A[output_key]
def classify_one_class_svm(clf, test_data, threshold=-1):
    """Turn one-class SVM output into 0/1 labels.

    * ``threshold == -1``: use ``clf.predict``; a predicted value of 1 maps
      to label 1, anything else to label 0.
    * ``threshold == 0``: use ``clf.decision_function`` and label 1 when the
      raw value is above 0.
    * any other threshold: label 1 when the sigmoid of the decision value is
      above ``threshold``.

    Returns:
        ``(pred, pred_confidence)``: the label list and the unmodified
        classifier output it was derived from.
    """
    if threshold == -1:
        pred_confidence = clf.predict(test_data)
        # Original notes: "leave label is 1, stay label 0" and
        # "predicted value -1 will leave, 1 will stay".
        pred = []
        for val in pred_confidence:
            pred.append(1 if val == 1 else 0)
        return pred, pred_confidence

    pred_confidence = clf.decision_function(test_data)
    use_raw_value = threshold == 0
    pred = []
    for val in pred_confidence:
        score = val if use_raw_value else sigmoid(val)
        pred.append(1 if score > threshold else 0)
    return pred, pred_confidence
def compute_gradient_entropy(batch, weights, bias, b_or_w):
    """
    Sum the cross-entropy error gradient over every data point in batch.

    Each data point is read as 784 feature values followed by a class
    index. ``b_or_w`` selects the gradient: ``'w'`` gives a (784, 10)
    array, ``'b'`` a (10, 1) array.
    """
    assert b_or_w == 'w' or b_or_w == 'b'
    wrt_weights = b_or_w == 'w'

    ret = np.zeros((784, 10)) if wrt_weights else np.zeros((10, 1))

    for dp in batch:
        # Split the row into the feature column and a one-hot target.
        x = dp.T[:784].reshape(784, 1)
        t = np.zeros((10, 1))
        t[int(dp.T[784:][0])] = 1

        y = helper.sigmoid(x, weights, bias)
        error = y - t
        if wrt_weights:
            ret += np.dot(x, error.T)
        else:
            ret += error
    return ret
def compute_gradient_mse(batch, weights, bias, b_or_w):
    """
    Sum the mean squared error gradient over every data point in batch.

    Each data point is read as 784 feature values followed by a class
    index. ``b_or_w`` selects the gradient: ``'w'`` gives a (784, 10)
    array, ``'b'`` a (10, 1) array.
    """
    assert b_or_w == 'w' or b_or_w == 'b'
    wrt_weights = b_or_w == 'w'

    ret = np.zeros((784, 10)) if wrt_weights else np.zeros((10, 1))

    for dp in batch:
        # Split the row into the feature column and a one-hot target.
        x = dp.T[:784].reshape(784, 1).astype(float)
        t = np.zeros((10, 1))
        t[int(dp.T[784:][0])] = 1

        y = helper.sigmoid(x, weights, bias)

        # (y - t) * (1 - y) * y, element by element. The previous code got
        # the same numbers by taking the diagonal of two 10x10 outer
        # products, and repeated that work in both branches.
        delta = np.multiply(np.multiply(y - t, 1 - y), y).reshape(10, 1)

        if wrt_weights:
            ret += np.dot(x, delta.T)
        else:
            ret += delta
    return ret
def output_layer_fp(Input, W, b):
    """Output-layer forward pass: sigmoid(Input . W^T + b).

    Per the original notes, W has shape (1, input_size) and b has shape
    (1, 1).
    """
    pre_activation = np.matmul(Input, W.T) + b
    return h.sigmoid(pre_activation)
# Fragment of a larger routine: `args`, `ds_val`, `dl_train`, `dl_test`,
# `model` and the fold index `i` are defined outside the visible code.
dl_val = DataLoader(ds_val, batch_size=args.batch_size, shuffle=False)
train_loader = dl_train
# NOTE(review): valid_loader is bound to dl_test rather than dl_val; it is
# only referenced by the commented-out call at the end -- confirm intent.
valid_loader = dl_test
# Building ensemble (average)
test_info = []
# NOTE(review): every iteration calls train() on the same `model` object;
# whether train() re-initialises the weights is not visible here.
for num_models in range(args.num_ensemble):
    print(f"Training ensemble model {num_models} / {args.num_ensemble}")
    valid_acc, valid_auc, valid_ce, valid_info = model.train(
        dl_train, dl_val)
    acc, auc, ce, _test_info = model.test(dl_test)
    test_info.append(_test_info)
    print(f"validation auc = {valid_auc}\ntest auc = {auc}")
# Average, over ensemble members, the sigmoid of the first entry of each
# test result.
prob = np.asarray([sigmoid(it[0]) for it in test_info]).mean(0)
# Convert back to logits (1e-10 guards the division when prob reaches 1)
scores = np.log(prob / (1 - prob + 1e-10))
# Labels are taken from the first ensemble member's test result.
labels = test_info[0][1]
auc, acc, ce = compute_metrics(scores, labels)
print(f"Fold {i}\nAcc: {acc:.2f}\nAuc: {auc:.2f}\nCE: {ce:.2f}")
# auc = train_model(
#     model,
#     patience,
#     n_epochs,
#     train_loader,
#     valid_loader,
#     optimizer,
#     criterion,
#     device,
def predict(X, parameters):
    """Return sigmoid(W . X + b) rounded via ``floor(p + 0.5)``."""
    probabilities = sigmoid(np.dot(parameters["W"], X) + parameters["b"])
    return np.floor(probabilities + 0.5)
def get_D_boosted_losses(self, boosting_variant, obj='original'):
    """Build boosted discriminator losses and the auxiliary update ops.

    Sets ``self.Df_expected`` / ``self.Dr_expected`` (weighted sums of the
    discriminator logits), ``self.aux_vars`` / ``self.aux_vars_new`` (the
    ``t``, ``alpha``, ``v`` placeholders and their updated values),
    ``self.Df`` / ``self.Dr``, ``self.D_losses`` and ``self.V_D``, and
    registers one scalar summary per discriminator loss.

    Args:
        boosting_variant: accepted but not read inside this method.
        obj: ``'original'`` selects the ``-log(1 - Df)`` fake term; any
            other value selects ``+log(Df)``.
    """
    # Define auxiliary placeholders
    t = tf.placeholder(tf.float32)
    alpha = tf.placeholder(tf.float32, shape=[self.N])
    v = tf.placeholder(tf.float32, shape=[self.N])

    # Compute expectation of booster prediction
    _Df_logits = tf.concat(axis=1, values=self.Df_logits)
    _Dr_logits = tf.concat(axis=1, values=self.Dr_logits)
    _Df = tf.cumsum(alpha * _Df_logits, axis=1, exclusive=False)
    _Dr = tf.cumsum(alpha * _Dr_logits, axis=1, exclusive=False)
    Df_weighted = v / tf.reduce_sum(v) * _Df
    Dr_weighted = v / tf.reduce_sum(v) * _Dr
    self.Df_expected = tf.reduce_sum(Df_weighted, axis=1)
    self.Dr_expected = tf.reduce_sum(Dr_weighted, axis=1)

    # Compute auxiliary variable, s
    # Note: 'q' is 'z' from AdaBoost.OL to avoid confusion with latent variable 'z' in GAN
    qf = -_Df_logits
    qr = _Dr_logits
    q = tf.concat(axis=0, values=[qf, qr])
    # NOTE(review): these two cumsum calls pass no axis, unlike the axis=1
    # calls above -- confirm the default axis is the intended one.
    s_0 = tf.clip_by_value(tf.cumsum(alpha * q, exclusive=True), -4., 4.)
    s_1 = tf.clip_by_value(tf.cumsum(alpha * q, exclusive=False), -4., 4.)

    # Compute loss weights
    w = 1 / (1 + tf.exp(s_0))  # size: batch_size x num_discriminators
    # First half of the rows belongs to the fake batch, second half to the
    # real batch (same order as the concat that built q).
    wf, wr = tf.split(axis=0, num_or_size_splits=2, value=w)
    wf_split = tf.split(axis=1, num_or_size_splits=self.N, value=wf)
    wr_split = tf.split(axis=1, num_or_size_splits=self.N, value=wr)

    # Define v update -- only needed if training generator with expectation of booster prediction
    wrong_f = sigmoid(Df_weighted)
    wrong_r = sigmoid(-Dr_weighted)
    wrong = tf.concat(axis=0, values=[wrong_f, wrong_r])
    v_new = tf.reduce_mean(v * tf.exp(wrong), axis=0)

    # Define alpha update
    nt = 4 / tf.sqrt(t)
    alpha_delta = nt * q / (1 + tf.exp(s_1))
    alpha_new = tf.reduce_mean(tf.clip_by_value(alpha + alpha_delta, -2, 2), axis=0)

    # Store auxiliary variable update pairs (t,alpha,v)
    self.aux_vars = [t, alpha, v]
    self.aux_vars_new = [t + 1, alpha_new, v_new]

    # logits --> probabilities
    self.Df = [sigmoid(logit) for logit in self.Df_logits]
    self.Dr = [sigmoid(logit) for logit in self.Dr_logits]

    # Define discriminator losses
    if obj == 'original':
        self.D_losses = [tf.reduce_mean(-wr_split[ind] * tf.log(self.Dr[ind]) - wf_split[ind] * tf.log(1 - self.Df[ind])) for ind in range(len(self.Dr))]
    else:
        self.D_losses = [tf.reduce_mean(-wr_split[ind] * tf.log(self.Dr[ind]) + wf_split[ind] * tf.log(self.Df[ind])) for ind in range(len(self.Dr))]
    for ind in range(len(self.Dr)):
        tf.summary.scalar('D_%d_Loss' % ind, self.D_losses[ind])

    # Define minimax objectives for discriminators
    self.V_D = [tf.reduce_mean(tf.log(self.Dr[ind]) + tf.log(1 - self.Df[ind])) for ind in range(len(self.Dr))]
def test_sigmoid(sigmoid_input):
    """Return ``helper.sigmoid`` applied to ``sigmoid_input``.

    ``helper`` is imported at call time rather than at module load.
    """
    from helper import sigmoid
    return sigmoid(sigmoid_input)
# Fragment: `iterations`, `m`, `batch_size`, `X`, `y`, `w`, `activation`,
# `alpha` and the helpers called below are defined outside the visible code.
# NOTE(review): the nesting of these statements is reconstructed from
# flattened text -- confirm against the original file.
for j in range(iterations):
    # At each iteration, we refine the model
    # Pick a random contiguous window of batch_size columns.
    batch_idx = np.random.randint(0, m - batch_size)
    batch = X[:, batch_idx:batch_idx + batch_size]
    score = np.dot(w.T, batch)
    h = activation(score)
    delta = (h - y[:, batch_idx:batch_idx + batch_size])
    # Gradient step; the step is scaled by alpha / m.
    g = alpha / m * np.dot(batch, delta.T)
    w -= g
for i in range(epochs):
    ##print('Epoch', i)
    if i % report_interval == 0:
        # Record the cross-entropy cost and the error on the full data set.
        score = np.dot(w.T, X)
        h = hl.sigmoid(score)
        j = -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h), axis=0) / m
        js.append(j)
        error = calculate_error()
        errors.append(error)
        print('Error', error, '%')
    # Annealing alpha over time
    # Linear interpolation from alpha_initial toward alpha_final.
    alpha = alpha_initial - i / epochs * (alpha_initial - alpha_final)
    # You can comment out two to test the third
    # batch_gd(alpha)
    # sgd(alpha)
    batch_gd(alpha)
js = np.array(js)
def train(self, X, Y, step_size=10e-7, epochs=10000):
    '''
    Train a neural network with one hidden layer on the given data.

    The data is shuffled, the last 1000 rows are held out for validation,
    and labels are converted with y2indicator. Weights start as uniform
    random values in [0, 1]; the hidden layer has 5 units and the output
    layer 2. The training error per epoch is plotted at the end.

    Returns (best_w1, best_w2, best_b1, best_b2): the parameters from the
    epoch with the lowest validation error.
    '''
    X, Y = shuffle(X, Y)
    # Hold out the last 1000 samples for validation.
    Xvalid, Yvalid = X[-1000:], Y[-1000:]
    X, Y = X[:-1000], Y[:-1000]
    K = len(set(Y))
    Y = y2indicator(Y, K)
    Yvalid = y2indicator(Yvalid, K)
    M, D = X.shape
    m1=5 #number of neurons for the hidden layer
    W1=[[random.uniform(0,1) for j in range(m1)] for i in range(D)] #initial weights for first layer
    W1=np.array(W1)
    b1=0 #initial first layer bias
    # NOTE(review): the output width is hard-coded to 2 although K is
    # computed above -- confirm the data always has two classes.
    W2=[[random.uniform(0,1) for j in range(2)] for i in range(m1)] #initial weights for output layer
    W2=np.array(W2)
    b2=0 #bias for output layer
    train_costs = []
    valid_costs = []
    best_validation_error = 1
    errorV=[]
    errorT=[]
    for i in range(epochs): #for the given number of epochs
        print("Epoch: ",i)
        # z is the second value returned by self.forward.
        pY,z=self.forward(X,W1,W2,b1,b2) #forward propagation for train data
        W2=np.subtract(W2,step_size*(sigmoid((z.transpose()).dot((np.subtract(pY,Y)))))) #updating w2 by gradient descent
        b2=b2-step_size*(np.subtract(pY,Y)) #updating bias
        # Collapse the per-sample bias updates by summing over the rows.
        s=0
        s=s+sum([b for b in b2])
        b2=s
        z=z.transpose().dot(z)
        j=((np.subtract(pY,Y)).dot(W2.transpose())).dot((np.subtract(1,(z))).transpose()) #calculating error in activation
        W1=np.subtract(W1,step_size*(X.transpose()).dot(j)) #using error in activation for back propagation
        b1=b1-step_size*j #updating first layer bias
        # NOTE(review): the sum below iterates over b2, not b1, so the b1
        # update computed on the previous line is discarded -- looks like a
        # copy-paste slip; confirm before changing.
        s=0
        s=s+sum([b for b in b2])
        b1=s
        Pans=[ [] for k in range(len(pY)) ]
        for k in range(len(pY)): #normalising the predicted labels according to class labels
            # A probability above 0.5 is recorded as 0, otherwise as 1.
            if pY[k][0] > 0.5:
                Pans[k].append(0)
            else:
                Pans[k].append(1)
            if pY[k][1] > 0.5:
                Pans[k].append(0)
            else:
                Pans[k].append(1)
        Pans=np.array(Pans)
        train_costs.append(sigmoid_cost(Y,Pans)) #error cost for prediction (train)
        et=error_rate(Y,Pans) #error in predictions
        errorT.append(et) #error in train set over time
        Pvalid,zValid=self.forward(Xvalid,W1,W2,b1,b2) #forward propagation for validation set based on updated weights
        PVans=[ [] for k in range(len(Pvalid)) ]
        for k in range(len(Pvalid)): #normalising predictions based on class labels
            if Pvalid[k][0] > 0.5:
                PVans[k].append(0)
            else:
                PVans[k].append(1)
            if Pvalid[k][1] > 0.5:
                PVans[k].append(0)
            else:
                PVans[k].append(1)
        PVans=np.array(PVans)
        valid_costs.append(sigmoid_cost(Yvalid,PVans)) #error cost for prediction (validation)
        eV=error_rate(Yvalid,PVans) #error in predictions
        if eV < best_validation_error: #finding the best validation error
            best_w1=W1
            best_w2=W2
            best_b1=b1
            best_b2=b2
            best_validation_error=eV
        errorV.append(eV) #error in validation set over time
    plt.plot(errorT) #Plotting error in train set over time
    #plt.plot(errorV) #Uncomment this to plot error in validation set over time
    print(best_validation_error)
    # NOTE(review): best_w1..best_b2 are only bound once some epoch has a
    # validation error below 1; otherwise this return raises.
    return best_w1,best_w2,best_b1,best_b2
def predict(text):
    """Return the sigmoid of the weighted features of ``text`` plus bias."""
    features = get_features(text)
    return sigmoid(np.dot(features, weights) + biases)
def update(self):
    """Call ``helper.sigmoid`` with no arguments and discard the result."""
    helper.sigmoid()
    return None
def ac_f(param):
    """Evaluate the model on ``param`` rows extended with ``context``.

    ``context`` is tiled once per row of ``param`` and appended column-wise
    before prediction. The prediction is cast to float64 and, unless
    ``self.model_type`` equals ``'classification'``, passed through
    ``helper.sigmoid``. ``self`` and ``context`` come from the enclosing
    scope.

    Returns:
        The prediction with length-one axes removed (``np.squeeze``).
    """
    x = np.hstack((param, np.tile(context, (param.shape[0], 1))))
    ret = self.model.predict(x).astype(np.float64)
    # Compare by value: `is not` tests object identity, which is unreliable
    # for strings and emits a SyntaxWarning on Python 3.8+.
    if self.model_type != 'classification':
        ret = helper.sigmoid(ret)
    return np.squeeze(ret)
def __call__(self, outputs, labels, protected_classes, inputs, phase):
    """Cross-entropy loss plus a group-disparity regularisation term.

    For correctly classified samples of each protected group (1 = minority,
    0 = majority) the method computes ``|logit| / ||d logit / d input||``
    for the predicted class, turns those values into one regularisation
    number per group, and penalises the absolute difference between groups.

    Args:
        outputs: model outputs; the arg-max over dimension 1 is the
            predicted class.
        labels: target classes.
        protected_classes: group indicator per sample; must contain only
            0 and 1.
        inputs: model inputs the gradients are taken against.
        phase: ``'train'`` or ``'test'``; selects which bookkeeping lists
            receive this batch's values.

    Returns:
        The loss to optimise. When either group has no correctly classified
        sample, the plain cross-entropy loss is returned.
    """
    cross_entropy_loss = nn.CrossEntropyLoss()(outputs, labels)
    assert len(inputs) == len(outputs)
    # inputs.requires_grad = True # this needs to be done before having outputs
    # 1 is minority class; 0 is majority
    assert len(protected_classes[protected_classes == -1]) == 0  # nothing should be -1
    # assert len(protected_classes[protected_classes == 1]) > 0
    # assert len(protected_classes[protected_classes == 0]) > 0
    assert (len(protected_classes[protected_classes == 0]) +
            len(protected_classes[protected_classes == 1])
            ) == len(protected_classes)

    # Only correctly classified samples take part in the regularisation.
    _, predicted_classes = torch.max(outputs, 1)
    mask_correct_predictions = predicted_classes == labels
    mask_minority = mask_correct_predictions & (protected_classes == 1)
    mask_majority = mask_correct_predictions & (protected_classes == 0)
    minority_outputs, majority_outputs = outputs[mask_minority], outputs[
        mask_majority]
    assert len(minority_outputs) == torch.sum(mask_minority) and \
        len(majority_outputs) == torch.sum(mask_majority)

    # One group is empty: record a zero regularisation term and fall back
    # to the plain cross-entropy loss.
    if len(minority_outputs) == 0 or len(majority_outputs) == 0:
        ce_loss = cross_entropy_loss.item()
        if phase == 'train':
            self.regularization_terms_batch_train.append(0.)
            self.cross_entropy_losses_batch_train.append(ce_loss)
            self.total_loss_batch_train.append(ce_loss)
        elif phase == 'test':
            self.regularization_terms_batch_test.append(0.)
            self.cross_entropy_losses_batch_test.append(ce_loss)
            self.total_loss_batch_test.append(ce_loss)
        return cross_entropy_loss

    # Build (row indices, arg-max column indices) to pick each sample's
    # predicted-class logit.
    indices_minority = [
        coord for coord in zip(*enumerate(torch.argmax(minority_outputs, 1)))
    ]
    indices_majority = [
        coord for coord in zip(*enumerate(torch.argmax(majority_outputs, 1)))
    ]
    assert len(indices_majority) == 2 and len(indices_minority) == 2
    output_class_logits_minority = minority_outputs[indices_minority[0],
                                                    indices_minority[1]]
    output_class_logits_majority = majority_outputs[indices_majority[0],
                                                    indices_majority[1]]

    # Gradients are taken with respect to the full `inputs` batch and then
    # restricted to the rows of the group.
    grad_minority = autograd.grad(
        outputs=output_class_logits_minority,
        inputs=inputs,
        only_inputs=True,
        retain_graph=True,
        grad_outputs=torch.ones_like(output_class_logits_minority,
                                     device=self.device))[0][mask_minority]
    grad_majority = autograd.grad(
        outputs=output_class_logits_majority,
        inputs=inputs,
        only_inputs=True,
        retain_graph=True,
        grad_outputs=torch.ones_like(output_class_logits_majority,
                                     device=self.device))[0][mask_majority]

    # |logit| divided by the norm of its flattened input gradient.
    d_approx_minority = torch.abs(output_class_logits_minority).float(
    ) / torch.norm(grad_minority.view(grad_minority.shape[0], -1), dim=1)
    d_approx_majority = torch.abs(output_class_logits_majority).float(
    ) / torch.norm(grad_majority.view(grad_majority.shape[0], -1), dim=1)
    print(d_approx_minority.shape, d_approx_majority.shape,
          torch.mean(d_approx_minority).item(),
          torch.mean(d_approx_majority).item())

    if self.probabilities:
        if self.sigmoid_approx:
            # This takes a sigmoid approximation
            regularization_minority = torch.sum(
                hp.sigmoid(-d_approx_minority +
                           self.tau)).float() / torch.sum(mask_minority)
            regularization_majority = torch.sum(
                hp.sigmoid(-d_approx_majority +
                           self.tau)).float() / torch.sum(mask_majority)
        else:
            # This does the actual thresholding on tau to calculate exact probabilities
            # (Highly non-smooth and non-differentiable)
            regularization_minority = torch.sum(
                d_approx_minority < self.tau).float() / torch.sum(
                    mask_minority)
            regularization_majority = torch.sum(
                d_approx_majority < self.tau).float() / torch.sum(
                    mask_majority)
    else:
        # NOTE(review): if no value is below tau the selection is empty and
        # the mean is presumably nan -- confirm this cannot happen.
        regularization_minority = torch.mean(
            d_approx_minority[d_approx_minority < self.tau])
        regularization_majority = torch.mean(
            d_approx_majority[d_approx_majority < self.tau])

    # normalize this since CrossEntropyLoss is also normalized
    regularization = torch.abs(regularization_minority -
                               regularization_majority)

    # Bookkeeping of per-batch values for the current phase.
    if phase == 'train':
        self.regularization_terms_batch_train.append(regularization.item())
        self.cross_entropy_losses_batch_train.append(
            cross_entropy_loss.item())
        self.total_loss_batch_train.append(
            (cross_entropy_loss + self.alpha * regularization).item())
        self.d_approx_majority_train.extend(
            [x.item() for x in d_approx_majority])
        self.d_approx_minority_train.extend(
            [x.item() for x in d_approx_minority])
    elif phase == 'test':
        self.regularization_terms_batch_test.append(regularization.item())
        self.cross_entropy_losses_batch_test.append(
            cross_entropy_loss.item())
        self.total_loss_batch_test.append(
            (cross_entropy_loss + self.alpha * regularization).item())
        self.d_approx_majority_test.extend(
            [x.item() for x in d_approx_majority])
        self.d_approx_minority_test.extend(
            [x.item() for x in d_approx_minority])

    if self.robust_regularization:
        ## This is the case when we want to reduce unfairness and also increase robustness
        # negative sign since we want to maximize these individual robustness measures of majority and minority
        if self.probabilities and not self.sigmoid_approx:
            assert regularization_majority >= 0 and regularization_minority >= 0
        print(
            'CE Loss: {}, regularization: {}, regularization_minority: {}, regularization_majority: {}'
            .format(cross_entropy_loss, regularization,
                    regularization_minority, regularization_majority))
        final_loss = cross_entropy_loss + self.alpha * regularization + \
            self.beta * regularization_majority + self.gamma * regularization_minority
    else:
        final_loss = cross_entropy_loss + self.alpha * regularization
    return final_loss
# Load MNIST; `m` (number of samples kept) is defined outside this fragment.
images, labels = mnist.load_mnist()
print(images.shape)

# Flatten every image, keep the first m of them, and put one image per column.
X = np.array([k.ravel() for k in images])[0:m, :].T

# Prepend a constant row of ones to the feature matrix.
X = np.insert(X, 0, 1, axis=0)
print(X.shape)
n_features = X.shape[0]

# Normalize the data.
# NOTE(review): the row of ones inserted above is divided by 255 as well --
# confirm that this scaling of the constant feature is intended.
X = X / 255

# Randomly initialised weights: a 20-unit hidden layer and 10 outputs.
w1 = np.random.randn(20, n_features)
w2 = np.random.randn(10, 20)
print(w1.shape)

# Forward pass through both sigmoid layers.
z1 = np.dot(w1, X)
a1 = hl.sigmoid(z1)
z2 = np.dot(w2, a1)
a2 = hl.sigmoid(z2)