def person_follow(self):
        """ Implements person following behavior """
        m = Twist()
        r = rospy.Rate(10)
        print('*********Person Following*********')
        while not rospy.is_shutdown():
            # Doesn't have a POI
            if self.POI[0] is None:
                return self.seeking
            # Checks if neato is close enough to person to stop
            elif abs(self.POI[0]) <= .5:
                m.linear.x = 0
                m.angular.z = 0
                self.vel_pub.publish(m)

            else:
                # Checks if heading of neato is not in the direction of the POI
                if abs(self.POI[1]) > .1:
                    # Continue turning at angular speed based on angle (in rads) left to cover

                    # TODO: should this be -self.POI[1]?
                    if 0 < self.POI[1] <= math.pi:
                        m.angular.z = sigmoid(self.POI[1]) * 0.6
                    else:
                        m.angular.z = -sigmoid(self.POI[1]) * 0.6
                else:
                    # Drive straight at speed based on distance to drive
                    m.linear.x = self.POI[0] * 0.5
                    m.angular.z = 0

            self.vel_pub.publish(m)

            r.sleep()
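The snippet above squashes the remaining turn angle through a bare sigmoid() before applying the 0.6 rad/s cap. The helper itself is not shown on this page; a minimal sketch of the scalar logistic function it is assumed to be:

import math

# Assumed scalar logistic helper (not shown in the original snippet):
# maps any angle to (0, 1), which person_follow then scales by 0.6.
def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))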
Example #2
def forward_prop(Batch_Norm, param, x):
    w1, w2, w3, b1, b2, b3 = param['w1'], param['w2'], param['w3'], param[
        'b1'], param['b2'], param['b3']
    # input to hidden layer - pre-activation
    a1 = np.dot(x, w1) + b1

    if Batch_Norm:
        # send it to batch norm
        a1, param = BatchNorm.forward(a1, param, level=1)

    #hidden layer activation
    h1 = helper.sigmoid(a1)
    #hidden layer to hidden layer - pre-activation
    a2 = np.dot(h1, w2) + b2

    if Batch_Norm:
        # send it to batch norm
        a2, param = BatchNorm.forward(a2, param, level=2)

    #hidden layer activation
    h2 = helper.sigmoid(a2)
    #hidden layer to output - pre-activation
    a3 = np.dot(h2, w3) + b3

    if Batch_Norm:
        # send it to batch norm
        a3, param = BatchNorm.forward(a3, param, level=3)

    #output layer activation resulting in probability scores
    prob_scores = helper.softmax(a3)
    return prob_scores, h1, h2
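forward_prop above relies on helper.sigmoid and helper.softmax, which are not listed on this page. A minimal NumPy sketch of what those helpers are assumed to compute (element-wise logistic activation, and row-wise softmax with max-subtraction for numerical stability):

import numpy as np

# Assumed element-wise logistic activation.
def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

# Assumed row-wise softmax over a (batch, classes) score matrix,
# shifted by the row maximum for numerical stability.
def softmax(a):
    shifted = a - np.max(a, axis=1, keepdims=True)
    exp_a = np.exp(shifted)
    return exp_a / np.sum(exp_a, axis=1, keepdims=True)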
Example #3
    def get_D_losses(self, obj='original'):
        # logits --> probabilities
        self.Df = [sigmoid(logit) for logit in self.Df_logits]
        self.Dr = [sigmoid(logit) for logit in self.Dr_logits]

        self.D_losses = [tf.reduce_mean(-tf.log(self.Dr[ind]) - tf.log(1 - self.Df[ind]))
                         for ind in range(len(self.Dr))]

        # Define minimax objectives for discriminators
        self.V_D = [tf.reduce_mean(tf.log(self.Dr[ind]) + tf.log(1 - self.Df[ind])) for ind in range(len(self.Dr))]
Example #4
File: evaluate.py  Project: AmmieQi/goat
def compute_link_probabilities(is_dev=True,
                               u_embed=None,
                               v_embed=None,
                               test_edges=None):
    """
    Computes the link
    :param is_dev:
    :param u_embed:
    :param v_embed:
    :param test_edges:
    :return:
    """
    # Adapted from CANE: https://github.com/thunlp/CANE/blob/master/code/auc.py
    if is_dev:
        nodes = list(range(u_embed.shape[0]))
        test_edges = list(zip(range(u_embed.shape[0]),
                              range(v_embed.shape[0])))
    else:
        nodes = list({n for edge in test_edges for n in edge})

    def get_random_index(u, v, lookup=None):
        while True:
            node = np.random.choice(nodes)
            if node != u and node != v:
                if lookup is None:
                    return node
                elif node in lookup:
                    return node

    link_probabilities = []
    for i in range(len(test_edges)):
        if is_dev:
            u = v = i
            j = get_random_index(u=i, v=i)
        else:
            u = test_edges[i][0]
            v = test_edges[i][1]
            if u not in u_embed or v not in u_embed:
                continue
            j = get_random_index(u=u, v=v, lookup=v_embed)

        u_emb = u_embed[u]
        v_emb = v_embed[v]
        j_emb = v_embed[j]

        pos_score = helper.sigmoid(u_emb.dot(v_emb.transpose()).max())
        neg_score = helper.sigmoid(u_emb.dot(j_emb.transpose()).max())

        link_probabilities.append([pos_score, neg_score])

    return np.array(link_probabilities)
Example #5
File: GMAN.py  Project: punkq/GMAN
    def get_G_boosted_loss(self, boosting_variant, mixing, obj='original'):
        # Define lambda placeholder
        self.l = tf.placeholder(tf.float32, name='lambda')

        # Boosting variants
        # boost_prediction: Use booster to predict probabilities
        # boost_training: Use boosting to train, but not predict probabilities
        if boosting_variant == 'boost_prediction':
            # Define generator loss
            if obj == 'original':
                self.G_loss = tf.reduce_mean(
                    tf.log(1 - sigmoid(self.Df_expected)))
            else:
                self.G_loss = tf.reduce_mean(
                    -tf.log(sigmoid(self.Df_expected)))

            # Define minimax objective for generator
            self.V_G = tf.reduce_mean(
                tf.log(self.Dr_expected) +
                tf.log(1 - sigmoid(self.Df_expected)))
        else:
            # Define generator loss
            if obj == 'original':
                self.G_losses = [
                    tf.reduce_mean(tf.log(1 - self.Df[ind]))
                    for ind in range(len(self.Df))
                ]
                sign = -1.
            else:
                self.G_losses = [
                    tf.reduce_mean(-tf.log(self.Df[ind]))
                    for ind in range(len(self.Df))
                ]
                sign = 1.
            _G_losses = [tf.expand_dims(loss, 0) for loss in self.G_losses]
            _G_losses = tf.concat(axis=0, values=_G_losses)
            self.G_loss = mix_prediction(_G_losses,
                                         self.l,
                                         mean_typ=mixing,
                                         weight_typ=self.weight_type,
                                         sign=sign)

            # Define minimax objective for generator
            self.V_G = mix_prediction(self.V_D,
                                      self.l,
                                      mean_typ=mixing,
                                      weight_typ=self.weight_type,
                                      sign=sign)

        tf.summary.scalar('G_loss', self.G_loss)
 def forward(self, X, W, b):
     '''
     Forward propagation function for logistic regression
     '''
     temp = X.dot(W.transpose()) + b
     pY = sigmoid(temp)
     return pY
Example #7
def classify(clf, test_data, threshold=0):
    pred_confidence = clf.decision_function(test_data)
    pred = [
        clf.classes_[1] if
        (val if threshold == 0 else sigmoid(val)) > threshold else
        clf.classes_[0] for val in pred_confidence
    ]
    return pred, pred_confidence
Example #8
def linear_activation_forward(A_prev, W, b, activation):
    Z = np.dot(W, A_prev) + b
    if activation == "relu":
        A = relu(Z)
    elif activation == "sigmoid":
        A = sigmoid(Z)
    cache = (A_prev, Z, W, b)
    return A, cache
def forward_propagation(X, parameters):
    W = parameters["W"]
    b = parameters["b"]

    Z = np.dot(W, X) + b
    A = sigmoid(Z)

    return A
Example #10
 def forward(self, X, W1, W2, b1, b2):
     '''
     Forward propagation function for Neural Network
     '''
     p1 = X.dot(W1) + b1   # pre-activation values from layer one
     Z1 = sigmoid(p1)      # sigmoid activations from layer one
     p2 = Z1.dot(W2) + b2  # outputs from the hidden layer (layer two)
     pY = softmax(p2)      # final predictions based on the layer-two output
     return pY, p1
def propagate(parameters, X, Y):
    W = parameters["W"]
    b = parameters["b"]
    m = X.shape[1]
    A = sigmoid(np.dot(W, X) + b)
    cost = -np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A)) / m
    dW = np.mean((A - Y) * X, 1)
    db = np.mean(A - Y)
    grads = {"dW": dW, "db": db}
    return grads, cost
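A hedged usage sketch for propagate above (shapes, data, and the 0.1 learning rate are illustrative, not from the original project), showing one gradient-descent step with the returned grads:

import numpy as np

def sigmoid(z):  # assumed logistic helper used by propagate
    return 1.0 / (1.0 + np.exp(-z))

np.random.seed(0)
X = np.random.randn(3, 5)                       # 3 features, 5 examples
Y = (np.random.rand(1, 5) > 0.5).astype(float)  # binary labels
parameters = {"W": np.zeros((1, 3)), "b": 0.0}

grads, cost = propagate(parameters, X, Y)
parameters["W"] = parameters["W"] - 0.1 * grads["dW"]
parameters["b"] = parameters["b"] - 0.1 * grads["db"]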
Example #12
    def _forward_prop(self, x):
        #set activations equal to the input array
        self._outs[0] = x
        #process each layer using our weights, biases and activation function

        for i in range(1, self.num_layers):
            #calculate each neuron's pre-activation values
            self._inps[i] = (self.weights[i].dot(self._outs[i - 1]) +
                             self.biases[i])
            # activate these values to get that layer's outputs
            self._outs[i] = helper.sigmoid(self._inps[i])
Example #13
def predict(text):
  score = sigmoid(np.dot(extract_features(text), weights) + biases)
  rv = {
    "score": score,
  }
  if score > 0.5:
    rv['gender'] = "Male"
  else:
    rv['gender'] = "Female"

  return rv
def forward_prop(param, x):
    w1, w2, b1, b2 = param['w1'], param['w2'], param['b1'], param['b2']
    # input to hidden layer - pre-activation
    a1 = np.dot(x, w1) + b1
    #hidden layer activation
    h1 = helper.sigmoid(a1)
    #h1 = helper.tanh(a1) #for tanh uncomment this
    #h1 = helper.relu_activation(a1)
    #input to output layer - pre-activation
    a2 = np.dot(h1, w2) + b2
    #output layer activation resulting in probability scores
    prob_scores = helper.softmax(a2)
    return prob_scores, h1
Example #15
    def run(self):
        # Given an angle and a distance from the base_link frame, the neato should aim to
        # move in the right direction and close the gap.
        # The function should allow for mid-run recalibration
        r = rospy.Rate(10)
        while not rospy.is_shutdown():
            x = self.POI[0] * math.cos(self.POI[1])
            y = self.POI[0] * math.sin(self.POI[1])

            self.person_marker.pose.position.x = x
            self.person_marker.pose.position.y = y
            self.marker_pub.publish(self.person_marker)
            # Checks if neato is close enough to person to stop
            if abs(self.POI[0]) <= .5:
                self.twist.linear.x = 0
                self.twist.angular.z = 0
                self.pub.publish(self.twist)

            else:
                # Checks if heading of neato is not in the direction of the POI
                if abs(self.POI[1]) > .1:
                    # Continue turning at angular speed based on angle (in rads) left to cover
                    # We use a sigmoid function to scale the motor speed to between 0 and 0.6
                    if 0 < self.POI[1] <= math.pi:
                        self.twist.angular.z = helper.sigmoid(
                            self.POI[1]) * 0.6
                    else:
                        self.twist.angular.z = -helper.sigmoid(
                            self.POI[1]) * 0.6
                else:
                    # Drive straight at speed based on distance to drive
                    self.twist.linear.x = self.POI[0] * 0.5
                    self.twist.angular.z = 0

            self.pub.publish(self.twist)

            r.sleep()
Example #16
File: MLP.py  Project: ioseff-i/introML
 def feed_forward(self, X, y):
   '''
   Implementation of the Feedforward
   '''
   Z = {}
   input_layer = X
   for i in range(1, len(self.layer_sizes)):
     Z["Z"+str(i)] = np.dot(self.weights["W"+str(i)], input_layer) + self.bias["b"+str(i)]
     if i == len(self.hidden_layer_sizes):
       self.A["A"+str(i)],self.df["df"+str(i)] = ut.sigmoid(Z["Z"+str(i)])
       
       
     else:
       self.A["A"+str(i)],self.df["df"+str(i)] = ut.tanh(Z["Z"+str(i)])
     input_layer = self.A["A"+str(i)]
   error = ut.entropy_loss(self.A["A"+str(len(self.hidden_layer_sizes)+1)],y)
   return  error, self.A["A"+str(len(self.hidden_layer_sizes)+1)]
Example #17
def classify_one_class_svm(clf, test_data, threshold=-1):
    if threshold == -1:
        pred_confidence = clf.predict(test_data)
        """
        leave label is 1
        stay label 0
        """
        """
            predicted value
            -1 will leave
        1 will stay
        """
        pred = [1 if val == 1 else 0 for val in pred_confidence]
        return pred, pred_confidence
    else:
        pred_confidence = clf.decision_function(test_data)
        pred = [
            1 if (val if threshold == 0 else sigmoid(val)) > threshold else 0
            for val in pred_confidence
        ]
        return pred, pred_confidence
Example #18
def compute_gradient_entropy(batch, weights, bias, b_or_w):
    """
    Computes the cross-entropy error gradient by summing over gradients
    of all data points in batch.
    """
    assert b_or_w == 'w' or b_or_w == 'b'
    if b_or_w == 'w':
        ret = np.zeros((784, 10))
    else:
        ret = np.zeros((10, 1))
    for dp in batch:
        x = dp.T[:784].reshape(784, 1)
        t = np.zeros((10, 1))
        t[int(dp.T[784:][0])] = 1
        y = helper.sigmoid(x, weights, bias)
        if b_or_w == 'w':
            v = y - t
            ret += np.dot(x, v.T)
        else: # b_or_w == 'b':
            ret += y - t
    return ret
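The three-argument helper.sigmoid(x, weights, bias) called above (and in compute_gradient_mse below) is not shown on this page. Its call sites suggest it returns the logistic of the affine score W^T x + b as a (10, 1) vector; a hypothetical reconstruction under that assumption:

import numpy as np

# Hypothetical reconstruction: x is (784, 1), weights is (784, 10),
# bias is (10, 1); returns a (10, 1) vector of class activations.
def sigmoid(x, weights, bias):
    z = np.dot(weights.T, x) + bias
    return 1.0 / (1.0 + np.exp(-z))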
Example #19
def compute_gradient_mse(batch, weights, bias, b_or_w):
    """
    Computes the mean squared error gradient by summing over gradients
    of all data points in batch.
    """
    assert b_or_w == 'w' or b_or_w == 'b'
    if b_or_w == 'w':
        ret = np.zeros((784, 10))
    else:
        ret = np.zeros((10, 1))
    for dp in batch:
        x = dp.T[:784].reshape(784, 1).astype(float)
        t = np.zeros((10, 1))
        t[int(dp.T[784:][0])] = 1
        y = helper.sigmoid(x, weights, bias)
        if b_or_w == 'w':
            v = np.diagonal(np.dot(np.diagonal(np.dot((y - t), (1 - y).T)).\
                reshape(10, 1), y.T)).reshape(10, 1)
            ret += np.dot(x, v.T)
        else: # b_or_w == 'b':
            ret += np.diagonal(np.dot(np.diagonal(np.dot((y - t), (1 - y).T)).\
                reshape(10, 1), y.T)).reshape(10, 1)
    return ret
def output_layer_fp(Input, W, b):
    # W shape: (1, input_size)
    # b shape: (1, 1)

    output = np.matmul(Input, W.T) + b
    return h.sigmoid(output)
Example #21
        dl_val = DataLoader(ds_val, batch_size=args.batch_size, shuffle=False)

        train_loader = dl_train
        valid_loader = dl_test

        # Building ensemble (average)
        test_info = []
        for num_models in range(args.num_ensemble):
            print(
                f"Training ensemble model {num_models} / {args.num_ensemble}")
            valid_acc, valid_auc, valid_ce, valid_info = model.train(
                dl_train, dl_val)
            acc, auc, ce, _test_info = model.test(dl_test)
            test_info.append(_test_info)
            print(f"validation auc = {valid_auc}\ntest auc = {auc}")
        prob = np.asarray([sigmoid(it[0]) for it in test_info]).mean(0)
        # Convert back to logits
        scores = np.log(prob / (1 - prob + 1e-10))
        labels = test_info[0][1]
        auc, acc, ce = compute_metrics(scores, labels)
        print(f"Fold {i}\nAcc: {acc:.2f}\nAuc: {auc:.2f}\nCE: {ce:.2f}")

        # auc = train_model(
        #     model,
        #     patience,
        #     n_epochs,
        #     train_loader,
        #     valid_loader,
        #     optimizer,
        #     criterion,
        #     device,
def predict(X, parameters):
    W = parameters["W"]
    b = parameters["b"]
    pred = np.floor(sigmoid(np.dot(W, X) + b) + 0.5)
    return pred
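In predict above, np.floor(sigmoid(z) + 0.5) is simply a way of thresholding the probability at 0.5. A quick illustrative check (values chosen arbitrarily, sigmoid assumed to be the standard logistic helper):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

z = np.array([-2.0, -0.1, 0.0, 0.3, 4.0])
print(np.floor(sigmoid(z) + 0.5))          # [0. 0. 1. 1. 1.]
print((sigmoid(z) >= 0.5).astype(float))   # same thresholding result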
Example #23
    def get_D_boosted_losses(self, boosting_variant, obj='original'):
        # Define auxiliary placeholders
        t = tf.placeholder(tf.float32)
        alpha = tf.placeholder(tf.float32, shape=[self.N])
        v = tf.placeholder(tf.float32, shape=[self.N])

        # Compute expectation of booster prediction
        _Df_logits = tf.concat(axis=1, values=self.Df_logits)
        _Dr_logits = tf.concat(axis=1, values=self.Dr_logits)
        _Df = tf.cumsum(alpha * _Df_logits, axis=1, exclusive=False)
        _Dr = tf.cumsum(alpha * _Dr_logits, axis=1, exclusive=False)
        Df_weighted = v / tf.reduce_sum(v) * _Df
        Dr_weighted = v / tf.reduce_sum(v) * _Dr
        self.Df_expected = tf.reduce_sum(Df_weighted, axis=1)
        self.Dr_expected = tf.reduce_sum(Dr_weighted, axis=1)

        # Compute auxiliary variable, s
        # Note: 'q' is 'z' from AdaBoost.OL to avoid confusion with latent variable 'z' in GAN
        qf = -_Df_logits
        qr = _Dr_logits
        q = tf.concat(axis=0, values=[qf, qr])
        s_0 = tf.clip_by_value(tf.cumsum(alpha * q, exclusive=True), -4., 4.)
        s_1 = tf.clip_by_value(tf.cumsum(alpha * q, exclusive=False), -4., 4.)

        # Compute loss weights
        w = 1 / (1 + tf.exp(s_0))  # size: batch_size x num_discriminators
        wf, wr = tf.split(axis=0, num_or_size_splits=2, value=w)
        wf_split = tf.split(axis=1, num_or_size_splits=self.N, value=wf)
        wr_split = tf.split(axis=1, num_or_size_splits=self.N, value=wr)

        # Define v update -- only needed if training generator with expectation of booster prediction
        wrong_f = sigmoid(Df_weighted)
        wrong_r = sigmoid(-Dr_weighted)
        wrong = tf.concat(axis=0, values=[wrong_f, wrong_r])
        v_new = tf.reduce_mean(v * tf.exp(wrong), axis=0)

        # Define alpha update
        nt = 4 / tf.sqrt(t)
        alpha_delta = nt * q / (1 + tf.exp(s_1))
        alpha_new = tf.reduce_mean(tf.clip_by_value(alpha + alpha_delta, -2, 2), axis=0)

        # Store auxiliary variable update pairs (t,alpha,v)
        self.aux_vars = [t, alpha, v]
        self.aux_vars_new = [t + 1, alpha_new, v_new]

        # logits --> probabilities
        self.Df = [sigmoid(logit) for logit in self.Df_logits]
        self.Dr = [sigmoid(logit) for logit in self.Dr_logits]

        # Define discriminator losses
        if obj == 'original':
            self.D_losses = [tf.reduce_mean(-wr_split[ind] * tf.log(self.Dr[ind])
                                            - wf_split[ind] * tf.log(1 - self.Df[ind]))
                             for ind in range(len(self.Dr))]
        else:
            self.D_losses = [tf.reduce_mean(-wr_split[ind] * tf.log(self.Dr[ind])
                                            + wf_split[ind] * tf.log(self.Df[ind]))
                             for ind in range(len(self.Dr))]
        for ind in range(len(self.Dr)):
            tf.summary.scalar('D_%d_Loss' % ind, self.D_losses[ind])

        # Define minimax objectives for discriminators
        self.V_D = [tf.reduce_mean(tf.log(self.Dr[ind]) + tf.log(1 - self.Df[ind])) for ind in range(len(self.Dr))]
Example #24
def test_sigmoid(sigmoid_input):
    from helper import sigmoid
    res = sigmoid(sigmoid_input)

    return res
    for j in range(iterations):
        # At each iteration, we refine the model
        batch_idx = np.random.randint(0, m - batch_size)
        batch = X[:, batch_idx:batch_idx + batch_size]
        score = np.dot(w.T, batch)
        h = activation(score)
        delta = (h - y[:, batch_idx:batch_idx + batch_size])
        g = alpha / m * np.dot(batch, delta.T)
        w -= g


for i in range(epochs):
    ##print('Epoch', i)
    if i % report_interval == 0:
        score = np.dot(w.T, X)
        h = hl.sigmoid(score)
        j = -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h), axis=0) / m
        js.append(j)
        error = calculate_error()
        errors.append(error)
        print('Error', error, '%')

    # Annealing alpha over time
    alpha = alpha_initial - i / epochs * (alpha_initial - alpha_final)

    # You can comment out two to test the third
    # batch_gd(alpha)
    # sgd(alpha)
    batch_gd(alpha)

js = np.array(js)
Example #26
 def train(self, X, Y, step_size=10e-7, epochs=10000):
     '''
     Training function used to train a neural network model on given data
     '''
     X, Y = shuffle(X, Y)
     Xvalid, Yvalid = X[-1000:], Y[-1000:]
     X, Y = X[:-1000], Y[:-1000]   
     K = len(set(Y))        
     Y = y2indicator(Y, K)
     Yvalid = y2indicator(Yvalid, K)
     M, D = X.shape
     m1=5 #number of neurons for the hidden layer
     W1=[[random.uniform(0,1) for j in range(m1)] for i in range(D)] #initial weights for first layer 
     W1=np.array(W1)
     b1=0 #initial first layer bias
     W2=[[random.uniform(0,1) for j in range(2)] for i in range(m1)] #initial weights for output layer
     W2=np.array(W2)
     b2=0 #bias for output layer
     train_costs = []
     valid_costs = []
     best_validation_error = 1
     errorV=[] 
     errorT=[]
     for i in range(epochs): #for the given number of epochs
         print("Epoch: ",i)
         pY, z = self.forward(X, W1, W2, b1, b2)  # forward propagation for train data
         W2=np.subtract(W2,step_size*(sigmoid((z.transpose()).dot((np.subtract(pY,Y)))))) #updating w2 by gradient descent
         b2=b2-step_size*(np.subtract(pY,Y)) #updating bias
         s=0
         s=s+sum([b for b in b2])
         b2=s
         z=z.transpose().dot(z)
         j=((np.subtract(pY,Y)).dot(W2.transpose())).dot((np.subtract(1,(z))).transpose())     #calculating error in activation        
         W1=np.subtract(W1,step_size*(X.transpose()).dot(j)) #using error in activation for back propogation
         b1 = b1 - step_size * j  # updating first-layer bias
         s=0
         s=s+sum([b for b in b2])
         b1=s
         
         Pans=[  [] for k in range(len(pY)) ]
         for k in range(len(pY)): #normalising the predicted labels according to class labels
             if pY[k][0] > 0.5:
                 Pans[k].append(0)
             else:
                 Pans[k].append(1)
             if pY[k][1] > 0.5:
                 Pans[k].append(0)
             else:
                 Pans[k].append(1)
         Pans=np.array(Pans)
         train_costs.append(sigmoid_cost(Y,Pans)) #error cost for prediction (train)
         et=error_rate(Y,Pans) #error in predictions
         errorT.append(et) #error in train set over time
         
         Pvalid, zValid = self.forward(Xvalid, W1, W2, b1, b2)  # forward propagation for validation set based on updated weights
         PVans=[  [] for k in range(len(Pvalid)) ]
         for k in range(len(Pvalid)):  # normalising predictions based on class labels
             if Pvalid[k][0] > 0.5:
                 PVans[k].append(0)
             else:
                 PVans[k].append(1)
             if Pvalid[k][1] > 0.5:
                 PVans[k].append(0)
             else:
                 PVans[k].append(1)
         PVans=np.array(PVans)
         valid_costs.append(sigmoid_cost(Yvalid,PVans)) #error cost for prediction (validation)
         eV=error_rate(Yvalid,PVans) #error in predictions
         if eV < best_validation_error: #finding the best validation error
                     best_w1=W1
                     best_w2=W2
                     best_b1=b1
                     best_b2=b2
                     best_validation_error=eV
         errorV.append(eV) #error in validation set over time
     plt.plot(errorT) #Plotting error in train set over time
     #plt.plot(errorV) #Uncomment this to plot error in validation set over time
     print(best_validation_error)
     return best_w1,best_w2,best_b1,best_b2
Example #27
def predict(text):
    return sigmoid(np.dot(get_features(text), weights) + biases)
Example #28
 def update(self):
     helper.sigmoid()
Example #29
 def ac_f(param):
     x = np.hstack((param, np.tile(context, (param.shape[0], 1))))
     ret = self.model.predict(x).astype(np.float64)
     if self.model_type != 'classification':
         ret = helper.sigmoid(ret)
     return np.squeeze(ret)
Example #30
    def __call__(self, outputs, labels, protected_classes, inputs, phase):
        cross_entropy_loss = nn.CrossEntropyLoss()(outputs, labels)
        assert len(inputs) == len(outputs)
        # inputs.requires_grad = True # this needs to be done before having outputs
        # 1 is minority class; 0 is majority
        assert len(protected_classes[protected_classes ==
                                     -1]) == 0  # nothing should be -1
        # assert len(protected_classes[protected_classes == 1]) > 0
        # assert len(protected_classes[protected_classes == 0]) > 0
        assert (len(protected_classes[protected_classes == 0]) +
                len(protected_classes[protected_classes == 1])
                ) == len(protected_classes)

        _, predicted_classes = torch.max(outputs, 1)
        mask_correct_predictions = predicted_classes == labels

        mask_minority = mask_correct_predictions & (protected_classes == 1)
        mask_majority = mask_correct_predictions & (protected_classes == 0)
        minority_outputs, majority_outputs = outputs[mask_minority], outputs[
            mask_majority]

        assert len(minority_outputs) == torch.sum(mask_minority) and \
            len(majority_outputs) == torch.sum(mask_majority)

        if len(minority_outputs) == 0 or len(majority_outputs) == 0:
            ce_loss = cross_entropy_loss.item()
            if phase == 'train':
                self.regularization_terms_batch_train.append(0.)
                self.cross_entropy_losses_batch_train.append(ce_loss)
                self.total_loss_batch_train.append(ce_loss)
            elif phase == 'test':
                self.regularization_terms_batch_test.append(0.)
                self.cross_entropy_losses_batch_test.append(ce_loss)
                self.total_loss_batch_test.append(ce_loss)
            return cross_entropy_loss

        indices_minority = [
            coord
            for coord in zip(*enumerate(torch.argmax(minority_outputs, 1)))
        ]
        indices_majority = [
            coord
            for coord in zip(*enumerate(torch.argmax(majority_outputs, 1)))
        ]

        assert len(indices_majority) == 2 and len(indices_minority) == 2

        output_class_logits_minority = minority_outputs[indices_minority[0],
                                                        indices_minority[1]]
        output_class_logits_majority = majority_outputs[indices_majority[0],
                                                        indices_majority[1]]

        grad_minority = autograd.grad(
            outputs=output_class_logits_minority,
            inputs=inputs,
            only_inputs=True,
            retain_graph=True,
            grad_outputs=torch.ones_like(output_class_logits_minority,
                                         device=self.device))[0][mask_minority]
        grad_majority = autograd.grad(
            outputs=output_class_logits_majority,
            inputs=inputs,
            only_inputs=True,
            retain_graph=True,
            grad_outputs=torch.ones_like(output_class_logits_majority,
                                         device=self.device))[0][mask_majority]

        d_approx_minority = torch.abs(output_class_logits_minority).float(
        ) / torch.norm(grad_minority.view(grad_minority.shape[0], -1), dim=1)
        d_approx_majority = torch.abs(output_class_logits_majority).float(
        ) / torch.norm(grad_majority.view(grad_majority.shape[0], -1), dim=1)

        print(d_approx_minority.shape, d_approx_majority.shape,
              torch.mean(d_approx_minority).item(),
              torch.mean(d_approx_majority).item())

        if self.probabilities:
            if self.sigmoid_approx:
                # This takes a sigmoid approximation
                regularization_minority = torch.sum(
                    hp.sigmoid(-d_approx_minority +
                               self.tau)).float() / torch.sum(mask_minority)
                regularization_majority = torch.sum(
                    hp.sigmoid(-d_approx_majority +
                               self.tau)).float() / torch.sum(mask_majority)
            else:
                # This does the actual thresholding on tau to calculate exact probabilities
                # (Highly non-smooth and non-differentiable)
                regularization_minority = torch.sum(
                    d_approx_minority < self.tau).float() / torch.sum(
                        mask_minority)
                regularization_majority = torch.sum(
                    d_approx_majority < self.tau).float() / torch.sum(
                        mask_majority)
        else:
            regularization_minority = torch.mean(
                d_approx_minority[d_approx_minority < self.tau])
            regularization_majority = torch.mean(
                d_approx_majority[d_approx_majority < self.tau])

        # normalize this since CrossEntropyLoss is also normalized
        regularization = torch.abs(regularization_minority -
                                   regularization_majority)

        if phase == 'train':
            self.regularization_terms_batch_train.append(regularization.item())
            self.cross_entropy_losses_batch_train.append(
                cross_entropy_loss.item())
            self.total_loss_batch_train.append(
                (cross_entropy_loss + self.alpha * regularization).item())
            self.d_approx_majority_train.extend(
                [x.item() for x in d_approx_majority])
            self.d_approx_minority_train.extend(
                [x.item() for x in d_approx_minority])
        elif phase == 'test':
            self.regularization_terms_batch_test.append(regularization.item())
            self.cross_entropy_losses_batch_test.append(
                cross_entropy_loss.item())
            self.total_loss_batch_test.append(
                (cross_entropy_loss + self.alpha * regularization).item())
            self.d_approx_majority_test.extend(
                [x.item() for x in d_approx_majority])
            self.d_approx_minority_test.extend(
                [x.item() for x in d_approx_minority])

        if self.robust_regularization:
            ## This is the case when we want to reduce unfairness and also increase robustness
            # negative sign since we want to maximize these individual robustness measures of majority and minority

            if self.probabilities and not self.sigmoid_approx:
                assert regularization_majority >= 0 and regularization_minority >= 0

            print(
                'CE Loss: {}, regularization: {}, regularization_minority: {}, regularization_majority: {}'
                .format(cross_entropy_loss, regularization,
                        regularization_minority, regularization_majority))

            final_loss = cross_entropy_loss + self.alpha * regularization + \
                self.beta * regularization_majority + self.gamma * regularization_minority

        else:
            final_loss = cross_entropy_loss + self.alpha * regularization

        return final_loss
images, labels = mnist.load_mnist()
print(images.shape)

# Load and ravel the images
X = np.array([k.ravel() for k in images])[0:m, :].T

# Insert 1 at the beginning of each image
X = np.insert(X, 0, 1, axis=0)
print(X.shape)
n_features = X.shape[0]

# Normalize the data
X = X / 255

# Initialize the weights
w1 = np.random.randn(20, n_features)
w2 = np.random.randn(10, 20)
print(w1.shape)

z1 = w1.dot(X)
a1 = hl.sigmoid(z1)
z2 = w2.dot(a1)
a2 = hl.sigmoid(z2)