def model_gaussian_rand_var_train(self, data, truth):
     """Collect per-label summary statistics for labels 0 and 1.

     For each label, the rows picked out by ``hw3.get_sub_at_value`` are
     passed to ``hw3.get_mus`` and ``hw3.get_std_dev``.  The value
     ``sum(truth) / len(truth)`` (the share of 1-labels when ``truth``
     holds only 0/1 values) is stored on ``self.y_prob``.

     Returns ``[mus, std_dev, y_prob]``; ``mus`` and ``std_dev`` are
     dicts keyed by label.
     """
     means_by_label = {}
     spread_by_label = {}
     for cls in (0, 1):
         rows = hw3.get_sub_at_value(data, truth, cls)
         means_by_label[cls] = hw3.get_mus(rows)
         spread_by_label[cls] = hw3.get_std_dev(rows)

     prior = float(sum(truth)) / len(truth)
     self.y_prob = prior
     return [means_by_label, spread_by_label, prior]
 def model_average_predict(self, data_row, theta=.5):
     """Predict a 0/1 label for every row with the above-average model.

     Each feature is reduced to a binary event: "is the value above the
     column average?".  The per-feature probabilities are multiplied
     together for each class and weighted by the class prior:

         P(Y|X) = ( P(X|Y) * P(Y) ) / P(X)

     P(X) is identical for both classes, so it is not evaluated; the two
     class scores are normalised against each other instead.

     ``self.model`` is read as
     ``[prob_over_given_1, prob_over_given_0, prob_over, prob_y1]``:
       prob_over_given_c[i] -- probability that column i is above
                               average when y = c
       prob_y1              -- probability that y = 1
     (``prob_over`` at index 2 is not needed by this computation.)

     data_row -- sequence of rows, each a sequence of feature values
     theta    -- a row is labelled 1 when its normalised score for
                 y = 1 is strictly greater than theta
     Returns a list with one 0/1 prediction per row; an empty input
     yields an empty list.
     """
     if len(data_row) == 0:
         return []

     # NOTE(review): the column averages are taken from the rows being
     # predicted, not from the training data -- confirm this is intended.
     mus = hw3.get_mus(data_row)
     prob_over_given_1 = self.model[0]
     prob_over_given_0 = self.model[1]
     prob_y1 = self.model[3]

     predict = []
     for row in data_row:
         like_1 = 1
         like_0 = 1
         for c, value in enumerate(row):
             if value > mus[c]:
                 like_1 = like_1 * prob_over_given_1[c]
                 like_0 = like_0 * prob_over_given_0[c]
             else:
                 like_1 = like_1 * (1 - prob_over_given_1[c])
                 like_0 = like_0 * (1 - prob_over_given_0[c])

         # Apply the priors last, after the likelihood products.
         score_1 = like_1 * prob_y1
         score_0 = like_0 * (1 - prob_y1)
         total = score_0 + score_1

         if total == 0:
             # Both classes give this row probability 0 (some feature
             # probability was exactly 0 or 1 for both).  The evidence
             # cannot separate the classes, so use the prior instead of
             # dividing by zero.
             posterior_1 = prob_y1
         else:
             posterior_1 = float(score_1) / total

         predict.append(1 if posterior_1 > theta else 0)
     return predict
    def model_average_train(self, data_row, truth):
        """Fit the "above column average" model.

        Column averages (``hw3.get_mus``) are taken over all of
        ``data_row``.  ``get_prob_over`` is then called with those same
        averages on all rows, on the rows labelled 1 and on the rows
        labelled 0 (presumably giving, per column, the probability that
        a value is above the average).

        ``sum(truth) / len(truth)`` is stored on ``self.y_prob`` and
        returned as ``prob_y1``.

        Returns ``[prob_over_given_1, prob_over_given_0, prob_over,
        prob_y1]``.
        """
        mus = hw3.get_mus(data_row)
        is_not_spam = hw3.get_sub_at_value(data_row, truth, 0)
        is_spam = hw3.get_sub_at_value(data_row, truth, 1)
        prob_over = get_prob_over(data_row, mus)
        prob_over_given_1 = get_prob_over(is_spam, mus)
        prob_over_given_0 = get_prob_over(is_not_spam, mus)

        # The two per-class lists must line up column for column.  If one
        # is shorter (presumably because that class had no rows -- TODO
        # confirm against get_prob_over), pad it with zeros up to the
        # length of the longer one so both can be indexed by column.
        l0 = len(prob_over_given_0)
        l1 = len(prob_over_given_1)
        if l1 > l0:
            prob_over_given_0 = list(prob_over_given_0) + [0] * (l1 - l0)
        elif l0 > l1:
            prob_over_given_1 = list(prob_over_given_1) + [0] * (l0 - l1)

        prob_y1 = float(sum(truth)) / len(truth)
        self.y_prob = prob_y1

        return [prob_over_given_1, prob_over_given_0, prob_over, prob_y1]
def test_GDA():
    """Print the test data, then the result of ``hw3.GDA`` on it.

    The covariance comes from ``hw3.get_covar`` applied to the data
    against itself, and the means from ``hw3.get_mus``.
    """
    sample = get_test_data()
    # A single parenthesised argument prints exactly like the bare
    # print statement.
    print(sample)
    covariance = hw3.get_covar(sample, sample)
    sample_mus = hw3.get_mus(sample)
    print(hw3.GDA(sample, sample_mus, covariance))
def test_get_mus():
    """Print the test data followed by ``hw3.get_mus`` of that data."""
    arr = get_test_data()
    # A single parenthesised argument prints exactly like the bare
    # print statement.
    print(arr)
    mus = hw3.get_mus(arr)
    print(mus)