def label_test():
    """Score every labeler per test segment and keep the best one's label.

    For each labeler, a fresh ``ObjectClassifier`` is fit on the training
    sub-map. Its target is a boolean vector marking where that labeler's
    labels equal the majority vote. The values returned by
    ``predict_proba_segs`` on the test sub-map fill one row of
    ``predictions``. For every test segment the labeler with the highest
    score is selected and that labeler's label is taken.

    Writes three files to the working directory:
      * ``predictions.npy`` -- (n_labelers, n_test_segments) score matrix
      * ``best.npy``        -- index of the selected labeler per test segment
      * ``vote.npy``        -- the selected labeler's label per test segment
    """
    from labeler import Labelers

    labelers = Labelers()

    # Floor division keeps the bounds integral even under true division
    # (Python 3 or ``from __future__ import division``).
    # NOTE(review): here ``test`` is the left half of the columns and
    # ``train`` the right half, the opposite of setup_map_split and
    # main_haiti -- confirm this is intentional.
    rows = np.arange(4096 // 3, 4096)
    test = np.ix_(rows, np.arange(4096 // 2))
    train = np.ix_(rows, np.arange(4096 // 2, 4096))

    haiti_map = map_overlay.haiti_setup()
    train_map = haiti_map.sub_map(train)
    test_map = haiti_map.sub_map(test)

    n_labelers = labelers.labels.shape[0]
    # Looked up once; the original recomputed this for every use.
    test_segs = test_map.unique_segs(20)

    predictions = np.zeros((n_labelers, test_segs.shape[0]))
    # agreement[i, s] is True where labeler i matches the majority vote on
    # training segment s.
    agreement = (labelers.labels == labelers.majority_vote())[:, train_map.unique_segs(20)]

    for i in range(n_labelers):
        print(labelers.emails[i])
        new_model = ObjectClassifier(NZ=False)
        new_model.fit(train_map, agreement[i])
        predictions[i] = new_model.predict_proba_segs(test_map)

    print(predictions)
    best_labelers = np.argmax(predictions, axis=0)
    print(best_labelers)

    np.save('predictions.npy', predictions)
    np.save('best.npy', best_labelers)

    # Sanity check: exactly one selected labeler per test segment.
    assert best_labelers.shape[0] == test_segs.shape[0]
    model_labels = labelers.labels[best_labelers, test_segs]
    np.save('vote.npy', model_labels)
def setup_map_split(self):
    """Split the Haiti map into a training portion and a testing portion.

    Both portions use rows ``4096 // 3 .. 4095``. The training portion
    takes columns ``0 .. 2047`` and the testing portion columns
    ``2048 .. 4095``.

    Sets the following attributes:
      * ``self.train`` / ``self.test``         -- ``np.ix_`` index grids
      * ``self.haiti_map``                     -- result of ``map_overlay.haiti_setup()``
      * ``self.train_map`` / ``self.test_map`` -- the corresponding sub-maps
    """
    # Floor division keeps the bounds integral even under true division
    # (Python 3 or ``from __future__ import division``); a float start
    # would make np.arange produce float indices.
    first_row = 4096 // 3
    mid_col = 4096 // 2

    self.train = np.ix_(np.arange(first_row, 4096), np.arange(mid_col))
    self.test = np.ix_(np.arange(first_row, 4096), np.arange(mid_col, 4096))

    self.haiti_map = map_overlay.haiti_setup()
    self.train_map = self.haiti_map.sub_map(self.train)
    self.test_map = self.haiti_map.sub_map(self.test)
def test():
    """Print labeler statistics and export one count per labeler to CSV.

    Calls ``labelers.print_stats()``, then pairs every key of
    ``labelers.image_count`` with the first column of its value. Each pair
    is printed, and all pairs are written to ``all_emails.csv``.
    """
    haiti_map = map_overlay.haiti_setup()
    labelers = Labelers()
    labelers.print_stats()

    emails = labelers.image_count.keys()
    # First column of the per-labeler count rows.
    first_counts = np.array(labelers.image_count.values())[:, 0]

    rows = [[email, count] for email, count in zip(emails, first_counts)]
    for email, count in rows:
        print('%s %s' % (email, count))

    np.savetxt('all_emails.csv', np.array(rows), delimiter=',', fmt='%s')
def main_haiti():
    """Compare every labeler and the classifier against the majority vote.

    The majority vote over all labelers is used as ground truth. For each
    labeler, the (FPR, TPR) pair on the test sub-map is computed with
    ``analyze_results.confusion_analytics``. An ``ObjectClassifier`` is
    then fit on the training sub-map and scored on the test sub-map. Its
    ROC curve is drawn with the labelers overlaid as annotated points,
    saved to ``All_ROCs/Classifier_ROC.png``, and shown.
    """
    from labeler import Labelers

    model = ObjectClassifier(0, 1)
    labelers = Labelers()
    y = labelers.majority_vote()

    # Floor division keeps the bounds integral even under true division
    # (Python 3 or ``from __future__ import division``).
    rows = np.arange(4096 // 3, 4096)
    train = np.ix_(rows, np.arange(4096 // 2))
    test = np.ix_(rows, np.arange(4096 // 2, 4096))

    haiti_map = map_overlay.haiti_setup()
    train_map = haiti_map.sub_map(train)
    test_map = haiti_map.sub_map(test)

    # Ground truth is the majority vote. An EM-based alternative (Xie.EM
    # posterior > 0.5) used to be sketched here in comments and was never
    # enabled.
    test_segmentation = test_map.segmentations[20]
    g_truth = y[test_segmentation]
    truth_flat = g_truth.ravel()

    FPRs = []
    TPRs = []
    for email in labelers.emails:
        print(email)
        labels = labelers.labeler(email)[test_segmentation]
        FPR, TPR = analyze_results.confusion_analytics(truth_flat, labels.ravel())
        FPRs.append(FPR)
        TPRs.append(TPR)

    probs = model.fit_and_predict(train_map, test_map, y[train_map.unique_segs(20)])
    probs_flat = probs.ravel()
    print(analyze_results.FPR_from_FNR(truth_flat, probs_flat, TPR=.95))
    analyze_results.probability_heat_map(test_map, probs_flat, '')

    fig, _, _, _, _, _ = analyze_results.ROC(truth_flat, probs_flat, 'Classifier')
    plt.scatter(FPRs, TPRs)
    # One annotated point per labeler; FPRs/TPRs follow labelers.emails order.
    for name, fpr, tpr in zip(labelers.emails, FPRs, TPRs):
        plt.annotate(name, (fpr, tpr))
    fig.savefig('All_ROCs/{}_ROC.png'.format('Classifier'), format='png')
    plt.show()
# NOTE(review): E and run take ``self`` and are presumably methods of the EM
# class, whose header is outside this view -- confirm the indentation when
# splicing this back into the file.
def E(self):
    """Recompute ``self.alpha`` and ``self.p`` from ``self.G`` and ``self.n``.

    ``self.alpha[k, j, l]`` is ``sum_i self.n[k, i, l] * self.G[i, j]``,
    normalised so that each ``self.alpha[k, j, :]`` sums to one.
    ``self.p`` is the column sum of ``self.G`` divided by ``self.I``.

    Assumes ``self.n`` has shape (labelers, items, 2) and ``self.G`` has
    shape (items, 2) -- TODO confirm against the constructor. No guard is
    applied when a normalising sum is zero.
    """
    # Broadcasting gives the same (k, i, j, l) product the original built
    # by tiling both arrays twice, without materialising the copies.
    weighted = self.n[:, :, np.newaxis, :] * self.G[np.newaxis, :, :, np.newaxis]
    counts = np.sum(weighted, axis=1)

    # Normalise over the last axis.
    totals = np.sum(counts, axis=2)
    self.alpha = counts / totals[:, :, np.newaxis]

    self.p = np.sum(self.G, axis=0) / self.I


def run(self, runs=15, v=2):
    """Alternate ``self.E()`` and ``self.M()`` until ``M`` reports convergence.

    Stops after at most 50 iterations. Afterwards prints ``self.p``,
    followed by each labeler email and its slice of ``self.alpha``.

    Parameters:
        runs: accepted for backward compatibility.
            NOTE(review): never read; the iteration cap is hard-coded to
            50 -- confirm which was intended.
        v: verbosity. ``> 0`` prints ``self.p`` after every iteration;
            ``> 1`` additionally calls ``self.show_probs()``.
    """
    max_iterations = 50
    converged = False
    count = 0
    while not converged and count < max_iterations:
        count += 1
        self.E()
        converged = self.M()
        if v > 0:
            print(self.p)
        if v > 1:
            self.show_probs()

    print(self.p)
    for email, labeler_alpha in zip(self.labelers.emails, list(self.alpha)):
        print(email)
        print(labeler_alpha)


if __name__ == "__main__":
    # Rows from one third of the way down to the bottom, all columns.
    # Floor division keeps the bound integral under true division.
    region = np.ix_(np.arange(4096 // 3, 4096), np.arange(4096))
    haiti_map = map_overlay.haiti_setup().sub_map(region)
    labelers = Labelers()
    test = EM(haiti_map, labelers)
    test.run()