def evaluate(self, alpha, test_sets):
    """Classify every test file and tally right/wrong predictions per class.

    For each class ``c`` in ``self.cl`` and each path in ``test_sets[c]``
    the file is loaded with ``np.load``, split and converted to observables
    through ``tools``, each observable in ``self.obs`` is binned with
    ``tools.find_bins``, and ``self.compute_probabilities(bins, alpha)`` is
    called. The key holding the largest value in the mapping it returns is
    taken as the predicted class.

    Args:
        alpha: Forwarded unchanged to ``self.compute_probabilities``.
        test_sets: Mapping from class label to an iterable of file paths.

    Returns:
        dict: ``{class: {'right': int, 'wrong': int}}`` with one entry per
        class in ``self.cl``.

    Raises:
        KeyError: If ``test_sets`` has no entry for a class in ``self.cl``,
            or if the predicted class is not one of ``self.cl``.
    """
    results = {}
    # Not returned; kept so it can be inspected or printed while debugging,
    # and so that an unexpected predicted label fails loudly.
    confusion_matrix = {}
    for c in self.cl:
        results[c] = {'right': 0, 'wrong': 0}
        confusion_matrix[c] = {c_pred: 0 for c_pred in self.cl}
        for file_path in test_sets[c]:
            data = np.load(file_path)
            data_A, data_B = tools.extract_individual_data(data)
            obs_data = tools.compute_observables(data_A, data_B)
            bins = {o: tools.find_bins(o, obs_data[o]) for o in self.obs}
            mean_p = self.compute_probabilities(bins, alpha)
            # Highest value wins; ties go to the first key in iteration order.
            class_max = max(mean_p.items(), key=operator.itemgetter(1))[0]
            confusion_matrix[c][class_max] += 1
            outcome = 'right' if class_max == c else 'wrong'
            results[c][outcome] += 1
    return results
    def eval_global(self, measures, test_sets):
        """Evaluate every observable separately under each distance measure.

        For each class ``c`` in ``self.cl`` and each file in ``test_sets[c]``
        the file is loaded once, and for every observable ``o`` in
        ``self.obs`` its histogram and PDF are computed through ``tools``.
        Under each measure ``m`` that pair is compared, via
        ``self.get_dists``, with the stored ``self.pdfs[o][c_query]`` of
        every class; the class with the smallest returned value is the
        prediction for that (file, observable, measure) combination.

        Args:
            measures: Collection of measure identifiers; each one is passed
                as the first argument of ``self.get_dists``.
            test_sets: Mapping from class label to an iterable of file paths.

        Returns:
            dict: ``results[m][c][o] == {'right': int, 'wrong': int}`` for
            every measure, class and observable.
        """
        results = {
            m: {
                c: {o: {'right': 0, 'wrong': 0} for o in self.obs}
                for c in self.cl
            }
            for m in measures
        }
        if not results:
            # No measures requested: nothing to score, so skip all file I/O.
            return results

        for c in self.cl:
            for file_path in test_sets[c]:
                # Loading and histogramming do not depend on the measure, so
                # they are done once per file rather than once per measure.
                data = np.load(file_path)
                data_A, data_B = tools.extract_individual_data(data)
                obs_data = tools.compute_observables(data_A, data_B)

                for o in self.obs:
                    histogram = tools.compute_histogram(o, obs_data[o])
                    pdf = tools.compute_pdf(o, histogram)

                    for m in measures:
                        dists = {
                            c_query: self.get_dists(
                                m, pdf, histogram, self.pdfs[o][c_query])
                            for c_query in self.cl
                        }
                        # Smallest distance wins; ties go to the class that
                        # comes first in self.cl.
                        c_pred = min(dists.items(),
                                     key=operator.itemgetter(1))[0]
                        outcome = 'right' if c_pred == c else 'wrong'
                        results[m][c][o][outcome] += 1
        return results
    def train(self, train_sets):
        """Fit per-class PDFs and compute pairwise Jaccard distances.

        For every class in ``self.cl`` the training files are loaded and
        their observables accumulated, both as summed histograms and as raw
        value lists. From these the method fills ``self.pdfs[o][c]`` for
        each observable/class pair and ``self.joint_pdfs[c][o1][o2]`` for
        each ordered pair of observables. ``self.pdfs`` must already exist
        as a mapping; ``self.joint_pdfs`` is replaced.

        Args:
            train_sets: Mapping from class label to an iterable of file
                paths readable by ``np.load``.

        Returns:
            dict: ``jaccard_dist[c][o1][o2]`` equal to
            ``(joint_ent - mutual_inf) / joint_ent``. Pairs whose mutual
            information equals zero are left out of the mapping.
        """
        histograms, self.joint_pdfs, jaccard_dist = {}, {}, {}

        # One empty histogram per (observable, class).
        for o in self.obs:
            self.pdfs[o] = {}
            histograms[o] = {
                c: tools.initialize_histogram(o) for c in self.cl
            }

        # Accumulate histograms and raw observable values per class.
        obs_data_cum = {}
        for c in self.cl:
            obs_data_cum[c] = {o: [] for o in self.obs}
            for file_path in train_sets[c]:
                data = np.load(file_path)
                data_A, data_B = tools.extract_individual_data(data)
                obs_data = tools.compute_observables(data_A, data_B)
                for o in self.obs:
                    histograms[o][c] += tools.compute_histogram(o, obs_data[o])
                    obs_data_cum[c][o].extend(obs_data[o])

        for o in self.obs:
            for c in self.cl:
                self.pdfs[o][c] = tools.compute_pdf(o, histograms[o][c])

        for c in self.cl:
            self.joint_pdfs[c], jaccard_dist[c] = {}, {}
            for o1 in self.obs:
                self.joint_pdfs[c][o1], jaccard_dist[c][o1] = {}, {}
                for o2 in self.obs:
                    joint_pdf = tools.compute_joint_pdf(
                        tools.compute_joint_histogram(
                            o1, obs_data_cum[c][o1],
                            o2, obs_data_cum[c][o2]))
                    self.joint_pdfs[c][o1][o2] = joint_pdf
                    joint_ent = tools.get_joint_ent(
                        joint_pdf, self.pdfs[o1][c], self.pdfs[o2][c])
                    mutual_inf = tools.get_mutual_inf(
                        joint_pdf, self.pdfs[o1][c], self.pdfs[o2][c])
                    # Compare by value: the former `is not 0` was an
                    # identity test against a literal and was always true
                    # for float results.
                    # NOTE(review): this guard is on mutual_inf while the
                    # division is by joint_ent; confirm joint_ent cannot be
                    # zero when mutual_inf is non-zero.
                    if mutual_inf != 0:
                        jaccard_dist[c][o1][o2] = (
                            joint_ent - mutual_inf) / joint_ent

        return jaccard_dist
def get_jaccard_dist(train_fnames):
    """Compute pairwise Jaccard distances between observables, per class.

    Classes come from ``preferences.CLASSES`` and observables from
    ``preferences.OBSERVABLES``. For every class the training files are
    loaded and their observables accumulated, both as summed histograms and
    as raw value lists; marginal and joint PDFs are derived from these
    through ``tools``.

    Args:
        train_fnames: Mapping from class label to an iterable of file paths
            readable by ``np.load``.

    Returns:
        dict: ``jaccard_dist[c][o1][o2]`` equal to
        ``(joint_ent - mutual_inf) / joint_ent``. Pairs whose mutual
        information equals zero are left out of the mapping.
    """
    observables = preferences.OBSERVABLES
    classes = preferences.CLASSES

    histograms, pdfs, joint_pdfs, jaccard_dist = {}, {}, {}, {}

    # One empty histogram per (observable, class).
    for o in observables:
        pdfs[o] = {}
        histograms[o] = {c: tools.initialize_histogram(o) for c in classes}

    # Accumulate histograms and raw observable values per class.
    obs_data_cum = {}
    for c in classes:
        obs_data_cum[c] = {o: [] for o in observables}
        for file_path in train_fnames[c]:
            data = np.load(file_path)
            data_A, data_B = tools.extract_individual_data(data)
            obs_data = tools.compute_observables(data_A, data_B)
            for o in observables:
                histograms[o][c] += tools.compute_histogram(o, obs_data[o])
                obs_data_cum[c][o].extend(obs_data[o])

    for o in observables:
        for c in classes:
            pdfs[o][c] = tools.compute_pdf(o, histograms[o][c])

    for c in classes:
        joint_pdfs[c], jaccard_dist[c] = {}, {}
        for o1 in observables:
            joint_pdfs[c][o1], jaccard_dist[c][o1] = {}, {}
            for o2 in observables:
                joint_pdf = tools.compute_joint_pdf(
                    tools.compute_joint_histogram(
                        o1, obs_data_cum[c][o1], o2, obs_data_cum[c][o2]))
                joint_pdfs[c][o1][o2] = joint_pdf
                joint_ent = tools.get_joint_ent(
                    joint_pdf, pdfs[o1][c], pdfs[o2][c])
                mutual_inf = tools.get_mutual_inf(
                    joint_pdf, pdfs[o1][c], pdfs[o2][c])
                # Compare by value: the former `is not 0` was an identity
                # test against a literal and was always true for floats.
                # NOTE(review): this guard is on mutual_inf while the
                # division is by joint_ent; confirm joint_ent cannot be zero
                # when mutual_inf is non-zero.
                if mutual_inf != 0:
                    jaccard_dist[c][o1][o2] = (
                        joint_ent - mutual_inf) / joint_ent

    return jaccard_dist
 def evaluate_distance(self, alpha, test_sets):
     """Classify test files by summed energy distance to each class's PDFs.

     For each class ``c`` in ``self.cl`` and each path in ``test_sets[c]``
     the file is loaded, its observables are turned into PDFs through
     ``tools``, and for every candidate class the values of
     ``stats.energy_distance(pdf, self.pdfs[o][candidate])`` are summed
     over the observables in ``self.obs``. The candidate with the smallest
     total is the prediction.

     Args:
         alpha: Unused by this method; present in the signature only.
         test_sets: Mapping from class label to an iterable of file paths.

     Returns:
         dict: ``{class: {'right': int, 'wrong': int}}`` with one entry per
         class in ``self.cl``.
     """
     results = {}
     # Not returned; kept so it can be inspected or printed while debugging.
     confusion_matrix = {}
     for c in self.cl:
         results[c] = {'right': 0, 'wrong': 0}
         confusion_matrix[c] = {c_pred: 0 for c_pred in self.cl}
         for file_path in test_sets[c]:
             data = np.load(file_path)
             distances = {c_pred: 0 for c_pred in self.cl}
             data_A, data_B = tools.extract_individual_data(data)
             obs_data = tools.compute_observables(data_A, data_B)
             for o in self.obs:
                 pdf = tools.compute_pdf(
                     o, tools.compute_histogram(o, obs_data[o]))
                 # NOTE(review): `stats` is presumably scipy.stats, whose
                 # energy_distance takes sample values; here PDF values are
                 # passed as the samples. Confirm this is intended.
                 for c_pred in self.cl:
                     distances[c_pred] += stats.energy_distance(
                         pdf, self.pdfs[o][c_pred])
             # Smallest total wins; ties go to the first class in self.cl.
             closest = min(distances.items(),
                           key=operator.itemgetter(1))[0]
             confusion_matrix[c][closest] += 1
             outcome = 'right' if closest == c else 'wrong'
             results[c][outcome] += 1
     return results
import glob
from os import remove
from model import tools
import numpy as np
import math
import matplotlib.pyplot as plt

if __name__ == "__main__":
    min_x, max_x = 0, 0
    min_y, max_y = 0, 0
    for file_path in glob.iglob('data/**/*.dat', recursive=True):
        if 'threshold' in file_path:
            data = np.load(file_path)
            data_A, data_B = tools.extract_individual_data(data)
            xA, yA = data_A[:, 2], data_A[:, 3]
            xB, yB = data_B[:, 2], data_B[:, 3]
            min_xA, min_xB = min(xA), min(xB)
            min_yA, min_yB = min(yA), min(yB)
            if min_xA < min_x:
                min_x = min_xA
            if min_xB < min_x:
                min_x = min_xB
            if min_yA < min_y:
                min_y = min_yA
            if min_yB < min_y:
                min_y = min_yB
            max_xA, max_xB = max(xA), max(xB)
            max_yA, max_yB = max(yA), max(yB)
            if max_xA > max_x:
                max_x = max_xA
            if max_xB > max_x: