Example #1
    def add_new_object(self):
        # Read the number of neighbours (k) and the feature values entered in the dialog.
        k = int(self.ui_new_obj.K_value.text())
        values = self.ui_new_obj.newValues.text()
        # Dispatch to the distance metric selected with the radio buttons.
        if self.ui_new_obj.euklidianRadio.isChecked():
            object_class = Metrics.euclidean_distance(values, self.data_frame.df, k)
        elif self.ui_new_obj.manhattanRadio.isChecked():
            object_class = Metrics.manhattan_distance(values, self.data_frame.df, k)
        elif self.ui_new_obj.chebyshevRadio.isChecked():
            object_class = Metrics.chebyshev_distance(values, self.data_frame.df, k)
        elif self.ui_new_obj.mahalanobisRadio.isChecked():
            object_class = Metrics.mahalanobis_distance(values, self.data_frame.df, k)
        else:
            # No metric selected; avoid a NameError on object_class below.
            return

        # Store the classified object and refresh the table view.
        self.data_frame.append(values, object_class)
        self.setup_table(self.data_frame.df)
        self.close_add_new_object_dialog()
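The Metrics.euclidean_distance helper itself is not shown in this example. Below is a minimal sketch of a k-nearest-neighbour classifier using Euclidean distance, purely for illustration: the function name knn_euclidean, the comma-separated values string, and the assumption that the last DataFrame column holds the class label are all assumptions, not the project's actual API.

import numpy as np
import pandas as pd
from collections import Counter

def knn_euclidean(values: str, df: pd.DataFrame, k: int):
    # Parse the comma-separated feature string entered in the dialog (assumption).
    point = np.array([float(v) for v in values.split(',')])
    # Assume every column except the last holds features and the last holds the label.
    features = df.iloc[:, :-1].to_numpy(dtype=float)
    labels = df.iloc[:, -1].to_numpy()
    # Euclidean distance from the new point to every stored row.
    dists = np.linalg.norm(features - point, axis=1)
    # Majority vote among the k nearest rows.
    nearest = labels[np.argsort(dists)[:k]]
    return Counter(nearest).most_common(1)[0][0]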
Example #3
    def classify(self):
        # Build a Metrics instance over the full DataFrame, then run the classification
        # matching the metric and normalisation options selected in the dialog.
        metrics: Metrics = Metrics(len(self.data_frame.df.index), self.data_frame.df)
        if self.ui_classify.euklidianRadio.isChecked():
            if self.ui_classify.checkBoxNormalize.isChecked():
                metrics.classify_euclidean_normalize()
            else:
                metrics.classify_euclidean()
        elif self.ui_classify.manhattanRadio.isChecked():
            if self.ui_classify.checkBoxNormalize.isChecked():
                metrics.classify_manhattan_normalize()
            else:
                metrics.classify_manhattan()
        elif self.ui_classify.chebyshevRadio.isChecked():
            if self.ui_classify.checkBoxNormalize.isChecked():
                metrics.classify_chebyshev_normalize()
            else:
                metrics.classify_chebyshev()
        elif self.ui_classify.mahalanobisRadio.isChecked():
            if self.ui_classify.checkBoxNormalize.isChecked():
                metrics.classify_mahalanobis_normalize()
            else:
                metrics.classify_mahalanobis()

        self.close_classify_dialog()
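The if/elif ladder above mirrors the radio-button layout one-to-one. A more compact alternative, sketched below with the same widget and method names as in the snippet (so it is only as correct as those assumptions), dispatches through a dict:

    def classify(self):
        metrics = Metrics(len(self.data_frame.df.index), self.data_frame.df)
        # Map each radio button to its (plain, normalized) pair of classification methods.
        dispatch = {
            self.ui_classify.euklidianRadio: (metrics.classify_euclidean,
                                              metrics.classify_euclidean_normalize),
            self.ui_classify.manhattanRadio: (metrics.classify_manhattan,
                                              metrics.classify_manhattan_normalize),
            self.ui_classify.chebyshevRadio: (metrics.classify_chebyshev,
                                              metrics.classify_chebyshev_normalize),
            self.ui_classify.mahalanobisRadio: (metrics.classify_mahalanobis,
                                                metrics.classify_mahalanobis_normalize),
        }
        normalize = self.ui_classify.checkBoxNormalize.isChecked()
        for radio, (plain, normalized) in dispatch.items():
            if radio.isChecked():
                (normalized if normalize else plain)()
                break
        self.close_classify_dialog()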
import os
import time
from copy import deepcopy

import numpy as np
import pandas as pd

# Agent, Metrics, Plotter and SER are project-specific classes used below.

class Experiment:
    def __init__(self, _type, bandits, agent, runs, exp_nums, logdir, k_vec,
                 k_probs):
        self._type = _type
        self.esr_vector = []
        self.esr_probs = []
        self.f1_score = []
        self.f1 = []
        #self.plotter = Plotter()
        self.metrics = Metrics()
        self.k_vec = k_vec
        self.k_probs = k_probs
        self.f1_df = pd.DataFrame()
        self.esrBandit = Agent(0, 10)
        self.logdir = logdir

        for i in range(len(k_vec)):
            self.esrBandit.manual_distribution(k_vec[i], k_probs[i])

        if self._type == "bandit":
            self.bandits = bandits
            self.agent = agent
            self.runs = runs
            self.exp_nums = exp_nums

    def run(self):
        if self._type == "bandit":
            avg_log = self.logdir + 'average' + '/'
            if not os.path.exists(avg_log):
                os.makedirs(avg_log, exist_ok=True)

            for run in range(self.runs):
                self.f1 = []  # reset per-run F1 history so each run column has exp_nums entries
                self.run_df = pd.DataFrame()
                start = time.perf_counter()
                run_log = self.logdir + 'run_' + str(run + 1) + '/'

                if not os.path.exists(run_log):
                    os.makedirs(run_log, exist_ok=True)

                for i in range(self.exp_nums):
                    self.esr_vector = []
                    self.esr_probs = []
                    if i == 0:
                        for j in range(len(self.bandits)):
                            _return_ = self.bandits[j].pull_arm()
                            self.agent.update(j, _return_)

                    action = self.agent.select_action()
                    _return_ = self.bandits[action].pull_arm()
                    self.agent.update(action, _return_)
                    esr_index = self.agent.esr_dominance()

                    self.esr_agent = deepcopy(self.agent)
                    self.esr_agent.distribution = np.array(
                        self.esr_agent.distribution)[esr_index]

                    for val in esr_index:
                        self.esr_vector.append(
                            self.agent.distribution[val].get_distribution()[0])
                        self.esr_probs.append(
                            self.agent.distribution[val].get_distribution()[1])

                    #self.f1.append(self.metrics.precision_recall(self.esr_vector, self.esr_probs, self.k_vec, self.k_probs))
                    self.f1.append(
                        self.metrics.pr_kl(self.esr_agent, self.esrBandit))

                self.run_df['run' + str(run)] = self.f1
                self.run_df['mean'] = self.run_df.mean(axis=1)
                self.f1_df['run' + str(run)] = self.f1
                end = time.perf_counter()

                #self.run_df['average'] = self.f1_df.mean(axis=1)
                #print(self.f1_df)
                #self.f1_score = self.f1_df['Average']
                #self.run_df['average'] = np.mean(np.array(self.f1_score).reshape(-1, 10), axis=1)
                self.run_df.to_csv(run_log + "/f1_score.csv", index=False)

                ser = SER(self.k_vec, self.k_probs)
                ser_expectations = ser.expectations()
                ser_pareto_front = ser.pareto_front()

                print("")
                print(
                    '**** Run ' + str(run + 1) + ' - Execution Time: ' +
                    str(round((end - start), 2)) + ' seconds ****', )
                print(str(len(esr_index)) + " distributions in the ESR set")
                print("ESR Vector and Probabilities")
                for a in range(len(self.esr_vector)):
                    print(self.esr_vector[a])
                    print(self.esr_probs[a])
                    print(" ")
                print("")
                print("SER - Pareto Front")
                print("Number of policies on the pareto front : " +
                      str(len(ser_pareto_front)))
                print(ser_pareto_front)
                print("")

                self.plotter = Plotter(self.esr_vector, self.esr_probs,
                                       run_log, self.exp_nums, True, True)
                self.plotter.plot_run()

            self.f1_df['mean'] = self.f1_df.mean(axis=1)
            #self.f1_df['average'] = np.mean(np.array(self.f1_df['mean']).reshape(-1, 10), axis=1)
            self.f1_df.to_csv(avg_log + "/f1_score.csv", index=False)
            self.plotter = Plotter(self.esr_vector, self.esr_probs, avg_log,
                                   self.exp_nums, True, True)
            self.plotter.plot_run()

        return
    @classmethod
    def dameray_levenstein_distance_v(cls, sign1, sign2):
        # Distance between the vertical line profiles of two signature images.
        h_1, v_1 = cls.get_lines_picture(sign1)
        h_2, v_2 = cls.get_lines_picture(sign2)
        v_distance = Metrics.dameray_levenstein_distance(v_1, v_2)
        return v_distance

    @classmethod
    def dameray_levenstein_distance_sum(cls, sign1, sign2, median_num=17, weights=None):
        # Sum of the distances between the horizontal and vertical line profiles.
        h_1, v_1 = cls.get_lines_picture(sign1, median_num)
        h_2, v_2 = cls.get_lines_picture(sign2, median_num)
        h_distance = Metrics.dameray_levenstein_distance(h_1, h_2, weights=weights)
        v_distance = Metrics.dameray_levenstein_distance(v_1, v_2, weights=weights)
        return h_distance + v_distance
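Metrics.dameray_levenstein_distance is not defined in these snippets. The sketch below shows the standard, unweighted optimal string alignment variant of the Damerau-Levenshtein distance for comparison; the weights parameter used above is not modelled here, and osa_distance is an illustrative name, not the project's function.

def osa_distance(a, b):
    # Optimal string alignment distance: insertions, deletions, substitutions
    # and transpositions of adjacent elements each cost 1.
    d = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for i in range(len(a) + 1):
        d[i][0] = i
    for j in range(len(b) + 1):
        d[0][j] = j
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,         # deletion
                          d[i][j - 1] + 1,         # insertion
                          d[i - 1][j - 1] + cost)  # substitution
            if i > 1 and j > 1 and a[i - 1] == b[j - 2] and a[i - 2] == b[j - 1]:
                d[i][j] = min(d[i][j], d[i - 2][j - 2] + 1)  # transposition
    return d[len(a)][len(b)]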
Example #7
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument("--project", "-p", help="Project path", required=True)
    parser.add_argument("--stop_words", "-s", help="Path to stopwords file")
    parser.add_argument("--k_topics",
                        "-k",
                        help="Number of topics for given project")
    parser.add_argument(
        "--resolution",
        "-r",
        help=
        "Resolution parameter for Louvain community detection. A value in the range 0.3 to 1 is advised; a smaller resolution identifies smaller communities and vice versa. By default the whole range is tested and the communities found for each resolution value are saved."
    )
    parser.add_argument(
        "--metrics",
        "-m",
        help=
        "Compute metrics for the given project after normal parsing and execution (relative path to the set root path). At the current time this does NOT work independently of the identification process.",
        action="store_true")
    parser.add_argument("--draw",
                        "-d",
                        help="Enable plotting of graphs",
                        action="store_true")
    parser.add_argument("--lda-plotting",
                        "-l",
                        help="Enable plotting of LDA topics",
                        action="store_true")
    args = parser.parse_args()

    Settings.DRAW = bool(args.draw)
    Settings.LDA_PLOTTING = bool(args.lda_plotting)
    Settings.K_TOPICS = int(args.k_topics) if args.k_topics else None
    Settings.RESOLUTION = float(args.resolution) if args.resolution else None

    Settings.set_stop_words(args.stop_words)

    print(f"Setting Directory as: {Settings.DIRECTORY}")

    if args.project:
        project_name = str(args.project.split('/')[-1])
        project_path = str(args.project)
        Settings.PROJECT_PATH = project_path
        Settings.PROJECT_NAME = project_name

        # cluster_results = (clusters, modularity, resolution)
        clusters_results = identify_clusters_in_project(
            project_name, project_path)

        metrics = Metrics()
        for cluster in clusters_results:
            Settings.create_id()

            # TODO: Refactor MetricExecutor into ProcessResultOutput and MetricExecutor; they currently share many responsibilities
            metric_executor = MetricExecutor()
            metric_executor.add_project(project_name, str(cluster[0]))
            metric_executor.dump_to_json_file()

            if args.metrics:
                # TODO: refactor
                metrics.set_metric_executor(metric_executor)
                metrics.set_cluster_results(clusters_results)
                metrics.calculate()

        if args.metrics:
            metrics.save()
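Assuming the entry point above lives in a file such as main.py (a hypothetical name, not taken from the snippet), a typical invocation combining the flags it defines might look like:

python main.py --project /path/to/project --k_topics 5 --resolution 0.5 --metrics --draw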