Exemplo n.º 1
0
    def compute(self):
        """Print greedy category chains scored by average F-score.

        When ``self._direction`` is ``"asc"``, one chain is grown per distinct
        label: starting from that seed label, each step adds the not yet
        chosen label whose addition yields the highest ``avg_fscore``.  Every
        chain is printed as ``label<TAB>score`` lines (the seed with ``1.0``),
        with a blank line between chains.

        For any other direction a single chain is built downwards: a creature
        is computed on the full data first, then each step removes the label
        whose removal yields the highest ``avg_fscore``.  ``len(labels) - 2``
        removals are performed; the output is an ``All`` line, one line per
        removed label, and finally the last label iterated from the remaining
        set with ``1.0``.

        Ties keep the first candidate encountered (strict ``>`` comparison).
        """

        def evaluate(categories):
            # Build and compute a creature restricted to the given categories,
            # always using the full feature vector.
            candidate = self._creature_type(
                self._extract_data_matrix(categories),
                self._extract_labels(categories),
                IOUtil.get_full_features(len(self._data_matrix[0])),
                self._params_list)
            candidate.compute()
            return candidate

        def fscore(candidate):
            return candidate.get_best_params_result().avg_fscore

        if self._direction == "asc":
            all_labels = set(self._labels)
            chains = []
            chain_creatures = []
            for seed in all_labels:
                chain = [seed]
                creatures = [None]  # the seed has no creature of its own
                chains.append(chain)
                chain_creatures.append(creatures)
                chosen = {seed}
                for _ in range(1, len(all_labels)):
                    winner = None
                    winner_label = None
                    for candidate_label in all_labels:
                        if candidate_label in chosen:
                            continue
                        trial = set(chosen)
                        trial.add(candidate_label)
                        candidate = evaluate(trial)
                        if winner is None or fscore(candidate) > fscore(winner):
                            winner = candidate
                            winner_label = candidate_label
                    chain.append(winner_label)
                    creatures.append(winner)
                    chosen.add(winner_label)

            for index, (chain, creatures) in enumerate(
                    zip(chains, chain_creatures)):
                if index > 0:
                    print()
                print(repr(chain[0]) + "\t1.0")
                for label, creature in zip(chain[1:], creatures[1:]):
                    print(repr(label) + "\t" + repr(fscore(creature)))
            return

        all_labels = set(self._labels)
        remaining = set(all_labels)
        baseline = self._creature_type(
            self._data_matrix, self._labels,
            IOUtil.get_full_features(len(self._data_matrix[0])),
            self._params_list)
        baseline.compute()
        chain = [None]  # slot 0 stands for "all categories"
        creatures = [baseline]
        for _ in range(1, len(all_labels) - 1):
            winner = None
            dropped_label = None
            for candidate_label in all_labels:
                if candidate_label not in remaining:
                    continue
                trial = set(remaining)
                trial.remove(candidate_label)
                candidate = evaluate(trial)
                if winner is None or fscore(candidate) > fscore(winner):
                    winner = candidate
                    dropped_label = candidate_label
            chain.append(dropped_label)
            creatures.append(winner)
            remaining.remove(dropped_label)

        # Whatever label the set yields last is reported as the final one.
        last_label = None
        for label in remaining:
            last_label = label

        print("All\t" + repr(fscore(creatures[0])))
        for label, creature in zip(chain[1:], creatures[1:]):
            print(repr(label) + "\t" + repr(fscore(creature)))
        print(repr(last_label) + "\t1.0")
 def compute(self):
     """Evolve the creature population for ``self._max_turns`` turns.

     The initial population holds one creature with all features enabled,
     one creature per entry of ``self._feature_preset_list`` and random
     creatures in the remaining slots.  Each turn computes every creature
     that is not computed yet (at most ``self._max_threads`` worker threads
     at a time), sorts the population with the comparator selected by
     ``self._optimize_features`` and records the last creature of the
     sorted population as the turn's best performer.  Except after the
     final turn, the next population consists of the last
     ``self._keep_best_n`` creatures unchanged plus mutated copies of the
     creatures at indices ``keep_best_n .. max_population - 1``.

     Raises:
         Exception: if there are more presets than ``max_population - 1``.
     """
     # Initialize first population
     self._population = [None for _ in range(self._max_population)]
     # Always include a creature with all features
     self._population[0] = self._creature_type(
         self._data_matrix, self._labels,
         IOUtil.get_full_features(len(self._data_matrix[0])),
         self._params_list)
     # Include preset
     max_preset = len(self._feature_preset_list)
     if max_preset > self._max_population - 1:
         raise Exception("Preset plus complete 1111-vector exceeds max_population")
     for i in range(1, 1 + max_preset):
         self._population[i] = self._creature_type(
             self._data_matrix, self._labels,
             IOUtil.get_feature_from_string(self._feature_preset_list[i - 1]),
             self._params_list)
     # Fill up with random creatures
     for i in range(1 + max_preset, self._max_population):
         self._population[i] = self._creature_type(
             self._data_matrix, self._labels,
             self._create_random_features(density=random.uniform(0, 1)),
             self._params_list)

     for turn in range(0, self._max_turns):
         print("Turn "+repr(turn+1)+"/"+repr(self._max_turns))
         lock = threading.Lock()
         # NOTE(review): presumably each CreatureThread removes itself from
         # this list (under ``lock``) when it finishes - confirm.
         threads = []
         sleep_event = threading.Event()
         for creature in self._population:
             if creature.is_computed():
                 continue
             # Make sure we have room for another thread
             while True:
                 with lock:
                     active_threads = len(threads)
                 if active_threads < self._max_threads:
                     break
                 sleep_event.wait(1)
             thread = CreatureThread(creature, lock, threads, sleep_event)
             threads.append(thread)
             thread.start()
         # Wait for pending threads to complete
         while True:
             with lock:
                 active_threads = len(threads)
             if active_threads == 0:
                 break
             sleep_event.wait(1)

         sys.stdout.flush()
         # Sort and handle population
         if self._optimize_features == 'min':
             comparator = self.compare_creatures_minimize_features
         else:
             comparator = self.compare_creatures_maximize_features
         self._population = sorted(self._population,
                                   key=functools.cmp_to_key(comparator))
         best = self._population[-1]
         best.pretty_print()
         best_result = best.get_best_params_result()
         self._best_performer_params_result_history.append(best_result)
         # Count how often each feature is active in the best performer.
         # (The flag itself is tested; the previous code compared the loop
         # index with True and therefore only ever counted index 1.)
         for i, active in enumerate(best_result.features):
             if active:
                 self._best_performer_feature_usage[i] += 1

         if turn < (self._max_turns - 1):
             # Keep top n unchanged
             population_new = [
                 self._population[i]
                 for i in range(self._max_population - self._keep_best_n,
                                self._max_population)
             ]
             # Mutated copies fill the rest of the new population
             for i in range(self._keep_best_n, self._max_population):
                 # Make a copy of the features since we are going to manipulate them
                 features = copy.deepcopy(self._population[i].get_features())
                 if self._mutation_rate is not None:
                     while True:
                         for k in range(len(features)):
                             if random.uniform(0, 1) < self._mutation_rate:
                                 features[k] = not features[k]
                         # Accept the mutation once at least two features
                         # are active.  All active features are counted, as
                         # in the toggle branch below; counting only the
                         # features switched on in this pass forced many
                         # extra mutation passes.
                         active_count = sum(1 for flag in features if flag)
                         if active_count > 1:
                             break
                 if self._absolute_feature_toggle_count is not None:
                     toggle_set = set()
                     for k in range(0, self._absolute_feature_toggle_count):
                         while True:
                             # Flip one feature that was not flipped before
                             while True:
                                 m = random.randint(0, len(features) - 1)
                                 if m not in toggle_set:
                                     toggle_set.add(m)
                                     features[m] = not features[m]
                                     break
                             # Flip further features until at least two are active
                             active_count = sum(1 for flag in features if flag)
                             if active_count > 1:
                                 break
                 population_new.append(
                     self._creature_type(self._data_matrix, self._labels,
                                         features, self._params_list))
             self._population = population_new
Exemplo n.º 3
0
def main(argv=None):
    """Run the ``graphsimsvmwalktrough`` SVM feature-selection pipeline.

    Options are read from ``sys.argv`` (``argv`` is accepted for interface
    compatibility but is not consulted):

    * ``-m``  mode name
    * ``-d``  data matrix file
    * ``-l``  labels file
    * ``-pl`` parameter list file
    * ``-cl`` column labels file
    * ``-cd`` column descriptions file
    * ``-r``  result file to write

    Unknown arguments are skipped.  Nothing happens unless the mode is
    ``"graphsimsvmwalktrough"``.  In that mode three studies run in sequence:
    a single-creature study to pick the best ``C``/``gamma``, a genetic study
    with those parameters, and a feature-minimizing study seeded with the
    genetic result.  A tab-separated summary line followed by per-category
    precision/recall/F-score tables is written to the result file.

    Raises:
        IndexError: if an option is the last argument and has no value.
    """
    mode = None
    data = None
    labels = None
    params_list = []
    result_filename = None

    ic = 1
    while ic < len(sys.argv):
        if sys.argv[ic] == '-m':
            ic = ic + 1
            mode = sys.argv[ic]
        elif sys.argv[ic] == '-d':
            ic = ic + 1
            data = IOUtil.load_matrix(sys.argv[ic])
        elif sys.argv[ic] == '-l':
            ic = ic + 1
            labels = IOUtil.load_labels(sys.argv[ic])
        elif sys.argv[ic] == '-pl':
            ic = ic + 1
            params_list = IOUtil.load_params_list(sys.argv[ic])
        elif sys.argv[ic] == '-cl':
            ic = ic + 1
            # Result is not used by this mode; the call is kept so that a
            # bad path still fails during argument parsing as before.
            IOUtil.load_column_labels(sys.argv[ic])
        elif sys.argv[ic] == '-cd':
            ic = ic + 1
            # Same as '-cl': loaded but not used by this mode.
            IOUtil.load_column_descriptions(sys.argv[ic])
        elif sys.argv[ic] == '-r':
            ic = ic + 1
            result_filename = sys.argv[ic]
        ic = ic + 1

    if mode != "graphsimsvmwalktrough":
        return

    # The extended pipeline (longer genetic run plus the minimizing study)
    # is currently always enabled.
    extended_mode = True
    label_set = set(labels)

    def last_result(study):
        # Best performer of the final turn of a study.
        return study.get_best_performer_params_result_history()[-1]

    def feature_bits(features):
        # Feature vector rendered as a string of '1'/'0' characters.
        return "".join("1" if flag else "0" for flag in features)

    # Base - Get best Parameters for SVM
    study_base = GeneticFeatureStudy(data,
                                     labels,
                                     params_list,
                                     creature_type=SVMCreature,
                                     max_population=1,
                                     max_turns=1,
                                     keep_best_n=0,
                                     max_threads=20,
                                     mutation_rate=0.1,
                                     absolute_feature_toggle_count=None)
    study_base.compute()
    study_base_params_result = last_result(study_base)
    best_C = study_base_params_result.params["C"]
    best_gamma = study_base_params_result.params["gamma"]

    # Perform genetic Study with the best parameters fixed
    params_list = [{"C": best_C, "gamma": best_gamma}]
    study_genetic = GeneticFeatureStudy(
        data,
        labels,
        params_list,
        creature_type=SVMCreature,
        max_population=20,
        max_turns=500 if extended_mode else 50,
        keep_best_n=3,
        max_threads=20,
        mutation_rate=0.1,
        absolute_feature_toggle_count=None)
    study_genetic.compute()
    study_genetic_params_result = last_result(study_genetic)

    # Perform feature reduction optimization, seeded with the genetic result
    study_final_params_result = None
    if extended_mode:
        feature_preset_list = [
            feature_bits(study_genetic_params_result.features)
        ]
        study_final = GeneticFeatureStudy(
            data,
            labels,
            params_list,
            creature_type=SVMCreature,
            max_population=20,
            max_turns=500,
            keep_best_n=3,
            max_threads=20,
            mutation_rate=None,
            absolute_feature_toggle_count=1,
            feature_preset_list=feature_preset_list)
        study_final.compute()
        study_final_params_result = last_result(study_final)

    # Results that contribute a feature vector to the summary line.
    optimized_results = [study_genetic_params_result]
    # (section name, result, formatter) for the per-category tables.  The
    # first table has always been written with str(), the others with repr().
    tables = [
        ("AllFeatures", study_base_params_result, str),
        ("Optimized", study_genetic_params_result, repr),
    ]
    if extended_mode:
        optimized_results.append(study_final_params_result)
        tables.append(("OptimizedMinimized", study_final_params_result, repr))

    # Write Results; the context manager closes the file even if a write or
    # an attribute lookup fails halfway through.
    with open(result_filename, "w") as result_file:
        result_file.write(
            repr(best_C) + "\t" + repr(best_gamma) + "\t" +
            repr(study_base_params_result.avg_fscore) + "\t" +
            repr(study_base_params_result.avg_precision) + "\t" +
            repr(study_base_params_result.avg_recall))
        for result in optimized_results:
            result_file.write("\t" + repr(result.avg_fscore) +
                              "\t" + repr(result.avg_precision) +
                              "\t" + repr(result.avg_recall) +
                              "\t")
            result_file.write(feature_bits(result.features))
            result_file.write("\t")
            # Percentage of features that are active
            result_file.write(
                repr((result.active_feature_count /
                      len(result.features)) * 100))
        result_file.write("\n")

        for type_name, result, fmt in tables:
            result_file.write("Type=" + type_name + "\n")
            result_file.write("Category\tPrecision\tRecall\tFScore\n")
            for cat in range(0, len(label_set)):
                result_file.write(
                    fmt(cat) + "\t" +
                    fmt(result.precisions[cat]) + "\t" +
                    fmt(result.recalls[cat]) + "\t" +
                    fmt(result.fscores[cat]) + "\n")
    def compute(self):
        """Greedily remove one feature per layer and print the resulting scores.

        A root creature with all features is computed first.  Then, for
        ``self._feature_count - 2`` layers, every active feature of the
        previous layer's winner is switched off in turn, the resulting
        creatures are computed on worker threads (at most
        ``self._max_threads`` at once) and the one with the highest
        ``avg_fscore`` becomes the layer's winner.  Ties keep the first
        candidate.  A ``Removed:`` line is printed per layer, followed by a
        final table with one row per winner (column label, score, column
        description and feature string; ``-`` for the root).
        """
        lock = threading.Lock()
        threads = []
        sleep_event = threading.Event()

        def block_while(still_busy):
            # Poll the worker list until ``still_busy(count)`` turns false.
            while True:
                with lock:
                    running = len(threads)
                if not still_busy(running):
                    return
                sleep_event.wait(1)

        root = self._creature_type(
            self._data_matrix, self._labels,
            IOUtil.get_full_features(len(self._data_matrix[0])),
            self._params_list)
        winners = [root]
        removed_history = [-1]  # -1 marks the root (nothing removed)
        root.compute()

        # Take up to _feature_count-2 features away
        for layer in range(self._feature_count - 2):
            # One candidate per feature still active in the previous winner
            parent_features = winners[layer].get_best_params_result().features
            candidates = []
            dropped = []
            for idx, flag in enumerate(parent_features):
                if flag == True:
                    trimmed = copy.deepcopy(parent_features)
                    trimmed[idx] = False
                    candidates.append(
                        self._creature_type(self._data_matrix, self._labels,
                                            trimmed, self._params_list))
                    dropped.append(idx)

            for candidate in candidates:
                # Make sure we have room for another thread
                block_while(lambda running: running >= self._max_threads)
                worker = CreatureThread(candidate, lock, threads, sleep_event)
                threads.append(worker)
                worker.start()
            # Wait for pending threads to complete
            block_while(lambda running: running != 0)

            sys.stdout.flush()

            top = None
            top_score = -1
            top_dropped = 0
            for candidate, idx in zip(candidates, dropped):
                score = candidate.get_best_params_result().avg_fscore
                if score > top_score:
                    top_score = score
                    top = candidate
                    top_dropped = idx
            winners.append(top)
            removed_history.append(top_dropped)
            print("Removed: " + repr(top_dropped) + "\t" + repr(top_score))

        for creature, removed in zip(winners, removed_history):
            if removed == -1:
                print("-\t" +
                      repr(creature.get_best_params_result().avg_fscore) +
                      "\t-\t" +
                      creature.get_best_params_result().get_features_string())
                continue
            column = self._column_labels[removed]
            print(column + "\t" +
                  repr(creature.get_best_params_result().avg_fscore) +
                  "\t" + self._column_descriptions[
                      self._column_labels[removed]] + "\t" +
                  creature.get_best_params_result().get_features_string())
Exemplo n.º 5
0
    def compute(self):
        """Score every feature by the average F-score of the tuples using it.

        One creature is built and computed per entry returned by
        ``self._create_tuples()`` (at most ``self._max_threads`` worker
        threads at a time).  For each feature the ``avg_fscore`` values of
        all creatures in which that feature is active are averaged.  The
        features are then printed in descending order of that average as
        ``label<TAB>score``, with the column description appended when
        ``self._column_descriptions`` is not ``None``.

        A feature that is not active in any evaluated creature gets an
        average of ``0.0`` instead of raising ``ZeroDivisionError``.
        """
        feature_list = self._create_tuples()
        lock = threading.Lock()
        threads = []
        sleep_event = threading.Event()
        creatures = []
        total = len(feature_list)
        for counter, features in enumerate(feature_list, start=1):
            print(repr(counter) + "/" + repr(total))
            # Make sure we have room for another thread
            while True:
                with lock:
                    active_threads = len(threads)
                if active_threads < self._max_threads:
                    break
                sleep_event.wait(1)
            creature = self._creature_type(
                self._data_matrix, self._labels,
                IOUtil.get_feature_from_string(features), self._params_list)
            creatures.append(creature)
            thread = CreatureThread(creature, lock, threads, sleep_event)
            threads.append(thread)
            thread.start()
        # Wait for pending threads to complete
        while True:
            with lock:
                active_threads = len(threads)
            if active_threads == 0:
                break
            sleep_event.wait(1)

        sys.stdout.flush()

        # Per feature: sum of scores and number of creatures it was active in
        score_sum = [0 for _ in range(self._feature_count)]
        score_count = [0 for _ in range(self._feature_count)]
        for creature in creatures:
            result = creature.get_best_params_result()
            for i, active in enumerate(result.features):
                if active:
                    score_sum[i] += result.avg_fscore
                    score_count[i] += 1

        feature_results = []
        for i in range(self._feature_count):
            # Guard the division: without any sample there is no average,
            # and crashing here would throw away all computed results.
            if score_count[i]:
                average = score_sum[i] / score_count[i]
            else:
                average = 0.0
            feature_results.append(
                TupleFeatureStudyResultEntry(self._column_labels[i], average))
        feature_results.sort(key=lambda entry: entry.avg_fscore, reverse=True)

        if self._column_descriptions is None:
            for res in feature_results:
                print(res.feature_label + "\t" + repr(res.avg_fscore))
        else:
            for res in feature_results:
                print(res.feature_label + "\t" + repr(res.avg_fscore) + "\t" +
                      self._column_descriptions[res.feature_label])
def _iter_stat_files(base_dir, nested):
    """Yield ``(label, path)`` for each ``.txt`` file below *base_dir*.

    With ``nested`` true the files are looked up one directory level down
    (entries of *base_dir* that are not directories are skipped) and the
    label is ``"<subdir>/<file>"``; otherwise the files are taken directly
    from *base_dir* and the label is the file name.
    """
    if nested:
        for d in listdir(base_dir):
            sub_dir = base_dir + "/" + d
            if isdir(sub_dir):
                for f in listdir(sub_dir):
                    if f.endswith(".txt"):
                        yield d + "/" + f, sub_dir + "/" + f
    else:
        for f in listdir(base_dir):
            if f.endswith(".txt"):
                yield f, base_dir + "/" + f


def _collect_last_fscores(base_dir, nested):
    """Read the last value of column 1 from every stats file in *base_dir*.

    Returns ``(values, min_label, max_label)``: the collected values in
    listing order and the labels of the files holding the smallest and the
    largest value.  The running minimum starts at 1 and the running maximum
    at 0, so a label stays ``""`` when no value falls below/above that
    starting point.
    """
    values = []
    min_val = 1
    max_val = 0
    min_label = ""
    max_label = ""
    for label, path in _iter_stat_files(base_dir, nested):
        vals = IOUtil.get_float_column(path, 1, ignore_first_line=True)
        val = vals[-1]
        if val < min_val:
            min_val = val
            min_label = label
        if val > max_val:
            max_val = val
            max_label = label
        values.append(val)
    return values, min_label, max_label


def main(argv=None):
    """Print TikZ box plots for the similarity-graph F-score results.

    Three series come from columns 2, 5 and 10 of the results summary file;
    six more are collected from the ``StatsAllFeatures`` and
    ``StatsOptimizedMinimized`` folders of the ``RandomGraph``,
    ``Random_43x43`` and ``RandomGoldRandomClasses`` baselines.  All input
    locations are hard-coded; ``argv`` is not used.
    """
    root = "k:/Wiki/StadtWikis/DDC/Classification/SimilarityGraph"
    summary = root + "/Results/Summary.txt"

    fsc_all = IOUtil.get_float_column(summary,
                                      column=2,
                                      ignore_first_line=False)
    fsc_opt = IOUtil.get_float_column(summary,
                                      column=5,
                                      ignore_first_line=False)
    fsc_min = IOUtil.get_float_column(summary,
                                      column=10,
                                      ignore_first_line=False)

    # (plot name prefix, baseline folder, stats files one directory down?)
    baselines = [
        ("f_scores_b2", "RandomGraph", True),
        ("f_scores_b3", "Random_43x43", False),
        ("f_scores_b4", "RandomGoldRandomClasses", True),
    ]
    variants = [
        ("_all", "StatsAllFeatures"),
        ("_min", "StatsOptimizedMinimized"),
    ]

    # Read everything first; the plots are only built once all data is in.
    collected = []
    for prefix, folder, nested in baselines:
        for suffix, stats_folder in variants:
            values, min_label, max_label = _collect_last_fscores(
                root + "/" + folder + "/" + stats_folder, nested)
            collected.append((prefix + suffix, values, min_label, max_label))

    boxplots = [
        BoxPlot(fsc_all, "fsc_all_boxplot"),
        BoxPlot(fsc_opt, "fsc_opt"),
        BoxPlot(fsc_min, "fsc_min"),
    ]
    for name, values, min_label, max_label in collected:
        boxplots.append(
            BoxPlot(values, name, minlabel=min_label, maxlabel=max_label))

    for boxplot in boxplots:
        print(boxplot.get_tikz())
def ddcplots(argv=None):
    """Print the ``get_tikz()`` output of seven box plots of DDC results.

    Three series are read as whole columns from two ``Summary.txt`` files.
    Four more series are built by taking the last value of column 1 from
    every ``.txt`` file found under the ``Random_43x98`` and
    ``DDCRandomGoldRandomClasses`` statistics directories (the latter is
    scanned one sub-directory level deep). Each series is wrapped in a
    ``BoxPlot``; all plots are created first and then printed in order.

    All input locations are hard-coded paths on drive ``k:``.

    Args:
        argv: Accepted but not used by this function.
    """

    def last_values(directory):
        # Last entry of column 1 for each ".txt" file directly in directory.
        collected = []
        for filename in listdir(directory):
            if not filename.endswith(".txt"):
                continue
            column = IOUtil.get_float_column(directory + "/" + filename,
                                             1,
                                             ignore_first_line=True)
            collected.append(column[-1])
        return collected

    def last_values_in_subdirs(directory):
        # Same as last_values, applied to every sub-directory of directory.
        collected = []
        for entry in listdir(directory):
            candidate = directory + "/" + entry
            if isdir(candidate):
                collected.extend(last_values(candidate))
        return collected

    ddc_summary = "k:/Wiki/StadtWikis/DDC/Classification/DDCResults/Summary.txt"
    graph_summary = "k:/Wiki/StadtWikis/DDC/Classification/SimilarityGraph/Results/Summary.txt"

    fsc_all = IOUtil.get_float_column(ddc_summary,
                                      column=3,
                                      ignore_first_line=False)
    fsc_opt = IOUtil.get_float_column(graph_summary,
                                      column=6,
                                      ignore_first_line=False)
    fsc_min = IOUtil.get_float_column(graph_summary,
                                      column=11,
                                      ignore_first_line=False)

    f_scores_b3_all = last_values(
        "k:/Wiki/StadtWikis/DDC/Classification/Random_43x98/StatsAllFeatures")
    f_scores_b3_min = last_values(
        "k:/Wiki/StadtWikis/DDC/Classification/Random_43x98/StatsOptimizedMinimized"
    )
    f_scores_b4_all = last_values_in_subdirs(
        "k:/Wiki/StadtWikis/DDC/Classification/DDCRandomGoldRandomClasses/StatsAllFeatures"
    )
    f_scores_b4_min = last_values_in_subdirs(
        "k:/Wiki/StadtWikis/DDC/Classification/DDCRandomGoldRandomClasses/StatsOptimizedMinimized"
    )

    # (values, plot name) pairs in output order.
    series = (
        (fsc_all, "fsc_all_boxplot"),
        (fsc_opt, "fsc_opt"),
        (fsc_min, "fsc_min"),
        (f_scores_b3_all, "f_scores_b3_all"),
        (f_scores_b3_min, "f_scores_b3_min"),
        (f_scores_b4_all, "f_scores_b4_all"),
        (f_scores_b4_min, "f_scores_b4_min"),
    )

    # Build every plot before printing any of them.
    boxplots = [BoxPlot(values, name) for values, name in series]
    for boxplot in boxplots:
        print(boxplot.get_tikz())
Exemplo n.º 8
0
    def compute(self):
        """Score each feature by leaving it out, then print a report.

        Steps, in order:

        1. Compute a reference creature on the full feature set.
        2. For every feature index, create a creature whose feature mask
           has "0" at that index and "1" everywhere else, and hand it to a
           ``CreatureThread``. A new thread is only started while the
           shared thread list holds fewer than ``self._max_threads``
           entries.
        3. Wait until the shared thread list is empty.
        4. Build three synthetic masks by comparing each leave-one-out
           f-score with the reference f-score, and compute one creature
           per mask.
        5. Print tab-separated lines: the reference, the three synthetic
           creatures, and one line per feature.

        NOTE(review): this method neither calls ``compute()`` on the
        per-feature creatures nor removes entries from ``threads``;
        presumably ``CreatureThread`` does both -- confirm against its
        definition.
        """
        lock = threading.Lock()
        threads = []
        sleep_event = threading.Event()
        creatures = []

        def running_threads():
            # Read the size of the shared thread list under the lock.
            with lock:
                return len(threads)

        def compute_synthetic(keep_feature):
            # Mask gets "1" where keep_feature(leave-one-out score,
            # reference score) holds and "0" otherwise.
            mask = "".join(
                "1" if keep_feature(
                    candidate.get_best_params_result().avg_fscore,
                    root_creature.get_best_params_result().avg_fscore)
                else "0" for candidate in creatures)
            synthetic = self._creature_type(
                self._data_matrix, self._labels,
                IOUtil.get_feature_from_string(mask), self._params_list)
            synthetic.compute()
            return synthetic

        root_creature = self._creature_type(
            self._data_matrix, self._labels,
            IOUtil.get_full_features(len(self._data_matrix[0])),
            self._params_list)
        root_creature.compute()

        for feature_id in range(self._feature_count):
            # Mask with exactly one position switched to "0".
            feature = "".join(
                "0" if position == feature_id else "1"
                for position in range(self._feature_count))
            creatures.append(
                self._creature_type(
                    self._data_matrix, self._labels,
                    IOUtil.get_feature_from_string(feature),
                    self._params_list))

            # Poll once per second until a thread slot is available.
            while running_threads() >= self._max_threads:
                sleep_event.wait(1)

            thread = CreatureThread(creatures[-1], lock, threads, sleep_event)
            threads.append(thread)
            thread.start()

        # Wait for pending threads to complete.
        while running_threads() != 0:
            sleep_event.wait(1)

        sys.stdout.flush()

        # Keep only the features whose removal lowered the f-score.
        synth_maxloss_creature = compute_synthetic(
            lambda score, reference: score < reference)

        # Variant: drop only the features whose removal improved the
        # f-score, keeping all others (including those with no effect).
        synth_minloss_creature = compute_synthetic(
            lambda score, reference: score <= reference)

        # To complete the picture: keep only the features whose removal
        # improved the f-score.
        synth_worst_creature = compute_synthetic(
            lambda score, reference: score > reference)

        summary_rows = (
            ("Reference\t", root_creature),
            ("SynthMaxLoss\t", synth_maxloss_creature),
            ("SynthMinLoss\t", synth_minloss_creature),
            ("SynthWorst\t", synth_worst_creature),
        )
        for prefix, summarized in summary_rows:
            print(prefix +
                  repr(summarized.get_best_params_result().avg_fscore) +
                  "\t" +
                  summarized.get_best_params_result().get_features_string())

        # One line per feature: index, label, score, description, mask.
        for index, creature in enumerate(creatures):
            label = self._column_labels[index]
            print(
                repr(index) + "\t" + label + "\t" +
                repr(creature.get_best_params_result().avg_fscore) + "\t" +
                self._column_descriptions[label] + "\t" +
                creature.get_best_params_result().get_features_string())