Exemplo n.º 1
0
    def compute(self):
        """Rank features by the average f-score of the tuples that use them.

        One creature is built for every feature string returned by
        ``self._create_tuples()`` and handed to a ``CreatureThread``; at most
        ``self._max_threads`` threads are kept in flight.  Once all threads
        are gone, each feature's score is the mean ``avg_fscore`` of the
        creatures whose best result has that feature active.  The ranking is
        printed best-first as tab-separated lines (label, score and, when
        ``self._column_descriptions`` is not None, the description).

        A feature that is not active in any creature gets a score of 0.0
        instead of raising ``ZeroDivisionError``.
        """
        feature_list = self._create_tuples()
        total = len(feature_list)
        lock = threading.Lock()
        threads = []
        sleep_event = threading.Event()
        creatures = []
        for counter, features in enumerate(feature_list, start=1):
            print(repr(counter) + "/" + repr(total))
            # Make sure we have room for another thread.
            # NOTE(review): assumes CreatureThread removes itself from
            # `threads` (under `lock`) when it finishes - confirm.
            while True:
                with lock:
                    active_threads = len(threads)
                if active_threads < self._max_threads:
                    break
                sleep_event.wait(1)
            creature = self._creature_type(
                self._data_matrix, self._labels,
                IOUtil.get_feature_from_string(features), self._params_list)
            creatures.append(creature)
            thread = CreatureThread(creature, lock, threads, sleep_event)
            threads.append(thread)
            thread.start()

        # Wait for pending threads to complete
        while True:
            with lock:
                active_threads = len(threads)
            if active_threads == 0:
                break
            sleep_event.wait(1)

        sys.stdout.flush()

        # Per feature: sum of f-scores and number of creatures that used it.
        score_sum = [0] * self._feature_count
        score_count = [0] * self._feature_count
        for creature in creatures:
            result = creature.get_best_params_result()
            for i, active in enumerate(result.features):
                if active:
                    score_sum[i] += result.avg_fscore
                    score_count[i] += 1

        feature_results = []
        for i in range(self._feature_count):
            # Guard against features that never appeared in any creature.
            if score_count[i]:
                average = score_sum[i] / score_count[i]
            else:
                average = 0.0
            feature_results.append(
                TupleFeatureStudyResultEntry(self._column_labels[i], average))
        feature_results.sort(key=lambda entry: entry.avg_fscore, reverse=True)

        for res in feature_results:
            line = res.feature_label + "\t" + repr(res.avg_fscore)
            if self._column_descriptions is not None:
                line = line + "\t" + self._column_descriptions[
                    res.feature_label]
            print(line)
 def compute(self):
     """Evolve the creature population for ``self._max_turns`` turns.

     The first population holds one creature with the full feature set,
     one creature per entry of ``self._feature_preset_list`` and random
     creatures for the remaining slots.  Each turn, every creature that is
     not yet computed is handed to a ``CreatureThread`` (at most
     ``self._max_threads`` in flight), the population is sorted with the
     comparator selected by ``self._optimize_features``, and the last
     creature of the sorted list is printed and recorded as the turn's
     best performer.  Except after the final turn, the next population is
     the last ``self._keep_best_n`` creatures unchanged plus a modified
     copy of every creature from index ``self._keep_best_n`` upwards.

     Raises:
         Exception: if the presets plus the full-feature creature do not
             fit into ``self._max_population``.
     """
     # Initialize first population
     self._population = [None] * self._max_population
     # Always include a creature with all features
     self._population[0] = self._creature_type(
         self._data_matrix, self._labels,
         IOUtil.get_full_features(len(self._data_matrix[0])),
         self._params_list)
     # Include preset
     max_preset = len(self._feature_preset_list)
     if max_preset > self._max_population - 1:
         raise Exception("Preset plus complete 1111-vector exceeds max_population")
     for i in range(1, 1 + max_preset):
         self._population[i] = self._creature_type(
             self._data_matrix, self._labels,
             IOUtil.get_feature_from_string(self._feature_preset_list[i - 1]),
             self._params_list)
     # Fill up with random creatures
     for i in range(1 + max_preset, self._max_population):
         self._population[i] = self._creature_type(
             self._data_matrix, self._labels,
             self._create_random_features(density=random.uniform(0, 1)),
             self._params_list)

     for turn in range(self._max_turns):
         print("Turn "+repr(turn+1)+"/"+repr(self._max_turns))
         lock = threading.Lock()
         threads = []
         sleep_event = threading.Event()
         for creature in self._population:
             if creature.is_computed():
                 continue
             # Make sure we have room for another thread.
             # NOTE(review): assumes CreatureThread removes itself from
             # `threads` (under `lock`) when it finishes - confirm.
             while True:
                 with lock:
                     active_threads = len(threads)
                 if active_threads < self._max_threads:
                     break
                 sleep_event.wait(1)
             thread = CreatureThread(creature, lock, threads, sleep_event)
             threads.append(thread)
             thread.start()
         # Wait for pending threads to complete
         while True:
             with lock:
                 active_threads = len(threads)
             if active_threads == 0:
                 break
             sleep_event.wait(1)

         sys.stdout.flush()
         # Sort and handle population; the best performer ends up last.
         if self._optimize_features == 'min':
             comparator = self.compare_creatures_minimize_features
         else:
             comparator = self.compare_creatures_maximize_features
         self._population = sorted(self._population,
                                   key=functools.cmp_to_key(comparator))
         best = self._population[-1]
         best.pretty_print()
         best_result = best.get_best_params_result()
         self._best_performer_params_result_history.append(best_result)
         # Count how often each feature is active in a turn's best
         # performer.  The flag itself is tested here; comparing the index
         # with True only ever matched index 1.
         for i, active in enumerate(best_result.features):
             if active:
                 self._best_performer_feature_usage[i] += 1

         if turn < self._max_turns - 1:
             # Keep top n unchanged
             population_new = [
                 self._population[i]
                 for i in range(self._max_population - self._keep_best_n,
                                self._max_population)
             ]
             # Randomize the ones we want to keep
             for i in range(self._keep_best_n, self._max_population):
                 # Make a copy of the features since we are going to manipulate them
                 features = copy.deepcopy(self._population[i].get_features())
                 if self._mutation_rate is not None:
                     while True:
                         for k in range(len(features)):
                             if random.uniform(0, 1) < self._mutation_rate:
                                 features[k] = not features[k]
                         # Accept once more than one feature is active in
                         # total (same rule as the toggle branch below),
                         # not only among the flags just switched on.
                         if sum(1 for flag in features if flag) > 1:
                             break
                 if self._absolute_feature_toggle_count is not None:
                     toggle_set = set()
                     for k in range(self._absolute_feature_toggle_count):
                         while True:
                             # Flip one feature that was not flipped yet.
                             # NOTE(review): this inner loop cannot end once
                             # every index is in toggle_set - confirm the
                             # toggle count stays below len(features).
                             while True:
                                 m = random.randint(0, len(features) - 1)
                                 if m not in toggle_set:
                                     toggle_set.add(m)
                                     features[m] = not features[m]
                                     break
                             # Flip further features until more than one
                             # feature is active.
                             if sum(1 for flag in features if flag) > 1:
                                 break
                 population_new.append(self._creature_type(
                     self._data_matrix, self._labels, features,
                     self._params_list))
             self._population = population_new
Exemplo n.º 3
0
    def compute(self):
        """Measure each feature's impact by leaving it out, then print a report.

        A reference creature with all features is computed first.  For every
        feature a creature with only that feature switched off is created and
        handed to a ``CreatureThread`` (at most ``self._max_threads`` in
        flight).  Three synthetic creatures are then computed from the
        comparison of each leave-one-out f-score with the reference f-score:

        * SynthMaxLoss: features whose removal lowered the f-score,
        * SynthMinLoss: features whose removal did not raise the f-score,
        * SynthWorst: features whose removal raised the f-score.

        Finally the reference, the three synthetic creatures and one
        tab-separated line per feature are printed.  The description column
        of the per-feature lines is omitted when
        ``self._column_descriptions`` is None.
        """
        lock = threading.Lock()
        threads = []
        sleep_event = threading.Event()
        creatures = []

        root_creature = self._creature_type(
            self._data_matrix, self._labels,
            IOUtil.get_full_features(len(self._data_matrix[0])),
            self._params_list)
        root_creature.compute()

        for feature_id in range(self._feature_count):
            # All features on except the one under study.
            feature = "".join(
                "0" if i == feature_id else "1"
                for i in range(self._feature_count))
            creature = self._creature_type(
                self._data_matrix, self._labels,
                IOUtil.get_feature_from_string(feature), self._params_list)
            creatures.append(creature)
            # Make sure we have room for another thread.
            # NOTE(review): assumes CreatureThread removes itself from
            # `threads` (under `lock`) when it finishes - confirm.
            while True:
                with lock:
                    active_threads = len(threads)
                if active_threads < self._max_threads:
                    break
                sleep_event.wait(1)

            thread = CreatureThread(creature, lock, threads, sleep_event)
            threads.append(thread)
            thread.start()

        # Wait for pending threads to complete
        while True:
            with lock:
                active_threads = len(threads)
            if active_threads == 0:
                break
            sleep_event.wait(1)

        sys.stdout.flush()

        root_fscore = root_creature.get_best_params_result().avg_fscore

        def compute_synthetic(keep_feature):
            # Build and compute a creature that keeps feature i exactly when
            # keep_feature(f-score obtained without feature i) is true.
            mask = "".join(
                "1" if keep_feature(
                    candidate.get_best_params_result().avg_fscore) else "0"
                for candidate in creatures)
            synthetic = self._creature_type(
                self._data_matrix, self._labels,
                IOUtil.get_feature_from_string(mask), self._params_list)
            synthetic.compute()
            return synthetic

        # Now take all features which if removed lower the f-score and
        # compute a creature based on them
        synth_maxloss_creature = compute_synthetic(
            lambda fscore: fscore < root_fscore)
        # Other variant: remove all whose removal had a positive impact
        # (thus keeping all the others including the "irrelevant")
        synth_minloss_creature = compute_synthetic(
            lambda fscore: fscore <= root_fscore)
        # To complete the picture: only take the worst features
        synth_worst_creature = compute_synthetic(
            lambda fscore: fscore > root_fscore)

        summary = (
            ("Reference", root_creature),
            ("SynthMaxLoss", synth_maxloss_creature),
            ("SynthMinLoss", synth_minloss_creature),
            ("SynthWorst", synth_worst_creature),
        )
        for title, summary_creature in summary:
            result = summary_creature.get_best_params_result()
            print(title + "\t" + repr(result.avg_fscore) + "\t" +
                  result.get_features_string())

        for i, creature in enumerate(creatures):
            result = creature.get_best_params_result()
            label = self._column_labels[i]
            columns = [repr(i), label, repr(result.avg_fscore)]
            # Descriptions are optional; skip the column when none are given.
            if self._column_descriptions is not None:
                columns.append(self._column_descriptions[label])
            columns.append(result.get_features_string())
            print("\t".join(columns))