Example #1
    def setUp(self):
        self.SymbolTree = SymbolTree
        self.pset = SymbolSet()

        from sklearn.datasets import fetch_california_housing

        data = fetch_california_housing()
        x = data["data"][:100]
        y = data["target"][:100]
        # No = Normalizer()
        # y=y/max(y)
        # x = No.fit_transform(x)
        self.x = x
        self.y = y
        # self.pset.add_features(x, y, )
        self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]])
        self.pset.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None)
        self.pset.add_operations(power_categories=(2, 3, 0.5),
                                 categories=("Add", "Mul", "Self", "Abs"),
                                 self_categories=None)

        from sklearn.metrics import r2_score, mean_squared_error
        self.cp = CalculatePrecisionSet(self.pset, scoring=[r2_score, mean_squared_error],
                                        score_pen=[1, -1],
                                        filter_warning=True)
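With this fixture in place, a follow-up test method (a hypothetical ``test_generate_and_score``, mirroring the usage in Example #8 below) could generate one random expression tree and score it against the configured precision set:

    def test_generate_and_score(self):
        # sketch: build one random tree from the symbol set and evaluate it
        from numpy import random
        random.seed(1)
        sl = self.SymbolTree.genGrow(self.pset, 3, 4)
        detail = self.cp.calculate_detail(sl)
        print(detail.expr, detail.y_dim)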
Example #2
    def setUp(self):
        self.SymbolTree = SymbolTree
        self.pset = SymbolSet()

        from sklearn.datasets import load_boston

        data = load_boston()
        x = data["data"]
        y = data["target"]

        self.x = x
        self.y = y
        # self.pset.add_features(x, y, )
        self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]])
        self.pset.add_constants([6, 3, 4],
                                c_dim=[dless, dless, dless],
                                c_prob=None)
        self.pset.add_operations(power_categories=(2, 3, 0.5),
                                 categories=("Add", "Mul", "Neg", "Abs"),
                                 self_categories=None)

        from sklearn.metrics import r2_score, mean_squared_error

        self.cp = CalculatePrecisionSet(self.pset,
                                        scoring=[r2_score, mean_squared_error],
                                        score_pen=[1, -1],
                                        dim_type=None,
                                        filter_warning=True)
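Note that ``load_boston`` was deprecated and then removed in scikit-learn 1.2, so this fixture no longer runs on current scikit-learn as written. A minimal replacement for the data-loading part, reusing the California housing subset from Example #1:

        from sklearn.datasets import fetch_california_housing

        data = fetch_california_housing()
        x = data["data"][:100]
        y = data["target"][:100]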
Example #3
    def __init__(self, pset, pop=500, gen=20, mutate_prob=0.5, mate_prob=0.8, hall=1, re_hall=1,
                 re_Tree=None, initial_min=None, initial_max=3, max_value=5,
                 scoring=(r2_score,), score_pen=(1,), filter_warning=True, cv=1,
                 add_coef=True, inter_add=True, inner_add=False, vector_add=False, out_add=False, flat_add=False,
                 cal_dim=False, dim_type=None, fuzzy=False, n_jobs=1, batch_size=40,
                 random_state=None, stats=None, verbose=True, migrate_prob=0,
                 tq=True, store=False, personal_map=False, stop_condition=None, details=False, classification=False,
                 score_object="y", sub_mu_max=1, limit_type="h_bgp", batch_para=False):
        """

        Parameters
        ----------
        pset:SymbolSet
            the feature x and target y and others should have been added.
        pop: int
            size of the population.
        gen: int
            number of generations.
        mutate_prob:float
            mutation probability.
        mate_prob:float
            mating (crossover) probability.
        initial_max:int
            max size of an expression when first produced.
        initial_min : None,int
            min size of an expression when first produced.
        max_value:int
            max size of an expression.
        limit_type: "height", "length", or "h_bgp"
            limitation type for max_value; does not affect initial_max or initial_min.
        hall:int,>=1
            number of HallOfFame (elite) individuals to maintain.
        re_hall:None or int>=2
            number of HallOfFame individuals to add to the next generation.
            Note: only valid when hall is set.
        re_Tree: int
            number of new features to add to the next generation.
            0 means do not add any.
        personal_map:bool or "auto"
            "auto": use the 'premap' and refresh it automatically with each individual.\n
            True: use a constant 'premap'.\n
            False: just use the probability of terminals.
        scoring: list of Callable, default is [sklearn.metrics.r2_score,]
            See also ``sklearn.metrics``.
        score_pen: tuple of 1, -1, or non-zero float.
            >0 : maximization problem, best is positive, worst is -np.inf.
            <0 : minimization problem, best is negative, worst is np.inf.

            Notes:
                if multiple scoring methods are used, the scores must be scaled to the same magnitude
                in preprocessing or weighted via score_pen, because all selection is based on mean(w_i*score_i).

            Examples::

                scoring = [r2_score,]
                score_pen= [1,]

        cv:sklearn.model_selection._split._BaseKFold,int
            the shuffle option must be False;
            default=1 means no cross-validation.
        filter_warning:bool
            filter warnings or not.
        add_coef:bool
            add coefficients to the expression or not.
        inter_add:bool
            add an intercept constant or not.
        inner_add:bool
            add inner coefficients or not.
        out_add:bool
            add outer coefficients or not.
        flat_add:bool
            add flat coefficients or not.
        n_jobs:int
            default 1; 6 is advised.
        batch_size:int
            default 40, depending on the machine.
        random_state:int
            None or int.
        cal_dim:bool
            calculate the dimension or not.
        dim_type:Dim or None or list of Dim
            "coef": af(x)+b. a,b have dimension,f(x)'s dimension is not dnan. \n
            "integer": af(x)+b. f(x) is with integer dimension. \n
            [Dim1,Dim2]: f(x)'s dimension in list. \n
            Dim: f(x) ~= Dim. (see fuzzy) \n
            Dim: f(x) == Dim. \n
            None: f(x) == pset.y_dim
        fuzzy:bool
            accept dims with the same base as dim_type, such as m, m^2, m^3.
        stats:dict
            details of logbook to show. \n
            Map:\n
            values
                = {"max": np.max, "mean": np.mean, "min": np.min, "std": np.std, "sum": np.sum}
            keys
                = {\n
                   "fitness": just see fitness[0], \n
                   "fitness_dim_max": max problem, see fitness with demand dim,\n
                   "fitness_dim_min": min problem, see fitness with demand dim,\n
                   "dim_is_target": demand dim,\n
                   "coef":  dim is True, coef have dim, \n
                   "integer":  dim is integer, \n
                   ...
                   }

            if stats is None, default is:

            for cal_dim=True:
                stats = {"fitness_dim_max": ("max",), "dim_is_target": ("sum",)}

            for cal_dim=False
                stats = {"fitness": ("max",)}

            if self-defined, the key is a function that gets the attribute of each individual.

            Examples::

                def func(ind):
                    return ind.fitness[0]
                stats = {func: ("mean",), "dim_is_target": ("sum",)}

        verbose:bool
            print verbose logbook or not.
        tq:bool
            print progress bar or not.
        store:bool or path
            if True or a path string, store the results (the path sets the output directory).
        stop_condition:callable
            stop condition evaluated on the best individual of the hall; it returns a bool, and True stops the loop.

            Examples::

                def func(ind):
                    c = ind.fitness.values[0]>=0.90
                    return c

        details:bool
            return expr and predict_y or not.

        classification: bool
            classification or not.

        score_object:
            score by y or delta y (for implicit function).
        """
        super(BaseLoop, self).__init__()

        assert initial_max <= max_value, "the initial size of an expression should not exceed the max_value limitation"
        if cal_dim:
            assert all(
                [isinstance(i, Dim) for i in pset.dim_ter_con.values()]), \
                "all input dims of pset should be Dim objects."

        self.details = details
        self.max_value = max_value
        self.pop = pop
        self.gen = gen
        self.mutate_prob = mutate_prob
        self.mate_prob = mate_prob
        self.migrate_prob = migrate_prob
        self.verbose = verbose
        self.cal_dim = cal_dim
        self.re_hall = re_hall
        self.re_Tree = re_Tree
        self.store = store
        self.limit_type = limit_type
        self.data_all = []
        self.personal_map = personal_map
        self.stop_condition = stop_condition
        self.population = []
        self.rand_state = None
        self.random_state = random_state
        self.sub_mu_max = sub_mu_max
        self.population_next = []

        self.cpset = CalculatePrecisionSet(pset, scoring=scoring, score_pen=score_pen,
                                           filter_warning=filter_warning, cal_dim=cal_dim,
                                           add_coef=add_coef, inter_add=inter_add, inner_add=inner_add,
                                           vector_add=vector_add, out_add=out_add, flat_add=flat_add, cv=cv,
                                           n_jobs=n_jobs, batch_size=batch_size, tq=tq,
                                           fuzzy=fuzzy, dim_type=dim_type, details=details,
                                           classification=classification, score_object=score_object,
                                           batch_para=batch_para
                                           )

        Fitness_ = newclass.create("Fitness_", Fitness, weights=score_pen)
        self.PTree = newclass.create("PTrees", SymbolTree, fitness=Fitness_)
        # def produce
        if initial_min is None:
            initial_min = 2
        self.register("genGrow", genGrow, pset=self.cpset, min_=initial_min, max_=initial_max + 1,
                      personal_map=self.personal_map)
        self.register("genFull", genFull, pset=self.cpset, min_=initial_min, max_=initial_max + 1,
                      personal_map=self.personal_map)
        self.register("genHalf", genGrow, pset=self.cpset, min_=initial_min, max_=initial_max + 1,
                      personal_map=self.personal_map)
        self.register("gen_mu", genGrow, min_=1, max_=self.sub_mu_max + 1, personal_map=self.personal_map)
        # def selection

        self.register("select", selTournament, tournsize=2)

        self.register("selKbestDim", selKbestDim,
                      dim_type=self.cpset.dim_type, fuzzy=self.cpset.fuzzy)
        self.register("selBest", selBest)

        self.register("mate", cxOnePoint)
        # def mutate

        self.register("mutate", mutUniform, expr=self.gen_mu, pset=self.cpset)

        self.decorate("mate", staticLimit(key=operator.attrgetter(limit_type), max_value=self.max_value))
        self.decorate("mutate", staticLimit(key=operator.attrgetter(limit_type), max_value=self.max_value))

        if stats is None:
            if cal_dim:
                if score_pen[0] > 0:
                    stats = {"fitness_dim_max": ("max",), "dim_is_target": ("sum",)}
                else:
                    stats = {"fitness_dim_min": ("min",), "dim_is_target": ("sum",)}
            else:
                if score_pen[0] > 0:
                    stats = {"fitness": ("max",)}
                else:
                    stats = {"fitness": ("min",)}

        self.stats = Statis_func(stats=stats)
        logbook = Logbook()
        logbook.header = ['gen'] + (self.stats.fields if self.stats else [])
        self.logbook = logbook

        if hall is None:
            hall = 1
        self.hall = HallOfFame(hall)

        if re_hall is None:
            self.re_hall = None
        else:
            if re_hall == 1 or re_hall == 0:
                print("re_hall should be more than 1")
                re_hall = 2
            assert re_hall >= hall, "re_hall should be no less than hall"
            self.re_hall = HallOfFame(re_hall)
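For context, a minimal end-to-end use of this constructor (a sketch, assuming ``pset`` is a SymbolSet that already has features and operations added, as in the setUp fixtures above) follows the pattern given in the class docstring of Example #4:

    bl = BaseLoop(pset=pset, pop=500, gen=10, hall=1, re_hall=2,
                  scoring=(r2_score,), score_pen=(1,),
                  n_jobs=1, random_state=1, verbose=True)
    hall = bl.run()
    best = hall.items[0]
    print(best.fitness.values, best.coef_expr)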
Example #4
class BaseLoop(Toolbox):
    """
    Base loop for BGP.

    Examples::

        if __name__ == "__main__":
            pset = SymbolSet()
            stop = lambda ind: ind.fitness.values[0] >= 0.880963

            bl = BaseLoop(pset=pset, gen=10, pop=1000, hall=1, batch_size=40, re_hall=3,
                          n_jobs=12, mate_prob=0.9, max_value=5, initial_min=1, initial_max=2,
                          mutate_prob=0.8, tq=True, dim_type="coef", stop_condition=stop,
                          re_Tree=0, store=False, random_state=1, verbose=True,
                          stats={"fitness_dim_max": ["max"], "dim_is_target": ["sum"]},
                          add_coef=True, inter_add=True, inner_add=False, cal_dim=True, vector_add=False,
                          personal_map=False)

            bl.run()

    """

    def __init__(self, pset, pop=500, gen=20, mutate_prob=0.5, mate_prob=0.8, hall=1, re_hall=1,
                 re_Tree=None, initial_min=None, initial_max=3, max_value=5,
                 scoring=(r2_score,), score_pen=(1,), filter_warning=True, cv=1,
                 add_coef=True, inter_add=True, inner_add=False, vector_add=False, out_add=False, flat_add=False,
                 cal_dim=False, dim_type=None, fuzzy=False, n_jobs=1, batch_size=40,
                 random_state=None, stats=None, verbose=True, migrate_prob=0,
                 tq=True, store=False, personal_map=False, stop_condition=None, details=False, classification=False,
                 score_object="y", sub_mu_max=1, limit_type="h_bgp", batch_para=False):
        """

        Parameters
        ----------
        pset:SymbolSet
            the feature x and target y and others should have been added.
        pop: int
            size of the population.
        gen: int
            number of generations.
        mutate_prob:float
            mutation probability.
        mate_prob:float
            mating (crossover) probability.
        initial_max:int
            max size of an expression when first produced.
        initial_min : None,int
            min size of an expression when first produced.
        max_value:int
            max size of an expression.
        limit_type: "height", "length", or "h_bgp"
            limitation type for max_value; does not affect initial_max or initial_min.
        hall:int,>=1
            number of HallOfFame (elite) individuals to maintain.
        re_hall:None or int>=2
            number of HallOfFame individuals to add to the next generation.
            Note: only valid when hall is set.
        re_Tree: int
            number of new features to add to the next generation.
            0 means do not add any.
        personal_map:bool or "auto"
            "auto": use the 'premap' and refresh it automatically with each individual.\n
            True: use a constant 'premap'.\n
            False: just use the probability of terminals.
        scoring: list of Callable, default is [sklearn.metrics.r2_score,]
            See also ``sklearn.metrics``.
        score_pen: tuple of 1, -1, or non-zero float.
            >0 : maximization problem, best is positive, worst is -np.inf.
            <0 : minimization problem, best is negative, worst is np.inf.

            Notes:
                if multiple scoring methods are used, the scores must be scaled to the same magnitude
                in preprocessing or weighted via score_pen, because all selection is based on mean(w_i*score_i).

            Examples::

                scoring = [r2_score,]
                score_pen= [1,]

        cv:sklearn.model_selection._split._BaseKFold,int
            the shuffle option must be False;
            default=1 means no cross-validation.
        filter_warning:bool
            filter warnings or not.
        add_coef:bool
            add coefficients to the expression or not.
        inter_add:bool
            add an intercept constant or not.
        inner_add:bool
            add inner coefficients or not.
        out_add:bool
            add outer coefficients or not.
        flat_add:bool
            add flat coefficients or not.
        n_jobs:int
            default 1; 6 is advised.
        batch_size:int
            default 40, depending on the machine.
        random_state:int
            None or int.
        cal_dim:bool
            calculate the dimension or not.
        dim_type:Dim or None or list of Dim
            "coef": af(x)+b. a,b have dimension,f(x)'s dimension is not dnan. \n
            "integer": af(x)+b. f(x) is with integer dimension. \n
            [Dim1,Dim2]: f(x)'s dimension in list. \n
            Dim: f(x) ~= Dim. (see fuzzy) \n
            Dim: f(x) == Dim. \n
            None: f(x) == pset.y_dim
        fuzzy:bool
            accept dims with the same base as dim_type, such as m, m^2, m^3.
        stats:dict
            details of logbook to show. \n
            Map:\n
            values
                = {"max": np.max, "mean": np.mean, "min": np.min, "std": np.std, "sum": np.sum}
            keys
                = {\n
                   "fitness": just see fitness[0], \n
                   "fitness_dim_max": max problem, see fitness with demand dim,\n
                   "fitness_dim_min": min problem, see fitness with demand dim,\n
                   "dim_is_target": demand dim,\n
                   "coef":  dim is True, coef have dim, \n
                   "integer":  dim is integer, \n
                   ...
                   }

            if stats is None, default is:

            for cal_dim=True:
                stats = {"fitness_dim_max": ("max",), "dim_is_target": ("sum",)}

            for cal_dim=False
                stats = {"fitness": ("max",)}

            if self-defined, the key is a function that gets the attribute of each individual.

            Examples::

                def func(ind):
                    return ind.fitness[0]
                stats = {func: ("mean",), "dim_is_target": ("sum",)}

        verbose:bool
            print verbose logbook or not.
        tq:bool
            print progress bar or not.
        store:bool or path
            if True or a path string, store the results (the path sets the output directory).
        stop_condition:callable
            stop condition evaluated on the best individual of the hall; it returns a bool, and True stops the loop.

            Examples::

                def func(ind):
                    c = ind.fitness.values[0]>=0.90
                    return c

        details:bool
            return expr and predict_y or not.

        classification: bool
            classification or not.

        score_object:
            score by y or delta y (for implicit function).
        """
        super(BaseLoop, self).__init__()

        assert initial_max <= max_value, "the initial size of an expression should not exceed the max_value limitation"
        if cal_dim:
            assert all(
                [isinstance(i, Dim) for i in pset.dim_ter_con.values()]), \
                "all input dims of pset should be Dim objects."

        self.details = details
        self.max_value = max_value
        self.pop = pop
        self.gen = gen
        self.mutate_prob = mutate_prob
        self.mate_prob = mate_prob
        self.migrate_prob = migrate_prob
        self.verbose = verbose
        self.cal_dim = cal_dim
        self.re_hall = re_hall
        self.re_Tree = re_Tree
        self.store = store
        self.limit_type = limit_type
        self.data_all = []
        self.personal_map = personal_map
        self.stop_condition = stop_condition
        self.population = []
        self.rand_state = None
        self.random_state = random_state
        self.sub_mu_max = sub_mu_max
        self.population_next = []

        self.cpset = CalculatePrecisionSet(pset, scoring=scoring, score_pen=score_pen,
                                           filter_warning=filter_warning, cal_dim=cal_dim,
                                           add_coef=add_coef, inter_add=inter_add, inner_add=inner_add,
                                           vector_add=vector_add, out_add=out_add, flat_add=flat_add, cv=cv,
                                           n_jobs=n_jobs, batch_size=batch_size, tq=tq,
                                           fuzzy=fuzzy, dim_type=dim_type, details=details,
                                           classification=classification, score_object=score_object,
                                           batch_para=batch_para
                                           )

        Fitness_ = newclass.create("Fitness_", Fitness, weights=score_pen)
        self.PTree = newclass.create("PTrees", SymbolTree, fitness=Fitness_)
        # def produce
        if initial_min is None:
            initial_min = 2
        self.register("genGrow", genGrow, pset=self.cpset, min_=initial_min, max_=initial_max + 1,
                      personal_map=self.personal_map)
        self.register("genFull", genFull, pset=self.cpset, min_=initial_min, max_=initial_max + 1,
                      personal_map=self.personal_map)
        self.register("genHalf", genGrow, pset=self.cpset, min_=initial_min, max_=initial_max + 1,
                      personal_map=self.personal_map)
        self.register("gen_mu", genGrow, min_=1, max_=self.sub_mu_max + 1, personal_map=self.personal_map)
        # def selection

        self.register("select", selTournament, tournsize=2)

        self.register("selKbestDim", selKbestDim,
                      dim_type=self.cpset.dim_type, fuzzy=self.cpset.fuzzy)
        self.register("selBest", selBest)

        self.register("mate", cxOnePoint)
        # def mutate

        self.register("mutate", mutUniform, expr=self.gen_mu, pset=self.cpset)

        self.decorate("mate", staticLimit(key=operator.attrgetter(limit_type), max_value=self.max_value))
        self.decorate("mutate", staticLimit(key=operator.attrgetter(limit_type), max_value=self.max_value))

        if stats is None:
            if cal_dim:
                if score_pen[0] > 0:
                    stats = {"fitness_dim_max": ("max",), "dim_is_target": ("sum",)}
                else:
                    stats = {"fitness_dim_min": ("min",), "dim_is_target": ("sum",)}
            else:
                if score_pen[0] > 0:
                    stats = {"fitness": ("max",)}
                else:
                    stats = {"fitness": ("min",)}

        self.stats = Statis_func(stats=stats)
        logbook = Logbook()
        logbook.header = ['gen'] + (self.stats.fields if self.stats else [])
        self.logbook = logbook

        if hall is None:
            hall = 1
        self.hall = HallOfFame(hall)

        if re_hall is None:
            self.re_hall = None
        else:
            if re_hall == 1 or re_hall == 0:
                print("re_hall should be more than 1")
                re_hall = 2
            assert re_hall >= hall, "re_hall should be no less than hall"
            self.re_hall = HallOfFame(re_hall)

    def varAnd(self, *arg, **kwargs):
        return varAnd(*arg, **kwargs)

    def to_csv(self, data_all):
        """store to csv"""
        if self.store:
            if isinstance(self.store, str):
                path = self.store
            else:
                path = os.getcwd()
            file_new_name = "_".join((str(self.pop), str(self.gen),
                                      str(self.mutate_prob), str(self.mate_prob),
                                      str(time.time())))
            try:
                st = Store(path)
                st.to_csv(data_all, file_new_name, transposition=True)
                print("store data to ", path, file_new_name)
            except (IOError, PermissionError):
                st = Store(os.getcwd())
                st.to_csv(data_all, file_new_name, transposition=True)
                print("store data to ", os.getcwd(), file_new_name)

    def maintain_halls(self, population):
        """maintain the best p expression"""
        if self.re_hall is not None:
            maxsize = max(self.hall.maxsize, self.re_hall.maxsize)

            if self.cal_dim:
                inds_dim = self.selKbestDim(population, maxsize)
            else:
                inds_dim = self.selBest(population, maxsize)

            self.hall.update(inds_dim)
            self.re_hall.update(inds_dim)

            sole_inds = [i for i in self.re_hall.items if i not in inds_dim]
            inds_dim.extend(sole_inds)
        else:
            if self.cal_dim:
                inds_dim = self.selKbestDim(population, self.hall.maxsize)
            else:
                inds_dim = self.selBest(population, self.hall.maxsize)

            self.hall.update(inds_dim)
            inds_dim = []

        inds_dim = copy.deepcopy(inds_dim)
        return inds_dim

    def re_add(self):
        """add the expression as a feature"""
        if self.hall.items and self.re_Tree:
            it = self.hall.items
            indo = it[random.choice(len(it))]
            ind = copy.deepcopy(indo)
            inds = ind.depart()
            if not inds:
                pass
            else:
                inds = [self.cpset.calculate_detail(indi) for indi in inds]
                le = min(self.re_Tree, len(inds))
                indi = inds[random.choice(le)]
                self.cpset.add_tree_to_features(indi)

    def re_fresh_by_name(self, *arr):
        re_name = ["mutate", "genGrow", "genFull", "genHalf"]
        if len(arr) > 0:
            re_name.extend(arr)
        self.refresh(re_name, pset=self.cpset)
        # for i in re_name + ["mate"]:  # don't del this
        #     self.decorate(i, staticLimit(key=operator.attrgetter("height"), max_value=2 * (self.max_value + 1)))

    def top_n(self, n=10, gen=-1, key="value", filter_dim=True, ascending=False):
        """
        Return the best n results.

        Note:
            Only valid in ``store=True``.

        Parameters
        ----------
        n:int
            number of results to return.
        gen:
            the generation, default is -1 (the last one).
        key: str
            sort key, default is "value".
        filter_dim:
            filter out expressions whose dimension misses the target or not.
        ascending:
            sort ascending or not.

        Returns
        -------
        top n results.
        pd.DataFrame

        """
        import pandas as pd
        if not self.store:
            raise TypeError("Only valid with store=True")
        data = self.data_all

        data = pd.DataFrame(data)
        if gen == -1:
            gen = max(data["gen"])

        data = data[data["gen"] == gen]

        if filter_dim:
            data = data[data["dim_score"] == 1]

        data = data.drop_duplicates(['expr'], keep="first")

        if key is not None:
            data[key] = data[key].str.replace("(", "")
            data[key] = data[key].str.replace(")", "")
            data[key] = data[key].str.replace(",", "")
            try:
                data[key] = data[key].astype(float)
            except ValueError:
                raise TypeError("check this key column can be translated into float")

            data = data.sort_values(by=key, ascending=ascending).iloc[:n, :]

        return data

    def check_height_length(self, pop, site=""):
        old = len(pop)
        if self.limit_type == 'height':
            pop = [i for i in pop if i.height <= self.max_value]
        elif self.limit_type == 'length':
            pop = [i for i in pop if i.length <= self.max_value]
        else:
            pop = [i for i in pop if i.h_bgp <= self.max_value]
        new = len(pop)
        if old == new:
            pass
        else:
            if site != "":
                print(site)
            # raise TypeError
            index = random.randint(0, new, old - new)
            pop.extend([pop[i] for i in index])
        return pop

    def run(self, warm_start=False, new_gen=None):
        """

        Parameters
        ----------
        warm_start:bool
            continue from the last result or not.
        new_gen:
            number of new generations for warm_start; default is the initial number of generations.

        """
        # 1.generate###################################################################
        if warm_start is False:
            random.seed(self.random_state)
            population = [self.PTree(self.genHalf()) for _ in range(self.pop)]
            gen_i = 0
            gen = self.gen
        else:
            assert self.population_next != []
            random.set_state(self.rand_state)
            population = self.population_next
            gen_i = self.gen_i
            self.re_fresh_by_name()
            if new_gen:
                gen = gen_i + new_gen
            else:
                gen = gen_i + self.gen

        for gen_i in range(gen_i + 1, gen + 1):

            population_old = copy.deepcopy(population)

            # 2.evaluate###############################################################

            invalid_ind_score = self.cpset.parallelize_score(population_old)

            for ind, score in zip(population_old, invalid_ind_score):
                ind.fitness.values = tuple(score[0])
                ind.y_dim = score[1]
                ind.dim_score = score[2]
                ind.coef_expr = score[3]
                ind.coef_pre_y = score[4]
            population = population_old
            # 3.log###################################################################
            # 3.1.log-print##############################

            record = self.stats.compile(population) if self.stats else {}
            self.logbook.record(gen=gen_i, **record)
            if self.verbose:
                print(self.logbook.stream)
            # 3.2.log-store##############################
            if self.store:
                datas = [{"gen": gen_i, "name": str(pop_i), "expr": str([pop_i.coef_expr]),
                          "value": str(pop_i.fitness.values),
                          "dimension": str(pop_i.y_dim),
                          "dim_score": pop_i.dim_score} for pop_i in population]
                self.data_all.extend(datas)

            self.population = copy.deepcopy(population)

            # 3.3.log-hall###############################
            inds_dim = self.maintain_halls(population)

            # 4.refresh################################################################
            # 4.1.re_update the premap ##################
            if self.personal_map == "auto":
                [self.cpset.premap.update(indi, self.cpset) for indi in inds_dim]

            # 4.2.re_add_tree and refresh pset###########
            if self.re_Tree:
                self.re_add()

            self.re_fresh_by_name()

            # 6.next generation !!!!#######################################################
            # selection and mutate,mate,migration
            population = self.select(population, int((1 - self.migrate_prob) * len(population)) - len(inds_dim))

            offspring = self.varAnd(population, self, self.mate_prob, self.mutate_prob)
            offspring.extend(inds_dim)
            migrate_pop = [self.PTree(self.genFull()) for _ in range(int(self.migrate_prob * len(population)))]
            population[:] = offspring + migrate_pop

            population = self.check_height_length(population)

            # 5.break#######################################################
            if self.stop_condition is not None:
                if self.stop_condition(self.hall.items[0]):
                    break

            # 7 freeze ###################################################

            self.rand_state = random.get_state()
            self.population_next = population
            self.gen_i = gen_i

        # final.store#####################################################################

        if self.store:
            self.to_csv(self.data_all)
        self.hall.items = [self.cpset.calculate_detail(indi) for indi in self.hall.items]

        return self.hall
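Since ``run`` freezes its random state and population each generation, a finished loop can be resumed, and with ``store`` enabled the logged generations can be ranked afterwards with ``top_n``. A sketch, assuming ``bl`` is a BaseLoop constructed with ``store=True``:

    hall = bl.run()                               # first run, bl.gen generations
    hall = bl.run(warm_start=True, new_gen=5)     # resume for 5 more generations
    best = bl.top_n(n=10, gen=-1, key="value", filter_dim=True)
    print(best[["expr", "value", "dimension"]])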
Example #5
    sl = [SymbolTree.genGrow(pset0, 3, 4) for _ in range(500)]
    a = time.time()
    # sli =" MAdd(Sub(Add(Mul(x0, gx1), exp(x10)), Mul(Conv(Add(x0, gx0)), Mul(x6, MAdd(x10)))))"
    # sl =["MAdd(gx1 * x11 * (-x0 + x11) * MAdd(gx1))"]

    # sl = [compile_context(sli, pset0.context, pset0.gro_ter_con) for sli in sl]
    # sl = [simple(sli.args[0], pset0.gro_ter_con) for sli in sl if len(sli.args)>0]
    c = 1
    a = time.time()
    pset0 = CalculatePrecisionSet(pset0,
                                  scoring=None,
                                  score_pen=(1, ),
                                  filter_warning=True,
                                  cal_dim=False,
                                  dim_type=None,
                                  fuzzy=False,
                                  add_coef=False,
                                  inter_add=True,
                                  inner_add=False,
                                  n_jobs=1,
                                  batch_size=20,
                                  tq=True)
    from sys import getsizeof

    T100 = getsizeof(sl)
    psize = getsizeof(pset0)
    # print(T100,psize)
    for i in sl:
        b = time.time()
        i0 = compile_context(i, pset0.context, pset0.gro_ter_con)
        r2 = time.time()
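        # Hypothetical continuation (not in the original snippet): report the
        # per-tree compile time measured above and the total elapsed time.
        print("compile time: %.4f s" % (r2 - b))
    print("total elapsed: %.2f s" % (time.time() - a))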
Example #6
    x[:, 2] = x[:, 0] / x[:, 1]
    y = np.zeros(x.shape[0])
    x = x
    y = y

    pset = SymbolSet()

    pset.add_features(x, y)
    pset.add_operations(
        categories=("Add", "Mul", "Self", "Abs"),
        self_categories=None)

    from sklearn.metrics import r2_score, mean_squared_error

    cp = CalculatePrecisionSet(pset, scoring=[r2_score, mean_squared_error],
                               score_pen=[1, -1],
                               filter_warning=True)
    x0, x1, x2 = sympy.symbols("x0, x1, x2")

    # t=Function("t")
    # expr00 = (x2*x1+x0*x2*2).subs(x0, t(x1))
    # dv1 = sympy.diff(expr00, x1, evaluate=True)
    # dv1 = dv1.subs(t(x1), x0)
    #
    # t = Function("t")
    # expr00 = (x2*x1+x0*x2*2).subs(x1, t(x0))
    # dv2 = sympy.diff(expr00, x0, evaluate=True)
    # dv2 = dv2.subs(t(x0), x1)
    #
    # k = dv1/dv2
    #
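The commented-out block above sketches an implicit-derivative ratio with sympy. Uncommented and lightly cleaned (same symbols ``x0, x1, x2`` as defined above; a sketch of the idea only, not part of the library API):

    from sympy import Function, diff

    # treat x0 as an implicit function of x1 and differentiate w.r.t. x1
    t = Function("t")
    expr00 = (x2 * x1 + x0 * x2 * 2).subs(x0, t(x1))
    dv1 = diff(expr00, x1, evaluate=True).subs(t(x1), x0)

    # treat x1 as an implicit function of x0 and differentiate w.r.t. x0
    t = Function("t")
    expr00 = (x2 * x1 + x0 * x2 * 2).subs(x1, t(x0))
    dv2 = diff(expr00, x0, evaluate=True).subs(t(x0), x1)

    k = dv1 / dv2  # ratio of the two implicit derivatives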
Example #7
    def test_pset_passed_to_cpset_will_change(self):
        cp = CalculatePrecisionSet(self.pset)
        self.assertNotEqual(cp, self.cp)
Example #8
class MyTestbase(unittest.TestCase):

    def setUp(self):
        self.SymbolTree = SymbolTree
        self.pset = SymbolSet()

        from sklearn.datasets import fetch_california_housing

        data = fetch_california_housing()
        x = data["data"][:100]
        y = data["target"][:100]
        # No = Normalizer()
        # y=y/max(y)
        # x = No.fit_transform(x)
        self.x = x
        self.y = y
        # self.pset.add_features(x, y, )
        self.pset.add_features(x, y, x_group=[[1, 2], [4, 5]])
        self.pset.add_constants([6, 3, 4], c_dim=[dless, dless, dless], c_prob=None)
        self.pset.add_operations(power_categories=(2, 3, 0.5),
                                 categories=("Add", "Mul", "Self", "Abs"),
                                 self_categories=None)

        from sklearn.metrics import r2_score, mean_squared_error
        self.cp = CalculatePrecisionSet(self.pset, scoring=[r2_score, mean_squared_error],
                                        score_pen=[1, -1],
                                        filter_warning=True)

    def test_pset_passed_to_cpset_will_change(self):
        cp = CalculatePrecisionSet(self.pset)
        self.assertNotEqual(cp, self.cp)

    def test_tree_gengrow_repr_and_str_different(self):
        from numpy import random
        random.seed(1)
        sl = SymbolTree.genGrow(self.pset, 3, 4)
        print(sl)
        # self.assertNotEqual(repr(sl), str(sl))

    def test_add_tree_back(self):
        from numpy import random
        random.seed(1)
        sl = SymbolTree.genGrow(self.pset, 3, 4)
        self.pset.add_tree_to_features(sl)

    #
    def test_barch_tree(self):
        from numpy import random
        random.seed(1)
        for i in range(10):

            sl = SymbolTree.genGrow(self.pset, 3, 4)
            cpsl = self.cp.calculate_detail(sl)
            self.assertIsNotNone(cpsl.y_dim)
            self.assertIsNotNone(cpsl.expr)
            self.assertIsNone(cpsl.p_name)
            if cpsl.pre_y is not None:
                self.assertIsInstance(cpsl.pre_y, numpy.ndarray)
                self.assertEqual(cpsl.pre_y.shape, self.y.shape)
                print(cpsl.coef_pre_y[:3])
                print(cpsl.pre_y[:3])
                print(cpsl.coef_score)
                print(cpsl.coef_expr)
                print(cpsl.pure_expr)

    def test_depart_tree(self):
        from numpy import random
        random.seed(1)
        for i in range(10):

            sl = SymbolTree.genGrow(self.pset, 5, 6)
            sl_departs = sl.depart()
            for i in sl_departs:
                cpsl = self.cp.calculate_simple(i)
                self.assertIsNotNone(cpsl.y_dim)
                self.assertIsNotNone(cpsl.expr)
                self.assertIsNone(cpsl.p_name)
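To run this test case directly as a script, a standard unittest entry point suffices (assuming the module-level imports the fragments above rely on, e.g. ``unittest``, ``SymbolSet``, ``SymbolTree``, ``CalculatePrecisionSet``, ``dless``):

if __name__ == "__main__":
    unittest.main()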