Example #1
    def to_pandas(self) -> Any:
        PackageChecker.pandas("Table.to_pandas")
        import pandas as pd #type: ignore
        import numpy as np #type: ignore #pandas installs numpy so if we have pandas we have numpy

        col_numpy = { col: np.empty(len(self), dtype=dtype) for col,dtype in zip(self.columns,self.dtypes)}

        index = 0

        for key in self._rows_flat.keys():

            flat = self._rows_flat[key]
            pack = self._rows_pack[key]

            size = 1 if not pack else len(pack['index'])

            for col in self.columns:
                if col in pack:
                    val = pack[col]

                elif col in flat:
                    if isinstance(flat[col], (tuple,list)):
                        val = [flat[col]]
                    else:
                        val = flat[col]

                else:
                    val = self._default(col)
                    
                col_numpy[col][index:(index+size)] = val

            index += size

        return pd.DataFrame(col_numpy, columns=self.columns)
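The conversion above is fast because it preallocates one numpy array per column and fills it by slice assignment rather than appending row by row. A self-contained sketch of the same pattern, with hypothetical columns and rows:

    import numpy as np
    import pandas as pd

    columns = ["id", "reward"]       #hypothetical column names
    dtypes  = [int, float]           #hypothetical column dtypes
    rows    = [(1, 0.5), (2, 0.75)]  #hypothetical row data

    #preallocate one numpy array per column then fill by slice assignment
    col_numpy = {c: np.empty(len(rows), dtype=d) for c, d in zip(columns, dtypes)}

    for index, row in enumerate(rows):
        for col, val in zip(columns, row):
            col_numpy[col][index:index+1] = val

    print(pd.DataFrame(col_numpy, columns=columns))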
Example #2
    def to_progressive_pandas(self, span: int = None, each: bool = False, ord_col="index", val_col: str = "reward"):
        """Return expanding or exponential averages for yaxis grouped by learner and possibly environment.

        Args:
            span: If span is None return an expanding average (i.e., progressive validation). If span is not None
                calculate a simple moving average with a window size of span (the window will be smaller than span initially).
            each: If True then group by learner and environment (i.e., each environment). If False
                then only group by learner.
            ord_col: The column which indicates the order in which the average over val_col is calculated.
            val_col: The column we wish to calculate the progressive average values for.

        Returns:
            A data frame whose columns are (learner_id, [environment_id], interaction indexes...).
        """

        PackageChecker.pandas("Result.to_progressive_pandas")

        import pandas as pd

        data = self.to_progressive_lists(span, each, ord_col, val_col)

        if each:
            n_index = len(data[0][2:])
            return pd.DataFrame(data, columns=["learner_id", "environment_id", *range(1,n_index+1)])

        else:
            n_index = len(data[0][1:])
            return pd.DataFrame(data, columns=["learner_id", *range(1,n_index+1)])
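A minimal usage sketch, assuming `result` is a populated coba Result (all values hypothetical):

    #expanding average (progressive validation) grouped by learner only
    by_learner = result.to_progressive_pandas()

    #simple moving average with window 100, grouped by learner and environment
    by_environment = result.to_progressive_pandas(span=100, each=True)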
Example #3
    def __init__(self,
                 *,
                 alpha: float,
                 interactions: Sequence[str] = ['a', 'ax'],
                 timeit: bool = False) -> None:
        """Instantiate a linUCBLearner.
        Args:
            alpha: number of standard deviations
            interactions: the set of interactions the learner will use. x refers to context and a refers to actions, 
                e.g. xaa would mean interactions between context and actions and actions. 
        """
        PackageChecker.numpy("linUCBLearner.__init__")

        self._A = None
        self._b = None
        self._alpha = alpha
        self._interactions = interactions
        self._terms = []
        self._times = [0., 0.]
        self._i = 0
        self._timeit = timeit

        for term in self._interactions:
            term = term.lower()
            x_num = term.count('x')
            a_num = term.count('a')

            if x_num + a_num != len(term):
                raise Exception(
                    "Letters other than x and a were passed for parameter interactions. Please remove other letters/characters."
                )

            self._terms.append((x_num, a_num))
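A brief sketch of how the validation above behaves (values hypothetical):

    #only x (context) and a (action) may appear in interaction terms
    learner = linUCBLearner(alpha=1.0, interactions=['a', 'ax', 'axx'])

    #any other letter raises an exception during __init__
    try:
        linUCBLearner(alpha=1.0, interactions=['ab'])
    except Exception as e:
        print(e)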
Example #4
    def filter(self,
               interactions: Iterable[Interaction]) -> Iterable[Interaction]:

        PackageChecker.numpy("PcaSimulation.filter")

        import numpy as np  #type: ignore

        interactions = list(interactions)

        contexts = [
            list(cast(Tuple[float, ...], i.context)) for i in interactions
        ]

        #eigendecompose the covariance of the context features
        feat_matrix = np.array(contexts)
        comp_vals, comp_vecs = np.linalg.eig(np.cov(feat_matrix.T))

        #keep only the components with positive variance
        comp_vecs = comp_vecs[:, comp_vals > 0]
        comp_vals = comp_vals[comp_vals > 0]

        #project onto the components, whiten, and order by descending variance
        pca_contexts = (feat_matrix @ comp_vecs) / np.sqrt(comp_vals) #type: ignore
        pca_contexts = pca_contexts[:, np.argsort(-comp_vals)]

        return [
            Interaction(tuple(c), i.actions, i.feedbacks)
            for c, i in zip(pca_contexts, interactions)
        ]
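The filter above is a whitening PCA: contexts are projected onto the positive-variance eigenvectors of their covariance, scaled to unit variance, and ordered by descending variance. A standalone sketch of the same math with made-up data:

    import numpy as np

    feat_matrix = np.random.default_rng(1).normal(size=(100, 3)) #hypothetical contexts

    comp_vals, comp_vecs = np.linalg.eig(np.cov(feat_matrix.T))
    comp_vecs = comp_vecs[:, comp_vals > 0]
    comp_vals = comp_vals[comp_vals > 0]

    pca_contexts = (feat_matrix @ comp_vecs) / np.sqrt(comp_vals)
    pca_contexts = pca_contexts[:, np.argsort(-comp_vals)]

    #after whitening, the covariance is approximately the identity matrix
    print(np.round(np.cov(pca_contexts.T), 2))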
Example #5
 @classmethod
 def setUpClass(cls):
     try:
         PackageChecker.vowpalwabbit('VowpalLearner_Tests._make_learner')
     except ImportError:
         #if somebody is using the package with no intention of
         #using the VowpalLearner we don't want them to see failed
         #tests and think something is wrong so we skip these tests
         raise SkipTest(
             "Vowpal Wabbit is not installed so no need to test VowpalLearner"
         )
Example #6
    def __init__(self, *args, **kwargs) -> None:
        """Instantiate a VowpalLearner with the requested VW learner and exploration."""

        PackageChecker.vowpalwabbit('VowpalLearner')

        interactions = "--interactions ssa --interactions sa --ignore_linear s"

        if not args and 'seed' not in kwargs:
            kwargs['seed'] = 1

        if not args and all(
                e not in kwargs
                for e in ['epsilon', 'softmax', 'bag', 'cover', 'args']):
            kwargs['epsilon'] = 0.1

        if len(args) > 0:
            self._adf = "--cb_explore_adf" in args[0]
            self._args = cast(str, args[0])

            self._args = re.sub(r"--cb_explore_adf\s+", '', self._args, count=1)
            self._args = re.sub(r"--cb_explore(\s+\d+)?\s+", '', self._args, count=1)

        elif 'epsilon' in kwargs:
            self._adf = kwargs.get('adf', True)
            self._args = interactions + f" --epsilon {kwargs['epsilon']}"

        elif 'softmax' in kwargs:
            self._adf = True
            self._args = interactions + f" --softmax --lambda {kwargs['softmax']}"

        elif 'bag' in kwargs:
            self._adf = kwargs.get('adf', True)
            self._args = interactions + f" --bag {kwargs['bag']}"

        elif 'cover' in kwargs:
            self._adf = False
            self._args = interactions + f" --cover {kwargs['cover']}"

        if 'seed' in kwargs and kwargs['seed'] is not None:
            self._args += f" --random_seed {kwargs['seed']}"

        self._actions: Any = None
        self._vw = None
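A usage sketch of the argument handling above (all values hypothetical):

    VowpalLearner()                                 #defaults: epsilon-greedy with epsilon=0.1 and seed=1
    VowpalLearner(epsilon=0.05)                     #VW --epsilon 0.05
    VowpalLearner(softmax=1.0)                      #VW --softmax --lambda 1.0
    VowpalLearner(bag=5, adf=False)                 #VW --bag 5 without action-dependent features
    VowpalLearner("--cb_explore_adf --epsilon 0.1") #a raw VW argument string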
Example #7
    def __init__(self, *, beta: float, alpha: float, learning_rate:float=0.1, interactions: Sequence[str] = ['a', 'ax']) -> None:
        """Instantiate a RegCBLearner.

        Args:
            beta : square-loss tolerance
            alpha: confidence bounds precision
            interactions: the set of interactions the learner will use. x refers to context and a refers to actions, 
                e.g. xaa would mean interactions between context, actions and actions. 
        """

        PackageChecker.sklearn("RegCBLearner")
        from sklearn.feature_extraction import FeatureHasher
        from sklearn.preprocessing import PolynomialFeatures

        self._beta  = beta
        self._alpha = alpha
        self._iter  = 0

        self._core_model = []

        self._times         = [0,0,0,0]
        self._interactions  = interactions
        self._terms         = []
        self._learning_rate = learning_rate

        for term in self._interactions:
            term = term.lower()
            x_num = term.count('x')
            a_num = term.count('a')

            if x_num + a_num != len(term):
                raise Exception("Letters other than x and a were passed for parameter interactions. Please remove other letters/characters.")

            self._terms.append((x_num, a_num))

        max_x_term = max(max(term[0] for term in self._terms),1)
        max_a_term = max(max(term[1] for term in self._terms),1)

        self._x_p = PolynomialFeatures(degree=max_x_term, include_bias=False, interaction_only=False)
        self._a_p = PolynomialFeatures(degree=max_a_term, include_bias=False, interaction_only=False)
        self._h   = FeatureHasher(input_type='pair')
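A usage sketch (values hypothetical). Note that the interaction terms size the PolynomialFeatures encoders: with interactions=['a', 'ax', 'axx'] the context encoder gets degree 2 and the action encoder degree 1:

    learner = RegCBLearner(beta=0.5, alpha=0.25, learning_rate=0.1, interactions=['a', 'ax', 'axx'])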
Example #8
File: linucb.py Project: anrath/coba
    def __init__(self, alpha: float = 1, X: Sequence[str] = ['a', 'ax']) -> None:
        """Instantiate a LinUCBLearner.

        Args:
            alpha: This parameter controls the exploration rate of the algorithm. A value of 0 will cause actions 
                to be selected based on the current best point estimate (i.e., no exploration) while a value of inf
                means that actions will be selected based solely on the bounds of the action point estimates (i.e., 
                we will always take actions that have the largest bound on their point estimate).
            X: Feature set interactions to use when calculating action value estimates. Context features
                are indicated by x's while action features are indicated by a's. For example, xaa means to cross the 
                features between context and actions and actions.
        """
        PackageChecker.numpy("LinUCBLearner.__init__")

        self._alpha = alpha

        self._X = X
        self._X_encoder = InteractionsEncoder(X)

        self._theta = None
        self._A_inv = None
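A usage sketch of the alpha semantics described in the docstring (values hypothetical):

    greedy    = LinUCBLearner(alpha=0)                #select on point estimates alone (no exploration)
    exploring = LinUCBLearner(alpha=1, X=['a', 'ax']) #widen selection toward estimate bounds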
Example #9
 def test_check_sklearn_support(self):
     PackageChecker.sklearn("")
Example #10
 def test_check_vowpal_support(self):
     PackageChecker.vowpalwabbit("")
Example #11
 def test_check_numpy_support(self):
     PackageChecker.numpy("")
Example #12
 def test_check_pandas_support(self):
     PackageChecker.pandas("")
Example #13
 def test_check_matplotlib_support(self):
     PackageChecker.matplotlib("")
Example #14
    def plot_learners(self, 
        source_pattern :Union[str,int] = ".*",
        learner_pattern:Union[str,int] = ".*", 
        span:int = None,
        start:Union[int,float]=0.05,
        end:Union[int,float] = 1.,
        err_every:Union[int,float]=.05,
        err_type:str=None,
        complete:bool = True,
        figsize=(9,6),
        ax=None) -> None:
        """This plots the performance of multiple Learners on multiple simulations. It gives a sense of the expected 
            performance for different learners across independent simulations. This plot is valuable in gaining insight 
            into how various learners perform in comparison to one another. 

        Args:
            source_pattern: The pattern to match when determining which simulations to include in the plot. The "source" 
                matched against is either the "source" column in the simulations table or the first item in the list in 
                the simulation 'pipes' column. The simulations can be seen most easily by Result.simulations.to_pandas().
            learner_pattern: The pattern to match against the 'full_name' column in learners to determine which learners
                to include in the plot. In the case of multiple matches only the last match is kept. The learners table in
                Result can be examined via result.learners.to_pandas().
            span: In general this indicates how many previous evaluations to average together. In practice this works
                identically to ewm span value in the Pandas API. Additionally, if span equals None then all previous 
                rewards are averaged together and that value is plotted. Compare this to span = 1 WHERE only the current 
                reward is plotted for each interaction.
            start: Determines at which interaction the plot will start at. If start is greater than 1 we assume start is
                an interaction index. If start is less than 1 we assume start is the percent of interactions to skip
                before starting the plot.
            end: Determines at which interaction the plot will stop at. If end is greater than 1 we assume end is
                an interaction index. If end is less than 1 we assume end is the percent of interactions to end on.
            err_every: Determines frequency of bars indicating the standard deviation of the population should be drawn. 
                Standard deviation gives a sense of how well the plotted average represents the underlying distribution. 
                Standard deviation is most valuable when plotting against multiple simulations. If plotting against a single 
                simulation standard error may be a more useful indicator of confidence. The value for sd_every should be
                between 0 to 1 and will determine how frequently the standard deviation bars are drawn.
            err_type: Determines what the error bars are. Valid types are `None`, 'se', and 'sd'. If err_type is None then 
                plot will use SEM when there is only one source simulation otherwise it will use SD. Otherwise plot will
                display the standard error of the mean for 'se' and the standard deviation for 'sd'.
            complete: Determines if the plotted simulations only includes those simulations with all learners. This
                can be important if plotting a long running benchmark that is still in the process of finishing evaluation.
        """

        PackageChecker.matplotlib('Result.plot_learners')

        learner_ids    = []
        learner_names  = {}
        sources        = set()
        simulation_ids = []

        if isinstance(source_pattern, Number):
            source_pattern = rf'(\D|^){source_pattern}(\D|$)'

        if isinstance(learner_pattern, Number):
            learner_pattern = rf'(\D|^){learner_pattern}(\D|$)'

        for simulation in self._simulations:

            if 'source' in simulation:
                source = simulation['source']
            else:
                #this is a hack...
                source_end = max(simulation['pipe'].find("},{"), simulation['pipe'].find(","))
                source_end = source_end if source_end > -1 else len(simulation['pipe'])
                source     = simulation['pipe'][0:source_end]

            if re.search(source_pattern, source):
                sources.add(source)
                simulation_ids.append(simulation['simulation_id'])

        for learner in self._learners:
            if re.search(learner_pattern, learner['full_name']):
                learner_names[learner['learner_id']] = learner['full_name']
                learner_ids.append(learner['learner_id'])

        if len(learner_ids) == 0:
            CobaConfig.Logger.log(f"No learners were found matching {learner_pattern}")

        if len(simulation_ids) == 0:
            CobaConfig.Logger.log(f"No simulations were found with a source matching {source_pattern}")

        if len(learner_ids) == 0 or len(simulation_ids) == 0:
            return

        learner_ids = sorted(learner_ids, key=lambda id: learner_names[id])

        if err_type is None and len(sources) == 1: err_type = 'se'
        if err_type is None and len(sources) >= 2: err_type = 'sd'

        progressives: Dict[int,List[Sequence[float]]] = collections.defaultdict(list)

        if complete:
            all_learners_sim = lambda sim_id: all( (sim_id,lrn_id) in self._interactions for lrn_id in learner_ids )
            simulation_ids = list(filter(all_learners_sim, simulation_ids))

        if len(simulation_ids) == 0:
            CobaConfig.Logger.log(f"No simulation was found with interaction data for every learner.")
            return

        for simulation_id, learner_id in product(simulation_ids,learner_ids):
            
            if (simulation_id,learner_id) not in self._interactions: continue

            rewards = self._interactions[(simulation_id,learner_id)]["reward"]

            if span is None or span >= len(rewards):
                cumwindow  = list(accumulate(rewards))
                cumdivisor = list(range(1,len(cumwindow)+1))
            
            elif span == 1:
                cumwindow  = list(rewards)
                cumdivisor = [1]*len(cumwindow)

            else:
                #exponentially weighted moving average, mirroring pandas ewm(span=span):
                #with alpha = 2/(1+span), value_t = sum_k (1-alpha)^k r_(t-k) / sum_k (1-alpha)^k
                alpha = 2/(1+span)
                cumwindow  = list(accumulate(rewards          , lambda a,c: c + (1-alpha)*a))
                cumdivisor = list(accumulate([1.]*len(rewards), lambda a,c: c + (1-alpha)*a)) #type: ignore

            progressives[learner_id].append(list(map(truediv, cumwindow, cumdivisor)))

        import matplotlib.pyplot as plt #type: ignore
        import numpy as np #type: ignore

        if not progressives:
            CobaConfig.Logger.log("No interaction data was found for plot_learners.")
            return
        
        full_figure = ax is None

        if full_figure:
            fig = plt.figure(figsize=figsize)
            ax = fig.add_subplot(1,1,1) #type: ignore

        for i,learner_id in enumerate(learner_ids):

            label = self._learners[learner_id]["full_name"]
            Z     = list(zip(*progressives[learner_id]))
            
            if not Z: continue

            N     = [ len(z) for z in Z        ]
            Y     = [ sum(z)/len(z) for z in Z ]
            X     = list(range(1,len(Y)+1))

            start = int(start*len(X)) if start <  1 else int(start)
            end   = int(end*len(X))   if end   <= 1 else int(end)

            end_idx   = min(len(X), end)
            start_idx = max(0, start)

            if start_idx >= end_idx:
                CobaConfig.Logger.log("The plot's given end <= start making plotting impossible.")
                return

            X = X[start_idx:end_idx]
            Y = Y[start_idx:end_idx]
            Z = Z[start_idx:end_idx]

            if len(X) == 0: continue

            #this is much faster than python's native stdev and more or less free
            #computationally so we always calculate it regardless of whether error
            #bars are shown. we use the identity Var[Y] = E[Y^2]-E[Y]^2
            Y2 = [ sum([zz**2 for zz in z])/len(z) for z in Z ]
            SD = [ (y2-y**2)**(1/2) for y,y2 in zip(Y,Y2)     ]
            SE = [ sd/(n**(1/2)) for sd,n in zip(SD,N)        ]

            err_every = int(len(X)*err_every) if err_every < 1 else err_every
            err_start = int(X[0] + i*len(X)*err_every**2) if err_every < 1 else err_every

            if not err_every:
                ax.plot(X, Y, label=label)
            else:
                yerr = SE if err_type.lower() == 'se' else SD #type: ignore
                ax.errorbar(X, Y, yerr=yerr, elinewidth=0.5, errorevery=(err_start,err_every), label=label)

        if full_figure:
            
            if start == start_idx and end == end_idx:
                ax.set_xticks(np.clip(ax.get_xticks(), min(X), max(X)))
            else:
                padding = - (end-start)*.01
                ax.set_xlim(start - padding, end + padding)
                ax.set_xticks(np.clip(ax.get_xticks(), start, end))

            ax.set_title (("Instantaneous" if span == 1 else "Progressive" if span is None else f"Span {span}") + " Reward")
            ax.set_ylabel("Reward")
            ax.set_xlabel("Interactions")

            #make room for the legend
            scale = 0.65
            box1 = ax.get_position()
            ax.set_position([box1.x0, box1.y0 + box1.height * (1-scale), box1.width, box1.height * scale])

            # Put a legend below current axis
            fig.legend(*ax.get_legend_handles_labels(), loc='upper center', bbox_to_anchor=(.5, .3), ncol=1, fontsize='medium') #type: ignore

            plt.show()
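A usage sketch, assuming `result` is a populated coba Result (patterns and values hypothetical):

    result.plot_learners()                                         #every learner on every simulation
    result.plot_learners(source_pattern="openml", span=500)        #smoothed reward for matching sources
    result.plot_learners(learner_pattern="epsilon", err_type='se') #standard error bars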
Example #15
File: tasks.py Project: anrath/coba
    def process(
            self, environment: Environment,
            interactions: Iterable[SimulatedInteraction]) -> Dict[Any, Any]:

        contexts, actions, rewards = zip(*[(i.context, i.actions, i.kwargs["rewards"]) for i in interactions])
        env_statistics = {}

        try:

            PackageChecker.sklearn("ClassEnvironmentTask.process")

            import numpy as np
            import scipy.sparse as sp
            import scipy.stats as st
            from sklearn.feature_extraction import FeatureHasher
            from sklearn.tree import DecisionTreeClassifier
            from sklearn.model_selection import cross_val_score
            from sklearn.metrics import pairwise_distances
            from sklearn.decomposition import TruncatedSVD, PCA

            X = [InteractionsEncoder('x').encode(x=c, a=[]) for c in contexts]
            Y = [a[r.index(1)] for a, r in zip(actions, rewards)]
            C = collections.defaultdict(list)
            clf = DecisionTreeClassifier(random_state=1)

            if isinstance(X[0], dict):
                X = FeatureHasher(n_features=2**14, input_type="dict").fit_transform(X)

            if len(Y) > 5:
                scores = cross_val_score(clf, X, Y, cv=5)
                env_statistics["bayes_rate_avg"] = round(scores.mean(), 4)
                env_statistics["bayes_rate_iqr"] = round(st.iqr(scores), 4)

            svd = TruncatedSVD(n_components=8) if sp.issparse(X) else PCA()
            svd.fit(X)
            env_statistics["PcaVarExplained"] = svd.explained_variance_ratio_[:8].tolist()

            for x, y in zip(X, Y):
                C[y].append(x)

            if sp.issparse(X):
                centroids = sp.vstack([sp.csr_matrix(sp.vstack(c).mean(0)) for c in C.values()])
            else:
                centroids = np.vstack([np.vstack(c).mean(0) for c in C.values()])

            centroid_order = list(C.keys())
            centroid_index = [centroid_order.index(y) for y in Y]
            centroid_dists = pairwise_distances(X, centroids)
            closest_index = centroid_dists.argmin(1)
            cluster_purity = (closest_index == centroid_index).mean()

            env_statistics["centroid_purity"] = round(cluster_purity, 4)
            env_statistics["centroid_distance"] = round(
                median(centroid_dists[range(centroid_dists.shape[0]),
                                      centroid_index]), 4)

        except CobaExit:
            pass

        labels = set()
        features = set()
        feat_cnts = []
        label_cnts = collections.defaultdict(int)

        for c, a, f in zip(contexts, actions, rewards):

            inter_label = a[f.index(1)]
            inter_feats = c.keys() if isinstance(c, dict) else range(len(c))

            labels.add(inter_label)
            features.update(inter_feats)
            feat_cnts.append(len(inter_feats))
            label_cnts[inter_label] += 1

        env_statistics["action_cardinality"] = len(labels)
        env_statistics["context_dimensions"] = len(features)
        env_statistics["context_median_nz"] = median(feat_cnts)
        env_statistics["imbalance_ratio"] = round(
            max(label_cnts.values()) / min(label_cnts.values()), 4)

        return {
            **SimpleEnvironmentTask().process(environment, interactions),
            **env_statistics
        }
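The centroid-purity statistic above measures how often an example's nearest class centroid belongs to its own class. A standalone sketch with made-up dense data:

    import numpy as np
    from sklearn.metrics import pairwise_distances

    X = np.array([[0.,0.], [0.,1.], [5.,5.], [5.,6.]]) #hypothetical features
    Y = ["a", "a", "b", "b"]                           #hypothetical labels

    order     = sorted(set(Y))
    centroids = np.vstack([X[[y == c for y in Y]].mean(0) for c in order])
    closest   = pairwise_distances(X, centroids).argmin(1)
    purity    = (closest == [order.index(y) for y in Y]).mean()

    print(purity) #1.0 for these well separated classes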
Example #16
 def test_check_pandas_support(self):
     try:
         PackageChecker.pandas("test_check_pandas_support")
     except Exception:
         self.fail("check_pandas_support raised an exception")
Example #17
 def __init__(self) -> None:
     PackageChecker.numpy("PCA.__init__")
Example #18
 def test_check_matplotlib_support(self):
     with self.assertRaises(CobaExit):
         PackageChecker.matplotlib("")
Example #19
 def test_check_numpy_support(self):
     try:
         PackageChecker.numpy("test_check_numpy_support")
     except Exception:
         self.fail("check_numpy_support raised an exception")
Example #20
 def test_check_vowpal_support(self):
     try:
         PackageChecker.vowpalwabbit("test_check_vowpal_support")
     except Exception:
         self.fail("check_vowpal_support raised an exception")
Example #21
    def plot_shuffles(self, 
        source_pattern:str = ".*", 
        learner_pattern:str = ".*", 
        span:int=None,
        start:Union[int,float]=0.05,
        end:Union[int,float] = 1.,
        err_every:Union[int,float]=.05,
        err_type:str=None,
        figsize=(8,6)) -> None:
        """This plots the performance of a single Learner on multiple shuffles of the same source. It gives a sense of the
            variance in peformance for the learner on the given simulation source. This plot is valuable if looking for a 
            reliable learner on a fixed problem.

        Args:
            source_pattern: The pattern to match when determining which simulations to include in the plot. The "source" 
                matched against is either the "source" column in the simulations table or the first item in the list in 
                the simulation 'pipes' column. The simulations can be seen most easily by Result.simulations.to_pandas().
            learner_pattern: The pattern to match against the 'full_name' column in learners to determine which learners
                to include in the plot. In the case of multiple matches only the last match is kept. The learners table in
                Result can be examined via result.learners.to_pandas().
            span: In general this indicates how many previous evaluations to average together. In practice this works
                identically to ewm span value in the Pandas API. Additionally, if span equals None then all previous 
                rewards are averaged together and that value is plotted. Compare this to span = 1 WHERE only the current 
                reward is plotted for each interaction.
            start: Determines at which interaction the plot will start at. If start is greater than 1 we assume start is
                an interaction index. If start is less than 1 we assume start is the percent of interactions to skip
                before starting the plot.
            end: Determines at which interaction the plot will stop at. If end is greater than 1 we assume end is
                an interaction index. If end is less than 1 we assume end is the percent of interactions to end on.
            err_every: Determines frequency of bars indicating the standard deviation of the population should be drawn. 
                Standard deviation gives a sense of how well the plotted average represents the underlying distribution. 
                Standard deviation is most valuable when plotting against multiple simulations. If plotting against a single 
                simulation standard error may be a more useful indicator of confidence. The value for sd_every should be
                between 0 to 1 and will determine how frequently the standard deviation bars are drawn.
            err_type: Determines what the error bars are. Valid types are `None`, 'se', and 'sd'. If err_type is None then 
                plot will use SEM when there is only one source simulation otherwise it will use SD. Otherwise plot will
                display the standard error of the mean for 'se' and the standard deviation for 'sd'.

        """

        PackageChecker.matplotlib('Result.plot_shuffles')

        simulation_ids     = []
        simulation_sources = []
        learner_id         = None

        if isinstance(source_pattern, Number):
            source_pattern = rf'(\D|^){source_pattern}(\D|$)'

        if isinstance(learner_pattern, Number):
            learner_pattern = rf'(\D|^){learner_pattern}(\D|$)'
        
        for simulation in self._simulations:
            
            if 'source' in simulation:
                sim_source = simulation['source']
            else:
                #this is a hack...
                source_end = max(simulation['pipe'].find("},{"), simulation['pipe'].find(","))
                source_end = source_end if source_end > -1 else len(simulation['pipe'])
                sim_source = simulation['pipe'][0:source_end]

            if re.search(source_pattern, sim_source):
                simulation_ids.append(simulation['simulation_id'])
                simulation_sources.append(sim_source)

        for learner in self._learners:
            if re.search(learner_pattern,learner['full_name']):
                learner_id = learner['learner_id']

        progressives: List[Sequence[float]] = []

        if len(simulation_ids) == 0:
            CobaConfig.Logger.log(f"No simulation was found with a source matching '{source_pattern}' when executing `plot_shuffles`.")
            return

        if learner_id is None:
            CobaConfig.Logger.log(f"No learner was found whose full_name matched '{learner_pattern}' when executing `plot_shuffles`.")
            return

        for simulation_id in simulation_ids:
            
            if (simulation_id,learner_id) not in self._interactions: continue

            rewards = self._interactions[(simulation_id,learner_id)]["reward"]

            if span is None or span >= len(rewards):
                cumwindow  = list(accumulate(rewards))
                cumdivisor = list(range(1,len(cumwindow)+1))

            elif span == 1:
                cumwindow  = list(rewards)
                cumdivisor = [1]*len(cumwindow)

            else:
                #simple moving average from a padded cumulative sum: the [0]*span padding
                #makes negative indexes read zeros, so the first span-1 windows are plain
                #cumulative sums and later windows are true span-sized sums
                cumwindow  = list(accumulate(rewards))
                cumwindow  = cumwindow + [0] * span
                cumwindow  = [ cumwindow[i] - cumwindow[i-span] for i in range(len(cumwindow)-span) ]
                cumdivisor = list(range(1, span)) + [span]*(len(cumwindow)-span+1)

            progressives.append(list(map(truediv, cumwindow, cumdivisor)))

        if not progressives:
            CobaConfig.Logger.log("No interaction data was found for plot_shuffles.")
            return

        import matplotlib.pyplot as plt #type: ignore
        import numpy as np #type: ignore

        fig = plt.figure(figsize=figsize)
        
        ax = fig.add_subplot(1,1,1) #type: ignore

        color = next(ax._get_lines.prop_cycler)['color']

        for shuffle in progressives:

            Y     = shuffle
            X     = list(range(1,len(Y)+1))

            start = int(start*len(X)) if start <  1 else int(start)
            end   = int(end*len(X))   if end   <= 1 else int(end)

            end_idx   = min(len(X), end)
            start_idx = max(0, start)

            if start_idx >= end_idx:
                CobaConfig.Logger.log("The plot's given end <= start making plotting impossible.")
                return

            X = X[start_idx:end_idx]
            Y = Y[start_idx:end_idx]

            ax.plot(X, Y, label='_nolegend_', color=color, alpha=0.15)

        plt.gca().set_prop_cycle(None)
        self.plot_learners(source_pattern, learner_pattern, span=span, start=start, end=end, err_every=err_every, err_type=err_type, ax=ax)

        if start == start_idx and end == end_idx:
            ax.set_xticks(np.clip(ax.get_xticks(), min(X), max(X)))
        else:
            padding = - (end-start)*.01
            ax.set_xlim(start - padding, end + padding)
            ax.set_xticks(np.clip(ax.get_xticks(), start, end))

        simulation_sources = list(set(simulation_sources))
        source = simulation_sources[0] if len(simulation_sources) == 1 else str(simulation_sources)

        ax.set_title (("Instantaneous" if span == 1 else "Progressive" if span is None else f"Span {span}") + f" Reward for '{source}'")
        ax.set_ylabel("Reward")
        ax.set_xlabel("Interactions")

        #make room for the legend
        scale = 0.85
        box1 = ax.get_position()
        ax.set_position([box1.x0, box1.y0 + box1.height * (1-scale), box1.width, box1.height * scale])

        #Put a legend below current axis
        fig.legend(*ax.get_legend_handles_labels(), loc='upper center', bbox_to_anchor=(.5, .1), ncol=1, fontsize='medium') #type: ignore

        plt.show()
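A usage sketch, assuming `result` is a populated coba Result (patterns hypothetical):

    #progressive reward for one learner across every shuffle of a matching source
    result.plot_shuffles(source_pattern="covertype", learner_pattern="epsilon", span=None)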
Example #22
 def test_check_vowpal_support(self):
     with self.assertRaises(CobaExit):
         PackageChecker.vowpalwabbit("")
Example #23
    def plot_learners(self, 
        xlim : Optional[Tuple[Number,Number]] = None,
        ylim : Optional[Tuple[Number,Number]] = None,
        span : int = None,
        err  : Optional[Literal['se','sd']] = None,
        each : bool = False,
        filename: str = None,
        sort : Literal['name',"id","reward"] = "name",
        ax = None) -> None:
        """Plot the performance of multiple learners on multiple environments. It gives a sense of the expected 
            performance for different learners across independent environments. This plot is valuable in gaining 
            insight into how various learners perform in comparison to one another. 

        Args:
            xlim: Define the x-axis limits to plot. If `None` the x-axis limits will be inferred.
            ylim: Define the y-axis limits to plot. If `None` the y-axis limits will be inferred.
            span: If span is None return an expanding average (i.e., progressive validation). If span is not none
                calculate a simple moving average with window size of span (window will be smaller than span initially).
            err: This determines what kind of error bars to plot (if any). Valid types are `None`, 'se', and 'sd'. If `None`
                then no bars are plotted, if 'se' the standard error is shown, and if 'sd' the standard deviation is shown.
            each: This determines whether each evaluated environment used to estimate mean performance is also plotted.
            filename: Provide a filename to write plot image to disk.
            sort: Determines the order in which learners are plotted; valid values are 'name', 'id', and 'reward'.
            ax: Provide an optional axes that the plot will be drawn to. If not provided a new figure/axes is created.
        """

        PackageChecker.matplotlib('Result.plot_learners')
        import matplotlib.pyplot as plt #type: ignore

        show = ax is None

        for label, X, Y, yerr, Z in self._plot_learners_data(xlim,span,err,sort):

            ax = ax or plt.figure(figsize=(10,6)).add_subplot(111) #type: ignore

            color = next(ax._get_lines.prop_cycler)['color']

            ax.errorbar(X, Y, yerr=yerr, elinewidth=0.5, errorevery=(0,max(int(len(X)*0.05),1)), label=label, color=color)

            if each:
                for Y in list(zip(*Z)):
                    ax.plot(X,Y, color=color, alpha=0.15)

        if ax is None:
            CobaContext.logger.log(f"No data was found for plotting in the given results: {self}.")
        else:
            padding = .05
            ax.margins(0)
            ax.set_xticks([min(ax.get_xlim()[1], max(ax.get_xlim()[0],x)) for x in ax.get_xticks()])
            ax.margins(padding)

            if xlim:
                x_pad = padding*(xlim[1]-xlim[0])
                ax.set_xlim(xlim[0]-x_pad, xlim[1]+x_pad)

            if ylim:
                y_pad = padding*(ylim[1]-ylim[0])
                ax.set_ylim(ylim[0]-y_pad, ylim[1]+y_pad)

            ax.set_title(("Instantaneous" if span == 1 else "Progressive" if span is None else f"Span {span}") + " Reward", loc='left',pad=15)
            ax.set_ylabel("Reward")
            ax.set_xlabel("Interactions")

            if ax.get_legend() is None:
                scale = 0.65
                box1 = ax.get_position()
                ax.set_position([box1.x0, box1.y0 + box1.height * (1-scale), box1.width, box1.height * scale])
            else:
                ax.get_legend().remove()

            ax.legend(*ax.get_legend_handles_labels(), loc='upper left', bbox_to_anchor=(-.01, -.25), ncol=1, fontsize='medium') #type: ignore

            if filename:
                #save before show() since show() releases the figure in most backends
                plt.savefig(filename, dpi=300)

            if show:
                plt.show()

            if show or filename:
                plt.close()
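A usage sketch, assuming `result` is a populated coba Result (values and filename hypothetical):

    result.plot_learners(err='se', each=True)     #mean reward with standard error bars plus per-environment curves
    result.plot_learners(xlim=(0,1000), span=50)  #first 1000 interactions with a span-50 moving average
    result.plot_learners(filename="learners.png") #also write the figure to disk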
Example #24
 def test_check_sklearn_support(self):
     with self.assertRaises(CobaExit):
         PackageChecker.sklearn("")
Example #25
 def test_check_pandas_support(self):
     with self.assertRaises(CobaExit):
         PackageChecker.pandas("")
Example #26
 def test_check_matplotlib_support(self):
     try:
         PackageChecker.matplotlib("test_check_matplotlib_support")
     except Exception:
         self.fail("check_matplotlib_support raised an exception")
Example #27
 def test_check_numpy_support(self):
     with self.assertRaises(CobaExit):
         PackageChecker.numpy("")
Example #28
File: vowpal.py Project: anrath/coba
    def __init__(self) -> None:
        self._vw = None
        self._ns_offsets: Dict[str, int] = {}
        self._curr_ns_offset = 0

        PackageChecker.vowpalwabbit('VowpalMediator.__init__')