def setUp(self):
    """
    Create the fixture files used by the table-reading tests.

    A 2x2 DataFrame of ones (columns ``C1``/``C2``, index ``i0``/``i1``) is
    stored on ``self.expect`` and written out once per enabled file format
    under the location returned by ``temp_path()``.
    """
    self.dir = temp_path()
    self.fn = self.dir / "test_read_table.csv"

    # Enabled formats and the DataFrame writer used for each; the
    # ".xlsx"/"to_excel" pair is currently disabled.
    suffixes = [".csv"]
    writer_names = ["to_csv"]

    self.files = [self.fn.with_suffix(suffix) for suffix in suffixes]
    self.expect = pd.DataFrame(
        np.ones((2, 2)), index=["i0", "i1"], columns=["C1", "C2"]
    )

    for path, writer_name in zip(self.files, writer_names):
        writer = getattr(self.expect, writer_name)
        options = {}  # e.g. engine="openpyxl" if excel output is re-enabled
        # only pass through options which the writer actually accepts
        writer(str(path), **subkwargs(options, writer))
def add_to_axes(self, ax, label=False, **kwargs):
    """
    Plot this point on an :class:`~matplotlib.axes.Axes`.

    Parameters
    ----------
    ax : :class:`~matplotlib.axes.Axes`.
        Axes to plot the point on.
    label : :class:`bool`
        Whether to label the plotted point with this object's name.
    **kwargs
        Styling overrides; filtered through :func:`subkwargs` against
        :code:`ax.scatter` and :class:`PathCollection` before being applied.

    Returns
    --------
    :class:`matplotlib.collections.PathCollection`
        PathCollection as plotted on the axes.
    """
    # index with the flag: False -> no label, True -> the point's name
    legend_label = (None, self.name)[label]
    # start from the point's own styling, then let call-time kwargs override it
    style = {**self.kwargs}
    style.update(subkwargs(kwargs, ax.scatter, PathCollection))
    return ax.scatter(self.x, self.y, label=legend_label, **style)
def add_to_axes(self, ax, xs=None, label=False, **kwargs):
    """
    Plot this line on an :class:`~matplotlib.axes.Axes`.

    Parameters
    ----------
    ax : :class:`~matplotlib.axes.Axes`.
        Axes to plot the line on.
    xs : :class:`numpy.ndarray`
        X values at which to evaluate the line function.
    label : :class:`bool`
        Whether to label the plotted line with this object's name.
    **kwargs
        Styling overrides; filtered through :func:`subkwargs` against
        :code:`ax.plot` and :class:`Line2D` before being applied.

    Returns
    --------
    :class:`list` of :class:`matplotlib.lines.Line2D`
        Lines as plotted on the axes. An empty list is returned (and nothing
        is plotted) where the x-range available for the line is empty.

    Todo
    -----
        * Update to draw lines based along points along their length

    Notes
    ------
        * If no x values are specified, the function will attempt to use the
          validity limits of the line, or finally use the limits of the axes.
        * Where no x values are specified, 100 points are sampled evenly in
          natural-log space between the limits (the logarithm of each limit
          is taken, so non-positive limits will not give finite values).
    """
    if xs is None:
        if self.xlim is not None:
            xmin, xmax = self.xlim
        else:  # use the axes limits
            xmin, xmax = ax.get_xlim()
        linexs = np.logspace(np.log(xmin), np.log(xmax), 100, base=np.e)
    else:
        xmin, xmax = np.nanmin(xs), np.nanmax(xs)
        linexs = xs

    # restrict the limits to the range actually covered by the x values
    xmin, xmax = max(xmin, np.nanmin(linexs)), min(xmax, np.nanmax(linexs))
    if xmin > xmax:
        # empty x-range: there is no part of the line to draw
        return []

    ybounds = [self(xmin), self(xmax)]
    ymin, ymax = min(*ybounds), max(*ybounds)
    if self.ylim is not None:
        ymin, ymax = max(self.ylim[0], ymin), min(self.ylim[1], ymax)

    if np.abs(self.slope) > 1.0:
        # more vertical than horizontal: sample along y and invert for x,
        # using as many points as there are x values (also valid if xs is None)
        lineys = np.logspace(
            np.log(ymin), np.log(ymax), np.size(linexs), base=np.e
        )
        linexs = self.out_tfm(self.invfunc(self.in_tfm(lineys)))
    else:
        lineys = self.out_tfm(self.func(self.in_tfm(linexs)))

    # append self-styling to the output, but let it be overridden
    return ax.plot(
        linexs,
        lineys,
        label=self.name if label else None,
        **{**self.kwargs, **subkwargs(kwargs, ax.plot, Line2D)}
    )
def plot_mapping(
    X,
    Y,
    mapping=None,
    ax=None,
    cmap=None,
    alpha=1.0,
    s=10,
    alpha_method="entropy",
    **kwargs
):
    """
    Scatter-plot a two-dimensional mapping of multidimensional data, coloured
    by target/class.

    Parameters
    ----------
    X : :class:`numpy.ndarray`
        Coordinates in multidimensional space.
    Y : :class:`numpy.ndarray` | :class:`sklearn.base.BaseEstimator`
        An array of targets, or a method to obtain such an array of targets
        via :func:`Y.predict`. Transformers with probabilistic output
        (via :func:`Y.predict_proba`) will have these probability estimates
        accounted for via the alpha channel. Anything which is not an
        estimator is passed to the scatter plot as-is for coloring.
    mapping : :class:`numpy.ndarray` | :class:`~sklearn.base.TransformerMixin` | :class:`str`
        Mapped points, a transformer to create mapped points, or the name of
        a manifold method (:code:`'mds'`, :code:`'isomap'` or :code:`'tsne'`).
        Defaults to :code:`'mds'` where not specified.
    ax : :class:`matplotlib.axes.Axes`
        Axes to plot on. A new figure is created where this is not given.
    cmap : :class:`matplotlib.cm.ListedColormap`
        Colormap to use for the classification visualisation (ideally
        this should be a discrete colormap unless the classes are organised).
        Only used where :code:`Y` provides :func:`Y.predict_proba`.
    alpha : :class:`float`
        Coefficient for alpha.
    s : :class:`float`
        Marker size passed to the scatter plot.
    alpha_method : :code:`'entropy' or 'kl_div'`
        Method to map class probabilities to alpha. :code:`'entropy'` uses a
        measure of entropy relative to null-scenario of equal distribution
        across classes, while :code:`'kl_div'` calculates the information
        gain relative to the same null-scenario.
    **kwargs
        Where the transformer is constructed here (:code:`mapping` is
        :code:`None` or a string), keyword arguments accepted by its
        constructor are passed to it. Remaining keyword arguments are passed
        to :func:`matplotlib.pyplot.subplots` if a new figure is created.

    Returns
    -------
    ax : :class:`~matplotlib.axes.Axes`
        Axes on which the mapping is plotted.
    tfm : :class:`~sklearn.base.BaseEstimator`
        Fitted mapping transform (:code:`None` where pre-mapped points were
        supplied).
    mapped : :class:`numpy.ndarray`
        Mapped points as plotted.

    Raises
    ------
    NotImplementedError
        Where :code:`mapping` is a string which does not name a known method.

    Todo
    ------
        * Option to generate colors for individual classes

            This could be based on the distances between their centres in
            multidimensional space (or low dimensional mapping of this space),
            enabling a continuous (n-dimensional) colormap to be used
            to show similar classes, in addition to classification confidence.
    """
    X_ = X.copy()  # avoid modifying input array
    tfm_kwargs = {}  # keyword arguments consumed by the transformer constructor

    if mapping is None or isinstance(mapping, str):
        method = "mds" if mapping is None else mapping.lower()
        if method == "mds":
            cls = sklearn.manifold.MDS
            defaults = dict(n_components=2, metric=True)
        elif method == "isomap":
            # not necessarily consistent orientation, but consistent shape
            cls = sklearn.manifold.Isomap
            defaults = dict(n_components=2)
        elif method == "tsne":
            # likely need to optimise!
            cls = sklearn.manifold.TSNE
            defaults = dict(n_components=2)
        else:
            raise NotImplementedError(
                "Mapping method '{}' is not implemented; "
                "use 'mds', 'isomap' or 'tsne'.".format(mapping)
            )
        tfm_kwargs = subkwargs(kwargs, cls)
        tfm = cls(**{**defaults, **tfm_kwargs})
        mapped = tfm.fit_transform(X_)
    elif isinstance(
        mapping, (sklearn.base.TransformerMixin, sklearn.base.BaseEstimator)
    ):
        # manifold transforms can be either
        tfm = mapping
        mapped = tfm.fit_transform(X_)
    else:
        # mapping is already performed, expect a numpy.ndarray
        mapped = mapping
        tfm = None

    assert mapped.shape[0] == X_.shape[0]

    if ax is None:
        # keyword arguments already used to build the transformer are not
        # figure options, and would otherwise raise a TypeError here
        fig_kwargs = {k: v for k, v in kwargs.items() if k not in tfm_kwargs}
        _, ax = plt.subplots(1, **fig_kwargs)

    if isinstance(Y, sklearn.base.BaseEstimator):
        # need to split this into multiple methods depending on form of classifier
        if hasattr(Y, "predict_proba"):
            classes = Y.predict(X_)
            cmap = cmap or DEFAULT_DISC_COLORMAP
            c = cmap(classes)
            # use the alpha channel to encode classification confidence
            ps = Y.predict_proba(X_)
            c[:, -1] = alphas_from_multiclass_prob(
                ps, method=alpha_method, alpha=alpha
            )
        else:
            c = Y.predict(X_)
    else:
        # array of targets (or other array-like); need to encode alpha here
        c = Y

    ax.scatter(*mapped.T, c=c, s=s, edgecolors="none")
    return ax, tfm, mapped