def scatter(self, x, y, xerr=None, yerr=None, cov=None, corr=None, s_expr=None, c_expr=None, labels=None,
            selection=None, length_limit=50000, length_check=True, label=None, xlabel=None, ylabel=None,
            errorbar_kwargs=None, ellipse_kwargs=None, **kwargs):
    """Viz (small amounts) of data in 2d using a scatter plot

    Convenience wrapper around pylab.scatter for working with small DataFrames or selections.
    Optionally draws error bars (plt.errorbar) or, when cov or corr is given, one error ellipse
    per point.

    :param x: Expression for x axis
    :param y: Idem for y
    :param xerr: Expression for the error on x, or a sequence of two expressions which are evaluated and
        passed as a pair to plt.errorbar. When cov or corr is given, its square is used as the x diagonal
        entry of the per-point covariance matrix.
    :param yerr: Idem for y
    :param cov: Expression for the off-diagonal (xy) covariance entry; enables error ellipses
    :param corr: Expression that is multiplied by xerr * yerr to obtain the off-diagonal covariance
        entry; enables error ellipses and takes precedence over cov
    :param s_expr: When given, use it for the s (size) argument of pylab.scatter
    :param c_expr: When given, use it for the c (color) argument of pylab.scatter
    :param labels: Annotate the points with these text values
    :param selection: Single selection expression, or None
    :param length_limit: maximum number of rows it will plot
    :param length_check: should we do the maximum row check or not?
    :param label: label for the legend, if None the string form of selection is used
    :param xlabel: label for x axis, if None .label(x) is used
    :param ylabel: label for y axis, if None .label(y) is used
    :param errorbar_kwargs: extra dict with arguments passed to plt.errorbar ('fmt' defaults to 'none')
    :param ellipse_kwargs: extra dict with arguments passed to matplotlib.patches.Ellipse
        ('facecolor' defaults to 'none', 'edgecolor' to 'black')
    :param kwargs: extra arguments passed to pylab.scatter
    :return: the object returned by pylab.scatter
    :raises ValueError: if the row count exceeds length_limit (and length_check is True), if cov or corr
        is given without both xerr and yerr, or if a covariance matrix has a negative eigenvalue
    """
    import pylab as plt

    x = _ensure_strings_from_expressions(x)
    y = _ensure_strings_from_expressions(y)
    # The legend label is derived from the selection as passed in, before it is normalised to a string.
    label = str(label or selection)
    selection = _ensure_strings_from_expressions(selection)

    if length_check:
        count = self.count(selection=selection)
        if count > length_limit:
            raise ValueError(
                "the number of rows (%d) is above the limit (%d), pass length_check=False, or increase length_limit"
                % (count, length_limit))

    x_values = self.evaluate(x, selection=selection)
    y_values = self.evaluate(y, selection=selection)
    if s_expr:
        kwargs["s"] = self.evaluate(s_expr, selection=selection)
    if c_expr:
        kwargs["c"] = self.evaluate(c_expr, selection=selection)

    plt.xlabel(xlabel or self.label(x))
    plt.ylabel(ylabel or self.label(y))
    s = plt.scatter(x_values, y_values, label=label, **kwargs)

    if labels:
        label_values = self.evaluate(labels, selection=selection)
        for label_value, x_value, y_value in zip(label_values, x_values, y_values):
            plt.annotate(label_value, (x_value, y_value))

    def evaluate_error(err, name):
        # A sequence is a (first, second) pair of expressions handed to plt.errorbar as a two-element list.
        if err is None:
            return None
        if _issequence(err):
            assert len(err) == 2, "if %s is a sequence it should be of length 2" % name
            return [self.evaluate(err[0], selection=selection), self.evaluate(err[1], selection=selection)]
        return self.evaluate(err, selection=selection)

    if cov is not None or corr is not None:
        # Error ellipses: build a 2x2 covariance matrix per point from xerr, yerr and cov/corr.
        if xerr is None or yerr is None:
            raise ValueError("xerr and yerr are required when cov or corr is given")
        from matplotlib.patches import Ellipse

        sx = self.evaluate(xerr, selection=selection)
        sy = self.evaluate(yerr, selection=selection)
        if corr is not None:
            sxy = self.evaluate(corr, selection=selection) * sx * sy
        else:
            sxy = self.evaluate(cov, selection=selection)

        cov_matrix = np.zeros((len(sx), 2, 2))
        cov_matrix[:, 0, 0] = sx**2
        cov_matrix[:, 1, 1] = sy**2
        cov_matrix[:, 0, 1] = cov_matrix[:, 1, 0] = sxy

        ax = plt.gca()
        # Copy so the caller's dict (or a shared default) is never mutated.
        ellipse_kwargs = dict(ellipse_kwargs or {})
        ellipse_kwargs.setdefault('facecolor', 'none')
        ellipse_kwargs.setdefault('edgecolor', 'black')

        for i, point_cov in enumerate(cov_matrix):
            eigen_values, eigen_vectors = np.linalg.eig(point_cov)
            # Sort descending so index 0 is the major axis.
            order = np.argsort(eigen_values)[::-1]
            eigen_values = eigen_values[order]
            major_axis = eigen_vectors[:, order[0]]
            angle = np.arctan2(major_axis[1], major_axis[0])
            # Round-off can make the smallest eigenvalue slightly negative; clamp it to zero.
            if eigen_values[1] < 0 and abs(eigen_values[1] / eigen_values[0]) < 1e-10:
                eigen_values[1] = 0
            if eigen_values[0] < 0 or eigen_values[1] < 0:
                raise ValueError(
                    "covariance matrix of row %d has a negative eigenvalue (%r), check xerr, yerr and cov/corr"
                    % (i, eigen_values))
            # NOTE(review): matplotlib's Ellipse interprets width/height as full axis lengths, so the
            # semi-axes drawn here are half of sqrt(eigenvalue) - confirm this scale is intended.
            width, height = np.sqrt(np.max(eigen_values)), np.sqrt(np.min(eigen_values))
            e = Ellipse(xy=(x_values[i], y_values[i]), width=width, height=height, angle=np.degrees(angle),
                        **ellipse_kwargs)
            ax.add_artist(e)
    else:
        xerr_values = evaluate_error(xerr, "xerr")
        yerr_values = evaluate_error(yerr, "yerr")
        if xerr_values is not None or yerr_values is not None:
            # Copy so the caller's dict (or a shared default) is never mutated.
            errorbar_kwargs = dict(errorbar_kwargs or {})
            errorbar_kwargs.setdefault('fmt', 'none')
            plt.errorbar(x_values, y_values, yerr=yerr_values, xerr=xerr_values, **errorbar_kwargs)
    return s
def label(index, label, expression):
    """Return the label to use for the item at position ``index``.

    :param index: position to pick when ``label`` is a sequence
    :param label: a sequence of labels, or a falsy/non-sequence value to fall back to the default
    :param expression: expression whose default label (``self.label(expression)``) is used as fallback
    :return: ``label[index]`` when ``label`` is a non-empty sequence, else ``self.label(expression)``
    """
    # `self` is not a parameter here; it is resolved from the enclosing scope.
    if label and _issequence(label):
        # Use the `index` argument; the previous code indexed with an unrelated free name `i`.
        return label[index]
    return self.label(expression)
def scatter(self, x, y, xerr=None, yerr=None, cov=None, corr=None, s_expr=None, c_expr=None, labels=None,
            selection=None, length_limit=50000, length_check=True, label=None, xlabel=None, ylabel=None,
            errorbar_kwargs=None, ellipse_kwargs=None, **kwargs):
    """Viz (small amounts) of data in 2d using a scatter plot

    Convenience wrapper around pylab.scatter for working with small DataFrames or selections.
    Optionally draws error bars (plt.errorbar) or, when cov or corr is given, one error ellipse
    per point.

    :param x: Expression for x axis
    :param y: Idem for y
    :param xerr: Expression for the error on x, or a sequence of two expressions which are evaluated and
        passed as a pair to plt.errorbar. When cov or corr is given, its square is used as the x diagonal
        entry of the per-point covariance matrix.
    :param yerr: Idem for y
    :param cov: Expression for the off-diagonal (xy) covariance entry; enables error ellipses
    :param corr: Expression that is multiplied by xerr * yerr to obtain the off-diagonal covariance
        entry; enables error ellipses and takes precedence over cov
    :param s_expr: When given, use it for the s (size) argument of pylab.scatter
    :param c_expr: When given, use it for the c (color) argument of pylab.scatter
    :param labels: Annotate the points with these text values
    :param selection: Single selection expression, or None
    :param length_limit: maximum number of rows it will plot
    :param length_check: should we do the maximum row check or not?
    :param label: label for the legend, if None the string form of selection is used
    :param xlabel: label for x axis, if None .label(x) is used
    :param ylabel: label for y axis, if None .label(y) is used
    :param errorbar_kwargs: extra dict with arguments passed to plt.errorbar ('fmt' defaults to 'none')
    :param ellipse_kwargs: extra dict with arguments passed to matplotlib.patches.Ellipse
        ('facecolor' defaults to 'none', 'edgecolor' to 'black')
    :param kwargs: extra arguments passed to pylab.scatter
    :return: the object returned by pylab.scatter
    :raises ValueError: if the row count exceeds length_limit (and length_check is True), if cov or corr
        is given without both xerr and yerr, or if a covariance matrix has a negative eigenvalue
    """
    import pylab as plt

    x = _ensure_strings_from_expressions(x)
    y = _ensure_strings_from_expressions(y)
    # The legend label is derived from the selection as passed in, before it is normalised to a string.
    label = str(label or selection)
    selection = _ensure_strings_from_expressions(selection)

    if length_check:
        count = self.count(selection=selection)
        if count > length_limit:
            raise ValueError(
                "the number of rows (%d) is above the limit (%d), pass length_check=False, or increase length_limit"
                % (count, length_limit))

    x_values = self.evaluate(x, selection=selection)
    y_values = self.evaluate(y, selection=selection)
    if s_expr:
        kwargs["s"] = self.evaluate(s_expr, selection=selection)
    if c_expr:
        kwargs["c"] = self.evaluate(c_expr, selection=selection)

    plt.xlabel(xlabel or self.label(x))
    plt.ylabel(ylabel or self.label(y))
    s = plt.scatter(x_values, y_values, label=label, **kwargs)

    if labels:
        label_values = self.evaluate(labels, selection=selection)
        for label_value, x_value, y_value in zip(label_values, x_values, y_values):
            plt.annotate(label_value, (x_value, y_value))

    def evaluate_error(err, name):
        # A sequence is a (first, second) pair of expressions handed to plt.errorbar as a two-element list.
        if err is None:
            return None
        if _issequence(err):
            assert len(err) == 2, "if %s is a sequence it should be of length 2" % name
            return [self.evaluate(err[0], selection=selection), self.evaluate(err[1], selection=selection)]
        return self.evaluate(err, selection=selection)

    if cov is not None or corr is not None:
        # Error ellipses: build a 2x2 covariance matrix per point from xerr, yerr and cov/corr.
        if xerr is None or yerr is None:
            raise ValueError("xerr and yerr are required when cov or corr is given")
        from matplotlib.patches import Ellipse

        sx = self.evaluate(xerr, selection=selection)
        sy = self.evaluate(yerr, selection=selection)
        if corr is not None:
            sxy = self.evaluate(corr, selection=selection) * sx * sy
        else:
            sxy = self.evaluate(cov, selection=selection)

        cov_matrix = np.zeros((len(sx), 2, 2))
        cov_matrix[:, 0, 0] = sx**2
        cov_matrix[:, 1, 1] = sy**2
        cov_matrix[:, 0, 1] = cov_matrix[:, 1, 0] = sxy

        ax = plt.gca()
        # Copy so the caller's dict (or a shared default) is never mutated.
        ellipse_kwargs = dict(ellipse_kwargs or {})
        ellipse_kwargs.setdefault('facecolor', 'none')
        ellipse_kwargs.setdefault('edgecolor', 'black')

        for i, point_cov in enumerate(cov_matrix):
            eigen_values, eigen_vectors = np.linalg.eig(point_cov)
            # Sort descending so index 0 is the major axis.
            order = np.argsort(eigen_values)[::-1]
            eigen_values = eigen_values[order]
            major_axis = eigen_vectors[:, order[0]]
            angle = np.arctan2(major_axis[1], major_axis[0])
            # Round-off can make the smallest eigenvalue slightly negative; clamp it to zero.
            if eigen_values[1] < 0 and abs(eigen_values[1] / eigen_values[0]) < 1e-10:
                eigen_values[1] = 0
            if eigen_values[0] < 0 or eigen_values[1] < 0:
                raise ValueError(
                    "covariance matrix of row %d has a negative eigenvalue (%r), check xerr, yerr and cov/corr"
                    % (i, eigen_values))
            # NOTE(review): matplotlib's Ellipse interprets width/height as full axis lengths, so the
            # semi-axes drawn here are half of sqrt(eigenvalue) - confirm this scale is intended.
            width, height = np.sqrt(np.max(eigen_values)), np.sqrt(np.min(eigen_values))
            e = Ellipse(xy=(x_values[i], y_values[i]), width=width, height=height, angle=np.degrees(angle),
                        **ellipse_kwargs)
            ax.add_artist(e)
    else:
        xerr_values = evaluate_error(xerr, "xerr")
        yerr_values = evaluate_error(yerr, "yerr")
        if xerr_values is not None or yerr_values is not None:
            # Copy so the caller's dict (or a shared default) is never mutated.
            errorbar_kwargs = dict(errorbar_kwargs or {})
            errorbar_kwargs.setdefault('fmt', 'none')
            plt.errorbar(x_values, y_values, yerr=yerr_values, xerr=xerr_values, **errorbar_kwargs)
    return s