def _plot(cls, ax, y, style=None, bw_method=None, ind=None, column_num=None, stacking_id=None, **kwds): # 'y' is a Spark DataFrame that selects one column. # Using RDD is slow so we might have to change it to Dataset based implementation # once Spark has that implementation. sample = y.rdd.map(lambda x: float(x[0])) kd = KernelDensity() kd.setSample(sample) assert isinstance( bw_method, (int, float)), "'bw_method' must be set as a scalar number." if bw_method is not None: # Match the bandwidth with Spark. kd.setBandwidth(float(bw_method)) y = kd.estimate(list(map(float, ind))) lines = PandasMPLPlot._plot(ax, ind, y, style=style, **kwds) return lines
def _plot(cls, ax, y, style=None, bw_method=None, ind=None, column_num=None, stacking_id=None, **kwds): y = KdePlotBase.compute_kde(y, bw_method=bw_method, ind=ind) lines = PandasMPLPlot._plot(ax, ind, y, style=style, **kwds) return lines