Пример #1
0
    def _plot(cls,
              ax,
              y,
              style=None,
              bw_method=None,
              ind=None,
              column_num=None,
              stacking_id=None,
              **kwds):
        """Estimate a kernel density for one column and draw it.

        Parameters
        ----------
        ax
            Forwarded unchanged to ``PandasMPLPlot._plot``.
        y
            Spark DataFrame that selects one column. The first field of
            every row is converted to ``float`` and used as the sample.
        style
            Forwarded unchanged to ``PandasMPLPlot._plot``.
        bw_method : int or float
            Bandwidth passed to ``KernelDensity.setBandwidth``. Required
            even though the signature defaults it to ``None``.
        ind
            Iterable of points at which the density is estimated; each
            point is converted to ``float``. It is also passed on as the
            x values of the plot.
        column_num, stacking_id
            Accepted but not used by this method.
        **kwds
            Forwarded unchanged to ``PandasMPLPlot._plot``.

        Returns
        -------
        The value returned by ``PandasMPLPlot._plot``.

        Raises
        ------
        AssertionError
            If ``bw_method`` is not an ``int`` or ``float``.
        """
        # Validate with an explicit raise instead of an ``assert`` statement:
        # ``assert`` is removed under ``python -O``, which would let ``None``
        # through and silently fall back to the estimator's own default
        # bandwidth. The exception type and message are kept as they were.
        if not isinstance(bw_method, (int, float)):
            raise AssertionError("'bw_method' must be set as a scalar number.")

        # Using RDD is slow so we might have to change it to Dataset based
        # implementation once Spark has that implementation.
        sample = y.rdd.map(lambda row: float(row[0]))

        kd = KernelDensity()
        kd.setSample(sample)
        # Hand the caller's bandwidth to the Spark estimator.
        kd.setBandwidth(float(bw_method))

        density = kd.estimate(list(map(float, ind)))
        return PandasMPLPlot._plot(ax, ind, density, style=style, **kwds)
Пример #2
0
 def _plot(cls,
           ax,
           y,
           style=None,
           bw_method=None,
           ind=None,
           column_num=None,
           stacking_id=None,
           **kwds):
     """Compute the KDE of ``y`` and draw it against ``ind``.

     The density comes from ``KdePlotBase.compute_kde`` (called with
     ``bw_method`` and ``ind``); drawing is delegated to
     ``PandasMPLPlot._plot`` together with ``ax``, ``style`` and any extra
     keyword arguments. ``column_num`` and ``stacking_id`` are accepted
     but not used here.

     Returns whatever ``PandasMPLPlot._plot`` returns.
     """
     density = KdePlotBase.compute_kde(y, bw_method=bw_method, ind=ind)
     return PandasMPLPlot._plot(ax, ind, density, style=style, **kwds)