Python LuxDataFrame._sampled示例

编程语言: Python

命名空间/包名称: lux.core.frame

类/类型: LuxDataFrame

方法/功能: _sampled

hotexamples.com的示例: 3

Python LuxDataFrame._sampled - 共找到 3 个示例。以下是从开源项目中提取的、评价最高的 lux.core.frame.LuxDataFrame._sampled 真实 Python 示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

LuxDataFrame(6)

_compiled(3)

_sampled(3)

_min_max(2)

_pandas_only(2)

cardinality(2)

data_model(2)

data_model_lookup(2)

_approx_sample(1)

_data_type(1)

_length(1)

current_vis_to_JSON(1)

示例#1

显示文件

文件： PandasExecutor.py 项目： westernguy2/lux

    def execute_sampling(ldf: LuxDataFrame):
        """
        Compute and cache a sample for the overall dataframe.

        Sampling is only attempted when ``lux.config.sampling`` is truthy:

        - When # of rows exceeds lux.config.sampling_cap, the sample is capped
          at lux.config.sampling_cap rows.
        - Otherwise, when # of rows exceeds lux.config.sampling_start, 75% of the
          rows are taken as the sample.
        - In every other case (including sampling being switched off),
          ``ldf._sampled`` is set to ``ldf`` itself.

        In the two sampling cases an existing ``ldf._sampled`` is reused instead
        of being recomputed, and a notice is registered via
        ``ldf._message.add_unique``.

        The original note on this function lists the defaults as
        lux.config.sampling_start = 100k rows and lux.config.sampling_cap = 1M
        rows; those values are defined in lux.config, not here.

        Parameters
        ----------
        ldf : LuxDataFrame
            Dataframe whose ``_sampled`` attribute is populated in place.

        Returns
        -------
        None
        """
        SAMPLE_FLAG = lux.config.sampling
        SAMPLE_START = lux.config.sampling_start
        SAMPLE_CAP = lux.config.sampling_cap
        SAMPLE_FRAC = 0.75

        if SAMPLE_FLAG and len(ldf) > SAMPLE_CAP:
            if ldf._sampled is None:  # memoize unfiltered sample df
                ldf._sampled = ldf.sample(n=SAMPLE_CAP, random_state=1)
            ldf._message.add_unique(
                f"Large dataframe detected: Lux is only visualizing a sample capped at {SAMPLE_CAP} rows.",
                priority=99,
            )
        elif SAMPLE_FLAG and len(ldf) > SAMPLE_START:
            if ldf._sampled is None:  # memoize unfiltered sample df
                ldf._sampled = ldf.sample(frac=SAMPLE_FRAC, random_state=1)
            # SAMPLE_FRAC is a fraction (0.75); the ``.0%`` format spec renders it
            # as "75%" rather than the misleading "0.75%".
            ldf._message.add_unique(
                f"Large dataframe detected: Lux is visualizing a sample of {SAMPLE_FRAC:.0%} of the dataframe ({len(ldf._sampled)} rows).",
                priority=99,
            )
        else:
            # Sampling disabled or dataframe small enough: use the data as-is.
            ldf._sampled = ldf

示例#2

显示文件

文件： PandasExecutor.py 项目： ccubc/lux

 def execute_sampling(ldf: LuxDataFrame):
     """
     Choose the rows used for visualization and cache them on ``ldf._sampled``.

     - More than SAMPLE_CAP (30000) rows: a sample of SAMPLE_CAP rows.
     - More than SAMPLE_START (10000) rows: a sample of 75% of the rows.
     - Otherwise: the dataframe itself.

     In the two sampling cases an already-cached ``ldf._sampled`` is reused and
     a notice is registered through ``ldf._message.add_unique``.
     """
     SAMPLE_START = 10000
     SAMPLE_CAP = 30000
     SAMPLE_FRAC = 0.75

     row_count = len(ldf)
     if row_count <= SAMPLE_START:
         # Small dataframe: nothing to sample, use it directly.
         ldf._sampled = ldf
         return

     over_cap = row_count > SAMPLE_CAP

     # Only draw a sample when none has been memoized yet.
     if ldf._sampled is None:
         if over_cap:
             sample_kwargs = {"n": SAMPLE_CAP}
         else:
             sample_kwargs = {"frac": SAMPLE_FRAC}
         ldf._sampled = ldf.sample(random_state=1, **sample_kwargs)

     if over_cap:
         notice = f"Large dataframe detected: Lux is only visualizing a random sample capped at {SAMPLE_CAP} rows."
     else:
         notice = f"Large dataframe detected: Lux is only visualizing a random sample of {len(ldf._sampled)} rows."
     ldf._message.add_unique(notice, priority=99)

示例#3

显示文件

    def execute(vislist: VisList, ldf: LuxDataFrame):
        """
        Populate every vis in ``vislist`` with the data it needs for rendering.

        For each vis, in order:
        1) start from ``ldf`` and run ``PandasExecutor.execute_filter``
        2) collect the attributes named in the vis' inferred intent (except "Record")
        3) when more than 10000 rows remain, switch to a 75% sample
        4) keep only the collected attribute columns
        5) run the mark-specific step (aggregation, binning or 2D binning)

        Parameters
        ----------
        vislist: list[lux.Vis]
            Visualizations to process; each one is updated in place.
        ldf : lux.core.frame
            LuxDataFrame supplying the data. Its ``_sampled`` attribute caches
            the sample taken when no filter was executed.

        Returns
        -------
        None
        """
        for vis in vislist:
            # Each vis begins with the full content of the original dataframe.
            vis._vis_data = ldf
            filter_executed = PandasExecutor.execute_filter(vis)

            # Attributes referenced by the intent, leaving out "Record".
            attributes = {
                clause.attribute
                for clause in vis._inferred_intent
                if clause.attribute and clause.attribute != "Record"
            }

            # General sampling for large data.
            if len(vis.data) > 10000:
                if not filter_executed:
                    # Unfiltered data is identical across visualizations, so the
                    # sample is memoized on the dataframe.
                    if ldf._sampled is None:
                        ldf._sampled = vis.data.sample(frac=0.75, random_state=1)
                    vis._vis_data = ldf._sampled
                else:
                    vis._vis_data = vis.data.sample(frac=0.75, random_state=1)
            # TODO: Add some type of cap size on Nrows ?

            # Narrow the data down to the relevant columns.
            vis._vis_data = vis.data[list(attributes)]

            mark = vis.mark
            if mark in ("bar", "line"):
                PandasExecutor.execute_aggregate(vis, isFiltered=filter_executed)
            elif mark == "histogram":
                PandasExecutor.execute_binning(vis)
            elif mark == "scatter" and len(vis.data) > 10000:
                # Large scatter plots are replaced by a binned heatmap.
                vis._mark = "heatmap"
                PandasExecutor.execute_2D_binning(vis)