Example #1
import numpy as np
import pandas as pd

# run_query, handle_predefined, global_state, build_group_inputs_filter,
# json_float, build_axes, bar_builder, chart_builder_passthru and text are
# dtale internals assumed to already be in scope.


def build_histogram(data_id, col, query, point_filter):
    # Load the stored dataframe (with any predefined filters applied) and
    # run the user's custom query against it.
    data = run_query(
        handle_predefined(data_id),
        query,
        global_state.get_context_variables(data_id),
    )
    # Narrow the data down to the clicked point's group before binning
    # (renamed from "query" to avoid shadowing the parameter above).
    point_query, _ = build_group_inputs_filter(data, [point_filter])
    data = run_query(data, point_query)
    s = data[~pd.isnull(data[col])][col]
    hist_data, hist_labels = np.histogram(s, bins=10)
    # np.histogram returns len(bins) + 1 edges; drop the first edge so the
    # labels line up one-to-one with the bin counts.
    hist_labels = [json_float(lbl, precision=3) for lbl in hist_labels[1:]]
    axes_builder = build_axes(
        dict(
            data=dict(all=dict(Frequency=hist_data, Bins=hist_labels)),
            min=dict(Frequency=0),
            max=dict(Frequency=max(hist_data)),
        ),
        "Bins",
        dict(type="single", data={}),
    )
    hist_data = dict(data={"all": dict(x=hist_labels, Frequency=hist_data)})
    bars = bar_builder(
        hist_data,
        "Bins",
        ["Frequency"],
        axes_builder,
        chart_builder_passthru,
        modal=True,
    )
    # Treat the bin labels as categories so plotly spaces the bars evenly.
    bars.figure["layout"]["xaxis"]["type"] = "category"
    bars.figure["layout"]["title"]["text"] = "{} {} ({} {})".format(
        text("Histogram of"), col, len(s), text("data points"))
    return bars
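
Outside the dtale chart builders, the binning step above is plain numpy/pandas. A minimal standalone sketch (round stands in for dtale's json_float here):

import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.5, None, 4.0, 4.2, 9.9])
s = s[~pd.isnull(s)]  # same null-filtering as build_histogram

counts, edges = np.histogram(s, bins=10)
# np.histogram returns 11 edges for 10 bins; label each bin by its right edge.
labels = [round(edge, 3) for edge in edges[1:]]
assert len(labels) == len(counts) == 10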
Example #2
def group_values(
    chart_type,
    group_cols,
    map_group_cols,
    cs_group_cols,
    treemap_group_cols,
    funnel_group_cols,
    clustergram_group_cols,
    pareto_group_cols,
    inputs,
    prev_group_vals,
):
    data_id = inputs["data_id"]
    # Some chart types maintain their own group-column input; pick the one
    # matching the chart being built.
    if chart_type == "maps":
        group_cols = map_group_cols
    elif chart_type == "candlestick":
        group_cols = cs_group_cols
    elif chart_type == "treemap":
        group_cols = treemap_group_cols
    elif chart_type == "funnel":
        group_cols = funnel_group_cols
    elif chart_type == "clustergram":
        group_cols = clustergram_group_cols
    elif chart_type == "pareto":
        group_cols = pareto_group_cols
    group_cols = make_list(group_cols)
    group_types = get_group_types(inputs, group_cols)
    if "groups" not in group_types:
        return [], None
    group_vals = run_query(
        handle_predefined(data_id),
        inputs.get("query"),
        global_state.get_context_variables(data_id),
    )
    group_vals = build_group_val_options(group_vals, group_cols)
    # Carry over any previously selected group values that still exist in
    # the refreshed data.
    selections = []
    available_vals = [gv["value"] for gv in group_vals]
    if prev_group_vals is not None:
        selections = [pgv for pgv in prev_group_vals if pgv in available_vals]
    # If nothing carried over and the group count is manageable, select all.
    if not selections and len(group_vals) <= MAX_GROUPS:
        selections = available_vals
    return group_vals, selections
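
The chart-type selection at the top of group_values is pure dispatch. Assuming the same inputs, it could be collapsed into a lookup table; a sketch, not dtale's actual implementation:

def pick_group_cols(chart_type, default_group_cols, per_chart_group_cols):
    # per_chart_group_cols is a hypothetical mapping such as
    # {"maps": map_group_cols, "candlestick": cs_group_cols, ...};
    # chart types without a dedicated input fall back to the default.
    return per_chart_group_cols.get(chart_type, default_group_cols)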
Example #3
    def __init__(self, data_id, req):
        self.data_id = data_id
        self.analysis_type = get_str_arg(req, "type")
        curr_settings = global_state.get_settings(data_id) or {}
        self.query = build_query(data_id, curr_settings.get("query"))

        data = run_query(
            handle_predefined(data_id),
            self.query,
            global_state.get_context_variables(self.data_id),
        )
        self.selected_col = find_selected_column(
            data, get_str_arg(req, "col", "values"))
        self.data = data[~pd.isnull(data[self.selected_col])]
        self.dtype = find_dtype(self.data[self.selected_col])
        self.classifier = classify_type(self.dtype)
        self.code = build_code_export(
            data_id,
            imports="{}\n".format("\n".join([
                "import numpy as np",
                "import pandas as pd",
                "import plotly.graph_objs as go",
            ])),
        )

        # Default to a histogram for numeric/date columns ("F"loat, "I"nt,
        # "D"ate); everything else falls back to value counts.
        if self.analysis_type is None:
            if self.classifier in ["F", "I", "D"]:
                self.analysis_type = "histogram"
            else:
                self.analysis_type = "value_counts"

        if self.analysis_type == "geolocation":
            self.analysis = GeolocationAnalysis(req)
        elif self.analysis_type == "histogram":
            self.analysis = HistogramAnalysis(req)
        elif self.analysis_type == "categories":
            self.analysis = CategoryAnalysis(req)
        elif self.analysis_type == "value_counts":
            self.analysis = ValueCountAnalysis(req)
        elif self.analysis_type == "word_value_counts":
            self.analysis = WordValueCountAnalysis(req)
        elif self.analysis_type == "qq":
            self.analysis = QQAnalysis()
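
classify_type and find_dtype are dtale internals. Purely as an illustration of the single-letter codes the constructor branches on, a hypothetical stand-in could map numpy dtype kinds like this (not dtale's actual implementation):

import numpy as np


def classify_dtype(dtype):
    # Hypothetical stand-in: "F"=float, "I"=int, "D"=datetime, "B"=bool,
    # "S"=string/other.
    kind = np.dtype(dtype).kind
    return {"f": "F", "i": "I", "u": "I", "M": "D", "b": "B"}.get(kind, "S")


assert classify_dtype(np.float64) == "F"
assert classify_dtype("datetime64[ns]") == "D"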
Example #4
    def query_input(query, curr_query, curr_marks, data_id):
        """
        Dash callback for storing valid pandas dataframe queries. This acts as an intermediary between values typed
        by the user and values that are applied to pandas dataframes. Most of the time what the user has typed is not
        complete and thus not a valid pandas dataframe query.

        :param query: query input
        :type query: str
        :param curr_query: current valid pandas dataframe query
        :type curr_query: str
        :param curr_marks: current slider marks, returned unchanged when the new query is invalid
        :type curr_marks: dict
        :param data_id: identifier for the data we are viewing
        :type data_id: str
        :return: tuple of (query (if valid), styling for query input (highlighted pink on invalid input), query input
        title (containing invalid query exception information), slider marks)
        :rtype: tuple of (str, dict, str, dict)
        """
        try:
            data = handle_predefined(data_id)
            ctxt_vars = global_state.get_context_variables(data_id)
            df = run_query(data, query, ctxt_vars)
            # The query parsed and ran cleanly: store it and rebuild the
            # slider marks from the filtered frame.
            return (
                query,
                {"line-height": "inherit"},
                "",
                build_slider_counts(df, data_id, query),
            )
        except BaseException as ex:
            # Half-typed queries fail constantly, so keep the last valid
            # query, flag the input pink and surface the parse error.
            return (
                curr_query,
                {"line-height": "inherit", "background-color": "pink"},
                str(ex),
                curr_marks,
            )
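
The broad except is deliberate: half-typed expressions raise a variety of exception types. Assuming run_query ultimately delegates to pandas' DataFrame.query, the validate-or-keep-previous pattern can be illustrated with plain pandas:

import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3]})

for candidate in ["A >", "A > 1"]:  # half-typed vs. complete query
    try:
        print(candidate, "-> valid,", len(df.query(candidate)), "rows")
    except BaseException as ex:
        print(candidate, "-> invalid:", ex)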
Example #5
from contextlib import ExitStack
from unittest import mock

import pandas as pd

import dtale.global_state as global_state
import dtale.predefined_filters as predefined_filters
from dtale.query import handle_predefined  # assumed location of this helper


def test_set_filters(unittest):
    filters = []
    df = pd.DataFrame(
        [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12], [13, 14, 15, 16, 17, 18]],
        columns=["A", "B", "C", "D", "E", "F"],
    )
    with ExitStack() as stack:
        # Start from an empty filter registry so the test is self-contained.
        stack.enter_context(
            mock.patch("dtale.predefined_filters.PREDEFINED_FILTERS", filters))
        predefined_filters.set_filters([
            {
                "name": "A and B > 2",
                "column": "A",
                "description": "Filter A with B greater than 2",
                "handler": lambda df, val: df[
                    (df["A"] == val) & (df["B"] > 2)],
                "input_type": "input",
            },
            {
                "name": "A and (B % 2) == 0",
                "column": "A",
                "description": "Filter A with B mod 2 equals zero (is even)",
                "handler": lambda df, val: df[
                    (df["A"] == val) & (df["B"] % 2 == 0)],
                "input_type": "select",
            },
            {
                "name": "A in values and (B % 2) == 0",
                "column": "A",
                "description": "A is within a group of values and B mod 2 "
                               "equals zero (is even)",
                "handler": lambda df, val: df[
                    df["A"].isin(val) & (df["B"] % 2 == 0)],
                "input_type": "multiselect",
            },
        ])

        # Row (A=1, B=2) fails B > 2; row (A=7, B=8) passes.
        assert len(predefined_filters.get_filters()[0].handler(df, 1)) == 0
        assert len(predefined_filters.get_filters()[0].handler(df, 7)) == 1
        unittest.assertEqual(
            predefined_filters.get_filters()[0].asdict(),
            {
                "name": "A and B > 2",
                "column": "A",
                "description": "Filter A with B greater than 2",
                "inputType": "input",
            },
        )

        # Every B value in the frame is even, so only the A match matters.
        assert len(predefined_filters.get_filters()[1].handler(df, 1)) == 1
        assert len(predefined_filters.get_filters()[1].handler(df, 7)) == 1

        assert len(predefined_filters.get_filters()[2].handler(df, [1])) == 1
        assert len(predefined_filters.get_filters()[2].handler(df, [1, 7])) == 2

        # With no filter value stored in settings, handle_predefined leaves
        # the dataframe untouched.
        global_state.set_data("1", df)
        assert len(handle_predefined("1")) == 3

        # Once a value is stored for "A and B > 2", only the A == 7 row
        # survives.
        global_state.set_settings(
            "1", dict(predefinedFilters={"A and B > 2": 7}))
        assert len(handle_predefined("1")) == 1
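
Because each handler is just a function of (df, val), the filter logic can be sanity-checked without any dtale state. A trimmed-down sketch of the first filter:

import pandas as pd

df = pd.DataFrame([[1, 2], [7, 8], [13, 14]], columns=["A", "B"])

handler = lambda frame, val: frame[(frame["A"] == val) & (frame["B"] > 2)]
assert len(handler(df, 1)) == 0  # row (A=1, B=2) fails B > 2
assert len(handler(df, 7)) == 1  # row (A=7, B=8) passes both conditions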