Example #1
0
    def query_input(query, pathname, curr_query):
        """
        dash callback that persists only *valid* pandas dataframe queries.  It sits between
        what the user is typing and what is actually applied to the dataframe: partially
        typed input is usually not a valid query, so invalid input is rejected and the
        previous valid query is kept.

        :param query: query input
        :type query: str
        :param pathname: URL path
        :param curr_query: current valid pandas dataframe query
        :return: tuple of (query (if valid), styling for query input (if invalid input), query input title (containing
        invalid query exception information)
        :rtype: tuple of (str, str, str)
        """
        try:
            data_id = get_data_id(pathname)
            df = global_state.get_data(data_id)
            context = global_state.get_context_variables(data_id)
            # raises if the query is not (yet) valid for this dataframe
            run_query(df, query, context)
            return query, {"line-height": "inherit"}, ""
        except BaseException as ex:
            # invalid input: keep the last good query, highlight the box, surface the error
            style = {"line-height": "inherit", "background-color": "pink"}
            return curr_query, style, str(ex)
Example #2
0
def build_histogram(data_id, col, query, point_filter):
    """
    Build a 10-bin histogram bar chart for column *col* of the dataset *data_id*,
    after applying both the user query and a point-level group filter.

    :param data_id: identifier of the dataset to chart
    :param col: column whose non-null values are binned
    :param query: pandas dataframe query to pre-filter the data
    :param point_filter: group-value mapping converted to an additional filter
    :return: bar chart component with a categorical x-axis of bin edges
    """
    df = run_query(
        handle_predefined(data_id),
        query,
        global_state.get_context_variables(data_id),
    )
    # narrow to the clicked point's group before binning
    point_query, _ = build_group_inputs_filter(df, [point_filter])
    df = run_query(df, point_query)
    values = df[~pd.isnull(df[col])][col]
    counts, edges = np.histogram(values, bins=10)
    # label each bin by its upper edge, rounded for display
    labels = [json_float(edge, precision=3) for edge in edges[1:]]
    axes_builder = build_axes(
        {
            "data": {"all": {"Frequency": counts, "Bins": labels}},
            "min": {"Frequency": 0},
            "max": {"Frequency": max(counts)},
        },
        "Bins",
        {"type": "single", "data": {}},
    )
    chart = bar_builder(
        {"data": {"all": {"x": labels, "Frequency": counts}}},
        "Bins",
        ["Frequency"],
        axes_builder,
        chart_builder_passthru,
        modal=True,
    )
    # bin edges are labels, not a continuous axis
    chart.figure["layout"]["xaxis"]["type"] = "category"
    chart.figure["layout"]["title"]["text"] = "{} {} ({} {})".format(
        text("Histogram of"), col, len(values), text("data points"))
    return chart
Example #3
0
def test_run_query():
    """run_query: invalid queries raise, context variables and dotted column names work."""
    df = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]})

    # no row satisfies the query -> run_query raises
    with pytest.raises(Exception):
        query.run_query(df, "a == 4")

    # @a refers to the context variable supplied in the third argument
    assert len(query.run_query(df, "`a` in @a", {"a": [1, 2, 3]})) == 3

    # backtick-quoted column names containing "." are only supported on python 3
    if PY3:
        df = pd.DataFrame({"a.b": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]})
        assert len(query.run_query(df, "`a.b` == 1")) == 1
Example #4
0
File: utils.py  Project: reza1615/dtale
def retrieve_chart_data(df, *args, **kwargs):
    """
    Retrieves data from a dataframe for x, y, z & group inputs complete with date frequency
    formatting (:meth:`dtale.charts.utils.date_freq_handler`) if specified

    :param df: dataframe that contains data for chart
    :type df: :class:`pandas:pandas.DataFrame`
    :param args: columns to use
    :type args: iterable of str
    :param kwargs: optional keyword arguments; "group_val" (list of group-value mappings)
                   filters the resulting dataframe
    :return: tuple of (dataframe of data required for chart construction, list of code
             strings reproducing the operations performed)
    :rtype: tuple of (:class:`pandas:pandas.DataFrame`, list of str)
    """
    freq_handler = date_freq_handler(df)
    cols = flatten_lists([make_list(a) for a in args])
    all_code = []
    all_data = []
    for col in cols:
        if col is not None:
            s, code = freq_handler(col)
            all_data.append(s)
            if code is not None:
                all_code.append(code)
    all_data = pd.concat(all_data, axis=1)
    all_code = ["chart_data = pd.concat(["] + all_code + ["], axis=1)"]
    if len(make_list(kwargs.get("group_val"))):
        # build_group_inputs_filter returns a (query, ...) tuple; only the query string
        # may be passed to run_query/triple_quote (matches its other call sites)
        filters, _ = build_group_inputs_filter(all_data, kwargs["group_val"])
        all_data = run_query(all_data, filters)
        all_code.append("chart_data = chart_data.query({})".format(
            triple_quote(filters)))
    return all_data, all_code
Example #5
0
File: views.py  Project: redisun/dtale
 def group_values(
     chart_type,
     group_cols,
     map_group_cols,
     cs_group_cols,
     treemap_group_cols,
     pathname,
     inputs,
     prev_group_vals,
 ):
     """Resolve group-value dropdown options and selections for the active chart type."""
     data_id = get_data_id(pathname)
     # map/candlestick/treemap charts carry their own group-column inputs
     type_specific_cols = {
         "maps": map_group_cols,
         "candlestick": cs_group_cols,
         "treemap": treemap_group_cols,
     }
     group_cols = make_list(type_specific_cols.get(chart_type, group_cols))
     group_types = get_group_types(inputs, data_id, group_cols)
     if "groups" not in group_types:
         return [], None
     df = run_query(
         global_state.get_data(data_id),
         inputs.get("query"),
         global_state.get_context_variables(data_id),
     )
     group_vals = build_group_val_options(df, group_cols)
     available_vals = [opt["value"] for opt in group_vals]
     # keep only previously selected values that still exist after filtering
     selections = []
     if prev_group_vals is not None:
         selections = [val for val in prev_group_vals if val in available_vals]
     # select everything by default when the option set is small enough
     if not len(selections) and len(group_vals) <= MAX_GROUPS:
         selections = available_vals
     return group_vals, selections
Example #6
0
 def reshape(self):
     """Apply this instance's saved query (if any) and run the configured reshape builder."""
     settings = global_state.get_settings(self.data_id) or {}
     filtered = run_query(
         global_state.get_data(self.data_id),
         settings.get("query"),
         global_state.get_context_variables(self.data_id),
     )
     return self.builder.reshape(filtered)
Example #7
0
 def run(self):
     """Filter this instance's data by its stored query and execute the report on it."""
     settings = global_state.get_settings(self.data_id) or {}
     filtered = run_query(
         global_state.get_data(self.data_id),
         settings.get("query"),
         global_state.get_context_variables(self.data_id),
     )
     return self.report.run(filtered)
Example #8
0
 def remove(self, df):
     """
     Return the duplicated rows of *df* (groups of size > 1 under the configured
     "group" columns) plus the code string that reproduces the operation.

     :raises NoDuplicatesToShowException: when no group has more than one row
     """
     group = self.cfg.get("group")
     dupe_groups = [grp for _, grp in df.groupby(group) if len(grp) > 1]
     if not dupe_groups:
         raise NoDuplicatesToShowException("No duplicates to show!")
     duplicates = pd.concat(dupe_groups)
     group_filter = None
     if self.cfg.get("filter"):
         # restrict output to the specific group values requested in the config
         filter_cfg = [dict(zip(group, self.cfg["filter"]))]
         group_filter, _ = build_group_inputs_filter(df, filter_cfg)
         duplicates = run_query(duplicates, group_filter)
     return duplicates, self._build_code(group_filter)
Example #9
0
 def group_values(
     chart_type,
     group_cols,
     map_group_cols,
     cs_group_cols,
     treemap_group_cols,
     funnel_group_cols,
     clustergram_group_cols,
     pareto_group_cols,
     inputs,
     prev_group_vals,
 ):
     """Resolve group-value dropdown options and selections for the active chart type."""
     data_id = inputs["data_id"]
     # several chart types carry their own dedicated group-column inputs
     type_specific_cols = {
         "maps": map_group_cols,
         "candlestick": cs_group_cols,
         "treemap": treemap_group_cols,
         "funnel": funnel_group_cols,
         "clustergram": clustergram_group_cols,
         "pareto": pareto_group_cols,
     }
     group_cols = make_list(type_specific_cols.get(chart_type, group_cols))
     group_types = get_group_types(inputs, group_cols)
     if "groups" not in group_types:
         return [], None
     df = run_query(
         handle_predefined(data_id),
         inputs.get("query"),
         global_state.get_context_variables(data_id),
     )
     group_vals = build_group_val_options(df, group_cols)
     available_vals = [opt["value"] for opt in group_vals]
     # keep only previously selected values that still exist after filtering
     selections = []
     if prev_group_vals is not None:
         selections = [val for val in prev_group_vals if val in available_vals]
     # select everything by default when the option set is small enough
     if not len(selections) and len(group_vals) <= MAX_GROUPS:
         selections = available_vals
     return group_vals, selections
Example #10
0
    def __init__(self, data_id, req):
        """
        Load the (query-filtered) data for *data_id*, resolve the column under analysis
        and instantiate the concrete analysis implementation named by the request's
        "type" argument.

        :param data_id: identifier for the data instance being analyzed
        :param req: request object whose string args ("type", "col") configure the analysis
        """
        self.data_id = data_id
        self.analysis_type = get_str_arg(req, "type")
        curr_settings = global_state.get_settings(data_id) or {}
        self.query = build_query(data_id, curr_settings.get("query"))

        # apply any saved query so the analysis only sees the filtered rows
        data = run_query(
            handle_predefined(data_id),
            self.query,
            global_state.get_context_variables(self.data_id),
        )
        self.selected_col = find_selected_column(
            data, get_str_arg(req, "col", "values"))
        # drop rows where the selected column is missing
        self.data = data[~pd.isnull(data[self.selected_col])]
        self.dtype = find_dtype(self.data[self.selected_col])
        self.classifier = classify_type(self.dtype)
        # code export prefixed with the imports the generated snippets rely on
        self.code = build_code_export(
            data_id,
            imports="{}\n".format("\n".join([
                "import numpy as np",
                "import pandas as pd",
                "import plotly.graph_objs as go",
            ])),
        )

        # default analysis: histogram for classifiers "F"/"I"/"D" (presumably
        # float/int/date -- confirm against classify_type), value counts otherwise
        if self.analysis_type is None:
            self.analysis_type = ("histogram" if self.classifier
                                  in ["F", "I", "D"] else "value_counts")

        # dispatch to the concrete analysis implementation; note QQAnalysis takes no req
        if self.analysis_type == "geolocation":
            self.analysis = GeolocationAnalysis(req)
        elif self.analysis_type == "histogram":
            self.analysis = HistogramAnalysis(req)
        elif self.analysis_type == "categories":
            self.analysis = CategoryAnalysis(req)
        elif self.analysis_type == "value_counts":
            self.analysis = ValueCountAnalysis(req)
        elif self.analysis_type == "word_value_counts":
            self.analysis = WordValueCountAnalysis(req)
        elif self.analysis_type == "qq":
            self.analysis = QQAnalysis()
        # NOTE(review): an unrecognized analysis_type leaves self.analysis unset --
        # confirm callers guard against AttributeError
Example #11
0
    def query_input(query, curr_query, curr_marks, data_id):
        """
        dash callback that persists only *valid* pandas dataframe queries.  It sits between
        what the user is typing and what is actually applied to the dataframe: partially
        typed input is usually not a valid query, so invalid input is rejected and the
        previous valid query (and slider marks) are kept.

        :param query: query input
        :type query: str
        :param curr_query: current valid pandas dataframe query
        :param curr_marks: current slider marks, returned unchanged on invalid input
        :param data_id: identifier for the data we are viewing
        :type data_id: string
        :return: tuple of (query (if valid), styling for query input (if invalid input), query input title (containing
        invalid query exception information), slider counts)
        """
        try:
            context = global_state.get_context_variables(data_id)
            filtered = run_query(handle_predefined(data_id), query, context)
            marks = build_slider_counts(filtered, data_id, query)
            return query, {"line-height": "inherit"}, "", marks
        except BaseException as ex:
            # invalid input: keep last good state, highlight the box, surface the error
            style = {"line-height": "inherit", "background-color": "pink"}
            return curr_query, style, str(ex), curr_marks