示例#1
0
    def initialize_chart(self):
        """Build an Altair heatmap (pre-binned 2-D histogram) for this vis.

        Reads the x/y channel attributes from ``self.vis``, draws one rect
        per (xBinStart..xBinEnd, yBinStart..yBinEnd) cell with log-scaled
        opacity by ``count``, appends equivalent standalone Altair code to
        ``self.code``, and returns the configured chart.
        """
        x_attr = self.vis.get_attr_by_channel("x")[0]
        y_attr = self.vis.get_attr_by_channel("y")[0]

        # Abbreviate long attribute names for axis titles:
        # first 15 chars + "..." + last 10 chars once past 25 chars.
        x_attr_abv = str(x_attr.attribute)
        y_attr_abv = str(y_attr.attribute)

        if len(x_attr_abv) > 25:
            x_attr_abv = x_attr.attribute[:15] + "..." + x_attr.attribute[-10:]
        if len(y_attr_abv) > 25:
            y_attr_abv = y_attr.attribute[:15] + "..." + y_attr.attribute[-10:]

        # Strip "." from attribute names (Vega-Lite treats dots in field
        # names as nested-field accessors).
        # NOTE(review): this mutates the attribute objects on self.vis in
        # place — confirm no other consumer relies on the original names.
        if isinstance(x_attr.attribute, str):
            x_attr.attribute = x_attr.attribute.replace(".", "")
        if isinstance(y_attr.attribute, str):
            y_attr.attribute = y_attr.attribute.replace(".", "")

        # The data is already binned: each record carries its own bin edges,
        # hence bin=alt.BinParams(binned=True) plus x2/y2 for the far edges.
        chart = (alt.Chart(self.data).mark_rect().encode(
            x=alt.X(
                "xBinStart",
                type="quantitative",
                axis=alt.Axis(title=x_attr_abv),
                bin=alt.BinParams(binned=True),
            ),
            x2=alt.X2("xBinEnd"),
            y=alt.Y(
                "yBinStart",
                type="quantitative",
                axis=alt.Axis(title=y_attr_abv),
                bin=alt.BinParams(binned=True),
            ),
            y2=alt.Y2("yBinEnd"),
            opacity=alt.Opacity(
                "count",
                type="quantitative",
                scale=alt.Scale(type="log"),
                legend=None,
            ),
        ))
        # Keep even sparse cells faintly visible.
        chart = chart.configure_scale(minOpacity=0.1, maxOpacity=1)
        # Setting tooltip as non-null
        chart = chart.configure_mark(tooltip=alt.TooltipContent("encoding"))
        chart = chart.interactive()  # Enable Zooming and Panning

        ####################################
        # Constructing Altair Code String ##
        ####################################

        self.code += "import altair as alt\n"
        self.code += f"visData = pd.DataFrame({str(self.data.to_dict())})\n"
        self.code += f"""
		chart = alt.Chart(visData).mark_rect().encode(
			x=alt.X('xBinStart', type='quantitative', axis=alt.Axis(title='{x_attr_abv}'), bin = alt.BinParams(binned=True)),
			x2=alt.X2('xBinEnd'),
			y=alt.Y('yBinStart', type='quantitative', axis=alt.Axis(title='{y_attr_abv}'), bin = alt.BinParams(binned=True)),
			y2=alt.Y2('yBinEnd'),
			opacity = alt.Opacity('count',type='quantitative',scale=alt.Scale(type="log"),legend=None)
		)
		chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding')) # Setting tooltip as non-null
		"""
        return chart
def make_plot(infile):
    """Plot weekly traffic trends per protocol and save them to disk.

    Loads per-flow aggregates from the parquet file ``infile``, maps each
    flow to a small set of protocol names, consolidates by week, and saves:

    * ``renders/bytes_per_protocol_trends_normalized_facet.png`` — share of
      weekly traffic per protocol, faceted by direction, and
    * ``renders/bytes_per_protocol_trends.png`` — absolute weekly traffic.

    Parameters
    ----------
    infile : str
        Path to the parquet file with grouped flow statistics.

    Returns
    -------
    alt.Chart
        The absolute-traffic chart (already saved to disk).
    """
    grouped_flows = infra.pd.read_parquet(infile)
    grouped_flows = grouped_flows.reset_index()
    grouped_flows["bytes_total"] = grouped_flows["bytes_up"] + grouped_flows["bytes_down"]

    # Map down to a smaller number of protocol names, including "other".
    grouped_flows["name"] = grouped_flows.apply(
        lambda row: _assign_protocol_plain_name(row.protocol,
                                                row.dest_port),
        axis="columns"
    )

    # Debug aid: show which destination ports dominate unclassified UDP
    # (protocol 17). The original also bound an unused alias first.
    other_udp = grouped_flows.loc[
        (grouped_flows["protocol"] == 17) & (grouped_flows["name"] == "Other UDP")
    ].groupby("dest_port").sum()
    print(other_udp.sort_values("bytes_total"))

    # Consolidate by week instead of by day.
    grouped_flows = grouped_flows[
        ["start_bin", "bytes_total", "bytes_up", "bytes_down", "name"]
    ].groupby([pd.Grouper(key="start_bin", freq="W-MON"), "name"]).sum()

    grouped_flows = grouped_flows.reset_index()

    print(grouped_flows)

    # Generate an outage annotation overlay (white band across the outage).
    outage_info = pd.DataFrame([{"start": infra.constants.OUTAGE_START, "end": infra.constants.OUTAGE_END}])
    outage_annotation = alt.Chart(outage_info).mark_rect(
        opacity=0.7,
        strokeWidth=2,
    ).encode(
        x=alt.X("start"),
        x2=alt.X2("end"),
        color=alt.value("#FFFFFF")
    )

    # Legend sorting order: protocols by total bytes, descending.
    proto_totals = grouped_flows.groupby("name").sum().reset_index()
    legend_sort_order = proto_totals.sort_values("bytes_total", ascending=True).set_index("bytes_total").reset_index()
    sort_list = legend_sort_order["name"].tolist()
    sort_list.reverse()

    # Per-direction stacking orders for the area layers.
    sort_down_order = proto_totals.sort_values("bytes_down", ascending=True).set_index("bytes_down").reset_index()
    sort_down_order["order"] = sort_down_order.index
    sort_down_order["direction"] = "Downlink"

    sort_up_order = proto_totals.sort_values("bytes_up", ascending=True).set_index("bytes_up").reset_index()
    sort_up_order["order"] = sort_up_order.index
    sort_up_order["direction"] = "Uplink"

    # DataFrame.append was removed in pandas 2.0; concat is the supported way.
    orders = pd.concat([sort_down_order, sort_up_order])

    grouped_flows["Downlink"] = grouped_flows["bytes_down"] / (1000**3)
    grouped_flows["Uplink"] = grouped_flows["bytes_up"] / (1000**3)

    # Melt the dataset for faceting by direction.
    links = grouped_flows.melt(
        id_vars=["name", "start_bin"],
        value_vars=["Downlink", "Uplink"],
        var_name="direction",
        value_name="GB"
    ).set_index("name")

    # Merge the sort orders back into the larger dataset.
    faceted_flows = links.merge(orders, on=["name", "direction"])

    area = alt.Chart().mark_area().encode(
        x=alt.X("start_bin:T",
                title="Time",
                axis=alt.Axis(labels=True),
                ),
        y=alt.Y("sum(GB):Q",
                title="Share of Traffic Per Week",
                stack="normalize"
                ),
        color=alt.Color(
            "name",
            title="Protocol (By Total)",
            scale=alt.Scale(scheme="tableau10"),
            sort=sort_list,
        ),
        order=alt.Order("order"),
    )

    (area + outage_annotation).properties(
        width=500,
    ).facet(
        column=alt.Column(
            'direction:N',
            title="",
        ),
        data=faceted_flows,
    ).save(
        "renders/bytes_per_protocol_trends_normalized_facet.png",
        scale_factor=2,
    )

    # NOTE(review): grouped_flows has "Downlink"/"Uplink" columns but no
    # "GB" column, so the y encoding below may render empty — confirm this
    # chart against the intended output.
    plot = alt.Chart(grouped_flows).mark_area().encode(
        x=alt.X("start_bin:T",
                title="Time",
                axis=alt.Axis(labels=True),
                ),
        y=alt.Y("sum(GB):Q",
                title="Total Traffic Per Week(GB)",
                ),
        color="name",
        detail="name",
    ).properties(
        width=500,
    )
    # Chart.save returns None; save separately so the chart itself can be
    # returned (the original returned the result of .save, i.e. None).
    plot.save("renders/bytes_per_protocol_trends.png",
              scale_factor=2)

    return plot
def make_org_plot(infile):
    """Generate plots to explore the traffic distribution across organizations.

    Loads per-flow aggregates from the parquet file ``infile``, consolidates
    them into weekly totals per organization, and saves:

    * ``renders/bytes_per_category_org_facet_main.png`` — share of weekly
      traffic for the top organizations (long tail aggregated into "Other"),
      faceted by direction, and
    * ``renders/bytes_per_category_org_weekly_stream_others.png`` — a
      normalized stream of only the "other" organizations.

    Parameters
    ----------
    infile : str
        Path to the parquet file with grouped flow statistics.
    """
    grouped_flows = infra.pd.read_parquet(infile)
    grouped_flows = grouped_flows.reset_index()
    grouped_flows["bytes_total"] = grouped_flows["bytes_up"] + grouped_flows[
        "bytes_down"]

    # Consolidate by week instead of by day.
    grouped_flows = grouped_flows[[
        "start_bin", "bytes_total", "bytes_up", "bytes_down", "org"
    ]].groupby([pd.Grouper(key="start_bin", freq="W-MON"), "org"]).sum()

    grouped_flows = grouped_flows.reset_index()

    # Generate an outage annotation overlay (white band across the outage).
    outage_info = pd.DataFrame([{
        "start": infra.constants.OUTAGE_START,
        "end": infra.constants.OUTAGE_END
    }])
    outage_annotation = alt.Chart(outage_info).mark_rect(
        opacity=0.7,
        strokeWidth=2,
    ).encode(x=alt.X("start"), x2=alt.X2("end"), color=alt.value("#FFFFFF"))

    # Everything beyond the top-N organizations is folded into "Other".
    number_of_main_orgs = 9
    sorted_flows = grouped_flows.groupby("org").sum().sort_values(
        "bytes_total", ascending=False)
    orgs_to_other = sorted_flows.index[number_of_main_orgs:]
    number_othered = len(orgs_to_other)

    # Create a separate frame with only the main flows and the aggregated other.
    grouped_with_other = grouped_flows.copy()
    grouped_with_other["org"] = grouped_with_other["org"].replace(
        orgs_to_other, "Other N={}".format(number_othered))

    # Group together to find orders for the legend and both areas below.
    org_groups = grouped_with_other.groupby("org").sum().reset_index()

    # Figure out legend sorting order by total amount.
    legend_order = org_groups.sort_values(
        "bytes_total", ascending=False).set_index("bytes_total").reset_index()
    legend_sort_list = legend_order["org"].tolist()

    # Figure out area layer order by amounts for upload and download.
    sort_order_down = org_groups.sort_values(
        "bytes_down", ascending=True).set_index("bytes_down").reset_index()
    sort_order_down["order"] = sort_order_down.index
    sort_order_down["direction"] = "Downlink"

    sort_order_up = org_groups.sort_values(
        "bytes_up", ascending=True).set_index("bytes_up").reset_index()
    sort_order_up["order"] = sort_order_up.index
    sort_order_up["direction"] = "Uplink"

    # DataFrame.append was removed in pandas 2.0; concat is the supported way.
    area_sort_orders = pd.concat([sort_order_up, sort_order_down])

    # Convert to GB and melt the main dataframe for direction faceting.
    grouped_with_other["Downlink"] = grouped_with_other["bytes_down"] / (1000**
                                                                         3)
    grouped_with_other["Uplink"] = grouped_with_other["bytes_up"] / (1000**3)
    grouped_with_other = grouped_with_other.melt(
        id_vars=["org", "start_bin"],
        value_vars=["Downlink", "Uplink"],
        var_name="direction",
        value_name="GB")

    # Merge the sort order back into the larger dataset.
    grouped_with_other = grouped_with_other.merge(area_sort_orders,
                                                  on=["org", "direction"])
    print(grouped_with_other)
    area = alt.Chart().mark_area().encode(
        x=alt.X(
            "start_bin:T",
            title="Time",
            axis=alt.Axis(labels=True),
        ),
        y=alt.Y(
            "sum(GB):Q",
            title="Share of Traffic Per Week",
            stack="normalize",
        ),
        color=alt.Color(
            "org",
            title="Organization (By Total)",
            scale=alt.Scale(scheme="paired"),
            sort=legend_sort_list,
        ),
        order=alt.Order("order"),
    )

    (area + outage_annotation).properties(width=500, ).facet(
        column=alt.Column(
            "direction:N",
            title="",
        ),
        data=grouped_with_other,
    ).save("renders/bytes_per_category_org_facet_main.png", scale_factor=2)

    # Create a separate frame for just the other flows.
    main_flows = sorted_flows.index[:number_of_main_orgs]
    others = grouped_flows.copy().reset_index().set_index("org")
    others = others.drop(main_flows).reset_index()

    # Figure out sorting order by total amount.
    sort_check = others.groupby("org").sum().reset_index()
    sort_order = sort_check.sort_values(
        "bytes_total", ascending=True).set_index("bytes_total").reset_index()
    sort_list = sort_order["org"].tolist()
    sort_list.reverse()
    sort_order["order"] = sort_order.index

    # Merge the sort order back into the larger dataset.
    others = others.merge(sort_order[["org", "order"]], on="org")

    # Debug output: size and membership of the long tail.
    print(len(others["org"].unique()))
    print(others["org"].unique())
    print(others)

    others["GB"] = others["bytes_total"] / (1000**3)
    area = alt.Chart(others).mark_area().encode(
        x=alt.X(
            "start_bin:T",
            title="Time",
            axis=alt.Axis(labels=True),
        ),
        y=alt.Y(
            "sum(GB):Q",
            title="Total Traffic Per Week(GB)",
            stack="normalize",
        ),
        color=alt.Color(
            "org",
            title="Organization",
            scale=alt.Scale(scheme="category20c"),
            sort=sort_list,
        ),
        # The order actually makes this chart harder to understand, since
        # the color needs to wrap around.
        order=alt.Order("order"),
    )

    (area + outage_annotation).configure_legend(
        symbolLimit=100,
        columns=2,
    ).properties(
        width=1000,
        height=500,
    ).save("renders/bytes_per_category_org_weekly_stream_others.png",
           scale_factor=2)
def make_category_plot(infile):
    """Plot the weekly share of traffic per content category.

    Loads per-flow aggregates from the parquet file ``infile``, consolidates
    them into weekly totals per category, and saves a normalized area chart
    faceted by direction to ``renders/bytes_per_category_cat_facet.png``.

    Parameters
    ----------
    infile : str
        Path to the parquet file with grouped flow statistics.
    """
    grouped_flows = infra.pd.read_parquet(infile)
    grouped_flows = grouped_flows.reset_index()
    grouped_flows["bytes_total"] = grouped_flows["bytes_up"] + grouped_flows[
        "bytes_down"]

    # Consolidate by week instead of by day.
    grouped_flows = grouped_flows[[
        "start_bin", "bytes_total", "category", "bytes_up", "bytes_down"
    ]].groupby([pd.Grouper(key="start_bin", freq="W-MON"), "category"]).sum()

    grouped_flows = grouped_flows.reset_index()

    # Generate an outage annotation overlay (white band across the outage).
    outage_info = pd.DataFrame([{
        "start": infra.constants.OUTAGE_START,
        "end": infra.constants.OUTAGE_END
    }])
    outage_annotation = alt.Chart(outage_info).mark_rect(
        opacity=0.7,
        strokeWidth=2,
    ).encode(x=alt.X("start"), x2=alt.X2("end"), color=alt.value("#FFFFFF"))

    # Legend sorting order: categories by total bytes, descending.
    # (The original recomputed this identical groupby twice.)
    cat_totals = grouped_flows.groupby("category").sum().reset_index()
    legend_sort_order = cat_totals.sort_values(
        "bytes_total", ascending=True).set_index("bytes_total").reset_index()
    sort_list = legend_sort_order["category"].tolist()
    sort_list.reverse()

    # Per-direction stacking orders for the area layers.
    sort_down_order = cat_totals.sort_values(
        "bytes_down", ascending=True).set_index("bytes_down").reset_index()
    sort_down_order["order"] = sort_down_order.index
    sort_down_order["direction"] = "Downlink"

    sort_up_order = cat_totals.sort_values(
        "bytes_up", ascending=True).set_index("bytes_up").reset_index()
    sort_up_order["order"] = sort_up_order.index
    sort_up_order["direction"] = "Uplink"

    # DataFrame.append was removed in pandas 2.0; concat is the supported way.
    orders = pd.concat([sort_down_order, sort_up_order])

    grouped_flows["Downlink"] = grouped_flows["bytes_down"] / (1000**3)
    grouped_flows["Uplink"] = grouped_flows["bytes_up"] / (1000**3)

    # Melt the dataset for faceting by direction.
    links = grouped_flows.melt(id_vars=["category", "start_bin"],
                               value_vars=["Downlink", "Uplink"],
                               var_name="direction",
                               value_name="GB").set_index("category")

    # Merge the sort orders back into the larger dataset.
    faceted_flows = links.merge(orders, on=["category", "direction"])

    area = alt.Chart().mark_area().encode(
        x=alt.X(
            "start_bin:T",
            title="Time",
            axis=alt.Axis(labels=True),
        ),
        y=alt.Y("sum(GB):Q",
                title="Share of Traffic Per Week",
                stack="normalize"),
        color=alt.Color(
            "category",
            title="Category (By Total)",
            scale=alt.Scale(scheme="tableau20"),
            sort=sort_list,
        ),
        order=alt.Order("order"),
    )

    (area + outage_annotation).properties(width=500, ).facet(
        column=alt.Column(
            'direction:N',
            title="",
        ),
        data=faceted_flows,
    ).save(
        "renders/bytes_per_category_cat_facet.png",
        scale_factor=2,
    )
示例#5
0
File: viz.py  Project: rnair07/bootcamp
def altair_box(data=None, encode_x=None, encode_y=None, 
               encode_color=alt.Color(), height=None, width=None):
    """Generate a box plot with Altair.

    Parameters
    ----------
    data : Pandas DataFrame
        A tidy data frame.
    encode_x : str or altair.X instance
        Specification of x-values.
    encode_y : str or altair.Y instance
        Specification of y-values.
    encode_color : str or Color instance or None or Undefined (default)
        Specification of coloring of box plot. If Undefined (Default),
        all boxes are colored with Altair defaults. If None, the boxes
        are colored according to the categorical variable.
    height : float or None (default)
        Height of the chart, in pixels. If None, inferred.
    width : float or None (default)
        Width of the chart, in pixels. If None, inferred.

    Returns
    -------
    output : Chart
        Altair Chart instance.

    Raises
    ------
    RuntimeError
        If a data type cannot be determined for `x` or `y`, if neither
        axis is nominal/ordinal, or if both axes are nominal.
    """

    # Make Altair instances
    if isinstance(encode_x, alt.X):
        x = encode_x
    else:
        x = alt.X(encode_x)

    if isinstance(encode_y, alt.Y):
        y = encode_y
    else:
        y = alt.Y(encode_y)

    # Get column names by stripping a ":Q"-style type suffix, if present.
    if len(x.shorthand) > 1 and x.shorthand[-2] == ':':
        x_name = x.shorthand[:-2]
    else:
        x_name = x.shorthand

    if len(y.shorthand) > 1 and y.shorthand[-2] == ':':
        y_name = y.shorthand[:-2]
    else:
        y_name = y.shorthand

    # Get axis titles, falling back to the bare column names.
    if isinstance(x.title, alt.utils.schemapi.UndefinedType):
        x_title = x_name
    else:
        x_title = x.title
    if isinstance(y.title, alt.utils.schemapi.UndefinedType):
        y_title = y_name
    else:
        y_title = y.title

    # Determine encoding types as single letters (e.g. 'Q', 'N', 'O'),
    # from the explicit `type` attribute or the shorthand suffix.
    var_types = [None, None]
    for i, (var, var_name) in enumerate(zip([x, y], ['x', 'y'])):
        if not isinstance(var.type, alt.utils.schemapi.UndefinedType):
            var_types[i] = var.type[0].upper()
        elif len(var.shorthand) > 1 and var.shorthand[-2] == ':':
            var_types[i] = var.shorthand[-1]
        else:
            # Bug fix: the original interpolated the channel *object* into
            # the message; name the offending keyword argument instead.
            raise RuntimeError(
                    f'Data type of `encode_{var_name}` must be specified.')

    # Make sure data types are given and ok
    if var_types[0] not in 'NO' and var_types[1] not in 'NO':
        raise RuntimeError('Either `x` or `y` must be nominal or ordinal.')
    # Bug fix: the original compared against ['N, N'] — a one-element list
    # that can never equal the two-element var_types — so this guard never
    # fired.
    if var_types == ['N', 'N']:
        raise RuntimeError('Cannot have both `x` and `y` be nominal.')

    # Decide if it's a horizontal plot or not. The categorical axis also
    # supplies the default coloring when encode_color is None.
    if var_types[0] in 'NO':
        horizontal = False
        cats = x_name
        val = y_name
        if encode_color is None:
            encode_color = alt.Color(f'{cats}:N', title=x.title)
    else:
        horizontal = True
        cats = y_name
        val = x_name
        if encode_color is None:
            encode_color = alt.Color(f'{cats}:N', title=y.title)

    # Set up groupby object
    grouped = data.groupby(cats)
    n_boxes = len(grouped)

    # Set default heights and widths, also of bars
    if width is None:
        if horizontal:
            width = 400
        else:
            width = 200
    if height is None:
        if horizontal:
            height = 200
        else:
            height = 300

    # Boxes fill 90% of the categorical axis, split evenly between groups.
    if horizontal:
        size = height*0.9 / n_boxes
    else:
        size = width*0.9 / n_boxes

    # Data frame for boxes and whiskers
    df_box = (grouped[val].apply(_box_and_whisker)
                          .reset_index()
                          .rename(columns={'level_1': 'box_val'})
                          .pivot(index=cats, columns='box_val'))
    df_box.columns = df_box.columns.get_level_values(1)
    df_box = df_box.reset_index()

    # Data frame for outliers
    df_outlier = grouped[val].apply(_outliers).reset_index(level=0)

    # Build the four layers — box, median tick, whisker rule, and outlier
    # points — oriented according to `horizontal`.
    if horizontal:
        chart_box = alt.Chart(
                data=df_box,
                width=width,
                height=height
            ).mark_bar(
                size=size
            ).encode(
                y=alt.Y(f'{cats}:N', title=y_title),
                x=alt.X('bottom:Q', title=x_title),
                x2=alt.X2('top:Q', title=x_title),
                color=encode_color)

        chart_median = alt.Chart(
                data=df_box,
                width=width,
                height=height
            ).mark_tick(
                size=size,
                color='white'
            ).encode(
                y=alt.Y(f'{cats}:N',  title=y_title),
                x=alt.X('middle:Q', title=x_title))

        chart_whisker = alt.Chart(
                data=df_box,
                width=width,
                height=height
            ).mark_rule(
            ).encode(
                y=alt.Y(f'{cats}:N', title=y_title),
                x=alt.X('bottom_whisker:Q', title=x_title),
                x2=alt.X2('top_whisker:Q', title=x_title))

        chart_outliers = alt.Chart(
                data=df_outlier,
                width=width,
                height=height
            ).mark_point(
            ).encode(
                y=alt.Y(f'{cats}:N', title=y_title),
                x=alt.X(f'{val}:Q', title=x_title),
                color=encode_color)
    else:
        chart_box = alt.Chart(
                data=df_box,
                width=width,
                height=height
            ).mark_bar(
                size=size
            ).encode(
                x=alt.X(f'{cats}:N', title=x_title),
                y=alt.Y('bottom:Q', title=y_title),
                y2=alt.Y2('top:Q', title=y_title),
                color=encode_color)

        chart_median = alt.Chart(
                data=df_box,
                width=width,
                height=height
            ).mark_tick(
                size=size,
                color='white'
            ).encode(
                x=alt.X(f'{cats}:N', title=x_title),
                y=alt.Y('middle:Q', title=y_title))

        chart_whisker = alt.Chart(
                data=df_box,
                width=width,
                height=height
            ).mark_rule(
            ).encode(
                x=alt.X(f'{cats}:N', title=x_title),
                y=alt.Y('bottom_whisker:Q', title=y_title),
                y2=alt.Y2('top_whisker:Q', title=y_title))

        chart_outliers = alt.Chart(
                data=df_outlier,
                width=width,
                height=height
            ).mark_point(
            ).encode(
                x=alt.X(f'{cats}:N', title=x_title),
                y=alt.Y(f'{val}:Q', title=y_title),
                color=encode_color)

    # Whiskers at the back, then boxes, median ticks, and outliers on top.
    return chart_whisker + chart_box + chart_median + chart_outliers
示例#6
0
def value_vs_time_chart(
    base: alt.Chart,
    active_fixed_viewpoint_selector: bool,
    sensor_name: str,
    sensor_unit: str,
    belief_horizon_unit: str,
    intuitive_forecast_horizon: bool,
    interpolate: bool,
    ci: float,
    event_value_range: Tuple[float, float],
) -> alt.LayerChart:
    """Layer a reference line, a belief line, and a confidence band.

    Builds three layers on top of ``base``: the real ("reference") value,
    the expected value of the beliefs, and a confidence interval covering
    ``ci`` (fraction, e.g. 0.9) of probability mass. When ``interpolate``
    is False, stepwise rules spanning event_start..event_end are drawn
    instead of monotone-interpolated lines/areas.
    """

    # Configure the stepwise line for the reference
    if interpolate is True:
        ts_line_reference_chart = base.mark_line(interpolate="monotone")
    else:
        # Rules need an explicit end point for each event interval.
        ts_line_reference_chart = base.mark_rule().encode(x2=alt.X2("event_end:T"))
    ts_line_reference_chart = ts_line_reference_chart.encode(
        y=alt.Y(
            "reference_value",
            # Fix the y domain so layers share the same value range.
            scale=alt.Scale(domain=(event_value_range[0], event_value_range[-1])),
        ),
        color=alt.ColorValue("black"),
        tooltip=[
            alt.Tooltip(
                "event_start:T",
                timeUnit="yearmonthdatehoursminutes",
                title="Event start",
            ),
            alt.Tooltip(
                "event_end:T", timeUnit="yearmonthdatehoursminutes", title="Event end"
            ),
            alt.Tooltip("reference_value:Q", title="Real value", format=".2f"),
        ],
    )

    # Configure the stepwise line for the beliefs
    if interpolate is True:
        ts_line_chart = base.mark_line(interpolate="monotone")
    else:
        ts_line_chart = base.mark_rule().encode(x2=alt.X2("event_end:T"))
    ts_line_chart = ts_line_chart.encode(
        y=alt.Y("expected_value", title="%s (%s)" % (sensor_name, sensor_unit))
    )

    # With a fixed viewpoint active, show only the most recent belief per
    # (event_start, source) at or before the selected belief time.
    if active_fixed_viewpoint_selector is True:
        ts_line_chart = (
            ts_line_chart.transform_filter(
                "datum.belief_time <= nearest_x_select.belief_time"
            )
            .transform_joinaggregate(
                most_recent_belief_time="max(belief_time)",
                groupby=["event_start", "source"],
            )
            .transform_filter("datum.belief_time == datum.most_recent_belief_time")
        )

    # Configure the confidence intervals
    if interpolate is True:
        confidence_interval = ts_line_chart.mark_area(
            interpolate="monotone", opacity=0.3
        )
    else:
        confidence_interval = ts_line_chart.mark_bar(opacity=0.3)
    confidence_interval = confidence_interval.encode(
        y="lower_value",
        y2="upper_value",
        tooltip=[
            alt.Tooltip(
                "event_start:T",
                timeUnit="yearmonthdatehoursminutes",
                title="Event start",
            ),
            alt.Tooltip(
                "event_end:T", timeUnit="yearmonthdatehoursminutes", title="Event end"
            ),
            alt.Tooltip("expected_value:Q", title="Expected value", format=".2f"),
            alt.Tooltip(
                "belief_time:T",
                timeUnit="yearmonthdatehoursminutes",
                title="Belief time",
            ),
            alt.Tooltip(
                "belief_horizon:Q",
                title="%s (%s)"
                % (
                    "Forecast horizon"
                    if intuitive_forecast_horizon
                    else "Belief horizon",
                    belief_horizon_unit,
                ),
            ),
            alt.Tooltip("source", title="Source"),
            alt.Tooltip(
                "upper_value:Q",
                format=".2f",
                title="Upper value of {0:.0f}% confidence interval".format(100 * ci),
            ),
            alt.Tooltip(
                "lower_value:Q",
                format=".2f",
                title="Lower value of {0:.0f}% confidence interval".format(100 * ci),
            ),
        ],
    )

    return (ts_line_reference_chart + ts_line_chart + confidence_interval).properties(
        title="Model results"
    )
示例#7
0
def test_quantitative_x2_y2():
    """Converting a point chart with quantitative x2/y2 channels succeeds."""
    encoded = alt.Chart(df_quant).mark_point().encode(
        alt.X('a'),
        alt.Y('b'),
        alt.X2('c'),
        alt.Y2('alpha'),
    )
    _convert(ChartMetadata(encoded))
示例#8
0
def test_convert_x2_y2_fail_temporal(column):
    chart = ChartMetadata(
        alt.Chart(df).mark_point().encode(alt.X2(column), alt.Y2(column)))
    _convert(chart)
def get_gender_overview_graph():
    """Build the combined gender overview figure.

    Returns an Altair horizontal concatenation of a stacked bar chart of
    asylum requests per year by gender (with count labels) and a column of
    two line charts showing the acceptance and shelter quotas per gender.
    """
    men = get_year_gender_df('Männer')
    women = get_year_gender_df('Frauen')

    # Stack the male bars to the right of the female bars: each bar spans
    # [Start, End] on the x axis, and Display carries the printed count.
    men['Start'] = women['End']
    men['Display'] = men['End'] - women['End']
    women['Start'] = 0
    women['Display'] = women['End']
    stacked = pd.concat([men, women], axis=0)

    print()
    print('Totale Anzahl Asylanträge über die letzten 25 Jahre:')
    print(men['End'].sum())
    print()

    men_status = get_year_gender_status_df('Männer')
    women_status = get_year_gender_status_df('Frauen')
    status = pd.concat([men_status, women_status], axis=0)

    gender_colors = alt.Scale(domain=["Frauen", "Männer"],
                              range=["#e5f5b8", "#58bdc0"])

    bars = alt.Chart(stacked).mark_bar().encode(
        x=alt.X('Start:Q', title='Anzahl Asylgesuche'),
        x2=alt.X2('End:Q', title=''),
        y=alt.Y('Jahr:N'),
        color=alt.Color(
            'Gender:N',
            legend=alt.Legend(title='Gender'),
            scale=gender_colors,
        ),
    )

    # Count labels just to the right of each bar.
    labels = bars.mark_text(align='left', baseline='middle', dx=3).encode(
        x='End:Q',
        y='Jahr:N',
        text=alt.Text('Display:Q'),
    )

    accepted_line = alt.Chart(status).mark_line().encode(
        alt.X('Jahr:O'),
        alt.Y('Anerkennungsquote:Q', axis=alt.Axis(format='%')),
        color='Gender:N',
    )

    shelter_line = alt.Chart(status).mark_line().encode(
        alt.X('Jahr:O'),
        alt.Y('Schutzquote:Q', axis=alt.Axis(format='%')),
        color='Gender:N',
    )

    quota_column = alt.vconcat(
        accepted_line.properties(
            height=220, title='Annerkennungsquote nach Geschlecht'),
        shelter_line.properties(height=220,
                                title='Schutzquote nach Geschlecht'),
    )

    bar_figure = (bars + labels).properties(
        height=540,
        title=
        'Anzahl Asylgesuche über die Jahre, aufgeschlüsselt nach Geschlecht')
    return alt.hconcat(bar_figure, quota_column)
示例#10
0
# Base layer for the wheat-price series; "year_end" extends each record by
# 5 years so the bars tile the time axis (the "+" coerces to number).
base_wheat = alt.Chart(
    data.wheat.url).transform_calculate(year_end="+datum.year + 5")

# Base layer for the monarch timeline. "offset"/"off2" alternate the bar
# depth around y=95 for non-commonwealth reigns; "x" centers each label on
# the reign's midpoint.
base_monarchs = alt.Chart(data.monarchs.url).transform_calculate(
    offset="((!datum.commonwealth && datum.index % 2) ? -1: 1) * 2 + 95",
    off2="((!datum.commonwealth && datum.index % 2) ? -1: 1) + 95",
    y="95",
    x="+datum.start + (+datum.end - +datum.start)/2")

# Grey bars: wheat price per 5-year interval (year .. year_end).
bars = base_wheat.mark_bar(**{
    "fill": "#aaa",
    "stroke": "#999"
}).encode(x=alt.X("year:Q", axis=alt.Axis(format='d', tickCount=5)),
          y=alt.Y("wheat:Q", axis=alt.Axis(zindex=1)),
          x2=alt.X2("year_end"))

# Blue area: wages over time, drawn over the bars.
area = base_wheat.mark_area(**{
    "color": "#a4cedb",
    "opacity": 0.7
}).encode(x=alt.X("year:Q"), y=alt.Y("wages:Q"))

# Outline of the wage area, plus a red highlight line slightly above it.
area_line_1 = area.mark_line(**{"color": "#000", "opacity": 0.7})
area_line_2 = area.mark_line(**{"yOffset": -2, "color": "#EE8182"})

top_bars = base_monarchs.mark_bar(stroke="#000").encode(
    x=alt.X("start:Q"),
    x2=alt.X2("end"),
    y=alt.Y("y:Q"),
    y2=alt.Y2("offset"),
    fill=alt.Fill("commonwealth:N",
示例#11
0
    def plot_individual_effects(self):
        """Plot individual effects.

        For each parameter, draws one row per individual: the observed
        effect (point) with its p5–p95 interval (error bar) and a dashed
        rule at zero. Effects are shown as percent change ((value-1)*100).
        When there are more than 20 individuals, only the top 10 and
        bottom 10 are kept, separated by a '...' row.
        """
        covs = self.covariate_baselines
        ie = self.individual_effects.join(covs)
        param_names = list(ie.index.get_level_values('parameter').unique())
        # Express effects as percent change relative to 1.
        ie = (ie - 1) * 100
        ie = ie.sort_values(by=['observed'])

        plots = []

        for parameter in param_names:
            df = ie.xs(parameter, level=1)

            id_order = list(df.index)
            id_order = [str(int(x)) for x in id_order]

            # Placeholder row that separates the top-10 from the bottom-10
            # when the individual list is truncated below.
            if len(df) > 20:
                id_order[10] = '...'

            df = df.reset_index()
            df['ID'] = df['ID'].astype(int).astype(str)

            error_bars = (alt.Chart(df).mark_errorbar(ticks=True).encode(
                x=alt.X('p5:Q',
                        title='Effect size in percent',
                        scale=alt.Scale(zero=False)),
                x2=alt.X2('p95:Q'),
                y=alt.Y('ID:N', title='ID', sort=id_order),
                tooltip=['ID', 'p5', 'observed', 'p95'] + list(covs.columns),
            ))

            # Dashed vertical reference at zero effect.
            rule = (alt.Chart(df).mark_rule(
                strokeDash=[10, 2], color='gray').encode(
                    x=alt.X('xzero:Q')).transform_calculate(xzero="0"))

            points = (alt.Chart(df).mark_point(size=40,
                                               filled=True,
                                               color='black').encode(
                                                   x=alt.X('observed:Q'),
                                                   y=alt.Y('ID:N',
                                                           sort=id_order),
                                               ))

            plot = alt.layer(
                points,
                error_bars,
                rule,
                data=df,
                width=700,
                title=f'Individuals for parameter {parameter}',
            )
            # Truncate to the 10 lowest (rank) and 10 highest (nrank)
            # observed effects; the row at nrank == 11 becomes the '...'
            # separator.
            if len(df) > 20:
                plot = (
                    plot.transform_window(
                        sort=[alt.SortField('observed', order='ascending')],
                        rank='row_number(observed)',
                    ).transform_window(
                        sort=[alt.SortField('observed', order='descending')],
                        nrank='row_number(observed)',
                    ).transform_filter('datum.rank <= 10 | datum.nrank <= 11').
                    transform_calculate(
                        ID="datum.nrank == 11 ? '...' : datum.ID",
                        p5="datum.nrank == 11 ? '...' : datum.p5",
                        p95="datum.nrank == 11 ? '...' : datum.p95",
                        observed="datum.nrank == 11 ? '...' : datum.observed",
                    ))
            plots.append(plot)

        # Stack one plot per parameter, sharing the x scale.
        v = alt.vconcat(*plots).resolve_scale(x='shared')
        return v
示例#12
0
    def plot_covariate_effects(self):
        """Plot covariate effects.

        For each parameter, draws one facet per covariate showing the mean
        effect (point) with its 5th–95th percentile interval (error bar),
        the covariate value as text, and a dashed rule at zero. Effects are
        shown as percent change ((value-1)*100).
        """
        ce = (self.covariate_effects - 1) * 100
        # Long format: one row per (covariate, condition) statistic.
        cov_stats = pd.melt(
            self.covariate_statistics.reset_index(),
            var_name='condition',
            id_vars=['covariate'],
            value_vars=['p5', 'p95', 'other'],
        )

        cov_stats = cov_stats.replace({
            'p5': '5th',
            'p95': '95th'
        }).set_index(['covariate', 'condition'])

        ce = ce.join(cov_stats, how='inner')

        # The left join reorders the index, pandas bug #34133
        ce = ce.reorder_levels(['parameter', 'covariate', 'condition'])

        param_names = list(ce.index.get_level_values('parameter').unique())
        plots = []

        for parameter in param_names:
            df = ce.xs(parameter, level=0)
            df = df.reset_index()

            error_bars = (alt.Chart(df).mark_errorbar(ticks=True).encode(
                x=alt.X('p5:Q',
                        title='Effect size in percent',
                        scale=alt.Scale(zero=False)),
                x2=alt.X2('p95:Q'),
                y=alt.Y('condition:N', title=None),
            ))

            # Dashed vertical reference at zero effect.
            rule = (alt.Chart(df).mark_rule(
                strokeDash=[10, 4], color='gray').encode(
                    x=alt.X('xzero:Q')).transform_calculate(xzero="0"))

            points = (alt.Chart(df).mark_point(filled=True,
                                               color='black').encode(
                                                   x=alt.X('mean:Q'),
                                                   y=alt.Y('condition:N'),
                                               ))

            # Covariate value annotated above each mean point.
            text = (alt.Chart(df).mark_text(dy=-15, color="red").encode(
                x=alt.X("mean:Q"),
                y=alt.Y("condition:N"),
                text=alt.Text("value:Q")))

            plot = (alt.layer(
                error_bars, rule, points, text, data=df, width=700,
                height=100).facet(
                    columns=1.0,
                    row=alt.Facet('covariate:N', title=None),
                    title=f'{parameter}').resolve_scale(y='independent'))

            plots.append(plot)

        # Stack one faceted plot per parameter, sharing the x scale.
        v = alt.vconcat(*plots).resolve_scale(x='shared')
        return v
示例#13
0
def wsb_chart(
    data: pd.DataFrame,
    xvar: str = "start",
    x2var: str = "end",
    xvar_middle: str = "middle",
    yvar: str = "mantissa",
    vvar: str = "original",
    evar: str = "multiplier",
    xcat: str = "category",
    w: int = 400,
    h: int = 400,
    color_scheme: str = "orangered",
    title: str = "Width-Scale Bar Chart",
) -> alt.LayerChart:
    """Build a width-scale bar chart as a layered Altair chart.

    Args:
        data: Frame holding one row per bar with precomputed x-extents.
        xvar: Column with each bar's left x-coordinate.
        x2var: Column with each bar's right x-coordinate.
        xvar_middle: Column with each bar's center x-coordinate (used for
            axis tick positions and category labels).
        yvar: Column with the bar height (mantissa).
        vvar: Column with the original, unscaled value (tooltip only).
        evar: Column with the magnitude multiplier (color + legend).
        xcat: Column with the category label drawn under each bar.
        w: Chart width in pixels.
        h: Chart height in pixels.
        color_scheme: Vega color scheme name for the multiplier encoding.
        title: Chart title.

    Returns:
        A layered chart: the bars plus per-bar category labels.
    """
    _n_bars = len(data[xcat].unique())
    _padding_width = (w / _n_bars) * 0.1

    # Shift each bar right by a per-row offset so adjacent bars don't touch.
    # NOTE(review): the offset grows with the row index, so this assumes a
    # default RangeIndex (0, 1, 2, ...) — confirm with callers.
    data_with_padding = data.copy()
    # Bug fix: pad the columns named by xvar/x2var/xvar_middle instead of the
    # hard-coded "start"/"end"/"middle", so non-default column names work.
    data_with_padding[xvar] = (data_with_padding[xvar] +
                               _padding_width / 4 +
                               _padding_width / 2 * data_with_padding.index)
    data_with_padding[x2var] = (data_with_padding[x2var] +
                                _padding_width / 4 +
                                _padding_width / 2 * data_with_padding.index)
    data_with_padding[xvar_middle] = (
        (data_with_padding[x2var] - data_with_padding[xvar]) /
        2) + data_with_padding[xvar]

    # Clicking a legend entry highlights the bars with that multiplier.
    # Bug fix: select on the evar parameter, not the literal "multiplier".
    selection = alt.selection_single(fields=[evar], bind="legend")

    base = alt.Chart(data_with_padding, width=w, height=h)

    bar = (
        base.mark_rect().encode(
            x=alt.X(
                f"{xvar}:Q",
                axis=alt.Axis(
                    titleY=(-0.5 + 22),
                    labels=False,
                    title=xcat.capitalize(),
                    grid=False,
                    # One tick per bar, placed at the bar's (padded) center.
                    values=data_with_padding[xvar_middle].to_list(),
                ),
            ),
            x2=alt.X2(f"{x2var}:Q"),
            y=alt.Y(
                f"{yvar}:Q",
                axis=alt.Axis(
                    title=yvar.capitalize(),
                    titleAngle=0,
                    titleAlign="left",
                    titleY=-5,
                    titleX=0,
                    labelExpr="datum.value + ' ×'",
                ),
                scale=alt.Scale(domain=[0, 10]),
            ),
            color=alt.Color(
                f"{evar}:O",
                title="Magnitude Multiplier",
                legend=alt.Legend(labelExpr="'× ' + format(datum.value, ',')"),
                scale=alt.Scale(scheme=color_scheme),
            ),
            tooltip=[
                alt.Tooltip(f"{xcat}:N", title=xcat.capitalize()),
                alt.Tooltip(f"{vvar}:N", title="Value"),
                alt.Tooltip(f"{yvar}:Q", title=yvar.capitalize()),
                alt.Tooltip(f"{evar}:O",
                            format=",",
                            title="Magnitude Multiplier"),
            ],
            opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        ).add_selection(selection))

    # Category labels are drawn just below the x-axis. The pixel offset
    # reproduces Vega-Lite's defaults: labelFontSize=10, tickSize=5,
    # labelPadding=2, translate=0.5.
    text = base.mark_text(align="center", baseline="middle",
                          fontSize=10).encode(
                              x=alt.X(f"{xvar_middle}:Q"),
                              y=alt.value(h + (10 / 2) + 5 + 2 + 0.5),
                              text=alt.Text(f"{xcat}:N"),
                          )

    return alt.layer(bar, text, title=alt.TitleParams(title, anchor="start"))
示例#14
0
def _design_chart(title: str, query_list: list, queries: list, stat: str,
                  stat_list: dict, parts: SimpleNamespace) -> str:
    """Build an Altair bar chart for query statistics and return its JSON spec.

    Args:
        title: Optional chart title; when None the chart is untitled.
        query_list: Human-readable label for each query.
        queries: One dict per query, possibly holding 'year'/'month'/'day'/
            'hour'/'geohash'/'feature' components.
        stat: Statistic name; the special value 'base' triggers a
            min/mean/max range chart instead of a plain bar chart.
        stat_list: Mapping of statistic column name -> per-query values.
        parts: Namespace describing which aspects were queried; list-valued
            attributes are treated as ranges.

    Returns:
        The Vega-Lite chart specification as a JSON string.
    """
    if stat == 'base':
        # 'base': draw a min..max bar per entry with the mean as a white tick.
        # Label rows by date when any date component exists, else by query.
        y_title = 'date'
        y_val = [
            '.'.join(
                filter(None, [
                    x.get('year', ''),
                    x.get('month', ''),
                    x.get('day', ''),
                    x.get('hour', '')
                ])) for x in queries
        ]
        if y_val[0] == '':
            y_title = 'query'
            y_val = query_list
        df = pd.DataFrame({y_title: y_val, **stat_list})
        minmax = alt.Chart(data=df, width=250).mark_bar(tooltip={
            "content": "encoding"
        }).encode(x=alt.X('min'),
                  x2=alt.X2('max'),
                  y=alt.Y(y_title, sort=None),
                  color=alt.Color('mean',
                                  scale=alt.Scale(scheme='redblue'),
                                  sort="descending"))

        mean = alt.Chart(data=df, width=250).mark_tick(color='white', thickness=3, tooltip={"content": "encoding"})\
            .encode(
                x='mean',
                y=alt.Y(y_title, sort=None)
        )

        graph = (minmax + mean).configure_tick(
            bandSize=10  # controls the width of the tick
        ).configure_scale(rangeStep=10  # controls the width of the bar
                          )

        if title is None:
            return graph.to_json()
        else:
            return graph.properties(title=title).to_json()

    # Classify the queried aspects into range (list-valued) vs single-valued.
    # Bug fix: iterate a tuple, not a set — set iteration order over strings
    # varies between interpreter runs (hash randomization), which made the
    # x/y axis assignment below non-deterministic with two range aspects.
    range_ = []
    single = []
    aspects = ('year', 'month', 'day', 'hour', 'geohash', 'feature')

    for part in aspects:
        if hasattr(parts, part):
            if isinstance(getattr(parts, part), list):
                range_.append(part)
            else:
                single.append(part)

    # Defaults: one bar per query, colored and sized by the statistic.
    x_title = 'query'
    x_val = query_list
    color_title = stat
    y_title = stat
    y_val = stat_list
    if len(range_) == 0 and len(single) >= 3:
        # Fully pinned queries: label the x-axis with the assembled date.
        x_title = 'date'
        x_val = [
            '.'.join(
                filter(None, [
                    x.get('year', ''),
                    x.get('month', ''),
                    x.get('day', ''),
                    x.get('hour', '')
                ])) for x in queries
        ]
    elif len(range_) == 1:
        # One varying aspect becomes the x-axis.
        x_title = range_[0]
        x_val = [x[range_[0]] for x in queries]
    elif len(range_) == 2:
        # Two varying aspects: first on x, second on y.
        x_title = range_[0]
        x_val = [x[range_[0]] for x in queries]
        y_title = range_[1]
        y_val = [y[range_[1]] for y in queries]

    df = pd.DataFrame({x_title: x_val, y_title: y_val, **stat_list})
    graph = alt.Chart(data=df, height=300, width=400).mark_bar(tooltip={"content": "encoding"}) \
        .encode(
        alt.X(x_title, sort=None),
        alt.Y(y_title, sort=None),
        alt.Color(color_title, scale=alt.Scale(scheme='spectral'), sort="descending")
    )

    if title is None:
        return graph.to_json()
    else:
        return graph.properties(title=title).to_json()
示例#15
0
    def visualize_timelime(self):
        """Build and save a timeline chart of tracked label appearances.

        Reads appearance intervals from ``self.tracker.vis_data_points``,
        converts frame indices to seconds (divides by 30 — assumes a 30 fps
        source, TODO confirm), and saves an Altair bar chart to
        ``constants.OUT_PATH + "chart_timeline.png"``.
        """
        print("[INFO] generating dashboard.")
        starts = []
        ends = []
        labels = []

        # Collect one (from, to, label) row per appearance interval.
        # Iterate values only: the original bound the dict key to an unused
        # variable that shadowed the builtin `id`.
        for dp in self.tracker.vis_data_points.values():
            for appearance in dp.appereances:
                starts.append(appearance[0] / 30)
                ends.append(appearance[1] / 30)
                labels.append(str(dp.label))

        # Timeline chart: one horizontal bar per appearance, colored by label.
        df = pd.DataFrame(list(zip(starts, ends, labels)),
                          columns=["from", "to", "label"])
        chart = alt.Chart(df).mark_bar().encode(
            alt.X("from", title="Timeline(seconds)"),
            alt.X2("to", title=""),
            y="label",
            color=alt.Color("label", scale=alt.Scale(scheme='dark2')),
        )
        # NOTE(review): saving Altair charts as .png needs an extra backend
        # (e.g. altair_saver / vl-convert) — confirm it is installed.
        chart.save(constants.OUT_PATH + "chart_timeline.png")