# - [Issue reported on Altair's github](https://github.com/altair-viz/altair/issues/588)
# - [`gpdvega` github](https://github.com/iliatimofeev/gpdvega)
#%%
# Mean coordinates and safety score (saf_s_11) per NYC school district.
safs_by_dist = combined.groupby('school_dist')[['lat', 'lon',
                                                'saf_s_11']].mean()
#%%
# Load the NYC school-district shapefile and reproject to WGS84 (lat/lon).
nysd = gpd.read_file('shapes/nysd/nysd.shp')
# Use the `epsg=` keyword: the {'init': 'epsg:...'} dict form is deprecated
# since pyproj 2 / modern GeoPandas and emits FutureWarnings.
nysd = nysd.to_crs(epsg=4326)
# The shapefile join key is numeric; align index dtypes before joining.
safs_by_dist.index = pd.to_numeric(safs_by_dist.index)
geo_safs_by_dist = nysd.join(safs_by_dist, how='inner')
geo_safs_by_dist = geo_safs_by_dist.drop(['lat', 'lon'], axis=1)
geo_safs_by_dist = geo_safs_by_dist.to_crs(epsg=4326)
#%%
# gpdvega library enables using a GeoDataFrame directly with Altair
alt.Chart(geo_safs_by_dist).mark_geoshape().encode(
    color=alt.Color('saf_s_11:Q', scale=alt.Scale(scheme='blues')),
    tooltip='SchoolDist:N').properties(
        width=500, height=500, title='Safety Score by NYC School District')
#%% [markdown]
# Darker shades show a higher score, while lighter shades show a lower score. We can see some parts of the Bronx and Queens that have higher scores.
#%% [markdown]
# ## Visualising Relation Between Ethnicity and Score
#%%
# Ethnicity-share columns whose correlation with SAT score we examine.
ethnicities = ['white_per', 'asian_per', 'black_per', 'hispanic_per']
# Pull the sat_score correlations for those columns and tidy them into a
# two-column frame: ethnicity_per / sat_score_corr.
eth_corr = (
    combined.corr()['sat_score'][ethnicities]
    .to_frame()
    .reset_index()
    .rename(columns={'index': 'ethnicity_per',
                     'sat_score': 'sat_score_corr'})
)
alt.Chart(eth_corr).mark_bar().encode(
示例#2
0
def inputs_graph():
    """Build a throttle/brake vs. lap-distance chart and return it as JSON.

    Two stacked line charts (throttle and brake, colored by lap) share an
    interactive x-axis zoom; turn positions are overlaid as rules with
    rotated text labels.
    """
    alt.renderers.enable('mimetype')
    alt.data_transformers.disable_max_rows()
    width, height = 1400, 250

    telemetry = ld.getData()[["LapDist", "Throttle", "Brake", "Lap"]]
    turns = ld.GetTurns()

    # Interval selection bound to scales: dragging zooms/pans the x-axis.
    zoom = alt.selection_interval(bind='scales', encodings=['x'])

    throttle = alt.Chart(telemetry).mark_line().encode(
        x='LapDist',
        y=alt.Y('Throttle', scale=alt.Scale(domain=[-0.2, 1.2])),
        color=alt.Color('Lap', scale=alt.Scale(scheme='blues')),
    ).properties(width=width, height=height)
    brake = alt.Chart(telemetry).mark_line().encode(
        x='LapDist',
        y=alt.Y('Brake', scale=alt.Scale(domain=[-0.2, 1.2])),
        color=alt.Color('Lap', scale=alt.Scale(scheme='reds')),
    ).properties(width=width, height=height)

    # Vertical rules and labels marking each turn along the lap.
    turn_rules = alt.Chart(turns).mark_rule().encode(
        x="LapDist").properties(width=width, height=height)
    turn_labels = alt.Chart(turns).mark_text(
        align="center", angle=90, dy=-7, dx=-100).encode(
            x="LapDist", text="Turn").properties(width=width, height=height)

    brake = brake + turn_rules + turn_labels
    throttle = throttle + turn_rules + turn_labels

    stacked = throttle.add_selection(zoom) & brake.add_selection(zoom)
    stacked = stacked.resolve_scale(color='independent')
    return stacked.to_json()

if __name__ == "__main__":
    teams = get_teams("https://en.wikipedia.org/wiki/2020_NBA_playoffs")

    # Make a list of all players for all teams
    players = []
    for team in teams:
        print(team)
        players += get_players(team[0], team[1], 3)

    # Create a pandas dataframe of the Player-objects as dictionaries
    data = pd.DataFrame([player.to_dict() for player in players])

    stats = ["ppg", "bpg", "rpg"]
    titles = ["Points Per Game", "Blocks Per Game", "Rebound Per Game"]

    # One horizontal bar chart per statistic, saved as standalone HTML.
    # (zip over the parallel lists instead of indexing with range(3);
    # the unused `files` list was removed.)
    for stat, stat_title in zip(stats, titles):
        chart = alt.Chart(data).mark_bar().encode(
            y=alt.Y("name", sort="color", title="Player Name"),
            x=alt.X(f"{stat}:Q", title=stat_title),
            color=alt.Color("team:N", title="Team Name"))
        chart.save(f"NBA_player_statistics/players_over_{stat}.html")
示例#4
0
# Simulate a capped run of consecutive shots with 5-round bursts and a
# 5 ms control pause between bursts.
burst_sim = fm.simulate_shots(
    shots=fm.max_consecutive_shots,
    auto_burst_length=5,
    control_time=5,
)

# Each sample is (time, cursor, pellets, cof, v_recoil, h_recoil); only the
# timestamp and cone-of-fire value are kept for plotting.
for t, _cur, _pel, cof, _vr, _hr in burst_sim:
    datapoints.append({
        "time": t,
        "control": f"5 burst + {fm.fire_timing.refire_time + 5}ms",
        "cof": cof,
    })

dataset = altair.Data(values=datapoints)

# Interactive line chart of cone-of-fire over time, colored by control mode.
chart = (
    altair.Chart(dataset)
    .mark_line()
    .encode(
        x="time:Q",
        y="cof:Q",
        color=altair.Color("control:O", scale=altair.Scale(scheme="dark2")),
        tooltip=["time:Q", "control:O"],
    )
    .properties(title="Cone of Fire", height=900, width=900)
    .interactive()
)

chart.save("cof_simulation.html")
示例#5
0
        def show_time_series(title: str, df: pd.DataFrame, par: str,
                             y_lab: str):
            """
            Plot a time series of column `par`, grouped (colored) by
            `direction_id`.

            When self.moving_average_days > 0 the line shows a rolling mean
            of `par`; otherwise the raw values are drawn. In both cases the
            raw observations are overlaid as faint points with tooltips.

            :param title: chart title
            :param df: data with at least 'time', 'direction_id', 'site_name'
                and the `par` column
            :param par: name of the column plotted on the y-axis
            :param y_lab: y-axis label
            :return: None; the chart is rendered via st.altair_chart
            """

            x_lab = ''
            df['time'] = pd.to_datetime(df['time'])
            min_dat = df['time'].min()
            max_dat = df['time'].max()
            # Axis label format depends on the spanned time range.
            time_format = get_time_format(min_dat, max_dat)
            # Equal user-set limits mean "no explicit y-domain".
            if self.yax_max == self.yax_min:
                scy = alt.Scale()
            else:
                scy = alt.Scale(domain=(self.yax_min, self.yax_max))

            if self.moving_average_days > 0:
                # Rolling-mean line: window spans from moving_average_days/2
                # before each point to moving_average_days after it.
                line = alt.Chart(df, title=title).mark_line(
                    point=False, clip=True
                ).transform_window(
                    rolling_mean='mean({})'.format(par),
                    frame=[
                        -self.moving_average_days / 2, self.moving_average_days
                    ]).encode(
                        x=alt.X('time:T', axis=alt.Axis(title=x_lab)),
                        # https://github.com/d3/d3-time-format#locale_format
                        y=alt.Y('rolling_mean:Q',
                                scale=scy,
                                axis=alt.Axis(title=y_lab)),
                        color=alt.Color(
                            'direction_id',
                            scale=alt.Scale(scheme=cn.color_schema)),
                    )
            else:
                # Raw-value line with visible points.
                line = alt.Chart(df).mark_line(point=True, clip=True).encode(
                    x=alt.X(f'time:T',
                            axis=alt.Axis(title=x_lab,
                                          labelAngle=30,
                                          format=time_format)),
                    y=alt.Y('{}:Q'.format(par),
                            scale=scy,
                            axis=alt.Axis(title=y_lab)),
                    color=alt.Color('direction_id',
                                    scale=alt.Scale(scheme=cn.color_schema)),
                )

            # Semi-transparent raw observations carrying the tooltips.
            points = alt.Chart(df).mark_point().encode(
                x=alt.X('time:T', axis=alt.Axis(title=x_lab)),
                y=alt.Y('{}:Q'.format(par),
                        scale=scy,
                        axis=alt.Axis(title=y_lab)),
                color=alt.Color('direction_id',
                                scale=alt.Scale(scheme=cn.color_schema)),
                tooltip=['site_name', 'direction_id', 'time', par],
                opacity=alt.value(0.3))
            chart = (points + line).properties(width=self.plot_width,
                                               height=self.plot_height,
                                               title=title)
            st.altair_chart(chart)
示例#6
0
def result_heatmap(data, result="win", title=None,
                   width=500, height=500):
    """
    Build an Altair heatmap of a player's result percentage by player hand
    total (rows) and dealer up card (columns).

    Parameters
    ----------
    data : pd.DataFrame or list of dict
        Hand-history records; a list is converted to a DataFrame.
    result : str
        Outcome to color by: 'win', 'loss', 'push' or 'surrender'.
    title : str, optional
        Chart title; defaults to "<Result> Percentage".
    width, height : int
        Chart dimensions in pixels.

    Raises
    ------
    AssertionError
        If `result` is not one of the accepted outcomes.
    """
    possible_results = ["win", "loss", "push", "surrender"]
    # The old message omitted 'surrender' even though it is accepted;
    # build the message from the list so they cannot drift apart again.
    assert result in possible_results, (
        "'result' must be one of: " + ", ".join(possible_results)
    )
    if not title:
        title = f"{result.title()} Percentage"
    # convert data to a DataFrame if it's just a player's history list
    if isinstance(data, list):
        data = pd.DataFrame(data)

    # remove any hands where the dealer had blackjack or the player busted
    sub_data = data[(data["dealer_blackjack"] == 0) &
                    (data["total"] <= 21)].copy()
    # calculate winning percentage for each total and dealer up card combo
    grouped_pct = sub_data.groupby(
        ["total", "dealer_up"]
    ).apply(results_pct, as_series=False)
    # unpack the tuple returned by groupby function and rename columns
    grouped_pct = grouped_pct.apply(pd.Series)
    grouped_pct.columns = possible_results
    # reset index and sort for plotting
    pct_data = grouped_pct.reset_index().sort_values("total", ascending=False)
    # dynamically determine how the legend should be labeled:
    # one tick every 0.1 between the (rounded) min and max percentages
    min_val = round(min(pct_data[possible_results].min()), 1)
    max_val = round(max(pct_data[possible_results].max()), 1)
    min_int = int(min_val * 10)
    max_int = int(max_val * 10)
    values = [
        round(x * 0.1, 1) for x in range(min_int, max_int + 1)
    ]
    # create altair heatmap
    chart = alt.Chart(
        pct_data, title=title, width=width, height=height
    ).mark_rect(binSpacing=1).encode(
        x=alt.X(
            "dealer_up:O",
            axis=alt.Axis(orient="top", labelAngle=0),
            title="Dealer Up Card"
        ),
        y=alt.Y(
            "total:O",
            title="Player Total",
            sort=alt.EncodingSortField(op="mean", order="descending")
        ),
        color=alt.Color(
            f"{result}:Q",
            legend=alt.Legend(
                title=f"{result.title()} Probability",
                values=values
            )
        ),
        tooltip=[
            alt.Tooltip("dealer_up", title="Dealer Up Card"),
            alt.Tooltip("total", title="Player Total"),
            alt.Tooltip(f"{result}", title=f"{result.title()} Probability")
        ]
    )

    return chart
示例#7
0

# In[7]:


# Let the user pick which audio feature to plot against popularity.
Name_of_Feat = st.selectbox("Feature", Types_of_Features)

# f-strings around a bare string variable are redundant; use the name directly.
chart_df = Final_table_clean[['Song Name', 'Album Name', 'Release Date',
                              'Popularity', Name_of_Feat]]

import altair as alt  # NOTE(review): consider moving this import to the top of the file

feat_header = Name_of_Feat.capitalize()

# Scatter of the chosen feature against track popularity.
st.header(f"{feat_header} vs. Popularity")
c = alt.Chart(chart_df).mark_circle().encode(
    alt.X('Popularity', scale=alt.Scale(zero=False)),
    y=Name_of_Feat,
    color=alt.Color('Popularity', scale=alt.Scale(zero=False)),
    size=alt.value(200),
    tooltip=['Popularity', Name_of_Feat, 'Song Name', 'Album Name'])

st.altair_chart(c, use_container_width=True)

st.header("Table of Groovy Song Attributes")
st.table(chart_df)


# In[8]:


st.write("acousticness: Confidence measure from 0.0 to 1.0 on if a track is acoustic.")
st.write("energy: Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale. Perceptual features contributing to this attribute include dynamic range, perceived loudness, timbre, onset rate, and general entropy.")
st.write("instrumentalness: Predicts whether a track contains no vocals. “Ooh” and “aah” sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly “vocal”. The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks, but confidence is higher as the value approaches 1.0.")
st.write("liveness: Detects the presence of an audience in the recording. Higher liveness values represent an increased probability that the track was performed live. A value above 0.8 provides strong likelihood that the track is live.")
示例#8
0
def confirmed():
    """Render and save a chart of reported COVID-19 cases.

    Reads data/confirmed.csv (rows sourced 'fhi:git' only) and layers daily
    new cases (bars), a shifted 7-day moving average (line) and the
    cumulative total (line on an independent y-axis). The result is written
    to graphs/confirmed.png.
    """
    data = "data/confirmed.csv"
    filename = "graphs/confirmed.png"
    # Remove any stale render so a failed save is not mistaken for fresh.
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)
    df["date"] = pd.to_datetime(df["date"])
    df = df.loc[df["source"] == "fhi:git"]
    # 7-day simple moving average, shifted one row forward so each point
    # reflects only preceding days.
    df["new_sma7"] = df.new.rolling(window=7).mean().shift()

    # Long format: one row per (date, category) for per-category filtering.
    df = df.melt(
        id_vars=["date"],
        value_vars=["new", "new_sma7", "total"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {"new": "New cases", "new_sma7": "Avg 7 d.", "total": "Cumulative"}

    df["category"] = df["category"].replace(rename)

    # Shared base supplies the x-encoding for every layer.
    base = alt.Chart(
        df,
        title="Number of reported COVID-19 cases by specimen collection date (Source: FHI)",
    ).encode(alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)))

    bar = (
        base.transform_filter(alt.datum.category == "New cases")
        .mark_bar(color="#FFD1D1")
        .encode(y=alt.Y("value:Q", axis=alt.Axis(title="New per day", grid=True)))
    )

    # Cumulative line carries the full color scale so the legend lists all
    # three categories even though each layer draws only one of them.
    line = (
        base.transform_filter(alt.datum.category == "Cumulative")
        .mark_line(color="#2E507B", strokeWidth=3)
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Cumulative")),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=["New cases", "Avg 7 d.", "Cumulative"],
                    range=["#FFD1D1", "red", "#2E507B"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
    )

    ma7 = (
        base.transform_filter(alt.datum.category == "Avg 7 d.")
        .mark_line(opacity=0.8)
        .encode(y=alt.Y("value:Q"), color=alt.Color("category:N"))
    )

    # bar+ma7 share a y-axis; the cumulative line gets its own.
    chart = (
        alt.layer(bar + ma7, line)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
示例#9
0
"""
Seattle Weather Heatmap
-----------------------
This example shows the 2010 daily high temperature (F) in Seattle, WA.
"""
# category: case studies
import altair as alt
from vega_datasets import data

# Since the data is more than 5,000 rows we'll import it from a URL
source = data.seattle_temps.url

alt.Chart(
    source,
    title="2010 Daily High Temperature (F) in Seattle, WA").mark_rect().encode(
        x=alt.X('date:O', timeUnit='date'),
        y=alt.Y('date:O', timeUnit='month'),
        color=alt.Color('temp:Q', aggregate='max'),
        tooltip=[
            alt.Tooltip('date:T', timeUnit='monthdate', title='Date'),
            alt.Tooltip('temp:Q', aggregate='max', title='Max Temp')
        ]).properties(width=600)
示例#10
0
def vaccine_doses():
    """Render and save an area chart of COVID-19 vaccine doses in Norway.

    Reads data/vaccine_doses.csv, keeps nation-level rows, and plots the
    cumulative number of people with one, two and three doses as overlaid
    (non-stacked) areas. The result is written to graphs/vaccine_doses.png.
    """
    data = "data/vaccine_doses.csv"
    filename = "graphs/vaccine_doses.png"
    # Remove any stale render so a failed save is not mistaken for fresh.
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)
    df["date"] = pd.to_datetime(df["date"])
    df = df[df["granularity_geo"] == "nation"]
    # NOTE: a 7-day moving average of new_doses was previously computed here
    # but never plotted (the melt below keeps only the total_dose_* columns),
    # so that dead computation has been removed.

    df = df.melt(
        id_vars=["date"],
        value_vars=["total_dose_1", "total_dose_2", "total_dose_3"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {
        "total_dose_1": "Dose 1",
        "total_dose_2": "Dose 2",
        "total_dose_3": "Dose 3",
    }

    df["category"] = df["category"].replace(rename)

    chart = (
        alt.Chart(
            df,
            title="Number of people who received their first, second and third dose of a COVID-19 vaccine in Norway (Source: FHI)",
        )
        .mark_area(line={}, opacity=0.3)
        .encode(
            x=alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)),
            y=alt.Y(
                "value:Q",
                stack=None,
                title="Number of people",
            ),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=[
                        "Dose 1",
                        "Dose 2",
                        "Dose 3",
                    ],
                    # Fixed a stray leading space in the Dose 2 color code.
                    range=["#5dade2", "#2ecc71", "#006600"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=380,
            legendY=660,
        )
    )

    chart.save(filename)
示例#11
0
def tested_lab():
    """Render and save a chart of lab-tested persons and share positive.

    Reads data/tested_lab.csv and layers stacked bars of negative/positive
    tests per day with a red line of the positive share on an independent
    y-axis. The result is written to graphs/tested_lab.png.
    """
    data = "data/tested_lab.csv"
    filename = "graphs/tested_lab.png"
    # Remove any stale render so a failed save is not mistaken for fresh.
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)

    mapping = {
        "new_neg": "New (Negative)",
        "new_pos": "New (Positive)",
        "new_total": "New",
        "pr100_pos": "Share Positive",
        "total": "Cumulative",
    }

    df = df.rename(columns=mapping)
    df["date"] = pd.to_datetime(df["date"])
    df["Share Negative"] = 100 - df["Share Positive"]
    # Long format; "Share Positive" stays as an id column so the line layer
    # can read it directly. All other columns become (category, value) rows.
    df = df.melt(
        id_vars=["date", "Share Positive"], var_name="category", value_name="value"
    )

    # Shared base supplies the x-encoding for both layers.
    base = alt.Chart(
        df,
        title="Number of tested persons per specimen collection date and number of positive results (Source: FHI)",
    ).encode(alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)))

    # Red line: percentage of tests that came back positive.
    andel = base.mark_line(color="red", opacity=0.8).encode(
        y=alt.Y("Share Positive:Q", title="% Positive", axis=alt.Axis(grid=True))
    )

    # Stacked bars of negative and positive tests; "% Positive" is included
    # in the color domain only so it appears in the shared legend.
    bar = (
        base.transform_filter(
            (alt.datum.category == "New (Negative)")
            | (alt.datum.category == "New (Positive)")
        )
        .mark_bar()
        .encode(
            y=alt.Y("value:Q", title="Number of persons"),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=["New (Positive)", "New (Negative)", "% Positive"],
                    range=["#FF9622", "#6DA9FF", "red"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
    )

    chart = (
        alt.layer(bar, andel)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
示例#12
0
def smittestopp():
    """Render and save a chart of Smittestopp app downloads and reports.

    Reads data/smittestopp.csv and layers cumulative app downloads (area)
    with daily infections reported through the app (bars) on independent
    y-axes. The result is written to graphs/smittestopp.png.
    """
    data = "data/smittestopp.csv"
    filename = "graphs/smittestopp.png"
    # Remove any stale render so a failed save is not mistaken for fresh.
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)
    df["date"] = pd.to_datetime(df["date"])

    df = df.melt(
        id_vars=["date"],
        value_vars=["new_reported", "total_downloads"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {
        "new_reported": "Number of reported infections",
        "total_downloads": "Number of downloads",
    }

    df["category"] = df["category"].replace(rename)

    # Shared base supplies the x-encoding for both layers.
    # (Fixed a stray Norwegian "og" in the otherwise-English title.)
    base = alt.Chart(
        df,
        title="Number of downloads of Smittestopp and number of reported infections through the app (Source: FHI)",
    ).encode(alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)))

    downloads = (
        base.transform_filter(alt.datum.category == "Number of downloads")
        .mark_area(line={}, color="#5BC1FF", opacity=0.2)
        .encode(
            y=alt.Y(
                "value:Q",
                axis=alt.Axis(title="Number of downloads", grid=True),
            )
        )
    )

    # Bars carry the full color scale so both categories show in the legend.
    reported = (
        base.transform_filter(alt.datum.category == "Number of reported infections")
        .mark_bar(color="#FFA57E")
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Number of reported infections")),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=[
                        "Number of downloads",
                        "Number of reported infections",
                    ],
                    range=["#5BC1FF", "#FFA57E"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
    )

    chart = (
        alt.layer(reported, downloads)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            labelLimit=200,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=390,
            legendY=660,
        )
    )

    chart.save(filename)
示例#13
0
def hospitalized():
    """Render and save an area chart of COVID-19 hospital admissions.

    Reads data/hospitalized.csv, reindexes onto a continuous daily range
    (forward-filling gaps), and plots admissions, ICU and respirator counts
    as overlaid areas. The result is written to graphs/hospitalized.png.
    """
    data = "data/hospitalized.csv"
    filename = "graphs/hospitalized.png"
    # Remove any stale render so a failed save is not mistaken for fresh.
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)

    # Reindex onto every calendar day from the first record until today so
    # days without a report still get a (forward-filled) value.
    today = date.today()
    idx = pd.date_range("2020-03-08", today)
    df.index = pd.DatetimeIndex(df["date"])
    df = df.reindex(idx)
    df["date"] = df.index
    df = df.reset_index(drop=True)

    df["admissions"] = df["admissions"].fillna(method="ffill").astype(int)
    df["icu"] = df["icu"].fillna(method="ffill").astype(int)
    df["respiratory"] = df["respiratory"].fillna(method="ffill").astype(int)

    df_melt = pd.melt(
        df,
        id_vars=["date"],
        value_vars=["admissions", "icu", "respiratory"],
        value_name="value",
    ).replace(
        {
            "admissions": "Hospitalized",
            # BUGFIX: the label must match the color-scale domain below;
            # the old label "Intensive" never matched "Intensive Care", so
            # the ICU series fell outside the explicit color mapping.
            "icu": "Intensive Care",
            "respiratory": "Respirator",
        }
    )

    chart = (
        alt.Chart(
            df_melt,
            title="Number of patients admitted to hospital with COVID-19 (Source: Helsedirektoratet)",
        )
        .mark_area(line={}, opacity=0.3)
        .encode(
            x=alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40)),
            y=alt.Y(
                "value:Q",
                stack=None,
                title="Number of patients",
            ),
            color=alt.Color(
                "variable:N",
                scale=alt.Scale(
                    domain=["Hospitalized", "Intensive Care", "Respirator"],
                    range=["#5A9DFF", "#FF8B1B", "#FF642B"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
示例#14
0
def dead():
    """Render and save a chart of COVID-19 related deaths.

    Reads data/dead.csv, reindexes onto a continuous daily range, and
    layers daily new deaths (bars), a 7-day moving average (line) and the
    cumulative total (line on an independent y-axis). The result is written
    to graphs/dead.png.
    """
    data = "data/dead.csv"
    filename = "graphs/dead.png"
    # Remove any stale render so a failed save is not mistaken for fresh.
    if os.path.exists(filename):
        os.remove(filename)

    df = pd.read_csv(data)

    # Reindex onto every calendar day in the data range, then clip at today.
    today = date.today()
    idx = pd.date_range("2020-03-07", df["date"].max())
    df.index = pd.DatetimeIndex(df["date"])
    df = df.reindex(idx)
    df["date"] = df.index
    df = df.reset_index(drop=True)
    df = df[df.date <= str(today)]

    # Days without a report count as zero new deaths; cumulative totals are
    # backfilled across the inserted gap days.
    df["new"] = df["new"].fillna(0).astype(int)
    df["total"] = df["total"].fillna(method="bfill").astype(int)
    # NOTE(review): unlike confirmed(), this moving average is not shifted
    # by one day — confirm whether that asymmetry is intentional.
    df["new_sma7"] = df.new.rolling(window=7).mean()

    df = df.melt(
        id_vars=["date"],
        value_vars=["new", "new_sma7", "total"],
        var_name="category",
        value_name="value",
    ).dropna()

    rename = {"new": "New", "new_sma7": "Avg 7 d.", "total": "Cumulative"}
    df["category"] = df["category"].replace(rename)

    # Shared base supplies the x-encoding for every layer.
    base = alt.Chart(df, title="COVID-19 related deaths (Source: FHI)").encode(
        alt.X("yearmonthdate(date):O", axis=alt.Axis(title=None, labelAngle=-40))
    )

    bar = (
        base.transform_filter(alt.datum.category == "New")
        .mark_bar(color="#FFD1D1")
        .encode(y=alt.Y("value:Q", axis=alt.Axis(title="New per day", grid=True)))
    )

    # Cumulative line carries the full color scale so the legend lists all
    # three categories even though each layer draws only one of them.
    line = (
        base.transform_filter(alt.datum.category == "Cumulative")
        .mark_line(color="#2E507B", strokeWidth=3)
        .encode(
            y=alt.Y("value:Q", axis=alt.Axis(title="Cumulative")),
            color=alt.Color(
                "category:N",
                scale=alt.Scale(
                    domain=["New", "Avg 7 d.", "Cumulative"],
                    range=["#FFD1D1", "red", "#2E507B"],
                ),
                legend=alt.Legend(title=None),
            ),
        )
    )

    ma7 = (
        base.transform_filter(alt.datum.category == "Avg 7 d.")
        .mark_line(opacity=0.8)
        .encode(y=alt.Y("value:Q"), color=alt.Color("category:N"))
    )

    # bar+ma7 share a y-axis; the cumulative line gets its own.
    chart = (
        alt.layer(bar + ma7, line)
        .resolve_scale(y="independent")
        .properties(width=1200, height=600)
        .configure_legend(
            strokeColor="gray",
            fillColor="#FFFFFF",
            labelFontSize=12,
            symbolStrokeWidth=2,
            symbolSize=160,
            padding=6,
            cornerRadius=5,
            direction="horizontal",
            orient="none",
            legendX=480,
            legendY=655,
        )
    )

    chart.save(filename)
示例#15
0
# Combine the interactive chart pieces built earlier in the script.
chart = alt.layer(line, selectors, points, rules, text).properties(width=900,
                                                                   height=300)

#Plot Altair 7 geographical analysis; ref : https://github.com/altair-viz/altair/issues/2044
import altair as alt
from vega_datasets import data
world_source = final_df

# World country boundaries (TopoJSON) from vega_datasets.
source = alt.topo_feature(data.world_110m.url, "countries")
background = alt.Chart(source).mark_geoshape(fill="white")

# Choropleth: countries colored by confirmed case counts looked up by
# country id from world_source.
# NOTE(review): color encodes "confirmed:N" (nominal) while the tooltip
# uses "confirmed:Q" — a quantitative color encoding looks intended for a
# case count; confirm before changing.
foreground = (alt.Chart(source).mark_geoshape(
    stroke="black", strokeWidth=0.15).encode(
        color=alt.Color(
            "confirmed:N",
            scale=alt.Scale(scheme="redpurple"),
            legend=None,
        ),
        tooltip=[
            alt.Tooltip("Country/Region:N", title="Country"),
            alt.Tooltip("confirmed:Q", title="confirmed cases"),
        ],
    ).transform_lookup(
        lookup="id",
        from_=alt.LookupData(world_source, "id",
                             ["confirmed", "Country/Region"]),
    ))

final_map = ((background + foreground).configure_view(
    strokeWidth=0).properties(width=700, height=400).project("naturalEarth1"))
print(final_map)
def make_category_per_user_plots(infile):
    """Render per-user traffic-category charts from a parquet of flows.

    Reads grouped flows from `infile`, filters out users with too little
    history, ranks users by average bytes per online day and categories by
    total bytes, then saves three PNGs under renders/: a normalized stacked
    bar per user, absolute per-category points, and share-of-traffic points.
    """
    grouped_flows = infra.pd.read_parquet(infile)
    grouped_flows = grouped_flows.reset_index()
    grouped_flows["bytes_total"] = grouped_flows["bytes_up"] + grouped_flows[
        "bytes_down"]
    # Total bytes per (user, category) pair.
    user_category_total = grouped_flows[["user", "category", "bytes_total"
                                         ]].groupby(["user", "category"
                                                     ]).sum().reset_index()

    # Filter users by time in network to eliminate early incomplete samples
    user_active_ranges = infra.pd.read_parquet(
        "data/clean/user_active_deltas.parquet")[[
            "user", "days_since_first_active", "days_active", "days_online"
        ]]
    # Drop users that joined less than a week ago.
    users_to_analyze = user_active_ranges.loc[
        user_active_ranges["days_since_first_active"] > 7]
    # Drop users active for less than one day
    users_to_analyze = users_to_analyze.loc[
        users_to_analyze["days_active"] > 1, ]

    # Sort categories by total amount of bytes.
    cat_totals = grouped_flows.groupby("category").sum().reset_index()
    cat_sort_order = cat_totals.sort_values(
        "bytes_total", ascending=False).set_index("bytes_total").reset_index()
    # Rank 1 = category with the most bytes.
    cat_sort_order["cat_rank"] = cat_sort_order["bytes_total"].rank(
        method="min", ascending=False)
    cat_sort_list = cat_sort_order["category"].tolist()

    # Rank users by their daily use.
    user_totals = user_category_total.groupby("user").sum().reset_index()
    user_totals = user_totals.merge(users_to_analyze, on="user", how="inner")
    user_totals["user_total_bytes_avg_online_day"] = user_totals[
        "bytes_total"] / user_totals["days_online"]
    user_totals["user_rank"] = user_totals[
        "user_total_bytes_avg_online_day"].rank(method="min")

    # Attach per-user and per-category ranks to each (user, category) row;
    # inner merges also apply the activity filter above.
    user_category_total = user_category_total.merge(user_totals[[
        "user", "user_rank", "days_online", "user_total_bytes_avg_online_day"
    ]],
                                                    on="user",
                                                    how="inner")
    user_category_total = user_category_total.merge(
        cat_sort_order[["category", "cat_rank"]], on="category", how="inner")
    print(user_category_total)

    # Normalize to average bytes per online day and each category's share
    # of the user's daily average.
    user_category_total["bytes_avg_online_day"] = user_category_total[
        "bytes_total"] / user_category_total["days_online"]
    user_category_total["share_of_bytes_avg_online_day"] = \
        user_category_total["bytes_avg_online_day"] / user_category_total["user_total_bytes_avg_online_day"]
    print(user_category_total)

    # This might not be showing exactly what I want to show, since in merging
    # users some users that dominate video could be overrepresented. Maybe
    # want to merge on the fraction of traffic to each part from each user?
    # Are users counted equally or are bytes counted equally...
    # Chart 1: normalized stacked bars — per-user traffic mix by category.
    alt.Chart(user_category_total[[
        "category", "user_rank", "cat_rank", "bytes_avg_online_day"
    ]]).mark_bar().encode(
        x="user_rank:O",
        y=alt.Y(
            "bytes_avg_online_day",
            stack="normalize",
            sort=cat_sort_list,
        ),
        color=alt.Color(
            "category:N",
            scale=alt.Scale(scheme="tableau20"),
            sort=cat_sort_list,
        ),
        order=alt.Order(
            "cat_rank",
            sort="descending",
        ),
    ).properties(width=500, ).save(
        "renders/bytes_per_average_online_day_per_user_bar.png",
        scale_factor=2,
    )

    # Chart 2: absolute average bytes per online day, one point per
    # (user, category).
    alt.Chart(user_category_total[[
        "category", "user_rank", "cat_rank", "bytes_avg_online_day"
    ]]).mark_point(
        size=10,
        strokeWidth=2,
    ).encode(
        x="user_rank:O",
        y=alt.Y("bytes_avg_online_day",
                sort=cat_sort_list,
                title="average bytes per online day"),
        color=alt.Color(
            "category:N",
            scale=alt.Scale(scheme="tableau20"),
            sort=cat_sort_list,
        ),
        order=alt.Order(
            "cat_rank",
            sort="descending",
        ),
    ).properties(width=500, ).save(
        "renders/bytes_per_average_online_day_per_user_points.png",
        scale_factor=2,
    )

    # Chart 3: each category's share of the user's daily average.
    alt.Chart(user_category_total[[
        "category", "user_rank", "cat_rank", "share_of_bytes_avg_online_day"
    ]]).mark_point(
        size=10,
        strokeWidth=2,
    ).encode(
        x="user_rank:O",
        y=alt.Y("share_of_bytes_avg_online_day",
                sort=cat_sort_list,
                title="share of average bytes per online day"),
        color=alt.Color(
            "category:N",
            scale=alt.Scale(scheme="tableau20"),
            sort=cat_sort_list,
        ),
        order=alt.Order(
            "cat_rank",
            sort="descending",
        ),
    ).properties(width=500, ).save(
        "renders/share_of_bytes_per_average_online_day_per_user_points.png",
        scale_factor=2,
    )
示例#17
0
def outcome_bars(data, name=None, width=100):
    """
    Create a bar chart showing the percentage of hands won, lost, pushed,
    and surrendered, faceted by result.

    Args:
        data: a DataFrame of hand history, a list of history dictionaries,
            or a list of lists of dictionaries (one inner list per game).
        name: optional legend name, or list of names (one per game).
            Defaults to "Game0", "Game1", ...
        width: pixel width of each faceted column.

    Returns:
        An Altair chart with one column per result.

    Raises:
        TypeError: if `data` is neither a DataFrame nor a list.
    """
    # Normalize `data` into a list of DataFrames, one per game.
    if isinstance(data, pd.DataFrame):
        data_list = [data]
    elif isinstance(data, list):
        # Distinguish a list of dictionaries (a single game's history) from
        # a list of lists (several games). Default to False so an empty
        # list doesn't raise NameError (bug fix: l_o_d was previously
        # undefined when `data` was empty).
        l_o_d = False
        for item in data:
            l_o_d = isinstance(item, dict)
        # if it's a list of dictionaries, just convert them
        if l_o_d:
            data_list = [pd.DataFrame(data)]
        else:
            data_list = [pd.DataFrame(item) for item in data]
    else:
        msg = "'data' must be a DataFrame or list"
        raise TypeError(msg)
    # Assign names to the games. Accept a single string for convenience
    # (bug fix: a bare string used to be zipped character-by-character),
    # and size the defaults by the number of games (bug fix: len(data) was
    # the row count when `data` was a DataFrame).
    if isinstance(name, str):
        name = [name]
    if not name:
        name = [f"Game{i}" for i in range(len(data_list))]
    plot_data_list = []  # list to hold rows of the tidy plotting frame
    for _name, _data in zip(name, data_list):
        win, loss, push, surrender = results_pct(_data, as_series=False)
        plot_data_list.append(
            {"game": _name, "result": "Win", "pct": win, "order": 1},
        )
        plot_data_list.append(
            {"game": _name, "result": "Loss", "pct": loss, "order": 2}
        )
        plot_data_list.append(
            {"game": _name, "result": "Push", "pct": push, "order": 3}
        )
        plot_data_list.append(
            # bug fix: Surrender previously reused order 3 (same as Push)
            {"game": _name, "result": "Surrender", "pct": surrender, "order": 4}
        )
    plot_data = pd.DataFrame(plot_data_list)

    # Create the Altair chart: bars per game, one facet column per result.
    # TODO(review): the x-axis sort list contains result labels, not game
    # names — confirm the intended sort field before changing behavior.
    chart = alt.Chart(plot_data, width=width).mark_bar().encode(
        x=alt.X(
            "game",
            axis=alt.Axis(labelAngle=-45),
            title=None,
            sort=["Win", "Loss", "Push"]
        ),
        y=alt.Y(
            "pct:Q"
        ),
        color=alt.Color(
            "game:O",
            legend=None
        ),
        column=alt.Column(
            "result:O",
            title="Result"
        ),
        tooltip=[
            alt.Tooltip("pct", title="Pct")
        ]
    )
    return chart
示例#18
0
def make_plot():
    """Render purchase counts and total purchased volume per package size.

    Reads the cleaned transaction log, aggregates purchases by package
    size, and writes two labelled bar charts as PNGs under renders/.
    """
    transactions = infra.pd.read_parquet(
        "data/clean/transactions_DIV_none_INDEX_timestamp.parquet")

    # Count purchases per package size, then derive MB and total-GB columns.
    purchases = (
        transactions.loc[transactions["kind"] == "purchase"]
        .groupby("amount_bytes")["timestamp"]
        .count()
        .reset_index()
        .rename({"timestamp": "count"}, axis="columns")
    )
    purchases["amount_MB"] = purchases["amount_bytes"] * 1.0 / 1000**2
    purchases["total_GB"] = (
        purchases["amount_MB"] * purchases["count"] * 1.0 / 1000)

    print(purchases)

    # Bar chart of purchase counts, one bar per package size.
    count_bars = alt.Chart(purchases).mark_bar().encode(
        x=alt.X('count', title="Count"),
        y=alt.Y('amount_MB', type="ordinal", title="Package Type (MB)"),
        color=alt.Color('amount_MB:N', legend=None),
    )

    # Numeric labels drawn just past the end of each bar.
    count_labels = count_bars.mark_text(
        align="left",
        baseline="middle",
        xOffset=5,
    ).encode(
        text="count:Q",
        color=alt.value("black"),
    )

    # Labels layered beneath the bars, matching the original layer order.
    (count_labels + count_bars).properties(
        width=500,
        height=75,
    ).save(
        "renders/package_counts.png",
        scale_factor=2,
    )

    # Second chart: total gigabytes purchased per package size.
    alt.Chart(purchases).mark_bar().encode(
        x=alt.X('total_GB', title="Total GB Purchased"),
        y=alt.Y('amount_MB', type="ordinal", title="Package Type (MB)"),
        color=alt.Color('amount_MB:N', legend=None),
    ).properties(
        width=500,
        height=75,
    ).save(
        "renders/package_bytes.png",
        scale_factor=2,
    )
示例#19
0
    'col': 2
}, {
    'country': 'United States',
    'animal': 'sheep',
    'col': 1
}])

# Categories that receive a custom shape and colour below.
domains = ['person', 'cattle', 'pigs', 'sheep']

# Map each category to a custom SVG path used as its point shape.
shape_scale = alt.Scale(
    domain=domains,
    range=[
        'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z',
        'M4 -2c0 0 0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 -0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z',
        'M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 -0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 -0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z',
        'M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 -0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z'
    ])

# Matching fill colour for each category.
color_scale = alt.Scale(domain=domains,
                        range=[
                            'rgb(162,160,152)', 'rgb(194,81,64)',
                            'rgb(93,93,93)', 'rgb(91,131,149)'
                        ])

# Pictogram grid: one row per country, one custom-shaped, coloured mark per
# entry; axes are hidden so only the repeated symbols are visible.
alt.Chart(source).mark_point(filled=True).encode(
    alt.X('col:O', axis=None), alt.Y('animal:O', axis=None),
    alt.Row('country:N', header=alt.Header(title='')),
    alt.Shape('animal:N', legend=None, scale=shape_scale),
    alt.Color('animal:N', legend=None, scale=color_scale), alt.OpacityValue(1),
    alt.SizeValue(200)).properties(width=800, height=200)
示例#20
0
# Historical periods highlighted behind the population curve.
source2 = [
    {"start": "1933", "end": "1945", "event": "Nazi Rule"},
    {"start": "1948", "end": "1989", "event": "GDR (East Germany)"},
]

source = alt.pd.DataFrame(source)
source2 = alt.pd.DataFrame(source2)

# Population over time, drawn as a line with points layered on top.
line = alt.Chart(source).mark_line(color="#333").encode(
    x=alt.X("year:T", axis=alt.Axis(format="%Y"), title="Year"),
    y=alt.Y("population", title="Population"),
)
point = line.mark_point(color="#333")

# Shaded rectangle spanning each historical period.
rect = alt.Chart(source2).mark_rect().encode(
    x="start:T",
    x2="end:T",
    color=alt.Color("event:N", title="Event"),
)

# Rectangles first so the population curve is drawn over them.
(rect + line + point).properties(
    title="Population of Falkensee from 1875 to 2014", width=500, height=300)
示例#21
0
def exercise_ecg_interactive_plot(
    sample_id: Union[int, str], folder: Optional[str] = None, time_interval_seconds: int = 10,
) -> Union[HTML, alt.Chart]:
  """Wrangle exercise ECG data to tidy and present it as an interactive plot.

  Args:
    sample_id: The id of the ECG sample to retrieve.
    folder: The local or Cloud Storage folder under which the files reside.
    time_interval_seconds: the width of the time interval (in seconds) to display of signal data

  Returns:
    An Altair plot or a notebook-friendly error.
  """
  (exercise_ecg_trend, exercise_ecg_signal) = reshape_exercise_ecg_to_tidy(sample_id=sample_id, folder=folder)
  # Bail out with a notebook-friendly warning if either frame came back empty.
  if(exercise_ecg_trend.shape[0] == 0 or exercise_ecg_signal.shape[0] == 0):
    return HTML(f'''
      <div class="alert alert-block alert-danger">
      <b>Warning:</b> Exercise ECG not available for sample {sample_id}.<br>
      Use the <kbd>folder</kbd> parameter to read HD5s from a different local directory or Cloud Storage bucket.
      </div>''')

  # Altair is handed filenames rather than DataFrames below, so write the
  # tidy frames out as JSON records next to the notebook first.
  trend_data_file = os.path.basename(EXERCISE_ECG_TREND_DATA_FILE.name)
  exercise_ecg_trend.to_json(trend_data_file, orient='records')
  signal_data_file = os.path.basename(EXERCISE_ECG_SIGNAL_DATA_FILE.name)
  exercise_ecg_signal.to_json(signal_data_file, orient='records')

  # Click selection on the trend plot: snaps to the nearest point in time,
  # initialized at t=200.0 seconds.
  brush = alt.selection_single(on='mouseover', nearest=True, fields=['time'], init={'time': 200.0})

  # Dropdown to pick a single lead to display in the signal plot.
  lead_dropdown = alt.binding_select(options=list(exercise_ecg_signal.lead.unique()))
  lead_select = alt.selection_single(
      fields=['lead'], bind=lead_dropdown, name='Choose just one to view',
      init={'lead': exercise_ecg_signal.lead.unique()[0]},
  )

  # Trend overview (y encoding is added by the caller-facing return below);
  # clicking a point drives the time window of the signal plot.
  trend = alt.Chart(trend_data_file).mark_point(opacity=0.8, filled=True, size=100).encode(
      x='time:Q',
      color=alt.Color('phasename:N', legend=alt.Legend(orient='top'), title='Phase names'),
      tooltip=[
          'artifact:Q', 'grade:Q', 'heartrate:Q', 'load:Q', 'mets:Q', 'pacecount:Q',
          'phasename:N', 'phasetime:Q', 'time:Q', 'vecount:Q',
      ],
  ).properties(
      width=900, height=100, title=f'Click on a point to select a {time_interval_seconds} second time interval.',
  ).add_selection(brush)

  # Detailed signal view, filtered to the chosen lead and to the
  # time_interval_seconds window centred on the brushed time point.
  signal = alt.Chart(signal_data_file).mark_line().encode(
      alt.X('time:Q', axis=alt.Axis(labelAngle=15)),
      y='raw_mV:Q',
      color=alt.Color('lead:N', legend=alt.Legend(orient='top'), title='Lead names'),
  ).properties(
      width=900, height=300, title='Exercise ECG signal for {}'.format(sample_id),
  ).add_selection(
      lead_select,
  ).transform_filter(
      lead_select,
  ).transform_filter(
      # Raw Vega expression needed because interval arithmetic on a selection
      # is not expressible via the regular filter API; see
      # https://github.com/altair-viz/altair/issues/1960
      f'''((toNumber({brush.name}.time) - {time_interval_seconds/2.0}) < datum.time)
           && (datum.time < toNumber({brush.name}.time) + {time_interval_seconds/2.0})''',
  )

  # Two trend views (heart rate, then load) stacked above the signal plot.
  return trend.encode(y='heartrate:Q') & trend.encode(y='load:Q') & signal
示例#22
0
import altair as alt
import pandas as pd

penguins_df = pd.read_csv('data/penguins.csv')

# Scatter of flipper length vs. body mass, with species mapped to both
# colour and point shape; axis domains are pinned so all species fit.
colour_plot = alt.Chart(penguins_df).mark_point(size=10).encode(
    x=alt.X('flipper_length_mm',
            scale=alt.Scale(domain=[160, 240]),
            title="Flipper length (mm)"),
    y=alt.Y('body_mass_g',
            scale=alt.Scale(domain=[2500, 6500]),
            title='Mass (grams)'),
    color=alt.Color('species',
                    title='Penguin species',
                    scale=alt.Scale(scheme='set2')),
    shape=alt.Shape('species'),
).properties(
    title='Gentoo penguins tend to have the longest flippers and weight the most among the penguin species.'
)

colour_plot
示例#23
0
st.header("**Card Frequency over Decks**")
st.image(build_image(top_30_path), width=150)
st.text(f"The 30 most frequent cards used in {class_selectbox} decks")

top_30 = cards_appearance.head(30)

# Hex colours keyed by card rarity (not yet wired into the chart below).
rarity_color_list = [
    '#641E16', '#2ECC71', '#D0D3D4', '#3498DB', '#8E44AD', '#F7DC6F'
]

# Horizontal bars, one per card, sorted by number of deck appearances.
cards_appearance_bars = alt.Chart(top_30).mark_bar(size=20).encode(
    x=alt.X('numberOfAppearance:Q'),
    y=alt.Y('cardName:N', sort="-x"),
    color=alt.Color(
        'numberOfAppearance:Q',
        legend=None,
        scale=alt.Scale(domain=[class_selectbox],
                        range=[class_selected_details["color"]]),
    ),
    # TODO improvement: colors by rarity with correct sort by Q desc
    tooltip=['cardName', 'cardRarity', 'numberOfAppearance'],
)

# Count labels rendered just to the right of each bar.
cards_appearance_text = cards_appearance_bars.mark_text(
    align='left', baseline='middle', dx=3).encode(text='numberOfAppearance:Q')

cards_appearance_chart = (
    (cards_appearance_bars + cards_appearance_text)
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
    .properties(width=700, height=700)
)

st.write(cards_appearance_chart)

st.header("**Card Details**")
示例#24
0
def render_most_similar(data, title):
    """Render a labelled horizontal bar chart of the most similar words.

    `data` must provide 'word' and 'distance' columns; bars are ordered by
    distance (descending) and each bar is annotated with its distance.
    """
    # Both layers sort words the same way, so build the sort spec once.
    sort_by_distance = alt.EncodingSortField(field='distance',
                                             order='descending')

    bars = alt.Chart(data, height=400, title=title).mark_bar().encode(
        alt.X('distance',
              title='',
              scale=alt.Scale(domain=(0, 1.0), clamp=True),
              axis=None),
        alt.Y('word', title='', sort=sort_by_distance),
        color=alt.Color('distance', legend=None,
                        scale=alt.Scale(scheme='blues')),
        tooltip=[
            alt.Tooltip(field='word', type='nominal'),
            alt.Tooltip(field='distance', format='.3f', type='quantitative'),
        ],
    )

    # Distance values drawn just past the end of each bar.
    text = alt.Chart(data).mark_text(
        align='left',
        baseline='middle',
        dx=5,
        font='Roboto',
        size=15,
        color='black',
    ).encode(
        x=alt.X('distance', axis=None),
        y=alt.Y('word', sort=sort_by_distance),
        text=alt.Text("distance", format=".3f"),
    )

    # Layer the labels over the bars and apply shared axis/title styling.
    layered = bars + text
    return layered.configure_axisX(
        labelFontSize=20,
        labelFont='Roboto',
        grid=False,
        domain=False,
    ).configure_axisY(
        labelFontSize=20,
        labelFont='Roboto',
        grid=False,
        domain=False,
    ).configure_view(
        strokeOpacity=0,
    ).configure_title(
        fontSize=25,
        font='Roboto',
        dy=-10,
    )
示例#25
0
def render_curve(df, ns=None, epsilons=None, save_path=None):
    """Render, and optionally save, a plot of the loss-data curve.
    Optionally takes arguments `ns` and `epsilons` to draw lines on the plot
    illustrating where metrics were calculated.
    Arguments:
    - df: (pd.DataFrame) the dataframe containing a loss-data curve as returned
        by LossDataEstimator.compute_curve or LossDataEstimator.to_dataframe.
        Note: a 'name' column is added to `df` in place when missing.
    - ns: (list<num>) the list of training set sizes to use for computing
        metrics. Defaults to no markers.
    - epsilons: (list<num>) the settings of epsilon used for computing SDL and
        eSC. Defaults to no markers.
    - save_path: (str) optional: a path (ending in .pdf or .png) to save the
        chart. saving requires the
        [`altair-saver`](https://github.com/altair-viz/altair_saver/) package
        and its dependencies.
    Returns: an Altair chart. Note that this chart displays well in notebooks,
        so calling `render_curve(df)` without a save path will work well with
        Jupyter.
    """
    import altair as alt
    from . import altair_theme  # noqa: F401
    alt.data_transformers.disable_max_rows()

    # Bug fix: the defaults were mutable lists (`ns=[]`), the classic shared
    # default-argument pitfall. Normalize the None sentinels to fresh lists.
    ns = [] if ns is None else list(ns)
    epsilons = [] if epsilons is None else list(epsilons)

    if "name" not in df:
        print("Dataframe has no 'name' field. Using 'default'.")
        df['name'] = 'default'

    # Snap requested sizes to sizes actually present in the curve.
    if ns:
        ns = _closest_valid_ns(df, ns)

    title = 'Loss-data curve'
    color_title = 'Representation'
    xscale = alt.Scale(type='log')
    yscale = alt.Scale(type='log')

    x_axis = alt.X('samples', scale=xscale, title='Dataset size')
    y_axis = alt.Y('mean(val_loss)', scale=yscale, title='Validation loss')

    # Mean validation loss vs. dataset size, one line per representation.
    line = alt.Chart(df, title=title).mark_line()
    line = line.encode(
        x=x_axis,
        y=y_axis,
        color=alt.Color('name:N', title=color_title, legend=None),
    )

    # Points carry the legend (colour + shape) and the tooltips.
    point = alt.Chart(df, title=title).mark_point(size=80, opacity=1)
    point = point.encode(x=x_axis,
                         y=y_axis,
                         color=alt.Color(
                             'name:N',
                             title=color_title,
                         ),
                         shape=alt.Shape('name:N', title=color_title),
                         tooltip=['samples', 'name'])

    # Dashed rules marking the requested ns (vertical) and epsilons
    # (horizontal) on the shared axes.
    rules_df = pd.concat(
        [pd.DataFrame({'x': ns}),
         pd.DataFrame({'y': epsilons})], sort=False)

    rule_x = alt.Chart(rules_df).mark_rule(strokeDash=[4, 4]).encode(x='x')
    rule_y = alt.Chart(rules_df).mark_rule(strokeDash=[4, 4]).encode(y='y')

    chart = alt.layer(rule_x, rule_y, line,
                      point).resolve_scale(color='independent',
                                           shape='independent')
    if save_path is not None:
        import altair_saver
        altair_saver.save(chart, save_path)
    return chart
示例#26
0
    def build_graph(self):
        """Build a grouped bar chart of lines of code added per language,
        per quarter, from self.yearly_data, and save it as bar_graph.png.

        Returns the saved filename ('bar_graph.png').
        """

        # Per-language colours loaded from the bundled colors.json.
        with open(os.path.join(os.path.dirname(__file__), 'colors.json')) as f:
            colors = json.load(f)
        allColorsValues = []

        # filter data: keep only the top `max_languages` languages (by LOC)
        # within each quarter, stored under a synthetic 'top' key.
        max_languages = 5
        top_languages = {}
        for year in self.yearly_data.keys():
            for quarter in self.yearly_data[year].keys():
                for language in sorted(list(self.yearly_data[year][quarter].keys()),
                                       key=lambda lang: self.yearly_data[year][quarter][lang], reverse=True)[
                                0:max_languages]:
                    if 'top' not in self.yearly_data[year][quarter]:
                        self.yearly_data[year][quarter]['top'] = {}
                    if self.yearly_data[year][quarter][language] != 0:
                        self.yearly_data[year][quarter]['top'][language] = self.yearly_data[year][quarter][language]

                        # NOTE(review): first occurrence is set to 1 and then
                        # immediately incremented to 2; only the keys of
                        # top_languages are used below, so the counts appear
                        # unused — confirm before relying on the values.
                        if language not in top_languages:
                            top_languages[language] = 1
                        top_languages[language] += 1

        # print(self.yearly_data)

        all_languages = list(top_languages.keys())

        # Collect one colour per language; languages with a null colour in
        # colors.json are skipped here (lists may diverge in length).
        for language in all_languages:
            if colors[language]['color'] is not None:
                allColorsValues.append(colors[language]['color'])

        # Re-shape into language -> [per-year [Q1, Q2, Q3, Q4] LOC counts].
        languages_all_loc = {}

        for language in all_languages:
            language_year = []
            for year in self.yearly_data.keys():
                language_quarter = [0, 0, 0, 0]
                for quarter in self.yearly_data[year].keys():
                    if language in self.yearly_data[year][quarter]['top']:
                        language_quarter[quarter - 1] = self.yearly_data[year][quarter]['top'][language]
                    else:
                        language_quarter[quarter - 1] = 0
                language_year.append(language_quarter)
            languages_all_loc[language] = language_year

        # print(languages_all_loc)

        language_df = {}

        # Melt a per-language (year x quarter) frame into tidy rows:
        # c1 = year, c2 = quarter label, values = LOC, Language = name.
        def prep_df(df, name):
            df = df.stack().reset_index()
            df.columns = ['c1', 'c2', 'values']
            df['Language'] = name
            return df

        for language in languages_all_loc.keys():
            language_df[language] = pd.DataFrame(languages_all_loc[language], index=list(self.yearly_data.keys()),
                                                 columns=["Q1", "Q2", "Q3", "Q4"])

        for language in language_df.keys():
            language_df[language] = prep_df(language_df[language], language)

        # One tidy frame across all languages feeds the chart.
        df = pd.concat(language_df.values())

        chart = alt.Chart(df).mark_bar().encode(

            # tell Altair which field to group columns on
            x=alt.X('c2:N', title=None),

            # tell Altair which field to use as Y values and how to calculate
            y=alt.Y('sum(values):Q',
                    axis=alt.Axis(
                        grid=False,
                        title='Lines Of Code added')),

            # tell Altair which field to use to use as the set of columns to be  represented in each group
            column=alt.Column('c1:N', title=None),

            # tell Altair which field to use for color segmentation
            color=alt.Color('Language:N',
                            scale=alt.Scale(
                                domain=all_languages,
                                # make it look pretty with an enjoyable color pallet
                                range=allColorsValues,
                            ),
                            )) \
            .configure_view(
            # remove grid lines around column clusters
            strokeOpacity=0
        )
        chart.save('bar_graph.png')
        return 'bar_graph.png'
示例#27
0
def chart1():
    """Build a choropleth of average hate crimes per 100K people by US state.

    Registers and enables a custom Altair theme, then returns the map chart.
    """

    # Custom Altair theme: fonts, axis colours, and default view size.
    def mds_special():
        font = "Arial"
        axisColor = "#000000"
        gridColor = "#DEDDDD"
        return {
            "config": {
                "title": {
                    "fontSize": 18,
                    "font": font,
                    "anchor": "start",  # equivalent of left-aligned.
                    "fontColor": "#000000"
                },
                'view': {
                    "height": 300,
                    "width": 400
                },
                "axisX": {
                    "domain": True,
                    #"domainColor": axisColor,
                    "gridColor": gridColor,
                    "domainWidth": 1,
                    "grid": False,
                    "labelFont": font,
                    "labelFontSize": 12,
                    "labelAngle": 0,
                    "tickColor": axisColor,
                    "tickSize":
                    5,  # default, including it just to show you can change it
                    "titleFont": font,
                    "titleFontSize": 16,
                    "titlePadding":
                    10,  # guessing, not specified in styleguide
                    "title": "X Axis Title (units)",
                },
                "axisY": {
                    "domain": False,
                    "grid": True,
                    "gridColor": gridColor,
                    "gridWidth": 1,
                    "labelFont": font,
                    "labelFontSize": 12,
                    "labelAngle": 0,
                    #"ticks": False, # even if you don't have a "domain" you need to turn these off.
                    "titleFont": font,
                    "titleFontSize": 16,
                    "titlePadding":
                    10,  # guessing, not specified in styleguide
                    "title": "Y Axis Title (units)",
                    # titles are by default vertical left of axis so we need to hack this
                    #"titleAngle": 0, # horizontal
                    #"titleY": -10, # move it up
                    #"titleX": 18, # move it to the right so it aligns with the labels
                },
            }
        }

    # register the custom theme under a chosen name
    alt.themes.register('mds_special', mds_special)

    # enable the newly registered theme
    alt.themes.enable('mds_special')
    from vega_datasets import data

    # US state outlines (TopoJSON) joined against the hate-crime CSV by
    # state id via transform_lookup.
    states = alt.topo_feature(data.us_10m.url, 'states')
    hate_crime = pd.read_csv('../data/crime_state_id_clean.csv')

    p1 = alt.Chart(states).mark_geoshape().encode(
        alt.Color('avg_hatecrimes_per_100k_fbi:Q',
                  title="Average hate crime per 100K"),
        tooltip=[
            alt.Tooltip('avg_hatecrimes_per_100k_fbi:Q',
                        title='Average hate crime per 100K'),
            alt.Tooltip('state:N')
        ]).transform_lookup(
            lookup='id',
            from_=alt.LookupData(hate_crime, 'id', [
                'avg_hatecrimes_per_100k_fbi', 'state'
            ])).project('albersUsa').properties(
                title='Average hate crimes per 100K population in each state',
                width=550,
                height=300)

    return p1
示例#28
0
def main(_):
  print("Loading data...")
  dfs = []
  for filename in os.listdir(FLAGS.data):
    if filename.endswith(".csv"):
      dfs.append(
          pd.read_csv(os.path.join(FLAGS.data, filename), encoding="utf-8"))
  data = pd.concat(dfs)
  print("%d Examples" % (len(set(data["id"]))))
  print("%d Annotations" % len(data))

  if not os.path.isdir(FLAGS.plot_dir):
    os.makedirs(FLAGS.plot_dir)

  with open(FLAGS.emotion_file, "r") as f:
    all_emotions = f.read().splitlines()
  all_emotions_neutral = all_emotions + ["neutral"]
  emotion2idx = {e: i for i, e in enumerate(all_emotions)}
  print("%d emotion Categories" % len(all_emotions))

  print("Processing data...")

  # Remove neutral labels
  data = data[data["neutral"] == 0]

  # Remove examples with no ratings (difficult examples)
  data = data[data[all_emotions_neutral].sum(axis=1) != 0]

  # Convert into num_examples x num_raters x num_ratings format
  data = data.groupby("id").filter(lambda x: len(x) >= 3)
  id_groups = data.groupby("id")

  worker2examples = {}  # dict mapping worker ids to (example, rater id) tuples
  max_num_raters = data.groupby("id").size().max()
  ratings = np.zeros(
      (len(id_groups), max_num_raters, len(all_emotions)))  # ignore "neutral"
  rater_msk = np.zeros(
      (len(id_groups), max_num_raters))  # for masking out non-existent raters
  print("Ratings shape", ratings.shape)

  # Get ratings and rater mask
  texts = []
  for ex_idx, (_, g) in enumerate(id_groups):
    texts.append(g.iloc[0]["text"])
    rater_count = 0

    # iterate through workers
    for _, row in g.iterrows():
      for e in all_emotions:
        ratings[ex_idx, rater_count, emotion2idx[e]] = row[e]
        rater_msk[ex_idx, rater_count] = 1

      worker_id = row["rater_id"]
      if worker_id in worker2examples:
        worker2examples[worker_id].append((ex_idx, rater_count))
      else:
        worker2examples[worker_id] = [(ex_idx, rater_count)]
      rater_count += 1

  print("Calculating leave-out (partial) correlations...")
  partial_corr_per_rater = []
  corr_per_rater = []
  for worker_id in worker2examples:
    partial_corrs, corrs = LeaveOut(ratings, rater_msk, worker2examples,
                                    worker_id)
    if len(partial_corrs) < len(all_emotions):
      continue

    partial_corr_per_rater.append(partial_corrs)
    corr_per_rater.append(corrs)
  corr_per_rater = np.array(corr_per_rater)
  partial_corr_per_rater = np.array(partial_corr_per_rater)

  # Verify that there are no NaN values
  assert np.isnan(corr_per_rater).sum() == 0

  # Apply Wilcoxon signed rank test to test significance of each dimension
  p_vals = np.apply_along_axis(wilcoxon, 0, partial_corr_per_rater)[1]

  # Apply Bonferroni correction
  reject, corr_pvals, _, newalpha = multipletests(
      p_vals, alpha=0.05, method="bonferroni")
  print("Which dimensions to keep?")
  print(reject)
  print(corr_pvals)
  print(newalpha)

  print("Running PPCA on all the data...")
  # Take all raters and split them randomly
  x = []
  y = []
  rater_counts = rater_msk.sum(axis=1).astype(int)
  all_ratings_avg = []
  for i, ex in enumerate(ratings):
    # Get actual raters based on mask
    keep = []
    for worker_rating in ex[:rater_counts[i]]:
      keep.append(list(worker_rating))
    all_ratings_avg.append(list(np.array(keep).mean(axis=0)))

    # Shuffle raters randomly
    random.shuffle(keep)

    num_raters = len(keep)
    x.append(list(np.array(keep[:int(num_raters / 2)]).mean(axis=0)))
    y.append(list(np.array(keep[int(num_raters / 2):]).mean(axis=0)))

  x = np.array(x)
  y = np.array(y)
  all_ratings_avg = np.array(all_ratings_avg)
  w, v = PPCA(x, y)  # final components (p-values determine which ones to keep)

  print("Plotting percentage of covariance explained...")
  PlotCovar(v)

  # Apply varimax rotation
  w_vari = Varimax(w)

  # Get mapping between ppcs and emotions
  map_df = pd.DataFrame(
      w_vari, index=all_emotions, columns=np.arange(len(all_emotions))).round(4)
  # Sort to move values to diagonal
  map_df = map_df[list(
      np.argsort(map_df.apply(lambda x: pd.Series.nonzero(x)[0]).values)[0])]
  f = plt.figure(figsize=(10, 6), dpi=300)
  sns.heatmap(
      map_df,
      center=0,
      cmap=sns.diverging_palette(240, 10, n=50),
      yticklabels=all_emotions)
  plt.xlabel("Component")
  plt.savefig(
      FLAGS.plot_dir + "/component_loadings.pdf",
      dpi=600,
      format="pdf",
      bbox_inches="tight")
  ppc2emotion = map_df.abs().idxmax().to_dict()
  emotion2ppc = {e: i for i, e in ppc2emotion.items()}
  print(ppc2emotion)

  print("Plotting frequency and mean left-out rater correlations...")
  corr_mean = corr_per_rater.mean(axis=0)
  corr_mean_ordered = [corr_mean[emotion2ppc[e]] for e in all_emotions]
  df_plot = pd.DataFrame({
      "emotion": all_emotions,
      "agreement": corr_mean_ordered
  })
  df_plot["count"] = df_plot["emotion"].map(
      data[all_emotions].sum(axis=0).to_dict())
  df_plot.sort_values("count", ascending=False, inplace=True)
  df_plot.to_csv(FLAGS.plot_dir + "/emotion_agreements.csv", index=False)

  # Get colors
  norm = plt.Normalize(df_plot["agreement"].min(), df_plot["agreement"].max())
  sm = plt.cm.ScalarMappable(cmap="BuPu", norm=norm)
  sm.set_array([])

  # Generate figure
  fig = plt.figure(dpi=600, figsize=(5, 6))
  ax = sns.barplot(
      data=df_plot,
      y="emotion",
      x="count",
      orient="h",
      hue="agreement",
      palette="BuPu",
      dodge=False,
      edgecolor="black",
      linewidth=1)
  ax.get_legend().remove()
  ax.figure.colorbar(sm)
  plt.text(18000, 31, "Interrater\nCorrelation", ha="center")
  plt.xlabel("Number of Examples")
  plt.ylabel("")
  plt.draw()
  labels = [item.get_text() for item in ax.get_xticklabels()]
  ax.set_xticklabels(["%dk" % (int(int(label) / 1000)) for label in labels])
  plt.tight_layout()
  fig.savefig(
      FLAGS.plot_dir + "/label_distr_agreement.pdf",
      dpi=600,
      format="pdf",
      bbox_inches="tight")

  print("Generating t-SNE plot...")
  # Get PPC scores for all examples
  all_ratings_avg = Demean(all_ratings_avg)  # demean all ratings
  ppc_scores = all_ratings_avg.dot(w_vari)  # project onto ppcs
  ppc_scores_abs = np.absolute(ppc_scores)

  # Load maximally distinct colors
  colors = pd.read_csv(
      FLAGS.rgb_colors, sep="\t", header=None, names=np.arange(3))

  # Set colors (todo(ddemszky): add names to colors in file)
  palette_rgb = colors.values
  with open(FLAGS.emotion_color_order) as f:
    color_order = f.read().splitlines()
  ppc2color = {emotion2ppc[e]: i for i, e in enumerate(color_order)}
  # get rgb value for each example based on weighted average of top emotions
  rgb_vals = []
  hex_vals = []
  top_categories = []
  threshold = 0.5  # exclude points not loading on any of the top 10 categories
  counter = 0
  rgb_max = 255
  other_color = palette_rgb[len(all_emotions), :]
  for i, scores in enumerate(ppc_scores_abs):

    top_ppcs = [
        idx for idx in (-scores).argsort()[:2] if scores[idx] > threshold
    ]
    top_emotions = ",".join([ppc2emotion[idx] for idx in top_ppcs
                            ]) if top_ppcs else "other"
    top_categories.append(top_emotions)
    if len(top_ppcs) < 1:  # doesn't have top emotions from list
      color = other_color  # use grey
      counter += 1
    else:
      # Weighted average of top emotions (square->weighted average->square root)
      color_ids = [ppc2color[idx] for idx in top_ppcs]
      weights = [scores[idx] for idx in top_ppcs]
      # Need to round, otherwise floating point precision issues will result
      # in values slightly above 1
      avg = np.round(
          np.sqrt(
              np.average(
                  np.power(palette_rgb[color_ids] * rgb_max, 2),
                  axis=0,
                  weights=weights)) / rgb_max, 4)
      if (avg > 1).sum() > 0:
        print(avg)
      color = avg
    rgb_vals.append(list(color))
    hex_vals.append("#%02x%02x%02x" %
                    tuple(np.array(color * rgb_max, dtype=int)))
  rgb_vals = np.array(rgb_vals)

  # Create t-SNE model
  tsne_model = TSNE(
      perplexity=30,
      n_components=2,
      n_iter=1000,
      random_state=23,
      learning_rate=500,
      init="pca")
  new_values = tsne_model.fit_transform(ppc_scores)
  x = []
  y = []
  for value in new_values:
    x.append(value[0])
    y.append(value[1])
  # Put data in dataframe
  df = pd.DataFrame({
      "x": x,
      "y": y,
      "color": hex_vals,
      "label(s)": top_categories,
      "text": texts
  })

  df = df[df["label(s)"] != "other"]
  df["top_label"] = df["label(s)"].str.split(",").str[0]

  # Two selections:
  # - a brush that is active on the top panel
  # - a multi-click that is active on the bottom panel
  brush = alt.selection(type="interval")
  click = alt.selection_multi(encodings=["color"])

  sample = df.sample(5000)  # max 5000 examples can be plotted
  points = alt.Chart(sample).mark_point(
      filled=True, size=50).encode(
          x="x:Q",
          y="y:Q",
          color=alt.Color("color", scale=None),
          tooltip=["label(s)", "text"]).properties(
              width=700, height=600).add_selection(brush)

  # Bottom panel is a bar chart
  bars = alt.Chart(sample).mark_bar().encode(
      x="count()",
      y="top_label:N",
      color=alt.condition(click, alt.Color("color:N", scale=None),
                          alt.value("lightgray")),
  ).transform_filter(brush.ref()).properties(
      width=700, selection=click)

  chart = alt.vconcat(
      points, bars, data=sample, title="t-SNE Projection of Examples")

  chart.save(FLAGS.plot_dir + "/tsne.html", format="html")
# Diverging stacked bar chart of Likert-style survey responses.
# Colors run red -> neutral grey -> blue across the response spectrum.
_likert_responses = [
    "Very Unlikely",
    "Unlikely",
    "Does Not Matter",
    "Likely",
    "Very Likely",
]
_likert_colors = ["#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab"]
color_scale = alt.Scale(domain=_likert_responses, range=_likert_colors)

# Shared y-axis config: no ticks/domain line, padded label area so the
# question labels don't get clipped.
y_axis = alt.Axis(
    title='Statement Study',
    ticks=False,
    domain=False,
    offset=5,
    minExtent=60,
)

# Each bar segment spans percentage_start..percentage_end on a shared
# baseline, producing the diverging layout.
chart = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        x='percentage_start:Q',
        x2='percentage_end:Q',
        y=alt.Y('question:N', axis=y_axis),
        color=alt.Color(
            'type:N',
            scale=color_scale,
            legend=alt.Legend(title='Response'),
        ),
    )
)

chart.save('statement_divergent_chart.html')
# Example #30 (示例#30)
def make_org_quantiles_plots(infile):
    """Render per-organization traffic charts, bucketed by user-use decile.

    Reads pre-grouped flow records from ``infile`` (a parquet file that,
    from the columns used here, must carry at least ``user``, ``org``,
    ``bytes_up`` and ``bytes_down`` — confirm against the writer), groups
    users into ten bins of average daily use, and saves three charts under
    ``renders/``:

    * ``bytes_per_org_per_quantile_bar.png`` — normalized stacked bars of
      traffic per org per bin,
    * ``bytes_per_org_per_quantile_line.png`` — average MB per online day
      per org per bin,
    * ``bytes_per_org_share_per_quantile_line.png`` — each org's average
      share of a user's traffic per bin.

    Args:
        infile: path to the grouped-flows parquet file.

    Side effects: reads ``data/clean/user_active_deltas.parquet`` and
    writes the three PNGs above; returns nothing.
    """
    grouped_flows = infra.pd.read_parquet(infile)
    grouped_flows = grouped_flows.reset_index()
    # Total traffic per row is up + down combined.
    grouped_flows["bytes_total"] = grouped_flows["bytes_up"] + grouped_flows[
        "bytes_down"]
    # Per-(user, org) traffic totals across the whole capture.
    user_org_total = grouped_flows[["user", "org", "bytes_total"
                                    ]].groupby(["user",
                                                "org"]).sum().reset_index()

    # Filter users by time in network to eliminate early incomplete samples
    user_active_ranges = infra.pd.read_parquet(
        "data/clean/user_active_deltas.parquet")[[
            "user", "days_since_first_active", "days_active", "days_online"
        ]]
    # Drop users that joined less than a week ago.
    users_to_analyze = user_active_ranges.loc[
        user_active_ranges["days_since_first_active"] > 7]
    # Drop users active for less than one day
    users_to_analyze = users_to_analyze.loc[
        users_to_analyze["days_active"] > 1, ]

    # Sort orgs by total amount of bytes.
    org_totals = grouped_flows.groupby("org").sum().reset_index()
    # NOTE(review): set_index().reset_index() only moves bytes_total to the
    # front; the ordering itself comes from sort_values above.
    org_sort_order = org_totals.sort_values(
        "bytes_total", ascending=False).set_index("bytes_total").reset_index()
    # rank=1 is the biggest org; used later as the stacking order channel.
    org_sort_order["rank"] = org_sort_order["bytes_total"].rank(
        method="min", ascending=False)
    org_sort_list = org_sort_order["org"].tolist()

    # Group users by quantiles of their daily use.
    user_totals = user_org_total.groupby("user").sum().reset_index()
    # Inner merge drops users filtered out above.
    user_totals = user_totals.merge(users_to_analyze, on="user", how="inner")
    user_totals["avg_daily_bytes"] = user_totals["bytes_total"] / user_totals[
        "days_online"]
    user_totals["rank_total"] = user_totals["bytes_total"].rank(method="min",
                                                                pct=True)

    user_totals["rank_daily"] = user_totals["avg_daily_bytes"].rank(
        method="min")
    # Ten equal-width bins over the daily-use rank, i.e. use deciles.
    user_totals["quantile"] = pd.cut(user_totals["rank_daily"],
                                     10,
                                     precision=0,
                                     right=False,
                                     include_lowest=True)

    # Compute the share of each user's traffic in each org
    user_shares = user_totals.rename(
        columns={"bytes_total": "user_bytes_total"})
    user_shares = user_org_total.merge(
        user_shares[["user", "user_bytes_total"]], on="user", how="inner")
    # org_share: fraction of this user's total bytes that went to this org.
    user_shares["org_share"] = user_shares["bytes_total"] / user_shares[
        "user_bytes_total"]
    user_shares = user_shares[["user", "org", "org_share"]]

    # Merge the user quantile information back into the flows, and then group by category
    quantile_flows = user_org_total.merge(
        user_totals[["user", "quantile", "days_online"]],
        on="user",
        how="inner")
    # Normalize each user's traffic by their number of online days.
    quantile_flows["normalized_bytes_total"] = quantile_flows[
        "bytes_total"] / quantile_flows["days_online"]

    # Merge category share information into the plot frame
    quantile_flows = quantile_flows.merge(user_shares,
                                          on=["user", "org"],
                                          how="inner")

    # Compute means for quantiles and quantile labels
    quantile_totals = quantile_flows.groupby(["quantile", "org"]).mean()
    quantile_totals = quantile_totals.reset_index()
    # Interval objects don't serialize well for Altair, so keep a string copy.
    quantile_totals["quantile_str"] = quantile_totals["quantile"].apply(
        lambda x: str(x))

    # Add sort information back to rendered dataframe
    quantile_totals = quantile_totals.merge(org_sort_order[["org", "rank"]],
                                            on="org",
                                            how="inner")

    # This might not be showing exactly what I want to show, since in merging
    # users some users that dominate video could be overrepresented. Maybe
    # want to merge on the fraction of traffic to each part from each user?
    # Are users counted equally or are bytes counted equally...
    # Chart 1: normalized stacked bar of traffic composition per decile.
    alt.Chart(quantile_totals[[
        "org", "quantile_str", "bytes_total", "rank", "normalized_bytes_total"
    ]]).mark_bar().encode(
        x="quantile_str:O",
        y=alt.Y(
            "normalized_bytes_total",
            stack="normalize",
            sort=org_sort_list,
        ),
        color=alt.Color(
            "org:N",
            scale=alt.Scale(scheme="tableau20"),
            sort=org_sort_list,
        ),
        # Stack biggest orgs first so the legend/stack order matches rank.
        order=alt.Order(
            "rank",
            sort="descending",
        ),
    ).properties(width=500, ).save(
        "renders/bytes_per_org_per_quantile_bar.png",
        scale_factor=2,
    )

    # Convert to MB for a readable line-chart axis.
    quantile_totals["normalize_mb_total"] = quantile_totals[
        "normalized_bytes_total"] / 1000.0**2

    # Generate an order based on the intervals, not the strings, to correctly sort the axis.
    quantiles = quantile_totals[["quantile", "quantile_str"
                                 ]].groupby(["quantile"]).first()
    quantiles = quantiles["quantile_str"].to_list()
    # Chart 2: average MB per online day, one line per org.
    alt.Chart(quantile_totals[[
        "org", "quantile_str", "bytes_total", "rank", "normalize_mb_total"
    ]]).mark_line().encode(
        x=alt.X(
            "quantile_str:N",
            title="User by Rank of Average Use Per Online Day (Grouped)",
            sort=quantiles,
        ),
        y=alt.Y("normalize_mb_total",
                sort=org_sort_list,
                title="Average Traffic Per Online Day (MB)"),
        color=alt.Color(
            "org:N",
            scale=alt.Scale(scheme="tableau20"),
            sort=org_sort_list,
            # Legend drawn inside the plot area to keep the figure compact.
            legend=alt.Legend(
                title="Organization",
                orient="none",
                fillColor="white",
                labelLimit=500,
                padding=5,
                strokeColor="black",
                columns=3,
                labelFontSize=8,
                legendX=15,
                legendY=5,
                symbolLimit=20,
            ),
        ),
        order=alt.Order(
            "rank",
            sort="descending",
        ),
    ).configure_axisX(
        labelAngle=0,
        labelFontSize=7,
    ).properties(width=500, ).save(
        "renders/bytes_per_org_per_quantile_line.png",
        scale_factor=2,
    )

    # Chart 3: each org's average share of a user's traffic, per decile.
    alt.Chart(quantile_totals[[
        "org", "quantile_str", "org_share", "rank"
    ]]).mark_line().encode(
        x=alt.X(
            "quantile_str:N",
            title="User by Rank of Average Use Per Online Day (Grouped)",
            sort=quantiles,
        ),
        y=alt.Y("org_share",
                sort=org_sort_list,
                title="Average Fraction of Traffic Per User"),
        color=alt.Color(
            "org:N",
            scale=alt.Scale(scheme="tableau20"),
            sort=org_sort_list,
            legend=alt.Legend(
                title="Organization",
                # orient="none",
                # fillColor="white",
                labelLimit=500,
                # padding=5,
                # strokeColor="black",
                # columns=3,
                # labelFontSize=8,
                # legendX=15,
                # legendY=5,
                symbolLimit=20,
            ),
        ),
        order=alt.Order(
            "rank",
            sort="descending",
        ),
    ).configure_axisX(
        labelAngle=0,
        labelFontSize=7,
    ).properties(width=500, ).save(
        "renders/bytes_per_org_share_per_quantile_line.png",
        scale_factor=2,
    )