Пример #1
0
# A single st.write call is given several positional arguments (text plus a number).
st.write('1 + 1 = ', 2)
# Text, an object and more text in one call; `data_frame` is defined outside this excerpt.
st.write('Below is a DataFrame:', data_frame, 'Above is a dataframe.')


"""**1.2 直接写表格/图表**"""

import pandas as pd
import numpy as np
import altair as alt

# 200 rows of standard-normal samples in three columns named a, b and c.
df = pd.DataFrame(data=np.random.randn(200, 3), columns=list('abc'))

# Circle marks: position from columns a/b, column c drives both size and colour,
# and all three columns are listed in the tooltip.
c = alt.Chart(df).mark_circle()
c = c.encode(
    x='a',
    y='b',
    size='c',
    color='c',
    tooltip=['a', 'b', 'c'],
)

st.write(c)


"""**1.3 直接写代码**"""
# Snippet handed to st.code; the second line keeps its original five-space indent.
code = (
    'def hello():\n'
    '     print("Hello, Streamlit!")'
)
st.code(code, language='python')



""" 

# 2 数据展示 - Display data
Пример #2
0
                data=data,
                get_position=["lon", "lat"],
                auto_highlight=True,
                radius=100,
                elevation_scale=50,
                elevation_range=[0, 1000],
                pickable=True,
                extruded=True,
            ),
        ],
    ))

# Start and end hour labels shared by both headings below.
hour_span = (hour, (hour + 1) % 24)
st.subheader("Répartition par minute entre  %i:00 et %i:00" % hour_span)

# Keep only the rows whose timestamp falls in [hour, hour + 1).
pickup_hour = data[DATE_TIME].dt.hour
filtered = data[(pickup_hour >= hour) & (pickup_hour < (hour + 1))]

# Count rows per minute of the hour (60 one-minute bins).
hist = np.histogram(filtered[DATE_TIME].dt.minute, bins=60, range=(0, 60))[0]
chart_data = pd.DataFrame({"minute": range(60), "pickups": hist})

minute_area = alt.Chart(chart_data).mark_area(interpolate='step-after').encode(
    x=alt.X("minute:Q", scale=alt.Scale(nice=False)),
    y=alt.Y("pickups:Q"),
    tooltip=['minute', 'pickups'],
)
st.altair_chart(minute_area, use_container_width=True)

# Optional raw-data table, off by default.
show_raw_rows = st.checkbox("Voir les données brutes", False)
if show_raw_rows:
    st.subheader("Données brutes par minute entre %i:00 et %i:00" % hour_span)
    st.write(data)
Пример #3
0
"""
Error Bars showing Confidence Interval
======================================
This example shows how to draw error bars using confidence intervals.
The confidence intervals are computed internally in vega by
a non-parametric `bootstrap of the mean <https://github.com/vega/vega-statistics/blob/master/src/bootstrapCI.js>`_.
"""
# category: bar charts
import altair as alt
from vega_datasets import data

# Load the barley dataset exposed by vega_datasets.
barley = data.barley()

# Filled black points at the mean yield of each variety; the x scale is not
# forced to include zero.
mean_yield_x = alt.X(
    'mean(yield)',
    scale=alt.Scale(zero=False),
    axis=alt.Axis(title='Barley Yield'),
)
points = alt.Chart(barley).mark_point(filled=True).encode(
    x=mean_yield_x,
    y='variety',
    color=alt.value('black'),
)

# Rules spanning the ci0..ci1 aggregates of yield for each variety.
error_bars = alt.Chart(barley).mark_rule().encode(
    x='ci0(yield)',
    x2='ci1(yield)',
    y='variety',
)

# Layer the two charts; as a bare expression this is the displayed result.
points + error_bars
Пример #4
0
def channel_curve_compare(experiment_df,
                          width=800,
                          heights=(50, 400),
                          line_size=5,
                          legend_mark_size=100):
    """Creates an interactive curve comparison chart for a list of experiments.

    It lets you tick or untick experiments that you want to compare by clicking on the legend (shift+click for multi),
    you can select the x range which you want to investigate by selecting it on the top chart and you
    are shown the actual values on mouse hover.

    The chart is built on top of Altair, which in turn is built on top of Vega-Lite and Vega.
    That means you can use the objects produced by this function (converting them first to json with the
    .to_json() method) in your html webpage without any problem.

    The input dataframe is not modified: the chart is built from a copy whose columns have been
    renamed by `_preprocess_columns`.

    Args:
        experiment_df('pandas.DataFrame'): Dataframe containing ['id','x','CHANNEL_NAME'].
            It can be obtained from a list of experiments by using the
            `neptunelib.api.concat_experiments_on_channel` function. If the len of the dataframe exceeds 5000 it will
            cause the MaxRowsError. Read the Note to learn why and how to disable it.
        width(int): width of the chart. Default is 800.
        heights(tuple): heights of the subcharts. The first value controls the top chart, the second
            controls the bottom chart. Default is (50,400).
        line_size(int): size of the lines. Default is 5.
        legend_mark_size(int): size of the marks in legend. Default is 100.

    Returns:
        `altair.Chart`: Altair chart object which will be automatically rendered in the notebook. You can
        also run the `.to_json()` method on it to convert it to the Vega-Lite json format.

    Raises:
        AssertionError: if `experiment_df` does not have exactly 3 columns.

    Examples:
        Instantiate a session::

            from neptunelib.api.session import Session
            session = Session()

        Fetch a project and a list of experiments::

            project = session.get_projects('neptune-ml')['neptune-ml/Salt-Detection']
            experiments = project.get_experiments(state=['aborted'], owner=['neyo'], min_running_time=100000)

        Construct a channel value dataframe::

            from neptunelib.api.utils import concat_experiments_on_channel
            compare_df = concat_experiments_on_channel(experiments,'unet_0 epoch_val iout loss')

        Plot interactive chart in notebook::

            from neptunelib.viz.experiments import channel_curve_compare
            channel_curve_compare(compare_df)

    Note:
        Because Vega-Lite visualizations keep all the chart data in the HTML the visualizations can consume huge
        amounts of memory if not handled properly. That is why, by default the hard limit of 5000 rows is set to
        the len of dataframe. That being said, you can disable it by adding the following line in the notebook or code::

            import altair as alt
            alt.data_transformers.enable('default', max_rows=None)

    """

    # Build the message from adjacent literals so no source indentation leaks
    # into the text (backslash continuations inside a literal keep it).
    column_count = len(experiment_df.columns)
    column_error = ('Experiment dataframe should have 3 columns '
                    '["id","x", "CHANNEL_NAME"]. '
                    'It has {} namely {}').format(column_count,
                                                  experiment_df.columns)
    assert column_count == 3, column_error

    top_height, bottom_height = heights
    prep_cols, channel_name = _preprocess_columns(experiment_df.columns)
    # Rename columns on a copy so the caller's dataframe is left untouched.
    chart_df = experiment_df.copy()
    chart_df.columns = prep_cols

    # Hover selection: nearest x value under the mouse, empty until hovered.
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=['x'],
                            empty='none')
    # Brush on the top chart restricting the x range of the bottom chart.
    interval = alt.selection(type='interval', encodings=['x'])
    # Click (shift+click for several) on legend entries to toggle experiments.
    legend_selection = alt.selection_multi(fields=['id'])

    legend = alt.Chart().mark_point(filled=True, size=legend_mark_size).encode(
        y=alt.Y('id:N'),
        color=alt.condition(
            legend_selection, alt.Color('id:N', legend=None),
            alt.value('lightgray'))).add_selection(legend_selection)

    # Invisible points that only carry the hover selection.
    selectors = alt.Chart().mark_point().encode(
        x='x:Q',
        opacity=alt.value(0),
    ).add_selection(nearest)

    top_view = alt.Chart(
        width=width, height=top_height).mark_line(size=line_size).encode(
            x=alt.X('x:Q', title=None),
            y=alt.Y('y:Q', scale=alt.Scale(zero=False), title=None),
            color=alt.Color('id:N', legend=None),
            opacity=alt.condition(
                legend_selection, alt.OpacityValue(1),
                alt.OpacityValue(0.0))).add_selection(interval)

    line = alt.Chart().mark_line(size=line_size).encode(
        x=alt.X('x:Q', title='iteration'),
        y=alt.Y('y:Q', scale=alt.Scale(zero=False), title=channel_name),
        color=alt.Color('id:N', legend=None),
        opacity=alt.condition(legend_selection, alt.OpacityValue(1),
                              alt.OpacityValue(0.0)))

    # Point markers shown only at the hovered x value.
    points = line.mark_point().encode(
        color=alt.condition(legend_selection, alt.Color('id:N', legend=None),
                            alt.value('white')),
        opacity=alt.condition(nearest, alt.value(1), alt.value(0)))

    # Value labels next to the hovered points.
    text = line.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.condition(nearest, 'y:Q', alt.value(' ')),
        opacity=alt.condition(legend_selection, alt.OpacityValue(1),
                              alt.OpacityValue(0.0)))

    # Vertical rule at the hovered x value.
    rules = alt.Chart().mark_rule(color='gray').encode(
        x='x:Q', ).transform_filter(nearest)

    bottom_view = alt.layer(line,
                            selectors,
                            points,
                            rules,
                            text,
                            width=width,
                            height=bottom_height).transform_filter(interval)

    combined = alt.hconcat(alt.vconcat(top_view, bottom_view),
                           legend,
                           data=chart_df)
    return combined
Пример #5
0
"""
Gantt Chart
-----------------
This example shows how to make a simple gantt chart.
"""
# category: basic charts

import altair as alt
import pandas as pd

# One row per task with its numeric start and end values.
data = pd.DataFrame({
    "task": ["A", "B", "C"],
    "start": [1, 3, 8],
    "end": [3, 8, 10],
})

# Bars running from `start` to `end` on x, one row of bars per task on y.
gantt_bars = alt.Chart(data).mark_bar()
chart = gantt_bars.encode(x='start', x2='end', y='task')
Пример #6
0
=================
This example shows the London tube lines against the background of the
borough boundaries. It is based on the vega-lite example at
https://vega.github.io/vega-lite/examples/geo_layer_line_london.html.
"""
# category: geographic

import altair as alt
from vega_datasets import data

boroughs = alt.topo_feature(data.londonBoroughs.url, 'boroughs')
tubelines = alt.topo_feature(data.londonTubeLines.url, 'line')
centroids = data.londonCentroids.url

background = alt.Chart(boroughs).mark_geoshape(
    stroke='white',
    strokeWidth=2).encode(color=alt.value('#eee'), ).properties(width=700,
                                                                height=500)

labels = alt.Chart(centroids).mark_text().encode(
    longitude='cx:Q',
    latitude='cy:Q',
    text='bLabel:N',
    size=alt.value(8),
    opacity=alt.value(0.6)
).transform_calculate(
    "bLabel",
    "indexof (datum.name,' ') > 0  ? substring(datum.name,0,indexof(datum.name, ' ')) : datum.name"
)

line_scale = alt.Scale(domain=[
    "Bakerloo", "Central", "Circle", "District", "DLR", "Hammersmith & City",
Пример #7
0
def main():
    """Render the Uber-pickups page: intro, hour slider, map, histogram, raw data."""
    st.title("Uber Pickups in New York City")
    st.markdown("""
    This is a demo of a Streamlit app that shows the Uber pickups
    geographical distribution in New York City. Use the slider
    to pick a specific hour and look at how the charts change.

    [See source code](https://github.com/streamlit/demo-uber-nyc-pickups/blob/master/app.py)
    """)

    data = load_data(100000)

    hour = st.slider("Hour to look at", 0, 23)
    assert isinstance(hour, int)

    # Restrict everything below to the selected hour of day.
    data = data[data[DATE_TIME].dt.hour == hour]
    hour_span = (hour, (hour + 1) % 24)

    st.subheader("Geo data between %i:00 and %i:00" % hour_span)
    # Centre the map on the average coordinates of the remaining rows.
    midpoint = (np.average(data["lat"]), np.average(data["lon"]))
    center_lat, center_lon = midpoint

    view_state = {
        "latitude": center_lat,
        "longitude": center_lon,
        "zoom": 11,
        "pitch": 50,
    }
    hexagon_layer = pdk.Layer(
        "HexagonLayer",
        data=data,
        get_position=["lon", "lat"],
        radius=100,
        elevation_scale=4,
        elevation_range=[0, 1000],
        pickable=True,
        extruded=True,
    )
    deck = pdk.Deck(
        map_style="mapbox://styles/mapbox/light-v9",
        initial_view_state=view_state,
        layers=[hexagon_layer],
    )
    st.write(deck)

    st.subheader("Breakdown by minute between %i:00 and %i:00" % hour_span)
    pickup_hour = data[DATE_TIME].dt.hour
    filtered = data[(pickup_hour >= hour) & (pickup_hour < (hour + 1))]
    # Count rows per minute of the hour (60 one-minute bins).
    hist = np.histogram(filtered[DATE_TIME].dt.minute, bins=60,
                        range=(0, 60))[0]
    chart_data = pd.DataFrame({"minute": range(60), "pickups": hist})

    minute_area = alt.Chart(chart_data).mark_area(
        interpolate='step-after').encode(
            x=alt.X("minute:Q", scale=alt.Scale(nice=False)),
            y=alt.Y("pickups:Q"),
            tooltip=['minute', 'pickups'],
        )
    st.altair_chart(minute_area, use_container_width=True)

    show_raw_data = st.checkbox("Show raw data", False)
    assert isinstance(show_raw_data, bool)
    if not show_raw_data:
        return
    st.subheader("Raw data by minute between %i:00 and %i:00" % hour_span)
    st.write(data)
Пример #8
0
def make_la_positive_test_chart(df, positive_lower_bound, positive_upper_bound,
                                testing_lower_bound, testing_upper_bound,
                                chart_title1, chart_title2):
    """Display two side-by-side weekly bar charts with dashed reference lines.

    The left chart plots `pct_positive` per `week2` with rules at the two
    positive bounds; the right chart layers `weekly_tests` and `weekly_cases`
    bars with rules at the two testing bounds multiplied by 7. The combined
    chart is passed to `show_svg`; nothing is returned.
    """
    # Chart width is 10 per distinct week present in the data.
    panel_width = len(df.week2.unique()) * 10

    weekly_base = (
        alt.Chart(df)
        .mark_bar(binSpacing=bin_spacing)
        .encode(x=alt.X("week2", title="date", sort=None))
    )

    def bars(fill, y_channel):
        # Bars in the given colour for one y encoding, derived from the shared base.
        return weekly_base.mark_bar(color=fill).encode(y=y_channel)

    def dashed_rule(level):
        # Horizontal dashed maroon rule at y == level.
        return (
            alt.Chart(pd.DataFrame({"y": [level]}))
            .mark_rule(color=maroon, strokeDash=[6, 3])
            .encode(y="y")
        )

    positive_bar = bars(
        navy,
        alt.Y("pct_positive", title="Percent", axis=alt.Axis(format="%")),
    )
    positive_lower_line = dashed_rule(positive_lower_bound)
    positive_upper_line = dashed_rule(positive_upper_bound)
    positive_layers = positive_bar + positive_lower_line + positive_upper_line
    positive_chart = positive_layers.properties(title=chart_title1,
                                                width=panel_width)

    test_bar = bars(blue, alt.Y("weekly_tests", title="# Weekly Tests"))
    num_positive_bar = bars(gray, alt.Y("weekly_cases", title="# Weekly Tests"))
    weekly_test_lower_line = dashed_rule(testing_lower_bound * 7)
    weekly_test_upper_line = dashed_rule(testing_upper_bound * 7)
    test_layers = (test_bar + num_positive_bar + weekly_test_lower_line
                   + weekly_test_upper_line)
    test_chart = test_layers.properties(title=chart_title2, width=panel_width)

    combined_weekly_chart = alt.hconcat(positive_chart, test_chart)
    combined_weekly_chart = combined_weekly_chart.configure_title(
        fontSize=title_font_size, font=font_name, anchor="middle", color="black"
    )
    combined_weekly_chart = combined_weekly_chart.configure_axis(
        gridOpacity=grid_opacity, domainOpacity=domain_opacity
    )
    combined_weekly_chart = combined_weekly_chart.configure_view(
        strokeOpacity=stroke_opacity
    )

    show_svg(combined_weekly_chart)
Пример #9
0
def make_lacounty_hospital_chart(df):
    """Display three line charts of hospital equipment, coloured by `equipment`.

    In order: `pct_available_avg3` (percent axis, with a dashed rule at 0.3),
    `n_available_avg3`, and `n_covid_avg7`, each plotted against `date2`.
    Every chart is passed to `show_svg`; nothing is returned.
    """
    panel_width = 350
    equipment_names = ["Acute Care Beds", "ICU Beds", "Ventilators"]
    # Colours in the same order as equipment_names.
    equipment_colors = [green, navy, orange]

    def date_channel():
        # Temporal x axis shared by all three charts.
        return alt.X(
            "date2:T",
            title="date",
            axis=alt.Axis(format=fulldate_format),
        )

    def equipment_channel():
        # Fixed colour per equipment type.
        return alt.Color(
            "equipment",
            scale=alt.Scale(
                domain=list(equipment_names),
                range=list(equipment_colors),
            ),
        )

    def equipment_lines(y_channel):
        # One line per equipment type for the given y encoding.
        return alt.Chart(df).mark_line().encode(
            x=date_channel(),
            y=y_channel,
            color=equipment_channel(),
        )

    def finish(chart, heading):
        # Apply title, width and the shared title/axis/view configuration.
        return (
            chart.properties(title=heading, width=panel_width)
            .configure_title(
                fontSize=title_font_size, font=font_name, anchor="middle",
                color="black",
            )
            .configure_axis(
                gridOpacity=grid_opacity, domainOpacity=domain_opacity,
                ticks=False,
            )
            .configure_view(strokeOpacity=stroke_opacity)
        )

    pct_lines = equipment_lines(
        alt.Y("pct_available_avg3", title="3-day avg",
              axis=alt.Axis(format="%"))
    )
    # Dashed horizontal reference rule at y = 0.3.
    reference_rule = (
        alt.Chart(pd.DataFrame({"y": [0.3]}))
        .mark_rule(color=maroon, strokeDash=[6, 3])
        .encode(y="y")
    )
    hospital_pct_chart = finish(
        pct_lines + reference_rule,
        "Percent of Available Hospital Equipment by Type",
    )

    hospital_num_chart = finish(
        equipment_lines(alt.Y("n_available_avg3", title="3-day avg")),
        "Number of Available Hospital Equipment by Type",
    )

    hospital_covid_chart = finish(
        equipment_lines(alt.Y("n_covid_avg7", title="7-day avg")),
        "Number of COVID-Occupied / Under Investigation Equipment Use by Type",
    )

    for finished_chart in (hospital_pct_chart, hospital_num_chart,
                           hospital_covid_chart):
        show_svg(finished_chart)
Пример #10
0
                     names=["lon", "lat", "city"])
# Space-separated input files without header rows; column names are supplied here.
temperatures = pd.read_csv("data/minard_temperature.txt",
                           sep=" ",
                           names=["lon", "temp", "days", "day"])
troops = pd.read_csv(
    "data/minard_troops.txt",
    sep=" ",
    names=["lon", "lat", "survivors", "direction", "division"])

# Label text "<temp>° <day>" with dashes in the day replaced by spaces.
# Columns are addressed by label: integer keys on a label-indexed Series rely
# on a deprecated positional fallback in pandas.
temperatures["label"] = temperatures.fillna("").apply(
    axis=1,
    func=lambda row: "{}° {}".format(row["temp"],
                                     row["day"].replace("-", " ")))
troops = troops.sort_values(by=["division", "survivors"], ascending=False)

# First attempt: plain x/y positions, colour by direction.
troops_chart = alt.Chart(troops).mark_trail().encode(
    x='lon:Q',
    y='lat:Q',
    size=alt.Size('survivors', scale=alt.Scale(range=[1, 75]), legend=None),
    color=alt.Color('direction'))
troops_chart.save("troops_first_try.html")

# Second version: geographic longitude/latitude channels with a mercator
# projection, one trail per division, and fixed colours for directions A and R.
troops_chart = alt.Chart(troops).mark_trail().encode(
    longitude='lon:Q',
    latitude='lat:Q',
    size=alt.Size('survivors', scale=alt.Scale(range=[1, 75]), legend=None),
    detail='division',
    color=alt.Color('direction',
                    scale=alt.Scale(domain=['A', 'R'],
                                    range=['#EBD2A8', '#888888']),
                    legend=None),
).project(type="mercator")
troops_chart.save("troops.html")
Пример #11
0
    return d


# Per-base counts for the input sequence, indexed by the letters A/T/G/C below.
X = DNA_base_count(sequence)

# Bare expression; presumably rendered by Streamlit's "magic" display - confirm it is enabled.
X

# Section 2: one sentence per nucleotide.
st.subheader('2. Print text')
for base, base_name in (('A', 'adenine'), ('T', 'thymine'),
                        ('G', 'guanine'), ('C', 'cytosine')):
    st.write('There are  ' + str(X[base]) + ' ' + base_name + ' (' + base + ')')

# Section 3: the same counts as a two-column table (base, count).
st.subheader('3. Display DataFrame')
df = (
    pd.DataFrame.from_dict(X, orient='index')
    .rename(columns={0: 'count'})
    .reset_index()
    .rename(columns={'index': 'base'})
)
st.write(df)

# Section 4: bar chart of count per base, with a step width of 80 per bar.
st.subheader('4. Display Bar chart')
p = alt.Chart(df).mark_bar().encode(x='base', y='count')
p = p.properties(width=alt.Step(80))
st.write(p)
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:

If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

# Spiral demo: two sliders choose the number of points and turns; each point's
# radius grows linearly with its index and its angle advances within the
# current turn. The points are plotted as semi-transparent circles.
# NOTE(review): st.echo presumably displays the source of the block it wraps
# (here below the output), so the body is deliberately left without inline
# comments - confirm before editing inside the `with`.
with st.echo(code_location='below'):
    total_points = st.slider("Number of points in spiral", 1, 5000, 2000)
    num_turns = st.slider("Number of turns in spiral", 1, 100, 9)

    Point = namedtuple('Point', 'x y')
    data = []

    points_per_turn = total_points / num_turns

    for curr_point_num in range(total_points):
        curr_turn, i = divmod(curr_point_num, points_per_turn)
        angle = (curr_turn + 1) * 2 * math.pi * i / points_per_turn
        radius = curr_point_num / total_points
        x = radius * math.cos(angle)
        y = radius * math.sin(angle)
        data.append(Point(x, y))

    st.altair_chart(
        alt.Chart(pd.DataFrame(data), height=500,
                  width=500).mark_circle(color='#0068c9',
                                         opacity=0.5).encode(x='x:Q', y='y:Q'))
Пример #13
0
    def run(self, num_clusters, title):
        """Cluster `self.data.summary` with k-means and write an HTML report.

        The summaries are vectorised with TF-IDF, clustered into
        `num_clusters` groups, and projected to 2-D with MDS on cosine
        distances. An Altair scatter plot with a clickable legend is saved to
        ``output/<title>-<num_clusters>.html``; a table of top terms and job
        titles per cluster and a TF-IDF ranking for the first document are
        then appended to that same file.

        Args:
            num_clusters: number of k-means clusters.
            title: used to build the output file name.

        Returns:
            None. The report is written to disk and its path is printed.
        """
        # minimal document frequency is 10% of total dataset
        minimal_df = int(len(self.data.summary) * (10) / 100)

        tfidf = TfidfVectorizer(min_df=minimal_df,
                                max_df=0.95,
                                max_features=3000,
                                stop_words=self.stopwords)

        tfidf_matrix = tfidf.fit_transform(self.data.summary)
        terms = tfidf.get_feature_names()

        from sklearn.metrics.pairwise import cosine_similarity

        # Cosine distance between every pair of documents.
        dist = 1 - cosine_similarity(tfidf_matrix)

        km = KMeans(n_clusters=num_clusters).fit(tfidf_matrix)
        clusters = km.labels_.tolist()

        opportunities = {
            "title": self.data.title.to_list(),
            "summary": self.data.summary.to_list(),
            "cluster": clusters,
        }
        # Indexed by cluster label so frame.loc[i] selects the rows of cluster i.
        frame = pd.DataFrame(opportunities,
                             index=[clusters],
                             columns=["title", "cluster"])

        # Computed once and reused by the description and the term table.
        cluster_sizes = frame["cluster"].value_counts()
        # Term indices of each cluster centre, largest weight first.
        order_centroids = km.cluster_centers_.argsort()[:, ::-1]

        # Reduces the shape of TF-IDF vectors to 2D with
        # MULTIDIMENSIONAL SCALING
        mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
        pos = mds.fit_transform(dist)  # shape (n_samples, n_components)
        xs, ys = pos[:, 0], pos[:, 1]

        # SET UP THE CLUSTER'S DESCRIPTION WITH A DICT
        # "<top term> | <second term> [<size> jobs]" for each cluster.
        cluster_description = {}
        for i in range(num_clusters):
            cluster_description[i] = (
                terms[order_centroids[i][0]] + " | " +
                terms[order_centroids[i][1]] + " [" +
                str(cluster_sizes[i]) + " jobs]")

        # GRAPH WITH ALTAIR PACKAGE

        view = pd.DataFrame(
            dict(x=xs, y=ys, cluster=clusters,
                 title=self.data.title.to_list()))

        view["description"] = view["cluster"].map(cluster_description)

        # Pan/zoom bound to the scales of the scatter plot.
        scales = alt.selection_interval(bind="scales")

        # Clicking legend entries selects clusters by description.
        selection = alt.selection_multi(fields=["description"])

        color = alt.condition(
            selection,
            alt.Color("description:N",
                      scale=alt.Scale(scheme="category10"),
                      legend=None),
            alt.value("lightgray"),
        )

        scatter = (alt.Chart(view).mark_point().encode(
            x="x:Q",
            y="y:Q",
            color=color,
            tooltip="title",
            shape=alt.Shape("description:N", legend=None),
        ).properties(width=600, height=600).add_selection(scales))

        legend = (alt.Chart(view).mark_point().encode(
            y=alt.Y("description:N", axis=alt.Axis(orient="right")),
            color=color,
            shape=alt.Shape("description:N", legend=None),
        ).add_selection(selection))

        # join CHARTS!
        chart = ((scatter | legend).configure(
            background="white").configure_axisLeft(
                grid=False,
                labels=False,
                domain=False,
                ticks=False,
                title=None).configure_axisX(
                    grid=False,
                    labels=False,
                    domain=False,
                    ticks=False,
                    title=None).properties(
                        title="1. Clusters").configure_title(fontSize=20,
                                                             offset=5,
                                                             orient="top",
                                                             anchor="middle"))

        save_url = "output/" + title + "-" + str(num_clusters) + ".html"
        chart.save(save_url)

        # MAKING TABLE OF TERMS

        term_html = '\t<h3 align="center">2. Ranking de Termos por Cluster:</h3>\n\n'

        term_html += '\t<table border="1" class="dataframe"> \n\t<tbody>'
        for i in range(num_clusters):
            term_html += "\n\t<tr>"
            term_html += "\n\t\t<th> Termos do Cluster %d </th>" % i

            term_html += "<td>"

            # replace 10 with n terms per cluster
            for ind in order_centroids[i, :10]:
                # NOTE(review): .split() returns a list, so each term is
                # rendered in list form (e.g. ['word']) - confirm intended.
                term_html += " %s " % terms[ind].split()

            term_html += "</td>"
            term_html += "<th> Cargos do Cluster %d </th>" % i
            term_html += "<td>"

            # replace 4 with n jobs per cluster
            for job_title in frame.loc[i]["title"].values.tolist()[0:4]:
                term_html += " %s |" % job_title

            term_html += "</td></tr>"

        term_html += "\n\t</tbody></table>"

        # MAKING TF-IDF RANKING TABLE

        tfidf_html = '\t<h3 align="center">3. Ranking TF-IDF:</h3>\n\n'

        first_vector_tfidfvectorizer = tfidf_matrix[0]

        # TF-IDF values in a pandas data frame
        df = pd.DataFrame(
            first_vector_tfidfvectorizer.T.todense(),
            index=terms,
            columns=["tfidf"],
        )
        tfidf_html += df.sort_values(by=["tfidf"], ascending=False).to_html()

        # UPDATE HTML
        # Drop the last 15 characters of the saved page before appending;
        # presumably these are its closing body/html tags - TODO confirm
        # against the output of chart.save.
        with open(save_url, "r") as report:
            complete_html = report.read()[:-15]
        complete_html += term_html + tfidf_html + "</body></html>"

        with open(save_url, "w") as report:
            report.write(complete_html)

        print("\nDone! Output saved at:\n\t" + save_url)
Пример #14
0
"""
Line Chart with Percent axis
----------------------------
This example shows how to format the tick labels of the
y-axis of a chart as percentages.
"""

import altair as alt
from altair.expr import datum
from vega_datasets import data

# URL of the jobs dataset exposed by vega_datasets.
source = data.jobs.url

# Line per sex of `perc` by year, y axis formatted as a percentage,
# restricted to rows whose job is 'Welder'.
alt.Chart(source).mark_line().encode(
    x=alt.X('year:O'),
    y=alt.Y('perc:Q', axis=alt.Axis(format='%')),
    color='sex:N',
).properties(
    title='Percent of work-force working as Welders',
).transform_filter(
    datum.job == 'Welder'
)
Пример #15
0
# Fixed seed so the generated series are reproducible.
np.random.seed(42)
# Three cumulative sums of normal noise (columns A, B, C), rounded to 2 decimals,
# indexed by x = 0..99.
source = pd.DataFrame(
    np.cumsum(np.random.randn(100, 3), axis=0).round(2),
    columns=['A', 'B', 'C'],
    index=pd.RangeIndex(100, name='x'),
)
# Long format: one row per (x, category) with the value in column y.
source = source.reset_index().melt(id_vars='x', var_name='category', value_name='y')

# Single-point selection driven by mouseover: it picks the point nearest to the
# cursor, matches on the 'x' field, and is empty ('none') until something is hovered.
nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=['x'], empty='none')

def make_chart(source)

    # The basic line
    line = alt.Chart(source).mark_line(interpolate='basis').encode(
        x='x:Q',
        y='y:Q',
        color='category:N'
    )

    # Transparent selectors across the chart. This is what tells us
    # the x-value of the cursor
    selectors = alt.Chart(source).mark_point().encode(
        x='x:Q',
        opacity=alt.value(0),
    ).add_selection(
        nearest
    )

    # Draw points on the line, and highlight based on selection
    points = line.mark_point().encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0))
Пример #16
0
def setup_cases_deaths_chart(df, geog, name):
    """Build the layered "New Cases" and "New Deaths" charts for one geography.

    Args:
        df: dataframe with the columns used below (`date`, `date2`,
            `cases_avg7`, `deaths_avg7` and the three `tier*_case_cutoff`
            columns).
        geog: one of "county", "state", "msa" or "lacity"; selects how the
            chart title is built from `name`.
        name: display name of the geography.

    Returns:
        Tuple `(cases_chart, deaths_chart)` of layered Altair charts.

    Raises:
        ValueError: if `geog` is not one of the supported values.
    """
    # Define chart titles. Validate up front: an unsupported geog would
    # otherwise leave the title undefined and fail only when the charts are
    # assembled.
    title_templates = {
        "county": "{} County",
        "state": "{}",
        "msa": "{} MSA",
        # Add City of LA to this geog
        "lacity": "{}",
    }
    if geog not in title_templates:
        raise ValueError(
            "Unsupported geog {!r}; expected one of {}".format(
                geog, sorted(title_templates)))
    chart_title = title_templates[geog].format(name)

    # Set up base charts
    base = (alt.Chart(
        df.drop(columns = "date"))
        .mark_line()
        .encode(
            x=alt.X("date2",
                   title="date", axis=alt.Axis(format=fulldate_format))
        )
    )

    # Same x encoding, restricted to rows dated on or after two_weeks_ago.
    base_2weeks = (
        alt.Chart(df[df.date >= two_weeks_ago].drop(columns = "date"))
        .mark_line()
        .encode(
            x=alt.X("date2",
                    title="date", axis=alt.Axis(format=fulldate_format))
        )
    )

    # Dashed, clipped line used for the tier cutoff lines.
    tier_base = (base.mark_line(strokeDash=[2,3], clip=True))

    # Make cases charts
    cases_line = (
        base
        .encode(
            y=alt.Y("cases_avg7:Q", title="7-day avg"),
            color=alt.value(navy),
        )
    )

    cases_shaded = (
        base_2weeks
        .mark_area()
        .encode(
            y=alt.Y("cases_avg7:Q", title="7-day avg"),
            color=alt.value(light_gray)
        )
    )

    cases_extra_outline = (
        base_2weeks
        .mark_line()
        .encode(
            y=alt.Y("cases_avg7:Q", title="7-day avg"),
            color=alt.value(navy_outline)
        )
    )

    tier1_hline = (
        tier_base
        .encode(y=alt.Y("tier1_case_cutoff:Q"),
               color=alt.value(orange))
    )

    tier2_hline = (
        tier_base
        .encode(y=alt.Y("tier2_case_cutoff:Q"),
               color=alt.value(maroon))
    )

    tier3_hline = (
        tier_base
        .encode(y=alt.Y("tier3_case_cutoff:Q"),
               color=alt.value(purple))
    )

    cases_chart = (
        (cases_line + cases_shaded + cases_extra_outline +
         tier1_hline + tier2_hline + tier3_hline)
        .properties(
              title=f"{chart_title}: New Cases", width=chart_width, height=chart_height
            )
        )

    # Make deaths chart
    deaths_line = (
        base
        .encode(
            y=alt.Y("deaths_avg7:Q", title="7-day avg"),
            color=alt.value(blue),
        )
    )

    deaths_shaded = (
        base_2weeks
        .mark_area()
        .encode(
            y=alt.Y("deaths_avg7:Q", title="7-day avg"),
            color=alt.value(light_gray)
        )
    )

    # base_2weeks is already a line mark, so no mark_line() call is needed here.
    deaths_extra_outline = (
        base_2weeks
        .encode(
            y=alt.Y("deaths_avg7:Q", title="7-day avg"),
            color=alt.value(blue_outline)
        )
    )

    deaths_chart = (
        (deaths_line + deaths_shaded + deaths_extra_outline)
              .properties(
                  title=f"{chart_title}: New Deaths", width=chart_width, height=chart_height
                )
        )

    return cases_chart, deaths_chart
Пример #17
0
    layers=[{
        "type": "HexagonLayer",
        "data": data,
        "radius": 1000,
        "elevationScale": 4,
        "elevationRange": [0, 10000],
        "pickable": True,
        "extruded": True,
    }],
)

# Start and end hour labels shared by both headings below.
hour_span = (hour, (hour + 1) % 24)
st.subheader("Quebra por minuto entre %i:00 e %i:00" % hour_span)

# Keep only the rows whose timestamp falls in [hour, hour + 1).
event_hour = data[DATE_TIME].dt.hour
filtered = data[(event_hour >= hour) & (event_hour < (hour + 1))]

# Count rows per minute of the hour (60 one-minute bins).
hist = np.histogram(filtered[DATE_TIME].dt.minute, bins=60, range=(0, 60))[0]
chart_data = pd.DataFrame({"minuto": range(60), "obitos": hist})

minute_area = alt.Chart(chart_data, height=150).mark_area(
    interpolate="step-after",
    line=True,
).encode(
    x=alt.X("minuto:Q", scale=alt.Scale(nice=False)),
    y=alt.Y("obitos:Q"),
    tooltip=["minuto", "obitos"],
)
st.write(minute_area)

# Optional raw-data table, off by default.
show_raw_rows = st.checkbox("Mostrar dado cru (raw)", False)
if show_raw_rows:
    st.subheader("Dados por minuto entre %i:00 e %i:00" % hour_span)
    st.write(data)
Пример #18
0
import altair as alt
from vega_datasets import data

# Seattle weather sample data set from vega_datasets.
source = data.seattle_weather()
# Interval selection restricted to the x channel.
brush = alt.selection(type='interval', encodings=['x'])

# Bars: mean precipitation per month. Opacity is 1 for bars matching the
# brush condition and 0.7 otherwise. The data is supplied later, by alt.layer().
bars = alt.Chart().mark_bar().encode(
    x='month(date):O',
    y='mean(precipitation):Q',
    opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7)),
).add_selection(
    brush
)

# Rule: mean precipitation computed over the rows kept by the brush filter.
line = alt.Chart().mark_rule(color='firebrick').encode(
    y='mean(precipitation):Q',
    size=alt.SizeValue(3)
).transform_filter(
    brush
)
alt.renderers.enable('altair_viewer')

# Layer both charts on the shared data source.
# NOTE(review): the layered chart is neither assigned nor shown explicitly;
# presumably this relies on notebook-style display of the last expression -
# confirm, or call .show() when running as a plain script.
alt.layer(bars, line, data=source)
Пример #19
0
    # Rows of the selected series, restricted to the year columns 2005-2018.
    dfa=df.loc[(df.SeriesName == select_event),[2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018]]
    # Transpose so that years become rows, then label the columns with
    # `countries` (defined outside this fragment; presumably one name per
    # selected row, in the same order - confirm).
    dfa=dfa.T
    dfa.columns=countries
    # Turn the year index into a regular column named 'index' (used as x below).
    dfa = dfa.reset_index()



    # Fold the columns listed in `option` into key/value pairs and draw one
    # line per key, with the year on x.
    multi_lc = alt.Chart(dfa).transform_fold(
        option,
        ).mark_line().encode(
        x='index:Q',
        y=alt.Y('value:Q', title=''),
        color='key:N'


    ).properties(
        title=select_event,
        width=600,
        height=400
    ).interactive()
    # With nothing selected in `option`, fall back to Streamlit's built-in line
    # chart of the whole frame; otherwise show the folded Altair chart.
    if(len(option)==0):
            st.line_chart(dfa)
    else:
        st.write( multi_lc )


elif navigate_button=='Statistical Analysis':
    st.header("Statistical Analysis")
    
Пример #20
0
def graph_VL_PL_transit_j_cam(df_concat_pl_jo, df_pct_pl_transit, *cam):
    """
    Build the hourly chart of vehicle counts and % of HGV (PL) in transit
    for one or several cameras.

    Args:
        df_concat_pl_jo: DataFrame of vehicle counts. The columns
            ``camera_id``, ``heure``, ``type`` and ``nb_veh`` are read here.
        df_pct_pl_transit: DataFrame of the transit share. The columns
            ``camera_id``, ``heure`` and ``pct_pl_transit`` are read here,
            plus ``nb_veh_x`` and ``nb_veh_y`` when several cameras are given.
        *cam: number(s) of the camera(s) to chart. With several cameras the
            counts are summed per hour and type and then divided by the
            number of cameras, and the transit percentage is recomputed as
            ``sum(nb_veh_y) / sum(nb_veh_x) * 100``.

    Returns:
        A layered Altair chart (bars for ``nb_veh``, green line for
        ``pct_pl_transit``) with independent y scales and a width of 800.
    """
    # Title: use the site name when the camera list matches a known site,
    # otherwise fall back to the camera number(s). The lookup is done once.
    cameras = list(cam)
    voies = [
        voie for voie, cams in dico_corrsp_camera_site.items()
        if cams == cameras
    ]
    if voies:
        titre = f'Nombre de PL et % de PL en transit sur {voies[0]}'
    elif len(cam) > 1:
        titre = f'Nombre de PL et % de PL en transit au droit des caméras {cam}'
    else:
        titre = f'Nombre de PL et % de PL en transit au droit de la caméra {cam[0]}'

    # Restrict both frames to the requested cameras.
    df_concat_pl_jo_multi_cam = df_concat_pl_jo.loc[
        df_concat_pl_jo['camera_id'].isin(cam)]
    df_pct_pl_transit_multi_cam = df_pct_pl_transit.loc[
        df_pct_pl_transit['camera_id'].isin(cam)]

    if len(cam) > 1:
        # Aggregate the cameras: counts are summed then divided by the number
        # of cameras; the percentage is recomputed from the summed counts.
        df_concat_pl_jo_multi_cam = df_concat_pl_jo_multi_cam.groupby(
            ['heure', 'type']).agg({'nb_veh': 'sum'}).reset_index()
        df_concat_pl_jo_multi_cam['nb_veh'] = (
            df_concat_pl_jo_multi_cam['nb_veh'] / len(cam))
        df_pct_pl_transit_multi_cam = df_pct_pl_transit_multi_cam.groupby(
            ['heure']).agg({'nb_veh_x': 'sum', 'nb_veh_y': 'sum'}).reset_index()
        df_pct_pl_transit_multi_cam['pct_pl_transit'] = (
            df_pct_pl_transit_multi_cam['nb_veh_y']
            / df_pct_pl_transit_multi_cam['nb_veh_x'] * 100)

    bar = alt.Chart(df_concat_pl_jo_multi_cam,
                    title=titre).mark_bar(opacity=0.7, size=20).encode(
                        x='heure:O',
                        y=alt.Y('nb_veh:Q',
                                stack=None,
                                axis=alt.Axis(title='Nb de vehicules',
                                              grid=False)),
                        color='type')
    line = alt.Chart(df_pct_pl_transit_multi_cam).mark_line(
        color='green').encode(x='heure:O',
                              y=alt.Y(
                                  'pct_pl_transit:Q',
                                  axis=alt.Axis(title='% de PL en transit')))
    # The two layers use different units, hence the independent y scales.
    return (bar + line).resolve_scale(y='independent').properties(width=800)
Пример #21
0
    """Smooth the given signal using a rectangular window."""
    window = np.ones(points) / points
    return np.convolve(signal, window, mode='same')


# Plot data: distance, elevation and the heart rate passed through smooth()
# with a 51-point window.
data = pd.DataFrame(
    {
        'dist': segment['Distance / km'],
        'elevation': segment['elevation'],
        'heart': smooth(segment['hr'], 51),
    }
)

# Semi-transparent area with an outline: elevation against distance.
area1 = altair.Chart(data).mark_area(
    fillOpacity=0.4, strokeWidth=5, line=True
).encode(
    x=altair.X('dist', title='Distance / km'),
    y=altair.Y('elevation', title='Elevation / m'),
)

# Line in a fixed colour: smoothed heart rate against distance.
line1 = altair.Chart(data).mark_line(
    strokeWidth=5
).encode(
    x=altair.X('dist', title='Distance / km'),
    y=altair.Y('heart', title='Heart rate / bpm'),
    color=altair.value('#1b9e77'),
)
chart = altair.layer(
    area1,
    line1,
    width=WIDTH,
    height=HEIGHT,
Пример #22
0
def intervalle_confiance_cam(df_pct_pl_transit, df_concat_pl_jo,
                             intervall_conf, *cam):
    """
    Build the confidence-interval chart and the HGV (PL) count comparison chart.

    Args:
        df_pct_pl_transit: forwarded unchanged to ``indice_confiance_cam``.
        df_concat_pl_jo: forwarded unchanged to ``indice_confiance_cam``.
        intervall_conf: True to draw the confidence-interval band around the
            transit percentage, False to draw only the percentage line.
        *cam: number(s) of the camera(s) concerned; used for the chart titles
            and forwarded (as a tuple) to ``indice_confiance_cam``.

    Returns:
        Tuple ``(graph_interval, graph_nb_pl)`` of Altair charts:
        ``graph_interval`` layers the HGV counts with the transit percentage
        (independent y scales); ``graph_nb_pl`` compares the HGV counts by
        source.
    """
    pour_graph_synth, lien_traf_gest_traf_lapi = indice_confiance_cam(
        df_pct_pl_transit, df_concat_pl_jo, cam)
    # Turn the hour value into a timestamp (epoch + n hours) so that Altair
    # can use the 'hoursminutes' time unit. The lowercase 'h' unit is used
    # because the 'H' alias is deprecated in recent pandas.
    for frame in (lien_traf_gest_traf_lapi, pour_graph_synth):
        frame['heure'] = frame.apply(
            lambda x: pd.to_datetime(0) + pd.Timedelta(str(x['heure']) + 'h'),
            axis=1)

    # Titles: use the site name when the camera list matches a known site,
    # otherwise fall back to the camera number(s). The lookup is done once.
    cameras = list(cam)
    voies = [
        voie for voie, cams in dico_corrsp_camera_site.items()
        if cams == cameras
    ]
    if voies:
        titre_interv = f'Nombre de PL et % de PL en transit sur {voies[0]}'
        titre_nb_pl = f'Nombre de PL selon la source sur {voies[0]}'
    elif len(cam) > 1:
        titre_interv = f'Nombre de PL et % de PL en transit au droit des caméras {cam}'
        titre_nb_pl = f'Nombre de PL selon la source au droit des caméras {cam}'
    else:
        titre_interv = f'Nombre de PL et % de PL en transit au droit de la caméra {cam[0]}'
        titre_nb_pl = f'Nombre de PL selon la source au droit de la caméra {cam[0]}'

    # Show only "Comptage gestionnnaire" when the adjusted counts are equal
    # to the raw operator counts.
    if ((pour_graph_synth.loc[pour_graph_synth['type'] ==
                              'Comptage gestionnnaire recalé'].nb_veh ==
         pour_graph_synth.loc[pour_graph_synth['type'] ==
                              'Comptage gestionnnaire'].nb_veh).all()):
        pour_graph_synth = pour_graph_synth.loc[pour_graph_synth.type.isin(
            ['LAPI', 'Comptage gestionnnaire'])].copy()
        df_intervalle = pour_graph_synth.copy()
    else:
        df_intervalle = pour_graph_synth.loc[pour_graph_synth['type'].isin(
            ['LAPI', 'Comptage gestionnnaire recalé'])].copy()

    # Constant columns used only to generate legend entries.
    lien_traf_gest_traf_lapi['legend_pct_transit'] = 'Pourcentage PL transit'
    lien_traf_gest_traf_lapi['legend_i_conf'] = 'Intervalle de confiance'
    # The sort values must be spelled exactly like the 'type' values filtered
    # above ('... recalé', with the accent) for the custom order to apply.
    line_trafic = alt.Chart(
        df_intervalle, title=titre_interv).mark_line().encode(
            x=alt.X('hoursminutes(heure)',
                    axis=alt.Axis(title='Heure',
                                  titleFontSize=14,
                                  labelFontSize=14)),
            y=alt.Y('nb_veh:Q',
                    axis=alt.Axis(title='Nombre de PL',
                                  titleFontSize=14,
                                  labelFontSize=14)),
            color=alt.Color('type',
                            sort=['LAPI', 'Comptage gestionnnaire recalé'],
                            legend=alt.Legend(title='source du nombre de PL',
                                              titleFontSize=14,
                                              labelFontSize=14,
                                              labelLimit=400)))
    # Band between the min and max transit percentages.
    area_pct_max = alt.Chart(lien_traf_gest_traf_lapi).mark_area(
        opacity=0.7, color='green').encode(
            x='hoursminutes(heure)',
            y=alt.Y('pct_pl_transit_max:Q',
                    axis=alt.Axis(title='Pourcentage de PL en transit',
                                  titleFontSize=14,
                                  labelFontSize=14,
                                  labelColor='green',
                                  titleColor='green'),
                    scale=alt.Scale(domain=(0, 100))),
            y2='pct_pl_transit_min:Q',
            opacity=alt.Opacity('legend_i_conf'))
    line_pct = alt.Chart(lien_traf_gest_traf_lapi).mark_line(
        color='green').encode(
            x='hoursminutes(heure)',
            y=alt.Y('pct_pl_transit',
                    axis=alt.Axis(title='Pourcentage de PL en transit',
                                  titleFontSize=14,
                                  labelFontSize=14,
                                  labelColor='green',
                                  titleColor='green'),
                    scale=alt.Scale(domain=(0, 100))),
            opacity=alt.Opacity('legend_pct_transit',
                                legend=alt.Legend(
                                    title='Analyse du transit LAPI',
                                    titleFontSize=14,
                                    labelFontSize=14)))
    pct = (area_pct_max + line_pct) if intervall_conf else line_pct
    # Counts and percentages use different units, hence independent y scales.
    graph_interval = (line_trafic + pct).resolve_scale(
        y='independent').properties(width=800,
                                    height=400).configure_title(fontSize=18)

    # Comparison of the HGV counts by source.
    graph_nb_pl = alt.Chart(
        pour_graph_synth, title=titre_nb_pl).mark_line(opacity=0.7).encode(
            x=alt.X('hoursminutes(heure)',
                    axis=alt.Axis(title='Heure',
                                  titleFontSize=14,
                                  labelFontSize=14)),
            y=alt.Y('nb_veh:Q',
                    axis=alt.Axis(title='Nombre de PL',
                                  titleFontSize=14,
                                  labelFontSize=14)),
            color=alt.Color('type',
                            sort=[
                                'LAPI', 'Comptage gestionnnaire',
                                'Comptage gestionnnaire recalé'
                            ],
                            title='source du nombre de PL',
                            legend=alt.Legend(
                                titleFontSize=14,
                                labelFontSize=14,
                                labelLimit=400))).properties(
                                    width=800,
                                    height=400).configure_title(fontSize=18)

    return graph_interval, graph_nb_pl
Пример #23
0
    # Only plot when a start date ("mindate") has been provided.
    if not mindate:
        st.error("Please select start date for plot range")
    else:
        # Restrict the table to the plot range: rows dated on or after
        # `mindate` when `dateupdate` is set, otherwise on or after 2020-01-01.
        if dateupdate:
            df = data.loc[data['Date'] >= mindate]
        else:
            df = data.loc[data['Date'] >= datetime.datetime.strptime(
                '2020-01-01', '%Y-%m-%d')]

        # Write a header line followed by the filtered dataframe.
        st.write("AESO Historical Data", df)

        # Altair line chart of the electricity price over time.
        chart1 = (alt.Chart(df).mark_line(opacity=0.5).encode(
            x="Date:T", y=alt.Y("Electricity Price $/kwh:Q", stack=None)))

        # Earlier Altair version of the demand chart, kept for reference:
        # chart2 = (
        #     alt.Chart(df)
        #     .mark_line(opacity=0.5)
        #     .encode(
        #         x="Date:T",
        #         y=alt.Y("AIL Demand (MW):Q", stack=None)
        #     )
        # )

        # Plotly scatter of AIL demand over time.
        # NOTE(review): chart2 is not displayed within this fragment - confirm
        # it is rendered further down.
        chart2 = px.scatter(df, x='Date', y='AIL Demand (MW)')

        st.write('Alberta Electricity Price History')
        st.altair_chart(chart1, use_container_width=True)
Пример #24
0
def graph_PL_transit_dir_jo_cam(df_pct_pl_transit, *cam, coeff_uvp=3):
    """
    Summary chart of the hourly number of HGV (PL) and of HGV in transit,
    with the transit percentage drawn on top.

    Args:
        df_pct_pl_transit: DataFrame forwarded to ``PL_transit_dir_jo_cam``.
        *cam: number(s) of the camera(s) concerned.
        coeff_uvp: conversion coefficient forwarded to
            ``PL_transit_dir_jo_cam``.

    Returns:
        Layered Altair chart: bars for ``nb_pl`` by ``type`` and a green line
        for ``pct_pl_transit``, with independent y scales.
    """
    # Data: traffic per type and transit percentage for the cameras.
    concat_dir_trafic, df_pct_pl_transit_multi_cam = PL_transit_dir_jo_cam(
        df_pct_pl_transit, coeff_uvp, cam)

    # Title: site name when the camera list matches a known site, camera
    # number(s) otherwise.
    sites = [
        voie for voie, cams in dico_corrsp_camera_site.items()
        if cams == list(cam)
    ]
    if sites:
        titre = f'Nombre de PL et % de PL en transit sur {sites[0]}'
    elif len(cam) > 1:
        titre = f'Nombre de PL et % de PL en transit au droit des caméras {cam}'
    else:
        titre = f'Nombre de PL et % de PL en transit au droit de la caméra {cam[0]}'

    # Constant column used only to generate the legend entry of the line.
    df_pct_pl_transit_multi_cam['legend'] = 'Pourcentage PL en transit'
    types_gardes = ['Tous PL', 'PL en transit']
    concat_dir_trafic = concat_dir_trafic.loc[
        concat_dir_trafic['type'].isin(types_gardes)].copy()

    # Font sizes shared by every axis and legend of the chart.
    police = {'titleFontSize': 14, 'labelFontSize': 14}

    def axe_heure():
        """Return a fresh x encoding for the hour, shared by both layers."""
        return alt.X('heure:O', axis=alt.Axis(title='Heure', **police))

    y_nb_pl = alt.Y('nb_pl:Q',
                    stack=None,
                    axis=alt.Axis(title='Nombre de PL', **police))
    couleur_type = alt.Color('type',
                             legend=alt.Legend(title='Type de PL', **police),
                             sort="descending")
    bar_nb_pl_dir = (alt.Chart(concat_dir_trafic, title=titre)
                     .mark_bar(opacity=0.7)
                     .encode(x=axe_heure(), y=y_nb_pl, color=couleur_type))

    y_pct = alt.Y('pct_pl_transit',
                  axis=alt.Axis(title='% PL en transit',
                                labelColor='green',
                                titleColor='green',
                                grid=False,
                                **police),
                  scale=alt.Scale(domain=(0, 100)))
    opacite_legende = alt.Opacity('legend',
                                  legend=alt.Legend(title='Donnees LAPI',
                                                    labelLimit=300,
                                                    **police))
    line_pct_pl_lapi = (alt.Chart(df_pct_pl_transit_multi_cam)
                        .mark_line(color='green')
                        .encode(x=axe_heure(), y=y_pct,
                                opacity=opacite_legende))

    graph = (bar_nb_pl_dir + line_pct_pl_lapi).resolve_scale(y='independent')
    return graph.properties(width=800, height=400).configure_title(fontSize=18)
Пример #25
0
def _select_county_key(countydf):
    """Render the state and county select boxes and return the chosen key.

    ``countydf`` must have one row per state with a ``State`` column and a
    ``County`` column holding the list of county names of that state (this
    is how ``main`` builds it). The returned string has the form
    ``'<county>, <state>, US'``.
    """
    statesselected = st.selectbox("Select a State", countydf['State'])
    countylist = (countydf[countydf['State'] == statesselected]['County']
                  ).tolist()[0]
    countyselected = st.selectbox('Select a county for demo', countylist)
    return countyselected + ', ' + statesselected.strip() + ', ' + 'US'


def main():
    """Streamlit entry point with two pages chosen from the sidebar.

    * 'Hybrid Model': trains ``Train_Dynamic_SIR`` on the selected county,
      lets the user tune death rate, simulation period, recovery period and
      social-distancing factor, then integrates the SIR equations with
      ``odeint`` and plots the S, I and R curves.
    * 'Data Exploratory': bar charts of the county's confirmed cases, deaths
      and daily figures, plus an optional raw table and area chart from a
      chosen start date.
    """
    ## sidebar
    data = load_data('total_data.pkl')
    # 'Combined_Key' is split into at most three parts (County, State,
    # Country). `n` is passed by keyword: it is keyword-only in pandas >= 2.
    countydf = pd.DataFrame(data.Combined_Key.str.split(',', n=2).tolist(),
                            columns=['County', 'State',
                                     'Country']).drop('Country', axis=1)
    # One row per state, holding the list of its counties.
    countydf = pd.DataFrame(
        countydf.groupby('State')['County'].apply(
            lambda x: x.values.tolist())).reset_index()

    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", ('Hybrid Model', 'Data Exploratory'))

    if page == 'Hybrid Model':
        # Explicit st.write instead of a bare string statement, so the header
        # does not depend on Streamlit's "magic" being active.
        st.write('## County Level SIR Simulation Model')
        name = _select_county_key(countydf)

        df2 = data_cleaning(data, name)

        # Model training: every date before the 7th most recent one.
        train_df = df2[df2['Date'] < df2.Date.iloc[-7]]

        # initialize model
        with st.spinner('Model Training in Progress...'):
            # NOTE(review): label-based lookup of row 1; assumes df2 carries
            # a default integer index - confirm against data_cleaning().
            population = df2.Population[1]
            # NOTE(review): training uses gamma=1/15 while Predict_SIR below
            # uses gamma=1/14 - confirm the difference is intended.
            model = Train_Dynamic_SIR(epoch=5000,
                                      data=train_df,
                                      population=population,
                                      gamma=1 / 15,
                                      c=1,
                                      b=-10,
                                      a=0.08)

            # train the model
            model.train()

        st.write("## Future Forecast")
        # initialize parameters for prediction
        population = model.population
        I0 = train_df['I'].iloc[-1]
        R0 = train_df['R'].iloc[-1]
        S0 = population - I0 - R0
        est_alpha = model.a
        est_b = model.b
        est_c = model.c

        forecast_period = 21

        prediction = Predict_SIR(pred_period=forecast_period,
                                 S=S0,
                                 I=I0,
                                 R=R0,
                                 gamma=1 / 14,
                                 a=est_alpha,
                                 c=est_c,
                                 b=est_b,
                                 past_days=train_df['Day'].max())
        # Rows of the most recent date (label-based lookup, see note above).
        recent = len(df2)
        last_date = df2['Date'][recent - 1]
        dfdate = df2[df2['Date'] == last_date]

        # Calculating death rate
        N = dfdate['Population'].iloc[0]
        confirmed = dfdate['Confirmed'].iloc[0]
        death_count = dfdate['Deaths'].iloc[0]
        # Guard against a county without any confirmed case (division by
        # zero) and keep the default inside the slider bounds, otherwise
        # st.slider rejects it.
        if confirmed:
            death_rate = round(death_count / confirmed * 100, 3)
        else:
            death_rate = 0.0
        death_rate = float(min(max(death_rate, 0.0), 30.0))
        I0 = dfdate['I'].iloc[0]
        # Deaths are counted with the removed compartment.
        R0 = dfdate['R'].iloc[0] + death_count

        death_rate = st.slider("Input a realistic death rate(%) ",
                               0.0,
                               30.0,
                               value=death_rate)
        # death_rate is an assumption. The returned value is not used here.
        prediction.run(death_rate=death_rate)

        simulation_period = st.slider('Input Simulation period (days)',
                                      0,
                                      100,
                                      step=1,
                                      value=21)
        recovery_day = st.slider('Input recovery period (days)',
                                 1,
                                 28,
                                 step=1,
                                 value=14)

        # NOTE(review): the result of show_betalist() only fed a dead
        # computation that has been removed; the call itself is kept in case
        # it has side effects - confirm and delete if it has none.
        model.show_betalist()

        beta = prediction.finalbeta()
        # Default social-distancing factor derived from the fitted beta,
        # clamped to the slider range so an out-of-range beta cannot break
        # the widget.
        userbeta = round((100 - (beta * 100)), 2)
        userbeta = float(min(max(userbeta, 0.0), 100.0))
        userbeta = st.slider('Input Social distancing factor (%)',
                             0.00,
                             100.00,
                             step=0.01,
                             value=userbeta)

        gamma = 1 / recovery_day

        beta = (100 - userbeta) / 100
        # The '{}' placeholder was missing, so the date was never displayed.
        st.subheader('SIR simulation for chosen Date {}'.format(
            df2['Date'].dt.date[recent - 1]))
        st.write(dfdate[[
            'Date', 'Population', 'Confirmed', 'Recovered', 'Deaths', 'Active'
        ]])

        st.write('Curent value of (Beta) Social distancing factor : ',
                 userbeta)
        st.write('Current Death rate is : ', death_rate)

        rr = round(beta / gamma, 3)
        st.write('Effective reproduction number(R0) (%): ', rr)

        S0 = N - I0 - R0
        t = np.linspace(0, simulation_period, 500)

        # The SIR model differential equations.
        def deriv(y, t, N, beta, gamma):
            S, I, R = y
            dSdt = -beta * S * I / N
            dIdt = beta * S * I / N - gamma * I
            dRdt = gamma * I
            return dSdt, dIdt, dRdt

        # Initial conditions vector
        y0 = S0, I0, R0
        # Integrate the SIR equations over the time grid, t.
        ret = odeint(deriv, y0, t, args=(N, beta, gamma))
        S, I, R = ret.T

        plotting_SIR_Susceptible(S, I, R, N, t, simulation_period)
        plotting_SIR_Infection(S, I, R, N, t, simulation_period)
        plotting_SIR_Recovery(S, I, R, N, t, simulation_period)

    else:
        st.title('Explore County Level Data ')
        # load data
        name = _select_county_key(countydf)

        df = data_cleaning(data, name)

        # drawing: one bar chart per metric, sharing the same x encoding
        base = alt.Chart(df).mark_bar().encode(
            x='monthdate(Date):O', ).properties(width=500)

        red = alt.value('#f54242')
        a = base.encode(y='Confirmed').properties(title='Total Confirmed')
        st.altair_chart(a, use_container_width=True)

        b = base.encode(y='Deaths', color=red).properties(title='Total Deaths')
        st.altair_chart(b, use_container_width=True)

        c = base.encode(y='New Cases').properties(title='Daily New Cases')
        st.altair_chart(c, use_container_width=True)

        d = base.encode(y='New deaths',
                        color=red).properties(title='Daily New Deaths')
        st.altair_chart(d, use_container_width=True)

        dates = df['Date'].dt.date.unique()

        selected_date = st.selectbox('Select a Date to Start', (dates))
        forecastdf = df[df['Date'].dt.date >= selected_date]

        if st.checkbox('Show Raw Data'):
            st.write(forecastdf)

        if st.checkbox('Visualization Chart'):
            df_temp = forecastdf.rename(columns={
                'I': 'Active Infection Cases',
                'R': 'Recovered Cases'
            })
            # Long format: one row per (Date, type) with its count.
            e = pd.melt(
                frame=df_temp,
                id_vars='Date',
                value_vars=['Active Infection Cases', 'Recovered Cases'],
                var_name='type',
                value_name='count')

            e = alt.Chart(e).mark_area().encode(
                x=alt.X('Date:T', title='Date'),
                y=alt.Y('count:Q', title='Number of Cases'),
                color=alt.Color(
                    'type:O', legend=alt.Legend(
                        title=None,
                        orient='top-left'))).configure_axis(grid=False)

            st.altair_chart(e, use_container_width=True)

    st.title("About")
    st.info("This app uses JHU data available in [Github]"
            "(https://github.com/CSSEGISandData/COVID-19) repository.\n\n")
Пример #26
0
def graph_TV_jo_cam(df_pct_pl_transit, uvp, coeff_uvp, *cam):
    """
    Summary bar chart of the hourly number of vehicles: all vehicles, all
    HGV (PL) and HGV in transit.

    Args:
        df_pct_pl_transit: DataFrame forwarded to ``PL_transit_dir_jo_cam``.
        uvp: truthy to chart the 'UVP ...' categories, falsy for the plain
            vehicle categories.
        coeff_uvp: HGV-to-UVP equivalence coefficient, forwarded to
            ``PL_transit_dir_jo_cam`` (must be supplied even when ``uvp`` is
            falsy).
        *cam: number(s) of the camera(s) concerned.

    Returns:
        Altair bar chart of ``nb_pl`` per hour, coloured by ``type``.
    """
    concat_dir_trafic = PL_transit_dir_jo_cam(df_pct_pl_transit, coeff_uvp,
                                              cam)[0]
    # Title: site name when the camera list matches a known site, camera
    # number(s) otherwise. The lookup is done once.
    cameras = list(cam)
    voies = [
        voie for voie, cams in dico_corrsp_camera_site.items()
        if cams == cameras
    ]
    if voies:
        titre = f'Nombre de véhicules sur {voies[0]}'
    elif len(cam) > 1:
        titre = f'Nombre de véhicules au droit des caméras {cam}'
    else:
        titre = f'Nombre de véhicules au droit de la caméra {cam[0]}'

    # Categories to keep, in the display order of the colour scale. The same
    # list drives both the filter and the sort so that the spellings cannot
    # diverge (the sort previously used 'Tous vehicules', which never matched
    # the filtered value 'Tous Vehicules').
    types_vehicules = ['Tous Vehicules', 'Tous PL', 'PL en transit']
    if uvp:
        types_vehicules = [f'UVP {nom}' for nom in types_vehicules]

    concat_dir_trafic = concat_dir_trafic.loc[
        concat_dir_trafic['type'].isin(types_vehicules)].copy()
    bar_nb_pl_dir = alt.Chart(
        concat_dir_trafic, title=titre).mark_bar().encode(
            x=alt.X('heure:O',
                    axis=alt.Axis(title='Heure',
                                  titleFontSize=14,
                                  labelFontSize=14)),
            y=alt.Y('nb_pl:Q',
                    stack=None,
                    axis=alt.Axis(title='Nombre de vehicules',
                                  titleFontSize=14,
                                  labelFontSize=14)),
            color=alt.Color('type',
                            sort=types_vehicules,
                            legend=alt.Legend(title='Type de vehicules',
                                              titleFontSize=14,
                                              labelFontSize=14)),
            order=alt.Order('type', sort='descending')).properties(
                width=800, height=400).configure_title(fontSize=18)
    return bar_nb_pl_dir
# Build a batched dataset from the training CSV; num_epochs=1 means the file
# is read once.
train_dataset = tf.data.experimental.make_csv_dataset(
    train_dataset_fp,
    batch_size,
    column_names=column_names,
    label_name=label_name,
    num_epochs=1,
)
# -
# Pull the first batch to inspect it.
# presumably `features` maps column name -> tensor (pack_features_vector
# below calls features.values()) - TODO confirm
features, labels = next(iter(train_dataset))
print(features)
# -
# Scatter plot of that batch: petal length against sepal length, coloured by
# label.
df = pd.DataFrame(features)
df["label"] = labels
alt.Chart(df).mark_circle().encode(x="petal_length",
                                   y="sepal_length",
                                   color="label:N").properties(width=200,
                                                               height=150)


# -
def pack_features_vector(features, labels):
    """Stack the values of ``features`` along axis 1; ``labels`` pass through.

    Returns the tuple ``(stacked_features, labels)``.
    """
    columns = list(features.values())
    stacked = tf.stack(columns, axis=1)
    return stacked, labels


# Apply pack_features_vector to every batch, then print the first five rows
# of the first packed batch.
train_dataset = train_dataset.map(pack_features_vector)
features, labels = next(iter(train_dataset))
print(features[:5])

# ## Select the type of model
Пример #28
0
    # 'Compare' plots the adjusted close as is; 'Normalise' rebases every
    # series to 100 at its first observation.
    NORM = st.radio('', ('Compare', 'Normalise'))
    for ticker in ticker_list:
        # Long-format frame for this ticker: one row per date.
        temp_df = pd.DataFrame(columns=['symbol', 'date', 'price'])
        temp_df['symbol'] = [ticker] * prices[ticker].shape[0]
        temp_df['date'] = list(prices[ticker].index)
        temp_df['date'] = pd.to_datetime(temp_df['date'], format='%Y-%m-%d')
        if NORM == 'Normalise':
            temp_df['price'] = list(prices[ticker]['Adjusted_close'] * 100 /
                                    prices[ticker]['Adjusted_close'].iloc[0])
        else:
            temp_df['price'] = list(prices[ticker]['Adjusted_close'])
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent that works on old and new versions alike.
        price_frame = pd.concat([price_frame, temp_df])

    #show multiple securities in one chart
    chart = at.Chart(price_frame).mark_line().encode(at.X('date:T'),
                                                     at.Y('price:Q'),
                                                     color='symbol')
    st.write(chart.configure_view(continuousHeight=400, continuousWidth=750))

    #create excel friendly format
    if st.button('Create Excel'):
        excel_df = pd.DataFrame(index=prices[ticker_list[0]].index)
        for ticker in ticker_list:
            temp_df = pd.DataFrame(index=prices[ticker].index)
            if NORM == 'Normalise':
                temp_df[ticker] = list(
                    prices[ticker]['Adjusted_close'] * 100 /
                    prices[ticker]['Adjusted_close'].iloc[0])
            else:
                temp_df[ticker] = list(prices[ticker]['Adjusted_close'])
            excel_df = pd.merge(excel_df,
Пример #29
0
Footnotes are best placed right after the paragraph first used.[^footnote]

[^footnote]: But you can also put them at the end of the document.

'''
)

st.title("My awesome interactive graph")

# Altair interactive chart example.
# Keep only the rows whose "Origin" is among the values in `filtered`.
source = source[source["Origin"].isin(filtered)]
brush = alt.selection(type='interval')

# Scatter plot: points matching the brush condition are coloured by Origin,
# the others are light gray.
points = alt.Chart(source).mark_point().encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color=alt.condition(brush, 'Origin', alt.value('lightgray'))
).add_selection(
    brush
)

# Bar chart: count of rows per Origin, filtered by the brush.
bars = alt.Chart(source).mark_bar().encode(
    y='Origin',
    color='Origin',
    x='count(Origin)'
).transform_filter(
    brush
)

# Vertical concatenation of the two charts.
# NOTE(review): this is a bare expression; presumably it is rendered through
# Streamlit's "magic" display of expressions - confirm, or pass it to
# st.altair_chart / st.write explicitly.
points & bars
Пример #30
0
def vline(chart: alt.Chart, value: float) -> alt.Chart:
    """Add a grey vertical rule to an Altair chart.

    The rule is encoded on the quantitative field ``val``; positioning it at
    ``value`` and combining it with ``chart`` is delegated to ``_add_line``.
    """
    grey_rule = alt.Chart().mark_rule(color="grey")
    return _add_line(chart, value, grey_rule.encode(x="val:Q"))