Exemplo n.º 1
0
import altair as alt
import altair_viewer

# Fetch every Place node with its node2vec embedding and metadata.
# The driver is opened as a context manager so that it is closed once the
# query result has been consumed (the original never closed it).
with GraphDatabase.driver("bolt://localhost", auth=("neo4j", "neo")) as driver, \
        driver.session(database="foo") as session:
    result = session.run("""
    MATCH (p:Place)
    RETURN p.name AS place, p.node2vecMoreTraining AS embedding, p.community AS community, p.country AS country
    """)
    # The result must be consumed while the session is still open.
    X = {
        row["place"]: {
            "embedding": row["embedding"],
            "community": row["community"],
            "country": row["country"],
        }
        for row in result
    }

# Fix the place order once so embeddings and metadata stay aligned.
places = list(X)

# Project the embeddings down to two dimensions for plotting.
X_embedded = TSNE(n_components=2, random_state=6).fit_transform(
    [X[place]["embedding"] for place in places])

df = pd.DataFrame(data={
    "place": places,
    "community": [f"Community-{X[place]['community']}" for place in places],
    "country": [X[place]['country'] for place in places],
    "x": [point[0] for point in X_embedded],
    "y": [point[1] for point in X_embedded],
})

# One circle per place, coloured by community.
chart = alt.Chart(df).mark_circle(size=60).encode(
    x='x',
    y='y',
    color='community',
    tooltip=['place', 'community', 'country']
).properties(width=800, height=500)

altair_viewer.display(chart)
Exemplo n.º 2
0
def plot_reported_cases(county='Alle_Fylker',
                        periodicity='day',
                        start=None,
                        end=None,
                        plot_lib='Altair',
                        view_plot=False,
                        plot_title=False,
                        width=800,
                        height=500):
    """
    Function to generate a bar-plot for Covid19's reported cases from
    FHI/Norway.

    The data is read from
    'data/antall-meldte-covid-19-t_<periodicity>_<county>.csv', indexed by
    its 'Dato' column, and the 'Nye tilfeller' column is plotted.

    Parameters:
    ~~~~~~~~~~~~~~~~~~~~
    :param county: str: Name of the county to be analyzed, which could
                        be 'Agder', 'Innlandet', 'More_og_Romsdal',
                        'Nordland', 'Oslo', 'Rogaland',
                        'Troms_og_Finnmark', 'Trondelag',
                        'Vestfold_og_Telemark', 'Vestland',
                        'Viken' or 'Alle_Fylker' for all counties.
    :param periodicity: str: Periodicity to display the plot, which could
                             be 'day' or 'week'. Default 'day'.
    :param start: str or date object: The starting date for the analysis.
    :param end: str or date object: The ending date for the analysis.
    :param plot_lib: str: The name of the plotting library to be used.
                          Default is 'Altair' which uses Altair. Another
                          option is 'plt' which uses matplotlib.
    :param view_plot: bool: The default is False to not show the plot.
    :param plot_title: bool: Only used for the 'plt' plot. The default is
                             False to not display the plot's title. The
                             'Altair' chart always carries a title.
    :param width: int: Only used for 'Altair' plot. Define the width of
                       the plotting. Default is 800.
    :param height: int: Only used for 'Altair' plot. Define the height of
                       the plotting. Default is 500.

    Return:
    ~~~~~~~~~~~~~~~~~~~~
    The Altair chart when plot_lib is 'Altair'; None when plot_lib is
    'plt' (the bars are drawn on the current matplotlib figure).

    Raises:
    ~~~~~~~~~~~~~~~~~~~~
    ValueError: If periodicity or plot_lib is not one of the supported
                values. Both are checked before any file is read.

    Notes:
    ~~~~~~~~~~~~~~~~~~~~
    - If start and end are None, then it will plot the entire data-base.
    - start and end are applied as a slice on the 'Dato' index exactly as
      it is stored in the file (the index is not parsed before slicing).
    """
    # setting fixed parameters
    col = 'Nye tilfeller'
    title = 'Number of reported cases'

    # validating the arguments up-front, so that a wrong value fails fast
    # instead of after (or instead of) the file access
    if periodicity not in ('day', 'week'):
        raise ValueError("Error: The given periodicity is not valid.")

    if plot_lib not in ('Altair', 'plt'):
        raise ValueError(f"Error: Plot type {plot_lib} not implemented.")

    # setting prefix and suffix for the data-files
    prefix = 'data/antall-meldte-covid-19-t_' + periodicity + '_'
    suffix = '.csv'

    # reading data as DataFrame
    df = pd.read_csv(filepath_or_buffer=prefix + county + suffix,
                     sep=',',
                     index_col='Dato')

    # setting data range
    df = df[start:end]

    # creating Altair's plotting
    if plot_lib == 'Altair':

        # Altair needs the dates as a regular column; reset_index() returns
        # a new frame, so the sliced frame is not modified in place
        df = df.reset_index()

        if periodicity == 'day':
            # daily files are parsed as day-first dates and drawn on a
            # temporal axis
            df['Dato'] = pd.to_datetime(df["Dato"], dayfirst=True)
            x_axis = alt.X('Dato:T')

        else:
            # weekly files keep 'Dato' as stored; the axis does not have
            # to start at zero
            x_axis = alt.X('Dato', scale=alt.Scale(zero=False))

        # plotting the bars
        chart = alt.Chart(df).mark_bar().encode(x_axis,
                                                alt.Y(col + ':Q',
                                                      title=title),
                                                tooltip=['Dato', col])

        # setting size, title and pan/zoom interaction
        chart = chart.properties(
            width=width,
            height=height,
            title=f'Data Vs ({title}) for {county}').interactive()

        # viewing the plot
        if view_plot:
            altair_viewer.display(chart)

        return chart

    # creating Matplotlib plot (plot_lib == 'plt', validated above)
    plt.bar(x=df.index, height=df[col])

    # plotting set-up
    if plot_title:
        plt.title(f'Data Vs {title} for {county}')

    plt.xlabel('Date')
    plt.ylabel(f'{title}')
    plt.xticks(rotation='vertical')

    # viewing plot
    if view_plot:
        plt.show()
Exemplo n.º 3
0
    # Work on a copy so the preprocessing below does not touch data_new.
    data = data_new.copy()
    # Mode string forwarded to get_roc_chart below.
    # NOTE(review): presumably selects the classifier ('rf') - confirm
    # against get_roc_chart.
    mode = 'rf'
    # assign to variables: every column except 'class' is a feature,
    # 'class' is the target
    X = data.drop(columns=['class'])
    y = data['class']
    # encoding: Label encoding for binary class, one-hot encoding for the nominal variables
    y = sklearn.preprocessing.LabelEncoder().fit_transform(y)
    X = pd.get_dummies(X)
    # Candidate models; `models` is not used anywhere in the visible part
    # of this function.
    log_reg = sklearn.linear_model.LogisticRegression(max_iter=10000)
    lda = sklearn.discriminant_analysis.LinearDiscriminantAnalysis()
    gnb = sklearn.naive_bayes.GaussianNB()
    models = [log_reg, lda, gnb]
    # 80/20 train/test split with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, test_size=0.2, random_state=1)

    # threshold charts
    # Built with the hard-coded 'nb' / 'F2' arguments (not `mode`); it is
    # only displayed if the commented-out call at the bottom is re-enabled.
    threshold_chart = get_score_threshold_chart(X_train, X_test, y_train,
                                                y_test, 'nb', 'F2')

    # roc charts
    # chart_secondary = get_roc_chart(X_train, X_test, y_train, y_test, mode)
    # chart_original = get_roc_chart(X_train, X_test, y_train, y_test, mode)
    """ comment/uncomment depending on the chart that shall be displayed"""
    # Build the ROC chart, write it to 'roc.html' and show it in the viewer.
    chart = get_roc_chart(X_train, X_test, y_train, y_test, mode)
    chart.save('roc.html')
    view.display(chart)
    # Alternative charts, kept disabled:
    #view.display(get_balance_chart(data_new, title='Balance plot for secondary data', reindex=['p', 'e']))
    #view.display(get_balance_chart(data_original, title='Balance plot for 1987 data'))#, count=False, reindex=['p', 'e']))
    #view.display(get_correlation_chart(data_new, show_progress=True))
    #view.display(get_correlation_chart(data_original, show_progress=True))
    #view.display(threshold_chart)
def candlestick(source, width=800, height=500, view=True, lines=None):
    """
    Function to generate an interactive candlestick chart for visualization
    of the time series of a financial source.

    Parameters:
    ===================
    :param source: pd.DataFrame: Time series DataFrame containing
                                 OHLCV values in the columns 'open',
                                 'high', 'low', 'close' and 'volume'. Its
                                 index is turned into a column that the
                                 chart reads as 'index', so the index is
                                 expected to be unnamed. The DataFrame
                                 passed in is not modified.
    :param width: int: The width of the chart.
    :param height: int: The height of the chart.
    :param view: bool: If True, the chart is shown through
                       altair_viewer.display.
    :param lines: dict: Containing as keys the name of the columns and as
                        values the colors of the lines.

    Return:
    ===================
    The function returns the layered Altair chart (candlesticks, optional
    lines and the vertical hover rule), whether or not it was displayed.
    """
    # working on a re-indexed copy, so the caller's DataFrame is left
    # untouched and repeated calls see the same input
    source = source.reset_index()

    # defining colors for the candlesticks
    open_close_color = alt.condition("datum.open <= datum.close",
                                     alt.value("#06982d"),
                                     alt.value("#ae1325"))

    # creating the base for the candlestick's chart
    base = alt.Chart(source).encode(
        alt.X('index:T',
              axis=alt.Axis(format='%Y/%m/%d', labelAngle=-90, title='Dates')),
        color=open_close_color,
    )

    # creating a line for highest and lowest
    rule = base.mark_rule().encode(
        alt.Y(
            'low:Q',
            scale=alt.Scale(zero=False),
            axis=alt.Axis(title='Prices', orient='right'),
        ), alt.Y2('high:Q'))

    # creating the candlestick's bars
    bar = base.mark_bar().encode(alt.Y('open:Q'), alt.Y2('close:Q'))

    # joining OHLC together
    chart = rule + bar

    # drawing lines
    # The base chart carries the green/red color encoding, and an encoding
    # takes precedence over a mark's color property. The requested color is
    # therefore set through the encoding, replacing the inherited one.
    if lines is not None:
        for column, line_color in lines.items():
            chart += base.mark_line(opacity=0.3).encode(
                y=alt.Y(column), color=alt.value(line_color))

    # adding tooltips, properties and interaction
    chart = chart.encode(tooltip=[
        alt.Tooltip('index:T', title='Date'),
        alt.Tooltip('open', title='Open'),
        alt.Tooltip('low', title='Low'),
        alt.Tooltip('high', title='High'),
        alt.Tooltip('close', title='Close'),
        alt.Tooltip('volume', title='Volume')
    ]).properties(width=width,
                  height=height,
                  title='Candlestick visualization').interactive()

    # creating x-axis selections
    # !!!!!!!!!! it is jumping bar - fix later
    # NOTE(review): alt.selection(type='single') / add_selection look like
    # the pre-5 Altair API - confirm against the installed Altair version.
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=['index'],
                            empty='none')

    # drawing a vertical rule at the location of the selection
    v_rule = alt.Chart(source).mark_rule(color='gray').encode(
        x='index:T', ).transform_filter(nearest)

    # adding nearest selection on candlestick's chart
    chart = chart.add_selection(nearest)

    # layering the hover rule on top of the candlesticks
    layered = chart + v_rule

    if view:
        altair_viewer.display(layered)

    return layered