import altair as alt
import altair_viewer

# Fetch every Place node together with its node2vec embedding and metadata.
# The driver is context-managed (the original never closed it), so its
# connections are released even when the query raises.
with GraphDatabase.driver("bolt://localhost", auth=("neo4j", "neo")) as driver:
    with driver.session(database="foo") as session:
        result = session.run("""
            MATCH (p:Place)
            RETURN p.name AS place,
                   p.node2vecMoreTraining AS embedding,
                   p.community AS community,
                   p.country AS country
        """)
        # The rows are read here, inside the session block.
        X = {
            row["place"]: {
                "embedding": row["embedding"],
                "community": row["community"],
                "country": row["country"],
            }
            for row in result
        }

# dicts keep insertion order, so `places` lines up with the rows of X_embedded.
places = list(X)

# Project the embeddings down to two dimensions for plotting.
X_embedded = TSNE(n_components=2, random_state=6).fit_transform(
    [X[place]["embedding"] for place in places]
)

df = pd.DataFrame(data={
    "place": places,
    "community": [f"Community-{X[place]['community']}" for place in places],
    "country": [X[place]["country"] for place in places],
    "x": X_embedded[:, 0],
    "y": X_embedded[:, 1],
})

# One circle per place, coloured by community.
chart = alt.Chart(df).mark_circle(size=60).encode(
    x='x',
    y='y',
    color='community',
    tooltip=['place', 'community', 'country'],
).properties(width=800, height=500)

altair_viewer.display(chart)
def plot_reported_cases(county='Alle_Fylker',
                        periodicity='day',
                        start=None,
                        end=None,
                        plot_lib='Altair',
                        view_plot=False,
                        plot_title=False,
                        width=800,
                        height=500):
    """
    Function to generate a bar-plot of Covid19's reported cases from
    FHI/Norway, read from a per-county CSV file under 'data/'.

    Parameters:
    ~~~~~~~~~~~~~~~~~~~~
    :param county: str: Name of the county to be analyzed, which could be
                   'Agder', 'Innlandet', 'More_og_Romsdal', 'Nordland',
                   'Oslo', 'Rogaland', 'Troms_og_Finnmark', 'Trondelag',
                   'Vestfold_og_Telemark', 'Vestland', 'Viken' or
                   'Alle_Fylker' for all counties. It is used verbatim as
                   part of the data-file name.
    :param periodicity: str: Periodicity to display the plot, which could
                        be 'day' or 'week'. Default 'day'.
    :param start: str or date object: The starting date for the analysis.
    :param end: str or date object: The ending date for the analysis.
    :param plot_lib: str: The name of the plotting library to be used.
                     Default is 'Altair' which uses Altair. Another option
                     is 'plt' which uses matplotlib.
    :param view_plot: bool: The default is False to not show the plot.
    :param plot_title: bool: Only used for the 'plt' plot. The default is
                       False to not display the plot's title. The Altair
                       chart always carries a title.
    :param width: int: Only used for 'Altair' plot. Define the width of the
                  plotting. Default is 800.
    :param height: int: Only used for 'Altair' plot. Define the height of
                   the plotting. Default is 500.

    Return:
    ~~~~~~~~~~~~~~~~~~~~
    The Altair chart when plot_lib is 'Altair'; None when plot_lib is 'plt'.

    Raises:
    ~~~~~~~~~~~~~~~~~~~~
    ValueError: if periodicity is not 'day'/'week' or plot_lib is not
                'Altair'/'plt'.

    Notes:
    ~~~~~~~~~~~~~~~~~~~~
    - If start and end are None, then it will plot the entire data-base.
    - start and end are applied as a label slice on the 'Dato' index exactly
      as it is stored in the CSV file.
    """
    # setting fixed parameters
    col = 'Nye tilfeller'
    title = 'Number of reported cases'

    # building the data-file name from the periodicity and the county
    if periodicity not in ('day', 'week'):
        raise ValueError("Error: The given periodicity is not valid.")
    data_file = f'data/antall-meldte-covid-19-t_{periodicity}_{county}.csv'

    # reading data as DataFrame
    df = pd.read_csv(filepath_or_buffer=data_file, sep=',', index_col='Dato')

    # setting data range
    df = df[start:end]

    # creating Altair's plotting
    if plot_lib == 'Altair':
        # Altair needs 'Dato' as a regular column. reset_index() returns a
        # new frame, so the column assignment below does not write into a
        # slice of the loaded data (which can trigger pandas'
        # SettingWithCopy warning).
        df = df.reset_index()

        if periodicity == 'day':
            # daily files store the date day-first; parse it for a temporal axis
            df['Dato'] = pd.to_datetime(df['Dato'], dayfirst=True)
            x_axis = alt.X('Dato:T')
        else:
            # weekly files: 'Dato' is left as read from the file
            x_axis = alt.X('Dato', scale=alt.Scale(zero=False))

        chart = alt.Chart(df).mark_bar().encode(
            x_axis,
            alt.Y(col + ':Q', title=title),
            tooltip=['Dato', col],
        )
        chart = chart.properties(
            width=width,
            height=height,
            title=f'Data Vs ({title}) for {county}',
        ).interactive()

        # viewing the plot
        if view_plot is True:
            altair_viewer.display(chart)

        return chart

    # creating Matplotlib plot
    elif plot_lib == 'plt':
        plt.bar(x=df.index, height=df[col])

        # plotting set-up
        if plot_title is True:
            plt.title(f'Data Vs {title} for {county}')
        plt.xlabel('Date')
        plt.ylabel(f'{title}')
        plt.xticks(rotation='vertical')

        # viewing plot
        if view_plot is True:
            plt.show()

    else:
        raise ValueError(f"Error: Plot type {plot_lib} not implemented.")
# Work on a copy of data_new.
data = data_new.copy()
mode = 'rf'

# Features: every column except 'class', with nominal variables one-hot encoded.
X = pd.get_dummies(data.drop(columns=['class']))
# Target: the 'class' column, label-encoded.
y = sklearn.preprocessing.LabelEncoder().fit_transform(data['class'])

# Classifiers collected in `models`.
log_reg = sklearn.linear_model.LogisticRegression(max_iter=10000)
lda = sklearn.discriminant_analysis.LinearDiscriminantAnalysis()
gnb = sklearn.naive_bayes.GaussianNB()
models = [log_reg, lda, gnb]

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# threshold charts
threshold_chart = get_score_threshold_chart(
    X_train, X_test, y_train, y_test, 'nb', 'F2')

# roc charts
# chart_secondary = get_roc_chart(X_train, X_test, y_train, y_test, mode)
# chart_original = get_roc_chart(X_train, X_test, y_train, y_test, mode)

""" comment/uncomment depending on the chart that shall be displayed"""
chart = get_roc_chart(X_train, X_test, y_train, y_test, mode)
chart.save('roc.html')
view.display(chart)

# Alternative charts; enable one of these instead of the ROC chart above.
#view.display(get_balance_chart(data_new, title='Balance plot for secondary data', reindex=['p', 'e']))
#view.display(get_balance_chart(data_original, title='Balance plot for 1987 data'))#, count=False, reindex=['p', 'e']))
#view.display(get_correlation_chart(data_new, show_progress=True))
#view.display(get_correlation_chart(data_original, show_progress=True))
#view.display(threshold_chart)
def candlestick(source, width=800, height=500, view=True, lines=None):
    """
    Function to generate an interactive candlestick chart for visualization
    of the time series of a financial source.

    Parameters:
    ===================
    :param source: pd.DataFrame: Time series DataFrame containing OHLCV
                   values in the columns 'open', 'high', 'low', 'close' and
                   'volume'. Its index is moved into a column by
                   reset_index() and read by the chart as 'index', so the
                   index is expected to be unnamed (or named 'index').
                   The DataFrame itself is not modified.
    :param width: int: The width of the chart.
    :param height: int: The height of the chart.
    :param view: bool: If True, the chart is displayed with altair_viewer.
    :param lines: dict: Containing as keys the name of the columns and as
                  values the colors of the lines.

    Return:
    ===================
    The layered Altair chart (candlesticks, optional lines and the vertical
    selection rule), whether or not it was displayed.
    """
    # Re-index a copy instead of the caller's frame: an in-place reset would
    # leak the new 'index' column to the caller and make a second call on the
    # same DataFrame fail because that column already exists.
    source = source.reset_index()

    # shared x-axis; kept free of any color encoding so overlay lines can
    # use their own mark color
    plain = alt.Chart(source).encode(
        alt.X('index:T',
              axis=alt.Axis(format='%Y/%m/%d', labelAngle=-90, title='Dates')))

    # defining colors for the candlesticks
    open_close_color = alt.condition("datum.open <= datum.close",
                                     alt.value("#06982d"),
                                     alt.value("#ae1325"))

    # creating the base for the candlestick's chart
    base = plain.encode(color=open_close_color)

    # creating a line for highest and lowest
    rule = base.mark_rule().encode(
        alt.Y(
            'low:Q',
            scale=alt.Scale(zero=False),
            axis=alt.Axis(title='Prices', orient='right'),
        ),
        alt.Y2('high:Q'))

    # creating the candlestick's bars
    bar = base.mark_bar().encode(alt.Y('open:Q'), alt.Y2('close:Q'))

    # joining OHLC together
    chart = rule + bar

    # drawing lines
    if lines is not None:
        for column, line_color in lines.items():
            # Built from `plain`, not `base`: an encoded color channel takes
            # precedence over the mark's `color`, so inheriting the
            # candlestick color condition would hide the requested color.
            chart += plain.mark_line(color=line_color,
                                     opacity=0.3).encode(y=alt.Y(column))

    # adding tooltips, properties and interaction
    chart = chart.encode(tooltip=[
        alt.Tooltip('index:T', title='Date'),
        alt.Tooltip('open', title='Open'),
        alt.Tooltip('low', title='Low'),
        alt.Tooltip('high', title='High'),
        alt.Tooltip('close', title='Close'),
        alt.Tooltip('volume', title='Volume')
    ]).properties(width=width,
                  height=height,
                  title='Candlestick visualization').interactive()

    # creating x-axis selections
    # TODO: the vertical rule jumps between bars - fix later
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=['index'],
                            empty='none')

    # drawing a vertical rule at the location of the selection
    v_rule = alt.Chart(source).mark_rule(color='gray').encode(
        x='index:T', ).transform_filter(nearest)

    # adding nearest selection on candlestick's chart
    chart = chart.add_selection(nearest)

    layered = chart + v_rule

    if view is True:
        altair_viewer.display(layered)

    # returned so that view=False still gives the caller something to use
    return layered