Exemplo n.º 1
0
def return_figures():
    """Build the plotly figures for the heart-disease dashboard.

    Reads the module-level ``data`` frame (plus ``sex_map`` and
    ``target_map``) and produces, in order:

    1. a distribution plot of ``age`` split by ``target``,
    2. a feature correlation heatmap,
    3. a grouped bar chart of the ``target`` proportion within each ``sex``,
    4. a 3D scatter of ``chol`` / ``trestbps`` / ``thalach`` for rows with
       ``sex == 1``,
    5. a scatter matrix over every column except ``target``.

    Args:
        None

    Returns:
        list: the five plotly figures, in the order listed above.
    """
    # NOTE(review): the label order pairs "Heart Disease" with target == 0 and
    # "No Heart Disease" with target == 1 -- confirm against the dataset's
    # target encoding.
    ages_by_target = [
        data.loc[data.target == 0, 'age'],
        data.loc[data.target == 1, 'age'],
    ]
    age_fig = ff.create_distplot(ages_by_target,
                                 ["Heart Disease", "No Heart Disease"],
                                 bin_size=3,
                                 show_rug=False)
    age_fig.update_traces(opacity=0.55)
    age_fig.update_layout(
        title='Probability Density of Age versus Presence of Heart Disease',
        xaxis={'title': 'Age'},
        yaxis={'title': 'Probability Density'})

    corr_fig = plotly_corr_heatmap(data, show_diagonal=False)
    corr_fig.update_layout(title="Features Correlation Matrix")

    # Count rows per (target, sex) pair, then normalise by the per-sex total
    # so each bar is the share of that sex falling in the given target class.
    counts = data.groupby(['target', 'sex']).size().reset_index()
    counts.columns = ['target', 'sex', 'number']
    counts['totalsex'] = counts.groupby('sex').number.transform('sum')
    counts['proportion'] = counts['number'] / counts['totalsex']

    # Swap the numeric codes for display labels so plotly treats both columns
    # as categorical.
    counts['sex'] = counts['sex'].map(sex_map)
    counts['target'] = counts['target'].map(target_map)

    sex_fig = px.bar(counts,
                     x='sex',
                     y='proportion',
                     color='target',
                     barmode='group')
    sex_fig.update_layout(title='Proportion of Sex with Heart Disease')

    # 3D scatter restricted to rows with sex == 1.
    males_fig = px.scatter_3d(data.loc[data.sex == 1],
                              x="chol",
                              y="trestbps",
                              z="thalach",
                              color="target_name")
    males_fig.update_layout(
        showlegend=False,
        title=
        'Max Heart Rate Achieved (Thalach), Cholestoral, and Resting Blood Pressure (trestbps) in Males'
    )
    males_fig.update_traces(opacity=0.75, marker={'size': 5})

    # Every column except the numeric target becomes a scatter-matrix axis.
    matrix_dimensions = list(data.drop('target', axis=1).columns)
    matrix_fig = px.scatter_matrix(
        data,
        dimensions=matrix_dimensions,
        color='target_name',
        symbol='target_name',
        title='Scatter matrix for Heart Disease Dataset')
    matrix_fig.update_traces(diagonal_visible=False, opacity=0.15)

    return [age_fig, corr_fig, sex_fig, males_fig, matrix_fig]
Exemplo n.º 2
0
from sklearn.manifold import TSNE
import plotly.express as px

# Load the iris sample data set that ships with plotly express.
df = px.data.iris()

# Label-based slice: every column up to and including 'petal_width' is used
# as input to t-SNE.
features = df.loc[:, :'petal_width']

# Embed the features into three components; random_state=0 fixes the seed.
tsne = TSNE(n_components=3, random_state=0)
projections = tsne.fit_transform(features, )

# The projection is passed without column names, so the three axes are
# addressed by integer position. Colour comes from the species column of the
# original frame, and the 'color' legend entry is relabelled to 'species'.
fig = px.scatter_3d(projections,
                    x=0,
                    y=1,
                    z=2,
                    color=df.species,
                    labels={'color': 'species'})
fig.update_traces(marker_size=8)
# Write the figure as an HTML file in the current working directory.
fig.write_html("./tsne_example.html")
#plt.plot(ref_points[:, 0], ref_points[:, 1], 'o', color=my_gray, markersize=10, markeredgecolor='none',
#         label='reference points' if i == 0 else '')
# draw dashed lines between reference and transformed points
#for i in range(n):
#    plt.plot([ref_points[i, 0], tsr_points[i, 0]], [ref_points[i, 1], tsr_points[i, 1]], '--', color=colors[i])
#plt.legend(numpoints=1)
#plt.savefig('pointset_registration_2.png', format='png')

# Compute the affine transform from the point set.
# NOTE(review): ref_points, tsr_points, n and compute_affine_transform are
# defined outside this excerpt; the call is unpacked into two values.
translation, transformation = compute_affine_transform(ref_points, tsr_points)
# Inverse of the linear part and the matching offset.
# NOTE(review): invt and offset are not used in the lines visible here --
# confirm whether later code relies on them.
invt = np.linalg.inv(transformation)
offset = -np.dot(invt, translation)
# Centroids of both point sets (mean over axis 0).
ref_centroid = np.mean(ref_points, axis=0)
tsr_centroid = np.mean(tsr_points, axis=0)
new_points = np.empty_like(ref_points)
for i in range(n):
    # Centre the point on the tsr centroid, apply the linear transform, then
    # re-centre on the reference centroid.
    new_points[i] = ref_centroid + np.dot(transformation,
                                          tsr_points[i] - tsr_centroid)
    # Indexing columns 0..2 means the points are expected to have three
    # coordinates.
    print(
        'point %d will move to (%3.1f, %3.1f, %3.1f) to be compared with (%3.1f, %3.1f, %3.1f)'
        % (i, new_points[i, 0], new_points[i, 1], new_points[i, 2],
           ref_points[i, 0], ref_points[i, 1], ref_points[i, 2]))
    #plt.plot(new_points[i, 0], new_points[i, 1], 'x', color=colors[i], markersize=12,
    #         label='new points' if i == 0 else '')
#plt.legend(numpoints=1)
#plt.savefig('pointset_registration_3.png', format='png')
#plt.show()
# Show the transformed points as a 3D scatter; the third coordinate is named
# 'section' and is used both as the z axis and as the colour.
df = pd.DataFrame(new_points, columns=['x', 'y', 'section'])
fig = px.scatter_3d(df, x='x', y='y', z='section', color='section')
fig.show()
Exemplo n.º 4
0
# Inspect the distinct class labels before they are split off the features.
df['Label'].unique()
#df['Label']
names_numeric = df['Label'].unique()
print(names_numeric)

# Separate the label column (kept as a one-column frame) from the features.
df_labels = df[['Label']]
df = df.drop(['Label'], axis=1)
y = df_labels.values

# Standardise the feature columns before running PCA.
from sklearn import preprocessing
X = preprocessing.scale(df)
print(df)

# Reduce the standardised features to three principal components.
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
principalComponents = pca.fit_transform(X)
principalDf = pd.DataFrame(data=principalComponents,
                           columns=['pc1', 'pc2', 'pc3'])
x = principalDf.values
print(pca.explained_variance_ratio_)

# Re-attach the labels as a fourth column next to the components.
final = np.column_stack((x, y))
final_df = pd.DataFrame(final, columns=['pc1', 'pc2', 'pc3', 'labels'])
# NOTE(review): `names` is not defined in the visible excerpt -- presumably it
# is set earlier in the file; confirm.
print(names)
print(names_numeric)

import plotly.express as px
import plotly
fig = px.scatter_3d(final_df, x='pc1', y='pc2', z='pc3', color='labels')
# The second positional parameter of plotly.offline.plot is `show_link`, not
# the output path, so the file name has to be passed by keyword; otherwise the
# figure is written to the default "temp-plot.html".
plotly.offline.plot(fig, filename="3d.html")
Exemplo n.º 5
0
def build_plot(is_anim, plot_type, df, progress=None, **kwargs) -> dict:
    """Build a plotly figure of the requested ``plot_type`` from ``df``.

    Args:
        is_anim: When truthy, ``time_column`` is popped from ``kwargs`` and
            used as the animation frame; datetime columns are first rendered
            into a string ``time_step`` column. Fixed axis ranges are also
            computed so frames share the same scale.
        plot_type: One of the ``amp_consts.PLOT_*`` identifiers; selects the
            branch below. Unknown values produce no figure.
        df: Source data frame. Helper columns (``time_step``, the PC/LD label
            columns, ``x_nca``/``y_nca``) may be added to the frame passed in.
        progress: Optional callable invoked as ``progress(step, total)`` for
            every cell while a scatter matrix is assembled. ``None`` disables
            progress reporting.
        **kwargs: Plot parameters. Values equal to ``amp_consts.NONE_SELECTED``
            are passed through ``filter_none``; the rest are mostly forwarded
            to the plotly express call of the chosen branch. ``height`` and
            ``template`` are read for the final layout whenever a figure is
            produced.

    Returns:
        dict: Holds ``"figure"``, ``"model_data"``, ``"column_names"`` and
        ``"class_names"``; entries whose value is ``None`` are left out.
    """

    params = dict(**kwargs)
    for k, v in params.items():
        if v == amp_consts.NONE_SELECTED:
            params[k] = filter_none(params[k])
    num_columns = df.select_dtypes(include=[np.number]).columns.to_list()

    if is_anim:
        time_column = params.pop("time_column", "")
        if (
            time_column
            in df.select_dtypes(
                include=[np.datetime64, "datetime", "datetime64", "datetime64[ns, UTC]"]
            ).columns.to_list()
        ):
            # Datetime frames are rendered as strings for the animation frame.
            df["time_step"] = df[time_column].dt.strftime("%Y/%m/%d %H:%M:%S")
            afc = "time_step"
        else:
            afc = time_column
        params["animation_frame"] = afc
        df = df.sort_values([afc])
        # The projection plots set their own [-1, 1] ranges further down.
        if plot_type not in [
            amp_consts.PLOT_PCA_3D,
            amp_consts.PLOT_PCA_2D,
            amp_consts.PLOT_PCA_SCATTER,
            amp_consts.PLOT_LDA_2D,
            amp_consts.PLOT_QDA_2D,
            amp_consts.PLOT_NCA,
        ]:
            # Pin numeric axes to the full data range so they do not rescale
            # from frame to frame.
            x = params.get("x")
            params["range_x"] = (
                None if x not in num_columns else [df[x].min(), df[x].max()]
            )
            y = params.get("y")
            params["range_y"] = (
                None if y not in num_columns else [df[y].min(), df[y].max()]
            )
            if plot_type in [amp_consts.PLOT_SCATTER_3D, amp_consts.PLOT_PCA_3D]:
                z = params.get("z")
                params["range_z"] = (
                    None if z not in num_columns else [df[z].min(), df[z].max()]
                )

    params["data_frame"] = df

    fig = None
    model_data = None
    column_names = None
    class_names = None

    if plot_type == amp_consts.PLOT_SCATTER:
        fig = px.scatter(**params)
    elif plot_type == amp_consts.PLOT_SCATTER_3D:
        fig = px.scatter_3d(**params)
    elif plot_type == amp_consts.PLOT_LINE:
        fig = px.line(**params)
    elif plot_type == amp_consts.PLOT_BAR:
        fig = px.bar(**params)
    elif plot_type == amp_consts.PLOT_HISTOGRAM:
        # A horizontal histogram bins along y, so the selected column moves
        # from x to y.
        if "orientation" in params and params.get("orientation") == "h":
            params["x"], params["y"] = None, params["x"]
        fig = px.histogram(**params)
    elif plot_type == amp_consts.PLOT_BOX:
        fig = px.box(**params)
    elif plot_type == amp_consts.PLOT_VIOLIN:
        fig = px.violin(**params)
    elif plot_type == amp_consts.PLOT_DENSITY_HEATMAP:
        fig = px.density_heatmap(**params)
    elif plot_type == amp_consts.PLOT_DENSITY_CONTOUR:
        fc = params.pop("fill_contours") is True
        fig = px.density_contour(**params)
        if fc:
            fig.update_traces(contours_coloring="fill", contours_showlabels=True)
    elif plot_type == amp_consts.PLOT_PARALLEL_CATEGORIES:
        fig = px.parallel_categories(**params)
    elif plot_type == amp_consts.PLOT_PARALLEL_COORDINATES:
        fig = px.parallel_coordinates(**params)
    elif plot_type == amp_consts.PLOT_SCATTER_MATRIX:
        # Hand-built scatter matrix: one subplot per pair of numeric columns.
        fig = make_subplots(
            rows=len(num_columns),
            cols=len(num_columns),
            shared_xaxes=True,
            row_titles=num_columns,
        )
        color_column = params.get("color")
        if color_column is not None:
            template_colors = pio.templates[params.get("template")].layout["colorway"]
            if template_colors is None:
                template_colors = pio.templates[pio.templates.default].layout["colorway"]
            color_count = len(df[color_column].unique())
            if len(template_colors) < color_count:
                # Not enough colours in the template: extend the palette so
                # every category gets an entry.
                template_colors = np.repeat(
                    template_colors, (color_count // len(template_colors)) + 1
                )
            template_colors = template_colors[:color_count]
        else:
            # An int serves as the "no colour column" sentinel tested below.
            template_colors = 0
        legend_added = False
        step = 0
        total = len(num_columns) ** 2
        matrix_diag = params["matrix_diag"]
        matrix_up = params["matrix_up"]
        matrix_down = params["matrix_down"]
        for i, c in enumerate(num_columns):
            for j, l in enumerate(num_columns):
                # progress defaults to None, so only report when it is given.
                if progress is not None:
                    progress(step, total)
                step += 1
                # Pick the plot kind for this cell from the diagonal / upper /
                # lower settings.
                if i == j:
                    if matrix_diag == "Nothing":
                        continue
                    elif matrix_diag == "Histogram":
                        mtx_plot_kind = "Histogram"
                    else:
                        mtx_plot_kind = "Scatter"
                elif (i > j and matrix_up == "Scatter") or (
                    i < j and matrix_down == "Scatter"
                ):
                    mtx_plot_kind = "Scatter"
                elif (i > j and matrix_up == "Nothing") or (
                    i < j and matrix_down == "Nothing"
                ):
                    continue
                elif (i > j and matrix_up == "2D histogram") or (
                    i < j and matrix_down == "2D histogram"
                ):
                    mtx_plot_kind = "2D histogram"
                else:
                    # Unknown setting: no trace is added for this cell.
                    mtx_plot_kind = "Error"

                if isinstance(template_colors, int) or mtx_plot_kind == "2D histogram":
                    if mtx_plot_kind == "Histogram":
                        add_histogram(fig=fig, x=df[c], index=i + 1)
                    elif mtx_plot_kind == "Scatter":
                        add_scatter(
                            fig=fig,
                            x=df[c],
                            y=df[l],
                            row=j + 1,
                            col=i + 1,
                        )
                    elif mtx_plot_kind == "2D histogram":
                        add_2d_hist(fig=fig, x=df[c], y=df[l], row=j + 1, col=i + 1)
                else:
                    # One trace per category; only the first cell drawn
                    # contributes legend entries.
                    for color_parse, cat in zip(
                        template_colors, df[color_column].unique()
                    ):
                        df_cat = df[df[color_column] == cat]
                        if mtx_plot_kind == "Histogram":
                            add_histogram(
                                fig=fig,
                                x=df_cat[c],
                                index=i + 1,
                                name=cat,
                                marker=color_parse,
                                legend=not legend_added,
                            )
                        elif mtx_plot_kind == "Scatter":
                            add_scatter(
                                fig=fig,
                                x=df_cat[c],
                                y=df_cat[l],
                                row=j + 1,
                                col=i + 1,
                                name=cat,
                                marker=color_parse,
                                legend=not legend_added,
                            )
                    legend_added = True
                fig.update_xaxes(
                    title_text=c,
                    row=j + 1,
                    col=i + 1,
                )
                # Label the y axis on the first column only. The test must use
                # the column index: comparing the column *name* to 0 never
                # matches for named columns.
                if i == 0:
                    fig.update_yaxes(
                        title_text=l,
                        row=j + 1,
                        col=i + 1,
                    )
        fig.update_layout(barmode="stack")
    elif plot_type in [
        amp_consts.PLOT_PCA_2D,
        amp_consts.PLOT_PCA_3D,
        amp_consts.PLOT_PCA_SCATTER,
    ]:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(
                list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1
            )
        column_names = X.columns.to_list()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        model_data = PCA()
        x_new = model_data.fit_transform(X)
        pc1_lbl = f"PC1 ({model_data.explained_variance_ratio_[0] * 100:.2f}%)"
        pc2_lbl = f"PC2 ({model_data.explained_variance_ratio_[1] * 100:.2f}%)"
        x = x_new[:, 0]
        y = x_new[:, 1]
        # Scale each component by its span so the scores fit the [-1, 1]
        # ranges used for animations and overlay with the loadings.
        df[pc1_lbl] = x * (1.0 / (x.max() - x.min()))
        df[pc2_lbl] = y * (1.0 / (y.max() - y.min()))
        params["x"] = pc1_lbl
        params["y"] = pc2_lbl
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        # show_loadings is optional; a missing key simply disables loadings.
        sl = params.pop("show_loadings", None) is True
        if plot_type == amp_consts.PLOT_PCA_3D:
            z = x_new[:, 2]
            pc3_lbl = f"PC3 ({model_data.explained_variance_ratio_[2] * 100:.2f}%)"
            df[pc3_lbl] = z * (1.0 / (z.max() - z.min()))
            params["z"] = pc3_lbl
            if is_anim:
                params["range_z"] = [-1, 1]
            fig = px.scatter_3d(**params)
            if sl:
                loadings = np.transpose(model_data.components_[0:3, :])
                m = 1 / np.amax(loadings)
                loadings = loadings * m
                # One origin -> loading segment per feature; None breaks the
                # line between segments.
                xc, yc, zc = [], [], []
                for i in range(loadings.shape[0]):
                    xc.extend([0, loadings[i, 0], None])
                    yc.extend([0, loadings[i, 1], None])
                    zc.extend([0, loadings[i, 2], None])
                fig.add_trace(
                    go.Scatter3d(
                        x=xc,
                        y=yc,
                        z=zc,
                        mode="lines",
                        name="Loadings",
                        showlegend=False,
                        line=dict(color="black"),
                        opacity=0.3,
                    )
                )
                fig.add_trace(
                    go.Scatter3d(
                        x=loadings[:, 0],
                        y=loadings[:, 1],
                        z=loadings[:, 2],
                        mode="text",
                        # Use the columns actually fed to PCA so the labels
                        # still line up when columns were ignored.
                        text=column_names,
                        opacity=0.7,
                        name="Loadings",
                    ),
                )
        elif plot_type == amp_consts.PLOT_PCA_2D:
            fig = px.scatter(**params)
            if sl:
                loadings = np.transpose(model_data.components_[0:2, :])
                m = 1 / np.amax(loadings)
                loadings = loadings * m
                xc, yc = [], []
                for i in range(loadings.shape[0]):
                    xc.extend([0, loadings[i, 0], None])
                    yc.extend([0, loadings[i, 1], None])
                fig.add_trace(
                    go.Scatter(
                        x=xc,
                        y=yc,
                        mode="lines",
                        name="Loadings",
                        showlegend=False,
                        line=dict(color="black"),
                        opacity=0.3,
                    )
                )
                fig.add_trace(
                    go.Scatter(
                        x=loadings[:, 0],
                        y=loadings[:, 1],
                        mode="text",
                        text=column_names,
                        opacity=0.7,
                        name="Loadings",
                    ),
                )
        elif plot_type == amp_consts.PLOT_PCA_SCATTER:
            # Scatter matrix over the raw component scores.
            params_ = {
                "data_frame": x_new,
                "labels": {str(i): f"PC {i+1}" for i in range(x_new.shape[1] - 1)},
            }
            if params["color"] is not None:
                params_["color"] = df[params["color"]]
            if params["dimensions"] is not None:
                params_["dimensions"] = range(
                    min(
                        params["dimensions"],
                        x_new.shape[1] - 1,
                    )
                )
            if is_anim:
                params_["range_x"] = [-1, 1]
                params_["range_y"] = [-1, 1]
            fig = px.scatter_matrix(**params_)
            fig.update_traces(diagonal_visible=False)
    elif plot_type in [amp_consts.PLOT_LDA_2D, amp_consts.PLOT_QDA_2D]:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(
                list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1
            )
        column_names = X.columns.to_list()
        # Encode the target as integers: categorical codes for object
        # columns, truncation for floats. The builtin float replaces the
        # np.float alias, which newer NumPy releases no longer provide.
        if params["target"] in df.select_dtypes(include=["object"]).columns.to_list():
            t = df[params["target"]].astype("category").cat.codes
        elif params["target"] in df.select_dtypes(include=[float]).columns.to_list():
            t = df[params["target"]].astype("int")
        else:
            t = df[params["target"]]
        class_names = df[params["target"]].unique()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        if plot_type == amp_consts.PLOT_LDA_2D:
            model_data = LinearDiscriminantAnalysis(solver=params.pop("solver", "svd"))
        elif plot_type == amp_consts.PLOT_QDA_2D:
            model_data = QuadraticDiscriminantAnalysis(store_covariance=True)
        # NOTE(review): scikit-learn's QuadraticDiscriminantAnalysis does not
        # appear to document transform() or explained_variance_ratio_, so the
        # QDA path likely fails below -- confirm.
        x_new = model_data.fit(X, y=t).transform(X)
        label_root = "LD" if plot_type == amp_consts.PLOT_LDA_2D else "QD"
        pc1_lbl = f"{label_root}1 ({model_data.explained_variance_ratio_[0] * 100:.2f}%)"
        pc2_lbl = f"{label_root}2 ({model_data.explained_variance_ratio_[1] * 100:.2f}%)"
        x = x_new[:, 0]
        y = x_new[:, 1]
        df[pc1_lbl] = x / np.abs(x).max()
        df[pc2_lbl] = y / np.abs(y).max()
        params["x"] = pc1_lbl
        params["y"] = pc2_lbl
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        params.pop("target")
        sl = params.pop("show_loadings", None) is True
        fig = px.scatter(**params)
        if sl:
            # Copy first: the transpose is a view into model_data.coef_, and
            # the in-place normalisation below would otherwise rewrite the
            # fitted model that is returned to the caller.
            loadings = np.transpose(model_data.coef_[0:2, :]).copy()
            loadings[:, 0] = loadings[:, 0] / np.abs(loadings[:, 0]).max()
            loadings[:, 1] = loadings[:, 1] / np.abs(loadings[:, 1]).max()
            xc, yc = [], []
            for i in range(loadings.shape[0]):
                xc.extend([0, loadings[i, 0], None])
                yc.extend([0, loadings[i, 1], None])
            fig.add_trace(
                go.Scatter(
                    x=xc,
                    y=yc,
                    mode="lines",
                    name="Loadings",
                    showlegend=False,
                    line=dict(color="black"),
                    opacity=0.3,
                )
            )
            fig.add_trace(
                go.Scatter(
                    x=loadings[:, 0],
                    y=loadings[:, 1],
                    mode="text",
                    text=column_names,
                    opacity=0.7,
                    name="Loadings",
                ),
            )
    elif plot_type in [amp_consts.PLOT_NCA]:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(
                list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1
            )
        column_names = X.columns.to_list()
        # Same target encoding as the LDA/QDA branch.
        if params["target"] in df.select_dtypes(include=["object"]).columns.to_list():
            t = df[params["target"]].astype("category").cat.codes
        elif params["target"] in df.select_dtypes(include=[float]).columns.to_list():
            t = df[params["target"]].astype("int")
        else:
            t = df[params["target"]]
        class_names = df[params["target"]].unique()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        model_data = NeighborhoodComponentsAnalysis(
            init=params.pop("init", "auto"),
            n_components=min(len(column_names), params.pop("n_components", 2)),
        )
        x_new = model_data.fit(X, y=t).transform(X)
        df["x_nca"] = x_new[:, 0]
        df["y_nca"] = x_new[:, 1]
        params["x"] = "x_nca"
        params["y"] = "y_nca"
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        params.pop("target")
        fig = px.scatter(**params)
    elif plot_type == amp_consts.PLOT_CORR_MATRIX:
        fig = px.imshow(
            df[num_columns].corr(method=params.get("corr_method")).values,
            x=num_columns,
            y=num_columns,
        )
    else:
        fig = None

    if fig is not None:
        if plot_type in amp_consts.PLOT_IS_3D:
            fig.update_layout(scene={"aspectmode": "cube"})
        fig.update_layout(
            height=params["height"],
            template=params["template"],
            legend={"traceorder": "normal"},
        )
        # Apply a default marker style only when no size column was chosen.
        if ("size" not in params) or (
            (params["size"] is None) or (params["size"] == amp_consts.NONE_SELECTED)
        ):
            fig.update_traces(
                marker=dict(
                    size=8,
                    line=dict(width=2),  # color="DarkSlateGrey"),
                    opacity=0.7,
                ),
                selector=dict(mode="markers"),
            )

    # Drop the entries that were never filled in.
    return {
        k: v
        for k, v in zip(
            ["figure", "model_data", "column_names", "class_names"],
            [fig, model_data, column_names, class_names],
        )
        if v is not None
    }
Exemplo n.º 6
0
'y' : output[:,1]*1000,
'z' : output[:,2]*-1000,
'Sdirect': output[:,3],
'Smises' : output[:,4]/1e9
})

    
print("Runtime: %f seconds" % (time.time()-start_time))


# Select the row(s) holding the maximum von Mises stress in each model. The
# maximum is taken on the 'Smises' column alone instead of reducing every
# column of the frame just to read one value back out.
Smises_max_loc_model = dfmodel.loc[dfmodel['Smises'] == dfmodel['Smises'].max(),
                                   ['x', 'y', 'z', 'Smises']]
Smises_max_loc_valid = dfjambent.loc[dfjambent['Smises'] == dfjambent['Smises'].max(),
                                     ['x', 'y', 'z', 'Smises']]
print('Max von Mises stress numerical model = ','\n','\n',Smises_max_loc_model,'\n')
print('Max von Mises stress validation model = ','\n','\n',Smises_max_loc_valid,'\n')

# 3D scatter of each model's points, coloured by the 'Smises' column.
fig = px.scatter_3d(dfmodel, x='x', y='y', z='z', color='Smises')
fig.update_layout(title='Von mises stresses numerical model')
fig.show()

fig = px.scatter_3d(dfjambent, x='x', y='y', z='z', color='Smises')
fig.update_layout(title='Von mises stresses validation model')
fig.show()

#HL_dy_bending = dfbending[dfbending.z==0]
#HL_dy_bending = HL_dy_bending[HL_dy_bending.y==0]
#HL_dy_bending = HL_dy_bending.sort_values(by=['x']) # sort the nodes by x value (necessary for plotting)
#
#
#HL_dy_jambent = dfjambent[dfjambent.z==0]
#HL_dy_jambent = HL_dy_jambent[HL_dy_jambent.y==0]
#HL_dy_jambent = HL_dy_jambent.sort_values(by=['x']) # sort the nodes by x value (necessary for plotting)
Exemplo n.º 7
0
def scatterplot(
    df: pd.DataFrame,
    col: str,
    color: str = None,
    hover_name: str = None,
    hover_data: [] = None,
    title="",
    return_figure=False,
):
    """
    Draw a 2D or 3D plotly scatter plot from a vector-valued column.

    Each cell of ``df[col]`` is expected to hold a sequence of coordinates.
    Two values per cell give a flat scatter plot; three values per cell
    (e.g. the output of a 3-component PCA) give a 3D plot in which the cell
    ``[x, y, z]`` becomes the point ``(x, y, z)``.

    Parameters
    ----------
    df: DataFrame holding the column to visualize.

    col: str
        Name of the column whose cells supply the x, y (and z) coordinates.

    color: str, optional, default=None
        Name of the column used for coloring; rows sharing a value share a
        color.

    hover_name: str, optional, default=None
        Name of the column used as the title of the hover box.

    hover_data: List[str], optional, default=None
        Names of the columns whose values are listed in the hover box.

    title: str, default to "".
        Title of the plot.

    return_figure: bool, optional, default=False
        When True the figure is returned instead of being shown.

    Raises
    ------
    ValueError
        If the cells of ``df[col]`` do not have exactly 2 or 3 values.

    Examples
    --------
    >>> import texthero as hero
    >>> import pandas as pd
    >>> df = pd.DataFrame(["Football, Sports, Soccer",
    ...                    "music, violin, orchestra", "football, fun, sports",
    ...                    "music, fun, guitar"], columns=["texts"])
    >>> df["texts"] = hero.clean(df["texts"]).pipe(hero.tokenize)
    >>> df["pca"] = (
    ...             hero.tfidf(df["texts"])
    ...                 .pipe(hero.pca, n_components=3)
    ... )
    >>> df["topics"] = (
    ...                hero.tfidf(df["texts"])
    ...                    .pipe(hero.kmeans, n_clusters=2)
    ... )
    >>> hero.scatterplot(df, col="pca", color="topics",
    ...                  hover_data=["texts"]) # doctest: +SKIP
    """

    # Stacking along axis 1 yields one row per coordinate axis, so the number
    # of rows is the dimensionality of the data.
    coords = np.stack(df[col], axis=1)
    n_dims = len(coords)

    if not 2 <= n_dims <= 3:
        raise ValueError(
            "The column you want to visualize has dimension < 2 or dimension > 3."
            " The function can only visualize 2- and 3-dimensional data.")

    # Keyword arguments common to the 2D and 3D calls.
    shared = {
        "color": color,
        "hover_data": hover_data,
        "title": title,
        "hover_name": hover_name,
    }

    if n_dims == 3:
        fig = px.scatter_3d(df, x=coords[0], y=coords[1], z=coords[2], **shared)
    else:
        fig = px.scatter(df, x=coords[0], y=coords[1], **shared)

    if not return_figure:
        fig.show()
        return None
    return fig
Exemplo n.º 8
0
def plot_3d_diffusion_embedding(x1, x2, x3):
    """Display a 3-D scatter plot of three coordinate sequences.

    ``x1``, ``x2`` and ``x3`` are passed to ``px.scatter_3d`` as the x, y and
    z values respectively; the figure is shown with the ``iframe`` renderer.
    Nothing is returned.
    """
    axes = {'x': x1, 'y': x2, 'z': x3}
    px.scatter_3d(**axes).show(renderer='iframe')
Exemplo n.º 9
0
    def viz_streamlit_entity_embed_manifold(
            pipe,  # nlu component_list
            default_texts: List[str] = ("Donald Trump likes to visit New York", "Angela Merkel likes to visit Berlin!", 'Peter hates visiting Paris'),
            title: Optional[str] = "Lower dimensional Manifold visualization for Entity embeddings",
            sub_title: Optional[str] = "Apply any of the 10+ `Manifold` or `Matrix Decomposition` algorithms to reduce the dimensionality of `Entity Embeddings` to `1-D`, `2-D` and `3-D` ",
            default_algos_to_apply: List[str] = ("TSNE", "PCA"),
            target_dimensions: List[int] = (1, 2, 3),
            show_algo_select: bool = True,
            set_wide_layout_CSS: bool = True,
            num_cols: int = 3,
            model_select_position: str = 'side',  # side or main
            key: str = "NLU_streamlit",
            show_infos: bool = True,
            show_logo: bool = True,
            n_jobs: Optional[int] = 3,  # False
    ):
        """Render entity-embedding manifold plots in a Streamlit page.

        For every tracked NER pipeline the texts entered in the text area are
        predicted at chunk level, the chunk embeddings are reduced with each
        selected algorithm to every requested target dimension, and one
        plotly scatter plot per (pipeline, algorithm, dimension) combination
        is written into a grid of Streamlit columns.

        Parameters
        ----------
        pipe :
            Pipeline to visualize; it is registered in the
            ``StreamlitVizTracker`` pipe lists if not already present.
        default_texts :
            Initial texts for the text area. A list or tuple is joined with
            newlines (one text per line); anything else is passed through.
        title, sub_title :
            Header / sub-header text; skipped when falsy.
        default_algos_to_apply :
            Pre-selected entries of the algorithm multiselect.
        target_dimensions :
            Subset of ``1``, ``2``, ``3``; one plot is drawn per contained
            dimension, always in ascending order.
        show_algo_select :
            Show the algorithm and NER-model selectors. When False only
            ``'TSNE'`` is applied and no additional models are loaded.
        set_wide_layout_CSS :
            Apply the wide block-container style.
        num_cols :
            Number of Streamlit columns per row of plots.
        model_select_position :
            ``'side'`` puts the model selector in the sidebar, any other
            value puts it in the expander.
        key :
            Streamlit widget key.
        show_infos :
            Display model info and the footer after the plots.
        show_logo :
            Display the logo at the top.
        n_jobs :
            Forwarded to ``StreamlitUtilsOS.get_manifold_algo``.
        """

        from nlu.pipe.viz.streamlit_viz.streamlit_utils_OS import StreamlitUtilsOS
        StreamlitVizTracker.footer_displayed = False

        try:
            import plotly.express as px
            # Imported only to probe that sklearn is installed.
            from sklearn.metrics.pairwise import distance_metrics  # noqa: F401
        except ImportError:
            # Only a missing package is expected here; a bare ``except`` would
            # also swallow KeyboardInterrupt / SystemExit.
            st.error(
                "You need the sklearn and plotly package in your Python environment installed for similarity visualizations. Run <pip install sklearn plotly>")

        if show_logo: StreamlitVizTracker.show_logo()
        if set_wide_layout_CSS: _set_block_container_style()
        if title: st.header(title)
        if sub_title: st.subheader(sub_title)
        # The default value is a tuple, so tuples must be joined as well;
        # otherwise the raw tuple would be handed to the text area.
        if isinstance(default_texts, (list, tuple)):
            default_texts = '\n'.join(default_texts)
        data = st.text_area('Enter N texts, seperated by new lines to visualize Sentence Embeddings for ',
                            default_texts).split('\n')
        output_level = 'chunk'
        ner_emebed_pipe_algo_selection = []
        loaded_ner_embed_nlu_refs = []
        algos = ['TSNE']

        def register_pipe(candidate):
            """Track ``candidate`` in both tracker lists exactly once."""
            if candidate not in StreamlitVizTracker.loaded_ner_word_embeding_pipes:
                StreamlitVizTracker.loaded_ner_word_embeding_pipes.append(candidate)
            if candidate not in StreamlitVizTracker.loaded_word_embeding_pipes:
                StreamlitVizTracker.loaded_word_embeding_pipes.append(candidate)

        # A component_list should have a NER and a Word Embedding
        register_pipe(pipe)

        if show_algo_select:
            # Manifold Selection
            exp = st.expander("Select additional manifold and dimension reduction techniques to apply")
            algos = exp.multiselect(
                "Reduce embedding dimensionality to something visualizable",
                options=(
                    "TSNE", "ISOMAP", 'LLE', 'Spectral Embedding', 'MDS', 'PCA', 'SVD aka LSA', 'DictionaryLearning',
                    'FactorAnalysis', 'FastICA', 'KernelPCA', 'LatentDirichletAllocation'),
                default=default_algos_to_apply, )
            ner_emb_components_usable = [e for e in Discoverer.get_components('ner', True, include_aliases=True) if
                                         'embed' not in e and 'sentence' not in e]

            # Find nlu_ref of currently loaded component_list
            for p in StreamlitVizTracker.loaded_ner_word_embeding_pipes:
                loaded_ner_embed_nlu_refs.append(p.nlu_ref)

            # NER Selection
            selector_host = st.sidebar if model_select_position == 'side' else exp
            ner_emebed_pipe_algo_selection = selector_host.multiselect(
                "Pick additional NER Models for the Dimension Reduction", options=ner_emb_components_usable,
                default=loaded_ner_embed_nlu_refs, key=key)

        # Load every selected model that is not tracked yet.
        for ner_nlu_ref in ner_emebed_pipe_algo_selection:
            already_loaded = any(
                ner_p.nlu_ref == ner_nlu_ref
                for ner_p in StreamlitVizTracker.loaded_ner_word_embeding_pipes)
            if already_loaded:
                continue
            register_pipe(nlu.load(ner_nlu_ref))

        col_index = 0
        cols = st.columns(num_cols)

        def show_in_next_column(caption, figure):
            """Write caption + figure into the next free column, starting a new row when full."""
            nonlocal cols, col_index
            cols[col_index].markdown(caption)
            cols[col_index].write(figure, key=key)
            col_index += 1
            if col_index == num_cols:
                cols = st.columns(num_cols)
                col_index = 0

        for p in StreamlitVizTracker.loaded_ner_word_embeding_pipes:
            p = EntityManifoldUtils.insert_chunk_embedder_to_pipe_if_missing(p)
            predictions = p.predict(data, metadata=True, output_level=output_level, multithread=False).dropna()
            entity_cols = EntityManifoldUtils.get_ner_cols(predictions)
            chunk_embed_col = EntityManifoldUtils.find_chunk_embed_col(predictions)

            # TODO get cols for non default NER? or multi ner setups?
            e_com = StreamlitUtilsOS.find_embed_component(p)
            e_com_storage_ref = StorageRefUtils.extract_storage_ref(e_com)
            emb = predictions[chunk_embed_col]
            mat = np.array(list(emb))
            # Collapse any extra axes to (n_rows, embedding_dim); independent
            # of the algorithm, so done once per pipeline.
            if len(mat.shape) > 2: mat = mat.reshape(len(emb), mat.shape[-1])

            for algo in algos:
                # Only positive values for Latent Dirichlet. Use a per-algorithm
                # matrix so the squared values do not leak into the algorithms
                # that run after it.
                algo_mat = np.square(mat) if algo == 'LatentDirichletAllocation' else mat
                hover_data = entity_cols + ['text']
                feature_to_color_by = entity_cols[0]

                # calc reduced dimensionality with every algo
                for dim in (1, 2, 3):
                    if dim not in target_dimensions:
                        continue
                    low_dim_data = StreamlitUtilsOS.get_manifold_algo(algo, dim, n_jobs).fit_transform(algo_mat)
                    coords = {'x': low_dim_data[:, 0]}
                    if dim == 1:
                        # 1-D embeddings are drawn on a flat line.
                        coords['y'] = np.zeros(low_dim_data[:, 0].shape)
                    else:
                        coords['y'] = low_dim_data[:, 1]
                    if dim == 3:
                        coords['z'] = low_dim_data[:, 2]

                    tsne_df = pd.DataFrame({**coords,
                                            **{k: predictions[k] for k in entity_cols},
                                            **{'text': predictions[entity_cols[-1]]}
                                            })
                    if dim == 3:
                        fig = px.scatter_3d(tsne_df, x="x", y="y", z='z', color=feature_to_color_by,
                                            hover_data=hover_data)
                    else:
                        fig = px.scatter(tsne_df, x="x", y="y", color=feature_to_color_by, hover_data=hover_data)
                    subh = f"""Word-Embeddings =`{e_com_storage_ref}`, NER-Model =`{p.nlu_ref}`, Manifold-Algo =`{algo}` for `D={dim}`"""
                    show_in_next_column(subh, fig)

                # Todo fancy embed infos etc

        if show_infos:
            StreamlitVizTracker.display_model_info(pipe.nlu_ref, pipes=[pipe])
            StreamlitVizTracker.display_footer()
Exemplo n.º 10
0
def visualize(img, outputs, renderer):
    """Render and export the first predicted mesh of a batch.

    Side effects, in order:

    * writes ``./file.html`` - a plotly 3-D scatter of the vertices colored
      by their arg-max segmentation label,
    * writes ``demo_seg.obj`` via ``save_obj`` with one RGB color per vertex
      derived from that label,
    * shows a 2x3 matplotlib panel (input, mesh, textured mesh and three
      rotated viewpoints) and saves it to ``demo_image.png``.

    Args:
        img: Input image; it is transposed with axes ``(1, 2, 0)`` before
            being displayed.
        outputs: Mapping with the keys ``'verts'``, ``'cam_pred'``,
            ``'texture'``, ``'faces'`` and ``'vertex_seg_map'``; only
            element ``[0]`` of each entry is used.
        renderer: Callable ``renderer(vert, cam, texture=...)`` that also
            provides ``diff_vp(...)``.

    Returns:
        None.
    """
    vert = outputs['verts'][0]
    cam = outputs['cam_pred'][0]
    texture = outputs['texture'][0]
    faces = outputs['faces'][0]
    shape_pred = renderer(vert, cam)
    img_pred = renderer(vert, cam, texture=texture)

    # Per-vertex label = arg-max over the segmentation scores, kept as an
    # (N, 1) float tensor so it can be concatenated with the coordinates.
    vertex_seg_map = torch.argmax(outputs['vertex_seg_map'][0],
                                  dim=1).unsqueeze(1).type(torch.FloatTensor)
    x = torch.cat([vert.cpu(), vertex_seg_map], dim=1)
    print(outputs['vertex_seg_map'][0].max(1)[0].sum())
    df = pd.DataFrame(x.numpy())

    fig = px.scatter_3d(df, x=0, y=1, z=2, color=3)
    fig.write_html("./file.html")
    print("file.html written")

    # RGB color for segmentation labels 0..7 (row index == label).
    palette = torch.tensor([
        [0.0, 0.0, 1.0],
        [0.0, 1.0, 0.0],
        [0.0, 1.0, 1.0],
        [1.0, 0.0, 0.0],
        [1.0, 0.0, 1.0],
        [1.0, 1.0, 0.0],
        [1.0, 0.5, 0.5],
        [0.5, 1.0, 1.0],
    ], dtype=torch.float32)

    # Color every vertex with one table lookup instead of looping over a
    # hard-coded count of 642 vertices. Labels outside the palette keep the
    # repeated label value, as before.
    tex_seg = vertex_seg_map.repeat(1, 3)
    labels = vertex_seg_map[:, 0].long()
    in_palette = labels < palette.shape[0]
    tex_seg[in_palette] = palette[labels[in_palette]]

    save_obj("demo_seg.obj",
             vert,
             faces,
             tex_seg.contiguous(),
             texture_type='vertex')
    print("seg_obj file written")

    # Different viewpoints.
    vp1 = renderer.diff_vp(vert,
                           cam,
                           angle=30,
                           axis=[0, 1, 0],
                           texture=texture,
                           extra_elev=True)
    vp2 = renderer.diff_vp(vert,
                           cam,
                           angle=60,
                           axis=[0, 1, 0],
                           texture=texture,
                           extra_elev=True)
    vp3 = renderer.diff_vp(vert,
                           cam,
                           angle=90,
                           axis=[0, 1, 0],
                           texture=texture)

    img = np.transpose(img, (1, 2, 0))
    import matplotlib.pyplot as plt
    plt.ion()
    plt.figure(1)
    plt.clf()

    # (subplot position, image, optional title) for the 2x3 grid.
    panels = (
        (231, img, 'input'),
        (232, shape_pred, 'pred mesh'),
        (233, img_pred, 'pred mesh w/texture'),
        (234, vp1, 'different viewpoints'),
        (235, vp2, None),
        (236, vp3, None),
    )
    for position, image, caption in panels:
        plt.subplot(position)
        plt.imshow(image)
        if caption is not None:
            plt.title(caption)
        plt.axis('off')

    plt.draw()
    plt.show()
    print('saving file to demo_image.png')
    plt.savefig('demo_image.png')
Exemplo n.º 11
0
# %%
print(df)

# %%
import plotly
import plotly.graph_objs as go
import plotly.express as px
from ipywidgets import interact, widgets

# Enable plotly's offline rendering inside the notebook.
plotly.offline.init_notebook_mode()

# Build a 3-D scatter of the 'X', 'Y', 'Z' columns of `df`, colored by the
# 'label' column. NOTE: despite its name, `trace` holds the whole figure
# returned by px.scatter_3d, which is why `.show()` is called on it below.
trace = px.scatter_3d(
    df,
    x='X',
    y='Y',
    z='Z',
    color='label',
    #  title="Graph for Layer "+str(layer)
)

# Render the figure.
trace.show()

# %%
################# Explore Codebook Summation and Centroids #################

i = 0
channel_labels = {'x', 'y', 'z'}
fig = plt.figure(figsize=(40, 40))

for c in enumerate(codebook):
    color_index = 0
Exemplo n.º 12
0
            reduced_data = tsne.fit_transform(features)

            clusters_data = reduced_data

        else:
            clusters_data = features

        k_means = KMeans(n_clusters=K)
        y = k_means.fit_predict(clusters_data)

        df['Cluster'] = y
        df['Cluster'] = df['Cluster'].apply(str)
        fig = px.scatter_3d(df,
                            x=df['X'],
                            y=df['Y'],
                            z=df['Z'],
                            color=df['Cluster'],
                            width=700,
                            height=700)
        st.plotly_chart(fig)

        interpret(df, "k")

    elif clustering_method == techniques[1]:
        st.write("Hierarchical results")
        features = normalize(features)

        fig = ff.create_dendrogram(features)
        fig.update_layout(width=800, height=600)
        st.plotly_chart(fig)
Exemplo n.º 13
0
# Join the cleaned coin data with the principal components and the coin
# names on the shared 'Unnamed: 0' key, then attach the cluster labels.
clustered_df = (
    cleaned_crypto_df
    .merge(pcs_df, on='Unnamed: 0')
    .merge(coins_name, on='Unnamed: 0')
)
clustered_df['Class'] = model.labels_

# Use the join key as the index and preview the first rows.
clustered_df.set_index('Unnamed: 0', drop=True, inplace=True)
clustered_df.head(10)
# %% [markdown]
# # Visualizing Results

# %%
# 3-D scatter of the three principal components; color and marker symbol
# both encode the cluster, hover shows the coin name and algorithm.
fig = px.scatter_3d(
    clustered_df,
    x='PC 1',
    y='PC 2',
    z='PC 3',
    color='Class',
    symbol='Class',
    hover_name='CoinName',
    hover_data=['Algorithm'],
)
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

# %%
# hvplot table listing the clustered cryptocurrencies
obj_table = clustered_df.hvplot.table(
    columns=[
        'CoinName',
        'Algorithm',
        'ProofType',
        'TotalCoinSupply',
        'TotalCoinsMined',
        'Class',
    ],
    width=500,
)

hvplot.show(obj_table)
Exemplo n.º 14
0
# Store the model's predictions for the test set next to its features.
X_test['y_pred'] = lr.predict(X_test_array)

# Bare expression: displays the fitted coefficients when run as a notebook cell.
lr.coef_

import plotly.express as px

# Bare expression: previews the training features in a notebook.
X_train.head()

# The bare string below is the notebook's markdown cell (Portuguese). In
# English: "First visualization - the correlation between minimum temperature
# and precipitation is low; the higher the temperature and the lower the
# precipitation, the HIGHER the beer consumption."
"""# Primeira visualização

I- Podemos observar na função executada no início do kernel que a correlação entre a *Temperatura Minima (C)* e a *Precipitacao (mm)* é baixa; porém

II- Podemos observar no gráfico abaixo que quanto maior a temperatura e menor o volume de precipitação, ***MAIOR será nosso consumo de cerveja.***
"""

# Predicted value against precipitation and minimum temperature.
fig = px.scatter_3d(X_test, x='Precipitacao (mm)',y='Temperatura Minima (C)',z='y_pred')
fig.show()

# Markdown cell (Portuguese). In English: "Second visualization - the higher
# the temperature, the HIGHER the beer consumption; consumption is also
# considerably higher on weekends."
"""# Segunda visualização

I- Aqui podemos observar que quanto maior for nossa temperatura, ***MAIOR será o consumo de cerveja.***

II- Podemos observar também que o consumo de cerveja é consideravelmente maior nos finais de semana.
"""

# Predicted value against the weekend flag and maximum temperature.
fig = px.scatter_3d(X_test, x='Final de Semana',y='Temperatura Maxima (C)',z='y_pred')
fig.show()

"""# Terceira visualização

I- Neste gráfico também podemos observar que quanto maior for nossa temperatura e menor for nosso volume de precipitação, ***MAIOR será o consumo de cerveja.***
# Scale the feature matrix `x` with `min_max_scaler` and wrap the result in a
# DataFrame (columns are renamed further down).
x_scaled2 = min_max_scaler.fit_transform(x)
df1 = pd.DataFrame(x_scaled2)

#  ------------------------ k-means Clustering ---------------------

# Fit 4 clusters on the scaled features; random_state is fixed for
# reproducible labels.
kmeans = KMeans(init="k-means++", n_clusters=4, random_state=15,
                max_iter=500).fit(x_scaled2)
df1['kmeans'] = kmeans.labels_
df1.columns = ['energy', 'instrumentalness', 'loudness', 'kmeans']

# NOTE: `kmeans` is rebound here from the fitted model to the label column.
kmeans = df1['kmeans']
# NOTE(review): assigning a Series aligns on index - assumes `df` shares
# df1's default integer index; confirm against how `df` was built.
df['kmeans'] = kmeans

# 3-D scatter of the three features of `df`, colored by cluster label.
fig = px.scatter_3d(df,
                    x='energy',
                    y='instrumentalness',
                    z='loudness',
                    color='kmeans')
fig.show()

# ---------------------------------- Violin Plot for each cluster ------------------
# One sub-frame of the scaled data per cluster label.
c0 = df1[df1['kmeans'] == 0]
c1 = df1[df1['kmeans'] == 1]
c2 = df1[df1['kmeans'] == 2]
c3 = df1[df1['kmeans'] == 3]

# genre =df ['genre']
# c0['genre'] = genre
# c1['genre'] = genre
# c2['genre'] = genre
Exemplo n.º 16
0
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Load the iris measurements from the local CSV file.
iris = pd.read_csv("Iris.csv")

# 3-D scatter of sepal length / sepal width / petal width, one color per species.
fig = px.scatter_3d(
    iris,
    x='SepalLengthCm',
    y='SepalWidthCm',
    z='PetalWidthCm',
    color='Species',
)
fig.show()
Exemplo n.º 17
0
import pandas as pd
import plotly.express as px
from sklearn.mixture import BayesianGaussianMixture
from sklearn.cluster import OPTICS
import matplotlib.pyplot as plt

# Load the "willingness to participate in activism" clustering data.
dfw = pd.read_csv('./data/wilac_clustering_data.csv')
# Bare expressions: only display output when run interactively.
dfw.mean()
dfw

# 3-D scatter colored by the quadratic effect on a diverging scale centered
# at 0. The title is built from adjacent string literals: a backslash
# continuation inside the quotes would embed the next line's indentation
# (a long run of spaces) in the rendered title.
fig = px.scatter_3d(dfw,
                    x='Gini2016',
                    y='GDPpc2016',
                    z='InsAcceptance',
                    color='quadraticEffect',
                    hover_name='sample',
                    color_continuous_scale='RdBu',
                    color_continuous_midpoint=0,
                    template="plotly_dark",
                    opacity=0.9,
                    title=('Scatterplot Quadratic Effect on Willingness '
                           'to Participate in Activism'))
fig.write_html("./plots/wilacQua.html")

dfc = pd.read_csv('./data/colac_clustering_data.csv')
dfc.mean()

fig = px.scatter_3d(dfc,
                    x='Gini2016',
                    y='GDPpc2016',
                    z='InsAcceptance',
                    color='quadraticEffect',
Exemplo n.º 18
0
 # Flatten the (alpha, Re) grid into parallel lists, evaluating the lift and
 # profile-drag functions once per grid point.
 alphas, Res, CLs, CDs = [], [], [], []
 for alpha in alpha_inputs:
     for Re in Re_inputs:
         for column, value in (
                 (alphas, alpha),
                 (Res, Re),
                 (CLs, Cl_e216(alpha, Re)),
                 (CDs, Cd_profile_e216(alpha, Re)),
         ):
             column.append(value)

 # Lift coefficient over the grid; Re axis is logarithmic, all markers get
 # the same size.
 cl_axis_labels = {"x": "alphas", "y": "Re", "z": "CL"}
 cl_figure = px.scatter_3d(
     x=alphas,
     y=Res,
     z=CLs,
     size=np.ones_like(alphas),
     color=CLs,
     log_y=True,
     labels=cl_axis_labels,
 )
 cl_figure.show()
 px.scatter_3d(x=alphas,
               y=Res,
               z=CDs,
               size=np.ones_like(alphas),
               color=CDs,
               log_y=True,
               labels={
                   "x": "alphas",
                   "y": "Re",
                   "z": "CD"
#iris_df['class'] = predictions
iris_df.head()

# %% [markdown]
# data Visualization of results

# %%
# Plot a 2-D scatter with hvplot, rendered through the bokeh backend
# (viewable in a web browser).
import holoviews as hv
hv.extension('bokeh')
# Bare expression: displays the plot inline when run as a notebook cell.
iris_df.hvplot.scatter(x='petal_width', y='sepal_length', by='class')
# Same plot built again and shown explicitly via hvplot.show.
hvplot.show(
    iris_df.hvplot.scatter(x='petal_width', y='sepal_length', by='class'))
# %%
# 3-D scatter with plotly.express: color and symbol encode the class,
# marker size encodes sepal width.
fig = px.scatter_3d(iris_df,
                    x='petal_width',
                    y='sepal_length',
                    z='petal_length',
                    color='class',
                    symbol='class',
                    size='sepal_width',
                    width=800)
fig.update_layout(legend=dict(
    x=0, y=1))  # set the legend position to x=0, y=1
fig.show()

# %%

# %%
Exemplo n.º 20
0
# Load the technique catalogue and add Gaussian jitter (sigma = 0.1) to the
# three axis columns before plotting.
df = pd.read_excel('data.xlsx')
num_cols = ['Data Source', 'Target Variable', 'Prediction Type']
rands = np.random.normal(0, 0.1, size=(df.shape[0], 3))
df[num_cols] = df[num_cols] + rands

# Hover shows only the family and technique; the axis columns and the
# description are hidden.
hover_flags = {
    **dict.fromkeys(('Family', 'Technique'), True),
    **dict.fromkeys((*num_cols, 'Description'), False),
}

fig = px.scatter_3d(
    df,
    x='Data Source',
    y='Target Variable',
    z='Prediction Type',
    color='Family',
    width=1000,
    height=700,
    title='Machine Learning Exploration',
    hover_data=hover_flags,
    range_x=[-.15, 1.15],
    range_y=[-.15, 1.15],
    range_z=[-1.15, 1.15],
)

fig.update_layout(
    scene={
        'xaxis': {
            'ticktext': ['Unstructured', 'Structured'],
            'tickvals': [0, 1],
Exemplo n.º 21
0
        "y": "Gamma"
    }).show()

    # # ER_from_P test
    # Sweep a 200 x 200 log-spaced grid of chamber / exit pressures and record
    # the expansion ratio computed for every pair.
    chamber_pressure_inputs = np.logspace(5, 6, 200)
    exit_pressure_inputs = np.logspace(4, 5, 200)
    ox_for_test = 0
    chamber_pressures, exit_pressures, ers = [], [], []
    for chamber_pressure in chamber_pressure_inputs:
        for exit_pressure in exit_pressure_inputs:
            ratio = expansion_ratio_from_pressure(
                chamber_pressure, exit_pressure,
                gamma(ox_for_test), ox_for_test)
            chamber_pressures.append(chamber_pressure)
            exit_pressures.append(exit_pressure)
            ers.append(ratio)

    data = pd.DataFrame(dict(
        chamber_pressure=chamber_pressures,
        exit_pressure=exit_pressures,
        ers=ers,
    ))

    # All three axes are logarithmic; color repeats the expansion ratio.
    er_figure = px.scatter_3d(
        data,
        x='chamber_pressure',
        y='exit_pressure',
        z='ers',
        color='ers',
        log_x=True,
        log_y=True,
        log_z=True,
    )
    er_figure.show()
Exemplo n.º 22
0
affinity # Display affinity matrix

affinity_sym = 0.5*(affinity+affinity.T) # Make 'affinity' symmetric by averaging it with its transpose
affinity_sym # Display symmetric affinity matrix

# As you can see, the affinity matrix labeled 'affinity' is not symmetric. With an assigned n of 2, each point must find 2 nearest neighbors including itself. A 1 in the matrix means that that point is a nearest neighbor of the point to which the row pertains. In the example of the first point (the first row in the matrix), its nearest neighbors are itself and the second point. For point 2 (the second row in the matrix), its nearest neighbors are itself and point 3, not point 1. This happens because point 3 is closer to point 2 than point 1 is in terms of the Euclidean distance.
#
# The affinity matrix is made symmetric by averaging it with its transpose. As you can see from 'affinity_sym', some elements of the matrix now have a value of 0.5. For example, the first row (pertaining to point 1) has a value of 0.5 where it used to have a value of 1 in the second column. This tells us that point 1's nearest neighbor is point 2 but point 1 is not a nearest neighbor of point 2.
#
# Looking back at the matrix 'affinity', if we sum the matrix by its rows, each row will have a value of 2 (the n value we assigned) because each point is required to find its 2 nearest neighbors. If we sum the matrix by its columns, we will get different values for each column. Column 1 will sum to 1, telling us that it is only a nearest neighbor to itself. Column 2, on the other hand, will sum to 3, telling us that it is the nearest neighbor to 2 other points besides itself.

# ***
# ## Distance Correlation Matrix
# These cells are created to test whether the spectral embedding can be computed using a distance matrix rather than an affinity matrix.

dis_corr_matrix = pairwise_distances(data_df[0:500], metric=dis_corr) # A distance correlation matrix is computed from the first 500 rows of the digit data (this matrix is symmetric)
dis_corr_matrix
hv.Image(dis_corr_matrix).opts(width=400, height=300, colorbar=True, cmap='jet') # Plot distance matrix

D = 0.25 # Distance condition
cond_matrix = np.where(dis_corr_matrix<D, 1, 0) # All distances less than 'D' are assigned a 1 and all other distances are assigned a 0
hv.Image(cond_matrix).opts(width=400, height=300, colorbar=True, cmap='Greys') # Plot conditional matrix 'cond_matrix'

embedding = SpectralEmbedding(n_components=3, affinity='precomputed', n_jobs=n_cpus, eigen_solver='arpack') # Set up a 3D spectral embedding on a precomputed affinity
data_transformed = embedding.fit_transform(np.abs(cond_matrix)) # Compute the data transform using the 'cond_matrix' created above

plot_input = pd.DataFrame(data_transformed,columns=['x','y','z']) # Save transformed data as a data frame
plot_input['Number'] = num_df.astype(str) # Add column of digit labels
plot = px.scatter_3d(plot_input, x='x', y='y', z='z', color='Number', width=500, height=400, opacity=0.7) # Create plot of embedding
#plot # Display plot
                #count up the open buckets and measure the debt supply at each time step
                debt_supply[time_step]=sum([c["debt"] for c in cdps if c["open"]==True])
                #calculate the loss or gain per day as a function of the Dai supply on that day
                if liquidated_debt[time_step]!=0: loss_gain_perc[time_step] = loss_gain[time_step]/liquidated_debt[time_step]
                else: loss_gain_perc[time_step]=0
                result_matrix += 

            #record the total loss from the simulation
            result_array[simulation] = sum(loss_gain)/np.average(debt_supply)
            #record the total array of ETH prices from the simulation
            eth_price_record[simulation] = M

        data_results+=[{"sigma":sigma,"collateral_cutoff":collateral_cutoff,"gain-loss":np.average(result_array)}]
        print(sigma,collateral_cutoff,liquidation_penalty, np.average(result_array))

# 3-D scatter of the collected results: average gain/loss for every
# (sigma, collateral_cutoff) combination, colored by the gain/loss value.
fig = px.scatter_3d(pd.DataFrame(data_results), x='sigma', y='collateral_cutoff', z='gain-loss',
              color='gain-loss',size_max=40,opacity=0.7)
plot_url = py.plot(fig,filename="3d_gains.html")

# Display a graph of the ETH price over time.
# NOTE(review): `M` is whatever price path was assigned last - presumably the
# final simulation run; confirm this is the intended series.
data = [go.Scatter(x=x,
            y=M, mode='lines',line=dict(color="blue"))
       ]

layout = go.Layout(xaxis=dict(title="Days"),yaxis=dict(title="ETH Price"))

fig = Figure(data=data, layout=layout)
plot_url = py.plot(fig,filename="ETH Price over time.html")

# Display a graph of the collateralization ratios of each bucket over time.
collateral_df = pd.DataFrame(collateralizations)
Exemplo n.º 24
0
def main(start_data, end_data):
    """Render the INVESTECH Streamlit dashboard for one selected company.

    The page loads its data through ``load_data(start_data, end_data)``, lets
    the user choose a company, a date range, ESG categories, a publisher and a
    number of connections, and then draws, in order: the matching-articles
    table with sample links, a metric-over-time line chart, an ESG radar
    chart, a document-tone density plot, an article-tone scatter, a 3D
    embedding scatter highlighting connected companies, and a bar chart of
    neighbor confidences.

    Args:
        start_data: Forwarded unchanged to ``load_data``.
            NOTE(review): presumably the start of the data window -- confirm.
        end_data: Forwarded unchanged to ``load_data``.
            NOTE(review): presumably the end of the data window -- confirm.

    Returns:
        None. Everything is written to the Streamlit page. The function
        returns early (after showing a hint) while the date-range widget
        holds fewer than two dates.
    """
    ###### CUSTOMIZE COLOR THEME ######
    alt.themes.register("finastra", finastra_theme)
    alt.themes.enable("finastra")
    violet, fuchsia = ["#694ED6", "#C137A2"]

    ###### SET UP PAGE ######
    icon_path = os.path.join(".", "raw", "esg_ai_logo.png")
    st.set_page_config(page_title="INVESTECH",
                       page_icon=icon_path,
                       layout='centered',
                       initial_sidebar_state="collapsed")
    _, logo, _ = st.beta_columns(3)
    logo.image(icon_path, width=200)
    # The declarations must be separated by ';' only: a stray ',' before
    # "font-size" makes that declaration malformed, so it would be dropped.
    style = ("text-align:center; padding: 0px; font-family: arial black; "
             "font-size: 400%")
    title = f"<h1 style='{style}'>INVESTECH</h1><br><br>"
    st.write(title, unsafe_allow_html=True)

    ###### LOAD DATA ######
    with st.spinner(text="Fetching Data..."):
        data, companies = load_data(start_data, end_data)
    df_conn = data["conn"]
    df_data = data["data"]
    embeddings = data["embed"]

    ####### CREATE SIDEBAR CATEGORY FILTER######
    st.sidebar.title("Filter Options")
    # Placeholders keep the sidebar order stable; the widgets are filled in
    # later, once the selected company's data is known.
    date_place = st.sidebar.empty()
    esg_categories = st.sidebar.multiselect("Select News Categories",
                                            ["E", "S", "G"], ["E", "S", "G"])
    pub = st.sidebar.empty()
    num_neighbors = st.sidebar.slider("Number of Connections", 1, 20, value=8)

    ###### RUN COMPUTATIONS WHEN A COMPANY IS SELECTED ######
    company = st.selectbox("Select a Company to Analyze", companies)
    if company and company != "Select a Company":
        ###### FILTER ######
        df_company = df_data[df_data.Organization == company]
        diff_col = f"{company.replace(' ', '_')}_diff"
        esg_keys = ["E_score", "S_score", "G_score"]
        esg_df = get_melted_frame(data, esg_keys, keepcol=diff_col)
        ind_esg_df = get_melted_frame(data, esg_keys, dropcol="industry_tone")
        tone_df = get_melted_frame(data, ["overall_score"], keepcol=diff_col)
        ind_tone_df = get_melted_frame(data, ["overall_score"],
                                       dropcol="industry_tone")

        ###### DATE WIDGET ######
        start = df_company.DATE.min()
        end = df_company.DATE.max()
        selected_dates = date_place.date_input("Select a Date Range",
                                               value=[start, end],
                                               min_value=start,
                                               max_value=end,
                                               key=None)
        # While the user is part-way through picking a range the widget holds
        # fewer than two dates; unpacking it would raise ValueError. Wait for
        # the rerun triggered by the second pick instead of sleeping.
        if len(selected_dates) != 2:
            st.info("Select both a start and an end date to continue.")
            return
        start, end = selected_dates

        ###### FILTER DATA ######
        df_company = filter_company_data(df_company, esg_categories, start,
                                         end)
        esg_df = filter_on_date(esg_df, start, end)
        ind_esg_df = filter_on_date(ind_esg_df, start, end)
        tone_df = filter_on_date(tone_df, start, end)
        ind_tone_df = filter_on_date(ind_tone_df, start, end)
        date_filtered = filter_on_date(df_data, start, end)

        ###### PUBLISHER SELECT BOX ######
        publishers = df_company.SourceCommonName.sort_values().unique().tolist(
        )
        publishers.insert(0, "all")
        publisher = pub.selectbox("Select Publisher", publishers)
        df_company = filter_publisher(df_company, publisher)

        ###### DISPLAY DATA ######
        URL_Expander = st.beta_expander(f"View {company.title()} Data:", True)
        URL_Expander.write(f"### {len(df_company):,d} Matching Articles for " +
                           company.title())
        display_cols = [
            "DATE", "SourceCommonName", "Tone", "Polarity", "NegativeTone",
            "PositiveTone"
        ]  #  "WordCount"
        URL_Expander.write(df_company[display_cols])

        ####
        URL_Expander.write("#### Sample Articles")
        # Show the first three articles as a small markdown table of links.
        link_df = df_company[["DATE", "URL"]].head(3).copy()
        # link_df["URL"] = link_df["URL"].apply(lambda R: f"[{R}]({R})")
        link_df["ARTICLE"] = link_df.URL.apply(get_clickable_name)
        link_df = link_df[["DATE", "ARTICLE"]].to_markdown(index=False)
        URL_Expander.markdown(link_df)
        ####

        ###### CHART: METRIC OVER TIME ######
        st.markdown("---")
        col1, col2 = st.beta_columns((1, 3))

        metric_options = [
            "Tone", "NegativeTone", "PositiveTone", "Polarity",
            "ActivityDensity", "WordCount", "Overall Score", "ESG Scores"
        ]
        line_metric = col1.radio("Choose Metric", options=metric_options)

        if line_metric == "ESG Scores":
            # Get ESG scores
            esg_df["WHO"] = company.title()
            ind_esg_df["WHO"] = "Industry Average"
            esg_plot_df = pd.concat([esg_df,
                                     ind_esg_df]).reset_index(drop=True)
            esg_plot_df.replace(
                {
                    "E_score": "Environment",
                    "S_score": "Social",
                    "G_score": "Governance"
                },
                inplace=True)

            metric_chart = alt.Chart(
                esg_plot_df, title="Trends Over Time").mark_line().encode(
                    x=alt.X("yearmonthdate(DATE):O", title="DATE"),
                    y=alt.Y("Score:Q"),
                    color=alt.Color("ESG",
                                    sort=None,
                                    legend=alt.Legend(title=None,
                                                      orient="top")),
                    strokeDash=alt.StrokeDash("WHO",
                                              sort=None,
                                              legend=alt.Legend(
                                                  title=None,
                                                  symbolType="stroke",
                                                  symbolFillColor="gray",
                                                  symbolStrokeWidth=4,
                                                  orient="top")),
                    tooltip=[
                        "DATE", "ESG",
                        alt.Tooltip("Score", format=".5f")
                    ])

        else:
            if line_metric == "Overall Score":
                # The melted tone frames expose the value under "Score".
                line_metric = "Score"
                tone_df["WHO"] = company.title()
                ind_tone_df["WHO"] = "Industry Average"
                plot_df = pd.concat([tone_df,
                                     ind_tone_df]).reset_index(drop=True)
            else:
                # Daily mean of the metric: selected company vs. all rows.
                df1 = df_company.groupby(
                    "DATE")[line_metric].mean().reset_index()
                df2 = filter_on_date(
                    df_data.groupby("DATE")[line_metric].mean().reset_index(),
                    start, end)
                df1["WHO"] = company.title()
                df2["WHO"] = "Industry Average"
                plot_df = pd.concat([df1, df2]).reset_index(drop=True)
            metric_chart = alt.Chart(
                plot_df, title="Trends Over Time").mark_line().encode(
                    x=alt.X("yearmonthdate(DATE):O", title="DATE"),
                    y=alt.Y(f"{line_metric}:Q",
                            scale=alt.Scale(type="linear")),
                    color=alt.Color("WHO", legend=None),
                    strokeDash=alt.StrokeDash(
                        "WHO",
                        sort=None,
                        legend=alt.Legend(
                            title=None,
                            symbolType="stroke",
                            symbolFillColor="gray",
                            symbolStrokeWidth=4,
                            orient="top",
                        ),
                    ),
                    tooltip=["DATE",
                             alt.Tooltip(line_metric, format=".3f")])
        metric_chart = metric_chart.properties(height=340,
                                               width=200).interactive()
        col2.altair_chart(metric_chart, use_container_width=True)

        ###### CHART: ESG RADAR ######
        col1, col2 = st.beta_columns((1, 2))
        avg_esg = data["ESG"]
        avg_esg.rename(columns={"Unnamed: 0": "Type"}, inplace=True)
        avg_esg.replace(
            {
                "T": "Overall",
                "E": "Environment",
                "S": "Social",
                "G": "Governance"
            },
            inplace=True)
        # "Type" holds strings, so restrict the row mean to numeric columns;
        # without numeric_only recent pandas versions raise a TypeError.
        avg_esg["Industry Average"] = avg_esg.mean(axis=1, numeric_only=True)

        radar_df = avg_esg[["Type", company,
                            "Industry Average"]].melt("Type",
                                                      value_name="score",
                                                      var_name="entity")

        radar = px.line_polar(radar_df,
                              r="score",
                              theta="Type",
                              color="entity",
                              line_close=True,
                              hover_name="Type",
                              hover_data={
                                  "Type": True,
                                  "entity": True,
                                  "score": ":.2f"
                              },
                              color_discrete_map={
                                  "Industry Average": fuchsia,
                                  company: violet
                              })
        radar.update_layout(
            template=None,
            polar={
                "radialaxis": {
                    "showticklabels": False,
                    "ticks": ""
                },
                "angularaxis": {
                    "showticklabels": False,
                    "ticks": ""
                },
            },
            legend={
                "title": None,
                "yanchor": "middle",
                "orientation": "h"
            },
            title={
                "text": "<b>ESG Scores</b>",
                "x": 0.5,
                "y": 0.8875,
                "xanchor": "center",
                "yanchor": "top",
                "font": {
                    "family": "Futura",
                    "size": 23
                }
            },
            margin={
                "l": 5,
                "r": 5,
                "t": 0,
                "b": 0
            },
        )
        radar.update_layout(showlegend=False)
        col1.plotly_chart(radar, use_container_width=True)

        ###### CHART: DOCUMENT TONE DISTRIBUTION #####
        # add overall average
        dist_chart = alt.Chart(
            df_company, title="Document Tone "
            "Distribution").transform_density(
                density='Tone',
                as_=["Tone",
                     "density"]).mark_area(opacity=0.5, color="purple").encode(
                         x=alt.X('Tone:Q', scale=alt.Scale(domain=(-10, 10))),
                         y='density:Q',
                         tooltip=[
                             alt.Tooltip("Tone", format=".3f"),
                             alt.Tooltip("density:Q", format=".4f")
                         ]).properties(height=325, ).configure_title(
                             dy=-20).interactive()
        col2.markdown("### <br>", unsafe_allow_html=True)
        col2.altair_chart(dist_chart, use_container_width=True)

        ###### CHART: SCATTER OF ARTICLES OVER TIME #####
        # st.markdown("---")
        scatter = alt.Chart(df_company,
                            title="Article Tone").mark_circle().encode(
                                x="NegativeTone:Q",
                                y="PositiveTone:Q",
                                size="WordCount:Q",
                                color=alt.Color("Polarity:Q",
                                                scale=alt.Scale()),
                                tooltip=[
                                    alt.Tooltip("Polarity", format=".3f"),
                                    alt.Tooltip("NegativeTone", format=".3f"),
                                    alt.Tooltip("PositiveTone", format=".3f"),
                                    alt.Tooltip("DATE"),
                                    alt.Tooltip("WordCount", format=",d"),
                                    alt.Tooltip("SourceCommonName",
                                                title="Site")
                                ]).properties(height=450).interactive()
        st.altair_chart(scatter, use_container_width=True)

        ###### NUMBER OF NEIGHBORS TO FIND #####
        neighbor_cols = [f"n{i}_rec" for i in range(num_neighbors)]
        company_df = df_conn[df_conn.company == company]
        neighbors = company_df[neighbor_cols].iloc[0]

        ###### CHART: 3D EMBEDDING WITH NEIGHBORS ######
        st.markdown("---")
        # The same label is used for the colour category and as the key of
        # the colour map; if the two differ the selected company silently
        # falls back to a default colour instead of violet.
        company_label = f"Company: {company.title()}"
        color_f = lambda f: company_label if f == company else (
            "Connected Company" if f in neighbors.values else "Other Company")
        embeddings["colorCode"] = embeddings.company.apply(color_f)
        point_colors = {
            company_label: violet,
            "Connected Company": fuchsia,
            "Other Company": "lightgrey"
        }
        fig_3d = px.scatter_3d(
            embeddings,
            x="0",
            y="1",
            z="2",
            color='colorCode',
            color_discrete_map=point_colors,
            opacity=0.4,
            hover_name="company",
            hover_data={c: False
                        for c in embeddings.columns},
        )
        fig_3d.update_layout(
            legend={
                "orientation": "h",
                "yanchor": "bottom",
                "title": None
            },
            title={
                "text": "<b>Company Connections</b>",
                "x": 0.5,
                "y": 0.9,
                "xanchor": "center",
                "yanchor": "top",
                "font": {
                    "family": "Futura",
                    "size": 23
                }
            },
            scene={
                "xaxis": {
                    "visible": False
                },
                "yaxis": {
                    "visible": False
                },
                "zaxis": {
                    "visible": False
                }
            },
            margin={
                "l": 0,
                "r": 0,
                "t": 0,
                "b": 0
            },
        )
        st.plotly_chart(fig_3d, use_container_width=True)

        ###### CHART: NEIGHBOR SIMILIARITY ######
        st.markdown("---")
        neighbor_conf = pd.DataFrame({
            "Neighbor":
            neighbors,
            "Confidence":
            company_df[[f"n{i}_conf" for i in range(num_neighbors)]].values[0]
        })
        conf_plot = alt.Chart(
            neighbor_conf, title="Connected Companies").mark_bar().encode(
                x="Confidence:Q",
                y=alt.Y("Neighbor:N", sort="-x"),
                tooltip=["Neighbor",
                         alt.Tooltip("Confidence", format=".3f")],
                color=alt.Color(
                    "Confidence:Q", scale=alt.Scale(),
                    legend=None)).properties(height=25 * num_neighbors +
                                             100).configure_axis(grid=False)
        st.altair_chart(conf_plot, use_container_width=True)
Exemplo n.º 25
0
        x="col0",
        y="col1",
        labels={"col0": "dim 1", "col1": "dim 2"},
        animation_frame="t",
    )

    fig2d.show()

    # For Dimensionality Reduction to 3D
    reducer.setRds(query0=query0, query1=query1, dim=3)
    print(reducer.getRdsDf())
    fig3d = px.scatter_3d(
        reducer.getRdsDf(),
        x="col0",
        y="col1",
        z="col2",
        labels={"col0": "dim 1", "col1": "dim 2", "col2": "dim 3"},
        animation_frame="t",
        color="query0",
    )
    fig3d.update_traces(
        marker=dict(size=2),
    )

    fig3d.update_layout(
        title_text="post embedding",
        scene=dict(
            xaxis=dict(
                nticks=4,
                range=[-20, 20],
                # range=[min(embedder.em["col0"]), max(embedder.em["col0"])],
Exemplo n.º 26
0
    def _assemble_chart_object_from_filtered_df_and_chart_input_list(
        self,
        df: pd.DataFrame,
        chart_input_values_list: Tuple[Any],
        template: str = None,
    ) -> Any:
        """Build a plotly express figure from a dataframe and dash callback values.

        The default chart-input values are overlaid with the values received
        from the callback (matched by position against
        ``self.chart_input_list``); the resulting ``output_type`` entry then
        selects which ``plotly.express`` constructor is called.

        Args:
            df (pandas.DataFrame): data handed to plotly as ``data_frame``
            chart_input_values_list (Tuple[Any]): callback values, in the same
                order as ``self.chart_input_list``
            template (:obj: `str`, optional): key into
                ``self._template_lookup_dict``; the entry's 'chart_template'
                value is passed to plotly as ``template``

        Returns:
            The object returned by the selected ``plotly.express`` function.

        Raises:
            ValueError: if ``chart_input_values_list`` and
                ``self.chart_input_list`` differ in length, or if the
                resulting ``output_type`` is not one of the supported kinds.
        """
        if len(chart_input_values_list) != len(self.chart_input_list):
            raise ValueError(
                '''chart_input_values_list ({}) and chart_input_list ({}) must have the same length'''
                .format(chart_input_values_list, self.chart_input_list))

        # Step 1: defaults first, then the callback values on top of them.
        figure_values_dict = dict(self._chart_input_string_default_value_dict)
        figure_values_dict.update(
            zip(self.chart_input_list, chart_input_values_list))

        # Step 2: each row is (output_type, keys copied from
        # figure_values_dict in this order, constant keyword arguments).
        # The output_type doubles as the name of the plotly.express function.
        cartesian_keys = ('x', 'y', 'color', 'hover_name', 'hover_data')
        chart_specs = (
            ('scatter',
             ('x', 'y', 'color', 'size', 'hover_name', 'hover_data'), {}),
            ('line', cartesian_keys, {}),
            ('area', cartesian_keys, {}),
            ('bar', cartesian_keys, {}),
            ('violin', cartesian_keys, {'points': 'all'}),
            ('scatter_3d',
             ('x', 'y', 'z', 'color', 'hover_name', 'hover_data'), {}),
            ('scatter_geo',
             ('locations', 'locationmode', 'projection', 'color', 'size',
              'hover_name', 'hover_data'), {}),
            ('choropleth',
             ('locations', 'locationmode', 'projection', 'color',
              'hover_name', 'hover_data'), {}),
        )

        for chart_kind, value_keys, constant_kwargs in chart_specs:
            if figure_values_dict['output_type'] != chart_kind:
                continue
            # Resolve the plotly function only for the matching kind.
            make_figure = getattr(px, chart_kind)
            call_kwargs = {'data_frame': df}
            for key in value_keys:
                call_kwargs[key] = figure_values_dict[key]
            call_kwargs.update(constant_kwargs)
            # The template is looked up last, after all chart values.
            call_kwargs['template'] = self._template_lookup_dict[template][
                'chart_template']
            return make_figure(**call_kwargs)

        # No supported chart kind matched.
        raise ValueError(
            """I don't know what to do with a "{}" output_type. Please add it to {}."""
            .format(figure_values_dict['output_type'], __file__))
Exemplo n.º 27
0
def random():
    """Run an exploratory PCA and t-SNE analysis of binned neuronal activity.

    Loads one session through ``pipe.load_pcf``, treats neurons as samples and
    position bins as features, z-scores the data, and then produces a series
    of plots: eigenvalues and explained variance of the custom ``pca``,
    component weights of an sklearn PCA, pairwise component scatters, a 3D
    scatter of the first three components, a component/histogram overlay, a
    2D t-SNE grid over several perplexities and one 3D t-SNE scatter per
    perplexity. Points are coloured by whether the neuron is listed in
    ``pcf.place_cells``.

    Returns:
        None. The function only creates figures.
    """

    # Defined first so that it exists by the time it is called below; as a
    # nested function it is a local name and cannot be used before its def.
    def perform_PCA(data, labels, n_comp, plot=False):
        """Fit an 80-component PCA and optionally scatter adjacent components.

        Args:
            data: samples-by-features array passed to ``PCA.fit``.
            labels: per-sample values used as scatter colours.
            n_comp: currently unused; kept so existing calls stay valid.
            plot: if True, draw a 3x3 grid where panel ``i`` shows component
                ``i+1`` against component ``i+2``.
        """
        pca_model = PCA(n_components=80)  # Initializes PCA
        pca_model.fit(data)  # Performs PCA
        scores = pca_model.transform(data)
        nrows = 3
        ncols = 3
        if plot:
            fig, ax = plt.subplots(nrows, ncols)
            i = 0
            for row in range(nrows):
                for col in range(ncols):
                    ax[row, col].scatter(x=scores[:, i], y=scores[:, i+1], s=10, c=labels)
                    ax[row, col].set_xlabel(f'Component {i+1}')
                    ax[row, col].set_ylabel(f'Component {i+2}')
                    i += 1

    # Load data
    pcf = pipe.load_pcf(r'W:\Neurophysiology-Storage1\Wahl\Hendrik\PhD\Data\Batch3\M41\20200511')

    # Neurons as samples, position bins as features
    raw_data = pcf.bin_avg_activity
    pc_idx = [x[0] for x in pcf.place_cells]
    labels = np.zeros(len(raw_data))
    labels[pc_idx] = 1

    # Standardize (z-score) data. The parentheses matter: without them only
    # the mean is divided by the standard deviation.
    # NOTE(review): a feature with zero variance yields NaN/inf here.
    data = (raw_data - np.mean(raw_data, axis=0)) / np.std(raw_data, axis=0)

    # perform PCA (input as shape (n_samples, n_features)
    score, evectors, evals = pca(data)

    # plot the eigenvalues
    plot_eigenvalues(evals, limit=False)

    # plot variance explained
    plot_variance_explained(np.cumsum(evals)/np.sum(evals), cutoff=0.95)

    # built-in PCA; fitted before the weight plot because that plot reads
    # the component weights produced here
    pca_model = PCA(n_components=80)  # Initializes PCA
    pca_model.fit(data)  # Performs PCA
    scores = pca_model.transform(data)
    weights = pca_model.components_

    # visualize weights of the n-th principal component
    n_comp = 1
    plt.figure()
    for i in range(n_comp):
        plt.plot(weights[i], label=f'Comp {i+1}', linewidth=2)
        for zone in pcf.params['zone_borders']:
            plt.axvspan(zone[0], zone[1], color='red', alpha=0.1)
    plt.legend()

    perform_PCA(data, labels, 2, plot=True)

    # Plot first three components; the labels are appended as the last
    # column, whose (stringified) index equals the number of score columns
    df = pd.DataFrame(np.vstack((scores.T, labels)).T)
    df.rename(columns=str, inplace=True)
    df.rename(columns={str(scores.shape[1]): 'labels'}, inplace=True)
    pio.renderers.default = 'browser'
    fig = px.scatter_3d(df, x='0', y='1', z='2', color='labels')
    fig.show()

    # Plot PCA component with overlaying histogram
    plot_pc_with_hist(-score, evectors, (0, 1), labels, pcf.params)

    # t-SNE
    fig, ax = plt.subplots(2, 3)
    perplexities = [5, 30, 50, 75, 100, 500]
    # The 50-component PCA does not depend on the perplexity, so it is
    # computed once instead of once per panel.
    pca_results = PCA(n_components=50).fit_transform(data)
    count = 0
    for row in range(2):
        for col in range(3):
            tsne_mod = TSNE(n_components=2, perplexity=perplexities[count], n_iter=5000)
            embed = tsne_mod.fit_transform(pca_results)
            ax[row, col].scatter(x=embed[:, 0], y=embed[:, 1], c=labels)
            ax[row, col].set_xlabel('Component 1')
            ax[row, col].set_ylabel('Component 2')
            ax[row, col].set_title(f'Perplexity {perplexities[count]}')
            count += 1

    # 3D
    pio.renderers.default = 'browser'
    for perp in perplexities:
        tsne_mod = TSNE(n_components=3, perplexity=perp, n_iter=5000)
        embed = tsne_mod.fit_transform(data)
        df = pd.DataFrame(np.vstack((embed.T, labels)).T)
        df.rename(columns=str, inplace=True)
        df.rename(columns={str(embed.shape[1]): 'labels'}, inplace=True)
        fig = px.scatter_3d(df, x='0', y='1', z='2', color='labels')
        fig.show()
Exemplo n.º 28
0
)


# Building Scatter plot
# Collect the first three entries of every projection into separate lists.
# NOTE(review): x, y and z are not referenced by the px.scatter_3d call
# below, which reads columns 0/1/2 of wordProjections directly; they may be
# used by code outside this fragment -- confirm before removing.
x = []
y = []
z = []
for value in wordProjections:
    x.append(value[0])
    y.append(value[1])
    z.append(value[2])


# 3D scatter of the projections, coloured and hover-labelled by
# labels_word2vec; the 'color' legend/axis label is replaced by an empty one.
scatter_plot = px.scatter_3d(
    wordProjections, x=0, y=1, z=2,
    color = labels_word2vec,
    hover_name = labels_word2vec,
    labels = {'color': ''}        
)

# Dark theme: black plot and paper background, muted font colour, 800 px tall.
scatter_plot.update_layout(
    plot_bgcolor="black",
    paper_bgcolor="black",
    font_color="#666699",
    height=800,
)


# Building Histogram
histogram = px.histogram(   
    df,
    x = 'label',
Exemplo n.º 29
0
# Interpolate the h_li values of `dfs`, given at the scattered (dfs.x, dfs.y)
# locations, onto the target coordinates (xi, yi).
ar = scipy.interpolate.griddata(points=(dfs.x, dfs.y),
                                values=dfs.h_li,
                                xi=(xi, yi))

# %%
# Show the interpolated array as an image whose extent is the x/y bounding
# box of the input points.
plt.imshow(ar, extent=(dfs.x.min(), dfs.x.max(), dfs.y.min(), dfs.y.max()))

# %%

# %%
import plotly.express as px

# %%
# 3D scatter of the rows of `dfs`: longitude and latitude against h_li,
# coloured by the "laser" column.
px.scatter_3d(data_frame=dfs,
              x="longitude",
              y="latitude",
              z="h_li",
              color="laser")

# %%

# %% [markdown]
# ### Play using XrViz
#
# Install the PyViz JupyterLab extension first using the [extension manager](https://jupyterlab.readthedocs.io/en/stable/user/extensions.html#using-the-extension-manager) or via the command below:
#
# ```bash
# jupyter labextension install @pyviz/jupyterlab_pyviz --no-build
# jupyter labextension list  # check to see that extension is installed
# jupyter lab build --debug  # build extension ??? with debug messages printed
# ```
Exemplo n.º 30
0
# Export the current figure to "tsne_plot.html" without opening it, with
# scroll-zoom enabled and the lasso / zoom mode-bar buttons removed.
plot(
    fig,
    filename="tsne_plot.html",
    auto_open=False,
    config={
        "scrollZoom": True,
        "modeBarButtonsToRemove": ["lasso2d", "zoom2d"]
    },
)

#%% Plotly 3D tsne
# 3D scatter of the t-SNE coordinates for the rows of df_subset that have no
# missing values, coloured by "cattext" using the `bright` colour sequence.
fig = px.scatter_3d(
    data_frame=df_subset.dropna(),
    hover_data=["subcats", "mid"],
    size="dy",
    x="tsne1",
    y="tsne2",
    z="tsne3",
    color_discrete_sequence=bright,
    color="cattext",
)
# NOTE(review): this sets one fixed marker size for every trace, which
# presumably overrides the size="dy" mapping requested above -- confirm.
fig.update_traces(marker=dict(size=2.3, opacity=1.0,
                              line=dict(width=0)))  # size=2.3
# NOTE(review): same filename as the export above, so the earlier output is
# presumably replaced by this 3D figure -- confirm that is intended.
plot(fig, filename="tsne_plot.html", auto_open=False)

#%% Save tsne vectors
df_sl_cols_keep = [
    "mid",
    "tsne1",
    "tsne2",
    "cat",
    "dx",