Пример #1
0
def sdm_curve(BFWDF, score):
    """Build a split-violin figure of scores per subgroup.

    For every value of the ``subgroup`` column the rows tagged ``'Imposter'``
    are drawn on the negative side (blue) and the rows tagged ``'Genuine'``
    on the positive side (red), overlaid on the same x position.

    Args:
        BFWDF: frame with ``subgroup`` and ``Tag`` columns plus the column
            named by ``score``. A ``'score'`` column is written into it.
        score: name of the column holding the values to plot.

    Returns:
        The assembled ``go.Figure``.
    """
    # The selected column is exposed under a fixed name; note that this
    # assignment modifies the frame the caller passed in.
    BFWDF['score'] = BFWDF[score]

    # (tag value, violin side, line colour) for each half of the violin.
    halves = (
        ('Imposter', 'negative', 'blue'),
        ('Genuine', 'positive', 'red'),
    )

    fig = go.Figure()
    for tag, side, colour in halves:
        rows = BFWDF['Tag'] == tag
        fig.add_trace(
            go.Violin(x=BFWDF['subgroup'][rows],
                      y=BFWDF['score'][rows],
                      legendgroup=tag,
                      scalegroup=tag,
                      name=tag,
                      side=side,
                      line_color=colour))

    fig.update_traces(meanline_visible=True)
    fig.update_xaxes(categoryorder="category ascending")
    fig.update_layout(violinmode='overlay',
                      title="SDM Curve by Subgroup",
                      xaxis_title="subgroup",
                      yaxis_title="score",
                      legend_title='')
    return fig
Пример #2
0
def index_dist_graph(col_name):
    """Build overlaid per-year violins of ``col_name`` for both indices.

    Reads the module-level ``hnx`` and ``hose`` frames; the year of each
    frame's index is used as the x category. Both traces are drawn on the
    positive side and overlaid with partial opacity.

    Args:
        col_name: column to plot from both frames.

    Returns:
        The assembled ``go.Figure``.
    """
    # (frame, legend/scale group, legend label, line colour); HNX is added
    # first so the trace order matches the legend order.
    index_specs = (
        (hnx, 'HNX', 'HNX-INDEX', HNX_COLOR),
        (hose, 'HOSE', 'VN-INDEX', HOSE_COLOR),
    )

    fig = go.Figure()
    for frame, group, label, colour in index_specs:
        fig.add_trace(
            go.Violin(x=frame.index.year,
                      y=frame[col_name],
                      legendgroup=group,
                      scalegroup=group,
                      name=label,
                      side='positive',
                      line_color=colour,
                      opacity=0.6))

    fig.update_traces(meanline_visible=True, width=3)
    fig.update_layout(
        violingap=0,
        violinmode='overlay',
        width=1000,
        title="Distribution of Daily Percentage Change by Index")
    return fig
Пример #3
0
def generate_fig(threshold, df, col, cutoff=None):
    """Build a violin figure of ``df[col]`` with horizontal reference lines.

    If the ``review`` column contains the value ``'positive'``, the data is
    drawn as a split violin: rows whose review is not ``'positive'`` on the
    negative side (blue) and ``'positive'`` rows on the positive side
    (orange). Otherwise a single violin with an inner box plot is drawn.

    Args:
        threshold: y value of the green reference line.
        df: frame with a ``review`` column and the column named by ``col``.
        col: name of the column whose distribution is plotted.
        cutoff: optional y value of a second, red reference line.

    Returns:
        The assembled ``go.Figure``.
    """
    fig = go.Figure()
    if 'positive' not in df['review'].unique().tolist():
        fig.add_trace(go.Violin(y=df[col], x=[1] * len(df), box_visible=True))
    else:
        is_positive = df['review'] == 'positive'
        # (row mask, side, colour, horizontal offset of the raw points)
        halves = (
            (~is_positive, 'negative', 'blue', -1.5),
            (is_positive, 'positive', 'orange', 1.5),
        )
        for rows, side, colour, pointpos in halves:
            values = df.loc[rows, col]
            fig.add_trace(
                go.Violin(y=values,
                          # x must be as long as the filtered y, not as
                          # long as the whole frame.
                          x=[1] * len(values),
                          side=side,
                          line_color=colour,
                          pointpos=pointpos))
    fig.update_traces(points='all')

    # The reference lines carry no x values; presumably Plotly then places
    # the three points at x = 0, 1, 2, spanning the violin drawn at x = 1.
    fig.add_trace(
        go.Scatter(y=[threshold] * 3,
                   mode='lines',
                   line_color='mediumseagreen'))
    if cutoff is not None:
        fig.add_trace(
            go.Scatter(y=[cutoff] * 3, mode='lines', line_color='indianred'))

    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0),
                      showlegend=False,
                      font=dict(size=14))
    fig.update_xaxes(showticklabels=False)
    fig.update_yaxes(range=[-0.1, 1.1])
    return fig
Пример #4
0
def update_graph(n_clicks, big_occupations, small_occupations, big_industries,
                 small_industries, technics):
    """Build grouped violins of minimum and maximum salary per ``IPO`` value.

    Raises ``PreventUpdate`` while ``n_clicks`` equals 0. Otherwise the five
    filter arguments are forwarded unchanged to ``gen_filtered_min_df`` and
    ``gen_filtered_max_df`` and the resulting frames are plotted.

    Returns:
        The assembled ``go.Figure``.
    """
    if n_clicks == 0:
        raise PreventUpdate

    filters = (big_occupations, small_occupations, big_industries,
               small_industries, technics)
    min_salary_df = gen_filtered_min_df(*filters)
    max_salary_df = gen_filtered_max_df(*filters)

    # Draw the graph: one violin trace for each salary bound.
    fig = go.Figure()
    fig.add_trace(
        go.Violin(x=min_salary_df["IPO"],
                  y=min_salary_df['min_salary'],
                  legendgroup='min',
                  scalegroup='min',
                  name='各最低給与の分布',
                  line_color='blue'))
    fig.add_trace(
        go.Violin(x=max_salary_df["IPO"],
                  y=max_salary_df['max_salary'],
                  legendgroup='max',
                  scalegroup='max',
                  name='各最大給与の分布',
                  line_color='orange'))

    fig.update_traces(meanline_visible=True, jitter=0.05)
    fig.update_layout(violingap=0, violinmode='group')
    return fig
Пример #5
0
def reg_violins():
    """Render one ΔΔG violin figure per grouping, save it and show it.

    For each grouping the data is loaded with ``read_data``, the ``ddG``
    column is limited to the range [-50, 50], and four violins are drawn:
    the mutations listed in the module-level ``gnomad``, ``pathogenic`` and
    ``clinvar`` collections, plus the complete data set. Each figure is
    written to ``violin_<grouping>.png`` and then displayed.
    """
    landscapes = (
        ('unpaired',
         'ΔΔG landscape of Gβ2 (GNB2) bound to Gγ1 (GNG1) subunit only'),
        ('wAlpha', 'ΔΔG landscape of Gβ2 bound to γ1 and αi (GNAI) subunits'),
        ('wGRK2',
         'ΔΔG landscape of Gβ2 bound to γ1 and β-adrenergic receptor kinase 1 (GRK2))'
         ),
    )
    mutation_groups = (
        ('gnomAD', gnomad),
        ('Pathogenic', pathogenic),
        ('Clinvar_homologues', clinvar),
    )

    for grouping, title in landscapes:
        df = read_data(grouping)
        # Cap the magnitude at 50 while keeping the sign. The previous
        # formula approximated the sign with abs(v) / (v + 0.0001), which
        # divides by zero at v == -0.0001 and distorts values near zero.
        df = df.assign(ddG_limited=df.ddG.clip(lower=-50, upper=50))

        fig = go.Figure()
        for groupname, group in mutation_groups:
            fig.add_trace(
                go.Violin(y=df.loc[df.mutation.isin(group)].ddG_limited,
                          name=groupname,
                          box_visible=True,
                          meanline_visible=True))

        # Every mutation in the data set, for comparison with the subsets.
        fig.add_trace(
            go.Violin(y=df.ddG_limited,
                      name='sequence-space',
                      box_visible=True,
                      meanline_visible=True))

        fig.update_layout(title_text=title, yaxis={'range': [-10, 51]})
        fig.write_image('violin_' + grouping + '.png', scale=3)
        fig.show()
Пример #6
0
    def plot_violin_of(self, feature: str, np_hist: tuple):
        """Write an HTML violin plot of ``feature``, overall and per bin.

        The first violin (x position 0) shows every value of the feature.
        One more violin is added per entry of the bin array: it holds the
        values greater than the previous entry (starting from 0.) and not
        greater than the current one. The final violin has no upper bound
        and is named ``'max'``.

        Args:
            feature: key into ``self._features_data``.
            np_hist: two-item tuple whose second item is the array of bin
                values; the first item is ignored.
        """
        _counts, edges = np_hist
        values = self._features_data[feature]
        last_position = edges.shape[0]

        fig = go.Figure()
        fig.add_trace(
            go.Violin(
                y=values,
                x0=0,
                name="global",
                box_visible=True,
                meanline_visible=True,
            ))

        lower = 0.
        for position, upper in enumerate(edges, 1):
            if position == last_position:
                # Last violin: everything above the previous bin value.
                subset = values[(values > lower)]
                label = 'max'
            else:
                subset = values[(values > lower) & (values <= upper)]
                label = str(upper)
            fig.add_trace(
                go.Violin(
                    y=subset,
                    x0=position,
                    name=label,
                    box_visible=True,
                    meanline_visible=True,
                ))
            lower = upper

        fig.update_layout(_LAYOUT)
        tick_labels = ['global']
        tick_labels.extend(str(edge) for edge in edges)
        fig.update_layout({
            'title': f"Feature {feature}",
            'xaxis': {
                'tickmode': 'array',
                'tickvals': list(range(len(edges) + 1)),
                'ticktext': tick_labels
            }
        })

        # Only the HTML export is active; the figure is not shown and no
        # PNG is written.
        print(f"{STATUS_ARROW}Save violin plot of {feature} as html")
        html_path = self._output_folder.joinpath(
            f"feature_{feature}_violin.html")
        fig.write_html(html_path.as_posix())
def violin_compare(results: pd.DataFrame,
                   *,
                   x: str,
                   series: str,
                   y: str = 'score',
                   bandwidth: float = 0.025,
                   x_title: Optional[str] = None,
                   y_title: Optional[str] = None,
                   x_range: Optional[Tuple[float, float]] = None,
                   y_range: Optional[Tuple[float, float]] = None,
                   title: Optional[str] = None) -> go.Figure:
    """Compare two series as the two halves of split violins.

    ``results`` is passed through ``__preprocess_distribution``; the result
    must have exactly two columns. The first column is drawn on the negative
    side and the second on the positive side, sharing the frame's index as
    the x values.

    Raises:
        ValueError: if the preprocessed data does not have two columns.
    """
    data = __preprocess_distribution(results, x, y, series)

    column_count = len(data.columns)
    if column_count != 2:
        raise ValueError(f"The input data is not suitable for violin_compare: The series dimension '{series}'"
                         f" should have 2 distinct values, but has {column_count}")

    traces = []
    for position, side in enumerate(('negative', 'positive')):
        column = data.columns[position]
        traces.append(
            go.Violin(x=data.index,
                      y=data[column],
                      box=vl.Box(visible=False),
                      marker=vl.Marker(color=COLORS[position]),
                      meanline=vl.Meanline(visible=True),
                      bandwidth=bandwidth,
                      name=column.capitalize(),
                      side=side))

    return __create_figure(traces,
                           title,
                           series,
                           x_title,
                           x,
                           x_range,
                           y_title,
                           y,
                           y_range,
                           violingap=0,
                           violinmode='overlay')
Пример #8
0
def generate_violin_plot(df, column):
    """Return a ``dcc.Graph`` with one violin of ``Total`` per category.

    Args:
        df: frame with a ``Total`` column and the column named by ``column``.
        column: categorical column; each distinct value gets its own trace.

    Returns:
        A ``dcc.Graph`` with id ``'violin_plot'`` wrapping the figure.
    """
    fig = go.Figure()
    categories = df[column]
    for value in categories.unique():
        rows = categories == value
        fig.add_trace(
            go.Violin(x=categories[rows],
                      y=df['Total'][rows],
                      name=value,
                      box_visible=True,
                      meanline_visible=True))

    # Transparent backgrounds so the plot blends into the page.
    fig.update_layout(
        title=column,
        height=220,
        width=500,
        autosize=True,
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        showlegend=False,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)')
    fig.update_yaxes(automargin=True)
    return dcc.Graph(figure=fig, id='violin_plot')
Пример #9
0
    def log_val_cosine(self, distances, step):
        """Plot one violin per entry of ``distances`` and send it to ``self.viz``.

        Args:
            distances: mapping from trace name to the values to plot.
            step: accepted but not used by this method.
        """
        title_text = 'Cosine Distance Validation'

        fig = go.Figure()
        for name, distance in distances.items():
            fig.add_trace(
                go.Violin(
                    y=distance,
                    box_visible=True,
                    meanline_visible=True,
                    spanmode='hard',
                    name=name,
                ))

        # Dotted horizontal line at y = 0.5, from x = -0.5 to x = 2.5.
        reference_line = go.layout.Shape(
            type="line",
            x0=-0.5,
            y0=0.5,
            x1=2.5,
            y1=0.5,
            line=dict(width=2, dash="dot"),
        )
        # TODO: Visdom doesn't work with title in layout
        fig.update_layout(shapes=[reference_line])

        self.viz.plotlyplot(fig, win=title_text)
Пример #10
0
def violinplot(df,
               title='Violin-Plot',
               out_path=None,
               layout_width=None,
               layout_height=None,
               numeric_only=True):
    """Draw one violin subplot per column of ``df`` via ``plot_subplots``.

    Args:
        df: frame whose columns are plotted.
        title: figure title forwarded to ``plot_subplots``.
        out_path: forwarded to ``plot_subplots``.
        layout_width: forwarded to ``plot_subplots``.
        layout_height: forwarded to ``plot_subplots``.
        numeric_only: when true, only columns whose dtype name contains
            ``'float'`` or ``'int'`` are plotted; otherwise all columns.
    """
    if numeric_only:
        columns = [
            name for name, dtype in df.dtypes.items()
            if 'float' in str(dtype) or 'int' in str(dtype)
        ]
    else:
        columns = df.columns

    traces = [
        go.Violin(
            y=df[column],
            box_visible=True,
            meanline_visible=True,
            points='outliers'  # all, outliers, suspectedoutliers
        ) for column in columns
    ]

    # Subplots are laid out two per row and titled with the lower-cased
    # column names.
    max_col = 2
    subplot_titles = [column.lower() for column in columns]
    plot_subplots(traces,
                  max_col,
                  title,
                  subplot_titles=subplot_titles,
                  out_path=out_path,
                  layout_width=layout_width,
                  layout_height=layout_height)
Пример #11
0
def generate_distribution_violin(data_path: str, num_ranks: int,
                                 timesteps: int, bw_value: float):
    """Show half-violins of a random sample from each VPIC timestep.

    For each timestep the global data is read, 500 values are sampled with
    ``np.random.choice``, the share of the sample below 0.5, below 4 and
    above 10 is printed, and the sample is transformed with
    ``log_tailed(x, 10)`` before being plotted.

    Args:
        data_path: location handed to ``VPICReader``.
        num_ranks: number of ranks handed to ``VPICReader``. This used to be
            overwritten with a hard-coded 2 inside the function; the
            caller's value is now honoured.
        timesteps: number of timesteps (indices ``0 .. timesteps - 1``).
        bw_value: bandwidth passed to every violin trace.
    """
    vpic_reader = VPICReader(data_path, num_ranks=num_ranks)
    fig = go.Figure()

    # Cutoffs for the printed head/tail percentages.
    head_cutoff = 0.5
    head_cutoff_2 = 4
    tail_cutoff = 10

    for tsidx in range(timesteps):
        data = vpic_reader.read_global(tsidx)
        print(len(data))
        plotted_data = np.random.choice(data, 500)
        sample_size = len(plotted_data)

        # Vectorised counts instead of building throwaway lists.
        percent_head = (np.count_nonzero(plotted_data < head_cutoff) * 100.0 /
                        sample_size)
        percent_head_2 = (np.count_nonzero(plotted_data < head_cutoff_2) *
                          100.0 / sample_size)
        percent_tail = (np.count_nonzero(plotted_data > tail_cutoff) * 100.0 /
                        sample_size)

        print('TS {0}, < {1}: {2:.2f}'.format(tsidx, head_cutoff,
                                              percent_head))
        print('TS {0}, < {1}: {2:.2f}'.format(tsidx, head_cutoff_2,
                                              percent_head_2))
        print('TS {0}, > {1}: {2:.2f}'.format(tsidx, tail_cutoff,
                                              percent_tail))

        plotted_data = [log_tailed(value, 10) for value in plotted_data]

        ts_name = 'Timestep {0}'.format(vpic_reader.get_ts(tsidx))
        fig.add_trace(
            go.Violin(y=plotted_data,
                      box_visible=False,
                      meanline_visible=False,
                      name=ts_name,
                      side='positive',
                      points=False,
                      bandwidth=bw_value,
                      scalemode='width',
                      line=dict(width=1)))

    fig.update_traces(width=1.8)

    # Tick labels are mapped back through log_tailed_reverse so the axis
    # shows untransformed values.
    tick_positions = list(range(0, 18, 2))
    fig.update_layout(
        # The title now reports the number of timesteps actually plotted
        # instead of a fixed "4".
        title_text=f'Energy distribution from {timesteps} timesteps of a'
        ' VPIC simulation (tail is logarithmic)',
        yaxis=dict(tickmode='array',
                   tickvals=tick_positions,
                   ticktext=[
                       '{0:.0f}'.format(log_tailed_reverse(pos, 10))
                       for pos in tick_positions
                   ]),
    )

    fig.show()
def plot_summary_stat(stat_conds, stat_name="stat"):
    """Show and save one violin per condition for a summary statistic.

    Args:
        stat_conds: mapping from condition name to the values observed under
            that condition.
        stat_name: y-axis title and base name of the PNG that is written to
            ``analysis_plot_dir + stat_name + ".png"``.

    The y axis runs from 0 to the largest value seen across all conditions.
    """
    fig = go.Figure()
    maximum = 0
    for condition, values in stat_conds.items():
        fig.add_trace(go.Violin(y=values,
                                name=condition,
                                points='all',
                                box_visible=True,
                                meanline_visible=True))
        # A condition without values must not abort the whole plot, so an
        # empty sequence falls back to the running maximum.
        this_max = max(values, default=maximum)
        if this_max > maximum:
            maximum = this_max

    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        showlegend=False,
        plot_bgcolor='rgba(0,0,0,0)',
        xaxis=go.layout.XAxis(
            title_text="Condition",
        ),
        yaxis=go.layout.YAxis(
            title_text=stat_name,
            range=[0, maximum],
            ticks="outside",
            gridcolor='rgba(0,0,0,.1)'
        )
    )
    fig.show()

    # exist_ok avoids the race between an existence check and creation, and
    # makedirs also creates missing parent directories.
    os.makedirs(analysis_plot_dir, exist_ok=True)
    fig.write_image(analysis_plot_dir+stat_name+".png")
Пример #13
0
def get_ridgeplot_fig(df, distance='total_distance', nvals='all'):
    """Build a ridge plot (horizontal half-violins) of distances per club.

    Clubs and the x-axis bounds come from ``utils.get_clubs``. Each club's
    values are taken from the ``distance`` column and passed through
    ``utils.get_values`` together with ``nvals``. Clubs are paired with a
    12-step colour scale, so at most 12 clubs are drawn.

    Returns:
        The assembled ``go.Figure``.
    """
    clubs, xmin, xmax = utils.get_clubs(df)
    palette = n_colors('rgb(242, 139, 0)', 'rgb(206, 0, 0)', 12, colortype='rgb')

    fig = go.Figure()
    for club, colour in zip(clubs, palette):
        club_rows = df.groupby('club').get_group(club)
        shown_values = utils.get_values(club_rows[distance].values, nvals)
        fig.add_trace(
            go.Violin(x=shown_values,
                      name=utils.club_enum[club],
                      line_color=colour))

    # Horizontal, one-sided and wide traces give the ridge-line look.
    fig.update_traces(orientation='h', side='positive', width=3, points=False)

    x_axis = dict(
        range=[xmin-10, xmax+20],
        tickmode='linear',
        tick0=0,
        dtick=10,
    )
    fig.update_layout(
        margin=dict(t=30, r=10, b=10, l=10),
        xaxis_showgrid=True,
        xaxis_zeroline=False,
        showlegend=False,
        xaxis=x_axis,
    )
    return fig
Пример #14
0
    def boxplot(self, points="outliers"):
        """Show and return overlaid violins of split durations, one per split.

        Args:
            points: which sample points to draw; can be ``"all"``,
                ``"outliers"`` or ``"suspectedoutliers"``.

        Returns:
            The ``go.Figure`` that was shown.
        """
        splits = self.splits_filtered
        # Durations are divided by this factor before plotting
        # (presumably seconds to minutes; confirm against the data source).
        time_scale = 60

        fig = go.Figure()
        for split_id in splits.split_id.unique():
            split, split_name = self.get_split(split_id)
            split_rows = splits.loc[splits.split_id == split, :]
            scaled = np.array(split_rows['split_duration']) / time_scale
            fig.add_trace(
                go.Violin(y=scaled,
                          name=f'{split} - {split_name}',
                          pointpos=0))

        fig.update_traces(
            meanline_visible=True,
            points=points,
            jitter=0.5,  # spread the points for better visibility
            scalemode='count')  # violin area scales with the sample count
        fig.update_layout(title_text="Split times distribution",
                          violingap=0,
                          violingroupgap=0,
                          violinmode='overlay')
        fig.show()
        return fig
Пример #15
0
def graficar_violin_plot(x, y, df, titulo, y_label):
    """Show one violin of ``df[y]`` for every distinct value of ``df[x]``.

    Args:
        x: name of the grouping column.
        y: name of the column whose distribution is plotted.
        df: frame holding both columns.
        titulo: figure title, centred at the top.
        y_label: y-axis title.
    """
    fig = go.Figure()
    grupos = df[x]
    for nivel in grupos.unique():
        filas = grupos == nivel
        fig.add_trace(
            go.Violin(
                x=grupos[filas],  # category on the x axis
                y=df[y][filas],  # values of that category on the y axis
                name=nivel,
                box_visible=True,  # box plot inside the violin
                meanline_visible=True,
                points="all"  # draw every sample point
            ))

    title_spec = {
        'text': titulo,
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
    fig.update_layout(title=title_spec, yaxis_title=y_label)
    fig.show()
Пример #16
0
def generate_violin_plot(df, col, filename):
    """Write an HTML violin plot of ``target`` grouped by ``df[col]``.

    Args:
        df: frame with a ``target`` column and the column named by ``col``.
        col: name of the categorical predictor column.
        filename: path of the HTML file to write; plotly.js is referenced
            from the CDN rather than embedded.
    """
    fig_1 = go.Figure()
    for g in pd.unique(df[col]):
        rows = df[col] == g
        fig_1.add_trace(
            go.Violin(
                y=df["target"][rows],
                x=df[col][rows],
                # str() works for every label type; the previous
                # g.astype(str) raised AttributeError for plain Python
                # values such as str labels.
                name=str(g),
                box_visible=True,
                meanline_visible=True,
            )
        )

    fig_1.update_layout(
        title="Continuous Response by Categorical Predictor",
        xaxis_title="Groupings",
        yaxis_title="Response",
    )
    fig_1.write_html(
        file=filename,
        include_plotlyjs="cdn",
    )
Пример #17
0
def ridge_line(df_act, t_range='day', n=1000):
    """Build a ridge-line plot with one horizontal half-violin per column.

    A copy of ``df_act`` is passed to ``activities_dist`` together with
    ``t_range`` and ``n``; every column of the result becomes one trace,
    coloured along a gradient with one step per column.

    Layout follows https://plotly.com/python/violin/

    Returns:
        The assembled ``go.Figure``.
    """
    df = activities_dist(df_act.copy(), t_range, n)

    palette = n_colors('rgb(5, 200, 200)',
                       'rgb(200, 10, 10)',
                       len(df.columns),
                       colortype='rgb')

    fig = go.Figure()
    # Transposing yields one row of values per column of the frame.
    for column, column_values, colour in zip(df.columns, df.values.T,
                                             palette):
        fig.add_trace(
            go.Violin(x=column_values, line_color=colour, name=column))

    fig.update_traces(orientation='h', side='positive', width=3, points=False)
    fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
    return fig
Пример #18
0
def create_violinplot(score_type, lower_score, upper_score, title, figname):
    """Show violins of share counts for low, middle and high score bands.

    Reads the module-level ``scored_df``. Rows are split by the column named
    ``score_type`` into scores below ``lower_score``, scores between the two
    bounds (as selected by ``Series.between``) and scores above
    ``upper_score``. Each trace name includes the number of non-missing
    share values in its band. ``figname`` is accepted but not used.
    """
    scores = scored_df[score_type]
    shares = scored_df.shares

    # (share values in the band, first line of the trace label)
    bands = (
        (shares[scores < lower_score], 'Scores < ' + str(lower_score)),
        (shares[scores.between(lower_score, upper_score)],
         str(lower_score) + ' ≤ Scores ≤ ' + str(upper_score)),
        (shares[scores > upper_score], 'Scores >' + str(upper_score)),
    )

    # The y axis is fixed to 0-6000 shares.
    fig = go.Figure(layout=go.Layout(yaxis=dict(range=[0, 6000])))
    for band_shares, label in bands:
        fig.add_trace(
            go.Violin(y=band_shares,
                      name=label + '<br>n = ' + str(band_shares.count())))

    fig.update_layout(
        title=title,
        title_x=0.5,
        yaxis_title='Number of Shares',
        showlegend=True,
    )
    fig.show()
Пример #19
0
def plot_hist(plot_df,
              cols=None,
              height=1600,
              width=1400,
              title="Contributions by State",
              out_file=None):
    """Build stacked split-violin subplots comparing Biden and Trump by state.

    One subplot row is created per entry of ``cols``. In each row the rows of
    ``plot_df`` named ``'Biden'`` are drawn on the negative side and those
    named ``'Trump'`` on the positive side, with ``state`` on the x axis.

    Args:
        plot_df: frame with ``state`` and ``name`` columns plus every column
            listed in ``cols``.
        cols: columns to plot, one subplot each. ``None`` selects the
            standard set of contribution metrics.
        height: figure height in pixels.
        width: figure width in pixels.
        title: figure title.
        out_file: optional path; when given the figure is also written there
            as HTML.

    Returns:
        The assembled figure.
    """
    # The default list is created per call rather than shared as a mutable
    # default argument.
    if cols is None:
        cols = [
            "poll_point", "total_contrib", "contrib_count",
            "contrib_per_person", "contrib_diff", "contrib_perc_change",
            "contrib_per_person_diff", "contrib_per_person_perc_change"
        ]

    # (candidate, violin side, line colour)
    candidates = (
        ('Biden', 'negative', '#002868'),
        ('Trump', 'positive', '#BF0A30'),
    )

    fig = make_subplots(rows=len(cols), cols=1, subplot_titles=cols)
    # Subplot rows are 1-based.
    for row, col in enumerate(cols, start=1):
        for candidate, side, colour in candidates:
            is_candidate = plot_df['name'] == candidate
            fig.add_trace(go.Violin(x=plot_df['state'][is_candidate],
                                    y=plot_df[col][is_candidate],
                                    legendgroup=candidate,
                                    scalegroup=candidate,
                                    name=candidate,
                                    side=side,
                                    line_color=colour,
                                    width=1),
                          row=row,
                          col=1)

    fig.update_traces(meanline_visible=True)
    fig.update_layout(height=height,
                      width=width,
                      title_text=title,
                      violingap=0,
                      violinmode='overlay')
    if out_file is not None:
        fig.write_html(out_file)
    return fig
Пример #20
0
def plot_occ_violin(
        merged_df: pd.DataFrame,
        height=1600,
        width=1400,
        # title = "National Contribution by Occupation",
        out_dir=None):
    """Quick violin plots per occupation and average donation.

    The ten most frequent occupations are selected. For each of them a split
    violin of positive ``transaction_amt`` values per state is built, with
    Biden on the negative side and Trump on the positive side, both sharing
    the same manual span (min to max amount for that occupation).

    Args:
        merged_df (pd.DataFrame): contains output of get_merged_df
        height: figure height in pixels.
        width: figure width in pixels.
        out_dir: optional directory; when given, each figure is written there
            as ``contrib_by_<occupation>.html``.

    Returns:
        The figure of the last occupation processed, or ``None`` when the
        frame contains no occupations (previously this case raised
        ``UnboundLocalError``).
    """
    merged_df = merged_df[[
        "state", "date", "transaction_amt", "name", "occupation"
    ]]
    top_10_jobs = list(
        merged_df.groupby("occupation")["occupation"].count().sort_values(
            ascending=False)[:10].index)
    # Boolean masks select the same rows as the former query strings,
    # without string evaluation.
    merged_df = merged_df[merged_df["occupation"].isin(top_10_jobs)]

    # (candidate, violin side, line colour)
    candidates = (
        ("Biden", "negative", "#002868"),
        ("Trump", "positive", "#BF0A30"),
    )

    fig = None
    for job in top_10_jobs:
        job_df = merged_df[(merged_df["occupation"] == job)
                           & (merged_df["transaction_amt"] > 0)]
        # Both halves share one span so their densities are comparable.
        amount_span = [
            job_df.transaction_amt.min(),
            job_df.transaction_amt.max()
        ]

        fig = go.Figure()
        for cand, side, colour in candidates:
            is_cand = job_df['name'] == cand
            fig.add_trace(
                go.Violin(x=job_df['state'][is_cand],
                          y=job_df["transaction_amt"][is_cand],
                          name=cand,
                          side=side,
                          line_color=colour,
                          width=1))
        fig.update_traces(meanline_visible=True,
                          span=amount_span,
                          spanmode='manual')

        fig.update_layout(height=height,
                          width=width,
                          title_text=f"National Contribution by {job.lower()}",
                          violingap=0,
                          violinmode='overlay')

        if out_dir is not None:
            fig.write_html(
                os.path.join(out_dir, f"contrib_by_{job.lower()}.html"))

    return fig
Пример #21
0
def plot_density(fobject,
                 other_objects,
                 title):
    """Build horizontal violins of predictions per model and subgroup.

    Args:
        fobject: object exposing ``y``, ``y_hat``, ``protected`` and
            ``label``.
        other_objects: optional iterable of further such objects, or
            ``None``.
        title: figure title; ``None`` selects ``"Density plot"``.

    Returns:
        The assembled ``go.Figure``. One violin is drawn per (model,
        subgroup) pair; each subgroup appears only once in the legend.
    """
    objects = [fobject]
    if other_objects is not None:
        objects.extend(other_objects)

    # Collect one frame per (object, subgroup) and concatenate once.
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    frames = []
    for obj in objects:
        for subgroup in np.unique(obj.protected):
            in_subgroup = obj.protected == subgroup
            y = obj.y[in_subgroup]
            frames.append(
                pd.DataFrame({'y': y,
                              'y_hat': obj.y_hat[in_subgroup],
                              'subgroup': np.repeat(subgroup, len(y)),
                              'model': np.repeat(obj.label, len(y))}))
    if frames:
        data = pd.concat(frames)
    else:
        data = pd.DataFrame(columns=['y', 'y_hat', 'subgroup', 'model'])

    models = data.model.unique()
    subgroups = data.subgroup.unique()

    fig = go.Figure()

    counter = 0
    for model in models:
        for i, sub in enumerate(subgroups):
            counter += 1
            fig.add_trace(
                go.Violin(
                    box_visible=True,
                    x=data.loc[(data.subgroup == sub) & (data.model == model)].y_hat,
                    y0=sub + model,
                    name=sub,
                    fillcolor=_theme.get_default_colors(len(subgroups), type='line')[i],
                    opacity=0.9,
                    line_color='black'
                )
            )

    # One y tick per model, centred on that model's run of violins.
    violins_in_model = int(counter / len(models))
    starter_violins = np.arange(0, counter, violins_in_model)

    fig.update_xaxes(title='prediction')
    fig.update_yaxes(title='model', tickvals=list((starter_violins + (violins_in_model - 1) / 2)),
                     ticktext=list(models))

    # Hide duplicate legend entries: only the first trace of each name
    # keeps its legend item.
    seen_names = set()
    for trace in fig['data']:
        if trace['name'] in seen_names:
            trace['showlegend'] = False
        else:
            seen_names.add(trace['name'])

    if title is None:
        title = "Density plot"
    fig.update_layout(utils._fairness_theme(title))

    return fig
Пример #22
0
def violin_plot():
    """Build the decorative ridge plot shown on the initial loading page.

    Twelve rows of 200 seeded random values are generated, each with a
    growing spread and offset. Every row is padded with the global minimum
    and maximum and drawn as a one-sided violin, coloured along a gradient.

    Returns:
        (dcc.Graph)
    """
    np.random.seed(1)
    # The noise matrix is drawn before the offsets, so the seeded values
    # always come out in the same order.
    scaled_noise = np.linspace(1, 2, 12)[:, None] * np.random.randn(12, 200)
    offsets = (np.arange(12) + 2 * np.random.random(12))[:, None]
    points = scaled_noise + offsets

    extremes = [points.min(), points.max()]
    padded = np.array([np.concatenate((row, extremes)) for row in points])

    palette = n_colors('rgb(32, 32, 41)',
                       'rgb(190, 155, 137)',
                       12,
                       colortype='rgb')
    traces = [
        go.Violin(x=row,
                  line_color=colour,
                  side='positive',
                  width=3,
                  points=False,
                  hoverinfo='skip') for row, colour in zip(padded, palette)
    ]

    # Both axes are hidden and locked.
    hidden_axis = {
        'showgrid': False,
        'zeroline': False,
        'visible': False,
        'fixedrange': True,
    }
    layout = dict(title='u t i l s . p y',
                  xaxis=dict(hidden_axis),
                  yaxis=dict(hidden_axis),
                  showlegend=False,
                  margin=dict(l=0, r=0, t=80, b=0))

    removed_buttons = [
        'zoom2d', 'pan2d', 'select2d', 'lasso2d',
        'zoomIn2d', 'zoomOut2d', 'autoScale2d',
        'resetScale2d', 'toggleSpikelines',
        'hoverClosestCartesian', 'hoverCompareCartesian'
    ]
    return dcc.Graph(figure=dict(data=traces, layout=layout),
                     id='violin-plot',
                     config={'modeBarButtonsToRemove': removed_buttons},
                     style={
                         'margin-top': '15vh',
                         'height': '60vh'
                     })
Пример #23
0
def _get_violin_traces(
    col: str,
    filtered_data: pd.DataFrame,
    colour_axes_name: str,
    x_axes_name: str,
    x_encoding_dict: dict,
    colour_encoding_dict: dict,
):
    """Return the violin traces for ``col`` given the chosen axes.

    * Neither axis set: one black violin of the whole column.
    * Only the x axis set: one violin, with x values mapped through
      ``x_encoding_dict``.
    * Colour axis set: one violin per key of ``colour_encoding_dict``,
      restricted to rows where the colour column equals that key. With an
      x axis the traces are also grouped for legend and scaling; without
      one the colour column, mapped through ``colour_encoding_dict``,
      serves as x.
    """
    if not colour_axes_name and not x_axes_name:
        return [go.Violin(y=filtered_data[col], name=col, line_color="black")]

    if x_axes_name and not colour_axes_name:
        return [
            go.Violin(
                x=filtered_data[x_axes_name].replace(x_encoding_dict),
                y=filtered_data[col],
            )
        ]

    traces = []
    for label, display_name in colour_encoding_dict.items():
        rows = filtered_data[colour_axes_name] == label
        if x_axes_name:
            trace = go.Violin(
                x=filtered_data[x_axes_name][rows].replace(x_encoding_dict),
                y=filtered_data[col][rows],
                name=display_name,
                legendgroup=display_name,
                scalegroup=display_name,
            )
        else:
            trace = go.Violin(
                x=filtered_data[colour_axes_name][rows].replace(
                    colour_encoding_dict),
                y=filtered_data[col][rows],
                name=display_name,
            )
        traces.append(trace)
    return traces
def plot_ail_dstribution_by_year(data):
    """Return a figure with one violin of ``AIL_DEMAND`` per year.

    Every integer year from ``data.year.min()`` to ``data.year.max()``
    (inclusive) gets its own trace, built from the rows whose ``year``
    column equals that year.  Each violin shows its inner box and mean line.

    Args:
        data: Table with ``year`` and ``AIL_DEMAND`` columns (accessed both
            as ``data.year`` and by column label).

    Returns:
        The ``go.Figure`` titled "Distribution of AIL by Year".
    """
    figure = go.Figure()
    first_year = data.year.min()
    last_year = data.year.max()
    for year in range(first_year, last_year + 1):
        rows_for_year = data[data["year"] == year]
        violin = go.Violin(
            y=rows_for_year["AIL_DEMAND"],
            name=year,
            box_visible=True,
            meanline_visible=True,
        )
        figure.add_trace(violin)
    figure.update_layout(title_text="Distribution of AIL by Year")
    return figure
Пример #25
0
def box_plot(df, cols):
    """Return a figure with one violin (inner box and mean line) per column.

    Args:
        df: Table indexed by column label; ``df[col]`` supplies the samples.
        cols: Iterable of column labels to plot, in order.

    Returns:
        The ``go.Figure`` titled 'Box and Violin Plots'.
    """
    # Display options shared by every trace.
    violin_options = {"box_visible": True, "meanline_visible": True}

    figure = go.Figure()
    for column_name in cols:
        trace = go.Violin(y=df[column_name],
                          name=column_name,
                          **violin_options)
        figure.add_trace(trace)
    figure.update_layout(title='Box and Violin Plots')
    return figure
Пример #26
0
def plot_quantile_returns_violin(return_by_q, ylim_percentiles=None):
    """
    Show a violin plot of period-wise returns for each factor quantile.

    Returns are scaled by ``DECIMAL_TO_BPS`` and reshaped to long form; one
    violin trace is drawn per forward-return period, grouped by quantile on
    the x axis.  The figure is displayed with ``show()`` and nothing is
    returned.

    Parameters
    ----------
    return_by_q : pd.DataFrame - MultiIndex
        DataFrame with date and quantile as rows MultiIndex,
        forward return windows as columns, returns as values.
    ylim_percentiles : tuple of integers
        Percentiles of observed data to use as y limits for plot.
        When omitted, both y limits are passed as ``None``.
    """

    return_by_q = return_by_q.copy()

    # Y-axis limits: chosen percentiles of the raw values, scaled like the data.
    y_low = None
    y_high = None
    if ylim_percentiles is not None:
        raw_values = return_by_q.values
        y_low = np.nanpercentile(raw_values,
                                 ylim_percentiles[0]) * DECIMAL_TO_BPS
        y_high = np.nanpercentile(raw_values,
                                  ylim_percentiles[1]) * DECIMAL_TO_BPS

    figure = make_subplots(rows=1,
                           cols=1,
                           x_title='分位数',
                           y_title='收益率(基点)',
                           shared_xaxes=True)

    # Wide -> long: one row per (index levels, forward period) with its return.
    long_returns = return_by_q.multiply(DECIMAL_TO_BPS)
    long_returns.columns = long_returns.columns.set_names('forward_periods')
    long_returns = long_returns.stack()
    long_returns.name = 'return'
    long_returns = long_returns.reset_index()

    for period in long_returns['forward_periods'].unique():
        in_period = long_returns['forward_periods'] == period
        figure.add_trace(
            go.Violin(x=long_returns['factor_quantile'][in_period],
                      y=long_returns['return'][in_period],
                      box_visible=False,
                      legendgroup=period,
                      scalegroup=period,
                      name=period))

    figure.update_traces(meanline_visible=True)
    figure.update_yaxes(range=[y_low, y_high])
    figure.update_layout(
        violinmode='group',
        title_text="因子分位数期间收益率",
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )
    figure.show()
Пример #27
0
    def create_violin_plot(df, selected_columns):
        """Return a figure holding one violin trace per selected column.

        An empty selection yields an empty figure.  Each trace takes its
        samples from ``df[col]`` and is named after the column.
        """
        figure = go.Figure()
        if len(selected_columns) != 0:
            for column_name in selected_columns:
                trace = go.Violin(y=df[column_name], name=column_name)
                figure.add_trace(trace)
        return figure
Пример #28
0
def q3Plotter(productCD, width=800, height=300):
    """Show a histogram and a violin plot of TransactionAmt for one product.

    Rows of the global ``mergeData`` table whose ``ProductCD`` equals
    ``productCD`` are selected, and their ``TransactionAmt`` values are drawn
    side by side: a histogram in the left subplot and a violin with an inner
    box in the right one.  The figure is rendered with ``iplot``; nothing is
    returned.

    Args:
        productCD: Product code to filter on; also shown in the figure title.
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.
    """
    enable_plotly_in_cell()

    # Filter once and feed the same amounts to both subplots.
    amounts = mergeData[mergeData.ProductCD == productCD].TransactionAmt

    fig = make_subplots(rows=1,
                        cols=2,
                        subplot_titles=['Histogram', 'Violin Plot'],
                        horizontal_spacing=0.1)
    # add_trace(..., row=, col=) is the supported replacement for the
    # deprecated append_trace.
    fig.add_trace(go.Histogram(x=amounts), row=1, col=1)
    fig.add_trace(go.Violin(x=amounts, box_visible=True), row=1, col=2)

    # The f-string keeps the title identical for string codes and also
    # accepts non-string codes, which "+" concatenation would reject.
    fig.update_layout(
        height=height,
        width=width,
        showlegend=False,
        title_text=f"Distribution of TransactionAmt for ProductCD: {productCD}",
    )
    iplot(fig)
Пример #29
0
def generate_violin_plot(df_complex, df_random, colors):
    """Return a grouped violin figure of cyclomatic complexity per language.

    Two traces are drawn against the ``language`` column: 'Top' (blue) from
    ``df_complex`` and 'Random' (orange) from ``df_random``.  Both plot the
    ``cyclomatic_complexity`` column on a log-scaled y axis, hide the
    individual points, use the "hard" span mode and show an inner box.

    Args:
        df_complex: Table providing the 'Top' samples.
        df_random: Table providing the 'Random' samples.
        colors: Mapping with ``'background'`` (plot and paper background)
            and ``'text'`` (font colour) entries.

    Returns:
        The configured ``go.Figure``.
    """
    figure = go.Figure()

    # (legend label, source table, line colour) for each trace, in draw order.
    trace_specs = (
        ('Top', df_complex, 'blue'),
        ('Random', df_random, 'orange'),
    )
    for label, frame, line_color in trace_specs:
        figure.add_trace(
            go.Violin(
                x=frame['language'],
                y=frame['cyclomatic_complexity'],
                legendgroup=label,
                scalegroup=label,
                name=label,
                line_color=line_color,
                points=False,
                spanmode="hard",
            ))
    figure.update_traces(box_visible=True)

    figure.update_xaxes(title='Languages', showgrid=False)
    figure.update_yaxes(type='log',
                        title='Cyclomatic Complexity',
                        showgrid=False)
    figure.update_layout(
        violinmode='group',
        margin={'l': 40, 'b': 40, 't': 10, 'r': 10},
        plot_bgcolor=colors['background'],
        paper_bgcolor=colors['background'],
        font_color=colors['text'],
    )
    return figure
Пример #30
0
    def plot_cont_resp_cat_pred(self, feat, y, y_name, **kwargs):
        """Write distribution and violin plots of a binned, jittered feature.

        ``feat`` is cut into as many bins as it has unique values, normal
        noise (mean 0, standard deviation 1) is added to every value, and the
        noisy values are collected per bin; bins that end up empty are
        dropped and the remaining ones are relabelled ``group_1`` ..
        ``group_k``.  Two HTML files are then written under ``plots/``:

        * ``plots/<feat.name>_distr_cont_resp_plot.html`` -- a
          ``ff.create_distplot`` figure of the groups;
        * ``plots/<feat.name>_violin_cont_resp_plot.html`` -- one violin
          (with inner box and mean line) per group.

        Args:
            feat: Feature values.  The code relies on ``unique()``,
                ``to_list()``, ``values`` and ``name``, so presumably a
                pandas Series -- confirm against callers.
            y: Not used by this method.
            y_name: Response name shown in the axis titles.
            **kwargs: Must contain ``t_val`` and ``p_val``; both are embedded
                in the plot titles.

        Returns:
            None.

        Raises:
            KeyError: If ``t_val`` or ``p_val`` is missing from ``kwargs``.
        """
        # add noise to data
        initial_labels = [f"group_{i}" for i in range(len(feat.unique()))]
        ele_group = pd.cut(feat.to_list(),
                           bins=len(initial_labels),
                           labels=initial_labels)
        temp_df = pd.DataFrame({"a": feat.values, "b": ele_group})
        temp_df["noise"] = temp_df["a"].values + np.random.normal(
            0, 1, len(temp_df["a"]))
        temp_df = temp_df.groupby("b")["noise"].apply(list).reset_index(
            name="agg")
        # Drop bins that received no samples (their aggregated list is empty).
        temp_df = temp_df[temp_df["agg"].astype(bool)]
        group_list = temp_df["agg"].to_list()
        del temp_df
        # Relabel the surviving groups consecutively, starting at 1.
        group_labels = [f"group_{i}" for i in range(1, len(group_list) + 1)]

        # Create distribution plot with custom bin_size
        stat_text = f'(t-value={kwargs["t_val"]}) (p-value={kwargs["p_val"]})'
        title_text = "Continuous Response by Categorical Predictor"
        fig_1 = ff.create_distplot(group_list, group_labels, bin_size=0.2)
        fig_1.update_layout(
            title=f"{title_text}: {stat_text}",
            xaxis_title=f"Response: {y_name}",
            yaxis_title="Distribution",
        )
        plot_file_1 = f"plots/{feat.name}_distr_cont_resp_plot.html"
        fig_1.write_html(file=plot_file_1, include_plotlyjs="cdn")

        fig_2 = go.Figure()
        for curr_hist, curr_group in zip(group_list, group_labels):
            fig_2.add_trace(
                go.Violin(
                    # One x label per sample so x and y always have the same
                    # length, whatever the size of the group.
                    x=np.repeat(curr_group, len(curr_hist)),
                    y=curr_hist,
                    name=curr_group,
                    box_visible=True,
                    meanline_visible=True,
                ))
        fig_2.update_layout(
            title=f"{title_text}: {stat_text}",
            xaxis_title="Groupings",
            yaxis_title=f"Response: {y_name}",
        )
        plot_file_2 = f"plots/{feat.name}_violin_cont_resp_plot.html"
        fig_2.write_html(file=plot_file_2, include_plotlyjs="cdn")

        return None