def plot_facet_scatter(df, x, y, group1, group2):
    """Draw a facet-grid plot of ``y`` against ``x`` and open it offline.

    ``group1`` is passed to the facet grid as the row-facet variable and
    ``group2`` as the colour variable.  The colour variable is flagged as
    categorical only when its pandas dtype is ``category``.

    Nothing is returned; the figure is handed to ``plotly.offline.plot``.
    """
    colour_is_categorical = df[group2].dtype.name == 'category'
    grid_options = dict(
        x=x,
        y=y,
        color_name=group2,
        color_is_cat=colour_is_categorical,
        # facet_col=group1,
        facet_row=group1,
    )
    figure = ff.create_facet_grid(df, **grid_options)
    plotly.offline.plot(figure)


# plot_box(df, 'Species', 'sepal length (cm)')
# plot_scatter(df, 'sepal length (cm)', 'sepal width (cm)', 'Species')
# plot_scatter(df, 'petal length (cm)', 'sepal width (cm)', 'Species')
# plot_density(df, 'petal width (cm)', 'Species')
#
# boston = load_boston()
# df1 = pd.DataFrame(boston.data, columns=boston.feature_names)
# df1['Target'] = boston.target
# df1['RAD'] = df1['RAD'].astype('category')
# plot_box(df1, 'RAD', 'Target')
#
# ###PAIR PLOTS
# sns.pairplot(df1)
# plt.show()
#
#
# plot_facet_scatter(df1, 'AGE', 'Target', 'RAD', 'ZN')
def update_facet_grid1(traceType, y):
    """Return a Dash graph holding a Gender x Capture Location facet grid.

    The module-level ``df`` is faceted by the 'Gender' column (rows) and the
    'Capture Location' column (columns); ``y`` names the plotted column and
    ``traceType`` is forwarded as the facet grid's trace type.  The figure
    title is built from both arguments.
    """
    figure = ff.create_facet_grid(
        df,
        y=y,
        facet_row='Gender',
        facet_col='Capture Location',
        trace_type=traceType,
    )
    heading = f'{traceType} - {y} by gender and location'
    figure['layout']['title'] = heading
    return dcc.Graph(id='facet_grid_1', figure=figure)
def redrawGraph(x, y, color, row, col, size, checklist, prevLayout):
    """Rebuild the diamonds facet-grid figure for the current UI selection.

    Args:
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        color: Column used for colouring, or the string 'None' for no colour.
        row: Column used for row facets, or the string 'None'.
        col: Column used for column facets, or the string 'None'.
        size: Number of leading rows of ``diamonds`` to plot.
        checklist: Collection of enabled options; 'jitter' appends the rows
            produced by ``jitter(df, x, y)``.  ``None`` is treated as empty.
        prevLayout: Accepted for interface compatibility; not used here.

    Returns:
        The figure produced by ``ff.create_facet_grid`` with layout tweaks
        applied.
    """
    # The callers encode "nothing selected" as the literal string 'None'.
    rowVal = None if row == 'None' else row
    colVal = None if col == 'None' else col
    colorVal = None if color == 'None' else color

    df = deepcopy(diamonds[:size]).reset_index(drop=True)

    # ``checklist`` may arrive as None when no option was ever ticked;
    # membership on None would raise TypeError.
    if 'jitter' in (checklist or ()):
        df = pd.concat([deepcopy(df), jitter(df, x, y)], ignore_index=True)

    fig = ff.create_facet_grid(
        df,
        x=x,
        y=y,
        trace_type='scattergl',
        color_name=colorVal,
        facet_row=rowVal,
        facet_col=colVal,
        marker=dict(
            size=4,
            line=dict(width=0.1, color='black'),
        ),
        ggplot2=True,
    )

    layout = fig['layout']
    layout['hovermode'] = 'closest'
    layout['dragmode'] = 'zoom'
    layout['height'] = 900
    layout['width'] = None

    if not rowVal and not colVal:
        # Un-faceted plot: use real axis titles and blank the first two
        # annotations instead.
        # NOTE(review): presumably annotations[0]/[1] are the x/y label
        # annotations emitted by the facet grid - confirm.
        layout['yaxis']['zeroline'] = False
        layout['xaxis']['zeroline'] = False
        layout['yaxis']['title'] = y
        layout['annotations'][1]['text'] = ""
        layout['xaxis']['title'] = x
        layout['annotations'][0]['text'] = ""

    layout['autosize'] = True

    if discrete(x):
        relabel(x, fig, 0)
    if discrete(y):
        relabel(y, fig, 1)

    return fig
def q1():
    """Build a 4x4 subplot figure of each attribute against age, coloured by sex.

    For every entry of the position map below, a facet grid of ``age`` versus
    that column is created from the module-level ``df`` (cast to float64, with
    ``sex`` recoded to 'Male'/'Female'), and its traces are copied into the
    matching cell of the subplot figure.

    Returns:
        The populated subplot figure.
    """
    # (row, col) subplot position -> dataframe column plotted in that cell.
    D = {
        (1, 1): 'chest_pain_type',
        (1, 2): 'resting_blood_pressure',
        (1, 3): 'cholesterol',
        (1, 4): 'fasting_blood_sugar',
        (2, 1): 'rest_ecg',
        (2, 2): 'max_heart_rate_achieved',
        (2, 3): 'exercise_induced_angina',
        (3, 1): 'st_depression',
        (3, 2): 'st_slope',
        (3, 3): 'num_major_vessels',
        (4, 1): 'thalassemia',
    }

    # Titles are listed row by row; unused cells get an empty title.
    # Cell (3, 2) shows 'st_slope' and must be titled accordingly.
    q1_fig = plotly.tools.make_subplots(
        rows=4,
        cols=4,
        shared_xaxes=True,
        subplot_titles=[
            'CPT', 'RBP', D[(1, 3)], 'FBS',
            'RE', 'MHRA', 'EIA', '',
            D[(3, 1)], D[(3, 2)], 'NMV', '',
            D[(4, 1)], '', '', '',
        ],
    )

    q1_df = df.astype('float64')
    q1_df['sex'] = q1_df['sex'].apply(
        lambda x: 'Male' if x == 1.0 else 'Female')

    for (grid_row, grid_col), column in D.items():
        trace = ff.create_facet_grid(
            q1_df,
            x='age',
            y=column,
            color_name='sex',
            marker={
                'size': 5,
                'opacity': 1.0
            },
            colormap={
                'Male': 'rgb(165, 242, 242)',
                'Female': 'rgb(253, 174, 216)'
            },
            facet_col_labels='name',
            facet_row_labels='name',
        )
        # Only the traces are reused; the facet grid's own layout is dropped.
        for f in trace.data:
            q1_fig.append_trace(f, grid_row, grid_col)

    q1_fig['layout'].update(
        height=700,
        width=700,
        showlegend=False,
        title='<b>Statistics of Attributes</b>',
        titlefont=dict(size=20, color='rgb(23,203,203)'),
    )
    return q1_fig
def create_facet_grid(*args, **kwargs):
    """Forward every argument to ``plotly.figure_factory.create_facet_grid``.

    ``FigureFactory._deprecated`` is invoked first with this function's name
    (presumably to signal the deprecation - confirm against its definition).
    The real implementation is imported lazily, inside the call.
    """
    FigureFactory._deprecated('create_facet_grid')
    from plotly.figure_factory import create_facet_grid as _facet_grid_impl
    return _facet_grid_impl(*args, **kwargs)
def redrawGraph(x, y, color, row, col, size, checklist, prevLayout):
    """Rebuild the diamonds facet-grid figure for the current UI selection.

    Args:
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        color: Column used for colouring, or the string 'None' for no colour.
        row: Column used for row facets, or the string 'None'.
        col: Column used for column facets, or the string 'None'.
        size: Number of leading rows of ``diamonds`` to plot.
        checklist: Collection of enabled options; 'jitter' appends the rows
            produced by ``jitter(df, x, y)``.  ``None`` is treated as empty.
        prevLayout: Accepted for interface compatibility; not used here.

    Returns:
        The figure produced by ``ff.create_facet_grid`` with layout and
        legend tweaks applied.
    """
    # The callers encode "nothing selected" as the literal string 'None'.
    rowVal = None if row == 'None' else row
    colVal = None if col == 'None' else col
    colorVal = None if color == 'None' else color

    df = deepcopy(diamonds[:size]).reset_index(drop=True)

    # ``checklist`` may arrive as None when no option was ever ticked;
    # membership on None would raise TypeError.
    if 'jitter' in (checklist or ()):
        df = pd.concat([deepcopy(df), jitter(df, x, y)], ignore_index=True)

    fig = ff.create_facet_grid(
        df,
        x=x,
        y=y,
        trace_type='scattergl',
        color_name=colorVal,
        facet_row=rowVal,
        facet_col=colVal,
        marker=dict(
            size=4,
            line=dict(width=0.1, color='black'),
        ),
        ggplot2=True,
    )

    layout = fig['layout']
    layout['hovermode'] = 'closest'
    layout['dragmode'] = 'zoom'
    layout['height'] = 900
    layout['width'] = None

    if not rowVal and not colVal:
        # Un-faceted plot: use real axis titles and blank the first two
        # annotations instead.
        # NOTE(review): presumably annotations[0]/[1] are the x/y label
        # annotations emitted by the facet grid - confirm.
        layout['yaxis']['zeroline'] = False
        layout['xaxis']['zeroline'] = False
        layout['yaxis']['title'] = y
        layout['annotations'][1]['text'] = ""
        layout['xaxis']['title'] = x
        layout['annotations'][0]['text'] = ""

    layout['autosize'] = True

    # hack solution to legendgroup'n
    if colorVal is not None:
        traces = fig['data']
        # Colour is a variable of its own (not one of the facet variables).
        colour_only = colorVal != rowVal and colorVal != colVal
        # Colour repeats one of two distinct facet variables.
        colour_is_facet = (rowVal is not None and colVal is not None
                           and rowVal != colVal
                           and (colorVal == rowVal or colorVal == colVal))

        if colour_only or colour_is_facet:
            # Group every trace under its own name and hide all legend
            # entries; a selected few are re-enabled below.
            for single_trace in traces:
                single_trace['legendgroup'] = single_trace['name']
                single_trace['showlegend'] = False

            if color in ('color-labels', 'cut-labels', 'clarity-labels'):
                level_count = len(df[color].unique())
                # colour_only: re-enable the first ``level_count`` traces.
                # colour_is_facet: re-enable every (level_count + 1)-th trace
                # (presumably one per colour level - TODO confirm against the
                # trace order emitted by the facet grid).
                stride = 1 if colour_only else level_count + 1
                for k in range(level_count):
                    traces[k * stride]['showlegend'] = True

    if discrete(x):
        relabel(x, fig, 0)
    if discrete(y):
        relabel(y, fig, 1)

    return fig
def make_subplots(data: pd.DataFrame,
                  columns: List[str] = None,
                  *,
                  kind: str = "box",
                  **kwargs):
    """Make subplots and arrange them in an optimized grid layout.

    The index of ``data`` (minus its last level) defines the groups.  A facet
    grid is built only to obtain the grid shape, panel shapes and annotations;
    the actual traces come from ``create_duration_<kind>`` called once per
    group, and are placed into a ``tools.make_subplots`` figure.

    Args:
        data: Frame whose index levels (all but the last) define the groups.
        columns: Column(s) forwarded to the per-group plotting function.  For
            ``kind="scatter_with_bounds"`` exactly one column is required.
        kind: One of "box", "histogram", "scatter", "scatter_with_bounds".
        **kwargs: ``shared_yaxes``, ``shared_xaxes`` and ``print_grid`` are
            consumed for the subplot grid; ``title`` overrides the figure
            title; ``layout`` is applied last as a layout update.  Whatever is
            still in ``kwargs`` at that point is forwarded to the per-group
            plotting function.

    Returns:
        The assembled subplot figure.

    Raises:
        ValueError: If ``kind`` is unknown, or ``scatter_with_bounds`` is
            requested without exactly one column.
    """
    if kind not in ("box", "histogram", "scatter", "scatter_with_bounds"):
        raise ValueError(f"Can NOT handle plot of kind: {kind}.")

    index = data.index.droplevel(-1).unique()
    if len(index.names) > 2:
        logger.warning(
            f"Can only handle hierarchical index of depth <= 2, got {len(index.names)}. Grouping index."
        )
        return make_subplots(group_index(data, range(index.nlevels - 1)),
                             columns,
                             kind=kind,
                             **kwargs)

    # The facet grid is used only as a layout template (shape, panel shapes,
    # annotations); its traces are discarded.
    grid = ff.create_facet_grid(
        data.reset_index(),
        facet_row=index.names[1] if index.nlevels > 1 else None,
        facet_col=index.names[0],
        trace_type="box",  # box does not need data specification
        ggplot2=True,
    )

    shape = np.shape(grid._grid_ref)[:-1]
    sub_plots = tools.make_subplots(
        rows=shape[0],
        cols=shape[1],
        shared_yaxes=kwargs.pop("shared_yaxes", True),
        shared_xaxes=kwargs.pop("shared_xaxes", False),
        print_grid=kwargs.pop("print_grid", False),
    )

    if isinstance(index, pd.MultiIndex):
        # ``MultiIndex.labels`` was renamed to ``codes`` (pandas 0.24) and
        # later removed; prefer the new name, fall back for old pandas.
        level_codes = index.codes if hasattr(index, "codes") else index.labels
        index_grid = zip(*level_codes)
    else:
        index_grid = iter(
            np.transpose([
                np.tile(np.arange(shape[1]), shape[0]),
                np.repeat(np.arange(shape[0]), shape[1])
            ]))

    # ``kind`` was validated against a fixed whitelist above, so a direct
    # module-level lookup replaces the former eval() dispatch.
    plot_factory = globals()[f"create_duration_{kind}"]

    for _, grp in data.groupby(level=np.arange(index.nlevels).tolist()):
        if not isinstance(columns, str) and kind == "scatter_with_bounds":
            if columns is None:
                raise ValueError(
                    "`scatter_with_bounds` requires `col` argument, not provided."
                )
            try:
                # Unwrap the single column; afterwards ``columns`` is a str
                # and this branch is skipped for the remaining groups.
                columns, = columns
            except ValueError:
                raise ValueError(
                    "`scatter_with_bounds` does not allow for multiple columns."
                ) from None

        fig = plot_factory(grp, columns, **kwargs)
        col, row = map(int, next(index_grid))  # col-first plotting
        for trace in fig.data:
            sub_plots.append_trace(trace, row + 1, col + 1)

    layout = sub_plots.layout
    layout.update(
        title=kwargs.get("title", fig.layout.title),
        shapes=grid.layout.shapes,
        annotations=grid.layout.annotations,
        showlegend=False,
    )

    # Serialise once; nothing below mutates the layout before these reads.
    layout_json = layout.to_plotly_json()
    x_dom_vals = [k for k in layout_json.keys() if "xaxis" in k]
    y_dom_vals = [k for k in layout_json.keys() if "yaxis" in k]
    layout_shapes = pd.DataFrame(layout_json["shapes"]).sort_values(
        ["x0", "y0"])

    h_shapes = layout_shapes[~layout_shapes.x0.duplicated(keep=False)]
    v_shapes = layout_shapes[~layout_shapes.y0.duplicated(keep=False)]
    # handle single-columns
    h_shapes = h_shapes.query("y1 - y0 != 1")
    v_shapes = v_shapes.query("x1 - x0 != 1")

    # update axis domains and layout
    for idx, x_axis in enumerate(x_dom_vals):
        x0, x1 = h_shapes.iloc[idx % shape[1]][["x0", "x1"]]
        layout[x_axis].domain = (x0 + 0.03, x1 - 0.03)
        layout[x_axis].update(showticklabels=False, zeroline=False)
    for idx, y_axis in enumerate(y_dom_vals):
        y0, y1 = v_shapes.iloc[idx % shape[0]][["y0", "y1"]]
        layout[y_axis].domain = (y0 + 0.03, y1 - 0.03)
        layout[y_axis].update(zeroline=False)

    # correct annotation to match the relevant group and width
    annot_df = pd.DataFrame(
        layout.to_plotly_json()["annotations"]).sort_values(["x", "y"])
    annot_df = annot_df[annot_df.text.str.len() > 0]
    aw = min(  # annotation width magic
        int(max(60 / shape[1] - (2 * shape[1]), 6)),
        int(max(30 / shape[0] - (2 * shape[0]), 6)))

    for i, annot_idx in enumerate(annot_df.index):
        annot = layout.annotations[annot_idx]
        index_label: Union[str, Any] = annot["text"]
        if isinstance(index, pd.MultiIndex):
            index_axis = i >= shape[1]
            if shape[0] == 1:
                pass  # no worries, the order and label are aight
            elif shape[1] == 1:
                index_label = index.levels[index_axis][max(0, i - 1)]
            else:
                index_label = index.levels[index_axis][i % shape[1]]

        text: str = str(index_label)
        # Long labels keep their first and last ``aw`` characters.  The
        # replacement must be a raw string: "\g" is not a valid str escape.
        annot["text"] = re.sub(r"^(.{%d}).*(.{%d})$" % (aw, aw),
                               r"\g<1>...\g<2>", text)
        annot["hovertext"] = "<br>".join(pformat(index_label).split("\n"))

    # add axis titles as plot annotations
    layout.annotations = (
        *layout.annotations,
        {
            "x": 0.5,
            "y": -0.05,
            "xref": "paper",
            "yref": "paper",
            "text": fig.layout.xaxis["title"]["text"],
            "showarrow": False,
        },
        {
            "x": -0.05,
            "y": 0.5,
            "xref": "paper",
            "yref": "paper",
            "text": fig.layout.yaxis["title"]["text"],
            "textangle": -90,
            "showarrow": False,
        },
    )

    # custom user layout updates
    user_layout = kwargs.pop("layout", None)
    if user_layout:
        layout.update(user_layout)

    return sub_plots
def _aggregate_steps(df, group_keys, aggregation_type):
    """Group ``df`` by ``group_keys`` and reduce it with the named aggregation.

    Unknown aggregation names (including None) leave ``df`` untouched.
    """
    reducers = {
        'avg': np.mean,
        'min': min,
        'max': max,
        'median': np.median,
        'count': len,
    }
    reducer = reducers.get(aggregation_type)
    if reducer is None:
        return df
    return df.groupby(group_keys, as_index=False).aggregate(reducer)


def _build_trace(chart_type, frame, x_column, y_column, name):
    """Create one trace of ``chart_type`` from two columns of ``frame``."""
    if chart_type == 'bar':
        return go.Bar(x=frame[x_column], y=frame[y_column], name=name)
    if chart_type == 'box':
        return go.Box(x=frame[x_column], y=frame[y_column], name=name)
    if chart_type == 'histogram':
        # Histograms only use the y column.
        return go.Histogram(y=frame[y_column],
                            name=name,
                            histnorm='probability')
    # Anything else is treated as a scatter ``mode`` (e.g. 'markers').
    return go.Scatter(x=frame[x_column],
                      y=frame[y_column],
                      mode=chart_type,
                      name=name,
                      marker={
                          'size': 15,
                          'opacity': 0.5,
                          'line': {
                              'width': 0.5,
                              'color': 'white'
                          }
                      })


def update_graph(table, xaxis_field, yaxis_fields, split, chart_type,
                 aggregation_type, split_type, filter_variable_1,
                 filter_values_1, filter_variable_2, filter_values_2,
                 filter_variable_3, filter_values_3):
    """Query ``table`` and build the figure description for the selection.

    Args:
        table: Name of the SQL table to read.
        xaxis_field: Column for the x axis.
        yaxis_fields: List of columns for the y axis.  With a split, only the
            first one is plotted.
        split: List of columns to split by, or None/empty for no split.
        chart_type: 'bar', 'box', 'histogram', or a scatter mode.
        aggregation_type: 'avg', 'min', 'max', 'median', 'count', or anything
            else for no aggregation.  Aggregation groups by the 'step' column.
        split_type: 'facet' draws a facet grid; otherwise one trace per level
            of the first split column.
        filter_variable_N / filter_values_N: Up to three column/value-list
            pairs; rows are kept when the column value is in the list.

    Returns:
        A dict with 'data' and 'layout', or a hidden-layout dict when the x or
        y selection is missing.
    """
    if xaxis_field is None or yaxis_fields is None:
        return {"layout": {'style': {'display': 'none'}}}

    filter_pairs = (
        (filter_variable_1, filter_values_1),
        (filter_variable_2, filter_values_2),
        (filter_variable_3, filter_values_3),
    )
    filters = {
        variable: values
        for variable, values in filter_pairs
        if variable is not None and values is not None
    }

    # An empty selection ([]) means "no split", exactly like None; indexing
    # split[0] on it would raise IndexError.
    split = split or []

    columns = set(yaxis_fields + [xaxis_field] + split + list(filters.keys()))
    # NOTE(security): table and column names are concatenated into the SQL
    # text.  Identifiers cannot be bound as parameters, so these values must
    # come from a trusted, fixed set of choices - verify at the caller.
    # NOTE(review): aggregation groups by 'step', but 'step' is selected only
    # if it is one of the chosen fields - confirm that is always the case.
    df = pd.read_sql('SELECT ' + ','.join(columns) + ' FROM ' + table, conn)

    for variable, values in filters.items():
        df = df[df[variable].isin(values)]

    group_keys = split + ['step'] if split else 'step'
    df = _aggregate_steps(df, group_keys, aggregation_type)

    if not split:
        plot = [
            _build_trace(chart_type, df, xaxis_field, yaxis_field, yaxis_field)
            for yaxis_field in yaxis_fields
        ]
    elif split_type == 'facet':
        trace = 'scatter' if chart_type in ('markers', 'lines') else chart_type
        # First split column -> facet columns, second -> facet rows,
        # third -> categorical colour.
        facet_options = {'facet_col': split[0]}
        if len(split) >= 2:
            facet_options['facet_row'] = split[1]
        if len(split) >= 3:
            facet_options['color_name'] = split[2]
            facet_options['color_is_cat'] = True
        plot = ff.create_facet_grid(
            df,
            x=xaxis_field,
            y=yaxis_fields[0],
            trace_type=trace,
            **facet_options,
        )
    else:
        split_column = split[0]
        plot = [
            _build_trace(chart_type, df[df[split_column] == level],
                         xaxis_field, yaxis_fields[0], level)
            for level in df[split_column].unique()
        ]

    layout = {'style': {'display': 'block'}}
    return {'data': plot, 'layout': layout}
fig['layout'].update(showlegend=False, title="Price Distributions by Marital Status", height=1000, width=800) iplot(fig, filename='custom-sized-subplot-with-subplot-titles') df.head() # Notice how divorced have a considerably low amount of balance. fig = ff.create_facet_grid(df, x='duration', y='balance', color_name='marital', show_boxes=False, marker={ 'size': 10, 'opacity': 1.0 }, colormap={ 'single': 'rgb(165, 242, 242)', 'married': 'rgb(253, 174, 216)', 'divorced': 'rgba(201, 109, 59, 0.82)' }) iplot(fig, filename='facet - custom colormap') # Hmmm We have missed some important clients with some high balances. # This shouldn't be happening. fig = ff.create_facet_grid( df, y='balance', facet_row='marital', facet_col='deposit',
linewidth=1, palette="Set2") ax3.legend_.remove() ax3.set_title("Relationship between Smokers and Charges") plt.show() # Make sure we don't have any null values df[df.isnull().any(axis=1)] fig = ff.create_facet_grid(df, x='age', y='charges', color_name='weight_condition', show_boxes=False, marker={ 'size': 10, 'opacity': 1.0 }, colormap={ 'Underweight': 'rgb(208, 246, 130)', 'Normal Weight': 'rgb(166, 246, 130)', 'Overweight': 'rgb(251, 232, 238)', 'Obese': 'rgb(253, 45, 28)' }) 251, 232, 238 fig['layout'].update(title="Weight Status vs Charges", width=800, height=600, plot_bgcolor='rgb(251, 251, 251)', paper_bgcolor='rgb(255, 255, 255)')
def update_figure(n_clicks, episode_slider, word_input, speaker_input):
    """Plot how often each speaker says each word, per 1000 words spoken.

    Args:
        n_clicks: Not used in the body (presumably only the callback
            trigger - confirm at the callback registration).
        episode_slider: Sequence whose first two items are the inclusive
            lower and upper episode bounds.
        word_input: Comma-separated words; lower-cased before counting.
        speaker_input: Comma-separated speaker names; upper-cased and
            stripped of spaces before matching the ``Speaker`` column.

    Returns:
        ``{'data': fig}`` where ``fig`` is the facet-grid bar figure.
    """
    rate_col = 'Number of times said per 1000 words'

    # One combined mask instead of chained boolean indexing, which would
    # apply a full-length mask to an already filtered frame.
    in_range = ((data['episode'] >= episode_slider[0])
                & (data['episode'] <= episode_slider[1]))
    data_subset = data[in_range]

    # Merge all episodes into one long string per speaker
    peeps = list(
        filter(None, speaker_input.upper().replace(" ", "").split(",")))
    fullmerge = {
        peep: ' '.join(data_subset[data_subset.Speaker == peep]['cleaned'])
        for peep in peeps
    }
    finaldata = pd.DataFrame.from_dict(
        fullmerge, orient='index').reset_index().rename(columns={
            'index': 'speaker',
            0: 'text'
        })

    def occurrences(peep, pattern):
        # ``str.count`` interprets ``pattern`` as a regular expression.
        speech = finaldata[finaldata['speaker'] == peep].text
        return speech.str.count(pattern).iloc[0]

    # Total size of each speaker's text, measured as the number of spaces.
    # Rows are collected first and turned into a frame in one go
    # (DataFrame.append was removed in pandas 2.0).
    totalwords = pd.DataFrame(
        [[peep, ' ', occurrences(peep, ' ')] for peep in peeps],
        columns=['speaker', 'word', 'total'])

    words = list(filter(None, word_input.lower().split(",")))

    # Calculate frequency of words in finaldata
    df = pd.DataFrame(
        [[peep, word, occurrences(peep, word)]
         for peep in peeps
         for word in words],
        columns=['speaker', 'word', 'amount'])

    # Calculate rate per 1000 words
    df = pd.merge(df, totalwords[['speaker', 'total']], on='speaker')
    df[rate_col] = (df['amount'] / df['total']) * 1000

    # Sort data by rate and then speaker
    df = df.sort_values(by=[rate_col, 'speaker'], ascending=[True, False])

    # Graph
    if len(words) != 1:
        if len(peeps) != 1:
            # Apply the tfidf function, and find the most "distinguishing"
            # word among the given words and speakers
            tfidf = TfidfVectorizer(stop_words='english', vocabulary=words)
            tfs = tfidf.fit_transform(finaldata['text'])
            # ``get_feature_names`` was removed in scikit-learn 1.2; prefer
            # its replacement and fall back on older releases.
            feature_names = (getattr(tfidf, 'get_feature_names_out', None)
                             or tfidf.get_feature_names)
            matrix = pd.DataFrame(tfs.todense(),
                                  index=peeps,
                                  columns=feature_names()).transpose()
            matrix['word'] = matrix.index
            matrix = pd.melt(matrix, id_vars='word')
            matrix = matrix.rename(index=str,
                                   columns={
                                       'value': "tfidf",
                                       'variable': "speaker"
                                   })
            top_row = matrix.loc[matrix['tfidf'].idxmax()]
            distWord = top_row['word']
            distSpeaker = top_row['speaker']
            tfidfSent = ("Most distinguishing: '" + distWord + "' by " +
                         distSpeaker + ".*")

            fig = ff.create_facet_grid(
                df,
                x=rate_col,
                y='word',
                facet_col='speaker',
                color_name='speaker',
                trace_type='bar',
                orientation='h',
                scales='free',
                width=1200,
            )

            # Same x range on every facet: from the minimum rate to the
            # maximum rate plus 15 % head-room.
            rate_max = df[rate_col].max()
            rate_range = [df[rate_col].min(), rate_max + (.15 * rate_max)]
            for i in range(len(peeps) + 1):
                axis_key = 'xaxis' if i == 0 else 'xaxis' + str(i)
                # Item access replaces exec() on a built attribute path.
                fig.layout[axis_key].update({'range': list(rate_range)})

            fig.layout.xaxis.title = tfidfSent
            fig.layout.update(plot_bgcolor='rgba(230,230,230,90)')
        elif len(peeps) == 1:
            fig = ff.create_facet_grid(
                df,
                x='word',
                y=rate_col,
                color_name='word',
                trace_type='bar',
                scales='free',
                width=1200,
            )
            fig.layout.update(plot_bgcolor='rgba(230,230,230,90)')
    elif len(words) == 1:
        fig = ff.create_facet_grid(
            df,
            x='speaker',
            y=rate_col,
            color_name='speaker',
            trace_type='bar',
            scales='free',
            width=1200,
        )
        fig.layout.update(plot_bgcolor='rgba(230,230,230,90)')

    return {'data': fig}
def faceted_fig(df, x, y, facet_col, y_bounds=None):
    """Return a facet grid of ``y`` against ``x`` with one column per ``facet_col`` value.

    Both the column and the row facet labels are set to the 'name' mode.
    ``y_bounds`` is accepted but not used by this function.
    """
    label_mode = 'name'
    return ff.create_facet_grid(
        df,
        x=x,
        y=y,
        facet_col=facet_col,
        facet_col_labels=label_mode,
        facet_row_labels=label_mode,
    )