def create_plot(feature):
    """Build the requested overview figure and return it as Plotly JSON.

    Parameters
    ----------
    feature : str
        ``"All Countries Box-Scatter Plot"`` selects the box-scatter view:
        one box trace per column of the module-level ``df`` (first and last
        columns excluded) plus a dropdown whose entries each show a single
        trace.  Any other value selects the stacked bar view of the
        min-max scaled columns.

    Returns
    -------
    str
        The figure serialised with ``plotly.io.to_json``.
    """
    # ===========================
    # BOX-SCATTER PLOT
    # ===========================
    if feature == "All Countries Box-Scatter Plot":
        # (dropdown label, figure title) for each trace, in trace order.
        menu_entries = (
            ('cureduexpen_pri', 'Primary Education Expenditure'),
            ('cureduexpen_sec', 'Secondary Education Expenditure'),
            ('cureduexpen_ter', 'Tertiary Education Expenditure'),
            ('cureduexpen_total', 'Total Education Expenditure'),
            ('eduattain_doctoral', 'Doctoral Education Attainment'),
            ('eduattain_bachelor', "Bachelor's Education Attainment"),
            ('eduattain_master', "Master's Education Attainment"),
            ('eduattain_sec', 'Secondary Education Attainment'),
            ('eduattain_postsec', 'Post-Secondary Education Attainment'),
            ('eduattain_primary', 'Primary Education Attainment'),
            ('eduattain_tertiary', 'Tertiary Education Attainment'),
            ('eduattain_uppersec', 'Upper Secondary Education Attainment'),
            ('expense', 'Total Expenses (% GDP)'),
            ('life_expectatbirth_fem', 'Life Expectancy, Female (Years)'),
            ('life_expectatbirth_male', 'Life Expectancy Male (Years)'),
            ('life_expectatbirth_total', 'Life Expectancy Total (Years)'),
            ('milexp_gdp', 'Military Expenditure (% GDP)'),
            ('milexp_usd', 'Military Expenditure (USD)'),
            ('totalreserves', 'Total Reserves (% external debt)'),
            ('unemptotal_modeiloest',
             'Unemployment ILO (% Total Labor Force)'),
            ('unemptotal_nationalest',
             'Unemployment National (% Total Labor Force)'),
            ('happiness_score', 'Happiness Score (1-10)'),
            ('social_support', 'Social Support (1-10)'),
        )

        fig = go.Figure()
        columns = df.columns[1:-1].to_list()
        for column in columns:
            fig.add_trace(
                go.Box(y=df[column],
                       name=column,
                       boxpoints='all',
                       hovertext=df["country"]))

        # Each button shows exactly one trace: the index of True aligns with
        # the index of the plot trace.  The mask length follows the number of
        # traces actually added, so it cannot drift from the data.
        trace_count = len(columns)
        buttons = [
            dict(label=label,
                 method='update',
                 args=[{
                     'visible': [slot == position
                                 for slot in range(trace_count)]
                 }, {
                     'title': title,
                     'showlegend': True
                 }])
            for position, (label, title) in enumerate(menu_entries)
        ]

        fig.update_layout(
            title="All Countries Box-Scatter Plot",
            updatemenus=[go.layout.Updatemenu(active=0, buttons=buttons)])

    # ===========================
    # BAR PLOT
    # ===========================
    else:
        # scale all columns from 0-1
        scaler = MinMaxScaler()
        df2 = df.copy()
        df2.loc[:, df2.columns != 'country'] = scaler.fit_transform(
            df2.loc[:, df2.columns != 'country'])
        df2.drop('pay_usd', axis=1, inplace=True)

        # PLOT DATA - BAR
        fig = px.bar(df2,
                     x="country",
                     y=list(df2.columns[1:]),
                     title="All Countries Stacked Bar Plot by Attribute")
        fig.update_layout(barmode='stack',
                          xaxis={
                              'categoryorder': 'array',
                              'categoryarray': by_continent
                          })
        fig.update_xaxes(title={'text': None})

    # ===========================
    # RESIZE PLOT
    # ===========================
    fig.update_layout(autosize=False, width=1200, height=500)
    return plotly.io.to_json(fig)
def get_box(cond):
    """Return a box trace built from the percentile summary in *cond*.

    The y values are the ``p05``, ``p25``, ``p50``, ``p75`` and ``p95``
    entries of *cond*; every point shares the x value ``cond['experiment']``
    and the trace is named after ``cond['config']``.
    """
    percentile_keys = ('p05', 'p25', 'p50', 'p75', 'p95')
    y_values = [cond[key] for key in percentile_keys]
    # One x entry per percentile so all five points land in the same box.
    x_values = [cond['experiment']] * 5
    return go.Box(y=y_values, x=x_values, name=cond['config'])
7. Min and Max values are shown with "whiskers". 8. The main use of box plot is to perform a real analysis """ # Importing the libraries import plotly.offline as pyo import plotly.graph_objs as go # set up an array of 20 data points, with 20 as the median value y = [1,14,14,15,16,18,18,19,19,20,20,23,24,26,27,27,28,29,33,54] # BAsic box plot data = [go.Box(y=y)] pyo.plot(data,filename="basic_box_plot.html") # Changes applied """ In this exammple a additional parameter is 'boxpoints' which is use to show all points """ data_1 = [go.Box(y=y,boxpoints='all')] pyo.plot(data_1,filename="box_plot_1.html") """
def isf_compare_year():
    """Render the page comparing ISF values across the years 2014-2018.

    For every year ``isf.get_isf_for_years(year, year)`` is queried once.
    One box-plot figure is built from all values of each year, plus one
    figure per two-hour time bucket (twelve buckets, indexed 0-11 in each
    year's bucket list).  Every figure is serialised to JSON and handed to
    the ``isfYear.html`` template as ``graphJSON_all`` and
    ``graphJSON_<bucket start>`` (``graphJSON_0am`` ... ``graphJSON_10pm``).

    Returns
    -------
    The value returned by ``render_template``.
    """
    years = ('2014', '2015', '2016', '2017', '2018')
    # (start, end) label of each two-hour bucket, in bucket-list order.  The
    # start label doubles as the suffix of the template variable name.
    buckets = (
        ('0am', '2am'), ('2am', '4am'), ('4am', '6am'), ('6am', '8am'),
        ('8am', '10am'), ('10am', '12pm'), ('12pm', '2pm'), ('2pm', '4pm'),
        ('4pm', '6pm'), ('6pm', '8pm'), ('8pm', '10pm'), ('10pm', '12am'),
    )

    # get data for each year
    all_by_year = []
    buckets_by_year = []
    for year in years:
        all_values, bucket_list = isf.get_isf_for_years(year, year)
        # non-bucketed data: first field of each record, falsy values dropped
        all_by_year.append([record[0] for record in all_values if record[0]])
        buckets_by_year.append(bucket_list)

    # layout dict shared by every figure
    yaxis_dict = dict(title='mgdl/unit',
                      zeroline=True,
                      zerolinecolor='#800000',
                      showline=False,
                      rangemode='tozero')

    # plot for non-bucketed data
    graphs = {
        'graphJSON_all':
        _year_boxes_json('All ISF values for each year', years, all_by_year,
                         yaxis_dict)
    }

    # one plot per time bucket
    for index, (start, end) in enumerate(buckets):
        title = 'ISF values for {}-{} time bucket'.format(start, end)
        series = [bucket_list[index] for bucket_list in buckets_by_year]
        graphs['graphJSON_' + start] = _year_boxes_json(
            title, years, series, yaxis_dict)

    return render_template('isfYear.html',
                           version=app.config['VERSION'],
                           page_title='Minerva Compare ISF Values',
                           **graphs)


def _year_boxes_json(title, years, series_per_year, yaxis):
    """Return the JSON of a figure holding one box trace per year."""
    boxes = [
        go.Box(y=series, name=year)
        for year, series in zip(years, series_per_year)
    ]
    layout = go.Layout(title=title, width=1000, height=800, yaxis=yaxis)
    figure = go.Figure(data=boxes, layout=layout)
    return json.dumps(figure, cls=plotly.utils.PlotlyJSONEncoder)
def box_plot(data):
    """Return a compact 330x330 figure with four box traces.

    ``data[0]`` .. ``data[3]`` supply the y values of the boxes named
    ``'1'`` .. ``'4'``; each box also displays its mean.
    """
    palette = ('#76323F', '#D4D1D3', '#565656', '#C09F80')
    boxes = []
    for position, shade in enumerate(palette):
        boxes.append(
            go.Box(y=data[position],
                   marker_color=shade,
                   boxmean=True,
                   name=str(position + 1)))

    x_axis = {
        'autorange': True,
        'showline': True,
        'title': '',
        'type': 'category'
    }
    y_axis = {
        'autorange': True,
        'showgrid': True,
        'showline': True,
        'title': '',
        'type': 'linear',
        'zeroline': False
    }
    legend_box = {
        'x': -0.0228945952895,
        'y': -0.189563896463,
        'orientation': 'h',
        'yanchor': 'top'
    }

    layout = go.Layout(template='plotly_white',
                       autosize=False,
                       bargap=0.35,
                       font={'family': 'Raleway', 'size': 10},
                       height=330,
                       legend=legend_box,
                       margin={'r': 0, 't': 20, 'b': 10, 'l': 10},
                       showlegend=False,
                       title='',
                       width=330,
                       xaxis=x_axis,
                       yaxis=y_axis,
                       transition={
                           'duration': 500,
                           'easing': 'cubic-in-out'
                       })
    return go.Figure(data=boxes, layout=layout)
'#B1CEE6', '#7BD1C7', '#689FA8', '#1F3451', '#6DB29E', '#B1CEE6', '#7BD1C7', '#689FA8' ] fig = go.Figure() for xd, yd, cls in zip(x_data, y_data, colors): fig.add_trace( go.Box(y=yd, name=xd, boxpoints='all', jitter=0.5, whiskerwidth=0.2, fillcolor=cls, marker={ 'color': '#47535C', 'size': 2, 'line': { 'color': cls, 'width': 2 } }, line_width=1)) fig.update_layout(yaxis=dict( autorange=True, showgrid=True, zeroline=True, dtick=250, gridcolor='rgb(255, 255, 255)', gridwidth=1,
# Cast the square-footage columns to 64-bit integers.
data_frame['finishedSqFt'] = data_frame['finishedSqFt'].astype('int64')
data_frame['lotsizeSqFt'] = data_frame['lotsizeSqFt'].astype('int64')
# Print the resulting dtypes as a quick sanity check.
print(data_frame['amount'].dtype)
print(data_frame['finishedSqFt'].dtype)
print(data_frame['lotsizeSqFt'].dtype)
# Keep only rows with 30000 < amount < 2000000 and finishedSqFt < 50000.
data_frame = data_frame.loc[(data_frame['amount'] < 2000000) & (data_frame['finishedSqFt'] < 50000) & (data_frame['amount'] > 30000)]

###
# box plot
###
# Create a trace for a box plot; boxpoints='all' also draws every sample.
trace = go.Box(y=data_frame['amount'], name='amount', boxpoints='all')
# Assign it to an iterable object named my_data
my_data = [trace]
# Add axes and title
my_layout = go.Layout(title='Box plot for house price of Zillow data')
# Setup figure
my_figure = go.Figure(data=my_data, layout=my_layout)
# Create the box plot
py.plot(my_figure, filename='box_zillow_amount')

###
# 2d density plot
age_bin5 = list()
i = 0
# Sort every value of the selected gene into one of five age bins.
# zip() replaces itertools.izip, which only exists on Python 2.
for d, a in zip(data[gene], data['Age']):
    age = int(a)
    if age <= 55:
        age_bin1.append(d)
    elif age <= 65:
        age_bin2.append(d)
    elif age <= 75:
        age_bin3.append(d)
    elif age <= 85:
        # The original test was `75 < age <= 55`, which can never be true,
        # so the "75-85" bin always stayed empty.
        age_bin4.append(d)
    else:
        age_bin5.append(d)

# One box per age bin, in ascending age order.
trace1 = go.Box(y=age_bin1, name="Under 55")
trace2 = go.Box(y=age_bin2, name="55-65")
trace3 = go.Box(y=age_bin3, name="65-75")
trace4 = go.Box(y=age_bin4, name="75-85")
trace5 = go.Box(y=age_bin5, name="Over 85")
traces = [trace1, trace2, trace3, trace4, trace5]

# The boxes are laid out along x by age bin and their y values are the
# gene's values, so the axis titles are "Age" (x) and "CNA Value" (y); the
# original had them swapped.
layout = go.Layout(
    title="CNA Value By Age for Gene: {}".format(gene),
    xaxis=dict(title="Age"),
    yaxis=dict(title="CNA Value"))
figure = go.Figure(data=traces, layout=layout)
plotly.offline.plot(figure, filename="{}_correlations.html".format(gene))
def box_plot(df,
             groupby=None,
             val=None,
             figsize=(1024, 512),
             jitter=None,
             marker_alpha=1,
             marker_mode=None,
             title='',
             ylabel=''):
    """
    Visualize box plots for the columns, or for groups, of dataframe <df>.

    Parameters
    ----------
    df: pandas dataframe
        When <groupby> is None every column becomes one box. Otherwise the
        rows are split by the values of the <groupby> column and the <val>
        column is plotted once per group.
    groupby: str
        The name of a column to define groups. If None provided, we use
        the columns themselves as groups.
    val: str
        Used in conjunction with <groupby> to choose the column whose values
        are plotted. If None, then we choose the first column
        (excluding <groupby>).
    figsize: tuple
        The (width, height) of the figure in pixels
    jitter: float (0-1)
        The proportion of each box area to jitter datapoints
    marker_alpha: float (0-1)
        The opacity of the data points plotted
    marker_mode: 'all', 'suspectedoutliers', 'outliers', False, or None
        Passed to plotly as ``boxpoints``; specifies the way that datapoints
        are plotted. None keeps plotly's default.
    title: str
        The figure title
    ylabel: str
        The name of the y axis

    Returns
    -------
    None. The figure is displayed with ``ol.iplot``.

    Example
    -------
    from sklearn.datasets import make_classification
    N_FEATURES = 4
    X, y = make_classification(n_samples=100, n_clusters_per_class=1,
                               n_classes=4, n_features=N_FEATURES)
    df = pd.DataFrame(X, columns=['feature_%d' % f
                                  for f in range(N_FEATURES)])
    df['class'] = y
    box_plot(df, title='Box Plot')
    """
    # A plain dict is used for the axis: the legacy go.YAxis class is
    # deprecated in current plotly releases.
    layout = go.Layout(title=title,
                       height=figsize[1],
                       width=figsize[0],
                       yaxis=dict(title=ylabel))

    # Point styling applies to both modes; previously it was silently
    # ignored when no <groupby> column was given.
    point_style = dict(jitter=jitter,
                       boxpoints=marker_mode,
                       marker=dict(opacity=marker_alpha))

    if groupby is None:
        data = [
            go.Box(y=df[col], name=col, **point_style) for col in df.columns
        ]
    else:
        groups = sorted(df[groupby].unique().tolist())
        if val is None:
            # choose first non-groupby column
            val = df.columns.drop(groupby)[0]
        data = [
            go.Box(y=df.loc[df[groupby] == group, val],
                   name=group,
                   **point_style) for group in groups
        ]

    ol.iplot(go.Figure(data=data, layout=layout), show_link=False)
def plot_box(data: Union["ps.DataFrame", "ps.Series"], **kwargs):
    """Return a plotly figure with a single pre-computed box for a Series.

    The quartiles, mean, whiskers and outliers are obtained through
    ``BoxPlotBase`` and passed to ``go.Box`` as ready-made statistics.

    Recognised keyword arguments (removed from ``kwargs`` before the rest is
    forwarded to ``go.Box``):

    * ``whis`` (default 1.5) - handed to ``BoxPlotBase.compute_stats``.
    * ``precision`` (default 0.01) - handed to ``BoxPlotBase.compute_stats``.
    * ``boxpoints`` (default ``"suspectedoutliers"``) - only that value or
      ``False`` is accepted.
    * ``notched`` (default ``False``) - any truthy value is rejected.

    Raises
    ------
    RuntimeError
        If ``data`` is a ``ps.DataFrame``.
    ValueError
        If ``boxpoints`` or ``notched`` has an unsupported value.
    """
    import plotly.graph_objs as go
    import pyspark.pandas as ps

    if isinstance(data, ps.DataFrame):
        raise RuntimeError(
            "plotly does not support a box plot with Koalas DataFrame. Use Series instead."
        )

    # 'whis' is a matplotlib argument rather than a plotly one; plotly does
    # not seem to expose the whisker reach beyond the first and third
    # quartiles and appears to use 1.5 by default.
    whis = kwargs.pop("whis", 1.5)
    # 'precision' is Koalas specific: precision for approx_percentile.
    precision = kwargs.pop("precision", 0.01)

    # Plotly options
    boxpoints = kwargs.pop("boxpoints", "suspectedoutliers")
    notched = kwargs.pop("notched", False)

    if boxpoints not in ["suspectedoutliers", False]:
        raise ValueError(
            "plotly plotting backend does not support 'boxpoints' set to '%s'. "
            "Set to 'suspectedoutliers' or False." % boxpoints)
    if notched:
        raise ValueError(
            "plotly plotting backend does not support 'notched' set to '%s'. "
            "Set to False." % notched)

    series_label = name_like_string(data.name)
    spark_name = data._internal.spark_column_name_for(data._column_label)

    # Mean, median, Q1 and Q3 via approx_percentile with the given precision.
    stats, fences = BoxPlotBase.compute_stats(data, spark_name, whis,
                                              precision)

    # Column flagging each row as outlier or not.
    outlier_flags = BoxPlotBase.outliers(data, spark_name, *fences)

    # Min and max of the non-outliers - the whiskers.
    whisker_bounds = BoxPlotBase.calc_whiskers(spark_name, outlier_flags)

    flier_points = None
    if boxpoints:
        found = BoxPlotBase.get_fliers(spark_name, outlier_flags,
                                       whisker_bounds[0])
        if len(found) > 0:
            flier_points = [found]

    box_trace = go.Box(
        name=series_label,
        q1=[stats["q1"]],
        median=[stats["med"]],
        q3=[stats["q3"]],
        mean=[stats["mean"]],
        lowerfence=[whisker_bounds[0]],
        upperfence=[whisker_bounds[1]],
        y=flier_points,
        boxpoints=boxpoints,
        notched=notched,
        # this is for workarounds. Box takes different options from
        # express.box.
        **kwargs,
    )

    fig = go.Figure()
    fig.add_trace(box_trace)
    fig["layout"]["xaxis"]["title"] = series_label
    fig["layout"]["yaxis"]["title"] = "value"
    return fig
plotly.tools.set_credentials_file(username='******', api_key='hPKWVIYRADr9YBrkCJKL') import plotly.plotly as py import plotly.graph_objs as go x = ['$100', '$100', '$100', '$100', '$100', '$100', '$120', '$120', '$120', '$120', '$120', '$120', '$140', '$140', '$140', '$140', '$140', '$140', '$160', '$160', '$160', '$160', '$160', '$160', '$180', '$180', '$180', '$180', '$180', '$180', '$200', '$200', '$200', '$200', '$200', '$200'] trace0 = go.Box( y=[-100, -50, 48218, 48218, 126144, 190174, -100, 4595, 65295, 65295, 160411, 239776, -100, 9938, 82335, 82335, 194883, 289253, 7995, 20713, 116389, 116389, 263419, 388706, 12200, 26172, 133416, 133416, 297892, 438704], x=x, name='zv50', marker=dict( color='#2388D0' ) ) trace1 = go.Box( y=[-100, -50, 16723, 16723, 32160, 50672, -100, 3348, 24494, 24494, 43291, 66795, -100, 7844, 32343, 32343, 54540, 83021, 2621, 12440, 40077, 40077, 65819, 99318, 6458, 17052, 47905, 47905, 76965, 115451, 10151, 21691, 55856, 55856, 88191, 131697], x=x, name='zv60', marker=dict( color='#2F8FD3' ) ) trace2 = go.Box(
max_txss.insert(position, max) # fig = go.Figure() # fig.add_scatter(x=tc, # y=txs, # mode='markers', # line = dict( # color = ('rgb(22, 96, 167)'), # width = 1) # ) # pio.write_image(fig, "../Backend/graphs/9/sim/"+log.split('.')[0]+".svg") traces = go.Box(y=time_totals, name='9 Sim Agents', jitter=0.3, pointpos=-1.8, boxpoints='all', marker=dict(color='rgb(10, 140, 208)', ), boxmean=True, showlegend=False) tracer = go.Box(y=time_total, name='9 Real Agents', jitter=0.3, pointpos=-1.8, boxpoints='all', marker=dict(color='rgb(8, 81, 156)', ), boxmean=True, showlegend=False) data = [traces, tracer]
# One array of responses per Q35 part, in the same order as `names` below.
q35_columns = ('Q35_Part_1', 'Q35_Part_2', 'Q35_Part_3', 'Q35_Part_4',
               'Q35_Part_5', 'Q35_Part_6')
ys = [responses_df[column].values for column in q35_columns]

names = [
    "Self-taught",
    'Online courses (Coursera, Udemy, edX, etc.)',
    'Work',
    'University',
    'Kaggle competitions',
    'Other',
]

#colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)', 'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)']
colors = ["#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71", "#df6a84"]

# Build one box per training category.
trace = []
for i, series in enumerate(ys):
    box = go.Box(y=series, name=names[i], marker=dict(color=colors[i]))
    trace.append(box)

layout = go.Layout(
    title='Box plots on % contribution of each ML / DS training category')
fig = go.Figure(data=trace, layout=layout)
iplot(fig, filename="TimeSpent")

# **Observations:**
#
# * Looking at the median of each of the learning categories, it seems there is no one category that completely dominated the learning process of ML / DS
# * Self-taught seems to have higher percentage of share in the learning process compared to others.
# * Only less than half of the respondents have the percentage share of 'University' as greater than 0
#
#
# ### Distribution of DS / ML Learning Category at different Countries:
#
#######
# Objective: Make a DataFrame using the Abalone dataset (../data/abalone.csv).
# Take two independent random samples of different sizes from the 'rings' field.
# HINT: np.random.choice(df['rings'],10,replace=False) takes 10 random values
# Use box plots to show that the samples do derive from the same population.
######

# Perform imports here:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd
import numpy as np

# Load the dataset into a DataFrame.
df = pd.read_csv('data/abalone.csv')

# Draw two samples of different sizes, without replacement.
sample1 = np.random.choice(df['rings'], 15, replace=False)
sample2 = np.random.choice(df['rings'], 45, replace=False)

# One box trace per sample, in drawing order.
labelled_samples = (('Sample1', sample1), ('Sample2', sample2))
data = [go.Box(y=values, name=label) for label, values in labelled_samples]

# Title-only layout.
layout = go.Layout(title='Two Samples')

# Assemble the figure and plot it.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)
net_data[tr_exploit][test_exploit].append(d) summaries[tr_exploit] = get_summary(data) with open(loc_str.format(tr_exploit), 'w') as f: json.dump(data, f, indent=2) summaries['net'] = get_summary(raw_data) with open(loc_str.format('summary'), 'w') as f: json.dump(summaries, f, indent=2) # plot data for tr_exploit in exploits: accs = [[entry['accuracy'] for entry in net_data[tr_exploit][ex]] for ex in exploits] boxes = [go.Box( y=accs[i], name=exploits[i], boxmean='sd' ) for i in range(len(exploits))] layout = go.Layout( title='ADD-GAN: Accuracy per Exploit Trained on {}'.format(tr_exploit), yaxis=dict(title='Accuracy (%)') ) fig = go.Figure(data=boxes, layout=layout) py.plot(fig, filename='add-gan-cross-results-{}'.format(tr_exploit))
item_2 = 0 item_3 = 0 item_4 = 0 item_5 = 0 for item_i in heartDisease[item]: for i in range(0,6): if (item == i): item_i +=1 heartDisease_i = 0 for i in range (0,6): heartDisease_i = (item_i/len(heartDisease)) * 100 print("The percentage of level", i, "in the response variable is: {0:.2f}".format(heartDisease_i)) classImbalance('heartdisease') trace0 = go.Box( y=heartDisease['age'], name='age' ) trace1 = go.Box( y=heartDisease['sex'], name='sex' ) trace2 = go.Box( y=heartDisease['cp'], name='cp' ) trace3 = go.Box( y=heartDisease['trestbps'], name='trestbps' ) trace4 = go.Box( y=heartDisease['chol'],
df3 = df3.groupby(['TownID', 'Latitude', 'Longitude'])['Total'].sum() df3 = df3.values.tolist() df4 = pd.read_csv("F_area.csv", encoding='utf-8') df4 = df4[['Total', 'Longitude', 'Latitude', 'TownID']] df4 = df4.groupby(['TownID', 'Latitude', 'Longitude'])['Total'].sum() df4 = df4.values.tolist() df5 = pd.read_csv("C_area.csv", encoding='utf-8') df5 = df5[['Total', 'Longitude', 'Latitude', 'TownID']] df5 = df5.groupby(['TownID', 'Latitude', 'Longitude'])['Total'].sum() df5 = df5.values.tolist() trace1 = go.Box( y=df1, name="台北市", boxpoints='outliers', marker=dict(color='rgba(93, 164, 214, 0.5)', ), line=dict( width=1, color='rgba(93, 164, 214, 0.5)', ), ) trace2 = go.Box( y=df2, name="桃園市", boxpoints='outliers', marker=dict(color='rgba(255, 144, 14, 0.5)', ), line=dict( width=1, color='rgba(255, 144, 14, 0.5)', ), ) trace3 = go.Box(
def data_visualization(data_set):
    """Show three diagnostic figures and return the reduced data set.

    Figures are displayed in this order: a heatmap of ``data_set.isnull()``
    for the raw input, a heatmap of the correlation matrix computed after
    the NaN-heavy columns are dropped, and one box plot per column that
    survives all three drops.

    Args:
        data_set: pandas DataFrame containing every column named in the
            drop lists below.

    Returns:
        The DataFrame with all listed columns removed.
    """

    def centered_title(text, y_position):
        # Title block shared by the three figures; only text and height vary.
        return {
            'text': text,
            'y': y_position,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        }

    # --- 1. where are the NaN values? ---
    null_mask = data_set.isnull()
    feature_names = list(data_set.columns.values)
    fig_check_null = px.imshow(null_mask,
                               labels=dict(x='Features', y="Samples"),
                               x=feature_names,
                               contrast_rescaling='minmax')
    fig_check_null.update_layout(
        title=centered_title("NaN values detection", 0.95),
        showlegend=False)
    fig_check_null.show()

    # features with multiple NaN values
    nan_heavy_features = [
        'elevation_gain', 'elevation_loss', 'max_run_cadence', 'steps',
        'avg_stride_length', 'min_elevation', 'max_elevation',
        'avg_double_cadence', 'max_double_cadence', 'max_vertical_speed',
        'water_estimated', 'min_respiration_rate', 'max_respiration_rate',
        'avg_respiration_rate', 'activity_training_load'
    ]
    data_set = data_set.drop(nan_heavy_features, axis=1)

    # --- 2. correlation matrix of what is left ---
    corr = data_set.corr()
    heatmap = go.Heatmap(z=corr.values,
                         x=corr.index.values,
                         y=corr.columns.values)
    fig_corr = go.Figure(
        data=[heatmap],
        layout={'title': centered_title("Correlation Matrix", 0.9)})
    fig_corr.show()

    # features with low correlation with activity type
    weakly_correlated_features = ['event_type_id', 'moving_duration']
    data_set = data_set.drop(weakly_correlated_features, axis=1)

    # features highly correlated with others
    redundant_features = [
        'aerobic_training_effect', 'avg_hr', 'start_time_local',
        'start_time_gmt', 'elapsed_duration', 'max_temperature'
    ]
    data_set = data_set.drop(redundant_features, axis=1)

    # --- 3. one box plot per remaining feature, side by side ---
    fig_box = make_subplots(rows=1, cols=len(data_set.columns.values))
    for position, column in enumerate(data_set, start=1):
        series = data_set[column]
        fig_box.add_trace(go.Box(y=series.values, name=series.name),
                          row=1,
                          col=position)
    fig_box.update_layout(title=centered_title("Outliers Detection", 0.9),
                          showlegend=False)
    fig_box.show()

    return data_set
def make_pb1(color):
    """Return a figure dict holding one box trace of ``qdata.Open.pct_change()``.

    Args:
        color: Marker colour handed to the box trace unchanged.

    Returns:
        dict: ``{'data': [box_trace]}`` where the trace's ``x`` values are
        the result of ``qdata.Open.pct_change()``.
    """
    open_changes = qdata.Open.pct_change()
    box_trace = go.Box(x=open_changes, marker={'color': color})
    return {'data': [box_trace]}
def sessionsbytime_figure(df, selected_groupby):
    """Build the sessions-over-time figure.

    One trace is added per (MODALITY, SESSTYPE) combination that has rows.
    In the default view each trace is a horizontal box trace whose box is
    made invisible so only the jittered session markers show.

    Args:
        df: DataFrame with at least the columns MODALITY, SESSTYPE, DATE,
            SESSION and the column named by ``selected_groupby``.
        selected_groupby: Name of the ``df`` column plotted on the y axis.

    Returns:
        The plotly figure. If no combination has rows, the figure is
        returned without traces instead of raising.
    """
    from itertools import cycle, product
    import plotly.express as px

    fig = plotly.subplots.make_subplots(rows=1, cols=1)
    fig.update_layout(margin=dict(l=40, r=40, t=40, b=40))

    # TODO: if weekly is chosen, show the actual session name instead of a dot
    # TODO: use different shapes for PET vs MR
    # TODO: try to connect baseline with followup with arc line or something
    # or could have "by subject" choice that has a subject per y value

    palette = cycle(px.colors.qualitative.Plotly)

    # The subplot grid is 1x1, so every trace goes to the same cell. Defined
    # up front so that every view branch below can use them.
    _row = 1
    _col = 1

    # Marker symbols per modality, see
    # https://plotly.com/python/marker-style/
    symbols = {'MR': 'circle-dot', 'PET': 'diamond-wide-dot'}
    default_symbol = 'diamond-tall-dot'
    marker_alpha = 0.7

    # Only the default view is selectable for now; the other branches are
    # kept as scaffolding for the planned monthly/all-time/weekly views.
    view = 'default'

    for mod, sesstype in product(df.MODALITY.unique(), df.SESSTYPE.unique()):
        # Get subset for this session type
        dfs = df[(df.SESSTYPE == sesstype) & (df.MODALITY == mod)]

        # Nothing to plot so go to next session type
        if dfs.empty:
            continue

        if view == "month":
            # TBD
            pass
        elif view == 'all':
            # All-time view: histograms by year or quarter or whatever
            # fits well
            fig.append_trace(
                go.Histogram(
                    hovertext=dfs['SESSION'],
                    name='{} ({})'.format(sesstype, len(dfs)),
                    x=dfs['DATE'],
                    y=dfs['PROJECT'],
                ), _row, _col)
        elif view == 'weekly':
            # Weekly view: one unit-height stacked bar per session.
            # assign() returns a new frame, so the filtered slice of ``df``
            # is never written to.
            dfs = dfs.assign(ONE=1)
            fig.append_trace(
                go.Bar(
                    hovertext=dfs['SESSION'],
                    name='{} ({})'.format(sesstype, len(dfs)),
                    x=dfs['DATE'],
                    y=dfs['ONE'],
                ), _row, _col)
            fig.update_layout(barmode='stack', width=900, bargap=0.1)
        else:
            # Default: jittered box plot with the box itself hidden
            symb = symbols.get(mod, default_symbol)
            _color = next(palette)

            # Convert the palette colour to rgba with the marker alpha.
            # Palette entries are either '#rrggbb' or 'rgb(r,g,b)'.
            if _color.startswith('#'):
                _r, _g, _b = (int(_color[i:i + 2], 16) for i in (1, 3, 5))
            else:
                _r, _g, _b = _color[4:-1].split(',')
            _rgba = 'rgba({},{},{},{})'.format(_r, _g, _b, marker_alpha)

            fig.append_trace(
                go.Box(
                    name='{} {} ({})'.format(sesstype, mod, len(dfs)),
                    x=dfs['DATE'],
                    y=dfs[selected_groupby],
                    boxpoints='all',
                    jitter=0.7,
                    text=dfs['SESSION'],
                    pointpos=0.5,
                    orientation='h',
                    marker={
                        'symbol': symb,
                        'color': _rgba,
                        'size': 12,
                        'line': dict(width=2, color=_color)
                    },
                    # fully transparent box and whiskers: markers only
                    line={'color': 'rgba(0,0,0,0)'},
                    fillcolor='rgba(0,0,0,0)',
                    hoveron='points',
                ), _row, _col)

    # show lines so we can better distinguish categories
    fig.update_yaxes(showgrid=True)

    # min() raises on an empty sequence, so only inspect the x range when at
    # least one trace was added.
    if fig.data:
        x_min = min(min(trace_data.x) for trace_data in fig.data)

        # Pin the axis to the whole month with weekly ticks for these two
        # known start dates.
        if x_min in ('2021-11-01', '2021-11-10'):
            fig.update_xaxes(
                range=('2021-10-31', '2021-12-01'),
                tickvals=[
                    '2021-11-01', '2021-11-08', '2021-11-15', '2021-11-22',
                    '2021-11-29'
                ])

    fig.update_layout(width=900)

    return fig
def isfplots():
    '''Render the ISF page: a box-and-whisker plot of all isf values followed
    by one box per 2-hr time bucket.
    '''
    all_isf, bucket_list = isf.get_all_isf_plus_buckets()

    # Keep the first field of each record, dropping falsy entries.
    overall_values = [record[0] for record in all_isf if record[0]]

    # Bucket i of bucket_list is shown under label i.
    bucket_labels = (
        '0am-2am', '2am-4am', '4am-6am', '6am-8am', '8am-10am', '10am-12pm',
        '12pm-14pm', '14pm-16pm', '16pm-18pm', '18pm-20pm', '20pm-22pm',
        '22pm-24pm',
    )

    boxes = [go.Box(y=overall_values, name='all isf')]
    for position, label in enumerate(bucket_labels):
        boxes.append(go.Box(y=bucket_list[position], name=label))

    y_axis = dict(
        title='mgdl/unit',
        # the y zeroline is the line where y=0
        zeroline=True,
        zerolinecolor='#800000',
        zerolinewidth=2,
        # this is the vertical line at the left edge
        showline=False,
        rangemode='tozero')
    layout = go.Layout(title='isf values',
                       width=1500,
                       height=1000,
                       yaxis=y_axis)

    graph = go.Figure(data=boxes, layout=layout)
    graphJSON = json.dumps(graph, cls=plotly.utils.PlotlyJSONEncoder)

    return render_template('isfplots.html',
                           version=app.config['VERSION'],
                           page_title='Minerva ISF values',
                           graphJSON=graphJSON)
import plotly.graph_objs as go fig = go.Figure() fig.add_trace(go.Box( x=[2,3,1,5], y=[ ['First','First','First','First'], ["A","A","A","A"] ], name="A", orientation="h" )) fig.add_trace(go.Box( x=[8,3,6,5], y=[ ['First','First','First','First'], ["B","B","B","B"] ], name="B", orientation="h" )) fig.add_trace(go.Box( x=[2,3,2,5], y=[ ['Second','Second','Second','Second'], ["C","C","C","C"] ], name="C", orientation="h" )) fig.add_trace(go.Box( x=[7.5,3,6,4],
for i in sorted(df.year.unique())], name='sum') ] }) ], className="six columns"), ], className="row"), html.Div([ html.H3('Nombre de jours d\'accomplissement par année'), dcc.Graph( id='g3', figure={ 'data': [ go.Box(y=df[df.year == i]['days_to_complete'].tolist(), name=str(i), boxpoints=False) for i in sorted(df.year.unique()) ] }) ]), html.U(html.H2('Détails')), html.Div([ html.Div([ dcc.Slider( id='year-slider', min=df['year'].min(), max=df['year'].max(), value=df['year'].min(), marks={str(year): str(year) for year in df['year'].unique()}) ],
# Nearly 80% of the borrowers are female.
#
# **Countrywise Loan Amount Distribution:**
#
# Now let us look at the loan amount distribution at country level.

# In[ ]:


# One box trace per country, in the order groupby yields the groups.
trace = [
    go.Box(x=country_loans["loan_amount_trunc"].values, name=country)
    for country, country_loans in kiva_loans_df.groupby("country")
]

# Tall canvas so that every country gets its own readable row.
layout = go.Layout(title='Loan Amount Distribution by country',
                   width=800,
                   height=2000)

fig = go.Figure(data=trace, layout=layout)
py.iplot(fig, filename="LoanAmountCountry")


# **Sectorwise Loan Amount distribution:**

# In[ ]:
y_data = [y0, y1, y2, y3, y4] colors = [ 'rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)', 'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)' ] traces = [] for xd, yd, cls in zip(x_data, y_data, colors): traces.append( go.Box( y=yd, name=xd, boxpoints=False, jitter=0.5, whiskerwidth=0.2, fillcolor=cls, marker=dict(size=2, ), line=dict(width=1), )) layout = go.Layout( title='Difference in sales {} from cluster to cluster'.format( field_to_plot), yaxis=dict( autorange=True, showgrid=True, zeroline=True, dtick=50, gridcolor='black', gridwidth=0.1,
def update_graph(festival_name, genre_name):
    """Build the box-office box plot and the mean bar chart for one festival.

    The festival's dataset is split by ``newDataset`` into contenders,
    winners and losers; outliers are removed with ``outliers``; and the
    result is compared with all films of ``newbo`` whose
    ``'Genre_' + festival_name`` column equals ``genre_name``. The plotted
    column is the module-level ``feature``.

    Args:
        festival_name: Festival key. Must be a key of the dataset table
            below; it is also used to index ``newbo`` and ``combine``.
        genre_name: Genre to filter on.

    Returns:
        tuple: ``(figure, figure2)`` - the box plot of winners, nominees,
        contenders and non-contenders, and the horizontal bar chart of
        their means.

    Raises:
        ValueError: If ``festival_name`` is not a known festival.
    """
    # Festival key -> festival dataset. This replaces a long if/elif chain
    # in which the branch header for ``brisbane`` was missing, so 'AAFCA'
    # requests were silently answered with Brisbane data.
    # NOTE(review): the key 'Brisbane' is inferred from the naming pattern of
    # the other keys - confirm it matches the 'Genre_*' column in ``newbo``
    # and the key in ``combine``.
    festival_datasets = {
        'Sundance': sundance,
        'Tribeca': tribeca,
        'Chicago': chicago,
        'Berlin': berlin,
        'Rotterdam': rotterdam,
        'Cannes': cannes,
        'Venice': venice,
        'SXSW': sxsw,
        'Seattle': seattle,
        'SanFrancisco': san,
        'Slamdance': slam,
        'Locarno': locarno,
        'Sitges': sitges,
        'Toronto': toronto,
        'KarlovyVary': kv,
        'HongKong': hongkong,
        'Austin': austin,
        'Torino': torino,
        'Marrakech': marrakech,
        'Tokyo': tokyo,
        'GoldenHorse': goldenhorse,
        'BuenosAires': buenosaires,
        'Gramado': gramado,
        'Cairo': cairo,
        'Havana': havana,
        'Rio': rio,
        'SaoPaulo': saopaulo,
        'AsiaPacific': asiapacific,
        'India': india,
        'Sydney': sydney,
        'Beijing': beijing,
        'TokyoF': tokyof,
        'AAFCA': aafca,
        'Brisbane': brisbane,
        'Jerusalem': jerusalem,
        'Haifa': haifa,
        'GrandBell': grandbell,
        'Fajr': fajr,
        'Singapore': singapore,
        'Yamagata': yamagata,
        'Shanghai': shanghai,
        'Kerala': kerala,
        'Taipei': taipei,
        'Jeonju': jeonju,
        'Moscow': moscow,
        'Edinburgh': edinburgh,
        'Mannheim-Heidelberg': mannheimheidelberg,
        'San Sebastián': sansebastián,
        'Taormina': taormina,
        'London': london,
        'Thessaloniki': thessaloniki,
    }
    if festival_name not in festival_datasets:
        # Previously this fell through to an UnboundLocalError on ``s``.
        raise ValueError('Unknown festival: {!r}'.format(festival_name))

    s, s_winner, s_loser = newDataset(festival_datasets[festival_name],
                                      genre_name)

    # Films of this genre and the festival's contenders, with outliers
    # split off.
    allfilm = newbo[newbo['Genre_' + festival_name] == genre_name]
    allfilm_outlier, allfilm_new = outliers(allfilm, feature)
    s_outlier, s_new = outliers(s, feature)
    s_winner_new = drop_contenders_new(s_winner, s_new, False)
    s_loser_new = drop_contenders_new(s_loser, s_new, False)

    allmean = cal_mean(allfilm_new, feature)
    smean = cal_mean(s_new, feature)
    slmean = cal_mean(s_loser_new, feature)
    swmean = cal_mean(s_winner_new, feature)

    title1 = festival_name + ' ' + genre_name + ' Box Office Boxplot'
    title2 = genre_name + ' Films Average Box Office'

    figure = go.Figure(
        data=[
            go.Box(y=s_winner_new[feature], name='Winners',
                   marker_color='#C4DFE6'),
            go.Box(y=s_loser_new[feature], name='Nominees',
                   marker_color='#66A5AD'),
            go.Box(y=s_new[feature], name='Contenders',
                   marker_color='#07575B'),
            go.Box(y=allfilm_new[feature], name='Non-Contenders',
                   marker_color='#003B46'),
        ],
        layout=go.Layout(title={'text': title1, 'x': 0.5}))

    figure2 = go.Figure(
        data=[
            go.Bar(x=[allmean, smean, slmean, swmean],
                   y=['others', 'contenders', 'nominee', 'winner'],
                   text=combine[festival_name]['boxoffice'][genre_name],
                   textposition='auto',
                   marker_color=['#003B46', '#07575B', '#66A5AD', '#C4DFE6'],
                   orientation='h')
        ],
        layout=go.Layout(title={'text': title2, 'x': 0.5}))

    # Same dark theme for both figures; only the box plot shows a legend.
    figure.update_layout(
        paper_bgcolor='#1e2130',
        plot_bgcolor='#1e2130',
        legend={'font': {'color': 'darkgray'}},
        font={'color': 'darkgray'},
        showlegend=True
    )
    figure2.update_layout(
        paper_bgcolor='#1e2130',
        plot_bgcolor='#1e2130',
        legend={'font': {'color': 'darkgray'}},
        font={'color': 'darkgray'},
        showlegend=False
    )

    return figure, figure2
def main(_type='alpha', _low_connectivity=False, _plots=None, _plot_types=None):
    """Sweep alpha or beta and save plots of the resulting correlations.

    For every (alpha, beta) pair the cross-correlation module's
    ``compute_fiber_densities`` is run; the values at ``TIME_LAG_INDEX`` and
    the per-time-lag "highest" counts are collected and plotted.

    Args:
        _type: 'alpha' sweeps ``ALPHAS`` with a fixed ``BETA``; 'beta'
            sweeps ``BETAS`` with a fixed ``ALPHA``.
        _low_connectivity: Forwarded to ``compute_fiber_densities`` and
            embedded in the output file names.
        _plots: Which result sets to plot, any of 'same' and 'different'.
            Defaults to both.
        _plot_types: Which plot kinds to produce, any of 'stacked_bar',
            'box' and 'bar'. Defaults to all three.

    Raises:
        Exception: If ``_type`` is neither 'alpha' nor 'beta'.
    """
    if _plots is None:
        _plots = ['same', 'different']
    if _plot_types is None:
        _plot_types = ['stacked_bar', 'box', 'bar']

    # the swept parameter's values double as the x-axis categories
    if _type == 'alpha':
        _alphas, _betas = ALPHAS, [BETA] * len(ALPHAS)
        _names = _alphas
    elif _type == 'beta':
        _alphas, _betas = [ALPHA] * len(BETAS), BETAS
        _names = _betas
    else:
        raise Exception(
            "No such type. Only 'alpha' or 'beta' are acceptable.")

    _cross_correlation = \
        same_inner_correlation_vs_different_inner_correlation_cross_correlation

    _same_arrays, _different_arrays = [], []
    _same_highest, _different_highest = [], []
    for _alpha, _beta in zip(_alphas, _betas):
        print('Alpha:', _alpha, 'beta:', _beta)
        _, _same_lag_arrays, _different_lag_arrays, _same_lag_highest, \
            _different_lag_highest = _cross_correlation.compute_fiber_densities(
                _alpha=_alpha, _beta=_beta, _low_connectivity=_low_connectivity)
        _same_arrays.append(_same_lag_arrays[TIME_LAG_INDEX])
        _different_arrays.append(_different_lag_arrays[TIME_LAG_INDEX])
        _same_highest.append(_same_lag_highest)
        _different_highest.append(_different_lag_highest)

    def _category_x_axis():
        # x axis shared by all plots: one category per swept value
        return {
            'title': _type.capitalize(),
            'zeroline': False,
            'tickmode': 'array',
            'tickvals': _names,
            'type': 'category'
        }

    def _save_plot(_figure, _prefix, _plot_name):
        save.to_html(_fig=_figure,
                     _path=os.path.join(paths.PLOTS, save.get_module_name()),
                     _filename=_prefix + _type + '_low_con_' +
                     str(_low_connectivity) + '_' + _plot_name)

    _highest_by_name = list(
        zip(['same', 'different'], [_same_highest, _different_highest]))
    _arrays_by_name = list(
        zip(['same', 'different'], [_same_arrays, _different_arrays]))

    # stacked bar plot
    if 'stacked_bar' in _plot_types:
        for _name, _sums in _highest_by_name:
            if _name not in _plots:
                continue

            # fractions per swept value: [leader, none, follower]
            _fractions = [[], [], []]
            for _type_sums in _sums:
                _wins = [0, 0, 0]
                for _time_lag, _type_sum in zip(_cross_correlation.TIME_LAGS,
                                                _type_sums):
                    if _time_lag > 0:
                        _slot = 0
                    elif _time_lag < 0:
                        _slot = 2
                    else:
                        _slot = 1
                    _wins[_slot] += _type_sum
                _total = sum(_type_sums)
                for _fraction_list, _count in zip(_fractions, _wins):
                    _fraction_list.append(_count / _total)

            _bars = [
                go.Bar(x=_names,
                       y=_role_fractions,
                       name=_role,
                       marker={'color': _role_color})
                for _role, _role_fractions, _role_color in zip(
                    ['Leader', 'None', 'Follower'], _fractions,
                    config.colors(3))
            ]
            _fig = go.Figure(data=_bars,
                             layout={
                                 'xaxis': _category_x_axis(),
                                 'yaxis': {
                                     'title': 'Highest correlation fraction',
                                     'range': [0, 1.1],
                                     'zeroline': False,
                                     'tickmode': 'array',
                                     'tickvals': [0, 0.5, 1]
                                 },
                                 'barmode': 'stack',
                                 'legend': {
                                     'xanchor': 'right',
                                     'yanchor': 'top',
                                     'bordercolor': 'black',
                                     'borderwidth': 2
                                 },
                             })
            _save_plot(_fig, 'plot_stacked_bar_', _name)

    # box plot
    if 'box' in _plot_types:
        for _name, _arrays in _arrays_by_name:
            if _name not in _plots:
                continue

            if _name == 'same':
                _y_title = 'Inner correlation'
            else:
                _y_title = 'Different network correlation'

            _boxes = [
                go.Box(y=_values,
                       name=_label,
                       boxpoints=False,
                       line={'width': 1},
                       marker={
                           'size': 10,
                           'color': '#2e82bf'
                       },
                       showlegend=False)
                for _values, _label in zip(_arrays, _names)
            ]
            _fig = go.Figure(data=_boxes,
                             layout={
                                 'xaxis': _category_x_axis(),
                                 'yaxis': {
                                     'title': _y_title,
                                     'range': [-1, 1.1],
                                     'zeroline': False,
                                     'tickmode': 'array',
                                     'tickvals': [-1, -0.5, 0, 0.5, 1]
                                 }
                             })
            _save_plot(_fig, 'plot_box_', _name)

    # bar plot
    if 'bar' in _plot_types:
        for _name, _sums in _highest_by_name:
            if _name not in _plots:
                continue

            # share of "highest" counts that fall on the chosen time lag
            _lag_fractions = [
                _type_sums[TIME_LAG_INDEX] / sum(_type_sums)
                for _type_sums in _sums
            ]
            _fig = go.Figure(data=go.Bar(x=_names,
                                         y=_lag_fractions,
                                         marker={'color': '#2e82bf'}),
                             layout={
                                 'xaxis': _category_x_axis(),
                                 'yaxis': {
                                     'title': 'Lag ' + str(TIME_LAG) +
                                              ' highest correlation fraction',
                                     'range': [0, 1.1],
                                     'zeroline': False,
                                     'tickmode': 'array',
                                     'tickvals': [0, 0.5, 1]
                                 }
                             })
            _save_plot(_fig, 'plot_bar_', _name)
def main():
    """Plot derivative correlations of communicating vs. non-communicating cells.

    For every window distance in ``OFFSETS_X`` two samples are collected:
    "communicating" correlates the left and right cell of the same
    simulation, "non-communicating" correlates cells taken from two
    different simulations (every unordered simulation pair, all four
    left/right combinations). A rank-sum test between the two samples is
    printed per window distance, and a grouped box plot is saved as 'plot'.
    """
    _simulations = load.structured()
    _simulations = filtering.by_time_points_amount(_simulations, TIME_POINTS)
    _simulations = filtering.by_categories(
        _simulations,
        _is_single_cell=False,
        _is_heterogeneity=True,
        _is_low_connectivity=False,
        _is_causality=False,
        _is_dominant_passive=False,
        _is_fibrin=False
    )
    _simulations = filtering.by_pair_distance(_simulations, _distance=PAIR_DISTANCE)
    _simulations = filtering.by_heterogeneity(_simulations, _std=STD)
    print('Total simulations:', len(_simulations))

    _fiber_densities = compute_fiber_densities(_simulations)

    def _derivative_correlation(_first_densities, _second_densities):
        # correlation between the derivatives of two fiber density series
        return compute_lib.correlation(
            compute_lib.derivative(_first_densities, _n=DERIVATIVE),
            compute_lib.derivative(_second_densities, _n=DERIVATIVE)
        )

    _cell_ids = ['left_cell', 'right_cell']

    # one list of correlations per window distance
    _communicating_by_distance = [[] for _ in OFFSETS_X]
    _non_communicating_by_distance = [[] for _ in OFFSETS_X]

    for _distance_index, _window_distance in enumerate(OFFSETS_X):
        print('Window distance:', _window_distance)
        _communicating = _communicating_by_distance[_distance_index]
        _non_communicating = _non_communicating_by_distance[_distance_index]

        # communicating: both cells of the same simulation
        for _simulation in tqdm(_simulations, desc='Communicating loop'):
            _left_densities = _fiber_densities[(_simulation, _window_distance, 'left_cell')]
            _right_densities = _fiber_densities[(_simulation, _window_distance, 'right_cell')]
            _communicating.append(
                _derivative_correlation(_left_densities, _right_densities))

        # non-communicating: cells of two different simulations
        _simulations_count = len(_simulations)
        for _first_index in tqdm(range(_simulations_count), desc='Non-communicating pairs loop'):
            _first_simulation = _simulations[_first_index]
            for _second_index in range(_first_index + 1, _simulations_count):
                _second_simulation = _simulations[_second_index]
                for _first_cell_id, _second_cell_id in product(_cell_ids, _cell_ids):
                    _first_densities = \
                        _fiber_densities[(_first_simulation, _window_distance, _first_cell_id)]
                    _second_densities = \
                        _fiber_densities[(_second_simulation, _window_distance, _second_cell_id)]
                    _non_communicating.append(
                        _derivative_correlation(_first_densities, _second_densities))

        # rank sums
        print('Wilcoxon rank-sum tests between communicating and non-communicating:',
              ranksums(_communicating, _non_communicating))

    # plot: one box trace per group, x holds the window distance of each value
    _groups = [
        (True, 'Communicating', _communicating_by_distance),
        (False, 'Non-communicating', _non_communicating_by_distance),
    ]
    _data = []
    for (_is_communicating, _group_text, _by_distance), _color in zip(_groups, config.colors(2)):
        _y = [
            _correlation
            for _correlations in _by_distance
            for _correlation in _correlations
        ]
        _x = [
            _window_distance
            for _window_distance, _correlations in zip(OFFSETS_X, _by_distance)
            for _ in _correlations
        ]
        # individual points are drawn for the communicating group only
        _box_points = 'all' if _is_communicating else False
        _data.append(
            go.Box(
                y=_y,
                x=_x,
                name=_group_text,
                boxpoints=_box_points,
                jitter=1,
                pointpos=0,
                line={'width': 1},
                fillcolor='white',
                marker={'size': 10, 'color': _color},
                opacity=0.7
            )
        )

    _layout = {
        'xaxis': {
            'title': 'Window distance (cell diameter)',
            'zeroline': False,
            'tickmode': 'array',
            'tickvals': OFFSETS_X,
            'type': 'category'
        },
        'yaxis': {
            'title': 'Correlation',
            'range': [-1, 1],
            'zeroline': False,
            'tickmode': 'array',
            'tickvals': [-1, -0.5, 0, 0.5, 1]
        },
        'boxmode': 'group',
        'legend': {
            'xanchor': 'right',
            'yanchor': 'top',
            'bordercolor': 'black',
            'borderwidth': 2
        }
    }
    _fig = go.Figure(data=_data, layout=_layout)

    save.to_html(
        _fig=_fig,
        _path=os.path.join(paths.PLOTS, save.get_module_name()),
        _filename='plot'
    )
def listToBox(values, n):
    """Wrap ``values`` in a box trace named ``n`` with individual points hidden.

    Args:
        values: y values of the box.
        n: Trace name.

    Returns:
        The ``go.Box`` trace.
    """
    # TODO set colour based on Firefox/Tor Original/Ublock?
    box_options = {
        'y': values,
        'name': n,
        'boxpoints': False,  # previously tried: jitter=0.3, pointpos=-1.8
    }
    return go.Box(**box_options)
def update_main_graph(parameter, dimension, trace):
    """Build the main figure for one grouping parameter and one trait.

    Args:
        parameter: 'age' or 'education' select the matching lake/sea
            datasets; any other value selects ``lake_gen``/``sea_gen``.
            Also the name of the column used for the x values.
        dimension: Trait name. 'agreeableness', 'conscientiousness' (the
            legacy spelling 'conscientiosness' is still accepted),
            'extraversion' and 'neuroticism' map to columns "A", "C", "E"
            and "N"; any other value maps to "O".
        trace: Container of the selected trace keys: 'mean', 'res', 'prog'.

    Returns:
        go.Figure with the selected traces, in the order mean, respondents,
        progress.
    """
    if parameter == 'age':
        lake, sea = lake_age, sea_age
    elif parameter == 'education':
        lake, sea = lake_edu, sea_edu
    else:
        lake, sea = lake_gen, sea_gen

    # Only the misspelled 'conscientiosness' used to be recognised, so the
    # correct spelling silently fell through to column "O". Both are
    # accepted so existing callers keep working.
    dimension_columns = {
        'agreeableness': "A",
        'conscientiosness': "C",
        'conscientiousness': "C",
        'extraversion': "E",
        'neuroticism': "N",
    }
    column = dimension_columns.get(dimension, "O")

    # Each trace is built only when selected, so an unselected trace cannot
    # fail on data it does not need (e.g. the row lookups of the progress
    # line on an empty frame).
    data = []
    if 'mean' in trace:
        data.append(go.Scatter(
            name="Mean",
            x=lake[parameter],
            y=lake[column],
        ))
    if 'res' in trace:
        data.append(go.Box(
            name="Respondents",
            x=sea[parameter],
            y=sea[column],
        ))
    if 'prog' in trace:
        # straight line between the rows labelled 0 and len(lake) - 1
        last_label = len(lake.index) - 1
        data.append(go.Scatter(
            name="Progress",
            x=[lake.loc[0, parameter], lake.loc[last_label, parameter]],
            y=[lake.loc[0, column], lake.loc[last_label, column]],
        ))

    layout = go.Layout(
        xaxis=dict(
            title=parameter.title(),
            showgrid=True,
            zeroline=True,
            zerolinecolor='#969696',
            zerolinewidth=4
        ),
        yaxis=dict(
            title=dimension.title(),
            range=[1, 6],
            showgrid=True,
            gridcolor='#bdbdbd',
            gridwidth=1
        )
    )

    figure = go.Figure(data=data, layout=layout)
    return figure