def line_chart(df_melted, y_axis, title):
    """Build an interactive line chart of ``y_axis`` against ``Year``.

    Three layers are combined: the line itself, a layer of fully transparent
    points that carries a nearest-point mouseover selection on ``Year``, and a
    layer of visible points whose opacity is 1 only for the selected year.

    Reads the names ``color`` and ``forecast_period`` from the enclosing
    module scope; they are not parameters of this function.

    :param df_melted: data passed to ``alt.Chart``; its index is used as the
        sort order of the x axis.
    :param y_axis: shorthand for the y encoding, also used for the tooltip.
    :param title: chart title.
    :return: the layered, interactive chart.
    """
    hover = alt.selection(type='single',
                          nearest=True,
                          on='mouseover',
                          fields=['Year'],
                          empty='none')

    # Invisible points: they only exist so the selection has marks to snap to.
    selectors = alt.Chart(df_melted).mark_point().encode(
        x='Year:Q',
        opacity=alt.value(0),
    ).add_selection(hover)

    year_axis = alt.X('Year:Q',
                      axis=alt.Axis(tickCount=forecast_period),
                      sort=list(df_melted.index))
    value_tooltip = alt.Tooltip(y_axis, format=',.0%')
    line = alt.Chart(df_melted).mark_line(color=color).encode(
        x=year_axis,
        y=alt.Y(y_axis),
        tooltip=[value_tooltip],
    )

    # Show a marker only on the point nearest to the cursor.
    marker_opacity = alt.condition(hover, alt.value(1), alt.value(0))
    points = line.mark_point(color=color, size=40).encode(opacity=marker_opacity)

    layered = alt.layer(line, selectors, points)
    sized = layered.properties(title=title, width=alt.Step(60), height=400)
    return sized.interactive()
def get_divergence_chart(self):
    """
    Build the styled chart of how actual rolls diverged from the expected
    distribution.

    Reads ``self.history_count`` (columns ``Roll``, ``Difference`` and
    ``Count`` are accessed) and ``self.screen_width``.

    :return: tuple ``(diff_chart, roll_count)`` where ``diff_chart`` is the
        horizontal bar chart and ``roll_count`` is the transposed ``Count``
        column indexed by ``Roll``.
    """
    # Min-max scale the differences; NaN results are replaced with 0.
    differences = self.history_count["Difference"]
    scaled = (differences - differences.min()) / (differences.max() - differences.min())
    scaled = [0 if np.isnan(value) else value for value in scaled]

    # 51 shades of 'Reds_r' followed by the same shades reversed (102 entries),
    # indexed by int(100 * scaled value).
    reds_r = cm.get_cmap('Reds_r', 51)
    shades = [reds_r(step) for step in range(51)]
    palette = shades + shades[::-1]
    roll_colors = [colors.to_hex(palette[int(100 * value)]) for value in scaled]

    # Make Altair horizontal bar chart
    plt_df = self.history_count.round(2)
    roll_scale = alt.Scale(domain=self.history_count.Roll.to_list(),
                           range=roll_colors)
    bars = alt.Chart(plt_df).mark_bar(size=30, strokeWidth=3, stroke="black")
    encoded = bars.encode(
        y='Roll:O',
        x=alt.X('Difference:Q', scale=alt.Scale(padding=25)),
        color=alt.Color('Roll:O', legend=None, scale=roll_scale),
        tooltip=list(plt_df.columns),
    )
    sized = encoded.properties(
        title="Roll Differential from Expected Count",
        width=self.screen_width / 4,
        height=alt.Step(32),
    )
    titled = sized.configure_title(
        fontSize=32,
        dy=-50,
        limit=600,
        font="Arial",
        align="center",
        anchor="middle",
    )
    diff_chart = titled.configure_axis(labelFontSize=14, titleFontSize=16)

    roll_count = self.history_count.set_index("Roll")[["Count"]].T
    return diff_chart, roll_count
def altair_chart(df, vertical_axis, coloris, titre):
    """Render an interactive bar chart per ``Region`` in the Streamlit app.

    :param df: data passed to ``alt.Chart``.
    :param vertical_axis: shorthand for the y encoding; bars are sorted by it
        in descending order (``sort='-y'``).
    :param coloris: constant colour applied to every bar.
    :param titre: chart title.
    :return: None; the chart is displayed with ``st.altair_chart``.
    """
    bars = alt.Chart(df).mark_bar(size=25)
    encoded = bars.encode(
        x=alt.X('Region', sort='-y'),
        y=vertical_axis,
        color=alt.value(coloris),
    )
    hist = encoded.properties(width=alt.Step(50), height=400, title=titre)
    st.altair_chart(hist.interactive(), use_container_width=True)
def bar_chart(df_melted, y_axis, title):
    """Build an interactive bar chart of ``y_axis`` against ``Year``.

    Reads the names ``color`` and ``forecast_period`` from the enclosing
    module scope; they are not parameters of this function.

    :param df_melted: data passed to ``alt.Chart``; its index is used as the
        sort order of the x axis.
    :param y_axis: shorthand for the y encoding, also used for the tooltip.
    :param title: chart title.
    :return: the interactive chart.
    """
    year_axis = alt.X('Year:Q',
                      axis=alt.Axis(tickCount=forecast_period),
                      sort=list(df_melted.index))
    value_tooltip = alt.Tooltip(y_axis, format=',.0f')

    bars = alt.Chart(df_melted).mark_bar(color=color, size=40)
    encoded = bars.encode(x=year_axis, y=alt.Y(y_axis), tooltip=[value_tooltip])
    sized = encoded.properties(title=title, width=alt.Step(60), height=400)
    return sized.interactive()
def huabar(a, b):
    """Write a colour-graded bar chart of the module-level ``data`` to Streamlit.

    :param a: shorthand for the x encoding; the data order is kept (``sort=None``).
    :param b: shorthand for the y encoding, also mapped to colour with the
        ``redyellowgreen`` scheme over the domain ``(100, -100)``.
    :return: None; the chart is displayed with ``st.write``.
    """
    gradient = alt.Scale(domain=(100, -100), scheme="redyellowgreen")
    bars = alt.Chart(data).mark_bar(size=30).encode(
        x=alt.X(a, sort=None),
        y=b,
        color=alt.Color(b, scale=gradient),
    )
    st.write(bars.properties(width=alt.Step(40), height=600))
def bar_plot(xcol, ycol):
    """Return, as an HTML string, a bar chart of job counts per ``xcol``.

    Only the rows of the module-level ``df`` whose
    ``"Predicted Subscription (current)"`` column equals ``ycol`` are plotted.

    :param xcol: column used for the x axis, its title, and the bar colour.
    :param ycol: value of ``"Predicted Subscription (current)"`` to keep.
    :return: the chart rendered with ``Chart.to_html()``.
    """
    # Lift Altair's row limit so large frames can be embedded in the chart.
    alt.data_transformers.disable_max_rows()

    # The original also created an `alt.selection_multi()` here that was never
    # attached to the chart; it had no effect on the output and was removed.
    subset = df[df["Predicted Subscription (current)"] == ycol]
    chart = alt.Chart(subset).mark_bar().encode(
        alt.X(xcol, title=xcol),
        y="count(Type of Job)",
        color=alt.Color(xcol, legend=None),
    ).properties(width=alt.Step(50)).configure_axis(
        labelFontSize=20,
        titleFontSize=25,
    )
    return chart.to_html()
def make_prop_chart(dimension, width_step):
    """Build the chart of ``prop`` occurrences along ``dimension``.

    Relies on ``props_data_file``, ``select_models`` and ``select_brush``
    from the enclosing scope; the data is filtered by both selections.

    :param dimension: field name used (as nominal) for the x axis and in the title.
    :param width_step: per-category step width of the chart.
    :return: the interactive chart.
    """
    top_axis = alt.Axis(labelAngle=0, title=None, orient='top', labelPadding=5)

    circles = alt.Chart(os.path.basename(props_data_file)).mark_circle(
        size=50, color='green')
    ranked = circles.transform_calculate(rank="format(datum.rank,'03')")
    selected = ranked.transform_filter(select_models).transform_filter(select_brush)
    encoded = selected.encode(
        x=alt.X(f'{dimension}:N', axis=top_axis),
        y=alt.Y('prop:N', axis=alt.Axis(title=None)),
        tooltip=['prop:N'],
    )
    sized = encoded.properties(
        width=alt.Step(width_step),
        title=f'specification terms (x-{dimension})',
    )
    return sized.interactive()
def make_violation_chart(dimension, width_step):
    """Build the chart of soft-rule ``violation`` occurrences along ``dimension``.

    Relies on ``violations_data_file``, ``select_models`` and ``select_brush``
    from the enclosing scope; the data is filtered by both selections. Circle
    size encodes ``num`` and opacity encodes ``weight``.

    :param dimension: field name used (as nominal) for the x axis and in the title.
    :param width_step: per-category step width of the chart.
    :return: the interactive chart.
    """
    top_axis = alt.Axis(labelAngle=0, title=None, orient='top', labelPadding=5)
    tooltip_fields = [
        'set:N', 'rank:Q', 'violation:N', 'num:Q', 'weight:Q', 'cost_contrib:Q'
    ]

    circles = alt.Chart(os.path.basename(violations_data_file)).mark_circle(
        color='red')
    ranked = circles.transform_calculate(rank="format(datum.rank,'03')")
    selected = ranked.transform_filter(select_models).transform_filter(select_brush)
    encoded = selected.encode(
        x=alt.X(f'{dimension}:N', axis=top_axis),
        y=alt.Y('violation:N', axis=alt.Axis(title=None)),
        size=alt.Size('num:Q', legend=None),
        opacity=alt.Opacity('weight:Q',
                            scale=alt.Scale(range=[0, 1]),
                            legend=None),
        tooltip=tooltip_fields,
    )
    sized = encoded.properties(
        width=alt.Step(width_step),
        title=f'soft rule violations (x-{dimension})',
    )
    return sized.interactive()
#기준선 rule = alt.Chart(source).mark_rule(color='blue').encode(y='mean(temp_max):Q') #새로운 축 line = base.mark_line(color='orange').encode(y='temp_min:Q') #tik 추가 tick = base.mark_tick( color='red', thickness=2, size=60 * 0.9, # controls width of tick. ).encode(y='temp_avg:Q', ) #실제 그래프 chart = (bar + text + rule + line + tick).properties( width=alt.Step(60), height=400, ) chart.save('Simple Bar Chart.html') """ 퍼센트 막대 그래프 """ source = pd.DataFrame({ 'Activity': ['Sleeping', 'Eating', 'TV', 'Work', 'Exercise'], 'Time': [8, 2, 4, 8, 2] }) base = alt.Chart(source).transform_joinaggregate( TotalTime='sum(Time)', ).transform_calculate( PercentOfTotal="datum.Time / datum.TotalTime").encode( x=alt.X('PercentOfTotal:Q', axis=alt.Axis(format='.0%')))
# -- adjust import altair as alt import pandas as pd data = pd.DataFrame({'name': ['a', 'b'], 'value': [4, 10]}) alt.Chart(data).mark_bar(size=10).encode(x='name:O', y='value:Q') alt.Chart(data).mark_bar(size=30).encode(x='name:O', y='value:Q') alt.Chart(data).mark_bar(size=30).encode(x='name:O', y='value:Q').properties(width=200) alt.Chart(data).mark_bar(size=30).encode( x='name:N', y='value:Q').properties(width=alt.Step(100)) # adjust chart size import altair as alt from vega_datasets import data cars = data.cars() alt.Chart(cars).mark_bar().encode(x='Origin', y='count()').properties(width=200, height=150) alt.Chart(cars).mark_bar().encode(x='Origin', y='count()', column='Cylinders:Q').properties(width=100,
"""
Jitter Chart
------------
In this chart, we encode the ``Cylinders`` column from the ``cars`` dataset in
the ``y``-channel. Because most cars (all but seven) in this dataset have 4, 6,
or 8 cylinders, the default presentation of this data would show most of the
data concentrated on three horizontal lines. Furthermore, in that default
presentation, it would be difficult to gauge the relative frequencies with
which different values occur (because there would be so much overlap). To
compensate for this, we use the ``yOffset`` channel to incorporate a random
offset (jittering). This is adapted from a corresponding Vega-Lite Example:
`Dot Plot with Jittering <https://vega.github.io/vega-lite/examples/point_offset_random.html>`_.
"""
# category: scatter plots
import altair as alt
from vega_datasets import data

source = data.cars()

# The random offset is computed per row first, then used to jitter each point
# inside its Cylinders band; every band is 50 pixels tall.
alt.Chart(source).mark_point().transform_calculate(
    randomCalc='random()'
).encode(
    alt.X('Horsepower:Q'),
    alt.Y('Cylinders:O'),
    alt.YOffset('randomCalc:Q'),
).properties(
    height=alt.Step(50)
)
# Count every character of the input sequence.
X = Counter(sequence)
X_list = list(X)
X_values = list(X.values())
# Bare expression: left in place so Streamlit can display the counts.
X

# Display the data in raw text (Optional to do so)
st.subheader("2. Print text")
# Label shown for each one-letter nucleotide code. The original mapping
# labelled C as 'thymine (cytosine)' and G as 'adenine (guamine)', so the
# printed text named the wrong bases.
full_forms = {
    'A': 'adenine (A)',
    'C': 'cytosine (C)',
    'T': 'thymine (T)',
    'G': 'guanine (G)',
}
for ch in 'ATGC':
    st.write(f"There are {X[ch]} {full_forms[ch]}")

# Display the data in a dataframe which is better in readability
st.subheader("3. Display DataFrame")
df = pd.DataFrame.from_dict(X, orient='index')
df.rename({0: 'count'}, axis='columns', inplace=True)
df.reset_index(inplace=True)  # the original index becomes a column now
df.rename(columns={'index': 'Nucleotide'}, inplace=True)
st.write(df)

# Graph using Altair
st.subheader("4. Display Bar Chart")
p = alt.Chart(df).mark_bar().encode(x='Nucleotide', y='count')
p = p.properties(width=alt.Step(80))  # 80 otherwise bars would be thin
st.write(p)
# Collect the keys of every entry's "方" mapping into yao_list
# (yao_list is created before this fragment).
for fang in shdata.values():
    yao_list.extend(fang["方"].keys())
# Tally how often each key occurs and sort the table by count, descending.
c = collections.Counter(yao_list)
data = pd.DataFrame({"fname": c.keys(), "fcount": c.values()})
data.sort_values('fcount', ascending=False, inplace=True)
# Draw a bar chart with a gradient colour scale.
st.write(
    alt.Chart(data).mark_bar(size=30).encode(
        x=alt.X('fname', sort=None),
        y='fcount',
        color=alt.Color("fcount",
                        scale=alt.Scale(domain=(100, 1),
                                        scheme="redyellowgreen"))).properties(
                                            width=alt.Step(40), height=600))
st.write(data)
st.title('伤寒论方剂使用次数排名')
# Same tally, this time over each entry's '名' value.
fang_list = [val['名'] for val in shdata.values()]
c = collections.Counter(fang_list)
data = pd.DataFrame({"fname": c.keys(), "fcount": c.values()})
data.sort_values('fcount', ascending=False, inplace=True)
# NOTE(review): huabar is defined elsewhere; it presumably plots the
# module-level `data` that was just rebuilt above - confirm.
huabar('fname', 'fcount')
#st.write(alt.Chart(data).mark_bar().encode(x=alt.X('fname', sort=None), y='fcount'))
st.write(data)
def deposits_chart(df):
    """Display a bar chart of summed ``change`` per ``description``, one column per year.

    :param df: data passed to ``alt.Chart``; the fields ``date``, ``change``
        and ``description`` are referenced by the encodings.
    :return: None; the chart is displayed with ``st.altair_chart``.
    """
    chart = alt.Chart(df).mark_bar().encode(
        alt.Column('year(date):T', title='Year'),
        # The axis title was misspelled 'Ammount' in the rendered chart.
        alt.Y('sum(change):Q', title='Amount'),
        alt.X('description:O', title=None),
    ).properties(width=alt.Step(50))
    st.altair_chart(chart)
X_label = list(X)
X_values = list(X.values())
# Bare expression, kept as in the original script.
X

# 2. Text summary: one line per nucleotide.
st.subheader("2. Print text")
for base, label in (
    ("A", " adenine (A)"),
    ("T", " Thymine (T)"),
    ("G", " guanine (G)"),
    ("C", " cytosine (C)"),
):
    st.write("There are " + str(X[base]) + label)

# 3. The same counts as a two-column table (nucleotide, count).
st.subheader("3. Display DataFrame")
df = (
    pd.DataFrame.from_dict(X, orient="index")
    .rename(columns={0: "count"})
    .reset_index()
    .rename(columns={"index": "nucleotide"})
)
st.write(df)

# 4. Bar chart of the table; each bar gets a 60-pixel step.
st.subheader("4. Display Bar chart")
p = alt.Chart(df, width=alt.Step(60)).mark_bar().encode(
    x="nucleotide",
    y="count",
)
st.write(p)
('G', seq.count('G')), ('C', seq.count('C'))]) return dna_count_dict X = DNA_nucleotide_count(sequence) X_label = list(X) X_values = list(X.values()) st.subheader("1. Print text") st.write("There are " + str(X['A']) + " adenine (A)") st.write("There are " + str(X['T']) + " thymine (T)") st.write("There are " + str(X['G']) + " guanine (G)") st.write("There are " + str(X['C']) + " cytosine (C)") # Display Dataframe st.subheader("3. Display DataFrame") df = pd.DataFrame.from_dict(X, orient='index') df = df.rename({0: 'count'}, axis='columns') df.reset_index(inplace=True) df = df.rename(columns={'index': 'nucleotide'}) st.write(df) # Display Bar chart st.subheader("4. Display Bar Chart") bar_chart = alt.Chart(df).mark_bar().encode(x='nucleotide', y='count') bar_chart = bar_chart.properties(width=alt.Step(80)) st.write(bar_chart)
def create_exploratory_visualisation(trial_id,
                                     directory,
                                     vis_data_file,
                                     match_data_file,
                                     violations_data_file,
                                     props_data_file,
                                     baseline_label='baseline',
                                     verde_label='verde'):
    """
    Uses altair to generate the exploratory visualisation.

    The result is a compound chart: a ranking chart (circles, optional match
    lines, cost rules and selectable squares) next to violation and prop
    charts, which are filtered by the selections made on the ranking chart.
    The chart is saved as HTML; nothing is returned.

    :param trial_id: used in the per-visualisation link prefix
        (``{trial_id}_view_one_vl.html?vl_json=``), in the chart title, and in
        the output file name (``{trial_id}_view_compare.html``).
    :param directory: the chart is saved under ``<directory>/vegalite/``.
    :param vis_data_file: data file for the ranking layers; only its basename
        is passed to ``alt.Chart``.
    :param match_data_file: data file for the match lines; only its basename
        is passed to ``alt.Chart``. When falsy the match-line layer is omitted.
    :param violations_data_file: data file for the violation charts; only its
        basename is passed to ``alt.Chart``.
    :param props_data_file: data file for the prop charts; only its basename
        is passed to ``alt.Chart``.
    :param baseline_label: value of the ``set`` field whose link circles are
        drawn with an x offset of -15.
    :param verde_label: value of the ``set`` field whose link circles are
        drawn with an x offset of +15.
    :return: None
    """
    vl_viewer = f'{trial_id}_view_one_vl.html?vl_json='
    # common data and transforms for first layer with marks for each vis model
    base = alt.Chart(os.path.basename(vis_data_file)).transform_calculate(
        rank="format(datum.rank,'03')",
        link=f"'{vl_viewer}' + datum.vl_spec_file").properties(
            width=250, height=alt.Step(30), title='visualisation rankings')
    # add a selectable square for each vis model
    select_models = alt.selection_multi(fields=['set', 'rank'])
    select_brush = alt.selection_interval()
    squares = base.mark_square(size=150).encode(
        alt.X('set:O',
              axis=alt.Axis(labelAngle=0,
                            title=None,
                            orient='top',
                            labelPadding=5)),
        alt.Y('rank:O', axis=alt.Axis(title=None)),
        tooltip=['set:N', 'rank:N', 'cost:Q'],
        opacity=alt.Opacity('has_match:O', legend=None),
        color=alt.condition(
            select_models | select_brush, alt.value('steelblue'),
            alt.value('lightgray'))).add_selection(select_models,
                                                   select_brush).interactive()

    # add a small circle with the hyperlink to the actual vis.
    # Shame that xoffset is not an encoding channel, so we have to do in two steps...
    def make_circles(vis_set, offset):
        # One layer per set: keep only the rows of `vis_set` and shift the
        # circles horizontally by `offset` via the mark property.
        return base.transform_filter(datum.set == vis_set).mark_circle(
            size=25,
            xOffset=offset,
        ).encode(alt.X('set:O',
                       axis=alt.Axis(labelAngle=0,
                                     title=None,
                                     orient='top',
                                     labelPadding=5)),
                 alt.Y('rank:O'),
                 tooltip=['link:N'],
                 href='link:N',
                 color=alt.condition(select_models | select_brush,
                                     alt.value('steelblue'),
                                     alt.value('lightgray'))).interactive()

    baseline_circles = make_circles(baseline_label, -15)
    verde_circles = make_circles(verde_label, 15)
    # next layer is match lines, handle case of no matches
    if match_data_file:
        col_domain = ['not', 'with_equal_cost', 'with_different_cost']
        col_range_ = ['steelblue', 'green', 'red']
        match_lines = alt.Chart(
            os.path.basename(match_data_file)).mark_line().transform_calculate(
                rank="format(datum.rank,'03')").encode(
                    alt.X('set:O',
                          axis=alt.Axis(labelAngle=0,
                                        title=None,
                                        orient='top',
                                        labelPadding=5)),
                    alt.Y('rank:O'),
                    detail=['match:N', 'match_type:N'],
                    strokeDash=alt.StrokeDash(
                        'match_type:N',
                        scale=alt.Scale(domain=['verde_addition', 'exact'],
                                        range=[[5, 4], [1, 0]]),
                        legend=alt.Legend(orient='bottom')),
                    color=alt.condition(
                        select_models | select_brush,
                        alt.Color('crossed:N',
                                  scale=alt.Scale(domain=col_domain,
                                                  range=col_range_),
                                  legend=alt.Legend(orient='bottom')),
                        alt.value('lightgray')))
    else:
        match_lines = None
    # rules to connect models with the same cost
    cost_rules = base.mark_rule(strokeWidth=2).transform_aggregate(
        min_rank='min(rank)', max_rank='max(rank)', groupby=['set', 'cost'
                                                             ]).encode(
            alt.X('set:O',
                  axis=alt.Axis(labelAngle=0,
                                title=None,
                                orient='top',
                                labelPadding=5)),
            alt.Y('min_rank:O'),
            alt.Y2('max_rank:O'),
            color=alt.condition(select_models | select_brush,
                                alt.value('steelblue'),
                                alt.value('lightgray')),
            tooltip=['cost:Q', 'min_rank:O', 'max_rank:O']).interactive()
    # Layer order: link circles first, then (optional) match lines, then the
    # cost rules, with the selectable squares added last.
    rank_chart = baseline_circles + verde_circles
    # NOTE(review): this relies on the truthiness of a chart object;
    # `is not None` would state the intent explicitly - confirm.
    if match_lines:
        rank_chart = rank_chart + match_lines
    rank_chart = rank_chart + cost_rules + squares

    # chart to show violation occurrences and weights for selected vis models across sets
    def make_violation_chart(dimension, width_step):
        # Filtered by both selections; size encodes `num`, opacity `weight`.
        return alt.Chart(os.path.basename(violations_data_file)).mark_circle(
            color='red', ).transform_calculate(
                rank="format(datum.rank,'03')", ).transform_filter(
                    select_models).transform_filter(select_brush).encode(
                        x=alt.X(f'{dimension}:N',
                                axis=alt.Axis(labelAngle=0,
                                              title=None,
                                              orient='top',
                                              labelPadding=5)),
                        y=alt.Y('violation:N', axis=alt.Axis(title=None)),
                        size=alt.Size('num:Q', legend=None),
                        opacity=alt.Opacity('weight:Q',
                                            scale=alt.Scale(range=[0, 1]),
                                            legend=None),
                        tooltip=[
                            'set:N', 'rank:Q', 'violation:N', 'num:Q',
                            'weight:Q', 'cost_contrib:Q'
                        ]).properties(
                            width=alt.Step(width_step),
                            title=f'soft rule violations (x-{dimension})'
                        ).interactive()

    violation_set_chart = make_violation_chart('set', 40)
    violation_rank_chart = make_violation_chart('rank', 30)

    # chart to show prop occurrences for selected vis models across sets
    def make_prop_chart(dimension, width_step):
        # Filtered by both selections, like the violation charts.
        return alt.Chart(os.path.basename(props_data_file)).mark_circle(
            size=50, color='green').transform_calculate(
                rank="format(datum.rank,'03')").transform_filter(
                    select_models).transform_filter(select_brush).encode(
                        x=alt.X(f'{dimension}:N',
                                axis=alt.Axis(labelAngle=0,
                                              title=None,
                                              orient='top',
                                              labelPadding=5)),
                        y=alt.Y('prop:N', axis=alt.Axis(title=None)),
                        tooltip=['prop:N']).properties(
                            width=alt.Step(width_step),
                            title=f'specification terms (x-{dimension})'
                        ).interactive()

    prop_set_chart = make_prop_chart('set', 40)
    prop_rank_chart = make_prop_chart('rank', 30)
    # glue them all together
    # `|` concatenates horizontally, `&` stacks the two rows vertically.
    top_chart = rank_chart | violation_set_chart | prop_set_chart
    bottom_chart = violation_rank_chart | prop_rank_chart
    chart = top_chart & bottom_chart
    # put a timestamp
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    chart = chart.properties(title=f'{trial_id} {ts}')
    # NOTE(review): the 'vegalite' sub-directory is not created here;
    # presumably it already exists when this runs - confirm.
    file_name = os.path.join(directory, 'vegalite',
                             f'{trial_id}_view_compare.html')
    logging.info(f'writing comparison visualisation to {file_name}')
    chart.save(file_name)
return d X = DNA_nucleotide_count(sequence) X_label = list(X) X_values = list(X.values()) X ## 2. Print text st.subheader('2. Print text') st.write('There are ' + str(X['A']) + ' adenine (A)') st.write('There are ' + str(X['T']) + ' thymine (T)') st.write('There are ' + str(X['G']) + ' guanine (G)') st.write('There are ' + str(X['C']) + ' cytosine (C)') ### 3. Display DataFrame st.subheader('3. Display DataFrame') df = pd.DataFrame.from_dict(X, orient='index') df = df.rename({0: 'count'}, axis='columns') df.reset_index(inplace=True) df = df.rename(columns={'index': 'nucleotide'}) st.write(df) ### 4 . Display Bar Chart using Altair st.subheader('4. Display Bar Chart') p = alt.Chart(df).mark_bar().encode(x='nucleotide', y='count') p = p.properties(width=alt.Step(80) #Controls Width of bar ) st.write(p)
'Nucleotide Base': ['A', 'T', 'G', 'C'], 'Count': [ DNA_Sequence.count('A'), DNA_Sequence.count('T'), DNA_Sequence.count('G'), DNA_Sequence.count('C') ] } df = pd.DataFrame(data, columns=['Nucleotide Base', 'Count']) ###Graphical Representation of the base count### st.subheader("[4] Graphical Representation of the Nucleotide Base count") Graph = alt.Chart(df).mark_bar().encode(x='Nucleotide Base', y='Count') #Adjusting the size of the bars in the Graph Graph = Graph.properties(width=alt.Step(75)) #Displaying the Graph st.write(Graph) st.write(""" *** """) ###GC Content of the given DNA Sequence### C_count = DNA_Sequence.count('C') ####Counting Cytosine### G_count = DNA_Sequence.count('G') ####Counting Guanine### try: GC_content = (C_count + G_count) / length_DNA * 100 except:
}, { "a": "a3", "b": "b3", "c": "x", "p": "0.99" }, { "a": "a3", "b": "b3", "c": "y", "p": "0.80" }, { "a": "a3", "b": "b3", "c": "z", "p": "0.37" }, ]) alt.Chart(source, width=60, height=alt.Step(8)).mark_bar().encode( y=alt.Y("c:N", axis=None), x=alt.X("p:Q", title=None, axis=alt.Axis(format="%")), color=alt.Color("c:N", title="settings", legend=alt.Legend(orient="bottom", titleOrient="left")), row=alt.Row("a:N", title="Factor A", header=alt.Header(labelAngle=0)), column=alt.Column("b:N", title="Factor B"), )
source = data.barley() alt.Chart(source, title="The Morris Mistake").mark_point().encode( alt.X('yield:Q', title="Barley Yield (bushels/acre)", scale=alt.Scale(zero=False), axis=alt.Axis(grid=False)), alt.Y('variety:N', title="", sort='-x', axis=alt.Axis(grid=True)), color=alt.Color('year:N', legend=alt.Legend(title="Year")), row=alt.Row( 'site:N', title="", sort=alt.EncodingSortField(field='yield', op='sum', order='descending'), )).properties(height=alt.Step(20)).configure_view(stroke="transparent") ### import altair as alt from vega_datasets import data # Since these data are each more than 5,000 rows we'll import from the URLs airports = data.airports.url flights_airport = data.flights_airport.url states = alt.topo_feature(data.us_10m.url, feature="states") # Create mouseover selection select_city = alt.selection_single(on="mouseover", nearest=True, fields=["origin"],
def app():
    """Streamlit page that counts the nucleotides of a DNA sequence.

    Shows a title and banner image, reads a sequence from a text area (the
    first line of the input is discarded, the remaining lines are joined),
    counts A, T, G and C, and presents the counts in four columns: as a
    dictionary, as text, as a DataFrame, and as an Altair bar chart.
    """
    # Page title
    st.write(""" # DNA Counting-App counting the nucleotide composition of DNA * **Python libraries:** streamlit, altair, PIL """)

    banner_url = 'https://image.shutterstock.com/image-photo/blue-helix-human-dna-structure-600w-1669326868.jpg'
    banner = Image.open(requests.get(banner_url, stream=True).raw)
    st.image(banner, use_column_width=True)

    # Input text box
    st.header("Enter DNA sequence")
    sequence_input = ">DNA Query 2\nGAACACGTGGAGGCAAACAGGAAGGTGAAGAAGAACTTATCCTATCAGGACGGAAGGTCCTGTGCTCGGG\nATCTTCCAGACGTCGCGACTCTAAATTGCCCCCTCTGAGGTCAAGGAACACAAGATGGTTTTGGAAATGC\nTGAACCCGATACATTATAACATCACCAGCATCGTGCCTGAAGCCATGCCTGCTGCCACCATGCCAGTCCT"
    raw_text = st.text_area("Sequence input", sequence_input, height=200)
    # Skip the first line (">DNA Query 2" in the default input) and glue the
    # remaining lines into one string.
    sequence = ''.join(raw_text.splitlines()[1:])

    def DNA_seq_count(seq):
        """Return the counts of A, T, G and C in ``seq``, in that key order."""
        return {base: seq.count(base) for base in ('A', 'T', 'G', 'C')}

    X = DNA_seq_count(sequence)

    # Display the results in various ways
    c1, c2, c3, c4 = st.beta_columns([1, 1, 2, 2])

    c1.subheader('1. Print Dictionary')
    c1.write(X)

    c2.subheader('2. Print text')
    for base, label in (
        ('A', ' adenine (A)'),
        ('T', ' thymine (T)'),
        ('G', ' guanine (G)'),
        ('C', ' cytosine (C)'),
    ):
        c2.write('There are ' + str(X[base]) + label)

    c3.subheader('3. Dataframe')
    df = (
        pd.DataFrame.from_dict(X, orient='index')
        .rename(columns={0: 'count'})
        .reset_index()
        .rename(columns={'index': 'nucleotide'})
    )
    c3.write(df)

    c4.subheader('4. Display Bar graph w. Altair')
    bars = alt.Chart(df).mark_bar().encode(x='nucleotide', y='count')
    c4.write(bars.properties(width=alt.Step(60)))
def DNA_nucleotide_count(seq):
    """Return a dict with the counts of 'A', 'T', 'G' and 'C' in ``seq``.

    Keys are inserted in the order A, T, G, C.
    """
    return {base: seq.count(base) for base in ('A', 'T', 'G', 'C')}


x = DNA_nucleotide_count(sequence)
# Bare expression, kept as in the original script.
x

# Text summary; the three pieces are passed to st.write as separate arguments.
st.subheader('2. Print text')
for base, label in (
    ('A', 'adenine (A)'),
    ('T', 'thymine (T)'),
    ('G', 'guanine (G)'),
    ('C', 'cytosine (C)'),
):
    st.write('There are', str(x[base]), label)

# Table with one row per nucleotide: columns 'Nucleotide' and 'count'.
st.subheader("3. Display DataFrame")
df = (
    pd.DataFrame.from_dict(x, orient='index')
    .rename(columns={0: 'count'})
    .reset_index()
    .rename(columns={'index': 'Nucleotide'})
)
st.write(df)

# Bar chart of the table; each bar gets an 80-pixel step.
st.subheader('4. Display chart using Altair')
p = alt.Chart(df, width=alt.Step(80)).mark_bar().encode(
    x='Nucleotide',
    y='count',
)
st.write(p)
st.subheader('1 Print dictionary')


def DNA_nucleotide_count(seq):
    """Count every distinct character of ``seq``.

    When ``seq`` contains a newline, its lines are joined first. The returned
    dict maps each distinct character to its count, keys in sorted order.
    """
    if "\n" in seq:
        seq = "".join(seq.splitlines())
    return {key: seq.count(key) for key in sorted(set(seq))}


X = DNA_nucleotide_count(sequence)
# Bare expression, kept as in the original script.
X

# 2. One text line per counted character; labels come from DNA_NUCLEOTIDES,
# which is defined outside this fragment.
st.subheader('2. Print text')
for dna_nucleotide, count in X.items():
    label = DNA_NUCLEOTIDES[dna_nucleotide]
    st.write(f"There're {count} {label}")

# 3. Table with the columns 'nucleotides' and 'counts'.
st.subheader('3. Display DataFrame')
df = (
    pd.DataFrame.from_dict(X, orient='index')
    .rename(columns={0: 'counts'})
    .reset_index()
    .rename(columns={'index': 'nucleotides'})
)
st.write(df)

# 4. Bar chart of the table; each bar gets an 80-pixel step.
st.subheader('4. Display Bar Char')
bars = alt.Chart(df).mark_bar().encode(x="nucleotides", y="counts")
p = bars.properties(width=alt.Step(80))
st.write(p)
# Bare expression, kept as in the original script.
X

# 2. Text summary of the four counts.
st.subheader('2. Print text')
st.write(f"There are {X['A']!s} adenine (A)")
st.write(f"There are {X['T']!s} thymine (T)")
st.write(f"There are {X['G']!s} guanine (G)")
st.write(f"There are {X['C']!s} cytosine (C)")

# 3. Table: the dict keys become the 'nucleotide' column, the values 'count'.
st.subheader('3. Display DataFrame')
df = pd.DataFrame.from_dict(X, orient='index').reset_index()
df = df.rename(columns={'index': 'nucleotide', 0: 'count'})
st.write(df)

# 4. Bar chart of the table; each bar gets an 80-pixel step.
st.subheader('4. Display Bar chart')
p = alt.Chart(df, width=alt.Step(80)).mark_bar().encode(
    x='nucleotide',
    y='count',
)
st.write(p)
df.head() df.category.unique() d1 = df.groupby(['category']).agg({'price': 'mean', 'item_id': 'nunique'}) d1 = pd.DataFrame(d1) # Convert Series to DataFrame d1.reset_index(level=0, inplace=True) # Convert Index to Column #d1.price=d1.price.apply(np.round) d1.price = d1.price.astype(int) #d1=d1.sort_values(by='item_id') d1.columns = ['Furniture Category', 'Average Price (SR)', 'Number of Items'] ## bar = alt.Chart(d1).mark_bar( ).encode(x='Furniture Category', y='Number of Items').properties( width=alt.Step(40), # controls width of bar. title= '#Items vs Price Per Category (Bars represent #Items, Ticks represent Avg. Price)' ) text_bar = bar.mark_text( align='center', baseline='bottom', dx=3 # Nudges text to right so it doesn't appear on top of the bar ).encode(text='Number of Items') tick = alt.Chart(d1).mark_tick( color='red', thickness=4, size=40 * 0.9, # controls width of tick. ).encode(x='Furniture Category',
X = DNA_nucleotide_count(sequence)
# Bare expression, kept as in the original script.
X

# 2. Text summary, written in the order A, T, G, C.
st.subheader('2. Print text')
base_labels = {
    'A': ' adenine (A)',
    'T': ' thymine (T)',
    'G': ' guanine (G)',
    'C': ' cytosine (C)',
}
for base, label in base_labels.items():
    st.write('There are ' + str(X[base]) + label)

# 3. Table: name the index 'nucleotide', then turn it into a regular column.
st.subheader('3. Display DataFrame')
df = (
    pd.DataFrame.from_dict(X, orient='index')
    .rename(columns={0: 'count'})
    .rename_axis('nucleotide')
    .reset_index()
)
st.write(df)

# 4. Bar chart of the table; each bar gets an 80-pixel step.
st.subheader('4. Display Bar chart')
bars = alt.Chart(df).mark_bar()
p = bars.encode(x='nucleotide', y='count').properties(width=alt.Step(80))
st.write(p)
# Text summary of the four counts. Each line must read the count of the base
# it names: the original printed X['C'] on the guanine line and X['T'] on the
# cytosine line.
st.subheader('2. Print Text')
st.write("There are " + str(X['A']) + " adenine (A)")
st.write("There are " + str(X['T']) + " thymine (T)")
st.write("There are " + str(X['G']) + " guanine (G)")
st.write("There are " + str(X['C']) + " cytosine(C)")

# Display Dataframe: the dict keys become the 'nucleotide' column and the
# values the 'count' column.
st.subheader('3. Display DataFrame')
df = pd.DataFrame.from_dict(X, orient='index')
df = df.rename({0: 'count'}, axis='columns')
df.reset_index(inplace=True)
df = df.rename(columns={'index': 'nucleotide'})
st.write(df)

# Display Bar Chart Using Altair
st.subheader('4. Display Bar Chart')
p = alt.Chart(df).mark_bar().encode(
    x='nucleotide',
    y='count'
)
p = p.properties(
    width=alt.Step(80)  # controls the width of the bar
)
st.write(p)
# Bare expression, kept as in the original script.
X

# Text summary of the four counts.
st.subheader('2. Print text')
for base, name in (('A', 'adenine'), ('T', 'thymine'),
                   ('G', 'guanine'), ('C', 'cytosine')):
    st.write(''.join(['There are ', str(X[base]), ' ', name, ' (', base, ')']))

# Table with the columns 'nucleotide' and 'count'.
st.subheader('3. Display DataFrame')
counts = pd.DataFrame.from_dict(X, orient='index')
counts = counts.rename(columns={0: 'count'}).reset_index()
df = counts.rename(columns={'index': 'nucleotide'})
st.write(df)

# Bar chart of the table; each bar gets an 80-pixel step.
st.subheader('4. Display BarChart')
bar_width = alt.Step(80)
p = alt.Chart(df).mark_bar().encode(x='nucleotide', y='count')
p = p.properties(width=bar_width)
st.write(p)
def main(in_file_1, out_dir):
    """Compare classifiers, plot their F1 scores, tune an MLP and save the best model.

    Steps:
    1. Read the training CSV; ``quality_rank`` is the target and ``quality``
       is dropped from the features.
    2. Cross-validate six classifiers behind a shared preprocessing pipeline.
    3. Plot the scores of all classifiers plus the per-fold score
       distributions of the random forest and the MLP, and hand the plots to
       ``save_plots``.
    4. Run a randomized hyper-parameter search over an MLP pipeline and dump
       the best estimator to ``out_dir + "best_Model.pkl"``.

    :param in_file_1: path of the training CSV.
    :param out_dir: output location. It is used as a plain string prefix for
        the model file name, so it should end with a path separator.
    :return: None
    """
    # Read the training data.
    train_df = pd.read_csv(in_file_1)
    X_train = train_df.drop(columns=['quality', 'quality_rank'])
    y_train = train_df['quality_rank']

    # ------------------------------------------------------------------
    # Preprocessor: impute + scale the numeric columns, one-hot encode `type`.
    numeric_features = [
        'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
        'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
        'pH', 'sulphates', 'alcohol'
    ]
    binary_features = ['type']
    numeric_transformer = make_pipeline(SimpleImputer(), StandardScaler())
    binary_transformer = make_pipeline(
        OneHotEncoder(drop="if_binary", dtype=int))
    preprocessor = ColumnTransformer(transformers=[
        ('num', numeric_transformer, numeric_features),
        ('bin', binary_transformer, binary_features),
    ])

    # ------------------------------------------------------------------
    # Model selection
    results = {}
    scoring_metric = {'f1_micro'}
    classifiers_plot = {
        "RidgeClassifier":
        RidgeClassifier(random_state=123),
        "Random Forest":
        RandomForestClassifier(bootstrap=False,
                               max_depth=20,
                               max_features='sqrt',
                               n_estimators=1800,
                               random_state=123),
        "KNN":
        KNeighborsClassifier(n_neighbors=5),
        "MLP Classifier":
        MLPClassifier(alpha=0.05,
                      hidden_layer_sizes=(50, 100, 50),
                      learning_rate='adaptive',
                      max_iter=1000,
                      random_state=123),
        "Nearest Centroid":
        NearestCentroid(),
        "QDA":
        QuadraticDiscriminantAnalysis()
    }
    for (name, model) in classifiers_plot.items():
        pipe_iter = make_pipeline(preprocessor, model)
        results[name] = mean_std_cross_val_scores(pipe_iter,
                                                  X_train,
                                                  y_train,
                                                  return_train_score=True,
                                                  scoring=scoring_metric)

    # ------------------------------------------------------------------
    # Plotting result
    # All classifiers: one bar per classifier, sorted by test score.
    plots_dict = {}
    plot_results = pd.DataFrame(results).T
    plot_results = plot_results.reset_index()
    bar_all = alt.Chart(plot_results).mark_bar().encode(
        alt.X('test_f1_micro', axis=alt.Axis(title='F1 Micro score')),
        alt.Y('index', sort='-x', axis=alt.Axis(title='Classifier')),
    ).properties(
        # NOTE(review): x is quantitative here, and a step width normally
        # applies to a discrete axis - confirm this has the intended effect.
        width=alt.Step(40))
    plots_dict['f1_score_all_classifiers.svg'] = bar_all

    # Stability across cv folds: histogram of the 20 per-fold test scores.
    pipe_rf = make_pipeline(
        preprocessor,
        RandomForestClassifier(bootstrap=False,
                               max_depth=20,
                               max_features='sqrt',
                               n_estimators=1800,
                               random_state=123))
    scores_rf = cross_validate(pipe_rf,
                               X_train,
                               y_train,
                               return_train_score=True,
                               scoring=scoring_metric,
                               n_jobs=-1,
                               cv=20)
    plot_rf = pd.DataFrame(scores_rf)
    bar_rf = alt.Chart(plot_rf).mark_bar().encode(
        x=alt.X('test_f1_micro',
                axis=alt.Axis(title='F1 Micro score'),
                bin=alt.Bin(extent=[0.75, 0.9], step=0.02)),
        y=alt.Y('count()'),
    )
    plots_dict['f1_score_random_forest.svg'] = bar_rf

    pipe_mlp = make_pipeline(
        preprocessor,
        MLPClassifier(alpha=0.05,
                      hidden_layer_sizes=(50, 100, 50),
                      learning_rate='adaptive',
                      max_iter=1000,
                      random_state=123))
    scores_mlp = cross_validate(pipe_mlp,
                                X_train,
                                y_train,
                                return_train_score=True,
                                scoring=scoring_metric,
                                n_jobs=-1,
                                cv=20)
    plot_mlp = pd.DataFrame(scores_mlp)
    bar_mlp = alt.Chart(plot_mlp).mark_bar().encode(
        x=alt.X('test_f1_micro',
                axis=alt.Axis(title='F1 Micro score'),
                bin=alt.Bin(maxbins=6)),
        y=alt.Y('count()'),
    )
    plots_dict['f1_score_mlp.svg'] = bar_mlp
    save_plots(out_dir, plots_dict)

    # ------------------------------------------------------------------
    # Hyperparameters Tuning (the tuned estimator is an MLP pipeline)
    mlp_pipeline = make_pipeline(preprocessor, MLPClassifier())
    param_dist = {
        'mlpclassifier__hidden_layer_sizes': [(50, 50, 50), (50, 100, 50),
                                              (100, )],
        'mlpclassifier__activation': ['tanh', 'relu'],
        'mlpclassifier__solver': ['sgd', 'adam'],
        'mlpclassifier__alpha': [0.0001, 0.05],
        'mlpclassifier__learning_rate': ['constant', 'adaptive'],
        'mlpclassifier__max_iter': [300, 500, 450, 200, 300]
    }
    random_search = RandomizedSearchCV(mlp_pipeline,
                                       param_distributions=param_dist,
                                       n_jobs=-1,
                                       n_iter=50,
                                       cv=5,
                                       scoring='f1_micro')
    random_search.fit(X_train, y_train)
    best_model_pipe = random_search.best_estimator_

    # Create the target directory up front instead of retrying inside a bare
    # `except:`. The old retry swallowed every error from the first dump and
    # raised FileExistsError from makedirs when the directory already existed.
    joblib_file = out_dir + "best_Model.pkl"
    target_dir = os.path.dirname(joblib_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    joblib.dump(best_model_pipe, joblib_file)