# Note on directory navigation: https://stackoverflow.com/questions/4810927/how-to-go-up-a-level-in-the-src-path-of-a-url-in-html # "../ " Does not work! # So keep charts and graphics in the same folder as the notebook. # In[ ]: Installed nbextensions from https://github.com/ipython-contrib/jupyter_contrib_nbextensions Now I control the Automatic Saves (every 15 minutes rather than every 2) There are a whole host of other configurable features # In[22]: from IPython.core.display import display, HTML display(HTML('<img src="Jupyter-Menu-Example.png">' )) print("Available under nbextensions:") # In[ ]: Trying out some simple Python # In[1]: someList = [9,8,7,6,5,4,3,2,1] # In[2]:
def iplot_paulivec(rho, options=None):
    """Create a paulivec representation.

    Graphical representation of the input array.

    Args:
        rho (array): Density matrix
        options (dict): Representation settings containing
            - width (integer): graph horizontal size
            - height (integer): graph vertical size
            - slider (bool): activate slider
            - show_legend (bool): show legend of graph content

    The ``options`` dict passed by the caller is never modified; the
    normalised flags are written to a private copy.
    """
    # HTML
    html_template = Template(""" <p> <div id="paulivec_$divNumber"></div> </p> """)

    # JavaScript
    javascript_template = Template(""" <script> requirejs.config({ paths: { qVisualization: "https://qvisualization.mybluemix.net/q-visualizations" } }); require(["qVisualization"], function(qVisualizations) { qVisualizations.plotState("paulivec_$divNumber", "paulivec", $executions, $options); }); </script> """)

    # Work on a copy: the flags below are rewritten to 0/1 and the caller's
    # dict must not change as a side effect of plotting.
    options = dict(options) if options else {}

    # Process data and execute
    # The div id is the current timestamp with the decimal point removed.
    div_number = re.sub('[.]', '', str(time.time()))

    # The templates embed the dict's repr, so the flags are stored as 1/0.
    # Only a literal True enables the slider; only a literal False hides the
    # legend.
    options['slider'] = 1 if options.get('slider') is True else 0
    options['show_legend'] = 0 if options.get('show_legend') is False else 1

    data_to_plot = [dict(data=process_data(rho))]

    html = html_template.substitute({'divNumber': div_number})
    javascript = javascript_template.substitute({
        'divNumber': div_number,
        'executions': data_to_plot,
        'options': options
    })

    display(HTML(html + javascript))
def promp_for_aad_device_login(url, text, code):
    """Show an AAD device-login prompt: a copy-to-clipboard script plus a button.

    Outside a notebook (``isnotebook()`` is false) a plain-text instruction is
    printed instead and the function returns.

    Args:
        url (str): Login page to open.
        text (str): Caption of the button.
        code (str): Device code to copy; also used as the DOM id of the
            clickable table row.

    Returns:
        None
    """
    assert isinstance(url, str)
    assert isinstance(text, str)
    assert isinstance(code, str)

    if not isnotebook():
        # print helpful message:
        print('To authenticate, please go to', url, 'and paste', code,
              'in the box.')
        return

    import json
    import re
    from html import escape

    def js_string(value):
        # json.dumps yields a valid, quoted JS string literal; splitting "</"
        # keeps a value from terminating the surrounding <script> element.
        return json.dumps(value).replace('</', '<\\/')

    # The script must keep its line breaks: it contains `//` comments, which
    # would otherwise comment out everything that follows them.
    template = '''
<script>
function copyToClipboard(text) {
    if (window.clipboardData && window.clipboardData.setData) {
        return clipboardData.setData("Text", text);
    } else if (document.queryCommandSupported && document.queryCommandSupported("copy")) {
        var textarea = document.createElement("textarea");
        textarea.textContent = text;
        textarea.style.position = "fixed";  // Prevent scrolling to bottom of page in MS Edge.
        document.body.appendChild(textarea);
        textarea.select();
        try {
            return document.execCommand("copy");  // Security exception may be thrown by some browsers.
        } catch (ex) {
            console.warn("Copy to clipboard failed.", ex);
            return false;
        } finally {
            document.body.removeChild(textarea);
        }
    }
}
// onclick
var b = document.getElementById({buttonIdJs});
b.onclick = function(){
    copyToClipboard({codeJs});
    window.open({urlJs}, '_blank', 'location=yes,height=400,width=520,scrollbars=no,status=no');
};
b.scrollIntoView();
</script>
<div>
    Clicking this button will open an AAD device login window. <br/>
    Paste this code to authenticate.<br/>
    <table>
        <tr> <th>Code</th> <th>URL</th> <th>Easy Button</th> </tr>
        <tr style="font-size:25px" id="{buttonIdHtml}">
            <td>{codeHtml}</td> <td>{urlHtml}</td> <td><button>{textHtml}</button></td>
        </tr>
    </table>
</div>
'''

    # Each value is encoded for the context it lands in (JS literal vs. HTML
    # text/attribute) so quotes or markup inside it cannot break the output.
    tokens = {
        'buttonIdJs': js_string(code),
        'codeJs': js_string(code),
        'urlJs': js_string(url),
        'buttonIdHtml': escape(code, quote=True),
        'codeHtml': escape(code),
        'urlHtml': escape(url),
        'textHtml': escape(text),
    }

    # Single-pass substitution: text inserted for one token is never rescanned
    # for further tokens.
    token_pattern = re.compile(r'\{(' + '|'.join(tokens) + r')\}')
    markup = token_pattern.sub(lambda match: tokens[match.group(1)], template)

    # render
    display(HTML(markup))
def evaluate_score(score, test_data, name):
    """Plot some graphs and compute some metrics on a set of predictions.

    Requires matplotlib and IPython.

    Draws two panels: score histograms for fishy / non-fishy rows overlaid
    with precision and recall curves, and a ROC curve whose legend carries
    the AUC, log loss and false-positive fraction.

    score - model predictions, same length as test_data
    test_data - data to use on the evaluations
    name - title rendered as an <h1> above the figure
    """
    is_fishy = utils.is_fishy(test_data)

    score_fishy = score[is_fishy]
    score_nonfishy = score[~is_fishy]

    precisions, recalls, thresholds = metrics.precision_recall_curve(
        is_fishy, score)

    display(HTML("<h1>%s</h1>" % name))

    ylim = 15.0
    _, (a1, a2) = plt.subplots(1, 2, figsize=(20, 5))

    a1_precall = a1.twinx()

    def convert_range(ax_f):
        # Keep the twin axis scaled to 1/ylim of the histogram axis.
        y1, y2 = ax_f.get_ylim()
        a1_precall.set_ylim(y1 / ylim, y2 / ylim)
        a1_precall.figure.canvas.draw()

    a1.callbacks.connect("ylim_changed", convert_range)

    # `density=True` replaces the removed `normed=True` keyword.
    new_score_fishy = a1.hist(score_fishy, bins=200, density=True, color='b',
                              alpha=0.5, label="fishy score")
    new_score_nonfishy = a1.hist(score_nonfishy, bins=200, density=True,
                                 color='r', alpha=0.5, label="nonfishy score")

    # precision_recall_curve returns one more precision/recall value than
    # thresholds, hence the [:-1].
    a1_precall.plot(thresholds, precisions[:-1], color='g', label='Precision')
    a1_precall.plot(thresholds, recalls[:-1], color='b', label='Recall')

    a1.set_ylim(0, ylim)
    a1.set_xlim(0, 1)
    a1.set_ylabel('Histogram count')
    a1.set_xlabel('Prediction score')
    a1_precall.set_ylabel('Curve')

    fpr, tpr, _ = metrics.roc_curve(is_fishy, score)
    auc = metrics.auc(fpr, tpr)

    predicted = score > 0.5
    fp = (predicted & ~(is_fishy)).sum() / float(len(is_fishy))

    # NOTE(review): log loss is computed on the thresholded booleans, not on
    # the raw scores -- confirm this is intended.
    lloss = metrics.log_loss(is_fishy, predicted)

    label = 'ROC curve\narea = %0.2f\nlog loss = %0.2f\nfp = %0.2f' % (
        auc, lloss, fp)
    a2.plot(fpr, tpr, color='r', label=label)
    a2.set_xlabel('False positive rate')
    a2.set_ylabel('True positive rate')

    h1, l1 = a2.get_legend_handles_labels()
    h2, l2 = a1.get_legend_handles_labels()
    h3, l3 = a1_precall.get_legend_handles_labels()
    a2.legend(h1 + h2 + h3, l1 + l2 + l3, loc='lower right')

    plt.show()

    # Overlap ratio of the two histograms.
    # NOTE(review): `error` is computed but neither returned nor displayed in
    # the visible code.
    total = sum(new_score_fishy[0] + new_score_nonfishy[0])
    non_overlap = sum(abs(new_score_fishy[0] - new_score_nonfishy[0]))
    overlap = total - non_overlap
    error = overlap / total
def notebook_max_width():
    """Emit a CSS rule that sets the notebook's ``.container`` width to 100%."""
    # IPython.display is the public import location; importing `display`
    # from IPython.core.display is deprecated.
    from IPython.display import HTML, display

    display(HTML("<style>.container { width:100% !important; }</style>"))
def widen(arg):
    """Emit a CSS rule that sets the notebook's ``.container`` width to 100%.

    Args:
        arg: Accepted but not used by the function body.
    """
    # IPython.display is the public import location; importing `display`
    # from IPython.core.display is deprecated.
    from IPython.display import HTML, display

    display(HTML("<style>.container { width:100% !important; }</style>"))
def css_styling():
    """Load ``./style/nbstyle.css`` and return its contents wrapped in HTML.

    Returns:
        An ``HTML`` object built from the file's text.

    Raises:
        OSError: If the stylesheet cannot be opened.
    """
    # Context manager so the file handle is closed even if read() fails.
    with open('./style/nbstyle.css', 'r') as css_file:
        styles = css_file.read()
    return HTML(styles)
def display(self, data):
    """Load ``data``, convert it and render the result as HTML output."""
    payload = self.__open_or_read(data)
    markup = self.convert(payload)
    # NOTE(review): the bare name `display` is presumably the IPython display
    # function imported at module level, not this method -- confirm.
    display(HTML(markup))
def iplot_state_hinton(rho, figsize=None):
    """Render a hinton (2D city style) diagram of a density matrix.

    The real and imaginary parts of ``rho`` are passed as two separate data
    sets to the ``qVisualization`` JavaScript module.

    Args:
        rho (array): Density matrix
        figsize (tuple): Figure size in pixels.
    """
    # HTML
    html_template = Template(""" <p> <div id="hinton_$divNumber"></div> </p> """)

    # JavaScript
    javascript_template = Template(""" <script> requirejs.config({ paths: { qVisualization: "https://qvisualization.mybluemix.net/q-visualizations" } }); require(["qVisualization"], function(qVisualizations) { qVisualizations.plotState("hinton_$divNumber", "hinton", $executions, $options); }); </script> """)

    rho = _validate_input_state(rho)

    options = {} if figsize is None else {
        'width': figsize[0],
        'height': figsize[1]
    }

    # The div id is the current timestamp with the decimal point removed.
    div_number = re.sub('[.]', '', str(time.time()))

    # Split every row into plain-float real and imaginary parts.
    real, imag = [], []
    for row in rho:
        real.append([float(component) for component in row.real])
        imag.append([float(component) for component in row.imag])

    html = html_template.substitute({'divNumber': div_number})
    javascript = javascript_template.substitute({
        'divNumber': div_number,
        'executions': [{'data': real}, {'data': imag}],
        'options': options
    })

    display(HTML(html + javascript))
'Gulf of Maine': [-72.0, 41.0, -69.0, 43.0], 'New York harbor region': [-75., 39., -71., 41.5] } box = area['New York harbor region'] # <markdowncell> # ### Search CSW for datasets of interest # <codecell> if False: from IPython.core.display import HTML url = 'http://www.ngdc.noaa.gov/geoportal/' HTML('<iframe src=%s width=950 height=400></iframe>' % url) # <codecell> # Connect to CSW, explore its properties. CSW = { 'NGDC Geoportal': 'http://www.ngdc.noaa.gov/geoportal/csw', 'USGS WHSC Geoportal': 'http://geoport.whoi.edu/geoportal/csw', 'NODC Geoportal: granule level': 'http://www.nodc.noaa.gov/geoportal/csw', 'NODC Geoportal: collection level': 'http://data.nodc.noaa.gov/geoportal/csw', 'NRCAN CUSTOM': 'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw', 'USGS Woods Hole GI_CAT': 'http://geoport.whoi.edu/gi-cat/services/cswiso', 'USGS CIDA Geonetwork': 'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw', 'USGS Coastal and Marine Program': 'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw',
def monthly_returns_map(returns):
    """Display per month and per year returns in a table.

    One row per year: twelve monthly cells followed by the yearly figure, all
    as percentages with one decimal. Months without data in the first or last
    year are shown as "-". The column header is repeated every 15 years, each
    time starting a new table.

    Args:
        returns: Series passed through ``pct_change()`` before being
            aggregated with ``em.aggregate_returns``.
    """
    pct_returns = returns.pct_change()
    monthly_data = em.aggregate_returns(pct_returns, 'monthly')
    yearly_data = em.aggregate_returns(pct_returns, 'yearly')

    table_header = (
        """ <table class='table table-hover table-condensed table-striped'>"""
        """ <thead> <tr>"""
        """ <th style="text-align:right">Year</th>"""
        """ <th style="text-align:right">Jan</th>"""
        """ <th style="text-align:right">Feb</th>"""
        """ <th style="text-align:right">Mar</th>"""
        """ <th style="text-align:right">Apr</th>"""
        """ <th style="text-align:right">May</th>"""
        """ <th style="text-align:right">Jun</th>"""
        """ <th style="text-align:right">Jul</th>"""
        """ <th style="text-align:right">Aug</th>"""
        """ <th style="text-align:right">Sep</th>"""
        """ <th style="text-align:right">Oct</th>"""
        """ <th style="text-align:right">Nov</th>"""
        """ <th style="text-align:right">Dec</th>"""
        """ <th style="text-align:right">Year</th>"""
        """ </tr> </thead> <tbody> <tr>"""
    )
    table_close = '</tr>\n </tbody> \n </table>'
    empty_cell = "<td align='right'>-</td>\n"

    first_year = True
    first_month = True
    year = 0
    month = 0
    year_count = 0
    parts = []

    # Series.items() replaces iteritems(), which pandas 2.0 removed.
    for (year, month), val in monthly_data.items():
        if first_month:
            if year_count % 15 == 0:
                if parts:
                    # Close the previous table before opening a new one, so
                    # the repeated header does not nest <table> elements.
                    parts.append(table_close)
                parts.append(table_header)
            parts.append("<td align='right'><b>{}</b></td>\n".format(year))
            first_month = False

        # pad empty months for first year if sim doesn't start in January
        if first_year:
            first_year = False
            parts.extend(empty_cell for _ in range(1, month))

        parts.append("<td align='right'>{:.1f}</td>\n".format(val * 100))

        # check for dec, add yearly
        if month == 12:
            parts.append("<td align='right'><b>{:.1f}</b></td>\n".format(
                yearly_data[year] * 100))
            parts.append('</tr>\n <tr> \n')
            first_month = True
            year_count += 1

    # add padding for empty months and last year's value
    if month != 12:
        parts.extend(empty_cell for _ in range(month + 1, 13))
        parts.append("<td align='right'><b>{:.1f}</b></td>\n".format(
            yearly_data[year] * 100))
        parts.append('</tr>\n <tr> \n')

    parts.append(table_close)
    display(HTML(''.join(parts)))
def set_data_source(context, data_source_type=None):
    """Pick the project's data source and report the outcome as HTML.

    Looks at ``context.list_datasources()``, optionally restricted to entries
    whose ``"type"`` equals ``data_source_type``. Exactly one candidate: its
    name is announced and returned. None or several: an explanatory message is
    displayed and ``None`` is returned.

    Args:
        context: Object exposing ``list_datasources()``; entries are indexed
            by ``"name"`` and ``"type"``.
        data_source_type: Optional type to filter by; falsy means no filter.

    Returns:
        The selected data source name, or ``None``.
    """
    how_to_add = """ <p> If you did not create the data source during init, here is how to add it now: <a href="https://great-expectations.readthedocs.io/en/latest/how_to_add_data_source.html">How To Add a Data Source</a> </p> """
    pick_one = """ <p> Uncomment the next cell and set data_source_name to one of these names. </p> """

    data_source_name = None

    if data_source_type:
        names = [
            entry["name"] for entry in context.list_datasources()
            if entry["type"] == data_source_type
        ]
        if not names:
            message = (
                """ <p> No {:s} data sources found in the great_expectations.yml of your project. </p>"""
                + how_to_add).format(data_source_type)
        elif len(names) > 1:
            message = (
                """ <p> Found more than one {:s} data source in the great_expectations.yml of your project: <b>{:s}</b> </p>"""
                + pick_one).format(data_source_type, ",".join(names))
        else:
            data_source_name = names[0]
            message = "Will be using this {:s} data source from your project's great_expectations.yml: <b>{:s}</b>".format(
                data_source_type, data_source_name)
    else:
        entries = list(context.list_datasources())
        if not entries:
            message = (
                """ <p> No data sources found in the great_expectations.yml of your project. </p>"""
                + how_to_add)
        elif len(entries) > 1:
            # Only positional field 1 (the joined names) is used here.
            message = (
                """ <p> Found more than one data source in the great_expectations.yml of your project: <b>{1:s}</b> </p>"""
                + pick_one).format(
                    data_source_type,
                    ",".join([entry["name"] for entry in entries]))
        else:
            data_source_name = entries[0]["name"]
            message = "Will be using this data source from your project's great_expectations.yml: <b>{:s}</b>".format(
                data_source_name)

    display(HTML(message))
    return data_source_name
def show_available_data_asset_names(context, data_source_name=None):
    """List asset names found in the current context.

    For every datasource (or only ``data_source_name`` when given) and each of
    its batch kwargs generators, lists the available data asset names and,
    under each asset, the expectation suites whose key matches that
    datasource / generator / asset. Everything is rendered in one HTML output,
    in discovery order.

    Args:
        context: Object providing ``list_expectation_suites()``,
            ``list_datasources()`` and ``get_datasource(name)``.
        data_source_name: Optional datasource name to restrict the listing to.
    """
    # TODO: Needs tests.
    styles = """ <style type='text/css'> ul.data-assets { margin-top: 0px; } ul.data-assets li { line-height: 1.2em; list-style-type: circle; } ul.data-assets li span.expectation-suite { background: #ddd; } </style> """

    print("Inspecting your data sources. This may take a moment...")
    expectation_suite_keys = context.list_expectation_suites()
    datasources = context.list_datasources()

    parts = []
    for datasource in datasources:
        if data_source_name and datasource["name"] != data_source_name:
            continue

        parts.append("<h2 style='margin: 0'>Datasource: {:s} ({:s})</h2>".format(
            datasource["name"], datasource["class_name"]))

        ds = context.get_datasource(datasource["name"])
        for generator_info in ds.list_batch_kwargs_generators():
            parts.append("batch_kwargs_generator: {:s} ({:s})".format(
                generator_info["name"], generator_info["class_name"]))
            generator = ds.get_batch_kwargs_generator(generator_info["name"])

            # TODO hacks to deal w/ inconsistent return types. Remove urgently
            mystery_object = generator.get_available_data_asset_names()
            if isinstance(mystery_object, dict) and "names" in mystery_object:
                data_asset_names = sorted(
                    name[0] for name in mystery_object["names"])
            elif isinstance(mystery_object, list):
                data_asset_names = sorted(mystery_object)
            else:
                data_asset_names = []

            if not data_asset_names:
                # Appended, not displayed on the spot, so the note stays
                # under the datasource/generator heading it refers to.
                parts.append(
                    """<p>No data assets found in this data source.</p> <p>Read about how batch kwargs generators derive data assets from data sources: <a href="https://great-expectations.readthedocs.io/en/latest/how_to_add_data_source.html">Data assets</a> </p>"""
                )
                continue

            parts.append("<h3 style='margin: 0.2em 0'>Data Assets Found:</h3>")
            parts.append(styles)
            parts.append("<ul class='data-assets'>")
            for data_asset_name in data_asset_names:
                parts.append("<li>{:s}</li>".format(data_asset_name))
                matching_suite_keys = [
                    es_key for es_key in expectation_suite_keys
                    if es_key.data_asset_name.datasource == datasource["name"]
                    and es_key.data_asset_name.generator == generator_info["name"]
                    and es_key.data_asset_name.generator_asset == data_asset_name
                ]
                if matching_suite_keys:
                    parts.append("<ul>")
                    for es_key in matching_suite_keys:
                        parts.append(
                            "<li><span class='expectation-suite'>Expectation Suite</span>: {:s}</li>".format(
                                es_key.expectation_suite_name))
                    parts.append("</ul>")
            parts.append("</ul>")

    display(HTML("".join(parts)))
def display_animation():
    """Build the animation for scene 34 of the ``py_semantic`` dataset.

    Returns:
        An ``HTML`` object wrapping ``anim.to_jshtml()``. Returning it lets a
        notebook render it when the call is the last expression of a cell, or
        via an explicit ``display(...)``.
    """
    dataset = dataset_dict["py_semantic"]
    scene_idx = 34
    anim = create_animate_for_scene(dataset, scene_idx)
    print("scene_idx", scene_idx)
    # The HTML object used to be created and dropped, so nothing was shown.
    return HTML(anim.to_jshtml())
def iplot_hinton(rho, options=None):
    """Render a hinton (2D city style) diagram of the input array.

    The real and imaginary parts of ``rho`` are handed to the
    ``qVisualization`` JavaScript module as two data sets.

    Args:
        rho (array): Density matrix
        options (dict): Representation settings containing
            - width (integer): graph horizontal size
            - height (integer): graph vertical size
    """
    # HTML
    html_template = Template(""" <p> <div id="hinton_$divNumber"></div> </p> """)

    # JavaScript
    javascript_template = Template(""" <script> requirejs.config({ paths: { qVisualization: "https://qvisualization.mybluemix.net/q-visualizations" } }); require(["qVisualization"], function(qVisualizations) { qVisualizations.plotState("hinton_$divNumber", "hinton", $executions, $options); }); </script> """)

    options = options or {}

    # The div id is the current timestamp with the decimal point removed.
    stamp = str(time.time())
    div_number = re.sub('[.]', '', stamp)

    # One pass over rho, collecting float copies of both components per row.
    real = []
    imag = []
    for xvalue in rho:
        real.append([float(part) for part in xvalue.real])
        imag.append([float(part) for part in xvalue.imag])

    executions = [{'data': real}, {'data': imag}]

    html = html_template.substitute({'divNumber': div_number})
    javascript = javascript_template.substitute({
        'divNumber': div_number,
        'executions': executions,
        'options': options
    })

    display(HTML(html + javascript))
df = pd.read_sql_query( "select * from Annotations_consolidated where id_post = 3326", con) # %% df.head(100) # %% pd.read_sql_query("select * from Posts where id_post == 3257", con).Body[0] # %% # %% df_articles = pd.read_sql_query("Select * from Articles", con) # %% df_articles.head() # %% display(HTML(df_articles.Body[0])) # %% df_posts = pd.read_sql_query("Select * from Posts", con) # %% df_posts.head() # %% df_posts.query("ID_Article == 1") # %%
def printHTML(text):
    """Wrap ``text`` in an ``HTML`` object and display it."""
    markup = HTML(text)
    display(markup)
def hive(self, line, cell=None):
    """Handle a ``hive`` command (``cell`` is None) or run a query (``cell`` given).

    Command mode dispatches on ``line``: empty shows help, then ``status``,
    ``debug`` (toggles ``self.debug``), ``disconnect``, ``connect silent``,
    ``connect alt``, ``connect`` and ``set ...``; anything else prints a hint.

    Query mode passes ``cell`` to ``self.runQuery`` when ``self.hive_connected``
    is set, stores the resulting frame in ``self.myip.user_ns['prev_hive']``
    and displays it if it has at most ``pd_display.max_rows`` rows.

    Carriage returns are stripped from the input in both modes.
    """
    if cell is not None:
        cell = cell.replace("\r", "")

        # `== True` is kept as-is so only values equal to True count.
        if not (self.hive_connected == True):
            print(
                "Hive is not connected: Please see help at %hive - To Connect: %hive connect"
            )
            return

        result_df, qtime, status = self.runQuery(cell)
        if self.debug:
            print("status: %s" % status)

        if "Failure" in status:
            print("Error: %s" % status)
            return
        if status.startswith("Success - No Results"):
            print("No Results returned in %s seconds" % qtime)
            return

        self.myip.user_ns['prev_hive'] = result_df
        mycnt = len(result_df)
        print("%s Records in Approx %s seconds" % (mycnt, qtime))
        print("")

        max_rows = self.hive_opts['pd_display.max_rows'][0]
        if mycnt > int(max_rows):
            print("Number of results (%s) greater than pd_display_max(%s)" %
                  (mycnt, max_rows))
            return

        if self.debug:
            print("Testing max_colwidth: %s" % pd.get_option('max_colwidth'))

        if not (self.hive_opts['pd_use_beaker'][0] == True):
            show_index = self.hive_opts['pd_display_idx'][0]
            display(HTML(result_df.to_html(index=show_index)))
            return

        if self.hive_opts['pd_beaker_bool_workaround'][0] == True:
            # Cast bool columns to object before handing the frame to
            # TableDisplay.
            for column in result_df.columns:
                if result_df.dtypes[column] == 'bool':
                    result_df[column] = result_df[column].astype(object)
        display(TableDisplay(result_df))
        return

    line = line.replace("\r", "")
    command = line.lower()

    if not line:
        self.displayHelp()
    elif command == "status":
        self.retStatus()
    elif command == "debug":
        toggled = not self.debug
        print("Toggling Debug from %s to %s" % (self.debug, toggled))
        self.debug = toggled
    elif command == "disconnect":
        self.disconnectHive()
    elif command == "connect silent":
        self.connectHive(False, True)
    elif command == "connect alt":
        self.connectHive(True, False)
    elif command == "connect":
        self.connectHive(False, False)
    elif command.startswith('set '):
        # The original (not lower-cased) line is forwarded.
        self.setvar(line)
    else:
        print(
            "I am sorry, I don't know what you want to do, try just %hive for help options"
        )
else: return 'Percent' new_table['Unit'] = new_table.apply(lambda row: user_perc2(row['Test Outcome'], row['Unit']), axis = 1) next_table = pd.concat([next_table, new_table]) # - # Test outcome codelist not complete missing codes labelled as `error` next_table.fillna('All', inplace = True) from IPython.core.display import HTML for col in next_table: if col not in ['Value']: next_table[col] = next_table[col].astype('category') display(HTML(f"<h2>{col}</h2>")) display(next_table[col].cat.categories) next_table['Test Outcome'] = next_table['Test Outcome'].map( lambda x: { 'Individuals tested (testing episodes) by test date' : 'error', 'Cumulative individuals tested (testing episodes)' : 'Total', 'Positive cases by test date' : 'error', 'Cumulative positive cases' : 'Positive', 'Cumulative negative cases' : 'Negative', 'Cumulative number of tests' : 'error', 'Cumulative tests completed' : 'Total', '% results within 1 day' : 'Results within 1 Day', '% results within 2 days' : 'Results within 2 Day', '% results within 3 days' : 'Results within 3 Day' }.get(x, x))
def md(str):
    """Convert Markdown text to HTML and display it, followed by a line break.

    Args:
        str: Markdown source. The name shadows the builtin but is part of the
            signature; ``"<br />"`` is appended before conversion.
    """
    source = str + "<br />"
    rendered = markdown.markdown(source)
    display(HTML(rendered))
import pandas as pd
import os.path
from IPython.core.display import display, HTML

# Set the notebook's .container width to 100%.
display(HTML("<style>.container { width:100% !important; }</style>"))


# Data Preprocessing & Creating Files joining SPY, BTC, VIX
def pre(ask_df, bid_df):
    """Combine ask and bid quotes into one frame with a mid price.

    Reads the "Gmt time" and "Close" columns of ``bid_df`` and the "Close"
    column of ``ask_df``.

    Returns:
        DataFrame with columns "Gmt time" (parsed as datetimes), "Bid", "Ask"
        and "Midprice" (mean of Ask and Bid).
    """
    df = pd.DataFrame()
    df["Gmt time"] = bid_df["Gmt time"]
    df["Bid"] = bid_df["Close"]
    df["Ask"] = ask_df["Close"]
    # Timestamps are expected as day.month.year hour:minute:second.fraction.
    df["Gmt time"] = pd.to_datetime(df["Gmt time"], format="%d.%m.%Y %H:%M:%S.%f")
    df["Midprice"] = (df["Ask"] + df["Bid"])/2
    return df


def subset(df, df_2):
    """Keep the rows of ``df`` whose "Gmt time" also occurs in ``df_2``.

    Returns:
        The filtered frame with a fresh 0..n-1 index.
    """
    df = df[df["Gmt time"].isin(df_2["Gmt time"])]
    # reset_index() keeps the old index as an "index" column; drop it again.
    df = df.reset_index()
    df.drop(columns=["index"], inplace=True)
    return df


def create_df(BTC_df, SPY_df, VIX_df):
    """Start a combined frame from the BTC and SPY bid/ask/mid-price columns.

    NOTE(review): the visible code never reads ``VIX_df`` and has no return
    statement -- the function looks truncated in this excerpt; confirm
    against the full file.
    """
    df = pd.DataFrame()
    df["Gmt time"] = BTC_df["Gmt time"]
    df["BTC_Bid"] = BTC_df["Bid"]
    df["BTC_Ask"] = BTC_df["Ask"]
    df["BTC_Midprice"] = BTC_df["Midprice"]
    df["SPY_Bid"] = SPY_df["Bid"]
    df["SPY_Ask"] = SPY_df["Ask"]
    df["SPY_Midprice"] = SPY_df["Midprice"]
def table(self, limit=100, columns=None):
    """Display the HTML produced by ``self.table_html``.

    Args:
        limit: Forwarded to ``table_html`` (default 100).
        columns: Forwarded to ``table_html`` (default None).

    Returns:
        Whatever ``display`` returns.
    """
    markup = self.table_html(limit=limit, columns=columns)
    widget = HTML(markup)
    return display(widget)
def show(entity, roots_only=True, formatting='indented'):
    """Display the HTML that ``ent2html`` produces for ``entity``.

    Args:
        entity: Object to render.
        roots_only: Forwarded unchanged to ``ent2html``.
        formatting: Forwarded unchanged to ``ent2html``.
    """
    markup = ent2html(entity, roots_only=roots_only, formatting=formatting)
    rendered = HTML(markup)
    display(rendered)
def draw(self, data):
    """Draw a visualization in the ipython notebook.

    Args:
        data: Input passed to ``self.format_data``.

    Returns:
        An ``HTML`` object built from ``self.dump_html``. A notebook renders
        it when the call is the last expression of a cell, or via an explicit
        ``display(...)``.
    """
    json_data = self.format_data(data)
    # The HTML object used to be created and discarded, so nothing was drawn.
    return HTML(self.dump_html(json_data))
def ChangeTrainAllMethods(fac):
    """Train every method in ``fac.fMethodsMap`` with live notebook feedback.

    Each method is trained on its own thread while this function polls
    ``TrainingEnded()`` and refreshes a progress bar and, for the method
    types listed in ``error_plot_supported``, a training-error plot. A
    ``KeyboardInterrupt`` during polling calls ``ExitFromTraining()`` where
    the loop is wrapped in ``try``.

    NOTE(review): the block nesting was reconstructed from a whitespace-
    collapsed source; in particular, confirm that the final progress/"End"
    update belongs at method-loop level.
    """
    clear_output()
    #stop button
    button = __HTMLJSCSSTemplates.button
    #progress bar
    inc = __HTMLJSCSSTemplates.inc
    progress_bar = __HTMLJSCSSTemplates.progress_bar
    # One progress bar id per trained method.
    progress_bar_idx = 0
    TTypes = ROOT.TMVA.Types
    # Method types that get the live training/testing error plot.
    error_plot_supported = [
        int(TTypes.kMLP), int(TTypes.kDNN), int(TTypes.kBDT)
    ]
    # Method types for which the stop button is shown in the non-plot branch.
    exit_button_supported = [
        int(TTypes.kSVM), int(TTypes.kCuts), int(TTypes.kBoost),
        int(TTypes.kBDT)
    ]
    for methodMapElement in fac.fMethodsMap:
        # Polling interval in seconds; reset for every dataset.
        sleep_time = 0.5
        display(
            HTML("<center><h1>Dataset: " + str(methodMapElement[0]) +
                 "</h1></center>"))
        for m in methodMapElement[1]:
            m.GetMethodType._threaded = True
            m.GetName._threaded = True
            method_type = int(m.GetMethodType())
            name = str(m.GetName())
            display(HTML("<h2><b>Train method: " + name + "</b></h2>"))
            m.InitIPythonInteractive()
            # Training runs in the background; this thread only polls.
            t = Thread(target=ROOT.TMVA.MethodBase.TrainMethod, args=[m])
            t.start()
            if method_type in error_plot_supported:
                time.sleep(sleep_time)
                sleep_time = GotoSleepUntilTrackingReady(m, sleep_time)
                display(HTML(button))
                # A progress bar is only shown when GetMaxIter() is non-zero.
                if m.GetMaxIter() != 0:
                    display(
                        HTML(progress_bar.substitute({"id": progress_bar_idx})))
                    display(
                        HTML(
                            inc.substitute({
                                "id": progress_bar_idx,
                                "progress": 100 * m.GetCurrentIter() / m.GetMaxIter()
                            })))
                JPyInterface.JsDraw.Draw(m.GetInteractiveTrainingError(),
                                         "drawTrainingTestingErrors")
                try:
                    while not m.TrainingEnded():
                        JPyInterface.JsDraw.InsertData(
                            m.GetInteractiveTrainingError())
                        if m.GetMaxIter() != 0:
                            display(
                                HTML(
                                    inc.substitute({
                                        "id": progress_bar_idx,
                                        "progress": 100 * m.GetCurrentIter() / m.GetMaxIter()
                                    })))
                        time.sleep(sleep_time)
                except KeyboardInterrupt:
                    m.ExitFromTraining()
            else:
                if method_type in exit_button_supported:
                    display(HTML(button))
                time.sleep(sleep_time)
                if m.GetMaxIter() != 0:
                    display(
                        HTML(progress_bar.substitute({"id": progress_bar_idx})))
                    display(
                        HTML(
                            inc.substitute({
                                "id": progress_bar_idx,
                                "progress": 100 * m.GetCurrentIter() / m.GetMaxIter()
                            })))
                else:
                    display(HTML("<b>Training...</b>"))
                if method_type in exit_button_supported:
                    try:
                        while not m.TrainingEnded():
                            if m.GetMaxIter() != 0:
                                display(
                                    HTML(
                                        inc.substitute({
                                            "id": progress_bar_idx,
                                            "progress": 100 * m.GetCurrentIter() / m.GetMaxIter()
                                        })))
                            time.sleep(sleep_time)
                    except KeyboardInterrupt:
                        m.ExitFromTraining()
                else:
                    # No try/except here: an interrupt propagates to the caller.
                    while not m.TrainingEnded():
                        if m.GetMaxIter() != 0:
                            display(
                                HTML(
                                    inc.substitute({
                                        "id": progress_bar_idx,
                                        "progress": 100 * m.GetCurrentIter() / m.GetMaxIter()
                                    })))
                        time.sleep(sleep_time)
            # Final update once polling has stopped.
            if m.GetMaxIter() != 0:
                display(
                    HTML(
                        inc.substitute({
                            "id": progress_bar_idx,
                            "progress": 100 * m.GetCurrentIter() / m.GetMaxIter()
                        })))
            else:
                display(HTML("<b>End</b>"))
            progress_bar_idx += 1
            # Wait for the training thread before starting the next method.
            t.join()
    return
import pandas as pd
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
import os
import webbrowser

# Download the NLTK stop-word corpus and build the English stop-word set.
# NOTE(review): `nltk`, `WordCloud` and `HTML` are not imported in the visible
# part of the file -- confirm they are imported elsewhere.
nltk.download('stopwords')
ENGLISH_STOP_WORDS = set(stopwords.words('english'))

# Take the input from a text file at the given file path.
# Example of a path on Windows: C:/Users/rishi/OneDrive/Desktop/testingdoc.txt
file_path = input("Please provide the path of the text file: ")
assert os.path.exists(file_path), "File not found at , " + str(file_path)

# Read the whole file as one string with the newlines removed.
with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read().replace('\n', '')

# get_embed_code is given a list of documents; here it holds a single one.
texts = []
texts.append(text)

wc = WordCloud(use_tfidf=False, stopwords=ENGLISH_STOP_WORDS)

# Show only the top 50 words.
# NOTE(review): the original comment said "don't randomize color", yet
# random_color=True is passed -- confirm which is intended.
embed_code = wc.get_embed_code(text=texts, random_color=True, topn=50)
# The HTML object is created but neither displayed nor kept.
HTML(embed_code)

# Save the embed code and open the saved page in the browser.
with open("./wordcloudoutput.html", "w") as file:
    file.write(embed_code)

new = 2  # open in a new tab, if possible
url = "file:///" + os.path.realpath('wordcloudoutput.html')
webbrowser.open(url, new=new)
from pandas._libs.tslibs.timestamps import Timestamp
from wordcloud import WordCloud
import os, pkgutil, json, urllib
from urllib.request import urlopen
from pprint import pprint
from scattertext import CorpusFromPandas, produce_scattertext_explorer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import MiniBatchKMeans
from sklearn.decomposition import PCA
from sklearn.metrics import homogeneity_score
from sklearn.metrics import silhouette_score
import xlrd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

# Set the notebook's .container width to 98%.
# NOTE(review): `display`, `HTML` and `nltk` are not imported in the visible
# part of the file -- confirm they are imported elsewhere.
display(HTML("<style>.container { width:98% !important; }</style>"))


class Data_visualization:
    """Holds a text (``self.str``), a word list and a file name."""

    def __init__(self, file_name):
        """Store ``file_name`` and start with empty text and word list."""
        # Text to analyse; empty until something else fills it.
        self.str = ""
        self.word_array = []
        self.file_name = file_name

    def wordcloud(self):
        """Tokenize ``self.str`` and drop English stop words.

        NOTE(review): the filtered token list is not used or returned in the
        visible code -- the method appears to continue beyond this excerpt.
        """
        stop_words = nltk.corpus.stopwords.words("english")
        Word__tokenize = nltk.word_tokenize(self.str)
        # filter word in stop word
        Word__tokenize_filter = [
            w for w in Word__tokenize if not w in stop_words
        ]
def yes_no_prompt(prompt):
    """Show ``prompt`` as an <h2> heading and read a yes/no answer.

    Returns:
        True when the stripped, lower-cased answer begins with "y";
        False otherwise, including for an empty answer.
    """
    heading = "<h2>{}</h2>".format(prompt)
    display(HTML(heading))
    answer = input().strip().lower()
    return answer.startswith("y")
def DisplayByPrice(self):
    """Display one HTML line per ``self.mydict`` entry, largest value first."""
    ranked = sorted(self.mydict.items(),
                    key=lambda entry: entry[1],
                    reverse=True)
    for key, amount in ranked:
        message = "You save $" + str(amount) + " Now" + key
        display(HTML(message))
# Fetch a sample JSON document and inspect the response.
# NOTE(review): `requests` and `BeautifulSoup` are not imported in the visible
# part of the file -- confirm they are imported elsewhere.
url = "https://jsonplaceholder.typicode.com/posts/1"
response = requests.get(url, timeout=240)
# Bare expressions: they only produce output as the last line of a notebook cell.
response.status_code
response.json()
content = response.json()
content.keys()


def request_with_check(url):
    """GET ``url`` (240 s timeout) and fail on a status code above 299.

    Returns:
        The response object.

    Raises:
        AssertionError: If the status code is greater than 299.
    """
    page_response = requests.get(url, timeout=240)
    status = page_response.status_code
    if status > 299:
        raise AssertionError("page content not found, status: %s" % status)
    return page_response


#request_with_check("https://www.google.co.in/mycustom404page")
request_with_check("https://www.google.co.in/")

from IPython.core.display import HTML
HTML("<b>Rendered HTML</b>")

# Download a stock quote page and parse it.
page_response = requests.get(
    "https://www.moneycontrol.com/india/stockpricequote/auto-2-3-wheelers/heromotocorp/HHM",
    timeout=240)
page_content = BeautifulSoup(page_response.content, "html.parser")
print(page_content)
# First <h1> of the page, wrapped as HTML.
HTML(str(page_content.find("h1")))
# NOTE(review): print() of an HTML object shows its repr, not rendered markup.
print(HTML(str(page_content.find("div", attrs={'id': "content_full"}))))