def print_md(value):
    """Render ``value`` as Markdown in the notebook output area."""
    rendered = Markdown(value)
    display(rendered)
def print_source(obj):
    """Show the source code of ``obj`` as a fenced Python block in Markdown."""
    code = inspect.getsource(obj)
    fenced = '```python\n' + code + '\n```'
    display(Markdown(fenced))
def random_derivatives():
    """Display six randomly parameterised differentiation exercises (a)-(f).

    Each exercise is rendered twice: once as formatted Markdown and once
    inside a collapsible ``<details>`` element holding the raw text.
    """

    def coeff(value):
        # A coefficient or exponent equal to 1 is left implicit.
        return "" if value == 1 else value

    def show(label, exercise):
        # Rendered exercise first, then its collapsible raw text.
        display(Markdown(f"**({label})** " + exercise))
        display(Markdown("<details><pre>" + exercise + "</pre></details>"))

    a, b, c = np.random.randint(2, 7, 3)
    show(
        "a",
        f"Gegeven $f(x) = ({a-1}- {coeff(b)}x)^{coeff(c)}$, bepaal $f^\\prime(x)$",
    )

    a, b, c, d = np.random.randint(2, 7, 4)
    show(
        "b",
        f"Gegeven $g(x) = {coeff(a-1)}x^{coeff(b)}\\ \\text{{tan}}({coeff(c)}x^{coeff(d)})$, geef $g^\\prime(x)$",
    )

    a, b, c, d = np.random.randint(2, 7, 4)
    show(
        "c",
        f"Gegeven $h(x) = \\text{{log}}_{a}({b-1}x-{c}x^{d})$, geef $h^\\prime(x)$",
    )

    a, b = np.random.randint(2, 7, 2)
    show(
        "d",
        f"Gegeven $k(x) = \\frac{{{a}}}{{x^{b}}}$, geef $k^{{\\prime\\prime}}(x)$",
    )

    a, b = np.random.randint(2, 7, 2)
    show(
        "e",
        f"Gegeven $\\frac{{dy}}{{dx}} = x^{a} - {coeff(b-1)}y$, geef $\\frac{{d^2y}}{{dx^2}}$",
    )

    a, b, c, d = np.random.randint(2, 7, 4)
    show(
        "f",
        f"Gegeven ${coeff(a-1)}x^3y - {coeff(b-1)}x^2 + {coeff(c-1)}y^4 = {2*d}$, geef $\\frac{{dy}}{{dx}}$",
    )
def print_h3(value):
    """Display ``value`` (converted with ``str``) as a level-3 Markdown heading."""
    heading = "### " + str(value)
    display(Markdown(heading))
def latex_formula(form, details=True):
    """Display the simplified ``form`` as rendered LaTeX.

    ``form`` must provide ``simplify()`` whose result provides
    ``to_latex(outer=True)``. Nothing is displayed when that LaTeX string is
    empty/falsy. With ``details`` set, the LaTeX source is additionally shown
    inside a collapsible ``<details>`` element.
    """
    latex = form.simplify().to_latex(outer=True)
    if not latex:
        return
    display(Math(latex))
    if details:
        display(Markdown("<details><pre>$" + latex + "$</pre></details>"))
def printmd(string):
    """Display ``string`` rendered as Markdown."""
    markdown_obj = Markdown(string)
    display(markdown_obj)
def merge_dictionary(pair, v_in):
    """Merge every occurrence of the symbol pair in the vocabulary keys.

    Parameters
    ----------
    pair : sequence of two str
        The two adjacent symbols to fuse into one symbol.
    v_in : dict
        Maps space-separated symbol sequences (words) to their values.

    Returns
    -------
    dict
        A new dict whose keys have each whitespace-delimited occurrence of
        ``"<pair[0]> <pair[1]>"`` replaced by the concatenated symbol. Values
        are carried over; if two keys become equal, the later one wins.
    """
    merged = ''.join(pair)
    bigram = re.escape(' '.join(pair))
    # Only match the bigram when it is delimited by whitespace / string ends.
    p = re.compile(r'(?<!\S)' + bigram + r'(?!\S)')

    def insert_merged(_match):
        # A function replacement is inserted literally. A plain replacement
        # string would have its backslashes interpreted as escapes by re.sub,
        # corrupting (or rejecting) symbols that contain a backslash.
        return merged

    return {p.sub(insert_merged, word): value for word, value in v_in.items()}


bpe_codes = {}
bpe_codes_reverse = {}
for i in range(num_merges):
    display(Markdown("### Iteration {}".format(i + 1)))
    pairs = get_stats(dictionary)
    best = max(pairs, key=pairs.get)
    dictionary = merge_dictionary(best, dictionary)

    bpe_codes[best] = i
    bpe_codes_reverse[best[0] + best[1]] = best

    print("new merge: {}".format(best))
    print("dictionary: {}".format(dictionary))
# -

print(bpe_codes)

# ## SentencePiece
def printmd(S):
    """Display ``S`` rendered as Markdown; returns ``None``."""
    rendered = Markdown(S)
    display(rendered)
    return None
def ImportData(year, resolution, data_dir, land=False, stats=False,
               drop_na=False):
    """
    Read the hexagon centroids, the IGBP land cover class fractions and the
    bioclimate variables, and merge them on the 'HID' column.

    INPUT:
        - year -> Year of data collection. (string)
        - resolution -> Data resolution. (string)
        - data_dir -> Dataset directory. (path string)
        - land -> Choose whether or not to import only data from land
                  hexagons with at least one bioclimate variable available.
                  (bool)
        - stats -> Choose whether or not to display some statistics about
                   the data. (bool)
        - drop_na -> Choose whether or not to drop rows with any NA value
                     (encoded as -100). (bool)

    OUTPUT:
        - hexagons -> Geodataframe containing all the available selected
                      data.
    """
    # Import hexagon centroids coordinate in a geodataframe
    file_dir = os.path.join(
        data_dir, 'Spatial',
        f'Centroids_ISEA3H{resolution}_Geodetic_V_WGS84.shp')
    centroids = gpd.read_file(file_dir)

    # Import IGBP land cover class fractions for hexagons in a dataframe
    file_dir = os.path.join(
        data_dir,
        f'ISEA3H{resolution}_MCD12Q1_V06_Y{year}_IGBP_Fractions.txt')
    LC = pd.read_csv(file_dir, sep='\t')

    # Import bioclimate variables for hexagons in a dataframe
    file_dir = os.path.join(
        data_dir, f'ISEA3H{resolution}_WorldClim30AS_V02_BIO_Centroid.txt')
    BV = pd.read_csv(file_dir, sep='\t')

    # Merge IGBP land cover fractions, bioclimate variables and hexagon
    # coordinates
    hexagons = centroids.merge(LC, on='HID')
    hexagons = hexagons.merge(BV, on='HID')
    n = hexagons.shape[0]

    if land:
        # Identify hexagons composed of water bodies only.
        # Water bodies are identified by the fact that they are the only
        # ones for which the sum of all land cover class fractions is 0.
        LC_rowsums = LC.iloc[:, 1:].sum(axis=1)
        LC_water_idx = LC_rowsums[LC_rowsums == 0].index.values
        nw = len(LC_water_idx)

        # Identify hexagons not only composed of water bodies.
        LC_land_idx = np.array(list(set(range(n)) - set(LC_water_idx)))
        nl = len(LC_land_idx)

        # Identify hexagons that have no bioclimate variable available
        # (NA=-100): every column after the first one equals -100.
        # Vectorised instead of eval() on generated source, so column names
        # need not be valid identifiers or avoid DataFrame attribute names.
        mask_NA_rows = (BV.iloc[:, 1:] == -100).all(axis=1)
        BV_NA_idx = BV[mask_NA_rows].index.values
        n_na = len(BV_NA_idx)

        # Identify hexagons that have at least one bioclimate variable
        # available
        BV_A_idx = np.sort(
            np.array(list(set(BV.index.values) - set(BV_NA_idx))))
        n_a = len(BV_A_idx)

        # Identify hexagons which satisfy both desired conditions
        land_A_idx = np.intersect1d(BV_A_idx, LC_land_idx)
        nl_a = len(land_A_idx)

        # Extract land hexagons with at least one bioclimate variable
        # available.
        # NOTE(review): positional selection assumes the merged frame keeps
        # the row order and length of LC and BV - confirm against the data.
        hexagons = hexagons.iloc[land_A_idx].copy()

    if drop_na:
        # Identify and remove rows with at least one bioclimate variable not
        # available (NA=-100)
        hexagons.replace(to_replace=-100, value=np.nan, inplace=True)
        hexagons.dropna(axis='index', how='any', inplace=True)
        n_aa = hexagons.shape[0]

    if stats:
        str2display = f"The entire dataset is composed of {n} hexagons."

        if land:
            str2display = str2display + (
                f"<ul> <li> {nl} correspond to lands "
                f"({100*nl/n:.4}% of the entire dataset); "
                f"<li> {nw} correspond to water bodies "
                f"({100*nw/n:.4}% of the entire dataset); "
                f"<li> {n_a} have at least one bioclimate variable available "
                f"({100*n_a/n:.4}% of the entire dataset); "
                f"<li> {n_na} have no bioclimate variable available "
                f"({100*n_na/n:.4}% of the entire dataset).</ul> "
                f"When considering only the land hexagons: "
                f"<ul> <li> {nl_a} hexagons have at least one bioclimate "
                f"variable available ({100*nl_a/nl:.4}% of the land hexagons "
                f"and {100*nl_a/n:.4}% of the entire dataset);"
            )
            if drop_na:
                str2display = str2display + (
                    f" <li> {n_aa} hexagons have all the bioclimate "
                    f"variables available ({100*n_aa/nl:.4}% of the land "
                    f"hexagons and {100*n_aa/n:.4}% of the entire dataset) "
                    f"</ul>"
                )
            else:
                # Close the second list, which was otherwise left open.
                str2display = str2display + " </ul>"
        elif drop_na:
            str2display = str2display + (
                f"<br> Only {n_aa} hexagons have all the bioclimatic "
                f"variables available ({100*n_aa/n:.4}% of the entire "
                f"dataset)."
            )

        display(Markdown(str2display))

    return hexagons
def pandas_df_to_Markdown_table(df):
    """Display ``df`` as a Markdown table.

    A row of ``---`` cells is put ahead of the data so that the
    pipe-separated CSV text forms a Markdown table (header, separator, rows).
    """
    separator_row = pd.DataFrame([['---'] * len(df.columns)],
                                 columns=df.columns)
    table = pd.concat([separator_row, df])
    table_text = table.to_csv(sep='|', index=False)
    display(Markdown(table_text))
def EmpiricalDistribution(data, NA_val=-100, figsize=(15, 8), show=True,
                          save=False, plot_dir='Output/Plots', title='ED',
                          save_params=None):
    """
    Plot a histogram, a kernel density estimate and a boxplot of ``data``.

    INPUT:
        - data -> Data vector (Series or array-like)
        - NA_val -> Value used for NA.
        - figsize -> Size of the figure object. (tuple of int: (width, height))
        - show -> Choose whether or not to display the plot. (bool)
        - save -> Choose whether or not to save the plot. (bool)
        - plot_dir -> Plot saving directory. (path string)
        - title -> Name of the plot file without file extension. (string)
        - save_params -> Parameters for the saving operation, forwarded to
                         ``savefig``; defaults to ``{'format': 'jpg'}``.
                         (dict or None)
    """
    # Default saving parameters, overridden by the user's choices
    SP = {'format': 'jpg'}
    SP.update(save_params or {})

    # Remove NA values if needed
    NA_mask = data != NA_val
    data = data[NA_mask]
    n_NA = (~NA_mask).sum()
    if n_NA > 0:
        display(Markdown(f"There are {n_NA} not avalable data points"))

    fig = plt.figure(figsize=figsize)
    gs = plt.GridSpec(nrows=2, ncols=2, figure=fig)
    ax0 = fig.add_subplot(gs[0, 0])
    ax1 = fig.add_subplot(gs[0, 1])
    ax2 = fig.add_subplot(gs[1, :])

    # Frequency histogram with 100 bins over the data range.
    # A zero (or NaN) range gives no usable bin width, so fall back to
    # seaborn's automatic binning in that case.
    bw = (data.max() - data.min()) / 100
    sns.histplot(data=data, color='darkblue',
                 binwidth=bw if bw > 0 else None, ax=ax0)
    ax0.set(xlabel='', ylabel='Counts')

    # Kernel density estimate
    sns.kdeplot(x=data, color='darkblue', fill=True, ax=ax1)
    ax1.set(xlabel='', ylabel='Density')

    # Boxplot
    sns.boxplot(x=data, color='skyblue', fliersize=3, ax=ax2)
    ax2.set_xlabel('')

    fig.tight_layout()

    # Save the plot if needed
    if save:
        # Output file path
        file_dir = os.path.join(plot_dir, f"{title}.{SP['format']}")
        # Save this figure explicitly rather than pyplot's current figure
        fig.savefig(file_dir, **SP)

    # Prevent display of the plot if needed
    if not show:
        plt.close(fig)
# # ![training conditional proportions](./images/training_conditional_proportions.png "Class Conditional Proportions") # # Hint: # - You can use DataFrame's `.melt` method to "unpivot" a DataFrame. See the following code cell for an example. # In[52]: from IPython.display import display, Markdown df = pd.DataFrame({ 'word_1': [1, 0, 1, 0], 'word_2': [0, 1, 0, 1], 'type': ['spam', 'ham', 'ham', 'ham'] }) display(Markdown("> Our Original DataFrame has some words column and a type column. You can think of each row as a sentence, and the value of 1 or 0 indicates the number of occurances of the word in this sentence.")) display(df); display(Markdown("> `melt` will turn columns into variale, notice how `word_1` and `word_2` become `variable`, their values are stored in the value column")) display(df.melt("type")) # ### Question 3a # # Create a bar chart like the one above comparing the proportion of spam and ham emails containing certain words. Choose a set of words that are different from the ones above, but also have different proportions for the two classes. Make sure to only consider emails from `train`. # # <!-- # BEGIN QUESTION # name: q3a # points: 2 # manual: true # image: true
def html_hello_world():
    """Display the contents of ``minimal.html`` as an HTML-highlighted block.

    The file is read from the current working directory and wrapped in a
    fenced ``HTML`` Markdown code block.
    """
    from IPython.display import display, Markdown

    # Context manager closes the file handle, which was previously leaked.
    with open("minimal.html") as html_file:
        txt = html_file.read()

    md = "\n".join(["```HTML", txt, "```"])
    display(Markdown(md))
def printmd(txt: str = ""):
    """Display ``txt`` (empty by default) rendered as Markdown."""
    rendered = Markdown(txt)
    display(rendered)
def inputs(self):
    """Show the inputs table and, when it is non-empty, the loaders table.

    Both tables are rendered as non-editable qgrid widgets under a Markdown
    heading.
    """

    def read_only_grid(frame):
        # Fresh options dict per grid, exactly as separate calls would build.
        return qgrid.show_grid(frame, grid_options={'editable': False})

    display(Markdown('## INPUTS'), read_only_grid(self.info.in_df))

    if not self.info.loader_df.size:
        return
    display(Markdown('## LOADERS'), read_only_grid(self.info.loader_df))
def colab_link(path):
    "Display a Markdown link that opens the notebook at `path` on Colab"
    cfg = get_config()
    user, lib_name, branch = cfg.user, cfg.lib_name, cfg.branch
    nbs_dir = cfg.path("nbs_path").name
    res = f'https://colab.research.google.com/github/{user}/{lib_name}/blob/{branch}/{nbs_dir}/{path}.ipynb'
    link = f'[Open `{path}` in Colab]({res})'
    display(Markdown(link))
# Dark base map with one circle marker per site: red for the rows of
# dtm_short_gdf, white (smaller) for the rows of hsio_short_gdf.
m = folium.Map([-20.0760232, 34.3582913909869],
               zoom_start=10,
               tiles="CartoDb dark_matter")
locs_dtm_short = zip(dtm_short_gdf.lat, dtm_short_gdf.lon)
locs_hsio_short = zip(hsio_short_gdf.lat, hsio_short_gdf.lon)
for location in locs_dtm_short:
    folium.CircleMarker(location=location, color="red", radius=4).add_to(m)
for location in locs_hsio_short:
    folium.CircleMarker(location=location, color="white", radius=2).add_to(m)
#m.save("map1.html")
m

# %%
Markdown(
    f" There are {len(dtm_short)} DTM tracked displacement sites in Mozambique and {len(hsio_short)} health facilities"
)

# %%
# calculate the nearest health facility from each site
def calculate_nearest(row, destination, val, col="geometry"):
    """Return the ``val`` entry of the ``destination`` row nearest to ``row``.

    Parameters
    ----------
    row : mapping-like
        Record whose ``col`` entry is the geometry to search from.
    destination : GeoDataFrame-like
        Candidates; its "geometry" column is merged with ``unary_union``.
    val : str
        Column of ``destination`` whose value is returned.
    col : str
        Key of the geometry in ``row``.

    The first matching value is returned; indexing fails if no destination
    geometry equals the nearest point.
    """
    # Union of all destination geometries, recomputed on every call.
    dest_unary = destination["geometry"].unary_union
    # NOTE(review): presumably shapely.ops.nearest_points, whose second
    # element lies on the second geometry - confirm against the imports.
    nearest_geom = nearest_points(row[col], dest_unary)
    # Recover the destination row(s) whose geometry equals that point.
    match_geom = destination.loc[destination.geometry == nearest_geom[1]]
    match_value = match_geom[val].to_numpy()[0]
    return match_value


dtm_short_gdf["nearest_geom"] = dtm_short_gdf.apply(calculate_nearest,
                                                    destination=hsio_short_gdf,
def __init__(self,
             image,
             fig_xsize=None,
             fig_ysize=None,
             cmap=plt.cm.gist_gray,
             vmin=None,
             vmax=None):
    """Show usage tips, plot ``image`` and attach a rectangle selector.

    Parameters
    ----------
    image : array-like
        Image to display; also used to derive the default colour limits.
    fig_xsize, fig_ysize : number, optional
        Figure width and height. Both must be given to take effect.
    cmap : colormap, optional
        Colormap used to draw the image.
    vmin, vmax : number, optional
        Colour limits. When ``None`` they default to the 1st and 99th
        percentile of ``image`` (NaNs ignored). An explicit 0 is honoured.

    The selected corners ``x1, y1, x2, y2`` start as ``None``; the selector
    reports selections to ``self.line_select_callback``.
    """
    tips = (
        "<text style=color:blue><b>Area of Interest Selector Tips:\n</b></text>",
        '<text style=color:blue>- This plot uses "matplotlib notebook", whereas the other plots in this notebook use "matplotlib inline".</text>',
        '<text style=color:blue>- If you run this cell out of sequence and the plot is not interactive, rerun the "%matplotlib notebook" code cell.</text>',
        '<text style=color:blue>- Use the pan tool to pan with the left mouse button.</text>',
        '<text style=color:blue>- Use the pan tool to zoom with the right mouse button.</text>',
        '<text style=color:blue>- You can also zoom with a selection box using the zoom to rectangle tool.</text>',
        '<text style=color:blue>- To turn off the pan or zoom to rectangle tool so you can select an AOI, click the selected tool button again.</text>',
        '<text style=color:darkred><b>IMPORTANT!</b></text>',
        '<text style=color:darkred>- Upon loading the AOI selector, the selection tool is already active.</text>',
        '<text style=color:darkred>- Click, drag, and release the left mouse button to select an area.</text>',
        '<text style=color:darkred>- The square tool icon in the menu is <b>NOT</b> the selection tool. It is the zoom tool.</text>',
        '<text style=color:darkred>- If you select any tool, you must toggle it off before you can select an AOI</text>',
    )
    # One display call per tip so each is rendered as its own output.
    for tip in tips:
        display(Markdown(tip))

    self.image = image
    self.x1 = None
    self.y1 = None
    self.x2 = None
    self.y2 = None

    # Compare against None so that an explicit limit of 0 is not discarded.
    if vmin is None:
        self.vmin = np.nanpercentile(self.image, 1)
    else:
        self.vmin = vmin
    if vmax is None:
        self.vmax = np.nanpercentile(self.image, 99)
    else:
        self.vmax = vmax

    if fig_xsize and fig_ysize:
        self.fig, self.current_ax = plt.subplots(figsize=(fig_xsize,
                                                          fig_ysize))
    else:
        self.fig, self.current_ax = plt.subplots()
    self.fig.suptitle('Area-Of-Interest Selector', fontsize=16)
    # Honour the caller's colormap instead of always using gist_gray.
    self.current_ax.imshow(self.image,
                           cmap=cmap,
                           vmin=self.vmin,
                           vmax=self.vmax)

    def toggle_selector(event):
        # Key handler: Q/q deactivates the selector, A/a reactivates it.
        # Matplotlib calls handlers with the event only, so there is no
        # ``self`` parameter here.
        print(' Key pressed.')
        if event.key in ['Q', 'q'] and toggle_selector.RS.active:
            print(' RectangleSelector deactivated.')
            toggle_selector.RS.set_active(False)
        if event.key in ['A', 'a'] and not toggle_selector.RS.active:
            print(' RectangleSelector activated.')
            toggle_selector.RS.set_active(True)

    # The selector is stored on the handler function, as the handler expects.
    # NOTE(review): `drawtype` and `rectprops` are deprecated in newer
    # Matplotlib releases - confirm against the pinned version.
    toggle_selector.RS = RectangleSelector(
        self.current_ax,
        self.line_select_callback,
        drawtype='box',
        useblit=True,
        button=[1, 3],  # don't use middle button
        minspanx=5,
        minspany=5,
        spancoords='pixels',
        rectprops=dict(facecolor='red',
                       edgecolor='yellow',
                       alpha=0.3,
                       fill=True),
        interactive=True)
    plt.connect('key_press_event', toggle_selector)
def displayMD(self, md):
    """Display ``md`` rendered as Markdown."""
    rendered = Markdown(md)
    display(rendered)
def PrintModelSettings():
    """Show the formatted settings and the save path, each under a heading."""
    sections = (
        ('#### Please check your settings:', lambda: GetFormattedSettings()),
        ('#### Results to be saved at:', lambda: savepath.value),
    )
    for heading, get_body in sections:
        display(Markdown(heading))
        # Evaluated only after its heading has been displayed.
        print(get_body())
def generate_column_correlation_network(df,
                                        th=0.8,
                                        edge_labels_flag=True,
                                        layout="spring_layout"):
    """Draw a graph linking numerical columns that are strongly correlated.

    Parameters
    ----------
    df : pandas.DataFrame
        Only its numerical columns are considered.
    th : float
        An edge is added when the absolute Pearson correlation, rounded to
        two decimals, is at least ``th``.
    edge_labels_flag : bool
        Whether to draw the correlation values on the edges.
    layout : str
        One of "spring_layout", "planar_layout" or "circular_layout".

    Returns
    -------
    None
        Communities (second Girvan-Newman level, only when the graph has more
        than one edge) and graph sizes are printed and the figure is shown.
        Returns early, after printing a message, when the graph is too small
        or ``layout`` is unknown.
    """
    G = nx.Graph()
    col_numerical = list(df.select_dtypes([np.number]).columns)

    # Go over all column pairs and keep the strongly correlated ones.
    for col1, col2 in itertools.combinations(col_numerical, 2):
        corr = round(df[col1].corr(df[col2], method='pearson'), 2)
        if abs(corr) >= th:  # correlation is high enough: add edge to graph
            G.add_edge(col1, col2, weight=corr)

    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()
    if num_nodes <= 1 or num_edges == 0:
        print("netwotk has no nodes (or just 1) or edges")
        return

    # Calculate and print communities
    if num_edges > 1:
        communities_generator = community.girvan_newman(G)
        next(communities_generator)  # skip the top-level split
        next_level_communities = next(communities_generator)
        display(Markdown("## Communites:"))
        node_to_cc_dict = {}
        cc = sorted(map(sorted, next_level_communities))
        if len(cc) == 0:
            print("netwotk has no communites")
            return
        # Community ids start at 1 and are later used as node colours.
        for counter, c in enumerate(cc, start=1):
            for n in c:
                node_to_cc_dict[n] = counter
            print(c)
            print("-------------------")

    display(Markdown("## Network:"))
    print("number of nodes:", num_nodes)
    print("number of edges:", num_edges)

    # Resolve the layout before creating the figure. An unknown name used to
    # fall through and crash on the unbound `pos`; now it returns cleanly.
    if layout == "spring_layout":
        pos = nx.spring_layout(G, k=0.15, iterations=20, scale=2)
    elif layout == "planar_layout":
        pos = nx.planar_layout(G)
    elif layout == "circular_layout":
        pos = nx.circular_layout(G)
    else:
        print("problrm with choosing a layout")
        return

    plt.figure(figsize=(26, 10))
    if num_edges > 1:
        com_values = [node_to_cc_dict[n] for n in G.nodes()]
        nx.draw_networkx(G,
                         pos,
                         cmap=plt.get_cmap('jet'),
                         node_color=com_values,
                         with_labels=True,
                         alpha=0.6,
                         font_size=14)
    else:
        nx.draw_networkx(G,
                         pos,
                         cmap=plt.get_cmap('jet'),
                         with_labels=True,
                         alpha=0.6,
                         font_size=16)
    if edge_labels_flag:
        edge_labels = nx.get_edge_attributes(G, 'weight')
        nx.draw_networkx_edge_labels(G,
                                     pos,
                                     edge_labels=edge_labels,
                                     alpha=0.8)
    plt.show()
def _printmd(self, string):
    """Display ``string`` rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)
from pathlib import Path

from IPython.display import display, Markdown

# Package version, kept both as a tuple and as a dotted string.
version_info = (0, 0, 1)
__version__ = '.'.join(str(part) for part in version_info)

github_url = "https://github.com/ContextLab/sherlock-topic-model-paper/tree/master/code/sherlock_helpers"
pkg_dir = Path(__file__).resolve().parent

# Notice shown once on import, pointing at the helpers' location and usage.
_message_text = (
    "Helper functions and variables used across multiple notebooks can be "
    f"found in `{pkg_dir}`, or on GitHub, [here]({github_url}).<br />You can "
    "also view source code directly from the notebook with:<br /><pre> "
    "from sherlock_helpers.functions import show_source<br /> show_source(foo)"
    "</pre>"
)
message = Markdown(_message_text)

try:
    # `get_ipython` only exists when running under IPython/Jupyter, so a
    # NameError means the package was imported outside a notebook.
    get_ipython()
    display(message)
except NameError:
    pass