def emp_dist(values):
    """
    Build an empirical distribution from an array of observations.

    Each distinct entry of ``values`` becomes one row; its proportion is
    the number of times it occurs divided by ``len(values)``.

    Parameters
    ----------
    values : array
        Observations to be grouped into a distribution.

    Returns
    -------
    Table
        The distinct values together with a 'Proportion' column.

    Examples
    --------
    >>> x = make_array(1, 1, 1, 1, 1, 2, 3, 3, 3, 4)
    >>> emp_dist(x)
    Value | Proportion
    1     | 0.5
    2     | 0.1
    3     | 0.3
    4     | 0.1
    """
    n_values = len(values)
    # Grouping on the only column yields (distinct value, count) rows.
    grouped = Table().with_column('position', values).group(0)
    distribution = Table().values(grouped.column(0))
    proportions = grouped.column(1) / n_values
    return distribution.with_column('Proportion', proportions)
def sanitize_dataframe(df: "Table"):
    """Return a copy of a table whose columns are safe to serialize as JSON.

    Adapted from the DataFrame sanitizer in the ipyvega project. The input
    is never modified; all conversions are applied to ``df.copy()``.

    Per column, depending on its dtype:

    * categoricals are converted to strings
    * numpy bools are converted to Python ``bool`` objects
    * numpy integers are converted to Python ``int`` objects
    * floats are converted to objects, with NaN/inf replaced by ``None``
    * datetimes are converted to strings, with ``NaT`` replaced by ``''``
    * object columns have numpy arrays turned into lists and null entries
      replaced by ``None``

    Columns of any other dtype are left untouched.

    Parameters
    ----------
    df : Table or None
        Object exposing ``copy()``, ``labels``, ``column(label)`` and item
        get/set by column label.

    Returns
    -------
    Table or None
        The sanitized copy, or ``None`` when ``df`` is ``None``.
    """
    import numpy as np
    from pandas import notnull

    # None is passed through rather than treated as an error.
    if df is None:
        return None

    df = df.copy()

    def to_list_if_array(val):
        return val.tolist() if isinstance(val, np.ndarray) else val

    # otypes is pinned to object: without it np.vectorize infers the output
    # dtype from the first element, which fails on empty columns, fails when
    # the first element becomes a list, and stringifies None in columns
    # whose first element is a string.
    listify = np.vectorize(to_list_if_array, otypes=[object])

    for col_name in df.labels:
        dtype = df.column(col_name).dtype
        dtype_name = str(dtype)
        if dtype_name == 'category':
            # XXXX: work around bug in to_json for categorical types
            # https://github.com/pydata/pandas/issues/10778
            df[col_name] = df[col_name].astype(str)
        elif dtype_name == 'bool':
            # convert numpy bools to objects; np.bool is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.integer):
            # convert integers to objects; np.int is not JSON serializable
            df[col_name] = df[col_name].astype(object)
        elif np.issubdtype(dtype, np.floating):
            # For floats, convert to Python float: np.float is not JSON
            # serializable. Also convert NaN/inf values to null, as they are
            # not JSON serializable.
            col = df[col_name]
            bad_values = np.isnan(col) | np.isinf(col)
            df[col_name] = np.where(bad_values, None, col).astype(object)
        elif dtype_name.startswith('datetime'):
            # Convert datetimes to strings;
            # astype(str) will choose the appropriate resolution.
            new_column = df[col_name].astype(str)
            new_column[new_column == 'NaT'] = ''
            df[col_name] = new_column
        elif dtype == object:
            # Convert numpy arrays saved as objects to lists;
            # arrays are not JSON serializable.
            col = listify(df[col_name])
            df[col_name] = np.where(notnull(col), col, None).astype(object)
    return df
from newsapi import NewsApiClient
from flask import Flask, url_for, render_template, request, jsonify, session
import weight, os, menuScraping, graph, weightranking
from datetime import datetime
from datascience import Table
import matplotlib.pyplot as plt
import io
import base64
from graph import build_graph

# Lookup from each "News Source" in bias.csv to its "Horizontal Rank" value.
bias = Table.read_table("bias.csv").select("News Source", "Horizontal Rank")
news_sources = bias.column("News Source")
news_rankings = bias.column("Horizontal Rank")
news_dict = dict(zip(news_sources, news_rankings))

app = Flask(__name__)

# SECURITY: an API key is committed to source control here. Set NEWSAPI_KEY
# in the environment to override it; the literal is kept only as a fallback
# so existing deployments keep working, and should be rotated and removed.
newsapi = NewsApiClient(
    api_key=os.environ.get("NEWSAPI_KEY", "672b5745f9aa4ecbbc044a0025fc28d3"))

# Source ids, both as the single string handed to the API calls below and as
# a list of the individual ids.
sources = "cnn, the-new-york-times, bbc-news, the-guardian-uk, associated-press, usa-today, the-economist, the-hill, fortune"
sourcesarray = sources.split(", ")


# NOTE(review): this definition appears to continue beyond the reviewed
# excerpt; the visible part is reproduced unchanged.
def get_news_by_category(category):
    top_news = []
    if category == 'economy':
        top_news.append(newsapi.get_top_headlines(q='econ', sources=sources))
        top_news.append(newsapi.get_top_headlines(q='money', sources=sources))
        top_news.append(newsapi.get_top_headlines(q='monetary', sources=sources))
def slope(tbl, col_x, col_y):
    """Return ``find_r(tbl, col_x, col_y) * SD(col_y) / SD(col_x)``.

    Standard deviations are computed with ``np.std``. Presumably ``find_r``
    returns the correlation coefficient, which would make this the slope of
    the regression line of ``col_y`` on ``col_x`` (confirm against its
    definition).
    """
    correlation = find_r(tbl, col_x, col_y)
    sd_y = np.std(tbl.column(col_y))
    sd_x = np.std(tbl.column(col_x))
    return correlation * sd_y / sd_x


def intercept(tbl, col_x, col_y):
    """Return ``mean(col_y) - slope(tbl, col_x, col_y) * mean(col_x)``."""
    mean_y = np.mean(tbl.column(col_y))
    rise_per_unit = slope(tbl, col_x, col_y)
    mean_x = np.mean(tbl.column(col_x))
    return mean_y - rise_per_unit * mean_x


# Visualizing the Comparison of Hispanic Percentages to African American Wages in 2000 and 2017

# scatterplot2000
line_2000 = slope(
    tbl_2000, "Hispanic Percent 2000", "African American Wages 2000"
) * tbl_2000.column("Hispanic Percent 2000") + intercept(
    tbl_2000, "Hispanic Percent 2000", "African American Wages 2000"
)
tbl_2000.scatter(
    "Hispanic Percent 2000",
    "African American Wages 2000",
    fit_line=True,
)

# scatterplot2017
# NOTE(review): the fitted values use positional columns 0 and 1 while the
# scatter call uses labels; confirm they refer to the same columns.
line_2017 = slope(tbl_2017, 0, 1) * tbl_2017.column(0) + intercept(
    tbl_2017, 0, 1
)
tbl_2017.scatter(
    "Hispanic Percent 2017",
    "African American Wages 2017",
    fit_line=True,
)

# Creating Bootstraps By Resampling
# bootstrap2000