def plot_distance_trip_time(df):
    """Print a scatter plot of trip duration against distance travelled.

    The chart maps ``s.TRIP_DURATION_COL`` to the x axis and
    ``s.DISTANCE_TRAVELED_COL_NAME`` to the y axis, and layers a red
    smoothed trend line, steel-blue points and a continuous x scale on top.
    The title is produced by ``_make_title`` from a fixed label and the
    number of rows in ``df``.

    Args:
        df: Table to plot. Must expose ``shape`` and contain the two columns
            named above.

    Returns:
        The ``df`` argument, returned as-is.
    """
    num_rows = df.shape[0]
    title = 'trip duration v distance travelled'
    chart = (
        ggplot(df, aes(s.TRIP_DURATION_COL, s.DISTANCE_TRAVELED_COL_NAME))
        + ggtitle(_make_title(title, num_rows))
        + stat_smooth(colour="red")
        + geom_point(colour='steelblue')
        # Default continuous x scale; pass breaks=/labels= here to customise.
        + scale_x_continuous()
    )
    # Call form of print: valid on Python 3, and on Python 2 it prints the
    # same single object that the old print statement did.
    print(chart)
    return df
def plot(self, inputs):
    """Build a scatter plot of two columns of ``self.dat`` for one year.

    Returns ``None`` without plotting when ``inputs.year`` does not occur in
    the ``Year`` column, or when either ``inputs.xvar`` or ``inputs.yvar``
    is not a key of ``self.dat``. Otherwise returns the ggplot object, with
    optional text labels (``inputs.shownames``) taken from ``self.ID_col``
    and an optional red linear fit (``inputs.linear``).
    """
    year_present = inputs.year in self.dat.Year.values
    if not year_present:
        return None
    if not (inputs.xvar in self.dat and inputs.yvar in self.dat):
        return None

    # Keep only the rows belonging to the requested year.
    year_rows = self.dat[self.dat.Year == inputs.year]
    chart = ggplot(year_rows, aes(x=inputs.xvar, y=inputs.yvar)) + geom_point()

    if inputs.shownames:
        name_labels = geom_text(aes(label=self.ID_col), vjust=1, hjust=1)
        chart = chart + name_labels
    if inputs.linear:
        linear_fit = stat_smooth(color="red", method="lm")
        chart = chart + linear_fit
    return chart
# NOTE(review): this excerpt starts inside a loop over `i` (and possibly an
# inner loop producing `reward`) whose header is not visible here, and the
# original indentation was lost. The flat layout below is a reconstruction --
# confirm the nesting against the full file before relying on it.

# Tally the outcome for evaluation round i: a reward of 1 is counted in
# wins_for_player_1, a reward of 0.5 in draw_for_players; anything else is
# not counted.
if (reward == 1):
    wins_for_player_1[i] += 1.0
elif (reward == 0.5):
    draw_for_players[i] += 1.0
print(i, wins_for_player_1[i], draw_for_players[i])
# Two records per round: 'Type' 0 carries the win tally and 'Type' 1 the
# draw tally; both share the same 'Training' value, training_steps * (i - 1).
data.append({
    'Type': 0,
    'Wins': wins_for_player_1[i],
    'Training': training_steps * (i - 1)
})
data.append({
    'Type': 1,
    'Wins': draw_for_players[i],
    'Training': training_steps * (i - 1)
})
# learnitMC is defined elsewhere; presumably it runs the next batch of
# training games -- TODO confirm.
learnitMC(training_steps, epsilon, alpha, n)
# learnit(training_steps, epsilon, alpha) # the original learning code.
# Pandas gives you the power of R
learningdf = pd.DataFrame(data)
# I use ggplot when I generate figures in R and would like to use it with Python, HOWEVER:
# latest Pandas causes problems for ggplot so I needed these two patches:
# https://stackoverflow.com/questions/50591982/importerror-cannot-import-name-timestamp/52378663
# https://github.com/yhat/ggpy/issues/612

# One point series and one loess curve per 'Type' (the group aesthetic).
# NOTE(review): the y label reads 'Wins for player 1', but the 'Type' 1 rows
# hold draw tallies in the 'Wins' column.
p = gg.ggplot(gg.aes(x='Training', y='Wins', group='Type'), data=learningdf)+ gg.xlab('Learning games') + \
    gg.ylab('Wins for player 1') + gg.ggtitle("n="+str(n)) + gg.geom_point() + gg.stat_smooth(method='loess')
p.make()
# The output file name embeds n, e.g. "experiment_3.pdf" for n == 3.
filename = "experiment_" + str(n) + ".pdf"
p.save(filename)
# Synthetic data: y is x scaled by `slope` plus an independent noise term.
slope = 0.3
x = randn(num) * 50. + 150.0
y = randn(num) * 5 + x * slope
plt.scatter(x, y, c='b')


# In[72]:

# plt.scatter(x[(y < 1) & (y > -1)], y[(y < 1) & (y > -1)], c='r')
# np.argsort, np.sort, complicated index slicing
dframe = pd.DataFrame({'x': x, 'y': y})
# Pass x and y by keyword: current seaborn releases accept them only as
# keyword arguments, and older releases accept the keyword form as well.
g = sns.jointplot(x='x', y='y', data=dframe, kind="reg")


# ## Grab Python version of ggplot http://ggplot.yhathq.com/

# In[73]:

from ggplot import ggplot, aes, geom_line, stat_smooth, geom_dotplot, geom_point


# In[74]:

# Same data drawn with ggplot: points plus a blue smoothed curve.
ggplot(aes(x='x', y='y'), data=dframe) + geom_point() + stat_smooth(colour='blue', span=0.2)


# In[ ]:
def plot_transmission_results(tx_results, percentage_decline, save_path, path_names):
    """Save line plots of percentage decline and of monthly transmissions.

    The function works on a deep copy of ``tx_results`` and writes every
    figure through ``.save(...)`` to a path under ``save_path``:

    * ``Percentage decline`` -- percentage decline against the varied input
      value, one facet per variable.
    * ``<run>_transmissions for all variable all values`` and three
      ``<run>_plots for individual values of ...`` figures for each of
      'RunA tx', 'RunB tx' and 'RunC tx'.
    * ``<variable>_comparison of transmissions in runs ABC`` and
      ``<variable>_comparison of transmissions in runs BC`` for each of
      'Incidence', 'Uninfected' and 'Infected'.

    Args:
        tx_results: Mapping indexed as
            ``tx_results[var]['monthly'][run]['transmissions']``. Per the
            original notes each entry has the keys 'monthly' and 'popstats',
            and 'monthly' holds only primary transmissions data.
        percentage_decline: Mapping from a run-folder name to its percentage
            decline value. Names are matched on the substrings 'HIV+',
            'HIV-' and 'Incidence'.
        save_path: Directory the figures are saved into.
        path_names: Mapping whose 'transmission' entry is the directory that
            contains 'Input files/transmission_rate_multiplier_required_inputs.xlsx'.

    Returns:
        None.
    """
    #%% what are inputs?
    # transmission results
    # There'll be a folder called 'Runs prepared for ...'
    # all the folders inside that folder will have a CEPAC results folder.
    # tx_data is a dictionary and will have two keys, 'monthly' and 'popstats'
    # 'monthly' key will only have primary transmissions data
    tx_data = deepcopy(tx_results)
    # t: number of monthly rows taken from each run (iloc[0:t] below).
    # total_var * total_val sizes the plotting frames; presumably 3 variables
    # with 4 values each -- TODO confirm against the input workbook.
    t = 120
    total_var = 3
    total_val = 4

    # percentage decline
    # this is also a dictionary of percentage decline values for each folder
    # having CEPAC results

    # save_path: exact folder where you want to save your images

    # path_names will have paths to transmissions and sensitivity directories

    #%% plot percentage decline
    # generate an environment object first
    # let's go for a line plot
    data_plot = pd.DataFrame(
        columns=['x', 'Percentage decline', 'Transmissions', 'Variable'],
        index=range(0, total_var * total_val))
    data_in = pd.read_excel(
        os.path.join(path_names['transmission'], 'Input files',
                     'transmission_rate_multiplier_required_inputs.xlsx'))
    # col: column names in the input workbook; col_adj: the short labels used
    # in the plots, in the same order.
    col = [
        'Incidence rate per 100 PY specific to high-risk group 1',
        'HIV uninfected individuals in high-risk group 1',
        'HIV infected individuals in high-risk group 1'
    ]
    col_adj = ['Incidence', 'Uninfected', 'Infected']
    # Round so the comparison against base_val[0] (0.9) below is not thrown
    # off by floating-point noise in the spreadsheet values.
    data_in[col[0]] = data_in[col[0]].round(1)
    # Values compared against each column (same order as col) to find the
    # first row that differs; presumably the base-case inputs -- TODO confirm.
    base_val = [np.float64(0.9), 2960000, 136400]
    # Collect percentage-decline values per input column, keyed by the
    # substring found in the run-folder name. Names matching none of the
    # three substrings are ignored.
    y1_values = {col[0]: [], col[1]: [], col[2]: []}
    for var in percentage_decline:
        if 'HIV+' in var:
            y1_values[col[2]].append(percentage_decline[var])
        elif 'HIV-' in var:
            y1_values[col[1]].append(percentage_decline[var])
        elif 'Incidence' in var:
            y1_values[col[0]].append(percentage_decline[var])

    for i in range(len(col)):
        # Index label of the first input row whose value differs from base_val[i].
        idx = data_in.loc[data_in.loc[:, col[i]] != base_val[i], col[i]].index.values[0]
        # .loc slices are label-based and inclusive at both ends, so each
        # slice below covers four rows; data_plot rows are shifted by one
        # relative to data_in.
        # NOTE(review): assumes y1_values[col[i]] holds exactly four values in
        # the same order as the input rows -- confirm against the caller.
        data_plot.loc[idx - 1:idx + 3 - 1, 'x'] = data_in.loc[idx:idx + 3, col[i]].values
        data_plot.loc[idx - 1:idx + 3 - 1, 'Variable'] = col_adj[i]
        data_plot.loc[idx - 1:idx + 3 - 1, 'Percentage decline'] = y1_values[col[i]]

    # plot (only rows with a percentage decline of at most 200)
    df_float = data_plot.loc[data_plot.loc[:, 'Percentage decline'] <= 200, :]
    (ggplot(aes(x='x', y='Percentage decline'), df_float) + geom_line() +
     facet_wrap('Variable', scales='free')).save(
         os.path.join(save_path, 'Percentage decline'))
    del df_float

    #%% visualizing transmissions
    # index = range(time * number of values for each variable * number of variables)
    def set_abc(run, var_idx, var_name, var_value_idx):
        # Fill one block of t rows of data_plot_tx for a single run.
        # Reads `var`, `t`, `tx_data`, `data_plot` and `data_plot_tx` from the
        # enclosing scope; `var` is whatever the calling loop currently holds.
        # NOTE(review): every slice starts at (var_idx - 1) * t, and the caller
        # passes var_idx == 0 on its first iteration, which gives a start
        # label of -t. Confirm this offset is intended.
        # set variable names
        data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1, 'Variable'] = var_name
        # set variable value
        data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1, 'Value'] = data_plot.loc[
            data_plot.loc[:, 'Variable'] == var_name, 'x'].values[var_value_idx]
        # Copy the first t monthly transmission values into the column that
        # matches the run name.
        if 'RunA' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1, 'RunA tx'] = tx_data[var]['monthly'][run][
                'transmissions'].iloc[0:t].values
        elif 'RunB' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1, 'RunB tx'] = tx_data[var]['monthly'][run][
                'transmissions'].iloc[0:t].values
        elif 'RunC' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1, 'RunC tx'] = tx_data[var]['monthly'][run][
                'transmissions'].iloc[0:t].values

    data_plot_tx = pd.DataFrame(
        index=range(t * total_var * total_val),
        columns=['Variable', 'Value', 'RunA tx', 'RunB tx', 'RunC tx'])
    # var_idx counts every key of tx_data (including unmatched ones, which
    # still consume a slot before `continue`); var_val_idx counts, per
    # variable, how many of its values have been seen so far
    # (order: Incidence, Uninfected, Infected).
    var_idx = -1
    var_val_idx = [-1, -1, -1]
    for var in tx_data:
        var_idx += 1
        if 'HIV+' in var:
            var_val_idx[2] += 1
            var_name = col_adj[2]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[2])
        elif 'HIV-' in var:
            var_val_idx[1] += 1
            var_name = col_adj[1]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[1])
        elif 'Incidence' in var:
            var_val_idx[0] += 1
            var_name = col_adj[0]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[0])
        else:
            continue

    # Add a month counter that cycles 0 .. t-1 over consecutive rows.
    data_plot_tx['t'] = 0
    t_float = -1
    for row in data_plot_tx.index:
        if t_float == t - 1:
            t_float = -1
        t_float += 1
        data_plot_tx.loc[row, 't'] = t_float

    #%% plots for individual runs
    run_col = ['RunA tx', 'RunB tx', 'RunC tx']
    # Per-variable subsets of the transmissions table.
    inci = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Incidence', :]
    inf = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Infected', :]
    uninf = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Uninfected', :]
    for i in run_col:
        # One overview figure per run column, then one figure per variable.
        (ggplot(aes(x='t', y=i, color='Value'), data_plot_tx) + geom_line() +
         facet_wrap('Variable', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_transmissions for all variable all values')))
        (ggplot(aes(x='t', y=i), inci) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_plots for individual values of incidence')))
        (ggplot(aes(x='t', y=i), inf) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_plots for individual values of infected population')))
        (ggplot(aes(x='t', y=i), uninf) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + '_plots for individual values of uninfected population')))

    #%% compare runs ABC
    # Reshape to long format: one block of t rows per (value, run) pair, so
    # the run can be mapped to the colour aesthetic.
    data_plot_abc = {}
    for var in col_adj:
        float_df = pd.DataFrame(index=range(0, t * total_var * total_val),
                                columns=['t', 'Value', 'Transmissions', 'Run'])
        insert_idx = -1
        for val in data_plot.loc[data_plot.loc[:, 'Variable'] == var, 'x']:
            var_df = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == var, :]
            var_df = var_df.reset_index(drop=True)
            var_val_df = var_df.loc[var_df.loc[:, 'Value'] == val, :]
            var_val_df = var_val_df.reset_index(drop=True)
            for c in ['RunA tx', 'RunB tx', 'RunC tx']:
                insert_idx += 1
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1, 'Run'] = c
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1, 'Transmissions'] = var_val_df.loc[:, c].values
                # NOTE(review): repeats the 'Run' assignment made two lines above.
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1, 'Run'] = c
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1, 'Value'] = val
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1, 't'] = np.arange(t)
        # The stored copy has incomplete rows dropped; the figure below is
        # drawn from the un-dropped float_df.
        data_plot_abc[var] = float_df.dropna()
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) + geom_line() +
         facet_wrap('Value', scales='free') + ggtitle(var)).save(
             os.path.join(
                 save_path,
                 str(var + '_comparison of transmissions in runs ABC')))

    #%% compare runs BC
    for var in data_plot_abc:
        # Drop run A, then overlay a loess smoother on faint raw lines.
        float_df = data_plot_abc[var].loc[
            data_plot_abc[var].loc[:, 'Run'] != 'RunA tx', :]
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line(alpha=0.2) + facet_wrap('Value', scales='free') +
         stat_smooth(method='loess', se=False) + ggtitle(var)).save(
             os.path.join(save_path,
                          str(var + '_comparison of transmissions in runs BC')))

    return
def _plot_scat_w_line(self, gp_aes):
    """Return ``gp_aes`` with points, a smoothed line and a theme added.

    Layers are added in this order: coral points, a blue smoother with
    ``span=0.2`` and no standard-error band, then the seaborn theme in the
    'talk' context.
    """
    with_points = gp_aes + gp.geom_point(color='coral')
    with_trend = with_points + gp.stat_smooth(span=0.2, color='blue', se=False)
    return with_trend + gp.theme_seaborn(context='talk')
for x in repeatedKnnResults],
    columns = ['p', 'k', 'cvAccuracy', 'testAccuracy'])
# NOTE(review): the two lines above are the tail of a statement that begins
# outside this excerpt (presumably the one that builds knnResultsSimplified);
# they are left untouched.

# Reshape to long format for plotting: the same log10(p) / k columns are
# stacked twice, once with the cross-validation accuracy ('type' == 'cv')
# and once with the test accuracy ('type' == 'test').
ggdata = pandas.concat(
    [DataFrame({'log10(p)' : log10(knnResultsSimplified.p),
                'k' : knnResultsSimplified.k.apply(int),
                'type' : 'cv',
                'Accuracy' : knnResultsSimplified.cvAccuracy}),
     DataFrame({'log10(p)' : log10(knnResultsSimplified.p),
                'k' : knnResultsSimplified.k.apply(int),
                'type' : 'test',
                'Accuracy' : knnResultsSimplified.testAccuracy})],
    axis = 0
)
# Accuracy against log10(p); colour, grouping and line type all follow 'type'.
ggobj = ggplot.ggplot(
    data = ggdata,
    aesthetics = ggplot.aes(x='log10(p)', y='Accuracy',
                            color='type', group='type', linetype='type')
)
ggobj += ggplot.theme_bw()
# ggobj += ggplot.scale_x_log()
ggobj += ggplot.geom_point(alpha=0.6)
ggobj += ggplot.stat_smooth()
# One panel per value of k.
ggobj += ggplot.facet_wrap('k')
print(ggobj)
import ggplot
from ggplot import aes, meat, geom_line, stat_smooth

# `import ggplot` binds the name `ggplot` to the package itself, so the plot
# constructor has to be reached as `ggplot.ggplot`; calling the bare name
# would raise "TypeError: 'module' object is not callable".
# Beef over time from the bundled `meat` dataset, with a blue smoothed curve.
ggplot.ggplot(aes(x='date', y='beef'), data=meat) +\
    geom_line() +\
    stat_smooth(colour='blue', span=0.2)

'''
ggplot(diamonds, aes(x='carat', y='price', color='cut')) +\
    geom_point() +\
    scale_color_brewer(type='diverging', palette=4) +\
    xlab("Carats") + ylab("Price") + ggtitle("Diamonds")

ggplot(diamonds, aes(x='price', fill='cut')) +\
    geom_density(alpha=0.25) +\
    facet_wrap("clarity")
'''
import ggplot as gp
import pandas as pd
import numpy as np

crime = pd.read_csv('crimeRatesByState2005.csv')

# Remove the two data points for the US national average and Washington, D.C.
# A single mask built from `crime` is applied to `crime` itself, so the
# boolean index always lines up with the frame being filtered (the earlier
# two-step version masked the already-filtered frame with a Series computed
# on the unfiltered one).
excluded_states = ['United States', 'District of Columbia']
crime2 = crime[~crime.state.isin(excluded_states)]

# Burglary against murder rate, with a red loess trend line.
print(
    gp.ggplot(gp.aes(x='murder', y='burglary'), data=crime2)
    + gp.geom_point()
    + gp.stat_smooth(method='loess', color='red'))