def test_H_observed_EC2_variants():
    """Illustrate the variants of H_observed (with and without non-backtracking).

    Builds a synthetic graph with planted_distribution_model_H, removes a
    fraction ``f`` of the node labels, then prints side by side:
    the first rows of powers of the gold-standard ``H0``, the observed
    label-to-label edge counts (``M_observed``), and the estimated
    compatibility matrices (``H_observed``) for every EC/variant combination.
    Prints results only; returns nothing.
    """
    print(
        "\n\n-- test_H_observed_EC2_variants(): 'H_observed', 'M_observed', uses: 'planted_distribution_model_H' --"
    )

    # --- Parameters for graph
    n = 3000            # number of nodes
    a = 1               # relative weight of the first class in alpha0
    h = 8               # homophily parameter for H0
    d = 2               # out-degree parameter for graph generation
    k = 3               # number of classes
    f = 0.2             # fraction of rows whose labels get removed
    distribution = 'uniform'
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed

    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=None,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, _ = replace_fraction_of_rows(X0, f, avoidNeighbors=False)

    # --- Print first rows of matrices
    distance = 3
    print("First rows of powers of H0:")
    # Loop variable renamed from 'k' to 'power': the original shadowed the
    # number of classes 'k' defined above.
    for power in range(1, distance + 1):
        print("{}: {}".format(power, np.linalg.matrix_power(H0, power)[0]))

    print("\nNumber of observed edges between labels (M_observed):")
    M = M_observed(W, X1, distance=distance, NB=True)
    print("M[0]:\n{}".format(M[0]))
    # BUGFIX: the label previously said "M[2]" while actually printing M[1];
    # the label now matches the printed entry.
    print("M[1]:\n{}".format(M[1]))

    for EC in [False, True]:
        for variant in [1, 2]:
            print("\nP (H observed): variant {} with EC={}".format(
                variant, EC))
            H_vec = H_observed(W, X1, distance=distance, NB=EC,
                               variant=variant)
            for i, H in enumerate(H_vec):
                print("{}:\n{}".format(i, H))
def run(choice, variant, create_data=False, show_plot=False, create_pdf=False, show_pdf=False, append_data=False):
    """main parameterized method to produce all figures.
    Can be run from external jupyther notebook or method to produce all figures, optionally as PDF
    CHOICE uses a different saved experimental run
    VARIANT uses a different wayt o plot

    Produces the "Fig_Scaling_Hrow" timing figure: compares the time to
    compute powers of the adjacency matrix W against the time to compute the
    observed compatibility statistics H_observed, per path length l.
    Results are appended to / read from a CSV so plotting can be re-run
    without recomputing.

    Parameters:
        choice (int): selects the experimental configuration (graph size,
            degree, repetition counts, annotation positions).
        variant (int): selects an alternative plotting style for a choice.
        create_data (bool): if True, (re)compute timings and write the CSV.
        show_plot (bool): if True, display the figure interactively.
        create_pdf (bool): if True, save the figure as PDF.
        show_pdf (bool): if True, open the previously created PDF.
        append_data (bool): if True, add rows to an existing CSV instead of
            overwriting it (requires create_data to also be True).
    """
    # %% -- Setup
    CREATE_DATA = create_data
    APPEND_DATA = append_data  # allows to add more data, requires CREATE_DATA to be true
    CHOICE = choice
    VARIANT = variant
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    SHOW_PDF = show_pdf
    BOTH = True  # show both figures for W and H
    SHOW_TITLE = True  # show parameters in title of plot
    f = 1  # fraction of labeled nodes for H estimation

    csv_filename = 'Fig_Scaling_Hrow_{}.csv'.format(CHOICE)
    fig_filename = 'Fig_Scaling_Hrow_{}-{}.pdf'.format(CHOICE, VARIANT)
    plot_colors = ['darkorange', 'blue']
    header = ['currenttime',
              'choice',  # W, or H
              'l',
              'time']
    # Only write a fresh header when starting a new CSV; appending keeps it.
    if CREATE_DATA and not APPEND_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=APPEND_DATA)

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed

    # %% -- Default parameters
    n = 10000
    ymax = 10
    h = 3
    d = 10  # actual degree is double
    distribution = 'uniform'
    exponent = None

    # %% -- CHOICES and VARIANTS
    # W_repeat[l] / H_repeat[l] = how many timing repetitions to run for
    # path length l (index 0 is unused; entry l times the l-th power / distance).
    if CHOICE == 1:
        W_repeat = [0, 0, 30, 5, 3, 1]  # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 4.3
        W_annotate_y = 1
        H_annotate_x = 6
        H_annotate_y = 0.005
    elif CHOICE == 2:  # small exponent 3, does not show the advantage well
        d = 3
        W_repeat = [0, 0, 10, 5, 5, 5, 5, 5, 5]  # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 5
        W_annotate_y = 0.08
        H_annotate_x = 6.5
        H_annotate_y = 0.004
    elif CHOICE == 3:  # small exponent 2, does not show the advantage well
        d = 2
        W_repeat = [0, 0, 50, 50, 50, 50, 50, 50, 50]  # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 6.5
        W_annotate_y = 0.02
        H_annotate_x = 6.5
        H_annotate_y = 0.004
    elif CHOICE == 4:
        distribution = 'powerlaw'
        exponent = -0.5
        W_repeat = [0, 0, 50, 9, 5, 3]  # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 4
        W_annotate_y = 1
        H_annotate_x = 6.5
        H_annotate_y = 0.006
        if VARIANT == 1:
            plot_colors = ['blue', 'darkorange']
            SHOW_TITLE = False
        if VARIANT == 2:
            plot_colors = ['blue', 'darkorange']
            BOTH = False
            SHOW_TITLE = False
    elif CHOICE == 5:
        distribution = 'powerlaw'
        exponent = -0.5
        W_repeat = [0, 0, 1, 1]  # index starts with 0. useful only for W^2 and later
        H_repeat = [0] + [1] * 8
        W_annotate_x = 4
        W_annotate_y = 1
        H_annotate_x = 6.5
        H_annotate_y = 0.006
    elif CHOICE == 11:
        W_repeat = [0, 0, 1, 1, 0, 0]  # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 4.3
        W_annotate_y = 1
        H_annotate_x = 6
        H_annotate_y = 0.005
    elif CHOICE == 12:
        W_repeat = [0, 0, 31, 11, 5, 3, 3, 3, 3]  # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 4.3
        W_annotate_y = 2.5
        H_annotate_x = 5.5
        H_annotate_y = 0.004
        f = 0.1
        plot_colors = ['blue', 'darkorange']
        ymax = 100
        if VARIANT == 1:
            # TODO: when trying to add additional data, then it creates 7 instead of 4 rows,
            # but the same code idea of CREATE vs ADD data appears to work in Fig_MHE_Optimal_Lambda, for that to replicate run below
            # run(12, 1, create_pdf=True, show_pdf=True, create_data=False, append_data=True)
            W_repeat = [0, 0, 0, 0, 0, 0, 0, 0, 0]  # index starts with 0. useful only for W^2 and later
            H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
    else:
        raise Warning("Incorrect choice!")

    # %% -- Create data
    if CREATE_DATA or APPEND_DATA:
        # Create graph
        k = 3
        a = 1
        alpha0 = np.array([a, 1., 1.])
        alpha0 = alpha0 / np.sum(alpha0)
        H0 = create_parameterized_H(k, h, symmetric=True)
        start = time.time()
        W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                             distribution=distribution,
                                             exponent=exponent,
                                             directed=False,
                                             debug=False)
        X0 = from_dictionary_beliefs(Xd)
        time_calc = time.time() - start
        # print("\nTime for graph:{}".format(time_calc))
        # print("Average outdegree: {}".format(calculate_average_outdegree_from_graph(W)))

        # Calculations W: time the l-th power of the sparse adjacency matrix,
        # repeated W_repeat[l] times so the CSV holds multiple samples per l.
        for length, rep in enumerate(W_repeat):
            for _ in range(rep):
                start = time.time()
                if length == 2:
                    result = W.dot(W)
                elif length == 3:
                    result = W.dot(W.dot(W))  # naive enumeration used as nothing can be faster
                elif length == 4:
                    result = W.dot(W.dot(W.dot(W)))
                elif length == 5:
                    result = W.dot(W.dot(W.dot(W.dot(W))))
                elif length == 6:
                    result = W.dot(W.dot(W.dot(W.dot(W.dot(W)))))
                elif length == 7:
                    result = W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W))))))
                elif length == 8:
                    result = W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W)))))))
                elif length == 9:
                    result = W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W))))))))
                time_calc = time.time() - start
                # NOTE(review): 'tuple' shadows the builtin of the same name.
                tuple = [str(datetime.datetime.now())]
                text = ['W', length, time_calc]
                text = np.asarray(text)  # without np, entries get ugly format
                tuple.extend(text)
                # print("W, d: {}, time: {}".format(length, time_calc))
                save_csv_record(join(data_directory, csv_filename), tuple)

        # Calculations H_NB: time H_observed (non-backtracking statistics up
        # to the given distance), including fresh label removal each run.
        for length, rep in enumerate(H_repeat):
            for _ in range(rep):
                X0 = from_dictionary_beliefs(Xd)
                X1, ind = replace_fraction_of_rows(X0, 1 - f)
                start = time.time()
                result = H_observed(W, X=X1, distance=length, NB=True, variant=1)
                time_calc = time.time() - start
                tuple = [str(datetime.datetime.now())]
                text = ['H', length, time_calc]
                text = np.asarray(text)  # without np, entries get ugly format
                tuple.extend(text)
                # print("H, d: {}, time: {}".format(length, time_calc))
                save_csv_record(join(data_directory, csv_filename), tuple)

        # Calculate and display M statistics (diagnostics only; no CSV output)
        for length, _ in enumerate(H_repeat):
            M = M_observed(W, X=X0, distance=length, NB=True)
            M = M[-1]
            s = np.sum(M)
            # print("l: {}, sum: {:e}, M:\n{}".format(length, s, M))

    # %% -- Read, aggregate, and pivot data
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1 (length {}):\n{}".format(len(df1.index), df1.head(15)))
    df2 = df1.groupby(['choice', 'l']).agg \
        ({'time': [np.max, np.mean, np.median, np.min, np.size],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'time_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(30)))

    # One column per choice ('W' and 'H'), indexed by path length l.
    df3 = pd.pivot_table(df2, index=['l'], columns=['choice'], values='time_median', )  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))

    #%% -- Setup figure
    mpl.rcParams['backend'] = 'pdf'
    mpl.rcParams['lines.linewidth'] = 3
    mpl.rcParams['font.size'] = 20
    mpl.rcParams['axes.labelsize'] = 20
    mpl.rcParams['axes.titlesize'] = 16
    mpl.rcParams['xtick.labelsize'] = 16
    mpl.rcParams['ytick.labelsize'] = 16
    mpl.rcParams['axes.edgecolor'] = '111111'  # axes edge color
    mpl.rcParams['grid.color'] = '777777'  # grid color
    mpl.rcParams['figure.figsize'] = [4, 4]
    mpl.rcParams['xtick.major.pad'] = 6  # padding of tick labels: default = 4
    mpl.rcParams['ytick.major.pad'] = 4  # padding of tick labels: default = 4
    fig = plt.figure()
    ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

    #%% -- Draw the plot and annotate
    df4 = df3['H']
    # print("\n-- df4 (length {}):\n{}".format(len(df4.index), df4.head(30)))
    Y1 = df3['W'].plot(logy=True, color=plot_colors[0], marker='o',
                       legend=None,
                       clip_on=False,  # cut off data points outside of plot area
                       # zorder=3
                       )  # style='o', kind='bar', style='o-',
    plt.annotate(r'$\mathbf{W}^\ell$', xy=(W_annotate_x, W_annotate_y), color=plot_colors[0], )
    if BOTH:
        Y2 = df3['H'].plot(logy=True, color=plot_colors[1], marker='o',
                           legend=None,
                           clip_on=False,  # cut off data points outside of plot area
                           zorder=3
                           )  # style='o', kind='bar', style='o-',
        plt.annotate(r'$\mathbf{\hat P}_{\mathrm{NB}}^{(\ell)}$', xy=(H_annotate_x, H_annotate_y), color=plot_colors[1], )
    if SHOW_TITLE:
        plt.title(r'$\!\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}$'.format(int(n / 1000), 2 * d, h, f))

    # %% -- Figure settings & plot
    plt.grid(b=True, which='both', alpha=0.2, linestyle='solid', axis='y', linewidth=0.5)  # linestyle='dashed', which='minor'
    plt.xlabel(r'Path length ($\ell$)', labelpad=0)
    plt.ylabel(r'$\!$Time [sec]', labelpad=1)
    plt.ylim(0.001, ymax)  # placed after yticks
    plt.xticks(range(1, 9))

    if SHOW_PLOT:
        plt.show()
    if CREATE_PDF:
        plt.savefig(join(figure_directory, fig_filename), format='pdf', dpi=None, edgecolor='w',
                    orientation='portrait', transparent=False, bbox_inches='tight', pad_inches=0.05,
                    # frameon=None
                    )
    if SHOW_PDF:
        # os.system('{} "'.format(open_cmd[sys.platform]) + join(figure_directory, fig_filename) + '"')  # shows actually created PDF
        showfig(join(figure_directory, fig_filename))  # shows actually created PDF  # TODO replace with this method
def run(choice, variant, create_data=False, show_plot=False, create_pdf=False, show_pdf=False):
    """main parameterized method to produce all figures.
    Can be run from external jupyther notebook or method to produce all figures in PDF

    Produces the "Fig_Backtracking_Advantage" figure: for each path length l,
    compares the ground-truth max entry of H0^l against the estimates from
    H_observed with (HrowEC) and without (Hrow) non-backtracking paths,
    averaged over 'rep' random graphs. Data is cached in a CSV.

    Parameters:
        choice (int): selects the experimental configuration.
        variant (int): selects an alternative plotting style for a choice.
        create_data (bool): if True, (re)compute statistics and write CSV.
        show_plot (bool): if True, display the figure interactively.
        create_pdf (bool): if True, save the figure as PDF.
        show_pdf (bool): if True, open the previously created PDF.
    """
    # %% -- Setup
    CREATE_DATA = create_data
    CHOICE = choice
    VARIANT = variant
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    SHOW_PDF = show_pdf

    SHOW_TITLE = True
    LEGEND_MATCH_COLORS = False
    SHOW_DISTRIBUTION_IN_TITLE = True
    SHOW_BACKTRACK_ESTIMATE = True
    SHOW_NONBACKTRACK_ESTIMATE = True
    plot_colors = ['darkgreen', 'darkorange', 'blue']
    label_vec = [
        r'$\mathbf{H}^{\ell}\,\,\,\,$', r'$\mathbf{\hat P}^{(\ell)}$',
        r'$\mathbf{\hat P}_{\mathrm{NB}}^{(\ell)}$'
    ]

    csv_filename = 'Fig_Backtracking_Advantage_{}.csv'.format(CHOICE)
    fig_filename = 'Fig_Backtracking_Advantage_{}-{}.pdf'.format(
        CHOICE, VARIANT)
    header = [
        'currenttime',
        'choice',  # H, Hrow, HrowEC
        'l',
        'valueH',  # maximal values in first row of H
        'valueM'
    ]  # average value across entries in M
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename),
                        header,
                        append=False)

    # %% -- Default parameters
    ymin = 0.3
    ymax = 1
    exponent = None

    # %% -- CHOICES and VARIANTS
    if CHOICE == 1:  # n=1000, shows NB to be slight lower for l=2: probably due to sampling issues (d=3, thus very few points available)
        n = 1000
        h = 8
        d = 3
        f = 0.1
        distribution = 'uniform'
        rep = 10000
        length = 8
    elif CHOICE == 2:
        n = 1000
        h = 8
        d = 10
        f = 0.1
        distribution = 'uniform'
        rep = 10000
        length = 8
    elif CHOICE == 3:  # nice: shows nicely that difference is even bigger for smaller h
        n = 1000
        h = 3
        d = 10
        f = 0.1
        distribution = 'uniform'
        rep = 10000
        length = 8
        ymax = 0.8
    elif CHOICE == 4:
        n = 10000
        h = 3
        d = 10
        f = 0.1
        distribution = 'uniform'
        rep = 100
        length = 8
        ymin = 0.333
        ymax = 0.65
    elif CHOICE == 5:
        n = 10000
        h = 3
        d = 3
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
    elif CHOICE == 6:  # n=1000, the powerlaw problem with small graphs and high exponent
        n = 1000
        h = 8
        d = 3
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.5
        rep = 10000
        length = 8
    elif CHOICE == 7:
        n = 10000
        h = 8
        d = 3
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
        # ymin = 0.4
        ymax = 1
    elif CHOICE == 8:
        n = 10000
        h = 8
        d = 10
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
        # ymin = 0.4
        ymax = 1
    elif CHOICE == 9:  # shows lower NB due to problem with sampling from high powerlaw -0.5
        n = 10000
        h = 8
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.5
        rep = 1000
        length = 8
    elif CHOICE == 10:
        n = 10000
        h = 8
        d = 3
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.5
        rep = 1000
        length = 8
    elif CHOICE == 11:  # problem: shows that NB is too low (probably because of problem with sampling from -0.5 factor)
        n = 1000
        h = 8
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.5
        rep = 1000
        length = 8
    elif CHOICE == 12:  # problem: shows no problem with NB (probably because no problem with sampling from -0.2 factor)
        n = 1000
        h = 8
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.2
        rep = 1000
        length = 8
    elif CHOICE == 20:
        n = 10000
        h = 3
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.3
        rep = 1000
        length = 8
        ymin = 0.333
        ymax = 0.65
    elif CHOICE == 21:  # originally used before color change
        n = 10000
        h = 3
        d = 25
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.3
        rep = 1000
        length = 8
        ymin = 0.333
        ymax = 0.65
        if VARIANT == 1:
            SHOW_TITLE = False
            plot_colors = ['red', 'blue', 'darkorange']
            label_vec = [r'$\mathbf{H}^{\ell}\quad\quad$', 'naive', 'better']
            LEGEND_MATCH_COLORS = True
        if VARIANT == 2:
            SHOW_TITLE = False
            plot_colors = ['red', 'blue', 'darkorange']
            label_vec = [r'$\mathbf{H}^{\ell}\quad\quad$', 'naive', 'better']
            SHOW_NONBACKTRACK_ESTIMATE = False
            LEGEND_MATCH_COLORS = True
        if VARIANT == 3:
            SHOW_TITLE = False
            plot_colors = ['red', 'blue', 'darkorange']
            label_vec = [r'$\mathbf{H}^{\ell}\quad\quad$', 'naive', 'better']
            SHOW_BACKTRACK_ESTIMATE = False
            SHOW_NONBACKTRACK_ESTIMATE = False
            LEGEND_MATCH_COLORS = True
        if VARIANT == 4:
            plot_colors = ['red', 'blue', 'darkorange']
            LEGEND_MATCH_COLORS = True
    elif CHOICE == 25:
        n = 10000
        h = 8
        d = 5
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
    elif CHOICE == 26:
        n = 10000
        h = 8
        d = 25
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
        ymax = 0.9
        ymin = 0.4
    elif CHOICE == 27:
        n = 10000
        h = 8
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.3
        rep = 1000
        length = 8
        ymax = 0.9
        ymin = 0.33
    elif CHOICE == 31:
        n = 10000
        h = 3
        d = 10
        f = 0.1
        distribution = 'uniform'
        length = 8
        ymin = 0.333
        ymax = 0.65
        SHOW_DISTRIBUTION_IN_TITLE = False
        plot_colors = ['red', 'blue', 'darkorange']
        LEGEND_MATCH_COLORS = True
        if VARIANT == 0:
            rep = 1000
        if VARIANT == 1:
            rep = 20
    else:
        raise Warning("Incorrect choice!")

    # Shared graph-generation parameters (identical across all choices).
    k = 3
    a = 1
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # %% -- Create data
    if CREATE_DATA:
        # Calculations H: ground-truth values from powers of H0.
        print("Max entry of first rows of powers of H0:")
        for l in range(1, length + 1):
            valueH = np.max(np.linalg.matrix_power(H0, l)[0])
            # NOTE(review): 'tuple' shadows the builtin of the same name.
            tuple = [str(datetime.datetime.now())]
            text = ['H', l, valueH, '']
            text = np.asarray(text)  # without np, entries get ugly format
            tuple.extend(text)
            print("{}: {}".format(l, valueH))
            save_csv_record(join(data_directory, csv_filename), tuple)

        # Calculations Hrow and HrowEC: estimates from 'rep' random graphs.
        for r in range(rep):
            print('Repetition {}'.format(r))

            # Create graph
            start = time.time()
            W, Xd = planted_distribution_model_H(
                n,
                alpha=alpha0,
                H=H0,
                d_out=d,  # notice that for undirected graphs, actual degree = 2*d
                distribution=distribution,
                exponent=exponent,
                directed=False,
                debug=False)
            X0 = from_dictionary_beliefs(Xd)
            X1, ind = replace_fraction_of_rows(X0, 1 - f)
            time_calc = time.time() - start
            # print("\nTime for graph:{}".format(time_calc))
            print("Average outdegree: {}".format(
                calculate_average_outdegree_from_graph(W)))

            # Calculate H_vec and M_vec versions (M_vec to calculate the average number of entries in M)
            H_vec = H_observed(W, X1, distance=length, NB=False, variant=1)
            H_vec_EC = H_observed(W, X1, distance=length, NB=True, variant=1)
            M_vec = M_observed(W, X1, distance=length, NB=False)
            M_vec_EC = M_observed(W, X1, distance=length, NB=True)

            # Calculation H_vec
            # print("Max entry of first rows of H_vec")
            for l, H in enumerate(H_vec):
                valueH = H[0][
                    (l + 1) % 2]  # better than 'value = np.max(H[0])', otherwise sometimes chooses another higher entry -> biased estimate
                valueM = np.average(M_vec[l + 1])
                # print(M_vec[l+1])
                # print(valueM)
                tuple = [str(datetime.datetime.now())]
                text = ['Hrow', l + 1, valueH, valueM]
                text = np.asarray(text)  # without np, entries get ugly format
                tuple.extend(text)
                # print("{}: {}".format(l + 1, value))
                save_csv_record(join(data_directory, csv_filename), tuple)

            # Calculation H_vec_EC
            # print("Max entry of first rows of H_vec_EC")
            for l, H in enumerate(H_vec_EC):
                valueH = H[0][(l + 1) % 2]
                valueM = np.average(M_vec_EC[l + 1])
                # print(M_vec_EC[l+1])
                # print(valueM)
                tuple = [str(datetime.datetime.now())]
                text = ['HrowEC', l + 1, valueH, valueM]
                text = np.asarray(text)  # without np, entries get ugly format
                tuple.extend(text)
                # print("{}: {}".format(l + 1, value))
                save_csv_record(join(data_directory, csv_filename), tuple)

    #%% -- Read, aggregate, and pivot data
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1 (length {}):\n{}".format(len(df1.index), df1.head(15)))
    df2 = df1.groupby(['choice', 'l']).agg \
        ({'valueH': [np.mean, np.std, np.size],  # Multiple Aggregates
          'valueM': [np.mean],
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values
                   ]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'valueH_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(30)))

    df3 = pd.pivot_table(df2,
                         index=['l'],
                         columns=['choice'],
                         values=['valueH_mean', 'valueH_std',
                                 'valueM_mean'])  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values
                   ]  # flatten the column hierarchy
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    # df3.drop(['valueM_mean_H', 'valueH_std_H'], axis=1, inplace=True)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.reset_index(level=0, inplace=True)  # get l into columns
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))

    #%% -- Setup figure
    mpl.rcParams['backend'] = 'pdf'
    mpl.rcParams['lines.linewidth'] = 3
    mpl.rcParams['font.size'] = 16
    mpl.rcParams['axes.labelsize'] = 20
    mpl.rcParams['axes.titlesize'] = 16
    mpl.rcParams['xtick.labelsize'] = 16
    mpl.rcParams['ytick.labelsize'] = 16
    mpl.rcParams['legend.fontsize'] = 20
    mpl.rcParams['axes.edgecolor'] = '111111'  # axes edge color
    mpl.rcParams['grid.color'] = '777777'  # grid color
    mpl.rcParams['figure.figsize'] = [4, 4]
    mpl.rcParams['xtick.major.pad'] = 4  # padding of tick labels: default = 4
    mpl.rcParams['ytick.major.pad'] = 4  # padding of tick labels: default = 4
    fig = plt.figure()
    ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

    #%% -- Extract values into columns (plotting dataframew with bars plus error lines and lines gave troubles)
    l_vec = df3['l'].values  # .tolist() does not work with bar plot
    mean_H_vec = df3['valueH_mean_H'].values
    mean_Hrow_vec = df3['valueH_mean_Hrow'].values
    mean_Hrow_vecEC = df3['valueH_mean_HrowEC'].values
    std_Hrow_vec = df3['valueH_std_Hrow'].values
    std_Hrow_vecEC = df3['valueH_std_HrowEC'].values

    #%% -- Draw the plot and annotate
    width = 0.3  # the width of the bars
    if SHOW_BACKTRACK_ESTIMATE:
        left_vec = l_vec
        if SHOW_NONBACKTRACK_ESTIMATE:
            # shift the first bar series left so both series fit side by side
            left_vec = left_vec - width
        bar1 = ax.bar(
            left_vec,
            mean_Hrow_vec,
            width,
            color=plot_colors[1],
            yerr=std_Hrow_vec,
            error_kw={
                'ecolor': 'black',
                'linewidth': 2
            },  # error-bars colour
            label=label_vec[1])
    if SHOW_NONBACKTRACK_ESTIMATE:
        bar2 = ax.bar(
            l_vec,
            mean_Hrow_vecEC,
            width,
            color=plot_colors[2],
            yerr=std_Hrow_vecEC,
            error_kw={
                'ecolor': 'black',
                'linewidth': 2
            },  # error-bars colour
            label=label_vec[2])
    gt = ax.plot(l_vec,
                 mean_H_vec,
                 color=plot_colors[0],
                 linestyle='solid',
                 linewidth=2,
                 marker='o',
                 markersize=10,
                 markeredgewidth=2,
                 markerfacecolor='None',
                 markeredgecolor=plot_colors[0],
                 label=label_vec[0])
    if CHOICE == 4 or CHOICE == 20:
        ax.annotate(
            np.round(mean_Hrow_vec[1], 2),
            xy=(2.15, 0.65),
            xytext=(2.1, 0.60),
            arrowprops=dict(facecolor='black', arrowstyle="->"),
        )

    #%% -- Legend
    if distribution == 'uniform' and SHOW_DISTRIBUTION_IN_TITLE:
        distribution_label = ',$uniform'
    else:
        distribution_label = '$'
    if SHOW_TITLE:
        plt.title(
            r'$\!\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.
            format(int(n / 1000), 2 * d, h, f, distribution_label
                   ))  # notice that actual d is double than in one direction
    handles, labels = ax.get_legend_handles_labels()
    legend = plt.legend(
        handles,
        labels,
        loc='upper right',
        handlelength=1.5,
        labelspacing=0,  # distance between label entries
        handletextpad=0.3,  # distance between label and the line representation
        # title='Iterations'
        borderaxespad=0.1,  # distance between legend and the outer axes
        borderpad=0.1,  # padding inside legend box
        numpoints=1,  # put the marker only once
    )
    if LEGEND_MATCH_COLORS:  # TODO: how to get back the nicer line spacing defined in legend above after changing the legend text colors
        legend.get_texts()[0].set_color(plot_colors[0])
        if SHOW_BACKTRACK_ESTIMATE:
            legend.get_texts()[1].set_color(plot_colors[1])
        if SHOW_NONBACKTRACK_ESTIMATE:
            legend.get_texts()[2].set_color(plot_colors[2])
    frame = legend.get_frame()
    frame.set_linewidth(0.0)
    frame.set_alpha(0.8)  # 0.8

    # %% -- Figure settings & plot
    ax.set_xticks(range(10))
    plt.grid(b=True,
             which='both',
             alpha=0.2,
             linestyle='solid',
             axis='y',
             linewidth=0.5)  # linestyle='dashed', which='minor'
    plt.xlabel(r'Path length ($\ell$)', labelpad=0)
    plt.ylim(ymin, ymax)  # placed after yticks
    plt.xlim(0.5, 5.5)
    plt.tick_params(
        axis='x',  # changes apply to the x-axis
        which='both',  # both major and minor ticks are affected
        bottom=
        'off',  # ticks along the bottom edge are off TODO: Paul, this does not work anymore :( 1/26/2020
        top='off',  # ticks along the top edge are off
        # labelbottom='off',    # labels along the bottom edge are off
    )

    if CREATE_PDF:
        plt.savefig(
            join(figure_directory, fig_filename),
            format='pdf',
            dpi=None,
            edgecolor='w',
            orientation='portrait',
            transparent=False,
            bbox_inches='tight',
            pad_inches=0.05,
            # frameon=None
        )
    if SHOW_PDF:
        showfig(join(figure_directory, fig_filename))
    if SHOW_PLOT:
        plt.show()
def test_M_observed():
    """Illustrate M_observed: non-backtracking or not
    Also shows that W^2 is denser for powerlaw graphs than uniform

    Generates one synthetic graph (variant selected by the duplicated
    assignments below), then prints the observed label-to-label path-count
    matrices M up to distance 8, with and without non-backtracking (NB).
    Prints results only; returns nothing.
    """
    print(
        "\n-- test_M_observed(): 'M_observed', uses: 'planted_distribution_model_H' --"
    )

    # --- Parameters for graph
    # The duplicated assignments act as a manual switch: the second
    # assignment (variant 1) wins; comment it out to run variant 2.
    n = 3000
    a = 1
    h = 8
    d = 10  # variant 2
    d = 2  # variant 1
    k = 3
    distribution = 'powerlaw'  # variant 2
    distribution = 'uniform'  # variant 1
    exponent = -0.5
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed

    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)

    # --- Print results
    distance = 8
    M_vec = M_observed(W, X0, distance=distance, NB=False)
    M_vec_EC = M_observed(W, X0, distance=distance, NB=True)

    print("Graph with n={} nodes and uniform d={} degrees".format(n, d))
    print("\nSum of entries and first rows of M_vec (without NB)")
    for i, M in enumerate(M_vec):  # M_vec[1:] to skip the first entry in list
        print("{}: {}, {}".format(i, np.sum(M), M[0]))
    print("\nSum of entries and first rows of M_vec (with NB)")
    for i, M in enumerate(M_vec_EC):
        print("{}: {}, {}".format(i, np.sum(M), M[0]))

    if True:
        print("\nFull matrices:")
        print("M_vec")
        for i, M in enumerate(M_vec):  # skip the first entry in list
            print("{}: \n{}".format(i, M))
        print("\nM_vec_EC")
        for i, M in enumerate(M_vec_EC):  # skip the first entry in list
            print("{}: \n{}".format(i, M))
def test_gradient_optimization2():
    """Compare estimateH with and without analytic gradient on one graph.

    Builds a k=7 powerlaw graph with a tiny labeled fraction (f=0.02),
    prints the observed M/H statistics, then runs estimateH twice
    (gradient=False / gradient=True) from the same data, reporting wall-clock
    time, the resulting energy, and the gradient at the gradient-based
    solution. Prints results only; returns nothing.
    """
    print(
        "\n-- 'estimateH, define_gradient_energy_H, define_energy_H, uses: planted_distribution_model_H, H_observed, M_observed, --"
    )

    # --- Parameters for graph
    n = 10000
    a = 1
    h = 2
    d = 10
    k = 7
    distribution = 'powerlaw'
    exponent = -0.3
    np.set_printoptions(precision=4)  # compact printing of the k x k matrices
    alpha0 = create_parameterized_alpha(k, a)
    H0 = create_parameterized_H(k, h, symmetric=True)
    f = 0.02
    print("Graph n={}, d={}, f={}".format(n, d, f))
    print("H0:\n{}".format(H0))

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed

    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, ind = replace_fraction_of_rows(X0, 1 - f)

    # --- M_vec, H_vec statistics
    distance = 5
    print("M_vec:")
    M_vec = M_observed(W, X1, distance=distance)
    for i, M in enumerate(M_vec):
        print("{}:\n{}".format(i, M))
    print("\nH_vec_observed:")
    H_vec = H_observed(W, X1, distance=distance)
    for i, H in enumerate(H_vec):
        print("{}:\n{}".format(i, H))

    # --- estimate_H based on distance 1 and uninformative point
    distance = 1
    weights = [1, 0, 0, 0, 0]  # only distance-1 statistics contribute
    print(
        "\n= Estimate H based on X1 and distance={} from uninformative point:".
        format(distance))
    # NOTE(review): h0 is computed but never used below.
    h0 = np.ones(int(k * (k - 1) / 2)).dot(
        1 / k)  # use uninformative matrix to start with
    energy_H = define_energy_H(H_vec_observed=H_vec,
                               weights=weights,
                               distance=distance)
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec,
                                                 weights=weights,
                                                 distance=distance)

    # Estimation without gradient (numerical optimization only)
    start = time.time()
    H1 = estimateH(X1, W, distance=distance, weights=weights, gradient=False)
    time_est = time.time() - start
    print("Estimated H without gradient:\n{}".format(H1))
    print("Time :{}".format(time_est))
    e = energy_H(H1)
    print("Energy at estimated point: {}".format(e))

    # Estimation with analytic gradient
    start = time.time()
    H2 = estimateH(X1, W, distance=distance, weights=weights, gradient=True)
    time_est = time.time() - start
    print("Estimated H with gradient:\n{}".format(H2))
    print("Time :{}".format(time_est))
    e = energy_H(H2)
    print("Energy at estimated point: {}".format(e))
    G = gradient_energy_H(H2)
    h = derivative_H_to_h(G)  # NOTE: shadows parameter-style 'h' above (unused afterwards)
    print("Gradient matrix at estimated point:\n{}".format(G))
    print("Gradient vector at estimated point:\n{}".format(h))
def test_gradient():
    """Evaluate energy and gradient of the H-estimation objective at sample points.

    Builds one synthetic graph, then for two weighting schemes (distance 1
    only, and distance 5 only) evaluates define_energy_H /
    define_gradient_energy_H at three points each: the observed H, a fixed
    example point, and a point moved in the negative gradient direction.
    Prints results only; returns nothing.
    """
    print(
        "\n-- 'define_gradient_energy_H, define_energy_H, uses: planted_distribution_model_H, H_observed, M_observed, --"
    )

    # --- Parameters for graph
    n = 1000
    a = 1
    h = 8
    d = 25
    k = 3
    distribution = 'powerlaw'
    exponent = -0.3
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    f = 0.5
    print("Graph n={}, d={}, f={}".format(n, d, f))
    print("H0:\n{}\n".format(H0))

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed

    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, ind = replace_fraction_of_rows(X0, 1 - f)

    # --- M_vec, H_vec statistics
    distance = 5
    print("M_vec:")
    M_vec = M_observed(W, X1, distance=distance)
    for i, M in enumerate(M_vec):
        print("{}:\n{}".format(i, M))
    print("H_vec:")
    H_vec = H_observed(W, X1, distance=distance)
    for i, H in enumerate(H_vec):
        print("{}:\n{}".format(i, H))

    # --- Gradient at multiple points for distance 1
    print("\n=Defining the gradient function with distance 1")
    distance = 1
    weights = [1, 0, 0, 0, 0]  # only distance-1 statistics contribute
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec,
                                                 weights=weights,
                                                 distance=distance)
    energy_H = define_energy_H(weights=weights,
                               distance=1,
                               H_vec_observed=H_vec)

    # 1st point: the directly observed compatibility matrix
    H_actual = H_vec[0]
    print(
        "1st example point: H_actual (row-stochastic frequencies of neighbors):\n{}"
        .format(H_actual))
    e = energy_H(H_actual)
    g = gradient_energy_H(H_actual)
    h = derivative_H_to_h(g)  # NOTE: shadows the 'h' parameter above (unused afterwards)
    print("energy: ", e)
    print("gradient:\n{}".format(g))
    print("projected gradient: ", h)

    # 2nd point: a fixed example matrix
    H_point = transform_hToH(np.array([0.2, 0.6, 0.2]), 3)
    print("\n2nd example point: H_point:\n{}".format(H_point))
    e = energy_H(H_point)
    g = gradient_energy_H(H_point)
    h = derivative_H_to_h(g)
    print("energy: ", e)
    print("gradient:\n{}".format(g))
    print("projected gradient: ", h)

    # 3rd point: a gradient-descent step from the 2nd point
    H_point2 = H_point - 0.45 * g
    print(
        "\n3rd example point in opposite direction of gradient: H_point2=H_point-0.45*gradient:\n{}"
        .format(H_point2))
    e = energy_H(H_point2)
    g = gradient_energy_H(H_point2)
    h = derivative_H_to_h(g)
    print("energy: ", e)
    print("gradient:\n{}".format(g))
    print("projected gradient: ", h)

    # --- Gradient at multiple points for distance 5
    distance = 5
    weights = [0, 0, 0, 0, 1]  # only distance-5 statistics contribute
    print("\n= Defining the gradient function with distance={} and weights={}".
          format(distance, weights))
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec,
                                                 weights=weights,
                                                 distance=distance)
    energy_H = define_energy_H(weights=weights,
                               distance=1,
                               H_vec_observed=H_vec)

    H_actual = H_vec[0]
    print("1st point: H_actual:\n{}".format(H_actual))
    e = energy_H(H_actual)
    g = gradient_energy_H(H_actual)
    h = derivative_H_to_h(g)
    print("energy: ", e)
    print("gradient:\n{}".format(g))
    print("projected gradient: ", h)

    H_point = transform_hToH(np.array([0.2, 0.6, 0.2]), 3)
    print("\n2nd point: H_point:\n{}".format(H_point))
    e = energy_H(H_point)
    g = gradient_energy_H(H_point)
    h = derivative_H_to_h(g)
    print("energy: ", e)
    print("gradient:\n{}".format(g))
    print("projected gradient: ", h)

    H_point2 = H_point - 1.5 * g
    print(
        "\n3rd point in opposite direction of gradient: H_point2:\n{}".format(
            H_point2))
    e = energy_H(H_point2)
    g = gradient_energy_H(H_point2)
    h = derivative_H_to_h(g)
    print("energy: ", e)
    print("gradient:\n{}".format(g))
    print("projected gradient: ", h)
def test_estimate_synthetic():
    """Compare H-estimation methods (MHE, DHE, LHE, holdout baseline) on a
    planted synthetic graph, printing estimates and timings.

    Prints only; returns None.
    """
    print(
        "\n\n-- test_estimate_synthetic(): 'estimateH', uses: 'M_observed', 'planted_distribution_model_H', --"
    )

    # --- Parameters for graph
    n = 1000
    a = 1
    h = 8
    d = 25
    k = 3
    distribution = 'powerlaw'
    exponent = -0.3
    f = 0.05                    # fraction of nodes that keep their labels
    print("n={}, a={},d={}, f={}".format(n, a, d, f))
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    print("H0:\n{}".format(H0))

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed
    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, ind = replace_fraction_of_rows(X0, 1 - f)

    # --- Print some neighbor statistics
    M_vec = M_observed(W, X0, distance=3, NB=True)
    print("\nNeighbor statistics in fully labeled graph:")
    print("M^(1): direct neighbors:\n{}".format(M_vec[1]))
    print("M^(2): distance-2 neighbors:\n{}".format(M_vec[2]))
    print("M^(3): distance-3 neighbors:\n{}".format(M_vec[3]))

    # --- MHE ---
    print("\nMHE: Estimate H based on X0 (fully labeled graph):")
    start = time.time()
    H1 = estimateH(X0, W, method='MHE', variant=1)
    H2 = estimateH(X0, W, method='MHE', variant=2)
    H3 = estimateH(X0, W, method='MHE', variant=3)
    time_est = time.time() - start
    print("Estimated H based on X0 (MHE), variant 1:\n{}".format(H1))
    print("Estimated H based on X0 (MHE), variant 2:\n{}".format(H2))
    print("Estimated H based on X0 (MHE), variant 3:\n{}".format(H3))
    print("Time for all three variants:{}".format(time_est))

    print("\nMHE: Estimate H based on X1 with f={}:".format(f))
    start = time.time()
    H1 = estimateH(X1, W, method='MHE', variant=1)
    H2 = estimateH(X1, W, method='MHE', variant=2)
    H3 = estimateH(X1, W, method='MHE', variant=3)
    time_est = time.time() - start
    print("Estimated H based on X1 (MHE), variant 1:\n{}".format(H1))
    print("Estimated H based on X1 (MHE), variant 2:\n{}".format(H2))
    print("Estimated H based on X1 (MHE), variant 3:\n{}".format(H3))
    print("Time for all three variants:{}".format(time_est))

    # BUG FIX: the original print was missing .format(f) and emitted a literal '{}'
    print(
        "\nMHE, variant=1: Estimate H based on X1 with f={}, but with initial correct vector:"
        .format(f))
    weight = [0, 0, 0, 0, 0]  # ignored for MHE
    initial_h0 = [0.1, 0.8, 0.1]
    H5 = estimateH(X1, W, method='MHE', weights=weight)
    H5_r = estimateH(X1, W, method='MHE', weights=weight, randomize=True)
    H5_i = estimateH(X1,
                     W,
                     method='MHE',
                     weights=weight,
                     initial_H0=transform_hToH(initial_h0, 3))
    # BUG FIX: messages referred to a nonexistent 'X5'; the estimates use X1
    print("Estimated H based on X1 only (MHE): \n{}".format(H5))
    print("Estimated H based on X1 only (MHE), randomize:\n{}".format(H5_r))
    print("Estimated H based on X1 only (MHE), initial=GT:\n{}".format(H5_i))

    # --- DHE ---
    print("\nDHE: Estimate H based on X1 with f={}:".format(f))
    start = time.time()
    H1 = estimateH(X1, W, method='DHE', variant=1, distance=1)
    H2 = estimateH(X1, W, method='DHE', variant=2, distance=1)
    H3 = estimateH(X1, W, method='DHE', variant=3, distance=1)
    time_est = time.time() - start
    print(
        "Estimated H based on X1 (DHE, distance=1), variant 1:\n{}".format(H1))
    print(
        "Estimated H based on X1 (DHE, distance=1), variant 2:\n{}".format(H2))
    print(
        "Estimated H based on X1 (DHE, distance=1), variant 3:\n{}".format(H3))
    print("Time for all three variants:{}".format(time_est))

    # --- LHE ---
    print("\nLHE: Estimate H based on X1 with f={}:".format(f))
    start = time.time()
    H1 = estimateH(X1, W, method='LHE')
    time_est = time.time() - start
    print("Estimated H based on X1 (LHE):\n{}".format(H1))
    print("Time for LHE:{}".format(time_est))

    # --- Baseline holdout method ---
    # Uses a larger labeled fraction f2 because the holdout split needs more data.
    f2 = 0.5
    X2, ind2 = replace_fraction_of_rows(X0, 1 - f2)
    print("\nHoldout method: Estimate H based on X2 with f={}):".format(f2))
    start = time.time()
    H2 = estimateH_baseline_serial(X2=X2,
                                   ind=ind2,
                                   W=W,
                                   numberOfSplits=1,
                                   numMax=10)
    time_est = time.time() - start
    print("Estimated H based on X2 (Holdout method) with f={}:\n{}".format(
        f2, H2))
    print("Time for Holdout method:{}".format(
        time_est))  # TODO: result suggests this method does not work?
def run(choice, create_data=False, add_data=False, show_plot=False,
        create_pdf=False, show_pdf=False, show_fig=True):
    """Produce the 'Fig_MHE_Optimal_ScalingFactor' experiment data and figure.

    Parameters
    ----------
    choice : int
        Selects one hard-coded experimental configuration (11-20).
    create_data : bool
        If True, (re)create the CSV data file from scratch.
    add_data : bool
        If True, append more repetitions to the existing CSV file.
    show_plot, create_pdf, show_pdf, show_fig : bool
        Control displaying/saving the resulting figure.

    Raises Warning for an unknown choice.  Returns None.
    """
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    SHOW_PDF = show_pdf
    SHOW_FIG1 = show_fig
    SHOW_FIG2 = show_fig  # NOTE(review): unused in this portion — possibly for a second figure elsewhere
    csv_filename = 'Fig_MHE_Optimal_ScalingFactor_f_lambda10_{}.csv'.format(
        CHOICE)
    header = [
        'currenttime',
        'option',  # one option corresponds to one choice of weight vector. In practice, one choice of scaling factor (for weight vector)
        'f',
        'scaling',
        'diff'
    ]  # L2 norm between H and estimate
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename),
                        header,
                        append=False)

    # -- Default Graph parameters
    rep = 100
    randomize = False
    initial_h0 = None  # initial vector to start finding optimal H
    distribution = 'powerlaw'
    exponent = -0.3
    rep_differentGraphs = 1
    EC = True
    # 42 logarithmically spaced label fractions from 0.9 downward (ratio 0.1^(1/12))
    f_vec = [0.9 * pow(0.1, 1 / 12)**x for x in range(42)]
    fraction_of_minimum = 1.1  # scaling parameters that lead to optimum except for this scaling factor are included
    ymin2 = 0.28
    ymax2 = 500
    xmin = 0.001
    # xmin = 0.0005
    xmax = None
    xtick_lab = [0.001, 0.01, 0.1, 1]
    # ytick_lab1 = np.arange(0, 1, 0.1)
    ytick_lab2 = [0.3, 1, 10, 100, 1000]
    ymax1 = 1.2
    ymin1 = 0.001
    # ytick_lab1 = [0.001, 0.01, 0.1, 1]
    k = 3
    a = 1
    stratified = True
    gradient = False
    n = 10000
    # color_vec = ['blue', 'orange', 'red']
    color_vec = ["#4C72B0", "#55A868", "#C44E52", "#CCB974", "#64B5CD"]
    color_vec = ["#4C72B0", "#8172B2", "#C44E52"]
    # label_vec = [r'$\tilde {\mathbf{H}}$', r'$\tilde{\mathbf{H}}^{(5)}_{\mathrm{NB}}$', r'$\tilde {\mathbf{H}}^{(5)}_{\mathrm{NB}}$ r']
    label_vec = ['MCE', 'DCE', 'DCEr']
    marker_vec = ['s', 'x', 'o']
    legendPosition = 'upper right'

    # -- Options: each CHOICE fixes homophily h, degree d, and per-option
    #    weight scaling / randomization / walk length
    if CHOICE == 11:
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 12:
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 13:
        h = 8
        d = 10
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 14:
        h = 3
        d = 10
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 15:
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 100]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    # elif CHOICE == 16:
    #     n = 10000
    #     h = 3
    #     d = 10
    #     option_vec = ['opt1', 'opt2', 'opt3']
    #     scaling_vec = [0, 50, 50]
    #     randomize_vec = [False, False, True]
    #     length_vec = [1, 5, 5]
    elif CHOICE == 17:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 100]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 18:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 19:
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 100]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 20:
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 100]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
        gradient = True
        legendPosition = 'center right'
    else:
        # NOTE(review): raising a Warning as an exception is unusual (ValueError
        # would be idiomatic) but is kept for backward compatibility with callers.
        raise Warning("Incorrect choice!")

    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for rs in range(1, rep_differentGraphs + 1):
            # print('Graph {}'.format(rs))
            # -- Create graph
            W, Xd = planted_distribution_model_H(n,
                                                 alpha=alpha0,
                                                 H=H0,
                                                 d_out=d,
                                                 distribution=distribution,
                                                 exponent=exponent,
                                                 directed=False,
                                                 debug=False)
            X0 = from_dictionary_beliefs(Xd)
            for r in range(1, rep + 1):
                # print('Repetition {}'.format(r))
                for f in f_vec:
                    # -- Sample labeled data
                    X1, ind = replace_fraction_of_rows(X0,
                                                       1 - f,
                                                       stratified=stratified)
                    # -- Calculate number of labeled neighbors
                    M_vec = M_observed(W, X1, distance=5, NB=True)
                    M = M_vec[1]
                    num_N = np.sum(M)
                    # print("f={:1.4f}, number labeled neighbors={}".format(f, num_N))
                    # print("M_vec:\n{}".format(M_vec))

                    # -- Create estimates and compare against GT
                    for option, scaling, randomize, length in zip(
                            option_vec, scaling_vec, randomize_vec,
                            length_vec):
                        H_est = estimateH(X1,
                                          W,
                                          method='DHE',
                                          variant=1,
                                          distance=length,
                                          EC=EC,
                                          weights=scaling,
                                          randomize=randomize,
                                          initial_H0=initial_h0,
                                          gradient=gradient)
                        diff = LA.norm(H_est - H0)
                        # renamed from 'tuple' — the original shadowed the builtin
                        record = [str(datetime.datetime.now())]
                        text = [option, f, scaling, diff]
                        record.extend(text)
                        save_csv_record(join(data_directory, csv_filename),
                                        record)
                        # print("diff={:1.4f}, H_est:\n{}".format(diff, H_est))

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15)))

    # Aggregate repetitions
    df2 = df1.groupby(['option', 'f']).agg \
        ({'diff': [np.mean, np.std, np.size],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values
                   ]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'diff_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(15)))

    # Pivot table
    df3 = pd.pivot_table(df2,
                         index=['f'],
                         columns=['option'],
                         values=['diff_mean', 'diff_std'])  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values
                   ]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy
    # df2.rename(columns={'time_size': 'count'}, inplace=True)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))

    # Extract values
    X_f = df3['f'].values  # plot x values
    Y = []
    Y_std = []
    for option in option_vec:
        Y.append(df3['diff_mean_{}'.format(option)].values)
        Y_std.append(df3['diff_std_{}'.format(option)].values)
    # print("X_f:\n", X_f)
    # print("Y:\n", Y)
    # print("Y_std:\n", Y_std)

    if SHOW_FIG1:
        # -- Setup figure
        fig_filename = 'Fig_MHE_Optimal_ScalingFactor_diff_f_lambda10_{}.pdf'.format(
            CHOICE)
        mpl.rcParams['backend'] = 'pdf'
        mpl.rcParams['lines.linewidth'] = 3
        mpl.rcParams['font.size'] = 14
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 16
        mpl.rcParams['axes.edgecolor'] = '111111'  # axes edge color
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['figure.figsize'] = [4, 4]
        mpl.rcParams[
            'xtick.major.pad'] = 4  # padding of tick labels: default = 4
        mpl.rcParams[
            'ytick.major.pad'] = 4  # padding of tick labels: default = 4
        fig = plt.figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # -- Draw the plots (std-dev band skipped for the middle option)
        for i, (color, marker) in enumerate(zip(color_vec, marker_vec)):
            p = ax.plot(X_f,
                        Y[i],
                        color=color,
                        linewidth=3,
                        label=label_vec[i],
                        marker=marker)
            if i != 1:
                ax.fill_between(X_f,
                                Y[i] + Y_std[i],
                                Y[i] - Y_std[i],
                                facecolor=color,
                                alpha=0.2,
                                edgecolor='none')
        plt.xscale('log')
        plt.yscale('log')

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        plt.title(r'$\!\!\!n\!=\!{}\mathrm{{k}}, h\!=\!{}, d\!=\!{}{}'.format(
            int(n / 1000), h, d, distribution_label))
        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(
            handles,
            labels,
            loc=legendPosition,  # 'upper right'
            handlelength=1.5,
            labelspacing=0,  # distance between label entries
            handletextpad=0.3,  # distance between label and the line representation
            # title='Variants',
            borderaxespad=0.2,  # distance between legend and the outer axes
            borderpad=0.1,  # padding inside legend box
        )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_lab)
        # plt.yticks(ytick_lab1, ytick_lab1)
        plt.grid(b=True,
                 which='minor',
                 axis='both',
                 alpha=0.2,
                 linestyle='solid',
                 linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        plt.grid(b=True,
                 which='major',
                 axis='y',
                 alpha=0.2,
                 linestyle='solid',
                 linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        plt.xlabel(r'Label Sparsity $(f)$', labelpad=0)  # labelpad=0
        plt.ylabel(r'L2 norm', labelpad=-5)
        if xmin is None:
            xmin = plt.xlim()[0]
        if xmax is None:
            xmax = plt.xlim()[1]
        if ymin1 is None:
            # BUG FIX: original read plt.ylim()[1] (the UPPER bound) for ymin1;
            # the lower axis limit is index [0], matching the xmin case above.
            ymin1 = plt.ylim()[0]
        if ymax1 is None:
            ymax1 = plt.ylim()[1]
        plt.xlim(xmin, xmax)
        plt.ylim(ymin1, ymax1)

    if CREATE_PDF:
        plt.savefig(join(figure_directory, fig_filename),
                    format='pdf',
                    dpi=None,
                    edgecolor='w',
                    orientation='portrait',
                    transparent=False,
                    bbox_inches='tight',
                    pad_inches=0.05,
                    frameon=None)
    if SHOW_FIG1:
        plt.show()
    if SHOW_PDF:
        os.system('{} "'.format(open_cmd[sys.platform]) +
                  join(figure_directory, fig_filename) +
                  '"')  # shows actually created PDF