def test_H_observed_EC2_variants():
    """Illustrate the variants of H_observed.

    Builds a planted-partition graph, hides a fraction f of the labels, then
    prints powers of the ground-truth H0, the observed edge-count statistics
    (M_observed), and the observed compatibility estimates (H_observed) for
    every combination of variant in {1, 2} and non-backtracking flag EC.
    """
    print(
        "\n\n-- test_H_observed_EC2_variants(): 'H_observed', 'M_observed', uses: 'planted_distribution_model_H' --"
    )

    # --- Parameters for graph
    n = 3000
    a = 1
    h = 8
    d = 2
    k = 3                       # number of classes
    f = 0.2                     # fraction of nodes that keep their labels
    distribution = 'uniform'
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    # --- Create graph
    RANDOMSEED = None           # For repeatability
    random.seed(RANDOMSEED)     # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed

    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=None,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, _ = replace_fraction_of_rows(X0, f, avoidNeighbors=False)

    # --- Print first rows of matrices
    distance = 3
    print("First rows of powers of H0:")
    # loop variable renamed from 'k' to 'power': 'k' already holds the number
    # of classes used above and shadowing it invites mistakes
    for power in range(1, distance + 1):
        print("{}: {}".format(power, np.linalg.matrix_power(H0, power)[0]))

    print("\nNumber of observed edges between labels (M_observed):")
    M = M_observed(W, X1, distance=distance, NB=True)
    print("M[0]:\n{}".format(M[0]))
    # BUG FIX: label previously said "M[2]" while printing M[1]
    print("M[1]:\n{}".format(M[1]))

    for EC in [False, True]:
        for variant in [1, 2]:
            print("\nP (H observed): variant {} with EC={}".format(
                variant, EC))
            H_vec = H_observed(W,
                               X1,
                               distance=distance,
                               NB=EC,
                               variant=variant)
            for i, H in enumerate(H_vec):
                print("{}:\n{}".format(i, H))
def run(choice, create_data=False, add_data=False, create_fig=True, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False, show_arrows=True):
    """Create and plot data for the optimal scaling factor (lambda) of MHE vs. node degree d.

    Produces two figures per CHOICE:
      1. L2 norm between the estimated H and the ground truth H0, as a function of d,
         for the unweighted estimate and for the per-d optimal scaling factor.
      2. The (near-)optimal scaling factor as a function of d.
    Results are accumulated in a CSV file so repetitions can be added incrementally
    (CREATE_DATA starts fresh, ADD_DATA appends).
    """
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PDF = show_pdf
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    csv_filename = 'Fig_MHE_Optimal_ScalingFactor_d_{}.csv'.format(CHOICE)
    header = [
        'currenttime',
        'option',   # one option corresponds to one choice of weight vector. In practice, one choice of scaling factor (for weight vector)
        'd',
        'scaling',
        'diff',     # L2 norm between H and estimate
    ]
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Default Graph parameters
    randomize = False
    initial_h0 = None           # initial vector to start finding optimal H
    exponent = -0.3
    length = 5
    variant = 1
    rep = 26
    EC = True
    # scaling factors: 0 (unweighted baseline) plus a log-spaced grid
    scaling_vec = [0] + [0.1 * pow(10, 1 / 8)**x for x in range(33)]
    num_options = len(scaling_vec)
    scaling_vec = np.array(scaling_vec)
    weight = np.array([np.power(scaling_vec, i) for i in range(5)])
    weight = weight.transpose()
    d_vec = list(range(3, 9)) + [10 * pow(10, 1 / 12)**x for x in range(13)]
    # print(d_vec)
    d_vec = [int(i) for i in d_vec]
    fraction_of_minimum = 1.1   # scaling parameters that lead to optimum except for this scaling factor are included
    ymin2 = 0.3
    ymax2 = 500
    xmin1 = 3
    xmax1 = 100
    xmin2 = 2.87
    xmax2 = 105
    xtick_lab = [3, 5, 10, 30, 100]
    # ytick_lab1 = np.arange(0, 1, 0.1)
    ytick_lab1 = [0.001, 0.01, 0.1, 1]
    ytick_lab2 = [0.3, 1, 10, 100, 1000]
    ymax1 = 0.2
    ymin1 = 0.001
    k = 3
    a = 1

    # -- Options
    if CHOICE == 1:             # #=100
        n = 1000
        h = 8
        f = 0.1
        distribution = 'uniform'
        ytick_lab1 = [0.01, 0.1, 0.5]
        ymax1 = 0.5
        ymin1 = 0.01
    elif CHOICE == 2:           # selection #=124
        n = 10000
        h = 8
        f = 0.1
        distribution = 'powerlaw'
        ymin1 = 0.003
    elif CHOICE == 3:           # special selection #=100
        n = 10000
        h = 8
        f = 0.05
        distribution = 'powerlaw'
        ymin1 = 0.005
        ymax1 = 0.5
    elif CHOICE == 4:           # selection #=100
        n = 10000
        h = 3
        f = 0.1
        distribution = 'powerlaw'
        ymin1 = 0.003
    elif CHOICE == 5:           # #=5
        n = 10000
        h = 3
        f = 0.1
        distribution = 'uniform'
    elif CHOICE == 6:           # #=5
        n = 10000
        h = 8
        f = 0.1
        distribution = 'uniform'
    elif CHOICE == 7:           # special selection #=100
        n = 10000
        h = 3
        f = 0.05
        distribution = 'powerlaw'
        ymax1 = 0.401
        ymin1 = 0.003
    else:
        raise Warning("Incorrect choice!")

    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    RANDOMSEED = None           # For repeatability
    random.seed(RANDOMSEED)     # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for r in range(1, rep + 1):
            # print('Repetition {}'.format(r))
            for d in d_vec:
                # print('d: {}'.format(d))
                # -- Create graph
                W, Xd = planted_distribution_model_H(n,
                                                     alpha=alpha0,
                                                     H=H0,
                                                     d_out=d,
                                                     distribution=distribution,
                                                     exponent=exponent,
                                                     directed=False,
                                                     debug=False)
                X0 = from_dictionary_beliefs(Xd)
                X1, ind = replace_fraction_of_rows(X0, 1 - f)

                # -- Create estimates and compare against GT
                for option in range(num_options):
                    H_est = estimateH(X1, W,
                                      method='MHE',
                                      variant=variant,
                                      distance=length,
                                      EC=EC,
                                      weights=weight[option],
                                      randomize=randomize,
                                      initial_h0=initial_h0)
                    diff = LA.norm(H_est - H0)
                    # renamed from 'tuple' to avoid shadowing the builtin
                    record = [str(datetime.datetime.now())]
                    record.extend([option, d, scaling_vec[option], diff])
                    save_csv_record(join(data_directory, csv_filename), record)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15)))

    # Aggregate repetitions
    df2 = df1.groupby(['d', 'scaling']).agg \
        ({'diff': [np.mean, np.std, np.size],       # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)                   # remove the index hierarchy
    df2.rename(columns={'diff_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(15)))

    # find minimum diff for each d, then join it back into df2
    df3 = df2.groupby(['d']).agg \
        ({'diff_mean': [np.min],                    # Multiple Aggregates
          })
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(90)))
    df4 = pd.merge(
        df2, df3, left_on='d', right_index=True
    )  # ! join df2 and df3 on column "d" from df2, and index (=d) from df3
    # df4 = df4.drop(['index'], axis=1)  # does not work
    # print("\n-- df4 (length {}):\n{}".format(len(df4.index), df4.head(25)))

    # Select columns for energy comparison plot: H0 (scaling == 0, i.e. unweighted)
    df5 = df4.query('scaling==0')
    # print("\n-- df5 (length {}):\n{}".format(len(df5.index), df5.head(90)))
    # df5.drop('option', axis=1, inplace=True)  # gives warning
    df5 = df5.drop(['diff_mean_amin'], axis=1)
    # print("\n-- df5: scaling==0 (length {}):\n{}".format(len(df5.index), df5.head(90)))
    X_d = df5['d'].values                       # plot value
    Y_diff0 = df5['diff_mean'].values           # plot value
    Y_diff0_std = df5['diff_std'].values        # plot value

    # Select columns for energy comparison plot: H5 optimal (best scaling per d)
    df6 = df4.copy()
    # print("\n-- df6 (length {}):\n{}".format(len(df6.index), df6.head(90)))
    df6['cond'] = np.where((df6['diff_mean'] == df6['diff_mean_amin']), True, False)
    df6 = df6.query('cond==True')
    df6.drop(['cond', ], axis=1, inplace=True)
    # print("\n-- df6: best scaling (length {}):\n{}".format(len(df6.index), df6.head(90)))
    Y_diff1 = df6['diff_mean'].values           # plot value
    Y_diff1_std = df6['diff_std'].values        # plot value
    Y_scaling = df6['scaling'].values           # plot value

    # Select all (d, scaling) combinations that are close to optimal
    df4['cond'] = np.where(
        (df4['diff_mean'] <= fraction_of_minimum * df4['diff_mean_amin']),
        True, False)
    df7 = df4.query('cond==True')
    df7.drop(['cond', ], axis=1, inplace=True)
    # print("\n-- df7: all good data points(length {}):\n{}".format(len(df7.index), df7.head(90)))
    X_points = df7['d'].values                  # plot value
    Y_points = df7['scaling'].values            # plot value

    # Select average (and lower and upper bound) on good data points
    df8 = df7.groupby(['d']).agg \
        ({'scaling': [np.mean, np.amin, np.amax, ],  # Multiple Aggregates
          })
    df8.columns = ['_'.join(col).strip() for col in df8.columns.values]  # flatten the column hierarchy
    df8.reset_index(inplace=True)               # remove the index hierarchy
    # print("\n-- df8: input for moving average (length {}):\n{}".format(len(df8.index), df8.head(15)))
    Y_point_mean = df8['scaling_mean'].values   # plot value

    if SHOW_PLOT or SHOW_PDF or CREATE_PDF:
        # -- Setup figure 1: L2 norm vs d
        fig_filename = 'Fig_MHE_Optimal_ScalingFactor_diff_d_{}.pdf'.format(CHOICE)
        mpl.rcParams['backend'] = 'pdf'
        mpl.rcParams['lines.linewidth'] = 3
        mpl.rcParams['font.size'] = 14
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 16
        mpl.rcParams['axes.edgecolor'] = '111111'   # axes edge color
        mpl.rcParams['grid.color'] = '777777'       # grid color
        mpl.rcParams['figure.figsize'] = [4, 4]
        mpl.rcParams['xtick.major.pad'] = 4         # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 4         # padding of tick labels: default = 4
        fig = plt.figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # -- Draw the plots
        p1 = ax.plot(X_d, Y_diff0, color='blue', linewidth=2)
        ax.fill_between(X_d,
                        Y_diff0 + Y_diff0_std,
                        Y_diff0 - Y_diff0_std,
                        facecolor='blue',
                        alpha=0.2,
                        edgecolor='none',
                        label=r'$\tilde {\mathbf{H}}$')
        p2 = ax.plot(X_d, Y_diff1, color='red', linewidth=2)
        ax.fill_between(X_d,
                        Y_diff1 + Y_diff1_std,
                        Y_diff1 - Y_diff1_std,
                        facecolor='red',
                        alpha=0.2,
                        edgecolor='none',
                        label=r'$\tilde {\mathbf{H}}^{\ell}_{\mathrm{EC}}$')
        plt.xscale('log')
        plt.yscale('log')

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        plt.title(r'$\!\!\!n\!=\!{}\mathrm{{k}}, h\!=\!{}, f\!=\!{}{}'.format(
            int(n / 1000), h, f, distribution_label))
        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(
            handles,
            labels,
            loc='upper right',      # 'upper right'
            handlelength=1.5,
            labelspacing=0,         # distance between label entries
            handletextpad=0.3,      # distance between label and the line representation
            # title='Variants',
            borderaxespad=0.2,      # distance between legend and the outer axes
            borderpad=0.3,          # padding inside legend box
        )
        frame = legend.get_frame()
        # frame.set_linewidth(0.0)
        frame.set_alpha(0.9)        # 0.8

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_lab)
        plt.yticks(ytick_lab1, ytick_lab1)
        # NOTE(review): 'b=' was renamed to 'visible=' in matplotlib 3.5 and
        # removed in 3.6 -- confirm the pinned matplotlib version before upgrading
        plt.grid(b=True,
                 which='minor',
                 axis='both',
                 alpha=0.2,
                 linestyle='solid',
                 linewidth=0.5)     # linestyle='dashed', which='minor', axis='y',
        plt.grid(b=True,
                 which='major',
                 axis='y',
                 alpha=0.2,
                 linestyle='solid',
                 linewidth=0.5)     # linestyle='dashed', which='minor', axis='y',
        plt.xlabel(r'$d$', labelpad=0)      # labelpad=0
        plt.ylabel(r'L$^2$ norm', labelpad=-5)
        if xmin1 is None:
            xmin1 = plt.xlim()[0]
        if xmax1 is None:
            xmax1 = plt.xlim()[1]
        if ymin1 is None:
            # BUG FIX: previously read plt.ylim()[1] (the UPPER bound) as the
            # default lower limit; the parallel figure-2 code correctly uses [0]
            ymin1 = plt.ylim()[0]
        if ymax1 is None:
            ymax1 = plt.ylim()[1]
        plt.xlim(xmin1, xmax1)
        plt.ylim(ymin1, ymax1)

        if CREATE_PDF:
            plt.savefig(join(figure_directory, fig_filename),
                        format='pdf',
                        dpi=None,
                        edgecolor='w',
                        orientation='portrait',
                        transparent=False,
                        bbox_inches='tight',
                        pad_inches=0.05,
                        frameon=None)
        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))  # shows actually created PDF
        if SHOW_PLOT:
            plt.show()

    if SHOW_PLOT or SHOW_PDF or CREATE_PDF:
        # -- Setup figure 2: optimal scaling factor vs d
        fig_filename = 'Fig_MHE_Optimal_ScalingFactor_lambda_d_{}.pdf'.format(CHOICE)
        mpl.rcParams['backend'] = 'pdf'
        mpl.rcParams['lines.linewidth'] = 3
        mpl.rcParams['font.size'] = 14
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['axes.edgecolor'] = '111111'   # axes edge color
        mpl.rcParams['grid.color'] = '777777'       # grid color
        mpl.rcParams['figure.figsize'] = [4, 4]
        mpl.rcParams['xtick.major.pad'] = 4         # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 4         # padding of tick labels: default = 4
        fig = plt.figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # -- Draw the plots
        p1 = ax.plot(
            X_points,
            Y_points,
            color='0.8',
            linewidth=0,
            marker='o',
            markeredgewidth=0.0,
            clip_on=False,          # cut off data points outside of plot area
            zorder=9,
            markevery=1,
            label=r'$\!\leq${} Opt'.format(fraction_of_minimum))
        p2 = ax.plot(
            X_d,
            Y_scaling,
            color='red',
            linewidth=0,
            marker='o',
            clip_on=False,          # cut off data points outside of plot area
            zorder=10,
            markevery=1,
            label=r'Opt$(\lambda|d)$')
        plt.xscale('log')
        plt.yscale('log')

        # Draw the moving average from Y_point_mean
        def movingaverage(interval, window_size):
            """Simple centered moving average via convolution."""
            window = np.ones(int(window_size)) / float(window_size)
            return np.convolve(interval, window, 'same')

        Y_point_mean_window = movingaverage(Y_point_mean, 3)
        p5 = ax.plot(X_d, Y_point_mean_window, color='red', linewidth=1, marker=None)
        # p3 = ax.plot(X_d, Y_point_mean, color='red', linewidth=1, marker=None)

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        plt.title(r'$\!\!\!n\!=\!{}\mathrm{{k}}, h\!=\!{}, f\!=\!{}{}'.format(
            int(n / 1000), h, f, distribution_label))
        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(
            handles[::-1],
            labels[::-1],
            loc='upper left',       # 'upper right'
            handlelength=1,
            labelspacing=0,         # distance between label entries
            handletextpad=0.3,      # distance between label and the line representation
            borderaxespad=0.3,      # distance between legend and the outer axes
            borderpad=0.1,          # padding inside legend box
            numpoints=1,            # put the marker only once
        )
        frame = legend.get_frame()
        # frame.set_linewidth(0.0)
        frame.set_alpha(0.9)        # 0.8

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_lab)
        plt.yticks(ytick_lab2, ytick_lab2)
        plt.grid(b=True,
                 which='minor',
                 alpha=0.2,
                 linestyle='solid',
                 linewidth=0.5)     # linestyle='dashed', which='minor', axis='y',
        plt.xlabel(r'$d$', labelpad=0)      # labelpad=0
        plt.ylabel(r'$\lambda$', labelpad=0, rotation=0)
        if xmin2 is None:
            xmin2 = plt.xlim()[0]
        if xmax2 is None:
            xmax2 = plt.xlim()[1]
        if ymin2 is None:
            ymin2 = plt.ylim()[0]
        if ymax2 is None:
            ymax2 = plt.ylim()[1]
        plt.xlim(xmin2, xmax2)
        plt.ylim(ymin2, ymax2)

        if CREATE_PDF:
            plt.savefig(join(figure_directory, fig_filename),
                        format='pdf',
                        dpi=None,
                        edgecolor='w',
                        orientation='portrait',
                        transparent=False,
                        bbox_inches='tight',
                        pad_inches=0.05,
                        frameon=None)
        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))  # shows actually created PDF
        if SHOW_PLOT:
            plt.show()
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, show_fig=True):
    """Compare the three DHE estimation variants across max path lengths and scaling factors.

    Figure 1 (bar chart, per selected option): L2 norm between the estimated H
    and the ground truth, for variants 1-3 and path lengths 1-5.
    Figure 2 (curves): L2 norm of variant 1 vs scaling factor, one curve per path length.
    Results are accumulated in a CSV file (CREATE_DATA starts fresh, ADD_DATA appends).
    """
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    CREATE_PDF = create_pdf
    SHOW_PDF = show_pdf
    SHOW_FIG1 = show_fig    # bar diagram
    SHOW_FIG2 = False       # curve
    csv_filename = 'Fig_MHE_Variants_{}.csv'.format(CHOICE)
    header = ['currenttime',
              'option',     # one option corresponds to one choice of weight vector. In practice, one choice of scaling factor (for weight vector)
              'variant',    # 1, 2, 3 (against GT), and 1-2, 1-3, 2-3 (against each other)
              'length',
              'diff',       # L2 norm between H and estimate
              'time']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # Default Graph parameters and options
    n = 10000
    f = 0.1
    h = 8
    distribution = 'uniform'
    randomize = False
    initial_h0 = None       # initial vector to start finding optimal H
    initial_H0 = None
    exponent = -0.3
    length = 5
    rep = 10
    # BUG FIX: this definition was commented out although EC is indexed below
    # (EC[option] in estimateH, and EC[i] in the SHOW_FIG2 row filter), which
    # raised NameError. Option 0 runs without echo cancellation, all others with.
    EC = [False] + [True] * 31
    scaling_vec = [1, 0.1, 0.14, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.4,
                   2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    num_options = len(scaling_vec)
    scaling_vec = np.array(scaling_vec)
    weight = np.array([np.power(scaling_vec, i) for i in range(5)])
    weight = weight.transpose()
    ymin1 = None
    ymax1 = None
    xmin2 = None
    xmax2 = None
    ymin2 = None
    ymax2 = None
    # fig1_index = [0, 11, 16, 21, 23, 24, 25, 26]  # which index of scaling options to display if CHOICE_FIG_BAR_VARIANT==True
    fig1_index = [21]
    smartInit = False
    smartInitRandomize = False
    delta = 0.1
    variant_vec = [1, 2, 3]     # for figure 1
    variant_vec = [1]           # for figure 2, to speed up calculations (overwrites the line above on purpose)

    if CHOICE == 1:             # ok
        n = 1000
        d = 10
        ymax2 = 0.24
    elif CHOICE == 2:           # ok
        n = 1000
        d = 10
        distribution = 'powerlaw'
        ymax2 = 0.24
    elif CHOICE == 3:           # ok
        n = 1000
        d = 5
        distribution = 'powerlaw'
        ymax2 = 0.4
    elif CHOICE == 4:           # ok
        n = 1000
        d = 25
        distribution = 'powerlaw'
        ymax2 = 0.16
    elif CHOICE == 10:          # ok
        d = 10
        ymax2 = 0.1
    elif CHOICE == 11:          # (selection)
        d = 10
        distribution = 'powerlaw'
        exponent = -0.5
        ymax2 = 0.1
        ymax1 = 0.14
    elif CHOICE == 12:
        d = 3
        ymax2 = 0.19
        ymax1 = 0.2
    elif CHOICE == 13:
        d = 25
        ymax2 = 0.05
    elif CHOICE == 14:          # selection (for comparison)
        d = 25
        distribution = 'powerlaw'
        ymax2 = 0.046
        ymax1 = 0.08
    elif CHOICE == 15:          # selection
        d = 25
        f = 0.05
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 0.095
    elif CHOICE == 16:          # selection TODO !!!
        d = 25
        f = 0.01
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 0.4
        ymin2 = 0
    elif CHOICE == 17:          # selection TODO !!!
        d = 25
        f = 0.003
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0
    elif CHOICE == 18:          # selection TODO !!!
        d = 25
        f = 0.001
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0
    elif CHOICE == 20:
        h = 3
        d = 10
        ymax1 = 0.12
    elif CHOICE == 21:          # selection (for comparison against f=0.05: 26)
        h = 3
        d = 10
        distribution = 'powerlaw'
        exponent = -0.5
        ymax1 = 0.15
        ymax2 = 0.099
    elif CHOICE == 22:          # selection (for comparison with start from GT: 44)
        h = 3
        d = 3
        ymax1 = 0.25
        ymax2 = 0.39
    elif CHOICE == 23:          # ok
        h = 3
        d = 25
        ymax1 = 0.1
        ymax2 = 0.12
    elif CHOICE == 24:
        h = 3
        d = 25
        distribution = 'powerlaw'
        ymax1 = 0.08
    elif CHOICE == 25:          # main selection
        h = 3
        d = 25
        f = 0.05
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 0.125
    elif CHOICE == 26:          # selection, #=200
        h = 3
        d = 10
        f = 0.05
        distribution = 'powerlaw'
        ymax1 = 0.21
        ymax2 = 0.26
    elif CHOICE == 27:          # selection, #=200
        d = 10
        f = 0.05
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 0.21
    elif CHOICE == 60:          # ??? #=50 !!!, 50 more
        d = 25
        f = 0.01
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 0.6
    elif CHOICE == 61:          # ??? #=50, 100 more
        d = 25
        f = 0.005
        distribution = 'powerlaw'
        ymax1 = 0.99
        ymax2 = 0.99
    elif CHOICE == 62:          # ??? #=50, 150 more
        h = 3
        d = 25
        f = 0.01
        distribution = 'powerlaw'
        ymax1 = 0.6
        ymax2 = 0.6
    elif CHOICE == 63:          # ??? #=50, 150 more
        h = 3
        d = 25
        f = 0.005
        distribution = 'powerlaw'
        ymax1 = 1.2
        ymax2 = 1.0

    # --- Randomization ---
    elif CHOICE == 32:          # randomized 22
        randomize = True
        h = 3
        d = 3
        ymax1 = 0.25
        ymax2 = 0.4

    # --- GT ---
    # versions where GT is supplied to start optimization,
    # just to check if the global optimum of the energy function actually corresponds to the GT
    elif CHOICE == 42:          # selection, #=200 (for comparison with start from GT); version of 22
        initial_h0 = [0.2, 0.6, 0.2]    # start optimization at optimal point
        h = 3
        d = 3
        ymax1 = 0.25
        ymax2 = 0.39
    elif CHOICE == 43:          # selection, #=200; version of 15
        initial_h0 = [0.1, 0.8, 0.1]    # start optimization at optimal point
        h = 8
        d = 25
        f = 0.05
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 0.095
    elif CHOICE == 44:          # selection, #=200 (for comparison); version of 12
        initial_h0 = [0.1, 0.8, 0.1]    # start optimization at optimal point
        h = 8
        d = 3
        ymax1 = 0.2
        ymax2 = 0.19
    elif CHOICE == 45:          # selection, #=200; version of 25
        h = 3
        d = 25
        f = 0.05
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 0.125
    elif CHOICE == 46:          # selection TODO !!!
        initial_h0 = [0.1, 0.8, 0.1]    # start optimization at optimal point
        d = 25
        f = 0.01
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 0.4
        ymin2 = 0.0
    elif CHOICE == 47:          # selection TODO !!!
        initial_h0 = [0.1, 0.8, 0.1]    # start optimization at optimal point
        d = 25
        f = 0.003
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0.0
    elif CHOICE == 48:          # selection TODO !!! (smart init)
        d = 25
        f = 0.003
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0.0
        smartInit = True
    elif CHOICE == 49:          # selection TODO !!! (smart init, randomized)
        d = 25
        f = 0.003
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0.0
        smartInit = True
        smartInitRandomize = True   # initialize optimization at several random points for smart init only
    elif CHOICE == 50:          # selection TODO !!!
        initial_h0 = [0.1, 0.8, 0.1]    # start optimization at optimal point
        d = 25
        f = 0.001
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0.0
    elif CHOICE == 51:          # selection TODO !!!
        d = 25
        f = 0.001
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0.0
        randomize = True        # start optimization at several random points
        delta = 0.1
    elif CHOICE == 52:          # selection TODO !!!
        d = 25
        f = 0.001
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0.0
        randomize = True        # start optimization at several random points
        delta = 0.2
    elif CHOICE == 53:          # selection TODO !!!
        d = 25
        f = 0.001
        distribution = 'powerlaw'
        ymax1 = 0.15
        ymax2 = 1.
        ymin2 = 0.0
        randomize = True        # start optimization at several random points
        delta = 0.3
    else:
        raise Warning("Incorrect choice!")

    k = 3
    a = 1
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    RANDOMSEED = None           # For repeatability
    random.seed(RANDOMSEED)     # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for r in range(1, rep + 1):
            # print('Repetition {}'.format(r))

            # -- Create graph
            W, Xd = planted_distribution_model_H(n,
                                                 alpha=alpha0,
                                                 H=H0,
                                                 d_out=d,
                                                 distribution=distribution,
                                                 exponent=exponent,
                                                 directed=False,
                                                 debug=False)
            X0 = from_dictionary_beliefs(Xd)
            X1, ind = replace_fraction_of_rows(X0, 1 - f)

            # -- Create estimates and compare against GT, or against each other
            H_est = {}
            # loop variable renamed from 'length' to 'ell': the original reused
            # and shadowed the configuration value 'length' as its own loop variable
            for ell in range(1, length + 1):
                for option in range(num_options):
                    for variant in variant_vec:
                        start = time.time()
                        if smartInit:
                            startWeight = 0.2
                            initial_H0 = estimateH(X1, W,
                                                   method='DHE',
                                                   variant=variant,
                                                   distance=5,
                                                   EC=EC[option],
                                                   weights=startWeight,
                                                   randomize=smartInitRandomize)
                        H_est[variant] = estimateH(X1, W,
                                                   method='DHE',
                                                   variant=variant,
                                                   distance=ell,
                                                   EC=EC[option],
                                                   weights=weight[option],
                                                   randomize=randomize,
                                                   initial_h0=initial_h0,
                                                   initial_H0=initial_H0,
                                                   delta=delta)
                        time_est = time.time() - start
                        diff = LA.norm(H_est[variant] - H0)
                        # renamed from 'tuple' to avoid shadowing the builtin;
                        # (without np, entries keep clean format: np would transform integers to float)
                        record = [str(datetime.datetime.now())]
                        record.extend([option, variant, ell, diff, time_est])
                        save_csv_record(join(data_directory, csv_filename), record)

                    # -- Compare variants against each other
                    for variant1 in variant_vec:
                        for variant2 in variant_vec:
                            if variant1 < variant2:
                                diff = LA.norm(H_est[variant1] - H_est[variant2])
                                record = [str(datetime.datetime.now())]
                                # NOTE: time_est is the timing of the last variant estimated above
                                record.extend([option, "{}-{}".format(variant1, variant2), ell, diff, time_est])
                                save_csv_record(join(data_directory, csv_filename), record)

    if SHOW_FIG1:
        # -- Read, aggregate, and pivot data for all options
        df1 = pd.read_csv(join(data_directory, csv_filename))
        # print("\n-- df1 (length {}):\n{}".format(len(df1.index), df1.head(15)))
        df2 = df1.groupby(['option', 'variant', 'length']).agg \
            ({'diff': [np.mean, np.std, np.size],   # Multiple Aggregates
              'time': [np.mean, np.std],
              })
        df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
        df2.reset_index(inplace=True)               # remove the index hierarchy
        df2.rename(columns={'diff_size': 'count'}, inplace=True)
        # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(90)))

        # -- Create one separate figure for each option
        for option in range(num_options):
            if option not in fig1_index:
                continue
            scaling = scaling_vec[option]
            fig_filename = 'Fig_MHE_Variants_{}_{}.pdf'.format(CHOICE, option)
            df3 = df2.query('option==@option')      # Query
            df3 = pd.pivot_table(df3, index=['length'], columns=['variant'], values=['diff_mean', 'diff_std'])  # Pivot
            # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
            df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
            df3.reset_index(level=0, inplace=True)  # get length into columns
            # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
            df3.drop(['diff_std_1-2', 'diff_std_1-3', 'diff_std_2-3', ], axis=1, inplace=True)
            # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))

            # -- Setup figure
            mpl.rcParams['backend'] = 'pdf'
            mpl.rcParams['lines.linewidth'] = 3
            mpl.rcParams['font.size'] = 16
            mpl.rcParams['axes.labelsize'] = 20
            mpl.rcParams['axes.titlesize'] = 16
            mpl.rcParams['xtick.labelsize'] = 16
            mpl.rcParams['ytick.labelsize'] = 16
            mpl.rcParams['legend.fontsize'] = 14
            mpl.rcParams['axes.edgecolor'] = '111111'   # axes edge color
            mpl.rcParams['grid.color'] = '777777'       # grid color
            mpl.rcParams['figure.figsize'] = [4, 4]
            mpl.rcParams['xtick.major.pad'] = 4         # padding of tick labels: default = 4
            mpl.rcParams['ytick.major.pad'] = 4         # padding of tick labels: default = 4
            fig = plt.figure()
            ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

            # -- Extract values into columns (plotting dataframes with bars plus error lines and lines gave troubles)
            l_vec = df3['length'].values    # .tolist() does not work with bar plot, requires np.array
            diff_mean_1 = df3['diff_mean_1'].values
            diff_mean_2 = df3['diff_mean_2'].values
            diff_mean_3 = df3['diff_mean_3'].values
            diff_std_1 = df3['diff_std_1'].values
            # BUG FIX: the std columns for variants 2 and 3 were read swapped
            # (diff_std_3 <- 'diff_std_2' and diff_std_2 <- 'diff_std_3')
            diff_std_2 = df3['diff_std_2'].values
            diff_std_3 = df3['diff_std_3'].values

            # -- Draw the bar plots
            width = 0.2                     # the width of the bars
            bar1 = ax.bar(l_vec - 1.5 * width, diff_mean_1, width, color='blue',
                          yerr=diff_std_1,
                          error_kw={'ecolor': 'black', 'linewidth': 2},  # error-bars colour
                          label=r'1')
            bar2 = ax.bar(l_vec - 0.5 * width, diff_mean_2, width, color='darkorange',
                          yerr=diff_std_2,
                          error_kw={'ecolor': 'black', 'linewidth': 2},  # error-bars colour
                          label=r'2')
            # BUG FIX: bar 3 previously reused diff_std_1 for its error bars
            bar3 = ax.bar(l_vec + 0.5 * width, diff_mean_3, width, color='green',
                          yerr=diff_std_3,
                          error_kw={'ecolor': 'black', 'linewidth': 2},  # error-bars colour
                          label=r'3')

            if CHOICE == 15 and option == 0:
                ax.annotate(np.round(diff_mean_1[1], 2),
                            xy=(1.6, 0.15),
                            xytext=(0.8, 0.122),
                            arrowprops=dict(facecolor='black', arrowstyle="->"),
                            )

            # -- Legend
            handles, labels = ax.get_legend_handles_labels()
            # print("labels: {}".format(labels))
            legend = plt.legend(handles, labels,
                                loc='upper right',
                                handlelength=2,
                                labelspacing=0,     # distance between label entries
                                handletextpad=0.3,  # distance between label and the line representation
                                title='Variants',
                                borderaxespad=0.3,  # distance between legend and the outer axes
                                borderpad=0.1,      # padding inside legend box
                                )
            frame = legend.get_frame()
            frame.set_linewidth(0.0)
            frame.set_alpha(0.8)    # 0.8

            # -- Title and figure settings
            if distribution == 'uniform':
                distribution_label = ',$uniform'
            else:
                distribution_label = '$'
            plt.title(r'$\!\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format(int(n / 1000), d, h, f, distribution_label))
            # ax.set_xticks(range(10))
            plt.grid(b=True, which='both', alpha=0.2, linestyle='solid', axis='y', linewidth=0.5)  # linestyle='dashed', which='minor'
            plt.xlabel(r'Max path length ($\ell_{{\mathrm{{max}}}})$', labelpad=0)
            plt.ylabel(r'L2 norm', labelpad=0)
            if ymin1 is None:
                ymin1 = plt.ylim()[0]
            ymin1 = max(ymin1, 0)
            if ymax1 is None:
                ymax1 = plt.ylim()[1]
            plt.ylim(ymin1, ymax1)
            plt.xlim(0.5, 5.5)
            plt.xticks([1, 2, 3, 4, 5])
            plt.tick_params(
                axis='x',           # changes apply to the x-axis
                which='both',       # both major and minor ticks are affected
                bottom='off',       # ticks along the bottom edge are off
                top='off',          # ticks along the top edge are off
                # labelbottom='off',  # labels along the bottom edge are off
            )
            plt.annotate(r'$\lambda={:g}$'.format(float(scaling)),
                         xycoords='axes fraction',
                         xy=(0.5, 0.9),
                         ha="center", va="center")

            if CREATE_PDF:
                plt.savefig(join(figure_directory, fig_filename),
                            format='pdf',
                            dpi=None,
                            edgecolor='w',
                            orientation='portrait',
                            transparent=False,
                            bbox_inches='tight',
                            pad_inches=0.05,
                            frameon=None)
            if SHOW_FIG1:
                plt.show()
            if SHOW_PDF:
                os.system('{} "'.format(open_cmd[sys.platform]) + join(figure_directory, fig_filename) + '"')  # shows actually created PDF

    if SHOW_FIG2:
        # -- Read, aggregate, and pivot data for all options
        df1 = pd.read_csv(join(data_directory, csv_filename))
        # print("\n-- df1 (length {}):\n{}".format(len(df1.index), df1.head(15)))
        df2 = df1.groupby(['option', 'variant', 'length']).agg \
            ({'diff': [np.mean, np.std, np.size],   # Multiple Aggregates
              'time': [np.mean, np.std],
              })
        df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
        df2.reset_index(inplace=True)               # remove the index hierarchy
        df2.rename(columns={'diff_size': 'count'}, inplace=True)
        # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(90)))

        df2['length'] = df2['length'].astype(str)   # transform numbers into string for later join: '.join(col).strip()'
        df3 = df2.query('variant=="1"')             # We only focus on variant 1 (as close to row stochastic matrix as possible)
        # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(n=20)))
        df4 = pd.pivot_table(df3, index=['option'], columns=['length'], values=['diff_mean', 'diff_std'])  # Pivot
        # print("\n-- df4 (length {}):\n{}".format(len(df4.index), df4.head(30)))
        df4.columns = ['_'.join(col).strip() for col in df4.columns.values]  # flatten the column hierarchy, requires to have only strings
        df4.reset_index(level=0, inplace=True)      # get length into columns
        # print("\n-- df4 (length {}):\n{}".format(len(df4.index), df4.head(30)))

        # Add scaling factor for each row
        option = df4['option'].values               # extract the values from dataframe
        scaling = scaling_vec[option]               # look up the scaling factor in original list
        scaling = pd.Series(scaling)
        # print("scaling:\n{}".format(scaling))
        df5 = df4.assign(scaling=scaling.values)
        # print("\n-- df5 (length {}):\n{}".format(len(df5.index), df5.head(30)))

        # Filter rows: only those options for which EC is true
        select_rows = [i for i in range(num_options) if EC[i]]
        df6 = df5[df5['option'].isin(select_rows)]
        # print("\n-- df6 (length {}):\n{}".format(len(df6.index), df6.head(30)))

        fig_filename = 'Fig_MHE_ScalingFactor_{}.pdf'.format(CHOICE)

        # -- Setup figure
        mpl.rcParams['backend'] = 'pdf'
        mpl.rcParams['lines.linewidth'] = 3
        mpl.rcParams['font.size'] = 14
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['axes.edgecolor'] = '111111'   # axes edge color
        mpl.rcParams['grid.color'] = '777777'       # grid color
        mpl.rcParams['figure.figsize'] = [4, 4]
        mpl.rcParams['xtick.major.pad'] = 4         # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 4         # padding of tick labels: default = 4
        fig = plt.figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # -- Extract values into columns (plotting dataframes with bars plus error lines and lines gave troubles)
        scaling = df6['scaling'].values             # .tolist() does not work with bar plot, requires np.array
        diff_mean_1 = df6['diff_mean_1'].values
        diff_mean_2 = df6['diff_mean_2'].values
        diff_mean_3 = df6['diff_mean_3'].values
        diff_mean_4 = df6['diff_mean_4'].values
        diff_mean_5 = df6['diff_mean_5'].values
        diff_std_5 = df6['diff_std_5'].values

        # -- Draw the plots
        p1 = ax.plot(scaling, diff_mean_1, color='black', linewidth=1, linestyle='--', label=r'$\ell_\mathrm{max} = 1$')
        p2 = ax.plot(scaling, diff_mean_2, color='orange', label=r'$\ell_\mathrm{max} = 2$')
        p3 = ax.plot(scaling, diff_mean_3, color='blue', label=r'$\ell_\mathrm{max} = 3$')
        p4 = ax.plot(scaling, diff_mean_4, color='green', label=r'$\ell_\mathrm{max} = 4$')
        p5 = ax.plot(scaling, diff_mean_5, color='red', marker='o', label=r'$\ell_\mathrm{max} = 5$')
        plt.xscale('log')
        upper = diff_mean_5 + diff_std_5
        lower = diff_mean_5 - diff_std_5
        ax.fill_between(scaling, upper, lower, facecolor='red', alpha=0.2, edgecolor='none')

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        plt.title(r'$\!\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format(int(n / 1000), d, h, f, distribution_label))
        handles, labels = ax.get_legend_handles_labels()
        # print("labels: {}".format(labels))
        legend = plt.legend(handles, labels,
                            loc='upper center',     # 'upper right'
                            handlelength=2,
                            labelspacing=0,         # distance between label entries
                            handletextpad=0.3,      # distance between label and the line representation
                            # title='Variants',
                            borderaxespad=0.3,      # distance between legend and the outer axes
                            borderpad=0.1,          # padding inside legend box
                            )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)        # 0.8

        # -- Figure settings
        # ax.set_xticks(range(10))
        plt.grid(b=True, which='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        plt.xlabel(r'$\lambda$', labelpad=0)
        plt.ylabel(r'L$^2$ norm', labelpad=0)
        if xmin2 is None:
            xmin2 = plt.xlim()[0]
        if xmax2 is None:
            xmax2 = plt.xlim()[1]
        if ymin2 is None:
            ymin2 = plt.ylim()[0]
        ymin2 = max(ymin2, 0)
        if ymax2 is None:
            ymax2 = plt.ylim()[1]
        plt.xlim(xmin2, xmax2)
        plt.ylim(ymin2, ymax2)
        plt.tick_params(
            axis='x',               # changes apply to the x-axis
            which='both',           # both major and minor ticks are affected
            # bottom='off',         # ticks along the bottom edge are off
            top='off',              # ticks along the top edge are off
            right='off',            # ticks along the top edge are off
            # labelbottom='off',    # labels along the bottom edge are off
        )

        if CREATE_PDF:
            plt.savefig(join(figure_directory, fig_filename),
                        format='pdf',
                        dpi=None,
                        edgecolor='w',
                        orientation='portrait',
                        transparent=False,
                        bbox_inches='tight',
                        pad_inches=0.05,
                        frameon=None)
        if SHOW_FIG2:
            plt.show()
        if SHOW_PDF:
            os.system('{} "'.format(open_cmd[sys.platform]) + join(figure_directory, fig_filename) + '"')  # shows actually created PDF
def test_M_observed():
    """Illustrate M_observed: non-backtracking or not
    Also shows that W^2 is denser for powerlaw graphs than uniform
    """
    print(
        "\n-- test_M_observed(): 'M_observed', uses: 'planted_distribution_model_H' --"
    )

    # --- Graph parameters (variant-1 assignments below overwrite the variant-2 ones)
    num_nodes = 3000
    prior_skew = 1          # first entry of the (unnormalized) class prior
    homophily = 8
    degree = 10             # variant 2
    degree = 2              # variant 1
    num_classes = 3
    distribution = 'powerlaw'   # variant 2
    distribution = 'uniform'    # variant 1
    exponent = -0.5
    alpha0 = np.array([prior_skew, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(num_classes, homophily, symmetric=True)

    # --- Build the planted graph; both RNGs are seeded because both get used
    RANDOMSEED = None       # For repeatability
    random.seed(RANDOMSEED)
    np.random.seed(seed=RANDOMSEED)
    W, Xd = planted_distribution_model_H(num_nodes,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=degree,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)

    # --- Path-count statistics up to the given distance, with and without
    #     the non-backtracking (NB) restriction
    distance = 8
    M_vec = M_observed(W, X0, distance=distance, NB=False)
    M_vec_EC = M_observed(W, X0, distance=distance, NB=True)

    print("Graph with n={} nodes and uniform d={} degrees".format(num_nodes, degree))
    print("\nSum of entries and first rows of M_vec (without NB)")
    for idx, M in enumerate(M_vec):
        print("{}: {}, {}".format(idx, np.sum(M), M[0]))
    print("\nSum of entries and first rows of M_vec (with NB)")
    for idx, M in enumerate(M_vec_EC):
        print("{}: {}, {}".format(idx, np.sum(M), M[0]))

    if True:    # flip to skip the verbose full-matrix dump
        print("\nFull matrices:")
        print("M_vec")
        for idx, M in enumerate(M_vec):
            print("{}: \n{}".format(idx, M))
        print("\nM_vec_EC")
        for idx, M in enumerate(M_vec_EC):
            print("{}: \n{}".format(idx, M))
def test_gradient_optimization2():
    """Time and compare 'estimateH' with and without the analytic gradient on a
    larger k=7 powerlaw graph, then evaluate the energy and its gradient at the
    returned estimates. Prints M_observed / H_observed statistics (distance 5)
    first.
    """
    print(
        "\n-- 'estimateH, define_gradient_energy_H, define_energy_H, uses: planted_distribution_model_H, H_observed, M_observed, --"
    )

    # --- Parameters for graph
    n = 10000                   # number of nodes
    a = 1                       # first entry of the (unnormalized) class prior
    h = 2                       # homophily parameter for the parameterized H
    d = 10                      # out-degree parameter for the generator
    k = 7                       # number of classes
    distribution = 'powerlaw'
    exponent = -0.3
    np.set_printoptions(precision=4)    # NOTE: global side effect on numpy printing
    alpha0 = create_parameterized_alpha(k, a)
    H0 = create_parameterized_H(k, h, symmetric=True)
    f = 0.02                    # fraction of rows kept labeled (1 - f replaced below)
    print("Graph n={}, d={}, f={}".format(n, d, f))
    print("H0:\n{}".format(H0))

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed
    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, ind = replace_fraction_of_rows(X0, 1 - f)   # X1 keeps only fraction f labeled

    # --- M_vec, H_vec statistics
    distance = 5
    print("M_vec:")
    M_vec = M_observed(W, X1, distance=distance)
    for i, M in enumerate(M_vec):
        print("{}:\n{}".format(i, M))
    print("\nH_vec_observed:")
    H_vec = H_observed(W, X1, distance=distance)
    for i, H in enumerate(H_vec):
        print("{}:\n{}".format(i, H))

    # --- estimate_H based on distance 1 and uninformative point
    distance = 1
    weights = [1, 0, 0, 0, 0]   # only the distance-1 statistics get weight
    print(
        "\n= Estimate H based on X1 and distance={} from uninformative point:".
        format(distance))
    h0 = np.ones(int(k * (k - 1) / 2)).dot(
        1 / k)  # use uninformative matrix to start with
    # NOTE(review): h0 is never passed to estimateH below (no initial_h0= argument);
    # it appears to be dead -- confirm whether it was intended as a starting point.
    energy_H = define_energy_H(H_vec_observed=H_vec,
                               weights=weights,
                               distance=distance)
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec,
                                                 weights=weights,
                                                 distance=distance)

    # Estimate without the analytic gradient, and time it
    start = time.time()
    H1 = estimateH(X1, W, distance=distance, weights=weights, gradient=False)
    time_est = time.time() - start
    print("Estimated H without gradient:\n{}".format(H1))
    print("Time :{}".format(time_est))
    e = energy_H(H1)
    print("Energy at estimated point: {}".format(e))

    # Estimate with the analytic gradient, and time it
    start = time.time()
    H2 = estimateH(X1, W, distance=distance, weights=weights, gradient=True)
    time_est = time.time() - start
    print("Estimated H with gradient:\n{}".format(H2))
    print("Time :{}".format(time_est))
    e = energy_H(H2)
    print("Energy at estimated point: {}".format(e))
    G = gradient_energy_H(H2)
    h = derivative_H_to_h(G)    # NOTE: rebinds local 'h' (was the homophily parameter)
    print("Gradient matrix at estimated point:\n{}".format(G))
    print("Gradient vector at estimated point:\n{}".format(h))
def test_gradient_optimization():
    """Compare 'estimateH' with and without the analytic gradient on a k=3
    powerlaw graph. The distance-5 optimization is started from several initial
    points (uninformative, wrong, closer, even closer) and finally run with
    constraints; timings, energies, and gradients at the estimates are printed.
    """
    print(
        "\n-- 'estimateH, define_gradient_energy_H, define_energy_H, uses: planted_distribution_model_H, H_observed, M_observed, --"
    )

    # --- Parameters for graph
    n = 1000                    # number of nodes
    a = 1                       # first entry of the (unnormalized) class prior
    h = 8                       # homophily parameter for the parameterized H
    d = 25                      # out-degree parameter for the generator
    k = 3                       # number of classes
    distribution = 'powerlaw'
    exponent = -0.3
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    f = 0.1                     # fraction of rows kept labeled (1 - f replaced below)
    print("Graph n={}, d={}, f={}".format(n, d, f))
    print("H0:\n{}".format(H0))

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed
    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, ind = replace_fraction_of_rows(X0, 1 - f)   # X1 keeps only fraction f labeled

    # --- M_vec, H_vec statistics
    distance = 5
    print("\nH_vec_observed:")
    H_vec = H_observed(W, X1, distance=distance)
    for i, H in enumerate(H_vec):
        print("{}:\n{}".format(i, H))

    # --- estimate_H based on distance 1
    print(
        "\n= Estimate H based on X1 and distance=1 (old without or with gradient):"
    )
    distance = 1
    weights = [1, 0, 0, 0, 0]   # only the distance-1 statistics get weight
    start = time.time()
    H1 = estimateH(X1, W, distance=distance, weights=weights, gradient=False)
    time_est = time.time() - start
    print("Estimated H without gradient:\n{}".format(H1))
    print("Time :{}".format(time_est))
    start = time.time()
    H2 = estimateH(X1, W, distance=distance, weights=weights, gradient=True)
    time_est = time.time() - start
    print("Estimated H with gradient:\n{}".format(H2))
    print("Time :{}".format(time_est))

    # --- estimate_H based on distance 5 and uninformative point
    print(
        "\n= Estimate H based on X1 and distance=5 (ignoring distances 1-4) from various points (old without or with gradient):"
    )
    print(
        "From uninformative point (all methods get stuck, even with gradient !!!:"
    )
    distance = 5
    weights = [0, 0, 0,
               0, 1]            # only the distance-5 statistics get weight
    h0 = np.ones(3).dot(1 / k)  # use uninformative matrix to start with
    # NOTE(review): in this section h0 is not passed to estimateH (no initial_h0=);
    # it is only used for the gradient evaluation below -- confirm intent.
    start = time.time()
    H1 = estimateH(X1, W, distance=distance, weights=weights, gradient=False)
    time_est = time.time() - start
    print("Estimated H without gradient:\n{}".format(H1))
    print("Time :{}".format(time_est))
    start = time.time()
    H2 = estimateH(X1, W, distance=distance, weights=weights, gradient=True)
    time_est = time.time() - start
    print("Estimated H with gradient:\n{}".format(H2))
    print("Time :{}".format(time_est))
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec,
                                                 weights=weights,
                                                 distance=distance)
    g = gradient_energy_H(transform_hToH(h0, 3))
    h = derivative_H_to_h(g)    # NOTE: rebinds local 'h' (was the homophily parameter)
    print("Gradient at uninformative point:\n{}".format(g))
    print("Gradient at uninformative point: {}".format(h))
    energy_H = define_energy_H(H_vec_observed=H_vec,
                               weights=weights,
                               distance=distance)
    e = energy_H(H2)
    print("Energy at estimated point: {}".format(e))

    # --- estimate_H based on distance 5 and wrong point
    print(
        "\n= From wrong point (gradient method with BFGS can fix it, SLSQP stays stuck !!!"
    )
    distance = 5
    weights = [0, 0, 0, 0, 1]
    h0 = np.array([0.4, 0.3, 0.3])  # deliberately wrong starting point
    start = time.time()
    H1 = estimateH(X1,
                   W,
                   distance=distance,
                   weights=weights,
                   gradient=False,
                   initial_h0=h0)
    time_est = time.time() - start
    print("Estimated H without gradient:\n{}".format(H1))
    print("Time :{}".format(time_est))
    start = time.time()
    H2 = estimateH(X1,
                   W,
                   distance=distance,
                   weights=weights,
                   gradient=True,
                   initial_h0=h0)
    time_est = time.time() - start
    print("Estimated H with gradient:\n{}".format(H2))
    print("Time :{}".format(time_est))
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec,
                                                 weights=weights,
                                                 distance=distance)
    g = gradient_energy_H(transform_hToH(h0, 3))
    h = derivative_H_to_h(g)
    print("Gradient at wrong point:\n{}".format(g))
    print("Gradient at wrong point: {}".format(h))
    energy_H = define_energy_H(H_vec_observed=H_vec,
                               weights=weights,
                               distance=distance)
    e = energy_H(H2)
    print("Energy at estimated point: {}".format(e))

    # --- estimate_H based on distance 5 and some closer point
    print(
        "\n= From closer point (converges for BFGS, but not always for SLSQP!!!):"
    )
    distance = 5
    weights = [0, 0, 0, 0, 1]
    h0 = np.array([0.3, 0.4, 0.3])  # closer to the planted parameters
    start = time.time()
    H1 = estimateH(X1,
                   W,
                   distance=distance,
                   weights=weights,
                   gradient=False,
                   initial_h0=h0)
    time_est = time.time() - start
    print("Estimated H without gradient:\n{}".format(H1))
    print("Time :{}".format(time_est))
    start = time.time()
    H2 = estimateH(X1,
                   W,
                   distance=distance,
                   weights=weights,
                   gradient=True,
                   initial_h0=h0)
    time_est = time.time() - start
    print("Estimated H with gradient:\n{}".format(H2))
    print("Time :{}".format(time_est))
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec,
                                                 weights=weights,
                                                 distance=distance)
    g = gradient_energy_H(transform_hToH(h0, 3))
    h = derivative_H_to_h(g)
    print("Gradient at closer point:\n{}".format(g))
    print("Gradient at closer point: {}".format(h))
    energy_H = define_energy_H(H_vec_observed=H_vec,
                               weights=weights,
                               distance=distance)
    e = energy_H(H2)
    print("Energy at estimated point: {}".format(e))

    # --- estimate_H based on distance 5 and some closer point
    print("\n= From even closer point:")
    distance = 5
    weights = [0, 0, 0, 0, 1]
    h0 = np.array([0.2, 0.4, 0.2])  # even closer starting point
    start = time.time()
    H1 = estimateH(X1,
                   W,
                   distance=distance,
                   weights=weights,
                   gradient=False,
                   initial_h0=h0)
    time_est = time.time() - start
    print("Estimated H without gradient:\n{}".format(H1))
    print("Time :{}".format(time_est))
    start = time.time()
    H2 = estimateH(X1,
                   W,
                   distance=distance,
                   weights=weights,
                   gradient=True,
                   initial_h0=h0)
    time_est = time.time() - start
    print("Estimated H with gradient:\n{}".format(H2))
    print("Time :{}".format(time_est))
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec,
                                                 weights=weights,
                                                 distance=distance)
    g = gradient_energy_H(transform_hToH(h0, 3))
    h = derivative_H_to_h(g)
    print("Gradient at closer point:\n{}".format(g))
    print("Gradient at closer point: {}".format(h))
    energy_H = define_energy_H(H_vec_observed=H_vec,
                               weights=weights,
                               distance=distance)
    e = energy_H(H2)
    print("Energy at estimated point: {}".format(e))

    # --- estimate_H based on distance 5 and some closer point
    print(
        "\n= Variant with constraints (constraints only work with SLSQP !!!):")
    start = time.time()
    H2 = estimateH(X1,
                   W,
                   distance=distance,
                   weights=weights,
                   gradient=True,
                   initial_h0=h0,
                   constraints=True)
    time_est = time.time() - start
    print("Estimated H with gradient and constraints:\n{}".format(H2))
    print("Time :{}".format(time_est))
    e = energy_H(H2)    # reuses energy_H defined in the previous section
    print("Energy at estimated point: {}".format(e))
def test_planted_distribution_model():
    """ Tests the main graph generator with statistics and visualized degree distribution and edge adjacency matrix

    For one hard-coded CHOICE of parameters: builds the planted graph (via the
    P- or H-parameterized generator), prints edge/degree/connectivity statistics
    and the LinBP convergence epsilons, then writes two PDFs (degree
    distributions and the blocked adjacency matrix) and opens them.
    """
    print("\n--- 'planted_distribution_model_H', 'planted_distribution_model_P', 'number_of_connectedComponents', 'create_blocked_matrix_from_graph' --")
    CHOICE = 21
    print("CHOICE:", CHOICE)
    debug = 0
    # Default: directed graph. The undirected CHOICEs below overwrite this.
    # (This assignment used to be commented out, which made the directed
    # CHOICEs 1, 4, 12, 13, 14 fail with NameError at the generator call.)
    directed = True                         # !!! TODO: not yet clear what undirected means here, only P accepts directed
    backEdgesAllowed = True                 # ??? should be enforced in code
    sameInAsOutDegreeRanking = False
    distribution = 'powerlaw'
    exponent = -0.3
    VERSION_P = True                        # True: use P generator; False: use H generator

    # --- AAAI figures ---
    if CHOICE in [1, 2, 3, 4, 5, 6]:
        n = 120
        alpha0 = [1/6, 1/3, 1/2]
        h = 8
        P = np.array([[1, h, 1],
                      [1, 1, h],
                      [h, 1, 1]])
        if CHOICE == 1:     # P (equivalent to 2), AAAI 2
            m = 1080
        elif CHOICE == 2:   # H (equivalent to 1)
            H0 = row_normalize_matrix(P)
            d_vec = [18, 9, 6]
            VERSION_P = False
        elif CHOICE == 3:   # H (equivalent to 4), AAAI 3
            H0 = row_normalize_matrix(P)
            d_vec = 9
            VERSION_P = False
        elif CHOICE == 4:   # P (equivalent to 3)
            P = np.array([[1, h, 1],
                          [2, 2, 2*h],
                          [3*h, 3, 3]])
            m = 1080
        elif CHOICE == 5:   # H (equivalent to 2), but backedges=False
            H0 = row_normalize_matrix(P)
            d_vec = [18, 9, 6]
            VERSION_P = False
            backEdgesAllowed = False
        elif CHOICE == 6:   # P undirected, AAAI 4
            P = np.array([[1, h, 1],
                          [h, 1, 1],
                          [1, 1, h]])
            directed = False
            backEdgesAllowed = False
            m = 540

    # --- AGAIN DIRECTED ---
    if CHOICE == 12:
        n = 1001
        alpha0 = [0.6, 0.2, 0.2]
        P = np.array([[0.1, 0.8, 0.1],
                      [0.8, 0.1, 0.1],
                      [0.1, 0.1, 0.8]])
        m = 3000
        distribution = 'uniform'            # uniform powerlaw
        exponent = None
        backEdgesAllowed = False            # ??? should be enforced in code
    if CHOICE == 13:                        # Nice for block matrix visualization
        n = 1000
        alpha0 = [0.334, 0.333, 0.333]
        h = 2
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        m = 2000
        distribution = 'uniform'            # uniform powerlaw
        exponent = None
        backEdgesAllowed = False            # ??? should be enforced in code
    if CHOICE == 14:
        n = 1000
        alpha0 = [0.3334, 0.3333, 0.3333]
        h = 10
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        m = 10000
        exponent = -0.55

    # --- UNDIRECTED ---
    if CHOICE == 20:
        n = 100
        alpha0 = [0.6, 0.2, 0.2]
        h = 1.4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        H0 = row_normalize_matrix(P)
        d_vec = 5
        directed = False
        exponent = -0.3
        VERSION_P = False
    elif CHOICE == 21:
        n = 1001
        alpha0 = [0.6, 0.2, 0.2]
        h = 4
        P = np.array([[1, h, 1],
                      [h, 1, 1],
                      [1, 1, h]])
        H0 = row_normalize_matrix(P)
        d_vec = 3.4                         # don't specify vector for undirected
        distribution = 'uniform'            # uniform powerlaw
        exponent = -0.5
        directed = False
        backEdgesAllowed = True             # ignored in code for undirected
        VERSION_P = False
        sameInAsOutDegreeRanking = True     # ignored in code for undirected
    elif CHOICE == 22:
        n = 1000
        m = 3000
        alpha0 = [0.6, 0.2, 0.2]
        h = 4
        P = np.array([[1, 3*h, 1],
                      [2*h, 1, 1],
                      [1, 1, h]])
        distribution = 'uniform'            # uniform powerlaw
        exponent = -0.5
        directed = False
        backEdgesAllowed = False            # ignored in code for undirected
        sameInAsOutDegreeRanking = True     # ignored in code for undirected
        debug = 0
        VERSION_P = True
        H0 = row_normalize_matrix(P)

    # --- Create the graph
    start = time.time()
    if VERSION_P:
        W, Xd = planted_distribution_model(n, alpha=alpha0, P=P, m=m,
                                           distribution=distribution, exponent=exponent,
                                           directed=directed,
                                           backEdgesAllowed=backEdgesAllowed,
                                           sameInAsOutDegreeRanking=sameInAsOutDegreeRanking,
                                           debug=debug)
    else:
        W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d_vec,
                                             distribution=distribution, exponent=exponent,
                                             directed=directed,
                                             backEdgesAllowed=backEdgesAllowed,
                                             sameInAsOutDegreeRanking=sameInAsOutDegreeRanking,
                                             debug=debug)
    time_est = time.time()-start
    print("Time for graph generation: {}".format(time_est))

    # - Undirectd degrees: In + Out
    # W.multiply(W.T) is nonzero exactly where an edge exists in both directions
    W_und = W.multiply(W.transpose())
    """if backEdgesAllowed then there can be edges in both directions."""
    # W_und.data[:] = np.sign(W_und.data)   # W contains weighted edges -> unweighted before counting edges with Ptot
    print("Fraction of edges that go in both directions: {}".format(np.sum(W_und.data) / np.sum(W.data)))

    # --- Statistics on created graph
    print("\n- 'calculate_Ptot_from_graph':")
    P_tot = calculate_Ptot_from_graph(W, Xd)
    print("P_tot:\n{}".format(P_tot))
    print("sum(P_tot): {}".format(np.sum(P_tot)))
    print("P (normalized to sum=1):\n{}".format(1. * P_tot / np.sum(P_tot)))    # Potential: normalized sum = 1
    H = row_normalize_matrix(P_tot)
    print("H (row-normalized):\n{}".format(H))

    print("\n- 'calculate_nVec_from_Xd':")
    n_vec = calculate_nVec_from_Xd(Xd)
    print("n_vec: {}".format(n_vec))
    print("alpha: {}".format(1.*n_vec / sum(n_vec)))

    print("\n- Average Out/Indegree 'calculate_average_outdegree_from_graph' (assumes directed for total; for undirected the totals are incorrect):")
    print("Average outdegree: {}".format(calculate_average_outdegree_from_graph(W)))
    print("Average indegree: {}".format(calculate_average_outdegree_from_graph(W.transpose())))
    print("Average total degree: {}".format(calculate_average_outdegree_from_graph(W + W.transpose())))
    print("Average outdegree per class: {}".format(calculate_average_outdegree_from_graph(W, Xd)))
    print("Average indegree per class: {}".format(calculate_average_outdegree_from_graph(W.transpose(), Xd)))
    print("Average total degree per class: {}".format(calculate_average_outdegree_from_graph(W + W.transpose(), Xd)))

    # - Overall degree distribution: In / out
    # list(...) around the dict views: in Python 3, np.array of dict_keys/dict_values
    # would just print the view object instead of the degree table.
    print("\n- Overall Out/In/Total degree distribution 'calculate_outdegree_distribution_from_graph':")
    print("Overall Out and Indegree distribution:")
    d_out_vec_tot = calculate_outdegree_distribution_from_graph(W, Xd=None)
    d_in_vec_tot = calculate_outdegree_distribution_from_graph(W.transpose(), Xd=None)
    print("Outdegree distribution (degree / number):\n{}".format(np.array([list(d_out_vec_tot.keys()), list(d_out_vec_tot.values())])))
    print("Indegree distribution (degree / number):\n{}".format(np.array([list(d_in_vec_tot.keys()), list(d_in_vec_tot.values())])))

    # - Overall degree distribution: In + Out
    d_tot_vec_tot = calculate_outdegree_distribution_from_graph(W + W.transpose(), Xd=None)
    print("Total degree distribution (degree / number):\n{}".format(np.array([list(d_tot_vec_tot.keys()), list(d_tot_vec_tot.values())])))

    # - Per-class degree distribution: In / out
    print("\n- Per-class Out/In/Total degree distribution 'calculate_outdegree_distribution_from_graph':")
    print("\nOutdegree distribution per class:")
    d_out_vec = calculate_outdegree_distribution_from_graph(W, Xd)
    for i in range(len(d_out_vec)):
        print("Class {}:".format(i))
        print(np.array([list(d_out_vec[i].keys()), list(d_out_vec[i].values())]))
    print("Indegree distribution per class:")
    d_in_vec = calculate_outdegree_distribution_from_graph(W.transpose(), Xd)
    for i in range(len(d_in_vec)):
        print("Class {}:".format(i))
        print(np.array([list(d_in_vec[i].keys()), list(d_in_vec[i].values())]))

    # - per-class degree distribution: In + out
    print("\nTotal degree distribution per class:")
    d_vec_und = calculate_outdegree_distribution_from_graph(W + W.transpose(), Xd)
    for i in range(len(d_vec_und)):
        print("Class {}:".format(i))
        print(np.array([list(d_vec_und[i].keys()), list(d_vec_und[i].values())]))

    print("\n- number of weakly connected components':")
    print("Number of weakly connected components: {}".format(connected_components(W, directed=True, connection='weak', return_labels=False)))

    # --- convergence boundary
    # (a directed variant via '_out_eps_convergence_directed_linbp' existed here
    # but was disabled; only the undirected epsilons are computed)
    Hc = to_centering_beliefs(H)
    eps_noEcho = eps_convergence_linbp(Hc, W, echo=False)
    eps_Echo = eps_convergence_linbp(Hc, W, echo=True)
    print("Eps (w/ echo): {}".format(eps_Echo))
    print("Eps (no echo): {}".format(eps_noEcho))

    # --- Fig1: Draw edge distributions
    print("\n- Fig1: Draw degree distributions")
    params = {'backend': 'pdf',
              'lines.linewidth': 4,
              'font.size': 10,
              'axes.labelsize': 24,     # fontsize for x and y labels (was 10)
              'axes.titlesize': 22,
              'xtick.labelsize': 20,
              'ytick.labelsize': 20,
              'legend.fontsize': 8,
              'figure.figsize': [5, 4],
              'font.family': 'sans-serif'
              }
    mpl.rcdefaults()
    mpl.rcParams.update(params)

    fig = plt.figure(1)
    ax = fig.add_axes([0.15, 0.15, 0.8, 0.8])   # main axes
    ax.xaxis.labelpad = -12
    ax.yaxis.labelpad = -12

    # A: Draw directed degree distribution (rank plot: sorted degrees, descending)
    y_vec = []
    for i in range(len(d_out_vec)):
        y = np.repeat(list(d_out_vec[i].keys()), list(d_out_vec[i].values()))   # !!! np.repeat
        y = -np.sort(-y)
        y_vec.append(y)
    y_tot = np.repeat(list(d_out_vec_tot.keys()), list(d_out_vec_tot.values()))     # total outdegree
    y_tot = -np.sort(-y_tot)
    plt.loglog(range(1, len(y_vec[0])+1), y_vec[0], lw=4, color='orange', label=r"A out", linestyle='-')     # !!! plot default index starts from 0 otherwise
    plt.loglog(range(1, len(y_vec[1])+1), y_vec[1], lw=4, color='blue', label=r"B out", linestyle='--')
    plt.loglog(range(1, len(y_vec[2])+1), y_vec[2], lw=4, color='green', label=r"C out", linestyle=':')
    plt.loglog(range(1, len(y_tot)+1), y_tot, lw=1, color='black', label=r"tot out", linestyle='-')

    # B: Draw second edge distribution of undirected degree distribution
    y_vec = []
    for i in range(len(d_vec_und)):
        y = np.repeat(list(d_vec_und[i].keys()), list(d_vec_und[i].values()))
        y = -np.sort(-y)
        y_vec.append(y)
    y_tot = np.repeat(list(d_tot_vec_tot.keys()), list(d_tot_vec_tot.values()))
    y_tot = -np.sort(-y_tot)
    plt.loglog(range(1, len(y_vec[0])+1), y_vec[0], lw=4, color='orange', label=r"A", linestyle='-')
    plt.loglog(range(1, len(y_vec[1])+1), y_vec[1], lw=4, color='blue', label=r"B", linestyle='--')
    plt.loglog(range(1, len(y_vec[2])+1), y_vec[2], lw=4, color='green', label=r"C", linestyle=':')
    plt.loglog(range(1, len(y_tot)+1), y_tot, lw=1, color='black', label=r"tot", linestyle='-')
    plt.legend(loc='upper right', labelspacing=0)

    filename = 'figs/Fig_test_planted_distribution_model1_{}.pdf'.format(CHOICE)
    plt.savefig(filename, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait',
                # papertype='letter',   # removed: deprecated and dropped in matplotlib >= 3.3
                format='pdf',
                transparent=True, bbox_inches='tight', pad_inches=0.1,
                # frameon=None,         # TODO: frameon deprecated
                )
    os.system("open " + filename)       # NOTE: 'open' is macOS-specific

    # --- Fig2: Draw block matrix
    print("\n- Fig2: 'create_blocked_matrix_from_graph'")
    W_new, Xd_new = create_blocked_matrix_from_graph(W, Xd)

    fig = plt.figure(2)
    row, col = W_new.nonzero()          # transform the sparse W back to row col format
    plt.plot(col, row, 'o', color='r', markersize=2, markeredgewidth=2, lw=0, zorder=3)     # Notice (col, row) because first axis is vertical in matrices
    plt.gca().invert_yaxis()            # invert the y-axis to start on top and go down

    # Show quadrants
    d1 = alpha0[0] * n
    d2 = (alpha0[0] + alpha0[1]) * n
    plt.grid(which='major', color='0.7', linestyle='-', linewidth=1)
    plt.xticks([0, d1, d2, n])
    plt.yticks([0, d1, d2, n])
    plt.xlabel('to', labelpad=-1)
    plt.ylabel('from', rotation=90, labelpad=0)

    frame = plt.gca()
    # frame.axes.xaxis.set_ticklabels([])   # would hide the labels
    # frame.axes.yaxis.set_ticklabels([])
    frame.tick_params(direction='inout', width=1, length=10)

    filename = 'figs/Fig_test_planted_distribution_model2_{}.pdf'.format(CHOICE)
    plt.savefig(filename, dpi=None, facecolor='w', edgecolor='w',
                orientation='portrait',
                # papertype='letter',   # removed: deprecated and dropped in matplotlib >= 3.3
                format='pdf',
                transparent=True, bbox_inches='tight', pad_inches=0.1)
    os.system("open " + filename)
def run(choice, variant, create_data=False, show_plot=False, create_pdf=False, show_pdf=False, append_data=False):
    """main parameterized method to produce all figures.
    Can be run from external jupyter notebook or method to produce all figures, optionally as PDF

    CHOICE uses a different saved experimental run
    VARIANT uses a different way to plot

    Times W-matrix powers against H_observed (non-backtracking) for increasing
    path lengths, appends the timings to a CSV, then plots the medians on a
    log-y axis.
    """
    # %% -- Setup
    CREATE_DATA = create_data
    APPEND_DATA = append_data   # allows to add more data, requires CREATE_DATA to be true
    CHOICE = choice
    VARIANT = variant
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    SHOW_PDF = show_pdf
    BOTH = True                 # show both figures for W and H
    SHOW_TITLE = True           # show parameters in title of plot
    f = 1                       # fraction of labeled nodes for H estimation
    csv_filename = 'Fig_Scaling_Hrow_{}.csv'.format(CHOICE)
    fig_filename = 'Fig_Scaling_Hrow_{}-{}.pdf'.format(CHOICE, VARIANT)
    plot_colors = ['darkorange', 'blue']
    header = ['currenttime',
              'choice',         # W, or H
              'l',              # path length
              'time']
    if CREATE_DATA and not APPEND_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=APPEND_DATA)

    RANDOMSEED = None           # For repeatability
    random.seed(RANDOMSEED)     # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)     # seeds the actually used numpy random generator; both are used and thus needed

    # %% -- Default parameters
    n = 10000
    ymax = 10
    h = 3
    d = 10                      # actual degree is double
    distribution = 'uniform'
    exponent = None

    # %% -- CHOICES and VARIANTS
    if CHOICE == 1:
        W_repeat = [0, 0, 30, 5, 3, 1]      # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 4.3
        W_annotate_y = 1
        H_annotate_x = 6
        H_annotate_y = 0.005
    elif CHOICE == 2:           # small exponent 3, does not show the advantage well
        d = 3
        W_repeat = [0, 0, 10, 5, 5, 5, 5, 5, 5]     # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 5
        W_annotate_y = 0.08
        H_annotate_x = 6.5
        H_annotate_y = 0.004
    elif CHOICE == 3:           # small exponent 2, does not show the advantage well
        d = 2
        W_repeat = [0, 0, 50, 50, 50, 50, 50, 50, 50]   # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 6.5
        W_annotate_y = 0.02
        H_annotate_x = 6.5
        H_annotate_y = 0.004
    elif CHOICE == 4:
        distribution = 'powerlaw'
        exponent = -0.5
        W_repeat = [0, 0, 50, 9, 5, 3]      # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 4
        W_annotate_y = 1
        H_annotate_x = 6.5
        H_annotate_y = 0.006
        if VARIANT == 1:
            plot_colors = ['blue', 'darkorange']
            SHOW_TITLE = False
        if VARIANT == 2:
            plot_colors = ['blue', 'darkorange']
            BOTH = False
            SHOW_TITLE = False
    elif CHOICE == 5:
        distribution = 'powerlaw'
        exponent = -0.5
        W_repeat = [0, 0, 1, 1]             # index starts with 0. useful only for W^2 and later
        H_repeat = [0] + [1] * 8
        W_annotate_x = 4
        W_annotate_y = 1
        H_annotate_x = 6.5
        H_annotate_y = 0.006
    elif CHOICE == 11:
        W_repeat = [0, 0, 1, 1, 0, 0]       # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 4.3
        W_annotate_y = 1
        H_annotate_x = 6
        H_annotate_y = 0.005
    elif CHOICE == 12:
        W_repeat = [0, 0, 31, 11, 5, 3, 3, 3, 3]    # index starts with 0. useful only for W^2 and later
        H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
        W_annotate_x = 4.3
        W_annotate_y = 2.5
        H_annotate_x = 5.5
        H_annotate_y = 0.004
        f = 0.1
        plot_colors = ['blue', 'darkorange']
        ymax = 100
        if VARIANT == 1:
            # TODO: when trying to add additional data, then it creates 7 instead of 4 rows,
            # but the same code idea of CREATE vs ADD data appears to work in Fig_MHE_Optimal_Lambda, for that to replicate run below
            # run(12, 1, create_pdf=True, show_pdf=True, create_data=False, append_data=True)
            W_repeat = [0, 0, 0, 0, 0, 0, 0, 0, 0]  # index starts with 0. useful only for W^2 and later
            H_repeat = [0, 50, 50, 50, 50, 50, 50, 50, 50]
    else:
        # NOTE(review): raising the Warning *class* is unusual (ValueError would be
        # idiomatic); kept because callers may rely on the exception type.
        raise Warning("Incorrect choice!")

    # %% -- Create data
    if CREATE_DATA or APPEND_DATA:
        # Create graph
        k = 3
        a = 1
        alpha0 = np.array([a, 1., 1.])
        alpha0 = alpha0 / np.sum(alpha0)
        H0 = create_parameterized_H(k, h, symmetric=True)
        start = time.time()
        W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                             distribution=distribution,
                                             exponent=exponent,
                                             directed=False,
                                             debug=False)
        X0 = from_dictionary_beliefs(Xd)
        time_calc = time.time() - start
        # print("\nTime for graph:{}".format(time_calc))
        # print("Average outdegree: {}".format(calculate_average_outdegree_from_graph(W)))

        # Calculations W: time the explicit sparse matrix powers W^2 .. W^9
        for length, rep in enumerate(W_repeat):
            for _ in range(rep):
                start = time.time()
                if length == 2:
                    result = W.dot(W)
                elif length == 3:
                    result = W.dot(W.dot(W))    # naive enumeration used as nothing can be faster
                elif length == 4:
                    result = W.dot(W.dot(W.dot(W)))
                elif length == 5:
                    result = W.dot(W.dot(W.dot(W.dot(W))))
                elif length == 6:
                    result = W.dot(W.dot(W.dot(W.dot(W.dot(W)))))
                elif length == 7:
                    result = W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W))))))
                elif length == 8:
                    result = W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W)))))))
                elif length == 9:
                    result = W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W.dot(W))))))))
                time_calc = time.time() - start
                # 'record' (renamed from 'tuple', which shadowed the builtin)
                record = [str(datetime.datetime.now())]
                text = ['W', length, time_calc]
                text = np.asarray(text)         # without np, entries get ugly format
                record.extend(text)
                # print("W, d: {}, time: {}".format(length, time_calc))
                save_csv_record(join(data_directory, csv_filename), record)

        # Calculations H_NB: time the non-backtracking H_observed for each length
        for length, rep in enumerate(H_repeat):
            for _ in range(rep):
                X0 = from_dictionary_beliefs(Xd)
                X1, ind = replace_fraction_of_rows(X0, 1 - f)
                start = time.time()
                result = H_observed(W, X=X1, distance=length, NB=True, variant=1)
                time_calc = time.time() - start
                record = [str(datetime.datetime.now())]
                text = ['H', length, time_calc]
                text = np.asarray(text)         # without np, entries get ugly format
                record.extend(text)
                # print("H, d: {}, time: {}".format(length, time_calc))
                save_csv_record(join(data_directory, csv_filename), record)

        # Calculate and display M statistics
        for length, _ in enumerate(H_repeat):
            M = M_observed(W, X=X0, distance=length, NB=True)
            M = M[-1]
            s = np.sum(M)
            # print("l: {}, sum: {:e}, M:\n{}".format(length, s, M))

    # %% -- Read, aggregate, and pivot data
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1 (length {}):\n{}".format(len(df1.index), df1.head(15)))
    df2 = df1.groupby(['choice', 'l']).agg \
        ({'time': [np.max, np.mean, np.median, np.min, np.size],    # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]     # flatten the column hierarchy
    df2.reset_index(inplace=True)       # remove the index hierarchy
    df2.rename(columns={'time_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(30)))
    df3 = pd.pivot_table(df2, index=['l'], columns=['choice'], values='time_median', )      # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))

    #%% -- Setup figure
    mpl.rcParams['backend'] = 'pdf'
    mpl.rcParams['lines.linewidth'] = 3
    mpl.rcParams['font.size'] = 20
    mpl.rcParams['axes.labelsize'] = 20
    mpl.rcParams['axes.titlesize'] = 16
    mpl.rcParams['xtick.labelsize'] = 16
    mpl.rcParams['ytick.labelsize'] = 16
    mpl.rcParams['axes.edgecolor'] = '111111'   # axes edge color
    mpl.rcParams['grid.color'] = '777777'       # grid color
    mpl.rcParams['figure.figsize'] = [4, 4]
    mpl.rcParams['xtick.major.pad'] = 6         # padding of tick labels: default = 4
    mpl.rcParams['ytick.major.pad'] = 4         # padding of tick labels: default = 4
    fig = plt.figure()
    ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

    #%% -- Draw the plot and annotate
    df4 = df3['H']      # kept for the debug print below
    # print("\n-- df4 (length {}):\n{}".format(len(df4.index), df4.head(30)))
    Y1 = df3['W'].plot(logy=True, color=plot_colors[0], marker='o',
                       legend=None, clip_on=False,  # cut off data points outside of plot area
                       # zorder=3
                       )    # style='o', kind='bar', style='o-',
    plt.annotate(r'$\mathbf{W}^\ell$', xy=(W_annotate_x, W_annotate_y), color=plot_colors[0], )
    if BOTH:
        Y2 = df3['H'].plot(logy=True, color=plot_colors[1], marker='o',
                           legend=None, clip_on=False,  # cut off data points outside of plot area
                           zorder=3
                           )    # style='o', kind='bar', style='o-',
        plt.annotate(r'$\mathbf{\hat P}_{\mathrm{NB}}^{(\ell)}$', xy=(H_annotate_x, H_annotate_y), color=plot_colors[1], )
    if SHOW_TITLE:
        plt.title(r'$\!\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}$'.format(int(n / 1000), 2 * d, h, f))

    # %% -- Figure settings & plot
    plt.grid(b=True, which='both', alpha=0.2, linestyle='solid', axis='y', linewidth=0.5)   # linestyle='dashed', which='minor'
    plt.xlabel(r'Path length ($\ell$)', labelpad=0)
    plt.ylabel(r'$\!$Time [sec]', labelpad=1)
    plt.ylim(0.001, ymax)       # placed after yticks
    plt.xticks(range(1, 9))

    if SHOW_PLOT:
        plt.show()
    if CREATE_PDF:
        plt.savefig(join(figure_directory, fig_filename), format='pdf', dpi=None,
                    edgecolor='w', orientation='portrait', transparent=False,
                    bbox_inches='tight', pad_inches=0.05,
                    # frameon=None
                    )
    if SHOW_PDF:
        # os.system('{} "'.format(open_cmd[sys.platform]) + join(figure_directory, fig_filename) + '"')  # shows actually created PDF
        showfig(join(figure_directory, fig_filename))   # shows actually created PDF  # TODO replace with this method
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False,
        shorten_length=False, show_arrows=False):
    """Timing-vs-accuracy experiment for H-estimation methods (MCE / LCE / DCE / Holdout / GS).

    For the graph configuration selected by `choice`, optionally (re)creates the data:
    simulates planted-distribution graphs, estimates the compatibility matrix H with several
    learning methods while timing each, propagates beliefs with linBP, and appends
    accuracy/time records to a CSV.  Then aggregates the CSV with pandas and draws an
    accuracy vs. median-learning-time figure.

    Parameters:
        choice (int): experiment configuration id (see the `if CHOICE == ...` cascade below).
        create_data (bool): overwrite the CSV and regenerate all data.
        add_data (bool): append additional repetitions to the existing CSV.
        show_plot (bool): call plt.show() at the end.
        create_pdf (bool): save the figure as a PDF into `figure_directory`.
        show_pdf (bool): open the created PDF with `showfig`.
        shorten_length (bool): not referenced in this function — presumably kept for
            signature parity with the sibling figure scripts; TODO confirm.
        show_arrows (bool): initial value of the arrow-annotation flag.
            NOTE(review): it is unconditionally overwritten with True before plotting (below),
            so the parameter currently has no effect — confirm intent.

    Side effects: reads/writes 'Fig_timing_accuracy_learning_<CHOICE>.csv' in
    `data_directory`; may write/show a PDF in `figure_directory`.
    """
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    SHOW_PDF = show_pdf
    CREATE_PDF = create_pdf

    SHOW_STD = True         ## FALSE for just scatter plot points
    SHOW_ARROWS = show_arrows

    # -- Default Graph parameters
    rep_SameGraph = 1       # iterations on same graph
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    variant = 1
    EC = False
    numberOfSplits = 1
    scaling_vec = [None]*10
    ymin = 0.3
    ymax = 1
    xmin = 1e-3
    xmax = 1e3
    xtick_lab = [1e-3, 0.01, 0.1, 1, 10, 100, 1000]
    xtick_labels = [r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$10^{2}$', r'$10^{3}$']
    ytick_lab = np.arange(0, 1.1, 0.1)
    k = 3
    a = 1
    rep_DifferentGraphs = 1  # iterations on different graphs
    err = 0
    avoidNeighbors = False
    convergencePercentage_W = 0.99
    facecolor_vec = ["#4C72B0", "#55A868", "#8172B2", "#C44E52", "#CCB974", "#64B5CD"]
    label_vec = ['MCE', 'LCE', 'DCE', 'Holdout']
    linewidth_vec = [4, 3, 1, 2, 2, 1]
    # clip_ons = [True, True, True, True, True, True]
    FILEZNAME = 'Fig_timing_accuracy_learning'
    marker_vec = ['s', '^', 'v', 'o', 'x', '+', 'None']  #'^'
    length_vec = [5]
    stratified = True
    f = 0.01
    numMaxIt_vec = [10]*7
    alpha_vec = [0] * 7
    beta_vec = [0] * 7  # TODO: LinBP does not use beta. Also SSLH uses alpha, but not beta for W^row! Now fixed
    gamma_vec = [0] * 7
    s_vec = [0.5] * 7

    # -- Main Options
    # Each CHOICE fixes the graph (n, h, d) plus the per-option learning methods and their
    # scaling / randomization settings; the option vectors are iterated in lock-step below.
    if CHOICE == 1:     # Main graph
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE'] + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8]

    elif CHOICE == 2:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
        learning_method_vec = ['MHE'] + ['LHE'] + ['DHE'] + ['DHE'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'GS']
        randomize_vec = [False]*3 + [True] + [None]
        scaling_vec = [None]*2 + [10, 100] + [None]

    elif CHOICE == 3:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
        learning_method_vec = ['MHE'] + ['LHE'] + ['DHE'] + ['DHE'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'GS']
        randomize_vec = [False]*3 + [True] + [None]
        scaling_vec = [None]*2 + [10, 100] + [None]
        f = 0.02

    elif CHOICE == 4:       # TODO: Overnight Wolfgang
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE'] + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8, 16]

    elif CHOICE == 5:       # Toy graph with 100 nodes
        n = 100
        h = 3
        d = 8
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE'] + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8]
        f = 0.05

    elif CHOICE == 6:       # To be run by Prakhar on Cluster
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE'] + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCEr', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8]
        f = 0.003
        xmin = 1e-2
        # ymax = 0.9
        ymin = 0.2
        ymax = 0.9
        xmin = 1e-2         # NOTE(review): duplicate assignment (also set a few lines above)
        xmax = 1e3

    elif CHOICE == 7:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE'] + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False]*3 + [True] + [None]*2
        scaling_vec = [None]*2 + [10, 100] + [None]*2
        splits_vec = [1, 2, 4, 8, 16]
        f = 0.009

    # elif CHOICE == 8:     # not working well
    #     n = 1000
    #     h = 3
    #     d = 25
    #     option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
    #     learning_method_vec = ['MHE'] + ['LHE'] + ['DHE'] + ['DHE'] + ['Holdout'] + ['GS']
    #     label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
    #     randomize_vec = [False]*3 + [True] + [None]*2
    #     scaling_vec = [None]*2 + [10, 100] + [None]*2
    #     splits_vec = [1, 2, 4, 8, 16]
    #     f = 0.005

    else:
        # NOTE(review): raising Warning (an Exception subclass) works but is unconventional;
        # ValueError would be the idiomatic choice.
        raise Warning("Incorrect choice!")

    csv_filename = '{}_{}.csv'.format(FILEZNAME, CHOICE)
    header = ['currenttime', 'option', 'lensplit', 'f', 'accuracy', 'timetaken']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    H0c = to_centering_beliefs(H0)

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for i in range(rep_DifferentGraphs):  # create several graphs with same parameters
            # print("\ni: {}".format(i))
            W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                                 distribution=distribution,
                                                 exponent=exponent,
                                                 directed=False,
                                                 debug=False)
            X0 = from_dictionary_beliefs(Xd)

            for j in range(rep_SameGraph):  # repeat several times for same graph
                # print("j: {}".format(j))
                ind = None
                # Keep a fraction f of rows labeled; the rest are zeroed out
                X1, ind = replace_fraction_of_rows(X0, 1-f, avoidNeighbors=avoidNeighbors, W=W, ind_prior=ind,
                                                   stratified=stratified)  # TODO: stratified sampling option = True
                X2 = introduce_errors(X1, ind, err)

                for option_index, (learning_method, alpha, beta, gamma, s, numMaxIt, weight, randomize, option) in \
                        enumerate(zip(learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec, numMaxIt_vec,
                                      scaling_vec, randomize_vec, option_vec)):

                    # weight = np.array([np.power(scaling, i) for i in range(5)])  # TODO: now enough to specify weight as a scalar!

                    # One H estimate (and its wall-clock time) per split count / path length
                    H_est_dict = {}
                    timeTaken_dict = {}

                    # -- Learning
                    if learning_method == 'Holdout':
                        for numberOfSplits in splits_vec:
                            prev_time = time.time()
                            H_est_dict[numberOfSplits] = estimateH_baseline_serial(X2, ind, W, numMax=numMaxIt,
                                                                                   # ignore_rows=ind,
                                                                                   numberOfSplits=numberOfSplits,
                                                                                   # method=learning_method, variant=1, distance=length,
                                                                                   EC=EC, weights=weight,
                                                                                   alpha=alpha, beta=beta, gamma=gamma)
                            timeTaken = time.time() - prev_time
                            timeTaken_dict[numberOfSplits] = timeTaken

                    elif learning_method in ['LHE', 'MHE', 'DHE']:  # TODO: no smartInit, just randomization as option
                        for length in length_vec:
                            prev_time = time.time()
                            H_est_dict[length] = estimateH(X2, W, method=learning_method, variant=1,
                                                           randomize=randomize, distance=length, EC=EC, weights=weight)
                            timeTaken = time.time() - prev_time
                            timeTaken_dict[length] = timeTaken

                    elif learning_method == 'GS':
                        # Ground state / gold standard: use the true H0 directly (zero learning time)
                        H_est_dict['GS'] = H0

                    for key in H_est_dict:
                        H_est = H_est_dict[key]
                        H2c = to_centering_beliefs(H_est)
                        # print("H_estimated by {} is \n".format(learning_method), H_est)
                        # print("H0 is \n", H0)
                        # print("randomize was: ", randomize)

                        # Propagation
                        X2c = to_centering_beliefs(X2, ignoreZeroRows=True)  # try without
                        eps_max = eps_convergence_linbp_parameterized(H2c, W, method='noecho',
                                                                      alpha=alpha, beta=beta, gamma=gamma,
                                                                      X=X2)
                        eps = s * eps_max   # scale below the convergence boundary
                        # print("Max Eps ", eps_max)

                        try:
                            F, actualIt, actualPercentageConverged = \
                                linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                              method='noecho',
                                                              alpha=alpha, beta=beta, gamma=gamma,
                                                              numMaxIt=numMaxIt,
                                                              convergencePercentage=convergencePercentage_W,
                                                              convergenceThreshold=0.99,
                                                              debug=2)
                        except ValueError as e:
                            print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))
                        else:
                            accuracy_X = matrix_difference(X0, F, ignore_rows=ind)

                            # NOTE(review): 'tuple' shadows the builtin; consider renaming to 'row'.
                            tuple = [str(datetime.datetime.now())]
                            if learning_method == 'Holdout':
                                text = [option, "split{}".format(key), f, accuracy_X, timeTaken_dict[key]]
                            elif learning_method in ['MHE', 'DHE', 'LHE']:
                                text = [option, "len{}".format(key), f, accuracy_X, timeTaken_dict[key]]
                            elif learning_method == 'GS':
                                text = [option, 0, f, accuracy_X, 0]
                            tuple.extend(text)
                            # print("option: {}, f: {}, actualIt: {}, accuracy: {}".format(option, f, actualIt, accuracy_X))
                            save_csv_record(join(data_directory, csv_filename), tuple)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15)))

    # Aggregate repetitions
    df2 = df1.groupby(['option', 'lensplit', 'f']).agg \
        ({'accuracy': [np.mean, np.std, np.size],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(15)))

    df3 = df1.groupby(['option', 'lensplit', 'f']).agg({'timetaken': [np.median]})
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy
    # resultdf3 = df3.sort(['timetaken'], ascending=1)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(15)))

    # Per-option series: x = median learning time, y = mean accuracy (+/- std)
    X_time_median_dict = {}
    Y_acc_dict = {}
    Y_std_dict = {}
    for option in option_vec:
        Y_acc_dict[option] = df2.loc[(df2['option'] == option), "accuracy_mean"].values
        Y_std_dict[option] = df2.loc[(df2['option'] == option), "accuracy_std"].values
        X_time_median_dict[option] = df3.loc[(df3['option'] == option), "timetaken_median"].values
        # print("option: ", option)
        # print("Y_acc_dict[option]: ", Y_acc_dict[option])
        # print("Y_std_dict[option]: ", Y_std_dict[option])
        # print("X_time_median_dict[option]: ", X_time_median_dict[option])

    # -- Setup figure
    fig_filename = '{}_{}.pdf'.format(FILEZNAME, CHOICE)
    mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
    mpl.rcParams['axes.labelsize'] = 18
    mpl.rcParams['xtick.labelsize'] = 16
    mpl.rcParams['ytick.labelsize'] = 16
    mpl.rcParams['axes.titlesize'] = 16
    mpl.rcParams['legend.fontsize'] = 14
    mpl.rcParams['grid.color'] = '777777'   # grid color
    mpl.rcParams['xtick.major.pad'] = 2     # padding of tick labels: default = 4
    mpl.rcParams['ytick.major.pad'] = 1     # padding of tick labels: default = 4
    mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
    mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
    mpl.rcParams['figure.figsize'] = [4, 4]
    # NOTE(review): bare figure()/title()/grid()/xlim()/ylim()/xlabel()/ylabel()/savefig()
    # below presumably come from a pylab-style star import at file top — verify.
    fig = figure()
    ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

    SHOW_ARROWS = True  # NOTE(review): overrides the show_arrows parameter unconditionally

    for choice, color, learning_method, label, linewidth, marker in \
            zip(option_vec, facecolor_vec, learning_method_vec, label_vec, linewidth_vec, marker_vec):

        if learning_method == 'Holdout':
            # Draw std
            X1 = X_time_median_dict[choice]
            s = X1.argsort()    # sort by time so the line is drawn left-to-right
            X1 = X1[s]
            Y1 = Y_acc_dict[choice][s]
            Y2 = Y_std_dict[choice][s]
            if SHOW_STD:
                ax.fill_between(X1, Y1 + Y2, Y1 - Y2, facecolor=color, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X1, Y1 + Y2, linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X1, Y1 - Y2, linewidth=0.5, color='0.8', linestyle='solid')
                ax.set_ylim(bottom=ymin)
                ax.plot(X1, Y1, linewidth=linewidth, color=color, linestyle='solid', label=label, zorder=20,
                        marker='x', markersize=linewidth + 5, markeredgewidth=1)
                ax.annotate(np.round(X1[1], decimals=1), xy=(X1[1], Y1[1] - 0.05), color=color, va='center',
                            annotation_clip=False, zorder=5)
            else:
                ax.scatter(list(X1), list(Y1), color=color, label=label, marker='x', s=42)

        elif learning_method == 'GS':
            # Horizontal reference line: accuracy achievable with the true H0
            ax.plot([1e-4, 1e4], [Y_acc_dict[choice], Y_acc_dict[choice]],
                    linewidth=1, color='black', linestyle='dashed', zorder=0, marker=None, label=label,
                    )

        else:  # For all other
            if SHOW_STD:
                ax.errorbar(list(X_time_median_dict[choice]), list(Y_acc_dict[choice]), yerr=Y_std_dict[choice],
                            fmt='-o', linewidth=2, color=color, label=label, marker=marker, markersize=8)
                ax.annotate(np.round(X_time_median_dict[choice], decimals=2),
                            xy=(X_time_median_dict[choice], Y_acc_dict[choice]-0.05), color=color, va='center',
                            annotation_clip=False, zorder=5)
            else:
                ax.scatter(list(X_time_median_dict[choice]), list(Y_acc_dict[choice]),
                           color=color, label=label, marker=marker, s=42)

    if SHOW_ARROWS:
        # Speed-up arrow between DCEr and Holdout (hard-coded option keys)
        dce_opt = 'opt4'
        holdout_opt = 'opt5'
        ax.annotate(s='', xy=(X_time_median_dict[dce_opt], Y_acc_dict[dce_opt]-0.3),
                    xytext=(X_time_median_dict[holdout_opt][2]+0.02, Y_acc_dict[dce_opt]-0.3),
                    arrowprops=dict(arrowstyle='<->'))
        ax.annotate(str(int(np.round(X_time_median_dict[holdout_opt][2] / X_time_median_dict[dce_opt]))) + 'x',
                    xy=((X_time_median_dict[dce_opt] + X_time_median_dict[holdout_opt][2])/100, Y_acc_dict[dce_opt]-0.28),
                    color='black', va='center',
                    # bbox = dict(boxstyle="round,pad=0.3", fc="w"),
                    annotation_clip=False, zorder=5)

    # -- Title and legend
    title(r'$\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}$'.format(int(n / 1000), d, h, f))
    handles, label_vec = ax.get_legend_handles_labels()
    for i, (h, learning_method) in enumerate(zip(handles, learning_method_vec)):  # remove error bars in legend
        # NOTE(review): collections.Container was removed in Python 3.10
        # (use collections.abc.Container) — confirm target Python version.
        if isinstance(handles[i], collections.Container):
            handles[i] = handles[i][0]

    # plt.legend(loc='upper left', numpoints=1, ncol=3, fontsize=8, bbox_to_anchor=(0, 0))
    SHOW_STD = False
    legend = plt.legend(handles, label_vec,
                        loc='upper right',     # 'upper right'
                        handlelength=2,
                        fontsize=12,
                        labelspacing=0.2,      # distance between label entries
                        handletextpad=0.3,     # distance between label and the line representation
                        borderaxespad=0.2,     # distance between legend and the outer axes
                        borderpad=0.3,         # padding inside legend box
                        numpoints=1,           # put the marker only once
                        )
    # NOTE(review): SHOW_STD was just set to False, so this branch always replaces the legend above.
    if not(SHOW_STD):
        legend = plt.legend(handles, label_vec,
                            loc='upper right',     # 'upper right'
                            handlelength=2,
                            fontsize=10,
                            labelspacing=0.2,      # distance between label entries
                            handletextpad=0.3,     # distance between label and the line representation
                            borderaxespad=0.2,     # distance between legend and the outer axes
                            borderpad=0.3,         # padding inside legend box
                            numpoints=1,           # put the marker only once
                            scatterpoints=1        # display only one-scatter point in legend
                            )
    # # legend.set_zorder(1)
    frame = legend.get_frame()
    frame.set_linewidth(0.0)
    frame.set_alpha(0.9)  # 0.8

    # -- Figure settings and save
    plt.xscale('log')
    plt.xticks(xtick_lab, xtick_labels)
    plt.yticks(ytick_lab, ytick_lab)
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_ylim(bottom=ymin)
    # NOTE(review): grid(b=...) was renamed to visible= in newer matplotlib — confirm version pin.
    grid(b=True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
    grid(b=True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
    xlim(xmin, xmax)
    ylim(ymin, ymax)
    xlabel(r'Time Median (sec)', labelpad=0)  # labelpad=0
    ylabel(r'Accuracy', labelpad=0)

    if CREATE_PDF:
        savefig(join(figure_directory, fig_filename), format='pdf', dpi=None, edgecolor='w',
                orientation='portrait', transparent=False, bbox_inches='tight', pad_inches=0.05,
                frameon=None)
    if SHOW_PDF:
        showfig(join(figure_directory, fig_filename))
    if SHOW_PLOT:
        plt.show()
def test_H_observed():
    """Demo of ``H_observed``: compare observed path statistics against powers of the true H0.

    Builds a planted-distribution graph from a parameterized compatibility matrix H0,
    then prints the first rows of H0^k alongside the rows estimated by ``H_observed``
    both without and with non-backtracking (NB) paths, and finally a compact
    per-distance summary using only the largest entry of each first row.
    """
    print(
        "\n\n-- test_H_observed(): 'H_observed', uses: 'planted_distribution_model_H' --"
    )

    # --- Graph parameters
    n = 3000
    a = 1
    h = 8
    d = 2
    k = 3
    distribution = 'uniform'
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    # --- Create graph (seed both RNGs; both are used downstream)
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed
    W, Xd = planted_distribution_model_H(n,
                                         alpha=alpha0,
                                         H=H0,
                                         d_out=d,
                                         distribution=distribution,
                                         exponent=None,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)

    # --- Print first rows of the true matrix powers, then the observed estimates
    distance = 8
    print("First rows of powers of H0:")
    for power in range(1, distance + 1):
        print("{}: {}".format(power, np.linalg.matrix_power(H0, power)[0]))

    H_vec = H_observed(W, X0, distance=distance, NB=False)
    H_vec_EC = H_observed(W, X0, distance=distance, NB=True)

    print("First rows of H_vec (without NB)")
    for idx, H in enumerate(H_vec):  # skip the first entry in list
        print("{}: {}".format(idx + 1, H[0]))
    print("First rows of H_vec (with NB)")
    for idx, H in enumerate(H_vec_EC):
        print("{}: {}".format(idx + 1, H[0]))

    # --- Condense each first row to its top entry (easier to compare)
    h_vec = [np.max(np.linalg.matrix_power(H0, power)[0])
             for power in range(1, distance + 1)]
    hrow_vec = [np.max(H[0]) for H in H_vec]
    hrow_EC_vec = [np.max(H[0]) for H in H_vec_EC]

    print("\nh_vec:\n{}".format(np.around(h_vec, 3)))
    print("hrow_vec (estimated without NB):\n{}".format(np.around(hrow_vec, 3)))
    print("hrow_EC_vec (estimated with NB):\n{}".format(
        np.around(hrow_EC_vec, 3)))
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False,
        show_fig=True):
    """Estimation error vs. label sparsity f for DHE variants (MCE / DCE / DCEr).

    For the configuration selected by `choice`, optionally generates data: simulates
    planted-distribution graphs, and for a sweep of labeled-fraction values f estimates H
    with 'DHE' under several (scaling, randomize, length) option triples, recording the
    L2 norm between the estimate and the ground-truth H0 in a CSV.  Then aggregates the
    CSV with pandas and plots mean error (with std bands) against f on log-log axes.

    Parameters:
        choice (int): experiment configuration id (see the `if CHOICE == ...` cascade below).
        create_data (bool): overwrite the CSV and regenerate all data.
        add_data (bool): append additional repetitions to the existing CSV.
        show_plot (bool): stored in SHOW_PLOT but not referenced below — TODO confirm.
        create_pdf (bool): save the figure as a PDF into `figure_directory`.
        show_pdf (bool): open the created PDF via the platform open command.
        show_fig (bool): gate for building/showing the figure (assigned to both
            SHOW_FIG1 and SHOW_FIG2; only SHOW_FIG1 is used in this function).

    Side effects: reads/writes 'Fig_MHE_Optimal_ScalingFactor_f_lambda10_<CHOICE>.csv'
    in `data_directory`; may write/show a PDF in `figure_directory`.
    """
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    SHOW_PDF = show_pdf
    SHOW_FIG1 = show_fig
    SHOW_FIG2 = show_fig

    csv_filename = 'Fig_MHE_Optimal_ScalingFactor_f_lambda10_{}.csv'.format(
        CHOICE)
    header = [
        'currenttime',
        'option',  # one option corresponds to one choice of weight vector. In practice, one choice of scaling factor (for weight vector)
        'f',
        'scaling',
        'diff'
    ]  # L2 norm between H and estimate
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename),
                        header,
                        append=False)

    # -- Default Graph parameters
    rep = 100
    randomize = False
    initial_h0 = None  # initial vector to start finding optimal H
    distribution = 'powerlaw'
    exponent = -0.3
    rep_differentGraphs = 1
    EC = True
    # Geometric sweep of labeled fractions, from 0.9 downward
    f_vec = [0.9 * pow(0.1, 1 / 12)**x for x in range(42)]
    fraction_of_minimum = 1.1  # scaling parameters that lead to optimum except for this scaling factor are included
    ymin2 = 0.28
    ymax2 = 500
    xmin = 0.001
    # xmin = 0.0005
    xmax = None
    xtick_lab = [0.001, 0.01, 0.1, 1]
    # ytick_lab1 = np.arange(0, 1, 0.1)
    ytick_lab2 = [0.3, 1, 10, 100, 1000]
    ymax1 = 1.2
    ymin1 = 0.001
    # ytick_lab1 = [0.001, 0.01, 0.1, 1]
    k = 3
    a = 1
    stratified = True
    gradient = False
    n = 10000
    # color_vec = ['blue', 'orange', 'red']
    color_vec = ["#4C72B0", "#55A868", "#C44E52", "#CCB974", "#64B5CD"]
    color_vec = ["#4C72B0", "#8172B2", "#C44E52"]  # NOTE(review): overrides the previous line
    # label_vec = [r'$\tilde {\mathbf{H}}$', r'$\tilde{\mathbf{H}}^{(5)}_{\mathrm{NB}}$', r'$\tilde {\mathbf{H}}^{(5)}_{\mathrm{NB}}$ r']
    label_vec = ['MCE', 'DCE', 'DCEr']
    marker_vec = ['s', 'x', 'o']
    legendPosition = 'upper right'

    # -- Options
    # Each CHOICE fixes (h, d) and the per-option (scaling, randomize, length) triples.
    if CHOICE == 11:
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 12:
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 13:
        h = 8
        d = 10
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 14:
        h = 3
        d = 10
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 15:
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 100]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    # elif CHOICE == 16:
    #     n = 10000
    #     h = 3
    #     d = 10
    #     option_vec = ['opt1', 'opt2', 'opt3']
    #     scaling_vec = [0, 50, 50]
    #     randomize_vec = [False, False, True]
    #     length_vec = [1, 5, 5]
    elif CHOICE == 17:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 100]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 18:
        n = 1000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 10]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]

    # -- Options
    elif CHOICE == 19:
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 100]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
    elif CHOICE == 20:
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        scaling_vec = [0, 10, 100]
        randomize_vec = [False, False, True]
        length_vec = [1, 5, 5]
        gradient = True
        legendPosition = 'center right'
    else:
        # NOTE(review): raising Warning works but ValueError would be conventional.
        raise Warning("Incorrect choice!")

    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(
        seed=RANDOMSEED
    )  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for rs in range(1, rep_differentGraphs + 1):
            # print('Graph {}'.format(rs))

            # -- Create graph
            W, Xd = planted_distribution_model_H(n,
                                                 alpha=alpha0,
                                                 H=H0,
                                                 d_out=d,
                                                 distribution=distribution,
                                                 exponent=exponent,
                                                 directed=False,
                                                 debug=False)
            X0 = from_dictionary_beliefs(Xd)

            for r in range(1, rep + 1):
                # print('Repetition {}'.format(r))

                for f in f_vec:
                    # -- Sample labeled data
                    X1, ind = replace_fraction_of_rows(X0,
                                                       1 - f,
                                                       stratified=stratified)

                    # -- Calculate number of labeled neighbors
                    M_vec = M_observed(W, X1, distance=5, NB=True)
                    M = M_vec[1]
                    num_N = np.sum(M)
                    # print("f={:1.4f}, number labeled neighbors={}".format(f, num_N))
                    # print("M_vec:\n{}".format(M_vec))

                    # -- Create estimates and compare against GT
                    for option, scaling, randomize, length in zip(
                            option_vec, scaling_vec, randomize_vec,
                            length_vec):
                        H_est = estimateH(X1,
                                          W,
                                          method='DHE',
                                          variant=1,
                                          distance=length,
                                          EC=EC,
                                          weights=scaling,
                                          randomize=randomize,
                                          initial_H0=initial_h0,
                                          gradient=gradient)
                        diff = LA.norm(H_est - H0)  # L2 error against ground truth

                        # NOTE(review): 'tuple' shadows the builtin; consider renaming.
                        tuple = [str(datetime.datetime.now())]
                        text = [option, f, scaling, diff]
                        tuple.extend(text)
                        save_csv_record(join(data_directory, csv_filename),
                                        tuple)
                        # print("diff={:1.4f}, H_est:\n{}".format(diff, H_est))

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15)))

    # Aggregate repetitions
    df2 = df1.groupby(['option', 'f']).agg \
        ({'diff': [np.mean, np.std, np.size],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values
                   ]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'diff_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(15)))

    # Pivot table
    df3 = pd.pivot_table(df2,
                         index=['f'],
                         columns=['option'],
                         values=['diff_mean', 'diff_std'])  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values
                   ]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy
    # df2.rename(columns={'time_size': 'count'}, inplace=True)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))

    # Extract values
    X_f = df3['f'].values  # plot x values
    Y = []
    Y_std = []
    for option in option_vec:
        Y.append(df3['diff_mean_{}'.format(option)].values)
        Y_std.append(df3['diff_std_{}'.format(option)].values)
    # print("X_f:\n", X_f)
    # print("Y:\n", Y)
    # print("Y_std:\n", Y_std)

    if SHOW_FIG1:
        # -- Setup figure
        fig_filename = 'Fig_MHE_Optimal_ScalingFactor_diff_f_lambda10_{}.pdf'.format(
            CHOICE)
        mpl.rcParams['backend'] = 'pdf'
        mpl.rcParams['lines.linewidth'] = 3
        mpl.rcParams['font.size'] = 14
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 16
        mpl.rcParams['axes.edgecolor'] = '111111'  # axes edge color
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['figure.figsize'] = [4, 4]
        mpl.rcParams[
            'xtick.major.pad'] = 4  # padding of tick labels: default = 4
        mpl.rcParams[
            'ytick.major.pad'] = 4  # padding of tick labels: default = 4
        fig = plt.figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # -- Draw the plots (std band skipped for the middle series, i == 1)
        for i, (color, marker) in enumerate(zip(color_vec, marker_vec)):
            p = ax.plot(X_f,
                        Y[i],
                        color=color,
                        linewidth=3,
                        label=label_vec[i],
                        marker=marker)
            if i != 1:
                ax.fill_between(X_f,
                                Y[i] + Y_std[i],
                                Y[i] - Y_std[i],
                                facecolor=color,
                                alpha=0.2,
                                edgecolor='none')
        plt.xscale('log')
        plt.yscale('log')

        # -- Title and legend
        # distribution_label closes the LaTeX math string used in the title below
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        plt.title(r'$\!\!\!n\!=\!{}\mathrm{{k}}, h\!=\!{}, d\!=\!{}{}'.format(
            int(n / 1000), h, d, distribution_label))
        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(
            handles,
            labels,
            loc=legendPosition,  # 'upper right'
            handlelength=1.5,
            labelspacing=0,  # distance between label entries
            handletextpad=
            0.3,  # distance between label and the line representation
            # title='Variants',
            borderaxespad=0.2,  # distance between legend and the outer axes
            borderpad=0.1,  # padding inside legend box
        )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_lab)
        # plt.yticks(ytick_lab1, ytick_lab1)
        # NOTE(review): grid(b=...) was renamed to visible= in newer matplotlib — confirm version pin.
        plt.grid(b=True,
                 which='minor',
                 axis='both',
                 alpha=0.2,
                 linestyle='solid',
                 linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        plt.grid(b=True,
                 which='major',
                 axis='y',
                 alpha=0.2,
                 linestyle='solid',
                 linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        plt.xlabel(r'Label Sparsity $(f)$', labelpad=0)  # labelpad=0
        plt.ylabel(r'L2 norm', labelpad=-5)

        # Fall back to matplotlib's autoscaled limits where a bound is unset.
        # NOTE(review): the ymin1 branch reads plt.ylim()[1] (the *upper* limit) — looks
        # like it should be plt.ylim()[0]; confirm before changing.
        if xmin is None:
            xmin = plt.xlim()[0]
        if xmax is None:
            xmax = plt.xlim()[1]
        if ymin1 is None:
            ymin1 = plt.ylim()[1]
        if ymax1 is None:
            ymax1 = plt.ylim()[1]
        plt.xlim(xmin, xmax)
        plt.ylim(ymin1, ymax1)

        if CREATE_PDF:
            plt.savefig(join(figure_directory, fig_filename),
                        format='pdf',
                        dpi=None,
                        edgecolor='w',
                        orientation='portrait',
                        transparent=False,
                        bbox_inches='tight',
                        pad_inches=0.05,
                        frameon=None)
        if SHOW_FIG1:
            plt.show()
        if SHOW_PDF:
            os.system('{} "'.format(open_cmd[sys.platform]) +
                      join(figure_directory, fig_filename) +
                      '"')  # shows actually created PDF
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False):
    """Time several H-estimation methods while varying the number of classes (k).

    Generates planted-distribution graphs, times each configured learning
    method (`GT`, `MHE`, `DHE`, `Holdout`), records the timings in a CSV,
    aggregates them with pandas, and renders a log-scale "time vs. k" figure.

    Parameters
    ----------
    choice : int
        Experiment id (600-608) selecting graph size, methods, axis limits,
        colors, etc.  An unknown id raises ``Warning`` (kept for
        backward compatibility with existing callers).
    create_data : bool
        Overwrite the CSV with freshly generated timing records.
    add_data : bool
        Append new timing records to the existing CSV.
    show_plot, create_pdf, show_pdf : bool
        Display the matplotlib window / save the PDF / open the created PDF.
    shorten_length : bool
        Unused here; kept so the signature matches sibling run() scripts.
    """
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    SHOW_PDF = show_pdf
    CREATE_PDF = create_pdf
    SHOW_ARROWS = False
    STD_FILL = False
    CALCULATE_DATA_STATISTICS = False
    csv_filename = 'Fig_timing_VaryK_{}.csv'.format(CHOICE)
    header = ['currenttime', 'option', 'k', 'f', 'time']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Default Graph parameters
    rep_SameGraph = 2  # iterations on same graph
    initial_h0 = None  # initial vector to start finding optimal H
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    variant = 1
    EC = True  # Non-backtracking for learning
    gradient = False
    # BUGFIX: pruneRandom was only assigned inside CHOICE 606/607/608, but it is
    # read below whenever gradient is True (also CHOICE 604/605) -> NameError.
    # Initialize it with a safe default here.
    pruneRandom = False
    ymin = 0.0
    ymax = 1
    xmin = 2
    xmax = 7.5
    xtick_lab = [2, 3, 4, 5, 6, 7, 8]
    xtick_labels = ['2', '3', '4', '5', '6', '7', '8']
    ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 50]
    ytick_labels = [
        r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$50$'
    ]
    f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)]
    k_vec = [3, 4, 5]
    rep_DifferentGraphs = 1000  # iterations on different graphs
    err = 0
    avoidNeighbors = False
    convergencePercentage_W = None
    stratified = True
    label_vec = ['*'] * 10
    clip_on_vec = [True] * 15
    draw_std_vec = range(10)
    numberOfSplits = 1
    linestyle_vec = ['solid'] * 15
    linewidth_vec = [3, 2, 4, 2, 3, 2] + [3] * 15
    marker_vec = ['^', 's', 'o', 'x', 'o', '+', 's'] * 3
    markersize_vec = [8, 7, 8, 10, 7, 6] + [10] * 10
    facecolor_vec = [
        "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#64B5CD"
    ]
    legend_location = 'upper right'

    # -- Options with propagation variants
    if CHOICE == 600:  ## 1k nodes
        n = 1000
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['GT', 'MHE', 'DHE', 'Holdout']
        weight_vec = [10] * 4
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True]
        xmin = 3.
        xmax = 10.
        ymin = 0.
        ymax = 50.
        label_vec = ['GT', 'MCE', 'DCE', 'Holdout']
        facecolor_vec = [
            'black'
        ] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 4
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [3, 4, 5, 6]
        ytick_lab = [0, 1e-3, 1e-2, 1e-1, 1, 10, 50]
        ytick_labels = [
            r'$0$', r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$',
            r'$10$', r'$50$'
        ]

    elif CHOICE == 601:  ## 10k nodes
        n = 10000
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['GT', 'MHE', 'DHE', 'Holdout']
        weight_vec = [10] * 4
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 15 + [True]
        xmin = 3.
        xmax = 8.
        ymin = 0.
        ymax = 500.
        label_vec = ['GT', 'MCE', 'DCE', 'Holdout']
        facecolor_vec = [
            'black'
        ] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 4
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [3, 4, 5]
        ytick_lab = [0, 1e-3, 1e-2, 1e-1, 1, 10, 100, 300]
        ytick_labels = [
            r'$0$', r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$',
            r'$10$', r'$100$', r'$300$'
        ]

    elif CHOICE == 602:  ## 10k nodes
        n = 10000
        h = 8
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 3 + [True] + [False]
        ymin = 0.01
        ymax = 500
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DHEr']
        facecolor_vec = [
            "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"
        ] * 4
        f_vec = [0.01]
        k_vec = [3, 4, 5]
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        k_vec = [2, 3, 4, 5, 6, 7, 8]  # overrides the k_vec above

    elif CHOICE == 603:  ## 10k nodes
        n = 10000
        h = 3
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 4 + [True]
        xmin = 1.8
        xmax = 8.2
        ymin = 0.01
        ymax = 500
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr']
        facecolor_vec = [
            "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52"
        ] * 4
        f_vec = [0.01]
        k_vec = [3, 4, 5]
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        k_vec = [2, 3, 4, 5, 6, 7, 8]  # overrides the k_vec above
        legend_location = 'upper right'

    elif CHOICE == 604:  ## 10k nodes with Gradient
        n = 10000
        h = 3
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 4 + [True]
        ymin = 0.00
        ymax = 800
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr']
        facecolor_vec = [
            "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52"
        ] * 4
        f_vec = [0.01]
        k_vec = [3, 4, 5]
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        gradient = True
        legend_location = 'center right'

    elif CHOICE == 605:  ## 10k nodes with Gradient with f = 0.005
        n = 10000
        h = 3
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 4 + [True]
        ymin = 0.00
        ymax = 800
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr']
        facecolor_vec = [
            "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52"
        ] * 4
        f_vec = [0.005]
        k_vec = [3, 4, 5]
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        k_vec = [2, 3, 4, 5, 6, 7]
        gradient = True
        legend_location = 'center right'

    elif CHOICE == 606:  ## 10k nodes, f = 0.005, Gradient and PruneRandom
        n = 10000
        h = 3
        d = 25
        weight_vec = [10] * 20
        alpha_vec = [0] * 20
        beta_vec = [0] * 20
        gamma_vec = [0] * 20
        s_vec = [0.5] * 20
        numMaxIt_vec = [10] * 20
        randomize_vec = [False] * 4 + [True]
        xmin = 1.8
        xmax = 7.2
        ymin = 0.01
        ymax = 800
        label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr']
        facecolor_vec = [
            "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52"
        ] * 4
        f_vec = [0.005]
        k_vec = [3, 4, 5]
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4']
        learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE']
        k_vec = [2, 3, 4, 5, 6, 7]
        gradient = True
        pruneRandom = True
        legend_location = 'upper right'

    elif CHOICE == 607:  ## 10k nodes with gradient and PruneRandom
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 3 + [True] + [False]
        xmin = 1.8
        xmax = 7.
        ymin = 0.01
        ymax = 800
        label_vec = ['LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = [
            "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"
        ] * 4
        legend_location = 'upper left'
        marker_vec = [None, 's', 'x', 'o', '^', '+'] * 3
        markersize_vec = [8, 7, 10, 8, 7, 6] + [10] * 10
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        gradient = True
        pruneRandom = True
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]

    elif CHOICE == 608:  ## 10k nodes with gradient and PruneRandom (fewer graphs)
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 3 + [True] + [False]
        xmin = 1.8
        xmax = 7.2
        ymin = 0.01
        ymax = 800
        label_vec = ['LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = [
            "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"
        ] * 4
        legend_location = 'upper left'
        marker_vec = [None, 's', 'x', 'o', '^', '+'] * 3
        markersize_vec = [8, 7, 10, 8, 7, 6] + [10] * 10
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        gradient = True
        pruneRandom = True
        ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500]
        ytick_labels = [
            r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$',
            r'$100$', r'$500$'
        ]
        rep_DifferentGraphs = 10

    else:
        # NOTE(review): raising the Warning *class* works (Warning subclasses
        # Exception) but ValueError would be more idiomatic; kept to preserve
        # the exception type existing callers may catch.
        raise Warning("Incorrect choice!")

    RANDOMSEED = None  # None = non-deterministic; set an int for repeatability
    random.seed(RANDOMSEED)  # seeds the python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds numpy; both are used and thus needed

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for i in range(rep_DifferentGraphs):  # several graphs with same parameters
            for k in k_vec:
                H0 = create_parameterized_H(k, h, symmetric=True)
                H0c = to_centering_beliefs(H0)
                a = [1.] * k
                alpha0 = np.array(a)
                alpha0 = alpha0 / np.sum(alpha0)  # uniform class prior
                W, Xd = planted_distribution_model_H(n,
                                                     alpha=alpha0,
                                                     H=H0,
                                                     d_out=d,
                                                     distribution=distribution,
                                                     exponent=exponent,
                                                     directed=False,
                                                     debug=False)
                X0 = from_dictionary_beliefs(Xd)
                for j in range(rep_SameGraph):  # repeat for the same graph
                    ind = None
                    for f in f_vec:
                        # Remove fraction (1-f) of rows from X0
                        X1, ind = replace_fraction_of_rows(
                            X0,
                            1 - f,
                            avoidNeighbors=avoidNeighbors,
                            W=W,
                            ind_prior=ind,
                            stratified=stratified)
                        X2 = introduce_errors(X1, ind, err)

                        for option_index, (learning_method, alpha, beta, gamma, s, numMaxIt, weights, randomize) in \
                                enumerate(zip(learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec, numMaxIt_vec, weight_vec, randomize_vec)):

                            # -- Learning (only the estimation itself is timed)
                            if learning_method == 'GT':
                                timeTaken = 0.0  # ground truth requires no estimation
                            elif learning_method == 'Holdout':
                                prev_time = time.time()
                                H2 = estimateH_baseline_serial(
                                    X2,
                                    ind,
                                    W,
                                    numMax=numMaxIt,
                                    numberOfSplits=numberOfSplits,
                                    EC=EC,
                                    alpha=alpha,
                                    beta=beta,
                                    gamma=gamma)
                                timeTaken = time.time() - prev_time
                            else:
                                prev_time = time.time()
                                if gradient and pruneRandom:
                                    H2 = estimateH(X2,
                                                   W,
                                                   method=learning_method,
                                                   variant=1,
                                                   distance=length,
                                                   EC=EC,
                                                   weights=weights,
                                                   randomize=randomize,
                                                   gradient=gradient)
                                else:
                                    H2 = estimateH(X2,
                                                   W,
                                                   method=learning_method,
                                                   variant=1,
                                                   distance=length,
                                                   EC=EC,
                                                   weights=weights,
                                                   randomize=randomize)
                                timeTaken = time.time() - prev_time

                            tuple = [str(datetime.datetime.now())]
                            text = [option_vec[option_index], k, f, timeTaken]
                            tuple.extend(text)
                            save_csv_record(join(data_directory, csv_filename), tuple)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))

    # -- Aggregate repetitions
    df2 = df1.groupby(['option', 'k', 'f']).agg \
        ({'time': [np.mean, np.std, np.size, np.median],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'time_size': 'count'}, inplace=True)

    # -- Pivot table
    df3 = pd.pivot_table(df2,
                         index=['f', 'k'],
                         columns=['option'],
                         values=['time_mean', 'time_std', 'time_median'])
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy

    X_f = df3['k'].values  # read k from the aggregated values instead of k_vec
    Y_hash = defaultdict(dict)      # Y_hash[f][option] -> mean times per k
    Y_hash_std = defaultdict(dict)  # Y_hash_std[f][option] -> std per k
    for f in f_vec:
        for option in option_vec:
            Y_hash[f][option] = list()
            Y_hash_std[f][option] = list()
    for f in f_vec:
        for option in option_vec:
            Y_hash[f][option] = df3.loc[df3['f'] == f][
                'time_mean_{}'.format(option)].values  # mean
            Y_hash_std[f][option] = df3.loc[df3['f'] == f][
                'time_std_{}'.format(option)].values

    if SHOW_PLOT or SHOW_PDF or CREATE_PDF:
        # -- Setup figure
        fig_filename = 'Fig_Time_varyK_{}.pdf'.format(CHOICE)
        mpl.rc(
            'font', **{
                'family': 'sans-serif',
                'sans-serif': [u'Arial', u'Liberation Sans']
            })
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        opt_f_vecs = [(option, f) for option in option_vec for f in f_vec]
        for ((option, f), color, linewidth, clip_on, linestyle, marker, markersize) in \
                zip(opt_f_vecs, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec, marker_vec, markersize_vec):
            label = label_vec[option_vec.index(option)]

            if STD_FILL:
                # draw a +-1 std band with thin gray border lines
                ax.fill_between(X_f,
                                Y_hash[f][option] + Y_hash_std[f][option],
                                Y_hash[f][option] - Y_hash_std[f][option],
                                facecolor=color,
                                alpha=0.2,
                                edgecolor=None,
                                linewidth=0)
                ax.plot(X_f,
                        Y_hash[f][option] + Y_hash_std[f][option],
                        linewidth=0.5,
                        color='0.8',
                        linestyle='solid')
                ax.plot(X_f,
                        Y_hash[f][option] - Y_hash_std[f][option],
                        linewidth=0.5,
                        color='0.8',
                        linestyle='solid')

            ax.plot(X_f,
                    Y_hash[f][option],
                    linewidth=linewidth,
                    color=color,
                    linestyle=linestyle,
                    label=label,
                    zorder=4,
                    marker=marker,
                    markersize=markersize,
                    markeredgecolor='black',
                    markeredgewidth=1,
                    clip_on=clip_on)

        if SHOW_ARROWS:
            # annotate speedup factor between opt5 and opt4 at selected k values
            for indx in [2, 3]:
                ax.annotate(s='',
                            xy=(X_f[indx] - 0.05, Y_hash[f]['opt4'][indx]),
                            xytext=(X_f[indx] - 0.05, Y_hash[f]['opt5'][indx]),
                            arrowprops=dict(facecolor='blue', arrowstyle='<->'))
                ax.annotate(
                    str(
                        int(
                            np.round(Y_hash[f]['opt5'][indx] /
                                     Y_hash[f]['opt4'][indx]))) + 'x',
                    xy=(X_f[indx] - 0.4,
                        (Y_hash[f]['opt5'][indx] + Y_hash[f]['opt4'][indx]) / 10),
                    color='black',
                    va='center',
                    annotation_clip=False,
                    zorder=5)

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        if n < 1000:
            n_label = '{}'.format(n)
        else:
            n_label = '{}k'.format(int(n / 1000))
        title(r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format(
            n_label, d, h, f, distribution_label))
        handles, label_vec = ax.get_legend_handles_labels()
        legend = plt.legend(
            handles,
            label_vec,
            loc=legend_location,
            handlelength=2,
            labelspacing=0,  # distance between label entries
            handletextpad=0.3,  # distance between label and line representation
            borderaxespad=0.2,  # distance between legend and the outer axes
            borderpad=0.3,  # padding inside legend box
            numpoints=1,  # put the marker only once
        )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        # -- Figure settings and save
        plt.yscale('log')
        plt.xticks(xtick_lab, xtick_labels)
        # BUGFIX: originally plt.yticks(ytick_lab, ytick_lab) — the per-CHOICE
        # LaTeX ytick_labels were built but never used; pass them as labels.
        plt.yticks(ytick_lab, ytick_labels)

        # Only show ticks on the left and bottom spines
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')

        plt.xlim(xmin, xmax)
        plt.ylim(ymin, ymax)
        grid(b=True,
             which='major',
             axis='both',
             alpha=0.2,
             linestyle='solid',
             linewidth=0.5)
        grid(b=True,
             which='minor',
             axis='both',
             alpha=0.2,
             linestyle='solid',
             linewidth=0.5)
        xlabel(r'Number of Classes $(k)$', labelpad=0)
        ylabel(r'Time [sec]', labelpad=0)

        if CREATE_PDF:
            savefig(join(figure_directory, fig_filename),
                    format='pdf',
                    dpi=None,
                    edgecolor='w',
                    orientation='portrait',
                    transparent=False,
                    bbox_inches='tight',
                    pad_inches=0.05,
                    frameon=None)
        if SHOW_PLOT:
            plt.show()
        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))  # shows created PDF
def run(option, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, show_fig=True):
    """Compare DHE estimation quality (L2 norm vs. ground truth H0) across
    weight-vector scaling factors and path lengths.

    For each configuration in CHOICE_vec, generates graphs, runs `estimateH`
    with every scaling factor and path length, records the L2 difference to
    the ground-truth H0 in a per-CHOICE CSV, then aggregates with pandas and
    plots "L2 norm vs. scaling factor" curves (one line per max path length).

    Parameters
    ----------
    option : int
        Experiment configuration id (1, 5, 7, 9-13, 15-17); selects graph
        parameters and the set of CHOICE variants to compare.  An unknown id
        raises ``Warning`` (kept for backward compatibility).
    create_data : bool
        Overwrite the per-CHOICE CSVs with fresh records.
    add_data : bool
        Append records to the existing CSVs.
    show_plot, create_pdf, show_pdf : bool
        Display the matplotlib window / save the PDF / open the saved PDF.
    show_fig : bool
        Render the scaling-factor curve figure (figure 2).
    """
    # -- Setup
    OPTION = option
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    SHOW_PDF = show_pdf
    SHOW_FIG2 = show_fig  # curve

    RANDOMSEED = None  # None = non-deterministic; set an int for repeatability
    random.seed(RANDOMSEED)  # seeds the python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds numpy; both are used and thus needed

    header = ['currenttime',
              'option',   # one option = one choice of scaling factor (for weight vector)
              'variant',  # 1, 2, 3 (against GT), and 1-2, 1-3, 2-3 (against each other)
              'length',
              'diff',     # L2 norm between H and estimate
              'time']

    # Default Graph parameters and options
    n = 10000
    d = 25
    h = 8
    distribution = 'powerlaw'
    randomize = False
    initial_h0 = None  # initial vector to start finding optimal H
    initial_H0 = None
    exponent = -0.3
    length = 5
    rep_differentGraphs = 1
    rep = 10
    # BUGFIX: this list was commented out but is read below as EC[option]
    # (estimateH calls) and EC[i] (select_rows filter) -> NameError.
    # Entry 0 uses EC=False; all remaining scaling options use EC=True.
    EC = [False] + [True] * 35
    scaling_vec = [1] + [round(0.1 * pow(10, 1/8)**x, 4) for x in range(33)]
    num_options = len(scaling_vec)
    scaling_vec = np.array(scaling_vec)
    xmin2 = 0.1
    xmax2 = 1000
    ymax2 = 1.
    ymin2 = 0
    stratified = False
    xtick_lab = [0.1, 1, 10, 100, 1000]
    smartInit = False
    smartInitRandomize = False
    delta = 0.1
    variant = 1  # for figure 2, to speed up calculations
    logarithm = False

    if OPTION == 1:
        # NOTE(review): this branch does not define f, which CREATE_DATA needs
        # below (replace_fraction_of_rows) -> NameError; confirm intended value.
        CHOICE_vec = [18, 50, 51, 52, 53, 54]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 4
        randomize_vec = [False]*2 + [True]*4
        delta_vec = [None]*2 + [0.1, 0.2, 0.3] + [0.1]
        constraints_vec = [False]*5 + [True]

    elif OPTION == 5:
        f = 0.001
        h = 8
        ymax2 = 2
        ymin2 = 4e-2
        CHOICE_vec = [131, 132, 133]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False]*2 + [True]*1
        delta_vec = [None]*2 + [0.1]
        constraints_vec = [False]*3
        stratified = True

    elif OPTION == 7:
        f = 0.003
        h = 8
        CHOICE_vec = [151, 152, 153]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False]*2 + [True]*1
        delta_vec = [None]*2 + [0.1]
        constraints_vec = [False]*3
        stratified = True

    elif OPTION == 9:
        f = 0.001
        h = 3
        CHOICE_vec = [171, 172, 173]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False]*2 + [True]*1
        delta_vec = [None]*2 + [0.1]
        constraints_vec = [False]*3
        stratified = True
        ymin2 = 6e-2
        ymax2 = 1

    elif OPTION == 10:
        f = 0.001
        h = 3
        d = 10
        CHOICE_vec = [181, 182, 183]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False] * 2 + [True] * 1
        delta_vec = [None] * 2 + [0.1]
        constraints_vec = [False] * 3
        stratified = True

    elif OPTION == 11:
        f = 0.05
        h = 8
        d = 25
        ymax2 = 0.08
        CHOICE_vec = [191, 192, 193]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False] * 2 + [True] * 1
        delta_vec = [None] * 2 + [0.1]
        constraints_vec = [False] * 3
        stratified = True

    elif OPTION == 12:
        f = 0.05
        h = 3
        d = 25
        ymax2 = 0.08
        CHOICE_vec = [201, 202, 203]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False] * 2 + [True] * 1
        delta_vec = [None] * 2 + [0.1]
        constraints_vec = [False] * 3
        stratified = True

    elif OPTION == 13:
        n = 1000
        f = 0.01
        h = 3
        CHOICE_vec = [211, 212, 213]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False]*2 + [True]*1
        delta_vec = [None]*2 + [0.1]
        constraints_vec = [False]*3
        stratified = True
        ymin2 = 6e-2
        ymax2 = 1

    elif OPTION == 15:
        n = 100000
        f = 0.01
        h = 3
        CHOICE_vec = [221, 222, 223]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False]*2 + [True]*1
        delta_vec = [None]*2 + [0.1]
        constraints_vec = [False]*3
        stratified = True
        ymin2 = 5e-3
        ymax2 = 2e-1

    elif OPTION == 16:  # variant on 13 with logarithm
        n = 1000
        f = 0.01
        h = 3
        CHOICE_vec = [231, 232, 233]
        initial_H0_vec = [None] + [create_parameterized_H(3, h)] + [None] * 1
        randomize_vec = [False]*2 + [True]*1
        delta_vec = [None]*2 + [0.1]
        constraints_vec = [False]*3
        stratified = True
        ymin2 = 6e-2
        ymax2 = 1
        logarithm = True

    elif OPTION == 17:
        f = 0.001
        h = 8
        ymax2 = 2
        ymin2 = 4e-2
        CHOICE_vec = [133]
        initial_H0_vec = [None] * 1
        randomize_vec = [True]*1
        delta_vec = [0.1]
        constraints_vec = [False]*3  # zip truncates to len(CHOICE_vec)

    else:
        # NOTE(review): Warning kept (not ValueError) to preserve the exception
        # type existing callers may catch.
        raise Warning("Incorrect choice!")

    k = 3
    a = 1
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)  # uniform class prior
    H0 = create_parameterized_H(k, h, symmetric=True)

    if CREATE_DATA:
        for CHOICE in CHOICE_vec:
            csv_filename = 'Fig_MHE_Variants_{}.csv'.format(CHOICE)
            save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for rs in range(1, rep_differentGraphs + 1):
            # -- Create graph
            W, Xd = planted_distribution_model_H(n,
                                                 alpha=alpha0,
                                                 H=H0,
                                                 d_out=d,
                                                 distribution=distribution,
                                                 exponent=exponent,
                                                 directed=False,
                                                 debug=False)
            X0 = from_dictionary_beliefs(Xd)

            for r in range(1, rep + 1):
                X1, ind = replace_fraction_of_rows(X0, 1 - f, stratified=stratified)

                for CHOICE, initial_H0, randomize, delta, constraints in zip(
                        CHOICE_vec, initial_H0_vec, randomize_vec, delta_vec, constraints_vec):
                    csv_filename = 'Fig_MHE_Variants_{}.csv'.format(CHOICE)

                    # -- Create estimates and compare against GT
                    # (rebinding the outer `length` (=5) as loop variable is
                    # intentional here: it ends the loop back at its max value)
                    for length in range(1, length + 1):
                        for option in range(num_options):
                            start = time.time()
                            if smartInit:
                                startWeight = 0.2
                                initial_H0 = estimateH(X1,
                                                       W,
                                                       method='DHE',
                                                       variant=variant,
                                                       distance=5,
                                                       EC=EC[option],
                                                       weights=startWeight,
                                                       randomize=smartInitRandomize,
                                                       logarithm=logarithm)
                            H_est = estimateH(X1,
                                              W,
                                              method='DHE',
                                              variant=variant,
                                              distance=length,
                                              EC=EC[option],
                                              weights=scaling_vec[option],
                                              randomize=randomize,
                                              initial_H0=initial_H0,
                                              constraints=constraints,
                                              delta=delta)
                            time_est = time.time() - start
                            diff = LA.norm(H_est - H0)  # L2 distance to ground truth

                            tuple = [str(datetime.datetime.now())]
                            text = [option, variant, length, diff, time_est]
                            tuple.extend(text)
                            save_csv_record(join(data_directory, csv_filename), tuple)

    if SHOW_FIG2:
        for CHOICE, initial_h0, randomize, delta in zip(CHOICE_vec, initial_H0_vec, randomize_vec, delta_vec):
            csv_filename = 'Fig_MHE_Variants_{}.csv'.format(CHOICE)

            # -- Read, aggregate, and pivot data for all options
            df1 = pd.read_csv(join(data_directory, csv_filename))
            df2 = df1.groupby(['option', 'variant', 'length']).agg \
                ({'diff': [np.mean, np.std, np.size],  # Multiple Aggregates
                  'time': [np.mean, np.std],
                  })
            df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
            df2.reset_index(inplace=True)  # remove the index hierarchy
            df2.rename(columns={'diff_size': 'count'}, inplace=True)
            df2['length'] = df2['length'].astype(str)  # stringify for later '_'.join
            df3 = df2.query('variant=="1"')  # only variant 1 (closest to row-stochastic)
            df4 = pd.pivot_table(df3,
                                 index=['option'],
                                 columns=['length'],
                                 values=['diff_mean', 'diff_std'])  # Pivot
            df4.columns = ['_'.join(col).strip() for col in df4.columns.values]  # flatten; requires strings
            df4.reset_index(level=0, inplace=True)  # get option into columns

            # Add scaling factor for each row
            option = df4['option'].values  # extract the values from dataframe
            scaling = scaling_vec[option]  # look up the scaling factor in original list
            scaling = pd.Series(scaling)
            df5 = df4.assign(scaling=scaling.values)

            # Filter rows: only those options with EC being true
            select_rows = [i for i in range(num_options) if EC[i]]
            df6 = df5[df5['option'].isin(select_rows)]

            fig_filename = 'Fig_MHE_ScalingFactor_{}.pdf'.format(CHOICE)

            # -- Setup figure
            mpl.rcParams['backend'] = 'pdf'
            mpl.rcParams['lines.linewidth'] = 3
            mpl.rcParams['font.size'] = 14
            mpl.rcParams['axes.labelsize'] = 20
            mpl.rcParams['axes.titlesize'] = 16
            mpl.rcParams['xtick.labelsize'] = 16
            mpl.rcParams['ytick.labelsize'] = 16
            mpl.rcParams['legend.fontsize'] = 14
            mpl.rcParams['axes.edgecolor'] = '111111'  # axes edge color
            mpl.rcParams['grid.color'] = '777777'  # grid color
            mpl.rcParams['figure.figsize'] = [4, 4]
            mpl.rcParams['xtick.major.pad'] = 4  # padding of tick labels: default = 4
            mpl.rcParams['ytick.major.pad'] = 4  # padding of tick labels: default = 4
            fig = plt.figure()
            ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

            # -- Extract values into columns (plotting dataframe with bars plus
            # error lines and lines gave troubles)
            scaling = df6['scaling'].values  # np.array needed for bar plot
            diff_mean_1 = df6['diff_mean_1'].values
            diff_mean_2 = df6['diff_mean_2'].values
            diff_mean_3 = df6['diff_mean_3'].values
            diff_mean_4 = df6['diff_mean_4'].values
            diff_mean_5 = df6['diff_mean_5'].values
            diff_std_5 = df6['diff_std_5'].values

            # -- Draw the plots: one curve per max path length
            p1 = ax.plot(scaling, diff_mean_1, color='black', linewidth=1,
                         linestyle='--', label=r'$\ell_\mathrm{max} = 1$')
            p2 = ax.plot(scaling, diff_mean_2, color='orange',
                         label=r'$\ell_\mathrm{max} = 2$')
            p3 = ax.plot(scaling, diff_mean_3, color='blue',
                         label=r'$\ell_\mathrm{max} = 3$')
            p4 = ax.plot(scaling, diff_mean_4, color='green',
                         label=r'$\ell_\mathrm{max} = 4$')
            p5 = ax.plot(scaling, diff_mean_5, color='red', marker='o',
                         label=r'$\ell_\mathrm{max} = 5$')
            plt.xscale('log')
            plt.yscale('log')

            # +-1 std band for the longest path length
            upper = diff_mean_5 + diff_std_5
            lower = diff_mean_5 - diff_std_5
            ax.fill_between(scaling, upper, lower, facecolor='red', alpha=0.2,
                            edgecolor='none')

            # -- Title and legend
            if distribution == 'uniform':
                distribution_label = ',$uniform'
            else:
                distribution_label = '$'
            plt.title(r'$\!\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format(
                int(n / 1000), d, h, f, distribution_label))
            handles, labels = ax.get_legend_handles_labels()
            legend = plt.legend(handles,
                                labels,
                                loc='upper center',
                                handlelength=2,
                                labelspacing=0,  # distance between label entries
                                handletextpad=0.3,  # distance label <-> line
                                borderaxespad=0.3,  # legend <-> outer axes
                                borderpad=0.1,  # padding inside legend box
                                )
            frame = legend.get_frame()
            frame.set_linewidth(0.0)
            frame.set_alpha(0.9)  # 0.8

            # -- Figure settings
            plt.grid(b=True, which='both', alpha=0.2, linestyle='solid',
                     linewidth=0.5)
            plt.xlabel(r'Scaling factor $(\lambda)$', labelpad=0)
            plt.ylabel(r'L2 norm', labelpad=0)

            if xmin2 is None:
                xmin2 = plt.xlim()[0]
            if xmax2 is None:
                xmax2 = plt.xlim()[1]
            if ymin2 is None:
                ymin2 = plt.ylim()[0]
            ymin2 = max(ymin2, 0)
            if ymax2 is None:
                ymax2 = plt.ylim()[1]
            plt.xlim(xmin2, xmax2)
            plt.ylim(ymin2, ymax2)

            plt.tick_params(
                axis='x',  # changes apply to the x-axis
                which='both',  # both major and minor ticks are affected
                top='off',  # ticks along the top edge are off
                right='off',  # ticks along the right edge are off
            )
            plt.xticks(xtick_lab)

            if SHOW_PLOT:
                plt.show()
            if CREATE_PDF:
                plt.savefig(join(figure_directory, fig_filename),
                            format='pdf',
                            dpi=None,
                            edgecolor='w',
                            orientation='portrait',
                            transparent=False,
                            bbox_inches='tight',
                            pad_inches=0.05,
                            frameon=None)
            if SHOW_PDF:
                showfig(join(figure_directory, fig_filename))
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False):
    """Produce the 'fast optimal restarts' accuracy figure for one experimental choice.

    Pipeline: (1) sample synthetic graphs with a planted compatibility matrix H0;
    (2) estimate H via energy minimization from several randomly perturbed starting
    points ("restarts"); (3) run linearized BP with the estimated H and record
    classification accuracy per (k, number-of-restarts) to a CSV; (4) aggregate the
    CSV with pandas and plot relative accuracy vs. number of classes k.

    Parameters:
        choice (int): selects one hard-coded parameter set below (101-108).
        create_data (bool): overwrite the CSV and regenerate all data.
        add_data (bool): append newly generated records to the existing CSV.
        show_plot (bool): display the figure interactively.
        create_pdf (bool): save the figure as PDF.
        show_pdf (bool): open the previously created PDF.
        shorten_length (bool): accepted for interface compatibility; unused here.
    """
    verbose = False
    repeat_diffGraph = 1000  # number of independently sampled graphs per k
    SUBSET = True
    NOGT = False  ## Not draw Ground Truth Comparison

    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    SHOW_PDF = show_pdf
    CREATE_PDF = create_pdf
    STD_FILL = False

    csv_filename = 'Fig_fast_optimal_restarts_Accv2_{}.csv'.format(CHOICE)
    fig_filename = 'Fig_fast_optimal_restarts_Accv2_{}.pdf'.format(CHOICE)
    header = ['currenttime', 'k', 'restarts', 'accuracy']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Default Graph parameters
    global f_vec, labels, facecolor_vec
    global number_of_restarts
    initial_h0 = None
    distribution = 'powerlaw'
    exponent = -0.3  # for powerlaw
    length = 4  # path length
    constraint = True
    gradient = True
    variant = 1
    EC = True
    delta = 0.001
    numMaxIt = 10
    avoidNeighbors = False
    convergencePercentage_W = None
    stratified = True
    learning_method = 'DHE'
    weights = 10
    randomize = True
    return_min_energy = True
    number_of_restarts = [8, 6, 5, 4]

    # -- Default plotting parameters
    clip_on_vec = [True] * 20
    draw_std_vec = range(10)
    ymin = 0.3
    ymax = 1
    xmin = 0.001
    xmax = 1
    xtick_lab = []
    xtick_labels = []
    ytick_lab = np.arange(0, 1.1, 0.1)
    linestyle_vec = ['solid', 'solid', 'solid'] * 20
    linewidth_vec = [4, 4, 4, 4] * 10
    marker_vec = ['x', 'v', '^', '+', '>', '<'] * 10
    markersize_vec = [10, 8, 8, 8, 8, 8, 8] * 10
    facecolor_vec = ["#C44E52", "#4C72B0", "#8172B2", "#CCB974", "#55A868", "#64B5CD"] * 5

    # -- Options mainly change k
    # Sentinel restart values (see branches below): 100 = ground truth H0,
    # 99 = DCEr initialized with GT, 50 = min{restarts, GTr}, 1 = uninformative start.
    if CHOICE == 101:
        n = 10000
        h = 3
        d = 15
        k_vec = [3, 4, 5, 6, 7, 10, 13, 16, 18, 20]
        # k_vec = [4, 5, 7, 10]
        f = 0.09
        distribution = 'uniform'
        # Write in DESCENDING ORDER
        number_of_restarts = [30, 20, 10, 7, 5, 4, 3, 2, 1, 50, 99, 100]  ### 100:GT 99:GTr ### 50:min{30,GTr} 1:uninformative
        labels = ['r' + str(a1) for a1 in number_of_restarts]
        xtick_lab = k_vec
        xtick_labels = [str(a1) for a1 in k_vec]

    elif CHOICE == 102:
        n = 10000
        h = 3
        d = 15
        k_vec = [3, 4, 5, 6, 7, 8]
        # k_vec = [4, 5, 7, 10]
        f = 0.09
        distribution = 'uniform'
        # Write in DESCENDING ORDER
        # number_of_restarts = [30, 20, 10, 7, 5, 4, 3, 2, 1, 50, 99, 100]
        number_of_restarts = [20, 10, 5, 4, 3, 2]  ### 100:GT 99:GTr ### 50:min{30,GTr} 1:uninformative
        labels = ['r' + str(a1) for a1 in number_of_restarts]
        xtick_lab = k_vec
        xtick_labels = [str(a1) for a1 in k_vec]

    elif CHOICE == 103:
        n = 10000
        h = 3
        d = 15
        k_vec = [3, 4, 5, 6, 7, 8]
        # k_vec = [4, 5, 7, 10]
        f = 0.09
        distribution = 'uniform'
        # Write in DESCENDING ORDER
        number_of_restarts = [20, 10, 5, 4, 3, 2, 99]  ### 100:GT 99:GTr ### 50:min{30,GTr} 1:uninformative
        marker_vec = ['o', 'x', 'v', '^', '+', 's', None] * 10
        markersize_vec = [6, 10, 6, 6, 10, 6] * 10
        labels = ['r' + str(a1) for a1 in number_of_restarts]
        xtick_lab = k_vec
        xtick_labels = [str(a1) for a1 in k_vec]

    elif CHOICE == 104:
        n = 10000
        h = 8
        d = 15
        k_vec = [3, 4, 5, 6, 7, 8]
        # k_vec = [4, 5, 7, 10]
        f = 0.09
        distribution = 'uniform'
        # Write in DESCENDING ORDER
        number_of_restarts = [20, 10, 5, 4, 3, 2, 99]  ### 100:GT 99:GTr ### 50:min{30,GTr} 1:uninformative
        marker_vec = ['o', 'x', 'v', '^', '+', 's', None] * 10
        markersize_vec = [6, 10, 6, 6, 10, 6] * 10
        labels = ['r' + str(a1) for a1 in number_of_restarts]
        xtick_lab = k_vec
        xtick_labels = [str(a1) for a1 in k_vec]

    elif CHOICE == 105:
        n = 10000
        h = 8
        d = 15
        k_vec = [3, 4, 5, 6, 7, 8]
        # k_vec = [4, 5, 7, 10]
        f = 0.09
        distribution = 'uniform'
        # Write in DESCENDING ORDER
        number_of_restarts = [20, 10, 5, 4, 3, 2, 100]  ### 100:GT 99:GTr ### 50:min{30,GTr} 1:uninformative
        marker_vec = ['o', 'x', 'v', '^', '+', 's', None] * 10
        markersize_vec = [6, 10, 6, 6, 10, 6] * 10
        labels = ['r' + str(a1) for a1 in number_of_restarts]
        xtick_lab = k_vec
        xtick_labels = [str(a1) for a1 in k_vec]

    elif CHOICE == 106:
        n = 10000
        h = 3
        d = 15
        k_vec = [3, 4, 5, 6, 7, 8]
        # k_vec = [4, 5, 7, 10]
        f = 0.09
        distribution = 'uniform'
        # Write in DESCENDING ORDER
        number_of_restarts = [20, 10, 5, 4, 3, 2, 100]  ### 100:GT 99:GTr ### 50:min{30,GTr} 1:uninformative
        marker_vec = ['o', 'x', 'v', '^', '+', 's', None] * 10
        markersize_vec = [6, 10, 6, 6, 10, 6] * 10
        labels = ['r' + str(a1) for a1 in number_of_restarts]
        xtick_lab = k_vec
        xtick_labels = [str(a1) for a1 in k_vec]

    elif CHOICE == 107:
        n = 10000
        h = 8
        d = 15
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        # k_vec = [4, 5, 7, 10]
        f = 0.09
        distribution = 'uniform'
        # Write in DESCENDING ORDER
        number_of_restarts = [10, 5, 4, 3, 2, 99]
        # number_of_restarts = [20, 10, 5, 4, 3, 2, 100]  ### 100:GT 99:GTr ### 50:min{30,GTr} 1:uninformative
        marker_vec = ['x', 'v', '^', 's', 'o', 's', None] * 10
        markersize_vec = [10, 6, 6, 6, 6, 6, 6] * 10
        labels = [r'$r=$' + str(a1) for a1 in number_of_restarts]
        xtick_lab = k_vec
        xtick_labels = [str(a1) for a1 in k_vec]

    elif CHOICE == 108:
        n = 10000
        h = 8
        d = 15
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        # k_vec = [4, 5, 7, 10]
        f = 0.09
        distribution = 'uniform'
        # Write in DESCENDING ORDER
        number_of_restarts = [10, 5, 4, 3, 2, 99]
        # number_of_restarts = [20, 10, 5, 4, 3, 2, 100]  ### 100:GT 99:GTr ### 50:min{30,GTr} 1:uninformative
        marker_vec = ['x', 'v', '^', 's', 'o', 's', None] * 10
        markersize_vec = [10, 6, 6, 6, 6, 6, 6] * 10
        labels = [r'$r=$' + str(a1) for a1 in number_of_restarts]
        xtick_lab = k_vec
        xtick_labels = [str(a1) for a1 in k_vec]
        repeat_diffGraph = 10

    else:
        raise Warning("Incorrect choice!")

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for _ in range(repeat_diffGraph):
            for k in k_vec:
                a = [1.] * k
                k_star = int(k * (k - 1) / 2)  # number of free parameters of a symmetric doubly-stochastic H
                alpha0 = np.array(a)
                alpha0 = alpha0 / np.sum(alpha0)

                # Generate Graph
                # print("Generating Graph: n={} h={} d={} k={}".format(n, h, d, k))
                H0 = create_parameterized_H(k, h, symmetric=True)
                W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d, distribution=distribution, exponent=exponent, directed=False, debug=False)
                H0_vec = transform_HToh(H0)
                # print("\nGold standard {}".format(np.round(H0_vec, decimals=3)))
                X0 = from_dictionary_beliefs(Xd)
                X2, ind = replace_fraction_of_rows(X0, 1 - f, avoidNeighbors=avoidNeighbors, W=W, ind_prior=None, stratified=stratified)

                # Uninformative starting vector: all entries 1/k
                h0 = [1.] * int(k_star)
                h0 = np.array(h0)
                h0 = h0 / k

                delta = 1 / (3 * k)
                # print("delta: ", delta)

                # Build distinct +/-delta perturbation vectors; at most 2^k_star exist
                perm = []
                while len(perm) < number_of_restarts[0]:
                    temp = []
                    for _ in range(k_star):
                        temp.append(random.choice([-delta, delta]))
                    if temp not in perm:
                        perm.append(temp)
                    if len(perm) >= 2 ** (k_star):
                        break

                # One energy minimization per perturbed starting point
                E_list = []  ## format = [[energy, H_vec], []..]
                for vec in perm:
                    H2_vec, energy = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=False, constraints=constraint, gradient=gradient, return_min_energy=True, verbose=verbose, initial_h0=h0 + np.array(vec))
                    E_list.append([energy, list(H2_vec)])
                # print("All Optimizaed vector:")
                # [print(i) for i in E_list ]
                # print("Outside Energy:{} optimized vec:{} \n".format(min_energy_vec[0], optimized_Hvec))

                # min_energy_vec = min(E_list)
                # optimized_Hvec = min_energy_vec[1]
                # # print("\nEnergy:{} optimized vec:{} \n\n".format(min_energy_vec[0],optimized_Hvec))
                #
                # GTr_optimized_Hvec, GTr_energy = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=False, constraints=constraint, gradient=gradient, return_min_energy=True, verbose=verbose, initial_h0=H0_vec)

                uninformative_optimized_Hvec, uninformative_energy = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=False, constraints=constraint, gradient=gradient, return_min_energy=True, verbose=verbose, initial_h0=h0)

                # NOTE(review): GTr_optimized_Hvec / GTr_energy are referenced in the
                # restartz == 99 and restartz == 50 branches below, but the call that
                # defines them is commented out above -- those branches would raise
                # NameError if reached. Confirm before enabling choices that use 99/50.
                iterative_permutations = list(E_list)
                for restartz in number_of_restarts:
                    # NOTE(review): 'and' binds tighter than 'or', so this condition is
                    # k==2 or (k==3 and 8<restartz<99); verify that was intended rather
                    # than (k==2 or k==3) and 8<restartz<99.
                    if k == 2 or k == 3 and restartz > 8 and restartz < 99:
                        continue
                    # number_of_restarts is written in descending order, so the pool
                    # shrinks monotonically via sampling without replacement
                    if restartz <= number_of_restarts[0]:
                        iterative_permutations = random.sample(iterative_permutations, restartz)

                    # print("For restart:{}, we have vectors:\n".format(restartz))
                    # [print(i) for i in iterative_permutations]

                    if restartz == 100:  ## for GT
                        H2c = to_centering_beliefs(H0)
                        # print("\nGT: ", transform_HToh(H0,k))
                    elif restartz == 99:  ## for DCEr init with GT
                        H2c = to_centering_beliefs(transform_hToH(GTr_optimized_Hvec, k))
                        # print("\nGTr: ", GTr_optimized_Hvec)
                    elif restartz == 1:  ## for DCEr with uninformative initial
                        H2c = to_centering_beliefs(transform_hToH(uninformative_optimized_Hvec, k))
                        # print("\nUninformative: ", uninformative_optimized_Hvec)
                    elif restartz == 50:  ## for min{DCEr , GTr}
                        # print("Length:",len(E_list))
                        # [print(i) for i in E_list]
                        mod_E_list = list(E_list) + [[GTr_energy, list(GTr_optimized_Hvec)]]  # Add GTr to list and take min
                        # print("Mod Length:", len(mod_E_list))
                        # [print(i) for i in mod_E_list]
                        min_energy_vec = min(mod_E_list)
                        # print("\nSelected for 50:",min_energy_vec)
                        optimized_Hvec = min_energy_vec[1]
                        H2c = to_centering_beliefs(transform_hToH(optimized_Hvec, k))
                    else:
                        # default: best (lowest-energy) estimate among the sampled restarts
                        min_energy_vec = min(iterative_permutations)
                        optimized_Hvec = min_energy_vec[1]
                        H2c = to_centering_beliefs(transform_hToH(optimized_Hvec, k))
                    # print("Inside Chosen Energy:{} optimized vec:{} \n".format(min_energy_vec[0], optimized_Hvec))

                    # Propagate beliefs with the chosen (centered, scaled) H and score accuracy
                    try:
                        eps_max = eps_convergence_linbp_parameterized(H2c, W, method='noecho', X=X2)
                        s = 0.5  # use half the convergence threshold as scaling
                        eps = s * eps_max
                        F, actualIt, actualPercentageConverged = \
                            linBP_symmetric_parameterized(X2, W, H2c * eps, method='noecho', numMaxIt=numMaxIt, convergencePercentage=convergencePercentage_W, debug=2)
                    except ValueError as e:
                        print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))
                    else:
                        acc = matrix_difference_classwise(X0, F, ignore_rows=ind)
                        tuple = [str(datetime.datetime.now())]
                        text = [k, restartz, acc]
                        tuple.extend(text)
                        if verbose:
                            print("\nGold standard {}".format(np.round(H0_vec, decimals=3)))
                            # print("k:{} Restart:{} OptimizedVec:{} Energy:{} Accuracy:{}".format(k, restartz, np.round(min_energy_vec[1], decimals=3), min_energy_vec[0], acc ))
                        # print("k:{} Restart:{} Accuracy:{}".format(k, 1, L2_dist))
                        save_csv_record(join(data_directory, csv_filename), tuple)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1 (length {}):\n{}".format(len(df1.index), df1.head(20)))

    # Aggregate repetitions
    df2 = df1.groupby(['k', 'restarts']).agg \
        ({'accuracy': [np.mean, np.std, np.size], })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)
    df2['restarts'] = df2['restarts'].astype(str)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(20)))

    # Pivot table
    df3 = pd.pivot_table(df2, index=['k'], columns=['restarts'], values=['accuracy_mean', 'accuracy_std'])  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(10)))

    df4 = df3.drop('k', axis=1)
    if NOGT:
        df4 = df3.drop(['k', 'accuracy_mean_0', 'accuracy_mean_1', 'accuracy_std_0', 'accuracy_std_1'], axis=1)
        # df4 = df3.drop(['k', 'accuracy_mean_100', 'accuracy_std_100'], axis=1)

    # Normalize each row by its maximum -> "relative accuracy" on the y-axis
    df5 = df4.div(df4.max(axis=1), axis=0)
    df5['k'] = df3['k']
    # print("\n-- df5 (length {}):\n{}".format(len(df5.index), df5.head(100)))
    # df5 = df3  ## for normalization

    X_f = df5['k'].values  # read k from values instead
    Y = []
    Y_std = []
    for rez in number_of_restarts:
        if NOGT:
            if rez == 100 or rez == 99:
                continue
        Y.append(df5['accuracy_mean_{}'.format(rez)].values)
        if STD_FILL:
            Y_std.append(df5['accuracy_std_{}'.format(rez)].values)

    if CREATE_PDF or SHOW_PDF or SHOW_PLOT:

        # -- Setup figure
        mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['font.size'] = 16
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # -- Drawing
        if STD_FILL:
            for choice, (option, facecolor) in enumerate(zip(number_of_restarts, facecolor_vec)):
                if option == 100:  ## GT
                    if NOGT:
                        continue
                    facecolor = 'black'
                elif option == 99:  ## GT-r
                    if NOGT:
                        continue
                    facecolor = 'black'
                ax.fill_between(X_f, Y[choice] + Y_std[choice], Y[choice] - Y_std[choice], facecolor=facecolor, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X_f, Y[choice] + Y_std[choice], linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X_f, Y[choice] - Y_std[choice], linewidth=0.5, color='0.8', linestyle='solid')

        for choice, (option, label, color, linewidth, clip_on, linestyle, marker, markersize) in \
                enumerate(zip(number_of_restarts, labels, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec, marker_vec, markersize_vec)):
            # Sentinel options get fixed styling, overriding the per-index vectors
            if option == 100:  ## GT
                if NOGT:
                    continue
                linestyle = 'dashed'
                linewidth = 3
                color = 'black'
                label = 'GS'
                marker = 'x'
                markersize = 6
            elif option == 99:  ## GT-r
                if NOGT:
                    continue
                linestyle = 'dashed'
                linewidth = 2
                color = 'black'
                label = 'Global Minima'
                marker = None
                markersize = 6
            elif option == 1:  ## GT
                color = "#CCB974"
                linewidth = 2
                label = 'Uninfo'
            elif option == 50:  ## GT-r
                label = 'min{30,GTr}'
            P = ax.plot(X_f, Y[choice], linewidth=linewidth, color=color, linestyle=linestyle, label=label, zorder=4, marker=marker,
                        markersize=markersize, markeredgecolor='black', markeredgewidth=1, clip_on=clip_on)

        # plt.xscale('log')

        # -- Title and legend
        distribution_label = '$'
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        n_label = '{}k'.format(int(n / 1000))
        if n < 1000:
            n_label = '{}'.format(n)
        titleString = r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}, f\!=\!{} $'.format(n_label, d, h, f)
        title(titleString)

        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(handles, labels,
                            loc='lower left',  # 'upper right'
                            handlelength=2,
                            labelspacing=0,  # distance between label entries
                            handletextpad=0.3,  # distance between label and the line representation
                            borderaxespad=0.2,  # distance between legend and the outer axes
                            borderpad=0.3,  # padding inside legend box
                            numpoints=1,  # put the marker only once
                            # bbox_to_anchor=(1.1, 0)
                            )
        # # legend.set_zorder(1)
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        # -- Figure settings
        plt.xticks(xtick_lab, xtick_labels)
        # plt.yticks(ytick_lab, ytick_lab)
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.2f'))
        # ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.0f'))
        grid(b=True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        grid(b=True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        xlabel(r'Number of Classes $(k)$', labelpad=0)  # labelpad=0
        ylabel(r'Relative Accuracy', labelpad=0)
        xlim(2.9, 7.1)
        # ylim(0.65, 1.015)

        if CREATE_PDF:
            savefig(join(figure_directory, fig_filename), format='pdf', dpi=None, edgecolor='w', orientation='portrait',
                    transparent=False, bbox_inches='tight', pad_inches=0.05, frameon=None)
        if SHOW_PLOT:
            plt.show()
        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))  # shows actually created PDF
def test_estimate_synthetic():
    """Smoke test for 'estimateH' on a synthetic planted-partition graph.

    Creates one powerlaw graph with a known compatibility matrix H0, then
    estimates H with the MHE, DHE, LHE variants and the holdout baseline,
    printing each estimate and its timing for visual comparison against H0.
    Uses: 'M_observed', 'planted_distribution_model_H'.
    """
    print("\n\n-- test_estimate_synthetic(): 'estimateH', uses: 'M_observed', 'planted_distribution_model_H', --")

    # --- Parameters for graph
    n = 1000
    a = 1
    h = 8
    d = 25
    k = 3
    distribution = 'powerlaw'
    exponent = -0.3
    f = 0.05  # fraction of labeled nodes kept in X1
    print("n={}, a={},d={}, f={}".format(n, a, d, f))
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    print("H0:\n{}".format(H0))

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, ind = replace_fraction_of_rows(X0, 1 - f)

    # --- Print some neighbor statistics
    M_vec = M_observed(W, X0, distance=3, NB=True)
    print("\nNeighbor statistics in fully labeled graph:")
    print("M^(1): direct neighbors:\n{}".format(M_vec[1]))
    print("M^(2): distance-2 neighbors:\n{}".format(M_vec[2]))
    print("M^(3): distance-3 neighbors:\n{}".format(M_vec[3]))

    # --- MHE ---
    print("\nMHE: Estimate H based on X0 (fully labeled graph):")
    start = time.time()
    H1 = estimateH(X0, W, method='MHE', variant=1)
    H2 = estimateH(X0, W, method='MHE', variant=2)
    H3 = estimateH(X0, W, method='MHE', variant=3)
    time_est = time.time() - start
    print("Estimated H based on X0 (MHE), variant 1:\n{}".format(H1))
    print("Estimated H based on X0 (MHE), variant 2:\n{}".format(H2))
    print("Estimated H based on X0 (MHE), variant 3:\n{}".format(H3))
    print("Time for all three variants:{}".format(time_est))

    print("\nMHE: Estimate H based on X1 with f={}:".format(f))
    start = time.time()
    H1 = estimateH(X1, W, method='MHE', variant=1)
    H2 = estimateH(X1, W, method='MHE', variant=2)
    H3 = estimateH(X1, W, method='MHE', variant=3)
    time_est = time.time() - start
    print("Estimated H based on X1 (MHE), variant 1:\n{}".format(H1))
    print("Estimated H based on X1 (MHE), variant 2:\n{}".format(H2))
    print("Estimated H based on X1 (MHE), variant 3:\n{}".format(H3))
    print("Time for all three variants:{}".format(time_est))

    # FIX: the placeholder '{}' was previously printed literally because the
    # .format(f) call was missing.
    print("\nMHE, variant=1: Estimate H based on X1 with f={}, but with initial correct vector:".format(f))
    weight = [0, 0, 0, 0, 0]  # ignored for MHE
    initial_h0 = [0.1, 0.8, 0.1]
    H5 = estimateH(X1, W, method='MHE', weights=weight)
    H5_r = estimateH(X1, W, method='MHE', weights=weight, randomize=True)
    # NOTE(review): keyword 'initial_H0' differs in capitalization from the
    # 'initial_h0' keyword used elsewhere in this file -- verify against the
    # estimateH signature.
    H5_i = estimateH(X1, W, method='MHE', weights=weight, initial_H0=transform_hToH(initial_h0, 3))
    print("Estimated H based on X5 only (MHE): \n{}".format(H5))
    print("Estimated H based on X5 only (MHE), randomize:\n{}".format(H5_r))
    print("Estimated H based on X5 only (MHE), initial=GT:\n{}".format(H5_i))

    # --- DHE ---
    print("\nDHE: Estimate H based on X1 with f={}:".format(f))
    start = time.time()
    H1 = estimateH(X1, W, method='DHE', variant=1, distance=1)
    H2 = estimateH(X1, W, method='DHE', variant=2, distance=1)
    H3 = estimateH(X1, W, method='DHE', variant=3, distance=1)
    time_est = time.time() - start
    print("Estimated H based on X1 (DHE, distance=1), variant 1:\n{}".format(H1))
    print("Estimated H based on X1 (DHE, distance=1), variant 2:\n{}".format(H2))
    print("Estimated H based on X1 (DHE, distance=1), variant 3:\n{}".format(H3))
    print("Time for all three variants:{}".format(time_est))

    # --- LHE ---
    print("\nLHE: Estimate H based on X1 with f={}:".format(f))
    start = time.time()
    H1 = estimateH(X1, W, method='LHE')
    time_est = time.time() - start
    print("Estimated H based on X1 (LHE):\n{}".format(H1))
    print("Time for LHE:{}".format(time_est))

    # --- Baseline holdout method ---
    f2 = 0.5
    X2, ind2 = replace_fraction_of_rows(X0, 1 - f2)
    print("\nHoldout method: Estimate H based on X2 with f={}):".format(f2))
    start = time.time()
    H2 = estimateH_baseline_serial(X2=X2, ind=ind2, W=W, numberOfSplits=1, numMax=10)
    time_est = time.time() - start
    print("Estimated H based on X2 (Holdout method) with f={}:\n{}".format(f2, H2))
    print("Time for Holdout method:{}".format(time_est))  # TODO: result suggests this method does not work?
def run(choice, variant, create_data=False, show_plot=False, create_pdf=False, show_pdf=False):
    """main parameterized method to produce all figures.

    Can be run from external jupyther notebook or method to produce all figures
    in PDF.

    Compares the maximal entry of powers of the true compatibility matrix H0
    ('H') against path-based estimates with backtracking paths ('Hrow') and
    non-backtracking paths ('HrowEC') over path length l, recording results to
    CSV and rendering a bar/line figure.

    Parameters:
        choice (int): selects one hard-coded parameter set below.
        variant (int): sub-option of a choice (colors/labels/which bars shown).
        create_data (bool): overwrite the CSV and regenerate all data.
        show_plot / create_pdf / show_pdf (bool): display / save / open figure.
    """
    # %% -- Setup
    CREATE_DATA = create_data
    CHOICE = choice
    VARIANT = variant
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    SHOW_PDF = show_pdf

    SHOW_TITLE = True
    LEGEND_MATCH_COLORS = False
    SHOW_DISTRIBUTION_IN_TITLE = True
    SHOW_BACKTRACK_ESTIMATE = True
    SHOW_NONBACKTRACK_ESTIMATE = True

    plot_colors = ['darkgreen', 'darkorange', 'blue']
    label_vec = [
        r'$\mathbf{H}^{\ell}\,\,\,\,$',
        r'$\mathbf{\hat P}^{(\ell)}$',
        r'$\mathbf{\hat P}_{\mathrm{NB}}^{(\ell)}$'
    ]
    csv_filename = 'Fig_Backtracking_Advantage_{}.csv'.format(CHOICE)
    fig_filename = 'Fig_Backtracking_Advantage_{}-{}.pdf'.format(CHOICE, VARIANT)
    header = [
        'currenttime',
        'choice',  # H, Hrow, HrowEC
        'l',
        'valueH',  # maximal values in first row of H
        'valueM'
    ]  # average value across entries in M
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # %% -- Default parameters
    ymin = 0.3
    ymax = 1
    exponent = None

    # %% -- CHOICES and VARIANTS
    if CHOICE == 1:  # n=1000, shows NB to be slight lower for l=2: probably due to sampling issues (d=3, thus very few points available)
        n = 1000
        h = 8
        d = 3
        f = 0.1
        distribution = 'uniform'
        rep = 10000
        length = 8
    elif CHOICE == 2:
        n = 1000
        h = 8
        d = 10
        f = 0.1
        distribution = 'uniform'
        rep = 10000
        length = 8
    elif CHOICE == 3:  # nice: shows nicely that difference is even bigger for smaller h
        n = 1000
        h = 3
        d = 10
        f = 0.1
        distribution = 'uniform'
        rep = 10000
        length = 8
        ymax = 0.8
    elif CHOICE == 4:
        n = 10000
        h = 3
        d = 10
        f = 0.1
        distribution = 'uniform'
        rep = 100
        length = 8
        ymin = 0.333
        ymax = 0.65
    elif CHOICE == 5:
        n = 10000
        h = 3
        d = 3
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
    elif CHOICE == 6:  # n=1000, the powerlaw problem with small graphs and high exponent
        n = 1000
        h = 8
        d = 3
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.5
        rep = 10000
        length = 8
    elif CHOICE == 7:
        n = 10000
        h = 8
        d = 3
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
        # ymin = 0.4
        ymax = 1
    elif CHOICE == 8:
        n = 10000
        h = 8
        d = 10
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
        # ymin = 0.4
        ymax = 1
    elif CHOICE == 9:  # shows lower NB due to problem with sampling from high powerlaw -0.5
        n = 10000
        h = 8
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.5
        rep = 1000
        length = 8
    elif CHOICE == 10:
        n = 10000
        h = 8
        d = 3
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.5
        rep = 1000
        length = 8
    elif CHOICE == 11:  # problem: shows that NB is too low (probably because of problem with sampling from -0.5 factor)
        n = 1000
        h = 8
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.5
        rep = 1000
        length = 8
    elif CHOICE == 12:  # problem: shows no problem with NB (probably because no problem with sampling from -0.2 factor)
        n = 1000
        h = 8
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.2
        rep = 1000
        length = 8
    elif CHOICE == 20:
        n = 10000
        h = 3
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.3
        rep = 1000
        length = 8
        ymin = 0.333
        ymax = 0.65
    elif CHOICE == 21:  # originally used before color change
        n = 10000
        h = 3
        d = 25
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.3
        rep = 1000
        length = 8
        ymin = 0.333
        ymax = 0.65
        # VARIANTs only restyle the figure (colors/labels/which series shown)
        if VARIANT == 1:
            SHOW_TITLE = False
            plot_colors = ['red', 'blue', 'darkorange']
            label_vec = [r'$\mathbf{H}^{\ell}\quad\quad$', 'naive', 'better']
            LEGEND_MATCH_COLORS = True
        if VARIANT == 2:
            SHOW_TITLE = False
            plot_colors = ['red', 'blue', 'darkorange']
            label_vec = [r'$\mathbf{H}^{\ell}\quad\quad$', 'naive', 'better']
            SHOW_NONBACKTRACK_ESTIMATE = False
            LEGEND_MATCH_COLORS = True
        if VARIANT == 3:
            SHOW_TITLE = False
            plot_colors = ['red', 'blue', 'darkorange']
            label_vec = [r'$\mathbf{H}^{\ell}\quad\quad$', 'naive', 'better']
            SHOW_BACKTRACK_ESTIMATE = False
            SHOW_NONBACKTRACK_ESTIMATE = False
            LEGEND_MATCH_COLORS = True
        if VARIANT == 4:
            plot_colors = ['red', 'blue', 'darkorange']
            LEGEND_MATCH_COLORS = True
    elif CHOICE == 25:
        n = 10000
        h = 8
        d = 5
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
    elif CHOICE == 26:
        n = 10000
        h = 8
        d = 25
        f = 0.1
        distribution = 'uniform'
        rep = 1000
        length = 8
        ymax = 0.9
        ymin = 0.4
    elif CHOICE == 27:
        n = 10000
        h = 8
        d = 10
        f = 0.1
        distribution = 'powerlaw'
        exponent = -0.3
        rep = 1000
        length = 8
        ymax = 0.9
        ymin = 0.33
    elif CHOICE == 31:
        n = 10000
        h = 3
        d = 10
        f = 0.1
        distribution = 'uniform'
        length = 8
        ymin = 0.333
        ymax = 0.65
        SHOW_DISTRIBUTION_IN_TITLE = False
        plot_colors = ['red', 'blue', 'darkorange']
        LEGEND_MATCH_COLORS = True
        if VARIANT == 0:
            rep = 1000
        if VARIANT == 1:
            rep = 20
    else:
        raise Warning("Incorrect choice!")

    k = 3
    a = 1
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # %% -- Create data
    if CREATE_DATA:

        # Calculations H: ground-truth reference values from powers of H0
        print("Max entry of first rows of powers of H0:")
        for l in range(1, length + 1):
            valueH = np.max(np.linalg.matrix_power(H0, l)[0])
            tuple = [str(datetime.datetime.now())]
            text = ['H', l, valueH, '']
            text = np.asarray(text)  # without np, entries get ugly format
            tuple.extend(text)
            print("{}: {}".format(l, valueH))
            save_csv_record(join(data_directory, csv_filename), tuple)

        # Calculations Hrow and HrowEC: estimated values, averaged over rep graphs
        for r in range(rep):
            print('Repetition {}'.format(r))

            # Create graph
            start = time.time()
            W, Xd = planted_distribution_model_H(
                n,
                alpha=alpha0,
                H=H0,
                d_out=d,  # notice that for undirected graphs, actual degree = 2*d
                distribution=distribution,
                exponent=exponent,
                directed=False,
                debug=False)
            X0 = from_dictionary_beliefs(Xd)
            X1, ind = replace_fraction_of_rows(X0, 1 - f)
            time_calc = time.time() - start
            # print("\nTime for graph:{}".format(time_calc))
            print("Average outdegree: {}".format(calculate_average_outdegree_from_graph(W)))

            # Calculate H_vec and M_vec versions (M_vec to calculate the average number of entries in M)
            H_vec = H_observed(W, X1, distance=length, NB=False, variant=1)
            H_vec_EC = H_observed(W, X1, distance=length, NB=True, variant=1)
            M_vec = M_observed(W, X1, distance=length, NB=False)
            M_vec_EC = M_observed(W, X1, distance=length, NB=True)

            # Calculation H_vec
            # print("Max entry of first rows of H_vec")
            for l, H in enumerate(H_vec):
                valueH = H[0][(l + 1) % 2]  # better than 'value = np.max(H[0])', otherwise sometimes chooses another higher entry -> biased estimate
                valueM = np.average(M_vec[l + 1])
                # print(M_vec[l+1])
                # print(valueM)
                tuple = [str(datetime.datetime.now())]
                text = ['Hrow', l + 1, valueH, valueM]
                text = np.asarray(text)  # without np, entries get ugly format
                tuple.extend(text)
                # print("{}: {}".format(l + 1, value))
                save_csv_record(join(data_directory, csv_filename), tuple)

            # Calculation H_vec_EC
            # print("Max entry of first rows of H_vec_EC")
            for l, H in enumerate(H_vec_EC):
                valueH = H[0][(l + 1) % 2]
                valueM = np.average(M_vec_EC[l + 1])
                # print(M_vec_EC[l+1])
                # print(valueM)
                tuple = [str(datetime.datetime.now())]
                text = ['HrowEC', l + 1, valueH, valueM]
                text = np.asarray(text)  # without np, entries get ugly format
                tuple.extend(text)
                # print("{}: {}".format(l + 1, value))
                save_csv_record(join(data_directory, csv_filename), tuple)

    # %% -- Read, aggregate, and pivot data
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1 (length {}):\n{}".format(len(df1.index), df1.head(15)))
    df2 = df1.groupby(['choice', 'l']).agg \
        ({'valueH': [np.mean, np.std, np.size],  # Multiple Aggregates
          'valueM': [np.mean],
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'valueH_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(30)))

    df3 = pd.pivot_table(df2, index=['l'], columns=['choice'], values=['valueH_mean', 'valueH_std', 'valueM_mean'])  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    # df3.drop(['valueM_mean_H', 'valueH_std_H'], axis=1, inplace=True)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.reset_index(level=0, inplace=True)  # get l into columns
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))

    # %% -- Setup figure
    mpl.rcParams['backend'] = 'pdf'
    mpl.rcParams['lines.linewidth'] = 3
    mpl.rcParams['font.size'] = 16
    mpl.rcParams['axes.labelsize'] = 20
    mpl.rcParams['axes.titlesize'] = 16
    mpl.rcParams['xtick.labelsize'] = 16
    mpl.rcParams['ytick.labelsize'] = 16
    mpl.rcParams['legend.fontsize'] = 20
    mpl.rcParams['axes.edgecolor'] = '111111'  # axes edge color
    mpl.rcParams['grid.color'] = '777777'  # grid color
    mpl.rcParams['figure.figsize'] = [4, 4]
    mpl.rcParams['xtick.major.pad'] = 4  # padding of tick labels: default = 4
    mpl.rcParams['ytick.major.pad'] = 4  # padding of tick labels: default = 4
    fig = plt.figure()
    ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

    # %% -- Extract values into columns (plotting dataframew with bars plus error lines and lines gave troubles)
    l_vec = df3['l'].values  # .tolist() does not work with bar plot
    mean_H_vec = df3['valueH_mean_H'].values
    mean_Hrow_vec = df3['valueH_mean_Hrow'].values
    mean_Hrow_vecEC = df3['valueH_mean_HrowEC'].values
    std_Hrow_vec = df3['valueH_std_Hrow'].values
    std_Hrow_vecEC = df3['valueH_std_HrowEC'].values

    # %% -- Draw the plot and annotate
    width = 0.3  # the width of the bars
    if SHOW_BACKTRACK_ESTIMATE:
        left_vec = l_vec
        if SHOW_NONBACKTRACK_ESTIMATE:
            # shift the backtracking bars left so both bar series fit side by side
            left_vec = left_vec - width
        bar1 = ax.bar(
            left_vec,
            mean_Hrow_vec,
            width,
            color=plot_colors[1],
            yerr=std_Hrow_vec,
            error_kw={
                'ecolor': 'black',
                'linewidth': 2
            },  # error-bars colour
            label=label_vec[1])
    if SHOW_NONBACKTRACK_ESTIMATE:
        bar2 = ax.bar(
            l_vec,
            mean_Hrow_vecEC,
            width,
            color=plot_colors[2],
            yerr=std_Hrow_vecEC,
            error_kw={
                'ecolor': 'black',
                'linewidth': 2
            },  # error-bars colour
            label=label_vec[2])

    # Ground-truth curve from powers of H0
    gt = ax.plot(l_vec, mean_H_vec,
                 color=plot_colors[0], linestyle='solid', linewidth=2,
                 marker='o', markersize=10, markeredgewidth=2,
                 markerfacecolor='None',
                 markeredgecolor=plot_colors[0],
                 label=label_vec[0])

    if CHOICE == 4 or CHOICE == 20:
        ax.annotate(
            np.round(mean_Hrow_vec[1], 2),
            xy=(2.15, 0.65),
            xytext=(2.1, 0.60),
            arrowprops=dict(facecolor='black', arrowstyle="->"),
        )

    # %% -- Legend
    if distribution == 'uniform' and SHOW_DISTRIBUTION_IN_TITLE:
        distribution_label = ',$uniform'
    else:
        distribution_label = '$'
    if SHOW_TITLE:
        plt.title(r'$\!\!\!\!n\!=\!{}\mathrm{{k}}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.
                  format(int(n / 1000), 2 * d, h, f, distribution_label))  # notice that actual d is double than in one direction
    handles, labels = ax.get_legend_handles_labels()
    legend = plt.legend(
        handles,
        labels,
        loc='upper right',
        handlelength=1.5,
        labelspacing=0,  # distance between label entries
        handletextpad=0.3,  # distance between label and the line representation
        # title='Iterations'
        borderaxespad=0.1,  # distance between legend and the outer axes
        borderpad=0.1,  # padding inside legend box
        numpoints=1,  # put the marker only once
    )
    if LEGEND_MATCH_COLORS:  # TODO: how to get back the nicer line spacing defined in legend above after changing the legend text colors
        # NOTE(review): indices 1 and 2 assume both bar series precede nothing else
        # in the legend order; if SHOW_BACKTRACK_ESTIMATE is False while
        # SHOW_NONBACKTRACK_ESTIMATE is True, index 2 may be out of range -- confirm.
        legend.get_texts()[0].set_color(plot_colors[0])
        if SHOW_BACKTRACK_ESTIMATE:
            legend.get_texts()[1].set_color(plot_colors[1])
        if SHOW_NONBACKTRACK_ESTIMATE:
            legend.get_texts()[2].set_color(plot_colors[2])
    frame = legend.get_frame()
    frame.set_linewidth(0.0)
    frame.set_alpha(0.8)  # 0.8

    # %% -- Figure settings & plot
    ax.set_xticks(range(10))
    plt.grid(b=True, which='both', alpha=0.2, linestyle='solid', axis='y', linewidth=0.5)  # linestyle='dashed', which='minor'
    plt.xlabel(r'Path length ($\ell$)', labelpad=0)
    plt.ylim(ymin, ymax)  # placed after yticks
    plt.xlim(0.5, 5.5)
    plt.tick_params(
        axis='x',  # changes apply to the x-axis
        which='both',  # both major and minor ticks are affected
        bottom='off',  # ticks along the bottom edge are off TODO: Paul, this does not work anymore :( 1/26/2020
        top='off',  # ticks along the top edge are off
        # labelbottom='off',    # labels along the bottom edge are off
    )
    if CREATE_PDF:
        plt.savefig(
            join(figure_directory, fig_filename),
            format='pdf',
            dpi=None,
            edgecolor='w',
            orientation='portrait',
            transparent=False,
            bbox_inches='tight',
            pad_inches=0.05,
            # frameon=None
        )
    if SHOW_PDF:
        showfig(join(figure_directory, fig_filename))
    if SHOW_PLOT:
        plt.show()
def test_gradient():
    """Smoke-test / demo for define_gradient_energy_H and define_energy_H.

    Builds a planted-partition graph, computes the observed path statistics
    (M_observed, H_observed), then prints energy, gradient, and projected
    gradient at three example points -- first for distance 1, then for
    distance 5.  Output is printed, nothing is returned.
    """
    print("\n-- 'define_gradient_energy_H, define_energy_H, uses: planted_distribution_model_H, H_observed, M_observed, --")

    # --- Parameters for graph
    n = 1000
    a = 1
    h = 8
    d = 25
    k = 3
    distribution = 'powerlaw'
    exponent = -0.3
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    f = 0.5
    print("Graph n={}, d={}, f={}".format(n, d, f))
    print("H0:\n{}\n".format(H0))

    # --- Create graph
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)          # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                         distribution=distribution,
                                         exponent=exponent,
                                         directed=False,
                                         debug=False)
    X0 = from_dictionary_beliefs(Xd)
    X1, ind = replace_fraction_of_rows(X0, 1 - f)

    # --- M_vec, H_vec statistics
    distance = 5
    print("M_vec:")
    M_vec = M_observed(W, X1, distance=distance)
    for i, M in enumerate(M_vec):
        print("{}:\n{}".format(i, M))
    print("H_vec:")
    H_vec = H_observed(W, X1, distance=distance)
    for i, H in enumerate(H_vec):
        print("{}:\n{}".format(i, H))

    def _report(energy_H, gradient_energy_H, H_point):
        """Print energy, gradient, and projected gradient at H_point; return the gradient."""
        e = energy_H(H_point)
        g = gradient_energy_H(H_point)
        h = derivative_H_to_h(g)
        print("energy: ", e)
        print("gradient:\n{}".format(g))
        print("projected gradient: ", h)
        return g

    # --- Gradient at multiple points for distance 1
    print("\n=Defining the gradient function with distance 1")
    distance = 1
    weights = [1, 0, 0, 0, 0]
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec, weights=weights, distance=distance)
    energy_H = define_energy_H(weights=weights, distance=distance, H_vec_observed=H_vec)

    H_actual = H_vec[0]
    print("1st example point: H_actual (row-stochastic frequencies of neighbors):\n{}".format(H_actual))
    _report(energy_H, gradient_energy_H, H_actual)

    H_point = transform_hToH(np.array([0.2, 0.6, 0.2]), 3)
    print("\n2nd example point: H_point:\n{}".format(H_point))
    g = _report(energy_H, gradient_energy_H, H_point)

    H_point2 = H_point - 0.45 * g
    print("\n3rd example point in opposite direction of gradient: H_point2=H_point-0.45*gradient:\n{}".format(H_point2))
    _report(energy_H, gradient_energy_H, H_point2)

    # --- Gradient at multiple points for distance 5
    distance = 5
    weights = [0, 0, 0, 0, 1]
    print("\n= Defining the gradient function with distance={} and weights={}".format(distance, weights))
    gradient_energy_H = define_gradient_energy_H(H_vec_observed=H_vec, weights=weights, distance=distance)
    # BUG FIX: this previously passed distance=1, copy-pasted from the section
    # above, which is inconsistent with the gradient defined for distance=5
    # and with weights=[0, 0, 0, 0, 1] that only weight the distance-5 term.
    energy_H = define_energy_H(weights=weights, distance=distance, H_vec_observed=H_vec)

    H_actual = H_vec[0]
    print("1st point: H_actual:\n{}".format(H_actual))
    _report(energy_H, gradient_energy_H, H_actual)

    H_point = transform_hToH(np.array([0.2, 0.6, 0.2]), 3)
    print("\n2nd point: H_point:\n{}".format(H_point))
    g = _report(energy_H, gradient_energy_H, H_point)

    H_point2 = H_point - 1.5 * g
    print("\n3rd point in opposite direction of gradient: H_point2:\n{}".format(H_point2))
    _report(energy_H, gradient_energy_H, H_point2)
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False):
    """End-to-end accuracy experiment varying the number of classes k.

    For a CHOICE-selected parameter set: creates planted graphs, learns the
    compatibility matrix H with several estimators, propagates beliefs with
    linBP, records accuracy to CSV, then aggregates and plots accuracy vs k.

    Parameters
    ----------
    choice : int -- selects one of the hard-coded parameter sets (500-508).
    create_data : bool -- run simulations and write a fresh CSV.
    add_data : bool -- run simulations and append to the existing CSV.
    show_plot / create_pdf / show_pdf : bool -- plotting/output switches.
    shorten_length : bool -- accepted for interface symmetry with sibling
        run() functions; not used in this variant.
    """
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PDF = show_pdf
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    STD_FILL = True  # draw +/- one std-dev band around each accuracy curve

    csv_filename = 'Fig_End-to-End_accuracy_VaryK_{}.csv'.format(CHOICE)
    header = ['currenttime', 'option', 'k', 'f', 'accuracy']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Default Graph parameters (may be overridden per CHOICE below)
    rep_SameGraph = 10  # iterations on same graph
    initial_h0 = None  # initial vector to start finding optimal H
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    variant = 1
    EC = True  # Non-backtracking for learning
    ymin = 0.3
    ymax = 1
    xmax = 8
    xtick_lab = [2, 3, 4, 5, 6, 7, 8]
    xtick_labels = ['2', '3', '4', '5', '6', '7', '8']
    ytick_lab = np.arange(0, 1.1, 0.1)
    f_vec = [0.9 * pow(0.1, 1 / 5) ** x for x in range(21)]
    k_vec = [3, 4, 5]
    rep_DifferentGraphs = 10  # iterations on different graphs
    err = 0
    avoidNeighbors = False
    gradient = False
    pruneRandom = False
    convergencePercentage_W = None
    stratified = True
    label_vec = ['*'] * 10
    clip_on_vec = [False] * 10
    draw_std_vec = range(10)
    numberOfSplits = 1
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [5, 4, 3, 3] + [3] * 10
    marker_vec = [None, None, 'o', 'x', 'o', '^', 'o', 'x', 'o', '^', 'o', 'x', 'o', '^']
    markersize_vec = [0, 0, 4, 8] + [6] * 10
    facecolor_vec = ["#4C72B0", "#55A868", "#C44E52", "#8172B2", "#CCB974", "#64B5CD"]

    # -- Options with propagation variants
    if CHOICE == 500:  ## 1k nodes
        n = 1000
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GS', 'MHE', 'DHE']
        weight_vec = [10] * 3
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 2 + [True]
        xmin = 3.
        ymin = 0.
        ymax = 1.
        label_vec = ['GS', 'MCE', 'DCEr']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [3, 4, 5, 6]

    elif CHOICE == 501:  ## 10k nodes
        n = 10000
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GT', 'MHE', 'DHE']
        weight_vec = [10] * 3
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 2 + [True]
        xmin = 2.
        ymin = 0.
        ymax = 1.
        label_vec = ['GT', 'MCE', 'DCEr']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [2, 3, 4, 5]

    elif CHOICE == 502:  ## 10k nodes
        n = 10000
        h = 8
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.6
        ymax = 1.
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        # option_vec = ['opt1', 'opt2', 'opt3', 'opt4']
        # learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE']
        # k_vec = [2, 3, 4, 5]

    elif CHOICE == 503:  ## 10k nodes
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.3
        ymax = 0.9
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        # k_vec = [6, 7, 8]
        clip_on_vec = [True] * 10
        # option_vec = ['opt1', 'opt2', 'opt3', 'opt4']
        # learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE']
        # k_vec = [2, 3, 4, 5]

    elif CHOICE == 504:  ## 10k nodes
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.01]
        # k_vec = [2, 3, 4, 5, 6, 7, 8]
        k_vec = [7]
        clip_on_vec = [True] * 10

    elif CHOICE == 505:  ## 10k nodes with f = 0.005
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.005]
        k_vec = [2, 3, 4, 5, 6, 7]
        # k_vec = [7]
        clip_on_vec = [True] * 10

    # elif CHOICE == 506:  ## 10k nodes with f = 0.005
    #     n = 10000
    #     h = 3
    #     d = 25
    #     option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
    #     learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
    #     weight_vec = [10] * 10
    #     alpha_vec = [0] * 10
    #     beta_vec = [0] * 10
    #     gamma_vec = [0] * 10
    #     s_vec = [0.5] * 10
    #     numMaxIt_vec = [10] * 10
    #     randomize_vec = [False] * 4 + [True] + [False]
    #     xmin = 2
    #     xmax = 7
    #     ymin = 0.2
    #     ymax = 0.9
    #     label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr']
    #     facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
    #     f_vec = [0.005]
    #     k_vec = [2,3,4,5,6,7]
    #     # k_vec = [7]
    #     clip_on_vec = [True] * 10

    elif CHOICE == 506:  ## 10k nodes
        n = 10000
        h = 8
        d = 25
        # NOTE(review): the second assignments below deliberately override the
        # first (drops the 'opt6'/'Holdout' variant) -- kept as-is.
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.005]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        # k_vec = [5]
        clip_on_vec = [True] * 10
        rep_SameGraph = 1  # iterations on same graph
        rep_DifferentGraphs = 1  # iterations on same graph

    elif CHOICE == 507:  ## 10k nodes with gradient and PruneRandom
        n = 10000
        h = 3
        d = 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GS', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.1
        ymax = 0.9
        label_vec = ['GS', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        # k_vec = [6, 7, 8]
        clip_on_vec = [True] * 10
        # option_vec = ['opt1', 'opt2', 'opt3', 'opt4']
        # learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE']
        # k_vec = [2, 3, 4, 5]
        gradient = True
        pruneRandom = True

    elif CHOICE == 508:  ## 10k nodes with gradient and PruneRandom
        n = 1000
        h = 3
        d = 10
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GS', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        weight_vec = [10] * 10
        alpha_vec = [0] * 10
        beta_vec = [0] * 10
        gamma_vec = [0] * 10
        s_vec = [0.5] * 10
        numMaxIt_vec = [10] * 10
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.1
        ymax = 0.9
        label_vec = ['GS', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        # k_vec = [6, 7, 8]
        clip_on_vec = [True] * 10
        # option_vec = ['opt1', 'opt2', 'opt3', 'opt4']
        # learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE']
        # k_vec = [2, 3, 4, 5]
        gradient = True
        pruneRandom = True
        rep_DifferentGraphs = 1
        rep_SameGraph = 1

    else:
        # NOTE(review): raising Warning (not ValueError) is unusual but kept
        # for compatibility with existing callers that may catch it.
        raise Warning("Incorrect choice!")

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed

    # print("CHOICE: {}".format(CHOICE))

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for i in range(rep_DifferentGraphs):  # create several graphs with same parameters
            # print("\ni: {}".format(i))
            for k in k_vec:
                # print("\nk: {}".format(k))
                H0 = create_parameterized_H(k, h, symmetric=True)
                H0c = to_centering_beliefs(H0)
                a = [1.] * k
                alpha0 = np.array(a)
                alpha0 = alpha0 / np.sum(alpha0)
                W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                                     distribution=distribution,
                                                     exponent=exponent,
                                                     directed=False,
                                                     debug=False)
                X0 = from_dictionary_beliefs(Xd)
                for j in range(rep_SameGraph):  # repeat several times for same graph
                    # print("j: {}".format(j))
                    ind = None
                    for f in f_vec:  # Remove fraction (1-f) of rows from X0 (notice that different from first implementation)
                        X1, ind = replace_fraction_of_rows(X0, 1 - f, avoidNeighbors=avoidNeighbors, W=W, ind_prior=ind, stratified=stratified)
                        X2 = introduce_errors(X1, ind, err)

                        for option_index, (learning_method, alpha, beta, gamma, s, numMaxIt, weights, randomize) in \
                                enumerate(zip(learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec, numMaxIt_vec, weight_vec, randomize_vec)):

                            # -- Learning: pick the H estimator per method name
                            if learning_method == 'GT':
                                # ground truth: use the planted H directly
                                H2c = H0c
                            elif learning_method == 'Holdout':
                                H2 = estimateH_baseline_serial(X2, ind, W, numMax=numMaxIt,
                                                               # ignore_rows=ind,
                                                               numberOfSplits=numberOfSplits,
                                                               # method=learning_method, variant=1, distance=length,
                                                               EC=EC,
                                                               alpha=alpha, beta=beta, gamma=gamma)
                                H2c = to_centering_beliefs(H2)
                            elif learning_method != 'DHE':
                                H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=randomize)
                                H2c = to_centering_beliefs(H2)
                            else:
                                # DHE additionally takes gradient / random-restart flags
                                H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=randomize, gradient=gradient, randomrestarts=pruneRandom)
                                H2c = to_centering_beliefs(H2)

                            # -- Propagation
                            X2c = to_centering_beliefs(X2, ignoreZeroRows=True)  # try without
                            eps_max = eps_convergence_linbp_parameterized(H2c, W,
                                                                          method='noecho',
                                                                          alpha=alpha, beta=beta, gamma=gamma,
                                                                          X=X2)
                            eps = s * eps_max
                            try:
                                F, actualIt, actualPercentageConverged = \
                                    linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                                  method='noecho',
                                                                  alpha=alpha, beta=beta, gamma=gamma,
                                                                  numMaxIt=numMaxIt,
                                                                  convergencePercentage=convergencePercentage_W,
                                                                  debug=2)
                            except ValueError as e:
                                print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))
                            else:
                                accuracy_X = matrix_difference(X0, F, ignore_rows=ind)
                                tuple = [str(datetime.datetime.now())]
                                text = [option_vec[option_index], k, f, accuracy_X]
                                # text = ['' if v is None else v for v in text]       # TODO: test with vocabularies
                                # text = np.asarray(text)  # without np, entries get ugly format
                                tuple.extend(text)
                                # print("option: {}, f: {}, actualIt: {}, accuracy: {}".format(option_vec[option_index], f, actualIt, accuracy_X))
                                save_csv_record(join(data_directory, csv_filename), tuple)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15)))

    # -- Aggregate repetitions
    df2 = df1.groupby(['option', 'k', 'f']).agg \
        ({'accuracy': [np.mean, np.std, np.size, np.median],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(15)))

    # -- Pivot table
    df3 = pd.pivot_table(df2, index=['f', 'k'], columns=['option'], values=['accuracy_mean', 'accuracy_std'])  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy
    # df2.rename(columns={'time_size': 'count'}, inplace=True)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(100)))

    # X_f = k_vec
    X_f = df3['k'].values  # read k from values instead

    # Y_hash[f][option] holds the mean-accuracy curve over k for each (f, option)
    Y_hash = defaultdict(dict)
    Y_hash_std = defaultdict(dict)
    for f in f_vec:
        for option in option_vec:
            Y_hash[f][option] = list()
            Y_hash_std[f][option] = list()
    for f in f_vec:
        for option in option_vec:
            Y_hash[f][option] = df3.loc[df3['f'] == f]['accuracy_mean_{}'.format(option)].values
            Y_hash_std[f][option] = df3.loc[df3['f'] == f]['accuracy_std_{}'.format(option)].values

    if CREATE_PDF or SHOW_PLOT or SHOW_PDF:

        # -- Setup figure
        fig_filename = 'Fig_End-to-End_accuracy_varyK_{}.pdf'.format(CHOICE)
        mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        opt_f_vecs = [(option, f) for option in option_vec for f in f_vec]

        for ((option, f), color, linewidth, clip_on, linestyle, marker, markersize) in \
                zip(opt_f_vecs, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec, marker_vec, markersize_vec):

            # label = learning_method_vec[option_vec.index(option)]
            label = label_vec[option_vec.index(option)]
            # label = label + " " + str(f)

            if STD_FILL:
                # print((X_f))
                # print(Y_hash[f][option])
                ax.fill_between(X_f, Y_hash[f][option] + Y_hash_std[f][option], Y_hash[f][option] - Y_hash_std[f][option],
                                facecolor=color, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X_f, Y_hash[f][option] + Y_hash_std[f][option], linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X_f, Y_hash[f][option] - Y_hash_std[f][option], linewidth=0.5, color='0.8', linestyle='solid')

            ax.plot(X_f, Y_hash[f][option], linewidth=linewidth, color=color, linestyle=linestyle, label=label, zorder=4,
                    marker=marker, markersize=markersize, markeredgewidth=1, markeredgecolor='black', clip_on=clip_on)

        if CHOICE == 507:
            # random-guessing baseline: accuracy 1/k
            Y_f = [1 / float(i) for i in X_f]
            ax.plot(X_f, Y_f, linewidth=2, color='black', linestyle='dashed', label='Random', zorder=4,
                    marker='x', markersize=8, markeredgewidth=1, markeredgecolor='black', clip_on=clip_on)

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        if n < 1000:
            n_label = '{}'.format(n)
        else:
            n_label = '{}k'.format(int(n / 1000))
        title(r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format(n_label, d, h, f, distribution_label))
        handles, label_vec = ax.get_legend_handles_labels()
        legend = plt.legend(handles, label_vec,
                            loc='upper right',  # 'upper right'
                            handlelength=2,
                            labelspacing=0,  # distance between label entries
                            handletextpad=0.3,  # distance between label and the line representation
                            borderaxespad=0.2,  # distance between legend and the outer axes
                            borderpad=0.3,  # padding inside legend box
                            numpoints=1,  # put the marker only once
                            )
        # # legend.set_zorder(1)
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_labels)
        plt.yticks(ytick_lab, ytick_lab)
        ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))

        # Only show ticks on the left and bottom spines
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')

        grid(b=True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        grid(b=True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        xlabel(r'Number of Classes $(k)$', labelpad=0)  # labelpad=0
        ylabel(r'Accuracy', labelpad=0)

        xlim(xmin, xmax)
        ylim(ymin, ymax)

        if CREATE_PDF:
            savefig(join(figure_directory, fig_filename), format='pdf', dpi=None, edgecolor='w', orientation='portrait',
                    transparent=False, bbox_inches='tight', pad_inches=0.05, frameon=None)
        if SHOW_PLOT:
            plt.show()
        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False):
    """Homophily experiment: accuracy vs label sparsity f.

    For a CHOICE-selected parameter set: creates planted graphs, learns H
    (ground truth / DCEr / identity-homophily), propagates beliefs with
    linBP, records class-wise accuracy to CSV, then aggregates and plots
    accuracy vs f on a log x-axis.

    Parameters
    ----------
    choice : int -- selects one of the hard-coded parameter sets (101).
    create_data : bool -- run simulations and write a fresh CSV.
    add_data : bool -- run simulations and append to the existing CSV.
    show_plot / create_pdf / show_pdf : bool -- plotting/output switches.
    shorten_length : bool -- keep only every 2nd data point before plotting.

    Raises
    ------
    Warning -- for an unknown CHOICE (kept for caller compatibility).
    """
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    SHOW_PDF = show_pdf
    CREATE_PDF = create_pdf
    STD_FILL = True
    # BUG FIX: SHORTEN_LENGTH was only present as a commented-out assignment,
    # so the `if SHORTEN_LENGTH:` branch below raised NameError.  Bind it to
    # the (previously unused) shorten_length parameter; the default False
    # matches the old commented-out value, so behavior is unchanged for
    # existing callers.
    SHORTEN_LENGTH = shorten_length

    fig_filename = 'Fig_homophily_{}.pdf'.format(CHOICE)
    csv_filename = 'Fig_homophily_{}.csv'.format(CHOICE)
    header = ['currenttime', 'option', 'f', 'accuracy']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Default Graph parameters
    k = 3
    rep_DifferentGraphs = 1
    rep_SameGraph = 2
    initial_h0 = None
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    constraint = True
    variant = 1
    EC = True  # Non-backtracking for learning
    global f_vec, labels, facecolor_vec
    s = 0.5
    err = 0
    numMaxIt = 10
    avoidNeighbors = False
    convergencePercentage_W = None
    stratified = True
    clip_on_vec = [True] * 10
    draw_std_vec = range(10)
    ymin = 0.3
    ymax = 1
    xmin = 0.001
    xmax = 1
    xtick_lab = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]
    xtick_labels = ['1e-5', '0.01\%', '0.1\%', '1\%', '10\%', '100\%']
    ytick_lab = np.arange(0, 1.1, 0.1)
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [5, 2, 3, 3, 3, 3] + [3] * 10
    marker_vec = [None, '^', 'v', 'o', '^'] + [None] * 10
    markersize_vec = [0, 8, 8, 8, 6, 6] + [6] * 10
    facecolor_vec = ['black', "#C44E52", "#64B5CD"]

    # -- Options with propagation variants
    if CHOICE == 101:
        n = 10000
        h = 3
        d = 15
        f_vec = [0.9 * pow(0.1, 1 / 5) ** x for x in range(21)]
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GT', 'DHE', 'Homophily']
        weight_vec = [None] + [10] + [None]
        randomize_vec = [None] + [True] + [None]
        xmin = 0.001
        ymin = 0.3
        ymax = 1
        labels = ['GS', 'DCEr', 'Homophily']
    else:
        raise Warning("Incorrect choice!")

    a = 1
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)
    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed
    # print("CHOICE: {}".format(CHOICE))

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for i in range(rep_DifferentGraphs):  # create several graphs with same parameters
            W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                                 distribution=distribution,
                                                 exponent=exponent,
                                                 directed=False,
                                                 debug=False)
            X0 = from_dictionary_beliefs(Xd)
            for j in range(rep_SameGraph):  # repeat several times for same graph
                # print("Graph:{} and j: {}".format(i,j))
                ind = None
                for f in f_vec:
                    X1, ind = replace_fraction_of_rows(X0, 1 - f, avoidNeighbors=avoidNeighbors, W=W, ind_prior=ind, stratified=stratified)
                    X2 = introduce_errors(X1, ind, err)

                    for option_index, (option, learning_method, weights, randomize) in \
                            enumerate(zip(option_vec, learning_method_vec, weight_vec, randomize_vec)):

                        # -- Learning
                        if learning_method == 'GT':
                            H2 = H0
                        elif learning_method == 'Homophily':
                            H2 = np.identity(k)
                        elif learning_method == 'DHE':
                            H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=randomize, constraints=constraint)
                            # print("learning_method:", learning_method)
                            # print("H:\n{}".format(H2))

                        # -- Propagation
                        H2c = to_centering_beliefs(H2)
                        X2c = to_centering_beliefs(X2, ignoreZeroRows=True)
                        try:
                            eps_max = eps_convergence_linbp_parameterized(H2c, W, method='noecho', X=X2)
                            eps = s * eps_max
                            F, actualIt, actualPercentageConverged = \
                                linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                              method='noecho',
                                                              numMaxIt=numMaxIt,
                                                              convergencePercentage=convergencePercentage_W,
                                                              debug=2)
                        except ValueError as e:
                            print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))
                        else:
                            accuracy_X = matrix_difference_classwise(X0, F, ignore_rows=ind)
                            tuple = [str(datetime.datetime.now())]
                            text = [option_vec[option_index], f, accuracy_X]
                            tuple.extend(text)
                            # print("option: {}, f: {}, actualIt: {}, accuracy: {}".format(option_vec[option_index], f, actualIt, accuracy_X))
                            save_csv_record(join(data_directory, csv_filename), tuple)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15)))
    desired_decimals = 7
    df1['f'] = df1['f'].apply(lambda x: round(x, desired_decimals))  # rounding due to different starting points
    # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15)))

    # Aggregate repetitions
    df2 = df1.groupby(['option', 'f']).agg \
        ({'accuracy': [np.mean, np.std, np.size],  # Multiple Aggregates
          })
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)
    # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(10)))

    # Pivot table
    df3 = pd.pivot_table(df2, index=['f'], columns=['option'], values=['accuracy_mean', 'accuracy_std'])  # Pivot
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30)))
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy
    # df2.rename(columns={'time_size': 'count'}, inplace=True)
    # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(10)))

    # Extract values
    X_f = df3['f'].values  # plot x values
    Y = []
    Y_std = []
    for option in option_vec:
        Y.append(df3['accuracy_mean_{}'.format(option)].values)
        if STD_FILL:
            Y_std.append(df3['accuracy_std_{}'.format(option)].values)

    if SHORTEN_LENGTH:
        SHORT_FACTOR = 2  ## KEEP EVERY Nth ELEMENT
        X_f = np.copy(X_f[list(range(0, len(X_f), SHORT_FACTOR)), ])
        for i in range(len(Y)):
            Y[i] = np.copy(Y[i][list(range(0, len(Y[i]), SHORT_FACTOR)), ])
            if STD_FILL:
                Y_std[i] = np.copy(Y_std[i][list(range(0, len(Y_std[i]), SHORT_FACTOR)), ])

    if CREATE_PDF or SHOW_PLOT or SHOW_PDF:

        # -- Setup figure
        mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['font.size'] = 16
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # -- Drawing
        if STD_FILL:
            for choice, (option, facecolor) in enumerate(zip(option_vec, facecolor_vec)):
                ax.fill_between(X_f, Y[choice] + Y_std[choice], Y[choice] - Y_std[choice],
                                facecolor=facecolor, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X_f, Y[choice] + Y_std[choice], linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X_f, Y[choice] - Y_std[choice], linewidth=0.5, color='0.8', linestyle='solid')

        for choice, (option, label, color, linewidth, clip_on, linestyle, marker, markersize) in \
                enumerate(zip(option_vec, labels, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec, marker_vec, markersize_vec)):
            P = ax.plot(X_f, Y[choice], linewidth=linewidth, color=color, linestyle=linestyle, label=label, zorder=4,
                        marker=marker, markersize=markersize, markeredgewidth=1, clip_on=clip_on, markeredgecolor='black')

        plt.xscale('log')

        # -- Title and legend
        distribution_label = '$'
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        n_label = '{}k'.format(int(n / 1000))
        if n < 1000:
            n_label = '{}'.format(n)
        a_label = ''
        if a != 1:
            a_label = ', a\!=\!{}'.format(a)
        titleString = r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}{}{}'.format(n_label, d, h, a_label, distribution_label)
        plt.title(titleString)

        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(handles, labels,
                            loc='upper left',  # 'upper right'
                            handlelength=2,
                            labelspacing=0,  # distance between label entries
                            handletextpad=0.3,  # distance between label and the line representation
                            borderaxespad=0.2,  # distance between legend and the outer axes
                            borderpad=0.3,  # padding inside legend box
                            numpoints=1,  # put the marker only once
                            )
        # # legend.set_zorder(1)
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        plt.xticks(xtick_lab, xtick_labels)
        plt.yticks(ytick_lab, ytick_lab)

        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
        grid(b=True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        grid(b=True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)  # linestyle='dashed', which='minor', axis='y',
        xlabel(r'Label Sparsity $(f)$', labelpad=0)  # labelpad=0
        ylabel(r'Accuracy', labelpad=0)

        xlim(xmin, xmax)
        ylim(ymin, ymax)

        if CREATE_PDF:
            savefig(join(figure_directory, fig_filename), format='pdf', dpi=None, edgecolor='w', orientation='portrait',
                    transparent=False, bbox_inches='tight', pad_inches=0.05, frameon=None)
        if SHOW_PLOT:
            plt.show()
        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))  # shows actually created PDF