def load_system(folderstr, timestr):
    # Import
    dirname_in = os.path.join(folderstr, timestr)
    filename_only = 'system_init.pickle'
    SS = pickle_import(os.path.join(dirname_in, filename_only))

    # Export
    timestr = str(time()).replace('.', 'p')
    dirname_out = os.path.join('systems', timestr)
    SS.dirname = dirname_out
    filename_out = os.path.join(dirname_out, filename_only)
    pickle_export(dirname_out, filename_out, SS)
    return SS
def routine_gen():
    # SS = gen_system_example_suspension()
    timestr = '1558459899p686552_example_suspension_model_known'
    folderstr = 'example_systems'
    SS1 = load_system(folderstr, timestr)
    check_olmss(SS1)

    # Policy gradient setup
    t_start = time()
    K0_method = 'are_perturbed'
    K0 = set_initial_gains(SS1, K0_method=K0_method)
    PGO = policy_gradient_setup(SS1)
    filename_out = 'policy_gradient_options.pickle'
    path_out = os.path.join(SS1.dirname, filename_out)
    pickle_export(SS1.dirname, path_out, PGO)
    t_end = time()
    print('Initialization completed after %.3f seconds' % (t_end - t_start))
    SS1, histlist1 = run_policy_gradient(SS1, PGO)

    # Find optimal control ignoring noise
    SS2 = copy(SS1)
    SS2.set_a(np.zeros_like(SS2.a))
    SS2.set_b(np.zeros_like(SS2.b))
    SS2.setK(K0)
    PGO.eta = 1e-5
    PGO.epsilon = (1e-1) * SS2.Kare.size
    SS2, histlist2 = run_policy_gradient(SS2, PGO)

    # Evaluate the noise-ignoring gains on the noisy system
    # (note: the second setK call overwrites the first)
    SS1.setK(SS2.Kare)
    SS1.setK(SS2.K)

    # Export comparison data and plot
    dirname_in = os.path.join(folderstr, timestr)
    chist_data = calc_comparison_costs(SS1, SS2, histlist1, histlist2)
    dirname_out = copy(dirname_in)
    filename_out = 'chist_data.pickle'
    path_out = os.path.join(dirname_out, filename_out)
    pickle_export(dirname_out, path_out, chist_data)
    plot_results(SS1, SS2, chist_data, dirname_in)
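
# The comparison above hinges on evaluating a fixed gain's LQR cost on the
# noisy system. Below is a minimal sketch of that evaluation, assuming the
# standard multiplicative-noise model x_{t+1} = (A + sum_i alpha_i*Aa_i) x_t
# + (B + sum_j beta_j*Bb_j) u_t with u_t = K x_t and a, b flat arrays of the
# noise variances; this is the textbook recursion, not necessarily the repo's
# dlyap_mult implementation.
def cost_of_gain_sketch(A, B, K, a, Aa, b, Bb, Q, R, S0, iters=2000):
    # Iterate the cost-matrix recursion
    # P <- Q + K'RK + AK' P AK + sum_i a_i Aa_i' P Aa_i
    #                          + sum_j b_j (Bb_j K)' P (Bb_j K),
    # which converges iff the closed loop is mean-square stable.
    AK = A + B @ K
    P = np.zeros_like(Q)
    for _ in range(iters):
        P_next = Q + K.T @ R @ K + AK.T @ P @ AK
        for i in range(len(a)):
            P_next += a[i] * Aa[:, :, i].T @ P @ Aa[:, :, i]
        for j in range(len(b)):
            BbK = Bb[:, :, j] @ K
            P_next += b[j] * BbK.T @ P @ BbK
        P = P_next
    return np.trace(P @ S0)  # expected cost for x_0 with covariance S0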
def gen_system_mult(n=8, m=8, safety_margin=0.3, noise='weak',
                    mult_noise_method='random', SStype='ER',
                    seed=None, saveSS=True):
    timestr = str(time()).replace('.', 'p')
    dirname_out = os.path.join('systems', timestr)

    if seed is not None:
        set_rng_seed(seed)

    if SStype == 'random':
        A, B = gen_system_AB_rand(n, m, safety_margin)
    elif SStype == 'ER':
        A, B = gen_system_AB_erdos_renyi(n, dirname_out=dirname_out)
        m = B.shape[1]
    elif SStype == 'example':
        # Small hand-built example system; returns early
        A = np.array([[0.8, 0.3], [-0.2, 0.7]])
        B = np.array([[0.5, 0.3]]).T
        Q = np.eye(2)
        R = np.eye(1)
        S0 = np.eye(2)
        Aa = np.array([[0.2, 0.3], [0.2, 0.3]])
        Aa = Aa[:, :, np.newaxis]
        Bb = np.array([[0.2, 0.3]]).T
        Bb = Bb[:, :, np.newaxis]
        a = np.array([[0.3]])
        b = np.array([[0.3]])
        SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)
        SS.dirname = dirname_out
        filename_only = 'system_init.pickle'
        filename_out = os.path.join(dirname_out, filename_only)
        pickle_export(dirname_out, filename_out, SS)
        return SS

    # LQR cost matrices
    Q = np.eye(n)
    # Q = randn(n,n)
    # Q = np.dot(Q, Q.T)
    R = np.eye(m)
    # R = randn(m,m)
    # R = np.dot(R, R.T)

    # Initial state distribution covariance
    # S0 = randn(n,n)
    # S0 = np.dot(S0, S0.T)
    S0 = np.eye(n)

    # Multiplicative noise data
    p = 2  # Number of multiplicative noises on A
    q = 2  # Number of multiplicative noises on B
    if mult_noise_method == 'random':
        Aa = randn(n, n, p)
        Bb = randn(n, m, q)
    elif mult_noise_method == 'rowcol':
        # Pick a random row and column
        Aa = np.zeros([n, n, p])
        Bb = np.zeros([n, m, q])
        Aa[randint(n), :, 0] = np.ones(n)
        Aa[:, randint(n), 1] = np.ones(n)
        Bb[randint(n), :, 0] = np.ones(m)
        Bb[:, randint(m), 1] = np.ones(n)
    elif mult_noise_method == 'random_plus_rowcol':
        Aa = 0.3*randn(n, n, p)
        Bb = 0.3*randn(n, m, q)
        # Pick a random row and column
        Aa[randint(n), :, 0] = np.ones(n)
        Aa[:, randint(n), 1] = np.ones(n)
        Bb[randint(n), :, 0] = np.ones(m)
        Bb[:, randint(m), 1] = np.ones(n)

    incval = 1.05
    decval = 1.00*(1/incval)
    weakval = 0.90

    # a = randn([p,1])
    # b = randn([q,1])
    a = np.ones([p, 1])
    b = np.ones([q, 1])
    a = a*(float(1)/(p*n**2))  # Scale as a rough heuristic
    b = b*(float(1)/(q*m**2))  # Scale as a rough heuristic

    # noise = 'weak'
    if noise == 'weak' or noise == 'critical':
        # Ensure near-critical mean-square stabilizability:
        # increase the noise until the system is just barely
        # not stabilizable, then back off
        P, Kare = dare_mult(A, B, a, Aa, b, Bb, Q, R, show_warn=False)
        mss = True
        while mss:
            if Kare is None:
                mss = False
            else:
                a = incval*a
                b = incval*b
                P, Kare = dare_mult(A, B, a, Aa, b, Bb, Q, R, show_warn=False)
        # Extra mean-square stabilizability margin
        a = decval*a
        b = decval*b
        if noise == 'weak':
            # print('Multiplicative noise set weak')
            a = weakval*a
            b = weakval*b
    elif noise == 'olmss_weak' or noise == 'olmss_critical':
        # Ensure near-critical open-loop mean-square stability:
        # increase the noise until the open loop (K = 0) is just
        # barely unstable, then back off
        K0 = np.zeros([m, n])
        P = dlyap_mult(A, B, K0, a, Aa, b, Bb, Q, R, S0, matrixtype='P')
        mss = True
        while mss:
            if P is None:
                mss = False
            else:
                a = incval*a
                b = incval*b
                P = dlyap_mult(A, B, K0, a, Aa, b, Bb, Q, R, S0, matrixtype='P')
        # Extra mean-square stabilizability margin
        a = decval*a
        b = decval*b
        if noise == 'olmss_weak':
            # print('Multiplicative noise set to open-loop mean-square stable')
            a = weakval*a
            b = weakval*b
    elif noise == 'olmsus':
        # Ensure near-critical open-loop mean-square instability:
        # increase the noise until the open loop (K = 0) is unstable
        K0 = np.zeros([m, n])
        P = dlyap_mult(A, B, K0, a, Aa, b, Bb, Q, R, S0, matrixtype='P')
        mss = True
        while mss:
            if P is None:
                mss = False
            else:
                a = incval*a
                b = incval*b
                P = dlyap_mult(A, B, K0, a, Aa, b, Bb, Q, R, S0, matrixtype='P')
        # # Extra mean-square stabilizability margin
        # a = decval*a
        # b = decval*b
        # print('Multiplicative noise set to open-loop mean-square unstable')
    elif noise == 'none':
        print('MULTIPLICATIVE NOISE SET TO ZERO!!!')
        a = np.zeros([p, 1])  # For testing only - no noise
        b = np.zeros([q, 1])  # For testing only - no noise
    else:
        raise Exception('Invalid noise setting chosen')

    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)
    if saveSS:
        SS.dirname = dirname_out
        filename_only = 'system_init.pickle'
        filename_out = os.path.join(dirname_out, filename_only)
        pickle_export(dirname_out, filename_out, SS)
    return SS
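
# The incval/decval loops above push the noise to the edge of mean-square
# stabilizability. For reference, a sketch of the underlying stability test
# (standard theory, not the repo's dare_mult/dlyap_mult code): the closed
# loop is mean-square stable iff the vectorized second-moment operator has
# spectral radius < 1. Assumes flat variance arrays a, b and u = K x.
def is_ms_stable_sketch(A, B, K, a, Aa, b, Bb):
    # E[x x'] propagates as X <- AK X AK' + sum_i a_i Aa_i X Aa_i'
    #                                     + sum_j b_j (Bb_j K) X (Bb_j K)',
    # whose vectorization is the Kronecker-sum matrix T below.
    AK = A + B @ K
    T = np.kron(AK, AK)
    for i in range(len(a)):
        T += a[i] * np.kron(Aa[:, :, i], Aa[:, :, i])
    for j in range(len(b)):
        BbK = Bb[:, :, j] @ K
        T += b[j] * np.kron(BbK, BbK)
    return np.max(np.abs(np.linalg.eigvals(T))) < 1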
def routine_gen():
    # folderstr = 'systems'
    # timestr = str(time()).replace('.','p')
    # dirname_in = os.path.join(folderstr,timestr)
    # create_directory(dirname_in)

    nSS = 20  # Number of independent runs

    # Settings for backtracking line search
    stepsize_method = 'backtrack'
    nr = 100000
    PGO_dict = {'gradient_model_free': {'eta': 1e-1, 'max_iters': 20, 'exact': False},
                'gradient': {'eta': 1e-1, 'max_iters': 20, 'exact': True},
                'natural_gradient': {'eta': 1e-1, 'max_iters': 20, 'exact': True},
                'gauss_newton': {'eta': 1/2, 'max_iters': 20, 'exact': True}}
    all_dict = {key: {'costnorm': [], 'gradnorm': []} for key in PGO_dict.keys()}

    # Generate system from scratch
    seed = 1
    # SS = gen_system_erdos_renyi(n=4,
    #                             diffusion_constant=1.0,
    #                             leakiness_constant=0.1,
    #                             time_constant=0.05,
    #                             leaky=True,
    #                             seed=seed)
    # SS = gen_system_erdos_renyi(n=2,
    #                             diffusion_constant=1.0,
    #                             leakiness_constant=0.1,
    #                             time_constant=0.05,
    #                             leaky=True,
    #                             seed=seed)

    # Load system
    folderstr = 'example_systems'
    timestr = '1587086073p9661696_example_network_all_steps_backtrack'
    dirname_in = os.path.join(folderstr, timestr)
    filename_in = os.path.join(dirname_in, 'system_init.pickle')
    SS = pickle_import(filename_in)

    for i in range(nSS):
        # Policy gradient setup
        K0_method = 'zero'
        K0 = set_initial_gains(SS, K0_method=K0_method)
        sleep(0.5)
        for step_direction in PGO_dict:
            SS.setK(K0)
            t_start = time()
            eta = PGO_dict[step_direction]['eta']
            max_iters = PGO_dict[step_direction]['max_iters']
            exact = PGO_dict[step_direction]['exact']
            PGO = policy_gradient_setup(SS, eta, step_direction, max_iters,
                                        exact, stepsize_method, nr)
            t_end = time()
            print('Initialization completed after %.3f seconds' % (t_end - t_start))
            SS, histlist = run_policy_gradient(SS, PGO)
            costnorm = (histlist[2]/SS.ccare) - 1
            gradnorm = la.norm(histlist[1], ord='fro', axis=(0, 1))
            all_dict[step_direction]['costnorm'].append(costnorm)
            all_dict[step_direction]['gradnorm'].append(gradnorm)

    filename_out = 'monte_carlo_all_dict.pickle'
    path_out = os.path.join(dirname_in, filename_out)
    pickle_export(dirname_in, path_out, all_dict)
    plot_data(all_dict, dirname_in)
    dirname_out = os.path.join('systems', timestr)
    SS.dirname = dirname_out
    filename_out = os.path.join(dirname_out, filename_only)
    pickle_export(dirname_out, filename_out, SS)
    return SS


###############################################################################
if __name__ == "__main__":
    SS = gen_system_mult(n=2, m=1, safety_margin=0.3, noise='olmss_weak',
                         mult_noise_method='random', SStype='random')

    # Policy gradient setup
    t_start = time()
    # K0_method = 'are_perturbed'
    K0_method = 'zero'
    set_initial_gains(SS, K0_method=K0_method)
    PGO = policy_gradient_setup(SS)
    filename_out = 'policy_gradient_options.pickle'
    path_out = os.path.join(SS.dirname, filename_out)
    pickle_export(SS.dirname, path_out, PGO)
    t_end = time()
    print('Initialization completed after %.3f seconds' % (t_end - t_start))
    run_policy_gradient(SS, PGO)
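
# The Monte Carlo routine above passes stepsize_method = 'backtrack' to
# policy_gradient_setup. A generic sketch of Armijo backtracking line search
# for the gain update; the function name, the cost_fn argument, and the
# constants beta and c are assumptions, not the repo's implementation.
def backtrack_step_sketch(cost_fn, K, grad, step_dir, eta0=1.0, beta=0.5,
                          c=1e-4, max_backtracks=50):
    # Shrink the step until the sufficient-decrease (Armijo) condition holds.
    c0 = cost_fn(K)
    eta = eta0
    for _ in range(max_backtracks):
        K_new = K - eta * step_dir
        if cost_fn(K_new) <= c0 - c * eta * np.sum(grad * step_dir):
            return K_new, eta
        eta *= beta
    return K, 0.0  # no acceptable step found within max_backtracks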
def gen_system_example_suspension():
    n = 4
    m = 1

    # Vehicle suspension model parameters
    m1 = 500
    m2 = 100
    k1 = 5000
    k2 = 20000
    b1 = 200
    b2 = 4000

    A = np.array([[0, 1, 0, 0],
                  [-(b1*b2)/(m1*m2), 0, ((b1/m1)*((b1/m1)+(b1/m2)+(b2/m2)))-(k1/m1), -(b1/m1)],
                  [b2/m2, 0, -((b1/m1)+(b1/m2)+(b2/m2)), 1],
                  [k2/m2, 0, -((k1/m1)+(k1/m2)+(k2/m2)), 0]])
    B = 1000*np.array([[0],
                       [1/m1],
                       [0],
                       [(1/m1)+(1/m2)]])
    C = np.eye(n)
    D = np.zeros([n, m])
    sysc = (A, B, C, D)
    sysd = scipy.signal.cont2discrete(sysc, dt=0.5, method='bilinear')
    A = sysd[0]
    B = sysd[1]

    # Multiplicative noise data
    p = 4
    q = 1
    a = 0.1*np.ones(p)
    b = 0.2*np.ones(q)
    Aa = np.zeros([n, n, p])
    for i in range(p):
        Aa[:, i, i] = np.ones(n)
    Bb = np.zeros([n, m, q])
    for j in range(q):
        Bb[:, j, j] = np.ones(n)

    Q = np.eye(n)
    R = np.eye(m)
    S0 = np.eye(n)

    # Ensure mean-square stabilizability: decrease the noise until the
    # generalized Riccati equation admits a finite-cost solution
    mss = False
    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)
    while not mss:
        if SS.ccare < np.inf:
            mss = True
        else:
            a = a*0.95
            b = b*0.95
            SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)

    timestr = str(time()).replace('.', 'p')
    dirname_out = os.path.join('systems', timestr)
    SS.dirname = dirname_out
    filename_only = 'system_init.pickle'
    filename_out = os.path.join(dirname_out, filename_only)
    pickle_export(dirname_out, filename_out, SS)
    return SS
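
# The suspension model above is discretized with scipy.signal.cont2discrete
# using the bilinear (Tustin) method. A small self-contained check of what
# that step computes, on a hypothetical 2-state system (the matrices here
# are made up for illustration): Ad = (I - dt/2*Ac)^{-1} (I + dt/2*Ac).
def _check_bilinear_discretization():
    Ac = np.array([[0.0, 1.0], [-2.0, -0.5]])
    Bc = np.array([[0.0], [1.0]])
    C = np.eye(2)
    D = np.zeros([2, 1])
    dt = 0.5
    Ad, Bd, _, _, _ = scipy.signal.cont2discrete((Ac, Bc, C, D), dt=dt,
                                                 method='bilinear')
    Ad_check = np.linalg.solve(np.eye(2) - (dt/2)*Ac,
                               np.eye(2) + (dt/2)*Ac)
    assert np.allclose(Ad, Ad_check)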
def routine_gen():
    folderstr = 'systems'
    timestr = str(time()).replace('.', 'p')
    dirname_in = os.path.join(folderstr, timestr)
    create_directory(dirname_in)

    nSS = 20  # Number of independent runs/systems
    all_dict = {'gradient': {'costnorm': [], 'gradnorm': []},
                'natural_gradient': {'costnorm': [], 'gradnorm': []},
                'gauss_newton': {'costnorm': [], 'gradnorm': []}}

    for i in range(nSS):
        SS = gen_system_mult(n=10, m=10, safety_margin=0.3, noise='olmss_weak',
                             mult_noise_method='random', SStype='random',
                             seed=-1, saveSS=False)

        # Policy gradient setup
        K0_method = 'zero'
        K0 = set_initial_gains(SS, K0_method=K0_method)
        step_direction_dict = {'gradient': {'eta': 1e-5, 'max_iters': 5000},
                               'natural_gradient': {'eta': 1e-4, 'max_iters': 2000},
                               'gauss_newton': {'eta': 1/2, 'max_iters': 10}}
        sleep(1)
        for step_direction in step_direction_dict:
            SS.setK(K0)
            t_start = time()
            eta = step_direction_dict[step_direction]['eta']
            max_iters = step_direction_dict[step_direction]['max_iters']
            PGO = policy_gradient_setup(SS, eta, step_direction, max_iters)
            t_end = time()
            print('Initialization completed after %.3f seconds' % (t_end - t_start))
            SS, histlist = run_policy_gradient(SS, PGO)
            costnorm = (histlist[2]/SS.ccare) - 1
            gradnorm = la.norm(histlist[1], ord='fro', axis=(0, 1))
            all_dict[step_direction]['costnorm'].append(costnorm)
            all_dict[step_direction]['gradnorm'].append(gradnorm)

    filename_out = 'monte_carlo_all_dict.pickle'
    path_out = os.path.join(dirname_in, filename_out)
    pickle_export(dirname_in, path_out, all_dict)
    plot_data(all_dict, dirname_in)
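
# A sketch (an assumption about downstream plotting, not the repo's
# plot_data) of how the per-run curves collected in all_dict could be
# summarized: stack runs of equal length and plot the median with
# percentile bands.
def aggregate_runs_sketch(curve_list):
    curves = np.stack(curve_list)  # shape (n_runs, n_iters)
    lo = np.percentile(curves, 10, axis=0)
    med = np.percentile(curves, 50, axis=0)
    hi = np.percentile(curves, 90, axis=0)
    return lo, med, hi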
    printout('average time per gradient estimate (s)', textfile)
    printout("%.3f" % ((t_end - t_start)/n_iterc), textfile)
    printout('', textfile)
    return G_act, G_est_all, error_angle_all, error_scale_all, error_norm_all


###############################################################################
# Main
###############################################################################
timestr = str(time()).replace('.', 'p')
dirname_out = timestr
create_directory(dirname_out)

textfilename_only = "noiseless.txt"
textfile_out = os.path.join(dirname_out, textfilename_only)
textfile = open(textfile_out, "w+")
data_noiseless = gradient_estimate_variance(noise=False, textfile=textfile)
textfile.close()
filename_only = 'data_noiseless.pickle'
filename_out = os.path.join(dirname_out, filename_only)
pickle_export(dirname_out, filename_out, data_noiseless)

textfilename_only = "noisy.txt"
textfile_out = os.path.join(dirname_out, textfilename_only)
textfile = open(textfile_out, "w+")
data_noisy = gradient_estimate_variance(noise=True, textfile=textfile)
textfile.close()
filename_only = 'data_noisy.pickle'
filename_out = os.path.join(dirname_out, filename_only)
pickle_export(dirname_out, filename_out, data_noisy)
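
# The function above returns error_angle_all, error_scale_all, and
# error_norm_all. A sketch of natural definitions for those metrics between
# an estimated gradient G_est and the exact gradient G_act; these formulas
# are assumptions, not necessarily the repo's exact definitions.
def gradient_errors_sketch(G_est, G_act):
    cos = np.sum(G_est*G_act)/(np.linalg.norm(G_est)*np.linalg.norm(G_act))
    error_angle = np.arccos(np.clip(cos, -1.0, 1.0))  # misalignment (radians)
    error_scale = np.linalg.norm(G_est)/np.linalg.norm(G_act)  # magnitude ratio
    error_norm = np.linalg.norm(G_est - G_act)/np.linalg.norm(G_act)  # relative error
    return error_angle, error_scale, error_norm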
def gen_system_erdos_renyi(n,
                           diffusion_constant=1.0,
                           leakiness_constant=0.1,
                           time_constant=0.05,
                           leaky=True,
                           seed=None,
                           detailed_outputs=False,
                           dirname_out='.'):
    npr.seed(seed)
    timestr = str(time()).replace('.', 'p')
    dirname_out = os.path.join('systems', timestr)  # overrides the dirname_out argument

    # ER probability
    # crp = 7.0
    # erp = (np.log(n+1)+crp)/(n+1)  # almost surely connected with prob = 0.999
    mean_degree = 4.0  # should be > 1 for a giant component to exist
    erp = mean_degree/(n - 1.0)

    # Create random Erdos-Renyi graph
    # Adjacency matrix
    n_edges = 0
    adjacency = np.zeros([n, n])
    for i in range(n):
        for j in range(i + 1, n):
            if npr.rand() < erp:
                n_edges += 1
                adjacency[i, j] = npr.randint(low=1, high=4)
                adjacency[j, i] = np.copy(adjacency[i, j])
    # Degree matrix
    degree = np.diag(adjacency.sum(axis=0))
    # Graph Laplacian
    laplacian = degree - adjacency

    # Continuous-time dynamics matrices
    Ac = -laplacian*diffusion_constant
    Bc = np.eye(n)/time_constant  # normalize so that B = np.eye(n) after discretization
    if leaky:
        Fc = leakiness_constant*np.eye(n)
        Ac = Ac - Fc

    # Plot
    visualize_graph_ring(adjacency, n, dirname_out)

    # Forward Euler discretization
    A = np.eye(n) + Ac*time_constant
    B = Bc*time_constant
    n = np.copy(n)
    m = np.copy(n)

    # Multiplicative noises: one per edge on A, one per node on B
    a = 0.005*npr.randint(low=1, high=5, size=n_edges)*np.ones(n_edges)
    Aa = np.zeros([n, n, n_edges])
    k = 0
    for i in range(n):
        for j in range(i + 1, n):
            if adjacency[i, j] > 0:
                Aa[i, i, k] = 1
                Aa[j, j, k] = 1
                Aa[i, j, k] = -1
                Aa[j, i, k] = -1
                k += 1
    b = 0.05*npr.randint(low=1, high=5, size=n)*np.ones(n)
    Bb = np.zeros([n, m, m])
    for i in range(n):
        Bb[i, i, i] = 1

    Q = np.eye(n)
    R = np.eye(m)
    S0 = np.eye(n)
    SS = LQRSysMult(A, B, a, Aa, b, Bb, Q, R, S0)

    SS.dirname = dirname_out
    filename_only = 'system_init.pickle'
    filename_out = os.path.join(dirname_out, filename_only)
    pickle_export(dirname_out, filename_out, SS)
    return SS
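
# A quick property check (illustration, not part of the generator): since
# the graph Laplacian satisfies L @ 1 = 0, the forward-Euler matrix
# A = I + Ac*dt has row sums of 1 when leaky=False, i.e. the open loop is
# only marginally stable; the leak term -leakiness_constant*I is what pulls
# the eigenvalues strictly inside the unit circle.
def _check_laplacian_rowsums():
    L = np.array([[1.0, -1.0, 0.0],
                  [-1.0, 2.0, -1.0],
                  [0.0, -1.0, 1.0]])  # a hypothetical 3-node path graph
    dt, c = 0.05, 1.0
    A = np.eye(3) - c*dt*L
    assert np.allclose(A @ np.ones(3), np.ones(3))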
def traverse_sparsity(SS, PGO, optiongroup, optiongroup_dir):
    # Sparsity traversal settings
    sparsity_required = 0.95
    sparse_thresh = 0.001

    plt.ioff()
    img_folder = 'sparsity_images'
    img_dirname_out = os.path.join(optiongroup_dir, img_folder)
    create_directory(img_dirname_out)
    filename_out_pre = 'sparsity_are'
    plot_sparse(img_dirname_out, filename_out_pre, SS.Kare, SS.ccare,
                sparse_thresh, PGO, are_flag=True)

    regweight_ratio = np.sqrt(2)
    if optiongroup == 'proximal_gradient_GN_PI':
        eta_ratio = 1
    else:
        eta_ratio = (1/regweight_ratio)**np.sqrt(regweight_ratio)  # empirically works well

    sparsity_data = []
    img_pattern = 'sparsity%02d'
    iterc = 0
    iterc_max = 18
    stop = False
    sparsity_prev = 0
    sparsity_max = 0
    while not stop:
        # Policy gradient
        t_start = time()
        SS, hist_list = run_policy_gradient(SS, PGO)
        t_end = time()
        # Replace '.' only within the regweight so the .pickle extension is preserved
        filename_out = ('system_%d_regweight_%.3f' % (iterc, PGO.regweight)).replace('.', 'p') + '.pickle'
        path_out = os.path.join(optiongroup_dir, filename_out)
        pickle_export(optiongroup_dir, path_out, SS)

        # Plotting
        filename_out_pre = img_pattern % iterc
        ax_im, ax_im_bw, ax_hist, img, img_bw, cbar, sparsity = plot_sparse(
            img_dirname_out, filename_out_pre, SS.K, SS.c, sparse_thresh, PGO)
        plt.close('all')

        sparsity_data.append([PGO.regweight, sparsity, SS.K, SS.c,
                              t_end - t_start, hist_list])

        # Stopping criteria
        if sparsity > sparsity_required:
            stop = True
        # if sparsity < sparsity_prev:
        #     stop = True
        # if sparsity_max > 0.60 and sparsity < 0.05:
        #     stop = True
        if sparsity_max > 0.60 and sparsity < sparsity_prev:
            stop = True
        if iterc >= iterc_max - 1:
            stop = True

        # Update step size and regularization weight
        PGO.eta *= eta_ratio
        PGO.regweight *= regweight_ratio
        sparsity_prev = sparsity
        sparsity_max = np.max([sparsity, sparsity_max])
        iterc += 1
        # input("Press [enter] to continue.")

    # vidname = 'sparsity_evolution'
    # vidsave(img_folder, img_pattern, vidname)

    filename_out = 'sparsity_data.pickle'
    path_out = os.path.join(optiongroup_dir, filename_out)
    pickle_export(optiongroup_dir, path_out, sparsity_data)
    return sparsity_data
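
# traverse_sparsity sweeps PGO.regweight upward to trace out a sparsity
# curve. For the 'proximal_gradient' option group this corresponds to an L1
# penalty on the gains; below is a minimal sketch of the associated proximal
# step and the sparsity measurement (tau = eta*regweight is an assumption
# about how the options combine, not the repo's run_policy_gradient
# internals).
def soft_threshold_sketch(K, tau):
    # Proximal operator of tau*||K||_1: shrink each entry toward zero.
    return np.sign(K)*np.maximum(np.abs(K) - tau, 0.0)

def sparsity_fraction_sketch(K, thresh=0.001):
    # Fraction of gain entries below the threshold (cf. sparse_thresh above).
    return np.mean(np.abs(K) < thresh)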
# timestr = '1556656014p3178775_n50_olmss_vec1'
# SS = load_system(timestr)
check_olmss(SS)

optiongroup_list = ['gradient', 'subgradient', 'proximal_gradient']
# optiongroup_list = ['gradient']
# optiongroup_list = ['subgradient']
# optiongroup_list = ['proximal_gradient']

for optiongroup in optiongroup_list:
    optiongroup_dir = os.path.join(SS.dirname, optiongroup)
    create_directory(optiongroup_dir)

    # Policy gradient setup
    t_start = time()
    K0_method = 'are'
    set_initial_gains(SS, K0_method=K0_method)
    PGO = policy_gradient_setup(SS, optiongroup)
    filename_out = 'policy_gradient_options.pickle'
    path_out = os.path.join(optiongroup_dir, filename_out)
    pickle_export(optiongroup_dir, path_out, PGO)
    t_end = time()
    print('Initialization completed after %.3f seconds' % (t_end - t_start))

    # Sparsity traversal
    traverse_sparsity(SS, PGO, optiongroup, optiongroup_dir)
    # run_policy_gradient(SS, PGO)