def load_data(path_data, action_space, force_reload=False):
    """Build (or reload) the cleaned, shuffled train/eval split for ``path_data``.

    The processed result is cached in ``<path_data>, processed/data``. Unless
    ``force_reload`` is true, an existing cache file is returned directly via
    ``load_vars``.

    Args:
        path_data: Directory holding the raw result files.
        action_space: Sizes the empty seed arrays: inputs get
            ``2 * action_space`` columns and outputs ``4 * action_space``.
        force_reload: Rebuild from the raw files even when a cache exists.

    Returns:
        ``(train_x, train_y, train_weight, eval_x, eval_y, eval_weight)`` when
        rebuilt; otherwise whatever ``load_vars`` yields for the cache file
        (presumably the same six arrays, since that is what is saved here).

    Raises:
        ValueError: If no raw files are found under ``path_data``.
    """
    path_data_processed = path_data + ', processed'
    file_data_processed = path_data_processed + '/data'
    if not force_reload and os.path.exists(file_data_processed):
        print(f'load data from {file_data_processed}')
        return load_vars(file_data_processed)

    print(f'load data from {path_data}')
    tools.mkdir(path_data_processed)
    files = tools.get_files(path_rel=path_data, sort=True)
    if len(files) == 0:
        # Without this guard the failure surfaces later as an opaque
        # "need at least one array to concatenate" ValueError.
        raise ValueError(f'no data files found in {path_data}')

    # Seed with empty arrays of the expected width, then concatenate once
    # after the loop instead of re-copying the accumulated data per file.
    inputs_parts = [np.zeros((0, 2 * action_space))]
    outputs_parts = [np.zeros((0, 4 * action_space))]
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is equivalent.
    counts = np.zeros(len(files), dtype=int)
    for ind, f in enumerate(files):
        mu0s_ats_batch, logsigma0s_batch, ress = load_vars(f)
        inputs_parts.append(
            np.concatenate((mu0s_ats_batch, logsigma0s_batch), axis=-1))
        max_values = np.array([res['max'].x for res in ress])
        min_values = np.array([res['min'].x for res in ress])
        outputs_parts.append(
            np.concatenate((max_values, min_values), axis=-1))
        counts[ind] = mu0s_ats_batch.shape[0]
    inputs_final = np.concatenate(inputs_parts)    # (N, 2 * action_space)
    outputs_final = np.concatenate(outputs_parts)  # (N, 4 * action_space)

    # Every sample of a file gets weight mean(count) / count, so each file
    # contributes the same total weight regardless of how many rows it has.
    cnt_normalize = counts.mean()
    weights = np.concatenate(
        [cnt_normalize * 1. / cnt * np.ones(cnt) for cnt in counts], axis=0)

    # --- drop rows whose outputs contain NaN or +/-inf
    inds_reserve = np.isfinite(outputs_final).all(axis=1)
    inputs_final = inputs_final[inds_reserve]
    outputs_final = outputs_final[inds_reserve]
    weights = weights[inds_reserve]

    # --- shuffle inputs, outputs and weights with one shared permutation
    inds_shuffle = np.random.permutation(inputs_final.shape[0])
    inputs_final = inputs_final[inds_shuffle]
    outputs_final = outputs_final[inds_shuffle]
    weights = weights[inds_shuffle]

    # The last 500 rows form the evaluation set. With 500 rows or fewer the
    # training slices come out empty.
    ind_split = -500
    train_x, train_y, train_weight = \
        inputs_final[:ind_split], outputs_final[:ind_split], weights[:ind_split]
    eval_x, eval_y, eval_weight = \
        inputs_final[ind_split:], outputs_final[ind_split:], weights[ind_split:]
    save_vars(file_data_processed, train_x, train_y, train_weight,
              eval_x, eval_y, eval_weight)
    return train_x, train_y, train_weight, eval_x, eval_y, eval_weight
def get_tabular(self, delta):
    """Return the table for ``delta``, loading or creating it on first use.

    Lookup order: the in-memory ``self.deltas_dict``, then a non-empty file at
    ``<path_root_tabular>/<delta:.16f>_atari``, and finally
    ``self.create_tabular(delta)``, whose result is written to that file.
    Creation happens inside ``tools_process.FileLocker`` (presumably so that
    concurrent processes do not build the same table twice; confirm against
    its implementation), and the file is checked again once the lock is held.
    """
    save_path = f'{path_root_tabular}/{delta:.16f}_atari'

    def stored_on_disk():
        # A zero-byte file does not count as a stored table.
        return os.path.exists(save_path) and os.path.getsize(save_path) > 0

    # TODO: file lock (the unlocked read below is not protected)
    if delta not in self.deltas_dict:
        if stored_on_disk():
            self.deltas_dict[delta] = tools.load_vars(save_path)
        else:
            locker_path = f'{path_root_tabluar_locker}/{delta:.16f}'
            with tools_process.FileLocker(locker_path):
                # Re-check under the lock: the file may have appeared while
                # this call was waiting.
                if stored_on_disk():
                    self.deltas_dict[delta] = tools.load_vars(save_path)
                else:
                    self.deltas_dict[delta] = self.create_tabular(delta)
                    tools.save_vars(save_path, self.deltas_dict[delta])
    return self.deltas_dict[delta]
def load_data_normal(path_data, USE_MULTIPROCESSING=True):
    """Build (or reload) a train/eval split of fsolve-refined ``mu`` values.

    If ``<path_data>/train_preprocessed_reduce_v3/data`` exists it is returned
    directly via ``load_vars``. Otherwise the first ``.pkl`` file found under
    ``path_data`` is loaded, the min/max ``mu`` values from the stored result
    are passed through the function returned by ``get_calculate_mu_func``,
    rows containing NaN are dropped, and the shuffled data is split and saved.

    Args:
        path_data: Root directory of the raw ``.pkl`` result files.
        USE_MULTIPROCESSING: Run the per-sample computation in a pool of four
            worker processes instead of sequentially.

    Returns:
        ``(train_x, train_y, train_weight, eval_x, eval_y, eval_weight)`` when
        rebuilt; otherwise whatever ``load_vars`` yields for the cache file.
    """
    path_save = f'{path_data}/train_preprocessed_reduce_v3'
    if os.path.exists(f'{path_save}/data'):
        print(f'load data from {path_save}/data')
        return load_vars(f'{path_save}/data')

    # NOTE(review): this creates ``train_preprocessed`` while the result is
    # saved under ``train_preprocessed_reduce_v3`` -- confirm that save_vars
    # creates its own parent directory.
    tools.mkdir(f'{path_data}/train_preprocessed')
    files = tools.get_files(path_rel=path_data, only_sub=False, sort=False,
                            suffix='.pkl')

    actions, deltas, max_mu_logsigma, min_mu_logsigma = [], [], [], []
    # Only the first file is consumed (``files[:1]``), as in the original.
    for f in files[:1]:
        a_s_batch, _, _, ress_tf = load_vars(f)
        actions.append(a_s_batch)
        deltas.append(np.ones_like(a_s_batch) * ress_tf.delta)
        min_mu_logsigma.append(ress_tf.x.min)
        max_mu_logsigma.append(ress_tf.x.max)
    actions = np.concatenate(actions, axis=0)
    deltas = np.concatenate(deltas, axis=0)
    min_mu_logsigma = np.concatenate(min_mu_logsigma, axis=0)
    max_mu_logsigma = np.concatenate(max_mu_logsigma, axis=0)

    # Keep the first half of the last axis (mu) and discard the second half.
    min_mu_tfopt, _ = np.split(min_mu_logsigma, indices_or_sections=2, axis=-1)
    max_mu_tfopt, _ = np.split(max_mu_logsigma, indices_or_sections=2, axis=-1)

    time0 = time.time()
    calculate_mu = get_calculate_mu_func(True)
    # TODO: the following is mu_logsigma_fsolve
    if USE_MULTIPROCESSING:
        # The context manager shuts the worker processes down; the original
        # never closed the pool and leaked them.
        with multiprocessing.Pool(4) as pool:
            min_mu_fsolve = pool.map(
                calculate_mu, zip(min_mu_tfopt, actions, deltas))
            max_mu_fsolve = pool.map(
                calculate_mu, zip(max_mu_tfopt, actions, deltas))
    else:
        min_mu_fsolve = list(
            map(calculate_mu, zip(min_mu_tfopt, actions, deltas)))
        max_mu_fsolve = list(
            map(calculate_mu, zip(max_mu_tfopt, actions, deltas)))
    # Each result is indexable; only its first element is kept.
    min_mu_fsolve = [res[0] for res in min_mu_fsolve]
    max_mu_fsolve = [res[0] for res in max_mu_fsolve]
    time1 = time.time()
    print(time1 - time0)

    mu_tf_opt = np.concatenate((min_mu_tfopt, max_mu_tfopt), axis=1)
    mu_fsolve = np.stack(
        (np.concatenate(min_mu_fsolve, axis=0).squeeze(),
         np.concatenate(max_mu_fsolve, axis=0).squeeze()),
        axis=1)
    # Diagnostic: difference between the stored and the recomputed values.
    print(mu_tf_opt - mu_fsolve)

    inds_shuffle = np.random.permutation(actions.shape[0])
    all_ = np.concatenate((actions, deltas, mu_fsolve), axis=1)[inds_shuffle]
    all_ = all_[~np.isnan(all_).any(axis=1)]
    # First half of the columns: (actions, deltas); second half: the
    # (min, max) fsolve results.
    inputs_all, outputs_all = np.split(all_, indices_or_sections=2, axis=1)
    weights = np.ones(shape=(inputs_all.shape[0],))
    print(outputs_all.shape)

    # The last 3000 rows form the evaluation set.
    ind_split = -3000
    train_x, train_y, train_weight = \
        inputs_all[:ind_split], outputs_all[:ind_split], weights[:ind_split]
    eval_x, eval_y, eval_weight = \
        inputs_all[ind_split:], outputs_all[ind_split:], weights[ind_split:]
    save_vars(f'{path_save}/data', train_x, train_y, train_weight,
              eval_x, eval_y, eval_weight)
    return train_x, train_y, train_weight, eval_x, eval_y, eval_weight
def tes_data():
    """Plot stored clipping ratios against the tabular and NN ``KL2Clip`` ones.

    For every ``.pkl`` file under the hard-coded data directory, the stored
    result is loaded, the same actions are evaluated with
    ``KL2Clip(opt1Dkind='tabular')`` and ``KL2Clip(opt1Dkind='nn')``, and the
    min/max ratios of all three sources are drawn into one figure, which is
    then shown. Nothing is returned and no figure is written to disk.
    """
    from baselines.common.tools import load_vars
    import matplotlib.pyplot as plt

    dim = 1
    path_root = '/home/hugo/Desktop/wxm/KL2Clip'
    path_data = f'{path_root}/data/train'
    files = tools.get_files(path_rel=path_data, only_sub=False, sort=False,
                            suffix='.pkl')

    kl2clip_tabular = KL2Clip(dim=dim, opt1Dkind='tabular')
    kl2clip_nn = KL2Clip(dim=dim, opt1Dkind='nn')
    keys_ress = ['min', 'max']

    for f in files:
        actions, logsigma0, logsigma0s_batch, ress_tf = load_vars(f)
        # Round the actions to the precision constant used by the table.
        actions = np.round(actions, TabularActionPrecision)
        delta = ress_tf.delta
        # mu0 is fixed at zero; only logsigma0 varies between rows.
        x0 = np.concatenate(
            (np.zeros_like(logsigma0s_batch), logsigma0s_batch), axis=1)
        ress_tabular = kl2clip_tabular(mu0_logsigma0_cat=x0, a=actions,
                                       delta=delta)
        ress_nn = kl2clip_nn(mu0_logsigma0_cat=x0, a=actions, delta=delta)

        ratios_tfopt = ress_tf.ratio
        ratios_tabular = ress_tabular.ratio
        ratios_nn = ress_nn.ratio
        print(f'ress_tabular is {ress_tabular}')
        print(f'ress_nn is {ress_nn}')

        name_base = f'delta: {delta}, logsigma0: {logsigma0}, delta x 100'
        fig = plt.figure(figsize=(20, 10))
        if dim == 1:
            for minORmax in keys_ress:  # for maximize and minimize
                plt.scatter(actions, ratios_tfopt[minORmax],
                            label='ratio_' + minORmax + '_tfopt',
                            color='black', s=1)
                plt.scatter(actions, ratios_tabular[minORmax],
                            label='ratio_' + minORmax + '_tabular',
                            color='red', s=1)
                plt.scatter(actions, ratios_nn[minORmax],
                            label='ratio_' + minORmax + '_nn',
                            color='green', s=1)
        elif dim == 2:
            # ``fig.gca(projection=...)`` was removed in Matplotlib 3.6.
            ax = fig.add_subplot(111, projection='3d')
            print(actions.shape, ratios_tfopt['max'].shape,
                  ratios_tfopt['min'].shape)
            for opt_name in keys_ress:  # for maximize and minimize
                ax.scatter(actions[:, 0], actions[:, 1],
                           ratios_tfopt[opt_name],
                           label='ratio_' + opt_name + '_tfopt',
                           color='blue', s=1)
                # The original plotted ``ratios_scipyfsolve`` here, which is
                # never defined in this function (NameError). The tabular and
                # NN ratios are plotted instead, mirroring the 1-D branch.
                ax.scatter(actions[:, 0], actions[:, 1],
                           ratios_tabular[opt_name],
                           label='ratio_' + opt_name + '_tabular',
                           color='red', s=1)
                ax.scatter(actions[:, 0], actions[:, 1],
                           ratios_nn[opt_name],
                           label='ratio_' + opt_name + '_nn',
                           color='green', s=1)

        name = name_base + ', ratio'
        plt.title(name)
        plt.legend(loc='best')
        path_dir, _ = os.path.split(f)
        # Only the would-be target path is reported; saving is disabled.
        print('save' + path_dir + f'/delta:{delta}.png' + ' ratio')
        plt.show()
def tes_3d_data():
    """Scatter the max clipping ratio over (action, delta) in a 3-D plot.

    One pickle is taken from every sub-directory of ``<path_root>/data/train``.
    For each, the stored max ratio and the one recomputed by ``KL2Clip`` are
    collected, sorted by action. The collected lists are written to ``aa.pkl``
    and read back, thinned to every 30th sample, and plotted. Nothing is
    returned.
    """
    if tools.ispc('xiaoming'):
        path_root = '/media/root/新加卷/KL2Clip'
    else:
        path_root = ''
    import plt_tools
    from baselines.common.tools import load_vars, save_vars
    import matplotlib.pyplot as plt

    # Developer toggle kept from the original ``if 1:``; set to False to
    # reuse an existing ``aa.pkl`` instead of recomputing it.
    regenerate = True
    if regenerate:
        dim = 1
        files = []
        path_data = f'{path_root}/data/train'
        for name in sorted(os.listdir(path_data)):
            dir_pickle = os.path.join(path_data, name)
            try:
                entries = os.listdir(dir_pickle)
                # Take the first entry if it is a pickle, else the second.
                file_path = entries[0] if entries[0].endswith('pkl') \
                    else entries[1]
            except (OSError, IndexError):
                # Not a directory / unreadable, or too few entries: skip it.
                # (Was a bare ``except``, which also hid KeyboardInterrupt.)
                continue
            files.append(os.path.join(dir_pickle, file_path))

        tfoptsssss = []
        scipyfsolvesssss = []
        a_delta = []
        for f in files:
            print(f)
            actions, _, _, ress_tf = load_vars(f)
            delta = ress_tf.delta
            ratio_max_tfopt = ress_tf.ratio.max
            kl2clip = KL2Clip(dim=dim)
            x0 = np.zeros(shape=(actions.shape[0], 2), dtype=np.float32)
            # sort by actions
            inds = np.argsort(actions, axis=0).reshape(-1)
            actions = actions[inds]
            ratio_max_tfopt = ratio_max_tfopt[inds]
            ress = kl2clip(mu0_logsigma0_cat=x0, a=actions, delta=delta)
            ratio_max_scipyfsolve = ress.ratio.max
            a_delta.append(
                np.concatenate((actions, delta * np.ones_like(actions)),
                               axis=1))
            tfoptsssss.append(ratio_max_tfopt)
            scipyfsolvesssss.append(ratio_max_scipyfsolve)
        save_vars('aa.pkl', a_delta, tfoptsssss, scipyfsolvesssss)

    a_delta, tfoptsssss, scipyfsolvesssss = load_vars('aa.pkl')

    def thin(arrs):
        # Keep every 30th sample of each array to lighten the scatter plot.
        return [arr[0::30] for arr in arrs]

    a_delta = np.concatenate(thin(a_delta), axis=0)
    tfoptsssss = np.concatenate(thin(tfoptsssss), axis=0)
    scipyfsolvesssss = np.concatenate(thin(scipyfsolvesssss), axis=0)

    fig = plt.figure()
    # ``fig.gca(projection=...)`` was removed in Matplotlib 3.6.
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(0, 0)
    # The series name used to be passed positionally, where Axes3D.scatter
    # expects ``zdir``; pass it as the label instead.
    ax.scatter(a_delta[:, 0], a_delta[:, 1], tfoptsssss,
               label='_tfopt', s=1, color='black')
    ax.scatter(a_delta[:, 0], a_delta[:, 1], scipyfsolvesssss,
               label='_scipyfsolve', s=1, color='red')
    plt_tools.set_postion()
    plt_tools.set_size()
    plt.show()
def prepare_data(dim, delta, sharelogsigma, clipcontroltype, cliprange, clip_clipratio, search_delta=False):
    """Run (or reload) the KL2Clip optimisation for one setting and plot it.

    A directory ``<path_root>/KL2Clip/data/train_lambda/dim=<dim>,
    delta=<delta>, train`` is created. For each ``logsigma0`` the optimiser is
    run on 2048 actions evenly spaced in [-5, 5] unless a cached
    ``logsigma0=<...>.pkl`` already exists; the result is then loaded from
    that pickle and, unless ``search_delta`` is set, the max/min ratios are
    scatter-plotted.

    Only ``dim == 1`` is supported; any other value raises
    ``NotImplementedError``. The function has no return statement, so it
    returns ``None`` (``results`` is filled when ``search_delta`` is true but
    never returned).

    NOTE(review): the source formatting was collapsed, so the nesting of some
    statements below (``plot_new``, the title/legend block and the final
    ``plt.show()``/``plt.close()``) is a best-effort reconstruction -- confirm
    against the original file.
    """
    # Declared global but never assigned in this function.
    global ress_tf_last
    path_data = path_root + '/KL2Clip/data/train_lambda'
    Name = f'dim={dim}, delta={delta}, train'
    path_data_processed = path_data + f'/{Name}'
    tools.mkdir(path_data_processed)
    if dim == 1:
        logsigma0s = np.array([0])
    else:
        raise NotImplementedError
    # One row per logsigma0 setting, ``dim`` columns each.
    logsigma0s = logsigma0s.reshape((-1, dim))
    batch_size = 2048
    mu = np.zeros((dim, ))
    opt = KL2Clip(dim=dim, batch_size=batch_size, sharelogsigma=sharelogsigma, clipcontroltype=clipcontroltype, cliprange=cliprange)

    def get_fn_sample():
        # Builds two callables over a DiagGaussianPd parameterised by
        # (mu0, logsigma0): one draws samples, one evaluates ``dist.p(a)``.
        mu0 = tf.placeholder(shape=[dim], dtype=tf.float32)
        a = tf.placeholder(shape=[batch_size, dim], dtype=tf.float32)
        logsigma0 = tf.placeholder(shape=[dim], dtype=tf.float32)
        sample_size = tf.placeholder(shape=(), dtype=tf.int32)
        dist = DiagGaussianPd(tf.concat((mu0, logsigma0), axis=0))
        samples = dist.sample(sample_size)
        fn_sample = U.function([mu0, logsigma0, sample_size], samples)
        fn_p = U.function([mu0, logsigma0, a], dist.p(a))
        return fn_sample, fn_p

    # The session handle itself is not used below; presumably the call matters
    # for installing a default session (make_default=True) -- TODO confirm.
    sess = U.make_session(make_default=True)
    results = []
    # fn_sample is only referenced by commented-out code below.
    fn_sample, fn_p = get_fn_sample()
    for logsigma0 in logsigma0s:
        prefix_save = f'{path_data_processed}/logsigma0={logsigma0}'
        Name_f = f"{Name},logsigma0={logsigma0}"
        file_fig = f'{prefix_save}.png'
        # a_s_batch = fn_sample( mu, logsigma0, batch_size )
        # Actions: an evenly spaced grid, shaped (batch_size, 1).
        a_s_batch = np.linspace(-5, 5, batch_size).reshape((-1, 1))
        logsigma0s_batch = np.tile(logsigma0, (batch_size, 1))
        print(a_s_batch.max(), a_s_batch.min())
        # --- sort the data: have problem in 2-dim
        # inds = np.argsort(a_s_batch, axis=0)
        # inds = inds.reshape(-1)
        # a_s_batch = a_s_batch[inds]
        # logsigma0s_batch = logsigma0s_batch[inds]
        # if search_delta:
        #     for i in range( batch_size):
        #         a_s_batch[i,:] = 0.001 * (batch_size-i)
        if not os.path.exists(f'{prefix_save}.pkl'):
            # No cached result yet: optimise with mu0 fixed at zero, then cache.
            # ress_tf = opt( mu0_logsigma0_tuple=(a_s_batch, logsigma0s_batch), a=None, delta=delta, clip_clipratio=clip_clipratio)
            ress_tf = opt(mu0_logsigma0_tuple=(np.zeros_like(logsigma0s_batch), logsigma0s_batch), a=a_s_batch, delta=delta, clip_clipratio=clip_clipratio)
            print(a_s_batch[0], ress_tf.x.max[0], ress_tf.x.min[0])
            save_vars(f'{prefix_save}.pkl', a_s_batch, logsigma0, logsigma0s_batch, ress_tf)
        print(prefix_save)
        # Always continue from the pickled copy, freshly written or not.
        a_s_batch, logsigma0, logsigma0s_batch, ress_tf = load_vars(
            f'{prefix_save}.pkl')
        if search_delta:
            # Collect the result and skip all plotting.
            results.append(ress_tf)
            break
        # A figure is only opened for the first entry of the module-level
        # ``clipranges``; other values draw on whatever figure is current.
        if cliprange == clipranges[0]:  # TODO tmp
            fig = plt.figure(figsize=(20, 10))
        markers = ['^', '.']
        colors = [['blue', 'red'], ['green', 'hotpink']]
        # for ind, opt_name in enumerate(['max']):
        for ind, opt_name in enumerate(['max', 'min']):
            # if ind == 1:
            #     continue
            # --- plot tensorflow result
            ratios, cons = ress_tf.ratio[opt_name], ress_tf.con[opt_name]
            print(
                f'clip-{opt_name}_mean:{ratios.mean()}, clip-{opt_name}_min:{ratios.min()}, clip-{opt_name}_max:{ratios.max()}'
            )
            # NOTE(review): with the nesting reconstructed here this cannot
            # trigger, because search_delta already left the loop via
            # ``break`` above.
            if search_delta:
                continue
            if DEBUG:
                pass
            # "good" points satisfy the constraint threshold for this delta.
            inds_good = cons <= get_ConstraintThreshold(ress_tf.delta)
            inds_bad = np.logical_not(inds_good)
            if dim == 1:
                if ind == 0 and 1:
                    # Experimental re-shaping of the ratio curve; the values
                    # are only printed, every plot of them is commented out.
                    ps = fn_p(mu, logsigma0, a_s_batch)  # +np.abs(ps.max()) + 1
                    ratio_new = -np.log(ps)
                    ratio_new = ratio_new - ratio_new.min() + ratios.min()
                    alpha = np.exp(-ps * 2)
                    print(alpha)
                    # plt.scatter(a_s_batch, ratio_new, s=5, label='ratio_new0')
                    ratio_new = ratio_new.min() + alpha * (ratio_new - ratio_new.min())
                    # plt.scatter( a_s_batch, ratio_new, s=5, label='ratio_new1' )
                    # ps = -ps
                    # ratios = ps - ps.min() + ratios.min()
                    # print( ps.min() )
                    # ratios_new =np.square( a_s_batch-mu ) * np.exp( -logsigma0 )
                    # ratio_min = ps / (ps.max()-ps.min()) * ress_tf.ratio.min.max()
                    # plt.scatter(a_s_batch, ratio_min, s=5, label='square')
                    # plt.scatter(a_s_batch, 1./ratio_min, s=5, label='square')
                    # plt.scatter(a_s_batch, 1./ratios, s=5, label='1/max')

                    def plot_new(alpha):
                        # Helper for the disabled calls below; never invoked.
                        clip_max_new, clip_min_new = get_clip_new(
                            alpha, ress_tf.ratio['max'], ress_tf.ratio['min'],
                            clipcontroltype=clipcontroltype)
                        plt.scatter(a_s_batch, clip_max_new, s=5, label=f'clip_max_{alpha}')
                        plt.scatter(a_s_batch, clip_min_new, s=5, label=f'clip_min_{alpha}')
                if ind == 0:
                    pass
                    # plot_new(0.5)
                    # plot_new(0.5)
                    # plot_new(-1)
                plt.scatter(a_s_batch[inds_good], ratios[inds_good], label='ratio_predict-good_' + opt_name, s=5, color=colors[ind][0], marker=markers[ind])
                plt.scatter(a_s_batch[inds_bad], ratios[inds_bad], label='ratio_predict-bad_' + opt_name, s=5, color=colors[ind][1], marker=markers[ind])
            elif dim == 2:
                # Unreachable while dim != 1 raises NotImplementedError above.
                # NOTE(review): fig.gca(projection=...) is no longer accepted
                # by Matplotlib >= 3.6; revisit before enabling dim == 2.
                ax = fig.gca(projection='3d')
                # ax.view_init(30, 30)
                ax.view_init(90, 90)
                # ax.plot_trisurf(a_s_batch[:, 0], a_s_batch[:, 1], ratios)
                ax.scatter(a_s_batch[inds_good, 0], a_s_batch[inds_good, 1], ratios[inds_good], label='ratio_predict-good_' + opt_name, s=5, color=colors[ind][0], marker=markers[ind])
                ax.scatter(a_s_batch[inds_bad, 0], a_s_batch[inds_bad, 1], ratios[inds_bad], label='ratio_predict-bad_' + opt_name, s=5, color=colors[ind][1], marker=markers[ind])
        if dim <= 2 and not search_delta:
            plt.title(
                Name_f + f'\nstep:{ress_tf.step},rate_satisfycon:{ress_tf.rate_satisfycon_}, rate_statisfydifference_:{ress_tf.rate_statisfydifference_}, difference_max_:{ress_tf.difference_max_}'
            )
            plt.legend(loc='best')
            # The figure is written to disk only outside DEBUG mode.
            if not DEBUG:
                plt.savefig(file_fig)
    opt.close()
    if dim <= 2 and not search_delta:
        if DEBUG:
            # In DEBUG mode the figure is shown once, for the last cliprange.
            if cliprange == clipranges[-1]:
                plt_tools.set_postion()
                plt.show()
                plt.close()