def __init__(self):
    """Create an obstacle with a random car type and (optionally) noisy
    initial state, then initialize the base vehicle class."""
    # Pick a random car type for this obstacle.
    name = traffic.TYPES[np.random.randint(0, len(traffic.TYPES))]

    # Start from the configured pose/speed, perturbed when noise is enabled.
    if obstacle.NOISE:
        x = add_noise(obstacle.X, obstacle.STD_X)
        y = add_noise(obstacle.Y, obstacle.STD_Y)
        speed = add_noise(obstacle.SPEED, obstacle.STD_SPEED)
    else:
        x, y, speed = obstacle.X, obstacle.Y, obstacle.SPEED

    # Remember the (possibly noisy) starting state for later resets.
    self.init_x = x
    self.init_y = y
    self.init_speed = speed
    self.init_theta = obstacle.THETA

    self.stuck_time = 0
    self.total_stuck_time = obstacle.TOTAL_STUCK_TIME

    super(Obstacle, self).__init__(self.init_x, self.init_y,
                                   self.init_theta, self.init_speed, name)

    # Decide up front whether this obstacle will deliberately crash.
    self.crash = obstacle.CRASH and np.random.random() < obstacle.PROB_CRASH
    self.crash_y = obstacle.CRASH_Y
    self.crashing = False
def experiment(indlinks_obs, delaytype, noise=0.0, display=False, soft=1000.0):
    """find parameters that minimizes the distance between x^obs_true in NOISY case
    and x^obs generated by each candidate function with PARTIAL observation

    Parameters
    ----------
    indlinks_obs: indices of the observed links
    delaytype: type of the delay
    noise: std of the noise added to the measured link flows, ff delays, OD demands
    display: if True, display results
    soft: weight put on the observation
    """
    # `coef` and (a, b) are module-level true delay parameters -- TODO confirm.
    if delaytype == 'Polynomial': true_theta = coef
    if delaytype == 'Hyperbolic': true_theta = (a,b)
    print 'generate graph...'
    g1, g2, g3, g4 = los_angeles(true_theta, delaytype)
    print 'compute ue...'
    # Solve user equilibrium on each of the four graphs.
    l1, l2, l3, l4 = ue.solver(g1, update=True), ue.solver(g2, update=True), \
        ue.solver(g3, update=True), ue.solver(g4, update=True)
    # Total travel cost (delay * flow) per graph, for reporting only.
    c1 = sum([link.delay*link.flow for link in g1.links.values()])
    c2 = sum([link.delay*link.flow for link in g2.links.values()])
    c3 = sum([link.delay*link.flow for link in g3.links.values()])
    c4 = sum([link.delay*link.flow for link in g4.links.values()])
    print 'ue costs: ', c1, c2, c3, c4
    # Sorted integer indices of the observed links.
    obs = [g1.indlinks[id] for id in indlinks_obs]
    obs = [int(i) for i in list(np.sort(obs))]
    x1,x2,x3,x4 = l1,l2,l3,l4
    if noise > 0.0:
        # Perturb the measured flows and rebuild the graphs with noisy data.
        x1, x2, x3, x4 = add_noise(l1,noise), add_noise(l2,noise), add_noise(l3,noise), add_noise(l4,noise)
        g1, g2, g3, g4 = los_angeles(true_theta, 'Polynomial', noise)
    # Inverse-optimization solve restricted to the observed links.
    theta, xs = invopt.main_solver([g1,g2,g3,g4], [x1[obs],x2[obs],x3[obs],x4[obs]], obs, degree, soft)
    # Relative L1 error between true and estimated link flows.
    u, v = matrix([l1,l2,l3,l4]), matrix(xs)
    error = np.linalg.norm(u-v, 1) / np.linalg.norm(u, 1)
    if display: display_results(error, true_theta, [theta], delaytype)
    return error, theta
def train_epoch(self, device, dataloader, validation_dataloader, loss_fn,
                optimizer, noise_factor=0.5):
    """Run one denoising train + validation epoch over every autoencoder.

    Returns a list with one {'train': ..., 'valid': ...} mean-loss dict
    per autoencoder in self.AEs.
    """
    mean_loss = []
    for i, AE in enumerate(self.AEs):
        AE.train()
        train_loss = []
        # Unsupervised: labels from the dataloader are ignored.
        for image_batch, _ in dataloader:
            corrupted = add_noise(image_batch, noise_factor)
            clean = image_batch.to(device)
            corrupted = corrupted.to(device)
            # Reconstruct the clean image from the corrupted input.
            output = AE(corrupted)
            loss = loss_fn(output, clean)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(
                f'\t AE{i} : partial train loss (single batch): {loss.data}'
            )
            train_loss.append(loss.detach().cpu().numpy())
        # Validation pass: no gradients, model in eval mode.
        with torch.no_grad():
            AE.eval()
            val_loss = []
            for image_batch, _ in validation_dataloader:
                corrupted = add_noise(image_batch, noise_factor)
                clean = image_batch.to(device)
                corrupted = corrupted.to(device)
                output = AE(corrupted)
                loss = loss_fn(output, clean)
                print(
                    f'\t AE{i} : partial validation loss (single batch): {loss.data}'
                )
                val_loss.append(loss.detach().cpu().numpy())
        mean_loss.append({
            'train': np.mean(train_loss),
            'valid': np.mean(val_loss)
        })
    return mean_loss
def reset(self, noise=True):
    """Return the obstacle to its starting state.

    With noise enabled (both globally via obstacle.NOISE and via the
    `noise` flag) a freshly perturbed pose is sampled; otherwise the
    recorded initial state is restored.
    """
    theta = obstacle.THETA
    if obstacle.NOISE and noise:
        x = add_noise(obstacle.X, obstacle.STD_X)
        y = add_noise(obstacle.Y, obstacle.STD_Y)
        speed = add_noise(obstacle.SPEED, obstacle.STD_SPEED)
    else:
        x, y, speed = self.init_x, self.init_y, self.init_speed
    # Re-roll whether this obstacle will deliberately crash this episode.
    self.crash = obstacle.CRASH and np.random.random() < obstacle.PROB_CRASH
    self.crashing = False
    self.set_state(x, y, theta, speed)
    self.bounding_box.move_to(x, y, theta)
def committe(solver, solver_name, intervals, reps):
    """Run a committee-based active-learning experiment and plot MSE.

    Fix over the original: the unused local ``X_train = X[curr_labels]``
    has been removed (it was computed and never read).

    Args:
        solver: prediction function taking (X_all, X_labeled, labels).
        solver_name: legend label for the solver curve.
        intervals: increasing label-budget checkpoints.
        reps: number of repetitions to average over.
    """
    np.random.seed()
    X, y = util.basic_data()
    polls = util.add_noise(y)
    # Seed the labeled pool with 4 random counties.
    curr_labels = np.random.choice(range(len(X)), size=4, replace=False)
    square_errors = np.zeros([2, len(intervals)])
    for i in range(len(intervals)):
        print("interval: ", intervals[i])
        for j in range(reps):
            # Grow the labeled pool until the current budget is reached,
            # then truncate to exactly the budget.
            # NOTE(review): curr_labels is not reset between reps, so
            # repetitions share most of their labels -- confirm intended.
            while len(curr_labels) <= intervals[i]:
                next_points = next_countys(solver, curr_labels, X, polls)
                curr_labels = np.append(curr_labels, next_points)
            curr_labels = curr_labels[:intervals[i]]
            preds = solver(X, X[curr_labels], polls[curr_labels])
            square_errors[:, i] += util.square_error(y, preds)
        square_errors[:, i] /= reps
    # Compare against a random-sampling baseline.
    square_errors = np.vstack(
        (square_errors.mean(axis=0),
         util.performance(solver, intervals, reps).mean(axis=0)))
    util.plot("committe",
              intervals / len(X),
              square_errors,
              legend=[solver_name, "random"],
              x_label="% counties",
              y_label="MSE",
              title="Committe")
def CIO(goals, world, p, single=False, start_stage=0, traj_data=None, gif_tag=''):
    """Run trajectory optimization over the configured stages.

    When `single` is True, only evaluate (and visualize) one noisy initial
    trajectory and return an empty dict. Otherwise optimize stage by stage
    with L-BFGS-B and return {stage: (s0, x_final, cost, nit, costs)}.
    """
    S = world.traj_func(world, goals, p, traj_data)
    if single:
        # FOR TESTING A SINGLE traj: evaluate once, no optimization.
        S = add_noise(S)
        visualize_result(world, goals, p, 'initial' + gif_tag + '.gif', S)
        tot_cost = L(S, goals, world, p, start_stage)
        print_final(*function_costs)
        return {}
    if start_stage == 0:
        # Only perturb/visualize the initial trajectory on a fresh run.
        S = add_noise(S)
        visualize_result(world, goals, p, 'initial' + gif_tag + '.gif', S)
        tot_cost = L(S, goals, world, p)
        print_final(*function_costs)
    bounds = get_bounds(world, p)
    ret_info = {}
    x_init = S
    for stage in range(start_stage, len(p.stage_weights)):
        print('BEGINNING PHASE:', stage)
        p.print_stage_weights(stage)
        res = minimize(fun=L,
                       x0=x_init,
                       args=(goals, world, p, stage),
                       method='L-BFGS-B',
                       bounds=bounds,
                       options={'eps': 10**-3})
        x_final, nit, final_cost = res['x'], res['nit'], res['fun']
        visualize_result(world, goals, p,
                         'stage_{}'.format(stage) + gif_tag + '.gif', x_final)
        print_final(*function_costs)
        # Record the result of this stage; warm-start the next from it.
        ret_info[stage] = world.s0, x_final, final_cost, nit, function_costs
        x_init = x_final
    return ret_info
def reset(self, noise=True):
    """Move the agent back to its initial pose.

    Args:
        noise: If True (and agent.NOISE is set), perturb the stored
            initial state instead of restoring it exactly. Defaults to
            True.
    """
    if agent.NOISE and noise:
        x = add_noise(self.init_x, agent.STD_X)
        y = add_noise(self.init_y, agent.STD_Y)
        theta = add_noise(self.init_theta, agent.STD_THETA)
        speed = add_noise(self.init_speed, agent.STD_SPEED)
    else:
        x, y = self.init_x, self.init_y
        theta, speed = self.init_theta, self.init_speed
    self.set_state(x, y, theta, speed)
    self.bounding_box.move_to(x, y, theta)
def __init__(self, x=agent.X, y=agent.Y, theta=agent.THETA,
             speed=agent.SPEED, name="red_car"):
    """Create an agent car.

    Args:
        x: Initial agent x position (pixels). Optional.
        y: Initial agent y position (pixels). Optional.
        theta: Initial agent angle (radians). Optional.
        speed: Initial agent speed (pixels/second). Optional.
        name: Name of the car image. Optional.
    """
    # Remember the noiseless state so reset() can restore it.
    self.init_x = x
    self.init_y = y
    self.init_theta = theta
    self.init_speed = speed
    if agent.NOISE:
        x = add_noise(x, agent.STD_X)
        y = add_noise(y, agent.STD_Y)
        theta = add_noise(theta, agent.STD_THETA)
        speed = add_noise(speed, agent.STD_SPEED)
    super(Agent, self).__init__(x, y, theta, speed)
    self.name = name
    # Load the car sprite and derive its pixel dimensions.
    self.img = pygame.image.load(
        os.path.join(global_var.PATH, "media", name + ".png"))
    self.width = self.img.get_width()
    self.height = self.img.get_height()
    # Simple bounding box matching the sprite size, moved into pose.
    self.bounding_box = Rectangle([[0, 0], [0, self.width],
                                   [self.height, self.width],
                                   [self.height, 0]])
    self.bounding_box.move_to(x, y, theta)
def get_time_series(self, t_total, n_pt, random_seed=None):
    """Simulate heating by convection and return a noisy time series.

    Random noise scaled by this instance's ``sigma`` is applied to the
    simulated temperatures.

    :param t_total: total elapse time
    :param n_pt: number of time points, including the zero-time
    :param random_seed: random seed (integer), optional
    :return: TimeSeries. An array of temperatures and times
    """
    times = self.times(t_total, n_pt)
    # Exponential approach from t_init toward t_hot at rate rate_const.
    temps = temperature(t=times,
                        a=self.t_hot,
                        b=(self.t_init - self.t_hot),
                        c=self.rate_const)
    if random_seed is not None:
        set_random_seed(random_seed)
    # add_noise's return value is ignored -- presumably it mutates
    # `temps` in place; verify against its definition.
    add_noise(temps, self.sigma)
    return TimeSeries.from_time_temp(times, temps)
def experiment(indlinks_obs, delaytype, noise=0.0, display=False, soft=1000.0):
    """find parameters that minimizes the distance between x^obs_true in NOISY case
    and x^obs generated by each candidate function with PARTIAL observation

    Parameters
    ----------
    indlinks_obs: indices of the observed links
    delaytype: type of the delay
    noise: std of the noise added to the measured link flows, ff delays, OD demands
    display: if True, display results
    soft: weight put on the observation
    """
    # `coef` and (a, b) are module-level true delay parameters -- TODO confirm.
    if delaytype == 'Polynomial': true_theta = coef
    if delaytype == 'Hyperbolic': true_theta = (a, b)
    print 'generate graph...'
    g1, g2, g3, g4 = los_angeles(true_theta, delaytype)
    print 'compute ue...'
    # Solve user equilibrium on each of the four graphs.
    l1, l2, l3, l4 = ue.solver(g1, update=True), ue.solver(g2, update=True), \
        ue.solver(g3, update=True), ue.solver(g4, update=True)
    # Total travel cost (delay * flow) per graph, for reporting only.
    c1 = sum([link.delay * link.flow for link in g1.links.values()])
    c2 = sum([link.delay * link.flow for link in g2.links.values()])
    c3 = sum([link.delay * link.flow for link in g3.links.values()])
    c4 = sum([link.delay * link.flow for link in g4.links.values()])
    print 'ue costs: ', c1, c2, c3, c4
    # Sorted integer indices of the observed links.
    obs = [g1.indlinks[id] for id in indlinks_obs]
    obs = [int(i) for i in list(np.sort(obs))]
    x1, x2, x3, x4 = l1, l2, l3, l4
    if noise > 0.0:
        # Perturb the measured flows and rebuild the graphs with noisy data.
        x1, x2, x3, x4 = add_noise(l1, noise), add_noise(l2, noise), add_noise(
            l3, noise), add_noise(l4, noise)
        g1, g2, g3, g4 = los_angeles(true_theta, 'Polynomial', noise)
    # Inverse-optimization solve restricted to the observed links.
    theta, xs = invopt.main_solver([g1, g2, g3, g4],
                                   [x1[obs], x2[obs], x3[obs], x4[obs]], obs,
                                   degree, soft)
    # Relative L1 error between true and estimated link flows.
    u, v = matrix([l1, l2, l3, l4]), matrix(xs)
    error = np.linalg.norm(u - v, 1) / np.linalg.norm(u, 1)
    if display: display_results(error, true_theta, [theta], delaytype)
    return error, theta
def __getitem__(self, idx):
    """Load one sample: a noisy multi-channel input and its clean RGB target."""
    # Ground-truth pose vector; the first CSV value is dropped.
    pose = genfromtxt(self.pose_filepaths[self.file_idxs[idx]],
                      delimiter=',')[1:]
    pose = torch.from_numpy(pose).float()
    colour = io_image.read_RGB_image(
        self.colour_filepaths[self.file_idxs[idx]])
    mask = io_image.read_RGB_image(
        self.mask_filepaths[self.file_idxs[idx]])
    cropped_img = self.crop_image(colour, mask)
    # Resize everything to the working resolution.
    colour = io_image.change_res_image(colour, self.img_res)
    mask = io_image.change_res_image(mask, self.img_res)
    cropped_img = io_image.change_res_image(cropped_img, self.img_res)
    with_imagenet = io_image.read_RGB_image(
        self.with_imagenet_filepaths[self.file_idxs[idx]],
        new_res=self.img_res)
    data_image = with_imagenet
    if (not self.noise_channel) and self.num_channels > 3:
        # Append depth as an extra (H, W, 1) channel.
        depth = io_image.read_RGB_image(
            self.depth_filepaths[self.file_idxs[idx]],
            new_res=self.img_res)
        depth = np.reshape(depth, (depth.shape[0], depth.shape[1], 1))
        data_image = np.concatenate((data_image, depth),
                                    axis=-1).astype(float)
    #cropped_img_non_noisy = np.copy(cropped_img)
    #cropped_img_noisy, noise_idxs = util.add_noise(cropped_img, 0.2)
    data_image_noisy, noise_idxs = util.add_noise(data_image,
                                                  self.noise_level)
    #colour = np.concatenate((colour, noise_idxs), axis=-1).astype(float)
    if self.transform:
        data_image_noisy = self.transform(data_image_noisy).float()
        noise_idxs = self.transform(noise_idxs).float()
        #cropped_img_noisy = self.transform(cropped_img_noisy)
        #cropped_img_non_noisy = self.transform(cropped_img_non_noisy)
        colour = self.transform(colour).float()
        # Stack the noise mask as extra leading channels of the input.
        # NOTE(review): placed inside the transform branch because dim-0
        # concatenation assumes CHW tensors -- confirm against original.
        data_image_noisy = torch.cat((data_image_noisy, noise_idxs), 0)
    #vis.plot_image((data_image_noisy.numpy()[0:3, :, :] + 0.5) * 255)
    #vis.show()
    #vis.plot_image((colour.numpy() + 0.5) * 255)
    #vis.show()
    return data_image_noisy, colour
def tax_mst(entities, relfile, clusfile=None, minscore=0., noise=0., wn=False):
    '''
    Prune an IS-A graph to a taxonomy by solving an MST problem on each
    connected component.
    '''
    ## Weighted IS-A relation graph over the entities.
    d = util.get_relspecific_graph(entities, relfile, minscore=minscore)
    if wn:
        d = util.add_wn_relations(d)
    if noise > 0:
        d = util.add_noise(d, noise)
    g = util.dct2nx(d)
    ## Merge nodes that belong to the same cluster, when clusters are given.
    if clusfile is not None:
        g = util.consolidate_clusters(g, clusfile, resolveweights=np.mean)
    ## Edgeless solution graph over the same node set.
    g_ = nx.DiGraph()
    g_.add_nodes_from(g)
    stats = {'node_cnt': 0, 'runtime': 0, 'keptedge_cnt': 0}
    ## Decompose and solve each connected component independently.
    for comp_nodes in cc(g):
        subgraph = g.subgraph(comp_nodes)
        keptedges, comp_stats = solve_mst(subgraph)
        g_.add_edges_from(keptedges)
        stats = util.update_stats(stats, comp_stats)
    pruned = util.nx2dct(g_)
    # Sanity checks against the solver's bookkeeping.
    assert len(g_.edges()) == stats['keptedge_cnt']
    assert len(g_.nodes()) == stats['node_cnt']
    return pruned, stats
def tax_nocyc(entities, relfile, clusfile=None, minscore=0., noise=0., lmda=0.5, wn=False):
    '''
    Prune an IS-A graph to an acyclic taxonomy.

    :param lmda : Lambda hyperparameter (set to negative value to use raw scores)
    '''
    ## Weighted IS-A relation graph over the entities.
    d = util.get_relspecific_graph(entities, relfile, minscore=minscore)
    if wn:
        d = util.add_wn_relations(d)
    if noise > 0:
        d = util.add_noise(d, noise)
    ## Shift every edge weight down by lambda.
    for lnk in d['links']:
        lnk['weight'] = lnk['weight'] - lmda
    g = util.dct2nx(d)
    ## Merge nodes that belong to the same cluster, when clusters are given.
    if clusfile is not None:
        g = util.consolidate_clusters(g, clusfile, resolveweights=np.mean)
    ## Edgeless solution graph over the same node set.
    g_ = nx.DiGraph()
    g_.add_nodes_from(g)
    stats = {'node_cnt': 0, 'runtime': 0, 'keptedge_cnt': 0}
    ## Solve the no-cycle problem on the whole graph at once.
    keptedges, g_stats = solve_nocyc(g)
    g_.add_edges_from(keptedges)
    stats = util.update_stats(stats, g_stats)
    pruned = util.nx2dct(g_)
    # Sanity checks against the solver's bookkeeping.
    assert len(g_.edges()) == stats['keptedge_cnt']
    assert len(g_.nodes()) == stats['node_cnt']
    return pruned, stats
def main():
    """Denoise every image in the input folder and log objective scores
    (before/after) to mem.csv in the output folder."""
    options = parse_args()
    # Select the CUDA device before compiling theano functions.
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(options.device)
    plt.rcParams['image.cmap'] = 'gray'
    if not os.path.exists(options.output_folder):
        os.mkdir(options.output_folder)
    sys.path.insert(0, options.experiment)
    generate_fn, objective_fn = compile(options)
    import util
    X, ids = util.load_dataset(options.input_folder, False)
    mem_file = open(os.path.join(options.output_folder, 'mem.csv'), 'w')
    print("id,from,to", file=mem_file)
    for id, img in zip(ids, X):
        input_img = util.preprocess(np.expand_dims(img, axis=0))
        # Generate from a noisy version of the input image.
        output_img = generate_fn(util.add_noise(input_img))
        plt.imsave(os.path.join(options.output_folder, id),
                   np.squeeze(util.deprocess(output_img)))
        line = "%s,%s,%s" % (id, np.squeeze(objective_fn(input_img)),
                             np.squeeze(objective_fn(output_img)))
        print(line)
        print(line, file=mem_file)
    mem_file.close()
def main(argv):
    """Decrypt noisy substitution ciphers and compare solver vs. baseline.

    Command line: decryptor.py [-i <n-gram file> -t <testfile> -n <noise level>]
    """
    learnfile = "ngrams.txt"
    testfile = "europarl-v7.es-en.en"
    verbose = False
    noise = 0.05
    numIterations = 0
    # Only score ciphertexts whose length is within these bounds.
    minLength = 10
    maxLength = 60

    def printHelpMessage():
        print 'decryptor.py [-i <n-gram file> -t <testfile> -n <noise level>]'
        print '-v verbose'
        print '-h help'

    try:
        opts, args = getopt.getopt(argv,"hvi:t:n:")
    except getopt.GetoptError:
        printHelpMessage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            printHelpMessage()
            sys.exit()
        elif opt in ("-i"):
            learnfile = arg
        elif opt in ("-t"):
            testfile = arg
        elif opt in ("-n"):
            noise = float(arg)
        elif opt in ("-v"):
            verbose = True
    print "Learning..."
    sys.stdout.flush()
    # Build the n-gram language model used by the solver.
    languagemodel = LanguageModel.LanguageModel(learnfile)
    original_text_file = open(testfile, "r")
    cipher_solver = solver.Solver(languagemodel)
    cipher_baseline = baseline.Baseline()
    solver_accuracy = []
    baseline_accuracy = []
    max_counts = []
    for original_text in original_text_file:
        if len(original_text) < minLength:
            continue
        if len(original_text) > maxLength:
            continue
        numIterations += 1
        # Encrypt a noised copy of the plaintext with a random key.
        encryption_key = util.generateKey()
        original_text_noised = util.add_noise(original_text, noise)
        cipher_text = util.encryptCase(original_text_noised, encryption_key)
        startTime = datetime.datetime.now()
        if verbose:
            print "============================"
            print "Iteration ", numIterations
            print "Length ", len(original_text)
            print "Start Time", startTime
            print "Original Text", original_text
            print "Original Text Noised", original_text_noised
            print "Key", encryption_key
            print "Cipher Text Noised", cipher_text
        # Decrypt with the baseline and the solver, then score both
        # against the true key/plaintext.
        baseline_text, baseline_decryption_key = cipher_baseline.decrypt(cipher_text)
        guess_text, guess_decryption_key, num_guesses = cipher_solver.decrypt(cipher_text)
        baseline_score = score_accuracy(encryption_key, baseline_decryption_key, cipher_text, original_text)
        baseline_accuracy.append(baseline_score)
        solver_score = score_accuracy(encryption_key, guess_decryption_key, cipher_text, original_text)
        solver_accuracy.append(solver_score)
        max_counts.append(num_guesses)
        if verbose:
            print "End Time", datetime.datetime.now()
            print "Duration", datetime.datetime.now() - startTime
            print "Length, Accuracy, Duration,", len(original_text), ',', solver_score, ',', datetime.datetime.now() - startTime
            print "Baseline Accuracy: ", baseline_score
            print "Average Accuracy of Baseline: ", sum(baseline_accuracy)/len(baseline_accuracy)
            print "Solver Accuracy: ", solver_score
            print "Average Accuracy of Solver: ", sum(solver_accuracy)/len(solver_accuracy)
            print "Reached same thing many times", max_counts
    # Final summary over all processed ciphertexts.
    print "Average Accuracy of Baseline: ", sum(baseline_accuracy)/len(baseline_accuracy)
    print "Average Accuracy of Solver: ", sum(solver_accuracy)/len(solver_accuracy)
    print "Over %d cipher texts" % len(solver_accuracy)
def onpolicy_main():
    """Train an on-policy agent (A2C or PPO) on the configured environment,
    logging to tensorboard and periodically saving/evaluating the policy.

    Relies on module-level globals: args, env_kwargs, knob_noisy,
    pretrained_policy_load, eval_log_dir.
    """
    print("onpolicy main")
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")
    summary_name = args.log_dir + '{0}_{1}'
    writer = SummaryWriter(summary_name.format(args.env_name, args.save_name))
    # Make vector env
    envs = make_vec_envs(
        args.env_name,
        args.seed,
        args.num_processes,
        args.gamma,
        args.log_dir,
        device,
        False,
        env_kwargs=env_kwargs,
    )
    # Ugly way to access the wrapped environment attributes.
    if args.env_name.find('doorenv') > -1:
        if args.num_processes > 1:
            visionnet_input = envs.venv.venv.visionnet_input
            nn = envs.venv.venv.nn
            env_name = envs.venv.venv.xml_path
        else:
            visionnet_input = envs.venv.venv.envs[
                0].env.env.env.visionnet_input
            nn = envs.venv.venv.envs[0].env.env.env.nn
            env_name = envs.venv.venv.envs[0].env.env.env.xml_path
        dummy_obs = np.zeros(nn * 2 + 3)
    else:
        dummy_obs = envs.observation_space
        visionnet_input = None
        nn = None
    if pretrained_policy_load:
        print("loading", pretrained_policy_load)
        actor_critic, ob_rms = torch.load(pretrained_policy_load)
    else:
        actor_critic = Policy(dummy_obs.shape,
                              envs.action_space,
                              base_kwargs={'recurrent': args.recurrent_policy})
    if visionnet_input:
        visionmodel = load_visionmodel(env_name, args.visionmodel_path,
                                       VisionModelXYZ())
        actor_critic.visionmodel = visionmodel.eval()
    actor_critic.nn = nn
    actor_critic.to(device)
    # disable normalizer
    vec_norm = get_vec_normalize(envs)
    vec_norm.eval()
    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               lr=args.lr,
                               eps=args.eps,
                               alpha=args.alpha,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(actor_critic,
                         args.clip_param,
                         args.ppo_epoch,
                         args.num_mini_batch,
                         args.value_loss_coef,
                         args.entropy_coef,
                         lr=args.lr,
                         eps=args.eps,
                         max_grad_norm=args.max_grad_norm)
    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              dummy_obs.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)
    full_obs = envs.reset()
    initial_state = full_obs[:, :envs.action_space.shape[0]]
    # Convert the raw observation for the policy (vision net / noisy knob).
    if args.env_name.find('doorenv') > -1 and visionnet_input:
        obs = actor_critic.obs2inputs(full_obs, 0)
    else:
        if knob_noisy:
            obs = add_noise(full_obs, 0)
        else:
            obs = full_obs
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)
    episode_rewards = deque(maxlen=10)
    start = time.time()
    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes
    for j in range(num_updates):
        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            utils.update_linear_schedule(agent.optimizer, j, num_updates,
                                         args.lr)
        # pos_control is hard-coded False here, so the positional-control
        # branch below is dead code -- kept as-is.
        pos_control = False
        total_switches = 0
        prev_selection = ""
        for step in range(args.num_steps):
            # Sample an action from the current policy (no gradients).
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
            next_action = action
            if pos_control:
                frame_skip = 2
                if step % (512 / frame_skip - 1) == 0:
                    current_state = initial_state
                next_action = current_state + next_action
                for kk in range(frame_skip):
                    full_obs, reward, done, infos = envs.step(next_action)
                current_state = full_obs[:, :envs.action_space.shape[0]]
            else:
                full_obs, reward, done, infos = envs.step(next_action)
            # convert img to obs if door_env and using visionnet
            if args.env_name.find('doorenv') > -1 and visionnet_input:
                obs = actor_critic.obs2inputs(full_obs, j)
            else:
                if knob_noisy:
                    obs = add_noise(full_obs, j)
                else:
                    obs = full_obs
            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])
            # Masks: 0 terminates the return bootstrap at episode ends.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks,
                            bad_masks)
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()
        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.gae_lambda, args.use_proper_time_limits)
        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()
        writer.add_scalar("Value loss", value_loss, j)
        writer.add_scalar("action loss", action_loss, j)
        writer.add_scalar("dist entropy loss", dist_entropy, j)
        writer.add_scalar("Episode rewards", np.mean(episode_rewards), j)
        # save for every interval-th episode or for the last epoch
        if (j % args.save_interval == 0
                or j == num_updates - 1) and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass
            torch.save([
                actor_critic,
                getattr(utils.get_vec_normalize(envs), 'ob_rms', None)
            ], os.path.join(
                save_path,
                args.env_name + "_{}.{}.pt".format(args.save_name, j)))
        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            end = time.time()
            # NOTE(review): the format string has 8 placeholders but 11
            # arguments; the trailing dist_entropy/value_loss/action_loss
            # are silently ignored by str.format.
            print(
                "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        len(episode_rewards), np.mean(episode_rewards),
                        np.median(episode_rewards), np.min(episode_rewards),
                        np.max(episode_rewards), dist_entropy, value_loss,
                        action_loss))
        if (args.eval_interval is not None and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            ob_rms = utils.get_vec_normalize(envs).ob_rms
            evaluate(actor_critic, ob_rms, args.env_name, args.seed,
                     args.num_processes, eval_log_dir, device)
        DR = True  #Domain Randomization
        ################## for multiprocess world change ######################
        # Rebuild the vector env each update to randomize the world.
        if DR:
            print("changing world")
            envs.close_extras()
            envs.close()
            del envs
            envs = make_vec_envs(
                args.env_name,
                args.seed,
                args.num_processes,
                args.gamma,
                args.log_dir,
                device,
                False,
                env_kwargs=env_kwargs,
            )
            full_obs = envs.reset()
            if args.env_name.find('doorenv') > -1 and visionnet_input:
                obs = actor_critic.obs2inputs(full_obs, j)
            else:
                obs = full_obs
def mtg(entities, relfile, reltype='hypernym', minscore=1.5, lmda=1.0, noise=0., wn=False):
    '''
    Prune a relation-specific graph with an exact solver, one connected
    component at a time.

    :param lmda : Lambda hyperparameter (set to negative value to use raw scores)
    '''
    ## Weighted reltype graph over the entities.
    d = util.get_relspecific_graph(entities,
                                   relfile,
                                   minscore=minscore,
                                   reltypes=[reltype],
                                   equivrel=reltype)
    ## Convert weights to logodds and subtract lambda
    for lnk in d['links']:
        lnk['weight'] = lnk['weight'] - lmda
    if wn:
        d = util.add_wn_relations(d)
    if noise > 0:
        d = util.add_noise(d, noise)
    g = util.dct2nx(d)
    ## Edgeless solution graph over the same node set.
    g_ = nx.DiGraph()
    g_.add_nodes_from(g)
    stats = {'node_cnt': 0,
             'num_vars': 0,
             'num_constrs': 0,
             'runtime': 0,
             'keptedge_cnt': 0,
             'possedge_cnt': 0,
             'gt0edge_cnt': 0,
             'timeout': 0}
    ## Decompose and solve each connected component exactly.
    for comp_nodes in cc(g):
        subgraph = g.subgraph(comp_nodes)
        keptedges, comp_stats = solve_exact(subgraph)
        # Keep each surviving edge with its relation label and weight.
        g_.add_edges_from([(i, j, {'relation': reltype,
                                   'weight': g[i].get(j, {}).get('weight', 0)})
                           for i, j in keptedges])
        stats = util.update_stats(stats, comp_stats)
    pruned = util.nx2dct(g_)
    # Sanity checks against the solver's bookkeeping.
    assert len(g_.edges()) == stats['keptedge_cnt']
    assert len(g_.nodes()) == stats['node_cnt']
    return pruned, stats
# --- Single-image denoising demo: configuration ---
num_channels_out = 3
img_path = 'C:/Users/Administrator/Documents/Datasets/ycb_unreal_colour (493).png'
img_save_path = results_dir + 'output_img.png'
img_res = (640, 480)
noise_level = 0.01
# 4-channel normalization: RGB plus the appended noise-mask channel.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(
                                    (0.5, 0.5, 0.5, 0.5),
                                    (0.5, 0.5, 0.5, 0.5))])

# load image
input_image = io_image.read_RGB_image(img_path, img_res)
if input_image.shape[2] == 4:
    # Drop the alpha channel when present.
    input_image = input_image[:, :, 0:3]
data_image_noisy, noise_idxs = util.add_noise(input_image, noise_level)
data_image_noisy = transform(data_image_noisy).float()
noise_idxs = transform(noise_idxs).float()
# Stack the noise mask as an extra input channel (channel dim 0 after ToTensor).
data_image_noisy = torch.cat((data_image_noisy, noise_idxs), 0)

# load model
checkpoint = torch.load(results_dir + 'ycb_checkpoint.pth.tar')
args = checkpoint['args']
epoch = checkpoint['epoch']
model = VQ_CVAE(d=hidden,
                k=k,
                num_channels_in=num_channels_in,
                num_channels_out=num_channels_out)
model.load_state_dict(checkpoint['state_dict'])
if use_cuda:
    print('Using Cuda')
    model.cuda()
    data_image_noisy = data_image_noisy.cuda()
def train(options):
    """Adversarially train the generator/discriminator pair built by
    compile(options), logging losses and periodically saving the generator.

    Returns the trained generator G.
    """
    print("Compiling...")
    G_train_fn, D_train_fn, generate_fn, G, lr, obj_coef = compile(options)
    import util
    print("Loading dataset...")
    X, _ = util.load_dataset(options.dataset, True)
    print("Training...")
    log_file = open(os.path.join(options.experiment_folder, 'log.txt'), 'w')
    log_str = ("Experiment params: %s" % (options.__dict__, ))
    print(log_str)
    print(log_str, file=log_file)
    buffer = ExperienceBuffer(buffer_size=100,
                              batch_shape=(options.batch_size, 3, 256, 256))
    for epoch in range(options.num_iter):
        # if (epoch + 1) % 20 == 0:
        #     lr.set_value(np.array(lr.get_value() * 0.3, dtype='float32'))
        # if (epoch + 1) % 3 == 0:
        #     obj_coef.set_value(np.array(obj_coef.get_value() + 0.1 * options.obj_coef, dtype='float32'))
        # Independent shuffles so G and D see uncorrelated batches.
        discriminator_order = np.arange(len(X))
        generator_order = np.arange(len(X))
        np.random.shuffle(discriminator_order)
        np.random.shuffle(generator_order)
        discriminator_loss_list = []
        generator_loss_list = []
        for start in tqdm(range(0, len(X), options.batch_size)):
            end = min(start + options.batch_size, len(X))
            generator_batch = util.preprocess(X[generator_order[start:end]])
            discriminator_batch = util.preprocess(
                X[discriminator_order[start:end]], True)
            # Generator is fed a noisy version of its batch.
            generator_batch_with_noise = util.add_noise(generator_batch)
            #print (generate_fn(generator_batch_with_noise))
            generator_output = G_train_fn(generator_batch_with_noise,
                                          generator_batch,
                                          discriminator_batch)
            generated_batch, generator_loss = generator_output[
                0], generator_output[1:]
            #buffer.push_to_buffer(generator_batch)
            discriminator_loss = D_train_fn(generated_batch,
                                            discriminator_batch)
            discriminator_loss_list.append(discriminator_loss)
            generator_loss_list.append(generator_loss)
        # Per-epoch mean losses, printed once here and again below with
        # the file write.
        log_str = (
            ("Epoch %i" % epoch) + '\n' +
            ("Discriminator loss %f" %
             tuple(np.mean(np.array(discriminator_loss_list), axis=0))) +
            '\n' +
            ("Generator loss %f, obj_loss %f, cont_loss %f, disc_loss %f, total_variation loss %f"
             % tuple(np.mean(np.array(generator_loss_list), axis=0))))
        print(log_str)
        # Visualize generations on a fixed set of images.
        img_for_ploting = util.preprocess(X[0:options.num_img_to_show])
        plot(options, epoch, img_for_ploting,
             generate_fn(util.add_noise(img_for_ploting)))
        log_str = (
            ("Epoch %i" % epoch) + '\n' +
            ("Discriminator loss %f" %
             tuple(np.mean(np.array(discriminator_loss_list), axis=0))) +
            '\n' +
            ("Generator loss %f, obj_loss %f, cont_loss %f, disc_loss %f, total_variation loss %f"
             % tuple(np.mean(np.array(generator_loss_list), axis=0))))
        print(log_str)
        print(log_str, file=log_file)
        if epoch % options.save_mode_it == 0:
            save_model(options, epoch, G)
        log_file.flush()
    log_file.close()
    save_model(options, options.num_iter - 1, G)
    return G
# Restore only the conv layers and the first fully-connected layer.
save_vars = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1]
saver = tf.train.Saver(var_list=save_vars, max_to_keep=3)
saver.restore(sess, 'your_model_path')

# Merge all summaries so tensorboard shows a clearer hierarchy.
merged = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(event_path, graph=sess.graph)

# Corrupt noise% of the training labels to test the model's fault
# tolerance / observe its behaviour under label noise.
noise_indexes = [
    random.randint(0, train_samples_count)
    for _ in range(int(train_samples_count * noise * 0.01))
]
for index in noise_indexes:
    mnist.train.labels[index] = util.add_noise(mnist.train.labels[index])
print('noises size: {}, and are added.'.format(len(noise_indexes)))
start_time = datetime.datetime.now()
fp = open(
    './model/mnist/noise{}_poison{}/result.txt'.format(noise, poison),
    'ab+')
# Standard mini-batch training loop with dropout keep_prob 0.5.
for i in range(1, epoch + 1):
    batch = mnist.train.next_batch(50)
    sess.run(train_step,
             feed_dict={
                 x_raw: batch[0],
                 y: batch[1],
                 keep_prob: 0.5
             })
def main(raw_args=None):
    """Train a two-domain policy with IPO (alternating PPO updates).

    Builds one vectorized environment per domain, shares a single
    ``Policy_av`` actor-critic whose per-domain sub-networks are optimized
    by two separate PPO agents, and alternates rollout+update between the
    domains each iteration. Logs to TensorBoard and saves checkpoints
    (including a ``.best`` checkpoint on new best mean reward).

    Parameters
    ----------
    raw_args:
        Optional argument list; when given (called from another script) it
        is parsed via ``get_args``, otherwise the module-level ``main_args``
        is used.
    """
    # If this is being called as a function from another python script
    if raw_args is not None:
        args = get_args(raw_args)
    else:
        args = main_args

    # Only the IPO algorithm is supported by this entry point.
    if args.algo != 'ipo':
        raise NotImplementedError

    # Total number of envs (both domains)
    args.num_processes = args.num_envs1 + args.num_envs2

    knob_noisy = args.knob_noisy
    pretrained_policy_load = args.pretrained_policy_load

    args.world_path_domain1 = os.path.expanduser(args.world_path_domain1)
    args.world_path_domain2 = os.path.expanduser(args.world_path_domain2)

    # Env kwargs for domain 1
    env_kwargs1 = dict(port = args.port,
                       visionnet_input = args.visionnet_input,
                       unity = args.unity,
                       world_path = args.world_path_domain1)
    # Env kwargs for domain 2
    env_kwargs2 = dict(port = args.port,
                       visionnet_input = args.visionnet_input,
                       unity = args.unity,
                       world_path = args.world_path_domain2)

    print("Training with IPO.")

    # Seed everything; optionally force deterministic cuDNN.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    summary_name = args.log_dir + '{0}_{1}'
    writer = SummaryWriter(summary_name.format(args.env_name, args.save_name))

    # Make vector env for two domains (each contains num_processes/2 envs)
    envs1 = make_vec_envs(args.env_name, args.seed, args.num_envs1,
                          args.gamma, args.log_dir, device, False,
                          env_kwargs=env_kwargs1)
    envs2 = make_vec_envs(args.env_name, args.seed, args.num_envs2,
                          args.gamma, args.log_dir, device, False,
                          env_kwargs=env_kwargs2)

    # Ugly way to reach through the vec-env wrappers to the underlying
    # environment attributes (doorenv exposes visionnet_input/nn/xml_path).
    if args.env_name.find('doorenv')>-1:
        visionnet_input = envs1.venv.venv.visionnet_input
        nn = envs1.venv.venv.nn
        env_name = envs1.venv.venv.xml_path
        # Observation placeholder sized from the env's joint count
        # (presumably nn joints * 2 + 3 extra dims -- TODO confirm).
        dummy_obs = np.zeros(nn*2+3)
    else:
        dummy_obs = envs1.observation_space
        visionnet_input = None
        nn = None

    # Either resume from a pretrained policy or build a fresh two-headed one.
    if pretrained_policy_load:
        print("loading", pretrained_policy_load)
        actor_critic, ob_rms = torch.load(pretrained_policy_load)
    else:
        actor_critic = Policy_av(
            dummy_obs.shape,
            envs1.action_space,
            base_kwargs={'recurrent': args.recurrent_policy})
        # actor_critic = Policy(
        #     dummy_obs.shape,
        #     envs1.action_space,
        #     base_kwargs={'recurrent': args.recurrent_policy})

    if visionnet_input:
        # Vision-network input path is not supported in this script.
        # NOTE(review): the two statements below are unreachable dead code.
        raise NotImplementedError
        visionmodel = load_visionmodel(env_name, args.visionmodel_path,
                                       VisionModelXYZ())
        actor_critic.visionmodel = visionmodel.eval()
    actor_critic.nn = nn
    actor_critic.to(device)

    #disable normalizer
    vec_norm1 = get_vec_normalize(envs1)
    vec_norm1.eval()
    vec_norm2 = get_vec_normalize(envs2)
    vec_norm2.eval()

    # Create two agents (one for each domain); each optimizes only its own
    # domain-specific sub-networks of the shared actor_critic.
    params1 = [{'params': actor_critic.base.actor1.parameters()},
               {'params': actor_critic.base.critic1.parameters()},
               {'params': actor_critic.base.critic_linear1.parameters()},
               {'params': actor_critic.base.fc_mean1.parameters()},
               {'params': actor_critic.base.logstd1.parameters()}]
    params2 = [{'params': actor_critic.base.actor2.parameters()},
               {'params': actor_critic.base.critic2.parameters()},
               {'params': actor_critic.base.critic_linear2.parameters()},
               {'params': actor_critic.base.fc_mean2.parameters()},
               {'params': actor_critic.base.logstd2.parameters()}]
    # params1 = None
    # params2 = None
    agent1 = algo.PPO(
        actor_critic,
        args.clip_param,
        args.ppo_epoch,
        args.num_mini_batch,
        args.value_loss_coef,
        args.entropy_coef,
        lr=args.lr,
        eps=args.eps,
        max_grad_norm=args.max_grad_norm,
        optim_params = params1)
    agent2 = algo.PPO(
        actor_critic,
        args.clip_param,
        args.ppo_epoch,
        args.num_mini_batch,
        args.value_loss_coef,
        args.entropy_coef,
        lr=args.lr,
        eps=args.eps,
        max_grad_norm=args.max_grad_norm,
        optim_params = params2)

    # Rollout storage for each domain
    rollouts1 = RolloutStorage(args.num_steps, args.num_envs1,
                               dummy_obs.shape, envs1.action_space,
                               actor_critic.recurrent_hidden_state_size)
    rollouts2 = RolloutStorage(args.num_steps, args.num_envs2,
                               dummy_obs.shape, envs2.action_space,
                               actor_critic.recurrent_hidden_state_size)

    full_obs1 = envs1.reset()
    # First action_space.shape[0] entries of the observation are the joint
    # state -- presumably; TODO confirm against the env's obs layout.
    initial_state1 = full_obs1[:,:envs1.action_space.shape[0]]
    full_obs2 = envs2.reset()
    initial_state2 = full_obs2[:,:envs2.action_space.shape[0]]

    # Optionally perturb the initial observations (knob-position noise).
    if args.env_name.find('doorenv')>-1 and visionnet_input:
        obs1 = actor_critic.obs2inputs(full_obs1, 0)
        obs2 = actor_critic.obs2inputs(full_obs2, 0)
    else:
        if knob_noisy:
            obs1 = add_noise(full_obs1, 0)
            obs2 = add_noise(full_obs2, 0)
        else:
            obs1 = full_obs1
            obs2 = full_obs2

    rollouts1.obs[0].copy_(obs1)
    rollouts1.to(device)
    rollouts2.obs[0].copy_(obs2)
    rollouts2.to(device)

    # Keep only the 10 most recent episode returns per domain.
    episode_rewards1 = deque(maxlen=10)
    episode_rewards2 = deque(maxlen=10)

    start = time.time()
    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes
    num_updates = int(num_updates/2) # Since have two domains per iteration

    best_training_reward = -np.inf

    for j in range(num_updates):

        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            utils.update_linear_schedule(
                agent1.optimizer, j, num_updates, args.lr)
            utils.update_linear_schedule(
                agent2.optimizer, j, num_updates, args.lr)

        ################## Do rollouts and updates for domain 1 ##################
        pos_control = False
        total_switches = 0
        prev_selection = ""
        for step in range(args.num_steps):
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts1.obs[step],
                    rollouts1.recurrent_hidden_states[step],
                    rollouts1.masks[step])

            next_action = action

            # NOTE(review): bare except dropping into an interactive
            # IPython shell (ipy.embed()) will hang unattended runs --
            # consider logging and re-raising instead.
            try:
                # print(next_action)
                full_obs, reward, done, infos = envs1.step(next_action)
            except:
                ipy.embed()

            if knob_noisy:
                obs = add_noise(full_obs, j)
            else:
                obs = full_obs

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards1.append(info['episode']['r'])

            # masks: 0 where the episode ended; bad_masks: 0 where the end
            # was a time-limit truncation ('bad_transition').
            masks = torch.FloatTensor(
                [[0.0] if done_ else [1.0] for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts1.insert(obs, recurrent_hidden_states, action,
                             action_log_prob, value, reward, masks,
                             bad_masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts1.obs[-1], rollouts1.recurrent_hidden_states[-1],
                rollouts1.masks[-1]).detach()

        rollouts1.compute_returns(next_value, args.use_gae, args.gamma,
                                  args.gae_lambda,
                                  args.use_proper_time_limits)
        value_loss, action_loss, dist_entropy = agent1.update(rollouts1)
        rollouts1.after_update()
        value_loss1 = value_loss
        action_loss1 = action_loss
        dist_entropy1 = dist_entropy

        ################## Do rollouts and updates for domain 2 ##################
        pos_control = False
        total_switches = 0
        prev_selection = ""
        for step in range(args.num_steps):
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts2.obs[step],
                    rollouts2.recurrent_hidden_states[step],
                    rollouts2.masks[step])

            next_action = action

            try:
                # print(next_action)
                full_obs, reward, done, infos = envs2.step(next_action)
            except:
                ipy.embed()

            if knob_noisy:
                obs = add_noise(full_obs, j)
            else:
                obs = full_obs

            for info in infos:
                if 'episode' in info.keys():
                    episode_rewards2.append(info['episode']['r'])

            masks = torch.FloatTensor(
                [[0.0] if done_ else [1.0] for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts2.insert(obs, recurrent_hidden_states, action,
                             action_log_prob, value, reward, masks,
                             bad_masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts2.obs[-1], rollouts2.recurrent_hidden_states[-1],
                rollouts2.masks[-1]).detach()

        rollouts2.compute_returns(next_value, args.use_gae, args.gamma,
                                  args.gae_lambda,
                                  args.use_proper_time_limits)
        value_loss, action_loss, dist_entropy = agent2.update(rollouts2)
        rollouts2.after_update()
        value_loss2 = value_loss
        action_loss2 = action_loss
        dist_entropy2 = dist_entropy

        ###################### Logs and storage ########################
        # Average the two domains' losses for logging.
        value_loss = (value_loss1 + value_loss2)/2
        action_loss = (action_loss1 + action_loss2)/2
        dist_entropy = (dist_entropy1 + dist_entropy2)/2

        # Pairwise-average the recent episode returns of the two domains.
        # NOTE(review): this indexes episode_rewards2 by episode_rewards1's
        # length; if domain 2 has logged fewer episodes this raises
        # IndexError -- verify the deques always fill at the same rate.
        episode_rewards = []
        for ii in range(len(episode_rewards1)):
            episode_rewards.append(
                (episode_rewards1[ii]+episode_rewards2[ii])/2)
        # episode_rewards = episode_rewards1

        writer.add_scalar("Value loss", value_loss, j)
        writer.add_scalar("action loss", action_loss, j)
        writer.add_scalar("dist entropy loss", dist_entropy, j)
        writer.add_scalar("Episode rewards", np.mean(episode_rewards), j)

        if np.mean(episode_rewards) > best_training_reward:
            best_training_reward = np.mean(episode_rewards)
            current_is_best = True
        else:
            current_is_best = False

        # save for every interval-th episode or for the last epoch or for best so far
        if (j % args.save_interval == 0
                or j == num_updates - 1
                or current_is_best) and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                # Directory already exists.
                pass

            # Saved alongside `None` in place of ob_rms (normalizer is
            # disabled above).
            torch.save([
                actor_critic,
                None
            ], os.path.join(save_path,
                            args.env_name + "_{}.{}.pt".format(args.save_name,j)))
            if current_is_best:
                torch.save([
                    actor_critic,
                    None
                ], os.path.join(save_path,
                                args.env_name + "_{}.best.pt".format(args.save_name)))
            # torch.save([
            #     actor_critic,
            #     getattr(utils.get_vec_normalize(envs1), 'ob_rms', None)
            # ], os.path.join(save_path, args.env_name + "_{}.{}.pt".format(args.save_name,j)))

        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            end = time.time()
            # NOTE(review): the format string has 8 placeholders but 11
            # arguments; dist_entropy/value_loss/action_loss are silently
            # ignored by str.format -- probably a truncated message.
            print(
                "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        len(episode_rewards), np.mean(episode_rewards),
                        np.median(episode_rewards), np.min(episode_rewards),
                        np.max(episode_rewards), dist_entropy, value_loss,
                        action_loss))

        if (args.eval_interval is not None and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            # Evaluation is not implemented for the two-domain setup.
            # NOTE(review): the statements below are unreachable and
            # reference an undefined name `envs`.
            raise NotImplementedError
            ob_rms = utils.get_vec_normalize(envs).ob_rms
            evaluate(actor_critic, ob_rms, args.env_name, args.seed,
                     args.num_processes, eval_log_dir, device)

        DR=False # True #Domain Randomization
        ################## for multiprocess world change ######################
        if DR:
            # Domain randomization (world swap) is disabled / unimplemented.
            # NOTE(review): unreachable; also references undefined `envs`.
            raise NotImplementedError
            print("changing world")
            envs.close_extras()
            envs.close()
            del envs
            envs = make_vec_envs_domains(args.env_name, args.seed,
                                         args.num_processes, args.gamma,
                                         args.log_dir, device, False,
                                         env_kwargs1=env_kwargs1,
                                         env_kwargs2=env_kwargs2)
            full_obs = envs.reset()
            if args.env_name.find('doorenv')>-1 and visionnet_input:
                obs = actor_critic.obs2inputs(full_obs, j)
            else:
                obs = full_obs