def main():
    doers = _get_doers(shell)
    doers.update(_get_doers(utils))
    possible_actions = list(doers.keys()) + ['start', 'stop', 'status']

    args = arguments.get_args(possible_actions)
    if args.action is None:
        print('No action')
        sys.exit(1)

    apiclient = None
    if args.no_api is False:
        os_options = arguments.OpenstackOptions(args, os.environ)
        if args.debug:
            print(os_options)
        apiclient = client.Client(username=os_options.username,
                                  password=os_options.password,
                                  tenant_name=os_options.tenant_name,
                                  endpoint=os_options.endpoint,
                                  auth_url=os_options.auth_url)
        if args.client_id:
            apiclient.client_id = args.client_id

    if args.action in doers:
        try:
            return doers[args.action](apiclient, args)
        except Exception as e:
            print('ERROR {0}'.format(e))
            return 1

    create_dir(args.jobs_dir, do_log=False)

    freezer_scheduler = FreezerScheduler(apiclient=apiclient,
                                         interval=int(args.interval),
                                         job_path=args.jobs_dir)

    daemon = Daemon(daemonizable=freezer_scheduler)

    if args.action == 'start':
        daemon.start(log_file=args.log_file)
    elif args.action == 'stop':
        daemon.stop()
    elif args.action == 'reload':
        daemon.reload()
    elif args.action == 'status':
        daemon.status()

    return os.EX_OK
def main(): """ Main entry point for the program. """ args = get_args(get_version()) if not args.solution_file: args.solution_file = default_solution() # create a new MonoTool object using our project path. mt = MonoTool(args.solution_file) method = getattr(mt, args.method) res = method(**args.__dict__) if res: print res
def main():
    doers = _get_doers(shell)
    doers.update(_get_doers(utils))
    possible_actions = list(doers.keys()) + ['start', 'stop', 'status']

    args = arguments.get_args(possible_actions)
    if args.action is None:
        print('No action')
        return os.EX_DATAERR

    apiclient = None
    if args.no_api is False:
        apiclient = client.Client(opts=args)
        if args.client_id:
            apiclient.client_id = args.client_id

    if args.action in doers:
        try:
            return doers[args.action](apiclient, args)
        except Exception as e:
            print('ERROR {0}'.format(e))
            return os.EX_SOFTWARE

    freezer_scheduler = FreezerScheduler(apiclient=apiclient,
                                         interval=int(args.interval),
                                         job_path=args.jobs_dir)

    if args.no_daemon:
        print('Freezer Scheduler running in no-daemon mode')
        daemon = NoDaemon(daemonizable=freezer_scheduler)
    else:
        daemon = Daemon(daemonizable=freezer_scheduler)

    if args.action == 'start':
        daemon.start(log_file=args.log_file)
    elif args.action == 'stop':
        daemon.stop()
    elif args.action == 'reload':
        daemon.reload()
    elif args.action == 'status':
        daemon.status()

    return os.EX_OK
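# The two scheduler mains above rely on a _get_doers() helper that is not shown in
# these snippets. A minimal sketch of what such a helper could look like, assuming
# the convention that action handlers are module-level functions named "do_<action>"
# (the project's real helper and its key naming may differ):
def _get_doers(module):
    """Map action names to the do_* callables defined in `module`."""
    return {name[len('do_'):]: getattr(module, name)
            for name in dir(module)
            if name.startswith('do_') and callable(getattr(module, name))}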
def main(): """Main training program.""" # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. timers = Timers() # Arguments. args = get_args() args.mem_length = args.mem_length if args.transformer_xl else 0 if args.load: args.experiment_name = os.path.basename(os.path.normpath(args.load)) else: args.experiment_name = args.experiment_name + datetime.now().strftime( "%m-%d-%H-%M") if args.save: args.save = os.path.join(args.save, args.experiment_name) # Pytorch distributed. initialize_distributed(args) # Random seeds for reproducability. set_random_seed(args.seed) # Data stuff. train_data, val_data, test_data, args.vocab_size, \ args.eod_token = get_train_val_test_data(args) # Model, optimizer, and learning rate. model, optimizer, lr_scheduler = setup_model_and_optimizer(args) if args.load is not None: with FileLock("/root/checkpoint_lock", timeout=-1): args.iteration = load_checkpoint(model, optimizer, lr_scheduler, args) else: args.iteration = 0 torch.distributed.barrier() summary_writer = None if torch.distributed.get_rank() == 0: print('Pretrain GPT2 model') print_args(args) summary_writer = get_sample_writer(base=args.summary_dir, name=args.experiment_name, iteration=args.iteration) # Resume data loader if necessary. if args.resume_dataloader: if train_data is not None: train_data.batch_sampler.start_iter = args.iteration % \ len(train_data) if val_data is not None: start_iter_val = (args.train_iters // args.save_interval) * \ args.eval_interval val_data.batch_sampler.start_iter = start_iter_val % \ len(val_data) if train_data is not None: train_data_iterator = iter(train_data) else: train_data_iterator = None if val_data is not None: val_data_iterator = iter(val_data) else: val_data_iterator = None # TODO: figure out how to properly set this especially when resuming training iteration = 0 if args.train_iters > 0: if args.do_train: with ExitStack() as stack: def save_on_exit(args_, model_, optimizer_, lr_scheduler_): save_checkpoint(args_.iteration, model_, optimizer_, lr_scheduler_, args_) # stack.callback(save_on_exit, args, model, optimizer, lr_scheduler) iteration, skipped = train(model, optimizer, lr_scheduler, train_data_iterator, val_data_iterator, timers, args, summary_writer=summary_writer) if args.do_valid: prefix = 'the end of training for val data' val_loss = evaluate_and_print_results(prefix, val_data_iterator, model, args, timers, False) if args.save and iteration != 0: save_checkpoint(iteration, model, optimizer, lr_scheduler, args) if test_data is not None: test_data_iterator = iter(test_data) else: test_data_iterator = None if args.do_test: # Run on test data. prefix = 'the end of training for test data' evaluate_and_print_results(prefix, test_data_iterator, model, args, timers, True)
import os

import torch
import torch.nn.functional as F
import torch.optim as optim
from tensorboardX import SummaryWriter

import algo
from arguments import get_args
from envs import make_vec_envs
from model_nomodulation import Policy
from storage import RolloutStorage
from utils import get_vec_normalize
from visualize import visdom_plot

#####################################
# prepare
args = get_args()

assert args.algo in ['a2c', 'ppo', 'acktr']
if args.recurrent_policy:
    assert args.algo in ['a2c', 'ppo'], \
        'Recurrent policy is not implemented for ACKTR'

num_updates = int(args.num_frames) // args.num_steps // args.num_processes

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

try:
    os.makedirs(args.log_dir)
except OSError:
    pass  # body truncated in the original snippet; presumably an existing log dir is reused or cleaned
def load_populations( dataset_name, base_path="../logs-2020-neurips/log_when_to_stop/_recorded", filter_func=None): result_info_dict = {} data_path = os.path.join(base_path, dataset_name) for i, exp_dir in enumerate(os.listdir(data_path)): if not filter_func(exp_dir): continue full_path = os.path.join(data_path, exp_dir) exp_tuple = exp_dir.split("+")[0].split("-")[:3] try: args_kwargs = { "model_name": exp_tuple[0], "dataset_class": "Planetoid" if exp_tuple[1] != "PPI" else "PPI", "dataset_name": exp_tuple[1], "custom_key": exp_tuple[2] + ("" if exp_tuple[1] != "PubMed" else "-500") + "-ES", } args = get_args(**args_kwargs) result_info = {"full_path": full_path, "args": args} for perf_file in os.listdir(full_path): if "val_loss" in perf_file: result_info["val_loss"] = os.path.join( full_path, perf_file) elif "val_perf" in perf_file: result_info["val_perf"] = os.path.join( full_path, perf_file) elif "test_perf" in perf_file: result_info["test_perf"] = os.path.join( full_path, perf_file) result_info_dict[exp_dir] = result_info except Exception as e: cprint(f"Exception in {full_path} and {exp_tuple}, {e}", "red") for exp_dir, v in result_info_dict.items(): args = v["args"] val_loss_matrix = np.load(v["val_loss"], allow_pickle=True) val_perf_matrix = np.load(v["val_perf"], allow_pickle=True) test_perf_matrix = np.load(v["test_perf"], allow_pickle=True) if "PPI" in exp_dir: val_loss_matrix = val_loss_matrix[:30, :] val_perf_matrix = val_perf_matrix[:30, :] test_perf_matrix = test_perf_matrix[:30, :] test_perf_at_best_val_list = simulate_early_stop( val_loss_matrix, val_perf_matrix, test_perf_matrix, args.early_stop_patience, args.early_stop_queue_length, args.early_stop_threshold_loss, args.early_stop_threshold_perf, args.epochs) result_info_dict[exp_dir][ "test_perf_at_best_val_list"] = test_perf_at_best_val_list return result_info_dict
def main(): # load hyper parameters args = get_args() num_updates = int(args.num_frames // args.num_steps) start = time.time() record = {'steps': [0], 'max': [0], 'mean': [0], 'min': [0], 'query': [0]} config = configparser.ConfigParser() config.read('config.ini', encoding='utf-8') data_index = config.getint('data', 'data_index') actions = [0, 1, 2, 3, 4, 5, 11, 12] key_map = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 11, 7: 12, 8: -1} # query_cnt counts queries to the expert query_cnt = data_index # environment initial envs = Env(args.env_name, args.num_stacks) # action_shape is the size of the discrete action set, here is 18 # Most of the 18 actions are useless, find important actions # in the tips of the homework introduction document action_shape = envs.action_space.n # observation_shape is the shape of the observation # here is (210,160,3)=(height, weight, channels) observation_shape = envs.observation_space.shape print(action_shape, observation_shape) # agent initial # you should finish your agent with DaggerAgent # e.g. agent = MyDaggerAgent() agent = ExampleAgent() # You can play this game yourself for fun if args.play_game: obs = envs.reset() while True: im = Image.fromarray(obs) im.save('imgs/' + str('screen') + '.jpeg') action = int(input('input action')) while action < 0 or action >= action_shape: action = int(input('re-input action')) obs_next, reward, done, _ = envs.step(action) obs = obs_next if done: obs = envs.reset() data_set = {'data': [], 'label': []} # start train your agent for i in range(data_index): data_path = 'data/data_batch_' + str(i) + '/' for j in range(args.num_steps): pic_path = data_path + str(j) + '.jpeg' data_set['data'].append(cv2.imread(pic_path)) with open(data_path + 'label.txt', 'r') as f: for label_tmp in f.readlines(): data_set['label'].append(int(label_tmp)) agent.update(data_set['data'], data_set['label']) with open('performance.txt') as f: record_temp = eval(f.readline()) if record_temp is not None: record = record_temp for i in range(data_index, num_updates): # an example of interacting with the environment # we init the environment and receive the initial observation obs = envs.reset() # we get a trajectory with the length of args.num_steps for step in range(args.num_steps): # Sample actions epsilon = 0.05 if np.random.rand() < epsilon: # we choose a random action action = envs.action_space.sample() else: # we choose a special action according to our model action = agent.select_action(obs) # interact with the environment # we input the action to the environments and it returns some information # obs_next: the next observation after we do the action # reward: (float) the reward achieved by the action # down: (boolean) whether it’s time to reset the environment again. # done being True indicates the episode has terminated. obs_next, reward, done, _ = envs.step(action) # we view the new observation as current observation obs = obs_next # if the episode has terminated, we need to reset the environment. 
if done: envs.reset() # an example of saving observations if args.save_img: im = Image.fromarray(obs) im.save('imgs/' + str(step) + '.jpeg') data_set['data'].append(obs) # You need to label the images in 'imgs/' by recording the right actions in label.txt with open('imgs/label.txt', 'w+') as f: img_set = data_set['data'][-args.num_steps:] for img in img_set: cv2.imshow('Current Frame', img) cmd_in = cv2.waitKey(0) - 48 while cmd_in not in key_map.keys(): pass cmd_in = key_map.get(cmd_in) print(cmd_in) if cmd_in is -1: f.write(str(actions[random.randint(0, 7)]) + '\n') else: f.write(str(cmd_in) + '\n') if not os.path.exists('data/data_batch_' + str(data_index) + '/'): shutil.copytree('./imgs', 'data/data_batch_' + str(data_index)) data_index += 1 config.set('data', 'data_index', str(data_index)) config.write(open('config.ini', 'w')) # After you have labeled all the images, you can load the labels # for training a model with open('imgs/label.txt', 'r') as f: for label_tmp in f.readlines(): data_set['label'].append(int(label_tmp)) # design how to train your model with labeled data agent.update(data_set['data'], data_set['label']) query_cnt += 1 if (i + 1) % args.log_interval == 0: total_num_steps = (i + 1) * args.num_steps obs = envs.reset() reward_episode_set = [] reward_episode = 0 # evaluate your model by testing in the environment for step in range(args.test_steps): action = agent.select_action(obs) # you can render to get visual results # envs.render() obs_next, reward, done, _ = envs.step(action) reward_episode += reward obs = obs_next if done: reward_episode_set.append(reward_episode) reward_episode = 0 envs.reset() if len(reward_episode_set) == 0: reward_episode_set.append(0) end = time.time() print( "TIME {} Updates {}, num timesteps {}, FPS {} \n query {}, avrage/min/max reward {:.1f}/{:.1f}/{:.1f}" .format( time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start)), i, total_num_steps, int(total_num_steps / (end - start)), query_cnt, np.mean(reward_episode_set), np.min(reward_episode_set), np.max(reward_episode_set))) record['steps'].append(total_num_steps) record['mean'].append(np.mean(reward_episode_set)) record['max'].append(np.max(reward_episode_set)) record['min'].append(np.min(reward_episode_set)) record['query'].append(query_cnt) plot(record)
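# The DAgger template above only requires an agent exposing update(data, labels) and
# select_action(obs). Below is a hedged sketch of such an agent (hypothetical class
# and model choice, not the homework's reference solution): a small CNN classifier
# trained on the aggregated, expert-labelled frames.
import numpy as np
import torch
import torch.nn as nn


class SimpleDaggerAgent:
    """Hypothetical DAgger agent: a small CNN classifier over expert-labelled frames."""

    def __init__(self, obs_shape=(210, 160, 3), lr=1e-4):
        self.actions = [0, 1, 2, 3, 4, 5, 11, 12]  # same reduced action set as above
        features = nn.Sequential(
            nn.Conv2d(obs_shape[2], 16, 8, stride=4), nn.ReLU(),
            nn.Conv2d(16, 32, 4, stride=2), nn.ReLU(),
            nn.Flatten(),
        )
        with torch.no_grad():
            feat_dim = features(torch.zeros(1, obs_shape[2], obs_shape[0], obs_shape[1])).shape[1]
        self.net = nn.Sequential(features, nn.Linear(feat_dim, len(self.actions)))
        self.opt = torch.optim.Adam(self.net.parameters(), lr=lr)
        self.loss_fn = nn.CrossEntropyLoss()

    def _to_tensor(self, frames):
        x = np.stack(frames).astype(np.float32) / 255.0  # [N, H, W, C]
        return torch.from_numpy(x).permute(0, 3, 1, 2)   # [N, C, H, W]

    def update(self, data, labels):
        # Supervised learning on the aggregated dataset: the core step of DAgger.
        x = self._to_tensor(data)
        y = torch.tensor([self.actions.index(int(l)) for l in labels])
        for _ in range(5):  # a few full passes; a real agent would mini-batch this
            self.opt.zero_grad()
            loss = self.loss_fn(self.net(x), y)
            loss.backward()
            self.opt.step()

    def select_action(self, obs):
        with torch.no_grad():
            logits = self.net(self._to_tensor([obs]))
        return self.actions[int(logits.argmax(dim=1))]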
import os
import random
import time
import pickle
# import visdom

from utils import *
from loader import *
from model import BiLSTM_CRF
from arguments import get_args
from processor import generate_batch_data, generate_batch_para, generate_batch_rep

t = time.time()

opts, parameters = get_args()
experiment = None

models_path = "models/"
use_gpu = parameters['use_gpu']

mapping_file = 'models/mapping.pkl'

name = parameters['name']
model_name = models_path + name  # get_name(parameters)
tmp_model = model_name + '.tmp'

if not os.path.exists(models_path):
    os.makedirs(models_path)
def initialize_agent_and_env(is_test=False): """ Read and parse commandline arguments to the args variable. Initiate an agent and environment based on the arguments. :return: agent, env, args """ args = get_args(is_test) # set schema cfg.schema = args.schema # set dataset nubmer cfg.dataset_number = args.dataset_number # set number of steps in session cfg.MAX_NUM_OF_STEPS = args.episode_length # hack to change the default value of the max_steps argument in the __init__ of ATENAEnvCont to cfg.MAX_NUM_OF_STEPS atena_init_default_args = list(ATENAEnvCont.__init__.__defaults__) atena_init_default_args[0] = cfg.MAX_NUM_OF_STEPS ATENAEnvCont.__init__.__defaults__ = tuple(atena_init_default_args) # set env settings cfg.stack_obs_num = args.stack_obs_num cfg.obs_with_step_num = args.obs_with_step_num cfg.no_back = args.no_back cfg.bins_sizes = args.bins_sizes #filter_terms_bins_sizes_helper(FilterTermsBinsSizes(cfg.bins_sizes)) #paremetric_softmax_idx_action_maps_helper() # set reward types to use cfg.no_diversity = args.no_diversity cfg.no_interestingness = args.no_inter cfg.use_humans_reward = args.use_humans_reward cfg.humans_reward_interval = args.humans_reward_interval cfg.count_data_driven = args.count_data_driven # set number of hidden units for gaussian policy cfg.n_hidden_channels = args.n_hidden_channels # set architecture type cfg.arch = args.arch cfg.beta = args.beta # optimization settings cfg.max_nn_tokens = args.max_nn_tokens cfg.cache_dfs_size = args.cache_dfs_size cfg.cache_tokenization_size = args.cache_tokenization_size cfg.cache_distances_size = args.cache_distances_size # set reward coefficients cfg.humanity_coeff = args.humanity_coeff cfg.diversity_coeff = args.diversity_coeff cfg.kl_coeff = args.kl_coeff cfg.compaction_coeff = args.compaction_coeff args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir) cfg.outdir = args.outdir # https://stackoverflow.com/questions/13479295/python-using-basicconfig-method-to-log-to-console-and-file # logging file path log_path = os.path.join(args.outdir, 'training_results.log') # set up logging to file logging.basicConfig( level=logging.INFO, format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s', filename=log_path, datefmt='%H:%M:%S' ) # set up logging to console console = logging.StreamHandler() console.setLevel(args.logger_level) # add the handler to the root logger logging.getLogger('').addHandler(console) # set logging of the entire episode every LOG_INTERVAL steps cfg.log_interval = args.log_interval cfg.num_envs = args.num_envs ATENAEnvCont.LOG_INTERVAL = int(args.log_interval / args.num_envs) # TODO (baelo): delete it # Set filter term bins #filter_terms_bins_sizes_helper(FilterTermsBinsSizes(cfg.bins_sizes)) #paremetric_softmax_idx_action_maps_helper() # Set random seed chainerrl.misc.set_random_seed(args.seed, gpus=(args.gpu,)) # create environment env = make_env(args, args.env, args.seed, args.render, args.outdir) # choose algorithm args.algo = AlgoName(args.algo) if args.algo is AlgoName.CAPG_PPO: # capg model = PPOModel(env, args.gpu, args.n_hidden_channels, args.adam_lr, args.ppo_update_interval, args.outdir, args.load, args.use_clipped_gaussian) elif args.algo is AlgoName.CAPG_TRPO: # capg model = TRPOModel(env, args.gpu, args.n_hidden_channels, args.trpo_update_interval, args.outdir, args.load, args.use_clipped_gaussian) elif args.algo == AlgoName.CHAINERRL_PPO: model = PPOchianerrl(args, env) else: raise NotImplementedError agent = model.agent return agent, env, args
def load_ocnli_data(data_path, data_type, tokenizer): args = get_args() filename = os.path.join(data_path, data_type+'.json') objs = [] with open(filename) as fin: for line in fin: objs.append(json.loads(line.strip())) pad_id = tokenizer.encoder['<pad>'] args.eod_token = tokenizer.encoder['<eod>'] all_tokens_1 = [] all_masks_1 = [] all_tokens_2 = [] all_masks_2 = [] all_tokens_3 = [] all_masks_3 = [] all_labels = [] for obj in objs: if obj['label'] == '-': continue prompt = "{}?对,".format(obj['sentence1']) prompt_tokens = tokenizer.encode(prompt) prompt_len = len(prompt_tokens) tokens = prompt_tokens + tokenizer.encode(obj['sentence2']) second_mask = [0] * (args.seq_length-1) for idx in range(prompt_len-1, len(tokens)-1): second_mask[idx] = 1 all_masks_1.append(second_mask) token_length = len(tokens) assert token_length < args.seq_length tokens.extend([pad_id] * (args.seq_length - token_length)) all_tokens_1.append(tokens) prompt = "{}?错,".format(obj['sentence1']) prompt_tokens = tokenizer.encode(prompt) prompt_len = len(prompt_tokens) tokens = prompt_tokens + tokenizer.encode(obj['sentence2']) second_mask = [0] * (args.seq_length-1) for idx in range(prompt_len-1, len(tokens)-1): second_mask[idx] = 1 all_masks_2.append(second_mask) token_length = len(tokens) assert token_length < args.seq_length tokens.extend([pad_id] * (args.seq_length - token_length)) all_tokens_2.append(tokens) prompt = "{}?也许,".format(obj['sentence1']) prompt_tokens = tokenizer.encode(prompt) prompt_len = len(prompt_tokens) tokens = prompt_tokens + tokenizer.encode(obj['sentence2']) second_mask = [0] * (args.seq_length-1) for idx in range(prompt_len-1, len(tokens)-1): second_mask[idx] = 1 all_masks_3.append(second_mask) token_length = len(tokens) assert token_length < args.seq_length tokens.extend([pad_id] * (args.seq_length - token_length)) all_tokens_3.append(tokens) if obj['label'] == 'entailment': all_labels.append([0]) elif obj['label'] == 'contradiction': all_labels.append([1]) else: all_labels.append([2]) all_tokens_1 = torch.tensor(all_tokens_1, dtype=torch.long) all_masks_1 = torch.tensor(all_masks_1, dtype=torch.float) all_tokens_2 = torch.tensor(all_tokens_2, dtype=torch.long) all_masks_2 = torch.tensor(all_masks_2, dtype=torch.float) all_tokens_3 = torch.tensor(all_tokens_3, dtype=torch.long) all_masks_3 = torch.tensor(all_masks_3, dtype=torch.float) all_labels = torch.tensor(all_labels, dtype=torch.long) dataset = TensorDataset(all_tokens_1, all_masks_1, all_tokens_2, all_masks_2, all_tokens_3, all_masks_3, all_labels) # Data parallel arguments. world_size = mpu.get_data_parallel_world_size() rank = mpu.get_data_parallel_rank() global_batch_size = args.batch_size * world_size num_workers = args.num_workers # Use a random sampler with distributed batch sampler. if data_type == 'train': sampler = RandomSampler(dataset) else: sampler = torch.utils.data.SequentialSampler(dataset) batch_sampler = DistributedBatchSampler(sampler=sampler, batch_size=global_batch_size, drop_last=True, rank=rank, world_size=world_size) # Torch dataloader. return torch.utils.data.DataLoader(dataset, batch_sampler=batch_sampler, num_workers=num_workers, pin_memory=True)
def main(args):
    # NOTE: Here's where you can set hyperparameters for PPO. I don't include them as part of
    # ArgumentParser because it's too annoying to type them every time at the command line.
    # Instead, you can change them here.
    # To see a list of hyperparameters, look in ppo.py at the function _init_hyperparameters.
    hyperparameters = {
        'timesteps_per_batch': 2048,
        'max_timesteps_per_episode': 200,
        'gamma': 0.99,
        'n_updates_per_iteration': 10,
        'lr': 3e-4,
        'clip': 0.2
    }

    # Creates the environment we'll be running. If you want to replace with your own
    # custom environment, note that it must inherit Gym and have both continuous
    # observation and action spaces.
    env = gym.make('Pendulum-v0')

    # Train or test, depending on the mode specified.
    if args.mode == 'train':
        train(env=env, hyperparameters=hyperparameters,
              actor_model=args.actor_model, critic_model=args.critic_model)
    else:
        test(env=env, actor_model=args.actor_model)


if __name__ == '__main__':
    args = get_args()  # Parse arguments from command line
    main(args)
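# The NOTE above points at ppo.py's _init_hyperparameters without showing it. As a
# hedged illustration only (the class name _PPOSketch and the default values below
# are assumptions, not the repository's actual code), a dict of overrides like the
# one above is typically applied on top of defaults via setattr:
class _PPOSketch:
    def _init_hyperparameters(self, hyperparameters):
        # Reasonable defaults; anything passed in `hyperparameters` overrides them.
        self.timesteps_per_batch = 4800
        self.max_timesteps_per_episode = 1600
        self.gamma = 0.95
        self.n_updates_per_iteration = 5
        self.lr = 5e-3
        self.clip = 0.2
        for param, val in hyperparameters.items():
            setattr(self, param, val)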
def main(): """Main training program.""" print('Pretrain BERT model') # Disable CuDNN. torch.backends.cudnn.enabled = False # Arguments. args = get_args() # Pytorch distributed. initialize_distributed(args) set_random_seed(args.seed) print(args) # Data stuff. data_config = configure_data() data_config.set_defaults(data_set_type='BERT', transpose=False) (train_data, val_data), tokenizer = data_config.apply(args) args.train_iters = len(train_data) evaluate.best_val_loss = float("inf") # Model, optimizer, and learning rate. model, optimizer, lr_scheduler, criterion = setup_model_and_optimizer( args, tokenizer) # evaluate(val_data, model, tokenizer, criterion, args) # At any point you can hit Ctrl + C to break out of training early. try: total_iters = 0 skipped_iters = 0 start_epoch = 1 best_val_loss = float('inf') # Resume data loader if necessary. if args.resume_dataloader: start_epoch = args.epoch total_iters = args.total_iters # For all epochs. for epoch in range(start_epoch, args.epochs + 1): timers = Timers() # if args.shuffle: # train_data.batch_sampler.sampler.set_epoch(epoch + args.seed) timers('epoch time').start() iteration, skipped = train_epoch(epoch, model, tokenizer, optimizer, train_data, val_data, lr_scheduler, criterion, timers, args) elapsed_time = timers('epoch time').elapsed() total_iters += iteration skipped_iters += skipped lm_loss, nsp_loss = evaluate(val_data, model, tokenizer, criterion, args) val_loss = lm_loss + nsp_loss print('-' * 100) print( '| end of epoch {:3d} | time: {:.3f}s | valid loss {:.3f} | ' 'valid LM Loss {:.3f} | valid LM PPL {:.3f} | valid NSP Loss {:.3f}' .format(epoch, elapsed_time, val_loss, lm_loss, math.exp(lm_loss), nsp_loss)) print('-' * 100) if val_loss < evaluate.best_val_loss: evaluate.best_val_loss = val_loss if args.save: best_path = 'checkpoints-best.pt' print('saving best model to:', os.path.join(args.save, best_path)) save_checkpoint(best_path, epoch + 1, 0, model, optimizer, lr_scheduler, args) except KeyboardInterrupt: print('-' * 100) print('Exiting from training early') if args.save: cur_path = 'checkpoints-last.pt' print('saving current model to:', os.path.join(args.save, cur_path)) save_checkpoint(cur_path, epoch, args.cur_iteration, model, optimizer, lr_scheduler, args) exit()
import os
import pdb
from datetime import datetime

import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

from preprocess import *  # Preprocess, ChampDataset
from model import *  # CNNModel, MUCNNModel, RNNModel, MURNNModel, OHModel, MUOHModel
from arguments import get_args
import train

now = datetime.now()

preprocess = Preprocess()
arg = get_args()

'''
def oneHotEncodding(labels):
    onehot_encoded = list()
    for value in labels:
        target = [0 for _ in range(2)]
        target[value] = 1
        onehot_encoded.append(target)
    return onehot_encoded
'''


def main():
    allChamp, matchComp, blueWin = preprocess.lolDataSet(arg, "train")
    _, test_x, test_y = preprocess.lolDataSet(arg, "test")
def visualize_attention_metric_for_multiple_models( name_prefix_and_kwargs: List[Tuple[str, Dict]], unit_width_per_name=3, extension="png"): res = None total_args, num_layers, custom_key_list, name_prefix_list = None, None, [], [] kld1_list, kld2_list, jsd_list, ent_list = [], [], [], [] # [L * M, N] for name_prefix, kwargs in name_prefix_and_kwargs: args = get_args(**kwargs) custom_key_list.append(args.custom_key) num_layers = args.num_layers train_d, val_d, test_d = get_dataset_or_loader( args.dataset_class, args.dataset_name, args.data_root, batch_size=args.batch_size, seed=args.seed, ) if val_d is None and test_d is None: data_list = [train_d[0]] else: data_list = [] for _data in chain(train_d, val_d, test_d): if _data.x.size(0) != len(_data.agreement_dist): _data.agreement_dist = [ _ad for _ad in _data.agreement_dist[0] ] _data.uniform_att_dist = [ _uad for _uad in _data.uniform_att_dist[0] ] data_list.append(_data) gpu_id = [ int( np.random.choice([ g for g in range(args.num_gpus_total) if g not in args.gpu_deny_list ], 1)) ][0] if args.verbose >= 1: pprint_args(args) cprint("Use GPU the ID of which is {}".format(gpu_id), "yellow") device = "cpu" if gpu_id is None \ else torch.device('cuda:{}'.format(gpu_id) if torch.cuda.is_available() else 'cpu') model, ret = run(args, gpu_id=gpu_id, return_model=True) kld1_layer, kld2_layer, jsd_layer, ent_layer, *res = \ get_attention_metric_for_single_model_and_multiple_data(model, data_list, device) kld1_list += kld1_layer kld2_list += kld2_layer jsd_list += jsd_layer ent_list += ent_layer name_prefix_list.append(name_prefix) total_args = args torch.cuda.empty_cache() total_args.custom_key = "-".join(sorted(custom_key_list)) plot_kld_jsd_ent(kld1_list, kld2_list, jsd_list, ent_list, *res, num_layers=num_layers, model_args=total_args, epoch=-1, name_prefix_list=name_prefix_list, unit_width_per_name=unit_width_per_name, extension=extension, flierprops={ "marker": "x", "markersize": 12 })
def eval_networks():
    # get args
    args = get_args()
    seed = set_seed(args.seed, args.use_cuda)

    _, testset, nr_channels, mlp_input_neurons, classes = get_dataset(args)

    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=5,
                                             shuffle=False,
                                             num_workers=1)

    # get student and teacher models
    student_model_class = get_model_class(args.student_model)
    teacher_model_class = get_model_class(args.teacher_model)

    if "MLP" in args.student_model:
        stud_model_simple = student_model_class(mlp_input_neurons, 10, args.dropout)
        stud_model_teacher = student_model_class(mlp_input_neurons, 10, args.dropout)
        teacher_model = teacher_model_class(mlp_input_neurons, 10, args.dropout)
    else:
        stud_model_simple = student_model_class(nr_channels, 10, args.dropout)
        stud_model_teacher = student_model_class(nr_channels, 10, args.dropout)
        teacher_model = teacher_model_class(nr_channels, 10, args.dropout)

    if torch.cuda.is_available() and args.use_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    with open(args.dataset + "_teacher_network_" + args.teacher_model + "_" + str(seed), "rb") as f:
        teacher_model.load_state_dict(torch.load(f))

    with open(args.dataset + "_student_network_simple" + args.student_model + str(seed) + "_10", "rb") as f:
        stud_model_simple.load_state_dict(torch.load(f))

    with open(args.dataset + "_student_network_teacher" + args.student_model + str(seed) + "_10", "rb") as f:
        stud_model_teacher.load_state_dict(torch.load(f))

    stud_model_simple.to(device)
    stud_model_teacher.to(device)
    teacher_model.to(device)

    stud_model_simple.eval()
    stud_model_teacher.eval()
    teacher_model.eval()

    print("Eval teacher model")
    show_results(testloader, teacher_model, classes, use_cuda=True)

    print("Eval student model simple")
    show_results(testloader, stud_model_simple, classes, use_cuda=True)

    print("Eval student model teacher")
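# show_results() is called above but not included in this snippet. A hedged sketch of
# what such a helper usually does (assumed signature and behavior, not the project's
# actual code): run the model over the test loader and report accuracy.
import torch


def show_results(testloader, model, classes, use_cuda=True):
    device = torch.device("cuda:0" if use_cuda and torch.cuda.is_available() else "cpu")
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            preds = model(inputs).argmax(dim=1)
            correct += (preds == targets).sum().item()
            total += targets.size(0)
    print("Accuracy over {} test images: {:.2f}%".format(total, 100.0 * correct / total))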
def analyze_rpg_by_degree_and_homophily(degree_list: List[float], homophily_list: List[float], legend_list: List[str], model_list: List[str], custom_key_list: List[str], att_lambda_list: List[float], l2_lambda_list: List[float], num_total_runs: int, num_nodes_per_class: int = 500, num_classes: int = 10, verbose=2, is_test=False, plot_part_by_part=False, draw_plot=True, draw_diff_between_first=False, extension="pdf"): def to_log10(v, eps=1e-5): return float(np.log10(v + eps)) base_key = "analysis_rpg" + ("" if not is_test else "_test") base_path = os.path.join("../figs", base_key) best_meta_dict = defaultdict(dict) deg_and_legend_to_mean_over_hp_list, deg_and_legend_to_std_over_hp_list = {}, {} for deg in degree_list: avg_deg_ratio = deg / num_nodes_per_class for legend, model, key in zip(legend_list, model_list, custom_key_list): base_kwargs = { "model_name": model, "dataset_class": "RandomPartitionGraph", "dataset_name": f"rpg-{num_classes}-{num_nodes_per_class}-h-d", "custom_key": key, } args = get_args(**base_kwargs) args.verbose = verbose deg_and_legend = (deg, legend) if is_test: args.epochs = 2 mean_over_hp_list, std_over_hp_list = [], [] for hp in homophily_list: args.dataset_name = f"rpg-{num_classes}-{num_nodes_per_class}-{hp}-{avg_deg_ratio}" model_key, model_path = _get_key_and_makedirs( args=args, base_path=base_path, args_prefix=legend) max_mean_perf = -1 for att_lambda in att_lambda_list: for l2_lambda in l2_lambda_list: args.att_lambda = att_lambda args.l2_lambda = l2_lambda pprint_args(args) result_key = (att_lambda, l2_lambda) result_path = os.path.join( model_path, "ms_result_{}.pkl".format(s_join("-", result_key))) try: many_seeds_result = pickle.load( open(result_path, "rb")) cprint("Load: {}".format(result_path), "blue") except FileNotFoundError: many_seeds_result = run_with_many_seeds_with_gpu( args, num_total_runs) with open(result_path, "wb") as f: pickle.dump(many_seeds_result, f) cprint("Dump: {}".format(result_path), "green") garbage_collection_cuda() cprint("Garbage collected", "green") cur_mean_perf = float( np.mean( many_seeds_result["test_perf_at_best_val"])) cur_std_perf = float( np.std(many_seeds_result["test_perf_at_best_val"])) if cur_mean_perf > max_mean_perf: max_mean_perf = cur_mean_perf best_meta_dict[model_key][ "mean_perf"] = cur_mean_perf best_meta_dict[model_key][ "std_perf"] = cur_std_perf best_meta_dict[model_key][ "att_lambda"] = att_lambda best_meta_dict[model_key]["l2_lambda"] = l2_lambda best_meta_dict[model_key][ "many_seeds_result"] = many_seeds_result if not args.is_super_gat: break mean_over_hp_list.append( best_meta_dict[model_key]["mean_perf"]) std_over_hp_list.append(best_meta_dict[model_key]["std_perf"]) deg_and_legend_to_mean_over_hp_list[ deg_and_legend] = mean_over_hp_list deg_and_legend_to_std_over_hp_list[ deg_and_legend] = std_over_hp_list pprint(deg_and_legend_to_mean_over_hp_list) if not draw_plot: return plot_line_with_std( tuple_to_mean_list= deg_and_legend_to_mean_over_hp_list, # (deg, legend) -> List[perf] by homophily tuple_to_std_list=deg_and_legend_to_std_over_hp_list, x_label="Homophily", y_label="Test Accuracy", name_label_list=["Avg. Degree", "Model"], x_list=homophily_list, hue="Model", style="Model", col="Avg. 
Degree", hue_order=legend_list, x_lim=(0, None), custom_key=base_key, extension=extension, ) hp_and_legend_to_mean_over_deg_list, hp_and_legend_to_std_over_deg_list = defaultdict( list), defaultdict(list) legend_to_mean_std_num_agreed_neighbors_list = defaultdict(list) for deg, legend in deg_and_legend_to_mean_over_hp_list.keys(): mean_over_hp_list = deg_and_legend_to_mean_over_hp_list[(deg, legend)] std_over_hp_list = deg_and_legend_to_std_over_hp_list[(deg, legend)] for hp, mean_of_hp, std_of_hp in zip(homophily_list, mean_over_hp_list, std_over_hp_list): hp_and_legend = (hp, legend) hp_and_legend_to_mean_over_deg_list[hp_and_legend].append( mean_of_hp) hp_and_legend_to_std_over_deg_list[hp_and_legend].append(std_of_hp) legend_to_mean_std_num_agreed_neighbors_list[legend].append( (mean_of_hp, std_of_hp, hp * deg)) mean_perf_list = [] num_agreed_neighbors_list = [] model_legend_list = [] for legend, mean_std_num_agr_neighbors_list in legend_to_mean_std_num_agreed_neighbors_list.items( ): for mean_perf, std_perf, num_agr_neighbors in sorted( mean_std_num_agr_neighbors_list, key=lambda t: t[2]): mean_perf_list.append(mean_perf) model_legend_list.append(legend) num_agreed_neighbors_list.append(num_agr_neighbors) plot_scatter( xs=num_agreed_neighbors_list, ys=mean_perf_list, hues=model_legend_list, xlabel="Avg. Number of Agreed Neighbors", ylabel="Test Performance (Acc.)", hue_name="Model", custom_key=base_key, ) plot_line_with_std( tuple_to_mean_list=hp_and_legend_to_mean_over_deg_list, tuple_to_std_list=hp_and_legend_to_std_over_deg_list, x_label="Avg. Degree (Log10)", # Log y_label="Test Accuracy", name_label_list=["Homophily", "Model"], x_list=[to_log10(d) for d in degree_list], # Log hue="Model", style="Model", col="Homophily", aspect=0.75, hue_order=legend_list, x_lim=(None, None), custom_key=base_key, extension=extension, ) if plot_part_by_part: # manual. 
# deg: [2.5, 5.0, 10.0, 25.0, 50.0, 75.0, 100.0] def filtered_by_hp(hp_list, num_deg=None): return ({ (hp, legend): (mean_list if not num_deg else mean_list[:num_deg]) for (hp, legend), mean_list in hp_and_legend_to_mean_over_deg_list.items() if hp in hp_list }, {(hp, legend): (std_list if not num_deg else std_list[:num_deg]) for (hp, legend ), std_list in hp_and_legend_to_std_over_deg_list.items() if hp in hp_list}) def get_mean_diff(h_and_l_to_m_over_d_list, first_legend, x100=True): h_and_l_to_mean_diff_over_d_list = dict() for (hp, legend), mean_list in h_and_l_to_m_over_d_list.items(): if legend == first_legend: continue mean_list_of_first = h_and_l_to_m_over_d_list[(hp, first_legend)] mean_diff_list = (np.asarray(mean_list) - np.asarray(mean_list_of_first)) if x100: mean_diff_list = 100 * mean_diff_list mean_diff_list = mean_diff_list.tolist() h_and_l_to_mean_diff_over_d_list[(hp, legend)] = mean_diff_list return h_and_l_to_mean_diff_over_d_list if 0.1 in degree_list: b1, b2, b3, b4 = [0.1, 0.3, 0.5], [0.7], [0.9], [0.7, 0.9] else: b1, b2, b3, b4 = [0.2, 0.4], [0.6], [0.8], [0.6, 0.8] hp135_and_legend_to_mean_over_deg_list, hp135_and_legend_to_std_over_deg_list = filtered_by_hp( b1) hp7_and_legend_to_mean_over_deg_list, hp7_and_legend_to_std_over_deg_list = filtered_by_hp( b2) hp9_and_legend_to_mean_over_deg_list, hp9_and_legend_to_std_over_deg_list = filtered_by_hp( b3) hp79_and_legend_to_mean_over_deg_list, hp79_and_legend_to_std_over_deg_list = filtered_by_hp( b4) if draw_diff_between_first: lf = legend_list[0] hp135_and_legend_to_mean_over_deg_list = get_mean_diff( hp135_and_legend_to_mean_over_deg_list, lf) hp7_and_legend_to_mean_over_deg_list = get_mean_diff( hp7_and_legend_to_mean_over_deg_list, lf) hp79_and_legend_to_mean_over_deg_list = get_mean_diff( hp79_and_legend_to_mean_over_deg_list, lf) hp9_and_legend_to_mean_over_deg_list = get_mean_diff( hp9_and_legend_to_mean_over_deg_list, lf) hp135_and_legend_to_std_over_deg_list = None hp7_and_legend_to_std_over_deg_list = None hp79_and_legend_to_std_over_deg_list = None hp9_and_legend_to_std_over_deg_list = None legend_list = legend_list[1:] y_lim = None y_label = "Diff. of Test Acc. vs. GO (%p)" else: y_lim = None y_label = "Test Accuracy", degree_list = np.log10(degree_list).tolist() palette = ["grey", "#1976D2", "#D32F2F"] plot_line_with_std( tuple_to_mean_list=hp135_and_legend_to_mean_over_deg_list, tuple_to_std_list=hp135_and_legend_to_std_over_deg_list, x_label="Avg. Degree (Log10)", y_label=y_label, name_label_list=["Homophily", "Model"], x_list=degree_list, hue="Model", style="Model", col="Homophily", aspect=0.9, hue_order=legend_list, legend=False, x_lim=(0, None), y_lim=y_lim, palette=palette, custom_key=base_key + "_part135", extension=extension, ) plot_line_with_std( tuple_to_mean_list=hp79_and_legend_to_mean_over_deg_list, tuple_to_std_list=hp79_and_legend_to_std_over_deg_list, x_label="Avg. Degree (Log10)", y_label="Test Accuracy", name_label_list=["Homophily", "Model"], x_list=degree_list, hue="Model", style="Model", col="Homophily", aspect=0.9, hue_order=legend_list, legend="full", x_lim=(0, None), y_lim=y_lim, use_ylabel=False, palette=palette, custom_key=base_key + "_part79", extension=extension, ) plot_line_with_std( tuple_to_mean_list=hp7_and_legend_to_mean_over_deg_list, tuple_to_std_list=hp7_and_legend_to_std_over_deg_list, x_label="Avg. 
Degree (Log10)", y_label="Test Accuracy", name_label_list=["Homophily", "Model"], x_list=degree_list, hue="Model", style="Model", col="Homophily", aspect=1.0, hue_order=legend_list, legend=False, x_lim=(0, None), y_lim=y_lim, use_ylabel=False, palette=palette, custom_key=base_key + "_part7", extension=extension, ) plot_line_with_std( tuple_to_mean_list=hp9_and_legend_to_mean_over_deg_list, tuple_to_std_list=hp9_and_legend_to_std_over_deg_list, x_label="Avg. Degree (Log10)", y_label="Test Accuracy", name_label_list=["Homophily", "Model"], x_list=degree_list, hue="Model", style="Model", col="Homophily", aspect=1.0, hue_order=legend_list, legend="full", x_lim=(0, None), y_lim=y_lim, use_ylabel=False, palette=palette, custom_key=base_key + "_part9", extension=extension, )
def main(): args = get_args() device = torch.device('cuda' if args.cuda else 'cpu') env = gym.make(args.env_name) input_size = env.observation_space.shape # 4 output_size = env.action_space.n # 2 if 'Breakout' in args.env_name: output_size -= 1 env.close() is_render = False if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) model_path = os.path.join(args.save_dir, args.env_name + '.model') predictor_path = os.path.join(args.save_dir, args.env_name + '.pred') target_path = os.path.join(args.save_dir, args.env_name + '.target') writer = SummaryWriter(log_dir=args.log_dir) reward_rms = RunningMeanStd() obs_rms = RunningMeanStd(shape=(1, 1, 84, 84)) discounted_reward = RewardForwardFilter(args.ext_gamma) model = CnnActorCriticNetwork(input_size, output_size, args.use_noisy_net) rnd = RNDModel(input_size, output_size) model = model.to(device) rnd = rnd.to(device) optimizer = optim.Adam(list(model.parameters()) + list(rnd.predictor.parameters()), lr=args.lr) if args.load_model: if args.cuda: model.load_state_dict(torch.load(model_path)) else: model.load_state_dict(torch.load(model_path, map_location='cpu')) works = [] parent_conns = [] child_conns = [] for idx in range(args.num_worker): parent_conn, child_conn = Pipe() work = AtariEnvironment( args.env_name, is_render, idx, child_conn, sticky_action=args.sticky_action, p=args.sticky_action_prob, max_episode_steps=args.max_episode_steps) work.start() works.append(work) parent_conns.append(parent_conn) child_conns.append(child_conn) states = np.zeros([args.num_worker, 4, 84, 84]) sample_env_index = 0 # Sample Environment index to log sample_episode = 0 sample_rall = 0 sample_step = 0 sample_i_rall = 0 global_update = 0 global_step = 0 # normalize observation print('Initializes observation normalization...') next_obs = [] for step in range(args.num_step * args.pre_obs_norm_steps): actions = np.random.randint(0, output_size, size=(args.num_worker,)) for parent_conn, action in zip(parent_conns, actions): parent_conn.send(action) for parent_conn in parent_conns: next_state, reward, done, realdone, log_reward = parent_conn.recv() next_obs.append(next_state[3, :, :].reshape([1, 84, 84])) if len(next_obs) % (args.num_step * args.num_worker) == 0: next_obs = np.stack(next_obs) obs_rms.update(next_obs) next_obs = [] print('Training...') while True: total_state, total_reward, total_done, total_next_state, total_action, total_int_reward, total_next_obs, total_ext_values, total_int_values, total_action_probs = [], [], [], [], [], [], [], [], [], [] global_step += (args.num_worker * args.num_step) global_update += 1 # Step 1. n-step rollout for _ in range(args.num_step): actions, value_ext, value_int, action_probs = get_action(model, device, np.float32(states) / 255.) 
for parent_conn, action in zip(parent_conns, actions): parent_conn.send(action) next_states, rewards, dones, real_dones, log_rewards, next_obs = [], [], [], [], [], [] for parent_conn in parent_conns: next_state, reward, done, real_done, log_reward = parent_conn.recv() next_states.append(next_state) rewards.append(reward) dones.append(done) real_dones.append(real_done) log_rewards.append(log_reward) next_obs.append(next_state[3, :, :].reshape([1, 84, 84])) next_states = np.stack(next_states) rewards = np.hstack(rewards) dones = np.hstack(dones) real_dones = np.hstack(real_dones) next_obs = np.stack(next_obs) # total reward = int reward + ext Reward intrinsic_reward = compute_intrinsic_reward(rnd, device, ((next_obs - obs_rms.mean) / np.sqrt(obs_rms.var)).clip(-5, 5)) intrinsic_reward = np.hstack(intrinsic_reward) sample_i_rall += intrinsic_reward[sample_env_index] total_next_obs.append(next_obs) total_int_reward.append(intrinsic_reward) total_state.append(states) total_reward.append(rewards) total_done.append(dones) total_action.append(actions) total_ext_values.append(value_ext) total_int_values.append(value_int) total_action_probs.append(action_probs) states = next_states[:, :, :, :] sample_rall += log_rewards[sample_env_index] sample_step += 1 if real_dones[sample_env_index]: sample_episode += 1 writer.add_scalar('data/reward_per_epi', sample_rall, sample_episode) writer.add_scalar('data/reward_per_rollout', sample_rall, global_update) writer.add_scalar('data/step', sample_step, sample_episode) sample_rall = 0 sample_step = 0 sample_i_rall = 0 # calculate last next value _, value_ext, value_int, _ = get_action(model, device, np.float32(states) / 255.) total_ext_values.append(value_ext) total_int_values.append(value_int) # -------------------------------------------------- total_state = np.stack(total_state).transpose([1, 0, 2, 3, 4]).reshape([-1, 4, 84, 84]) total_reward = np.stack(total_reward).transpose().clip(-1, 1) total_action = np.stack(total_action).transpose().reshape([-1]) total_done = np.stack(total_done).transpose() total_next_obs = np.stack(total_next_obs).transpose([1, 0, 2, 3, 4]).reshape([-1, 1, 84, 84]) total_ext_values = np.stack(total_ext_values).transpose() total_int_values = np.stack(total_int_values).transpose() total_logging_action_probs = np.vstack(total_action_probs) # Step 2. calculate intrinsic reward # running mean intrinsic reward total_int_reward = np.stack(total_int_reward).transpose() total_reward_per_env = np.array([discounted_reward.update(reward_per_step) for reward_per_step in total_int_reward.T]) mean, std, count = np.mean(total_reward_per_env), np.std(total_reward_per_env), len(total_reward_per_env) reward_rms.update_from_moments(mean, std ** 2, count) # normalize intrinsic reward total_int_reward /= np.sqrt(reward_rms.var) writer.add_scalar('data/int_reward_per_epi', np.sum(total_int_reward) / args.num_worker, sample_episode) writer.add_scalar('data/int_reward_per_rollout', np.sum(total_int_reward) / args.num_worker, global_update) # ------------------------------------------------------------------------------------------- # logging Max action probability writer.add_scalar('data/max_prob', total_logging_action_probs.max(1).mean(), sample_episode) # Step 3. 
make target and advantage # extrinsic reward calculate ext_target, ext_adv = make_train_data(total_reward, total_done, total_ext_values, args.ext_gamma, args.gae_lambda, args.num_step, args.num_worker, args.use_gae) # intrinsic reward calculate # None Episodic int_target, int_adv = make_train_data(total_int_reward, np.zeros_like(total_int_reward), total_int_values, args.int_gamma, args.gae_lambda, args.num_step, args.num_worker, args.use_gae) # add ext adv and int adv total_adv = int_adv * args.int_coef + ext_adv * args.ext_coef # ----------------------------------------------- # Step 4. update obs normalize param obs_rms.update(total_next_obs) # ----------------------------------------------- # Step 5. Training! train_model(args, device, output_size, model, rnd, optimizer, np.float32(total_state) / 255., ext_target, int_target, total_action, total_adv, ((total_next_obs - obs_rms.mean) / np.sqrt(obs_rms.var)).clip(-5, 5), total_action_probs) if global_step % (args.num_worker * args.num_step * args.save_interval) == 0: print('Now Global Step :{}'.format(global_step)) torch.save(model.state_dict(), model_path) torch.save(rnd.predictor.state_dict(), predictor_path) torch.save(rnd.target.state_dict(), target_path)
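# compute_intrinsic_reward() used above is not shown in this snippet. In Random
# Network Distillation the intrinsic reward is the prediction error between a fixed,
# randomly initialized target network and a trained predictor network on the
# (normalized) next observation. A minimal sketch, assuming `rnd` exposes .target and
# .predictor modules as in the code above (the project's real helper may differ):
import torch


def compute_intrinsic_reward(rnd, device, next_obs):
    next_obs = torch.FloatTensor(next_obs).to(device)
    with torch.no_grad():
        target_feature = rnd.target(next_obs)
        predict_feature = rnd.predictor(next_obs)
        # Per-sample squared error; novel states yield larger errors, hence larger bonuses.
        intrinsic_reward = (target_feature - predict_feature).pow(2).sum(dim=1) / 2
    return intrinsic_reward.cpu().numpy()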
def main(): """Main training program.""" # Disable CuDNN. torch.backends.cudnn.enabled = False # Timer. timers = Timers() # Arguments. args = get_args() # Pytorch distributed. initialize_distributed(args) # Random seeds for reproducability. set_random_seed(args.seed) # get the tokenizer tokenizer = GPT2Tokenizer( os.path.join(args.tokenizer_path, 'vocab.json'), os.path.join(args.tokenizer_path, 'chinese_vocab.model')) # load train data if args.do_train: train_dataloader, _ = load_data(args, 'train', tokenizer, 1) dev_dataloader, dev_dataset = load_data(args, 'dev', tokenizer, 1) with open(args.deepspeed_config, "r") as f: deepspeed_conf = json.load(f) epoch = args.epoch grad_acc = deepspeed_conf["gradient_accumulation_steps"] args.train_iters = len(train_dataloader) * epoch / grad_acc # Model, optimizer, and learning rate. # TODO: maybe need to reinitialize optimizer elif args.do_eval: # Set an arbitrary positive integer since the optimizer and the scheduler will not be used when do eval. args.train_iters = 1 model, optimizer, lr_scheduler = setup_model_and_optimizer(args) device = torch.cuda.current_device() # give a time stemp to the model cur_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime()) results_dir = os.path.join(args.results_dir, "{}-{}".format(args.model_name, cur_time)) os.makedirs(results_dir, exist_ok=True) if args.do_train and torch.distributed.get_rank() == 0: with open(os.path.join(results_dir, "train_log.txt"), "w") as f: f.write("Train losses:\n") with open(os.path.join(results_dir, "dev_log.txt"), "w") as f: f.write("Dev accs:\n") torch.distributed.barrier() if args.do_train: cand_ids = torch.tensor(dev_dataset.cand_ids).to(device) total_loss, logging_loss, best_acc = 0.0, 0.0, 0.0 global_step, total_step, best_step = 0, 0, 0 for e in range(epoch): model.train() for batch, no_model_batch in tqdm( train_dataloader, disable=(torch.distributed.get_rank() != 0)): for k in batch: batch[k] = batch[k].to(device) for k in no_model_batch: no_model_batch[k] = no_model_batch[k].to(device) output = model(**batch) # get the loss of the last token output = torch.sum( output * no_model_batch["loss_mask"].unsqueeze(-1), 1) / torch.sum(no_model_batch["loss_mask"], -1).unsqueeze(-1) # get the label of the last token labels = no_model_batch["labels"].float() labels = (torch.sum(labels * no_model_batch["loss_mask"], 1) / torch.sum(no_model_batch["loss_mask"], -1)).long() # cross_entropy loss losses = mpu.vocab_parallel_cross_entropy( output.unsqueeze(1).contiguous().float(), labels.unsqueeze(1)) loss = torch.mean(losses) model.backward(loss) model.step() torch.distributed.all_reduce( loss.data, group=mpu.get_data_parallel_group()) loss.data = loss.data / mpu.get_data_parallel_world_size() total_loss += loss.item() / grad_acc if total_step % grad_acc == 0: global_step += 1 if global_step != 0 and global_step % args.log_interval == 0: # logging if torch.distributed.get_rank() == 0: train_log = "Epoch {}, global step {}, total step {}, train lm loss: {}".format( e, global_step, epoch * len(train_dataloader), (total_loss - logging_loss) / args.log_interval) yprint(train_log) with open( os.path.join(results_dir, "train_log.txt"), "a") as f: f.write(train_log + "\n") logging_loss = total_loss if global_step != 0 and global_step % args.eval_interval == 0: # evaluate on the dev acc, _, _ = evaluate(args, model, dev_dataloader, cand_ids, device, mode="dev") dev_results_dir = os.path.join( results_dir, "dev_step-{}".format(global_step)) if acc > best_acc: best_acc = acc best_step = 
global_step if torch.distributed.get_rank() == 0: # we will only write the log file once dev_log = "Epoch: {}, Global step: {}, Acc: {}".format( e, global_step, acc) yprint(dev_log) os.makedirs(dev_results_dir, exist_ok=True) with open( os.path.join(dev_results_dir, "dev_result.txt"), "w") as f: f.write(dev_log + "\n") with open(os.path.join(results_dir, "dev_log.txt"), "a") as f: f.write(dev_log + "\n") torch.distributed.barrier() args.save = dev_results_dir save_checkpoint(global_step, model, optimizer, lr_scheduler, args) total_step += 1 with open(os.path.join(dev_results_dir, "dev_log.txt"), "a") as f: f.write("Best acc: {} Best step: {}\n".format(best_acc, best_step)) if args.do_eval: # evaluate on the test test_dataloader, test_dataset = load_data(args, 'test', tokenizer, 1) cand_ids = torch.tensor(test_dataset.cand_ids).to(device) if args.do_train: # if do training, then evaluate the one with the max acc on dev set. eval_ckpt_path = os.path.join(results_dir, "dev_step-{}".format(best_step)) args.load = eval_ckpt_path else: # if only do eval, then evaluate the one specified by the user. args.load = args.eval_ckpt_path load_checkpoint(model=model, optimizer=None, lr_scheduler=None, args=args) acc, _, _ = evaluate(args, model, test_dataloader, cand_ids, device, mode="test") if torch.distributed.get_rank() == 0: eval_log = "Checkpoint from {}: Acc: {}".format(args.load, acc) yprint(eval_log) with open(os.path.join(results_dir, "eval_log"), "w") as f: f.write(eval_log + "\n") torch.distributed.barrier()
    if opts.expert_trajectories:
        save_state['T_sup'] = agent.T_sup

    torch.save(save_state, os.path.join(opts.save_path, 'model_latest.net'))

    print('Epoch %d : Train loss: %9.6f Val loss: %9.6f' % (epoch + 1, train_err, val_err))

    # Reduce supervision gradually
    if opts.expert_trajectories and opts.hybrid_train:
        if (epoch + 1) % opts.hybrid_schedule == 0 and agent.T_sup > 0:
            agent.T_sup -= 1
        # Save the model after the first schedule is over
        if epoch + 1 == opts.hybrid_schedule:
            torch.save(save_state, os.path.join(opts.save_path, 'model_after_one_schedule.net'))

    # Decay expert reward gradually
    if opts.expert_rewards and (epoch + 1) % opts.expert_rewards_decay == 0:
        agent.reward_scale_expert /= opts.expert_rewards_decay_factor

    # Display three randomly selected batches of panoramas every 10 epochs
    if (epoch + 1) % 10 == 0 or epoch == 0:
        for choice in rng_choices:
            for pano_count in range(decoded_images[choice].size(0)):
                x = vutils.make_grid(decoded_images[choice][pano_count], padding=5,
                                     normalize=True, scale_each=True, nrow=opts.T // 2 + 1)
                writer.add_image('Validation batch # : %d image # : %d' % (choice, pano_count),
                                 x, 0)  # Converting this to 0 to save disk space, should be epoch ideally


if __name__ == '__main__':
    opts = get_args()
    assert not (opts.expert_rewards and opts.expert_trajectories), \
        "Cannot use both sidekicks at once!"
    train(opts)
import numpy as np

from autooed.problem import build_problem
from autooed.mobo import build_algorithm
from autooed.utils.seed import set_seed
from autooed.utils.initialization import generate_random_initial_samples
from autooed.utils.plot import plot_performance_space, plot_performance_metric

from arguments import get_args


if __name__ == '__main__':
    # load arguments
    args, module_cfg = get_args()

    # set random seed
    set_seed(args.seed)

    # build problem
    problem = build_problem(args.problem)
    print(problem)

    # build algorithm
    algorithm = build_algorithm(args.algo, problem, module_cfg)
    print(algorithm)

    # generate initial random samples
    X = generate_random_initial_samples(problem, args.n_init_sample)
    Y = np.array([problem.evaluate_objective(x) for x in X])
def main(): import copy import glob import os import time import matplotlib.pyplot as plt import gym import numpy as np import torch torch.multiprocessing.set_start_method('spawn') import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from gym.spaces import Discrete from arguments import get_args from baselines.common.vec_env.dummy_vec_env import DummyVecEnv from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv from baselines.common.vec_env.vec_normalize import VecNormalize from envs import make_env from img_env import ImgEnv, IMG_ENVS from model import Policy from storage import RolloutStorage from utils import update_current_obs, eval_episode from torchvision import transforms from visdom import Visdom import algo viz = Visdom(port=8097) print("#######") print( "WARNING: All rewards are clipped or normalized so you need to use a monitor (see envs.py) or visdom plot to get true rewards" ) print("#######") plot_rewards = [] plot_policy_loss = [] plot_value_loss = [] # x = np.array([0]) # y = np.array([0]) # counter = 0 # win = viz.line( # X=x, # Y=y, # win="test1", # name='Line1', # opts=dict( # title='Reward', # ) # ) # win2 = viz.line( # X=x, # Y=y, # win="test2", # name='Line2', # opts=dict( # title='Policy Loss', # ) # ) # win3 = viz.line( # X=x, # Y=y, # win="test3", # name='Line3', # opts=dict( # title='Value Loss', # ) # ) args = get_args() if args.no_cuda: args.cuda = False print(args) assert args.algo in ['a2c', 'ppo', 'acktr'] if args.recurrent_policy: assert args.algo in ['a2c', 'ppo'], \ 'Recurrent policy is not implemented for ACKTR' num_updates = int(args.num_frames) // args.num_steps // args.num_processes torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) toprint = ['seed', 'lr', 'nat', 'resnet'] if args.env_name in IMG_ENVS: toprint += ['window', 'max_steps'] toprint.sort() name = args.tag args_param = vars(args) os.makedirs(os.path.join(args.out_dir, args.env_name), exist_ok=True) for arg in toprint: if arg in args_param and (args_param[arg] or arg in ['gamma', 'seed']): if args_param[arg] is True: name += '{}_'.format(arg) else: name += '{}{}_'.format(arg, args_param[arg]) model_dir = os.path.join(args.out_dir, args.env_name, args.algo) os.makedirs(model_dir, exist_ok=True) results_dict = {'episodes': [], 'rewards': [], 'args': args} torch.set_num_threads(1) eval_env = make_env(args, 'cifar10', args.seed, 1, None, args.add_timestep, natural=args.nat, train=False) envs = make_env(args, 'cifar10', args.seed, 1, None, args.add_timestep, natural=args.nat, train=True) #print(envs) # envs = envs[0] # if args.num_processes > 1: # envs = SubprocVecEnv(envs) # else: # envs = DummyVecEnv(envs) # eval_env = DummyVecEnv(eval_env) # if len(envs.observation_space.shape) == 1: # envs = VecNormalize(envs, gamma=args.gamma) obs_shape = envs.observation_space.shape obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:]) actor_critic = Policy(obs_shape, envs.action_space, args.recurrent_policy, dataset=args.env_name, resnet=args.resnet, pretrained=args.pretrained) if envs.action_space.__class__.__name__ == "Discrete": action_shape = 1 else: action_shape = envs.action_space.shape[0] if args.cuda: actor_critic.cuda() if args.algo == 'a2c': agent = algo.A2C_ACKTR(actor_critic, args.value_loss_coef, args.entropy_coef, lr=args.lr, eps=args.eps, alpha=args.alpha, max_grad_norm=args.max_grad_norm) elif args.algo == 'ppo': agent = algo.PPO(actor_critic, args.clip_param, args.ppo_epoch, args.num_mini_batch, args.value_loss_coef, 
                                args.entropy_coef, lr=args.lr, eps=args.eps,
                                max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = algo.A2C_ACKTR(actor_critic, args.value_loss_coef,
                               args.entropy_coef, acktr=True)

    action_space = envs.action_space
    if args.env_name in IMG_ENVS:
        action_space = np.zeros(2)
    # obs_shape = envs.observation_space.shape
    rollouts = RolloutStorage(args.num_steps, args.num_processes, obs_shape,
                              action_space, actor_critic.state_size)
    current_obs = torch.zeros(args.num_processes, *obs_shape)

    obs = envs.reset()
    update_current_obs(obs, current_obs, obs_shape, args.num_stack)
    rollouts.observations[0].copy_(current_obs)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([args.num_processes, 1])
    final_rewards = torch.zeros([args.num_processes, 1])

    if args.cuda:
        current_obs = current_obs.cuda()
        rollouts.cuda()

    start = time.time()
    for j in range(num_updates):
        # envs.display_original(j)
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step],
                    rollouts.states[step],
                    rollouts.masks[step])
            cpu_actions = action.squeeze(1).cpu().numpy()

            # Observe reward and next obs
            obs, reward, done, info = envs.step(cpu_actions)
            # envs.display_step(step, j)
            # print("OBS", obs)
            # print("REWARD", reward)
            # print("DONE", done)
            # print("INFO", info)
            reward = torch.from_numpy(np.expand_dims(np.stack([reward]), 1)).float()
            episode_rewards += reward

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in [done]])
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks

            if args.cuda:
                masks = masks.cuda()

            if current_obs.dim() == 4:
                current_obs *= masks.unsqueeze(2).unsqueeze(2)
            else:
                current_obs *= masks

            update_current_obs(obs, current_obs, obs_shape, args.num_stack)
            rollouts.insert(current_obs, states, action, action_log_prob,
                            value, reward, masks)
            # print("envs.curr_img SHAPE: ", envs.curr_img.shape)
            # display_state = envs.curr_img
            # display_state[:, envs.pos[0]:envs.pos[0]+envs.window, envs.pos[1]:envs.pos[1]+envs.window] = 5
            # display_state = custom_replace(display_state, 1, 0)
            # display_state[:, envs.pos[0]:envs.pos[0]+envs.window, envs.pos[1]:envs.pos[1]+envs.window] = \
            #     envs.curr_img[:, envs.pos[0]:envs.pos[0]+envs.window, envs.pos[1]:envs.pos[1]+envs.window]
            # img = transforms.ToPILImage()(display_state)
            # img.save("state_cifar/" + "state" + str(j) + "_" + str(step) + ".png")

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.observations[-1],
                                                rollouts.states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)
        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()

        if j % args.save_interval == 0:
            torch.save((actor_critic.state_dict(), results_dict),
                       os.path.join(model_dir, name + 'cifar_model_ppo_ex1_center.pt'))

        if j % args.log_interval == 0:
            end = time.time()
            total_reward = eval_episode(eval_env, actor_critic, args)
            results_dict['rewards'].append(total_reward)
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            print("Updates {}, num timesteps {}, FPS {}, reward {:.1f}, "
                  "entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}".format(
                      j, total_num_steps,
                      int(total_num_steps / (end - start)),
                      np.mean(results_dict['rewards'][-10:]),
                      dist_entropy, value_loss, action_loss))
            plot_rewards.append(np.mean(results_dict['rewards'][-10:]))
            plot_policy_loss.append(action_loss)
            plot_value_loss.append(value_loss)
            plt.plot(range(len(plot_rewards)), plot_rewards)
            plt.savefig("rewards_center.png")
            plt.close()
            plt.plot(range(len(plot_policy_loss)), plot_policy_loss)
            plt.savefig("policyloss_center.png")
            plt.close()
            plt.plot(range(len(plot_value_loss)), plot_value_loss)
            plt.savefig("valueloss_center.png")
            plt.close()
def load_tnews_data(data_path, data_type, tokenizer, few_shot=False):
    args = get_args()

    filename = os.path.join(data_path, data_type + '.json')
    objs = []
    with open(filename) as fin:
        for line in fin:
            objs.append(json.loads(line.strip()))

    pad_id = tokenizer.encoder['<pad>']
    args.eod_token = tokenizer.encoder['<eod>']

    labels = []
    label_map = {}
    label_reverse = {}
    with open(os.path.join(data_path, 'labels.json')) as fin:
        for i, line in enumerate(fin):
            obj = json.loads(line.strip())
            labels.append(obj['label_desc'])
            label_map[obj['label_desc']] = i
            label_reverse[obj['label']] = obj['label_desc']

    all_tokens = []
    all_masks = []
    all_labels = []
    for _, obj in enumerate(objs):
        sentence = obj['sentence']
        tokenized_sentence = tokenizer.encode(sentence)[:args.seq_length - 20]
        obj['label_desc'] = label_reverse[obj['label']]

        if few_shot:
            cur_labels = random.sample(labels, 3)
            while obj['label_desc'] in cur_labels:
                cur_labels = random.sample(labels, 3)
            cur_labels.append(obj['label_desc'])
            cur_label = cur_labels.index(obj['label_desc'])
            assert cur_label != -1
        else:
            cur_labels = labels
            cur_label = label_map[obj['label_desc']]
        all_labels.append(cur_label)

        for _, label in enumerate(cur_labels):
            prompt = "这是关于{}的文章:".format(label)
            prompt_tokens = tokenizer.encode(prompt)
            prompt_len = len(prompt_tokens)
            tokens = prompt_tokens + tokenized_sentence
            second_mask = [0] * (args.seq_length - 1)
            for idx in range(prompt_len - 1, len(tokens) - 1):
                second_mask[idx] = 1
            all_masks.append(second_mask)
            token_length = len(tokens)
            assert token_length < args.seq_length
            tokens.extend([pad_id] * (args.seq_length - token_length))
            all_tokens.append(tokens)

    all_tokens = torch.tensor(all_tokens, dtype=torch.long)
    all_masks = torch.tensor(all_masks, dtype=torch.float)
    dataset = TensorDataset(all_tokens, all_masks)

    # Data parallel arguments.
    world_size = mpu.get_data_parallel_world_size()
    rank = mpu.get_data_parallel_rank()
    global_batch_size = args.batch_size * world_size
    num_workers = args.num_workers

    sampler = torch.utils.data.SequentialSampler(dataset)
    batch_sampler = DistributedBatchSampler(sampler=sampler,
                                            batch_size=global_batch_size,
                                            drop_last=True,
                                            rank=rank,
                                            world_size=world_size)

    # Torch dataloader.
    return torch.utils.data.DataLoader(dataset,
                                       batch_sampler=batch_sampler,
                                       num_workers=num_workers,
                                       pin_memory=True), all_labels
def main():
    args = get_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    log_dir = os.path.expanduser(args.log_dir)
    eval_log_dir = log_dir + "_eval"
    utils.cleanup_log_dir(log_dir)
    utils.cleanup_log_dir(eval_log_dir)

    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    base = SEVN
    actor_critic, obs_rms = torch.load(save_dir, map_location=device)
    actor_critic.to(device)
    actor_critic.max_eval_success_rate = 0
    print("Passed!")

    num_processes = args.num_processes
    eval_recurrent_hidden_states = torch.zeros(
        args.num_processes, actor_critic.recurrent_hidden_state_size, device=device)
    eval_masks = torch.zeros(num_processes, 1, device=device)

    x = 0
    while x < 10:
        torch.manual_seed(args.seed + x)
        torch.cuda.manual_seed_all(args.seed + x)
        eval_envs = make_vec_envs(args.env_name, args.seed + x,
                                  args.num_processes, args.gamma, args.log_dir,
                                  device, False, args.custom_gym)
        eval_episode_rewards = []
        eval_episode_length = []
        eval_episode_success_rate = []

        obs = eval_envs.reset()
        while len(eval_episode_rewards) < num_processes * 100:
            with torch.no_grad():
                _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                    obs,
                    eval_recurrent_hidden_states,
                    eval_masks,
                    deterministic=True)
            eval_envs.render()
            obs, _, done, infos = eval_envs.step(action)
            eval_masks = torch.tensor(
                [[0.0] if done_ else [1.0] for done_ in done],
                dtype=torch.float32,
                device=device)
            for info in infos:
                if 'episode' in info.keys():
                    if info['was_successful_trajectory'] and args.mod:
                        # Modified reward function: count a successful
                        # trajectory as a fixed reward of 10.
                        eval_episode_rewards.append(10)
                    else:
                        eval_episode_rewards.append(info['episode']['r'])
                    eval_episode_length.append(info['episode']['l'])
                    eval_episode_success_rate.append(info['was_successful_trajectory'])
        x += 1
        print(" Evaluation using {} episodes: mean reward {:.5f}, "
              "mean length {:.2f}, mean success {:.2f}\n".format(
                  len(eval_episode_rewards),
                  np.mean(eval_episode_rewards),
                  np.mean(eval_episode_length),
                  np.mean(eval_episode_success_rate)))
        eval_envs.close()

    print(eval_episode_rewards)
    print(eval_episode_success_rate)
    e = 0
    while stats['total_samples'] < args.max_samples:
        train(args, env, model, opt, opt_v, kf, stats, ep=e)
        avg_eval = eval(args, env, model, stats)
        log_writer.writerow([
            stats['total_samples'], stats['max_reward'], stats['avg_reward'],
            avg_eval
        ])
        log_file.flush()
        e += 1
        print("total samples: ", stats['total_samples'],
              stats['total_samples'] - last_iter_samples)
        last_iter_samples = stats['total_samples']
        # Save the model if evaluation improved, or if more than 10k samples
        # have been collected since the last checkpoint.
        if avg_eval > best_eval or stats['total_samples'] - last_save_step > 10000:
            best_eval = avg_eval
            last_save_step = stats['total_samples']
            torch.save(
                model.state_dict(),
                os.path.join(
                    args.log_dir, "model_ep" + str(e) + "_samples" +
                    str(stats['total_samples']) + "_eval" + str(avg_eval) + ".pth"))
    log_file.close()


if __name__ == '__main__':
    import arguments
    optimize(arguments.get_args())
def main():
    """Main training program."""

    global global_example_count, global_token_count, event_writer, logdir, train_step, train_loss, best_val_loss, eval_start_time, log_start_time, epoch

    global_token_count = 0

    # Arguments.
    args = get_args()

    logdir = f'{args.logdir}'
    os.system(f'mkdir -p {logdir}')

    event_writer = SummaryWriter(logdir)
    log_tb("first", time.time())

    print('Pretrain BERT model')

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Pytorch distributed.
    initialize_distributed(args)

    # Random seeds for reproducability.
    set_random_seed(args.seed)

    # Data stuff.
    data_config = configure_data()
    data_config.set_defaults(data_set_type='BERT', transpose=False)
    (train_data, val_data, test_data), tokenizer = data_config.apply(args)
    args.data_size = tokenizer.num_tokens

    # Model, optimizer, and learning rate.
    model, optimizer, lr_scheduler, criterion = setup_model_and_optimizer(
        args, tokenizer)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        total_iters = 0
        skipped_iters = 0
        start_epoch = 1
        best_val_loss = float('inf')
        # Resume data loader if necessary.
        if args.resume_dataloader:
            start_epoch = args.epoch
            total_iters = args.total_iters
            train_data.batch_sampler.start_iter = total_iters % len(train_data)
        # For all epochs.
        for epoch in range(start_epoch, args.epochs + 1):
            timers('epoch time').start()
            iteration, skipped = train_epoch(epoch, model, optimizer,
                                             train_data, lr_scheduler,
                                             criterion, timers, args)
            elapsed_time = timers('epoch time').elapsed()
            total_iters += iteration
            skipped_iters += skipped
            lm_loss, nsp_loss = evaluate(val_data, model, criterion, args)
            val_loss = lm_loss + nsp_loss
            print('-' * 100)
            print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:.4E} | '
                  'valid LM Loss {:.4E} | valid NSP Loss {:.4E}'.format(
                      epoch, elapsed_time, val_loss, lm_loss, nsp_loss))
            print('-' * 100)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                if args.save:
                    best_path = 'best/model.pt'
                    print('saving best model to:',
                          os.path.join(args.save, best_path))
                    save_checkpoint(best_path, epoch + 1, total_iters, model,
                                    optimizer, lr_scheduler, args)

    except KeyboardInterrupt:
        print('-' * 100)
        print('Exiting from training early')
        if args.save:
            cur_path = 'current/model.pt'
            print('saving current model to:', os.path.join(args.save, cur_path))
            save_checkpoint(cur_path, epoch, total_iters, model, optimizer,
                            lr_scheduler, args)
        exit()

    if args.save:
        final_path = 'final/model.pt'
        print('saving final model to:', os.path.join(args.save, final_path))
        save_checkpoint(final_path, args.epochs, total_iters, model, optimizer,
                        lr_scheduler, args)

    if test_data is not None:
        # Run on test data.
        print('entering test')
        lm_loss, nsp_loss = evaluate(test_data, model, criterion, args)
        test_loss = lm_loss + nsp_loss
        print('=' * 100)
        print('| End of training | test loss {:5.4f} | test LM Loss {:.4E} |'
              ' test NSP Loss {:.4E}'.format(test_loss, lm_loss, nsp_loss))
        print('=' * 100)
    np.save('MRR' + str(opt.num_qbots) + str(opt.num_abots),
            mean_rec_rank_final.cpu().numpy())
    np.save('r1' + str(opt.num_qbots) + str(opt.num_abots),
            r1_final.cpu().numpy())
    np.save('r5' + str(opt.num_qbots) + str(opt.num_abots),
            r5_final.cpu().numpy())
    np.save('r10' + str(opt.num_qbots) + str(opt.num_abots),
            r10_final.cpu().numpy())
    return


##############################
# Main Code Execution Starts Here
##############################
opt = get_args()

opt.manualSeed = random.randint(1, 10000)
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
if opt.cuda:
    torch.cuda.manual_seed_all(opt.manualSeed)

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

t = datetime.datetime.now()
cur_time = '%s-%s-%s' % (t.day, t.month, t.hour)
save_path = os.path.join(opt.outf, cur_time)
def apt_select():
    """Run apt-select: Ubuntu archive mirror reporting tool"""

    parser = get_args()
    args = parser.parse_args()
    top_number = args.top_number[0]
    ping_only = args.ping_only
    list_only = args.list_only
    choose = args.choose
    min_status = args.min_status[0].replace('-', ' ')

    if not ping_only and (min_status != 'unknown'):
        # Convert status argument to format used by Launchpad
        min_status = min_status[0].upper() + min_status[1:]

    if choose and (not top_number or top_number < 2):
        parser.print_usage()
        exit((
            "error: -c/--choose option requires -t/--top-number NUMBER "
            "where NUMBER is greater than 1."
        ))

    try:
        release = check_output(["lsb_release", "-ics"])
    except OSError:
        not_ubuntu()
    else:
        release = [s.strip() for s in release.decode('utf-8').split()]

    if release[0] == 'Debian':
        exit("Debian is not currently supported")
    elif release[0] != 'Ubuntu':
        not_ubuntu()

    directory = '/etc/apt/'
    apt_file = 'sources.list'
    sources_path = directory + apt_file
    if not path.isfile(sources_path):
        exit("%s must exist as file" % sources_path)

    mirrors_loc = "mirrors.ubuntu.com"
    mirrors_url = "http://%s/mirrors.txt" % mirrors_loc
    stderr.write("Getting list of mirrors...")
    try:
        mirrors_list = get_html(mirrors_url)
    except HTMLGetError as err:
        exit("Error getting list from %s:\n\t%s" % (mirrors_url, err))
    stderr.write("done.\n")
    mirrors_list = mirrors_list.splitlines()

    codename = release[1][0].upper() + release[1][1:]
    hardware = check_output(["uname", "-m"]).strip().decode('utf-8')
    if hardware == 'x86_64':
        hardware = 'amd64'
    else:
        hardware = 'i386'

    archives = Mirrors(mirrors_list, ping_only, min_status)
    archives.get_rtts()
    if archives.got["ping"] < top_number:
        top_number = archives.got["ping"]

    if top_number == 0:
        exit("Cannot connect to any mirrors in %s\n." % mirrors_list)

    if not ping_only:
        archives.get_launchpad_urls()
        if not archives.abort_launch:
            # Mirrors needs a limit to stop launching threads
            archives.status_num = top_number
            stderr.write("Looking up %d status(es)\n" % top_number)
            archives.lookup_statuses(min_status, codename, hardware)

        if top_number > 1:
            stderr.write('\n')

    repo_name = ""
    found = False
    skip_gen_msg = "Skipping file generation."
    with open(sources_path, 'r') as sources_file:
        lines = sources_file.readlines()
        repos = []
        required_repo = "main"
        for line in lines:
            fields = line.split()
            if confirm_mirror(fields):
                if (not found and
                        (release[1] in fields[2]) and
                        (fields[3] == required_repo)):
                    repos += [fields[1]]
                    found = True
                    continue
                elif fields[2] == '%s-security' % (release[1]):
                    repos += [fields[1]]
                    break

    if not repos:
        stderr.write((
            "Error finding current %s repository in %s\n%s\n" %
            (required_repo, sources_path, skip_gen_msg)
        ))
    else:
        repo_name = repos[0]

    rank = 0
    current_key = -1
    if ping_only:
        archives.top_list = archives.ranked[:top_number + 1]

    for url in archives.top_list:
        info = archives.urls[url]
        host = info["Host"]
        if url == repo_name:
            host += " (current)"
            current_key = rank

        if not ping_only and not archives.abort_launch:
            if "Status" in info:
                assign_defaults(info, ("Org", "Speed"), "N/A")
                print((
                    "%(rank)d. %(mirror)s\n%(tab)sLatency: %(ms)d ms\n"
                    "%(tab)sOrg: %(org)s\n%(tab)sStatus: %(status)s\n"
                    "%(tab)sSpeed: %(speed)s" % {
                        'tab': ' ',
                        'rank': rank + 1,
                        'mirror': host,
                        'ms': info["Latency"],
                        'org': info["Organisation"],
                        'status': info["Status"],
                        'speed': info["Speed"]
                    }
                ))
        else:
            print("%d. %s: %d ms" % (rank + 1, info["Host"], info["Latency"]))

        rank += 1
        if rank == top_number:
            break

    key = 0
    if choose:
        key = ask((
            "Choose a mirror (1 - %d)\n'q' to quit " %
            len(archives.top_list)
        ))
        while True:
            try:
                key = int(key)
            except ValueError:
                if key == 'q':
                    exit()
            if (type(key) is not str) and (key >= 1) and (key <= rank):
                break
            key = ask("Invalid entry ")
        key -= 1

    if list_only:
        exit()

    # Avoid generating duplicate sources.list
    if current_key == key:
        exit((
            "%s is the currently used mirror.\n%s" %
            (archives.urls[repo_name]["Host"], skip_gen_msg)
        ))

    mirror = archives.top_list[key]
    lines = ''.join(lines)
    for repo in repos:
        lines = lines.replace(repo, mirror)

    work_dir = getcwd()
    if work_dir == directory[0:-1]:
        query = (
            "'%(dir)s' is the current directory.\n"
            "Generating a new '%(apt)s' file will "
            "overwrite the current file.\n"
            "You should copy or backup '%(apt)s' before replacing it.\n"
            "Continue?\n[yes|no] " % {
                'dir': directory,
                'apt': apt_file
            }
        )
        yes_or_no(query)

    write_file = work_dir.rstrip('/') + '/' + apt_file
    try:
        with open(write_file, 'w') as sources_file:
            sources_file.write(lines)
    except IOError as err:
        exit("Unable to generate sources.list:\n\t%s\n" % err)
    else:
        print("New config file saved to %s" % write_file)

    exit()
import torch
import torch.utils.data as td
import numpy as np
import scipy.io as sio

import data_handler
import networks
import trainer
import arguments
# import deepspeed
from sklearn.utils import shuffle
from sklearn.metrics import roc_auc_score
from tqdm import tqdm

args = arguments.get_args()

torch.set_default_tensor_type('torch.cuda.FloatTensor')
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = True

dataset = data_handler.DatasetFactory.get_dataset(args.dataset)
# loader = dataset.loader

seed = args.seed
m = args.memory_budget

# Fix the seed.
args.seed = seed
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
def main():
    """Main training program."""

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()

    # Pytorch distributed.
    initialize_distributed(args)
    if torch.distributed.get_rank() == 0:
        print('Pretrain GPT2 model')
        print_args(args)

    # Random seeds for reproducability.
    set_random_seed(args.seed)

    # Prepare log file.
    os.makedirs(args.save, exist_ok=True)
    with open(args.log_file, "w") as f:
        f.write("Logging:\n")

    # Model, optimizer, and learning rate.
    with open(args.student_config_path, "r") as f:
        student_config = json.load(f)
    student_model, optimizer, lr_scheduler, student_iteration = setup_model_and_optimizer(
        args, student_config, need_optim=True, ckpt_path=args.student_load,
        do_fp16=args.fp16)
    args.iteration = student_iteration

    teacher_model = None
    if args.teacher_config_path is not None:
        with open(args.teacher_config_path, "r") as f:
            teacher_config = json.load(f)
        teacher_model, _, _, _ = setup_model_and_optimizer(
            args, teacher_config, need_optim=True, ckpt_path=args.teacher_load,
            do_fp16=(args.fp16 or args.teacher_fp16))

    if torch.distributed.get_rank() == 0:
        print(student_iteration)

    train_data_iterator, val_data_iterator, test_data_iterator = \
        build_train_valid_test_data_iterators(
            train_valid_test_dataset_provider, args)

    iteration = 0
    if args.do_train:
        iteration, skipped = train(student_model, teacher_model, optimizer,
                                   lr_scheduler, train_data_iterator,
                                   val_data_iterator, timers, args)

        prefix = 'the end of training for val data'
        evaluate_and_print_results(prefix, val_data_iterator, student_model,
                                   teacher_model, args, timers, False)

    if args.save and iteration != 0:
        save_checkpoint(iteration, student_model, optimizer, lr_scheduler, args)

    if args.do_test:
        # Run on test data.
        prefix = 'the end of training for test data'
        evaluate_and_print_results(prefix, test_data_iterator, student_model,
                                   teacher_model, args, timers, True)
    if is_value_list:
        for rk, rv in sorted(results_dict.items()):
            if keys_to_print is not None and rk not in keys_to_print:
                continue
            cprint_and_append("{}: {}".format(rk, rv))
    return line_list


if __name__ == '__main__':

    num_total_runs = 7

    main_args = get_args(
        model_name="GAT",  # GAT, GCN
        dataset_class="Planetoid",  # Planetoid, FullPlanetoid, RandomPartitionGraph
        dataset_name="Cora",  # Cora, CiteSeer, PubMed, rpg-10-500-0.1-0.025
        custom_key="EV13NSO8",  # NEO8, NEDPO8, EV13NSO8, EV9NSO8, EV1O8, EV2O8, -500, -Link, -ES, -ATT
    )
    pprint_args(main_args)

    if len(main_args.gpu_deny_list) == main_args.num_gpus_total:
        alloc_gpu = [None]
        cprint("Use CPU", "yellow")
    else:
        alloc_gpu = blind_other_gpus(num_gpus_total=main_args.num_gpus_total,
                                     num_gpus_to_use=main_args.num_gpus_to_use,
                                     gpu_deny_list=main_args.gpu_deny_list)
        if not alloc_gpu:
            alloc_gpu = [int(
def main():
    config = None
    args = get_args()
    config, checkpoint = get_config_and_checkpoint(args)

    set_random_seeds(args, config)
    eval_log_dir = args.save_dir + "_eval"
    try:
        os.makedirs(args.save_dir)
        os.makedirs(eval_log_dir)
    except OSError:
        pass

    now = datetime.datetime.now()
    experiment_name = args.experiment_name + '_' + now.strftime("%Y-%m-%d_%H-%M-%S")

    # Create checkpoint file
    save_dir_model = os.path.join(args.save_dir, 'model', experiment_name)
    save_dir_config = os.path.join(args.save_dir, 'config', experiment_name)
    try:
        os.makedirs(save_dir_model)
        os.makedirs(save_dir_config)
    except OSError as e:
        logger.error(e)
        exit()

    if args.config:
        shutil.copy2(args.config, save_dir_config)

    curriculum = args.follow_curriculum
    if args.follow_curriculum:
        print('Using preset curriculum')

    # Tensorboard Logging
    writer = SummaryWriter(os.path.join(args.save_dir, 'tensorboard', experiment_name))

    # Logger that writes to STDOUT and a file in the save_dir
    logger = setup_carla_logger(args.save_dir, experiment_name)

    device = torch.device("cuda:0" if args.cuda else "cpu")
    norm_reward = not config.no_reward_norm
    norm_obs = not config.no_obs_norm

    assert not (config.num_virtual_goals > 0) or (config.reward_class == 'SparseReward'), \
        'Cant use HER with dense reward'

    obs_converter = CarlaObservationConverter(h=84, w=84, rel_coord_system=config.rel_coord_system)
    action_converter = CarlaActionsConverter(config.action_type)
    envs = make_vec_envs(obs_converter, action_converter, args.starting_port,
                         config.seed, config.num_processes, config.gamma,
                         device, config.reward_class, num_frame_stack=1,
                         subset=config.experiments_subset,
                         norm_reward=norm_reward, norm_obs=norm_obs,
                         apply_her=config.num_virtual_goals > 0,
                         video_every=args.video_interval,
                         video_dir=os.path.join(args.save_dir, 'video', experiment_name),
                         curriculum=curriculum)

    if config.agent == 'forward':
        agent = agents.ForwardCarla()

    if config.agent == 'vpg':
        agent = agents.VPGCarla(obs_converter, action_converter,
                                config.value_loss_coef, config.entropy_coef,
                                lr=config.lr, eps=config.eps,
                                alpha=config.alpha, gamma=config.gamma,
                                max_grad_norm=config.max_grad_norm)

    if config.agent == 'a2c':
        agent = agents.A2CCarla(obs_converter, action_converter,
                                config.value_loss_coef, config.entropy_coef,
                                lr=config.lr, eps=config.eps,
                                alpha=config.alpha,
                                max_grad_norm=config.max_grad_norm)
    elif config.agent == 'acktr':
        agent = agents.A2CCarla(obs_converter, action_converter,
                                config.value_loss_coef, config.entropy_coef,
                                lr=config.lr, eps=config.eps,
                                alpha=config.alpha,
                                max_grad_norm=config.max_grad_norm,
                                acktr=True)
    elif config.agent == 'ppo':
        agent = agents.PPOCarla(obs_converter, action_converter,
                                config.clip_param, config.ppo_epoch,
                                config.num_mini_batch, config.value_loss_coef,
                                config.entropy_coef, lr=config.lr,
                                eps=config.eps,
                                max_grad_norm=config.max_grad_norm)

    if checkpoint is not None:
        load_modules(agent.optimizer, agent.model, checkpoint)

    rollouts = RolloutStorage(config.num_steps, config.num_processes,
                              envs.observation_space, envs.action_space, 20,
                              config.num_virtual_goals, config.rel_coord_system,
                              obs_converter)

    obs = envs.reset()
    # Save the first observation
    obs = obs_to_dict(obs)
    rollouts.obs = obs_to_dict(rollouts.obs)
    for k in rollouts.obs:
        rollouts.obs[k][rollouts.step + 1].copy_(obs[k])
    rollouts.obs = dict_to_obs(rollouts.obs)
    rollouts.to(device)

    start = time.time()
    total_steps = 0
    total_episodes = 0
    total_reward = 0

    episode_reward = torch.zeros(config.num_processes)

    for j in range(config.num_updates):

        for step in range(config.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = agent.act(
                    rollouts.get_obs(step),
                    rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, info = envs.step(action)

            # For logging purposes
            carla_rewards = torch.tensor([i['carla-reward'] for i in info], dtype=torch.float)
            episode_reward += carla_rewards
            total_reward += carla_rewards.sum().item()
            total_steps += config.num_processes * config.num_steps

            if done.any():
                total_episodes += done.sum()
                torch_done = torch.tensor(done.astype(int)).byte()
                mean_episode_reward = episode_reward[torch_done].mean().item()
                logger.info('{} episode(s) finished with reward {}'.format(done.sum(), mean_episode_reward))
                writer.add_scalar('train/mean_ep_reward_vs_steps', mean_episode_reward, total_steps)
                writer.add_scalar('train/mean_ep_reward_vs_episodes', mean_episode_reward, total_episodes)
                episode_reward[torch_done] = 0

            # If done then clean the history of observations.
            masks = torch.FloatTensor(1 - done)

            rollouts.insert(obs, recurrent_hidden_states, action, action_log_prob,
                            value, reward, masks.unsqueeze(-1))

        if config.num_virtual_goals > 0:
            rollouts.apply_her(config.num_virtual_goals, device, beta=config.beta)

        with torch.no_grad():
            next_value = agent.get_value(rollouts.get_obs(-1),  # Get last observation
                                         rollouts.recurrent_hidden_states[-1],
                                         rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, config.use_gae, config.gamma, config.tau)
        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()

        if j % args.save_interval == 0 and args.save_dir != "" and config.agent != 'forward':
            save_path = os.path.join(save_dir_model, str(j) + '.pth.tar')
            save_modules(agent.optimizer, agent.model, args, config, save_path)

        total_num_steps = (j + 1) * config.num_processes * config.num_steps

        if j % args.log_interval == 0:

            # Logging to the stdout/our logs
            end = time.time()
            logger.info('------------------------------------')
            logger.info('Episodes {}, Updates {}, num timesteps {}, FPS {}'
                        .format(total_episodes, j + 1, total_num_steps,
                                total_num_steps / (end - start)))
            logger.info('------------------------------------')

            # Logging to tensorboard
            writer.add_scalar('train/cum_reward_vs_steps', total_reward, total_steps)
            writer.add_scalar('train/cum_reward_vs_updates', total_reward, j + 1)

            if config.agent in ['a2c', 'acktr', 'ppo']:
                writer.add_scalar('debug/value_loss_vs_steps', value_loss, total_steps)
                writer.add_scalar('debug/value_loss_vs_updates', value_loss, j + 1)
                writer.add_scalar('debug/action_loss_vs_steps', action_loss, total_steps)
                writer.add_scalar('debug/action_loss_vs_updates', action_loss, j + 1)
                writer.add_scalar('debug/dist_entropy_vs_steps', dist_entropy, total_steps)
                writer.add_scalar('debug/dist_entropy_vs_updates', dist_entropy, j + 1)

            # Sample the last reward
            writer.add_scalar('debug/sampled_normalized_reward_vs_steps', reward.mean(), total_steps)
            writer.add_scalar('debug/sampled_normalized_reward_vs_updates', reward.mean(), j + 1)
            writer.add_scalar('debug/sampled_carla_reward_vs_steps', carla_rewards.mean(), total_steps)
            writer.add_scalar('debug/sampled_carla_reward_vs_updates', carla_rewards.mean(), j + 1)

        if (args.eval_interval is not None and j % args.eval_interval == 0):
            eval_envs = make_vec_envs(args.env_name, args.starting_port,
                                      obs_converter, args.x + config.num_processes,
                                      config.num_processes, config.gamma,
                                      eval_log_dir, config.add_timestep, device,
                                      True, curriculum)

            vec_norm = get_vec_normalize(eval_envs)
            if vec_norm is not None:
                vec_norm.ob_rms = get_vec_normalize(envs).ob_rms

            eval_episode_rewards = []

            obs = eval_envs.reset()
            eval_recurrent_hidden_states = torch.zeros(config.num_processes, 20, device=device)
            eval_masks = torch.zeros(config.num_processes, 1, device=device)

            while len(eval_episode_rewards) < 10:
                with torch.no_grad():
                    _, action, _, eval_recurrent_hidden_states = agent.act(
                        obs,
                        eval_recurrent_hidden_states,
                        eval_masks,
                        deterministic=True)

                # Observe reward and next obs
                carla_obs, reward, done, infos = eval_envs.step(action)

                eval_masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                                for done_ in done])
                for info in infos:
                    if 'episode' in info.keys():
                        eval_episode_rewards.append(info['episode']['r'])

            eval_envs.close()

            logger.info(" Evaluation using {} episodes: mean reward {:.5f}\n".format(
                len(eval_episode_rewards), np.mean(eval_episode_rewards)))
import os
import random
import sys
from pathlib import Path
from shutil import copyfile

from arguments import get_args

random.seed(1)

args = get_args('args for datasplit (cityscapes)', mode='data')

current_path = os.path.abspath('')
train_path = "/home/himanshu/cityscape/leftImg8bit/train"
train_path_lb = "/home/himanshu/cityscape/gtFine/train"

perc = args.percentage
if perc < 0 or perc > 100:
    print('Illegal usage of -p, only between 0 and 100')
    sys.exit(0)

destination = Path("/home/akshay/cityscape/frac" + str(perc) + "/leftImg8bit/train")
os.makedirs(str(destination), exist_ok=True)
destination_lb = Path("/home/akshay/cityscape/frac" + str(perc) + "/gtFine/train")
os.makedirs(str(destination_lb), exist_ok=True)

for folder in os.listdir(train_path):
    des1 = Path(destination / folder)
            feature = model(images.to(args.device))
            preds = classifier(feature)
            loss = F.cross_entropy(preds, labels.to(args.device))
            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())
            lr = lr_scheduler.step()
            local_progress.set_postfix({
                'lr': lr,
                "loss": loss_meter.val,
                'loss_avg': loss_meter.avg
            })

    classifier.eval()
    correct, total = 0, 0
    acc_meter.reset()
    for idx, (images, labels) in enumerate(test_loader):
        with torch.no_grad():
            feature = model(images.to(args.device))
            preds = classifier(feature).argmax(dim=1)
            correct = (preds == labels.to(args.device)).sum().item()
            acc_meter.update(correct / preds.shape[0])
    print(f'Accuracy = {acc_meter.avg * 100:.2f}')


if __name__ == "__main__":
    main(args=get_args())
def main():
    doers = _get_doers(shell)
    doers.update(_get_doers(utils))
    possible_actions = doers.keys() + ['start', 'stop', 'status']

    args = arguments.get_args(possible_actions)

    if args.action is None:
        print('No action')
        return 65  # os.EX_DATAERR

    apiclient = None
    verify = True
    if args.insecure:
        verify = False

    if args.no_api is False:
        apiclient = client.Client(opts=args, verify=verify)
        if args.client_id:
            apiclient.client_id = args.client_id
    else:
        if winutils.is_windows():
            print("--no-api mode is not available on windows")
            return 69  # os.EX_UNAVAILABLE

    if args.action in doers:
        try:
            return doers[args.action](apiclient, args)
        except Exception as e:
            print('ERROR {0}'.format(e))
            return 70  # os.EX_SOFTWARE

    freezer_scheduler = FreezerScheduler(apiclient=apiclient,
                                         interval=int(args.interval),
                                         job_path=args.jobs_dir)

    if args.no_daemon:
        print('Freezer Scheduler running in no-daemon mode')
        daemon = NoDaemon(daemonizable=freezer_scheduler)
    else:
        if winutils.is_windows():
            daemon = Daemon(daemonizable=freezer_scheduler,
                            interval=int(args.interval),
                            job_path=args.jobs_dir,
                            insecure=args.insecure)
        else:
            daemon = Daemon(daemonizable=freezer_scheduler)

    if args.action == 'start':
        daemon.start(log_file=args.log_file)
    elif args.action == 'stop':
        daemon.stop()
    elif args.action == 'reload':
        daemon.reload()
    elif args.action == 'status':
        daemon.status()

    # os.RETURN_CODES are only available to posix like systems, on windows
    # we need to translate the code to an actual number which is the equivalent
    return 0  # os.EX_OK
def main():
    args = get_args()

    model_types = ['conv_net', 'conv_net_attn', 'transformer']
    assert args.model_type in model_types

    raw_data = data_utils.read_smiles_ring_data('%s/raw.csv' % args.data)
    atom_predictor, optimizer = init_model(args, args.n_classes)
    data_utils.load_shortest_paths(args)  # Shortest paths includes all splits

    agg_stats = ['loss', 'nei_score', 'acc', 'auc', 'gnorm', 'gnorm_clip']
    selection_stat = 'acc'
    select_higher = True

    if args.test_mode:
        dataset_loaders = load_datasets(raw_data, 0, args)
        test_model(
            dataset_loaders=dataset_loaders,
            model=atom_predictor,
            stat_names=agg_stats,
            train_func=run_epoch,
            args=args,
        )
        exit()

    all_stats = {}
    for name in agg_stats:
        all_stats[name] = []

    output_dir = args.output_dir
    all_model_paths = []
    for round_idx in range(args.n_rounds):
        dataset_loaders = load_datasets(raw_data, round_idx, args, n_workers=0)
        atom_predictor, optimizer = init_model(args, args.n_classes)

        cur_output_dir = '%s/run_%d' % (output_dir, round_idx)
        args.output_dir = cur_output_dir
        create_dirs(args, cur_output_dir)

        test_stats, best_model_path = train_model(
            dataset_loaders=dataset_loaders,
            model=atom_predictor,
            optimizer=optimizer,
            stat_names=agg_stats,
            selection_stat=selection_stat,
            train_func=run_epoch,
            args=args,
            select_higher=select_higher,
        )

        # Aggregate stats of interest
        for name in agg_stats:
            all_stats[name].append(test_stats[name])
        all_model_paths.append(best_model_path)

    # Write summary file
    summary_file = open('%s/summary.txt' % output_dir, 'w+')
    for name, stats_arr in all_stats.items():
        stats = np.array(stats_arr)
        mean, std = np.mean(stats), np.std(stats)
        stats_string = '%s: %s, mean: %.3f, std: %.3f' % (name, str(stats_arr), mean, std)
        print(stats_string)
        summary_file.write('%s\n' % stats_string)
    for model_path in all_model_paths:
        summary_file.write('%s\n' % model_path)
    summary_file.close()