def main():
    args = argparser()
    args.clip_rewards = False
    env = make_atari(args.env)
    env = wrap_atari_dqn(env, args)
    seed = args.seed + 1122
    utils.set_global_seeds(seed, use_torch=True)
    env.seed(seed)

    model = DuelingDQN(env)
    model.load_state_dict(torch.load('model.pth', map_location='cpu'))

    episode_reward, episode_length = 0, 0
    state = env.reset()
    while True:
        if args.render:
            env.render()
        action, _ = model.act(torch.FloatTensor(np.array(state)), 0.)
        next_state, reward, done, _ = env.step(action)

        state = next_state
        episode_reward += reward
        episode_length += 1

        if done:
            state = env.reset()
            print("Episode Length / Reward: {} / {}".format(episode_length, episode_reward))
            episode_reward = 0
            episode_length = 0
async def main(): """ main event loop """ args = argparser() utils.set_global_seeds(args.seed, use_torch=False) procs = [ Process(target=recv_batch_device), Process(target=recv_prios_device), Process(target=send_batch_device), ] for p in procs: p.start() buffer = CustomPrioritizedReplayBuffer(args.replay_buffer_size, args.alpha) exe = ThreadPoolExecutor() event = asyncio.Event() lock = asyncio.Lock() # TODO: How to decide the proper number of asyncio workers? workers = [] for _ in range(args.n_recv_batch_worker): w = recv_batch_worker(buffer, exe, event, lock, args.threshold_size) workers.append(w) for _ in range(args.n_recv_prios_worker): w = recv_prios_worker(buffer, exe, event, lock) workers.append(w) for _ in range(args.n_send_batch_worker): w = send_batch_worker(buffer, exe, event, lock, args.batch_size, args.beta) workers.append(w) await asyncio.gather(*workers) return True
def main():
    learner_ip = get_environ()
    args = argparser()
    writer = SummaryWriter(comment="-{}-eval".format(args.env))

    ctx = zmq.Context()
    param_socket = ctx.socket(zmq.SUB)
    param_socket.setsockopt(zmq.SUBSCRIBE, b'')
    param_socket.setsockopt(zmq.CONFLATE, 1)
    param_socket.connect('tcp://{}:52001'.format(learner_ip))

    env = make_atari(args.env)
    env = wrap_atari_dqn(env, args)
    seed = args.seed + 1122
    utils.set_global_seeds(seed, use_torch=True)
    env.seed(seed)

    model = DuelingDQN(env)

    data = param_socket.recv(copy=False)
    param = pickle.loads(data)
    model.load_state_dict(param)
    print("Loaded first parameter from learner")

    episode_reward, episode_length, episode_idx = 0, 0, 0
    state = env.reset()
    while True:
        if args.render:
            env.render()
        action, _ = model.act(torch.FloatTensor(np.array(state)), 0.01)
        next_state, reward, done, _ = env.step(action)

        state = next_state
        episode_reward += reward
        episode_length += 1

        if done:
            state = env.reset()
            writer.add_scalar("eval/episode_reward", episode_reward, episode_idx)
            writer.add_scalar("eval/episode_length", episode_length, episode_idx)
            episode_reward = 0
            episode_length = 0
            episode_idx += 1

            if episode_idx % args.eval_update_interval == 0:
                data = param_socket.recv(copy=False)
                param = pickle.loads(data)
                model.load_state_dict(param)
def main():
    args = argparser()
    args.clip_rewards = False
    args.episode_life = False
    env = make_atari(args.env)
    env = wrap_atari_dqn(env, args)

    # seed = args.seed + 1122
    # utils.set_global_seeds(seed, use_torch=True)
    # env.seed(seed)

    model = DuelingDQN(env, args)
    model.load_state_dict(torch.load('model.pth', map_location='cpu'))

    episode_reward, episode_length = 0, 0
    state = env.reset()

    if not os.path.exists('plays'):
        os.mkdir('plays')
    video = cv2.VideoWriter('plays/tmp.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, (160, 210))

    while True:
        img = env.render(mode='rgb_array')

        model.zero_grad()
        state = torch.tensor(state[np.newaxis, :], dtype=torch.float32, requires_grad=True)
        value, action = model(state).max(1)
        value = value[0]
        action = action[0]
        value.backward()

        # Saliency mask: gradient of the greedy action's Q-value w.r.t. the input frame.
        img_gradient = np.abs(state.grad.numpy())
        img_gradient = np.sum(img_gradient, axis=(0, 1))
        img_gradient = (img_gradient - np.min(img_gradient)) / (np.max(img_gradient) - np.min(img_gradient))
        img_gradient = img_gradient.transpose()
        img_gradient = cv2.resize(img_gradient, (160, 210))[..., np.newaxis]
        img_gradient = img_gradient * 255

        # Clip before casting to uint8 so overflowing pixel values saturate instead of wrapping around.
        masked_img = np.clip(img + img_gradient, 0, 255).astype(np.uint8)
        video.write(masked_img)

        next_state, reward, done, _ = env.step(int(action))

        state = next_state
        episode_reward += reward
        episode_length += 1

        if done:
            state = env.reset()
            print("Episode Length / Reward: {} / {}".format(episode_length, episode_reward))
            video.release()
            os.rename('plays/tmp.avi', f'plays/{args.env}-{episode_reward}.avi')
            video = cv2.VideoWriter('plays/tmp.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, (160, 210))
            episode_reward = 0
            episode_length = 0
def main():
    learner_ip = get_environ()
    args = argparser()

    param_queue = Queue(maxsize=3)
    procs = [
        Process(target=exploration_eval, args=(args, -1, param_queue)),
        Process(target=recv_param_eval, args=(learner_ip, -1, param_queue)),
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    return True
def main():
    actor_id, n_actors, replay_ip, learner_ip = get_environ()
    args = argparser()

    param_queue = Queue(maxsize=3)
    procs = [
        Process(target=vector_exploration, args=(args, actor_id, n_actors, replay_ip, param_queue)),
        Process(target=recv_param, args=(learner_ip, actor_id, param_queue)),
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    return True
def run():
    args = argparser()
    path = utils.create_log_dir(sys.argv)
    utils.start(args.http_port)

    env = Env(args)
    agents = [Agent(args) for _ in range(args.n_agent)]
    master = Master(args)
    for agent in agents:
        master.add_agent(agent)
    master.add_env(env)

    success_list = []
    time_list = []
    for idx in range(args.n_episode):
        print('=' * 80)
        print("Episode {}".format(idx + 1))

        # Reset the server's stack and timer.
        print("Resetting the server...")
        master.reset(path)

        # Start the episode.
        master.start()

        # Train the agents.
        master.train()
        print('=' * 80)

        success_list.append(master.infos["is_success"])
        time_list.append(master.infos["end_time"] - master.infos["start_time"])

        if (idx + 1) % args.print_interval == 0:
            print("=" * 80)
            print("EPISODE {}: Avg. Success Rate / Time: {:.2} / {:.2}".format(
                idx + 1, np.mean(success_list), np.mean(time_list)))
            success_list.clear()
            time_list.clear()
            print("=" * 80)

        if (idx + 1) % args.checkpoint_interval == 0:
            utils.save_checkpoints(path, agents, idx + 1)

    if args.visual:
        visualize(path, args)

    print("Done")
    utils.close()
def main():
    n_actors, replay_ip = get_environ()
    args = argparser()

    # TODO: Need to adjust the maxsize of prios, param queue
    batch_queue = Queue(maxsize=args.queue_size)
    prios_queue = Queue(maxsize=args.prios_queue_size)
    param_queue = Queue(maxsize=3)
    procs = [
        Process(target=train, args=(args, n_actors, batch_queue, prios_queue, param_queue)),
        Process(target=send_param, args=(param_queue,)),
        Process(target=send_prios, args=(prios_queue, replay_ip)),
    ]
    for _ in range(args.n_recv_batch_process):
        p = Process(target=recv_batch, args=(batch_queue, replay_ip, args.device))
        procs.append(p)

    for p in procs:
        p.start()
    for p in procs:
        p.join()
def run_trial(cnt):
    args = argparser()
    output = Value('f', 0)
    myredis = redis.StrictRedis()

    pub_list = []
    sub_proc = Process(target=sub, kwargs={
        'myredis': myredis,
        'name': 'reader1',
        'n_seconds': args.n_seconds
    })
    sub_proc.start()

    for _ in range(args.n_threads):
        pub_proc = Process(target=pub, args=[myredis, args.n_seconds, output])
        pub_list.append(pub_proc)
        pub_proc.start()

    procs = [sub_proc] + pub_list
    for proc in procs:
        proc.join()

    print(f'pub_sum : {output.value}')
    nni.report_final_result(output.value)
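# --- Illustrative sketch only, not part of the repository ---------------------
# The pub/sub workers launched above are defined elsewhere; this sketch assumes a
# single channel named 'benchmark' and that `output` is the shared
# multiprocessing.Value('f', 0) accumulating the number of messages published
# within n_seconds.
import time

def pub(myredis, n_seconds, output):
    deadline = time.time() + n_seconds
    while time.time() < deadline:
        myredis.publish('benchmark', 'x')
        with output.get_lock():
            output.value += 1

def sub(myredis, name, n_seconds):
    pubsub = myredis.pubsub()
    pubsub.subscribe('benchmark')
    deadline = time.time() + n_seconds
    while time.time() < deadline:
        pubsub.get_message(timeout=1.0)  # returns None if nothing arrives within the timeout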
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from custom_trading_env import TradingEnv
from utils import device
import DQNTradingAgent.dqn_agent as dqn_agent
from custom_hyperparameters import hyperparams
from arguments import argparser

args = argparser()  # device_num, save_num, risk_aversion, n_episodes, fee

device = torch.device("cuda:{}".format(args.device_num))
dqn_agent.set_device(device)

save_location = 'saves/{}'.format(args.save_num)
if not os.path.exists(save_location):
    os.makedirs(save_location)

save_interval = 200
print_interval = 1

n_episodes = args.n_episodes
sample_len = 480
def run_regression(FLAGS):
    perfmeasure = get_cindex
    deepmethod = build_combined_categorical
    experiment(FLAGS, perfmeasure, deepmethod)


class CustomStopper(keras.callbacks.EarlyStopping):
    # EarlyStopping variant that ignores epochs before start_epoch.
    def __init__(self, monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto',
                 start_epoch=100):  # add argument for starting epoch
        super(CustomStopper, self).__init__(monitor=monitor, min_delta=min_delta,
                                            patience=patience, verbose=verbose, mode=mode)
        self.start_epoch = start_epoch

    def on_epoch_end(self, epoch, logs=None):
        if epoch > self.start_epoch:
            super().on_epoch_end(epoch, logs)


if __name__ == "__main__":
    FLAGS = argparser()
    FLAGS.log_dir = FLAGS.log_dir + str(time.time()) + "/"

    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    logging(str(FLAGS), FLAGS)
    run_regression(FLAGS)
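# --- Illustrative usage only; the model and data names below are placeholders, not
# the repository's actual training call. CustomStopper behaves like EarlyStopping
# but skips every epoch before start_epoch, so noisy early validation losses cannot
# stop training prematurely.
early_stop = CustomStopper(monitor='val_loss', patience=15, verbose=1, start_epoch=40)
model.fit([train_drugs, train_prots], train_Y,
          validation_data=([val_drugs, val_prots], val_Y),
          epochs=200, batch_size=256,
          callbacks=[early_stop])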
def main():
    # argparser() is defined in arguments.py
    args = argparser().parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count() if torch.cuda.is_available() and not args.no_cuda else 0
    else:
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Prepare our task
    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % (args.task_name))
    processor = processors[args.task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    args.model_type = args.model_type.lower()
    config = AutoConfig.from_pretrained(
        args.config_name if args.config_name else args.model_path,
        num_labels=num_labels,
        finetuning_task=args.task_name,
        cache_dir=args.cache_dir if args.cache_dir else None)
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_path,
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    add_special_tokens(model, tokenizer, processor)

    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(tokenizer, "train", args)
        global_step, tr_loss = train(model, tokenizer, train_dataset, processor, args)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (model.module if hasattr(model, "module") else model)  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelForSequenceClassification.from_pretrained(args.output_dir)
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation
    assert not (args.do_test and args.do_eval)
    results = {}
    if (args.do_eval or args.do_test) and args.local_rank in [-1, 0]:
        mode = "dev" if args.do_eval else "test"
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c)
                for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
        logger.info("Evaluate(%s) the following checkpoints: %s", mode, checkpoints)

        for checkpoint in checkpoints:
            logger.info("Checkpoint: %s", checkpoint)
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split("/")[-1] if checkpoint.find("checkpoint") != -1 else ""

            model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(model, tokenizer, processor, mode, args, prefix=prefix)
            result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

    return results
label_row_inds, label_col_inds = np.where(np.isnan(Y) == False)  # indices of the known (non-NaN) affinity entries in Y

print("Logdir: " + FLAGS.log_dir)
s1_avgperf, s1_avgloss, s1_teststd = nfold_1_2_3_setting_sample(XD, XT, Y, label_row_inds, label_col_inds,
                                                                perfmeasure, FLAGS, dataset)

logging("Setting " + str(FLAGS.problem_type), FLAGS)
logging("avg_perf = %.5f, avg_mse = %.5f, std = %.5f" % (s1_avgperf, s1_avgloss, s1_teststd), FLAGS)
print("Setting " + str(FLAGS.problem_type))
print("avg_perf = %.5f, avg_mse = %.5f, std = %.5f" % (s1_avgperf, s1_avgloss, s1_teststd))


def run_regression(FLAGS):
    perfmeasure = get_cindex
    experiment(FLAGS, perfmeasure)


if __name__ == "__main__":
    FLAGS = argparser()
    FLAGS.log_dir = FLAGS.log_dir + str(time.time()) + "/"

    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    logging(str(FLAGS), FLAGS)
    print(str(FLAGS))
    run_regression(FLAGS)
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import DQNTradingAgent.dqn_agent as dqn_agent
from envs.trading_env_integrated import TradingEnv
from custom_hyperparameters import hyperparams
from arguments import argparser

args = argparser()  # device_num, save_num, risk_aversion, n_episodes

torch.cuda.manual_seed_all(7)
device = torch.device("cuda:{}".format(args.device_num))
dqn_agent.set_device(device)

save_location = 'saves/Original/{}'.format(args.save_num)
if not os.path.exists(save_location):
    os.makedirs(save_location)

save_interval = 1000
print_interval = 1

n_episodes = args.n_episodes
sample_len = 480