f'agent-{i}': SharedAdam(shared_models[f'agent-{i}'].parameters(), lr=args.lr) for i in range(args.agents) } shared_schedulers = { f'agent-{i}': torch.optim.lr_scheduler.StepLR(shared_optimizers[f'agent-{i}'], 1000000, 0.99) for i in range(args.agents) } info = { info_name: torch.DoubleTensor([0]).share_memory_() for info_name in ['run_epr', 'run_loss', 'episodes', 'frames', 'start_frames'] } logger.info("Loading previous shared models parameters.") frames = [] for agent_name, shared_model in shared_models.items(): frames.append(shared_model.try_load(args.save_dir, agent_name, logger) * 1e6) if min(frames) != max(frames): logger.warning("Loaded models do not have the same number of training frames between agents") info['frames'] += max(frames) info['start_frames'] += max(frames) logger.info("Launching processes...") processes = [] for rank in range(args.processes): p = mp.Process(target=train, args=(shared_models, shared_optimizers, shared_schedulers, rank, args, info)) p.start() processes.append(p) for p in processes: p.join()
def main(): args = flag_parser.parse_arguments() if args.model == "BaseModel" or args.model == "GCN": args.learned_loss = False args.num_steps = 50 target = nonadaptivea3c_val if args.eval else nonadaptivea3c_train else: args.learned_loss = True args.num_steps = 6 target = savn_val if args.eval else savn_train create_shared_model = model_class(args.model) init_agent = agent_class(args.agent_type) optimizer_type = optimizer_class(args.optimizer) if args.eval: main_eval(args, create_shared_model, init_agent) return start_time = time.time() local_start_time_str = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime(start_time)) np.random.seed(args.seed) torch.manual_seed(args.seed) random.seed(args.seed) if args.log_dir is not None: tb_log_dir = args.log_dir + "/" + args.title + "-" + local_start_time_str log_writer = SummaryWriter(log_dir=tb_log_dir) else: log_writer = SummaryWriter(comment=args.title) if args.gpu_ids == -1: args.gpu_ids = [-1] else: torch.cuda.manual_seed(args.seed) mp.set_start_method("spawn") shared_model = create_shared_model(args) train_total_ep = 0 n_frames = 0 if shared_model is not None: shared_model.share_memory() optimizer = optimizer_type( filter(lambda p: p.requires_grad, shared_model.parameters()), args) optimizer.share_memory() print(shared_model) else: assert (args.agent_type == "RandomNavigationAgent" ), "The model is None but agent is not random agent" optimizer = None processes = [] end_flag = mp.Value(ctypes.c_bool, False) train_res_queue = mp.Queue() for rank in range(0, args.workers): p = mp.Process( target=target, args=( rank, args, create_shared_model, shared_model, init_agent, optimizer, train_res_queue, end_flag, ), ) p.start() processes.append(p) time.sleep(0.1) print("Train agents created.") train_thin = args.train_thin train_scalars = ScalarMeanTracker() from tqdm import tqdm pbar = tqdm(total=1e8) try: while train_total_ep < args.max_ep: train_result = train_res_queue.get() train_scalars.add_scalars(train_result) train_total_ep += 1 pbar.update(train_result["ep_length"]) n_frames += train_result["ep_length"] if (train_total_ep % train_thin) == 0: log_writer.add_scalar("n_frames", n_frames, train_total_ep) tracked_means = train_scalars.pop_and_reset() for k in tracked_means: log_writer.add_scalar(k + "/train", tracked_means[k], train_total_ep) if (train_total_ep % args.ep_save_freq) == 0: print(n_frames) if not os.path.exists(args.save_model_dir): os.makedirs(args.save_model_dir) state_to_save = shared_model.state_dict() save_path = os.path.join( args.save_model_dir, "{0}_{1}_{2}_{3}.dat".format(args.title, n_frames, train_total_ep, local_start_time_str), ) torch.save(state_to_save, save_path) finally: pbar.close() log_writer.close() end_flag.value = True for p in processes: time.sleep(0.1) p.join()
device = "cuda" if args.cuda else "cpu" writer = SummaryWriter(comment=f"-a3c-data_pong_{args.name}") env = make_env() net = utils.A2C(env.observation_space.shape, env.action_space.n).to(device) net.share_memory() optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3) train_queue = mp.Queue(maxsize=PROCESSES_COUNT) data_proc_list = [] for _ in range(PROCESSES_COUNT): data_proc = mp.Process(target=data_func, args=(net, device, train_queue)) data_proc.start() data_proc_list.append(data_proc) batch_states = [] batch_actions = [] batch_vals_ref = [] step_idx = 0 batch_size = 0 try: with utils.RewardTracker(writer, REWARD_BOUND) as tracker: with ptan.common.utils.TBMeanTracker(writer, 100) as tb_tracker: while True: train_entry = train_queue.get() if isinstance(train_entry, TotalReward):
env = make_env()
net = common.AtariA2C(env.observation_space.shape, env.action_space.n).to(device)
net.share_memory()  # put the policy network in shared memory so every child process reads and updates the same parameters
print(net)

optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, eps=1e-3)

train_queue = mp.Queue(maxsize=PROCESSES_COUNT)  # queue through which the data-gathering processes send experience back to the learner
data_proc_list = []
# Spawn processes to run data_func
for _ in range(PROCESSES_COUNT):
    data_proc = mp.Process(
        target=data_func,
        args=(net, device, train_queue))  # the processes will run data_func()
    data_proc.start()
    data_proc_list.append(data_proc)

batch = []
step_idx = 0
try:
    with common.RewardTracker(
            writer, stop_reward=REWARD_BOUND
    ) as tracker:  # run until the reward goal is reached
        with ptan.common.utils.TBMeanTracker(
                writer, batch_size=100) as tb_tracker:  # averages scalars over 100 samples before writing them to TensorBoard
            while True:
                # Get one transition from the training queue
    'optimizer': optimizer.state_dict()
}, '%s/batch_%d' % (args.save_dir, batch_acm))


def init_processes(args, local_rank, fn, backend='nccl'):
    """ Initialize the distributed environment. """
    os.environ['MASTER_ADDR'] = args.MASTER_ADDR
    os.environ['MASTER_PORT'] = args.MASTER_PORT
    dist.init_process_group(backend, rank=args.start_rank + local_rank,
                            world_size=args.world_size)
    fn(args, local_rank)


if __name__ == "__main__":
    mp.set_start_method('spawn')
    args = parse_config()
    if args.world_size == 1:
        run(args, 0)
        exit(0)
    processes = []
    for rank in range(args.gpus):
        p = mp.Process(target=init_processes,
                       args=(args, rank, run, args.backend))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
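# The run() entry point launched above is defined elsewhere. A minimal,
# hypothetical sketch of what such a function can do once init_processes()
# has created the process group (names and body are illustrative only):
import torch
import torch.distributed as dist

def run(args, local_rank):
    # assumes dist.init_process_group() was already called, so collectives work
    device = torch.device('cuda', local_rank)
    t = torch.ones(1, device=device) * dist.get_rank()
    dist.all_reduce(t, op=dist.ReduceOp.SUM)  # sanity check: sum over all ranks
    print('rank %d sees %.0f' % (dist.get_rank(), t.item()))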
shared_model.load_state_dict(saved_state) shared_model.share_memory() if args.shared_optimizer: if args.optimizer == 'RMSprop': optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr) if args.optimizer == 'Adam': optimizer = SharedAdam(shared_model.parameters(), lr=args.lr, amsgrad=args.amsgrad) optimizer.share_memory() else: optimizer = None processes = [] print(shared_model) p = mp.Process(target=test, args=(args, shared_model, env_conf)) p.start() processes.append(p) time.sleep(0.1) for rank in range(0, args.workers): p = mp.Process(target=train, args=(rank, args, shared_model, optimizer, env_conf)) p.start() processes.append(p) time.sleep(0.1) for p in processes: time.sleep(0.1) p.join()
if __name__ == '__main__':
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    env = create_atari_env(args.env_name)
    shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)
    shared_model.share_memory()

    if args.no_shared:
        optimizer = None
    else:
        optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
        optimizer.share_memory()

    processes = []

    p = mp.Process(target=test, args=(args.num_processes, args, shared_model))
    p.start()
    processes.append(p)

    for rank in range(0, args.num_processes):
        p = mp.Process(target=train, args=(rank, args, shared_model, optimizer))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
args = parser.parse_args()
device = "cuda" if args.cuda else "cpu"
writer = SummaryWriter(comment="-cheetah-es_lr=%.3e_sigma=%.3e" % (args.lr, args.noise_std))

env = make_env()
net = Net(env.observation_space.shape[0], env.action_space.shape[0])
print(net)

params_queues = [mp.Queue(maxsize=1) for _ in range(PROCESSES_COUNT)]
rewards_queue = mp.Queue(maxsize=ITERS_PER_UPDATE)
workers = []

for idx, params_queue in enumerate(params_queues):
    p_args = (idx, params_queue, rewards_queue, device, args.noise_std)
    proc = mp.Process(target=worker_func, args=p_args)
    proc.start()
    workers.append(proc)

print("All started!")
optimizer = optim.Adam(net.parameters(), lr=args.lr)

for step_idx in range(args.iters):
    # broadcasting network params
    params = net.state_dict()
    for q in params_queues:
        q.put(params)

    # waiting for results
    t_start = time.time()
    batch_noise = []
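# The worker_func launched above is defined elsewhere. As a rough, hypothetical
# sketch (simplified, not this project's actual protocol): each worker blocks on
# its own params_queue for the latest weights, evaluates a noise-perturbed copy
# of the policy, and reports the noise seed and episode reward via rewards_queue.
import numpy as np
import torch

def worker_func(worker_id, params_queue, rewards_queue, device, noise_std):
    env = make_env()
    net = Net(env.observation_space.shape[0], env.action_space.shape[0]).to(device)
    while True:
        params = params_queue.get()   # blocks until the master broadcasts
        if params is None:            # hypothetical shutdown signal
            break
        net.load_state_dict(params)
        seed = np.random.randint(1_000_000)
        np.random.seed(seed)
        with torch.no_grad():         # perturb parameters with Gaussian noise
            for p in net.parameters():
                noise = np.random.normal(scale=noise_std, size=p.shape).astype(np.float32)
                p.add_(torch.from_numpy(noise).to(device))
        reward = evaluate(env, net, device)  # hypothetical single-episode rollout helper
        rewards_queue.put((seed, reward))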
def evaluate(args, iteration_1, iteration_2, net_class, game_class): logging.info("Loading nets...") current_net = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration_2) best_net = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration_1) current_net_filename = os.path.join("./model_data/", current_net) best_net_filename = os.path.join("./model_data/", best_net) logging.info("Current net: %s" % current_net) logging.info("Previous (Best) net: %s" % best_net) current_cnet = net_class(game_class()) best_cnet = net_class(game_class()) cuda = torch.cuda.is_available() if cuda: current_cnet.cuda() best_cnet.cuda() if not os.path.isdir("./evaluator_data/"): os.mkdir("evaluator_data") if args.MCTS_num_processes > 1: mp.set_start_method("spawn", force=True) current_cnet.share_memory() best_cnet.share_memory() current_cnet.eval() best_cnet.eval() checkpoint = torch.load(current_net_filename) current_cnet.load_state_dict(checkpoint['state_dict']) checkpoint = torch.load(best_net_filename) best_cnet.load_state_dict(checkpoint['state_dict']) processes = [] if args.MCTS_num_processes > mp.cpu_count(): num_processes = mp.cpu_count() logging.info( "Required number of processes exceed number of CPUs! Setting MCTS_num_processes to %d" % num_processes) else: num_processes = args.MCTS_num_processes logging.info("Spawning %d processes..." % num_processes) with torch.no_grad(): for i in range(num_processes): p = mp.Process(target=fork_process, args=(arena(current_cnet, best_cnet, game_class), args.num_evaluator_games, i)) p.start() processes.append(p) for p in processes: p.join() wins_ratio = 0.0 for i in range(num_processes): stats = load_pickle("wins_cpu_%i" % (i)) wins_ratio += stats['best_win_ratio'] wins_ratio = wins_ratio / num_processes if wins_ratio >= 0.55: return iteration_2 else: return iteration_1 elif args.MCTS_num_processes == 1: current_cnet.eval() best_cnet.eval() checkpoint = torch.load(current_net_filename) current_cnet.load_state_dict(checkpoint['state_dict']) checkpoint = torch.load(best_net_filename) best_cnet.load_state_dict(checkpoint['state_dict']) arena1 = arena(current_cnet=current_cnet, best_cnet=best_cnet, game_class=game_class) arena1.evaluate(num_games=args.num_evaluator_games, cpu=0) stats = load_pickle("wins_cpu_%i" % (0)) if stats["best_win_ratio"] >= 0.55: return iteration_2 else: return iteration_1
                        group_name="comm_proc1")
    # while True:
    #     time.sleep(1)
    print("Will exit")


if __name__ == '__main__':
    wn = args.wn
    wid = args.wid
    bs = args.bs
    num_processes = 2
    comm_proc_list1 = []
    comm_proc_list2 = []
    for rank in range(num_processes):
        # pass the rank as a one-element tuple (not a set) so the argument order is well defined
        comm_p1 = mp.Process(target=comm_proc1, args=(rank,))
        comm_p1.start()
        comm_proc_list1.append(comm_p1)
    # extra_p1 = mp.Process(target=extra_proc)
    # extra_p1.start()
    # comm_proc_list1.append(extra_p1)
    for rank in range(num_processes):
        comm_p2 = mp.Process(target=comm_proc2, args=(rank,))
        comm_p2.start()
        comm_proc_list2.append(comm_p2)
    for proc in comm_proc_list1:
def __init__(self, loader): self.dataset = loader.dataset self.collate_fn = loader.collate_fn self.batch_sampler = loader.batch_sampler self.num_workers = loader.num_workers self.pin_memory = loader.pin_memory and torch.cuda.is_available() self.timeout = loader.timeout self.sample_iter = iter(self.batch_sampler) base_seed = torch.LongTensor(1).random_().item() if self.num_workers > 0: self.worker_init_fn = loader.worker_init_fn self.worker_queue_idx = 0 self.worker_result_queue = multiprocessing.Queue() self.batches_outstanding = 0 self.worker_pids_set = False self.shutdown = False self.send_idx = 0 self.rcvd_idx = 0 self.reorder_dict = {} self.done_event = multiprocessing.Event() self.index_queues = [] self.workers = [] for i in range(self.num_workers): index_queue = multiprocessing.Queue() w = multiprocessing.Process( target=_worker_loop, args=(self.dataset, index_queue, self.worker_result_queue, self.done_event, self.collate_fn, base_seed + i, self.worker_init_fn, i)) w.daemon = True # ensure that the worker exits on process exit # Process.start() actually take some time as it needs to start a # process and pass the arguments over via a pipe. Therefore, we # only add a worker to self.workers list after it started, so # that we do not call .join() if program dies before it starts, # and __del__ tries to join it but will get: # AssertionError: can only join a started process. w.start() self.index_queues.append(index_queue) self.workers.append(w) if self.pin_memory: self.data_queue = queue.Queue() pin_memory_thread = threading.Thread( target=_pin_memory_loop, args=(self.worker_result_queue, self.data_queue, self.done_event, self.pin_memory, torch.cuda.current_device())) pin_memory_thread.daemon = True pin_memory_thread.start() # Similar to workers (see comment above), we only register # pin_memory_thread once it is started. self.pin_memory_thread = pin_memory_thread else: self.data_queue = self.worker_result_queue _update_worker_pids(id(self), tuple(w.pid for w in self.workers)) _set_SIGCHLD_handler() self.worker_pids_set = True # prime the prefetch loop for _ in range(2 * self.num_workers): self._put_indices()
def run_acer(variant): # BLAS setup os.environ['OMP_NUM_THREADS'] = '1' os.environ['MKL_NUM_THREADS'] = '1' # Setup # args = parser.parse_args() # Creating directories. save_dir = os.path.join('results', 'results') if not os.path.exists(save_dir): os.makedirs(save_dir) print(' ' * 26 + 'Options') """ # Saving parameters with open(os.path.join(save_dir, 'params.txt'), 'w') as f: for k, v in vars(args).items(): print(' ' * 26 + k + ': ' + str(v)) f.write(k + ' : ' + str(v) + '\n') """ # args.env = 'CartPole-v1' # TODO: Remove hardcoded environment when code is more adaptable # mp.set_start_method(platform.python_version()[0] == '3' and 'spawn' or 'fork') # Force true spawning (not forking) if available torch.manual_seed(variant['seed']) T = Counter() # Global shared counter # gym.logger.set_level(gym.logger.ERROR) # Disable Gym warnings # Create shared network env = gym.make(variant['env']) shared_model = ActorCritic(env.observation_space, env.action_space, variant['hidden_size']) shared_model.share_memory() """ if args.model and os.path.isfile(args.model): # Load pretrained weights shared_model.load_state_dict(torch.load(args.model)) """ # Create average network shared_average_model = ActorCritic(env.observation_space, env.action_space, variant['hidden_size']) shared_average_model.load_state_dict(shared_model.state_dict()) shared_average_model.share_memory() for param in shared_average_model.parameters(): param.requires_grad = False # Create optimiser for shared network parameters with shared statistics optimiser = SharedRMSprop(shared_model.parameters(), lr=variant['lr'], alpha=0.99) optimiser.share_memory() env.close() fields = ['t', 'rewards', 'avg_steps', 'time'] with open(os.path.join(save_dir, 'test_results.csv'), 'w') as f: writer = csv.writer(f) writer.writerow(fields) # Start validation agent processes = [] p = mp.Process(target=test, args=(0, variant, T, shared_model)) p.start() processes.append(p) if not variant['evaluate']: # Start training agents for rank in range(1, variant['num-processes'] + 1): p = mp.Process(target=train, args=(rank, variant, T, shared_model, shared_average_model, optimiser)) p.start() print('Process ' + str(rank) + ' started') processes.append(p) # Clean up for p in processes: p.join()
    env.close()
    plt.plot(loss_curve)
    plt.show()


if __name__ == '__main__':
    global_model = ActorCritic()
    if load_model:
        global_model.load_state_dict(torch.load("./agent.pth"))
    global_model.share_memory()

    processes = []
    for i in range(n_train_processes):
        p = mp.Process(target=train, args=(global_model, ))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    torch.save(global_model.state_dict(), "./agent.pth")

    # test
    env = gym.make("CartPole-v1")
    state = env.reset()
    total_reward = 0
    while True:
        env.render()
        prob = global_model.pi(torch.from_numpy(state.reshape(1, -1)).float())
        m = Categorical(prob)
for param in shared_average_model.parameters(): param.requires_grad = False # Create optimiser for shared network parameters with shared statistics optimiser = SharedRMSprop(shared_model.parameters(), lr=args.lr, alpha=args.rmsprop_decay) optimiser.share_memory() env.close() fields = ['t', 'rewards', 'avg_steps', 'time'] with open(os.path.join(save_dir, 'test_results.csv'), 'w') as f: writer = csv.writer(f) writer.writerow(fields) # Start validation agent processes = [] p = mp.Process(target=test, args=(0, args, T, shared_model)) p.start() processes.append(p) if not args.evaluate: # Start training agents for rank in range(1, args.num_processes + 1): p = mp.Process(target=train, args=(rank, args, T, shared_model, shared_average_model, optimiser)) p.start() print('Process ' + str(rank) + ' started') processes.append(p) # Clean up for p in processes:
log_dirs = ["runs/actor_run", "runs/learner_run"] for log_dir in log_dirs: if exists(log_dir): rmtree(log_dir) Path(log_dir).mkdir(parents=True) actor_log_dir, learner_log_dir = log_dirs queue = Queue(maxsize=500_000) replay = BinaryPrioritizeReplayMemory(capacity=500_000, priority_fraction=0.5) processes = [] collecting_process = mp.Process( target=collect_experience, kwargs={ "game_files": games, "buffer": queue, "train_params": train_params, "eps_scheduler_params": params.pop("epsilon"), "target_net": target_net, "policy_net": policy_net, "log_dir": actor_log_dir }, ) training_process = mp.Process( target=learn, kwargs={ "policy_net": policy_net, "target_net": target_net, "replay_buffer": replay, "queue": queue, "params": train_params, "log_dir": learner_log_dir
def __init__(self, module, device_ids=None, distributed=True, master_addr=None, master_port=None, backend=None, world_size=None, rank=None, graph=None, mixing=None, comm_device=None, lr=0.1, momentum=0.9, weight_decay=1e-4, nesterov=True, verbose=True): super(BilatGossipDataParallel, self).__init__() # whether we're using multiple agents for training self.distributed = distributed # devices available locally if device_ids is None: device_ids = list(range(torch.cuda.device_count())) self.output_device = device_ids[0] self.device_ids = device_ids # put model on output device self.module = module.cuda(self.output_device) # prepare local intra-node all-reduce objects if len(self.device_ids) > 1: self.broadcast_bucket_size = 10 * 1024 * 1024 # bytes self.nccl_reduce_bucket_size = 256 * 1024 * 1024 # bytes self._module_copies = replicate(self.module, self.device_ids, detach=True) self._module_copies[0] = self.module for cmodule in self._module_copies[1:]: for p, cp in zip(self.module.parameters(), cmodule.parameters()): cp.requires_grad = p.requires_grad else: self._module_copies = [self.module] # prepare inter-node gossip objects if self.distributed: # communicate over cpu's if not specified if comm_device is None: comm_device = torch.device('cpu') self.__cpu_comm = comm_device.type == 'cpu' # distributed backend config self.dist_config = { 'verbose': verbose, 'graph': graph, 'master_addr': master_addr, 'master_port': master_port, 'backend': backend, 'world_size': world_size, 'rank': rank, 'mixing': mixing, 'lr': lr, 'momentum': momentum, 'nesterov': nesterov, 'weight_decay': weight_decay } self.num_updates = 0 # logger used to print to stdout self.logger = make_logger(rank, verbose) # prepare parameters for gossip self.gossip_enable = True self.gossip_params = [] self.gossip_grads = [] for p in module.parameters(): cp = p.clone().detach_() cp = cp.cpu().pin_memory() if self.__cpu_comm else cp.cuda() cp.requires_grad = p.requires_grad self.gossip_params.append(cp) if p.requires_grad: g = cp.clone().zero_().detach_() g = g.cpu().pin_memory() if self.__cpu_comm else g.cuda() self.gossip_grads.append(g) self.gossip_queue = mp.Queue() self.gossip_lock = mp.Lock() self.gossip_enable_flag = mp.Event() self.train_write_flag = mp.Event() # signal train-proc write event self.gossip_read_flag = mp.Event() # signal gossip-proc read event self.gossip_update_flag = mp.Event( ) # signal 2 gossip-proc need update self._lr = mp.Value('f', lr, lock=self.gossip_lock) self.gossip_thread = mp.Process( target=BilatGossipDataParallel._gossip_target, args=(self.dist_config, self.gossip_enable_flag, self.train_write_flag, self.gossip_read_flag, self.gossip_update_flag, self._lr, self.gossip_lock, self.gossip_queue)) self.gossip_thread.daemon = True self.gossip_thread.name = 'Gossip-Thread' self.gossip_thread.start() # pass handle to gossip_params and gossip_grads, and put in shared # memory self.gossip_queue.put((self.gossip_params, self.gossip_grads)) else: # logger used to print to stdout self.logger = make_logger(0, verbose) # register ps/grad-reduction hooks self.__register_hooks()
def main(parser): parser = parse_additional_args(parser) args = parser.parse_args(None) set_global_seed(args.seed) gpus = [int(i) for i in args.gpus.split(".")] assert args.gpus == '.'.join( [str(i) for i in range(len(gpus))] ), 'only support continuous gpu ids starting from 0, please set CUDA_VISIBLE_DEVICES instead' setup_train_mode(args) setup_save_path(args) writer = set_logger(args) model = get_model(args) logging.info('-------------------------------' * 3) logging.info('Geo: %s' % args.geo) logging.info('Data Path: %s' % args.data_path) logging.info('#entity: %d' % args.nentity) logging.info('#relation: %d' % args.nrelation) logging.info('#max steps: %d' % args.max_steps) logging.info('Evaluate unions using: %s' % args.evaluate_union) kg_mem = KGMem(dtype=args.kg_dtype) kg_mem.load(os.path.join(args.data_path, 'train_bidir.bin')) kg_mem.share_memory() opt_stats = load_lse_checkpoint(args, model) logging.info('tasks = %s' % args.tasks) logging.info('init_step = %d' % opt_stats['init_step']) if args.do_train: logging.info("Training info:") logging.info("{}: infinite".format(args.training_tasks)) logging.info('Start Training...') logging.info('learning_rate = %d' % opt_stats['current_learning_rate']) logging.info('batch_size = %d' % args.batch_size) logging.info('hidden_dim = %d' % args.hidden_dim) logging.info('gamma = %f' % args.gamma) eval_dict = {} aggr_procs = [] args.gpus = gpus for phase in ['valid', 'test']: if getattr(args, 'do_%s' % phase, False): d = load_question_eval_data(args, phase) result_aggregator = mp.Process(target=async_aggr, args=(args, d.buffer, d.writer_buffer, 'phase')) result_aggregator.start() aggr_procs.append(result_aggregator) eval_dict[phase] = d if args.feature_folder is not None: logging.info('loading static entity+relation features from %s' % args.feature_folder) ro_feat = torch.load(os.path.join(args.feature_folder, 'feat.pt')) else: ro_feat = None procs = [] training_tasks = args.training_tasks.split('.') for rank, gpu_id in enumerate(gpus): logging.info("[GPU {}] tasks: {}".format(gpu_id, args.training_tasks)) local_eval_dict = {} for phase in eval_dict: q_data = eval_dict[phase] nq_per_proc = math.ceil(len(q_data.data) / len(gpus)) local_eval_dict[phase] = QueryData( q_data.data.subset(rank * nq_per_proc, nq_per_proc), q_data.buffer, q_data.writer_buffer) proc = mp.Process(target=train_mp, args=(args, kg_mem, opt_stats, model, local_eval_dict, training_tasks, ro_feat, gpu_id)) procs.append(proc) proc.start() write_to_writer(eval_dict, writer) for proc in procs + aggr_procs: proc.join() logging.info("Training finished!!")
mp.set_start_method("spawn", force=True) current_net = "current_net.pth.tar" best_net = "current_net_trained.pth.tar" current_net_filename = os.path.join("./model_data/",\ current_net) best_net_filename = os.path.join("./model_data/",\ best_net) current_chessnet = cnet() best_chessnet = cnet() checkpoint = torch.load(current_net_filename) current_chessnet.load_state_dict(checkpoint['state_dict']) checkpoint = torch.load(best_net_filename) best_chessnet.load_state_dict(checkpoint['state_dict']) cuda = torch.cuda.is_available() if cuda: current_chessnet.cuda() best_chessnet.cuda() current_chessnet.eval() best_chessnet.eval() current_chessnet.share_memory() best_chessnet.share_memory() processes = [] for i in range(6): p = mp.Process(target=fork_process, args=(arena(current_chessnet, best_chessnet), 50, i)) p.start() processes.append(p) for p in processes: p.join()
def main(args): torch.manual_seed(args['seed']) npr.seed(args['seed'] + 1) # Create the save directory try: os.makedirs(args['save_directory']) except OSError: if not os.path.isdir(args['save_directory']): raise print('saving to: ' + args['save_directory'] + '/') if args['gpu_ids'] == -1: args['gpu_ids'] = [-1] else: torch.cuda.manual_seed(args['seed']) mp.set_start_method('spawn') env = create_env(args['env'], args) # Create model AC = importlib.import_module(args['model_name']) shared_model = AC.ActorCritic(env.observation_space, env.action_space, args['stack_frames'], args) shared_model.share_memory() if args['shared_optimizer']: if args['optimizer'] == 'RMSprop': optimizer = SharedRMSprop(shared_model.parameters(), lr=args['lr']) if args['optimizer'] == 'Adam': optimizer = SharedAdam(shared_model.parameters(), lr=args['lr'], amsgrad=args['amsgrad']) optimizer.share_memory() else: optimizer = None # Keep track of all steps taken in each thread all_step_counters = [mp.Value('i', 0) for i in range(args['workers'])] global_step_counter = mp.Value('i', 0) # Keep track of stats if we want to load from a checkpoint all_scores = [] all_global_steps = [] if args['load_file'] != '': print('Loading model from: {0}'.format(args['load_file'])) pthfile = torch.load('{0}'.format(args['load_file']), map_location=lambda storage, loc: storage.cpu()) if args['load_best']: shared_model.load_state_dict(pthfile['best_state_dict']) if optimizer is not None: optimizer.load_state_dict(pthfile['best_optimizer']) else: shared_model.load_state_dict(pthfile['state_dict']) if optimizer is not None: optimizer.load_state_dict(pthfile['optimizer']) all_scores = pthfile['all_scores'] all_global_steps = pthfile['all_global_steps'] # Only test process will write to this to avoid each thread waiting every # gradient step to update. Threads will read from global_step_counter to # know when to terminate if args['test_until'] is used if len(all_global_steps) > 0: # This increment doesn't have to be atomic with global_step_counter.get_lock(): global_step_counter.value = all_global_steps[-1] processes = [] p = mp.Process(target=test, args=(args, shared_model, optimizer, all_scores, all_global_steps, all_step_counters, global_step_counter)) p.start() processes.append(p) time.sleep(0.1) for rank in range(0, args['workers']): p = mp.Process(target=train, args=(rank, args, shared_model, optimizer, all_step_counters[rank], global_step_counter)) p.start() processes.append(p) time.sleep(0.1) for p in processes: time.sleep(0.1) p.join()
def main(): parser = argparse.ArgumentParser() # decode setting parser.add_argument("--feats", required=True, type=str, help="list or directory of source eval feat files") parser.add_argument("--spk", required=True, type=str, help="speaker name to be reconstructed") parser.add_argument("--model", required=True, type=str, help="model file") parser.add_argument("--config", required=True, type=str, help="configure file") parser.add_argument("--n_gpus", default=1, type=int, help="number of gpus") parser.add_argument("--outdir", required=True, type=str, help="directory to save log") parser.add_argument("--string_path", required=True, type=str, help="path of h5 generated feature") # other setting parser.add_argument("--GPU_device", default=None, type=int, help="selection of GPU device") parser.add_argument("--GPU_device_str", default=None, type=str, help="selection of GPU device") parser.add_argument("--verbose", default=1, type=int, help="log level") args = parser.parse_args() if args.GPU_device is not None or args.GPU_device_str is not None: os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" if args.GPU_device_str is None: os.environ["CUDA_VISIBLE_DEVICES"] = str(args.GPU_device) else: os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU_device_str # check directory existence if not os.path.exists(args.outdir): os.makedirs(args.outdir) # set log level if args.verbose > 0: logging.basicConfig(level=logging.INFO, format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S', filemode='w', filename=args.outdir + "/decode.log") logging.getLogger().addHandler(logging.StreamHandler()) elif args.verbose > 1: logging.basicConfig(level=logging.DEBUG, format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S', filemode='w', filename=args.outdir + "/decode.log") logging.getLogger().addHandler(logging.StreamHandler()) else: logging.basicConfig(level=logging.WARN, format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S', filemode='w', filename=args.outdir + "/decode.log") logging.getLogger().addHandler(logging.StreamHandler()) logging.warn("logging is disabled.") # load config config = torch.load(args.config) # get source feat list if os.path.isdir(args.feats): feat_list = sorted(find_files(args.feats, "*.h5")) elif os.path.isfile(args.feats): feat_list = read_txt(args.feats) else: logging.error("--feats should be directory or list.") sys.exit(1) # prepare the file list for parallel decoding feat_lists = np.array_split(feat_list, args.n_gpus) feat_lists = [f_list.tolist() for f_list in feat_lists] for i in range(args.n_gpus): logging.info('%d: %d' % (i+1, len(feat_lists[i]))) spk_list = config.spk_list.split('@') n_spk = len(spk_list) spk_idx = spk_list.index(args.spk) stats_list = config.stats_list.split('@') assert(n_spk == len(stats_list)) spk_stat = stats_list[spk_idx] gv_mean = read_hdf5(spk_stat, "/gv_melsp_mean") model_epoch = os.path.basename(args.model).split('.')[0].split('-')[1] logging.info('epoch: '+model_epoch) str_split = os.path.basename(os.path.dirname(args.model)).split('_') model_name = str_split[1]+"_"+str_split[2] logging.info('mdl_name: '+model_name) logging.info(config) # define gpu decode function def gpu_decode(feat_list, gpu, cvlist=None, lsd_cvlist=None, lsdstd_cvlist=None, cvlist_dv=None, lsd_cvlist_dv=None, lsdstd_cvlist_dv=None, f0rmse_cvlist=None, f0corr_cvlist=None, caprmse_cvlist=None, f0rmse_cvlist_dv=None, f0corr_cvlist_dv=None, caprmse_cvlist_dv=None, 
cvlist_cyc=None, lsd_cvlist_cyc=None, lsdstd_cvlist_cyc=None, cvlist_cyc_dv=None, lsd_cvlist_cyc_dv=None, lsdstd_cvlist_cyc_dv=None, f0rmse_cvlist_cyc=None, f0corr_cvlist_cyc=None, caprmse_cvlist_cyc=None, f0rmse_cvlist_cyc_dv=None, f0corr_cvlist_cyc_dv=None, caprmse_cvlist_cyc_dv=None): with torch.cuda.device(gpu): # define model and load parameters with torch.no_grad(): model_encoder_melsp = GRU_VAE_ENCODER( in_dim=config.mel_dim, n_spk=n_spk, lat_dim=config.lat_dim, hidden_layers=config.hidden_layers_enc, hidden_units=config.hidden_units_enc, kernel_size=config.kernel_size_enc, dilation_size=config.dilation_size_enc, causal_conv=config.causal_conv_enc, bi=False, ar=False, pad_first=True, right_size=config.right_size_enc) logging.info(model_encoder_melsp) model_decoder_melsp = GRU_SPEC_DECODER( feat_dim=config.lat_dim+config.lat_dim_e, excit_dim=config.excit_dim, out_dim=config.mel_dim, n_spk=n_spk, hidden_layers=config.hidden_layers_dec, hidden_units=config.hidden_units_dec, kernel_size=config.kernel_size_dec, dilation_size=config.dilation_size_dec, causal_conv=config.causal_conv_dec, bi=False, ar=False, pad_first=True, right_size=config.right_size_dec) logging.info(model_decoder_melsp) model_encoder_excit = GRU_VAE_ENCODER( in_dim=config.mel_dim, n_spk=n_spk, lat_dim=config.lat_dim_e, hidden_layers=config.hidden_layers_enc, hidden_units=config.hidden_units_enc, kernel_size=config.kernel_size_enc, dilation_size=config.dilation_size_enc, causal_conv=config.causal_conv_enc, bi=False, ar=False, pad_first=True, right_size=config.right_size_enc) logging.info(model_encoder_excit) model_decoder_excit = GRU_EXCIT_DECODER( feat_dim=config.lat_dim_e, cap_dim=config.cap_dim, n_spk=n_spk, hidden_layers=config.hidden_layers_lf0, hidden_units=config.hidden_units_lf0, kernel_size=config.kernel_size_lf0, dilation_size=config.dilation_size_lf0, causal_conv=config.causal_conv_lf0, bi=False, ar=False, pad_first=True, right_size=config.right_size_lf0) logging.info(model_decoder_excit) if (config.spkidtr_dim > 0): model_spkidtr = SPKID_TRANSFORM_LAYER( n_spk=n_spk, spkidtr_dim=config.spkidtr_dim) logging.info(model_spkidtr) model_post = GRU_POST_NET( spec_dim=config.mel_dim, excit_dim=config.excit_dim+config.cap_dim+1, n_spk=n_spk, hidden_layers=config.hidden_layers_post, hidden_units=config.hidden_units_post, kernel_size=config.kernel_size_post, dilation_size=config.dilation_size_post, causal_conv=config.causal_conv_post, pad_first=True, right_size=config.right_size_post) logging.info(model_post) model_encoder_melsp.load_state_dict(torch.load(args.model)["model_encoder_melsp"]) model_decoder_melsp.load_state_dict(torch.load(args.model)["model_decoder_melsp"]) model_encoder_excit.load_state_dict(torch.load(args.model)["model_encoder_excit"]) model_decoder_excit.load_state_dict(torch.load(args.model)["model_decoder_excit"]) if (config.spkidtr_dim > 0): model_spkidtr.load_state_dict(torch.load(args.model)["model_spkidtr"]) model_post.load_state_dict(torch.load(args.model)["model_post"]) model_encoder_melsp.cuda() model_decoder_melsp.cuda() model_encoder_excit.cuda() model_decoder_excit.cuda() model_post.cuda() if (config.spkidtr_dim > 0): model_spkidtr.cuda() model_encoder_melsp.eval() model_decoder_melsp.eval() model_encoder_excit.eval() model_decoder_excit.eval() model_post.eval() if (config.spkidtr_dim > 0): model_spkidtr.eval() for param in model_encoder_melsp.parameters(): param.requires_grad = False for param in model_decoder_melsp.parameters(): param.requires_grad = False for param in 
model_encoder_excit.parameters(): param.requires_grad = False for param in model_decoder_excit.parameters(): param.requires_grad = False for param in model_post.parameters(): param.requires_grad = False if (config.spkidtr_dim > 0): for param in model_spkidtr.parameters(): param.requires_grad = False count = 0 pad_left = (model_encoder_melsp.pad_left + model_decoder_melsp.pad_left*2 + model_post.pad_left)*2 pad_right = (model_encoder_melsp.pad_right + model_decoder_melsp.pad_right*2 + model_post.pad_right)*2 outpad_lefts = [None]*7 outpad_rights = [None]*7 outpad_lefts[0] = pad_left-model_encoder_melsp.pad_left outpad_rights[0] = pad_right-model_encoder_melsp.pad_right outpad_lefts[1] = outpad_lefts[0]-model_decoder_melsp.pad_left outpad_rights[1] = outpad_rights[0]-model_decoder_melsp.pad_right outpad_lefts[2] = outpad_lefts[1]-model_decoder_melsp.pad_left outpad_rights[2] = outpad_rights[1]-model_decoder_melsp.pad_right outpad_lefts[3] = outpad_lefts[2]-model_post.pad_left outpad_rights[3] = outpad_rights[2]-model_post.pad_right outpad_lefts[4] = outpad_lefts[3]-model_encoder_melsp.pad_left outpad_rights[4] = outpad_rights[3]-model_encoder_melsp.pad_right outpad_lefts[5] = outpad_lefts[4]-model_decoder_melsp.pad_left outpad_rights[5] = outpad_rights[4]-model_decoder_melsp.pad_right outpad_lefts[6] = outpad_lefts[5]-model_decoder_melsp.pad_left outpad_rights[6] = outpad_rights[5]-model_decoder_melsp.pad_right for feat_file in feat_list: # reconst. melsp logging.info("recmelsp " + feat_file) feat_org = read_hdf5(feat_file, "/log_1pmelmagsp") logging.info(feat_org.shape) with torch.no_grad(): feat = F.pad(torch.FloatTensor(feat_org).cuda().unsqueeze(0).transpose(1,2), (pad_left,pad_right), "replicate").transpose(1,2) spk_logits, _, lat_src, _ = model_encoder_melsp(feat, sampling=False) spk_logits_e, _, lat_src_e, _ = model_encoder_excit(feat, sampling=False) logging.info('input spkpost') if outpad_rights[0] > 0: logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[0]:-outpad_rights[0]], dim=-1), 1)) else: logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[0]:], dim=-1), 1)) logging.info('input spkpost_e') if outpad_rights[0] > 0: logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[0]:-outpad_rights[0]], dim=-1), 1)) else: logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[0]:], dim=-1), 1)) if config.spkidtr_dim > 0: src_code = model_spkidtr((torch.ones((1, lat_src_e.shape[1]))*spk_idx).cuda().long()) else: src_code = (torch.ones((1, lat_src_e.shape[1]))*spk_idx).cuda().long() cvlf0_src, _ = model_decoder_excit(src_code, lat_src_e) if model_decoder_melsp.pad_right > 0: lat_cat = torch.cat((lat_src_e[:,model_decoder_melsp.pad_left:-model_decoder_melsp.pad_right], lat_src[:,model_decoder_melsp.pad_left:-model_decoder_melsp.pad_right]), 2) else: lat_cat = torch.cat((lat_src_e[:,model_decoder_melsp.pad_left:], lat_src[:,model_decoder_melsp.pad_left:]), 2) if config.spkidtr_dim > 0: src_code = model_spkidtr((torch.ones((1, lat_cat.shape[1]))*spk_idx).cuda().long()) else: src_code = (torch.ones((1, lat_cat.shape[1]))*spk_idx).cuda().long() cvmelsp_src, _ = model_decoder_melsp(src_code, lat_cat, e=cvlf0_src[:,:,:config.excit_dim]) if config.spkidtr_dim > 0: src_code = model_spkidtr((torch.ones((1, cvmelsp_src.shape[1]))*spk_idx).cuda().long()) else: src_code = (torch.ones((1, cvmelsp_src.shape[1]))*spk_idx).cuda().long() if model_decoder_melsp.pad_right > 0: e_post = cvlf0_src[:,model_decoder_melsp.pad_left:-model_decoder_melsp.pad_right] else: e_post = 
cvlf0_src[:,model_decoder_melsp.pad_left:] cvmelsp_src_post, _ = model_post(cvmelsp_src, y=src_code, e=e_post) if model_post.pad_right > 0: cvmelsp_src = cvmelsp_src[:,model_post.pad_left:-model_post.pad_right] else: cvmelsp_src = cvmelsp_src[:,model_post.pad_left:] spk_logits, _, lat_rec, _ = model_encoder_melsp(cvmelsp_src, sampling=False) spk_logits_e, _, lat_rec_e, _ = model_encoder_excit(cvmelsp_src, sampling=False) logging.info('rec spkpost') if outpad_rights[4] > 0: logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[4]:-outpad_rights[4]], dim=-1), 1)) else: logging.info(torch.mean(F.softmax(spk_logits[:,outpad_lefts[4]:], dim=-1), 1)) logging.info('rec spkpost_e') if outpad_rights[4] > 0: logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[4]:-outpad_rights[4]], dim=-1), 1)) else: logging.info(torch.mean(F.softmax(spk_logits_e[:,outpad_lefts[4]:], dim=-1), 1)) if config.spkidtr_dim > 0: src_code = model_spkidtr((torch.ones((1, lat_rec_e.shape[1]))*spk_idx).cuda().long()) else: src_code = (torch.ones((1, lat_rec_e.shape[1]))*spk_idx).cuda().long() cvlf0_cyc, _ = model_decoder_excit(src_code, lat_rec_e) if model_decoder_melsp.pad_right > 0: lat_cat = torch.cat((lat_rec_e[:,model_decoder_melsp.pad_left:-model_decoder_melsp.pad_right], lat_rec[:,model_decoder_melsp.pad_left:-model_decoder_melsp.pad_right]), 2) else: lat_cat = torch.cat((lat_rec_e[:,model_decoder_melsp.pad_left:], lat_rec[:,model_decoder_melsp.pad_left:]), 2) if config.spkidtr_dim > 0: src_code = model_spkidtr((torch.ones((1, lat_cat.shape[1]))*spk_idx).cuda().long()) else: src_code = (torch.ones((1, lat_cat.shape[1]))*spk_idx).cuda().long() cvmelsp_cyc, _ = model_decoder_melsp(src_code, lat_cat, e=cvlf0_cyc[:,:,:config.excit_dim]) if config.spkidtr_dim > 0: src_code = model_spkidtr((torch.ones((1, cvmelsp_cyc.shape[1]))*spk_idx).cuda().long()) else: src_code = (torch.ones((1, cvmelsp_cyc.shape[1]))*spk_idx).cuda().long() if model_decoder_melsp.pad_right > 0: e_post = cvlf0_cyc[:,model_decoder_melsp.pad_left:-model_decoder_melsp.pad_right] else: e_post = cvlf0_cyc[:,model_decoder_melsp.pad_left:] cvmelsp_cyc_post, _ = model_post(cvmelsp_cyc, y=src_code, e=e_post) if outpad_rights[1] > 0: cvlf0_src = cvlf0_src[:,outpad_lefts[1]:-outpad_rights[1]] else: cvlf0_src = cvlf0_src[:,outpad_lefts[1]:] if outpad_rights[3] > 0: cvmelsp_src_post = cvmelsp_src_post[:,outpad_lefts[3]:-outpad_rights[3]] else: cvmelsp_src_post = cvmelsp_src_post[:,outpad_lefts[3]:] if outpad_rights[5] > 0: cvlf0_cyc = cvlf0_cyc[:,outpad_lefts[5]:-outpad_rights[5]] else: cvlf0_cyc = cvlf0_cyc[:,outpad_lefts[5]:] feat_rec = cvmelsp_src_post[0].cpu().data.numpy() feat_cyc = cvmelsp_cyc_post[0].cpu().data.numpy() cvmelsp_src = np.array(cvmelsp_src_post[0].cpu().data.numpy(), dtype=np.float64) cvlf0_src = np.array(cvlf0_src[0].cpu().data.numpy(), dtype=np.float64) cvmelsp_cyc = np.array(cvmelsp_cyc_post[0].cpu().data.numpy(), dtype=np.float64) cvlf0_cyc = np.array(cvlf0_cyc[0].cpu().data.numpy(), dtype=np.float64) logging.info(cvlf0_src.shape) logging.info(cvmelsp_src.shape) logging.info(cvlf0_cyc.shape) logging.info(cvmelsp_cyc.shape) melsp = np.array(feat_org) feat_world = read_hdf5(feat_file, "/feat_mceplf0cap") f0 = np.array(np.rint(feat_world[:,0])*np.exp(feat_world[:,1])) codeap = np.array(np.rint(feat_world[:,2:3])*(-np.exp(feat_world[:,3:config.full_excit_dim]))) cvf0_src = np.array(np.rint(cvlf0_src[:,0])*np.exp(cvlf0_src[:,1])) cvcodeap_src = np.array(np.rint(cvlf0_src[:,2:3])*(-np.exp(cvlf0_src[:,3:]))) f0_rmse = 
np.sqrt(np.mean((cvf0_src-f0)**2)) logging.info('F0_rmse_rec: %lf Hz' % (f0_rmse)) cvf0_src_mean = np.mean(cvf0_src) f0_mean = np.mean(f0) f0_corr = np.sum((cvf0_src-cvf0_src_mean)*(f0-f0_mean))/\ (np.sqrt(np.sum((cvf0_src-cvf0_src_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2))) logging.info('F0_corr_rec: %lf' % (f0_corr)) codeap_rmse = np.sqrt(np.mean((cvcodeap_src-codeap)**2, axis=0)) for i in range(codeap_rmse.shape[-1]): logging.info('codeap-%d_rmse_rec: %lf dB' % (i+1, codeap_rmse[i])) cvf0_cyc = np.array(np.rint(cvlf0_cyc[:,0])*np.exp(cvlf0_cyc[:,1])) cvcodeap_cyc = np.array(np.rint(cvlf0_cyc[:,2:3])*(-np.exp(cvlf0_cyc[:,3:]))) f0_rmse_cyc = np.sqrt(np.mean((cvf0_cyc-f0)**2)) logging.info('F0_rmse_cyc: %lf Hz' % (f0_rmse_cyc)) cvf0_cyc_mean = np.mean(cvf0_cyc) f0_mean = np.mean(f0) f0_corr_cyc = np.sum((cvf0_cyc-cvf0_cyc_mean)*(f0-f0_mean))/\ (np.sqrt(np.sum((cvf0_cyc-cvf0_cyc_mean)**2))*np.sqrt(np.sum((f0-f0_mean)**2))) logging.info('F0_corr_cyc: %lf' % (f0_corr_cyc)) codeap_rmse_cyc = np.sqrt(np.mean((cvcodeap_cyc-codeap)**2, axis=0)) for i in range(codeap_rmse_cyc.shape[-1]): logging.info('codeap-%d_rmse_cyc: %lf dB' % (i+1, codeap_rmse_cyc[i])) spcidx = np.array(read_hdf5(feat_file, "/spcidx_range")[0]) melsp_rest = (np.exp(melsp)-1)/10000 melsp_src_rest = (np.exp(cvmelsp_src)-1)/10000 melsp_cyc_rest = (np.exp(cvmelsp_cyc)-1)/10000 lsd_arr = np.sqrt(np.mean((20*(np.log10(np.clip(melsp_src_rest[spcidx], a_min=1e-16, a_max=None))\ -np.log10(np.clip(melsp_rest[spcidx], a_min=1e-16, a_max=None))))**2, axis=-1)) lsd_mean = np.mean(lsd_arr) lsd_std = np.std(lsd_arr) logging.info("lsd_rec: %.6f dB +- %.6f" % (lsd_mean, lsd_std)) lsd_arr = np.sqrt(np.mean((20*(np.log10(np.clip(melsp_cyc_rest[spcidx], a_min=1e-16, a_max=None))\ -np.log10(np.clip(melsp_rest[spcidx], a_min=1e-16, a_max=None))))**2, axis=-1)) lsd_mean_cyc = np.mean(lsd_arr) lsd_std_cyc = np.std(lsd_arr) logging.info("lsd_cyc: %.6f dB +- %.6f" % (lsd_mean_cyc, lsd_std_cyc)) logging.info('org f0') logging.info(f0[10:15]) logging.info('rec f0') logging.info(cvf0_src[10:15]) logging.info('cyc f0') logging.info(cvf0_cyc[10:15]) logging.info('org cap') logging.info(codeap[10:15]) logging.info('rec cap') logging.info(cvcodeap_src[10:15]) logging.info('cyc cap') logging.info(cvcodeap_cyc[10:15]) dataset = feat_file.split('/')[1].split('_')[0] if 'tr' in dataset: logging.info('trn') f0rmse_cvlist.append(f0_rmse) f0corr_cvlist.append(f0_corr) caprmse_cvlist.append(codeap_rmse) lsd_cvlist.append(lsd_mean) lsdstd_cvlist.append(lsd_std) cvlist.append(np.var(melsp_src_rest, axis=0)) logging.info(len(cvlist)) f0rmse_cvlist_cyc.append(f0_rmse_cyc) f0corr_cvlist_cyc.append(f0_corr_cyc) caprmse_cvlist_cyc.append(codeap_rmse_cyc) lsd_cvlist_cyc.append(lsd_mean_cyc) lsdstd_cvlist_cyc.append(lsd_std_cyc) cvlist_cyc.append(np.var(melsp_cyc_rest, axis=0)) elif 'dv' in dataset: logging.info('dev') f0rmse_cvlist_dv.append(f0_rmse) f0corr_cvlist_dv.append(f0_corr) caprmse_cvlist_dv.append(codeap_rmse) lsd_cvlist_dv.append(lsd_mean) lsdstd_cvlist_dv.append(lsd_std) cvlist_dv.append(np.var(melsp_src_rest, axis=0)) logging.info(len(cvlist_dv)) f0rmse_cvlist_cyc_dv.append(f0_rmse_cyc) f0corr_cvlist_cyc_dv.append(f0_corr_cyc) caprmse_cvlist_cyc_dv.append(codeap_rmse_cyc) lsd_cvlist_cyc_dv.append(lsd_mean_cyc) lsdstd_cvlist_cyc_dv.append(lsd_std_cyc) cvlist_cyc_dv.append(np.var(melsp_cyc_rest, axis=0)) logging.info('write rec to h5') outh5dir = os.path.join(os.path.dirname(os.path.dirname(feat_file)), args.spk+"-"+args.spk) if not os.path.exists(outh5dir): 
os.makedirs(outh5dir) feat_file = os.path.join(outh5dir, os.path.basename(feat_file)) logging.info(feat_file + ' ' + args.string_path) logging.info(feat_rec.shape) write_hdf5(feat_file, args.string_path, feat_rec) logging.info('write cyc to h5') outh5dir = os.path.join(os.path.dirname(os.path.dirname(feat_file)), args.spk+"-"+args.spk+"-"+args.spk) if not os.path.exists(outh5dir): os.makedirs(outh5dir) feat_file = os.path.join(outh5dir, os.path.basename(feat_file)) logging.info(feat_file + ' ' + args.string_path) logging.info(feat_cyc.shape) write_hdf5(feat_file, args.string_path, feat_cyc) count += 1 #if count >= 3: # break # parallel decode training with mp.Manager() as manager: gpu = 0 processes = [] cvlist = manager.list() lsd_cvlist = manager.list() lsdstd_cvlist = manager.list() f0rmse_cvlist = manager.list() f0corr_cvlist = manager.list() caprmse_cvlist = manager.list() cvlist_dv = manager.list() lsd_cvlist_dv = manager.list() lsdstd_cvlist_dv = manager.list() f0rmse_cvlist_dv = manager.list() f0corr_cvlist_dv = manager.list() caprmse_cvlist_dv = manager.list() cvlist_cyc = manager.list() lsd_cvlist_cyc = manager.list() lsdstd_cvlist_cyc = manager.list() f0rmse_cvlist_cyc = manager.list() f0corr_cvlist_cyc = manager.list() caprmse_cvlist_cyc = manager.list() cvlist_cyc_dv = manager.list() lsd_cvlist_cyc_dv = manager.list() lsdstd_cvlist_cyc_dv = manager.list() f0rmse_cvlist_cyc_dv = manager.list() f0corr_cvlist_cyc_dv = manager.list() caprmse_cvlist_cyc_dv = manager.list() for i, feat_list in enumerate(feat_lists): logging.info(i) p = mp.Process(target=gpu_decode, args=(feat_list, gpu, cvlist, lsd_cvlist, lsdstd_cvlist, cvlist_dv, lsd_cvlist_dv, lsdstd_cvlist_dv, f0rmse_cvlist, f0corr_cvlist, caprmse_cvlist, f0rmse_cvlist_dv, f0corr_cvlist_dv, caprmse_cvlist_dv, cvlist_cyc, lsd_cvlist_cyc, lsdstd_cvlist_cyc, cvlist_cyc_dv, lsd_cvlist_cyc_dv, lsdstd_cvlist_cyc_dv, f0rmse_cvlist_cyc, f0corr_cvlist_cyc, caprmse_cvlist_cyc, f0rmse_cvlist_cyc_dv, f0corr_cvlist_cyc_dv, caprmse_cvlist_cyc_dv,)) p.start() processes.append(p) gpu += 1 if (i + 1) % args.n_gpus == 0: gpu = 0 # wait for all process for p in processes: p.join() # calculate cv_gv statistics if len(lsd_cvlist) > 0: logging.info("lsd_rec: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(lsd_cvlist)), \ np.std(np.array(lsd_cvlist)),np.mean(np.array(lsdstd_cvlist)),\ np.std(np.array(lsdstd_cvlist)))) cvgv_mean = np.mean(np.array(cvlist), axis=0) cvgv_var = np.var(np.array(cvlist), axis=0) logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \ np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))))) logging.info("f0rmse_rec: %.6f Hz (+- %.6f)" % (np.mean(np.array(f0rmse_cvlist)),np.std(np.array(f0rmse_cvlist)))) logging.info("f0corr_rec: %.6f (+- %.6f)" % (np.mean(np.array(f0corr_cvlist)),np.std(np.array(f0corr_cvlist)))) caprmse_cvlist = np.array(caprmse_cvlist) for i in range(caprmse_cvlist.shape[-1]): logging.info("caprmse-%d_rec: %.6f dB (+- %.6f)" % (i+1, np.mean(caprmse_cvlist[:,i]),np.std(caprmse_cvlist[:,i]))) logging.info("lsd_cyc: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(lsd_cvlist_cyc)), \ np.std(np.array(lsd_cvlist_cyc)),np.mean(np.array(lsdstd_cvlist_cyc)),\ np.std(np.array(lsdstd_cvlist_cyc)))) cvgv_mean = np.mean(np.array(cvlist_cyc), axis=0) cvgv_var = np.var(np.array(cvlist_cyc), axis=0) logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \ np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))))) 
logging.info("f0rmse_cyc: %.6f Hz (+- %.6f)" % (np.mean(np.array(f0rmse_cvlist_cyc)),np.std(np.array(f0rmse_cvlist_cyc)))) logging.info("f0corr_cyc: %.6f (+- %.6f)" % (np.mean(np.array(f0corr_cvlist_cyc)),np.std(np.array(f0corr_cvlist_cyc)))) caprmse_cvlist_cyc = np.array(caprmse_cvlist_cyc) for i in range(caprmse_cvlist_cyc.shape[-1]): logging.info("caprmse-%d_cyc: %.6f dB (+- %.6f)" % (i+1, np.mean(caprmse_cvlist_cyc[:,i]),np.std(caprmse_cvlist_cyc[:,i]))) cvgv_mean = np.mean(np.array(np.r_[cvlist,cvlist_cyc]), axis=0) cvgv_var = np.var(np.array(np.r_[cvlist,cvlist_cyc]), axis=0) logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \ np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))))) string_path = model_name+"-"+str(config.n_half_cyc)+"-"+str(config.lat_dim)+"-"+str(config.lat_dim_e)\ +"-"+str(config.spkidtr_dim)+"-"+model_epoch logging.info(string_path) string_mean = "/recgv_mean_"+string_path string_var = "/recgv_var_"+string_path write_hdf5(spk_stat, string_mean, cvgv_mean) write_hdf5(spk_stat, string_var, cvgv_var) if len(lsd_cvlist_dv) > 0: logging.info("lsd_rec_dv: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(lsd_cvlist_dv)), \ np.std(np.array(lsd_cvlist_dv)),np.mean(np.array(lsdstd_cvlist_dv)),\ np.std(np.array(lsdstd_cvlist_dv)))) cvgv_mean = np.mean(np.array(cvlist_dv), axis=0) cvgv_var = np.var(np.array(cvlist_dv), axis=0) logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \ np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))))) logging.info("f0rmse_rec_dv: %.6f Hz (+- %.6f)" % (np.mean(np.array(f0rmse_cvlist_dv)),np.std(np.array(f0rmse_cvlist_dv)))) logging.info("f0corr_rec_dv: %.6f (+- %.6f)" % (np.mean(np.array(f0corr_cvlist_dv)),np.std(np.array(f0corr_cvlist_dv)))) caprmse_cvlist_dv = np.array(caprmse_cvlist_dv) for i in range(caprmse_cvlist.shape[-1]): logging.info("caprmse-%d_rec_dv: %.6f dB (+- %.6f)" % (i+1, np.mean(caprmse_cvlist_dv[:,i]),np.std(caprmse_cvlist_dv[:,i]))) logging.info("lsd_cyc_dv: %.6f dB (+- %.6f) +- %.6f (+- %.6f)" % (np.mean(np.array(lsd_cvlist_cyc_dv)), \ np.std(np.array(lsd_cvlist_cyc_dv)),np.mean(np.array(lsdstd_cvlist_cyc_dv)),\ np.std(np.array(lsdstd_cvlist_cyc_dv)))) cvgv_mean = np.mean(np.array(cvlist_cyc_dv), axis=0) cvgv_var = np.var(np.array(cvlist_cyc_dv), axis=0) logging.info("%lf +- %lf" % (np.mean(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))), \ np.std(np.sqrt(np.square(np.log(cvgv_mean)-np.log(gv_mean)))))) logging.info("f0rmse_cyc_dv: %.6f Hz (+- %.6f)" % (np.mean(np.array(f0rmse_cvlist_cyc_dv)),np.std(np.array(f0rmse_cvlist_cyc_dv)))) logging.info("f0corr_cyc_dv: %.6f (+- %.6f)" % (np.mean(np.array(f0corr_cvlist_cyc_dv)),np.std(np.array(f0corr_cvlist_cyc_dv)))) caprmse_cvlist_cyc_dv = np.array(caprmse_cvlist_cyc_dv) for i in range(caprmse_cvlist_cyc_dv.shape[-1]): logging.info("caprmse-%d_cyc_dv: %.6f dB (+- %.6f)" % (i+1, np.mean(caprmse_cvlist_cyc_dv[:,i]),np.std(caprmse_cvlist_cyc_dv[:,i])))
parser.add_argument('--epochs', type=int, default=10, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--num-processes', type=int, default=2, metavar='N',
                    help='how many training processes to use (default: 2)')
args = parser.parse_args()

if __name__ == '__main__':
    args = get_args()
    torch.manual_seed(args.seed)

    model = SimpleCNN()
    model.share_memory()  # gradients are allocated lazily, so they are not shared here

    processes = []
    for rank in range(args.num_processes):
        p = mp.Process(target=train, args=(rank, args, model))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
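# The train() worker used above is defined elsewhere in the repo. A minimal,
# hypothetical sketch of what a Hogwild-style worker can look like, assuming a
# hypothetical get_dataloader(args) helper that yields (data, target) batches:
import torch
import torch.nn.functional as F
import torch.optim as optim

def train(rank, args, model):
    torch.manual_seed(args.seed + rank)
    # Each process builds its own optimizer over the *shared* parameters;
    # because the model tensors live in shared memory, updates from any
    # worker are immediately visible to all of the others.
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    loader = get_dataloader(args)  # hypothetical data-loading helper
    model.train()
    for epoch in range(args.epochs):
        for data, target in loader:
            optimizer.zero_grad()
            loss = F.nll_loss(model(data), target)
            loss.backward()
            optimizer.step()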
def main(): parser = argparse.ArgumentParser() # decode setting parser.add_argument("--feats", required=True, type=str, help="list or directory of aux feat files") parser.add_argument("--stats", required=True, type=str, help="hdf5 file including statistics") parser.add_argument("--checkpoint", required=True, type=str, help="model file") parser.add_argument("--config", required=True, type=str, help="configure file") parser.add_argument("--outdir", required=True, type=str, help="directory to save generated samples") parser.add_argument("--fs", default=16000, type=int, help="sampling rate") parser.add_argument("--batch_size", default=32, type=int, help="number of batch size in decoding") parser.add_argument("--n_gpus", default=1, type=int, help="number of gpus") # other setting parser.add_argument("--intervals", default=1000, type=int, help="log interval") parser.add_argument("--seed", default=1, type=int, help="seed number") parser.add_argument("--verbose", default=1, type=int, help="log level") args = parser.parse_args() # check directory existence if not os.path.exists(args.outdir): os.makedirs(args.outdir) # set log level if args.verbose > 0: logging.basicConfig( level=logging.INFO, format= '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S', filename=args.outdir + "/decode.log") logging.getLogger().addHandler(logging.StreamHandler()) elif args.verbose > 1: logging.basicConfig( level=logging.DEBUG, format= '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S', filename=args.outdir + "/decode.log") logging.getLogger().addHandler(logging.StreamHandler()) else: logging.basicConfig( level=logging.WARN, format= '%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S', filename=args.outdir + "/decode.log") logging.getLogger().addHandler(logging.StreamHandler()) logging.warn("logging is disabled.") # fix seed os.environ['PYTHONHASHSEED'] = str(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) # load config config = torch.load(args.config) # get file list if os.path.isdir(args.feats): feat_list = sorted(find_files(args.feats, "*.h5")) elif os.path.isfile(args.feats): feat_list = read_txt(args.feats) else: logging.error("--feats should be directory or list.") sys.exit(1) # prepare the file list for parallel decoding feat_lists = np.array_split(feat_list, args.n_gpus) feat_lists = [f_list.tolist() for f_list in feat_lists] # define transform scaler = StandardScaler() scaler.mean_ = read_hdf5(args.stats, "/mean") scaler.scale_ = read_hdf5(args.stats, "/scale") wav_transform = transforms.Compose( [lambda x: encode_mu_law(x, config.n_quantize)]) feat_transform = transforms.Compose([lambda x: scaler.transform(x)]) # define gpu decode function def gpu_decode(feat_list, gpu): with torch.cuda.device(gpu): # define model and load parameters model = WaveNet(n_quantize=config.n_quantize, n_aux=config.n_aux, n_resch=config.n_resch, n_skipch=config.n_skipch, dilation_depth=config.dilation_depth, dilation_repeat=config.dilation_repeat, kernel_size=config.kernel_size, upsampling_factor=config.upsampling_factor) model.load_state_dict(torch.load(args.checkpoint)["model"]) model.eval() model.cuda() torch.backends.cudnn.benchmark = True # define generator generator = decode_generator( feat_list, batch_size=args.batch_size, wav_transform=wav_transform, feat_transform=feat_transform, use_speaker_code=config.use_speaker_code, upsampling_factor=config.upsampling_factor) # decode if 
args.batch_size > 1: for feat_ids, (batch_x, batch_h, n_samples_list) in generator: logging.info("decoding start") samples_list = model.batch_fast_generate( batch_x, batch_h, n_samples_list, args.intervals) for feat_id, samples in zip(feat_ids, samples_list): wav = decode_mu_law(samples, config.n_quantize) sf.write(args.outdir + "/" + feat_id + ".wav", wav, args.fs, "PCM_16") logging.info("wrote %s.wav in %s." % (feat_id, args.outdir)) else: for feat_id, (x, h, n_samples) in generator: logging.info("decoding %s (length = %d)" % (feat_id, n_samples)) samples = model.fast_generate(x, h, n_samples, args.intervals) wav = decode_mu_law(samples, config.n_quantize) sf.write(args.outdir + "/" + feat_id + ".wav", wav, args.fs, "PCM_16") logging.info("wrote %s.wav in %s." % (feat_id, args.outdir)) # parallel decode processes = [] gpu = 0 for i, feat_list in enumerate(feat_lists): p = mp.Process(target=gpu_decode, args=( feat_list, gpu, )) p.start() processes.append(p) gpu += 1 if (i + 1) % args.n_gpus == 0: gpu = 0 # wait for all process for p in processes: p.join()
done = False
state = env.reset()
while not done:
    action = shared_model.move(state)
    state_, reward, done, _ = env.step(action)
    state = state_
    score += reward

if n_epi % print_interval == 0 and n_epi != 0:
    print("# of episode :{}, avg score : {:.1f}".format(n_epi, score / print_interval))
    score = 0.0
    time.sleep(1)
env.close()


if __name__ == '__main__':
    global_model = ActorCritic(state_dim, action_dim)
    global_model.share_memory()

    processes = []
    for rank in range(cpu_count + 1):  # + 1 for test process
        if rank == 0:
            p = mp.Process(target=test, args=(global_model,))
        else:
            p = mp.Process(target=train, args=(global_model, rank,))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
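The rollout fragment above reads like the body of the test process launched in the __main__ block. A minimal sketch of the surrounding scaffold, with the environment name, episode count, and print interval as assumptions:

import time
import gym

def test(global_model, env_name='CartPole-v1', max_test_ep=400, print_interval=20):
    # Evaluation process: runs rollouts against the shared (global) model only.
    env = gym.make(env_name)
    score = 0.0
    for n_epi in range(max_test_ep):
        done = False
        state = env.reset()
        while not done:
            action = global_model.move(state)
            state, reward, done, _ = env.step(action)
            score += reward
        if n_epi % print_interval == 0 and n_epi != 0:
            print("# of episode :{}, avg score : {:.1f}".format(n_epi, score / print_interval))
            score = 0.0
            time.sleep(1)
    env.close()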
env = create_atari_env(args.env_name)
shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)
shared_model.share_memory()

if args.no_shared:
    optimizer = None
else:
    optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
    optimizer.share_memory()

processes = []
counter = mp.Value('i', 0)
lock = mp.Lock()

p = mp.Process(target=test,
               args=(args.num_processes, args, shared_model, counter, logger))
p.start()
processes.append(p)

for rank in range(0, args.num_processes):
    p = mp.Process(target=train,
                   args=(rank, args, shared_model, counter, lock, logger, optimizer))
    p.start()
    processes.append(p)

for p in processes:
    p.join()
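my_optim.SharedAdam is not shown here. The usual pattern for Hogwild-style A3C training is an Adam subclass whose per-parameter state is created eagerly and placed in shared memory, so every worker process updates one copy; a sketch under that assumption (the full version in such repositories also overrides step() to consume these shared tensors, which is omitted here):

import torch

class SharedAdam(torch.optim.Adam):
    """Adam whose per-parameter state lives in shared memory (sketch)."""

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        super(SharedAdam, self).__init__(params, lr=lr, betas=betas, eps=eps,
                                         weight_decay=weight_decay)
        # Create the state tensors up front instead of lazily in step().
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        # Move the optimizer state into shared memory so forked workers see one copy.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()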
def main(scripts, args):
    scripts = " ".join(sys.argv[0:])
    args = parser.parse_args()
    args.scripts = scripts

    torch.manual_seed(args.seed)
    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method('spawn')

    if args.deploy:
        raw, gt_lbl, raw_valid, gt_lbl_valid, raw_test, gt_lbl_test, \
            raw_test_upsize, gt_lbl_test_upsize = setup_data(args)
    else:
        raw, gt_lbl, raw_valid, gt_lbl_valid, raw_test, gt_lbl_test = setup_data(args)

    env_conf = setup_env_conf(args)

    shared_model = get_model(args, args.model, env_conf["observation_shape"],
                             args.features, atrous_rates=args.atr_rate,
                             num_actions=2, split=args.data_channel,
                             multi=args.multi)

    manager = mp.Manager()
    shared_dict = manager.dict()
    if args.wctrl == "s2m":
        shared_dict["spl_w"] = args.spl_w
        shared_dict["mer_w"] = args.mer_w

    if args.load:
        saved_state = torch.load(args.load,
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)
    shared_model.share_memory()

    if args.shared_optimizer:
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(shared_model.parameters(), lr=args.lr,
                                   amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None

    processes = []
    if not args.no_test:
        if raw_test is not None:
            if args.deploy:
                p = mp.Process(target=test_func,
                               args=(args, shared_model, env_conf,
                                     [raw_valid, gt_lbl_valid],
                                     (raw_test, gt_lbl_test, raw_test_upsize,
                                      gt_lbl_test_upsize, shared_dict)))
            else:
                p = mp.Process(target=test_func,
                               args=(args, shared_model, env_conf,
                                     [raw_valid, gt_lbl_valid],
                                     (raw_test, gt_lbl_test), shared_dict))
        else:
            p = mp.Process(target=test_func,
                           args=(args, shared_model, env_conf,
                                 [raw_valid, gt_lbl_valid], None, shared_dict))
        p.start()
        processes.append(p)
        time.sleep(0.1)

    for rank in range(0, args.workers):
        p = mp.Process(target=train_func,
                       args=(rank, args, shared_model, optimizer, env_conf,
                             [raw, gt_lbl], shared_dict))
        p.start()
        processes.append(p)
        time.sleep(0.1)

    for p in processes:
        time.sleep(0.1)
        p.join()
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory and torch.cuda.is_available()
    self.timeout = loader.timeout
    self.sample_iter = iter(self.batch_sampler)
    base_seed = torch.LongTensor(1).random_().item()

    if self.num_workers > 0:
        self.worker_init_fn = loader.worker_init_fn
        self.worker_queue_idx = 0
        self.worker_result_queue = multiprocessing.Queue()
        self.batches_outstanding = 0
        self.worker_pids_set = False
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}
        self.done_event = multiprocessing.Event()

        self.index_queues = []
        self.workers = []
        for i in range(self.num_workers):
            index_queue = multiprocessing.Queue()
            index_queue.cancel_join_thread()
            w = multiprocessing.Process(
                target=_ms_loop,
                args=(self.dataset, index_queue, self.worker_result_queue,
                      self.done_event, self.collate_fn, base_seed + i,
                      self.worker_init_fn, i))
            w.daemon = True
            w.start()
            self.index_queues.append(index_queue)
            self.workers.append(w)

        if self.pin_memory:
            self.data_queue = queue.Queue()
            pin_memory_thread = threading.Thread(
                target=_utils.pin_memory._pin_memory_loop,
                args=(self.worker_result_queue, self.data_queue,
                      torch.cuda.current_device(), self.done_event))
            pin_memory_thread.daemon = True
            pin_memory_thread.start()
            self.pin_memory_thread = pin_memory_thread
        else:
            self.data_queue = self.worker_result_queue

        _utils.signal_handling._set_worker_pids(
            id(self), tuple(w.pid for w in self.workers))
        _utils.signal_handling._set_SIGCHLD_handler()
        self.worker_pids_set = True

        # prime the pipeline with two batches of indices per worker
        for _ in range(2 * self.num_workers):
            self._put_indices()
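The final loop primes the prefetch pipeline through self._put_indices(). A sketch of what that helper typically does in this style of data-loader iterator, assuming the bookkeeping fields initialized above (round-robin dispatch of the next batch of sample indices to the worker index queues):

def _put_indices(self):
    # Send the next batch of sample indices to one worker, round-robin.
    assert self.batches_outstanding < 2 * self.num_workers
    indices = next(self.sample_iter, None)
    if indices is None:
        return
    self.index_queues[self.worker_queue_idx].put((self.send_idx, indices))
    self.worker_queue_idx = (self.worker_queue_idx + 1) % self.num_workers
    self.batches_outstanding += 1
    self.send_idx += 1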
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    tcn = torch.nn.DataParallel(tcn, device_ids=range(torch.cuda.device_count()))

    triplet_builder = builder(args.n_views, args.train_directory, IMAGE_SIZE,
                              args, sample_size=SAMPLE_SIZE)

    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(
        target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(tcn.parameters(), lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones: 0.1, 0.01, 0.001.
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[200, 500, 1000], gamma=0.1)

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_ = []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    n_iter = 0
    n_valid_iter = 0
    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,  # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
        )

        for _ in range(0, ITERATE_OVER_TRIPLETS):
            losses = []
            losses_triplet = []
            losses_pose = []
            for minibatch in data_loader:
                # frames = Variable(minibatch, require_grad=False)
                loss, loss_triplet, loss_pose, _, _ = loss_fn(tcn, minibatch)
                losses.append(loss.data.cpu().numpy())
                losses_triplet.append(loss_triplet.data.cpu().numpy())
                losses_pose.append(loss_pose.data.cpu().numpy())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            writer.add_scalar('data/train_loss', np.mean(losses), n_iter)
            writer.add_scalar('data/train_triplet_loss', np.mean(losses_triplet), n_iter)
            writer.add_scalar('data/train_pose_loss', np.mean(losses_pose), n_iter)
            n_iter += 1

        trn_losses_.append(np.mean(losses))
        logger.info('train loss: {}'.format(np.mean(losses)))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss, n_valid_iter = validate(
                tcn, use_cuda, n_valid_iter)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model to {}'.format(args.model_folder))
            save_model(tcn, model_filename(args.model_name, epoch), args.model_folder)

        plot_mean(trn_losses_, args.model_folder, 'train_loss')
        plot_mean(val_losses_, args.model_folder, 'validation_loss')
        # plot_mean(train_acc_, args.model_folder, 'train_acc')
        plot_mean(val_acc_margin_, args.model_folder, 'validation_accuracy_margin')
        plot_mean(val_acc_no_margin_, args.model_folder, 'validation_accuracy_no_margin')
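build_set is the producer side of the bounded multiprocessing.Queue(1) hand-off above; the training loop blocks in queue.get() until a fresh triplet dataset is ready. A minimal sketch of that pattern (the function name matches the call above, but the triplet-builder API and the seqs_per_epoch count are assumptions):

import torch.utils.data

def build_set(queue, triplet_builder, log, seqs_per_epoch=5):
    # Producer: keep one freshly sampled triplet dataset ready at all times.
    # The queue has maxsize=1, so put() blocks until the trainer consumes the last one.
    while True:
        datasets = [triplet_builder.build_set() for _ in range(seqs_per_epoch)]
        dataset = torch.utils.data.ConcatDataset(datasets)
        log.info('Created {} triplets'.format(len(dataset)))
        queue.put(dataset)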
optimizer = RiemannianSGD(
    model.parameters(),
    rgrad=opt.rgrad,
    retraction=opt.retraction,
    lr=opt.lr,
)

# if nproc == 0, run single threaded, otherwise run Hogwild
if opt.nproc == 0:
    train.train(model, data, optimizer, opt, log, 0)
else:
    queue = mp.Manager().Queue()
    model.share_memory()
    processes = []
    for rank in range(opt.nproc):
        p = mp.Process(target=train.train_mp,
                       args=(model, data, optimizer, opt, log, rank + 1, queue))
        p.start()
        processes.append(p)

    ctrl = mp.Process(target=control,
                      args=(queue, log, adjacency, data, opt.fout, distfn,
                            opt.epochs, processes))
    ctrl.start()
    ctrl.join()

print("training complete -- saving embedding to {}".format(embedding_file))
embedding_df = pd.DataFrame(model.lt.weight.detach().numpy())
embedding_df.to_csv(embedding_file, compression="gzip")
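train.train_mp is the Hogwild entry point used above; a plausible sketch (an assumption, not the repository's exact code) is a thin wrapper that reseeds each worker and forwards the shared queue so the control process can monitor progress:

def train_mp(model, data, optimizer, opt, log, rank, queue):
    # Each Hogwild worker trains against the shared-memory model and reports
    # progress to the control process through the manager queue.
    torch.manual_seed(opt.seed + rank)
    train(model, data, optimizer, opt, log, rank, queue=queue)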
def generate_proposals(ann_file, tem_results_dir, pgm_proposals_dir,
                       pgm_proposals_thread, **kwargs):
    """Generate proposals using multi-process.

    Args:
        ann_file (str): A json file path of the annotation file for all
            videos to be processed.
        tem_results_dir (str): Directory to read tem results.
        pgm_proposals_dir (str): Directory to save generated proposals.
        pgm_proposals_thread (int): Total number of threads.
        kwargs (dict): Keyword arguments for "generate_candidate_proposals".
    """
    video_infos = load_video_infos(ann_file)
    num_videos = len(video_infos)
    num_videos_per_thread = num_videos // pgm_proposals_thread
    processes = []
    manager = mp.Manager()
    result_dict = manager.dict()
    kwargs['result_dict'] = result_dict
    for tid in range(pgm_proposals_thread - 1):
        tmp_video_list = range(tid * num_videos_per_thread,
                               (tid + 1) * num_videos_per_thread)
        p = mp.Process(
            target=generate_candidate_proposals,
            args=(tmp_video_list, video_infos, tem_results_dir),
            kwargs=kwargs)
        p.start()
        processes.append(p)

    tmp_video_list = range((pgm_proposals_thread - 1) * num_videos_per_thread,
                           num_videos)
    p = mp.Process(
        target=generate_candidate_proposals,
        args=(tmp_video_list, video_infos, tem_results_dir),
        kwargs=kwargs)
    p.start()
    processes.append(p)

    for p in processes:
        p.join()

    # save results
    os.makedirs(pgm_proposals_dir, exist_ok=True)
    prog_bar = mmcv.ProgressBar(num_videos)
    header = 'tmin,tmax,tmin_score,tmax_score,score,match_iou,match_ioa'
    for video_name in result_dict:
        proposals = result_dict[video_name]
        proposal_path = osp.join(pgm_proposals_dir, video_name + '.csv')
        np.savetxt(
            proposal_path,
            proposals,
            header=header,
            delimiter=',',
            comments='')
        prog_bar.update()
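The manual split above gives the last process all of the remainder left over by num_videos // pgm_proposals_thread. An alternative (a sketch, not part of the module) is np.array_split, which spreads the remainder so each chunk differs in size by at most one video:

import numpy as np

num_videos = 103          # example value
pgm_proposals_thread = 8  # example value

# Split the video indices into nearly equal chunks; the remainder is spread evenly.
video_chunks = [chunk.tolist()
                for chunk in np.array_split(np.arange(num_videos), pgm_proposals_thread)]
assert sum(len(c) for c in video_chunks) == num_videos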
if __name__ == "__main__":
    mp.set_start_method('spawn')
    writer = SummaryWriter(comment="-pong-ga")
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda")
    args = parser.parse_args()
    device = "cuda" if args.cuda else "cpu"

    input_queues = []
    output_queue = mp.Queue(maxsize=WORKERS_COUNT)
    workers = []
    for _ in range(WORKERS_COUNT):
        input_queue = mp.Queue(maxsize=1)
        input_queues.append(input_queue)
        w = mp.Process(target=worker_func, args=(input_queue, output_queue, device))
        w.start()
        seeds = [(np.random.randint(MAX_SEED),) for _ in range(SEEDS_PER_WORKER)]
        input_queue.put(seeds)

    gen_idx = 0
    elite = None
    while True:
        t_start = time.time()
        batch_steps = 0
        population = []
        while len(population) < SEEDS_PER_WORKER * WORKERS_COUNT:
            out_item = output_queue.get()
            population.append((out_item.seeds, out_item.reward))
            batch_steps += out_item.steps
        if elite is not None:
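The master loop above unpacks out_item.seeds, out_item.reward, and out_item.steps from the output queue. In this genetic-algorithm setup the queue items are typically a small record like the following (a sketch of the assumed worker protocol, not necessarily the exact definition used by worker_func):

import collections

# One evaluated individual: the chain of mutation seeds that defines its
# network, the episode reward it achieved, and the environment steps it used.
OutputItem = collections.namedtuple('OutputItem', ['seeds', 'reward', 'steps'])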