def main():
    args = parse()
    save_model = Saver(args.modelfolder)
    use_cuda = args.cuda
    model_G = SpeechEggEncoder()
    model_D = Discriminator()
    model_G, model_D, _, _, _, _ = save_model.load_checkpoint(
        model_G, model_D, file_name=args.modelfile)

    speechfiles = glob(os.path.join(args.speechfolder, "*.npy"))
    eggfiles = glob(os.path.join(args.eggfolder, "*.npy"))
    reconstruction_save_path = args.outputfolder

    test_data = AudioFileDataset(speechfiles, eggfiles, args.window,
                                 args.stride, transform=detrend)
    test_dataloader = DataLoader(test_data, 1, num_workers=4, shuffle=False)

    os.makedirs(reconstruction_save_path, exist_ok=True)

    for egg_reconstructed, f in test(model_G, model_D, test_dataloader,
                                     args.window, args.stride,
                                     use_cuda=use_cuda):
        outputfile = os.path.join(reconstruction_save_path, f[0])
        np.save(outputfile, egg_reconstructed)
def saveLabels(self, dirname):
    if not dirname:
        return
    saver = Saver()
    shapes = [item.shape() for item in self.allLabelList]
    saver.saveLabels(dirname, shapes, self.loader.image_path)
    self.saver = saver
    self.setClean()
def expo(args):
    def filename_fn(args):
        rs = 'N({}, {})'.format(args.radius, args.sigma)
        return rs

    def fpath(fname):
        _fpath = os.path.join(args.output_dir, fname)
        return _fpath

    length = 5 * args.radius
    linspace, data = SyntheticDataset.grid_data(args.num_points, length=length)

    # loader = dataset[args.dataset](args)
    # trainData = loader.train
    # for batch_idx, samples in enumerate(trainData):
    #     data, labels = samples[DatasetType.InD]

    plt.xlim(-1 * length, length)
    plt.ylim(-1 * length, length)

    for scale in tqdm([1, 2, 3, 4]):
        sigma = scale * args.sigma
        scale_args = deepcopy(args)
        scale_args.sigma = sigma
        fname = filename_fn(scale_args)

        checkpoint_dir = os.path.join(args.work_dir, 'checkpoints')
        saver = Saver(checkpoint_dir)  # makes the directory if not already present
        # hash_args(scale_args) generates the hex string used as the checkpoint key
        payload = saver.load(hash_args(scale_args))

        def run_and_save(scale_args):
            export = main(scale_args)  # main() builds the model and returns its export
            payload = export['model']
            saver.save(hash_args(scale_args), payload)
            return payload

        export = payload or run_and_save(scale_args)
        with torch.no_grad():
            scores = inference(export, data)

        np_x = data.cpu().numpy()
        for key in scores:
            score = scores[key].cpu().numpy()
            plot_pcolormesh(np_x, linspace, score)
            score_fname = '{}_{}'.format(fname, key)
            plt.title(score_fname)
            flush_plot(plt, fpath(score_fname) + '.png')
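# The snippet above keys checkpoints by hash_args(scale_args), which the
# inline comment says generates a hex string. A minimal sketch of such a
# helper, assuming the namespace is hashed via its sorted vars() items
# (the real implementation may differ):
import hashlib

def hash_args(args):
    """Map an argparse.Namespace to a stable hex digest (illustrative sketch)."""
    canonical = repr(sorted(vars(args).items()))
    return hashlib.sha256(canonical.encode('utf-8')).hexdigest()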
def main(args):
    tf = '_tf' if args.tf else ''
    run_dir = os.path.join(
        'results/', args.config + tf + '_%.4f' % args.lambda_ient +
        '_%.4f' % args.lambda_tent)

    word_dict = json.load(open(args.data + '/word_dict.json', 'r'))
    vocabulary_size = len(word_dict)

    encoder = Encoder(args.network, args.config)
    decoder = Decoder(vocabulary_size, encoder.dim, args.tf)
    optimizer = optim.Adam(decoder.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, args.step_size)
    encoder.cuda()
    decoder.cuda()

    cross_entropy_loss = nn.CrossEntropyLoss().cuda()

    saver = Saver(Trainer(encoder, decoder, optimizer, scheduler), run_dir)
    writer = SummaryWriter(saver.log_dir)

    train_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data),
        batch_size=args.batch_size, shuffle=True, num_workers=1)
    val_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data, split_type='val'),
        batch_size=args.batch_size, shuffle=True, num_workers=1)
    test_loader = torch.utils.data.DataLoader(
        ImageCaptionDataset(data_transforms, args.data, split_type='test'),
        batch_size=args.batch_size, shuffle=False, num_workers=1)

    print('Starting training with {}'.format(args))
    for epoch in range(saver.start_epoch, args.epochs + 1):
        train(epoch, encoder, decoder, optimizer, cross_entropy_loss,
              train_loader, word_dict, args.alpha_c, args.log_interval,
              writer, saver, val_loader, args)
        saver.save_model(epoch)
        validate(epoch, encoder, decoder, cross_entropy_loss, val_loader,
                 word_dict, args.alpha_c, args.log_interval, writer, saver)
        test(epoch, encoder, decoder, cross_entropy_loss, test_loader,
             word_dict, args.alpha_c, args.log_interval, writer, saver)

        old_lr = optimizer.param_groups[0]['lr']
        scheduler.step()
        lr = optimizer.param_groups[0]['lr']
        print('learning rate %.7f -> %.7f' % (old_lr, lr))
    writer.close()
def loadJson(self, file):
    if file:
        with open(file, encoding='UTF-8') as f:
            j = json.load(f, strict=False)
            if file.endswith('tags.json'):
                self.colorTableWidget.loadFromJson(j)
                self._config['tags'] = j
            else:
                self.clearLabels()
                self.view.clear()
                saver = Saver()
                shapes = saver.loadLabels(j)
                for s in shapes:
                    self.addLabel(s)
                for canvas in self.view:
                    canvas.updateToCenter()
                self._json_file = file
def __init__(self, device):
    model_path = 'weights/itemscorer_action_generator_32'
    encoder_hidden_dim = 32
    self.model = ItemScorerModel(device=device,
                                 encoder_hidden_dim=encoder_hidden_dim,
                                 linear_hidden_dim=encoder_hidden_dim)
    self.saver = Saver(model=self.model,
                       load_pretrained=True,
                       pretrained_model_path=_FILE_PREFIX + model_path,
                       device=device)
def __init__(self, args):
    self.args = args

    # determine the type of tensor (GPU or CPU)
    self.is_cuda = torch.cuda.is_available()
    self.Tensor = torch.cuda.FloatTensor if self.is_cuda else torch.FloatTensor

    # network initialization
    img_shape = (args.channels, args.img_size, args.img_size)
    self.generator = Generator(args, img_shape)
    self.discriminator = Discriminator(img_shape)

    # to GPU
    if self.is_cuda:
        self.generator.cuda()
        self.discriminator.cuda()

    # init dirs
    self.save_dir = os.path.join(args.exp_dir, args.exp_name)
    self.gen_dir = os.path.join(self.save_dir, args.gen_dir)
    self.result_dir = os.path.join(self.save_dir, args.result_dir)
    if not os.path.exists(self.gen_dir):
        os.makedirs(self.gen_dir)
    if not os.path.exists(self.result_dir):
        os.makedirs(self.result_dir)

    # save
    self.saver = Saver(self.save_dir)
    self.save_dict = {
        'generator': self.generator,
        'discriminator': self.discriminator,
    }

    # Info
    print('\n------ Model Info ------')
    print('amount of parameters:', self.network_paras())
    print('Using GPU:', self.is_cuda)
    print('{:=^40}'.format(' Completed '))
def main():
    test_data = create_dataloader(
        64,
        "CMU_new/bdl_test/speech",
        "CMU_new/bdl_test/egg_detrended",
        # "Childers/M_test/speech",
        # "Childers/M_test/egg_detrended",
        # "Temp1/speech",
        # "Temp1/egg_detrended",
        200,
        200,
        select=1,
    )

    # save_model = Saver('Models/DotModel/Childers_clean')
    save_model = Saver("checkpoints/clean300")
    use_cuda = True
    model_G = SpeechEggEncoder()
    model_D = Discriminator()
    model_G, model_D, _, _, _, _ = save_model.load_checkpoint(
        model_G, model_D, file_name="bce_epoch_45.pt")
    test(model_G, model_D, test_data, use_cuda=use_cuda)
                    help='Validation image path')
parser.add_argument('--val-label-path', type=str,
                    default='/path/to/VesselNN/train/label', metavar='N',
                    help='Validation label path')
parser.add_argument('--validate', action='store_true', help='validate')
# checkpointing
parser.add_argument('--resume', type=str, default=None,
                    help='put the path to resuming file if needed')
parser.add_argument('--checkname', type=str, default='VesselNN_Unsupervised',
                    help='set the checkpoint name')
args = parser.parse_args()

# Define Saver
saver = Saver(args)
saver.save_experiment_config()
# Define Tensorboard Summary
summary = TensorboardSummary(saver.experiment_dir)
writer = summary.create_summary()

# Data
dataset = Directory_Image_Train(images_path=args.train_images_path,
                                labels_path=args.train_labels_path,
                                data_shape=(32, 128, 128),
                                lables_shape=(32, 128, 128),
                                range_norm=args.range_norm)
dataloader = DataLoader(dataset,
                        batch_size=torch.cuda.device_count() * args.batch_size,
                        shuffle=True,
                        num_workers=2)
# Data - validation
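# Saver(args) above snapshots the experiment configuration before training
# starts. A minimal sketch of what save_experiment_config might do; the
# directory layout and file name are assumptions, not from the source:
import os

class Saver:
    def __init__(self, args):
        self.args = args
        self.experiment_dir = os.path.join('run', args.checkname)
        os.makedirs(self.experiment_dir, exist_ok=True)

    def save_experiment_config(self):
        # one "key: value" line per hyperparameter
        path = os.path.join(self.experiment_dir, 'parameters.txt')
        with open(path, 'w') as f:
            for key, value in sorted(vars(self.args).items()):
                f.write('{}: {}\n'.format(key, value))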
import json

import numpy as np

# LowRankLearning (instantiated below) replaces the unused LinearLearning import
from utils import Mapper, LowRankLearning, Saver, PendulumEnv

parameters_file = "experiments/exp_1_linear_learning.json"

with open(parameters_file) as j:
    parameters = json.loads(j.read())

mapping = Mapper()
env = PendulumEnv()
saver = Saver()

state_map, state_reverse_map = mapping.get_state_map(
    parameters["step_state"], parameters["decimal_state"])
action_map, action_reverse_map = mapping.get_action_map(
    parameters["step_action"], parameters["decimal_action"])

steps = []
rewards = []
final_mean_reward = []

for i in range(parameters["n_simulations"]):
    lr_learner = LowRankLearning(env=env,
                                 state_set=parameters["state_set"],
                                 state_map=state_map,
                                 action_map=action_map,
                                 state_reverse_map=state_reverse_map,
                                 action_reverse_map=action_reverse_map,
                                 decimal_state=parameters["decimal_state"],
                                 decimal_action=parameters["decimal_action"],
                                 step_state=parameters["step_state"],
from matplotlib import rcParams

from utils import Saver, TestUtils

saver = Saver()
test_utils = TestUtils()

rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Tahoma']
rcParams['font.size'] = 16

medians_q_learning = saver.load_from_pickle(
    "results/q_learning_medians.pickle")
stds_q_learning = saver.load_from_pickle("results/q_learning_stds.pickle")
frob_errors_q_learning = saver.load_from_pickle(
    "results/q_learning_frob_errors.pickle")

colors = ['b', 'r', 'g', 'y']
epsilons = sorted([float(epsilon) for epsilon in medians_q_learning.keys()])

medians_lr_learning = saver.load_from_pickle(
    "results/lr_learning_medians.pickle")
stds_lr_learning = saver.load_from_pickle("results/lr_learning_stds.pickle")
frob_errors_lr_learning = saver.load_from_pickle(
    "results/lr_learning_frob_errors.pickle")

test_utils.plot_smoothed_steps(medians_q=medians_q_learning,
                               medians_lr=medians_lr_learning,
                               epsilons=epsilons,
                               colors=colors)
test_utils.plot_sfe(epsilons=epsilons,
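# Several of these scripts only call Saver.load_from_pickle. A minimal
# sketch of the pickle round-trip such a helper wraps; save_to_pickle is
# an assumed counterpart, not taken from the source:
import pickle

class Saver:
    def save_to_pickle(self, obj, path):
        with open(path, 'wb') as f:
            pickle.dump(obj, f)

    def load_from_pickle(self, path):
        with open(path, 'rb') as f:
            return pickle.load(f)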
def restore(args, model):
    # load
    saver = Saver(args)
    return saver.load(model)
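# A hedged usage sketch for restore(); the Namespace fields and the toy
# model below are illustrative placeholders, not from the source:
import argparse

import torch.nn as nn

args = argparse.Namespace(checkname='demo', resume='run/demo/checkpoint.pth')
model = nn.Linear(4, 2)  # stand-in for the real network
model = restore(args, model)  # Saver(args).load(model) under the hood
model.eval()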
class Trainer:
    def __init__(self, args, n_workers=4, memory_ram=2, time_saving=45):
        # Getting Ray
        self.Ray = args.ray
        if self.Ray and not RAY:
            print("-- ray is not available --\n"
                  "Normal initialization in progress")
            self.Ray = False

        # Some parameters
        self.time_save = time_saving
        self.seed = args.seed
        self.exp_steps = args.steps
        self.test_steps = args.test_steps
        self.test_freq = args.test_freq
        self.memory_start = args.memory_start
        self.update_online = args.update_online
        self.play_steps = args.play
        self.game = args.game
        self.double = args.double
        self.game_actions = args.game_actions

        # Other variables to save progress
        self.time = Tocker()
        self.ckTime = Tocker()
        self.acc_test = 0
        self.mean_test = []
        self.std_test = []
        self.actor_test_episodes = 0
        self.test_episodes = 0

        name_temp = args.game
        name_temp += '_double' if self.double else ''
        name_temp += '_' + args.optimizer + '_lr_' + str(args.learning_rate)
        self.saver = Saver(name_temp)
        name_sum = os.path.join(self.saver.dir,
                                "tensorboard_{}".format(name_temp))
        self.writer = SummaryWriter(name_sum)

        # Generating the actors
        self.policy = atariDQN(args.lhist, args.game_actions, args.dropouts)
        self.memoryReplay = MemoryReplay(
            capacity=args.memory_size,
            LHist=args.lhist,
        )
        self.main_actor = ActorDQN(
            self.game,
            args.game_actions,
            self.policy,
            lHist=args.lhist,
            steps_per_update=args.update_online,
            buffer_size=args.buffer_size,
            test_steps=self.test_steps,
            start_steps=self.memory_start,
            device=DEVICE,
            seed=args.seed,
            writer=self.writer,
        )
        self.steps_to_fill_mem = math.ceil(self.memory_start /
                                           (args.buffer_size * args.lhist))
        self.n_actors = NCPUS if n_workers >= NCPUS else n_workers
        if NCPUS > 1 and self.Ray:
            # Actors with ray are created to speed up
            # the filling and testing of the buffer and net
            actors_start_steps = math.ceil(self.memory_start / self.n_actors)
            self.steps_to_fill_mem = math.ceil(
                actors_start_steps / (args.buffer_size * args.lhist))
            actors_test_steps = math.ceil(self.test_steps / self.n_actors)
            # ---- Initialize Ray ----
            ray.init(num_cpus=self.n_actors,
                     _memory=memory_ram * GIGA,
                     object_store_memory=400 * MEGA)
            # Buffers for the actors
            # self.buffers = [Buffer(capacity=actors_buffer_size)
            #                 for _ in range(self.n_actors)]
            # ActorDQN ray actors used to fill and evaluate;
            # access is to their buffers only
            actor = ray.remote(ActorDQN)
            self.actors = [actor.remote(self.game,
                                        args.game_actions,
                                        self.policy,
                                        lHist=args.lhist,
                                        buffer_size=args.buffer_size,
                                        test_steps=actors_test_steps,
                                        start_steps=actors_start_steps,
                                        seed=args.seed,
                                        ray_actor=True)
                           for i in range(self.n_actors)]
            time.sleep(10)
            print("Trainer set with Ray\n"
                  "Ray resources {} workers with {} GB of RAM".format(
                      self.n_actors, memory_ram))
        else:
            print(timeFormated(), "Trainer set")
        self.main_learner = LearnerDQN(
            self.policy,
            self.memoryReplay,
            args.mini_batch_size,
            learning_rate=args.learning_rate,
            update_target=args.update_target,
            device=DEVICE,
            double=args.double,
            optimizer=args.optimizer,
            seed=args.seed,
            writer=self.writer,
        )

    def fillMemory(self):
        I = tqdm(range(self.steps_to_fill_mem), desc='Filling Memory')
        self.time.tick
        if self.Ray:
            print("Ray has started. Filling Memory . . .")
            for i in I:
                buffers = ray.get(
                    [actor.fillBuffer.remote() for actor in self.actors])
                self.memoryReplay.combineBuffers(*buffers)
            self.main_actor.steps = self.memory_start
        else:
            for i in I:
                buffer = self.main_actor.fillBuffer()
                self.memoryReplay.combineBuffers(buffer)
        print(timeFormated(),
              "Memory Filled to {} in {}".format(self.memory_start,
                                                 self.time.tock))
        # Saving a fixed amount of frames to test on.
        s, a, r, s2, t = self.memoryReplay.Sample(500)
        self.main_actor.passTestHistories(s)
        del a, r, s2, t

    def sampleBuffer(self, samples: int = 20):
        print("Displaying {} sample histories from the buffer".format(samples))
        try:
            self.memoryReplay.showBuffer(samples)
        except Exception:
            print("Display samples Stopped")

    def __del__(self):
        print("Trainer Terminated")

    def close(self):
        if self.ckTime.tocktock > 5:
            self.saveAll()
        self.writer.flush()
        self.writer.close()
        ray.shutdown()

    def train(self):
        self.main_actor.newGame()
        I = tqdm(
            range(0, self.exp_steps),
            desc='Executing and learning',
            unit='updates',
        )
        for i in I:
            bufferReady = self.main_actor.autoStep()
            if bufferReady:
                self.memoryReplay.combineBuffers(self.main_actor.getBuffer())
            self.main_learner.trainStep()
            # self.main_actor.updateModel(self.main_learner.onlineModel())
            # They share the same object now
            self.writer.flush()
            if i % self.test_freq == 0:
                self.test()
            if self.ckTime.tocktock >= self.time_save:
                self.saveAll()
                self.ckTime.tick

    def test(self):
        # self.memoryReplay.add(*self.memoryReplay.zeroe)
        # ---- Testing the performance ----
        self.time.tick
        q_mean = self.main_actor.testQHistories()
        if self.Ray:
            print("Ray starting test . . . ")
            # ---- Dividing the total test_steps per actor ----
            # Update the online model in the actors
            updatedOnline = self.main_learner.onlineModel(cpu=True)
            ray.get([
                actor.updateModel.remote(updatedOnline.copy())
                for actor in self.actors
            ])
            # Get test results
            testRes = ray.get(
                [actor.testRun.remote() for actor in self.actors])
            # Consolidate results
            episodeRwd = []
            for rE in testRes:
                episodeRwd += rE
            print("Ray testing done")
        else:
            # ------ main actor performs all the steps sequentially ------
            episodeRwd = self.main_actor.testRun()
        # Saving results and logging
        len_episodeRwd = len(episodeRwd)
        tot_episodeRwd = sum(episodeRwd)
        self.actor_test_episodes += len_episodeRwd
        self.mean_test += [np.mean(episodeRwd) if len_episodeRwd != 0 else 0.0]
        self.std_test += [np.std(episodeRwd) if len_episodeRwd != 0 else 0.0]
        self.acc_test += tot_episodeRwd
        self.writer.add_scalar('test/accumulated_reward', tot_episodeRwd,
                               self.test_episodes)
        self.writer.add_scalar('test/mean_reward', self.mean_test[-1],
                               self.test_episodes)
        self.writer.add_scalar('test/std_reward', self.std_test[-1],
                               self.test_episodes)
        self.writer.add_scalar('test/actor_episodes', len_episodeRwd,
                               self.test_episodes)
        self.writer.add_scalar('test/Q-mean', q_mean, self.test_episodes)
        self.writer.flush()
        self.test_episodes += 1
        print(timeFormated(),
              "Test done in {}. Reward Accumulated:{}, Mean:{}. Q_mean {}"
              .format(self.time.tock, np.round(tot_episodeRwd, 2),
                      np.round(self.mean_test[-1], 2), np.round(q_mean, 3)))

    def saveAll(self):
        # --- Saving buffer and trainer ----
        models = dict()
        self.saver.saveObject(self.dictToSave(), "trainer")
        self.saver.saveObject(self.memoryReplay.dictToSave(), "memory")
        models['online'] = self.main_learner.onlineModel()
        models['target'] = self.main_learner.targetModel()
        models['optimizer'] = self.main_learner.optimizerState()
        self.saver.saveModel(models, "models")

    def loadActor(self, Dir):
        os.chdir(Dir)
        files = os.listdir()
        print("Files in directory:")
        for n, File in enumerate(files):
            print("{} : {}".format(n, File))
        while 1:
            choice = int(input("Enter the number for the model to load: "))
            # valid indices run 0..len(files)-1; the original isinstance
            # check was dead code after int()
            if choice >= len(files) or choice < 0:
                print("Number not valid. Please try again.")
            else:
                break
        model = os.path.join(Dir, files[choice])
        models = self.saver.loadModel(model, DEVICE)
        self.main_actor.updateModel(models['online'])
        print("Actor restored from", Dir)

    def dictToSave(self):
        this = dict()
        res = dict()
        res['means'] = self.mean_test
        res['stds'] = self.std_test
        res['acc'] = self.acc_test
        res['episodes'] = self.actor_test_episodes
        this['results'] = res
        this['seed'] = self.seed
        this['Steps'] = self.exp_steps
        this['testSteps'] = self.test_steps
        this['memoryStart'] = self.memory_start
        this['double'] = self.double
        this['game'] = self.game
        this['game_actions'] = self.game_actions
        return this

    def loadFromDict(self, this):
        try:
            res = this['results']
            self.mean_test = res['means']
            self.std_test = res['stds']
            self.acc_test = res['acc']
            self.actor_test_episodes = res['episodes']
            self.seed = this['seed']
            self.exp_steps = this['Steps']
            self.test_steps = this['testSteps']
            self.memory_start = this['memoryStart']
            self.double = this['double']
            self.game_actions = this['game_actions']
            print("Successfully loaded Trainer from dict")
        except KeyError:
            print("Error loading Trainer from dict")

    def playTest(self):
        try:
            import imageio
            GIF = True
            bufferFrame = []
        except ImportError:
            GIF = False
            print("imageio is missing from the packages. "
                  "A .gif from the run won't be made.")
        if self.play_steps > 0:
            # Start playing sequence
            # --- wait for the user to watch ----
            input("Press any key to initialize test . . .")
            self.main_actor.isTest = True
            self.main_actor.updateModel(self.main_learner.onlineModel())
            self.main_actor.newGame()
            env = self.main_actor.env
            game = self.main_actor.game
            print("Test of the agent in {}".format(game))
            episodes, reward = 0, 0
            I = tqdm(range(0, self.play_steps), 'Test in progress',
                     unit=' plays')
            for _ in I:
                self.time.tick
                if GIF:
                    bufferFrame.append(env._get_image())
                env.render()
                stepRwd = self.main_actor.step()
                # 60 Hz / skip_frame, as the environment will do
                self.time.lockHz(15 if game != 'space_invaders' else 20)
                if self.main_actor.done:
                    episodes += 1
                reward += stepRwd
            env.close()
            if GIF:
                imageio.mimsave(
                    "./testPlay {} frames {} episodes {} points {} - {}.gif"
                    .format(game, self.play_steps, episodes, reward,
                            timeFormated()),
                    bufferFrame,
                    fps=15 if game != 'space_invaders' else 20)
            print(timeFormated(),
                  "Test play done. Completed {} episodes and accumulated "
                  "{} points".format(episodes, reward))
from slgep_lib import wrap_config
from utils import Saver
from mfea import mfea
import argparse
import yaml

# Load configuration (safe_load: PyYAML >= 6 requires an explicit Loader)
config = yaml.safe_load(open('config.yaml').read())

# Load benchmark
benchmark = yaml.safe_load(
    open('atari_benchmark/multitask-benchmark.yaml').read())

instances = []
for i in range(1, 41):
    if i not in [100]:
        instances.append('multi-' + str(i))

seeds = range(1, 21)

for seed in seeds:
    for instance in instances:
        data = benchmark[instance]
        config.update(data)
        config = wrap_config(config)
        saver = Saver(config, instance, seed)
        mfea(config, saver.append)
        saver.save()
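# mfea() receives saver.append as a per-generation callback and saver.save()
# flushes the collected records at the end. A minimal sketch of a Saver with
# that append-then-save contract; the file layout is an assumption:
import json
import os

class Saver:
    def __init__(self, config, instance, seed):
        self.records = []
        self.path = os.path.join('results',
                                 '{}_{}.json'.format(instance, seed))

    def append(self, record):
        # called by the optimizer once per generation
        self.records.append(record)

    def save(self):
        os.makedirs(os.path.dirname(self.path), exist_ok=True)
        with open(self.path, 'w') as f:
            json.dump(self.records, f)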
class CustomAgent:
    def __init__(self, verbose=False, **kwargs) -> None:
        # Load the config file
        config_file = kwargs.get('config_file_path', "config/config.yaml")
        with open(config_file) as reader:
            self.config = yaml.safe_load(reader)
        if 'update_config_fun' in kwargs and kwargs['update_config_fun'] is not None:
            self.config = kwargs['update_config_fun'](self.config)
        if verbose:
            pprint.pprint(self.config, width=1)

        # choose device
        self.device = 'cuda' if torch.cuda.device_count() > 0 else 'cpu'
        if 'gpu' in kwargs and kwargs['gpu'] is not None:
            self.device = 'cuda:{}'.format(kwargs['gpu'])

        # training settings
        self.batch_size = self.config['training']['batch_size']
        self.max_nb_steps_per_episode = self.config['training']['max_nb_steps_per_episode']
        self.nb_epochs = self.config['training']['nb_epochs']

        # set the statistics
        self._episode_has_started = False
        self.last_done = None
        self.mode = "test"
        self.counter = StepCounter(self.batch_size,
                                   self.max_nb_steps_per_episode)

        # Init the models and their optimizer
        self.model = Model(hidden_size=self.config['model']['hidden_size'],
                           device=self.device,
                           bidirectional=self.config['model']['bidirectional'],
                           hidden_linear_size=self.config['model']['hidden_linear_size'])
        self.item_scorer = ItemScorer(device=self.device)
        self.navigation_model = Navigation(device=self.device)
        if 'optimizer' in self.config['training']:
            self.optimizer = optim.Adam(
                self.model.parameters(),
                self.config['training']['optimizer']['learning_rate'])
        self.model_updates = 0
        self.model_loss = 0.
        if verbose:
            print(self.model)
            print('Total Model Parameters: {}'.format(
                count_parameters(self.model)))

        # choose the agent
        self.agent = lambda device, model: HAgent(
            device=device,
            model=model,
            item_scorer=self.item_scorer,
            hcp=self.config['general']['hcp'],
            navigation_model=self.navigation_model)

        # Command Queue
        self.command_q = None

        # Saving and Loading
        self.experiment_tag = self.config['checkpoint'].get('experiment_tag',
                                                            'NONAME')
        self.saver = Saver(
            model=self.model,
            ckpt_path=self.config['checkpoint'].get('model_checkpoint_path',
                                                    'NOPATH'),
            experiment_tag=self.experiment_tag,
            load_pretrained=len(
                self.config['checkpoint']['pretrained_experiment_path']) > 0,
            pretrained_model_path=os.path.join(
                _FILE_PREFIX,
                self.config['checkpoint']['pretrained_experiment_path']),
            device=self.device,
            save_frequency=self.config['checkpoint'].get('save_frequency',
                                                         1E10))

        # Logging Statistics
        tb_dir = None if 'tensorboard' not in self.config else os.path.join(
            self.config['tensorboard']['directory'], self.experiment_tag)
        self.statistics = StatisticsTracker(tb_dir=tb_dir)

        # EventHandler
        self.event_handler = EventHandler()
        self.event_handler.add(self.statistics.stats_episode_clear,
                               Event.NEWEPISODE)
        self.event_handler.add(self.counter.new_episode, Event.NEWEPISODE)

    def _init_episode(self):
        """Initialize settings for the start of a new game."""
        self.event_handler(Event.NEWEPISODE)
        self._episode_has_started = True
        self.transitions = [[] for _ in range(self.batch_size)]
        self.model.reset_hidden()
        self.last_score = np.array([0] * self.batch_size)
        self.last_done = [False] * self.batch_size
        self.model_updates = 0
        self.model_loss = 0.
        self.agents = [self.agent(device=self.device, model=self.model)
                       for _ in range(self.batch_size)]
        self.command_q = [[] for _ in range(self.batch_size)]

    def act_eval(self, obs: List[str], scores: List[int], dones: List[bool],
                 infos: List[Dict]):
        """Agent step if it is in test mode."""
        if all(dones):
            self._end_episode(obs, scores)
            return
        # individually for every agent in the batch
        for idx, (observation, score, done, info, cmd_q) in enumerate(
                zip(obs, scores, dones, infos, self.command_q)):
            if done:
                # placeholder command
                self.command_q[idx] = ['look']
            if len(cmd_q) == 0:
                # only add a new command if there is nothing left in the
                # queue for this agent
                new_cmds, _ = self.agents[idx].step(observation=observation,
                                                    info=info)
                for cmd in new_cmds:
                    self.command_q[idx].append(cmd)
        self.counter.step()
        return [cmd_q.pop(0) for cmd_q in self.command_q]

    def act(self, obs: List[str], scores: List[int], dones: List[bool],
            infos: Dict[str, List[Any]]) -> Optional[List[str]]:
        """Step of the agent."""
        # re-structure infos
        infos = [{k: v[i] for k, v in infos.items()} for i in range(len(obs))]
        if not self._episode_has_started:
            self._init_episode()

        if self.mode == 'test':
            return self.act_eval(obs, scores, dones, infos)
        elif self.mode == 'manual_eval':
            return self.manual_eval(obs, scores, dones, infos)

        current_score = []
        # individually for every agent in the batch
        for idx, (observation, score, done, last_done, info, cmd_q) in enumerate(
                zip(obs, scores, dones, self.last_done, infos, self.command_q)):
            just_finished = (last_done != done)
            if not done or just_finished:
                self.counter.increase_steps_taken(idx)
            if len(cmd_q) > 0:
                # still has commands to fire
                current_score.append(0.)
                continue
            if done and not just_finished:
                self.command_q[idx] = ['look']
                current_score.append(0.)
                continue
            else:
                self.agents[idx].update_score(score)  # update score
                current_score.append(self.agents[idx].current_score)
                # add new command
                new_cmds, learning_info = self.agents[idx].step(
                    observation=observation, info=info)
                for cmd in new_cmds:
                    self.command_q[idx].append(cmd)
                # update the model
                self.model_update(done=done,
                                  index=learning_info.index,
                                  output=learning_info.score,
                                  value=learning_info.value,
                                  score=self.agents[idx].current_score,
                                  batch_idx=idx)

        self.last_done = dones
        self.statistics.stats_episode_append(score=np.mean(current_score))

        if all(dones):
            self._end_episode(obs, scores,
                              cmds=[agent.cmd_memory for agent in self.agents])
            return

        self.saver.save(epoch=self.counter('epoch'),
                        episode=self.counter('episode'))
        self.counter.step()
        return [cmd_q.pop(0) for cmd_q in self.command_q]

    def model_update(self, done, index, output, value, score, batch_idx):
        """
        Store the information for the model update. After invoking it
        'update_frequency' times for a specific agent, the A2C update is
        performed.
        """
        if self.transitions[batch_idx]:
            self.transitions[batch_idx][-1].reward = \
                torch.Tensor([score])[0].type(torch.float).to(self.device)
        if len(self.transitions[batch_idx]) >= self.config['training']['update_frequency'] or done:
            # done == just_finished
            # do the update
            self._a2c_update(value, batch_idx)
        else:
            # add the transition
            self.transitions[batch_idx].append(
                Transition(reward=None, index=index, output=output,
                           value=value, done=done))

    def _a2c_update(self, value, batch_idx):
        """
        Uses the stored model information from the last 'update_frequency'
        steps to perform an A2C update.
        """
        # compute the returns and advantages from the last
        # 'update_frequency' model steps
        returns, advantages = self._discount_rewards(
            value, self.transitions[batch_idx])

        for transition, _return, advantage in zip(
                self.transitions[batch_idx], returns, advantages):
            reward, index, output, value, done = transition
            if done:
                continue
            advantage = advantage.detach()
            probs = F.softmax(output, dim=-1)
            log_probs = torch.log(probs)
            log_action_prob = log_probs[index]
            policy_loss = -log_action_prob * advantage
            value_loss = (.5 * (value - _return) ** 2)
            entropy = (-log_probs * probs).mean()
            # add up the loss over time
            self.model_loss += policy_loss + 0.5 * value_loss - 0.1 * entropy

            self.statistics.stats_episode_append(
                reward=reward,
                policy=policy_loss.item(),
                value=value_loss.item(),
                entropy=entropy.item(),
                confidence=torch.mean(torch.exp(log_action_prob)).item()
            )

        self.model_updates += 1
        self.transitions[batch_idx] = []
        if self.model_loss == 0 or self.model_updates % self.batch_size != 0:
            # print('skipped')
            return
        # Backpropagation is only invoked once all of the agents in the batch
        # have performed their update, to reduce computational complexity.
        self.statistics.stats_episode_append(loss=self.model_loss.item())
        self.optimizer.zero_grad()
        self.model_loss.backward(retain_graph=True)
        nn.utils.clip_grad_norm_(
            self.model.parameters(),
            self.config['training']['optimizer']['clip_grad_norm'])
        self.optimizer.step()
        self.model_loss = 0.

    def _discount_rewards(self, last_value, transitions):
        """
        Discounts the rewards of the agent over time to compute the returns
        and advantages.
        """
        returns, advantages = [], []
        R = last_value.data
        for t in reversed(range(len(transitions))):
            rewards, _, _, values, done = transitions[t]
            R = rewards + self.config['general']['discount_gamma'] * R
            adv = R - values
            returns.append(R)
            advantages.append(adv)
        return returns[::-1], advantages[::-1]

    def _end_episode(self, observation, scores, **kwargs):
        self._episode_has_started = False
        if self.mode != 'test':
            points, possible_points = self._get_points(observation, scores)
            self.statistics.flush_episode_statistics(
                possible_points=possible_points,
                episode_no=self.counter('episode'),
                steps=np.mean(self.counter('steps_taken')),
                points=points,
                **kwargs)

    def _get_points(self, obs, scores):
        """Parses the obtained points from the last observation."""
        batch_size = len(obs)
        points = []
        possible_points = None
        for i in range(batch_size):
            try:
                points.append(int(obs[i].split('You scored ')[1]
                                  .split(' out of a possible')[0]))
                possible_points = int(obs[i].split('out of a possible ')[1]
                                      .split(',')[0])
            except (IndexError, ValueError):
                points.append(scores[i])
                possible_points = (possible_points
                                   if possible_points is not None else 5)
        return points, possible_points

    def train(self) -> None:
        """Tell the agent it is in training mode."""
        self.mode = 'train'

    def eval(self) -> None:
        """Tell the agent it is in evaluation mode."""
        self.mode = 'test'
        self.model.reset_hidden()

    def select_additional_infos(self) -> EnvInfos:
        request_infos = EnvInfos()
        request_infos.description = True
        request_infos.inventory = True
        if self.config['general']['hcp'] >= 2:
            request_infos.entities = True
            request_infos.verbs = True
        if self.config['general']['hcp'] >= 4:
            request_infos.extras = ["recipe"]
        if self.config['general']['hcp'] >= 5:
            request_infos.admissible_commands = True

        # TEST
        request_infos.entities = True
        request_infos.verbs = True
        request_infos.extras = ["recipe", "walkthrough"]
        request_infos.admissible_commands = True
        return request_infos

    def started_new_epoch(self):
        """
        Call this function from outside to let the agent know that a new
        epoch has started.
        """
        self.counter.new_epoch()
import json

import gym  # needed for gym.make below; missing from the original imports
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

from utils import Dqn, Buffer, Saver

parameters_file = "experiments/exp_dqn_learning.json"

with open(parameters_file) as j:
    parameters = json.loads(j.read())

env = gym.make('Acrobot-v1')
env._max_episode_steps = np.inf
saver = Saver()

rewards = []
steps = []

for _ in range(parameters["n_simulations"]):
    keras.backend.clear_session()

    alpha = 0.001
    model = Sequential()
    model.add(Dense(parameters["hidden_size"],
                    input_dim=env.observation_space.shape[0],
                    activation='tanh'))
class Trainer(object):
    def __init__(self, mode):
        # Define Saver
        self.saver = Saver(opt, mode)

        # visualize
        self.summary = TensorboardSummary(self.saver.experiment_dir)
        self.writer = self.summary.create_summary()

        # Dataset dataloader
        self.train_dataset, self.train_loader = make_data_loader(opt)
        self.nbatch_train = len(self.train_loader)
        self.val_dataset, self.val_loader = make_data_loader(opt, mode="val")
        self.nbatch_val = len(self.val_loader)

        # model
        if opt.sync_bn is None and len(opt.gpu_id) > 1:
            opt.sync_bn = True
        else:
            opt.sync_bn = False
        model = DeepLab(opt)
        # model = CSRNet()
        self.model = model.to(opt.device)

        # Define Optimizer
        train_params = [{'params': model.get_1x_lr_params(), 'lr': opt.lr},
                        {'params': model.get_10x_lr_params(),
                         'lr': opt.lr * 10}]
        self.optimizer = torch.optim.SGD(train_params,
                                         momentum=opt.momentum,
                                         weight_decay=opt.decay)

        # loss
        if opt.use_balanced_weights:
            classes_weights_file = os.path.join(opt.root_dir,
                                                'train_classes_weights.npy')
            if os.path.isfile(classes_weights_file):
                weight = np.load(classes_weights_file)
            else:
                weight = calculate_weigths_labels(self.train_loader,
                                                  opt.root_dir,
                                                  opt.num_classes)
            weight = torch.from_numpy(weight.astype(np.float32))
            print(weight)
            opt.loss['weight'] = weight
        self.loss = build_loss(opt.loss)

        # Define Evaluator
        self.evaluator = Evaluator()

        # Define lr scheduler
        self.scheduler = LR_Scheduler(mode=opt.lr_scheduler,
                                      base_lr=opt.lr,
                                      num_epochs=opt.epochs,
                                      iters_per_epoch=self.nbatch_train,
                                      lr_step=140)

        # Resuming Checkpoint
        self.best_pred = 0.0
        self.start_epoch = opt.start_epoch
        if opt.resume:
            if os.path.isfile(opt.pre):
                print("=> loading checkpoint '{}'".format(opt.pre))
                checkpoint = torch.load(opt.pre)
                opt.start_epoch = checkpoint['epoch']
                self.best_pred = checkpoint['best_pred']
                self.model.load_state_dict(checkpoint['state_dict'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    opt.pre, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(opt.pre))

        if len(opt.gpu_id) > 1:
            print("Using multiple gpu")
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=opt.gpu_id)

        self.loss_hist = collections.deque(maxlen=500)
        self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
        self.step_time = collections.deque(maxlen=opt.print_freq)

    def train(self, epoch):
        self.model.train()
        if opt.freeze_bn:
            self.model.module.freeze_bn() if len(opt.gpu_id) > 1 \
                else self.model.freeze_bn()
        last_time = time.time()
        for iter_num, sample in enumerate(self.train_loader):
            # if iter_num >= 1: break
            try:
                imgs = sample["image"].to(opt.device)
                labels = sample["label"].to(opt.device)

                output = self.model(imgs)
                loss = self.loss(output, labels)
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 3)
                loss.backward()
                self.loss_hist.append(float(loss))
                self.optimizer.step()
                self.optimizer.zero_grad()
                self.scheduler(self.optimizer, iter_num, epoch)

                # Visualize
                global_step = iter_num + self.nbatch_train * epoch + 1
                self.writer.add_scalar('train/loss', loss.cpu().item(),
                                       global_step)

                batch_time = time.time() - last_time
                last_time = time.time()
                eta = self.timer.eta(global_step, batch_time)
                self.step_time.append(batch_time)
                if global_step % opt.print_freq == 0:
                    printline = ('Epoch: [{}][{}/{}] '
                                 'lr: (1x:{:1.5f}, 10x:{:1.5f}), '
                                 'eta: {}, time: {:1.3f}, '
                                 'Loss: {:1.4f} '.format(
                                     epoch, iter_num + 1, self.nbatch_train,
                                     self.optimizer.param_groups[0]['lr'],
                                     self.optimizer.param_groups[1]['lr'],
                                     eta, np.sum(self.step_time),
                                     np.mean(self.loss_hist)))
                    print(printline)
                    self.saver.save_experiment_log(printline)
                    last_time = time.time()

                del loss
            except Exception as e:
                print(e)
                continue

    def validate(self, epoch):
        self.model.eval()
        self.evaluator.reset()
        test_loss = 0.0
        with torch.no_grad():
            tbar = tqdm(self.val_loader, desc='\r')
            for i, sample in enumerate(tbar):
                # if i > 3: break
                imgs = sample['image'].to(opt.device)
                labels = sample['label'].to(opt.device)
                path = sample["path"]

                output = self.model(imgs)
                loss = self.loss(output, labels)
                test_loss += loss.item()
                tbar.set_description('Test loss: %.4f' % (test_loss / (i + 1)))

                # Visualize
                global_step = i + self.nbatch_val * epoch + 1
                if global_step % opt.plot_every == 0:
                    # pred = output.data.cpu().numpy()
                    if output.shape[1] > 1:
                        pred = torch.argmax(output, dim=1)
                    else:
                        pred = torch.clamp(output, min=0)
                    self.summary.visualize_image(self.writer, opt.dataset,
                                                 imgs, labels, pred,
                                                 global_step)

                # metrics
                pred = output.data.cpu().numpy()
                target = labels.cpu().numpy() > 0
                if pred.shape[1] > 1:
                    pred = np.argmax(pred, axis=1)
                pred = (pred > opt.region_thd).reshape(target.shape)
                self.evaluator.add_batch(target, pred, path, opt.dataset)

        # Fast test during the training
        Acc = self.evaluator.Pixel_Accuracy()
        Acc_class = self.evaluator.Pixel_Accuracy_Class()
        mIoU = self.evaluator.Mean_Intersection_over_Union()
        FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
        RRecall = self.evaluator.Region_Recall()
        RNum = self.evaluator.Region_Num()
        mean_loss = test_loss / self.nbatch_val
        result = 2 / (1 / mIoU + 1 / RRecall)

        self.writer.add_scalar('val/mean_loss_epoch', mean_loss, epoch)
        self.writer.add_scalar('val/mIoU', mIoU, epoch)
        self.writer.add_scalar('val/Acc', Acc, epoch)
        self.writer.add_scalar('val/Acc_class', Acc_class, epoch)
        self.writer.add_scalar('val/fwIoU', FWIoU, epoch)
        self.writer.add_scalar('val/RRecall', RRecall, epoch)
        self.writer.add_scalar('val/RNum', RNum, epoch)
        self.writer.add_scalar('val/Result', result, epoch)

        printline = ("Val[Epoch: [{}], mean_loss: {:.4f}, mIoU: {:.4f}, "
                     "Acc: {:.4f}, Acc_class: {:.4f}, fwIoU: {:.4f}, "
                     "RRecall: {:.4f}, RNum: {:.1f}]").format(
                         epoch, mean_loss, mIoU, Acc, Acc_class, FWIoU,
                         RRecall, RNum)
        print(printline)
        self.saver.save_eval_result(printline)
        return result
def __init__(self, verbose=False) -> None:
    # Loading the config file
    config_file = "config/config.yaml"
    with open(config_file) as reader:
        self.config = yaml.safe_load(reader)
    if verbose:
        pprint.pprint(self.config, width=1)

    # Choose device
    self.device = 'cuda' if torch.cuda.device_count() > 0 else 'cpu'

    # Training settings
    self.batch_size = self.config['training']['batch_size']
    self.max_nb_steps_per_episode = self.config['training'][
        'max_nb_steps_per_episode']
    self.nb_epochs = self.config['training']['nb_epochs']

    # Set stats
    self._episode_has_started = False
    self.last_done = None
    self.mode = 'test'
    self.counter = StepCounter(self.batch_size,
                               self.max_nb_steps_per_episode)

    # Init the models and their optimizer
    self.model = Model(
        hidden_size=self.config['model']['hidden_size'],
        device=self.device,
        bidirectional=self.config['model']['bidirectional'],
        hidden_linear_size=self.config['model']['hidden_linear_size'])
    self.item_scorer = ItemScorer(device=self.device)
    self.navigation_model = Navigation(device=self.device)
    if 'optimizer' in self.config['training']:
        self.optimizer = optim.Adam(
            self.model.parameters(),
            self.config['training']['optimizer']['learning_rate'])
    self.model_updates = 0
    self.model_loss = 0.0
    if verbose:
        print(self.model)
        print('Total Model Parameters: {}'.format(
            count_parameters(self.model)))

    # choose the agent
    self.agent = lambda device, model: HAgent(
        device=device,
        model=model,
        item_scorer=self.item_scorer,
        hcp=self.config['general']['hcp'],
        navigation_model=self.navigation_model)

    # Command Queue
    self.command_q = None

    # Saving and Loading
    self.experiment_tag = self.config['checkpoint'].get('experiment_tag',
                                                        'NONAME')
    self.saver = Saver(
        model=self.model,
        ckpt_path=self.config['checkpoint'].get('model_checkpoint_path',
                                                'NOPATH'),
        experiment_tag=self.experiment_tag,
        load_pretrained=len(
            self.config['checkpoint']['pretrained_experiment_path']) > 0,
        pretrained_model_path=os.path.join(
            _FILE_PREFIX,
            self.config['checkpoint']['pretrained_experiment_path']),
        device=self.device,
        save_frequency=self.config['checkpoint'].get('save_frequency', 1E10))
def main():
    train_data, test_data, _ = train_validate_test_loader(
        "../data/Childers/M/speech",
        "../data/Childers/M/egg",
        split={"train": 0.7, "validate": 0.1, "test": 0.2},
        batch_size=1,
        workers=2,
        stride={"train": 2, "validate": 20},
        pin_memory=False,
        model_folder="data/irish_clean_data",
    )

    model_G = SpeechEggEncoder()
    model_D = Discriminator()
    save_model = Saver("checkpoints/vmodels/childers_clean_l2")

    encoder = EGGEncoder()
    save_encoder = Saver_Encoder("encoder")
    encoder, _, _ = save_encoder.load_checkpoint(encoder,
                                                 file_name="epoch_65.pt")

    use_cuda = True
    epochs = 100

    optimizer_G = optim.Adam(list(model_G.parameters())[:12], lr=2e-3)
    optimizer_R = optim.Adam(model_G.parameters(), lr=2e-3)
    optimizer_D = optim.Adam(model_D.parameters(), lr=2e-3)

    scheduler_G = optim.lr_scheduler.StepLR(optimizer_G, 10, 0.9)
    scheduler_D = optim.lr_scheduler.StepLR(optimizer_D, 10, 0.9)
    # the original passed optimizer_D here, which left optimizer_R unscheduled
    scheduler_R = optim.lr_scheduler.StepLR(optimizer_R, 10, 0.5)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for i in range(1, epochs + 1):
            net_loss, D_loss, G_loss, R_loss, D_real_prob, D_fake_prob = train(
                model_G,
                model_D,
                encoder,
                optimizer_G,
                optimizer_R,
                optimizer_D,
                train_data,
                use_cuda,
            )
            print("Train loss {:4.4} D_loss {:4.4} G_loss {:4.4} "
                  "reconstruction loss {:4.4} Real D prob. {:4.4} "
                  "Fake D prob. {:4.4} @epoch {}".format(
                      net_loss, D_loss, G_loss, R_loss, D_real_prob,
                      D_fake_prob, i))
            if i % 5 == 0:
                checkpoint = save_model.create_checkpoint(
                    model_G,
                    model_D,
                    optimizer_G,
                    optimizer_R,
                    optimizer_D,
                    {"win": 100, "stride": 3},
                )
                save_model.save_checkpoint(checkpoint,
                                           file_name="epoch_{}.pt".format(i),
                                           append_time=False)
                test(model_G, model_D, encoder, test_data, use_cuda)
            if scheduler_G is not None:
                scheduler_G.step()
                scheduler_D.step()
                scheduler_R.step()
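# In these GAN scripts create_checkpoint packs two models, three optimizers
# and a metadata dict, and load_checkpoint returns the matching six-tuple
# (see the model_G, model_D, _, _, _, _ = ... unpacking above). A minimal
# sketch of that contract, assuming a plain torch.save state-dict layout:
import os

import torch

class Saver:
    def __init__(self, folder):
        self.folder = folder
        os.makedirs(folder, exist_ok=True)

    def create_checkpoint(self, model_G, model_D, opt_G, opt_R, opt_D, meta):
        return {'model_G': model_G.state_dict(),
                'model_D': model_D.state_dict(),
                'opt_G': opt_G.state_dict(),
                'opt_R': opt_R.state_dict(),
                'opt_D': opt_D.state_dict(),
                'meta': meta}

    def save_checkpoint(self, checkpoint, file_name, append_time=False):
        # append_time is accepted but ignored in this sketch
        torch.save(checkpoint, os.path.join(self.folder, file_name))

    def load_checkpoint(self, model_G, model_D, file_name):
        ckpt = torch.load(os.path.join(self.folder, file_name))
        model_G.load_state_dict(ckpt['model_G'])
        model_D.load_state_dict(ckpt['model_D'])
        # optimizer states and meta are returned for callers that need them
        return (model_G, model_D, ckpt['opt_G'], ckpt['opt_R'],
                ckpt['opt_D'], ckpt['meta'])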
import json

import numpy as np

from utils import LowRankLearning, Saver, TestUtils, get_env

parameters_file = "experiments/exp_lr_learning.json"

env = get_env()
saver = Saver()
test_utils = TestUtils()

Q_optimal = saver.load_from_pickle("results/Q_optimal.pickle")

with open(parameters_file) as j:
    parameters = json.loads(j.read())

medians = {}
standard_devs = {}
frob_errors = {}

for epsilon in parameters["epsilons"]:
    medians_temp = []
    standard_devs_temp = []
    frob_errors_temp = []
    for i in range(parameters["n_simulations"]):
        lr_learner = LowRankLearning(
            env=env,
            episodes=parameters["episodes"],
            max_steps=parameters["max_steps"],
            epsilon=epsilon,
            gamma=parameters["gamma"],
            k=parameters["k"],
def test(**kwargs):
    opt._parse(kwargs)
    saver = Saver(opt, "test")

    # imgs_name = os.listdir(opt.test_dir)
    imgs_set = opt.root_dir + "ImageSets/Main/val.txt"
    with open(imgs_set, 'r') as f:
        imgs_name = [x.strip() + '.jpg' for x in f.readlines()]

    resize = Letterbox(input_size=(opt.min_size, opt.max_size))
    normalize = Normalizer(mean=opt.mean, std=opt.std)

    # Define Network
    # initialize the network here.
    model = Model(opt, num_classes=10)
    model = model.to(opt.device)
    post_pro = PostProcess(**opt.nms)

    if os.path.isfile(opt.pre):
        print("=> loading checkpoint '{}'".format(opt.pre))
        checkpoint = torch.load(opt.pre)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            opt.pre, checkpoint['epoch']))
    else:
        raise FileNotFoundError

    results = []
    model.eval()
    with torch.no_grad():
        for ii, img_name in enumerate(tqdm(imgs_name)):
            # if ii >= 3: break

            # data read and transforms
            img_path = osp.join(opt.test_dir, img_name)
            img = cv2.imread(img_path)[:, :, ::-1]
            sample = {'img': img, 'annot': None}
            sample = normalize(sample)
            sample = resize(sample)
            input = sample['img'].unsqueeze(0).to(opt.device).permute(
                0, 3, 1, 2)

            # predict
            scores, labels, boxes = model(input)
            scores_bt, labels_bt, boxes_bt = post_pro(scores, labels, boxes,
                                                      input.shape[-2:])
            boxes_bt[0] = re_resize(boxes_bt[0], sample['scale'],
                                    opt.resize_type)

            if show:
                # draw
                labels = labels_bt[0].float().view(-1, 1)
                scores = scores_bt[0].float().view(-1, 1)
                output = torch.cat((boxes_bt[0], labels, scores), dim=1)
                output = output.numpy()
                img = plot_img(img, output, classes)
                plt.figure(figsize=(10, 10))
                plt.subplot(1, 1, 1).imshow(img)
                plt.show()

            for box, label, score in zip(boxes_bt[0], labels_bt[0],
                                         scores_bt[0]):
                box[2:] = box[2:] - box[:2]
                results.append({
                    "image_id": img_name,
                    "category_id": label.numpy(),
                    "bbox": box[:4].numpy(),
                    "score": score.numpy()
                })

    saver.save_test_result(results)
def __init__(self, mode):
    # Define Saver
    self.saver = Saver(opt, mode)
    self.logger = self.saver.logger

    # Visualize
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    # Dataset dataloader
    self.train_dataset, self.train_loader = make_data_loader(opt)
    self.nbatch_train = len(self.train_loader)
    self.val_dataset, self.val_loader = make_data_loader(opt, mode="val")
    self.nbatch_val = len(self.val_loader)

    # Model
    if opt.sync_bn is None and len(opt.gpu_id) > 1:
        opt.sync_bn = True
    else:
        opt.sync_bn = False
    # model = DeepLab(opt)
    # model = CSRNet()
    model = CRGNet(opt)
    model_info(model, self.logger)
    self.model = model.to(opt.device)

    # Loss
    if opt.use_balanced_weights:
        classes_weights_file = osp.join(opt.root_dir,
                                        'train_classes_weights.npy')
        if os.path.isfile(classes_weights_file):
            weight = np.load(classes_weights_file)
        else:
            weight = calculate_weigths_labels(self.train_loader, opt.root_dir)
        print(weight)
        opt.loss['weight'] = weight
    self.loss = build_loss(opt.loss)

    # Define Evaluator
    self.evaluator = Evaluator()  # use region to eval: class_num is 2

    # Resuming Checkpoint
    self.best_pred = 0.0
    self.start_epoch = 0
    if opt.resume:
        if os.path.isfile(opt.pre):
            print("=> loading checkpoint '{}'".format(opt.pre))
            checkpoint = torch.load(opt.pre)
            self.start_epoch = checkpoint['epoch']
            self.best_pred = checkpoint['best_pred']
            self.model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                opt.pre, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(opt.pre))

    if len(opt.gpu_id) > 1:
        print("Using multiple gpu")
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=opt.gpu_id)

    # Define Optimizer
    # train_params = [{'params': model.get_1x_lr_params(), 'lr': opt.lr},
    #                 {'params': model.get_10x_lr_params(),
    #                  'lr': opt.lr * 10}]
    # self.optimizer = torch.optim.SGD(train_params,
    #                                  momentum=opt.momentum,
    #                                  weight_decay=opt.decay)
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=opt.lr,
                                     momentum=opt.momentum,
                                     weight_decay=opt.decay)

    # Define lr scheduler
    # self.scheduler = LR_Scheduler(mode=opt.lr_scheduler,
    #                               base_lr=opt.lr,
    #                               num_epochs=opt.epochs,
    #                               iters_per_epoch=self.nbatch_train,
    #                               lr_step=140)
    self.scheduler = optim.lr_scheduler.MultiStepLR(
        self.optimizer,
        milestones=[round(opt.epochs * x) for x in opt.steps],
        gamma=opt.gamma)

    # Time
    self.loss_hist = collections.deque(maxlen=500)
    self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
    self.step_time = collections.deque(maxlen=opt.print_freq)
class Trainer(object):
    def __init__(self, mode):
        # Define Saver
        self.saver = Saver(opt, mode)
        self.logger = self.saver.logger

        # visualize
        self.summary = TensorboardSummary(self.saver.experiment_dir, opt)
        self.writer = self.summary.writer

        # Define Dataloader
        # train dataset
        self.train_dataset, self.train_loader = make_data_loader(opt,
                                                                 train=True)
        self.nbatch_train = len(self.train_loader)
        self.num_classes = self.train_dataset.num_classes
        # val dataset
        self.val_dataset, self.val_loader = make_data_loader(opt,
                                                             train=False)
        self.nbatch_val = len(self.val_loader)

        # Define Network
        # initialize the network here.
        self.model = Model(opt, self.num_classes)
        self.model = self.model.to(opt.device)

        # Detection post process (NMS...)
        self.post_pro = PostProcess(**opt.nms)

        # Define Optimizer
        if opt.adam:
            self.optimizer = optim.Adam(self.model.parameters(), lr=opt.lr)
        else:
            self.optimizer = optim.SGD(self.model.parameters(),
                                       lr=opt.lr,
                                       momentum=opt.momentum,
                                       weight_decay=opt.decay)

        # Apex
        if opt.use_apex:
            self.model, self.optimizer = amp.initialize(self.model,
                                                        self.optimizer,
                                                        opt_level='O1')

        # Resuming Checkpoint
        self.best_pred = 0.0
        self.start_epoch = 0
        if opt.resume:
            if os.path.isfile(opt.pre):
                print("=> loading checkpoint '{}'".format(opt.pre))
                checkpoint = torch.load(opt.pre)
                self.start_epoch = checkpoint['epoch'] + 1
                self.best_pred = checkpoint['best_pred']
                self.model.load_state_dict(checkpoint['state_dict'])
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    opt.pre, checkpoint['epoch']))
            else:
                print("=> no checkpoint found at '{}'".format(opt.pre))

        # Define lr scheduler
        # self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        #     self.optimizer, patience=3, verbose=True)
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=[round(opt.epochs * x) for x in opt.steps],
            gamma=opt.gamma)
        self.scheduler.last_epoch = self.start_epoch - 1

        # Using multiple gpu
        if len(opt.gpu_id) > 1:
            self.logger.info("Using multiple gpu")
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=opt.gpu_id)

        # metrics
        if opt.eval_type == 'cocoeval':
            self.eval = COCO_eval(self.val_dataset.coco)
        else:
            self.eval = VOC_eval(self.num_classes)

        self.loss_hist = collections.deque(maxlen=500)
        self.timer = Timer(opt.epochs, self.nbatch_train, self.nbatch_val)
        self.step_time = collections.deque(maxlen=opt.print_freq)

    def training(self, epoch):
        self.model.train()
        epoch_loss = []
        last_time = time.time()
        for iter_num, data in enumerate(self.train_loader):
            # if iter_num >= 0: break
            try:
                self.optimizer.zero_grad()
                inputs = data['img'].to(opt.device)
                targets = data['annot'].to(opt.device)

                losses = self.model(inputs, targets)
                loss, log_vars = parse_losses(losses)
                if bool(loss == 0):
                    continue

                if opt.use_apex:
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               opt.grad_clip)
                self.optimizer.step()
                self.loss_hist.append(float(loss.cpu().item()))
                epoch_loss.append(float(loss.cpu().item()))

                # visualize
                global_step = iter_num + self.nbatch_train * epoch + 1
                loss_logs = ""
                for _key, _value in log_vars.items():
                    loss_logs += "{}: {:.4f} ".format(_key, _value)
                    self.writer.add_scalar('train/{}'.format(_key), _value,
                                           global_step)

                batch_time = time.time() - last_time
                last_time = time.time()
                eta = self.timer.eta(global_step, batch_time)
                self.step_time.append(batch_time)
                if global_step % opt.print_freq == 0:
                    printline = ("Epoch: [{}][{}/{}] "
                                 "lr: {} eta: {} time: {:1.1f} "
                                 "{}"
                                 "Running loss: {:1.5f}").format(
                                     epoch, iter_num + 1, self.nbatch_train,
                                     self.optimizer.param_groups[0]['lr'],
                                     eta, np.sum(self.step_time),
                                     loss_logs, np.mean(self.loss_hist))
                    self.logger.info(printline)

            except Exception as e:
                print(e)
                continue

        # self.scheduler.step(np.mean(epoch_loss))
        self.scheduler.step()

    def validate(self, epoch):
        self.model.eval()
        # torch.backends.cudnn.benchmark = False
        # self.model.apply(uninplace_relu)

        # start collecting results
        with torch.no_grad():
            for ii, data in enumerate(self.val_loader):
                # if ii > 0: break
                scale = data['scale']
                index = data['index']
                inputs = data['img'].to(opt.device)
                targets = data['annot']

                # run network
                scores, labels, boxes = self.model(inputs)
                scores_bt, labels_bt, boxes_bt = self.post_pro(
                    scores, labels, boxes, inputs.shape[-2:])
                outputs = []
                for k in range(len(boxes_bt)):
                    outputs.append(torch.cat(
                        (boxes_bt[k].clone(),
                         labels_bt[k].clone().unsqueeze(1).float(),
                         scores_bt[k].clone().unsqueeze(1)),
                        dim=1))

                # visualize
                global_step = ii + self.nbatch_val * epoch
                if global_step % opt.plot_every == 0:
                    self.summary.visualize_image(inputs, targets, outputs,
                                                 self.val_dataset.labels,
                                                 global_step)

                # eval
                if opt.eval_type == "voceval":
                    self.eval.statistics(outputs, targets, iou_thresh=0.5)
                elif opt.eval_type == "cocoeval":
                    self.eval.statistics(outputs, scale, index)
                print('{}/{}'.format(ii, len(self.val_loader)), end='\r')

        if opt.eval_type == "voceval":
            stats, ap_class = self.eval.metric()
            for key, value in stats.items():
                self.writer.add_scalar('val/{}'.format(key), value.mean(),
                                       epoch)
            self.saver.save_voc_eval_result(stats, ap_class,
                                            self.val_dataset.labels)
            return stats['AP']
        elif opt.eval_type == "cocoeval":
            stats = self.eval.metric()
            self.saver.save_coco_eval_result(stats)
            self.writer.add_scalar('val/mAP', stats[0], epoch)
            return stats[0]
        else:
            raise NotImplementedError
from matplotlib import rcParams
import matplotlib.pyplot as plt
import numpy as np

from utils import Saver

saver = Saver()

rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Tahoma']
rcParams['font.size'] = 16

rewards_dqn_light = saver.load_from_pickle(
    "results/rewards_1_layer_2000_light.pck")
steps_dqn_light = saver.load_from_pickle(
    "results/steps_1_layer_2000_light.pck")
rewards_dqn_large = saver.load_from_pickle(
    "results/rewards_1_layer_2000_large.pck")
steps_dqn_large = saver.load_from_pickle(
    "results/steps_1_layer_2000_large.pck")
rewards_lr = saver.load_from_pickle("results/rewards_k_2.pck")
steps_lr = saver.load_from_pickle("results/steps_k_2.pck")
rewards_lr_norm = saver.load_from_pickle("results/rewards_k_2_norm.pck")
steps_lr_norm = saver.load_from_pickle("results/steps_k_2_norm.pck")

median_rewards_dqn_light = np.median(rewards_dqn_light, axis=0)
median_steps_dqn_light = np.median(steps_dqn_light, axis=0)
median_rewards_dqn_large = np.median(rewards_dqn_large, axis=0)
median_steps_dqn_large = np.median(steps_dqn_large, axis=0)
median_reward_lr = np.median(rewards_lr, axis=0)
median_steps_lr = np.median(steps_lr, axis=0)
import numpy as np
from matplotlib import rcParams

from utils import Saver, TestUtils

rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Tahoma']
rcParams['font.size'] = 16

saver = Saver()
test_utils = TestUtils()

q_learner = saver.load_from_pickle("results/q_learner_example.pickle")
lr_learner = saver.load_from_pickle("results/low_rank_learner_example.pickle")

steps_q_large = saver.load_from_pickle("results/exp_1_q_learning_steps.pickle")
rewards_q_large = saver.load_from_pickle(
    "results/exp_1_q_learning_rewards.pickle")
final_mean_reward_q_large = saver.load_from_pickle(
    "results/exp_1_q_learning_final_reward.pickle")

steps_q_small = saver.load_from_pickle("results/exp_2_q_learning_steps.pickle")
rewards_q_small = saver.load_from_pickle(
    "results/exp_2_q_learning_rewards.pickle")
final_mean_reward_q_small = saver.load_from_pickle(
    "results/exp_2_q_learning_final_reward.pickle")

steps_lr = saver.load_from_pickle("results/exp_1_lr_learning_steps.pickle")
rewards_lr = saver.load_from_pickle("results/exp_1_lr_learning_rewards.pickle")
final_mean_reward_lr = saver.load_from_pickle(
def train():
    olp = OneLinePrint()

    logger.info('start building batch data')
    vocab = Vocab(hps.vocab_file, hps.vocab_size)
    batcher = Batcher(hps.data_path, vocab, hps, hps.single_pass)
    logger.info('end building batch data')
    logger.info('vocab size: %s' % vocab.size())

    criterion = nn.NLLLoss(ignore_index=vocab.pad_id())
    model = Model(vocab, hps)
    if hps.use_cuda:
        model = model.cuda()
    if hps.restore:
        model.load_state_dict(torch.load(hps.restore))
    opt = optimzier(hps.opt, model.parameters())  # project helper that builds the optimizer named by hps.opt

    if hps.ckpt_name != '':
        saver = Saver(hps.ckpt_path, hps.ckpt_name, model)

    # for storing summaries
    if hps.store_summary:
        writer = SummaryWriter(comment='_' + hps.ckpt_name)

    # loss_sum = 0
    logger.info('----Start training----')
    timer = Timer()
    timer.start()
    for step in range(hps.start_step, hps.num_iters + 1):
        # # Decay learning rate
        # if step % hps.lr_decay_step == 0:
        #     olp.write('decay learning rate to %f' % decay_lr(opt, step))

        # Forward -------------------------------------------------------------
        opt.zero_grad()
        batch = batcher.next_batch()
        (inputs, inp_lens, inp_pad,
         dec_inps, targets, dec_lens, dec_pad) = batch.expand(hps.use_cuda)
        outputs = model(dec_inps, dec_lens)  # output: (B*T*(1~3)U)
        loss = criterion(outputs.view(-1, vocab.size()), targets.view(-1))

        # Backward ------------------------------------------------------------
        loss.backward()
        # gradient clipping (clip_grad_norm_ replaces the deprecated clip_grad_norm)
        global_norm = nn.utils.clip_grad_norm_(model.parameters(), hps.clip)
        opt.step()
        # loss_sum += loss.item()

        # Utils ---------------------------------------------------------------
        # save checkpoint
        if step % hps.ckpt_steps == 0 and hps.ckpt_name != '':
            saver.save(step, loss.item())
            olp.write('save checkpoint (step=%d)\n' % step)

        # print the train loss and ppl (loss.item() replaces the old loss.data[0])
        ppl = np.exp(loss.item())
        olp.write('step %s train loss: %f, ppl: %8.2f' % (step, loss.item(), ppl))
        olp.flush()

        # store summary
        if hps.store_summary and (step - 1) % hps.summary_steps == 0:
            writer.add_scalar('loss', loss.item(), step)
            writer.add_scalar('ppl', ppl, step)
            writer.add_scalar('global_norm', global_norm, step)
            if step - 1 != 0:
                lap_time, _ = timer.lap('summary')
                writer.add_scalar('avg time/step', lap_time / hps.summary_steps, step)

        # print output and target
        # if step % hps.summary_steps == 0:
        #     logger.info('\nstep:%d~%d avg loss: %f', step - hps.summary_steps,
        #                 step, loss_sum / hps.summary_steps)
        #     loss_sum = 0

    if hps.store_summary:
        writer.close()
def __init__(self, args, n_workers=4, memory_ram=2, time_saving=45):
    # Getting Ray
    self.Ray = args.ray
    if self.Ray and not RAY:
        print("-- ray is not available --\nNormal initialization in progress")
        self.Ray = False

    # Some parameters
    self.time_save = time_saving
    self.seed = args.seed
    self.exp_steps = args.steps
    self.test_steps = args.test_steps
    self.test_freq = args.test_freq
    self.memory_start = args.memory_start
    self.update_online = args.update_online
    self.play_steps = args.play
    self.game = args.game
    self.double = args.double
    self.game_actions = args.game_actions

    # Other variables to save progress
    self.time = Tocker()
    self.ckTime = Tocker()
    self.acc_test = 0
    self.mean_test = []
    self.std_test = []
    self.actor_test_episodes = 0
    self.test_episodes = 0

    name_temp = args.game
    name_temp += '_double' if self.double else ''
    name_temp += '_' + args.optimizer + '_lr_' + str(args.learning_rate)
    self.saver = Saver(name_temp)
    name_sum = os.path.join(self.saver.dir, "tensorboard_{}".format(name_temp))
    self.writer = SummaryWriter(name_sum)

    # Generating the actors
    self.policy = atariDQN(args.lhist, args.game_actions, args.dropouts)
    self.memoryReplay = MemoryReplay(capacity=args.memory_size, LHist=args.lhist)
    self.main_actor = ActorDQN(self.game,
                               args.game_actions,
                               self.policy,
                               lHist=args.lhist,
                               steps_per_update=args.update_online,
                               buffer_size=args.buffer_size,
                               test_steps=self.test_steps,
                               start_steps=self.memory_start,
                               device=DEVICE,
                               seed=args.seed,
                               writer=self.writer)
    self.steps_to_fill_mem = math.ceil(self.memory_start / (args.buffer_size * args.lhist))
    self.n_actors = NCPUS if n_workers >= NCPUS else n_workers

    if NCPUS > 1 and self.Ray:
        # Ray actors are created to speed up filling the replay buffer
        # and testing the network
        actors_start_steps = math.ceil(self.memory_start / self.n_actors)
        self.steps_to_fill_mem = math.ceil(actors_start_steps / (args.buffer_size * args.lhist))
        actors_test_steps = math.ceil(self.test_steps / self.n_actors)

        # ---- Initialize Ray ----
        ray.init(num_cpus=self.n_actors,
                 _memory=memory_ram * GIGA,
                 object_store_memory=400 * MEGA)
        # Buffers for the actors
        # self.buffers = [Buffer(capacity=actors_buffer_size) for _ in range(self.n_actors)]
        # ActorDQN replicas that fill and evaluate; the trainer accesses only their buffers
        actor = ray.remote(ActorDQN)
        self.actors = [actor.remote(self.game,
                                    args.game_actions,
                                    self.policy,
                                    lHist=args.lhist,
                                    buffer_size=args.buffer_size,
                                    test_steps=actors_test_steps,
                                    start_steps=actors_start_steps,
                                    seed=args.seed,
                                    ray_actor=True)
                       for _ in range(self.n_actors)]
        time.sleep(10)
        print("Trainer set with Ray\nRay resources: {} workers with {} GB of RAM"
              .format(self.n_actors, memory_ram))
    else:
        print(timeFormated(), "Trainer set")

    self.main_learner = LearnerDQN(self.policy,
                                   self.memoryReplay,
                                   args.mini_batch_size,
                                   learning_rate=args.learning_rate,
                                   update_target=args.update_target,
                                   device=DEVICE,
                                   double=args.double,
                                   optimizer=args.optimizer,
                                   seed=args.seed,
                                   writer=self.writer)
def train(args, model, train_set):
    # to cuda
    model.cuda()
    model.train()

    # dataloader
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               drop_last=True,
                                               shuffle=True,
                                               num_workers=int(args.num_threads))

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=args.scheduler_step_size,
                                                gamma=args.scheduler_gamma)

    # saver
    saver = Saver(args)

    # loss function
    criterion = torch.nn.L1Loss()

    # time
    time_start_train = time.time()

    # misc
    num_batch = len(train_set) // args.batch_size
    counter = 0
    backup_codes(args)

    # count parameters
    params = network_paras(model)
    log = "num of parameters: {:,}".format(params)
    saver.save_log(log)
    print(log)

    # init weights
    def weights_init(m):
        if isinstance(m, torch.nn.Conv2d):
            init.kaiming_normal_(m.weight.data)

    if not args.is_finetuning:
        model.apply(weights_init)

    # start training
    print('{:=^40}'.format(' training start '))
    for epoch in range(args.epochs):
        running_loss = 0.0
        for bidx, (_, im_lr, im_hr) in enumerate(train_loader):
            # Variable(...) is deprecated; tensors carry autograd state directly
            im_lr = im_lr.cuda()
            im_hr = im_hr.cuda()

            # zero the parameter gradients
            model.zero_grad()

            # forward
            output = model(im_lr)

            # loss
            loss = criterion(output, im_hr)

            # backward & update
            loss.backward()
            optimizer.step()

            # accumulate running loss
            running_loss += loss.cpu().item()

            # print every N batches (running_loss is the sum since the last print)
            if counter % args.step_print_loss == 0:
                acc_time = time.time() - time_start_train
                log = 'epoch: (%d/%d) [%5d/%5d], loss: %.6f | time: %s' % \
                    (epoch, args.epochs, bidx, num_batch, running_loss,
                     str(datetime.timedelta(seconds=acc_time)))
                print(log)
                saver.save_log(log)
                running_loss = 0.0
                print_lr(optimizer)

            if counter and counter % args.step_save == 0:
                # save
                saver.save_model(model)

            # counter increment
            counter += 1

        # step the LR schedule once per epoch, after the optimizer updates
        scheduler.step()

    print('{:=^40}'.format(' Finish '))
    runtime = time.time() - time_start_train
    print('training time:', str(datetime.timedelta(seconds=runtime)) + '\n\n')