def _validate(self) -> tuple:
    self.model.eval()
    losses_sum = 0
    benchmark_losses_sum = 0

    for input, target in self.valid_dataloader:
        if cuda_is_available():
            input = input.cuda(non_blocking=self.valid_dataloader.pin_memory)
            target = target.cuda(non_blocking=self.valid_dataloader.pin_memory)

        prediction = self.model.forward(input)

        # Some models return a tuple; the benchmark loss is always computed on the primary prediction.
        if type(prediction) is tuple:
            benchmark_loss = self.benchmark_MSE_loss.compute(prediction[0], target)
        else:
            benchmark_loss = self.benchmark_MSE_loss.compute(prediction, target)

        benchmark_losses_sum += float(benchmark_loss.data.cpu().numpy())

        loss = self.custom_loss.compute(prediction, target)
        losses_sum += float(loss.data.cpu().numpy())

    return losses_sum / len(self.valid_dataloader), benchmark_losses_sum / len(self.valid_dataloader)
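# Illustrative only: a validation pass does not need gradients, so the loop in
# _validate could run under torch.no_grad() to save memory. This hypothetical helper
# is a minimal sketch of that variant, not the repository's code.
def validate_without_gradients(model, dataloader, loss_fn):
    model.eval()
    losses_sum = 0.0
    with torch.no_grad():
        for batch_input, batch_target in dataloader:
            prediction = model(batch_input)
            losses_sum += float(loss_fn(prediction, batch_target))
    return losses_sum / len(dataloader)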
def launch_experiment(experiments: list, repo: Repo):
    param = Parameters()

    # The datasets need to be segmented before any experiment is launched to prevent process conflicts.
    if param.segment_dataset:
        Loaders.segment_datasets(param)

    world_size = torch.cuda.device_count()

    if cuda_is_available() and world_size > 1 and 1 < len(experiments) <= world_size:
        print("-> Launching {} parallel experiments...".format(torch.cuda.device_count()))
        experiment_keys = [experiment.get_key() for experiment in experiments]
        print("-> experiment keys: {}".format(experiment_keys))
        experiment_params = [experiment.params for experiment in experiments]
        api_key = experiments[0].api_key
        print("-> spawning the experiments' processes")
        multiprocessing.spawn(launch_parallel_experiment,
                              nprocs=len(experiments),
                              args=(api_key, experiment_keys, experiment_params, repo.git_dir))
    elif len(experiments) == 1:
        with CometLogger(experiments[0]):
            print("-> launching single experiment")
            launch_single_GPU_experiment(experiments[0], repo, param)
    else:
        raise NotImplementedError()
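# Illustrative only: torch.multiprocessing.spawn invokes its target once per process
# and passes the process rank as the first positional argument, followed by the
# entries of `args`. The repository's real worker is launch_parallel_experiment
# (defined elsewhere); the hypothetical body below only shows the calling convention
# and the usual per-process GPU pinning.
def launch_parallel_experiment_sketch(rank, api_key, experiment_keys, experiment_params, git_dir):
    # Pin this worker to one GPU so parallel experiments do not contend for the same device.
    torch.cuda.set_device(rank)
    print("worker {} handling experiment {}".format(rank, experiment_keys[rank]))
    # ... rebuild the Comet experiment from api_key / experiment_keys[rank] and run it ...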
def launch_single_GPU_experiment(experiment: Experiment, repo: Repo, param: Parameters):
    setup_comet_experiment(experiment, param, repo)

    loss, model, optimizer, train_dataloader, valid_dataloader = load_experiment_assets(param)

    if param.train:
        print("~~ Launching the training ~~")
        launch_training(model, train_dataloader, valid_dataloader, optimizer, loss, param)

    if param.test:
        print("~~ Testing the model ~~")
        launch_testing(model, param)

    del train_dataloader, valid_dataloader, model, optimizer, loss
    if cuda_is_available():
        torch.cuda.empty_cache()
def _test(self, trajectory_dataloader: DataLoader):
    """
    Performs an inference pass on the trajectory.
    @param trajectory_dataloader: dataloader yielding the trajectory's segments
    @return: assembled predicted poses, rotation losses, translation losses, assembled ground-truth poses
    """
    self.model.eval()
    rotation_losses = []
    translation_losses = []
    predictions = Trajectory(trajectory_dataloader.dataset.data_is_relative(),
                             is_groundtruth=False,
                             sliding_window_size=self.sliding_window_size,
                             sliding_window_overlap=self.sliding_window_overlap)
    ground_truth = Trajectory(trajectory_dataloader.dataset.data_is_relative(),
                              is_groundtruth=True,
                              sliding_window_size=self.sliding_window_size,
                              sliding_window_overlap=self.sliding_window_overlap)

    for segments_batch in trajectory_dataloader:
        x, batch_target = segments_batch

        if cuda_is_available():
            x = x.cuda()
            batch_target = torch.squeeze(batch_target.cuda())

        prediction = self.model.forward(x)

        if type(prediction) is tuple:
            batch_predict = torch.squeeze(prediction[0])
        else:
            batch_predict = torch.squeeze(prediction)

        batch_target = torch.squeeze(batch_target)

        # The first three pose components are the rotation, the last three the translation.
        rotation_loss = self.loss.compute(batch_predict[:, :3], batch_target[:, :3])
        translation_loss = self.loss.compute(batch_predict[:, 3:], batch_target[:, 3:])
        rotation_losses.append(float(rotation_loss.data.cpu().numpy()))
        translation_losses.append(float(translation_loss.data.cpu().numpy()))

        batch_predict = batch_predict.detach().cpu().numpy()
        batch_target = batch_target.cpu().numpy()

        for batch_element_id in range(0, batch_target.shape[0]):
            # If there's only 1 element in the batch, process the whole batch; otherwise, process element-wise.
            if len(batch_target.shape) == 2:
                predict = batch_predict
                target = batch_target
            else:
                predict = numpy.squeeze(batch_predict[batch_element_id])
                target = numpy.squeeze(batch_target[batch_element_id])

            ground_truth.append(target)
            predictions.append(predict)

            if len(batch_target.shape) == 2:
                break

    return predictions.assembled_pose, rotation_losses, translation_losses, ground_truth.assembled_pose
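# Illustrative only: how a sliding window partitions a trajectory's frames. With a
# window size of 5 and an overlap of 2 (hypothetical values), consecutive windows
# share 2 frames, matching the sliding_window_size / sliding_window_overlap
# parameters used above.
window_size, overlap = 5, 2
step = window_size - overlap
frame_count = 12
windows = [list(range(start, start + window_size))
           for start in range(0, frame_count - window_size + 1, step)]
# windows -> [[0, 1, 2, 3, 4], [3, 4, 5, 6, 7], [6, 7, 8, 9, 10]]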
def _train(self) -> tuple:
    timer_start_time = time.time()
    self.model.train()
    losses_sum = 0
    benchmark_losses_sum = 0

    for i, (input, target) in enumerate(self.train_dataloader):
        CometLogger.get_experiment().log_metric("Current batch", i + 1)
        CometLogger.get_experiment().log_metric("Total nbr of batches", len(self.train_dataloader))

        # Only log this if we are NOT in a multiprocessing session
        if CometLogger.gpu_id is None:
            print("--> processing batch {}/{} of size {}".format(i + 1, len(self.train_dataloader), len(input)))

        if cuda_is_available():
            with ThreadingTimeout(14400.0) as timeout_ctx1:
                input = input.cuda(non_blocking=self.train_dataloader.pin_memory)
                target = target.cuda(non_blocking=self.train_dataloader.pin_memory)
            if not bool(timeout_ctx1):
                CometLogger.fatalprint('Encountered fatally long delay when moving tensors to GPUs')

        prediction = self.model.forward(input)

        with ThreadingTimeout(14400.0) as timeout_ctx3:
            if type(prediction) is tuple:
                benchmark_loss = self.benchmark_MSE_loss.compute(prediction[0], target)
            else:
                benchmark_loss = self.benchmark_MSE_loss.compute(prediction, target)
        if not bool(timeout_ctx3):
            CometLogger.fatalprint('Encountered fatally long delay during computation of benchmark loss')

        with ThreadingTimeout(14400.0) as timeout_ctx4:
            benchmark_losses_sum += float(benchmark_loss.data.cpu().numpy())
        if not bool(timeout_ctx4):
            CometLogger.fatalprint('Encountered fatally long delay during summation of benchmark losses')

        with ThreadingTimeout(14400.0) as timeout_ctx5:
            loss = self.custom_loss.compute(prediction, target)
        if not bool(timeout_ctx5):
            CometLogger.fatalprint('Encountered fatally long delay during computation of the custom loss')

        self._backpropagate(loss)

        with ThreadingTimeout(14400.0) as timeout_ctx6:
            losses_sum += float(loss.data.cpu().numpy())
        if not bool(timeout_ctx6):
            CometLogger.fatalprint('Encountered fatally long delay during loss addition')

    timer_end_time = time.time()

    CometLogger.get_experiment().log_metric("Epoch training time", timer_end_time - timer_start_time)

    return losses_sum / len(self.train_dataloader), benchmark_losses_sum / len(self.train_dataloader)
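# Illustrative only: the timeout-guard pattern used in the loop above, assuming
# ThreadingTimeout comes from the `stopit` package. After the `with` block, the
# context manager's state reports whether the guarded code finished in time.
from stopit import ThreadingTimeout

with ThreadingTimeout(2.0) as timeout_ctx:
    result = sum(range(1_000_000))  # stand-in for the guarded work

if timeout_ctx.state == timeout_ctx.TIMED_OUT:
    print("the guarded block exceeded its 2-second budget")
else:
    print("the guarded block finished in time")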
def load_model(param: Parameters) -> nn.Module:
    if param.model == "DeepVO":
        CometLogger.print("Using DeepVO")
        model = DeepVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "CoordConvDeepVO":
        CometLogger.print("Using CoordConvDeepVO")
        model = CoordConvDeepVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "MagicVO":
        CometLogger.print("Using MagicVO")
        model = MagicVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SelfAttentionVO":
        CometLogger.print("Using SelfAttentionVO")
        model = SelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SplitSelfAttentionVO":
        CometLogger.print("Using SplitSelfAttentionVO")
        model = SplitSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "CoordConvSelfAttentionVO":
        CometLogger.print("Using CoordConvSelfAttentionVO")
        model = CoordConvSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SimpleSelfAttentionVO":
        CometLogger.print("Using SimpleSelfAttentionVO")
        model = SimpleSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "PositionalSimpleSelfAttentionVO":
        CometLogger.print("Using PositionalSimpleSelfAttentionVO")
        model = PositionalSimpleSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SkippedSelfAttention":
        CometLogger.print("Using SkippedSelfAttention")
        model = SkippedSelfAttention(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "WeightedSelfAttentionVO":
        CometLogger.print("Using WeightedSelfAttentionVO")
        model = WeightedSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SelfAttentionVO_GlobRelOutput":
        CometLogger.print("Using SelfAttentionVO_GlobRelOutput")
        model = SelfAttentionVO_GlobRelOutput(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "StackedSelfAttentionVO":
        CometLogger.print("Using StackedSelfAttentionVO")
        model = StackedSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "NoSelfAttentionVO":
        CometLogger.print("Using NoSelfAttentionVO")
        model = NoSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SnailSelfAttentionVO":
        CometLogger.print("Using SnailSelfAttentionVO")
        model = SnailSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "SnailVO":
        CometLogger.print("Using SnailVO")
        model = SnailVO(param.img_h, param.img_w, 5)
    elif param.model == "GlobalRelativeSelfAttentionVO":
        CometLogger.print("Using GlobalRelativeSelfAttentionVO")
        model = GlobalRelativeSelfAttentionVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "GlobalRelativeTransformerVO":
        CometLogger.print("Using GlobalRelativeTransformerVO")
        model = GlobalRelativeTransformerVO(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "GlobalRelativeTransformerVO_globXAsKeyVal":
        CometLogger.print("Using GlobalRelativeTransformerVO_globXAsKeyVal")
        model = GlobalRelativeTransformerVO_globXAsKeyVal(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    elif param.model == "GlobalRelativeSelfAttentionVO_globXasKeyVal":
        CometLogger.print("Using GlobalRelativeSelfAttentionVO_globXasKeyVal")
        model = GlobalRelativeSelfAttentionVO_globXasKeyVal(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)
    else:
        CometLogger.print("{} was not implemented".format(param.model))
        raise NotImplementedError()

    _map_pretrained_model_to_current_model(param.pretrained_model, model)

    if cuda_is_available():
        CometLogger.print("Training with CUDA")
        model.cuda()
    else:
        CometLogger.print("CUDA not available. Training on the CPU.")

    return model
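# Illustrative only: the if/elif ladder above could be collapsed into a name-to-class
# registry. This is a hypothetical refactor sketch, not repository code; it assumes
# every registered constructor accepts (img_h, img_w, rnn_hidden_size=...).
MODEL_REGISTRY = {
    "DeepVO": DeepVO,
    "MagicVO": MagicVO,
    "SelfAttentionVO": SelfAttentionVO,
    # ... remaining model classes ...
}


def load_model_from_registry(param: Parameters) -> nn.Module:
    try:
        model_class = MODEL_REGISTRY[param.model]
    except KeyError:
        CometLogger.print("{} was not implemented".format(param.model))
        raise NotImplementedError()
    CometLogger.print("Using {}".format(param.model))
    return model_class(param.img_h, param.img_w, rnn_hidden_size=param.rnn_hidden_size)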
def load_optimizer_checkpoint(self) -> Any:
    device = "cpu"
    if cuda_is_available():
        device = "cuda"

    return torch.load(self.destination_path + "_optimizer.checkpoint",
                      map_location=torch.device(device))
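# Illustrative only: assuming the file at destination_path + "_optimizer.checkpoint"
# holds an optimizer state dict (as produced by optimizer.state_dict()), restoring it
# would look like this. `checkpoint_loader` and `optimizer` are hypothetical names.
def restore_optimizer(optimizer: torch.optim.Optimizer, checkpoint_loader) -> None:
    state = checkpoint_loader.load_optimizer_checkpoint()
    optimizer.load_state_dict(state)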
project_name = "candidate-tests"

if len(sys.argv) > 1:
    # get the config file from the arguments
    print("-> Loading the optimizer...")
    opt = CometOptimizer(config=sys.argv[1])
    active_parallel_experiments = []

    for experiment in opt.get_experiments(project_name=project_name, workspace="olibd"):
        print("-> Registering experiment {} with Comet...".format(experiment.get_key()))
        active_parallel_experiments.append(experiment)

        if len(active_parallel_experiments) == torch.cuda.device_count() or (
                not cuda_is_available() and len(active_parallel_experiments) == 1):
            launch_experiment(active_parallel_experiments, repo)
            active_parallel_experiments = []

    # If the last batch of experiments had a lower experiment count
    # than the number of GPUs, then it hasn't run yet. So we need to
    # run them now.
    if len(active_parallel_experiments) > 0:
        launch_experiment(active_parallel_experiments, repo)
else:
    experiment = Experiment(project_name=project_name, workspace="olibd")
    launch_experiment([experiment], repo)
def inference_thread(param: Parameters, trajectory: Trajectory):
    global inference_started, fps
    image_transformer = get_image_transformer(param)
    model = load_model(param)

    print("When stream is ready, press enter:")
    for line in sys.stdin:
        if '\n' == line:
            break

    device = "cpu"
    if cuda_is_available():
        device = "cuda"

    # Read raw RGB24 frames from the stream through ffmpeg's stdout pipe.
    video_input_reader = (ffmpeg.input(source)
                          .output('pipe:', format='rawvideo', pix_fmt='rgb24')
                          .run_async(cmd=["ffmpeg", "-hide_banner", "-loglevel", "error"],
                                     pipe_stdout=True))

    i = 0
    new_frames = param.sliding_window_size - param.sliding_window_overlap
    assert new_frames >= 1
    previous_frames = None

    while True:
        time_start = time.time()

        # Each RGB24 frame is width * height * 3 bytes: read one frame at a time until the
        # window is full, then only the new (non-overlapping) frames per iteration.
        if previous_frames is None or len(previous_frames) < param.sliding_window_size:
            video_bytes_buffer = video_input_reader.stdout.read(width * height * 3)
        else:
            video_bytes_buffer = video_input_reader.stdout.read(new_frames * width * height * 3)

        if not video_bytes_buffer:
            break

        if previous_frames is None or len(previous_frames) < param.sliding_window_size:
            frames = numpy.frombuffer(video_bytes_buffer, numpy.uint8).reshape((1, height, width, 3))
        else:
            frames = numpy.frombuffer(video_bytes_buffer, numpy.uint8).reshape((new_frames, height, width, 3))

        if previous_frames is None:
            previous_frames = frames
        else:
            previous_frames = numpy.concatenate((previous_frames, frames))

        if len(previous_frames) < param.sliding_window_size:
            # Do not start inference yet, build frame count up to window size
            continue
        elif len(previous_frames) > param.sliding_window_size:
            # Keep frame count same size as window size
            previous_frames = previous_frames[new_frames:]

        assert len(previous_frames) == param.sliding_window_size

        frame_tensors = torch.Tensor(previous_frames).to(device).permute(0, 3, 1, 2).div(255)
        frame_tensors = image_transformer(frame_tensors)

        if param.minus_point_5:
            frame_tensors = frame_tensors - 0.5

        frame_tensors = torch.unsqueeze(frame_tensors, 0)
        prediction = torch.squeeze(model.forward(frame_tensors)).detach().cpu().numpy()
        trajectory.append(prediction)

        time_stop = time.time()
        total_time = time_stop - time_start
        fps = len(previous_frames) / total_time
        print(f"fps: {fps}")
        i = i + 1
        inference_started = True
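# Illustrative only: `source`, `width` and `height` are globals defined elsewhere in
# this script. With ffmpeg-python they would typically come from probing the stream
# first; this hypothetical helper shows that pattern.
def probe_dimensions(source: str) -> tuple:
    probe = ffmpeg.probe(source)
    video_stream = next(s for s in probe['streams'] if s['codec_type'] == 'video')
    return int(video_stream['width']), int(video_stream['height'])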