class CoILAgent(Agent):
    """CARLA agent that turns sensor readings into controls with a CoIL model.

    The network is built from ``g_conf.MODEL_NAME``, restored from a
    checkpoint and queried once per simulation step through ``run_step``.
    """

    def __init__(self, checkpoint):
        """Build the network, restore its weights and move it to the GPU.

        Args:
            checkpoint: Mapping whose ``'state_dict'`` entry holds the trained
                parameters accepted by ``CoILModel.load_state_dict``.
        """
        Agent.__init__(self)

        self.model = CoILModel(g_conf.MODEL_NAME)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.cuda()
        # The agent only runs inference, so switch the model to evaluation
        # mode, as the other agents built on CoILModel do.
        self.model.eval()

    def run_step(self, measurements, sensor_data, directions, target):
        """Compute the control command for the current simulation step.

        Args:
            measurements: Object exposing
                ``player_measurements.forward_speed``.
            sensor_data: Mapping from sensor name to an object with a
                ``data`` array (see ``_process_sensors``).
            directions: High-level command fed to the branched network.
            target: Unused by this agent.

        Returns:
            A ``carla_protocol.Control`` with steer, throttle and brake set
            from the network output.
        """
        print(" RUnning STEP ")
        forward_speed = measurements.player_measurements.forward_speed
        speed = torch.cuda.FloatTensor([forward_speed]).unsqueeze(0)
        print("Speed shape ", speed)

        # Use the command supplied by the caller instead of a fixed branch.
        directions_tensor = torch.cuda.LongTensor([directions])
        print("dir", directions_tensor)

        model_outputs = self.model.forward_branch(
            self._process_sensors(sensor_data), speed, directions_tensor)
        print(model_outputs)

        steer, throttle, brake = self._process_model_outputs(
            model_outputs[0], forward_speed)

        control = carla_protocol.Control()
        control.steer = steer
        control.throttle = throttle
        control.brake = brake
        # TODO: adapt the client side agent for the new version. ( PROBLEM )
        # TODO: maybe change to a more meaningful message ??
        return control

    def _process_sensors(self, sensors):
        """Crop, resize and convert the configured sensors to a model input.

        Every sensor named in ``g_conf.SENSORS`` is cropped vertically to
        ``g_conf.IMAGE_CUT``, resized to the configured size and moved to the
        GPU.  Only the image of the last configured sensor is returned;
        stacking several sensors is not implemented.

        Args:
            sensors: Mapping from sensor name to an object with a ``data``
                array.  Assumes ``data`` is an H x W x C uint8 image, the
                ndarray layout ``ToPILImage`` accepts -- TODO confirm against
                the CARLA client.

        Returns:
            The image tensor with a leading batch dimension of 1.  With
            uint8 input, ``ToTensor`` has already scaled it to [0, 1].

        Raises:
            ValueError: If ``g_conf.SENSORS`` is empty.
        """
        image_input = None
        crop_top, crop_bottom = g_conf.IMAGE_CUT[0], g_conf.IMAGE_CUT[1]

        for name, size in g_conf.SENSORS.items():
            sensor = sensors[name].data[crop_top:crop_bottom, ...]

            # ToPILImage expects ndarrays as H x W x C, so the cropped frame
            # is passed through without transposing.  ToTensor then yields a
            # C x H x W float tensor, so no further division by 255 is done.
            image_transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((size[1], size[2])),
                transforms.ToTensor(),
                iag.ToGPU()
            ])
            image_input = image_transform(sensor)

        if image_input is None:
            raise ValueError("g_conf.SENSORS does not configure any sensor")

        return image_input.unsqueeze(0)

    def _process_model_outputs(self, outputs, speed):
        """Split the network output into the three control values.

        This is the hook for control heuristics (for instance, to make the
        car faster); none are applied at the moment.

        Args:
            outputs: Indexable holding steer, throttle and brake at
                positions 0, 1 and 2.
            speed: Current forward speed; currently unused.

        Returns:
            Tuple ``(steer, throttle, brake)``.
        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        return steer, throttle, brake
def _weighted_rep_losses(intermediate_reps, expert_reps, batch_size,
                         elementwise):
    """Return the mimicking-loss terms of one batch and their weighted sum.

    Args:
        intermediate_reps: Indexable with the model's perception, speed and
            intention representations at positions 0, 1 and 2.
        expert_reps: Matching expert representations (moved to the GPU here).
        batch_size: Number of samples in the batch, used for normalisation.
        elementwise: Callable applied to the difference tensor before it is
            summed (``torch.abs`` for L1, squaring for L2).

    Returns:
        Tuple ``(perception, speed, intentions, weighted_total)``.  A term
        whose ``g_conf.USE_*_REP_LOSS`` flag is off is ``0``; the total is
        scaled by ``g_conf.REP_LOSS_WEIGHT``.
    """
    enabled_flags = (
        g_conf.USE_PERCEPTION_REP_LOSS,
        g_conf.USE_SPEED_REP_LOSS,
        g_conf.USE_INTENTION_REP_LOSS,
    )
    terms = []
    for index, enabled in enumerate(enabled_flags):
        if enabled:
            difference = intermediate_reps[index] - expert_reps[index].cuda()
            terms.append(
                torch.sum(elementwise(difference)).data.tolist()
                / (3 * batch_size))
        else:
            terms.append(0)

    perception, speed, intentions = terms
    weighted_total = g_conf.REP_LOSS_WEIGHT * (
        perception + speed + intentions)
    return perception, speed, intentions, weighted_total


def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output):
    """Validate every scheduled checkpoint of an experiment as it appears.

    The function polls for the checkpoints listed in ``g_conf.TEST_SCHEDULE``,
    runs each over the whole validation dataset, logs the L1/L2 control
    losses (plus the mimicking losses when ``g_conf.USE_REPRESENTATION_LOSS``
    is set) and records the total L1 error used for early stopping.

    Args:
        gpu: Value written to the ``CUDA_VISIBLE_DEVICES`` environment
            variable.
        exp_batch: Experiment folder under ``configs`` and ``_logs``.
        exp_alias: Experiment name; ``<exp_alias>.yaml`` is the configuration
            merged into ``g_conf``.
        dataset_name: Dataset folder below ``$COIL_DATASET_PATH``.
        suppress_output: When true, stdout and stderr are redirected to files
            inside ``_output_logs`` for the rest of the process.

    All exceptions are reported through ``coil_logger``; on failure the CSV
    of the checkpoint being evaluated is erased.
    """
    latest = None
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        # The validation dataset is always fully loaded, so we fix a very
        # high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process('validation', dataset_name)

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            # The redirections are meant to last for the whole process, so
            # the files are intentionally left open.
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                + str(os.getpid()) + ".out"), "a", buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME
                + '_' + str(os.getpid()) + ".out"), "a", buffering=1)

        # Define the dataset.
        full_dataset = [
            os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
        ]
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the
        # validation data name
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_names=[dataset_name])

        # The data loader is the multi-threaded module from pytorch that
        # releases a number of workers to get all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        # Create model.
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

        # The window used to keep track of the validation loss
        l1_window = []
        # If a checkpoint was already evaluated, recover the validation
        # losses of all previously evaluated checkpoints (the validation
        # loss is used for early stopping).
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:
            l1_window = coil_logger.recover_loss_window(dataset_name, None)

        model.cuda()

        best_mse = 1000
        best_error = 1000
        best_mse_iter = 0
        best_error_iter = 0

        use_rep_loss = g_conf.USE_REPRESENTATION_LOSS

        # Loop to validate all checkpoints as they are saved during training
        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
                with torch.no_grad():
                    # Get and load latest checkpoint
                    latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
                    checkpoint = torch.load(
                        os.path.join('_logs', exp_batch, exp_alias,
                                     'checkpoints', str(latest) + '.pth'))
                    checkpoint_iteration = checkpoint['iteration']
                    print("Validation loaded ", checkpoint_iteration)

                    model.load_state_dict(checkpoint['state_dict'])
                    model.eval()

                    accumulated_mse = 0
                    accumulated_error = 0
                    iteration_on_checkpoint = 0

                    accumulated_perception_rep_mse = 0
                    accumulated_speed_rep_mse = 0
                    accumulated_intentions_rep_mse = 0
                    accumulated_rep_mse = 0
                    accumulated_perception_rep_error = 0
                    accumulated_speed_rep_error = 0
                    accumulated_intentions_rep_error = 0
                    accumulated_rep_error = 0

                    # Validation loop
                    for data in data_loader:
                        # Compute the forward pass on a batch from the
                        # validation dataset
                        controls = data['directions']

                        # First case corresponds to squeeze network, second
                        # case to the driving model without mimicking
                        # losses, last case to the mimic network.
                        # NOTE(review): with "seg" sensors AND the
                        # representation loss enabled, intermediate_reps is
                        # never assigned -- confirm that combination is
                        # not used.
                        if "seg" in g_conf.SENSORS.keys():
                            output = model.forward_branch(
                                data,
                                dataset.extract_inputs(data).cuda(),
                                controls,
                                dataset.extract_intentions(data).cuda())
                        elif not use_rep_loss:
                            output = model.forward_branch(
                                data,
                                dataset.extract_inputs(data).cuda(),
                                controls)
                        else:
                            output, intermediate_reps = model.forward_branch(
                                data,
                                dataset.extract_inputs(data).cuda(),
                                controls)

                        write_regular_output(checkpoint_iteration, output)

                        # Compute control loss on current validation batch
                        # and accumulate it
                        targets_to_use = dataset.extract_targets(data)
                        targets_gpu = targets_to_use.cuda()
                        error = torch.abs(output - targets_gpu)
                        mse = torch.mean(
                            (output - targets_gpu) ** 2).data.tolist()
                        mean_error = torch.mean(error).data.tolist()

                        accumulated_error += mean_error
                        accumulated_mse += mse

                        # Compute mimicking losses on current validation
                        # batch and accumulate them
                        if use_rep_loss:
                            expert_reps = dataset.extract_representations(
                                data)
                            batch_size = output.shape[0]

                            # First L1 losses (seg mask, speed, intention
                            # mimicking losses)
                            (perception_l1, speed_l1, intentions_l1,
                             rep_error) = _weighted_rep_losses(
                                 intermediate_reps, expert_reps, batch_size,
                                 torch.abs)
                            accumulated_perception_rep_error += perception_l1
                            accumulated_speed_rep_error += speed_l1
                            accumulated_intentions_rep_error += intentions_l1
                            accumulated_rep_error += rep_error

                            # L2 losses now
                            (perception_l2, speed_l2, intentions_l2,
                             rep_mse) = _weighted_rep_losses(
                                 intermediate_reps, expert_reps, batch_size,
                                 lambda difference: difference ** 2)
                            accumulated_perception_rep_mse += perception_l2
                            accumulated_speed_rep_mse += speed_l2
                            accumulated_intentions_rep_mse += intentions_l2
                            accumulated_rep_mse += rep_mse

                        # Log a random position
                        position = random.randint(
                            0, len(output.data.tolist()) - 1)

                        # Logging.  Progress is reported in samples, using
                        # the batch size the loader was built with.
                        progress = (
                            str(iteration_on_checkpoint * g_conf.BATCH_SIZE)
                            + '/' + str(len(dataset)))
                        message = {
                            'Checkpoint': latest,
                            'Iteration': progress,
                            'MeanError': mean_error,
                            'MSE': mse,
                        }
                        if use_rep_loss:
                            total_mse = mse + rep_mse
                            total_error = mean_error + rep_error
                            message.update({
                                'RepMeanError': rep_error,
                                'RepMSE': rep_mse,
                                'MeanTotalError': total_error,
                                'TotalMSE': total_mse,
                            })
                        message.update({
                            'Output': output[position].data.tolist(),
                            'GroundTruth':
                                targets_to_use[position].data.tolist(),
                            'Error': error[position].data.tolist(),
                            'Inputs': dataset.extract_inputs(
                                data)[position].data.tolist(),
                        })
                        coil_logger.add_message('Iterating', message, latest)

                        iteration_on_checkpoint += 1

                        reported_error = (
                            total_error if use_rep_loss else mean_error)
                        print("Iteration %d on Checkpoint %d : Error %f"
                              % (iteration_on_checkpoint,
                                 checkpoint_iteration, reported_error))

                    # ---- Finish a round of validation, write results,
                    # ---- wait for the next

                    # Compute average L1 and L2 losses over whole round of
                    # validation and log them
                    number_of_batches = len(data_loader)
                    checkpoint_average_mse = (
                        accumulated_mse / number_of_batches)
                    checkpoint_average_error = (
                        accumulated_error / number_of_batches)
                    coil_logger.add_scalar('L2 Loss', checkpoint_average_mse,
                                           latest, True)
                    coil_logger.add_scalar('Loss', checkpoint_average_error,
                                           latest, True)

                    if use_rep_loss:
                        checkpoint_average_perception_rep_mse = (
                            accumulated_perception_rep_mse
                            / number_of_batches)
                        checkpoint_average_speed_rep_mse = (
                            accumulated_speed_rep_mse / number_of_batches)
                        checkpoint_average_intentions_rep_mse = (
                            accumulated_intentions_rep_mse
                            / number_of_batches)
                        checkpoint_average_rep_mse = (
                            accumulated_rep_mse / number_of_batches)
                        checkpoint_average_total_mse = (
                            checkpoint_average_mse
                            + checkpoint_average_rep_mse)

                        checkpoint_average_perception_rep_error = (
                            accumulated_perception_rep_error
                            / number_of_batches)
                        checkpoint_average_speed_rep_error = (
                            accumulated_speed_rep_error / number_of_batches)
                        checkpoint_average_intentions_rep_error = (
                            accumulated_intentions_rep_error
                            / number_of_batches)
                        checkpoint_average_rep_error = (
                            accumulated_rep_error / number_of_batches)
                        # The total L1 error combines the L1 control error
                        # with the L1 mimicking error (not the L2 one).
                        checkpoint_average_total_error = (
                            checkpoint_average_error
                            + checkpoint_average_rep_error)

                        # Log L1/L2 loss terms
                        coil_logger.add_scalar(
                            'Perception Rep Loss',
                            checkpoint_average_perception_rep_mse,
                            latest, True)
                        coil_logger.add_scalar(
                            'Speed Rep Loss',
                            checkpoint_average_speed_rep_mse, latest, True)
                        coil_logger.add_scalar(
                            'Intentions Rep Loss',
                            checkpoint_average_intentions_rep_mse,
                            latest, True)
                        coil_logger.add_scalar(
                            'Overall Rep Loss', checkpoint_average_rep_mse,
                            latest, True)
                        coil_logger.add_scalar(
                            'Total L2 Loss', checkpoint_average_total_mse,
                            latest, True)
                        coil_logger.add_scalar(
                            'Perception Rep Error',
                            checkpoint_average_perception_rep_error,
                            latest, True)
                        coil_logger.add_scalar(
                            'Speed Rep Error',
                            checkpoint_average_speed_rep_error, latest, True)
                        coil_logger.add_scalar(
                            'Intentions Rep Error',
                            checkpoint_average_intentions_rep_error,
                            latest, True)
                        coil_logger.add_scalar(
                            'Total Rep Error', checkpoint_average_rep_error,
                            latest, True)
                        coil_logger.add_scalar(
                            'Total Loss', checkpoint_average_total_error,
                            latest, True)
                    else:
                        checkpoint_average_total_mse = checkpoint_average_mse
                        checkpoint_average_total_error = (
                            checkpoint_average_error)

                    if checkpoint_average_total_mse < best_mse:
                        best_mse = checkpoint_average_total_mse
                        best_mse_iter = latest

                    if checkpoint_average_total_error < best_error:
                        best_error = checkpoint_average_total_error
                        best_error_iter = latest

                    # Print for logging / to terminal validation results
                    if use_rep_loss:
                        summary = {
                            'Control Error': checkpoint_average_error,
                            'Control Loss': checkpoint_average_mse,
                            'Rep Error': checkpoint_average_rep_error,
                            'Rep Loss': checkpoint_average_rep_mse,
                            'Error': checkpoint_average_total_error,
                            'Loss': checkpoint_average_total_mse,
                            'BestError': best_error,
                            'BestMSE': best_mse,
                            'BestMSECheckpoint': best_mse_iter,
                            'BestErrorCheckpoint': best_error_iter
                        }
                    else:
                        summary = {
                            'Error': checkpoint_average_error,
                            'Loss': checkpoint_average_mse,
                            'BestError': best_error,
                            'BestMSE': best_mse,
                            'BestMSECheckpoint': best_mse_iter,
                            'BestErrorCheckpoint': best_error_iter
                        }
                    coil_logger.add_message(
                        'Iterating',
                        {'Summary': summary, 'Checkpoint': latest},
                        latest)

                    # Save validation loss history (validation loss is used
                    # for early stopping)
                    l1_window.append(checkpoint_average_total_error)
                    coil_logger.write_on_error_csv(
                        dataset_name, checkpoint_average_total_error)

                    # Early stopping
                    if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                        stale = dlib.count_steps_without_decrease(l1_window)
                        stale_robust = (
                            dlib.count_steps_without_decrease_robust(
                                l1_window))
                        if stale > 3 and stale_robust > 3:
                            coil_logger.write_stop(dataset_name, latest)
                            break
            else:
                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)

                coil_logger.add_message('Loading',
                                        {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:  # noqa: E722 - last-resort boundary: log and clean up
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
class CoILAgent(Agent):
    """CARLA agent that can drive with several CoIL network architectures.

    ``architecture_name`` selects how the network is built, which checkpoint
    entries are loaded and how the forward pass is run.  Inference is
    implemented for ``'wgangp_lsd'``, ``'coil_unit'``, ``'unit_task_only'``
    and ``'coil_icra'``.
    """

    # Architectures made of a task network plus a generator/encoder.
    _TWO_NETWORK_ARCHITECTURES = ('coil_unit', 'unit_task_only')
    # Architectures for which run_step knows how to run the forward pass.
    _SUPPORTED_ARCHITECTURES = ('wgangp_lsd', 'coil_unit', 'unit_task_only',
                                'coil_icra')
    # Rows kept from every sensor frame and the size fed to the network.
    _IMAGE_ROWS = (140, 260)
    _NETWORK_IMAGE_SIZE = (128, 128)

    def __init__(self, checkpoint, architecture_name):
        """Build the network(s) for the architecture and load the weights.

        Args:
            checkpoint: Mapping holding the state dictionaries; the keys
                read depend on ``architecture_name``.
            architecture_name: Name of the architecture.  For a name other
                than the supported ones the model is built and moved to
                the GPU but no weights are loaded.
        """
        Agent.__init__(self)

        self.architecture_name = architecture_name

        if architecture_name in self._TWO_NETWORK_ARCHITECTURES:
            self.model_task, self.model_gen = CoILModel(architecture_name)
            self.model_task = self.model_task.cuda()
            self.model_gen = self.model_gen.cuda()
        else:
            self.model = CoILModel(architecture_name)
            self.model.cuda()

        if architecture_name == 'wgangp_lsd':
            self.model.load_state_dict(checkpoint['stateF_dict'])
            self.model.eval()
        elif architecture_name == 'coil_unit':
            self.model_task.load_state_dict(checkpoint['task'])
            self.model_gen.load_state_dict(checkpoint['b'])
            self.model_task.eval()
            self.model_gen.eval()
        elif architecture_name == 'coil_icra':
            self.model.load_state_dict(checkpoint['state_dict'])
            self.model.eval()
        elif architecture_name == 'unit_task_only':
            self.model_task.load_state_dict(checkpoint['task_state_dict'])
            self.model_gen.load_state_dict(checkpoint['enc_state_dict'])
            self.model_task.eval()
            self.model_gen.eval()

    def run_step(self, measurements, sensor_data, directions, target):
        """Compute the control command for the current simulation step.

        Args:
            measurements: Object exposing
                ``player_measurements.forward_speed``.
            sensor_data: Mapping from sensor name to an object with a
                ``data`` array (see ``_process_sensors``).
            directions: High-level command; only the ``'coil_icra'``
                architecture feeds it to the network.
            target: Unused by this agent.

        Returns:
            A ``carla_protocol.Control`` with steer, throttle and brake set
            from the network output.

        Raises:
            ValueError: If the agent was built with an architecture for
                which no forward pass is implemented.
        """
        architecture = self.architecture_name
        if architecture not in self._SUPPORTED_ARCHITECTURES:
            raise ValueError(
                "Unsupported architecture for inference: %r" % (architecture,))

        print(" RUnning STEP ")
        forward_speed = measurements.player_measurements.forward_speed
        speed = torch.cuda.FloatTensor([forward_speed]).unsqueeze(0)
        print("Speed is", speed)
        print("Speed shape ", speed)
        directions_tensor = torch.cuda.LongTensor([directions])

        images = self._process_sensors(sensor_data)

        if architecture == 'wgangp_lsd':
            embed, model_outputs = self.model(images, speed)
        elif architecture in self._TWO_NETWORK_ARCHITECTURES:
            embed, n_b = self.model_gen.encode(images)
            model_outputs = self.model_task(embed, speed)
        else:  # 'coil_icra'
            model_outputs = self.model.forward_branch(
                images, speed, directions_tensor)

        print(model_outputs)

        # The branched 'coil_icra' output is indexed once, the other
        # architectures twice, before the three controls are read.
        if architecture == 'coil_icra':
            control_values = model_outputs[0]
        else:
            control_values = model_outputs[0][0]
        steer, throttle, brake = self._process_model_outputs(
            control_values, forward_speed)

        control = carla_protocol.Control()
        control.steer = steer
        control.throttle = throttle
        control.brake = brake
        # TODO: adapt the client side agent for the new version. ( PROBLEM )
        # TODO: maybe change to a more meaningful message ??
        return control

    def _process_sensors(self, sensors):
        """Crop, resize and convert the configured sensors to a model input.

        Every sensor named in ``g_conf.SENSORS`` is cropped to the rows in
        ``_IMAGE_ROWS``, resized to ``_NETWORK_IMAGE_SIZE``, reordered to
        channels-first and moved to the GPU as a float tensor.  Only the
        image of the last configured sensor is returned.

        Args:
            sensors: Mapping from sensor name to an object with a ``data``
                array.  Assumes ``data`` is an H x W x C image -- TODO
                confirm against the CARLA client.

        Returns:
            The image tensor with a leading batch dimension of 1.

        Raises:
            ValueError: If ``g_conf.SENSORS`` is empty.
        """
        first_row, last_row = self._IMAGE_ROWS
        image_input = None

        for name in g_conf.SENSORS:
            sensor = sensors[name].data[first_row:last_row, ...]
            resized = transform.resize(sensor, self._NETWORK_IMAGE_SIZE)
            channels_first = np.transpose(resized, (2, 0, 1))
            image_input = torch.from_numpy(channels_first).type(
                torch.FloatTensor).cuda()
            print("torch size", image_input.size())

        if image_input is None:
            raise ValueError("g_conf.SENSORS does not configure any sensor")

        image_input = image_input.unsqueeze(0)
        print(image_input.shape)
        return image_input

    def _process_model_outputs(self, outputs, speed):
        """Split the network output into the three control values.

        This is the hook for control heuristics (for instance, to make the
        car faster); none are applied at the moment.

        Args:
            outputs: Indexable holding steer, throttle and brake at
                positions 0, 1 and 2.
            speed: Current forward speed; currently unused.

        Returns:
            Tuple ``(steer, throttle, brake)``.
        """
        print("OUTPUTS", outputs)
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        return steer, throttle, brake
class MPSCAgent(AutonomousAgent):
    """Agent that filters a CoIL network's controls through an MPSC check.

    The network proposes a control, a simulated copy of the ego vehicle is
    rolled forward, and the proposal is replaced by the MPSC control as soon
    as a predicted state is judged unsafe.

    NOTE(review): ``run_step`` is still a sketch.  It relies on names that
    are not defined in this class (``self.world``, ``current_ego_states``,
    ``args``, ``T``, ``MPSC``, ``safety_boundary``, ``MPSC_control``,
    ``self._get_current_direction``, ``self._process_sensors``,
    ``self._process_model_outputs``) -- presumably provided elsewhere or
    still to be written; confirm before running.
    """

    def setup(self, path_to_config_file):
        """Load the configuration, the checkpoint and the CoIL model.

        Args:
            path_to_config_file: File parsed by
                ``checkpoint_parse_configuration_file`` into the experiment
                yaml path and the checkpoint number.
        """
        yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(
            path_to_config_file)

        # Directory two levels above this file; logs and configs are
        # resolved relative to it.
        base_dir = os.path.dirname(
            os.path.dirname(os.path.realpath(__file__)))
        yaml_parts = yaml_conf.split('/')

        # Take the checkpoint name and load it
        checkpoint = torch.load(os.path.join(
            base_dir, '_logs', yaml_parts[-2],
            yaml_parts[-1].split('.')[-2], 'checkpoints',
            str(checkpoint_number) + '.pth'))

        # merge the specific agent config with global config _g_conf
        merge_with_yaml(os.path.join(base_dir, yaml_conf))

        # We save the checkpoint for some interesting future use.
        self.checkpoint = checkpoint
        # TODO: retrain the model with MPSC
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        logging.info("Setup Model")

        # Load the model and set it up for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        self.latest_image = None
        self.latest_image_tensor = None

        # We add more time to the curve commands
        self._expand_command_front = 5
        self._expand_command_back = 3

        # check map waypoint format => carla_data_provider &
        # http://carla.org/2018/11/16/release-0.9.1/
        # e.g. from map.get_waypoint:
        # Waypoint(Transform(Location(x=338.763, y=226.453, z=0),
        #                    Rotation(pitch=360, yaw=270.035, roll=0)))
        # specify available track info, see autonomous_agent.py
        self.track = Track.ALL_SENSORS_HDMAP_WAYPOINTS

    def sensors(self):
        """Return the sensor suite requested from the simulator.

        Currently the full suite is requested: three RGB cameras, a LIDAR,
        a GNSS receiver, the CAN bus and the HD map.  Sensor configuration
        is described at
        https://carla.readthedocs.io/en/latest/cameras_and_sensors/

        Returns:
            List of sensor specification dictionaries.
        """
        sensors = [
            {'type': 'sensor.camera.rgb', 'x': 0.7, 'y': 0.0, 'z': 1.60,
             'roll': 0.0, 'pitch': 0.0, 'yaw': 0.0,
             'width': 800, 'height': 600, 'fov': 100, 'id': 'Center'},
            {'type': 'sensor.camera.rgb', 'x': 0.7, 'y': -0.4, 'z': 1.60,
             'roll': 0.0, 'pitch': 0.0, 'yaw': -45.0,
             'width': 800, 'height': 600, 'fov': 100, 'id': 'Left'},
            {'type': 'sensor.camera.rgb', 'x': 0.7, 'y': 0.4, 'z': 1.60,
             'roll': 0.0, 'pitch': 0.0, 'yaw': 45.0,
             'width': 800, 'height': 600, 'fov': 100, 'id': 'Right'},
            {'type': 'sensor.lidar.ray_cast', 'x': 0.7, 'y': -0.4, 'z': 1.60,
             'roll': 0.0, 'pitch': 0.0, 'yaw': -45.0, 'id': 'LIDAR'},
            {'type': 'sensor.other.gnss', 'x': 0.7, 'y': -0.4, 'z': 1.60,
             'id': 'GPS'},
            {'type': 'sensor.can_bus', 'reading_frequency': 25,
             'id': 'can_bus'},
            {'type': 'sensor.hd_map', 'reading_frequency': 1, 'id': 'hdmap'},
        ]
        return sensors

    def run_step(self, input_data, timestamp):
        """Compute one control command, overriding unsafe network outputs.

        Args:
            input_data: Mapping from sensor id to ``(frame, payload)``
                (built in ``autonomous_agent.py`` ``__call__``).  An
                observed sample contained:
                  * ``'GPS'``: ``(3755, array([49.00202793, 8.00463308,
                    1.58916414]))``
                  * ``'can_bus'``: ``(43, {...})`` with entries such as
                    ``'speed'``, ``'transform'`` and ``'linear_velocity'``
                  * ``'rgb'``: ``(3753, uint8 array with 4 values per
                    pixel)``
            timestamp: Unused.

        Returns:
            The control to apply: the network control when every simulated
            step is safe, the MPSC control otherwise.
        """
        # TODO
        # 1. request current localization
        for key, value in input_data.items():
            print("input_data ", key, value)

        directions = self._get_current_direction(input_data['GPS'][1])
        logging.debug("Directions %s", directions)

        # 2. get recommended action from the NN controller (copy from
        # CoILBaseline).
        # Divide the forward speed by the configured speed factor so it is
        # normalised for the network.
        norm_speed = input_data['can_bus'][1]['speed'] / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # End-to-end part: feed the rgb image to the network, then parse
        # the network output as the controller.
        # NOTE(review): sensors() declares camera ids 'Center', 'Left' and
        # 'Right' but this reads 'rgb' -- confirm the id.
        model_outputs = self._model.forward_branch(
            self._process_sensors(input_data['rgb'][1]), norm_speed,
            directions_tensor)
        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        # 3. use inner-loop to simulate/approximate vehicle model;
        # save the NN output as vehicle control
        sim_control = carla.VehicleControl()
        sim_control.steer = float(steer)
        sim_control.throttle = float(throttle)
        sim_control.brake = float(brake)
        logging.debug("inner loop for sim_control %s", sim_control)

        # TODO: copy a "parallel world" and create a "virtual agent" that
        # has the same state as ego_vehicle.
        # TODO: check how to copy the world, roads info are necessary, the
        # rest optional
        sim_world = self.world
        sim_ego = sim_world.create_ego_vehicle(current_ego_states)
        sim_world.agent_instance = getattr(
            sim_world.module_agent,
            sim_world.module_agent.__name__)(args.config)
        correct_sensors, error_message = sim_world.valid_sensors_configuration(
            sim_world.sim_agent, sim_world.track)

        # pass the sim_control to virtual agent and run T timesteps
        sim_ego.apply_control(sim_control)

        # use current model to predict the following state-action series
        MPSC_controls = []  # TODO: check where you should init it
        for i in range(T):
            sim_ego.run_step()  # TODO def run_step, update for sim_ego
            sim_ego.update()

            # 4. use MPSC to check safety at each future timestep
            safe = MPSC.check_safety(sim_ego.state, safety_boundary)
            if not safe:
                # if not safe, obtain MPSC control output
                logging.debug("use MPSC controller")
                control = MPSC_control
                # collect all "safe" outputs
                MPSC_controls.append(MPSC_control)
                # 7. execute MPSC control and add it to new dataset
                break
        else:
            # Safe within all T timesteps (or nothing to simulate):
            # proceed with the NN control output.
            logging.debug("use NN controller")
            control = sim_control

        # 8. retrain the network and/or do policy aggregation
        if MPSC_controls:
            # NOTE(review): placeholder for retraining.  setup() stores the
            # network as self._model; if CoILModel is a torch nn.Module,
            # train() only switches the training mode -- replace with the
            # real retraining routine.
            self._model.train(self._model, MPSC_controls)

        logging.debug("Control output %s", control)
        # There is the possibility to replace some of the predictions with
        # oracle predictions.
        self.first_iter = False
        return control
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output=True, yaml_file=None):
    """Dump the pre-final branch activations of one checkpoint for every frame.

    The function merges the experiment YAML into the global configuration,
    builds a non-shuffled loader over ``dataset_name``, runs
    ``model.get_prefinal_layer`` on every batch and stores one activation
    file per frame under ``<save_path>/<dataset_name>/<episode>/``.

    Args:
        gpu: value written to ``CUDA_VISIBLE_DEVICES``.
        exp_batch: experiment folder inside ``configs``.
        exp_alias: experiment name (YAML file name without extension).
        dataset_name: dataset folder inside ``$COIL_DATASET_PATH``.
        suppress_output: unused here; kept for interface compatibility.
        yaml_file: optional directory that replaces ``configs/<exp_batch>``.

    Returns:
        None
    """
    # NOTE(review): `args` (checkpoint, save_path, dataset_name) is not a
    # parameter of this function; it is presumably a module-level argparse
    # namespace -- confirm it is defined before execute() is called.

    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # Merge the experiment YAML with the global configuration structure.
    if yaml_file is not None:
        path_to_yaml_file = os.path.join(yaml_file, exp_alias + '.yaml')
    else:
        path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias + '.yaml')
    merge_with_yaml(path_to_yaml_file)

    # Define the dataset; no augmentation is applied.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
    augmenter = Augmenter(None)
    print('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset, transform=augmenter,
                          preload_name=dataset_name)

    # shuffle=False keeps activations aligned with the collected path names.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=g_conf.BATCH_SIZE,
        shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
        pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    model.cuda()

    # Run a single, explicitly specified checkpoint.
    checkpoint = torch.load(args.checkpoint)
    checkpoint_iteration = checkpoint['iteration']
    print("model loaded ", checkpoint_iteration)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    print('data_loader size: ', len(data_loader))
    total_output = []
    path_names = []
    # Inference only: no autograd graph is needed for the activations.
    with torch.no_grad():
        for batch_count, data in enumerate(data_loader, 1):
            # data[0] holds the measurements/images, data[1] the frame paths.
            path_names += data[1]
            controls = data[0]['directions']
            # get prefinal branch activations, only the last layers have dropout
            output = model.get_prefinal_layer(
                torch.squeeze(data[0]['rgb']).cuda(),
                dataset.extract_inputs(data[0]).cuda(),
                controls)
            total_output += output.detach().cpu().tolist()
            if batch_count % 50 == 0:
                print('iter: ', batch_count)

    print(len(total_output), len(path_names))
    st = time.time()
    for saved_count, (act, name) in enumerate(zip(total_output, path_names), 1):
        path_parts = name.split('/')
        episode_num = path_parts[-2]
        frame_num = path_parts[-1].split('_')[-1].split('.')[0]
        episode_dir = os.path.join(args.save_path, args.dataset_name, episode_num)
        # makedirs also creates missing parent folders and tolerates reruns.
        os.makedirs(episode_dir, exist_ok=True)
        file_name = 'Activation_' + frame_num
        if saved_count % 1000 == 0:
            print('iteration: ', saved_count)
        # save activations for each image, to be used for computing the
        # uncertainty later
        torch.save(act, os.path.join(episode_dir, file_name))
    print('time taken: ', time.time() - st)
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case
        if suppress_output:
            os.makedirs('_output_logs', exist_ok=True)
            # The redirected streams stay open for the life of the process.
            sys.stdout = open(os.path.join('_output_logs', exp_alias + '_' +
                                           g_conf.PROCESS_NAME + '_' +
                                           str(os.getpid()) + ".out"),
                              "a", buffering=1)
            sys.stderr = open(os.path.join('_output_logs', exp_alias + '_err_' +
                                           g_conf.PROCESS_NAME + '_' +
                                           str(os.getpid()) + ".out"),
                              "a", buffering=1)

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            # Reuse the name found above rather than querying a second time, so
            # the file that was tested is the file that gets loaded.
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(checkpoint_file)))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset root from the environment and the configuration.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augment images
        # and transform them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read a dataset.
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
        print("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:
            # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print("Before the loss")

        criterion = Loss(g_conf.LOSS_FUNCTION)

        # Loss time series window
        for data in data_loader:

            # Basically in this mode of execution, we validate every X Steps,
            # if it goes up 3 times, add a stop on the _logs folder that is
            # going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration,
                                                  g_conf.FINISH_ON_VALIDATION_STALE):
                break

            ####################################
            # Main optimization loop
            ####################################
            iteration += 1
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # get the control commands from float_data, size = [120,1]
            capture_time = time.time()
            controls = data['directions']
            # Extract inputs/targets once and reuse them for the forward pass,
            # the loss and the log message.
            inputs = dataset.extract_inputs(data)
            targets = dataset.extract_targets(data)
            inputs_gpu = inputs.cuda()
            targets_gpu = targets.cuda()

            # The output(branches) is a list of 5 branches results, each branch
            # is with size [120,3]
            model.zero_grad()
            branches = model(torch.squeeze(data['rgb'].cuda()), inputs_gpu)
            loss_function_params = {
                'branches': branches,
                'targets': targets_gpu,
                'controls': controls.cuda(),
                'inputs': inputs_gpu,
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()

            ####################################
            # Saving the model if necessary
            ####################################
            if is_ready_to_save(iteration):
                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(state, os.path.join('_logs', exp_batch, exp_alias,
                                               'checkpoints',
                                               str(iteration) + '.pth'))

            ################################################
            # Adding tensorboard logs.
            # Making calculations for logging purposes.
            # These logs are monitored by the printer module.
            ################################################
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - targets_gpu)

            # Log a random position. `data` is indexed by string keys above, so
            # len(data) would be the number of keys, not the batch size; the
            # index is drawn from the leading dimension of `output`, which is
            # what `position` indexes below.
            position = random.randint(0, len(output) - 1)

            accumulated_time += time.time() - capture_time

            coil_logger.add_message('Iterating',
                                    {'Iteration': iteration,
                                     'Loss': loss.data.tolist(),
                                     'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                                     'BestLoss': best_loss,
                                     'BestLossIteration': best_loss_iter,
                                     'Output': output[position].data.tolist(),
                                     'GroundTruth': targets[position].data.tolist(),
                                     'Error': error[position].data.tolist(),
                                     'Inputs': inputs[position].data.tolist()},
                                    iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d Loss: %f" % (iteration, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:
        coil_logger.add_message('Error', {'Message': str(e)})

    except Exception:
        # Top-level boundary: report any other failure, but let SystemExit and
        # similar interpreter-level exits propagate.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
def execute(gpu, exp_batch='nocrash', exp_alias='resnet34imnet10S1', suppress_output=True, yaml_file=None):
    """Compute the 3x3 covariate-shift matrix of one checkpoint on a dataset.

    For every batch the absolute error between ``model.forward_branch`` and
    the targets is turned into a per-sample outer product, divided by the
    sample's ``current_traj_length`` and summed. The accumulated matrix is
    finally divided by the last ``episode_count`` value seen.

    Args:
        gpu: value written to ``CUDA_VISIBLE_DEVICES``.
        exp_batch: experiment folder inside ``configs``.
        exp_alias: experiment name (YAML file name without extension).
        suppress_output: unused here; kept for interface compatibility.
        yaml_file: optional directory that replaces ``configs/<exp_batch>``.

    Returns:
        The normalized covariate-shift matrix as a NumPy array.

    Raises:
        ValueError: if the data loader yields no batch.
    """
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # Merge the experiment YAML with the global configuration structure.
    if yaml_file is not None:
        path_to_yaml_file = os.path.join(yaml_file, exp_alias + '.yaml')
    else:
        path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias + '.yaml')
    merge_with_yaml(path_to_yaml_file)

    # dataset used for computing dart covariance matrix
    # (specify DART_COVMAT_DATA in the config file)
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.DART_COVMAT_DATA)
    augmenter = Augmenter(None)
    print('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset, transform=augmenter,
                          preload_name=g_conf.DART_COVMAT_DATA)

    # The data loader is the multi threaded module from pytorch that release a
    # number of workers to get all the data.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=g_conf.BATCH_SIZE,
        shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
        pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    model.cuda()

    # used for computing the covariance matrix with the DART model checkpoint
    # (specify DART_MODEL_CHECKPOINT in the config file)
    checkpoint = torch.load(g_conf.DART_MODEL_CHECKPOINT)
    checkpoint_iteration = checkpoint['iteration']
    print("Validation loaded ", checkpoint_iteration)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # considering steer, throttle & brake so 3x3 matrix
    normalized_covariate_shift = torch.zeros(3, 3)
    total_episodes = None
    print('data_loader size: ', len(data_loader))

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for batch_count, data in enumerate(data_loader, 1):
            # Compute the forward pass on a batch from the validation dataset
            controls = data['directions']
            output = model.forward_branch(
                torch.squeeze(data['rgb']).cuda(),
                dataset.extract_inputs(data).cuda(),
                controls)

            targets = dataset.extract_targets(data).cuda()
            error = torch.abs(output - targets).data.cpu()

            # Outer product of the error vector with itself, per sample.
            error = error.unsqueeze(dim=2)
            error_transpose = torch.transpose(error, 1, 2)
            covariate_shift = torch.matmul(error, error_transpose)

            # expand traj length tensor to Bx3x3 (considering steer, throttle
            # & brake)
            traj_lengths = torch.stack([
                torch.stack([data['current_traj_length'].squeeze(dim=1)] * 3,
                            dim=1)
            ] * 3, dim=2)
            covariate_shift = covariate_shift / traj_lengths
            normalized_covariate_shift += torch.sum(covariate_shift, dim=0)

            total_episodes = data['episode_count'][-1].data
            if batch_count % 50 == 0:
                print('iteration: ', batch_count)

    if total_episodes is None:
        raise ValueError('The data loader produced no batches; '
                         'cannot compute the covariance matrix.')

    print('total episodes: ', total_episodes)
    normalized_covariate_shift = normalized_covariate_shift / total_episodes
    print('normalized covariate shift: ', normalized_covariate_shift.shape,
          normalized_covariate_shift)
    return normalized_covariate_shift.numpy()


# NOTE(review): the original text carried a bare triple-single-quote token
# right after the return statement above. Its matching delimiter is not
# visible in this part of the file, so it is recorded here as a comment only.
class CoILBaseline(AutonomousAgent):
    """Agent that drives with a CoIL model loaded from a training checkpoint."""

    def setup(self, path_to_config_file):
        """Load the configuration and the checkpoint named in the config file.

        Args:
            path_to_config_file: file parsed by
                ``checkpoint_parse_configuration_file`` into a YAML path and a
                checkpoint number.
        """
        yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(
            path_to_config_file)

        # Directory two levels above this file; '_logs' and the YAML path are
        # resolved relative to it.
        repo_root = os.path.dirname(
            os.path.dirname(os.path.realpath(__file__)))
        yaml_parts = yaml_conf.split('/')

        # Take the checkpoint name and load it
        checkpoint = torch.load(
            os.path.join(repo_root, '_logs', yaml_parts[-2],
                         yaml_parts[-1].split('.')[-2], 'checkpoints',
                         str(checkpoint_number) + '.pth'))

        # do the merge here
        merge_with_yaml(os.path.join(repo_root, yaml_conf))

        # We save the checkpoint for some interesting future use.
        self.checkpoint = checkpoint
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()
        self.latest_image = None
        self.latest_image_tensor = None
        # We add more time to the curve commands
        self._expand_command_front = 5
        self._expand_command_back = 3

    def sensors(self):
        """Return the sensor suite: RGB camera, CAN bus and GNSS."""
        sensors = [{
            'type': 'sensor.camera.rgb',
            'x': 2.0,
            'y': 0.0,
            'z': 1.40,
            'roll': 0.0,
            'pitch': 0.0,
            'yaw': 0.0,
            'width': 800,
            'height': 600,
            'fov': 100,
            'id': 'rgb'
        }, {
            'type': 'sensor.can_bus',
            'reading_frequency': 25,
            'id': 'can_bus'
        }, {
            'type': 'sensor.other.gnss',
            'x': 0.7,
            'y': -0.4,
            'z': 1.60,
            'id': 'GPS'
        }]
        return sensors

    def run_step(self, input_data):
        """Compute one vehicle control from the current sensor readings.

        Args:
            input_data: mapping from sensor id to a pair whose second element
                is the reading; the 'GPS', 'can_bus' and 'rgb' entries are used.

        Returns:
            A ``carla.VehicleControl`` with steer, throttle and brake set.
        """
        directions = self._get_current_direction(input_data['GPS'][1])

        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = input_data['can_bus'][1]['speed'] / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # Compute the forward pass processing the sensors got from CARLA.
        model_outputs = self._model.forward_branch(
            self._process_sensors(input_data['rgb'][1]), norm_speed,
            directions_tensor)
        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        control = carla.VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        self.first_iter = False
        return control

    def set_global_plan(self, topological_plan):
        """Store the route plan used to look up the current command."""
        # The command expansion (_expand_commands) is currently not applied.
        print(" Set the plan ")
        self._global_plan = topological_plan

    def get_attentions(self, layers=None):
        """
        Returns
            The activations obtained from the first layers of the latest
            iteration.

        Raises:
            ValueError: if no step has been run yet.
        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError(
                'No step was ran yet. '
                'No image to compute the activations, Try Running ')
        all_layers = self._model.get_perception_layers(
            self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            # Mean absolute activation over dim 1, first batch element.
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            # Drop the fourth (index 3) channel added by the colormap.
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [88, 200]))
        return attentions

    def _process_sensors(self, sensor):
        """Crop, resize and convert one camera image to a 1xCxHxW CUDA tensor."""
        sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
        # NOTE(review): scipy.misc.imresize is not available in recent SciPy
        # releases -- confirm the pinned SciPy version provides it.
        sensor = scipy.misc.imresize(
            sensor, (g_conf.SENSORS['rgb'][1], g_conf.SENSORS['rgb'][2]))
        self.latest_image = sensor

        # HxWxC -> CxHxW; equivalent to the former swapaxes(0, 1) followed by
        # transpose((2, 1, 0)).
        sensor = np.transpose(sensor, (2, 0, 1))
        sensor = torch.from_numpy(sensor / 255.0).type(
            torch.FloatTensor).cuda()

        image_input = sensor.unsqueeze(0)
        self.latest_image_tensor = image_input
        return image_input

    def _get_current_direction(self, vehicle_position):
        """Return the command code of the plan waypoint closest to the vehicle.

        Codes: 3.0 for LEFT, 4.0 for RIGHT, 5.0 for STRAIGHT, 2.0 otherwise.
        """
        closest_id = 0
        # Start from infinity so the true closest waypoint always wins, however
        # far away it is.
        min_distance = float('inf')
        for index, plan_entry in enumerate(self._global_plan):
            computed_distance = distance_vehicle(plan_entry[0],
                                                 vehicle_position)
            if computed_distance < min_distance:
                min_distance = computed_distance
                closest_id = index

        direction = self._global_plan[closest_id][1]
        if direction == RoadOption.LEFT:
            return 3.0
        if direction == RoadOption.RIGHT:
            return 4.0
        if direction == RoadOption.STRAIGHT:
            return 5.0
        return 2.0

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster,
         for instance.
        Returns:
            The (steer, throttle, brake) triple after the brake heuristics.
        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0
        if throttle > brake:
            brake = 0.0
        return steer, throttle, brake

    def _expand_commands(self, topological_plan):
        """ The idea is to make the intersection indications to last longer"""
        # O(2*N) algorithm, probably it is possible to do in O(N) with queues.

        # Get the index where curves start and end
        curves_start_end = []
        inside = False
        start = -1
        current_curve = RoadOption.LANEFOLLOW
        for index in range(len(topological_plan)):
            command = topological_plan[index][1]
            print(command)
            if command != RoadOption.LANEFOLLOW and not inside:
                inside = True
                start = index
                current_curve = command
            if command == RoadOption.LANEFOLLOW and inside:
                inside = False
                if start == -1:
                    raise ValueError("End of curve without start")
                # End now is the index.
                curves_start_end.append([start, index, current_curve])
                start = -1

        for start_index, end_index, command in curves_start_end:
            # Add the backwards curves (before the beginning)
            for offset in range(1, self._expand_command_front + 1):
                changed_index = start_index - offset
                # >= 0: the first waypoint is a valid one to relabel; only
                # negative indices must be skipped.
                if changed_index >= 0:
                    topological_plan[changed_index] = (
                        topological_plan[changed_index][0], command)
            # add the ones after the end
            for offset in range(0, self._expand_command_back):
                changed_index = end_index + offset
                if changed_index < len(topological_plan):
                    topological_plan[changed_index] = (
                        topological_plan[changed_index][0], command)
        return topological_plan
def execute(gpu, exp_batch, exp_alias, dataset_name, validation_set=False):
    """Save the model's intermediate representations for a whole dataset.

    A checkpoint is selected (the validation-stale one, or the last one of
    ``g_conf.TEST_SCHEDULE``), the dataset is run through
    ``model.get_intermediate_representations`` and the perception, speed and
    intention representations are written with ``np.save`` under
    ``_preloads``.

    Args:
        gpu: value written to ``CUDA_VISIBLE_DEVICES``.
        exp_batch: experiment folder inside ``configs``.
        exp_alias: experiment name (YAML file name without extension).
        dataset_name: dataset folder inside ``$COIL_DATASET_PATH``.
        validation_set: if True the dataset is loaded as a validation set.

    Returns:
        None

    Raises:
        ValueError: if the data loader yields no batch.
    """
    latest = None
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    g_conf.immutable(False)
    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    # If using validation dataset, fix a very high number of hours
    if validation_set:
        g_conf.NUMBER_OF_HOURS = 10000
    g_conf.immutable(True)

    # Define the dataset.
    full_dataset = [
        os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
    ]
    augmenter = Augmenter(None)
    if validation_set:
        # Definition of the dataset to be used. Preload name is just the
        # validation data name
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_names=[dataset_name])
    else:
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_names=[
                                  str(g_conf.NUMBER_OF_HOURS) + 'hours_' +
                                  dataset_name
                              ],
                              train_dataset=True)

    # The data loader is the multi threaded module from pytorch that release a
    # number of workers to get all the data.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=g_conf.BATCH_SIZE,
        shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
        pin_memory=True)

    # Define model
    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

    if g_conf.FINISH_ON_VALIDATION_STALE is not None:
        # Use the single checkpoint at which validation went stale.
        while validation_stale_point(
                g_conf.FINISH_ON_VALIDATION_STALE) is None:
            time.sleep(0.1)

        validation_state_iteration = validation_stale_point(
            g_conf.FINISH_ON_VALIDATION_STALE)
        checkpoint = torch.load(
            os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                         str(validation_state_iteration) + '.pth'))
        print("Validation loaded ", validation_state_iteration)
    else:
        # Walk the test schedule; the last checkpoint loaded is the one used.
        # NOTE(review): `control_filename` and `task_list` are not defined in
        # this function -- presumably module-level names; confirm they exist.
        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
            # Get the correct checkpoint
            # We check it for some task name, all of then are ready at the
            # same time
            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE,
                                        control_filename + '_' + task_list[0]):
                latest = get_next_checkpoint(
                    g_conf.TEST_SCHEDULE,
                    control_filename + '_' + task_list[0])
                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias,
                                 'checkpoints', str(latest) + '.pth'))
                print("Validation loaded ", latest)
            else:
                time.sleep(0.1)

    # NOTE(review): the source formatting was lost; the statements below are
    # read as following the if/else above (shared by both branches) -- confirm.

    # Load the model and prepare set it for evaluation
    model.load_state_dict(checkpoint['state_dict'])
    model.cuda()
    model.eval()

    # Collect per-batch chunks and concatenate once at the end, rather than
    # re-copying the growing tensors on every batch.
    perception_chunks = []
    speed_chunks = []
    intentions_chunks = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for data in data_loader:
            # Compute the forward pass on a batch from the dataset and get the
            # intermediate representations of the squeeze network
            # NOTE(review): the representations are only computed when a
            # "seg" sensor is configured; without it the lines below fail on
            # the first batch -- confirm the intended scope of this check.
            if "seg" in g_conf.SENSORS.keys():
                perception_rep, speed_rep, intentions_rep = \
                    model.get_intermediate_representations(
                        data,
                        dataset.extract_inputs(data).cuda(),
                        dataset.extract_intentions(data).cuda())
            perception_chunks.append(perception_rep.data.cpu())
            speed_chunks.append(speed_rep.data.cpu())
            intentions_chunks.append(intentions_rep.data.cpu())

    if not perception_chunks:
        raise ValueError('The data loader produced no batches; '
                         'there are no representations to save.')

    # Save intermediate representations
    perception_rep_all = torch.cat(perception_chunks, 0).tolist()
    speed_rep_all = torch.cat(speed_chunks, 0).tolist()
    intentions_rep_all = torch.cat(intentions_chunks, 0).tolist()
    np.save(
        os.path.join(
            '_preloads', exp_batch + '_' + exp_alias + '_' + dataset_name +
            '_representations'),
        [perception_rep_all, speed_rep_all, intentions_rep_all])
class CoILAgent(object):
    """CoIL agent that feeds an ERFNet road / not-road mask to the CoIL model."""

    def __init__(self, checkpoint, town_name, carla_version='0.84'):
        """Build the CoIL model and the ERFNet segmentation network.

        Args:
            checkpoint: loaded checkpoint dict; 'state_dict' is used here and
                'iteration' is read later by run_step.
            town_name: passed to CommandFollower when an oracle is enabled.
            carla_version: '0.9' selects the carla 0.9 interface.
        """
        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version
        # We save the checkpoint for some interesting future use.
        self.checkpoint = checkpoint
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        # Set ERFnet for segmentation
        self.model_erf = ERFNet(20)
        self.model_erf = torch.nn.DataParallel(self.model_erf)
        self.model_erf.cuda()

        print("LOAD ERFNet - validate")
        self.model_erf = self._load_matching_state_dict(
            self.model_erf,
            torch.load('trained_models/erfnet_pretrained.pth'))
        self.model_erf.eval()
        print("ERFNet and weights LOADED successfully")

        self.latest_image = None
        self.latest_image_tensor = None

        if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
            self.control_agent = CommandFollower(town_name)

    @staticmethod
    def _load_matching_state_dict(model, state_dict):
        """Copy into `model` only the entries of `state_dict` it also owns."""
        own_state = model.state_dict()
        for name, param in state_dict.items():
            # Entries unknown to the model are skipped.
            if name not in own_state:
                continue
            own_state[name].copy_(param)
        return model

    def run_step(self, measurements, sensor_data, directions, target):
        """
            Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.

        """
        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # Compute the forward pass processing the sensors got from CARLA.
        rgbs = self._process_sensors(sensor_data)
        with torch.no_grad():
            outputs = self.model_erf(rgbs)
            # Index of the highest-scoring class along dim 1.
            labels = outputs.max(1)[1].byte().cpu().data

        # Two-channel mask: label 0 versus every other label.
        seg_road = (labels == 0)
        seg_not_road = (labels != 0)
        seg = torch.stack((seg_road, seg_not_road), 1).float()

        model_outputs = self._model.forward_branch(seg.cuda(), norm_speed,
                                                   directions_tensor)
        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        if self._carla_version == '0.9':
            # Imported lazily so the older interface does not need the module.
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)

        # There is the posibility to replace some of the predictions with
        # oracle predictions.
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, target)

        if self.first_iter:
            coil_logger.add_message('Iterating', {
                "Checkpoint": self.checkpoint['iteration'],
                'Agent': str(steer)
            }, self.checkpoint['iteration'])
        self.first_iter = False
        return control

    def get_attentions(self, layers=None):
        """
        Returns
            The activations obtained from the first layers of the latest
            iteration.

        Raises:
            ValueError: if no step has been run yet.
        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError(
                'No step was ran yet. '
                'No image to compute the activations, Try Running ')
        all_layers = self._model.get_perception_layers(
            self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            # Mean absolute activation over dim 1, first batch element.
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            # Drop the fourth (index 3) channel added by the colormap.
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [88, 200]))
        return attentions

    def _process_sensors(self, sensors):
        """Crop, resize and stack every configured sensor into one CUDA tensor.

        Returns:
            A tensor with a leading batch dimension of 1; the sensors are
            concatenated along the channel dimension.

        Raises:
            ValueError: if g_conf.SENSORS is empty.
        """
        sensor_tensors = []
        for name, size in g_conf.SENSORS.items():
            if self._carla_version == '0.9':
                sensor = sensors[name][g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            else:
                sensor = sensors[name].data[
                    g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]

            # NOTE(review): scipy.misc.imresize is not available in recent
            # SciPy releases -- confirm the pinned SciPy version provides it.
            sensor = scipy.misc.imresize(sensor, (size[1], size[2]))
            self.latest_image = sensor

            # HxWxC -> CxHxW
            sensor = np.swapaxes(sensor, 0, 1)
            sensor = np.transpose(sensor, (2, 1, 0))
            sensor_tensors.append(
                torch.from_numpy(sensor / 255.0).type(
                    torch.FloatTensor).cuda())

        if not sensor_tensors:
            raise ValueError('g_conf.SENSORS is empty; no image to process.')

        # One concatenation along the channel dimension, then the batch axis.
        image_input = torch.cat(sensor_tensors, 0).unsqueeze(0)
        self.latest_image_tensor = image_input
        return image_input

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster,
         for instance.
        Returns:
            The (steer, throttle, brake) triple after the brake heuristics.
        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0
        if throttle > brake:
            brake = 0.0
        return steer, throttle, brake

    def _process_model_outputs_wp(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster,
         for instance.
        Returns:
            (steer, throttle, brake), with steer derived from outputs[4] and
            limited to [-1, 1].
        """
        wpa2, throttle, brake = outputs[4], outputs[1], outputs[2]
        if brake < 0.2:
            brake = 0.0
        if throttle > brake:
            brake = 0.0

        steer = 0.7 * wpa2
        if steer > 0:
            steer = min(steer, 1)
        else:
            steer = max(steer, -1)
        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, target):
        """Return (steer, throttle, brake) computed by the command follower."""
        # For the oracle, the current version of sensor data is not really
        # relevant.
        control, _, _, _, _ = self.control_agent.run_step(
            measurements, [], [], target)
        return control.steer, control.throttle, control.brake
class CoILAgent(object):
    """Agent that drives on the CARLA benchmark using a trained CoIL model.

    On every step the configured sensors are packed into one image tensor and
    fed to the network together with the normalized forward speed and the
    high-level command. The predicted steer/throttle/brake are returned as a
    vehicle control message.
    """

    # Color written for each label value when a raw (600, 800) single-channel
    # image is expanded to three channels; the row index is the label value.
    # NOTE(review): presumably CARLA semantic-segmentation class ids - confirm.
    _LABEL_COLORS = (
        (0, 0, 0),
        (70, 70, 70),
        (190, 153, 153),
        (250, 170, 160),
        (220, 20, 60),
        (153, 153, 153),
        (157, 234, 50),
        (128, 64, 128),
        (244, 35, 232),
        (107, 142, 35),
        (0, 0, 142),
        (102, 102, 156),
        (220, 220, 0),
    )

    def __init__(self, checkpoint, town_name, carla_version='0.84'):
        """
        Args:
            checkpoint: Loaded checkpoint; 'state_dict' is read here and
                'iteration' is read when the first step is logged.
            town_name: Forwarded to CommandFollower when an oracle is enabled.
            carla_version: '0.9' makes run_step build a carla.VehicleControl,
                any other value a VehicleControl.
        """
        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version
        # We save the checkpoint for some interesting future use.
        self.checkpoint = checkpoint
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()
        self.latest_image = None
        self.latest_image_tensor = None
        if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
            self.control_agent = CommandFollower(town_name)

    def run_step(self, measurements, sensor_data, directions, target):
        """
        Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.
        """
        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # Compute the forward pass processing the sensors got from CARLA.
        model_outputs = self._model.forward_branch(
            self._process_sensors(sensor_data), norm_speed, directions_tensor)
        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)

        # There is the posibility to replace some of the predictions with
        # oracle predictions (only throttle and brake are overridden here).
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, target)

        if self.first_iter:
            coil_logger.add_message(
                'Iterating',
                {"Checkpoint": self.checkpoint['iteration'],
                 'Agent': str(steer)},
                self.checkpoint['iteration'])
        self.first_iter = False
        return control

    def get_attentions(self, layers=None):
        """
        Returns
            The activations obtained from the first layers of the latest
            iteration, one color-mapped image per requested layer.

        Args:
            layers: Indices into the perception layers; defaults to [0, 1, 2].

        Raises:
            ValueError: If no step has produced an image tensor yet.
        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError('No step was ran yet. '
                             'No image to compute the activations, Try Running ')
        all_layers = self._model.get_perception_layers(self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            # Mean absolute activation over dim 1, first batch element.
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            # Drop the 4th (index 3) channel added by the colormap.
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [150, 200]))
        return attentions

    @classmethod
    def _colorize_labels(cls, labels):
        """Expand an integer label image (H, W) to a float64 (H, W, 3) image.

        Each label value is replaced by its row of `_LABEL_COLORS` with a
        single table lookup instead of one Python call per pixel and channel.
        """
        palette = np.array(cls._LABEL_COLORS, dtype=np.float64)
        return palette[labels]

    def _process_sensors(self, sensors):
        """Build the network input from the sensors listed in g_conf.SENSORS.

        Every sensor is resized to (size[1], size[2]), scaled by 1/255 and
        moved to the GPU; the per-sensor tensors are concatenated along
        dim 0 and a leading batch dimension is added. The last resized image
        and the final tensor are kept on the instance for get_attentions.
        """
        per_sensor = []
        for name, size in g_conf.SENSORS.items():
            sensor = sensors[name].data
            # A single-channel 600x800 frame is treated as a label image and
            # painted with the palette before resizing.
            if sensor.shape == (600, 800):
                sensor = self._colorize_labels(sensor)

            sensor = scipy.misc.imresize(sensor, (size[1], size[2]))

            ##### Save sensor
            self.latest_image = sensor

            # swapaxes followed by this transpose takes (H, W, C) to (C, H, W).
            sensor = np.swapaxes(sensor, 0, 1)
            sensor = np.transpose(sensor, (2, 1, 0))
            sensor = torch.from_numpy(sensor / 255.0).type(
                torch.FloatTensor).cuda()
            per_sensor.append(sensor)

        image_input = torch.cat(per_sensor, 0)
        image_input = image_input.unsqueeze(0)
        self.latest_image_tensor = image_input
        return image_input

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:
            (steer, throttle, brake); brake is zeroed when it is below 0.05
            or when throttle is larger than it.
        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0
        if throttle > brake:
            brake = 0.0
        return steer, throttle, brake

    def _process_model_outputs_wp(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:
            (steer, throttle, brake) where steer is 0.7 * outputs[4] clamped
            to [-1, 1] and brake is zeroed below 0.2 or when throttle wins.
        """
        wpa2, throttle, brake = outputs[4], outputs[1], outputs[2]
        if brake < 0.2:
            brake = 0.0
        if throttle > brake:
            brake = 0.0

        steer = 0.7 * wpa2
        if steer > 0:
            steer = min(steer, 1)
        else:
            steer = max(steer, -1)
        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, target):
        """Return (steer, throttle, brake) computed by the control agent."""
        # For the oracle, the current version of sensor data is not really relevant.
        control, _, _, _, _ = self.control_agent.run_step(
            measurements, [], [], target)
        return control.steer, control.throttle, control.brake
class CoILAgent(Agent):
    """Agent that turns CARLA measurements and sensors into controls with a
    CoIL model loaded from a checkpoint."""

    def __init__(self, checkpoint):
        """
        Args:
            checkpoint: Loaded checkpoint; its 'state_dict' entry is loaded
                into the model.
        """
        Agent.__init__(self)
        # We save the checkpoint for some interesting future use.
        self.checkpoint = checkpoint
        self.model = CoILModel(g_conf.MODEL_NAME)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.cuda()

    def run_step(self, measurements, sensor_data, directions, target):
        """Compute the control message for the current simulation step.

        Args:
            measurements: CARLA measurements; only
                player_measurements.forward_speed is read.
            sensor_data: Sensor readings indexed by the names in
                g_conf.SENSORS.
            directions: High-level command used to select the model branch.
            target: Not used by this method.

        Returns:
            A carla_protocol.Control with steer, throttle and brake set.
        """
        forward_speed = measurements.player_measurements.forward_speed
        speed = torch.cuda.FloatTensor([forward_speed]).unsqueeze(0)
        print("Speed shape ", speed)
        directions_tensor = torch.cuda.LongTensor([directions])
        model_outputs = self.model.forward_branch(
            self._process_sensors(sensor_data), speed, directions_tensor)
        print(model_outputs)

        steer, throttle, brake = self._process_model_outputs(
            model_outputs[0], forward_speed)

        control = carla_protocol.Control()
        control.steer = steer
        control.throttle = throttle
        control.brake = brake
        # TODO: adapt the client side agent for the new version. ( PROBLEM )
        # TODO: maybe change to a more meaningfull message ??
        return control

    def _process_sensors(self, sensors):
        """Crop, transform and stack all configured sensors into one batch.

        Each sensor is cropped to the g_conf.IMAGE_CUT rows and run through a
        transform pipeline chosen by its type. The transformed tensors are
        concatenated along dim 0 and a leading batch dimension is added.
        """
        transformed = []
        for name, size in g_conf.SENSORS.items():
            sensor = sensors[name].data[
                g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]

            if sensors[name].type == 'SemanticSegmentation':
                # TODO: the camera name has to be sincronized with what is in the experiment...
                sensor = join_classes(sensor)
                sensor = sensor[:, :, np.newaxis]
                image_transform = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Resize((size[1], size[2]),
                                      interpolation=Image.NEAREST),
                    iag.ToGPU(),
                    iag.Multiply((1 / (number_of_seg_classes - 1)))
                ])
            else:
                image_transform = transforms.Compose([
                    transforms.ToPILImage(),
                    transforms.Resize((size[1], size[2])),
                    transforms.ToTensor(),
                    transforms.Normalize((0, 0, 0), (255, 255, 255)),
                    iag.ToGPU()
                ])

            sensor = np.swapaxes(sensor, 0, 1)
            sensor = np.flip(sensor.transpose((2, 0, 1)), axis=0)

            # Every sensor goes through its transform. Previously only the
            # first one did and later sensors were concatenated as raw numpy
            # arrays, which torch.cat cannot combine with a tensor.
            transformed.append(image_transform(sensor))

        image_input = torch.cat(transformed, 0)
        image_input = image_input.unsqueeze(0)
        return image_input

    def _process_model_outputs(self, outputs, speed):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:
            (steer, throttle, brake) after the heuristics below.
        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.2:
            brake = 0.0
        if throttle > brake:
            brake = 0.0
        else:
            throttle = throttle * 2
        # Cut the throttle above 35.0 when the car is not braking.
        if speed > 35.0 and brake == 0.0:
            throttle = 0.0
        return steer, throttle, brake
def execute(gpu, exp_batch, exp_alias, dataset_name):
    """Validate every scheduled checkpoint of an experiment on a dataset.

    The function redirects stdout to a per-process file under
    '_output_logs', then polls until each checkpoint in
    g_conf.TEST_SCHEDULE is available. For each checkpoint it loads the
    weights, runs the whole dataset through the model, logs per-batch
    information and writes the average loss / error plus the best values
    seen so far.

    Args:
        gpu: Value written to the CUDA_VISIBLE_DEVICES environment variable.
        exp_batch: Experiment batch folder (under 'configs' and '_logs').
        exp_alias: Experiment name; '<exp_alias>.yaml' is the configuration.
        dataset_name: Dataset folder under $COIL_DATASET_PATH.
    """
    # TODO: batch size an number of workers go to some configuration file
    batch_size = 120

    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('validation', dataset_name)

    os.makedirs('_output_logs', exist_ok=True)

    # Deliberately left open: stdout stays redirected for the whole process.
    sys.stdout = open(os.path.join(
        '_output_logs',
        g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
        "a", buffering=1)

    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not ?
        return

    # Define the dataset. This structure is has the __get_item__ redefined in a way
    # that you can access the HDFILES positions from the root directory as a in a vector.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()]))

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=12,
                                              pin_memory=True)

    # TODO: here there is clearly a posibility to make a cool "conditioning" system.
    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()

    # Not used yet; see the TODO on the loss computation below.
    criterion = Loss()

    latest = get_latest_evaluated_checkpoint()
    if latest is None:
        # When nothing was tested, get latest returns none, we fix that.
        latest = 0

    print(dataset.meta_data)

    # Rows of the float data holding the command and the speed; they do not
    # change between batches so they are looked up once.
    control_position = np.where(dataset.meta_data[:, 0] == 'control')[0][0]
    speed_position = np.where(dataset.meta_data[:, 0] == 'speed_module')[0][0]
    print(control_position)
    print(speed_position)

    best_loss = 1000
    best_error = 1000
    best_loss_iter = 0
    best_error_iter = 0

    while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

        if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

            latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(latest) + '.pth'))
            checkpoint_iteration = checkpoint['iteration']
            print("Validation loaded ", checkpoint_iteration)

            # Evaluate the weights of THIS checkpoint. Without this the model
            # kept its initial parameters for every checkpoint.
            model.load_state_dict(checkpoint['state_dict'])
            model.eval()

            accumulated_loss = 0
            accumulated_error = 0
            iteration_on_checkpoint = 0
            for data in data_loader:

                input_data, float_data = data
                print(torch.squeeze(input_data['rgb']).shape)

                # Obs : Maybe we could also check for other branches ??
                output = model.forward_branch(
                    torch.squeeze(input_data['rgb']).cuda(),
                    float_data[:, speed_position, :].cuda(),
                    float_data[:, control_position, :].cuda())

                for i in range(input_data['rgb'].shape[0]):
                    coil_logger.write_on_csv(
                        checkpoint_iteration,
                        [output[i][0], output[i][1], output[i][2]])

                # TODO: Change this a functional standard using the loss functions.
                targets = dataset.extract_targets(float_data)
                difference = output - targets.cuda()
                error = torch.abs(difference)
                loss = torch.mean(difference ** 2).data.tolist()
                mean_error = torch.mean(error).data.tolist()
                accumulated_error += mean_error
                accumulated_loss += loss

                # Log a random position
                position = random.randint(0, len(float_data) - 1)

                coil_logger.add_message(
                    'Iterating',
                    {
                        'Checkpoint': latest,
                        'Iteration': (str(iteration_on_checkpoint * batch_size)
                                      + '/' + str(len(dataset))),
                        'MeanError': mean_error,
                        'Loss': loss,
                        'Output': output[position].data.tolist(),
                        'GroundTruth': targets[position].data.tolist(),
                        'Error': error[position].data.tolist(),
                        'Inputs': dataset.extract_inputs(float_data)
                        [position].data.tolist()
                    },
                    latest)
                iteration_on_checkpoint += 1

            # The accumulators hold one mean per batch, so the average is
            # taken over the number of batches, not the number of samples.
            checkpoint_average_loss = accumulated_loss / len(data_loader)
            checkpoint_average_error = accumulated_error / len(data_loader)
            coil_logger.add_scalar('Loss', checkpoint_average_loss, latest)
            coil_logger.add_scalar('Error', checkpoint_average_error, latest)

            if checkpoint_average_loss < best_loss:
                best_loss = checkpoint_average_loss
                best_loss_iter = latest

            # Compare against the best error (this used to test best_loss).
            if checkpoint_average_error < best_error:
                best_error = checkpoint_average_error
                best_error_iter = latest

            coil_logger.add_message(
                'Iterating',
                {
                    'Summary': {
                        'Error': checkpoint_average_error,
                        'Loss': checkpoint_average_loss,
                        'BestError': best_error,
                        'BestLoss': best_loss,
                        'BestLossCheckpoint': best_loss_iter,
                        'BestErrorCheckpoint': best_error_iter
                    },
                    'Checkpoint': latest
                })

        else:
            time.sleep(1)
            print("Waiting for the next Validation")
class AffordancesAgent(object):
    """Agent that predicts driving affordances from the central RGB camera
    and turns them into a vehicle control.

    get_state() produces a dictionary of affordances (three hazard flags, a
    relative angle and a target speed); step() stops the vehicle when any
    hazard flag is set and otherwise asks the local planner for a control.
    """

    def __init__(self, path_to_config_file):
        # params for now it is not used but we might want to use this to set
        self.setup(path_to_config_file)
        self.save_attentions = False

    def setup(self, path_to_config_file):
        """Read the agent configuration file and load the model(s) it names.

        Args:
            path_to_config_file: '/'-separated path of the configuration
                file. Checkpoints are read from the 'checkpoints' folder next
                to it and the YAML file from four directory levels above it.

        Raises:
            RuntimeError: If MODEL_TYPE is 'separate-affordances' and no
                encoder parameters are configured.
        """
        self._agent = None
        self.route_assigned = False
        self.count = 0

        path_parts = path_to_config_file.split('/')
        exp_dir = os.path.join('/', os.path.join(*path_parts[:-1]))

        yaml_conf, checkpoint_number, agent_name, encoder_params = \
            checkpoint_parse_configuration_file(path_to_config_file)

        if encoder_params == "None":
            encoder_params = None

        g_conf.immutable(False)
        root_dir = os.path.join('/', os.path.join(*path_parts[:-4]))
        merge_with_yaml(os.path.join(root_dir, yaml_conf), encoder_params)

        if g_conf.MODEL_TYPE in ['one-step-affordances']:
            # one step training, no need to retrain FC layers, we just get the
            # output of encoder model as prediciton
            self._model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                       g_conf.ENCODER_MODEL_CONFIGURATION)
            self._load_affordances_checkpoint(exp_dir, checkpoint_number)

        elif g_conf.MODEL_TYPE in ['separate-affordances']:
            if encoder_params is None:
                raise RuntimeError(
                    'encoder_params can not be None in MODEL_TYPE --> separate-affordances'
                )

            self.encoder_model = EncoderModel(
                g_conf.ENCODER_MODEL_TYPE,
                g_conf.ENCODER_MODEL_CONFIGURATION)
            self.encoder_model.cuda()

            if g_conf.FREEZE_ENCODER:
                # Here we load the pre-trained encoder (not fine-tunned)
                encoder_checkpoint = torch.load(
                    os.path.join(
                        root_dir, '_logs',
                        encoder_params['encoder_folder'],
                        encoder_params['encoder_exp'], 'checkpoints',
                        str(encoder_params['encoder_checkpoint']) + '.pth'))
                print("Encoder model ",
                      str(encoder_params['encoder_checkpoint']),
                      "loaded from ",
                      os.path.join('_logs',
                                   encoder_params['encoder_folder'],
                                   encoder_params['encoder_exp'],
                                   'checkpoints'))
            else:
                encoder_checkpoint = torch.load(
                    os.path.join(exp_dir, 'checkpoints',
                                 str(checkpoint_number) + '_encoder.pth'))
                print("FINE TUNNED encoder model ",
                      str(checkpoint_number) + '_encoder.pth',
                      "loaded from ", os.path.join(exp_dir, 'checkpoints'))

            # Common to both encoder sources: load, switch to eval, freeze.
            self.encoder_model.load_state_dict(
                encoder_checkpoint['state_dict'])
            self.encoder_model.eval()
            for param_ in self.encoder_model.parameters():
                param_.requires_grad = False

            self._model = CoILModel(g_conf.MODEL_TYPE,
                                    g_conf.MODEL_CONFIGURATION,
                                    g_conf.ENCODER_MODEL_CONFIGURATION)
            self._load_affordances_checkpoint(exp_dir, checkpoint_number)

    def _load_affordances_checkpoint(self, exp_dir, checkpoint_number):
        """Load '<checkpoint_number>.pth' into self._model and set it to eval."""
        self.checkpoint = torch.load(
            os.path.join(exp_dir, 'checkpoints',
                         str(checkpoint_number) + '.pth'))
        print("Affordances Model ",
              str(checkpoint_number) + '.pth', "loaded from ",
              os.path.join(exp_dir, 'checkpoints'))
        self._model.load_state_dict(self.checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

    def get_sensors_dict(self):
        """
        The agent sets the sensors that it is going to use. That has to be
        set into the environment for it to produce this data.

        Returns:
            A list with one sensor specification: the central RGB camera.
        """
        sensors_dict = [{
            'type': 'sensor.camera.rgb',
            'x': 2.0,
            'y': 0.0,
            'z': 1.40,
            'roll': 0.0,
            'pitch': -15.0,
            'yaw': 0.0,
            'width': 800,
            'height': 600,
            'fov': 100,
            'id': 'rgb_central'
        }]
        return sensors_dict

    # TODO we set the sensors here directly.
    def sensors(self):
        """Return the sensor specification used by this agent.

        `_sensors_dict` is not assigned anywhere in this class, so when
        nothing has set it from outside the specification from
        get_sensors_dict() is returned instead of raising AttributeError.
        """
        configured = getattr(self, '_sensors_dict', None)
        if configured is None:
            return self.get_sensors_dict()
        return configured

    def get_state(self, exp_list, target_speed=20.0):
        """
        Based on the exp object it makes all the affordances.

        Args:
            exp_list: List of experience objects; only the first is used.
            target_speed: Speed stored in the affordances and given to the
                local planner when it is created.

        Returns:
            Dictionary with the predicted hazard flags and relative angle,
            the target speed, the relative angle computed from the planner's
            target waypoint and the difference between the two angles.

        Raises:
            RuntimeError: If the configured model / encoder type is not one
                of those handled below.
        """
        exp = exp_list[0]
        self._vehicle = exp._ego_actor

        if self._agent is None:
            self._agent = True
            self._state = AgentState.NAVIGATING
            args_lateral_dict = {
                'K_P': 1,
                'K_D': 0.02,
                'K_I': 0,
                'dt': 1.0 / 20.0
            }
            self._local_planner = LocalPlanner(
                self._vehicle,
                opt_dict={
                    'target_speed': target_speed,
                    'lateral_control_dict': args_lateral_dict
                })
            self._hop_resolution = 2.0
            self._path_seperation_hop = 2
            self._path_seperation_threshold = 0.5
            self._grp = None

        if not self.route_assigned:
            plan = []
            for transform, road_option in exp._route:
                wp = exp._ego_actor.get_world().get_map().get_waypoint(
                    transform.location)
                plan.append((wp, road_option))
            self._local_planner.set_global_plan(plan)
            self.route_assigned = True

        input_data = exp._sensor_interface.get_data()
        # The original author noted the result as torch.Size([1, 3, 88, 200]).
        input_data = self._process_sensors(input_data['rgb_central'][1])

        norm_speed = torch.cuda.FloatTensor(
            [exp._forward_speed / g_conf.SPEED_FACTOR]).unsqueeze(0)
        directions = torch.cuda.FloatTensor(
            encode_directions(exp._directions)).unsqueeze(0)

        if g_conf.MODEL_TYPE in ['one-step-affordances']:
            c_output, r_output, layers = self._model.forward_outputs(
                input_data.cuda(), norm_speed, directions)

        elif g_conf.MODEL_TYPE in ['separate-affordances']:
            # All supported encoder types take the same path: encode the
            # frame, then let the affordances model predict from it.
            if g_conf.ENCODER_MODEL_TYPE in [
                    'action_prediction', 'stdim', 'ETEDIM', 'FIMBC',
                    'one-step-affordances', 'ETE', 'ETE_inverse_model',
                    'forward', 'ETE_stdim'
            ]:
                e, layers = self.encoder_model.forward_encoder(
                    input_data.cuda(), norm_speed, directions)
                c_output, r_output = self._model.forward_test(e)
            else:
                raise RuntimeError(
                    'Unsupported ENCODER_MODEL_TYPE: ' +
                    str(g_conf.ENCODER_MODEL_TYPE))
        else:
            raise RuntimeError(
                'Unsupported MODEL_TYPE: ' + str(g_conf.MODEL_TYPE))

        if self.save_attentions:
            exp_params = exp._exp_params
            attentions_full_path = os.path.join(
                os.environ["SRL_DATASET_PATH"], exp_params['package_name'],
                exp_params['env_name'],
                str(exp_params['env_number']) + '_' + exp._agent_name,
                str(exp_params['exp_number']))
            save_attentions(input_data.cuda(),
                            layers,
                            self.count,
                            attentions_full_path,
                            save_input=False,
                            big_size=False)
            self.count += 1

        output_relative_angle = torch.squeeze(
            r_output[0]).cpu().detach().numpy() * 1.0

        # A hazard is flagged when the second entry beats the first one.
        is_pedestrian_hazard = bool(c_output[0][0, 0] < c_output[0][0, 1])
        is_red_tl_hazard = bool(c_output[1][0, 0] < c_output[1][0, 1])
        is_vehicle_hazard = bool(c_output[2][0, 0] < c_output[2][0, 1])

        gt_relative_angle = compute_relative_angle(
            self._vehicle, self._local_planner.get_target_waypoint())

        affordances = {
            'is_pedestrian_hazard': is_pedestrian_hazard,
            'is_red_tl_hazard': is_red_tl_hazard,
            'is_vehicle_hazard': is_vehicle_hazard,
            'relative_angle': output_relative_angle,
            # Now we consider all target speed to be 20.0
            'target_speed': target_speed,
            'GT_relative_angle': gt_relative_angle,
            'ERROR_relative_angle': output_relative_angle - gt_relative_angle,
        }
        return affordances

    def make_reward(self, exp):
        """Return None: this agent does not use a reward."""
        return None

    def step(self, affordances):
        """Turn the affordances from get_state() into a vehicle control.

        Any hazard flag triggers an emergency stop; otherwise the local
        planner is run with the relative angle and target speed.
        """
        hazard_detected = False

        is_vehicle_hazard = affordances['is_vehicle_hazard']
        is_red_tl_hazard = affordances['is_red_tl_hazard']
        is_pedestrian_hazard = affordances['is_pedestrian_hazard']
        relative_angle = affordances['relative_angle']
        target_speed = affordances['target_speed']

        # When several hazards are set, the last check below decides _state.
        if is_vehicle_hazard:
            self._state = AgentState.BLOCKED_BY_VEHICLE
            hazard_detected = True

        if is_red_tl_hazard:
            self._state = AgentState.BLOCKED_RED_LIGHT
            hazard_detected = True

        if is_pedestrian_hazard:
            self._state = AgentState.BLOCKED_BY_PEDESTRIAN
            hazard_detected = True

        if hazard_detected:
            control = self.emergency_stop()
        else:
            self._state = AgentState.NAVIGATING
            control = self._local_planner.run_step(relative_angle,
                                                   target_speed)

        logging.debug("Output %f %f %f ", control.steer, control.throttle,
                      control.brake)
        return control

    def reinforce(self, rewards):
        """
        This agent cannot learn so there is no reinforce
        """
        pass

    def reset(self):
        """Forget the assigned route and planner so the next get_state()
        call rebuilds them."""
        print(" Correctly reseted the agent")
        self.route_assigned = False
        self._agent = None
        self.count = 0

    def emergency_stop(self):
        """
        Send an emergency stop command to the vehicle
        :return: control with zero steer/throttle and full brake
        """
        control = carla.VehicleControl()
        control.steer = 0.0
        control.throttle = 0.0
        control.brake = 1.0
        control.hand_brake = False
        return control

    def _process_sensors(self, sensor):
        """Convert one camera frame into the (1, C, H, W) model input.

        The frame keeps its first three channels, is cropped to the
        g_conf.IMAGE_CUT rows, resized to the 'rgb_central' sensor size,
        scaled by 1/255 and moved to the GPU. The resized image and the
        final tensor are stored on the instance.
        """
        sensor = sensor[:, :, 0:3]  # BGRA->BRG drop alpha channel
        sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], :, :]  # crop
        sensor = scipy.misc.imresize(sensor,
                                     (g_conf.SENSORS['rgb_central'][1],
                                      g_conf.SENSORS['rgb_central'][2]))
        self.latest_image = sensor

        # swapaxes followed by this transpose takes (H, W, C) to (C, H, W).
        sensor = np.swapaxes(sensor, 0, 1)
        sensor = np.transpose(sensor, (2, 1, 0))
        sensor = torch.from_numpy(sensor / 255.0).type(
            torch.FloatTensor).cuda()
        image_input = sensor.unsqueeze(0)
        self.latest_image_tensor = image_input
        return image_input
class CoILAgent(object):
    """Agent that drives on the CARLA benchmark using a trained CoIL model.

    When a 'seg' sensor is configured the model additionally receives the
    stop intentions computed by a CommandFollower control agent.
    """

    # Re-map classes, mapping irrelevant classes to a "nuisance" class
    _SEG_CLASS_MAP = {
        0: 0,   # None
        1: 0,   # Buildings -> None
        2: 0,   # Fences -> None
        3: 0,   # Other -> None
        4: 1,   # Pedestrians kept
        5: 0,   # Poles -> None
        6: 2,   # RoadLines kept
        7: 3,   # Roads kept
        8: 2,   # Sidewalks mapped to roadlines (both are boundaries of road)
        9: 0,   # Vegetation -> None
        10: 4,  # Vehicles kept
        11: 0,  # Walls -> None
        12: 5,  # TrafficSigns kept (for traffic lights)
    }
    # For now hardcode max of class map values + 1
    _NUM_SEG_CLASSES = 6

    def __init__(self, checkpoint, town_name, carla_version='0.84'):
        """
        Args:
            checkpoint: Loaded checkpoint; 'state_dict' is read here and
                'iteration' is read when the first step is logged.
            town_name: Forwarded to CommandFollower when it is needed.
            carla_version: '0.9' selects the CARLA 0.9 sensor layout and
                carla.VehicleControl; any other value the older interface.
        """
        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version
        # We save the checkpoint for some interesting future use.
        self.checkpoint = checkpoint

        # Create model
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True

        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        # If we are evaluating squeeze model (so we are using ground truth seg mask),
        # also run the autopilot to get its stop intentions
        if (g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE
                or "seg" in g_conf.SENSORS):
            self.control_agent = CommandFollower(town_name)

    def run_step(self, measurements, sensor_data, directions, target):
        """
        Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.
        """
        # Get speed and high-level turning command
        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # If we're evaluating squeeze network (so we are using ground truth seg mask)
        if "seg" in g_conf.SENSORS:
            # Run the autopilot agent to get stop intentions
            _, state = self.control_agent.run_step(measurements, [], [],
                                                   target)
            inputs_vec = [float(state[input_name])
                          for input_name in g_conf.INTENTIONS]
            intentions = torch.cuda.FloatTensor(inputs_vec).unsqueeze(0)
            # Run squeeze network
            model_outputs = self._model.forward_branch(
                self._process_sensors(sensor_data), norm_speed,
                directions_tensor, intentions, benchmark=True)
        else:
            # Run driving model
            model_outputs = self._model.forward_branch(
                self._process_sensors(sensor_data), norm_speed,
                directions_tensor, benchmark=True)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)

        # There is the posibility to replace some of the predictions with
        # oracle predictions (only throttle and brake are overridden here).
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, target)

        if self.first_iter:
            coil_logger.add_message(
                'Iterating',
                {"Checkpoint": self.checkpoint['iteration'],
                 'Agent': str(steer)},
                self.checkpoint['iteration'])
        self.first_iter = False
        return control

    @classmethod
    def _encode_segmentation(cls, seg):
        """One-hot encode a 2-D label map after re-mapping its classes.

        Args:
            seg: 2-D integer array of raw class ids. Values that are not a
                key of `_SEG_CLASS_MAP` end up in class 0.

        Returns:
            float64 array of shape (_NUM_SEG_CLASSES, H, W).
        """
        new_seg = np.zeros((seg.shape[0], seg.shape[1]))
        # Remap classes
        for key, value in cls._SEG_CLASS_MAP.items():
            new_seg[seg == key] = value

        # One hot encode seg mask
        one_hot = np.eye(cls._NUM_SEG_CLASSES)[new_seg.astype(np.int32)]
        one_hot = one_hot.transpose(2, 0, 1)
        # np.float64 is what the removed `np.float` alias used to resolve to.
        return one_hot.astype(np.float64)

    def _process_sensors(self, sensors):
        """Convert every configured sensor into a batched float tensor.

        Returns:
            Dictionary mapping each sensor name in g_conf.SENSORS to a CPU
            FloatTensor with a leading batch dimension of 1. Only the 'rgb'
            and 'seg' names are converted.
        """
        sensor_dict = {}
        for name, size in g_conf.SENSORS.items():
            if self._carla_version == '0.9':
                sensor = sensors[name][
                    g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            else:
                sensor = sensors[name].data[
                    g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]

            # Process RGB image or CARLA seg mask
            if name == 'rgb':
                # Resize image, convert it to [0, 1] BGR image
                sensor = scipy.misc.imresize(sensor, (size[1], size[2]))
                sensor = sensor[:, :, ::-1]
                # swapaxes + this transpose takes (H, W, C) to (C, H, W).
                sensor = np.swapaxes(sensor, 0, 1)
                sensor = np.transpose(sensor, (2, 1, 0))
                sensor = torch.from_numpy(sensor / 255.0).type(
                    torch.FloatTensor)
            elif name == 'seg':
                seg = scipy.misc.imresize(sensor, (size[1], size[2]),
                                          'nearest')
                new_seg = self._encode_segmentation(seg)
                sensor = torch.from_numpy(new_seg).type(torch.FloatTensor)

            sensor = sensor.unsqueeze(0)
            sensor_dict[name] = sensor

        return sensor_dict

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:
            (steer, throttle, brake); brake is zeroed when it is below 0.05
            or when throttle is larger than it.
        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0
        if throttle > brake:
            brake = 0.0
        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, target):
        """Return (steer, throttle, brake) computed by the control agent."""
        # For the oracle, the current version of sensor data is not really relevant.
        control, _ = self.control_agent.run_step(measurements, [], [], target)
        return control.steer, control.throttle, control.brake
class CoILAgent(object):
    """Agent that drives on the CARLA benchmark using a trained CoIL model.

    Offers the benchmark entry point `run_step` and `run_step_carla9`, which
    reads its inputs from an observations dictionary instead.
    """

    def __init__(self, checkpoint, town_name, carla_version='0.84'):
        """
        Args:
            checkpoint: Loaded checkpoint; 'state_dict' is read here and
                'iteration' is read when the first step is logged.
            town_name: Forwarded to CommandFollower when an oracle is enabled.
            carla_version: '0.9' selects the CARLA 0.9 sensor layout and
                carla.VehicleControl; any other value the older interface.
        """
        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version
        # We save the checkpoint for some interesting future use.
        self.checkpoint = checkpoint
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True

        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        self.latest_image = None
        self.latest_image_tensor = None

        # for image corruptions (not read by the code in this class)
        self.corruption_number = None
        self.severity = None

        if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
            # for evaluating expert
            self.control_agent = CommandFollower(town_name)

    def _make_control(self, steer, throttle, brake):
        """Build the control message matching the configured CARLA version."""
        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        return control

    def run_step(self, measurements, sensor_data, directions, target,
                 **kwargs):
        """
        Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            Controls for the vehicle on the CARLA simulator.
        """
        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # Compute the forward pass processing the sensors got from CARLA.
        model_outputs = self._model.forward_branch(
            self._process_sensors(sensor_data), norm_speed, directions_tensor)
        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        control = self._make_control(steer, throttle, brake)

        # There is the posibility to replace some of the predictions with
        # oracle predictions (here all three values are overridden).
        if g_conf.USE_ORACLE:
            control.steer, control.throttle, control.brake = \
                self._get_oracle_prediction(measurements, sensor_data, target)

        if self.first_iter:
            coil_logger.add_message(
                'Iterating',
                {"Checkpoint": self.checkpoint['iteration'],
                 'Agent': str(control.steer)},
                self.checkpoint['iteration'])
        self.first_iter = False
        return control

    # define run step for carla 9
    def run_step_carla9(self, observations):
        """Compute the control from an observations dictionary.

        Args:
            observations: Mapping with a 'velocity' vector, a 'command' and
                the images indexed by the names in g_conf.SENSORS.

        Returns:
            Control message with the predicted steer, throttle and brake.
        """
        norm_speed = np.linalg.norm(observations['velocity']) / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor(
            [int(observations['command'])])

        model_outputs = self._model.forward_branch(
            self._process_sensors(observations), norm_speed,
            directions_tensor)
        steer, throttle, brake = self._process_model_outputs(model_outputs[0])
        return self._make_control(steer, throttle, brake)

    def get_attentions(self, layers=None):
        """
        Returns
            The activations obtained from the first layers of the latest
            iteration, one color-mapped image per requested layer.

        Args:
            layers: Indices into the perception layers; defaults to [0, 1, 2].

        Raises:
            ValueError: If no step has produced an image tensor yet.
        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError('No step was ran yet. '
                             'No image to compute the activations, Try Running ')
        all_layers = self._model.get_perception_layers(self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            # Mean absolute activation over dim 1, first batch element.
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            # Drop the 4th (index 3) channel added by the colormap.
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [88, 200]))
        return attentions

    def _process_sensors(self, sensors):
        """Build the network input from the sensors listed in g_conf.SENSORS.

        Every sensor is cropped to the g_conf.IMAGE_CUT rows, resized to
        (size[1], size[2]), scaled by 1/255 and moved to the GPU. The
        per-sensor tensors are concatenated along dim 0 and a leading batch
        dimension is added. The last resized image and the final tensor are
        kept on the instance for get_attentions.
        """
        per_sensor = []
        for name, size in g_conf.SENSORS.items():
            if self._carla_version == '0.9':
                sensor = sensors[name][
                    g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            else:
                sensor = sensors[name].data[
                    g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]

            # NOTE: scipy.misc.imresize is deprecated.
            sensor = scipy.misc.imresize(sensor, (size[1], size[2]))

            self.latest_image = sensor

            # swapaxes followed by this transpose takes (H, W, C) to (C, H, W).
            sensor = np.swapaxes(sensor, 0, 1)
            sensor = np.transpose(sensor, (2, 1, 0))
            sensor = torch.from_numpy(sensor / 255.0).type(
                torch.FloatTensor).cuda()
            per_sensor.append(sensor)

        image_input = torch.cat(per_sensor, 0)
        image_input = image_input.unsqueeze(0)
        self.latest_image_tensor = image_input
        return image_input

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:
            (steer, throttle, brake) as Python numbers; brake is zeroed when
            it is below 0.05 or when throttle is larger than it.
        """
        steer = outputs[0].item()
        throttle = outputs[1].item()
        brake = outputs[2].item()
        # these heuristics are a part of the original benchmark, evaluation
        # doesn't run properly without these
        if brake < 0.05:
            brake = 0.0
        if throttle > brake:
            brake = 0.0
        return steer, throttle, brake

    def _process_model_outputs_wp(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:
            (steer, throttle, brake) where steer is 0.7 * outputs[4] clamped
            to [-1, 1] and brake is zeroed below 0.2 or when throttle wins.
        """
        wpa2, throttle, brake = outputs[4], outputs[1], outputs[2]
        if brake < 0.2:
            brake = 0.0
        if throttle > brake:
            brake = 0.0

        steer = 0.7 * wpa2
        if steer > 0:
            steer = min(steer, 1)
        else:
            steer = max(steer, -1)
        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, sensor_data, target):
        """Return (steer, throttle, brake) computed by the control agent."""
        # For the oracle, the current version of sensor data is not really relevant.
        control, _ = self.control_agent.run_step(measurements, sensor_data,
                                                 [], target)
        return control.steer, control.throttle, control.brake
def execute(gpu, exp_batch, exp_alias):
    """Train the CoIL model configured by ``configs/<exp_batch>/<exp_alias>.yaml``.

    When ``get_latest_saved_checkpoint()`` reports a checkpoint, the model
    weights and the training counters are restored from it; otherwise
    training starts at iteration 0.  Progress is reported through
    ``coil_logger`` and standard output is redirected to a per-process file
    under ``_output_logs``.  Exceptions raised during training are logged
    rather than propagated.

    Args:
        gpu: Value written to the ``CUDA_VISIBLE_DEVICES`` environment variable.
        exp_batch: Experiment folder name (used under ``configs`` and ``_logs``).
        exp_alias: Experiment name; the configuration file is ``<exp_alias>.yaml``.

    Returns:
        None.
    """
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')

        coil_logger.add_message('Loading', {'GPU': gpu})

        # exist_ok avoids the check-then-create race between parallel processes.
        os.makedirs('_output_logs', exist_ok=True)
        # Deliberately left open: stdout stays redirected for the whole process.
        sys.stdout = open(
            os.path.join('_output_logs',
                         g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
            "a", buffering=1)

        if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                                g_conf.PROCESS_NAME)[0] == "Finished":
            # TODO: print some cool summary or not ?
            return

        # Define the dataset. This structure has __getitem__ redefined so the
        # HDFILES positions under the root directory can be accessed as a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        dataset = CoILDataset(full_dataset,
                              transform=transforms.Compose([transforms.ToTensor()]))

        # Creates the sampler, this part is responsible for managing the keys.
        # It divides all keys depending on the measurements and produces a set
        # of keys for each batch.
        sampler = BatchSequenceSampler(
            splitter.control_steer_split(dataset.measurements, dataset.meta_data),
            g_conf.BATCH_SIZE, g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)

        # The data loader is the multi threaded module from pytorch that
        # releases a number of workers to get all the data.
        # TODO: batch size an number of workers go to some configuration file
        data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler,
                                                  shuffle=False, num_workers=12,
                                                  pin_memory=True)

        # By instantiating the augmenter we get a callable that augments
        # images and transforms them into tensors.
        augmenter = iag.Augmenter(g_conf.AUGMENTATION_SUITE)

        # TODO: here there is clearly a posibility to make a cool "conditioning" system.
        model = CoILModel(g_conf.MODEL_NAME)
        model.cuda()
        # NOTE: a stray debugging exit() used to sit here; it made everything
        # below unreachable and was reported as a training error.
        print(model)

        criterion = Loss()

        # TODO: DATASET SIZE SEEMS WEIRD
        optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(checkpoint_file)))
            # Restore the weights too; resuming only the counters would keep
            # training a freshly initialised network under an old iteration.
            model.load_state_dict(checkpoint['state_dict'])
            iteration = checkpoint['iteration']
            accumulated_time = checkpoint['total_time']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            best_loss_iter = 0

        # TODO: The checkpoint will continue, so it should erase everything up to the iteration
        print(dataset.meta_data)
        print(model)

        capture_time = time.time()
        for data in data_loader:
            input_data, float_data = data

            # TODO, ADD ITERATION SCHEDULE
            input_rgb_data = augmenter(0, input_data['rgb'])

            # get the control commands from float_data, size = [120,1]
            controls = float_data[:, dataset.controls_position(), :]
            print(" CONTROLS ", controls.shape)

            # Extract once per batch; the same tensors feed the model, the
            # loss and the log message.
            inputs = dataset.extract_inputs(float_data)
            targets = dataset.extract_targets(float_data)

            # The output(branches) is a list of 5 branches results, each
            # branch is with size [120,3]
            model.zero_grad()
            print('INPUTS', inputs.shape)
            branches = model(input_rgb_data, inputs.cuda())

            print("Extracted targets ", targets.shape[0])
            loss = criterion.MSELoss(branches, targets.cuda(), controls.cuda(),
                                     inputs.cuda())

            # TODO: All these logging things could go out to clean up the main
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(float_data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - targets.cuda())

            # TODO: For now we are computing the error for just the correct
            # branch, it could be multi-branch,
            coil_logger.add_scalar('Loss', loss.data, iteration)

            loss.backward()
            optimizer.step()

            accumulated_time += time.time() - capture_time
            capture_time = time.time()

            # TODO: Get only the float_data that are actually generating output
            # TODO: itearation is repeating , and that is dumb
            coil_logger.add_message(
                'Iterating',
                {
                    'Iteration': iteration,
                    'Loss': loss.data.tolist(),
                    'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss': best_loss,
                    'BestLossIteration': best_loss_iter,
                    'Output': output[position].data.tolist(),
                    'GroundTruth': targets[position].data.tolist(),
                    'Error': error[position].data.tolist(),
                    'Inputs': inputs[position].data.tolist()
                },
                iteration)

            # TODO: save also the optimizer state dictionary
            if is_ready_to_save(iteration):
                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'best_loss_iter': best_loss_iter
                }
                # TODO : maybe already summarize the best model ???
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))

            iteration += 1

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
    except Exception:
        # Exception (not a bare except) so SystemExit is not reported as a
        # training failure.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
class CoILAgent(Agent):
    """Agent that computes CARLA controls from sensor data with a CoIL model."""

    def __init__(self, checkpoint):
        """Build the network named by ``g_conf.MODEL_NAME`` and load its weights.

        Args:
            checkpoint: Mapping whose ``'state_dict'`` entry is passed to the
                model's ``load_state_dict``.
        """
        Agent.__init__(self)

        self.model = CoILModel(g_conf.MODEL_NAME)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.cuda()
        # The agent only runs inference.
        self.model.eval()

    def run_step(self, measurements, sensor_data, directions, target):
        """Run one forward pass and wrap the result in a ``carla_protocol.Control``.

        Args:
            measurements: Object exposing ``player_measurements.forward_speed``.
            sensor_data: Mapping from sensor name to an object with a ``data``
                image array (see ``_process_sensors``).
            directions: High-level command, wrapped in a ``LongTensor`` and
                passed to ``forward_branch``.
            target: Unused by this implementation.

        Returns:
            A ``carla_protocol.Control`` with ``steer``, ``throttle`` and
            ``brake`` set from the model output.
        """
        self.model.eval()

        print(" RUnning STEP ")
        forward_speed = measurements.player_measurements.forward_speed
        speed = torch.cuda.FloatTensor([forward_speed]).unsqueeze(0)
        print("Speed is", speed)
        print("Speed shape ", speed)
        directions_tensor = torch.cuda.LongTensor([directions])
        model_outputs = self.model.forward_branch(
            self._process_sensors(sensor_data), speed, directions_tensor)
        print(model_outputs)

        steer, throttle, brake = self._process_model_outputs(
            model_outputs[0], forward_speed)

        control = carla_protocol.Control()
        control.steer = steer
        control.throttle = throttle
        control.brake = brake
        # TODO: adapt the client side agent for the new version. ( PROBLEM )
        # TODO: maybe change to a more meaningfull message ??
        return control

    def _process_sensors(self, sensors):
        """Crop, resize and upload the configured sensor image to the GPU.

        Every sensor named in ``g_conf.SENSORS`` is processed, but each one
        overwrites the previous result, so only the last configured sensor
        reaches the network.
        NOTE(review): concatenating the sensors may have been intended —
        confirm before using more than one.

        Args:
            sensors: Mapping from sensor name to an object with a ``data``
                array indexed rows-first.

        Returns:
            The processed image as a CUDA ``FloatTensor`` with a leading
            batch dimension of size 1.

        Raises:
            ValueError: If ``g_conf.SENSORS`` is empty.
        """
        image_input = None
        for name, size in g_conf.SENSORS.items():
            # Fixed vertical crop. The original note here read "300*800*3"
            # — TODO confirm which shape that refers to.
            sensor = sensors[name].data[175:375, ...]

            image_input = transform.resize(sensor, (size[1], size[2]))
            print("Image pixL ", image_input[:10][0][0])
            # Reorder the axes with (2, 0, 1) before building the tensor.
            image_input = np.transpose(image_input, (2, 0, 1))
            image_input = torch.from_numpy(image_input).type(
                torch.FloatTensor).cuda()
            print("torch size", image_input.size())

        if image_input is None:
            raise ValueError('g_conf.SENSORS is empty, there is no image to process')

        image_input = image_input.unsqueeze(0)
        print(image_input.shape)
        return image_input

    def _process_model_outputs(self, outputs, speed):
        """Split the model output into steer, throttle and brake.

        All control heuristics are currently disabled, so the first three
        entries of ``outputs`` are returned untouched.

        Args:
            outputs: Indexable model output; entries 0, 1 and 2 are used.
            speed: Unused; kept so the signature stays stable for callers.

        Returns:
            Tuple ``(steer, throttle, brake)``.
        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        print("Steer", steer, "Throttle", throttle)
        return steer, throttle, brake
def execute(gpu, exp_batch, exp_alias, dataset_name, architecture, suppress_output):
    """Score every saved checkpoint of an experiment on a validation dataset.

    For each checkpoint found on disk the matching weights are loaded, the
    whole dataset is run through the network and the summed squared error
    and summed absolute error are averaged over the number of samples seen.
    Results are reported through ``coil_logger``; exceptions are logged
    rather than propagated.

    Args:
        gpu: Value written to the ``CUDA_VISIBLE_DEVICES`` environment variable.
        exp_batch: Experiment folder name.
        exp_alias: Experiment name; also selects the checkpoint directory.
        dataset_name: Dataset folder under ``COIL_DATASET_PATH``; pass ``[]``
            to use ``COIL_DATASET_PATH`` itself.
        architecture: One of ``'coil_unit'``, ``'coil_icra'`` or ``'wgangp_lsd'``.
        suppress_output: When true, standard output is redirected to a file
            under ``_output_logs``.

    Returns:
        None.
    """
    try:
        # We set the visible cuda devices
        torch.manual_seed(2)
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # Validation available for:
        #   coil_unit (UNIT + task combined)
        #   coil_icra (Also used for finetuned models)
        #   wgangp_lsd (Our architecture)
        architecture_name = architecture

        # At this point the log file with the correct naming is created.
        # NOTE(review): the configuration roots below are absolute,
        # machine-specific paths.
        if architecture_name == 'coil_unit':
            pass
        elif architecture_name == 'wgangp_lsd':
            merge_with_yaml(
                os.path.join('/home/rohitrishabh/CoilWGAN/configs', exp_batch,
                             exp_alias + '.yaml'))
            set_type_of_process('validation', dataset_name)
        elif architecture_name == 'coil_icra':
            merge_with_yaml(
                os.path.join(
                    '/home/adas/CleanedCode/CoIL_Codes/coil_20-06/configs',
                    exp_batch, exp_alias + '.yaml'))
            set_type_of_process('validation', dataset_name)

        if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                                g_conf.PROCESS_NAME)[0] == "Finished":
            # TODO: print some cool summary or not ?
            return

        # exist_ok avoids the check-then-create race between parallel processes.
        os.makedirs('_output_logs', exist_ok=True)

        if suppress_output:
            # Deliberately left open: stdout stays redirected for the process.
            sys.stdout = open(
                os.path.join('_output_logs',
                             g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                "a", buffering=1)

        # Define the dataset. This structure has __getitem__ redefined so the
        # HDFILES positions under the root directory can be accessed as a vector.
        if dataset_name != []:
            full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
        else:
            full_dataset = os.environ["COIL_DATASET_PATH"]

        augmenter = Augmenter(None)
        dataset = CoILDataset(full_dataset, transform=augmenter)

        # The data loader is the multi threaded module from pytorch that
        # releases a number of workers to get all the data.
        # TODO: batch size an number of workers go to some configuration file
        batchsize = 30
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=batchsize,
                                                  shuffle=False, num_workers=1,
                                                  pin_memory=True)

        # TODO: here there is clearly a posibility to make a cool "conditioning" system.
        if architecture_name == 'coil_unit':
            model_task, model_gen = CoILModel('coil_unit')
            model_task, model_gen = model_task.cuda(), model_gen.cuda()
        else:
            model = CoILModel(architecture_name)
            model.cuda()

        latest = 0

        best_loss = 1000
        best_error = 1000
        best_loss_iter = 0
        best_error_iter = 0
        best_loss_ckpt = ''

        if architecture_name == 'coil_unit':
            ckpts = glob.glob('/home/rohitrishabh/UNIT_DA/outputs/' + exp_alias +
                              '/checkpoints/gen*.pt')
        else:
            ckpts = glob.glob(
                os.path.join('/home/adas/CleanedCode/CoIL_Codes/coil_20-06/_logs',
                             exp_batch, exp_alias) + '/*.pth')

        if architecture_name == 'coil_unit':
            model_task.eval()
            model_gen.eval()
        else:
            model.eval()

        # NOTE(review): this is a lexicographic sort and ``latest`` below just
        # advances by 2000 per file, so the logged checkpoint number matches
        # the file only if names sort in training order and are 2000
        # iterations apart — confirm against the checkpoint naming.
        ckpts = sorted(ckpts)

        # Positions inside float_data used by this validation.
        control_position = 24
        speed_position = 10

        # Constants of the steering correction for laterally rotated cameras.
        time_use = 1.0
        car_length = 3.0
        extra_factor = 2.5

        # TODO: refactor on the getting on the checkpoint organization needed
        for ckpt in ckpts:
            checkpoint = torch.load(ckpt)
            print("Validation loaded ", ckpt)

            if architecture_name == 'wgangp_lsd':
                print(ckpt, checkpoint['best_loss_iter_F'])
                model.load_state_dict(checkpoint['stateF_dict'])
                model.eval()
            elif architecture_name == 'coil_unit':
                model_task.load_state_dict(checkpoint['task'])
                model_gen.load_state_dict(checkpoint['b'])
                model_task.eval()
                model_gen.eval()
            elif architecture_name == 'coil_icra':
                model.load_state_dict(checkpoint['state_dict'])
                model.eval()

            accumulated_loss = 0
            accumulated_error = 0
            datacount = 0
            # Real number of samples seen; the last batch may hold fewer
            # than ``batchsize`` items.
            samples_seen = 0

            for data in data_loader:
                input_data, float_data = data

                camera_angle = float_data[:, 26, :].cuda()
                steer = float_data[:, 0, :].cuda()
                speed = float_data[:, 10, :].cuda()

                # Masks selecting positive / non-positive camera angles.
                pos = (camera_angle > 0.0).float()
                neg = (camera_angle <= 0.0).float()

                rad_camera_angle = math.pi * (torch.abs(camera_angle)) / 180.0
                val = extra_factor * (torch.atan(
                    (rad_camera_angle * car_length) /
                    (time_use * speed + 0.05))) / 3.1415

                # The correction is capped at 0.6 and applied with the sign
                # opposite to the camera angle.
                max_correction = torch.Tensor([0.6]).cuda()
                steer -= pos * torch.min(val, max_correction)
                steer += neg * torch.min(val, max_correction)

                # Write the corrected steer back and clamp it to [-1, 1].
                float_data[:, 0, :] = steer.cpu()
                float_data[:, 0, :][float_data[:, 0, :] > 1.0] = 1.0
                float_data[:, 0, :][float_data[:, 0, :] < -1.0] = -1.0

                datacount += 1
                samples_seen += float_data.shape[0]

                # Must be extracted after the steer correction above.
                targets = dataset.extract_targets(float_data).cuda()

                if architecture_name == 'wgangp_lsd':
                    embed, output = model(
                        torch.squeeze(input_data['rgb']).cuda(),
                        float_data[:, speed_position, :].cuda())
                    loss = torch.sum((output[0] - targets) ** 2).data.tolist()
                    mean_error = torch.sum(
                        torch.abs(output[0] - targets)).data.tolist()

                elif architecture_name == 'coil_unit':
                    embed, n_b = model_gen.encode(
                        torch.squeeze(input_data['rgb']).cuda())
                    output = model_task(
                        embed, Variable(float_data[:, speed_position, :]).cuda())
                    # float() keeps the accumulators plain Python numbers,
                    # like the other two branches.
                    loss = float(torch.sum((output[0].data - targets) ** 2))
                    mean_error = float(
                        torch.sum(torch.abs(output[0].data - targets)))

                elif architecture_name == 'coil_icra':
                    output = model.forward_branch(
                        torch.squeeze(input_data['rgb']).cuda(),
                        float_data[:, speed_position, :].cuda(),
                        float_data[:, control_position, :].cuda())
                    loss = torch.sum((output - targets) ** 2).data.tolist()
                    mean_error = torch.sum(
                        torch.abs(output - targets)).data.tolist()

                accumulated_error += mean_error
                accumulated_loss += loss

                print(datacount, len(data_loader), accumulated_loss)

            if samples_seen == 0:
                raise ValueError('The validation data loader produced no samples')

            checkpoint_average_loss = accumulated_loss / float(samples_seen)
            checkpoint_average_error = accumulated_error / float(samples_seen)

            if checkpoint_average_loss < best_loss:
                best_loss = checkpoint_average_loss
                best_loss_iter = latest
                best_loss_ckpt = ckpt

            if checkpoint_average_error < best_error:
                best_error = checkpoint_average_error
                best_error_iter = latest

            print("current loss", checkpoint_average_loss)
            print("best_loss", best_loss)

            coil_logger.add_message(
                'Iterating',
                {
                    'Summary': {
                        'Error': checkpoint_average_error,
                        'Loss': checkpoint_average_loss,
                        'BestError': best_error,
                        'BestLoss': best_loss,
                        'BestLossCheckpoint': best_loss_iter,
                        'BestErrorCheckpoint': best_error_iter
                    },
                    'Checkpoint': latest
                },
                latest)
            latest += 2000

        coil_logger.add_message('Finished', {})
        print("Best Validation Loss ckpt:", best_loss_ckpt)

        # TODO: DO ALL THE AMAZING LOGGING HERE, as a way to very the status in paralell.
        # THIS SHOULD BE AN INTERELY PARALLEL PROCESS

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
    except Exception:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.

        Every RGB batch is first converted by a pretrained ERFNet into a
        two-channel road / not-road mask, and the CoIL model is trained on
        that mask instead of the RGB image.

    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None
    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}

        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')

        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case
        if suppress_output:
            os.makedirs('_output_logs', exist_ok=True)
            # Both streams are deliberately left open for the process lifetime.
            sys.stdout = open(
                os.path.join('_output_logs',
                             exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                             str(os.getpid()) + ".out"),
                "a", buffering=1)
            sys.stderr = open(
                os.path.join('_output_logs',
                             exp_alias + '_err_' + g_conf.PROCESS_NAME + '_' +
                             str(os.getpid()) + ".out"),
                "a", buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            # A checkpoint of this experiment takes precedence over the preload.
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(checkpoint_file)))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset. This structure has __getitem__ redefined so the
        # positions under the root directory can be accessed as a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augments
        # images and transforms them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read a dataset.
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) + 'hours_' +
                              g_conf.TRAIN_DATASET_NAME)
        print("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration, number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        # Set ERFnet for segmentation
        model_erf = ERFNet(20)
        model_erf = torch.nn.DataParallel(model_erf)
        model_erf = model_erf.cuda()

        print("LOAD ERFNet")

        def load_my_state_dict(model, state_dict):
            """Copy into ``model`` only the entries of ``state_dict`` it owns."""
            own_state = model.state_dict()
            for name, param in state_dict.items():
                if name not in own_state:
                    continue
                own_state[name].copy_(param)
            return model

        model_erf = load_my_state_dict(
            model_erf, torch.load('trained_models/erfnet_pretrained.pth'))
        # ERFNet is only used for inference.
        model_erf.eval()
        print("ERFNet and weights LOADED successfully")

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:
            # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print("Before the loss")

        criterion = Loss(g_conf.LOSS_FUNCTION)

        # Where the segmentation of the first batch is dumped for inspection.
        road_mask_pattern = "./save_color/batch_road_mask/%d.png"
        road_label_pattern = "./save_color/batch_road/%d.png"

        # Loss time series window
        for data in data_loader:

            # Basically in this mode of execution, we validate every X Steps,
            # if it goes up 3 times, add a stop on the _logs folder that is
            # going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration,
                                                  g_conf.FINISH_ON_VALIDATION_STALE):
                break

            ####################################
            #      Main optimization loop
            ####################################

            iteration += 1
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # get the control commands from float_data, size = [120,1]
            capture_time = time.time()
            controls = data['directions']
            # The output(branches) is a list of 5 branches results, each
            # branch is with size [120,3]
            model.zero_grad()

            # use ERFNet to convert RGB to Segmentation (whole batch at once)
            rgbs = data['rgb']
            with torch.no_grad():
                outputs = model_erf(rgbs)

            # Per-pixel class index; class 0 is treated as road, matching
            # the dump file names below.
            labels = outputs.max(1)[1].byte().cpu().data
            seg_road = (labels == 0)
            seg_not_road = (labels != 0)
            seg = torch.stack((seg_road, seg_not_road), 1).float()

            # save 1st batch's segmentation results
            if iteration == 1:
                os.makedirs(os.path.dirname(road_mask_pattern), exist_ok=True)
                os.makedirs(os.path.dirname(road_label_pattern), exist_ok=True)
                # Use the real batch size; a fixed 120 raised IndexError for
                # smaller batches.
                for i in range(seg.shape[0]):
                    label = seg[i, 0, :, :]
                    label_color = Colorize()(label.unsqueeze(0))
                    ToPILImage()(label_color).save(road_mask_pattern % (i))

                    label = labels[i, :, :]
                    label_color = Colorize()(label.unsqueeze(0))
                    ToPILImage()(label_color).save(road_label_pattern % (i))

            branches = model(torch.squeeze(seg).cuda(),
                             dataset.extract_inputs(data).cuda())

            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()

            ####################################
            #   Saving the model if necessary
            ####################################

            if is_ready_to_save(iteration):
                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(state,
                           os.path.join('_logs', exp_batch, exp_alias,
                                        'checkpoints', str(iteration) + '.pth'))

            ################################################
            #   Adding tensorboard logs.
            #   Making calculations for logging purposes.
            #   These logs are monitored by the printer module.
            ################################################
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            # Log a random position of the batch. ``data`` is indexed by key
            # above, so its length is not the batch size; use the model
            # output instead.
            position = random.randint(0, len(output) - 1)

            accumulated_time += time.time() - capture_time

            coil_logger.add_message(
                'Iterating',
                {'Iteration': iteration,
                 'Loss': loss.data.tolist(),
                 'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                 'BestLoss': best_loss,
                 'BestLossIteration': best_loss_iter,
                 'Output': output[position].data.tolist(),
                 'GroundTruth': dataset.extract_targets(data)[position].data.tolist(),
                 'Error': error[position].data.tolist(),
                 'Inputs': dataset.extract_inputs(data)[position].data.tolist()},
                iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d Loss: %f" % (iteration, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:
        coil_logger.add_message('Error', {'Message': str(e)})

    except Exception:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
def execute(gpu, exp_batch, exp_alias):
    """Train the CoIL model for one experiment, with steer augmentation.

    Merges ``configs/<exp_batch>/<exp_alias>.yaml`` into the global
    configuration, redirects stdout to a file under ``_output_logs``, builds
    the dataset, sampler, augmenter, model and optimizer, resumes from the
    latest saved checkpoint when there is one, and then iterates over the data
    loader. Periodic, windowed-best and single-iteration-best checkpoints are
    written under ``_logs/<exp_batch>/<exp_alias>``.

    Args:
        gpu: Value assigned to the ``CUDA_VISIBLE_DEVICES`` variable.
        exp_batch: Experiment folder name (under ``configs`` and ``_logs``).
        exp_alias: Experiment name; ``<exp_alias>.yaml`` is its config file.

    Returns:
        None. Failures are reported through ``coil_logger`` and swallowed.
    """
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')

        coil_logger.add_message('Loading', {'GPU': gpu})

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        # stdout stays redirected for the rest of the process on purpose.
        sys.stdout = open(
            os.path.join('_output_logs',
                         g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
            "a", buffering=1)

        if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                                g_conf.PROCESS_NAME)[0] == "Finished":
            # TODO: print some cool summary or not ?
            return

        # Define the dataset, rooted at COIL_DATASET_PATH.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        dataset = CoILDataset(
            full_dataset,
            transform=transforms.Compose([transforms.ToTensor()]))

        # The sampler is responsible for managing the keys: it splits them
        # according to the measurements and produces a set of keys per batch.
        sampler = BatchSequenceSampler(
            splitter.control_steer_split(dataset.measurements, dataset.meta_data),
            g_conf.BATCH_SIZE, g_conf.NUMBER_IMAGES_SEQUENCE,
            g_conf.SEQUENCE_STRIDE)

        # TODO: batch size an number of workers go to some configuration file
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_sampler=sampler,
                                                  shuffle=False,
                                                  num_workers=12,
                                                  pin_memory=False)

        # By instantiating the augmenter we get a callable that augments
        # images and transforms them into tensors.
        st = lambda aug: iag.Sometimes(aug, 0.4)
        oc = lambda aug: iag.Sometimes(aug, 0.3)
        rl = lambda aug: iag.Sometimes(aug, 0.09)
        augmenter = iag.Augmenter([iag.ToGPU()] + [
            # blur images with a sigma between 0 and 1.5
            rl(iag.GaussianBlur((0, 1.5))),
            # add gaussian noise to images
            rl(iag.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05),
                                         per_channel=0.5)),
            # randomly remove up to X% of the pixels
            oc(iag.Dropout((0.0, 0.10), per_channel=0.5)),
            # randomly remove up to X% of the pixels
            oc(iag.CoarseDropout((0.0, 0.10), size_percent=(0.08, 0.2),
                                 per_channel=0.5)),
            # change brightness of images (by -X to Y of original value)
            oc(iag.Add((-40, 40), per_channel=0.5)),
            # change brightness of images (X-Y% of original value)
            st(iag.Multiply((0.10, 2), per_channel=0.2)),
            # improve or worsen the contrast
            rl(iag.ContrastNormalization((0.5, 1.5), per_channel=0.5)),
            # put grayscale
            rl(iag.Grayscale((0.0, 1))),
        ])

        # TODO: here there is clearly a posibility to make a cool
        # "conditioning" system.
        model = CoILModel(g_conf.MODEL_NAME)
        model.cuda()
        print(model)

        criterion = Loss()

        # TODO: DATASET SIZE SEEMS WEIRD
        optimizer = optim.Adam(model.parameters(), lr=0.0002)

        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(checkpoint_file)))
            # Restore the weights too; restoring only the counters would
            # silently continue the run from a freshly initialised network.
            model.load_state_dict(checkpoint['state_dict'])
            iteration = checkpoint['iteration']
            accumulated_time = checkpoint['total_time']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            best_loss_iter = 0

        # TODO: The checkpoint will continue, so it should erase everything
        # up to the iteration
        best_loss_save = 10000.0
        best_loss_save_iter = 0
        curr_loss_save = 0.0

        print(dataset.meta_data)
        print(model)

        capture_time = time.time()
        model.train()

        # Fixed switches of this script (both always enabled).
        augment_for_controls = 1
        adjustlr = 1

        for data in data_loader:
            input_data, float_data = data

            # TODO, ADD ITERATION SCHEDULE
            input_rgb_data = augmenter(0, input_data['rgb'])

            if augment_for_controls:
                # Correct the steer label according to the camera angle read
                # from float_data index 26; index 0 is read as steer and
                # index 10 as speed.
                camera_angle = float_data[:, 26, :].cuda()
                print("Camera angle", camera_angle[0])

                steer = float_data[:, 0, :].cuda()
                speed = float_data[:, 10, :].cuda()

                time_use = 1.0
                car_length = 3.0
                extra_factor = 2.5

                # Float masks selecting positive / non-positive camera angles.
                pos = (camera_angle > 0.0).type(torch.FloatTensor).cuda()
                neg = (camera_angle <= 0.0).type(torch.FloatTensor).cuda()

                rad_camera_angle = math.pi * (torch.abs(camera_angle)) / 180.0
                val = extra_factor * (torch.atan(
                    (rad_camera_angle * car_length) /
                    (time_use * speed + 0.05))) / 3.1415

                # The correction magnitude is capped at 0.6.
                correction = torch.min(val, torch.tensor([0.6]).cuda())
                steer -= pos * correction
                steer += neg * correction

                print("val", val[0])
                print("speed", speed[0])

                # Write the corrected steer back and clamp it to [-1, 1].
                steer = steer.cpu()
                float_data[:, 0, :] = steer
                float_data[:, 0, :][float_data[:, 0, :] > 1.0] = 1.0
                float_data[:, 0, :][float_data[:, 0, :] < -1.0] = -1.0

            # Get the control commands from float_data.
            controls = float_data[:, dataset.controls_position(), :]

            model.zero_grad()
            branches = model(input_rgb_data,
                             dataset.extract_inputs(float_data).cuda())

            loss = criterion.MSELoss(
                branches,
                dataset.extract_targets(float_data).cuda(),
                controls.cuda(),
                dataset.extract_inputs(float_data).cuda())

            # TODO: All these logging things could go out to clean up the main
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            curr_loss_save += loss.data

            # Log a random position
            position = random.randint(0, len(float_data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output -
                              dataset.extract_targets(float_data).cuda())

            # TODO: For now we are computing the error for just the correct
            # branch, it could be multi- branch,
            coil_logger.add_scalar('Loss', loss.data, iteration)

            loss.backward()
            optimizer.step()

            accumulated_time += time.time() - capture_time
            capture_time = time.time()

            # TODO: Get only the float_data that are actually generating output
            # TODO: itearation is repeating , and that is dumb
            coil_logger.add_message(
                'Iterating',
                {
                    'Iteration': iteration,
                    'Loss': loss.data.tolist(),
                    'Images/s':
                        (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss': best_loss,
                    'BestLossIteration': best_loss_iter,
                    'BestLossSave': best_loss_save,
                    'Output': output[position].data.tolist(),
                    'GroundTruth': dataset.extract_targets(
                        float_data)[position].data.tolist(),
                    'Error': error[position].data.tolist(),
                    'Inputs': dataset.extract_inputs(
                        float_data)[position].data.tolist()
                },
                iteration)

            # TODO: save also the optimizer state dictionary
            if is_ready_to_save(iteration):
                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'best_loss_iter': best_loss_iter
                }
                # TODO : maybe already summarize the best model ???
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))

            print("before best save")
            if iteration % 5 == 0 and iteration > 4:
                # NOTE(review): the window is 5 iterations but the divisor is
                # 5000; it only rescales the value being compared, so it is
                # left untouched -- confirm which constant was intended.
                curr_loss_save /= 5000.0
                if curr_loss_save < best_loss_save:
                    best_loss_save = curr_loss_save
                    best_loss_save_iter = iteration
                    state = {
                        'iteration': iteration,
                        'state_dict': model.state_dict(),
                        'best_loss': best_loss_save,
                        'total_time': accumulated_time,
                        'best_loss_iter': best_loss_save_iter
                    }
                    # TODO : maybe already summarize the best model ???
                    torch.save(
                        state,
                        os.path.join('_logs', exp_batch, exp_alias,
                                     'best_loss_save' + '.pth'))
                # Start a fresh accumulation window whether or not it improved.
                curr_loss_save = 0
            print("after best save")

            if iteration == best_loss_iter:
                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'best_loss_iter': best_loss_iter
                }
                # TODO : maybe already summarize the best model ???
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias,
                                 'best_loss' + '.pth'))

            iteration += 1

            # Re-evaluate the learning rate once every 1000 iterations.
            if adjustlr and iteration % 1000 == 0:
                adjust_learning_rate(optimizer, iteration)

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
    except:
        # Deliberate catch-all at the process boundary: the failure is
        # printed and recorded in the experiment log instead of propagating.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output=True, yaml_file=None):
    """Compute the per-sample absolute control error of one checkpoint.

    Loads the checkpoint given by the module-level ``args.checkpoint``, runs
    the model in evaluation mode over the whole dataset and saves the absolute
    difference between the selected branch output and the targets to
    ``<args.save_path>/<args.dataset_name>/computed_error.npy``.

    Args:
        gpu: Value assigned to the ``CUDA_VISIBLE_DEVICES`` variable.
        exp_batch: Experiment folder name under ``configs``.
        exp_alias: Experiment name; ``<exp_alias>.yaml`` is its config file.
        dataset_name: Preload-style dataset name; only the part after the
            last ``'_'`` is used as the dataset folder name.
        suppress_output: Unused; the output redirection it controlled is
            disabled in this script.
        yaml_file: Optional directory to read ``<exp_alias>.yaml`` from
            instead of ``configs/<exp_batch>``.

    Returns:
        None.
    """
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias + '.yaml')
    if yaml_file is not None:
        path_to_yaml_file = os.path.join(yaml_file, exp_alias + '.yaml')
    merge_with_yaml(path_to_yaml_file)

    # Process-type registration and stdout/stderr redirection are
    # intentionally disabled in this script to keep it simple.

    # The preload file has '<X>hours_' as prefix whereas the dataset folder
    # does not, so keep only the last '_'-separated component.
    dataset_name = dataset_name.split('_')[-1]
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
    augmenter = Augmenter(None)

    print ('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset, transform=augmenter,
                          preload_name=args.dataset_name)

    # The data loader is the multi threaded module from pytorch that releases
    # a number of workers to get all the data.
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=g_conf.BATCH_SIZE, shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS, pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    model.cuda()

    # Modified validation code: run a single, explicitly given checkpoint.
    checkpoint = torch.load(args.checkpoint)
    checkpoint_iteration = checkpoint['iteration']
    print("model loaded ", checkpoint_iteration)

    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    iteration_on_checkpoint = 0
    print ('data_loader size: ', len(data_loader))

    total_error = []
    # Pure inference: no autograd graph is needed for any batch.
    with torch.no_grad():
        for data in data_loader:
            # Compute the forward pass on a batch from the loaded dataset
            controls = data['directions']
            branches = model(torch.squeeze(data['rgb'].cuda()),
                             dataset.extract_inputs(data).cuda())
            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())
            total_error += error.detach().cpu().tolist()

            iteration_on_checkpoint += 1
            if iteration_on_checkpoint % 50 == 0:
                print ('iteration: ', iteration_on_checkpoint)

    total_error = np.array(total_error)
    print (len(total_error), total_error.shape)

    # Create the destination folder so the save cannot fail on a missing path.
    save_dir = os.path.join(args.save_path, args.dataset_name)
    os.makedirs(save_dir, exist_ok=True)
    np.save(os.path.join(save_dir, 'computed_error.npy'), total_error)
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """Run the main training loop for one experiment.

    Loads the latest checkpoint for the given ``exp_batch`` (folder) and
    ``exp_alias`` (experiment configuration) when one exists, optionally
    starting from a preloaded model, and then trains until the data loader is
    exhausted or the validation-stale stop condition triggers.

    Args:
        gpu: The GPU number, assigned to ``CUDA_VISIBLE_DEVICES``.
        exp_batch: The folder with the experiments.
        exp_alias: The alias, i.e. the experiment name.
        suppress_output: If true, stdout and stderr are redirected to files
            under ``_output_logs``.
        number_of_workers: The number of workers used for data loading.

    Returns:
        None. Failures are reported through ``coil_logger`` and swallowed.
    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}

        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')

        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Seed RNGs
        torch.manual_seed(g_conf.MAGICAL_SEED)
        random.seed(g_conf.MAGICAL_SEED)

        # Put the output to a separate file if it is the case
        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(
                os.path.join('_output_logs',
                             exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                             str(os.getpid()) + ".out"),
                "a", buffering=1)
            sys.stderr = open(
                os.path.join('_output_logs',
                             exp_alias + '_err_' + g_conf.PROCESS_NAME + '_' +
                             str(os.getpid()) + ".out"),
                "a", buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded; None if there are no
        # checkpoints saved for this model. A saved checkpoint of this
        # experiment takes precedence over the preloaded one.
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(checkpoint_file)))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset.
        # Can specify a list of training datasets or just a single one.
        if len(g_conf.TRAIN_DATASET_NAMES) == 0:
            train_dataset_list = [g_conf.TRAIN_DATASET_NAME]
        else:
            train_dataset_list = g_conf.TRAIN_DATASET_NAMES
        full_dataset = [
            os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
            for dataset_name in train_dataset_list
        ]

        # By instantiating the augmenter we get a callable that augments
        # images and transforms them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read a dataset.
        dataset = CoILDataset(
            full_dataset,
            transform=augmenter,
            preload_names=[
                str(g_conf.NUMBER_OF_HOURS) + 'hours_' + dataset_name
                for dataset_name in train_dataset_list
            ],
            train_dataset=True)
        print("Loaded dataset")

        # Create dataloader, model, and optimizer
        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        # If we have a previous checkpoint, load model, optimizer, and record
        # of previous train loss values (used for the learning rate schedule)
        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:
            # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print("Before the loss")

        # Define control loss function
        criterion = Loss(g_conf.LOSS_FUNCTION)

        if iteration == 0 and is_ready_to_save(iteration):
            state = {
                'iteration': iteration,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'total_time': accumulated_time,
                'optimizer': optimizer.state_dict(),
                'best_loss_iter': best_loss_iter
            }
            torch.save(
                state,
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(iteration) + '.pth'))

        # Training loop
        for data in data_loader:
            # Basically in this mode of execution, we validate every X Steps,
            # if it goes up 3 times, add a stop on the _logs folder that is
            # going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration,
                                                  g_conf.FINISH_ON_VALIDATION_STALE):
                break

            # ---------------- Main optimization loop ----------------
            iteration += 1

            # Adjust learning rate based on training loss
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            capture_time = time.time()
            model.zero_grad()

            controls = data['directions']

            # Run model forward and get outputs.
            # First case corresponds to training squeeze network, second case
            # corresponds to training driving model without mimicking losses,
            # last case corresponds to training mimic network.
            if "seg" in g_conf.SENSORS:
                branches = model(data,
                                 dataset.extract_inputs(data).cuda(),
                                 dataset.extract_intentions(data).cuda())
            elif not g_conf.USE_REPRESENTATION_LOSS:
                branches = model(data, dataset.extract_inputs(data).cuda())
            else:
                branches, intermediate_reps = model(
                    data, dataset.extract_inputs(data).cuda())

            # Compute control loss
            targets_to_use = dataset.extract_targets(data)
            loss_function_params = {
                'branches': branches,
                'targets': targets_to_use.cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)

            # Compute mimicking loss
            if g_conf.USE_REPRESENTATION_LOSS:
                expert_reps = dataset.extract_representations(data)
                batch_size = branches[0].shape[0]

                # Seg mask mimicking loss
                if g_conf.USE_PERCEPTION_REP_LOSS:
                    perception_rep_loss_elementwise = (
                        intermediate_reps[0] - expert_reps[0].cuda())**2
                    perception_rep_loss = g_conf.PERCEPTION_REP_WEIGHT * torch.sum(
                        perception_rep_loss_elementwise) / batch_size
                else:
                    perception_rep_loss = torch.tensor(0.).cuda()

                # Speed mimicking loss
                if g_conf.USE_SPEED_REP_LOSS:
                    speed_rep_loss_elementwise = (
                        intermediate_reps[1] - expert_reps[1].cuda())**2
                    speed_rep_loss = g_conf.SPEED_REP_WEIGHT * torch.sum(
                        speed_rep_loss_elementwise) / batch_size
                else:
                    speed_rep_loss = torch.tensor(0.).cuda()

                # Stop intentions mimicking loss
                if g_conf.USE_INTENTION_REP_LOSS:
                    intentions_rep_loss_elementwise = (
                        intermediate_reps[2] - expert_reps[2].cuda())**2
                    intentions_rep_loss = g_conf.INTENTIONS_REP_WEIGHT * torch.sum(
                        intentions_rep_loss_elementwise) / batch_size
                else:
                    intentions_rep_loss = torch.tensor(0.).cuda()

                rep_loss = g_conf.REP_LOSS_WEIGHT * (
                    perception_rep_loss + speed_rep_loss + intentions_rep_loss)
                overall_loss = loss + rep_loss
            else:
                overall_loss = loss

            overall_loss.backward()
            optimizer.step()

            # ---------------- Saving the model if necessary ----------------
            if is_ready_to_save(iteration):
                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))

            # ---------------- Tensorboard / monitoring logs ----------------
            # These logs are monitored by the printer module.
            coil_logger.add_scalar('Loss', loss.data, iteration)
            if g_conf.USE_REPRESENTATION_LOSS:
                coil_logger.add_scalar('Perception Rep Loss',
                                       perception_rep_loss.data, iteration)
                coil_logger.add_scalar('Speed Rep Loss',
                                       speed_rep_loss.data, iteration)
                coil_logger.add_scalar('Intentions Rep Loss',
                                       intentions_rep_loss.data, iteration)
                coil_logger.add_scalar('Overall Rep Loss',
                                       rep_loss.data, iteration)
                coil_logger.add_scalar('Total Loss',
                                       overall_loss.data, iteration)
            if 'rgb' in data:
                coil_logger.add_image('Image', torch.squeeze(data['rgb']),
                                      iteration)

            if overall_loss.data < best_loss:
                best_loss = overall_loss.data.tolist()
                best_loss_iter = iteration

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - targets_to_use.cuda())

            # Log a random position of the batch. The range comes from the
            # model output: `data` is a dict, so len(data) would be the number
            # of keys rather than the number of samples.
            position = random.randint(0, len(output) - 1)

            accumulated_time += time.time() - capture_time

            # Log to terminal and log file. 'Loss' is the optimised loss,
            # which equals the control loss when no representation loss is
            # used.
            message = {
                'Iteration': iteration,
                'Loss': overall_loss.data.tolist(),
            }
            if g_conf.USE_REPRESENTATION_LOSS:
                message['Control Loss'] = loss.data.tolist()
                message['Rep Loss'] = rep_loss.data.tolist()
            message.update({
                'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                'BestLoss': best_loss,
                'BestLossIteration': best_loss_iter,
                'Output': output[position].data.tolist(),
                'GroundTruth': targets_to_use[position].data.tolist(),
                'Error': error[position].data.tolist(),
                'Inputs': dataset.extract_inputs(data)[position].data.tolist()
            })
            coil_logger.add_message('Iterating', message, iteration)

            # Save training loss history (useful for restoring training runs
            # since learning rate is adjusted based on training loss)
            loss_window.append(overall_loss.data.tolist())
            coil_logger.write_on_error_csv('train', overall_loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, overall_loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
    except RuntimeError as e:
        coil_logger.add_message('Error', {'Message': str(e)})
    except:
        # Deliberate catch-all at the process boundary: the failure is
        # printed and recorded in the experiment log instead of propagating.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output):
    """Validate every scheduled checkpoint of an experiment on one dataset.

    Waits for the checkpoints listed in ``g_conf.TEST_SCHEDULE`` to appear,
    evaluates each one over the whole validation dataset, logs per-batch and
    per-checkpoint MSE / mean absolute error, and optionally writes a stop
    marker when the validation error has stopped decreasing.

    Args:
        gpu: Iterable of GPU id strings, joined with ``','`` into
            ``CUDA_VISIBLE_DEVICES``.
        exp_batch: Experiment folder name (under ``configs`` and ``_logs``).
        exp_alias: Experiment name; ``<exp_alias>.yaml`` is its config file.
        dataset_name: Validation dataset folder name, also used as the
            preload name and as the log / CSV name.
        suppress_output: If true, stdout and stderr are redirected to files
            under ``_output_logs``.

    Returns:
        None. Failures are reported through ``coil_logger`` and swallowed;
        the partially written CSV of the current checkpoint is erased.
    """
    latest = None
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpu)

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))

        # The validation dataset is always fully loaded, so we fix a very
        # high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process('validation', dataset_name)

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            sys.stdout = open(
                os.path.join('_output_logs',
                             exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                             str(os.getpid()) + ".out"),
                "a", buffering=1)
            sys.stderr = open(
                os.path.join('_output_logs',
                             exp_alias + '_err_' + g_conf.PROCESS_NAME + '_' +
                             str(os.getpid()) + ".out"),
                "a", buffering=1)

        # Define the dataset, rooted at COIL_DATASET_PATH.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
        augmenter = Augmenter(None)

        # Definition of the dataset to be used. Preload name is just the
        # validation data name
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_name=dataset_name)

        # The data loader is the multi threaded module from pytorch that
        # releases a number of workers to get all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=g_conf.BATCH_SIZE, shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS, pin_memory=True)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

        # The window used to keep track of the validation errors
        l1_window = []
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:
            # Some checkpoints were already evaluated: recover their errors.
            l1_window = coil_logger.recover_loss_window(dataset_name, None)

        model.cuda()

        best_mse = 1000
        best_error = 1000
        best_mse_iter = 0
        best_error_iter = 0

        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
                latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(latest) + '.pth'))
                checkpoint_iteration = checkpoint['iteration']
                print("Validation loaded ", checkpoint_iteration)

                # The model is kept unwrapped on purpose: wrapping it in
                # DataParallel here would re-wrap it for every checkpoint and
                # make this load target a wrapped module, while the forward
                # pass only ever used the underlying module's forward_branch.
                model.load_state_dict(checkpoint['state_dict'])
                model.eval()

                accumulated_mse = 0
                accumulated_error = 0
                iteration_on_checkpoint = 0

                # Pure inference: no autograd graph is needed.
                with torch.no_grad():
                    for data in data_loader:
                        # Compute the forward pass on a batch from the
                        # validation dataset
                        controls = data['directions']
                        output = model.forward_branch(
                            torch.squeeze(data['rgb']).cuda(),
                            dataset.extract_inputs(data).cuda(),
                            controls)

                        # It could be either waypoints or direct control
                        if 'waypoint1_angle' in g_conf.TARGETS:
                            write_waypoints_output(checkpoint_iteration, output)
                        else:
                            write_regular_output(checkpoint_iteration, output)

                        # Extract and transfer the targets once per batch.
                        targets = dataset.extract_targets(data)
                        targets_gpu = targets.cuda()

                        error = torch.abs(output - targets_gpu)
                        mse = torch.mean((output - targets_gpu)**2).data.tolist()
                        mean_error = torch.mean(error).data.tolist()

                        accumulated_error += mean_error
                        accumulated_mse += mse

                        # Log a random position
                        position = random.randint(0, len(output) - 1)

                        coil_logger.add_message(
                            'Iterating',
                            {
                                'Checkpoint': latest,
                                # Samples processed so far / dataset size.
                                'Iteration': (
                                    str(iteration_on_checkpoint * g_conf.BATCH_SIZE)
                                    + '/' + str(len(dataset))),
                                'MeanError': mean_error,
                                'MSE': mse,
                                'Output': output[position].data.tolist(),
                                'GroundTruth': targets[position].data.tolist(),
                                'Error': error[position].data.tolist(),
                                'Inputs': dataset.extract_inputs(
                                    data)[position].data.tolist()
                            },
                            latest)
                        iteration_on_checkpoint += 1
                        print("Iteration %d  on Checkpoint %d : Error %f" % (
                            iteration_on_checkpoint, checkpoint_iteration,
                            mean_error))

                # Finish a round of validation, write results, wait for the
                # next checkpoint.
                checkpoint_average_mse = accumulated_mse / (len(data_loader))
                checkpoint_average_error = accumulated_error / (len(data_loader))
                coil_logger.add_scalar('Loss', checkpoint_average_mse, latest, True)
                coil_logger.add_scalar('Error', checkpoint_average_error, latest, True)

                if checkpoint_average_mse < best_mse:
                    best_mse = checkpoint_average_mse
                    best_mse_iter = latest

                if checkpoint_average_error < best_error:
                    best_error = checkpoint_average_error
                    best_error_iter = latest

                coil_logger.add_message(
                    'Iterating',
                    {
                        'Summary': {
                            'Error': checkpoint_average_error,
                            'Loss': checkpoint_average_mse,
                            'BestError': best_error,
                            'BestMSE': best_mse,
                            'BestMSECheckpoint': best_mse_iter,
                            'BestErrorCheckpoint': best_error_iter
                        },
                        'Checkpoint': latest
                    },
                    latest)

                l1_window.append(checkpoint_average_error)
                coil_logger.write_on_error_csv(dataset_name,
                                               checkpoint_average_error)

                # If we are using the finish when validation stops, we check
                # the current window of errors.
                if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                    if dlib.count_steps_without_decrease(l1_window) > 3 and \
                            dlib.count_steps_without_decrease_robust(l1_window) > 3:
                        coil_logger.write_stop(dataset_name, latest)
                        break

            else:
                # Next checkpoint is not there yet: poll again in a second.
                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)

                coil_logger.add_message('Loading', {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        # Deliberate catch-all at the process boundary: the failure is
        # printed and recorded in the experiment log instead of propagating.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output=True, yaml_file=None,
            num_samples=100):
    """Compute the per-sample variance of the branch outputs of a checkpoint.

    Loads the checkpoint given by the module-level ``args.checkpoint`` and,
    for every batch of saved activations, evaluates ``model.branches``
    ``num_samples`` times with the model in training mode, then stores the
    variance across those passes. The result is saved to
    ``<args.save_path>/<args.dataset_name>/computed_var.npy`` and is meant for
    the uncertainty based sampling in 'tools/filter_dagger_data_var.py'.

    Args:
        gpu: Value assigned to the ``CUDA_VISIBLE_DEVICES`` variable.
        exp_batch: Experiment folder name under ``configs``.
        exp_alias: Experiment name; ``<exp_alias>.yaml`` is its config file.
        dataset_name: Dataset folder name, also used as the preload name.
        suppress_output: Unused; the output redirection it controlled is
            disabled in this script.
        yaml_file: Optional directory to read ``<exp_alias>.yaml`` from
            instead of ``configs/<exp_batch>``.
        num_samples: Number of forward passes per batch used to estimate the
            variance. Defaults to the previously hard-coded value of 100.

    Returns:
        None.
    """
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias + '.yaml')
    if yaml_file is not None:
        path_to_yaml_file = os.path.join(yaml_file, exp_alias + '.yaml')
    merge_with_yaml(path_to_yaml_file)

    # Process-type registration and stdout/stderr redirection are
    # intentionally disabled in this script to keep it simple.

    # Define the dataset, rooted at COIL_DATASET_PATH.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
    augmenter = Augmenter(None)

    # Definition of the dataset to be used. Preload name is just the
    # validation data name
    print('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset, transform=augmenter,
                          preload_name=dataset_name)

    # The data loader is the multi threaded module from pytorch that releases
    # a number of workers to get all the data.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=g_conf.BATCH_SIZE,
        shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
        pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    model.cuda()

    # Modified validation code: run a single, explicitly given checkpoint.
    checkpoint = torch.load(args.checkpoint)
    checkpoint_iteration = checkpoint['iteration']
    print("model loaded ", checkpoint_iteration)

    model.load_state_dict(checkpoint['state_dict'])
    # Training mode is kept on purpose, presumably so that stochastic layers
    # (e.g. dropout) stay active and repeated passes differ -- TODO confirm.
    model.train()

    iteration_on_checkpoint = 0
    print('data_loader size: ', len(data_loader))

    total_var = []
    # No gradients are needed; this only disables autograd bookkeeping and
    # does not change the train-mode behaviour of the layers.
    with torch.no_grad():
        for data in data_loader:
            # The dataloader directly loads the saved activations.
            controls = data['directions']

            curr_var = []
            for _ in range(num_samples):
                output = model.branches(data['activation'].cuda())
                output_vec = model.extract_branch(torch.stack(output), controls)
                curr_var.append(output_vec.detach().cpu().numpy())

            # Variance across the repeated passes, per sample and per output.
            curr_var = np.array(curr_var)
            compute_var = np.var(curr_var, axis=0)
            total_var += compute_var.tolist()

            iteration_on_checkpoint += 1
            if iteration_on_checkpoint % 50 == 0:
                print('iteration: ', iteration_on_checkpoint)

    total_var = np.array(total_var)
    print(len(total_var), total_var.shape)

    # Save the computed variance array; create the destination folder so the
    # save cannot fail on a missing path.
    save_dir = os.path.join(args.save_path, args.dataset_name)
    os.makedirs(save_dir, exist_ok=True)
    np.save(os.path.join(save_dir, 'computed_var.npy'), total_var)
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """
        The main training function. This function loads the latest checkpoint
        for a given exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continues some training.
        A model named by g_conf.PRELOAD_MODEL_ALIAS can be used as initial weights;
        a saved checkpoint of the experiment itself takes precedence over it.

    Args:
        gpu: gpus ids for training, joined with ',' into CUDA_VISIBLE_DEVICES
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output is going to be saved on a file
            ('_output_logs/...') instead of the console
        number_of_workers: the number of threads used for data loading

    Returns:
        None. Errors are not raised to the caller, they are reported through
        coil_logger as an 'Error' message.
    """
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpu)
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case
        if suppress_output:
            # exist_ok avoids a crash when another process creates the folder
            # between an existence check and the creation.
            os.makedirs('_output_logs', exist_ok=True)
            sys.stdout = open(
                os.path.join('_output_logs',
                             exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                             + str(os.getpid()) + ".out"),
                "a", buffering=1)
            sys.stderr = open(
                os.path.join('_output_logs',
                             exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                             + str(os.getpid()) + ".out"),
                "a", buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            # Reuse the file found above, a second lookup could return a
            # different (newer) checkpoint than the one that was tested.
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(checkpoint_file)))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
            print('iteration: ', iteration, 'best_loss: ', best_loss)
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset. This structure has the __get_item__ redefined in a way
        # that you can access the positions from the root directory as in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augments images
        # and transforms them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read the dataset
        dataset = CoILDataset(
            full_dataset, transform=augmenter,
            preload_name=str(g_conf.NUMBER_OF_HOURS) + 'hours_' + g_conf.TRAIN_DATASET_NAME)
        print("Loaded dataset")

        # Creates the sampler, this part is responsible for managing the keys.
        # Define the sampling strategy for the mini-batch, different samplers
        # can be found in 'splitter.py'.
        data_loader = select_balancing_strategy(dataset, iteration, number_of_workers)

        # Instantiate the network architecture
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()

        # adabound and adamio can also be used here
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:
            # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        # freeze the perception module weights if required
        # for m in model.perception.parameters():
        #     m.requires_grad = False

        # total trainable parameters
        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
        total_params = sum(np.prod(p.size()) for p in model_parameters)
        print('trainable parameters: ', total_params)

        # multi-gpu
        print('number of gpus: ', torch.cuda.device_count())
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)

        criterion = Loss(g_conf.LOSS_FUNCTION)

        print('Start Training')

        for data in data_loader:

            # use this for early stopping if the validation loss is not coming down
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration,
                                                  g_conf.FINISH_ON_VALIDATION_STALE):
                break

            ####################################
            # Main optimization loop
            ####################################

            iteration += 1
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # additional learning rate scheduler - cyclic cosine annealing
            # (https://arxiv.org/pdf/1704.00109.pdf)
            # adjust_learning_rate_cosine_annealing(optimizer, loss_window, iteration)

            capture_time = time.time()
            controls = data['directions']
            model.zero_grad()
            branches = model(torch.squeeze(data['rgb'].cuda()),
                             dataset.extract_inputs(data).cuda())
            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()

            ####################################
            # Saving the model if necessary
            ####################################

            if is_ready_to_save(iteration):
                # With DataParallel the real network lives in model.module.
                if torch.cuda.device_count() > 1:
                    state_dict_save = model.module.state_dict()
                else:
                    state_dict_save = model.state_dict()

                state = {
                    'iteration': iteration,
                    'state_dict': state_dict_save,
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))

            ################################################
            # Adding tensorboard logs.
            # Making calculations for logging purposes.
            # These logs are monitored by the printer module.
            ################################################

            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            if torch.cuda.device_count() > 1:
                output = model.module.extract_branch(torch.stack(branches[0:4]), controls)
            else:
                output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            # Log a random position of the batch. The bound comes from the
            # output itself: `data` is a dict, so len(data) would be the number
            # of keys and not the number of samples in the batch.
            position = random.randint(0, len(output) - 1)

            accumulated_time += time.time() - capture_time

            coil_logger.add_message(
                'Iterating',
                {
                    'Iteration': iteration,
                    'Loss': loss.data.tolist(),
                    'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss': best_loss,
                    'BestLossIteration': best_loss_iter,
                    'Output': output[position].data.tolist(),
                    'GroundTruth': dataset.extract_targets(data)[position].data.tolist(),
                    'Error': error[position].data.tolist(),
                    'Inputs': dataset.extract_inputs(data)[position].data.tolist()
                },
                iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d Loss: %f" % (iteration, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:
        coil_logger.add_message('Error', {'Message': str(e)})

    except Exception:
        # Top level boundary of the process: report anything else and leave.
        # SystemExit is deliberately not swallowed here.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
def execute(gpu, exp_batch, exp_alias, suppress_output=True):
    """Train the model of one experiment, resuming from its latest checkpoint.

    The configuration 'configs/<exp_batch>/<exp_alias>.yaml' is merged into the
    global configuration, the dataset named by g_conf.TRAIN_DATASET_NAME is read
    from $COIL_DATASET_PATH and the model is optimized with Adam. Checkpoints are
    written to '_logs/<exp_batch>/<exp_alias>/checkpoints/<iteration>.pth'
    whenever is_ready_to_save(iteration) says so.

    Args:
        gpu: value written verbatim to the CUDA_VISIBLE_DEVICES environment variable
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if True, stdout is redirected to a file in '_output_logs'

    Returns:
        None. Errors are not raised to the caller, they are reported through
        coil_logger as an 'Error' message.
    """
    # TODO: probable race condition, the train has to be started before.
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')

        coil_logger.add_message('Loading', {'GPU': gpu})

        # exist_ok avoids a crash when another process creates the folder
        # between an existence check and the creation.
        os.makedirs('_output_logs', exist_ok=True)

        # Put the output to a separate file
        if suppress_output:
            sys.stdout = open(
                os.path.join('_output_logs',
                             g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                "a", buffering=1)

        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            # Reuse the file found above, a second lookup could return a
            # different (newer) checkpoint than the one that was tested.
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(checkpoint_file)))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # TODO: The checkpoint will continue, so it should erase everything up to
        # the iteration on tensorboard

        # Define the dataset. This structure has the __get_item__ redefined in a way
        # that you can access the HD_FILES positions from the root directory as in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # augmenter_cpu = iag.AugmenterCPU(g_conf.AUGMENTATION_SUITE_CPU)

        # By instantiating the augmenter we get a callable that augments images
        # and transforms them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        dataset = CoILDataset(full_dataset, transform=augmenter)

        data_loader = select_balancing_strategy(dataset, iteration)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()

        if checkpoint_file is not None:
            model.load_state_dict(checkpoint['state_dict'])

        print(model)

        criterion = Loss(g_conf.LOSS_FUNCTION)

        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)
        # Restore the optimizer when the checkpoint carries its state, so a
        # resumed run keeps the Adam statistics and the learning rate.
        # Checkpoints written without that key are still accepted.
        if checkpoint_file is not None and 'optimizer' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])

        print(dataset.meta_data)

        print(model)

        # We accumulate iteration time and keep the average speed
        if checkpoint_file is not None:
            accumulated_time = checkpoint['total_time']
        else:
            accumulated_time = 0

        # TODO: test experiment continuation. Is the data sampler going to
        # continue where it started?
        capture_time = time.time()
        for data in data_loader:

            input_data, float_data = data

            # get the control commands from float_data, size = [120,1]
            controls = float_data[:, dataset.controls_position(), :]

            # The output(branches) is a list of 5 branches results,
            # each branch is with size [120,3]
            model.zero_grad()
            branches = model(torch.squeeze(input_data['rgb'].cuda()),
                             dataset.extract_inputs(float_data).cuda())

            loss = criterion(branches,
                             dataset.extract_targets(float_data).cuda(),
                             controls.cuda(),
                             dataset.extract_inputs(float_data).cuda(),
                             branch_weights=g_conf.BRANCH_LOSS_WEIGHT,
                             variable_weights=g_conf.VARIABLE_WEIGHT)

            # TODO: All these logging things could go out to clean up the main
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(float_data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(float_data).cuda())

            # TODO: For now we are computing the error for just the correct
            # branch, it could be multi-branch
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(input_data['rgb']), iteration)

            loss.backward()
            optimizer.step()

            accumulated_time += time.time() - capture_time
            capture_time = time.time()

            # TODO: Get only the float_data that are actually generating output
            # TODO: iteration is repeating, and that is dumb
            coil_logger.add_message(
                'Iterating',
                {
                    'Iteration': iteration,
                    'Loss': loss.data.tolist(),
                    'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss': best_loss,
                    'BestLossIteration': best_loss_iter,
                    'Output': output[position].data.tolist(),
                    'GroundTruth': dataset.extract_targets(
                        float_data)[position].data.tolist(),
                    'Error': error[position].data.tolist(),
                    'Inputs': dataset.extract_inputs(float_data)[position].data.tolist()
                },
                iteration)

            if is_ready_to_save(iteration):
                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                # TODO : maybe already summarize the best model ???
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))

            iteration += 1
            print(iteration)

            if iteration % 1000 == 0:
                adjust_learning_rate(optimizer, iteration)

            del data

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except Exception:
        # Top level boundary of the process: report anything else and leave.
        # SystemExit is deliberately not swallowed here.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})