Example #1
0
    def test_init(self):

        # Assert that an error is raised when reading from a wrong place
        with self.assertRaises(ValueError):
            _ = CoILDataset("Wrong place")

        # Load the dataset from the test directory
        dataset = CoILDataset(self.root_test_dir)

        print(len(dataset.sensor_data))
        print(dataset.sensor_data[0])
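
    # The tests in these excerpts refer to self.root_test_dir and
    # self.test_images_write_path without defining them; a minimal sketch of
    # the assumed fixture (the write path is a placeholder, the data directory
    # reuses the one from the split test further below):
    def setUp(self):
        self.root_test_dir = 'testing/unit_tests/data'
        self.test_images_write_path = '_test_images/'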
Example #2
0
    def test_get_item_augmented(self):
        # Function to test the augmentation

        if not os.path.exists(self.test_images_write_path + 'augmented'):
            os.mkdir(self.test_images_write_path + 'augmented')

        # This depends on the number of fused frames. An image could have
        # a certain number of fused frames.
        dataset = CoILDataset(self.root_test_dir, transform=aug_cpu)
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=120,
                                                  shuffle=True,
                                                  num_workers=12,
                                                  pin_memory=True)
        capture_time = time.time()
        count = 0
        for data in data_loader:

            image, labels = data
            print(image['rgb'].shape)

            image_to_save = transforms.ToPILImage()(image['rgb'][0][0])

            image_to_save.save(
                os.path.join(self.test_images_write_path + 'augmented',
                             str(count) + '.png'))
            count += 1

        print("Time to load AUGMENT", time.time() - capture_time)
Example #3
0
    def test_merge_yaml_line_globaldict(self):

        g_conf.NAME = 'experiment_1'
        merge_with_yaml('configs/eccv/experiment_1.yaml')
        # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
        set_type_of_process('train')

        print(g_conf)
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # augmenter_cpu = iag.AugmenterCPU(g_conf.AUGMENTATION_SUITE_CPU)

        dataset = CoILDataset(full_dataset,
                              transform=transforms.Compose(
                                  [transforms.ToTensor()]))

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each batch.

        # The data loader is the multi-threaded module from PyTorch that releases a number of
        # workers to get all the data.
        # TODO: batch size and number of workers should go to some configuration file
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=120,
                                                  shuffle=False,
                                                  num_workers=12,
                                                  pin_memory=True)
        # By instantiating the augmenter we get a callable that augments images and transforms them
        for data in data_loader:

            a, b = data
Example #4
0
    def test_get_item_augmented_gpu(self):

        if not os.path.exists(self.test_images_write_path + 'augmented_gpu'):
            os.mkdir(self.test_images_write_path + 'augmented_gpu')
        # This depends on the number of fused frames. An image could have
        # a certain number of fused frames.
        dataset = CoILDataset(self.root_test_dir,
                              transform=transforms.Compose(
                                  [transforms.ToTensor()]))
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=120,
                                                  shuffle=True,
                                                  num_workers=12,
                                                  pin_memory=True)
        capture_time = time.time()
        count = 0
        for data in data_loader:
            image, labels = data
            result = aug(image['rgb'])

            image_to_save = transforms.ToPILImage()(result[0][0].cpu())
            image_to_save.save(
                os.path.join(self.test_images_write_path + 'augmented_gpu',
                             str(count) + '.png'))
            count += 1

        print("Time to load AUGMENT", time.time() - capture_time)
Example #5
0
    def test_basic_data(self):
        # The Town01/Town02 data; try to load it.
        g_conf.immutable(False)
        g_conf.EXPERIMENT_NAME = 'coil_icra'
        create_log_folder('sample')
        create_exp_path('sample', 'coil_icra')
        merge_with_yaml('configs/sample/coil_icra.yaml')

        set_type_of_process('train')

        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    'CoILTrain')

        dataset = CoILDataset(full_dataset,
                              transform=None,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
Example #6
0
    def test_town3_data(self):
        # The Town03 data has different names and does not have pedestrian or vehicle stop
        # indications.
        g_conf.immutable(False)
        g_conf.EXPERIMENT_NAME = 'resnet34imnet'
        create_log_folder('town03')
        create_exp_path('town03', 'resnet34imnet')
        merge_with_yaml('configs/town03/resnet34imnet.yaml')

        set_type_of_process('train')

        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    'CoILTrainTown03')

        dataset = CoILDataset(full_dataset,
                              transform=None,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
Example #7
0
    def test_split_real_data(self):

        root_test_dir = '/home/felipe/Datasets/CVPR02Noise/SeqTrain'

        print('SPLITTING REAL DATA!')
        dataset = CoILDataset(root_test_dir)
        steerings = dataset.measurements[0, :]

        print(dataset.meta_data)
        print(dataset.meta_data[:, 0])
        print(" Where is control ",
              np.where(dataset.meta_data[:, 0] == b'control'))
        labels = dataset.measurements[np.where(
            dataset.meta_data[:, 0] == b'control'), :]

        keys = range(0, len(steerings))

        print(labels)
        splitted_labels = splitter.label_split(labels[0][0], keys,
                                               g_conf.LABELS_DIVISION)

        # Another level of splitting
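        # float_split is expected to return the key bins ordered by increasing
        # steering value, so the per-bin averages computed below must increase
        # monotonically (checked with assertLess).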
        splitted_steer_labels = []
        for keys in splitted_labels:
            splitter_steer = splitter.float_split(steerings, keys,
                                                  g_conf.STEERING_DIVISION)

            print(splitter_steer)

            for i in range(0, len(splitter_steer)):
                sum_now = 0
                for key in splitter_steer[i]:
                    sum_now += steerings[key]

                avg_now = sum_now / len(splitter_steer[i])
                #print (avg_now)
                if i > 0:
                    self.assertLess(avg_previous, avg_now)

                avg_previous = avg_now

            splitted_steer_labels.append(splitter_steer)
    def test_split_real_data(self):

        root_test_dir = 'testing/unit_tests/data'

        dataset = CoILDataset(root_test_dir)
        steerings = dataset.measurements[0, :]
        print(dataset.meta_data)
        # TODO: read the meta data and turn it into a cool dictionary?
        print(np.where(dataset.meta_data[:, 0] == 'control'))
        labels = dataset.measurements[np.where(
            dataset.meta_data[:, 0] == 'control'), :]
        print(labels)

        print(np.unique(labels))

        keys = range(0, len(steerings))

        splitted_labels = splitter.label_split(
            labels[0][0], keys, g_conf.param.INPUT.LABELS_DIVISION)

        print(splitted_labels)
        # Another level of splitting
        splitted_steer_labels = []
        for keys in splitted_labels:
            splitter_steer = splitter.float_split(
                steerings, keys, g_conf.param.INPUT.STEERING_DIVISION)

            print(splitter_steer)

            for i in range(0, len(splitter_steer)):
                sum_now = 0
                for key in splitter_steer[i]:
                    sum_now += steerings[key]

                avg_now = sum_now / len(splitter_steer[i])
                #print (avg_now)
                if i > 0:
                    self.assertLess(avg_previous, avg_now)

                avg_previous = avg_now

            splitted_steer_labels.append(splitter_steer)
    def test_speed_reading(self):
        if not os.path.exists(self.test_images_write_path + 'normal_steer'):
            os.mkdir(self.test_images_write_path + 'normal_steer')

        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    '1HoursW1-3-6-8')
        # This depends on the number of fused frames. An image could have
        # a certain number of fused frames.

        augmenter = Augmenter(None)
        dataset = CoILDataset(full_dataset, transform=augmenter)

        keys = range(
            0,
            len(dataset.measurements[0, :]) - g_conf.NUMBER_IMAGES_SEQUENCE)
        sampler = BatchSequenceSampler(
            splitter.control_steer_split(dataset.measurements,
                                         dataset.meta_data, keys),
            0 * g_conf.BATCH_SIZE, g_conf.BATCH_SIZE,
            g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)

        # data_loader = torch.utils.data.DataLoader(dataset, batch_size=120,
        #                                          shuffle=True, num_workers=12, pin_memory=True)
        # capture_time = time.time()
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_sampler=sampler,
                                                  shuffle=False,
                                                  num_workers=12,
                                                  pin_memory=True)

        count = 0
        print('len ', len(data_loader))
        max_steer = 0
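        # NOTE: max_steer is initialized here but never updated inside the
        # loop below, so the final print always reports 0.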
        for data in data_loader:
            print(count)
            image, labels = data

            count += 1
        print("MAX STEER ", max_steer)
Example #10
0
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output):
    latest = None
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        # The validation dataset is always fully loaded, so we fix a very high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process('validation', dataset_name)

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        # Define the dataset. This structure has __getitem__ redefined in a way
        # that you can access the HDF5 file positions from the root directory as in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    dataset_name)
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=dataset_name)

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each batch.

        # The data loader is the multi-threaded module from PyTorch that releases a number of
        # workers to get all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

        # Set ERFnet for segmentation
        model_erf = ERFNet(20)
        model_erf = torch.nn.DataParallel(model_erf)
        model_erf = model_erf.cuda()

        print("LOAD ERFNet - validate")

        def load_my_state_dict(
            model, state_dict
        ):  # custom function to load a model when not all state dict keys match
            own_state = model.state_dict()
            for name, param in state_dict.items():
                if name not in own_state:
                    continue
                own_state[name].copy_(param)
            return model

        model_erf = load_my_state_dict(
            model_erf,
            torch.load(os.path.join('trained_models/erfnet_pretrained.pth')))
        model_erf.eval()
        print("ERFNet and weights LOADED successfully")

        # The window used to keep track of the validation results across checkpoints
        l1_window = []
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:  # When there is a previously evaluated checkpoint
            l1_window = coil_logger.recover_loss_window(dataset_name, None)

        model.cuda()

        best_mse = 1000
        best_error = 1000
        best_mse_iter = 0
        best_error_iter = 0

        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

                latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(latest) + '.pth'))
                checkpoint_iteration = checkpoint['iteration']
                print("Validation loaded ", checkpoint_iteration)

                model.load_state_dict(checkpoint['state_dict'])

                model.eval()
                accumulated_mse = 0
                accumulated_error = 0
                iteration_on_checkpoint = 0
                for data in data_loader:

                    # Compute the forward pass on a batch from  the validation dataset
                    controls = data['directions']

                    # Seg batch
                    rgbs = data['rgb']
                    with torch.no_grad():
                        outputs = model_erf(rgbs)
                    labels = outputs.max(1)[1].byte().cpu().data

                    seg_road = (labels == 0)
                    seg_not_road = (labels != 0)
                    seg = torch.stack((seg_road, seg_not_road), 1).float()
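                    # seg is a two-channel road / not-road mask built from the
                    # ERFNet argmax labels (class id 0 is taken to be road);
                    # it replaces the RGB frames as input to the driving model below.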

                    output = model.forward_branch(
                        torch.squeeze(seg).cuda(),
                        dataset.extract_inputs(data).cuda(), controls)

                    #                    output = model.foward_branch(torch.squeeze(rgbs).cuda(),
                    #                                                 dataset.extract_inputs(data).cuda(),controls)
                    # It could be either waypoints or direct control
                    if 'waypoint1_angle' in g_conf.TARGETS:
                        write_waypoints_output(checkpoint_iteration, output)
                    else:
                        write_regular_output(checkpoint_iteration, output)

                    mse = torch.mean(
                        (output - dataset.extract_targets(data).cuda()
                         )**2).data.tolist()
                    mean_error = torch.mean(
                        torch.abs(output -
                                  dataset.extract_targets(data).cuda())
                    ).data.tolist()

                    accumulated_error += mean_error
                    accumulated_mse += mse
                    error = torch.abs(output -
                                      dataset.extract_targets(data).cuda())

                    # Log a random position
                    position = random.randint(0, len(output.data.tolist()) - 1)

                    coil_logger.add_message(
                        'Iterating', {
                            'Checkpoint':
                            latest,
                            'Iteration': (str(iteration_on_checkpoint * 120) +
                                          '/' + str(len(dataset))),
                            'MeanError':
                            mean_error,
                            'MSE':
                            mse,
                            'Output':
                            output[position].data.tolist(),
                            'GroundTruth':
                            dataset.extract_targets(
                                data)[position].data.tolist(),
                            'Error':
                            error[position].data.tolist(),
                            'Inputs':
                            dataset.extract_inputs(data)
                            [position].data.tolist()
                        }, latest)
                    iteration_on_checkpoint += 1
                    print("Iteration %d  on Checkpoint %d : Error %f" %
                          (iteration_on_checkpoint, checkpoint_iteration,
                           mean_error))
                """
                    ########
                    Finish a round of validation, write results, wait for the next
                    ########
                """

                checkpoint_average_mse = accumulated_mse / (len(data_loader))
                checkpoint_average_error = accumulated_error / (
                    len(data_loader))
                coil_logger.add_scalar('Loss', checkpoint_average_mse, latest,
                                       True)
                coil_logger.add_scalar('Error', checkpoint_average_error,
                                       latest, True)

                if checkpoint_average_mse < best_mse:
                    best_mse = checkpoint_average_mse
                    best_mse_iter = latest

                if checkpoint_average_error < best_error:
                    best_error = checkpoint_average_error
                    best_error_iter = latest

                coil_logger.add_message(
                    'Iterating', {
                        'Summary': {
                            'Error': checkpoint_average_error,
                            'Loss': checkpoint_average_mse,
                            'BestError': best_error,
                            'BestMSE': best_mse,
                            'BestMSECheckpoint': best_mse_iter,
                            'BestErrorCheckpoint': best_error_iter
                        },
                        'Checkpoint': latest
                    }, latest)

                l1_window.append(checkpoint_average_error)
                coil_logger.write_on_error_csv(dataset_name,
                                               checkpoint_average_error)

                # If we are using finish-on-validation-stale, check whether the validation error has stopped decreasing
                if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                    if dlib.count_steps_without_decrease(l1_window) > 3 and \
                            dlib.count_steps_without_decrease_robust(l1_window) > 3:
                        coil_logger.write_stop(dataset_name, latest)
                        break

            else:

                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)

                coil_logger.add_message('Loading',
                                        {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
Example #11
0
def execute(gpu, exp_batch, exp_alias, dataset_name):
    # We set the visible cuda devices

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('validation', dataset_name)

    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')

    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)

    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not ?
        return

    # Define the dataset. This structure has __getitem__ redefined in a way
    # that you can access the HDF5 file positions from the root directory as in a vector.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)

    print(full_dataset)
    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    # Creates the sampler, this part is responsible for managing the keys. It divides
    # all keys depending on the measurements and produces a set of keys for each batch.

    # The data loader is the multi-threaded module from PyTorch that releases a number of
    # workers to get all the data.
    # TODO: batch size and number of workers should go to some configuration file
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=120,
                                              shuffle=False,
                                              num_workers=12,
                                              pin_memory=True)

    # TODO: here there is clearly a possibility to make a cool "conditioning" system.
    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()
    model.eval()

    criterion = Loss()

    latest = get_latest_evaluated_checkpoint()
    if latest is None:  # When nothing was tested, get_latest returns None; we fix that.
        latest = 0

    latest = 200000  # NOTE: hard-coded override of the checkpoint recovered above

    best_loss = 1000.0
    best_error = 1000.0
    best_loss_iter = 0
    best_error_iter = 0
    print(dataset.meta_data[0][0])
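    # The meta data names are read as bytes; decode them to str so that the
    # string comparisons against 'control' and 'speed_module' below match.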
    for k in dataset.meta_data:
        k[0] = str(k[0], 'utf-8')

    print(dataset.meta_data[0][0])
    cpts = glob.glob(
        '/home-local/rohitrishabh/coil_20-06/_logs/eccv/experiment_1/checkpoints/*.pth'
    )
    # while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
    for ckpt in cpts:

        # if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

        # latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
        latest = int(ckpt[-10:-4])

        # checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias
        #                         , 'checkpoints', str(latest) + '.pth'))
        checkpoint = torch.load(ckpt)
        checkpoint_iteration = checkpoint['iteration']
        print("Validation loaded ", checkpoint_iteration)

        accumulated_loss = 0.0
        accumulated_error = 0.0
        iteration_on_checkpoint = 0
        for data in data_loader:

            input_data, float_data = data
            control_position = np.where(
                dataset.meta_data[:, 0] == 'control')[0][0]
            speed_position = np.where(
                dataset.meta_data[:, 0] == 'speed_module')[0][0]
            # print (torch.squeeze(input_data['rgb']).shape)
            # print (control_position)
            # print (speed_position)
            # Obs : Maybe we could also check for other branches ??

            output = model.forward_branch(
                torch.squeeze(input_data['rgb']).cuda(),
                float_data[:, speed_position, :].cuda(),
                float_data[:, control_position, :].cuda())

            for i in range(input_data['rgb'].shape[0]):

                coil_logger.write_on_csv(
                    checkpoint_iteration,
                    [output[i][0], output[i][1], output[i][2]])

            # TODO: Change this to a functional standard using the loss functions.

            loss = torch.mean(
                (output -
                 dataset.extract_targets(float_data).cuda())**2).data.tolist()
            mean_error = torch.mean(
                torch.abs(
                    output -
                    dataset.extract_targets(float_data).cuda())).data.tolist()
            accumulated_error += mean_error
            accumulated_loss += loss
            error = torch.abs(output -
                              dataset.extract_targets(float_data).cuda())

            # Log a random position
            position = random.randint(0, len(float_data) - 1)
            #print (output[position].data.tolist())
            coil_logger.add_message(
                'Iterating in Validation', {
                    'Checkpoint':
                    latest,
                    'Iteration': (str(iteration_on_checkpoint * 120) + '/' +
                                  str(len(dataset))),
                    'MeanError':
                    mean_error,
                    'Loss':
                    loss,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(float_data)
                    [position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(float_data)[position].data.tolist()
                }, latest)
            iteration_on_checkpoint += 1

        checkpoint_average_loss = accumulated_loss / len(dataset)
        checkpoint_average_error = accumulated_error / len(dataset)
        coil_logger.add_scalar('Loss', checkpoint_average_loss, latest)
        coil_logger.add_scalar('Error', checkpoint_average_error, latest)
        print('Loss: ', checkpoint_average_loss, "----Error: ",
              checkpoint_average_error)

        if checkpoint_average_loss < best_loss:
            best_loss = checkpoint_average_loss
            best_loss_iter = latest

            state = {
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'best_loss_iter': best_loss_iter
            }
            # TODO : maybe already summarize the best model ???
            torch.save(
                state,
                os.path.join('_logs', exp_batch, exp_alias,
                             'best_model_l2' + '.pth'))

        if checkpoint_average_error < best_error:
            best_error = checkpoint_average_error
            best_error_iter = latest

            state = {
                'state_dict': model.state_dict(),
                'best_error': best_error,
                'best_error_iter': best_error_iter
            }
            # TODO : maybe already summarize the best model ???
            torch.save(
                state,
                os.path.join('_logs', exp_batch, exp_alias,
                             'best_model_l1' + '.pth'))

        print('Best Loss: ', best_loss, "Checkpoint", best_loss_iter)
        print('Best Error: ', best_error, "Checkpoint", best_error_iter)

        coil_logger.add_message(
            'Iterating in Validation', {
                'Summary': {
                    'Error': checkpoint_average_error,
                    'Loss': checkpoint_average_loss,
                    'BestError': best_error,
                    'BestLoss': best_loss,
                    'BestLossCheckpoint': best_loss_iter,
                    'BestErrorCheckpoint': best_error_iter
                },
                'Checkpoint': latest
            })
Example #12
0
                    required=True,
                    help='path to save gradcam heatmap')
parser.add_argument('--type',
                    type=str,
                    required=True,
                    help='type of evaluation')

args = parser.parse_args()

merge_with_yaml(args.config)

os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

augmenter = Augmenter(None)
dataset = CoILDataset(args.dataset_path,
                      transform=augmenter,
                      preload_name=args.preload_name)

dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=g_conf.BATCH_SIZE,
    shuffle=False,
    num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
    pin_memory=True)

model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
model = model.cuda()

checkpoint = torch.load(args.checkpoint)
model.load_state_dict(checkpoint['state_dict'])
def execute(gpu, exp_batch, exp_alias):

    from time import gmtime, strftime

    manualSeed = g_conf.SEED
    torch.cuda.manual_seed(manualSeed)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.TRAIN_DATASET_NAME)
    real_dataset = g_conf.TARGET_DOMAIN_PATH
    #main data loader
    dataset = CoILDataset(full_dataset,
                          real_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    sampler = BatchSequenceSampler(
        splitter.control_steer_split(dataset.measurements,
                                     dataset.meta_data), g_conf.BATCH_SIZE,
        g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_sampler=sampler,
                                              shuffle=False,
                                              num_workers=6,
                                              pin_memory=True)

    st = lambda aug: iag.Sometimes(aug, 0.4)
    oc = lambda aug: iag.Sometimes(aug, 0.3)
    rl = lambda aug: iag.Sometimes(aug, 0.09)
    augmenter = iag.Augmenter([iag.ToGPU()] + [
        rl(iag.GaussianBlur(
            (0, 1.5))),  # blur images with a sigma between 0 and 1.5
        rl(iag.AdditiveGaussianNoise(loc=0, scale=(
            0.0, 0.05), per_channel=0.5)),  # add gaussian noise to images
        oc(iag.Dropout((0.0, 0.10), per_channel=0.5)
           ),  # randomly remove up to X% of the pixels
        oc(
            iag.CoarseDropout(
                (0.0, 0.10), size_percent=(0.08, 0.2),
                per_channel=0.5)),  # randomly remove up to X% of the pixels
        oc(iag.Add((-40, 40), per_channel=0.5)
           ),  # change brightness of images (by -X to Y of original value)
        st(iag.Multiply((0.10, 2), per_channel=0.2)
           ),  # change brightness of images (X-Y% of original value)
        rl(iag.ContrastNormalization(
            (0.5, 1.5), per_channel=0.5)),  # improve or worsen the contrast
        rl(iag.Grayscale((0.0, 1))),  # put grayscale
    ]  # do all of the above in random order
                              )

    l1weight = g_conf.L1_WEIGHT
    task_adv_weight = g_conf.TASK_ADV_WEIGHT
    image_size = tuple([88, 200])

    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    print("GPU", gpu)
    print("Configurations of ", exp_alias)
    print("GANMODEL_NAME", g_conf.GANMODEL_NAME)
    print("LOSS_FUNCTION", g_conf.LOSS_FUNCTION)
    print("LR_G, LR_D, LR", g_conf.LR_G, g_conf.LR_D, g_conf.LEARNING_RATE)
    print("SKIP", g_conf.SKIP)
    print("TYPE", g_conf.TYPE)
    print("L1 WEIGHT", g_conf.L1_WEIGHT)
    print("TASK ADV WEIGHT", g_conf.TASK_ADV_WEIGHT)
    print("LAB SMOOTH", g_conf.LABSMOOTH)

    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        netD = ganmodels._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels._netG(loss=g_conf.LOSS_FUNCTION,
                               skip=g_conf.SKIP).cuda()
    elif g_conf.GANMODEL_NAME == 'LSDcontrol_nopatch':
        netD = ganmodels_nopatch._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_nopatch._netG(loss=g_conf.LOSS_FUNCTION).cuda()
    elif g_conf.GANMODEL_NAME == 'LSDcontrol_nopatch_smaller':
        netD = ganmodels_nopatch_smaller._netD(
            loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_nopatch_smaller._netG(
            loss=g_conf.LOSS_FUNCTION).cuda()

    elif g_conf.GANMODEL_NAME == 'LSDcontrol_task':
        netD_task = ganmodels_task._netD_task(loss=g_conf.LOSS_FUNCTION).cuda()
        netD_img = ganmodels_task._netD_img(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_task._netG(loss=g_conf.LOSS_FUNCTION).cuda()
        netF = ganmodels_task._netF(loss=g_conf.LOSS_FUNCTION).cuda()

        if g_conf.PRETRAINED == 'RECON':
            netF_statedict = torch.load('netF_GAN_Pretrained.wts')
            netF.load_state_dict(netF_statedict)

        elif g_conf.PRETRAINED == 'IL':
            print("Loading IL")
            model_IL = torch.load('best_loss_20-06_EpicClearWeather.pth')
            model_IL_state_dict = model_IL['state_dict']

            netF_state_dict = netF.state_dict()

            print(len(netF_state_dict.keys()), len(model_IL_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netF_state_dict.keys(), model_IL_state_dict.keys())):
                newkey, oldkey = keys
                # if newkey.split('.')[0] == "branch" and oldkey.split('.')[0] == "branches":
                #     print("No Transfer of ",  newkey, " to ", oldkey)
                # else:
                print("Transferring ", newkey, " to ", oldkey)
                netF_state_dict[newkey] = model_IL_state_dict[oldkey]

            netF.load_state_dict(netF_state_dict)
            print("IL Model Loaded!")

    elif g_conf.GANMODEL_NAME == 'LSDcontrol_task_2d':
        netD_bin = ganmodels_task._netD_task().cuda()
        netD_img = ganmodels_task._netD_img().cuda()
        netG = ganmodels_task._netG().cuda()
        netF = ganmodels_task._netF().cuda()

        if g_conf.PRETRAINED == 'IL':
            print("Loading IL")
            model_IL = torch.load(g_conf.IL_AGENT_PATH)
            model_IL_state_dict = model_IL['state_dict']

            netF_state_dict = netF.state_dict()

            print(len(netF_state_dict.keys()), len(model_IL_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netF_state_dict.keys(), model_IL_state_dict.keys())):
                newkey, oldkey = keys
                print("Transferring ", newkey, " to ", oldkey)
                netF_state_dict[newkey] = model_IL_state_dict[oldkey]

            netF.load_state_dict(netF_state_dict)
            print("IL Model Loaded!")

            ####
            if g_conf.IF_AUG:
                print("Loading Aug Decoder")
                model_dec = torch.load(g_conf.DECODER_RECON_PATH)
            else:
                print("Loading Decoder")
                model_dec = torch.load(g_conf.DECODER_RECON_PATH)
            model_dec_state_dict = model_dec['stateG_dict']

            netG_state_dict = netG.state_dict()

            print(len(netG_state_dict.keys()),
                  len(model_dec_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netG_state_dict.keys(), model_dec_state_dict.keys())):
                newkey, oldkey = keys
                print("Transferring ", newkey, " to ", oldkey)
                netG_state_dict[newkey] = model_dec_state_dict[oldkey]

            netG.load_state_dict(netG_state_dict)
            print("Decoder Model Loaded!")

    init_weights(netD_bin)
    init_weights(netD_img)
    # init_weights(netG)

    print(netD_bin)
    print(netF)

    optimD_bin = torch.optim.Adam(netD_bin.parameters(),
                                  lr=g_conf.LR_D,
                                  betas=(0.5, 0.999))
    optimD_img = torch.optim.Adam(netD_img.parameters(),
                                  lr=g_conf.LR_D,
                                  betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(),
                              lr=g_conf.LR_D,
                              betas=(0.5, 0.999))
    if g_conf.TYPE == 'task':
        optimF = torch.optim.Adam(netF.parameters(), lr=g_conf.LEARNING_RATE)
        Task_Loss = TaskLoss()

    if g_conf.GANMODEL_NAME == 'LSDcontrol_task_2d':
        print("Using cross entropy!")
        Loss = torch.nn.CrossEntropyLoss().cuda()

    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter_F = 0
    best_loss_iter_G = 0
    best_lossF = 1000000.0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0
    gen_iterations = 0
    n_critic = g_conf.N_CRITIC

    lossF = Variable(torch.Tensor([100.0]))
    lossG_adv = Variable(torch.Tensor([100.0]))
    lossG_smooth = Variable(torch.Tensor([100.0]))
    lossG = Variable(torch.Tensor([100.0]))

    netD_bin.train()
    netD_img.train()
    netG.train()
    netF.train()
    capture_time = time.time()

    if not os.path.exists('./imgs_' + exp_alias):
        os.mkdir('./imgs_' + exp_alias)

    #TODO check how C network is optimized in LSDSEG
    #TODO put family for losses
    #IMPORTANT WHILE RUNNING THIS, CONV.PY MUST HAVE BATCHNORMS

    fake_img_pool_src = ImagePool(50)
    fake_img_pool_tgt = ImagePool(50)

    for data in data_loader:

        set_requires_grad(netD_bin, True)
        set_requires_grad(netD_img, True)
        set_requires_grad(netG, True)
        set_requires_grad(netF, True)

        # print("ITERATION:", iteration)

        val = 0.0
        input_data, float_data, tgt_imgs = data

        if g_conf.IF_AUG:
            inputs = augmenter(0, input_data['rgb'])
            tgt_imgs = augmenter(0, tgt_imgs)
        else:
            inputs = input_data['rgb'].cuda()
            tgt_imgs = tgt_imgs.cuda()

        inputs = inputs.squeeze(1)
        inputs = inputs - val  # shifted by val (0.0 here, so effectively unchanged)
        tgt_imgs = tgt_imgs - val  # shifted by val (0.0 here, so effectively unchanged)

        controls = float_data[:, dataset.controls_position(), :]

        src_embed_inputs, src_branches = netF(
            inputs,
            dataset.extract_inputs(float_data).cuda())
        tgt_embed_inputs = netF(tgt_imgs, None)

        src_img_fake = netG(src_embed_inputs)
        tgt_img_fake = netG(tgt_embed_inputs)

        if iteration % 100 == 0:
            imgs_to_save = torch.cat(
                (inputs[:1] + val, src_img_fake[:1] + val, tgt_imgs[:1] + val,
                 tgt_img_fake[:1] + val), 0).cpu().data
            coil_logger.add_image("Images", imgs_to_save, iteration)
            imgs_to_save = imgs_to_save.clamp(0.0, 1.0)
            vutils.save_image(imgs_to_save,
                              './imgs_' + exp_alias + '/' + str(iteration) +
                              '_real_and_fake.png',
                              normalize=False)

        ##--------------------Discriminator part!!!!!!!!!!-------------------##
        set_requires_grad(netD_bin, True)
        set_requires_grad(netD_img, False)
        set_requires_grad(netG, False)
        set_requires_grad(netF, False)
        optimD_bin.zero_grad()

        outputsD_real_src_bin = netD_bin(src_embed_inputs)
        outputsD_real_tgt_bin = netD_bin(tgt_embed_inputs)

        gradient_penalty = calc_gradient_penalty(netD_bin, src_embed_inputs,
                                                 tgt_embed_inputs)
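        # WGAN-GP style critic loss: minimizing mean(D(tgt) - D(src)) pushes the
        # source-domain embedding scores up and the target-domain ones down,
        # while the gradient penalty above enforces the Lipschitz constraint.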
        lossD_bin = torch.mean(outputsD_real_tgt_bin -
                               outputsD_real_src_bin) + gradient_penalty
        lossD_bin.backward(retain_graph=True)
        optimD_bin.step()

        coil_logger.add_scalar('Total LossD Bin', lossD_bin.data, iteration)

        #### Discriminator img update ####
        set_requires_grad(netD_bin, False)
        set_requires_grad(netD_img, True)
        set_requires_grad(netG, False)
        set_requires_grad(netF, False)

        optimD_img.zero_grad()
        outputsD_fake_src_img = netD_img(src_img_fake.detach())
        outputsD_fake_tgt_img = netD_img(tgt_img_fake.detach())

        outputsD_real_src_img = netD_img(inputs)
        outputsD_real_tgt_img = netD_img(tgt_imgs)

        gradient_penalty_src = calc_gradient_penalty(netD_img, inputs,
                                                     src_img_fake)
        lossD_img_src = torch.mean(
            outputsD_fake_src_img -
            outputsD_real_src_img) + gradient_penalty_src

        gradient_penalty_tgt = calc_gradient_penalty(netD_img, tgt_imgs,
                                                     tgt_img_fake)
        lossD_img_tgt = torch.mean(
            outputsD_fake_tgt_img -
            outputsD_real_tgt_img) + gradient_penalty_tgt

        lossD_img = (lossD_img_src + lossD_img_tgt) * 0.5
        lossD_img.backward(retain_graph=True)
        optimD_img.step()

        coil_logger.add_scalar('Total LossD img', lossD_img.data, iteration)

        if ((iteration + 1) % n_critic) == 0:

            #####Generator updates#######
            set_requires_grad(netD_bin, False)
            set_requires_grad(netD_img, False)
            set_requires_grad(netG, True)
            set_requires_grad(netF, False)

            outputsD_fake_src_img = netD_img(src_img_fake)
            outputsD_real_tgt_img = netD_img(tgt_imgs)
            outputsD_fake_tgt_img = netD_img(tgt_img_fake)

            lossG_src_smooth = L1_loss(src_img_fake, inputs)
            lossG_tgt_smooth = L1_loss(tgt_img_fake, tgt_imgs)
            lossG_smooth = (lossG_src_smooth + lossG_tgt_smooth) * 0.5

            lossG_adv = 0.5 * (-1.0 * outputsD_fake_src_img.mean() -
                               1.0 * outputsD_fake_tgt_img.mean())
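            # NOTE: lossG_adv is weighted by 0.0 in the line below, so in practice
            # only the L1 reconstruction term drives this generator update.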
            lossG = (lossG_smooth + 0.0 * lossG_adv)
            lossG.backward(retain_graph=True)
            optimG.step()

            coil_logger.add_scalar('Total LossG', lossG.data, iteration)

            #####Task network updates##########################
            set_requires_grad(netD_bin, False)
            set_requires_grad(netD_img, False)
            set_requires_grad(netG, False)
            set_requires_grad(netF, True)
            optimF.zero_grad()

            src_embed_inputs, src_branches = netF(
                inputs,
                dataset.extract_inputs(float_data).cuda())
            tgt_embed_inputs = netF(tgt_imgs, None)
            src_img_fake = netG(src_embed_inputs)
            tgt_img_fake = netG(tgt_embed_inputs)

            outputsD_fake_src_img = netD_img(src_img_fake)
            outputsD_real_tgt_img = netD_img(tgt_imgs)

            lossF_task = Task_Loss.MSELoss(
                src_branches,
                dataset.extract_targets(float_data).cuda(), controls.cuda(),
                dataset.extract_inputs(float_data).cuda())

            lossF_adv_bin = netD_bin(src_embed_inputs).mean() - netD_bin(
                tgt_embed_inputs).mean()
            lossF_adv_img = outputsD_fake_src_img.mean(
            ) - outputsD_real_tgt_img.mean()
            lossF_adv = 0.5 * (lossF_adv_bin + 0.1 * lossF_adv_img)
            lossF = (lossF_task + task_adv_weight * lossF_adv)

            coil_logger.add_scalar('Total Task Loss', lossF.data, iteration)
            coil_logger.add_scalar('Adv Task Loss', lossF_adv.data, iteration)
            coil_logger.add_scalar('Only Task Loss', lossF_task.data,
                                   iteration)
            lossF.backward(retain_graph=True)
            optimF.step()

            if lossF.data < best_lossF:
                best_lossF = lossF.data.tolist()
                best_loss_iter_F = iteration

        #optimization for one iter done!

        position = random.randint(0, len(float_data) - 1)

        accumulated_time += time.time() - capture_time
        capture_time = time.time()

        if is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'stateD_bin_dict': netD_bin.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'total_time': accumulated_time,
                'best_loss_iter_F': best_loss_iter_F
            }
            torch.save(
                state,
                os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch,
                             exp_alias, 'checkpoints',
                             str(iteration) + '.pth'))

        if iteration == best_loss_iter_F and iteration > 10000:

            state = {
                'iteration': iteration,
                'stateD_bin_dict': netD_bin.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'best_lossF': best_lossF,
                'total_time': accumulated_time,
                'best_loss_iter_F': best_loss_iter_F
            }
            torch.save(
                state,
                os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch,
                             exp_alias, 'best_modelF' + '.pth'))

        iteration += 1
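
# The set_requires_grad helper used throughout the loop above is not shown in
# this snippet; a minimal sketch of the assumed behaviour (mirroring the common
# pix2pix/CycleGAN training utilities) is:
def set_requires_grad(net, requires_grad):
    # Toggle gradient computation for every parameter so that only the
    # sub-networks being updated in a given step accumulate gradients.
    for param in net.parameters():
        param.requires_grad = requires_grad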
Example #14
0
def execute(gpu, exp_batch, exp_alias, validation_dataset, suppress_output):
    latest = None
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     f'{exp_alias}.yaml'))
        # The validation dataset is always fully loaded, so we fix a very high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process(process_type='validation',
                            param=validation_dataset)

        # Save the output to a file if so desired
        if suppress_output:
            save_output(exp_alias)

        # Define the dataset. This structure has __getitem__ redefined in a way
        # that you can access the HDF5 file positions from the root directory as in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    validation_dataset)
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=validation_dataset,
                              process_type='validation')

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each batch.

        # The data loader is the multi-threaded module from PyTorch that releases a number of
        # workers to get all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION,
                          g_conf.SENSORS).cuda()
        # The window used to keep track of the validation results across checkpoints
        l1_window = []
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:  # When there is a previously evaluated checkpoint
            l1_window = coil_logger.recover_loss_window(
                validation_dataset, None)

        # Keep track of the best loss and the iteration where it happens
        best_loss = 1000
        best_loss_iter = 0

        print(20 * '#')
        print('Starting validation!')
        print(20 * '#')

        # Check if the maximum checkpoint for validating has been reached
        while not maximum_checkpoint_reached(latest):
            # Wait until the next checkpoint is ready (assuming this is run whilst training the model)
            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
                # Get next checkpoint for validation according to the test schedule and load it
                latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 f'{latest}.pth'))
                checkpoint_iteration = checkpoint['iteration']

                model.load_state_dict(checkpoint['state_dict'])
                model.eval()  # Turn off dropout and batchnorm (if any)
                print(f"Validation loaded, checkpoint {checkpoint_iteration}")

                # Main metric will be the used loss for training the network
                criterion = Loss(g_conf.LOSS_FUNCTION)
                checkpoint_average_loss = 0

                # Counter
                iteration_on_checkpoint = 0

                with torch.no_grad():  # save some computation/memory
                    for data in data_loader:
                        # Compute the forward pass on a batch from the validation dataset
                        controls = data['directions'].cuda()
                        img = torch.squeeze(data['rgb']).cuda()
                        speed = dataset.extract_inputs(
                            data).cuda()  # this might not always be speed

                        # For auxiliary metrics
                        output = model.forward_branch(img, speed, controls)

                        # For the loss function
                        branches = model(img, speed)
                        loss_function_params = {
                            'branches': branches,
                            'targets': dataset.extract_targets(data).cuda(),
                            'controls': controls,
                            'inputs': speed,
                            'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                            'variable_weights': g_conf.VARIABLE_WEIGHT
                        }
                        # It could be either waypoints or direct control
                        if 'waypoint1_angle' in g_conf.TARGETS:
                            write_waypoints_output(checkpoint_iteration,
                                                   output)
                        else:
                            write_regular_output(checkpoint_iteration, output)

                        loss, _ = criterion(loss_function_params)
                        loss = loss.data.tolist()

                        # Log a random position
                        position = random.randint(
                            0,
                            len(output.data.tolist()) - 1)

                        coil_logger.add_message(
                            'Iterating', {
                                'Checkpoint':
                                latest,
                                'Iteration':
                                f'{iteration_on_checkpoint * g_conf.BATCH_SIZE}/{len(dataset)}',
                                f'Validation Loss ({g_conf.LOSS_FUNCTION})':
                                loss,
                                'Output':
                                output[position].data.tolist(),
                                'GroundTruth':
                                dataset.extract_targets(
                                    data)[position].data.tolist(),
                                'Inputs':
                                dataset.extract_inputs(data)
                                [position].data.tolist()
                            }, latest)

                        # We get the average with a growing list of values
                        # Thanks to John D. Cook: http://www.johndcook.com/blog/standard_deviation/
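                        # i.e. the incremental mean m_k = m_{k-1} + (x_k - m_{k-1}) / k,
                        # which avoids storing every per-batch loss.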
                        iteration_on_checkpoint += 1
                        checkpoint_average_loss += (
                            loss -
                            checkpoint_average_loss) / iteration_on_checkpoint

                        print(
                            f"\rProgress: {100 * iteration_on_checkpoint * g_conf.BATCH_SIZE / len(dataset):3.4f}% - "
                            f"Average Loss ({g_conf.LOSS_FUNCTION}): {checkpoint_average_loss:.16f}",
                            end='')
                """
                    ########
                    Finish a round of validation, write results, wait for the next
                    ########
                """

                coil_logger.add_scalar(
                    f'Validation Loss ({g_conf.LOSS_FUNCTION})',
                    checkpoint_average_loss, latest, True)

                # Let's visualize the distribution of the loss
                coil_logger.add_histogram(
                    f'Validation Checkpoint Loss ({g_conf.LOSS_FUNCTION})',
                    checkpoint_average_loss, latest)

                if checkpoint_average_loss < best_loss:
                    best_loss = checkpoint_average_loss
                    best_loss_iter = latest

                coil_logger.add_message(
                    'Iterating', {
                        'Summary': {
                            'Loss': checkpoint_average_loss,
                            'BestLoss': best_loss,
                            'BestLossCheckpoint': best_loss_iter
                        },
                        'Checkpoint': latest
                    }, latest)

                l1_window.append(checkpoint_average_loss)
                coil_logger.write_on_error_csv(validation_dataset,
                                               checkpoint_average_loss, latest)

                # If we are using the finish when validation stops, we check the current checkpoint
                if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                    if dlib.count_steps_without_decrease(l1_window) > 3 and \
                            dlib.count_steps_without_decrease_robust(l1_window) > 3:
                        coil_logger.write_stop(validation_dataset, latest)
                        break

            else:
                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)

                coil_logger.add_message('Loading',
                                        {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        print('\n' + 20 * '#')
        print('Finished validation!')
        print(20 * '#')
        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
Example #15
0
def execute(gpu, exp_batch, exp_alias):

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.TRAIN_DATASET_NAME)
    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    sampler = BatchSequenceSampler(
        splitter.control_steer_split(dataset.measurements,
                                     dataset.meta_data), g_conf.BATCH_SIZE,
        g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
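    # BatchSequenceSampler (built from the control/steer split) yields whole batches of
    # indices, so it is passed to the DataLoader as batch_sampler; batch_size is therefore
    # not set and shuffle=False below is effectively a no-op.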
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_sampler=sampler,
                                              shuffle=False,
                                              num_workers=6,
                                              pin_memory=True)

    l1weight = 1.0
    image_size = tuple([88, 200])
    testmode = 1

    # print("helllooooo", g_conf.MODEL_NAME)
    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        # netD = ganmodels._netD().cuda()
        netG = ganmodels._netG(skip=g_conf.SKIP).cuda()
    # else:
    #     netD = ganmodels._oldnetD().cuda()
    #     netG = ganmodels._oldnetG().cuda()

    # init_weights(netD)
    init_weights(netG)
    # print(netD)
    print(netG)

    # optimD = torch.optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(), lr=0.0002, betas=(0.7, 0.999))
    MSE_loss = torch.nn.MSELoss().cuda()
    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter = 0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0

    netG.eval()
    # netD.eval()
    capture_time = time.time()
    for data in data_loader:

        val = 0.5
        input_data, float_data = data
        inputs = input_data['rgb'].cuda()
        inputs = inputs.squeeze(1)
        inputs_in = inputs - val
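        # Shift the (presumably [0, 1]) RGB tensor to roughly [-0.5, 0.5] before the
        # generator; val is added back when the images are saved for inspection below.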

        #forward pass
        # print(inputs[0][0][0][0], inputs_in[0][0][0][0])
        fake_inputs = netG(inputs_in)  #subtracted by 0.5
        fake_inputs_in = fake_inputs
        # print(fake_inputs[0][0][0][0], fake_inputs_in[0][0][0][0])
        if iteration % 200 == 0:
            imgs_to_save = torch.cat((inputs_in[:2] + val, fake_inputs_in[:2]),
                                     0).cpu().data
            vutils.save_image(imgs_to_save,
                              './noganimgs/' + str(iteration) +
                              'noganreal_samples.png',
                              normalize=True)
            coil_logger.add_image("Images", imgs_to_save, iteration)

        optimG.zero_grad()

        print("~~~~~~~~~__________")
        print(inputs_in[0][0][0][0])
        print(fake_inputs[0][0][0][0])
        lossG_mse = MSE_loss(fake_inputs, inputs)
        print(lossG_mse)
        lossG_mse /= len(inputs_in)

        print("~~~~~~~~~__________--------------")

        lossG_mse.backward()  #retain_graph=True needed?
        optimG.step()

        coil_logger.add_scalar('MSE LossG', lossG_mse.data / len(inputs_in),
                               iteration)

        #optimization for one iter done!

        position = random.randint(0, len(float_data) - 1)

        # if lossD.data < best_lossD:
        #     best_lossD = lossD.data.tolist()

        if lossG_mse.data < best_lossG:
            best_lossG = lossG_mse.data.tolist()
            best_loss_iter = iteration

        accumulated_time += time.time() - capture_time
        capture_time = time.time()
        # print("LossD", lossD.data.tolist(), "LossG", lossG.data.tolist(), "BestLossD", best_lossD, "BestLossG", best_lossG, "Iteration", iteration, "Best Loss Iteration", best_loss_iter)

        # coil_logger.add_message('Iterating',
        #                         {'Iteration': iteration,
        #                             'LossD': lossD.data.tolist(),
        #                             'LossG': lossG.data.tolist(),
        #                             'Images/s': (iteration*g_conf.BATCH_SIZE)/accumulated_time,
        #                             'BestLossD': best_lossD, 'BestLossIteration': best_loss_iter,
        #                             'BestLossG': best_lossG, 'BestLossIteration': best_loss_iter,
        #                             'GroundTruth': dataset.extract_targets(float_data)[position].data.tolist(),
        #                             'Inputs': dataset.extract_inputs(float_data)[position].data.tolist()},
        #                         iteration)
        # if is_ready_to_save(iteration):
        #
        #     state = {
        #         'iteration': iteration,
        #         'stateD_dict': netD.state_dict(),
        #         'stateG_dict': netG.state_dict(),
        #         'best_lossD': best_lossD,
        #         'best_lossG': best_lossG,
        #         'total_time': accumulated_time,
        #         'best_loss_iter': best_loss_iter
        #
        #     }
        #     torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
        #                                    , 'checkpoints', str(iteration) + '.pth'))
        # if iteration == best_loss_iter:
        #
        #     state = {
        #         'iteration': iteration,
        #         'stateD_dict': netD.state_dict(),
        #         'stateG_dict': netG.state_dict(),
        #         'best_lossD': best_lossD,
        #         'best_lossG': best_lossG,
        #         'total_time': accumulated_time,
        #         'best_loss_iter': best_loss_iter
        #
        #     }
        #     torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
        #                                    , 'best_modelG' + '.pth'))
        #
        iteration += 1
Example #16
def gather_data():
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'


    exp_batch = 'EXP'
    exp_alias = 'customized'
    number_of_workers = 12
    encoder_params = {'encoder_checkpoint': 10000,
                      'encoder_folder': 'ENCODER',
                      'encoder_exp': 'customized'}



    try:
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'), encoder_params)
        g_conf.PROCESS_NAME = 'analyze_embeddings'


        # We can cache the preloaded dataset based on the json file name, so the same dataset does not need to be reloaded every time
        if len(g_conf.EXPERIENCE_FILE) == 1:
            json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2]
        else:
            json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2] + '_' + str(g_conf.EXPERIENCE_FILE[1]).split('/')[-1].split('.')[-2]
        dataset = CoILDataset(transform=None,
                              preload_name=g_conf.PROCESS_NAME + '_' + json_file_name + '_' + g_conf.DATA_USED)

        data_loader = torch.utils.data.DataLoader(dataset, batch_size=g_conf.BATCH_SIZE, sampler=None, num_workers=12, pin_memory=True)


        print ("Loaded Training dataset")

        # we use the pre-trained encoder model to extract bottleneck Z and train the E-t-E model
        iteration = 0
        if g_conf.MODEL_TYPE in ['separate-affordances']:
            encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION)
            encoder_model.cuda()
            encoder_model.eval()
            # To freeze the pre-trained encoder model
            if g_conf.FREEZE_ENCODER:
                for param_ in encoder_model.parameters():
                    param_.requires_grad = False

            encoder_checkpoint = torch.load(
                os.path.join('_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints',
                             str(encoder_params['encoder_checkpoint']) + '.pth'))
            print("Encoder model ", str(encoder_params['encoder_checkpoint']), "loaded from ",
                  os.path.join('_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints'))
            encoder_model.load_state_dict(encoder_checkpoint['state_dict'])

        print('g_conf.ENCODER_MODEL_TYPE', g_conf.ENCODER_MODEL_TYPE)



        behaviors = []
        route_folder = 'customized'
        route_files = os.listdir(route_folder)
        count = 0

        kept_inds = []
        total_length = 0
        for route_file in sorted(route_files):
            print('-'*100, route_file)
            route_path = os.path.join(route_folder, route_file)
            if os.path.isdir(route_path):
                measurements_path = route_path + '/' + 'driving_log.csv'

                import pandas as pd
                df = pd.read_csv(measurements_path, delimiter=',')

                # Remove cases where ego_linear_speed < 0.1 or other_actor_linear_speed < 0
                ego_fault = True
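                # Hedged reading of the filter below: a route keeps ego_fault == True only if
                # every recorded infraction is a collision with a walker in which the ego was
                # moving (ego_linear_speed >= 0.1) and the walker was not moving backwards;
                # any other collision or located infraction discards the route.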

                events_path = route_path + '/' + 'events.txt'
                with open(events_path) as json_file:
                    events = json.load(json_file)
                infractions = events['_checkpoint']['records'][0]['infractions']
                import re
                infraction_types = ['collisions_layout', 'collisions_pedestrian', 'collisions_vehicle', 'red_light', 'on_sidewalk', 'outside_lane_infraction', 'wrong_lane', 'off_road']
                for infraction_type in infraction_types:
                    for infraction in infractions[infraction_type]:
                        if 'collisions' in infraction_type:
                            typ = re.search('.*with type=(.*) and id.*', infraction).group(1)
                            if 'walker' not in typ:
                                ego_fault = False
                            else:
                                loc = re.search('.*x=(.*), y=(.*), z=(.*), ego_linear_speed=(.*), other_actor_linear_speed=(.*)\)', infraction)
                                if loc:
                                    ego_linear_speed = float(loc.group(4))
                                    other_actor_linear_speed = float(loc.group(5))
                                    if ego_linear_speed < 0.1 or other_actor_linear_speed < 0:
                                        ego_fault = False
                        else:
                            loc = re.search('.*x=(.*), y=(.*), z=(.*)', infraction)
                            if loc:
                                ego_fault = False

                cur_behaviors = df[['behaviors']].to_numpy()
                if ego_fault:
                    behaviors.append(cur_behaviors)
                    kept_inds.append(np.array(range(total_length, total_length+cur_behaviors.shape[0])))
                    count += 1
                total_length += cur_behaviors.shape[0]

        kept_inds = np.concatenate(kept_inds)

        print(f'{count}/{len(route_files)} are used')

        misbehavior_labels = np.concatenate(behaviors).squeeze()
        print(misbehavior_labels.shape, len(dataset))
        print(np.sum(misbehavior_labels==0), np.sum(misbehavior_labels==1))

        num_of_iterations = len(dataset) // g_conf.BATCH_SIZE
        image_embeddings = []
        for data in data_loader:
            inputs_data = torch.squeeze(data['rgb'].cuda())

            # separate_affordance
            e, inter = encoder_model.forward_encoder(inputs_data,
                                                      dataset.extract_inputs(data).cuda(),torch.squeeze(dataset.extract_commands(data).cuda()))
            # print(len(e), e[0].shape)
            image_embeddings.append(e.cpu())
            iteration += 1
            if iteration == num_of_iterations:
                break
        image_embeddings = np.concatenate(image_embeddings, axis=0)
        image_embeddings = image_embeddings[kept_inds]

        print(image_embeddings.shape, misbehavior_labels.shape)
        np.savez('misbehavior/embeddings_and_misbehavior_labels', image_embeddings=image_embeddings, misbehavior_labels=misbehavior_labels[:len(image_embeddings)])

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #17
def execute(gpu,
            exp_batch,
            exp_alias,
            json_file_path,
            suppress_output,
            encoder_params=None,
            plot_attentions=False):
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        if json_file_path is not None:
            json_file_name = json_file_path.split('/')[-1].split('.')[-2]
        else:
            raise RuntimeError(
                "You need to define the validation json file path")

        # At this point the log file with the correct naming is created.
        merge_with_yaml(
            os.path.join('configs', exp_batch, exp_alias + '.yaml'),
            encoder_params)
        if plot_attentions:
            set_type_of_process('validation',
                                json_file_name + '_plotAttention')
        else:
            set_type_of_process('validation', json_file_name)

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        # We create file for saving validation results
        summary_file = os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME,
                                    g_conf.PROCESS_NAME + '_csv',
                                    'valid_summary_1camera.csv')
        g_conf.immutable(False)
        g_conf.DATA_USED = 'central'
        g_conf.immutable(True)
        if not os.path.exists(summary_file):
            csv_outfile = open(summary_file, 'w')
            csv_outfile.write(
                "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n" %
                ('step', 'accumulated_pedestrian_TP',
                 'accumulated_pedestrian_FP', 'accumulated_pedestrian_FN',
                 'accumulated_pedestrian_TN', 'accumulated_vehicle_stop_TP',
                 'accumulated_vehicle_stop_FP', 'accumulated_vehicle_stop_FN',
                 'accumulated_vehicle_stop_TN', 'accumulated_red_tl_TP',
                 'accumulated_red_tl_FP', 'accumulated_red_tl_FN',
                 'accumulated_red_tl_TN', 'MAE_relative_angle'))
            csv_outfile.close()

        latest = get_latest_evaluated_checkpoint_2(summary_file)

        # Define the dataset. This structure has __getitem__ redefined so that the
        # HDFILES positions can be accessed from the root directory as in a vector.
        #full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(transform=augmenter,
                              preload_name=g_conf.PROCESS_NAME + '_' +
                              g_conf.DATA_USED,
                              process_type='validation',
                              vd_json_file_path=json_file_path)
        print("Loaded Validation dataset")

        # Creates the sampler; this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each batch.

        # The data loader is the multi-threaded module from pytorch that releases a number
        # of workers to fetch all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        if g_conf.MODEL_TYPE in ['one-step-affordances']:
            # one-step training: no need to retrain FC layers, we just take the output of the encoder model as the prediction
            model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                 g_conf.ENCODER_MODEL_CONFIGURATION)
            model.cuda()
            #print(model)

        elif g_conf.MODEL_TYPE in ['separate-affordances']:
            model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION,
                              g_conf.ENCODER_MODEL_CONFIGURATION)
            model.cuda()
            #print(model)

            encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                         g_conf.ENCODER_MODEL_CONFIGURATION)
            encoder_model.cuda()
            encoder_model.eval()

            # Here we load the pre-trained encoder (not fine-tuned)
            if g_conf.FREEZE_ENCODER:
                if encoder_params is not None:
                    encoder_checkpoint = torch.load(
                        os.path.join(
                            '_logs', encoder_params['encoder_folder'],
                            encoder_params['encoder_exp'], 'checkpoints',
                            str(encoder_params['encoder_checkpoint']) +
                            '.pth'))
                    print(
                        "Encoder model ",
                        str(encoder_params['encoder_checkpoint']),
                        "loaded from ",
                        os.path.join('_logs', encoder_params['encoder_folder'],
                                     encoder_params['encoder_exp'],
                                     'checkpoints'))
                    encoder_model.load_state_dict(
                        encoder_checkpoint['state_dict'])
                    encoder_model.eval()
                for param_ in encoder_model.parameters():
                    param_.requires_grad = False

        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
            latest = get_next_checkpoint_2(g_conf.TEST_SCHEDULE, summary_file)
            if os.path.exists(
                    os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME,
                                 'checkpoints',
                                 str(latest) + '.pth')):
                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME,
                                 'checkpoints',
                                 str(latest) + '.pth'))
                checkpoint_iteration = checkpoint['iteration']
                model.load_state_dict(checkpoint['state_dict'])
                print("Validation checkpoint ", checkpoint_iteration)
                model.eval()
                for param_ in model.parameters():
                    param_.requires_grad = False

                # Here we load the fine-tuned encoder
                if not g_conf.FREEZE_ENCODER and g_conf.MODEL_TYPE not in [
                        'one-step-affordances'
                ]:
                    encoder_checkpoint = torch.load(
                        os.path.join('_logs', exp_batch,
                                     g_conf.EXPERIMENT_NAME, 'checkpoints',
                                     str(latest) + '_encoder.pth'))
                    print(
                        "FINE-TUNED encoder model ",
                        str(latest) + '_encoder.pth', "loaded from ",
                        os.path.join('_logs', exp_batch,
                                     g_conf.EXPERIMENT_NAME, 'checkpoints'))
                    encoder_model.load_state_dict(
                        encoder_checkpoint['state_dict'])
                    encoder_model.eval()
                    for param_ in encoder_model.parameters():
                        param_.requires_grad = False

                accumulated_mae_ra = 0
                accumulated_pedestrian_TP = 0
                accumulated_pedestrian_TN = 0
                accumulated_pedestrian_FN = 0
                accumulated_pedestrian_FP = 0

                accumulated_red_tl_TP = 0
                accumulated_red_tl_TN = 0
                accumulated_red_tl_FP = 0
                accumulated_red_tl_FN = 0

                accumulated_vehicle_stop_TP = 0
                accumulated_vehicle_stop_TN = 0
                accumulated_vehicle_stop_FP = 0
                accumulated_vehicle_stop_FN = 0

                iteration_on_checkpoint = 0

                for data in data_loader:
                    if g_conf.MODEL_TYPE in ['one-step-affordances']:
                        c_output, r_output, layers = model.forward_outputs(
                            torch.squeeze(data['rgb'].cuda()),
                            dataset.extract_inputs(data).cuda(),
                            dataset.extract_commands(data).cuda())

                    elif g_conf.MODEL_TYPE in ['separate-affordances']:
                        if g_conf.ENCODER_MODEL_TYPE in [
                                'action_prediction', 'stdim', 'ETEDIM',
                                'FIMBC', 'one-step-affordances'
                        ]:
                            e, layers = encoder_model.forward_encoder(
                                torch.squeeze(data['rgb'].cuda()),
                                dataset.extract_inputs(data).cuda(),
                                torch.squeeze(
                                    dataset.extract_commands(data).cuda()))
                            c_output, r_output = model.forward_test(e)

                        elif g_conf.ENCODER_MODEL_TYPE in [
                                'ETE', 'ETE_inverse_model', 'forward',
                                'ETE_stdim'
                        ]:
                            e, layers = encoder_model.forward_encoder(
                                torch.squeeze(data['rgb'].cuda()),
                                dataset.extract_inputs(data).cuda(),
                                torch.squeeze(
                                    dataset.extract_commands(data).cuda()))
                            c_output, r_output = model.forward_test(e)

                    if plot_attentions:
                        attentions_path = os.path.join(
                            '_logs', exp_batch, g_conf.EXPERIMENT_NAME,
                            g_conf.PROCESS_NAME + '_attentions_' + str(latest))

                        write_attentions(torch.squeeze(data['rgb']), layers,
                                         iteration_on_checkpoint,
                                         attentions_path)

                    # Accuracy = (TP+TN)/(TP+TN+FP+FN)
                    # F1-score = 2*TP / (2*TP + FN + FP)
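                    # (Only the raw counts are accumulated below; precision = TP/(TP+FP) and
                    #  recall = TP/(TP+FN) can be derived later from the totals written to the CSV.)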
                    classification_gt = dataset.extract_affordances_targets(
                        data, 'classification')
                    regression_gt = dataset.extract_affordances_targets(
                        data, 'regression')

                    TP = 0
                    FN = 0
                    FP = 0
                    TN = 0
                    for i in range(classification_gt.shape[0]):
                        if classification_gt[i, 0] == (
                                c_output[0][i, 0] < c_output[0][i, 1]).type(
                                    torch.FloatTensor) == 1:
                            TP += 1

                        elif classification_gt[
                                i, 0] == 1 and classification_gt[i, 0] != (
                                    c_output[0][i, 0] <
                                    c_output[0][i, 1]).type(torch.FloatTensor):
                            FN += 1

                        elif classification_gt[
                                i, 0] == 0 and classification_gt[i, 0] != (
                                    c_output[0][i, 0] <
                                    c_output[0][i, 1]).type(torch.FloatTensor):
                            FP += 1

                        if classification_gt[i, 0] == (
                                c_output[0][i, 0] < c_output[0][i, 1]).type(
                                    torch.FloatTensor) == 0:
                            TN += 1

                    accumulated_pedestrian_TP += TP
                    accumulated_pedestrian_TN += TN
                    accumulated_pedestrian_FP += FP
                    accumulated_pedestrian_FN += FN

                    TP = 0
                    FN = 0
                    FP = 0
                    TN = 0
                    for i in range(classification_gt.shape[0]):
                        if classification_gt[i, 1] == (
                                c_output[1][i, 0] < c_output[1][i, 1]).type(
                                    torch.FloatTensor) == 1:
                            TP += 1

                        elif classification_gt[
                                i, 1] == 1 and classification_gt[i, 1] != (
                                    c_output[1][i, 0] <
                                    c_output[1][i, 1]).type(torch.FloatTensor):
                            FN += 1

                        elif classification_gt[
                                i, 1] == 0 and classification_gt[i, 1] != (
                                    c_output[1][i, 0] <
                                    c_output[1][i, 1]).type(torch.FloatTensor):
                            FP += 1

                        if classification_gt[i, 1] == (
                                c_output[1][i, 0] < c_output[1][i, 1]).type(
                                    torch.FloatTensor) == 0:
                            TN += 1

                    accumulated_red_tl_TP += TP
                    accumulated_red_tl_TN += TN
                    accumulated_red_tl_FP += FP
                    accumulated_red_tl_FN += FN

                    TP = 0
                    FN = 0
                    FP = 0
                    TN = 0
                    for i in range(classification_gt.shape[0]):
                        if classification_gt[i, 2] == (
                                c_output[2][i, 0] < c_output[2][i, 1]).type(
                                    torch.FloatTensor) == 1:
                            TP += 1

                        elif classification_gt[i, 2] == 1 and classification_gt[i, 2] !=\
                                (c_output[2][i, 0] < c_output[2][i, 1]).type(torch.FloatTensor):
                            FN += 1

                        elif classification_gt[i, 2] == 0 and classification_gt[i, 2] !=\
                                (c_output[2][i, 0] < c_output[2][i, 1]).type(torch.FloatTensor):
                            FP += 1

                        if classification_gt[i, 2] == (
                                c_output[2][i, 0] < c_output[2][i, 1]).type(
                                    torch.FloatTensor) == 0:
                            TN += 1

                    accumulated_vehicle_stop_TP += TP
                    accumulated_vehicle_stop_TN += TN
                    accumulated_vehicle_stop_FP += FP
                    accumulated_vehicle_stop_FN += FN

                    # If the data was normalized during training, we need to transform it back to its original units

                    write_regular_output(checkpoint_iteration,
                                         torch.squeeze(r_output[0]),
                                         regression_gt[:, 0])
                    mae_ra = torch.abs(regression_gt[:, 0] -
                                       torch.squeeze(r_output[0]).type(torch.FloatTensor)).\
                                            numpy()
                    accumulated_mae_ra += np.sum(mae_ra)

                    if iteration_on_checkpoint % 100 == 0:
                        print(
                            "Validation iteration: %d [%d/%d] on Checkpoint %d "
                            %
                            (iteration_on_checkpoint, iteration_on_checkpoint,
                             len(data_loader), checkpoint_iteration))

                    iteration_on_checkpoint += 1

                # TODO: a finer analysis is needed here, e.g. split results into curves and other situations
                MAE_relative_angle = accumulated_mae_ra / (len(dataset))

                csv_outfile = open(summary_file, 'a')
                csv_outfile.write(
                    "%s, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f" %
                    (checkpoint_iteration, accumulated_pedestrian_TP,
                     accumulated_pedestrian_FP, accumulated_pedestrian_FN,
                     accumulated_pedestrian_TN, accumulated_vehicle_stop_TP,
                     accumulated_vehicle_stop_FP, accumulated_vehicle_stop_FN,
                     accumulated_vehicle_stop_TN, accumulated_red_tl_TP,
                     accumulated_red_tl_FP, accumulated_red_tl_FN,
                     accumulated_red_tl_TN, MAE_relative_angle))

                csv_outfile.write("\n")
                csv_outfile.close()

            else:
                print('The checkpoint you want to validate is not yet ready ',
                      str(latest))

        coil_logger.add_message('Finished', {})
        print('VALIDATION FINISHED !!')
        print('  Validation results saved in ==> ', summary_file)

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
Example #18
def execute(gpu, exp_batch, exp_alias):

    from time import gmtime, strftime

    manualSeed = g_conf.SEED
    torch.cuda.manual_seed(manualSeed)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.TRAIN_DATASET_NAME)
    real_dataset = g_conf.TARGET_DOMAIN_PATH
    # real_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], "FinalRealWorldDataset")

    #main data loader
    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    sampler = BatchSequenceSampler(
        splitter.control_steer_split(dataset.measurements,
                                     dataset.meta_data), g_conf.BATCH_SIZE,
        g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_sampler=sampler,
                                              shuffle=False,
                                              num_workers=6,
                                              pin_memory=True)

    real_dl = real_dataloader.RealDataset(real_dataset, g_conf.BATCH_SIZE)

    st = lambda aug: iag.Sometimes(aug, 0.4)
    oc = lambda aug: iag.Sometimes(aug, 0.3)
    rl = lambda aug: iag.Sometimes(aug, 0.09)
    augmenter = iag.Augmenter([iag.ToGPU()] + [
        rl(iag.GaussianBlur(
            (0, 1.5))),  # blur images with a sigma between 0 and 1.5
        rl(iag.AdditiveGaussianNoise(loc=0, scale=(
            0.0, 0.05), per_channel=0.5)),  # add gaussian noise to images
        oc(iag.Dropout((0.0, 0.10), per_channel=0.5)
           ),  # randomly remove up to X% of the pixels
        oc(
            iag.CoarseDropout(
                (0.0, 0.10), size_percent=(0.08, 0.2),
                per_channel=0.5)),  # randomly remove up to X% of the pixels
        oc(iag.Add((-40, 40), per_channel=0.5)
           ),  # change brightness of images (by -X to Y of original value)
        st(iag.Multiply((0.10, 2), per_channel=0.2)
           ),  # change brightness of images (X-Y% of original value)
        rl(iag.ContrastNormalization(
            (0.5, 1.5), per_channel=0.5)),  # improve or worsen the contrast
        rl(iag.Grayscale((0.0, 1))),  # put grayscale
    ]  # do all of the above in random order
                              )
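    # Each wrapper above applies its augmentation only sometimes: st -> probability 0.4,
    # oc -> 0.3, rl -> 0.09, via the iag.Sometimes lambdas defined before the list.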

    l1weight = g_conf.L1_WEIGHT
    task_adv_weight = g_conf.TASK_ADV_WEIGHT
    image_size = tuple([88, 200])

    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    print("Configurations of ", exp_alias)
    print("GANMODEL_NAME", g_conf.GANMODEL_NAME)
    print("LOSS_FUNCTION", g_conf.LOSS_FUNCTION)
    print("LR_G, LR_D, LR", g_conf.LR_G, g_conf.LR_D, g_conf.LEARNING_RATE)
    print("SKIP", g_conf.SKIP)
    print("TYPE", g_conf.TYPE)
    print("L1 WEIGHT", g_conf.L1_WEIGHT)
    print("TASK ADV WEIGHT", g_conf.TASK_ADV_WEIGHT)
    print("LAB SMOOTH", g_conf.LABSMOOTH)

    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        netD = ganmodels._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels._netG(loss=g_conf.LOSS_FUNCTION,
                               skip=g_conf.SKIP).cuda()
    elif g_conf.GANMODEL_NAME == 'LSDcontrol_nopatch':
        netD = ganmodels_nopatch._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_nopatch._netG(loss=g_conf.LOSS_FUNCTION).cuda()
    elif g_conf.GANMODEL_NAME == 'LSDcontrol_nopatch_smaller':
        netD = ganmodels_nopatch_smaller._netD(
            loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_nopatch_smaller._netG(
            loss=g_conf.LOSS_FUNCTION).cuda()

    elif g_conf.GANMODEL_NAME == 'LSDcontrol_task':
        netD = ganmodels_task._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_task._netG(loss=g_conf.LOSS_FUNCTION).cuda()
        netF = ganmodels_task._netF(loss=g_conf.LOSS_FUNCTION).cuda()

        if g_conf.PRETRAINED == 'RECON':
            netF_statedict = torch.load('netF_GAN_Pretrained.wts')
            netF.load_state_dict(netF_statedict)

        elif g_conf.PRETRAINED == 'IL':
            print("Loading IL")
            model_IL = torch.load('best_loss_20-06_EpicClearWeather.pth')
            model_IL_state_dict = model_IL['state_dict']

            netF_state_dict = netF.state_dict()

            print(len(netF_state_dict.keys()), len(model_IL_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netF_state_dict.keys(), model_IL_state_dict.keys())):
                newkey, oldkey = keys
                # if newkey.split('.')[0] == "branch" and oldkey.split('.')[0] == "branches":
                #     print("No Transfer of ",  newkey, " to ", oldkey)
                # else:
                print("Transferring ", newkey, " to ", oldkey)
                netF_state_dict[newkey] = model_IL_state_dict[oldkey]

            netF.load_state_dict(netF_state_dict)
            print("IL Model Loaded!")

    elif g_conf.GANMODEL_NAME == 'LSDcontrol_task_2d':
        netD = ganmodels_taskAC_shared._netD().cuda()
        netG = ganmodels_taskAC_shared._netG().cuda()
        netF = ganmodels_taskAC_shared._netF().cuda()

        if g_conf.PRETRAINED == 'IL':
            print("Loading IL")
            model_IL = torch.load(g_conf.IL_AGENT_PATH)
            model_IL_state_dict = model_IL['state_dict']

            netF_state_dict = netF.state_dict()

            print(len(netF_state_dict.keys()), len(model_IL_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netF_state_dict.keys(), model_IL_state_dict.keys())):
                newkey, oldkey = keys
                print("Transferring ", newkey, " to ", oldkey)
                netF_state_dict[newkey] = model_IL_state_dict[oldkey]

            netF.load_state_dict(netF_state_dict)
            print("IL Model Loaded!")

            #####
            if g_conf.IF_AUG:
                print("Loading Aug Decoder")
                model_dec = torch.load(g_conf.DECODER_RECON_PATH)
            else:
                print("Loading Decoder")
                model_dec = torch.load(g_conf.DECODER_RECON_PATH)
            model_dec_state_dict = model_dec['stateG_dict']

            netG_state_dict = netG.state_dict()

            print(len(netG_state_dict.keys()),
                  len(model_dec_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netG_state_dict.keys(), model_dec_state_dict.keys())):
                newkey, oldkey = keys
                print("Transferring ", newkey, " to ", oldkey)
                netG_state_dict[newkey] = model_dec_state_dict[oldkey]

            netG.load_state_dict(netG_state_dict)
            print("Decoder Model Loaded!")

    init_weights(netD)

    print(netD)
    print(netF)
    print(netG)

    optimD = torch.optim.Adam(netD.parameters(),
                              lr=g_conf.LR_D,
                              betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(),
                              lr=g_conf.LR_G,
                              betas=(0.5, 0.999))
    if g_conf.TYPE == 'task':
        optimF = torch.optim.Adam(netF.parameters(), lr=g_conf.LEARNING_RATE)
        Task_Loss = TaskLoss()

    if g_conf.GANMODEL_NAME == 'LSDcontrol_task_2d':
        print("Using cross entropy!")
        Loss = torch.nn.CrossEntropyLoss().cuda()

    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter_F = 0
    best_loss_iter_G = 0
    best_lossF = 1000000.0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0
    n_critic = g_conf.N_CRITIC

    lossF = Variable(torch.Tensor([100.0]))
    lossG_adv = Variable(torch.Tensor([100.0]))
    lossG_smooth = Variable(torch.Tensor([100.0]))
    lossG = Variable(torch.Tensor([100.0]))

    netG.train()
    netD.train()
    netF.train()
    capture_time = time.time()

    if not os.path.exists('./imgs_' + exp_alias):
        os.mkdir('./imgs_' + exp_alias)

    fake_img_pool_src = ImagePool(50)
    fake_img_pool_tgt = ImagePool(50)

    for data in data_loader:

        set_requires_grad(netD, True)
        set_requires_grad(netF, True)
        set_requires_grad(netG, True)

        input_data, float_data = data
        tgt_imgs = real_dl.get_imgs()

        if g_conf.IF_AUG:
            inputs = augmenter(0, input_data['rgb'])
        else:
            inputs = input_data['rgb'].cuda()

        tgt_imgs = tgt_imgs.cuda()

        inputs = inputs.squeeze(1)

        controls = float_data[:, dataset.controls_position(), :]

        camera_angle = float_data[:, 26, :]
        camera_angle = camera_angle.cuda()
        steer = float_data[:, 0, :]
        steer = steer.cuda()
        speed = float_data[:, 10, :]
        speed = speed.cuda()

        time_use = 1.0
        car_length = 3.0
        extra_factor = 2.5
        threshold = 1.0
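        # Side-camera steering-label correction (a hedged reading of the block below):
        # the recorded steer is shifted by an angle- and speed-dependent offset, in opposite
        # directions for positive and negative camera angles, so that laterally mounted
        # cameras get labels that point back toward the lane centre; the result is clamped
        # to [-1, 1] a few lines further down.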

        pos = camera_angle > 0.0
        pos = pos.type(torch.FloatTensor)
        neg = camera_angle <= 0.0
        neg = neg.type(torch.FloatTensor)
        pos = pos.cuda()
        neg = neg.cuda()

        rad_camera_angle = math.pi * (torch.abs(camera_angle)) / 180.0
        val = extra_factor * (torch.atan((rad_camera_angle * car_length) /
                                         (time_use * speed + 0.05))) / 3.1415
        steer -= pos * torch.min(val, torch.Tensor([0.6]).cuda())
        steer += neg * torch.min(val, torch.Tensor([0.6]).cuda())

        steer = steer.cpu()
        float_data[:, 0, :] = steer
        float_data[:, 0, :][float_data[:, 0, :] > 1.0] = 1.0
        float_data[:, 0, :][float_data[:, 0, :] < -1.0] = -1.0

        src_embed_inputs, src_branches = netF(
            inputs,
            dataset.extract_inputs(float_data).cuda())
        tgt_embed_inputs = netF(tgt_imgs, None)

        src_fake_inputs = netG(src_embed_inputs.detach())
        tgt_fake_inputs = netG(tgt_embed_inputs.detach())

        if iteration % 100 == 0:
            imgs_to_save = torch.cat((inputs[:1], src_fake_inputs[:1],
                                      tgt_imgs[:1], tgt_fake_inputs[:1]),
                                     0).cpu().data
            coil_logger.add_image("Images", imgs_to_save, iteration)
            imgs_to_save = imgs_to_save.clamp(0.0, 1.0)
            vutils.save_image(imgs_to_save,
                              './imgs_' + exp_alias + '/' + str(iteration) +
                              '_real_and_fake.png',
                              normalize=False)

        ##--------------------Discriminator part!!!!!!!!!!-------------------##

        ##source fake
        if g_conf.IF_POOL:
            src_fake_inputs_forD = fake_img_pool_src.query(src_fake_inputs)
            tgt_fake_inputs_forD = fake_img_pool_tgt.query(tgt_fake_inputs)
        else:
            src_fake_inputs_forD = src_fake_inputs
            tgt_fake_inputs_forD = tgt_fake_inputs

        set_requires_grad(netD, True)
        set_requires_grad(netF, False)
        set_requires_grad(netG, False)
        optimD.zero_grad()
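        # The critic update below follows a WGAN-GP-like form: mean(D(fake)) - mean(D(real))
        # plus a gradient penalty, computed separately for the source (simulated) and
        # target (real-world) images and then averaged.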

        outputsD_fake_src_bin, __ = netD(src_fake_inputs_forD.detach())
        outputsD_fake_tgt_bin, __ = netD(tgt_fake_inputs_forD.detach())

        outputsD_real_src_bin, __ = netD(inputs)
        outputsD_real_tgt_bin, __ = netD(tgt_imgs)

        gradient_penalty_src = calc_gradient_penalty(netD, inputs,
                                                     src_fake_inputs_forD,
                                                     "recon")
        lossD_bin_src = torch.mean(
            outputsD_fake_src_bin -
            outputsD_real_src_bin) + gradient_penalty_src

        gradient_penalty_tgt = calc_gradient_penalty(netD, tgt_imgs,
                                                     tgt_fake_inputs_forD,
                                                     "recon")
        lossD_bin_tgt = torch.mean(
            outputsD_fake_tgt_bin -
            outputsD_real_tgt_bin) + gradient_penalty_tgt

        lossD = (lossD_bin_src + lossD_bin_tgt) * 0.5
        lossD.backward(retain_graph=True)
        optimD.step()

        coil_logger.add_scalar('Total LossD Bin', lossD.data, iteration)
        coil_logger.add_scalar('Src LossD Bin', lossD_bin_src.data, iteration)
        coil_logger.add_scalar('Tgt LossD Bin', lossD_bin_tgt.data, iteration)

        ##--------------------Generator part!!!!!!!!!!-----------------------##
        set_requires_grad(netD, False)
        set_requires_grad(netF, False)
        set_requires_grad(netG, True)
        optimG.zero_grad()

        #fake outputs for bin
        outputsD_bin_src_fake_forG, __ = netD(src_fake_inputs)
        outputsD_bin_tgt_fake_forG, __ = netD(tgt_fake_inputs)

        #Generator updates

        if ((iteration + 1) % n_critic) == 0:
            #for netD_bin

            optimG.zero_grad()
            outputsD_bin_fake_forG = netD(tgt_imgs)

            #Generator updates
            lossG_src_smooth = L1_loss(
                src_fake_inputs, inputs)  # L1 loss with real domain image
            lossG_tgt_smooth = L1_loss(
                tgt_fake_inputs, tgt_imgs)  # L1 loss with real domain image

            lossG_src_adv_bin = -1.0 * torch.mean(outputsD_bin_src_fake_forG)
            lossG_tgt_adv_bin = -1.0 * torch.mean(outputsD_bin_tgt_fake_forG)

            lossG_adv_bin = 0.5 * (lossG_src_adv_bin + lossG_tgt_adv_bin)

            lossG_Adv = lossG_adv_bin
            lossG_L1 = 0.5 * (lossG_src_smooth + lossG_tgt_smooth)

            lossG = (lossG_Adv + l1weight * lossG_L1) / (1.0 + l1weight)

            lossG.backward(retain_graph=True)
            optimG.step()

            coil_logger.add_scalar('Total LossG', lossG.data, iteration)
            coil_logger.add_scalar('LossG Adv', lossG_Adv.data, iteration)
            coil_logger.add_scalar('Adv Bin LossG', lossG_adv_bin.data,
                                   iteration)
            coil_logger.add_scalar('Smooth LossG', lossG_L1.data, iteration)

            #####Task network updates##########################
            set_requires_grad(netD, False)
            set_requires_grad(netF, True)
            set_requires_grad(netG, False)

            optimF.zero_grad()
            lossF_task = Task_Loss.MSELoss(
                src_branches,
                dataset.extract_targets(float_data).cuda(), controls.cuda(),
                dataset.extract_inputs(float_data).cuda())

            __, outputsD_fake_src_da = netD(src_fake_inputs_forD.detach())
            __, outputsD_real_tgt_da = netD(tgt_imgs)

            __, outputsD_fake_tgt_da = netD(tgt_fake_inputs_forD.detach())
            __, outputsD_real_src_da = netD(inputs)

            gradient_penalty_da_1 = calc_gradient_penalty(
                netD, tgt_imgs, src_fake_inputs_forD, "da")
            lossF_da_1 = torch.mean(outputsD_fake_src_da - outputsD_real_tgt_da
                                    ) + gradient_penalty_da_1

            gradient_penalty_da_2 = calc_gradient_penalty(
                netD, inputs, tgt_fake_inputs_forD, "da")
            lossF_da_2 = torch.mean(outputsD_fake_tgt_da - outputsD_real_src_da
                                    ) + gradient_penalty_da_2

            lossF_da = 0.5 * (lossF_da_1 + lossF_da_2)
            lossF = (lossF_task +
                     task_adv_weight * lossF_da) / (1.0 + task_adv_weight)

            coil_logger.add_scalar('Total Task Loss', lossF.data, iteration)
            coil_logger.add_scalar('Adv Task Loss', lossF_da.data, iteration)
            coil_logger.add_scalar('Only Task Loss', lossF_task.data,
                                   iteration)
            lossF.backward(retain_graph=True)
            optimF.step()

            if lossG.data < best_lossG:
                best_lossG = lossG.data.tolist()
                best_loss_iter_G = iteration

            if lossF.data < best_lossF:
                best_lossF = lossF.data.tolist()
                best_loss_iter_F = iteration

        #optimization for one iter done!

        position = random.randint(0, len(float_data) - 1)

        if lossD.data < best_lossD:
            best_lossD = lossD.data.tolist()

        accumulated_time += time.time() - capture_time
        capture_time = time.time()

        if is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter_G': best_loss_iter_G,
                'best_loss_iter_F': best_loss_iter_F
            }
            torch.save(
                state,
                os.path.join('/datatmp/Datasets/rohitgan/_logs', exp_batch,
                             exp_alias, 'checkpoints',
                             str(iteration) + '.pth'))

        if iteration == best_loss_iter_F and iteration > 10000:

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'best_lossF': best_lossF,
                'total_time': accumulated_time,
                'best_loss_iter_F': best_loss_iter_F
            }
            torch.save(
                state,
                os.path.join('/datatmp/Datasets/rohitgan/_logs', exp_batch,
                             exp_alias, 'best_modelF' + '.pth'))

        iteration += 1
Example #19
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """
        The main encoder training function.
    Args:
        gpu: The GPU id number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: whether the output is redirected to a file
        number_of_workers: the number of threads used for data loading
    Returns:
        None
    """
    try:
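        # Hypothetical call (argument values are illustrative, not taken from the original
        # call site): execute('0', 'EXP', 'customized', suppress_output=False, number_of_workers=12)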
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train_encoder')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': os.environ["CUDA_VISIBLE_DEVICES"]})

        # we set a seed for this exp
        seed_everything(seed=g_conf.MAGICAL_SEED)

        # Put the output to a separate file if it is the case
        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join('_output_logs', exp_alias + '_' +
                                           g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a",
                              buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                                           exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                                           + str(os.getpid()) + ".out"),
                              "a", buffering=1)

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                                                 g_conf.PRELOAD_MODEL_ALIAS,
                                                 'checkpoints',
                                                 str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias,
                                                 'checkpoints', str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 1000000000.0
            best_loss_iter = 0

        # Define the dataset. This structure has __getitem__ redefined so that the
        # positions can be accessed from the root directory as in a vector.
        # full_dataset = os.path.join(os.environ["SRL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augments images and
        # transforms them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        if len(g_conf.EXPERIENCE_FILE) == 1:
            json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2]
        else:
            json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2] + '_' + str(g_conf.EXPERIENCE_FILE[1]).split('/')[-1].split('.')[-2]

        dataset = CoILDataset(transform=augmenter,
                              preload_name=g_conf.PROCESS_NAME + '_' + json_file_name + '_' + g_conf.DATA_USED)

        print ("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration, number_of_workers)

        encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION)
        encoder_model.cuda()
        encoder_model.train()

        print(encoder_model)

        optimizer = optim.Adam(encoder_model.parameters(), lr=g_conf.LEARNING_RATE)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            encoder_model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print ("Before the loss")

        if g_conf.ENCODER_MODEL_TYPE in ['ETE']:
            criterion = Loss(g_conf.LOSS_FUNCTION)

        # Loss time series window
        for data in data_loader:
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            capture_time = time.time()
            encoder_model.zero_grad()

            """
                ####################################
                    ENCODER_MODEL_TYPE can be one of: one-step-affordances, ETE, stdim,
                    action_prediction, forward
                ####################################
              - one-step-affordances: input RGB images, compute affordances loss.
              - ETE: input RGB images and speed, compute action loss (steering, throttle, brake)
              - stdim: input two consecutive RGB images, compute the feature loss
              - action_prediction: input two consecutive RGB images, compute action classification loss
              - forward: input two consecutive RGB images, compute action loss + feature loss
            """

            if g_conf.ENCODER_MODEL_TYPE in ['one-step-affordances']:
                loss_function_params = {
                    'classification_gt': dataset.extract_affordances_targets(data, 'classification').cuda(),
                # hazard stop, red_light, ...
                    'class_weights': g_conf.AFFORDANCES_CLASS_WEIGHT,
                    'regression_gt': dataset.extract_affordances_targets(data, 'regression').cuda(),
                    'variable_weights': g_conf.AFFORDANCES_VARIABLE_WEIGHT
                }
                # we input RGB images, speed and command to train affordances
                loss = encoder_model(torch.squeeze(data['rgb'].cuda()),
                                     dataset.extract_inputs(data).cuda(),
                                     torch.squeeze(dataset.extract_commands(data).cuda()),
                                     loss_function_params)

                if iteration == 0:
                    state = {
                        'iteration': iteration,
                        'state_dict': encoder_model.state_dict(),
                        'best_loss': best_loss,
                        'total_time': accumulated_time,
                        'optimizer': optimizer.state_dict(),
                        'best_loss_iter': best_loss_iter
                    }
                    torch.save(state, os.path.join('_logs', exp_batch, exp_alias
                                                   , 'checkpoints', 'inital.pth'))

                loss.backward()
                optimizer.step()

            elif g_conf.ENCODER_MODEL_TYPE in ['forward']:
                # We sample another batch to avoid the superposition

                inputs_data = [data['rgb'][0].cuda(), data['rgb'][1].cuda()]
                loss, loss_other, loss_ete = encoder_model(inputs_data,
                                           dataset.extract_inputs(data),
                                           # We also add measurements and commands
                                           dataset.extract_commands(data),
                                           dataset.extract_targets(data)[0].cuda()
                                           )
                loss.backward()
                optimizer.step()


            elif g_conf.ENCODER_MODEL_TYPE in ['ETE']:
                branches = encoder_model(torch.squeeze(data['rgb'].cuda()),
                                         dataset.extract_inputs(data).cuda(),
                                         torch.squeeze(dataset.extract_commands(data).cuda()))

                loss_function_params = {
                    'branches': branches,
                    'targets': dataset.extract_targets(data).cuda(),  # steer, throttle, brake
                    'inputs': dataset.extract_inputs(data).cuda(),  # speed
                    'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                    'variable_weights': g_conf.VARIABLE_WEIGHT
                }

                loss, _ = criterion(loss_function_params)
                loss.backward()
                optimizer.step()

            elif g_conf.ENCODER_MODEL_TYPE in ['stdim']:
                inputs_data = [data['rgb'][0].cuda(), data['rgb'][1].cuda()]
                loss, _, _ = encoder_model(inputs_data,
                                           dataset.extract_inputs(data),
                                           # We also add measurements and commands
                                           dataset.extract_commands(data)
                                           )
                loss.backward()
                optimizer.step()

            elif g_conf.ENCODER_MODEL_TYPE in ['action_prediction']:
                inputs_data = [data['rgb'][0].cuda(), data['rgb'][1].cuda()]
                loss, _, _ = encoder_model(inputs_data,
                                           dataset.extract_inputs(data),
                                           # We also add measurements and commands
                                           dataset.extract_commands(data),
                                           dataset.extract_targets(data)[0].cuda()
                                           )
                loss.backward()
                optimizer.step()

            else:
                raise ValueError("The encoder model type is not know")

            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):
                state = {
                    'iteration': iteration,
                    'state_dict': encoder_model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(state, os.path.join('_logs', exp_batch, exp_alias
                                               , 'checkpoints', str(iteration) + '.pth'))

            iteration += 1

            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """

            if g_conf.ENCODER_MODEL_TYPE in ['stdim', 'action_prediction', 'forward']:
                coil_logger.add_scalar('Loss', loss.data, iteration)
                coil_logger.add_image('f_t', torch.squeeze(data['rgb'][0]), iteration)
                coil_logger.add_image('f_ti', torch.squeeze(data['rgb'][1]), iteration)

            elif g_conf.ENCODER_MODEL_TYPE in ['one-step-affordances', 'ETE']:
                coil_logger.add_scalar('Loss', loss.data, iteration)
                coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration)

            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            accumulated_time += time.time() - capture_time
            coil_logger.add_message('Iterating',
                                    {'Iteration': iteration,
                                     'Loss': loss.data.tolist(),
                                     'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                                     'BestLoss': best_loss, 'BestLossIteration': best_loss_iter},
                                    iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)

            if iteration % 100 == 0:
                print('Train Iteration: {} [{}/{} ({:.0f}%)] \t Loss: {:.6f}'.format(
                    iteration, iteration, g_conf.NUMBER_ITERATIONS,
                    100. * iteration / g_conf.NUMBER_ITERATIONS, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #20
0
def execute(gpu, exp_batch, exp_alias):
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join(exp_batch, exp_alias + '.yaml'))
    set_type_of_process('validation')

    sys.stdout = open(str(os.getpid()) + ".out", "a", buffering=1)

    if monitorer.get_status(exp_batch, exp_alias,
                            g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not ?
        return

    # Define the dataset. This structure has __getitem__ redefined in a way
    # that you can access the HDF5 file positions from the root directory as in a vector.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.DATASET_NAME)

    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    # Creates the sampler; this part is responsible for managing the keys. It divides
    # all keys depending on the measurements and produces a set of keys for each batch.

    # The data loader is the multi-threaded module from pytorch that releases a number of
    # workers to get all the data.
    # TODO: batch size and number of workers should go to some configuration file
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=120,
                                              shuffle=False,
                                              num_workers=12,
                                              pin_memory=True)

    # TODO: here there is clearly a possibility to make a cool "conditioning" system.
    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()

    # TODO: The checkpoint will continue, so the logs should restart ??? OR continue where it was

    latest = get_latest_evaluated_checkpoint()
    if latest is None:  # When nothing has been tested yet, get_latest returns None, so we fix that.
        latest = 0

    print(dataset.meta_data)

    while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

        if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

            latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(latest) + '.pth'))
            checkpoint_iteration = checkpoint['iteration']
            print("Validation loaded ", checkpoint_iteration)

            for data in data_loader:

                input_data, labels = data
                control_position = np.where(
                    dataset.meta_data[:, 0] == 'control')[0][0]
                speed_position = np.where(
                    dataset.meta_data[:, 0] == 'speed_module')[0][0]
                print(torch.squeeze(input_data['rgb']).shape)

                print(control_position)
                print(speed_position)
                # Obs : Maybe we could also check for other branches ??
                output = model.forward_branch(
                    torch.squeeze(input_data['rgb']).cuda(),
                    labels[:, speed_position, :].cuda(),
                    labels[:, control_position, :].cuda())
                # TODO: clean this squeeze and dimension things

                for i in range(input_data['rgb'].shape[0]):

                    coil_logger.write_on_csv(
                        checkpoint_iteration,
                        [output[i][0], output[i][1], output[i][2]])

                #loss = criterion(output, labels)

                #loss.backward()

                #optimizer.step()

                #shutil.copyfile(filename, 'model_best.pth.tar')
        else:
            time.sleep(1)
            print("Waiting for the next Validation")
Example #21
0
def execute(gpu, exp_batch, exp_alias):

    manualSeed = 123
    torch.cuda.manual_seed(manualSeed)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join('_output_logs',
                      g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml', g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)
    dataset = CoILDataset(full_dataset, transform=transforms.Compose([transforms.ToTensor()]))

    sampler = BatchSequenceSampler(splitter.control_steer_split(dataset.measurements, dataset.meta_data),
                          g_conf.BATCH_SIZE, g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
    data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler,
                                              shuffle=False, num_workers=6, pin_memory=True)


    l1weight = g_conf.L1
    image_size = tuple([88, 200])
    testmode = 1

    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        netD = ganmodels._netD().cuda()
        netG = ganmodels._netG(skip=g_conf.SKIP).cuda()
    if g_conf.GANMODEL_NAME == 'LSDcontrol_acgan_nopatch':
        netD = acganmodels_nopatch._netD().cuda()
        netG = acganmodels_nopatch._netG(skip=g_conf.SKIP).cuda()

    init_weights(netD)
    init_weights(netG)
    print(netD)
    print(netG)

    optimD = torch.optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
    MSE_loss = torch.nn.MSELoss().cuda()
    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter = 0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0

    netG.train()
    netD.train()
    capture_time = time.time()
    if not os.path.exists('./imgs_' + exp_alias):
        os.mkdir('./imgs_' + exp_alias)

    for data in data_loader:

        input_data, float_data = data
        inputs = input_data['rgb'].cuda()
        inputs = inputs.squeeze(1)

        fake_inputs = netG(inputs)

        if iteration % 200 == 0:
            imgs_to_save = torch.cat((inputs[:2], fake_inputs[:2]), 0).cpu().data
            vutils.save_image(imgs_to_save, './imgs_' + exp_alias + '/' + str(iteration) + '_real_and_fake.png', normalize=True)
            coil_logger.add_image("Images", imgs_to_save, iteration)

        controls = float_data[:, dataset.controls_position(), :]
        steer = controls[:, 0].cuda()

        ##--------------------Discriminator part!!!!!!!!!!-----------------------
        ##fake
        set_requires_grad(netD, True)
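        # set_requires_grad is assumed to be a small helper that toggles requires_grad on
        # every parameter of the given network, roughly (a sketch, not necessarily the repo's code):
        #     def set_requires_grad(net, flag):
        #         for p in net.parameters():
        #             p.requires_grad = flag
        # Alternating it between netD and netG gives the usual two-step GAN optimization.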
        optimD.zero_grad()

        outputsD_fake_forD, fakeD_steer = netD(fake_inputs.detach())

        labsize = outputsD_fake_forD.size()
        labels_fake = torch.zeros(labsize) #Fake labels
        label_fake_noise = torch.rand(labels_fake.size()) * 0.1 #Label smoothing
        labels_fake = labels_fake + label_fake_noise
        labels_fake = Variable(labels_fake).cuda()

        # Assumption: LSGAN-style loss against the smoothed fake labels built above
        lossD_fake = MSE_loss(outputsD_fake_forD, labels_fake)
        lossD_fake_aux = MSE_loss(fakeD_steer, steer)

        lossD_fake_total = lossD_fake + lossD_fake_aux
        lossD_fake_total.backward()
        optimD.step()

        ##real
        set_requires_grad(netD, True)
        optimD.zero_grad()

        outputsD_real_forD, realD_steer = netD(inputs)

        labsize = outputsD_real_forD.size()
        labels_real = torch.ones(labsize) #Real labels
        label_real_noise = torch.rand(labels_real.size()) * 0.1 #Label smoothing
        labels_real = labels_real - label_real_noise
        labels_real = Variable(labels_real).cuda()

        lossD_real = torch.mean(outputsD_real_forD)
        lossD_real_aux = MSE_loss(realD_steer, steer)

        #Discriminator updates

        lossD_real_total = lossD_real + lossD_real_aux
        lossD_real_total.backward()
        optimD.step()

        lossD = lossD_real_total + lossD_fake_total

        coil_logger.add_scalar('Aux Real LossD', lossD_real_aux.data, iteration)
        coil_logger.add_scalar('Aux Fake LossD', lossD_fake_aux.data, iteration)
        coil_logger.add_scalar('Total Real LossD', lossD_real_total.data , iteration)
        coil_logger.add_scalar('Total Fake LossD', lossD_fake_total.data , iteration)
        coil_logger.add_scalar('Real LossD', lossD_real.data , iteration)
        coil_logger.add_scalar('Fake LossD', lossD_fake.data , iteration)

        ##--------------------Generator part!!!!!!!!!!-----------------------

        set_requires_grad(netD, False)
        optimG.zero_grad()
        outputsD_fake_forG, G_steer = netD(fake_inputs)
        #Generator updates

        # Assumption: LSGAN-style adversarial loss against the smoothed real labels
        lossG_adv = MSE_loss(outputsD_fake_forG, labels_real)
        lossG_smooth = L1_loss(fake_inputs, inputs)
        lossG_aux = MSE_loss(G_steer, steer)

        lossG = lossG_adv + lossG_aux + l1weight * lossG_smooth

        lossG.backward()
        optimG.step()

        coil_logger.add_scalar('Total LossG', lossG.data / len(inputs), iteration)
        coil_logger.add_scalar('Adv LossG', lossG_adv.data , iteration)
        coil_logger.add_scalar('Smooth LossG', lossG_smooth.data , iteration)
        coil_logger.add_scalar('Aux LossG', lossG_aux.data , iteration)

        #optimization for one iter done!
        position = random.randint(0, len(float_data)-1)

        if lossD.data < best_lossD:
            best_lossD = lossD.data.tolist()

        if lossG.data < best_lossG:
            best_lossG = lossG.data.tolist()
            best_loss_iter = iteration

        accumulated_time += time.time() - capture_time
        capture_time = time.time()
        print("LossD", lossD.data.tolist(), "LossG", lossG.data.tolist(), "BestLossD", best_lossD, "BestLossG", best_lossG, "Iteration", iteration, "Best Loss Iteration", best_loss_iter)

        coil_logger.add_message('Iterating',
                                {'Iteration': iteration,
                                 'LossD': lossD.data.tolist(),
                                 'LossG': lossG.data.tolist(),
                                 'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                                 'BestLossD': best_lossD,
                                 'BestLossG': best_lossG,
                                 'BestLossIteration': best_loss_iter,
                                 'GroundTruth': dataset.extract_targets(float_data)[position].data.tolist(),
                                 'Inputs': dataset.extract_inputs(float_data)[position].data.tolist()},
                                iteration)
        if is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter': best_loss_iter

            }
            torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
                                           , 'checkpoints', str(iteration) + '.pth'))
        if iteration == best_loss_iter:

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter': best_loss_iter

            }
            torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
                                           , 'best_modelG' + '.pth'))

        iteration += 1
Example #22
0
def execute(gpu, exp_batch, exp_alias):

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.TRAIN_DATASET_NAME)
    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    sampler = BatchSequenceSampler(
        splitter.control_steer_split(dataset.measurements,
                                     dataset.meta_data), g_conf.BATCH_SIZE,
        g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_sampler=sampler,
                                              shuffle=False,
                                              num_workers=6,
                                              pin_memory=True)

    l1weight = g_conf.L1_WEIGHT
    image_size = tuple([88, 200])

    if g_conf.TRAIN_TYPE == 'WGAN':
        clamp_value = g_conf.CLAMP
        n_critic = g_conf.N_CRITIC

    print("Configurations of ", exp_alias)
    print("GANMODEL_NAME", g_conf.GANMODEL_NAME)
    print("LOSS_FUNCTION", g_conf.LOSS_FUNCTION)
    print("LR_G, LR_D, LR", g_conf.LR_G, g_conf.LR_D, g_conf.LEARNING_RATE)
    print("SKIP", g_conf.SKIP)
    print("TYPE", g_conf.TYPE)
    print("L1 WEIGHT", g_conf.L1_WEIGHT)
    print("LAB SMOOTH", g_conf.LABSMOOTH)

    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        netD = ganmodels._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels._netG(loss=g_conf.LOSS_FUNCTION,
                               skip=g_conf.SKIP).cuda()
    elif g_conf.GANMODEL_NAME == 'LSDcontrol_nopatch':
        netD = ganmodels_nopatch._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_nopatch._netG(loss=g_conf.LOSS_FUNCTION).cuda()
    elif g_conf.GANMODEL_NAME == 'LSDcontrol_nopatch_smaller':
        netD = ganmodels_nopatch_smaller._netD(
            loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_nopatch_smaller._netG(
            loss=g_conf.LOSS_FUNCTION).cuda()

    elif g_conf.GANMODEL_NAME == 'LSDcontrol_task':
        netD = ganmodels_task._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_task._netG(loss=g_conf.LOSS_FUNCTION).cuda()
        netF = ganmodels_task._netF(loss=g_conf.LOSS_FUNCTION).cuda()

        if g_conf.PRETRAINED == 'RECON':
            netF_statedict = torch.load('netF_GAN_Pretrained.wts')
            netF.load_state_dict(netF_statedict)
        elif g_conf.PRETRAINED == 'IL':
            model_IL = torch.load('best_loss_20-06_EpicClearWeather.pth')
            model_IL_state_dict = model_IL['state_dict']

            netF_state_dict = netF.state_dict()
            for i, keys in enumerate(
                    zip(netF_state_dict.keys(), model_IL_state_dict.keys())):
                newkey, oldkey = keys
                if newkey.split('.')[0] == "branch" and oldkey.split(
                        '.')[0] == "branches":
                    print("No Transfer of ", newkey, " to ", oldkey)
                else:
                    print("Transferring ", newkey, " to ", oldkey)
                    netF_state_dict[newkey] = model_IL_state_dict[oldkey]
            # Load the transferred weights once, after the key-matching loop
            netF.load_state_dict(netF_state_dict)

    init_weights(netD)
    init_weights(netG)
    #do init for netF also later but now it is in the model code itself

    print(netD)
    print(netF)
    print(netG)

    optimD = torch.optim.Adam(netD.parameters(),
                              lr=g_conf.LR_D,
                              betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(),
                              lr=g_conf.LR_G,
                              betas=(0.5, 0.999))
    if g_conf.TYPE == 'task':
        optimF = torch.optim.Adam(netF.parameters(), lr=g_conf.LEARNING_RATE)
        Task_Loss = TaskLoss()

    if g_conf.LOSS_FUNCTION == 'LSGAN':
        Loss = torch.nn.MSELoss().cuda()
    elif g_conf.LOSS_FUNCTION == 'NORMAL':
        Loss = torch.nn.BCEWithLogitsLoss().cuda()

    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter_F = 0
    best_loss_iter_G = 0
    best_lossF = 1000000.0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0
    lossF = Variable(torch.Tensor([100.0]))

    lossG_adv = Variable(torch.Tensor([100.0]))
    lossG_smooth = Variable(torch.Tensor([100.0]))
    lossG = Variable(torch.Tensor([100.0]))

    netG.train()
    netD.train()
    netF.train()
    capture_time = time.time()

    if not os.path.exists('./imgs_' + exp_alias):
        os.mkdir('./imgs_' + exp_alias)

    #TODO put family for losses

    fake_img_pool = ImagePool(50)

    for data in data_loader:

        set_requires_grad(netD, True)
        set_requires_grad(netF, True)
        set_requires_grad(netG, True)

        # print("ITERATION:", iteration)

        val = 0.0
        input_data, float_data = data
        inputs = input_data['rgb'].cuda()
        inputs = inputs.squeeze(1)
        inputs_in = inputs - val  # shift inputs by val (0.0 here, so no shift is applied)

        #TODO: make sure the F network does not get optimized by G optim
        controls = float_data[:, dataset.controls_position(), :]
        embed, branches = netF(inputs_in,
                               dataset.extract_inputs(float_data).cuda())
        print("Branch Outputs:::", branches[0][0])

        embed_inputs = embed
        fake_inputs = netG(embed_inputs)
        fake_inputs_in = fake_inputs

        if iteration % 500 == 0:
            imgs_to_save = torch.cat(
                (inputs_in[:2] + val, fake_inputs_in[:2] + val), 0).cpu().data
            vutils.save_image(imgs_to_save,
                              './imgs_' + exp_alias + '/' + str(iteration) +
                              '_real_and_fake.png',
                              normalize=True)
            coil_logger.add_image("Images", imgs_to_save, iteration)

        ##--------------------Discriminator part!!!!!!!!!!-------------------##
        set_requires_grad(netD, True)
        set_requires_grad(netF, False)
        set_requires_grad(netG, False)
        optimD.zero_grad()

        ##fake
        # fake_inputs_forD = fake_img_pool.query(fake_inputs)
        outputsD_fake_forD = netD(fake_inputs)

        labsize = outputsD_fake_forD.size()
        labels_fake = torch.zeros(labsize)  #Fake labels
        label_fake_noise = torch.rand(
            labels_fake.size()) * 0.1  #Label smoothing

        if g_conf.LABSMOOTH == 1:
            labels_fake = labels_fake + label_fake_noise

        labels_fake = Variable(labels_fake).cuda()
        lossD_fake = torch.mean(
            outputsD_fake_forD)  #Loss(outputsD_fake_forD, labels_fake)

        ##real
        outputsD_real = netD(inputs_in)

        labsize = outputsD_real.size()
        labels_real = torch.ones(labsize)  #Real labels
        label_real_noise = torch.rand(
            labels_real.size()) * 0.1  #Label smoothing

        if g_conf.LABSMOOTH == 1:
            labels_real = labels_real - label_real_noise

        labels_real = Variable(labels_real).cuda()
        lossD_real = -1.0 * torch.mean(
            outputsD_real)  #Loss(outputsD_real, labels_real)

        ### Gradient Penalty ###
        gradient_penalty = calc_gradient_penalty(netD, inputs, fake_inputs)
        # alpha = torch.rand((g_conf.BATCH_SIZE, 1, 1, 1))
        # alpha = alpha.cuda()
        #
        # x_hat = alpha * inputs.data + (1 - alpha) * fake_inputs.data
        # x_hat.requires_grad = True
        #
        # pred_hat = netD(x_hat)
        # gradients = grad(outputs=pred_hat, inputs=x_hat, grad_outputs=torch.ones(pred_hat.size()).cuda(),
        #                 create_graph=True, retain_graph=True, only_inputs=True)[0]
        #
        # gradient_penalty = 10 * ((gradients.view(gradients.size()[0], -1).norm(2, 1) - 1) ** 2).mean()

        #Discriminator updates

        lossD = torch.mean(
            outputsD_fake_forD -
            outputsD_real) + gradient_penalty  #(lossD_real + lossD_fake) * 0.5
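        # WGAN-GP critic objective: E[D(G(x))] - E[D(x)] plus the gradient penalty term,
        # pushing the critic to score real frames higher than generated ones.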
        # lossD /= len(inputs)
        print("Loss d", lossD)
        lossD.backward(retain_graph=True)
        optimD.step()

        # if g_conf.TRAIN_TYPE == 'WGAN':
        #     for p in netD.parameters():
        #         p.data.clamp_(-clamp_value, clamp_value)

        coil_logger.add_scalar('Total LossD', lossD.data, iteration)
        coil_logger.add_scalar('Real LossD', lossD_real.data / len(inputs),
                               iteration)
        coil_logger.add_scalar('Fake LossD', lossD_fake.data / len(inputs),
                               iteration)

        ##--------------------Generator part!!!!!!!!!!-----------------------##
        set_requires_grad(netD, False)
        set_requires_grad(netF, False)
        set_requires_grad(netG, True)

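        # Standard WGAN practice: the generator (and the task network below) is only updated
        # once every n_critic critic updates, with n_critic taken from g_conf.N_CRITIC.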
        if ((iteration + 1) % n_critic) == 0:
            optimG.zero_grad()
            outputsD_fake_forG = netD(fake_inputs)

            #Generator updates
            lossG_adv = -1.0 * torch.mean(
                outputsD_fake_forG)  #Loss(outputsD_fake_forG, labels_real)
            lossG_smooth = L1_loss(fake_inputs, inputs)
            lossG = (lossG_adv + l1weight * lossG_smooth) / (1.0 + l1weight)
            # lossG /= len(inputs)
            print(lossG)
            lossG.backward(retain_graph=True)
            optimG.step()

            #####Task network updates##########################
            set_requires_grad(netD, False)
            set_requires_grad(netF, True)
            set_requires_grad(netG, False)

            optimF.zero_grad()
            lossF = Variable(torch.Tensor())
            lossF = Task_Loss.MSELoss(
                branches,
                dataset.extract_targets(float_data).cuda(), controls.cuda(),
                dataset.extract_inputs(float_data).cuda())
            coil_logger.add_scalar('Task Loss', lossF.data, iteration)
            lossF.backward()
            optimF.step()

        coil_logger.add_scalar('Total LossG', lossG.data, iteration)
        coil_logger.add_scalar('Adv LossG', lossG_adv.data / len(inputs),
                               iteration)
        coil_logger.add_scalar('Smooth LossG', lossG_smooth.data / len(inputs),
                               iteration)

        #optimization for one iter done!

        position = random.randint(0, len(float_data) - 1)
        if lossD.data < best_lossD:
            best_lossD = lossD.data.tolist()
        # print (lossG.item(), best_lossG)
        if lossG.item() < best_lossG:
            best_lossG = lossG.item()
            best_loss_iter_G = iteration

        if lossF.item() < best_lossF:
            best_lossF = lossF.item()
            best_loss_iter_F = iteration

        accumulated_time += time.time() - capture_time
        capture_time = time.time()
        print("LossD", lossD.data.tolist(), "LossG", lossG.data.tolist(),
              "BestLossD", best_lossD, "BestLossG", best_lossG, "LossF", lossF,
              "BestLossF", best_lossF, "Iteration", iteration,
              "Best Loss Iteration G", best_loss_iter_G,
              "Best Loss Iteration F", best_loss_iter_F)

        coil_logger.add_message(
            'Iterating', {
                'Iteration': iteration,
                'LossD': lossD.data.tolist(),
                'LossG': lossG.data.tolist(),
                'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                'BestLossD': best_lossD,
                'BestLossG': best_lossG,
                'BestLossIterationG': best_loss_iter_G,
                'BestLossF': best_lossF,
                'BestLossIterationF': best_loss_iter_F,
                'GroundTruth': dataset.extract_targets(float_data)[position].data.tolist(),
                'Inputs': dataset.extract_inputs(float_data)[position].data.tolist()
            }, iteration)

        if is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter_G': best_loss_iter_G,
                'best_loss_iter_F': best_loss_iter_F
            }
            torch.save(
                state,
                os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch,
                             exp_alias, 'checkpoints',
                             str(iteration) + '.pth'))
        if iteration == best_loss_iter_G and iteration > 10000:

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter_G': best_loss_iter_G
            }
            torch.save(
                state,
                os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch,
                             exp_alias, 'best_modelG' + '.pth'))

        if iteration == best_loss_iter_F and iteration > 10000:

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'best_lossF': best_lossF,
                'total_time': accumulated_time,
                'best_loss_iter_F': best_loss_iter_F
            }
            torch.save(
                state,
                os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch,
                             exp_alias, 'best_modelF' + '.pth'))

        iteration += 1
Example #23
0
def execute(gpu,
            exp_batch,
            exp_alias,
            suppress_output=True,
            number_of_workers=12,
            encoder_params=None):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(
            os.path.join('configs', exp_batch, exp_alias + '.yaml'),
            encoder_params)
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading',
                                {'GPU': os.environ["CUDA_VISIBLE_DEVICES"]})

        seed_everything(seed=g_conf.MAGICAL_SEED)

        # Put the output to a separate file if it is the case

        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        print(" GOING TO LOAD")
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            print(" LOADING A PRELOAD")
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        else:

            # Get the latest checkpoint to be loaded
            # returns none if there are no checkpoints saved for this model
            checkpoint_file = get_latest_saved_checkpoint()
            if checkpoint_file is not None:
                print('loading previous checkpoint ', checkpoint_file)
                checkpoint = torch.load(
                    os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                 g_conf.EXPERIMENT_NAME, 'checkpoints',
                                 str(get_latest_saved_checkpoint())))
                iteration = checkpoint['iteration']
                best_loss = checkpoint['best_loss']
                best_loss_iter = checkpoint['best_loss_iter']
            else:
                iteration = 0
                best_loss = 100000000.0
                best_loss_iter = 0

        # Define the dataset. This structure has __getitem__ redefined in a way
        # that you can access the positions from the root directory as in a vector.
        #full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)
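        # (This augmenter callable is passed to CoILDataset as its transform below,
        #  so it is applied to every sample that the data loader yields.)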

        # The preloaded dataset is cached based on the json file name, so the same dataset does not need to be reloaded each time
        if len(g_conf.EXPERIENCE_FILE) == 1:
            json_file_name = str(
                g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2]
        else:
            json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split(
                '/')[-1].split('.')[-2] + '_' + str(
                    g_conf.EXPERIENCE_FILE[1]).split('/')[-1].split('.')[-2]
        dataset = CoILDataset(transform=augmenter,
                              preload_name=g_conf.PROCESS_NAME + '_' +
                              json_file_name + '_' + g_conf.DATA_USED)
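        # For example (illustrative values only): with EXPERIENCE_FILE = ['/data/town01.json'],
        # PROCESS_NAME = 'train' and DATA_USED = 'all', the preload name would be
        # 'train_town01_all'.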

        #dataset = CoILDataset(transform=augmenter, preload_name=str(g_conf.NUMBER_OF_HOURS)+ 'hours_' + g_conf.TRAIN_DATASET_NAME)
        print("Loaded Training dataset")

        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)
        if g_conf.MODEL_TYPE in ['separate-affordances']:
            model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION,
                              g_conf.ENCODER_MODEL_CONFIGURATION)

        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        print(model)

        # we use the pre-trained encoder model to extract bottleneck Z and train the E-t-E model

        if g_conf.MODEL_TYPE in ['separate-affordances']:
            encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                         g_conf.ENCODER_MODEL_CONFIGURATION)
            encoder_model.cuda()
            encoder_model.eval()
            # To freeze the pre-trained encoder model
            if g_conf.FREEZE_ENCODER:
                for param_ in encoder_model.parameters():
                    param_.requires_grad = False
            if encoder_params is not None:
                encoder_checkpoint = torch.load(
                    os.path.join(
                        '_logs', encoder_params['encoder_folder'],
                        encoder_params['encoder_exp'], 'checkpoints',
                        str(encoder_params['encoder_checkpoint']) + '.pth'))
                print(
                    "Encoder model ",
                    str(encoder_params['encoder_checkpoint']), "loaded from ",
                    os.path.join('_logs', encoder_params['encoder_folder'],
                                 encoder_params['encoder_exp'], 'checkpoints'))
                encoder_model.load_state_dict(encoder_checkpoint['state_dict'])
                if g_conf.FREEZE_ENCODER:
                    encoder_model.eval()
                    # To freeze the pre-trained encoder model
                    for param_ in encoder_model.parameters():
                        param_.requires_grad = False
                else:
                    optimizer = optim.Adam(list(model.parameters()) +
                                           list(encoder_model.parameters()),
                                           lr=g_conf.LEARNING_RATE)

            for name_encoder, param_encoder in encoder_model.named_parameters():
                if param_encoder.requires_grad:
                    print('  Unfrozen layers', name_encoder)
                else:
                    print('  Frozen layers', name_encoder)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        for name, param in model.named_parameters():
            if param.requires_grad:
                print('  Unfrozen layers', name)
            else:
                print('  Frozen layers', name)

        print("Before the loss")

        # Loss time series window
        for data in data_loader:

            # Basically in this mode of execution, we validate every X Steps, if it goes up 3 times,
            # add a stop on the _logs folder that is going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            model.zero_grad()
            if not g_conf.FREEZE_ENCODER:
                encoder_model.zero_grad()

            if g_conf.LABELS_SUPERVISED:
                inputs_data = torch.cat(
                    (data['rgb'], torch.zeros(g_conf.BATCH_SIZE, 1, 88, 200)),
                    dim=1).cuda()
            else:
                inputs_data = torch.squeeze(data['rgb'].cuda())

            if g_conf.MODEL_TYPE in ['separate-affordances']:
                # TODO: for these encoder model types, speed is not yet given as an input for training

                if g_conf.ENCODER_MODEL_TYPE in [
                        'action_prediction', 'stdim', 'forward',
                        'one-step-affordances'
                ]:

                    e, inter = encoder_model.forward_encoder(
                        inputs_data,
                        dataset.extract_inputs(data).cuda(),
                        # We also add measurements and commands
                        torch.squeeze(dataset.extract_commands(data).cuda()))

                elif g_conf.ENCODER_MODEL_TYPE in ['ETE']:
                    e, inter = encoder_model.forward_encoder(
                        inputs_data,
                        dataset.extract_inputs(data).cuda(),
                        torch.squeeze(dataset.extract_commands(data).cuda()))

                loss_function_params = {
                    'classification_gt':
                    dataset.extract_affordances_targets(
                        data, 'classification').cuda(),
                    # hazard stop, red_light....
                    'class_weights':
                    g_conf.AFFORDANCES_CLASS_WEIGHT,
                    'regression_gt':
                    dataset.extract_affordances_targets(data,
                                                        'regression').cuda(),
                    'variable_weights':
                    g_conf.AFFORDANCES_VARIABLE_WEIGHT
                }
                loss = model(e, loss_function_params)
                loss.backward()
                optimizer.step()

            else:
                raise RuntimeError(
                    'Not implemented yet; this branch only works for g_conf.MODEL_TYPE in [separate-affordances]'
                )
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                 g_conf.EXPERIMENT_NAME, 'checkpoints',
                                 str(iteration) + '.pth'))

                if not g_conf.FREEZE_ENCODER:
                    encoder_state = {
                        'iteration': iteration,
                        'state_dict': encoder_model.state_dict(),
                        'best_loss': best_loss,
                        'total_time': accumulated_time,
                        'optimizer': optimizer.state_dict(),
                        'best_loss_iter': best_loss_iter
                    }
                    torch.save(
                        encoder_state,
                        os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                     g_conf.EXPERIMENT_NAME, 'checkpoints',
                                     str(iteration) + '_encoder.pth'))

            iteration += 1
            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']),
                                  iteration)

            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            if iteration % 100 == 0:
                print('Train Iteration: {} [{}/{} ({:.0f}%)] \t Loss: {:.6f}'.
                      format(iteration, iteration, g_conf.NUMBER_ITERATIONS,
                             100. * iteration / g_conf.NUMBER_ITERATIONS,
                             loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #24
0
def execute(gpu, exp_batch, exp_alias):

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join('_output_logs',
                      g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml', g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)
    dataset = CoILDataset(full_dataset, transform=transforms.Compose([transforms.ToPILImage(), transforms.Resize((128, 128)), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [1.0, 1.0, 1.0])]))

    sampler = BatchSequenceSampler(splitter.control_steer_split(dataset.measurements, dataset.meta_data),
                          g_conf.BATCH_SIZE, g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
    data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler,
                                              shuffle=False, num_workers=6, pin_memory=True)

    transform = transforms.Compose([transforms.Resize((88, 200))])
    l1weight = 1.0
    image_size = tuple([88, 200])
    testmode = 1

    # print("helllooooo", g_conf.MODEL_NAME)
    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        netD = ganmodels._netD().cuda()
        netG = ganmodels._netG(skip=g_conf.SKIP).cuda()
    # else:
    #     netD = ganmodels._oldnetD().cuda()
    #     netG = ganmodels._oldnetG().cuda()

    init_weights(netD)
    init_weights(netG)
    print(netD)
    print(netG)

    optimD = torch.optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
    MSE_loss = torch.nn.MSELoss().cuda()
    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter = 0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0

    netG.eval()
    netD.eval()
    capture_time = time.time()
    for data in data_loader:

        input_data, float_data = data
        inputs = input_data['rgb'].cuda()
        inputs = inputs.squeeze(1)
        print ("Inputs", i)

        #forward pass
        fake_inputs = netG(inputs)
        if iteration % 1000 == 0:
            coil_logger.add_image("Images", torch.cat((inputs[:3], fake_inputs[:3]), 0), iteration)

        ##--------------------Discriminator part!!!!!!!!!!-----------------------
        set_requires_grad(netD, True)
        optimD.zero_grad()

        ##fake
        outputsD_fake_forD = netD(fake_inputs.detach())

        labsize = outputsD_fake_forD.size()
        #Create labels of patchgan style with label smoothing
        labels_fake = torch.zeros(labsize[0], labsize[1], labsize[2], labsize[3]) #Fake labels
        label_fake_noise = torch.rand(labels_fake.size()) * 0.5 - 0.25 #Label smoothing
        labels_fake = labels_fake + label_fake_noise
        labels_fake = Variable(labels_fake).cuda()

        lossD_fake = MSE_loss(outputsD_fake_forD, labels_fake)

        ##real
        outputsD_real = netD(inputs)

        labsize = outputsD_real.size()
        #Create labels of patchgan style with label smoothing
        labels_real = torch.ones(labsize[0], labsize[1], labsize[2], labsize[3]) #Real labels
        label_real_noise = torch.rand(labels_real.size()) * 0.5 - 0.25 #Label smoothing
        labels_real = labels_real + label_real_noise
        labels_real = Variable(labels_real).cuda()

        lossD_real = MSE_loss(outputsD_real, labels_real)

        #Discriminator updates

        lossD = (lossD_real + lossD_fake) * 0.5
        lossD /= len(inputs)
        lossD.backward() #retain_graph=True needed?
        optimD.step()


        coil_logger.add_scalar('Total LossD', lossD.data, iteration)
        coil_logger.add_scalar('Real LossD', lossD_real.data / len(inputs), iteration)
        coil_logger.add_scalar('Fake LossD', lossD_fake.data / len(inputs), iteration)

        ##--------------------Generator part!!!!!!!!!!-----------------------

        #TODO change decoder architecture
        #TODO check norms of gradients later
        #TODO add auxiliary regression loss for steering

        set_requires_grad(netD, False)
        optimG.zero_grad()
        outputsD_fake_forG = netD(fake_inputs)
        #Generator updates

        lossG_adv = MSE_loss(outputsD_fake_forG, labels_real)
        lossG_smooth = L1_loss(fake_inputs, inputs)
        lossG = lossG_adv + l1weight * lossG_smooth
        lossG /= len(inputs)

        lossG.backward() #retain_graph=True needed?
        optimG.step()

        coil_logger.add_scalar('Total LossG', lossG.data, iteration)
        coil_logger.add_scalar('Adv LossG', lossG_adv.data / len(inputs), iteration)
        coil_logger.add_scalar('Smooth LossG', lossG_smooth.data / len(inputs), iteration)

        #optimization for one iter done!

        position = random.randint(0, len(float_data)-1)

        if lossD.data < best_lossD:
            best_lossD = lossD.data.tolist()

        if lossG.data < best_lossG:
            best_lossG = lossG.data.tolist()
            best_loss_iter = iteration

        accumulated_time += time.time() - capture_time
        capture_time = time.time()
        print("LossD", lossD.data.tolist(), "LossG", lossG.data.tolist(), "BestLossD", best_lossD, "BestLossG", best_lossG, "Iteration", iteration, "Best Loss Iteration", best_loss_iter)

        coil_logger.add_message('Iterating',
                                {'Iteration': iteration,
                                 'LossD': lossD.data.tolist(),
                                 'LossG': lossG.data.tolist(),
                                 'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                                 'BestLossD': best_lossD,
                                 'BestLossG': best_lossG,
                                 'BestLossIteration': best_loss_iter,
                                 'GroundTruth': dataset.extract_targets(float_data)[position].data.tolist(),
                                 'Inputs': dataset.extract_inputs(float_data)[position].data.tolist()},
                                iteration)
        if is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter': best_loss_iter

            }
            torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
                                           , 'checkpoints', str(iteration) + '.pth'))
        if iteration == best_loss_iter:

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter': best_loss_iter

            }
            torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
                                           , 'best_modelG' + '.pth'))

        iteration += 1
Example #25
0
def execute(gpu,
            exp_batch,
            exp_alias,
            state_dict,
            suppress_output=True,
            number_of_workers=12):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case

        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset. This structure has __getitem__ redefined in a way
        # that you can access the positions from the root directory as in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augments images and
        # transforms them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read the dataset.
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
        print("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()

        if state_dict != '':
            seg_model = ERFNet_Fast(2)
            seg_model = load_my_state_dict(seg_model, torch.load(state_dict))
            seg_model.cuda()

        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print("Before the loss")

        criterion = Loss(g_conf.LOSS_FUNCTION)
        color_transforms = Colorizes(2)
        board = Dashboard(8097)

        # Loss time series window
        for data in data_loader:

            # In this mode of execution we validate every X steps; if the validation loss
            # goes up 3 times in a row, a stop flag is written to the _logs folder and read by this process.
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            iteration += 1
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # get the control commands from float_data, size = [120,1]

            capture_time = time.time()
            controls = data['directions']
            # The output (branches) is a list of 5 branch results; each branch has size [120, 3]
            model.zero_grad()
            if state_dict != '':
                with torch.no_grad():
                    repre = seg_model(torch.squeeze(data['rgb'].cuda()),
                                      only_encode=False)
                    inputs = repre
                    imgs = color_transforms(inputs)
                inputs = inputs.float().cuda()
            else:
                inputs = torch.squeeze(data['rgb'].cuda())

            # vis
            board.image(
                torch.squeeze(data['rgb'])[0].cpu().data,
                '(train) input iter: ' + str(iteration))
            # imgs only exists when the segmentation front-end produces a colorized output
            if state_dict != '':
                board.image(imgs[0].cpu().data,
                            '(train) output iter: ' + str(iteration))

            branches = model(inputs, dataset.extract_inputs(data).cuda())
            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))
            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']),
                                  iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            accumulated_time += time.time() - capture_time

            coil_logger.add_message(
                'Iterating', {
                    'Iteration':
                    iteration,
                    'Loss':
                    loss.data.tolist(),
                    'Images/s':
                    (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss':
                    best_loss,
                    'BestLossIteration':
                    best_loss_iter,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(data)[position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(data)[position].data.tolist()
                }, iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
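In the loop above, model.extract_branch(torch.stack(branches[0:4]), controls) picks, for every sample in the batch, the output of the branch that matches its control command. A minimal sketch of that kind of indexing, assuming four branches and direction commands encoded as 2.0-5.0; the helper name pick_branch_outputs is made up for illustration, not the repository's API:

import torch

def pick_branch_outputs(branches_stack, controls):
    # branches_stack: [num_branches, batch, num_outputs], e.g. torch.stack(branches[0:4])
    # controls: [batch, 1] direction commands, assumed to be encoded as 2.0 .. 5.0
    branch_index = (controls - 2).long().flatten()         # map 2..5 -> 0..3
    batch_index = torch.arange(branches_stack.shape[1])
    return branches_stack[branch_index, batch_index, :]    # [batch, num_outputs]

# usage sketch
branches_stack = torch.randn(4, 120, 3)
controls = torch.randint(2, 6, (120, 1)).float()
print(pick_branch_outputs(branches_stack, controls).shape)  # torch.Size([120, 3])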
Example #26
0
def execute(gpu,
            exp_batch,
            exp_alias,
            suppress_output=True,
            number_of_workers=12):
    """
        The main training function. This function loads the latest checkpoint
        for a given exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it either starts training from scratch or resumes it.
    Args:
        gpu: the GPU ids used for training
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: whether stdout/stderr are redirected to a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpu)
        g_conf.VARIABLE_WEIGHT = {}

        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        set_type_of_process('train')

        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case
        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
            print('iteration: ', iteration, 'best_loss: ', best_loss)
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset. This structure has __getitem__ redefined so that
        # samples under the root directory can be accessed by index, as in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augments images and transforms them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read the dataset
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
        print("Loaded dataset")

        # Create the sampler. This part is responsible for managing the keys: it divides
        # all keys depending on the measurements and produces a set of keys for each batch.
        # The mini-batch sampling strategy is defined here; different samplers can be found in 'splitter.py'.
        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)

        # Instantiate the network architecture
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()

        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)  # adabound and adamio can also be used here

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:
            # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        # freeze the perception module weights if required
        # for m in model.perception.parameters():
        #     m.requires_grad = False

        # total trainable parameters
        model_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
        total_params = sum([np.prod(p.size()) for p in model_parameters])
        print('trainable parameters: ', total_params)

        # multi-gpu
        print('number of gpus: ', torch.cuda.device_count())
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)

        criterion = Loss(g_conf.LOSS_FUNCTION)

        print('Start Training')

        st = time.time()
        for data in data_loader:

            # use this for early stopping if the validation loss is not coming down
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            iteration += 1

            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # additional learning rate scheduler - cyclic cosine annealing (https://arxiv.org/pdf/1704.00109.pdf)
            # adjust_learning_rate_cosine_annealing(optimizer, loss_window, iteration)

            capture_time = time.time()
            controls = data['directions']
            model.zero_grad()
            branches = model(torch.squeeze(data['rgb'].cuda()),
                             dataset.extract_inputs(data).cuda())
            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):
                if torch.cuda.device_count() > 1:
                    state_dict_save = model.module.state_dict()
                else:
                    state_dict_save = model.state_dict()

                state = {
                    'iteration': iteration,
                    'state_dict': state_dict_save,
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))
            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']),
                                  iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(data) - 1)

            if torch.cuda.device_count() > 1:
                output = model.module.extract_branch(
                    torch.stack(branches[0:4]), controls)
            else:
                output = model.extract_branch(torch.stack(branches[0:4]),
                                              controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            accumulated_time += time.time() - capture_time

            coil_logger.add_message(
                'Iterating', {
                    'Iteration':
                    iteration,
                    'Loss':
                    loss.data.tolist(),
                    'Images/s':
                    (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss':
                    best_loss,
                    'BestLossIteration':
                    best_loss_iter,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(data)[position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(data)[position].data.tolist()
                }, iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, loss.data))
            st = time.time()

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
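This multi-GPU variant saves model.module.state_dict() when the model is wrapped in nn.DataParallel, so the checkpoint keys carry no 'module.' prefix and stay loadable by a plain single-GPU instance. A small self-contained sketch of the same pattern, with a toy model and a hypothetical helper name:

import torch
import torch.nn as nn

def unwrapped_state_dict(model):
    # nn.DataParallel keeps the real network under .module; saving that state_dict
    # keeps the checkpoint loadable by an unwrapped instance of the same architecture.
    if isinstance(model, nn.DataParallel):
        return model.module.state_dict()
    return model.state_dict()

# usage sketch with a toy model
model = nn.Linear(10, 3)
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
torch.save({'state_dict': unwrapped_state_dict(model)}, 'toy_checkpoint.pth')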
Example #27
0
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """
        The main training function. This function loads the latest checkpoint
        for a given exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it either starts training from scratch or resumes it.
    Args:
        gpu: the GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: whether stdout/stderr are redirected to a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case

        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join('_output_logs', exp_alias + '_' +
                              g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a",
                              buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                              exp_alias + '_err_'+g_conf.PROCESS_NAME + '_'
                                           + str(os.getpid()) + ".out"),
                              "a", buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                                                  g_conf.PRELOAD_MODEL_ALIAS,
                                                 'checkpoints',
                                                 str(g_conf.PRELOAD_MODEL_CHECKPOINT)+'.pth'))


        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias,
                                    'checkpoints', str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0


        # Define the dataset. This structure has __getitem__ redefined so that
        # samples under the root directory can be accessed by index, as in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augments images and
        # transforms them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read the dataset.
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS)
                                               + 'hours_' + g_conf.TRAIN_DATASET_NAME)
        print ("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration, number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)
        

        # Set ERFnet for segmentation
        model_erf = ERFNet(20)
        model_erf = torch.nn.DataParallel(model_erf)
        model_erf = model_erf.cuda()        
        
        print("LOAD ERFNet")
        def load_my_state_dict(model, state_dict):  # custom loader for checkpoints that do not contain all dict elements
            own_state = model.state_dict()
            for name, param in state_dict.items():
                if name not in own_state:
                    continue
                own_state[name].copy_(param)
            return model
        
        model_erf = load_my_state_dict(model_erf, torch.load(os.path.join('trained_models/erfnet_pretrained.pth')))
        model_erf.eval()
        print ("ERFNet and weights LOADED successfully")

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []
       

        print ("Before the loss")

        criterion = Loss(g_conf.LOSS_FUNCTION)

        # Loss time series window
        for data in data_loader:

            # In this mode of execution we validate every X steps; if the validation loss
            # goes up 3 times in a row, a stop flag is written to the _logs folder and read by this process.
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            iteration += 1
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # get the control commands from float_data, size = [120,1]

            capture_time = time.time()
            controls = data['directions']
            # The output (branches) is a list of 5 branch results; each branch has size [120, 3]
            model.zero_grad()

            # print("Segmentation")
            # use ERFNet to convert RGB to Segmentation
            rgbs = data['rgb']
            filenames = data['rgb_name']

            # # seg one by one
            # seg_road = []
            # seg_not_road = []
            # i = 0
            # for inputs in rgbs:
            #     inputs = inputs.unsqueeze(0)
            #     # print("inputs ",inputs.shape)
            #     with torch.no_grad():
            #         outputs = model_erf(inputs)

            #     label = outputs[0].max(0)[1].byte().cpu().data

            #     road = (label == 0)
            #     not_road = (label != 0)
            #     seg_road.append(road)
            #     seg_not_road.append(not_road)   

            #     # # print("label ",label.shape)
            #     # label_color = Colorize()(label.unsqueeze(0))
            #     # filename = filenames[i]                
            #     # filenameSave = "./save_color/" + filename.split("CoILTrain/")[1]
            #     # os.makedirs(os.path.dirname(filenameSave), exist_ok=True)
                   
            #     # label_save = ToPILImage()(label_color)           
            #     # label_save.save(filenameSave) 
            #     # # print (i, filenameSave)
            #     # i += 1                 

            # seg_road = torch.stack(seg_road)
            # seg_not_road = torch.stack(seg_not_road)
            # seg = torch.stack([seg_road,seg_not_road]).transpose(0,1).float()
            # # print(seg.shape)
            
            # seg batch
            with torch.no_grad():
                outputs = model_erf(rgbs)
            # print("outputs.shape ",outputs.shape)
            labels = outputs.max(1)[1].byte().cpu().data
            # print("labels.shape",labels.shape)
            # print(np.unique(labels[0])) 

            seg_road = (labels==0)
            seg_not_road = (labels!=0)
            seg = torch.stack((seg_road,seg_not_road),1).float()

            # save 1st batch's segmentation results
            if iteration == 1:
                for i in range(120):
                    label = seg[i,0,:,:]
                    label_color = Colorize()(label.unsqueeze(0))               
                    filenameSave = "./save_color/batch_road_mask/%d.png"%(i)
                    os.makedirs(os.path.dirname(filenameSave), exist_ok=True)                   
                    label_save = ToPILImage()(label_color)           
                    label_save.save(filenameSave)

                    label = labels[i,:,:]
                    label_color = Colorize()(label.unsqueeze(0))               
                    filenameSave = "./save_color/batch_road/%d.png"%(i)
                    os.makedirs(os.path.dirname(filenameSave), exist_ok=True)                   
                    label_save = ToPILImage()(label_color)           
                    label_save.save(filenameSave)


            branches = model(torch.squeeze(seg).cuda(),
                             dataset.extract_inputs(data).cuda())
#             branches = model(torch.squeeze(rgbs.cuda()),
#                              dataset.extract_input(data).cuda())

            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(state, os.path.join('_logs', exp_batch, exp_alias
                                               , 'checkpoints', str(iteration) + '.pth'))

            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            accumulated_time += time.time() - capture_time

            coil_logger.add_message('Iterating',
                                    {'Iteration': iteration,
                                     'Loss': loss.data.tolist(),
                                     'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                                     'BestLoss': best_loss, 'BestLossIteration': best_loss_iter,
                                     'Output': output[position].data.tolist(),
                                     'GroundTruth': dataset.extract_targets(data)[
                                         position].data.tolist(),
                                     'Error': error[position].data.tolist(),
                                     'Inputs': dataset.extract_inputs(data)[
                                         position].data.tolist()},
                                    iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
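The batched segmentation step in this example reduces the ERFNet logits to a two-channel road / not-road tensor before it is passed to the driving network. A minimal sketch of that conversion, assuming class id 0 means road as the example does; the function name and the shapes in the usage lines are only illustrative:

import torch

def road_mask_channels(seg_logits):
    # seg_logits: [batch, num_classes, H, W] raw segmentation outputs.
    # Class id 0 is assumed to mean "road", as in the example above.
    labels = seg_logits.max(1)[1]                             # [batch, H, W] per-pixel class ids
    seg_road = (labels == 0)
    seg_not_road = (labels != 0)
    return torch.stack((seg_road, seg_not_road), 1).float()   # [batch, 2, H, W]

# usage sketch with illustrative shapes
logits = torch.randn(2, 20, 88, 200)
print(road_mask_channels(logits).shape)  # torch.Size([2, 2, 88, 200])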
Example #28
0
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output=True, yaml_file=None):
    latest = None
    # try:
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias+'.yaml')
    if yaml_file is not None:
        path_to_yaml_file = os.path.join(yaml_file, exp_alias+'.yaml')
    merge_with_yaml(path_to_yaml_file)
    # The validation dataset is always fully loaded, so we fix a very high number of hours
    # g_conf.NUMBER_OF_HOURS = 10000 # removed to simplify code
    
    """
    # check again if this segment is required or not
    set_type_of_process('validation', dataset_name)

    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')

    if suppress_output:
        sys.stdout = open(os.path.join('_output_logs',
                                       exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
        sys.stderr = open(os.path.join('_output_logs',
                          exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
    """

    # Define the dataset. This structure has __getitem__ redefined so that
    # the HDFILES positions under the root directory can be accessed by index, as in a vector.
    
    dataset_name = dataset_name.split('_')[-1] # since preload file has '<X>hours_' as prefix whereas dataset folder does not
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name) # original code
    augmenter = Augmenter(None)

    print ('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset, transform=augmenter, preload_name=args.dataset_name)

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=g_conf.BATCH_SIZE,
                                              shuffle=False,
                                              num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
                                              pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

    """ removing this segment to simplify code
    # The window used to keep track of the trainings
    l1_window = []
    latest = get_latest_evaluated_checkpoint()
    if latest is not None:  # When latest is not None
        l1_window = coil_logger.recover_loss_window(dataset_name, None)
    """
    
    model.cuda()

    best_mse = 1000
    best_error = 1000
    best_mse_iter = 0
    best_error_iter = 0

    # modified validation code from here to run a single model
    checkpoint = torch.load(args.checkpoint)
    checkpoint_iteration = checkpoint['iteration']
    print("model loaded ", checkpoint_iteration)

    model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    accumulated_mse = 0
    accumulated_error = 0
    iteration_on_checkpoint = 0

    print ('data_loader size: ', len(data_loader))
    total_error = []
    for data in data_loader:

        # Compute the forward pass on a batch from the loaded dataset
        controls = data['directions']
        branches = model(torch.squeeze(data['rgb'].cuda()),
                             dataset.extract_inputs(data).cuda())
        output = model.extract_branch(torch.stack(branches[0:4]), controls)
        error = torch.abs(output - dataset.extract_targets(data).cuda())
        total_error += error.detach().cpu().tolist()
        
        iteration_on_checkpoint += 1
        if iteration_on_checkpoint % 50 == 0:
            print ('iteration: ', iteration_on_checkpoint)

    total_error = np.array(total_error)
    print (len(total_error), total_error.shape)

    np.save(os.path.join(args.save_path, args.dataset_name, 'computed_error.npy'), total_error)
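The validation snippet above stores the per-sample absolute errors in computed_error.npy. A possible follow-up, sketched here with a stand-in array instead of the saved file, is to reduce those errors to a mean absolute error per output; the column order (steer, throttle, brake) is an assumption about the target layout:

import numpy as np

# Stand-in for errors = np.load('computed_error.npy'); shape [num_samples, num_targets]
errors = np.abs(np.random.randn(1000, 3))
mae_per_output = errors.mean(axis=0)
for name, value in zip(('steer', 'throttle', 'brake'), mae_per_output):
    print('%s MAE: %.4f' % (name, value))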
Example #29
0
def execute(gpu, exp_batch, exp_alias):
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join(exp_batch, exp_alias+'.yaml'))
    set_type_of_process('train')


    sys.stdout = open(str(os.getpid()) + ".out", "a", buffering=1)



    if monitorer.get_status(exp_batch, exp_alias, g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not ?
        return

    # Define the dataset. This structure has __getitem__ redefined so that
    # the HDFILES positions under the root directory can be accessed by index, as in a vector.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.DATASET_NAME)

    dataset = CoILDataset(full_dataset, transform=transforms.Compose([transforms.ToTensor()]))

    # Create the sampler. This part is responsible for managing the keys: it divides
    # all keys depending on the measurements and produces a set of keys for each batch.
    sampler = CoILSampler(splitter.control_steer_split(dataset.measurements, dataset.meta_data))

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    # TODO: batch size and number of workers should go to some configuration file
    data_loader = torch.utils.data.DataLoader(dataset, sampler=sampler, batch_size=120,
                                              shuffle=False, num_workers=12, pin_memory=True)
    # By instantiating the augmenter we get a callable that augments images and transforms them
    # into tensors.
    augmenter = iag.Augmenter(g_conf.AUGMENTATION_SUITE)

    # TODO: here there is clearly a possibility to make a cool "conditioning" system.

    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()
    print(model)

    criterion = Loss()

    # TODO: DATASET SIZE SEEMS WEIRD
    optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)


    checkpoint_file = get_latest_saved_checkpoint()
    if checkpoint_file is not None:
        checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias,
                                 'checkpoints', str(get_latest_saved_checkpoint())))
        iteration = checkpoint['iteration']
    else:
        iteration = 0

    # TODO: The checkpoint will continue, so the logs should restart ??? OR continue where it was



    print (dataset.meta_data)

    print (model)

    for data in data_loader:

        input_data, labels = data

        # TODO: we have to split the input from the other data.

        #TODO, ADD ITERATION SCHEDULE
        input_rgb_data = augmenter(0, input_data['rgb'])

        # get the control commands from labels, size = [120,1]
        controls = labels[:, 24, :]

        # The output(branches) is a list of 5 branches results, each branch is with size [120,3]
        model.zero_grad()
        branches = model(input_rgb_data, labels[:, 10, :].cuda())

        #print ("len ",len(branches))

        # get the steer, gas and brake ground truth from labels
        steer_gt = labels[:, 0, :]
        gas_gt = labels[:, 1, :]
        brake_gt = labels[:, 2, :]
        speed_gt = labels[:, 10, :]

        targets = torch.cat([steer_gt, gas_gt, brake_gt], 1)

        loss = criterion.MSELoss(branches, targets.cuda(), controls.cuda(), speed_gt.cuda())

        loss.backward()
        optimizer.step()

        # TODO: save also the optimizer state dictionary
        if is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'state_dict': model.state_dict()
            }
            # TODO : maybe already summarize the best model ???
            torch.save(state, os.path.join('_logs', exp_batch, exp_alias
                                           , 'checkpoints', str(iteration) + '.pth'))
        iteration += 1
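In this older example, criterion.MSELoss receives the branch outputs together with the control commands, which suggests a loss that only penalizes the branch matching each sample's command. One common formulation is sketched below; the function name and the 2.0-5.0 command encoding are assumptions, not the repository's actual implementation:

import torch

def branched_mse(branches, targets, controls, num_branches=4):
    # branches: list of [batch, 3] tensors, one per control command
    # targets:  [batch, 3] ground truth (steer, gas, brake)
    # controls: [batch, 1] direction commands, assumed encoded as 2.0 .. 5.0
    loss = 0.0
    for i in range(num_branches):
        mask = (controls == i + 2).float()                   # selects the samples belonging to branch i
        loss = loss + ((branches[i] - targets) ** 2 * mask).mean()
    return loss

# usage sketch
branches = [torch.randn(8, 3) for _ in range(4)]
targets = torch.randn(8, 3)
controls = torch.randint(2, 6, (8, 1)).float()
print(branched_mse(branches, targets, controls))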
Example #30
0
from time import gmtime, strftime

manualSeed = config['seed']
torch.cuda.manual_seed(manualSeed)
os.environ["CUDA_VISIBLE_DEVICES"] = opts.gpu

print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
if opts.nospeed:
    print("Wayve style with no speed")

full_dataset = config['train_dataset_name']
real_dataset = config['target_domain_path']

dataset = CoILDataset(full_dataset,
                      transform=transforms.Compose([
                          transforms.ToTensor(),
                          transforms.Normalize((0.5, 0.5, 0.5),
                                               (0.5, 0.5, 0.5))
                      ]))

real_trans = transforms.Compose(
    [transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

sampler = BatchSequenceSampler(
    splitter.control_steer_split(dataset.measurements, dataset.meta_data),
    config['batch_size'], 1, 1, config)
data_loader = torch.utils.data.DataLoader(dataset,
                                          batch_sampler=sampler,
                                          shuffle=False,
                                          num_workers=6,
                                          pin_memory=True)