Example #1
    def eval(self, iteration=-1, summary_writer=None):
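        """Run the derenderer over the validation loader without gradients,
        accumulate predictions and ground-truth attributes, log per-attribute
        errors and validation losses, and return the error dict. Breaks out
        early once SOLVER.VALIDATION_MAX_SECS has elapsed."""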
        start = time.time()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            # self.derenderer.eval()
            val_metrics = MetricLogger(delimiter="  ")
            val_loss_logger = MetricLogger(delimiter="  ")
            for i, inputs in enumerate(self.val_loader, iteration):
                # data_time = time.time() - last_batch_time

                if torch.cuda.is_available():
                    inputs = to_cuda(inputs)

                output = self.derenderer(inputs)

                loss_dict = gather_loss_dict(output)
                val_loss_logger.update(**loss_dict)
                if summary_writer is not None:
                    summary_writer.add_scalars("val_non_smooth", val_loss_logger.last_item, i)

                all_preds.append({k:v.cpu().numpy() for k,v in output["output"].items()})
                all_labels.append({k:v.cpu().numpy() for k,v in inputs["attributes"].items()})
                # all_labels = self.attributes.cat_by_key(all_labels, inputs["attributes"])
                # all_preds = self.attributes.cat_by_key(all_preds, output['output'])


                # batch_time = time.time() - last_batch_time
                # val_metrics.update(time=batch_time, data=data_time)
                if time.time() - start > self.cfg.SOLVER.VALIDATION_MAX_SECS:
                    break

            all_preds, all_labels = map(lambda l: {k: np.concatenate([a[k] for a in l]) for k in l[0].keys()},
                                        [all_preds, all_labels])
            # all_preds = {k: np.concatenate([a[k] for a in all_preds]) for k in all_preds[0].keys()}
            # all_labels = {k: np.concatenate([a[k] for a in all_labels]) for k in all_labels[0].keys()}
            err_dict = self.attributes.pred_error(all_preds, all_labels)
            val_metrics.update(**err_dict)
            log.info(val_metrics.delimiter.join(["VALIDATION", "iter: {iter}", "{meters}"])
                     .format(iter=iteration, meters=str(val_metrics)))
            log.info(val_metrics.delimiter.join(["VALIDATION", "iter: {iter}", "{meters}"])
                     .format(iter=iteration, meters=str(val_loss_logger)))
            if summary_writer is not None:
                summary_writer.add_scalars("val_error", val_metrics.mean, iteration)
                summary_writer.add_scalars("val", val_loss_logger.mean, iteration)
            # self.derenderer.train()
        return err_dict
Example #2
    def compute_mask(self, attributes, term):
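        """Return a float mask over the batch marking the entries for which
        `term` is valid, based on the category constraints in self.valid_map;
        if `term` has no constraints, an all-ones mask is returned."""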
        attributes = to_cuda(attributes)
        if term in self.valid_map:
            all_masks = []
            for val_term, valid_els in self.valid_map[term].items():
                categories_map = getattr(self, "{}_map".format(val_term))
                valid_categories = torch.LongTensor(
                    [categories_map[el] for el in valid_els])

                val_vector = attributes[val_term].view(-1, 1).repeat(
                    1, len(valid_categories)).cuda()
                valid_categories = valid_categories.repeat(len(val_vector),
                                                           1).cuda()

                mask = ((val_vector - valid_categories).abs().min(
                    dim=1).values == 0).float()
                all_masks.append(mask)
            return reduce(lambda x, y: x * y, all_masks)
        else:
            return torch.ones(len(attributes[term]), dtype=torch.float).cuda()
Example #3
def write_with_inferred_attributes(cfg, split, attributes_key):
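    """Run a trained DerenderPredictor over every annotation of `split` and
    write the inferred attributes back into the standard-format JSON file
    under `attributes_key`."""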
    timer = CodeTimer(
        "adding inferred attributes split:{}, attributes_key:{}".format(
            split, attributes_key))
    module_cfg = os.path.join(cfg.TRAINED_DERENDER.EXP_DIR, "cfg.yaml")
    module_cfg = load_cfg_from_file(module_cfg)
    module_cfg.MODEL.WEIGHTS = cfg.TRAINED_DERENDER.ATTRIBUTES_WEIGHTS_MAP[
        attributes_key]

    module_cfg.DATALOADER.OBJECTS_PER_BATCH = 1000 if cfg.BASE_NAME == "intphys" else 450
    module_cfg.DATALOADER.NUM_WORKERS = 8 if cfg.BASE_NAME == "adept" else module_cfg.DATALOADER.NUM_WORKERS

    if cfg.DEBUG:
        module_cfg.DATALOADER.NUM_WORKERS = 0
        module_cfg.DEBUG = True
        module_cfg.DATALOADER.OBJECTS_PER_BATCH = 50

    predictor = DerenderPredictor(module_cfg)

    # if not cfg.DEBUG:
    #     gpu_ids = [_ for _ in range(torch.cuda.device_count())]
    #     predictor.derenderer = torch.nn.parallel.DataParallel(predictor.derenderer, gpu_ids)

    dataset_name, standard_format_json_file = get_dataset_name_and_json(
        cfg, split)
    dataset = DatasetCatalog.get(dataset_name)
    required_fields = [
        "pred_box"
    ] if cfg.TRAINED_DERENDER.USE_INFERRED_BOXES else ["bbox"]
    filtered_idx, mapped_dataset = image_based_to_annotation_based(
        dataset, required_fields)
    mapped_dataset = DatasetFromList(mapped_dataset, copy=False)
    mapper = DerenderMapper(cfg.TRAINED_DERENDER.USE_INFERRED_BOXES,
                            predictor.attributes,
                            for_inference=True,
                            use_depth=cfg.TRAINED_DERENDER.USE_DEPTH)
    mapped_dataset = MapDataset(mapped_dataset, mapper)

    data_loader = DataLoader(
        dataset=mapped_dataset,
        batch_size=module_cfg.DATALOADER.OBJECTS_PER_BATCH,
        num_workers=module_cfg.DATALOADER.NUM_WORKERS,
        shuffle=False)

    fil_pointer = 0
    with torch.no_grad():
        for inputs in data_loader:
            inputs = to_cuda(inputs)
            outputs = predictor(inputs)
            batch_size = list(outputs.values())[0].shape[0]
            for oix, (img_idx, an_idx) in zip(
                    range(batch_size),
                    filtered_idx[fil_pointer:fil_pointer + batch_size]):

                dataset[img_idx]["annotations"][an_idx][attributes_key] = \
                    {k: v[oix].item() for k, v in outputs.items()}
                # {k: v[oix].item() if v[oix].size == 1
                #                   else [float(el) for el in v[oix]]
                # for k,v in outputs.items()}

            fil_pointer = fil_pointer + batch_size

    dataset = [fix_for_serialization(d) for d in dataset]

    with open(standard_format_json_file, "w") as f:
        json.dump(dataset, f, indent=4)

    timer.done()
Example #4
    def train(self, log_flag=True):
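        """Time-based training loop: forward/backward over the train loader
        with periodic logging, checkpointing, tensorboard summaries and
        validation; stops after SOLVER.MAX_TIME_SECS, then runs a final
        evaluation and returns its error dict."""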
        train_metrics = MetricLogger(delimiter="  ")
        summary_writer = SummaryWriter(log_dir=os.path.join(self.output_dir, "summary"))

        self.derenderer.train()

        # Initialize timing
        timers = create_new_timer()

        done = False
        while not done:
            for iteration, inputs in enumerate(self.train_loader, self.start_iteration):
                iter_time = time.time()
                data_time = iter_time - timers.batch

                if torch.cuda.is_available():
                    inputs = to_cuda(inputs)

                output = self.derenderer(inputs)

                loss_dict = gather_loss_dict(output)
                loss = loss_dict['loss']
                # loss = sum([loss_dict[term] for term in ['x', 'y', 'z']])

                if torch.isnan(loss).any():
                    raise Nan_Exception()

                train_metrics.update(**loss_dict)
                summary_writer.add_scalars("train_non_smooth", train_metrics.last_item, iteration)


                batch_time = iter_time - timers.batch
                timers.batch = iter_time
                train_metrics.update(time=batch_time, data=data_time)
                eta_seconds = timers.start + self.cfg.SOLVER.MAX_TIME_SECS - iter_time
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

                if (iter_time - timers.log > self.cfg.SOLVER.PRINT_METRICS_TIME and log_flag):
                    timers.log = iter_time
                    log.info(train_metrics.delimiter.join(["eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}",
                                                           "max mem: {memory:.0f}"]).format(
                        eta=eta_string,
                        iter=iteration,
                        meters=str(train_metrics),
                        lr=self.optimizer.param_groups[0]["lr"],
                        memory=proc_id.memory_info().rss / 1e9)
                    )
                    summary_writer.add_scalars("train", train_metrics.mean, iteration)

                if iter_time - timers.checkpoint > self.cfg.SOLVER.CHECKPOINT_SECS: #iteration % checkpoint_period == 0:
                    timers.checkpoint = iter_time
                    self.checkpointer.save("model_{:07d}".format(iteration))

                if iter_time - timers.tensorboard > self.cfg.SOLVER.TENSORBOARD_SECS or self.cfg.DEBUG:
                    timers.tensorboard = iter_time
                    summary_writer.add_scalars("train", train_metrics.mean, iteration)


                if iter_time - timers.start > self.cfg.SOLVER.MAX_TIME_SECS:
                    log.info("finished training loop in {}".format(iter_time-timers.start))
                    done = True
                    break

                if iter_time - timers.validation > self.cfg.SOLVER.VALIDATION_SECS:
                    err_dict = self.eval(iteration, summary_writer)
                    timers.validation = time.time()

                loss.backward()
                self.optimizer.step()
                self.scheduler.step()
                self.optimizer.zero_grad()

            log.info("*******  epoch done  after {}  *********".format(time.time() - timers.epoch))
            timers.epoch = time.time()
            self.start_iteration = iteration

        err_dict = self.eval(iteration, summary_writer)

        self.checkpointer.save("model_{:07d}".format(iteration))
        summary_writer.close()
        return err_dict
Example #5
def train(cfg, args):
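    """Iteration-based training loop: cycles over the train loader up to
    SOLVER.MAX_ITER with periodic logging, visualization, checkpointing and
    validation, then saves a final checkpoint."""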
    train_set = DatasetCatalog.get(cfg.DATASETS.TRAIN, args)
    val_set = DatasetCatalog.get(cfg.DATASETS.VAL, args)
    train_loader = DataLoader(train_set,
                              cfg.SOLVER.IMS_PER_BATCH,
                              num_workers=cfg.DATALOADER.NUM_WORKERS,
                              shuffle=True)
    val_loader = DataLoader(val_set,
                            cfg.SOLVER.IMS_PER_BATCH,
                            num_workers=cfg.DATALOADER.NUM_WORKERS,
                            shuffle=True)

    gpu_ids = [_ for _ in range(torch.cuda.device_count())]
    model = build_model(cfg)
    model.to("cuda")
    model = torch.nn.parallel.DataParallel(
        model, gpu_ids) if not args.debug else model

    logger = logging.getLogger("train_logger")
    logger.info("Start training")
    train_metrics = MetricLogger(delimiter="  ")
    max_iter = cfg.SOLVER.MAX_ITER
    output_dir = cfg.OUTPUT_DIR

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
    checkpointer = Checkpointer(model, optimizer, scheduler, output_dir,
                                logger)
    start_iteration = checkpointer.load() if not args.debug else 0

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    validation_period = cfg.SOLVER.VALIDATION_PERIOD
    summary_writer = SummaryWriter(log_dir=os.path.join(output_dir, "summary"))
    visualizer = train_set.visualizer(cfg.VISUALIZATION)(summary_writer)

    model.train()
    start_training_time = time.time()
    last_batch_time = time.time()

    for iteration, inputs in enumerate(cycle(train_loader), start_iteration):
        data_time = time.time() - last_batch_time
        iteration = iteration + 1
        scheduler.step()

        inputs = to_cuda(inputs)
        outputs = model(inputs)

        loss_dict = gather_loss_dict(outputs)
        loss = loss_dict["loss"]
        train_metrics.update(**loss_dict)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time = time.time() - last_batch_time
        last_batch_time = time.time()
        train_metrics.update(time=batch_time, data=data_time)

        eta_seconds = train_metrics.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                train_metrics.delimiter.join([
                    "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}",
                    "max mem: {memory:.0f}"
                ]).format(eta=eta_string,
                          iter=iteration,
                          meters=str(train_metrics),
                          lr=optimizer.param_groups[0]["lr"],
                          memory=torch.cuda.max_memory_allocated() / 1024.0 /
                          1024.0))
            summary_writer.add_scalars("train", train_metrics.mean, iteration)

        if iteration % 100 == 0:
            visualizer.visualize(inputs, outputs, iteration)

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration))

        if iteration % validation_period == 0:
            with torch.no_grad():
                val_metrics = MetricLogger(delimiter="  ")
                for i, inputs in enumerate(val_loader):
                    data_time = time.time() - last_batch_time

                    inputs = to_cuda(inputs)
                    outputs = model(inputs)

                    loss_dict = gather_loss_dict(outputs)
                    val_metrics.update(**loss_dict)

                    batch_time = time.time() - last_batch_time
                    last_batch_time = time.time()
                    val_metrics.update(time=batch_time, data=data_time)

                    if i % 20 == 0 or i == cfg.SOLVER.VALIDATION_LIMIT:
                        logger.info(
                            val_metrics.delimiter.join([
                                "VALIDATION", "eta: {eta}", "iter: {iter}",
                                "{meters}"
                            ]).format(eta=eta_string,
                                      iter=iteration,
                                      meters=str(val_metrics)))

                    if i == cfg.SOLVER.VALIDATION_LIMIT:
                        summary_writer.add_scalars("val", val_metrics.mean,
                                                   iteration)
                        break
        if iteration == max_iter:
            break

    checkpointer.save("model_{:07d}".format(max_iter))
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))

    def train(self):
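        """Time-based training loop variant: dumps the run configuration,
        then trains until SOLVER.MAX_TIME_SECS with periodic logging,
        checkpointing, tensorboard summaries and validation."""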
        # save new  configuration
        with open(os.path.join(self.output_dir, "cfg.yaml"), 'w') as f:
            x = self.cfg.dump(indent=4)
            f.write(x)

        log.info(f'New training run with configuration:\n{self.cfg}\n\n')
        train_metrics = MetricLogger(delimiter="  ")
        summary_writer = SummaryWriter(log_dir=os.path.join(self.output_dir, "summary"))

        self.model.train()
        # Initialize timing
        timers = create_new_timer()

        done = False
        while not done:
            for iteration, inputs in enumerate(self.train_loader, self.start_iteration):
                iter_time = time.time()
                data_time = iter_time - timers.batch
                inputs = to_cuda(inputs)

                out = self.model(inputs)
                loss_dict = out['loss_dict']
                loss = loss_dict["loss"]

                if torch.isnan(loss).any():
                    raise Nan_Exception()

                train_metrics.update(**loss_dict)
                summary_writer.add_scalars("train_non_smooth", train_metrics.last_item, iteration)

                batch_time = iter_time - timers.batch
                timers.batch = iter_time
                train_metrics.update(time=batch_time, data=data_time)
                eta_seconds = timers.start + self.cfg.SOLVER.MAX_TIME_SECS - iter_time
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

                if (iter_time - timers.log > self.cfg.SOLVER.PRINT_METRICS_TIME):
                    timers.log = iter_time
                    log.info(train_metrics.delimiter.join(["eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}",
                                                           "max mem: {memory:.0f}"]).format(
                        eta=eta_string,
                        iter=iteration,
                        meters=str(train_metrics),
                        lr=self.optimizer.param_groups[0]["lr"],
                        memory=proc_id.memory_info().rss / 1e9)
                    )
                    summary_writer.add_scalars("train", train_metrics.mean, iteration)

                if iter_time - timers.checkpoint > self.cfg.SOLVER.CHECKPOINT_SECS:  # iteration % checkpoint_period == 0:
                    timers.checkpoint = iter_time
                    self.checkpointer.save("model_{:07d}".format(iteration))

                if iter_time - timers.tensorboard > self.cfg.SOLVER.TENSORBOARD_SECS or self.cfg.DEBUG:
                    timers.tensorboard = iter_time
                    summary_writer.add_scalars("train", train_metrics.mean, iteration)

                if iter_time - timers.start > self.cfg.SOLVER.MAX_TIME_SECS:
                    log.info("finished training loop in {}".format(iter_time - timers.start))
                    done = True
                    break

                if iter_time - timers.validation > self.cfg.SOLVER.VALIDATION_SECS:
                    err_dict = self.eval(iteration, summary_writer)
                    timers.validation = time.time()

                loss.backward()
                self.optimizer.step()
                self.scheduler.step()
                self.optimizer.zero_grad()

            log.info("*******  epoch done  after {}  *********".format(time.time() - timers.epoch))
            timers.epoch = time.time()
            self.start_iteration = iteration

    def eval(self, iteration, summary_writer):
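        """Evaluate the model on the validation loader, recording per-sample
        plausibility (is_possible vs. the model's magic_penalty) to
        output.json and logging the validation losses."""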
        start = time.time()
        all_preds = []
        all_labels = []

        evals = []
        with torch.no_grad():
            self.model.eval()
            # self.derenderer.eval()
            val_metrics = MetricLogger(delimiter="  ")
            val_loss_logger = MetricLogger(delimiter="  ")
            for i, inputs in enumerate(self.val_loader, iteration):
                # data_time = time.time() - last_batch_time

                if torch.cuda.is_available():
                    inputs = to_cuda(inputs)

                output = self.model(inputs, match=True)
                loss_dict = output["loss_dict"]
                is_possible = inputs['is_possible']
                magic_penalty = output['magic_penalty']

                for j in range(len(magic_penalty)):
                    frame = {}
                    frame['is_possible'] = bool(is_possible[j])
                    frame['inverse_likelihood'] = float(magic_penalty[j])
                    evals.append(frame)

                # target = inputs['targets']
                # output = output['output']
                # is_possible = inputs['is_possible']

                # loc_x_gt = target['location_x']
                # loc_y_gt = target['location_y']
                # loc_z_gt = target['location_z']

                # output_x = output['location_x'].squeeze()
                # output_y = output['location_y'].squeeze()
                # output_z = output['location_z'].squeeze()
                # existance = target['existance'][:, 1:]

                # loss_trans_x = torch.pow(output_x - loc_x_gt[:, 1:], 2) * existance
                # loss_trans_y = torch.pow(output_y - loc_y_gt[:, 1:], 2) * existance
                # loss_trans_z = torch.pow(output_z - loc_z_gt[:, 1:], 2) * existance

                # loss_trans_x = loss_trans_x.mean(dim=2).mean(dim=1)
                # loss_trans_y = loss_trans_y.mean(dim=2).mean(dim=1)
                # loss_trans_z = loss_trans_z.mean(dim=2).mean(dim=1)

                # loss = loss_trans_z + loss_trans_y + loss_trans_x
                # energy_pos = loss[is_possible]
                # energy_neg = loss[~is_possible]

                # energy_pos = energy_pos.detach().cpu().numpy()
                # energy_neg = energy_neg.detach().cpu().numpy()

                # for i in range(energy_pos.shape[0]):
                #     frame = {}
                #     frame['is_possible'] = True
                #     frame['likelihood'] = float(energy_pos[i])
                #     evals.append(frame)

                # for i in range(energy_neg.shape[0]):
                #     frame = {}
                #     frame['is_possible'] = False
                #     frame['likelihood'] = float(energy_neg[i])
                #     evals.append(frame)

                # print("possible: ", energy_pos.mean())
                # print("not possible: ", energy_neg.mean())




                val_loss_logger.update(**loss_dict)
                # summary_writer.add_scalars("val_non_smooth", val_loss_logger.last_item, i)

                # all_preds.append({k: v.cpu().numpy() for k, v in output["output"].items()})
                # all_labels.append({k: v.cpu().numpy() for k, v in inputs["attributes"].items()})
                # all_labels = self.attributes.cat_by_key(all_labels, inputs["attributes"])
                # all_preds = self.attributes.cat_by_key(all_preds, output['output'])

                # batch_time = time.time() - last_batch_time
                # val_metrics.update(time=batch_time, data=data_time)
                # if time.time() - start > self.cfg.SOLVER.VALIDATION_MAX_SECS:
                #     raise Val_Too_Long

            # all_preds, all_labels = map(lambda l: {k: np.concatenate([a[k] for a in l]) for k in l[0].keys()},
            #                             [all_preds, all_labels])
            # all_preds = {k: np.concatenate([a[k] for a in all_preds]) for k in all_preds[0].keys()}
            # all_labels = {k: np.concatenate([a[k] for a in all_labels]) for k in all_labels[0].keys()}
            # err_dict = self.attributes.pred_error(all_preds, all_labels)
            # val_metrics.update(**err_dict)
            # log.info(val_metrics.delimiter.join(["VALIDATION", "iter: {iter}", "{meters}"])
            #          .format(iter=iteration, meters=str(val_metrics)))
            log.info(val_metrics.delimiter.join(["VALIDATION", "iter: {iter}", "{meters}"])
                     .format(iter=iteration, meters=str(val_loss_logger)))
            if summary_writer is not None:
                # summary_writer.add_scalars("val_error", val_metrics.mean, iteration)
                summary_writer.add_scalars("val", val_loss_logger.mean, iteration)
            # self.derenderer.train()
        with open("output.json", "w") as f:
            json.dump(evals, f)
        self.model.train()
        return None