Example #1
def infer_slowfast(args):
    config = parse_config(args.config_file)
    infer_config = merge_configs(config, 'infer', vars(args))
    print_configs(infer_config, "Infer")

    if not os.path.isdir(infer_config.INFER.save_path):
        os.makedirs(infer_config.INFER.save_path)

    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    _nranks = ParallelEnv().nranks  # number of GPUs
    bs_single = int(infer_config.INFER.batch_size /
                    _nranks)  # batch size on each GPU

    with fluid.dygraph.guard(place):
        # build model
        slowfast = SlowFast(cfg=infer_config, num_classes=400)
        if args.weights:
            assert os.path.exists(args.weights + '.pdparams'),\
                "Given weight path {} does not exist.".format(args.weights)

        logger.info('load test weights from {}'.format(args.weights))
        model_dict, _ = fluid.load_dygraph(args.weights)
        slowfast.set_dict(model_dict)

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
            slowfast = fluid.dygraph.parallel.DataParallel(
                slowfast, strategy, find_unused_parameters=False)

        # create reader
        infer_data = KineticsDataset(mode="infer", cfg=infer_config)
        infer_sampler = DistributedBatchSampler(infer_data,
                                                batch_size=bs_single,
                                                shuffle=False,
                                                drop_last=False)
        infer_loader = DataLoader(infer_data,
                                  batch_sampler=infer_sampler,
                                  places=place,
                                  feed_list=None,
                                  num_workers=0,
                                  return_list=True)

        # start infer
        num_ensemble_views = infer_config.INFER.num_ensemble_views
        num_spatial_crops = infer_config.INFER.num_spatial_crops
        num_cls = infer_config.MODEL.num_classes
        num_clips = num_ensemble_views * num_spatial_crops
        num_videos = len(infer_data) // num_clips
        video_preds = np.zeros((num_videos, num_cls))
        clip_count = {}

        video_paths = []
        with open(infer_config.INFER.filelist, "r") as f:
            for path in f.read().splitlines():
                video_paths.append(path)

        print(
            "[INFER] infer start, number of videos {}, clips per video {}, total number of clips {}"
            .format(num_videos, num_clips, num_clips * num_videos))
        slowfast.eval()
        for batch_id, data in enumerate(infer_loader):
            # call net
            model_inputs = [data[0], data[1]]
            preds = slowfast(model_inputs, training=False)
            clip_ids = data[3]

            # gather results across cards; afterwards every card holds the same results.
            if _nranks > 1:
                preds = _all_gather(preds, _nranks)
                clip_ids = _all_gather(clip_ids, _nranks)

            # to numpy
            preds = preds.numpy()
            clip_ids = clip_ids.numpy()

            # ensemble clip predictions into video-level predictions
            for ind in range(preds.shape[0]):
                vid_id = int(clip_ids[ind]) // num_clips
                ts_idx = int(clip_ids[ind]) % num_clips
                if vid_id not in clip_count:
                    clip_count[vid_id] = []
                if ts_idx in clip_count[vid_id]:
                    print(
                        "[INFER] Skipped!! video {} clip index {} (clip id {}) was read repeatedly."
                        .format(vid_id, ts_idx, clip_ids[ind]))
                else:
                    clip_count[vid_id].append(ts_idx)
                    video_preds[vid_id] += preds[ind]  # ensemble method: sum
            if batch_id % args.log_interval == 0:
                print("[INFER] Processing batch {}/{} ...".format(
                    batch_id,
                    len(infer_data) // infer_config.INFER.batch_size))

        # sanity-check the clip indices collected for each video
        for key in clip_count.keys():
            if len(clip_count[key]) != num_clips or sum(
                    clip_count[key]) != num_clips * (num_clips - 1) / 2:
                print(
                    "[INFER] Warning!! video [{}] clip count [{}] does not match the expected number of clips {}"
                    .format(key, clip_count[key], num_clips))

        res_list = []
        for j in range(video_preds.shape[0]):
            pred = to_variable(video_preds[j] / num_clips)  # mean prob over clips
            video_path = video_paths[j]
            top1_values, top1_indices = fluid.layers.topk(pred, k=1)
            top5_values, top5_indices = fluid.layers.topk(pred, k=5)
            top1_values = top1_values.numpy().astype("float64")[0]
            top1_indices = int(top1_indices.numpy()[0])
            top5_values = list(top5_values.numpy().astype("float64"))
            top5_indices = [int(item) for item in top5_indices.numpy()
                            ]  # np.int is not JSON serializable
            print(
                "[INFER] video id [{}], top1 value {}, top1 indices {}".format(
                    video_path, top1_values, top1_indices))
            print(
                "[INFER] video id [{}], top5 value {}, top5 indices {}".format(
                    video_path, top5_values, top5_indices))
            save_dict = {
                'video_id': video_path,
                'top1_values': top1_values,
                'top1_indices': top1_indices,
                'top5_values': top5_values,
                'top5_indices': top5_indices
            }
            res_list.append(save_dict)

        with open(
                os.path.join(infer_config.INFER.save_path, 'result' + '.json'),
                'w') as f:
            json.dump(res_list, f)
        print('[INFER] infer finished, results saved in {}'.format(
            infer_config.INFER.save_path))
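
The loop above performs multi-view ensembling: each video is tested as num_ensemble_views * num_spatial_crops clips, whose scores are summed and then averaged. A minimal NumPy sketch of the same scheme, with made-up sizes (2 videos, 3 clips each, 4 classes):

import numpy as np

num_videos, num_clips, num_cls = 2, 3, 4
video_preds = np.zeros((num_videos, num_cls))

clip_ids = np.arange(num_videos * num_clips)         # global clip indices
clip_preds = np.random.rand(len(clip_ids), num_cls)  # stand-in for model output

for cid, pred in zip(clip_ids, clip_preds):
    vid_id = cid // num_clips    # which video this clip belongs to
    video_preds[vid_id] += pred  # ensemble method: sum
video_preds /= num_clips         # mean probability per video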
Example #2
def do_train(args):
    paddle.set_device(args.device)
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    set_seed(args)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
    trans_func = partial(convert_example,
                         tokenizer=tokenizer,
                         max_seq_length=args.max_seq_length)
    if args.task_type == "cross-lingual-transfer":
        train_ds = load_dataset("xnli", "en", splits="train")
        train_ds = train_ds.map(trans_func, lazy=True)
    elif args.task_type == "translate-train-all":
        all_train_ds = []
        for language in all_languages:
            train_ds = load_dataset("xnli", language, splits="train")
            all_train_ds.append(train_ds.map(trans_func, lazy=True))
        train_ds = XnliDataset(all_train_ds)
    train_batch_sampler = DistributedBatchSampler(train_ds,
                                                  batch_size=args.batch_size,
                                                  shuffle=True)
    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),  # input_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_id, dtype="int64"),  # position_ids
        Pad(axis=0, pad_val=0, dtype="int64"),  # attention_mask
        Stack(dtype="int64")  # labels
    ): fn(samples)
    train_data_loader = DataLoader(dataset=train_ds,
                                   batch_sampler=train_batch_sampler,
                                   collate_fn=batchify_fn,
                                   num_workers=0,
                                   return_list=True)

    num_classes = 3
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path, num_classes=num_classes, dropout=args.dropout)
    n_layers = model.ernie_m.config['num_hidden_layers']
    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)

    if args.max_steps > 0:
        num_training_steps = args.max_steps
        num_train_epochs = math.ceil(num_training_steps /
                                     len(train_data_loader))
    else:
        num_training_steps = len(train_data_loader) * args.num_train_epochs
        num_train_epochs = args.num_train_epochs

    warmup = args.warmup_steps if args.warmup_steps > 0 else args.warmup_proportion

    lr_scheduler = LinearDecayWithWarmup(args.learning_rate,
                                         num_training_steps, warmup)

    # Generate parameter names needed to perform weight decay.
    # All bias and LayerNorm parameters are excluded.
    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    ]
    # Construct dict
    name_dict = dict()
    for n, p in model.named_parameters():
        name_dict[p.name] = n
    optimizer = AdamWDL(learning_rate=lr_scheduler,
                        beta1=0.9,
                        beta2=0.999,
                        epsilon=args.adam_epsilon,
                        parameters=model.parameters(),
                        weight_decay=args.weight_decay,
                        n_layers=n_layers,
                        layerwise_decay=args.layerwise_decay,
                        apply_decay_param_fun=lambda x: x in decay_params,
                        name_dict=name_dict)

    loss_fct = nn.CrossEntropyLoss()
    if args.use_amp:
        scaler = paddle.amp.GradScaler(init_loss_scaling=args.scale_loss)
    metric = Accuracy()

    global_step = 0
    tic_train = time.time()
    for epoch in range(num_train_epochs):
        for step, batch in enumerate(train_data_loader):
            global_step += 1
            input_ids, position_ids, attention_mask, labels = batch
            with paddle.amp.auto_cast(
                    args.use_amp,
                    custom_white_list=["layer_norm", "softmax", "gelu"]):
                logits = model(input_ids, position_ids, attention_mask)
                loss = loss_fct(logits, labels)
            if args.use_amp:
                scaled_loss = scaler.scale(loss)
                scaled_loss.backward()
                scaler.minimize(optimizer, scaled_loss)
            else:
                loss.backward()
                optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()
            if global_step % args.logging_steps == 0:
                print(
                    "global step %d/%d, epoch: %d, batch: %d, rank_id: %s, loss: %f, lr: %.10f, speed: %.4f step/s"
                    % (global_step, num_training_steps, epoch, step,
                       paddle.distributed.get_rank(), loss, optimizer.get_lr(),
                       args.logging_steps / (time.time() - tic_train)))
                tic_train = time.time()
            if global_step % args.save_steps == 0 or global_step == num_training_steps:
                for language in all_languages:
                    tic_eval = time.time()
                    test_data_loader = get_test_dataloader(
                        args, language, batchify_fn, trans_func)
                    evaluate(model, loss_fct, metric, test_data_loader,
                             language)
                    print("eval done total : %s s" % (time.time() - tic_eval))
                if paddle.distributed.get_rank() == 0:
                    output_dir = os.path.join(
                        args.output_dir,
                        "ernie_m_ft_model_%d.pdparams" % (global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    # Need better way to get inner model of DataParallel
                    model_to_save = model._layers if isinstance(
                        model, paddle.DataParallel) else model
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
            if global_step >= num_training_steps:
                break
        if global_step >= num_training_steps:
            break
    if paddle.distributed.get_rank() == 0:
        output_dir = os.path.join(
            args.output_dir, "ernie_m_final_model_%d.pdparams" % global_step)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        # Need better way to get inner model of DataParallel
        model_to_save = model._layers if isinstance(
            model, paddle.DataParallel) else model
        model_to_save.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)
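
The batchify_fn above is the standard PaddleNLP collate idiom: Tuple applies one function per field of every sample. A standalone sketch with made-up samples (two token sequences of different lengths plus labels):

from paddlenlp.data import Pad, Stack, Tuple

batchify = Tuple(
    Pad(axis=0, pad_val=0, dtype="int64"),  # pad input_ids to the batch max length
    Stack(dtype="int64"),                   # stack scalar labels
)
samples = [([1, 2, 3], 0), ([4, 5], 1)]
input_ids, labels = batchify(samples)
# input_ids -> [[1, 2, 3], [4, 5, 0]], labels -> [0, 1]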
Example #3
def main():
    if FLAGS.static:
        paddle.enable_static()
    device = paddle.set_device(FLAGS.device)

    if not FLAGS.eval_only:  # training mode
        train_transform = Compose([
            ColorDistort(),
            RandomExpand(),
            RandomCrop(),
            RandomFlip(),
            NormalizeBox(),
            PadBox(),
            BboxXYXY2XYWH()
        ])

        train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_train2017.json',
                              image_dir='train2017',
                              with_background=False,
                              mixup=True,
                              transform=train_transform)
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=FLAGS.batch_size,
                                                shuffle=True,
                                                drop_last=True)
        loader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            num_workers=FLAGS.num_workers,
                            return_list=True,
                            collate_fn=train_collate_fn)
    else:  # evaluation mode
        eval_transform = Compose([
            ResizeImage(target_size=608),
            NormalizeBox(),
            PadBox(),
            BboxXYXY2XYWH()
        ])

        eval_collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_val2017.json',
                              image_dir='val2017',
                              with_background=False,
                              transform=eval_transform)
        # batch_size can only be 1 in evaluation for YOLOv3,
        # since the predicted bbox is a LoDTensor
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=1,
                                                shuffle=False,
                                                drop_last=False)
        loader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            num_workers=FLAGS.num_workers,
                            return_list=True,
                            collate_fn=eval_collate_fn)

    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = yolov3_darknet53(num_classes=dataset.num_classes,
                             num_max_boxes=NUM_MAX_BOXES,
                             model_mode='eval' if FLAGS.eval_only else 'train',
                             pretrained=pretrained)

    if FLAGS.pretrain_weights and not FLAGS.eval_only:
        model.load(FLAGS.pretrain_weights,
                   skip_mismatch=True,
                   reset_optimizer=True)

    optim = make_optimizer(len(batch_sampler), parameters=model.parameters())

    model.prepare(optimizer=optim,
                  loss=YoloLoss(num_classes=dataset.num_classes))

    # NOTE: we implement the COCO metric of the YOLOv3 model here, separately
    # from the 'prepare' and 'fit' framework, for the following reasons:
    # 1. The YOLOv3 network structure differs between 'train' and 'eval'
    # mode; in 'eval' mode the output is the predicted bboxes, not the
    # feature maps used for computing YoloLoss.
    # 2. The COCO metric also behaves differently from the defined Metric:
    # it should not accumulate on each iteration, but only once at the
    # end of an epoch.
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        preds = model.predict(loader, stack_outputs=False)
        _, _, _, img_ids, bboxes = preds

        anno_path = os.path.join(FLAGS.data,
                                 'annotations/instances_val2017.json')
        coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    save_dir = FLAGS.save_dir or 'yolo_checkpoint'

    model.fit(train_data=loader,
              epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
              save_dir=os.path.join(save_dir, "mixup"),
              save_freq=10)

    # do not use the image mixup transform in the last FLAGS.no_mixup_epoch epochs
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
              save_dir=os.path.join(save_dir, "no_mixup"),
              save_freq=5)
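
make_optimizer is defined elsewhere in this script and is not shown; as an assumption, here is a hedged sketch of the schedule such helpers typically build for YOLOv3 in Paddle (SGD with momentum, piecewise LR decay plus linear warmup), with hypothetical boundaries and rates:

import paddle

def make_optimizer(steps_per_epoch, parameters=None):
    # hypothetical schedule: decay the base LR by 10x at epochs 200 and 250
    base_lr = 0.001
    boundaries = [e * steps_per_epoch for e in (200, 250)]
    values = [base_lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
    lr = paddle.optimizer.lr.PiecewiseDecay(boundaries=boundaries, values=values)
    lr = paddle.optimizer.lr.LinearWarmup(
        learning_rate=lr, warmup_steps=500, start_lr=0.0, end_lr=base_lr)
    return paddle.optimizer.Momentum(
        learning_rate=lr,
        momentum=0.9,
        weight_decay=paddle.regularizer.L2Decay(5e-4),
        parameters=parameters)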
Example #4
def build_dataloader(dataset,
                     batch_size,
                     num_workers,
                     places=None,
                     shuffle=True,
                     drop_last=True,
                     multigrid=False,
                     collate_fn_cfg=None,
                     **kwargs):
    """Build Paddle Dataloader.

    XXX explain how the batch_sampler work!

    Args:
        dataset (paddle.dataset): A PaddlePaddle dataset object.
        batch_size (int): batch size on single card.
        num_worker (int): num_worker
        shuffle(bool): whether to shuffle the data at every epoch.
    """

    if not kwargs.get('sampler'):
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=batch_size,
                                                shuffle=shuffle,
                                                drop_last=drop_last)
    else:
        sampler = build_sampler(kwargs['sampler'])
        batch_sampler = BatchSampler(dataset,
                                     sampler=sampler,
                                     batch_size=batch_size,
                                     shuffle=shuffle,
                                     drop_last=drop_last)
    kwargs.update({'batch_sampler': batch_sampler})

    # NOTE(shipping): when a mix operator such as mixup or cutmix is switched on,
    # a batch like [[img, label, attribute, ...], [img, label, attribute, ...], ...]
    # is re-collated to
    # [[img, img, ...], [label, label, ...], [attribute, attribute, ...], ...],
    # as if by numpy.transpose over the batch dimension.

    def mix_collate_fn(batch):
        # apply the configured mix op(s) (e.g. mixup/cutmix) to the raw batch
        pipeline = build_batch_pipeline(collate_fn_cfg)
        batch = pipeline(batch)
        # regroup per-sample tuples into per-field slots, then stack each field
        slots = []
        for items in batch:
            for i, item in enumerate(items):
                if len(slots) < len(items):
                    slots.append([item])
                else:
                    slots[i].append(item)
        return [np.stack(slot, axis=0) for slot in slots]


    data_loader = DataLoader(
        dataset,
        places=places,
        num_workers=num_workers,
        collate_fn=mix_collate_fn if collate_fn_cfg is not None else None,
        **kwargs)

    return data_loader
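
When collate_fn_cfg is None the DataLoader falls back to its default collate; otherwise mix_collate_fn runs the batch pipeline and then transposes the batch. The transpose step on its own, sketched with made-up data:

import numpy as np

batch = [(np.zeros((3, 8, 8)), 1), (np.ones((3, 8, 8)), 0)]  # (img, label) pairs
slots = list(zip(*batch))  # regroup by field: (imgs...), (labels...)
imgs, labels = (np.stack(s, axis=0) for s in slots)
# imgs.shape == (2, 3, 8, 8); labels == array([1, 0])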