Example #1
Evaluation entry point: parses the test config, builds a TSM_ResNet model in dygraph mode, loads the checkpoint given by args.weights, and reports per-batch and averaged loss, top-1 and top-5 accuracy over the KineticsReader test split. All examples below use the PaddlePaddle 1.x dygraph API and assume numpy as np, paddle.fluid as fluid, fluid.dygraph.to_variable, and the project's config and reader utilities are already imported.
def test(args):
    # parse config
    config = parse_config(args.config)
    test_config = merge_configs(config, 'test', vars(args))
    print_configs(test_config, 'Test')
    place = fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        video_model = TSM_ResNet("TSM", test_config)

        model_dict, _ = fluid.load_dygraph(args.weights)
        video_model.set_dict(model_dict)

        test_reader = KineticsReader(mode='test', cfg=test_config)
        test_reader = test_reader.create_reader()

        video_model.eval()
        total_loss = 0.0
        total_acc1 = 0.0
        total_acc5 = 0.0
        total_sample = 0

        for batch_id, data in enumerate(test_reader()):
            x_data = np.array([item[0] for item in data])
            y_data = np.array([item[1] for item in data]).reshape([-1, 1])

            imgs = to_variable(x_data)
            labels = to_variable(y_data)
            labels.stop_gradient = True
            outputs = video_model(imgs)
            loss = fluid.layers.cross_entropy(input=outputs,
                                              label=labels,
                                              ignore_index=-1)

            avg_loss = fluid.layers.mean(loss)

            acc_top1 = fluid.layers.accuracy(input=outputs, label=labels, k=1)
            acc_top5 = fluid.layers.accuracy(input=outputs, label=labels, k=5)
            total_loss += avg_loss.numpy()
            total_acc1 += acc_top1.numpy()
            total_acc5 += acc_top5.numpy()
            total_sample += 1
            print('TEST iter {}, loss = {}, acc1 {}, acc5 {}'.format(
                batch_id, avg_loss.numpy(), acc_top1.numpy(),
                acc_top5.numpy()))
        print('Finish loss {}, acc1 {}, acc5 {}'.format(
            total_loss / total_sample, total_acc1 / total_sample,
            total_acc5 / total_sample))
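
A minimal sketch of how test() might be invoked. The project defines its own argument parser, so the flag names and defaults below are assumptions chosen only to match the attributes the function reads (args.config, args.weights).

import argparse

if __name__ == '__main__':
    # Hypothetical CLI; test() above only reads args.config and args.weights.
    parser = argparse.ArgumentParser(description='TSM dygraph evaluation (sketch)')
    parser.add_argument('--config', type=str, default='tsm.yaml',
                        help='YAML file consumed by parse_config')
    parser.add_argument('--weights', type=str, default='final',
                        help='checkpoint prefix passed to fluid.load_dygraph')
    args = parser.parse_args()
    test(args)
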
Example #2
Validation helper: runs a single pass over the valid split on an already-constructed model (it expects to be called inside an active fluid.dygraph.guard) and prints per-batch and averaged loss, top-1 and top-5 accuracy.
def val(epoch, model, cfg, args):
    reader = KineticsReader(mode="valid", cfg=cfg)
    reader = reader.create_reader()
    total_loss = 0.0
    total_acc1 = 0.0
    total_acc5 = 0.0
    total_sample = 0

    for batch_id, data in enumerate(reader()):
        x_data = np.array([item[0] for item in data])
        y_data = np.array([item[1] for item in data]).reshape([-1, 1])
        imgs = to_variable(x_data)
        labels = to_variable(y_data)
        labels.stop_gradient = True

        outputs = model(imgs)

        loss = fluid.layers.cross_entropy(input=outputs,
                                          label=labels,
                                          ignore_index=-1)
        avg_loss = fluid.layers.mean(loss)
        acc_top1 = fluid.layers.accuracy(input=outputs, label=labels, k=1)
        acc_top5 = fluid.layers.accuracy(input=outputs, label=labels, k=5)

        total_loss += avg_loss.numpy()[0]
        total_acc1 += acc_top1.numpy()[0]
        total_acc5 += acc_top5.numpy()[0]
        total_sample += 1

        print('TEST Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.format(
            epoch, batch_id,
            avg_loss.numpy()[0],
            acc_top1.numpy()[0],
            acc_top5.numpy()[0]))

    print('Finish loss {} , acc1 {} , acc5 {}'.format(
        total_loss / total_sample, total_acc1 / total_sample,
        total_acc5 / total_sample))
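
val() only runs the forward pass, so it can also be reused outside the training loop. A sketch of standalone use under that assumption, reusing the config and checkpoint pattern from Examples #1 and #3; evaluate_checkpoint and its argument handling are hypothetical.

import paddle.fluid as fluid

def evaluate_checkpoint(args):
    # Hypothetical wrapper: build the valid config, restore a checkpoint and
    # run one validation pass with the val() defined above.
    config = parse_config(args.config)
    valid_config = merge_configs(config, 'valid', vars(args))
    with fluid.dygraph.guard(fluid.CUDAPlace(0)):
        model = TSM_ResNet("TSM", valid_config)
        state_dict, _ = fluid.load_dygraph(args.weights)
        model.set_dict(state_dict)
        model.eval()
        val(0, model, valid_config, args)  # the epoch index is only used for logging
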
Example #3
Training entry point: builds train and valid configs, optionally wraps the model with dygraph DataParallel, splits the global batch size across GPUs, trains for TRAIN.epoch epochs, validates with val() after each epoch, and saves the final checkpoint.
def train(args):
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    print_configs(train_config, 'Train')

    use_data_parallel = False  # single-card run; the DataParallel branches below stay disabled
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if use_data_parallel else fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        if use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        video_model = TSM_ResNet("TSM", train_config)

        optimizer = create_optimizer(train_config.TRAIN,
                                     video_model.parameters())
        if use_data_parallel:
            video_model = fluid.dygraph.parallel.DataParallel(
                video_model, strategy)

        bs_denominator = 1
        if args.use_gpu:
            # check number of GPUs
            gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
            if gpus != "":
                gpus = gpus.split(",")
                num_gpus = len(gpus)
                assert num_gpus == train_config.TRAIN.num_gpus, \
                       "num_gpus({}) set by CUDA_VISIBLE_DEVICES " \
                       "should be the same as that " \
                       "set in {}({})".format(
                           num_gpus, args.config, train_config.TRAIN.num_gpus)
            bs_denominator = train_config.TRAIN.num_gpus

        # the config batch size is treated as a global batch size and split
        # evenly across the GPUs counted above
        train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                            bs_denominator)

        train_reader = KineticsReader(mode="train", cfg=train_config)

        train_reader = train_reader.create_reader()
        if use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        for epoch in range(train_config.TRAIN.epoch):
            video_model.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            for batch_id, data in enumerate(train_reader()):
                x_data = np.array([item[0] for item in data])
                y_data = np.array([item[1] for item in data]).reshape([-1, 1])

                imgs = to_variable(x_data)
                labels = to_variable(y_data)
                labels.stop_gradient = True
                outputs = video_model(imgs)
                loss = fluid.layers.cross_entropy(input=outputs,
                                                  label=labels,
                                                  ignore_index=-1)
                avg_loss = fluid.layers.mean(loss)

                acc_top1 = fluid.layers.accuracy(input=outputs,
                                                 label=labels,
                                                 k=1)
                acc_top5 = fluid.layers.accuracy(input=outputs,
                                                 label=labels,
                                                 k=5)

                if use_data_parallel:
                    # scale the loss by 1 / nranks and all-reduce gradients
                    # across trainers before the optimizer step
                    avg_loss = video_model.scale_loss(avg_loss)
                    avg_loss.backward()
                    video_model.apply_collective_grads()
                else:
                    avg_loss.backward()
                optimizer.minimize(avg_loss)
                video_model.clear_gradients()

                total_loss += avg_loss.numpy()[0]
                total_acc1 += acc_top1.numpy()[0]
                total_acc5 += acc_top5.numpy()[0]
                total_sample += 1

                print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.
                      format(epoch, batch_id,
                             avg_loss.numpy()[0],
                             acc_top1.numpy()[0],
                             acc_top5.numpy()[0]))

            print(
                'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'
                .format(epoch, total_loss / total_sample,
                        total_acc1 / total_sample, total_acc5 / total_sample))
            video_model.eval()
            val(epoch, video_model, valid_config, args)

        if fluid.dygraph.parallel.Env().local_rank == 0:
            fluid.dygraph.save_dygraph(video_model.state_dict(), "final")
        logger.info('[TRAIN] training finished')
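
As with Example #1, a hypothetical entry point for train(); the flags are assumptions that mirror the attributes the function reads (args.config, args.use_gpu), and the real script's parser may differ.

import argparse

if __name__ == '__main__':
    # Hypothetical CLI for the training entry point above.
    parser = argparse.ArgumentParser(description='TSM dygraph training (sketch)')
    parser.add_argument('--config', type=str, default='tsm.yaml',
                        help='YAML file consumed by parse_config')
    parser.add_argument('--use_gpu', action='store_true',
                        help='enables the CUDA_VISIBLE_DEVICES / num_gpus consistency check')
    args = parser.parse_args()
    train(args)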