示例#1
0
def run(args):
    if args.do_train:
        train_dataloader, processor = load_example(args, 'train')
        num_label = len(processor.get_labels())
        model = RemBertForSequenceClassification.from_pretrained(
            args.model_type, num_classes=num_label)
        if nranks > 1:
            dist.init_parallel_env()
            model = paddle.DataParallel(model)

        num_train_steps_per_epoch = len(
            train_dataloader) // args.gradient_accumulation_steps
        num_train_steps = int(num_train_steps_per_epoch *
                              args.num_train_epochs)
        trainer = Trainer(args,
                          model=model,
                          dataloader=train_dataloader,
                          num_train_steps=num_train_steps,
                          step_callback=evaluate)
        trainer.train()

    if args.do_eval:
        model = RemBertForSequenceClassification.from_pretrained(
            args.output_dir)
        evaluate(model, args)
def train(print_result=True):
    """train"""
    # 1. initialize parallel environment
    train_data_list1 = []
    train_data_list2 = []
    dist.init_parallel_env()

    # 2. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # 3. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)
    assert len(loss) == 1
    if print_result is True:
        train_data_list1.append(loss.numpy())
    assert len(train_data_list1)

    loss.backward()

    adam.step()
    adam.clear_grad()
    def run_trainer_func(self, args):
        if fluid.core.is_compiled_with_cuda():
            device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
            place = fluid.CUDAPlace(device_id)
        else:
            assert ("Only support CUDAPlace for now.")

        with fluid.dygraph.guard(place):
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            np.random.seed(seed)
            random.seed(seed)
            model, train_reader, opt = self.get_model()

            if args.update_method == "nccl2":
                dist.init_parallel_env()
                print_to_err(
                    type(self).__name__,
                    "begin to prepare context in dygraph with nccl2")
                model = paddle.DataParallel(
                    model, find_unused_parameters=args.find_unused_parameters)
            print_to_err(type(self).__name__, "model built in dygraph")
            out_losses = self.model_train(args, model, opt, train_reader)
            print_to_out(out_losses)
            return out_losses
def train():
    # init env
    dist.init_parallel_env()

    # create network
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # create data loader
    dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
    loader = paddle.io.DataLoader(dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        num_workers=1)

    # train
    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            out = layer(image)
            loss = loss_fn(out, label)

            loss.backward()

            adam.step()
            adam.clear_grad()

            if dist.get_rank() == 0:
                print("Epoch {} batch {}: loss = {}".format(
                    epoch_id, batch_id, np.mean(loss.numpy())))
def train():
    # 设置支持多卡训练
    dist.init_parallel_env()
    train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                   transform=transform)
    batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                      batch_size=32,
                                                      shuffle=True)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_sampler=batch_sampler)
    model = paddle.vision.mobilenet_v2(num_classes=10)
    # 设置支持多卡训练
    model = paddle.DataParallel(model)
    # 设置优化方法
    optimizer = paddle.optimizer.SGD(parameters=model.parameters(),
                                     learning_rate=0.1,
                                     weight_decay=5e-4)
    # 获取损失函数
    loss = paddle.nn.CrossEntropyLoss()
    # 开始训练
    for epoch in range(10):
        for batch_id, (img, label) in enumerate(train_loader()):
            output = model(img)
            # 计算损失值
            los = loss(output, label)
            los.backward()
            if dist.get_rank() == 0:
                print("Epoch {}: batch_id {}, loss {}".format(
                    epoch, batch_id, los))
            optimizer.step()
            optimizer.clear_grad()
示例#6
0
def train(print_result=False):
    # 1. enable dynamic mode
    paddle.disable_static()
    
    # 2. initialize parallel environment
    dist.init_parallel_env()

    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(
        learning_rate=0.001, parameters=dp_layer.parameters())

    # 4. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)
    
    if print_result is True:
        print("loss:", loss.numpy())
    
    loss.backward()

    adam.step()
    adam.clear_grad()
示例#7
0
def train(print_result=False):
    # 1. initialize parallel environment
    dist.init_parallel_env()

    # 2. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # 3. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)

    if print_result is True:
        print("Rank:", int(os.getenv("PADDLE_TRAINER_ID")))

    loss.backward()
    adam.step()
    adam.clear_grad()

    return int(os.getenv("PADDLE_TRAINER_ID"))
示例#8
0
def train():
    # 1. enable dynamic mode
    paddle.disable_static()

    # 2. initialize parallel environment
    dist.init_parallel_env()

    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # 4. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)

    loss = dp_layer.scale_loss(loss)
    loss.backward()
    dp_layer.apply_collective_grads()

    adam.step()
    adam.clear_grad()
示例#9
0
def train(print_result=True):
    # 1. enable dynamic mode
    # device = paddle.set_device('gpu')
    # paddle.disable_static(device)

    # 2. initialize parallel environment
    dist.init_parallel_env()

    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    dataset = FakeDataset()
    # loader = paddle.io.DataLoader(dataset, batch_size=2, places=device, num_workers=2)
    loader = paddle.io.DataLoader(dataset, batch_size=2, num_workers=2)
    # 4. run layer
    for inputs, labels in loader:
        # inputs = paddle.randn([10, 10], 'float32')
        outputs = dp_layer(inputs)
        # labels = paddle.randn([10, 1], 'float32')
        loss = loss_fn(outputs, labels)

        if print_result is True:
            print("loss:", loss.numpy())

        # loss = dp_layer.scale_loss(loss)
        loss.backward()
        # dp_layer.apply_collective_grads()

        adam.step()
        adam.clear_grad()
示例#10
0
 def test_check_env_failed(self):
     os.environ['FLAGS_selected_gpus'] = '0'
     os.environ['PADDLE_TRAINER_ID'] = '0'
     os.environ['PADDLE_CURRENT_ENDPOINT'] = '127.0.0.1:6170'
     os.environ['PADDLE_TRAINERS_NUM'] = '2'
     with self.assertRaises(ValueError):
         dist.init_parallel_env()
    def test_multiple_gpus(self):
        self.trainer_id = dist.get_rank()
        dist.init_parallel_env()

        model_a = SimpleNet(self.trainer_id, 0)
        model_b = SimpleNet(self.trainer_id, 1)

        state_dict = model_a.state_dict()
        model_b.set_state_dict(state_dict)

        model_a = paddle.DataParallel(model_a)
        model_b = paddle.DataParallel(model_b)

        for step in range(10):
            x_data = np.random.randn(batch, in_dim).astype(np.float32)
            x = paddle.to_tensor(x_data)
            x.stop_gradient = False

            with model_a.no_sync():
                y_pred_a = model_a(x)
                loss_a = y_pred_a.mean()
                loss_a.backward()
            fused_allreduce_gradients(list(model_a.parameters()), None)

            y_pred_b = model_b(x)
            loss_b = y_pred_b.mean()
            loss_b.backward()

            self.check_gradient(model_a.parameters())
            self.check_gradient(model_b.parameters())

            self.check_acc(model_a._layers.w.grad, model_b._layers.w.grad)

            model_a.clear_gradients()
            model_b.clear_gradients()
示例#12
0
def finetune(args):
    paddle.set_device(args.device)
    if dist.get_world_size() > 1:
        dist.init_parallel_env()

    pos_file = os.path.join(args.data_dir, 'rt-polarity.pos')
    neg_file = os.path.join(args.data_dir, 'rt-polarity.neg')
    x_text, y = load_data_and_labels(pos_file, neg_file)
    x_train, x_test, y_train, y_test = train_test_split(x_text,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=args.seed)

    if not args.init_from_ckpt:
        raise ValueError('`init_from_ckpt` should be set.')
    model = ELMoBowTextClassification(args.init_from_ckpt, args.batch_size,
                                      args.sent_embedding_dim, args.dropout,
                                      args.num_classes)
    if dist.get_world_size() > 1:
        model = paddle.DataParallel(model)
    model.train()

    adam = paddle.optimizer.Adam(parameters=model.parameters(),
                                 learning_rate=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss()

    vocab = load_vocab()

    train_dataset = SentencePolarityDatasetV1(x_train, y_train, vocab,
                                              args.max_seq_len)
    test_dataset = SentencePolarityDatasetV1(x_test, y_test, vocab,
                                             args.max_seq_len)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              return_list=True,
                              shuffle=True,
                              collate_fn=lambda batch: generate_batch(batch))
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             return_list=True,
                             shuffle=False,
                             collate_fn=lambda batch: generate_batch(batch))

    for epoch in range(args.epochs):
        print('Epoch {}/{}'.format(epoch + 1, args.epochs))
        for step, batch_data in enumerate(train_loader, start=1):
            ids, ids_reverse, label = batch_data

            output = model((ids, ids_reverse))
            loss = criterion(output, label)
            loss.backward()
            adam.step()
            adam.clear_grad()

            if step % args.logging_step == 0:
                print('step {}, loss {}'.format(step, loss.numpy()[0]))

    acc = test(model, test_loader)
    print('\ntest acc {}\n'.format(acc))
def train():
    # enable dygraph mode
    paddle.disable_static()

    dist.init_parallel_env()

    # create network
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # print(core._get_device_properties(dist.ParallelEnv().device_id))

    # create data loader
    # loader = paddle.io.DataLoader.from_generator(capacity=5, use_multiprocess=True)
    loader = paddle.io.DataLoader.from_generator(capacity=5)
    loader.set_batch_generator(random_batch_reader())

    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            out = layer(image)
            loss = loss_fn(out, label)

            loss = dp_layer.scale_loss(loss)
            loss.backward()
            dp_layer.apply_collective_grads()

            adam.step()
            adam.clear_grad()
            print("Epoch {} batch {}: loss = {}".format(
                epoch_id, batch_id, np.mean(loss.numpy())))
示例#14
0
def main(config, device, logger, vdl_writer):
    # init dist environment
    if config['Global']['distributed']:
        dist.init_parallel_env()

    global_config = config['Global']

    # build dataloader
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if len(train_dataloader) == 0:
        logger.error(
            "No Images in train dataset, please ensure\n" +
            "\t1. The images num in the train label_file_list should be larger than or equal with batch size.\n"
            +
            "\t2. The annotation file and path in the configuration file are provided normally."
        )
        return

    if config['Eval']:
        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
    else:
        valid_dataloader = None

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model
    # for rec algorithm
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])
    if config['Global']['distributed']:
        model = paddle.DataParallel(model)

    # build loss
    loss_class = build_loss(config['Loss'])

    # build optim
    optimizer, lr_scheduler = build_optimizer(
        config['Optimizer'],
        epochs=config['Global']['epoch_num'],
        step_each_epoch=len(train_dataloader),
        parameters=model.parameters())

    # build metric
    eval_class = build_metric(config['Metric'])
    # load pretrain model
    pre_best_model_dict = init_model(config, model, logger, optimizer)

    logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
    if valid_dataloader is not None:
        logger.info('valid dataloader has {} iters'.format(
            len(valid_dataloader)))
    # start train
    program.train(config, train_dataloader, valid_dataloader, device, model,
                  loss_class, optimizer, lr_scheduler, post_process_class,
                  eval_class, pre_best_model_dict, logger, vdl_writer)
示例#15
0
def train():
    # 1. initialize parallel environment
    dist.init_parallel_env()

    # 2. get current ParallelEnv
    parallel_env = dist.ParallelEnv()
    print("rank: ", parallel_env.rank)
    print("world_size: ", parallel_env.world_size)
示例#16
0
 def test_init_parallel_env_break(self):
     os.environ['FLAGS_selected_gpus'] = '0'
     os.environ['PADDLE_TRAINER_ID'] = '0'
     os.environ['PADDLE_CURRENT_ENDPOINT'] = '127.0.0.1:6170'
     os.environ['PADDLE_TRAINERS_NUM'] = '1'
     os.environ['PADDLE_TRAINER_ENDPOINTS'] = '127.0.0.1:6170'
     # coverage success branch
     dist.init_parallel_env()
     self.assertFalse(parallel_helper._is_parallel_ctx_initialized())
示例#17
0
def train():
    """parallelenv"""
    # 1. initialize parallel env
    dist.init_parallel_env()

    # 2. get current ParallelEnv
    parallel_env = dist.ParallelEnv()
    assert parallel_env.rank == 0
    assert parallel_env.world_size == 2
    print("test_ParallelEnv ... ok")
示例#18
0
def do_train():
    dist.init_parallel_env()
    net = paddle.nn.Linear(2, 2)
    net = paddle.DataParallel(net)
    x = paddle.to_tensor(np.random.random(size=(2, 2)).astype('float32'))
    j = []

    y = net(x)

    dist.all_gather(j, y)
    print(j)
示例#19
0
def main(config, device, logger, vdl_writer):
    # init dist environment
    if config['Global']['distributed']:
        dist.init_parallel_env()

    global_config = config['Global']

    # build dataloader
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if config['Eval']:
        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
    else:
        valid_dataloader = None

    # build post process
    post_process_class = build_post_process(config['PostProcess'],
                                            global_config)

    # build model
    # for rec algorithm
    if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])

    if config['Global']['distributed']:
        model = paddle.DataParallel(model)

    # build loss
    loss_class = build_loss(config['Loss'])

    # build optim
    optimizer, lr_scheduler = build_optimizer(
        config['Optimizer'],
        epochs=config['Global']['epoch_num'],
        step_each_epoch=len(train_dataloader),
        parameters=model.parameters())

    # build metric
    eval_class = build_metric(config['Metric'])
    # load pretrain model
    pre_best_model_dict = init_model(config, model, logger, optimizer)

    logger.info(
        'train dataloader has {} iters, valid dataloader has {} iters'.format(
            len(train_dataloader), len(valid_dataloader)))
    quanter = QAT(config=quant_config, act_preprocess=PACT)
    quanter.quantize(model)

    # start train
    program.train(config, train_dataloader, valid_dataloader, device, model,
                  loss_class, optimizer, lr_scheduler, post_process_class,
                  eval_class, pre_best_model_dict, logger, vdl_writer)
示例#20
0
def train():
    """bergin train"""
    arr1 = []
    arr2 = []
    dist.init_parallel_env()
    set_seed(2021)
    layer = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer = paddle.DataParallel(layer)
    else:
        dp_layer = layer

    layer2 = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer2 = paddle.DataParallel(layer2)
    else:
        dp_layer2 = layer2

    dp_layer2.set_state_dict(dp_layer.state_dict())

    loss_fn = nn.MSELoss()
    adam = opt.Adam(
        learning_rate=0.001, parameters=dp_layer.parameters())

    adam2 = opt.Adam(
        learning_rate=0.001, parameters=dp_layer2.parameters())

    for i in range(2):
        batch_size = 10
        shard = int(batch_size / dist.get_world_size())
        start_no = shard * dist.get_rank()
        end_no = start_no + shard
        inputs = paddle.randn([10, 10], 'float32')[start_no:end_no]
        outputs = dp_layer(inputs)
        labels = paddle.randn([10, 1], 'float32')[start_no:end_no]
        loss = loss_fn(outputs, labels)
        if dist.get_rank() == 0:
            arr1.append(loss.numpy()[0])
        loss.backward()
        adam.step()
        adam.clear_grad()

        outputs = dp_layer2(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        if dist.get_rank() == 0:
            arr2.append(loss.numpy()[0])
        adam2.step()
        adam2.clear_grad()
    check_data(arr1, arr2)
    def run_trainer(self, args):
        if fluid.core.is_compiled_with_cuda():
            device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
            place = fluid.CUDAPlace(device_id)
        else:
            assert ("Only support CUDAPlace for now.")

        with fluid.dygraph.guard(place):
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            np.random.seed(seed)
            random.seed(seed)
            model, train_reader, opt = self.get_model()

            if args.update_method == "nccl2":
                dist.init_parallel_env()
                print_to_err(
                    type(self).__name__,
                    "begin to prepare context in dygraph with nccl2")
                if not args.find_unused_parameters:
                    model = paddle.DataParallel(model,
                                                find_unused_parameters=False)
                else:
                    model = paddle.DataParallel(model,
                                                find_unused_parameters=True)
                print_to_err(type(self).__name__, "model built in dygraph")
            out_losses = []
            print_to_err(type(self).__name__, "begin to run dygraph training")
            for step_id, data in enumerate(train_reader()):
                data = self._get_data(data, args)
                if step_id == RUN_STEP:
                    break
                if step_id % 3 != 0:
                    if args.update_method == "nccl2":
                        with model.no_sync():
                            loss = self.run_one_loop(model, opt, data)
                            loss.backward()
                    else:
                        loss = self.run_one_loop(model, opt, data)
                        loss.backward()
                else:
                    loss = self.run_one_loop(model, opt, data)
                    loss.backward()
                    opt.minimize(loss)
                    print_to_err(
                        type(self).__name__,
                        "loss at step %d: %f" % (step_id, loss.numpy()))
                    out_losses.append(loss.numpy())

                    if not args.accumulate_gradient:
                        model.clear_gradients()
        print_to_out(out_losses)
示例#22
0
def train():
    dist.init_parallel_env()
    # 1. initialize parallel environment
    set_seed(2021)
    # 2. create data parallel layer & optimizer
    layer = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer = paddle.DataParallel(layer)
    else:
        dp_layer = layer

    layer2 = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer2 = paddle.DataParallel(layer2)
    else:
        dp_layer2 = layer2

    dp_layer2.set_state_dict(dp_layer.state_dict())

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    adam2 = opt.Adam(learning_rate=0.001, parameters=dp_layer2.parameters())
    # 3. run layer

    print("Start")
    for i in range(10):
        batch_size = 10
        shard = int(batch_size / dist.get_world_size())
        start_no = shard * dist.get_rank()
        end_no = start_no + shard
        inputs = paddle.randn([10, 10], 'float32')[start_no:end_no]
        outputs = dp_layer(inputs)
        labels = paddle.randn([10, 1], 'float32')[start_no:end_no]
        loss = loss_fn(outputs, labels)
        if dist.get_rank() == 0:
            print("Loss1", loss.numpy()[0])
            print(dp_layer.parameters())
        loss.backward()
        adam.step()
        adam.clear_grad()

        outputs = dp_layer2(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        if dist.get_rank() == 0:
            print("Loss2", loss.numpy()[0])
            print(dp_layer2.parameters())
        adam2.step()
        adam2.clear_grad()
示例#23
0
def test_send1():
    """test_send1"""
    init_parallel_env()
    if paddle.distributed.ParallelEnv().rank == 0:
        data = paddle.to_tensor([7, 8, 9])
        paddle.distributed.send(data, dst=1)
    else:
        data = paddle.to_tensor([1, 2, 3])
        paddle.distributed.recv(data, src=0)
    out = data.numpy()
    assert out[0] == 7
    assert out[1] == 8
    assert out[2] == 9
示例#24
0
def test_send3():
    """test_send3"""
    init_parallel_env()
    if paddle.distributed.ParallelEnv().rank == 0:
        data = paddle.to_tensor([7, 8, 9])
        paddle.distributed.send(data, dst=1, group=None, use_calc_stream=False)
    else:
        data = paddle.to_tensor([1, 2, 3])
        paddle.distributed.recv(data, src=0)
    out = data.numpy()
    assert out[0] == 7
    assert out[1] == 8
    assert out[2] == 9
示例#25
0
def train(config):
    logging.info('training arguments: %s', config)
    if config.train.use_data_parallel:
        logging.info("parallel mode. init env...")
        dist.init_parallel_env()

    dataset_config = {
        'db_file': config.data.db,
        'input_encoder': g_input_encoder,
        'label_encoder': g_label_encoder,
        'is_cached': True
    }
    train_set = DatasetClass(
        name='train', data_file=config.data.train_set, **dataset_config)
    dev_set = DatasetClass(
        name='dev', data_file=config.data.dev_set, **dataset_config)

    shuf_train = True if not config.general.is_debug else False
    train_reader = DataLoaderClass(
        config,
        train_set,
        batch_size=config.general.batch_size,
        shuffle=shuf_train)
    #dev_reader = dataproc.DataLoader(config, dev_set, batch_size=config.general.batch_size, shuffle=False)
    dev_reader = DataLoaderClass(config, dev_set, batch_size=1, shuffle=False)
    max_train_steps = config.train.epochs * (
        len(train_set) // config.general.batch_size // config.train.trainer_num)

    model = ModelClass(config.model, g_label_encoder)
    if config.model.init_model_params is not None:
        logging.info("loading model param from %s",
                     config.model.init_model_params)
        model.set_state_dict(paddle.load(config.model.init_model_params))
    if config.train.use_data_parallel:
        logging.info("parallel mode. init model...")
        model = paddle.DataParallel(model)

    optimizer = text2sql.optim.init_optimizer(model, config.train,
                                              max_train_steps)
    if config.model.init_model_optim is not None:
        logging.info("loading model optim from %s",
                     config.model.init_model_optim)
        optimizer.set_state_dict(paddle.load(config.model.init_model_optim))

    logging.info("start of training...")
    launch.trainer.train(config, model, optimizer, config.train.epochs,
                         train_reader, dev_reader)
    logging.info("end of training...")
示例#26
0
def init_process_group(strategy=None):
    nranks = ParallelEnv().nranks
    rank = ParallelEnv().local_rank
    is_master = True if rank == 0 else False
    pg_group = dist.init_parallel_env()

    return pg_group.process_group
示例#27
0
    def test_multiple_gpus(self):
        dist.init_parallel_env()
        self.trainer_id = dist.get_rank()

        model_a = SimpleNet(self.trainer_id)
        model_b = SimpleNet(self.trainer_id)

        state_dict = model_a.state_dict()
        model_b.set_state_dict(state_dict)

        model_a = paddle.DataParallel(model_a, find_unused_parameters=True)
        model_b = paddle.DataParallel(model_b, find_unused_parameters=True)

        ones_input = paddle.ones(shape=(batch, in_dim))
        ones_input.stop_gradient = True

        w1_grad_sum = np.zeros((in_dim, out_dim), dtype='float32')
        w2_grad_sum = np.zeros((in_dim, out_dim), dtype='float32')

        for step_id in range(5):
            random_input = paddle.rand(shape=(batch, in_dim))
            random_input.stop_gradient = True

            if step_id % 2 == 0:
                out_a = model_a(random_input)
                out_b = model_b(random_input)
            else:
                out_a = model_a(ones_input)
                out_b = model_b(ones_input)

            out_a.sum().backward()
            out_b.sum().backward()

            self.check_gradient(model_a.parameters())
            self.check_gradient(model_b.parameters())

            # test acc gradient
            w1_grad_sum = self.check_acc(model_a._layers.w1.grad, w1_grad_sum,
                                         model_b._layers.w1.grad)
            w2_grad_sum = self.check_acc(model_a._layers.w2.grad, w2_grad_sum,
                                         model_b._layers.w2.grad)

            model_a.clear_gradients()
示例#28
0
    def run_trainer_with_spawn(self, args):
        paddle.disable_static()
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        np.random.seed(seed)
        random.seed(seed)
        args.trainer_id = dist.get_rank()

        if args.update_method == "nccl2":
            dist.init_parallel_env()
        model, train_reader, opt = self.get_model()
        if args.update_method == "nccl2":
            if args.find_unused_parameters:
                model = paddle.DataParallel(model, find_unused_parameters=True)
            else:
                model = paddle.DataParallel(model,
                                            find_unused_parameters=False)

        out_losses = []
        for step_id, data in enumerate(train_reader()):
            data = self._get_data(data, args)
            if step_id == RUN_STEP:
                break
            if step_id % 3 != 0:
                if args.update_method == "nccl2":
                    with model.no_sync():
                        loss = self.run_one_loop(model, opt, data)
                        loss.backward()
                else:
                    loss = self.run_one_loop(model, opt, data)
                    loss.backward()
            else:
                loss = self.run_one_loop(model, opt, data)
                loss.backward()
                opt.minimize(loss)
                print_to_err(
                    type(self).__name__,
                    "loss at step %d: %f" % (step_id, loss.numpy()))
                out_losses.append(loss.numpy())
                model.clear_gradients()
        print_to_out(out_losses)
        return out_losses
示例#29
0
def main(config, device, logger, vdl_writer):
    # init dist environment
    if config['Global']['distributed']:
        dist.init_parallel_env()

    global_config = config['Global']

    # build dataloader
    config['Train']['loader']['num_workers'] = 0
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if config['Eval']:
        config['Eval']['loader']['num_workers'] = 0
        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
    else:
        valid_dataloader = None

    paddle.enable_static()
    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)

    if 'inference_model' in global_config.keys():  # , 'inference_model'):
        inference_model_dir = global_config['inference_model']
    else:
        inference_model_dir = os.path.dirname(
            global_config['pretrained_model'])
        if  not (os.path.exists(os.path.join(inference_model_dir, "inference.pdmodel")) and \
            os.path.exists(os.path.join(inference_model_dir, "inference.pdiparams")) ):
            raise ValueError(
                "Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantazition"
            )

    paddleslim.quant.quant_post_static(
        executor=exe,
        model_dir=inference_model_dir,
        model_filename='inference.pdmodel',
        params_filename='inference.pdiparams',
        quantize_model_path=global_config['save_inference_dir'],
        sample_generator=sample_generator(train_dataloader),
        save_model_filename='inference.pdmodel',
        save_params_filename='inference.pdiparams',
        batch_size=1,
        batch_nums=None)
def test_scatter():
    """scatter"""
    paddle.set_device('gpu:%d' % paddle.distributed.ParallelEnv().dev_id)
    init_parallel_env()
    for t in types:
        if paddle.distributed.ParallelEnv().local_rank == 0:
            np_data1 = np.array([7, 8, 9]).astype(t)
            np_data2 = np.array([10, 11, 12]).astype(t)
        else:
            np_data1 = np.array([1, 2, 3]).astype(t)
            np_data2 = np.array([4, 5, 6]).astype(t)
        data1 = paddle.to_tensor(np_data1)
        data2 = paddle.to_tensor(np_data2)
        if paddle.distributed.ParallelEnv().local_rank == 0:
            paddle.distributed.scatter(data1, src=1)
        else:
            paddle.distributed.scatter(data1,
                                       tensor_list=[data1, data2],
                                       src=1)
        out = data1.numpy()
        assert len(out) == 3
        print("test_scatter %s ... ok" % t)