Example #1
def finetune(args):
    paddle.set_device(args.device)
    if dist.get_world_size() > 1:
        dist.init_parallel_env()

    pos_file = os.path.join(args.data_dir, 'rt-polarity.pos')
    neg_file = os.path.join(args.data_dir, 'rt-polarity.neg')
    x_text, y = load_data_and_labels(pos_file, neg_file)
    x_train, x_test, y_train, y_test = train_test_split(x_text,
                                                        y,
                                                        test_size=0.1,
                                                        random_state=args.seed)

    if not args.init_from_ckpt:
        raise ValueError('`init_from_ckpt` should be set.')
    model = ELMoBowTextClassification(args.init_from_ckpt, args.batch_size,
                                      args.sent_embedding_dim, args.dropout,
                                      args.num_classes)
    if dist.get_world_size() > 1:
        model = paddle.DataParallel(model)
    model.train()

    adam = paddle.optimizer.Adam(parameters=model.parameters(),
                                 learning_rate=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss()

    vocab = load_vocab()

    train_dataset = SentencePolarityDatasetV1(x_train, y_train, vocab,
                                              args.max_seq_len)
    test_dataset = SentencePolarityDatasetV1(x_test, y_test, vocab,
                                             args.max_seq_len)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              return_list=True,
                              shuffle=True,
                              collate_fn=lambda batch: generate_batch(batch))
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             return_list=True,
                             shuffle=False,
                             collate_fn=lambda batch: generate_batch(batch))

    for epoch in range(args.epochs):
        print('Epoch {}/{}'.format(epoch + 1, args.epochs))
        for step, batch_data in enumerate(train_loader, start=1):
            ids, ids_reverse, label = batch_data

            output = model((ids, ids_reverse))
            loss = criterion(output, label)
            loss.backward()
            adam.step()
            adam.clear_grad()

            if step % args.logging_step == 0:
                print('step {}, loss {}'.format(step, loss.numpy()[0]))

    acc = test(model, test_loader)
    print('\ntest acc {}\n'.format(acc))
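
The guard used above, initializing the parallel environment and wrapping the model only when `dist.get_world_size() > 1`, recurs throughout the examples below. A minimal sketch of that pattern as a standalone helper (hypothetical name `maybe_data_parallel`; assumes the script is started with `paddle.distributed.launch` for multi-GPU runs):

import paddle
import paddle.distributed as dist

def maybe_data_parallel(model):
    # initialize the parallel environment and wrap the model only when
    # several processes were launched (single-process runs stay untouched)
    if dist.get_world_size() > 1:
        dist.init_parallel_env()
        model = paddle.DataParallel(model)
    return model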
Example #2
def train():
    """bergin train"""
    arr1 = []
    arr2 = []
    dist.init_parallel_env()
    set_seed(2021)
    layer = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer = paddle.DataParallel(layer)
    else:
        dp_layer = layer

    layer2 = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer2 = paddle.DataParallel(layer2)
    else:
        dp_layer2 = layer2

    dp_layer2.set_state_dict(dp_layer.state_dict())

    loss_fn = nn.MSELoss()
    adam = opt.Adam(
        learning_rate=0.001, parameters=dp_layer.parameters())

    adam2 = opt.Adam(
        learning_rate=0.001, parameters=dp_layer2.parameters())

    for i in range(2):
        batch_size = 10
        shard = int(batch_size / dist.get_world_size())
        start_no = shard * dist.get_rank()
        end_no = start_no + shard
        inputs = paddle.randn([10, 10], 'float32')[start_no:end_no]
        outputs = dp_layer(inputs)
        labels = paddle.randn([10, 1], 'float32')[start_no:end_no]
        loss = loss_fn(outputs, labels)
        if dist.get_rank() == 0:
            arr1.append(loss.numpy()[0])
        loss.backward()
        adam.step()
        adam.clear_grad()

        outputs = dp_layer2(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        if dist.get_rank() == 0:
            arr2.append(loss.numpy()[0])
        adam2.step()
        adam2.clear_grad()
    check_data(arr1, arr2)
Example #3
def train():
    # 1. initialize parallel environment
    dist.init_parallel_env()
    set_seed(2021)
    # 2. create data parallel layer & optimizer
    layer = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer = paddle.DataParallel(layer)
    else:
        dp_layer = layer

    layer2 = LinearNet()

    if dist.get_world_size() > 1:
        dp_layer2 = paddle.DataParallel(layer2)
    else:
        dp_layer2 = layer2

    dp_layer2.set_state_dict(dp_layer.state_dict())

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    adam2 = opt.Adam(learning_rate=0.001, parameters=dp_layer2.parameters())
    # 3. run layer

    print("Start")
    for i in range(10):
        batch_size = 10
        shard = int(batch_size / dist.get_world_size())
        start_no = shard * dist.get_rank()
        end_no = start_no + shard
        inputs = paddle.randn([10, 10], 'float32')[start_no:end_no]
        outputs = dp_layer(inputs)
        labels = paddle.randn([10, 1], 'float32')[start_no:end_no]
        loss = loss_fn(outputs, labels)
        if dist.get_rank() == 0:
            print("Loss1", loss.numpy()[0])
            print(dp_layer.parameters())
        loss.backward()
        adam.step()
        adam.clear_grad()

        outputs = dp_layer2(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        if dist.get_rank() == 0:
            print("Loss2", loss.numpy()[0])
            print(dp_layer2.parameters())
        adam2.step()
        adam2.clear_grad()
Example #4
    def shard(self, num_shards=None, index=None):
        """
        Split the dataset into `num_shards` pieces.

        Args:
            num_shards (int, optional): An integer representing the number of
                data shards. If None, `num_shards` would be number of trainers.
                Defaults to None.
            index (int, optional): An integer representing the index of the
                current shard. If None, `index` would be the current trainer rank
                id. Defaults to None.
        """
        if num_shards is None:
            num_shards = dist.get_world_size()
        if index is None:
            index = dist.get_rank()

        def sharder(num_shards, index, num_samples):
            if num_samples % num_shards == index:
                return True
            else:
                return False

        fn = partial(sharder, num_shards=num_shards, index=index)
        self._shard_filter = fn
        return self
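
The filter above keeps every `num_shards`-th sample for the current trainer, i.e. a round-robin split. A pure-Python illustration with toy values (no Paddle involved):

num_shards, index = 4, 1
kept = [i for i in range(10) if i % num_shards == index]
print(kept)   # trainer 1 keeps samples [1, 5, 9]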
Example #5
    def __init__(self,
                 filepattern,
                 batch_size,
                 pad_token_id,
                 bos_token_id,
                 sort_pool_size=2**16,
                 seed=1,
                 n_gpus=None,
                 rank=None,
                 mode='test'):
        super(DialogueDataset, self).__init__()

        self.file_list = glob(filepattern)
        self.sort_pool_size = 0 if mode == 'test' else sort_pool_size
        self.n_gpus = n_gpus if n_gpus else dist.get_world_size()
        self.rank = rank if rank is not None else dist.get_rank()
        self.batch_size = batch_size * self.n_gpus  # len(batch) * max_len <= this value
        self.shuffle = True if mode == 'train' else False
        self.mode = mode
        self.pad_id = pad_token_id  # [PAD]
        self.bos_id = bos_token_id  # [CLS]
        self.global_rng = np.random.RandomState(seed)

        assert len(self.file_list) > 0, \
            'There are no files matching %s.' % filepattern
Example #6
def batch_norm_1d(num_channels):
    """tbd"""
    if dist.get_world_size() > 1:
        return nn.SyncBatchNorm.convert_sync_batchnorm(
            nn.BatchNorm1D(num_channels))
    else:
        return nn.BatchNorm1D(num_channels)
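
A usage sketch, assuming `paddle` and the `batch_norm_1d` helper above are in scope: in a single-process run it returns a plain `nn.BatchNorm1D`; with more than one device the converted `SyncBatchNorm` shares batch statistics across ranks.

bn = batch_norm_1d(64)               # BatchNorm1D, or SyncBatchNorm when world_size > 1
out = bn(paddle.randn([8, 64, 16]))  # (batch, channels, length)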
Example #7
def all_gather(v):
    if dist.get_world_size() <= 1:
        return v.item()
    ret = []
    dist.all_gather(ret, v)
    concat = paddle.concat(ret, axis=0)
    return concat.mean().item()
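
A usage sketch, assuming the `all_gather` helper above is in scope and the parallel environment has been initialized in multi-GPU runs:

import paddle

local_loss = paddle.to_tensor([0.5])   # per-rank scalar metric
avg_loss = all_gather(local_loss)      # mean over all ranks, plain .item() on one GPU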
Example #8
    def on_epoch_end(self, status):
        # Checkpointer only performed during training
        mode = status['mode']
        epoch_id = status['epoch_id']
        weight = None
        save_name = None
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                end_epoch = self.model.cfg.epoch
                if epoch_id % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
                    save_name = str(
                        epoch_id) if epoch_id != end_epoch - 1 else "model_final"
                    weight = self.weight
            elif mode == 'eval':
                if 'save_best_model' in status and status['save_best_model']:
                    for metric in self.model._metrics:
                        map_res = metric.get_results()
                        key = 'bbox' if 'bbox' in map_res else 'mask'
                        if key not in map_res:
                            logger.warn("Evaluation results empty, this may be due to "
                                        "training iterations being too few or not "
                                        "loading the correct weights.")
                            return
                        if map_res[key][0] > self.best_ap:
                            self.best_ap = map_res[key][0]
                            save_name = 'best_model'
                            weight = self.weight
                        logger.info("Best test {} ap is {:0.3f}.".format(
                            key, self.best_ap))
            if weight:
                save_model(weight, self.model.optimizer, self.save_dir,
                           save_name, epoch_id + 1)
Example #9
    def _shard_edges_by_dst(self, edges, edge_feat):
        """Shard Edges by dst

        Args:

            edges: list of (u, v) tuples, 2D numpy.ndarry or 2D paddle.Tensor 

            edge_feat (optional): a dict of numpy array as edge features (should
                                have consistent order with edges)

        Returns:
     
            Return a tuple (shard_edges, shard_edge_feat) as the shard results.

        """
        shard_flag = edges[:, 1]
        mask = (shard_flag % dist.get_world_size()) == dist.get_rank()
        if type(mask) == paddle.Tensor:
            eid = paddle.masked_select(paddle.arange(edges.shape[0]), mask)
            shard_edges = paddle.gather(edges, eid)
            shard_edge_feat = {}
            for key, value in edge_feat.items():
                shard_edge_feat[key] = paddle.gather(value, eid)
        else:
            eid = np.arange(edges.shape[0])[mask]
            shard_edges = edges[eid]
            shard_edge_feat = {}
            for key, value in edge_feat.items():
                shard_edge_feat[key] = value[eid]
        return shard_edges, shard_edge_feat
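
The destination-based partition can be reproduced with plain NumPy; each rank keeps the edges whose destination id maps to it under the modulo rule (toy values):

import numpy as np

edges = np.array([[0, 1], [2, 4], [3, 5], [6, 8]])
world_size, rank = 2, 1
mask = (edges[:, 1] % world_size) == rank
print(edges[mask])   # keeps [[0, 1], [3, 5]], the edges whose dst is odd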
Example #10
    def shard(self, num_replicas=None, rank=None):
        """
        Operates slice using multi GPU.
        Args:
            num_replicas (int, optional): The number of training process, and is also the number of GPU cards used in training. 
                Default: None.
            rank (int, optional): Number of training process. Equal to the value of the environment variable PADDLE_TRAINER_ID.
                Default: None.
        Returns:
            SamplerHelper
        """
        if num_replicas is None:
            num_replicas = dist.get_world_size()
        if rank is None:
            rank = dist.get_rank()

        def _impl():
            for i, idx in enumerate(self):
                if i % num_replicas == rank:
                    yield idx
            if i % num_replicas != num_replicas - 1 and rank > i % num_replicas:
                # use last samples to make it evenly divisible
                yield idx

        sampler = type(self)(self.data_source, _impl)
        if self.length is not None:
            sampler.length = int(math.ceil(self.length * 1.0 / num_replicas))
        else:
            sampler.length = None
        return sampler
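
The generator yields every `num_replicas`-th index for the current rank and, when the final round is uneven, re-yields the last index so all ranks produce the same number of samples. A pure-Python sketch of the same rule (toy values):

def shard_indices(indices, num_replicas, rank):
    i = idx = None
    for i, idx in enumerate(indices):
        if i % num_replicas == rank:
            yield idx
    # pad with the last index so every rank yields the same count
    if i is not None and i % num_replicas != num_replicas - 1 and rank > i % num_replicas:
        yield idx

print(list(shard_indices(range(5), num_replicas=2, rank=0)))  # [0, 2, 4]
print(list(shard_indices(range(5), num_replicas=2, rank=1)))  # [1, 3, 4]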
Example #11
    def test_class_center_sample(self):

        rank_id = dist.get_rank()
        nranks = dist.get_world_size()

        seed = 1025
        set_random_seed(seed)
        paddle.seed(rank_id * 10)
        random.seed(seed)
        np.random.seed(seed)

        batch_size = 20
        num_samples = 6

        for dtype in ('int32', 'int64'):
            for _ in range(5):
                classes_list = np.random.randint(10, 15, (nranks, ))
                num_class = np.sum(classes_list)

                np_label = np.random.randint(0,
                                             num_class, (batch_size, ),
                                             dtype=dtype)
                label = paddle.to_tensor(np_label, dtype=dtype)
                np_remapped_label, np_sampled_class_center_per_device = class_center_sample_numpy(
                    np_label, classes_list, num_samples)
                remapped_label, sampled_class_index = paddle.nn.functional.class_center_sample(
                    label, classes_list[rank_id], num_samples)
                np.testing.assert_allclose(remapped_label.numpy(),
                                           np_remapped_label)
                np_sampled_class_index = np_sampled_class_center_per_device[
                    rank_id]
                np.testing.assert_allclose(
                    sampled_class_index.numpy()[:len(np_sampled_class_index)],
                    np_sampled_class_index)
Example #12
    def infer(self):
        assert self.mode == "infer" and self.eval_mode == "classification"
        total_trainer = dist.get_world_size()
        local_rank = dist.get_rank()
        image_list = get_image_list(self.config["Infer"]["infer_imgs"])
        # data split
        image_list = image_list[local_rank::total_trainer]

        batch_size = self.config["Infer"]["batch_size"]
        self.model.eval()
        batch_data = []
        image_file_list = []
        for idx, image_file in enumerate(image_list):
            with open(image_file, 'rb') as f:
                x = f.read()
            for process in self.preprocess_func:
                x = process(x)
            batch_data.append(x)
            image_file_list.append(image_file)
            if len(batch_data) >= batch_size or idx == len(image_list) - 1:
                batch_tensor = paddle.to_tensor(batch_data)
                out = self.model(batch_tensor)
                if isinstance(out, list):
                    out = out[0]
                if isinstance(out, dict) and "logits" in out:
                    out = out["logits"]
                if isinstance(out, dict) and "output" in out:
                    out = out["output"]
                result = self.postprocess_func(out, image_file_list)
                print(result)
                batch_data.clear()
                image_file_list.clear()
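
The stride slice `image_list[local_rank::total_trainer]` gives each rank a disjoint subset of the images. A toy illustration:

files = ['a.jpg', 'b.jpg', 'c.jpg', 'd.jpg', 'e.jpg']
total_trainer, local_rank = 2, 1
print(files[local_rank::total_trainer])   # rank 1 handles ['b.jpg', 'd.jpg']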
Example #13
    def forward(self, input):
        dtype = input.dtype
        flatten = input.reshape([-1, self.dim])
        dist = (flatten.pow(2).sum(1, keepdim=True) -
                2 * flatten.transpose([0, 1]).matmul(self.embed) +
                self.embed.pow(2).sum(0, keepdim=True))
        embed_ind = (-dist).argmax(1)
        embed_onehot = F.one_hot(embed_ind, self.n_embed).astype(dtype)
        embed_ind = embed_ind.reshape(input.shape[:-1])
        quantize = F.embedding(embed_ind,
                               self.embed.transpose([1, 0]),
                               padding_idx=-1)

        if self.training:
            embed_onehot_sum = embed_onehot.sum(0)
            embed_sum = flatten.transpose([1, 0]).matmul(embed_onehot)

            if dist_fn.get_world_size() > 1:
                dist_fn.all_reduce(embed_onehot_sum)
                dist_fn.all_reduce(embed_sum)

            ema_inplace(self.cluster_size, embed_onehot_sum, self.decay)
            ema_inplace(self.embed_avg, embed_sum, self.decay)
            cluster_size = laplace_smoothing(
                self.cluster_size, self.n_embed,
                self.eps) * self.cluster_size.sum()
            embed_normalized = self.embed_avg / cluster_size.unsqueeze(0)
            self.embed[:] = embed_normalized

        loss = F.mse_loss(quantize.detach(), input) * self.commitment
        quantize = input + (quantize - input).detach()
        return quantize, embed_ind, loss
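
In the multi-device branch the per-rank code-usage statistics are summed with `all_reduce` before the EMA update, so every worker sees the same global counts. A minimal sketch of that collective (assumes the parallel environment is already initialized):

import paddle
import paddle.distributed as dist_fn

counts = paddle.ones([4])         # per-rank code-usage histogram
if dist_fn.get_world_size() > 1:
    dist_fn.all_reduce(counts)    # element-wise sum across all ranks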
Example #14
    def shard(self, num_shards=None, index=None):
        """
        Use samples whose indices mod `index` equals 0 to update this dataset.
        Args:
            num_shards (int, optional): A integer representing the number of
                data shards. If None, `num_shards` would be number of trainers.
                Default: None
            index (int, optional): A integer representing the index of the
                current shard. If None, index` would be the current trainer rank
                id. Default: None.
        """
        if num_shards is None:
            num_shards = dist.get_world_size()
        if index is None:
            index = dist.get_rank()

        num_samples = int(math.ceil(len(self.new_data) * 1.0 / num_shards))
        total_size = num_samples * num_shards
        # add extra samples to make it evenly divisible
        self.new_data = [
            self.new_data[idx] for idx in range(len(self.new_data))
            if idx % num_shards == index
        ]
        if len(self.new_data) < num_samples:
            self.new_data.append(self.new_data[index + 1 - num_shards])

        return self
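
Here the shard is padded by repeating an already-selected sample so every trainer ends up with `ceil(len(data) / num_shards)` items. A pure-Python run of the same arithmetic (toy values):

import math

data = list(range(10))
num_shards, index = 3, 2
num_samples = int(math.ceil(len(data) * 1.0 / num_shards))           # 4
shard = [x for i, x in enumerate(data) if i % num_shards == index]   # [2, 5, 8]
if len(shard) < num_samples:
    shard.append(shard[index + 1 - num_shards])                      # repeats shard[0]
print(shard)                                                         # [2, 5, 8, 2]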
Example #15
    def shard(self, num_shards=None, index=None):
        """
        Use samples whose indices mod `index` equals 0 to update this dataset.
        Args:
            num_shards (int, optional): A integer representing the number of
                data shards. If None, `num_shards` would be number of trainers.
                Default: None
            index (int, optional): A integer representing the index of the
                current shard. If None, index` would be the current trainer rank
                id. Default: None.
        """
        if num_shards is None:
            num_shards = dist.get_world_size()
        if index is None:
            index = dist.get_rank()

        def sharder(num_shards, index, num_samples):
            if num_samples % num_shards == index:
                return True
            else:
                return False

        fn = partial(sharder, num_shards=num_shards, index=index)
        self._shard_filter = fn
        return self
Example #16
    def on_epoch_end(self, status):
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            mode = status['mode']
            if mode == 'eval':
                sample_num = status['sample_num']
                cost_time = status['cost_time']
                logger.info('Total sample number: {}, average FPS: {}'.format(
                    sample_num, sample_num / cost_time))
Example #17
    def train_iter_end(self, trainer):
        # update the target network at the end of every training iteration;
        # with DataParallel the underlying layers live in `trainer.model._layers`
        if dist.get_world_size() > 1:
            trainer.model._layers.update_target_network_L1()
        else:
            trainer.model.update_target_network_L1()
Example #18
    def __init__(self, cfg, mode='train'):
        self.cfg = cfg
        assert mode.lower() in ['train', 'eval', 'test'], \
                "mode should be 'train', 'eval' or 'test'"
        self.mode = mode.lower()
        self.optimizer = None
        self.is_loaded_weights = False

        # build model
        if 'model' not in self.cfg:
            self.model = create(cfg.architecture)
        else:
            self.model = self.cfg.model
            self.is_loaded_weights = True

        self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
        if self.use_ema:
            self.ema = ModelEMA(cfg['ema_decay'],
                                self.model,
                                use_thres_step=True)

        # build data loader
        self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]
        if self.mode == 'train':
            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, cfg.worker_num)
        # EvalDataset is built with a BatchSampler to evaluate on a single device
        # TODO: multi-device evaluation
        if self.mode == 'eval':
            self._eval_batch_sampler = paddle.io.BatchSampler(
                self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, cfg.worker_num, self._eval_batch_sampler)
        # TestDataset is built after the user sets images, so skip loader creation here

        # build optimizer in train mode
        if self.mode == 'train':
            steps_per_epoch = len(self.loader)
            self.lr = create('LearningRate')(steps_per_epoch)
            self.optimizer = create('OptimizerBuilder')(
                self.lr, self.model.parameters())

        self._nranks = dist.get_world_size()
        self._local_rank = dist.get_rank()

        self.status = {}

        self.start_epoch = 0
        self.end_epoch = cfg.epoch

        # initial default callbacks
        self._init_callbacks()

        # initial default metrics
        self._init_metrics()
        self._reset_metrics()
Example #19
    def on_epoch_end(self, status):
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'eval':
                for metric in self.model._metrics:
                    for key, map_value in metric.get_results().items():
                        self.vdl_writer.add_scalar("{}-mAP".format(key),
                                                   map_value[0],
                                                   self.vdl_mAP_step)
                self.vdl_mAP_step += 1
Example #20
def create_data_loader(args, places=None, use_all_vocab=False):
    root = None if args.root == "None" else args.root
    if not use_all_vocab:
        WMT14ende.VOCAB_INFO = (os.path.join("WMT14.en-de",
                                             "wmt14_ende_data_bpe",
                                             "vocab_all.bpe.33712"),
                                os.path.join("WMT14.en-de",
                                             "wmt14_ende_data_bpe",
                                             "vocab_all.bpe.33712"),
                                "de485e3c2e17e23acf4b4b70b54682dd",
                                "de485e3c2e17e23acf4b4b70b54682dd")
    (src_vocab, trg_vocab) = WMT14ende.get_vocab(root=root)
    padding_vocab = (
        lambda x:
        (x + args.pad_factor - 1) // args.pad_factor * args.pad_factor)
    args.src_vocab_size = padding_vocab(len(src_vocab))
    args.trg_vocab_size = padding_vocab(len(trg_vocab))
    transform_func = WMT14ende.get_default_transform_func(root=root)
    datasets = [
        WMT14ende.get_datasets(mode=m,
                               root=root,
                               transform_func=transform_func)
        for m in ["train", "dev"]
    ]

    data_loaders = [(None)] * 2
    for i, dataset in enumerate(datasets):
        dataset = dataset.filter(
            partial(min_max_filer, max_len=args.max_length))
        batch_sampler = TransformerBatchSampler(
            dataset=dataset,
            batch_size=args.batch_size,
            pool_size=args.pool_size,
            sort_type=args.sort_type,
            shuffle=args.shuffle,
            shuffle_batch=args.shuffle_batch,
            use_token_batch=True,
            max_length=args.max_length,
            distribute_mode=True if i == 0 else False,
            world_size=dist.get_world_size(),
            rank=dist.get_rank(),
            pad_seq=args.pad_seq,
            bsz_multi=args.bsz_multi)

        data_loader = DataLoader(dataset=dataset,
                                 places=places,
                                 batch_sampler=batch_sampler,
                                 collate_fn=partial(prepare_train_input,
                                                    bos_idx=args.bos_idx,
                                                    eos_idx=args.eos_idx,
                                                    pad_idx=args.bos_idx,
                                                    pad_seq=args.pad_seq),
                                 num_workers=0)
        data_loaders[i] = (data_loader)
    return data_loaders
Example #21
def preprocess(is_train=False):
    FLAGS = ArgsParser().parse_args()
    profiler_options = FLAGS.profiler_options
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    profile_dic = {"profiler_options": FLAGS.profiler_options}
    merge_config(profile_dic)

    if is_train:
        # save_config
        save_model_dir = config['Global']['save_model_dir']
        os.makedirs(save_model_dir, exist_ok=True)
        with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
            yaml.dump(dict(config),
                      f,
                      default_flow_style=False,
                      sort_keys=False)
        log_file = '{}/train.log'.format(save_model_dir)
    else:
        log_file = None
    logger = get_logger(name='root', log_file=log_file)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    alg = config['Architecture']['algorithm']
    assert alg in [
        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
        'SEED', 'SDMGR'
    ]
    windows_not_support_list = ['PSE']
    if platform.system() == "Windows" and alg in windows_not_support_list:
        logger.warning('{} is not supported on Windows yet'.format(alg))
        sys.exit()

    device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
    device = paddle.set_device(device)

    config['Global']['distributed'] = dist.get_world_size() != 1

    if config['Global']['use_visualdl']:
        from visualdl import LogWriter
        save_model_dir = config['Global']['save_model_dir']
        vdl_writer_path = '{}/vdl/'.format(save_model_dir)
        os.makedirs(vdl_writer_path, exist_ok=True)
        vdl_writer = LogWriter(logdir=vdl_writer_path)
    else:
        vdl_writer = None
    print_dict(config, logger)
    logger.info('train with paddle {} and device {}'.format(
        paddle.__version__, device))
    return config, device, logger, vdl_writer
Example #22
def create_data_loader(args, places=None):
    datasets = load_dataset('wmt14ende', splits=('train', 'dev'))
    if not args.benchmark:
        src_vocab = Vocab.load_vocabulary(**datasets[0].vocab_info["bpe"])
    else:
        src_vocab = Vocab.load_vocabulary(
            **datasets[0].vocab_info["benchmark"])
    trg_vocab = src_vocab

    padding_vocab = (
        lambda x:
        (x + args.pad_factor - 1) // args.pad_factor * args.pad_factor)
    args.src_vocab_size = padding_vocab(len(src_vocab))
    args.trg_vocab_size = padding_vocab(len(trg_vocab))

    def convert_samples(sample):
        source = sample[args.src_lang].split()
        target = sample[args.trg_lang].split()

        source = src_vocab.to_indices(source)
        target = trg_vocab.to_indices(target)

        return source, target

    data_loaders = [(None)] * 2
    for i, dataset in enumerate(datasets):
        dataset = dataset.map(convert_samples, lazy=False).filter(
            partial(min_max_filer, max_len=args.max_length))
        batch_sampler = TransformerBatchSampler(
            dataset=dataset,
            batch_size=args.batch_size,
            pool_size=args.pool_size,
            sort_type=args.sort_type,
            shuffle=args.shuffle,
            shuffle_batch=args.shuffle_batch,
            use_token_batch=True,
            max_length=args.max_length,
            distribute_mode=True if i == 0 else False,
            world_size=dist.get_world_size(),
            rank=dist.get_rank(),
            pad_seq=args.pad_seq,
            bsz_multi=args.bsz_multi)

        data_loader = DataLoader(dataset=dataset,
                                 places=places,
                                 batch_sampler=batch_sampler,
                                 collate_fn=partial(prepare_train_input,
                                                    bos_idx=args.bos_idx,
                                                    eos_idx=args.eos_idx,
                                                    pad_idx=args.bos_idx,
                                                    pad_seq=args.pad_seq),
                                 num_workers=0)
        data_loaders[i] = (data_loader)
    return data_loaders
Example #23
def get_steps_per_epoch(args):
    """tbd"""
    # add as argument
    if args.dataset == 'zinc':
        train_num = int(20000000 * (1 - args.test_ratio))
    else:
        raise ValueError(args.dataset)
    if args.DEBUG:
        train_num = 100
    steps_per_epoch = int(train_num / args.batch_size)
    if args.distributed:
        steps_per_epoch = int(steps_per_epoch / dist.get_world_size())
    return steps_per_epoch
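
Dividing by `dist.get_world_size()` reflects that each worker only consumes its own share of every global batch. A numeric sketch with hypothetical values:

train_num = 18_000_000    # samples left after the test split
batch_size = 256
world_size = 4
print(train_num // batch_size // world_size)   # 17578 steps per epoch per worker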
Example #24
    def __init__(self, cfg):
        # base config
        self.logger = logging.getLogger(__name__)
        self.cfg = cfg
        self.output_dir = cfg.output_dir

        self.local_rank = dist.get_rank()
        self.log_interval = cfg.log_config.interval

        self.start_epoch = 0
        self.current_epoch = 0
        self.current_iter = 0
        self.inner_iter = 0
        self.batch_id = 0
        self.global_steps = 0
        self.timestamp = cfg.timestamp
        self.logs = OrderedDict()

        # build model
        self.model = build_model(cfg.model)
        # multiple gpus prepare
        if dist.get_world_size() > 1:
            paddle.distributed.init_parallel_env()
            self.model = DistributedDataParallel(self.model)

        # build train dataloader
        self.train_dataloader = build_dataloader(cfg.dataloader.train)
        self.iters_per_epoch = len(self.train_dataloader)

        # build lr scheduler
        self.lr_scheduler = build_lr_scheduler(cfg.lr_scheduler,
                                               self.iters_per_epoch)

        # build optimizer
        self.optimizer = build_optimizer(cfg.optimizer, self.lr_scheduler,
                                         self.model.parameters())

        # build hooks
        self.hooks = []

        self.add_train_hooks()

        self.add_custom_hooks()

        self.epochs = cfg.get('epochs', None)
        if self.epochs:
            self.total_iters = self.epochs * self.iters_per_epoch
            self.by_epoch = True
        else:
            self.by_epoch = False
            self.total_iters = cfg.total_iters
Example #25
    def __init__(self, cfg):
        self.batch_size = cfg.batch_size
        self.file_path = cfg.file_path

        self.seg_num = cfg.seg_num
        self.seglen = cfg.seglen
        self.short_size = cfg.short_size
        self.target_size = cfg.target_size

        # set num_shards and shard_id when distributed training is implemented
        self.num_shards = dist.get_world_size()
        self.shard_id = ParallelEnv().local_rank
        self.dali_mean = cfg.mean * (self.seg_num * self.seglen)
        self.dali_std = cfg.std * (self.seg_num * self.seglen)
Example #26
    def _get_size(self):
        # random_interval defaults to 10: re-sample self._input_size every 10 iters
        image_ratio = self.input_size[1] * 1.0 / self.input_size[0]
        if self._step % self.random_interval == 0:
            size_factor = random.randint(*self.size_range)
            size = [
                self.size_stride * size_factor,
                self.size_stride * int(size_factor * image_ratio)
            ]
            size = paddle.to_tensor(size)
            if dist.get_world_size() > 1 and paddle_distributed_is_initialized():
                dist.barrier()
                dist.broadcast(size, 0)
            self._input_size = size
        self._step += 1
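
Broadcasting the freshly sampled size from rank 0 keeps the randomly chosen input resolution identical on every worker. A minimal sketch of that synchronization (assumes the parallel environment is already initialized):

import paddle
import paddle.distributed as dist

size = paddle.to_tensor([416, 416])
if dist.get_world_size() > 1:
    dist.barrier()              # make sure every rank has reached this point
    dist.broadcast(size, 0)     # every rank now holds the value sampled on rank 0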
Example #27
def all_gather_tokens(data):
    """Gathers num of tokens from all nodes. 
       `data` should be a tensor of num of tokens.
    """
    if dist.get_world_size() < 2:
        return data
    if not hasattr(all_gather_tokens,
                   '_in_buffer') or all_gather_tokens._in_buffer is None:
        all_gather_tokens._in_buffer = data
        all_gather_tokens._out_buffers = []
    in_buffer = all_gather_tokens._in_buffer
    out_buffers = all_gather_tokens._out_buffers

    dist.all_gather(out_buffers, in_buffer)

    return paddle.add_n(out_buffers)
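
A usage sketch, assuming the helper above is in scope and the parallel environment has been initialized; on a single GPU the tensor is returned unchanged, otherwise the per-rank counts are gathered and summed (the gather buffers are allocated on the first call):

import paddle

local_tokens = paddle.to_tensor([128], dtype='int64')   # tokens seen by this rank
total_tokens = all_gather_tokens(local_tokens)          # summed over all ranks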
Example #28
    def on_step_end(self, status):
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            mode = status['mode']
            if mode == 'train':
                epoch_id = status['epoch_id']
                step_id = status['step_id']
                steps_per_epoch = status['steps_per_epoch']
                training_staus = status['training_staus']
                batch_time = status['batch_time']
                data_time = status['data_time']

                epoches = self.model.cfg.epoch
                batch_size = self.model.cfg['{}Reader'.format(
                    mode.capitalize())]['batch_size']

                logs = training_staus.log()
                space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'
                if step_id % self.model.cfg.log_iter == 0:
                    eta_steps = (epoches -
                                 epoch_id) * steps_per_epoch - step_id
                    eta_sec = eta_steps * batch_time.global_avg
                    eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
                    ips = float(batch_size) / batch_time.avg
                    fmt = ' '.join([
                        'Epoch: [{}]',
                        '[{' + space_fmt + '}/{}]',
                        'learning_rate: {lr:.6f}',
                        '{meters}',
                        'eta: {eta}',
                        'batch_cost: {btime}',
                        'data_cost: {dtime}',
                        'ips: {ips:.4f} images/s',
                    ])
                    fmt = fmt.format(epoch_id,
                                     step_id,
                                     steps_per_epoch,
                                     lr=status['learning_rate'],
                                     meters=logs,
                                     eta=eta_str,
                                     btime=str(batch_time),
                                     dtime=str(data_time),
                                     ips=ips)
                    logger.info(fmt)
            if mode == 'eval':
                step_id = status['step_id']
                if step_id % 100 == 0:
                    logger.info("Eval iter: {}".format(step_id))
Example #29
    def __init__(self, cfg, mode='train'):
        self.cfg = cfg
        assert mode.lower() in ['train', 'eval', 'test'], \
                "mode should be 'train', 'eval' or 'test'"
        self.mode = mode.lower()
        self.optimizer = None

        # init distillation config
        self.distill_model = None
        self.distill_loss = None

        # build data loader
        self.dataset = cfg['{}Dataset'.format(self.mode.capitalize())]

        if self.mode == 'train':
            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, cfg.worker_num)

        self.model = create(cfg.architecture)

        # normalize params for deploy
        self.model.load_meanstd(cfg['TestReader']['sample_transforms'])

        # EvalDataset is built with a BatchSampler to evaluate on a single device
        if self.mode == 'eval':
            self._eval_batch_sampler = paddle.io.BatchSampler(
                self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
                self.dataset, cfg.worker_num, self._eval_batch_sampler)
        # TestDataset is built after the user sets images, so skip loader creation here

        self._nranks = dist.get_world_size()
        self._local_rank = dist.get_rank()

        self.status = {}

        self.start_epoch = 0
        self.end_epoch = 0 if 'epoch' not in cfg else cfg.epoch

        # initial default callbacks
        self._init_callbacks()

        # initial default metrics
        self._init_metrics()
        self._reset_metrics()
Example #30
def create_data_loader(args):
    root = None if args.root == "None" else args.root
    (src_vocab, trg_vocab) = WMT14ende.get_vocab(root=root)
    padding_vocab = (
        lambda x: (x + args.pad_factor - 1) // args.pad_factor * args.pad_factor
    )
    args.src_vocab_size = padding_vocab(len(src_vocab))
    args.trg_vocab_size = padding_vocab(len(trg_vocab))
    transform_func = WMT14ende.get_default_transform_func(root=root)
    datasets = [
        WMT14ende.get_datasets(
            mode=m, root=root, transform_func=transform_func)
        for m in ["train", "dev"]
    ]

    data_loaders = [(None)] * 2
    for i, dataset in enumerate(datasets):
        dataset = dataset.filter(
            partial(
                min_max_filer, max_len=args.max_length))
        batch_sampler = TransformerBatchSampler(
            dataset=dataset,
            batch_size=args.batch_size,
            pool_size=args.pool_size,
            sort_type=args.sort_type,
            shuffle=args.shuffle,
            shuffle_batch=args.shuffle_batch,
            use_token_batch=True,
            max_length=args.max_length,
            distribute_mode=True if i == 0 else False,
            world_size=dist.get_world_size(),
            rank=dist.get_rank())

        data_loader = DataLoader(
            dataset=dataset,
            batch_sampler=batch_sampler,
            collate_fn=partial(
                prepare_train_input,
                bos_idx=args.bos_idx,
                eos_idx=args.eos_idx,
                pad_idx=args.bos_idx),
            num_workers=0,
            return_list=True)
        data_loaders[i] = (data_loader)
    return data_loaders