def var(input, axis=None, keepdim=False, unbiased=True, out=None, name=None):
    """Compute the variance of ``input`` along ``axis``.

    Args:
        input (Variable): Floating-point tensor to reduce.
        axis (list|None): Dimensions to reduce; ``None`` or ``[]`` reduces
            over every dimension. Negative indices are wrapped.
        keepdim (bool): Keep reduced dimensions with length 1 in the result.
        unbiased (bool): Apply Bessel's correction (divide by n - 1 instead
            of n); the factor is 0 when n <= 1.
        out (Variable|None): Optional variable to assign the result into.
        name (str|None): Name forwarded to the reduce layers.

    Returns:
        Variable: The variance (``out`` when one is supplied).

    Raises:
        ValueError: If ``input`` is not float32/float64.
    """
    dtype = input.dtype
    if dtype not in ["float32", "float64"]:
        raise ValueError("Layer tensor.var() only supports floating-point "
                         "dtypes, but received {}.".format(dtype))

    rank = len(input.shape)
    # Normalize the reduction axes: default to all dims, wrap negatives.
    if axis != None and axis != []:
        reduce_axes = axis
    else:
        reduce_axes = range(rank)
    axes = [a + rank if a < 0 else a for a in reduce_axes]

    # Static-graph shapes may be dynamic, so fetch them as a tensor there.
    inp_shape = input.shape if fluid.in_dygraph_mode() else layers.shape(input)

    mean = layers.reduce_mean(input, dim=axis, keep_dim=True, name=name)
    tmp = layers.reduce_mean((input - mean)**2,
                             dim=axis,
                             keep_dim=keepdim,
                             name=name)

    if unbiased:
        # n = number of elements reduced over.
        n = 1
        for a in axes:
            n *= inp_shape[a]
        if fluid.in_dygraph_mode():
            factor = n / (n - 1.0) if n > 1.0 else 0.0
        else:
            # In static mode n is a tensor; select the factor with `where`.
            n = layers.cast(n, dtype)
            zero_const = layers.fill_constant(shape=[1],
                                              dtype=dtype,
                                              value=0.0)
            factor = layers.where(n > 1.0, n / (n - 1.0), zero_const)
        tmp *= factor

    if not out:
        return tmp
    layers.assign(input=tmp, output=out)
    return out
Exemplo n.º 2
0
    def predict(self, test_data, batch_size=1, num_workers=0):
        """
        Run one pass of inference over ``test_data`` and collect the outputs.

        Args:
            test_data (Dataset|DataLoader): An iterable data source used for
                prediction. An instance of paddle.fluid.io.Dataset or
                paddle.fluid.io.DataLoader is recommended; a plain Dataset is
                wrapped in a DataLoader automatically.
            batch_size (int): Batch size used when ``test_data`` is a Dataset.
                Ignored when ``test_data`` is already a DataLoader.
            num_workers (int): Number of subprocesses used to load data; 0
                loads data in the main process. Ignored when ``test_data`` is
                already a DataLoader.

        Returns:
            list: One array per model output, with the per-batch results
            stacked along the first axis via ``np.vstack``.
        """

        # Static-graph mode needs explicit feed variables; dygraph does not.
        if fluid.in_dygraph_mode():
            feed_list = None
        else:
            feed_list = [x.forward() for x in self._inputs + self._labels]

        # Wrap a plain Dataset in a distributed-aware DataLoader.
        if test_data is not None and isinstance(test_data, Dataset):
            test_sampler = DistributedBatchSampler(test_data,
                                                   batch_size=batch_size)
            test_loader = DataLoader(test_data,
                                     batch_sampler=test_sampler,
                                     places=self._place,
                                     feed_list=feed_list,
                                     num_workers=num_workers,
                                     return_list=True)
        else:
            test_loader = test_data

        self._test_dataloader = test_loader

        # A non-iterable loader is a factory; call it to obtain the iterator.
        loader = test_loader
        if not isinstance(test_loader, Iterable):
            loader = test_loader()

        outputs = None
        for data in tqdm.tqdm(loader):
            # Static-graph loaders yield a one-element list per batch.
            if not fluid.in_dygraph_mode():
                data = data[0]

            outs = self.test(*data)

            if outputs is None:
                outputs = outs
            else:
                # Stack each output with its counterpart from earlier batches.
                outputs = [
                    np.vstack([x, outs[i]]) for i, x in enumerate(outputs)
                ]

        self._test_dataloader = None
        # With multi-rank sampling the last batches may be padded; trim each
        # output back to the true dataset length.
        # NOTE(review): assumes the first axis of every output indexes
        # samples — confirm against self.test().
        if test_loader is not None and self._adapter._nranks > 1 \
                    and isinstance(test_loader, DataLoader):
            outputs = [o[:len(test_loader.dataset)] for o in outputs]
        return outputs
Exemplo n.º 3
0
    def __call__(self, *args, **kwargs):
        """
        Supports to call the returned instance with input `args` and `kwargs` directly.

        Args:
            *args(tuple): tuple of all input arguments from original decorated function.
            **kwargs(dict): dict of all input keyword arguments from original decorated function.

        Return:
            Outputs of decorated function.

        Raises:
            RuntimeError: If called while not in dynamic mode.
        """

        # 1. call dygraph function directly if not enable `declarative`
        if not self._program_trans.enable_to_static:
            # NOTE(liym27):
            # Here calls `warnings.warn` but not `logging_utils.warn` because by default warnings.warn(message)
            # will show up **only once**. StaticFunction.__call__ will run many times, it is appropriate to
            # display this warning message only once.
            warnings.warn(
                "The decorator '@paddle.jit.to_static' does NOT work when setting ProgramTranslator.enable to False. "
                "We will just return dygraph output. If you would like to get static graph output, please call API "
                "ProgramTranslator.enable(True)")
            return self._call_dygraph_function(*args, **kwargs)

        if not in_dygraph_mode():
            raise RuntimeError(
                "Failed to run the callable object {} decorated by '@paddle.jit.to_static', "
                "because it is NOT in dynamic mode. Please disable the static mode to enter dynamic mode with the "
                "following API: paddle.disable_static().".format(
                    self.dygraph_function))

        # 2. trace ops from dygraph layers and cache the generated program.
        args, kwargs = self._function_spec.unified_args_and_kwargs(
            args, kwargs)

        try:
            concrete_program, partial_program_layer = self.get_concrete_program(
                *args, **kwargs)

            # 3. synchronize self.training attribute.
            if isinstance(self._class_instance, layers.Layer):
                partial_program_layer.training = self._class_instance.training

            # 4. return outputs.
            try:
                return partial_program_layer(args)
            except Exception as e:
                if not hasattr(e, error.ERROR_DATA):
                    # runtime error
                    error.attach_error_data(e, in_runtime=True)
                # BUG FIX: re-raise unconditionally. The original `raise` sat
                # inside the `if`, so an exception that already carried
                # ERROR_DATA was swallowed here and the call silently
                # returned None instead of reaching the handler below.
                raise
        except Exception as e:
            error_data = getattr(e, error.ERROR_DATA, None)
            if error_data:
                error_data.raise_new_exception()
            else:
                logging_utils.warn(
                    "Please file an issue at 'https://github.com/PaddlePaddle/Paddle/issues'"
                    " if you can't handle this {} yourself.".format(type(e)))
                raise e
Exemplo n.º 4
0
def optimizer_setting(parameter_list=None):
    """Build a Momentum optimizer with a piecewise-decay learning rate.

    The schedule decays ``base_lr`` by 10x at epochs 30, 60 and 90,
    measured in steps over the ImageNet-1k training set.

    Args:
        parameter_list: Parameters to optimize; used only in dygraph mode.

    Returns:
        A fluid Momentum optimizer.
    """
    total_images = IMAGENET1000
    step = int(math.ceil(float(total_images) / batch_size))

    decay_epochs = [30, 60, 90]
    boundaries = [step * epoch for epoch in decay_epochs]
    values = [base_lr * (0.1**idx) for idx in range(len(boundaries) + 1)]

    schedule = fluid.layers.piecewise_decay(boundaries=boundaries,
                                            values=values)
    regularization = fluid.regularizer.L2Decay(l2_decay)

    # Dygraph requires the parameter list up front; static mode does not.
    if fluid.in_dygraph_mode():
        return fluid.optimizer.Momentum(learning_rate=schedule,
                                        momentum=momentum_rate,
                                        regularization=regularization,
                                        parameter_list=parameter_list)
    return fluid.optimizer.Momentum(learning_rate=schedule,
                                    momentum=momentum_rate,
                                    regularization=regularization)
Exemplo n.º 5
0
def optimizer_setting(params, parameter_list=None):
    """Build a Momentum optimizer with a cosine-decay learning rate.

    Args:
        params (dict): Config containing ``lr``, ``num_epochs``, an optional
            ``total_images`` (defaults to 6149) and a ``learning_strategy``
            sub-dict with ``batch_size`` and ``epochs``.
        parameter_list: Parameters to optimize; used only in dygraph mode.

    Returns:
        A fluid Momentum optimizer.
    """
    ls = params["learning_strategy"]
    total_images = params.get("total_images", 6149)

    step = int(math.ceil(float(total_images) / ls["batch_size"]))
    # Boundaries are computed for parity with the piecewise variant; the
    # cosine schedule below does not consume them.
    bd = [step * epoch for epoch in ls["epochs"]]

    schedule = fluid.layers.cosine_decay(learning_rate=params["lr"],
                                         step_each_epoch=step,
                                         epochs=params["num_epochs"])

    # Dygraph requires the parameter list up front; static mode does not.
    if fluid.in_dygraph_mode():
        return fluid.optimizer.Momentum(
            learning_rate=schedule,
            momentum=momentum_rate,
            regularization=fluid.regularizer.L2Decay(l2_decay),
            parameter_list=parameter_list)
    return fluid.optimizer.Momentum(
        learning_rate=schedule,
        momentum=momentum_rate,
        regularization=fluid.regularizer.L2Decay(l2_decay))
Exemplo n.º 6
0
def optimizer_setting(params, parameter_list=None):
    """Create the optimizer described by ``params``.

    Args:
        params (dict): Config with a ``learning_strategy`` sub-dict whose
            ``name`` selects the schedule. Only "piecewise_decay" is
            supported here.
        parameter_list: Parameters to optimize; used only in dygraph mode.

    Returns:
        A fluid SGD optimizer (fixed LR 0.01 until the TODO below lands).

    Raises:
        ValueError: If the learning strategy is not "piecewise_decay".
    """
    ls = params["learning_strategy"]
    if ls["name"] != "piecewise_decay":
        # BUG FIX: the original fell through and hit UnboundLocalError on
        # `return optimizer`; fail with a clear message instead.
        raise ValueError(
            "Unsupported learning strategy: {}".format(ls["name"]))

    if "total_images" not in params:
        total_images = 1281167
    else:
        total_images = params["total_images"]
    batch_size = ls["batch_size"]
    step = int(total_images / batch_size + 1)

    # Piecewise schedule pre-computed for the TODO below; not used yet.
    bd = [step * e for e in ls["epochs"]]
    base_lr = params["lr"]
    lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]

    if fluid.in_dygraph_mode():
        optimizer = fluid.optimizer.SGD(learning_rate=0.01,
                                        parameter_list=parameter_list)
    else:
        optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    # TODO(minqiyang): switch to Momentum with piecewise_decay(bd, lr)
    # once dygraph supports learning rate schedulers.

    return optimizer
Exemplo n.º 7
0
 def check_type(op_str, x, y, binary_op):
     """Assert that the paddle op named ``op_str`` rejects non-bool inputs.

     numpy inputs are converted to tensors first, in which case any
     BaseException counts as the expected failure; otherwise a TypeError
     is expected. Static mode additionally checks that passing ``out``
     raises.
     """
     op = getattr(paddle, op_str)
     expected = TypeError
     if isinstance(x, np.ndarray):
         # Tensor inputs surface errors as broader exception types.
         x = paddle.to_tensor(x)
         y = paddle.to_tensor(y)
         expected = BaseException
     if binary_op:
         if type_str_map['x'] != 'bool' or type_str_map['y'] != 'bool':
             unit_test.assertRaises(expected, op, x=x, y=y)
         if not fluid.in_dygraph_mode():
             unit_test.assertRaises(expected, op, x=x, y=y, out=1)
         return
     if type_str_map['x'] != 'bool':
         unit_test.assertRaises(expected, op, x=x)
     if not fluid.in_dygraph_mode():
         unit_test.assertRaises(expected, op, x=x, out=1)
Exemplo n.º 8
0
    def __call__(self, *args, **kwargs):
        """
        Supports to call the returned instance with input `args` and `kwargs` directly.

        Args:
            *args(tuple): tuple of all input arguments from original decorated function.
            **kwargs(dict): dict of all input keyword arguments from original decorated function.

        Return:
            Outputs of decorated function.

        Raises:
            RuntimeError: If called in static mode while declarative
                translation is enabled.
        """

        # 1. call dygraph function directly if not enable `declarative`
        if not self._program_trans.enable_declarative:
            logging_utils.warn(
                "The decorator '@paddle.jit.to_static' does NOT work when setting ProgramTranslator.enable=False. "
                "We will just return dygraph output.")
            return self._call_dygraph_function(*args, **kwargs)

        if not in_dygraph_mode() and self._program_trans.enable_declarative:
            raise RuntimeError(
                "Failed to run the callable object {} decorated by '@paddle.jit.to_static', "
                "because it does NOT in dynamic mode. Please disable the static mode to enter dynamic mode with the "
                "following API: paddle.disable_static().".format(
                    self.dygraph_function))

        # 2. trace ops from dygraph layers and cache the generated program.
        args, kwargs = self._function_spec.unified_args_and_kwargs(args, kwargs)
        try:
            concrete_program, partial_program_layer = self.get_concrete_program(
                *args, **kwargs)

            # 3. synchronize self.training attribute.
            if isinstance(self._class_instance, layers.Layer):
                partial_program_layer.training = self._class_instance.training

            # 4. return outputs.
            return partial_program_layer(args)
        except Exception as e:
            # Attach structured error info the first time we see this
            # exception, so the re-raised message points at user code.
            if not hasattr(e, ERROR_DATA):
                # runtime error
                attach_error_data(e, in_runtime=True)
            error_data = getattr(e, ERROR_DATA, None)
            if error_data:
                new_exception = error_data.create_exception()
                if six.PY3:
                    # NOTE(liym27):
                    # 1. Why `raise new_exception from None`?
                    #   In Python 3, by default, an new exception is raised with trace information of the caught exception.
                    #   This only raises new_exception and hides unwanted implementation details from tracebacks of the
                    #   caught exception.
                    # 2. Use exec to bypass syntax error checking in Python 2.

                    six.exec_("raise new_exception from None")
                else:
                    raise new_exception
            else:
                raise
Exemplo n.º 9
0
def calc_gradients(outputs, inputs, no_grad_set):
    """Return gradients of ``outputs`` w.r.t. ``inputs``.

    Dispatches between the dygraph autograd API and the static-graph
    ``fluid.gradients`` call; ``no_grad_set`` lists variables excluded
    from differentiation.
    """
    if not fluid.in_dygraph_mode():
        return fluid.gradients(targets=outputs,
                               inputs=inputs,
                               no_grad_set=no_grad_set)
    return fluid.dygraph.grad(outputs=outputs,
                              inputs=inputs,
                              no_grad_vars=no_grad_set,
                              create_graph=True)
 def forward(self, input):
     """Apply instance normalization to ``input``.

     Uses the fused core op in dygraph mode; falls back to the
     ``fluid.layers.instance_norm`` layer (reusing this module's scale
     and bias parameters) in static mode.
     """
     if not fluid.in_dygraph_mode():
         return fluid.layers.instance_norm(
             input,
             epsilon=self.epsilon,
             param_attr=fluid.ParamAttr(self.scale.name),
             bias_attr=fluid.ParamAttr(self.bias.name))
     # Core op returns (out, saved_mean, saved_variance); keep only out.
     out, _, _ = fluid.core.ops.instance_norm(
         input, self.scale, self.bias, 'epsilon', self.epsilon)
     return out
Exemplo n.º 11
0
def build_optimizer(layer, cfg, loss=None):
    """Create an Adam optimizer for ``layer``'s parameters.

    Args:
        layer: Layer whose parameters are optimized.
        cfg: Unused configuration placeholder (kept for interface parity).
        loss: Loss variable; required (and minimized) in static mode only.

    Returns:
        A fluid Adam optimizer.
    """
    learning_rate = 1e-3
    beta1 = 0.5
    beta2 = 0.999
    if not fluid.in_dygraph_mode():
        optimizer = fluid.optimizer.Adam(learning_rate=learning_rate,
                                         beta1=beta1,
                                         beta2=beta2)
        # Static mode wires the minimize op into the program here.
        optimizer.minimize(loss, parameter_list=layer.parameters())
        return optimizer
    return fluid.optimizer.Adam(learning_rate=learning_rate,
                                beta1=beta1,
                                beta2=beta2,
                                parameter_list=layer.parameters())
Exemplo n.º 12
0
    def __init__(self):
        """Initialize model bookkeeping and pick the graph-mode adapter."""
        super(Model, self).__init__(self.__class__.__name__)
        self.mode = 'train'
        self._inputs = None
        self._labels = None
        self._loss_function = None
        self._loss_weights = None
        # BUG FIX: `self._optimizer = None` was assigned twice; one removed.
        self._optimizer = None
        self._device = None
        self._test_dataloader = None

        # init backend
        if fluid.in_dygraph_mode():
            self._adapter = DynamicGraphAdapter(self)
        else:
            self._adapter = StaticGraphAdapter(self)
Exemplo n.º 13
0
    def start(self, places=None):
        """Build and start a PyReader over this dataset's generator.

        Args:
            places: Device places to feed; defaults to all CUDA places when
                compiled with CUDA, else CPU places.

        Returns:
            An iterator over batches produced by ``self.generator()``.
        """
        if places is None:
            places = F.cuda_places() if F.core.is_compiled_with_cuda(
            ) else F.cpu_places()

        def _gen():
            # Log generator failures before propagating, since PyReader
            # threads can otherwise swallow the traceback.
            try:
                for idx, i in enumerate(self.generator()):
                    yield i
            except Exception as e:
                log.exception(e)
                raise e

        reader = F.io.PyReader(feed_list=self.placeholders(),
                               capacity=50,
                               iterable=True,
                               return_list=F.in_dygraph_mode())
        reader.decorate_batch_generator(_gen, places=places)
        return reader()
def optimizer_setting(params, parameter_list=None):
    """Create the optimizer described by ``params``.

    Args:
        params (dict): Config with a ``learning_strategy`` sub-dict whose
            ``name`` selects the schedule. Only "piecewise_decay" is
            supported here.
        parameter_list: Parameters to optimize; used only in dygraph mode.

    Returns:
        A fluid SGD optimizer with a fixed 0.01 learning rate.

    Raises:
        ValueError: If the learning strategy is not "piecewise_decay".
    """
    ls = params["learning_strategy"]
    if ls["name"] != "piecewise_decay":
        # BUG FIX: the original fell through and hit UnboundLocalError on
        # `return optimizer`; fail with a clear message instead.
        raise ValueError(
            "Unsupported learning strategy: {}".format(ls["name"]))

    if "total_images" not in params:
        total_images = 6149
    else:
        total_images = params["total_images"]
    # TODO(Yancey1989): using lr decay if it is ready.
    #batch_size = ls["batch_size"]
    #step = int(total_images / batch_size + 1)

    #bd = [step * e for e in ls["epochs"]]
    #base_lr = params["lr"]
    #lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
    if fluid.in_dygraph_mode():
        optimizer = fluid.optimizer.SGD(learning_rate=0.01,
                                        parameter_list=parameter_list)
    else:
        optimizer = fluid.optimizer.SGD(learning_rate=0.01)

    return optimizer
Exemplo n.º 15
0
def optimizer_setting(params, parameter_list=None):
    """Create the optimizer described by ``params``.

    Args:
        params (dict): Config with a ``learning_strategy`` sub-dict whose
            ``name`` selects the schedule. Only "piecewise_decay" is
            supported here.
        parameter_list: Parameters to optimize; used only in dygraph mode.

    Returns:
        A fluid SGD optimizer with a fixed 0.01 learning rate.

    Raises:
        ValueError: If the learning strategy is not "piecewise_decay".
    """
    ls = params["learning_strategy"]
    if ls["name"] != "piecewise_decay":
        # BUG FIX: the original fell through and hit UnboundLocalError on
        # `return optimizer`; fail with a clear message instead.
        raise ValueError(
            "Unsupported learning strategy: {}".format(ls["name"]))

    if "total_images" not in params:
        total_images = 1281167
    else:
        total_images = params["total_images"]
    batch_size = ls["batch_size"]
    step = int(total_images / batch_size + 1)

    # Piecewise schedule pre-computed but not consumed yet; the SGD below
    # still uses a fixed learning rate.
    bd = [step * e for e in ls["epochs"]]
    base_lr = params["lr"]
    lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]

    if fluid.in_dygraph_mode():
        optimizer = fluid.optimizer.SGD(learning_rate=0.01,
                                        parameter_list=parameter_list)
    else:
        optimizer = fluid.optimizer.SGD(learning_rate=0.01)

    return optimizer
Exemplo n.º 16
0
def prepare_distributed_context(place=None):
    """Initialize the multi-GPU communicator and return the parallel strategy.

    Args:
        place: Device place; defaults to the current rank's CUDA device.

    Returns:
        ParallelStrategy describing the run, or None when running with a
        single rank (no distributed setup needed).

    Raises:
        AssertionError: If a distributed run is requested on a non-CUDA
            place.
    """
    if place is None:
        place = fluid.CUDAPlace(ParallelEnv().dev_id) if ParallelEnv().nranks > 1 \
            else fluid.CUDAPlace(0)

    strategy = ParallelStrategy()
    strategy.nranks = ParallelEnv().nranks
    strategy.local_rank = ParallelEnv().local_rank
    strategy.trainer_endpoints = ParallelEnv().trainer_endpoints
    strategy.current_endpoint = ParallelEnv().current_endpoint

    # Single-rank runs need no communicator.
    if strategy.nranks < 2:
        return

    global _parallel_context_initialized

    if not _parallel_context_initialized and isinstance(
            place, fluid.CUDAPlace):

        def _init_context():
            # Communicator init must run as a static-graph program.
            communicator_prog = fluid.Program()
            init_communicator(communicator_prog, strategy.local_rank,
                              strategy.nranks, True, strategy.current_endpoint,
                              strategy.trainer_endpoints)
            exe = fluid.Executor(place)
            exe.run(communicator_prog)

        if fluid.in_dygraph_mode():
            # Temporarily drop to static mode to run the init program.
            fluid.disable_dygraph()
            _init_context()
            fluid.enable_dygraph(place)
        else:
            _init_context()

    else:
        # BUG FIX: the original `assert ("Only support CUDAPlace for now.")`
        # asserted a non-empty string, which is always truthy and never
        # fired. Reject only genuinely unsupported places; re-entering with
        # an already-initialized CUDA context is fine.
        assert isinstance(place, fluid.CUDAPlace), \
            "Only support CUDAPlace for now."

    _parallel_context_initialized = True
    return strategy
Exemplo n.º 17
0
    def func_hook_in_double_grad(self):
        """Check that a gradient hook fires during double-grad backprop.

        Registers a hook that doubles the incoming gradient, computes
        dx = d(x*x)/dx with create_graph=True, and verifies that after
        backward through z = x*x + dx the hooked gradient equals 8.0.
        """
        def double_print_hook(grad):
            # Double the incoming gradient and echo it for debugging.
            grad = grad * 2
            print(grad)
            return grad

        x = paddle.ones(shape=[1], dtype='float32')
        x.stop_gradient = False

        # hook only works in backward
        # for forward var x, the x.grad generated in
        # paddle.grad will not deal with by hook
        x.register_hook(double_print_hook)

        y = x * x
        # Since y = x * x, dx = 2 * x
        dx = paddle.grad(outputs=[y],
                         inputs=[x],
                         create_graph=True,
                         retain_graph=True)[0]

        z = y + dx
        # paddle.grad above does not populate x.grad.
        self.assertTrue(x.grad is None)

        # If create_graph = True, the gradient of dx
        # would be backpropagated. Therefore,
        # z = x * x + dx = x * x + 2 * x, and
        # x.gradient() = 2 * x + 2 = 4.0
        # after changed by hook: 8.0

        # TODO(wuweilong): enable this case when DoubleGrad in eager mode is ready
        if fluid.in_dygraph_mode():
            pass
        else:
            z.backward()
            self.assertTrue(np.array_equal(x.grad.numpy(), np.array([8.])))
Exemplo n.º 18
0
    def forward(self,
                inputs,
                initial_states=None,
                sequence_length=None,
                **kwargs):
        """Run the wrapped cell step-by-step over the time dimension.

        Args:
            inputs: (Possibly nested) input tensors; the time and batch axes
                are given by ``self.time_step_index`` / ``self.batch_index``.
            initial_states: Initial cell states; derived from the cell when
                ``None``.
            sequence_length: Optional per-sample valid lengths used to mask
                padded steps.
            **kwargs: Extra keyword arguments forwarded to the cell.

        Returns:
            tuple: (final_outputs, final_states).
        """
        if fluid.in_dygraph_mode():

            # Accumulates the per-step tensors for one output slot.
            class OutputArray(object):
                def __init__(self, x):
                    self.array = [x]

                def append(self, x):
                    self.array.append(x)

            def _maybe_copy(state, new_state, step_mask):
                # TODO: use where_op
                # Keep new_state where step_mask is 1, old state where 0.
                new_state = fluid.layers.elementwise_mul(
                    new_state, step_mask,
                    axis=0) - fluid.layers.elementwise_mul(state,
                                                           (step_mask - 1),
                                                           axis=0)
                return new_state

            flat_inputs = flatten(inputs)
            batch_size, time_steps = (
                flat_inputs[0].shape[self.batch_index],
                flat_inputs[0].shape[self.time_step_index])

            if initial_states is None:
                initial_states = self.cell.get_initial_states(
                    batch_ref=inputs, batch_dim_idx=self.batch_index)

            # Transpose batch-major inputs so that axis 0 is time.
            if not self.time_major:
                inputs = map_structure(
                    lambda x: fluid.layers.transpose(x, [1, 0] + list(
                        range(2, len(x.shape)))), inputs)

            if sequence_length is not None:
                mask = fluid.layers.sequence_mask(
                    sequence_length,
                    maxlen=time_steps,
                    dtype=flatten(initial_states)[0].dtype)
                # Make the mask time-major as well.
                mask = fluid.layers.transpose(mask, [1, 0])

            # For a reverse RNN, flip the time dimension of inputs and mask.
            if self.is_reverse:
                inputs = map_structure(
                    lambda x: fluid.layers.reverse(x, axis=[0]), inputs)
                mask = fluid.layers.reverse(
                    mask, axis=[0]) if sequence_length is not None else None

            states = initial_states
            outputs = []
            for i in range(time_steps):
                step_inputs = map_structure(lambda x: x[i], inputs)
                step_outputs, new_states = self.cell(step_inputs, states,
                                                     **kwargs)
                if sequence_length is not None:
                    # On padded steps carry the previous state forward.
                    new_states = map_structure(
                        partial(_maybe_copy, step_mask=mask[i]), states,
                        new_states)
                states = new_states
                if i == 0:
                    # First step: start one accumulator per output slot.
                    outputs = map_structure(lambda x: OutputArray(x),
                                            step_outputs)
                else:
                    map_structure(lambda x, x_array: x_array.append(x),
                                  step_outputs, outputs)

            # Stack the collected per-step outputs along the time dimension.
            final_outputs = map_structure(
                lambda x: fluid.layers.stack(x.array,
                                             axis=self.time_step_index),
                outputs)

            if self.is_reverse:
                # Restore the original time ordering of the outputs.
                final_outputs = map_structure(
                    lambda x: fluid.layers.reverse(x,
                                                   axis=self.time_step_index),
                    final_outputs)

            final_states = new_states
        else:
            # Static-graph mode: delegate the whole loop to the rnn op.
            final_outputs, final_states = fluid.layers.rnn(
                self.cell,
                inputs,
                initial_states=initial_states,
                sequence_length=sequence_length,
                time_major=self.time_major,
                is_reverse=self.is_reverse,
                **kwargs)
        return final_outputs, final_states
Exemplo n.º 19
0
    def forward(self,
                inputs,
                initial_states=None,
                sequence_length=None,
                **kwargs):
        """Run the wrapped cell step-by-step over the time dimension.

        Args:
            inputs: (Possibly nested) input tensors; the time and batch axes
                are given by ``self.time_step_index`` / ``self.batch_index``.
            initial_states: Initial cell states; derived from the cell when
                ``None``.
            sequence_length: Optional per-sample valid lengths used to mask
                padded steps.
            **kwargs: Extra keyword arguments forwarded to the cell.

        Returns:
            tuple: (final_outputs, final_states).
        """
        if F.in_dygraph_mode():

            # Accumulates the per-step tensors for one output slot.
            class OutputArray(object):
                def __init__(self, x):
                    self.array = [x]

                def append(self, x):
                    self.array.append(x)

            def _maybe_copy(state, new_state, step_mask):
                # TODO: use where_op
                # Keep new_state where step_mask is 1, old state where 0.
                new_state = L.elementwise_mul(new_state, step_mask, axis=0) - \
                        L.elementwise_mul(state, (step_mask - 1), axis=0)
                return new_state

            flat_inputs = U.flatten(inputs)

            batch_size, time_steps = (
                flat_inputs[0].shape[self.batch_index],
                flat_inputs[0].shape[self.time_step_index])

            if initial_states is None:
                initial_states = self.cell.get_initial_states(
                    batch_ref=inputs, batch_dim_idx=self.batch_index)

            if not self.time_major:
                # If inputs are batch-major, swap dims 0 and 1
                # so that dim 0 is the time step.
                inputs = U.map_structure(
                    lambda x: L.transpose(x, [1, 0] + list(
                        range(2, len(x.shape)))), inputs)

            if sequence_length is not None:
                mask = L.sequence_mask(
                    sequence_length,
                    maxlen=time_steps,
                    dtype=U.flatten(initial_states)[0].dtype)
                # Likewise, make dim 0 of the mask the time step.
                mask = L.transpose(mask, [1, 0])

            if self.is_reverse:
                # For a reverse RNN,
                # flip the time dimension of inputs (and mask).
                inputs = U.map_structure(lambda x: L.reverse(x, axis=[0]), inputs)
                mask = L.reverse(mask, axis=[0]) if sequence_length is not None else None

            states = initial_states
            outputs = []
            # Iterate over time steps.
            for i in range(time_steps):
                # Slice out this step's input.
                step_inputs = U.map_structure(lambda x: x[i], inputs)
                # Feed the current input and state to the cell,
                # obtaining the step output and the new state.
                step_outputs, new_states = self.cell(step_inputs, states, **kwargs)
                if sequence_length is not None:
                    # Where masked, keep the previous state's values:
                    # _maybe_copy uses new_states where unmasked and
                    # states where masked.
                    new_states = U.map_structure(
                        partial(_maybe_copy, step_mask=mask[i]),
                        states,
                        new_states)
                states = new_states

                if i == 0:
                    # First step: start one accumulator per output slot.
                    outputs = U.map_structure(lambda x: OutputArray(x), step_outputs)
                else:
                    # Append each step output to its accumulator.
                    U.map_structure(lambda x, x_array: x_array.append(x), step_outputs, outputs)

            # Finally, stack along the time-step dimension.
            final_outputs = U.map_structure(
                lambda x: L.stack(x.array, axis=self.time_step_index),
                outputs)

            if self.is_reverse:
                # For a reverse RNN, flip the final outputs back.
                final_outputs = U.map_structure(
                    lambda x: L.reverse(x, axis=self.time_step_index),
                    final_outputs)

            final_states = new_states

        else:
            # Static-graph mode: delegate the whole loop to the rnn op.
            final_outputs, final_states = L.rnn(
                self.cell,
                inputs,
                initial_states=initial_states,
                sequence_length=sequence_length,
                time_major=self.time_major,
                is_reverse=self.is_reverse,
                **kwargs)

        return final_outputs, final_states
Exemplo n.º 20
0
def do_predict(args):
    """Decode the test set with a trained seq2seq model.

    Loads the checkpoint named by ``args.reload_model``, runs beam-search
    inference over ``args.infer_file`` and writes the top beam of every
    example to ``args.infer_output_file``, one whitespace-joined sentence
    per line.

    Args:
        args: parsed command-line namespace carrying the data/vocab paths,
            model hyper-parameters and runtime switches (``use_gpu``,
            ``eager_run``, ``attention``, ``reload_model`` ...).
    """
    device = set_device("gpu" if args.use_gpu else "cpu")
    if args.eager_run:
        # Run imperatively (dygraph) instead of building a static graph.
        fluid.enable_dygraph(device)

    # Define model inputs (used as feed targets in static-graph mode).
    inputs = [
        Input(
            [None, None], "int64", name="src_word"),
        Input(
            [None], "int64", name="src_length"),
    ]

    # Build the dataset and its batched data loader.
    dataset = Seq2SeqDataset(
        fpattern=args.infer_file,
        src_vocab_fpath=args.vocab_prefix + "." + args.src_lang,
        trg_vocab_fpath=args.vocab_prefix + "." + args.tar_lang,
        token_delimiter=None,
        start_mark="<s>",
        end_mark="</s>",
        unk_mark="<unk>")
    trg_idx2word = Seq2SeqDataset.load_dict(
        dict_path=args.vocab_prefix + "." + args.tar_lang, reverse=True)
    (args.src_vocab_size, args.trg_vocab_size, bos_id, eos_id,
     unk_id) = dataset.get_vocab_summary()
    batch_sampler = Seq2SeqBatchSampler(
        dataset=dataset, use_token_batch=False, batch_size=args.batch_size)
    data_loader = DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        places=device,
        feed_list=None
        if fluid.in_dygraph_mode() else [x.forward() for x in inputs],
        collate_fn=partial(
            prepare_infer_input, bos_id=bos_id, eos_id=eos_id, pad_id=eos_id),
        num_workers=0,
        return_list=True)

    model_maker = AttentionInferModel if args.attention else BaseInferModel
    # NOTE(fix): pass args.trg_vocab_size -- that is the value refreshed from
    # the dataset above; the old code read args.tar_vocab_size, which
    # get_vocab_summary() never updates and so could be stale or unset.
    model = model_maker(
        args.src_vocab_size,
        args.trg_vocab_size,
        args.hidden_size,
        args.hidden_size,
        args.num_layers,
        args.dropout,
        bos_id=bos_id,
        eos_id=eos_id,
        beam_size=args.beam_size,
        max_out_len=256)

    model.prepare(inputs=inputs)

    # Load the trained model.
    assert args.reload_model, (
        "Please set reload_model to load the infer model.")
    model.load(args.reload_model)

    # TODO(guosheng): use model.predict when support variant length
    with io.open(args.infer_output_file, 'w', encoding='utf-8') as f:
        for data in data_loader():
            finished_seq = model.test_batch(inputs=flatten(data))[0]
            # Normalize to rank 3 -- a rank-2 result means a single beam;
            # then transpose so each row of an instance is one beam.
            finished_seq = finished_seq[:, :, np.newaxis] if len(
                finished_seq.shape) == 2 else finished_seq
            finished_seq = np.transpose(finished_seq, [0, 2, 1])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    id_list = post_process_seq(beam, bos_id, eos_id)
                    word_list = [trg_idx2word[word_id] for word_id in id_list]
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
                    break  # keep only the top beam per instance
Exemplo n.º 21
0
 def get_tracer_mode(self):
     """Assert that imperative (dygraph) mode is currently enabled.

     Raises:
         AssertionError: if called outside dygraph mode.
     """
     assert fluid.in_dygraph_mode(), "Dygraph mode must be enabled"
Exemplo n.º 22
0
    def evaluate(
        self,
        eval_data,
        batch_size=1,
        log_freq=10,
        verbose=2,
        num_workers=0,
        callbacks=None,
    ):
        """Run one full evaluation pass and return the metric results.

        Args:
            eval_data (Dataset|DataLoader): an iterable data source used for
                evaluation. An instance of paddle.fluid.io.Dataset or
                paddle.fluid.io.DataLoader is recommended.
            batch_size (int): batch size used when ``eval_data`` is a
                Dataset; ignored when a DataLoader is given.
            log_freq (int): the frequency, in number of steps, the eval logs
                are printed.
            verbose (int): the verbosity mode, should be 0, 1, or 2.
                0 = silent, 1 = progress bar, 2 = one line per epoch.
            num_workers (int): number of subprocesses used to load data;
                0 loads in the main process. Ignored when ``eval_data`` is a
                DataLoader.
            callbacks (Callback|None): a list of ``Callback`` instances to
                apply during evaluation. If None, default callbacks are
                inserted automatically.

        Returns:
            dict: mapping of metric name to its value over the whole pass.
        """
        # Static-graph mode needs explicit feed targets; dygraph does not.
        if fluid.in_dygraph_mode():
            feed_list = None
        else:
            feed_list = [x.forward() for x in self._inputs + self._labels]

        # Wrap a bare Dataset in a distributed loader; use loaders as-is.
        if eval_data is not None and isinstance(eval_data, Dataset):
            eval_sampler = DistributedBatchSampler(eval_data,
                                                   batch_size=batch_size)
            eval_loader = DataLoader(eval_data,
                                     batch_sampler=eval_sampler,
                                     places=self._place,
                                     feed_list=feed_list,
                                     num_workers=num_workers,
                                     return_list=True)
        else:
            eval_loader = eval_data

        self._test_dataloader = eval_loader
        # Compute the metric names once and reuse them everywhere below
        # (the original re-invoked self._metrics_name() three times).
        metrics_name = self._metrics_name()

        cbks = config_callbacks(
            callbacks,
            model=self,
            log_freq=log_freq,
            verbose=verbose,
            metrics=metrics_name,
        )

        # Generator-style loaders must be called to obtain an iterator.
        loader = eval_loader
        if not isinstance(eval_loader, Iterable):
            loader = eval_loader()

        eval_steps = len(loader) if hasattr(loader, '__len__') else None
        cbks.on_begin('eval', {
            'steps': eval_steps,
            'metrics_name': metrics_name
        })

        logs = self._run_one_epoch(loader, cbks, 'eval', metrics_name)

        cbks.on_end('eval', logs)

        self._test_dataloader = None

        # Collect only the metric entries from the epoch logs.
        return {k: logs[k] for k in metrics_name}
Exemplo n.º 23
0
    def prepare(self,
                optimizer=None,
                loss_function=None,
                metrics=None,
                inputs=None,
                labels=None,
                device=None):
        """Configure the model for training/evaluation: resolve the device,
        initialize the data-parallel context if needed, and register the
        optimizer, loss, metrics, inputs and labels.

        Args:
            optimizer (Optimizer|None): optimizer must be set in training
                and should be an Optimizer instance. It can be None in eval
                and test mode.
            loss_function (Loss|None): loss function must be set in training
                and should be a Loss instance. It can be None when there is
                no loss.
            metrics (Metric|list of Metric|None): if metrics is set, all
                metrics will be calculated and output in train/eval mode.
            inputs (Input|list|dict|None): inputs, entry points of network,
                could be an Input layer, a list of Input layers,
                a dict (name: Input), or None. For static graph,
                inputs must be set. For dynamic graph, it could be None.
            labels (Input|list|None): labels, entry points of network,
                could be an Input layer or a list of Input layers, or None.
                For static graph, if loss_function is set in Model.prepare(),
                it must be set. Otherwise, it could be None.
            device (str|None): specify device type, 'CPU' or 'GPU'.
                If None, automatically select device according to
                installation package version.
        """

        # Resolve the execution place: GPU when a CUDAPlace or 'gpu' string
        # is given, or when device is None and Paddle was built with CUDA.
        if isinstance(device, fluid.CUDAPlace) or \
            (isinstance(device, six.string_types) and device.lower() == 'gpu') \
            or (device is None and fluid.is_compiled_with_cuda()):
            if isinstance(device, fluid.CUDAPlace):
                self._place = device
            else:
                # In multi-process data parallel each rank owns one device.
                self._place = fluid.CUDAPlace(ParallelEnv().dev_id) \
                    if ParallelEnv().nranks > 1 else fluid.CUDAPlace(0)

            # Initialize the data-parallel context exactly once per process.
            global _parallel_context_initialized
            if ParallelEnv().nranks > 1 and not _parallel_context_initialized:
                if fluid.in_dygraph_mode():
                    # Re-enter dygraph on the resolved place before
                    # preparing the parallel context.
                    fluid.disable_dygraph()
                    fluid.enable_dygraph(self._place)
                    fluid.dygraph.parallel.prepare_context()
                else:
                    prepare_distributed_context(self._place)

                _parallel_context_initialized = True
        elif isinstance(device, fluid.CPUPlace):
            self._place = device
        elif (isinstance(device, six.string_types) and device.lower() == 'cpu') \
            or (device is None):
            self._place = fluid.CPUPlace()
        else:
            raise ValueError(
                "Expected device in ('gpu', 'cpu', fluid.CUDAPlace, fluid.CPUPlace, None), \
                but got {}".format(device))

        self._optimizer = optimizer
        if loss_function:
            if not isinstance(loss_function, Loss):
                raise TypeError(
                    "'loss_function' must be sub classes of 'Loss'")
        self._loss_function = loss_function
        # Static graph requires explicit input/label descriptions.
        if not in_dygraph_mode():
            if not isinstance(inputs, (list, dict, Input)):
                raise TypeError(
                    "'inputs' must be list or dict in static graph mode")
            if loss_function and not isinstance(labels, (list, Input)):
                raise TypeError("'labels' must be list in static graph mode")

        metrics = metrics or []
        for metric in to_list(metrics):
            assert isinstance(metric, Metric), \
                "{} is not sub class of Metric".format(
                    metric.__class__.__name__)
        self._metrics = to_list(metrics)

        # For dict inputs, order them by the forward() argument order.
        self._inputs = to_list(inputs) if not isinstance(inputs, dict) else [
            inputs[n] for n in extract_args(self.forward) if n != 'self'
        ]
        self._labels = to_list(labels)

        if not in_dygraph_mode():
            self._adapter.prepare()
Exemplo n.º 24
0
 def train(self):
     """Switch the model into training mode.

     Delegates to the base ``Layer.train`` in dygraph mode, and in every
     mode forces the ``training`` flag on this layer and all sublayers.
     """
     if F.in_dygraph_mode():
         super(ErnieModel, self).train()
     self.training = True
     for sublayer in self.sublayers():
         sublayer.training = True
Exemplo n.º 25
0
 def eval(self):
     """Switch the model into evaluation (inference) mode.

     Delegates to the base ``Layer.eval`` in dygraph mode, and in every
     mode clears the ``training`` flag on this layer and all sublayers.
     """
     if F.in_dygraph_mode():
         super(ErnieModel, self).eval()
     self.training = False
     for sublayer in self.sublayers():
         sublayer.training = False
Exemplo n.º 26
0
 def __impl__(*args, **kwargs):
     """Invoke the wrapped ``func``, entering a dygraph guard if needed."""
     if fluid.in_dygraph_mode():
         # Already imperative -- call straight through.
         return func(*args, **kwargs)
     # Static-graph context: wrap the call in a temporary dygraph guard.
     with fluid.dygraph.guard():
         return func(*args, **kwargs)
Exemplo n.º 27
0
 def test_func2(self):
     # After test_func1 executed, if fluid.dygraph.guard() in test_func1
     # safely exited, fluid.in_dygraph_mode() should report False again.
     in_dygraph = fluid.in_dygraph_mode()
     self.assertEqual(in_dygraph, False)
Exemplo n.º 28
0
    def fit(
        self,
        train_data=None,
        eval_data=None,
        batch_size=1,
        epochs=1,
        eval_freq=1,
        log_freq=10,
        save_dir=None,
        save_freq=1,
        verbose=2,
        drop_last=False,
        shuffle=True,
        num_workers=0,
        callbacks=None,
    ):
        """Train the model for ``epochs`` epochs, optionally evaluating on
        ``eval_data`` every ``eval_freq`` epochs.

        Args:
            train_data (Dataset|DataLoader): An iterable data loader used for
                training. An instance of paddle.fluid.io.Dataset or
                paddle.fluid.io.DataLoader is recommended.
            eval_data (Dataset|DataLoader): An iterable data loader used for
                evaluation at the end of an epoch. If None, will not do
                evaluation. An instance of paddle.fluid.io.Dataset or
                paddle.fluid.io.DataLoader is recommended.
            batch_size (int): Integer number. The batch size of train_data
                and eval_data. When train_data and eval_data are both the
                instance of DataLoader, this parameter will be ignored.
            epochs (int): Integer number. The number of epochs to train the
                model.
            eval_freq (int): The frequency, in number of epochs, an
                evaluation is performed.
            log_freq (int): The frequency, in number of steps, the training
                logs are printed.
            save_dir (str|None): The directory to save checkpoints during
                training. If None, will not save checkpoints.
            save_freq (int): The frequency, in number of epochs, to save
                checkpoints.
            verbose (int): The verbosity mode, should be 0, 1, or 2.
                0 = silent, 1 = progress bar, 2 = one line per epoch.
            drop_last (bool): Whether to drop the last incomplete batch of
                train_data when the dataset size is not divisible by the
                batch size. When train_data is an instance of DataLoader,
                this parameter will be ignored.
            shuffle (bool): Whether to shuffle train_data. When train_data
                is an instance of DataLoader, this parameter will be
                ignored.
            num_workers (int): The number of subprocesses to load data,
                0 for no subprocess used and loading data in main process.
                When train_data and eval_data are both the instance of
                DataLoader, this parameter will be ignored.
            callbacks (Callback|None): A list of `Callback` instances to
                apply during training. If None, `ProgBarLogger` and
                `ModelCheckpoint` are automatically inserted.
        """

        assert train_data is not None, \
                "train_data must be given!"

        # Static-graph mode needs explicit feed targets; dygraph does not.
        if fluid.in_dygraph_mode():
            feed_list = None
        else:
            feed_list = [x.forward() for x in self._inputs + self._labels]

        # Wrap a bare training Dataset in a distributed loader.
        if isinstance(train_data, Dataset):
            train_sampler = DistributedBatchSampler(train_data,
                                                    batch_size=batch_size,
                                                    shuffle=shuffle,
                                                    drop_last=drop_last)
            train_loader = DataLoader(train_data,
                                      batch_sampler=train_sampler,
                                      places=self._place,
                                      feed_list=feed_list,
                                      num_workers=num_workers,
                                      return_list=True)
        else:
            train_loader = train_data

        # Likewise for the optional evaluation data.
        if eval_data is not None and isinstance(eval_data, Dataset):
            eval_sampler = DistributedBatchSampler(eval_data,
                                                   batch_size=batch_size)
            eval_loader = DataLoader(eval_data,
                                     batch_sampler=eval_sampler,
                                     places=self._place,
                                     feed_list=feed_list,
                                     num_workers=num_workers,
                                     return_list=True)
        elif eval_data is not None:
            eval_loader = eval_data
        else:
            eval_loader = None

        do_eval = eval_loader is not None
        self._test_dataloader = eval_loader
        metrics_name = self._metrics_name()
        steps = len(train_loader) if hasattr(train_loader, '__len__') else None
        cbks = config_callbacks(
            callbacks,
            model=self,
            epochs=epochs,
            steps=steps,
            log_freq=log_freq,
            save_freq=save_freq,
            save_dir=save_dir,
            verbose=verbose,
            metrics=self._metrics_name(),
        )

        cbks.on_begin('train')
        for epoch in range(epochs):

            # FIXME: adapt to DataLoader
            # Generator-style loaders must be called to get an iterator.
            loader = train_loader
            if not isinstance(train_loader, Iterable):
                loader = train_loader()
            logs = self._run_one_epoch(loader,
                                       cbks,
                                       'train',
                                       metrics_name,
                                       epoch=epoch)

            # Periodic evaluation at epoch boundaries.
            if do_eval and epoch % eval_freq == 0:
                # FIXME: adapt to DataLoader
                loader = eval_loader
                if not isinstance(eval_loader, Iterable):
                    loader = eval_loader()

                eval_steps = len(loader) if hasattr(loader,
                                                    '__len__') else None
                cbks.on_begin('eval', {
                    'steps': eval_steps,
                    'metrics_name': metrics_name
                })

                logs = self._run_one_epoch(loader, cbks, 'eval', metrics_name)

                cbks.on_end('eval', logs)

        cbks.on_end('train', logs)
        self._test_dataloader = None