Example no. 1
0
    def __init__(self,
                 name=None,
                 module_dir=None,
                 signatures=None,
                 module_info=None,
                 assets=None,
                 processor=None,
                 extra_info=None,
                 version=None):
        """Initialize a Module from exactly one of three sources.

        Args:
            name: module name to fetch/load (optionally pinned to ``version``).
            module_dir: ``(directory, version)`` pair to load from disk.
            signatures: signature definitions to build a new module from,
                together with the optional ``processor``, ``assets``,
                ``module_info`` and ``extra_info``.
            version: module version, used with ``name``/``module_dir``.

        Raises:
            TypeError: if ``extra_info`` is not a dict, or ``processor`` is
                not a subclass of ``paddlehub.BaseProcessor``.
            ValueError: if none of ``name``/``module_dir``/``signatures``
                is given.
        """
        self.desc = module_desc_pb2.ModuleDesc()
        self.program = None
        self.assets = []
        self.helper = None
        self.signatures = {}
        self.default_signature = None
        self.module_info = None
        self.processor = None
        self.extra_info = {} if extra_info is None else extra_info
        if not isinstance(self.extra_info, dict):
            raise TypeError(
                "The extra_info should be an instance of python dict")

        # cache data
        self.last_call_name = None
        self.cache_feed_dict = None
        self.cache_fetch_dict = None
        self.cache_program = None

        # Serialize module initialization across processes with an exclusive
        # flock on the config file.  The previous version leaked the file
        # handle and left the lock held whenever an _init_* helper raised;
        # the ``with`` block plus ``try/finally`` release both on every path.
        with open(os.path.join(CONF_HOME, 'config.json')) as fp_lock:
            lock.flock(fp_lock, lock.LOCK_EX)
            try:
                if name:
                    self._init_with_name(name=name, version=version)
                elif module_dir:
                    self._init_with_module_file(module_dir=module_dir[0])
                    # module_dir is a (path, version) pair; derive the module
                    # name from the last path component.
                    name = module_dir[0].split("/")[-1]
                    version = module_dir[1]
                elif signatures:
                    if processor:
                        # NOTE(review): the check is issubclass(), so callers
                        # must pass the processor *class*, not an instance.
                        if not issubclass(processor, BaseProcessor):
                            raise TypeError(
                                "Processor should be an instance of paddlehub.BaseProcessor"
                            )
                    if assets:
                        self.assets = utils.to_list(assets)
                    self.processor = processor
                    self._generate_module_info(module_info)
                    self._init_with_signature(signatures=signatures)
                else:
                    raise ValueError("Module initialized parameter is empty")
            finally:
                lock.flock(fp_lock, lock.LOCK_UN)
        CacheUpdater(name, version).start()
Example no. 2
0
    def __init__(self,
                 name,
                 inputs,
                 outputs,
                 feed_names=None,
                 fetch_names=None,
                 for_predict=False):
        """Bundle a named signature of input/output variables.

        Args:
            name: signature name.
            inputs: fluid Variable(s) that the signature feeds.
            outputs: fluid Variable(s) that the signature fetches.
            feed_names: optional names paired 1:1 with ``inputs``
                (defaults to empty strings).
            fetch_names: optional names paired 1:1 with ``outputs``
                (defaults to empty strings).
            for_predict: whether this signature is used for prediction.

        Raises:
            ValueError: if feed/fetch name lists do not match the length of
                inputs/outputs.
            TypeError: if any input/output is not a fluid Variable.
        """
        inputs = to_list(inputs)
        outputs = to_list(outputs)

        # Default the feed/fetch names to empty strings, one per variable.
        if not feed_names:
            feed_names = [""] * len(inputs)
        feed_names = to_list(feed_names)
        if len(inputs) != len(feed_names):
            raise ValueError(
                "The length of feed_names must be same with inputs")

        if not fetch_names:
            fetch_names = [""] * len(outputs)
        fetch_names = to_list(fetch_names)
        if len(outputs) != len(fetch_names):
            raise ValueError(
                "The length of fetch_names must be same with outputs")

        # Validate variable types before committing any state.
        for item in inputs:
            if not isinstance(item, Variable):
                raise TypeError(
                    "Item in inputs list should be an instance of fluid.framework.Variable"
                )

        for item in outputs:
            if not isinstance(item, Variable):
                raise TypeError(
                    "Item in outputs list should be an instance of fluid.framework.Variable"
                )

        self.name = name
        self.inputs = inputs
        self.outputs = outputs
        self.feed_names = feed_names
        self.fetch_names = fetch_names
        self.for_predict = for_predict
Example no. 3
0
def create_module(sign_arr,
                  module_dir,
                  processor=None,
                  assets=None,
                  module_info=None,
                  exe=None,
                  extra_info=None):
    """Build a Module from signature definitions and serialize it to disk.

    Wraps ``sign_arr`` (a single signature or a list of them) into a
    ``Module`` and writes the result under ``module_dir`` using ``exe``.
    """
    signatures = utils.to_list(sign_arr)
    new_module = Module(signatures=signatures,
                        processor=processor,
                        assets=assets,
                        module_info=module_info,
                        extra_info=extra_info)
    new_module.serialize_to_path(path=module_dir, exe=exe)
Example no. 4
0
    def __init__(self,
                 name=None,
                 module_dir=None,
                 signatures=None,
                 module_info=None,
                 assets=None,
                 processor=None,
                 extra_info=None,
                 version=None):
        """Initialize a Module from exactly one of three sources.

        Args:
            name: module name to fetch/load (optionally pinned to ``version``).
            module_dir: ``(directory, version)`` pair to load from disk.
            signatures: signature definitions to build a new module from,
                together with the optional ``processor``, ``assets``,
                ``module_info`` and ``extra_info``.
            version: module version, used with ``name``.

        Raises:
            TypeError: if ``extra_info`` is not a dict, or ``processor`` is
                not a subclass of ``paddlehub.BaseProcessor``.
            ValueError: if none of ``name``/``module_dir``/``signatures``
                is given.
        """
        self.desc = module_desc_pb2.ModuleDesc()
        self.program = None
        self.assets = []
        self.helper = None
        self.signatures = {}
        self.default_signature = None
        self.module_info = None
        self.processor = None
        self.extra_info = {} if extra_info is None else extra_info
        if not isinstance(self.extra_info, dict):
            raise TypeError(
                "The extra_info should be an instance of python dict")

        # cache data
        self.last_call_name = None
        self.cache_feed_dict = None
        self.cache_fetch_dict = None
        self.cache_program = None

        # TODO(wuzewu): print more module loading info log
        if name:
            self._init_with_name(name=name, version=version)
        elif module_dir:
            # module_dir is a (path, version) pair; only the path is used here.
            self._init_with_module_file(module_dir=module_dir[0])
        elif signatures:
            if processor:
                # NOTE(review): the check is issubclass(), so callers must
                # pass the processor *class*, not an instance.
                if not issubclass(processor, BaseProcessor):
                    raise TypeError(
                        "Processor should be an instance of paddlehub.BaseProcessor"
                    )
            if assets:
                self.assets = utils.to_list(assets)
            self.processor = processor
            self._generate_module_info(module_info)
            self._init_with_signature(signatures=signatures)
        else:
            raise ValueError("Module initialized parameter is empty")
Example no. 5
0
    def _build_env(self):
        """Build the runnable program environment for the current phase.

        Clones the base main program, attaches the task network (plus
        labels/loss/metrics in train and test phases), optionally wires a
        py_reader input pipeline, applies the training strategy, compiles
        the program for data parallelism when configured, and finally runs
        the startup program.  Idempotent: returns immediately once the
        environment is initialized.
        """
        if self.env.is_inititalized:
            return

        self._build_env_start_event()
        self.env.is_inititalized = True
        self.env.main_program = clone_program(self._base_main_program,
                                              for_test=False)

        self.env.startup_program = fluid.Program()
        with fluid.program_guard(self.env.main_program,
                                 self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.env.outputs = self._build_net()
                if self.is_train_phase or self.is_test_phase:
                    self.env.labels = self._add_label()
                    self.env.loss = self._add_loss()
                    self.env.metrics = self._add_metrics()

        if self.is_predict_phase or self.is_test_phase:
            # Re-clone in test mode and mark the remaining ops as test-only.
            self.env.main_program = clone_program(self.env.main_program,
                                                  for_test=True)
            hub.common.paddle_helper.set_op_attr(self.env.main_program,
                                                 is_test=True)

        if self.config.use_pyreader:
            # Build a py_reader whose slots mirror the feed variables, then
            # connect its outputs to the main program's inputs.
            t_program = fluid.Program()
            with fluid.program_guard(t_program, self.env.startup_program):
                self.env.py_reader = fluid.layers.py_reader(
                    capacity=64,
                    shapes=[var.shape for var in self.feed_var_list],
                    dtypes=[
                        dtype_map[var.dtype] for var in self.feed_var_list
                    ],
                    lod_levels=[var.lod_level for var in self.feed_var_list],
                    use_double_buffer=False)

                feed_var_list = self.feed_var_list
                py_vars = fluid.layers.read_file(self.env.py_reader)
                py_vars = to_list(py_vars)
                input_dict = {
                    feed_var_list[index].name: py_var
                    for index, py_var in enumerate(py_vars)
                }

                hub.connect_program(pre_program=t_program,
                                    next_program=self.env.main_program,
                                    input_dict=input_dict,
                                    need_log=False)

            self.env.main_program = t_program
            # Re-resolve loss/metrics/outputs inside the new program so later
            # fetches reference variables of the program actually run.
            if not self.is_predict_phase:
                self.env.loss = self.env.main_program.global_block().vars[
                    self.env.loss.name]
                metrics_name = [var.name for var in self.env.metrics]
                self.env.metrics = [
                    self.env.main_program.global_block().vars[name]
                    for name in metrics_name
                ]

            outputs_name = [var.name for var in self.env.outputs]
            self.env.outputs = [
                self.env.main_program.global_block().vars[name]
                for name in outputs_name
            ]

        if self.config.enable_memory_optim:
            # Fetched variables must survive memory optimization.
            for var_name in self.fetch_list:
                var = self.env.main_program.global_block().vars[var_name]
                var.persistable = True

        if self.is_train_phase:
            with fluid.program_guard(self.env.main_program,
                                     self._base_startup_program):
                with fluid.unique_name.guard(self.env.UNG):
                    self.config.strategy.execute(self.loss,
                                                 self._base_data_reader,
                                                 self.config)

        if self.is_train_phase:
            loss_name = self.env.loss.name
        else:
            loss_name = None

        # Share variables with a previously compiled program if one exists.
        # (The old dead `share_vars_from = None` inside the train branch was
        # removed; this if/else always assigns it.)
        if self._base_compiled_program is None:
            share_vars_from = None
        else:
            share_vars_from = self._base_compiled_program

        if not self.config.use_data_parallel:
            if self.config.enable_memory_optim:
                fluid.memory_optimize(self.env.main_program)
            self.env.main_program_compiled = None
        else:
            self.env.main_program_compiled = fluid.CompiledProgram(
                self.env.main_program).with_data_parallel(
                    loss_name=loss_name,
                    share_vars_from=share_vars_from,
                    build_strategy=self.build_strategy)

            if self._base_compiled_program is None:
                self._base_compiled_program = self.env.main_program_compiled

        self.exe.run(self.env.startup_program)
        self._build_env_end_event()
Example no. 6
0
    def _build_env(self):
        """Build the runnable program environment for the current phase.

        Clones the base main program, attaches the task network (plus
        labels/loss/metrics in train and test phases), optionally wires a
        py_reader input pipeline, applies the training strategy, compiles
        the program for data parallelism when configured, and finally runs
        the startup program.  Idempotent: returns immediately once the
        environment is initialized.
        """
        if self.env.is_inititalized:
            return

        self._build_env_start_event()
        self.env.is_inititalized = True
        # NOTE(review): reads self.base_main_program (no leading underscore),
        # unlike the sibling implementation's self._base_main_program —
        # confirm the attribute name against the class definition.
        self.env.main_program = clone_program(self.base_main_program,
                                              for_test=False)

        self.env.startup_program = fluid.Program()
        with fluid.program_guard(self.env.main_program,
                                 self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.env.outputs = self._build_net()
                if self.is_train_phase or self.is_test_phase:
                    self.env.labels = self._add_label()
                    self.env.loss = self._add_loss()
                    self.env.metrics = self._add_metrics()

        if self.is_predict_phase or self.is_test_phase:
            # The test-mode re-clone is deliberately disabled (it triggered a
            # fluid EnforceNotMet error); only the op attribute is flipped.
            # Todo: paddle.fluid.core_avx.EnforceNotMet: Getting 'tensor_desc' is not supported by the type of var kCUDNNFwdAlgoCache. at
            # self.env.main_program = clone_program(
            #     self.env.main_program, for_test=True)
            hub.common.paddle_helper.set_op_attr(self.env.main_program,
                                                 is_test=True)

        if self.config.use_pyreader:
            # Build a py_reader whose slots mirror the feed variables, then
            # connect its outputs to the main program's inputs.
            t_program = fluid.Program()
            with fluid.program_guard(t_program, self.env.startup_program):
                self.env.py_reader = fluid.layers.py_reader(
                    capacity=64,
                    shapes=[var.shape for var in self.feed_var_list],
                    dtypes=[
                        dtype_map[var.dtype] for var in self.feed_var_list
                    ],
                    lod_levels=[var.lod_level for var in self.feed_var_list],
                    use_double_buffer=False)

                feed_var_list = self.feed_var_list
                py_vars = fluid.layers.read_file(self.env.py_reader)
                py_vars = to_list(py_vars)
                input_dict = {
                    feed_var_list[index].name: py_var
                    for index, py_var in enumerate(py_vars)
                }

                hub.connect_program(pre_program=t_program,
                                    next_program=self.env.main_program,
                                    input_dict=input_dict,
                                    need_log=False)

            self.env.main_program = t_program
            # Re-resolve loss/metrics/outputs inside the new program so later
            # fetches reference variables of the program actually run.
            if not self.is_predict_phase:
                self.env.loss = self.env.main_program.global_block().vars[
                    self.env.loss.name]
                metrics_name = [var.name for var in self.env.metrics]
                self.env.metrics = [
                    self.env.main_program.global_block().vars[name]
                    for name in metrics_name
                ]

            outputs_name = [var.name for var in self.env.outputs]
            self.env.outputs = [
                self.env.main_program.global_block().vars[name]
                for name in outputs_name
            ]

        if self.config.enable_memory_optim:
            # Fetched variables must survive memory optimization.
            for var_name in self.fetch_list:
                var = self.env.main_program.global_block().vars[var_name]
                var.persistable = True

        # to avoid to print logger two times in result of the logger usage of paddle-fluid 1.6
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        if self.is_train_phase:
            # The strategy returns the LR schedule and the max step count,
            # both cached for use by the training loop.
            with fluid.program_guard(self.env.main_program,
                                     self._base_startup_program):
                with fluid.unique_name.guard(self.env.UNG):
                    self.scheduled_lr, self.max_train_steps = self.config.strategy.execute(
                        self.loss, self._base_data_reader, self.config,
                        self.device_count)

        if self.is_train_phase:
            loss_name = self.env.loss.name
        else:
            loss_name = None

        # Share variables with a previously compiled program (may be None).
        share_vars_from = self._base_compiled_program

        if not self.config.use_data_parallel:
            self.env.main_program_compiled = None
        else:
            self.env.main_program_compiled = fluid.CompiledProgram(
                self.env.main_program).with_data_parallel(
                    loss_name=loss_name,
                    share_vars_from=share_vars_from,
                    build_strategy=self.build_strategy)

        self.exe.run(self.env.startup_program)

        # to avoid to print logger two times in result of the logger usage of paddle-fluid 1.5
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        self._build_env_end_event()