def __new__(cls, name=None, directory=None, module_dir=None, version=None, **kwargs):
    """Create a module object, dispatching on how the module is located.

    When ``cls`` is the class literally named ``Module`` the object is built by
    ``cls.init_with_name`` (``name`` given) or ``cls.init_with_directory``
    (``directory`` or the deprecated ``module_dir`` given), and a
    ``CacheUpdater("update_cache", ...)`` is started for it.

    For any other class: if neither ``name`` nor ``directory`` is given, the
    module is loaded from the directory containing the file that defines
    ``cls``; otherwise a plain instance of ``cls`` is allocated.

    Args:
        name: Module name, forwarded to ``init_with_name``.
        directory: Path forwarded to ``init_with_directory``.
        module_dir: Deprecated alias of ``directory``; either a path or a
            list/tuple whose first element is the path.
        version: Module version, only used together with ``name``.
        **kwargs: Forwarded to the ``init_with_*`` call.

    Returns:
        The created module object.

    Raises:
        ValueError: If ``cls`` is ``Module`` and none of ``name``,
            ``directory`` or ``module_dir`` is provided.
    """
    if cls.__name__ == "Module":
        if name:
            module = cls.init_with_name(name=name, version=version, **kwargs)
        elif directory:
            module = cls.init_with_directory(directory=directory, **kwargs)
        elif module_dir:
            logger.warning(
                "Parameter module_dir is deprecated, please use directory to specify the path"
            )
            # Only the path element is used; directory-based loading never
            # consumed the version element of the sequence.
            if isinstance(module_dir, (list, tuple)):
                directory = module_dir[0]
            else:
                directory = module_dir
            module = cls.init_with_directory(directory=directory, **kwargs)
        else:
            # Previously this fell through to `module.name` with `module`
            # unbound and crashed with an UnboundLocalError.
            raise ValueError(
                "Module requires one of name, directory or module_dir")
        CacheUpdater("update_cache", module.name, module.version).start()
        return module

    if not name and not directory:
        # Subclass created without a location: load it from the directory of
        # the file that defines the subclass.
        directory = os.path.dirname(
            os.path.abspath(sys.modules[cls.__module__].__file__))
        return Module.init_with_directory(directory=directory, **kwargs)
    return object.__new__(cls)
def preinstall_modules(modules):
    """Load every requested module and build its serving configuration.

    Each entry of ``modules`` is either ``"name"`` or ``"name==version"``.
    When the same module name appears more than once, only the last
    occurrence is kept in the result.

    Args:
        modules: Iterable of module specifications, or ``None``.

    Returns:
        A list of dicts with the keys ``"module"``, ``"version"`` and
        ``"category"``, or ``None`` when ``modules`` is ``None``.

    Raises:
        SystemExit: With status 1 if any module fails to load or has an empty
            default signature.
    """
    if modules is None:
        return None

    configs = []
    requested_versions = {}
    for module in modules:
        if "==" in module:
            pieces = module.split("==")
            module_name, module_version = pieces[0], pieces[1]
        else:
            module_name, module_version = module, None

        if module_name in requested_versions:
            print(module_name, "==", requested_versions[module_name],
                  " will be ignored cause new version is specified.")
            # Drop the config of *this* module. The previous `configs.pop()`
            # removed the most recent entry, which belongs to another module
            # whenever the duplicates are not adjacent.
            configs = [
                config for config in configs
                if config["module"] != module_name
            ]
        requested_versions[module_name] = module_version

        try:
            CacheUpdater(
                "hub_serving_start",
                module=module_name,
                version=module_version).start()
            m = hub.Module(name=module_name, version=module_version)
            method_name = m.desc.attr.map.data['default_signature'].s
            if method_name == "":
                raise RuntimeError(
                    "{} cannot be use for predicting".format(module_name))
            configs.append({
                "module": module_name,
                "version": m.version,
                "category": str(m.type).split("/")[0].upper()
            })
        except Exception as err:
            print(err, ", start PaddleHub Serving unsuccessfully.")
            # Same effect as the site-provided exit(1), without relying on a
            # helper meant for interactive sessions.
            raise SystemExit(1)
    return configs
def execute(self, argv):
    """Install a module from a package file, a local directory, or by name.

    ``argv[0]`` is treated as a package when it ends with ``tar.gz`` or
    ``phm``, as a module directory when it is an existing directory, and
    otherwise as ``name`` or ``name==version``.

    Args:
        argv: Command-line arguments; only the first one is used.

    Returns:
        bool: ``False`` when no argument was supplied, ``True`` otherwise
        (the installation result itself is only reported through the printed
        tips).
    """
    if not argv:
        print("ERROR: Please specify a module name.\n")
        self.help()
        return False

    target = argv[0]
    extra = {"command": "install"}

    if target.endswith(("tar.gz", "phm")):
        install_kwargs = {"module_package": target}
    elif os.path.isdir(target):
        install_kwargs = {"module_dir": target}
    else:
        if "==" in target:
            pieces = target.split("==")
            module_name, module_version = pieces[0], pieces[1]
        else:
            module_name, module_version = target, None
        # Only name-based installs are reported to the cache updater.
        CacheUpdater("hub_install", module_name, module_version).start()
        install_kwargs = {
            "module_name": module_name,
            "module_version": module_version,
        }

    _result, tips, _module_dir = default_module_manager.install_module(
        extra=extra, **install_kwargs)
    print(tips)
    return True
def execute(self, argv):
    """Show information about a model directory or an installed module.

    Args:
        argv: Command-line arguments; ``argv[0]`` is the module name or path.

    Returns:
        bool: ``False`` when no argument was supplied, ``True`` otherwise
        (including when nothing could be found).
    """
    if not argv:
        print("ERROR: Please specify a module or a model\n")
        self.help()
        return False

    module_name = argv[0]
    CacheUpdater("hub_show", module_name).start()

    # A path that contains an info.yml is shown as a model.
    info_path = os.path.join(module_name, "info.yml")
    if os.path.exists(info_path):
        self.show_model_info(info_path)
        return True

    cwd = os.getcwd()
    module_dir = default_module_manager.search_module(module_name)
    if not module_dir:
        # Not known to the manager: fall back to a path relative to the
        # current working directory, with no version information.
        module_dir = (os.path.join(cwd, module_name), None)

    if os.path.exists(module_dir[0]):
        self.show_module_info(module_dir)
    else:
        print("%s is not existed!" % module_name)
    return True
def download(name,
             save_path,
             version=None,
             decompress=True,
             resource_type='Model',
             extra=None):
    """Download a hub resource into ``save_path/name`` unless it already exists.

    Args:
        name: Resource name; also the file/directory name under ``save_path``.
        save_path: Directory that receives the resource. Created if missing.
        version: Optional resource version.
        decompress: When true and the downloaded file is a tar archive, it is
            uncompressed before being moved into place.
        resource_type: Resource type passed to the server lookup.
        extra: Optional dict of extra fields passed to the server lookup.
            Defaults to a fresh empty dict per call.

    Returns:
        None.

    Raises:
        ServerConnectionError: If the hub server check fails.
        ResourceNotFoundError: If the server returns no match for the resource.
    """
    # A `{}` default would be one dict shared by every call; build a fresh one.
    if extra is None:
        extra = {}

    target_path = os.path.realpath(os.path.join(save_path, name))
    if os.path.exists(target_path):
        return

    if not hub.HubServer()._server_check():
        raise ServerConnectionError

    search_result = hub.HubServer().get_resource_url(
        name, resource_type=resource_type, version=version, extra=extra)
    if not search_result:
        raise ResourceNotFoundError(name, version)
    CacheUpdater("x_download", name, version).start()

    url = search_result['url']
    with tmp_dir() as _dir:
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        # Download into the temporary directory first and move the result
        # into place only at the end.
        _, _, savefile = default_downloader.download_file(
            url=url, save_path=_dir, print_progress=True)
        if tarfile.is_tarfile(savefile) and decompress:
            _, _, savefile = default_downloader.uncompress(
                file=savefile, print_progress=True)
        shutil.move(savefile, target_path)
def execute(self, argv):
    """Search the hub server for resources and print them as a table.

    Args:
        argv: Command-line arguments; ``argv[0]`` is the search key sent to
            the server. ``'.*'`` is used when no argument is given.

    Returns:
        bool: Always ``True``.
    """
    keyword = argv[0] if argv else '.*'
    CacheUpdater("hub_search", keyword).start()

    extra = {"command": "search"}
    resource_list = hub.HubServer().search_resource(
        keyword, resource_type="Module", extra=extra)

    # Narrower columns are used on Windows.
    placeholders = [20, 8, 8, 20] if utils.is_windows() else [30, 8, 8, 25]
    tp = TablePrinter(
        titles=["ResourceName", "Type", "Version", "Summary"],
        placeholders=placeholders)

    # An empty result may simply mean the server was unreachable.
    if len(resource_list) == 0 and hub.HubServer()._server_check() is False:
        print("Request Hub-Server unsuccessfully, please check your network.")

    for res_name, res_type, res_version, res_summary in resource_list:
        name_color = "yellow" if res_type == "Module" else "light_red"
        tp.add_line(
            contents=[res_name, res_type, res_version, res_summary],
            colors=[name_color, None, None, None])

    print(tp.get_text())
    return True
def execute(self, argv):
    """Convert a model directory into a module package.

    Options are read with ``self.parser.parse_args()`` (no explicit argument
    list); the ``argv`` parameter itself is not consulted.

    Args:
        argv: Unused.

    Returns:
        bool: ``False`` if the module name or model directory is missing or
        the model directory is not a directory, ``True`` after conversion.
    """
    args = self.parser.parse_args()
    if not (args.module_name and args.model_dir):
        ConvertCommand.show_help()
        return False

    self.module = args.module_name
    if args.module_version is None:
        self.version = '1.0.0'
    else:
        self.version = args.module_version

    self.src = args.model_dir
    if not os.path.isdir(self.src):
        print('`{}` is not exists or not a directory path'.format(self.src))
        return False

    if args.output_dir is None:
        # Default destination: "<module>_<current timestamp>".
        self.dest = os.path.join('{}_{}'.format(self.module,
                                                str(time.time())))
    else:
        self.dest = args.output_dir

    CacheUpdater("hub_convert", self.module, self.version).start()
    os.makedirs(self.dest)

    with tmp_dir() as workdir:
        self._tmp_dir = workdir
        self.create_module_py()
        self.create_init_py()
        self.create_serving_demo_py()
        self.create_module_tar()

    print('The converted module is stored in `{}`.'.format(self.dest))
    return True
def __init__(self,
             name=None,
             module_dir=None,
             signatures=None,
             module_info=None,
             assets=None,
             processor=None,
             extra_info=None,
             version=None):
    """Initialise a module from a name, a module directory, or signatures.

    Exactly one initialisation path is taken, checked in this order:
    ``name``, ``module_dir``, ``signatures``. The initialisation runs while an
    exclusive lock on ``CONF_HOME/config.json`` is held.

    Args:
        name: Module name; initialises through ``_init_with_name``.
        module_dir: Indexable whose element 0 is the module path and whose
            element 1 is the version.
        signatures: Signatures used to build a new module.
        module_info: Passed to ``_generate_module_info`` (signatures path).
        assets: Asset(s) stored on the module (signatures path).
        processor: Optional ``BaseProcessor`` subclass (signatures path).
        extra_info: Optional dict of extra information.
        version: Module version used together with ``name``.

    Raises:
        TypeError: If ``extra_info`` is not a dict, or ``processor`` is not a
            subclass of ``BaseProcessor``.
        ValueError: If none of ``name``, ``module_dir`` or ``signatures`` is
            given.
    """
    self.desc = module_desc_pb2.ModuleDesc()
    self.program = None
    self.assets = []
    self.helper = None
    self.signatures = {}
    self.default_signature = None
    self.module_info = None
    self.processor = None
    self.extra_info = {} if extra_info is None else extra_info
    if not isinstance(self.extra_info, dict):
        raise TypeError("The extra_info should be an instance of python dict")

    # cache data
    self.last_call_name = None
    self.cache_feed_dict = None
    self.cache_fetch_dict = None
    self.cache_program = None

    fp_lock = open(os.path.join(CONF_HOME, 'config.json'))
    try:
        lock.flock(fp_lock, lock.LOCK_EX)
        try:
            if name:
                self._init_with_name(name=name, version=version)
            elif module_dir:
                self._init_with_module_file(module_dir=module_dir[0])
                name = module_dir[0].split("/")[-1]
                version = module_dir[1]
            elif signatures:
                if processor:
                    if not issubclass(processor, BaseProcessor):
                        raise TypeError(
                            "Processor shoule be an instance of paddlehub.BaseProcessor"
                        )
                if assets:
                    self.assets = utils.to_list(assets)
                self.processor = processor
                self._generate_module_info(module_info)
                self._init_with_signature(signatures=signatures)
            else:
                raise ValueError("Module initialized parameter is empty")
        finally:
            # Release the lock on every exit path, including exceptions raised
            # by the initialisers (it used to stay held in that case).
            lock.flock(fp_lock, lock.LOCK_UN)
    finally:
        # The lock file handle was previously never closed.
        fp_lock.close()

    CacheUpdater(name, version).start()
def execute(self, argv):
    """Uninstall the module named by ``argv[0]`` and print the manager's tips.

    Args:
        argv: Command-line arguments; only the first one is used.

    Returns:
        bool: ``False`` when no module was specified, ``True`` otherwise.
    """
    if argv:
        module_name = argv[0]
        CacheUpdater("hub_uninstall", module_name).start()
        _result, tips = default_module_manager.uninstall_module(
            module_name=module_name)
        print(tips)
        return True

    print("ERROR: Please specify a module\n")
    self.help()
    return False
def execute(self, argv):
    """Print a table of the modules known to the module manager.

    Args:
        argv: Unused.

    Returns:
        bool: Always ``True``.
    """
    CacheUpdater("hub_list").start()
    all_modules = default_module_manager.all_modules()

    # Narrower columns are used on Windows.
    column_widths = [20, 40] if utils.is_windows() else [25, 50]
    table = TablePrinter(
        titles=["ModuleName", "Path"], placeholders=column_widths)

    for module_name, module_dir in all_modules.items():
        # Element 0 of each entry is what gets shown in the "Path" column.
        table.add_line(contents=[module_name, module_dir[0]])

    print(table.get_text())
    return True
def start_bert_serving(args):
    """Start a Bert service for the first module listed in ``args.modules``.

    Args:
        args: Parsed arguments; ``port``, ``use_gpu``, ``modules`` and ``gpu``
            are read.

    Returns:
        ``False`` when the platform is not Linux or the port is occupied;
        otherwise ``None`` once ``BertServer.run`` returns.
    """
    if platform.system() != "Linux":
        print("Error. Bert Service only support linux.")
        return False

    port_occupied = ServingCommand.is_port_occupied("127.0.0.1", args.port)
    if port_occupied is True:
        print("Port %s is occupied, please change it." % args.port)
        return False

    # Imported here so the dependency is only needed once the checks pass.
    from paddle_gpu_serving.run import BertServer

    server = BertServer(with_gpu=args.use_gpu)
    model_name = args.modules[0]
    server.with_model(model_name=model_name)
    CacheUpdater(
        "hub_bert_service", module=model_name, version="0.0.0").start()
    server.run(gpu_index=args.gpu, port=int(args.port))
def execute(self, argv):
    """Print the usage text listing every command that is shown in help.

    Commands whose ``show_in_help`` or ``description`` is falsy are skipped.

    Args:
        argv: Unused.

    Returns:
        bool: Always ``True``.
    """
    CacheUpdater("hub_help").start()
    hub_command = BaseCommand.command_dict["hub"]

    pieces = [
        "\n",
        "Usage:\n",
        "%s <command> [options]\n" % hub_command.name,
        "\n",
        "Commands:\n",
    ]
    for _command_name, command in self.get_all_commands().items():
        if command.show_in_help and command.description:
            pieces.append(
                " %-15s\t\t%s\n" % (command.name, command.description))

    print("".join(pieces))
    return True
def __new__(cls, name=None, directory=None, module_dir=None, version=None):
    """Create a module object, dispatching on how the module is located.

    When ``cls`` is the class literally named ``Module`` the object is built by
    ``cls.init_with_name`` (``name`` given) or ``cls.init_with_directory``
    (``directory`` or the deprecated ``module_dir`` given), and a
    ``CacheUpdater("update_cache", ...)`` is started for it. For any other
    class a plain instance of ``cls`` is allocated.

    Args:
        name: Module name, forwarded to ``init_with_name``.
        directory: Path forwarded to ``init_with_directory``.
        module_dir: Deprecated alias of ``directory``; either a path or a
            list/tuple whose first element is the path.
        version: Module version, only used together with ``name``.

    Returns:
        The created module object.

    Raises:
        ValueError: If ``cls`` is ``Module`` and none of ``name``,
            ``directory`` or ``module_dir`` is provided.
    """
    if cls.__name__ != "Module":
        return object.__new__(cls)

    if name:
        module = cls.init_with_name(name=name, version=version)
    elif directory:
        module = cls.init_with_directory(directory=directory)
    elif module_dir:
        logger.warning(
            "Parameter module_dir is deprecated, please use directory to specify the path"
        )
        # Only the path element is used; directory-based loading never
        # consumed the version element of the sequence.
        if isinstance(module_dir, (list, tuple)):
            directory = module_dir[0]
        else:
            directory = module_dir
        module = cls.init_with_directory(directory=directory)
    else:
        # Previously this fell through to `module.name` with `module` unbound
        # and crashed with an UnboundLocalError.
        raise ValueError(
            "Module requires one of name, directory or module_dir")

    CacheUpdater("update_cache", module.name, module.version).start()
    return module
def execute(self, argv):
    """Delete everything under ``self.cache_dir()`` and report what was freed.

    The tree is walked bottom-up; in each directory the files are removed
    first, then the sub-directories. Any failure marks the run as unsuccessful
    but does not stop it.

    Args:
        argv: Unused.

    Returns:
        bool: ``True`` if every removal succeeded, ``False`` otherwise.
    """
    CacheUpdater("hub_clear").start()

    succeeded = True
    freed_bytes = 0
    removed_count = 0

    for root, dir_names, file_names in os.walk(self.cache_dir(), topdown=False):
        # Files go through os.remove, directories through os.rmdir.
        for entries, remover in ((file_names, os.remove), (dir_names,
                                                           os.rmdir)):
            for entry in entries:
                path = os.path.join(root, entry)
                try:
                    size = os.path.getsize(path)
                    count = file_num_in_dir(path)
                    remover(path)
                    freed_bytes += size
                    removed_count += count
                except Exception:
                    succeeded = False

    if removed_count != 0:
        print("Clear %d cached files." % removed_count)
        print("Free disk space %s." % file_size_in_human_format(freed_bytes))
    elif succeeded:
        print("No cache to release.")
    else:
        print("Clear cache failed!")
    return succeeded
def preinstall_modules(self):
    """Load every module in ``self.modules_info`` and record its serving data.

    For each entry the module is created from its ``"init_args"`` (the key is
    used as ``name`` unless a ``"directory"`` is given) and the entry is
    updated in place with ``method_name``, ``code_version``, ``version``,
    ``category``, ``module`` and ``name``.

    Raises:
        RuntimeError: If a module has no serving function
            (``serving_func_name`` is ``None``).
    """
    for key, value in self.modules_info.items():
        init_args = value["init_args"]
        CacheUpdater(
            "hub_serving_start",
            module=key,
            version=init_args.get("version", "0.0.0")).start()

        if "directory" not in init_args:
            init_args.update({"name": key})
        m = hub.Module(**init_args)

        method_name = m.serving_func_name
        if method_name is None:
            # The `exit(1)` that used to follow this raise could never run.
            raise RuntimeError(
                "{} cannot be use for predicting".format(key))

        category = str(m.type).split("/")[0].upper()
        self.modules_info[key].update({
            "method_name": method_name,
            "code_version": m.code_version,
            "version": m.version,
            "category": category,
            "module": m,
            "name": m.name
        })
def stop_serving(self, port):
    """Stop the serving process recorded for ``port``.

    The pid file ``CONF_HOME/serving_<port>.json`` is loaded and removed; if
    the recorded process is still alive it is sent ``SIGTERM``.

    Args:
        port: Port whose serving process should be stopped.

    Returns:
        None.
    """
    filepath = os.path.join(CONF_HOME, "serving_" + str(port) + ".json")
    info = self.load_pid_file(filepath, port)
    if info is False:
        return

    pid, module, start_time = info["pid"], info["module"], info["start_time"]

    if os.path.exists(filepath):
        os.remove(filepath)

    if not pid_is_exist(pid):
        print("PaddleHub Serving has been stopped.")
        return

    print("PaddleHub Serving will stop.")
    run_info = {"period_time": time.time() - start_time}
    CacheUpdater("hub_serving_stop", module=module, addition=run_info).start()

    # os.kill on Windows, os.killpg everywhere else.
    terminate = os.kill if platform.system() == "Windows" else os.killpg
    terminate(pid, signal.SIGTERM)
def execute(self, argv):
    """Print the hub version string.

    Args:
        argv: Unused.

    Returns:
        bool: Always ``True``.
    """
    CacheUpdater("hub_version").start()
    banner = "hub %s" % version.hub_version
    print(banner)
    return True
def execute(self, argv):
    """Run a module from the command line and print its results.

    Modules whose ``code_version`` is ``"v2"`` receive the remaining arguments
    through ``run_func``. Other modules have their arguments parsed here and
    are called with their default signature.

    Args:
        argv: Command-line arguments; ``argv[0]`` is the module name and the
            rest are module options.

    Returns:
        bool: ``True`` after the results are printed; ``False`` when the
        module name is missing, the module cannot be found or is not runnable,
        no options are given (non-v2), or the input data is malformed.
    """
    if not argv:
        print("ERROR: Please specify a module name.\n")
        self.help()
        return False

    module_name = argv[0]
    CacheUpdater("hub_run", module_name).start()

    self.parser.prog = '%s %s %s' % (ENTRY, self.name, module_name)
    self.arg_input_group = self.parser.add_argument_group(
        title="Input options", description="Data input to the module")
    self.arg_config_group = self.parser.add_argument_group(
        title="Config options",
        description=
        "Run configuration for controlling module behavior, not required")

    self.module = self.find_module(module_name)
    if not self.module:
        return False

    # If the module is not executable, give an alarm and exit
    if not self.module.is_runable:
        print("ERROR! Module %s is not executable." % module_name)
        return False

    if self.module.code_version == "v2":
        results = self.module.run_func(argv[1:])
    else:
        self.module.check_processor()
        self.add_module_config_arg()
        self.add_module_input_arg()

        if not argv[1:]:
            self.help()
            return False

        self.args = self.parser.parse_args(argv[1:])

        config = self.get_config()
        data = self.get_data()

        try:
            self.check_data(data)
        except DataFormatError:
            self.help()
            return False

        results = self.module(
            sign_name=self.module.default_signature,
            data=data,
            use_gpu=self.args.use_gpu,
            batch_size=self.args.batch_size,
            **config)

    if six.PY2:
        # Best effort: print UTF-8 JSON on Python 2 and fall back to the raw
        # results if they cannot be serialised. Only ordinary errors are
        # swallowed, so KeyboardInterrupt/SystemExit still propagate.
        try:
            results = json.dumps(
                results, encoding="utf8", ensure_ascii=False)
        except Exception:
            pass

    print(results)
    return True
def execute(self, argv):
    """Download a model/module archive and optionally uncompress it.

    ``argv[0]`` is ``name`` or ``name==version``; the remaining arguments are
    parsed by ``self.parser``. When the requested type is neither ``"Module"``
    nor ``"Model"``, a module is looked up first and a model second.

    Args:
        argv: Command-line arguments.

    Returns:
        bool: ``False`` when no name is given or the download fails;
        ``True`` otherwise (including when the resource cannot be found).
    """
    if not argv:
        print("ERROR: Please provide the model/module name\n")
        self.help()
        return False

    spec = argv[0]
    if "==" in spec:
        pieces = spec.split("==")
        mod_name, mod_version = pieces[0], pieces[1]
    else:
        mod_name, mod_version = spec, None
    CacheUpdater("hub_download", mod_name, mod_version).start()

    self.args = self.parser.parse_args(argv[1:])
    self.args.type = self.check_type(self.args.type)

    extra = {"command": "download"}

    def lookup(resource_type):
        # One server query for the requested name/version.
        return hub.HubServer().get_resource_url(
            mod_name,
            resource_type=resource_type,
            version=mod_version,
            extra=extra)

    if self.args.type in ["Module", "Model"]:
        search_result = lookup(self.args.type)
    else:
        search_result = lookup("Module")
        self.args.type = "Module"
        if search_result == {}:
            search_result = lookup("Model")
            self.args.type = "Model"

    url = search_result.get('url', None)
    expected_md5 = search_result.get('md5', None)
    if not url:
        if hub.HubServer()._server_check() is False:
            tips = "Request Hub-Server unsuccessfully, please check your network."
        else:
            version_note = (" with version %s" % mod_version
                            if mod_version else "")
            tips = "".join([
                "PaddleHub can't find model/module named %s" % mod_name,
                version_note,
                ". Please use the 'hub search' command to find the correct model/module name.",
            ])
        print(tips)
        return True

    file_name = os.path.basename(url)
    local_path = os.path.join(self.args.output_path, file_name)

    must_download = True
    if os.path.exists(local_path):
        print("File %s already existed\nWait to check the MD5 value" %
              file_name)
        if expected_md5 == utils.md5_of_file(local_path):
            print("MD5 check pass.")
            must_download = False
        else:
            print("MD5 check failed!\nDelete invalid file.")
            os.remove(local_path)

    if must_download:
        result, tips, local_path = default_downloader.download_file(
            url=url, save_path=self.args.output_path, print_progress=True)
        if not result:
            print(tips)
            return False

    if self.args.uncompress:
        result, tips, local_path = default_downloader.uncompress(
            file=local_path,
            dirname=self.args.output_path,
            delete_file=True,
            print_progress=True)
        print(tips)
        if self.args.type == "Model":
            # Uncompressed models are renamed to "./<name>".
            os.rename(local_path, "./" + mod_name)
    return True
def execute(self, argv):
    """Run the autofinetune search and write a summary to ``log_file.txt``.

    ``argv[0]`` is the script to be finetuned; the remaining arguments are
    parsed by ``self.parser``. An evaluator and a tuning strategy are built
    from the parsed options, the strategy is stepped until it reports stop or
    ``self.args.round`` rounds have run, and the best hyperparameters and all
    searched solutions are written to ``<output_dir>/log_file.txt``.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-collapsed source; confirm against the upstream file.

    Args:
        argv: Command-line arguments.

    Returns:
        bool: ``False`` when the script or its options are missing,
        ``True`` after the search has finished.

    Raises:
        ValueError: If the evaluator or tuning strategy name is not one of
            the handled values.
    """
    CacheUpdater("hub_autofinetune").start()
    if not argv:
        print(
            "ERROR: Please specify a script to be finetuned in python.\n")
        self.help()
        return False

    self.fintunee_script = argv[0]

    self.parser.prog = '%s %s %s' % (ENTRY, self.name, self.fintunee_script)
    self.arg_params_to_be_searched_group = self.parser.add_argument_group(
        title="Input options", description="Hyperparameters to be searched.")
    self.arg_config_group = self.parser.add_argument_group(
        title="Autofinetune config options",
        description=
        "Autofintune configuration for controlling autofinetune behavior, not required"
    )
    self.arg_finetuned_task_group = self.parser.add_argument_group(
        title="Finetuned task config options",
        description=
        "Finetuned task configuration for controlling finetuned task behavior, not required"
    )
    self.add_params_file_arg()
    self.add_autoft_config_arg()

    # Nothing but the script name was given: show the help and stop.
    if not argv[1:]:
        self.help()
        return False

    self.args = self.parser.parse_args(argv[1:])
    options_str = ""
    if self.args.opts is not None:
        options_str = self.convert_to_other_options(self.args.opts)

    # "--gpu" is a comma-separated list of integer device ids.
    device_ids = self.args.gpu.strip().split(",")
    device_ids = [int(device_id) for device_id in device_ids]

    # Select the evaluator by (case-insensitive) name.
    if self.args.evaluator.lower() == "fulltrail":
        evaluator = FullTrailEvaluator(self.args.param_file, self.fintunee_script, options_str=options_str)
    elif self.args.evaluator.lower() == "populationbased":
        evaluator = PopulationBasedEvaluator(self.args.param_file, self.fintunee_script, options_str=options_str)
    else:
        raise ValueError("The evaluate %s is not defined!" % self.args.evaluator)

    # Select the tuning strategy by (case-insensitive) name.
    if self.args.tuning_strategy.lower() == "hazero":
        autoft = HAZero(evaluator, cudas=device_ids, popsize=self.args.popsize, output_dir=self.args.output_dir)
    elif self.args.tuning_strategy.lower() == "pshe2":
        autoft = PSHE2(evaluator, cudas=device_ids, popsize=self.args.popsize, output_dir=self.args.output_dir)
    else:
        raise ValueError("The tuning strategy %s is not defined!"
                         % self.args.tuning_strategy)

    run_round_cnt = 0
    # Accumulates the results returned by every autoft.step() call; each value
    # is later unpacked as (model directory, rank).
    solutions_modeldirs = {}
    print("PaddleHub Autofinetune starts.")
    while (not autoft.is_stop()) and run_round_cnt < self.args.round:
        print("PaddleHub Autofinetune starts round at %s." % run_round_cnt)
        output_dir = autoft._output_dir + "/round" + str(run_round_cnt)
        res = autoft.step(output_dir)
        solutions_modeldirs.update(res)
        evaluator.new_round()
        run_round_cnt = run_round_cnt + 1
    print("PaddleHub Autofinetune ends.")

    best_hparams_origin = autoft.get_best_hparams()
    # presumably shares the best hyperparameters across processes - TODO confirm
    best_hparams_origin = autoft.mpi.bcast(best_hparams_origin)

    with open(autoft._output_dir + "/log_file.txt", "w") as f:
        best_hparams = evaluator.convert_params(best_hparams_origin)
        print("The final best hyperparameters:")
        f.write("The final best hyperparameters:\n")
        for index, hparam_name in enumerate(autoft.hparams_name_list):
            print("%s=%s" % (hparam_name, best_hparams[index]))
            f.write(hparam_name + "\t:\t" + str(best_hparams[index]) + "\n")

        # Solutions are keyed by the tuple of un-converted hyperparameters.
        best_hparams_dir, best_hparams_rank = solutions_modeldirs[tuple(
            best_hparams_origin)]

        print("The final best eval score is %s." % autoft.get_best_eval_value())

        if autoft.mpi.multi_machine:
            print("The final best model parameters are saved as " + autoft._output_dir + "/best_model on rank " + str(best_hparams_rank) + " .")
        else:
            print("The final best model parameters are saved as " + autoft._output_dir + "/best_model .")
        f.write("The final best eval score is %s.\n" % autoft.get_best_eval_value())

        best_model_dir = autoft._output_dir + "/best_model"

        # Only the rank recorded for the best solution copies the model directory.
        if autoft.mpi.rank == best_hparams_rank:
            shutil.copytree(best_hparams_dir, best_model_dir)

        # Header of the per-solution table; a "rank" column is added in the
        # multi-machine case.
        if autoft.mpi.multi_machine:
            f.write(
                "The final best model parameters are saved as ./best_model on rank " \
                + str(best_hparams_rank) + " .")
            f.write("\t".join(autoft.hparams_name_list) + "\tsaved_params_dir\trank\n")
        else:
            f.write(
                "The final best model parameters are saved as ./best_model ."
            )
            f.write("\t".join(autoft.hparams_name_list) + "\tsaved_params_dir\n")

        print(
            "The related information about hyperparamemters searched are saved as %s/log_file.txt ." % autoft._output_dir)
        # One tab-separated row per searched solution.
        for solution, modeldir in solutions_modeldirs.items():
            param = evaluator.convert_params(solution)
            param = [str(p) for p in param]
            if autoft.mpi.multi_machine:
                f.write("\t".join(param) + "\t" + modeldir[0] + "\t" + str(modeldir[1]) + "\n")
            else:
                f.write("\t".join(param) + "\t" + modeldir[0] + "\n")

    return True