def process(hive_cmd_env):
    """Run the recommendation pipeline: select a model, then run it.

    :param hive_cmd_env: hive command environment, passed through to the models
    :return: recommendation result produced by the selected model
    :raises ValueError: when the config declares no 'models' section
    """
    conf_dir = os.path.join(os.path.dirname(__file__), 'conf')

    # 1.1 load model selectors and models' conf
    model_confs = ConfigHelper.load_config(
        os.path.join(conf_dir, 'model_selectors_and_models.yaml'))
    model_selector = model_confs.get('model_selector')
    models = model_confs.get('models')

    # 1.2 load recmd requirement's conf
    requirements = ConfigHelper.load_config(
        os.path.join(conf_dir, 'recmd_requirement.yaml'))

    # 1.3 load recmd score items
    score_params = ConfigHelper.load_config(
        os.path.join(conf_dir, 'score_params.yaml'))

    # 2 get model key by running model selector; fall back to 'default'
    # when no selector is configured
    if model_selector:
        recmd_model_key = get_recmd_model_key_by_running_model_selector(
            requirements, model_selector)
    else:
        recmd_model_key = 'default'

    # 3 get model by recmd key and run model
    if not models:
        # Bug fix: the original used a bare `raise` outside an except clause,
        # which raises RuntimeError("No active exception to re-raise") —
        # raise an explicit, descriptive exception instead.
        raise ValueError(
            "no 'models' section in model_selectors_and_models.yaml")
    recmd_result = get_recmd_result_by_running_models(
        hive_cmd_env, requirements, score_params, models, recmd_model_key)
    return recmd_result
def test_get_recmd_model_key(self):
    """The selector must map the requirement onto one of the known model keys."""
    selector_conf = ConfigHelper.load_config(
        'E:\\PythonProject\\Work\\work_3\\dake_recommendation\\conf\\model_selectors_and_models.yaml'
    )
    recmd_requirement = ConfigHelper.load_config(
        'E:\\PythonProject\\Work\\work_3\\dake_recommendation\\conf\\recmd_requirement.yaml'
    )
    expected_keys = ['md5_model_0001', 'md5_model_0002']
    actual_key = get_recmd_model_key_by_running_model_selector(
        recmd_requirement, selector_conf.get('model_selector'))
    self._list_in(actual_key, expected_keys)
def create_database():
    """Create the SQLite database (if missing) and seed it with mock products.

    On first run, downloads product 79936's description JSON via curl, then
    inserts each product found under ``familia.produtos``. Each insert is
    best-effort: a failing product is reported and skipped.
    """
    if not os.path.exists('./database.db'):
        print("Create a database\n")
        database.create_all()
        sleep(0.1)

    file_name = "products_79936.json"
    if not os.path.exists(file_name):
        # Download of mock database
        Process.run(
            'curl https://servicespub.prod.api.aws.grupokabum.com.br/descricao/v1/descricao/produto/79936 >> %s'
            % file_name)

    ## Save database ##
    # Read $filename
    config_file = './%s' % file_name
    config_helper = ConfigHelper(config_file)
    config = config_helper.load_config()
    # Read only products of config
    config = config['familia']['produtos']
    for data in config:
        product = ProductModel(**data)
        try:
            # Save products in database
            product.save_product()
            sleep(0.01)
        except Exception:
            # Bug fix: the original bare `except:` also swallowed
            # SystemExit/KeyboardInterrupt; keep the deliberate best-effort
            # behavior but catch only real errors.
            print({"message": "An error ocurred trying to create product."}, 500)  # Internal Server Error
def test_rescore(self):
    """Smoke-test rescore() on a small whitespace-separated fixture file."""
    col_name = ['id', 'name', 'age', 'money']
    recmd_results_without_score = pd.read_csv(
        'E:\\PythonProject\\Work\\work_3\\dake_recommendation\\test\\test_1.txt',
        encoding='utf-8', names=col_name, sep=' ')
    score_params = ConfigHelper.load_config(
        'E:\\PythonProject\\Work\\work_3\\dake_recommendation\\conf\\tmp.yaml')
    model_params = {}
    # Bug fix: `print rescore(...)` is Python 2 statement syntax and a
    # SyntaxError under Python 3 — use the print() function (valid in both).
    print(rescore(recmd_results_without_score, score_params, model_params))
def set_logging(log_folder_path, log_level):
    """Configure logging from conf/log.yaml, writing to <log_folder_path>/log."""
    # Load the base logging config shipped next to this module.
    log_config = ConfigHelper.load_config(
        os.path.join(os.path.dirname(__file__), 'conf', 'log.yaml'))

    # Make sure the target folder exists before pointing the handler at it.
    if not os.path.exists(log_folder_path):
        os.makedirs(log_folder_path)

    file_handler = log_config['handlers']['file_handler']
    file_handler['filename'] = os.path.join(log_folder_path, 'log')
    file_handler['level'] = log_level
    logging.config.dictConfig(log_config)
def process(hive_cmd_env):
    """Run every HQL listed in hive_calculate_and_download.yaml and fetch its output."""
    conf_path = os.path.join(
        os.path.dirname(__file__), 'conf', 'hive_calculate_and_download.yaml')
    for _hql_number, detail in ConfigHelper.load_config(conf_path).items():
        # Execute the HQL on hive, then pull the produced file locally.
        download_file_from_hive(
            hive_cmd_env,
            detail.get('hql'),
            detail.get('hql_date_diff'),
            detail.get('data_store_path'),
            detail.get('update_mode'),
        )
def set_logging(error_log_config_file_path, error_log_folder_path, error_log_level):
    """Configure error logging from the given YAML logging-config file.

    Points the file handler at <error_log_folder_path>/logs with the
    requested level, creating the folder if needed.
    """
    if not os.path.exists(error_log_folder_path):
        os.makedirs(error_log_folder_path)

    log_config = ConfigHelper.load_config(error_log_config_file_path)
    handler = log_config["handlers"]["file_handler"]
    handler["filename"] = os.path.join(error_log_folder_path, "logs")
    handler["level"] = error_log_level
    logging.config.dictConfig(log_config)
def process(hive_cmd_env):
    """Download hive results, wrap them into JSON docs and upload them to ES."""
    conf_dir = os.path.join(os.path.dirname(__file__), 'conf')

    # Step 1: run each configured HQL on hive and download the produced file.
    download_conf = ConfigHelper.load_config(
        os.path.join(conf_dir, 'hive_calculate_and_download.yaml'))
    for _hql_number, hql_detail in download_conf.items():
        download_file_from_hive(
            hive_cmd_env,
            hql_detail.get('hql'),
            hql_detail.get('data_store_path'))

    # Step 2: wrap each downloaded file into JSON and push it to elasticsearch.
    upload_conf = ConfigHelper.load_config(
        os.path.join(conf_dir, 'wrap_to_json_and_upload_to_es.yaml'))
    for _file_number, detail in upload_conf.items():
        index_name = detail.get('index_name')
        json_schema_path, json_file_path = wrap_to_json(
            detail.get('schema_path'),
            detail.get('data_path'),
            index_name,
            detail.get('type_name'))
        upload_to_es(index_name, json_schema_path, json_file_path)
def process(args_config_file_path):
    """Drive one model training or testing run from an args config file.

    Reads the config, resolves the middle-result log dirs (training mode
    only), sets up visible GPUs and then trains or tests the model.
    """
    args_config = ConfigHelper.load_config(args_config_file_path)

    # Unpack the config sections used below.
    is_training = args_config["is_training"]
    middle_result_log_dir_params = args_config["middle_result_log_dir_params"]
    model_params = args_config["model_params"]
    training_params = args_config["training_params"]
    testing_params = args_config["testing_params"]

    # step 1: fall back to the default dirname pattern when none is configured
    pattern = middle_result_log_dir_params["pattern"]
    if pattern is None or len(pattern) == 0:
        pattern = middle_result_log_dir_params["default_pattern"]

    if is_training:
        # step 2: training mode — auto-generate the middle-result log dirs
        # and inject them into the training params
        tensorboard_dir, model_dir = genarate_middle_result_log_dir(
            args_config_file_path,
            middle_result_log_dir_params["tensorboard_dir"],
            middle_result_log_dir_params["model_dir"],
            middle_result_log_dir_params["note_dir"],
            middle_result_log_dir_params["self_increasing_mode"] and is_training,
            pattern)
        training_params.update({"tensorboard_dir": tensorboard_dir})
        training_params.update({"model_dir": model_dir})

        # step 3: train the model
        set_up_visiable_gpu(training_params.get("gpu_num"),
                            training_params.get("gpu_device_num"))
        ModelHandler(model_params).train(training_params)
    else:
        # step 3: test the model
        set_up_visiable_gpu(testing_params.get("gpu_num"),
                            testing_params.get("gpu_device_num"))
        ModelHandler(model_params).test(testing_params)