def generate_key(func_name, args, dict_args_original, skip_args):
    """Build a cache key (short and full) for a function call.

    Args:
        func_name: Name of the cached function.
        args: Positional arguments (currently unused; kept for interface
            compatibility with existing callers).
        dict_args_original: Mapping of argument name -> value.
        skip_args: Argument names to exclude from the key.

    Returns:
        Tuple (key, full_key): `key` is a sanitized cache key capped at 400
        chars; `full_key` is a more verbose, human-readable variant.
    """
    def _short(v):
        # Long values are replaced by their md5 digest to bound key length.
        # BUG FIX: md5 requires bytes on Python 3; encode the string first.
        s = str(v)
        return s if len(s) < 200 else hashlib.md5(s.encode("utf-8")).hexdigest()

    args_concat = [v for k, v in sorted(dict_args_original.items())
                   if k not in skip_args]

    # Serialize each argument: callables by name, everything else by value
    # (skipping values whose str() contains "0x", i.e. bare object reprs —
    # the original's "ugly way" of detecting references).
    args_serialized = "_".join(sorted(
        v.__name__ if callable(v) else _short(v)
        for v in args_concat
        if callable(v) or "0x" not in str(v)
    ))
    logger.info("Serialized args to " + args_serialized)

    key = func_name + "_" + "".join(
        a for a in args_serialized if a.isalnum() or a in "!@#$%^&**_+-"
    )
    # BUG FIX: the original condition was `if key not in skip_args` — testing
    # the outer `key` variable, so skipped args were never excluded here.
    full_key = func_name + "(" + "".join(
        str(k) + "=" + _short(v)
        for k, v in sorted(dict_args_original.items())
        if k not in skip_args
    )
    if len(key) > 400:
        key = key[0:400]
    return key, full_key
def on_connect(self, client, userdata, flags, rc):
    """MQTT on_connect callback: record whether the connection succeeded.

    Sets `self.connected`; on failure also sets `self.connected_error`.
    """
    logger.info("Connected with result code: {}".format(rc))
    ok = rc == MQTT_ERR_SUCCESS
    self.connected = ok
    if not ok:
        self.connected_error = True
def test_sales_list(self):
    """GET the sales list filtered by role; expect msg == 'success'."""
    # NOTE(review): query key 'roleld' (lowercase L) looks like a typo for
    # 'roleId' — confirm against the API before changing.
    resp = requests.get(url=self.url + '?roleld=3', headers=self.header)
    logger.info(resp.text)
    self.assertEqual(resp.json()['msg'], 'success')
def test_1_10(self):
    """Plain GET against self.url; the service should answer msg == 'success'."""
    response = requests.get(url=self.url, headers=self.header)
    logger.info(response.text)
    self.assertEqual(response.json()['msg'], 'success')
def run(self):
    """Create and bind the adapter's IPv4 UDP server socket.

    Binds to all interfaces on `self.port`.  On failure the error is
    logged and the process exits.
    """
    addr = ("", self.port)
    try:
        # BUG FIX: keep the socket on `self` (as the sibling IPv6/TCP run()
        # does) instead of dropping it in a local variable.
        self.adapter_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.adapter_sock.bind(addr)
        logger.info("IPv4 UDP服务器已经启动,服务器绑定地址%s" % str(addr))
    except Exception as e:  # BUG FIX: `except Exception, e` is py2-only syntax
        logger.error("启动IPv4 UDP服务器失败,错误原因%s" % str(e))
        # NOTE(review): exits with status 0 on failure — presumably should be
        # non-zero; confirm before changing.
        exit(0)
def download_auxiliary_data():
    """Load auxiliary data from URL and save to local drive."""
    # Fetch the tags dataset from the public repository, then persist it.
    url = "https://raw.githubusercontent.com/GokuMohandas/MadeWithML/main/datasets/tags.json"
    destination = Path(config.DATA_DIR, "tags.json")
    utils.save_dict(d=utils.load_json_from_url(url=url), filepath=destination)
    logger.info("✅ Auxiliary data downloaded!")
def delete_experiment(experiment_name: str):
    """Delete an experiment with name `experiment_name`.

    Args:
        experiment_name (str): Name of the experiment.

    Raises:
        ValueError: If no experiment with that name exists.
    """
    client = mlflow.tracking.MlflowClient()
    experiment = client.get_experiment_by_name(experiment_name)
    # ROBUSTNESS: get_experiment_by_name returns None for unknown names;
    # fail with a clear message instead of an opaque AttributeError.
    if experiment is None:
        raise ValueError(f"Experiment {experiment_name} not found.")
    client.delete_experiment(experiment_id=experiment.experiment_id)
    logger.info(f"✅ Deleted experiment {experiment_name}!")
def run(self):
    """Create, bind, and listen on the adapter's IPv6 TCP server socket.

    Binds to all interfaces on `self.port` with a backlog of 10.  On
    failure the error is logged and the process exits.
    """
    addr = ("", self.port)
    try:
        self.adapter_sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
        self.adapter_sock.bind(addr)
        self.adapter_sock.listen(10)
        logger.info("IPv6 TCP服务器已经启动,服务器绑定地址%s" % str(addr))
    except Exception as e:  # BUG FIX: `except Exception, e` is py2-only syntax
        logger.error("启动IPv6 TCP服务器失败,错误原因%s" % str(e))
        # NOTE(review): exit status 0 on failure looks unintended — confirm.
        exit(0)
def case_log(data):
    """Log the standard preamble for one data-driven test case."""
    url = data['url']
    feature = data['feature']
    test_desc = data['test_desc']
    input_args = data['input_args']
    expire_result = data['expire_result']
    for message in ('%s测试开始' % feature,
                    "%s开始" % test_desc,
                    "url:%s" % url,
                    "入参:%s" % input_args,
                    "期望结果:%s" % expire_result):
        logger.info(message)
def test_no_page(self):
    """POST without page info; the response text should contain '成功'."""
    case = readexceldata.get_test_data("test_no_page", self.data_list)
    case_log.case_log(case)
    # SECURITY(review): eval() on spreadsheet-sourced text executes arbitrary
    # code — ast.literal_eval would be safer.
    payload = eval(case["input_args"])
    expire_result = case["expire_result"]
    response = requests.post(case["url"], data=payload)
    logger.info("响应内容:%s" % response)
    print(response.text)
    self.assertIn("成功", response.text)
def test_add_sys_user_ture(self):
    """Add a sys user with valid fields; expect msg == 'success'."""
    data = {
        "password": "******",
        "name": "123888",
        "roleIdList": [1],
        "username": "******",
    }
    expect = 'success'
    # BUG FIX: logger.info(self.url, self.header, data, expect) passed extra
    # positional args with no %-placeholders, which breaks log rendering.
    logger.info("url=%s headers=%s data=%s expect=%s",
                self.url, self.header, data, expect)
    res = requests.post(url=self.url, json=data, headers=self.header)
    r = res.json()['msg']
    self.assertEqual(r, expect)
def test_login_username_error(self):
    """Login with a bad username; expect the Chinese 'bad credentials' message."""
    payload = {"username": "******", "password": "******"}
    res = requests.post(url=self.url, headers=self.header, json=payload)
    logger.info(res.text)
    self.assertEqual(res.json()['msg'], '用户名或密码错误')
def test_student_page_add(self):
    """Page through students (page 1, size 100); expect msg == 'success'."""
    data = {
        "page": 1,
        "pageSize": 100,
    }
    expect = 'success'
    # BUG FIX: logger.info(url, header, data, expect) treated the extra args
    # as %-format params for a message with no placeholders.
    logger.info("url=%s headers=%s data=%s expect=%s",
                self.url, self.header, data, expect)
    res = requests.post(url=self.url, json=data, headers=self.header)
    logger.info(res.text)
    self.assertEqual(res.json()['msg'], expect)
def test_package_add_price(self):
    """Create a package with a non-numeric price; expect a validation error."""
    payload = {
        "name": "套餐名称",
        "price": "好多",
        "type": "1",
        "rightsList": [30005],
    }
    res = requests.post(self.url, headers=self.header, json=payload)
    logger.info(res.text)
    self.assertEqual(res.json()['msg'], '价格不能非数字')
def test_package_add_type_2(self):
    """Create a type-2 package with valid fields; expect msg == 'success'."""
    payload = {
        "name": "测试套餐名称",
        "price": "1000",
        "type": "2",
        "rightsList": [30005],
    }
    res = requests.post(url=self.url, headers=self.header, json=payload)
    logger.info(res.text)
    self.assertEqual(res.json()['msg'], 'success')
def numpy_load_fnc(key):
    """Load a cached numpy value saved under `key`.

    A `<key>.npz` file is treated as a list of arrays keyed by stringified
    integer indices (the format numpy_save_fnc writes); otherwise
    `<key>.npy` is loaded as a single array.
    """
    npz_path = os.path.join(c["CACHE_DIR"], key + ".npz")
    if not os.path.exists(npz_path):
        return np.load(os.path.join(c["CACHE_DIR"], key + ".npy"))
    savez_file = np.load(npz_path)
    arrays = []
    # Entries were saved under "0", "1", ... — restore them in index order.
    for idx in sorted(int(name) for name in savez_file):
        entry = savez_file[str(idx)]
        logger.info("Loading " + str(idx) + " from " + str(key) + " " + str(entry.shape))
        arrays.append(entry)
    return arrays
def numpy_save_fnc(key, val):
    """Save a numpy value to the cache under `key`.

    A list of arrays goes to `<key>.npz` with stringified-index keys (the
    format numpy_load_fnc expects); a single array goes to `<key>.npy`.

    Raises:
        TypeError: If `val` is a tuple (use a list instead).
    """
    if isinstance(val, tuple):
        # BUG FIX: `raise "..."` raises a string, which is itself a TypeError
        # on Python 3 — raise a real exception with the original message.
        raise TypeError("Please use list to make numpy_save_fnc work")
    # Note - not using savez because it is reportedly slow.
    if isinstance(val, list):
        logger.info("Saving as list")
        save_path = os.path.join(c["CACHE_DIR"], key)
        save_dict = {str(idx): arr for idx, arr in enumerate(val)}
        np.savez(save_path, **save_dict)
    else:
        logger.info("Saving as array " + str(val.shape))
        np.save(os.path.join(c["CACHE_DIR"], key + ".npy"), val)
def test_login_ture(self):
    """Login with valid credentials; expect msg == 'success'."""
    body = {"username": "******", "password": "******"}
    res = requests.post(url=self.url, headers=self.header, json=body)
    logger.info(res.text)
    self.assertEqual(res.json()['msg'], 'success')
def train_model(
    params_fp: Path = Path(config.CONFIG_DIR, "params.json"),
    experiment_name: Optional[str] = "best",
    run_name: Optional[str] = "model",
) -> None:
    """Train a model and track metrics/artifacts under an MLflow run.

    Args:
        params_fp (Path, optional): Parameters to use for training.
            Defaults to `config/params.json`.
        experiment_name (str, optional): Name of the experiment to save the
            run to. Defaults to `best`.
        run_name (str, optional): Name of the run. Defaults to `model`.
    """
    namespace = Namespace(**utils.load_dict(filepath=params_fp))

    mlflow.set_experiment(experiment_name=experiment_name)
    with mlflow.start_run(run_name=run_name):
        logger.info(f"Run ID: {mlflow.active_run().info.run_id}")

        # Train
        artifacts = train.train(params=namespace)

        # Tags (none yet — keep the hook in place)
        mlflow.set_tags({})

        # Metrics
        performance = artifacts["performance"]
        logger.info(json.dumps(performance["overall"], indent=2))
        mlflow.log_metrics({
            "precision": performance["overall"]["precision"],
            "recall": performance["overall"]["recall"],
            "f1": performance["overall"]["f1"],
            "best_val_loss": artifacts["loss"],
            "behavioral_score": performance["behavioral"]["score"],
            "slices_f1": performance["slices"]["overall"]["f1"],
        })

        # Artifacts: serialize everything into a temp dir, then log it
        with tempfile.TemporaryDirectory() as dp:
            utils.save_dict(vars(artifacts["params"]), Path(dp, "params.json"), cls=NumpyEncoder)
            utils.save_dict(performance, Path(dp, "performance.json"))
            artifacts["label_encoder"].save(Path(dp, "label_encoder.json"))
            artifacts["tokenizer"].save(Path(dp, "tokenizer.json"))
            torch.save(artifacts["model"].state_dict(), Path(dp, "model.pt"))
            mlflow.log_artifacts(dp)
        mlflow.log_params(vars(artifacts["params"]))
def case_log(data):
    """Log the standard preamble for one data-driven test case."""
    case_url = data['url']
    case_feature = data['feature']
    case_desc = data['test_desc']
    payload = data['input_payload']
    # headers = data['input_headers']
    expected = data['expire_result']
    logger.info("%s 测试开始" % case_feature)
    logger.info("%s 开始" % case_desc)
    logger.info("url: %s" % case_url)
    # NOTE(review): the label below means "request headers" but the value is
    # the payload — looks swapped with the commented-out "请求体" line; confirm.
    logger.info("请求头: %s" % payload)
    # logger.info("请求体: %s" % headers)
    logger.info("期望结果:%s" % expected)
def test_city_not_exist(self):
    """Query with an unsupported city; the response should mention the error."""
    case = readexceldata.get_test_data("test_city_not_exist", self.data_list)
    print(case)
    case_log.case_log(case)
    # SECURITY(review): eval() on spreadsheet-sourced text executes arbitrary
    # code — ast.literal_eval would be safer.
    body = eval(case["input_args"])
    expire_result = case["expire_result"]
    response = requests.post(case["url"], data=body)
    logger.info("响应内容:%s" % response)
    print(response.text)
    self.assertIn("城市错误或不支持", response.text)
def test_add_sys_user_xs(self):
    """Add a sales user with a too-short username; expect '用户名太短'."""
    data = {
        "name": "销售001",
        "password": "******",
        "roleIdList": [3],
        "userId": 0,
        "username": "******",
    }
    expect = '用户名太短'
    # BUG FIX: extra positional args to logger.info with no %-placeholders
    # raise a log-formatting error.
    logger.info("url=%s headers=%s data=%s expect=%s",
                self.url, self.header, data, expect)
    res = requests.post(url=self.url, json=data, headers=self.header)
    logger.info(res.text)
    self.assertEqual(res.json()['msg'], expect)
def test_add_sys_user_username_repeat(self):
    """Add a user whose username already exists; expect '用户名已存在'."""
    data = {
        "name": "5231",
        "password": "******",
        "roleIdList": [1],
        "userId": 0,
        "username": "******",
    }
    expect = '用户名已存在'
    # BUG FIX: extra positional args to logger.info with no %-placeholders
    # raise a log-formatting error.
    logger.info("url=%s headers=%s data=%s expect=%s",
                self.url, self.header, data, expect)
    res = requests.post(url=self.url, json=data, headers=self.header)
    logger.info(res.text)
    self.assertEqual(res.json()['msg'], expect)
def compute_features(
    params_fp: Path = Path(config.CONFIG_DIR, "params.json"),
) -> None:
    """Compute and save features for training.

    Args:
        params_fp (Path, optional): Location of parameters (just using
            num_samples, num_epochs, etc.) to use for training.
            Defaults to `config/params.json`.
    """
    parameters = Namespace(**utils.load_dict(filepath=params_fp))
    data.compute_features(params=parameters)
    logger.info("✅ Computed features!")
def case_log(data):
    """Log the standard preamble for one data-driven test case."""
    target_url = data['url']
    case_name = data['test_name']
    arguments = data["input_args"]
    expected = data['expire_result']
    for message in ("%s 测试开始" % case_name,
                    "url:%s" % target_url,
                    "入参:%s" % arguments,
                    "期望结果:%s" % expected):
        logger.info(message)
def test_package_add_001(self):
    """Create a valid package; compares a mocked payload against `expect`.

    NOTE(review): the live HTTP response is logged but never asserted on —
    the final assertion compares two hard-coded dicts through the mock, so
    this test can never fail.  `res.json()['msg']` was probably intended;
    confirm before changing behavior.
    """
    payload = {
        "name": "套餐名称",
        "price": "1000",
        "type": "1",
        "rightsList": [30005],
    }
    expect = {'msg': 'success'}
    database = {'msg': 'success'}
    live_response = requests.post(url=self.url, headers=self.header, json=payload)
    mocked = mock.return_value = database
    logger.info(live_response.text)
    self.assertEqual(mocked, expect)
def run(task_start_mode='new', update_interval=60):
    """Start the task scheduler system.

    Args:
        task_start_mode: 'new' clears cached state and starts from scratch;
            'restart' resumes from the previous stop point, re-running tasks
            that had not finished.
        update_interval: Result-update period in seconds.  (The original
            docstring said 30s, but the signature default is 60 — the
            signature wins.)
    """
    # BUG FIX: bind the name before the try block so the except/finally
    # handlers cannot hit NameError when setup fails early.
    bk_scheduler = None
    try:
        logger.info('----------------Start Task Scheduler System. task_start_mode={}, interval={}--------------------'.format(task_start_mode, update_interval))
        bk_scheduler = BackgroundScheduler()
        if task_start_mode == 'restart':
            # 'restart' re-launches every task that had not finished.
            restart_all_tasks(scheduler=bk_scheduler)
        else:
            # 'new' (default) only starts tasks whose state is 'new'.
            logger.info('clean environment. ')
            clean_environment()
            start_all_new_tasks(scheduler=bk_scheduler)
        # Periodic result updates.
        bk_scheduler.add_job(update_results, 'interval', seconds=update_interval,
                             misfire_grace_time=20, max_instances=5)
        bk_scheduler.start()
        while True:
            time.sleep(2)
    except (KeyboardInterrupt, SystemExit):
        if bk_scheduler is not None:
            bk_scheduler.shutdown()
        logger.warning('Scheduler have been stopped.')
    finally:
        if bk_scheduler is not None and bk_scheduler.running:
            bk_scheduler.shutdown()
        logger.info('-------------------Stop Task Scheduler System.-------------------')
def objective(params: Namespace, trial: optuna.trial._trial.Trial) -> float:
    """Objective function for optimization trials.

    Args:
        params (Namespace): Input parameters for each trial (see `config/params.json`).
        trial (optuna.trial._trial.Trial): Optuna optimization trial.

    Returns:
        F1 score from evaluating the trained model on the test data split.
    """
    # Hyperparameters sampled for this trial (dict literal keeps the
    # suggest-call order identical).
    suggestions = {
        "embedding_dim": trial.suggest_int("embedding_dim", 128, 512),
        "num_filters": trial.suggest_int("num_filters", 128, 512),
        "hidden_dim": trial.suggest_int("hidden_dim", 128, 512),
        "dropout_p": trial.suggest_uniform("dropout_p", 0.3, 0.8),
        "lr": trial.suggest_loguniform("lr", 5e-5, 5e-4),
    }
    for name, value in suggestions.items():
        setattr(params, name, value)

    # Train (can move some of these outside for efficiency)
    logger.info(f"\nTrial {trial.number}:")
    logger.info(json.dumps(trial.params, indent=2))
    artifacts = train(params=params, trial=trial)

    # Record trial attributes from the tuned params and evaluation results.
    params = artifacts["params"]
    performance = artifacts["performance"]
    logger.info(json.dumps(performance["overall"], indent=2))
    overall = performance["overall"]
    trial.set_user_attr("threshold", params.threshold)
    for metric in ("precision", "recall", "f1"):
        trial.set_user_attr(metric, overall[metric])
    return overall["f1"]
def diff(
    author: str = config.AUTHOR,
    repo: str = config.REPO,
    tag_a: str = "workspace",
    tag_b: str = "",
):  # pragma: no cover, can't be certain what diffs will exist
    """Difference between two release TAGs."""
    # Default tag_b to the most recent release tag on GitHub.
    if tag_b == "":
        tags_url = f"https://api.github.com/repos/{author}/{repo}/tags"
        tag_b = utils.load_json_from_url(url=tags_url)[0]["name"]
    logger.info(f"Comparing {tag_a} with {tag_b}:")

    # Parameter differences
    params_diff = utils.dict_diff(
        d_a=params(author=author, repo=repo, tag=tag_a, verbose=False),
        d_b=params(author=author, repo=repo, tag=tag_b, verbose=False),
        d_a_name=tag_a,
        d_b_name=tag_b,
    )
    logger.info(f"Parameter differences: {json.dumps(params_diff, indent=2)}")

    # Performance differences
    performance_diff = utils.dict_diff(
        d_a=performance(author=author, repo=repo, tag=tag_a, verbose=False),
        d_b=performance(author=author, repo=repo, tag=tag_b, verbose=False),
        d_a_name=tag_a,
        d_b_name=tag_b,
    )
    logger.info(f"Performance differences: {json.dumps(performance_diff, indent=2)}")
    return params_diff, performance_diff
def optimize(
    params_fp: Path = Path(config.CONFIG_DIR, "params.json"),
    study_name: Optional[str] = "optimization",
    num_trials: int = 100,
) -> None:
    """Optimize a subset of hyperparameters towards an objective.

    This saves the best trial's parameters into `config/params.json`.

    Args:
        params_fp (Path, optional): Location of parameters (just using
            num_samples, num_epochs, etc.) to use for training.
            Defaults to `config/params.json`.
        study_name (str, optional): Name of the study to save trial runs
            under. Defaults to `optimization`.
        num_trials (int, optional): Number of trials to run. Defaults to 100.
    """
    # Parameters
    params = Namespace(**utils.load_dict(filepath=params_fp))

    # Optimize
    pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(study_name=study_name, direction="maximize", pruner=pruner)
    mlflow_callback = MLflowCallback(tracking_uri=mlflow.get_tracking_uri(), metric_name="f1")
    study.optimize(
        lambda trial: train.objective(params, trial),
        n_trials=num_trials,
        callbacks=[mlflow_callback],
    )
    # CLEANUP: the original built and sorted a trials dataframe here and
    # never used it; removed as dead code.

    # Best trial: merge tuned values back into the saved parameter file.
    logger.info(f"Best value (f1): {study.best_trial.value}")
    params = {**params.__dict__, **study.best_trial.params}
    params["threshold"] = study.best_trial.user_attrs["threshold"]
    utils.save_dict(params, params_fp, cls=NumpyEncoder)
    logger.info(json.dumps(params, indent=2, cls=NumpyEncoder))