async def login(user, url_name, url_password, captcha=''):
    """POST a signed login request to the passport OAuth2 login endpoint.

    Args:
        user: Object providing ``app_sign``, ``login_session`` and
            ``app.headers``.
        url_name: Value sent as the ``username`` field.
        url_password: Value sent as the ``password`` field.
        captcha: Accepted for interface compatibility; not used by this
            function.

    Returns:
        Whatever ``user.login_session.request_json`` returns for the request.
    """
    # Both values are always empty here, so 'seccode' is always empty too.
    validate_value = ''
    challenge_value = ''
    seccode = f'{validate_value}|jordan' if validate_value else ''

    signed_params = user.app_sign({
        'seccode': seccode,
        'validate': validate_value,
        'challenge': challenge_value,
        'username': url_name,
        'password': url_password,
        'ts': utils.curr_time(),
    })

    # url_password may contain characters such as '%'. Bilibili requires them
    # to be sent as a plain string, not re-encoded to "%25". The aiohttp docs
    # agree with that, but there is a yarl compatibility issue, see
    # https://github.com/aio-libs/aiohttp/blob/10c8ce9567d008d4f92a99ffe45f8d0878e99275/aiohttp/client_reqrep.py#L215-L219
    # so the query string is built by hand and ``params=None`` is passed.
    query = utils.prepare_params(signed_params)
    endpoint = f'https://passport.bilibili.com/x/passport-login/oauth2/login?{query}'
    return await user.login_session.request_json(
        'POST',
        endpoint,
        headers=user.app.headers,
        params=None,
        ctrl=LOGIN_CTRL)
async def refresh_token(user):
    """POST a signed refresh-token request built from the stored credentials.

    The stored cookie string (``user.dict_user['cookie']``) is split on ``;``
    into ``key=value`` pairs, which are merged into the signed parameters
    together with the access key and refresh token.

    Args:
        user: Object providing ``dict_user`` (with ``cookie``, ``access_key``
            and ``refresh_token`` entries), ``app_sign``, ``login_session``
            and ``app.headers``.

    Returns:
        Whatever ``user.login_session.request_json`` returns for the request.

    Raises:
        ValueError: If a non-blank cookie segment contains no ``=``.
    """
    cookie_params = {}
    for segment in user.dict_user['cookie'].split(';'):
        # A trailing ';' (or an empty cookie string) yields blank segments;
        # skip them instead of failing on the unpack below.
        if not segment.strip():
            continue
        # Split on the first '=' only: cookie values may themselves contain
        # '=' (for example base64 padding).
        key, value = segment.split('=', 1)
        cookie_params[key] = value

    extra_params = {
        'access_key': user.dict_user['access_key'],
        'access_token': user.dict_user['access_key'],
        'refresh_token': user.dict_user['refresh_token'],
        'ts': utils.curr_time(),
        **cookie_params,
    }
    params = user.app_sign(extra_params)

    # No way around this: the cookie contains special characters, so the
    # query string is built by hand (unrelated to the yarl compatibility
    # issue).
    params_str = utils.prepare_params(params)
    url = f'https://passport.bilibili.com/api/v2/oauth2/refresh_token?{params_str}'
    json_rsp = await user.login_session.request_json(
        'POST',
        url,
        headers=user.app.headers,
        params=None,
        ctrl=LOGIN_CTRL)
    # NOTE(review): this prints the raw response, which presumably contains
    # the refreshed tokens - consider removing or routing through logging.
    print('json_rsp', json_rsp)
    return json_rsp
def main():
    """Train the estimator on the letters data and decode sample words.

    Each of the ``args.num_epochs`` rounds trains for 1000 steps on a
    shuffled, endlessly repeating input and then greedy-decodes three sample
    words.
    """
    loader = DataLoader(
        source_path='../temp/letters_source.txt',
        target_path='../temp/letters_target.txt')
    sources, targets = loader.load()

    estimator = tf.estimator.Estimator(
        tf_estimator_model_fn,
        params=prepare_params(loader),
        model_dir=args.model_dir)

    for _ in range(args.num_epochs):
        # num_epochs=None lets the input repeat indefinitely; the round is
        # bounded by ``steps`` instead.
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'source': sources, 'target': targets},
            batch_size=args.batch_size,
            num_epochs=None,
            shuffle=True)
        estimator.train(train_input_fn, steps=1000)
        greedy_decode(['apple', 'common', 'zhedong'], estimator, loader)
def main():
    """Train the estimator on the dialog data and decode sample sentences.

    Prints the source and target vocabulary sizes, runs a single training
    pass over the shuffled data, then greedy-decodes four sample sentences.
    """
    loader = DataLoader(
        source_path='../temp/dialog_source.txt',
        target_path='../temp/dialog_target.txt')
    sources, targets = loader.load()

    print('Source Vocab Size:', len(loader.source_word2idx))
    print('Target Vocab Size:', len(loader.target_word2idx))

    estimator = tf.estimator.Estimator(
        tf_estimator_model_fn,
        params=prepare_params(loader))

    for _ in range(1):
        features = {'source': sources, 'target': targets}
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x=features,
            batch_size=args.batch_size,
            num_epochs=1,
            shuffle=True)
        estimator.train(train_input_fn)
        greedy_decode(['你是谁', '你喜欢我吗', '给我唱一首歌', '我帅吗'], estimator, loader)
def fit_predict(self, **kwargs) -> float:
    """Cross-validate one hyperparameter set and return the score to optimize.

    This method is written as a black box for the Bayesian optimization
    library: it accepts hyperparameters, trains XGBoost and/or LightGBM on
    every KFold split, predicts the validation and training parts, and
    averages three metrics (MAE, RMSE and thresholded MAE) over the folds.
    A hand-rolled cross-validation is used instead of the CV bundled with
    the packages so the per-fold predictions of both models can be combined,
    and so the data can be manipulated before prediction.

    Side effects: replaces ``self.params_xgb`` / ``self.params_lgb``, adds
    the per-fold gains to ``self.feature_importance`` and writes it to
    ``self.config['feat_imp_path']``, increments ``self.count`` and prints a
    one-line summary.

    Parameters
    ----------
    **kwargs
        Key / value pairs of the hyperparameters of xgb and/or lgb. When
        ``config['algorithm']`` is ``'both'``, ``balance`` must be present:
        it is the weight of the XGBoost prediction in the blend.

    Returns
    -------
    float
        The negated, fold-averaged validation metric selected by
        ``config['bo_optimize']`` (``'rmse'``, ``'mae'`` or ``'mae_t'``).

    Raises
    ------
    ValueError
        If ``config['algorithm']`` or ``config['bo_optimize']`` holds an
        unsupported value.
    """
    algorithm = self.config['algorithm']
    objective = self.config['bo_optimize']
    # Fail before any training instead of hitting an unbound local later.
    if algorithm not in ('xgb', 'lgb', 'both'):
        raise ValueError(
            "config['algorithm'] must be 'xgb', 'lgb' or 'both', got {!r}".format(algorithm))
    if objective not in ('rmse', 'mae', 'mae_t'):
        raise ValueError(
            "config['bo_optimize'] must be 'rmse', 'mae' or 'mae_t', got {!r}".format(objective))

    use_xgb = algorithm in ('xgb', 'both')
    use_lgb = algorithm in ('lgb', 'both')
    n_fold = self.config['n_fold']
    threshold = self.config['threshold']

    # Fold-averaged metrics; the keys match the accepted 'bo_optimize' values.
    valid_avg = {'mae': 0.0, 'rmse': 0.0, 'mae_t': 0.0}
    train_avg = {'mae': 0.0, 'rmse': 0.0, 'mae_t': 0.0}

    self.params_xgb = prepare_params(self.params_xgb, self.config, kwargs, 'xgb')
    self.params_lgb = prepare_params(self.params_lgb, self.config, kwargs, 'lgb')

    folds = KFold(n_splits=n_fold,
                  shuffle=self.config['shuffle'],
                  random_state=self.config['random_state'])

    for train_index, valid_index in folds.split(self.X, self.y):
        X_train, X_valid = self.X.iloc[train_index], self.X.iloc[valid_index]
        y_train, y_valid = self.y.iloc[train_index], self.y.iloc[valid_index]

        if use_xgb:
            xgb_train = xgb.DMatrix(X_train, y_train)
            xgb_valid = xgb.DMatrix(X_valid, y_valid)
            # NOTE(review): XGBoost reads config['verbose'] while LightGBM
            # reads config['verbose_eval'] - confirm both keys are intended.
            bst_xgb = xgb.train(self.params_xgb,
                                xgb_train,
                                num_boost_round=self.config['num_boost_round'],
                                early_stopping_rounds=self.config['early_stopping_rounds'],
                                verbose_eval=self.config['verbose'],
                                evals=[(xgb_valid, 'eval')])

            # Features absent from the booster's score dict contribute 0 gain.
            xgb_importance = bst_xgb.get_score(importance_type='total_gain')
            self.feature_importance['gain_xgb'] += self.feature_importance['feature'].apply(
                lambda name: xgb_importance.get(name, 0.0))

            # best_iteration is 0-based, whereas ntree_limit is a tree count
            # (and 0 means "all trees"); best_ntree_limit is the matching
            # count for the best iteration.
            xgb_tree_limit = bst_xgb.best_ntree_limit
            xgb_valid_pred = bst_xgb.predict(xgb.DMatrix(X_valid), ntree_limit=xgb_tree_limit)
            xgb_train_pred = bst_xgb.predict(xgb.DMatrix(X_train), ntree_limit=xgb_tree_limit)

        if use_lgb:
            lgb_train = lgb.Dataset(X_train, y_train)
            lgb_valid = lgb.Dataset(X_valid, y_valid)
            bst_lgb = lgb.train(self.params_lgb,
                                lgb_train,
                                num_boost_round=self.config['num_boost_round'],
                                early_stopping_rounds=self.config['early_stopping_rounds'],
                                verbose_eval=self.config['verbose_eval'],
                                valid_sets=[lgb_valid])
            lgb_best_iteration = bst_lgb.best_iteration
            self.feature_importance['gain_lgb'] += np.array(
                bst_lgb.feature_importance(importance_type='gain'))
            lgb_valid_pred = bst_lgb.predict(X_valid, num_iteration=lgb_best_iteration)
            lgb_train_pred = bst_lgb.predict(X_train, num_iteration=lgb_best_iteration)

        if algorithm == 'xgb':
            y_pred = xgb_valid_pred
            y_pred_train = xgb_train_pred
        elif algorithm == 'lgb':
            y_pred = lgb_valid_pred
            y_pred_train = lgb_train_pred
        else:
            # 'both': linear blend, with 'balance' as the XGBoost weight.
            balance = kwargs['balance']
            y_pred = balance * xgb_valid_pred + (1.0 - balance) * lgb_valid_pred
            y_pred_train = balance * xgb_train_pred + (1.0 - balance) * lgb_train_pred

        # Each fold contributes 1 / n_fold of its metric to the average.
        for averages, y_true, y_hat in ((valid_avg, y_valid, y_pred),
                                        (train_avg, y_train, y_pred_train)):
            averages['mae_t'] += mae_thresh(y_true, y_hat, thresh=threshold) / n_fold
            averages['rmse'] += rmse(y_true, y_hat) / n_fold
            averages['mae'] += mean_absolute_error(y_true, y_hat) / n_fold

    self.count += 1
    print('Iteration {} - Validation / Train - MAE: {:.3f} / {:.3f}, RMSE: {:.3f} / {:.3f}, MAE above {}: {:.3f} / {:.3f}'.format(
        self.count,
        valid_avg['mae'], train_avg['mae'],
        valid_avg['rmse'], train_avg['rmse'],
        threshold,
        valid_avg['mae_t'], train_avg['mae_t']))

    self.feature_importance.to_csv(self.config['feat_imp_path'], index=None)

    # The error is negated - presumably because the optimizer maximizes its
    # objective.
    return -valid_avg[objective]