def test_inference_train():
    from glob import glob
    from model import load_model, find_best, delete_model
    import keras.backend as K
    from utils import get_session
    from transfer import inference_train

    name = 'test/transfer'
    delete_model(name)
    K.set_session(get_session())
    # Use only the first 100 frames to keep the test fast
    imgs_path = glob('data/DeepQ-Vivepaper/frame/**/*.png', recursive=True)[:100]
    valid_csv = 'data/DeepQ-Vivepaper/data/annotations.csv'
    class_map = 'data/class-map.csv'
    thresh = 0.98
    # Load the best checkpoint found so far and run transfer training
    model = load_model(find_best()[0], compile=True)
    buff = inference_train(model, imgs_path, valid_csv, class_map, thresh,
                           batch_size=4, image_min_side=460,
                           image_max_side=1024, name=name)
    buff.seek(0)
    print(buff.read())
def test_main():
    from inference import main
    from model import find_best

    weights = find_best()[0]
    print(weights)
    args = ('--weights %s --batch-size 1' % weights).split()
    score = main(args)
    assert score > 0
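# A minimal way to drive the two tests above, assuming pytest is the test
# runner for this repo (the flags and node selection here are illustrative,
# not repo configuration):
if __name__ == "__main__":
    import sys
    import pytest
    # -x stops at the first failure; -k selects the tests defined above
    sys.exit(pytest.main(
        ["-x", "-k", "test_inference_train or test_main", __file__]))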
def main(): """ 命令行参数及其整理 """ parser = utils.MyArguments() parser.add_argument("--De", default=5.2, type=float, help="平均潜伏期") parser.add_argument("--Dq", default=14, type=float, help="平均隔离期") parser.add_argument("--c", default=13.0046, type=float, help="初始平均接触率") parser.add_argument("--q", default=0.0, type=float, help="初始隔离率") parser.add_argument("--beta", default=2.03e-9, type=float, help="基础传染概率") parser.add_argument("--theta", default=1.6003, type=float, help="无症状感染者传染概率系数") parser.add_argument("--nu", default=1.5008, type=float, help="潜伏期传染概率系数") parser.add_argument("--phi", default=0.9, type=float, help="有症状感染者收治率") parser.add_argument("--gammaI", default=0.1029, type=float, help="有症状感染者自愈速率") parser.add_argument("--gammaA", default=0.2978, type=float, help="无症状感染者自愈速度") parser.add_argument("--gammaH", default=1 / 10.5, type=float, help="医院治愈速率") parser.add_argument("--deltaI", default=1 / 3.5, type=float, help="出现症状患者被收治的速率") parser.add_argument("--deltaQ", default=0.1259, type=float, help="隔离的潜伏者出现症状(及时被收治)的速率") parser.add_argument("--rho", default=0.6834, type=float, help="有症状感染者占所有感染者的比例") parser.add_argument("--use_19", action="store_true") parser.add_argument("--zero_spring", action="store_true") parser.add_argument("-pil", "--piecewise_interval_length", default=3, type=int) args = parser.parse_args() # 对于一些通用的参数,这里已经进行整理了 """ 读取准备好的数据 """ dat_file = "./DATA/Provinces.pkl" dataset = utils.Dataset(dat_file, args.t0, args.tm, args.fit_time_start) """ 构建、或读取、或训练模型 """ # 根据不同的情况来得到合适的模型 if args.model is not None and args.model != "fit": models = NetSEAIRQ_piecewise.load(args.model) else: # 不然就进行训练 # 设置我们拟合模型需要的数据 if args.use_whhb: mask = None else: mask = np.full(dataset.num_regions, True, dtype=np.bool) mask[0] = False fit_start_index = (dataset.fit_start_t.ord - dataset.epi_t0.ord) fit_start_index = int(fit_start_index) fit_data_all = dataset.epi_times.delta[fit_start_index:] # 根据分段的宽度,设置多个模型,并将其训练用参数也 pil = args.piecewise_interval_length n_models = int(ceil(fit_data_all.shape[0] / pil)) models = [] score_kwargs = [] for i in range(n_models): model = NetSEAIRQ_piecewise( populations=dataset.populations, y0_hb=np.array([0, 0, 0, 0, args.y0, 0, 0]), score_type=args.fit_score, gamma_func_kwargs={ "gammas": (dataset.out19_dict if args.use_19 else dataset.out20_dict), "zero_period": (dataset.zero_period.delta if args.zero_spring else None) }, Pmn_func_kwargs={"pmn": dataset.pmn_matrix_relative}, De=args.De, Dq=args.Dq, c=args.c, q=args.q, beta=args.beta, rho=args.rho, deltaI=args.deltaI, deltaQ=args.deltaQ, gammaI=args.gammaI, gammaA=args.gammaH, gammaH=args.gammaH, theta=args.theta, nu=args.nu, phi=args.phi, ) use_dat_start = i * pil use_dat_end = min((i + 1) * pil, fit_data_all.shape[0]) score_kwarg = { "times": dataset.epi_times.delta[use_dat_start:use_dat_end], "mask": mask, "trueH": dataset.trueH[use_dat_start:use_dat_end], # "trueR": (dataset.trueD + dataset.trueR)[ # use_dat_start:use_dat_end] } models.append(model) score_kwargs.append(score_kwarg) # 搜索最优参数 if args.fit_method == "annealing": fit_kwargs = {"callback": utils.callback, "method": "annealing"} else: fit_kwargs = { "method": args.fit_method, "fig_dir": args.save_dir + "/", "njobs": -1, "NIND": args.geatpy_nind, "MAXGEN": args.geatpy_maxgen, "n_populations": args.geatpy_npop } last_y0 = None predHs = [] all_opts = [] for i, (model, score_kwarg) in enumerate(zip(models, score_kwargs)): # 被这次训练的时间整理出来 start_time = utils.CustomDate.from_delta( score_kwarg["times"].min(), dataset.t0.str) end_time = 
utils.CustomDate.from_delta(score_kwarg["times"].max(), dataset.t0.str) print("开始训练 %s<->%s" % (start_time.str, end_time.str)) # 第一次训练的模型和后面训练的模型使用不同的参数 # 之后训练的模型要使用前面模型的最后一天输出作为y0 if i == 0: model.no_fit_params([]) else: model.no_fit_params(["y0_hb[2:4]"]) model.set_y0(last_y0) # 得到训练参数,进行训练 dim, lb, ub = model.fit_params_range() opt_res = find_best(lambda x: score_func(x, model, score_kwarg), dim, lb, ub, **fit_kwargs) all_opts.append(opt_res) # 将得到的最优参数设置到模型中 model.set_params(opt_res["BestParam"]) # 预测结果 preds = model.predict(score_kwarg["times"]) predHs.append(preds[0]) # 预测结果中最后一天作为新的y0 last_y0 = np.concatenate(preds, axis=1)[-1, :] predHs = np.concatenate(predHs, axis=0) utils.save(all_opts, os.path.join(args.save_dir, "opt_res.pkl")) utils.save([m.kwargs for m in models], os.path.join(args.save_dir, "models.pkl")) # model.save(os.path.join(args.save_dir, "model.pkl")) # utils.save(opt_res, os.path.join(args.save_dir, "opt_res.pkl")) """ 计算相关指标以及绘制图像 """ # 预测R0 pass # 计算每个地区的曲线下面积以及面积差,并保存 # auc = under_area( # dataset.epi_times.delta, # dataset.trueH, # dataset.pred_times.delta, # nopr_preds[0], # ) # auc_df = pd.DataFrame( # auc.T, columns=["true_area", "pred_area", "diff_area"], # index=dataset.regions # ) # auc_df["population"] = dataset.populations # auc_df["diff_norm"] = auc_df.diff_area / auc_df.population # auc_df.sort_values("diff_norm", inplace=True) # 为每个地区绘制曲线图 plt.rcParams["font.sans-serif"] = ["SimHei"] img_dir = os.path.join(args.save_dir, "imgs") if not os.path.exists(img_dir): os.mkdir(img_dir) for i, reg in enumerate(dataset.regions): """ y0 = [H R E A I Sq Eq] + [S] """ plot_one_regions( reg, [ ("trueH", dataset.epi_times.ord.astype("int"), dataset.trueH[:, i], "ro"), # ("trueR", dataset.epi_times.ord.astype("int"), # dataset.trueR[:, i]+dataset.trueD[:, i], "bo"), # ("predH", dataset.pred_times.ord.astype("int"), # predHs[:, i], "r"), ("predH", dataset.epi_times.ord.astype("int"), predHs[:, i], "r"), # ("predR", dataset.pred_times.ord.astype("int"), # prot_preds[1][:, i], "b"), # ("predE", dataset.pred_times.ord.astype("int"), # prot_preds[3][:, i], "y"), # ("predA", dataset.pred_times.ord.astype("int"), # prot_preds[4][:, i], "g"), # ("predI", dataset.pred_times.ord.astype("int"), # prot_preds[4][:, i], "c"), ], [ ("trueH", dataset.epi_times.ord.astype("int"), dataset.trueH[:, i], "ro"), # ("trueR", dataset.epi_times.ord.astype("int"), # dataset.trueR[:, i]+dataset.trueD[:, i], "bo"), # ("predH", dataset.pred_times.ord.astype("int"), # predHs[:, i], "r"), ("predH", dataset.epi_times.ord.astype("int"), predHs[:, i], "r"), # ("predR", dataset.pred_times.ord.astype("int"), # nopr_preds[1][:, i], "b"), # ("predE", dataset.pred_times.ord.astype("int"), # nopr_preds[3][:, i], "y"), # ("predA", dataset.pred_times.ord.astype("int"), # nopr_preds[4][:, i], "g"), # ("predI", dataset.pred_times.ord.astype("int"), # nopr_preds[4][:, i], "c"), ], save_dir=img_dir) # # 保存结果 # for i, name in enumerate([ # "predH", "predR", "predE", "predA", "predI" # ]): # pd.DataFrame( # prot_preds[i], # columns=dataset.regions, # index=dataset.pred_times.str # ).to_csv( # os.path.join(args.save_dir, "protect_%s.csv" % name) # ) # pd.DataFrame( # nopr_preds[i], # columns=dataset.regions, # index=dataset.pred_times.str # ).to_csv( # os.path.join(args.save_dir, "noprotect_%s.csv" % name) # ) # auc_df.to_csv(os.path.join(args.save_dir, "auc.csv")) # # 这里保存的是原始数据 # for i, attr_name in enumerate(["trueD", "trueH", "trueR"]): # save_arr = getattr(dataset, attr_name) # pd.DataFrame( # save_arr, # 
columns=dataset.regions, # index=dataset.epi_times.str # ).to_csv(os.path.join(args.save_dir, "%s.csv" % attr_name)) # 保存args到路径中(所有事情都完成再保存数据,安全) save_args = deepcopy(args.__dict__) save_args["model_type"] = "NetSEAIRQ-piecewise" utils.save(save_args, os.path.join(args.save_dir, "args.json"), "json")
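# The training loop above chains the piecewise segments by carrying state
# forward: each segment's fitted model predicts its own window, and the last
# predicted day seeds the next segment's y0. A minimal standalone sketch of
# that pattern (fit_segment/predict_segment are toy stand-ins, not the real
# NetSEAIRQ_piecewise API):
import numpy as np

def piecewise_chain(times, segment_len, fit_segment, predict_segment, y0):
    """Fit consecutive time segments, seeding each segment with the
    previous segment's final predicted state."""
    preds = []
    for start in range(0, len(times), segment_len):
        seg_times = times[start:start + segment_len]
        params = fit_segment(seg_times, y0)              # per-segment optimum
        seg_pred = predict_segment(params, seg_times, y0)
        preds.append(seg_pred)
        y0 = seg_pred[-1]                                # last day -> next y0
    return np.concatenate(preds, axis=0)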
def main(): """ 命令行参数及其整理 """ parser = utils.MyArguments() parser.add_argument("--De", default=5.2, type=float, help="平均潜伏期") parser.add_argument("--Dq", default=14, type=float, help="平均隔离期") parser.add_argument("--c", default=13.0046, type=float, help="初始平均接触率") parser.add_argument("--q", default=0.0, type=float, help="初始隔离率") parser.add_argument( "--beta", default=2.03e-9, type=float, help="基础传染概率" ) parser.add_argument( "--theta", default=1.6003, type=float, help="无症状感染者传染概率系数" ) parser.add_argument( "--nu", default=1.5008, type=float, help="潜伏期传染概率系数" ) parser.add_argument( "--phi", default=0.9, type=float, help="有症状感染者收治率" ) parser.add_argument( "--gammaI", default=0.1029, type=float, help="有症状感染者自愈速率" ) parser.add_argument( "--gammaA", default=0.2978, type=float, help="无症状感染者自愈速度" ) parser.add_argument( "--gammaH", default=1/10.5, type=float, help="医院治愈速率" ) parser.add_argument( "--deltaI", default=1/3.5, type=float, help="出现症状患者被收治的速率" ) parser.add_argument( "--deltaQ", default=0.1259, type=float, help="隔离的潜伏者出现症状(及时被收治)的速率" ) parser.add_argument( "--rho", default=0.6834, type=float, help="有症状感染者占所有感染者的比例" ) parser.add_argument("--protect_ck", default=0.0, type=float) parser.add_argument("--protect_qk", default=0.0, type=float) parser.add_argument("--use_19", action="store_true") parser.add_argument("--zero_spring", action="store_true") args = parser.parse_args() # 对于一些通用的参数,这里已经进行整理了 """ 读取准备好的数据 """ dat_file = "./DATA/Provinces.pkl" dataset = utils.Dataset(dat_file, args.t0, args.tm, args.fit_time_start) """ 构建、或读取、或训练模型 """ # 根据不同的情况来得到合适的模型 if args.model is not None and args.model != "fit": model = NetSEAIRQ.load(args.model) else: model = NetSEAIRQ( populations=dataset.populations, y0for1=np.array([0, 0, 0, 0, args.y0, 0, 0]), protect=True, score_type=args.fit_score, protect_args={ "t0": dataset.protect_t0.delta, "c_k": args.protect_ck, "q_k": args.protect_qk }, gamma_func_kwargs={ "gammas": (dataset.out19_dict if args.use_19 else dataset.out20_dict), "zero_period": (dataset.zero_period.delta if args.zero_spring else None) }, Pmn_func_kwargs={"pmn": dataset.pmn_matrix_relative}, De=args.De, Dq=args.Dq, c=args.c, q=args.q, beta=args.beta, rho=args.rho, deltaI=args.deltaI, deltaQ=args.deltaQ, gammaI=args.gammaI, gammaA=args.gammaH, gammaH=args.gammaH, theta=args.theta, nu=args.nu, phi=args.phi, ) if args.model == "fit": # 设置我们拟合模型需要的数据 if args.use_whhb: mask = None else: mask = np.full(dataset.num_regions, True, dtype=np.bool) mask[0] = False fit_start_index = (dataset.fit_start_t.ord - dataset.epi_t0.ord) fit_start_index = int(fit_start_index) score_kwargs = { "times": dataset.epi_times.delta[fit_start_index:], "mask": mask, } score_kwargs["trueH"] = dataset.trueH # score_kwargs["trueR"] = dataset.trueD + dataset.trueR # 搜索 if args.fit_method == "annealing": fit_kwargs = { "callback": utils.callback, "method": "annealing" } else: fit_kwargs = { "method": args.fit_method, "fig_dir": args.save_dir+"/", "njobs": -1, "NIND": args.geatpy_nind, "MAXGEN": args.geatpy_maxgen, "n_populations": args.geatpy_npop } dim, lb, ub = model.fit_params_range() opt_res = find_best( lambda x: score_func(x, model, score_kwargs), dim, lb, ub, **fit_kwargs ) # 把拟合得到的参数整理成dataframe,然后保存 temp_d, temp_i = {}, 0 for i, (k, vs) in enumerate(model.fit_params_info.items()): params_k = opt_res["BestParam"][temp_i:(temp_i+vs[0])] for j, v in enumerate(params_k): temp_d[k+str(j)] = v temp_i += vs[0] pd.Series(temp_d).to_csv( os.path.join(args.save_dir, "params.csv") ) # 将得到的最优参数设置到模型中,并保存 
model.set_params(opt_res["BestParam"]) model.save(os.path.join(args.save_dir, "model.pkl")) utils.save(opt_res, os.path.join(args.save_dir, "opt_res.pkl")) # 预测结果 prot_preds = model.predict(dataset.pred_times.delta) model.protect = False nopr_preds = model.predict(dataset.pred_times.delta) """ 计算相关指标以及绘制图像 """ # 预测R0 pass # 计算每个地区的曲线下面积以及面积差,并保存 auc = under_area( dataset.epi_times.delta, dataset.trueH, dataset.pred_times.delta, nopr_preds[0], ) auc_df = pd.DataFrame( auc.T, columns=["true_area", "pred_area", "diff_area"], index=dataset.regions ) auc_df["population"] = dataset.populations auc_df["diff_norm"] = auc_df.diff_area / auc_df.population auc_df.sort_values("diff_norm", inplace=True) # 为每个地区绘制曲线图 plt.rcParams["font.sans-serif"] = ["SimHei"] img_dir = os.path.join(args.save_dir, "imgs") if not os.path.exists(img_dir): os.mkdir(img_dir) for i, reg in enumerate(dataset.regions): """ y0 = [H R E A I Sq Eq] + [S] """ plot_one_regions( reg, [ ("trueH", dataset.epi_times.ord.astype("int"), dataset.trueH[:, i], "ro"), # ("trueR", dataset.epi_times.ord.astype("int"), # dataset.trueR[:, i]+dataset.trueD[:, i], "bo"), ("predH", dataset.pred_times.ord.astype("int"), prot_preds[0][:, i], "r"), # ("predR", dataset.pred_times.ord.astype("int"), # prot_preds[1][:, i], "b"), # ("predE", dataset.pred_times.ord.astype("int"), # prot_preds[3][:, i], "y"), # ("predA", dataset.pred_times.ord.astype("int"), # prot_preds[4][:, i], "g"), # ("predI", dataset.pred_times.ord.astype("int"), # prot_preds[4][:, i], "c"), ], [ ("trueH", dataset.epi_times.ord.astype("int"), dataset.trueH[:, i], "ro"), # ("trueR", dataset.epi_times.ord.astype("int"), # dataset.trueR[:, i]+dataset.trueD[:, i], "bo"), ("predH", dataset.pred_times.ord.astype("int"), nopr_preds[0][:, i], "r"), # ("predR", dataset.pred_times.ord.astype("int"), # nopr_preds[1][:, i], "b"), # ("predE", dataset.pred_times.ord.astype("int"), # nopr_preds[3][:, i], "y"), # ("predA", dataset.pred_times.ord.astype("int"), # nopr_preds[4][:, i], "g"), # ("predI", dataset.pred_times.ord.astype("int"), # nopr_preds[4][:, i], "c"), ], save_dir=img_dir ) # 保存结果 for i, name in enumerate([ "predH", "predR", "predE", "predA", "predI" ]): pd.DataFrame( prot_preds[i], columns=dataset.regions, index=dataset.pred_times.str ).to_csv( os.path.join(args.save_dir, "protect_%s.csv" % name) ) pd.DataFrame( nopr_preds[i], columns=dataset.regions, index=dataset.pred_times.str ).to_csv( os.path.join(args.save_dir, "noprotect_%s.csv" % name) ) auc_df.to_csv(os.path.join(args.save_dir, "auc.csv")) # 这里保存的是原始数据 for i, attr_name in enumerate(["trueD", "trueH", "trueR"]): save_arr = getattr(dataset, attr_name) pd.DataFrame( save_arr, columns=dataset.regions, index=dataset.epi_times.str ).to_csv(os.path.join(args.save_dir, "%s.csv" % attr_name)) # 保存args到路径中(所有事情都完成再保存数据,安全) save_args = deepcopy(args.__dict__) save_args["model_type"] = "NetSEAIRQ" utils.save(save_args, os.path.join(args.save_dir, "args.json"), "json")
def main(): """ 命令行参数及其整理 """ parser = utils.MyArguments() parser.add_argument("--De", default=5.2, type=float) parser.add_argument("--Di", default=11.5, type=float) parser.add_argument("--alpha_E", default=0.0, type=float) parser.add_argument("--alpha_I", default=0.4, type=float) parser.add_argument("--protect_k", default=0.0, type=float) parser.add_argument("--use_19", action="store_true") parser.add_argument("--zero_spring", action="store_true") args = parser.parse_args() # 对于一些通用的参数,这里已经进行整理了 """ 读取准备好的数据 """ # if args.region_type == "city": # dat_file = "./DATA/City.pkl" # else: dat_file = "./DATA/Provinces.pkl" dataset = utils.Dataset(dat_file, args.t0, args.tm, args.fit_time_start) """ 构建、或读取、或训练模型 """ # 根据不同的情况来得到合适的模型 if args.model is not None: model = NetSEIR.load(args.model) else: model = NetSEIR( De=args.De, Di=args.Di, populations=dataset.populations, y0for1=args.y0, alpha_I=args.alpha_I, alpha_E=args.alpha_E, protect=True, score_type=args.fit_score, protect_args={ "t0": dataset.protect_t0.delta, "k": args.protect_k }, gamma_func_kwargs={ "gammas": (dataset.out19_dict if args.use_19 else dataset.out20_dict), "zero_period": (dataset.zero_period.delta if args.zero_spring else None) }, Pmn_func_kwargs={"pmn": dataset.pmn_matrix_relative}) if args.fit: # 设置我们拟合模型需要的数据 if args.use_whhb: mask = None else: mask = np.full(dataset.num_regions, True, dtype=np.bool) mask[0] = False fit_start_index = (dataset.fit_start_t.ord - dataset.epi_t0.ord) fit_start_index = int(fit_start_index) score_kwargs = { "times": dataset.epi_times.delta[fit_start_index:], "true_infects": dataset.trueH[fit_start_index:, :], "mask": mask, } # 搜索 if args.fit_method == "annealing": fit_kwargs = {"callback": utils.callback, "method": "annealing"} else: fit_kwargs = { "method": "SEGA", "fig_dir": args.save_dir + "/", "njobs": -1, "NIND": args.geatpy_nind, "MAXGEN": args.geatpy_maxgen, "n_populations": args.geatpy_npop } dim, lb, ub = model.fit_params_range() opt_res = find_best(lambda x: score_func(x, model, score_kwargs), dim, lb, ub, **fit_kwargs) # 把拟合得到的参数整理成dataframe,然后保存 temp_d, temp_i = {}, 0 for i, (k, vs) in enumerate(model.fit_params_info.items()): params_k = opt_res["BestParam"][temp_i:(temp_i + vs[0])] for j, v in enumerate(params_k): temp_d[k + str(j)] = v temp_i += vs[0] pd.Series(temp_d).to_csv(os.path.join(args.save_dir, "params.csv")) # 将得到的最优参数设置到模型中,并保存 model.set_params(opt_res["BestParam"]) model.save(os.path.join(args.save_dir, "model.pkl")) utils.save(opt_res, os.path.join(args.save_dir, "opt_res.pkl")) # 预测结果 prot_preds = model.predict(dataset.pred_times.delta) model.protect = False nopr_preds = model.predict(dataset.pred_times.delta) """ 计算相关指标以及绘制图像 """ # 预测R0 pass # 计算每个地区的曲线下面积以及面积差,并保存 auc = under_area( dataset.epi_times.delta, dataset.trueH, dataset.pred_times.delta, nopr_preds[2], ) auc_df = pd.DataFrame(auc.T, columns=["true_area", "pred_area", "diff_area"], index=dataset.regions) auc_df["population"] = dataset.populations auc_df["diff_norm"] = auc_df.diff_area / auc_df.population auc_df.sort_values("diff_norm", inplace=True) # utils.save(auc, os.path.join(args.save_dir, "auc.pkl")) # 为每个地区绘制曲线图 plt.rcParams["font.sans-serif"] = ["SimHei"] img_dir = os.path.join(args.save_dir, "imgs") if not os.path.exists(img_dir): os.mkdir(img_dir) for i, reg in enumerate(dataset.regions): plot_one_regions(reg, [("true", dataset.epi_times.ord.astype("int"), dataset.trueH[:, i], "ro"), ("predI", dataset.pred_times.ord.astype("int"), prot_preds[2][:, i], "r"), ("predE", 
dataset.pred_times.ord.astype("int"), prot_preds[1][:, i], "y"), ("predR", dataset.pred_times.ord.astype("int"), prot_preds[3][:, i], "b")], [("true", dataset.epi_times.ord.astype("int"), dataset.trueH[:, i], "ro"), ("predI", dataset.pred_times.ord.astype("int"), nopr_preds[2][:, i], "r"), ("predE", dataset.pred_times.ord.astype("int"), nopr_preds[1][:, i], "y"), ("predR", dataset.pred_times.ord.astype("int"), nopr_preds[3][:, i], "b")], save_dir=img_dir) # 保存结果 for i, name in enumerate(["predS", "predE", "predI", "predR"]): pd.DataFrame(prot_preds[i], columns=dataset.regions, index=dataset.pred_times.str).to_csv( os.path.join(args.save_dir, "protect_%s.csv" % name)) pd.DataFrame(nopr_preds[i], columns=dataset.regions, index=dataset.pred_times.str).to_csv( os.path.join(args.save_dir, "noprotect_%s.csv" % name)) auc_df.to_csv(os.path.join(args.save_dir, "auc.csv")) # 这里保存的是原始数据 for i, attr_name in enumerate(["trueD", "trueH", "trueR"]): save_arr = getattr(dataset, attr_name) pd.DataFrame(save_arr, columns=dataset.regions, index=dataset.epi_times.str).to_csv( os.path.join(args.save_dir, "%s.csv" % attr_name)) # 保存args到路径中(所有事情都完成再保存数据,安全) save_args = deepcopy(args.__dict__) save_args["model_type"] = "NetSEIR" utils.save(save_args, os.path.join(args.save_dir, "args.json"), "json")