def calculate(time_s, time_e):
    """
    Compute the weekly fund indicators for one update window and store them.

    Tasks (a mapping ``statistic_date -> fund ids``) come from
    ``pre.generate_tasks``; statistic dates before 2015-01-01 are discarded.
    For every remaining date, in ascending order, the return / risk /
    subsidiary indicators of each fund are computed with ``cal.calculate``
    and written to ``fund_weekly_return``, ``fund_weekly_risk`` and
    ``fund_subsidiary_weekly_index``.

    :param time_s: start of the update window, forwarded to ``pre.generate_tasks``
    :param time_e: end of the update window, forwarded to ``pre.generate_tasks``
        and used as the label of the progress messages
    :return: None
    """
    try:
        tasks = pre.generate_tasks(time_s, time_e, freq="w", processes=7, conn=engine_rd)
        tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)}
        print(time_e, len(tasks))
    except ValueError as e:
        print(time_e, e)
        # No task mapping exists at this point; carrying on would only fail
        # with an UnboundLocalError on ``tasks`` below.
        return

    for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]):
        print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format(ut=time_e, sd=statistic_date, l=len(ids_used)))

        result_return = []
        result_risk = []
        result_sub = []
        # Stay None when the date yields no fund at all.
        cols_return_sorted = cols_risk_sorted = cols_sub_sorted = None

        data = pre.ProcessedData(statistic_date, list(ids_used), _freq)
        bms = {index_name: cal.Benchmark(attr_dict, index_name) for index_name, attr_dict in data.index.items()}
        tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

        for fid, attrs in data.funds.items():
            fund = cal.Fund(attrs)
            res_return, cols_return_sorted = cal.calculate(_funcs_return, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
            res_risk, cols_risk_sorted = cal.calculate(_funcs_risk, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
            res_sub, cols_sub_sorted = cal.calculate(_funcs_sub, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
            result_return.extend(res_return)
            result_risk.extend(res_risk)
            result_sub.extend(res_sub)

        # Build one labelled frame per target table. Empty result sets are
        # skipped: assigning column names to a 0-column frame raises.
        frames = []
        for table, rows, cols_sorted in (
            ("fund_weekly_return", result_return, cols_return_sorted),
            ("fund_weekly_risk", result_risk, cols_risk_sorted),
            ("fund_subsidiary_weekly_index", result_sub, cols_sub_sorted),
        ):
            if not rows:
                continue
            df = pd.DataFrame(rows)
            df.columns = cal.format_cols(
                cols_sorted, _freq, prefix=["fund_id", "fund_name", "statistic_date", "benchmark"]
            )
            frames.append((table, df))

        try:
            for table, df in frames:
                io.to_sql(table, conn=engine_rd, dataframe=df, chunksize=5000)
        except Exception:
            # Single retry after a short pause; a second failure propagates.
            # All tables are written again, including ones that succeeded
            # NOTE(review): presumably io.to_sql is safe to repeat - confirm.
            time.sleep(10)
            for table, df in frames:
                io.to_sql(table, conn=engine_rd, dataframe=df, chunksize=5000)

    print("TASK DONE: {ut}".format(ut=time_e))
def cal_by_date(statistic_date, fund_ids):
    """
    Compute the three daily indicator groups (``_funcs_1`` / ``_funcs_2`` /
    ``_funcs_3``) of the given funds for one statistic date and write the
    non-empty results to ``fund_daily_return``, ``fund_daily_risk`` and
    ``fund_daily_subsidiary``.

    :param statistic_date: datetime.date
    :param fund_ids: list
    :return: None
    """
    print("STATISTIC_DATE:{sd}, LENGTH:{l}".format(sd=statistic_date, l=len(fund_ids)))

    result_1 = []
    result_2 = []
    result_3 = []
    # Only bound by the fund loop; stay None when there is no fund to process.
    funcs_1_sorted = funcs_2_sorted = funcs_3_sorted = None

    data = pre.ProcessedData(statistic_date, fund_ids, _freq, pe=[], conn=engine_rd, conn_mkt=engine_mkt, weekday=True)
    bms = {
        index_name: cal.Benchmark(attr_dict, index_name)
        for index_name, attr_dict in data.index.items()
    }
    tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

    for fid, attrs in data.funds.items():
        fund = cal.Fund(attrs)
        res_1, funcs_1_sorted = cal.calculate(_funcs_1, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
        res_2, funcs_2_sorted = cal.calculate(_funcs_2, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
        res_3, funcs_3_sorted = cal.calculate(_funcs_3, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
        result_1.extend(res_1)
        result_2.extend(res_2)
        result_3.extend(res_3)

    df_1 = pd.DataFrame(result_1)
    df_2 = pd.DataFrame(result_2)
    df_3 = pd.DataFrame(result_3)

    # Drop rows that are entirely empty.
    df_1.dropna(how='all', inplace=True)
    df_2.dropna(how='all', inplace=True)
    df_3.dropna(how='all', inplace=True)

    # Column names are derived inside the emptiness guard: a non-empty frame
    # implies the fund loop ran, so the matching funcs_N_sorted is bound.
    # Formatting them up front crashed whenever there was no fund.
    for table, df, funcs_sorted in (
        ("fund_daily_return", df_1, funcs_1_sorted),
        ("fund_daily_risk", df_2, funcs_2_sorted),
        ("fund_daily_subsidiary", df_3, funcs_3_sorted),
    ):
        if len(df):
            df.columns = cal.format_cols_mutual(
                funcs_sorted, _freq, prefix=["fund_id", "fund_name", "statistic_date", "benchmark"])
            io.to_sql(table, conn=engine_wt, dataframe=df, chunksize=500)
"mdd_time", "skewness", "kurtosis", "ERVaR" ] cols_return = cal.format_cols(_funcs_return, _freq, prefix=["index_id", "index_name", "statistic_date"]) cols_risk = cal.format_cols(_funcs_risk, _freq, prefix=["index_id", "index_name", "statistic_date"]) cols_sub = cal.format_cols(_funcs_sub, _freq, prefix=["index_id", "index_name", "statistic_date"]) date_rng = pd.date_range(today - relativedelta(weeks=1), today) for statistic_date in date_rng: result_return = [] result_risk = [] result_sub = [] ids_used = pre.fetch_fids_used(statistic_date=statistic_date, freq=_freq, conn=engine_rd) data = pre.ProcessedData(statistic_date, [], _freq) bms = {index_id: cal.Benchmark(attr_dict, index_id) for index_id, attr_dict in data.index.items()} tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate") for bm_name, bm in bms.items(): if bm.id == "y1_treasury_rate": continue res_return = cal.calculate(_funcs_return, _intervals, None, _freq, statistic_date, bm, None, tbond) res_risk = cal.calculate(_funcs_risk, _intervals, None, _freq, statistic_date, bm, None, tbond) res_sub = cal.calculate(_funcs_sub, _intervals, None, _freq, statistic_date, bm, None, tbond) result_return.extend(res_return) result_risk.extend(res_risk) result_sub.extend(res_sub)
tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)} print(update_time, len(tasks)) except ValueError as e: print(update_time, e) continue for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]): if statistic_date != dt.date(2017, 9, 21): continue print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format( ut=update_time, sd=statistic_date, l=len(ids_used))) result_return = [] result_risk = [] result_sub = [] data = pre.ProcessedData(statistic_date, list(ids_used), _freq, pe=[], conn=engine_rd, conn_mkt=engine_mkt) bms = { index_name: cal.Benchmark(attr_dict, index_name) for index_name, attr_dict in data.index.items() } tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate") for fid, attrs in data.funds.items(): if fid != "000127": continue fund = cal.Fund(attrs) res_return, _funcs_return_sorted = cal.calculate( _funcs_return, _intervals, _bms_used, _freq,
# Benchmark ids handed to cal.calculate for every fund below.
_bms_used = ["hs300", "csi500", "sse50", "cbi", "strategy", "FI01", "nfi"]

# The hard-coded ranges limit this run to year 2015, month 1, and the days
# from the 19th through the last day of that month.
for year in range(2015, 2016):
    for month in range(1, 2):
        # Element [1] of monthrange() is used as the last day number of the
        # month (presumably `cld` is the stdlib calendar module - confirm).
        month_range = cld.monthrange(year, month)[1]
        for day in range(19, month_range + 1):
            # Per-day accumulators, one list per indicator group.
            result_risk2 = []
            result_sub2 = []
            result_sub3 = []

            statistic_date = dt.date(year, month, day)
            ids_used = pre.fetch_fids_used(statistic_date=statistic_date, freq=_freq, conn=engine_rd)
            print(statistic_date, len(ids_used))

            data = pre.ProcessedData(statistic_date, ids_used, _freq)
            # One Benchmark wrapper per entry of data.index, keyed by index name.
            bms = {index_name: cal.Benchmark(attr_dict, index_name) for index_name, attr_dict in data.index.items()}
            tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

            for fid, attrs in data.funds.items():
                fund = cal.Fund(attrs)
                # with_func_names=True makes cal.calculate return a pair:
                # the result rows and the function names (used later for
                # column naming - NOTE(review): that code is not visible here).
                res_risk2, funcs_risk2_sorted = cal.calculate(_funcs_risk2, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
                res_sub2, funcs_sub2_sorted = cal.calculate(_funcs_sub2, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
                res_sub3, funcs_sub3_sorted = cal.calculate(_funcs_sub3, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True)
                result_risk2.extend(res_risk2)
                result_sub2.extend(res_sub2)
                result_sub3.extend(res_sub3)

            df_risk2 = pd.DataFrame(result_risk2)
k: v for k, v in tasks.items() if (k >= dt.date(2015, 1, 1) and k < dt.date(UPDATE_TIME.year, UPDATE_TIME.month, 1)) } print(UPDATE_TIME, len(tasks)) except ValueError as e: print(UPDATE_TIME, e) for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]): print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format( ut=UPDATE_TIME, sd=statistic_date, l=len(ids_used))) result_return = [] result_risk = [] result_sub = [] data = pre.ProcessedData(statistic_date, list(ids_used), _freq) bms = { index_name: cal.Benchmark(attr_dict, index_name) for index_name, attr_dict in data.index.items() } tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate") for fid, attrs in data.funds.items(): fund = cal.Fund(attrs) res_return = cal.calculate(_funcs_return, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond) res_risk = cal.calculate(_funcs_risk, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond) res_sub = cal.calculate(_funcs_sub, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond) result_return.extend(res_return) result_risk.extend(res_risk)
def calculate(statistic_date_with_ids_used):
    """
    Compute the monthly return / risk / research indicators for one
    statistic date and store them in the ``org_monthly_*`` tables.

    Args:
        statistic_date_with_ids_used: pair ``(statistic_date, ids_used)``;
            the ids are sorted before being passed to ``pre.ProcessedData``.

    Returns:
        None
    """
    statistic_date, ids_used = statistic_date_with_ids_used
    data = pre.ProcessedData(statistic_date, sorted(ids_used), _freq)

    bms = {}
    for index_name, attr_dict in data.index.items():
        bms[index_name] = cal.Benchmark(attr_dict, index_name)
    tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

    # NOTE(review): results are framed and written once per entity; this is
    # inferred from the result lists being reset inside the loop - confirm.
    for fid, attrs in data.funds.items():
        fund = cal.Fund(attrs)

        # Phase 1: run all three calculations before anything else.
        computed = []
        for funcs in (_funcs_return, _funcs_risk, _funcs_sub):
            rows, names_sorted = cal.calculate(
                funcs, _intervals, _bms_used, _freq, statistic_date,
                fund, bms, tbond, with_func_names=True,
            )
            computed.append((list(rows), names_sorted))

        # Phase 2: turn each result into a labelled frame.
        frames = [pd.DataFrame(rows) for rows, _ in computed]
        for frame, (_, names_sorted) in zip(frames, computed):
            frame.columns = cal.format_cols_org4r(
                names_sorted, _freq,
                prefix=["org_id", "org_name", "statistic_date", "benchmark"])
        for frame in frames:
            frame["index_id"] = sf.SQL.Org4R.INDEXID

        # Phase 3: persist, in the order return -> risk -> research.
        targets = ("org_monthly_return", "org_monthly_risk", "org_monthly_research")
        for table, frame in zip(targets, frames):
            io.to_sql(table, conn=engine_rd, dataframe=frame, chunksize=5000)
def calculate(statistic_date):
    """
    Compute the monthly return / risk / research indicators of every id in
    use on ``statistic_date`` and store them in the ``org_monthly_*`` tables.

    The ids are looked up with ``pre.fetch_fids_used``.
    """
    print(statistic_date)
    ids_used = pre.fetch_fids_used(statistic_date=statistic_date, freq=_freq, conn=engine_rd)
    # For ad-hoc reruns, ids_used can be intersected with a hand-picked set
    # of ids at this point.

    data = pre.ProcessedData(statistic_date, ids_used, _freq)
    bms = dict(
        (index_name, cal.Benchmark(attr_dict, index_name))
        for index_name, attr_dict in data.index.items()
    )
    tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")
    col_prefix = ("org_id", "org_name", "statistic_date", "benchmark")

    # NOTE(review): frames are built and written once per entity; this is
    # inferred from the result lists being reset inside the loop - confirm.
    for fid, attrs in data.funds.items():
        fund = cal.Fund(attrs)
        shared_args = (_intervals, _bms_used, _freq, statistic_date, fund, bms, tbond)

        rows_return, names_return = cal.calculate(_funcs_return, *shared_args, with_func_names=True)
        rows_risk, names_risk = cal.calculate(_funcs_risk, *shared_args, with_func_names=True)
        rows_sub, names_sub = cal.calculate(_funcs_sub, *shared_args, with_func_names=True)

        df_return = pd.DataFrame(list(rows_return))
        df_risk = pd.DataFrame(list(rows_risk))
        df_sub = pd.DataFrame(list(rows_sub))

        # A fresh prefix list is passed on every call.
        df_return.columns = cal.format_cols_org4r(names_return, _freq, prefix=list(col_prefix))
        df_risk.columns = cal.format_cols_org4r(names_risk, _freq, prefix=list(col_prefix))
        df_sub.columns = cal.format_cols_org4r(names_sub, _freq, prefix=list(col_prefix))

        for frame in (df_return, df_risk, df_sub):
            frame["index_id"] = sf.SQL.Org4R.INDEXID

        io.to_sql("org_monthly_return", conn=engine_rd, dataframe=df_return, chunksize=5000)
        io.to_sql("org_monthly_risk", conn=engine_rd, dataframe=df_risk, chunksize=5000)
        io.to_sql("org_monthly_research", conn=engine_rd, dataframe=df_sub, chunksize=5000)
# print(update_time, len(tasks)) # # except ValueError as e: # print(update_time, e) # continue # for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]): statistic_date = dt.date(2017, 8, 25) ids_used = ["JR000001"] result_return = [] result_risk = [] result_sub = [] data = pre.ProcessedData(statistic_date, list(ids_used), _freq, pe=[], weekday=True) bms = { index_name: cal.Benchmark(attr_dict, index_name) for index_name, attr_dict in data.index.items() } tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate") for fid, attrs in data.funds.items(): fund = cal.Fund(attrs) res_return = cal.calculate(_funcs_return, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond) res_risk = cal.calculate(_funcs_risk, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond) res_sub = cal.calculate(_funcs_sub, _intervals, _bms_used, _freq,