def _write_weekly_tables(df_return, df_risk, df_sub):
    """Write the three weekly indicator frames to their tables via io.to_sql."""
    io.to_sql("fund_weekly_return", conn=engine_rd, dataframe=df_return, chunksize=5000)
    io.to_sql("fund_weekly_risk", conn=engine_rd, dataframe=df_risk, chunksize=5000)
    io.to_sql("fund_subsidiary_weekly_index", conn=engine_rd, dataframe=df_sub, chunksize=5000)


def calculate(time_s, time_e):
    """Compute weekly fund indicators for one update window and store them.

    The task mapping ``{statistic_date: fund ids}`` is obtained from
    ``pre.generate_tasks`` and restricted to statistic dates on or after
    2015-01-01.  For every remaining date, in ascending order, the return,
    risk and subsidiary indicator rows of each fund are computed with
    ``cal.calculate`` and written to ``fund_weekly_return``,
    ``fund_weekly_risk`` and ``fund_subsidiary_weekly_index``.

    Args:
        time_s: First bound handed to ``pre.generate_tasks``
            (presumably the start of the update window -- confirm).
        time_e: Second bound handed to ``pre.generate_tasks``; also used as
            the label in progress messages.

    Returns:
        None.  Progress is reported with ``print``.

    Raises:
        Exception: whatever ``io.to_sql`` raises if the single retry of a
            failed write fails as well.
    """
    try:
        tasks = pre.generate_tasks(time_s, time_e, freq="w", processes=7, conn=engine_rd)
    except ValueError as e:
        # No task mapping exists in this case; falling through used to hit an
        # UnboundLocalError on `tasks`, so report the problem and stop here.
        print(time_e, e)
        return

    tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)}
    print(time_e, len(tasks))

    # Leading key columns shared by all three result tables.
    key_cols = ("fund_id", "fund_name", "statistic_date", "benchmark")

    for statistic_date in sorted(tasks):
        ids_used = tasks[statistic_date]
        print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format(ut=time_e, sd=statistic_date, l=len(ids_used)))

        result_return = []
        result_risk = []
        result_sub = []
        # Reset per date so column names from an earlier date are never
        # reused when this date yields no fund at all.
        cols_return_sorted = cols_risk_sorted = cols_sub_sorted = None

        data = pre.ProcessedData(statistic_date, list(ids_used), _freq)
        bms = {index_name: cal.Benchmark(attr_dict, index_name) for index_name, attr_dict in data.index.items()}
        tbond = cal.Tbond(data.index["y1_treasury_rate"], "y1_treasury_rate")

        for _fid, attrs in data.funds.items():
            fund = cal.Fund(attrs)
            res_return, cols_return_sorted = cal.calculate(
                _funcs_return, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True
            )
            res_risk, cols_risk_sorted = cal.calculate(
                _funcs_risk, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True
            )
            res_sub, cols_sub_sorted = cal.calculate(
                _funcs_sub, _intervals, _bms_used, _freq, statistic_date, fund, bms, tbond, with_func_names=True
            )
            result_return.extend(res_return)
            result_risk.extend(res_risk)
            result_sub.extend(res_sub)

        if cols_return_sorted is None:
            # The fund loop never ran: there are no rows and no column names,
            # so there is nothing to label or write for this date.
            print("NO FUND RESULTS, SKIPPED: {sd}".format(sd=statistic_date))
            continue

        df_return = pd.DataFrame(result_return)
        df_risk = pd.DataFrame(result_risk)
        df_sub = pd.DataFrame(result_sub)

        # Column names are taken from the last fund processed, as before.
        # A fresh prefix list is passed on every call in case format_cols
        # modifies its argument.
        df_return.columns = cal.format_cols(cols_return_sorted, _freq, prefix=list(key_cols))
        df_risk.columns = cal.format_cols(cols_risk_sorted, _freq, prefix=list(key_cols))
        df_sub.columns = cal.format_cols(cols_sub_sorted, _freq, prefix=list(key_cols))

        try:
            _write_weekly_tables(df_return, df_risk, df_sub)
        except Exception as e:
            # Best-effort single retry after a pause; a second failure
            # propagates to the caller.
            # NOTE(review): the retry rewrites all three tables, including any
            # that were already written -- confirm io.to_sql tolerates that.
            print("WRITE FAILED, RETRYING: {e}".format(e=e))
            time.sleep(10)
            _write_weekly_tables(df_return, df_risk, df_sub)

    print("TASK DONE: {ut}".format(ut=time_e))
def generate_tasks(update_time_l, update_time_r):
    """Return the weekly task mapping for one update window.

    Calls ``pre.generate_tasks`` with ``freq="w"`` and keeps only the entries
    whose key (a date) is on or after 2015-01-01.

    Args:
        update_time_l: First bound handed to ``pre.generate_tasks``
            (presumably the left edge of the update window -- confirm).
        update_time_r: Second bound handed to ``pre.generate_tasks``.

    Returns:
        dict: the filtered task mapping.  It is empty when
        ``pre.generate_tasks`` raises ``ValueError``; the error is printed.
    """
    window = "{l}->{r}".format(l=update_time_l, r=update_time_r)
    try:
        tasks = pre.generate_tasks(update_time_l, update_time_r, freq="w", processes=7, conn=engine_rd)
    except ValueError as e:
        # `tasks` was never bound here, so the old `return tasks` raised
        # UnboundLocalError; hand back an empty mapping instead.
        print(window, e)
        return {}

    tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)}
    print(window, len(tasks))
    return tasks
"ability_security", "tracking_error_a", "p_earning_periods", "n_earning_periods", "min_return", "max_return", "skewness", "kurtosis", ] _bms_used = ["hs300", "csi500", "sse50", "cbi", "nfi"] for update_time in [UPDATE_TIME]: try: tasks = pre.generate_tasks(update_time - relativedelta(hours=35, minutes=5), update_time, freq=_freq, processes=7, conn=engine_rd) tasks = {k: v for k, v in tasks.items() if k >= dt.date(2015, 1, 1)} print(update_time, len(tasks)) except ValueError as e: print(update_time, e) continue for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]): if statistic_date != dt.date(2017, 9, 21): continue print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format( ut=update_time, sd=statistic_date, l=len(ids_used))) result_return = [] result_risk = [] result_sub = []
prefix=["fund_id", "fund_name", "statistic_date", "benchmark"]) cols_risk = cal.format_cols( _funcs_risk, _freq, prefix=["fund_id", "fund_name", "statistic_date", "benchmark"]) cols_sub = cal.format_cols( _funcs_sub, _freq, prefix=["fund_id", "fund_name", "statistic_date", "benchmark"]) _bms_used = ["hs300", "csi500", "sse50", "cbi", "strategy", "FI01", "nfi"] try: tasks = pre.generate_tasks(last_month - dt.timedelta(last_month.day - 1), last_month + dt.timedelta(0, 86399), freq=_freq, processes=7, conn=engine_rd) tasks = { k: v for k, v in tasks.items() if (k >= dt.date(2015, 1, 1) and k < dt.date(UPDATE_TIME.year, UPDATE_TIME.month, 1)) } print(UPDATE_TIME, len(tasks)) except ValueError as e: print(UPDATE_TIME, e) for statistic_date, ids_used in sorted(tasks.items(), key=lambda x: x[0]): print("UPDATE TIME:{ut}: STATISTIC DATE:{sd}, LENGTH:{l}".format( ut=UPDATE_TIME, sd=statistic_date, l=len(ids_used)))