def is_daily(ids_to_update):
    sql_fnds = "SELECT fund_id, statistic_date FROM fund_nv_data_standard WHERE statistic_date >= '19700101' \
    AND fund_id IN {ids}".format(ids=sf.SQL.ids4sql(ids_to_update))
    su.tic("Fetching data......")
    fnds = pd.read_sql(sql_fnds, engine_rd)
    fnds = fnds.dropna().sort_values(by=["fund_id", "statistic_date"], ascending=[True, False])
    fnds.index = range(len(fnds))
    fnds["statistic_date"] = fnds["statistic_date"].apply(lambda x: time.mktime(x.timetuple()))

    su.tic("Calculating......")
    ids4slice = su.idx4slice(fnds, slice_by="fund_id")
    ids = fnds.drop_duplicates(subset=["fund_id"])["fund_id"].tolist()
    t_reals = su.slice(fnds, ids4slice, "statistic_date")
    # observations per calendar day over the sample span; a fund with a
    # record at least every other day is tagged as daily
    match_ratio = [
        (len(t_real) - 1) / ((t_real[0] - t_real[-1]) / 86400) if len(t_real) > 1 else None
        for t_real in t_reals
    ]
    matched = ["日度" if (x is not None and x >= 0.5) else None for x in match_ratio]
    result = dict(x for x in zip(ids, matched) if x[1] is not None)
    return result
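# A minimal, self-contained sketch of the frequency heuristic above, using
# hypothetical timestamps instead of the su.* helpers: n - 1 intervals over
# the span in days gives observations per day; >= 0.5 counts as daily.
def _match_ratio_demo():
    day = 86400
    t_real = [10 * day, 9 * day, 8 * day, 6 * day, 5 * day]  # newest first
    ratio = (len(t_real) - 1) / ((t_real[0] - t_real[-1]) / day)
    return ratio  # 4 intervals over 5 days -> 0.8, tagged "日度"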
def merge(ddls, cmts):
    su.tic("merging results...")
    for k, v in ddls.items():
        v["name_sc"] = v["name"].apply(lambda x: get_dictvalue(cmts, x, 0))
        v["comments"] = v["name"].apply(lambda x: get_dictvalue(cmts, x, 1))
        ddls[k] = v[["name", "name_sc", "type", "default", "nullable", "comments", "is_pk"]]
    return ddls
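# get_dictvalue is not defined in this file; given how it is called above and
# the (name_sc, comment) tuples built in get_cmt(), it presumably is a
# tolerant lookup along these lines (a sketch, not the actual helper):
def get_dictvalue(d, key, i):
    value = d.get(key)  # column name -> (chinese_name, comment)
    return value[i] if value is not None else None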
def main():
    server_name, db_name, file_name = get_source()
    engine_read = cfg.load_engine()[server_name]
    ddls = get_ddl(db_name, engine_read)
    cmts = get_cmt()
    dfs = merge(ddls, cmts)
    export(dfs, file_name=file_name)
    su.tic("done...")
def fetch_data(sqls, conn):
    su.tic("fetching new data...")
    dfs = {}
    for tb_name, sql in sqls.items():
        su.tic("fetching data of `{tb_name}`...".format(tb_name=tb_name))
        dfs[tb_name] = pd.read_sql(sql, conn)
    # main() consumes dfs["w"] as well as dfs["m"], so both groups are built here
    dfs["w"] = [dfs[_tb["re_w"]], dfs[_tb["ri_w"]], dfs[_tb["sub_w"]]]
    dfs["m"] = [dfs[_tb["re_m"]], dfs[_tb["ri_m"]], dfs[_tb["sub_m"]]]
    return dfs
def delete_old(data, tb_name, conn):
    su.tic("deleting old data...")
    ids4delete = sf.SQL.ids4sql(data["fund_id"].tolist())
    sql_delete = "DELETE FROM {tb_name} WHERE fund_id IN {ids}".format(
        tb_name=tb_name, ids=ids4delete)
    conn.execute(sql_delete)
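# String-formatted IN lists work here because the fund_id values come from our
# own database, but a parameterized variant is safer if ids can ever come from
# outside. A sketch using SQLAlchemy's expanding bind parameter (the table
# name still has to be trusted, since identifiers cannot be bound):
from sqlalchemy import text, bindparam

def delete_old_param(data, tb_name, conn):
    stmt = text("DELETE FROM {tb} WHERE fund_id IN :ids".format(tb=tb_name))
    stmt = stmt.bindparams(bindparam("ids", expanding=True))
    conn.execute(stmt, {"ids": data["fund_id"].tolist()})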
def merge_result(dfs, how="inner"):
    su.tic("merging new data...")
    d = dfs[0]
    for i in range(1, len(dfs)):
        d = pd.merge(d, dfs[i], how=how, on=["fund_id", "statistic_date", "benchmark"],
                     suffixes=["", "_dup"])
    return d
def export(df_dict, file_name="ddls", path=su.get_desktop_path()):
    su.tic("exporting...")
    file_path = os.path.join(path, file_name)
    if ".xlsx" not in file_path.lower():
        file_path += ".xlsx"
    # create an empty workbook first, then append one sheet per table
    tmp = pd.DataFrame()
    tmp.to_excel(file_path, index=False)
    dict_items = sorted(df_dict.items(), key=lambda d: d[0])
    for k, v in dict_items:
        book = load_workbook(file_path)
        writer = pd.ExcelWriter(file_path, engine="openpyxl")
        writer.book = book
        writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
        v = v.fillna("").astype(str)
        v.to_excel(writer, sheet_name=k, index=False)
        writer.save()
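# The load_workbook/writer.book dance above predates pandas 0.24; on newer
# pandas the same sheet-by-sheet append can be written with ExcelWriter's
# mode="a" (a sketch, assuming openpyxl is installed and the workbook
# already exists on disk):
def export_append(df_dict, file_path):
    with pd.ExcelWriter(file_path, engine="openpyxl", mode="a") as writer:
        for k, v in sorted(df_dict.items()):
            v.fillna("").astype(str).to_excel(writer, sheet_name=k, index=False)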
def get_cmt():
    su.tic("fetching comments...")
    dfs = pd.read_excel(r"D:\Work\Others\项目\国泰君安项目\国泰君安项目表结构文件\私募产品数据库表结构(基础表)4.0版本.xls",
                        sheetname=None, header=2)
    cmt_dict = {}
    for k, v in dfs.items():
        v = v.drop(v.columns[6:], axis=1)
        v.columns = ["name", "name_sc", "type", "is_pk", "default", "comment"]
        dfs[k] = v
        cmt_dict.update(dict(zip(v["name"], zip(v["name_sc"], v["comment"]))))
    # the source spreadsheet misspells these columns as "*_persietance";
    # alias them to the correctly spelled names used in the database
    for col in ["year", "total", "m3", "m6", "y1", "y2", "y3", "y5"]:
        cmt_dict["{}_persistence".format(col)] = cmt_dict["{}_persietance".format(col)]
    return cmt_dict
def is_monthly(ids_to_update):
    su.tic("Fetching data......")
    sql_std_m = "SELECT fund_id, statistic_date_std, statistic_date FROM fund_nv_standard_m \
    WHERE fund_id IN {ids} \
    AND fund_id IN (SELECT fund_id FROM (SELECT fund_id, COUNT(fund_id) cnt FROM fund_nv_standard_m \
    GROUP BY fund_id HAVING cnt >= 3) T)".format(ids=sf.SQL.ids4sql(ids_to_update))
    d_std_m = pd.read_sql(sql_std_m, engine_rd)

    ids4slice = su.idx4slice(d_std_m, slice_by="fund_id")
    ids = d_std_m.drop_duplicates(subset=["fund_id"])["fund_id"].tolist()
    t_reals = su.slice(d_std_m, ids4slice, "statistic_date")
    t_stds = su.slice(d_std_m, ids4slice, "statistic_date_std")
    # share of standard month-ends that have an actual observation
    match_ratio = [
        len([x for x in t_real if x is not None]) / len(t_std)
        for t_real, t_std in zip(t_reals, t_stds)
    ]
    matched = ["月度" if (x is not None and x >= 0.5) else None for x in match_ratio]
    result = dict(x for x in zip(ids, matched) if x[1] is not None)
    return result
def get_ddl(schema, engine):
    su.tic("fetching ddl...")
    insp = reflection.Inspector.from_engine(engine)
    tb_names = insp.get_table_names(schema)
    cols = {tb: [] for tb in tb_names}
    pks = {tb: [] for tb in tb_names}
    for tb_name in tb_names:
        cols[tb_name] = insp.get_columns(table_name=tb_name, schema=schema)
        pks[tb_name] = insp.get_primary_keys(table_name=tb_name, schema=schema)
    for k, v in cols.items():
        tb = pd.DataFrame.from_dict(v)
        cols[k] = tb[["name", "type", "nullable", "default"]]
    for k, v in pks.items():
        cols[k]["is_pk"] = cols[k]["name"].apply(lambda x: x in v).astype(bool)
        cols[k].loc[cols[k]["is_pk"] == False, "is_pk"] = None
    return cols
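# Inspector.get_primary_keys was deprecated in SQLAlchemy 1.0 and later
# removed; on current SQLAlchemy the equivalent lookup is get_pk_constraint.
# A sketch of the substitution (not a tested drop-in change):
def get_pk_cols(insp, tb_name, schema):
    return insp.get_pk_constraint(table_name=tb_name, schema=schema)["constrained_columns"]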
def main():
    total_fund_ids = get_fund_ids()
    # process 100 funds per batch
    STEP = 100
    sliced = [total_fund_ids[i:i + STEP] for i in range(0, len(total_fund_ids), STEP)]
    index = 1
    for fund_ids in sliced:
        sql_fund_ids = ",".join("'{}'".format(x) for x in fund_ids)
        result = cal_std(sql_fund_ids, True)
        su.tic("Process:{}/{}".format(index, len(sliced)))
        index += 1
        su.tic("Data to DB...month..{}.".format(sql_fund_ids))
        conn.execute("DELETE FROM fund_nv_standard_m where fund_id in ({})".format(sql_fund_ids))
        result["m"].to_sql("fund_nv_standard_m", conn, if_exists="append",
                           index=False, chunksize=10000)
        su.tic("Data to DB...week..{}.".format(",".join(fund_ids)))
        conn.execute("DELETE FROM fund_nv_standard_w where fund_id in ({})".format(sql_fund_ids))
        result["w"].to_sql("fund_nv_standard_w", conn, if_exists="append",
                           index=False, chunksize=10000)
def main():
    tb_w = sf.Table("w", "indicator")
    tb_m = sf.Table("m", "indicator")
    engines = cfg.load_engine()
    engine_read = engines["2Gb"]
    engine_write = engines["2Gb"]
    conn_read = engine_read.connect()
    conn_write = engine_write.connect()
    tb_names = [_tb["re_w"], _tb["ri_w"], _tb["sub_w"],
                _tb["re_m"], _tb["ri_m"], _tb["sub_m"]]
    sqls = gen_sql_indicator_latest(tb_names)
    dfs = fetch_data(sqls, conn_read)
    dw = merge_result(dfs["w"], how="inner")
    dm = merge_result(dfs["m"], how="inner")
    refresh(dw, tb_w, conn_write)
    refresh(dm, tb_m, conn_write)


if __name__ == "__main__":
    su.tic("fund_freq_indicator...")
    main()
    su.tic("Done...")
result = []
for mday in range(3, 4):
    print("Day {0}: {1}".format(mday, dt.datetime.now()))
    conn = engine_rd.connect()
    date_s = sf.Time(dt.datetime(year, month, mday) - dt.timedelta(mday))  # generate statistic_date
    sql_ids_updated = sf.SQL.ids_updated_sd(date_s.today, freq="m")
    ids_used = [x[0] for x in engine_rd.execute(sql_ids_updated).fetchall()]
    print("length of ids: {0}".format(len(ids_used)))
    if len(ids_used) == 0:  # skip the day if no data was updated
        continue

    tic("Getting Data")
    sql_fdate = sf.SQL.foundation_date(ids_used)  # fetch foundation dates
    fdate = pd.read_sql(sql_fdate, engine_rd)
    ids_fdate = fdate["fund_id"].tolist()
    t_min = fdate
    # Check whether every fund has a foundation date; if not, fall back to
    # the date of its first NAV record.
    if len(ids_fdate) != len(ids_used):
        ids_diff = list(set(ids_used) - set(ids_fdate))
        sql_fnvdate = sf.SQL.firstnv_date(ids_diff)
        fnvdate = pd.read_sql(sql_fnvdate, engine_rd)
        ids_diff = fnvdate["fund_id"].tolist()
        t_min = pd.merge(fdate, fnvdate, "outer")
        print("Some foundation_date missed, use first nv date...",
              len(ids_diff), len(ids_fdate), len(t_min))
    t_min = t_min.sort_values("fund_id", ascending=True)  # sort by fund_id ASC
def calculate(idx):
    dfs = pd.DataFrame()
    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear
    result_r = {}
    components_num = {}
    for year in range(first_year, process_date.year + 1):
        if year == process_date.timetuple().tm_year:
            month = process_date.month
        else:
            month = 12
        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m
        conn = engine_rd.connect()
        date_s = dt.date(year, month, 1) - dt.timedelta(1)

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        t_std = tu.timeseries_std(dt.datetime(year, month, 10), month, 12, 1, use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [d[idx_slice[i]:idx_slice[i + 1]] for i in range(len(idx_slice) - 1)]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [su.merge_result(x1, x2, x3)
                  for x1, x2, x3 in zip(matchs1, matchs2, matchs3)]

        su.tic("Getting Result...")
        t_matchs = [x[0] for x in matchs]
        t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[navs[i][idx] if idx is not None else None
                       for idx in idx_matchs[i].values()]
                      for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 30] = np.nan  # drop implausible month-over-month returns
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0
        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)
        su.tic("Year:{0}, Done...".format(year))

    values_r = []
    values_num = []
    for year in range(first_year, process_date.timetuple().tm_year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])

    adjust_periods = 1
    date_tmp = date_s - relativedelta.relativedelta(months=adjust_periods + 1)
    date_tmp = dt.date(date_tmp.year, date_tmp.month,
                       cld.monthrange(date_tmp.year, date_tmp.month)[1])
    sql_base = "SELECT index_value FROM fund_month_index_static WHERE index_id = '{idx_id}' \
    AND statistic_date = '{sd}'".format(idx_id=PEIndex.id, sd=date_tmp)
    base = pd.read_sql(sql_base, engine_rd).get_value(0, "index_value")
    result = (np.array(values_r)[-(adjust_periods + 1) - 1:] + 1).cumprod() * base
    result = result.tolist()
    values_num = values_num[-(adjust_periods + 1) - 1:]
    # NOTE: dt.datetime(year, month + 1, 10) raises ValueError when month == 12
    tag = tu.timeseries_std(
        dt.datetime(year, month + 1, 10),
        tu.periods_in_interval(dt.datetime(year, month + 1, 10), dt.datetime(first_year, 1, 10), 12),
        12)[::-1]
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]
    tag = tag[-(adjust_periods + 1) - 1:]
    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]
    cols = ["index_id", "index_name", "typestandard_code", "typestandard_name", "type_code",
            "type_name", "stype_code", "stype_name", "index_method", "data_source",
            "data_source_name"]
    values = [PEIndex.id, PEIndex.name, PEIndex.typestandard["code"], PEIndex.typestandard["name"],
              PEIndex.type["code"], PEIndex.type["name"], PEIndex.stype["code"],
              PEIndex.stype["name"], 1, 0, "私募云通"]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val
    dfs = dfs.append(op[:-1])
    return dfs
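# How the chain-linking above works in isolation: the monthly cross-sectional
# mean returns are compounded onto a base level (here the previously stored
# index value). A toy example with made-up returns:
import numpy as np

def _chain_demo():
    base = 1000.0
    values_r = np.array([0.01, -0.02, 0.03])  # oldest -> newest mean returns
    level = (values_r + 1).cumprod() * base   # 1010.0, 989.8, 1019.494
    return level.tolist()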
def cal_std(fund_ids=None, whole=False):
    """Calculate one fund at a time.

    Args:
        fund_ids: str
        whole:

    Returns:
    """
    if whole is True:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard"
    else:
        sql_navs = "SELECT fund_id, nav, added_nav, swanav, statistic_date FROM fund_nv_data_standard \
        WHERE update_time >= {ut}".format(ut=yesterday)
    if fund_ids:
        if "WHERE" in sql_navs:
            sql_navs += " AND fund_id in ({})".format(fund_ids)
        else:
            sql_navs += " WHERE fund_id in ({})".format(fund_ids)

    su.tic("Fetching nv Data......")
    df_nav = pd.read_sql(sql_navs, conn)
    criterias = [(df_nav["nav"] >= 0.2),
                 (df_nav["added_nav"] >= 0.2),
                 (df_nav["statistic_date"] >= dt.date(1970, 1, 2)),
                 (df_nav["statistic_date"] <= dt.date.today())]

    su.tic("Preprocessing......")
    criteria = get_creterias(criterias)
    df_nav = df_nav.loc[criteria].sort_values(["fund_id", "statistic_date"],
                                              ascending=[True, False])
    df_nav.index = range(len(df_nav))
    ids = df_nav["fund_id"].drop_duplicates().tolist()
    t_mins = list(df_nav.groupby("fund_id")["statistic_date"].min())
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    t_maxs = list(df_nav.groupby("fund_id")["statistic_date"].max())
    t_maxs_tstp = [time.mktime(x.timetuple()) for x in t_maxs]

    idx4slice = su.idx4slice(df_nav, slice_by="fund_id")
    navs = su.slice(df_nav, idx4slice, "nav")
    added_navs = su.slice(df_nav, idx4slice, "added_nav")
    swanavs = su.slice(df_nav, idx4slice, "swanav")
    t_reals = su.slice(df_nav, idx4slice, "statistic_date")
    t_reals_tstp = []
    for t_real in t_reals:
        t_reals_tstp.append([time.mktime(x.timetuple()) for x in t_real])

    # standard weekly series since inception
    t_std_alls_w = [tu.timeseries_std(friday, tu.periods_in_interval(friday, t_min, 12), extend=4)
                    for t_min in t_mins]
    t_std_alls_w = [t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
                    for t_std_all, t_min in zip(t_std_alls_w, t_mins_tstp)]
    t_std_alls_w = [t_std_all[-len([x for x in t_std_all if x < t_max]) - 1:]
                    for t_std_all, t_max in zip(t_std_alls_w, t_maxs_tstp)]
    # standard monthly series since inception
    t_std_alls_m = [tu.timeseries_std(date, tu.periods_in_interval(date, t_min, 12),
                                      periods_y=12, use_lastday=True, extend=6)
                    for date, t_min in zip(t_maxs, t_mins)]
    t_std_alls_m = [t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
                    for t_std_all, t_min in zip(t_std_alls_m, t_mins_tstp)]

    su.tic("Matching......")
    matchs_w = [tu.outer_match4indicator_w(t_real, t_std)
                for t_real, t_std in zip(t_reals_tstp, t_std_alls_w)]
    idx_matchs_w = [x[1] for x in matchs_w]
    nav_matchs_w = [[nav[ix] if ix is not None else None for ix in idx.values()]
                    for nav, idx in zip(navs, idx_matchs_w)]
    anav_matchs_w = [[nav[ix] if ix is not None else None for ix in idx.values()]
                     for nav, idx in zip(added_navs, idx_matchs_w)]
    swanav_matchs_w = [[nav[ix] if ix is not None else None for ix in idx.values()]
                       for nav, idx in zip(swanavs, idx_matchs_w)]
    t_matchs_w = [[t_real[ix] if ix is not None else None for ix in idx.values()]
                  for t_real, idx in zip(t_reals, idx_matchs_w)]
    t_matchs_std_w = [tu.tr(x[:-1], "date") if x is not None else None for x in t_std_alls_w]

    matchs_m = [tu.outer_match4indicator_m(t_real, t_std)
                for t_real, t_std in zip(t_reals_tstp, t_std_alls_m)]
    idx_matchs_m = [x[1] for x in matchs_m]
    nav_matchs_m = [[nav[ix] if ix is not None else None for ix in idx.values()]
                    for nav, idx in zip(navs, idx_matchs_m)]
    anav_matchs_m = [[nav[ix] if ix is not None else None for ix in idx.values()]
                     for nav, idx in zip(added_navs, idx_matchs_m)]
    swanav_matchs_m = [[nav[ix] if ix is not None else None for ix in idx.values()]
                       for nav, idx in zip(swanavs, idx_matchs_m)]
    t_matchs_m = [[t_real[ix] if ix is not None else None for ix in idx.values()]
                  for t_real, idx in zip(t_reals, idx_matchs_m)]
    t_matchs_std_m = [tu.tr(x[:-1], "date") if x is not None else None for x in t_std_alls_m]

    result_w = {"fund_id": ids, "nav": nav_matchs_w, "added_nav": anav_matchs_w,
                "swanav": swanav_matchs_w, "statistic_date": t_matchs_w,
                "statistic_date_std": t_matchs_std_w}
    result_m = {"fund_id": ids, "nav": nav_matchs_m, "added_nav": anav_matchs_m,
                "swanav": swanav_matchs_m, "statistic_date": t_matchs_m,
                "statistic_date_std": t_matchs_std_m}

    su.tic("Merging Result......")
    result = {}
    result["w"] = pd.DataFrame.from_dict(merge_result(result_w, ids))
    result["m"] = pd.DataFrame.from_dict(merge_result(result_m, ids))
    return result
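# tu.outer_match4indicator_w/_m are internal helpers; judging only from how
# their results are indexed above, they appear to return (matched_times,
# {std_idx: real_idx or None}). A toy nearest-neighbour version under that
# assumption (the real matching rules are likely more involved):
def _outer_match_demo(t_real, t_std, tol=3 * 86400):
    if not t_real:
        return [], {}
    idx = {}
    for j, ts in enumerate(t_std):
        best = min(range(len(t_real)), key=lambda i: abs(t_real[i] - ts))
        idx[j] = best if abs(t_real[best] - ts) <= tol else None
    return [t_real[i] if i is not None else None for i in idx.values()], idx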
def calculate(idx, export_path=None):
    dfs = pd.DataFrame()
    PEIndex = sf.PEIndex(idx)
    first_date = PEIndex.firstmonday
    result_r = {}
    components_num = {}
    components = {}
    for year in range(first_date.timetuple().tm_year, _process_date.year + 1):
        if year == _process_date.year:
            month = _process_date.month
            day = tu.date_of_weekday(_process_date, 0, (0, 0)).day
            if day > _process_date.day:  # correct the date when Monday falls in the previous month
                month -= 1
        else:
            month = 12
            day = 31
        date_s = dt.date(year, month, day)
        sql_i = sf.SQL_PEIndex(idx, year).yeardata_w["nv"]
        sql_mindate = sf.SQL_PEIndex(idx, year).yeardata_w["t_min"]
        conn = engine_rd.connect()

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        d.index = range(len(d))
        t_min = pd.read_sql(sql_mindate, conn)["statistic_date_earliest"].tolist()
        t_min = [time.mktime(x.timetuple()) for x in t_min]
        # conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()
        last_monday = date_s - dt.timedelta(cld.weekday(date_s.year, date_s.month, date_s.day))
        t_std = tu.timeseries_std(last_monday, "a", 52, extend=1)
        if year == first_date.timetuple().tm_year:
            t_std = t_std[:-1]

        su.tic("Slicing")
        t_std_long = tu.timeseries_std(
            last_monday, tu.periods_in_interval(last_monday, dt.date(year - 1, 11, 30), 12))
        t_std_long_p1m = [(x + relativedelta(months=1)).timestamp() for x in tu.tr(t_std_long)]
        real_p1m = su.compare(t_min, t_std_long)  # earliest real dates vs the standard series
        p1m_std = su.compare(t_std_long_p1m, t_std)  # standard series shifted one month vs itself
        data_used = [p1m_std[x - 1] for x in real_p1m]

        su.tic("Grouping...")
        ds = [d[idx_slice[i]:idx_slice[i + 1]] for i in range(len(idx_slice) - 1)]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs = [tu.outer_match4index_w(x, t_std, False) for x in ts]

        su.tic("Getting Result...")
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[navs[i][idx] if idx is not None else None
                       for idx in idx_matchs[i].values()]
                      for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        print(nvs.shape)
        for i in range(len(ids)):
            nvs[data_used[i] + 1:, i] = np.nan  # mask data before each fund's usable window
        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 10] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0
        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)
        # log samples
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp
        su.tic("Year:{0}, Done...".format(year))

    values_r = []
    values_num = []
    for year in range(first_date.timetuple().tm_year, date_s.year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])
    result = (np.array(values_r) + 1).cumprod() * 1000
    result = result.tolist()
    result.insert(0, 1000)  # base level of the index
    values_num.insert(0, 0)
    tag = tu.timeseries_std(dt.datetime(year, month, day),
                            tu.periods_in_interval(dt.datetime(year, month, day),
                                                   dt.datetime(first_date.year, 1, 1), 12),
                            52, extend=5)[::-1]
    tag = [x for x in tag if x >= first_date.timestamp()]
    tag = [dt.date.fromtimestamp(x) for x in tag]
    # local debug
    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]
    cols = ["index_id", "index_name", "typestandard_code", "typestandard_name", "type_code",
            "type_name", "stype_code", "stype_name", "index_method", "data_source",
            "data_source_name"]
    values = [PEIndex.id, PEIndex.name, PEIndex.typestandard["code"], PEIndex.typestandard["name"],
              PEIndex.type["code"], PEIndex.type["name"], PEIndex.stype["code"],
              PEIndex.stype["name"], 1, 0, "私募云通"]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val
    dfs = dfs.append(op)

    if export_path is not None:
        tmp = tag.copy()
        for year in sorted(components.keys(), reverse=True):
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1],
            ]
        io.export_to_xl(
            components,
            "{sd}_{index_name}_w_samples".format(sd=last_monday.strftime("%Y%m%d"),
                                                 index_name=PEIndex.id),
            export_path)
    return dfs
def main(): su.tic("getting original get_data...") sql_fi = "SELECT fund_id, fund_name, fund_name_py, fund_full_name, reg_time, foundation_date, fund_status, region, data_freq, open_date, fund_custodian, \ fund_time_limit, fund_stockbroker, fee_subscription, fee_redeem, fee_trust, fee_manage, fee_pay, fund_member \ FROM fund_info" d_fi = pd.read_sql(sql_fi, conn_read) sql_oi = "SELECT org_id, org_name, org_full_name, profile FROM org_info" d_oi = pd.read_sql(sql_oi, conn_read) sql_fom = "SELECT fund_id, org_id FROM fund_org_mapping WHERE org_type_code = 1" d_fom = pd.read_sql(sql_fom, conn_read) sql_ftm = "SELECT fund_id, typestandard_code, typestandard_name, type_code, type_name, stype_code, stype_name FROM fund_type_mapping WHERE typestandard_code IN (601, 602, 603, 604) AND flag=1" d_ftm = pd.read_sql(sql_ftm, conn_read) sql_ti = "SELECT id as fund_id, foundation_days_range FROM time_index WHERE id_category = 1" d_ti = pd.read_sql(sql_ti, conn_read) sql_fnd = "SELECT fnd1.fund_id, fnd2.nav, fnd2.added_nav, fnd2.swanav, fnd2.statistic_date \ FROM (SELECT fund_id, MAX(statistic_date) as md FROM fund_nv_data_standard GROUP BY fund_id) fnd1 \ JOIN fund_nv_data_standard fnd2 ON fnd1.fund_id = fnd2.fund_id AND fnd1.md = fnd2.statistic_date" d_fnd = pd.read_sql(sql_fnd, conn_read) su.tic("merging get_data...") tm = {} k_vs = [("typestandard_code", "typestandard_name"), ("type_code", "type_name"), ("stype_code", "stype_name")] for k_v in k_vs: tmp = d_ftm.loc[:, k_v].sort_values( by=k_v[0], ascending=[True]).drop_duplicates().dropna() tmp = dict(zip(tmp[k_v[0]], tmp[k_v[1]])) tm.update(tmp) tm.update({np.nan: None}) ftm = dict( zip(zip(d_ftm["fund_id"], d_ftm["typestandard_code"]), zip(d_ftm["type_code"], d_ftm["stype_code"]))) d = d_fi.copy() d = d.merge(d_fom, how="left", on="fund_id") d = d.merge(d_oi, how="left", on="org_id") d = d.merge(d_ti, how="left", on="fund_id") d = d.merge(d_fnd, how="left", on="fund_id") _trans = {1: 1, 2: 4, 3: 2, 4: 3} for tsc in range(1, 5): tmp = d_ftm.loc[d_ftm["typestandard_code"] == 600 + tsc, ["fund_id", "typestandard_name"]] d = d.merge(tmp, how="left", on="fund_id") d.columns = list(d.columns[:-4]) + [ "typestandard_name_{tsc}".format(tsc=_trans[tsc]) for tsc in range(1, 5) ] for tsc in range(1, 5): d["type_code_{tsc}".format(tsc=_trans[tsc])] = d["fund_id"].apply( lambda fid: get_dict_value((fid, 600 + tsc), ftm, 0)) d["type_code_name_{tsc}".format( tsc=_trans[tsc])] = d["type_code_{tsc}".format( tsc=_trans[tsc])].apply(lambda tc: get_dict_value(tc, tm)) d["stype_code_{tsc}".format(tsc=_trans[tsc])] = d["fund_id"].apply( lambda fid: get_dict_value((fid, 600 + tsc), ftm, 1)) d["stype_code_name_{tsc}".format( tsc=_trans[tsc])] = d["stype_code_{tsc}".format( tsc=_trans[tsc])].apply(lambda tc: get_dict_value(tc, tm)) d = d.drop_duplicates(subset=["fund_id"]) d.index = d["fund_id"] # 将fund_type_mapping表中typestandard_code = 601的所有 # 和typestandard_code = 605中的type_code = 60502(股权投资基金),60503(创业投资基金),60504(其他投资基金), # 写入的fund_info_aggregation表的type_code_1,type_code_name_1中 sql_ts5 = "SELECT fund_id, type_code as type_code_1, type_name as type_code_name_1 FROM fund_type_mapping \ WHERE type_code IN (60502, 60503, 60504) AND flag = 1" df_ts5 = pd.read_sql(sql_ts5, engine_read) df_ts5.index = df_ts5["fund_id"] d = d.fillna(df_ts5) refresh(d, conn_write)
d["type_code_{tsc}".format(tsc=_trans[tsc])] = d["fund_id"].apply( lambda fid: get_dict_value((fid, 600 + tsc), ftm, 0)) d["type_code_name_{tsc}".format( tsc=_trans[tsc])] = d["type_code_{tsc}".format( tsc=_trans[tsc])].apply(lambda tc: get_dict_value(tc, tm)) d["stype_code_{tsc}".format(tsc=_trans[tsc])] = d["fund_id"].apply( lambda fid: get_dict_value((fid, 600 + tsc), ftm, 1)) d["stype_code_name_{tsc}".format( tsc=_trans[tsc])] = d["stype_code_{tsc}".format( tsc=_trans[tsc])].apply(lambda tc: get_dict_value(tc, tm)) d = d.drop_duplicates(subset=["fund_id"]) d.index = d["fund_id"] # 将fund_type_mapping表中typestandard_code = 601的所有 # 和typestandard_code = 605中的type_code = 60502(股权投资基金),60503(创业投资基金),60504(其他投资基金), # 写入的fund_info_aggregation表的type_code_1,type_code_name_1中 sql_ts5 = "SELECT fund_id, type_code as type_code_1, type_name as type_code_name_1 FROM fund_type_mapping \ WHERE type_code IN (60502, 60503, 60504) AND flag = 1" df_ts5 = pd.read_sql(sql_ts5, engine_read) df_ts5.index = df_ts5["fund_id"] d = d.fillna(df_ts5) refresh(d, conn_write) # return d if __name__ == "__main__": su.tic("fund_info_aggregation...") main() su.tic("Done...")
def calculate():
    su.tic("fetching data...")
    df_whole = pd.DataFrame()
    conn = engine_read.connect()
    year = process_date.year
    month = process_date.month
    month_range = cld.monthrange(year, month)[1]
    time_to_fill = sf.Time(dt.datetime(year, month, month_range))
    sql_bm = sf.SQL.market_index(time_to_fill.today)  # benchmark prices
    sql_pe = sf.SQL.pe_index(time_to_fill.today, freq="m")  # w -> m

    su.tic("preprocessing data...")
    bm = pd.read_sql(sql_bm, engine_read)
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].fillna(method="backfill")
    bm["y1_treasury_rate"] = bm["y1_treasury_rate"].apply(su.annually2monthly)
    bm["statistic_date"] = bm["statistic_date"].apply(su.date2tstp)
    pe = pd.read_sql(sql_pe, engine_read)
    pe["statistic_date"] = pe["statistic_date"].apply(su.date2tstp)
    conn.close()

    prices_bm = [bm["hs300"].tolist(), bm["csi500"].tolist(), bm["sse50"].tolist(),
                 bm["cbi"].tolist(), bm["nfi"].tolist()]
    price_pe = pe["index_value"].tolist()
    r_tbond = bm["y1_treasury_rate"].tolist()
    t_bm = bm["statistic_date"].tolist()
    t_pe = pe["statistic_date"].tolist()
    intervals = table.intervals
    intervals4 = [1, 2, 3, 4, 5, 6, 9, 10, 11]
    intervals5 = [1, 2, 3, 4, 5, 6, 10, 11]
    result = []

    conn = engine_read.connect()
    # Get Data
    date_s = sf.Time(process_date - dt.timedelta(process_date.day))  # generate statistic_date
    sql_fids_updated = sf.SQL.ids_updated_sd(date_s.today, "om")
    # funds whose NAV was updated this month
    ids_updated = tuple(x[0] for x in conn.execute(sql_fids_updated).fetchall())
    # advisors that need recalculating, derived from the updated funds
    sql_o_updated = "SELECT DISTINCT fom.org_id FROM fund_org_mapping fom \
    JOIN org_info oi ON fom.org_id = oi.org_id \
    WHERE org_type_code = 1 AND oi.found_date <= '{0}' AND fund_id IN {1}".format(
        date_s.today - relativedelta(months=3), ids_updated)
    o_updated = tuple(x[0] for x in conn.execute(sql_o_updated).fetchall())
    # all funds managed by those advisors
    sql_fom = "SELECT fom.org_id, fom.fund_id, oi.found_date, oi.org_name FROM fund_org_mapping fom \
    JOIN org_info oi ON fom.org_id = oi.org_id \
    JOIN fund_info fi ON fom.fund_id = fi.fund_id \
    WHERE fom.org_id IN {0} AND fom.org_type_code = 1 AND oi.found_date <= '{1}' AND fi.foundation_date <= '{2}'".format(
        o_updated, date_s.today - relativedelta(months=3), date_s.today - relativedelta(months=1))
    fom = pd.read_sql(sql_fom, conn)
    fid_used = tuple(fom["fund_id"])
    sql_fnd = sf.SQL.nav(fid_used)
    fnd = pd.read_sql(sql_fnd, conn)
    fnd = fnd.dropna()
    fnd.index = range(len(fnd))

    data = fom.merge(fnd, how="inner", on="fund_id")
    data = data.sort_values(by=["org_id", "fund_id", "statistic_date"],
                            ascending=[True, True, False])
    t_mins = data.groupby(["org_id"])["statistic_date"].min().tolist()
    t_mins_tstp = [time.mktime(x.timetuple()) for x in t_mins]
    data["statistic_date"] = data["statistic_date"].apply(lambda x: time.mktime(x.timetuple()))
    data.index = range(len(data))
    ids_o = data["org_id"].drop_duplicates().tolist()
    names_o = data.drop_duplicates(subset=["org_id"])["org_name"].tolist()
    idx4slice_o = su.idx4slice(data, "org_id")
    dfs = [data[idx4slice_o[i]:idx4slice_o[i + 1]] if i != len(idx4slice_o) - 1
           else data[idx4slice_o[i]:]
           for i in range(len(idx4slice_o) - 1)]

    # Preprocess
    # standard time series per interval
    t_stds = [tu.timeseries_std(date_s.today, interval, periods_y=12, use_lastday=True, extend=1)
              for interval in intervals]
    t_std_y5 = t_stds[6]
    t_stds_len = [len(x) - 1 for x in t_stds]
    # per-advisor standard series since inception
    t_std_alls = [tu.timeseries_std(date_s.today, tu.periods_in_interval(date_s.today, t_min, 12),
                                    periods_y=12, use_lastday=True, extend=6)
                  for t_min in t_mins]
    t_std_alls = [t_std_all[:len([x for x in t_std_all if x >= t_min]) + 1]
                  for t_std_all, t_min in zip(t_std_alls, t_mins_tstp)]

    # benchmark standard series since inception
    matchs_bm = [tu.outer_match4indicator_m(t_bm, t_std_all, False) for t_std_all in t_std_alls]
    idx_matchs_bm = [x[1] for x in matchs_bm]
    price_bm0_all = [[prices_bm[0][ix] if ix is not None else None for ix in idx.values()]
                     for idx in idx_matchs_bm]
    price_bm1_all = [[prices_bm[1][ix] if ix is not None else None for ix in idx.values()]
                     for idx in idx_matchs_bm]
    price_bm2_all = [[prices_bm[2][ix] if ix is not None else None for ix in idx.values()]
                     for idx in idx_matchs_bm]
    price_bm3_all = [[prices_bm[3][ix] if ix is not None else None for ix in idx.values()]
                     for idx in idx_matchs_bm]
    price_bm4_all = [[prices_bm[4][ix] if ix is not None else None for ix in idx.values()]
                     for idx in idx_matchs_bm]
    matchs_pe = [tu.outer_match4indicator_m(t_pe, t_std_all, False) for t_std_all in t_std_alls]
    idx_matchs_pe = [x[1] for x in matchs_pe]
    price_pe_all = [[price_pe[ix] if ix is not None else None for ix in idx.values()]
                    for idx in idx_matchs_pe]

    # benchmark returns since inception
    r_bm0_all = [fi.gen_return_series(x) for x in price_bm0_all]
    r_bm1_all = [fi.gen_return_series(x) for x in price_bm1_all]
    r_bm2_all = [fi.gen_return_series(x) for x in price_bm2_all]
    r_bm3_all = [fi.gen_return_series(x) for x in price_bm3_all]
    r_bm4_all = [fi.gen_return_series(x) for x in price_bm4_all]
    r_pe_all = [fi.gen_return_series(x) for x in price_pe_all]

    tmp = [len(idx_matchs_bm[i]) for i in range(len(idx_matchs_bm))]
    tmp_id = tmp.index(max(tmp))
    tmp_list = [r_tbond[ix] if ix is not None else None
                for ix in idx_matchs_bm[tmp_id].values()]
    tmp = pd.DataFrame(tmp_list)[0].fillna(method="backfill").tolist()
    r_f_all = [[r_tbond[idx[k]] if idx[k] is not None else tmp[k] for k in idx.keys()]
               for idx in idx_matchs_bm]
    r_f_all = [x[1:] for x in r_f_all]

    # benchmark returns for the fixed intervals
    matchs_bm = tu.outer_match4indicator_m(t_bm, t_std_y5, False)
    matchs_pe = tu.outer_match4indicator_m(t_pe, t_std_y5, False)
    idx_matchs_bm = matchs_bm[1]
    idx_matchs_pe = matchs_pe[1]
    price_bm0_y5 = [prices_bm[0][ix] if ix is not None else None for ix in idx_matchs_bm.values()]
    price_bm1_y5 = [prices_bm[1][ix] if ix is not None else None for ix in idx_matchs_bm.values()]
    price_bm2_y5 = [prices_bm[2][ix] if ix is not None else None for ix in idx_matchs_bm.values()]
    price_bm3_y5 = [prices_bm[3][ix] if ix is not None else None for ix in idx_matchs_bm.values()]
    price_bm4_y5 = [prices_bm[4][ix] if ix is not None else None for ix in idx_matchs_bm.values()]
    price_pe_y5 = [price_pe[ix] if ix is not None else None for ix in idx_matchs_pe.values()]
    r_bm0_y5 = fi.gen_return_series(price_bm0_y5)
    r_bm1_y5 = fi.gen_return_series(price_bm1_y5)
    r_bm2_y5 = fi.gen_return_series(price_bm2_y5)
    r_bm3_y5 = fi.gen_return_series(price_bm3_y5)
    r_bm4_y5 = fi.gen_return_series(price_bm4_y5)
    r_pe_y5 = fi.gen_return_series(price_pe_y5)
    r_f_y5 = [r_tbond[ix] if ix is not None else None for ix in idx_matchs_bm.values()]
    r_f_y5 = r_f_y5[1:]

    rs_bm0 = [r_bm0_y5[:length - 1] for length in t_stds_len]
    rs_bm1 = [r_bm1_y5[:length - 1] for length in t_stds_len]
    rs_bm2 = [r_bm2_y5[:length - 1] for length in t_stds_len]
    rs_bm3 = [r_bm3_y5[:length - 1] for length in t_stds_len]
    rs_bm4 = [r_bm4_y5[:length - 1] for length in t_stds_len]
    rs_pe = [r_pe_y5[:length - 1] for length in t_stds_len]
    rs_f = [r_f_y5[:length - 1] for length in t_stds_len]
    benchmark = {1: rs_bm0, 2: rs_bm1, 3: rs_bm2, 4: rs_pe, 6: rs_bm3, 7: rs_bm4}
    benchmark_all = {1: r_bm0_all, 2: r_bm1_all, 3: r_bm2_all, 4: r_pe_all,
                     6: r_bm3_all, 7: r_bm4_all}

    su.tic("calculating ...")
    for i in range(len(ids_o)):
        if i % 200 == 0:
            print("%s done..." % i)
        df = dfs[i]
        df.index = range(len(df))
        idx4slice = su.idx4slice(df, "fund_id")
        navs = su.slice(df, idx4slice, "nav")
        t_reals = su.slice(df, idx4slice, "statistic_date")
        matchs_all = [tu.outer_match4indicator_m(t_real, t_std_alls[i], drop_none=False)
                      for t_real in t_reals]
        idx_matchs_all = [x[1] for x in matchs_all]
        nav_matchs_all = [[nav[ix] if ix is not None else np.NaN for ix in idx.values()]
                          for nav, idx in zip(navs, idx_matchs_all)]
        nv_matrix = np.array(nav_matchs_all).T
        r_total = np.nanmean((nv_matrix[:-1] / nv_matrix[1:] - 1), axis=1)
        price_total = np.nancumprod(1 + r_total[::-1])[::-1].tolist()
        price_total.append(1)  # define the base-period pseudo price as 1
        r_total = fi.gen_return_series(price_total)

        prices = []
        for j in range(7):
            if t_mins[i] + relativedelta(months=intervals[j]) <= date_s.today:
                length = min(len(price_total), t_stds_len[j])
                prices.append(price_total[:length])
            else:
                prices.append(None)
        for j in range(7, 11):
            length = min(len(price_total), t_stds_len[j])
            prices.append(price_total[:length])
        prices.append(price_total)

        navs1 = [prices[i] for i in intervals4]
        navs2 = [prices[i] for i in intervals5]
        rs1 = [fi.gen_return_series(x) for x in navs1]
        rs2 = [fi.gen_return_series(x) for x in navs2]
        rs_f_ = rs_f.copy()
        rs_f_.append(r_f_all[i])
        rs_f1_ = [rs_f_[i] for i in intervals4]
        rs_f2_ = [rs_f_[i] for i in intervals5]

        ir = [fi.accumulative_return(r) for r in navs1[:-1]]
        ir.append(fi.accumulative_return([x for x in navs1[-1] if x is not None]))
        ir_a = [fi.return_a(r, 12) for r in rs1]
        calmar = [fi.calmar_a(nv, r_f, 12) for nv, r_f in zip(navs2, rs_f2_)]
        sortino = [fi.sortino_a(r, r_f, 12) for r, r_f in zip(rs2, rs_f2_)]
        for k in benchmark.keys():
            rs_bm_ = benchmark[k].copy()  # pick the benchmark
            rs_bm_.append(benchmark_all[k][i])
            rs_bm1 = [rs_bm_[i] for i in intervals4]
            rs_bm2 = [rs_bm_[i] for i in intervals5]
            ier_a = [fi.excess_return_a(r, r_bm, 12) for r, r_bm in zip(rs1, rs_bm1)]
            info = [fi.info_a(r, r_bm, 12) for r, r_bm in zip(rs2, rs_bm2)]
            jensen = [fi.jensen_a(r, r_bm, r_f, 12)
                      for r, r_bm, r_f in zip(rs2, rs_bm2, rs_f2_)]
            treynor = [fi.treynor_a(r, r_bm, r_f, 12)
                       for r, r_bm, r_f in zip(rs2, rs_bm2, rs_f2_)]
            sharpe_a = [fi.sharpe_a(r, r_f, 12) for r, r_f in zip(rs2, rs_f2_)]
            tmp = [ir, ir_a, ier_a, calmar, sortino, treynor, info, jensen, sharpe_a]
            result_i = [ids_o[i], names_o[i], k, 1, 1, nv_matrix.shape[1],
                        60001, "全产品", 6000101, "全产品", date_s.today]
            for x in tmp:
                result_i.extend(x)
            result.append(result_i)

    df = pd.DataFrame(result)
    df[list(range(11, 78 + 8))] = df[list(range(11, 78 + 8))].astype(np.float64)
    df[list(range(11, 78 + 8))] = df[list(range(11, 78 + 8))].apply(lambda x: round(x, 6))
    df.columns = columns
    df_whole = df_whole.append(df)
    return df_whole
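# The fi.* helpers are internal; their trailing 12 argument is the
# annualization factor for monthly data. As a rough illustration (an
# assumption about fi.sharpe_a's definition, not its actual source), an
# annualized Sharpe ratio over monthly return series looks like:
import numpy as np

def sharpe_a_demo(r, r_f, periods_y=12):
    excess = np.asarray(r, dtype=float) - np.asarray(r_f, dtype=float)
    return np.nanmean(excess) / np.nanstd(excess, ddof=1) * np.sqrt(periods_y)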
def main(): tic("Calculating time_index...") fd_f, fd_o = [calculate(x) for x in [1, 2]] for d_fd in [fd_f, fd_o]: io.to_sql("time_index", engine_wt, d_fd, "update") tic("Done...")
from utils.database import config as cfg, io
from utils.script import swanv
from utils.script.scriptutils import tic

engines = cfg.load_engine()
engine_rd = engines["2Gb"]
engine_wt = engines["2Gb"]


def main():
    conn = engine_wt.connect()
    # clear swanav values that were copied from nav even though nav != added_nav
    sql_check = "UPDATE fund_nv_data_standard SET swanav = NULL WHERE nav <> added_nav AND swanav = nav"
    conn.execute(sql_check)
    # where nav equals added_nav, swanav is simply nav
    sql_update = "UPDATE fund_nv_data_standard SET swanav = nav WHERE nav = added_nav"
    conn.execute(sql_update)
    conn.close()
    df = swanv.calculate_swanav()
    if len(df) > 0:
        df.index = range(len(df))
        print("{num} records to update".format(num=len(df)))
        io.to_sql("fund_nv_data_standard", engine_wt, df, "update")


if __name__ == "__main__":
    tic("swanav...")
    main()
    tic("Done...")
def calculate(idx, export_path=None):
    dfs = pd.DataFrame()
    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear
    result_r = {}
    components_num = {}
    components = {}
    for year in range(first_year, process_date.year + 1):
        if year == process_date.timetuple().tm_year:
            month = process_date.month
        else:
            month = 12
        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m
        conn = engine_rd.connect()

        su.tic("Getting Data")
        d = pd.read_sql(sql_i, conn)
        conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(lambda x: time.mktime(x.timetuple()))
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()
        t_std = tu.timeseries_std(dt.datetime(year, month, 10), month, 12, 1, use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [d[idx_slice[i]:idx_slice[i + 1]] for i in range(len(idx_slice) - 1)]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [su.merge_result(x1, x2, x3)
                  for x1, x2, x3 in zip(matchs1, matchs2, matchs3)]

        su.tic("Getting Result...")
        t_matchs = [x[0] for x in matchs]
        t_matchs = [tu.tr(x) for x in t_matchs]
        idx_matchs = [x[1] for x in matchs]
        nav_matchs = [[navs[i][idx] if idx is not None else None
                       for idx in idx_matchs[i].values()]
                      for i in range(len(idx_matchs))]

        su.tic("Calculating Index...")
        nvs = pd.DataFrame(nav_matchs).T.astype(float).as_matrix()
        rs = nvs[:-1] / nvs[1:] - 1
        rs[rs > 30] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0
        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)
        # log samples
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp
        su.tic("Year:{0}, Done...".format(year))

    values_r = []
    values_num = []
    for year in range(first_year, process_date.timetuple().tm_year + 1):
        if len(values_r) == 0:
            values_r = result_r[year].tolist()[::-1]
            values_num = components_num[year].tolist()[::-1]
        else:
            values_r.extend(result_r[year].tolist()[::-1])
            values_num.extend(components_num[year].tolist()[::-1])
    result = (np.array(values_r) + 1).cumprod() * 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)
    # NOTE: dt.datetime(year, month + 1, 10) raises ValueError when month == 12
    tag = tu.timeseries_std(
        dt.datetime(year, month + 1, 10),
        tu.periods_in_interval(dt.datetime(year, month + 1, 10), dt.datetime(first_year, 1, 10), 12),
        12)[::-1]
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]
    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]
    cols = ["index_id", "index_name", "typestandard_code", "typestandard_name", "type_code",
            "type_name", "stype_code", "stype_name", "index_method", "data_source",
            "data_source_name"]
    values = [PEIndex.id, PEIndex.name, PEIndex.typestandard["code"], PEIndex.typestandard["name"],
              PEIndex.type["code"], PEIndex.type["name"], PEIndex.stype["code"],
              PEIndex.stype["name"], 1, 0, "私募云通"]
    col_dict = dict(zip(cols, values))
    for col, val in col_dict.items():
        op[col] = val
    dfs = dfs.append(op[:-1])

    if export_path is not None:
        tmp = tag.copy()
        for year in sorted(components.keys(), reverse=True):
            print(year, len(tmp))
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1],
            ]
        io.export_to_xl(
            components,
            "{sd}_{index_name}_m_samples".format(sd=tag[-2].strftime("%Y%m%d"),
                                                 index_name=PEIndex.id),
            export_path)
    return dfs
    # (fragment: the start of main() and the enclosing loop over funds are elided)
            if fue >= 1:
                # NOTE: chained .ix indexing returns a copy in most pandas
                # versions, so this assignment may not write back to df_p2
                df_p2.ix[idx].ix[:fue, "added_nav"] = df_p2.ix[idx].ix[:fue, "nav"]
            else:
                continue
        else:
            print(idx)
            continue

    result = df_p1.append(df_p2).sort_values(by=["fund_id", "statistic_date"],
                                             ascending=[True, True])
    # funds where nav == added_nav but the recorded swanav1 disagrees are treated as bad data
    ids_error = set(result.loc[(result.nav == result.added_nav)
                               & (result.nav != result.swanav1)
                               & (~np.isnan(result.swanav1))].fund_id.drop_duplicates().tolist())
    result = result.loc[result.fund_id.apply(lambda x: x not in ids_error)]
    result["data_source"] = 2
    result.index = range(len(result))
    io.to_sql("fund_nv_data_source", engine_wt,
              result[["fund_id", "statistic_date", "data_source", "added_nav"]],
              type="update")
    return result


if __name__ == "__main__":
    tic("nv_data_source...")
    main()
    tic("Done...")
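# The chained .ix calls above predate pandas 0.20; on current pandas the
# equivalent single-step write uses .loc on both axes. A sketch under the
# assumption that idx labels a block of rows in df_p2 and fue bounds a
# positional slice of that block (hypothetical helper, not the original code):
def fill_added_nav(df_p2, idx, fue):
    rows = df_p2.loc[idx].index[:fue]
    df_p2.loc[rows, "added_nav"] = df_p2.loc[rows, "nav"]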
if __name__ == "__main__":
    su.tic("nv2std...")
    main()
    su.tic("Done...")