Пример #1
0
def calculate(idx, export_path=None):
    """Build the full index value series for the index identified by ``idx``.

    For every year from the year of ``PEIndex.firstmonday`` up to the year of
    ``_process_date`` the function loads that year's NAV rows, aligns each
    fund's NAVs onto the standard time grid returned by
    ``tu.timeseries_std`` (anchored on the last Monday), averages the
    per-period fund returns, and finally chains the averaged returns into an
    index that starts at 1000.

    Args:
        idx: Index identifier handed to ``sf.PEIndex`` and ``sf.SQL_PEIndex``.
        export_path: Optional export destination. When not ``None`` the
            per-year matched NAV matrices are written via ``io.export_to_xl``.

    Returns:
        pandas.DataFrame with the columns ``statistic_date``, ``index_value``
        and ``funds_num`` plus the constant descriptive index columns.
    """
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_date = PEIndex.firstmonday

    result_r = {}
    components_num = {}
    components = {}
    for year in range(first_date.timetuple().tm_year, _process_date.year + 1):

        if year == _process_date.year:
            month = _process_date.month
            day = tu.date_of_weekday(_process_date, 0, (0, 0)).day
            # Correct the date when the Monday lies in the previous month
            # (the week crosses a month boundary).
            # NOTE(review): when _process_date is in January this makes
            # month == 0 and dt.date() below raises ValueError - confirm the
            # intended handling of a week that crosses the year boundary.
            if day > _process_date.day:
                month -= 1
        else:
            month = 12
            day = 31

        date_s = dt.date(year, month, day)

        sql_i = sf.SQL_PEIndex(idx, year).yeardata_w["nv"]
        sql_mindate = sf.SQL_PEIndex(idx, year).yeardata_w["t_min"]

        conn = engine_rd.connect()
        try:
            su.tic("Getting Data")
            d = pd.read_sql(sql_i, conn)
            d.index = range(len(d))

            t_min = pd.read_sql(sql_mindate,
                                conn)["statistic_date_earliest"].tolist()
        finally:
            # Always release the connection, even when a query fails.
            conn.close()
        t_min = [time.mktime(x.timetuple()) for x in t_min]

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        # The first row of every fund marks where that fund's slice starts;
        # this relies on the rows of one fund being contiguous in ``d``.
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()

        # Step back to the Monday of the week containing date_s.
        last_monday = date_s - dt.timedelta(
            cld.weekday(date_s.year, date_s.month, date_s.day))
        t_std = tu.timeseries_std(last_monday, "a", 52, extend=1)
        if year == first_date.timetuple().tm_year:
            t_std = t_std[:-1]

        su.tic("Slicing")
        t_std_long = tu.timeseries_std(
            last_monday,
            tu.periods_in_interval(last_monday, dt.date(year - 1, 11, 30), 12))
        t_std_long_p1m = [(x + relativedelta(months=1)).timestamp()
                          for x in tu.tr(t_std_long)]
        # Compare the actual earliest dates with the standard series dates.
        real_p1m = su.compare(t_min, t_std_long)
        # Compare the standard series dates shifted by one month with the
        # standard series dates.
        p1m_std = su.compare(t_std_long_p1m, t_std)
        data_used = [p1m_std[x - 1] for x in real_p1m]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs = [tu.outer_match4index_w(x, t_std, False) for x in ts]

        su.tic("Getting Result...")
        idx_matchs = [x[1] for x in matchs]
        # For every fund pick the NAV at each matched position; unmatched
        # grid points stay None and become NaN below.
        nav_matchs = [[
            fund_navs[pos] if pos is not None else None
            for pos in matched.values()
        ] for fund_navs, matched in zip(navs, idx_matchs)]

        su.tic("Calculating Index...")
        # np.array() yields a writable float copy (rows: grid points,
        # columns: funds); DataFrame.as_matrix() no longer exists in
        # current pandas.
        nvs = np.array(pd.DataFrame(nav_matchs).T.astype(float), dtype=float)
        print(nvs.shape)
        for i in range(len(ids)):
            # Blank out the rows after the cut-off row derived for fund i.
            nvs[data_used[i] + 1:, i] = np.nan

        # Return of each row relative to the following row.
        rs = nvs[:-1] / nvs[1:] - 1
        # Discard implausible returns before averaging.
        rs[rs > 10] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)

        # log samples: one row per fund with "fund_id" as the first column
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp

        su.tic("Year:{0}, Done...".format(year))

    # Concatenate the yearly results; each year is reversed before chaining.
    values_r = []
    values_num = []
    for year in range(first_date.timetuple().tm_year, date_s.year + 1):
        values_r.extend(result_r[year].tolist()[::-1])
        values_num.extend(components_num[year].tolist()[::-1])

    # Chain the returns into an index with base value 1000.
    result = (np.array(values_r) + 1).cumprod() * 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)

    tag = tu.timeseries_std(dt.datetime(year, month, day),
                            tu.periods_in_interval(
                                dt.datetime(year, month, day),
                                dt.datetime(first_date.year, 1, 1), 12),
                            52,
                            extend=5)[::-1]
    tag = [x for x in tag if x >= first_date.timestamp()]
    tag = [dt.date.fromtimestamp(x) for x in tag]

    # local debug
    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    for col, val in zip(cols, values):
        op[col] = val

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    dfs = pd.concat([dfs, op])

    if export_path is not None:
        # Label the sample columns with dates, consuming ``tag`` from its end
        # while walking the years from newest to oldest.
        tmp = tag.copy()
        for year in sorted(components, reverse=True):
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1]
            ]

        io.export_to_xl(
            components, "{sd}_{index_name}_w_samples".format(
                sd=last_monday.strftime("%Y%m%d"), index_name=PEIndex.id),
            export_path)

    return dfs
Пример #2
0
def calculate(idx):
    """Recompute the most recent values of the index identified by ``idx``.

    For every year from ``PEIndex.firstyear`` up to the year of
    ``process_date`` the function loads that year's NAV rows, aligns each
    fund's NAVs onto the standard time grid returned by
    ``tu.timeseries_std`` and averages the per-period fund returns. Only the
    last ``adjust_periods + 2`` averaged returns are then chained, starting
    from a base index value read from ``fund_month_index_static``.

    Args:
        idx: Index identifier handed to ``sf.PEIndex``.

    Returns:
        pandas.DataFrame with the columns ``statistic_date``, ``index_value``
        and ``funds_num`` plus the constant descriptive index columns; the
        last computed row is dropped before returning.

    Raises:
        KeyError: If no base index value is stored for the base date.
    """
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear

    result_r = {}
    components_num = {}

    for year in range(first_year, process_date.year + 1):
        if year == process_date.timetuple().tm_year:
            month = process_date.month
        else:
            month = 12

        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m

        # Last day of the month preceding (year, month).
        date_s = dt.date(year, month, 1) - dt.timedelta(1)

        conn = engine_rd.connect()
        try:
            su.tic("Getting Data")
            d = pd.read_sql(sql_i, conn)
        finally:
            # Always release the connection, even when the query fails.
            conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        # The first row of every fund marks where that fund's slice starts;
        # this relies on the rows of one fund being contiguous in ``d``.
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))

        t_std = tu.timeseries_std(dt.datetime(year, month, 10),
                                  month,
                                  12,
                                  1,
                                  use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [
            su.merge_result(x1, x2, x3)
            for x1, x2, x3 in zip(matchs1, matchs2, matchs3)
        ]

        su.tic("Getting Result...")
        idx_matchs = [x[1] for x in matchs]
        # For every fund pick the NAV at each matched position; unmatched
        # grid points stay None and become NaN below.
        nav_matchs = [[
            fund_navs[pos] if pos is not None else None
            for pos in matched.values()
        ] for fund_navs, matched in zip(navs, idx_matchs)]

        su.tic("Calculating Index...")
        # DataFrame.as_matrix() no longer exists in current pandas.
        nvs = np.array(pd.DataFrame(nav_matchs).T.astype(float), dtype=float)
        # Return of each row relative to the following row.
        rs = nvs[:-1] / nvs[1:] - 1
        # Discard implausible returns before averaging.
        rs[rs > 30] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)
        su.tic("Year:{0}, Done...".format(year))

    # Concatenate the yearly results; each year is reversed before chaining.
    values_r = []
    values_num = []
    for year in range(first_year, process_date.timetuple().tm_year + 1):
        values_r.extend(result_r[year].tolist()[::-1])
        values_num.extend(components_num[year].tolist()[::-1])

    adjust_periods = 1
    # Month end that lies adjust_periods + 1 months before date_s; the stored
    # index value of that date is the base the recomputed values chain from.
    date_tmp = date_s - relativedelta.relativedelta(months=adjust_periods + 1)
    date_tmp = dt.date(date_tmp.year, date_tmp.month,
                       cld.monthrange(date_tmp.year, date_tmp.month)[1])
    sql_base = (
        "SELECT index_value FROM fund_month_index_static WHERE index_id = '{idx_id}' "
        "    AND statistic_date = '{sd}'").format(idx_id=PEIndex.id,
                                                  sd=date_tmp)
    # DataFrame.get_value was removed in pandas 1.0; .at is the replacement.
    base = pd.read_sql(sql_base, engine_rd).at[0, "index_value"]

    result = (np.array(values_r)[-(adjust_periods + 1) - 1:] +
              1).cumprod() * base
    result = result.tolist()
    values_num = values_num[-(adjust_periods + 1) - 1:]

    # Anchor on the 10th of the month after the last processed month, rolling
    # over into January of the next year when that month is December
    # (dt.datetime(year, 13, 10) would raise ValueError).
    if month == 12:
        tag_end = dt.datetime(year + 1, 1, 10)
    else:
        tag_end = dt.datetime(year, month + 1, 10)
    tag = tu.timeseries_std(
        tag_end,
        tu.periods_in_interval(tag_end, dt.datetime(first_year, 1, 10), 12),
        12)[::-1]
    # 864000 s = 10 days: shift each timestamp back by ten days.
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]
    tag = tag[-(adjust_periods + 1) - 1:]

    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    for col, val in zip(cols, values):
        op[col] = val

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    dfs = pd.concat([dfs, op[:-1]])
    return dfs
Пример #3
0
def calculate(idx, export_path=None):
    """Build the full index value series for the index identified by ``idx``.

    For every year from ``PEIndex.firstyear`` up to the year of
    ``process_date`` the function loads that year's NAV rows, aligns each
    fund's NAVs onto the standard time grid returned by
    ``tu.timeseries_std``, averages the per-period fund returns, and finally
    chains the averaged returns into an index that starts at 1000.

    Args:
        idx: Index identifier handed to ``sf.PEIndex``.
        export_path: Optional export destination. When not ``None`` the
            per-year matched NAV matrices are written via ``io.export_to_xl``.

    Returns:
        pandas.DataFrame with the columns ``statistic_date``, ``index_value``
        and ``funds_num`` plus the constant descriptive index columns; the
        last computed row is dropped before returning.
    """
    dfs = pd.DataFrame()

    PEIndex = sf.PEIndex(idx)
    first_year = PEIndex.firstyear

    result_r = {}
    components_num = {}
    components = {}

    for year in range(first_year, process_date.year + 1):
        if year == process_date.timetuple().tm_year:
            month = process_date.month
        else:
            month = 12

        sql_i = sf.SQL_PEIndex(PEIndex.idx, year).yeardata_m

        conn = engine_rd.connect()
        try:
            su.tic("Getting Data")
            d = pd.read_sql(sql_i, conn)
        finally:
            # Always release the connection, even when the query fails.
            conn.close()

        su.tic("Preprocessing...")
        d["statistic_date"] = d["statistic_date"].apply(
            lambda x: time.mktime(x.timetuple()))
        # The first row of every fund marks where that fund's slice starts;
        # this relies on the rows of one fund being contiguous in ``d``.
        d_dd = d.drop_duplicates("fund_id")
        idx_slice = d_dd.index.tolist()
        idx_slice.append(len(d))
        ids = d_dd["fund_id"].tolist()

        t_std = tu.timeseries_std(dt.datetime(year, month, 10),
                                  month,
                                  12,
                                  1,
                                  use_lastday=True)
        t_std1 = t_std[:-1]

        su.tic("Grouping...")
        ds = [
            d[idx_slice[i]:idx_slice[i + 1]]
            for i in range(len(idx_slice) - 1)
        ]
        ts = [x["statistic_date"].tolist() for x in ds]
        navs = [x["nav"].tolist() for x in ds]

        su.tic("Matching...")
        matchs1 = [tu.outer_match4index_f7(x, t_std1, False) for x in ts]
        matchs2 = [tu.outer_match4index_b7(x, t_std1) for x in ts]
        matchs3 = [tu.outer_match4index_m(x, t_std, False) for x in ts]
        matchs = [
            su.merge_result(x1, x2, x3)
            for x1, x2, x3 in zip(matchs1, matchs2, matchs3)
        ]

        su.tic("Getting Result...")
        idx_matchs = [x[1] for x in matchs]
        # For every fund pick the NAV at each matched position; unmatched
        # grid points stay None and become NaN below.
        nav_matchs = [[
            fund_navs[pos] if pos is not None else None
            for pos in matched.values()
        ] for fund_navs, matched in zip(navs, idx_matchs)]

        su.tic("Calculating Index...")
        # Rows: grid points, columns: funds. DataFrame.as_matrix() no longer
        # exists in current pandas.
        nvs = np.array(pd.DataFrame(nav_matchs).T.astype(float), dtype=float)
        # Return of each row relative to the following row.
        rs = nvs[:-1] / nvs[1:] - 1
        # Discard implausible returns before averaging.
        rs[rs > 30] = np.nan
        rs[rs < -1] = np.nan
        r = np.nanmean(rs, axis=1)
        r[np.isnan(r)] = 0

        result_r[year] = r
        components_num[year] = np.sum(~np.isnan(rs), axis=1)

        # log samples: one row per fund with "fund_id" as the first column
        tmp = pd.DataFrame(nvs, columns=ids).T
        tmp["fund_id"] = tmp.index
        tmp = tmp[[tmp.columns[-1], *tmp.columns[:-1]]]
        components[year] = tmp

        su.tic("Year:{0}, Done...".format(year))

    # Concatenate the yearly results; each year is reversed before chaining.
    values_r = []
    values_num = []
    for year in range(first_year, process_date.timetuple().tm_year + 1):
        values_r.extend(result_r[year].tolist()[::-1])
        values_num.extend(components_num[year].tolist()[::-1])

    # Chain the returns into an index with base value 1000.
    result = (np.array(values_r) + 1).cumprod() * 1000
    result = result.tolist()
    result.insert(0, 1000)
    values_num.insert(0, 0)

    # Anchor on the 10th of the month after the last processed month, rolling
    # over into January of the next year when that month is December
    # (dt.datetime(year, 13, 10) would raise ValueError).
    if month == 12:
        tag_end = dt.datetime(year + 1, 1, 10)
    else:
        tag_end = dt.datetime(year, month + 1, 10)
    tag = tu.timeseries_std(
        tag_end,
        tu.periods_in_interval(tag_end, dt.datetime(first_year, 1, 10), 12),
        12)[::-1]
    # 864000 s = 10 days: shift each timestamp back by ten days.
    tag = [dt.date.fromtimestamp(x - 864000) for x in tag]

    op = pd.DataFrame(list(zip(tag, result, values_num)))
    op.columns = ["statistic_date", "index_value", "funds_num"]

    cols = [
        "index_id", "index_name", "typestandard_code", "typestandard_name",
        "type_code", "type_name", "stype_code", "stype_name", "index_method",
        "data_source", "data_source_name"
    ]
    values = [
        PEIndex.id, PEIndex.name, PEIndex.typestandard["code"],
        PEIndex.typestandard["name"], PEIndex.type["code"],
        PEIndex.type["name"], PEIndex.stype["code"], PEIndex.stype["name"], 1,
        0, "私募云通"
    ]
    for col, val in zip(cols, values):
        op[col] = val

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    dfs = pd.concat([dfs, op[:-1]])

    if export_path is not None:
        # Label the sample columns with dates, consuming ``tag`` from its end
        # while walking the years from newest to oldest.
        tmp = tag.copy()
        for year in sorted(components, reverse=True):
            print(year, len(tmp))
            components[year].columns = [
                "fund_id",
                *[tmp.pop() for i in range(len(components[year].columns) - 2)],
                tmp[-1]
            ]
        io.export_to_xl(
            components,
            "{sd}_{index_name}_m_samples".format(sd=tag[-2].strftime("%Y%m%d"),
                                                 index_name=PEIndex.id),
            export_path)

    return dfs