Exemplo n.º 1
0
def get_shfe_daily(date=None):
    """
        Fetch the daily trading data of the Shanghai Futures Exchange (SHFE).
    Parameters
    ------
        date: trading day, format YYYY-MM-DD or YYYYMMDD or a datetime.date
              object; today is used when None
    Return
    -------
        DataFrame
            SHFE daily trading data restricted to ``cons.OUTPUT_COLUMNS``;
            per the original notes the columns are:
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                volume        volume
                open_interest open interest
                turnover      turnover
                settle        settlement price
                pre_settle    previous settlement price
                variety       contract variety
        or None (non-trading day, HTTP error, or no trading data for the day)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_str = day.strftime('%Y%m%d')
    if day_str not in calendar:
        warnings.warn('%s非交易日' % day_str)
        return None
    url = cons.SHFE_DAILY_URL % day_str
    try:
        json_data = json.loads(
            requests_link(url, headers=cons.shfe_headers).text)
    except requests.HTTPError as reason:
        # HTTPError.response is a Response object (or None), never an int,
        # so the status code has to be read from it explicitly.
        status_code = getattr(reason.response, 'status_code', None)
        if status_code != 404:
            print(url, reason)
        return None

    if len(json_data['o_curinstrument']) == 0:
        return None

    # Drop subtotal / total rows and rows without a delivery month.
    df = pd.DataFrame([
        row for row in json_data['o_curinstrument']
        if row['DELIVERYMONTH'] not in ['小计', '合计', '']
    ])
    if df.empty:
        # Nothing but summary rows: there is no contract data to return.
        return None
    df['variety'] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df['symbol'] = df['variety'] + df['DELIVERYMONTH']
    df['date'] = day_str
    v_wap_df = get_shfe_v_wap(day)
    if v_wap_df is not None:
        # Turnover is derived from the full-session average price.
        df = pd.merge(df,
                      v_wap_df[v_wap_df.time_range == '9:00-15:00'],
                      on=['date', 'symbol'],
                      how='left')
        df['turnover'] = df.v_wap * df.VOLUME
    else:
        # No average-price data: fall back to volume * settlement price,
        # treating a blank volume as zero.
        df['VOLUME'] = df['VOLUME'].apply(lambda x: 0 if x == '' else x)
        df['turnover'] = df['VOLUME'] * df['SETTLEMENTPRICE']
    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
Exemplo n.º 2
0
def _czce_df_read(url, skip_rows, encoding='utf-8', header=0):
    """
    Download a Zhengzhou Commodity Exchange page and parse its HTML tables.
    :param url: page address, string
    :param skip_rows: number of leading rows to skip, int
    :param encoding: utf-8 or gbk or gb2312; passed to requests_link
    :param header: forwarded to pandas.read_html as ``header``
    :return: the result of pandas.read_html (a list of DataFrames)
    """
    response = requests_link(url, encoding)
    # All parser settings are collected in one place; note that the
    # read_html encoding is fixed to "gbk" regardless of ``encoding``.
    parser_options = {
        'match': '.+',
        'flavor': None,
        'header': header,
        'index_col': 0,
        'skiprows': skip_rows,
        'attrs': None,
        'parse_dates': False,
        'thousands': ', ',
        'encoding': "gbk",
        'decimal': '.',
        'converters': None,
        'na_values': None,
        'keep_default_na': True,
    }
    return pd.read_html(response.text, **parser_options)
Exemplo n.º 3
0
def get_shfe_inv_2(date: str = None):
    """
        Fetch inventory / registered warehouse-receipt data from the
        Shanghai Futures Exchange.
        Per the original notes, applies from 20140519 (inclusive) onward.
        Parameters
        ------
            date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object;
                  today is used when None
        Return
        -------
            DataFrame with one row per distinct VARNAME value:
                var                 chinese_to_english() of the product label
                var_label           VARNAME text before '$$', parentheses removed
                spot_inventory      last SPOTWGHTS value as int (0 when blank)
                warrant_inventory   last WRTWGHTS value as int (0 when blank)
                warehouse_stocks    last WHSTOCKS value as int (0 when blank)
                date                date string YYYYMMDD
            An empty DataFrame when the response is not JSON or has no
            columns; None when the date is not a trading day.
    """
    # Keep the date object and its string form apart: the calendar lookup and
    # the URL need the YYYYMMDD string for both a given date and "today".
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = day.strftime('%Y%m%d')
    if date_str not in calendar:
        warnings.warn('%s非交易日' % date_str)
        return None
    url = cons.SHFE_INV_URL_2 % date_str
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except (ValueError, AttributeError):
        # Body is not valid JSON, or no response object was returned.
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()

    def _last_as_int(frame, column):
        """Return the last value of *column* as int, treating '' as 0."""
        value = frame[column].tolist()[-1]
        return 0 if value == '' else int(value)

    rows = []
    # unique() keeps first-appearance order, so the output row order is
    # deterministic (iterating a set of strings is not).
    for var in data['VARNAME'].unique():
        data_cut = data[data['VARNAME'] == var]
        prod_key = var.split('$$')[0]
        prod_key = prod_key.replace("(", "").replace(")", "")
        rows.append({
            'var': chinese_to_english(prod_key),
            'var_label': prod_key,
            'spot_inventory': _last_as_int(data_cut, 'SPOTWGHTS'),
            'warrant_inventory': _last_as_int(data_cut, 'WRTWGHTS'),
            'warehouse_stocks': _last_as_int(data_cut, 'WHSTOCKS'),
            'date': date_str,
        })
    # Build the frame once instead of appending row by row.
    return pd.DataFrame(rows).reset_index(drop=True)
Exemplo n.º 4
0
def get_cffex_rank_table(date="20200427", vars_list=cons.contract_symbols):
    """
    China Financial Futures Exchange (CFFEX): detailed top-20 member position rankings.
    Note (from the original documentation): the exchange publishes rankings both
    per variety and per contract.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; the
        default is "20200427", and today is used when None is passed explicitly
    :param vars_list: list of variety codes; only those present in
        cons.market_exchange_symbols["cffex"] are queried. Dates before
        20100416 are rejected; per the original notes, data is updated around
        16:30 on each trading day
    :return: dict mapping contract symbol -> pd.DataFrame; an empty dict when
        the date is too early or not a trading day; False when a request
        yields a falsy response. Per the original documentation each table
        holds:
    rank                        rank                                   int
    vol_party_name              member at this rank by volume          string
    vol                         member's volume                        int
    vol_chg                     change in member's volume              int
    long_party_name             member at this rank by long position   string
    long_open_interest          member's long position                 int
    long_open_interest_chg      change in member's long position       int
    short_party_name            member at this rank by short position  string
    short_open_interest         member's short position                int
    short_open_interest_chg     change in member's short position      int
    symbol                      contract                               string
    var                         variety                                string
    date                        date                                   string YYYYMMDD
    """
    vars_list = [
        i for i in vars_list if i in cons.market_exchange_symbols["cffex"]
    ]
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2010, 4, 16):
        print(Exception("cffex数据源开始日期为20100416,跳过"))
        return {}
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return {}
    big_dict = {}
    for var in vars_list:
        url = cons.CFFEX_VOL_RANK_URL % (
            date.strftime("%Y%m"),
            date.strftime("%d"),
            var,
        )
        r = requests_link(url, encoding="gbk")
        if not r:
            return False
        if "网页错误" in r.text:
            continue
        # Older pages carry a preamble ending in "\n交易日,"; from 20200316 the
        # layout changed and the marker is absent, in which case the first
        # four characters are dropped instead. An explicit length check
        # replaces the previous bare ``except`` used as control flow.
        parts = r.text.split("\n交易日,")
        csv_text = parts[1] if len(parts) > 1 else parts[0][4:]
        table = pd.read_csv(StringIO(csv_text))
        table = table.dropna(how="any")
        table = table.applymap(lambda x: x.strip()
                               if isinstance(x, str) else x)
        for symbol in set(table["合约"]):
            table_cut = table[table["合约"] == symbol]
            table_cut.columns = ["symbol", "rank"] + rank_columns
            table_cut = _table_cut_cal(pd.DataFrame(table_cut), symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
Exemplo n.º 5
0
def get_czce_receipt_1(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    Fetch registered warehouse-receipt data from the Zhengzhou Commodity Exchange.
    Per the original notes, applies from 20080222 to 20100824 (inclusive).
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :type date: str
    :param vars_list: list of variety codes such as CF, TA; only varieties in
        this list are kept in the result
    :type vars_list: list
    :return: one row per variety with columns var, receipt, receipt_chg, date
        (YYYYMMDD string); an empty DataFrame for 20090820; None when the date
        is not a trading day
    :rtype: pandas.DataFrame
    """
    # Keep the date object and its string form apart: the calendar lookup and
    # the URL need the YYYYMMDD string for both a given date and "today".
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = day.strftime('%Y%m%d')
    if date_str not in calendar:
        warnings.warn('%s非交易日' % date_str)
        return None
    if date_str == '20090820':
        return pd.DataFrame()
    url = cons.CZCE_RECEIPT_URL_1 % date_str
    r = requests_link(url, encoding='utf-8')
    data = pd.read_html(r.text)[1]

    # Materialise the first column once instead of once per index value.
    first_column = [str(value) for value in data[0].tolist()]
    indexes = [x for x in data.index if '品种:' in first_column[x]]
    ends = [x for x in data.index if '总计' in first_column[x]]

    rows = []
    last_position = len(indexes) - 1
    for i, start in enumerate(indexes):
        if i != last_position:
            data_cut = data.loc[start:ends[i], :]
        else:
            # The final section runs to the end of the table.
            data_cut = data.loc[start:, :]
        data_cut = data_cut.ffill()
        title = data_cut[0].tolist()[0]
        if 'PTA' in title:
            var = 'TA'
        else:
            var = chinese_to_english(re.sub(r'[A-Z]+', '', title[3:]))
        # For CF the totals are read one column further to the right.
        if var == 'CF':
            receipt = data_cut[6].tolist()[-1]
            receipt_chg = data_cut[7].tolist()[-1]
        else:
            receipt = data_cut[5].tolist()[-1]
            receipt_chg = data_cut[6].tolist()[-1]
        rows.append({
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date_str
        })
    # Build the frame once instead of appending row by row.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 6
0
def get_shfe_receipt_2(
        date: str = None,
        vars_list: List = cons.contract_symbols) -> pd.DataFrame:
    """
    Shanghai Futures Exchange - registered warehouse-receipt data - type 2.
    Per the original notes, applies from 20140519 (inclusive) to the present.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :type date: str
    :param vars_list: list of variety codes such as RB, AL; only varieties in
        this list are kept in the result
    :type vars_list: list
    :return: one row per variety with columns var, receipt, receipt_chg, date
        (receipt figures summed per variety); an empty DataFrame when the
        response is not JSON or has no columns; None when ``vars_list`` is not
        a list or the date is not a trading day
    :rtype: pandas.DataFrame
    """
    if not isinstance(vars_list, list):
        warnings.warn("symbol_list: 必须是列表")
        return None
    # Keep the date object and its string form apart: the calendar lookup and
    # the URL need the YYYYMMDD string for both a given date and "today".
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = day.strftime('%Y%m%d')
    if date_str not in calendar:
        warnings.warn('%s非交易日' % date_str)
        return None
    url = cons.SHFE_RECEIPT_URL_2 % date_str
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except (ValueError, AttributeError):
        # Body is not valid JSON, or no response object was returned.
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()

    rows = []
    for var in set(data['VARNAME'].tolist()):
        data_cut = data[data['VARNAME'] == var]
        # Names containing "BC" map straight to the BC code; every other name
        # is stripped of non-word characters and Latin letters, then translated.
        if "BC" in var:
            var_code = "BC"
        else:
            var_code = chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var))
        rows.append({
            'var': var_code,
            'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
            'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
            'date': date_str
        })

    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        # Several VARNAME values can map to one code, so aggregate once after
        # the loop (the sum is the same as re-aggregating every iteration).
        records = records.groupby('var')[['receipt', 'receipt_chg'
                                          ]].sum().reset_index()
        records['date'] = date_str
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 7
0
Arquivo: cot.py Projeto: dgczy/akshare
def get_cffex_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    China Financial Futures Exchange (CFFEX): detailed top-20 member position rankings.
    Note (from the original documentation): the exchange publishes rankings both
    per variety and per contract.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :param vars_list: list of variety codes; only those present in
        cons.market_exchange_symbols['cffex'] are queried. Dates before
        20100416 are rejected; per the original notes, data is updated around
        16:30 on each trading day
    :return: dict mapping contract symbol -> pd.DataFrame; an empty dict when
        the date is too early or not a trading day; False when a request
        yields a falsy response. Per the original documentation each table
        holds:
    rank                        rank                                   int
    vol_party_name              member at this rank by volume          string
    vol                         member's volume                        int
    vol_chg                     change in member's volume              int
    long_party_name             member at this rank by long position   string
    long_open_interest          member's long position                 int
    long_open_interest_chg      change in member's long position       int
    short_party_name            member at this rank by short position  string
    short_open_interest         member's short position                int
    short_open_interest_chg     change in member's short position      int
    symbol                      contract                               string
    var                         variety                                string
    date                        date                                   string YYYYMMDD
    """
    vars_list = [
        i for i in vars_list if i in cons.market_exchange_symbols['cffex']
    ]
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2010, 4, 16):
        print(Exception("cffex数据源开始日期为20100416,跳过"))
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    big_dict = {}
    for var in vars_list:
        url = cons.CFFEX_VOL_RANK_URL % (date.strftime('%Y%m'),
                                         date.strftime('%d'), var)
        r = requests_link(url, encoding='gbk')
        if not r:
            return False
        if '网页错误' in r.text:
            continue
        # Pages normally carry a preamble ending in "\n交易日,". When the
        # marker is missing (the layout used from 20200316 on), fall back to
        # dropping the first four characters instead of raising IndexError.
        parts = r.text.split('\n交易日,')
        csv_text = parts[1] if len(parts) > 1 else parts[0][4:]
        table = pd.read_csv(StringIO(csv_text))
        table = table.dropna(how='any')
        table = table.applymap(lambda x: x.strip()
                               if isinstance(x, str) else x)
        for symbol in set(table['合约']):
            table_cut = table[table['合约'] == symbol]
            table_cut.columns = ['symbol', 'rank'] + rank_columns
            table_cut = _table_cut_cal(pd.DataFrame(table_cut), symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
Exemplo n.º 8
0
def get_shfe_receipt_2(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
        Fetch registered warehouse-receipt data from the Shanghai Futures Exchange.
        Per the original notes, applies from 20140519 (inclusive) onward.
        Parameters
        ------
            date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object;
                  today is used when None
            vars_list: list of variety codes such as RB, AL; only varieties in
                  this list are kept in the result
        Return
        -------
            DataFrame with one row per distinct VARNAME value:
                var             variety code                 string
                receipt         last WRTWGHTS value          int
                receipt_chg     last WRTCHANGE value         int
                date            date                         string YYYYMMDD
            An empty DataFrame when the response is not JSON or has no
            columns; None when the date is not a trading day.
    """
    # Keep the date object and its string form apart: the calendar lookup and
    # the URL need the YYYYMMDD string for both a given date and "today".
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = day.strftime('%Y%m%d')
    if date_str not in calendar:
        warnings.warn('%s非交易日' % date_str)
        return None
    url = cons.SHFE_RECEIPT_URL_2 % date_str
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except (ValueError, AttributeError):
        # Body is not valid JSON, or no response object was returned.
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()

    rows = []
    for var in set(data['VARNAME'].tolist()):
        data_cut = data[data['VARNAME'] == var]
        rows.append({
            # Strip non-word characters and Latin letters before translating.
            'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
            'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
            'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
            'date': date_str
        })
    # Build the frame once instead of appending row by row.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 9
0
def get_shfe_v_wap(date=None):
    """
        Fetch the daily average traded price data of the Shanghai Futures Exchange.
    Parameters
    ------
        date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    Return
    -------
        DataFrame
            columns are the values of ``cons.SHFE_V_WAP_COLUMNS``; per the
            original notes:
                symbol        contract code
                date          date
                time_range    v_wap period, either 09:00-10:15 or 09:00-15:00
                v_wap         volume-weighted average traded price
        or None (non-trading day, HTTP error, no data, or the payload could
        not be converted)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    day_str = day.strftime("%Y%m%d")
    if day_str not in calendar:
        warnings.warn("%s非交易日" % day_str)
        return None
    url = cons.SHFE_V_WAP_URL % day_str
    try:
        json_data = json.loads(
            requests_link(
                url,
                headers=cons.headers,
                encoding="utf-8",
            ).text
        )
    except requests.HTTPError as reason:
        # HTTPError.response is a Response object (or None), never an int,
        # so the status code has to be read from it explicitly. The message
        # reports the URL that was actually requested.
        status_code = getattr(reason.response, "status_code", None)
        if status_code not in [404, 403]:
            print(url, reason)
        return None

    if len(json_data["o_currefprice"]) == 0:
        return None
    try:
        df = pd.DataFrame(json_data["o_currefprice"])
        df["INSTRUMENTID"] = df["INSTRUMENTID"].str.strip()
        # NOTE(review): the converted column is discarded, so this line only
        # matters through the exception it may raise (missing ":B1" column or
        # failed conversion) - confirm whether an assignment was intended.
        df[":B1"].astype("int16")
        return df.rename(columns=cons.SHFE_V_WAP_COLUMNS)[
            list(cons.SHFE_V_WAP_COLUMNS.values())
        ]
    except Exception:
        # Best effort: any malformed payload is reported as "no data".
        return None
Exemplo n.º 10
0
Arquivo: cot.py Projeto: xyzwj/akshare
def _czce_df_read(url, skip_rows, encoding='utf-8', header=0):
    """
    Download a Zhengzhou Commodity Exchange page and parse its HTML tables.
    :param url: page address, string
    :param skip_rows: number of leading rows to skip, int
    :param encoding: utf-8 or gbk or gb2312; passed to requests_link
    :param header: forwarded to pandas.read_html as ``header``
    :return: the result of pandas.read_html (a list of DataFrames)
    """
    # NOTE(review): the Cookie value is hard-coded; presumably it was captured
    # from a browser session and may expire - confirm.
    request_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
        "Host": "www.czce.com.cn",
        "Cookie": "XquW6dFMPxV380S=CAaD3sMkdXv3fUoaJlICIEv0MVegGq5EoMyBcxkOjCgSjmpuovYFuTLtYFcxTZGw; XquW6dFMPxV380T=5QTTjUlA6f6WiDO7fMGmqNxHBWz.hKIc8lb_tc1o4nHrJM4nsXCAI9VHaKyV_jkHh4cIVvD25kGQAh.MvLL1SHRA20HCG9mVVHPhAzktNdPK3evjm0NYbTg2Gu_XGGtPhecxLvdFQ0.JlAxy_z0C15_KdO8kOI18i4K0rFERNPxjXq5qG1Gs.QiOm976wODY.pe8XCQtAsuLYJ.N4DpTgNfHJp04jhMl0SntHhr.jhh3dFjMXBx.JEHngXBzY6gQAhER7uSKAeSktruxFeuKlebse.vrPghHqWvJm4WPTEvDQ8q",
    }
    response = requests_link(url, encoding, headers=request_headers)
    # All parser settings are collected in one place; note that the
    # read_html encoding is fixed to "gbk" regardless of ``encoding``.
    parser_options = {
        'match': '.+',
        'flavor': None,
        'header': header,
        'index_col': 0,
        'skiprows': skip_rows,
        'attrs': None,
        'parse_dates': False,
        'thousands': ', ',
        'encoding': "gbk",
        'decimal': '.',
        'converters': None,
        'na_values': None,
        'keep_default_na': True,
    }
    return pd.read_html(response.text, **parser_options)
Exemplo n.º 11
0
def get_dce_daily(date=None, symbol_type="futures", retries=0):
    """
        Fetch the daily trading data of the Dalian Commodity Exchange (DCE).
    Parameters
    ------
        date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
        symbol_type: data type, either 'futures' or 'option'
        retries: int, current retry count; the call gives up once it exceeds 3
    Return
    -------
        DataFrame
            DCE daily futures data restricted to ``cons.OUTPUT_COLUMNS``; per
            the original notes:
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                volume        volume
                open_interest open interest
                turnover      turnover
                settle        settlement price
                pre_settle    previous settlement price
                variety       contract variety
        or
        DataFrame
            DCE daily option data restricted to ``cons.OPTION_OUTPUT_COLUMNS``;
            per the original notes:
                symbol        contract code
                date          date
                open          open price
                high          high price
                low           low price
                close         close price
                pre_settle    previous settlement price
                settle        settlement price
                delta         delta
                volume        volume
                open_interest open interest
                oi_change     change in open interest
                turnover      turnover
                implied_volatility implied volatility
                exercise_volume   exercise volume
                variety       contract variety
        or None (non-trading day, invalid symbol_type, retries exhausted,
        HTTP error, or no trading data for the day)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_str = day.strftime("%Y%m%d")
    if day_str not in calendar:
        warnings.warn("%s非交易日" % day_str)
        return None
    if retries > 3:
        print("maximum retires for DCE market data: ", day_str)
        return None

    # The "month" query parameter is sent zero-based (calendar month minus 1).
    query = {
        "currDate": day_str,
        "year": day.strftime("%Y"),
        "month": str(int(day.strftime("%m")) - 1),
        "day": day.strftime("%d"),
    }
    if symbol_type == "futures":
        listed_columns = cons.DCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
    elif symbol_type == "option":
        query["dayQuotes.trade_type"] = "1"
        listed_columns = cons.DCE_OPTION_COLUMNS
        output_columns = cons.OPTION_OUTPUT_COLUMNS
    else:
        print("invalid symbol_type :" + symbol_type +
              ', should be one of "futures" or "option"')
        return None
    url = cons.DCE_DAILY_URL + "?" + urllib.parse.urlencode(query)

    def _retry():
        """Re-run the same request, keeping symbol_type, with one more retry."""
        return get_dce_daily(day,
                             symbol_type=symbol_type,
                             retries=retries + 1)

    def _parse_fields(cells):
        """Convert the value cells of one table row, keyed by listed_columns."""
        parsed = {}
        for i, field in enumerate(listed_columns):
            field_content = cells[i + 2].text.strip()
            # NOTE(review): any cell containing "-" becomes 0, which also
            # zeroes negative numbers - confirm that this is intended.
            if "-" in field_content:
                parsed[field] = 0
            elif field in ["volume", "open_interest"]:
                parsed[field] = int(field_content.replace(",", ""))
            else:
                parsed[field] = float(field_content.replace(",", ""))
        return parsed

    try:
        response = requests_link(url, method="post", headers=cons.headers).text
    except requests.exceptions.ContentDecodingError:
        return _retry()
    except requests.exceptions.HTTPError as reason:
        # HTTPError.response is a Response object (or None), never an int,
        # so the status code has to be read from it explicitly.
        status_code = getattr(reason.response, "status_code", None)
        if status_code == 504:
            return _retry()
        if status_code != 404:
            print(cons.DCE_DAILY_URL, reason)
        return None

    if "错误:您所请求的网址(URL)无法获取" in response:
        return _retry()
    elif "暂无数据" in response:
        return None

    data = BeautifulSoup(response, "html.parser").find_all("tr")
    if len(data) == 0:
        return None

    dict_data = list()
    implied_data = list()
    for i_data in data[1:]:
        # Skip subtotal / total rows.
        if "小计" in i_data.text or "总计" in i_data.text:
            continue
        x = i_data.find_all("td")
        if symbol_type == "futures":
            row_dict = {"variety": cons.DCE_MAP[x[0].text.strip()]}
            row_dict["symbol"] = row_dict["variety"] + x[1].text.strip()
            row_dict.update(_parse_fields(x))
            dict_data.append(row_dict)
        elif len(x) == 16:
            # Option quote row.
            m = cons.FUTURES_SYMBOL_PATTERN.match(x[1].text.strip())
            if not m:
                continue
            row_dict = {
                "symbol": x[1].text.strip(),
                "variety": m.group(1).upper(),
                "contract_id": m.group(0),
            }
            row_dict.update(_parse_fields(x))
            dict_data.append(row_dict)
        elif len(x) == 2:
            # Two-cell rows carry the implied volatility per contract.
            implied_data.append({
                "contract_id": x[0].text.strip(),
                "implied_volatility": float(x[1].text.strip()),
            })
    if not dict_data:
        # No quote rows parsed: report "no data" instead of failing on the
        # column selection below.
        return None
    df = pd.DataFrame(dict_data)
    df["date"] = day_str
    if symbol_type == "futures":
        return df[output_columns]
    else:
        return pd.merge(
            df,
            pd.DataFrame(implied_data),
            on="contract_id",
            how="left",
            indicator=False,
        )[output_columns]
Exemplo n.º 12
0
def get_cffex_daily(date=None):
    """
    China Financial Futures Exchange (CFFEX) daily trading data.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :return: pandas.DataFrame restricted to ``cons.OUTPUT_COLUMNS``; per the
        original notes:
    symbol        contract code
    date          date
    open          open price
    high          high price
    low           low price
    close         close price
    volume        volume
    open_interest open interest
    turnover      turnover
    settle        settlement price
    pre_settle    previous settlement price (computed as close - change1)
    variety       contract variety
    or None (non-trading day, HTTP error, error page, unexpected header, or no
    trading data for the day)
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_const = day.strftime("%Y%m%d")
    if day_const not in calendar:
        warnings.warn("%s非交易日" % day_const)
        return None
    # Build the URL once so the request and the error message always agree
    # (the error path previously formatted the template with ``%`` although
    # the request used ``str.format``).
    url = cons.CFFEX_DAILY_URL.format(day.strftime("%Y%m"),
                                      day.strftime("%d"), day_const)
    try:
        html = requests_link(
            url,
            encoding="gbk",
            headers=cons.headers,
        ).text
    except requests.exceptions.HTTPError as reason:
        # HTTPError.response is a Response object (or None), never an int,
        # so the status code has to be read from it explicitly.
        status_code = getattr(reason.response, "status_code", None)
        if status_code != 404:
            print(url, reason)
        return None

    if html.find("网页错误") >= 0:
        return None
    # Drop the last two lines and any line starting with "小"; startswith()
    # is safe on empty lines, where indexing the first character would raise.
    rows = [
        line.replace(" ", "").split(",") for line in html.split("\n")[:-2]
        if not line.startswith("小")
    ]

    if not rows or rows[0][0] != "合约代码":
        return None

    dict_data = list()
    for row in rows[1:]:
        m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}

        for i, field in enumerate(cons.CFFEX_COLUMNS):
            if row[i + 1] == "":
                row_dict[field] = 0.0
            elif field in ["volume", "open_interest", "oi_chg"]:
                row_dict[field] = int(row[i + 1])
            else:
                try:
                    row_dict[field] = float(row[i + 1])
                except ValueError:
                    # Non-numeric cell: the field is left unset for this row.
                    pass
        row_dict["pre_settle"] = row_dict["close"] - row_dict["change1"]
        dict_data.append(row_dict)

    if not dict_data:
        # No contract rows parsed: report "no data" instead of failing on the
        # column selection below.
        return None
    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
Exemplo n.º 13
0
def get_shfe_daily(date=None):
    """
    Shanghai Futures Exchange (SHFE) - daily frequency - price and volume data.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :type date: str or datetime.date
    :return: SHFE daily data restricted to ``cons.OUTPUT_COLUMNS``, or None
        (non-trading day, HTTP error, or no trading data for the day)
    :rtype: pandas.DataFrame or None
    Per the original notes the columns are:
    symbol        contract code
    date          date
    open          open price
    high          high price
    low           low price
    close         close price
    volume        volume
    open_interest open interest
    turnover      turnover
    settle        settlement price
    pre_settle    previous settlement price
    variety       contract variety
    """
    day = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_str = day.strftime("%Y%m%d")
    if day_str not in calendar:
        warnings.warn("%s非交易日" % day_str)
        return None
    url = cons.SHFE_DAILY_URL % day_str
    try:
        json_data = json.loads(
            requests_link(
                url,
                headers=cons.shfe_headers,
            ).text)
    except requests.HTTPError as reason:
        # HTTPError.response is a Response object (or None), never an int,
        # so the status code has to be read from it explicitly.
        status_code = getattr(reason.response, "status_code", None)
        if status_code != 404:
            print(url, reason)
        return None

    if len(json_data["o_curinstrument"]) == 0:
        return None

    # Drop subtotal / total rows and rows without a delivery month.
    df = pd.DataFrame([
        row for row in json_data["o_curinstrument"]
        if row["DELIVERYMONTH"] not in ["小计", "合计", ""]
    ])
    if df.empty:
        # Nothing but summary rows: there is no contract data to return.
        return None
    df["variety"] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df["symbol"] = df["variety"] + df["DELIVERYMONTH"]
    df["date"] = day_str
    v_wap_df = get_shfe_v_wap(day)
    if v_wap_df is not None:
        # Turnover is derived from the full-session average price.
        df = pd.merge(
            df,
            v_wap_df[v_wap_df.time_range == "9:00-15:00"],
            on=["date", "symbol"],
            how="left",
        )
        df["turnover"] = df.v_wap * df.VOLUME
    else:
        # No average-price data: fall back to volume * settlement price,
        # treating a blank volume as zero.
        df["VOLUME"] = df["VOLUME"].apply(lambda x: 0 if x == "" else x)
        df["turnover"] = df["VOLUME"] * df["SETTLEMENTPRICE"]
    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
Exemplo n.º 14
0
Arquivo: cot.py Projeto: dgczy/akshare
def get_dce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Dalian Commodity Exchange (DCE): detailed top-20 member position rankings.
    Note (from the original documentation): the exchange publishes rankings both
    per variety and per contract.
    :param date: format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :param vars_list: list of variety codes; only those present in
        cons.market_exchange_symbols['dce'] are queried. Dates before 20060104
        are rejected; per the original notes, data is updated around 16:30 on
        each trading day
    :return: dict mapping variety -> pd.DataFrame; an empty dict when the date
        is too early or not a trading day; False when a response has no
        content lines. Per the original documentation each table holds:
    rank                        rank                                   int
    vol_party_name              member at this rank by volume          string
    vol                         member's volume                        int
    vol_chg                     change in member's volume              int
    long_party_name             member at this rank by long position   string
    long_open_interest          member's long position                 int
    long_open_interest_chg      change in member's long position       int
    short_party_name            member at this rank by short position  string
    short_open_interest         member's short position                int
    short_open_interest_chg     change in member's short position      int
    symbol                      contract                               string
    var                         variety                                string
    date                        date                                   string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2006, 1, 4):
        print(Exception("dce数据源开始日期为20060104,跳过"))
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars_list = [
        i for i in vars_list if i in cons.market_exchange_symbols['dce']
    ]
    big_dict = {}
    for var in vars_list:
        # The month in the URL is sent zero-based (calendar month minus 1).
        url = cons.DCE_VOL_RANK_URL % (var.lower(), var.lower(), date.year,
                                       date.month - 1, date.day)
        list_60_name = []
        list_60 = []
        list_60_chg = []
        rank = []
        texts = requests_link(url).content.splitlines()
        if not texts:
            return False
        if len(texts) > 30:
            for text in texts:
                string_list = text.decode("utf-8").split()
                # A ranking line needs rank, member name, amount and change.
                # Skipping short lines up front keeps the four parallel lists
                # aligned (a partial line used to append to only some of them).
                if len(string_list) < 4:
                    continue
                try:
                    position = int(string_list[0])
                except ValueError:
                    # First token is not a rank number: not a ranking line.
                    continue
                if position <= 20:
                    list_60_name.append(string_list[1])
                    list_60.append(string_list[2])
                    list_60_chg.append(string_list[3])
                    rank.append(string_list[0])
            # The 60 collected entries are three consecutive groups of 20:
            # volume, long positions, short positions.
            table_cut = pd.DataFrame({
                'rank': rank[0:20],
                'vol_party_name': list_60_name[0:20],
                'vol': list_60[0:20],
                'vol_chg': list_60_chg[0:20],
                'long_party_name': list_60_name[20:40],
                'long_open_interest': list_60[20:40],
                'long_open_interest_chg': list_60_chg[20:40],
                'short_party_name': list_60_name[40:60],
                'short_open_interest': list_60[40:60],
                'short_open_interest_chg': list_60_chg[40:60]
            })
            # Strip thousands separators from every cell.
            table_cut = table_cut.applymap(lambda x: x.replace(',', ''))
            table_cut = _table_cut_cal(table_cut, var)
            big_dict[var] = table_cut.reset_index(drop=True)
    return big_dict
Exemplo n.º 15
0
def get_czce_receipt_3(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    Fetch registered warehouse-receipt data from the Zhengzhou Commodity
    Exchange (CZCE) HTML receipt page.

    According to the original note this page layout applies from 20151112
    (inclusive) onwards.

    Parameters
    ------
        date: trading day, format YYYY-MM-DD or YYYYMMDD or a datetime.date
            object; today is used when None
        vars_list: list of variety codes such as CF, TA to keep; defaults to
            cons.contract_symbols
    Return
    -------
        DataFrame with one row per variety that was found on the page and is
        listed in vars_list:
                var             variety code                 string
                receipt         receipt quantity             int
                receipt_chg     change on the day            int
                date            date                         string YYYYMMDD
        None when the date is not in the trading calendar; an empty DataFrame
        when the page contains fewer than four tables.
    """
    # Normalise to a YYYYMMDD string in BOTH branches.  The previous code left
    # a datetime.date object here when ``date`` was None, which could never
    # match the string calendar and broke the slicing/int() calls below.
    day = (cons.convert_date(date)
           if date is not None else datetime.date.today())
    date = day.strftime("%Y%m%d")
    if date not in calendar:
        warnings.warn("%s非交易日" % date)
        return None
    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding="utf-8")
    r.encoding = "utf-8"
    data = pd.read_html(r.text, encoding="gb2312")
    if len(data) < 4:
        return pd.DataFrame()
    if int(date) <= 20171227:
        # Up to this date the first parsed table is skipped.
        data = data[1:]
    rows = []
    for data_cut in data:
        # Only tables with more than 3 columns and more than 7 rows are used.
        if not (len(data_cut.columns) > 3 and len(data_cut.index) > 7):
            continue
        first_col = data_cut[0].tolist()
        last_indexes = [
            x for x in data_cut.index if "注:" in str(first_col[x])
        ]
        if last_indexes:
            # Cut the table just before the first footnote ("注:") row.
            data_cut = data_cut.loc[:last_indexes[0] - 1, :]
        title = data_cut[0].tolist()[0]
        if "PTA" in title:
            var = "TA"
        else:
            string = title.split(" ")[0][3:]
            if len(string) > 7:
                continue
            # Strip upper-case letters, then map the Chinese name to a code.
            var = chinese_to_english(re.sub("[A-Z]+", "", string))
        # Row 1 carries the column captions.
        data_cut.columns = data_cut.loc[1, :]
        data_cut = data_cut.ffill()
        try:
            receipt = data_cut.loc[:, "仓单数量"].tolist()[-1]
        except KeyError:
            # Some tables only carry the "(保税)" variant of the column.
            receipt = data_cut.loc[:, "仓单数量(保税)"].tolist()[-1]
        receipt_chg = data_cut.loc[:, "当日增减"].tolist()[-1]
        rows.append({
            "var": var,
            "receipt": int(receipt),
            "receipt_chg": int(receipt_chg),
            "date": date,
        })
    # Build the frame once; DataFrame.append no longer exists in pandas 2.x.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records["var"]
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 16
0
Arquivo: cot.py Projeto: dgczy/akshare
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Top-20 member ranking details published by the Shanghai Futures Exchange.

    Notes carried over from the original documentation: the exchange only
    publishes rankings per contract (no variety-level totals); data starts on
    20020107 and is refreshed around 16:30 on each trading day.

    :param date: trading day, format YYYY-MM-DD or YYYYMMDD or a
        datetime.date object; today is used when None
    :param vars_list: variety codes to keep, e.g. RB, AL; defaults to
        cons.contract_symbols
    :return: dict mapping each contract symbol to a pd.DataFrame (index
        reset).  An empty dict is returned when the date precedes 20020107,
        is not in the trading calendar, the response cannot be parsed, or the
        payload has fewer than three columns.
        Columns produced by the renaming below (types as documented upstream):
        rank                        rank                                int
        vol_party_name              member at this rank by volume       string
        vol                         that member's volume                int
        vol_chg                     change of that volume               int
        long_party_name             member at this rank by long OI      string
        long_open_interest          that member's long position         int
        long_open_interest_chg      change of that long position        int
        short_party_name            member at this rank by short OI     string
        short_open_interest         that member's short position        int
        short_open_interest_chg     change of that short position       int
        symbol                      contract                            string
        variety                     result of symbol_varieties(symbol)
    """
    date = (cons.convert_date(date)
            if date is not None else datetime.date.today())
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    day_str = date.strftime('%Y%m%d')
    if day_str not in calendar:
        warnings.warn('%s非交易日' % day_str)
        return {}
    url = cons.SHFE_VOL_RANK_URL % day_str
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except Exception:
        # Best effort: any failure to obtain/parse the JSON body means
        # "no data".  Unlike a bare ``except:`` this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(
        columns={
            'CJ1': 'vol',
            'CJ1_CHG': 'vol_chg',
            'CJ2': 'long_open_interest',
            'CJ2_CHG': 'long_open_interest_chg',
            'CJ3': 'short_open_interest',
            'CJ3_CHG': 'short_open_interest_chg',
            'PARTICIPANTABBR1': 'vol_party_name',
            'PARTICIPANTABBR2': 'long_party_name',
            'PARTICIPANTABBR3': 'short_party_name',
            'PRODUCTNAME': 'product1',
            'RANK': 'rank',
            'INSTRUMENTID': 'symbol',
            'PRODUCTSORTNO': 'product2'
        })

    if len(df.columns) < 3:
        return {}
    # Trim whitespace in string cells, then turn empty strings into None.
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(symbol_varieties)

    # Only rows with a positive rank are kept.
    df = df[df['rank'] > 0]
    for col in ('PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3',
                'product1', 'product2'):
        # Helper columns are dropped when present; missing ones are ignored.
        if col in df.columns:
            del df[col]

    # Build the membership set once instead of a fresh list per variety.
    varieties_present = set(df['variety'])
    big_dict = {}
    for var in vars_list:
        if var not in varieties_present:
            continue
        df_var = df[df['variety'] == var]
        for symbol in set(df_var['symbol']):
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict
Exemplo n.º 17
0
Arquivo: cot.py Projeto: dgczy/akshare
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Top-20 member ranking details published by the Zhengzhou Commodity
    Exchange (CZCE).

    Notes carried over from the original documentation: the exchange
    publishes both variety-level and contract-level rankings; data starts on
    20050509 and is refreshed around 16:30 on each trading day.

    :param date: trading day, format YYYY-MM-DD or YYYYMMDD or a
        datetime.date object; today is used when None
    :param vars_list: variety codes to keep, e.g. RB, AL; defaults to
        cons.contract_symbols.  It is only consulted for dates after
        2010-08-25; the older-format branch returns every table it finds.
    :return: dict mapping symbol -> pd.DataFrame (index reset); an empty dict
        when the date precedes 20050509, is not in the trading calendar, or
        the parsed table has fewer than 6 columns.
        Columns as listed by the original documentation
        (NOTE(review): the exact output columns depend on rank_columns and
        _table_cut_cal, which are defined elsewhere — confirm):
    rank                        rank                                int
    vol_party_name              member at this rank by volume       string
    vol                         that member's volume                int
    vol_chg                     change of that volume               int
    long_party_name             member at this rank by long OI      string
    long_open_interest          that member's long position         int
    long_open_interest_chg      change of that long position        int
    short_party_name            member at this rank by short OI     string
    short_open_interest         that member's short position        int
    short_open_interest_chg     change of that short position       int
    symbol                      contract                            string
    var                         variety                             string
    date                        date                                string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        # Oldest page format: tables come from _czce_df_read, symbols come
        # from the <b> tags of the same page (fetched a second time here).
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        # NOTE(review): r.text is already a decoded str, so from_encoding
        # presumably has no effect here — confirm against BeautifulSoup docs.
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))
            if len(strings) > 5:
                # The fifth space-separated token is taken as the symbol;
                # fall back to the raw token when the translation fails.
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    symbol = strings[4]
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            # The i-th symbol is paired with the (i + 2)-th parsed table.
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            # Drop the last row, then write rank 999 on the row labelled
            # '合计' (.loc assignment adds the row if the label is absent).
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc[
                '合计',
                ['vol_party_name', 'long_party_name', 'short_party_name'
                 ]] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            # Clean the integer columns: NaN -> 0, strip thousands
            # separators, '-' -> 0, then cast via float to int.
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(
                lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    # Later formats: pick the URL template, skip_rows and table position
    # that apply to the date range.
    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    # Keep the first nine columns and normalise the numeric text.
    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    # Section headers are index labels containing '合约' or '品种'; a
    # leading 0 stands for the first section.
    # NOTE(review): the `in` test assumes every index label is a string.
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)
    big_dict = {}

    for i in range(len(indexes)):

        if indexes[i] == 0:
            # First section: its header text is read from the index name.
            # NOTE(review): indexes[i + 1] raises IndexError when no header
            # label was found above.
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name

        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]

        else:
            # Last section runs to the end of the table.
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            # The name sits between ':' and the last space of the header;
            # fall back to the raw match when it cannot be translated.
            try:
                symbol = chinese_to_english(
                    find_chinese(re.compile(r':(.*) ').findall(string)[0]))
            except:
                symbol = re.compile(r':(.*) ').findall(string)[0]

        var = symbol_varieties(symbol)

        if var in vars_list:
            # Drop rows with any NaN and the first remaining row, then keep
            # only rows whose index label is one of '0'..'20'.
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            table_cut = table_cut.loc[[
                x for x in table_cut.index if x in [str(i) for i in range(21)]
            ], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)

    return big_dict
Exemplo n.º 18
0
def get_czce_receipt_3(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    Fetch registered warehouse-receipt data from the Zhengzhou Commodity
    Exchange (CZCE) HTML receipt page.

    According to the original note this page layout applies from 20151112
    (inclusive) onwards.

    Parameters
    ------
        date: trading day, format YYYY-MM-DD or YYYYMMDD or a datetime.date
            object; today is used when None
        vars_list: list of variety codes such as CF, TA to keep; defaults to
            cons.contract_symbols
    Return
    -------
        DataFrame with one row per variety that was found on the page and is
        listed in vars_list:
                var             variety code                 string
                receipt         receipt quantity             int
                receipt_chg     change on the day            int
                date            date                         string YYYYMMDD
        None when the date is not in the trading calendar; an empty DataFrame
        when the page contains fewer than four tables.
    """
    # Always work with a YYYYMMDD string.  Previously a None argument left a
    # datetime.date object in ``date`` (never found in the string calendar,
    # not sliceable), while a string argument crashed on ``.strftime`` in
    # the non-trading-day warning.
    day = (cons.convert_date(date)
           if date is not None else datetime.date.today())
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn('%s非交易日' % date)
        return None
    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    data = pd.read_html(r.text, encoding='gb2312')
    if len(data) < 4:
        return pd.DataFrame()
    if int(date) <= 20171227:
        # Up to this date the first parsed table is skipped.
        data = data[1:]
    rows = []
    for data_cut in data:
        # Only tables with more than 3 columns and more than 7 rows are used.
        if not (len(data_cut.columns) > 3 and len(data_cut.index) > 7):
            continue
        first_col = data_cut[0].tolist()
        last_indexes = [
            x for x in data_cut.index if '注:' in str(first_col[x])
        ]
        if last_indexes:
            # Cut the table just before the first footnote ('注:') row.
            data_cut = data_cut.loc[:last_indexes[0] - 1, :]
        title = data_cut[0].tolist()[0]
        if 'PTA' in title:
            var = 'TA'
        else:
            string = title.split(' ')[0][3:]
            if len(string) > 7:
                continue
            # Strip upper-case letters, then map the Chinese name to a code.
            var = chinese_to_english(re.sub('[A-Z]+', '', string))
        # Row 1 carries the column captions.
        data_cut.columns = data_cut.loc[1, :]
        data_cut = data_cut.ffill()
        try:
            receipt = data_cut.loc[:, '仓单数量'].tolist()[-1]
        except KeyError:
            # Some tables only carry the '(保税)' variant of the column.
            receipt = data_cut.loc[:, '仓单数量(保税)'].tolist()[-1]
        receipt_chg = data_cut.loc[:, '当日增减'].tolist()[-1]
        rows.append({
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date
        })
    # Build the frame once; DataFrame.append no longer exists in pandas 2.x.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 19
0
def get_cffex_daily(date=None):
    """
    Fetch the daily trading data of the China Financial Futures Exchange.

    :param date: trading day, format YYYY-MM-DD or YYYYMMDD or a
        datetime.date object; today is used when None
    :return: pd.DataFrame restricted to cons.OUTPUT_COLUMNS, documented
        upstream as:
        symbol        contract code
        date          date
        open          opening price
        high          highest price
        low           lowest price
        close         closing price
        volume        volume
        open_interest open interest
        turnover      turnover
        settle        settlement price
        pre_settle    previous settlement price (computed here as
                      close - change1)
        variety       contract variety
        or None when the date is not a trading day, the request fails with
        an HTTP error, the page reports an error, or the payload does not
        start with the expected header row.
    """
    day = (cons.convert_date(date)
           if date is not None else datetime.date.today())
    day_const = day.strftime('%Y%m%d')
    if day_const not in calendar:
        warnings.warn('%s非交易日' % day_const)
        return None
    # Build the URL once so the error message shows exactly what was
    # requested (the old message used ``%`` on a ``str.format`` template).
    url = cons.CFFEX_DAILY_URL.format(day.strftime('%Y%m'),
                                      day.strftime('%d'), day_const)
    try:
        html = requests_link(url, encoding="gbk", headers=cons.headers).text
    except requests.exceptions.HTTPError as reason:
        # Compare the status code, not the Response object itself;
        # ``response`` may be None, hence getattr.
        status = getattr(reason.response, 'status_code', None)
        if status != 404:
            print(url, reason)
        return

    if html.find('网页错误') >= 0:
        return
    # Split into comma-separated rows, ignoring the last two lines, blank
    # lines and the rows starting with '小'.
    html = [
        line.replace(' ', '').split(',') for line in html.split('\n')[:-2]
        if line and not line.startswith('小')
    ]

    # The first row must be the header that starts with '合约代码'.
    if not html or html[0][0] != '合约代码':
        return

    dict_data = list()
    for row in html[1:]:
        m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {'date': day_const, 'symbol': row[0], 'variety': m.group(1)}

        for pos, field in enumerate(cons.CFFEX_COLUMNS, start=1):
            cell = row[pos]
            if cell == "":
                row_dict[field] = 0.0
            elif field in ['volume', 'open_interest', 'oi_chg']:
                row_dict[field] = int(cell)
            else:
                try:
                    row_dict[field] = float(cell)
                except ValueError:
                    # Non-numeric cells are left out of the row.
                    pass
        row_dict['pre_settle'] = row_dict['close'] - row_dict['change1']
        dict_data.append(row_dict)

    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
Exemplo n.º 20
0
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Top-20 member ranking details published by the Shanghai Futures Exchange.

    Notes carried over from the original documentation: the exchange only
    publishes rankings per contract (no variety-level totals); data starts on
    20020107 and is refreshed around 16:30 on each trading day.

    :param date: trading day, format YYYY-MM-DD or YYYYMMDD or a
        datetime.date object; today is used when None
    :param vars_list: variety codes to keep, e.g. RB, AL; defaults to
        cons.contract_symbols
    :return: dict mapping each contract symbol to a pd.DataFrame (index
        reset).  An empty dict is returned when the date precedes 20020107,
        is not in the trading calendar, the response cannot be parsed, or the
        payload has fewer than three columns.
        Columns produced by the renaming below (types as documented upstream):
    rank                        rank                                int
    vol_party_name              member at this rank by volume       string
    vol                         that member's volume                int
    vol_chg                     change of that volume               int
    long_party_name             member at this rank by long OI      string
    long_open_interest          that member's long position         int
    long_open_interest_chg      change of that long position        int
    short_party_name            member at this rank by short OI     string
    short_open_interest         that member's short position        int
    short_open_interest_chg     change of that short position       int
    symbol                      contract                            string
    variety                     result of symbol_varieties(symbol)
    """
    date = (cons.convert_date(date)
            if date is not None else datetime.date.today())
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    day_str = date.strftime("%Y%m%d")
    if day_str not in calendar:
        warnings.warn("%s非交易日" % day_str)
        return {}
    url = cons.SHFE_VOL_RANK_URL % day_str
    r = requests_link(url, "utf-8")
    try:
        context = json.loads(r.text)
    except Exception:
        # Best effort: any failure to obtain/parse the JSON body means
        # "no data".  Unlike a bare ``except:`` this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return {}
    df = pd.DataFrame(context["o_cursor"])

    df = df.rename(
        columns={
            "CJ1": "vol",
            "CJ1_CHG": "vol_chg",
            "CJ2": "long_open_interest",
            "CJ2_CHG": "long_open_interest_chg",
            "CJ3": "short_open_interest",
            "CJ3_CHG": "short_open_interest_chg",
            "PARTICIPANTABBR1": "vol_party_name",
            "PARTICIPANTABBR2": "long_party_name",
            "PARTICIPANTABBR3": "short_party_name",
            "PRODUCTNAME": "product1",
            "RANK": "rank",
            "INSTRUMENTID": "symbol",
            "PRODUCTSORTNO": "product2",
        })

    if len(df.columns) < 3:
        return {}
    # Trim whitespace in string cells, then turn empty strings into None.
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == "" else x)
    df["variety"] = df["symbol"].apply(symbol_varieties)

    # Only rows with a positive rank are kept.
    df = df[df["rank"] > 0]
    for col in (
            "PARTICIPANTID1",
            "PARTICIPANTID2",
            "PARTICIPANTID3",
            "product1",
            "product2",
    ):
        # Helper columns are dropped when present; missing ones are ignored.
        if col in df.columns:
            del df[col]

    # Build the membership set once instead of a fresh list per variety.
    varieties_present = set(df["variety"])
    big_dict = {}
    for var in vars_list:
        if var not in varieties_present:
            continue
        df_var = df[df["variety"] == var]
        for symbol in set(df_var["symbol"]):
            df_symbol = df_var[df_var["symbol"] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict
Exemplo n.º 21
0
def get_czce_receipt_3(
        date: str = None,
        vars_list: List = cons.contract_symbols) -> pd.DataFrame:
    """
    Zhengzhou Commodity Exchange (CZCE) registered warehouse-receipt data,
    read from the daily FutureDataWhsheet.xls workbook.

    According to the original note this applies from 20151008 onwards.
    http://www.czce.com.cn/cn/jysj/cdrb/H770310index_1.htm

    :param date: trading day, format YYYY-MM-DD or YYYYMMDD or a
        datetime.date object; today is used when None
    :type date: str
    :param vars_list: list of variety codes such as CF, TA to keep; defaults
        to cons.contract_symbols
    :type vars_list: list
    :return: receipt data with columns var, receipt, receipt_chg, date
        (YYYYMMDD string), restricted to the varieties in vars_list; None
        (after a warning) when vars_list is not a list or the date is not in
        the trading calendar
    :rtype: pandas.DataFrame
    """
    if not isinstance(vars_list, list):
        warnings.warn("vars_list: 必须是列表")
        return None
    # Always work with a YYYYMMDD string.  Previously a None argument left a
    # datetime.date object in ``date`` (never found in the string calendar,
    # not sliceable), while a string argument crashed on ``.strftime`` in
    # the non-trading-day warning.
    day = (cons.convert_date(date)
           if date is not None else datetime.date.today())
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn('%s非交易日' % date)
        return None
    url = f"http://www.czce.com.cn/cn/DFSStaticFiles/Future/{date[:4]}/{date}/FutureDataWhsheet.xls"
    r = requests_link(url, encoding='utf-8')
    temp_df = pd.read_excel(BytesIO(r.content))

    # Drop the rows whose first cell mentions "非农产品".  ``na=False`` makes
    # non-string cells count as "no match", replacing the former
    # ``is not np.NAN`` identity test.
    non_agri = temp_df.iloc[:, 0].str.contains("非农产品", na=False)
    temp_df = temp_df[~non_agri].reset_index(drop=True)

    # Every row whose first cell contains "品种" starts a new section; each
    # section runs up to the next such row (or the end of the sheet).
    is_header = temp_df.iloc[:, 0].str.contains("品种", na=False)
    starts = list(temp_df[is_header].index)
    stops = starts[1:] + [None]

    lookup_errors = (KeyError, IndexError, TypeError)
    symbol_pattern = re.compile(r'[A-Z]+')
    symbol_list = []
    receipt_list = []
    receipt_chg_list = []
    for start, stop in zip(starts, stops):
        inner_df = temp_df.iloc[start:stop].copy()
        try:
            # The first run of upper-case letters in the header is the code.
            symbol = symbol_pattern.findall(inner_df.iloc[0, 0])[0]
        except (TypeError, IndexError):
            continue
        symbol_list.append(symbol)
        # Second row of the section carries the column captions.
        inner_df.columns = inner_df.iloc[1, :]
        inner_df = inner_df.iloc[2:, :].dropna(axis=1, how='all')
        if symbol == "PTA":
            try:
                # 20210316: TA is split into duty-paid and bonded receipts.
                receipt = (inner_df['仓单数量(完税)'].iloc[-1] +
                           inner_df['仓单数量(保税)'].iloc[-1])
            except lookup_errors:
                receipt = 0
        elif symbol == "MA":
            try:
                # 20210316: MA is split into duty-paid and bonded receipts;
                # the value is read from the second-to-last row.
                receipt = (inner_df['仓单数量(完税)'].iloc[-2] +
                           inner_df['仓单数量(保税)'].iloc[-2])
            except lookup_errors:
                try:
                    # Special MA layout with the duty-paid column only.
                    receipt = inner_df['仓单数量(完税)'].iloc[-2]
                except lookup_errors:
                    receipt = 0
        else:
            try:
                receipt = inner_df['仓单数量'].iloc[-1]
            except lookup_errors:
                receipt = 0
        receipt_list.append(receipt)
        chg_pos = -2 if symbol == "MA" else -1
        receipt_chg_list.append(inner_df['当日增减'].iloc[chg_pos])

    data_df = pd.DataFrame([
        symbol_list, receipt_list, receipt_chg_list,
        [date] * len(receipt_chg_list)
    ]).T
    data_df.columns = ['var', 'receipt', 'receipt_chg', 'date']
    data_df['var'] = [
        "TA" if item == "PTA" else item for item in data_df['var'].tolist()
    ]
    # Start from the full frame so an empty result no longer hits an
    # unbound ``records`` at the return statement.
    records = data_df
    if len(data_df.index) != 0:
        data_df.index = data_df['var']
        vars_in_market = [i for i in vars_list if i in data_df.index]
        records = data_df.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 22
0
def get_czce_rank_table(date="20081015", vars_list=cons.contract_symbols):
    """
    Top-20 member ranking details published by the Zhengzhou Commodity
    Exchange (CZCE).

    Notes carried over from the original documentation: the exchange
    publishes both variety-level and contract-level rankings; data starts on
    20050509 and is refreshed around 16:30 on each trading day.

    :param date: trading day, format YYYY-MM-DD or YYYYMMDD or a
        datetime.date object; defaults to "20081015", today is used only
        when None is passed explicitly
    :param vars_list: variety codes to keep, e.g. RB, AL; defaults to
        cons.contract_symbols.  It is only consulted for dates after
        2010-08-25; the older-format branch returns every table it finds.
    :return: dict mapping symbol -> pd.DataFrame (index reset); an empty dict
        when the date precedes 20050509, is not in the trading calendar, or
        the parsed table has fewer than 6 columns.
        Columns as listed by the original documentation
        (NOTE(review): the exact output columns depend on rank_columns and
        _table_cut_cal, which are defined elsewhere — confirm):
    rank                        rank                                int
    vol_party_name              member at this rank by volume       string
    vol                         that member's volume                int
    vol_chg                     change of that volume               int
    long_party_name             member at this rank by long OI      string
    long_open_interest          that member's long position         int
    long_open_interest_chg      change of that long position        int
    short_party_name            member at this rank by short OI     string
    short_open_interest         that member's short position        int
    short_open_interest_chg     change of that short position       int
    symbol                      contract                            string
    var                         variety                             string
    date                        date                                string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return {}
    if date <= datetime.date(2010, 8, 25):
        # Oldest page format: symbols come from the <b> tags of the page,
        # tables from _czce_df_read on the same URL.
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime("%Y%m%d"))
        # NOTE(review): plain requests.get without a timeout; the other
        # fetches in this function go through requests_link.
        r = requests.get(url)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        data = _czce_df_read(url, skip_rows=0)
        # NOTE(review): this second response is not used anywhere below —
        # looks like a leftover fetch; confirm before removing.
        r = requests_link(url, "utf-8")
        r.encoding = "utf-8"
        symbols = []
        for link in soup.find_all("b"):
            strings = str(link).split(" ")

            if len(strings) > 5:
                # The fifth space-separated token is taken as the symbol;
                # fall back to the raw token when the translation fails.
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    symbol = strings[4]
                # One known token carries a trailing "日期:" label.
                if symbol == "SR905日期:":
                    symbol = "SR905"
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            # The i-th symbol is paired with the (i + 1)-th parsed table.
            table_cut = data[i + 1]
            table_cut.columns = rank_columns
            # Drop the last row, then write rank 999 on the row labelled
            # "合计" (.loc assignment adds the row if the label is absent).
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, "rank"] = table_cut.index
            table_cut.loc["合计", "rank"] = 999
            table_cut.loc[
                "合计",
                ["vol_party_name", "long_party_name", "short_party_name"
                 ]] = None
            table_cut.loc[:, "symbol"] = symbol
            table_cut.loc[:, "variety"] = symbol_varieties(symbol)
            # Clean the integer columns: NaN -> 0, strip thousands
            # separators, "-" -> 0, then cast via float to int.
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(
                lambda x: x.replace(",", ""))
            table_cut = table_cut.applymap(lambda x: 0 if x == "-" else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):  # 20200311 format fix
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime("%Y%m%d"))
        # Skip the first three parsed tables, stack the rest vertically and
        # promote the first stacked row to column names.
        data = _czce_df_read(url, skip_rows=0, header=None)[3:]
        big_df = pd.DataFrame()
        for item in data:
            big_df = pd.concat([big_df, item], axis=0, ignore_index=False)
        big_df.columns = big_df.iloc[0, :].tolist()
        data = big_df.iloc[1:, :]
    elif date < datetime.date(2017, 12, 28):  # 20200311 format fix
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime("%Y%m%d"))
        data = _czce_df_read(url, skip_rows=0, header=0)[1]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime("%Y%m%d"))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    # Keep the first nine columns; remember the first column caption as the
    # index name (it serves as the header text of the first section below).
    table = pd.DataFrame(data.iloc[:, :9])
    table.index.name = table.columns[0]
    table.columns = rank_columns
    table.loc[:, "rank"] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(",", ""))
    table = table.applymap(lambda x: 0 if x == "-" else x)
    # Section headers are index labels containing "合约" or "品种"; a
    # leading 0 stands for the first section.
    # NOTE(review): the `in` test assumes every index label is a string.
    indexes = [i for i in table.index if "合约" in i or "品种" in i]
    indexes.insert(0, 0)
    big_dict = {}

    for i in range(len(indexes)):

        if indexes[i] == 0:
            # First section: its header text is read from the index name.
            # NOTE(review): indexes[i + 1] raises IndexError when no header
            # label was found above.
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name

        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]

        else:
            # Last section runs to the end of the table.
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if "PTA" in string:
            symbol = "TA"
        else:
            # The name sits between ":" and the last space of the header;
            # fall back to the raw match when it cannot be translated.
            try:
                symbol = chinese_to_english(
                    find_chinese(re.compile(r":(.*) ").findall(string)[0]))
            except:
                symbol = re.compile(r":(.*) ").findall(string)[0]

        var = symbol_varieties(symbol)

        if var in vars_list:
            # Drop rows with any NaN and the first remaining row, then keep
            # only rows whose index label is one of "0".."20".
            table_cut = table_cut.dropna(how="any").iloc[1:, :]
            table_cut = table_cut.loc[[
                x for x in table_cut.index if x in [str(i) for i in range(21)]
            ], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol.strip()] = table_cut.reset_index(drop=True)

    return big_dict