示例#1
0
文件: cot.py 项目: yutiansut/fushare
def _czce_df_read(url, skiprow, encode='utf-8'):
    """
        Fetch a Zhengzhou Commodity Exchange web page and parse its HTML tables.

        Parameters
        ------
            url:        page address                                  string
            skiprow:    number of leading rows to drop                int
            encode:     encoding forwarded to ``requests_link``       string
        Return
        -------
            list of DataFrame
                One entry per table ``pd.read_html`` finds in the page; the
                first row of each table is used as the header and the first
                column as the index.
    """
    r = requests_link(url, encode)
    # Only the non-default options are passed.  ``tupleize_cols`` is left out
    # on purpose: it was removed from ``read_html`` in pandas 1.0 and passing
    # it raises TypeError there, while older versions already default to the
    # same behaviour.
    # NOTE(review): ``thousands`` is the two-character string ', ' rather than
    # ','; get_czce_rank_table strips commas from the numeric columns itself,
    # so this is kept as-is -- confirm it is intentional.
    # NOTE(review): ``encoding`` is fixed to "gbk" regardless of ``encode``.
    data = pd.read_html(r.text,
                        header=0,
                        index_col=0,
                        skiprows=skiprow,
                        thousands=', ',
                        encoding="gbk")
    return data
示例#2
0
文件: cot.py 项目: yutiansut/fushare
def get_cffex_rank_table(date=None, vars=cons.vars):
    """
        Fetch the detailed member position-ranking data published by CFFEX
        (China Financial Futures Exchange).

        Parameters
        ------
            date: date, as YYYY-MM-DD, YYYYMMDD or a datetime.date object;
                  today when omitted
            vars: list of contract varieties; only those present in
                  cons.market_var['cffex'] are queried
            Data is available from 20100416 and is refreshed at about 16:30
            on each trading day.
        Return
        -------
            dict mapping contract symbol -> DataFrame (an empty dict on a
            non-trading day).  As documented by the original author, each
            DataFrame carries:
                rank                        rank                                        int
                vol_party_name              member at this rank by trading volume       string (Chinese)
                vol                         that member's trading volume                int
                vol_chg                     change in that member's trading volume      int
                long_party_name             member at this rank by long position        string (Chinese)
                long_openIntr               that member's long position                 int
                long_openIntr_chg           change in that member's long position       int
                short_party_name            member at this rank by short position       string (Chinese)
                short_openIntr              that member's short position                int
                short_openIntr_chg          change in that member's short position      int
                symbol                      contract                                    string
                var                         variety                                     string
                date                        date                                        string YYYYMMDD
            (symbol/rank and the nine ranking columns are named here; the
            remaining columns are presumably filled in by ``_tableCut_cal``.)
    """
    vars = [i for i in vars if i in cons.market_var['cffex']]
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = date.strftime('%Y%m%d')
    if date_str not in calendar:
        print('%s非交易日' % date_str)
        return {}
    D = {}
    for var in vars:
        url = cons.CFFEX_VOLRANK_URL % (date.strftime('%Y%m'),
                                        date.strftime('%d'), var)
        r = requests_link(url, encoding='gbk')
        # The exchange answers with an error page when no data is available
        # for this variety; skip it.
        if '网页错误' in r.text:
            continue
        # Everything after the first '\n交易日,' marker is parsed as CSV; the
        # marker itself (the first header cell) is cut away by the split.
        table = pd.read_csv(StringIO(r.text.split('\n交易日,')[1]))
        table = table.dropna(how='any')
        table = table.applymap(
            lambda x: x.strip() if isinstance(x, str) else x)
        # dict.fromkeys de-duplicates while keeping first-appearance order, so
        # the result is built in a reproducible order (a set is not ordered).
        for symbol in dict.fromkeys(table['合约']):
            # Copy the filtered rows so that renaming the columns acts on an
            # independent frame rather than on a slice of ``table``.
            tableCut = table[table['合约'] == symbol].copy()
            tableCut.columns = ['symbol', 'rank'] + rank_columns
            tableCut = _tableCut_cal(tableCut, symbol)
            D[symbol] = tableCut.reset_index(drop=True)
    return D
示例#3
0
文件: cot.py 项目: yutiansut/fushare
def get_czce_rank_table(date=None, vars=cons.vars):
    """
        Fetch the detailed top-20 member position-ranking data published by
        the Zhengzhou Commodity Exchange.
        Note: this exchange publishes rankings both per variety and per
        contract.

        Parameters
        ------
            date: date, as YYYY-MM-DD, YYYYMMDD or a datetime.date object;
                  today when omitted
            vars: list of contract varieties such as RB, AL; all commodities
                  when omitted
            Data is available from 20050509 and is refreshed at about 16:30
            on each trading day.
        Return
        -------
            dict mapping symbol -> DataFrame (an empty dict on a non-trading
            day or when the parsed table has fewer than 6 columns).  As
            documented by the original author, each DataFrame carries:
                rank                        rank                                        int
                vol_party_name              member at this rank by trading volume       string (Chinese)
                vol                         that member's trading volume                int
                vol_chg                     change in that member's trading volume      int
                long_party_name             member at this rank by long position        string (Chinese)
                long_openIntr               that member's long position                 int
                long_openIntr_chg           change in that member's long position       int
                short_party_name            member at this rank by short position       string (Chinese)
                short_openIntr              that member's short position                int
                short_openIntr_chg          change in that member's short position      int
                symbol                      contract                                    string
                var                         variety                                     string
                date                        date                                        string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = date.strftime('%Y%m%d')
    if date_str not in calendar:
        print('%s非交易日' % date_str)
        return {}

    if date <= datetime.date(2010, 8, 25):
        # Oldest page layout: one HTML table per section, with the section
        # titles held in <b> tags.
        # NOTE(review): ``vars`` is not applied in this branch -- every
        # section found on the page is returned.
        url = cons.CZCE_VOLRANK_URL_1 % date_str
        data = _czce_df_read(url, skiprow=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        # NOTE(review): ``from_encoding`` is passed together with ``r.text``;
        # kept unchanged, confirm whether it has any effect here.
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except Exception:
                    # Fall back to the raw token when it cannot be translated.
                    symbol = strings[4]
                symbols.append(symbol)
        D = {}
        for i, symbol in enumerate(symbols):
            # Table 0 is skipped; section i is read from table i + 1.
            tableCut = data[i + 1]
            tableCut.columns = rank_columns
            # Drop the last row, and copy so the writes below act on an
            # independent frame rather than on a slice of the parsed table.
            tableCut = tableCut.iloc[:-1, :].copy()
            tableCut.loc[:, 'rank'] = tableCut.index
            tableCut.loc['合计', 'rank'] = 999
            tableCut.loc[
                '合计',
                ['vol_party_name', 'long_party_name', 'short_party_name'
                 ]] = None
            tableCut.loc[:, 'symbol'] = symbol
            tableCut.loc[:, 'var'] = symbol2varietie(symbol)
            D[symbol] = tableCut.reset_index(drop=True)
        return D

    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOLRANK_URL_2 % (date.year, date_str)
        data = _czce_df_read(url, skiprow=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOLRANK_URL_3 % (date.year, date_str)
        data = _czce_df_read(url, skiprow=1)[0]
    else:
        url = cons.CZCE_VOLRANK_URL_3 % (date.year, date_str)
        data = _czce_df_read(url, skiprow=0)[0]

    if len(data.columns) < 6:
        return {}

    # Copy the column slice so the in-place edits below do not operate on a
    # view of ``data``.
    table = data.iloc[:, :9].copy()
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    # Numeric columns are turned into text and their comma separators removed.
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)

    # Index labels containing '合约' or '品种' start a new section.  Labels
    # that are not strings (e.g. NaN) cannot be section titles and would make
    # the ``in`` test raise, so they are skipped.
    indexs = [
        i for i in table.index
        if isinstance(i, str) and ('合约' in i or '品种' in i)
    ]
    # The 0 stands for the first section, whose title is the index name
    # rather than an index label.
    indexs.insert(0, 0)

    title_pattern = re.compile(':(.*) ')
    rank_labels = {str(k) for k in range(21)}
    D = {}

    for i in range(len(indexs)):
        if indexs[i] == 0:
            if i + 1 < len(indexs):
                tableCut = table.loc[:indexs[i + 1], :]
            else:
                # Only one section on the page: it spans the whole table.
                tableCut = table
            string = tableCut.index.name

        elif i < len(indexs) - 1:
            tableCut = table.loc[indexs[i]:indexs[i + 1], :]
            string = tableCut.index[0]
        else:
            tableCut = table.loc[indexs[i]:, :]
            string = tableCut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                symbol = chinese_to_english(
                    find_chinese(title_pattern.findall(string)[0]))
            except Exception:
                # Fall back to the raw text captured from the title.
                symbol = title_pattern.findall(string)[0]
        var = symbol2varietie(symbol)
        if var in vars:
            tableCut = tableCut.dropna(how='any').iloc[1:, :]
            # Keep only the rows whose index label is one of '0' .. '20'.
            tableCut = tableCut.loc[[
                x for x in tableCut.index if x in rank_labels
            ], :]
            tableCut = _tableCut_cal(tableCut, symbol)
            D[symbol] = tableCut.reset_index(drop=True)
    return D
示例#4
0
文件: cot.py 项目: yutiansut/fushare
def get_shfe_rank_table(date=None, vars=cons.vars):
    """
        Fetch the detailed top-20 member position-ranking data published by
        the Shanghai Futures Exchange.
        Note: this exchange only publishes per-contract rankings inside each
        variety; it does not publish an overall ranking per variety.

        Parameters
        ------
            date: date, as YYYY-MM-DD, YYYYMMDD or a datetime.date object;
                  today when omitted
            vars: list of contract varieties such as RB, AL; all commodities
                  when omitted
            Data is available from 20020107 and is refreshed at about 16:30
            on each trading day.
        Return
        -------
            dict mapping contract symbol -> DataFrame (an empty dict on a
            non-trading day, when the response is not valid JSON, or when the
            payload has fewer than 3 columns).  Each DataFrame carries:
                rank                        rank                                        int
                vol_party_name              member at this rank by trading volume       string (Chinese)
                vol                         that member's trading volume                int
                vol_chg                     change in that member's trading volume      int
                long_party_name             member at this rank by long position        string (Chinese)
                long_openIntr               that member's long position                 int
                long_openIntr_chg           change in that member's long position       int
                short_party_name            member at this rank by short position       string (Chinese)
                short_openIntr              that member's short position                int
                short_openIntr_chg          change in that member's short position      int
                symbol                      contract                                    string
                var                         variety                                     string
            NOTE(review): the original docstring also listed a ``date`` column
            (string YYYYMMDD), but this function does not add one.
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    date_str = date.strftime('%Y%m%d')
    if date_str not in calendar:
        print('%s非交易日' % date_str)
        return {}
    url = cons.SHFE_VOLRANK_URL % date_str
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except Exception:
        # Best effort: any failure to obtain or decode the payload yields an
        # empty result.  ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt / SystemExit propagating.
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(
        columns={
            'CJ1': 'vol',
            'CJ1_CHG': 'vol_chg',
            'CJ2': 'long_openIntr',
            'CJ2_CHG': 'long_openIntr_chg',
            'CJ3': 'short_openIntr',
            'CJ3_CHG': 'short_openIntr_chg',
            'PARTICIPANTABBR1': 'vol_party_name',
            'PARTICIPANTABBR2': 'long_party_name',
            'PARTICIPANTABBR3': 'short_party_name',
            'PRODUCTNAME': 'product1',
            'RANK': 'rank',
            'INSTRUMENTID': 'symbol',
            'PRODUCTSORTNO': 'product2'
        })

    if len(df.columns) < 3:
        return {}

    def _clean(value):
        # Trim surrounding whitespace from text cells and turn empty cells
        # into None; every other value passes through untouched.
        if isinstance(value, str):
            value = value.strip()
        return None if value == '' else value

    df = df.applymap(_clean)
    df['var'] = df['symbol'].apply(symbol2varietie)

    # Only rows with a positive rank are kept.
    df = df[df['rank'] > 0]

    # Remove helper columns when they are present; absent ones are ignored.
    unwanted = [
        'PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1',
        'product2'
    ]
    df = df.drop([col for col in unwanted if col in df.columns], axis=1)

    # Build the lookup once instead of re-creating a list for every element
    # of ``vars``.
    available = set(df['var'])
    get_vars = [var for var in vars if var in available]

    D = {}
    for var in get_vars:
        df_var = df[df['var'] == var]
        # dict.fromkeys de-duplicates while keeping first-appearance order, so
        # the result is built in a reproducible order.
        for symbol in dict.fromkeys(df_var['symbol']):
            df_symbol = df_var[df_var['symbol'] == symbol]
            D[symbol] = df_symbol.reset_index(drop=True)
    return D