Exemplo n.º 1
0
def get_shfe_inv_2(date: str = None):
    """
        抓取上海商品交易所注册仓单数据
        适用20140519(包括)至今
        Parameters
        ------
            date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            vars_list: 合约品种如RB、AL等列表 为空时为所有商品
        Return
        -------
            DataFrame:
                展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.SHFE_INV_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except:
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()
    records = pd.DataFrame()
    for var in set(data['VARNAME'].tolist()):
        data_cut = data[data['VARNAME'] == var]
        prod_key = var.split('$$')[0]
        prod_key = prod_key.replace("(", "").replace(")", "")
        if data_cut['SPOTWGHTS'].tolist()[-1] == '':
            spot_wrt = 0
        else:
            spot_wrt = int(data_cut['SPOTWGHTS'].tolist()[-1])
        if data_cut['WRTWGHTS'].tolist()[-1] == '':
            wrt_wrt = 0
        else:
            wrt_wrt = int(data_cut['WRTWGHTS'].tolist()[-1])
        if data_cut['WHSTOCKS'].tolist()[-1] == '':
            wh_stock = 0
        else:
            wh_stock = int(data_cut['WHSTOCKS'].tolist()[-1])
        data_dict = {'var': chinese_to_english(prod_key), 'var_label': prod_key, \
                     # chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),

                     'spot_inventory': spot_wrt,
                     'warrant_inventory': wrt_wrt,
                     'warehouse_stocks': wh_stock,
                     'date': date}
        records = records.append(pd.DataFrame(data_dict, index=[0]))
    # if len(records.index) != 0:
    #     records.index = records['var']
    #     vars_in_market = [i for i in vars_list if i in records.index]
    #     records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 2
0
def get_czce_receipt_1(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取郑州商品交易所注册仓单数据
    适用20080222至20100824(包括)
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type date: str
    :param vars_list: list
    :type vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    :return: 展期收益率数据
    :rtype: pandas.DataFrame
    var             商品品种                     string
    receipt         仓单数                       int
    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    if date == '20090820':
        return pd.DataFrame()
    url = cons.CZCE_RECEIPT_URL_1 % date
    r = requests_link(url, encoding='utf-8')
    context = r.text
    data = pd.read_html(context)[1]
    records = pd.DataFrame()
    indexes = [x for x in data.index if '品种:' in str(data[0].tolist()[x])]
    ends = [x for x in data.index if '总计' in str(data[0].tolist()[x])]
    for i in list(range(len(indexes))):
        if i != len(indexes) - 1:
            data_cut = data.loc[indexes[i]:ends[i], :]
            data_cut = data_cut.fillna(method='pad')
        else:
            data_cut = data.loc[indexes[i]:, :]
            data_cut = data_cut.fillna(method='pad')
        if 'PTA' in data_cut[0].tolist()[0]:
            var = 'TA'
        else:
            var = chinese_to_english(
                re.sub(r'[A-Z]+', '', data_cut[0].tolist()[0][3:]))
        if var == 'CF':
            receipt = data_cut[6].tolist()[-1]
            receipt_chg = data_cut[7].tolist()[-1]
        else:
            receipt = data_cut[5].tolist()[-1]
            receipt_chg = data_cut[6].tolist()[-1]
        data_dict = {
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date
        }
        records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 3
0
def get_czce_receipt_2(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
        抓取郑州商品交易所注册仓单数据
        适用20100825(包括)至20151111(包括)
        Parameters
        ------
            date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            vars_list: 合约品种如CF、TA等列表 为空时为所有商品
        Return
        -------
            DataFrame:
                展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.CZCE_RECEIPT_URL_2 % (date[:4], date)
    r = requests.get(url)
    r.encoding = 'utf-8'
    data = pd.read_html(r.text)[3:]
    records = pd.DataFrame()
    for data_cut in data:
        if len(data_cut.columns) > 3:
            last_indexes = [
                x for x in data_cut.index
                if '注:' in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if 'PTA' in data_cut[0].tolist()[0]:
                var = 'TA'
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(' ')[0][3:]
                var = chinese_to_english(re.sub(r'[A-Z]+', '', string))
            data_cut.columns = data_cut.T[1].tolist()
            receipt = data_cut['仓单数量'].tolist()[-1]
            receipt_chg = data_cut['当日增减'].tolist()[-1]
            data_dict = {
                'var': var,
                'receipt': int(receipt),
                'receipt_chg': int(receipt_chg),
                'date': date
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 4
0
def get_shfe_receipt_1(
        date: str = None,
        vars_list: List = cons.contract_symbols) -> pd.DataFrame:
    """
    上海期货交易所-注册仓单数据-类型1
    适用 20081006 至 20140518(包括)、20100126、20101029日期交易所格式混乱,直接回复脚本中 pandas.DataFrame, 20100416、20130821日期交易所数据丢失
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type date: str
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :type vars_list: list
    :return: 注册仓单数据-类型1
    :rtype: pandas.DataFrame
    """
    if not isinstance(vars_list, list):
        return warnings.warn(f"symbol_list: 必须是列表")
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    if date == '20100126':
        shfe_20100126['date'] = date
        return shfe_20100126
    elif date == '20101029':
        shfe_20101029['date'] = date
        return shfe_20101029
    elif date in ['20100416', '20130821']:
        return warnings.warn('20100416、20130821交易所数据丢失')
    else:
        var_list = [
            '天然橡胶', '沥青仓库', '沥青厂库', '热轧卷板', '燃料油', '白银', '线材', '螺纹钢', '铅', '铜',
            '铝', '锌', '黄金', '锡', '镍'
        ]
        url = cons.SHFE_RECEIPT_URL_1 % date
        data = pandas_read_html_link(url)[0]
        indexes = [x for x in data.index if (data[0].tolist()[x] in var_list)]
        last_index = [
            x for x in data.index if '注' in str(data[0].tolist()[x])
        ][0] - 1
        records = pd.DataFrame()
        for i in list(range(len(indexes))):
            if i != len(indexes) - 1:
                data_cut = data.loc[indexes[i]:indexes[i + 1] - 1, :]
            else:
                data_cut = data.loc[indexes[i]:last_index, :]
                data_cut = data_cut.fillna(method='pad')
            data_dict = dict()
            data_dict['var'] = chinese_to_english(data_cut[0].tolist()[0])
            data_dict['receipt'] = int(data_cut[2].tolist()[-1])
            data_dict['receipt_chg'] = int(data_cut[3].tolist()[-1])
            data_dict['date'] = date
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 5
0
def get_shfe_receipt_2(
        date: str = None,
        vars_list: List = cons.contract_symbols) -> pd.DataFrame:
    """
    上海商品交易所-注册仓单数据-类型2
    适用 20140519(包括)-至今
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type date: str
    :param vars_list: 合约品种如 RB、AL 等列表 为空时为所有商品
    :type vars_list: list
    :return: 注册仓单数据
    :rtype: pandas.DataFrame
    """
    if not isinstance(vars_list, list):
        return warnings.warn(f"symbol_list: 必须是列表")
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.SHFE_RECEIPT_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except:
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()
    records = pd.DataFrame()
    for var in set(data['VARNAME'].tolist()):
        data_cut = data[data['VARNAME'] == var]
        if "BC" in var:
            data_dict = {
                'var': "BC",
                'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
                'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
                'date': date
            }
        else:
            data_dict = {
                'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
                'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
                'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
                'date': date
            }
        records = records.append(pd.DataFrame(data_dict, index=[0]))
        temp_records = records.groupby('var')[['receipt', 'receipt_chg'
                                               ]].sum().reset_index()
        temp_records['date'] = date
        records = temp_records
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 6
0
def get_czce_receipt_2(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    郑州商品交易所-注册仓单数据
    http://www.czce.com.cn/cn/jysj/cdrb/H770310index_1.htm
    适用 20100825(包括) - 20151111(包括)
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :type date: str
    :param vars_list: 合约品种如 CF、TA 等列表为空时为所有商品
    :type vars_list: list
    :return: 注册仓单数据
    :rtype: pandas.DataFrame
    """
    if not isinstance(vars_list, list):
        return warnings.warn(f"symbol_list: 必须是列表")
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.CZCE_RECEIPT_URL_2 % (date[:4], date)
    r = requests.get(url)
    r.encoding = 'utf-8'
    data = pd.read_html(r.text)[3:]
    records = pd.DataFrame()
    for data_cut in data:
        if len(data_cut.columns) > 3:
            last_indexes = [
                x for x in data_cut.index
                if '注:' in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if 'PTA' in data_cut[0].tolist()[0]:
                var = 'TA'
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(' ')[0][3:]
                var = chinese_to_english(re.sub(r'[A-Z]+', '', string))
            data_cut.columns = data_cut.T[1].tolist()
            receipt = data_cut['仓单数量'].tolist()[-1]
            receipt_chg = data_cut['当日增减'].tolist()[-1]
            data_dict = {
                'var': var,
                'receipt': int(receipt),
                'receipt_chg': int(receipt_chg),
                'date': date
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 7
0
def get_dce_receipt(date: str = None,
                    symbol_list: List = cons.contract_symbols):
    """
    完成
    采集大连商品交易所注册仓单数据
    :param date: format 开始日期: YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象, 为空时为当天
    :param symbol_list: 合约品种如 RB, AL等列表, 为空时为所有商品数据从 20060106开始,每周五更新仓单数据。直到20090407起,每交易日都更新仓单数据
    :return: pd.DataFrame
    展期收益率数据(DataFrame):
    var             商品品种                     string
    receipt         仓单数                       int
    date            日期                         string YYYYMMDD
    """
    if not isinstance(symbol_list, list):
        return warnings.warn(f"symbol_list: 必须是列表")
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    payload = {
        "weekQuotes.variety": "all",
        "year": date.year,
        "month": date.month - 1,  # 网站月份描述少 1 个月, 属于网站问题
        "day": date.day
    }
    data = pandas_read_html_link(cons.DCE_RECEIPT_URL,
                                 method="post",
                                 data=payload,
                                 headers=cons.dce_headers)[0]
    records = pd.DataFrame()
    for x in data.to_dict(orient='records'):
        if isinstance(x['品种'], str):
            if x['品种'][-2:] == '小计':
                var = x['品种'][:-2]
                temp_data = {
                    'var': chinese_to_english(var),
                    'receipt': int(x['今日仓单量']),
                    'receipt_chg': int(x['增减']),
                    'date': date.strftime('%Y%m%d')
                }
                records = records.append(pd.DataFrame(temp_data, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in symbol_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 8
0
def get_shfe_receipt_2(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
        抓取上海商品交易所注册仓单数据
        适用20140519(包括)至今
        Parameters
        ------
            date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
            vars_list: 合约品种如RB、AL等列表 为空时为所有商品
        Return
        -------
            DataFrame:
                展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.SHFE_RECEIPT_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except:
        return pd.DataFrame()
    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()
    records = pd.DataFrame()
    for var in set(data['VARNAME'].tolist()):
        data_cut = data[data['VARNAME'] == var]
        data_dict = {
            'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
            'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
            'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
            'date': date
        }
        records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 9
0
Arquivo: cot.py Projeto: dgczy/akshare
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    郑州商品交易所前 20 会员持仓排名数据明细
    注:该交易所既公布了品种排名, 也公布了标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest               该会员持多单                  int
    long_open_interest_chg           该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest              该会员持空单                  int
    short_open_interest_chg          该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, 'lxml', from_encoding="gb2312")
        symbols = []
        for link in soup.find_all('b'):
            strings = (str(link).split(' '))
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    symbol = strings[4]
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc[
                '合计',
                ['vol_party_name', 'long_party_name', 'short_party_name'
                 ]] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(
                lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)
    big_dict = {}

    for i in range(len(indexes)):

        if indexes[i] == 0:
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name

        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]

        else:
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                symbol = chinese_to_english(
                    find_chinese(re.compile(r':(.*) ').findall(string)[0]))
            except:
                symbol = re.compile(r':(.*) ').findall(string)[0]

        var = symbol_varieties(symbol)

        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            table_cut = table_cut.loc[[
                x for x in table_cut.index if x in [str(i) for i in range(21)]
            ], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)

    return big_dict
Exemplo n.º 10
0
def _check_information(df_data, date):
    """
    进行数据验证和计算模块
    :param df_data: pandas.DataFrame 采集的数据
    :param date: datetime.date 具体某一天 YYYYMMDD
    :return: pandas.DataFrame
    中间数据
    symbol  spot_price near_contract  ...  near_basis_rate dom_basis_rate      date
     CU    49620.00        cu1811  ...        -0.002418      -0.003426  20181108
     RB     4551.54        rb1811  ...        -0.013521      -0.134359  20181108
     ZN    22420.00        zn1811  ...        -0.032114      -0.076271  20181108
     AL    13900.00        al1812  ...         0.005396       0.003957  20181108
     AU      274.10        au1811  ...         0.005655       0.020430  20181108
     WR     4806.25        wr1903  ...        -0.180026      -0.237035  20181108
     RU    10438.89        ru1811  ...        -0.020969       0.084406  20181108
     PB    18600.00        pb1811  ...        -0.001344      -0.010215  20181108
     AG     3542.67        ag1811  ...        -0.000754       0.009408  20181108
     BU     4045.53        bu1811  ...        -0.129904      -0.149679  20181108
     HC     4043.33        hc1811  ...        -0.035449      -0.088128  20...
    """
    df_data = df_data.loc[:, [0, 1, 2, 3, 5, 6]]
    df_data.columns = [
        "symbol",
        "spot_price",
        "near_contract",
        "near_contract_price",
        "dominant_contract",
        "dominant_contract_price",
    ]
    records = pd.DataFrame()
    for string in df_data["symbol"].tolist():
        if string == "PTA":
            news = "PTA"
        else:
            news = "".join(re.findall(r"[\u4e00-\u9fa5]", string))
        if news != "" and news not in [
                "商品", "价格", "上海期货交易所", "郑州商品交易所", "大连商品交易所"
        ]:
            symbol = chinese_to_english(news)
            record = pd.DataFrame(df_data[df_data["symbol"] == string])
            record.loc[:, "symbol"] = symbol
            record.loc[:,
                       "spot_price"] = record.loc[:,
                                                  "spot_price"].astype(float)
            if (symbol == "JD"
                ):  # 鸡蛋现货为元/公斤, 鸡蛋期货为元/500千克, 其余元/吨(http://www.100ppi.com/sf/)
                record.loc[:, "spot_price"] = float(record["spot_price"]) * 500
            elif (
                    symbol == "FG"
            ):  # 上表中现货单位为元/平方米, 期货单位为元/吨. 换算公式:元/平方米*80=元/吨(http://www.100ppi.com/sf/959.html)
                record.loc[:, "spot_price"] = float(record["spot_price"]) * 80
            records = records.append(record)

    records.loc[:, [
        "near_contract_price", "dominant_contract_price", "spot_price"
    ]] = records.loc[:, [
        "near_contract_price", "dominant_contract_price", "spot_price"
    ]].astype("float")

    records.loc[:, "near_contract"] = records["near_contract"].replace(
        r"[^0-9]*(\d*)$", r"\g<1>", regex=True)
    records.loc[:, "dominant_contract"] = records["dominant_contract"].replace(
        r"[^0-9]*(\d*)$", r"\g<1>", regex=True)

    records.loc[:, "near_contract"] = records[
        "symbol"] + records.loc[:, "near_contract"].astype("int").astype("str")
    records.loc[:, "dominant_contract"] = records[
        "symbol"] + records.loc[:, "dominant_contract"].astype("int").astype(
            "str")

    records["near_contract"] = records["near_contract"].apply(
        lambda x: x.lower() if x[:-4] in cons.market_exchange_symbols[
            "shfe"] + cons.market_exchange_symbols["dce"] else x)
    records.loc[:,
                "dominant_contract"] = records.loc[:,
                                                   "dominant_contract"].apply(
                                                       lambda x: x.lower()
                                                       if x[:-4] in cons.
                                                       market_exchange_symbols[
                                                           "shfe"] + cons.
                                                       market_exchange_symbols[
                                                           "dce"] else x)
    records.loc[:, "near_contract"] = records.loc[:, "near_contract"].apply(
        lambda x: x[:-4] + x[-3:]
        if x[:-4] in cons.market_exchange_symbols["czce"] else x)
    records.loc[:,
                "dominant_contract"] = records.loc[:,
                                                   "dominant_contract"].apply(
                                                       lambda x: x[:-4] + x[-3:
                                                                            ]
                                                       if x[:-4] in cons.
                                                       market_exchange_symbols[
                                                           "czce"] else x)

    records[
        "near_basis"] = records["near_contract_price"] - records["spot_price"]
    records["dom_basis"] = records["dominant_contract_price"] - records[
        "spot_price"]
    records["near_basis_rate"] = (
        records["near_contract_price"] / records["spot_price"] - 1)
    records["dom_basis_rate"] = (
        records["dominant_contract_price"] / records["spot_price"] - 1)
    records.loc[:, "date"] = date.strftime("%Y%m%d")
    return records
Exemplo n.º 11
0
def get_czce_receipt_3(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    郑州商品交易所注册仓单数据
    适用 20151112-至今
    Parameters
    ------
        date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    Return
    -------
        DataFrame:
            展期收益率数据(DataFrame):`1
                var             商品品种                     string
                receipt         仓单数                       int
                date            日期                         string YYYYMMDD
    """

    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return None
    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    data = pd.read_html(r.text, encoding='gb2312')
    records = pd.DataFrame()
    if len(data) < 4:
        return records
    if int(date) <= 20171227:
        data = data[1:]
    for data_cut in data:
        if len(data_cut.columns) > 3 and len(data_cut.index) > 7:
            last_indexes = [
                x for x in data_cut.index
                if '注:' in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if 'PTA' in data_cut[0].tolist()[0]:
                var = 'TA'
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(' ')[0][3:]
                if len(string) > 7:
                    continue
                print(string)
                var = chinese_to_english(re.sub('[A-Z]+', '', string))
            data_cut.columns = data_cut.loc[1, :]
            data_cut = data_cut.fillna(method='pad')
            try:
                receipt = data_cut.loc[:, '仓单数量'].tolist()[-1]
            except:
                receipt = data_cut.loc[:, '仓单数量(保税)'].tolist()[-1]
            receipt_chg = data_cut.loc[:, '当日增减'].tolist()[-1]
            data_dict = {
                'var': var,
                'receipt': int(receipt),
                'receipt_chg': int(receipt_chg),
                'date': date
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 12
0
def _check_information(df_data, date):
    """
    进行数据验证和计算模块
    :param df_data: pandas.DataFrame 采集的数据
    :param date: datetime.date 具体某一天 YYYYMMDD
    :return: pandas.DataFrame
    中间数据
       symbol  spot_price near_contract  ...  near_basis_rate dom_basis_rate      date
         CU    49620.00        cu1811  ...        -0.002418      -0.003426  20181108
         RB     4551.54        rb1811  ...        -0.013521      -0.134359  20181108
         ZN    22420.00        zn1811  ...        -0.032114      -0.076271  20181108
         AL    13900.00        al1812  ...         0.005396       0.003957  20181108
         AU      274.10        au1811  ...         0.005655       0.020430  20181108
         WR     4806.25        wr1903  ...        -0.180026      -0.237035  20181108
         RU    10438.89        ru1811  ...        -0.020969       0.084406  20181108
         PB    18600.00        pb1811  ...        -0.001344      -0.010215  20181108
         AG     3542.67        ag1811  ...        -0.000754       0.009408  20181108
         BU     4045.53        bu1811  ...        -0.129904      -0.149679  20181108
         HC     4043.33        hc1811  ...        -0.035449      -0.088128  20...
    """
    df_data = df_data.loc[:, [0, 1, 2, 3, 5, 6]]
    df_data.columns = [
        'symbol', 'spot_price', 'near_contract', 'near_contract_price',
        'dominant_contract', 'dominant_contract_price'
    ]
    records = pd.DataFrame()
    for string in df_data['symbol'].tolist():
        if string == 'PTA':
            news = 'PTA'
        else:
            news = ''.join(re.findall(r'[\u4e00-\u9fa5]', string))
        if news != '' and news not in [
                '商品', '价格', '上海期货交易所', '郑州商品交易所', '大连商品交易所'
        ]:
            symbol = chinese_to_english(news)
            record = pd.DataFrame(df_data[df_data['symbol'] == string])
            record.loc[:, 'symbol'] = symbol
            record.loc[:,
                       'spot_price'] = record.loc[:,
                                                  'spot_price'].astype(float)
            if symbol == 'JD':  # 鸡蛋现货为元/公斤, 鸡蛋期货为元/500千克, 其余元/吨(http://www.100ppi.com/sf/)
                record.loc[:, 'spot_price'] = float(record['spot_price']) * 500
            elif symbol == 'FG':  # 上表中现货单位为元/平方米, 期货单位为元/吨. 换算公式:元/平方米*80=元/吨(http://www.100ppi.com/sf/959.html)
                record.loc[:, 'spot_price'] = float(record['spot_price']) * 80
            records = records.append(record)

    records.loc[:, [
        'near_contract_price', 'dominant_contract_price', 'spot_price'
    ]] = records.loc[:, [
        'near_contract_price', 'dominant_contract_price', 'spot_price'
    ]].astype('float')

    records.loc[:, 'near_contract'] = records['near_contract'].replace(
        r'[^0-9]*(\d*)$', r'\g<1>', regex=True)
    records.loc[:, 'dominant_contract'] = records['dominant_contract'].replace(
        r'[^0-9]*(\d*)$', r'\g<1>', regex=True)

    records.loc[:, 'near_contract'] = records[
        'symbol'] + records.loc[:, 'near_contract'].astype('int').astype('str')
    records.loc[:, 'dominant_contract'] = records[
        'symbol'] + records.loc[:, 'dominant_contract'].astype('int').astype(
            'str')

    records['near_contract'] = records['near_contract'].apply(
        lambda x: x.lower() if x[:-4] in cons.market_exchange_symbols[
            'shfe'] + cons.market_exchange_symbols['dce'] else x)
    records.loc[:,
                'dominant_contract'] = records.loc[:,
                                                   'dominant_contract'].apply(
                                                       lambda x: x.lower()
                                                       if x[:-4] in cons.
                                                       market_exchange_symbols[
                                                           'shfe'] + cons.
                                                       market_exchange_symbols[
                                                           'dce'] else x)
    records.loc[:, 'near_contract'] = records.loc[:, 'near_contract'].apply(
        lambda x: x[:-4] + x[-3:]
        if x[:-4] in cons.market_exchange_symbols['czce'] else x)
    records.loc[:,
                'dominant_contract'] = records.loc[:,
                                                   'dominant_contract'].apply(
                                                       lambda x: x[:-4] + x[-3:
                                                                            ]
                                                       if x[:-4] in cons.
                                                       market_exchange_symbols[
                                                           'czce'] else x)

    records[
        'near_basis'] = records['near_contract_price'] - records['spot_price']
    records['dom_basis'] = records['dominant_contract_price'] - records[
        'spot_price']
    records['near_basis_rate'] = records['near_contract_price'] / records[
        'spot_price'] - 1
    records['dom_basis_rate'] = records['dominant_contract_price'] / records[
        'spot_price'] - 1
    records.loc[:, 'date'] = date.strftime('%Y%m%d')
    return records
Exemplo n.º 13
0
def get_czce_rank_table(date="20081015", vars_list=cons.contract_symbols):
    """
    郑州商品交易所前 20 会员持仓排名数据明细
    注:该交易所既公布了品种排名, 也公布了标的排名
    :param date: 日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品, 数据从20050509开始,每交易日16:30左右更新数据
    :return: pd.DataFrame
    rank                        排名                        int
    vol_party_name              成交量排序的当前名次会员        string(中文)
    vol                         该会员成交量                  int
    vol_chg                     该会员成交量变化量             int
    long_party_name             持多单排序的当前名次会员        string(中文)
    long_open_interest               该会员持多单                  int
    long_open_interest_chg           该会员持多单变化量             int
    short_party_name            持空单排序的当前名次会员        string(中文)
    short_open_interest              该会员持空单                  int
    short_open_interest_chg          该会员持空单变化量             int
    symbol                      标的合约                     string
    var                         品种                        string
    date                        日期                        string YYYYMMDD
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime("%Y%m%d"))
        r = requests.get(url)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, "utf-8")
        r.encoding = "utf-8"
        symbols = []
        for link in soup.find_all("b"):
            strings = str(link).split(" ")

            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except:
                    symbol = strings[4]
                if symbol == "SR905日期:":
                    symbol = "SR905"
                symbols.append(symbol)
        big_dict = {}
        for i in range(len(symbols)):
            symbol = symbols[i]
            table_cut = data[i + 1]
            table_cut.columns = rank_columns
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, "rank"] = table_cut.index
            table_cut.loc["合计", "rank"] = 999
            table_cut.loc[
                "合计",
                ["vol_party_name", "long_party_name", "short_party_name"
                 ]] = None
            table_cut.loc[:, "symbol"] = symbol
            table_cut.loc[:, "variety"] = symbol_varieties(symbol)
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(
                lambda x: x.replace(",", ""))
            table_cut = table_cut.applymap(lambda x: 0 if x == "-" else x)

            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict

    elif date <= datetime.date(2015, 11, 11):  # 20200311 格式修正
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime("%Y%m%d"))
        data = _czce_df_read(url, skip_rows=0, header=None)[3:]
        big_df = pd.DataFrame()
        for item in data:
            big_df = pd.concat([big_df, item], axis=0, ignore_index=False)
        big_df.columns = big_df.iloc[0, :].tolist()
        data = big_df.iloc[1:, :]
    elif date < datetime.date(2017, 12, 28):  # 20200311 格式修正
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime("%Y%m%d"))
        data = _czce_df_read(url, skip_rows=0, header=0)[1]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime("%Y%m%d"))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.index.name = table.columns[0]
    table.columns = rank_columns
    table.loc[:, "rank"] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(
        lambda x: x.replace(",", ""))
    table = table.applymap(lambda x: 0 if x == "-" else x)
    indexes = [i for i in table.index if "合约" in i or "品种" in i]
    indexes.insert(0, 0)
    big_dict = {}

    for i in range(len(indexes)):

        if indexes[i] == 0:
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name

        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]

        else:
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if "PTA" in string:
            symbol = "TA"
        else:
            try:
                symbol = chinese_to_english(
                    find_chinese(re.compile(r":(.*) ").findall(string)[0]))
            except:
                symbol = re.compile(r":(.*) ").findall(string)[0]

        var = symbol_varieties(symbol)

        if var in vars_list:
            table_cut = table_cut.dropna(how="any").iloc[1:, :]
            table_cut = table_cut.loc[[
                x for x in table_cut.index if x in [str(i) for i in range(21)]
            ], :]

            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol.strip()] = table_cut.reset_index(drop=True)

    return big_dict
Exemplo n.º 14
0
def get_shfe_inv_1(date: str = None, vars_list: List = cons.contract_symbols):
    """
    抓取上海期货交易所注册仓单数据, 适用20081006至20140518(包括) 20100126、20101029日期交易所格式混乱,直接回复脚本中DataFrame, 20100416、20130821日期交易所数据丢失
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
    展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD           
    """
    date = cons.convert_date(date).strftime(
        '%Y%m%d') if date is not None else datetime.date.today()
    if date not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    var_list = [
        '天然橡胶', '沥青仓库', '沥青厂库', '热轧卷板', '燃料油', '白银', '线材', '螺纹钢', '铅', '铜',
        '铝', '锌', '黄金', '锡', '镍'
    ]
    url = cons.SHFE_INV_URL_1 % date
    data = pandas_read_html_link(url)[0]
    indexes = []
    last_indexes = []
    data_list = data[0].tolist()
    end_flag = False
    for x in data.index:
        prod_key = str(data_list[x]).split(':')[-1]
        if prod_key in var_list:
            if end_flag:
                last_indexes.append(x - 1)
            indexes.append(x)
            end_flag = True
            continue
        if end_flag:
            if ('总 计' in str(data_list[x])) or ('总  计' in str(data_list[x])):
                last_indexes.append(x)
                end_flag = False
            elif ('注:' in str(data_list[x])):
                if x - 1 not in last_indexes:
                    last_indexes.append(x - 1)
                    end_flag = False
    records = pd.DataFrame()
    for i in list(range(len(indexes))):
        data_cut = data.loc[indexes[i]:last_indexes[i], :]
        data_dict = dict()
        prod_key = data_cut[0].tolist()[0].split(':')[-1]
        prod_key = prod_key = prod_key.replace("(", "").replace(")", "")
        data_dict['var'] = chinese_to_english(prod_key)
        data_dict['var_label'] = prod_key
        if prod_key in ['沥青仓库', '燃料油', '天然橡胶', '铅', '铜', '铝', '锌', '锡', '镍']:
            data_dict['spot_inventory'] = int(data_cut[4].tolist()[-1])
            data_dict['warrant_inventory'] = int(data_cut[5].tolist()[-1])
            data_dict['warehouse_stocks'] = int(data_cut[9].tolist()[-1])
        elif prod_key in ['沥青厂库', '热轧卷板', '白银', '线材', '螺纹钢']:
            data_dict['spot_inventory'] = 0
            data_dict['warrant_inventory'] = int(data_cut[3].tolist()[-1])
            data_dict['warehouse_stocks'] = int(data_cut[6].tolist()[-1])
        elif prod_key in ['黄金']:
            data_dict['spot_inventory'] = 0
            data_dict['warrant_inventory'] = int(data_cut[1].tolist()[-1])
            data_dict['warehouse_stocks'] = 0
        data_dict['date'] = date
        records = records.append(pd.DataFrame(data_dict, index=[1]))
    # if len(records.index) != 0:
    #     records.index = records['var']
    #     vars_in_market = [i for i in vars_list if i in records.index]
    #     records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 15
0
def get_shfe_receipt_1(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取上海期货交易所注册仓单数据, 适用20081006至20140518(包括) 20100126、20101029日期交易所格式混乱,直接回复脚本中DataFrame, 20100416、20130821日期交易所数据丢失
    :param date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
    :param vars_list: 合约品种如RB、AL等列表 为空时为所有商品
    :return: pd.DataFrame
    展期收益率数据(DataFrame):
                    var             商品品种                     string
                    receipt         仓单数                       int
                    date            日期                         string YYYYMMDD           
    """
    date = (cons.convert_date(date).strftime("%Y%m%d")
            if date is not None else datetime.date.today())
    if date not in calendar:
        warnings.warn(f"{date.strftime('%Y%m%d')}非交易日")
        return None
    if date == "20100126":
        shfe_20100126["date"] = date
        return shfe_20100126
    elif date == "20101029":
        shfe_20101029["date"] = date
        return shfe_20101029
    elif date in ["20100416", "20130821"]:
        return warnings.warn("20100416、20130821日期交易所数据丢失")
    else:
        var_list = [
            "天然橡胶",
            "沥青仓库",
            "沥青厂库",
            "热轧卷板",
            "燃料油",
            "白银",
            "线材",
            "螺纹钢",
            "铅",
            "铜",
            "铝",
            "锌",
            "黄金",
            "锡",
            "镍",
        ]
        url = cons.SHFE_RECEIPT_URL_1 % date
        data = pandas_read_html_link(url)[0]
        indexes = [x for x in data.index if (data[0].tolist()[x] in var_list)]
        last_index = [
            x for x in data.index if "注" in str(data[0].tolist()[x])
        ][0] - 1
        records = pd.DataFrame()
        for i in list(range(len(indexes))):
            if i != len(indexes) - 1:
                data_cut = data.loc[indexes[i]:indexes[i + 1] - 1, :]
            else:
                data_cut = data.loc[indexes[i]:last_index, :]
                data_cut = data_cut.fillna(method="pad")
            data_dict = dict()
            data_dict["var"] = chinese_to_english(data_cut[0].tolist()[0])
            data_dict["receipt"] = int(data_cut[2].tolist()[-1])
            data_dict["receipt_chg"] = int(data_cut[3].tolist()[-1])
            data_dict["date"] = date
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records["var"]
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
Exemplo n.º 16
0
def get_czce_receipt_3(date: str = None,
                       vars_list: List = cons.contract_symbols):
    """
    抓取郑州商品交易所注册仓单数据
    适用20151112(包括)至今
    Parameters
    ------
        date: 开始日期 format:YYYY-MM-DD 或 YYYYMMDD 或 datetime.date对象 为空时为当天
        vars_list: 合约品种如CF、TA等列表 为空时为所有商品
    Return
    -------
        DataFrame:
            展期收益率数据(DataFrame):`1
                var             商品品种                     string
                receipt         仓单数                       int
                date            日期                         string YYYYMMDD
    """

    date = (cons.convert_date(date).strftime("%Y%m%d")
            if date is not None else datetime.date.today())
    if date not in calendar:
        warnings.warn("%s非交易日" % date.strftime("%Y%m%d"))
        return None
    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding="utf-8")
    r.encoding = "utf-8"
    data = pd.read_html(r.text, encoding="gb2312")
    records = pd.DataFrame()
    if len(data) < 4:
        return records
    if int(date) <= 20171227:
        data = data[1:]
    for data_cut in data:
        if len(data_cut.columns) > 3 and len(data_cut.index) > 7:
            last_indexes = [
                x for x in data_cut.index
                if "注:" in str(data_cut[0].tolist()[x])
            ]
            if len(last_indexes) > 0:
                last_index = last_indexes[0] - 1
                data_cut = data_cut.loc[:last_index, :]
            if "PTA" in data_cut[0].tolist()[0]:
                var = "TA"
            else:
                strings = data_cut[0].tolist()[0]
                string = strings.split(" ")[0][3:]
                if len(string) > 7:
                    continue
                print(string)
                var = chinese_to_english(re.sub("[A-Z]+", "", string))
            data_cut.columns = data_cut.loc[1, :]
            data_cut = data_cut.fillna(method="pad")
            try:
                receipt = data_cut.loc[:, "仓单数量"].tolist()[-1]
            except:
                receipt = data_cut.loc[:, "仓单数量(保税)"].tolist()[-1]
            receipt_chg = data_cut.loc[:, "当日增减"].tolist()[-1]
            data_dict = {
                "var": var,
                "receipt": int(receipt),
                "receipt_chg": int(receipt_chg),
                "date": date,
            }
            records = records.append(pd.DataFrame(data_dict, index=[0]))
    if len(records.index) != 0:
        records.index = records["var"]
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)