Example #1
0
def exchange_suspend_info(ref_date, force_update=False):
    """Scrape suspend/resume records up to ``ref_date`` and persist them.

    When ``force_update`` is False, scraping resumes from the day after the
    most recent date already stored; otherwise only ``ref_date`` is scraped.
    Records are de-duplicated on (停(复)牌时间, 证券代码) before insertion.
    """
    start_date = ref_date if force_update \
        else find_latest_date() + dt.timedelta(days=1)

    collected = []
    for day in pd.date_range(start_date, ref_date):
        # Only Shanghai Stock Exchange business days carry data.
        if not isBizDay('china.sse', day):
            continue
        collected.append(suspend_info(day.strftime('%Y-%m-%d')))
        spyder_logger.info('Scraping finished for date {0}'.format(day))

    if not collected:
        spyder_logger.info('No data is available for {0}'.format(ref_date))
        return

    merged = pd.concat(collected)
    merged.drop_duplicates(['停(复)牌时间', '证券代码'], inplace=True)

    if not merged.empty:
        insert_table(merged,
                     ['effectiveDate',
                      'instrumentID',
                      'instrumentName',
                      'status',
                      'reason',
                      'stopTime'],
                     'suspend_info',
                     exchange_db_settings)
Example #2
0
def load_fund_holding(start_date, end_date):
    """Scrape howbuy fund-holding pages between *start_date* and *end_date*.

    Iterates every publication stamp in the range, paging each one until the
    site repeats a page or reports no data. Returns a de-duplicated DataFrame
    of holdings, or an empty DataFrame when nothing was scraped.
    """
    session = login()
    querl_url_template = 'http://simudata.howbuy.com/profile/favouriteStocks.htm?' \
                         'jjdm5=&zqdm=&endDate={0}&orderBy=cgsl&orderRule=Desc&page={1}'

    stamps = date_stamps(start_date, end_date)

    datas = []

    # BUG FIX: the loop previously rebound the ``end_date`` parameter itself,
    # so the "no data" warning at the bottom reported the last stamp instead
    # of the caller-supplied end of the range. Use a distinct loop variable.
    for stamp in stamps:

        page = 0
        previous_page = None

        while True:
            page += 1
            query_url = querl_url_template.format(stamp, page)

            info_data = session.post(query_url)
            soup = BeautifulSoup(info_data.text, 'lxml')

            # The site flags errors (e.g. expired login) with this div.
            error_message = soup.find('div', attrs={'class': 'iocn'})
            if error_message:
                raise ValueError(error_message.text)

            tables = soup.find_all('table')

            # Out-of-range page numbers serve the previous page again.
            if soup == previous_page:
                break

            if tables:
                target_table = tables[1]

                if target_table.tbody.td.text == '未查询到相关数据!':
                    break

                fund_data = parse_table(target_table)
                datas.append(fund_data)
            previous_page = soup
            spyder_logger.info("Page No. {0:4d} is finished.".format(page))

        spyder_logger.info(
            'Publication Date : {0} is finished for fund holding'.format(
                stamp))

    if datas:
        total_table = pd.concat(datas)
        total_table.drop_duplicates(['基金代码', '基金简称', '股票代码'], inplace=True)
        return total_table[[
            '基金代码', '基金简称', '截止日期', '持股数量(万股)', '持股比例(%)', '变动数量(万股)', '股票代码',
            '股票简称'
        ]]
    else:
        # Now correctly reports the original requested range.
        spyder_logger.warning("No any data got between {0} and {1}".format(
            start_date, end_date))
        return pd.DataFrame()
Example #3
0
def load_fund_index(start_month=200601, end_month=202201):
    """Scrape monthly values for a fixed set of howbuy fund indices.

    Pages through each index code between *start_month* and *end_month*
    (YYYYMM integers), stopping per code when the site repeats a page or
    reports no data. Returns a de-duplicated DataFrame (reset index), or an
    empty DataFrame when nothing was scraped.
    """
    session = login()
    querl_url_template = 'http://simudata.howbuy.com/profile/howbuyIndex.htm?staDate={0}' \
                         '&orderRule=Desc&endDate={1}&page={2}&smzs={3}'

    # Fixed universe of howbuy strategy-index codes.
    index_codes = ['HB0001',
                   'HB0011',
                   'HB0012',
                   'HB0014',
                   'HB0015',
                   'HB0016',
                   'HB0017',
                   'HB0018',
                   'HB001b',
                   'HB001d']

    datas = []

    for code in index_codes:
        page = 0
        previous_page = None

        while True:
            page += 1
            query_url = querl_url_template.format(start_month, end_month, page, code)
            info_data = session.post(query_url)
            soup = BeautifulSoup(info_data.text, 'lxml')

            # The site flags errors (e.g. expired login) with this div.
            error_message = soup.find('div', attrs={'class': 'iocn'})
            if error_message:
                raise ValueError(error_message.text)

            # Out-of-range page numbers serve the previous page again; check
            # this before touching the tables so we never index a stale page.
            if soup == previous_page:
                break

            tables = soup.find_all('table')
            # ROBUSTNESS FIX: previously ``tables[1]`` was indexed without a
            # guard and raised IndexError on pages lacking the expected
            # second table (the sibling fund-holding loader already guards).
            if len(tables) < 2:
                break

            target_table = tables[1]
            if target_table.tbody.td.text == '未查询到相关数据!':
                break

            fund_data = parse_table(target_table)
            datas.append(fund_data)
            previous_page = soup

        spyder_logger.info('Fund index:{0} is finished.'.format(code))

    if datas:
        total_table = pd.concat(datas)
        total_table.drop_duplicates(['统计月份', '指数代码'], inplace=True)
        return total_table.reset_index(drop=True)
    else:
        return pd.DataFrame()
Example #4
0
def exchange_announcement_info(ref_date):
    """Fetch exchange announcements for ``ref_date`` and persist them.

    Announcements are de-duplicated by URL before insertion; nothing is
    written when no new data is available.
    """
    table = announcement_info(ref_date.strftime('%Y-%m-%d'))

    if table.empty:
        spyder_logger.info('No new data is available for {0}'.format(ref_date))
        return

    # A URL uniquely identifies one announcement.
    table.drop_duplicates(['url'], inplace=True)

    if not table.empty:
        insert_table(table,
                     ['reportDate',
                      'instrumentID',
                      'title',
                      'url',
                      'updateTime',
                      'exchangePlace'],
                     'announcement_info',
                     exchange_db_settings)
Example #5
0
def load_howbuy_fund_type(ref_date):
    """Scrape the howbuy fund listing, keeping funds established on/after
    ``ref_date``.

    The listing appears sorted by establishment date descending (orderBy=clrq,
    orderByDesc=true in the URL — confirm against the site), so paging stops
    once a page's oldest fund predates ``ref_date``.
    """
    session = login()
    quert_url_template = 'http://simudata.howbuy.com/profile/newJjjz.htm?orderBy=clrq' \
                         '&orderByDesc=true&jjdm=&jldm=&glrm=&cllx=qb&zzxs=qb&syMin=&syMax=&' \
                         'page={0}&perPage=30'

    pages = []
    page_no = 0
    last_soup = None

    while True:
        page_no += 1
        response = session.post(quert_url_template.format(page_no))
        soup = BeautifulSoup(response.text, 'lxml')

        # The site flags errors (e.g. expired login) with this div.
        err = soup.find('div', attrs={'class': 'iocn'})
        if err:
            raise ValueError(err.text)

        # Out-of-range page numbers serve the previous page again.
        if soup == last_soup:
            break
        last_soup = soup

        frame = parse_table(soup.find_all('table')[1])
        # Drop rows with a missing (zero) establishment date before the
        # stop-condition check below.
        frame = frame[frame['成立日期'] != 0]
        if len(frame) == 0 or frame.iloc[-1]['成立日期'] < ref_date:
            break

        pages.append(frame)
        spyder_logger.info("Page No. {0:4d} is finished.".format(page_no))

    if not pages:
        return pd.DataFrame()

    result = pd.concat(pages)
    result.drop_duplicates(['基金代码'], inplace=True)
    result = result[(result['净值日期'] != 0) & (result['成立日期'] != 0)]
    result = result[result['成立日期'] >= ref_date]
    return result[[
        '基金代码', '基金简称', '基金管理人', '基金经理', '成立日期', '好买策略', '复权单位净值', '净值日期'
    ]]
Example #6
0
def load_howbuy_style_return(start_month=200001, end_month=202101):
    """Scrape monthly strategy-style return statistics from howbuy.

    Pages between *start_month* and *end_month* (YYYYMM integers) until the
    site repeats a page or reports no data. Returns a DataFrame de-duplicated
    on (统计月份, 好买策略), or an empty DataFrame when nothing was scraped.
    """
    session = login()
    querl_url_template = 'http://simudata.howbuy.com/profile/strategies.htm?staDate={0}' \
                         '&cllx=qb&endDate={1}&page={2}&syl=j1y'

    frames = []
    page_no = 0
    last_soup = None

    while True:
        page_no += 1
        response = session.post(
            querl_url_template.format(start_month, end_month, page_no))
        soup = BeautifulSoup(response.text, 'lxml')

        # The site flags errors (e.g. expired login) with this div.
        err = soup.find('div', attrs={'class': 'iocn'})
        if err:
            raise ValueError(err.text)

        target_table = soup.find_all('table')[1]

        # Stop on a repeated page or an explicit "no data" marker.
        if soup == last_soup or target_table.tbody.td.text == '未查询到相关数据!':
            break
        last_soup = soup

        frames.append(parse_table(target_table,
                                  col_level=2,
                                  col_names=[
                                      'No.', '统计月份', '好买策略', '最大值', '最小值', '中位数',
                                      '均值', '沪深300同期收益率'
                                  ]))
        spyder_logger.info("Page No. {0:4d} is finished.".format(page_no))

    if not frames:
        return pd.DataFrame()

    result = pd.concat(frames)
    result.drop_duplicates(['统计月份', '好买策略'], inplace=True)
    return result