Пример #1
0
def getIndexStockByType(dir_path, index_type):
    """Scrape the constituents of one index board into an .xls workbook.

    Opens the eastmoney index page in Chrome, finds the element under
    ``#mk_type`` whose text equals ``index_type``, clicks it, then walks
    every result page writing stock code, name and price into columns 0-2
    of a sheet. ``getBaseDataFromF10`` is called for each row with start
    column 3 to fill in the remaining columns.

    Args:
        dir_path: Prefix prepended verbatim to ``<board name>.xls``; no path
            separator is inserted, so it presumably should end with one.
        index_type: Exact visible text of the board tab to scrape.

    Nothing is saved when no board text matches ``index_type``. The browser
    is always closed, including when scraping raises.
    """
    heads = [
        'StockCode', 'StockName', 'Price', 'Trade', 'PB', 'PE_S', 'PE_D',
        'EarningsPerShare', 'NetProfitDes', 'ROE', 'GrossProfitRate',
        'NetAssetValuePerShare', 'CapitalStock', 'ScaleShareType',
        'FinanceAnalize'
    ]
    # Page size used by the original row-number formula.
    rows_per_page = 50
    rows_xpath = '//*[@id="dt_1"]/tbody/tr'

    browser = webdriver.Chrome()
    try:
        browser.get('http://data.eastmoney.com/other/index/hs300.html')

        parent = browser.find_element_by_id("mk_type")
        boards = parent.find_elements_by_xpath('.//*')

        workbook = xlwt.Workbook()

        for board in boards:
            board_name = board.text
            if board_name != index_type:
                continue

            sheet = addHeaders(workbook, board_name, heads)
            board.click()
            # Fixed wait for the table to refresh after switching boards.
            time.sleep(2)
            print("正在获取:" + board_name + "...")
            total_page = int(
                browser.find_element_by_xpath(
                    '//*[@id="miniPageNav"]/b[4]/span').text)

            for page in range(1, total_page + 1):
                # Only visit rows that exist, so a short page does not raise.
                row_count = min(
                    len(browser.find_elements_by_xpath(rows_xpath)),
                    rows_per_page)
                for j in range(row_count):
                    cu.printProgress("第" + str(page) + "页-" + "第" + str(j) +
                                     "条...")
                    row_xpath = rows_xpath + '[' + str(j + 1) + ']'
                    company = browser.find_element_by_xpath(
                        row_xpath + '/td[3]/a')
                    code = browser.find_element_by_xpath(
                        row_xpath + '/td[2]/a')
                    price = browser.find_element_by_xpath(
                        row_xpath + '/td[4]/span')

                    # Row 0 holds the headers, hence the +1.
                    row = (page - 1) * rows_per_page + 1 + j
                    sheet.write(row, 0, code.text)
                    sheet.write(row, 1, company.text)
                    sheet.write(row, 2, price.text)
                    getBaseDataFromF10(code.text, row, 3, sheet)

                # No "next page" click after the last page.
                if page == total_page:
                    break

                page_parent = browser.find_element_by_id("PageCont")
                page_btns = page_parent.find_elements_by_xpath('.//*')
                # Keep the last element labelled "next page", as before.
                next_btn = None
                for page_btn in page_btns:
                    if page_btn.text == "下一页":
                        next_btn = page_btn
                if next_btn is None:
                    # Do not click an arbitrary pager element; stop paging
                    # and keep what has been collected so far.
                    print("未找到下一页按钮,提前结束翻页")
                    break
                next_btn.click()
                time.sleep(2)

            file_path = dir_path + board_name + '.xls'
            workbook.save(file_path)

            print('数据下载完毕,已保存到' + file_path)
            # The requested board is done; the remaining elements may be
            # stale after the click, so do not keep iterating them.
            break
    finally:
        browser.close()
        browser.quit()
Пример #2
0
def getStockDataByType(dir_path, stock_type=STOCK_TYPE_HSA, save_to_db=False):
    """Scrape one eastmoney stock board into an .xls file or a MySQL table.

    Opens the quote grid page in Chrome, clicks the tab whose text equals
    ``stock_type`` and walks every result page. Rows with price ``'-'`` and
    rows flagged by ``sdu.isST`` / ``sdu.isDelist`` are skipped.

    Args:
        dir_path: Prefix prepended verbatim to ``<stock_type>.xls``; no path
            separator is inserted, so it presumably should end with one.
        stock_type: Exact visible text of the board tab to scrape.
        save_to_db: When true, insert (stock_code, stock_name) into table
            ``t_tonghua`` instead of writing the workbook.

    Errors raised while scraping are printed with a traceback and whatever
    was collected so far is still saved. The browser is always closed.

    Column meanings (``heads``):
        StockCode: stock code
        StockName: stock short name
        Price: current price
        -- daily floating data --
        PriceLimit: price change percentage
        QuantityRelativeR: volume ratio
        TurnoverRate: turnover rate
        -- relatively fixed data --
        Trade: industry
        PB: price-to-book ratio
        PE_S: static price-to-earnings ratio
        PE_D: dynamic price-to-earnings ratio
        EarningsPerShare: earnings per share
        NetProfitDes: net profit description (net profit and growth rate)
        ROE: return on equity
        GrossProfitRate: gross margin
        NetAssetValuePerShare: net asset value per share
        CapitalStock: share capital
        ScaleShareType: type (large-cap / small-cap)
        FinanceAnalize: financial analysis
        MoneyFlowPerShare: cash flow per share
        -- reserved fields (not written here) --
        Proceeds / ProceedsYOY / ProceedsQOQ: operating revenue and its
            year-on-year / quarter-on-quarter growth
        NetProfit / NetProfitYOY / NetProfitQOQ: net profit and its
            year-on-year / quarter-on-quarter growth
    """
    heads = [
        'StockCode', 'StockName', 'Price', 'PriceLimit', 'QuantityRelativeR',
        'TurnoverRate', 'Trade', 'PB', 'PE_S', 'PE_D', 'EarningsPerShare',
        'NetProfitDes', 'ROE', 'GrossProfitRate', 'NetAssetValuePerShare',
        'CapitalStock', 'ScaleShareType', 'FinanceAnalize',
        'MoneyFlowPerShare'
    ]
    rows_per_page = 20
    rows_xpath = '//*[@id="table_wrapper-table"]/tbody/tr'

    # Initialise the webdriver.
    browser = webdriver.Chrome()
    try:
        browser.get(
            "http://quote.eastmoney.com/center/gridlist.html#hs_a_board")

        parent = browser.find_element_by_xpath('//*[@id="tab"]/ul')
        boards = parent.find_elements_by_xpath('.//*')

        # Initialise the workbook.
        workbook = xlwt.Workbook()

        # Only set up the database engine when it will actually be used.
        engine = None
        if save_to_db:
            engine = create_engine(
                'mysql+pymysql://root:root@localhost:3306/listed_company')

        # Stays None until the requested board has been found.
        sheet = None
        try:
            for board in boards:
                board_name = board.text
                if board_name != stock_type:
                    continue

                sheet = addHeaders(workbook, board_name, heads)
                board.click()
                # Fixed wait for the table to refresh after switching boards.
                time.sleep(2)
                print("正在获取:" + board_name + "..." + '请勿关闭浏览器')
                page_parent = browser.find_element_by_class_name(
                    'paginate_page')
                pages = page_parent.find_elements_by_xpath('.//*')
                # The last pager entry carries the total page count.
                total_page = int(pages[-1].text)

                row_index = 0
                for page in range(1, total_page + 1):
                    # Only visit rows that exist, so a short last page does
                    # not end the run with an exception.
                    row_count = min(
                        len(browser.find_elements_by_xpath(rows_xpath)),
                        rows_per_page)
                    for j in range(row_count):
                        cu.printProgress("第" + str(page) + "页-" + "第" +
                                         str(j) + "条...")
                        row_xpath = rows_xpath + '[' + str(j + 1) + ']'
                        company = browser.find_element_by_xpath(
                            row_xpath + '/td[3]/a')
                        code = browser.find_element_by_xpath(
                            row_xpath + '/td[2]/a')
                        price = browser.find_element_by_xpath(
                            row_xpath + '/td[5]/span')
                        pl = browser.find_element_by_xpath(
                            row_xpath + '/td[6]/span')
                        qrr = browser.find_element_by_xpath(
                            row_xpath + '/td[15]')
                        tr = browser.find_element_by_xpath(
                            row_xpath + '/td[16]')

                        # Skip stocks without a price, ST stocks and
                        # delisted stocks.
                        if (price.text == '-' or sdu.isST(company.text)
                                or sdu.isDelist(company.text)):
                            continue

                        row_index += 1
                        if save_to_db:
                            # Bound parameters instead of string formatting:
                            # scraped text must not be spliced into SQL.
                            engine.execute(
                                'INSERT INTO t_tonghua '
                                '(stock_code,stock_name) VALUES (%s,%s)',
                                (code.text, company.text))
                        else:
                            sheet.write(row_index, 0, code.text)
                            sheet.write(row_index, 1, company.text)
                            sheet.write(row_index, 2, price.text)
                            sheet.write(row_index, 3, pl.text)
                            sheet.write(row_index, 4, qrr.text)
                            sheet.write(row_index, 5, tr.text)
                            getBaseDataFromF10(code.text, row_index, 6,
                                               sheet)

                    if page < total_page:
                        page_btn = browser.find_element_by_xpath(
                            '//*[@id="main-table_paginate"]/a[2]')
                        page_btn.click()
                        time.sleep(2)

                # The requested board is done; the remaining elements may be
                # stale after the click, so do not keep iterating them.
                break

        except Exception:
            # Best effort: report the failure and still save partial data.
            traceback.print_exc()

        if not save_to_db:
            if sheet is None:
                # Nothing was scraped, so there is no sheet to save.
                print('未找到板块:' + stock_type)
            else:
                # Save as an .xls file named after the requested board (the
                # loop variable may point at a different board by now).
                file_path = dir_path + stock_type + '.xls'
                workbook.save(file_path)
                print('数据下载完毕,已保存到' + file_path)
    finally:
        browser.close()
        browser.quit()
Пример #3
0
def getAnnualReports(dir_path, df, year):
    """Write annual-report figures for every stock in ``df`` to an .xls file.

    For each row of ``df`` the report returned by
    ``getAnnualReportByStockCode(stock_code, years[year])`` is fetched.
    Stocks with an empty result, or whose ``report_name`` does not contain
    ``year``, are skipped. The rest are written one per row below the header.

    Args:
        dir_path: Prefix prepended verbatim to ``<year>年报.xls``; no path
            separator is inserted, so it presumably should end with one.
        df: DataFrame with at least ``StockCode`` and ``StockName`` columns.
        year: Report year as a string; also used as a key into the
            module-level ``years`` mapping.

    Column meanings (``heads``):
        stock_code: stock code
        stock_name: stock short name
        -- financial data --
        total_revenue / total_revenue_gr: operating revenue and growth rate
        net_profit_atsopc / net_profit_atsopc_gr: net profit and growth rate
        basic_eps / basic_eps_gr: earnings per share and growth rate
        np_per_share / np_per_share_gr: net assets per share and growth rate
        operate_cash_flow_ps / operate_cash_flow_ps_gr: cash flow per share
            and growth rate
        avg_roe: return on equity
        asset_liab_ratio: asset-liability ratio
    """
    heads = [
        'stock_code', 'stock_name', 'total_revenue', 'total_revenue_gr',
        'net_profit_atsopc', 'net_profit_atsopc_gr', 'basic_eps',
        'basic_eps_gr', 'np_per_share', 'np_per_share_gr',
        'operate_cash_flow_ps', 'operate_cash_flow_ps_gr', 'avg_roe',
        'asset_liab_ratio'
    ]
    # Each of these report fields fills two adjacent columns, taken from
    # element [0] and element [1] of the field, starting at column 2.
    paired_fields = ('total_revenue', 'net_profit_atsopc', 'basic_eps',
                     'np_per_share', 'operate_cash_flow_ps')

    def percent_to_ratio(value):
        # Missing values are written as 0 rather than raising on float(None).
        return float(0 if value is None else value) / 100

    workbook = xlwt.Workbook()

    # Add the header row.
    sheet = workbook.add_sheet(year + "年报")
    for col, head in enumerate(heads):
        sheet.write(0, col, head)

    row_index = 0
    for _, row in df.iterrows():
        stock_code = row['StockCode']
        dict_ar = getAnnualReportByStockCode(stock_code, years[year])
        if len(dict_ar) == 0:
            continue
        if year not in dict_ar['report_name']:
            continue
        row_index = row_index + 1
        cu.printProgress('获取第' + str(row_index) + '条')

        sheet.write(row_index, 0, stock_code)
        sheet.write(row_index, 1, row['StockName'])
        for pair_no, field in enumerate(paired_fields):
            first_col = 2 + 2 * pair_no
            sheet.write(row_index, first_col, dict_ar[field][0])
            sheet.write(row_index, first_col + 1, dict_ar[field][1])
        # Both values are divided by 100 before being written.
        sheet.write(row_index, 12, percent_to_ratio(dict_ar['avg_roe'][0]))
        sheet.write(row_index, 13,
                    percent_to_ratio(dict_ar['asset_liab_ratio'][0]))

    file_path = dir_path + year + '年报.xls'
    workbook.save(file_path)
    print('数据下载完毕,已保存到' + file_path)