def getIndexStockByType(dir_path, index_type):
    """Scrape the stocks of one index board into an .xls workbook.

    Opens the eastmoney index page in Chrome, clicks the board whose visible
    text equals ``index_type``, walks every result page and writes one sheet
    row per stock: code, name and price in columns 0-2, followed by the
    columns that ``getBaseDataFromF10`` fills in starting at column 3.
    The workbook is saved as ``dir_path + index_type + '.xls'``.

    Args:
        dir_path: Output directory prefix. It is concatenated as-is, so it
            must already end with a path separator.
        index_type: Visible text of the board to scrape.

    Returns:
        None. Nothing is written when no board matches ``index_type``.
    """
    heads = [
        'StockCode', 'StockName', 'Price', 'Trade', 'PB', 'PE_S', 'PE_D',
        'EarningsPerShare', 'NetProfitDes', 'ROE', 'GrossProfitRate',
        'NetAssetValuePerShare', 'CapitalStock', 'ScaleShareType',
        'FinanceAnalize'
    ]
    # Rows per full page; also fixes the sheet offset of every page.
    page_size = 50
    rows_xpath = '//*[@id="dt_1"]/tbody/tr'

    browser = webdriver.Chrome()
    try:
        browser.get('http://data.eastmoney.com/other/index/hs300.html')
        parent = browser.find_element_by_id("mk_type")
        boards = parent.find_elements_by_xpath('.//*')
        # './/*' yields every descendant, so nested elements may carry the
        # same text; only the first match is handled, and the remaining
        # (possibly stale after the click) elements are never touched.
        board = next((b for b in boards if b.text == index_type), None)
        if board is None:
            return

        workbook = xlwt.Workbook()
        sheet = addHeaders(workbook, index_type, heads)
        board.click()
        time.sleep(2)  # crude wait for the table to reload
        print("正在获取:" + index_type + "...")
        total_page = int(
            browser.find_element_by_xpath(
                '//*[@id="miniPageNav"]/b[4]/span').text)

        for page in range(1, total_page + 1):
            # A page may hold fewer than page_size rows (typically the last
            # one); only visit the rows that are actually present.
            row_count = len(browser.find_elements_by_xpath(rows_xpath))
            for j in range(min(page_size, row_count)):
                cu.printProgress("第" + str(page) + "页-" + "第" + str(j) +
                                 "条...")
                row_xpath = rows_xpath + '[' + str(j + 1) + ']'
                company = browser.find_element_by_xpath(row_xpath +
                                                        '/td[3]/a')
                code = browser.find_element_by_xpath(row_xpath + '/td[2]/a')
                price = browser.find_element_by_xpath(row_xpath +
                                                      '/td[4]/span')
                # Row 0 holds the headers, hence the +1.
                row = (page - 1) * page_size + 1 + j
                sheet.write(row, 0, code.text)
                sheet.write(row, 1, company.text)
                sheet.write(row, 2, price.text)
                getBaseDataFromF10(code.text, row, 3, sheet)

            if page == total_page:
                break  # last page: there is nothing left to navigate to
            page_parent = browser.find_element_by_id("PageCont")
            next_btns = [
                btn for btn in page_parent.find_elements_by_xpath('.//*')
                if btn.text == "下一页"
            ]
            if not next_btns:
                # Without a "next page" control we would only re-read the
                # current page, so stop paging instead.
                break
            next_btns[-1].click()
            time.sleep(2)

        file_path = dir_path + index_type + '.xls'
        workbook.save(file_path)
        print('数据下载完毕,已保存到' + file_path)
    finally:
        # Always release the browser, even when scraping fails midway.
        try:
            browser.close()
        finally:
            browser.quit()
def _scrapeStockGridPages(browser, sheet, engine=None, insert_stmt=None):
    """Walk every page of the stock grid currently shown in ``browser``.

    Stocks without a price, ST stocks and delisted stocks are skipped. Each
    remaining stock is inserted through ``engine`` when one is given,
    otherwise it is written to the next free row of ``sheet``.
    """
    page_size = 20
    rows_xpath = '//*[@id="table_wrapper-table"]/tbody/tr'

    page_parent = browser.find_element_by_class_name('paginate_page')
    pages = page_parent.find_elements_by_xpath('.//*')
    total_page = int(pages[-1].text)

    row_index = 0
    for page in range(1, total_page + 1):
        # The last page is usually shorter than page_size; only visit the
        # rows that are actually present.
        row_count = len(browser.find_elements_by_xpath(rows_xpath))
        for j in range(min(page_size, row_count)):
            cu.printProgress("第" + str(page) + "页-" + "第" + str(j) + "条...")
            row_xpath = rows_xpath + '[' + str(j + 1) + ']'
            company = browser.find_element_by_xpath(row_xpath + '/td[3]/a')
            code = browser.find_element_by_xpath(row_xpath + '/td[2]/a')
            price = browser.find_element_by_xpath(row_xpath + '/td[5]/span')

            # Skip stocks without a price, ST stocks and delisted stocks.
            if (price.text == '-' or sdu.isST(company.text)
                    or sdu.isDelist(company.text)):
                continue
            row_index += 1

            if engine is not None:
                # Bound parameters: the values come from a web page and must
                # never be spliced into the SQL text.
                engine.execute(insert_stmt, {
                    'stock_code': code.text,
                    'stock_name': company.text
                })
                continue

            pl = browser.find_element_by_xpath(row_xpath + '/td[6]/span')
            qrr = browser.find_element_by_xpath(row_xpath + '/td[15]')
            tr = browser.find_element_by_xpath(row_xpath + '/td[16]')
            sheet.write(row_index, 0, code.text)
            sheet.write(row_index, 1, company.text)
            sheet.write(row_index, 2, price.text)
            sheet.write(row_index, 3, pl.text)
            sheet.write(row_index, 4, qrr.text)
            sheet.write(row_index, 5, tr.text)
            # NOTE(review): the F10 columns are fetched only on the
            # spreadsheet path because they are written into the sheet;
            # confirm the database path never needed them.
            getBaseDataFromF10(code.text, row_index, 6, sheet)

        if page < total_page:
            browser.find_element_by_xpath(
                '//*[@id="main-table_paginate"]/a[2]').click()
            time.sleep(2)  # crude wait for the grid to reload


def getStockDataByType(dir_path, stock_type=STOCK_TYPE_HSA, save_to_db=False):
    """Scrape one stock board from eastmoney into an .xls file or the database.

    Opens the quote grid in Chrome, clicks the board tab whose visible text
    equals ``stock_type`` and walks all of its pages. Failures while scraping
    are printed and whatever was collected so far is still saved
    (best effort).

    Exported columns:
        StockCode: stock code
        StockName: stock short name
        Price: current price
      Daily floating data:
        PriceLimit: price change percentage
        QuantityRelativeR: volume ratio
        TurnoverRate: turnover rate
      Relatively fixed data (columns 6+ are handed to ``getBaseDataFromF10``):
        Trade: industry
        PB: price-to-book ratio
        PE_S: static price-to-earnings ratio
        PE_D: dynamic price-to-earnings ratio
        EarningsPerShare: earnings per share
        NetProfitDes: net profit description (net profit and growth rate)
        ROE: return on equity
        GrossProfitRate: gross profit margin
        NetAssetValuePerShare: net asset value per share
        CapitalStock: share capital
        ScaleShareType: type (large-cap / small-cap)
        FinanceAnalize: financial analysis
        MoneyFlowPerShare: cash flow per share
      Reserved fields (not exported yet):
        Proceeds / ProceedsYOY / ProceedsQOQ: operating revenue and its
            year-over-year / quarter-over-quarter growth
        NetProfit / NetProfitYOY / NetProfitQOQ: net profit and its
            year-over-year / quarter-over-quarter growth
        GrossProfitRate: sales gross margin

    Args:
        dir_path: Output directory prefix. It is concatenated as-is, so it
            must already end with a path separator.
        stock_type: Visible text of the board tab to scrape.
        save_to_db: When true, insert code and name of every stock into the
            ``t_tonghua`` table instead of writing
            ``dir_path + stock_type + '.xls'``.
    """
    heads = [
        'StockCode', 'StockName', 'Price', 'PriceLimit', 'QuantityRelativeR',
        'TurnoverRate', 'Trade', 'PB', 'PE_S', 'PE_D', 'EarningsPerShare',
        'NetProfitDes', 'ROE', 'GrossProfitRate', 'NetAssetValuePerShare',
        'CapitalStock', 'ScaleShareType', 'FinanceAnalize',
        'MoneyFlowPerShare'
    ]

    # Initialise the webdriver.
    browser = webdriver.Chrome()
    try:
        browser.get(
            "http://quote.eastmoney.com/center/gridlist.html#hs_a_board")
        parent = browser.find_element_by_xpath('//*[@id="tab"]/ul')
        boards = parent.find_elements_by_xpath('.//*')
        # Initialise the spreadsheet object.
        workbook = xlwt.Workbook()

        engine = None
        insert_stmt = None
        if save_to_db:
            from sqlalchemy import text

            # Connect to the database only when it is actually used.
            # NOTE(review): credentials are hard-coded; consider moving them
            # to configuration.
            engine = create_engine(
                'mysql+pymysql://root:root@localhost:3306/listed_company')
            insert_stmt = text(
                'INSERT INTO t_tonghua (stock_code,stock_name) '
                'VALUES (:stock_code, :stock_name)')

        sheet = None
        try:
            # './/*' yields every descendant, so nested elements may carry
            # the same text; only the first match is handled, and the
            # remaining (possibly stale after the click) elements are never
            # touched.
            board = next((b for b in boards if b.text == stock_type), None)
            if board is not None:
                sheet = addHeaders(workbook, stock_type, heads)
                board.click()
                time.sleep(2)
                print("正在获取:" + stock_type + "..." + '请勿关闭浏览器')
                _scrapeStockGridPages(browser, sheet, engine, insert_stmt)
        except Exception:
            # Best effort: report the failure and keep what was collected.
            traceback.print_exc()

        # Save only when a sheet exists; the workbook is otherwise empty.
        if not save_to_db and sheet is not None:
            # Save as an xls file, named after the requested board.
            file_path = dir_path + stock_type + '.xls'
            workbook.save(file_path)
            print('数据下载完毕,已保存到' + file_path)
    finally:
        # Always release the browser, even when setup or saving fails.
        try:
            browser.close()
        finally:
            browser.quit()
def getAnnualReports(dir_path, df, year):
    """Export annual-report figures for every stock in ``df`` to an .xls file.

    For each row of ``df`` the report is fetched with
    ``getAnnualReportByStockCode(stock_code, years[year])``. Stocks with an
    empty result, or whose ``report_name`` does not contain ``year``, are
    skipped. The workbook is saved as ``dir_path + year + '年报.xls'``.

    Columns:
        stock_code: stock code
        stock_name: stock short name
      Financial data (each ``*_gr`` column holds element 1 of the same
      report field whose element 0 fills the column before it):
        total_revenue / total_revenue_gr: operating revenue and growth rate
        net_profit_atsopc / net_profit_atsopc_gr: net profit
        basic_eps / basic_eps_gr: earnings per share and growth rate
        np_per_share / np_per_share_gr: net assets per share
        operate_cash_flow_ps / operate_cash_flow_ps_gr: cash flow
        avg_roe: return on equity, divided by 100
        asset_liab_ratio: asset-liability ratio, divided by 100

    Args:
        dir_path: Output directory prefix. It is concatenated as-is, so it
            must already end with a path separator.
        df: DataFrame providing ``StockCode`` and ``StockName`` columns.
        year: Report year as a string; it must be a key of the module-level
            ``years`` mapping.
    """
    heads = [
        'stock_code', 'stock_name', 'total_revenue', 'total_revenue_gr',
        'net_profit_atsopc', 'net_profit_atsopc_gr', 'basic_eps',
        'basic_eps_gr', 'np_per_share', 'np_per_share_gr',
        'operate_cash_flow_ps', 'operate_cash_flow_ps_gr', 'avg_roe',
        'asset_liab_ratio'
    ]
    # Report fields stored as pairs: element 0 goes to the value column and
    # element 1 to the following "*_gr" column, starting at column 2.
    paired_fields = ('total_revenue', 'net_profit_atsopc', 'basic_eps',
                     'np_per_share', 'operate_cash_flow_ps')

    def as_fraction(value):
        # A missing figure is exported as 0 rather than aborting the run.
        return float(0 if value is None else value) / 100

    workbook = xlwt.Workbook()
    # Add the header row.
    sheet = workbook.add_sheet(year + "年报")
    for col, head in enumerate(heads):
        sheet.write(0, col, head)

    row_index = 0
    for _, row in df.iterrows():
        stock_code = row['StockCode']
        dict_ar = getAnnualReportByStockCode(stock_code, years[year])
        if not dict_ar or year not in dict_ar['report_name']:
            continue

        row_index += 1
        cu.printProgress('获取第' + str(row_index) + '条')
        sheet.write(row_index, 0, stock_code)
        sheet.write(row_index, 1, row['StockName'])
        col = 2
        for field in paired_fields:
            pair = dict_ar[field]
            sheet.write(row_index, col, pair[0])
            sheet.write(row_index, col + 1, pair[1])
            col += 2
        sheet.write(row_index, 12, as_fraction(dict_ar['avg_roe'][0]))
        sheet.write(row_index, 13,
                    as_fraction(dict_ar['asset_liab_ratio'][0]))

    file_path = dir_path + year + '年报.xls'
    workbook.save(file_path)
    print('数据下载完毕,已保存到' + file_path)