def return_stock_in_bankuai(self, bankuai):
    """Download the stocks that belong to a third-level bankuai.

    Args:
        bankuai: list of three names going from the top bankuai down to
            the bottom one (the original author's example is the Chinese
            names for [bankuai, concept bankuai, AB shares]).

    Returns:
        A shallow copy of ``bankuai`` with one extra element appended: a
        list of ``[code, name]`` pairs. ``code`` is field 1 and ``name`` is
        field 2 (decoded from UTF-8) of every double-quoted, comma-separated
        record found between the outermost ``[`` and ``]`` of the page.

    Raises:
        KeyError: if the first or second name is not in the bankuai tree.
        RuntimeError: if the third name is not a child of the second one,
            or if the downloaded page contains no ``[...]`` stock list.
    """
    bankuai_tree = self.__bankuai_tree
    siblings = bankuai_tree[bankuai[0]]["children"][bankuai[1]]["children"]
    if bankuai[2] not in siblings:
        raise RuntimeError("The url of [" + ",".join(bankuai) + "] is not correct.", "in Eastmoney.py")

    bankuai_detail_url = self.return_url_for_bankuai_stock(bankuai)

    # Retry forever until the download succeeds.  Only ``Exception`` is
    # caught so that Ctrl-C (KeyboardInterrupt) and SystemExit can still
    # break out of the loop.
    while True:
        try:
            bankuai_detail_page = read_url(bankuai_detail_url)
            break
        except Exception:
            warn_log('Connection lost, retry in 10 seconds ...')
            time.sleep(10)

    # Greedy match: everything between the first "[" and the last "]".
    grp_match = re.search(r'\[(?P<code_detail_grp>.*)\]', bankuai_detail_page)
    if grp_match is None:
        raise RuntimeError("No stock list found in the page at [%s]" % bankuai_detail_url, "in Eastmoney.py")
    code_detail_grp = grp_match.group("code_detail_grp")

    # Each stock is one double-quoted record of comma-separated fields.
    r_code_detail = re.compile(r'"(?P<code_detail>[^"]*)"')
    stocks = []
    for m in r_code_detail.finditer(code_detail_grp):
        fields = m.group("code_detail").split(",")
        stocks.append([fields[1], fields[2].decode("utf-8")])

    out_list = copy.copy(bankuai)
    out_list.append(stocks)
    return out_list
def return_bankuai_tree(self, bankuai_url=None):
    """Download the bankuai index page and parse it into a nested dict.

    The bankuai list in the html has three possible shapes:

        AAA
        BBB->GGG
        BBB->CCC
        DDD->EEE->FFF

    * AAA (no children) is matched by r_return_category_no_sub_category.
    * BBB and its children are matched by
      r_return_first_lvl_with_sub_category and r_return_detail_lvl.
    * DDD and its two levels of children are matched by
      r_return_first_lvl_with_sub_category,
      r_return_second_lvl_with_sub_category and r_return_detail_lvl.

    Names are decoded from gb2312 before being used as dict keys.

    Args:
        bankuai_url: url of the index page.  When omitted (None) it is
            built as ``self.__base_url + "/" + self.__bankuai_ext``.  The
            default is resolved here, at call time, because ``self`` does
            not exist when the ``def`` statement itself is evaluated.

    Returns:
        A dict shaped like::

            {name: {"url": ...,
                    "children": {name: {"url": ...,
                                        "children": {name: {"url": ...}}}}},
             name: {"url": ...},
             ...}

        ``"children"`` is present only on nodes matched as having
        sub-categories.
    """
    if bankuai_url is None:
        bankuai_url = self.__base_url + "/" + self.__bankuai_ext

    bankuai_page = read_url(bankuai_url)

    r_return_category_no_sub_category = r'<dd class="node-item" data-key="\w+"><a href="(?P<url>[0-9a-zA-Z_,#\.]+)"><span class="text">(?P<name>[^<]+)</span></a></dd>'
    r_return_first_lvl_with_sub_category = r'<dd class="js-sub" data-id="\w+"><span class="node-item" data-key="\w*"><b class="icon-sub-title"></b><a href="(?P<url>[0-9a-zA-Z_,#\.]+)"[^>]*><span class="text">(?P<name>[^<]+)</span>(?P<content>.*?)(?=</ul>)</ul></dd>'
    r_return_second_lvl_with_sub_category = r'<li class="node-sub-sub"><a href="(?P<url>[^"]+)" class="[^>]+"><span class="text">(?P<name>[^<]+)</span></a><b class="icon-right"></b><div class="[^>]+">(?P<content>.*?)(?=</div>)</div><div class="hover-mask"></div></li>'
    r_return_detail_lvl = r'<a href="(?P<url>[^"]+)"[^>]*><span class="text">(?P<name>[^<]+)</span></a>'

    r_no_sub_cat = re.compile(r_return_category_no_sub_category)
    r_first_lvl = re.compile(r_return_first_lvl_with_sub_category)
    r_second_lvl = re.compile(r_return_second_lvl_with_sub_category)
    r_detail_lvl = re.compile(r_return_detail_lvl)

    d_code_url = {}

    # Categories without children: the first occurrence of a name wins.
    for m in r_no_sub_cat.finditer(bankuai_page):
        name = m.group("name").decode("gb2312")
        if name not in d_code_url:
            d_code_url[name] = {"url": m.group("url")}

    # Categories with children: a later match replaces any earlier entry
    # stored under the same name.
    for m in r_first_lvl.finditer(bankuai_page):
        first_lvl_children = {}
        d_code_url[m.group("name").decode("gb2312")] = {
            "url": m.group("url"),
            "children": first_lvl_children,
        }
        content = m.group("content")

        if 'class="node-sub-sub"' not in content:
            # One level of sub-branches: every link is a leaf.
            for m1 in r_detail_lvl.finditer(content):
                first_lvl_children[m1.group("name").decode("gb2312")] = {"url": m1.group("url")}
            continue

        # Two levels of sub-branches.
        for m1 in r_second_lvl.finditer(content):
            second_lvl_children = {}
            first_lvl_children[m1.group("name").decode("gb2312")] = {
                "url": m1.group("url"),
                "children": second_lvl_children,
            }
            for m2 in r_detail_lvl.finditer(m1.group("content")):
                second_lvl_children[m2.group("name").decode("gb2312")] = {"url": m2.group("url")}

    return d_code_url
def return_bankuai_in_bankuai(self, bankuai, sort_direction="desc"):
    """Download the child bankuais listed under a second-level bankuai.

    Args:
        bankuai: list of names going from the top bankuai down to the one
            whose children are wanted (the original author's example is
            the Chinese names for [bankuai, concept bankuai]).
        sort_direction: "desc" or "asc", case-insensitive.  Selects which
            of the two lists embedded in the page is returned.

    Returns:
        A shallow copy of ``bankuai`` with one extra element appended: a
        list of rows, each holding the first nine comma-separated fields of
        one double-quoted record.  Per the original author's note the
        fields are: bankuai_name, increase, amount (in 0.1 billion),
        change_ratio, rising_count, falling_count, leading_stock_code,
        leading_stock_name, increase.

    Raises:
        RuntimeError: if ``sort_direction`` is neither "desc" nor "asc",
            or if the page does not contain the expected
            ``[[...],[...]]`` pair of lists.
        IndexError: if a record has fewer than nine fields.
    """
    # Normalise once and use the normalised value everywhere: the regex
    # group names below are lower case, so a value such as "DESC" used to
    # pass validation and then fail with "no such group".
    direction = sort_direction.lower()
    if direction not in ("desc", "asc"):
        raise RuntimeError("Incorrect parameter [%(direction)s]" % {"direction": sort_direction}, "in Eastmoney.py")

    bankuai_url = self.return_url_for_bankuai_bankuai(bankuai)

    # Retry forever until the download succeeds.  Only ``Exception`` is
    # caught so that Ctrl-C (KeyboardInterrupt) and SystemExit can still
    # break out of the loop.
    while True:
        try:
            bankuai_page = read_url(bankuai_url)
            break
        except Exception:
            warn_log('Connection lost, retry in 10 seconds ...')
            time.sleep(10)

    # The page embeds two lists: the first is captured as "desc" and the
    # second as "asc".
    r_return_bankuai_detail_grp = r'\[\[(?P<bankuai_detail_group_desc>[^\]]+)\],\[(?P<bankuai_detail_group_asc>[^\]]+)\]\]'
    match_objs = re.search(r_return_bankuai_detail_grp, bankuai_page)
    if match_objs is None:
        raise RuntimeError("No bankuai list found in the page at [%s]" % bankuai_url, "in Eastmoney.py")
    bankuai_detail_grp = match_objs.group("bankuai_detail_group_" + direction)

    # Each bankuai is one double-quoted record of comma-separated fields.
    r_code_detail = re.compile(r'"(?P<code_detail>[^"]*)"')
    field_count = 9
    bankuais = []
    for m in r_code_detail.finditer(bankuai_detail_grp):
        fields = m.group("code_detail").split(",")
        # Index (rather than slice) so that a short record still raises
        # IndexError instead of silently producing a short row.
        bankuais.append([fields[i] for i in range(field_count)])

    out_list = copy.copy(bankuai)
    out_list.append(bankuais)
    return out_list
#!/usr/bin/python2.7
# coding:utf-8
# Scratch script: download one stock page from stock.jrj.com.cn and print
# the raw page to stdout.
import re
import sys
import pprint
import copy
import csv
import os

# Python 2 idiom: reload(sys) is called first so that
# sys.setdefaultencoding is available, then the process-wide default
# string encoding is switched to GBK.
reload(sys)
sys.setdefaultencoding("gbk")

from tooling.common_tool import (
    print_log,
    warn_log,
    read_url,
    get_date,
    return_new_name_for_existing_file,
)
from Sys_paths import Sys_paths

page_code = read_url('http://stock.jrj.com.cn/share,600225,jjcg_3.shtml')
# A single parenthesised argument prints identically under the Python 2
# print statement.
print(page_code)