class DataCrawl:
    """Download per-stock history pages and store new price rows in the DB.

    For every code returned by the ``wm_details`` DB manager the crawler
    requests the matching ``quotes.money.163.com`` history page for one
    year/season, parses its table rows and stores every row whose
    ``cur_timer`` is not already present in the stored ``price_list``.
    """

    # History page URL; ``key`` is the first six characters of the stock code.
    URL_TEMPLATE = (
        "http://quotes.money.163.com/trade/lsjysj_{key}.html"
        "?year={year}&season={season}"
    )
    # Number of download attempts per URL and the pause between attempts.
    MAX_TRIES = 8
    RETRY_DELAY = 2
    # Seconds before a request is abandoned. requests applies no timeout by
    # default, so without this a stalled connection would hang the crawl.
    REQUEST_TIMEOUT = 10
    # Minimum number of <td> cells a table row needs to be parsed (index 8
    # is the last one read).
    MIN_CELLS = 9

    def __init__(self):
        self.dm = DBManager("wm_details")
        # The original value started with a stray ":" (copy/paste residue)
        # and the headers were never sent; both are fixed here.
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
            )
        }

    def start_crawl(self):
        """Crawl the season reported by ``get_cur_season()`` for every code."""
        print("-start up-")
        _year, _season = get_cur_season()
        self.get_url(_year, _season)

    def end_crawl(self):
        """Close the underlying DB connection."""
        self.dm.close_db()

    @classmethod
    def _build_url(cls, code, year, season):
        """Return the history page URL for ``code`` in ``year``/``season``.

        ``year`` and ``season`` may be strings or integers; they are
        formatted with ``str()`` semantics, so integers no longer raise
        ``TypeError`` as they did with ``+`` concatenation.
        """
        return cls.URL_TEMPLATE.format(key=code[:6], year=year, season=season)

    @classmethod
    def _row_to_price(cls, cells):
        """Map the cell texts of one table row onto a price record.

        Args:
            cells: Sequence with the text of each ``<td>`` in the row.

        Returns:
            The price dict, or ``None`` when the row has fewer than
            ``MIN_CELLS`` cells and therefore cannot be mapped.
        """
        if len(cells) < cls.MIN_CELLS:
            return None
        # Cell index 5 is deliberately not stored, as in the original mapping.
        return {
            "cur_timer": cells[0],
            "cur_open_price": cells[1],
            "cur_max_price": cells[2],
            "cur_min_price": cells[3],
            "cur_close_price": cells[4],
            "cur_price_range": cells[6],
            "cur_total_volume": cells[7],
            "cur_total_money": cells[8],
        }

    def _download(self, url):
        """Fetch ``url`` with retries; return the body bytes or ``None``."""
        for attempt in range(self.MAX_TRIES):
            try:
                response = requests.get(
                    url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
                )
                return response.content
            except requests.RequestException as exc:
                if attempt < self.MAX_TRIES - 1:
                    sleep(self.RETRY_DELAY)
                    continue
                print("ERROR TO DOWNLODE DATE", url, exc)
        return None

    def get_url(self, year, season):
        """Download and parse the history page of every known stock code.

        Args:
            year: Year to request (string or integer).
            season: Season (quarter) to request (string or integer).
        """
        code_list = self.dm.get_code_list()
        try:
            for item in code_list:
                code = item["code"]
                url = self._build_url(code, year, season)
                print(url)
                content = self._download(url)
                if content is not None:
                    self.parse_pager(content, code)
        finally:
            # Release the code listing even when the loop is interrupted.
            code_list.close()

    def parse_pager(self, content, key):
        """Parse one history page and store the rows that are not stored yet.

        Args:
            content: Raw HTML of the page.
            key: Stock code used to look up and update the DB record.

        Failures are reported on stdout and never propagate, so one broken
        page does not stop the crawl.
        """
        try:
            _result = self.dm.find_by_id(key)
            # Set membership keeps the duplicate check O(1) per row.
            known_timers = {x["cur_timer"] for x in _result["price_list"]}
            soup = bs4.BeautifulSoup(content, "lxml")
            rows = soup.select("div.inner_box tr")
            skipped = 0
            # The first <tr> is skipped, as in the original (presumably the
            # header row -- confirm against the page markup).
            for row in rows[1:]:
                price = self._row_to_price(
                    [cell.string for cell in row.select("td")]
                )
                if price is None:
                    # A short row must not abort the remaining rows.
                    skipped += 1
                    continue
                if price["cur_timer"] not in known_timers:
                    self.dm.add_tk_item(key, price)
                    # Guard against the same date appearing twice on a page.
                    known_timers.add(price["cur_timer"])
            if skipped:
                print(key, "skipped rows:", skipped)
            print(key, "success")
        except Exception as exc:
            # Deliberate best-effort boundary; the cause is now reported.
            print(key, "faild", exc)
class WmacdUtils:
    """Build and refresh weekly aggregates of daily price records.

    Daily records are read from the ``tk_details`` DB manager and weekly
    summaries are written through the ``wm_details`` DB manager. A week is
    the slice of up to seven consecutive entries of the date axis that
    starts on a Monday.
    """

    # Default date axis; kept identical to the previously hard-coded range.
    DEFAULT_START_DATE = "2016-01-04"
    DEFAULT_END_DATE = "2018-12-30"
    DATE_FORMAT = "%Y-%m-%d"
    WEEK_LENGTH = 7
    # Errors raised by malformed daily records (missing key, non-numeric
    # text, non-string volume/money values).
    _BAD_RECORD_ERRORS = (KeyError, ValueError, TypeError, AttributeError)

    def __init__(self):
        self.db_manager_wm = DBManager("wm_details")
        self.db_manager_tk = DBManager("tk_details")

    @classmethod
    def _week_windows(cls, date_list):
        """Return the slices of ``date_list`` that start on a Monday.

        Each slice holds at most ``WEEK_LENGTH`` entries; the last one may
        be shorter when the axis ends mid-week.
        """
        return [
            date_list[index:index + cls.WEEK_LENGTH]
            for index, day in enumerate(date_list)
            if datetime.datetime.strptime(day, cls.DATE_FORMAT).weekday() == 0
        ]

    @staticmethod
    def _aggregate_week(cur_date_list, cur_tk_details):
        """Summarise the daily records of one week.

        Args:
            cur_date_list: The dates that make up the week.
            cur_tk_details: Daily records of that week, in chronological
                order.

        Returns:
            The weekly record, or ``None`` when there are no daily records.

        Raises:
            KeyError, ValueError, TypeError, AttributeError: when a daily
                record is missing a field or holds a non-numeric value.
        """
        if not cur_tk_details:
            return None
        open_price_list = [float(x["cur_open_price"]) for x in cur_tk_details]
        max_price_list = [float(x["cur_max_price"]) for x in cur_tk_details]
        min_price_list = [float(x["cur_min_price"]) for x in cur_tk_details]
        close_price_list = [float(x["cur_close_price"]) for x in cur_tk_details]
        # Volume and money are stored as text with thousands separators.
        total_volume_list = [
            int(x["cur_total_volume"].replace(",", "")) for x in cur_tk_details
        ]
        total_money_list = [
            int(x["cur_total_money"].replace(",", "")) for x in cur_tk_details
        ]
        return {
            "frist_date": cur_date_list[0],
            # Copy so that records of different stocks never share a list.
            "date_list": list(cur_date_list),
            "open_price": open_price_list[0],
            "max_price": max(max_price_list),
            "min_price": min(min_price_list),
            "close_price": close_price_list[-1],
            "total_volume": sum(total_volume_list),
            "total_money": sum(total_money_list),
        }

    @staticmethod
    def _empty_week(cur_date_list):
        """Return the all-zero weekly record used for weeks without data."""
        return {
            "frist_date": cur_date_list[0],
            "date_list": list(cur_date_list),
            "open_price": 0,
            "max_price": 0,
            "min_price": 0,
            "close_price": 0,
            "total_volume": 0,
            "total_money": 0,
        }

    def _load_sorted_prices(self, code):
        """Return the stored daily records of ``code`` sorted by date key."""
        tk_result = self.db_manager_tk.find_by_key({"code": code})[0]
        return sorted(tk_result["price_list"], key=cmp_datatime_02)

    def init_w_time(self, start_date=DEFAULT_START_DATE,
                    end_date=DEFAULT_END_DATE):
        """Create one weekly record per week and stock over the date axis.

        Weeks without daily records get an all-zero record; weeks whose
        daily records cannot be parsed are skipped.

        Args:
            start_date: First day of the axis, ``YYYY-MM-DD``.
            end_date: Last day of the axis, ``YYYY-MM-DD``.
        """
        # The week layout does not depend on the stock, so compute it once.
        week_windows = self._week_windows(date_range(start_date, end_date))
        code_list = self.db_manager_wm.get_code_list()
        for code_item in code_list:
            code = code_item["code"]
            print(code)
            tk_details = self._load_sorted_prices(code)
            for cur_date_list in week_windows:
                # Daily records that fall inside this week.
                cur_tk_details = [
                    x for x in tk_details if x["cur_timer"] in cur_date_list
                ]
                try:
                    wmacd_item = self._aggregate_week(
                        cur_date_list, cur_tk_details
                    )
                except self._BAD_RECORD_ERRORS:
                    continue
                if wmacd_item is None:
                    wmacd_item = self._empty_week(cur_date_list)
                # Append the weekly record to the stock's document.
                self.db_manager_wm.add_tk_item(code, wmacd_item)

    def update_w_macd(self, cur_date=None, start_date=DEFAULT_START_DATE,
                      end_date=DEFAULT_END_DATE):
        """Recompute the weekly record of the week that contains ``cur_date``.

        Args:
            cur_date: A ``date``, ``datetime`` or ``YYYY-MM-DD`` string.
                Defaults to today, resolved at call time rather than once
                at import.
            start_date: First day of the axis, ``YYYY-MM-DD``.
            end_date: Last day of the axis, ``YYYY-MM-DD``.

        Nothing is updated when ``cur_date`` lies outside the axis; note
        that the default axis ends on 2018-12-30.
        """
        if cur_date is None:
            cur_date = datetime.datetime.now().date()
        elif isinstance(cur_date, datetime.datetime):
            # str(datetime) carries a time part and would never match.
            cur_date = cur_date.date()
        target_day = str(cur_date)

        for cur_date_list in self._week_windows(
                date_range(start_date, end_date)):
            if target_day not in cur_date_list:
                continue
            ticker_list = self.db_manager_wm.get_code_list()
            # Refresh the week for every stock.
            for tk_item in ticker_list:
                code = tk_item["code"]
                tk_details = self._load_sorted_prices(code)
                cur_tk_details = [
                    x for x in tk_details if x["cur_timer"] in cur_date_list
                ]
                try:
                    wmacd_item = self._aggregate_week(
                        cur_date_list, cur_tk_details
                    )
                except self._BAD_RECORD_ERRORS as exc:
                    # One malformed record must not block the other stocks;
                    # this matches the skip behaviour of init_w_time.
                    print(code, "skipped", exc)
                    continue
                if wmacd_item is not None:
                    # Replace the stored record of this week.
                    self.db_manager_wm.update_wm_price_list(
                        code, wmacd_item["frist_date"], wmacd_item
                    )