Exemplo n.º 1
0
class DataCrawl:
    """Download per-stock historical quote pages and store unseen daily rows.

    For every entry returned by ``self.dm.get_code_list()`` the crawler
    requests the quote page of the current year/season, parses the table rows
    and hands each row whose date is not stored yet to
    ``self.dm.add_tk_item``.
    """

    # Total number of download attempts per URL before giving up.
    MAX_TRIES = 8
    # Seconds to wait between two failed attempts.
    RETRY_DELAY = 2
    # Seconds after which an unresponsive HTTP request is abandoned, so a
    # single stalled connection cannot block the whole crawl.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Open the database manager and prepare the HTTP request headers."""
        self.dm = DBManager("wm_details")
        self.headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
        }

    def start_crawl(self):
        """Crawl the quote pages of the season reported by get_cur_season()."""
        print("-start up-")
        _year, _season = get_cur_season()
        self.get_url(_year, _season)

    def end_crawl(self):
        """Close the database manager."""
        self.dm.close_db()

    def get_url(self, year, season):
        """Fetch and parse the quote page of every known code.

        Args:
            year: Year put into the ``year`` query parameter (str or int).
            season: Value put into the ``season`` query parameter
                (str or int).

        The code list returned by the database manager is always closed,
        even when processing one of its items raises.
        """
        code_list = self.dm.get_code_list()
        try:
            for item in code_list:
                # Only the first six characters of the stored code are used
                # in the page URL.
                key = item["code"][:6]
                url = ("http://quotes.money.163.com/trade/lsjysj_{}.html"
                       "?year={}&season={}").format(key, year, season)
                print(url)

                for attempt in range(self.MAX_TRIES):
                    try:
                        response = requests.get(url,
                                                headers=self.headers,
                                                timeout=self.REQUEST_TIMEOUT)
                        self.parse_pager(response.content, item["code"])
                        break
                    except Exception:
                        # Best-effort crawl: a failing download must not stop
                        # the remaining codes, so retry and then move on.
                        if attempt < self.MAX_TRIES - 1:
                            sleep(self.RETRY_DELAY)
                        else:
                            print("ERROR TO DOWNLODE DATE")
        finally:
            code_list.close()

    def parse_pager(self, content, key):
        """Parse one quote page and store the rows that are not saved yet.

        Args:
            content: Raw HTML of the quote page.
            key: Code passed to ``self.dm.find_by_id`` and
                ``self.dm.add_tk_item``.

        Any error is reported on stdout and swallowed, so a broken page never
        interrupts the crawl; rows stored before the error stay stored.
        """
        try:
            _result = self.dm.find_by_id(key)
            # Dates already stored for this code; a set keeps the per-row
            # membership test cheap.
            known_dates = {x["cur_timer"] for x in _result["price_list"]}
            soup = bs4.BeautifulSoup(content, "lxml")
            rows = soup.select("div.inner_box tr")
            # The first row is skipped (presumably the table header).
            for row in rows[1:]:
                data = [cell.string for cell in row.select("td")]
                # Column 5 of the table is intentionally not stored.
                price = {
                    "cur_timer": data[0],
                    "cur_open_price": data[1],
                    "cur_max_price": data[2],
                    "cur_min_price": data[3],
                    "cur_close_price": data[4],
                    "cur_price_range": data[6],
                    "cur_total_volume": data[7],
                    "cur_total_money": data[8]
                }
                if price["cur_timer"] not in known_dates:
                    self.dm.add_tk_item(key, price)
            print(key, "success")
        except Exception:
            print(key, "faild")
Exemplo n.º 2
0
class WmacdUtils:
    """Build and refresh weekly price aggregates from stored daily rows.

    Daily rows are read through ``self.db_manager_tk`` and the weekly
    aggregates are written through ``self.db_manager_wm``.
    """

    # Bounds handed to date_range() to build the time axis.
    RANGE_START = "2016-01-04"
    RANGE_END = "2018-12-30"
    # Format of the date strings on the time axis.
    DATE_FORMAT = "%Y-%m-%d"
    # Number of consecutive axis entries that make up one weekly window.
    WEEK_LENGTH = 7

    def __init__(self):
        """Open the two database managers used by this helper."""
        self.db_manager_wm = DBManager("wm_details")
        self.db_manager_tk = DBManager("tk_details")

    @staticmethod
    def _weekly_windows(date_list):
        """Return the windows of ``date_list`` that start on a Monday.

        Each window is ``date_list[i:i + 7]`` for every index ``i`` whose
        date falls on a Monday, so the last window may be shorter.
        """
        windows = []
        for index, day in enumerate(date_list):
            parsed = datetime.datetime.strptime(day, WmacdUtils.DATE_FORMAT)
            if parsed.weekday() == 0:
                windows.append(
                    date_list[index:index + WmacdUtils.WEEK_LENGTH])
        return windows

    @staticmethod
    def _aggregate_week(week_dates, rows):
        """Fold the non-empty daily ``rows`` of one week into a weekly item.

        Every row is converted, even where only the first or last value is
        used, so a malformed row anywhere in the week raises.

        Raises:
            KeyError, ValueError, TypeError, AttributeError: when a row lacks
                a field or holds a value that cannot be converted.
        """
        open_prices = [float(x["cur_open_price"]) for x in rows]
        max_prices = [float(x["cur_max_price"]) for x in rows]
        min_prices = [float(x["cur_min_price"]) for x in rows]
        close_prices = [float(x["cur_close_price"]) for x in rows]
        # Volume and money are stored with thousands separators.
        volumes = [int(x["cur_total_volume"].replace(",", "")) for x in rows]
        money = [int(x["cur_total_money"].replace(",", "")) for x in rows]
        return {
            "frist_date": week_dates[0],
            "date_list": week_dates,
            "open_price": open_prices[0],
            "max_price": max(max_prices),
            "min_price": min(min_prices),
            "close_price": close_prices[-1],
            "total_volume": sum(volumes),
            "total_money": sum(money),
        }

    @staticmethod
    def _empty_week(week_dates):
        """Return the all-zero weekly item used for a week without rows."""
        return {
            "frist_date": week_dates[0],
            "date_list": week_dates,
            "open_price": 0,
            "max_price": 0,
            "min_price": 0,
            "close_price": 0,
            "total_volume": 0,
            "total_money": 0,
        }

    def _sorted_price_list(self, code):
        """Return the stored daily rows of ``code`` ordered by cmp_datatime_02."""
        tk_result = self.db_manager_tk.find_by_key({"code": code})[0]
        return sorted(tk_result["price_list"], key=cmp_datatime_02)

    # Initialise the wmacd data.
    def init_w_time(self):
        """Create one weekly item per Monday-started window for every code.

        A week without daily rows gets an all-zero item; a week that contains
        a malformed row is skipped and gets no item at all.
        """
        # Build the time axis once; the windows are the same for every code.
        date_list = date_range(self.RANGE_START, self.RANGE_END)
        weeks = self._weekly_windows(date_list)
        code_list = self.db_manager_wm.get_code_list()
        for code_item in code_list:
            code = code_item["code"]
            print(code)
            tk_details = self._sorted_price_list(code)
            for week_dates in weeks:
                # Daily rows that fall inside this week.
                rows = [x for x in tk_details if x["cur_timer"] in week_dates]
                if rows:
                    try:
                        wmacd_item = self._aggregate_week(week_dates, rows)
                    except (KeyError, ValueError, TypeError, AttributeError):
                        # Malformed daily data: leave this week out.
                        continue
                else:
                    wmacd_item = self._empty_week(week_dates)
                # Add one record to the database.
                self.db_manager_wm.add_tk_item(code, wmacd_item)

    def update_w_macd(self, cur_date=None):
        """Recompute the weekly item of the week that contains ``cur_date``.

        Args:
            cur_date: Date whose ``str()`` form matches the axis format
                (for example a ``datetime.date``). Defaults to today,
                resolved at call time rather than when the class is defined.

        Nothing is updated when ``cur_date`` lies outside the configured
        range, or for codes that have no daily rows in the matching week.
        """
        if cur_date is None:
            cur_date = datetime.datetime.now().date()
        target = str(cur_date)
        date_list = date_range(self.RANGE_START, self.RANGE_END)
        for week_dates in self._weekly_windows(date_list):
            # Only the window containing the requested date is refreshed.
            if target not in week_dates:
                continue
            ticker_list = self.db_manager_wm.get_code_list()
            # Update the data of every stock.
            for tk_item in ticker_list:
                code = tk_item["code"]
                tk_details = self._sorted_price_list(code)
                # Daily rows that fall inside this week.
                rows = [x for x in tk_details if x["cur_timer"] in week_dates]
                if rows:
                    wmacd_item = self._aggregate_week(week_dates, rows)
                    # Overwrite the stored weekly item.
                    self.db_manager_wm.update_wm_price_list(
                        code, wmacd_item["frist_date"], wmacd_item)