Example #1
 def load_stock(self, type, stock, date):
     """Load a full day's quotes; return None if any time slice is unfinished."""
     data = []
     # walk the time slices from last to first
     for _time in range(nasdaq.get_time_slice_max(type), 0, -1):
         if not self.check_time_finish(type, stock, date, _time):
             return None
         # each slice is a hash mapping page number -> JSON-encoded rows
         t = self._load_time_data(type, stock, date, _time)
         for key in sorted(int(k) for k in t.keys()):
             data.extend(json.loads(t[str(key)]))
     # slices and pages accumulate newest-first; flip to chronological order
     data.reverse()
     return data
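A hedged usage sketch for load_stock (the redis-py client construction is an assumption; StockQuoteRedisModel and the nasdaq helpers are the ones shown in these examples):

import redis

client = redis.StrictRedis()  # assumption: local Redis on default host/port
model = StockQuoteRedisModel(client)
quotes = model.load_stock(nasdaq.REAL_TIME_QUOTE, "baba", "2013-10-25")
if quotes is None:
    print("at least one time slice has not finished crawling yet")
else:
    print("loaded %d rows, oldest first" % len(quotes))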
Example #2
 def remove_stock(self, type, stock, date):
     """
     clean all data
     """
     for _time in range(1, nasdaq.get_time_slice_max(type) + 1):
         data_key = self._data_key(type, date, stock, _time)
         info_key = self._info_key(type, date, stock, _time)
         self.client.delete(data_key)
         self.client.delete(info_key)
Example #3
 def test_all(self, redis_client):
     import random
     from copy import deepcopy

     stock_redis = StockQuoteRedisModel(redis_client)
     for _type in (nasdaq.REAL_TIME_QUOTE, nasdaq.PRE_MARKET_QUOTE,
                   nasdaq.AFTER_HOUR_QUOTE):
         stock_redis.remove_stock(_type, "baba", "2013-10-25")
         # use the current _type's slice count (the original hard-coded
         # REAL_TIME_QUOTE here, which looks like a copy-paste bug)
         for _time in range(1, nasdaq.get_time_slice_max(_type) + 1):
             total_page = random.randint(1, 10)
             for page in range(1, total_page + 1):
                 stock_redis.save_page_result(
                     _type, "baba", "2013-10-25", _time, page, total_page,
                     [("15:%02d:%02d" % (_time, total_page - page), 25.13,
                       _time * 100000 + (total_page - page) * 10 + 1),
                      ("15:%02d:%02d" % (_time, total_page - page), 25.13,
                       _time * 100000 + (total_page - page) * 10)])
                 if page != total_page:
                     assert not stock_redis.check_time_finish(
                         _type, "baba", "2013-10-25", _time)
         assert stock_redis.check_stock_finish(_type, "baba", "2013-10-25")
         t = stock_redis.load_stock(_type, "baba", "2013-10-25")
         # load_stock must return rows already sorted ascending by the
         # trailing sequence field
         t1 = deepcopy(t)
         t1.sort(key=lambda x: x[-1])
         assert t == t1
         assert not stock_redis.stock_task_started(
             _type, nasdaq.get_last_trading_date(_type), "baba")
         stock_redis.begin_stock_task(_type,
                                      nasdaq.get_last_trading_date(_type),
                                      "baba")
         assert stock_redis.stock_task_started(
             _type, nasdaq.get_last_trading_date(_type), "baba")
         assert not stock_redis.stock_task_finished(
             _type, nasdaq.get_last_trading_date(_type), "baba")
         stock_redis.end_stock_task(_type,
                                    nasdaq.get_last_trading_date(_type),
                                    "baba")
         assert stock_redis.stock_task_finished(
             _type, nasdaq.get_last_trading_date(_type), "baba")
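Note what the deepcopy/sort assertion above verifies: pages are saved with decreasing sequence numbers (the total_page - page terms), yet load_stock is expected to return rows already in ascending sequence order, i.e. its final reverse() undoes the newest-first storage order.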
Example #4
def parse_stock_quote_page(self, args):
    status_code, content, ext = args
    _type, stock, time, page = ext["type"], ext["stock"], ext["time"], ext[
        "page"]
    logger.debug(
        "parse stock quote page,status_code=%d,type=%d,stock=%s,time=%d,page=%d,id=%s",
        status_code, _type, stock, time, page, self.request.id)

    def _re_crawl_page():
        if ext["retries"] < settings.STOCK_SPIDER_MAX_RETRY:
            ext["retries"] += 1
            page_url = nasdaq.quote_slice_url_by_type(_type, stock, time, page)
            logger.debug("_re_crawl_page,type=%d,page_url=%s,retries=%d",
                         _type, page_url, ext["retries"])
            spider_task.apply_async((page_url, settings.STOCK_SPIDER_USE_PROXY,
                                     settings.STOCK_SPIDER_TASK_TIMEOUT, ext),
                                    link=parse_stock_quote_page.s(),
                                    expires=settings.STOCK_QUOTE_EXPIRES)
        else:
            logger.debug(
                "max retries failed,status_code=%d,type=%d,stock=%s,time=%d,page=%d",
                status_code, _type, stock, time, page)
            save_failed_time_quote_task(_type, stock, time, page,
                                        "STOCK_SPIDER_MAX_RETRY HIT")

    if status_code != 200:
        _re_crawl_page()
        return
    try:
        date, data, current, first, last = nasdaq.parse_time_quote_slice_page(
            content)
    except nasdaq.NoTradingDataException:
        # nothing traded in this slice, so there is nothing to save or retry
        return
    except Exception:
        _re_crawl_page()
        return
    if ext["page"] == 1:
        # fan out the remaining pages of this time slice
        for page_no in range(2, last + 1):
            page_url = nasdaq.quote_slice_url_by_type(_type, stock, time,
                                                      page_no)
            c_ext = {
                "type": _type,
                "stock": stock,
                "time": time,
                "page": page_no,
                "retries": 0
            }
            logger.debug("start spider_task,type=%d,page_url=%s", _type,
                         page_url)
            spider_task.apply_async(
                (page_url, settings.STOCK_SPIDER_USE_PROXY,
                 settings.STOCK_SPIDER_TASK_TIMEOUT, c_ext),
                link=parse_stock_quote_page.s(),
                expires=settings.STOCK_QUOTE_EXPIRES)
        if ext["time"] == 1:
            # start crawl other times
            for time_no in range(2, nasdaq.get_time_slice_max(_type) + 1):
                page_url = nasdaq.quote_slice_url_by_type(
                    _type, stock, time_no, 1)
                c_ext = {
                    "type": _type,
                    "stock": stock,
                    "time": time_no,
                    "page": 1,
                    "retries": 0
                }
                redis_model = StockQuoteRedisModel(redis_client)
                redis_model.begin_stock_task(
                    _type, nasdaq.get_last_trading_date(_type), stock)
                logger.debug("start spider_task,type=%d,page_url=%s", _type,
                             page_url)
                spider_task.apply_async(
                    (page_url, settings.STOCK_SPIDER_USE_PROXY,
                     settings.STOCK_SPIDER_TASK_TIMEOUT, c_ext),
                    link=parse_stock_quote_page.s(),
                    expires=settings.STOCK_QUOTE_EXPIRES)
    logger.debug(
        "parse stock quote page ok,status_code=%d,type=%d,stock=%s,time=%d,page=%d,id=%s",
        status_code, _type, stock, time, page, self.request.id)
    save_stock_quote_result.apply_async(
        (_type, date, stock, time, page, last, data),
        expires=settings.STOCK_QUOTE_EXPIRES)
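The task is self-seeding: page 1 of a slice learns last from the parsed content and fans out pages 2..last, and time slice 1 additionally fans out the remaining slices. A minimal kick-off sketch (the names and settings are the ones used above; the seed stock and type are illustrative):

seed_ext = {"type": nasdaq.REAL_TIME_QUOTE, "stock": "baba",
            "time": 1, "page": 1, "retries": 0}
seed_url = nasdaq.quote_slice_url_by_type(nasdaq.REAL_TIME_QUOTE, "baba", 1, 1)
spider_task.apply_async(
    (seed_url, settings.STOCK_SPIDER_USE_PROXY,
     settings.STOCK_SPIDER_TASK_TIMEOUT, seed_ext),
    link=parse_stock_quote_page.s(),
    expires=settings.STOCK_QUOTE_EXPIRES)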
Example #5
 def check_stock_finish(self, type, stock, date):
     """Return True only when every time slice of the day is finished."""
     for _time in range(1, nasdaq.get_time_slice_max(type) + 1):
         if not self.check_time_finish(type, stock, date, _time):
             return False
     return True
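Putting the model methods together, a minimal end-to-end sketch (assumes a local Redis; one page per slice, with rows as (timestamp, price, sequence) tuples as in the tests above):

import redis

client = redis.StrictRedis()  # assumption: default connection settings
model = StockQuoteRedisModel(client)
_type = nasdaq.REAL_TIME_QUOTE

model.remove_stock(_type, "baba", "2013-10-25")  # start from a clean slate
for _time in range(1, nasdaq.get_time_slice_max(_type) + 1):
    # a single page per slice marks the slice finished immediately
    model.save_page_result(_type, "baba", "2013-10-25", _time, 1, 1,
                           [("15:%02d:00" % _time, 25.13, _time)])
assert model.check_stock_finish(_type, "baba", "2013-10-25")
quotes = model.load_stock(_type, "baba", "2013-10-25")  # ascending by sequence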