def load_stock(self, type, stock, date):
    """Load every stored quote row for *stock* on *date*, oldest first.

    Walks the time slices from the last one down to the first; if any
    slice is not yet complete (``check_time_finish`` is false) the data
    set is partial and ``None`` is returned.  Each slice stores JSON
    lists keyed by page number; pages are concatenated in ascending key
    order and the whole list is reversed at the end to yield
    chronological order.

    :param type: quote type constant (e.g. ``nasdaq.REAL_TIME_QUOTE``)
    :param stock: stock symbol
    :param date: trading date string
    :return: list of quote records, or ``None`` when any slice is
        incomplete
    """
    data = []
    # Hoist the invariant slice count (the original recomputed it on
    # every iteration) and iterate directly from the last slice down
    # to the first instead of deriving the index as ``max - i + 1``.
    max_slice = nasdaq.get_time_slice_max(type)
    for _time in range(max_slice, 0, -1):
        if not self.check_time_finish(type, stock, date, _time):
            return None
        t = self._load_time_data(type, stock, date, _time)
        # Page keys are stored as strings; sort them numerically.
        for key in sorted(int(k) for k in t.keys()):
            data.extend(json.loads(t[str(key)]))
    data.reverse()
    return data
def remove_stock(self, type, stock, date):
    """Clean all data: delete the data and info keys of every time
    slice stored for this (type, stock, date)."""
    last_slice = nasdaq.get_time_slice_max(type)
    for slice_no in range(1, last_slice + 1):
        # Data key first, then info key — same order as before.
        for key in (self._data_key(type, date, stock, slice_no),
                    self._info_key(type, date, stock, slice_no)):
            self.client.delete(key)
def test_all(self, redis_client):
    """End-to-end exercise of StockQuoteRedisModel for each quote type.

    For every quote type: wipes the stock, saves randomly-sized page
    batches for each time slice (rows carry a strictly ordered last
    element so chronological order is checkable), asserts per-slice and
    per-stock completion flags, asserts ``load_stock`` returns rows
    sorted by their last element, then walks the task-lifecycle flags
    (started -> finished).
    """
    # Hoisted: the original re-ran these imports inside the loops on
    # every iteration.
    import random
    from copy import deepcopy

    stock_redis = StockQuoteRedisModel(redis_client)
    for _type in (nasdaq.REAL_TIME_QUOTE, nasdaq.PRE_MARKET_QUOTE,
                  nasdaq.AFTER_HOUR_QUOTE):
        stock_redis.remove_stock(_type, "baba", "2013-10-25")
        # NOTE(review): the slice count is taken from REAL_TIME_QUOTE
        # even while iterating the other quote types — confirm this is
        # intentional (i.e. real-time has the largest slice count).
        for _time in range(
                1, nasdaq.get_time_slice_max(nasdaq.REAL_TIME_QUOTE) + 1):
            total_page = random.randint(1, 10)
            for page in range(1, total_page + 1):
                stock_redis.save_page_result(
                    _type, "baba", "2013-10-25", _time, page, total_page,
                    [("15:%02d:%02d" % (_time, total_page - page), 25.13,
                      _time * 100000 + (total_page - page) * 10 + 1),
                     ("15:%02d:%02d" % (_time, total_page - page), 25.13,
                      _time * 100000 + (total_page - page) * 10)])
                # The slice must not report finished until its last page
                # has been saved.
                if page != total_page:
                    assert not stock_redis.check_time_finish(
                        _type, "baba", "2013-10-25", _time)
        assert stock_redis.check_stock_finish(_type, "baba", "2013-10-25")
        t = stock_redis.load_stock(_type, "baba", "2013-10-25")
        # load_stock must return rows already sorted by the last element.
        t1 = deepcopy(t)
        t1.sort(key=lambda x: x[-1])
        assert t == t1
        # Task lifecycle: not started -> started -> finished.
        assert not stock_redis.stock_task_started(
            _type, nasdaq.get_last_trading_date(_type), "baba")
        stock_redis.begin_stock_task(_type,
                                     nasdaq.get_last_trading_date(_type),
                                     "baba")
        assert stock_redis.stock_task_started(
            _type, nasdaq.get_last_trading_date(_type), "baba")
        assert not stock_redis.stock_task_finished(
            _type, nasdaq.get_last_trading_date(_type), "baba")
        stock_redis.end_stock_task(_type,
                                   nasdaq.get_last_trading_date(_type),
                                   "baba")
        assert stock_redis.stock_task_finished(
            _type, nasdaq.get_last_trading_date(_type), "baba")
def test_all(self, redis_client):
    """End-to-end exercise of StockQuoteRedisModel for each quote type.

    Covers saving page results per time slice, the per-slice and
    per-stock completion flags, chronological ordering of
    ``load_stock`` output, and the begin/end task lifecycle flags.
    """
    # Imports hoisted out of the loops (the original imported random
    # and deepcopy on every iteration).
    import random
    from copy import deepcopy

    stock_redis = StockQuoteRedisModel(redis_client)
    for _type in (nasdaq.REAL_TIME_QUOTE, nasdaq.PRE_MARKET_QUOTE,
                  nasdaq.AFTER_HOUR_QUOTE):
        stock_redis.remove_stock(_type, "baba", "2013-10-25")
        # NOTE(review): slice count is always taken from
        # REAL_TIME_QUOTE, not from _type — confirm intent.
        for _time in range(
                1, nasdaq.get_time_slice_max(nasdaq.REAL_TIME_QUOTE) + 1):
            total_page = random.randint(1, 10)
            for page in range(1, total_page + 1):
                stock_redis.save_page_result(
                    _type, "baba", "2013-10-25", _time, page, total_page,
                    [("15:%02d:%02d" % (_time, total_page - page), 25.13,
                      _time * 100000 + (total_page - page) * 10 + 1),
                     ("15:%02d:%02d" % (_time, total_page - page), 25.13,
                      _time * 100000 + (total_page - page) * 10)])
                if page != total_page:
                    assert not stock_redis.check_time_finish(
                        _type, "baba", "2013-10-25", _time)
        assert stock_redis.check_stock_finish(_type, "baba", "2013-10-25")
        t = stock_redis.load_stock(_type, "baba", "2013-10-25")
        t1 = deepcopy(t)
        t1.sort(key=lambda x: x[-1])
        assert t == t1
        assert not stock_redis.stock_task_started(
            _type, nasdaq.get_last_trading_date(_type), "baba")
        stock_redis.begin_stock_task(_type,
                                     nasdaq.get_last_trading_date(_type),
                                     "baba")
        assert stock_redis.stock_task_started(
            _type, nasdaq.get_last_trading_date(_type), "baba")
        assert not stock_redis.stock_task_finished(
            _type, nasdaq.get_last_trading_date(_type), "baba")
        stock_redis.end_stock_task(_type,
                                   nasdaq.get_last_trading_date(_type),
                                   "baba")
        assert stock_redis.stock_task_finished(
            _type, nasdaq.get_last_trading_date(_type), "baba")
def parse_stock_quote_page(self, args):
    """Parse the spider result for one (stock, time-slice, page) quote page.

    ``args`` is the ``(status_code, content, ext)`` tuple produced by
    ``spider_task``; ``ext`` carries the crawl coordinates
    (``type``/``stock``/``time``/``page``) and a mutable ``retries``
    counter.  A failed fetch or parse re-queues the same page until
    ``settings.STOCK_SPIDER_MAX_RETRY`` is hit.  Page 1 of a slice fans
    out tasks for the remaining pages, and page 1 of slice 1 also fans
    out first-page tasks for the remaining slices.  Parsed rows are
    handed to ``save_stock_quote_result``.
    """
    status_code, content, ext = args
    _type, stock, time, page = (ext["type"], ext["stock"], ext["time"],
                                ext["page"])
    logger.debug(
        "parse stock quote page,status_code=%d,type=%d,stock=%s,time=%d,page=%d,id=%s",
        status_code, _type, stock, time, page, self.request.id)

    def _re_crawl_page():
        # Re-queue this exact page; ext is shared so the retry counter
        # survives across attempts.
        if ext["retries"] < settings.STOCK_SPIDER_MAX_RETRY:
            ext["retries"] += 1
            page_url = nasdaq.quote_slice_url_by_type(_type, stock, time,
                                                      page)
            logger.debug("_re_crawl_page,type=%d,page_url=%s,retries=%d",
                         _type, page_url, ext["retries"])
            spider_task.apply_async(
                (page_url, settings.STOCK_SPIDER_USE_PROXY,
                 settings.STOCK_SPIDER_TASK_TIMEOUT, ext),
                link=parse_stock_quote_page.s(),
                expires=settings.STOCK_QUOTE_EXPIRES)
        else:
            logger.debug(
                "max retries failed,status_code=%d,type=%d,stock=%s,time=%d,page=%d",
                status_code, _type, stock, time, page)
            save_failed_time_quote_task(_type, stock, time, page,
                                        "STOCK_SPIDER_MAX_RETRY HIT")

    if status_code != 200:
        _re_crawl_page()
        return
    try:
        date, data, current, first, last = \
            nasdaq.parse_time_quote_slice_page(content)
    except nasdaq.NoTradingDataException:
        # FIX: the original caught Exception and merely `pass`ed on
        # NoTradingDataException, then fell through with date/data/last
        # unbound — a guaranteed NameError below.  No trading data means
        # there is nothing to fan out or save, so stop here.
        return
    except Exception:
        _re_crawl_page()
        return
    if ext["page"] == 1:
        # First page of the slice: fan out crawl tasks for pages 2..last.
        for page_no in range(2, last + 1):
            page_url = nasdaq.quote_slice_url_by_type(_type, stock, time,
                                                      page_no)
            c_ext = {
                "type": _type,
                "stock": stock,
                "time": time,
                "page": page_no,
                "retries": 0
            }
            logger.debug("start spider_task,type=%d,page_url=%s", _type,
                         page_url)
            spider_task.apply_async(
                (page_url, settings.STOCK_SPIDER_USE_PROXY,
                 settings.STOCK_SPIDER_TASK_TIMEOUT, c_ext),
                link=parse_stock_quote_page.s(),
                expires=settings.STOCK_QUOTE_EXPIRES)
        if ext["time"] == 1:
            # First slice: fan out first-page tasks for slices 2..max.
            # The model construction is hoisted out of the loop (the
            # original rebuilt it per iteration).
            redis_model = StockQuoteRedisModel(redis_client)
            for time_no in range(2, nasdaq.get_time_slice_max(_type) + 1):
                page_url = nasdaq.quote_slice_url_by_type(_type, stock,
                                                          time_no, 1)
                c_ext = {
                    "type": _type,
                    "stock": stock,
                    "time": time_no,
                    "page": 1,
                    "retries": 0
                }
                # NOTE(review): begin_stock_task runs once per slice; it
                # appears idempotent and could be hoisted above the loop
                # — confirm before changing.
                redis_model.begin_stock_task(
                    _type, nasdaq.get_last_trading_date(_type), stock)
                logger.debug("start spider_task,type=%d,page_url=%s",
                             _type, page_url)
                spider_task.apply_async(
                    (page_url, settings.STOCK_SPIDER_USE_PROXY,
                     settings.STOCK_SPIDER_TASK_TIMEOUT, c_ext),
                    link=parse_stock_quote_page.s(),
                    expires=settings.STOCK_QUOTE_EXPIRES)
    logger.debug(
        "parse stock quote page ok,status_code=%d,type=%d,stock=%s,time=%d,page=%d,id=%s",
        status_code, _type, stock, time, page, self.request.id)
    save_stock_quote_result.apply_async(
        (_type, date, stock, time, page, last, data),
        expires=settings.STOCK_QUOTE_EXPIRES)
def parse_stock_quote_page(self, args):
    """Parse one crawled quote page and drive the crawl fan-out.

    ``args`` is ``(status_code, content, ext)`` from ``spider_task``,
    where ``ext`` holds the crawl coordinates and a mutable ``retries``
    counter.  Non-200 fetches and parse errors re-queue the page up to
    ``settings.STOCK_SPIDER_MAX_RETRY`` times.  The first page of a
    slice schedules the remaining pages; the first slice additionally
    schedules the first page of every other slice.  Parsed rows are
    forwarded to ``save_stock_quote_result``.
    """
    status_code, content, ext = args
    _type, stock, time, page = (ext["type"], ext["stock"], ext["time"],
                                ext["page"])
    logger.debug(
        "parse stock quote page,status_code=%d,type=%d,stock=%s,time=%d,page=%d,id=%s",
        status_code, _type, stock, time, page, self.request.id)

    def _re_crawl_page():
        # Requeue this page, reusing ext so the retry count persists.
        if ext["retries"] < settings.STOCK_SPIDER_MAX_RETRY:
            ext["retries"] += 1
            page_url = nasdaq.quote_slice_url_by_type(_type, stock, time,
                                                      page)
            logger.debug("_re_crawl_page,type=%d,page_url=%s,retries=%d",
                         _type, page_url, ext["retries"])
            spider_task.apply_async(
                (page_url, settings.STOCK_SPIDER_USE_PROXY,
                 settings.STOCK_SPIDER_TASK_TIMEOUT, ext),
                link=parse_stock_quote_page.s(),
                expires=settings.STOCK_QUOTE_EXPIRES)
        else:
            logger.debug(
                "max retries failed,status_code=%d,type=%d,stock=%s,time=%d,page=%d",
                status_code, _type, stock, time, page)
            save_failed_time_quote_task(_type, stock, time, page,
                                        "STOCK_SPIDER_MAX_RETRY HIT")

    if status_code != 200:
        _re_crawl_page()
        return
    try:
        date, data, current, first, last = \
            nasdaq.parse_time_quote_slice_page(content)
    except nasdaq.NoTradingDataException:
        # FIX: the original matched this inside a broad `except
        # Exception` and `pass`ed, leaving date/data/last unbound and
        # raising NameError further down.  With no trading data there is
        # nothing to schedule or save — return early instead.
        return
    except Exception:
        _re_crawl_page()
        return
    if ext["page"] == 1:
        # Fan out the remaining pages of this slice.
        for page_no in range(2, last + 1):
            page_url = nasdaq.quote_slice_url_by_type(_type, stock, time,
                                                      page_no)
            c_ext = {
                "type": _type,
                "stock": stock,
                "time": time,
                "page": page_no,
                "retries": 0
            }
            logger.debug("start spider_task,type=%d,page_url=%s", _type,
                         page_url)
            spider_task.apply_async(
                (page_url, settings.STOCK_SPIDER_USE_PROXY,
                 settings.STOCK_SPIDER_TASK_TIMEOUT, c_ext),
                link=parse_stock_quote_page.s(),
                expires=settings.STOCK_QUOTE_EXPIRES)
        if ext["time"] == 1:
            # Fan out the first page of every remaining slice.  Model
            # construction hoisted out of the loop (was rebuilt per
            # iteration).
            redis_model = StockQuoteRedisModel(redis_client)
            for time_no in range(2, nasdaq.get_time_slice_max(_type) + 1):
                page_url = nasdaq.quote_slice_url_by_type(
                    _type, stock, time_no, 1)
                c_ext = {
                    "type": _type,
                    "stock": stock,
                    "time": time_no,
                    "page": 1,
                    "retries": 0
                }
                # NOTE(review): called once per slice; looks hoistable —
                # confirm begin_stock_task is idempotent before moving.
                redis_model.begin_stock_task(
                    _type, nasdaq.get_last_trading_date(_type), stock)
                logger.debug("start spider_task,type=%d,page_url=%s",
                             _type, page_url)
                spider_task.apply_async(
                    (page_url, settings.STOCK_SPIDER_USE_PROXY,
                     settings.STOCK_SPIDER_TASK_TIMEOUT, c_ext),
                    link=parse_stock_quote_page.s(),
                    expires=settings.STOCK_QUOTE_EXPIRES)
    logger.debug(
        "parse stock quote page ok,status_code=%d,type=%d,stock=%s,time=%d,page=%d,id=%s",
        status_code, _type, stock, time, page, self.request.id)
    save_stock_quote_result.apply_async(
        (_type, date, stock, time, page, last, data),
        expires=settings.STOCK_QUOTE_EXPIRES)
def check_stock_finish(self, type, stock, date):
    """Return True iff every time slice for this (type, stock, date)
    has finished; short-circuits on the first unfinished slice."""
    slice_count = nasdaq.get_time_slice_max(type)
    return all(
        self.check_time_finish(type, stock, date, slice_no)
        for slice_no in range(1, slice_count + 1))