class FuturesSpider:
    """Crawl daily K-line data for futures symbols from Sina and store it via DBManager."""

    def __init__(self):
        # One document per futures code, each holding a "details" list of daily rows.
        self.dm = DBManager("Futures_d_table")

    def init_table(self):
        """Seed the collection with one empty document per entry in symbol_list.

        Each entry of symbol_list is expected to look like "code/symbol"
        (assumption from the split below — confirm against symbol_list's source).
        """
        for item in symbol_list:
            code, symbol = item.split("/")[0], item.split("/")[1]
            self.dm.add_one({"code": code, "symbol": symbol, "details": []})

    def start_crawl(self):
        """Fetch the daily K-line JSON for every symbol, retrying transient failures.

        Each symbol is tried up to max_try times with a 2-second pause between
        attempts; after the final failure the symbol is reported and skipped.
        """
        max_try = 8
        for symbol in symbol_list:
            code = symbol.split("/")[0]
            url = ("http://stock2.finance.sina.com.cn/futures/api/json.php/"
                   "IndexService.getInnerFuturesDailyKLine?symbol=" + code)
            print(url)
            for tries in range(max_try):
                try:
                    # timeout so a stalled connection cannot hang the crawl forever
                    content = requests.get(url, timeout=10)
                    self.parse_pager(content.content, code)
                    break
                except Exception:
                    if tries < max_try - 1:
                        sleep(2)  # brief back-off before the next attempt
                    else:
                        print(symbol, "fail")

    def parse_pager(self, content, code):
        """Parse a K-line JSON payload and insert rows not already stored.

        content: raw JSON (bytes/str) — a list of
                 [date, open, high, low, close, count] rows.
        code:    futures code used as the document key.
        """
        # A set instead of a list: membership test per row is O(1), not O(n).
        known_dates = {
            x["date"] for x in self.dm.find_one_by_key({"code": code})["details"]
        }
        for item in json.loads(content):
            row = {
                "date": item[0],
                "open": item[1],
                "high": item[2],
                "low": item[3],
                "close": item[4],
                "count": item[5],
            }
            if row["date"] not in known_dates:
                self.dm.add_futures_item(code, row)
        print(code, "success")
import os
import sys

# Make the project package importable when run as a script.
sys.path.append('F:\\PYTHON\\TsSpiderServer\\')

from mongo_db.mongodb_manager import DBManager

base_path = os.path.abspath(os.path.join(os.getcwd(), "..")) + "/bean"

if __name__ == '__main__':
    # Load "code title" pairs from the data file and seed one document per pair.
    dm = DBManager("wm_details")
    file_path = base_path + "/data_code.txt"
    # "utf-8": the original "utf_-8" only worked because codec-name
    # normalization happens to collapse it to "utf_8".
    with open(file_path, 'r', encoding='utf-8') as _file:
        for line in _file:
            parts = line.split()
            if len(parts) < 2:
                # Skip blank or malformed lines instead of crashing on IndexError.
                continue
            dm.add_one({"code": parts[0], "title": parts[1], "price_list": []})