def get_futunn_live(self):
    """Fetch the futunn live-news list and insert entries newer than the
    last stored live timestamp into MongoDB.

    :return: (1, 'ok') unconditionally; per-request failures are logged
             and skipped.
    """
    lasttime = DateUtil.string_toDatetime(self.mongodbutil_live.getLastLivetime())
    # Single iteration (i == 0); the range form is kept so the page span
    # can be widened easily.
    for i in range(0, -1, -1):
        # Millisecond timestamp used as a cache-buster query argument.
        p = int(1000 * time.mktime(time.localtime())) + i
        # BUGFIX: the original URL was missing the '&' before page_size and
        # hard-coded the cache-buster instead of using the computed p.
        url = 'https://news.futunn.com/main/live-list?page={0}&page_size=50&_={1}'.format(i, p)
        logger.info("address current url {0}...".format(url))
        arr = []
        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
        res = None  # so finally can close safely even if requests.get raises
        try:
            res = requests.get(url, headers=header, timeout=60)
            res.raise_for_status()
            if res.status_code == 200:
                js = json.loads(res.text)
                entries = js['data']['list']  # renamed: do not shadow builtin 'list'
                for elem in entries:
                    itemTime = DateUtil.string_toDatetime(elem['time'])
                    if itemTime > lasttime:
                        arr.append(elem)
                        logger.info(elem)
                if len(arr) > 0:
                    self.mongodbutil_live.insertItems(arr)
                    logger.info("store items to mongodb ...")
                else:
                    logger.info("still have no new live message")
        # BUGFIX: specific timeout handlers were unreachable when listed
        # after 'except Exception'; Timeout covers Connect/Read timeouts.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
        except Exception as err:
            logger.warning(err)
        finally:
            if res is not None:
                res.close()
    return 1, 'ok'
def get_individual_companyinfo(self, market, code):
    """Fetch company info for one stock from the futunn finance API and
    append it to self.itemArray.

    :param market: market label, e.g. 'HK' / 'US' (lower-cased for the URL)
    :param code: stock code (upper-cased for the URL)
    :return: (ret_code, ret_data) — (0, '') on success, (-1, err_or_'') on failure
    """
    ret_code = -1
    ret_data = ''
    self.itemArray = []
    url = "https://finance.futunn.com/api/finance/company-info?code={0}&label={1}".format(code.upper(), market.lower())
    res = None  # so finally can close safely even if requests.get raises
    try:
        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
            'Accept': 'application/json,text/javascript,*.*;q=0.01',
            'Origin': 'https://www.futunn.com',
            # BUGFIX: original passed the bound method 'code.upper' (missing
            # call parentheses), embedding its repr in the Referer header.
            'Referer': 'https://www.futunn.com/quote/stock-info?m={0}&code={1}&type=finance_analyse'.format(
                market.lower(), code.upper())
        }
        res = requests.get(url, headers=header)
        if res.encoding == 'ISO-8859-1':
            res.encoding = 'gbk'
        res.raise_for_status()
        if res.status_code == 200:
            js = json.loads(res.text)
            obj = js['data']
            if len(obj) != 0:
                obj['market'] = market
                obj['code'] = code
                self.itemArray.append(obj)
            ret_code = 0
            ret_data = ''
    # BUGFIX: timeout handlers were unreachable after 'except Exception'.
    except requests.exceptions.Timeout as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    except Exception as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    finally:
        if res is not None:
            res.close()
    return ret_code, ret_data
def job_info_appender(*_args):
    '''
    Single-pass crawl job: fetch current futu live info/news, store new
    items in MongoDB, then re-schedule itself via the global scheduler.

    _args[0] is a (store, arr, futunews) tuple — TODO confirm 'store' and
    'arr' semantics at the call site; both are unpacked but 'store'/'arr'
    are unused here.
    :return: None
    '''
    global is_closing
    global working
    global timerid
    working = True
    store = _args[0][0]
    arr = _args[0][1]
    futunews = _args[0][2]
    logger.info('start crawl current futu news...')
    # The loop body always ends in 'break' — it runs at most once and only
    # exists so shutdown (is_closing) can short-circuit the whole pass.
    while not is_closing:
        if is_closing:
            break
        begin = time.time()
        logger.info('Current Time:{}, info'.format(datetime.datetime.now()))
        try:
            ret_code, ret_data = futunews.get_live_info()
            items = futunews.get_item_array()
            if len(items) > 0:
                futunews.mongodbutil.insertItems(items)
                logger.info("store items to mongodb ...")
            else:
                logger.info("all items exists")
        except Exception as err:
            # Back off briefly on failure; errors are logged, not fatal.
            time.sleep(4 * random.random())
            logger.warning(err)
        try:
            ret_code, ret_data = futunews.get_futunn_live()
        except Exception as err:
            time.sleep(4 * random.random())
            logger.warning(err)
        if is_closing is True:
            break
        working = False
        if not is_closing:
            # Re-arm the periodic job with a jittered interval.
            sched.add_job(scheduled_job2, 'interval', seconds=random.randint(30,50), id=timerid)
        end = time.time()
        logger.info("fetching for one period , cost time: {}".format((end - begin)))
        break
def get_chn_page(self, market, code, page):
    """Scrape one page of CN (SH/SZ) news for a stock and append parsed
    items (with downloaded article content) to self.itemArray.

    :param market: market prefix (unused in parsing; kept for symmetry)
    :param code: stock symbol, stored on each item
    :param page: 1-based page number
    :return: (page + 1, '') to continue paging, or (-1, '') when the page
             list is exhausted.
    """
    self.itemArray = []
    url = self.generate_page_url(market, code, page)
    logger.info('fetch url: {}'.format(url))
    res = None  # so finally can close safely even if requests.get raises
    try:
        res = requests.get(url, timeout=60, headers={'Content-type': 'text/html;charset=gb2312'})
        if res.encoding == 'ISO-8859-1':
            res.encoding = 'gbk'
        html = res.text
        res.raise_for_status()
        if res.status_code == 200:
            contentSoup = bs4.BeautifulSoup(html, 'lxml')
            # Strip the surrounding list markup, then split per entry.
            strList = str(contentSoup.select('.datelist > ul'))[10:-12]
            elems = strList.split("<br/>")
            if len(elems) < 2:
                return -1, ''  # no entries left: stop paging
            for elem in elems:
                if elem == '':
                    continue
                item = {}  # renamed from 'json' to avoid shadowing the module
                elem = elem.lstrip()
                parts = elem.split('<a href="')
                item['code'] = code
                item['date'] = parts[0].rstrip() + ":00"
                s = item['date']
                parts1 = parts[1].split('" target="_blank">')
                item['href'] = parts1[0]
                item['year'] = 'real'
                parts2 = parts1[1].split('</a>')
                item['title'] = parts2[0]
                logger.info("date:{},title:{}".format(s, item['title']))
                ret, content = self.get_content(item['href'], "utf-8")
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
    except Exception as err:
        # Parse/network errors are logged and the page is skipped.
        logger.warning(err)
    finally:
        if res is not None:
            res.close()
    return page + 1, ''
def get_content(self, url, enco):
    """Download an article page and return the text of its '.inner' element.

    :param url: article URL
    :param enco: encoding to force on the response (e.g. 'utf-8')
    :return: (0, text) on success, (-1, '') on failure,
             (-2, '') when the url is already stored in MongoDB.
    """
    content = ''
    ret = -1
    urlExist = self.mongodbutil.urlIsExist(url)
    if urlExist:
        logger.info('This url:{} has existed'.format(url))
        return -2, content
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    res = None  # so finally can close safely even if requests.get raises
    try:
        res = requests.get(url, headers=header, timeout=60)
        res.encoding = enco
        res.raise_for_status()
        if res.status_code == 200:
            soup = bs4.BeautifulSoup(res.text, 'lxml')
            elems = soup.select('.inner')
            if len(elems) > 0:
                content = elems[0].getText()
                ret = 0
    # BUGFIX: timeout handlers were unreachable after 'except Exception'.
    except requests.exceptions.Timeout as err:
        logger.warning(err)
    except Exception as err:
        logger.warning(err)
    finally:
        if res is not None:
            res.close()
    return ret, content
def job_calendar_appender(*_args):
    '''
    Single-pass crawl job: fetch the futunn financial calendar feeds and
    store them (inside get_calendars) into MongoDB.

    _args[0] is a (store, arr, futunews) tuple; 'store' and 'arr' are
    unpacked but unused here — kept for the common job signature.
    :return: None
    '''
    global is_closing
    global working
    global timerid
    store = _args[0][0]
    arr = _args[0][1]
    futunews = _args[0][2]
    working = True
    logger.info('start crawl current calendar ...')
    # Loop body always ends in 'break' — runs at most once; the while only
    # lets shutdown (is_closing) skip the pass entirely.
    while not is_closing:
        if is_closing:
            break
        begin = time.time()
        logger.info('Current Time:{}, info'.format(datetime.datetime.now()))
        try:
            ret_code, ret_data = futunews.get_calendars()
        except Exception as err:
            # Back off briefly on failure; errors are logged, not fatal.
            time.sleep(4 * random.random())
            logger.warning(err)
        if is_closing is True:
            break
        end = time.time()
        logger.info("fetching calendar for one period , cost time: {}".format((end - begin)))
        break
    working = False
def get_calendars(self):
    """Fetch every futunn calendar feed (IPOs, earnings, ex-dividend,
    events, economic data, market closures) from today onward and store
    all entries into MongoDB.

    Entries carry 'event_type', 'market_type' and 'event_time'; that
    triple is the unique key, so duplicates are dropped at the Mongo
    layer rather than filtered here.

    :return: (1, 'ok') unconditionally; per-feed failures are logged.
    """
    urls = [
        'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=%5B%22%E6%B8%AF%E8%82%A1%E6%96%B0%E8%82%A1%22%2C%22%E7%BE%8E%E8%82%A1%E6%96%B0%E8%82%A1%22%2C%22A%E8%82%A1%E6%96%B0%E8%82%A1%22%5D&stock_type=&_={1}',
        'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["港股财报"%2C"美股财报"%2C"A股财报"]&stock_type=&_={1}',
        'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["港股除权除息"%2C"美股除权除息"%2C"A股除权除息"]&stock_type=&_={1}',
        'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["财经事件"]&stock_type=&_={1}',
        'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["经济数据"]&stock_type=&_={1}',
        'https://news.futunn.com/new-calendar/events-list?begin_time={0}&end_time=2037-12-31&event_type=["休市提醒"]&stock_type=&_={1}'
    ]
    for idx in range(0, len(urls), 1):
        # idx offsets the millisecond cache-buster so each URL is unique.
        url = urls[idx].format(DateUtil.getTodayStr(), int(1000 * time.mktime(time.localtime())) + idx)
        logger.info("address current url {0}...".format(url))
        arr = []
        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
        res = None  # so finally can close safely even if requests.get raises
        try:
            res = requests.get(url, headers=header, timeout=60)
            res.raise_for_status()
            if res.status_code == 200:
                js = json.loads(res.text)
                entries = js['data']['list']  # renamed: do not shadow builtin 'list'
                for elem in entries:
                    arr.append(elem)
                if len(arr) > 0:
                    self.mongodbutil_calendar.insertItems(arr)
                    logger.info("store items to mongodb ...")
                else:
                    logger.info("still have no calendar live message")
        # BUGFIX: timeout handlers were unreachable after 'except Exception'.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
        except Exception as err:
            logger.warning(err)
        finally:
            if res is not None:
                res.close()
    return 1, 'ok'
def get_futunn_news(self):
    """Crawl futunn market-news pages by sequential article id, skipping
    URLs already stored, and batch-insert parsed items into MongoDB.

    :return: (1, 'ok') unconditionally; per-article failures are logged.
    """
    # NOTE(review): the id window is hard-coded — presumably advanced by
    # hand between runs; confirm before widening.
    for i in range(94471, 94480, 1):
        url = 'https://news.futunn.com/market/{0}?src=3'.format(i)
        urlExist = self.mongodbutil.urlIsExist(url)
        if urlExist:
            logger.info('This url:{} has existed'.format(url))
            continue
        item = {}  # renamed from 'json' to avoid shadowing the module
        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
        res = None  # so finally can close safely even if requests.get raises
        try:
            res = requests.get(url, headers=header, timeout=60)
            res.raise_for_status()
            if res.status_code == 200:
                soup = bs4.BeautifulSoup(res.text, 'lxml')
                elems = soup.select('.inner')
                item['content'] = elems[0].getText()
                elems = soup.select('.news-title > h1')
                item['title'] = elems[0].getText()
                elems = soup.select('.news-title > .timeBar')
                # Date starts at the first '2' (year digit) in the time bar.
                pos = elems[0].getText().strip().find('2')
                item['date'] = elems[0].getText().strip()[pos:pos + 16]
                item['href'] = url
                item['code'] = ' '
                item['year'] = DateUtil.string_toDatetime2(item['date']).year
                item['sourcefrom'] = 'futunn'
                self.itemArray.append(item)
                # Flush to MongoDB in batches to bound memory.
                if len(self.get_item_array()) > 50:
                    self.mongodbutil.insertItems(self.get_item_array())
                    logger.info("store items to mongodb ...")
                    self.clear_item_array()
        # BUGFIX: timeout handlers were unreachable after 'except Exception'.
        except requests.exceptions.Timeout as err:
            logger.warning(err)
        except Exception as err:
            logger.warning(err)
        finally:
            if res is not None:
                res.close()
    return 1, 'ok'
def get_live_info(self):
    """Scrape the live-news landing page (self.url), keeping only entries
    newer than a fixed cutoff, and append them (with article content) to
    self.itemArray.

    :return: (ret_code, ret_data) — (0, '') on success, (-1, err_or_'') on failure
    """
    ret_code = -1
    ret_data = ''
    self.itemArray = []
    lasttime = DateUtil.string_toDatetime2('2019-05-01 09:00')
    res = None  # so finally can close safely even if requests.get raises
    try:
        res = requests.get(self.url)
        if res.encoding == 'ISO-8859-1':
            res.encoding = 'gbk'
        html = res.text
        res.raise_for_status()
        if res.status_code == 200:
            contentSoup = bs4.BeautifulSoup(html, 'lxml')
            elems = contentSoup.find_all('a', class_='news-link')
            for elem in elems:
                item = {}  # renamed from 'json' to avoid shadowing the module
                item['code'] = ' '
                newstime = elem.select('span')
                # BUGFIX: this local was named 'time', shadowing the time
                # module and breaking any later time.sleep() in this scope.
                timestr = newstime[len(newstime) - 1].getText()
                item['date'] = DateUtil.string_toDatetime2(timestr)
                s = item['date']
                if s < lasttime:
                    continue
                else:
                    lasttime = s
                h3 = elem.select('h3')
                item['title'] = h3[len(h3) - 1].getText()
                logger.info("date:{},title:{}".format(s, item['title']))
                item['href'] = elem.attrs['href']
                item['year'] = item['date'].year
                item['sourcefrom'] = 'futunn'
                ret, content = self.get_content(item['href'], 'utf-8')
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
            ret_code = 0
            ret_data = ''
    # BUGFIX: timeout handlers were unreachable after 'except Exception'.
    except requests.exceptions.Timeout as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    except Exception as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    finally:
        if res is not None:
            res.close()
    return ret_code, ret_data
def get_individual_news(self, market, code):
    """Scrape the per-stock news list page and append parsed items (with
    downloaded article content) to self.itemArray.

    :param market: market label, e.g. 'HK' / 'US' (lower-cased for the URL)
    :param code: stock code (upper-cased for the URL)
    :return: (ret_code, ret_data) — (0, '') on success, (-1, err_or_'') on failure
    """
    ret_code = -1
    ret_data = ''
    self.itemArray = []
    url = "https://www.futunn.com/quote/stock-news?m={0}&code={1}".format(market.lower(), code.upper())
    res = None  # so finally can close safely even if requests.get raises
    try:
        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
            'Accept': 'application/json,text/javascript,*.*;q=0.01',
            'Origin': 'https://www.futunn.com',
            # BUGFIX: original passed the bound method 'code.upper' (missing
            # call parentheses), embedding its repr in the Referer header.
            'Referer': 'https://www.futunn.com/quote/stock-info?m={0}&code={1}&type=finance_analyse'.format(
                market.lower(), code.upper())
        }
        res = requests.get(url, headers=header)
        if res.encoding == 'ISO-8859-1':
            res.encoding = 'gbk'
        html = res.text
        res.raise_for_status()
        if res.status_code == 200:
            contentSoup = bs4.BeautifulSoup(html, 'lxml')
            elems = contentSoup.select('.ulList02 > ul > li')
            for elem in elems:
                item = {}  # renamed from 'json' to avoid shadowing the module
                item['code'] = code
                item['market'] = market
                item['title'] = elem.select('.txt01')[0].getText()
                item['href'] = elem.select('.txt01 > a')[0]['href']
                # The bar text starts with a 3-char prefix before the date.
                item['date'] = DateUtil.string_toDatetime2(elem.select('.bar01')[0].getText().strip()[3:])
                item['year'] = item['date'].year
                item['sourcefrom'] = 'futunn'
                ret, content = self.get_content(item['href'], 'utf-8')
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
            ret_code = 0
            ret_data = ''
    # BUGFIX: timeout handlers were unreachable after 'except Exception'.
    except requests.exceptions.Timeout as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    except Exception as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    finally:
        if res is not None:
            res.close()
    return ret_code, ret_data
def get_page(self, market, code, url):
    """Scrape one sina-style news list page and append parsed items (with
    downloaded article content) to self.itemArray.

    :param market: 'HK' / 'US' / 'SH' / 'SZ'
    :param code: stock symbol, stored on each item
    :param url: list-page URL to fetch
    :return: (ret_code, ret_data) — (0, '') on success, (-1, err_or_'') on failure
    """
    ret_code = -1
    ret_data = ''
    self.itemArray = []
    res = None  # so finally can close safely even if requests.get raises
    try:
        res = requests.get(url, timeout=60, headers={'Content-type': 'text/html;charset=gb2312'})
        if res.encoding == 'ISO-8859-1':
            res.encoding = 'gbk'
        html = res.text
        res.raise_for_status()
        if res.status_code == 200:
            contentSoup = bs4.BeautifulSoup(html, 'lxml')
            elems = contentSoup.select('#js_ggzx > li,.li_point > ul > li,.col02_22 > ul > li')
            for elem in elems:
                item = {}  # renamed from 'json' to avoid shadowing the module
                item['code'] = code
                # Skip US entries whose markup starts with a newline at
                # offset 4 — presumably filler rows; confirm against the page.
                temp = elem.__str__()[4:5]
                if (temp == '\n') and market == 'US':
                    continue
                ele = elem.select('span')
                item['date'] = DateUtil.format_date(ele[0].getText()[1:-1])
                s = item['date']
                ele = elem.select('a')
                item['title'] = ele[len(ele) - 1].getText()
                logger.info("date:{},title:{}".format(s, item['title']))
                item['href'] = ele[len(ele) - 1].attrs['href']
                item['year'] = 'guess'
                ret, content = self.get_content(item['href'], 'utf-8')
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
            ret_code = 0
            ret_data = ''
    # BUGFIX: timeout handlers were unreachable after 'except Exception'.
    except requests.exceptions.Timeout as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    except Exception as err:
        logger.warning(err)
        ret_code = -1
        ret_data = err
    finally:
        if res is not None:
            res.close()
    return ret_code, ret_data
def get_us_page(self, market, code, page, type):
    """Scrape one page of US-market news history for a stock and append
    parsed items (with downloaded article content) to self.itemArray.

    :param market: market prefix (unused in parsing; kept for symmetry)
    :param code: stock symbol, stored on each item
    :param page: 1-based page number
    :param type: feed type selector appended to the URL ('1' then '2')
    :return: (next_page, next_type); next_page == -1 means done. At page
             100 the crawl rolls over from type '1' to type '2', then stops.
    """
    self.itemArray = []
    url = self.generate_page_url(market, code, page)
    url = url + type
    logger.info('fetch url: {}'.format(url))
    res = None  # so finally can close safely even if requests.get raises
    try:
        res = requests.get(url, timeout=60, headers={'Content-type': 'text/html;charset=gb2312'})
        if res.encoding == 'ISO-8859-1':
            res.encoding = 'gbk'
        html = res.text
        res.raise_for_status()
        if res.status_code == 200:
            contentSoup = bs4.BeautifulSoup(html, 'lxml')
            elems = contentSoup.select('.xb_news > ul > li')
            # Hard page cap: switch feed type once, then terminate.
            if page >= 100:
                if type == "1":  # idiomatic equality, was type.__eq__("1")
                    return 1, '2'
                else:
                    return -1, '2'
            for elem in elems:
                item = {}  # renamed from 'json' to avoid shadowing the module
                item['code'] = code
                ele = elem.select('span')
                if len(ele) == 0:
                    continue
                item['date'] = DateUtil.format_date_us_history(ele[0].getText())
                s = item['date']
                ele = elem.select('a')
                item['title'] = ele[len(ele) - 1].getText()
                logger.info("date:{},title:{}".format(s, item['title']))
                item['href'] = ele[len(ele) - 1].attrs['href']
                item['year'] = 'real'
                ret, content = self.get_content(item['href'], "utf-8")
                if ret == 0:
                    item['content'] = content
                    self.itemArray.append(item)
    # BUGFIX: timeout handlers were unreachable after 'except Exception'.
    except requests.exceptions.Timeout as err:
        logger.warning(err)
    except Exception as err:
        logger.warning(err)
    finally:
        if res is not None:
            res.close()
    return page + 1, type
def job_once_global(*_args):
    '''
    One-shot bulk crawl over every symbol in the code list, dispatching to
    the market-specific page scraper (HK/US/SH/SZ) and paging each symbol
    until exhausted. Triggers shutdown (signal_int_handler) when done.

    _args[0] is a (store, arr, sinanewshistory) tuple; 'arr' holds codes
    shaped 'MK.SYMBOL' (e.g. 'SH.600996'); 'store' is unused here.
    :return: None
    '''
    global is_closing
    store = _args[0][0]
    arr = _args[0][1]
    sinanewshistory = _args[0][2]
    while not is_closing:
        begin = time.time()
        ret_arr = arr
        total = len(ret_arr)
        curr = 0
        for code in ret_arr:
            curr += 1
            logger.info(
                "current fetching entry progress {}/{} code:{} ".format(
                    curr, total, code))
            # Hard-coded resume point: skip codes already crawled in a
            # previous run — presumably advanced by hand; confirm.
            if curr < 10398:
                continue
            market = code[0:2]   # 'MK.SYMBOL' → market prefix
            symbol = code[3:]    # → bare symbol
            # if code != 'SH.600996':
            #    logger.info("current fetching entry progress {}/{} code:{} ".format(curr, total, code))
            #    continue
            sinanewshistory.clear_item_array()
            logger.info('Current Time:{}, code:{}, market:{}'.format(
                datetime.datetime.now(), symbol, market))
            page = 1
            type = '1'
            # Condition is effectively 'page != -1' (scrapers return -1 to
            # stop); the second clause is redundant when page != -1.
            while page != -1 or (page > 0 and page < 2):
                if is_closing:
                    break
                try:
                    if market == 'HK':
                        page, _ = sinanewshistory.get_hk_page(
                            market, symbol, page)
                    if market == 'US':
                        page, type = sinanewshistory.get_us_page(
                            market, symbol, page, type)
                    if market == 'SZ' or market == 'SH':
                        page, _ = sinanewshistory.get_chn_page(
                            market, symbol, page)
                    items = sinanewshistory.get_item_array()
                    if len(items) > 0:
                        sinanewshistory.mongodbutil.insertItems(items)
                        time.sleep(random.random())
                        logger.info("store items to mongodb ...")
                    else:
                        # Nothing new on this page: stop paging this symbol.
                        logger.info("all items exists")
                        page = -1
                except Exception as err:
                    time.sleep(4 * random.random())
                    logger.warning('my err:{}'.format(err))
                    page = -1
        if is_closing is True:
            break
        end = time.time()
        logger.info("fetching for one period , cost time: {}".format(
            (end - begin)))
        # One full pass done: request process shutdown, then exit the loop.
        signal_int_handler(0, 0)
        break
def job_once_individuals(*_args):
    '''
    One-shot crawl of per-stock news for every symbol in the code list,
    inserting new items into MongoDB.

    _args[0] is a (store, arr, futunews) tuple; 'arr' holds codes shaped
    'MK.SYMBOL'; 'store' is unused here.
    :return: None
    '''
    global is_closing
    global working
    global timerid
    working = True
    store = _args[0][0]
    arr = _args[0][1]
    futunews = _args[0][2]
    logger.info('start crawl current news...')
    # Loop body always ends in 'break' — runs at most once; the while only
    # lets shutdown (is_closing) skip the pass entirely.
    while not is_closing:
        begin = time.time()
        ret_arr = arr
        total = len(ret_arr)
        curr = 0
        for code in ret_arr:
            if is_closing:
                break
            curr += 1
            logger.info("current fetching individuals progress {}/{} code:{} ".format(curr, total, code))
            # Resume threshold (currently 0: nothing skipped).
            if curr < 0:
                continue
            market = code[0:2]   # 'MK.SYMBOL' → market prefix
            symbol = code[3:]    # → bare symbol
            logger.info('Current Time:{}, code:{}, market:{}'.format(datetime.datetime.now(), symbol, market))
            try:
                futunews.get_individual_news(market, symbol)
                items = futunews.get_item_array()
                if len(items) > 0:
                    futunews.mongodbutil.insertItems(items)
                    logger.info("store items to mongodb news ...")
                else:
                    logger.info("all news items exists")
            except Exception as err:
                # Back off briefly on failure; errors are logged, not fatal.
                time.sleep(4 * random.random())
                logger.warning(err)
            # NOTE: disabled crawls for balance sheet, cash flow, income,
            # company info and dividends followed the same
            # fetch -> get_item_array -> insertItems pattern here, each
            # writing to its own mongodbutil_* collection.
        if is_closing is True:
            break
        working = False
        end = time.time()
        logger.info("fetching for one period , cost time: {}".format((end - begin)))
        break
def job_appender(*_args):
    '''
    Single-pass crawl job: fetch the latest sina news list for every
    symbol in the code list, store new items in MongoDB, then re-arm the
    periodic scheduler job.

    _args[0] is a (store, arr, sinanews) tuple; 'arr' holds codes shaped
    'MK.SYMBOL'; 'store' is unused here.
    :return: None
    '''
    global is_closing
    global working
    global timerid
    working = True
    store = _args[0][0]
    arr = _args[0][1]
    sinanews = _args[0][2]
    logger.info('start crawl current news...')
    # Loop body always ends in 'break' — runs at most once; the while only
    # lets shutdown (is_closing) skip the pass entirely.
    while not is_closing:
        begin = time.time()
        ret_arr = arr
        total = len(ret_arr)
        curr = 0
        for code in ret_arr:
            if is_closing:
                break
            curr += 1
            logger.info(
                "current fetching entry progress {}/{} code:{} ".format(
                    curr, total, code))
            # Hard-coded resume point: skip codes already crawled in a
            # previous run — presumably advanced by hand; confirm.
            if curr < 3061:
                continue
            market = code[0:2]   # 'MK.SYMBOL' → market prefix
            symbol = code[3:]    # → bare symbol
            url = sinanews.generate_url(market, symbol)
            logger.info('Current Time:{}, code:{}, market:{}'.format(
                datetime.datetime.now(), symbol, market))
            try:
                sinanews.get_page(market, symbol, url)
                items = sinanews.get_item_array()
                if len(items) > 0:
                    sinanews.mongodbutil.insertItems(items)
                    logger.info("store items to mongodb ...")
                else:
                    logger.info("all items exists")
            except Exception as err:
                # Back off briefly on failure; errors are logged, not fatal.
                time.sleep(4 * random.random())
                logger.warning(err)
        if is_closing is True:
            break
        working = False
        if not is_closing:
            # Re-arm the periodic job.
            sched.add_job(scheduled_job, 'interval', seconds=1, id=timerid)
        end = time.time()
        logger.info("fetching for one period , cost time: {}".format(
            (end - begin)))
        break