def analyze(self, html):
    """Extract ``[issue, number]`` pairs from the ``awardList`` table in *html*.

    The text between the opening ``awardList`` table tag and the next
    ``</table>`` is parsed with BeautifulSoup.  Rows with fewer than three
    ``td`` cells of class ``start`` are skipped.  For each remaining cell the
    issue is ``"20"`` plus the ``data-period`` attribute, and the number is
    the ``data-win-number`` attribute with spaces replaced by commas.  A pair
    is kept only when the issue is 11 characters and the number 9 characters
    long.

    :param html: page source to search.
    :return: list of ``[issue, number]`` pairs sorted by issue; an empty
        list when the table markers are not found.
    """
    table_open = r'<table width="100%" border="0" cellspacing="0" class="awardList">'
    table_close = r'</table>'

    # str.find() returns -1 when the marker is missing, so index 0 is a
    # valid match and must not be treated as "not found".
    start = html.find(table_open)
    if start < 0:
        return []
    end = html.find(table_close, start)
    if end <= start:
        return []
    fragment = html[start + len(table_open):end]

    handler_data = []
    soup = BeautifulSoup(fragment)
    for tr in soup.find_all('tr'):
        tds = tr.find_all('td', class_="start")
        if len(tds) < 3:
            continue
        for td in tds:
            # A single malformed cell is reported and skipped so that the
            # remaining cells are still collected.
            try:
                issue = "20" + td["data-period"]
                number = get_string(td["data-win-number"]).replace(" ", ",")
                if len(issue) != 11 or len(number) != 9:
                    continue
                handler_data.append([issue, number])
            except Exception as e:
                warn("{}:{}".format(get_string(self.name), get_string(e)))
    handler_data.sort(key=lambda pair: pair[0])
    return handler_data
def query_for_str(self, query, args=None):
    """Run *query* and return a single value from the first row as a string.

    :param query: SQL text handed to ``self.execute_fetchone``.
    :param args: optional query parameters.
    :return: ``''`` when no row comes back; otherwise the selected value
        passed through ``get_string``.
    """
    row = self.execute_fetchone(query, args)
    if not row:
        return ''
    if self.dict_cursor:
        # Dict rows: popitem() removes one (column, value) pair from the row
        # and hands it back; only the value is used.
        value = row.popitem()[1]
    else:
        value = row[0]
    return get_string(value)
def analyze(self, html):
    """Parse a JSON response body into ``[issue, number]`` pairs.

    The payload is expected to carry a ``"result"`` list whose entries have
    a ``"code"`` field (used as the issue) and a ``"red"`` field (used as
    the number).  Any parsing or lookup error is reported through ``warn``;
    pairs collected before the error are still returned.

    :param html: response body containing JSON text.
    :return: list of ``[issue, number]`` pairs sorted by issue (empty when
        nothing could be parsed).
    """
    handler_data = []
    try:
        payload = json.loads(html)
        for entry in payload["result"]:
            handler_data.append([entry["code"], entry["red"]])
    except Exception as e:
        warn("{}:{}".format(get_string(self.name), get_string(e)))
    # The collected pairs were previously dropped because the function had
    # no return statement; hand them back ordered by issue.
    handler_data.sort(key=lambda pair: pair[0])
    return handler_data
def init(self, params):
    """Populate this object's attributes from the *params* mapping.

    Every attribute is read with ``params.get(name, '')`` and passed through
    the converter listed next to it (``get_int`` or ``get_string``).

    :param params: mapping that provides a ``get(key, default)`` method.
    """
    # (attribute name, converter) pairs, applied in this order.
    field_converters = (
        ('lotto_id', get_int),
        ('status', get_int),
        ('count', get_int),
        ('issue_interval', get_int),
        ('iss_bit', get_int),
        ('block_sec', get_int),
        ('start_time', get_string),
        ('end_time', get_string),
        ('issue_type', get_int),
        ('offset', get_int),
        ('extra_info', get_string),
    )
    for field, convert in field_converters:
        setattr(self, field, convert(params.get(field, '')))
def analyze(self, html):
    """Extract ``[issue, number]`` pairs from the ``info-table`` list in *html*.

    The text between ``<ul class="info-table">`` and the next ``</section>``
    is parsed with BeautifulSoup.  For every ``ul`` holding at least three
    ``li`` items, the first child of the first item is the issue and the
    first child of the third item, stripped of tabs, carriage returns and
    newlines, is the number.  A pair is kept only when the issue is 11
    characters and the number 9 characters long.

    :param html: page source to search.
    :return: list of ``[issue, number]`` pairs sorted by issue; an empty
        list when the markers are not found.
    """
    list_open = r'<ul class="info-table">'
    section_close = r'</section>'

    # str.find() returns -1 when the marker is missing, so index 0 is a
    # valid match and must not be treated as "not found".
    start = html.find(list_open)
    if start < 0:
        return []
    end = html.find(section_close, start)
    if end <= start:
        return []
    fragment = html[start + len(list_open):end]

    handler_data = []
    soup = BeautifulSoup(fragment)
    for ul in soup.find_all('ul'):
        lis = ul.find_all('li')
        if len(lis) < 3:
            continue
        # An empty <li> has no children; skip the row rather than letting an
        # IndexError abort the whole parse.
        if not lis[0].contents or not lis[2].contents:
            continue
        issue = lis[0].contents[0]
        number = get_string(lis[2].contents[0])
        for whitespace in ('\t', '\r', '\n'):
            number = number.replace(whitespace, '')
        if len(issue) != 11 or len(number) != 9:
            continue
        handler_data.append([issue, number])
    handler_data.sort(key=lambda pair: pair[0])
    return handler_data
def log(self, level, msg, args):
    """Format a message and emit it to the console and/or the daily log file.

    When *args* is a non-empty tuple, *msg* is %-formatted with it; any
    ``unicode`` argument (Python 2) is encoded to UTF-8 first.  The message
    is printed when *level* reaches ``self.print_require_level`` and
    appended to ``<BASE_DIR>/log/log_<YYYY-MM-DD>_<pid>.txt`` when *level*
    reaches ``self.file_require_level``.  All work happens while holding
    ``self.thread_lock``.  Failures are dumped to the console instead of
    being raised to the caller.

    :param level: numeric level of this message.
    :param msg: message text or %-style format string.
    :param args: tuple of format arguments, or a falsy value for none.
    """
    # Acquire before entering the ``try`` so that ``finally`` never releases
    # a lock this call failed to take.
    self.thread_lock.acquire()
    try:
        if args and isinstance(args, tuple):
            msg = msg % tuple(
                arg.encode("utf8") if isinstance(arg, unicode) else arg
                for arg in args)
        if level >= self.print_require_level:
            # Console output is best-effort (e.g. the terminal may reject
            # the encoding); a failure here must not block the file log.
            try:
                print(msg)
            except Exception:
                pass
        if level >= self.file_require_level:
            file_name = path.join(
                BASE_DIR, 'log', ''.join([
                    'log_',
                    strftime('%Y-%m-%d', localtime()), '_',
                    str(getpid()), '.txt'
                ]))
            # The context manager closes the file even if a write fails.
            with open(file_name, 'a') as f:
                f.write(get_string(msg))
                f.write('\n')
    except Exception:
        print('(' * 6)
        print(msg)
        print(args)
        print(format_exc())
        print(')' * 6)
    finally:
        self.thread_lock.release()
def handle_response(response):
    """Report the outcome of a finished request.

    A response carrying an error is reported through ``warn``.  Otherwise
    the body is decoded with ``loads`` and its ``"msg"`` field is reported
    through ``info`` when ``"code"`` is 0, or through ``warn`` for any other
    code (a missing code defaults to -1).  Failures while decoding are
    printed together with their traceback.

    :param response: object exposing ``error`` and ``body`` attributes.
    """
    if response.error:
        warn(get_string(response.error))
        return

    try:
        payload = loads(response.body)
        status = get_int(payload.get("code"), -1)
        text = get_string(payload.get("msg"))
        if status == 0:
            report, template = info, "==>%s"
        else:
            report, template = warn, "=>%s"
        report(template, text)
    except Exception as exc:
        import traceback
        print(traceback.format_exc())
        print(exc)
def run(self):
    """Run one crawl cycle and schedule the next one on the IOLoop.

    The method yields the result of ``self.get_html()``, so it is a
    generator; presumably it is wrapped by a coroutine decorator that is
    not visible in this block -- TODO confirm.

    When ``lotto_status`` or ``url_status`` is not 1 the method returns
    immediately and does not schedule another run.  Otherwise the fetched
    page is parsed, filtered and saved, a status message is logged through
    ``info``, and -- whether or not an error occurred -- ``self.run`` is
    re-registered via ``IOLoop.add_timeout``.
    """
    # Default delay (seconds) before the next run.
    interval = 5
    if self.lotto_status != 1 or self.url_status != 1:
        return
    try:
        html = yield self.get_html()
        data_list = self.parser_data(html)
        data_list = filter_lotto_data(self.lotto_id, data_list)
        msg = "{0}=>".format(get_string(self.lotto_name))
        if data_list:
            self.refresh_last_issue()
            self.save_data(data_list)
            # Highest issue among the crawled entries.
            crawler_last_issue = max(data_list, key=lambda x: x[0])[0]
            if self.last_issue.issue >= crawler_last_issue:
                # Message: "already have the latest issue".
                msg += "已有最新期号:{}".format(self.last_issue.issue)
            else:
                # Message: "updated to latest".
                msg += "更新最新:{}".format(crawler_last_issue)
        else:
            # Message: "no data, last issue was ...".
            msg += "无数据,最后{}期".format(self.last_issue.issue)
        now = datetime.now()
        self.refresh_last_issue()
        if self.last_issue.issue:
            self.refresh_next_issue()
            left_second = get_int(self.next_issue.get_left_second(now))
            # Use the next issue's remaining seconds as the delay when the
            # value is positive; otherwise keep the default.
            if left_second > 0:
                interval = left_second
            # Message: "next issue ... will be drawn at ...".
            msg += ";下一期{}期将在{}开奖".format(self.next_issue.issue,
                                           self.next_issue.result_time)
        # Message: "system will crawl again in ... seconds".
        msg += ";系统%s秒后采集" % interval
        info(msg)
    except Exception as e:
        print format_exc()
        print e
    finally:
        # Always re-arm the timer so one failed cycle does not stop the loop.
        IOLoop.instance().add_timeout(
            timedelta(
                milliseconds=interval * 1000), partial(
                    self.run))
def query_for_str(sql, args=None, default=''):
    """Run *sql* on a guarded connection and return one value as a string.

    :param sql: SQL text handed to ``conn.execute_fetchone``.
    :param args: optional query parameters.
    :param default: value used when the query yields no non-empty tuple row;
        it is also forwarded to ``get_string`` as its second argument.
    :return: ``get_string`` applied to the first element of the row, or to
        *default* when there is no usable row.
    """
    with db_conn_guard() as conn:
        row = conn.execute_fetchone(sql, args)
        usable = bool(row) and isinstance(row, tuple) and len(row) > 0
        value = row[0] if usable else default
        return get_string(value, default)
def query_for_list(self, query, args=None):
    """Execute *query* and lazily yield each row from ``self.cursor``.

    Errors raised while executing or iterating are reported through
    ``error``; the generator then simply ends.

    :param query: SQL text handed to ``self.execute``.
    :param args: optional query parameters.
    """
    failure_template = 'query_for_list$$sql:%s$$args:%s$$error:%s'
    try:
        self.execute(query, args)
        for record in self.cursor:
            yield record
    except Exception as exc:
        error(failure_template, query, sequence_to_string(args),
              get_string(exc))
def insert(self, query, args=None):
    """Execute an insert statement and report whether any row was affected.

    :param query: SQL text handed to ``self.execute``.
    :param args: optional query parameters.
    :return: ``True`` when ``self.cursor.rowcount`` is positive after the
        statement ran; ``False`` otherwise, including when an error occurred
        (the error is reported through ``error``).
    """
    succeeded = False
    try:
        self.execute(query, args)
        if self.cursor.rowcount > 0:
            succeeded = True
    except Exception as exc:
        error('insert$$sql:%s$$args:%s$$error:%s', query,
              sequence_to_string(args), get_string(exc))
    return succeeded
def update_many(self, query, args=None):
    """Execute a batched statement and report whether any row was affected.

    :param query: SQL text handed to ``self.executemany``.
    :param args: sequence of parameter sets for the batch.
    :return: ``True`` when ``self.cursor.rowcount`` is positive after the
        batch ran; ``False`` otherwise, including when an error occurred
        (the error is reported through ``error``).
    """
    succeeded = False
    try:
        self.executemany(query, args)
        if self.cursor.rowcount > 0:
            succeeded = True
    except Exception as exc:
        error('update_many:%s$$args:%s$$error:%s', query,
              sequence_to_string(args), get_string(exc))
    return succeeded
def execute_fetchone(self, query, args=None):
    """Execute *query* and return the result of ``self.fetchone()``.

    :param query: SQL text handed to ``self.execute``.
    :param args: optional query parameters.
    :return: whatever ``self.fetchone()`` returns, or ``None`` when
        executing or fetching raised (the error is reported through
        ``error``).
    """
    row = None
    try:
        self.execute(query, args)
        row = self.fetchone()
    except Exception as exc:
        error('execute_fetchone$$sql:%s$$args:%s$$error:%s', query,
              sequence_to_string(args), get_string(exc))
    return row