예제 #1
0
    def analyze(self, html):
        """Extract ``[issue, number]`` pairs from the ``awardList`` table.

        Only the markup between the opening ``awardList`` table tag and the
        next ``</table>`` is parsed.  Rows with fewer than three
        ``<td class="start">`` cells are skipped.  For every remaining cell
        the issue is ``"20"`` plus the ``data-period`` attribute and the
        number is the ``data-win-number`` attribute with spaces replaced by
        commas.  A pair is kept only when the issue is 11 characters long and
        the number is 9 characters long.

        :param html: page source to scan.
        :return: list of ``[issue, number]`` lists sorted by issue; an empty
            list when either marker is missing.
        """
        start_marker = r'<table width="100%" border="0" cellspacing="0" class="awardList">'
        end_marker = r'</table>'

        # str.find() reports a miss with -1; index 0 is a legitimate match
        # and must not be treated as "not found".
        start = html.find(start_marker)
        if start < 0:
            return []
        end = html.find(end_marker, start)
        if end < 0:
            return []
        fragment = html[start + len(start_marker):end]

        handler_data = []
        soup = BeautifulSoup(fragment)
        for tr in soup.find_all('tr'):
            tds = tr.find_all('td', class_="start")
            if len(tds) < 3:
                continue
            for td in tds:
                # Best effort per cell: a cell lacking one of the attributes
                # is reported through warn() and does not abort the parse.
                try:
                    issue = "20" + td["data-period"]
                    number = get_string(td["data-win-number"]).replace(
                        " ", ",")
                    if len(issue) != 11 or len(number) != 9:
                        continue
                    handler_data.append([issue, number])
                except Exception as e:
                    warn("{}:{}".format(get_string(self.name), get_string(e)))
        handler_data.sort(key=lambda x: x[0])
        return handler_data
예제 #2
0
    def query_for_str(self, query, args=None):
        """Run ``query`` and return a single value from its row as a string.

        The row comes from ``self.execute_fetchone(query, args)``.  A falsy
        row yields ``''``.  When ``self.dict_cursor`` is truthy the value is
        taken with ``popitem()`` from the row; otherwise element 0 of the row
        is used.  The value is passed through ``get_string``.
        """
        row = self.execute_fetchone(query, args)
        if row:
            value = row.popitem()[1] if self.dict_cursor else row[0]
            return get_string(value)
        return ''
예제 #3
0
 def analyze(self, html):
     """Extract ``[issue, number]`` pairs from a JSON response body.

     ``html`` is decoded as JSON; every entry of its ``"result"`` list
     contributes ``[entry["code"], entry["red"]]``.  Any error while
     decoding or reading the entries is reported through ``warn`` and the
     pairs collected up to that point are still returned.

     :param html: JSON text of the response.
     :return: list of ``[issue, number]`` lists (possibly empty).
     """
     handler_data = []
     try:
         payload = json.loads(html)
         # Append one by one so entries parsed before a malformed one are
         # kept when the except branch fires.
         for entry in payload["result"]:
             issue = entry["code"]
             number = entry["red"]
             handler_data.append([issue, number])
     except Exception as e:
         warn("{}:{}".format(get_string(self.name), get_string(e)))
     # The collected pairs must be handed back to the caller; without this
     # the method implicitly returned None.
     return handler_data
예제 #4
0
 def init(self, params):
     """Populate this object's settings from the ``params`` mapping.

     Each listed key is read with ``params.get(key, '')``, converted with
     ``get_int`` or ``get_string`` and stored on ``self`` under the same
     name.
     """
     # (attribute/key name, converter), kept in the original assignment order.
     field_converters = (
         ('lotto_id', get_int),
         ('status', get_int),
         ('count', get_int),
         ('issue_interval', get_int),
         ('iss_bit', get_int),
         ('block_sec', get_int),
         ('start_time', get_string),
         ('end_time', get_string),
         ('issue_type', get_int),
         ('offset', get_int),
         ('extra_info', get_string),
     )
     for field, convert in field_converters:
         setattr(self, field, convert(params.get(field, '')))
예제 #5
0
    def analyze(self, html):
        """Extract ``[issue, number]`` pairs from the ``info-table`` lists.

        Only the markup between the first ``<ul class="info-table">`` tag and
        the next ``</section>`` is parsed.  For every ``<ul>`` with at least
        three ``<li>`` children, the issue is the first child node of the
        first ``<li>`` and the number is the first child node of the third
        ``<li>`` with tabs, carriage returns and newlines removed.  A pair is
        kept only when the issue has length 11 and the number has length 9.

        :param html: page source to scan.
        :return: list of ``[issue, number]`` lists sorted by issue; an empty
            list when either marker is missing.
        """
        start_marker = r'<ul class="info-table">'
        end_marker = r'</section>'

        # str.find() reports a miss with -1; index 0 is a legitimate match
        # and must not be treated as "not found".
        start = html.find(start_marker)
        if start < 0:
            return []
        end = html.find(end_marker, start)
        if end < 0:
            return []
        fragment = html[start + len(start_marker):end]

        handler_data = []
        soup = BeautifulSoup(fragment)
        for ul in soup.find_all('ul'):
            lis = ul.find_all('li')
            if len(lis) < 3:
                continue
            # An empty <li> has no child nodes; skip the row instead of
            # letting an IndexError abort the whole parse.
            if not lis[0].contents or not lis[2].contents:
                continue
            issue = lis[0].contents[0]
            number = get_string(lis[2].contents[0]).replace('\t', '').replace(
                '\r', '').replace('\n', '')
            if len(issue) != 11 or len(number) != 9:
                continue
            handler_data.append([issue, number])

        handler_data.sort(key=lambda x: x[0])
        return handler_data
예제 #6
0
파일: log.py 프로젝트: linml/lotto_crawler
    def log(self, level, msg, args):
        """Format a message and emit it to the console and/or a log file.

        The whole operation runs while holding ``self.thread_lock``.

        :param level: numeric level compared against
            ``self.print_require_level`` (console) and
            ``self.file_require_level`` (file).
        :param msg: message, used as a ``%`` format string when ``args`` is a
            non-empty tuple.
        :param args: tuple of format arguments, or a falsy value for none.

        Failures while formatting or writing are printed together with the
        traceback instead of being raised.
        """
        # Acquire before entering the try block so the finally clause never
        # releases a lock that was not obtained.
        self.thread_lock.acquire()
        try:
            if args and isinstance(args, tuple):
                # ``unicode`` is the Python 2 text type: such arguments are
                # encoded to UTF-8 byte strings before interpolation.
                msg = msg % tuple(
                    (arg.encode("utf8") if isinstance(arg, unicode) else arg
                     for arg in args))

            if level >= self.print_require_level:
                # Console output is best effort; a failing print must not
                # prevent the file write below.
                try:
                    print(msg)
                except Exception:
                    pass

            if level >= self.file_require_level:
                # One file per day and per process: log_<YYYY-MM-DD>_<pid>.txt
                file_name = path.join(
                    BASE_DIR, 'log', ''.join([
                        'log_',
                        strftime('%Y-%m-%d', localtime()), '_',
                        str(getpid()), '.txt'
                    ]))
                # The context manager closes the file even when write() fails.
                with open(file_name, 'a') as f:
                    f.write(get_string(msg))
                    f.write('\n')
        except Exception:
            print('(' * 6)
            print(msg)
            print(args)
            print(format_exc())
            print(')' * 6)
        finally:
            self.thread_lock.release()
예제 #7
0
파일: push.py 프로젝트: linml/lotto_crawler
 def handle_response(response):
     """Log the outcome of a push request.

     When ``response.error`` is truthy it is reported through ``warn``.
     Otherwise ``response.body`` is decoded with ``loads``; its ``"msg"`` is
     logged with ``info`` when ``"code"`` is 0 and with ``warn`` for any
     other code (a missing code is passed to ``get_int`` with -1 as its
     second argument).  Errors while decoding or reading the body are
     printed with their traceback instead of being raised.
     """
     if response.error:
         warn(get_string(response.error))
         return

     try:
         result = loads(response.body)
         code = get_int(result.get("code"), -1)
         msg = get_string(result.get("msg"))
         if code == 0:
             info("==>%s", msg)
         else:
             warn("=>%s", msg)
     except Exception as e:
         import traceback
         # Call-form print is valid on both Python 2 and Python 3.
         print(traceback.format_exc())
         print(e)
예제 #8
0
    def run(self):
        """Crawl once, store the results and schedule the next crawl.

        Returns immediately, without rescheduling, unless both
        ``self.lotto_status`` and ``self.url_status`` equal 1.  Otherwise the
        page is fetched and parsed, the data is filtered and saved, a summary
        line is logged with ``info`` and ``self.run`` is registered again on
        the IOLoop.  The delay is 5 seconds by default, or the number of
        seconds left until the next issue when that is positive.

        The method is a generator: it yields the object returned by
        ``self.get_html()`` and expects the page source to be sent back
        (presumably it is wrapped by a coroutine decorator that is not
        visible here; confirm at the definition site).
        """
        interval = 5
        if self.lotto_status != 1 or self.url_status != 1:
            return

        try:
            html = yield self.get_html()
            data_list = self.parser_data(html)
            data_list = filter_lotto_data(self.lotto_id, data_list)

            msg = "{0}=>".format(get_string(self.lotto_name))
            if data_list:
                # Refresh before saving so the comparison below is made
                # against the issue that was stored prior to this crawl.
                self.refresh_last_issue()
                self.save_data(data_list)
                crawler_last_issue = max(data_list, key=lambda x: x[0])[0]
                if self.last_issue.issue >= crawler_last_issue:
                    msg += "已有最新期号:{}".format(self.last_issue.issue)
                else:
                    msg += "更新最新:{}".format(crawler_last_issue)
            else:
                msg += "无数据,最后{}期".format(self.last_issue.issue)

            now = datetime.now()
            self.refresh_last_issue()
            if self.last_issue.issue:
                self.refresh_next_issue()
                left_second = get_int(self.next_issue.get_left_second(now))
                if left_second > 0:
                    interval = left_second

                msg += ";下一期{}期将在{}开奖".format(self.next_issue.issue, self.next_issue.result_time)
            msg += ";系统%s秒后采集" % interval
            info(msg)
        except Exception as e:
            # Call-form print is valid on both Python 2 and Python 3.
            print(format_exc())
            print(e)

        finally:
            # Always reschedule, even after a failed crawl, so one error does
            # not stop the polling loop.
            IOLoop.instance().add_timeout(
                timedelta(
                    milliseconds=interval * 1000),
                partial(
                    self.run))
예제 #9
0
def query_for_str(sql, args=None, default=''):
    """Run ``sql`` on a guarded connection and return one value as a string.

    The row comes from ``conn.execute_fetchone(sql, args)``.  When it is a
    non-empty tuple its first element is used; in every other case
    ``default`` is used.  The chosen value is converted with
    ``get_string(value, default)``.
    """
    with db_conn_guard() as conn:
        row = conn.execute_fetchone(sql, args)
        usable = bool(row) and isinstance(row, tuple)
    value = row[0] if usable else default
    return get_string(value, default)
예제 #10
0
 def query_for_list(self, query, args=None):
     """Execute ``query`` and lazily yield every row of ``self.cursor``.

     Being a generator, nothing is executed until iteration starts.  An
     exception raised while executing or iterating is reported through
     ``error`` and ends the iteration instead of propagating.
     """
     try:
         self.execute(query, args)
         for record in self.cursor:
             yield record
     except Exception as exc:
         error(
             'query_for_list$$sql:%s$$args:%s$$error:%s',
             query,
             sequence_to_string(args),
             get_string(exc),
         )
예제 #11
0
 def insert(self, query, args=None):
     """Execute ``query`` and report whether any row was affected.

     Returns ``True`` when ``self.cursor.rowcount`` is greater than 0 after
     the statement ran.  Returns ``False`` otherwise, including when an
     exception occurred (it is reported through ``error``).
     """
     succeeded = False
     try:
         self.execute(query, args)
         if self.cursor.rowcount > 0:
             succeeded = True
     except Exception as exc:
         error(
             'insert$$sql:%s$$args:%s$$error:%s',
             query,
             sequence_to_string(args),
             get_string(exc),
         )
     return succeeded
예제 #12
0
 def update_many(self, query, args=None):
     """Run ``query`` via ``self.executemany`` and report whether rows changed.

     Returns ``True`` when ``self.cursor.rowcount`` is greater than 0 after
     the call.  Returns ``False`` otherwise, including when an exception
     occurred (it is reported through ``error``).
     """
     succeeded = False
     try:
         self.executemany(query, args)
         if self.cursor.rowcount > 0:
             succeeded = True
     except Exception as exc:
         error(
             'update_many:%s$$args:%s$$error:%s',
             query,
             sequence_to_string(args),
             get_string(exc),
         )
     return succeeded
예제 #13
0
 def execute_fetchone(self, query, args=None):
     """Execute ``query`` and return the result of ``self.fetchone()``.

     Returns ``None`` when an exception occurs; the failure is reported
     through ``error`` rather than raised.
     """
     row = None
     try:
         self.execute(query, args)
         row = self.fetchone()
     except Exception as exc:
         error(
             'execute_fetchone$$sql:%s$$args:%s$$error:%s',
             query,
             sequence_to_string(args),
             get_string(exc),
         )
     return row