Пример #1
0
    def fail_action(self, values):
        '''
        Record a failed attempt for a queue message.

        After the message action has failed, increment the message's failure
        count in the queue table and store the IP of the machine that ran it.
        When this machine has used up its maximum number of attempts, also
        reset the message to the unprocessed state so that other machines
        can try to handle it.

        Errors are logged through ``self.rl`` and are not re-raised.

        :param values:      result of the message action; ``values[0]`` is
                            used as the id of the ``hainiu_queue`` row
        '''
        update_sql = """
            update hainiu_queue set fail_times=fail_times+1,fail_ip='%s' where id=%s;
        """
        update_sql_1 = """
            update hainiu_queue set type=1 where id=%s
        """
        # Bound before the try block so that the except/finally clauses stay
        # valid even when DBUtil() or the IP lookup is what raised.
        d = None
        sql = ''
        try:
            d = DBUtil(config._HAINIU_DB)
            queue_id = values[0]
            ip = Util().get_local_ip()
            sql = update_sql % (ip, queue_id)
            d.execute_no_commit(sql)
            if self.try_num == Consumer._WORK_TRY_NUM:
                # Last attempt allowed on this machine: set type back to 1.
                sql = update_sql_1 % queue_id
                d.execute_no_commit(sql)
            d.commit()
        except Exception:
            self.rl.exception()
            # Log the last statement that was built (empty if none was).
            self.rl.error(sql)
            if d is not None:
                d.rollback()
        finally:
            if d is not None:
                d.close()
Пример #2
0
 def fail_action(self, values):
     '''
     Record a failed attempt for a queue message and for its seed.

     Increments ``fail_times`` and stores this machine's IP in ``fail_ip``
     on both the ``hainiu_queue`` row and the ``hainiu_web_seed`` row. When
     ``self.try_num`` equals ``Consumer._WORK_TRY_NUM`` the queue row's
     ``type`` is additionally set to 1.

     Errors are logged through ``self.rl`` and are not re-raised.

     :param values:      result of the message action; ``values[5]`` is used
                         as the queue row id and ``values[0]`` as the seed md5
     '''
     update_sql = """
         update hainiu_queue set fail_times=fail_times+1,fail_ip='%s' where id=%s;
     """
     update_sql_1 = """
         update hainiu_queue set type=1 where id=%s;
     """
     update_hainiu_news_seed_sql = """
         update hainiu_web_seed set fail_times=fail_times+1,fail_ip="%s" where md5="%s";
     """
     # Bound before the try block so that the except/finally clauses stay
     # valid even when DBUtil() or the IP lookup is what raised.
     d = None
     sql = ''
     try:
         d = DBUtil(config._HAINIU_DB)
         queue_id = values[5]
         ip = Util().get_local_ip()
         sql = update_sql % (ip, queue_id)
         d.execute_no_commit(sql)
         main_md5 = values[0]
         sql = update_hainiu_news_seed_sql % (ip, main_md5)
         d.execute_no_commit(sql)
         if self.try_num == Consumer._WORK_TRY_NUM:
             sql = update_sql_1 % queue_id
             d.execute_no_commit(sql)
         d.commit()
     except Exception:
         self.rl.exception()
         # Log the last statement that was built (empty if none was).
         self.rl.error(sql)
         if d is not None:
             d.rollback()
             # NOTE(review): commit right after rollback is kept from the
             # original; it looks redundant - confirm against DBUtil.
             d.commit()
     finally:
         if d is not None:
             d.close()
Пример #3
0
def date_merge():
    """Merge verified records from the local 'done' directory into one file.

    Walks ``config._LOCAL_DATA_DIR % 'done'``. Every non-empty line is split
    at the first ``\\001`` into a checksum and a record; the record is kept
    only if ``Util.get_md5(record)`` equals the checksum. Kept records are
    appended, newline-separated and in batches of ten, to a temporary merge
    file. Each source file is removed once it has been read. At the end the
    temporary file, if it exists and is non-empty, is moved to the 'up'
    location.

    A line that fails (bad checksum or any other exception) is reported by
    printing the traceback and the line and by sending an SMS alert; the
    merge then continues with the next line.
    """
    util = Util()
    file_util = FileUtil()
    time_util = TimeUtil()
    sms = SendSmsUtil()
    alert_time = time_util.now_time()
    stamp = int(time.mktime(datetime.now().timetuple()))
    tmp_path = config._LOCAL_DATA_DIR % (
        '%s/%s_%s.tmp' % ('merge_tmp', 'seed', stamp))
    up_path = config._LOCAL_DATA_DIR % (
        '%s/%s_%s.done' % ('up', 'seed', stamp))
    done_root = config._LOCAL_DATA_DIR % ('done')

    # Separator written in front of the next batch: empty until something
    # has been written to the merge file, a newline afterwards.
    prefix = ''
    for dirpath, _dirnames, filenames in os.walk(done_root):
        for filename in filenames:
            src_path = os.path.join(dirpath, filename)
            file_size = os.path.getsize(src_path)
            total = 0
            merge_total = 0
            pending = []
            with open(src_path) as src:
                for line in src:
                    try:
                        total += 1
                        # NOTE(review): on Python 2 a byte str is implicitly
                        # decoded with the default codec before encode();
                        # confirm non-ASCII input is handled as intended.
                        line = line.strip().encode('utf-8')
                        if not line:
                            continue
                        sep = line.find('\001')
                        md5 = line[:sep]
                        record = line[sep + 1:]
                        if md5 != util.get_md5(record):
                            raise Exception('check is faild')
                        merge_total += 1
                        pending.append(record)

                        if len(pending) >= 10:
                            file_util.write_file_content_pattern(
                                tmp_path,
                                prefix + '\n'.join(pending),
                                pattern='a')
                            pending = []
                            prefix = '\n'
                    except Exception:
                        traceback.print_exc()
                        print(line)
                        alert_msg = 'alter merge api hainiu time:%s ip:%s' % (
                            alert_time, util.get_local_ip())
                        sms.send_sms(alert_msg)

            # Flush what is left of the last, incomplete batch of this file.
            if pending:
                file_util.write_file_content_pattern(
                    tmp_path,
                    prefix + '\n'.join(pending),
                    pattern='a')
                prefix = '\n'

            os.remove(src_path)
            print('%s %s %s %s' % (src_path, file_size, total, merge_total))

    if os.path.exists(tmp_path) and os.path.getsize(tmp_path) > 0:
        shutil.move(tmp_path, up_path)
Пример #4
0
    def action(self):
        '''
        Download ``self.url``, save the page and record the outcome.

        The page is fetched with ``RequestUtil.http_get_phandomjs``, its
        domain, host and <title> are extracted, the separator sequences from
        ``content`` are stripped/replaced in the HTML and the result is
        handed to ``self.save_file``. Afterwards one row is written to
        ``hainiu_web_page``: status 1 on success, or status 2 together with
        this machine's IP on failure.

        Exceptions from the fetch step and from the database step are logged
        through ``self.rl`` and turn the result into a failure; they are not
        re-raised.

        :return:    whatever the parent class's ``result`` returns for
                    ``(is_success, [md5, url, update_time, queue_id])``
        '''
        is_success = True
        t = TimeUtil()
        f = FileUtil()
        u = Util()
        hu = HtmlUtil()
        r = RequestUtil()
        md5 = u.get_md5(self.url)
        now_time = datetime.now()
        update_time = int(time.mktime(now_time.timetuple()))
        create_time = update_time
        create_day = int(t.now_day().replace('-', ''))
        create_hour = int(t.now_hour())
        # Round the current minute down to a multiple of five and append it,
        # zero-padded to two digits, to the YYYYmmddHH stamp.
        now_minute = int(t.now_min())
        now_minute -= now_minute % 5
        now_minute = t.now_time(format='%Y%m%d%H') + '%02d' % now_minute

        escaped_url = MySQLdb.escape_string(self.url)
        escaped_param = MySQLdb.escape_string(self.param)

        # Collected in locals and added to the value list exactly once, so
        # the number of values always matches the SQL placeholders no matter
        # at which point the fetch step fails.
        domain = ''
        host = ''
        title = ''
        try:
            html = r.http_get_phandomjs(self.url)
            domain = get_tld(self.url)

            soup = BeautifulSoup(html, 'lxml')
            title_doc = soup.find('title')
            if title_doc is not None and len(title_doc.contents) == 1:
                raw_title = title_doc.contents[0]
            else:
                raw_title = ''

            host = hu.get_url_host(self.url)
            title = MySQLdb.escape_string(raw_title)

            html = html.replace(content._SEQ1,
                                '').replace(content._SEQ2, content._SEQ4)
            # Pushing the page to Kafka is disabled; the page is only saved
            # through save_file.
            self.save_file(create_time, f, now_minute, u, self.url, html)
        except Exception:
            is_success = False
            self.rl.exception()
        finally:
            r.close_phandomjs()

        values = [
            escaped_url, md5, create_time, create_day, create_hour, domain,
            escaped_param, update_time, host, title
        ]

        # Bound before the try block so that the except/finally clauses stay
        # valid even when the failure happens before they are assigned.
        d = None
        sql = ''
        try:
            if is_success:
                values.append(1)
                insert_web_page_sql = """
                    insert into hainiu_web_page (url,md5,create_time,create_day,create_hour,domain,param,update_time,host,
                    title,status) values ("%s","%s",%s,%s,%s,"%s","%s",%s,"%s","%s",%s) on DUPLICATE KEY  UPDATE update_time=values(update_time);
                """
            else:
                values.append(u.get_local_ip())
                values.append(2)
                insert_web_page_sql = """
                    insert into hainiu_web_page (url,md5,create_time,create_day,create_hour,domain,param,update_time,host,
                    title,fail_ip,status) values ("%s","%s",%s,%s,%s,"%s","%s",%s,"%s","%s","%s",%s)
                    on DUPLICATE KEY UPDATE fail_times=fail_times+1,fail_ip=values(fail_ip);
                """

            d = DBUtil(config._HAINIU_DB)
            sql = insert_web_page_sql % tuple(values)
            d.execute(sql)
        except Exception:
            is_success = False
            self.rl.exception()
            self.rl.error(sql)
            if d is not None:
                d.rollback()
                # NOTE(review): commit right after rollback is kept from the
                # original; it looks redundant - confirm against DBUtil.
                d.commit()
        finally:
            if d is not None:
                d.close()

        # NOTE(review): super(self.__class__, self) recurses endlessly if a
        # subclass inherits this method; the enclosing class name is not
        # visible from here, so the call is left unchanged.
        return super(self.__class__,
                     self).result(is_success,
                                  [md5, self.url, update_time, self.queue_id])