def fail_action(self, values):
    """Record a failed processing attempt for a queued message.

    Increments ``fail_times`` and stores this machine's IP in ``fail_ip`` on
    the matching ``hainiu_queue`` row. When ``self.try_num`` equals
    ``Consumer._WORK_TRY_NUM`` the row's ``type`` is additionally set to 1;
    per the original author's note this marks the message as unprocessed so
    that other machines can try to handle it again.

    Both statements are executed without committing and are committed
    together; on any error the exception and the last SQL statement are
    logged through ``self.rl`` and the transaction is rolled back.

    :param values: result produced by the message action; ``values[0]`` is
        used as the ``hainiu_queue`` row id.
    """
    update_sql = """ update hainiu_queue set fail_times=fail_times+1,fail_ip='%s' where id=%s; """
    update_sql_1 = """ update hainiu_queue set type=1 where id=%s """

    # Bind these before the try block: if DBUtil(...) or values[0] raises,
    # the except/finally clauses must not fail with a NameError that would
    # hide the real error.
    d = None
    sql = ''
    try:
        d = DBUtil(config._HAINIU_DB)
        queue_id = values[0]
        ip = Util().get_local_ip()

        sql = update_sql % (ip, queue_id)
        d.execute_no_commit(sql)

        if self.try_num == Consumer._WORK_TRY_NUM:
            sql = update_sql_1 % queue_id
            d.execute_no_commit(sql)

        d.commit()
    except Exception:
        self.rl.exception()
        if sql:
            self.rl.error(sql)
        if d is not None:
            d.rollback()
    finally:
        if d is not None:
            d.close()
def fail_action(self, values):
    """Record a failed processing attempt for a queued message and its seed.

    Increments ``fail_times`` and stores this machine's IP in ``fail_ip`` on
    the ``hainiu_queue`` row identified by ``values[5]`` and on the
    ``hainiu_web_seed`` row whose ``md5`` equals ``values[0]``. When
    ``self.try_num`` equals ``Consumer._WORK_TRY_NUM`` the queue row's
    ``type`` is additionally set to 1.

    All statements are executed without committing and are committed
    together; on any error the exception and the last SQL statement are
    logged through ``self.rl`` and the transaction is rolled back.

    :param values: result produced by the message action; index 0 is used as
        the seed md5 and index 5 as the queue row id.
    """
    update_sql = """ update hainiu_queue set fail_times=fail_times+1,fail_ip='%s' where id=%s; """
    update_sql_1 = """ update hainiu_queue set type=1 where id=%s; """
    update_hainiu_news_seed_sql = """ update hainiu_web_seed set fail_times=fail_times+1,fail_ip="%s" where md5="%s"; """

    # Bind these before the try block: if DBUtil(...) or the values lookup
    # raises, the except/finally clauses must not fail with a NameError that
    # would hide the real error.
    d = None
    sql = ''
    try:
        d = DBUtil(config._HAINIU_DB)
        queue_id = values[5]
        ip = Util().get_local_ip()

        sql = update_sql % (ip, queue_id)
        d.execute_no_commit(sql)

        main_md5 = values[0]
        sql = update_hainiu_news_seed_sql % (ip, main_md5)
        d.execute_no_commit(sql)

        if self.try_num == Consumer._WORK_TRY_NUM:
            sql = update_sql_1 % (queue_id)
            d.execute_no_commit(sql)

        d.commit()
    except Exception:
        self.rl.exception()
        if sql:
            self.rl.error(sql)
        if d is not None:
            d.rollback()
            # NOTE(review): the commit after rollback is kept from the
            # original code; it looks redundant -- confirm against DBUtil
            # before removing it.
            d.commit()
    finally:
        if d is not None:
            d.close()
def date_merge():
    """Merge verified records from the local ``done`` directory into one file.

    Every file found under ``config._LOCAL_DATA_DIR % 'done'`` is read line
    by line. A line is split at its first ``\\001`` into an md5 prefix and a
    record; the record is kept only when ``Util.get_md5(record)`` equals the
    prefix. Kept records are appended to a temporary merge file in batches
    of ten (plus one final partial batch per source file), newline-separated.

    A line that fails processing gets its traceback and content printed and
    triggers an SMS alert. Each source file is removed once it has been
    read, and a summary line (path, size, lines read, lines merged) is
    printed. Finally, a non-empty temporary file is moved to the ``up``
    location.
    """
    md5_util = Util()
    file_util = FileUtil()
    time_util = TimeUtil()
    sms = SendSmsUtil()

    alter_time = time_util.now_time()
    stamp = int(time.mktime(datetime.now().timetuple()))
    tmp_path = config._LOCAL_DATA_DIR % ('%s/%s_%s.tmp' % ('merge_tmp', 'seed', stamp))
    up_path = config._LOCAL_DATA_DIR % ('%s/%s_%s.done' % ('up', 'seed', stamp))

    # Empty before the first write, '\n' afterwards, so that batches are
    # newline-separated without a leading blank line.
    joiner = ''

    for dirpath, _dirnames, filenames in os.walk(config._LOCAL_DATA_DIR % ('done')):
        for filename in filenames:
            total = 0
            merge_total = 0
            src_path = os.path.join(dirpath, filename)
            file_size = os.path.getsize(src_path)
            pending = []

            with open(src_path) as src:
                for line in src:
                    try:
                        total += 1
                        line = line.strip().encode('utf-8')
                        if not line:
                            continue

                        split_at = line.find('\001')
                        md5 = line[:split_at]
                        record = line[split_at + 1:]
                        if md5 != md5_util.get_md5(record):
                            raise Exception('check is faild')

                        merge_total += 1
                        pending.append(record)

                        if len(pending) >= 10:
                            file_util.write_file_content_pattern(
                                tmp_path, joiner + ('\n'.join(pending)), pattern='a')
                            pending = []
                            joiner = '\n'
                    except Exception:
                        traceback.print_exc()
                        print(line)
                        alter_msg = 'alter merge api hainiu time:%s ip:%s' % (
                            alter_time, md5_util.get_local_ip())
                        sms.send_sms(alter_msg)

            # Flush whatever is left over from the last incomplete batch.
            if len(pending) > 0:
                file_util.write_file_content_pattern(
                    tmp_path, joiner + ('\n'.join(pending)), pattern='a')
                joiner = '\n'

            os.remove(src_path)
            print('%s %s %s %s' % (src_path, file_size, total, merge_total))

    if os.path.exists(tmp_path) and os.path.getsize(tmp_path) > 0:
        shutil.move(tmp_path, up_path)
def action(self):
    """Download ``self.url``, store the page and record the outcome in MySQL.

    Steps:

    1. Fetch the page with ``RequestUtil.http_get_phandomjs`` and derive the
       domain (``get_tld``), the host and the ``<title>`` text.
    2. Strip/replace the ``content._SEQ1`` / ``content._SEQ2`` markers in the
       HTML and hand it to ``self.save_file``.
    3. Insert a row into ``hainiu_web_page``: status 1 on success, or
       status 2 together with this machine's IP on failure (host and title
       are stored blank in that case).

    Any exception in step 1-2 or in the database step is logged through
    ``self.rl`` and turns the result into a failure; it is not re-raised.

    :return: the value of the parent class's ``result`` called with
        ``is_success`` and ``[md5, self.url, update_time, self.queue_id]``.
    """
    is_success = True
    t = TimeUtil()
    f = FileUtil()
    u = Util()
    hu = HtmlUtil()
    r = RequestUtil()

    md5 = u.get_md5(self.url)
    update_time = int(time.mktime(datetime.now().timetuple()))
    create_time = update_time
    create_day = int(t.now_day().replace('-', ''))
    create_hour = int(t.now_hour())

    # Round the current minute down to a multiple of five and zero-pad it
    # (equivalent to the original countdown loop for minutes 0-59).
    now_minute = int(t.now_min())
    now_minute -= now_minute % 5
    now_minute = t.now_time(format='%Y%m%d%H') + '%02d' % now_minute

    # Positional values for the insert statement; index 5 (domain) is filled
    # in once the URL has been parsed.
    values = [
        MySQLdb.escape_string(self.url),
        md5,
        create_time,
        create_day,
        create_hour,
        '',
        MySQLdb.escape_string(self.param),
        update_time,
    ]

    # host/title are appended exactly once after the try block. Appending
    # them inside it (as before) left extra entries in `values` when a later
    # step failed, so the failure insert no longer matched its placeholders.
    host = ''
    title = ''
    try:
        html = r.http_get_phandomjs(self.url)
        values[5] = get_tld(self.url)

        soup = BeautifulSoup(html, 'lxml')
        title_doc = soup.find('title')
        if title_doc is not None and len(title_doc.contents) == 1:
            raw_title = title_doc.contents[0]
        else:
            raw_title = ''
        host = hu.get_url_host(self.url)
        title = MySQLdb.escape_string(raw_title)

        html = html.replace(content._SEQ1, '').replace(content._SEQ2, content._SEQ4)
        # NOTE: publishing the page to Kafka is currently disabled; the page
        # is only handed to self.save_file.
        self.save_file(create_time, f, now_minute, u, self.url, html)
    except Exception:
        is_success = False
        host = ''
        title = ''
        self.rl.exception()
    finally:
        r.close_phandomjs()
    values.append(host)
    values.append(title)

    # Bind before the try block so the except/finally clauses cannot raise a
    # NameError when DBUtil(...) or the SQL formatting fails.
    d = None
    sql = ''
    try:
        if is_success:
            values.append(1)
            insert_web_page_sql = (
                ' insert into hainiu_web_page (url,md5,create_time,create_day,'
                'create_hour,domain,param,update_time,host, title,status)'
                ' values ("%s","%s",%s,%s,%s,"%s","%s",%s,"%s","%s",%s)'
                ' on DUPLICATE KEY \nUPDATE update_time=values(update_time); '
            )
        else:
            values.append(u.get_local_ip())
            values.append(2)
            insert_web_page_sql = (
                ' insert into hainiu_web_page (url,md5,create_time,create_day,'
                'create_hour,domain,param,update_time,host, title,fail_ip,status)'
                ' values ("%s","%s",%s,%s,%s,"%s","%s",%s,"%s","%s","%s",%s)'
                ' on DUPLICATE KEY UPDATE fail_times=fail_times+1,'
                'fail_ip=values(fail_ip); '
            )
        d = DBUtil(config._HAINIU_DB)
        sql = insert_web_page_sql % tuple(values)
        d.execute(sql)
    except Exception:
        is_success = False
        self.rl.exception()
        if sql:
            self.rl.error(sql)
        if d is not None:
            d.rollback()
            # NOTE(review): commit after rollback kept from the original;
            # it looks redundant -- confirm against DBUtil before removing.
            d.commit()
    finally:
        if d is not None:
            d.close()

    # NOTE(review): super(self.__class__, self) starts the lookup after the
    # *runtime* class rather than this class, so it resolves differently if
    # this class is ever subclassed; naming the class explicitly would be
    # safer (the class name is not visible in this chunk).
    return super(self.__class__, self).result(
        is_success, [md5, self.url, update_time, self.queue_id])