Exemplo n.º 1
0
 def run(self):
     """Sync rows named by the ``es:harm:insert:info`` Redis key into Elasticsearch.

     Loops forever. Each iteration:

     1. reads one entry via ``self.redis_con.get_yy_rowkey`` and parses it
        into a ``(rowkey, table)`` pair;
     2. derives the Elasticsearch ``_id`` (MD5 of the rowkey for the three
        tables listed below, the raw rowkey otherwise);
     3. loads the row from HBase, skipping the entry when nothing comes back;
     4. partially updates the existing document, or indexes a new one when
        no document with that ``_id`` exists.

     The Elasticsearch index name is ``HARM_INFO_ZIDUAN[table]``; an unknown
     table name therefore raises ``KeyError``.
     """
     import ast  # function-scope import: only this method needs it

     # Tables whose Elasticsearch _id is trans_md5(rowkey); every other table
     # uses the rowkey itself as the _id.
     hashed_id_tables = ("WECHAT_INFO_TABLE", "INFO_TABLE", "MONITOR_INFO_TABLE")

     while True:
         result = self.redis_con.get_yy_rowkey("es:harm:insert:info")
         logging.info(result)

         # SECURITY: this payload comes from Redis and used to be passed to
         # eval(), which executes arbitrary expressions. literal_eval only
         # accepts Python literals (tuples, lists, strings, numbers, ...).
         # literal_eval parses str only, whereas eval() also accepted bytes.
         if isinstance(result, bytes):
             result = result.decode("utf-8")
         rowkey, table = ast.literal_eval(result)

         _id = trans_md5(rowkey) if table in hashed_id_tables else rowkey
         logging.info("表格%s的rowkey的值为:%s", table, rowkey)

         index_name = HARM_INFO_ZIDUAN[table]
         row = self.hbase_con.getResultByRowkey(table, rowkey, index_name)
         if not row:
             # Nothing to sync for this rowkey.
             continue

         self.es_ping()
         if self.es.exists(index=index_name, doc_type="sino", id=_id):
             logging.info("rowkey值已存在")
             # Partial update: only the fields present in `row` are sent.
             self.es.update(index=index_name, doc_type="sino", id=_id,
                            body={"doc": row})
             logging.info("%s数据更新成功", _id)
         else:
             logging.info("rowkey值:%s不存在", _id)
             self.es.index(index=index_name, doc_type="sino", id=_id,
                           body=row)
Exemplo n.º 2
0
 def run(self):
     """Sync ``forum_info`` rowkeys from Redis into Elasticsearch in batches.

     Loops forever. Rowkeys are read with ``self.redis_con.get_rowkey``; a
     ``None`` result means nothing is pending, in which case any buffered
     actions are committed and the loop sleeps for 10 seconds.

     An entry may have the form ``"<rowkey>|||||<a>,<b>,..."``. The
     comma-separated part is passed as the extra argument to
     ``getResultByRowkey`` when the document already exists (presumably the
     list of fields to fetch -- confirm against the HBase helper).

     Existing documents are queued as ``update`` actions, missing ones as
     plain index actions. The buffer is handed to ``self.commit`` once more
     than ``COUNT_NUM`` actions were queued or more than 30 seconds have
     passed since the last commit.
     """
     action_list = []
     count = 0
     start = int(time.time())
     cunzai = 0  # number of update actions queued since the last commit
     while True:
         rowkey = self.redis_con.get_rowkey("forum_info")
         if rowkey is None:
             # Nothing pending: flush what is buffered, then back off.
             if action_list:
                 logging.warning("重复存入elasticsearch当中%d条数据", cunzai)
                 cunzai = 0
                 self.commit(action_list)
                 action_list.clear()
                 start = int(time.time())
                 count = 0
             time.sleep(10)
             continue

         param = None
         if "|||||" in rowkey:
             pieces = rowkey.split("|||||")
             rowkey = pieces[0]
             param = pieces[1].split(",")

         _id = trans_md5(rowkey)
         if self.es.exists(index="forum_info", doc_type="sino", id=_id):
             row = self.hbase_con.getResultByRowkey(
                 "MONITOR_INFO_TABLE", rowkey, "forum_info", param)
             if not row:
                 continue
             # Count only updates that are actually queued, so the logged
             # total matches what is committed.
             cunzai += 1
             action_list.append({
                 "_op_type": "update",
                 "_index": "forum_info",
                 "_type": "sino",
                 "_id": _id,
                 "doc": row,
             })
         else:
             # New document: fetch the row without the extra argument.
             row = self.hbase_con.getResultByRowkey(
                 "MONITOR_INFO_TABLE", rowkey, "forum_info")
             if not row:
                 continue
             action_list.append({
                 "_index": "forum_info",
                 "_type": "sino",
                 "_id": _id,
                 "_source": row,
             })

         end = int(time.time())
         count += 1
         if count > COUNT_NUM or (end - start) > 30:
             self.es_ping()
             logging.warning("重复存入elasticsearch当中%d条数据", cunzai)
             cunzai = 0
             if action_list:
                 self.commit(action_list)
             start = int(time.time())
             action_list.clear()
             count = 0
Exemplo n.º 3
0
 def run(self):
     """Sync ``xw_info`` rowkeys from Redis into Elasticsearch.

     Loops forever. Rowkeys are read with ``self.redis_con.get_rowkey``; a
     ``None`` result means nothing is pending, in which case any buffered
     update actions are committed and the loop sleeps for 10 seconds.

     An entry may have the form ``"<rowkey>|||||<a>,<b>,..."``. The
     comma-separated part is passed as the extra argument to
     ``getResultByRowkey`` when the document already exists (presumably the
     list of fields to fetch -- confirm against the HBase helper).

     Documents that do not exist yet are indexed immediately, one request
     each, and counted in ``self.insert_count``. Documents that already
     exist are queued as ``update`` actions and handed to ``self.commit``
     once more than ``COUNT_NUM`` were queued or more than 10 seconds have
     passed since the last commit.
     """
     action_list = []
     count = 0
     start = int(time.time())
     cunzai = 0  # number of update actions queued since the last commit
     while True:
         rowkey = self.redis_con.get_rowkey("xw_info")
         if rowkey is None:
             # Nothing pending: flush what is buffered, then back off.
             if action_list:
                 logging.warning("重复存入elasticsearch当中%d条数据", cunzai)
                 cunzai = 0
                 self.commit(action_list)
                 action_list.clear()
                 start = int(time.time())
                 count = 0
             time.sleep(10)
             continue

         param = None
         if "|||||" in rowkey:
             pieces = rowkey.split("|||||")
             rowkey = pieces[0]
             param = pieces[1].split(",")

         _id = trans_md5(rowkey)
         if not self.es.exists(index="xw_info", doc_type="sino", id=_id):
             # New document: index it right away instead of batching it.
             # NOTE(review): this path skips the flush check below, so
             # buffered updates are only committed when another update
             # arrives or get_rowkey returns None -- confirm that is intended.
             row = self.hbase_con.getResultByRowkey(
                 "INFO_TABLE", rowkey, "xw_info")
             if row:
                 self.es.index(index="xw_info", doc_type="sino", id=_id,
                               body=row)
                 self.insert_count += 1
             continue

         row = self.hbase_con.getResultByRowkey(
             "INFO_TABLE", rowkey, "xw_info", param)
         if not row:
             continue
         cunzai += 1
         action_list.append({
             "_op_type": "update",
             "_index": "xw_info",
             "_type": "sino",
             "_id": _id,
             "doc": row,
         })

         end = int(time.time())
         count += 1
         if count > COUNT_NUM or (end - start) > 10:
             self.es_ping()
             logging.warning("重复存入elasticsearch当中%d条数据", cunzai)
             cunzai = 0
             start = int(time.time())
             self.commit(action_list)
             action_list.clear()
             count = 0
Exemplo n.º 4
0
 def run(self):
     """Delete ``xw_info`` documents named by the ``es:news:del:info`` Redis key.

     Loops forever. Each rowkey read via ``self.redis_con.get_yy_rowkey`` is
     converted to a document id with ``trans_md5``; the document is deleted
     if Elasticsearch reports that it exists. When that attempt raises, the
     error is logged and the same exists-then-delete sequence is tried once
     more. The second attempt is not guarded, so a repeated failure
     propagates out of this method.
     """
     def drop_if_present(doc_id):
         # Only issue the delete when the document is actually there.
         if self.es.exists(index="xw_info", doc_type="sino", id=doc_id):
             self.es.delete(index="xw_info", doc_type="sino", id=doc_id)

     while True:
         rowkey = self.redis_con.get_yy_rowkey("es:news:del:info")
         _id = trans_md5(rowkey)
         self.es_ping()
         try:
             drop_if_present(_id)
         except Exception as e:
             logging.error("news info delete error %s" % str(e))
             # Single retry, issued from inside the handler.
             drop_if_present(_id)
Exemplo n.º 5
0
 def run(self):
     """Sync ``wx_info`` rowkeys from Redis into Elasticsearch in batches.

     Loops forever. Rowkeys are read with ``self.redis_con.get_rowkey``; a
     ``None`` result means nothing is pending, in which case any buffered
     actions are committed and the loop sleeps for 10 seconds.

     An entry may have the form ``"<rowkey>|||||<a>,<b>,..."``, where the
     comma-separated part lists the fields to sync; it is only used when the
     document already exists.

     Existing documents are queued as ``update`` actions, missing ones as
     plain index actions. The buffer is handed to ``self.commit`` once more
     than ``COUNT_NUM`` actions were queued or more than 30 seconds have
     passed since the last commit.
     """
     action_list = []
     count = 0
     start = int(time.time())
     cunzai = 0  # number of update actions queued since the last commit
     while True:
         # Fetch the next value to sync from Redis.
         rowkey = self.redis_con.get_rowkey("wx_info")
         # blpop cannot be used here, so "nothing pending" is detected by
         # checking for None.
         if rowkey is None:
             # No incoming data: push the accumulated actions to
             # Elasticsearch before sleeping.
             if action_list:
                 logging.warning("重复存入elasticsearch当中%d条数据", cunzai)
                 cunzai = 0
                 self.commit(action_list)
                 action_list.clear()
                 start = int(time.time())
                 count = 0
             time.sleep(10)
             continue

         # Separate the rowkey from the optional list of fields to sync.
         param = None
         if "|||||" in rowkey:
             pieces = rowkey.split("|||||")
             rowkey = pieces[0]
             param = pieces[1].split(",")

         # The HBase rowkey is converted to its MD5 form for use as the
         # Elasticsearch _id.
         _id = trans_md5(rowkey)

         # Check whether this _id already exists in Elasticsearch (original
         # author's note: costs about 30 ms, effectively a HEAD request).
         if self.es.exists(index="wx_info", doc_type="sino", id=_id):
             # Document exists: upload the data as an update.
             row = self.hbase_con.getResultByRowkey(
                 "WECHAT_INFO_TABLE", rowkey, "wx_info", param)
             if not row:
                 continue
             action = {
                 "_op_type": "update",
                 "_index": "wx_info",
                 "_type": "sino",
                 "_id": _id,
                 "doc": row,
             }
             cunzai += 1
         else:
             # Document does not exist: upload it as an insert. The field
             # restriction is irrelevant here, so the row is fetched without
             # `param`.
             row = self.hbase_con.getResultByRowkey(
                 "WECHAT_INFO_TABLE", rowkey, "wx_info")
             if not row:
                 continue
             action = {
                 "_index": "wx_info",
                 "_type": "sino",
                 "_id": _id,
                 "_source": row,
             }
         action_list.append(action)

         end = int(time.time())
         count += 1
         # Commit when more than COUNT_NUM actions are queued or more than
         # 30 seconds have passed since the last commit.
         if count > COUNT_NUM or (end - start) > 30:
             self.es_ping()
             logging.warning("重复存入elasticsearch当中%d条数据", cunzai)
             cunzai = 0
             self.commit(action_list)
             start = int(time.time())
             action_list.clear()
             count = 0
Exemplo n.º 6
0
 def run(self):
     """Sync ``xw_info_test`` rowkeys from Redis into the ``xw_info`` index.

     Loops forever. Rowkeys are read with ``self.redis_con.get_rowkey``; a
     ``None`` result means nothing is pending, in which case any buffered
     actions are committed and the loop sleeps for 10 seconds.

     Every rowkey received is recorded in the Redis hash
     ``es:news:info:tongji`` with the value ``1``; once the HBase row is
     loaded, that value is replaced by the row's ``content_md5`` when the
     row has a non-empty one.

     An entry may have the form ``"<rowkey>|||||<a>,<b>,..."``. The
     comma-separated part is passed as the extra argument to
     ``getResultByRowkey`` when the document already exists (presumably the
     list of fields to fetch -- confirm against the HBase helper).

     Existing documents are queued as ``update`` actions. Missing ones are
     queued as index actions and their rowkey is pushed back with
     ``set_rowkey("xw_info_test", ...)``. The buffer is handed to
     ``self.commit`` once more than ``COUNT_NUM`` actions were queued or more
     than 20 seconds have passed since the last commit.
     """
     action_list = []
     count = 0
     start = int(time.time())
     cunzai = 0  # number of update actions queued since the last commit
     while True:
         rowkey = self.redis_con.get_rowkey("xw_info_test")
         if rowkey is None:
             # Nothing pending: flush what is buffered, then back off.
             if action_list:
                 logging.warning("重复存入elasticsearch当中%d条数据", cunzai)
                 cunzai = 0
                 self.commit(action_list)
                 action_list.clear()
                 start = int(time.time())
                 count = 0
             time.sleep(10)
             continue

         param = None
         if "|||||" in rowkey:
             pieces = rowkey.split("|||||")
             rowkey = pieces[0]
             param = pieces[1].split(",")

         # Mark the bare rowkey as seen in the statistics hash.
         self.redis_con.redis_con.hset("es:news:info:tongji", rowkey, 1)

         _id = trans_md5(rowkey)
         if self.es.exists(index="xw_info", doc_type="sino", id=_id):
             row = self.hbase_con.getResultByRowkey(
                 "INFO_TABLE", rowkey, "xw_info", param)
             if not row:
                 continue
             action = {
                 "_op_type": "update",
                 "_index": "xw_info",
                 "_type": "sino",
                 "_id": _id,
                 "doc": row,
             }
             cunzai += 1
             print("修改" + rowkey)
         else:
             row = self.hbase_con.getResultByRowkey(
                 "INFO_TABLE", rowkey, "xw_info")
             if not row:
                 continue
             action = {
                 "_index": "xw_info",
                 "_type": "sino",
                 "_id": _id,
                 "_source": row,
             }
             print("新增" + rowkey)
             # Push the rowkey back so it is read again later.
             self.redis_con.set_rowkey("xw_info_test", rowkey)

         # .get(): a row without a content_md5 field must not raise KeyError
         # and stop the loop.
         content_md5 = row.get('content_md5')
         if content_md5:
             self.redis_con.redis_con.hset("es:news:info:tongji", rowkey,
                                           content_md5)
         action_list.append(action)

         end = int(time.time())
         count += 1
         if count > COUNT_NUM or (end - start) > 20:
             self.es_ping()
             logging.warning("重复存入elasticsearch当中%d条数据", cunzai)
             cunzai = 0
             start = int(time.time())
             self.commit(action_list)
             action_list.clear()
             count = 0