class NewsDelInfo(object):
    """Consume rowkeys from redis and delete the matching news docs from ES.

    Rowkeys arrive on the redis list ``es:news:del:info``; the ES ``_id`` is
    the md5 of the rowkey (see ``trans_md5``).
    """

    def __init__(self):
        # hbase connection kept for parity with the sibling sync tasks
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        # ignore=404: deleting an already-missing document must not raise
        self.es = Elasticsearch(ES_ADDR, ignore=404)

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_ADDR, ignore=404)

    def _delete_one(self, _id):
        # exists() is a cheap HEAD request; only delete when the doc is there
        if self.es.exists(index="xw_info", doc_type="sino", id=_id):
            self.es.delete(index="xw_info", doc_type="sino", id=_id)

    def run(self):
        """Blocking loop: pop a rowkey, hash it to the ES _id, delete the doc."""
        while True:
            rowkey = self.redis_con.get_yy_rowkey("es:news:del:info")
            _id = trans_md5(rowkey)
            self.es_ping()
            try:
                self._delete_one(_id)
            except Exception as e:
                log_info = "news info delete error %s" % str(e)
                logging.error(log_info)
                # reconnect before the single retry (original retried on the
                # possibly-dead client); a second failure propagates
                self.es_ping()
                self._delete_one(_id)
class Video(object):
    """Sync video records from HBase into the ES ``video`` index in bulk."""

    def __init__(self):
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_SF_ADDR)

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_SF_ADDR)

    def run(self):
        """Poll redis for rowkeys, batch them, and bulk-index into ES.

        A batch is flushed when it exceeds COUNT_NUM entries, when more than
        30 seconds elapsed since the last flush, or when the queue runs dry.
        """
        action_list = []
        count = 0
        start = int(time.time())
        while True:
            rowkey = self.redis_con.get_rowkey("video")
            if rowkey is None:  # was `== None`
                # queue drained: flush pending actions, then back off
                if len(action_list) > 0:
                    self.commit(action_list)
                    action_list.clear()
                    count = 0
                    start = int(time.time())
                time.sleep(10)
                continue
            if "|||||" in rowkey:
                # payload may carry field hints after "|||||"; only the key matters here
                rowkey = rowkey.split("|||||")[0]
            record = self.hbase_con.getSuanfaResultByRowkey(
                "VIDEO_DATA_TS_TABLE", rowkey, "video")  # renamed from `map` (builtin shadow)
            if not record:
                continue
            action_list.append({
                "_index": "video",
                "_type": "sino",
                "_id": rowkey,
                "_source": record,
            })
            end = int(time.time())
            count = count + 1
            if count > COUNT_NUM or (end - start) > 30:
                if len(action_list) > 0:
                    self.es_ping()
                    self.commit(action_list)
                    start = int(time.time())
                    action_list.clear()
                    count = 0

    def commit(self, action_list):
        """Bulk-write the batch; reconnect and retry once on failure."""
        try:
            helpers.bulk(self.es, action_list)
        except Exception as e:
            log_info = "index:video,\terror:" + str(e)
            logging.error(log_info)
            # reconnect before the retry (original retried on the dead client);
            # a second failure propagates to the caller
            self.es_ping()
            helpers.bulk(self.es, action_list)
        logging.info("提交成功")
class HarmInsertInfo(object):
    """Upsert "harmful info" documents from HBase into ES.

    create by: yangjt
    Redis payloads on ``es:harm:insert:info`` are literal ``(rowkey, table)``
    tuples; the target index is looked up in ``HARM_INFO_ZIDUAN``.
    """

    def __init__(self):
        # initialize hbase / redis / elasticsearch connections
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_ADDR)

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_ADDR)

    def run(self):
        """Blocking loop: pop ``(rowkey, table)`` pairs and upsert into ES."""
        # local import: only needed to parse the redis payload
        import ast
        while True:
            result = self.redis_con.get_yy_rowkey("es:harm:insert:info")
            logging.info(result)
            # literal_eval replaces eval(): same result for literal tuples but
            # cannot execute arbitrary code smuggled into the queue
            rowkey, table = ast.literal_eval(result)  # renamed from `type` (builtin shadow)
            _id = rowkey
            # these tables use the md5 of the rowkey as the ES _id
            if table in ("WECHAT_INFO_TABLE", "INFO_TABLE", "MONITOR_INFO_TABLE"):
                _id = trans_md5(rowkey)
            log_info = "表格%s的rowkey的值为:%s" % (table, rowkey)
            logging.info(log_info)
            record = self.hbase_con.getResultByRowkey(table, rowkey, HARM_INFO_ZIDUAN[table])
            if not record:
                continue
            self.es_ping()
            boo = self.es.exists(HARM_INFO_ZIDUAN[table], "sino", _id)
            if boo:
                # existing doc: partial update
                doc = {"doc": record}
                log_info = "rowkey值已存在"
                logging.info(log_info)
                self.es.update(HARM_INFO_ZIDUAN[table], doc_type="sino", id=_id, body=doc)
                log_info = "%s数据更新成功" % _id
                logging.info(log_info)
            else:
                # new doc: plain index
                log_info = "rowkey值:%s不存在" % _id
                logging.info(log_info)
                self.es.index(HARM_INFO_ZIDUAN[table], doc_type="sino", id=_id, body=record)
class ImageDelInfo(object):
    """Batch-delete image documents from ES based on rowkeys queued in redis."""

    def __init__(self):
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        # ignore=404: deleting an already-missing document must not raise
        self.es = Elasticsearch(ES_SF_ADDR, ignore=404)

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_SF_ADDR, ignore=404)

    def delete_index(self, datas):
        """Build delete actions for every rowkey that still exists, then commit."""
        action_list = []
        logging.warning("开始检查")
        # reconnect up front: the per-doc exists() probes were previously made
        # on a possibly-dead client and would crash the run() loop
        self.es_ping()
        for rowkey in datas:
            boo = self.es.exists(index="image", doc_type="sino", id=rowkey)
            if boo:
                action_list.append({
                    "_op_type": "delete",
                    "_index": "image",
                    "_type": "sino",
                    "_id": rowkey,
                    "_source": {},
                })
        logging.warning("检查完毕:%d" % len(action_list))
        if len(action_list) > 0:
            self.commit(action_list)

    def commit(self, action_list):
        """Bulk-delete the batch; reconnect and retry once on failure."""
        try:
            self.es_ping()
            helpers.bulk(self.es, action_list)
        except Exception as e:
            log_info = "index:image,\terror:" + str(e)
            logging.error(log_info)
            self.es_ping()
            helpers.bulk(self.es, action_list)

    def run(self):
        """Accumulate rowkeys from redis and flush (deduplicated) every 3000+."""
        count = 0
        data_list = []
        while True:
            rowkey = self.redis_con.get_yy_rowkey("es:image:del:info")
            count = count + 1
            data_list.append(rowkey)
            if count > 3000:
                # deduplicate before issuing deletes
                data_list = list(set(data_list))
                self.delete_index(data_list)
                data_list.clear()
                count = 0
def __init__(self):
    """
    Bootstrap the installer environment: platform detection, sandbox
    directory creation, config/state file loading and log/exception wiring.

    :param configdir: default /sandbox/cfg, then ~/sandbox/cfg if not exists
    :return:
    """
    self.tools = Tools(self)
    self.DEFAULT_BRANCH = DEFAULT_BRANCH
    self.readonly = False  # if readonly will not manipulate local filesystem appart from /tmp
    self.sandbox_python_active = False  # means we have a sandboxed environment where python3 works in
    self.sandbox_lua_active = False  # same for lua
    self.config_changed = False
    self._cmd_installed = {}
    # should be the only location where we allow logs to be going elsewhere
    self.loghandlers = []
    self.errorhandlers = []
    self.state = None
    self.__init = False
    self.debug = False
    self.log_console = False
    self.log_level = 15
    self._secret = None
    self.interactive = False
    self.appname = "installer"
    self.FORMAT_TIME = "%a %d %H:%M:%S"
    # ANSI color table used by the log formatter
    self.MYCOLORS = {
        "RED": "\033[1;31m",
        "BLUE": "\033[1;34m",
        "CYAN": "\033[1;36m",
        "GREEN": "\033[0;32m",
        "GRAY": "\033[0;37m",
        "YELLOW": "\033[0;33m",
        "RESET": "\033[0;0m",
        "BOLD": "\033[;1m",
        "REVERSE": "\033[;7m",
    }
    # same keys, empty values: used when color output must be suppressed
    self.MYCOLORS_IGNORE = {
        "RED": "",
        "BLUE": "",
        "CYAN": "",
        "GREEN": "",
        "GRAY": "",
        "YELLOW": "",
        "RESET": "",
        "BOLD": "",
        "REVERSE": "",
    }
    LOGFORMATBASE = "{COLOR}{TIME} {filename:<20}{RESET} -{linenr:4d} - {GRAY}{context:<35}{RESET}: {message}"  # DO NOT CHANGE COLOR
    self.LOGFORMAT = {
        "DEBUG": LOGFORMATBASE.replace("{COLOR}", "{CYAN}"),
        "STDOUT": "{message}",
        # 'INFO': '{BLUE}* {message}{RESET}',
        "INFO": LOGFORMATBASE.replace("{COLOR}", "{BLUE}"),
        "WARNING": LOGFORMATBASE.replace("{COLOR}", "{YELLOW}"),
        "ERROR": LOGFORMATBASE.replace("{COLOR}", "{RED}"),
        "CRITICAL": "{RED}{TIME} {filename:<20} -{linenr:4d} - {GRAY}{context:<35}{RESET}: {message}",
    }
    self.GITREPOS = GITREPOS
    self._db = None
    # sub-installers all receive a back-reference to this environment
    self.installers = Installers()
    self.installers.osx = OSXInstaller(self)
    self.installers.ubuntu = UbuntuInstaller(self)
    self.installers.base = BaseInstaller(self)
    self.installers.jumpscale = JumpscaleInstaller(self)
    self.docker = DockerFactory(self)
    self.redis = RedisTools(self)
    # NOTE(review): platform_is_windows is only assigned in the win32 branch;
    # later reads of it on linux/darwin look unsafe — verify against callers
    if self.platform() == "linux":
        self.platform_is_linux = True
        self.platform_is_unix = True
        self.platform_is_osx = False
    elif "darwin" in self.platform():
        self.platform_is_linux = False
        self.platform_is_unix = True
        self.platform_is_osx = True
    elif "win32" in self.platform():
        self.platform_is_linux = False
        self.platform_is_unix = False
        self.platform_is_osx = False
        self.platform_is_windows = True
    else:
        raise self.tools.exceptions.Base(
            "platform not supported, only linux or osx and windows for now."
        )
    configdir = self._cfgdir_get()
    basedir = self._basedir_get()
    if basedir == "/sandbox" and not os.path.exists(basedir):
        # first run as root/sandbox owner: create /sandbox and hand it to the user
        script = """
        set -e
        cd /
        sudo mkdir -p /sandbox/cfg
        sudo chown -R {USERNAME}:{GROUPNAME} /sandbox
        mkdir -p /usr/local/EGG-INFO
        sudo chown -R {USERNAME}:{GROUPNAME} /usr/local/EGG-INFO
        """
        args = {}
        args["USERNAME"] = getpass.getuser()
        # NOTE(review): self.config is read here but only assigned further
        # below — confirm this branch is reachable before config exists
        st = os.stat(self.config["DIR_HOME"])
        gid = st.st_gid
        # import is here cause it's only unix
        # for windows support
        import grp

        args["GROUPNAME"] = grp.getgrgid(gid)[0]
        self.tools.execute(script, interactive=True, args=args, die_if_args_left=True)
    # Set codedir
    self.tools.dir_ensure(f"{basedir}/code")
    self.config_file_path = os.path.join(configdir, "jumpscale_config.toml")
    self.state_file_path = os.path.join(configdir, "jumpscale_done.toml")
    if self.tools.exists(self.config_file_path):
        self._config_load()
        # half-written config: bail out without marking __init done
        if not "DIR_BASE" in self.config:
            return
    else:
        self.config = self.config_default_get()
    # logger include/exclude filters, dropping empty/quoted-empty entries
    self.log_includes = [
        i for i in self.config.get("LOGGER_INCLUDE", []) if i.strip().strip("''") != ""
    ]
    self.log_excludes = [
        i for i in self.config.get("LOGGER_EXCLUDE", []) if i.strip().strip("''") != ""
    ]
    self.log_level = self.config.get("LOGGER_LEVEL", 10)
    # self.log_console = self.config.get("LOGGER_CONSOLE", False)
    # self.log_redis = self.config.get("LOGGER_REDIS", True)
    self.debug = self.config.get("DEBUG", False)
    if "JSXDEBUG" in os.environ:
        self.debug = True
    self.debugger = self.config.get("DEBUGGER", "pudb")
    # a bundled python3.6 under DIR_BASE/bin marks an active sandbox python
    if os.path.exists(os.path.join(self.config["DIR_BASE"], "bin", "python3.6")):
        self.sandbox_python_active = True
    else:
        self.sandbox_python_active = False
    self._state_load()
    self.sshagent = SSHAgent(myenv=self)
    # route all uncaught exceptions through our own handler
    sys.excepthook = self.excepthook
    if self.tools.exists("{}/bin".format(self.config["DIR_BASE"])):  # To check that Js is on host
        self.loghandler_redis = LogHandler(self, db=self.db)
    else:
        # print("- redis loghandler cannot be loaded")
        self.loghandler_redis = None
    self.__init = True
class MyEnv:
    """Installer environment singleton: platform detection, sandbox layout,
    configuration/state persistence, secret handling and exception hooks."""

    def __init__(self):
        """
        :param configdir: default /sandbox/cfg, then ~/sandbox/cfg if not exists
        :return:
        """
        self.tools = Tools(self)
        self.DEFAULT_BRANCH = DEFAULT_BRANCH
        self.readonly = False  # if readonly will not manipulate local filesystem appart from /tmp
        self.sandbox_python_active = False  # means we have a sandboxed environment where python3 works in
        self.sandbox_lua_active = False  # same for lua
        self.config_changed = False
        self._cmd_installed = {}
        # should be the only location where we allow logs to be going elsewhere
        self.loghandlers = []
        self.errorhandlers = []
        self.state = None
        self.__init = False
        self.debug = False
        self.log_console = False
        self.log_level = 15
        self._secret = None
        self.interactive = False
        self.appname = "installer"
        self.FORMAT_TIME = "%a %d %H:%M:%S"
        # ANSI color table used by the log formatter
        self.MYCOLORS = {
            "RED": "\033[1;31m",
            "BLUE": "\033[1;34m",
            "CYAN": "\033[1;36m",
            "GREEN": "\033[0;32m",
            "GRAY": "\033[0;37m",
            "YELLOW": "\033[0;33m",
            "RESET": "\033[0;0m",
            "BOLD": "\033[;1m",
            "REVERSE": "\033[;7m",
        }
        # same keys, empty values: used when color output must be suppressed
        self.MYCOLORS_IGNORE = {
            "RED": "",
            "BLUE": "",
            "CYAN": "",
            "GREEN": "",
            "GRAY": "",
            "YELLOW": "",
            "RESET": "",
            "BOLD": "",
            "REVERSE": "",
        }
        LOGFORMATBASE = "{COLOR}{TIME} {filename:<20}{RESET} -{linenr:4d} - {GRAY}{context:<35}{RESET}: {message}"  # DO NOT CHANGE COLOR
        self.LOGFORMAT = {
            "DEBUG": LOGFORMATBASE.replace("{COLOR}", "{CYAN}"),
            "STDOUT": "{message}",
            # 'INFO': '{BLUE}* {message}{RESET}',
            "INFO": LOGFORMATBASE.replace("{COLOR}", "{BLUE}"),
            "WARNING": LOGFORMATBASE.replace("{COLOR}", "{YELLOW}"),
            "ERROR": LOGFORMATBASE.replace("{COLOR}", "{RED}"),
            "CRITICAL": "{RED}{TIME} {filename:<20} -{linenr:4d} - {GRAY}{context:<35}{RESET}: {message}",
        }
        self.GITREPOS = GITREPOS
        self._db = None
        # sub-installers all receive a back-reference to this environment
        self.installers = Installers()
        self.installers.osx = OSXInstaller(self)
        self.installers.ubuntu = UbuntuInstaller(self)
        self.installers.base = BaseInstaller(self)
        self.installers.jumpscale = JumpscaleInstaller(self)
        self.docker = DockerFactory(self)
        self.redis = RedisTools(self)
        # NOTE(review): platform_is_windows is only assigned in the win32
        # branch; later reads on linux/darwin look unsafe — verify
        if self.platform() == "linux":
            self.platform_is_linux = True
            self.platform_is_unix = True
            self.platform_is_osx = False
        elif "darwin" in self.platform():
            self.platform_is_linux = False
            self.platform_is_unix = True
            self.platform_is_osx = True
        elif "win32" in self.platform():
            self.platform_is_linux = False
            self.platform_is_unix = False
            self.platform_is_osx = False
            self.platform_is_windows = True
        else:
            raise self.tools.exceptions.Base(
                "platform not supported, only linux or osx and windows for now."
            )
        configdir = self._cfgdir_get()
        basedir = self._basedir_get()
        if basedir == "/sandbox" and not os.path.exists(basedir):
            # first run: create /sandbox and hand ownership to the current user
            script = """
            set -e
            cd /
            sudo mkdir -p /sandbox/cfg
            sudo chown -R {USERNAME}:{GROUPNAME} /sandbox
            mkdir -p /usr/local/EGG-INFO
            sudo chown -R {USERNAME}:{GROUPNAME} /usr/local/EGG-INFO
            """
            args = {}
            args["USERNAME"] = getpass.getuser()
            # NOTE(review): self.config is read here but only assigned further
            # below — confirm this branch is reachable before config exists
            st = os.stat(self.config["DIR_HOME"])
            gid = st.st_gid
            # import is here cause it's only unix
            # for windows support
            import grp

            args["GROUPNAME"] = grp.getgrgid(gid)[0]
            self.tools.execute(script, interactive=True, args=args, die_if_args_left=True)
        # Set codedir
        self.tools.dir_ensure(f"{basedir}/code")
        self.config_file_path = os.path.join(configdir, "jumpscale_config.toml")
        self.state_file_path = os.path.join(configdir, "jumpscale_done.toml")
        if self.tools.exists(self.config_file_path):
            self._config_load()
            # half-written config: bail out without marking __init done
            if not "DIR_BASE" in self.config:
                return
        else:
            self.config = self.config_default_get()
        # logger include/exclude filters, dropping empty/quoted-empty entries
        self.log_includes = [
            i for i in self.config.get("LOGGER_INCLUDE", []) if i.strip().strip("''") != ""
        ]
        self.log_excludes = [
            i for i in self.config.get("LOGGER_EXCLUDE", []) if i.strip().strip("''") != ""
        ]
        self.log_level = self.config.get("LOGGER_LEVEL", 10)
        # self.log_console = self.config.get("LOGGER_CONSOLE", False)
        # self.log_redis = self.config.get("LOGGER_REDIS", True)
        self.debug = self.config.get("DEBUG", False)
        if "JSXDEBUG" in os.environ:
            self.debug = True
        self.debugger = self.config.get("DEBUGGER", "pudb")
        # a bundled python3.6 under DIR_BASE/bin marks an active sandbox python
        if os.path.exists(os.path.join(self.config["DIR_BASE"], "bin", "python3.6")):
            self.sandbox_python_active = True
        else:
            self.sandbox_python_active = False
        self._state_load()
        self.sshagent = SSHAgent(myenv=self)
        # route all uncaught exceptions through our own handler
        sys.excepthook = self.excepthook
        if self.tools.exists("{}/bin".format(self.config["DIR_BASE"])):  # To check that Js is on host
            self.loghandler_redis = LogHandler(self, db=self.db)
        else:
            # print("- redis loghandler cannot be loaded")
            self.loghandler_redis = None
        self.__init = True

    @property
    def db(self):
        """Lazily-resolved redis core client; None when redis is not used.

        The sentinel string "NOTUSED" caches a failed probe so we only try
        to reach redis once.
        """
        if self._db == "NOTUSED":
            return None
        if not self._db:
            if self.redis.client_core_get(die=False):
                self._db = self.redis._core_get()
            else:
                self._db = "NOTUSED"
        return self._db

    def redis_start(self):
        """Force-start the redis core client (bypasses the NOTUSED cache)."""
        self._db = self.redis._core_get()

    def secret_set(self, secret=None, secret_expiration_hours=48):
        """
        can be the hash or the originating secret passphrase
        """
        if not secret:
            secret = self.tools.ask_password(
                "please specify secret passphrase for your SDK/3bot (<32chars)"
            )
        assert len(secret) < 32
        # normalize to the md5-hex form before storing
        secret = self._secret_format(secret)
        expiration = secret_expiration_hours * 3600
        if self.db:
            self.db.set("threebot.secret.encrypted", secret, ex=expiration)
        return secret

    def _secret_format(self, secret):
        """Normalize a passphrase to a 32-char md5 hex digest (bytes in, hex out).

        Anything that is not already 32 bytes long is hashed.
        """
        if not isinstance(secret, bytes):
            secret = secret.encode()
        if len(secret) != 32:
            import hashlib

            m = hashlib.md5()
            m.update(secret)
            secret = m.hexdigest()
        return secret

    def secret_get(self):
        """Return the cached secret, resolving it from redis / JSXSECRET /
        interactive prompt on first use. Always a 32-char str."""
        if not self._secret:
            secret = None
            # toremove = None
            # for key in sys.argv:
            #     if key.startswith("--secret"):
            #         secret = key.split("=", 1)[1].strip()
            #         # start the redis, because secret specified
            #         RedisTools._core_get()
            #         self.secret_set(secret=secret)
            #         toremove = key
            # if toremove:
            #     # means we can remove the --secret from sys.arg
            #     # important to do or future command line arg parsing will fail
            #     sys.argv.pop(sys.argv.index(toremove))
            if self.db:
                secret = self.db.get("threebot.secret.encrypted")
            if "JSXSECRET" in os.environ:
                # environment variable wins over the redis-stored secret
                secret = os.environ["JSXSECRET"].strip()
                secret = self._secret_format(secret)
            if not secret:
                secret = self.secret_set()
            if isinstance(secret, bytes):
                secret = secret.decode()
            self._secret = secret
            assert len(self._secret) == 32
        return self._secret

    def platform(self):
        """
        will return one of following strings: linux, darwin
        """
        return sys.platform

    # def platform_is_linux(self):
    #     return "posix" in sys.builtin_module_names

    def check_platform(self):
        """check if current platform is supported (linux or darwin)
        for linux, the version check is done by `UbuntuInstaller.ensure_version()`
        :raises RuntimeError: in case platform is not supported
        """
        platform = self.platform()
        if "linux" in platform:
            self.installers.ubuntu.ensure_version()
        elif "darwin" not in platform:
            raise self.tools.exceptions.Base("Your platform is not supported")

    def _homedir_get(self):
        """Return the user's home directory, honoring HOMEDIR/HOME overrides;
        falls back to /root."""
        if self.platform_is_windows:
            return os.environ["USERPROFILE"]
        if "HOMEDIR" in os.environ:
            dir_home = os.environ["HOMEDIR"]
        elif "HOME" in os.environ:
            dir_home = os.environ["HOME"]
        else:
            dir_home = "/root"
        return dir_home

    def _basedir_get(self):
        """Return the sandbox base directory, creating it when missing.

        /sandbox is used on linux when it already exists or we run as root;
        otherwise ~/sandbox.
        """
        if self.readonly:
            return "/tmp/jumpscale"
        if "linux" in self.platform():
            isroot = None
            rc, out, err = Tools.execute("whoami", showout=False, die=False)
            if rc == 0:
                if out.strip() == "root":
                    isroot = 1
            if Tools.exists("/sandbox") or isroot == 1:
                Tools.dir_ensure("/sandbox")
                return "/sandbox"
        if self.platform_is_windows:
            p = "%s\sandbox" % self._homedir_get()
        else:
            p = "%s/sandbox" % self._homedir_get()
        if not Tools.exists(p):
            Tools.dir_ensure(p)
        return p

    def _cfgdir_get(self):
        """Return the configuration directory under the base dir."""
        if self.readonly:
            return "/tmp/jumpscale/cfg"
        # NOTE(review): MyEnv.platform_is_windows is read as a class attribute
        # here while __init__ sets it on the instance — verify
        return "%s/cfg" % self._basedir_get() if not MyEnv.platform_is_windows else "%s\cfg" % self._basedir_get()

    def _identitydir_get(self):
        """Return the identity (myhost) directory under the base dir."""
        return f"{self._basedir_get()}/myhost" if not MyEnv.platform_is_windows else "%s\myhost" % self._basedir_get()

    def _codedir_get(self):
        """Return the code checkout directory under the base dir."""
        return f"{self._basedir_get()}/code" if not MyEnv.platform_is_windows else "%s\code" % self._basedir_get()

    def config_default_get(self, config={}):
        """Fill *config* with defaults for every missing key and return it.

        NOTE(review): mutable default argument — the same dict is shared
        across calls made without an argument; consider ``config=None``.
        """
        if "DIR_BASE" not in config:
            config["DIR_BASE"] = self._basedir_get()
        if "DIR_HOME" not in config:
            config["DIR_HOME"] = self._homedir_get()
        if not "DIR_CFG" in config:
            config["DIR_CFG"] = self._cfgdir_get()
        if not "DIR_IDENTITY" in config:
            config["DIR_IDENTITY"] = self._identitydir_get()
        if not "READONLY" in config:
            config["READONLY"] = False
        if not "DEBUG" in config:
            config["DEBUG"] = False
        if not "DEBUGGER" in config:
            config["DEBUGGER"] = "pudb"
        if "LOGGER_INCLUDE" not in config:
            config["LOGGER_INCLUDE"] = ["*"]
        if "LOGGER_EXCLUDE" not in config:
            config["LOGGER_EXCLUDE"] = ["sal.fs"]
        if "LOGGER_LEVEL" not in config:
            config["LOGGER_LEVEL"] = 15  # means std out & plus gets logged
        if config["LOGGER_LEVEL"] > 50:
            config["LOGGER_LEVEL"] = 50
        # if "LOGGER_CONSOLE" not in config:
        #     config["LOGGER_CONSOLE"] = True
        # if "LOGGER_REDIS" not in config:
        #     config["LOGGER_REDIS"] = False
        if "LOGGER_PANEL_NRLINES" not in config:
            config["LOGGER_PANEL_NRLINES"] = 0
        if self.readonly:
            config["DIR_TEMP"] = "/tmp/jumpscale_installer"
            # config["LOGGER_REDIS"] = False
            # config["LOGGER_CONSOLE"] = True
        if not "DIR_TEMP" in config:
            config["DIR_TEMP"] = "/tmp/jumpscale"
        if not "DIR_VAR" in config:
            config["DIR_VAR"] = "%s/var" % config["DIR_BASE"]
        if not "DIR_CODE" in config:
            config["DIR_CODE"] = self._codedir_get()
            # config["DIR_CODE"] = "%s/code" % config["DIR_BASE"]
            # if self.tools.exists("%s/code" % config["DIR_BASE"]):
            #     config["DIR_CODE"] = "%s/code" % config["DIR_BASE"]
            # else:
            #     config["DIR_CODE"] = "%s/code" % config["DIR_HOME"]
        if not "DIR_BIN" in config:
            config["DIR_BIN"] = "%s/bin" % config["DIR_BASE"]
        if not "DIR_APPS" in config:
            config["DIR_APPS"] = "%s/apps" % config["DIR_BASE"]
        if not "EXPLORER_ADDR" in config:
            config["EXPLORER_ADDR"] = "explorer.testnet.grid.tf"
        if not "THREEBOT_DOMAIN" in config:
            config["THREEBOT_DOMAIN"] = "3bot.testnet.grid.tf"
        if not "THREEBOT_CONNECT" in config:
            config["THREEBOT_CONNECT"] = True
        # max log msgpacks files on the file system each file is 1k logs
        if not "MAX_MSGPACKS_LOGS_COUNT" in config:
            config["MAX_MSGPACKS_LOGS_COUNT"] = 50
        if not "SSH_KEY_DEFAULT" in config:
            config["SSH_KEY_DEFAULT"] = ""
        if not "SSH_AGENT" in config:
            config["SSH_AGENT"] = True
        if not "USEGIT" in config:
            config["USEGIT"] = True
        return config

    def configure(self, config=None, readonly=None, debug=None, secret=None):
        """
        the args of the command line will also be parsed, will check for

        --readonly                      default is false
        --debug                         default debug is False

        :return:
        """
        if secret:
            self.secret_set(secret)
        basedir = self._basedir_get()
        if config:
            self.config.update(config)
        if readonly:
            self.config["READONLY"] = readonly
        if debug:
            self.config["DEBUG"] = debug
        # installpath = os.path.dirname(inspect.getfile(os.path))
        # # MEI means we are pyexe BaseInstaller
        # if installpath.find("/_MEI") != -1 or installpath.endswith("dist/install"):
        #     pass  # dont need yet but keep here
        if DockerFactory.indocker():
            self.config["IN_DOCKER"] = True
        else:
            self.config["IN_DOCKER"] = False
        self.config_save()
        self.init()

    @property
    def adminsecret(self):
        # alias for secret_get()
        return self.secret_get()

    def test(self):
        # drop into an interactive shell when no loghandlers are installed
        if not self.loghandlers != []:
            self.tools.shell()

    def excepthook(self, exception_type, exception_obj, tb, die=True, stdout=True, level=50):
        """
        Central exception handler, also installed as sys.excepthook.

        :param exception_type:
        :param exception_obj:
        :param tb:
        :param die:
        :param stdout:
        :param level:
        :return: logdict see github/threefoldtech/jumpscaleX_core/docs/Internals/logging_errorhandling/logdict.md
        """
        if isinstance(exception_obj, self.tools.exceptions.RemoteException):
            print(self.tools.text_replace("{RED}*****Remote Exception*****{RESET}"))
            logdict = exception_obj.data
            self.tools.log2stdout(logdict)
            # strip payload so it does not get logged twice below
            exception_obj.data = None
            exception_obj.exception = None
        # logdict = self.tools.log(tb=tb, level=level, exception=exception_obj, stdout=stdout)
        try:
            logdict = self.tools.log(tb=tb, level=level, exception=exception_obj, stdout=stdout)
        except Exception as e:
            # the log handler itself failed: dump raw traceback and re-raise
            self.tools.pprint("{RED}ERROR IN LOG HANDLER")
            print(e)
            ttype, msg, tb = sys.exc_info()
            traceback.print_exception(etype=ttype, tb=tb, value=msg)
            self.tools.pprint("{RESET}")
            raise e
            sys.exit(1)  # NOTE(review): unreachable after raise
        exception_obj._logdict = logdict
        if self.debug and tb:
            # exception_type, exception_obj, tb = sys.exc_info()
            pudb.post_mortem(tb)
        if die is False:
            return logdict
        else:
            sys.exit(1)

    def exception_handle(self, exception_obj, die=True, stdout=True, level=50, stack_go_up=0):
        """
        e is the error as raised by e.g. try/except statement

        :param exception_obj: the exception obj coming from the try/except
        :param die: die if error
        :param stdout: if True send the error log to stdout
        :param level: 50 is error critical
        :return: logdict see github/threefoldtech/jumpscaleX_core/docs/Internals/logging_errorhandling/logdict.md

        example

        try:
            something
        except Exception as e:
            logdict = j.core.myenv.exception_handle(e,die=False,stdout=True)
        """
        ttype, msg, tb = sys.exc_info()
        return self.excepthook(ttype, exception_obj, tb, die=die, stdout=stdout, level=level)

    # def identity_set(self,name="default",):

    def config_edit(self):
        """
        edits the configuration file which is in {DIR_BASE}/cfg/jumpscale_config.toml
        {DIR_BASE} normally is /sandbox
        """
        self.tools.file_edit(self.config_file_path)

    def _config_load(self):
        """
        loads the configuration file which default is in {DIR_BASE}/cfg/jumpscale_config.toml
        {DIR_BASE} normally is /sandbox
        """
        config = self.tools.config_load(self.config_file_path)
        self.config = self.config_default_get(config)

    def config_save(self):
        # persist self.config to the toml config file
        self.tools.config_save(self.config_file_path, self.config)

    def _state_load(self):
        """
        only 1 level deep toml format only for int,string,bool
        no multiline
        """
        if self.tools.exists(self.state_file_path):
            self.state = self.tools.config_load(self.state_file_path, if_not_exist_create=False)
        elif not self.readonly:
            self.state = self.tools.config_load(self.state_file_path, if_not_exist_create=True)
        else:
            self.state = {}

    def state_save(self):
        # persist self.state unless running read-only
        if self.readonly:
            return
        self.tools.config_save(self.state_file_path, self.state)

    def _key_get(self, key):
        """Normalize a state key: strip everything after =, >, < or a space,
        then uppercase."""
        key = key.split("=", 1)[0]
        key = key.split(">", 1)[0]
        key = key.split("<", 1)[0]
        key = key.split(" ", 1)[0]
        key = key.upper()
        return key

    def state_get(self, key):
        """Return True when the normalized key is marked done in state."""
        key = self._key_get(key)
        if key in self.state:
            return True
        return False

    def state_set(self, key):
        """Mark the normalized key as done and persist."""
        if self.readonly:
            return
        key = self._key_get(key)
        self.state[key] = True
        self.state_save()

    def state_delete(self, key):
        """Remove the normalized key from state (if present) and persist."""
        if self.readonly:
            return
        key = self._key_get(key)
        if key in self.state:
            self.state.pop(key)
            self.state_save()

    def states_delete(self, prefix):
        """Remove every state key starting with *prefix* (case-insensitive)."""
        if self.readonly:
            return
        prefix = prefix.upper()
        keys = [i for i in self.state.keys()]
        for key in keys:
            if key.startswith(prefix):
                self.state.pop(key)
                # print("#####STATEPOP:%s" % key)
                self.state_save()

    def state_reset(self):
        """
        remove all state
        """
        self.tools.delete(self.state_file_path)
        self._state_load()
def __init__(self):
    """Initialize the hbase, redis-cluster and elasticsearch connections."""
    self.hbase_con = HbaseInfoTask()
    self.redis_con = RedisTools()
    self.es = Elasticsearch(ES_ADDR)
class GetNewsUser(object):
    """Sync news-user records from HBase (NEWS_PERSON_TABLE) into the ES
    ``xw_user`` index, using update for existing docs and insert otherwise."""

    def __init__(self):
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_ADDR)

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_ADDR)

    def run(self):
        """Poll redis for rowkeys, batch update/insert actions and bulk-commit."""
        action_list = []
        count = 0
        start = int(time.time())
        cunzai = 0  # number of already-existing docs (turned into updates)
        while True:
            rowkey = self.redis_con.get_rowkey("xw_user")
            if rowkey is None:  # was `== None`
                # queue drained: flush pending actions, then back off
                if len(action_list) > 0:
                    logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                    cunzai = 0
                    self.commit(action_list)
                    action_list.clear()
                    start = int(time.time())
                    count = 0
                time.sleep(10)
                continue
            param = None
            if "|||||" in rowkey:
                # payload carries a comma-separated field list after "|||||"
                params = rowkey.split("|||||")[1]
                param = params.split(",")
                rowkey = rowkey.split("|||||")[0]
            if len(rowkey) > 500:
                # ES _id length guard
                log_info = "id:%s长度超过500" % rowkey
                logging.warning(log_info)
                continue
            boo = self.es.exists("xw_user", "sino", rowkey)
            action = {
                "_index": "xw_user",
                "_type": "sino",
                "_id": "",
            }
            if boo:
                # existing doc: partial update restricted to `param` fields
                record = self.hbase_con.getResultByRowkey("NEWS_PERSON_TABLE", rowkey, "xw_user", param)
                if not record:
                    continue
                action["_op_type"] = "update"
                action['doc'] = record
                cunzai = cunzai + 1
            else:
                # new doc: full insert (no field restriction needed)
                record = self.hbase_con.getResultByRowkey("NEWS_PERSON_TABLE", rowkey, "xw_user")
                if not record:
                    continue
                action['_source'] = record
            action['_id'] = rowkey
            action_list.append(action)
            end = int(time.time())
            count = count + 1
            if count > COUNT_NUM or (end - start) > 10:
                if len(action_list) > 0:
                    self.es_ping()
                    logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                    cunzai = 0
                    self.commit(action_list)
                    start = int(time.time())
                    action_list.clear()
                    count = 0

    def commit(self, action_list):
        """Bulk-write the batch; reconnect and retry once on failure."""
        try:
            helpers.bulk(self.es, action_list)
        except Exception as e:
            log_info = "index:xw_user,\terror:" + str(e)
            logging.error(log_info)
            # reconnect before the retry (original retried on the dead client)
            self.es_ping()
            helpers.bulk(self.es, action_list)
        logging.warning("提交成功:%d条数据" % len(action_list))
def __init__(self):
    """Initialize hbase/redis/elasticsearch connections and the insert counter."""
    self.hbase_con = HbaseInfoTask()
    self.redis_con = RedisTools()
    # 30s timeout: bulk requests can be slow under load
    self.es = Elasticsearch(ES_ADDR, timeout=30)
    # running total of single-document inserts
    self.insert_count = 0
class GetInfo(object):
    """Sync news records from HBase (INFO_TABLE) into the ES ``xw_info`` index.

    Existing docs are batched as partial updates; new docs are inserted one by
    one immediately (insert_count tracks those).
    """

    def __init__(self):
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_ADDR, timeout=30)
        self.insert_count = 0

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_ADDR, timeout=30)

    def run(self):
        """Poll redis for rowkeys; update existing docs in bulk, insert new ones directly."""
        action_list = []
        count = 0
        start = int(time.time())
        cunzai = 0  # number of already-existing docs (turned into updates)
        while True:
            rowkey = self.redis_con.get_rowkey("xw_info")
            if rowkey is None:  # was `== None`
                # queue drained: flush pending actions, then back off
                if len(action_list) > 0:
                    logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                    cunzai = 0
                    self.commit(action_list)
                    action_list.clear()
                    start = int(time.time())
                    count = 0
                time.sleep(10)
                continue
            param = None
            if "|||||" in rowkey:
                # payload carries a comma-separated field list after "|||||"
                params = rowkey.split("|||||")[1]
                param = params.split(",")
                rowkey = rowkey.split("|||||")[0]
            # ES _id is the md5 of the HBase rowkey
            _id = trans_md5(rowkey)
            boo = self.es.exists("xw_info", "sino", _id)
            action = {
                "_index": "xw_info",
                "_type": "sino",
                "_id": "",
            }
            if boo:
                # existing doc: partial update restricted to `param` fields
                record = self.hbase_con.getResultByRowkey("INFO_TABLE", rowkey, "xw_info", param)
                if not record:
                    continue
                action["_op_type"] = "update"
                action['doc'] = record
                cunzai = cunzai + 1
            else:
                # new doc: index immediately instead of batching
                record = self.hbase_con.getResultByRowkey("INFO_TABLE", rowkey, "xw_info")
                if not record:
                    continue
                self.es.index(index="xw_info", doc_type="sino", id=_id, body=record)
                self.insert_count = self.insert_count + 1
                continue
            action['_id'] = _id
            action_list.append(action)
            end = int(time.time())
            count = count + 1
            if count > COUNT_NUM or (end - start) > 10:
                self.es_ping()
                logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                cunzai = 0
                start = int(time.time())
                self.commit(action_list)
                action_list.clear()
                count = 0

    def commit(self, action_list):
        """Bulk-write the batch; reconnect and retry once on failure."""
        try:
            helpers.bulk(self.es, action_list)
        except Exception as e:
            log_info = "index:xw_info index,\terror:" + str(e)
            logging.error(log_info)
            # reconnect before the retry (original retried on the dead client)
            self.es_ping()
            helpers.bulk(self.es, action_list)
        logging.warning("新增存入elasticsearch当中%d条数据" % self.insert_count)
        logging.warning("提交成功:%d条数据" % len(action_list))
class GetWechatInfo(object):
    """Sync WECHAT_INFO_TABLE records from HBase into the ES ``wx_info`` index.

    create by: yangjt
    """

    def __init__(self):
        # initialize hbase, redis-cluster and elasticsearch connections
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_ADDR, timeout=30)

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_ADDR, timeout=30)

    def run(self):
        """Synchronize WECHAT_INFO_TABLE data (create by: yangjt)."""
        action_list = []
        count = 0
        start = int(time.time())
        cunzai = 0  # number of already-existing docs (turned into updates)
        while True:
            # fetch the next value to synchronize from redis
            rowkey = self.redis_con.get_rowkey("wx_info")
            # blpop is not available here, so an empty poll is signalled by None
            if rowkey is None:  # was `== None`
                # when idle, flush whatever accumulated, then back off
                if len(action_list) > 0:
                    logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                    cunzai = 0
                    self.commit(action_list)
                    action_list.clear()
                    start = int(time.time())
                    count = 0
                time.sleep(10)
                continue
            param = None
            # split off the rowkey and the optional list of fields to sync
            if "|||||" in rowkey:
                params = rowkey.split("|||||")[1]
                param = params.split(",")
                rowkey = rowkey.split("|||||")[0]
            # the ES _id is the md5 of the HBase rowkey
            _id = trans_md5(rowkey)
            # existence probe (~30ms, a HEAD request under the hood)
            boo = self.es.exists("wx_info", "sino", _id)
            action = {
                "_index": "wx_info",
                "_type": "sino",
                "_id": "",
            }
            if boo:
                # doc already present: upload as a partial update
                record = self.hbase_con.getResultByRowkey("WECHAT_INFO_TABLE", rowkey, "wx_info", param)
                if not record:
                    continue
                action["_op_type"] = "update"
                action['doc'] = record
                cunzai = cunzai + 1
            else:
                # doc missing: upload as an insert (field restriction irrelevant)
                record = self.hbase_con.getResultByRowkey("WECHAT_INFO_TABLE", rowkey, "wx_info")
                if not record:
                    continue
                action['_source'] = record
            action['_id'] = _id
            action_list.append(action)
            end = int(time.time())
            count = count + 1
            # flush when the batch exceeds COUNT_NUM or 30s since the last commit
            if count > COUNT_NUM or (end - start) > 30:
                self.es_ping()
                logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                cunzai = 0
                self.commit(action_list)
                start = int(time.time())
                action_list.clear()
                count = 0

    def commit(self, action_list):
        """Bulk-upload a batch of actions (create by: yangjt).

        Each action looks like::

            {"_index": "wx_info", "_type": "sino", "_id": "...", "_source": {...}}

        Reconnects and retries once on failure; a second failure propagates.
        """
        try:
            helpers.bulk(self.es, action_list)
        except Exception as e:
            log_info = "index:wechat,\terror:" + str(e)
            logging.error(log_info)
            # reconnect before the retry (original retried on the dead client)
            self.es_ping()
            helpers.bulk(self.es, action_list)
        logging.warning("提交成功:%d条数据" % len(action_list))
class Image(object):
    """Sync image records from HBase (IMAGE_DATA_TABLE) into the ES ``image``
    index; updates are batched, new docs are inserted immediately."""

    def __init__(self):
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_SF_ADDR)
        self.insert_count = 0

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_SF_ADDR)

    def run(self):
        """Poll redis for rowkeys; update existing docs in bulk, insert new ones directly."""
        action_list = []
        count = 0
        start = int(time.time())
        cunzai = 0  # number of already-existing docs (turned into updates)
        while True:
            rowkey = self.redis_con.get_rowkey("image")
            if rowkey is None:  # was `== None`
                # queue drained: flush pending actions, then back off
                if len(action_list) > 0:
                    logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                    cunzai = 0
                    self.commit(action_list)
                    action_list.clear()
                    count = 0
                    start = int(time.time())
                time.sleep(10)
                continue
            param = None
            if "|||||" in rowkey:
                # payload carries a comma-separated field list after "|||||"
                params = rowkey.split("|||||")[1]
                param = params.split(",")
                rowkey = rowkey.split("|||||")[0]
            boo = self.es.exists("image", "sino", rowkey)
            action = {
                "_index": "image",
                "_type": "sino",
                "_id": "",
            }
            if boo:
                # existing doc: partial update restricted to `param` fields
                record = self.hbase_con.getSuanfaResultByRowkey(
                    "IMAGE_DATA_TABLE", rowkey, "image", param)
                if not record:
                    continue
                action["_op_type"] = "update"
                action['doc'] = record
                cunzai = cunzai + 1
            else:
                # new doc: index immediately instead of batching
                record = self.hbase_con.getSuanfaResultByRowkey(
                    "IMAGE_DATA_TABLE", rowkey, "image")
                if not record:
                    continue
                try:
                    self.es.index(index="image", doc_type="sino", id=rowkey, body=record)
                    self.insert_count = self.insert_count + 1
                except Exception as e:
                    log_info = "单条插入错误:%s" % str(e)
                    logging.error(log_info)
                continue
            action['_id'] = rowkey
            action_list.append(action)
            end = int(time.time())
            count = count + 1
            if count > COUNT_NUM or (end - start) > 30:
                logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                cunzai = 0
                if len(action_list) > 0:
                    self.es_ping()
                    self.commit(action_list)
                    start = int(time.time())
                    action_list.clear()
                    count = 0

    def commit(self, action_list):
        """Bulk-write the batch; reconnect and retry once on failure."""
        try:
            helpers.bulk(self.es, action_list)
        except Exception as e:
            log_info = "index:image,\terror:" + str(e)
            logging.error(log_info)
            # reconnect before the retry (original retried on the dead client)
            self.es_ping()
            helpers.bulk(self.es, action_list)
        logging.warning("提交成功:%d条数据" % len(action_list))
class GetForumInfo(object):
    """Sync forum records from HBase (MONITOR_INFO_TABLE) into the ES
    ``forum_info`` index, batching updates and inserts together."""

    def __init__(self):
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_ADDR, timeout=30)

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_ADDR, timeout=30)

    def run(self):
        """Poll redis for rowkeys, batch update/insert actions and bulk-commit."""
        action_list = []
        count = 0
        start = int(time.time())
        cunzai = 0  # number of already-existing docs (turned into updates)
        while True:
            rowkey = self.redis_con.get_rowkey("forum_info")
            # logging.info(rowkey)
            if rowkey is None:  # was `== None`
                # queue drained: flush pending actions, then back off
                if len(action_list) > 0:
                    logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                    cunzai = 0
                    self.commit(action_list)
                    action_list.clear()
                    start = int(time.time())
                    count = 0
                time.sleep(10)
                continue
            param = None
            if "|||||" in rowkey:
                # payload carries a comma-separated field list after "|||||"
                params = rowkey.split("|||||")[1]
                param = params.split(",")
                rowkey = rowkey.split("|||||")[0]
            # ES _id is the md5 of the HBase rowkey
            _id = trans_md5(rowkey)
            boo = self.es.exists("forum_info", "sino", _id)
            if boo:
                # existing doc: partial update restricted to `param` fields
                cunzai = cunzai + 1
                record = self.hbase_con.getResultByRowkey("MONITOR_INFO_TABLE", rowkey, "forum_info", param)
                if not record:
                    continue
                action_list.append({
                    "_op_type": "update",
                    "_index": "forum_info",
                    "_type": "sino",
                    "_id": _id,
                    "doc": record,
                })
            else:
                # new doc: full insert
                record = self.hbase_con.getResultByRowkey("MONITOR_INFO_TABLE", rowkey, "forum_info")
                if not record:
                    continue
                action_list.append({
                    "_index": "forum_info",
                    "_type": "sino",
                    "_id": _id,
                    "_source": record,
                })
            end = int(time.time())
            count = count + 1
            if count > COUNT_NUM or (end - start) > 30:
                self.es_ping()
                logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                cunzai = 0
                if len(action_list) > 0:
                    self.commit(action_list)
                    start = int(time.time())
                    action_list.clear()
                    count = 0

    def commit(self, action_list):
        """Bulk-write the batch; reconnect and retry once on failure."""
        try:
            helpers.bulk(self.es, action_list)
        except Exception as e:
            log_info = "index:forum_info,\terror:" + str(e)
            logging.error(log_info)
            # reconnect before the retry (original retried on the dead client)
            self.es_ping()
            helpers.bulk(self.es, action_list)
        logging.warning("提交成功:%d条数据" % len(action_list))
class GetSiteRecord(object):
    """Sync site records from HBase (SITE_RECORD) into the ES ``site_record``
    index; the HBase rowkey is used directly as the ES _id."""

    def __init__(self):
        self.hbase_con = HbaseInfoTask()
        self.redis_con = RedisTools()
        self.es = Elasticsearch(ES_ADDR, timeout=30)

    def es_ping(self):
        """Re-create the ES client if the current connection is dead."""
        if not self.es.ping():
            self.es = Elasticsearch(ES_ADDR, timeout=30)

    def run(self):
        """Poll redis for rowkeys, batch update/insert actions and bulk-commit."""
        action_list = []
        count = 0
        start = int(time.time())
        cunzai = 0  # number of already-existing docs (turned into updates)
        while True:
            rowkey = self.redis_con.get_rowkey("site_record")
            if rowkey is None:  # was `== None`
                # queue drained: flush pending actions, then back off
                if len(action_list) > 0:
                    logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                    cunzai = 0
                    self.commit(action_list)
                    action_list.clear()
                    start = int(time.time())
                    count = 0
                time.sleep(10)
                continue
            param = None
            # self.redis_con.insert_yy_rowkey("es:wangxin:wechat:info",rowkey)
            # self.redis_con.insert_yy_rowkey("es:kafka:wechat:info", rowkey)
            if "|||||" in rowkey:
                # payload carries a comma-separated field list after "|||||"
                params = rowkey.split("|||||")[1]
                param = params.split(",")
                rowkey = rowkey.split("|||||")[0]
            boo = self.es.exists("site_record", "sino", rowkey)
            action = {
                "_index": "site_record",
                "_type": "sino",
                "_id": "",
            }
            if boo:
                # existing doc: partial update restricted to `param` fields
                record = self.hbase_con.getResultByRowkey("SITE_RECORD", rowkey, "site_record", param)
                if not record:
                    continue
                action["_op_type"] = "update"
                action['doc'] = record
                cunzai = cunzai + 1
            else:
                # new doc: full insert
                record = self.hbase_con.getResultByRowkey("SITE_RECORD", rowkey, "site_record")
                if not record:
                    continue
                action['_source'] = record
            action['_id'] = rowkey
            action_list.append(action)
            end = int(time.time())
            count = count + 1
            if count > COUNT_NUM or (end - start) > 30:
                self.es_ping()
                logging.warning("重复存入elasticsearch当中%d条数据" % cunzai)
                cunzai = 0
                self.commit(action_list)
                start = int(time.time())
                action_list.clear()
                count = 0

    def commit(self, action_list):
        """Bulk-write the batch; reconnect and retry once on failure."""
        try:
            helpers.bulk(self.es, action_list)
        except Exception as e:
            log_info = "index:site_record,\terror:" + str(e)
            logging.error(log_info)
            # reconnect before the retry (original retried on the dead client)
            self.es_ping()
            helpers.bulk(self.es, action_list)
        logging.warning("提交成功:%d条数据" % len(action_list))
def __init__(self):
    """Initialize the hbase, redis-cluster and elasticsearch connections."""
    self.hbase_con = HbaseInfoTask()
    self.redis_con = RedisTools()
    # request timeout taken from the module-level ELASTIC_TIMEOUT setting
    self.es = Elasticsearch(ES_ADDR, timeout=ELASTIC_TIMEOUT)