def main():
    """Run the taskflow receiver loop.

    Repeatedly pops instance ids off the Redis message queue, hands each
    one to message_process, and pauses one second between polls. Runs
    forever.
    """
    logging.info("taskflow receiver is running")
    # get redis data
    queue = RedisDB()
    while True:
        item = queue.pop_msg_queue()
        if item is not None:
            message_process(int(item))
        time.sleep(1)
def getdata():
    """Return all stored alert values from Redis.

    Responses:
        200 -- the collected values.
        404 -- JSON {"status": "no data"} when Redis is empty or errors.
    """
    try:
        r = RedisDB('localhost', priorities)
        r.cleanup()
        data = r.getvalues()
        if not data:
            # Fix: the original `raise ('No data')` raised a plain str,
            # which is itself a TypeError in Python 3 and only reached the
            # handler by accident. Raise a real exception type instead.
            raise ValueError('No data')
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # still propagate.
        return jsonify(status='no data'), 404
    return data, 200
def getdata():
    """Return all stored alert values from Redis.

    Responses:
        200 -- the collected values.
        404 -- JSON {"status": "no data"} when Redis is empty or errors.
    """
    try:
        r = RedisDB('localhost', priorities)
        r.cleanup()
        data = r.getvalues()
        if not data:
            # Fix: `raise('No data')` raised a plain str, which is itself a
            # TypeError in Python 3 and only hit the handler by accident.
            raise ValueError('No data')
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # still propagate.
        return jsonify(status='no data'), 404
    return data, 200
def message_process(flow_instance_id):
    """Launch the task-runner subprocess for one flow instance.

    Spawns settings.TASK_RUN_FILE under settings.PYTHONBIN with the
    instance id, redirects the child's stdout/stderr into the
    per-instance log file, and records the running process (pid, host,
    start time) in Redis.

    Args:
        flow_instance_id: id of the flow instance to run.
    """
    try:
        redisdb = RedisDB()
        # Resolve the per-instance log file for the child's output.
        output_filename = settings.TASK_RUN_LOG_FORMAT % flow_instance_id
        logging.debug("output_filename:%s", output_filename)
        logging.debug("task_run_filename:%s", settings.TASK_RUN_FILE)
        logging.debug("task python bin location:%s", settings.PYTHONBIN)
        with open(output_filename, "a") as outfile:
            # -u keeps the child's output unbuffered so the log is live.
            pm = subprocess.Popen(
                [
                    settings.PYTHONBIN, "-u", settings.TASK_RUN_FILE, "-i",
                    str(flow_instance_id)
                ],
                close_fds=True,
                stdout=outfile,
                stderr=subprocess.STDOUT)
        json_data = {
            "worker_process_id": pm.pid,
            "worker_hostname": socket.gethostname(),
            "flow_instance_id": flow_instance_id,
            "start_time": time.time()
        }
        redisdb.add_running_instance(flow_instance_id, json_data)
        redisdb.close()
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # still propagate; failures are logged, not raised.
        logging.error('message_process err \n %s', traceback.format_exc())
def setdata():
    """Store alert texts per priority with expiry timestamps in Redis.

    Expects a JSON body shaped like
    {"newalert": {<prio>: [{"text": ..., "duration": ...}, ...]}}.

    Responses:
        200 -- at least one priority parsed and was stored.
        400 -- no priority in the payload could be parsed.
    """
    # Fix: parse the request body once instead of once per priority.
    try:
        jdata = request.get_json()
    except Exception:
        jdata = None
    cnt = 0
    for prio in priorities:
        try:
            # Fix: validate without `assert`, which is stripped under -O.
            if not jdata['newalert'][prio][0]['text']:
                raise ValueError('missing text')
        except Exception:
            cnt = cnt + 1
    if len(priorities) == cnt:
        return jsonify(status='failed to parse data'), 400
    r = RedisDB('localhost', priorities)
    current_timestamp = int(
        datetime.datetime.strftime(datetime.datetime.now(), '%s'))
    for prio in priorities:
        try:
            for j in jdata['newalert'][prio]:
                r.setvalues(prio, j['text'],
                            current_timestamp + int(j['duration']))
        except Exception:
            # Best effort: priorities absent/malformed in the payload are
            # deliberately skipped.
            pass
    r.cleanup()
    return jsonify(status='success'), 200
class Spider(object):
    """Seeds the crawl queue from config.START_URL and starts a Downloader."""

    # Class-level defaults kept for compatibility; real values are set
    # per instance in __init__.
    redisDB = None
    instanceConf = None

    def __init__(self, config=None):
        """Initialize the spider: seed the queue and start downloading.

        Args:
            config: crawl configuration providing START_URL plus whatever
                RedisDB/Downloader read. Effectively required; the None
                default is kept only for signature compatibility.
        """
        if config is None:
            # Fail fast with a clear message instead of an AttributeError
            # on config.START_URL below.
            raise ValueError("Spider requires a config")
        # Fix: `downloaders = list()` was a mutable CLASS attribute shared
        # by every instance; make it per-instance.
        self.downloaders = list()
        self.instanceConf = config
        self.redisDB = RedisDB(config=self.instanceConf)
        for url in config.START_URL:
            self.redisDB.Enqueue(url, depth=0, title='', trycount=0)
        self.startDownloader()

    def startDownloader(self):
        """Spawn and start one Downloader bound to the shared Redis queue."""
        downloader = Downloader(self.redisDB, config=self.instanceConf)
        downloader.start()
def main():
    """Run the taskflow sender loop.

    Polls the DB for undone instances, pushes their ids onto the Redis
    message queue, and marks every successfully pushed instance
    'running'. If only part of a batch could be pushed, DB and Redis
    disagree, so an exception is raised to abort the loop.
    """
    logging.info("taskflow sender is running")
    taskflowdb = TaskFlowDB()
    redisdb = RedisDB()
    while True:
        data = taskflowdb.get_undo_instances()
        if len(data) == 0:
            time.sleep(30)
            continue
        sended_ids = []
        try:
            for item in data:
                redisdb.push_msg_queue(item["id"])
                sended_ids.append(item["id"])
        except Exception:
            # Narrowed from a bare except; push failures are logged and
            # the partially-sent batch is reconciled below.
            logging.warning("push redis err \n %s", traceback.format_exc())
        if len(sended_ids):
            for item in sended_ids:
                taskflowdb.save_instance_status(item, 'running')
        if len(sended_ids) != len(data):
            logging.error("redis data error: sended len not equal data len")
            raise Exception("redis data error")
        time.sleep(2)
def setdata():
    """Store alert texts per priority with expiry timestamps in Redis.

    Expects a JSON body shaped like
    {"newalert": {<prio>: [{"text": ..., "duration": ...}, ...]}}.

    Responses:
        200 -- at least one priority parsed and was stored.
        400 -- no priority in the payload could be parsed.
    """
    # Fix: parse the request body once instead of once per priority.
    try:
        jdata = request.get_json()
    except Exception:
        jdata = None
    cnt = 0
    for prio in priorities:
        try:
            # Fix: validate without `assert`, which is stripped under -O.
            if not jdata['newalert'][prio][0]['text']:
                raise ValueError('missing text')
        except Exception:
            cnt = cnt + 1
    if len(priorities) == cnt:
        return jsonify(status='failed to parse data'), 400
    r = RedisDB('localhost', priorities)
    current_timestamp = int(
        datetime.datetime.strftime(datetime.datetime.now(), '%s'))
    for prio in priorities:
        try:
            for j in jdata['newalert'][prio]:
                r.setvalues(prio, j['text'],
                            current_timestamp + int(j['duration']))
        except Exception:
            # Best effort: priorities absent/malformed in the payload are
            # deliberately skipped.
            pass
    r.cleanup()
    return jsonify(status='success'), 200
def main(flow_instance_id):
    """Execute one step of a task-flow instance.

    Loads instance/step/module metadata, dynamically imports the step's
    module, builds its kwargs (honouring input-argument aliases), runs
    the module, persists its outputs, and schedules the next step,
    a retry, or the final status. Always attempts to remove the instance
    from the Redis running-set at the end, even on failure.

    Args:
        flow_instance_id: id of the flow instance whose current step runs.
    """
    try:
        taskflowdb = TaskFlowDB()
        # Fetch the base data for this instance and its current step.
        instance_data = taskflowdb.get_instance(flow_instance_id)
        flow_id = instance_data["flowid"]
        step_num = instance_data["curstepnum"]
        flow_step_data = taskflowdb.get_flow_step(flow_id, step_num)
        module_name = flow_step_data["modulename"]
        module_data = taskflowdb.get_module(module_name)
        arguments_definition = json.loads(module_data["arguments"])
        # Dynamically import the module to run; every step module is
        # expected to expose a `main` entry point.
        inner_module = importlib.import_module("modules.%s" % module_name)
        inner_method = getattr(inner_module, "main")
        # Build the argument data:
        # arguments supplied with the instance...
        dict_instance_arguments = json.loads(instance_data["arguments"])
        # ...and arguments produced by earlier steps at run time.
        dict_instance_run_data = taskflowdb.get_instance_run_data(
            flow_instance_id)
        inner_kwargs = {}
        # Resolve input-argument aliases and assemble the module kwargs.
        input_argment_alias = json.loads(flow_step_data["inputargalias"])
        for arg_item in arguments_definition:
            key_name = arg_item["name"]
            input_key_name = input_argment_alias.get(key_name, key_name)
            if key_name in dict_instance_arguments:
                inner_kwargs[key_name] = dict_instance_arguments.get(
                    input_key_name)
            elif key_name in dict_instance_run_data:
                inner_kwargs[key_name] = dict_instance_run_data.get(
                    input_key_name)
            else:
                inner_kwargs[key_name] = None
        inner_kwargs["sys_taskflow_instance"] = instance_data
        # Record the instance_steps row as 'running'.
        step_name = flow_step_data["stepname"]
        json_data = json.dumps(inner_kwargs, cls=CustomJSONEncoder)
        worker_name = socket.gethostname()
        instance_step_id = taskflowdb.add_instance_step(
            flow_instance_id, step_num, step_name, json_data, worker_name,
            'running', '')
        # Temporarily close the DB to release the connection while the
        # module runs (connection slots are scarce).
        taskflowdb.close()
        # Run the module. ret may be None, a bool, or a tuple of
        # (result[, message[, return_data]]).
        result = True
        message = ""
        return_data = {}
        try:
            logging.info("----------run module: %s start----------" %
                         module_name)
            ret = inner_method(**inner_kwargs)
            logging.info("----------run module: %s finish----------" %
                         module_name)
            if ret is not None:
                if type(ret) is bool:
                    result = ret
                elif type(ret) is tuple:
                    len_ret = len(ret)
                    if len_ret > 0:
                        result = bool(ret[0])
                    if len_ret > 1:
                        message = str(ret[1])
                    if len_ret > 2:
                        return_data = dict(ret[2])
        except:
            result = False
            message = traceback.format_exc()
            logging.error("run module err \n %s", message)
        exec_status = u'success' if result else u'fail'
        # Re-open the DB resource now that the module has finished.
        taskflowdb = TaskFlowDB()
        # Update the instance_steps row with the outcome.
        taskflowdb.save_instance_step_status(instance_step_id, exec_status,
                                             message)
        # Handle the execution result.
        if result:
            # Success: resolve output aliases and persist the run data.
            output_argment_alias = json.loads(flow_step_data["outputargalias"])
            for key, value in return_data.items():
                new_key_name = output_argment_alias.get(key, key)
                new_value = value
                if type(value) in [tuple, set]:
                    new_value = list(value)
                if type(value) in [list, set, dict, tuple]:
                    key_type = 'object'
                else:
                    key_type = 'simple'
                if 'object' == key_type:
                    # Containers are stored JSON-encoded.
                    new_value = json.dumps(new_value, cls=CustomJSONEncoder)
                taskflowdb.set_instance_run_data(flow_instance_id, key_type,
                                                 new_key_name, new_value)
            # Has the whole flow finished?
            if step_num >= instance_data["stepcount"]:
                taskflowdb.save_instance_status(flow_instance_id, exec_status)
            else:
                # Should the next step pause? -1 means wait for a manual
                # resume; otherwise delay by the configured seconds.
                nextstep_waitseconds = int(
                    flow_step_data["nextstep_waitseconds"])
                curstepnum = step_num + 1
                curstepruncount = 0
                if nextstep_waitseconds == -1:
                    exec_status = 'pause'
                    next_runtime = datetime.datetime.now()
                else:
                    exec_status = 'standby'
                    # Delayed execution for the next step.
                    next_runtime = datetime.datetime.now(
                    ) + datetime.timedelta(seconds=nextstep_waitseconds)
                taskflowdb.save_instance_status(flow_instance_id, exec_status,
                                                curstepnum, curstepruncount,
                                                next_runtime)
        else:
            # Failure: decide whether this step should be retried.
            failed_retrycounts = int(flow_step_data["failed_retrycounts"])
            curstepruncount = int(instance_data["curstepruncount"]) + 1
            # No retries configured, or the retry budget is exhausted.
            if failed_retrycounts == 0 or curstepruncount > failed_retrycounts:
                taskflowdb.save_instance_status(
                    flow_instance_id, exec_status,
                    cur_step_runcount=curstepruncount)
            else:
                exec_status = 'standby'
                # Retry after one minute by default.
                next_runtime = datetime.datetime.now() + datetime.timedelta(
                    seconds=60)
                taskflowdb.save_instance_status(
                    flow_instance_id, exec_status,
                    cur_step_runcount=curstepruncount,
                    next_runtime=next_runtime)
    except:
        logging.error("task run err \n %s", traceback.format_exc())
    try:
        # remove running flow_instance_id
        redisdb = RedisDB()
        redisdb.remove_running_instance(flow_instance_id)
        redisdb.close()
    except:
        logging.error("task run remove redis running key err \n %s",
                      traceback.format_exc())
def SMEMBERS(peer_id):
    """Busy-poll Redis until the value for peer_id no longer reads 'empty'.

    NOTE(review): this spins with no sleep between polls — presumably the
    caller relies on that; confirm before adding a delay.
    """
    key = "PPFC_{0}".format(peer_id)
    while True:
        value = RedisDB.get(key)
        # Slice [1:6] skips a leading marker byte/quote before comparing
        # against the 'empty' sentinel — assumes that payload framing;
        # TODO confirm against the writer side.
        if value[1:6] != 'empty':
            break
logger.critical("API file '{}' is not found.".format(API_FILE)) sys.exit(1) except PermissionError: logger.critical("API file '{}' is not readable.".format(API_FILE)) sys.exit(1) # Redis backend for storing data redis_host = CONFIG.get("redis", {}).get("host", "127.0.0.1") redis_port = CONFIG.get("redis", {}).get("port", 6379) redis_db = CONFIG.get("redis", {}).get("db", 0) redis_prefix = CONFIG.get("redis", {}).get("data_prefix", "ipvisualizator") initial_users = CONFIG.get("users", []) try: db = RedisDB(host=redis_host, port=redis_port, db=redis_db, data_prefix=redis_prefix, initial_users=initial_users) except redis.exceptions.ConnectionError as error: logger.critical("Can't connect to redis {}:{}.".format( redis_host, redis_port)) sys.exit(1) # Expose app for WSGI applications application = app.app # Run Flask development server if __name__ == "__main__": app.run(port=CONFIG["app"]["port"])
def __init__(self, config=None):
    """Initialize the spider: seed the crawl queue and start downloading.

    Args:
        config: crawl configuration providing START_URL plus whatever
            RedisDB/Downloader read. Effectively required; the None
            default is kept only for signature compatibility.

    Raises:
        ValueError: if config is None (previously this surfaced as an
            opaque AttributeError on config.START_URL).
    """
    if config is None:
        # Fail fast with a clear message instead of crashing on the
        # attribute access below.
        raise ValueError("config is required")
    self.instanceConf = config
    self.redisDB = RedisDB(config=self.instanceConf)
    for url in config.START_URL:
        self.redisDB.Enqueue(url, depth=0, title='', trycount=0)
    self.startDownloader()
sys.path.append("../") from db_config import dmpMysqlConfig, dmpRedisConfig, bendiRedisConfig from mysqldb import MysqlDB from redisdb import RedisDB # import join import json import time import binascii import joblib environment = 'dev' # environment = 'prod' redis_config = dmpRedisConfig(environment) redis_config['db'] = 7 redis_conn = RedisDB(config=redis_config).getRedisConn() mysqlConfig = dmpMysqlConfig(environment) mysqlConfig['db'] = 'dmp_spider' mysql_conn = MysqlDB(config=mysqlConfig).getClient() cur = mysql_conn.cursor() def get_data(): """58data""" try: count = 0 url_list = [] sql = "select source_url from house_five8_spider where house_city=''" cur.execute(sql) data_list = cur.fetchall()