def do_doom(self, t, code, queue_at, deadline):
    logger = logging.getLogger('mwtm_cleaner')
    if queue_at is not None and (deadline is None or deadline == 0 or
                                 deadline > queue_at):
        logger.debug('to retry task %s, queue at %s', t.uuid, queue_at)
        yield db_execute(RETRY_TASK, queue_at, code, t.id)
        #yield db_execute(RENEW_EVENT, t.uuid, 'retry')
        g_logger.info(trans2json(message="task_uuid:%s, "
                                 "site_asset_id:%s, deadline:%s, external_id:%s "
                                 % (t.uuid, t.site_asset_id, deadline,
                                    t.external_id),
                                 action="retry task"))
    else:
        logger.debug('to fail task %s', t.uuid)
        g_logger.info(trans2json(message="task_uuid:%s, "
                                 "site_asset_id:%s, external_id:%s"
                                 % (t.uuid, t.site_asset_id, t.external_id),
                                 action="to fail task"))
        rc, _ = yield db_query(CHECK_TASK, t.id)
        if rc <= 0:
            yield db_execute(FAIL_TASK, code, t.id)
            self.send_matches(t, unrecognized=True)
            task_status = db_txn(self.pool, partial(self.load_task_status,
                                                    t.uuid))
            self.update_hbase_task(task_status)
        stats.incr(QUERY_FAILED, 1)
def checkParams(self, res):
    if res.has_key("params"):
        res = res['params']
    self.parent_info = res['parent_info'] if \
        res.has_key('parent_info') else []
    self.match_type = res['match_type'] if \
        res.has_key('match_type') else None
    self.matches = res['matches'] if \
        res.has_key('matches') else []
    self.extra_info = res['extra_info'] if \
        res.has_key('extra_info') else None
    self.crr = res['notification'] if \
        res.has_key('notification') else None
    self.url = res['extra_info_url'] if \
        res.has_key('extra_info_url') else None
    if res.has_key('match_type') and res['match_type'] != 'match':
        self.matches = []
    if not res.has_key('site_asset_id'):
        g_logger.info(trans2json(message='params is error, params has'
                                 ' no key site_asset_id',
                                 action='checkParams'))
        return False
    else:
        if res['site_asset_id'] == "" or res['site_asset_id'] == [] or \
                not isinstance(res['site_asset_id'], list):
            g_logger.info(trans2json(message="params is error, "
                                     "site_asset_id='' or []",
                                     action='checkParams'))
            return False
        else:
            self.site_asset_id = res['site_asset_id']
    return True
def process_task(self, body, message):
    self.logger.info('get task from broker :%s, type:%s', body, type(body))
    g_logger.info(trans2json(message="get task from broker :"
                             "%s" % str(body), action="fetchTask"))
    stats.incr(FETCH_TASKS, 1)
    # normalize the body to a dict: checkParams and process both expect a
    # decoded JSON object, so only string bodies need json.loads
    if not isinstance(body, dict):
        body = json.loads(body)
    if self.checkParams(body):
        try:
            self.process(config, body)
            message.ack()
            stats.incr(FINISHED_TASKS, 1)
        except sotreError:
            message.ack()
            stats.incr(DROP_TASKS, 1)
            g_logger.error(trans2json(message='some unexpected thing'
                                      ' happened, maybe db error',
                                      action='store task to db'))
            self.logger.error('some unexpected thing happened, '
                              'Error:', exc_info=True)
    else:
        self.logger.error("params is error: %s", body)
        g_logger.error(trans2json(message='message from mq, params is error'))
        stats.incr(DROP_TASKS, 1)
        message.ack()
def process_task(self, body, message):
    try:
        data = body
        #data = json.loads(body)
        utils.digest = data['params']['digest']
        g_logger_info.info(trans2json("receive gateway task message %s"
                                      % (body), 'qb_push_receive_gateway'))
        #g_logger_info.info(trans2json("task_uuid:%s" % data['params']['external_id']))
        gv.statsd_conn.incr(
            "thunder.querybroker.qbpush.receive_gateway_message", 1)
        result = self.check_input_params(data)
        if result[0] != 0:
            error_message = self.trans_error_json(result, data)
            g_logger.error(trans2json("response info %s" % error_message))
        else:
            ret_code, result = query_hash(data)
            if ret_code is None:
                self.send_task_priority_escalator(data)
        message.ack()
        return
    except Exception:
        g_logger.error(trans2json("process_task errors happened %s"
                                  % str(traceback.format_exc())))
        message.ack()
def main():
    try:
        args = docopt.docopt(__doc__, version=gv.version)
        cfg_file = get_conf_abspath(args)
        check_conf_validation(cfg_file)
        cfg = parse_conf_file(cfg_file)
        init_logger(cfg)
        get_global_vars(cfg)
        init_mysql_keyword(cfg)
        init_statsd()
        init_redis()
    except:
        g_logger.error(traceback.format_exc())
        sys.exit(1)
    gv.dp.start()
    while True:
        with Connection(gv.taskpriorit_url) as conn:
            try:
                worker = Worker(conn, gv.taskpriorit_exchange,
                                gv.taskpriorit_queue,
                                gv.taskpriorit_routing_key)
                # log startup before run(), which blocks while consuming
                g_logger.info(trans2json('task priority escalator start'))
                worker.run()
            except Exception:
                g_logger.error(trans2json("task priority escalator %s happened!"
                                          % str(traceback.format_exc())))
    gv.dp.join()
def run(queue):
    pro = producer()
    push = pusher(pro)
    while True:
        t = queue.get(block=True)
        logger.info('get a task to push, task_id: %s', t)
        g_logger.info(trans2json(message='get a task to push, task_id: %s' % t,
                                 action='get task'))
        task = getTask(t)
        logger.info('------task:%s', task)
        try:
            re = push.getMatch(t)
            #logger.info('type:%s, %s', type(re), re)
            push.pushResult(re)
            updateFinished(t, 'success')
            dropUnpush(t)
            logger.info('succeed to push the match result, task_id: %s, '
                        'result: %s', t, re)
            g_logger.info(trans2json(message='succeed to push the match result, '
                                     'external_id: %s, task_id: %s'
                                     % (task['i:external_id'], t),
                                     action='push result'))
        except resultError:
            logger.error("failed to get the result, task_id: %s", t)
            g_logger.error(trans2json(message='failed to push result, '
                                      'external_id: %s, task_id: %s'
                                      % (task['i:external_id'], t),
                                      action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except AssertionError:
            logger.error("failed to get matches, task_id: %s", t)
            g_logger.error(trans2json(message='failed to push result, '
                                      'external_id: %s, task_id: %s'
                                      % (task['i:external_id'], t),
                                      action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except:
            logger.error('failed to push result, reset status to new, '
                         'task_id: %s, Error:', t, exc_info=True)
            g_logger.error(trans2json(message='failed to push result, reset status '
                                      'to new, external_id: %s, task_id: %s'
                                      % (task['i:external_id'], t),
                                      action='retry to push'))
            changeStatus(t, 'new')
            logger.error("reset status to new, task_id: %s", t)
def process(self, config, body):
    #db_txn(pool, partial(store), data, cre)
    self.logger.info('start to process message')
    task_id = None
    task = defaultdict(list)
    try:
        #self.process(config, task)
        req = requests.post(config['matches_server'] + '?source=init',
                            data=json.dumps(body))
        self.logger.info('get request :%s, type:%s', req.content,
                         type(req.content))
        res = json.loads(req.content)
        task_id = res['result']['task_id']
        parseParams(body, task)
        if res['result']['source'] == 'auto_match':
            db_txn(pool, partial(updateStatus), task_id,
                   task['site_asset_id'], task['external_id'])
            self.logger.info('this task has already been in hbase, reset'
                             ' status to new, site_asset_ids: %s',
                             task['site_asset_id'])
            g_logger.info(trans2json(message='task has already been in'
                                     ' hbase, external_id:%s, site_asset_id:%s, task_id:%s'
                                     % (task['external_id'],
                                        task['site_asset_id'], task_id),
                                     action="reset status to new"))
        else:
            genTask(task)
            task['task_uuid'] = task_id
            db_txn(pool, partial(storeTaskMysql), task)
            g_logger.info(trans2json(message="succeed to store task external_id:%s,"
                                     " site_asset_id:%s, task_id:%s"
                                     % (task['external_id'],
                                        task['site_asset_id'],
                                        task['task_uuid']),
                                     action='store task to db'))
    except:
        self.logger.info('failed to store task, start to retry, task_uuid: %s'
                         ' site_asset_id: %s', task_id,
                         task['site_asset_id'], exc_info=True)
        g_logger.error(trans2json(message='failed to store task, start to'
                                  ' retry, external_id:%s, site_asset_id: %s, task_id: %s'
                                  % (task['external_id'],
                                     task['site_asset_id'], task_id),
                                  action='store task to db'))
        raise sotreError
def connect_rabbitmq(mq_url, queue_name):
    try:
        # note: the Exchange/Queue built here are neither bound nor returned;
        # consumers must redeclare them against the returned connection
        exchange = Exchange(queue_name, type='fanout')
        queue = kombu.Queue(queue_name, exchange, routing_key=queue_name)
        #connection = Connection('amqp://*****:*****@localhost:5672//')
        g_logger.debug(trans2json('connect to %s, queue is %s'
                                  % (mq_url, queue_name)))
        connection = Connection(mq_url)
        return connection
    except Exception, msg:
        #cas_system_log('error', 'connect rabbitmq failed [%s]' % (msg))
        g_logger.error(trans2json('connect rabbitmq failed [%s]' % (msg)))
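# Hedged usage sketch for connect_rabbitmq (not part of the original source):
# the URL and queue name below are illustrative assumptions. Because the
# function only returns the Connection, a consumer has to redeclare the same
# fanout exchange and queue before draining messages.
import socket

from kombu import Connection, Consumer, Exchange, Queue

def _print_and_ack(body, message):
    print body  # Python 2 print statement, matching the surrounding code
    message.ack()

def demo_consume():
    conn = connect_rabbitmq('amqp://guest:guest@localhost:5672//', 'demo_queue')
    exchange = Exchange('demo_queue', type='fanout')
    queue = Queue('demo_queue', exchange, routing_key='demo_queue')
    with Consumer(conn, queues=[queue], callbacks=[_print_and_ack]):
        try:
            conn.drain_events(timeout=5)
        except socket.timeout:
            pass  # no message arrived within 5s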
def send_bttask_to_cas(self, rds_conn, data, url_hot):
    try:
        message = json.dumps(data)
        rds_conn.set_hot(message, url_hot)
        g_logger_info.info(trans2json("send to cas bt download task %s"
                                      % (data), "qb_pull_send_cas"))
        gv.statsd_conn.incr(
            "thunder.querybroker.qbpull.send_to_cas_download", 1)
    except Exception:
        g_logger.error(trans2json("send cas redis bt task errors happened %s"
                                  % str(traceback.format_exc())))
def buf_tasks(self, reqs):
    accs = self.accounts()
    backends = self.backends()
    for t in reqs:
        try:
            self.logger.info("receive task from picker, task_uuid: %s, "
                             "site_asset_id: %s" % (t.uuid, t.site_asset_id))
            self.logger.debug("receive task info:%s" % t._asdict())
            g_logger.info(trans2json(message="site_asset_id:%s, "
                                     "task_uuid:%s, external_id:%s"
                                     % (t.site_asset_id, t.uuid,
                                        t.external_id),
                                     action="receive picked task"))
            if not self.task_check(t, accs, backends):
                self.reply(t)
                continue
            acc = accs[t.account]._asdict()
            acc["backends"] = [v._asdict() for v in acc["backends"]]
            backs = {}
            for k, v in backends.iteritems():
                backs[k] = v._asdict()
            self.logger.debug("add task's account: %s, backends: %s"
                              % (acc, backs))
            ct = Task().query.delay(t._asdict(), acc, backs)
            self.taskm[ct.task_id]['celery_task'] = ct
            self.taskm[ct.task_id]['task'] = t
            self.tasks.add(ct)
            self.logger.info("add task to celery, task_uuid: %s, "
                             "site_asset_id: %s, celery_uuid: %s "
                             % (t.uuid, t.site_asset_id, ct.task_id))
            g_logger.info(trans2json(message="site_asset_id:%s, "
                                     "task_uuid:%s, external_id:%s"
                                     % (t.site_asset_id, t.uuid,
                                        t.external_id),
                                     action="add task to celery"))
        except Exception, ex:
            self.reply(t)
            self.logger.error("catch exception from buf tasks, "
                              "task_uuid: %s, site_asset_id: %s"
                              % (t.uuid, t.site_asset_id), exc_info=True)
            continue
        try:
            db_txn(self.pool, partial(self.record, t))
        except Exception:
            self.logger.error("failed to record execution for task %s"
                              % t.uuid)
def query_vddb_async(req_hash, data): g_logger.debug(trans2json("query vddb async by hash %s" % str(req_hash))) mysystem = mysystem(gv.mysystem_user, gv.mysystem_passwd, gv.mysystem_url, False, MEDIA_REQ_TIMEOUT, g_logger) uuid = data['params']['external_id'] ret, status_listing = mysystem.query(req_hash, uuid) working_cnt = 0 copyrighted_cnt = 0 uncopyrighted_cnt = 0 status_cnt = len(status_listing) for status in status_listing: if status['status'] == STATUS_COPYRIGHTED: copyrighted_cnt += 1 if status['status'] == STATUS_UNCOPYRIGHTED: uncopyrighted_cnt += 1 if status['status'] == STATUS_WORKING: working_cnt += 1 # all can not check if ret == STATUS_UNDETECTED: ret_code = 2 return ret_code, status_listing if status_cnt > 0: if copyrighted_cnt == status_cnt or working_cnt == status_cnt or uncopyrighted_cnt == status_cnt: ret_code = 1 return ret_code, status_listing return 4, None
def task_finished(self, celery_id, query_res):
    try:
        t = self.taskm[celery_id]['task']
        self.logger.info("finished query, task_id:%s, "
                         "site_asset_id: %s, celery_id:%s, "
                         "ret: %s, err: %s " % (t.uuid, t.site_asset_id,
                                                celery_id, query_res.ret,
                                                query_res.err))
        self.logger.debug("task_id:%s, out: %s", t.uuid, query_res.out)
        self.logger.debug("finished task info: %s" % str(t))
        #parse query result
        g_logger.info(trans2json(message="site_asset_id:%s, "
                                 "task_uuid:%s, external_id:%s "
                                 % (t.site_asset_id, t.uuid, t.external_id),
                                 action="task finished query from celery"))
        if not isinstance(query_res, TaskRes):
            #means catch some exception
            self.cleaner.request((t, BAD_OUTPUT, None))
        else:
            _, state, res = self.parse_query_res(query_res)
            self.cleaner.request((t, state, res))
    except Exception, ex:
        self.cleaner.request((t, BAD_OUTPUT, None))
        self.logger.error("task finished catch unhandled exception, "
                          "task_uuid:%s" % t.uuid, exc_info=True)
def send_matches(self, task, matches=[], crr="", unrecognized=False): match_type = "no_match" if unrecognized: match_type = "unrecognized" elif len(matches): match_type = 'match' data = dict(id="null", jsonrpc="2.0", method="matches", params=dict(matches=matches, site_asset_id=eval(task.site_asset_id), notification=crr, match_type=match_type)) params = dict(source="auto_match") req = None try: req = requests.post(self.matches_server, params=params, data=json.dumps(data)) if req.status_code != 200: self.logger.error("send matches failed, code:%s", req.status_code) raise SendMatchesError("send matches faild, task_id:%s" % task.uuid) except RequestException: self.logger.error("send matches failed, %s", task.uuid, exc_info=True) raise SendMatchesError("send matches faild") self.logger.info("send matches success, task_uuid:%s, site_asset_id:%s," "external_id:%s", task.uuid, task.site_asset_id, task.external_id) g_logger.info(trans2json(message="task_uuid:%s, " "site_asset_id:%s, external_id:%s " % \ (task.uuid, task.site_asset_id, task.external_id), action="send matches success"))
def load(self):
    '''
    :return code `None`, `dict`
        `None`: no such task
        `{}`: in progress
        `{"keys1":"value1"...}`: normal results
    '''
    cache = self.load_from_cache()
    if cache is None:
        if self.has_parent():
            self.prefix_search = True
        g_logger.info(trans2json(message="site_asset_id:%s, "
                                 "prefix_search:%s"
                                 % (self.site_asset_id, self.prefix_search),
                                 action="no hit cache"))
        tids = self.get_tids()
        self.logger.debug("task_ids: %s" % tids)
        format_data = None
        if tids == []:
            format_data = wrap_error(NO_TASK_ERROR['code'],
                                     NO_TASK_ERROR['message'], [])
        else:
            format_data = self.format_matches(tids)
        self.save_cache(format_data)
        return format_data
    else:
        self.logger.debug("hit cache, site_asset_id:[%s], cache: %s"
                          % (self.site_asset_id, cache))
        return cache
def finish(self, t, p, res):
    self.logger.info('to finish task, task_uuid:%s, site_asset_id:%s',
                     t.uuid, t.site_asset_id)
    self.logger.debug("res:%s " % str(res))
    assert res.matches is not None
    code = WITHOUT_MATCH if len(res.matches) == 0 else WITH_MATCH
    if code == WITHOUT_MATCH:
        try:
            if db_txn(self.pool, partial(self.check_matches, t)):
                code = WITH_MATCH
        except:
            pass
    tr = 'match' if code == WITH_MATCH else 'no_match'
    self.logger.debug('record finished task %s, site_asset_id: %s',
                      t.uuid, t.site_asset_id)
    try:
        ms = self.filter_matches(res.matches)
        for m in ms:
            g_logger.info(trans2json(message="company_id:%s, "
                                     "meta_uuid:%s, instance_uuid:%s, vddb_company_id:%s"
                                     % (t.account, m['meta_uuid'],
                                        m['instance_id'], m['company_id']),
                                     action='matches info'))
        mc = len(ms)
        #m = match_saver(self.hbase_pool, self.redis_conn, task_status, ms, res.crr)
        #m.save()
        self.send_matches(t, ms, res.crr)
        task_status = db_txn(self.pool, partial(self.load_task_status,
                                                t.uuid))
        self.update_hbase_task(task_status)
        db_txn(self.pool, partial(self.update_task, t, code, mc, tr))
    except:
        self.logger.error('failed to finish task: %s, site_asset_id: %s'
                          % (t.uuid, t.site_asset_id), exc_info=True)
        # dooming may succeed, as it touches fewer tables
        self.doom(t, INTERNAL, p, res)
        return
    g_logger.info(trans2json(message="site_asset_id:%s, "
                             "task_uuid:%s, external_id:%s"
                             % (t.site_asset_id, t.uuid, t.external_id),
                             action="task query complete"))
    stats.incr(QUERY_SUCCESS, 1)
def upload_file(upload_path, file_path):
    ret = -1
    err = ''
    try:
        args = "%s/swift upload '%s' '%s' " % (gv.swith_path, upload_path,
                                               file_path)
        ret, _, err = popen(args)
        if ret == 0:
            g_logger.info(trans2json('upload file success'))
            # only delete the local file once the upload succeeded
            os.remove(file_path)
        else:
            g_logger.error(trans2json('upload file failed, cmd is %s, '
                                      'reason is %s' % (args, err)))
    except:
        g_logger.error(trans2json("delete or upload bt file %s error %s"
                                  % (file_path, traceback.format_exc())))
    finally:
        return False if ret else True
def query_vddb_async(req_hash, data): g_logger.debug(trans2json("query vddb async by hash %s" % str(req_hash))) mysystem = mysystem(gv.mysystem_user, gv.mysystem_passwd, gv.mysystem_url, False,3, g_logger) uuid = data['params']['external_id'] ret, status_listing = mysystem.query(req_hash, uuid) return ret, status_listing '''
def download_file(swift_path, download_path):
    ret = -1
    err = ''
    download_far_name = ''
    try:
        # swift_path is 'container/obj/ect/name'; keep everything after the
        # first segment as the object name
        swift_path_list = swift_path.split('/')
        container = swift_path_list[0]
        far_name = swift_path_list[-1]
        swift_name = swift_path[len(container) + 1:len(swift_path)]
        download_far_name = os.path.join(download_path, far_name)
        args = "%s/swift download %s %s -o %s " % (gv.swith_path, container,
                                                   swift_name,
                                                   download_far_name)
        ret, _, err = popen(args)
        if ret == 0 and not err:
            g_logger.info(trans2json('download file success: %s' % args))
    except Exception:
        g_logger.error(trans2json("download file error %s"
                                  % traceback.format_exc()))
    finally:
        return (False if ret or err else True), download_far_name
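# Illustrative walk-through of download_file's path handling (the values are
# assumptions, not from the original source): a swift_path of
# 'media/seeds/2013/abc.torrent' yields container 'media', swift object name
# 'seeds/2013/abc.torrent', and a local file '/tmp/abc.torrent'.
def demo_download():
    ok, local_path = download_file('media/seeds/2013/abc.torrent', '/tmp')
    if ok:
        print local_path  # -> /tmp/abc.torrent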
def POST(self):
    try:
        stats.incr(RESULT_INSERT, 1)
        web.header("Content-Type", "application/json")
        res = web.data()
        req = web.input()
        self.logger.info('input:%s', req)
        self.source = req.get("source", "")
        res = json.loads(res)
        self.logger.debug('get input :%s', req)
        self.logger.debug('get message :%s', res)
        g_logger.info(trans2json(message='get message, input: %s, '
                                 'msg: %s' % (req, res),
                                 action='get request post'))
        if not self.checkParams(res):
            self.error_code = PARAMS_ERROR["code"]
            self.error_msg = PARAMS_ERROR["message"]
            self.error_data.append("site_asset_id")
            raise web.BadRequest(wrap_error(self.error_code, self.error_msg,
                                            self.error_data))
        ins = insert(self.site_asset_id, self.source, self.match_type,
                     self.matches, self.extra_info, self.crr, self.url,
                     self.parent_info)
        tid, src = ins.store_result()
        g_logger.info(trans2json(message='reply to caller, task_id:%s, '
                                 'source:%s' % (tid, src),
                                 action='reply to caller'))
        stats.incr(RESULT_INSERT_SUCCESS, 1)
        if self.source == 'init':
            return init_result(tid, src)
        else:
            return insert_result()
    except web.BadRequest, e:
        # let parameter errors surface as 400s instead of being converted
        # to 500s by the handler below
        stats.incr(RESULT_INSERT_FAILED, 1)
        raise e
    except Exception:
        stats.incr(RESULT_INSERT_FAILED, 1)
        self.logger.error("va-interface catch unhandled exception",
                          exc_info=True)
        self.error_code = INTERNAL_ERROR["code"]
        self.error_msg = INTERNAL_ERROR["message"]
        raise web.internalerror(message=wrap_error(self.error_code,
                                                   self.error_msg,
                                                   self.error_data))
def process_task(self, body, message):
    try:
        data = json.loads(body)
        utils.digest = data['params']['digest']
        g_logger_info.info(trans2json("receive CAS finish message %s"
                                      % (data), "qb_pull_receive_cas"))
        gv.statsd_conn.incr(
            "thunder.querybroker.qbpull.receive_cas_finish_message", 1)
        result = self.check_input_params(data)
        if result[0] != 0:
            error_message = self.trans_error_json(result, data)
            message.ack()
            g_logger.error(trans2json("response info %s" % error_message))
            return
        else:
            # check error code
            error_code = int(data['params']['error_code'])
            if error_code == gv.DOWNLOAD_SUCCESS:
                # download correct
                # write to redis url/bt hash and dna hash
                #writeHashToRedis(data)
                # get query hash result
                if data['params'].has_key('files'):
                    self.parse_and_send_vddbmessage(data)
                    message.ack()
                    return
                else:
                    self.parse_and_send_casmessage(data)
                    message.ack()
                    return
            else:
                # download error
                if error_code in gv.UNRECOGNIZED_ERROR_LIST:
                    post_to_vddbdnaerror(data, error_code, '')
                    message.ack()
                    return
    except Exception:
        message.ack()
        g_logger.error(trans2json("worker_query errors happened %s"
                                  % str(traceback.format_exc())))
def pick(self, t):
    self.picked[t.account].add(t.id)
    if t.account in self.fetching:
        self.fetching[t.account].add(t.id)
    self.manager.request(t)
    self.logger.info('picked task %s for account %s, site_asset_id %s',
                     t.uuid, t.account, t.site_asset_id)
    g_logger.info(trans2json(action="picked task to query",
                             message="task_uuid: %s, site_asset_id: %s, external_id: %s"
                             % (t.uuid, t.site_asset_id, t.external_id)))
    stats.incr(PICKED, 1)
def send_task_pushresult(self, data):
    message = json.dumps(data)
    g_logger_info.info(trans2json("send to push result message %s" % (data),
                                  "qb_push_push_result"))
    gv.statsd_conn.incr("thunder.querybroker.qbpush.send_qbresultpush", 1)
    with producers[self.pushresult_connection].acquire(block=True) as producer:
        producer.publish(message, serializer='json', compression='bzip2',
                         exchange=self.pushresult_exchange,
                         declare=[self.pushresult_exchange],
                         routing_key=gv.pushresult_routing_key)
def post_to_vddbdnaerror(data, code, dna_hash):
    files_size_len = 0
    if data['params'].has_key('files'):
        files_size_len = len(data['params']['files'])
    message = {}
    message['jsonrpc'] = '2.0'
    message['method'] = 'insert'
    message['id'] = 'null'
    message['params'] = {}
    message['params']['site_asset_id'] = []
    if code in gv.UNRECOGNIZED_ERROR_LIST:
        message['params']['match_type'] = 'unrecognized'
    elif code in gv.NOMATCH_ERROR_LIST:
        message['params']['match_type'] = 'no_match'
    set_parent_info(data, message)
    # multi-file tasks get their site_asset_ids suffixed with the dna hash
    if files_size_len > 1:
        if data['params'].has_key('seed_file'):
            if data['params']['seed_file']['hash'] is not None and \
                    data['params']['seed_file']['hash'] != '':
                message['params']['site_asset_id'].append(
                    data['params']['seed_file']['hash'] + '-' + dna_hash)
        if data['params'].has_key('url'):
            if data['params']['url']['hash'] is not None and \
                    data['params']['url']['hash'] != '':
                message['params']['site_asset_id'].append(
                    data['params']['url']['hash'] + '-' + dna_hash)
        if data['params']['thunder_hash'] is not None and \
                data['params']['thunder_hash'] != '':
            message['params']['site_asset_id'].append(
                data['params']['thunder_hash'] + '-' + dna_hash)
    else:
        if data['params'].has_key('seed_file'):
            if data['params']['seed_file']['hash'] is not None and \
                    data['params']['seed_file']['hash'] != '':
                message['params']['site_asset_id'].append(
                    data['params']['seed_file']['hash'])
        if data['params'].has_key('url'):
            if data['params']['url']['hash'] is not None and \
                    data['params']['url']['hash'] != '':
                message['params']['site_asset_id'].append(
                    data['params']['url']['hash'])
        if data['params']['thunder_hash'] is not None and \
                data['params']['thunder_hash'] != '':
            message['params']['site_asset_id'].append(
                data['params']['thunder_hash'])
        if dna_hash != '':
            message['params']['site_asset_id'].append(dna_hash)
    header = {"Content-Type": "application/json"}
    conn = httplib.HTTPConnection(gv.mysystem_host, int(gv.mysystem_port))
    g_logger_info.info(trans2json("post %s to vddb-async matches" % message))
    conn.request('POST', "/vddb-async/matches?source=auto_match",
                 json.dumps(message), header)
def main():
    args = docopt.docopt(__doc__, version=gv.version)
    cfg_file = get_conf_abspath(args)
    check_conf_validation(cfg_file)
    cfg = parse_conf_file(cfg_file)
    init_logger(cfg)
    get_global_vars(cfg)
    gv.dp.start()
    init_statsd()
    while True:
        with Connection(gv.qb_url) as conn:
            try:
                worker = Worker(conn, gv.qb_exchange, gv.qb_queue,
                                gv.qb_routing_key)
                g_logger.info(trans2json('query_broker qb_push service start'))
                worker.run()
            except Exception:
                g_logger.error(trans2json("qb_push %s happened!"
                                          % str(traceback.format_exc())))
    gv.dp.join()
def GET(self):
    try:
        stats.incr(RECEIVE_REQUEST, 1)
        web.header("Content-Type", "application/json")
        req = web.input()
        self.site_asset_id = str(req.get('site_asset_id', ""))
        if req.get('all_matches', 'true').lower() == 'false':
            self.all_matches = False
        if self.site_asset_id == "":
            self.error_code = PARAMS_ERROR["code"]
            self.error_msg = PARAMS_ERROR["message"]
            self.error_data.append("site_asset_id")
            raise web.BadRequest(wrap_error(self.error_code, self.error_msg,
                                            self.error_data))
        self.logger.info("get history matches, site_asset_id: %s, "
                         "all_matches: %s", self.site_asset_id,
                         self.all_matches)
        g_logger.info(trans2json(message="site_asset_id: %s, "
                                 "all_matches: %s"
                                 % (self.site_asset_id, self.all_matches),
                                 action="get history matches"))
        mr = match_result(self.site_asset_id, self.all_matches)
        res = mr.load()
        self.logger.debug("site_asset_id: %s, "
                          "all_matches: %s, history matches: %s",
                          self.site_asset_id, self.all_matches, res)
        if res is None:
            g_logger.info(trans2json(message="site_asset_id:%s, "
                                     "all_matches: %s"
                                     % (self.site_asset_id,
                                        self.all_matches),
                                     action="no such task"))
        return res
    except web.BadRequest, e:
        stats.incr(REQUEST_ERROR, 1)
        self.logger.error("site_asset_id is null")
        raise e
def check_input_params(self, data): method = "finish_task" result = [0, 0] if not data.has_key('jsonrpc'): result[0] = JSONRPC_ERROR result[1] = "There is no key named jsonrpc" g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data.has_key('method') or data['method'] != method: result[0] = METHOD_ERROR result[ 1] = "There is no key named method or method is not " + method g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data.has_key('params'): result[0] = PARAMS_ERROR result[1] = "There is no key named params" g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data['params']['additional_info'].has_key( 'client_id' ) or data['params']['additional_info']['client_id'] == '': result[0] = CLIENT_ID_ERROR result[ 1] = "There is no key named client_id in params or client_id is null " g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data['params'].has_key('error_code'): result[0] = ERROR_CODE_ERROR result[1] = "There is no key named error_code" g_logger.error( trans2json("input params check failed: %s" % result[1])) else: g_logger.info(trans2json("----Params check done.----")) return result
def store_result(self):
    task_id = None
    source = None
    res = self.get_sid_info()
    if res:
        source = res['t:source']
        self.logger.info('start update res:%s', res)
        task_id = res['t:task_uuid']
        self.logger.info('task_id:%s', task_id)
        if self.source == 'manual' \
                or self.source == 'auto_match' \
                or self.source == 'init' \
                or (res['t:source'] == 'manual_tmp' and self.source == 'manual_tmp') \
                or (self.source == 'manual_tmp' and res['t:source'] ==
                    'auto_match' and not self.check_finished(task_id)):
            self.logger.info('source:%s, type:%s, check_finished:%s',
                             self.source, type(self.source),
                             self.check_finished(task_id))
            self.update_result(self.source, task_id, self.match_type,
                               self.matches, self.extra_info, self.url,
                               self.site_asset_id)
            if not self.has_parent(self.site_asset_id):
                self.save_redis(task_id, self.source, self.match_type)
            g_logger.info(trans2json(message='succeed to update result, '
                                     'task_id:%s, match_type:%s, source:%s'
                                     % (task_id, self.match_type,
                                        self.source),
                                     action='update result'))
    else:
        self.logger.info('start to store new task')
        task_id = str(uuid.uuid1())
        self.store_sid_tid(self.site_asset_id, task_id, self.source)
        self.store_tid_sid(task_id, self.site_asset_id)
        self.save_matches(self.match_type, task_id, self.matches)
        self.store_task_info(task_id, self.extra_info, self.url)
        self.logger.info('succeed to store task: %s, task_id %s',
                         self.site_asset_id, task_id)
        #storeHbasempl('unpush', str(task_d), {'u:match_type':'new'})
        #storeFinished('finished', )
        #self.store_unpush(task_id, 'new')
        self.save_redis(task_id, self.source, self.match_type)
        #if self.source != 'init' and self.source != 'manual_tmp':
        #    self.store_unpush(task_id, 'new')
    if self.parent_info:
        for i in self.parent_info:
            for k, v in i.items():
                redis_conn.delete(k + "-s")
                self.store_parent_task(k, v)
    self.record_result_statsd()
    return task_id, source
def send_task_priority_escalator(self, data):
    data['params']['downloader_time'] = 0
    data['params']['downloader_retry'] = 0
    message = json.dumps(data)
    g_logger_info.info(trans2json("send task priority escalator message %s"
                                  % (data), "qb_push_send_priority"))
    gv.statsd_conn.incr("thunder.querybroker.qbpush.send_qbpriority", 1)
    with producers[self.taskpriority_connection].acquire(block=True) as producer:
        producer.publish(message, serializer='json', compression='bzip2',
                         exchange=self.taskpriority_exchange,
                         declare=[self.taskpriority_exchange],
                         routing_key=gv.taskpriorit_routing_key)
def main():
    args = docopt.docopt(__doc__, version=gv.version)
    cfg_file = get_conf_abspath(args)
    check_conf_validation(cfg_file)
    cfg = parse_conf_file(cfg_file)
    init_logger(cfg)
    get_global_vars(cfg)
    init_statsd()
    gv.dp.start()
    thread_tasker = fetch_query_result()
    thread_tasker.start()
    while True:
        with Connection(gv.finsh_url) as conn:
            try:
                worker = Worker_query(conn)
                g_logger.info(trans2json("start exceptionhandle and query service"))
                worker.run()
            except Exception:
                g_logger.error(trans2json("error happened! %s"
                                          % str(traceback.format_exc())))
    thread_tasker.join()
    gv.dp.join()
def send_task_to_vddb(self, data):
    message = json.dumps(data)
    #message = data
    g_logger_info.info(trans2json("send to vddb async query %s" % (data),
                                  "qb_pull_send_vddb"))
    gv.statsd_conn.incr("thunder.querybroker.qbpull.send_to_vddb_query", 1)
    with producers[self.vddb_connection].acquire(block=True) as producer:
        producer.publish(message, serializer='json', compression='bzip2',
                         exchange=self.vddb_exchange,
                         declare=[self.vddb_exchange],
                         routing_key=gv.vddb_queryrouting_key)
def check_input_params(self, data): method = "submit_task" result = [0, 0] if not data.has_key('jsonrpc'): result[0] = 121201 result[1] = "There is no key named jsonrpc" g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data.has_key('method') or data['method'] != method: result[0] = 121202 result[ 1] = "There is no key named method or method is not " + method g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data.has_key('params'): result[0] = 121203 result[1] = "There is no key named params" g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data['params'].has_key('additional_info'): result[0] = 121204 result[ 1] = "There is no key named client_id in params or client_id is null " g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data['params'].has_key( 'external_id') or data['params']['external_id'] == '': result[0] = 121210 result[ 1] = "There is no key named external_id or external_id is null" g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data['params']['thunder_hash']: if data['params'].has_key('url'): if not data['params']['url']['hash']: if data['params'].has_key('seed_file'): if not data['params']['seed_file']['hash']: result[0] = 121211 result[1] = "There is no hash in params" g_logger.error( trans2json("input params check failed: %s" % result[1])) else: g_logger.debug(trans2json("----Params check done.----")) return result
def check_input_params(self, data): method = "submit_task" result = [0, 0] if not data.has_key('jsonrpc'): result[0] = 121201 result[1] = "There is no key named jsonrpc" g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data.has_key('method') or data['method'] != method: result[0] = 121202 result[ 1] = "There is no key named method or method is not " + method g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data.has_key('params'): result[0] = 121203 result[1] = "There is no key named params" g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data['params'].has_key('additional_info'): result[0] = 121204 result[ 1] = "There is no key named client_id in params or client_id is null " g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data['params'].has_key('external_id') or data['params']['external_id'] == '': result[0] = 121210 result[1] = "There is no key named external_id or external_id is null" g_logger.error( trans2json("input params check failed: %s" % result[1])) elif not data['params']['thunder_hash']: if data['params'].has_key('url'): if not data['params']['url']['hash']: if data['params'].has_key('seed_file'): if not data['params']['seed_file']['hash']: result[0] = 121211 result[1] = "There is no hash in params" g_logger.error(trans2json("input params check failed: %s" % result[1])) else: g_logger.debug(trans2json("----Params check done.----")) return result
def writeHashToRedis(data):
    key_hash = ''
    if data['params'].has_key('files'):
        if len(data['params']['files']) > 1:
            if data['params'].has_key('url'):
                key_hash = data['params']['url']['hash']
            else:
                ret_code, bt_file_name = download_file(
                    data['params']['seed_file']['path'], gv.file_tmpdir)
                if ret_code == True:
                    seed_file_content = ''
                    with open(bt_file_name, 'r') as fp:
                        seed_file_content = fp.read()
                    seed_file_hash = Hash(filename=bt_file_name,
                                          content=seed_file_content).value
                    data['params']['seed_file']['hash'] = seed_file_hash
                    key_hash = seed_file_hash
                    try:
                        os.remove(bt_file_name)
                    except OSError:
                        g_logger.error(trans2json("delete bt file %s error %s"
                                                  % (bt_file_name,
                                                     traceback.format_exc())))
        for i in data['params']['files']:
            key = "%s#%s" % (data['params']['additional_info']['client_id'],
                             key_hash)
            dna_hash = {}
            code = i['code']
            if code == GENERATE_SUCESS:
                dna_hash[i['hash']] = None
                dna_hash['file_path'] = i['file_path']
            elif code in (NOT_COPYWRITE, FILTERING):
                dna_hash[i['hash']] = 0
                dna_hash['file_path'] = i['file_path']
            elif code == GENERATE_FAILED:
                dna_hash[i['hash']] = 3
                dna_hash['file_path'] = i['file_path']
            writesetredis(gv.rds_conn, key, dna_hash)
def query_hash(data):
    result_hash_list = []
    start_time = time.time()
    if data['params'].has_key('url'):
        if data['params']['url']['hash'] is not None and \
                data['params']['url']['hash'] != '':
            ret_code, result = query_vddb_async(data['params']['url']['hash'],
                                                data)
            if ret_code == 1:
                end_time = time.time()
                #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time - start_time) * 1000)
                return ret_code, result
            result_hash_list.append((ret_code, result))
    if data['params']['thunder_hash'] is not None and \
            data['params']['thunder_hash'] != '':
        ret_code, result = query_vddb_async(data['params']['thunder_hash'],
                                            data)
        if ret_code == 1:
            end_time = time.time()
            #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time - start_time) * 1000)
            return ret_code, result
        result_hash_list.append((ret_code, result))
    if data['params'].has_key('seed_file'):
        seed_file_hash = ''
        if data['params']['seed_file']['hash'] != '':
            seed_file_hash = data['params']['seed_file']['hash']
        else:
            ret_code, bt_file_name = download_file(
                data['params']['seed_file']['path'], gv.file_tmpdir)
            if ret_code:
                client_id = data['params']['additional_info']['client_id']
                with open(bt_file_name, 'rb') as fp:
                    seed_file_content = fp.read()
                seed_file_hash = Hash(filename=bt_file_name,
                                      content=seed_file_content).value
                data['params']['seed_file']['hash'] = seed_file_hash
                try:
                    os.remove(bt_file_name)
                except OSError:
                    g_logger.error(trans2json("delete bt file %s error %s"
                                              % (bt_file_name,
                                                 traceback.format_exc())))
        ret_code, result = query_vddb_async(seed_file_hash, data)
        if ret_code == 1:
            end_time = time.time()
            #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time - start_time) * 1000)
            return ret_code, result
        result_hash_list.append((ret_code, result))
    if data['params'].has_key('files'):
        hash_list = []
        data_list = []
        for i in data['params']['files']:
            dna_hash = i['hash']
            hash_list.append(dna_hash)
            data_list.append(data)
        result_list = map(query_vddb_async, hash_list, data_list)
        for i in range(len(result_list)):
            if result_list[i][0] == 1:
                end_time = time.time()
                #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time - start_time) * 1000)
                return result_list[i][0], result_list[i][1]
    end_time = time.time()
    #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time - start_time) * 1000)
    return 3, None
def run(self):
    g_logger.info(trans2json("start fetch query result service"))
    self.th.run()
def query(t, account, backends):
    #account {..., backends:[{'extra':, 'account':, 'backend':}]}
    #backends {id:{'capacity':, 'id':, 'vddb_address':}}
    logger = logging.getLogger("mw_celery_task")
    try:
        logger.info("receive task to query: task_uuid: %s, "
                    "site_asset_id: %s" % (t['uuid'], t['site_asset_id']))
        g_logger.info(trans2json(message="site_asset_id:%s, "
                                 "task_uuid:%s, external_id:%s"
                                 % (t['site_asset_id'], t['uuid'],
                                    t['external_id']),
                                 action="get task from celery"))
        urls = []
        for b in account['backends']:
            be = backends[b['backend']]  #{'capacity':, 'id':, 'vddb_address':}
            opts = dict(level=b['level'], mode=b['mode'], extra=b['extra'])
            if account['hot'] == 'true':
                opts['hot_user'] = account['hot_user']
                opts['hot_pass'] = account['hot_pass']
            # if account['slicing'] == 'true':
            #     opts['slice'] = p['slice_duration']
            qs = urlencode(opts.items())
            urls.append('vdna://%s:%s@%s/?%s' % (account['backend_user'],
                                                 account['backend_pass'],
                                                 be['vddb_address'], qs))
        args = ['/'.join([os.getenv('MW_HOME'), 'lib', 'task_executor.py']),
                '--task', str(t['id']),
                '--task-uuid', t['uuid'],
                '--timestamp', str(t['created_at']),
                '--account', str(account['id']),
                '--site-domain', account['domain'],
                '--site-asset-id', t['site_asset_id'],
                '--clip-format', t['format'],
                '--dna-url', t['dna_url']]
        if account['slicing'] == 'true':
            args.extend(['--slice-duration', str(account['slice_duration'])])
        if account['allow_partial'] == 'false':
            args.append('--fail-partial')
        # reverse query ingestion triggering is done by reverse_trigger.py
        # if account['do_reverse'] == 'true':
        #     args.extend('--reverse-query')
        for u in urls:
            args.extend(['--backend', u])
        if t['scope'] is not None:
            for s in t['scope']:
                args.extend(['--scope', s])
        for r in account['rules']:
            args.append('--' + r)
    except Exception:
        logger.error("generate command line failed, "
                     "uuid: %s, site_asset_id: %s"
                     % (t['uuid'], t['site_asset_id']), exc_info=True)
        logger.debug("task: %s, account: %s, backends: %s"
                     % (t, account, backends))
        raise TaskException(err="query failed, generate execute cmd failed")
    else:
        # bufsize=-1 usually means fully buffer the output, usually, ugh
        # please contact [email protected] if stdout is blocked
        proc = None
        try:
            start_time = time.time()
            proc = Popen(args, close_fds=True, stdout=PIPE, bufsize=-1)
            #row = db_txn(pool, partial(self.record, t, proc.pid))
            logger.info("spawn a process to query, task_uuid: %s, "
                        "site_asset_id: %s, just wait til finished"
                        % (t['uuid'], t['site_asset_id']))
            g_logger.info(trans2json(message="site_asset_id:%s, "
                                     "task_uuid:%s, external_id:%s"
                                     % (t['site_asset_id'], t['uuid'],
                                        t['external_id']),
                                     action="start query vddb"))
            out, err = proc.communicate()
            ret = proc.wait()
            logger.info("query finished, return to manager, "
                        "task_uuid: %s, site_asset_id: %s, "
                        "ret: %s, out: %s, err: %s"
                        % (t['uuid'], t['site_asset_id'], ret, out, err))
            g_logger.info(trans2json(message="site_asset_id:%s, "
                                     "task_uuid:%s, external_id:%s"
                                     % (t['site_asset_id'], t['uuid'],
                                        t['external_id']),
                                     action="query vddb finished"))
            end_time = time.time()
            # convert the elapsed seconds to milliseconds before truncating
            stats.timing(QUERY_VDDB, int((end_time - start_time) * 1000))
            return TaskRes(t, ret, out, err)
        except:
            logger.error("spawn process catch exception, uuid: %s, "
                         "site_asset_id: %s" % (t['uuid'],
                                                t['site_asset_id']),
                         exc_info=True)
            logger.debug("task: %s, account: %s" % (t, account))
            raise TaskException(err="query failed, spawn process failed")