Example no. 1
 def do_doom(self, t, code, queue_at, deadline):
     logger = logging.getLogger('mwtm_cleaner')
     if queue_at is not None and (deadline is None or deadline == 0 or
                                  deadline > queue_at):
         logger.debug('to retry task %s, queue at %s', t.uuid, queue_at)
         yield db_execute(RETRY_TASK, queue_at, code, t.id)
         #yield db_execute(RENEW_EVENT, t.uuid, 'retry')
         g_logger.info(
             trans2json(message="task_uuid:%s, "
                        "site_asset_id:%s, deadline:%s, external_id:%s " %
                        (t.uuid, t.site_asset_id, deadline, t.external_id),
                        action="retry task"))
     else:
         logger.debug('to fail task %s', t.uuid)
         g_logger.info(
             trans2json(message="task_uuid:%s, "
                        "site_asset_id:%s, external_id:%s" %
                        (t.uuid, t.site_asset_id, t.external_id),
                        action="to fail task"))
         rc, _ = yield db_query(CHECK_TASK, t.id)
         if rc <= 0:
             yield db_execute(FAIL_TASK, code, t.id)
             self.send_matches(t, unrecognized=True)
             task_status = db_txn(self.pool,
                                  partial(self.load_task_status, t.uuid))
             self.update_hbase_task(task_status)
         stats.incr(QUERY_FAILED, 1)
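The helper trans2json is never defined in these examples; it is only ever called with message= and action= keyword arguments, and its return value is handed to a logger. A minimal sketch consistent with that usage (the timestamp field is an assumption):

 import json
 import time

 def trans2json(message="", action=""):
     # hypothetical implementation: serialize the log fields as one JSON line
     return json.dumps({"action": action, "message": message,
                        "timestamp": int(time.time())})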
Example no. 2
 def checkParams(self, res):
     if "params" in res:
         res = res['params']
         self.parent_info = res.get('parent_info', [])
         self.match_type = res.get('match_type')
         self.matches = res.get('matches', [])
         self.extra_info = res.get('extra_info')
         self.crr = res.get('notification')
         self.url = res.get('extra_info_url')
         # anything other than an explicit 'match' carries no usable matches
         if self.match_type is not None and self.match_type != 'match':
             self.matches = []
         if 'site_asset_id' not in res:
             g_logger.info(trans2json(message='params error: params has'
                                      ' no key site_asset_id',
                                      action='checkParams'))
             return False
         if (not isinstance(res['site_asset_id'], list)
                 or res['site_asset_id'] == []):
             g_logger.info(trans2json(message="params error: "
                                      "site_asset_id is '' or []",
                                      action='checkParams'))
             return False
         self.site_asset_id = res['site_asset_id']
     return True
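To make the validation rules above concrete, here are two hypothetical payloads; the first passes, the second fails because site_asset_id must be a non-empty list:

 ok = {"params": {"site_asset_id": ["site1-123"],
                  "match_type": "match",
                  "matches": [{"meta_uuid": "m1"}]}}
 bad = {"params": {"site_asset_id": ""}}  # rejected: not a non-empty list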
Example no. 3
 def send_matches(self, task, matches=None, crr="", unrecognized=False):
     # None instead of a mutable [] default; normalize below
     matches = matches if matches is not None else []
     match_type = "no_match"
     if unrecognized:
         match_type = "unrecognized"
     elif matches:
         match_type = 'match'
     data = dict(id="null", jsonrpc="2.0",
                 method="matches",
                 params=dict(matches=matches,
                 # literal_eval parses the stored list literal without
                 # executing arbitrary code, unlike eval
                 site_asset_id=ast.literal_eval(task.site_asset_id),
                 notification=crr,
                 match_type=match_type))
     params = dict(source="auto_match")
     req = None
     try:
         req = requests.post(self.matches_server, params=params,
                             data=json.dumps(data))
         if req.status_code != 200:
             self.logger.error("send matches failed, code:%s", req.status_code)
             raise SendMatchesError("send matches failed, task_id:%s" % task.uuid)
     except RequestException:
         self.logger.error("send matches failed, %s", task.uuid, exc_info=True)
         raise SendMatchesError("send matches failed")
     self.logger.info("send matches success, task_uuid:%s, site_asset_id:%s,"
                      "external_id:%s", task.uuid, task.site_asset_id,
                      task.external_id)
     g_logger.info(trans2json(message="task_uuid:%s, "
                   "site_asset_id:%s, external_id:%s " % \
                   (task.uuid, task.site_asset_id,
                    task.external_id), action="send matches success"))
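For reference, send_matches posts a JSON-RPC 2.0 request; with illustrative values the body looks like this:

 {"id": "null",
  "jsonrpc": "2.0",
  "method": "matches",
  "params": {"matches": [],
             "site_asset_id": ["site1-123"],
             "notification": "",
             "match_type": "no_match"}}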
Example no. 4
 def process_task(self, body, message):
     self.logger.info('get task from broker :%s, type:%s', body, type(body))
     g_logger.info(
         trans2json(message="get task from broker :"
                    "%s" % str(body),
                    action="fetchTask"))
     stats.incr(FETCH_TASKS, 1)
     # normalize the payload: checkParams and process need a parsed dict
     if not isinstance(body, dict):
         body = json.loads(body)
     if self.checkParams(body):
         try:
             self.process(config, body)
             message.ack()
             stats.incr(FINISHED_TASKS, 1)
         except sotreError:
             message.ack()
             stats.incr(DROP_TASKS, 1)
             g_logger.error(
                 trans2json(message='something unexpected happened,'
                            ' maybe a db error',
                            action='store task to db'))
             self.logger.error('something unexpected happened, Error:',
                               exc_info=True)
     else:
         self.logger.error("params is error: %s", body)
         g_logger.error(
             trans2json(message='message from mq , params is error'))
         stats.incr(DROP_TASKS, 1)
         message.ack()
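sotreError is raised by process (Example no. 9) and caught here; its definition is not shown, but a plain Exception subclass would satisfy both uses:

 class sotreError(Exception):
     # hypothetical definition; the examples only raise and catch it
     pass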
Example no. 5
 def load(self):
     '''
         :returns: `None` or `dict`
             `None`: no such task
             `{}`: in progress
             `{"key1": "value1", ...}`: normal results
     '''
     cache = self.load_from_cache()
     if cache is None:
         if self.has_parent():
             self.prefix_search = True
         g_logger.info(
             trans2json(message="site_asset_id:%s, "
                        "prefix_search:%s" %
                        (self.site_asset_id, self.prefix_search),
                        action="no hit cache"))
         tids = self.get_tids()
         self.logger.debug("task_ids: %s" % tids)
         format_data = None
         if not tids:
             format_data = wrap_error(NO_TASK_ERROR['code'],
                                      NO_TASK_ERROR['message'], [])
         else:
             format_data = self.format_matches(tids)
         self.save_cache(format_data)
         return format_data
     else:
         self.logger.debug("hit cache, site_asset_id:[%s], cache: %s" %
                           (self.site_asset_id, cache))
         return cache
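load_from_cache and save_cache are not shown; load() only relies on load_from_cache returning None on a miss and save_cache accepting the formatted dict. A minimal redis-backed sketch under those assumptions (redis_conn as in the store_result examples):

 import json

 def load_from_cache(self):
     # hypothetical: None signals a cache miss
     raw = redis_conn.get(self.site_asset_id)
     return None if raw is None else json.loads(raw)

 def save_cache(self, data):
     redis_conn.set(self.site_asset_id, json.dumps(data))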
Example no. 6
 def task_finished(self, celery_id, query_res):
     try:
         t = self.taskm[celery_id]['task']
         self.logger.info("finished query, task_id:%s, "
                          "site_asset_id: %s, celery_id:%s, "
                          "ret: %s, err: %s " %
                          (t.uuid, t.site_asset_id, celery_id,
                           query_res.ret, query_res.err))
         self.logger.debug("task_id:%s, out: %s", t.uuid, query_res.out)
         self.logger.debug("finished task info: %s" % str(t))
         #parse query result
         g_logger.info(
             trans2json(message="site_asset_id:%s, "
                        "task_uuid:%s, external_id:%s " %
                        (t.site_asset_id, t.uuid, t.external_id),
                        action="task finished query from celery"))
         if not isinstance(query_res, TaskRes):  #means catch some exception
             self.cleaner.request((t, BAD_OUTPUT, None))
         else:
             _, state, res = self.parse_query_res(query_res)
             self.cleaner.request((t, state, res))
     except Exception:
         # note: t is unbound here if the taskm lookup itself failed
         self.cleaner.request((t, BAD_OUTPUT, None))
         self.logger.error("task_finished caught an unhandled exception, "
                           "task_uuid:%s" % t.uuid,
                           exc_info=True)
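task_finished checks isinstance(query_res, TaskRes) and reads query_res.ret/out/err; together with the TaskRes(t, ret, out, err) construction in the query example at the end, a namedtuple fits:

 from collections import namedtuple

 # field order matches the TaskRes(t, ret, out, err) call below
 TaskRes = namedtuple('TaskRes', ['task', 'ret', 'out', 'err'])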
Example no. 7
def run(queue):
    pro = producer()
    push = pusher(pro)
    while True:
        t = queue.get(block=True)
        logger.info('get a task to push, task_id: %s', t)
        g_logger.info(
            trans2json(message='get a task to push, task_id: %s' % t,
                       action='get task'))
        task = getTask(t)
        logger.info('------task:%s', task)
        try:
            res = push.getMatch(t)
            #logger.info('type:%s, %s', type(res), res)
            push.pushResult(res)
            updateFinished(t, 'success')
            dropUnpush(t)
            logger.info(
                'succeeded in pushing the match result, task_id: %s,'
                ' result: %s', t, res)
            g_logger.info(
                trans2json(message='succeed to push the match result ,'
                           'external_id :%s, task_id: %s' % (
                               task['i:external_id'],
                               t,
                           ),
                           action='push result'))
        except resultError:
            logger.error("failed to get the result , task_id: %s", t)
            g_logger.error(
                trans2json(message='failed to push result,'
                           'external_id: %s, task_id: %s' %
                           (task['i:external_id'], t),
                           action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except AssertionError:
            logger.error("failed to get matches, task_id: %s", t)
            g_logger.error(
                trans2json(message='failed to push result,'
                           'external_id: %s, task_id: %s' %
                           (task['i:external_id'], t),
                           action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except:
            logger.error(
                'failed to push result, reset status to new,'
                ' task_id: %s, Error:',
                t,
                exc_info=True)
            g_logger.error(
                trans2json(message='failed to push result, reset status'
                           ' to new, external_id: %s, task_id: %s ' %
                           (task['i:external_id'], t),
                           action='retry to push'))
            changeStatus(t, 'new')
            logger.error("reset status to new , task_id: %s", t)
Example no. 8
    def start(self):
        g_logger.info('wait for time:{}'.format(self.start_date_time))
        while True:

            if self.local_time() >= self.start_date_time_ms:
                g_logger.info('run')
                break
            else:
                time.sleep(self.sleep_interval)
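start compares self.local_time() against start_date_time_ms, so local_time presumably returns the current time in milliseconds; a sketch under that assumption:

 import time

 def local_time(self):
     # hypothetical: wall-clock time in milliseconds
     return int(time.time() * 1000)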
Example no. 9
 def process(self, config, body):
     #db_txn(pool, partial(store), data, cre)
     self.logger.info('start to process message')
     task_id = None
     task = defaultdict(list)
     try:
         #self.process(config, task)
         req = requests.post(config['matches_server'] + '?source=init',
                             data=json.dumps(body))
         self.logger.info('get request :%s, type:%s', req.content,
                          type(req.content))
         res = json.loads(req.content)
         task_id = res['result']['task_id']
         parseParams(body, task)
         if res['result']['source'] == 'auto_match':
             db_txn(pool, partial(updateStatus), task_id,
                    task['site_asset_id'], task['external_id'])
             self.logger.info(
                 'this task has already been in hbase reset'
                 ' status to new , site_asset_ids : %s',
                 task['site_asset_id'])
             g_logger.info(
                 trans2json(
                     message='task has already been in'
                     ' hbase, external_id:%s, site_asset_id:%s, task_id:%s'
                     % (
                         task['external_id'],
                         task['site_asset_id'],
                         task_id,
                     ),
                     action="reset status to new"))
         else:
             genTask(task)
             task['task_uuid'] = task_id
             db_txn(pool, partial(storeTaskMysql), task)
             g_logger.info(
                 trans2json(message="succeed to store task external_id:%s,"
                            " site_asset_id:%s, task_id:%s" %
                            (task['external_id'], task['site_asset_id'],
                             task['task_uuid']),
                            action='store task to db'))
     except:
         self.logger.error(
             'failed to store task, start to retry, task_uuid: %s'
             ' site_asset_id: %s',
             task_id,
             task['site_asset_id'],
             exc_info=True)
         g_logger.error(
             trans2json(
                 message='failed to store task, start to'
                 ' retry ,external_id:%s, site_asset_id: %s, task_id: %s' %
                 (task['external_id'], task['site_asset_id'], task_id),
                 action='store task to db'))
         raise sotreError
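db_txn(pool, partial(fn, ...), extra_args...) appears throughout these examples as a transaction wrapper. Its real signature is not shown; a generic sketch of the pattern (the connection handling and fn's first parameter are assumptions):

 def db_txn(pool, fn, *args):
     # hypothetical wrapper: run fn inside one transaction, commit on
     # success, roll back on any error
     conn = pool.get()
     try:
         ret = fn(conn, *args)
         conn.commit()
         return ret
     except Exception:
         conn.rollback()
         raise
     finally:
         pool.put(conn)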
Example no. 10
 def pick(self, t):
     self.picked[t.account].add(t.id)
     if t.account in self.fetching:
         self.fetching[t.account].add(t.id)
     self.manager.request(t)
     self.logger.info('picked task %s for account %s, site_asset_id %s',
                      t.uuid, t.account, t.site_asset_id)
     g_logger.info(trans2json(action="picked task to query",
                              message="task_uuid: %s, site_asset_id: %s, external_id: %s" % \
                              (t.uuid, t.site_asset_id, t.external_id)))
     stats.incr(PICKED, 1)
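stats.incr and stats.timing (used in the query example at the end) match the interface of the statsd client for Python; if that is what backs them, the setup would be roughly:

 import statsd  # assumption: the `statsd` PyPI package

 stats = statsd.StatsClient('localhost', 8125, prefix='mwtm')
 PICKED = 'picker.picked'  # hypothetical metric names
 QUERY_VDDB = 'manager.query_vddb'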
Example no. 11
    def store_result(self):
        task_id = None
        source = None
        res = self.get_sid_info()
        if res:
            source = res['t:source']
            self.logger.info('start update res:%s', res)
            task_id = res['t:task_uuid']
            self.logger.info('task_id:%s', task_id)
            if (self.source in ('manual', 'auto_match', 'init')
                    or (res['t:source'] == 'manual_tmp'
                        and self.source == 'manual_tmp')
                    or (self.source == 'manual_tmp'
                        and res['t:source'] == 'auto_match'
                        and not self.check_finished(task_id))):
                self.logger.info('source:%s, type:%s, check_finished:%s',
                                 self.source, type(self.source),
                                 self.check_finished(task_id))
                self.update_result(self.source, task_id, self.match_type,
                                   self.matches, self.extra_info, self.url,
                                   self.site_asset_id)
                if not self.has_parent(self.site_asset_id):
                    self.save_redis(task_id, self.source, self.match_type)
            g_logger.info(
                trans2json(message='succeed to update result,'
                           'task_id:%s, match_type:%s, source:%s' %
                           (task_id, self.match_type, self.source),
                           action='update result'))
        else:
            self.logger.info('start to store new task')
            task_id = str(uuid.uuid1())
            self.store_sid_tid(self.site_asset_id, task_id, self.source)
            self.store_tid_sid(task_id, self.site_asset_id)
            self.save_matches(self.match_type, task_id, self.matches)
            self.store_task_info(task_id, self.extra_info, self.url)
            self.logger.info('succeed to store task: %s, task_id %s',
                             self.site_asset_id, task_id)

            #storeHbasempl('unpush', str(task_d), {'u:match_type':'new'})
            #storeFinished('finished', )
            #self.store_unpush(task_id, 'new')
            self.save_redis(task_id, self.source, self.match_type)
        #if self.source !='init' and  self.source != 'manual_tmp':
        #    self.store_unpush(task_id, 'new')
        if self.parent_info:
            for i in self.parent_info:
                for k, v in i.items():
                    redis_conn.delete(k + "-s")
                    self.store_parent_task(k, v)
        self.record_result_statsd()
        return task_id, source
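The long condition in store_result encodes a precedence rule between the incoming source (self.source) and the stored one (res['t:source']); spelled out:

 # incoming source             stored source   result is updated?
 # manual / auto_match / init  any             yes
 # manual_tmp                  manual_tmp      yes
 # manual_tmp                  auto_match      only if the task is unfinished
 # manual_tmp                  anything else   no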
Example no. 12
    def buf_tasks(self, reqs):
        accs = self.accounts()
        backends = self.backends()
        for t in reqs:
            try:
                self.logger.info("receivce task from picker, task_uuid: %s, "
                                 "site_asset_id: %s" %
                                 (t.uuid, t.site_asset_id))
                self.logger.debug("receive task info:%s" % t._asdict())
                g_logger.info(
                    trans2json(message="site_asset_id:%s, "
                               "task_uuid:%s, external_id:%s" %
                               (t.site_asset_id, t.uuid, t.external_id),
                               action="receive picked task"))
                if not self.task_check(t, accs, backends):
                    self.reply(t)
                    continue
                acc = accs[t.account]._asdict()
                acc["backends"] = [v._asdict() for v in acc["backends"]]
                backs = {}
                for k, v in backends.iteritems():
                    backs[k] = v._asdict()
                self.logger.debug("add task's account: %s, backends: %s" %
                                  (acc, backs))

                ct = Task().query.delay(t._asdict(), acc, backs)
                self.taskm[ct.task_id]['celery_task'] = ct
                self.taskm[ct.task_id]['task'] = t
                self.tasks.add(ct)
                self.logger.info("add task to celery, task_uuid: %s, "
                                 "site_asset_id: %s, celery_uuid: %s " % \
                                 (t.uuid, t.site_asset_id, ct.task_id))
                g_logger.info(
                    trans2json(message="site_asset_id:%s, "
                               "task_uuid:%s, external_id:%s" %
                               (t.site_asset_id, t.uuid, t.external_id),
                               action="add task to celery"))

            except Exception:
                self.reply(t)
                self.logger.error("catch exception from buf tasks, "
                                  "task_uuid: %s , site_asset_id: %s" %
                                  (t.uuid, t.site_asset_id),
                                  exc_info=True)
                continue
            try:
                db_txn(self.pool, partial(self.record, t))
            except Exception:
                self.logger.error("failed to record execution for task %s" %
                                  t.uuid)
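self.tasks.add(ct) and the taskm bookkeeping are consistent with collecting celery AsyncResults in a celery.result.ResultSet keyed by task id; a sketch of that setup in the manager's __init__ (hypothetical):

 from collections import defaultdict
 from celery.result import ResultSet

 # assumptions: tasks collects in-flight results, taskm maps
 # celery task id -> {'celery_task': AsyncResult, 'task': picked task}
 self.tasks = ResultSet([])
 self.taskm = defaultdict(dict)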
Example no. 13
    def finish(self, t, p, res):
        self.logger.info('to finish task, task_uuid:%s, site_asset_id:%s',
                         t.uuid, t.site_asset_id)
        self.logger.debug("res:%s " % str(res))
        assert res.matches is not None
        code = WITHOUT_MATCH if len(res.matches) == 0 else WITH_MATCH
        if code == WITHOUT_MATCH:
            try:
                if db_txn(self.pool, partial(self.check_matches, t)):
                    code = WITH_MATCH
            except:
                # on a failed cross-check, keep the WITHOUT_MATCH verdict
                pass

        tr = 'match' if code == WITH_MATCH else 'no_match'
        self.logger.debug('record finished task %s, site_asset_id: %s', t.uuid,
                          t.site_asset_id)
        try:
            ms = self.filter_matches(res.matches)
            for m in ms:
                g_logger.info(
                    trans2json(
                        message="company_id:%s, "
                        "meta_uuid:%s, instance_uuid:%s, vddb_company_id:%s" %
                        (t.account, m['meta_uuid'], m['instance_id'],
                         m['company_id']),
                        action='matches info'))
            mc = len(ms)
            #m = match_saver(self.hbase_pool, self.redis_conn, task_status, ms, res.crr)
            #m.save()
            self.send_matches(t, ms, res.crr)
            task_status = db_txn(self.pool,
                                 partial(self.load_task_status, t.uuid))
            self.update_hbase_task(task_status)
            db_txn(self.pool, partial(self.update_task, t, code, mc, tr))
        except:
            self.logger.error('failed to finish task: %s, site_asset_id: %s' %
                              (t.uuid, t.site_asset_id),
                              exc_info=True)
            # dooming may succeed, as it touches fewer tables
            self.doom(t, INTERNAL, p, res)
            return
        g_logger.info(
            trans2json(message="site_asset_id:%s, "
                       "task_uuid:%s, external_id:%s" %
                       (t.site_asset_id, t.uuid, t.external_id),
                       action="task query complete"))
        stats.incr(QUERY_SUCCESS, 1)
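WITH_MATCH, WITHOUT_MATCH, BAD_OUTPUT and INTERNAL are result codes shared with do_doom and task_finished; their real values are not shown, so the following is purely illustrative:

 # hypothetical values; the examples only compare and forward these codes
 WITHOUT_MATCH, WITH_MATCH, BAD_OUTPUT, INTERNAL = range(4)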
Example no. 14
 def POST(self):
     try:
         stats.incr(RESULT_INSERT, 1)
         web.header("Content-Type", "application/json")
         res = web.data()
         req = web.input()
         self.logger.info('input:%s', req)
         self.source = req.get("source", "")
         res = json.loads(res)
         self.logger.debug('get input :%s', req)
         self.logger.debug('get message :%s', res)
         g_logger.info(
             trans2json(message='get message, input: %s, '
                        'msg: %s' % (req, res),
                        action='get request post'))
         if not self.checkParams(res):
             self.error_code = PARAMS_ERROR["code"]
             self.error_msg = PARAMS_ERROR["message"]
             self.error_data.append("site_asset_id")
             raise web.BadRequest(
                 wrap_error(self.error_code, self.error_msg,
                            self.error_data))
         ins = insert(self.site_asset_id, self.source, self.match_type,
                      self.matches, self.extra_info, self.crr, self.url,
                      self.parent_info)
         tid, src = ins.store_result()
         g_logger.info(
             trans2json(message='reply to caller, task_id:%s,'
                        'source:%s' % (tid, src),
                        action='reply to caller'))
         stats.incr(RESULT_INSERT_SUCCESS, 1)
         if self.source == 'init':
             return init_result(tid, src)
         else:
             return insert_result()
     except Exception:
         stats.incr(RESULT_INSERT_FAILED, 1)
         self.logger.error("va-interface catch unhandle exception",
                           exc_info=True)
         self.error_code = INTERNAL_ERROR["code"]
         self.error_msg = INTERNAL_ERROR["message"]
         raise web.internalerror(message=wrap_error(
             self.error_code, self.error_msg, self.error_data))
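wrap_error(code, message, data) builds the error payload for the BadRequest and internalerror responses; a minimal sketch matching how it is called:

 import json

 def wrap_error(code, message, data):
     # hypothetical shape; the handlers only pass the result to web.py
     return json.dumps({"error": {"code": code,
                                  "message": message,
                                  "data": data}})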
Example no. 15
    def GET(self):
        try:
            stats.incr(RECEIVE_REQUEST, 1)
            web.header("Content-Type", "application/json")
            req = web.input()
            self.site_asset_id = str(req.get('site_asset_id', ""))
            if req.get('all_matches', 'true').lower() == 'false':
                self.all_matches = False
            if self.site_asset_id == "":
                self.error_code = PARAMS_ERROR["code"]
                self.error_msg = PARAMS_ERROR["message"]
                self.error_data.append("site_asset_id")
                raise web.BadRequest(
                    wrap_error(self.error_code, self.error_msg,
                               self.error_data))

            self.logger.info(
                "get history matches, site_asset_id: %s, "
                "all_matches: %s", self.site_asset_id, self.all_matches)
            g_logger.info(trans2json(message="site_asset_id: %s, "
                                     "all_matches: %s" % \
                                    (self.site_asset_id, self.all_matches),
                                     action="get history matches"))
            mr = match_result(self.site_asset_id, self.all_matches)
            res = mr.load()
            self.logger.debug(
                "site_asset_id: %s,"
                "all_matches: %s, history matches: %s", self.site_asset_id,
                self.all_matches, res)
            if res is None:
                g_logger.info(
                    trans2json(message="site_asset_id:%s, "
                               "all_matches: %s" %
                               (self.site_asset_id, self.all_matches),
                               action="no such task"))
            return res
        except web.BadRequest, e:
            stats.incr(REQUEST_ERROR, 1)
            self.logger.error("site_asset_id is null")
            raise e
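A hypothetical client call against this GET handler (the URL and port are assumptions):

 import requests

 r = requests.get('http://localhost:8080/history_matches',
                  params={'site_asset_id': 'site1-123',
                          'all_matches': 'false'})
 print r.content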
Example no. 16
    def query(t, account, backends):
        #account {..., backends:[{'extra':, 'account':, 'backend':}]}
        #backends {id:{'capacity':, 'id':, 'vddb_address':}}
        logger = logging.getLogger("mw_celery_task")
        try:
            logger.info("receive task to query: task_uuid: %s, "
                        "site_asset_id: %s" % (t['uuid'], t['site_asset_id']))

            g_logger.info(
                trans2json(message="site_asset_id:%s, "
                           "task_uuid:%s, external_id:%s" %
                           (t['site_asset_id'], t['uuid'], t['external_id']),
                           action="get task from celery"))

            urls = []
            for b in account['backends']:
                be = backends[
                    b['backend']]  #{'capacity':, 'id':, 'vddb_address':}
                opts = dict(level=b['level'], mode=b['mode'], extra=b['extra'])
                if account['hot'] == 'true':
                    opts['hot_user'] = account['hot_user']
                    opts['hot_pass'] = account['hot_pass']
                # if account['slicing'] == 'true':
                #     opts['slice'] = p['slice_duration']
                qs = urlencode(opts.items())
                urls.append('vdna://%s:%s@%s/?%s' %
                            (account['backend_user'], account['backend_pass'],
                             be['vddb_address'], qs))
            args = [
                '/'.join([os.getenv('MW_HOME'), 'lib',
                          'task_executor.py']), '--task',
                str(t['id']), '--task-uuid', t['uuid'], '--timestamp',
                str(t['created_at']), '--account',
                str(account['id']), '--site-domain', account['domain'],
                '--site-asset-id', t['site_asset_id'], '--clip-format',
                t['format'], '--dna-url', t['dna_url']
            ]
            if account['slicing'] == 'true':
                args.extend(
                    ['--slice-duration',
                     str(account['slice_duration'])])
            if account['allow_partial'] == 'false':
                args.append('--fail-partial')
            # reverse query ingestion triggering is done by reverse_trigger.py
            # if account['do_reverse'] == 'true':
            #     args.extend('--reverse-query')
            for u in urls:
                args.extend(['--backend', u])
            if t['scope'] is not None:
                for s in t['scope']:
                    args.extend(['--scope', s])
            for r in account['rules']:
                args.append('--' + r)
        except Exception:
            logger.error("generate command line failed, "
                         "uuid: %s, site_asset_id: %s" % \
                         (t['uuid'], t['site_asset_id']), exc_info=True)
            logger.debug("task: %s, account: %s, backends: %s" % \
                        (t, account, backends))
            raise TaskException(
                err="query failed, generate execute cmd failed")
        else:
            # bufsize=-1 usually means fully buffer the output, usually, ugh
            # please contact [email protected] if stdout is blocked
            proc = None
            try:
                start_time = time.time()
                proc = Popen(args, close_fds=True, stdout=PIPE, bufsize=-1)
                #row = db_txn(pool, partial(self.record, t, proc.pid))
                logger.info("spawn a process to query, task_uuid: %s, "
                            "site_asset_id: %s, just wait til finished" %
                            (t['uuid'], t['site_asset_id']))
                g_logger.info(
                    trans2json(
                        message="site_asset_id:%s, "
                        "task_uuid:%s, external_id:%s" %
                        (t['site_asset_id'], t['uuid'], t['external_id']),
                        action="start query vddb"))
                out, err = proc.communicate()
                ret = proc.wait()
                logger.info("query finished, return to manager, "
                            "task_uuid: %s, site_asset_id: %s, "
                            "ret: %s, out: %s, err: %s" %
                            (t['uuid'], t['site_asset_id'], ret, out, err))
                g_logger.info(
                    trans2json(
                        message="site_asset_id:%s, "
                        "task_uuid:%s, external_id:%s" %
                        (t['site_asset_id'], t['uuid'], t['external_id']),
                        action="query vddb finished"))
                end_time = time.time()
                # convert the elapsed time to milliseconds before truncating
                stats.timing(QUERY_VDDB, int((end_time - start_time) * 1000))
                return TaskRes(t, ret, out, err)
            except:
                logger.error("spawn process caught exception, uuid: %s, "
                             "site_asset_id: %s" % \
                             (t['uuid'], t['site_asset_id']), exc_info=True)
                logger.debug("task: %s, account: %s" % (t, account))
                raise TaskException(err="query failed, spawn process failed")
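TaskException is raised with an err keyword in both branches above; a definition consistent with that call style:

 class TaskException(Exception):
     # hypothetical: carries the error text passed as err=
     def __init__(self, err=""):
         super(TaskException, self).__init__(err)
         self.err = err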