Пример #1
0
 def update_spider_flag(self, action):
     """Write the spider state flag for this task.

     ``action`` 'start' stores 1 and 'finish' stores 3; any other value
     falls back to 1. Exceptions raised by the update are logged through
     ``ERROR`` and are not re-raised.
     """
     flag_by_action = {'start': 1, 'finish': 3}
     spider_flag = flag_by_action.get(action, 1)
     try:
         db.execute(
             "UPDATE task SET `spider_flag`=%s where id=%s",
             spider_flag,
             self.task_id,
         )
     except Exception:
         ERROR('update_spider_flag exception,task_id:%s' % self.task_id)
Пример #2
0
 def update_sitemap_parsed(self, action):
     """Record whether sitemap parsing has started or finished for this task.

     ``action`` 'start' stores 0 and 'finish' stores 1 in the
     ``sitemap_parsed`` column; any other value stores 1. Exceptions from
     the update are logged through ``ERROR`` and are not re-raised.
     """
     parsed_by_action = {'start': 0, 'finish': 1}
     parsed_flag = parsed_by_action.get(action, 1)
     try:
         db.execute(
             "UPDATE task SET `sitemap_parsed`=%s where id=%s",
             parsed_flag,
             self.task_id,
         )
     except Exception:
         ERROR('update_sitemap_parsed exception,task_id:%s' % self.task_id)
Пример #3
0
 def update_sitemap_parsed(self, action):
     """Set the ``sitemap_parsed`` column of this task's row.

     The stored value is 0 for the 'start' action and 1 for 'finish' or
     for any unrecognised action. A failing update is reported through
     ``ERROR`` and otherwise ignored.
     """
     statement = "UPDATE task SET `sitemap_parsed`=%s where id=%s"
     new_value = {'start': 0, 'finish': 1}.get(action, 1)
     try:
         db.execute(statement, new_value, self.task_id)
     except Exception:
         message = 'update_sitemap_parsed exception,task_id:%s' % self.task_id
         ERROR(message)
Пример #4
0
 def update_url_end_time(self, request):
     """Stamp the ``url`` row for ``request`` with the current time.

     Does nothing when ``request.id`` is None. Exceptions from the update
     are logged through ``ERROR`` (the message carries ``self.task_id``)
     and are not re-raised.
     """
     if request.id is None:
         return
     try:
         db.execute(
             "UPDATE url SET `end_time`=%s WHERE id=%s",
             datetime.now(),
             request.id,
         )
     except Exception:
         ERROR('update_url_end_time exception,task_id:%s' % self.task_id)
Пример #5
0
 def update_spider_flag(self, action):
     """Set the ``spider_flag`` column of this task's row.

     The stored value is 1 for the 'start' action, 3 for 'finish', and 1
     for any unrecognised action. A failing update is reported through
     ``ERROR`` and otherwise ignored.
     """
     statement = "UPDATE task SET `spider_flag`=%s where id=%s"
     new_flag = {'start': 1, 'finish': 3}.get(action, 1)
     try:
         db.execute(statement, new_flag, self.task_id)
     except Exception:
         message = 'update_spider_flag exception,task_id:%s' % self.task_id
         ERROR(message)
Пример #6
0
 def update_url_end_time(self, request):
     """Write the current time into ``end_time`` of the request's ``url`` row.

     Requests whose ``id`` is None are skipped. A failing update is
     reported through ``ERROR`` (tagged with ``self.task_id``) and
     otherwise ignored.
     """
     if request.id is None:
         return
     statement = "UPDATE url SET `end_time`=%s WHERE id=%s"
     try:
         db.execute(statement, datetime.now(), request.id)
     except Exception:
         message = 'update_url_end_time exception,task_id:%s' % self.task_id
         ERROR(message)
Пример #7
0
 def update_progress(self, rule_id):
     """Append ``rule_id`` to this task's ``|``-separated ``progress`` value.

     Reads the ``progress`` column for ``self.task_id`` and, when
     ``rule_id`` is not already one of its ``|``-separated entries,
     appends ``|<rule_id>`` and writes the value back. Any exception is
     logged through ``ERROR`` and is not re-raised.
     """
     try:
         # Values are bound as query parameters instead of being formatted
         # into the SQL text, so quotes or '%' in the stored progress cannot
         # break (or alter) the statement.
         row = db.get("SELECT `progress` FROM task WHERE id=%s", self.task_id)
         progress = row.progress
         # Compare using the same text form that gets appended; a non-string
         # rule_id would otherwise never match and be appended on every call.
         rule_text = '%s' % rule_id
         if rule_text not in progress.split('|'):
             progress += '|' + rule_text
             db.execute(
                 "UPDATE task SET `progress`=%s WHERE id=%s",
                 progress,
                 self.task_id,
             )
     except Exception:
         ERROR("update_progress exception")
Пример #8
0
 def update_progress(self, rule_id):
     """Record ``rule_id`` in this task's ``|``-separated ``progress`` column.

     The current ``progress`` value is loaded for ``self.task_id``; if
     ``rule_id`` is not yet among its ``|``-separated entries,
     ``|<rule_id>`` is appended and the row is updated. Any exception is
     reported through ``ERROR`` and swallowed.
     """
     select_sql = "SELECT `progress` FROM task WHERE id=%s"
     update_sql = "UPDATE task SET `progress`=%s WHERE id=%s"
     try:
         # Pass values as query parameters rather than interpolating them
         # into the SQL string: a quote or '%' inside the stored progress
         # would otherwise corrupt the statement.
         progress = db.get(select_sql, self.task_id).progress
         # Use the appended text form for the membership test as well, so a
         # non-string rule_id is not re-appended on every call.
         rule_text = '%s' % rule_id
         if rule_text not in progress.split('|'):
             progress += '|' + rule_text
             db.execute(update_sql, progress, self.task_id)
     except Exception:
         ERROR("update_progress exception")
Пример #9
0
 def analyse_result(self, rule_id, risk, result, url):
     """Insert one finding into ``RESULT_TABLE``.

     ``risk`` 'low'/'middle'/'high' is stored as 1/2/3 (anything else as
     1). ``result.details`` is joined with CRLF when it is a list or
     tuple. The request and response texts come from
     ``self.generateRequest`` and ``self.generateResponse``. Every stored
     value is UTF-8 encoded before the insert. ``url`` is accepted but not
     used here; the stored URL is ``result.response.request.url``.
     Exceptions are not caught in this method.
     """
     raw_response = result.response
     detail_text = result.details
     risk_level = {'low': 1, 'middle': 2, 'high': 3}.get(risk, 1)
     if isinstance(detail_text, (list, tuple)):
         detail_text = "\r\n".join(detail_text)
     request_url = raw_response.request.url
     request_text = self.generateRequest(raw_response.request, request_url)
     response_text = self.generateResponse(raw_response)
     insert_sql = "INSERT INTO %s" % RESULT_TABLE
     insert_sql += "(`task_id`,`rule_id`,`risk`,`url`,`detail`,`request`,`response`) VALUES(%s,%s,%s,%s,%s,%s,%s)"
     fields = (
         self.task_id,
         rule_id,
         str(risk_level),
         request_url,
         detail_text,
         request_text,
         response_text,
     )
     encoded = [field.encode('utf-8') for field in fields]
     db.execute(insert_sql, *encoded)
Пример #10
0
 def analyse_result(self, rule_id, risk, result, url):
     """Store a single rule finding as a row in ``RESULT_TABLE``.

     The risk name maps to 1 ('low'), 2 ('middle') or 3 ('high'), with 1
     as the fallback. List or tuple details are joined with CRLF. The
     request and response columns are produced by ``self.generateRequest``
     and ``self.generateResponse``, and all column values are UTF-8
     encoded. The ``url`` argument is unused; the row's URL is taken from
     ``result.response.request.url``. Exceptions propagate to the caller.
     """
     http_response = result.response
     detail = result.details
     risk_code = {'low': 1, 'middle': 2, 'high': 3}.get(risk, 1)
     if isinstance(detail, (list, tuple)):
         detail = "\r\n".join(detail)
     target_url = http_response.request.url
     request_dump = self.generateRequest(http_response.request, target_url)
     response_dump = self.generateResponse(http_response)
     statement = "INSERT INTO %s" % RESULT_TABLE
     statement += "(`task_id`,`rule_id`,`risk`,`url`,`detail`,`request`,`response`) VALUES(%s,%s,%s,%s,%s,%s,%s)"
     row = (self.task_id, rule_id, str(risk_code), target_url, detail,
            request_dump, response_dump)
     values = [column.encode('utf-8') for column in row]
     db.execute(statement, *values)
Пример #11
0
def pipeline(request):
    """Insert ``request`` into ``URL_TABLE`` unless an equal row exists.

    A row counts as existing when it matches ``conf.taskid`` and the
    request's UTF-8 encoded ``url``, ``method`` and ``params``. On insert
    the ``referer`` and the current time (``start_time``) are stored too,
    and the result of ``db.execute`` is returned. Returns None when the
    row already exists or when any exception occurs (which is logged
    through ``ERROR``).
    """
    try:
        encoded = [
            getattr(request, name).encode('utf-8')
            for name in ('url', 'method', 'params', 'referer')
        ]

        # Duplicate check on (task, url, method, params); referer is not
        # part of the key.
        count_sql = "SELECT COUNT(1) as `c` FROM %s" % URL_TABLE
        count_sql += " WHERE `task_id`=%s and `url`=%s and `method`=%s and `params`=%s"
        existing = db.get(count_sql, conf.taskid, *encoded[:3]).c
        if existing > 0:
            return

        insert_sql = "INSERT INTO %s" % URL_TABLE
        insert_sql += "(`task_id`,`url`,`method`,`params`,`referer`,`start_time`) VALUES(%s,%s,%s,%s,%s,%s)"
        row = encoded + [datetime.now()]
        return db.execute(insert_sql, conf.taskid, *row)
    except Exception:
        ERROR("Crawler.pipeline Exception")
Пример #12
0
def pipeline(request):
    """Persist a crawled request in ``URL_TABLE``, skipping duplicates.

    The request's ``url``, ``method``, ``params`` and ``referer`` are
    UTF-8 encoded. If a row with the same ``conf.taskid``, url, method
    and params already exists nothing is written and None is returned.
    Otherwise a row with the current time as ``start_time`` is inserted
    and the ``db.execute`` result is returned. Any exception is logged
    through ``ERROR`` and results in None.
    """
    try:
        url, method, params, referer = [
            getattr(request, field).encode('utf-8')
            for field in ('url', 'method', 'params', 'referer')
        ]

        # Look for an existing row with the same task/url/method/params.
        lookup_sql = "SELECT COUNT(1) as `c` FROM %s" % URL_TABLE
        lookup_sql += " WHERE `task_id`=%s and `url`=%s and `method`=%s and `params`=%s"
        match = db.get(lookup_sql, conf.taskid, url, method, params)
        if match.c > 0:
            return None

        insert_sql = "INSERT INTO %s" % URL_TABLE
        insert_sql += "(`task_id`,`url`,`method`,`params`,`referer`,`start_time`) VALUES(%s,%s,%s,%s,%s,%s)"
        started_at = datetime.now()
        return db.execute(insert_sql, conf.taskid, url, method, params,
                          referer, started_at)
    except Exception:
        ERROR("Crawler.pipeline Exception")
Пример #13
0
def set_unreachable_flag(task_id):
    """Set ``reachable`` to 0 on the task row with the given id.

    Exceptions from the update are logged through ``ERROR`` and are not
    re-raised.
    """
    # Bind task_id as a query parameter (as update_end_time does) instead of
    # formatting it into the SQL text.
    sql = "UPDATE task SET `reachable`=0 WHERE id=%s"
    try:
        db.execute(sql, task_id)
    except Exception:
        ERROR('set_unreachable failed,task_id:%s,please check' % task_id)
Пример #14
0
def update_end_time(task_id):
    """Write the current time into ``end_time`` of the given task row.

    Exceptions from the update are logged through ``ERROR`` and are not
    re-raised.
    """
    try:
        db.execute(
            "UPDATE task SET `end_time`=%s WHERE id=%s",
            datetime.now(),
            task_id,
        )
    except Exception:
        ERROR('update_end_time failed,task_id:%s,please check' % task_id)
Пример #15
0
def set_unreachable_flag(task_id):
    """Mark the task row with the given id as unreachable (``reachable`` = 0).

    A failing update is reported through ``ERROR`` and otherwise ignored.
    """
    try:
        # task_id is passed as a bound parameter rather than interpolated
        # into the statement, matching the parameterized style of
        # update_end_time.
        db.execute("UPDATE task SET `reachable`=0 WHERE id=%s", task_id)
    except Exception:
        ERROR('set_unreachable failed,task_id:%s,please check' % task_id)
Пример #16
0
def update_task_status(task_id):
    """Set ``status`` to 3 on the task row with the given id.

    Exceptions from the update are logged through ``ERROR`` and are not
    re-raised.
    """
    # Bind task_id as a query parameter (as update_end_time does) instead of
    # formatting it into the SQL text.
    sql = "UPDATE task SET `status`=3 WHERE id=%s"
    try:
        db.execute(sql, task_id)
    except Exception:
        ERROR('update_task_status failed,task_id:%s,please check' % task_id)
Пример #17
0
def update_end_time(task_id):
    """Stamp the given task row's ``end_time`` with the current time.

    A failing update is reported through ``ERROR`` and otherwise ignored.
    """
    statement = "UPDATE task SET `end_time`=%s WHERE id=%s"
    try:
        finished_at = datetime.now()
        db.execute(statement, finished_at, task_id)
    except Exception:
        message = 'update_end_time failed,task_id:%s,please check' % task_id
        ERROR(message)
Пример #18
0
def update_task_status(task_id):
    """Update the task row with the given id so that ``status`` is 3.

    A failing update is reported through ``ERROR`` and otherwise ignored.
    """
    try:
        # task_id is passed as a bound parameter rather than interpolated
        # into the statement, matching the parameterized style of
        # update_end_time.
        db.execute("UPDATE task SET `status`=3 WHERE id=%s", task_id)
    except Exception:
        ERROR('update_task_status failed,task_id:%s,please check' % task_id)