Пример #1
0
def attr_from_script(scriptname, attr):
    """Import ``SCRIPTS_NAME.<scriptname>`` and return its attribute ``attr``.

    :param scriptname: module name below the ``SCRIPTS_NAME`` package.
    :param attr: name of the attribute to fetch from that module.
    :return: the attribute value; None when the import fails or the module
        has no such attribute (both cases are reported through ``ERROR``).
    """
    module_path = "%s.%s" % (SCRIPTS_NAME, scriptname)
    try:
        __import__(module_path)
        # __import__ returns the top-level package for dotted names, so the
        # submodule itself is looked up in sys.modules.
        script_module = sys.modules[module_path]
        try:
            value = getattr(script_module, attr)
        except AttributeError:
            ERROR("AttributeError,path:%s has no %s attribute" %
                  (module_path, attr))
        else:
            return value
    except ImportError:
        ERROR("ImportError,path:%s" % module_path)
    return None
Пример #2
0
    def runDomain(self, rule_id, run_domain, filename, risk):
        """Call ``run_domain`` for one rule and analyse whatever it returns.

        The rule is first recorded via ``update_progress``. If that step or
        ``run_domain(self.ruleattr)`` raises, the error is logged and nothing
        is analysed. A ``None`` result is likewise skipped.

        :param rule_id: identifier of the rule being executed.
        :param run_domain: callable invoked with ``self.ruleattr``.
        :param filename: script name, used only in the error message.
        :param risk: passed through to ``analyse_result``.
        """
        try:
            self.update_progress(rule_id)
            outcome = run_domain(self.ruleattr)
        except Exception:
            ERROR("rule_id:%s,scriptname:%s exception" % (rule_id, filename))
            return

        if outcome is None:
            return

        try:
            self.analyse_result(rule_id, risk, outcome, self.domain)
        except Exception:
            ERROR('analyse_result exception,rule_id:%s' % rule_id)
Пример #3
0
 def update_sitemap_parsed(self, action):
     """Store the sitemap-parsing state of this task in ``task.sitemap_parsed``.

     :param action: ``'start'`` writes 0, ``'finish'`` writes 1; any other
         value also writes 1.

     Database errors are logged through ``ERROR`` and not re-raised.
     """
     state_by_action = {'start': 0, 'finish': 1}
     state = state_by_action.get(action, 1)
     statement = "UPDATE task SET `sitemap_parsed`=%s where id=%s"
     try:
         db.execute(statement, state, self.task_id)
     except Exception:
         ERROR('update_sitemap_parsed exception,task_id:%s' % self.task_id)
Пример #4
0
 def update_spider_flag(self, action):
     """Store the spider state of this task in ``task.spider_flag``.

     :param action: ``'start'`` writes 1, ``'finish'`` writes 3; any other
         value also writes 1.

     Database errors are logged through ``ERROR`` and not re-raised.
     """
     state_by_action = {'start': 1, 'finish': 3}
     state = state_by_action.get(action, 1)
     statement = "UPDATE task SET `spider_flag`=%s where id=%s"
     try:
         db.execute(statement, state, self.task_id)
     except Exception:
         ERROR('update_spider_flag exception,task_id:%s' % self.task_id)
Пример #5
0
 def get_exist_url(self):
     """Return every row of the ``url`` table that belongs to this task.

     :return: list of rows yielded by ``db.iter``; an empty list when there
         are none or when the query fails (the failure is logged).
     """
     query = "SELECT * FROM url WHERE task_id=%s"
     try:
         return list(db.iter(query, self.task_id))
     except Exception:
         ERROR('get_url exception,task_id:%s' % self.task_id)
     return []
Пример #6
0
 def update_url_end_time(self, request):
     """Stamp the ``url`` row of ``request`` with the current time.

     Does nothing when ``request.id`` is None. Database errors are logged
     through ``ERROR`` and not re-raised.

     :param request: object whose ``id`` attribute is the ``url`` row id.
     """
     if request.id is None:
         return
     statement = "UPDATE url SET `end_time`=%s WHERE id=%s"
     try:
         db.execute(statement, datetime.now(), request.id)
     except Exception:
         ERROR('update_url_end_time exception,task_id:%s' % self.task_id)
Пример #7
0
def main():
    """Run a scan: parse the command line, initialise, run, then clean up.

    Every failure is logged rather than propagated; the cleanup in the
    ``finally`` clause runs whether the scan succeeded or not.
    """
    try:
        parseCmdline()
        init()
        run()
    except KeyboardInterrupt:
        # Ctrl-C from the operator.
        INFO("User aborted,scan stop")
    except DestinationUnReachable as unreachable:
        WARN("Destination:%s not reachable,please check" % unreachable.dest)
    except TopException:
        ERROR("User define exception")
    except Exception:
        # Catch-all boundary so the cleanup below always gets a chance to run.
        ERROR("Exception occur,scan stop")
    finally:
        task_finsh_clean()
        INFO("Scan finished!")
Пример #8
0
    def start(cls, request, schedule):
        """Build a spider for ``request`` and run it, logging any failure.

        :param request: passed to the constructor and to ``run``.
        :param schedule: passed to the constructor.

        Exceptions from construction or from ``run`` are reported through
        ``ERROR`` and not re-raised.
        """
        try:
            spider = cls(request, schedule)
            spider.run(request)
        # The exception object was never used; dropping the Python-2-only
        # ``except Exception, e`` form matches the handlers elsewhere in
        # this file.
        except Exception:
            ERROR('Spider.start Exception')
Пример #9
0
def mkdir(path, remove=True):
    """Ensure ``path`` is a directory, optionally emptying an existing one.

    :param path: directory to create.
    :param remove: when True and ``path`` already is a directory, delete it
        with all its contents and create it again; when False an existing
        directory is left untouched.

    Failures while recreating an existing directory are logged through
    ``ERROR``; a failure creating a missing directory propagates.
    """
    if not os.path.isdir(path):
        os.mkdir(path)
        return
    if not remove:
        return
    try:
        shutil.rmtree(path)
        os.mkdir(path)
    except Exception:
        ERROR("rmtree except,path" + path)
Пример #10
0
 def update_progress(self, rule_id):
     """Append ``rule_id`` to this task's '|'-separated ``progress`` column.

     The current value is read, and the id is appended only when it is not
     already one of the '|'-separated entries.

     :param rule_id: identifier of the rule to record.

     Any exception (including a failed query) is logged through ``ERROR``
     and not re-raised.
     """
     try:
         # Values are passed as query parameters, like the other task
         # updates in this file, instead of being interpolated into the SQL.
         select_sql = "SELECT `progress` FROM task WHERE id=%s"
         progress = db.get(select_sql, self.task_id).progress
         # Compare the same text that gets stored, so a non-string rule_id
         # is not appended again on every call.
         rule_token = '%s' % rule_id
         if rule_token not in progress.split('|'):
             progress += '|' + rule_token
             update_sql = "UPDATE task SET `progress`=%s WHERE id=%s"
             db.execute(update_sql, progress, self.task_id)
     except Exception:
         ERROR("update_progress exception")
Пример #11
0
def pipeline(request):
    """Insert the crawled ``request`` into ``URL_TABLE`` unless already stored.

    A row counts as already stored when one with the same task id, url,
    method and params exists.

    :param request: object with ``url``, ``method``, ``params`` and
        ``referer`` attributes, each UTF-8 encoded before use.
    :return: None when the row already exists or an exception occurred
        (logged through ``ERROR``); otherwise whatever ``db.execute`` returns
        for the INSERT.
    """
    try:
        url, method, params, referer = [
            getattr(request, field).encode('utf-8')
            for field in ('url', 'method', 'params', 'referer')
        ]

        # Only the table name is interpolated; the remaining %s markers are
        # left in place as query parameters.
        count_sql = ("SELECT COUNT(1) as `c` FROM %s" % URL_TABLE) + \
            " WHERE `task_id`=%s and `url`=%s and `method`=%s and `params`=%s"
        already_stored = db.get(count_sql, conf.taskid, url, method, params).c > 0
        if already_stored:
            return

        insert_sql = ("INSERT INTO %s" % URL_TABLE) + \
            "(`task_id`,`url`,`method`,`params`,`referer`,`start_time`) VALUES(%s,%s,%s,%s,%s,%s)"
        started_at = datetime.now()
        return db.execute(insert_sql, conf.taskid, url, method, params,
                          referer, started_at)
    except Exception:
        ERROR("Crawler.pipeline Exception")
Пример #12
0
def request(url, **kwargs):
    """
    Send an HTTP request via ``requests.request`` with project defaults applied.

    quick start:
        http://blog.csdn.net/iloveyin/article/details/21444613
        http://www.zhidaow.com/post/python-requests-install-and-brief-introduction

    Defaults applied before sending:
        * ``timeout`` falls back to ``conf.timeout``;
        * ``cookies`` falls back to ``conf.cookie`` when that is set;
        * ``allow_redirects`` defaults to True for GET/POST/OPTIONS and to
          False for every other method;
        * entries of ``HEADERS`` are added unless their key matches the
          title-cased name of a header the caller supplied.

    :param url: URL for the new :class:`Request` object.
    :param method: method for the new :class:`Request` object; defaults to ``DEFAULT_METHOD``.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) Float describing the timeout of the request in seconds.
    :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
    :param decode: (optional) charset name; when given, ``response.text`` is
        encoded with it and the result is stored as ``response.text_encoded``
        (unencodable characters are replaced after logging an error).
    :return: the response object, or None when sending or post-processing
        raised an exception (the failure is logged through ``ERROR``).
    """
    def checkCharset(response):
        # ISO-8859-1 is the encoding requests reports by default, so look
        # for a charset declared inside the body instead.
        if response.encoding == "ISO-8859-1":
            encoding = requests.utils.get_encodings_from_content(
                response.content)
            if encoding:
                response.encoding = encoding[0]
        return response

    kwargs.setdefault('headers', {})
    kwargs.setdefault('timeout', conf.timeout)
    # 'method' and 'decode' are consumed here; everything left in kwargs is
    # forwarded to requests.request unchanged.
    method = kwargs.pop('method', DEFAULT_METHOD)
    decode = kwargs.pop('decode', None)
    if conf.cookie:
        kwargs.setdefault('cookies', conf.cookie)
    # The method is upper-cased, so the candidates must be upper-case too;
    # the former lowercase "options" entry could never match.
    follow_redirects = method.upper() in ("GET", "POST", "OPTIONS")
    kwargs.setdefault('allow_redirects', follow_redirects)

    # NOTE: a headers dict supplied by the caller is updated in place.
    headers = kwargs['headers']
    supplied = set(name.title() for name in headers)
    headers.update(
        dict((name, value) for name, value in HEADERS.items()
             if name not in supplied))

    try:
        response = checkCharset(requests.request(method, url, **kwargs))
        if decode:
            text = response.text
            assert isinstance(text, unicode)
            try:
                encoded = text.encode(decode)
            except UnicodeEncodeError:
                ERROR("encodePage error,charset:%s,url:%s" %
                      (response.encoding, url))
                # Fall back to a lossy encoding rather than failing the request.
                encoded = text.encode(decode, 'replace')
            response.text_encoded = encoded
        return response
    except Exception:
        ERROR("request exception,url:" + url)
    return None
Пример #13
0
def update_end_time(task_id):
    """Set ``task.end_time`` of the given task to the current time.

    :param task_id: id of the row in the ``task`` table.

    Database errors are logged through ``ERROR`` and not re-raised.
    """
    statement = "UPDATE task SET `end_time`=%s WHERE id=%s"
    finished_at = datetime.now()
    try:
        db.execute(statement, finished_at, task_id)
    except Exception:
        ERROR('update_end_time failed,task_id:%s,please check' % task_id)
Пример #14
0
def update_task_status(task_id):
    """Set ``task.status`` of the given task to 3.

    :param task_id: id of the row in the ``task`` table.

    Database errors are logged through ``ERROR`` and not re-raised.
    """
    # task_id is passed as a query parameter, as in update_end_time, instead
    # of being formatted into the SQL text.
    sql = "UPDATE task SET `status`=3 WHERE id=%s"
    try:
        db.execute(sql, task_id)
    except Exception:
        ERROR('update_task_status failed,task_id:%s,please check' % task_id)
Пример #15
0
def set_unreachable_flag(task_id):
    """Set ``task.reachable`` of the given task to 0.

    :param task_id: id of the row in the ``task`` table.

    Database errors are logged through ``ERROR`` and not re-raised.
    """
    # task_id is passed as a query parameter, as in update_end_time, instead
    # of being formatted into the SQL text.
    sql = "UPDATE task SET `reachable`=0 WHERE id=%s"
    try:
        db.execute(sql, task_id)
    except Exception:
        ERROR('set_unreachable failed,task_id:%s,please check' % task_id)