Exemplo n.º 1
0
Arquivo: http.py Projeto: WKPlus/crab
 def check(self, page):
     """Validate the HTTP status carried by ``page``.

     Returns a ``(passed, message)`` pair: ``(True, None)`` when
     ``self.is_status_expected`` accepts ``page.status``; otherwise a
     warning is logged and ``(False, <description>)`` is returned.
     """
     status = page.status
     if not self.is_status_expected(status):
         GLog.warning("Status code[%s] not expected for url: %s",
                      status, page.url)
         return False, "Unexpected status code %s" % status
     return True, None
Exemplo n.º 2
0
Arquivo: http.py Projeto: WKPlus/scod
    def check_bak(self, url, port=None):
        """Check that ``url`` answers with an expected HTTP status code.

        The URL is split by hand into protocol, host and path. The request
        is tried with the path as-is and, if that fails, again with a
        leading "/" prepended; the check fails only when both attempts fail.

        Args:
            url: URL to probe. "http" is assumed when it contains no ":".
            port: passed through unchanged to ``get_status_code``.

        Returns:
            0 if either attempt yields an expected status code, 1 otherwise
            (a warning is logged in that case).
        """
        # Split on the FIRST ":" only. Splitting on every ":" and keeping
        # element [1] discarded everything after a second colon, so URLs
        # such as "http://host:8080/path" or "http://host/a:b" lost their
        # port and/or path.
        # NOTE(review): an explicit ":port" in the URL now stays attached to
        # the host instead of being dropped -- confirm get_status_code
        # accepts that form.
        protocol, colon, remainder = url.partition(":")
        if colon:
            addr = remainder.strip("/")
        else:
            protocol = "http"
            addr = url

        # Everything before the first "/" is the host; the rest (possibly
        # empty) is the path.
        host, _, path = addr.partition("/")

        # Some URLs are only reachable when the path starts with "/", others
        # only when it does not, so both forms are tried. The check is
        # reported as failed only if both attempts fail.
        status = get_status_code(protocol, host, port, path)
        if status is not None and self.is_status_expected(status):
            return 0

        status = get_status_code(protocol, host, port, "/" + path)
        if status is not None and self.is_status_expected(status):
            return 0

        GLog.warning("Status code[%s] not expected for url: %s",
                     status, url)
        return 1
Exemplo n.º 3
0
Arquivo: http.py Projeto: WKPlus/scod
 def check(self, url):
     """Fetch the status code for ``url`` and validate it.

     Returns 0 when ``self.is_status_expected`` accepts the status code,
     otherwise logs a warning and returns 1.
     """
     code = utils.get_status_code(url)
     if self.is_status_expected(code):
         return 0
     GLog.warning("Status code[%s] not expected for url: %s",
                     code, url)
     return 1
Exemplo n.º 4
0
Arquivo: utils.py Projeto: WKPlus/crab
def read_list_from_file(file_name):
    """Return the lines of ``file_name`` with surrounding whitespace removed.

    When ``file_name`` is not an existing regular file, a warning is logged
    and an empty list is returned.
    """
    if not os.path.isfile(file_name):
        GLog.warning("file[%s] not existed!", file_name)
        return []

    with open(file_name, "r") as handle:
        stripped = [row.strip() for row in handle]
    return stripped
Exemplo n.º 5
0
Arquivo: html.py Projeto: WKPlus/scod
 def check(self, url):
     """Report unmatched HTML tags found in the content of ``url``.

     The content is fed to an ``HTMLTagParser``; every tag returned by the
     parser's ``close()`` is logged as a warning. Returns 1 if any such tag
     was returned, 0 otherwise.
     """
     tag_parser = HTMLTagParser()
     tag_parser.feed(utils.read_content(url))
     broken = tag_parser.close()

     for tag in broken:
         kind = "start" if tag.start else "end"
         row, col = tag.pos[0], tag.pos[1]
         GLog.warning(
             "Broken %s tag[%s] in pos[%s,%s] in url: %s",
             kind, tag.name, row, col, url,
         )

     return 0 if len(broken) == 0 else 1
Exemplo n.º 6
0
    def check_url(self, url):
        """Run every registered checker against ``url``.

        ``url`` may be a plain value or a ``URL`` object; for a ``URL`` its
        ``parent`` is kept for log messages and its ``url`` attribute is
        what gets checked.

        Returns 0 when all checkers return 0, or when some failed but
        ``self.is_white_url`` accepts the url (logged as a warning).
        Returns 1 when a checker failed and the url is not white-listed
        (logged as fatal).
        """
        if isinstance(url, URL):
            parent, url = url.parent, url.url
        else:
            parent = None
        GLog.debug("Checking url: %s", url)

        # Each checker returns 0 on success, so a zero total means all passed.
        failures = sum(checker.check(url) for checker in self.checkers)
        if failures == 0:
            return 0

        # At least one checker failed: white-listed urls only get a warning.
        if self.is_white_url(url):
            GLog.warning("URL check failed: %s, parent url: %s", url, parent)
            return 0

        GLog.fatal("URL check failed: %s, parent url: %s", url, parent)
        return 1
Exemplo n.º 7
0
    def check_by_blacklist(self, url):
        """Flag links of ``url`` whose host ends with a blacklisted domain.

        Every offending link is appended to ``self.err_msg`` and logged as a
        warning. Returns 1 if at least one link matched, 0 otherwise.
        """
        found_bad_link = False
        for link in self.get_url_list(url):
            # Extract the host part of the link.
            host = self.get_host_from_url(link)

            # One matching blacklist entry is enough to reject the link.
            if not any(host.endswith(domain) for domain in self.blacklist):
                continue

            found_bad_link = True
            self.err_msg += 'Domain in blacklist [%s]\n' % link
            GLog.warning("Domain in blacklist [%s], parent url:%s",
                         link, url)

        return 1 if found_bad_link else 0
Exemplo n.º 8
0
 def check_by_whitelist(self, url):
     """Flag links of ``url`` whose host matches no whitelisted domain.

     A link is accepted when its host ends with any entry of
     ``self.whitelist``. Every rejected link is appended to
     ``self.err_msg`` and logged as a warning. Returns 1 if at least one
     link was rejected, 0 otherwise.
     """
     found_unlisted = False
     for link in self.get_url_list(url):
         # Extract the host part of the link.
         host = self.get_host_from_url(link)

         # The link is fine as soon as one whitelist entry matches.
         if any(host.endswith(domain) for domain in self.whitelist):
             continue

         found_unlisted = True
         self.err_msg += 'Domain not in whitelist [%s]\n' % link
         GLog.warning("Domain not in whitelist [%s], parent url: %s",
                      link, url)

     return 1 if found_unlisted else 0
Exemplo n.º 9
0
Arquivo: utils.py Projeto: WKPlus/crab
def run_cmd(cmd):
    """Run ``cmd`` through ``commands.getstatusoutput``.

    Returns the ``(status, output)`` pair unchanged; a non-zero status is
    additionally logged as a warning together with the captured output.
    """
    status, text = commands.getstatusoutput(cmd)
    if status == 0:
        return status, text

    GLog.warning("Run cmd[%s] failed! Error msg: %s", cmd, text)
    return status, text