Пример #1
0
    def __init__(self,
                 config: SectionProxy,
                 method: str,
                 query_string: bytes,
                 response_start: Callable[..., None],
                 response_body: Callable[..., None],
                 response_done: Callable[..., None],
                 error_log: Callable[[str], int] = sys.stderr.write) -> None:
        """
        Record the request context, arm origin rate-limit cleanup if
        configured, and run the request.

        config -- settings section; 'charset' is required, while
            'limit_origin_tests', 'limit_origin_period' and
            'referer_spam_domains' are read when present.
        method -- request method to the UX.
        query_string -- raw query string, handed straight to self.run().
        response_start, response_body, response_done -- response callbacks,
            stored for later use.
        error_log -- function to log errors to (default: sys.stderr.write).
        """
        self.config = config  # type: SectionProxy
        self.charset_bytes = self.config['charset'].encode('ascii')
        self.method = method  # Request method to the UX
        self.response_start = response_start
        self.response_body = response_body
        self._response_done = response_done
        self.error_log = error_log  # function to log errors to
        self.test_uri = None  # type: str
        self.req_hdrs = None  # type: StrHeaderListType
        self.format = None  # type: str
        self.test_id = None  # type: str
        self.check_name = None  # type: str
        self.descend = None  # type: bool
        self.save = None  # type: bool
        self.timeout = None  # type: Any
        self.referer_spam_domains = []  # type: List[str]
        if config.get("limit_origin_tests", ""):
            # NOTE(review): _origin_period is read before anything here
            # assigns it, so it presumably starts out as a class-level
            # None -- confirm against the class body.
            if self._origin_period is None:
                # presumably hours -> seconds; verify against the config docs
                self._origin_period = config.getfloat("limit_origin_period",
                                                      fallback=1) * 3600
                thor.schedule(self._origin_period, self.ratelimit_cleanup)

        if config.get("referer_spam_domains", ""):
            self.referer_spam_domains = [
                i.strip() for i in config["referer_spam_domains"].split()]
        self.run(query_string)
Пример #2
0
 def show_task_map(self, watch: bool = False) -> Union[str, None]:
     """
     Show the task map for debugging.

     With ``watch`` set and a non-empty task map, write the map to stderr,
     schedule another look and return None. In every other case return
     ``repr()`` of the task map.
     """
     if self._task_map and watch:
         sys.stderr.write("* %s - %s\n" % (self, self._task_map))
         # Hand ``watch`` to the rescheduled call; without it the next call
         # takes the non-watch branch and the periodic output stops after
         # a single line.
         thor.schedule(5, self.show_task_map, watch)
         return None
     return repr(self._task_map)
Пример #3
0
 def show_task_map(self, watch: bool = False) -> Union[str, None]:
     """
     Show the task map for debugging.

     With ``watch`` set and a non-empty task map, write the map to stderr,
     schedule another look and return None. In every other case return
     ``repr()`` of the task map.
     """
     if self._task_map and watch:
         sys.stderr.write("* %s - %s\n" % (self, self._task_map))
         # Hand ``watch`` to the rescheduled call; without it the next call
         # takes the non-watch branch and the periodic output stops after
         # a single line.
         thor.schedule(5, self.show_task_map, watch)
         return None
     return repr(self._task_map)
Пример #4
0
 def __init__(self, inp, handle_result, processor, *proc_args):
     """
     Store the input source and callbacks, reset the counters, queue the
     first schedule_lines() call and then call run().
     """
     # Collaborators supplied by the caller.
     self.processor = processor
     self.proc_args = proc_args
     self.handle_result = handle_result
     self.input = inp
     # Progress bookkeeping.
     self.line_num = 0
     self.outstanding = 0
     self.running = True
     # Queue the first batch before handing control to run().
     schedule(0, self.schedule_lines)
     run()
Пример #5
0
 def _load_checker(self, origin: str, robots_txt: bytes) -> None:
     """
     Load a checker for an origin, given its robots.txt file.

     An empty ``robots_txt`` (empty or non-200) gets a DummyChecker; any
     other content is decoded as ASCII (with replacement) and parsed by a
     RobotFileParser. The checker is stored in ``self.robot_checkers``
     under ``origin`` and its removal is scheduled after
     ``self.freshness_lifetime``.
     """
     # robots_txt is bytes, so comparing it with the str "" can never be
     # true; test for emptiness instead.
     if not robots_txt:  # empty or non-200
         checker = DummyChecker() # type: RobotChecker
     else:
         checker = RobotFileParser()
         checker.parse(robots_txt.decode('ascii', 'replace').splitlines())
     self.robot_checkers[origin] = checker
     def del_checker() -> None:
         """Drop the cached checker for this origin if it is still there."""
         # pop() with a default tolerates an entry that is already gone,
         # without hiding unrelated errors the way a bare except would.
         self.robot_checkers.pop(origin, None)
     thor.schedule(self.freshness_lifetime, del_checker)
Пример #6
0
 def __init__(self) -> None:
     """
     Arm the watchdog when SYSTEMD_WATCHDOG is set, create the HTTP server
     from the module-level config and run thor until interrupted.
     """
     # Watchdog: only when the environment asks for it.
     watchdog_enabled = os.environ.get("SYSTEMD_WATCHDOG")
     if watchdog_enabled:
         thor.schedule(self.watchdog_freq, self.watchdog_ping)
         signal.signal(signal.SIGABRT, self.abrt_handler)
     # Server: every exchange is handed to RedHandler.
     host = config.get("host", "")
     port = int(config["port"])
     server = thor.http.HttpServer(host, port)
     server.on("exchange", RedHandler)
     try:
         thor.run()
     except KeyboardInterrupt:
         sys.stderr.write("Stopping...\n")
         thor.stop()
Пример #7
0
    def _load_checker(self, origin: str, robots_txt: bytes) -> None:
        """
        Load a checker for an origin, given its robots.txt file.

        An empty ``robots_txt`` (empty or non-200) gets a DummyChecker; any
        other content is decoded as ASCII (with replacement) and parsed by a
        RobotFileParser. The checker is stored in ``self.robot_checkers``
        under ``origin`` and its removal is scheduled after
        ``self.freshness_lifetime``.
        """
        # robots_txt is bytes, so comparing it with the str "" can never be
        # true; test for emptiness instead.
        if not robots_txt:  # empty or non-200
            checker = DummyChecker()  # type: RobotChecker
        else:
            checker = RobotFileParser()
            checker.parse(robots_txt.decode('ascii', 'replace').splitlines())
        self.robot_checkers[origin] = checker

        def del_checker() -> None:
            """Drop the cached checker for this origin if it is still there."""
            # pop() with a default tolerates an entry that is already gone,
            # without hiding unrelated errors the way a bare except would.
            self.robot_checkers.pop(origin, None)

        thor.schedule(self.freshness_lifetime, del_checker)
Пример #8
0
 def __init__(self, base_uri, method, query_string,
              response_start, response_body, response_done):
     """
     Store the request context, parse the query string, arm the runtime
     timeout and dispatch to save / load / run / default handling.
     """
     self.base_uri = base_uri
     self.method = method
     # Response callbacks.
     self.response_start = response_start
     self.response_body = response_body
     self._response_done = response_done

     # Request parameters start out unset; parse_qs() is called next and
     # presumably fills them in.
     self.test_uri = None
     self.req_hdrs = None # tuple of unicode K,V
     self.format = None
     self.test_id = None
     self.check_type = None
     self.descend = None
     self.save = None
     self.parse_qs(method, query_string)

     self.start = time.time()
     self.timeout = thor.schedule(max_runtime, self.timeoutError)

     # A test id selects a stored test (saving or loading it); otherwise
     # a URI starts a fresh test, and with neither we show the default.
     if self.test_id:
         if self.save and save_dir:
             self.save_test()
         else:
             self.load_saved_test()
     elif self.test_uri:
         self.run_test()
     else:
         self.show_default()
Пример #9
0
    def __init__(self, base_uri, method, query_string, response_start,
                 response_body, response_done):
        """
        Store the request context, parse the query string, arm the runtime
        timeout and dispatch to save / load / run / default handling.
        """
        self.base_uri = base_uri
        self.method = method
        # Response callbacks.
        self.response_start = response_start
        self.response_body = response_body
        self._response_done = response_done

        # Request parameters start out unset; parse_qs() is called next and
        # presumably fills them in.
        self.test_uri = None
        self.req_hdrs = None
        self.format = None
        self.test_id = None
        self.descend = None
        self.save = None
        self.parse_qs(method, query_string)

        self.start = time.time()
        self.timeout = thor.schedule(max_runtime, self.timeoutError)

        # A test id selects a stored test (saving or loading it); otherwise
        # a URI starts a fresh test, and with neither we show the default.
        if self.test_id:
            if self.save and save_dir:
                self.save_test()
            else:
                self.load_saved_test()
        elif self.test_uri:
            self.run_test()
        else:
            self.show_default()
Пример #10
0
    def __init__(self,
                 config: SectionProxy,
                 method: str,
                 query_string: bytes,
                 req_headers: RawHeaderListType,
                 response_start: Callable[..., None],
                 response_body: Callable[..., None],
                 response_done: Callable[..., None],
                 error_log: Callable[[str], int] = sys.stderr.write) -> None:
        """
        Record the request context, arm the client and origin rate-limit
        cleanups when they are configured and not yet armed, load the
        referer spam list, and run the request.
        """
        self.config = config  # type: SectionProxy
        self.charset_bytes = self.config['charset'].encode('ascii')
        self.method = method
        self.req_headers = req_headers
        # Response callbacks and the error sink.
        self.response_start = response_start
        self.response_body = response_body
        self._response_done = response_done
        self.error_log = error_log  # function to log errors to
        # Per-request state; everything starts out unset.
        self.test_uri = None   # type: str
        self.test_id = None    # type: str
        self.robot_time = None # type: str
        self.robot_hmac = None # type: str
        self.req_hdrs = None   # type: StrHeaderListType
        self.format = None     # type: str
        self.check_name = None # type: str
        self.descend = None    # type: bool
        self.save = None       # type: bool
        self.save_path = None  # type: str
        self.timeout = None    # type: Any
        self.referer_spam_domains = [] # type: List[str]

        # Client rate limiting: arm the cleanup only while no period is set.
        client_limited = config.get("limit_client_tests", fallback="")
        if client_limited and self._client_period is None:
            client_period = config.getfloat("limit_client_period", fallback=1)
            self._client_period = client_period * 3600
            thor.schedule(self._client_period, self.client_ratelimit_cleanup)

        # Origin rate limiting: same pattern as above.
        origin_limited = config.get("limit_origin_tests", fallback="")
        if origin_limited and self._origin_period is None:
            origin_period = config.getfloat("limit_origin_period", fallback=1)
            self._origin_period = origin_period * 3600
            thor.schedule(self._origin_period, self.origin_ratelimit_cleanup)

        # Whitespace-separated list of referer hostnames to refuse.
        if config.get("referer_spam_domains", fallback=""):
            spam_setting = config["referer_spam_domains"]
            self.referer_spam_domains = [
                domain.strip() for domain in spam_setting.split()]

        self.run(query_string)
Пример #11
0
 def __init__(self, config: SectionProxy) -> None:
     """
     Load the static files, arm the watchdog when SYSTEMD_WATCHDOG is set,
     then create the HTTP server and run thor until interrupted.
     """
     self.config = config
     self.static_files = {}    # type: Dict[bytes, bytes]
     # Static assets first, then the optional extra directory.
     self.walk_files(config['asset_dir'], b"static/")
     extra_dir = config.get('extra_base_dir')
     if extra_dir:
         self.walk_files(extra_dir)
     # Watchdog: only when the environment asks for it.
     watchdog_enabled = os.environ.get("SYSTEMD_WATCHDOG")
     if watchdog_enabled:
         thor.schedule(self.watchdog_freq, self.watchdog_ping)
     # Server: every exchange goes to self.red_handler.
     host = config.get('host', '')
     port = int(config['port'])
     server = thor.http.HttpServer(host, port)
     server.on('exchange', self.red_handler)
     try:
         thor.run()
     except KeyboardInterrupt:
         sys.stderr.write("Stopping...\n")
         thor.stop()
Пример #12
0
 def __init__(self, config: SectionProxy) -> None:
     """
     Load the static files, arm the watchdog when SYSTEMD_WATCHDOG is set,
     then create the HTTP server and run thor until interrupted.
     """
     self.config = config
     self.static_files = {}  # type: Dict[bytes, bytes]
     # Static assets first, then the optional extra directory.
     self.walk_files(config['asset_dir'], b"static/")
     extra_dir = config.get('extra_base_dir')
     if extra_dir:
         self.walk_files(extra_dir)
     # Watchdog: only when the environment asks for it.
     watchdog_enabled = os.environ.get("SYSTEMD_WATCHDOG")
     if watchdog_enabled:
         thor.schedule(self.watchdog_freq, self.watchdog_ping)
     # Server: every exchange goes to self.red_handler.
     host = config.get('host', '')
     port = int(config['port'])
     server = thor.http.HttpServer(host, port)
     server.on('exchange', self.red_handler)
     try:
         thor.run()
     except KeyboardInterrupt:
         sys.stderr.write("Stopping...\n")
         thor.stop()
Пример #13
0
 def schedule_lines(self):
     """
     Read and dispatch input lines, at most as many as there are free
     slots (``max_outstanding - outstanding``), then re-schedule this
     method for as long as ``self.running`` stays set.

     An empty read marks the end of input and clears ``self.running``.
     """
     free_slots = self.max_outstanding - self.outstanding
     for _ in range(free_slots):
         if not self.running:
             # Nothing more happens once the run flag is cleared.
             break
         line = self.input.readline()
         self.line_num += 1
         if line == "":
             sys.stderr.write("* Input finished.\n")
             self.running = False
             break
         self.parse_line(line)
         # Progress report every ``notify`` lines.
         if self.line_num % self.notify == 0:
             sys.stderr.write("* %s processed\n" % (self.line_num))
     if self.running:
         schedule(0.1, self.schedule_lines)
Пример #14
0
 def connect(self, conn):
     """
     Handle an established connection: record timing, arm the wait
     timeout, hook up the data/close handlers, send ``self.magic`` and
     un-pause the connection.
     """
     self.conn = conn
     now = time()
     self.start = now
     self.latency = now - self.conn_start
     # Reports "TIMEOUT"/"wait" if the timer fires.
     wait_timer = schedule(self.wait_timeout, self.report, "TIMEOUT", "wait")
     self.timeouts.append(wait_timer)
     # Handlers go on before anything is written or reading resumes.
     conn.on("data", self.data)
     conn.on("close", self.close)
     conn.write(self.magic)
     conn.pause(False)
Пример #15
0
    def __init__(self, config: SectionProxy) -> None:
        """
        Arm the watchdog when SYSTEMD_WATCHDOG is set, read the static
        files, then create the HTTP server and run thor until interrupted.
        """
        self.config = config
        self.handler = partial(RedHandler, server=self)

        # Watchdog: only when the environment asks for it.
        watchdog_enabled = os.environ.get("SYSTEMD_WATCHDOG")
        if watchdog_enabled:
            thor.schedule(self.watchdog_freq, self.watchdog_ping)
            signal.signal(signal.SIGABRT, self.abrt_handler)

        # Static files: the asset directory, then the optional extra one.
        self.static_files = self.walk_files(config["asset_dir"], b"static/")
        extra_base = config.get("extra_base_dir")
        if extra_base:
            extra_files = self.walk_files(extra_base)
            self.static_files.update(extra_files)

        # Server: every exchange goes to the pre-bound handler.
        host = config.get("host", "")
        port = int(config["port"])
        server = thor.http.HttpServer(host, port)
        server.on("exchange", self.handler)
        try:
            thor.run()
        except KeyboardInterrupt:
            sys.stderr.write("Stopping...\n")
            thor.stop()
Пример #16
0
def slack_run(webui: "RedWebUi") -> None:
    """Handle a slack request.

    Reads ``response_url`` and ``text`` from ``webui.body_args``, answers the
    exchange immediately with a 200 and an ephemeral "Checking" message, and
    then checks the requested URI with an HttpResource bound to a
    SlackFormatter. A timeout based on ``max_runtime`` is scheduled for the
    check; the ``formatter_done`` handler cancels it and saves the test.
    """
    # First value of each form field, stripped; missing fields become "".
    slack_response_uri = webui.body_args.get("response_url", [""])[0].strip()
    formatter = slack.SlackFormatter(webui.config,
                                     None,
                                     webui.output,
                                     slack_uri=slack_response_uri)
    webui.test_uri = webui.body_args.get("text", [""])[0].strip()

    # The exchange is answered and completed here, before the check starts.
    webui.exchange.response_start(
        b"200",
        b"OK",
        [
            (b"Content-Type", formatter.content_type()),
            (b"Cache-Control", b"max-age=300"),
        ],
    )
    webui.output(
        json.dumps({
            "response_type": "ephemeral",
            "text": f"_Checking_ {webui.test_uri} _..._",
        }))
    webui.exchange.response_done([])

    top_resource = HttpResource(webui.config)
    top_resource.set_request(webui.test_uri, req_hdrs=webui.req_hdrs)
    formatter.bind_resource(top_resource)
    # NOTE(review): the secret is verified only after the 200 response above
    # has been completed; confirm that error_response() is expected to work
    # at this point (presumably it reports through the formatter).
    if not verify_slack_secret(webui):
        webui.error_response(
            formatter,
            b"403",
            b"Forbidden",
            "Incorrect Slack Authentication.",
            "Bad slack token.",
        )
        return
    # Bound the check's runtime; formatter.timeout is the scheduled callback.
    webui.timeout = thor.schedule(int(webui.config["max_runtime"]),
                                  formatter.timeout)

    # Registered on the formatter via thor.events.on (presumably for the
    # event named after the function -- verify against thor).
    @thor.events.on(formatter)
    def formatter_done() -> None:
        # Cancel the pending timeout, if any, before saving the result.
        if webui.timeout:
            webui.timeout.delete()
            webui.timeout = None
        save_test(webui, top_resource)

    top_resource.check()
Пример #17
0
 def watchdog_ping(self) -> None:
     """Send a WATCHDOG notification, then re-schedule this ping."""
     notify(Notification.WATCHDOG)
     # Re-arm after another ``watchdog_freq`` so the pings keep coming.
     interval = self.watchdog_freq
     thor.schedule(interval, self.watchdog_ping)
Пример #18
0
 def watchdog_ping(self) -> None:
     """Send a WATCHDOG notification, then re-schedule this ping."""
     notify(Notification.WATCHDOG)
     # Re-arm after another ``watchdog_freq`` so the pings keep coming.
     interval = self.watchdog_freq
     thor.schedule(interval, self.watchdog_ping)
Пример #19
0
 def check_done() -> None:
     """Schedule ``self._done`` with a 0.1 delay instead of calling it directly."""
     done_callback = self._done
     thor.schedule(0.1, done_callback)
Пример #20
0
 def origin_ratelimit_cleanup(self) -> None:
     """
     Clean up origin ratelimit counters.

     Clears ``self._origin_counts`` and re-schedules itself.
     """
     self._origin_counts.clear()
     # Re-arm with the origin period. The client period belongs to the
     # client-side cleanup and is presumably unset when only origin limits
     # are configured.
     thor.schedule(self._origin_period, self.origin_ratelimit_cleanup)
Пример #21
0
 def __init__(self, host, port, loop=None):
     """
     Create the TCP server for ``host``/``port``, route its 'connect'
     events to handle_conn and schedule a 'start' emit.
     """
     EventEmitter.__init__(self)
     self.tcp_server = listener = self.tcp_server_class(host, port, loop=loop)
     listener.on('connect', self.handle_conn)
     # Emitted through the scheduler rather than inline.
     schedule(0, self.emit, 'start')
Пример #22
0
 def response_done(trailers: RawHeaderListType) -> None:
     """Schedule thor.stop with zero delay; ``trailers`` is not used."""
     stop_loop = thor.stop
     thor.schedule(0, stop_loop)
Пример #23
0
    def run_test(self) -> None:
        """Test a URI.

        Reserves a save file when ``save_dir`` is configured and exists,
        builds the resource and formatter, and then applies the gatekeeping
        checks in order: referer limits, the "I am human" robot token,
        per-client and per-origin rate limits, and finally robots.txt. A
        failed check sends one complete error response and stops; otherwise
        the test carries on through ``self.continue_test``.
        """
        # urlsplit is already used by this method; quote is only needed here.
        from urllib.parse import quote

        # try to initialise stored test results
        if self.config.get("save_dir", "") and os.path.exists(
                self.config["save_dir"]):
            try:
                fd, self.save_path = tempfile.mkstemp(
                    prefix="", dir=self.config["save_dir"])
                # mkstemp() returns an open descriptor; only the path is
                # used here, so close it instead of leaking one per test.
                os.close(fd)
                self.test_id = os.path.split(self.save_path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                self.test_id = None  # should already be None, but make sure

        top_resource = HttpResource(self.config, descend=self.descend)
        # NOTE(review): the early-return paths below do not cancel this
        # timeout -- confirm that timeoutError copes with a finished response.
        self.timeout = thor.schedule(
            int(self.config["max_runtime"]),
            self.timeoutError,
            top_resource.show_task_map,
        )
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, "html", self.descend)(
            self.config,
            self.output,
            allow_save=self.test_id,
            is_saved=False,
            test_id=self.test_id,
            descend=self.descend,
        )

        def reject(status: bytes, phrase: bytes, cache_control: bytes,
                   message: str) -> None:
            """Send one complete error response through the formatter."""
            self.response_start(
                status,
                phrase,
                [
                    (b"Content-Type", formatter.content_type()),
                    (b"Cache-Control", cache_control),
                ],
            )
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        def robot_token(valid_till: str) -> str:
            """Return the hex HMAC that vouches for a robot_time value."""
            # digestmod is mandatory from Python 3.8 on. The tokens live for
            # 60 seconds and are both issued and checked in this method, so
            # naming a digest here is self-consistent.
            return hmac.new(self._robot_secret,
                            bytes(valid_till, "ascii"),
                            digestmod="sha256").hexdigest()

        # referer limiting
        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == "referer"
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(
                referers[0]).hostname in self.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            reject(b"403", b"Forbidden", b"max-age=360, must-revalidate",
                   referer_error)
            return

        # robot human check
        if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
            valid_till = int(self.robot_time)
            # Constant-time comparison; both sides are encoded because
            # compare_digest() rejects non-ASCII str input.
            is_valid = hmac.compare_digest(
                self.robot_hmac.encode("utf-8"),
                robot_token(self.robot_time).encode("ascii"))
            if is_valid and valid_till >= thor.time():
                self.continue_test(top_resource, formatter)
                return
            reject(b"403", b"Forbidden", b"max-age=60, must-revalidate",
                   "Naughty.")
            self.error_log("Naughty robot key.")
            # The response is complete; without this return the checks below
            # would go on and start a second response.
            return

        # enforce client limits
        client_limit = self.config.getint("limit_client_tests", fallback=0)
        if client_limit:
            client_id = self.get_client_id()
            if client_id:
                if self._client_counts.get(client_id, 0) > client_limit:
                    reject(b"429", b"Too Many Requests",
                           b"max-age=60, must-revalidate",
                           "Your client is over limit. Please try later.")
                    self.error_log("client over limit: %s" %
                                   client_id.decode("idna"))
                    return
                self._client_counts[client_id] += 1

        # enforce origin limits
        origin_limit = self.config.getint("limit_origin_tests", fallback=0)
        if origin_limit:
            origin = url_to_origin(self.test_uri)
            if origin:
                if self._origin_counts.get(origin, 0) > origin_limit:
                    reject(b"429", b"Too Many Requests",
                           b"max-age=60, must-revalidate",
                           "Origin is over limit. Please try later.")
                    self.error_log("origin over limit: %s" % origin)
                    return
                self._origin_counts[origin] += 1

        # check robots.txt
        robot_fetcher = RobotFetcher(self.config)

        @thor.events.on(robot_fetcher)
        def robot(results: Tuple[str, bool]) -> None:
            url, robot_ok = results
            if robot_ok:
                self.continue_test(top_resource, formatter)
            else:
                valid_till = str(int(thor.time()) + 60)
                # The URI under test is user input: percent-encode it so it
                # cannot break out of the href or split the query string.
                reject(
                    b"403", b"Forbidden", b"no-cache",
                    "This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>."
                    % (quote(self.test_uri, safe=""), valid_till,
                       robot_token(valid_till)))

        robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
Пример #24
0
 def ratelimit_cleanup(self) -> None:
     """
     Reset the origin ratelimit counters and re-schedule this cleanup
     after another ``_origin_period``.
     """
     self._origin_counts.clear()
     period = self._origin_period
     thor.schedule(period, self.ratelimit_cleanup)
Пример #25
0
 def __init__(self, host: bytes, port: int, loop: LoopBase=None) -> None:
     """
     Create the TCP server for ``host``/``port``, route its 'connect'
     events to handle_conn and schedule a 'start' emit.
     """
     EventEmitter.__init__(self)
     self.tcp_server = listener = self.tcp_server_class(host, port, loop=loop)
     listener.on('connect', self.handle_conn)
     # Emitted through the scheduler rather than inline.
     schedule(0, self.emit, 'start')
Пример #26
0
    def run_test(self) -> None:
        """Test a URI.

        Reserves a save file when ``config.save_dir`` is set and exists,
        builds the resource and formatter, refuses the request on a bad
        referer (403) or a failed robots precheck (502), and otherwise
        starts a 200 response and kicks off the check. When the formatter
        is done the response is finished, the resource is pickled to the
        save file (best effort) and heavy traffic is logged.
        """
        test_id = None
        path = None
        if self.config.save_dir and os.path.exists(self.config.save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='',
                                            dir=self.config.save_dir)
                # mkstemp() returns an open descriptor; the file is reopened
                # by path when saving, so close it instead of leaking one
                # per test.
                os.close(fd)
                test_id = os.path.split(path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                test_id = None

        top_resource = HttpResource(descend=self.descend)
        self.timeout = thor.schedule(self.config.max_runtime,
                                     self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html',
                                   self.descend)(self.ui_uri,
                                                 self.config.lang,
                                                 self.output,
                                                 allow_save=test_id,
                                                 is_saved=False,
                                                 test_id=test_id,
                                                 descend=self.descend)
        content_type = "%s; charset=%s" % (formatter.media_type,
                                           self.config.charset)
        content_type_bytes = content_type.encode('ascii')
        if self.check_name:
            display_resource = top_resource.subreqs.get(
                self.check_name, top_resource)
        else:
            display_resource = top_resource

        def refuse(status: bytes, phrase: bytes, cache_control: bytes,
                   message: str) -> None:
            """Send one complete error response through the formatter."""
            self.response_start(
                status, phrase,
                [(b"Content-Type", content_type_bytes),
                 (b"Cache-Control", cache_control)])
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == 'referer'
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(
                referers[0]).hostname in self.config.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            refuse(b"403", b"Forbidden", b"max-age=360, must-revalidate",
                   referer_error)
            return

        if not self.robots_precheck(self.test_uri):
            refuse(b"502", b"Gateway Error", b"max-age=60, must-revalidate",
                   "Forbidden by robots.txt.")
            return

        @thor.events.on(formatter)
        def formatter_done() -> None:
            self.response_done([])
            if test_id:
                try:
                    # The with-block closes the file even if pickling fails.
                    with gzip.open(path, 'w') as tmp_file:
                        pickle.dump(top_resource, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass  # we don't cry if we can't store it.
            # Total traffic: the top resource plus everything it linked to.
            ti = sum((linked.transfer_in for linked, _ in top_resource.linked),
                     top_resource.transfer_in)
            to = sum((linked.transfer_out for linked, _ in top_resource.linked),
                     top_resource.transfer_out)
            if ti + to > self.config.log_traffic:
                self.error_log("%iK in %iK out for <%s> (descend %s)" %
                               (ti / 1024, to / 1024, e_url(
                                   self.test_uri), str(self.descend)))

        self.response_start(
            b"200", b"OK",
            [(b"Content-Type", content_type_bytes),
             (b"Cache-Control", b"max-age=60, must-revalidate")])
        formatter.bind_resource(display_resource)
        top_resource.check()
Пример #27
0
 def response_done(trailers: RawHeaderListType) -> None:
     """Schedule thor.stop with zero delay; ``trailers`` is not used."""
     stop_loop = thor.stop
     thor.schedule(0, stop_loop)
Пример #28
0
 def origin_ratelimit_cleanup(self) -> None:
     """
     Clean up origin ratelimit counters.

     Clears ``self._origin_counts`` and re-schedules itself.
     """
     self._origin_counts.clear()
     # Re-arm with the origin period. The client period belongs to the
     # client-side cleanup and is presumably unset when only origin limits
     # are configured.
     thor.schedule(self._origin_period, self.origin_ratelimit_cleanup)
Пример #29
0
    def run_test(self) -> None:
        """Test a URI.

        Reserves a save file when ``config.save_dir`` is set and exists,
        builds the resource and formatter, refuses the request on a bad
        referer (403) or a failed robots precheck (502), and otherwise
        starts a 200 response and kicks off the check. When the formatter
        is done the response is finished, the resource is pickled to the
        save file (best effort) and heavy traffic is logged.
        """
        test_id = None
        path = None
        if self.config.save_dir and os.path.exists(self.config.save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=self.config.save_dir)
                # mkstemp() returns an open descriptor; the file is reopened
                # by path when saving, so close it instead of leaking one
                # per test.
                os.close(fd)
                test_id = os.path.split(path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                test_id = None

        top_resource = HttpResource(descend=self.descend)
        self.timeout = thor.schedule(self.config.max_runtime, self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html', self.descend)(
            self.ui_uri, self.config.lang, self.output,
            allow_save=test_id, is_saved=False, test_id=test_id, descend=self.descend)
        content_type = "%s; charset=%s" % (formatter.media_type, self.config.charset)
        content_type_bytes = content_type.encode('ascii')
        if self.check_name:
            display_resource = top_resource.subreqs.get(self.check_name, top_resource)
        else:
            display_resource = top_resource

        def refuse(status: bytes, phrase: bytes, cache_control: bytes,
                   message: str) -> None:
            """Send one complete error response through the formatter."""
            self.response_start(status, phrase, [
                (b"Content-Type", content_type_bytes),
                (b"Cache-Control", cache_control)])
            formatter.start_output()
            formatter.error_output(message)
            self.response_done([])

        referers = [value for hdr, value in self.req_hdrs if hdr.lower() == 'referer']
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in self.config.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            refuse(b"403", b"Forbidden", b"max-age=360, must-revalidate", referer_error)
            return

        if not self.robots_precheck(self.test_uri):
            refuse(b"502", b"Gateway Error", b"max-age=60, must-revalidate",
                   "Forbidden by robots.txt.")
            return

        @thor.events.on(formatter)
        def formatter_done() -> None:
            self.response_done([])
            if test_id:
                try:
                    # The with-block closes the file even if pickling fails.
                    with gzip.open(path, 'w') as tmp_file:
                        pickle.dump(top_resource, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.
            # Total traffic: the top resource plus everything it linked to.
            ti = sum((linked.transfer_in for linked, _ in top_resource.linked),
                     top_resource.transfer_in)
            to = sum((linked.transfer_out for linked, _ in top_resource.linked),
                     top_resource.transfer_out)
            if ti + to > self.config.log_traffic:
                self.error_log("%iK in %iK out for <%s> (descend %s)" % (
                    ti / 1024, to / 1024, e_url(self.test_uri), str(self.descend)))

        self.response_start(b"200", b"OK", [
            (b"Content-Type", content_type_bytes),
            (b"Cache-Control", b"max-age=60, must-revalidate")])
        formatter.bind_resource(display_resource)
        top_resource.check()
Пример #30
0
 def check_done() -> None:
     """Schedule ``self._done`` with a 0.1 delay instead of calling it directly."""
     done_callback = self._done
     thor.schedule(0.1, done_callback)
Пример #31
0
 def response_done(trailers):
     """Schedule thor.stop with zero delay; ``trailers`` is not used."""
     stop_loop = thor.stop
     thor.schedule(0, stop_loop)
Пример #32
0
    def run_test(self) -> None:
        """Test a URI."""
        # try to initialise stored test results
        if self.config.get('save_dir', "") and os.path.exists(self.config['save_dir']):
            try:
                fd, self.save_path = tempfile.mkstemp(prefix='', dir=self.config['save_dir'])
                self.test_id = os.path.split(self.save_path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                self.test_id = None # should already be None, but make sure

        top_resource = HttpResource(self.config, descend=self.descend)
        self.timeout = thor.schedule(int(self.config['max_runtime']), self.timeoutError,
                                     top_resource.show_task_map)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, 'html', self.descend)(
            self.config, self.output, allow_save=self.test_id, is_saved=False,
            test_id=self.test_id, descend=self.descend)

        # referer limiting
        referers = []
        for hdr, value in self.req_hdrs:
            if hdr.lower() == 'referer':
                referers.append(value)
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in self.referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            self.response_start(b"403", b"Forbidden", [
                (b"Content-Type", formatter.content_type()),
                (b"Cache-Control", b"max-age=360, must-revalidate")])
            formatter.start_output()
            formatter.error_output(referer_error)
            self.response_done([])
            return

        # robot human check
        if self.robot_time and self.robot_time.isdigit() and self.robot_hmac:
            valid_till = int(self.robot_time)
            computed_hmac = hmac.new(self._robot_secret, bytes(self.robot_time, 'ascii'))
            is_valid = self.robot_hmac == computed_hmac.hexdigest()
            if is_valid and valid_till >= thor.time():
                self.continue_test(top_resource, formatter)
                return
            else:
                self.response_start(b"403", b"Forbidden", [
                    (b"Content-Type", formatter.content_type()),
                    (b"Cache-Control", b"max-age=60, must-revalidate")])
                formatter.start_output()
                formatter.error_output("Naughty.")
                self.response_done([])
                self.error_log("Naughty robot key.")

        # enforce client limits
        if self.config.getint('limit_client_tests', fallback=0):
            client_id = self.get_client_id()
            if client_id:
                if self._client_counts.get(client_id, 0) > \
                  self.config.getint('limit_client_tests'):
                    self.response_start(b"429", b"Too Many Requests", [
                        (b"Content-Type", formatter.content_type()),
                        (b"Cache-Control", b"max-age=60, must-revalidate")])
                    formatter.start_output()
                    formatter.error_output("Your client is over limit. Please try later.")
                    self.response_done([])
                    self.error_log("client over limit: %s" % client_id.decode('idna'))
                    return
                self._client_counts[client_id] += 1

        # enforce origin limits
        if self.config.getint('limit_origin_tests', fallback=0):
            origin = url_to_origin(self.test_uri)
            if origin:
                if self._origin_counts.get(origin, 0) > \
                  self.config.getint('limit_origin_tests'):
                    self.response_start(b"429", b"Too Many Requests", [
                        (b"Content-Type", formatter.content_type()),
                        (b"Cache-Control", b"max-age=60, must-revalidate")])
                    formatter.start_output()
                    formatter.error_output("Origin is over limit. Please try later.")
                    self.response_done([])
                    self.error_log("origin over limit: %s" % origin)
                    return
                self._origin_counts[origin] += 1

        # check robots.txt
        robot_fetcher = RobotFetcher(self.config)
        @thor.events.on(robot_fetcher)
        def robot(results: Tuple[str, bool]) -> None:
            url, robot_ok = results
            if robot_ok:
                self.continue_test(top_resource, formatter)
            else:
                valid_till = str(int(thor.time()) + 60)
                robot_hmac = hmac.new(self._robot_secret, bytes(valid_till, 'ascii'))
                self.response_start(b"403", b"Forbidden", [
                    (b"Content-Type", formatter.content_type()),
                    (b"Cache-Control", b"no-cache")])
                formatter.start_output()
                formatter.error_output("This site doesn't allow robots. If you are human, please <a href='?uri=%s&robot_time=%s&robot_hmac=%s'>click here</a>." % (self.test_uri, valid_till, robot_hmac.hexdigest()) )
                self.response_done([])

        robot_fetcher.check_robots(HttpRequest.iri_to_uri(self.test_uri))
Пример #33
0
 def response_done(trailers):
     """Schedule ``thor.stop`` to run with zero delay.

     ``trailers`` is accepted to satisfy the callback signature but is
     not used.
     """
     stop_callback = thor.stop
     thor.schedule(0, stop_callback)
Пример #34
0
    def run_test(self) -> None:
        """Test a URI.

        Creates the top-level resource and the output formatter, schedules
        the ``max_runtime`` timeout, and then applies the gatekeeping steps
        in order: referer checks, rate limiting and (when both hCaptcha
        settings are configured) the captcha handler. The test is only
        continued once none of those steps has produced an error response.
        """
        self.test_id = init_save_file(self)
        top_resource = HttpResource(self.config, descend=self.descend)
        top_resource.set_request(self.test_uri, req_hdrs=self.req_hdrs)
        formatter = find_formatter(self.format, "html", self.descend)(
            self.config,
            top_resource,
            self.output,
            allow_save=self.test_id,
            is_saved=False,
            test_id=self.test_id,
            descend=self.descend,
        )
        # Bind the per-request arguments once so the helpers below can be
        # handed to the rate limiter / captcha handler as plain callables.
        continue_test = partial(self.continue_test, top_resource, formatter)
        error_response = partial(self.error_response, formatter)

        self.timeout = thor.schedule(
            int(self.config["max_runtime"]),
            self.timeoutError,
            top_resource.show_task_map,
        )

        # referer limiting
        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == "referer"
        ]
        referer_error = None

        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."

        if referers:
            # urlsplit() reports hostnames in lower case, so normalise the
            # configured domains the same way or mixed-case entries would
            # never match.
            referer_spam_domains = self.config.get(
                "referer_spam_domains", fallback=""
            ).lower().split()
            if referer_spam_domains:
                try:
                    referer_host = urlsplit(referers[0]).hostname
                except ValueError:
                    # The Referer header is client-supplied; a malformed
                    # value (e.g. an unbalanced IPv6 bracket) must not
                    # abort the request with an unhandled exception.
                    referer_host = None
                if referer_host in referer_spam_domains:
                    referer_error = "Referer not allowed."

        if referer_error:
            error_response(b"403", b"Forbidden", referer_error)
            return

        # enforce client limits
        try:
            ratelimiter.process(self, error_response)
        except ValueError:
            return  # over limit, don't continue.

        # hCaptcha
        if self.config.get("hcaptcha_sitekey", "") and self.config.get(
                "hcaptcha_secret", ""):
            CaptchaHandler(
                self,
                self.get_client_id(),
                continue_test,
                error_response,
            ).run()
        else:
            continue_test()