Пример #1
0
    def run_test(self):
        """Test a URI."""
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
                test_id = os.path.split(path)[1]
            except (OSError, IOError):
                # Don't try to store it. 
                test_id = None
        else:
            test_id = None

        formatter = find_formatter(self.format, 'html', self.descend)(
            self.base_uri, self.test_uri, self.req_hdrs, lang,
            self.output, allow_save=test_id, is_saved=False,
            test_id=test_id, descend=self.descend
        )

        self.response_start(
            "200", "OK", [
            ("Content-Type", "%s; charset=%s" % (
                formatter.media_type, charset)), 
            ("Cache-Control", "max-age=60, must-revalidate")
        ])
        
        ired = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend
        )
#        sys.stdout.write(pickle.dumps(ired))
        formatter.start_output()

        def done():
            if self.check_type:
            # TODO: catch errors
                state = ired.subreqs.get(self.check_type, None)
            else:
                state = ired
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    tmp_file = gzip.open(path, 'w')
                    pickle.dump(ired, tmp_file)
                    tmp_file.close()
                except (IOError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.
#            objgraph.show_growth()        
        ired.run(done)
Пример #2
0
    def run_test(self):
        """Test a URI."""
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix='', dir=save_dir)
                test_id = os.path.split(path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                test_id = None
        else:
            test_id = None

        formatter = find_formatter(self.format, 'html', self.descend)(
            self.base_uri, self.test_uri, self.req_hdrs, lang,
            self.output, allow_save=test_id, is_saved=False,
            check_type=self.check_type, test_id=test_id, descend=self.descend
        )

        referers = []
        for hdr, value in self.req_hdrs:
            if hdr.lower() == 'referer':
                referers.append(value)
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in referer_spam_domains:
            referer_error = "Referer now allowed."
        if referer_error:
            self.response_start(
                "403", "Forbidden", [
                ("Content-Type", "%s; charset=%s" % (
                    formatter.media_type, charset)),
                ("Cache-Control", "max-age=360, must-revalidate")
            ])
            formatter.start_output()
            self.output(error_template % referer_error)
            self.response_done([])
            return

        if not self.robots_precheck(self.test_uri):
            self.response_start(
                "502", "Gateway Error", [
                ("Content-Type", "%s; charset=%s" % (
                    formatter.media_type, charset)),
                ("Cache-Control", "max-age=60, must-revalidate")
            ])
            formatter.start_output()
            self.output(error_template % "Forbidden by robots.txt.")
            self.response_done([])
            return

        self.response_start(
            "200", "OK", [
            ("Content-Type", "%s; charset=%s" % (
                formatter.media_type, charset)),
            ("Cache-Control", "max-age=60, must-revalidate")
        ])

        resource = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend
        )
#        sys.stdout.write(pickle.dumps(resource))
        formatter.start_output()

        def done():
            if self.check_type:
                state = resource.subreqs.get(self.check_type, resource)
            else:
                state = resource
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    tmp_file = gzip.open(path, 'w')
                    pickle.dump(resource, tmp_file)
                    tmp_file.close()
                except (IOError, zlib.error, pickle.PickleError):
                    pass # we don't cry if we can't store it.
#            objgraph.show_growth()
            ti = sum([i.transfer_in for i,t in resource.linked], resource.transfer_in)
            to = sum([i.transfer_out for i,t in resource.linked], resource.transfer_out)
            if ti + to > log_traffic:
                sys.stderr.write("%iK in %iK out for <%s> (descend %s)" % (
                    ti / 1024,
                    to / 1024,
                    self.test_uri,
                    str(self.descend)
                ))

        resource.run(done)
Пример #3
0
    def run_test(self):
        """Test a URI and stream the formatted result to the client.

        When ``save_dir`` is set and exists, a temporary file is reserved in
        it and its basename is used as the test id. The request is rejected
        with "403 Forbidden" if it carries more than one Referer header or
        if the first Referer's hostname is in ``referer_spam_domains``, and
        with "502 Gateway Error" if ``self.robots_precheck`` refuses the
        URI. Otherwise a "200 OK" response is started and an
        ``HttpResource`` for ``self.test_uri`` is run. Once the run calls
        back, the formatter is handed the final state, the response is
        finished, and the resource is pickled (gzip-compressed) into the
        reserved file on a best-effort basis.
        """
        if save_dir and os.path.exists(save_dir):
            try:
                fd, path = tempfile.mkstemp(prefix="", dir=save_dir)
                # mkstemp hands back an *open* OS-level descriptor; only the
                # path is used later, so close it now to avoid leaking one
                # descriptor per request.
                os.close(fd)
                test_id = os.path.split(path)[1]
            except (OSError, IOError):
                # Don't try to store it.
                test_id = None
        else:
            test_id = None

        formatter = find_formatter(self.format, "html", self.descend)(
            self.base_uri,
            self.test_uri,
            self.req_hdrs,
            lang,
            self.output,
            allow_save=test_id,
            is_saved=False,
            test_id=test_id,
            descend=self.descend,
        )

        # Every response below carries the same Content-Type header.
        content_type = (
            "Content-Type",
            "%s; charset=%s" % (formatter.media_type, charset),
        )

        referers = [
            value for hdr, value in self.req_hdrs if hdr.lower() == "referer"
        ]
        referer_error = None
        if len(referers) > 1:
            referer_error = "Multiple referers not allowed."
        if referers and urlsplit(referers[0]).hostname in referer_spam_domains:
            referer_error = "Referer not allowed."
        if referer_error:
            # NOTE(review): the temp file reserved above is left in save_dir
            # on this early return -- confirm whether it should be removed.
            self.response_start(
                "403",
                "Forbidden",
                [
                    content_type,
                    ("Cache-Control", "max-age=360, must-revalidate"),
                ],
            )
            formatter.start_output()
            self.output(error_template % referer_error)
            self.response_done([])
            return

        if not self.robots_precheck(self.test_uri):
            # NOTE(review): same leftover temp file as in the 403 path.
            self.response_start(
                "502",
                "Gateway Error",
                [
                    content_type,
                    ("Cache-Control", "max-age=60, must-revalidate"),
                ],
            )
            formatter.start_output()
            self.output(error_template % "Forbidden by robots.txt.")
            self.response_done([])
            return

        self.response_start(
            "200",
            "OK",
            [
                content_type,
                ("Cache-Control", "max-age=60, must-revalidate"),
            ],
        )

        ired = HttpResource(
            self.test_uri,
            req_hdrs=self.req_hdrs,
            status_cb=formatter.status,
            body_procs=[formatter.feed],
            descend=self.descend,
        )
        #        sys.stdout.write(pickle.dumps(ired))
        formatter.start_output()

        def done():
            """Callback for ``ired.run``: finish output and save the result."""
            if self.check_type:
                # TODO: catch errors
                # NOTE(review): state is None when check_type has no matching
                # entry in subreqs -- confirm set_state() tolerates that.
                state = ired.subreqs.get(self.check_type, None)
            else:
                state = ired
            formatter.set_state(state)
            formatter.finish_output()
            self.response_done([])
            if test_id:
                try:
                    # The context manager closes the file even when
                    # pickling fails part-way through.
                    with gzip.open(path, "w") as tmp_file:
                        pickle.dump(ired, tmp_file)
                except (IOError, zlib.error, pickle.PickleError):
                    pass  # we don't cry if we can't store it.

        #            objgraph.show_growth()
        ired.run(done)