示例#1
0
    def process_redirect(self, response):
        """Resolve a 302 response that points at ``http://bit.ly/a/warning``.

        Responses with any other status code are delegated to
        ``BaseService.process_redirect``.

        For a 302, the ``location`` header must be exactly
        ``http://bit.ly/a/warning`` with a query string holding one ``url``
        value and one ``hash`` value, and ``hash`` must equal
        ``self.current_shortcode``.

        Returns:
            ``(URLStatus.ok, unshortened_url, None)`` for a valid 302.

        Raises:
            UnexpectedNoResult: if the header is missing, points somewhere
                else, lacks the expected parameters, or carries a different
                hash.
        """
        if response.status_code != 302:
            return BaseService.process_redirect(self, response)

        if 'location' not in response.headers:
            raise UnexpectedNoResult()

        location = urlparse.urlparse(response.headers['location'])
        expected_target = ("http", "bit.ly", "/a/warning")

        if (location.scheme, location.netloc, location.path) != expected_target:
            raise UnexpectedNoResult("Unexpected Location header after HTTP status 302")

        # On Python 2 the query is parsed as latin-1 bytes and the result is
        # decoded back to text at the end.
        on_py2 = sys.version_info[0] == 2
        raw_query = location.query.encode('latin-1') if on_py2 else location.query
        params = urlparse.parse_qs(raw_query)

        # Both parameters must be present exactly once.
        if not all(len(params.get(key, ())) == 1 for key in ("url", "hash")):
            raise UnexpectedNoResult("Unexpected Location header after HTTP status 302")

        if params["hash"][0] != self.current_shortcode:
            raise UnexpectedNoResult("Hash mismatch for HTTP status 302")

        unshortened_url = params["url"][0]
        if on_py2:
            unshortened_url = unshortened_url.decode('latin-1')

        return (URLStatus.ok, unshortened_url, None)
示例#2
0
    def _parse_errorhelp(self, response):
        """Extract the destination URL from an "errorhelp" page.

        The page text must contain a meta refresh pointing at
        ``http://tinyurl.com/errorb.php``. Its query string must hold exactly
        one ``url`` value and exactly one ``path`` value, and ``path`` must
        equal ``"/" + self.current_shortcode``.

        Returns:
            ``(URLStatus.ok, result_url, encoding)``. ``encoding`` is
            ``response.encoding`` unless the Python 2 decode fallback had to
            switch to ``'cp1252'`` or ``'latin-1'``.

        Raises:
            UnexpectedNoResult: if the meta refresh is missing, points
                somewhere else, lacks the expected parameters, or names a
                different shortcode.
        """
        refresh = re.search('<meta http-equiv="refresh" content="0;url=(.*?)">',
                            response.text)

        if refresh is None:
            missing = "No redirect on \"errorhelp\" page on HTTP status 200 for {0}"
            raise UnexpectedNoResult(missing.format(response.url))

        # Shared by the two "unexpected redirect" failures below.
        unexpected = "Unexpected redirect on \"errorhelp\" page  on HTTP status 200 for {0}"

        target = urlparse.urlparse(refresh.group(1))
        expected_target = ("http", "tinyurl.com", "/errorb.php")

        if (target.scheme, target.netloc, target.path) != expected_target:
            raise UnexpectedNoResult(unexpected.format(response.url))

        on_py2 = sys.version_info[0] == 2
        raw_query = target.query.encode('utf-8') if on_py2 else target.query
        params = urlparse.parse_qs(raw_query)

        # Both parameters must be present exactly once.
        if not all(len(params.get(key, ())) == 1 for key in ("url", "path")):
            raise UnexpectedNoResult(unexpected.format(response.url))

        if params["path"][0] != ("/" + self.current_shortcode):
            raise UnexpectedNoResult(
                "Code mismatch on \"errorhelp\" on HTTP status 200")

        encoding = response.encoding
        raw_url = params["url"][0]

        if not on_py2:
            # Python 3: parse_qs already produced text.
            return (URLStatus.ok, raw_url, encoding)

        # Python 2: decode the byte string, trying progressively more
        # permissive codecs and reporting whichever one succeeded.
        try:
            result_url = raw_url.decode('utf-8')
        except UnicodeError:
            try:
                result_url = raw_url.decode('cp1252')
                encoding = 'cp1252'
            except UnicodeError:
                result_url = raw_url.decode('latin-1')
                encoding = 'latin-1'

        return (URLStatus.ok, result_url, encoding)
示例#3
0
    def _scrub_url(self, code, url):
        """Unwrap a ``redirect.tinyurl.com/api/click`` URL to its target.

        If ``url`` has hostname ``redirect.tinyurl.com`` and path
        ``/api/click``, the first value of its ``out`` query parameter is
        returned as the URL. Any other URL, or a click URL with no usable
        ``out`` value, is returned unchanged.

        Args:
            code: Unused by this method.
            url: The URL to inspect.

        Returns:
            ``(URLStatus.ok, url_or_out_value, 'latin-1')``.
        """
        parsed_url = urlparse.urlparse(url)

        if parsed_url.hostname == "redirect.tinyurl.com" and parsed_url.path == "/api/click":
            # On Python 2 the query is parsed as latin-1 bytes and decoded
            # afterwards; on Python 3 parse_qs decodes as latin-1 directly.
            if sys.version_info[0] == 2:
                query = urlparse.parse_qs(parsed_url.query.encode('latin-1'))
            else:
                query = urlparse.parse_qs(parsed_url.query, encoding='latin-1')

            # parse_qs leaves out parameters that are absent or blank, so a
            # plain query["out"] would raise KeyError instead of falling
            # through to the unchanged URL below.
            out_values = query.get("out")

            if out_values:
                if sys.version_info[0] == 2:
                    scrubbed_url = out_values[0].decode('latin-1')
                else:
                    scrubbed_url = out_values[0]

                return (URLStatus.ok, scrubbed_url, 'latin-1')

        return (URLStatus.ok, url, 'latin-1')
示例#4
0
    def _scrub_url(self, code, url):
        """Replace a ``redirect.tinyurl.com/api/click`` URL with its ``out`` target.

        Only URLs whose hostname is ``redirect.tinyurl.com`` and whose path is
        ``/api/click`` are rewritten; the first ``out`` query value becomes
        the result. Every other URL is passed through as-is, and so is a
        click URL whose ``out`` parameter is missing or blank.

        Args:
            code: Unused by this method.
            url: The URL to inspect.

        Returns:
            ``(URLStatus.ok, url_or_out_value, 'latin-1')``.
        """
        parsed_url = urlparse.urlparse(url)

        if parsed_url.hostname == "redirect.tinyurl.com" and parsed_url.path == "/api/click":
            # Python 2 parses latin-1 bytes and decodes the value later;
            # Python 3 lets parse_qs do the latin-1 decoding itself.
            if sys.version_info[0] == 2:
                query = urlparse.parse_qs(parsed_url.query.encode('latin-1'))
            else:
                query = urlparse.parse_qs(parsed_url.query, encoding='latin-1')

            # Use .get(): parse_qs drops absent and blank parameters, and
            # indexing query["out"] directly would raise KeyError rather
            # than fall through to returning the original URL.
            out_values = query.get("out")

            if out_values:
                if sys.version_info[0] == 2:
                    scrubbed_url = out_values[0].decode('latin-1')
                else:
                    scrubbed_url = out_values[0]

                return (URLStatus.ok, scrubbed_url, 'latin-1')

        return (URLStatus.ok, url, 'latin-1')
示例#5
0
    def _parse_errorhelp(self, response):
        """Read the real destination out of an "errorhelp" page.

        Looks in ``response.text`` for a meta refresh whose URL is
        ``http://tinyurl.com/errorb.php``, with a query string carrying
        exactly one ``url`` and exactly one ``path`` value. ``path`` has to
        match ``"/" + self.current_shortcode``.

        Returns:
            ``(URLStatus.ok, result_url, encoding)``, where ``encoding``
            starts as ``response.encoding`` and becomes ``'cp1252'`` or
            ``'latin-1'`` only if the Python 2 decode had to fall back.

        Raises:
            UnexpectedNoResult: when the meta refresh is absent, targets
                another URL, is missing a parameter, or refers to another
                shortcode.
        """
        meta_refresh = re.search('<meta http-equiv="refresh" content="0;url=(.*?)">', response.text)

        if meta_refresh is None:
            raise UnexpectedNoResult("No redirect on \"errorhelp\" page on HTTP status 200 for {0}".format(response.url))

        # Template reused by both "unexpected redirect" checks.
        bad_redirect = "Unexpected redirect on \"errorhelp\" page  on HTTP status 200 for {0}"

        redirect = urlparse.urlparse(meta_refresh.group(1))

        if (redirect.scheme, redirect.netloc, redirect.path) != ("http", "tinyurl.com", "/errorb.php"):
            raise UnexpectedNoResult(bad_redirect.format(response.url))

        is_py2 = sys.version_info[0] == 2
        query_string = redirect.query.encode('utf-8') if is_py2 else redirect.query
        fields = urlparse.parse_qs(query_string)

        # Reject the redirect unless each field appears exactly once.
        if not all(len(fields.get(name, ())) == 1 for name in ("url", "path")):
            raise UnexpectedNoResult(bad_redirect.format(response.url))

        if fields["path"][0] != ("/" + self.current_shortcode):
            raise UnexpectedNoResult("Code mismatch on \"errorhelp\" on HTTP status 200")

        encoding = response.encoding
        url_value = fields["url"][0]

        if not is_py2:
            # Already text on Python 3; nothing to decode.
            return (URLStatus.ok, url_value, encoding)

        # Python 2 byte string: try utf-8 first, then cp1252, then latin-1,
        # recording the fallback codec that was actually used.
        try:
            result_url = url_value.decode('utf-8')
        except UnicodeError:
            try:
                result_url = url_value.decode('cp1252')
                encoding = 'cp1252'
            except UnicodeError:
                result_url = url_value.decode('latin-1')
                encoding = 'latin-1'

        return (URLStatus.ok, result_url, encoding)