Exemplo n.º 1
0
    def process_unknown_code(self, response):
        """Re-request the shortcode page and scrape its destination link.

        A non-200 ``response`` is handed unchanged to
        ``BaseService.process_unknown_code``. Otherwise the URL built from
        ``self.params['url_template']`` and ``self.current_shortcode`` is
        fetched with GET, and the ``href`` of the ``btn ignore`` anchor is
        extracted from the HTML-unescaped body.

        Returns:
            ``(URLStatus.ok, target, encoding)`` where ``target`` is the
            captured ``href`` and ``encoding`` is taken from the second
            response.

        Raises:
            UnexpectedNoResult: if the second response is not a 200, or the
                anchor cannot be found in it.
        """
        if response.status_code != 200:
            return BaseService.process_unknown_code(self, response)

        template = self.params['url_template']
        retry_url = template.format(shortcode=self.current_shortcode)
        retry = self.fetch_url(retry_url, 'get')

        if retry.status_code != 200:
            message = "Didn't get OK on second try. Got {0} for {1}".format(
                retry.status_code, self.current_shortcode)
            raise UnexpectedNoResult(message)

        # Scraping logic carried over from tinyback; the original author
        # doubted that execution still gets this far.
        page_html = html_unescape(retry.text)
        found = re.search(
            "<a class=\"btn ignore\" href=\"(.*?)\" title=", page_html)

        if found is None:
            message = "Didn't get match on second try for {0}".format(
                self.current_shortcode)
            raise UnexpectedNoResult(message)

        target = found.group(1)
        return (URLStatus.ok, target, retry.encoding)
Exemplo n.º 2
0
    def process_unknown_code(self, response):
        """Fetch the page for the current shortcode again and scrape the long URL.

        Only a 200 or 500 first response is handled here; any other status
        is delegated to ``BaseService.process_unknown_code``. The URL built
        from ``self.params["url_template"]`` is then requested with GET and
        the target is pulled out of the "snipped URL" quote block.

        Returns:
            ``(URLStatus.ok, url, encoding)`` where ``url`` is the
            HTML-unescaped capture with CR/LF characters removed and
            ``encoding`` comes from the second response.

        Raises:
            UnhandledStatusCode: if the second response is neither 200 nor 500.
            UnexpectedNoResult: if none of the three match attempts succeeds.
        """
        accepted = (200, 500)

        first_status_code = response.status_code
        if first_status_code not in accepted:
            return BaseService.process_unknown_code(self, response)

        preview = self.fetch_url(
            self.params["url_template"].format(shortcode=self.current_shortcode),
            "get",
        )

        second_status_code = preview.status_code
        if second_status_code not in accepted:
            message = "HTTP status changed from %s to %i on second request for %s" % (
                first_status_code,
                second_status_code,
                self.current_shortcode,
            )
            raise UnhandledStatusCode(message)

        pattern = '<p>You clicked on a snipped URL, which will take you to the following looong URL: </p> <div class="quote"><span class="quotet"></span><br/>(.*?)</div> <br />'

        # Three attempts, tried in order and only as far as needed:
        # the raw body, the body with "<br />\n" removed, and finally the
        # raw body again with "." allowed to span newlines.
        match = (
            re.search(pattern, preview.text)
            or re.search(pattern, preview.text.replace("<br />\n", ""))
            or re.search(pattern, preview.text, re.DOTALL)
        )

        if not match:
            message = "Could not find target URL on preview page for {0}".format(
                self.current_shortcode
            )
            raise UnexpectedNoResult(message)

        # A DOTALL capture may contain line breaks; drop them from the URL.
        target = html_unescape(match.group(1)).replace("\n", "").replace("\r", "")

        return URLStatus.ok, target, preview.encoding
Exemplo n.º 3
0
    def process_unknown_code(self, response):
        """Re-request the shortcode's page and extract the long URL from it.

        A first response whose status is not 200 or 500 is passed to
        ``BaseService.process_unknown_code``. Otherwise the URL built from
        ``self.params['url_template']`` is fetched with GET and the
        "snipped URL" quote block is searched for the target.

        Returns:
            ``(URLStatus.ok, url, encoding)``; ``url`` is the HTML-unescaped
            capture stripped of CR/LF, ``encoding`` belongs to the second
            response.

        Raises:
            UnhandledStatusCode: if the second status is neither 200 nor 500.
            UnexpectedNoResult: if the quote block cannot be matched.
        """
        first_status_code = response.status_code
        if first_status_code not in (200, 500):
            return BaseService.process_unknown_code(self, response)

        template = self.params['url_template']
        preview_url = template.format(shortcode=self.current_shortcode)
        preview = self.fetch_url(preview_url, 'get')

        second_status_code = preview.status_code
        if second_status_code not in (200, 500):
            details = (first_status_code, second_status_code,
                       self.current_shortcode)
            raise UnhandledStatusCode(
                "HTTP status changed from %s to %i on second request for %s" %
                details)

        pattern = "<p>You clicked on a snipped URL, which will take you to the following looong URL: </p> <div class=\"quote\"><span class=\"quotet\"></span><br/>(.*?)</div> <br />"

        # Each candidate is (lazy text supplier, regex flags). They are
        # evaluated in order and only until one of them matches.
        candidates = (
            (lambda: preview.text, 0),
            (lambda: preview.text.replace("<br />\n", ""), 0),
            (lambda: preview.text, re.DOTALL),
        )

        match = None
        for get_text, flags in candidates:
            match = re.search(pattern, get_text(), flags)
            if match:
                break

        if not match:
            raise UnexpectedNoResult(
                "Could not find target URL on preview page for {0}".format(
                    self.current_shortcode))

        # Unescape HTML entities, then remove line breaks from the capture.
        target = html_unescape(match.group(1))
        target = target.replace('\n', '')
        target = target.replace('\r', '')

        return URLStatus.ok, target, preview.encoding