Exemplo n.º 1
0
 def __init__(self, *args, **kwargs):
     """Initialise the base service, then install a browser-like User-Agent.

     All positional and keyword arguments are forwarded unchanged to
     ``BaseService.__init__``.  The User-Agent ends with a URLTeam tag
     that embeds ``VERSION``.
     """
     BaseService.__init__(self, *args, **kwargs)

     # Adjacent literals are concatenated into one template before
     # ``format`` fills in the version placeholder.
     ua_template = (
         'Mozilla/5.0 (Windows NT 6.1; WOW64) '
         'AppleWebKit/537.36 (KHTML, like Gecko) '
         'Chrome/39.0.2171.95 Safari/537.36 '
         'Nintendu/64 (URLTeam {0})'
     )
     self.user_agent = ua_template.format(VERSION)
Exemplo n.º 2
0
    def process_redirect(self, response):
        """Resolve a redirect, retrying once with a trailing slash if needed.

        The base-class result is the starting point.  When the reported link
        both starts and ends with ``/``, the short URL is requested again
        (HEAD) with ``/`` appended, and that second response is processed
        instead.  A final link equal to the myspace.com 404 page (http or
        https) is reported as ``URLStatus.not_found``.

        Returns:
            A ``(status, link, encoding)`` tuple.
        """
        status, target, encoding = BaseService.process_redirect(self, response)

        if target.startswith('/') and target.endswith('/'):
            retry_url = self.params['url_template'].format(
                shortcode=self.current_shortcode
            ) + '/'
            retry_response = self.fetch_url(retry_url, 'head')

            status, target, encoding = BaseService.process_redirect(
                self, retry_response
            )

        not_found_pages = ('https://myspace.com/404', 'http://myspace.com/404')
        if target not in not_found_pages:
            return status, target, encoding

        return URLStatus.not_found, None, None
Exemplo n.º 3
0
    def process_unknown_code(self, response):
        """Handle a 200 response by re-fetching the short URL and scraping it.

        Any status other than 200 is delegated to ``BaseService``.  Otherwise
        the short URL is fetched again with GET and the target is taken from
        the ``btn ignore`` anchor found in the HTML-unescaped body.

        Returns:
            ``(URLStatus.ok, target, encoding)`` built from the second response.

        Raises:
            UnexpectedNoResult: the second request did not return 200, or the
                anchor was not present in its body.
        """
        if response.status_code != 200:
            return BaseService.process_unknown_code(self, response)

        retry_url = self.params['url_template'].format(
            shortcode=self.current_shortcode
        )
        retry = self.fetch_url(retry_url, 'get')

        if retry.status_code != 200:
            message = "Didn't get OK on second try. Got {0} for {1}".format(
                retry.status_code, self.current_shortcode
            )
            raise UnexpectedNoResult(message)

        # Copied from tinyback. I don't think code will reach here anymore

        anchor_pattern = "<a class=\"btn ignore\" href=\"(.*?)\" title="
        found = re.search(anchor_pattern, html_unescape(retry.text))

        if found:
            return (URLStatus.ok, found.group(1), retry.encoding)

        raise UnexpectedNoResult(
            "Didn't get match on second try for {0}"
            .format(self.current_shortcode)
        )
Exemplo n.º 4
0
    def process_unknown_code(self, response):
        """Scrape the long URL from the preview page after a 200/500 response.

        Statuses other than 200 and 500 are delegated to ``BaseService``.
        Otherwise the short URL is fetched again with GET and the target is
        extracted from the "snipped URL" preview markup.

        Returns:
            ``(URLStatus.ok, url, encoding)`` where ``url`` is HTML-unescaped
            and stripped of CR/LF characters.

        Raises:
            UnhandledStatusCode: the second request returned a status other
                than 200 or 500.
            UnexpectedNoResult: no attempt located the target on the page.
        """
        first_status_code = response.status_code
        if first_status_code not in (200, 500):
            return BaseService.process_unknown_code(self, response)

        preview_url = self.params["url_template"].format(shortcode=self.current_shortcode)
        preview = self.fetch_url(preview_url, "get")
        second_status_code = preview.status_code

        if second_status_code not in (200, 500):
            raise UnhandledStatusCode(
                "HTTP status changed from %s to %i on second request for %s"
                % (first_status_code, second_status_code, self.current_shortcode)
            )

        pattern = '<p>You clicked on a snipped URL, which will take you to the following looong URL: </p> <div class="quote"><span class="quotet"></span><br/>(.*?)</div> <br />'

        # Attempts run lazily and in order; the first hit wins:
        #   1. the body as served,
        #   2. the body with every "<br />\n" removed,
        #   3. the body as served, letting "." span newlines.
        match = (
            re.search(pattern, preview.text)
            or re.search(pattern, preview.text.replace("<br />\n", ""))
            or re.search(pattern, preview.text, re.DOTALL)
        )

        if not match:
            raise UnexpectedNoResult("Could not find target URL on preview page for {0}".format(self.current_shortcode))

        target = html_unescape(match.group(1))
        # Line breaks captured inside the markup are not part of the URL.
        target = target.replace("\n", "").replace("\r", "")

        return URLStatus.ok, target, preview.encoding
Exemplo n.º 5
0
    def process_redirect(self, response):
        """Treat a redirect to the bare yatuc.com homepage as "not found".

        Returns:
            The base-class ``(status, url, encoding)`` tuple, or
            ``(URLStatus.not_found, None, None)`` when the resolved URL is
            exactly ``http://yatuc.com``.
        """
        status, url, encoding = BaseService.process_redirect(self, response)

        if url != 'http://yatuc.com':
            return status, url, encoding

        return URLStatus.not_found, None, None
Exemplo n.º 6
0
    def process_redirect(self, response):
        """Unwrap the 302 warning redirect; defer every other status.

        For a 302 the ``location`` header must point at
        ``http://bit.ly/a/warning`` and carry exactly one ``url`` and one
        ``hash`` query parameter, with ``hash`` equal to the current
        shortcode.  The ``url`` parameter is returned as the target.

        Returns:
            ``(URLStatus.ok, unshortened_url, None)`` for a valid 302,
            otherwise whatever ``BaseService.process_redirect`` returns.

        Raises:
            UnexpectedNoResult: the header is missing, points somewhere
                unexpected, lacks the parameters, or the hash differs.
        """
        if response.status_code != 302:
            return BaseService.process_redirect(self, response)

        if 'location' not in response.headers:
            raise UnexpectedNoResult()

        location = urlparse.urlparse(response.headers['location'])

        expected_target = ("http", "bit.ly", "/a/warning")
        if (location.scheme, location.netloc, location.path) != expected_target:
            raise UnexpectedNoResult("Unexpected Location header after HTTP status 302")

        # On Python 2 the query is parsed as latin-1 bytes and the result is
        # decoded back afterwards; on Python 3 it is parsed as text directly.
        on_python2 = sys.version_info[0] == 2
        raw_query = location.query.encode('latin-1') if on_python2 else location.query
        query = urlparse.parse_qs(raw_query)

        if len(query.get("url", ())) != 1 or len(query.get("hash", ())) != 1:
            raise UnexpectedNoResult("Unexpected Location header after HTTP status 302")
        if query["hash"][0] != self.current_shortcode:
            raise UnexpectedNoResult("Hash mismatch for HTTP status 302")

        unshortened_url = query["url"][0]
        if on_python2:
            unshortened_url = unshortened_url.decode('latin-1')

        return (URLStatus.ok, unshortened_url, None)
Exemplo n.º 7
0
 def process_redirect_body(self, response):
     """Map the service's error page to "not found".

     ``BaseService.process_redirect_body`` is tried first.  If it raises
     ``UnexpectedNoResult`` and the body contains the service's error
     sentence, ``(URLStatus.not_found, None, None)`` is returned; any other
     body re-raises the original exception.
     """
     try:
         return BaseService.process_redirect_body(self, response)
     except UnexpectedNoResult:
         # UTF-8 bytes of a Hungarian sentence (roughly "Sorry, but the
         # entered URL is wrong!"), decoded so it compares against text.
         error_sentence = b'Sajn\xc3\xa1lom, de a be\xc3\xadrt URL hib\xc3\xa1s!'.decode('utf8')

         if error_sentence not in response.text:
             raise

         return (URLStatus.not_found, None, None)
Exemplo n.º 8
0
    def process_redirect(self, response):
        """Report redirects back to totally.awe.sm as "not found".

        Returns:
            ``(URLStatus.not_found, None, None)`` when the resolved URL starts
            with ``http://totally.awe.sm/`` and ends with the current
            shortcode; otherwise the base-class ``(status, url, encoding)``.
        """
        status, url, encoding = BaseService.process_redirect(self, response)

        bounced_to_fallback = (
            url.startswith('http://totally.awe.sm/')
            and url.endswith(self.current_shortcode)
        )
        if bounced_to_fallback:
            return URLStatus.not_found, None, None

        return status, url, encoding
Exemplo n.º 9
0
    def process_redirect(self, response):
        """Report an OK redirect to the GoDaddy default page as "not found".

        Returns:
            ``(URLStatus.not_found, None, None)`` when the base class reports
            ``URLStatus.ok`` with the GoDaddy default-page URL; otherwise the
            base-class ``(status, url, encoding)`` unchanged.
        """
        status, url, encoding = BaseService.process_redirect(self, response)

        default_page = 'http://www.godaddy.com/default.aspx?isc=xcowebgd'
        if status == URLStatus.ok and url == default_page:
            return URLStatus.not_found, None, None

        return status, url, encoding
Exemplo n.º 10
0
 def process_redirect_body(self, response):
     """Fall back to "not found" when the body shows the service's error text.

     The base implementation runs first.  Only when it raises
     ``UnexpectedNoResult`` is the body inspected: if it contains the error
     sentence the result is ``(URLStatus.not_found, None, None)``, otherwise
     the exception propagates unchanged.
     """
     try:
         return BaseService.process_redirect_body(self, response)
     except UnexpectedNoResult:
         # Stored as UTF-8 bytes and decoded here; a Hungarian sentence,
         # roughly "Sorry, but the entered URL is wrong!".
         marker = b'Sajn\xc3\xa1lom, de a be\xc3\xadrt URL hib\xc3\xa1s!'.decode('utf8')

         if marker not in response.text:
             raise

         return (URLStatus.not_found, None, None)
Exemplo n.º 11
0
    def process_redirect(self, response):
        """Classify redirects, handling missing headers and private snips.

        Returns:
            * ``(URLStatus.not_found, None, None)`` when the base class raises
              ``UnexpectedNoResult`` and there is no ``Location`` header;
            * ``(URLStatus.unavailable, None, None)`` when the link is the
              ``/site/getprivate`` page for the current shortcode;
            * the base-class ``(status, link, encoding)`` otherwise.

        Raises:
            UnexpectedNoResult: re-raised from the base class when a
                ``Location`` header is present.
        """
        try:
            resolved = BaseService.process_redirect(self, response)
        except UnexpectedNoResult:
            if "Location" in response.headers:
                raise
            return URLStatus.not_found, None, None

        url_status, link, encoding = resolved

        private_link = "/site/getprivate?snip=" + self.current_shortcode
        if link != private_link:
            return url_status, link, encoding

        return URLStatus.unavailable, None, None
Exemplo n.º 12
0
    def process_redirect(self, response):
        """Resolve a redirect, mapping two special cases to their own statuses.

        A base-class ``UnexpectedNoResult`` on a response without a
        ``Location`` header becomes ``URLStatus.not_found``; with the header
        present it is re-raised.  A link equal to
        ``/site/getprivate?snip=<shortcode>`` becomes
        ``URLStatus.unavailable``.  Anything else is returned as the
        base-class ``(status, link, encoding)``.
        """
        try:
            outcome = BaseService.process_redirect(self, response)
        except UnexpectedNoResult:
            if 'Location' in response.headers:
                raise
            return URLStatus.not_found, None, None

        url_status, link, encoding = outcome

        private_snip_link = "/site/getprivate?snip=" + self.current_shortcode
        if link != private_snip_link:
            return url_status, link, encoding

        return URLStatus.unavailable, None, None
Exemplo n.º 13
0
    def process_redirect(self, response):
        """Dispatch a response to the 200 handler, the preview path or the base.

        * Status 200 goes to ``self._fetch_200``.
        * A 301 with a ``Location`` header and an ``X-tiny`` header starting
          with ``aff`` goes to ``self._preview`` with the current shortcode
          and the ``Location`` value.
        * Everything else goes to ``BaseService.process_redirect``; if that
          raises ``UnexpectedNoResult`` the result is
          ``(URLStatus.unavailable, None, None)``.
        """
        if response.status_code == 200:
            return self._fetch_200(response)

        headers = response.headers
        if 'Location' in headers and response.status_code == 301:
            tiny = headers.get("X-tiny")

            if tiny and tiny[:3] == "aff":
                return self._preview(self.current_shortcode, headers['Location'])

        try:
            return BaseService.process_redirect(self, response)
        except UnexpectedNoResult:
            return (URLStatus.unavailable, None, None)
Exemplo n.º 14
0
    def process_redirect(self, response):
        """Route the response by status code and ``X-tiny`` header.

        Status 200 is handled by ``self._fetch_200``.  A 301 carrying a
        ``Location`` header whose ``X-tiny`` header begins with ``aff`` is
        handled by ``self._preview``.  All remaining cases use
        ``BaseService.process_redirect``, with ``UnexpectedNoResult`` turned
        into ``(URLStatus.unavailable, None, None)``.
        """
        if response.status_code == 200:
            return self._fetch_200(response)

        is_301_with_location = (
            'Location' in response.headers and response.status_code == 301
        )
        if is_301_with_location:
            tiny = response.headers.get("X-tiny")
            has_aff_prefix = tiny and tiny[:3] == "aff"

            if has_aff_prefix:
                location = response.headers['Location']
                return self._preview(self.current_shortcode, location)

        try:
            return BaseService.process_redirect(self, response)
        except UnexpectedNoResult:
            return (URLStatus.unavailable, None, None)
Exemplo n.º 15
0
    def process_redirect_body(self, response):
        """Recover the target from an error page that echoes the Location.

        The base implementation runs first.  If it raises
        ``UnexpectedNoResult`` and the body contains ``cakeErr1-context``
        together with a quoted ``"Location: ..."`` line closing a ``<pre>``,
        the captured value is returned as the link.  Otherwise the original
        exception is re-raised.

        Returns:
            The base-class result, or ``(URLStatus.ok, link, encoding)`` with
            LF/CR in ``link`` percent-encoded.
        """
        try:
            return BaseService.process_redirect_body(self, response)
        except UnexpectedNoResult:
            body = response.text

            found = None
            if 'cakeErr1-context' in body:
                # DOTALL: the captured value may span several lines.
                found = re.search(r'"Location: (.*)"</pre>', body, re.DOTALL)

            if not found:
                raise

            # Python's urllib escapes too much. We'll escape the bare minimum
            link = found.group(1).replace('\n', '%0A').replace('\r', '%0D')

            return (URLStatus.ok, link, response.encoding)
Exemplo n.º 16
0
    def process_redirect_body(self, response):
        """Extract the link from a ``cakeErr1-context`` error page if needed.

        Delegates to ``BaseService.process_redirect_body``.  When that raises
        ``UnexpectedNoResult``, the body is searched for the
        ``cakeErr1-context`` marker and a ``"Location: ..."</pre>`` fragment;
        if either is missing the exception is re-raised.

        Returns:
            The base-class result, or ``(URLStatus.ok, link, encoding)`` where
            newlines and carriage returns in ``link`` are percent-encoded.
        """
        try:
            return BaseService.process_redirect_body(self, response)
        except UnexpectedNoResult:
            page = response.text

            located = (
                re.search(r'"Location: (.*)"</pre>', page, re.DOTALL)
                if 'cakeErr1-context' in page
                else None
            )

            if not located:
                raise

            # Python's urllib escapes too much. We'll escape the bare minimum
            link = located.group(1)
            for raw, escaped in (('\n', '%0A'), ('\r', '%0D')):
                link = link.replace(raw, escaped)

            return (URLStatus.ok, link, response.encoding)
Exemplo n.º 17
0
    def process_unavailable(self, response):
        """Scrape the former target from a 410 "blocked" page.

        Statuses other than 410 are delegated to ``BaseService``.

        Returns:
            * ``(URLStatus.ok, url, encoding)`` when the page still shows the
              "was forwarding to" target (HTML-unescaped);
            * ``(URLStatus.unavailable, None, None)`` when the page carries
              either spelling of the removed-for-spamming notice.

        Raises:
            UnexpectedNoResult: the page matches none of the known layouts.
        """
        if response.status_code != 410:
            return BaseService.process_unavailable(self, response)

        body = response.text
        match = re.search(r'was forwarding to: <BR> <font color=red>(.*)</font>', body)

        if match:
            url = html_unescape(match.group(1))
            return URLStatus.ok, url, response.encoding

        # Two differently-cased notices are both accepted.
        removed_for_spam = (
            re.search(r'This shortURL address was REMOVED for SPAMMING', body)
            or 'REMOVED FOR SPAMMING' in body
        )
        if removed_for_spam:
            return URLStatus.unavailable, None, None

        raise UnexpectedNoResult(
            "Could not find target URL on blocked page for {0}"
            .format(self.current_shortcode))
Exemplo n.º 18
0
    def process_unknown_code(self, response):
        """Read the long URL off the preview page for a 200 or 500 response.

        Other statuses are delegated to ``BaseService``.  For 200/500 the
        short URL is requested again with GET and the preview markup is
        searched for the target.

        Returns:
            ``(URLStatus.ok, url, encoding)``; ``url`` is HTML-unescaped with
            LF and CR removed.

        Raises:
            UnhandledStatusCode: the second request's status is neither 200
                nor 500.
            UnexpectedNoResult: the target could not be found on the page.
        """
        first_status_code = response.status_code
        if first_status_code not in (200, 500):
            return BaseService.process_unknown_code(self, response)

        template = self.params['url_template']
        second = self.fetch_url(
            template.format(shortcode=self.current_shortcode), 'get')
        second_status_code = second.status_code

        if second_status_code not in (200, 500):
            details = (first_status_code, second_status_code,
                       self.current_shortcode)
            raise UnhandledStatusCode(
                "HTTP status changed from %s to %i on second request for %s" %
                details)

        pattern = "<p>You clicked on a snipped URL, which will take you to the following looong URL: </p> <div class=\"quote\"><span class=\"quotet\"></span><br/>(.*?)</div> <br />"

        # Evaluated left to right, stopping at the first hit: raw body, body
        # without "<br />\n", then raw body with "." allowed to cross lines.
        match = (
            re.search(pattern, second.text)
            or re.search(pattern, second.text.replace("<br />\n", ""))
            or re.search(pattern, second.text, re.DOTALL)
        )

        if not match:
            raise UnexpectedNoResult(
                "Could not find target URL on preview page for {0}".format(
                    self.current_shortcode))

        url = html_unescape(match.group(1))
        # Drop line breaks picked up from the surrounding markup.
        url = url.replace('\n', '').replace('\r', '')

        return URLStatus.ok, url, second.encoding
Exemplo n.º 19
0
 def scrape_one(self, sequence_number):
     """Clear the phishing-page flag, then scrape via ``BaseService``.

     Returns whatever ``BaseService.scrape_one`` returns for
     ``sequence_number``.
     """
     # The flag is reset on every call, before the base class runs.
     self._processing_phishing_page = False
     outcome = BaseService.scrape_one(self, sequence_number)
     return outcome
Exemplo n.º 20
0
 def scrape_one(self, sequence_number):
     """Scrape one sequence number, starting with the phishing flag off.

     ``_processing_phishing_page`` is set to ``False`` before delegating
     to ``BaseService.scrape_one``, whose result is returned unchanged.
     """
     self._processing_phishing_page = False

     scraped = BaseService.scrape_one(self, sequence_number)
     return scraped
Exemplo n.º 21
0
 def __init__(self, *args, **kwargs):
     """Initialise the base service and start with the phishing flag off.

     All arguments are forwarded unchanged to ``BaseService.__init__``.
     """
     BaseService.__init__(self, *args, **kwargs)

     # Initialised here so the attribute always exists on the instance.
     self._processing_phishing_page = False
Exemplo n.º 22
0
 def fetch_url(self, url, *args, **kwargs):
     """Fetch ``url`` with ``?passthru=1`` appended.

     Args:
         url: The short URL to request; the ``passthru`` query string is
             appended verbatim.
         *args, **kwargs: Passed through unchanged to
             ``BaseService.fetch_url``.  Accepting them keeps this override
             callable as ``fetch_url(url, 'get')`` / ``fetch_url(url,
             'head')``, which a ``(self, url)``-only signature would reject
             with ``TypeError``.

     Returns:
         Whatever ``BaseService.fetch_url`` returns.
     """
     # NOTE(review): assumes ``url`` has no query string of its own - confirm.
     return BaseService.fetch_url(self, url + '?passthru=1', *args, **kwargs)
Exemplo n.º 23
0
 def __init__(self, *args, **kwargs):
     """Set up the base service and override its User-Agent string.

     Arguments are forwarded unchanged to ``BaseService.__init__``.  The
     User-Agent is a browser-style string followed by a URLTeam tag that
     includes ``VERSION``.
     """
     BaseService.__init__(self, *args, **kwargs)

     user_agent_parts = (
         'Mozilla/5.0 (Windows NT 6.1; WOW64) ',
         'AppleWebKit/537.36 (KHTML, like Gecko) ',
         'Chrome/39.0.2171.95 Safari/537.36 ',
         'Nintendu/64 (URLTeam {0})',
     )
     # Join first, then format, so the placeholder is filled on the whole
     # string.
     self.user_agent = ''.join(user_agent_parts).format(VERSION)
Exemplo n.º 24
0
 def __init__(self, *args, **kwargs):
     """Forward all arguments to ``BaseService`` and clear the phishing flag."""
     BaseService.__init__(self, *args, **kwargs)

     # Starts out False on every new instance.
     self._processing_phishing_page = False
Exemplo n.º 25
0
    def process_redirect_body(self, response):
        """Treat a 301 without a ``Location`` header as "not found".

        Returns:
            ``(URLStatus.not_found, None, None)`` for a 301 response lacking
            ``Location``; otherwise the result of
            ``BaseService.process_redirect_body``.
        """
        if response.status_code != 301 or 'Location' in response.headers:
            return BaseService.process_redirect_body(self, response)

        return (URLStatus.not_found, None, None)
Exemplo n.º 26
0
 def __init__(self, *args, **kwargs):
     """Consume ``shortcode_width`` and initialise the base service.

     ``shortcode_width`` (default 4) is removed from ``kwargs`` and stored
     on the instance before the remaining arguments are forwarded to
     ``BaseService.__init__``.
     """
     width = kwargs.pop('shortcode_width', 4)
     self.shortcode_width = width

     BaseService.__init__(self, *args, **kwargs)
Exemplo n.º 27
0
 def process_redirect(self, response):
     """Resolve a redirect, mapping ``UnexpectedNoResult`` to "not found".

     Returns:
         The result of ``BaseService.process_redirect``, or
         ``(URLStatus.not_found, None, None)`` if it raises
         ``UnexpectedNoResult``.
     """
     try:
         resolved = BaseService.process_redirect(self, response)
     except UnexpectedNoResult:
         resolved = (URLStatus.not_found, None, None)

     return resolved