示例#1
0
    def unexpected_http_status(self, code, resp):
        """
        Resolve a short code whose first response needs special handling.

        A 302 is answered from its Location header, a 200 triggers a second
        GET request whose body is searched for an <iframe> element, and every
        other status is handed to the parent class.

        Returns the Location header value (302; may be None when the header
        is absent) or the HTML-unescaped, UTF-8 encoded iframe URL (200).

        Raises NoRedirectException or BlockedException for specific 302
        targets, and ServiceException when the second request or the page
        contents are not as expected.
        """
        status = resp.status

        if status == 302:
            target = resp.getheader("Location")
            if target:
                if "sharedby" in target or "visibli" in target:
                    raise exceptions.NoRedirectException()
                if target.startswith("http://yahoo.com"):
                    raise exceptions.BlockedException(
                        "Banned (location=%s)" % target)

            # Guess it be an override for site that busts out iframes
            return target

        if status != 200:
            parent = super(BaseVisbliService, self)
            return parent.unexpected_http_status(code, resp)

        second_resp, body = self._http_get(code)
        if second_resp.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                second_resp.status)

        frame = re.search(r'<iframe id="[^"]+" src="([^"]+)">', body)
        if frame:
            framed_url = frame.group(1).decode("utf-8")
            return HTMLParser.HTMLParser().unescape(framed_url).encode("utf-8")

        # No iframe: tell the known user-agent failure apart from the rest
        if 'Undefined index:  HTTP_USER_AGENT' in body:
            raise exceptions.ServiceException(
                "Website broken about user-agent")
        raise exceptions.ServiceException("No iframe url found")
示例#2
0
    def _fetch_blocked(self, code):
        """
        Extract the destination URL from the page served with HTTP status 200.

        Issues a GET request for ``code`` and cuts out the text found between
        self.BLOCKED_STRING_START and self.BLOCKED_STRING_END.

        Returns the HTML-unescaped URL as a UTF-8 encoded string.

        Raises ServiceException when the status is no longer 200 or a marker
        is missing, CodeBlockedException on an empty body and
        BlockedException when the body contains self.RATE_LIMIT_STRING.
        """
        resp = self._http_fetch(code, "GET")
        body = resp.read()

        if resp.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                resp.status)
        if not body:
            raise exceptions.CodeBlockedException(
                "Empty response on status 200")
        if self.RATE_LIMIT_STRING in body:
            raise exceptions.BlockedException()

        # Drop everything up to and including the start marker ...
        start_marker = self.BLOCKED_STRING_START
        start = body.find(start_marker)
        if start == -1:
            raise exceptions.ServiceException(
                "Unexpected response on status 200")
        tail = body[start + len(start_marker):]

        # ... and keep what precedes the end marker.
        end = tail.find(self.BLOCKED_STRING_END)
        if end == -1:
            raise exceptions.ServiceException(
                "Unexpected response on status 200")

        raw_url = tail[:end].decode("utf-8")
        return HTMLParser.HTMLParser().unescape(raw_url).encode("utf-8")
示例#3
0
    def fetch(self, code):
        """
        Resolve ``code`` with one request and dispatch on the HTTP status.

        200 -- result of self._fetch_200(code)
        301 -- the Location header, or self._preview(code) when the
               "X-tiny" header starts with "aff"
        302 -- CodeBlockedException
        404 -- NoRedirectException
        500 -- the connection is closed, then ServiceException
        anything else -- ServiceException

        A 301 without a Location header raises CodeBlockedException.
        """
        resp = self._http_fetch(code)

        if resp.status == 200:
            return self._fetch_200(code)
        elif resp.status == 301:
            location = resp.getheader("Location")
            if not location:
                raise exceptions.CodeBlockedException(
                    "No Location header after HTTP status 301")
            tiny = resp.getheader("X-tiny")
            if tiny and tiny[:3] == "aff":
                return self._preview(code)
            return location
        elif resp.status == 302:
            raise exceptions.CodeBlockedException()
        elif resp.status == 404:
            raise exceptions.NoRedirectException()
        elif resp.status == 500:
            # Some "errorhelp" URLs result in HTTP status 500, which goes away when trying a different server
            self._conn.close()
            raise exceptions.ServiceException("HTTP status 500")
        else:
            raise exceptions.ServiceException("Unknown HTTP status %i" %
                                              resp.status)
        # Every branch above returns or raises, so nothing follows here.
示例#4
0
    def fetch(self, code):
        """
        Resolve ``code`` with one request and dispatch on the HTTP status.

        301 -- the Location header when the reason phrase is "Moved";
               CodeBlockedException (after closing the connection) when it
               is "Moved Permanently"; ServiceException for other reasons
        302 -- result of self._parse_warning_url(code, location)
        403 -- BlockedException
        404 -- NoRedirectException
        410 -- CodeBlockedException
        anything else -- ServiceException

        A 301 or 302 without a Location header raises ServiceException.
        """
        resp = self._http_fetch(code)
        status = resp.status

        if status == 301:
            target = resp.getheader("Location")
            if not target:
                raise exceptions.ServiceException(
                    "No Location header after HTTP status 301")

            reason = resp.reason
            if reason == "Moved":  # Normal bit.ly redirect
                return target
            if reason == "Moved Permanently":
                # Weird "bundles" redirect, forces connection close despite
                # sending Keep-Alive header
                self._conn.close()
                raise exceptions.CodeBlockedException()
            raise exceptions.ServiceException(
                "Unknown HTTP reason %s after HTTP status 301" % reason)

        if status == 302:
            target = resp.getheader("Location")
            if not target:
                raise exceptions.ServiceException(
                    "No Location header after HTTP status 302")
            return self._parse_warning_url(code, target)

        if status == 403:
            raise exceptions.BlockedException()
        if status == 404:
            raise exceptions.NoRedirectException()
        if status == 410:
            raise exceptions.CodeBlockedException()

        raise exceptions.ServiceException("Unknown HTTP status %i" % status)
示例#5
0
 def _parse_warning_url(self, code, url):
     url = urlparse.urlparse(url)
     if url.scheme != "http" or url.netloc != "bitly.com" or url.path != "/a/warning":
         raise exceptions.ServiceException(
             "Unexpected Location header after HTTP status 302")
     query = urlparse.parse_qs(url.query)
     if not ("url" in query and len(query["url"])
             == 1) or not ("hash" in query and len(query["hash"]) == 1):
         raise exceptions.ServiceException(
             "Unexpected Location header after HTTP status 302")
     if query["hash"][0] != code:
         raise exceptions.ServiceException(
             "Hash mismatch forr HTTP status 302")
     return query["url"][0]
示例#6
0
    def _parse_json(self, data):
        try:
            data = json.loads(data)
        except ValueError:
            raise exceptions.ServiceException("Could not decode response")

        if not "kind" in data or data["kind"] != "urlshortener#url":
            raise exceptions.ServiceException("No/bad type given")
        if not "status" in data:
            raise exceptions.ServiceException("No status given")
        if not "longUrl" in data:
            raise exceptions.CodeBlockedException("Status: %s" %
                                                  data["status"])
        return data["longUrl"]
示例#7
0
    def _fetch_200(self, code):
        resp, data = self._http_get(code)

        if resp.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                resp.status)

        if "<title>Redirecting...</title>" in data:
            return self._parse_errorhelp(code, data)
        elif "Error: TinyURL redirects to a TinyURL." in data:
            return self._parse_tinyurl_redirect(data)
        else:
            raise exceptions.ServiceException(
                "Unexpected response on status 200")
示例#8
0
    def _preview(self, code):
        """
        Read the destination of ``code`` from the service's preview page.

        Requests "preview.php?num=<code>" with GET and looks for the
        "Proceed to this site." link.

        Returns the HTML-unescaped link target as a UTF-8 encoded string.

        Raises ServiceException on a non-200 status or a missing link.
        """
        resp = self._http_fetch("preview.php?num=" + code, "GET")
        data = resp.read()

        if resp.status != 200:
            raise exceptions.ServiceException(
                "Unexpected HTTP status %i on preview page" % resp.status)

        match = re.search(
            "<a id=\"redirecturl\" href=\"(.*?)\">Proceed to this site.</a>",
            data, re.DOTALL)
        if not match:
            raise exceptions.ServiceException("No redirect on preview page")

        # Decode before unescaping: unescaping or encoding a byte string
        # that holds non-ASCII bytes forces an implicit ASCII decode, which
        # fails for such URLs.
        url = match.group(1).decode("utf-8")
        return HTMLParser.HTMLParser().unescape(url).encode("utf-8")
示例#9
0
class HTTPService(Service):
    """
    Httplib-based URL shortener client

    Abstract serivce class to help with using httplib.
    """
    @abc.abstractproperty
    def url(self):
        """
        Returns the base URL of the URL shortener
        """

    def __init__(self):
        parsed_url = urlparse.urlparse(self.url)
        self._path = parsed_url.path or "/"

        self._conn = httplib.HTTPConnection(parsed_url.netloc, timeout=30)

    def _http_fetch(self, code, method="HEAD"):
        try:
            self._conn.request(method, self._path + code)
            resp = self._conn.getresponse()
            if method == "HEAD":
                resp.read()
            return resp
        except httplib.HTTPException, e:
            self._conn.close()
            raise exceptions.ServiceException("HTTP exception: %s" % e)
        except socket.error, e:
            self._conn.close()
            raise exceptions.ServiceException("Socket error: %s" % e)
示例#10
0
class Googl(Service):
    """
    http://goo.gl/
    """
    @property
    def rate_limit(self):
        return (1, 5)

    @property
    def charset(self):
        return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    def __init__(self):
        host = "www.googleapis.com"

        version = platform.python_version_tuple()
        if int(version[0]) == 2 and int(version[1]) <= 5:
            self._conn = httplib.HTTPSConnection(host)
        else:
            self._conn = httplib.HTTPSConnection(host, timeout=30)

    def fetch(self, code):
        try:
            self._conn.request(
                "GET", "/urlshortener/v1/url?shortUrl=http://goo.gl/%s" % code)
            resp = self._conn.getresponse()
            data = resp.read()
        except httplib.HTTPException, e:
            self._conn.close()
            raise exceptions.ServiceException("HTTP exception: %s" % e)
        except socket.error, e:
            self._conn.close()
            raise exceptions.ServiceException("Socket error: %s" % e)
示例#11
0
class YourlsService(Service):
    """
    A service for installations of Yourls (http://yourls.org).
    """
    @abc.abstractproperty
    def yourls_api_url(self):
        """
        The endpoint of the Yourls API.

        The Yourls API is typically located at /yourls-api.php
        """

    @abc.abstractproperty
    def yourls_url_convert(self):
        """
        The value of the YOURLS_URL_CONVERT parameter.

        The YOUR_SULR_CONVERT parameter specifies what charset is used by the
        Yourls installation.
        """

    @property
    def charset(self):
        if self.yourls_url_convert == 36:
            return "0123456789abcdefghijklmnopqrstuvwxyz"
        elif self.yourls_url_convert == 62:
            return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        raise RuntimeError("Bad value for yourls_url_convert parameter")

    def __init__(self):
        parsed_url = urlparse.urlparse(self.yourls_api_url)
        self._path = parsed_url.path or "/"

        if parsed_url.scheme == "http":
            klass = httplib.HTTPConnection
        elif parsed_url.scheme == "https":
            klass = httplib.HTTPSConnection
        else:
            raise ValueError("Unknown scheme %s" % parsed_url.scheme)

        version = platform.python_version_tuple()
        if int(version[0]) == 2 and int(version[1]) <= 5:
            self._conn = klass(parsed_url.netloc)
        else:
            self._conn = klass(parsed_url.netloc, timeout=30)

    def fetch(self, code):
        params = {"action": "expand", "shorturl": code, "format": "simple"}
        try:
            self._conn.request("GET",
                               self._path + "?" + urllib.urlencode(params))
            resp = self._conn.getresponse()
            data = resp.read()
        except httplib.HTTPException, e:
            self._conn.close()
            raise exceptions.ServiceException("HTTP exception: %s" % e)
        except socket.error, e:
            self._conn.close()
            raise exceptions.ServiceException("Socket error: %s" % e)
示例#12
0
    def _preview(self, code, affiliate_url):
        """
        Read the destination of ``code`` from the service's preview page.

        Requests "preview.php?num=<code>" via self._http_get and looks for
        the "Proceed to this site." link.

        Returns the HTML-unescaped link target as a UTF-8 encoded string, or
        the result of self._scrub_url(code, affiliate_url) when the link
        target is empty.

        Raises ServiceException on a non-200 status or a missing link.
        """
        resp, page = self._http_get("preview.php?num=" + code)
        if resp.status != 200:
            raise exceptions.ServiceException(
                "Unexpected HTTP status %i on preview page" % resp.status)

        link = re.search(
            "<a id=\"redirecturl\" href=\"(.*?)\">Proceed to this site.</a>",
            page, re.DOTALL)
        if not link:
            raise exceptions.ServiceException("No redirect on preview page")

        target = link.group(1).decode("utf-8")
        if target != "":
            return HTMLParser.HTMLParser().unescape(target).encode("utf-8")
        # Empty link on the preview page: fall back to the affiliate URL
        return self._scrub_url(code, affiliate_url)
示例#13
0
    def _fetch_blocked(self, code):
        """
        Read the destination of ``code`` from the safety warning page.

        Issues a GET request and looks for the "btn ignore" link in the
        body.

        Returns the HTML-unescaped link target as a UTF-8 encoded string.

        Raises ServiceException when the status is no longer 200 or the
        link cannot be found.
        """
        resp = self._http_fetch(code, "GET")
        page = resp.read()

        if resp.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                resp.status)

        link = re.search("<a class=\"btn ignore\" href=\"(.*?)\" title=",
                         page)
        if link:
            target = link.group(1).decode("utf-8")
            return HTMLParser.HTMLParser().unescape(target).encode("utf-8")

        raise exceptions.ServiceException(
            "Could not find target URL in safety warning")
示例#14
0
    def fetch(self, code):
        """
        Resolve ``code`` with one request and dispatch on the HTTP status.

        200 -- result of self._fetch_blocked(code)
        301 -- the Location header (ServiceException when it is missing)
        404 -- NoRedirectException
        502 -- CodeBlockedException
        anything else -- ServiceException
        """
        resp = self._http_fetch(code)
        status = resp.status

        if status == 200:
            return self._fetch_blocked(code)

        if status == 301:
            target = resp.getheader("Location")
            if target:
                return target
            raise exceptions.ServiceException(
                "No Location header after HTTP status 301")

        if status == 404:
            raise exceptions.NoRedirectException()
        if status == 502:
            raise exceptions.CodeBlockedException("HTTP status 502")

        raise exceptions.ServiceException("Unknown HTTP status %i" % status)
示例#15
0
    def _parse_tinyurl_redirect(self, data):
        """
        Extract the target from the "redirects back to a TinyURL" page.

        data -- body of the page received with HTTP status 200

        Returns the HTML-unescaped link target as a UTF-8 encoded string.

        Raises ServiceException when the page does not contain the link.
        """
        match = re.search(
            "<p class=\"intro\">The URL you followed redirects back to a TinyURL and therefore we can't directly send you to the site\\. The URL it redirects to is <a href=\"(.*?)\">",
            data, re.DOTALL)
        if not match:
            raise exceptions.ServiceException(
                "No redirect on \"tinyurl redirect\" page on HTTP status 200")

        # Decode before unescaping: unescaping or encoding a byte string
        # that holds non-ASCII bytes forces an implicit ASCII decode, which
        # fails for such URLs.
        url = match.group(1).decode("utf-8")
        return HTMLParser.HTMLParser().unescape(url).encode("utf-8")
示例#16
0
    def fetch(self, code):
        """
        Resolve ``code`` with one request, classifying the HTTP status by
        the status collections configured on the service.

        status in http_status_redirect     -- the Location header
        status in http_status_no_redirect  -- NoRedirectException
        status in http_status_code_blocked -- CodeBlockedException
        status in http_status_blocked      -- BlockedException
        anything else                      -- ServiceException

        A redirect status without a Location header raises
        ServiceException.
        """
        resp = self._http_fetch(code)

        if resp.status in self.http_status_redirect:
            location = resp.getheader("Location")
            if not location:
                # Report the status actually received; it is not
                # necessarily 301.
                raise exceptions.ServiceException(
                    "No Location header after HTTP status %i" % resp.status)
            return location
        elif resp.status in self.http_status_no_redirect:
            raise exceptions.NoRedirectException()
        elif resp.status in self.http_status_code_blocked:
            raise exceptions.CodeBlockedException()
        elif resp.status in self.http_status_blocked:
            raise exceptions.BlockedException()
        else:
            raise exceptions.ServiceException("Unknown HTTP status %i" %
                                              resp.status)
示例#17
0
    def unexpected_http_status(self, code, resp):
        """
        Handle HTTP status 200 by reading the safety warning page.

        Any other status is handed to the parent class. For a 200 the page
        is fetched with GET and searched for the "btn ignore" link.

        Returns the HTML-unescaped link target as a UTF-8 encoded string
        (or whatever the parent class returns).

        Raises ServiceException when the second request is not a 200 or the
        link cannot be found.
        """
        if resp.status != 200:
            parent = super(Owly, self)
            return parent.unexpected_http_status(code, resp)

        second_resp, page = self._http_get(code)
        if second_resp.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                second_resp.status)

        link = re.search("<a class=\"btn ignore\" href=\"(.*?)\" title=",
                         page)
        if link:
            target = link.group(1).decode("utf-8")
            return HTMLParser.HTMLParser().unescape(target).encode("utf-8")

        raise exceptions.ServiceException(
            "Could not find target URL in safety warning")
示例#18
0
 def fetch(self, code):
     try:
         self._conn.request(
             "GET", "/urlshortener/v1/url?shortUrl=http://goo.gl/%s" % code)
         resp = self._conn.getresponse()
         data = resp.read()
     except httplib.HTTPException, e:
         self._conn.close()
         raise exceptions.ServiceException("HTTP exception: %s" % e)
示例#19
0
 def fetch(self, code):
     params = {"action": "expand", "shorturl": code, "format": "simple"}
     try:
         self._conn.request("GET",
                            self._path + "?" + urllib.urlencode(params))
         resp = self._conn.getresponse()
         data = resp.read()
     except httplib.HTTPException, e:
         self._conn.close()
         raise exceptions.ServiceException("HTTP exception: %s" % e)
示例#20
0
    def _parse_preview(self, code, data):
        """
        Extract the destination URL from a 'Preview' page.

        code -- the short code (not used by the parsing itself)
        data -- body of the preview page

        Returns the HTML-unescaped link target as a UTF-8 encoded string.

        Raises ServiceException when the link cannot be found.
        """
        link = re.search(
            "<b>Click the link</b> if you'd like to proceed to the destination shown: -<br /><a href=\"(.*)\" class=\"biglink\">",
            data)
        if link:
            target = link.group(1).decode("utf-8")
            return HTMLParser.HTMLParser().unescape(target).encode("utf-8")

        raise exceptions.ServiceException(
            "Could not find target URL in 'Preview' page")
示例#21
0
    def unexpected_http_status(self, code, resp):
        """
        Handle HTTP status 500 by reading the preview page sent with it.

        Any other status is handed to the parent class. For a 500 the page
        is fetched with GET and searched for the quoted long URL.

        Returns the HTML-unescaped URL as a UTF-8 encoded string (or
        whatever the parent class returns).

        Raises ServiceException when the second request is not a 500 or the
        URL cannot be found.
        """
        if resp.status != 500:
            parent = super(Snipurl, self)
            return parent.unexpected_http_status(code, resp)

        second_resp, page = self._http_get(code)
        if second_resp.status != 500:
            raise exceptions.ServiceException(
                "HTTP status changed from 500 to %i on second request" %
                second_resp.status)

        quote = re.search(
            "<p>You clicked on a snipped URL, which will take you to the following looong URL: </p> <div class=\"quote\"><span class=\"quotet\"></span><br/>(.*?)</div> <br />",
            page)
        if quote:
            target = quote.group(1).decode("utf-8")
            return HTMLParser.HTMLParser().unescape(target).encode("utf-8")

        raise exceptions.ServiceException(
            "Could not find target URL on preview page")
示例#22
0
 def _http_fetch(self, code, method="HEAD"):
     try:
         self._conn.request(method, self._path + code)
         resp = self._conn.getresponse()
         if method == "HEAD":
             resp.read()
         return resp
     except httplib.HTTPException, e:
         self._conn.close()
         raise exceptions.ServiceException("HTTP exception: %s" % e)
示例#23
0
    def _parse_blocked(self, code, data):
        """
        Extract the original destination from a 'Link Disabled' page.

        code -- the short code (not used by the parsing itself)
        data -- body of the 'Link Disabled' page

        Returns the HTML-unescaped URL as a UTF-8 encoded string.

        Raises ServiceException when the page does not have the expected
        layout and CodeBlockedException when the URL shown is empty.
        """
        # Raw string: the pattern relies on regex escapes such as \( and \.
        # which are not valid string escapes.
        match = re.search(
            r"<p>For reference and to help those fighting spam the original destination of this URL is given below \(we strongly recommend you don't visit it since it may damage your PC\): -<br />(.*)</p><h2>is\.gd</h2><p>is\.gd is a free service used to shorten long URLs\.",
            data)
        if not match:
            raise exceptions.ServiceException(
                "Could not find target URL in 'Link Disabled' page")

        url = match.group(1).decode("utf-8")
        url = HTMLParser.HTMLParser().unescape(url).encode("utf-8")
        if url == "":
            raise exceptions.CodeBlockedException("Empty URL on preview")
        return url
示例#24
0
    def _parse_errorhelp(self, code, data):
        match = re.search('<meta http-equiv="refresh" content="0;url=(.*?)">',
                          data)
        if not match:
            raise exceptions.ServiceException(
                "No redirect on \"errorhelp\" page on HTTP status 200")
        url = urlparse.urlparse(match.group(1))
        if url.scheme != "http" or url.netloc != "tinyurl.com" or url.path != "/errorb.php":
            raise exceptions.ServiceException(
                "Unexpected redirect on \"errorhelp\" page  on HTTP status 200"
            )
        query = urlparse.parse_qs(url.query)
        if not ("url" in query and len(query["url"])
                == 1) or not ("path" in query and len(query["path"]) == 1):
            raise exceptions.ServiceException(
                "Unexpected redirect on \"errorhelp\" page  on HTTP status 200"
            )
        if query["path"][0] != ("/" + code):
            raise exceptions.ServiceException(
                "Code mismatch on \"errorhelp\" on HTTP status 200")

        return query["url"][0]
示例#25
0
    def _http_fetch(self, code, method):
        headers = self.http_headers
        if self.http_keepalive:
            headers["Connection"] = "Keep-Alive"
        else:
            headers["Connection"] = "close"

        try:
            self._conn.request(method, self._path + code, headers=headers)
            resp = self._conn.getresponse()
            result = (resp, resp.read())
            if not self.http_keepalive:
                self._conn.close()
            return result
        except httplib.HTTPException, e:
            self._conn.close()
            raise exceptions.ServiceException("HTTP exception: %s" % e)
示例#26
0
    def unexpected_http_status(self, code, resp):
        """
        Handle HTTP status 200 by inspecting the page behind ``code``.

        Any other status is handed to the parent class. For a 200 the page
        is fetched with GET and classified by its contents:

        empty body          -- CodeBlockedException
        rate limit notice   -- BlockedException
        'Link Disabled'     -- result of self._parse_blocked(code, data)
        link preview        -- result of self._parse_preview(code, data)

        Raises ServiceException when the second request is not a 200.

        NOTE(review): a page matching none of the above falls off the end
        and yields None -- confirm that this is intended.
        """
        if resp.status != 200:
            parent = super(Isgd, self)
            return parent.unexpected_http_status(code, resp)

        second_resp, page = self._http_get(code)
        if second_resp.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                second_resp.status)

        if not page:
            raise exceptions.CodeBlockedException(
                "Empty response on status 200")

        rate_limited = "<div id=\"main\"><p>Rate limit exceeded - please wait 1 minute before accessing more shortened URLs</p></div>" in page
        if rate_limited:
            raise exceptions.BlockedException()

        link_disabled = "<div id=\"disabled\"><h2>Link Disabled</h2>" in page
        if link_disabled:
            return self._parse_blocked(code, page)

        is_preview = "<p>The full original link is shown below. <b>Click the link</b> if you'd like to proceed to the destination shown:" in page
        if is_preview:
            return self._parse_preview(code, page)
示例#27
0
            self._conn.request("GET",
                               self._path + "?" + urllib.urlencode(params))
            resp = self._conn.getresponse()
            data = resp.read()
        except httplib.HTTPException, e:
            self._conn.close()
            raise exceptions.ServiceException("HTTP exception: %s" % e)
        except socket.error, e:
            self._conn.close()
            raise exceptions.ServiceException("Socket error: %s" % e)

        if resp.status == 200:
            if data == "not found":
                raise exceptions.NoRedirectException()
            return data
        raise exceptions.ServiceException("Unexpected HTTP status %i" %
                                          resp.status)


class Bitly(HTTPService):
    """
    http://bit.ly/
    """
    @property
    def charset(self):
        return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_"

    @property
    def url(self):
        return "http://bit.ly/"

    def fetch(self, code):
示例#28
0
 def unexpected_http_status(self, code, resp):
     """
     Reject the response by raising a ServiceException naming its status.

     code -- the short code (unused here)
     resp -- response object; only its ``status`` attribute is read
     """
     message = "Unexpected HTTP status %i" % resp.status
     raise exceptions.ServiceException(message)
示例#29
0
class HTTPService(Service):
    """
    Httplib-based URL shortener client

    Abstract serivce class to help with using httplib.
    """
    @abc.abstractproperty
    def url(self):
        """
        Returns the base URL of the URL shortener
        """

    @property
    def http_headers(self):
        """
        Dictionary of additional HTTP headers to send with every request.
        """
        return {}

    @property
    def http_keepalive(self):
        """
        Whether to use HTTP persistent connections or not. If set to false, the
        connection will be forcibly closed after each request
        """
        return True

    def __init__(self):
        parsed_url = urlparse.urlparse(self.url)
        self._path = parsed_url.path or "/"

        if parsed_url.scheme == "http":
            klass = httplib.HTTPConnection
        elif parsed_url.scheme == "https":
            klass = httplib.HTTPSConnection
        else:
            raise ValueError("Unknown scheme %s" % parsed_url.scheme)

        version = platform.python_version_tuple()
        if int(version[0]) == 2 and int(version[1]) <= 5:
            self._conn = klass(parsed_url.netloc)
        else:
            self._conn = klass(parsed_url.netloc, timeout=30)

    def _http_head(self, code):
        return self._http_fetch(code, "HEAD")[0]

    def _http_get(self, code):
        return self._http_fetch(code, "GET")

    def _http_fetch(self, code, method):
        headers = self.http_headers
        if self.http_keepalive:
            headers["Connection"] = "Keep-Alive"
        else:
            headers["Connection"] = "close"

        try:
            self._conn.request(method, self._path + code, headers=headers)
            resp = self._conn.getresponse()
            result = (resp, resp.read())
            if not self.http_keepalive:
                self._conn.close()
            return result
        except httplib.HTTPException, e:
            self._conn.close()
            raise exceptions.ServiceException("HTTP exception: %s" % e)
        except socket.error, e:
            self._conn.close()
            raise exceptions.ServiceException("Socket error: %s" % e)