예제 #1
0
 def __init__(self, url, body, http_status, content_type, content_length):
     """
     Keep the fetched page and its response metadata, then parse the body.

     All arguments are stored unchanged on private attributes.

     :param url: URL associated with the response
     :param body: response body; also handed to BeautifulSoup for parsing
     :param http_status: HTTP status value of the response
     :param content_type: content type value of the response
     :param content_length: content length value of the response
     """
     # Response metadata.
     self._url = url
     self._http_status = http_status
     self._content_type = content_type
     self._content_length = content_length

     # Raw body and the helpers derived from it. The Wakachi instance is
     # created before the soup, same as the construction order used so far.
     self._http_response_body = body
     self._wakachi = Wakachi()
     self._bs = BeautifulSoup(body)
예제 #2
0
 def __init__(self, url, body, http_status, content_type, content_length):
     """
     Record the response data on the instance and build the parsing helpers.

     Every argument is kept as given on a private attribute.

     :param url: URL associated with the response
     :param body: response body; it is also passed to BeautifulSoup
     :param http_status: HTTP status value of the response
     :param content_type: content type value of the response
     :param content_length: content length value of the response
     """
     self._url = url
     self._http_response_body = body

     # Header-derived values.
     self._http_status = http_status
     self._content_type = content_type
     self._content_length = content_length

     # Helpers: tokenizer first, then the parsed document.
     self._wakachi = Wakachi()
     self._bs = BeautifulSoup(body)
예제 #3
0
class WebModel:
    """
    Hold one fetched web page: URL, body and response metadata.

    The body is parsed with BeautifulSoup at construction time, and a
    Wakachi instance is kept for splitting the extracted text.
    """

    def __init__(self, url, body, http_status, content_type, content_length):
        """
        :param url: URL the response belongs to
        :param body: response body; also handed to BeautifulSoup for parsing
        :param http_status: HTTP status code of the response
        :param content_type: value of the content-type header (may be None)
        :param content_length: value of the content-length header (may be
            None; stored as given, without conversion)
        """
        self._http_response_body = body
        self._http_status = http_status
        self._content_type = content_type
        self._content_length = content_length
        self._url = url
        self._wakachi = Wakachi()
        self._bs = BeautifulSoup(self._http_response_body)

    @property
    def host(self):
        """
        Return the ``scheme://netloc`` part of the page URL.

        :rtype str
        """
        template = "{scheme}://{netloc}"
        o = urlparse(self._url)
        host = template.format(scheme=o.scheme, netloc=o.netloc)
        return host

    @property
    def links(self):
        """
        Return the href of every ``<a>`` element that has a non-empty one.

        An href starting with a single "/" is prefixed with :attr:`host`.
        An href starting with "//" (protocol-relative) only receives the
        scheme of the page URL. Any other href is returned unchanged.

        :rtype list[str]
        """
        links = list()
        for link in self._bs.find_all('a'):
            href = link.get('href')
            if not href:
                continue
            # :type href str
            if href.startswith("//"):
                # Protocol-relative: the href already names its own host, so
                # prefixing self.host would yield "scheme://host//other/...".
                scheme = urlparse(self._url).scheme
                if scheme:
                    href = scheme + ":" + href
            elif href.startswith("/"):
                href = self.host + href
            links.append(href)
        return links

    @property
    def html(self):
        """
        Return the response body exactly as passed to the constructor.
        """
        return self._http_response_body

    @property
    def text(self):
        """
        Return the text of the parsed document (BeautifulSoup ``get_text``).

        :rtype str
        """
        return self._bs.get_text()

    @property
    def wakachi(self):
        """
        Return the result of parsing :attr:`text` with Wakachi, as a list.

        :rtype list
        """
        return list(self._wakachi.parse(self.text))

    @property
    def http_status(self):
        """
        Return http status code

        :rtype int
        """
        return self._http_status

    @property
    def content_type(self):
        """
        Return the content type given to the constructor.

        :rtype str
        :return: content type
        """
        return self._content_type

    @property
    def content_length(self):
        """
        Return the content length exactly as given to the constructor.

        NOTE: from_requests_response passes the raw header value, so this
        may be a string or None rather than an int.
        """
        return self._content_length

    @property
    def hashed_url(self):
        """
        Not implemented yet; currently always returns None.

        :rtype str
        :return:
        """
        # TODO : implement using hashlib

    def __len__(self):
        """
        Return the size of the page as a non-negative int.

        The declared content length is used when it converts to a positive
        integer; otherwise the length of the stored body is used, and 0 when
        there is no body either.

        :rtype int
        """
        # content_length is a property (not a method) and may hold a header
        # string or None, while __len__ must always return an int >= 0.
        try:
            declared = int(self.content_length)
        except (TypeError, ValueError):
            declared = 0
        if declared > 0:
            return declared
        if self._http_response_body:
            return len(self._http_response_body)
        return 0

    @staticmethod
    def from_requests_response(resp):
        """
        Build a WebModel from a response object.

        Reads ``resp.url``, ``resp.text``, ``resp.status_code`` and the
        "content-type" / "content-length" entries of ``resp.headers``.

        :rtype WebModel
        :param resp: response object exposing the attributes listed above
        :return: the new model
        """
        w = WebModel(resp.url, resp.text, resp.status_code,
                     resp.headers.get("content-type"),
                     resp.headers.get("content-length"))
        return w

    @staticmethod
    def from_requests_error():
        """
        Placeholder for building a model from a failed request; returns None.
        """
        # TODO: implement
        pass
예제 #4
0
class WebModel:
    """
    Hold one fetched web page: URL, body and response metadata.

    The body is parsed with BeautifulSoup at construction time, and a
    Wakachi instance is kept for splitting the extracted text.
    """

    def __init__(self, url, body, http_status, content_type, content_length):
        """
        :param url: URL the response belongs to
        :param body: response body; also handed to BeautifulSoup for parsing
        :param http_status: HTTP status code of the response
        :param content_type: value of the content-type header (may be None)
        :param content_length: value of the content-length header (may be
            None; stored as given, without conversion)
        """
        self._http_response_body = body
        self._http_status = http_status
        self._content_type = content_type
        self._content_length = content_length
        self._url = url
        self._wakachi = Wakachi()
        self._bs = BeautifulSoup(self._http_response_body)

    @property
    def host(self):
        """
        Return the ``scheme://netloc`` part of the page URL.

        :rtype str
        """
        template = "{scheme}://{netloc}"
        o = urlparse(self._url)
        host = template.format(scheme=o.scheme, netloc=o.netloc)
        return host

    @property
    def links(self):
        """
        Return the href of every ``<a>`` element that has a non-empty one.

        An href starting with a single "/" is prefixed with :attr:`host`.
        An href starting with "//" (protocol-relative) only receives the
        scheme of the page URL. Any other href is returned unchanged.

        :rtype list[str]
        """
        links = list()
        for link in self._bs.find_all('a'):
            href = link.get('href')
            if not href:
                continue
            # :type href str
            if href.startswith("//"):
                # Protocol-relative: the href already names its own host, so
                # prefixing self.host would yield "scheme://host//other/...".
                scheme = urlparse(self._url).scheme
                if scheme:
                    href = scheme + ":" + href
            elif href.startswith("/"):
                href = self.host + href
            links.append(href)
        return links

    @property
    def html(self):
        """
        Return the response body exactly as passed to the constructor.
        """
        return self._http_response_body

    @property
    def text(self):
        """
        Return the text of the parsed document (BeautifulSoup ``get_text``).

        :rtype str
        """
        return self._bs.get_text()

    @property
    def wakachi(self):
        """
        Return the result of parsing :attr:`text` with Wakachi, as a list.

        :rtype list
        """
        return list(self._wakachi.parse(self.text))

    @property
    def http_status(self):
        """
        Return http status code

        :rtype int
        """
        return self._http_status

    @property
    def content_type(self):
        """
        Return the content type given to the constructor.

        :rtype str
        :return: content type
        """
        return self._content_type

    @property
    def content_length(self):
        """
        Return the content length exactly as given to the constructor.

        NOTE: from_requests_response passes the raw header value, so this
        may be a string or None rather than an int.
        """
        return self._content_length

    @property
    def hashed_url(self):
        """
        Not implemented yet; currently always returns None.

        :rtype str
        :return:
        """
        # TODO : implement using hashlib

    def __len__(self):
        """
        Return the size of the page as a non-negative int.

        The declared content length is used when it converts to a positive
        integer; otherwise the length of the stored body is used, and 0 when
        there is no body either.

        :rtype int
        """
        # content_length is a property (not a method) and may hold a header
        # string or None, while __len__ must always return an int >= 0.
        try:
            declared = int(self.content_length)
        except (TypeError, ValueError):
            declared = 0
        if declared > 0:
            return declared
        if self._http_response_body:
            return len(self._http_response_body)
        return 0

    @staticmethod
    def from_requests_response(resp):
        """
        Build a WebModel from a response object.

        Reads ``resp.url``, ``resp.text``, ``resp.status_code`` and the
        "content-type" / "content-length" entries of ``resp.headers``.

        :rtype WebModel
        :param resp: response object exposing the attributes listed above
        :return: the new model
        """
        w = WebModel(resp.url, resp.text, resp.status_code,
                     resp.headers.get("content-type"),
                     resp.headers.get("content-length"))
        return w

    @staticmethod
    def from_requests_error():
        """
        Placeholder for building a model from a failed request; returns None.
        """
        # TODO: implement
        pass