Example no. 1
    def get_blogposts(self):
        """
        Lists all of the user's PUBLISHED blogposts. For unpublished posts,
        see :meth:`get_concepts`.

        Returns:
            list: sorted (old->new) list of Blogpost objects.
        """
        if not self.has_blog:
            return []

        def cut_crap(data):
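            # keep only the markup between the main content div and the
            # "Píšeme jinde" sidebar box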
            data = data.split(
                '<div class="s_nadpis linkbox_nadpis">Píšeme jinde</div>'
            )[0]

            return data.split('<div class="st" id="st">')[1]

        cnt = 0
        posts = []
        parsed = [1]  # placeholder so the loop body runs at least once
        while parsed:
            data = self._get(self._compose_blogposts_url(cnt))

            dom = dhtmlparser.parseString(cut_crap(data))
            parsed = [
                Blogpost.from_html(blog_html)
                for blog_html in dom.find("div", {"class": "cl"})
            ]

            posts.extend(parsed)
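            # advance the paging offset by BLOG_STEP (defined elsewhere in the module)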
            cnt += BLOG_STEP

        return sorted(posts, key=lambda x: x.created_ts)
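
A minimal usage sketch for the method above. The enclosing class is not shown in this example, so the User name and constructor below are assumptions made purely for illustration:

    user = User("some_username")       # hypothetical class exposing get_blogposts()
    for post in user.get_blogposts():
        # the returned list is sorted from oldest to newest by created_ts
        print(post.created_ts, post)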
Example no. 2
def iter_blogposts(start=0, end=None, lazy=True):
    """
    Iterate over blogposts, based on the bloglist.

    Args:
        start (int, default 0): Start at this page.
        end (int, default None): End at this page.
        lazy (bool, default True): Initialize :class:`.Blogpost` objects only
             with information from the listings. Don't download full text and
             comments.

    Yields:
        obj: :class:`.Blogpost` objects.
    """
    for cnt, url in enumerate(_next_blog_url(start)):
        data = _shared.download(url)

        data = _remove_crap_from_bloglist(data)

        # parse basic info about all blogposts on the page
        dom = _dhtmlparser.parseString(data)
        for bcnt, blog in enumerate(dom.findB("div", {"class": "cl"})):
            yield Blogpost.from_html(blog, lazy=lazy)

            # every page has 25 blogposts, but sometimes more are returned
            if bcnt >= 24:
                break

        # detect end of pagination at the bottom
        if not _should_continue(dom):
            break

        if end is not None and cnt >= end:
            break
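
A short usage sketch; the import path is an assumption for illustration, since the module name is not part of this listing:

    # from the_scraper_module import iter_blogposts   # module path assumed
    for post in iter_blogposts(start=0, end=2, lazy=True):
        # lazy=True fills in only the listing metadata; full text and
        # comments are not downloaded
        print(post)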
Example no. 3
def iter_blogposts(start=0, end=None, lazy=True):
    """
    Iterate over blogposts, based on the bloglist.

    Args:
        start (int, default 0): Start at this page.
        end (int, default None): End at this page.
        lazy (bool, default True): Initialize :class:`.Blogpost` objects only
             with information from the listings. Don't download full text and
             comments.

    Yields:
        obj: :class:`.Blogpost` objects.
    """
    for cnt, url in enumerate(_next_blog_url(start)):
        data = _shared.download(url)

        # strip the surrounding page markup, keep just the blogpost listing
        data = data.split(
            '<div class="s_nadpis linkbox_nadpis">Píšeme jinde</div>'
        )[0]
        data = data.split('<div class="st" id="st">')[1]

        # some blogs have an unclosed HTML comment in the perex, which breaks
        # the bloglist - this closes any comment that runs over the bloglist
        data = data.replace(
            '<div class="signature">',
            '<!-- --><div class="signature">'
        )

        # parse basic info about all blogposts on the page
        dom = _dhtmlparser.parseString(data)
        for bcnt, blog in enumerate(dom.findB("div", {"class": "cl"})):
            yield Blogpost.from_html(blog, lazy=lazy)

            # every page has 25 blogposts, but sometimes more are returned
            if bcnt >= 24:
                break

        # detect end of pagination at the bottom
        if not _should_continue(dom):
            break

        if end is not None and cnt >= end:
            break
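
A small, self-contained sketch of why the "<!-- -->" insertion above helps; the HTML fragment is fabricated for illustration:

    broken = ('<div class="cl">perex with an unclosed comment <!-- oops</div>'
              '<div class="signature">sig</div>')
    # everything after "<!--" is swallowed by the runaway comment, so the
    # closing </div> and the signature never reach an HTML parser
    fixed = broken.replace('<div class="signature">',
                           '<!-- --><div class="signature">')
    # the "-->" inside the inserted "<!-- -->" terminates the open comment,
    # so the signature div and everything after it parses normally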