Пример #1
0
def index() -> Results:
    """Yield visits for every Hacker News item in the dogsheep export.

    Exceptions produced by ``dogsheep.items()`` are yielded unchanged.
    Every other item yields one ``Visit`` for its HN permalink and, when
    ``item.url`` is not ``None``, a second ``Visit`` for that URL. Both
    visits share the same locator and use the chosen title as context.
    """
    from . import hpi
    from my.hackernews import dogsheep

    for entry in dogsheep.items():
        if isinstance(entry, Exception):
            # Pass errors downstream instead of raising.
            yield entry
            continue

        permalink = entry.permalink

        # Title preference: the item's own title, then a shortened copy of
        # its HTML text, then a fixed fallback label.
        if entry.title:
            label = entry.title
        elif entry.text_html:
            label = textwrap.shorten(
                entry.text_html,
                width=79,
                placeholder="…",
                break_long_words=True,
            )
        else:
            label = "hackernews"

        # The locator always refers to the HN story itself, so a visit
        # recorded for an external link still points back to the story.
        story_loc = Loc.make(title=label, href=permalink)

        targets = [permalink] if entry.url is None else [permalink, entry.url]
        for target in targets:
            yield Visit(
                url=target,
                dt=entry.created,
                locator=story_loc,
                context=label,
            )
Пример #2
0
def index(*, render_markdown: bool = False) -> Results:
    """Yield a visit for every URL found in the content of Discord messages.

    Args:
        render_markdown: When true, each visit's context is the message
            content converted to HTML through promnesia's markdown
            ``TextParser``; otherwise the raw message content is used.

    Yields:
        One ``Visit`` per URL returned by ``iter_urls(m.content)``, with a
        locator linking back to the originating message.
    """
    from my.discord import messages

    # Only pull in the markdown parser when it is actually needed, so the
    # default (render_markdown=False) path does not fail on installs where
    # the markdown module for promnesia is unavailable.
    if render_markdown:
        from promnesia.sources.markdown import TextParser

    for m in messages():
        # hmm - extract URLs from attachments?
        # Probably not very useful unless I extract info from them with url_metadata or something

        context: str = m.content

        # if render_markdown flag is enabled, render the text as markdown (HTML)
        if render_markdown:
            context = TextParser(m.content)._doc_ashtml()

        # permalink back to this discord message
        loc = Loc.make(title=m.channel.description, href=m.link)

        for u in iter_urls(m.content):
            yield Visit(
                url=u,
                dt=m.timestamp,
                context=context,
                locator=loc,
            )
Пример #3
0
 def err_ex():
     """Yield a Visit, then an ExtractionError, then another Visit."""
     for idx in range(3):
         if idx != 1:
             yield Visit(
                 url=f'http://test{idx}',
                 dt=datetime.utcfromtimestamp(0),
                 locator=Loc.make('whatever'),
             )
             continue
         # The middle item is an error object, yielded rather than raised.
         yield ExtractionError()
Пример #4
0
def index() -> Results:
    """Yield one visit per shelf entry of every Grouvee game.

    Each visit uses the game's URL, the shelf's ``added`` value as its
    timestamp, and a context of the form ``"<game name> (<shelf name>)"``.
    """
    from my.grouvee import games
    from grouvee_export.dal import Game

    for game in games():
        link = game.url
        # One locator per game, shared by all of its shelf visits.
        game_loc = Loc.make(title=f"Grouvee {game.name}", href=link)
        for shelf in game.shelves:
            added_at = shelf.added
            note = f"{game.name} ({shelf.name})"
            yield Visit(url=link, dt=added_at, locator=game_loc, context=note)
Пример #5
0
def index() -> Results:
    """Yield a visit for every Twitch GDPR event of type ``"pageview"``.

    The visit URL is the string form of the event's ``context``; events
    of any other type are skipped.
    """
    from my.twitch.gdpr import events

    for event in events():
        if event.event_type != "pageview":
            continue

        page = str(event.context)
        when = event.dt
        page_loc = Loc.make(title=f"Twitch {event.context}", href=page)
        yield Visit(url=page, dt=when, locator=page_loc)