Exemplo n.º 1
0
    def _get_content_from_data(self, data, session: Session, self_html: str,
                               post_hint: str) -> str:
        """Assemble the html content of a post from its json data.

        The content is made, in this order, of the unescaped self text, the
        gallery (when the post is flagged as one) and whatever
        ``_manage_external_content`` returns for the destination url. The
        result then goes through ``_manage_reddit_preview_images`` and every
        ``<video `` tag receives the ``controls`` attribute.

        Args:
            data: json node describing the post
            session (Session): session handed over to the external content lookup
            self_html (str): escaped html of the post text, "" when there is none
            post_hint (str): post hint handed over to the external content lookup

        Returns:
            str: html content of the post
        """
        target_url: str = get_node_value_if_exists(
            data, "url_overridden_by_dest")
        if target_url.startswith("/"):
            # Site-relative link: prefix it with the reddit host.
            target_url = "https://www.reddit.com" + target_url

        preview_image = cast(
            Optional[str],
            get_node(data, "preview", "images", 0, "source", "url"))
        gallery_flag = str(get_node_value_if_exists(data, "is_gallery"))
        domain = cast(str, get_node(data, "domain"))

        parts = []
        if self_html != "":
            parts.append(html.unescape(self_html))

        if gallery_flag == "True":
            parts.append(self._manage_gallery(data))

        external = self._manage_external_content(
            session, target_url, post_hint, preview_image, domain, data)
        if external is not None:
            parts.append(external)

        assembled = self._manage_reddit_preview_images("".join(parts))
        return assembled.replace("<video ", "<video controls ")
Exemplo n.º 2
0
    def _format_video(self, node: dict) -> str:
        """Build the html used to display a video node.

        When the node has no "id", the node referenced by its "link.__ref"
        entry is built instead (empty string if there is no such reference).
        Otherwise the video description is downloaded from eurosport.fr and
        rendered as a <video> tag ("VideoUrl") or an <iframe> ("EmbedUrl"),
        followed by the node title when there is one.

        Args:
            node (dict): video node

        Returns:
            str: html content for the video

        Raises:
            requests.exceptions.RequestException: if the download fails or
                times out
            ValueError: if the downloaded document is not valid json
        """
        if "id" not in node:
            link: Optional[str] = cast(
                Optional[str], json_utils.get_node(node, "link", "__ref"))
            return self._build(link) if link is not None else ""

        # The id is base64 encoded; the text before the numeric part is
        # skipped (presumably a "Video:" prefix - TODO confirm).
        video_id: str = b64decode(cast(
            str, node["id"]).encode("ascii")).decode("ascii")
        # Without a timeout an unresponsive server would block forever.
        page = requests.get(
            url="https://www.eurosport.fr/cors/feed_player_video_vid%s.json"
            % video_id[len("Video:"):],
            timeout=30)
        j = json.loads(page.text)

        poster = ""
        if "PictureUrl" in j:
            poster = j["PictureUrl"]

        content: str = ""
        if "VideoUrl" in j:
            content = """<video width="100%%" controls="" preload="auto" poster="%s">
                                    <source src="%s" />
                                </video>""" % (poster, j["VideoUrl"])
        elif "EmbedUrl" in j:
            content = """<iframe src="%s"/>""" % (j["EmbedUrl"])

        # Only caption the video when a title exists, so that a missing
        # title is not rendered as the text "None".
        title = json_utils.get_node(node, "title")
        if title is not None:
            content += "<p><i><small>%s</small></i></p>" % title

        return content
Exemplo n.º 3
0
    def _format(self, node: dict) -> str:
        """Return the html template used to render a node.

        The template is chosen from the node "__typename". Templates that
        wrap other content contain CONTENT_MARKER at the place where that
        content goes. An unhandled type name yields a small notice naming
        the type, followed by CONTENT_MARKER.

        Args:
            node (dict): node to render

        Returns:
            str: html template for the node
        """
        type_name = cast(str, json_utils.get_node(node, "__typename"))

        # Plain wrappers around the content.
        if type_name in ("Paragraph", "ListItem"):
            return "<p>%s</p>" % CONTENT_MARKER
        if type_name == "H2":
            return "<h2>%s</h2>" % CONTENT_MARKER
        if type_name == "Blockquote":
            return "<blockquote>%s</blockquote>" % CONTENT_MARKER
        if type_name in ("Body", "CyclingStage", "Program"):
            return CONTENT_MARKER
        if type_name == "BreakLine":
            return "<br/>"

        # Nodes rendered from their own fields.
        if type_name == "HyperLink":
            return "<a href=\"%s\">%s</a>" % (node["url"], CONTENT_MARKER)
        if type_name == "Picture":
            return "<img src=\"%s\" alt=\"%s\"></img>%s" % (
                node["url"], node["caption"], CONTENT_MARKER)
        if type_name == "Text":
            return node["content"]
        if type_name == "Link":
            return node["url"]

        # Nodes rendered through another node of the graph.
        if type_name == "HyperLinkInternal":
            href = self._build(
                cast(str, json_utils.get_node(node, "content", "__ref")))
            return "<a href=\"%s\">%s</a>" % (href, node["label"])
        if type_name == "InternalContent":
            return self._build(
                cast(str, json_utils.get_node(node, "content", "__ref")))
        if type_name in ("TeamSportsMatch", "Article"):
            return self._build(
                cast(str, json_utils.get_node(node, "link", "__ref")))

        # Nodes with a dedicated formatter.
        if type_name == "Video":
            return self._format_video(node)
        if type_name == "List":
            return self._format_list(node)
        if type_name == "Table":
            return self._format_table(node)
        if type_name == "TableLine":
            return self._format_table_line(node)
        if type_name == "TableColumn":
            return self._format_table_column(node)
        if type_name == "Embed":
            return self._format_embed(node)

        # Type name not handled.
        return "<p><i><small>Unknown type name: '%s'%s</small></i></p>" % (
            type_name, CONTENT_MARKER)
Exemplo n.º 4
0
    def get_comments(self, comments: dict, deep: int = 0) -> str:
        """Render a comment and its replies as nested <ul> html lists.

        Only the comment body is rendered: no nickname, no points, no date.
        Replies are followed recursively, each level being wrapped in its own
        <ul>; nothing is rendered once ``deep`` reaches 3.

        Args:
            comments (dict): json containing comments
            deep (int): current depth in the thread, 0 for the first level

        Returns:
            str: html content for comments
        """
        if deep >= 3:  # too deep, stop here
            return ""

        items = []
        if "body_html" in comments:
            items.append("<li>%s</li>" % html.unescape(comments["body_html"]))

        replies = get_node(comments, "replies", "data", "children")
        if isinstance(replies, list):
            for reply in replies:
                # Only "t1" entries carrying data are rendered.
                is_comment = "kind" in reply and reply["kind"] == "t1"
                if is_comment and "data" in reply:
                    items.append(self.get_comments(reply["data"], deep + 1))

        return "<ul>%s</ul>" % "".join(items)
Exemplo n.º 5
0
    def _format_table_column(self, node: dict) -> str:
        """Build the <td> cells of a table column node.

        Every reference listed in the node "contents.__refs" entry is built
        and wrapped in its own <td>. All the cells are kept (previously each
        cell overwrote the one before, so only the last one was returned).

        Args:
            node (dict): table column node

        Returns:
            str: html of the cells, "" when the node has no contents
        """
        refs = cast(Optional[List[str]],
                    json_utils.get_node(node, "contents", "__refs"))
        if refs is None:
            # No contents to render: avoid iterating over None.
            return ""

        return "".join("\n\t<td>%s</td>" % self._build(td) for td in refs)
Exemplo n.º 6
0
    def _format_list(self, node: dict) -> str:
        """Build the html of a list node.

        Args:
            node (dict): list node, its items are read from "listItems.__refs"

        Returns:
            str: concatenation of the html built for each item, "" when the
                node has no item references
        """
        item_refs = cast(Optional[List[str]],
                         json_utils.get_node(node, "listItems", "__refs"))
        if item_refs is None:
            return ""

        return "".join(self._build(item_ref) for item_ref in item_refs)
Exemplo n.º 7
0
 def __init__(self, data: dict, ql_ref: str) -> None:
     """Locate the records and the article node inside the page data.

     Args:
         data (dict): json of the page
         ql_ref (str): key of the article inside the
             "props.pageProps.serverQueryRecords" node
     """
     self.data: dict = data
     self.root: Optional[dict] = json_utils.get_node(
         self.data, "props", "pageProps", "serverQueryRecords")
     # Always defined, so the attribute can be tested against None even
     # when the records or the article reference are missing.
     self.ql_article: Optional[dict] = None
     if self.root is not None:
         self.ql_article = self.root.get(ql_ref)
     self.graph_ql_body: Optional[str] = None
Exemplo n.º 8
0
    def _get_datatypes_json(self, data: dict, ttype: str) -> List[dict]:
        datatype_json: List[dict] = []
        if len(data) > 0:
            for d in data:
                nodes = get_node(d, "data", "children")
                if nodes is not None:
                    for node in nodes:
                        if "kind" in node and node[
                                "kind"] == ttype and "data" in node:
                            datatype_json.append(node["data"])

        return datatype_json
Exemplo n.º 9
0
    def _build(self, node_name: str) -> str:
        """Build the html of a node and, recursively, of its contents.

        The node is formatted first; then each reference found in its
        "contents.__refs" entry is resolved through its "__id" and built.
        The html of the contents replaces CONTENT_MARKER in the node format.

        Args:
            node_name (str): key of the node in the root records

        Returns:
            str: html of the node
        """
        root = cast(dict, self.root)
        template: str = self._format(
            cast(dict, json_utils.get_node(root, node_name)))

        child_refs = cast(
            Optional[List[str]],
            json_utils.get_node(root, node_name, "contents", "__refs"))

        children_html = []
        if isinstance(child_refs, list):
            for child_ref in child_refs:
                child_name = cast(
                    str, json_utils.get_node(root, child_ref, "__id"))
                if child_name is not None:
                    children_html.append(self._build(child_name))

        return template.replace(CONTENT_MARKER, "".join(children_html))
Exemplo n.º 10
0
    def build_article(self) -> str:
        """Build the html of the article.

        The article is made of its title (if any), its picture (if the
        article references one that exists in the root records) and its
        body, whose reference is also stored in ``self.graph_ql_body``.

        Returns:
            str: html of the article, "" when there is no article node
        """
        if self.ql_article is None:
            return ""

        parts = []
        if "title" in self.ql_article:
            parts.append("<h1>%s</h1>" % self.ql_article["title"])

        picture_ref = json_utils.get_node(cast(dict, self.ql_article),
                                          "picture", "__ref")
        if picture_ref is not None:
            picture = json_utils.get_node(cast(dict, self.root),
                                          cast(str, picture_ref))
            if picture is not None:
                parts.append("<img src=\"%s\"/>" %
                             json_utils.get_node(picture, "url"))

        self.graph_ql_body = cast(
            str, json_utils.get_node(self.ql_article, "graphQLBody", "__ref"))
        if self.graph_ql_body is not None:
            parts.append(self._build(self.graph_ql_body))

        return "".join(parts)
Exemplo n.º 11
0
    def _get_content_by_post_hint(self, session: Session, url: str,
                                  post_hint: str, preview_image: Optional[str],
                                  domain: str, data: dict) -> Optional[str]:
        """Build the html of the external content of a post.

        Args:
            session (Session): session used to fetch the readable content
            url (str): url of the external content
            post_hint (str): kind of content, it selects the rendering
            preview_image (Optional[str]): url of the image used as preview
                or poster
            domain (str): domain displayed as the source of a rich video
            data (dict): json of the post, read for the hosted video url

        Returns:
            Optional[str]: html of the external content, None when the post
                hint is not handled and the url is not a picture
        """
        if post_hint == "rich:video":
            return "<p><img src=\"%s\"/></p><p><a href=\"%s\">Source : %s</a></p>" % (
                preview_image, url, domain)

        if post_hint == "hosted:video":
            hls_url = get_node(data, "media", "reddit_video", "hls_url")
            return """<p><video poster="%s" muted="muted" >
                        <source src="%s" type="application/vnd.apple.mpegURL">
                    </video></p>""" % (preview_image, hls_url)

        if post_hint == "image" or is_a_picture_url(url):
            return "<p><img src=\"%s\"/></p>" % url

        if post_hint in ["", "link"]:
            return super().get_readable_content(
                session, url, headers=HEADERS, add_source_link=True)

        return None