Exemplo n.º 1
0
    def parse_macro(self):
        # Parse a macro in the form [name](arguments).
        self.get_token(TokenTypes.LITERAL, "[")
        macro_name = self.get_token_value(TokenTypes.TEXT)
        self.get_token(TokenTypes.LITERAL, "]")
        self.get_token(TokenTypes.LITERAL, "(")

        # Footnote arguments are passed to the arguments parser unprocessed.
        raw = macro_name == "footnote"

        arguments = self.collect_join(
            stop_tokens=[Token(TokenTypes.LITERAL, ")"), Token(TokenTypes.EOL)],
        )
        p = analyse(ArgumentsParser(raw=raw), arguments)

        self.get_token(TokenTypes.LITERAL, ")")

        # Macros with a dedicated parser are dispatched to it;
        # anything else becomes a generic MacroNode.
        dedicated_parsers = {
            "link": self.parse_macro_link,
            "mailto": self.parse_macro_mailto,
            "image": self.parse_macro_image,
            "footnote": self.parse_macro_footnote,
        }
        dedicated = dedicated_parsers.get(macro_name)
        if dedicated is not None:
            return dedicated(args=p.args, kwargs=p.kwargs)

        return MacroNode(macro_name, args=p.args, kwargs=p.kwargs)
Exemplo n.º 2
0
    def parse_sentence(self, stop_tokens=None):
        """Parse a sequence of styled-text nodes into a SentenceNode.

        EOF and EOL always terminate the sentence, in addition to any
        caller-supplied stop tokens. Runs of consecutive WordNode
        results are merged into a single TextNode to keep the tree
        compact.
        """
        # Import at function top rather than mid-body (original had the
        # import buried between the loop and the grouping code).
        import itertools

        content = []
        stop_tokens = stop_tokens or set()
        stop_tokens = stop_tokens.union(
            {Token(TokenTypes.EOF),
             Token(TokenTypes.EOL)})

        result = self.parse_styled_text(stop_tokens)
        while result is not None:
            content.append(result)
            result = self.parse_styled_text(stop_tokens)

        # Group consecutive WordNode nodes into a single TextNode
        grouped_nodes = []
        for is_word, group in itertools.groupby(
                content, lambda x: x.__class__ == WordNode):
            if is_word:
                text = "".join(n.value for n in group)
                grouped_nodes.append(TextNode(text))
            else:
                grouped_nodes.extend(group)

        return SentenceNode(content=grouped_nodes)
Exemplo n.º 3
0
def test_token_equality_ignores_position():
    # Tokens with the same type and value compare equal in both
    # directions, whether or not either side carries a position.
    positioned = Token("sometype", "somevalue", position=(12, 34))
    unpositioned = Token("sometype", "somevalue")

    assert positioned == unpositioned
    assert unpositioned == positioned
Exemplo n.º 4
0
    def _parse_list_nodes(self):
        """Parse all the items of a list and return them as nodes.

        Items whose header is longer than the current level are parsed
        recursively as a nested list and appended to the sentence of
        the last item at the current level.
        """
        # Ignore initial white spaces
        with self:
            self.get_token(TokenTypes.WHITESPACE)

        # Parse the header and ignore the following white spaces
        header = self.get_token(TokenTypes.LITERAL,
                                check=lambda x: x[0] in "*#").value
        self.get_token(TokenTypes.WHITESPACE)

        # Collect and parse the text of the item
        text = self._collect_text_content()
        content = self._parse_text_content(text)

        # The number of header characters is the nesting level
        level = len(header)

        nodes = [ListItemNode(level, content)]

        # Idiomatic "not in" (original used "not self.peek_token() in [...]")
        while self.peek_token() not in [
                Token(TokenTypes.EOF),
                Token(TokenTypes.EOL)
        ]:
            # This is the SentenceNode inside the last node added to the list
            # which is used to append potential nested nodes
            last_node_sentence = nodes[-1].content

            # Ignore the initial white spaces
            with self:
                self.get_token(TokenTypes.WHITESPACE)

            if len(self.peek_token().value) == level:
                # The new item is on the same level

                # Get the header
                header = self.get_token().value

                # Ignore white spaces
                self.get_token(TokenTypes.WHITESPACE)

                # Collect and parse the text of the item
                text = self._collect_text_content()
                content = self._parse_text_content(text)

                nodes.append(ListItemNode(len(header), content))
            elif len(self.peek_token().value) > level:
                # The new item is on a deeper level

                # Treat the new line as a new list; "#" marks a numbered one
                numbered = self.peek_token().value[0] == "#"
                subnodes = self._parse_list_nodes()
                last_node_sentence.content.append(ListNode(numbered, subnodes))
            else:
                break

        return nodes
Exemplo n.º 5
0
    def parse_verbatim(self):
        # Verbatim text is delimited by backticks and cannot
        # span more than one line.
        self.get_token(TokenTypes.LITERAL, "`")
        stop = [Token(TokenTypes.LITERAL, "`"), Token(TokenTypes.EOL)]
        content = self.collect_join(stop)
        self.get_token(TokenTypes.LITERAL, "`")

        return VerbatimNode(content)
Exemplo n.º 6
0
    def _parse_paragraph(self):
        # Collect every line up to an empty line or the end of the
        # input, then parse the joined text as a single sentence.
        stop = [Token(TokenTypes.EOL), Token(TokenTypes.EOF)]
        lines = self.collect_lines(stop)
        sentence = self._parse_text_content(" ".join(lines))

        # Consume the attributes collected so far
        args, kwargs = self._pop_attributes()

        self._save(ParagraphNode(sentence, args=args, kwargs=kwargs))
Exemplo n.º 7
0
    def _parse_multi_line_comment(self):
        # Multi-line comments are wrapped in "////" markers:
        # ////
        # A comment
        # on multiple lines
        # ////
        self.get_token(TokenTypes.LITERAL, "////")
        stop = [Token(TokenTypes.LITERAL, "////"), Token(TokenTypes.EOF)]
        self._collect_lines(stop)
        self.force_token(TokenTypes.LITERAL, "////")
    def _parse_verbatim(self):
        # Consume a backtick-delimited span and save it as text,
        # keeping the surrounding backticks in the saved value.
        self.get_token(TokenTypes.LITERAL, "`")
        body = self.collect_join(
            [Token(TokenTypes.LITERAL, "`"), Token(TokenTypes.EOF)],
            preserve_escaped_stop_tokens=True,
        )
        self.get_token(TokenTypes.LITERAL, "`")

        self._save(TextNode(f"`{body}`"))
Exemplo n.º 9
0
    def parse_class(self):
        # Classes are written as [class1,class2]#content#
        self.get_token(TokenTypes.LITERAL, "[")
        raw_classes = self.collect_join(
            [Token(TokenTypes.LITERAL, "]"), Token(TokenTypes.EOL)]
        )
        self.get_token(TokenTypes.LITERAL, "]")

        # The classes apply to the sentence between the "#" markers
        self.get_token(TokenTypes.LITERAL, "#")
        content = self.parse_sentence(
            stop_tokens={Token(TokenTypes.LITERAL, "#")})
        self.get_token(TokenTypes.LITERAL, "#")

        return ClassNode(raw_classes.split(","), content)
Exemplo n.º 10
0
    def _parse_unnamed_argument(self):
        # This parses an unnamed argument in the form
        # value or "value" (the original comment said "named",
        # which was inaccurate).
        if self.peek_token_is(TokenTypes.LITERAL, '"'):
            # Quoted: read everything up to the closing quote.
            self.get_token(TokenTypes.LITERAL, '"')
            value = self.collect_join([Token(TokenTypes.LITERAL, '"')])
            self.get_token(TokenTypes.LITERAL, '"')
            return value

        # Bare: read up to the next comma or the end of input.
        return self.collect_join(
            [Token(TokenTypes.LITERAL, ","),
             Token(TokenTypes.EOF)])
Exemplo n.º 11
0
    def _parse_block(self):
        """Parse a block delimited by four identical characters.

        The first unnamed attribute argument selects a dedicated block
        parser ("if"/"ifnot", "raw", "source", "admonition", "quote");
        any other value is treated as the type of a standard block.

        Raises:
            TokenError: if the delimiter is not 4 identical characters.
        """
        delimiter = self.get_token(TokenTypes.TEXT).value

        if len(delimiter) != 4 or len(set(delimiter)) != 1:
            raise TokenError

        self.get_token(TokenTypes.EOL)

        # Collect everything up to the closing delimiter
        content = self.collect_lines(
            [Token(TokenTypes.TEXT, delimiter),
             Token(TokenTypes.EOF)])

        self.force_token(TokenTypes.TEXT, delimiter)
        self.get_token(TokenTypes.EOL)

        # Optional secondary content ends at the first empty line
        secondary_content = self.collect_lines(
            [Token(TokenTypes.EOL),
             Token(TokenTypes.EOF)])

        args, kwargs = self._pop_attributes()
        title = self._pop_title()

        # Dispatch on the first unnamed argument when present
        # (truthiness instead of the repeated "len(args) != 0").
        if args and args[0] in ["if", "ifnot"]:
            return self._parse_conditional_block(args[0], content, args[1:],
                                                 kwargs)

        if args and args[0] in ["raw"]:
            return self._parse_raw_block(content, args[1:], kwargs)

        if args and args[0] == "source":
            return self._parse_source_block(content, secondary_content, title,
                                            args[1:], kwargs)

        if args and args[0] == "admonition":
            return self._parse_admonition_block(content, args[1:], kwargs)

        if args and args[0] == "quote":
            return self._parse_quote_block(content, title, args[1:], kwargs)

        # No dedicated parser: the first argument, if any, is the
        # block type of a standard block.
        if args:
            blocktype, args = args[0], args[1:]
        else:
            blocktype = None

        return self._parse_standard_block(blocktype, content,
                                          secondary_content, title, args,
                                          kwargs)
Exemplo n.º 12
0
    def _parse_paragraph(self):
        # A paragraph may span several lines and is terminated by an
        # empty line or by the end of the input.

        # Join all the lines and parse them as one sentence
        lines = self._collect_lines(
            [Token(TokenTypes.EOL), Token(TokenTypes.EOF)])
        sentence = self._parse_text_content(" ".join(lines))

        # Consume the attributes
        args, kwargs = self.argsparser.get_arguments_and_reset()

        self._save(ParagraphNode(sentence, args=args, kwargs=kwargs))
Exemplo n.º 13
0
 def force_token(self, ttype, tvalue=None):
     # Like get_token, but a type/value mismatch is reported as an
     # ExpectedError carrying the expected and the actual token.
     try:
         return self.get_token(ttype, tvalue)
     except TokenError:
         raise ExpectedError(
             {"expected": Token(ttype, tvalue), "found": self.current_token}
         )
Exemplo n.º 14
0
def test_token_accepts_text_position():
    # A token keeps the (line, column) position it was created
    # with, alongside its type and value.
    position = (456, 123)
    token = Token("sometype", "somevalue", position=position)

    assert token.type == "sometype"
    assert token.value == "somevalue"
    assert token.position == position
Exemplo n.º 15
0
    def parse_style(self):
        # Styled text is delimited by "*" or "_"; the same character
        # that opened the span also closes it.
        marker = self.get_token_value(TokenTypes.LITERAL,
                                      check=lambda x: x in "*_")
        content = self.parse_sentence(
            stop_tokens={Token(TokenTypes.LITERAL, marker)})
        self.get_token(TokenTypes.LITERAL, marker)

        return StyleNode(MAP_STYLES[marker], content)
Exemplo n.º 16
0
    def _parse_unnamed_argument(self):
        # An unnamed argument is either a quoted string or a
        # single TEXT token.
        if not self.peek_token_is(TokenTypes.LITERAL, '"'):
            return self.get_token(TokenTypes.TEXT).value

        self.get_token(TokenTypes.LITERAL, '"')
        value = self.collect_join([Token(TokenTypes.LITERAL, '"')])
        self.get_token(TokenTypes.LITERAL, '"')

        return value
Exemplo n.º 17
0
def test_check_current_token_with_function():
    p = init_parser("\n")
    p.get_token()

    # The check function receives the token value (None for EOL here).
    checked = p.check_current_token(TokenTypes.EOL, check=lambda x: x is None)
    assert checked == Token(TokenTypes.EOL)

    # check_current_token doesn't advance the index
    assert p.get_token() == EOL
    assert p.get_token() == EOF
Exemplo n.º 18
0
    def _parse_curly(self):
        """Parse a {variable} reference and save its value as text.

        Names containing a dot are looked up inside a namespace,
        e.g. {namespace.name}.

        Raises:
            PreprocessError: if the variable has not been defined.
        """
        # (Removed a dead "variable_name = []" assignment that was
        # immediately overwritten by collect_join.)
        self.get_token(TokenTypes.LITERAL, "{")
        variable_name = self.collect_join(
            [Token(TokenTypes.LITERAL, "}"), Token(TokenTypes.EOF)]
        )
        self.get_token(TokenTypes.LITERAL, "}")

        try:
            if "." not in variable_name:
                variable_value = self.variables[variable_name]
            else:
                namespace, variable_name = variable_name.split(".")

                variable_value = self.variables[namespace][variable_name]

            self._save(TextNode(variable_value))
        except KeyError:
            raise PreprocessError(f'Attribute "{variable_name}" has not been defined')
Exemplo n.º 19
0
    def _parse_single_argument(self):
        # Parse one argument, either named (name=value) or unnamed.
        #
        # NOTE(review): the "with self:" blocks appear to use the parser
        # itself as a context manager that swallows a failed parse and
        # lets execution continue past the block — confirm against the
        # parser's __exit__ implementation. Under that assumption the
        # code after the first "with" block is reachable only when the
        # named-argument parse failed.
        if self.raw:
            # Raw mode: the whole rest of the line is one unnamed argument.
            value = self.collect_join([Token(TokenTypes.EOL), Token(TokenTypes.EOF)])
            self.args.append(value)
            return

        with self:
            # Try a named argument first and remember that one was
            # seen, so later unnamed arguments can be rejected.
            name, value = self._parse_named_argument()
            self.kwargs[name] = value
            self._named_arguments = True
            return

        # Unnamed arguments may not follow named ones.
        if self._named_arguments:
            raise ParseError("Unnamed arguments after named arguments are forbidden")

        with self:
            value = self._parse_unnamed_argument()
            self.args.append(value)
            return
Exemplo n.º 20
0
    def collect_lines(self, stop_tokens):
        """Collect lines of text until one begins with a stop token.

        Each line is joined into a single string; the trailing EOL of
        every collected line is consumed.
        """
        collected = []

        while self.peek_token() not in stop_tokens:
            collected.append(self.collect_join([Token(TokenTypes.EOL)]))
            self.get_token(TokenTypes.EOL)

        return collected
Exemplo n.º 21
0
    def peek_token(self, ttype=None, tvalue=None, check=None):
        """
        Return the next token without advancing the index.
        Past the last token an EOF token is returned instead.
        """
        try:
            upcoming = self.tokens[self.index + 1]
            return self._check_token(upcoming, ttype, tvalue, check)
        except IndexError:
            return Token(TokenTypes.EOF)
Exemplo n.º 22
0
    def get_token(self, ttype=None, tvalue=None, check=None):
        """
        Return the next token and advance the index.
        The token is stored in current_token; past the end of the
        stream an EOF token is returned instead.
        """
        exhausted = self.index == len(self.tokens)
        if exhausted:
            return Token(TokenTypes.EOF)

        self.index += 1

        return self._check_token(self.current_token, ttype, tvalue, check)
Exemplo n.º 23
0
    def get_token(self, ttype=None, tvalue=None, check=None):
        """
        Return the next token and advance the index.
        Type, value, and a custom check function can optionally be
        enforced (see _check_token). The returned token is available
        afterwards through self._current_token. Past the end of the
        stream an EOF token is returned instead.
        """
        end_of_stream = self.index == len(self.tokens)
        if end_of_stream:
            return Token(TokenTypes.EOF)

        self.index += 1

        return self._check_token(self.current_token, ttype, tvalue, check)
Exemplo n.º 24
0
    def _parse_list_nodes(self):
        # Parse every item of a list; items with a longer header are
        # parsed recursively as a nested list and attached to the
        # sentence of the previous item.

        # Skip leading white space
        with self:
            self.get_token(TokenTypes.WHITESPACE)

        header = self.get_token(TokenTypes.LITERAL,
                                check=lambda x: x[0] in "*#").value
        self.get_token(TokenTypes.WHITESPACE)
        text = self._collect_text_content()

        content = self._parse_text_content(text)
        # The header length gives the nesting level
        level = len(header)

        nodes = []
        nodes.append(ListItemNode(level, content))

        # Idiomatic "not in" (original used "not self.peek_token() in [...]")
        while self.peek_token() not in [
                Token(TokenTypes.EOF),
                Token(TokenTypes.EOL)
        ]:
            # This is the SentenceNode inside the last node added to the list
            last_node_sentence = nodes[-1].content

            with self:
                self.get_token(TokenTypes.WHITESPACE)

            if len(self.peek_token().value) == level:
                # Same level: append a sibling item
                header = self.get_token().value
                self.get_token(TokenTypes.WHITESPACE)
                text = self._collect_text_content()
                content = self._parse_text_content(text)
                nodes.append(ListItemNode(len(header), content))
            elif len(self.peek_token().value) > level:
                # Deeper level: parse a nested list ("#" marks a numbered one)
                numbered = self.peek_token().value[0] == "#"
                subnodes = self._parse_list_nodes()
                last_node_sentence.content.append(ListNode(numbered, subnodes))
            else:
                break

        return nodes
Exemplo n.º 25
0
    def _collect_lines(self, stop_tokens):
        """Collect lines into a list until one begins with a stop token.

        Useful for blocks and other elements that are clearly
        surrounded by delimiters; the stop token itself is not
        consumed.
        """
        collected = []

        while self.peek_token() not in stop_tokens:
            collected.append(self.collect_join([Token(TokenTypes.EOL)]))
            self.get_token(TokenTypes.EOL)

        return collected
Exemplo n.º 26
0
 def force_token(self, ttype, tvalue=None):
     """
     Return the next token and advance the index, requiring the
     token to have a specific type and optionally a value.
     When the token doesn't match, an ExpectedError is raised
     carrying both the expected and the actual token.
     """
     try:
         return self.get_token(ttype, tvalue)
     except TokenError:
         raise ExpectedError({
             "expected": Token(ttype, tvalue),
             "found": self.current_token,
         })
Exemplo n.º 27
0
    def current_token(self):
        """
        Return the token currently being parsed.
        We often need the token under the cursor, but the parser may
        not have started yet (ValueError) or may have consumed every
        token (EOF is returned instead of raising IndexError).
        """
        if self.index < 0:
            raise ValueError("The parser has no current token")

        if self.index >= len(self.tokens):
            # All tokens have already been consumed
            return Token(TokenTypes.EOF)

        return self.tokens[self.index]
Exemplo n.º 28
0
    def _parse_variable_definition(self):
        """Parse a variable definition.

        Supported forms:
          :name:value   simple variable
          :name:        boolean True flag
          :!name:       boolean False flag
          :ns.name:val  variable inside a namespace
        """
        # Get the mandatory variable name
        self.get_token(TokenTypes.LITERAL, ":")
        variable_name = self.get_token(TokenTypes.TEXT).value
        self.get_token(TokenTypes.LITERAL, ":")

        # A leading "!" marks a False flag; otherwise assume True
        negative_flag = variable_name.startswith("!")
        if negative_flag:
            variable_name = variable_name[1:]
        variable_value = not negative_flag

        # An explicit value overrides the flag, but only for positive
        # flags: a value after :!name: is deliberately ignored
        value = self.collect_join([Token(TokenTypes.EOL)])
        if variable_value and len(value) > 0:
            variable_value = value

        if "." not in variable_name:
            self.variables[variable_name] = variable_value
            return

        # Dotted names live in a namespace; only the first dot counts
        namespace, variable_name = variable_name.split(".", maxsplit=1)

        try:
            self.variables[namespace][variable_name] = variable_value
        except KeyError:
            # First variable in this namespace: create it
            self.variables[namespace] = {variable_name: variable_value}
Exemplo n.º 29
0
    def _parse_variable_definition(self):
        """Parse a variable definition.

        Supported forms: :name:value, :name: (True flag),
        :!name: (False flag), and :namespace.name:value.
        """
        self.get_token(TokenTypes.LITERAL, ":")
        variable_name = self.get_token(TokenTypes.TEXT).value
        self.get_token(TokenTypes.LITERAL, ":")

        # Assume a True flag; a leading "!" makes it a False flag
        variable_value = True
        if variable_name.startswith("!"):
            variable_value = False
            variable_name = variable_name[1:]

        # The value is assigned only when the variable is not a
        # negative flag: previously a value after :!name: would
        # silently overwrite the False flag.
        value = self.collect_join([Token(TokenTypes.EOL)])
        if variable_value and len(value) > 0:
            variable_value = value

        if "." not in variable_name:
            self.variables[variable_name] = variable_value
        else:
            # Split on the first dot only, so names containing more
            # dots no longer raise ValueError
            namespace, variable_name = variable_name.split(".", maxsplit=1)

            try:
                self.variables[namespace][variable_name] = variable_value
            except KeyError:
                # First variable in this namespace: create it
                self.variables[namespace] = {variable_name: variable_value}
Exemplo n.º 30
0
    def _parse_block(self):
        # Parse a block in the form
        #
        # [block_type]
        # ----
        # Content
        # ----
        # Optional secondary content
        #
        # Blocks are delimited by 4 consecutive identical characters.

        # Get the delimiter and check the length
        delimiter = self.get_token(TokenTypes.TEXT).value
        if len(delimiter) != 4 or len(set(delimiter)) != 1:
            raise TokenError
        self.get_token(TokenTypes.EOL)

        # Collect everything until the next delimiter
        content = self._collect_lines(
            [Token(TokenTypes.TEXT, delimiter),
             Token(TokenTypes.EOF)])
        self.force_token(TokenTypes.TEXT, delimiter)
        self.get_token(TokenTypes.EOL)

        # Get the optional secondary content
        secondary_content = self._collect_lines(
            [Token(TokenTypes.EOL),
             Token(TokenTypes.EOF)])

        # Consume the title
        title = self._pop_title()

        # The first unnamed argument is the block type
        blocktype = self.argsparser.pop()

        # If there is a block alias for blocktype replace it
        # otherwise use the blocktype we already have
        blocktype = self.block_aliases.get(blocktype, blocktype)

        # Assign names and defaults to the remaining arguments
        self.argsparser.set_names_and_defaults(
            self.block_names.get(blocktype, []),
            self.block_defaults.get(blocktype, {}))

        # Consume the attributes
        args, kwargs = self.argsparser.get_arguments_and_reset()

        # Extract classes and convert them into a list
        classes = [
            i for i in kwargs.pop("classes", "").split(",") if len(i) > 0
        ]

        # Extract condition if present and process it
        condition = kwargs.pop("condition", "")

        # Run this only if there is a condition on this block
        if len(condition) > 0:
            try:
                # The condition should be either test:variable:value or test:variable:
                test, variable, value = condition.split(":")
            except ValueError:
                # NOTE(review): if self.error does not raise, the names
                # test/variable/value are unbound below — confirm that
                # self.error interrupts the parse.
                self.error(
                    f'Condition {condition} is not in the form "test:variable:value" or "test:variable:'
                )

            # If there is no value use True
            if len(value) == 0:
                value = True

            # Check if the variable matches the value and apply the requested test
            match = self.variables.get(variable) == value
            result = test == "if"

            # If the condition is not satisfied return
            if match is not result:
                return

        # Extract the preprocessor
        preprocessor = kwargs.pop("preprocessor", "none")

        # Extract the engine
        engine = kwargs.pop("engine", "default")

        # Create the node parameters according to the engine
        if engine in ["raw", "mau"]:
            # Engine "raw" doesn't process the content,
            # so we just pass it untouched in the form of
            # a TextNode per line. The same is true for "mau"
            # as the visitor will have to fire up a new parser
            # to process the content.
            content = [TextNode(line) for line in content]
            secondary_content = [TextNode(line) for line in secondary_content]
        elif engine == "source":
            # Engine "source" extracts the content (source code),
            # the callouts, and the highlights.
            # The default language is "text".

            content, callouts, highlights = self._parse_source_engine(
                content, secondary_content, kwargs)
            secondary_content = []

            kwargs["callouts"] = callouts
            kwargs["highlights"] = highlights
            kwargs["language"] = kwargs.get("language", "text")

        elif engine == "default":
            # This is the default engine and it parses
            # both content and secondary content using a new parser
            # but then merges headers and footnotes into the
            # current one.

            # Parse the primary and secondary content and record footnotes
            pc = MainParser(variables=self.variables).analyse(
                "\n".join(content))
            ps = MainParser(variables=self.variables).analyse(
                "\n".join(secondary_content))
            content = pc.nodes
            secondary_content = ps.nodes

            self.footnote_defs.extend(pc.footnote_defs)
            self.headers.extend(pc.headers)
        else:
            raise EngineError(f"Engine {engine} is not available")

        self._save(
            BlockNode(
                blocktype=blocktype,
                content=content,
                secondary_content=secondary_content,
                args=args,
                classes=classes,
                engine=engine,
                preprocessor=preprocessor,
                kwargs=kwargs,
                title=title,
            ))