Пример #1
0
def parse_ast(expression, root_node, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Resolve a molecule AST into its atoms dictionary.

    The nodes are visited in the order returned by the BFS helper, so the
    result of every child is looked up from the results collected so far when
    its parent is handled.

    :type expression: str
    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The origin expression (used when building error tracebacks).
    :param root_node: The root node of the AST.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled (forwarded to the
        molecule parser when an abbreviation is expanded).
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers (forwarded likewise).
    :rtype : dict
    :return: The parsed atoms dictionary.
    :raise: An error built from ``_cm_error.Error`` if a node has no content, if an abbreviation is not in
        the abbreviation mapping, or if expanding an abbreviation fails.
    """

    #  Wrap the options (interface, molecule) and read the language ID.
    if_opt = _interface_opt.OptionWrapper(options)
    molecule_opt = _ml_opt.OptionWrapper(options)
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    def _text(message_key):
        #  Fetch a localized message for the active language.
        return _l10n_reg.get_message(lang_id, message_key)

    def _make_error(error_code, description_key):
        #  Create an error object (no traceback pushed yet).
        return _cm_error.Error(error_code, _text(description_key), options)

    def _no_content_inside(node):
        #  Create a "no content" error whose traceback covers the whole node.
        failure = _make_error(
            _ml_error.MOLECULE_NO_CONTENT,
            "parser.molecule.error.no_content.description"
        )
        failure.push_traceback(
            expression,
            node.get_starting_position_in_source_text(),
            node.get_ending_position_in_source_text(),
            _text("parser.molecule.error.no_content.inside")
        )
        return failure

    #  Header settings handed over unchanged when an abbreviation is expanded.
    header_options = {
        "mexp_protected_header_enabled": mexp_protected_header_enabled,
        "mexp_protected_header_prefix": mexp_protected_header_prefix,
    }

    #  Get the iteration order.
    visit_order = _ml_ast_bfs.do_bfs(root_node, True)

    #  Results of the nodes handled so far, keyed by id() of the node
    #  (each value is a MergeUtil).
    resolved = {}

    #  Iterate nodes from the leaves to the root.
    for current in visit_order:
        if current.is_hydrate_group() or current.is_molecule():
            assert isinstance(
                current,
                (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule)
            )

            #  The prefix number scales everything inside this node.
            multiplier = current.get_prefix_number()
            merged = MergeUtil()

            #  The electronic count is read from molecule nodes only and is
            #  stored under the pseudo symbol "e".
            if current.is_molecule():
                charge = current.get_electronic_count().simplify()
                if not charge.is_zero:
                    merged.add("e", charge * multiplier)

            child_count = len(current)
            for index in range(child_count):
                child = current[index]
                child_atoms = resolved[id(child)]

                #  Normal case: the child has content (or emptiness is not
                #  checked at this level), so just merge it.
                if not current.is_hydrate_group() or len(child_atoms) != 0:
                    merged.merge(child_atoms, multiplier)
                    continue

                #  An empty child of a hydrate group is an error.
                assert isinstance(child, _ast_base.ASTNodeMolecule)

                failure = _make_error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    "parser.molecule.error.no_content.description"
                )

                #  Choose the marked range and the hint by the child's place.
                if index == 0:
                    span_begin = child.get_ending_position_in_source_text() + 1
                    span_end = span_begin
                    hint_key = "parser.molecule.error.no_content.before"
                elif index == child_count - 1:
                    span_begin = child.get_starting_position_in_source_text() - 1
                    span_end = span_begin
                    hint_key = "parser.molecule.error.no_content.after"
                else:
                    span_begin = child.get_starting_position_in_source_text() - 1
                    span_end = child.get_ending_position_in_source_text() + 1
                    hint_key = "parser.molecule.error.no_content.inside"

                failure.push_traceback(expression, span_begin, span_end, _text(hint_key))
                raise failure

            #  Do simplifying.
            _macro_simplify(expression, merged, current, options)
        elif current.is_atom():
            assert isinstance(current, _ast_base.ASTNodeAtom)

            #  An atom contributes its own symbol, counted by its suffix.
            multiplier = current.get_suffix_number()
            merged = MergeUtil()
            merged.add(current.get_atom_symbol(), multiplier)
        elif current.is_parenthesis():
            assert isinstance(current, _ast_base.ASTNodeParenthesisWrapper)

            multiplier = current.get_suffix_number()
            merged = MergeUtil()

            #  Fetch the result of the wrapped node; it must not be empty.
            inner_atoms = resolved[id(current.get_inner_node())]
            if len(inner_atoms) == 0:
                raise _no_content_inside(current)

            merged.merge(inner_atoms, multiplier)

            #  Do simplifying.
            _macro_simplify(expression, merged, current, options)
        elif current.is_abbreviation():
            assert isinstance(current, _ast_base.ASTNodeAbbreviation)

            #  An empty abbreviation symbol counts as "no content".
            symbol = current.get_abbreviation_symbol()
            if len(symbol) == 0:
                raise _no_content_inside(current)

            #  The symbol has to be present in the abbreviation mapping.
            mapping = molecule_opt.get_abbreviation_mapping()
            if symbol not in mapping:
                failure = _make_error(
                    _ml_error.MOLECULE_UNSUPPORTED_ABBREVIATION,
                    "parser.molecule.error.unsupported_abbreviation.description"
                )
                failure.push_traceback(
                    expression,
                    current.get_starting_position_in_source_text() + 1,
                    current.get_ending_position_in_source_text() - 1,
                    _text("parser.molecule.error.unsupported_abbreviation.message")
                )
                raise failure

            expansion = mapping[symbol]

            #  Parse the expansion with the configured molecule parser. On
            #  failure, extend the error with the expansion and then with the
            #  place in the origin expression it came from.
            try:
                sub_parser = if_opt.get_molecule_parser()
                sub_root = sub_parser.parse_expression(
                    expansion,
                    options,
                    **header_options
                )
                sub_atoms = sub_parser.parse_ast(
                    expansion,
                    sub_root,
                    options,
                    **header_options
                )
            except _cm_error.Error as sub_failure:
                sub_failure.push_traceback(
                    expansion,
                    0,
                    len(expansion) - 1,
                    _text("parser.molecule.error.parsing_abbreviation.expand")
                )
                sub_failure.push_traceback(
                    expression,
                    current.get_starting_position_in_source_text() + 1,
                    current.get_ending_position_in_source_text() - 1,
                    _text("parser.molecule.error.parsing_abbreviation.origin")
                )
                raise sub_failure

            merged = MergeUtil()
            multiplier = current.get_suffix_number()

            #  Add every atom of the expansion, scaled by the suffix number.
            for atom_symbol in sub_atoms:
                merged.add(atom_symbol, sub_atoms[atom_symbol] * multiplier)

            #  Do simplifying.
            _macro_simplify(expression, merged, current, options)
        else:
            raise RuntimeError("Never reach this condition.")

        #  Save the result of this node for its parent.
        resolved[id(current)] = merged

    #  The result of the root node must not be empty either.
    root_atoms = resolved[id(root_node)]
    if len(root_atoms) == 0:
        failure = _make_error(
            _ml_error.MOLECULE_NO_CONTENT,
            "parser.molecule.error.no_content.description"
        )
        failure.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _text("parser.molecule.error.no_content.inside")
        )
        raise failure

    return root_atoms.get_data()
Пример #2
0
def print_ast(root_node, mexp_parser):
    """Print an AST to text.

    The nodes are visited in the order returned by the BFS helper; the text of
    every child is looked up from the texts produced so far when its parent is
    printed. After the root has been printed, a status suffix ("(g)", "(l)",
    "(s)" or "(aq)") is appended if the root node reports one.

    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type mexp_parser: bce.parser.interface.mexp_parser.MathExpressionParserInterface
    :param root_node: The root node of the AST.
    :param mexp_parser: The math expression parser.
    :rtype : str
    :return: The printed expression.
    :raise RuntimeError: If a node of an unhandled type is met.
    """

    #  Get the printing order.
    work_order = _ml_ast_bfs.do_bfs(root_node, True)

    #  Printed text of the nodes handled so far, keyed by id() of the node.
    printed = {}

    for work_node in work_order:
        if work_node.is_hydrate_group():
            assert isinstance(work_node, _ml_ast_base.ASTNodeHydrateGroup)

            #  Print the prefix number part (omitted when the prefix is one).
            pfx = work_node.get_prefix_number().simplify()
            if pfx != _math_cst.ONE:
                pfx_text = _print_operand(pfx, mexp_parser)
            else:
                pfx_text = None

            #  Print children nodes, separated by the hydrate dot.
            inner = ".".join(
                printed[id(work_node[child_id])]
                for child_id in range(len(work_node)))

            #  Save printing result. The prefix text is passed as a format
            #  argument (never used as the format template itself), so a '%'
            #  character in it can't break the formatting.
            if pfx_text is None:
                printed[id(work_node)] = inner
            else:
                printed[id(work_node)] = "%s(%s)" % (pfx_text, inner)
        elif work_node.is_molecule():
            assert isinstance(work_node, _ml_ast_base.ASTNodeMolecule)

            #  Print the prefix number part.
            #  NOTE(review): unlike the hydrate group branch, the prefix is
            #  printed unconditionally here - presumably _print_operand()
            #  yields an empty string for a prefix of one; confirm.
            pfx = work_node.get_prefix_number().simplify()
            pieces = [_print_operand(pfx, mexp_parser)]

            #  Print children nodes.
            pieces.extend(
                printed[id(work_node[child_id])]
                for child_id in range(len(work_node)))

            #  Print the electronic part.
            el_charge = work_node.get_electronic_count().simplify()
            if not el_charge.is_zero:
                pieces.append(_print_electronic(el_charge, mexp_parser))

            #  Save printing result.
            printed[id(work_node)] = "".join(pieces)
        elif work_node.is_atom():
            assert isinstance(work_node, _ml_ast_base.ASTNodeAtom)

            #  Print and save the result.
            printed[id(work_node)] = \
                work_node.get_atom_symbol() + _print_suffix(work_node, mexp_parser)
        elif work_node.is_parenthesis():
            assert isinstance(work_node,
                              _ml_ast_base.ASTNodeParenthesisWrapper)

            #  Print and save the result.
            printed[id(work_node)] = "(%s)%s" % (
                printed[id(work_node.get_inner_node())],
                _print_suffix(work_node, mexp_parser))
        elif work_node.is_abbreviation():
            assert isinstance(work_node, _ml_ast_base.ASTNodeAbbreviation)

            #  Print and save the result.
            printed[id(work_node)] = "[%s]%s" % (
                work_node.get_abbreviation_symbol(),
                _print_suffix(work_node, mexp_parser))
        else:
            raise RuntimeError("BUG: Unhandled AST node type.")

    #  Post process - add status (nothing is appended if no status matches).
    post_process = printed[id(root_node)]
    if root_node.is_gas_status():
        post_process += "(g)"
    elif root_node.is_liquid_status():
        post_process += "(l)"
    elif root_node.is_solid_status():
        post_process += "(s)"
    elif root_node.is_aqueous_status():
        post_process += "(aq)"

    return post_process
Пример #3
0
def generate_ast(expression, token_list, options):
    """Generate an AST from the token list.

    :type expression: str
    :type token_list: list[bce.parser.molecule.token.Token]
    :type options: bce.option.Option
    :param expression: The origin expression.
    :param token_list: The token list.
    :param options: The options.
    :rtype : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :return: The root node of the generated AST.
    """

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Initialize the molecule status container.
    molecule_status = None

    #  Initialize the state machine.
    state = _STATE_ROOT

    #  Generate initial AST.
    root = _ml_ast_base.ASTNodeHydrateGroup()
    node = _ml_ast_base.ASTNodeMolecule(root)
    root.append_child(node)

    #  Register the starting position.
    root.register_starting_position_in_source_text(0)
    node.register_starting_position_in_source_text(0)

    #  Initialize the token cursor.
    cursor = 0

    while True:
        #  Get current token.
        token = token_list[cursor]

        if state == _STATE_ROOT:
            #  Find molecule in parent nodes and current node.
            while node is not None and not node.is_molecule():
                node = node.get_parent_node()
            if node is None:
                raise RuntimeError("BUG: Can't find molecule group.")

            #  Redirect by rules.
            if token.is_operand() and len(node) == 0:
                state = _STATE_PREFIX_NUMBER
            elif token.is_symbol():
                state = _STATE_ATOM
            elif token.is_abbreviation():
                state = _STATE_ABBREVIATION
            elif token.is_left_parenthesis():
                state = _STATE_LEFT_PARENTHESIS
            elif token.is_right_parenthesis():
                state = _STATE_RIGHT_PARENTHESIS
            elif token.is_electronic_begin():
                state = _STATE_ELECTRONIC
            elif token.is_hydrate_dot():
                state = _STATE_HYDRATE_DOT
            elif token.is_status():
                state = _STATE_MOLECULE_STATUS
            elif token.is_end():
                break
            else:
                #  Raise an error if the token can't be recognized.
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.other"))
                raise err
        elif state == _STATE_ATOM:
            #  Create a new atom node and register its starting position.
            new_node = _ml_ast_base.ASTNodeAtom(token.get_symbol(), node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            #  Add the node to the molecule group.
            node.append_child(new_node)

            #  Switch the node pointer to the new created node.
            node = new_node

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ABBREVIATION:
            #  Create a new abbreviation node and register its starting position.
            new_node = _ml_ast_base.ASTNodeAbbreviation(
                token.get_symbol()[1:-1], node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            #  Add the node to the molecule group.
            node.append_child(new_node)

            #  Switch the node pointer to the new created node.
            node = new_node

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_LEFT_PARENTHESIS:
            #  Create new nodes.
            new_hydrate_grp = _ml_ast_base.ASTNodeHydrateGroup()
            new_molecule = _ml_ast_base.ASTNodeMolecule(new_hydrate_grp)
            new_parenthesis = _ml_ast_base.ASTNodeParenthesisWrapper(
                new_hydrate_grp, node)

            #  Link them correctly and them add the new created parenthesis node to the molecule group.
            new_hydrate_grp.set_parent_node(new_parenthesis)
            new_hydrate_grp.append_child(new_molecule)
            node.append_child(new_parenthesis)

            #  Switch the node pointer to the new created molecule node.
            node = new_molecule

            #  Register their starting positions.
            new_hydrate_grp.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_parenthesis.register_starting_position_in_source_text(
                token.get_position())

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_RIGHT_PARENTHESIS:
            #  Find parenthesis node in parent nodes and current node.
            while node is not None and not node.is_parenthesis():
                #  Register the ending position of current working node.
                node.register_ending_position_in_source_text(
                    token.get_position() - 1)

                #  Go to the parent node.
                node = node.get_parent_node()

            #  Raise an error if the node can't be found.
            if node is None:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ), options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.left"))
                raise err

            #  Register the ending position of current working node.
            node.set_right_parenthesis_position(token.get_position())

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ELECTRONIC:
            #  Save the starting position of the electronic descriptor.
            e_start_pos = token.get_position()

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Try to read the prefix number.
            e_pfx = _math_cst.ONE
            e_pfx_start = token.get_position()
            has_e_pfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_e_pfx_number = True

                #  Process the prefix number.
                e_pfx *= token.get_operand_value().simplify()

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Simplify before checking.
            e_pfx = e_pfx.simplify()

            #  Domain check.
            if e_pfx.is_negative or e_pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.electronic_charge")
                )
                raise err

            #  Validate.
            if has_e_pfx_number and e_pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.electronic_charge"
                    ))
                raise err

            #  Process the electronic positivity flag.
            if token.is_electronic_positive_flag():
                pass
            elif token.is_electronic_negative_flag():
                e_pfx = -e_pfx
            else:
                if token.is_end():
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    #  Raise an error if current working token is not an electronic positivity flag.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ), options)
                    err.push_traceback(
                        expression, token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_suffix"
                        ))

                raise err

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Raise an error if current working token is not '>'.
            if not token.is_electronic_end():
                if token.is_end():
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ), options)
                    err.push_traceback(
                        expression, token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_end"
                        ))

                raise err

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Raise an error if the electronic descriptor is not at the end of a molecule block.
            if not (token.is_right_parenthesis() or token.is_hydrate_dot()
                    or token.is_end() or token.is_status()):
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, e_start_pos,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            #  Set the electronic count.
            node.set_electronic_count(e_pfx)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_HYDRATE_DOT:
            #  Save the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() -
                                                         1)

            #  Go to parent node.
            node = node.get_parent_node()
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            #  Create a new molecule node and set its starting position.
            new_molecule = _ml_ast_base.ASTNodeMolecule(node)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)

            #  Add the new created molecule node to the hydrate group node.
            node.append_child(new_molecule)

            #  Switch the node pointer to the new created molecule node.
            node = new_molecule

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_PREFIX_NUMBER:
            #  Save the starting position of the prefix.
            pfx_start = token.get_position()

            #  Read prefix numbers.
            has_pfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_pfx_number = True

                #  Process the prefix number.
                node.set_prefix_number(node.get_prefix_number() *
                                       token.get_operand_value().simplify())

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Simplify before checking.
            pfx = node.get_prefix_number().simplify()

            #  Domain check.
            if pfx.is_negative or pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.domain_error.prefix"))
                raise err

            #  Validate.
            if has_pfx_number and pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.prefix"))
                raise err

            #  Set the prefix number.
            node.set_prefix_number(pfx)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_SUFFIX_NUMBER:
            #  Save the starting position of the suffix.
            sfx_start = token.get_position()

            #  Read suffix numbers.
            has_sfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_sfx_number = True

                #  Process the suffix number.
                node.set_suffix_number(node.get_suffix_number() *
                                       token.get_operand_value().simplify())

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Get the suffix.
            sfx = node.get_suffix_number()

            #  Simplify before checking.
            sfx = sfx.simplify()

            #  Domain check.
            if sfx.is_negative or sfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.domain_error.suffix"))
                raise err

            #  Validate.
            if has_sfx_number and sfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.suffix"))
                raise err

            #  Register the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() -
                                                         1)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_MOLECULE_STATUS:
            #  Raise an error if the token is not at the end of the molecule.
            if not token_list[cursor + 1].is_end():
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            #  Fetch the molecule status.
            if token.is_gas_status():
                molecule_status = _ml_ast_base.STATUS_GAS
            elif token.is_liquid_status():
                molecule_status = _ml_ast_base.STATUS_LIQUID
            elif token.is_solid_status():
                molecule_status = _ml_ast_base.STATUS_SOLID
            elif token.is_aqueous_status():
                molecule_status = _ml_ast_base.STATUS_AQUEOUS
            else:
                raise RuntimeError("BUG: Unrecognized status.")

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        else:
            raise RuntimeError("BUG: Unrecognized state.")

    #  Get the ending position.
    ending_pos = token_list[-1].get_position() - 1

    #  Initialize the parenthesis-mismatched flag.
    mismatch_flag = False

    #  Pre-create an error.
    err = _cm_error.Error(
        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
        _l10n_reg.get_message(
            lang_id, "parser.molecule.error.parenthesis_mismatch.description"),
        options)

    while node is not None:
        #  Register the ending position of current working node.
        node.register_ending_position_in_source_text(ending_pos)

        #  Mark the error flag and add an error description if current node is a parenthesis node.
        if node.is_parenthesis():
            mismatch_flag = True
            err.push_traceback(
                expression, node.get_starting_position_in_source_text(),
                node.get_starting_position_in_source_text(),
                _l10n_reg.get_message(
                    lang_id,
                    "parser.molecule.error.parenthesis_mismatch.right"))

        #  Go to parent node.
        node = node.get_parent_node()

    #  Raise an error if we have met at least 1 parenthesis node.
    if mismatch_flag:
        raise err

    #  Now, we have constructed the whole AST, but we got a lot of useless hydrate group node.
    #  So we have to remove them (all hydrate groups nodes which have only 1 child).

    #  Get iterate order.
    unpack_order = _ml_ast_bfs.do_bfs(root, True)

    #  Initialize unpacked node container.
    unpacked = {}

    for node in unpack_order:
        if node.is_hydrate_group():
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            if len(node) == 1:
                #  Get the child node and reset its parent
                child = unpacked[id(node[0])]
                child.set_parent_node(node.get_parent_node())

                #  Save the unpack result.
                unpacked[id(node)] = child
            else:
                #  Update children links.
                for child_id in range(0, len(node)):
                    node[child_id] = unpacked[id(node[child_id])]

                #  Save the unpack result.
                unpacked[id(node)] = node
        elif node.is_molecule():
            assert isinstance(node, _ml_ast_base.ASTNodeMolecule)

            #  Update children links.
            for child_id in range(0, len(node)):
                node[child_id] = unpacked[id(node[child_id])]

            #  Save the unpack result.
            unpacked[id(node)] = node
        elif node.is_parenthesis():
            assert isinstance(node, _ml_ast_base.ASTNodeParenthesisWrapper)

            #  Update children links.
            node.set_inner_node(unpacked[id(node.get_inner_node())])

            #  Save  the unpack result.
            unpacked[id(node)] = node
        else:
            #  Save  the unpack result.
            unpacked[id(node)] = node

    #  Set molecule status.
    root = unpacked[id(root)]
    """:type : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule"""
    root.set_status(molecule_status)

    return root
Пример #4
0
def print_ast(
        root_node,
        mexp_parser,
        mexp_protected_header_enabled=False,
        mexp_protected_header_prefix="X"
):
    """Print an AST as a tree of MathML components.

    Every node is printed after its children (the children's results are looked up
    in an id-keyed table), and the molecule status, if any, is appended to the
    printed root as "(g)", "(l)", "(s)" or "(aq)".

    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type mexp_parser: bce.parser.interface.mexp_parser.MathExpressionParserInterface
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param root_node: The root node of the AST.
    :param mexp_parser: The math expression parser.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : bce.dom.mathml.all.Base
    :return: The printed expression.
    :raise RuntimeError: If an unhandled node type or an unknown status is met (internal bug).
    """

    #  Keyword arguments forwarded unchanged to every operand / suffix / charge printer.
    header_opts = {
        "mexp_protected_header_enabled": mexp_protected_header_enabled,
        "mexp_protected_header_prefix": mexp_protected_header_prefix,
    }

    #  Get the printing order (each node is visited after the nodes it contains).
    work_order = _ml_ast_bfs.do_bfs(root_node, True)

    #  Initialize the printed result container (keyed by id() of the AST node).
    printed = {}

    for work_node in work_order:
        if work_node.is_hydrate_group():
            assert isinstance(work_node, _ml_ast_base.ASTNodeHydrateGroup)

            #  Initialize a row component to contain the printing result.
            build = _mathml.RowComponent()

            #  Print the prefix number part. A non-trivial prefix applies to the whole
            #  group, so the group is surrounded with parentheses in that case.
            pfx = work_node.get_prefix_number().simplify()
            if pfx != _math_constant.ONE:
                build.append_object(_print_operand(pfx, True, mexp_parser, **header_opts))
                build.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_LEFT_PARENTHESIS))
                surround = True
            else:
                surround = False

            #  Print children nodes, separated by the dot operator.
            build.append_object(printed[id(work_node[0])])
            for child_id in range(1, len(work_node)):
                build.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_DOT))
                build.append_object(printed[id(work_node[child_id])])

            #  Complete the surrounding parentheses if the flag was marked.
            if surround:
                build.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_RIGHT_PARENTHESIS))

            #  Save printing result.
            printed[id(work_node)] = build
        elif work_node.is_molecule():
            assert isinstance(work_node, _ml_ast_base.ASTNodeMolecule)

            #  Initialize a row component to contain the printing result.
            build = _mathml.RowComponent()

            #  Print the prefix number part.
            pfx = work_node.get_prefix_number().simplify()
            if pfx != _math_constant.ONE:
                build.append_object(_print_operand(pfx, True, mexp_parser, **header_opts))

            #  Print children nodes.
            for child_id in range(0, len(work_node)):
                build.append_object(printed[id(work_node[child_id])])

            #  Print the electronic (charge) part as a superscript.
            el_charge = work_node.get_electronic_count().simplify()
            if not el_charge.is_zero:
                charge_obj = _print_super_electronic(el_charge, mexp_parser, **header_opts)

                if len(work_node) == 0:
                    #  No children: the charge is attached to a literal "e".
                    build.append_object(_mathml.SuperComponent(
                        _mathml.TextComponent("e"),
                        charge_obj
                    ))
                else:
                    #  Find the innermost row component, so that the charge is attached
                    #  to the very last printed item instead of a whole row.
                    innermost = build
                    while innermost[-1].is_row():
                        innermost = innermost[-1]

                    #  Fetch the last item.
                    last_item = innermost[-1]

                    #  Add the electronic. An item that already has a subscript is
                    #  rebuilt as a combined sub-and-super component.
                    if last_item.is_sub():
                        assert isinstance(last_item, _mathml.SubComponent)
                        last_item = _mathml.SubAndSuperComponent(
                            last_item.get_main_object(),
                            last_item.get_sub_object(),
                            charge_obj
                        )
                    else:
                        last_item = _mathml.SuperComponent(last_item, charge_obj)

                    #  Save the modified item.
                    innermost[-1] = last_item

            #  Save printing result.
            printed[id(work_node)] = build
        elif work_node.is_atom():
            assert isinstance(work_node, _ml_ast_base.ASTNodeAtom)

            #  Print and save the result.
            printed[id(work_node)] = _print_suffix(
                _mathml.TextComponent(work_node.get_atom_symbol()),
                work_node,
                mexp_parser,
                **header_opts
            )
        elif work_node.is_parenthesis():
            assert isinstance(work_node, _ml_ast_base.ASTNodeParenthesisWrapper)

            #  Initialize a row component to contain the printing result.
            build = _mathml.RowComponent()

            #  Print. The suffix is attached to the right parenthesis.
            build.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_LEFT_PARENTHESIS))
            build.append_object(printed[id(work_node.get_inner_node())])
            build.append_object(_print_suffix(
                _mathml.OperatorComponent(_mathml.OPERATOR_RIGHT_PARENTHESIS),
                work_node,
                mexp_parser,
                **header_opts
            ))

            #  Save printing result.
            printed[id(work_node)] = build
        elif work_node.is_abbreviation():
            assert isinstance(work_node, _ml_ast_base.ASTNodeAbbreviation)

            #  Print and save the result.
            printed[id(work_node)] = _print_suffix(
                _mathml.TextComponent("[%s]" % work_node.get_abbreviation_symbol()),
                work_node,
                mexp_parser,
                **header_opts
            )
        else:
            raise RuntimeError("BUG: Unhandled AST node type.")

    #  Post process - add status.
    post_process = printed[id(root_node)]
    status = root_node.get_status()
    if status is not None:
        #  Make sure there is a row to append the status to.
        if not post_process.is_row():
            tmp = _mathml.RowComponent()
            tmp.append_object(post_process)
            post_process = tmp

        post_process.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_LEFT_PARENTHESIS))
        if status == _ml_ast_base.STATUS_GAS:
            post_process.append_object(_mathml.TextComponent("g"))
        elif status == _ml_ast_base.STATUS_LIQUID:
            post_process.append_object(_mathml.TextComponent("l"))
        elif status == _ml_ast_base.STATUS_SOLID:
            post_process.append_object(_mathml.TextComponent("s"))
        elif status == _ml_ast_base.STATUS_AQUEOUS:
            post_process.append_object(_mathml.TextComponent("aq"))
        else:
            raise RuntimeError("BUG: No such status.")
        post_process.append_object(_mathml.OperatorComponent(_mathml.OPERATOR_RIGHT_PARENTHESIS))

    #  Return the post-processed object (not the table entry), so that the status is
    #  kept even when the root had to be wrapped into a new row above.
    return post_process
Пример #5
0
def substitute_ast(root_node, subst_map):
    """Apply a substitution map to an AST and build a new AST from the result.

    Every prefix number, suffix number and electronic count is substituted and
    simplified. Nodes whose number becomes zero (and containers left without any
    content) are dropped from the new AST. The origin AST is only read.

    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type subst_map: dict
    :param root_node: The root node of the origin AST.
    :param subst_map: The substitution map.
    :rtype : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule | None
    :return: The root node of the new AST (None if the whole tree was eliminated).
    :raise SubstituteError: If a substitution error was recorded on the new root node.
    """

    def _substitute(mexp):
        #  Substitute and simplify one math expression, then validate the result.
        result = mexp.subs(subst_map).simplify()
        _check_substituted_mexp(result)
        return result

    def _mark_failed(target):
        #  Record on the node that a substitution error was met at or below it.
        target.set_property(_PROPERTY_KEY_SUBSTITUTION_ERROR_RAISED, True)

    def _has_failed(target):
        #  Tell whether a substitution error was recorded on the node.
        return target.get_property(_PROPERTY_KEY_SUBSTITUTION_ERROR_RAISED, False)

    def _adopt(parent, child):
        #  Carry the error mark of the child upwards, then link child and parent.
        if _has_failed(child):
            _mark_failed(parent)
        child.set_parent_node(parent)
        parent.append_child(child)

    #  Substitution results, keyed by id() of the origin node (None = eliminated).
    substituted = {}
    """:type : dict[int, bce.parser.ast.molecule._ASTNodeBaseML | None]"""

    #  Walk the nodes in BFS order (from the leaves to the root).
    for work_node in _ast_bfs.do_bfs(root_node, True):
        key = id(work_node)

        if work_node.is_hydrate_group():
            assert isinstance(work_node, _ast_base.ASTNodeHydrateGroup)

            #  A zero prefix number eliminates the whole group.
            group_pfx = _substitute(work_node.get_prefix_number())
            if group_pfx.is_zero:
                substituted[key] = None
                continue

            #  Build the new group and collect the surviving children.
            group = _ast_base.ASTNodeHydrateGroup()
            group.set_prefix_number(group_pfx)
            for idx in range(len(work_node)):
                member = substituted[id(work_node[idx])]
                if member is None:
                    continue
                assert isinstance(member, _ast_base.ASTNodeMolecule)
                _adopt(group, member)

            member_count = len(group)
            if member_count == 0:
                #  Nothing left inside.
                substituted[key] = None
            elif member_count == 1:
                #  Only 1 molecule left: unpack it and fold the prefix number of the
                #  group into the prefix number of the molecule.
                outer_pfx = group.get_prefix_number()
                only = group[0]
                assert isinstance(only, _ast_base.ASTNodeMolecule)
                merged_pfx = (outer_pfx * only.get_prefix_number()).simplify()

                if merged_pfx.is_zero:
                    substituted[key] = None
                else:
                    # noinspection PyTypeChecker
                    only.set_parent_node(None)
                    only.set_prefix_number(merged_pfx)
                    substituted[key] = only
            else:
                #  A child with a negative prefix number is a substitution error.
                for idx in range(len(group)):
                    member = group[idx]
                    assert isinstance(member, _ast_base.ASTNodeMolecule)
                    if member.get_prefix_number().simplify().is_negative:
                        _mark_failed(group)
                        break

                substituted[key] = group
        elif work_node.is_molecule():
            assert isinstance(work_node, _ast_base.ASTNodeMolecule)

            #  A zero prefix number eliminates the molecule.
            mol_pfx = _substitute(work_node.get_prefix_number())
            if mol_pfx.is_zero:
                substituted[key] = None
                continue

            #  Build the new molecule with the substituted charge and prefix number.
            molecule = _ast_base.ASTNodeMolecule()
            molecule.set_electronic_count(
                _substitute(work_node.get_electronic_count()))
            molecule.set_prefix_number(mol_pfx)

            #  Collect the surviving children.
            for idx in range(len(work_node)):
                part = substituted[id(work_node[idx])]
                if part is not None:
                    _adopt(molecule, part)

            #  The molecule vanishes if it has neither content nor charge.
            vanished = len(molecule) == 0 and \
                molecule.get_electronic_count().simplify().is_zero
            substituted[key] = None if vanished else molecule
        elif work_node.is_atom():
            assert isinstance(work_node, _ast_base.ASTNodeAtom)

            atom = _ast_base.ASTNodeAtom(work_node.get_atom_symbol())
            atom_sfx = _substitute(work_node.get_suffix_number())

            #  A zero suffix number eliminates the atom.
            if atom_sfx.is_zero:
                substituted[key] = None
                continue

            #  A negative suffix number is a substitution error.
            if atom_sfx.is_negative:
                _mark_failed(atom)

            atom.set_suffix_number(atom_sfx)
            substituted[key] = atom
        elif work_node.is_parenthesis():
            assert isinstance(work_node, _ast_base.ASTNodeParenthesisWrapper)

            wrap_sfx = _substitute(work_node.get_suffix_number())

            #  Fetch what is left of the wrapped node.
            inner = substituted[id(work_node.get_inner_node())]
            assert inner is None or isinstance(
                inner,
                (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule))

            #  A zero suffix number or an empty inside eliminates the wrapper.
            if wrap_sfx.is_zero or inner is None:
                substituted[key] = None
                continue

            #  Build the new wrapper around the substituted inner node.
            wrapper = _ast_base.ASTNodeParenthesisWrapper(inner)
            inner.set_parent_node(wrapper)
            wrapper.set_suffix_number(wrap_sfx)

            #  Error cases: negative suffix number, negative prefix number of the
            #  inner node, or an error recorded on the inner node before.
            if wrap_sfx.is_negative or \
                    inner.get_prefix_number().simplify().is_negative or \
                    _has_failed(inner):
                _mark_failed(wrapper)

            substituted[key] = wrapper
        elif work_node.is_abbreviation():
            assert isinstance(work_node, _ast_base.ASTNodeAbbreviation)

            abbr = _ast_base.ASTNodeAbbreviation(
                work_node.get_abbreviation_symbol())
            abbr_sfx = _substitute(work_node.get_suffix_number())

            #  A zero suffix number eliminates the abbreviation.
            if abbr_sfx.is_zero:
                substituted[key] = None
                continue

            #  A negative suffix number is a substitution error.
            if abbr_sfx.is_negative:
                _mark_failed(abbr)

            abbr.set_suffix_number(abbr_sfx)
            substituted[key] = abbr
        else:
            raise RuntimeError("BUG: Unrecognized node.")

    #  Fetch the substituted root node.
    new_root = substituted[id(root_node)]
    assert new_root is None or isinstance(
        new_root,
        (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule))

    if new_root is not None:
        #  An error mark that reached the root becomes a real error.
        if _has_failed(new_root):
            raise _ml_interface.SubstituteError(
                "An error occurred when do substitution on the molecule.")

        #  The new AST keeps the molecule status of the origin AST.
        new_root.set_status(root_node.get_status())

    #  The error marks are only bookkeeping; strip them from the saved nodes.
    for saved in substituted.values():
        if saved is not None and saved.has_property(
                _PROPERTY_KEY_SUBSTITUTION_ERROR_RAISED):
            saved.remove_property(_PROPERTY_KEY_SUBSTITUTION_ERROR_RAISED)

    return new_root