示例#1
0
def is_chemical_equation_balanced(expression, options):
    """Tell whether a chemical equation is already balanced.

    The expression is first screened for invalid characters, then parsed with
    the CEXP parser selected by the options, and finally handed to the
    balance checker.

    :type expression: str
    :type options: bce.option.Option
    :param expression: The chemical equation.
    :param options: The options.
    :rtype : bool
    :return: True if balanced.
    :raise InvalidCharacterException: Raise when the character check fails.
    :raise ParserErrorWrapper: Raise when the parser reports an error.
    :raise LogicErrorWrapper: Raise when the balance checker reports an error.
    """

    #  Refuse the input early if the character screening fails.
    characters_ok = _util_input_chk.check_input_expression_characters(
        expression
    )
    if not characters_ok:
        raise _pub_exception.InvalidCharacterException("Invalid character.")

    #  Pick the CEXP parser configured by the options.
    parser = _interface_opt.OptionWrapper(options).get_cexp_parser()

    try:
        #  Parse the text, then run the balance check on the parsed object.
        #  Both steps stay inside the 'try' so that parser errors and logic
        #  errors are each converted into their public wrapper.
        parsed_equation = parser.parse(
            expression,
            options,
            mexp_protected_header_enabled=False
        )
        balanced = _lgc_bce_main.check_chemical_equation(parsed_equation)
    except _ps_cm_error.Error as err:
        raise _pub_exception.ParserErrorWrapper(err.to_string())
    except _lgc_cm_error.Error as err:
        raise _pub_exception.LogicErrorWrapper(err.to_string())

    return balanced
示例#2
0
def substitute_chemical_equation(expression,
                                 substitute_map,
                                 options,
                                 printer=_pub_printer.PRINTER_TEXT,
                                 unknown_header="X"):
    """Parse a chemical equation, apply a substitution map and print it.

    :type expression: str
    :type substitute_map: dict
    :type options: bce.option.Option
    :type printer: int
    :type unknown_header: str
    :param expression: The chemical equation.
    :param substitute_map: The substitution map.
    :param options: The options.
    :param printer: The printer ID.
    :param unknown_header: The header of unknowns.
    :rtype : str | dict[str, str]
    :return: The substituted chemical equation.
    :raise InvalidCharacterException: Raise when the character check fails.
    :raise ParserErrorWrapper: Raise when the parser reports an error.
    :raise SubstitutionErrorWrapper: Raise when the substitution fails.
    """

    #  Refuse the input early if the character screening fails.
    characters_ok = _util_input_chk.check_input_expression_characters(
        expression
    )
    if not characters_ok:
        raise _pub_exception.InvalidCharacterException("Invalid character.")

    #  Wrap the options and pick the CEXP parser they select.
    wrapped_options = _interface_opt.OptionWrapper(options)
    parser = wrapped_options.get_cexp_parser()

    try:
        #  Parse, then substitute on the parsed object.
        parsed_equation = parser.parse(
            expression,
            options,
            mexp_protected_header_enabled=False
        )
        substituted = parser.substitute(
            parsed_equation,
            options,
            substitute_map=substitute_map
        )

        #  Fetch the three parsers the printer needs (CEXP, molecule, MEXP),
        #  in that order, and render the substituted equation.
        printing_cexp_parser = wrapped_options.get_cexp_parser()
        printing_molecule_parser = wrapped_options.get_molecule_parser()
        printing_mexp_parser = wrapped_options.get_mexp_parser()
        rendered = _print_cexp(substituted,
                               printing_cexp_parser,
                               printing_molecule_parser,
                               printing_mexp_parser,
                               printer=printer,
                               unknown_header=unknown_header)
    except _ps_cm_error.Error as err:
        raise _pub_exception.ParserErrorWrapper(err.to_string())
    except _ps_cexp_interface.SubstituteError as err:
        raise _pub_exception.SubstitutionErrorWrapper(str(err))

    return rendered
示例#3
0
def parse_ast(expression, root_node, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Collapse a molecule AST into its atoms dictionary.

    Nodes are visited in the order returned by the BFS helper; each node's
    result is stored in a table keyed by id(node), and a parent reads the
    results of its children from that table.  Electronic charge of a molecule
    node is recorded under the symbol "e".  Abbreviation nodes are resolved by
    looking the symbol up in the abbreviation mapping of the options and
    parsing the mapped expression with the molecule parser.

    :type expression: str
    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The origin expression.
    :param root_node: The root node of the AST.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : dict
    :return: The parsed atoms dictionary.
    :raise _cm_error.Error: Raise when a node has no content, when an
        abbreviation is not in the mapping, or when parsing an abbreviation
        expansion fails.
    """

    #  Option views used below (interface, molecule, localization).
    if_opt = _interface_opt.OptionWrapper(options)
    molecule_opt = _ml_opt.OptionWrapper(options)
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    def localized(message_key):
        """Look a message up in the active language."""
        return _l10n_reg.get_message(lang_id, message_key)

    def new_no_content_error():
        """Create a 'no content' error that has no traceback entry yet."""
        return _cm_error.Error(
            _ml_error.MOLECULE_NO_CONTENT,
            localized("parser.molecule.error.no_content.description"),
            options
        )

    #  Get the iteration order (nodes are iterated from the leaves to the
    #  root, so children are always resolved before their parent).
    visit_order = _ml_ast_bfs.do_bfs(root_node, True)

    #  Per-node results, keyed by id(node).
    #  :type : dict[int, MergeUtil]
    results = {}

    for node in visit_order:
        if node.is_hydrate_group() or node.is_molecule():
            assert isinstance(
                node,
                (_ast_base.ASTNodeHydrateGroup, _ast_base.ASTNodeMolecule)
            )

            #  The prefix number multiplies everything inside this node.
            multiplier = node.get_prefix_number()
            merged = MergeUtil()

            #  Record the electronic charge (molecules only, non-zero only).
            if node.is_molecule():
                charge = node.get_electronic_count().simplify()
                if not charge.is_zero:
                    merged.add("e", charge * multiplier)

            #  Fold every child into this node.
            child_total = len(node)
            for position in range(child_total):
                child = node[position]
                child_result = results[id(child)]

                #  An empty part of a hydrate group is an error; the marked
                #  range depends on where the empty part sits in the group.
                if node.is_hydrate_group() and len(child_result) == 0:
                    assert isinstance(child, _ast_base.ASTNodeMolecule)

                    failure = new_no_content_error()

                    if position == 0:
                        #  First part: mark just after its end.
                        mark = child.get_ending_position_in_source_text() + 1
                        failure.push_traceback(
                            expression,
                            mark,
                            mark,
                            localized("parser.molecule.error.no_content.before")
                        )
                    elif position == child_total - 1:
                        #  Last part: mark just before its start.
                        mark = child.get_starting_position_in_source_text() - 1
                        failure.push_traceback(
                            expression,
                            mark,
                            mark,
                            localized("parser.molecule.error.no_content.after")
                        )
                    else:
                        #  Middle part: mark from just before to just after.
                        mark_from = child.get_starting_position_in_source_text() - 1
                        mark_to = child.get_ending_position_in_source_text() + 1
                        failure.push_traceback(
                            expression,
                            mark_from,
                            mark_to,
                            localized("parser.molecule.error.no_content.inside")
                        )

                    raise failure

                merged.merge(child_result, multiplier)

            #  Simplify, then publish the result of this node.
            _macro_simplify(expression, merged, node, options)
            results[id(node)] = merged
        elif node.is_atom():
            assert isinstance(node, _ast_base.ASTNodeAtom)

            #  An atom contributes its own symbol, counted by its suffix.
            multiplier = node.get_suffix_number()
            merged = MergeUtil()
            merged.add(node.get_atom_symbol(), multiplier)

            results[id(node)] = merged
        elif node.is_parenthesis():
            assert isinstance(node, _ast_base.ASTNodeParenthesisWrapper)

            #  The suffix number multiplies the wrapped content.
            multiplier = node.get_suffix_number()
            merged = MergeUtil()

            inner_result = results[id(node.get_inner_node())]

            #  Empty parentheses are an error.
            if len(inner_result) == 0:
                failure = new_no_content_error()
                failure.push_traceback(
                    expression,
                    node.get_starting_position_in_source_text(),
                    node.get_ending_position_in_source_text(),
                    localized("parser.molecule.error.no_content.inside")
                )
                raise failure

            merged.merge(inner_result, multiplier)

            #  Simplify, then publish the result of this node.
            _macro_simplify(expression, merged, node, options)
            results[id(node)] = merged
        elif node.is_abbreviation():
            assert isinstance(node, _ast_base.ASTNodeAbbreviation)

            symbol = node.get_abbreviation_symbol()

            #  An empty abbreviation symbol is an error.
            if len(symbol) == 0:
                failure = new_no_content_error()
                failure.push_traceback(
                    expression,
                    node.get_starting_position_in_source_text(),
                    node.get_ending_position_in_source_text(),
                    localized("parser.molecule.error.no_content.inside")
                )
                raise failure

            #  The symbol must be present in the abbreviation mapping.
            known_abbreviations = molecule_opt.get_abbreviation_mapping()
            if symbol not in known_abbreviations:
                failure = _cm_error.Error(
                    _ml_error.MOLECULE_UNSUPPORTED_ABBREVIATION,
                    localized(
                        "parser.molecule.error.unsupported_abbreviation.description"
                    ),
                    options
                )
                #  The marked range is narrowed by one character on each side
                #  (presumably to exclude the surrounding brackets).
                failure.push_traceback(
                    expression,
                    node.get_starting_position_in_source_text() + 1,
                    node.get_ending_position_in_source_text() - 1,
                    localized(
                        "parser.molecule.error.unsupported_abbreviation.message"
                    )
                )
                raise failure

            expansion = known_abbreviations[symbol]

            #  Parse the expansion with the molecule parser, using the same
            #  protected-header settings as this call.
            try:
                expansion_parser = if_opt.get_molecule_parser()
                expansion_root = expansion_parser.parse_expression(
                    expansion,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )
                expansion_atoms = expansion_parser.parse_ast(
                    expansion,
                    expansion_root,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )
            except _cm_error.Error as err:
                #  Add two traceback entries: the expansion itself, then the
                #  place in the origin expression where it was used.
                err.push_traceback(
                    expansion,
                    0,
                    len(expansion) - 1,
                    localized("parser.molecule.error.parsing_abbreviation.expand")
                )
                err.push_traceback(
                    expression,
                    node.get_starting_position_in_source_text() + 1,
                    node.get_ending_position_in_source_text() - 1,
                    localized("parser.molecule.error.parsing_abbreviation.origin")
                )
                raise err

            merged = MergeUtil()

            #  Every atom of the expansion is scaled by the suffix number.
            multiplier = node.get_suffix_number()
            for atom_symbol in expansion_atoms:
                merged.add(atom_symbol, expansion_atoms[atom_symbol] * multiplier)

            #  Simplify, then publish the result of this node.
            _macro_simplify(expression, merged, node, options)
            results[id(node)] = merged
        else:
            raise RuntimeError("Never reach this condition.")

    #  The result of the whole AST is the result of its root node.
    root_result = results[id(root_node)]

    #  An entirely empty molecule is an error.
    if len(root_result) == 0:
        failure = new_no_content_error()
        failure.push_traceback(
            expression,
            0,
            len(expression) - 1,
            localized("parser.molecule.error.no_content.inside")
        )
        raise failure

    return root_result.get_data()
示例#4
0
def tokenize(expression,
             options,
             mexp_protected_header_enabled=False,
             mexp_protected_header_prefix="X"):
    """Split a molecule expression into a token list.

    The expression is scanned from left to right.  Recognized pieces are:
    digit runs, symbols (one upper-case character followed by lower-case
    characters), the status descriptors "(g)", "(l)", "(s)" and "(aq)", the
    electronic flags "e+" and "e-", the single characters ".", "(", ")", "<"
    and ">", abbreviations in "[...]" and math expressions in "{...}".  An end
    token is always appended last.

    :type expression: str
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The expression.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : list[Token]
    :return: The token list.
    :raise bce.parser.common.error.Error: Raise when a parser error occurred.
    """

    #  Initialize.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()
    if_opt = _interface_opt.OptionWrapper(options)

    def build_error(error_code, description_key, mark_begin, mark_end,
                    message_key, **message_options):
        """Create an error carrying one traceback entry on the expression."""
        failure = _cm_error.Error(
            error_code,
            _l10n_reg.get_message(lang_id, description_key),
            options)
        failure.push_traceback(
            expression, mark_begin, mark_end,
            _l10n_reg.get_message(lang_id, message_key, **message_options))
        return failure

    #  Fixed multi-character tokens.  They are tested before the single
    #  characters so that "(g)" and friends win over a plain "(".
    literal_tokens = (
        ("(g)", create_gas_status_token),
        ("(l)", create_liquid_status_token),
        ("(s)", create_solid_status_token),
        ("(aq)", create_aqueous_status_token),
        ("e+", create_positive_electronic_flag_token),
        ("e-", create_negative_electronic_flag_token),
    )

    #  Fixed single-character tokens.
    single_char_tokens = {
        ".": create_hydrate_dot_token,
        "(": create_left_parenthesis_token,
        ")": create_right_parenthesis_token,
        "<": create_electronic_begin_token,
        ">": create_electronic_end_token,
    }

    tokens = []
    cur_pos = 0
    end_pos = len(expression)

    while cur_pos < end_pos:
        cur_ch = expression[cur_pos]

        #  A digit starts an integer token that spans the whole digit run.
        if cur_ch.isdigit():
            stop = cur_pos + 1
            while stop < end_pos and expression[stop].isdigit():
                stop += 1

            tokens.append(
                create_integer_operand_token(expression[cur_pos:stop],
                                             len(tokens), cur_pos))
            cur_pos = stop
            continue

        #  An upper-case character starts a symbol token that also takes the
        #  lower-case characters following it.
        if cur_ch.isupper():
            stop = cur_pos + 1
            while stop < end_pos and expression[stop].islower():
                stop += 1

            tokens.append(
                create_symbol_token(expression[cur_pos:stop],
                                    len(tokens), cur_pos))
            cur_pos = stop
            continue

        #  Status descriptors and electronic flags.
        literal_hit = next(
            (entry for entry in literal_tokens
             if expression.startswith(entry[0], cur_pos)),
            None)
        if literal_hit is not None:
            literal_text, literal_factory = literal_hit
            tokens.append(literal_factory(len(tokens), cur_pos))
            cur_pos += len(literal_text)
            continue

        #  Hydrate dot, plain parentheses and electronic begin/end marks.
        char_factory = single_char_tokens.get(cur_ch)
        if char_factory is not None:
            tokens.append(char_factory(len(tokens), cur_pos))
            cur_pos += 1
            continue

        #  '[' starts an abbreviation that runs up to the next ']'.
        if cur_ch == "[":
            close_index = expression.find("]", cur_pos + 1)

            #  Raise an error if there is no ']'.
            if close_index == -1:
                raise build_error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    "parser.molecule.error.parenthesis_mismatch.description",
                    cur_pos, cur_pos,
                    "parser.molecule.error.parenthesis_mismatch.right")

            #  The token text includes both brackets.
            stop = close_index + 1
            tokens.append(
                create_abbreviation_token(expression[cur_pos:stop],
                                          len(tokens), cur_pos))
            cur_pos = stop
            continue

        #  '{' starts a math expression that runs up to the matching '}'.
        if cur_ch == "{":
            #  Count the brackets opened inside the math expression; the
            #  first closing bracket met at depth 0 must be the end '}'.
            depth = 0
            stop = -1

            for scan_pos in range(cur_pos + 1, end_pos):
                scan_ch = expression[scan_pos]

                if scan_ch in ("(", "[", "{"):
                    depth += 1
                elif scan_ch in (")", "]", "}"):
                    if depth == 0:
                        #  Raise an error if the bracket isn't '}'.
                        if scan_ch != "}":
                            raise build_error(
                                _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                                "parser.molecule.error.parenthesis_mismatch.description",
                                scan_pos, scan_pos,
                                "parser.molecule.error.parenthesis_mismatch.incorrect",
                                replace_map={"$1": "}"})

                        stop = scan_pos + 1
                        break

                    depth -= 1

            #  Raise an error if the end '}' was never found.
            if stop == -1:
                raise build_error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    "parser.molecule.error.parenthesis_mismatch.description",
                    cur_pos, cur_pos,
                    "parser.molecule.error.parenthesis_mismatch.right")

            #  Raise an error if the math expression is just "{}".
            if cur_pos + 2 == stop:
                raise build_error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    "parser.molecule.error.no_content.description",
                    cur_pos, cur_pos + 1,
                    "parser.molecule.error.no_content.inside")

            #  The math expression text includes both braces.
            mexp_expr = expression[cur_pos:stop]

            #  Evaluate it with the MEXP parser selected by the options.
            try:
                ev_value = if_opt.get_mexp_parser().parse(
                    mexp_expr,
                    options,
                    protected_header_enabled=mexp_protected_header_enabled,
                    protected_header_prefix=mexp_protected_header_prefix)
            except _cm_error.Error as err:
                #  Point the error at the math expression inside the molecule.
                err.push_traceback(
                    expression, cur_pos, stop - 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.parsing_mexp.message"))
                raise err

            tokens.append(
                create_mexp_operand_token(mexp_expr, ev_value, len(tokens),
                                          cur_pos))
            cur_pos = stop
            continue

        #  Nothing matched: the current character can't be tokenized.
        raise build_error(
            _ml_error.MOLECULE_UNRECOGNIZED_TOKEN,
            "parser.molecule.error.unrecognized_token.description",
            cur_pos, cur_pos,
            "parser.molecule.error.unrecognized_token.message")

    #  Add an end token.
    tokens.append(create_end_token(len(tokens), len(expression)))

    return tokens
示例#5
0
def _substitute_cexp_side(item_count, get_item, append_item, ml_parser,
                          substitute_map, options):
    """Substitute every item of one side of a chemical equation.

    For each item, the molecule AST and the coefficient are substituted.  An
    item whose substituted AST is None is skipped.  Otherwise the prefix
    number of the AST is folded into the coefficient, the AST is re-parsed
    and the item is appended through 'append_item'.

    :type item_count: int
    :type substitute_map: dict
    :type options: bce.option.Option
    :param item_count: The number of items on this side.
    :param get_item: A callable that returns the item at an index.
    :param append_item: A callable that appends a substituted item (operator
        ID, coefficient, AST root, atoms dictionary) to the new equation.
    :param ml_parser: The molecule parser.
    :param substitute_map: The substitution map.
    :param options: The options.
    :raise _cexp_interface.SubstituteError: Raise when a molecule can't be
        substituted or re-parsed.
    """

    for idx in range(item_count):
        #  Get the item.
        item = get_item(idx)

        #  Get and substitute the AST.
        try:
            ast_root = ml_parser.substitute(item.get_molecule_ast(),
                                            substitute_map)
        except _ml_interface.SubstituteError:
            raise _cexp_interface.SubstituteError(
                "Can't substitute sub-molecule.")

        #  Substitute the origin coefficient.  This is checked even when the
        #  item is skipped below.
        item_coeff = item.get_coefficient().subs(substitute_map).simplify()
        _check_substituted_mexp(item_coeff)

        #  No AST came back from the substitution: drop the item.
        if ast_root is None:
            continue

        #  Fold the prefix number of the AST into the coefficient.
        coeff = (item_coeff * ast_root.get_prefix_number()).simplify()
        _check_substituted_mexp(coeff)

        #  Clear the prefix number of the AST (it now lives in 'coeff').
        ast_root.set_prefix_number(_math_cst.ONE)

        try:
            #  Re-parse.  There is no source text for the substituted AST, so
            #  "-" is passed as the expression.
            atom_dict = ml_parser.parse_ast(
                "-", ast_root, options, mexp_protected_header_enabled=False)

            #  Add the substituted item.
            append_item(item.get_operator_id(), coeff, ast_root, atom_dict)
        except _cm_error.Error:
            raise _cexp_interface.SubstituteError("Re-parse error.")


def substitute_cexp(cexp_object, substitute_map, options):
    """Do substitution on a chemical equation.

    Both sides are substituted item by item into a new chemical equation.
    Afterwards, items with coefficient zero are removed, items with a
    negative coefficient are moved to the other side and the coefficients
    are integerized.

    :type cexp_object: bce.parser.interface.cexp_parser.ChemicalEquation
    :type substitute_map: dict
    :type options: bce.option.Option
    :param cexp_object: The chemical equation object.
    :param substitute_map: The substitution map.
    :param options: The options.
    :rtype : bce.parser.interface.cexp_parser.ChemicalEquation
    :return: The substituted chemical equation.
    :raise _cexp_interface.SubstituteError: Raise when the input has an empty
        side, when an item can't be substituted or re-parsed, or when a side
        of the result is empty.
    """

    #  Only equations that have items on both sides are supported.
    if cexp_object.get_left_item_count() == 0 or \
            cexp_object.get_right_item_count() == 0:
        raise _cexp_interface.SubstituteError("Unsupported form.")

    #  Wrap the interface options.
    if_opt = _interface_opt.OptionWrapper(options)

    #  Get the molecule parser.
    ml_parser = if_opt.get_molecule_parser()

    #  Initialize an empty chemical equation.
    new_ce = _cexp_interface.ChemicalEquation()

    #  Process left items.
    _substitute_cexp_side(cexp_object.get_left_item_count(),
                          cexp_object.get_left_item,
                          new_ce.append_left_item,
                          ml_parser,
                          substitute_map,
                          options)

    #  Process right items.
    _substitute_cexp_side(cexp_object.get_right_item_count(),
                          cexp_object.get_right_item,
                          new_ce.append_right_item,
                          ml_parser,
                          substitute_map,
                          options)

    #  Remove items with coefficient 0.
    new_ce.remove_items_with_coefficient_zero()

    #  Move items that have negative coefficient to another side.
    new_ce.move_items_with_negative_coefficient_to_another_side()

    #  Integerize the coefficients.
    new_ce.coefficients_integerize()

    #  The clean-up above may have emptied a side.
    if new_ce.get_left_item_count() == 0 or new_ce.get_right_item_count() == 0:
        raise _cexp_interface.SubstituteError("Side(s) eliminated.")

    return new_ce
示例#6
0
def parse(expression, token_list, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Build a chemical equation object from its token list.

    The tokens are walked by a small state machine. Each molecule token is
    handed to the molecule parser; its prefix number is split off as the
    coefficient and the molecule is appended to the left side (before the
    equal sign) or to the right side (after it) together with the operator
    that preceded it. The loop stops when the end token is met right after
    a molecule.

    :type expression: str
    :type token_list: list[bce.parser.cexp.token.Token]
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The original chemical equation (used in error tracebacks).
    :param token_list: The tokens of the chemical equation.
    :param options: The options.
    :param mexp_protected_header_enabled: Forwarded to the molecule parser.
    :param mexp_protected_header_prefix: Forwarded to the molecule parser.
    :rtype : bce.parser.interface.cexp_parser.ChemicalEquation
    :return: The parsed chemical equation.
    :raise _cm_error.Error: If the expression is empty, an operator has no
        molecule next to it, a molecule can't be parsed, the equal sign is
        duplicated, there is only one molecule, or a normal-form expression
        has no equal sign.
    """

    #  Wrap the options (parser interface and localization).
    parser_options = _interface_opt.OptionWrapper(options)
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    def _text(message_key):
        #  Look up a localized message in the current language.
        return _l10n_reg.get_message(lang_id, message_key)

    def _new_error(error_code, description_key):
        #  Create a parser error carrying a localized description.
        return _cm_error.Error(error_code, _text(description_key), options)

    def _missing_molecule_error(bad_token, is_first_token):
        #  Build the error for a token that stands where a molecule was expected.
        at_end = bad_token.is_end()

        if at_end and is_first_token:
            #  Nothing but the end token: the expression is empty.
            return _new_error(
                _cexp_error.CEXP_EMPTY_EXPRESSION,
                "parser.cexp.error.empty_expression.description"
            )

        error = _new_error(
            _cexp_error.CEXP_NO_CONTENT,
            "parser.cexp.error.no_content.description"
        )

        if at_end:
            #  No content between the previous token and the end token.
            first = bad_token.get_position() - 1
            last = bad_token.get_position() - 1
            hint_key = "parser.cexp.error.no_content.operator_after"
        elif is_first_token:
            #  No content before this token.
            first = bad_token.get_position()
            last = bad_token.get_position() + len(bad_token.get_symbol()) - 1
            hint_key = "parser.cexp.error.no_content.operator_before"
        else:
            #  No content between this token and the previous one.
            first = bad_token.get_position() - 1
            last = bad_token.get_position() + len(bad_token.get_symbol()) - 1
            hint_key = "parser.cexp.error.no_content.operator_between"

        error.push_traceback(expression, first, last, _text(hint_key))
        return error

    #  The result, and the operator applied to the next molecule.
    equation = _cexp_interface.ChemicalEquation()
    sign = _cexp_interface.OPERATOR_PLUS

    #  The form registered so far (None until the first operator is read).
    detected_form = None

    #  False while reading the left side, True after the equal sign.
    on_right_side = False

    #  State machine bookkeeping.
    state = _STATE_ROUTE_1
    state_after_molecule = None
    equal_sign_position = -1

    index = 0
    while True:
        token = token_list[index]

        if state == _STATE_ROUTE_1:
            #  A new term starts here: '+' by default, unless a '-' comes first.
            sign = _cexp_interface.OPERATOR_PLUS
            if token.is_operator_minus():
                state = _STATE_READ_MINUS_1
            else:
                state_after_molecule = _STATE_ROUTE_2
                state = _STATE_READ_MOLECULE
            continue

        if state in (_STATE_READ_MINUS_1, _STATE_READ_MINUS_2):
            #  Consume a '-' (leading or between molecules), then expect a molecule.
            detected_form = _macro_register_form(expression, detected_form, _FORM_NORMAL, options)
            sign = _cexp_interface.OPERATOR_MINUS
            index += 1
            state_after_molecule = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
            continue

        if state == _STATE_READ_MOLECULE:
            if not token.is_molecule():
                raise _missing_molecule_error(token, index == 0)

            try:
                molecule_parser = parser_options.get_molecule_parser()

                #  Parse the molecule text into an AST.
                molecule_ast = molecule_parser.parse_expression(
                    token.get_symbol(),
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )

                #  Move the prefix number out of the AST; it becomes the coefficient.
                coefficient = molecule_ast.get_prefix_number()
                molecule_ast.set_prefix_number(_math_cst.ONE)

                #  Parse the AST (now with prefix number one).
                atoms = molecule_parser.parse_ast(
                    token.get_symbol(),
                    molecule_ast,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )

                #  Store the molecule on the side that is currently being read.
                if on_right_side:
                    add_item = equation.append_right_item
                else:
                    add_item = equation.append_left_item
                add_item(sign, coefficient, molecule_ast, atoms)
            except _cm_error.Error as err:
                #  Point the error at the molecule inside the whole expression.
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _text("parser.cexp.error.parsing_molecule.message")
                )
                raise err

            #  Step over the molecule and resume at the saved state.
            index += 1
            state = state_after_molecule
            continue

        if state == _STATE_ROUTE_2:
            #  Decide what follows a molecule.
            if token.is_operator_plus():
                state = _STATE_READ_PLUS
            elif token.is_operator_minus():
                state = _STATE_READ_MINUS_2
            elif token.is_operator_separator():
                state = _STATE_READ_SEPARATOR
            elif token.is_equal():
                state = _STATE_READ_EQUAL_SIGN
            elif token.is_end():
                break
            else:
                raise RuntimeError("BUG: Unexpected token (should never happen).")
            continue

        if state == _STATE_READ_PLUS:
            #  Consume a '+', then expect a molecule.
            detected_form = _macro_register_form(expression, detected_form, _FORM_NORMAL, options)
            sign = _cexp_interface.OPERATOR_PLUS
            index += 1
            state_after_molecule = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
            continue

        if state == _STATE_READ_SEPARATOR:
            #  Consume a separator (auto-correction form); the next molecule gets '+'.
            detected_form = _macro_register_form(expression, detected_form, _FORM_AUTO_CORRECTION, options)
            sign = _cexp_interface.OPERATOR_PLUS
            index += 1
            state_after_molecule = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
            continue

        if state == _STATE_READ_EQUAL_SIGN:
            detected_form = _macro_register_form(expression, detected_form, _FORM_NORMAL, options)

            #  Step over the '=' ('token' still refers to it below).
            index += 1

            if on_right_side:
                #  A second equal sign: report both this one and the previous one.
                err = _new_error(
                    _cexp_error.CEXP_DUPLICATED_EQUAL_SIGN,
                    "parser.cexp.error.duplicated_equal_sign.description"
                )
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _text("parser.cexp.error.duplicated_equal_sign.duplicated")
                )
                err.push_traceback(
                    expression,
                    equal_sign_position,
                    equal_sign_position,
                    _text("parser.cexp.error.duplicated_equal_sign.previous")
                )
                raise err

            #  Remember where the equal sign was and switch to the right side.
            equal_sign_position = token.get_position()
            on_right_side = True
            state = _STATE_ROUTE_1
            continue

        raise RuntimeError("BUG: Unexpected state.")

    #  A single molecule is not an equation.
    if len(equation) == 1:
        err = _new_error(
            _cexp_error.CEXP_ONLY_ONE_MOLECULE,
            "parser.cexp.error.only_one_molecule.description"
        )
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _text("parser.cexp.error.only_one_molecule.message")
        )
        raise err

    #  Every operator registers a form, so it must be set by now.
    if detected_form is None:
        raise RuntimeError("BUG: Form was not set.")

    #  The normal form requires an equal sign.
    if detected_form == _FORM_NORMAL and not on_right_side:
        err = _new_error(
            _cexp_error.CEXP_NO_EQUAL_SIGN,
            "parser.cexp.error.no_equal_sign.description"
        )
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _text("parser.cexp.error.no_equal_sign.message")
        )
        raise err

    return equation
示例#7
0
def balance_chemical_equation(expression,
                              options,
                              printer=_pub_printer.PRINTER_TEXT,
                              unknown_header="X",
                              callback_before_balance=None,
                              callback_after_balance=None,
                              callback_context=None):
    """Balance a chemical equation.

    The expression is parsed, balanced in place and then printed with the
    requested printer. Optional callbacks are invoked right before and right
    after the balancing step as ``callback(callback_context, cexp_object)``.

    Any callable is accepted as a callback (plain function, lambda, bound
    method, ``functools.partial``, object with ``__call__``). Values that are
    ``None`` or not callable are ignored.

    :type expression: str
    :type options: bce.option.Option
    :type printer: int
    :type unknown_header: str
    :type callback_before_balance: collections.abc.Callable | None
    :type callback_after_balance: collections.abc.Callable | None
    :param expression: The chemical equation.
    :param options: The options.
    :param printer: The printer ID.
    :param unknown_header: The header of unknowns.
    :param callback_before_balance: Callback that will be called before balancing.
    :param callback_after_balance: Callback that will be called after balancing.
    :param callback_context: The callback context (passed to the callbacks as
        their first argument).
    :rtype: str | dict[str, str]
    :return: The balanced chemical equation.
    :raise InvalidCharacterException: If the expression fails the input
        character check.
    :raise ParserErrorWrapper: If a parser error is raised while parsing,
        running the callbacks, balancing or printing.
    :raise LogicErrorWrapper: If a logic error is raised while parsing,
        running the callbacks, balancing or printing.
    """

    #  Check characters.
    if not _util_input_chk.check_input_expression_characters(expression):
        raise _pub_exception.InvalidCharacterException("Invalid character.")

    #  Wrap the parser interface options.
    if_opt = _interface_opt.OptionWrapper(options)

    #  Get the CEXP parser.
    cexp_parser = if_opt.get_cexp_parser()

    try:
        #  Parse the chemical equation.
        cexp_object = cexp_parser.parse(
            expression,
            options,
            mexp_protected_header_enabled=True,
            mexp_protected_header_prefix=unknown_header)

        #  Run before balance callback. callable() is used instead of a
        #  FunctionType check so that bound methods, partials and callable
        #  objects are not silently skipped (None is not callable).
        if callable(callback_before_balance):
            callback_before_balance(callback_context, cexp_object)

        #  Balance the chemical equation.
        _lgc_bce_main.balance_chemical_equation(cexp_object,
                                                options,
                                                unknown_header=unknown_header)

        #  Run after balance callback (same acceptance rule as above).
        if callable(callback_after_balance):
            callback_after_balance(callback_context, cexp_object)

        #  Print.
        return _print_cexp(cexp_object,
                           if_opt.get_cexp_parser(),
                           if_opt.get_molecule_parser(),
                           if_opt.get_mexp_parser(),
                           printer=printer,
                           unknown_header=unknown_header)
    except _ps_cm_error.Error as err:
        #  Expose parser errors through the public exception type.
        raise _pub_exception.ParserErrorWrapper(err.to_string())
    except _lgc_cm_error.Error as err:
        #  Expose logic errors through the public exception type.
        raise _pub_exception.LogicErrorWrapper(err.to_string())