Пример #1
0
def _macro_register_form(expression, origin_form, new_form, options):
    """Macro of registering new form.

    :type expression: str
    :type origin_form: int
    :type new_form: int
    :type options: _opt.Option
    :param expression: Origin chemical expression.
    :param origin_form: The origin form.
    :param new_form: The new form.
    :param options: The BCE options.
    :rtype : int
    :return: The new form if no conflict exists.
    :raise _pe.Error: If the new form conflicts with the origin one.
    """

    #  Accept directly when no form was registered before or both forms agree.
    if origin_form is None or origin_form == new_form:
        return new_form

    #  The two forms conflict; build a mixed-form error covering the whole
    #  expression and raise it.
    err = _pe.Error(_ce_error.PE_CE_MIXED_FORM,
                    _msg_id.MSG_PE_CE_MIXED_FORM_DESCRIPTION, options)
    err.push_traceback_ex(expression, 0,
                          len(expression) - 1,
                          _msg_id.MSG_PE_CE_MIXED_FORM_TB_MESSAGE)
    raise err
Пример #2
0
def _check_right_operand(expression, token_list, token_id, options):
    """Check the right operand.

    :type expression: str
    :type token_list: list
    :type token_id: int
    :type options: _opt.Option
    :param expression: (The same as the variable in parse_to_rpn() routine.)
    :param token_list: (The same as the variable in parse_to_rpn() routine.)
    :param token_id: (The same as the variable in parse_to_rpn() routine.)
    :param options: (The same as the variable in parse_to_rpn() routine.)
    :raise _pe.Error: When there's no right operand.
    """

    #  Decide whether the token that follows can serve as a right operand.
    has_operand = False
    if token_id + 1 != len(token_list):
        follower = token_list[token_id + 1]
        has_operand = (follower.is_left_parenthesis()
                       or follower.is_operand()
                       or follower.is_function())

    if not has_operand:
        #  Report the missing operand at the operator's position.
        err_pos = token_list[token_id].get_position()

        err = _pe.Error(_mexp_errors.PE_MEXP_MISSING_OPERAND,
                        _msg_id.MSG_PE_MEXP_MISSING_OPERAND_DESCRIPTION,
                        options)

        err.push_traceback_ex(expression, err_pos, err_pos,
                              _msg_id.MSG_PE_MEXP_MISSING_OPERAND_RIGHT)

        raise err
Пример #3
0
def _macro_simplify(expression, mu_obj, node, options):
    """Macro for simplifying.

    :type expression: str
    :type mu_obj: MergeUtil
    :type node: bce.parser.ast.molecule._ASTNodeBaseML
    :type options: bce.option.Option
    :param expression: The origin expression.
    :param mu_obj: The MergeUtil object.
    :param node: The work node.
    :param options: The options.
    """

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Run the simplification and keep the symbols it removed.
    dropped_symbols = mu_obj.simplify()

    #  Pre-create an atom-eliminated error.
    err = _cm_error.Error(
        _ml_error.MOLECULE_ELEMENT_ELIMINATED,
        _l10n_reg.get_message(
            lang_id,
            "parser.molecule.error.element_eliminated.description"
        ),
        options
    )

    #  Count eliminated elements while attaching one traceback per element.
    eliminated_count = 0
    for element_symbol in dropped_symbols:
        if element_symbol == "e":
            #  Electron elimination is legal; skip it.
            continue

        eliminated_count += 1

        #  Add a description.
        err.push_traceback(
            expression,
            node.get_starting_position_in_source_text(),
            node.get_ending_position_in_source_text(),
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.element_eliminated.message",
                replace_map={
                    "$1": element_symbol
                }
            )
        )

    #  Raise the error only if at least one real element was eliminated.
    if eliminated_count != 0:
        raise err
Пример #4
0
def _macro_register_form(expression, origin_form, new_form, options):
    """Macro of registering new form.

    :type expression: str
    :type origin_form: int
    :type new_form: int
    :type options: bce.option.Option
    :param expression: The chemical expression.
    :param origin_form: The origin form.
    :param new_form: The new form.
    :param options: The options.
    :rtype : int
    :return: The new form if no conflict exists.
    :raise _cm_error.Error: If the new form conflicts with the origin one.
    """

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Accept directly when no form was registered before or both forms agree.
    if origin_form is None or origin_form == new_form:
        return new_form

    #  The two forms conflict; report a mixed-form error covering the whole
    #  expression.
    err = _cm_error.Error(
        _cexp_error.CEXP_MIXED_FORM,
        _l10n_reg.get_message(
            lang_id,
            "parser.cexp.error.mixed_form.description"
        ),
        options
    )
    err.push_traceback(
        expression,
        0,
        len(expression) - 1,
        _l10n_reg.get_message(
            lang_id,
            "parser.cexp.error.mixed_form.message"
        )
    )
    raise err
Пример #5
0
def _macro_simplify(expression, mu_obj, node, options):
    """Macro for simplifying.

    :type expression: str
    :type mu_obj: MergeUtil
    :type node: _ast_base._ASTNodeBaseML
    :type options: _opt.Option
    :param expression: The origin expression.
    :param mu_obj: The MergeUtil object.
    :param node: The work node.
    :param options: The BCE options.
    """

    #  Run the simplification and keep the symbols it removed.
    dropped_symbols = mu_obj.simplify()

    #  Pre-create an atom-eliminated error.
    err = _pe.Error(_ml_error.PE_ML_ATOM_ELIMINATED,
                    _msg_id.MSG_PE_ML_ATOM_ELIMINATED_DESCRIPTION,
                    options)

    #  Count eliminated atoms while attaching one traceback per atom.
    eliminated_count = 0
    for atom_symbol in dropped_symbols:
        if atom_symbol == "e":
            #  Electron elimination is legal; skip it.
            continue

        eliminated_count += 1

        #  Add a description.
        err.push_traceback_ex(expression,
                              node.get_starting_position_in_source_text(),
                              node.get_ending_position_in_source_text(),
                              _msg_id.MSG_PE_ML_ATOM_ELIMINATED_TB_MESSAGE,
                              {"$1": atom_symbol})

    #  Raise the error only if at least one real atom was eliminated.
    if eliminated_count != 0:
        raise err
Пример #6
0
def _check_right_operand(expression, token_list, token_id, options):
    """Check the right operand.

    :type expression: str
    :type token_list: list[bce.parser.mexp.token.Token]
    :type token_id: int
    :type options: bce.option.Option
    :param expression: (The same as the variable in parse_to_rpn() routine.)
    :param token_list: (The same as the variable in parse_to_rpn() routine.)
    :param token_id: (The same as the variable in parse_to_rpn() routine.)
    :param options: (The same as the variable in parse_to_rpn() routine.)
    :raise _cm_error.Error: Raise when there's no right operand.
    """

    #  Decide whether the token that follows can serve as a right operand.
    has_operand = False
    if token_id + 1 != len(token_list):
        follower = token_list[token_id + 1]
        has_operand = (follower.is_left_parenthesis()
                       or follower.is_operand()
                       or follower.is_function())

    if has_operand:
        return

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Raise the error.
    err_pos = token_list[token_id].get_position()
    err = _cm_error.Error(
        _mexp_errors.MEXP_MISSING_OPERAND,
        _l10n_reg.get_message(
            lang_id, "parser.mexp.error.missing_operand.description"),
        options)
    err.push_traceback(
        expression, err_pos, err_pos,
        _l10n_reg.get_message(lang_id,
                              "parser.mexp.error.missing_operand.right"))
    raise err
Пример #7
0
def parse_to_rpn(expression, token_list, options):
    """Parse an infix math expression to RPN.

    Walks the token list once, delegating RPN construction to _RPNProcessor
    while this routine validates syntax (operands, parenthesis matching,
    function argument counts) and inserts implicit '*' operators.

    :type expression: str
    :type token_list: list
    :type options: _opt.Option
    :param expression: The infix math expression.
    :param token_list: The tokenized infix math expression.
    :param options: BCE options.
    :rtype : list
    :return: The RPN token list.
    :raise _pe.Error: When a parser error occurred.
    """

    #  Initialize
    token_id = 0
    token_cnt = len(token_list)
    rpn = _RPNProcessor()
    #  Argument counters of the parenthesis group currently being read
    #  (current count so far / count required by the enclosing function).
    cur_argc = 0
    req_argc = 0
    #  Token index of the previous argument separator (or left parenthesis);
    #  used to detect empty arguments / empty parentheses.
    prev_sep_pos = -1
    #  Mapping from each right parenthesis symbol to its matching left one.
    p_match_map = {")": "(", "]": "[", "}": "{"}
    #  Stack saving the outer group's state while a nested group is read.
    p_stack = _adt_stack.Stack()
    #  Whether the current parenthesis group is a function argument list.
    p_fn = False

    while token_id < token_cnt:
        #  Get current token.
        token = token_list[token_id]

        #  Get previous token.
        if token_id != 0:
            prev_tok = token_list[token_id - 1]
        else:
            prev_tok = None

        if token.is_operand():
            if not (prev_tok is None):
                if prev_tok.is_right_parenthesis():
                    if token.is_symbol_operand():
                        #  Do completion:
                        #    ([expr])[unknown] => ([expr])*[unknown]
                        #
                        #  For example:
                        #    (3-y)x => (3-y)*x
                        rpn.add_operator(
                            _mexp_tok.create_multiply_operator_token())
                    else:
                        #  Numeric parenthesis suffix was not supported.
                        #
                        #  For example:
                        #    (x-y)3
                        #         ^
                        #         Requires a '*' before this token.
                        err = _pe.Error(
                            _mexp_errors.PE_MEXP_MISSING_OPERATOR,
                            _msg_id.MSG_PE_MEXP_MISSING_OPERATOR_DESCRIPTION,
                            options)

                        err.push_traceback_ex(
                            expression, token.get_position(),
                            token.get_position() + len(token.get_symbol()) - 1,
                            _msg_id.MSG_PE_MEXP_MISSING_OPERATOR_MUL_BEFORE)

                        raise err

                if prev_tok.is_operand():
                    #  Do completion:
                    #    [number][symbol] => [number]*[symbol]
                    #
                    #  For example:
                    #    4x => 4*x
                    rpn.add_operator(
                        _mexp_tok.create_multiply_operator_token())

            #  Process the token.
            rpn.add_operand(token)

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_function():
            #  Raise an error if the function is unsupported.
            if not token.get_symbol() in _mexp_fns.SUPPORTED:
                err = _pe.Error(_mexp_errors.PE_MEXP_FN_UNSUPPORTED,
                                _msg_id.MSG_PE_MEXP_FN_UNSUPPORTED_DESCRIPTION,
                                options)

                err.push_traceback_ex(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _msg_id.MSG_PE_MEXP_FN_UNSUPPORTED_TB_MESSAGE,
                    {"$1": token.get_symbol()})

                raise err

            if (not (prev_tok is None)) and (prev_tok.is_operand() or
                                             prev_tok.is_right_parenthesis()):
                #  Do completion:
                #    [num][fn] => [num]*[fn]
                #
                #  For example:
                #    4pow(2,3) => 4*pow(2,3)
                rpn.add_operator(_mexp_tok.create_multiply_operator_token())

            #  Process the token.
            rpn.add_function(token)

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_operator():
            #  Get the operator.
            op = _mexp_operators.OPERATORS[token.get_subtype()]

            #  Check operands.
            if op.is_required_left_operand():
                _check_left_operand(expression, token_list, token_id, options)

            if op.is_required_right_operand():
                _check_right_operand(expression, token_list, token_id, options)

            #  Process the token.
            rpn.add_operator(token)

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_left_parenthesis():
            #  Save state.
            p_stack.push(
                _ParenthesisStackItem(token.get_symbol(), token_id, p_fn,
                                      cur_argc, req_argc, prev_sep_pos))

            cur_argc = 0
            prev_sep_pos = token_id

            #  Set function state and get required argument count.
            if (not (prev_tok is None)) and prev_tok.is_function():
                p_fn = True
                req_argc = _mexp_fns.ARGUMENT_COUNT[prev_tok.get_symbol()]
            else:
                p_fn = False
                req_argc = 0

            if (not (prev_tok is None)) and (prev_tok.is_right_parenthesis()
                                             or prev_tok.is_operand()):
                #  Do completion
                #    [lp][expr][rp][lp][expr][rp] => [lp][expr][rp]*[lp][expr][rp]
                #
                #  For example:
                #    (2+3)(4+2) => (2+3)*(4+2)
                rpn.add_operator(_mexp_tok.create_multiply_operator_token())

            #  Process the token.
            rpn.add_left_parenthesis(token)

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_right_parenthesis():
            #  Raise an error if there's no content between two separators.
            if prev_sep_pos + 1 == token_id:
                err = _pe.Error(_mexp_errors.PE_MEXP_NO_CONTENT,
                                _msg_id.MSG_PE_MEXP_NO_CONTENT_DESCRIPTION,
                                options)

                if prev_tok.is_left_parenthesis():
                    err.push_traceback_ex(
                        expression, prev_tok.get_position(),
                        token.get_position(),
                        _msg_id.MSG_PE_MEXP_NO_CONTENT_PARENTHESIS)
                else:
                    err.push_traceback_ex(
                        expression, prev_tok.get_position(),
                        token.get_position(),
                        _msg_id.MSG_PE_MEXP_NO_CONTENT_ARGUMENT)

                raise err

            #  Raise an error if there's no left parenthesis to be matched with.
            if len(p_stack) == 0:
                err = _pe.Error(
                    _mexp_errors.PE_MEXP_PARENTHESIS_MISMATCH,
                    _msg_id.MSG_PE_MEXP_PARENTHESIS_MISMATCH_DESCRIPTION,
                    options)

                err.push_traceback_ex(
                    expression, token.get_position(), token.get_position(),
                    _msg_id.MSG_PE_MEXP_PARENTHESIS_MISMATCH_MISSING_LEFT)

                raise err

            #  Get the top item of the stack.
            p_item = p_stack.pop()

            #  Get the symbol of the parenthesis matches with current token.
            p_matched_sym = p_match_map[token.get_symbol()]

            #  Raise an error if the parenthesis was mismatched.
            #  (e.g. '(' closed by ']'.)
            if p_matched_sym != p_item.get_symbol():
                err = _pe.Error(
                    _mexp_errors.PE_MEXP_PARENTHESIS_MISMATCH,
                    _msg_id.MSG_PE_MEXP_PARENTHESIS_MISMATCH_DESCRIPTION,
                    options)

                err.push_traceback_ex(
                    expression, token.get_position(), token.get_position(),
                    _msg_id.MSG_PE_MEXP_PARENTHESIS_MISMATCH_INCORRECT,
                    {"$1": p_matched_sym})

                raise err

            if p_fn:
                #  The closing parenthesis finishes the last argument.
                cur_argc += 1

                #  Raise an error if the argument count was not matched.
                if cur_argc != req_argc:
                    #  The function token sits right before its '('.
                    fn_token = token_list[p_item.get_token_id() - 1]

                    err = _pe.Error(
                        _mexp_errors.PE_MEXP_FN_ARGC_MISMATCH,
                        _msg_id.MSG_PE_MEXP_FN_ARGC_MISMATCH_DESCRIPTION,
                        options)

                    err.push_traceback_ex(
                        expression, fn_token.get_position(),
                        fn_token.get_position() + len(fn_token.get_symbol()) -
                        1, _msg_id.MSG_PE_MEXP_FN_ARGC_MISMATCH_TB_MESSAGE, {
                            "$1": str(req_argc),
                            "$2": str(cur_argc)
                        })

                    raise err

            #  Restore state.
            p_fn = p_item.is_in_function()
            cur_argc = p_item.get_current_argument_count()
            req_argc = p_item.get_required_argument_count()
            prev_sep_pos = p_item.get_previous_separator_position()

            #  Process the token.
            rpn.add_right_parenthesis()

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_separator():
            #  Raise an error if we're not in function now.
            if not p_fn:
                err = _pe.Error(
                    _mexp_errors.PE_MEXP_ILLEGAL_ARG_SEPARATOR,
                    _msg_id.MSG_PE_MEXP_ILLEGAL_ARG_SEPARATOR_DESCRIPTION,
                    options)

                err.push_traceback_ex(
                    expression, token.get_position(), token.get_position(),
                    _msg_id.MSG_PE_MEXP_ILLEGAL_ARG_SEPARATOR_TB_MESSAGE)

                raise err

            #  Raise an error if there's no content between two separators.
            if prev_sep_pos + 1 == token_id:
                err = _pe.Error(_mexp_errors.PE_MEXP_NO_CONTENT,
                                _msg_id.MSG_PE_MEXP_NO_CONTENT_DESCRIPTION,
                                options)

                err.push_traceback_ex(expression, prev_tok.get_position(),
                                      token.get_position(),
                                      _msg_id.MSG_PE_MEXP_NO_CONTENT_ARGUMENT)

                raise err

            #  Save separator position.
            prev_sep_pos = token_id

            #  Increase argument counter.
            cur_argc += 1

            #  Process the token.
            rpn.add_separator()

            #  Go to next token.
            token_id += 1

            continue
        else:
            raise RuntimeError("Never reach this condition.")

    #  Raise an error if there are still some left parentheses in the stack.
    if len(p_stack) != 0:
        err = _pe.Error(_mexp_errors.PE_MEXP_PARENTHESIS_MISMATCH,
                        _msg_id.MSG_PE_MEXP_PARENTHESIS_MISMATCH_DESCRIPTION,
                        options)

        while len(p_stack) != 0:
            p_item = p_stack.pop()
            p_token = token_list[p_item.get_token_id()]
            err.push_traceback_ex(
                expression, p_token.get_position(), p_token.get_position(),
                _msg_id.MSG_PE_MEXP_PARENTHESIS_MISMATCH_MISSING_RIGHT)

        raise err

    #  Pop all items off from the stack and push them onto the RPN token list.
    rpn.finalize()

    #  Return the RPN token list.
    return rpn.get_rpn()
Пример #8
0
def tokenize(expression, options):
    """Tokenize a math expression.

    Scans the expression left to right, emitting number, symbol, function,
    operator, parenthesis and separator tokens.

    :type expression: str
    :type options: _opt.Option
    :param expression: The math expression.
    :param options: The BCE options.
    :rtype : list
    :return: The tokenized math expression.
    :raise _pe.Error: If we meet a syntax error.
    """

    #  Initialize.
    r = []
    cur_pos = 0
    end_pos = len(expression)
    prev_tok = None

    while cur_pos < end_pos:
        cur_ch = expression[cur_pos]

        #  Get previous token if possible.
        if len(r) != 0:
            prev_tok = r[-1]

        #  Read a number token if current character is a digit.
        if cur_ch.isdigit():
            #  Search for next non-digit and non-dot character.
            met_dot = False
            prev_dot_pos = -1
            search_pos = cur_pos + 1
            search_end = end_pos

            while search_pos < end_pos:
                search_ch = expression[search_pos]
                if search_ch == ".":
                    #  If we met the decimal dot more than once, raise a duplicated-dot error.
                    if met_dot:
                        err = _pe.Error(_mexp_errors.PE_MEXP_DUPLICATED_DECIMAL_DOT,
                                        _msg_id.MSG_PE_MEXP_DUPLICATED_DECIMAL_DOT_DESCRIPTION,
                                        options)

                        err.push_traceback_ex(expression,
                                              search_pos,
                                              search_pos,
                                              _msg_id.MSG_PE_MEXP_DUPLICATED_DECIMAL_DOT_DUPLICATED)

                        err.push_traceback_ex(expression,
                                              prev_dot_pos,
                                              prev_dot_pos,
                                              _msg_id.MSG_PE_MEXP_DUPLICATED_DECIMAL_DOT_PREVIOUS)

                        raise err
                    else:
                        met_dot = True
                        prev_dot_pos = search_pos
                else:
                    if not search_ch.isdigit():
                        search_end = search_pos
                        break

                #  Go to next searching position.
                search_pos += 1

            if met_dot:
                #  Create a float token if there's a decimal dot in the sequence.
                r.append(create_float_operand_token(expression[cur_pos:search_end], len(r), cur_pos))
            else:
                #  Create an integer token if there's no decimal dot in the sequence.
                r.append(create_integer_operand_token(expression[cur_pos:search_end], len(r), cur_pos))

            #  Go to next position.
            cur_pos = search_end

            continue

        if cur_ch.isalpha():
            #  Search for next non-alphabet character.
            search_pos = cur_pos + 1
            search_end = end_pos

            while search_pos < end_pos:
                if not expression[search_pos].isalpha():
                    search_end = search_pos
                    break

                #  Go to next searching position.
                search_pos += 1

            if search_end == end_pos:
                #  The alphabetic run reaches the end of the expression, so it
                #  cannot be a function name (no parenthesis can follow).
                symbol_hdr = options.get_protected_math_symbol_header()

                #  Raise an error if the symbol starts with the protected symbol header.
                if expression.startswith(symbol_hdr, cur_pos, search_end):
                    err = _pe.Error(_mexp_errors.PE_MEXP_USE_PROTECTED_SYMBOL_HEADER,
                                    _msg_id.MSG_PE_MEXP_USE_PROTECTED_SYMBOL_HEADER_DESCRIPTION,
                                    options)

                    err.push_traceback_ex(expression,
                                          cur_pos,
                                          search_end - 1,
                                          _msg_id.MSG_PE_MEXP_USE_PROTECTED_SYMBOL_HEADER_TB_MESSAGE,
                                          {"$1": symbol_hdr})

                    raise err

                #  Create a symbol token if there's nothing behind the string we got.
                r.append(create_symbol_operand_token(expression[cur_pos:search_end], len(r), cur_pos))
            else:
                next_ch = expression[search_end]
                if next_ch.isdigit() or next_ch == "(" or next_ch == "[" or next_ch == "{":
                    #  Create a function token if there's a number or a parenthesis behind the string we got.
                    r.append(create_function_token(expression[cur_pos:search_end], len(r), cur_pos))
                else:
                    symbol_hdr = options.get_protected_math_symbol_header()

                    #  Raise an error if the symbol starts with the protected symbol header.
                    if expression.startswith(symbol_hdr, cur_pos, search_end):
                        err = _pe.Error(_mexp_errors.PE_MEXP_USE_PROTECTED_SYMBOL_HEADER,
                                        _msg_id.MSG_PE_MEXP_USE_PROTECTED_SYMBOL_HEADER_DESCRIPTION,
                                        options)

                        err.push_traceback_ex(expression,
                                              cur_pos,
                                              search_end - 1,
                                              _msg_id.MSG_PE_MEXP_USE_PROTECTED_SYMBOL_HEADER_TB_MESSAGE,
                                              {"$1": symbol_hdr})

                        raise err

                    #  Create a symbol token.
                    r.append(create_symbol_operand_token(expression[cur_pos:search_end], len(r), cur_pos))

            #  Go to next position.
            cur_pos = search_end

            continue

        if cur_ch == "+":
            #  Create a token.
            r.append(create_plus_operator_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if cur_ch == "-":
            #  If the left operand exists, create a minus operator token. Otherwise, create a negative sign token.
            if (not (prev_tok is None)) and \
                    (prev_tok.is_operand() or prev_tok.is_right_parenthesis()):
                r.append(create_minus_operator_token(len(r), cur_pos))
            else:
                r.append(create_negative_operator_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if cur_ch == "*":
            #  Create a token.
            r.append(create_multiply_operator_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if cur_ch == "/":
            #  Create a token.
            r.append(create_divide_operator_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if cur_ch == "^":
            #  Create a token.
            r.append(create_pow_operator_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if cur_ch == "(" or cur_ch == "[" or cur_ch == "{":
            #  Create a left parenthesis token and go to next position.
            r.append(create_left_parenthesis_token(cur_ch, len(r), cur_pos))
            cur_pos += 1
            continue

        if cur_ch == ")" or cur_ch == "]" or cur_ch == "}":
            #  Create a token.
            r.append(create_right_parenthesis_token(cur_ch, len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if cur_ch == ",":
            #  Create a token.
            r.append(create_separator_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        #  Raise an untokenizable error.
        err = _pe.Error(_mexp_errors.PE_MEXP_UNRECOGNIZED_TOKEN,
                        _msg_id.MSG_PE_MEXP_UNRECOGNIZED_TOKEN_DESCRIPTION,
                        options)

        err.push_traceback_ex(expression,
                              cur_pos,
                              cur_pos,
                              _msg_id.MSG_PE_MEXP_UNRECOGNIZED_TOKEN_TB_MESSAGE)

        raise err

    return r
Пример #9
0
def parse(expression, token_list, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Parse the tokenized chemical equation.

    The parser is a small finite state machine over the token list:

      * ``_STATE_ROUTE_1``         -- at the start of a side; dispatches to
        reading a leading '-' or a molecule.
      * ``_STATE_READ_MINUS_1``    -- consumes a leading '-' before the first
        molecule of a side.
      * ``_STATE_READ_MOLECULE``   -- consumes one molecule token, parses it,
        and appends it to the current side of the equation.
      * ``_STATE_ROUTE_2``         -- after a molecule; dispatches on '+',
        '-', the separator, '=' or the end token (the end token terminates
        the main loop).
      * ``_STATE_READ_PLUS`` / ``_STATE_READ_MINUS_2`` -- consume an operator
        between two molecules (normal form).
      * ``_STATE_READ_SEPARATOR``  -- consumes a separator and registers the
        auto-correction form.
      * ``_STATE_READ_EQUAL_SIGN`` -- consumes the '=' and switches parsing
        to the right side of the equation.

    :type expression: str
    :type token_list: list[bce.parser.cexp.token.Token]
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: Origin chemical equation.
    :param token_list: The tokenized chemical equation.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : bce.parser.interface.cexp_parser.ChemicalEquation
    :return: The parsed chemical equation.
    :raise _cm_error.Error: If the expression is invalid (empty expression,
        missing content around an operator, a molecule that fails to parse,
        a duplicated equal sign, only one molecule, or a normal-form equation
        without an equal sign).
    :raise RuntimeError: If an internal invariant is violated (a bug).
    """

    #  Wrap the interface option.
    if_opt = _interface_opt.OptionWrapper(options)

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Initialize an empty chemical equation.
    ret = _cexp_interface.ChemicalEquation()

    #  Initialize the sign.
    operator = _cexp_interface.OPERATOR_PLUS

    #  Initialize the form container.
    #  (Set on the first operator / equal sign; _macro_register_form raises
    #  if normal form and auto-correction form are mixed.)
    form = None

    #  Initialize the side mark.
    #  (side == False: Left side; side == True: Right side;)
    side = False

    #  Initialize the state.
    state = _STATE_ROUTE_1

    #  Initialize other variables.
    #  read_molecule_end is the state to return to after a molecule has been
    #  consumed; equal_sign_position is kept for the duplicated-'=' traceback.
    read_molecule_end = None
    equal_sign_position = -1

    #  Initialize the token cursor.
    #  NOTE: loop termination relies on the token list ending with an "end"
    #  token (handled in _STATE_ROUTE_2); otherwise the subscript below would
    #  raise an IndexError. TODO confirm the tokenizer always appends one.
    cursor = 0
    while True:
        token = token_list[cursor]

        if state == _STATE_ROUTE_1:
            #  Reset the operator to '+'.
            operator = _cexp_interface.OPERATOR_PLUS

            #  Redirect by rules.
            if token.is_operator_minus():
                #  Go to read the '-'.
                state = _STATE_READ_MINUS_1
            else:
                #  Go and try to read a molecule.
                read_molecule_end = _STATE_ROUTE_2
                state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MINUS_1:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)

            #  Set the operator to '-'.
            operator = _cexp_interface.OPERATOR_MINUS

            #  Next token.
            cursor += 1

            #  Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MOLECULE:
            #  Every branch below raises, so execution continues past this
            #  check only when the current token really is a molecule.
            if not token.is_molecule():
                if token.is_end():
                    if cursor == 0:
                        #  In this condition, we got an empty expression. Raise an error.
                        err = _cm_error.Error(
                            _cexp_error.CEXP_EMPTY_EXPRESSION,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.empty_expression.description"
                            ),
                            options
                        )
                        raise err
                    else:
                        #  There is no content between the end token and previous token. Raise an error.
                        err = _cm_error.Error(
                            _cexp_error.CEXP_NO_CONTENT,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.no_content.description"
                            ),
                            options
                        )
                        err.push_traceback(
                            expression,
                            token.get_position() - 1,
                            token.get_position() - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.no_content.operator_after"
                            )
                        )
                        raise err
                else:
                    err = _cm_error.Error(
                        _cexp_error.CEXP_NO_CONTENT,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.cexp.error.no_content.description"
                        ),
                        options
                    )
                    if cursor == 0:
                        #  There is no content before this token. Raise an error.
                        err.push_traceback(
                            expression,
                            token.get_position(),
                            token.get_position() + len(token.get_symbol()) - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.no_content.operator_before"
                            )
                        )
                    else:
                        #  There is no content between this token and previous token. Raise an error.
                        err.push_traceback(
                            expression,
                            token.get_position() - 1,
                            token.get_position() + len(token.get_symbol()) - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.no_content.operator_between"
                            )
                        )
                    raise err

            try:
                #  Get the molecule parser.
                ml_parser = if_opt.get_molecule_parser()

                #  Parse the molecule.
                ml_ast_root = ml_parser.parse_expression(
                    token.get_symbol(),
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )

                #  Separate the coefficient from the AST.
                #  (The prefix number is stored as the item coefficient and
                #  the AST itself is normalized to a prefix of one.)
                ml_coefficient = ml_ast_root.get_prefix_number()
                ml_ast_root.set_prefix_number(_math_cst.ONE)

                #  Parse the AST.
                ml_atoms_dict = ml_parser.parse_ast(
                    token.get_symbol(),
                    ml_ast_root,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )

                #  Add the molecule to the chemical equation.
                if side:
                    ret.append_right_item(operator, ml_coefficient, ml_ast_root, ml_atoms_dict)
                else:
                    ret.append_left_item(operator, ml_coefficient, ml_ast_root, ml_atoms_dict)
            except _cm_error.Error as err:
                #  Add error description (points at the molecule's span in the
                #  origin expression) and re-raise.
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.parsing_molecule.message"
                    )
                )
                raise err

            #  Next token.
            cursor += 1

            #  Redirect by pre-saved state.
            state = read_molecule_end
        elif state == _STATE_ROUTE_2:
            #  Redirect by rules.
            if token.is_operator_plus():
                state = _STATE_READ_PLUS
            elif token.is_operator_minus():
                state = _STATE_READ_MINUS_2
            elif token.is_operator_separator():
                state = _STATE_READ_SEPARATOR
            elif token.is_equal():
                state = _STATE_READ_EQUAL_SIGN
            elif token.is_end():
                break
            else:
                raise RuntimeError("BUG: Unexpected token (should never happen).")
        elif state == _STATE_READ_PLUS:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)

            #  Set the operator to '+'.
            operator = _cexp_interface.OPERATOR_PLUS

            #  Next token.
            cursor += 1

            #  Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MINUS_2:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)

            #  Set the operator to '-'.
            operator = _cexp_interface.OPERATOR_MINUS

            #  Next token.
            cursor += 1

            #  Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_SEPARATOR:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_AUTO_CORRECTION, options)

            #  Set the operator to '+'.
            operator = _cexp_interface.OPERATOR_PLUS

            #  Next token.
            cursor += 1

            #  Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_EQUAL_SIGN:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL, options)

            #  Next token.
            cursor += 1

            #  Raise an error if the equal sign is duplicated.
            #  (side is True only after an '=' has already been consumed.)
            if side:
                err = _cm_error.Error(
                    _cexp_error.CEXP_DUPLICATED_EQUAL_SIGN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.duplicated_equal_sign.description"
                    ),
                    options
                )
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.duplicated_equal_sign.duplicated"
                    )
                )
                err.push_traceback(
                    expression,
                    equal_sign_position,
                    equal_sign_position,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.duplicated_equal_sign.previous"
                    )
                )
                raise err

            #  Save the position of the equal sign.
            equal_sign_position = token.get_position()

            #  Mark the side flag.
            side = True

            #  Go to route 1.
            state = _STATE_ROUTE_1
        else:
            raise RuntimeError("BUG: Unexpected state.")

    #  Raise an error if there is only 1 molecule.
    if len(ret) == 1:
        err = _cm_error.Error(
            _cexp_error.CEXP_ONLY_ONE_MOLECULE,
            _l10n_reg.get_message(
                lang_id,
                "parser.cexp.error.only_one_molecule.description"
            ),
            options
        )
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _l10n_reg.get_message(
                lang_id,
                "parser.cexp.error.only_one_molecule.message"
            )
        )
        raise err

    #  Check form.
    if form is None:
        raise RuntimeError("BUG: Form was not set.")

    #  Raise an error if there is no equal sign (for normal form only).
    if form == _FORM_NORMAL and not side:
        err = _cm_error.Error(
            _cexp_error.CEXP_NO_EQUAL_SIGN,
            _l10n_reg.get_message(
                lang_id,
                "parser.cexp.error.no_equal_sign.description"
            ),
            options
        )
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _l10n_reg.get_message(
                lang_id,
                "parser.cexp.error.no_equal_sign.message"
            )
        )
        raise err

    return ret
Пример #10
0
def parse(expression, token_list, options):
    """Parse the tokenized chemical equation.

    The parser is a finite state machine over the token list: ``_STATE_ROUTE_1``
    starts a side (optional leading '-' or a molecule), ``_STATE_READ_MOLECULE``
    consumes one molecule and appends it to the current side, ``_STATE_ROUTE_2``
    dispatches on the token after a molecule ('+', '-', separator, '=' or the
    end token, which terminates the loop), and the remaining ``_STATE_READ_*``
    states consume a single operator / equal sign each.

    :type expression: str
    :type token_list: list[_ce_token.Token]
    :type options: _opt.Option
    :param expression: Origin chemical equation.
    :param token_list: The tokenized chemical equation.
    :param options: The BCE options.
    :rtype : _ce_base.ChemicalEquation
    :return: The parsed chemical equation.
    :raise _pe.Error: If the expression is invalid (empty expression, missing
        content around an operator, a molecule that fails to parse, a
        duplicated equal sign, only one molecule, or a normal-form equation
        without an equal sign).
    :raise RuntimeError: If an internal invariant is violated (a bug).
    """

    #  Initialize an empty chemical equation.
    ret = _ce_base.ChemicalEquation()

    #  Initialize the sign.
    operator = _ce_op.OPERATOR_PLUS

    #  Initialize the form container.
    #  (Set on the first operator / equal sign; _macro_register_form raises
    #  if normal form and auto-correction form are mixed.)
    form = None

    #  Initialize the side mark.
    #  (side == False: Left side; side == True: Right side;)
    side = False

    #  Initialize the state.
    state = _STATE_ROUTE_1

    #  Initialize other variables.
    #  read_molecule_end is the state to return to after a molecule has been
    #  consumed; equal_sign_position is kept for the duplicated-'=' traceback.
    read_molecule_end = None
    equal_sign_position = -1

    #  Initialize the token cursor.
    #  NOTE: loop termination relies on the token list ending with an "end"
    #  token (handled in _STATE_ROUTE_2); otherwise the subscript below would
    #  raise an IndexError. TODO confirm the tokenizer always appends one.
    cursor = 0
    while True:
        token = token_list[cursor]

        if state == _STATE_ROUTE_1:
            #  Reset the operator to '+'.
            operator = _ce_op.OPERATOR_PLUS

            #  Redirect by rules.
            if token.is_operator_minus():
                #  Go to read the '-'.
                state = _STATE_READ_MINUS_1
            else:
                #  Go and try to read a molecule.
                read_molecule_end = _STATE_ROUTE_2
                state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MINUS_1:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL,
                                        options)

            #  Set the operator to '-'.
            operator = _ce_op.OPERATOR_MINUS

            #  Next token.
            cursor += 1

            #  Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MOLECULE:
            #  Every branch below raises, so execution continues past this
            #  check only when the current token really is a molecule.
            if not token.is_molecule():
                if token.is_end():
                    if cursor == 0:
                        #  In this condition, we got an empty expression. Raise an error.
                        err = _pe.Error(
                            _ce_error.PE_CE_EMPTY_EXPRESSION,
                            _msg_id.MSG_PE_CE_EMPTY_EXPRESSION_DESCRIPTION,
                            options)

                        raise err
                    else:
                        #  There is no content between the end token and previous token. Raise an error.
                        err = _pe.Error(
                            _ce_error.PE_CE_NO_CONTENT,
                            _msg_id.MSG_PE_CE_NO_CONTENT_DESCRIPTION, options)

                        err.push_traceback_ex(
                            expression,
                            token.get_position() - 1,
                            token.get_position() - 1,
                            _msg_id.MSG_PE_CE_NO_CONTENT_OPERATOR_AFTER)

                        raise err
                else:
                    err = _pe.Error(_ce_error.PE_CE_NO_CONTENT,
                                    _msg_id.MSG_PE_CE_NO_CONTENT_DESCRIPTION,
                                    options)
                    if cursor == 0:
                        #  There is no content before this token. Raise an error.
                        err.push_traceback_ex(
                            expression, token.get_position(),
                            token.get_position(),
                            _msg_id.MSG_PE_CE_NO_CONTENT_OPERATOR_BEFORE)
                    else:
                        #  There is no content between this token and previous token. Raise an error.
                        err.push_traceback_ex(
                            expression,
                            token.get_position() - 1, token.get_position(),
                            _msg_id.MSG_PE_CE_NO_CONTENT_OPERATOR_BETWEEN)

                    raise err

            try:
                #  Tokenize the molecule.
                ml_token_list = _ml_token.tokenize(token.get_symbol(), options)

                #  Generate the AST.
                ml_ast_root = _ml_ast_generator.generate_ast(
                    token.get_symbol(), ml_token_list, options)

                #  Separate the coefficient from the AST.
                #  (The prefix number becomes the item coefficient; the AST is
                #  normalized to a prefix of one.)
                ml_coeff = ml_ast_root.get_prefix_number()
                ml_ast_root.set_prefix_number(_math_cst.ONE)
                ml_atoms_dict = _ml_ast_parser.parse_ast(
                    token.get_symbol(), ml_ast_root, options)

                #  Add the molecule to the chemical equation.
                if side:
                    ret.append_right_item(operator, ml_coeff, ml_ast_root,
                                          ml_atoms_dict)
                else:
                    ret.append_left_item(operator, ml_coeff, ml_ast_root,
                                         ml_atoms_dict)
            except _pe.Error as err:
                #  Add error description (points at the molecule's span in
                #  the origin expression) and re-raise.
                err.push_traceback_ex(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _msg_id.MSG_PE_CE_SUB_ML_ERROR_TRACE_MESSAGE)

                raise err

            #  Next token.
            cursor += 1

            #  Redirect by pre-saved state.
            state = read_molecule_end
        elif state == _STATE_ROUTE_2:
            #  Redirect by rules.
            if token.is_operator_plus():
                state = _STATE_READ_PLUS
            elif token.is_operator_minus():
                state = _STATE_READ_MINUS_2
            elif token.is_operator_separator():
                state = _STATE_READ_SEPARATOR
            elif token.is_equal():
                state = _STATE_READ_EQUAL_SIGN
            elif token.is_end():
                break
            else:
                raise RuntimeError(
                    "BUG: Unexpected token (should never happen).")
        elif state == _STATE_READ_PLUS:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL,
                                        options)

            #  Set the operator to '+'.
            operator = _ce_op.OPERATOR_PLUS

            #  Next token.
            cursor += 1

            #  Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_MINUS_2:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL,
                                        options)

            #  Set the operator to '-'.
            operator = _ce_op.OPERATOR_MINUS

            #  Next token.
            cursor += 1

            #  Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_SEPARATOR:
            #  Register the new form.
            form = _macro_register_form(expression, form,
                                        _FORM_AUTO_CORRECTION, options)

            #  Set the operator to '+'.
            operator = _ce_op.OPERATOR_PLUS

            #  Next token.
            cursor += 1

            #  Go to read-molecule state.
            read_molecule_end = _STATE_ROUTE_2
            state = _STATE_READ_MOLECULE
        elif state == _STATE_READ_EQUAL_SIGN:
            #  Register the new form.
            form = _macro_register_form(expression, form, _FORM_NORMAL,
                                        options)

            #  Next token.
            cursor += 1

            #  Raise an error if the equal sign is duplicated.
            #  (side is True only after an '=' has already been consumed.)
            if side:
                err = _pe.Error(
                    _ce_error.PE_CE_DUPLICATED_EQUAL_SIGN,
                    _msg_id.MSG_PE_CE_DUPLICATED_EQUAL_SIGN_DESCRIPTION,
                    options)

                err.push_traceback_ex(
                    expression, token.get_position(), token.get_position(),
                    _msg_id.MSG_PE_CE_DUPLICATED_EQUAL_SIGN_DUPLICATED)

                err.push_traceback_ex(
                    expression, equal_sign_position, equal_sign_position,
                    _msg_id.MSG_PE_CE_DUPLICATED_EQUAL_SIGN_PREVIOUS)

                raise err

            #  Save the position of the equal sign.
            equal_sign_position = token.get_position()

            #  Mark the side flag.
            side = True

            #  Go to route 1.
            state = _STATE_ROUTE_1
        else:
            raise RuntimeError("BUG: Unexpected state.")

    #  Raise an error if there is only 1 molecule.
    if len(ret) == 1:
        err = _pe.Error(_ce_error.PE_CE_ONLY_ONE_MOLECULE,
                        _msg_id.MSG_PE_CE_ONLY_ONE_MOLECULE_DESCRIPTION,
                        options)

        err.push_traceback_ex(expression, 0,
                              len(expression) - 1,
                              _msg_id.MSG_PE_CE_ONLY_ONE_MOLECULE_TB_MESSAGE)

        raise err

    #  Check form.
    if form is None:
        raise RuntimeError("BUG: Form was not set.")

    #  Raise an error if there is no equal sign (for normal form only).
    if form == _FORM_NORMAL and not side:
        err = _pe.Error(_ce_error.PE_CE_NO_EQUAL_SIGN,
                        _msg_id.MSG_PE_CE_NO_EQUAL_SIGN_DESCRIPTION, options)

        err.push_traceback_ex(expression, 0,
                              len(expression) - 1,
                              _msg_id.MSG_PE_CE_NO_EQUAL_SIGN_TB_MESSAGE)

        raise err

    return ret
Пример #11
0
def parse_ast(expression, root_node, options):
    """Parse an AST.

    Walks the AST from the leaves up to the root, computing for every node a
    :class:`MergeUtil` that accumulates the atom counts of its subtree, and
    returns the atom dictionary of the root node.

    :type expression: str
    :type root_node: _ast_base.ASTNodeHydrateGroup | _ast_base.ASTNodeMolecule
    :type options: _opt.Option
    :param expression: The origin expression.
    :param root_node: The root node of the AST.
    :param options: The BCE options.
    :rtype : dict
    :return: The parsed atoms dictionary.
    :raise _pe.Error: If some part of the molecule has no content or an
        abbreviation cannot be resolved.
    """

    #  Fetch the nodes in leaves-to-root processing order.
    node_order = _ast_bfs.do_bfs(root_node, True)

    #  Parsing results of finished nodes, keyed by the node's id().
    results = {}
    """:type : dict[int, MergeUtil]"""

    for node in node_order:
        if node.is_hydrate_group() or node.is_molecule():
            assert isinstance(node, _ast_base.ASTNodeHydrateGroup) or \
                isinstance(node, _ast_base.ASTNodeMolecule)

            #  The prefix number scales everything inside this node.
            multiplier = node.get_prefix_number()

            #  Accumulator for the atom counts of this node.
            merger = MergeUtil()

            #  Account for the electronic charge (molecules only).
            if node.is_molecule():
                charge = node.get_electronic_count().simplify()
                if not charge.is_zero:
                    merger.add("e", charge * multiplier)

            #  Fold the results of all child nodes into the accumulator.
            for idx in range(len(node)):
                sub_node = node[idx]
                sub_result = results[id(sub_node)]

                #  An empty molecule inside a hydrate group is an error.
                if node.is_hydrate_group() and len(sub_result) == 0:
                    assert isinstance(sub_node, _ast_base.ASTNodeMolecule)

                    err = _pe.Error(_ml_error.PE_ML_NO_CONTENT,
                                    _msg_id.MSG_PE_ML_NO_CONTENT_DESCRIPTION,
                                    options)

                    #  Point the traceback at the gap before, after or around
                    #  the empty child, depending on its position.
                    if idx == 0:
                        err.push_traceback_ex(expression,
                                              sub_node.get_ending_position_in_source_text() + 1,
                                              sub_node.get_ending_position_in_source_text() + 1,
                                              _msg_id.MSG_PE_ML_NO_CONTENT_BEFORE)
                    elif idx == len(node) - 1:
                        err.push_traceback_ex(expression,
                                              sub_node.get_starting_position_in_source_text() - 1,
                                              sub_node.get_starting_position_in_source_text() - 1,
                                              _msg_id.MSG_PE_ML_NO_CONTENT_AFTER)
                    else:
                        err.push_traceback_ex(expression,
                                              sub_node.get_starting_position_in_source_text() - 1,
                                              sub_node.get_ending_position_in_source_text() + 1,
                                              _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)

                    raise err

                merger.merge(sub_result, multiplier)

            #  Simplify, then store this node's result.
            _macro_simplify(expression, merger, node, options)
            results[id(node)] = merger
        elif node.is_atom():
            assert isinstance(node, _ast_base.ASTNodeAtom)

            #  A single atom contributes itself, scaled by its suffix number.
            merger = MergeUtil()
            merger.add(node.get_atom_symbol(), node.get_suffix_number())

            results[id(node)] = merger
        elif node.is_parenthesis():
            assert isinstance(node, _ast_base.ASTNodeParenthesisWrapper)

            #  The suffix number scales the parenthesized contents.
            multiplier = node.get_suffix_number()
            merger = MergeUtil()

            #  Fetch the parsing result of the wrapped node.
            inner_result = results[id(node.get_inner_node())]

            #  Empty parentheses are an error.
            if len(inner_result) == 0:
                err = _pe.Error(_ml_error.PE_ML_NO_CONTENT,
                                _msg_id.MSG_PE_ML_NO_CONTENT_DESCRIPTION,
                                options)

                err.push_traceback_ex(expression,
                                      node.get_starting_position_in_source_text(),
                                      node.get_right_parenthesis_position(),
                                      _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)

                raise err

            merger.merge(inner_result, multiplier)

            #  Simplify, then store this node's result.
            _macro_simplify(expression, merger, node, options)
            results[id(node)] = merger
        elif node.is_abbreviation():
            assert isinstance(node, _ast_base.ASTNodeAbbreviation)

            abbr_name = node.get_abbreviation_symbol()

            #  An empty abbreviation is an error.
            if len(abbr_name) == 0:
                err = _pe.Error(_ml_error.PE_ML_NO_CONTENT,
                                _msg_id.MSG_PE_ML_NO_CONTENT_DESCRIPTION,
                                options)

                err.push_traceback_ex(expression,
                                      node.get_starting_position_in_source_text(),
                                      node.get_right_parenthesis_position(),
                                      _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)

                raise err

            #  Resolve the abbreviation; the user-defined dictionary takes
            #  precedence over the built-in one.
            resolved = None
            if options.is_user_abbreviation_dictionary_enabled():
                custom_dict = options.get_user_abbreviation_dictionary()
                if abbr_name in custom_dict:
                    resolved = custom_dict[abbr_name]
            if resolved is None and abbr_name in _ml_abbr.ABBREVIATIONS:
                resolved = _ml_abbr.ABBREVIATIONS[abbr_name]

            #  An unresolvable abbreviation is an error.
            if resolved is None:
                err = _pe.Error(_ml_error.PE_ML_UNSUPPORTED_ABBREVIATION,
                                _msg_id.MSG_PE_ML_UNSUPPORTED_ABBREVIATION_DESCRIPTION,
                                options)

                err.push_traceback_ex(expression,
                                      node.get_starting_position_in_source_text() + 1,
                                      node.get_right_parenthesis_position() - 1,
                                      _msg_id.MSG_PE_ML_UNSUPPORTED_ABBREVIATION_TB_MESSAGE)

                raise err

            #  The suffix number scales the whole abbreviation.
            merger = MergeUtil()
            multiplier = node.get_suffix_number()

            #  Add every atom of the resolved abbreviation.
            for atom_name in resolved:
                merger.add(atom_name, resolved[atom_name] * multiplier)

            #  Simplify, then store this node's result.
            _macro_simplify(expression, merger, node, options)
            results[id(node)] = merger
        else:
            raise RuntimeError("Never reach this condition.")

    #  The result of the whole expression is the root node's result.
    root_result = results[id(root_node)]

    #  An entirely empty expression is an error.
    if len(root_result) == 0:
        err = _pe.Error(_ml_error.PE_ML_NO_CONTENT,
                        _msg_id.MSG_PE_ML_NO_CONTENT_DESCRIPTION,
                        options)

        err.push_traceback_ex(expression,
                              0,
                              len(expression) - 1,
                              _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)

        raise err

    return root_result.get_data()
Пример #12
0
def calculate_rpn(origin_token_list, rpn_token_list, options):
    """Calculate the value of a RPN token list.

    :type origin_token_list: list of _mexp_token.Token
    :type rpn_token_list: list of _mexp_token.Token
    :type options: _opt.Option
    :param origin_token_list: The origin token list.
    :param rpn_token_list: The RPN token list.
    :param options: The options.
    :return: The calculated value.
    :raise _pe.Error: When a math domain error occurs (division by zero,
                      zero raised to a negative power or square root of a
                      negative value).
    :raise RuntimeError: When a bug appears.
    """

    #  This routine implements the postfix algorithm.

    def _raise_domain_error(err_token, error_id, description_id, end_offset, tb_message_id):
        """Build and raise a math domain error located at |err_token|.

        :param err_token: The token where the error occurred.
        :param error_id: The parser error ID.
        :param description_id: The message ID of the error description.
        :param end_offset: Offset of the traceback ending position, relative
                           to the token position (0 for one-character
                           operators, symbol length - 1 for functions).
        :param tb_message_id: The message ID of the traceback message.
        """

        err = _pe.Error(error_id, description_id, options)
        err.push_traceback_ex(_base_token.untokenize(origin_token_list),
                              err_token.get_position(),
                              err_token.get_position() + end_offset,
                              tb_message_id)
        raise err

    #  Initialize the operand stack.
    calc_stack = _adt_stack.Stack()

    for token in rpn_token_list:
        if token.is_integer_operand():
            #  Convert the symbol to an integer rational and push it onto the stack.
            calc_stack.push(_mexp_utils.convert_int_string_to_rational(token.get_symbol()))
        elif token.is_float_operand():
            #  Convert the symbol to a float rational and push it onto the stack.
            calc_stack.push(_mexp_utils.convert_float_string_to_rational(token.get_symbol()))
        elif token.is_symbol_operand():
            #  Create a math symbol and push it onto the stack.
            calc_stack.push(_sympy.Symbol(token.get_symbol()))
        elif token.is_plus_operator():
            #  Pop two operands (right-hand side first), add and push the result.
            num2 = calc_stack.pop()
            num1 = calc_stack.pop()
            calc_stack.push(num1 + num2)
        elif token.is_minus_operator():
            #  Pop two operands (right-hand side first), subtract and push the result.
            num2 = calc_stack.pop()
            num1 = calc_stack.pop()
            calc_stack.push(num1 - num2)
        elif token.is_multiply_operator():
            #  Pop two operands (right-hand side first), multiply and push the result.
            num2 = calc_stack.pop()
            num1 = calc_stack.pop()
            calc_stack.push(num1 * num2)
        elif token.is_divide_operator():
            #  Pop two operands (right-hand side first).
            num2 = calc_stack.pop()
            num1 = calc_stack.pop()

            #  Raise an error if the divisor equals to zero.
            if num2.is_zero:
                _raise_domain_error(token,
                                    _mexp_errors.PE_MEXP_RPNEV_DIVIDE_ZERO,
                                    _msg_id.MSG_PE_MEXP_RPNEV_DIVIDE_ZERO_DESCRIPTION,
                                    0,
                                    _msg_id.MSG_PE_MEXP_RPNEV_DIVIDE_ZERO_OPERATOR)

            #  Do division and push the result onto the stack.
            calc_stack.push(num1 / num2)
        elif token.is_pow_operator():
            #  Pop two operands (right-hand side first).
            num2 = calc_stack.pop()
            num1 = calc_stack.pop()

            #  For a ^ b, when b < 0, a must not be 0 (0 ^ b would be a
            #  division by zero).
            if num2.is_negative and num1.is_zero:
                _raise_domain_error(token,
                                    _mexp_errors.PE_MEXP_RPNEV_DIVIDE_ZERO,
                                    _msg_id.MSG_PE_MEXP_RPNEV_DIVIDE_ZERO_DESCRIPTION,
                                    0,
                                    _msg_id.MSG_PE_MEXP_RPNEV_DIVIDE_ZERO_OPERATOR)

            #  Do power and push the result onto the stack.
            calc_stack.push(num1 ** num2)
        elif token.is_negative_operator():
            #  Pop one operand, negate it and push the result.
            num1 = calc_stack.pop()
            calc_stack.push(-num1)
        elif token.is_function():
            if token.get_symbol() == "pow":
                #  Pop two operands (right-hand side first).
                num2 = calc_stack.pop()
                num1 = calc_stack.pop()

                #  For pow(a, b), when b < 0, a must not be 0.
                if num2.is_negative and num1.is_zero:
                    _raise_domain_error(token,
                                        _mexp_errors.PE_MEXP_RPNEV_DIVIDE_ZERO,
                                        _msg_id.MSG_PE_MEXP_RPNEV_DIVIDE_ZERO_DESCRIPTION,
                                        len(token.get_symbol()) - 1,
                                        _msg_id.MSG_PE_MEXP_RPNEV_DIVIDE_ZERO_POW)

                #  Do power and push the result onto the stack.
                calc_stack.push(num1 ** num2)
            elif token.get_symbol() == "sqrt":
                #  Pop one operand.
                num1 = calc_stack.pop()

                #  The argument of sqrt() must not be negative.
                if num1.is_negative:
                    _raise_domain_error(token,
                                        _mexp_errors.PE_MEXP_RPNEV_SQRT_NEG_ARG,
                                        _msg_id.MSG_PE_MEXP_RPNEV_SQRT_NEG_ARG_DESCRIPTION,
                                        len(token.get_symbol()) - 1,
                                        _msg_id.MSG_PE_MEXP_RPNEV_SQRT_NEG_ARG_TB_MESSAGE)

                #  Do sqrt and push the result onto the stack.
                calc_stack.push(_mexp_fns.do_sqrt(num1))
            else:
                raise RuntimeError("Unreachable condition (Invalid function name).")
        else:
            raise RuntimeError("Unreachable condition (Invalid token type).")

    #  If there are more than one operands in the stack, raise a runtime error. But generally,
    #  we shouldn't get this error because we have checked the whole expression when tokenizing.
    if len(calc_stack) > 1:
        raise RuntimeError("Unreachable condition (Too many items in the stack after calculation).")

    return calc_stack.top()
Пример #13
0
def tokenize(expression, options):
    """Tokenize a chemical equation.

    :type expression: str
    :type options: bce.option.Option
    :param expression: The chemical equation.
    :param options: The options.
    :rtype : list[Token]
    :return: The token list.
    :raise _cm_error.Error: When a parenthesis mismatch is detected inside a
                            molecule block.
    """

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Initialize the result container.
    result = []

    #  Map each single-character operator to its token factory so the main
    #  loop doesn't need a repetitive if/elif chain.
    simple_token_factories = {
        "+": create_operator_plus_token,
        "-": create_operator_minus_token,
        ";": create_operator_separator_token,
        "=": create_equal_token
    }

    #  Initialize the cursor.
    cursor = 0

    while cursor < len(expression):
        #  Get current character.
        cur_ch = expression[cursor]

        factory = simple_token_factories.get(cur_ch)
        if factory is not None:
            #  Add an operator / equal-sign token.
            result.append(factory(len(result), cursor))

            #  Next position.
            cursor += 1
        else:
            #  Initialize the stack (it holds the positions of unmatched left
            #  parentheses so mismatches can be reported precisely).
            pm = _adt_stack.Stack()

            #  Initialize the searching cursor.
            search_pos = cursor

            #  Initialize the molecule symbol.
            molecule_symbol = ""

            while search_pos < len(expression):
                #  Get current character.
                search_ch = expression[search_pos]

                if search_ch in "([{<":
                    #  Emulate pushing operation.
                    pm.push(search_pos)

                    #  Add the character.
                    molecule_symbol += search_ch
                elif search_ch in ")]}>":
                    #  Raise an error if there is no left parenthesis in the stack.
                    if len(pm) == 0:
                        err = _cm_error.Error(
                            _ce_error.CEXP_PARENTHESIS_MISMATCH,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.parenthesis_mismatch.description"
                            ), options)
                        err.push_traceback(
                            expression, search_pos, search_pos,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.cexp.error.parenthesis_mismatch.left"))
                        raise err

                    #  Emulate popping operation.
                    pm.pop()

                    #  Add the character.
                    molecule_symbol += search_ch
                elif search_ch in simple_token_factories and len(pm) == 0:
                    #  An operator outside all parentheses terminates the
                    #  molecule symbol.
                    break
                else:
                    #  Add the character.
                    molecule_symbol += search_ch

                #  Move the searching cursor.
                search_pos += 1

            #  Raise an error if there are still some parentheses in the stack.
            if len(pm) != 0:
                err = _cm_error.Error(
                    _ce_error.CEXP_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.cexp.error.parenthesis_mismatch.description"),
                    options)

                while len(pm) != 0:
                    mismatched_pos = pm.pop()
                    err.push_traceback(
                        expression, mismatched_pos, mismatched_pos,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.cexp.error.parenthesis_mismatch.right"))

                raise err

            #  Add a molecule token.
            result.append(
                create_molecule_token(molecule_symbol, len(result), cursor))

            #  Set the cursor.
            cursor = search_pos

    #  Add an end token.
    result.append(create_end_token(len(result), len(expression)))

    return result
Пример #14
0
def generate_ast(expression, token_list, options):
    """Generate an AST from the token list.

    This is a state machine: each token redirects from _STATE_ROOT to a
    specialized state that creates / closes AST nodes, then returns to
    _STATE_ROOT.  After the end token, unclosed parentheses are reported
    and single-child hydrate groups are unpacked.

    :type expression: str
    :type token_list: list[bce.parser.molecule.token.Token]
    :type options: bce.option.Option
    :param expression: The origin expression.
    :param token_list: The token list.
    :param options: The options.
    :rtype : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :return: The root node of the generated AST.
    :raise bce.parser.common.error.Error: When a parser error (unexpected
                                          token, domain error, exceeding
                                          operand, parenthesis mismatch)
                                          occurred.
    """

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Initialize the molecule status container.
    molecule_status = None

    #  Initialize the state machine.
    state = _STATE_ROOT

    #  Generate initial AST.
    root = _ml_ast_base.ASTNodeHydrateGroup()
    node = _ml_ast_base.ASTNodeMolecule(root)
    root.append_child(node)

    #  Register the starting position.
    root.register_starting_position_in_source_text(0)
    node.register_starting_position_in_source_text(0)

    #  Initialize the token cursor.
    cursor = 0

    while True:
        #  Get current token.
        token = token_list[cursor]

        if state == _STATE_ROOT:
            #  Find molecule in parent nodes and current node.
            while node is not None and not node.is_molecule():
                node = node.get_parent_node()
            if node is None:
                raise RuntimeError("BUG: Can't find molecule group.")

            #  Redirect by rules.  A prefix number is only accepted when the
            #  molecule node is still empty.
            if token.is_operand() and len(node) == 0:
                state = _STATE_PREFIX_NUMBER
            elif token.is_symbol():
                state = _STATE_ATOM
            elif token.is_abbreviation():
                state = _STATE_ABBREVIATION
            elif token.is_left_parenthesis():
                state = _STATE_LEFT_PARENTHESIS
            elif token.is_right_parenthesis():
                state = _STATE_RIGHT_PARENTHESIS
            elif token.is_electronic_begin():
                state = _STATE_ELECTRONIC
            elif token.is_hydrate_dot():
                state = _STATE_HYDRATE_DOT
            elif token.is_status():
                state = _STATE_MOLECULE_STATUS
            elif token.is_end():
                break
            else:
                #  Raise an error if the token can't be recognized.
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.other"))
                raise err
        elif state == _STATE_ATOM:
            #  Create a new atom node and register its starting position.
            new_node = _ml_ast_base.ASTNodeAtom(token.get_symbol(), node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            #  Add the node to the molecule group.
            node.append_child(new_node)

            #  Switch the node pointer to the new created node.
            node = new_node

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ABBREVIATION:
            #  Create a new abbreviation node and register its starting position.
            #  The symbol is sliced to strip the surrounding brackets.
            new_node = _ml_ast_base.ASTNodeAbbreviation(
                token.get_symbol()[1:-1], node)
            new_node.register_starting_position_in_source_text(
                token.get_position())

            #  Add the node to the molecule group.
            node.append_child(new_node)

            #  Switch the node pointer to the new created node.
            node = new_node

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_LEFT_PARENTHESIS:
            #  Create new nodes.
            new_hydrate_grp = _ml_ast_base.ASTNodeHydrateGroup()
            new_molecule = _ml_ast_base.ASTNodeMolecule(new_hydrate_grp)
            new_parenthesis = _ml_ast_base.ASTNodeParenthesisWrapper(
                new_hydrate_grp, node)

            #  Link them correctly and then add the new created parenthesis node to the molecule group.
            new_hydrate_grp.set_parent_node(new_parenthesis)
            new_hydrate_grp.append_child(new_molecule)
            node.append_child(new_parenthesis)

            #  Switch the node pointer to the new created molecule node.
            node = new_molecule

            #  Register their starting positions.
            new_hydrate_grp.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)
            new_parenthesis.register_starting_position_in_source_text(
                token.get_position())

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_RIGHT_PARENTHESIS:
            #  Find parenthesis node in parent nodes and current node.
            while node is not None and not node.is_parenthesis():
                #  Register the ending position of current working node.
                node.register_ending_position_in_source_text(
                    token.get_position() - 1)

                #  Go to the parent node.
                node = node.get_parent_node()

            #  Raise an error if the node can't be found.
            if node is None:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ), options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.left"))
                raise err

            #  Register the right parenthesis position of current working node.
            node.set_right_parenthesis_position(token.get_position())

            #  Next token.
            cursor += 1

            #  Go to read the suffix number.
            state = _STATE_SUFFIX_NUMBER
        elif state == _STATE_ELECTRONIC:
            #  Save the starting position of the electronic descriptor.
            e_start_pos = token.get_position()

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Try to read the prefix number.
            e_pfx = _math_cst.ONE
            e_pfx_start = token.get_position()
            has_e_pfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_e_pfx_number = True

                #  Process the prefix number.
                e_pfx *= token.get_operand_value().simplify()

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Simplify before checking.
            e_pfx = e_pfx.simplify()

            #  Domain check: the charge count must be positive.
            if e_pfx.is_negative or e_pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.electronic_charge")
                )
                raise err

            #  Validate: an explicitly written charge count of 1 is redundant.
            if has_e_pfx_number and e_pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, e_pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.electronic_charge"
                    ))
                raise err

            #  Process the electronic positivity flag.
            if token.is_electronic_positive_flag():
                pass
            elif token.is_electronic_negative_flag():
                e_pfx = -e_pfx
            else:
                #  NOTE(review): an unterminated descriptor ("<" reaching the
                #  end token) is reported as a parenthesis mismatch — confirm
                #  this reuse of the parenthesis_mismatch code is intended.
                if token.is_end():
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    #  Raise an error if current working token is not an electronic positivity flag.
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ), options)
                    err.push_traceback(
                        expression, token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_suffix"
                        ))

                raise err

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Raise an error if current working token is not '>'.
            if not token.is_electronic_end():
                if token.is_end():
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, e_start_pos,
                        token.get_position() - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.parenthesis_mismatch.right")
                    )
                else:
                    err = _cm_error.Error(
                        _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.description"
                        ), options)
                    err.push_traceback(
                        expression, token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.unexpected_token.electronic_end"
                        ))

                raise err

            #  Next token.
            cursor += 1
            token = token_list[cursor]

            #  Raise an error if the electronic descriptor is not at the end of a molecule block.
            if not (token.is_right_parenthesis() or token.is_hydrate_dot()
                    or token.is_end() or token.is_status()):
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, e_start_pos,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            #  Set the electronic count.
            node.set_electronic_count(e_pfx)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_HYDRATE_DOT:
            #  Save the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() -
                                                         1)

            #  Go to parent node.
            node = node.get_parent_node()
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            #  Create a new molecule node and set its starting position.
            new_molecule = _ml_ast_base.ASTNodeMolecule(node)
            new_molecule.register_starting_position_in_source_text(
                token.get_position() + 1)

            #  Add the new created molecule node to the hydrate group node.
            node.append_child(new_molecule)

            #  Switch the node pointer to the new created molecule node.
            node = new_molecule

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_PREFIX_NUMBER:
            #  Save the starting position of the prefix.
            pfx_start = token.get_position()

            #  Read prefix numbers.
            has_pfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_pfx_number = True

                #  Process the prefix number.
                node.set_prefix_number(node.get_prefix_number() *
                                       token.get_operand_value().simplify())

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Simplify before checking.
            pfx = node.get_prefix_number().simplify()

            #  Domain check: the prefix must be positive.
            if pfx.is_negative or pfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.domain_error.prefix"))
                raise err

            #  Validate: an explicitly written prefix of 1 is redundant.
            if has_pfx_number and pfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, pfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.prefix"))
                raise err

            #  Set the prefix number.
            node.set_prefix_number(pfx)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_SUFFIX_NUMBER:
            #  Save the starting position of the suffix.
            sfx_start = token.get_position()

            #  Read suffix numbers.
            has_sfx_number = False
            while token.is_operand():
                #  Mark the flag.
                has_sfx_number = True

                #  Process the suffix number.
                node.set_suffix_number(node.get_suffix_number() *
                                       token.get_operand_value().simplify())

                #  Next token.
                cursor += 1
                token = token_list[cursor]

            #  Get the suffix.
            sfx = node.get_suffix_number()

            #  Simplify before checking.
            sfx = sfx.simplify()

            #  Domain check: the suffix must be positive.
            if sfx.is_negative or sfx.is_zero:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_DOMAIN_ERROR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.domain_error.description"),
                    options)
                err.push_traceback(
                    expression, sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.domain_error.suffix"))
                raise err

            #  Validate: an explicitly written suffix of 1 is redundant.
            if has_sfx_number and sfx == _math_cst.ONE:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_EXCEED_OPERAND,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.description"),
                    options)
                err.push_traceback(
                    expression, sfx_start,
                    token.get_position() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.exceed_operand.suffix"))
                raise err

            #  Register the ending position of current working node.
            node.register_ending_position_in_source_text(token.get_position() -
                                                         1)

            #  Go to root state.
            state = _STATE_ROOT
        elif state == _STATE_MOLECULE_STATUS:
            #  Raise an error if the token is not at the end of the molecule.
            #  NOTE(review): this reuses the "electronic_misplaced" message key
            #  for a misplaced status descriptor — confirm a status-specific
            #  key isn't intended here.
            if not token_list[cursor + 1].is_end():
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNEXPECTED_TOKEN,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unexpected_token.electronic_misplaced"
                    ))
                raise err

            #  Fetch the molecule status.
            if token.is_gas_status():
                molecule_status = _ml_ast_base.STATUS_GAS
            elif token.is_liquid_status():
                molecule_status = _ml_ast_base.STATUS_LIQUID
            elif token.is_solid_status():
                molecule_status = _ml_ast_base.STATUS_SOLID
            elif token.is_aqueous_status():
                molecule_status = _ml_ast_base.STATUS_AQUEOUS
            else:
                raise RuntimeError("BUG: Unrecognized status.")

            #  Next token.
            cursor += 1

            #  Go to root state.
            state = _STATE_ROOT
        else:
            raise RuntimeError("BUG: Unrecognized state.")

    #  Get the ending position.
    ending_pos = token_list[-1].get_position() - 1

    #  Initialize the parenthesis-mismatched flag.
    mismatch_flag = False

    #  Pre-create an error (only raised if an unclosed parenthesis is found).
    err = _cm_error.Error(
        _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
        _l10n_reg.get_message(
            lang_id, "parser.molecule.error.parenthesis_mismatch.description"),
        options)

    while node is not None:
        #  Register the ending position of current working node.
        node.register_ending_position_in_source_text(ending_pos)

        #  Mark the error flag and add an error description if current node is a parenthesis node.
        if node.is_parenthesis():
            mismatch_flag = True
            err.push_traceback(
                expression, node.get_starting_position_in_source_text(),
                node.get_starting_position_in_source_text(),
                _l10n_reg.get_message(
                    lang_id,
                    "parser.molecule.error.parenthesis_mismatch.right"))

        #  Go to parent node.
        node = node.get_parent_node()

    #  Raise an error if we have met at least 1 parenthesis node.
    if mismatch_flag:
        raise err

    #  Now, we have constructed the whole AST, but we got a lot of useless hydrate group node.
    #  So we have to remove them (all hydrate groups nodes which have only 1 child).

    #  Get iterate order (bottom-up, so children are unpacked before parents).
    unpack_order = _ml_ast_bfs.do_bfs(root, True)

    #  Initialize unpacked node container.
    unpacked = {}

    for node in unpack_order:
        if node.is_hydrate_group():
            assert isinstance(node, _ml_ast_base.ASTNodeHydrateGroup)

            if len(node) == 1:
                #  Get the child node and reset its parent
                child = unpacked[id(node[0])]
                child.set_parent_node(node.get_parent_node())

                #  Save the unpack result.
                unpacked[id(node)] = child
            else:
                #  Update children links.
                for child_id in range(0, len(node)):
                    node[child_id] = unpacked[id(node[child_id])]

                #  Save the unpack result.
                unpacked[id(node)] = node
        elif node.is_molecule():
            assert isinstance(node, _ml_ast_base.ASTNodeMolecule)

            #  Update children links.
            for child_id in range(0, len(node)):
                node[child_id] = unpacked[id(node[child_id])]

            #  Save the unpack result.
            unpacked[id(node)] = node
        elif node.is_parenthesis():
            assert isinstance(node, _ml_ast_base.ASTNodeParenthesisWrapper)

            #  Update children links.
            node.set_inner_node(unpacked[id(node.get_inner_node())])

            #  Save the unpack result.
            unpacked[id(node)] = node
        else:
            #  Save the unpack result.
            unpacked[id(node)] = node

    #  Set molecule status.
    root = unpacked[id(root)]
    """:type : bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule"""
    root.set_status(molecule_status)

    return root
Пример #15
0
def parse_to_rpn(expression,
                 token_list,
                 options,
                 protected_header_enabled=False,
                 protected_header_prefix="X"):
    """Parse an infix math expression to RPN.

    Shunting-yard style conversion with extra bookkeeping for:
      * implicit multiplication completion (e.g. '4x' => '4*x',
        '(2+3)(4+2)' => '(2+3)*(4+2)');
      * function calls (argument counting against the function's
        declared argument count);
      * mixed bracket types ('()', '[]', '{}') with mismatch detection.

    :type expression: str
    :type token_list: list[bce.parser.mexp.token.Token]
    :type options: bce.option.Option
    :type protected_header_enabled: bool
    :type protected_header_prefix: str
    :param expression: The infix math expression.
    :param token_list: The tokenized infix math expression.
    :param options: The options.
    :param protected_header_enabled: Whether the protected headers are enabled.
    :param protected_header_prefix: The prefix of the protected headers.
    :rtype : list[bce.parser.mexp.token.Token]
    :return: The RPN token list.
    :raise bce.parser.common.error.Error: Raise when a parser error occurred.
    """

    #  Initialize
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()
    token_id = 0
    token_count = len(token_list)
    rpn = _RPNProcessor()
    #  Function-call state for the CURRENT parenthesis level; it is saved onto
    #  'parenthesis_stack' on every '(' and restored on the matching ')'.
    current_argc = 0
    required_argc = 0
    #  Index of the most recent separator or left parenthesis token;
    #  -1 is a sentinel meaning "none seen yet".
    prev_separator_position = -1
    #  Right bracket symbol => the left bracket symbol it must match.
    parenthesis_mapping = {")": "(", "]": "[", "}": "{"}
    parenthesis_stack = _adt_stack.Stack()
    in_function = False

    while token_id < token_count:
        #  Get current token.
        token = token_list[token_id]

        #  Get previous token (None when at the first token).
        if token_id != 0:
            prev_tok = token_list[token_id - 1]
        else:
            prev_tok = None

        if token.is_operand():
            if token.is_symbol_operand():
                #  Check the protected header.
                if protected_header_enabled and token.get_symbol().startswith(
                        protected_header_prefix):
                    err = _cm_error.Error(
                        _mexp_errors.MEXP_USE_PROTECTED_HEADER,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.protected_header.description"),
                        options)
                    err.push_traceback(
                        expression,
                        token.get_position(),
                        token.get_position() + len(token.get_symbol()) - 1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.protected_header.message"),
                        replace_map={"$1": protected_header_prefix})
                    raise err

            if prev_tok is not None:
                if prev_tok.is_right_parenthesis():
                    if token.is_symbol_operand():
                        #  Do completion:
                        #    ([expr])[unknown] => ([expr])*[unknown]
                        #
                        #  For example:
                        #    (3-y)x => (3-y)*x
                        rpn.add_operator(
                            _mexp_token.create_multiply_operator_token())
                    else:
                        #  Numeric parenthesis suffix was not supported.
                        #
                        #  For example:
                        #    (x-y)3
                        #         ^
                        #         Requires a '*' before this token.
                        err = _cm_error.Error(
                            _mexp_errors.MEXP_MISSING_OPERATOR,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.missing_operator.description"
                            ), options)
                        err.push_traceback(
                            expression, token.get_position(),
                            token.get_position() + len(token.get_symbol()) - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.missing_operator.multiply_before"
                            ))
                        raise err

                if prev_tok.is_operand():
                    #  Do completion:
                    #    [number][symbol] => [number]*[symbol]
                    #
                    #  For example:
                    #    4x => 4*x
                    rpn.add_operator(
                        _mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_operand(token)

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_function():
            #  Raise an error if the function is unsupported.
            if _mexp_functions.find_function(token.get_symbol()) is None:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_FUNCTION_UNSUPPORTED,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.unsupported_function.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position() + len(token.get_symbol()) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.unsupported_function.message"),
                    replace_map={"$1": token.get_symbol()})
                raise err

            if prev_tok is not None and (prev_tok.is_operand()
                                         or prev_tok.is_right_parenthesis()):
                #  Do completion:
                #    [num][fn] => [num]*[fn]
                #
                #  For example:
                #    4pow(2,3) => 4*pow(2,3)
                rpn.add_operator(_mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_function(token)

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_operator():
            #  Get the operator.
            op = _mexp_operators.OPERATORS[token.get_subtype()]

            #  Check operands (each check raises on a missing operand).
            if op.is_required_left_operand():
                _check_left_operand(expression, token_list, token_id, options)

            if op.is_required_right_operand():
                _check_right_operand(expression, token_list, token_id, options)

            #  Process the token.
            rpn.add_operator(token)

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_left_parenthesis():
            #  Save state (restored when the matching right parenthesis
            #  is processed).
            parenthesis_stack.push(
                _ParenthesisStackItem(token.get_symbol(), token_id,
                                      in_function, current_argc, required_argc,
                                      prev_separator_position))

            current_argc = 0
            prev_separator_position = token_id

            #  Set function state and get required argument count.
            if prev_tok is not None and prev_tok.is_function():
                #  Mark the flag.
                in_function = True

                #  Get the function object.
                fn_object = _mexp_functions.find_function(
                    prev_tok.get_symbol())
                if fn_object is None:
                    #  Unreachable: the function branch above already
                    #  rejected unsupported functions.
                    raise RuntimeError("BUG: Function object is None.")

                #  Get the required argument count.
                required_argc = fn_object.get_argument_count()
            else:
                #  Clear the flag.
                in_function = False
                required_argc = 0

            if prev_tok is not None and (prev_tok.is_right_parenthesis()
                                         or prev_tok.is_operand()):
                #  Do completion
                #    [lp][expr][rp][lp][expr][rp] => [lp][expr][rp]*[lp][expr][rp]
                #
                #  For example:
                #    (2+3)(4+2) => (2+3)*(4+2)
                rpn.add_operator(_mexp_token.create_multiply_operator_token())

            #  Process the token.
            rpn.add_left_parenthesis(token)

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_right_parenthesis():
            #  Raise an error if there's no content between two separators.
            #
            #  NOTE(review): if a right parenthesis is the very first token,
            #  prev_separator_position (-1) + 1 == token_id (0) holds while
            #  prev_tok is None, so the is_left_parenthesis() call below would
            #  fail — TODO confirm the tokenizer can never emit that sequence.
            if prev_separator_position + 1 == token_id:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.description"),
                    options)
                if prev_tok.is_left_parenthesis():
                    err.push_traceback(
                        expression, prev_tok.get_position(),
                        token.get_position(),
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.no_content.in_parentheses"))
                else:
                    err.push_traceback(
                        expression, prev_tok.get_position(),
                        token.get_position(),
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.no_content.in_argument"))

                raise err

            #  Raise an error if there's no left parenthesis to be matched with.
            if len(parenthesis_stack) == 0:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.left"))
                raise err

            #  Get the top item of the stack.
            p_item = parenthesis_stack.pop()

            #  Get the symbol of the parenthesis matches with current token.
            p_matched_sym = parenthesis_mapping[token.get_symbol()]

            #  Raise an error if the parenthesis was mismatched
            #  (e.g. '[1+2)').
            if p_matched_sym != p_item.get_symbol():
                err = _cm_error.Error(
                    _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.description"),
                    options)
                err.push_traceback(
                    expression,
                    token.get_position(),
                    token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.parenthesis_mismatch.incorrect"),
                    replace_map={"$1": p_matched_sym})
                raise err

            if in_function:
                #  The closing parenthesis terminates the last argument.
                current_argc += 1

                #  Raise an error if the argument count was not matched.
                if current_argc != required_argc:
                    #  The function token immediately precedes its '('.
                    fn_token = token_list[p_item.get_token_id() - 1]

                    err = _cm_error.Error(
                        _mexp_errors.MEXP_FUNCTION_ARGUMENT_COUNT_MISMATCH,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.argument_count_mismatch.description"
                        ), options)
                    err.push_traceback(
                        expression, fn_token.get_position(),
                        fn_token.get_position() + len(fn_token.get_symbol()) -
                        1,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.mexp.error.argument_count_mismatch.message"
                        ), {
                            "$1": str(required_argc),
                            "$2": str(current_argc)
                        })
                    raise err

            #  Restore state of the enclosing parenthesis level.
            in_function = p_item.is_in_function()
            current_argc = p_item.get_current_argument_count()
            required_argc = p_item.get_required_argument_count()
            prev_separator_position = p_item.get_previous_separator_position()

            #  Process the token.
            rpn.add_right_parenthesis()

            #  Go to next token.
            token_id += 1

            continue
        elif token.is_separator():
            #  Raise an error if we're not in function now.
            if not in_function:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_ILLEGAL_ARGUMENT_SEPARATOR,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.illegal_separator.description"),
                    options)
                err.push_traceback(
                    expression, token.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.mexp.error.illegal_separator.message"))
                raise err

            #  Raise an error if there's no content between two separators.
            #  (prev_tok is never None here: a leading separator is rejected
            #  by the 'not in_function' check above.)
            if prev_separator_position + 1 == token_id:
                err = _cm_error.Error(
                    _mexp_errors.MEXP_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.description"),
                    options)
                err.push_traceback(
                    expression, prev_tok.get_position(), token.get_position(),
                    _l10n_reg.get_message(
                        lang_id, "parser.mexp.error.no_content.in_argument"))
                raise err

            #  Save separator position.
            prev_separator_position = token_id

            #  Increase argument counter.
            current_argc += 1

            #  Process the token.
            rpn.add_separator()

            #  Go to next token.
            token_id += 1

            continue
        else:
            raise RuntimeError("Never reach this condition.")

    #  Raise an error if there are still some left parentheses in the stack.
    if len(parenthesis_stack) != 0:
        err = _cm_error.Error(
            _mexp_errors.MEXP_PARENTHESIS_MISMATCH,
            _l10n_reg.get_message(
                lang_id, "parser.mexp.error.parenthesis_mismatch.description"),
            options)
        while len(parenthesis_stack) != 0:
            p_item = parenthesis_stack.pop()
            p_token = token_list[p_item.get_token_id()]
            err.push_traceback(
                expression, p_token.get_position(), p_token.get_position(),
                _l10n_reg.get_message(
                    lang_id, "parser.mexp.error.parenthesis_mismatch.right"))
        raise err

    #  Pop all items off from the stack and push them onto the RPN token list.
    rpn.finalize()

    #  Return the RPN token list.
    return rpn.get_rpn()
Пример #16
0
def tokenize(expression,
             options,
             mexp_protected_header_enabled=False,
             mexp_protected_header_prefix="X"):
    """Tokenize a molecule expression.

    Scans the expression left to right, emitting one token per lexeme.
    Branch order matters: status descriptors like '(g)' must be tried
    before a plain '(' is accepted.

    :type expression: str
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The expression.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : list[Token]
    :return: The token list.
    :raise bce.parser.common.error.Error: Raise when a parser error occurred.
    """

    #  Localization and interface option wrappers.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()
    if_opt = _interface_opt.OptionWrapper(options)

    #  Token accumulator and scanning cursor.
    tokens = []
    pos = 0
    length = len(expression)

    while pos < length:
        ch = expression[pos]

        if ch.isdigit():
            #  Integer operand: consume the maximal run of digits.
            stop = pos + 1
            while stop < length and expression[stop].isdigit():
                stop += 1

            tokens.append(
                create_integer_operand_token(expression[pos:stop],
                                             len(tokens), pos))
            pos = stop
        elif ch.isupper():
            #  Atom symbol: one upper-case letter plus trailing lower-case ones.
            stop = pos + 1
            while stop < length and expression[stop].islower():
                stop += 1

            tokens.append(
                create_symbol_token(expression[pos:stop], len(tokens), pos))
            pos = stop
        elif ch == ".":
            #  Hydrate dot.
            tokens.append(create_hydrate_dot_token(len(tokens), pos))
            pos += 1
        elif expression.startswith("(g)", pos):
            #  Gas status descriptor.
            tokens.append(create_gas_status_token(len(tokens), pos))
            pos += 3
        elif expression.startswith("(l)", pos):
            #  Liquid status descriptor.
            tokens.append(create_liquid_status_token(len(tokens), pos))
            pos += 3
        elif expression.startswith("(s)", pos):
            #  Solid status descriptor.
            tokens.append(create_solid_status_token(len(tokens), pos))
            pos += 3
        elif expression.startswith("(aq)", pos):
            #  Aqueous status descriptor.
            tokens.append(create_aqueous_status_token(len(tokens), pos))
            pos += 4
        elif ch == "(":
            #  Plain left parenthesis (status descriptors were ruled out above).
            tokens.append(create_left_parenthesis_token(len(tokens), pos))
            pos += 1
        elif ch == ")":
            #  Plain right parenthesis.
            tokens.append(create_right_parenthesis_token(len(tokens), pos))
            pos += 1
        elif ch == "[":
            #  Abbreviation: locate the closing ']'.
            close = expression.find("]", pos + 1)

            #  Raise an error if the ']' is absent.
            if close == -1:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ), options)
                err.push_traceback(
                    expression, pos, pos,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.right"))
                raise err

            stop = close + 1
            tokens.append(
                create_abbreviation_token(expression[pos:stop], len(tokens),
                                          pos))
            pos = stop
        elif ch == "{":
            #  Embedded math expression: scan for the matching '}' while
            #  tracking the nesting depth of all bracket kinds.
            depth = 0
            stop = -1
            scan = pos + 1

            while scan < length:
                scan_ch = expression[scan]

                if scan_ch in "([{":
                    #  A left bracket increases the nesting depth.
                    depth += 1
                elif scan_ch in ")]}":
                    if depth == 0:
                        #  The first top-level right bracket must be the
                        #  closing '}'.
                        if scan_ch != "}":
                            err = _cm_error.Error(
                                _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                                _l10n_reg.get_message(
                                    lang_id,
                                    "parser.molecule.error.parenthesis_mismatch.description"
                                ), options)
                            err.push_traceback(
                                expression, scan, scan,
                                _l10n_reg.get_message(
                                    lang_id,
                                    "parser.molecule.error.parenthesis_mismatch.incorrect",
                                    replace_map={"$1": "}"}))
                            raise err

                        #  Found the end; include the '}' itself.
                        stop = scan + 1
                        break

                    #  A right bracket closes one nesting level.
                    depth -= 1

                scan += 1

            #  Raise an error if the closing '}' is absent.
            if stop == -1:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_PARENTHESIS_MISMATCH,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.description"
                    ), options)
                err.push_traceback(
                    expression, pos, pos,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parenthesis_mismatch.right"))
                raise err

            #  Raise an error if the braces are empty ('{}').
            if pos + 2 == stop:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.description"),
                    options)
                err.push_traceback(
                    expression, pos, pos + 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.no_content.inside"))
                raise err

            #  Extract the sub-expression (braces included) and evaluate it.
            mexp_expr = expression[pos:stop]
            try:
                ev_value = if_opt.get_mexp_parser().parse(
                    mexp_expr,
                    options,
                    protected_header_enabled=mexp_protected_header_enabled,
                    protected_header_prefix=mexp_protected_header_prefix)
            except _cm_error.Error as err:
                err.push_traceback(
                    expression, pos, stop - 1,
                    _l10n_reg.get_message(
                        lang_id, "parser.molecule.error.parsing_mexp.message"))
                raise err

            tokens.append(
                create_mexp_operand_token(mexp_expr, ev_value, len(tokens),
                                          pos))
            pos = stop
        elif ch == "<":
            #  Electronic section begin parenthesis.
            tokens.append(create_electronic_begin_token(len(tokens), pos))
            pos += 1
        elif ch == ">":
            #  Electronic section end parenthesis.
            tokens.append(create_electronic_end_token(len(tokens), pos))
            pos += 1
        elif expression.startswith("e+", pos):
            #  Positive electronic flag.
            tokens.append(
                create_positive_electronic_flag_token(len(tokens), pos))
            pos += 2
        elif expression.startswith("e-", pos):
            #  Negative electronic flag.
            tokens.append(
                create_negative_electronic_flag_token(len(tokens), pos))
            pos += 2
        else:
            #  Nothing matched; the character can't be tokenized.
            err = _cm_error.Error(
                _ml_error.MOLECULE_UNRECOGNIZED_TOKEN,
                _l10n_reg.get_message(
                    lang_id,
                    "parser.molecule.error.unrecognized_token.description"),
                options)
            err.push_traceback(
                expression, pos, pos,
                _l10n_reg.get_message(
                    lang_id,
                    "parser.molecule.error.unrecognized_token.message"))
            raise err

    #  Terminate the stream with an end token.
    tokens.append(create_end_token(len(tokens), len(expression)))

    return tokens
Пример #17
0
def tokenize(expression, options):
    """Tokenize a molecule expression.

    :type expression: str
    :type options: _opt.Option
    :param expression: The expression.
    :param options: The BCE options.
    :rtype : list of Token
    :return: The tokenized molecule.
    :raise _pe.Error: When we meet a parser error.
    """

    #  Initialize.
    r = []
    cur_pos = 0
    end_pos = len(expression)

    while cur_pos < end_pos:
        cur_ch = expression[cur_pos]

        #  Read a integer token if current character is a digit.
        if cur_ch.isdigit():
            #  Search for the next non-digit character.
            search_pos = cur_pos + 1
            search_end = end_pos

            while search_pos < end_pos:
                search_ch = expression[search_pos]

                if not search_ch.isdigit():
                    search_end = search_pos
                    break

                #  Go to next searching position.
                search_pos += 1

            #  Create an integer token.
            r.append(
                create_integer_operand_token(expression[cur_pos:search_end],
                                             len(r), cur_pos))

            #  Go to next position.
            cur_pos = search_end

            continue

        #  Read an atom symbol if current character is a upper-case alphabet.
        if cur_ch.isupper():
            #  Search for next non-lower-case character.
            search_pos = cur_pos + 1
            search_end = end_pos

            while search_pos < end_pos:
                if not expression[search_pos].islower():
                    search_end = search_pos
                    break

                #  Go to next searching position.
                search_pos += 1

            #  Create a symbol token.
            r.append(
                create_symbol_token(expression[cur_pos:search_end], len(r),
                                    cur_pos))

            #  Go to next position.
            cur_pos = search_end

            continue

        #  Read a hydrate-dot token if current character is a dot.
        if cur_ch == ".":
            #  Create a dot token.
            r.append(create_hydrate_dot_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if expression.startswith("(g)", cur_pos):
            #  Create a status descriptor token.
            r.append(create_gas_status_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 3

            continue

        if expression.startswith("(l)", cur_pos):
            #  Create a status descriptor token.
            r.append(create_liquid_status_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 3

            continue

        if expression.startswith("(s)", cur_pos):
            #  Create a status descriptor token.
            r.append(create_solid_status_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 3

            continue

        if expression.startswith("(aq)", cur_pos):
            #  Create a status descriptor token.
            r.append(create_aqueous_status_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 4

            continue

        #  Read a normal left parenthesis if current character is '('.
        if cur_ch == "(":
            #  Create a left parenthesis token.
            r.append(create_left_parenthesis_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        #  Read a normal right parenthesis if current character is ')'.
        if cur_ch == ")":
            #  Create a right parenthesis token.
            r.append(create_right_parenthesis_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        #  Read a abbreviation if current character is '['.
        if cur_ch == "[":
            #  Find the ']'.
            search_end = -1
            search_pos = cur_pos + 1

            while search_pos < end_pos:
                if expression[search_pos] == "]":
                    search_end = search_pos + 1
                    break

                #  Go to next searching position.
                search_pos += 1

            #  Raise an error if we can't find the ']'.
            if search_end == -1:
                err = _pe.Error(
                    _ml_error.PE_ML_PARENTHESIS_MISMATCH,
                    _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_DESCRIPTION,
                    options)

                err.push_traceback_ex(
                    expression, cur_pos, cur_pos,
                    _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_MISSING_RIGHT)

                raise err

            #  Create an abbreviation token.
            r.append(
                create_abbreviation_token(expression[cur_pos:search_end],
                                          len(r), cur_pos))

            #  Go to next position.
            cur_pos = search_end

            continue

        #  Read a math expression if current character is '{'.
        if cur_ch == "{":
            #  Simulate a parenthesis stack to find the end '}'.
            p_mexp = 0

            #  Searching the end '}'.
            search_end = -1
            search_pos = cur_pos + 1

            while search_pos < end_pos:
                search_ch = expression[search_pos]

                if search_ch == "(" or search_ch == "[" or search_ch == "{":
                    #  If current character is a left parenthesis, push it onto the stack.
                    p_mexp += 1
                elif search_ch == ")" or search_ch == "]" or search_ch == "}":
                    #  When we meet a right parenthesis and there's no left parenthesis in the stack.
                    #  The parenthesis we met should be the end '}'.
                    if p_mexp == 0:
                        #  Raise an error if the parenthesis isn't '}'.
                        if search_ch != "}":
                            err = _pe.Error(
                                _ml_error.PE_ML_PARENTHESIS_MISMATCH, _msg_id.
                                MSG_PE_ML_PARENTHESIS_MISMATCH_DESCRIPTION,
                                options)

                            err.push_traceback_ex(
                                expression, search_pos, search_pos, _msg_id.
                                MSG_PE_ML_PARENTHESIS_MISMATCH_INCORRECT,
                                {"$1": "}"})

                            raise err

                        #  Set the end position.
                        search_end = search_pos + 1

                        break

                    #  Pop the parenthesis off from the stack.
                    p_mexp -= 1
                else:
                    pass

                #  Go to next searching position.
                search_pos += 1

            #  Raise an error if we can't find the end '}'.
            if search_end == -1:
                err = _pe.Error(
                    _ml_error.PE_ML_PARENTHESIS_MISMATCH,
                    _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_DESCRIPTION,
                    options)

                err.push_traceback_ex(
                    expression, cur_pos, cur_pos,
                    _msg_id.MSG_PE_ML_PARENTHESIS_MISMATCH_MISSING_RIGHT)

                raise err

            #  Raise an error if the math expression has no content.
            if cur_pos + 2 == search_end:
                err = _pe.Error(_ml_error.PE_ML_NO_CONTENT,
                                _msg_id.MSG_PE_ML_NO_CONTENT_DESCRIPTION,
                                options)

                err.push_traceback_ex(expression, cur_pos, cur_pos + 1,
                                      _msg_id.MSG_PE_ML_NO_CONTENT_INSIDE)

                raise err

            #  Get the expression.
            mexp_expr = expression[cur_pos:search_end]

            #  Evaluate the expression.
            try:
                ev_value = _mexp_ev.evaluate_math_expression(
                    mexp_expr, options)
            except _pe.Error as err:
                err.push_traceback_ex(expression, cur_pos, search_end - 1,
                                      _msg_id.MSG_PE_ML_TRACEBACK_ERROR_MEXP)

                raise err

            #  Create a math expression token.
            r.append(
                create_mexp_operand_token(mexp_expr, ev_value, len(r),
                                          cur_pos))

            #  Go to next position.
            cur_pos = search_end

            continue

        if cur_ch == "<":
            #  Create an electronic begin parenthesis token.
            r.append(create_electronic_begin_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if cur_ch == ">":
            #  Create an electronic begin parenthesis token.
            r.append(create_electronic_end_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 1

            continue

        if expression.startswith("e+", cur_pos):
            #  Create a positive electronic flag token.
            r.append(create_positive_electronic_flag_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 2

            continue

        if expression.startswith("e-", cur_pos):
            #  Create a negative electronic flag token.
            r.append(create_negative_electronic_flag_token(len(r), cur_pos))

            #  Go to next position.
            cur_pos += 2

            continue

        #  Raise an error if current character can't be tokenized.
        err = _pe.Error(_ml_error.PE_ML_UNRECOGNIZED_TOKEN,
                        _msg_id.MSG_PE_ML_UNRECOGNIZED_TOKEN_DESCRIPTION,
                        options)

        err.push_traceback_ex(expression, cur_pos, cur_pos,
                              _msg_id.MSG_PE_ML_UNRECOGNIZED_TOKEN_TB_MESSAGE)

        raise err

    #  Add an end token.
    r.append(create_end_token(len(r), len(expression)))

    return r
Пример #18
0
def tokenize(expression, options):
    """Tokenize a chemical equation.

    :type expression: str
    :type options: _opt.Option
    :param expression: The chemical equation.
    :param options: The BCE options.
    :rtype : list[Token]
    :return: The token list.
    """

    #  Token container and scanning position.
    result = []
    position = 0

    while position < len(expression):
        #  Fetch the character at the current position.
        character = expression[position]

        if character == "+":
            #  Plus operator token.
            result.append(create_operator_plus_token(len(result), position))
            position += 1
        elif character == "-":
            #  Minus operator token.
            result.append(create_operator_minus_token(len(result), position))
            position += 1
        elif character == ";":
            #  Separator operator token.
            result.append(create_operator_separator_token(len(result), position))
            position += 1
        elif character == "=":
            #  Equal sign token.
            result.append(create_equal_token(len(result), position))
            position += 1
        else:
            #  Anything else starts a molecule symbol; scan forward until a
            #  top-level operator terminates it.

            #  Stack holding the positions of the unmatched left parentheses.
            paren_stack = _adt_stack.Stack()

            #  Scanning cursor and the molecule symbol being collected.
            scan_pos = position
            symbol = ""

            while scan_pos < len(expression):
                scan_ch = expression[scan_pos]

                if scan_ch in ["(", "[", "{", "<"]:
                    #  Remember where this left parenthesis is.
                    paren_stack.push(scan_pos)

                    symbol += scan_ch
                elif scan_ch in [")", "]", "}", ">"]:
                    #  A right parenthesis without a pending left one is a mismatch.
                    if len(paren_stack) == 0:
                        err = _pe.Error(
                            _ce_error.PE_CE_PARENTHESIS_MISMATCH,
                            _msg_id.MSG_PE_CE_PARENTHESIS_MISMATCH_DESCRIPTION,
                            options)

                        err.push_traceback_ex(
                            expression, scan_pos, scan_pos, _msg_id.
                            MSG_PE_CE_PARENTHESIS_MISMATCH_MISSING_LEFT)

                        raise err

                    #  This right parenthesis closes the most recent left one.
                    paren_stack.pop()

                    symbol += scan_ch
                elif scan_ch in ["+", "-", ";", "="] and len(paren_stack) == 0:
                    #  Operators end the molecule only outside all parentheses.
                    break
                else:
                    symbol += scan_ch

                #  Advance the scanning cursor.
                scan_pos += 1

            #  Positions still on the stack are unclosed left parentheses.
            if len(paren_stack) != 0:
                err = _pe.Error(
                    _ce_error.PE_CE_PARENTHESIS_MISMATCH,
                    _msg_id.MSG_PE_CE_PARENTHESIS_MISMATCH_DESCRIPTION,
                    options)

                #  Report every unclosed parenthesis.
                while len(paren_stack) != 0:
                    unmatched_pos = paren_stack.pop()
                    err.push_traceback_ex(
                        expression, unmatched_pos, unmatched_pos,
                        _msg_id.MSG_PE_CE_PARENTHESIS_MISMATCH_MISSING_RIGHT)

                raise err

            #  Emit the molecule token.
            result.append(create_molecule_token(symbol, len(result),
                                                position))

            #  Resume scanning where the molecule ended.
            position = scan_pos

    #  Append the end-of-stream token.
    result.append(create_end_token(len(result), len(expression)))

    return result
Пример #19
0
def parse_ast(expression, root_node, options, mexp_protected_header_enabled=False, mexp_protected_header_prefix="X"):
    """Parse an AST.

    Walks the molecule AST bottom-up (leaves first) and folds each node's
    atom counts into a MergeUtil, producing the atom-count dictionary of the
    whole molecule.

    :type expression: str
    :type root_node: bce.parser.ast.molecule.ASTNodeHydrateGroup | bce.parser.ast.molecule.ASTNodeMolecule
    :type options: bce.option.Option
    :type mexp_protected_header_enabled: bool
    :type mexp_protected_header_prefix: str
    :param expression: The origin expression.
    :param root_node: The root node of the AST.
    :param options: The options.
    :param mexp_protected_header_enabled: Whether the MEXP protected headers are enabled.
    :param mexp_protected_header_prefix: The prefix of the MEXP protected headers.
    :rtype : dict
    :return: The parsed atoms dictionary.
    :raise _cm_error.Error: Raise when a (sub-)molecule has no content, when an
        abbreviation is unsupported, or when expanding an abbreviation fails.
    """

    #  Wrap the interface option.
    if_opt = _interface_opt.OptionWrapper(options)

    #  Wrap the molecule option.
    molecule_opt = _ml_opt.OptionWrapper(options)

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    #  Get the iteration order (BFS order reversed by do_bfs(..., True), so
    #  children are always processed before their parents).
    work_list = _ml_ast_bfs.do_bfs(root_node, True)

    #  Initialize the parsed node container.
    #  Keyed by id(node) so each node can look up its children's results.
    parsed = {}
    """:type : dict[int, MergeUtil]"""

    #  Iterate nodes from the leaves to the root.
    for work_node in work_list:
        if work_node.is_hydrate_group() or work_node.is_molecule():
            assert isinstance(work_node, _ast_base.ASTNodeHydrateGroup) or \
                isinstance(work_node, _ast_base.ASTNodeMolecule)

            #  Get the prefix number (the stoichiometric multiplier of this node).
            coeff = work_node.get_prefix_number()

            #  Initialize a new merge utility.
            build = MergeUtil()

            #  Process the electronics.
            if work_node.is_molecule():
                #  NOTE(review): el_charge appears to be a symbolic value
                #  (it has .simplify() and .is_zero) — presumably SymPy; confirm.
                el_charge = work_node.get_electronic_count().simplify()
                if not el_charge.is_zero:
                    #  Track the charge as a pseudo-atom "e".
                    build.add("e", el_charge * coeff)

            #  Iterate all children.
            for child_id in range(0, len(work_node)):
                #  Get child node and its parsing result.
                child = work_node[child_id]
                child_parsed = parsed[id(child)]

                #  Content check: inside a hydrate group, every member molecule
                #  must contribute at least one atom.
                if work_node.is_hydrate_group() and len(child_parsed) == 0:
                    assert isinstance(child, _ast_base.ASTNodeMolecule)

                    err = _cm_error.Error(
                        _ml_error.MOLECULE_NO_CONTENT,
                        _l10n_reg.get_message(
                            lang_id,
                            "parser.molecule.error.no_content.description"
                        ),
                        options
                    )

                    #  Point the traceback at the empty spot: before the first
                    #  child, after the last child, or between two children.
                    if child_id == 0:
                        err.push_traceback(
                            expression,
                            child.get_ending_position_in_source_text() + 1,
                            child.get_ending_position_in_source_text() + 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.molecule.error.no_content.before"
                            )
                        )
                    elif child_id == len(work_node) - 1:
                        err.push_traceback(
                            expression,
                            child.get_starting_position_in_source_text() - 1,
                            child.get_starting_position_in_source_text() - 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.molecule.error.no_content.after"
                            )
                        )
                    else:
                        err.push_traceback(
                            expression,
                            child.get_starting_position_in_source_text() - 1,
                            child.get_ending_position_in_source_text() + 1,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.molecule.error.no_content.inside"
                            )
                        )

                    raise err

                #  Merge the child's atom counts, scaled by the prefix number.
                build.merge(child_parsed, coeff)

            #  Do simplifying.
            _macro_simplify(expression, build, work_node, options)

            #  Save the parsed result.
            parsed[id(work_node)] = build
        elif work_node.is_atom():
            assert isinstance(work_node, _ast_base.ASTNodeAtom)

            #  Get suffix number (the subscript count of this atom).
            coeff = work_node.get_suffix_number()

            #  Initialize a new merge utility.
            build = MergeUtil()

            #  Add the atom.
            build.add(work_node.get_atom_symbol(), coeff)

            #  Save the parsed result.
            parsed[id(work_node)] = build
        elif work_node.is_parenthesis():
            assert isinstance(work_node, _ast_base.ASTNodeParenthesisWrapper)

            #  Get suffix number (multiplier applied to the whole group).
            coeff = work_node.get_suffix_number()

            #  Initialize a new merge utility.
            build = MergeUtil()

            #  Get inner node and its parsing result.
            inner_parsed = parsed[id(work_node.get_inner_node())]

            #  Content check: "()" with nothing inside is an error.
            if len(inner_parsed) == 0:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.description"
                    ),
                    options
                )
                err.push_traceback(
                    expression,
                    work_node.get_starting_position_in_source_text(),
                    work_node.get_ending_position_in_source_text(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.inside"
                    )
                )
                raise err

            #  Merge, scaled by the suffix number.
            build.merge(inner_parsed, coeff)

            #  Do simplifying.
            _macro_simplify(expression, build, work_node, options)

            #  Save the parsed result.
            parsed[id(work_node)] = build
        elif work_node.is_abbreviation():
            assert isinstance(work_node, _ast_base.ASTNodeAbbreviation)

            #  Get the abbreviation symbol (the text between '[' and ']').
            abbr_symbol = work_node.get_abbreviation_symbol()

            #  Check symbol length: "[]" is an error.
            if len(abbr_symbol) == 0:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_NO_CONTENT,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.description"
                    ),
                    options
                )
                err.push_traceback(
                    expression,
                    work_node.get_starting_position_in_source_text(),
                    work_node.get_ending_position_in_source_text(),
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.no_content.inside"
                    )
                )
                raise err

            #  Get the abbreviation mapping (symbol -> expansion expression).
            abbr_mapping = molecule_opt.get_abbreviation_mapping()

            #  Check the existence.
            if abbr_symbol not in abbr_mapping:
                err = _cm_error.Error(
                    _ml_error.MOLECULE_UNSUPPORTED_ABBREVIATION,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unsupported_abbreviation.description"
                    ),
                    options
                )
                #  +1 / -1 trim the surrounding '[' and ']' from the highlight.
                err.push_traceback(
                    expression,
                    work_node.get_starting_position_in_source_text() + 1,
                    work_node.get_ending_position_in_source_text() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.unsupported_abbreviation.message"
                    )
                )
                raise err

            abbr_expression = abbr_mapping[abbr_symbol]

            #  Recursively parse the abbreviation's expansion with the same
            #  molecule parser; on failure, attach both the expansion and the
            #  origin location to the traceback.
            try:
                abbr_parser = if_opt.get_molecule_parser()
                abbr_ast_root = abbr_parser.parse_expression(
                    abbr_expression,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )
                abbr_resolved = abbr_parser.parse_ast(
                    abbr_expression,
                    abbr_ast_root,
                    options,
                    mexp_protected_header_enabled=mexp_protected_header_enabled,
                    mexp_protected_header_prefix=mexp_protected_header_prefix
                )
            except _cm_error.Error as err:
                err.push_traceback(
                    abbr_expression,
                    0,
                    len(abbr_expression) - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parsing_abbreviation.expand"
                    )
                )
                err.push_traceback(
                    expression,
                    work_node.get_starting_position_in_source_text() + 1,
                    work_node.get_ending_position_in_source_text() - 1,
                    _l10n_reg.get_message(
                        lang_id,
                        "parser.molecule.error.parsing_abbreviation.origin"
                    )
                )
                raise err

            #  Initialize a new merge utility.
            build = MergeUtil()

            #  Get the suffix number.
            coeff = work_node.get_suffix_number()

            #  Add atoms from the expanded abbreviation, scaled by the suffix.
            for atom_symbol in abbr_resolved:
                build.add(atom_symbol, abbr_resolved[atom_symbol] * coeff)

            #  Do simplifying.
            _macro_simplify(expression, build, work_node, options)

            #  Save the parsed result.
            parsed[id(work_node)] = build
        else:
            raise RuntimeError("Never reach this condition.")

    #  Get the parsing result of the root node.
    root_node_parsed = parsed[id(root_node)]

    #  Content check: the whole molecule must contain at least one atom.
    if len(root_node_parsed) == 0:
        err = _cm_error.Error(
            _ml_error.MOLECULE_NO_CONTENT,
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.no_content.description"
            ),
            options
        )
        err.push_traceback(
            expression,
            0,
            len(expression) - 1,
            _l10n_reg.get_message(
                lang_id,
                "parser.molecule.error.no_content.inside"
            )
        )
        raise err

    return root_node_parsed.get_data()
Пример #20
0
def tokenize(expression, options):
    """Tokenize a math expression.

    :type expression: str
    :type options: bce.option.Option
    :param expression: The math expression.
    :param options: The options.
    :rtype : list[Token]
    :return: The token list.
    :raise bce.parser.common.error.Error: Raise when meet a parser error.
    """

    #  Token container, scanning position and expression length.
    tokens = []
    pos = 0
    length = len(expression)

    #  Get the language ID.
    lang_id = _l10n_opt.OptionWrapper(options).get_language_id()

    while pos < length:
        ch = expression[pos]

        #  The most recently emitted token (None at the very beginning).
        prev_tok = tokens[-1] if len(tokens) != 0 else None

        #  A digit starts a numeric literal.
        if ch.isdigit():
            #  Scan forward past digits and at most one decimal dot.
            has_dot = False
            first_dot_pos = -1
            scan = pos + 1
            number_end = length

            while scan < length:
                scan_ch = expression[scan]
                if scan_ch == ".":
                    #  A second decimal dot inside one literal is illegal.
                    if has_dot:
                        err = _cm_error.Error(
                            _mexp_errors.MEXP_DUPLICATED_DECIMAL_DOT,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.description"
                            ), options)
                        err.push_traceback(
                            expression, scan, scan,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.duplicated_dot"
                            ))
                        err.push_traceback(
                            expression, first_dot_pos, first_dot_pos,
                            _l10n_reg.get_message(
                                lang_id,
                                "parser.mexp.error.duplicated_decimal_dot.previous_dot"
                            ))

                        raise err

                    has_dot = True
                    first_dot_pos = scan
                elif not scan_ch.isdigit():
                    #  First non-digit, non-dot character ends the literal.
                    number_end = scan
                    break

                #  Advance the scanning cursor.
                scan += 1

            literal = expression[pos:number_end]
            if has_dot:
                #  A dot was seen, so this is a float literal.
                tokens.append(
                    create_float_operand_token(literal, len(tokens), pos))
            else:
                #  Pure digits form an integer literal.
                tokens.append(
                    create_integer_operand_token(literal, len(tokens), pos))

            pos = number_end

            continue

        #  A letter starts a symbol or function name.
        if ch.isalpha():
            #  Scan forward past the alphabetic run.
            scan = pos + 1
            word_end = length

            while scan < length:
                if not expression[scan].isalpha():
                    word_end = scan
                    break

                #  Advance the scanning cursor.
                scan += 1

            word = expression[pos:word_end]

            if word_end == length:
                #  Nothing follows the word; it must be a symbol.
                tokens.append(
                    create_symbol_operand_token(word, len(tokens), pos))
            else:
                follower = expression[word_end]
                if follower.isdigit() or follower in "([{":
                    #  A number or an opening parenthesis right after the word
                    #  makes it a function name.
                    tokens.append(
                        create_function_token(word, len(tokens), pos))
                else:
                    #  Otherwise it is a plain symbol.
                    tokens.append(
                        create_symbol_operand_token(word, len(tokens), pos))

            pos = word_end

            continue

        if ch == "+":
            #  Plus operator.
            tokens.append(create_plus_operator_token(len(tokens), pos))

            pos += 1

            continue

        if ch == "-":
            #  "-" is a binary minus after an operand or ')'; otherwise it is
            #  a unary negative sign.
            if prev_tok is not None and (prev_tok.is_operand()
                                         or prev_tok.is_right_parenthesis()):
                tokens.append(create_minus_operator_token(len(tokens), pos))
            else:
                tokens.append(
                    create_negative_operator_token(len(tokens), pos))

            pos += 1

            continue

        if ch == "*":
            #  Multiply operator.
            tokens.append(create_multiply_operator_token(len(tokens), pos))

            pos += 1

            continue

        if ch == "/":
            #  Divide operator.
            tokens.append(create_divide_operator_token(len(tokens), pos))

            pos += 1

            continue

        if ch == "^":
            #  Power operator.
            tokens.append(create_pow_operator_token(len(tokens), pos))

            pos += 1

            continue

        if ch in "([{":
            #  Left parenthesis (the token keeps the concrete character).
            tokens.append(
                create_left_parenthesis_token(ch, len(tokens), pos))

            pos += 1

            continue

        if ch in ")]}":
            #  Right parenthesis (the token keeps the concrete character).
            tokens.append(
                create_right_parenthesis_token(ch, len(tokens), pos))

            pos += 1

            continue

        if ch == ",":
            #  Argument separator.
            tokens.append(create_separator_token(len(tokens), pos))

            pos += 1

            continue

        #  Nothing matched; the character cannot be tokenized.
        err = _cm_error.Error(
            _mexp_errors.MEXP_UNRECOGNIZED_TOKEN,
            _l10n_reg.get_message(
                lang_id, "parser.mexp.error.unrecognized_token.description"),
            options)
        err.push_traceback(
            expression, pos, pos,
            _l10n_reg.get_message(
                lang_id, "parser.mexp.error.unrecognized_token.message"))
        raise err

    return tokens