示例#1
0
    def parse(self, message, characters=None):
        u"""Turn an EDI message into segments.

        When the message begins with ``UNA``, the six characters that follow
        are taken as the control characters, stored on ``self.characters``,
        and the header (plus any directly following CR/LF characters) is
        removed before tokenizing. Otherwise ``self.characters`` is replaced
        by ``characters`` if that argument is given, and left as it is if not.

        :param message: The EDI message
        :param characters: the control characters to use, if there is no
                UNA segment present
        :return: the result of ``convert_tokens_to_segments`` for the tokens
                of the message
        """

        # FIXME: DRY: use get_control_characters here?
        collected = []

        if message[0:3] == u'UNA':
            # The UNA header is followed by exactly six control characters;
            # they are taken unconditionally and emitted as tokens as well.
            una_chars = message[3:9]
            collected.append(Token(Token.Type.CONTENT, u'UNA'))
            collected.append(Token(Token.Type.CTRL_CHARS, una_chars))

            # Drop the header and the line break that may follow it.
            message = message[9:].lstrip(u"\r\n")
            self.characters = Characters.from_str(u'UNA' + una_chars)
        elif characters is not None:
            # No UNA header: use the caller-supplied control characters.
            self.characters = characters

        collected.extend(Tokenizer().get_tokens(message, self.characters))
        return self.convert_tokens_to_segments(collected, self.characters)
示例#2
0
 def test_quadruple_escape(self):
     """Expect ``????`` to become a literal ``??`` followed by a live ``:``."""
     edi_text = "RFF+PD????:5"
     wanted = [
         Token(Token.Type.CONTENT, "RFF"),
         Token(Token.Type.DATA_SEPARATOR, "+"),
         Token(Token.Type.CONTENT, "PD??"),
         Token(Token.Type.COMPONENT_SEPARATOR, ":"),
         Token(Token.Type.CONTENT, "5"),
     ]
     self._assert_tokens(edi_text, wanted)
示例#3
0
 def test_basic(self):
     """Expect a plain segment to split at the ``+`` and ``:`` separators."""
     edi_text = "RFF+PD:50515"
     wanted = [
         Token(Token.Type.CONTENT, "RFF"),
         Token(Token.Type.DATA_SEPARATOR, "+"),
         Token(Token.Type.CONTENT, "PD"),
         Token(Token.Type.COMPONENT_SEPARATOR, ":"),
         Token(Token.Type.CONTENT, "50515"),
     ]
     self._assert_tokens(edi_text, wanted)
示例#4
0
def test_triple_escape():
    """Expect ``???:`` to end up as the literal ``?:`` inside one content token."""
    edi_text = "RFF+PD???:5'"
    wanted = [
        Token(Token.Type.CONTENT, "RFF"),
        Token(Token.Type.DATA_SEPARATOR, "+"),
        Token(Token.Type.CONTENT, "PD?:5"),
        Token(Token.Type.TERMINATOR, "'"),
    ]
    _assert_tokens(edi_text, wanted)
示例#5
0
def test_starts_with_escape():
    """Expect an escaped ``+`` opening a data element to become content."""
    edi_text = "DTM+?+0'"
    wanted = [
        Token(Token.Type.CONTENT, "DTM"),
        Token(Token.Type.DATA_SEPARATOR, "+"),
        Token(Token.Type.CONTENT, "+0"),
        Token(Token.Type.TERMINATOR, "'"),
    ]
    _assert_tokens(edi_text, wanted)
示例#6
0
def expected_crlf():
    """Return the token list for the two segments ``RFF:5'`` and ``DEF:6'``."""
    token_specs = (
        (Token.Type.CONTENT, "RFF"),
        (Token.Type.COMPONENT_SEPARATOR, ":"),
        (Token.Type.CONTENT, "5"),
        (Token.Type.TERMINATOR, "'"),
        (Token.Type.CONTENT, "DEF"),
        (Token.Type.COMPONENT_SEPARATOR, ":"),
        (Token.Type.CONTENT, "6"),
        (Token.Type.TERMINATOR, "'"),
    )
    return [Token(kind, text) for kind, text in token_specs]
示例#7
0
 def test_ignore_whitespace(self):
     """Expect the newline after a segment terminator to produce no token."""
     edi_text = "RFF:5'\nDEF:6"
     wanted = [
         Token(Token.Type.CONTENT, "RFF"),
         Token(Token.Type.COMPONENT_SEPARATOR, ":"),
         Token(Token.Type.CONTENT, "5"),
         Token(Token.Type.TERMINATOR, "'"),
         Token(Token.Type.CONTENT, "DEF"),
         Token(Token.Type.COMPONENT_SEPARATOR, ":"),
         Token(Token.Type.CONTENT, "6"),
     ]
     self._assert_tokens(edi_text, wanted)
示例#8
0
    def get_next_token(self) -> Optional[Token]:
        """Read and return the next token of the message.

        A non-escaped current character that has an entry in
        ``self.token_selector`` is returned as a token of the mapped type.
        After a terminator token, any following characters contained in
        ``self.characters.line_terminators`` are skipped. In every other
        case characters are collected into a content token until
        ``is_control_character()`` reports true.

        :raises RuntimeError: if the message ends while content characters
            are still being collected.
        """

        # An escaped character is never looked up as a control character.
        if self.isEscaped:
            control_type = False
        else:
            control_type = self.token_selector.get(self._char)

        if control_type:
            self.store_current_char_and_read_next()
            control_token = Token(control_type, self.extract_stored_chars())
            if control_type == Token.Type.TERMINATOR:
                # Skip line terminators that follow the end of a segment.
                while self._char in self.characters.line_terminators:
                    self.read_next_char()
            return control_token

        # Everything up to the next control character is content.
        while True:
            if self.is_control_character():
                return Token(Token.Type.CONTENT, self.extract_stored_chars())
            if self.end_of_message():
                raise RuntimeError("Unexpected end of EDI message")
            self.store_current_char_and_read_next()
示例#9
0
    def get_next_token(self) -> "Token | None":
        """Get the next token from the message.

        :return: the next token, or ``None`` once the end of the message
            has been reached.
        :raises RuntimeError: if the message ends while content characters
            are still being collected.
        """
        # NOTE: the previous annotation ``Token or None`` was evaluated as a
        # plain expression and therefore meant just ``Token``; the string
        # form states the real contract without needing an extra import.

        if self.end_of_message():
            return None

        # If we're not escaping this character then see if it's
        # a control character
        if not self.isEscaped:
            control_types = (
                (self.characters.component_separator,
                 Token.Type.COMPONENT_SEPARATOR),
                (self.characters.data_separator,
                 Token.Type.DATA_SEPARATOR),
                (self.characters.segment_terminator,
                 Token.Type.TERMINATOR),
            )
            for control_char, token_type in control_types:
                if self._char != control_char:
                    continue

                self.store_current_char_and_read_next()
                token = Token(token_type, self.extract_stored_chars())

                if token_type == Token.Type.TERMINATOR:
                    # Ignore any line breaks after the end of the segment.
                    # A tuple (not the string "\r\n") is used on purpose:
                    # membership must be an exact match of one character.
                    while self._char in ("\r", "\n"):
                        self.read_next_char()

                return token

        # Everything up to the next control character is content.
        while not self.is_control_character():
            if self.end_of_message():
                raise RuntimeError("Unexpected end of EDI message")

            self.store_current_char_and_read_next()

        return Token(Token.Type.CONTENT, self.extract_stored_chars())
示例#10
0
def test_quadruple_escape():
    """Expect ``????`` to become a literal ``??`` followed by a live ``:``."""
    edi_text = "RFF+PD????:5'"
    wanted = [
        Token(Token.Type.CONTENT, "RFF"),
        Token(Token.Type.DATA_SEPARATOR, "+"),
        Token(Token.Type.CONTENT, "PD??"),
        Token(Token.Type.COMPONENT_SEPARATOR, ":"),
        Token(Token.Type.CONTENT, "5"),
        Token(Token.Type.TERMINATOR, "'"),
    ]
    _assert_tokens(edi_text, wanted)
示例#11
0
def test_basic():
    """Expect a plain segment to split at ``+``, ``:`` and the terminator."""
    edi_text = "RFF+PD:50515'"
    wanted = [
        Token(Token.Type.CONTENT, "RFF"),
        Token(Token.Type.DATA_SEPARATOR, "+"),
        Token(Token.Type.CONTENT, "PD"),
        Token(Token.Type.COMPONENT_SEPARATOR, ":"),
        Token(Token.Type.CONTENT, "50515"),
        Token(Token.Type.TERMINATOR, "'"),
    ]
    _assert_tokens(edi_text, wanted)
示例#12
0
 def test_triple_escape(self):
     """Expect ``???:`` to end up as the literal ``?:`` inside one content token."""
     edi_text = "RFF+PD???:5"
     wanted = [
         Token(Token.Type.CONTENT, "RFF"),
         Token(Token.Type.DATA_SEPARATOR, "+"),
         Token(Token.Type.CONTENT, "PD?:5"),
     ]
     self._assert_tokens(edi_text, wanted)
示例#13
0
 def _assert_tokens(self, message, expected=None):
     """Assert that ``message`` tokenizes to ``expected`` plus a terminator.

     A ``'`` is appended to ``message`` before it is handed to
     ``self._tokenizer``, and a matching terminator token is added to the
     end of the expected tokens before the comparison.

     :param message: the segment text, without its trailing ``'``
     :param expected: the tokens expected before the terminator token;
         ``None`` means no tokens
     """
     tokens = self._tokenizer.get_tokens("{}'".format(message), Characters())
     # Build a fresh list rather than appending to ``expected``: appending
     # would silently modify the list object owned by the caller.
     expected_tokens = [] if expected is None else list(expected)
     expected_tokens.append(Token(Token.Type.TERMINATOR, "'"))
     self.assertEqual(expected_tokens, tokens)
示例#14
0
 def test_value(self):
     """Expect ``value`` to return the text the token was created with."""
     content_token = Token(Token.Type.CONTENT, "ok")
     actual = content_token.value
     self.assertEqual("ok", actual)
示例#15
0
 def test_type(self):
     """Expect ``type`` to return the type the token was created with."""
     content_token = Token(Token.Type.CONTENT, "ok")
     actual = content_token.type
     self.assertEqual(Token.Type.CONTENT, actual)