Exemplo n.º 1
0
    def tokenCreator(self, text, i, token=None):
        """Advance one character through the string-token state machine.

        Reads text[i]; an opening quote starts a new string token,
        subsequent characters are accumulated, and the token is marked
        formed once the string terminates.  Any other character outside
        a string resets the token.

        Returns a (next_position, token) tuple.
        """
        if not token or token.formed:
            # Previous token finished (or none supplied) -- start fresh.
            token = Token()

        code = ord(text[i])

        if not token.inFormation and self.is_quote(code):
            # Opening quote: begin collecting a string literal.
            token.add(code)
            token.inFormation = True
            token.formed = False
            token.type = "string"
        elif token.inFormation:
            # Inside a string every character is kept; the token is
            # complete unless it started with a quote and this
            # character is an ordinary (non-quote) one.
            token.add(code)
            if not (self.is_quote(token.atFirst()) and not self.is_quote(code)):
                token.formed = True
        else:
            # Irrelevant character outside any string: drop state.
            token = Token()

        return (i + 1, token)
Exemplo n.º 2
0
    def tokenCreator(self, char, token):
        """Feed one character into the tokenizer state machine.

        Recognises comments (';' to end of line), single- and
        double-quoted strings, single-character arithmetic operators
        and alphanumeric identifiers.

        Returns a (token, repeatChar) tuple; repeatChar is True when
        the current character terminated an identifier and must be
        re-processed by the caller.
        """
        repeatChar = None

        if not token or token.formed:
            # Previous token finished (or none supplied): start a new one.
            token = Token()

        charCode = ord(char)

        # Comment: a semicolon opens a comment running to end of line.
        if not token.inFormation and self.is_semicolon(charCode):
            token.formed = False
            token.inFormation = True
            token.type = "COMMENT"
            token.add(charCode)

        elif token.inFormation and self.is_semicolon(token.atFirst()):
            # Inside a comment: a newline discards it; other characters
            # are ignored (comment bodies are not accumulated).
            if self.is_newLine(charCode):
                token = Token()
            else:
                pass

        # Quote String
        elif not token.inFormation and self.is_quote(charCode):
            token.formed = False
            token.inFormation = True
            token.type = "STRING"
            token.add(charCode)

        elif token.inFormation and self.is_quote(token.atFirst()):
            # A second quote closes the string; anything else is content.
            if self.is_quote(charCode):
                token.add(charCode)
                token.inFormation = False
                token.formed = True
            else:
                token.add(charCode)

        # DoubleQuote String
        elif not token.inFormation and self.is_doubleQuote(charCode):
            token.formed = False
            token.inFormation = True
            token.type = "STRING"
            token.add(charCode)

        elif token.inFormation and self.is_doubleQuote(token.atFirst()):
            if self.is_doubleQuote(charCode):
                token.add(charCode)
                token.inFormation = False
                token.formed = True
            else:
                token.add(charCode)

        # Arithmetic Operator: always a complete single-character token.
        elif not token.inFormation and self.is_arithmeticOp(charCode):
            token.add(charCode)
            token.inFormation = False
            token.formed = True
            token.type = "ARITHMETIC_OP"

        # Identifier
        elif not token.inFormation and self.is_alpha(charCode):
            token.add(charCode)
            token.inFormation = True
            # BUG FIX: was `Token.Formed = False`, which set a stray
            # attribute on the Token *class* instead of clearing the
            # formed flag on this instance.
            token.formed = False
            token.type = "IDENT"

        elif token.inFormation and self.is_alpha(token.atFirst()):
            # Identifiers continue with letters/digits; anything else
            # ends the identifier and must be re-scanned by the caller.
            if self.is_alpha(charCode) or self.is_digit(charCode):
                token.add(charCode)
            else:
                token.inFormation = False
                token.formed = True
                repeatChar = True

        return (token, repeatChar)
Exemplo n.º 3
0
   def getToken(self):

      character, c1, c2 = self.next()

      self.__debug("%s token acquired" % c1)

      if character == None:
         return EOF

      """Let's get comments out of the way first
      Indentation doesn't care about them"""
      if c1 + c2 == "//":

         self.__debug("Comment token")

         token = Token(character)
         token.type = SHORTCOMMENT
         token.components += "/"

         #Pass the second slash
         self.next()

         character, c1, c2 = self.next()

         #Double slash comments last till the end of the line
         while c1 != "\n":
            self.__debug("Seeking to end of line")
            token.components += c1
            character, c1, c2 = self.next()

         self.__debug("Returning comment token")

         return token

      #Long comment type
      if c1 + c2 == "/*":

         self.__debug("Long comment token")

         token = Token(character)
         token.type = LONGCOMMENT
         token.components += "*"

         while c1 + c2 != "*/":
            self.__debug("Seeking to end of long comment")
            character, c1, c2 = self.next()
            token.components += c1

         token.components += "/"

         #Pass the ending slash
         self.next()

         self.__debug("Returning long comment token")

         return token

      indentAmount = 0

      #Space and Tab count equally for indentation
      #Don't care to be helpful to people who mix the styles
      if self.context == 'global' and c1 in INDENTATION:
         indentAmount += 1

         token = Token(character)
         token.type = INDENT

         self.__debug("Indentation token")

         while c2 in INDENTATION:

            self.__debug("Gathering all indentation")

            character, c1, c2 = self.next()
            #Keep adding spaces for consistency in printout
            token.components += ' '

         #Ignore indentation ending in a newline
         if c2 == "\n":
            return None

         last = self.indentStack.pop()

         self.__debug("Comparing current indentation amount to last")

         #This is a new, higher level of indentation
         if indentAmount > last:
            self.indentStack.append(last)
            self.indentStack.append(indentAmount)
         #Same indentation level; no token added
         elif indentAmount == last:
            self.indentStack.append(last)
            self.__debug("Indentation level matches")
            return None
         #This is actually a dedent; keep dedenting until the appropriate level is found
         else:
            self.__debug("Starting Dedenting")
            while indentAmount < last:
               self.__debug("Dedenting")
               tok = Token(character)
               tok.type = DEDENT
               tokens.append(tok)
               last = self.indentStack.pop()

               if indentAmount == last:
                  self.indentStack.append(last)
                  return None
               elif len(self.indentStack) == 0:
                  print "Previous indentation amount not found!"

         return Token

      if self.context == 'global' and c1 == DECLARATION:

         self.__debug("Name or variable declaration")

         if c2 == ":":
            self.__debug("Name declaration")
            #skip the { and colon
            self.next()
            character, c1, c2 = self.next()
            token = Token(character)
            token.type = NAME

            character, c1, c2 = self.next()

            while c1 != END_DECLARATION:
               token.components += c1

               self.__debug("Seeking declaration end")

               if c1 in SPACING:
                  print "name declaration contains invalid character"
                  return None

               character, c1, c2 = self.next()


            #Skip }
            self.next()

            #Clear trailing whitespace
            while c2 in INDENTATION:
               self.__debug("Clearing trailing whitespace")
               character, c1, c2 = self.next()

            if c1 != "\n":
               print "name declarations must end with a newline"

               while c1 != "\n":
                  self.__debug("Recovering bad name declaration")
                  character, c1, c2 = self.next()

            return token

         else:
            self.__debug("Variable declaration")
            #Skip the brace
            character, c1, c2 = self.next()
            token = Token(character)
            token.type = VARIABLE

            while c1 != SPACE:
               self.__debug("Gathering variable name")
               character, c1, c2 = self.next()
               token.components += c1

            #Throw the space out
            self.switchContext('variable')

            self.__debug("Returning variable declaration token")

            return token

      #Collect the entire variable contents
      if self.context == 'variable':

         self.__debug("Variable value")

         if c1 == END_DECLARATION:
            print "empty variable declaration"
            self.switchContext('global')
            return None

         token = Token(character)
         token.type = VALUE

         character, c1, c2 = self.next()
         while c1 != END_DECLARATION:
            self.__debug("Scanning for remainder of variable name")

            token.components += c1

            if c1 == '\\':
               if c2 in '\\:}':
                  token.components += c2
                  self.next()

            character, c1, c2 = self.next()

            #The token up to this point is actually a function call
            if c1 == ':':
               token.type = FUNCTION
               switchContext('function')
               #Clear trailing whitespace
               while c2 in INDENTATION:
                  self.__debug("SCANNING2")
                  character, c1, c2 = self.next()
               return token

         #skip }
         self.next()

         #Clear trailing whitespace
         while c2 in INDENTATION:
            self.__debug("SCANNING3")
            character, c1, c2 = self.next()

         self.switchContext('global')
         return token

      if self.context == 'function':
         self.restoreContext()
         #Enclosed function arguments
         if c1 == '(':
            self.switchContext('closed-funcargs')
            #Clear the paren
            self.next()

            #Clear trailing whitespace
            while c2 in INDENTATION:
               self.__debug("SCANNING4")
               character, c1, c2 = self.next()

         else:
            self.switchContext('funcargs')

      if self.context == 'funcargs':

         token = Token(character)
         token.type = ARGUMENT

         #Add initial spacing to the token
         while c1 in SPACING:
            self.__debug("SCANNING5")
            character, c1, c2 = self.next()
            token.components += c1

         #Then, consume everything up to the next whitespace
         while not c1 in SPACING:
            self.__debug("SCANNING6")
            if c1 == '\\':
               if c2 == ' ':
                  token.components += ' '
                  self.next()
            character, c1, c2 = self.next()
            token.components += c1

         #Argument list is over
         if c1 == "\n":
            self.next()
            self.restoreContext()
            return token

         #Do not add funcarg-delimiting whitespace
         self.next()

         return token

      if self.context == 'closed-funcargs':

         token = Token(character)
         token.type = ARGUMENT

         #Add initial spacing to the token
         while c1 in SPACING:
            self.__debug("SCANNING7")
            character, c1, c2 = self.next()
            token.components += c1

         #Then, consume everything up to the next whitespace
         while not c1 in SPACING:
            self.__debug("SCANNING8")
            if c1 == '\\':
               if c2 == ' ' or c2 == ')':
                  token.components += ' '
                  self.next()
            character, c1, c2 = self.next()
            token.components += c1

            if c2 == ')':
               #Funcargs ended; return context after stripping )
               self.next()
               self.restoreContext()
               return token

         return token

      #If we have not indented at all and we aren't using a name declaration, it's a selector
      if len(self.indentStack) == 1:
         self.__debug("Selector")
         token = Token(character)

         token.type = SELECTOR

         character, c1, c2 = self.next()

         while c1 != "\n":
            self.__debug("Seeking selector")
            token.components += c1

            #Declaration starting
            if c2 == DECLARATION:
               return token

            character, c1, c2 = self.next()

         #Remove the terminating newline
         self.next()

         self.__debug("Returning selector")

         return token

      return None
Exemplo n.º 4
0
    def analize (cls, text):
        """Tokenise *text* and build a proposition tree.

        Splits the input into tokens (words, brackets, punctuation,
        quoted strings), then classifies each token -- resolving
        '?name' linkages and bare words against the qsl_linkage /
        qsl_concept database tables -- and finally hands the token
        list to cls.build_tree().  On success stores the result in
        cls.proposition_tree and returns True; returns False on any
        lookup or parse failure (cls.__error_text is set).
        """
        word = []
        tokens = []
        prev_letter = ""

        # Split the input into tokens
        i = 0
        while i < len (text):
            letter = text[i]
            if letter == " ":
                # Whitespace flushes the accumulated word, if any.
                if len (word) > 0:
                    token = Token ()
                    token.text = ''.join (word)
                    tokens.append (token)
                    word = []
            elif letter == "(" or \
                 letter == ")" or \
                 letter == ",":
                 #letter == "_":
                # Brackets and commas flush the word and become
                # single-character tokens of their own.
                if len (word) > 0:
                    token = Token ()
                    token.text = ''.join (word)
                    tokens.append (token)
                    word = []
                token = Token ()
                token.text = letter
                tokens.append (token)
            elif letter == "." or \
                 letter == "_":
                # '.' and '_' need lookahead: scan forward with j to
                # decide whether they stand alone or belong to the
                # surrounding word.
                j = i + 1
                if j == len (text):
                    # Trailing '.'/'_' at end of input: emit it as its
                    # own token.
                    if len (word) > 0:
                        token = Token ()
                        token.text = ''.join (word)
                        tokens.append (token)
                        word = []
                    token = Token ()
                    token.text = letter
                    tokens.append (token)
                while j < len (text):
                    letter = text[j]

                    if letter == " ":
                        break
                    elif letter == ")" or \
                         letter == "(" or \
                         letter == ",":
                        # Step back so the bracket/comma is re-scanned
                        # by the outer loop.
                        j = j - 1
                        break
                    if i == (j - 1):
                        if letter == " ":
                            if len (word) > 0:
                                token = Token ()
                                token.text = ''.join (word)
                                tokens.append (token)
                                word = []
                            token = Token ()
                            token.text = letter
                            tokens.append (token)
                            break
                        else:
                            # Keep the original '.'/'_' plus this
                            # character as part of the word.
                            word.append (text[i])
                            prev_letter = letter
                            word.append (letter)
                            j += 1
                    else:
                        prev_letter = letter
                        word.append (letter)
                        j += 1
                i = j
            elif letter == "=":
                # '=' is its own token only at a word boundary.
                if prev_letter == " " or \
                   prev_letter == "(":
                    if len (word) > 0:
                        token = Token ()
                        token.text = ''.join (word)
                        tokens.append (token)
                        word = []
                    token = Token ()
                    token.text = letter
                    tokens.append (token)
                else:
                    word.append (letter)
            elif letter == "?":
                if len (word) > 0:
                    token = Token ()
                    token.text = ''.join (word)
                    tokens.append (token)
                    word = []
                if prev_letter not in [" ", "("]:
                    token = Token ()
                    token.text = letter
                    tokens.append (token)
                else:
                    word.append (letter)
            elif letter == "\"":
                # Quoted string: consume up to the next unescaped
                # double quote and emit a string-typed token.
                i += 1
                while i < len (text):
                    prev_letter = letter
                    letter = text[i]
                    if letter == "\"" and prev_letter != "\\":
                        break
                    else:
                        word.append (letter)
                    i += 1
                token = Token ()
                token.text = ''.join (word)
                token.type = TokenType.string
                tokens.append (token)
                word = []
            else:
                word.append (letter)
            prev_letter = letter
            i += 1

        # Classify the tokens
        for token in tokens:
            #print token.text
            if token.type == TokenType.string:
                continue
            if token.text.find ('?') == 0 and len (token.text) > 1:
                s = token.text.replace ('?', '')
                # NOTE(review): SQL built by string concatenation is
                # injectable if token text is untrusted; prefer a
                # parameterized query (paramstyle of cls.__cursor to be
                # confirmed).
                query = "SELECT id FROM qsl_linkage WHERE name = \'" + s + "\';"
                cls.__cursor.execute (query)
                row = cls.__cursor.fetchone ()
                if row != None:
                    token.type = TokenType.linkage
                    token.linkage = TokenLinkage ()
                    token.linkage.id = row[0]
                    token.linkage.name = s
                else:
                    cls.__error_text = ErrorHelper.get_text (102, token.text)
                    return False
            elif token.text.find ('%') == 0 and len (token.text) > 1:
                token.type = TokenType.code_object
            elif token.text.find ('*') == 0:
                # Modifier
                token.type = TokenType.modifier
            elif token.text == "(":
                token.type = TokenType.opening_bracket
            elif token.text == ")":
                token.type = TokenType.closing_bracket
            elif token.text == ",":
                token.type = TokenType.comma
            elif token.text == "_":
                token.type = TokenType.underscore
            elif token.text == ".":
                token.type = TokenType.point
            elif token.text == "?":
                token.type = TokenType.question_mark
            elif token.text == "=":
                token.type = TokenType.equal_sign
            else:
                # NOTE(review): same string-built SQL as above -- see
                # injection note.
                query = "SELECT id, type FROM qsl_concept WHERE name = \'" + token.text + "\';"
                cls.__cursor.execute (query)
                row = cls.__cursor.fetchone ()
                if row != None:
                    token.type = TokenType.concept
                    token.concept = TokenConcept ()
                    token.concept.id = row[0]
                    token.concept.type = row[1]
                    token.concept.name = token.text
                else:
                    # Unknown word: accept it as a number, otherwise fail.
                    if token.text.isdigit ():
                        token.type = TokenType.number
                    else:
                        cls.__error_text = ErrorHelper.get_text (103, token.text)
                        return False

        node = cls.build_tree (tokens)
        if node != None:
            cls.proposition_tree = PropositionTree ()
            cls.proposition_tree.root_node = node
        else:
            return False

        return True
Exemplo n.º 5
0
    def analize(cls, text):
        """Tokenise *text* and build a proposition tree.

        Splits the input into tokens (words, brackets, punctuation,
        quoted strings), then classifies each token -- resolving
        '?name' linkages and bare words against the qsl_linkage /
        qsl_concept database tables -- and finally hands the token
        list to cls.build_tree().  On success stores the result in
        cls.proposition_tree and returns True; returns False on any
        lookup or parse failure (cls.__error_text is set).
        """
        word = []
        tokens = []
        prev_letter = ""

        # Split the input into tokens
        i = 0
        while i < len(text):
            letter = text[i]
            if letter == " ":
                # Whitespace flushes the accumulated word, if any.
                if len(word) > 0:
                    token = Token()
                    token.text = ''.join(word)
                    tokens.append(token)
                    word = []
            elif letter == "(" or \
                 letter == ")" or \
                 letter == ",":
                #letter == "_":
                # Brackets and commas flush the word and become
                # single-character tokens of their own.
                if len(word) > 0:
                    token = Token()
                    token.text = ''.join(word)
                    tokens.append(token)
                    word = []
                token = Token()
                token.text = letter
                tokens.append(token)
            elif letter == "." or \
                 letter == "_":
                # '.' and '_' need lookahead: scan forward with j to
                # decide whether they stand alone or belong to the
                # surrounding word.
                j = i + 1
                if j == len(text):
                    # Trailing '.'/'_' at end of input: emit it as its
                    # own token.
                    if len(word) > 0:
                        token = Token()
                        token.text = ''.join(word)
                        tokens.append(token)
                        word = []
                    token = Token()
                    token.text = letter
                    tokens.append(token)
                while j < len(text):
                    letter = text[j]

                    if letter == " ":
                        break
                    elif letter == ")" or \
                         letter == "(" or \
                         letter == ",":
                        # Step back so the bracket/comma is re-scanned
                        # by the outer loop.
                        j = j - 1
                        break
                    if i == (j - 1):
                        if letter == " ":
                            if len(word) > 0:
                                token = Token()
                                token.text = ''.join(word)
                                tokens.append(token)
                                word = []
                            token = Token()
                            token.text = letter
                            tokens.append(token)
                            break
                        else:
                            # Keep the original '.'/'_' plus this
                            # character as part of the word.
                            word.append(text[i])
                            prev_letter = letter
                            word.append(letter)
                            j += 1
                    else:
                        prev_letter = letter
                        word.append(letter)
                        j += 1
                i = j
            elif letter == "=":
                # '=' is its own token only at a word boundary.
                if prev_letter == " " or \
                   prev_letter == "(":
                    if len(word) > 0:
                        token = Token()
                        token.text = ''.join(word)
                        tokens.append(token)
                        word = []
                    token = Token()
                    token.text = letter
                    tokens.append(token)
                else:
                    word.append(letter)
            elif letter == "?":
                if len(word) > 0:
                    token = Token()
                    token.text = ''.join(word)
                    tokens.append(token)
                    word = []
                if prev_letter not in [" ", "("]:
                    token = Token()
                    token.text = letter
                    tokens.append(token)
                else:
                    word.append(letter)
            elif letter == "\"":
                # Quoted string: consume up to the next unescaped
                # double quote and emit a string-typed token.
                i += 1
                while i < len(text):
                    prev_letter = letter
                    letter = text[i]
                    if letter == "\"" and prev_letter != "\\":
                        break
                    else:
                        word.append(letter)
                    i += 1
                token = Token()
                token.text = ''.join(word)
                token.type = TokenType.string
                tokens.append(token)
                word = []
            else:
                word.append(letter)
            prev_letter = letter
            i += 1

        # Classify the tokens
        for token in tokens:
            #print token.text
            if token.type == TokenType.string:
                continue
            if token.text.find('?') == 0 and len(token.text) > 1:
                s = token.text.replace('?', '')
                # NOTE(review): SQL built by string concatenation is
                # injectable if token text is untrusted; prefer a
                # parameterized query (paramstyle of cls.__cursor to be
                # confirmed).
                query = "SELECT id FROM qsl_linkage WHERE name = \'" + s + "\';"
                cls.__cursor.execute(query)
                row = cls.__cursor.fetchone()
                if row != None:
                    token.type = TokenType.linkage
                    token.linkage = TokenLinkage()
                    token.linkage.id = row[0]
                    token.linkage.name = s
                else:
                    cls.__error_text = ErrorHelper.get_text(102, token.text)
                    return False
            elif token.text.find('%') == 0 and len(token.text) > 1:
                token.type = TokenType.code_object
            elif token.text.find('*') == 0:
                # Modifier
                token.type = TokenType.modifier
            elif token.text == "(":
                token.type = TokenType.opening_bracket
            elif token.text == ")":
                token.type = TokenType.closing_bracket
            elif token.text == ",":
                token.type = TokenType.comma
            elif token.text == "_":
                token.type = TokenType.underscore
            elif token.text == ".":
                token.type = TokenType.point
            elif token.text == "?":
                token.type = TokenType.question_mark
            elif token.text == "=":
                token.type = TokenType.equal_sign
            else:
                # NOTE(review): same string-built SQL as above -- see
                # injection note.
                query = "SELECT id, type FROM qsl_concept WHERE name = \'" + token.text + "\';"
                cls.__cursor.execute(query)
                row = cls.__cursor.fetchone()
                if row != None:
                    token.type = TokenType.concept
                    token.concept = TokenConcept()
                    token.concept.id = row[0]
                    token.concept.type = row[1]
                    token.concept.name = token.text
                else:
                    # Unknown word: accept it as a number, otherwise fail.
                    if token.text.isdigit():
                        token.type = TokenType.number
                    else:
                        cls.__error_text = ErrorHelper.get_text(
                            103, token.text)
                        return False

        node = cls.build_tree(tokens)
        if node != None:
            cls.proposition_tree = PropositionTree()
            cls.proposition_tree.root_node = node
        else:
            return False

        return True