Exemplo n.º 1
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         EscapedStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         PrefixedIntegerTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         IdentifierTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         SingleCharacterTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         RegexTokenBuilder,
         PerlIdentifierTokenBuilder,
         PerlDollarCaretIdentifierTokenBuilder,
         PerlQStringTokenBuilder,
         MRegexTokenBuilder,
         SRegexTokenBuilder,
         YRegexTokenBuilder,
         TrRegexTokenBuilder,
         PerlPrototypeTokenBuilder,
         PerlSigilBraceTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 2
0
 def __escape_z__():
   """Call ``__escape_z__()`` on each token-builder class listed below.

   The classes are visited in the order written. Takes no arguments and
   always returns the literal string 'Escape ?Z'.
   """
   builder_classes = (
     InvalidTokenBuilder,
     WhitespaceTokenBuilder,
     NewlineTokenBuilder,
     EscapedStringTokenBuilder,
     PrefixedStringTokenBuilder,
     SuffixedStringTokenBuilder,
     IntegerTokenBuilder,
     IntegerExponentTokenBuilder,
     PrefixedIntegerTokenBuilder,
     SuffixedIntegerTokenBuilder,
     RealTokenBuilder,
     RealExponentTokenBuilder,
     SuffixedRealTokenBuilder,
     IdentifierTokenBuilder,
     PrefixedIdentifierTokenBuilder,
     CaseInsensitiveListTokenBuilder,
     CaseSensitiveListTokenBuilder,
     SingleCharacterTokenBuilder,
     SlashSlashCommentTokenBuilder,
     SlashStarCommentTokenBuilder,
     ClassTypeTokenBuilder,
     HexRealExponentTokenBuilder,
     NestedCommentTokenBuilder,
   )
   for builder_cls in builder_classes:
     builder_cls.__escape_z__()
   return 'Escape ?Z'
Exemplo n.º 3
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         PrefixedIntegerTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         ParensLabelTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 4
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 5
0
 def __escape_z__():
   """Call ``__escape_z__()`` on each token-builder class listed below.

   The classes are visited in the order written. Takes no arguments and
   always returns the literal string 'Escape ?Z'.
   """
   builder_classes = (
     InvalidTokenBuilder,
     WhitespaceTokenBuilder,
     NewlineTokenBuilder,
     EscapedStringTokenBuilder,
     PrefixedStringTokenBuilder,
     IntegerTokenBuilder,
     IntegerExponentTokenBuilder,
     PrefixedIntegerTokenBuilder,
     SuffixedIntegerTokenBuilder,
     RealTokenBuilder,
     AssemblyCommentTokenBuilder,
   )
   for builder_cls in builder_classes:
     builder_cls.__escape_z__()
   return 'Escape ?Z'
Exemplo n.º 6
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         EscapedStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         PrefixedIntegerTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         IdentifierTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         LeadToEndOfLineTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 7
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         IdentifierTokenBuilder,
         EscapedStringTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         TripleQuoteStringTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         BlockTokenBuilder,
         BraceCommentTokenBuilder,
         SlashSlashCommentTokenBuilder,
         SlashStarCommentTokenBuilder,
         GenericNumberTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 8
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         StringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         IdentifierTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         SingleCharacterTokenBuilder,
         SuffixedIntegerTokenBuilder,
         BlockTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 9
0
 def __escape_z__():
   """Call ``__escape_z__()`` on each token-builder class listed below.

   The classes are visited in the order written. Takes no arguments and
   always returns the literal string 'Escape ?Z'.
   """
   builder_classes = (
     InvalidTokenBuilder,
     WhitespaceTokenBuilder,
     NewlineTokenBuilder,
     StuffedQuoteStringTokenBuilder,
     IntegerTokenBuilder,
     IntegerExponentTokenBuilder,
     RealTokenBuilder,
     RealExponentTokenBuilder,
     CaseInsensitiveListTokenBuilder,
     CaseSensitiveListTokenBuilder,
     BlockTokenBuilder,
     HTMLIdentifierTokenBuilder,
     HTMLListTokenBuilder,
     HTMLAttributeTokenBuilder,
     HTMLUnicodeTokenBuilder,
   )
   for builder_cls in builder_classes:
     builder_cls.__escape_z__()
   return 'Escape ?Z'
Exemplo n.º 10
0
 def __escape_z__():
   """Call ``__escape_z__()`` on each token-builder class listed below.

   The classes are visited in the order written. Takes no arguments and
   always returns the literal string 'Escape ?Z'.
   """
   builder_classes = (
     InvalidTokenBuilder,
     WhitespaceTokenBuilder,
     NewlineTokenBuilder,
     EscapedStringTokenBuilder,
     IntegerTokenBuilder,
     IntegerExponentTokenBuilder,
     RealTokenBuilder,
     RealExponentTokenBuilder,
     SuffixedIdentifierTokenBuilder,
     CaseInsensitiveListTokenBuilder,
     CaseSensitiveListTokenBuilder,
     LeadToEndOfLineTokenBuilder,
     SingleCharacterTokenBuilder,
     VisualBasicVariableTokenBuilder,
     RemarkTokenBuilder,
   )
   for builder_cls in builder_classes:
     builder_cls.__escape_z__()
   return 'Escape ?Z'
Exemplo n.º 11
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         EscapedStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         PrefixedIntegerTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         IdentifierTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         SingleCharacterTokenBuilder,
         LuaBlockCommentTokenBuilder,
         DoubleBracketStringTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 12
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         StuffedQuoteStringTokenBuilder,
         IdentifierTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         SingleCharacterTokenBuilder,
         PrefixedIntegerTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         NullTokenBuilder,
         SqlBracketedIdentifierTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 13
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         EscapedStringTokenBuilder,
         PrefixedStringTokenBuilder,
         PrefixedRawStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         IdentifierTokenBuilder,
         PrefixedIdentifierTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         SingleCharacterTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         TripleQuoteStringTokenBuilder,
         RawTripleQuoteCommentTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 14
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         StuffedQuoteStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         SingleCharacterTokenBuilder,
         PrefixedIntegerTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         RemarkTokenBuilder,
         CBasicVariableTokenBuilder,
         CBasicLabelTokenBuilder,
         CBasicSuffixedIntegerTokenBuilder,
         CBasicLineContinuationTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 15
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         StuffedQuoteStringTokenBuilder,
         PrefixedStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         SingleCharacterTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         BlockTokenBuilder,
         CobolIdentifierTokenBuilder,
         PictureTokenBuilder,
         CRPictureTokenBuilder,
         CobolPreprocessorTokenBuilder,
         AsteriskCommentTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 16
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         EscapedStringTokenBuilder,
         PrefixedStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         PrefixedIntegerTokenBuilder,
         SuffixedIntegerTokenBuilder,
         RealTokenBuilder,
         IdentifierTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         SingleCharacterTokenBuilder,
         LabelTokenBuilder,
         AssemblyCommentTokenBuilder,
         MultilineCommentTokenBuilder,
         HashQuoteCharTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 17
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         EscapedStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         PrefixedIntegerTokenBuilder,
         SuffixedIdentifierTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         SingleCharacterTokenBuilder,
         PrefixedIdentifierTokenBuilder,
         TripleQuoteStringTokenBuilder,
         SlashSlashCommentTokenBuilder,
         SlashStarCommentTokenBuilder,
         SwiftArgumentTokenBuilder,
         SwiftSymbolTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 18
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         EscapedStringTokenBuilder,
         PrefixedStringTokenBuilder,
         IntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         RealExponentTokenBuilder,
         IdentifierTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         BlockTokenBuilder,
         TripleQuoteStringTokenBuilder,
         SlashSlashCommentTokenBuilder,
         TripleSlashCommentTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         NullTokenBuilder,
         ClassTypeTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 19
0
 def __escape_z__():
   """Call ``__escape_z__()`` on each token-builder class listed below.

   The classes are visited in the order written. Takes no arguments and
   always returns the literal string 'Escape ?Z'.
   """
   builder_classes = (
     InvalidTokenBuilder,
     WhitespaceTokenBuilder,
     NewlineTokenBuilder,
     IdentifierTokenBuilder,
     EscapedStringTokenBuilder,
     IntegerTokenBuilder,
     IntegerExponentTokenBuilder,
     RealTokenBuilder,
     RealExponentTokenBuilder,
     CaseInsensitiveListTokenBuilder,
     CaseSensitiveListTokenBuilder,
     SingleCharacterTokenBuilder,
     LeadToEndOfLineTokenBuilder,
     AsteriskCommentTokenBuilder,
     DbaseSpecialFunctionTokenBuilder,
     DbaseFilenameTokenBuilder,
     WildCardIdentifierTokenBuilder,
     BracketedStringTokenBuilder,
     KeywordCommentTokenBuilder,
     KeywordComment2TokenBuilder,
     TextBlockTokenBuilder,
   )
   for builder_cls in builder_classes:
     builder_cls.__escape_z__()
   return 'Escape ?Z'
Exemplo n.º 20
0
 def __escape_z__():
     """Call ``__escape_z__()`` on each token-builder class listed below.

     The classes are visited in the order written. Takes no arguments and
     always returns the literal string 'Escape ?Z'.
     """
     builder_classes = (
         InvalidTokenBuilder,
         WhitespaceTokenBuilder,
         NewlineTokenBuilder,
         StuffedQuoteStringTokenBuilder,
         IntegerTokenBuilder,
         SuffixedIntegerTokenBuilder,
         IntegerExponentTokenBuilder,
         RealTokenBuilder,
         SuffixedRealTokenBuilder,
         RealExponentTokenBuilder,
         CaseInsensitiveListTokenBuilder,
         CaseSensitiveListTokenBuilder,
         SingleCharacterTokenBuilder,
         PrefixedIntegerTokenBuilder,
         LeadToEndOfLineTokenBuilder,
         NullTokenBuilder,
         BasicVariableTokenBuilder,
         BasicLongVariableTokenBuilder,
         RemarkTokenBuilder,
         UserFunctionTokenBuilder,
         LongUserFunctionTokenBuilder,
         HardwareFunctionTokenBuilder,
     )
     for builder_cls in builder_classes:
         builder_cls.__escape_z__()
     return 'Escape ?Z'
Exemplo n.º 21
0
    def __init__(self):
        """Create and store the token builders this object keeps on itself.

        Runs the base-class initializer, sets ``newlines_important`` to
        'always', then stores one builder instance per attribute below.
        """
        super().__init__()
        self.newlines_important = 'always'

        # Whitespace and line-break builders.
        self.whitespace_tb = WhitespaceTokenBuilder()
        self.newline_tb = NewlineTokenBuilder()

        # Numeric builders; each one receives None as its last argument.
        last_arg = None
        self.integer_tb = IntegerTokenBuilder(last_arg)
        self.integer_exponent_tb = IntegerExponentTokenBuilder(last_arg)
        self.real_tb = RealTokenBuilder(False, False, last_arg)
        # Two exponent builders that differ only in the letter: 'E' and 'D'.
        self.real_exponent_tb = RealExponentTokenBuilder(
            False, False, 'E', last_arg)
        self.double_exponent_tb = RealExponentTokenBuilder(
            False, False, 'D', last_arg)

        self.jcl_tb = JCLTokenBuilder()

        self.invalid_token_builder = InvalidTokenBuilder()
Exemplo n.º 22
0
  def __init__(self, code, version):
    """Tokenize ``code`` and compute this examiner's confidence figures.

    The token builders used here (e.g. ``DbaseSpecialFunctionTokenBuilder``,
    ``DbaseFilenameTokenBuilder``) indicate the input is dBASE source text.

    Args:
      code: source text to examine.
      version: dialect selector, 'ii' or 'iii'. It picks the keyword, value,
        function and unary-operator tables and a few version-specific
        token builders.

    Raises:
      ValueError: if ``version`` is neither 'ii' nor 'iii'. Such a value
        used to fail later with an unbound-local error, because several
        tables below are only defined for those two versions.
    """
    super().__init__()

    # Fail early and clearly instead of hitting an unbound local further down.
    if version not in ('ii', 'iii'):
      raise ValueError(
        "version must be 'ii' or 'iii', got {!r}".format(version))

    # NOTE(review): an empty string here looks like a control character
    # (Ctrl-Z, '\x1a') that was lost in transit -- confirm against the
    # upstream file before relying on TrimCtrlZText doing anything.
    ctrlz_char = ''
    code = self.TrimCtrlZText(code, ctrlz_char)

    self.newlines_important = 'always'

    operand_types = []

    whitespace_tb = WhitespaceTokenBuilder()
    newline_tb = NewlineTokenBuilder()

    # Every numeric builder is given the apostrophe as its last argument.
    integer_tb = IntegerTokenBuilder("'")
    integer_exponent_tb = IntegerExponentTokenBuilder("'")
    real_tb = RealTokenBuilder(False, False, "'")
    real_exponent_tb = RealExponentTokenBuilder(False, False, 'E', "'")
    operand_types.append('number')

    leads = '_'
    extras = '_'

    if version == 'ii':
      extras = ":_'-"

    identifier_tb = IdentifierTokenBuilder(leads, extras)
    wild_card_identifier_tb = WildCardIdentifierTokenBuilder('*?', '*?:')
    operand_types.append('identifier')

    quotes = ['"', "'", "’"]
    string_tb = EscapedStringTokenBuilder(quotes, 0)
    bracket_string_tb = BracketedStringTokenBuilder()
    text_string_tb = TextBlockTokenBuilder('TEXT', 'ENDTEXT')
    operand_types.append('string')

    line_continuation_tb = SingleCharacterTokenBuilder(';', 'line continuation', False)

    comment_tb = AsteriskCommentTokenBuilder()
    line_comment_tb = LeadToEndOfLineTokenBuilder('&&', True, 'comment')

    # The operator table is the same for both versions, so it is defined once.
    known_operators = [
      '+', '-', '*', '/', '**', '^',
      '=', '<>', '#', '>', '>=', '<', '<=',
      '$',
      '.NOT.', '.AND.', '.OR.',
      '&', '$', '#', '!'
    ]

    known_operator_tb = CaseInsensitiveListTokenBuilder(known_operators, 'operator', False)

    if version == 'ii':
      self.unary_operators = [
        '+', '-',
        '.NOT.',
        '&', '$', '#', '!'
      ]
    else:  # version == 'iii'
      self.unary_operators = [
        '+', '-',
        '.NOT.',
        '&'
      ]

    self.postfix_operators = []

    special_chars = []

    if version == 'ii':
      special_chars = ['*', '#']

    # Fixed: the entry '.or' was missing its trailing dot, unlike its
    # neighbours '.and.' and '.not.' and the '.OR.' operator above.
    previous = ['if', 'case', 'while', 'store', '(', '.and.', '.or.', '.not.']
    special_function_tb = DbaseSpecialFunctionTokenBuilder(special_chars, previous)

    groupers = ['(', ')', ',']
    group_starts = ['(', ',']
    group_mids = [',']
    group_ends = [')']

    groupers_tb = CaseSensitiveListTokenBuilder(groupers, 'group', False)

    if version == 'ii':
      keywords = [
        'ACCEPT', 'ACCE', 'APPEND', 'APPE',
        'CASE', 'CLEAR', 'CLEA', 'COPY', 'COUNT', 'CREATE', 'CREA',
        'DELETE', 'DELE', 'DISPLAY', 'DISP', 'DO',
        'EJECT', 'EJEC', 'ELSE', 'ENDCASE', 'ENDC', 'ENDDO', 'ENDD',
        'ENDIF', 'ENDI', 'ENDWHILE', 'ENDW', 'ERASE', 'ERAS',
        'FIND', 'FOR', 'FORMAT', 'FORM',
        'GET', 'GO', 'GOTO',
        'IF', 'INDEX',
        'LIKE', 'LOCATE', 'LOCA', 'LOOP',
        'OTHERWISE', 'OTHE',
        'PACK', 'PICTURE', 'PICT',
        'READ', 'RECALL', 'RECA', 'RELEASE', 'RELE', 'REPLACE', 'REPL',
        'REPORT', 'REPO', 'RESTORE', 'REST', 'RETURN', 'RETU',
        'SAVE', 'SAY', 'SELECT', 'SELE', 'SET', 'SKIP', 'SORT',
        'STORE', 'STOR', 'SUM',
        'TO',
        'USE', 'USING', 'USIN',
        'WAIT', 'WHILE', 'WHIL', 'WITH',
        '@', '?', '??',
        'ALTERNATE',
        'BELL',
        'COLON', 'COLOR', 'CONSOLE', 'CONS',
        'DELIMITERS',
        'INTENSITY',
        'PRINT',
        'TALK'
      ]
    else:  # version == 'iii'
      keywords = [
        'ACCEPT', 'APPEND', 'ASSIST', 'AVERAGE',
        'BROWSE',
        'CALL', 'CANCEL', 'CASE', 'CHANGE', 'CLEAR', 'CLOSE', 'CONTINUE',
        'COPY', 'COUNT', 'CREATE',
        'DELETE', 'DIR', 'DISPLAY', 'DISP', 'DO',
        'EDIT', 'ELSE', 'ENDCASE', 'ENDDO', 'ENDIF', 'ENDWHILE', 'ERASE', 'EXIT',
        'EXPORT',
        'FIND', 'FOR', 'FROM',
        'GET', 'GO', 'GOTO',
        'HELP',
        'IF', 'IMPORT', 'INDEX', 'INPUT', 'INSERT',
        'JOIN',
        'LABEL', 'LIKE', 'LIST', 'LOAD', 'LOCATE', 'LOOP',
        'MODIFY',
        'OTHERWISE', 'OTHE',
        'PACK', 'PARAMETERS', 'PICTURE', 'PRIVATE', 'PROCEDURE', 'PUBLIC',
        'QUIT',
        'READ', 'RECALL', 'RELEASE', 'REPLACE', 'REPORT', 'RESTORE',
        'RESUME', 'RETURN', 'RETRY', 'RUN',
        'SAVE', 'SAY', 'SELECT', 'SELE', 'SEEK', 'SET', 'SKIP', 'SORT',
        'STORE', 'SUM', 'SUSPEND',
        'TO', 'TOTAL', 'TYPE',
        'UPDATE', 'USE',
        'WHILE', 'WITH',
        'ZAP',
        '@', '?', '??',
        'ALTERNATE',
        'BELL',
        'CARRY', 'CATALOG', 'CENTURY', 'COLOR', 'CONFIRM', 'CONSOLE',
        'DATE', 'AMERICAN', 'ANSI', 'BRITISH', 'ITALIAN', 'FRENCH', 'GERMAN',
        'DATABASES', 'DEBUG', 'DECIMALS', 'DEFAULT', 'DELETED',
        'DELIMITERS', 'DEVICE', 'DOHISTORY',
        'ECHO', 'ESCAPE', 'EXACT',
        'FIELDS', 'FILTER', 'FIXED', 'FORMAT', 'FUNCTION',
        'HEADING', 'HELP', 'HISTORY',
        'INTENSITY',
        'MARGIN', 'MEMO', 'WIDTH', 'MENUS', 'MESSAGE',
        'ODOMETER', 'ORDER',
        'PATH', 'PRINTER',
        'RELATION',
        'SAFETY', 'STATUS', 'STEP',
        'TALK', 'TITLE', 'TYPEAHEAD',
        'UNIQUE', 'VIEW', 'STRUCTURE', 'MEMORY', 'LABEL', 'QUERY', 'REPORT',
        'GETS', 'LOCK', 'FREEZE', 'NOFOLLOW', 'NOMENU'
      ]

    keyword_tb = CaseInsensitiveListTokenBuilder(keywords, 'keyword', False)

    # Only version 'ii' supplies a non-empty list to the keyword-comment builder.
    keyword_comments = []
    if version == 'ii':
      keyword_comments = [
        'ELSE', 'ENDCASE', 'ENDC', 'ENDDO', 'ENDD', 'ENDIF', 'ENDI', 'ENDWHILE', 'ENDW',
        'NOTE', 'REMARK', 'REMA'
      ]

    keyword_comment_tb = KeywordCommentTokenBuilder(keyword_comments, False)

    keyword_comment2_tb = KeywordComment2TokenBuilder(['DO', 'CASE'], False)

    if version == 'ii':
      values = [
        'ALL', 'BLANK', 'BLAN', 'BOTTOM', 'BOTT', 'EOF', 'OFF', 'ON', 'TOP',
        'PRIMARY', 'PRIM', 'SECONDARY', 'SECO',
        '.T.', '.F.'
      ]
    else:  # version == 'iii'
      values = [
        'ALL', 'BLANK', 'BLAN', 'BOTTOM', 'BOTT', 'EOF', 'OFF', 'ON', 'TOP',
        '.T.', '.F.'
      ]

    values_tb = CaseInsensitiveListTokenBuilder(values, 'value', True)
    operand_types.append('value')

    if version == 'ii':
      functions = [
        'ALLTRIM',
        'CHR', 'CTOD',
        'DATE', 'DATETIME', 'DAY', 'DELETED', 'DESCEND', 'DESC', 'DTOC', 'DTOS',
        'IIF',
        'LEFT', 'LTRIM',
        'MONTH',
        'PAGENO',
        'RECCOUNT', 'RECNO', 'RIGHT',
        'STOD', 'STR', 'SUBSTR',
        'TIME', 'TRIM',
        'UPPER',
        'VAL',
        'YEAR'
      ]
    else:  # version == 'iii'
      functions = [
        'ABS', 'ASC', 'AT',
        'BOF',
        'CDOW', 'CHR', 'CMONTH', 'COL', 'CTOD',
        'DATE', 'DAY', 'DBF', 'DELETED', 'DISKSPACE', 'DOW', 'DTOC',
        'EOF', 'ERROR', 'EXP',
        'FILE', 'FKMAX', 'FKLABEL', 'FIELD', 'FOUND',
        'GETENV',
        'IIF', 'INKEY', 'INT', 'ISALPHA', 'ISCOLOR', 'ISLOWER', 'ISUPPER',
        'LEFT', 'LEN', 'LOG', 'LOWER', 'LTRIM', 'LUPDATE',
        'MAX', 'MESSAGE', 'MIN', 'MOD', 'MONTH',
        'NDX',
        'OS',
        'PCOL', 'PROW',
        'READKEY', 'RECCOUNT', 'RECNO', 'RECSIZE', 'REPLICATE', 'RIGHT',
        'RTRIM', 'ROUND', 'ROW',
        'TIME', 'TYPE',
        'SPACE', 'STUFF', 'SQRT', 'STR', 'SUBSTR',
        'TRANSFORM', 'TRIM',
        'UPPER',
        'VAL', 'VERSION',
        'YEAR'
      ]

    function_tb = CaseInsensitiveListTokenBuilder(functions, 'function', True)
    operand_types.append('function')

    filename_tb = DbaseFilenameTokenBuilder()

    invalid_token_builder = InvalidTokenBuilder()

    # Builders common to both versions, in the order handed to the tokenizer.
    tokenbuilders1 = [
      newline_tb,
      whitespace_tb,
      line_continuation_tb,
      integer_tb,
      integer_exponent_tb,
      real_tb,
      real_exponent_tb,
      keyword_tb,
      keyword_comment_tb,
      keyword_comment2_tb,
      values_tb,
      groupers_tb,
      special_function_tb,
      comment_tb,         # before operators, to catch single asterisk on line
      known_operator_tb,
      function_tb,
      filename_tb,        # before identifier
      identifier_tb,
      string_tb,
      text_string_tb
    ]

    # Version-specific builder, inserted between the two common groups.
    if version == 'ii':
      version_tokenbuilders = [bracket_string_tb]
    else:  # version == 'iii'
      version_tokenbuilders = [line_comment_tb]

    # self.unknown_operator_tb is not set in this method; presumably the
    # base-class initializer provides it.
    tokenbuilders2 = [
      wild_card_identifier_tb,
      self.unknown_operator_tb,
      invalid_token_builder
    ]

    tokenbuilders = tokenbuilders1 + version_tokenbuilders + tokenbuilders2

    tokenizer = Tokenizer(tokenbuilders)
    tokens = tokenizer.tokenize(code)
    tokens = Examiner.combine_adjacent_identical_tokens(tokens, 'invalid operator')
    self.tokens = Examiner.combine_adjacent_identical_tokens(tokens, 'invalid')

    self.calc_statistics()

    tokens = self.source_tokens()

    self.calc_token_confidence()
    self.calc_token_2_confidence()

    # Operator checks are skipped entirely when no operator tokens were found.
    num_operators = self.count_my_tokens(['operator', 'invalid operator'])
    if num_operators > 0:
      self.calc_operator_confidence(num_operators)
      allow_pairs = []
      self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
      self.calc_operator_3_confidence(tokens, num_operators, group_ends, allow_pairs)
      self.calc_operator_4_confidence(tokens, num_operators, group_starts, allow_pairs)

    self.calc_group_confidence(tokens, group_mids)

    operand_types_2 = ['number', 'number', 'function', 'value', 'string', 'filename']
    self.calc_operand_n_confidence(tokens, operand_types_2, 2)
    self.calc_operand_n_confidence(tokens, operand_types, 4)

    self.calc_keyword_confidence()

    if version == 'ii':
      self.calc_line_format_confidence_ii()
    else:
      self.calc_line_format_confidence()

    self.calc_line_length_confidence(code, self.max_expected_line)
Exemplo n.º 23
0
    def __init__(self, code):
        """Tokenize ``code`` and compute this examiner's confidence figures.

        Builds the token builders, tokenizes the text, merges adjacent
        invalid tokens, applies three token conversions and then runs the
        confidence calculations in a fixed order.

        Args:
            code: source text to examine.
        """
        super().__init__()

        self.newlines_important = 'parens'

        # Only referenced by the disabled four-operand check near the end.
        operand_types = [
            'number', 'identifier', 'symbol', 'string', 'regex', 'value'
        ]

        # Layout tokens.
        ws_builder = WhitespaceTokenBuilder()
        nl_builder = NewlineTokenBuilder()
        separator_builder = SingleCharacterTokenBuilder(
            ';', 'statement separator', False)

        # Numeric builders; each receives '_' as its last argument.
        underscore = '_'
        int_builder = IntegerTokenBuilder(underscore)
        int_exp_builder = IntegerExponentTokenBuilder(underscore)
        real_builder = RealTokenBuilder(True, True, underscore)
        real_exp_builder = RealExponentTokenBuilder(
            True, True, 'E', underscore)

        # Names, symbols, strings, regexes, here-documents and comments.
        ident_builder = RubyIdentifierTokenBuilder()
        symbol_builder = PrefixedIdentifierTokenBuilder(':', 'symbol', True)
        string_builder = EscapedStringTokenBuilder(['"', "'", "’"], 10)
        regex_builder = RegexTokenBuilder()
        heredoc_builder = HereDocTokenBuilder('<<-')
        comment_builder = LeadToEndOfLineTokenBuilder('#', False, 'comment')

        operator_builder = CaseSensitiveListTokenBuilder(
            [
                '!', '~', '**', '*', '/', '%', '+', '-', '<<', '>>',
                '&', '|', '^', '<', '<=', '>', '>=', '==', '===', '!=',
                '=~', '!~', '<=>', '&&', '||', '..', '...', '?', ':', '=',
                '**=', '*=', '/=', '%=', '+=', '-=', '<<=', '>>=', '&&=',
                '&=', '||=', '|=', '^=', 'not', 'and', 'or', 'in', '.',
                '.:', '=>', '::', '<<-'
            ],
            'operator', False)

        self.unary_operators = ['+', '-', '!', '~', '&', '*', '**', '<<-']
        self.postfix_operators = ['++', '--']

        # Grouping characters and their roles in the operator/group checks.
        opening = ['(', '[', ',', '{']
        middle = [',']
        closing = [')', ']', '}']
        group_builder = CaseInsensitiveListTokenBuilder(
            ['(', ')', ',', '[', ']', '{', '}'], 'group', False)

        keyword_builder = CaseSensitiveListTokenBuilder(
            [
                'BEGIN', 'END', 'alias', 'begin', 'break', 'case', 'class',
                'def', 'defined?', 'do', 'else', 'elsif', 'end', 'ensure',
                'for', 'if', 'module', 'next', 'redo', 'rescue', 'retry',
                'return', 'then', 'undef', 'unless', 'until', 'when',
                'while', 'yield'
            ],
            'keyword', False)

        value_builder = CaseSensitiveListTokenBuilder(
            ['nil', 'self', 'true', 'false', 'super'], 'value', True)

        # Percent-prefixed markers are tokenized as identifiers.
        marker_builder = CaseSensitiveListTokenBuilder(
            ['%w', '%q', '%Q', '%i', '%s', '%x'], 'identifier', True)

        invalid_builder = InvalidTokenBuilder()

        # Order matters to the tokenizer, so the sequence is kept as-is.
        all_builders = [
            nl_builder,
            ws_builder,
            separator_builder,
            int_builder,
            int_exp_builder,
            real_builder,
            real_exp_builder,
            keyword_builder,
            value_builder,
            symbol_builder,
            operator_builder,
            group_builder,
            regex_builder,
            ident_builder,
            marker_builder,
            string_builder,
            heredoc_builder,
            comment_builder,
            self.unknown_operator_tb,
            invalid_builder,
        ]

        raw_tokens = Tokenizer(all_builders).tokenize(code)
        raw_tokens = Examiner.combine_adjacent_identical_tokens(
            raw_tokens, 'invalid operator')
        self.tokens = Examiner.combine_adjacent_identical_tokens(
            raw_tokens, 'invalid')

        self.convert_bars_to_groups()
        self.convert_keywords_to_identifiers(['.'])
        self.convert_operators_to_identifiers()

        self.calc_statistics()

        # Work on the source tokens with continued lines joined.
        joined = self.source_tokens()
        joined = Examiner.join_parens_continued_lines(joined)
        joined = Examiner.join_operator_continued_lines(
            joined, self.postfix_operators)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        # Operator checks only run when at least one operator token exists.
        operator_count = self.count_my_tokens(
            ['operator', 'invalid operator'])
        if operator_count > 0:
            no_pairs = []
            self.calc_operator_confidence(operator_count)
            self.calc_operator_2_confidence(joined, operator_count, no_pairs)
            self.calc_operator_3_confidence(
                joined, operator_count, closing, no_pairs)
            self.calc_operator_4_confidence(
                joined, operator_count, opening, no_pairs)

        self.calc_group_confidence(joined, middle)

        self.calc_operand_n_confidence(
            joined, ['number', 'string', 'symbol'], 2)
        # self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(
            ['begin', 'def', 'do', 'class', 'module'], ['end'])

        self.calc_line_length_confidence(code, self.max_expected_line)
Exemplo n.º 24
0
    def __init__(self, code):
        """Tokenize ``code`` and compute this examiner's confidence factors.

        One token builder is created per lexical category (numbers,
        identifiers, strings, comments, operators, groupers, regexes,
        keywords, types and values). ``code`` is tokenized with them, the
        merged result is stored in ``self.tokens``, and the statistics
        and confidence calculations are then run in a fixed order.
        """
        super().__init__()

        blank_tb = WhitespaceTokenBuilder()
        eol_tb = NewlineTokenBuilder()

        # Numeric literals, including 0H / 0O / 0B prefixed constants.
        int_tb = IntegerTokenBuilder(None)
        int_exp_tb = IntegerExponentTokenBuilder(None)
        float_tb = RealTokenBuilder(False, False, None)
        float_exp_tb = RealExponentTokenBuilder(False, False, 'E', None)
        hex_tb = PrefixedIntegerTokenBuilder(
            '0H', False, '0123456789ABCDEFabcdef')
        octal_tb = PrefixedIntegerTokenBuilder('0O', False, '01234567')
        binary_tb = PrefixedIntegerTokenBuilder('0B', False, '01')

        # '_$' is passed as both the lead and the extra character set.
        name_tb = IdentifierTokenBuilder('_$', '_$')

        text_tb = EscapedStringTokenBuilder(['"', "'", "’"], 0)

        line_comment_tb = SlashSlashCommentTokenBuilder()
        block_comment_tb = SlashStarCommentTokenBuilder()

        semicolon_tb = CaseInsensitiveListTokenBuilder(
            [';'], 'statement terminator', False)

        operator_table = [
            '+', '-', '*', '/', '%',
            '=', '==', '!=', '===', '!==',
            '>', '>=', '<', '<=',
            '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=',
            '!', '&', '|', '~', '<<', '>>', '=>', '^',
            '.', ':', '++', '--', '&&', '||', '?', '$', '?.',
            'new', 'delete'
        ]
        operator_tb = CaseSensitiveListTokenBuilder(
            operator_table, 'operator', False)

        self.unary_operators = [
            '+', '-', '!', '~', '++', '--', ':', '$', 'new', 'delete'
        ]
        self.postfix_operators = ['++', '--', ':']

        # Grouping characters and the subsets used by the operator and
        # group confidence checks further down.
        opening_groupers = ['(', '[', ',', '{']
        middle_groupers = [',']
        closing_groupers = [')', ']', '}']
        grouper_tb = CaseSensitiveListTokenBuilder(
            ['(', ')', ',', '[', ']', '{', '}'], 'group', False)

        regexp_tb = RegexTokenBuilder()

        keyword_table = [
            'break', 'case', 'catch', 'class', 'const', 'continue',
            'debugger', 'default', 'do', 'else', 'enum', 'export',
            'extends', 'finally', 'for', 'function', 'if', 'import', 'in',
            'instanceof', 'return', 'switch', 'throw', 'try', 'typeof',
            'while', 'with', 'as', 'implements', 'interface', 'let',
            'package', 'private', 'protected', 'public', 'static', 'yield',
            'constructor', 'declare', 'get', 'module', 'require', 'set',
            'type', 'from', 'of'
        ]
        reserved_tb = CaseSensitiveListTokenBuilder(
            keyword_table, 'keyword', False)

        type_table = [
            'any', 'boolean', 'byte', 'char', 'number', 'string', 'symbol',
            'void', 'never', 'object'
        ]
        type_tb = CaseSensitiveListTokenBuilder(type_table, 'type', True)

        value_table = ['this', 'super', 'null', 'true', 'false', 'undefined']
        value_tb = CaseSensitiveListTokenBuilder(value_table, 'value', True)

        fallback_tb = InvalidTokenBuilder()

        # The order of this list is kept exactly as handed to the
        # tokenizer originally.
        builders = [
            eol_tb, blank_tb, semicolon_tb, int_tb,
            int_exp_tb, float_tb, float_exp_tb, hex_tb,
            octal_tb, binary_tb, reserved_tb, type_tb,
            value_tb, operator_tb, grouper_tb, regexp_tb, name_tb,
            text_tb, line_comment_tb, block_comment_tb,
            self.unknown_operator_tb, fallback_tb
        ]

        token_stream = Tokenizer(builders).tokenize(code)
        # Merge runs of adjacent invalid tokens, operators first.
        for group in ('invalid operator', 'invalid'):
            token_stream = Examiner.combine_adjacent_identical_tokens(
                token_stream, group)
        self.tokens = token_stream
        self.convert_keywords_to_identifiers(['.'])

        self.calc_statistics()

        joined = Examiner.join_all_lines(self.source_tokens())

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        operator_count = self.count_my_tokens(
            ['operator', 'invalid operator'])
        if operator_count > 0:
            self.calc_operator_confidence(operator_count)
            # The same list object is shared by the three pair checks.
            allowed_pairs = []
            self.calc_operator_2_confidence(
                joined, operator_count, allowed_pairs)
            self.calc_operator_3_confidence(
                joined, operator_count, closing_groupers, allowed_pairs)
            self.calc_operator_4_confidence(
                joined, operator_count, opening_groupers, allowed_pairs)

        self.calc_group_confidence(joined, middle_groupers)

        # The n=2 check uses a reduced set of operand types; the n=4
        # check uses every operand type registered by this examiner.
        self.calc_operand_n_confidence(
            joined, ['number', 'string', 'symbol'], 2)
        self.calc_operand_n_confidence(
            joined, ['number', 'identifier', 'string', 'type', 'value'], 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Exemplo n.º 25
0
    def __init__(self, code, tab_size, wide):
        """Tokenize ``code`` as free-format and as fixed-format; keep the best.

        The source is tokenized twice: once as free-format text and once
        through ``self.tokenize_code`` (the fixed-format path, which
        receives ``tab_size`` and ``wide``). Statistics, confidences and
        errors are computed for each pass. The fixed-format results are
        kept only when their combined confidence is strictly greater than
        the free-format one; otherwise the free-format results are kept.

        Args:
            code: source text to examine.
            tab_size: forwarded to ``self.tokenize_code``.
            wide: forwarded to ``self.tokenize_code``.
        """
        super().__init__()

        self.operand_types = []

        self.whitespace_tb = WhitespaceTokenBuilder()
        self.newline_tb = NewlineTokenBuilder()

        self.integer_tb = IntegerTokenBuilder(None)
        self.integer_exponent_tb = IntegerExponentTokenBuilder(None)
        self.binary_integer_tb = SuffixedIntegerTokenBuilder(['B'], False,
                                                             None)
        self.real_tb = RealTokenBuilder(False, False, None)
        self.real_exponent_tb = RealExponentTokenBuilder(
            False, False, 'E', None)
        self.binary_real_tb = SuffixedRealTokenBuilder(True, True, ['B'],
                                                       False, None)
        self.operand_types.append('number')

        leads = '_'
        extras = '_'
        self.identifier_tb = IdentifierTokenBuilder(leads, extras)
        self.operand_types.append('identifier')

        quotes = ['"', "'", "’"]
        self.string_tb = EscapedStringTokenBuilder(quotes, 0)
        self.operand_types.append('string')

        self.label_tb = PL1LabelTokenBuilder()
        self.operand_types.append('label')

        self.slash_star_comment_tb = SlashStarCommentTokenBuilder()

        self.jcl_tb = JCLTokenBuilder()

        directives = [
            '%ACTIVATE', '%DEACTIVATE', '%DECLARE', '%DCL', '%DICTIONARY',
            '%DO', '%ELSE', '%END', '%FATAL', '%GOTO', '%IF', '%INCLUDE',
            '%LIST', '%NOLIST', '%PAGE', '%PROCEDURE', '%PROC', '%REPLACE',
            '%RETURN', '%THEN'
        ]

        self.line_continuation_tb = SingleCharacterTokenBuilder(
            '\\', 'line continuation', False)
        self.preprocessor_tb = CaseInsensitiveListTokenBuilder(
            directives, 'preprocessor', False)
        self.title_tb = LeadToEndOfLineTokenBuilder('%TITLE', True,
                                                    'preprocessor')
        self.subtitle_tb = LeadToEndOfLineTokenBuilder('%SBTTL', True,
                                                       'preprocessor')
        self.error_tb = LeadToEndOfLineTokenBuilder('%ERROR', True,
                                                    'preprocessor')
        self.warn_tb = LeadToEndOfLineTokenBuilder('%WARN', True,
                                                   'preprocessor')
        self.inform_tb = LeadToEndOfLineTokenBuilder('%INFORM', True,
                                                     'preprocessor')
        self.terminators_tb = SingleCharacterTokenBuilder(
            ';', 'statement terminator', False)

        known_operators = [
            '+', '-', '*', '/', '**', '>', '<', '=', '>=', '<=', '¬>', '¬<',
            '¬=', '^>', '^<', '^=', '^', '~>', '~<', '~=', '~', '¬', '&', '&:',
            '|', '|:', '||', '!', '!:', '!!', ':'
        ]

        self.unary_operators = ['+', '-', '^', '~', '¬']

        self.postfix_operators = []

        groupers = ['(', ')', ',', '[', ']', '{', '}']
        self.group_starts = ['(', '[', ',', '{']
        self.group_mids = [',']
        self.group_ends = [')', ']', '}']

        self.groupers_tb = CaseInsensitiveListTokenBuilder(
            groupers, 'group', False)

        self.known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        keywords = [
            'ALLOCATE', 'ALLOC', 'BEGIN', 'CALL', 'CLOSE', 'DECLARE', 'DCL',
            'DO', 'ELSE', 'END', 'FORMAT', 'FREE', 'GET', 'GOTO', 'GO TO',
            'IF', 'LEAVE', 'ON', 'OPEN', 'OTHERWISE', 'OTHER', 'PROCEDURE',
            'PROC', 'PUT', 'READ', 'RETURN', 'REVERT', 'REWRITE', 'SELECT',
            'SIGNAL', 'STOP', 'THEN', 'WHEN', 'WRITE'
        ]

        self.keyword_tb = CaseInsensitiveListTokenBuilder(
            keywords, 'keyword', False)

        # NOTE: every entry needs its own trailing comma. Adjacent string
        # literals are concatenated by Python, which previously merged
        # 'INT' and 'KEYED' into the single bogus entry 'INTKEYED'.
        attributes = [
            'ALIGNED', 'ANY', 'AREA', 'BASED', 'BUILTIN', 'CONDITION', 'COND',
            'CONTROLLED', 'CTL', 'DEFINED', 'DEF', 'DIRECT', 'ENTRY',
            'ENVIRONMENT', 'ENV', 'EXTERNAL', 'EXT', 'FILE', 'GLOBALDEF',
            'GLOBALREF', 'INITIAL', 'INIT', 'INPUT', 'INTERNAL', 'INT',
            'KEYED', 'LABEL', 'LIKE', 'LIST', 'MEMBER', 'NONVARYING', 'NONVAR',
            'OPTIONAL', 'OPTIONS', 'OUTPUT', 'PARAMETER', 'PARM', 'PICTURE',
            'PIC', 'POSITION', 'POS', 'PRECISION', 'PREC', 'PRINT', 'READONLY',
            'RECORD', 'REFER', 'RETURNS', 'SEQUENTIAL', 'SEQL', 'STATIC',
            'STREAM', 'STRUCTURE', 'TRUNCATE', 'UNALIGNED', 'UNAL', 'UNION',
            'UPDATE', 'VARIABLE', 'VARYING', 'VAR'
        ]

        self.attributes_tb = CaseInsensitiveListTokenBuilder(
            attributes, 'attribute', False)

        functions = [
            'ABS', 'ACOS', 'ACTUALCOUNT', 'ADD', 'ADDR', 'ADDREL',
            'ALLOCATION', 'ALLOCN', 'ASIN', 'ATAN', 'ATAND', 'ATANH',
            'AUTOMATIC', 'AUTO', 'BINARY', 'BIN', 'BIT', 'BOOL', 'BYTE',
            'BYTESIZE', 'CEIL', 'CHARACTER', 'CHAR', 'COLLATE', 'COPY', 'COS',
            'COSD', 'COSH', 'DATE', 'DATETIME', 'DECIMAL', 'DEC', 'DECODE',
            'DESCRIPTOR', 'DESC', 'DIMENSION', 'DIM', 'DIVIDE', 'EMPTY',
            'ENCODE', 'ERROR', 'EVERY', 'EXP', 'FIXED', 'FLOAT', 'FLOOR',
            'HBOUND', 'HIGH', 'INDEX', 'INFORM', 'INT', 'LBOUND', 'LENGTH',
            'LINE', 'LINENO', 'LOG', 'LOG10', 'LOG2', 'LOW', 'LTRIM', 'MAX',
            'MAXLENGTH', 'MIN', 'MOD', 'MULTIPLY', 'NULL', 'OFFSET',
            'ONARGSLIST', 'ONCHAR', 'ONCODE', 'ONFILE', 'ONKEY', 'ONSOURCE',
            'PAGENO', 'POINTER', 'PTR', 'POSINT', 'PRESENT', 'PROD', 'RANK',
            'REFERENCE', 'REVERSE', 'ROUND', 'RTRIM', 'SEARCH', 'SIGN', 'SIN',
            'SIND', 'SINH', 'SIZE', 'SOME', 'SQRT', 'STRING', 'SUBSTR',
            'SUBTRACT', 'SUM', 'TAN', 'TAND', 'TANH', 'TIME', 'TRANSLATE',
            'TRIM', 'TRUNC', 'UNSPEC', 'VALID', 'VALUE', 'VAL', 'VARIANT',
            'VERIFY', 'WARN'
        ]

        self.function_tb = CaseInsensitiveListTokenBuilder(
            functions, 'function', True)

        format_items = [
            'A', 'B', 'B1', 'B2', 'B3', 'B4', 'COLUMN', 'COL', 'E', 'F', 'P',
            'R', 'TAB', 'X'
        ]

        self.format_item_tb = CaseSensitiveListTokenBuilder(
            format_items, 'format', True)
        self.operand_types.append('format')

        options = [
            'APPEND', 'BACKUP_DATE', 'BATCH', 'BLOCK_BOUNDARY_FORMAT',
            'BLOCK_IO', 'BLOCK_SIZE', 'BUCKET_SIZE', 'BY', 'CANCEL_CONTROL_O',
            'CARRIAGE_RETURN_FORMAT', 'CONTIGUOUS', 'CONTIGUOUS_BEST_TRY',
            'CREATION_DATE', 'CURRENT_POSITION', 'DEFAULT_FILE_NAME',
            'DEFERRED_WRITE', 'DELETE', 'EDIT', 'EXPIRATION_DATE',
            'EXTENSION_SIZE', 'FAST_DELETE', 'FILE_ID', 'FILE_ID_TO',
            'FILE_SIZE', 'FIXED_CONTROL_FROM', 'FIXED_CONTROL_SIZE',
            'FIXED_CONTROL_SIZE_TO', 'FIXED_CONTROL_TO',
            'FIXED_LENGTH_RECORDS', 'FROM', 'GROUP_PROTECTION', 'IDENT',
            'IGNORE_LINE_MARKS', 'IN', 'INDEXED', 'INDEX_NUMBER',
            'INITIAL_FILL', 'INTO', 'KEY', 'KEYFROM', 'KEYTO', 'LINESIZE',
            'LOCK_ON_READ', 'LOCK_ON_WRITE', 'MAIN PROCEDURE',
            'MANUAL_UNLOCKING', 'MATCH_GREATER', 'MATCH_GREATER_EQUAL',
            'MATCH_NEXT', 'MATCH_NEXT_EQUAL', 'MAXIMUM_RECORD_NUMBER',
            'MAXIMUM_RECORD_SIZE', 'MULTIBLOCK_COUNT', 'MULTIBUFFER_COUNT',
            'NOLOCK', 'NONEXISTENT_RECORD', 'NONRECURSIVE', 'NORESCAN',
            'NO_ECHO', 'NO_FILTER', 'NO_SHARE', 'OWNER_GROUP', 'OWNER_ID',
            'OWNER_MEMBER', 'OWNER_PROTECTION', 'PAGE', 'PAGESIZE',
            'PRINTER_FORMAT', 'PROMPT', 'PURGE_TYPE_AHEAD', 'READ_AHEAD',
            'READ_CHECK', 'READ_REGARDLESS', 'RECORD_ID', 'RECORD_ID_ACCESS',
            'RECORD_ID_TO', 'RECURSIVE', 'REPEAT', 'RESCAN',
            'RETRIEVAL_POINTERS', 'REVISION_DATE', 'REWIND_ON_CLOSE',
            'REWIND_ON_OPEN', 'SCALARVARYING', 'SET READ', 'SHARED_READ',
            'SHARED_WRITE', 'SKIP', 'SNAP', 'SPOOL', 'STATEMENT', 'SUPERSEDE',
            'SYSTEM', 'SYSTEM_PROTECTION', 'TEMPORARY', 'TIMEOUT_PERIOD',
            'TITLE', 'TO', 'UNDERFLOW', 'UFL', 'UNTIL', 'USER_OPEN',
            'WAIT_FOR_RECORD', 'WHILE', 'WORLD_PROTECTION', 'WRITE_BEHIND',
            'WRITE_CHECK'
        ]

        self.options_tb = CaseInsensitiveListTokenBuilder(
            options, 'option', False)

        conditions = [
            'ANYCONDITION', 'CONVERSION', 'CONV', 'ENDFILE', 'ENDPAGE',
            'FINISH', 'FIXEDOVERFLOW', 'FOFL', 'OVERFLOW', 'OFL', 'STORAGE',
            'STRINGRANGE', 'STRG', 'SUBSCRIPTRANGE', 'SUBRG', 'UNDEFINEDFILE',
            'UNDF', 'VAXCONDITION', 'ZERODIVIDE', 'ZDIV'
        ]

        self.conditions_tb = CaseInsensitiveListTokenBuilder(
            conditions, 'condition', False)

        subroutines = [
            'DISPLAY', 'EXTEND', 'FLUSH', 'NEXT_VOLUME', 'RELEASE', 'RESIGNAL',
            'REWIND', 'SPACEBLOCK'
        ]

        self.subroutines_tb = CaseInsensitiveListTokenBuilder(
            subroutines, 'subroutine', False)

        types = [
            'FIXED', 'BINARY', 'FLOAT', 'DECIMAL', 'BIT', 'CHARACTER',
            'PICTURE'
        ]

        self.types_tb = CaseInsensitiveListTokenBuilder(types, 'type', True)
        self.operand_types.append('type')

        values = ['SYSIN', 'SYSPRINT']

        self.values_tb = CaseInsensitiveListTokenBuilder(values, 'value', True)
        self.operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        # Builders shared by the free-format and the fixed-format pass
        # (the two passes originally spelled out the same list twice).
        base_tokenbuilders = [
            self.newline_tb, self.whitespace_tb, self.line_continuation_tb,
            self.terminators_tb, self.integer_tb, self.integer_exponent_tb,
            self.binary_integer_tb, self.real_tb, self.real_exponent_tb,
            self.binary_real_tb, self.keyword_tb, self.function_tb,
            self.attributes_tb, self.options_tb, self.conditions_tb,
            self.subroutines_tb, self.types_tb, self.values_tb,
            self.groupers_tb, self.known_operator_tb, self.identifier_tb,
            self.string_tb, self.label_tb, self.slash_star_comment_tb,
            self.preprocessor_tb, self.title_tb, self.subtitle_tb,
            self.error_tb, self.warn_tb, self.inform_tb, self.jcl_tb,
            self.unknown_operator_tb, invalid_token_builder
        ]

        # tokenize as free-format
        tokenizer_free = Tokenizer(list(base_tokenbuilders))
        tokens_free = tokenizer_free.tokenize(code)
        tokens_free = Examiner.combine_adjacent_identical_tokens(
            tokens_free, 'invalid operator')
        tokens_free = Examiner.combine_adjacent_identical_tokens(
            tokens_free, 'invalid')
        self.tokens = tokens_free

        statistics_free, confidences_free, errors_free = \
            self._evaluate_current_tokens(code)

        # tokenize as fixed-format
        comment_start_tb = PL1CommentStartTokenBuilder()
        comment_middle_tb = PL1CommentMiddleTokenBuilder()
        comment_end_tb = PL1CommentEndTokenBuilder()

        # Each fixed-format tokenizer gets the shared builders followed by
        # its comment builders and the invalid-token builder, exactly as
        # the original lists were laid out.
        type1_tokenbuilders = [comment_start_tb]
        tokenbuilders_fixed_1 = base_tokenbuilders + type1_tokenbuilders + [
            invalid_token_builder
        ]
        tokenizer_fixed_1 = Tokenizer(tokenbuilders_fixed_1)

        type2_tokenbuilders = [
            comment_start_tb, comment_middle_tb, comment_end_tb
        ]
        tokenbuilders_fixed_2 = base_tokenbuilders + type2_tokenbuilders + [
            invalid_token_builder
        ]
        tokenizer_fixed_2 = Tokenizer(tokenbuilders_fixed_2)

        tokens_fixed = self.tokenize_code(code, tab_size, tokenizer_fixed_1,
                                          tokenizer_fixed_2, wide)
        tokens_fixed = Examiner.combine_adjacent_identical_tokens(
            tokens_fixed, 'invalid operator')
        tokens_fixed = Examiner.combine_adjacent_identical_tokens(
            tokens_fixed, 'invalid')
        tokens_fixed = Examiner.combine_adjacent_identical_tokens(
            tokens_fixed, 'whitespace')
        tokens_fixed = self.convert_broken_comments_to_comments(tokens_fixed)
        self.tokens = tokens_fixed

        statistics_fixed, confidences_fixed, errors_fixed = \
            self._evaluate_current_tokens(code)

        # compute confidence for free-format and fixed-format
        confidence_free = self._combined_confidence(confidences_free)
        confidence_fixed = self._combined_confidence(confidences_fixed)

        # select the better of free-format and fixed-format
        # (free-format wins ties)
        if confidence_fixed > confidence_free:
            self.tokens = tokens_fixed
            self.statistics = statistics_fixed
            self.confidences = confidences_fixed
            self.errors = errors_fixed
        else:
            self.tokens = tokens_free
            self.statistics = statistics_free
            self.confidences = confidences_free
            self.errors = errors_free

    def _evaluate_current_tokens(self, code):
        """Score ``self.tokens`` and hand back the results, resetting state.

        Runs the statistics and confidence calculations against the
        tokens currently stored in ``self.tokens``, then clears
        ``self.statistics``, ``self.confidences`` and ``self.errors`` so
        that another tokenization can be scored afterwards.

        Args:
            code: the source text, used for the line-length check.

        Returns:
            A ``(statistics, confidences, errors)`` tuple holding the
            objects produced by this pass.
        """
        self.calc_statistics()
        statistics = self.statistics
        # Reset before the confidence calculations, as the original
        # inline code did.
        self.statistics = {}

        tokens = self.source_tokens()
        tokens = Examiner.join_all_lines(tokens)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators,
                                            self.group_ends, allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            self.group_starts, allow_pairs)

        self.calc_group_confidence(tokens, self.group_mids)

        operand_types_2 = ['number', 'symbol']
        self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        self.calc_operand_n_confidence(tokens, self.operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)

        confidences = self.confidences
        self.confidences = {}
        errors = self.errors
        self.errors = []

        return statistics, confidences, errors

    @staticmethod
    def _combined_confidence(confidences):
        """Return the product of all factors in ``confidences``.

        An empty mapping yields 0.0 rather than the empty product 1.0, so
        a pass that produced no confidence factors can never win.
        """
        if len(confidences) == 0:
            return 0.0

        combined = 1.0
        for key in confidences:
            combined *= confidences[key]
        return combined
Exemplo n.º 26
0
    def __init__(self, code):
        """Tokenize ``code`` and compute this examiner's confidence factors.

        Token builders are created for numbers, arguments, identifiers,
        attributes, symbols, strings, comments, operators, groupers,
        keywords, types and values. ``code`` is tokenized with them, the
        merged result is stored in ``self.tokens``, and the statistics
        and confidence calculations are then run.
        """
        super().__init__()

        # Token groups that count as operands; filled in as the
        # corresponding builders are created and used by the n=4 operand
        # check at the end.
        operand_types = []

        whitespace_tb = WhitespaceTokenBuilder()
        newline_tb = NewlineTokenBuilder()
        stmt_separator_tb = SingleCharacterTokenBuilder(
            ';', 'statement separator', False)

        integer_tb = IntegerTokenBuilder('_')
        integer_exponent_tb = IntegerExponentTokenBuilder('_')
        real_tb = RealTokenBuilder(True, True, '_')
        real_exponent_tb = RealExponentTokenBuilder(True, True, 'E', '_')
        operand_types.append('number')

        argument_tb = SwiftArgumentTokenBuilder()

        leads = '_'
        extras = '_'
        suffixes = '?'
        identifier_tb = SuffixedIdentifierTokenBuilder(leads, extras, suffixes)
        operand_types.append('identifier')

        attribute_tb = PrefixedIdentifierTokenBuilder('@', 'attribute', False)

        symbol_tb = SwiftSymbolTokenBuilder('.', 'symbol', True)
        operand_types.append('symbol')

        quotes = ['"', "'", "’"]
        string_tb = EscapedStringTokenBuilder(quotes, 10)
        triple_quote_string_tb = TripleQuoteStringTokenBuilder(quotes)
        slash_slash_comment_tb = SlashSlashCommentTokenBuilder()
        slash_star_comment_tb = SlashStarCommentTokenBuilder()
        operand_types.append('string')

        known_operators = [
            '+', '-', '*', '/', '%', '==', '!=', '>', '<', '>=', '<=', '&&',
            '||', '!', '&', '|', '^', '~', '<<', '>>', '===', '=', '+=', '-=',
            '*=', '/=', '%=', '<<=', '>>=', '&=', '^=', '|=', '...', '..<',
            '?', ':', '.', '++', '--', '->', '??', '\\.', '&+', '&-', '&*'
        ]

        known_operator_tb = CaseSensitiveListTokenBuilder(
            known_operators, 'operator', False)

        self.unary_operators = ['+', '-', '!', '~', '&', '++', '--', ':', '?']

        self.postfix_operators = ['++', '--', ':', '!', '?']

        groupers = ['(', ')', ',', '[', ']', '{', '}']
        group_starts = ['(', '[', ',', '{']
        group_mids = [',']
        group_ends = [')', ']', '}']

        groupers_tb = CaseInsensitiveListTokenBuilder(groupers, 'group', False)

        keywords = [
            'associatedtype', 'class', 'deinit', 'enum', 'extension',
            'fileprivate', 'func', 'import', 'init', 'inout', 'internal',
            'let', 'open', 'operator', 'private', 'protocol', 'public',
            'static', 'struct', 'subscript', 'typealias', 'var', 'break',
            'case', 'continue', 'default', 'defer', 'do', 'else',
            'fallthrough', 'for', 'guard', 'if', 'in', 'repeat', 'return',
            'switch', 'where', 'while', 'as', 'Any', 'catch', 'is', 'rethrows',
            'super', 'throw', 'throws', 'try', 'try?', 'try!', '#available',
            '#colorLiteral', '#column', '#else', '#elseif', '#endif', '#file',
            '#fileLiteral', '#function', '#if', '#imageLiteral', '#line',
            '#selector', '#sourceLocation', 'associativity', 'convenience',
            'dynamic', 'didSet', 'final', 'get', 'infix', 'indirect', 'lazy',
            'left', 'mutating', 'none', 'nonmutating', 'optional', 'override',
            'postfix', 'precedence', 'prefix', 'Protocol', 'required', 'right',
            'set', 'Type', 'unowned', 'weak', 'willSet'
        ]

        keyword_tb = CaseSensitiveListTokenBuilder(keywords, 'keyword', False)

        types = [
            'char',
            'double',
            'float',
            'int',
            'long',
            'short',
        ]

        types_tb = CaseSensitiveListTokenBuilder(types, 'type', True)
        operand_types.append('type')

        values = ['nil', 'Self', 'false', 'true']

        values_tb = CaseSensitiveListTokenBuilder(values, 'value', True)
        operand_types.append('value')

        invalid_token_builder = InvalidTokenBuilder()

        tokenbuilders = [
            newline_tb, whitespace_tb, stmt_separator_tb, integer_tb,
            integer_exponent_tb, real_tb, real_exponent_tb, argument_tb,
            keyword_tb, types_tb, values_tb, known_operator_tb, groupers_tb,
            identifier_tb, attribute_tb, symbol_tb, string_tb,
            slash_slash_comment_tb, slash_star_comment_tb,
            triple_quote_string_tb, self.unknown_operator_tb,
            invalid_token_builder
        ]

        tokenizer = Tokenizer(tokenbuilders)
        tokens = tokenizer.tokenize(code)
        tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid operator')
        self.tokens = Examiner.combine_adjacent_identical_tokens(
            tokens, 'invalid')
        self.convert_keywords_to_identifiers(['.'])

        self.calc_statistics()

        tokens = self.source_tokens()
        tokens = Examiner.join_all_lines(tokens)

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        num_operators = self.count_my_tokens(['operator', 'invalid operator'])
        if num_operators > 0:
            self.calc_operator_confidence(num_operators)
            allow_pairs = []
            self.calc_operator_2_confidence(tokens, num_operators, allow_pairs)
            self.calc_operator_3_confidence(tokens, num_operators, group_ends,
                                            allow_pairs)
            self.calc_operator_4_confidence(tokens, num_operators,
                                            group_starts, allow_pairs)

        self.calc_group_confidence(tokens, group_mids)

        # The n=2 check uses its own reduced list. It must not rebind
        # `operand_types`, otherwise the n=4 check below would lose the
        # identifier, type and value groups collected above.
        operand_types_2 = ['number', 'string', 'symbol']
        self.calc_operand_n_confidence(tokens, operand_types_2, 2)
        self.calc_operand_n_confidence(tokens, operand_types, 4)

        self.calc_keyword_confidence()
        self.calc_line_length_confidence(code, self.max_expected_line)
Exemplo n.º 27
0
    def __init__(self, code):
        """Tokenize ``code`` and compute this examiner's confidence factors.

        Token builders are created for numbers, identifiers (plain,
        sigil-prefixed and special variables), strings, regex forms,
        prototypes, comments, preprocessor lines, operators, groupers,
        keywords and values. ``code`` is tokenized with ``'__END__'``
        passed as the tokenizer's second argument, identifiers are
        combined with colons and converted to labels, and the statistics
        and confidence calculations are then run.
        """
        super().__init__()

        blank_tb = WhitespaceTokenBuilder()
        eol_tb = NewlineTokenBuilder()

        # Numeric literals.
        int_tb = IntegerTokenBuilder('_')
        int_exp_tb = IntegerExponentTokenBuilder('_')
        hex_tb = PrefixedIntegerTokenBuilder(
            '0x', False, '0123456789abcdefABCDEF')
        float_tb = RealTokenBuilder(False, False, "'")
        float_exp_tb = RealExponentTokenBuilder(False, False, 'E', "'")

        # '_' is passed as both the lead and the extra character set.
        name_tb = IdentifierTokenBuilder('_', '_')
        perl_name_tb = PerlIdentifierTokenBuilder()

        special_names = [
            '$_', '@_', '$$', '$"', '$(', '$)', '$>', '$<', '$;',
            '$]', '$[', '$&', '$`', "$'", '$+', '@+', '%+', '@-',
            '%-', '$,', '$.', '$/', '$\\', '$|', '$%', '$-', '$:',
            '$=', '$^', '$~', '$!', '$?', '$@', '$#', '$*'
        ]
        special_tb = CaseInsensitiveListTokenBuilder(
            special_names, 'identifier', True)

        dollar_caret_tb = PerlDollarCaretIdentifierTokenBuilder()

        sigil_brace_tb = PerlSigilBraceTokenBuilder()

        text_tb = EscapedStringTokenBuilder(['"', "'", "’"], 0)

        q_text_tb = PerlQStringTokenBuilder()

        # Regex-like constructs.
        regexp_tb = RegexTokenBuilder()
        m_regexp_tb = MRegexTokenBuilder()
        s_regexp_tb = SRegexTokenBuilder()
        y_regexp_tb = YRegexTokenBuilder()
        tr_regexp_tb = TrRegexTokenBuilder()

        proto_tb = PerlPrototypeTokenBuilder()

        hash_comment_tb = LeadToEndOfLineTokenBuilder('#', False, 'comment')

        continuation_tb = SingleCharacterTokenBuilder(
            '\\', 'line continuation', False)

        directive_tb = CaseSensitiveListTokenBuilder(
            ['#line'], 'preprocessor', False)

        semicolon_tb = SingleCharacterTokenBuilder(
            ';', 'statement terminator', False)

        operator_table = [
            '+', '-', '*', '**', '/', '%', '=', '==', '!=',
            '>', '>=', '<', '<=',
            '**=', '+=', '*=', '&=', '&.=', '<<=', '&&=', '-=', '/=',
            '|=', '|.=', '>>=', '||=', '.=', '%=', '^=', '^.=', '//=', 'x=',
            'ne', 'gt', 'ge', 'le', 'lt', 'eq',
            '!', '&', '|', '~', '<<', '>>', '^',
            '.', '..', '...', '++', '--', '->', '=>', '&&', '||', '?',
            '<->', '<=>', 'and', 'cmp', 'or', 'xor'
        ]

        self.unary_operators = ['+', '-', '*', '!', '&', '~', '++', '--']
        self.postfix_operators = ['++', '--']

        # Grouping tokens and the subsets used by the operator and group
        # confidence checks further down.
        opening_groupers = ['(', '[', ',', '{']
        middle_groupers = [',', ':', '::']
        closing_groupers = [')', ']', '}']
        grouper_tb = CaseInsensitiveListTokenBuilder(
            ['(', ')', ',', '[', ']', '{', '}', ':', '::'], 'group', False)

        operator_tb = CaseSensitiveListTokenBuilder(
            operator_table, 'operator', False)

        keyword_table = [
            'bless', 'break', 'continue', 'die', 'do', 'else', 'elsif',
            'eval', 'exit', 'exp', 'for', 'foreach', 'if', 'last', 'lock',
            'my', 'next', 'no', 'our', 'package', 'redo', 'return', 'say',
            'sub', 'taint', 'undef', 'unless', 'until', 'use', 'wantarray',
            'while'
        ]
        reserved_tb = CaseSensitiveListTokenBuilder(
            keyword_table, 'keyword', True)

        value_tb = CaseSensitiveListTokenBuilder(['NULL'], 'value', True)

        fallback_tb = InvalidTokenBuilder()

        # The order of this list is kept exactly as handed to the
        # tokenizer originally.
        builders = [
            eol_tb, blank_tb, continuation_tb, semicolon_tb,
            int_tb, int_exp_tb, hex_tb, float_tb,
            float_exp_tb, reserved_tb, value_tb, grouper_tb,
            operator_tb, proto_tb, name_tb, perl_name_tb,
            special_tb, dollar_caret_tb, sigil_brace_tb, text_tb,
            q_text_tb, regexp_tb, m_regexp_tb, s_regexp_tb, y_regexp_tb,
            tr_regexp_tb, directive_tb, hash_comment_tb,
            self.unknown_operator_tb, fallback_tb
        ]

        token_stream = Tokenizer(builders).tokenize(code, ['__END__'])
        # Merge runs of adjacent invalid tokens, operators first.
        for group in ('invalid operator', 'invalid'):
            token_stream = Examiner.combine_adjacent_identical_tokens(
                token_stream, group)
        token_stream = Examiner.combine_identifier_colon(
            token_stream,
            ['statement terminator', 'newline'],
            ['{'],
            ['whitespace', 'comment', 'line description'])
        self.tokens = token_stream
        self.convert_identifiers_to_labels()

        self.calc_statistics()

        joined = Examiner.join_all_lines(self.source_tokens())

        self.calc_token_confidence()
        self.calc_token_2_confidence(['*', ';'])

        operator_count = self.count_my_tokens(
            ['operator', 'invalid operator'])
        if operator_count > 0:
            self.calc_operator_confidence(operator_count)
            # The same list object is shared by the three pair checks.
            allowed_pairs = []
            self.calc_operator_2_confidence(
                joined, operator_count, allowed_pairs)
            self.calc_operator_3_confidence(
                joined, operator_count, closing_groupers, allowed_pairs)
            self.calc_operator_4_confidence(
                joined, operator_count, opening_groupers, allowed_pairs)

        self.calc_group_confidence(joined, middle_groupers)

        # The n=2 check only considers numbers; the n=4 check uses every
        # operand type registered by this examiner.
        self.calc_operand_n_confidence(joined, ['number'], 2)
        self.calc_operand_n_confidence(
            joined, ['number', 'identifier', 'string', 'regex', 'value'], 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Exemplo n.º 28
0
    def __init__(self, code):
        """Tokenize ``code`` and run this examiner's confidence calculations.

        The keyword, type and operator vocabulary configured here looks like
        Java -- NOTE(review): confirm against the enclosing class name, which
        is not visible from this method.

        Args:
            code: the text passed to the tokenizer and to the line-length
                confidence check (presumably the whole source as one string).
        """
        super().__init__()

        # ---- vocabulary --------------------------------------------------
        # Token groups handed to the 4-operand confidence check at the end.
        operand_types = [
            'number', 'identifier', 'string', 'class', 'type', 'value'
        ]

        operator_words = [
            '+', '-', '*', '/', '%', '=', '==', '!=', '>', '>=', '<', '<=',
            '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=', '!',
            '&', '|', '~', '<<', '>>', '>>>', '>>>=', '^', '.', '::', '++',
            '--', '&&', '||', '?', '->', 'new'
        ]
        self.unary_operators = ['+', '-', '!', '~', '++', '--', 'new']
        self.postfix_operators = ['++', '--']

        grouper_words = ['(', ')', ',', '[', ']', '{', '}', ':']
        grouper_starts = ['(', '[', ',', '{']
        grouper_ends = [')', ']', '}']
        grouper_mids = [',', ':']

        keyword_words = [
            'abstract', 'assert', 'break', 'case', 'catch', 'class', 'const',
            'continue', 'default', 'do', 'else', 'enum', 'extends', 'final',
            'finally', 'for', 'goto', 'if', 'implements', 'import',
            'instanceof', 'interface', 'native', 'package', 'private',
            'protected', 'public', 'return', 'static', 'strictfp', 'super',
            'switch', 'synchronized', 'throw', 'throws', 'transient', 'try',
            'volatile', 'while'
        ]

        type_words = [
            'boolean', 'byte', 'char', 'double', 'float', 'int', 'long',
            'short', 'string', 'void', 'Integer', 'String', 'StringBuilder',
            'File', 'Exception', 'IOException'
        ]

        value_words = ['false', 'null', 'this', 'true']

        # ---- token builders ----------------------------------------------
        blank_builder = WhitespaceTokenBuilder()
        eol_builder = NewlineTokenBuilder()

        # The same extra argument is given to every numeric builder.
        numeric_extra = '_'
        int_builder = IntegerTokenBuilder(numeric_extra)
        int_exp_builder = IntegerExponentTokenBuilder(numeric_extra)
        real_builder = RealTokenBuilder(False, False, numeric_extra)
        real_exp_builder = RealExponentTokenBuilder(False, False, 'E',
                                                    numeric_extra)

        name_builder = IdentifierTokenBuilder('_', '_')
        at_name_builder = PrefixedIdentifierTokenBuilder(
            '@', 'decorator', False)
        text_builder = EscapedStringTokenBuilder(['"', "'", "’"], 0)
        class_builder = ClassTypeTokenBuilder()

        line_comment_builder = SlashSlashCommentTokenBuilder()
        block_comment_builder = SlashStarCommentTokenBuilder()

        semicolon_builder = SingleCharacterTokenBuilder(
            ';', 'statement terminator', False)

        operator_builder = CaseSensitiveListTokenBuilder(
            operator_words, 'operator', False)
        grouper_builder = CaseInsensitiveListTokenBuilder(
            grouper_words, 'group', False)
        keyword_builder = CaseSensitiveListTokenBuilder(
            keyword_words, 'keyword', False)
        type_builder = CaseSensitiveListTokenBuilder(type_words, 'type', True)
        value_builder = CaseSensitiveListTokenBuilder(
            value_words, 'value', True)

        fallback_builder = InvalidTokenBuilder()

        # The sequence below is exactly the order given to the Tokenizer.
        builders = [
            eol_builder, blank_builder, semicolon_builder, int_builder,
            int_exp_builder, real_builder, real_exp_builder, keyword_builder,
            type_builder, value_builder, operator_builder, grouper_builder,
            name_builder, class_builder, at_name_builder, text_builder,
            line_comment_builder, block_comment_builder,
            self.unknown_operator_tb, fallback_builder
        ]

        # ---- tokenize and post-process -----------------------------------
        raw_tokens = Tokenizer(builders).tokenize(code)
        for group_name in ('invalid operator', 'invalid'):
            raw_tokens = Examiner.combine_adjacent_identical_tokens(
                raw_tokens, group_name)
        self.tokens = Examiner.combine_identifier_colon(
            raw_tokens, ['statement terminator', 'newline'], ['{'],
            ['whitespace', 'comment'])
        self.convert_identifiers_to_labels()

        self.convert_keywords_to_identifiers(['::', '.'])
        self.convert_operators_to_identifiers(['::', '.'])

        self.calc_statistics()

        # ---- confidence calculations -------------------------------------
        joined_tokens = Examiner.join_all_lines(self.source_tokens())

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        operator_count = self.count_my_tokens(
            ['operator', 'invalid operator'])
        if operator_count > 0:
            self.calc_operator_confidence(operator_count)
            # One list instance is shared by the three calls below.
            pair_exceptions = []
            self.calc_operator_2_confidence(joined_tokens, operator_count,
                                            pair_exceptions)
            self.calc_operator_3_confidence(joined_tokens, operator_count,
                                            grouper_ends, pair_exceptions)
            self.calc_operator_4_confidence(joined_tokens, operator_count,
                                            grouper_starts, pair_exceptions)

        self.calc_group_confidence(joined_tokens, grouper_mids)

        self.calc_operand_n_confidence(
            joined_tokens, ['number', 'string', 'symbol'], 2)
        self.calc_operand_n_confidence(joined_tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Exemplo n.º 29
0
    def __init__(self, code):
        """Tokenize ``code`` and run this examiner's confidence calculations.

        The vocabulary configured here (``--`` comments, ``:=``, ``feature``,
        ``Current`` ...) looks like Eiffel -- NOTE(review): confirm against
        the enclosing class name, which is not visible from this method.

        Args:
            code: the text passed to the tokenizer and to the line-length
                confidence check (presumably the whole source as one string).
        """
        super().__init__()

        # ---- vocabulary --------------------------------------------------
        # Token groups handed to the 4-operand confidence check at the end.
        operand_types = ['number', 'identifier', 'string', 'type', 'value']

        operator_words = [
            ':=', '=', '/=', '<', '>', '<=', '>=', '+', '-', '*', '/', '//',
            '\\\\', '^', '|..|', '..', 'and', 'or', 'xor', 'not', 'and then',
            'or else', 'implies', '.', '@', '#', '|', '&'
        ]
        self.unary_operators = ['+', '-', 'not', '@', '#', '|', '&']
        self.postfix_operators = []

        grouper_words = ['(', ')', ',', '[', ']', '{', '}', ':', ';']
        grouper_starts = ['(', '[', ',', '{']
        grouper_mids = [',', ';', ':']
        grouper_ends = [')', ']', '}']

        keyword_words = [
            'across', 'agent', 'alias', 'all', 'as', 'assign', 'attribute',
            'check', 'class', 'convert', 'create', 'debug', 'deferred', 'do',
            'else', 'elseif', 'end', 'ensure', 'expanded', 'export',
            'external', 'feature', 'from', 'frozen', 'if', 'implies',
            'inherit', 'inspect', 'invariant', 'like', 'local', 'loop', 'note',
            'obsolete', 'old', 'once', 'only', 'redefine', 'rename', 'require',
            'rescue', 'retry', 'select', 'separate', 'then', 'undefine',
            'until', 'variant', 'when'
        ]

        type_words = ['Current', 'Precursor', 'Result', 'Void', 'TUPLE']

        value_words = ['False', 'True', '?']

        # ---- token builders ----------------------------------------------
        blank_builder = WhitespaceTokenBuilder()
        eol_builder = NewlineTokenBuilder()

        # The same extra argument is given to the plain numeric builders.
        numeric_extra = "'"
        int_builder = IntegerTokenBuilder(numeric_extra)
        int_exp_builder = IntegerExponentTokenBuilder(numeric_extra)
        hex_builder = PrefixedIntegerTokenBuilder(
            '0x', False, '0123456789abcdefABCDEF_')
        binary_builder = PrefixedIntegerTokenBuilder('0b', False, '01_')
        octal_builder = PrefixedIntegerTokenBuilder(
            '0c', False, '01234567_')
        real_builder = RealTokenBuilder(False, False, numeric_extra)
        real_exp_builder = RealExponentTokenBuilder(False, False, 'E',
                                                    numeric_extra)

        name_builder = IdentifierTokenBuilder('_', '_')
        text_builder = EscapedStringTokenBuilder(['"', "'", "’"], 0)
        dash_comment_builder = LeadToEndOfLineTokenBuilder(
            '--', True, 'comment')

        grouper_builder = CaseInsensitiveListTokenBuilder(
            grouper_words, 'group', False)
        operator_builder = CaseSensitiveListTokenBuilder(
            operator_words, 'operator', False)
        keyword_builder = CaseSensitiveListTokenBuilder(
            keyword_words, 'keyword', False)
        type_builder = CaseSensitiveListTokenBuilder(type_words, 'type', True)
        value_builder = CaseSensitiveListTokenBuilder(
            value_words, 'value', True)

        fallback_builder = InvalidTokenBuilder()

        # The sequence below is exactly the order given to the Tokenizer.
        builders = [
            eol_builder, blank_builder, int_builder, int_exp_builder,
            hex_builder, binary_builder, octal_builder, real_builder,
            real_exp_builder, keyword_builder, type_builder, value_builder,
            grouper_builder, operator_builder, name_builder, text_builder,
            dash_comment_builder, self.unknown_operator_tb, fallback_builder
        ]

        # ---- tokenize and post-process -----------------------------------
        raw_tokens = Tokenizer(builders).tokenize(code)
        for group_name in ('invalid operator', 'invalid'):
            raw_tokens = Examiner.combine_adjacent_identical_tokens(
                raw_tokens, group_name)
        self.tokens = raw_tokens

        self.calc_statistics()

        # ---- confidence calculations -------------------------------------
        joined_tokens = Examiner.join_all_lines(self.source_tokens())

        self.calc_token_confidence()
        self.calc_token_2_confidence()

        operator_count = self.count_my_tokens(
            ['operator', 'invalid operator'])
        if operator_count > 0:
            self.calc_operator_confidence(operator_count)
            # One list instance is shared by the three calls below.
            pair_exceptions = []
            self.calc_operator_2_confidence(joined_tokens, operator_count,
                                            pair_exceptions)
            self.calc_operator_3_confidence(joined_tokens, operator_count,
                                            grouper_ends, pair_exceptions)
            self.calc_operator_4_confidence(joined_tokens, operator_count,
                                            grouper_starts, pair_exceptions)

        self.calc_group_confidence(joined_tokens, grouper_mids)

        self.calc_operand_n_confidence(joined_tokens, ['number'], 2)
        self.calc_operand_n_confidence(joined_tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)
Exemplo n.º 30
0
    def __init__(self, code):
        """Tokenize ``code`` and run this examiner's confidence calculations.

        The vocabulary configured here (``@interface``, ``nil``, ``YES``,
        C preprocessor directives ...) looks like Objective-C --
        NOTE(review): confirm against the enclosing class name, which is not
        visible from this method.

        Args:
            code: the text passed to the tokenizer and to the line-length
                confidence check (presumably the whole source as one string).
        """
        super().__init__()

        # ---- vocabulary --------------------------------------------------
        # Token groups handed to the 4-operand confidence check at the end.
        operand_types = [
            'number', 'identifier', 'string', 'class', 'type', 'value'
        ]

        directive_words = [
            '#define', '#undef', '#ifdef', '#ifndef', '#if', '#endif', '#else',
            '#elif', '#import', '#line', '#include'
        ]

        operator_words = [
            '+', '-', '*', '/', '%', '=', '==', '!=', '>', '>=', '<', '<=',
            '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '<<=', '>>=', '!',
            '&', '|', '<<', '>>', '~', '.', '->', '++', '--', '&&', '||', '^',
            '?', '##'
        ]
        self.unary_operators = [
            '+', '-', '*', '!', '&', '^', '~', '++', '--', '##'
        ]
        self.postfix_operators = ['++', '--', '&', '->', '*', '^']

        grouper_words = ['(', ')', ',', '[', ']', '{', '}', ':']
        grouper_starts = ['(', '[', ',', '{']
        grouper_ends = [')', ']', '}']
        grouper_mids = [',', ':']

        keyword_words = [
            'atomic', 'break', 'bycopy', 'byref', 'case', 'continue',
            'default', 'do', 'else', 'for', 'goto', 'if', 'IMP', 'in',
            'inline', 'inout', 'nonatomic', 'oneway', 'out', 'Protocol',
            'restrict', 'retain', 'return', 'SEL', 'sizeof', 'switch',
            'typedef', 'while', '@interface', '@end', '@implementation',
            '@protocol', '@class', '@public', '@protected', '@private',
            '@property', '@try', '@throw', '@catch()', '@finally',
            '@synthesize', '@dynamic', '@selector'
        ]

        type_words = [
            'auto', 'char', 'const', 'double', 'enum', 'extern', 'float', 'id',
            'int', 'long', 'register', 'short', 'signed', 'static', 'struct',
            'union', 'unsigned', 'void', 'volatile', '_Bool', '_Complex',
            '_Imaginary', 'BOOL', 'Class'
        ]

        value_words = ['self', 'super', 'nil', 'YES', 'NO', 'NULL', '...']

        # ---- token builders ----------------------------------------------
        blank_builder = WhitespaceTokenBuilder()
        eol_builder = NewlineTokenBuilder()

        # The same extra argument is given to every numeric builder.
        numeric_extra = None
        int_builder = IntegerTokenBuilder(numeric_extra)
        int_exp_builder = IntegerExponentTokenBuilder(numeric_extra)
        real_builder = RealTokenBuilder(False, False, numeric_extra)
        real_exp_builder = RealExponentTokenBuilder(False, False, 'E',
                                                    numeric_extra)

        name_builder = IdentifierTokenBuilder('_', '_')
        directive_builder = DirectiveTokenBuilder()

        # Both string builders receive the same quote list object.
        quote_chars = ['"', "'", "’"]
        text_builder = EscapedStringTokenBuilder(quote_chars, 10)
        at_text_builder = PrefixedStringTokenBuilder('@', False, quote_chars)

        class_builder = ClassTypeTokenBuilder()

        line_comment_builder = SlashSlashCommentTokenBuilder()
        block_comment_builder = SlashStarCommentTokenBuilder()

        continuation_builder = SingleCharacterTokenBuilder(
            '\\', 'line continuation', False)
        preprocessor_builder = CaseSensitiveListTokenBuilder(
            directive_words, 'preprocessor', False)
        # These three directives each get a LeadToEndOfLineTokenBuilder
        # instead of an entry in directive_words.
        warning_builder, error_builder, pragma_builder = [
            LeadToEndOfLineTokenBuilder(lead, True, 'preprocessor')
            for lead in ('#warning', '#error', '#pragma')
        ]
        semicolon_builder = SingleCharacterTokenBuilder(
            ';', 'statement terminator', False)

        grouper_builder = CaseInsensitiveListTokenBuilder(
            grouper_words, 'group', False)
        operator_builder = CaseSensitiveListTokenBuilder(
            operator_words, 'operator', False)
        keyword_builder = CaseSensitiveListTokenBuilder(
            keyword_words, 'keyword', False)
        type_builder = CaseSensitiveListTokenBuilder(type_words, 'type', True)
        value_builder = CaseSensitiveListTokenBuilder(
            value_words, 'value', True)

        fallback_builder = InvalidTokenBuilder()

        # The sequence below is exactly the order given to the Tokenizer.
        builders = [
            eol_builder, blank_builder, continuation_builder,
            semicolon_builder, int_builder, int_exp_builder, real_builder,
            real_exp_builder, keyword_builder, type_builder, value_builder,
            grouper_builder, operator_builder, directive_builder,
            name_builder, class_builder, text_builder, at_text_builder,
            line_comment_builder, block_comment_builder,
            preprocessor_builder, warning_builder, error_builder,
            pragma_builder, self.unknown_operator_tb, fallback_builder
        ]

        # ---- tokenize and post-process -----------------------------------
        raw_tokens = Tokenizer(builders).tokenize(code)
        for group_name in ('invalid operator', 'invalid'):
            raw_tokens = Examiner.combine_adjacent_identical_tokens(
                raw_tokens, group_name)
        # NOTE: Examiner.combine_identifier_colon is not applied in this
        # examiner.
        self.tokens = raw_tokens
        self.convert_identifiers_to_labels()
        self.convert_values_to_operators()

        self.calc_statistics()

        # ---- confidence calculations -------------------------------------
        joined_tokens = Examiner.join_all_lines(self.source_tokens())

        self.calc_token_confidence()
        self.calc_token_2_confidence(['*', ';'])

        operator_count = self.count_my_tokens(
            ['operator', 'invalid operator'])
        if operator_count > 0:
            self.calc_operator_confidence(operator_count)
            # One list instance is shared by the three calls below.
            pair_exceptions = []
            self.calc_operator_2_confidence(joined_tokens, operator_count,
                                            pair_exceptions)
            self.calc_operator_3_confidence(joined_tokens, operator_count,
                                            grouper_ends, pair_exceptions)
            self.calc_operator_4_confidence(joined_tokens, operator_count,
                                            grouper_starts, pair_exceptions)

        self.calc_group_confidence(joined_tokens, grouper_mids)

        self.calc_operand_n_confidence(
            joined_tokens, ['number', 'string', 'symbol'], 2)
        self.calc_operand_n_confidence(joined_tokens, operand_types, 4)

        self.calc_keyword_confidence()

        self.calc_paired_blockers_confidence(['{'], ['}'])
        self.calc_line_length_confidence(code, self.max_expected_line)