def setoptions(self, optionStr):
    """Apply preprocessor options and rebuild the Java pattern and formatter.

    ``optionStr`` is first passed through ``self.tonormalizedoptionstring``.
    The normalized value ``'default'`` resets every option to the class
    defaults.  Any other non-empty value is read as consecutive
    (name character, value character) pairs; each value is converted with
    ``self.__tov`` and stored over a copy of the current options.

    Option flags read while building the result:

    * ``'k'`` -- true: ``case``/``default`` labels are matched as plain
      statements and simple statements are kept; false: each case clause is
      wrapped in a block and simple statements are removed.
    * ``'d'`` -- false: int/float/bool literals are formatted ``label=%s``;
      true: ``label|%s``.
    * ``'r'`` -- true: ``interface_block`` is dropped; false: flattened.
    * ``'s'`` -- same as ``'d'`` but for string/char literals.

    Side effects: sets ``self.optionValues``, ``self.pat`` and ``self.fmt``.

    Raises:
        AssertionError: an option name is not a known option.
        IndexError: the normalized option string has an odd length.
    """
    optionStr = self.tonormalizedoptionstring(optionStr)
    if optionStr == 'default':
        # Copy, so that self.optionValues never aliases the shared defaults.
        optionValues = self.__defaultOptionValues.copy()
    else:
        optionValues = self.optionValues.copy()
        if optionStr is not None and optionStr != '':
            for i in range(0, len(optionStr), 2):
                name = optionStr[i]
                value = optionStr[i + 1]
                # Explicit raise instead of ``assert``: the check must
                # survive ``python -O``.  The exception type is unchanged.
                if name not in optionValues:
                    raise AssertionError("unknown option: %r" % (name,))
                optionValues[name] = self.__tov(value)
    self.optionValues = optionValues

    if optionValues['k']:
        switch_statement_rule = """
            | r_case +(xcep(colon | semicolon | eof) any) colon
            | r_default colon
            | r_switch (block scan ^)
        """
        simple_statement_removal_rule = ""
    else:
        switch_statement_rule = """
            | +((r_case (id | l_bool | l_char | l_int) | r_default) colon) ((block scan ^) ?(null <- r_break semicolon) | (block <- (insert(LB) *(xcep(r_break | r_case | r_default) ^) insert(RB))) ?(null <- r_break semicolon)) // enclose each case clause by block
            | r_switch (block scan ^)
        """
        simple_statement_removal_rule = """
        TEXT scan= (null <- simple_statement)
            | r_class id *((r_extends | r_implements) id *(comma id)) (block scan ^); // recurse into top level of class definition
        """

    # Notes on the pattern text below:
    #  * it is %-formatted, so literal percent signs are doubled ("%%");
    #  * "//" starts a pattern comment that runs to the end of the line, so
    #    every comment sits at the end of its own line;
    #  * a double quote is written as the entity &quot; -- a raw triple
    #    double-quote would terminate this Python string literal;
    #  * NOTE(review): a literal ampersand is written &amp; because "&"
    #    introduces an entity (&bslash;, &squot;, &t; ...); confirm the
    #    entity name against the torq pattern reference.
    patternStr = """TEXT scan= preq("&(a-z);") (
            (r_abstract <- "abstract") | (r_assert <- "assert")
            | (r_boolean <- "boolean") | (r_break <- "break") | (r_byte <- "byte")
            | (r_case <- "case") | (r_catch <- "catch") | (m_charAt <- "charAt") | (r_char <- "char") | (r_class <- "class")
            | (m_clone <- "clone") | (m_compareTo <- "compareTo") | (r_continue <- "continue") | (r_const <- "const")
            | (r_default <- "default") | (m_dispose <- "dispose") | (r_double <- "double") | (r_do <- "do")
            | (r_else <- "else") | (r_enum <- "enum") | (m_equals <- "equals") | (r_extends <- "extends")
            | (r_false <- "false") | (r_finally <- "finally") | (r_final <- "final") | (r_float <- "float") | (r_for <- "for")
            | (m_getClass <- "getClass") | (m_get <- "get") | (r_goto <- "goto")
            | (m_hashCode <- "hashCode") | (m_hasNext <- "hasNext")
            | (r_if <- "if") | (r_implements <- "implements") | (r_import <- "import") | (r_instanceof <- "instanceof")
            | (r_interface <- "interface") | (r_int <- "int") | (m_iterator <- "iterator")
            | (m_length <- "length") | (r_long <- "long")
            | (r_native <- "native") | (r_new <- "new") | (m_next <- "next") | (r_null <- "null")
            | (r_package <- "package") | (r_private <- "private") | (r_protected <- "protected") | (r_public <- "public")
            | (r_return <- "return") | (m_run <- "run")
            | (r_short <- "short") | (m_size <- "size") | (r_static <- "static") | (r_strictfp <- "strictfp")
            // | (r_super <- "super") // keyword "super" is treated as an identifier
            | (r_switch <- "switch") | (r_synchronized <- "synchronized")
            // | (r_this <- "this") // keyword "this" is treated as an identifier
            | (m_toArray <- "toArray") | (m_toString <- "toString") | (r_throws <- "throws") | (r_throw <- "throw")
            | (r_transient <- "transient") | (r_true <- "true") | (r_try <- "try")
            | (r_void <- "void") | (r_volatile <- "volatile")
            | (r_while <- "while")
        ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
        | (word <- ("&(a-z);" | "&(A-Z);" | "_" | "$") *("&(a-z);" | "&(A-Z);" | "_" | "$" | "&(0-9);"))
        | (multiline_comment <- "/*" *(xcep("*/") any) "*/")
        | (singleline_comment <- "//" *(xcep(eol) any))
        | (l_string <- "&quot;" *("&bslash;" any | xcep("&quot;" | eol) any) "&quot;")
        | (l_char <- "&squot;" *("&bslash;" any | xcep("&squot;" | eol) any) "&squot;")
        | (l_float <- (
            ((+"&(0-9);" "." *"&(0-9);")|(*"&(0-9);" "." +"&(0-9);")) ?(("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "F") // modified by Jan Vlegels, 2007/Apr/23
            | +"&(0-9);" ("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "F")
            | +"&(0-9);" ("f" | "F")
        )
        | (l_int <- (("0x" | "0X") +("&(0-9);" | "&(a-f);" | "&(A-F);") | +"&(0-9);") *("l" | "L"))
        | (semicolon <- ";")
        | (comma <- ",")
        | (LB <- "{") | (RB <- "}")
        | (LP <- "(") | (RP <- ")")
        | (LK <- "[") | (RK <- "]")
        // 4 char operator
        | (op_signed_rshift_assign <- ">>>=")
        // 3 char operators
        | (op_lshift_assign <- "<<=")
        | (op_rshift_assign <- ">>=")
        | (op_signed_rshift <- ">>>")
        // 2 char operators
        | (op_lshift <- "<<")
        // ">>" will not be recognized, becase this parser can not distinguish ">>" from ">" ">"
        | (op_increment <- "++")
        | (op_decrement <- "--")
        | (op_le <- "<=")
        | (op_ge <- ">=")
        | (op_eq <- "==")
        | (op_ne <- "!=")
        | (op_add_assign <- "+=")
        | (op_sub_assign <- "-=")
        | (op_mul_assign <- "*=")
        | (op_div_assign <- "/=")
        | (op_mod_assign <- "%%=")
        | (op_and_assign <- "&amp;" "=")
        | (op_xor_assign <- "^=")
        | (op_or_assign <- "|=")
        | (op_logical_and <- "&amp;" "&amp;")
        | (op_logical_or <- "||")
        // single char operators
        | (op_star <- "*") // may mean mul or wildcard
        | (op_div <- "/")
        | (op_mod <- "%%")
        | (op_plus <- "+") // may mean add or sign plus
        | (op_minus <- "-") // may mean sub or sign minus
        | (op_amp <- "&amp;") // may mean bitwise
        | (op_logical_neg <- "!")
        | (op_complement <- "~")
        | (op_or <- "|")
        | (op_xor <- "^")
        | (op_assign <- "=")
        | (OL <- "<") // may mean less than or template parameter
        | (OG <- ">") // may mean greater than or template parameter
        | (ques <- "?")
        | (colon <- ":")
        | (dot <- ".");

    TEXT scan= null <- multiline_comment | singleline_comment | " " | "&t;" | "&f;" | "&v;"| eol;

    TEXT scan= (r_int <- r_long | r_short)
        | (r_double <- r_float)
        | (l_bool <- r_true | r_false)
        | (l_string <- word dot (word match "getString") LP l_string RP); // support for externalized string

    TEXT scan= xcep(LB | RB | LP | RP | LK | RK) any
        | (block <- LB *^ RB)
        | (param <- LP *^ RP)
        | (index <- LK *^ RK);

    TEXT scan= word *(dot word) (template_param <- OL ?(ques ((word match "super") | r_extends)) ^ *((comma | op_amp) ?(ques ((word match "super") | r_extends)) ^) OG )
        | (null <- OL (word *(dot word) ((word match "super") | r_extends) ^ | ^) *((comma | op_amp) (word *(dot word) ((word match "super") | r_extends) ^ | ^)) OG )
        | word *(dot word) *index
        | ques *index
        | (block scan ^)
        | (param scan ^); // recurse into block, and param

    TEXT scan= ?(null <- (word match "this") dot) (id <- word *(dot word xcep(param)) ?template_param)
        | (id <- (word match "this"))
        | (l_string <- l_string +(op_plus l_string))
        | (r_annotation_decl <- ("@" r_interface ))
        | (block scan ^)
        | (param scan ^)
        | (index scan ^); // recurse into block, index, and param

    // remove package, import
    TEXT scan= null <- r_package id semicolon | r_import id ?(dot op_star) semicolon;

    TEXT scan= id (index match LK RK) op_assign (block scan ^) semicolon // T a[] = { ... };
        | r_new id *(dot id) +(index scan ^)
        | id +((index match LK RK) | (index scan ^))
        | id insert(dot) insert(m_get) (index match (LP <- LK) *(xcep(RK) ^) (RP <- RK))
        | dot (m_length <- m_size (param match LP RP))
        | dot m_length (null <- (param match LP RP))
        | (block scan ^) // recurse into block
        | (param scan ^)
        | (index scan ^); // recurse into expression

    TEXT scan= (null <- r_private | r_public | r_protected | r_synchronized | r_final | r_abstract | r_strictfp | r_volatile | r_transient)
        | (null <- "@" id ?param)
        | (null <- r_static xcep(LB))
        | (null <- +(r_extends id *(comma id) | r_implements id *(comma id)))
        | (null <- r_throws id *(comma id))
        | (interface_block <- (def_block <- r_interface id ?(r_extends id *(comma id)) block))
        | (anotation_block <- (def_block <- r_annotation_decl id block))
        | (block scan ^)
        | (param scan ^); // recurse into block and param

    // remove array initialization tables
    TEXT scan= op_assign (initialization_block <- preq(block)) (null <- block) semicolon
        | index (initialization_block <- preq(block)) (null <- block)
        | (block scan ^) // recurse into block
        | (param scan ^)
        | (index scan ^); // recurse into expression

    TEXT scan= xcep(id | param | index | l_float | l_int | block) any (null <- op_minus) // remove unary minus
        | (method_like <- m_charAt | m_compareTo | m_dispose | m_equals | m_getClass | m_get | m_hashCode | m_hasNext | m_iterator | m_length | m_next | m_run | m_size | m_toArray | m_toString)
        | ques insert(c_cond) // insert tokens for control-flow complexity counter
        | (block scan ^) // recurse into block
        | (param scan ^)
        | (index scan ^); // recurse into expression

    // remove simple delegations; remove empty method definition; remove getter, setter; remove redundant paren of return statement; remove assertion
    TEXT scan= (null <- (r_void | r_boolean | r_byte | r_char | r_double | r_float | r_int | r_short | r_object | r_string | id) *index (id | method_like) param ((block match LB ?r_return id dot id param semicolon RB) | (block match LB RB)))
        | (null <- (r_boolean | r_byte | r_char | r_double | r_float | r_int | r_short | r_object | r_string | id) *index (id | method_like) (param match LP RP) (block match LB r_return id semicolon RB))
        | (null <- r_void (id | method_like) param (block match LB id op_assign id semicolon RB))
        | r_return (param match (null <- LP) *(xcep(RP) any) (null <- RP)) semicolon
        | (null <- r_assert *(xcep(semicolon | eof) any) semicolon)
        | (block scan ^)
        | (param scan ^); // recurse into block and param

    TEXT scan= r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB))) *(r_else r_if param (block | (block <- insert(LB) ^ insert(RB)))) ?(r_else (block | (block <- insert(LB) ^ insert(RB))))
        | r_else (block | (block <- insert(LB) ^ insert(RB)))
        | r_while param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
        | r_for param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
        | r_do ((block scan ^) | (block <- insert(LB) ^ insert(RB))) r_while param semicolon
        | r_try (block scan ^) *((r_catch param | r_finally) (block scan ^))
        | (r_catch param | r_finally) (block scan ^)
        %(switch_statement_rule)s
        | *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_finally | r_switch) any) semicolon
        | (block scan ^)
        | (param scan ^); // recurse into block and param

    TEXT scan= r_if param block *(r_else r_if param block) ?(r_else block)
        | r_else block
        | r_while param block
        | r_for param block
        | r_do block r_while param semicolon
        | r_switch param block
        | r_try block *((r_catch param | r_finally) block)
        | (r_catch param | r_finally) block
        | (simple_statement <- *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_finally| r_switch | r_case | r_default) any) semicolon)
        | (block scan ^)
        | (param scan ^); // recurse into block and param

    %(simple_statement_removal_rule)s

    // enclose class/method/constructor definition by block
    TEXT scan= (def_block <- r_class id (block scan ^))
        | (def_block <- r_new id param (block scan ^))
        | (def_block <- (r_void | r_int | r_long | r_short | r_double | r_float | r_boolean | r_char | r_byte | id) *(index match LK RK) (id | method_like) param (block scan ^))
        | (def_block <- id param (block scan ^)) // constructor
        | (block scan ^)
        | (param scan ^); // recurse into block and param

    // insert tokens for control-flow complexity counter
    TEXT scan= (r_if | r_switch) insert(c_cond)
        | (r_for | r_while) insert(c_loop)
        | (id | method_like) insert(c_func) (param scan ^)
        | (def_block scan ^)
        | (block scan ^)
        | (param scan ^)
        | (index scan ^)
        | (simple_statement scan ^);
    """ % {
        # Only these two placeholders occur in the pattern text.
        'switch_statement_rule': switch_statement_rule,
        'simple_statement_removal_rule': simple_statement_removal_rule,
    }

    self.pat = easytorq.Pattern(patternStr)

    fmt = easytorq.CngFormatter()

    # parameter by default
    fmt.addreplace('id', 'id|%s')

    fmt.addflatten('block')
    fmt.addreplace('LB', '(brace')
    fmt.addreplace('RB', ')brace')
    fmt.addflatten('word')
    fmt.addflatten('param')
    fmt.addreplace('LP', '(paren')
    fmt.addreplace('RP', ')paren')
    fmt.addflatten('index')
    fmt.addreplace('LK', '(braket')
    fmt.addreplace('RK', ')braket')
    fmt.addflatten('simple_statement')
    fmt.addreplace('semicolon', 'suffix:semicolon')
    fmt.addreplace('colon', 'suffix:colon')
    fmt.addformat('def_block', '(def_block', ')def_block')
    fmt.addflatten('method_like')

    if not optionValues['d']:
        # requires exact match
        fmt.addreplace('l_int', 'l_int=%s')
        fmt.addreplace('l_float', 'l_float=%s')
        fmt.addreplace('l_bool', 'l_bool=%s')
    else:
        # non parameter by default
        fmt.addreplace('l_bool', 'l_bool|%s')
        fmt.addreplace('l_int', 'l_int|%s')
        fmt.addreplace('l_float', 'l_float|%s')

    if optionValues['r']:
        fmt.addnone('interface_block')
    else:
        fmt.addflatten('interface_block')
    # 'anotation_block' (sic) is the label spelling used by the pattern above.
    fmt.addnone('anotation_block')

    if not optionValues['s']:
        # requires exact match
        fmt.addreplace('l_string', 'l_string=%s')
        fmt.addreplace('l_char', 'l_char=%s')
    else:
        # non parameter by default
        fmt.addreplace('l_string', 'l_string|%s')
        fmt.addreplace('l_char', 'l_char|%s')

    self.fmt = fmt
def setoptions(self, optionStr):
    """Build the Visual Basic token pattern and formatter.

    This preprocessor has no tunable options: ``optionStr`` must be
    ``None``, ``''`` or ``'default'``.

    Side effects: sets ``self.pat`` (an ``easytorq.Pattern``) and
    ``self.fmt`` (an ``easytorq.CngFormatter``).

    Raises:
        pp.InvalidOptionError: ``optionStr`` is any other value.
    """
    if optionStr not in (None, '', 'default'):
        # Call form of raise: valid on both Python 2 and Python 3.  The
        # %s formatting keeps the error meaningful for non-string values.
        raise pp.InvalidOptionError("invalid option: %s" % (optionStr,))

    # Notes on the pattern text below:
    #  * "//" starts a pattern comment that runs to the end of the line, so
    #    every comment sits at the end of its own line;
    #  * a double quote is written as the entity &quot; -- a raw triple
    #    double-quote would terminate this Python string literal;
    #  * NOTE(review): a literal ampersand is written &amp; because "&"
    #    introduces an entity (&bslash;, &t; ...); confirm the entity name
    #    against the torq pattern reference;
    #  * fixed typos: the label rule's upper-case class lacked its closing
    #    ";", "Exit For" referenced the undefined label r_for instead of
    #    r_For, and op_intdiv_eq used &slash; (already taken by op_div_eq)
    #    instead of &bslash;.
    patternStr = """TEXT scan= +(xcep(eof) any) | insert(eol) eof; // ensure a line terminates by eol

    TEXT match= *(
            (macro_line <- "#" *(xcep(eof | eol) any)) eol
            | ?(label <- +("&(a-z);" | "&(A-Z);" | "&(0-9);" | "_") ":") *(xcep(eof | eol) any) eol
        ) *(xcep(eof) any) eof;

    TEXT scan= (comment <- "'" *(xcep(eof | eol) any))
        | (comment <- "rem" (" " | "&t;") *(xcep(eof | eol) any))
        | (null <- ((" " | "&t;") "_" *(" " | "&t;") eol)); // Continuation line

    TEXT scan= preq("&(a-z);" | "&(A-Z);") (
            (r_AddHandler <- "AddHandler" | "addhandler" | "ADDHANDLER")
            | (r_AddressOf <- "AddressOf" | "addressof" | "ADDRESSOF")
            | (r_Alias <- "Alias" | "alias" | "ALIAS")
            | (r_AndAlso <- "AndAlso" | "andalso" | "ANDALSO")
            | (r_And <- "And" | "and" | "AND")
            | (r_Ansi <- "Ansi" | "ansi" | "ANSI")
            | (r_Assembly <- "Assembly" | "assembly" | "ASSEMBLY")
            | (r_As <- "As" | "as" | "AS")
            | (r_Auto <- "Auto" | "auto" | "AUTO")
            | (m_BeginProperty <- "BeginProperty" | "beginproperty" | "BEGINPROPERTY")
            | (r_Begin <- "Begin" | "begin" | "BEGIN")
            | (r_Boolean <- "Boolean" | "boolean" | "BOOLEAN")
            | (r_ByRef <- "ByRef" | "byref" | "BYREF")
            | (r_Byte <- "Byte" | "byte" | "BYTE")
            | (r_ByVal <- "ByVal" | "byval" | "BYVAL")
            | (r_Call <- "Call" | "call" | "CALL")
            | (r_Case <- "Case" | "case" | "CASE")
            | (r_Catch <- "Catch" | "catch" | "CATCH")
            | (r_CBool <- "CBool" | "cbool" | "CBOOL")
            | (r_CByte <- "CByte" | "cbyte" | "CBYTE")
            | (r_CChar <- "CChar" | "cchar" | "CCHAR")
            | (r_CDate <- "CDate" | "cdate" | "CDATE")
            | (r_CDec <- "CDec" | "cdec" | "CDEC")
            | (r_CDbl <- "CDbl" | "cdbl" | "CDBL")
            | (r_Char <- "Char" | "char" | "CHAR")
            | (r_CInt <- "CInt" | "cint" | "CINT")
            | (r_Class <- "Class" | "class" | "CLASS")
            | (r_CLng <- "CLng" | "clng" | "CLNG")
            | (r_CObj <- "CObj" | "cobj" | "COBJ")
            | (r_Const <- "Const" | "const" | "CONST")
            | (r_CShort <- "CShort" | "cshort" | "CSHORT")
            | (r_CSng <- "CSng" | "csng" | "CSNG")
            | (r_CStr <- "CStr" | "cstr" | "CSTR")
            | (r_CType <- "CType" | "ctype" | "CTYPE")
            | (r_Date <- "Date" | "date" | "DATE")
            | (r_Decimal <- "Decimal" | "decimal" | "DECIMAL")
            | (r_Declare <- "Declare" | "declare" | "DECLARE")
            | (r_Default <- "Default" | "default" | "DEFAULT")
            | (r_Delegate <- "Delegate" | "delegate" | "DELEGATE")
            | (r_Dim <- "Dim" | "dim" | "DIM")
            | (r_DirectCast <- "DirectCast" | "directcast" | "DIRECTCAST")
            | (r_Double <- "Double" | "double" | "DOUBLE")
            | (r_Do <- "Do" | "do" | "DO")
            | (r_Each <- "Each" | "each" | "EACH")
            | (r_ElseIf <- "ElseIf" | "elseif" | "ELSEIF")
            | (r_Else <- "Else" | "else" | "ELSE")
            | (m_EndProperty <- "EndProperty" | "endproperty" | "ENDPROPERTY")
            | (r_End <- "End" | "end" | "END")
            | (r_Enum <- "Enum" | "enum" | "ENUM")
            | (r_Erase <- "Erase" | "erase" | "ERASE")
            | (r_Error <- "Error" | "error" | "ERROR")
            | (r_Event <- "Event" | "event" | "EVENT")
            | (r_Exit <- "Exit" | "exit" | "EXIT")
            | (r_False <- "False" | "false" | "FALSE")
            | (r_Finally <- "Finally" | "finally" | "FINALLY")
            | (r_For <- "For" | "for" | "FOR")
            | (r_Friend <- "Friend" | "friend" | "FRIEND")
            | (r_Function <- "Function" | "function" | "FUNCTION")
            | (r_GetType <- "GetType" | "gettype" | "GETTYPE")
            | (r_Get <- "Get" | "get" | "GET")
            | (r_GoSub <- "GoSub" | "gosub" | "GOSUB")
            | (r_GoTo <- "GoTo" | "goto" | "GOTO")
            | (r_Handles <- "Handles" | "handles" | "HANDLES")
            | (r_If <- "If" | "if" | "IF")
            | (r_Implements <- "Implements" | "implements" | "IMPLEMENTS")
            | (r_Imports <- "Imports" | "imports" | "IMPORTS")
            | (r_Inherits <- "Inherits" | "inherits" | "INHERITS")
            | (r_Integer <- "Integer" | "integer" | "INTEGER")
            | (r_Interface <- "Interface" | "interface" | "INTERFACE")
            | (r_In <- "In" | "in" | "IN")
            | (m_IsArray <- "IsArray" | "isarray" | "ISARRAY")
            | (m_IsDate <- "IsDate" | "isdate" | "ISDATE")
            | (m_IsEmpty <- "IsEmpty" | "isempty" | "ISEMPTY")
            | (m_IsNull <- "IsNull" | "isnull" | "ISNULL")
            | (m_IsNumeric <- "IsNumeric" | "isnumeric" | "ISNUMERIC")
            | (m_IsObject <- "IsObject" | "isobject" | "ISOBJECT")
            | (r_Is <- "Is" | "is" | "IS")
            | (r_Let <- "Let" | "let" | "LET")
            | (r_Lib <- "Lib" | "lib" | "LIB")
            | (r_Like <- "Like" | "like" | "LIKE")
            | (r_Long <- "Long" | "long" | "LONG")
            | (r_Loop <- "Loop" | "loop" | "LOOP")
            // | (r_Me <- "Me" | "me" | "ME")
            | (r_Module <- "Module" | "module" | "MODULE")
            | (r_Mod <- "Mod" | "mod" | "MOD")
            | (r_MustInherit <- "MustInherit" | "mustinherit" | "MUSTINHERIT")
            | (r_MustOverride <- "MustOverride" | "mustoverride" | "MUSTOVERRIDE")
            | (r_MyBase <- "MyBase" | "mybase" | "MYBASE")
            | (r_MyClass <- "MyClass" | "myclass" | "MYCLASS")
            | (r_Namespace <- "Namespace" | "namespace" | "NAMESPACE")
            | (r_New <- "New" | "new" | "NEW")
            | (r_Next <- "Next" | "next" | "NEXT")
            | (r_Nothing <- "Nothing" | "nothing" | "NOTHING")
            | (r_NotInheritable <- "NotInheritable" | "notinheritable" | "NOTINHERITABLE")
            | (r_NotOverridable <- "NotOverridable" | "notoverridable" | "NOTOVERRIDABLE")
            | (r_Not <- "Not" | "not" | "NOT")
            | (r_Object <- "Object" | "object" | "OBJECT")
            | (r_On <- "On" | "on" | "ON")
            | (r_Optional <- "Optional" | "optional" | "OPTIONAL")
            | (r_Option <- "Option" | "option" | "OPTION")
            | (r_OrElse <- "OrElse" | "orelse" | "ORELSE")
            | (r_Or <- "Or" | "or" | "OR")
            | (r_Overloads <- "Overloads" | "overloads" | "OVERLOADS")
            | (r_Overridable <- "Overridable" | "overridable" | "OVERRIDABLE")
            | (r_Overrides <- "Overrides" | "overrides" | "OVERRIDES")
            | (r_ParamArray <- "ParamArray" | "paramarray" | "PARAMARRAY")
            | (r_Preserve <- "Preserve" | "preserve" | "PRESERVE")
            | (r_Private <- "Private" | "private" | "PRIVATE")
            | (r_Property <- "Property" | "property" | "PROPERTY")
            | (r_Protected <- "Protected" | "protected" | "PROTECTED")
            | (r_Public <- "Public" | "public" | "PUBLIC")
            | (r_RaiseEvent <- "RaiseEvent" | "raiseevent" | "RAISEEVENT")
            | (r_ReadOnly <- "ReadOnly" | "readonly" | "READONLY")
            | (r_ReDim <- "ReDim" | "redim" | "REDIM")
            | (r_REM <- "REM" | "rem" | "REM")
            | (r_RemoveHandler <- "RemoveHandler" | "removehandler" | "REMOVEHANDLER")
            | (r_Resume <- "Resume" | "resume" | "RESUME")
            | (r_Return <- "Return" | "return" | "RETURN")
            | (r_Select <- "Select" | "select" | "SELECT")
            | (r_Set <- "Set" | "set" | "SET")
            | (r_Shadows <- "Shadows" | "shadows" | "SHADOWS")
            | (r_Shared <- "Shared" | "shared" | "SHARED")
            | (r_Short <- "Short" | "short" | "SHORT")
            | (r_Single <- "Single" | "single" | "SINGLE")
            | (r_Static <- "Static" | "static" | "STATIC")
            | (r_Step <- "Step" | "step" | "STEP")
            | (r_Stop <- "Stop" | "stop" | "STOP")
            | (r_String <- "String" | "string" | "STRING")
            | (r_Structure <- "Structure" | "structure" | "STRUCTURE")
            | (r_Sub <- "Sub" | "sub" | "SUB")
            | (r_SyncLock <- "SyncLock" | "synclock" | "SYNCLOCK")
            | (r_Then <- "Then" | "then" | "THEN")
            | (r_Throw <- "Throw" | "throw" | "THROW")
            | (r_To <- "To" | "to" | "TO")
            | (r_True <- "True" | "true" | "TRUE")
            | (r_Try <- "Try" | "try" | "TRY")
            | (r_TypeOf <- "TypeOf" | "typeof" | "TYPEOF")
            | (r_Type <- "Type" | "type" | "TYPE")
            | (r_Unicode <- "Unicode" | "unicode" | "UNICODE")
            | (r_Until <- "Until" | "until" | "UNTIL")
            | (r_Variant <- "Variant" | "variant" | "VARIANT")
            | (r_Wend <- "Wend" | "wend" | "WEND")
            | (r_When <- "When" | "when" | "WHEN")
            | (r_While <- "While" | "while" | "WHILE")
            | (r_WithEvents <- "WithEvents" | "withevents" | "WITHEVENTS")
            | (r_With <- "With" | "with" | "WITH")
            | (r_WriteOnly <- "WriteOnly" | "writeonly" | "WRITEONLY")
            | (r_Xor <- "Xor" | "xor" | "XOR")
            | (r_GoSub <- "GoSub" | "gosub" | "GOSUB")
            | (r_Let <- "Let" | "let" | "LET")
            | (r_Variant <- "Variant" | "variant" | "VARIANT")
            | (m_MsgBox <- "MsgBox" | "msgbox" | "MSGBOX")
            | (m_Iif <- "Iif" | "iif" | "IIF")
            | (m_InputBox <- "InputBox" | "inputbox" | "INPUTBOX")
        ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
        | (word <- ("&(a-z);" | "&(A-Z);" | "_") *("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);"))
        | (word <- "[" ("&(a-z);" | "&(A-Z);" | "_") *("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);") "]")
        | (l_char <- "&quot;" *("&quot;&quot;" | xcep("&quot;" | eol) any) "&quot;") ("C" | "c")
        | (l_string <- "&quot;" *("&quot;&quot;" | xcep("&quot;" | eol) any) "&quot;")
        | (l_string <- "#" *(" " | "&t;" | "&(0-9);" | "/" | ":" | "AM" | "am" | "PM" | "pm") "#") // date
        | (l_float <- (
            +"&(0-9);" "." *"&(0-9);" ?(("e" | "E") ?("-" | "+") +"&(0-9);") ?("F" | "f" | "R" | "r" | "D" | "d")
            | +"&(0-9);" ("e" | "E") ?("-" | "+") +"&(0-9);") ?("F" | "f" | "R" | "r" | "D" | "d")
            | +"&(0-9);" ("F" | "f" | "R" | "r" | "D" | "d")
        )
        | (l_int <- (
            "&amp;" ("H" | "h") +("&(0-9);" | "&(a-f);" | "&(A-F);")
            | "&amp;" ("O" | "o") +"&(0-7);"
            | +"&(0-9);") ?("S" | "s" | "I" | "i" | "L"| "l")
        )
        | (LP <- "(") | (RP <- ")")
        | (op_plus_eq <- "+=")
        | (op_minus_eq <- "-=")
        | (op_mul_eq <- "*=")
        | (op_div_eq <- "/=")
        | (op_intdiv_eq <- "&bslash;=")
        | (op_pow_eq <- "^=")
        | (op_append_eq <- "&amp;=")
        | (op_lshift <- "<<")
        | (op_rshift <- ">>")
        | (op_not_eq <- "<>")
        | (op_ge <- ">=")
        | (op_le <- "<=")
        | (op_gt <- ">")
        | (op_lt <- "<")
        | (equal <- "=")
        | (exclamation <- "!")
        | (comma <- ",")
        | (dot <- ".")
        | (colon <- ":")
        | (plus <- "+")
        | (minus <- "-")
        | (op_mult <- "*")
        | (op_div <- "/")
        | (op_intdiv <- "&bslash;")
        | (amp <- "&amp;")
        | (op_power <- "^");

    TEXT scan= (null <- macro_line | comment | label | " " | "&t;")
        | (null <- m_BeginProperty *(xcep(m_EndProperty) any) m_EndProperty)
        | insert(statement_terminator) (null <- eol);

    TEXT scan= (r_EXIT_LOOP <- r_Exit (r_Do | r_While | r_Loop))
        | (r_Exit_For <- r_Exit r_For)
        | (r_Exit_Function <- r_Exit r_Function)
        | (r_Exit_Property <- r_Exit r_Property)
        | (r_Exit_Sub <- r_Exit r_Sub)
        | (r_Exit_Try <- r_Exit r_Try)
        | (r_On_Error <- r_On r_Error)
        | (r_Resume_Next <- r_Resume r_Next);

    TEXT scan= (id <- ?(dot | exclamation) word *((dot | exclamation) word) ?("%" | amp | "@" | exclamation | "#" | "$"))
        | (statement_terminator <- ":")
        | (l_bool <- r_True | r_False)
        | (r_Integer <- r_Long | r_Byte) // type unification
        | (r_Double <- r_Single) // type unification
        | (null <- r_Then | r_Call | r_Let | r_ByRef | r_Dim)
        | (null <- r_Public | r_Private | r_Protected ?r_Friend | r_Friend | r_Overloads | r_Overrides | r_Overridable);

    TEXT scan= statement_terminator (null <- +statement_terminator);

    TEXT scan= (null <- r_Declare *(xcep(statement_terminator| eof) any) statement_terminator)
        | (null <- r_Interface *(xcep(r_End) any | r_End xcep(r_Interface) any) r_End r_Interface)
        | (null <- r_WithEvents id r_As id statement_terminator);

    TEXT scan=
        // note: the following rule will not identify blocks around if statement,
        // becase I can not invent a parsing rule which support both "if...end if" statement
        // and "if statement" enclosed in a line, that appears without "end if".
        (block <- r_Begin *^ r_End xcep(r_Class | r_Function | r_Get | r_Interface | r_Module | r_Namespace | r_Property | r_Set | r_Structure | r_Sub | r_Select | r_Type | r_Try | r_With | r_If))
        | (def_block <- r_Class ?(id *statement_terminator ?(null <- r_Inherits id)) (block <- *^) r_End r_Class)
        | (def_block <- r_Structure ?(id *statement_terminator ?(null <- r_Inherits id)) (block <- *^) r_End r_Structure)
        | (block <- r_Namespace ?(id *statement_terminator (block <- *^)) r_End r_Namespace)
        | (block <- r_Do (r_While | r_Until) *^ r_Loop)
        | (block <- r_Do *^ r_Loop ?(r_While | r_Until))
        | (block <- insert(r_Do) r_While *^ (r_Loop <- r_End r_While | r_Wend)) // While ... End While, While ... Wend --> Do While ... Loop
        | (block <- r_For ?r_Each *^ r_Next)
        | (def_block <- r_Function *^ r_End r_Function)
        | (def_block <- r_Get *^ r_End r_Get)
        | (def_block <- r_Interface *^ r_End r_Interface)
        | (def_block <- r_Module *^ r_End r_Module)
        | (def_block <- r_Property ?(r_Get | r_Let | r_Set) *^ r_End r_Property)
        | (def_block <- r_Set preq(LP) *^ r_End r_Set)
        | (def_block <- r_Sub *^ r_End r_Sub)
        | (block <- r_Select r_Case *^ *(r_Case ?r_Else (block <- *^)) r_End r_Select)
        | (def_block <- r_Type *^ r_End r_Type)
        | (block <- r_Try (block <- *^) *(r_Catch (block <- *^)) r_End r_Try)
        | (block <- r_With ?(null <- id) (block <- *^) r_End r_With)
        | r_EXIT_LOOP | r_Exit_For | r_Exit_Function | r_Exit_Property | r_Exit_Sub | r_Exit_Try
        | r_Exit xcep(eof) any
        | r_End r_If
        | +xcep(eof | r_End | r_Class | r_Do | r_Loop | r_For | r_Next | r_Function | r_Get | r_Interface | r_Module | r_Namespace | r_Property | r_Set | r_Structure | r_Sub | r_Select | r_Type | r_Try | r_With) any
        | xcep(eof | r_End | r_Loop | r_Next) any;

    // insert tokens for control-flow complexity counter
    TEXT scan= (r_End (r_If | r_Select | r_Loop | r_While))
        | (r_If | r_Select) insert(c_cond)
        | r_For insert(c_loop)
        | (r_Do | r_Loop) (r_While | r_Until) insert(c_loop)
        | r_While insert(c_loop)
        | (block scan ^)
        | (def_block scan ^); // recurse into block

    TEXT scan= ((block scan ^) | (def_block scan ^)) *(null <- statement_terminator);
    """

    self.pat = easytorq.Pattern(patternStr)

    fmt = easytorq.CngFormatter()

    # parameter by default
    fmt.addreplace('id', 'id|%s')

    # non parameter by default
    fmt.addreplace('l_bool', 'l_bool|%s')
    fmt.addreplace('l_char', 'l_char|%s')
    fmt.addreplace('l_int', 'l_int|%s')
    fmt.addreplace('l_float', 'l_float|%s')
    fmt.addreplace('l_string', 'l_string|%s')

    fmt.addflatten('word')
    fmt.addreplace('LP', '(paren')
    fmt.addreplace('RP', ')paren')
    fmt.addformat('block', '(block', ')block')
    fmt.addformat('def_block', '(def_block', ')def_block')
    fmt.addreplace('statement_terminator', 'suffix:colon')

    self.fmt = fmt
def setoptions(self, optionStr): optionStr = self.tonormalizedoptionstring(optionStr) if optionStr == 'default': optionValues = self.__defaultOptionValues else: optionValues = self.optionValues.copy() if optionStr != None and optionStr != '': for i in range(0, len(optionStr), 2): name = optionStr[i] value = optionStr[i + 1] assert name in optionValues optionValues[name] = self.__tov(value) self.optionValues = optionValues if optionValues['k']: switch_statement_rule = """ | r_case +(xcep(colon | semicolon | eof) any) colon | r_default colon | r_switch (block scan ^) """ simple_statement_removal_rule = "" else: switch_statement_rule = """ | +((r_case (id | l_bool | l_char | l_int) | r_default) colon) ((block scan ^) ?(null <- r_break semicolon) | (block <- (insert(LB) *(xcep(r_break | r_case | r_default) ^) insert(RB))) ?(null <- r_break semicolon)) // enclose each case clause by block | r_switch (block scan ^) """ simple_statement_removal_rule = "(null <- simple_statement) |" patternStr = """TEXT scan= preq("&(a-z);") ( (op_logical_and <- "and") | (op_and_assign <- "and_eq") | (m_abort <- "abort") | (r_auto <- "auto") | (r_amp <- "bitand") | (m_assert <- "assert") | (r_or <- "bitor") | (r_bool <- "bool") | (r_break <- "break") | (r_case <- "case") | (r_catch <- "catch") | (r_char <- "char") | (r_class <- "class") | (op_complement <- "compl") | (r_const_cast <- "const_cast") | (r_const <- "const") | (r_continue <- "continue") | (r_default <- "default") | (r_delete <- "delete") | (r_dynamic_cast <- "dynamic_cast") | (r_double <- "double") | (r_do <- "do") | (r_else <- "else") | (r_enum <- "enum") | (m_exit <- "exit") | (r_explicit <- "explicit") | (r_extern <- "extern") | (r_false <- "false") | (r_float <- "float") | (r_for <- "for") | (r_friend <- "friend") | (r_goto <- "goto") | (r_if <- "if") | (r_inline <- "inline") | (r_intmax <- "intmax_t") | (r_intptr <- "intptr_t") | (r_int64 <- ("int64_t" | "int_least64_t" | "int_fast64_t")) | (r_int32 <- ("int32_t" | 
"int_least32_t" | "int_fast32_t")) | (r_int16 <- ("int16_t" | "int_least16_t" | "int_fast16_t")) | (r_int8 <- ("int8_t" | "int_least8_t" | "int_fast8_t")) | (r_int <- "int") | (m_longjmp <- "longjmp") | (r_long <- "long") | (r_mutable <- "mutable") | (r_namespace <- "namespace") | (r_new <- "new") | (op_logical_neg <- "not") | (op_ne <- "not_eq") | (m_offsetof <- "offsetof") | (r_operator <- "operator") | (op_logical_or <- "or") | (op_or_assign <- "or_eq") | (r_private <- "private") | (r_protected <- "protected") | (m_ptrdiff_t <- "ptrdiff_t") | (r_public <- "public") | (r_register <- "register") | (r_reinterpret_cast <- "reinterpret_cast") | (r_restrict <- "restrict") | (r_return <- "return") | (r_short <- "short") | (m_setjmp <- "setjmp") | (r_signed <- "signed") | (r_sizeof <- "sizeof") | (m_size_t <- "size_t") | (r_static <- "static") | (r_static_cast <- "static_cast") | (r_struct <- "struct") | (r_switch <- "switch") | (r_template <- "template") // | (r_this <- "this") // keyword "this" is treated as an identifier | (r_throw <- "throw") | (r_true <- "true") | (r_try <- "try") | (r_typedef <- "typedef") | (r_typeid <- "typeid") | (r_typename <- "typename") | (r_union <- "union") | (r_unsigned <- "unsigned") | (r_uintmax <- "uintmax_t") | (r_uintptr <- "uintptr_t") | (r_uint64 <- ("uint64_t" | "uint_least64_t" | "uint_fast64_t")) | (r_uint32 <- ("uint32_t" | "uint_least32_t" | "uint_fast32_t")) | (r_uint16 <- ("uint16_t" | "uint_least16_t" | "uint_fast16_t")) | (r_uint8 <- ("uint8_t" | "uint_least8_t" | "uint_fast8_t")) | (r_using <- "using") | (r_virtual <- "virtual") | (r_void <- "void") | (r_volatile <- "volatile") | (m_wchar_t <- "wchar_t") | (r_while <- "while") | (op_xor <- "xor") | (op_xor_assign <- "xor_eq") | (m_assert <- "assert") ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);") | (word <- ("&(a-z);" | "&(A-Z);" | "_") *("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")) | (multiline_comment <- "/*" *(xcep("*/") any) "*/") | (singleline_comment <- "//" 
*(xcep(eol) any) preq(eol)) | (l_string <- ?"L" """ *("&bslash;" any | xcep(""" | eol) any) """) | (l_char <- ?"L" "&squot;" *("&bslash;" any | xcep("&squot;" | eol) any) "&squot;") | (l_float <- ( +"&(0-9);" "." *"&(0-9);" ?(("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "l" | "F" | "L") | +"&(0-9);" ("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "l" | "F" | "L") | +"&(0-9);" ("f" | "F") ? ("l" | "L") ) | (l_int <- (("0x" | "0X") +("&(0-9);" | "&(a-f);" | "&(A-F);") | +"&(0-9);") *("u" | "l" | "U" | "L")) | (macro_line <- "#" *("&bslash;" *(" " | "&t;") eol | xcep(eol | eof | "/*" | "//") any | (multiline_comment <- "/*" *(xcep(eof | "*/") any) "*/")) preq(eol | eof | "//")) | (semicolon <- ";") | (comma <- ",") | (LB <- "{") | (RB <- "}") | (LP <- "(") | (RP <- ")") | (LK <- "[") | (RK <- "]") // 3 char operators | (op_lshift_assign <- "<<=") | (op_rshift_assign <- ">>=") | (op_pointer_to_member_from_pointer <- "->*") // 2 char operators | (op_scope_resolution <- "::") | (op_lshift <- "<<") | (op_rshift <- ">>") | (op_increment <- "++") | (op_decrement <- "--") | (op_member_access_from_pointer <- "->") | (op_le <- "<=") | (op_ge <- ">=") | (op_eq <- "==") | (op_ne <- "!=") | (op_add_assign <- "+=") | (op_sub_assign <- "-=") | (op_mul_assign <- "*=") | (op_div_assign <- "/=") | (op_mod_assign <- "%%=") | (op_and_assign <- "&" "=") | (op_xor_assign <- "^=") | (op_or_assign <- "|=") | (op_poiner_to_member_from_reference <- ".*") | (op_logical_and <- "&" "&") | (op_logical_or <- "||") // single char operators | (op_star <- "*") // may mean mul or indirection | (op_div <- "/") | (op_mod <- "%%") | (op_plus <- "+") // may mean add or sign plus | (op_minus <- "-") // may mean sub or sign minus | (op_amp <- "&") // may mean bitwise and or indirection | (op_logical_neg <- "!") | (op_complement <- "~") | (op_or <- "|") | (op_xor <- "^") | (op_assign <- "=") | (OL <- "<") // may mean less than or template parameter | (OG <- ">") // may mean greater than or template parameter | 
(ques <- "?") | (colon <- ":") | (dot <- "."); TEXT scan= (null <- macro_line | multiline_comment | singleline_comment | " " | "&t;" | "&f;" | "&bslash;" *(" " | "&t;") eol | eol) | (r_int <- (r_intmax | r_intptr | r_int64 | r_int32 | r_int16)) | (r_int <- (r_uintmax | r_uintptr | r_uint64 | r_uint32 | r_uint16)) | (r_int <- m_wchar_t) | (r_char <- r_int8) | (r_char <- r_uint8); TEXT scan= preq(r_operator) ( (word <- r_operator comma) | (word <- r_operator (op_logical_neg | op_logical_and | op_logical_or)) | (word <- r_operator (op_ne | op_eq | OG | OL | op_ge | op_le)) | (word <- r_operator op_mod) | (word <- r_operator (op_mod_assign | op_and_assign | op_add_assign | op_mul_assign | op_add_assign | op_sub_assign | op_div_assign | op_lshift_assign | op_assign | op_rshift_assign | op_xor_assign)) | (word <- r_operator (op_amp | op_star)) | (word <- r_operator LP RP) | (word <- r_operator (op_plus | op_minus)) | (word <- r_operator (op_increment | op_decrement)) | (word <- r_operator (op_member_access_from_pointer | op_pointer_to_member_from_pointer)) | (word <- r_operator op_div) | (word <- r_operator (op_lshift | op_rshift)) | (word <- r_operator LK RK) | (word <- r_operator op_xor) | (word <- r_operator op_complement) | (word <- r_operator (r_delete | r_new)) | (word <- r_operator r_bool) ); TEXT scan= (r_int <- (r_signed | r_unsigned)(r_long r_long r_int | r_long r_int | r_short r_int | r_int)) | (r_int <- (r_signed | r_unsigned)(r_long r_long | r_long | r_short)) | (r_char <- (r_signed | r_unsigned) r_char) | (r_int <- r_signed | r_unsigned) | (r_int <- r_long r_long | r_long | r_short) | (r_int <- m_size_t | m_ptrdiff_t | wchar_t) | (r_float <- r_long r_double | r_double) | (l_int <- (word match "NULL")) | (l_bool <- r_true | r_false) | (l_string <- +l_string) | (null <- (r_private | r_public | r_protected) colon) | (null <- r_virtual | r_inline | r_static) | (word <- op_scope_resolution word *(op_scope_resolution word) ?(op_scope_resolution op_complement 
word)) | (word <- word +(op_scope_resolution word) ?(op_scope_resolution op_complement word)) | (word <- word op_scope_resolution op_complement word); TEXT scan= xcep(LB | RB | LP | RP | LK | RK) any | (block <- LB *^ RB) | (null <- LP op_star) *^ (null <- RP) (op_member_access_from_pointer <- dot) | (index <- LK *^ RK) | (param <- (LP (null <- r_void) RP | LP *^ RP)); TEXT scan= xcep(OL | OG | block | param | semicolon) any | (template_param <- OL *^ OG) | (block scan ^) | (param scan ^) | (index scan ^); // recurse into block and param TEXT scan= ?(null <- (word match "this" op_member_access_from_pointer)) (id <- word ?(null <- template_param) *((dot | op_member_access_from_pointer) word xcep(param)) ?(null <- template_param)) | (id <- (word match "this")) | (r_const_cast | r_dynamic_cast | r_reinterpret_cast | r_static_cast) (null <- template_param) | (block scan ^) | (param scan ^) | (index scan ^); // recurse into block and param TEXT scan= op_assign (initialization_block <- preq(block)) (null <- block) semicolon | (r_class | r_struct) id (null <- colon *(r_public | r_private | r_protected | r_virtual) id *(comma *(r_public | r_private | r_protected | r_virtual) id)) | (null <- r_enum ?id block) | (null <- m_assert param semicolon) | r_return (param match (null <- LP) *(xcep(RP) any) (null <- RP)) semicolon | (block scan ^); // recurse into block TEXT scan= xcep(id | param | RK | l_float | l_int | block) any (null <- op_minus) | (null <- r_struct | r_union | r_enum) id xcep(block | colon) | ques insert(c_cond) // insert tokens for control-flow complexity counter | (block scan ^) // recurse into block | (param scan ^) | (index scan ^); // recurse into expression TEXT scan= (value_list <- (l_bool | l_string | l_int | l_char | l_float | id) +(comma (l_string | l_int | l_char | l_float | id) ?comma)) | (block scan ^); TEXT scan= r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB))) *(r_else r_if param ((block scan ^) | (block <- insert(LB) ^ 
insert(RB)))) ?(r_else ((block scan ^) | (block <- insert(LB) ^ insert(RB)))) | r_else ((block scan ^) | (block <- insert(LB) ^ insert(RB))) | r_while param ((block scan ^) | (block <- insert(LB) ^ insert(RB))) | r_for param ((block scan ^) | (block <- insert(LB) ^ insert(RB))) | r_do (block | (block <- insert(LB) ^ insert(RB))) r_while param semicolon | r_try (block scan ^) *(r_catch param (block scan ^)) | r_catch (block scan ^) %(switch_statement_rule)s | (r_return | r_break | r_continue | op_assign) *(xcep(block | LB | semicolon) any) semicolon | (null <- (r_friend | r_typedef) *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_switch) any) semicolon) | (null <- r_using r_namespace id semicolon) | (null <- r_namespace op_eq id semicolon) | *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_switch) any) semicolon | (block scan ^); // recurse into block TEXT scan= r_if param block *(r_else r_if param block) ?(r_else block) | r_else block | r_while param block | r_for param block | r_do block r_while param semicolon | r_try block *(r_catch param block) | r_catch block | r_switch param block | (simple_statement <- *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_do | r_try | r_catch | r_switch | r_case | r_default) any) semicolon) | (block scan ^); // recurse into block TEXT scan= (simple_statement match (r_return | r_continue | r_break | r_throw) +any) | %(simple_statement_removal_rule)s // mark simple getter/setter/delegation/empty block (+(r_void | r_int | r_char | r_float | r_bool | r_class | r_struct | r_enum | r_union | r_const | r_volatile | op_star | op_amp | index | id) param ?r_const ?(r_throw param) ?(colon id param *(comma id param))) (getter_body <- (block match LB (simple_statement match r_return ?(id op_member_access_from_pointer) id ?param semicolon) RB) | (block match LB (simple_statement match ?(id op_member_access_from_pointer) id param semicolon) RB) | ( block match LB 
RB)) | r_namespace id (block scan ^) | r_extern l_string (block scan ^) // recurse into extern "C" block | (r_struct | r_union) ?id (block scan ^) | (r_class | r_struct | r_union) id (block scan ^); // recurse into top level of class definition // enclose class/method/function definition by block TEXT scan= ( ?(null <- +(r_template template_param)) ( (null <- (r_class | r_struct | r_union) ?id (block match LB RB)) // remove empty structure definition | (def_block <- r_class id (block scan ^)) | (def_block <- (r_struct | r_union) ?id (block scan ^)) | (null <- +(r_void | r_int | r_char | r_float | r_bool | r_class | r_struct | r_enum | r_union | r_const | r_volatile | op_star | op_amp | index | (id <- op_complement id) | ?r_typename id) insert(c_func) param ?r_const ?(null <- (r_throw param)) ?(colon id param *(comma id param)) getter_body) | (def_block <- +(r_int | r_char | r_float | r_bool | r_class | r_struct | r_enum | r_union | r_const | r_volatile | op_star | op_amp | index | (id <- op_complement id) | ?r_typename id ) insert(c_func) param ?r_const ?(null <- (r_throw param)) ?(null <- colon id param *(comma id param)) // remove initialization list of constructor (block scan ^)) | (def_block <- r_void id insert(c_func) param ?r_const ?(null <- (r_throw param)) (block scan ^)) ) ) | (block scan ^); // insert tokens for control-flow complexity counter TEXT scan= (r_if | r_switch) insert(c_cond) | (r_for | r_while) insert(c_loop) | (def_block scan ^) | (block scan ^); // recurse into block TEXT scan= (id | r_int | r_char | r_float | r_bool) id (param scan ^) *(comma id ?(param scan ^)) | id insert(c_func) (param scan ^) | (def_block scan ^) | (block scan ^) | (simple_statement scan ^) | (param scan ^) | (index scan ^); // recurse into block, simple_statement, param, index """ % (locals()) self.pat = easytorq.Pattern(patternStr) fmt = easytorq.CngFormatter() # parameter by default fmt.addreplace('id', 'id|%s') # non parameter by default fmt.addreplace('l_bool', 
'l_bool|%s') fmt.addreplace('l_char', 'l_char|%s') fmt.addreplace('l_int', 'l_int|%s') fmt.addreplace('l_float', 'l_float|%s') fmt.addreplace('l_string', 'l_string|%s') fmt.addflatten('block') fmt.addreplace('LB', '(brace') fmt.addreplace('RB', ')brace') fmt.addflatten('word') fmt.addflatten('param') fmt.addreplace('LP', '(paren') fmt.addreplace('RP', ')paren') fmt.addflatten('index') fmt.addreplace('LK', '(braket') fmt.addreplace('RK', ')braket') fmt.addterminate('macro_line') fmt.addflatten('simple_statement') fmt.addreplace('semicolon', 'suffix:semicolon') fmt.addreplace('colon', 'suffix:colon') fmt.addformat('def_block', '(def_block', ')def_block') fmt.addflatten('value_list') self.fmt = fmt
def setoptions(self, optionStr):
    """Build the tokenizer pattern and output formatter for generic text.

    This preprocessor has no tunable options: ``optionStr`` must be
    ``None``, ``''`` or ``'default'``.

    On success two attributes are (re)assigned:
      * ``self.pat`` -- an ``easytorq.Pattern`` compiled from the rules below;
      * ``self.fmt`` -- an ``easytorq.CngFormatter`` that emits ``punct`` and
        ``word`` tokens with a ``t/`` prefix.

    Raises:
        pp.InvalidOptionError: if ``optionStr`` is any other value.
    """
    if optionStr not in (None, '', 'default'):
        # Call form instead of the Python-2-only ``raise E, msg`` statement;
        # both construct the same exception, but only this form parses on
        # Python 3 as well.
        raise pp.InvalidOptionError("invalid option: " + optionStr)

    # First pass: classify the text into alphanumeric runs (chars), runs of
    # whitespace/control characters (space), single punctuation/symbol
    # characters (punct), and everything else as a one-character chars node.
    # Second pass: drop space nodes and merge adjacent chars into a word.
    #
    # NOTE(review): the `""` alternative in the space rule is an empty literal
    # and `" "` is already covered by "&#x(0-20);". They look like character
    # references that were decoded or stripped somewhere along the way --
    # confirm against the upstream script before changing them.
    # NOTE(review): "&#x(ff01-ff0f);" is listed twice in the punct rule; the
    # second occurrence is redundant as written.
    patternStr = """TEXT scan=
    ( chars <- +("&(a-z);" | "&(A-Z);" | "&(0-9);") )
    | ( space <- +( "&#x(0-20);" | "" | "&#x(80-a0);" | "&#x(2000-200f);" | " " | eol ) )
    | ( punct <- "&#x(21-2f);" | "&#x(3a-3f);" | "&#x(5b-5f);" | "&#x(7b-7e);"
        | "&#x(a1-bf);"
        | "&#x(2010-205f);" | "&#x(20a0-20b5);" | "&#x(2190-21ff);" | "&#x(2200-22ff);"
        | "&#x(2300-23db);" | "&#x(2400-2426);" | "&#x(2440-244a);" | "&#x(2600-26b1);"
        | "&#x(2701-27be);" | "&#x(2a00-2aff);" | "&#x(27c0-27ef);" | "&#x(27f0-27ff);"
        | "&#x(2900-297f);" | "&#x(2980-29ff);" | "&#x(2b00-2b13);" | "&#x(2500-257f);"
        | "&#x(2580-259f);" | "&#x(25a0-25ff);" | "&#x(2e00-2e17);" | "&#x(3001-303f);"
        | "&#x(4dc0-4dff);" | "&#x(fe10-fe19);" | "&#x(ff01-ff0f);" | "&#x(ff01-ff0f);"
        | "&#x(ff1a-ff1f);" | "&#x(ff3b-ff3f);" | "&#x(ff5b-ff65);" | "&#x(ffe0-ffee);"
        | "&#x(1d300-1d356);" )
    | ( chars <- xcep(eof) any );
TEXT scan= (null <- space) | (word <- +chars);
"""
    self.pat = easytorq.Pattern(patternStr)

    fmt = easytorq.CngFormatter()
    fmt.addreplace('punct', 't/%s')
    fmt.addreplace('word', 't/%s')
    self.fmt = fmt
def setoptions(self, optionStr):
    """Apply an option string and rebuild the C# tokenizer pattern/formatter.

    ``optionStr`` is first passed through ``self.tonormalizedoptionstring``.
    The normalized value ``'default'`` selects the default option values.
    Any other non-empty value is read as consecutive (name, value) character
    pairs layered on a copy of the current ``self.optionValues``; each value
    character is converted with ``self.__tov``.

    Only option ``k`` is consulted here:
      * true  -- ``case``/``default`` labels are left in place and no
        simple-statement removal pass is added;
      * false -- each case clause is wrapped in a synthetic block and
        simple statements directly inside namespace/class/struct bodies
        are removed.

    Side effects: assigns ``self.optionValues``, ``self.pat`` (an
    ``easytorq.Pattern``) and ``self.fmt`` (an ``easytorq.CngFormatter``).

    Raises:
        AssertionError: if an option name is not a known option.
        IndexError: if a non-default option string has odd length.
    """
    optionStr = self.tonormalizedoptionstring(optionStr)
    if optionStr == 'default':
        optionValues = self.__defaultOptionValues
    else:
        # Work on a copy so a failed assert leaves self.optionValues intact.
        optionValues = self.optionValues.copy()
        if optionStr not in (None, ''):
            for i in range(0, len(optionStr), 2):
                name = optionStr[i]
                value = optionStr[i + 1]
                assert name in optionValues
                optionValues[name] = self.__tov(value)
    self.optionValues = optionValues

    if optionValues['k']:
        switch_statement_rule = """
    | r_case +(xcep(colon | semicolon | eof) any) colon
    | r_default colon
    | r_switch (block scan ^)
"""
        simple_statement_removal_rule = ""
    else:
        switch_statement_rule = """
    | +((r_case (id | l_bool | l_char | l_int) | r_default) colon) ((block scan ^) ?(null <- r_break semicolon) | (block <- (insert(LB) *(xcep(r_break | r_case | r_default) ^) insert(RB))) ?(null <- r_break semicolon)) // enclose each case clause by block
    | r_switch (block scan ^)
"""
        simple_statement_removal_rule = """
TEXT scan=
    (null <- simple_statement)
    | r_namespace id (block scan ^)
    | (r_class | r_struct) id (null <- ?(colon *(xcep(semicolon | block) any))) (block scan ^); // recurse into top level of class definition
"""

    # Notes on the pattern text below:
    #  * It is %-formatted, so a literal percent sign is written "%%" and the
    #    two rule fragments chosen above are spliced in by name.
    #  * The double-quote character is written as the &quot; entity; a bare
    #    triple quote would terminate this Python string literal.
    #    NOTE(review): confirm easytorq accepts &quot; like &squot;/&bslash;.
    #  * r_abstract and r_alias are separate alternatives. Without the "|"
    #    between them they form a sequence that only matches the text
    #    "abstractalias", so neither keyword is ever recognised and the
    #    "extern alias ...;" removal rule further down can never fire.
    #  * "//" starts a comment that runs to end of line inside the pattern,
    #    so the line breaks after such comments are significant.
    patternTemplate = """TEXT scan=
    preq("&(a-z);") (
        (r_abstract <- "abstract") | (r_alias <- "alias") | (r_as <- "as")
        // | (r_base <- "base") // keyword "base" is treated as an identifier
        | (r_bool <- "bool") | (r_break <- "break") | (r_byte <- "byte")
        | (r_case <- "case") | (r_catch <- "catch") | (r_char <- "char") | (r_checked <- "checked") | (r_class <- "class") | (r_const <- "const") | (r_continue <- "continue")
        | (r_decimal <- "decimal") | (r_default <- "default") | (r_delegate <- "delegate") | (r_double <- "double") | (r_do <- "do")
        | (r_else <- "else") | (r_enum <- "enum") | (r_event <- "event") | (r_explicit <- "explicit") | (r_extern <- "extern")
        | (r_false <- "false") | (r_finally <- "finally") | (r_fixed <- "fixed") | (r_float <- "float") | (r_foreach <- "foreach") | (r_for <- "for")
        | (r_get <- "get") | (r_goto <- "goto")
        | (r_if <- "if") | (r_implicit <- "implicit") | (r_interface <- "interface") | (r_internal <- "internal") | (r_int <- "int") | (r_in <- "in") | (r_is <- "is")
        | (r_lock <- "lock") | (r_long <- "long")
        | (r_namespace <- "namespace") | (r_new <- "new") | (r_null <- "null")
        //| (r_object <- "object") // keyword "object" is treated as an identifier
        | (r_operator <- "operator") | (r_out <- "out") | (r_override <- "override")
        | (r_params <- "params") | (r_partial <- "partial") | (r_private <- "private") | (r_protected <- "protected") | (r_public <- "public")
        | (r_readonly <- "readonly") | (r_ref <- "ref") | (r_return <- "return")
        | (r_sbyte <- "sbyte") | (r_sealed <- "sealed") | (r_set <- "set") | (r_short <- "short") | (r_sizeof <- "sizeof") | (r_stackalloc <- "stackalloc") | (r_static <- "static") | (r_string <- "string") | (r_struct <- "struct") | (r_switch <- "switch")
        // | (r_this <- "this") // keyword "this" is treated as an identifier
        | (r_throw <- "throw") | (r_true <- "true") | (r_try <- "try") | (r_typeof <- "typeof")
        | (r_uint <- "uint") | (r_ulong <- "ulong") | (r_unchecked <- "unchecked") | (r_unsafe <- "unsafe") | (r_ushort <- "ushort") | (r_using <- "using")
        // | (r_value <- "value") // keyword "value" is treated as an identifier
        | (r_virtual <- "virtual") | (r_void <- "void") | (r_volatile <- "volatile")
        | (r_while <- "while")
        | (r_yield <- "yield")
    ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
    | preq("&(A-Z);") (
        (m_Clone <- "Clone") | (m_CompareTo <- "CompareTo") | (m_Dispose <- "Dispose") | (m_Equals <- "Equals")
        | (m_GetHashCode <- "GetHashCode") | (m_GetType <- "GetType") | (m_InitializeComponent <- "InitializeComponent")
        | (m_Nullable <- ?"System." "Nullable") | (m_ReferenceEquals <- "ReferenceEquals") | (m_ToString <- "ToString")
        | (r_object <- "System.Object" | "Object") | (r_string <- "System.String" | "String")
        | (r_char <- "System.Char" | "Char")
        | (r_sbyte <- "System.SByte" | "SByte")
        | (r_short <- "System.Int16" | "Int16") | (r_ushort <- "System.UInt16" | "UInt16")
        | (r_int <- "System.Int32" | "Int32") | (r_uint <- "System.UInt32" | "UInt32")
        | (r_long <- "System.Int64" | "Int64") | (r_ulong <- "System.UInt64" | "UInt64")
        | (r_float <- "System.Single" | "Single") | (r_double <- "System.Double" | "Double")
        | (r_bool <- "System.Boolean" | "Boolean")
        | (r_decimal <- "System.Decimal" | "Decimal")
    ) xcep("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);")
    | (word <- ?"@" ("&(a-z);" | "&(A-Z);" | "_") *("&(a-z);" | "&(A-Z);" | "_" | "&(0-9);"))
    | (multiline_comment <- "/*" *(+"*" (xcep("/") any) | xcep("*") any) +"*" "/")
    | (singleline_comment <- "//" *(xcep(eol) any))
    | (l_string <- "@" "&quot;" *("&quot;" "&quot;" | xcep("&quot;" | eof) any) "&quot;") // berbatim string
    | (l_string <- "&quot;" *("&bslash;" any | xcep("&quot;" | eol) any) "&quot;")
    | (l_char <- "&squot;" *("&bslash;" any | xcep("&squot;" | "&quot;" | eol) any) "&squot;")
    | (l_float <- (
        +"&(0-9);" "." *"&(0-9);" ?(("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "F" | "d" | "D" | "m" | "M")
        | +"&(0-9);" ("e" | "E") ?("-" | "+") +"&(0-9);") ?("f" | "F" | "d" | "D" | "m" | "M")
        | +"&(0-9);" ("f" | "F" | "d" | "D" | "m" | "M") )
    | (l_int <- (("0x" | "0X") +("&(0-9);" | "&(a-f);" | "&(A-F);") | +"&(0-9);") *("l" | "L" | "u" | "U"))
    | (macro_line <- "#" *(xcep(eof | eol) any))
    | (semicolon <- ";")
    | (comma <- ",")
    | (LB <- "{") | (RB <- "}")
    | (LP <- "(") | (RP <- ")")
    | (LK <- "[") | (RK <- "]")
    // 3 char operators
    | (op_lshift_assign <- "<<=")
    | (op_rshift_assign <- ">>=")
    // 2 char operators
    | (op_lshift <- "<<")
    | (op_rshift <- ">>")
    | (op_increment <- "++")
    | (op_decrement <- "--")
    | (op_le <- "<=")
    | (op_ge <- ">=")
    | (op_eq <- "==")
    | (op_ne <- "!=")
    | (op_add_assign <- "+=")
    | (op_sub_assign <- "-=")
    | (op_mul_assign <- "*=")
    | (op_div_assign <- "/=")
    | (op_mod_assign <- "%%=")
    | (op_and_assign <- "&" "=")
    | (op_xor_assign <- "^=")
    | (op_or_assign <- "|=")
    | (op_logical_and <- "&" "&")
    | (op_logical_or <- "||")
    | (op_lambda <- "=>")
    | (op_namespace_alias_resolution <- "::")
    // single char operators
    | (op_star <- "*") // may mean mul or wildcard
    | (op_div <- "/")
    | (op_mod <- "%%")
    | (op_plus <- "+") // may mean add or sign plus
    | (op_minus <- "-") // may mean sub or sign minus
    | (op_amp <- "&")
    | (op_logical_neg <- "!")
    | (op_complement <- "~")
    | (op_or <- "|")
    | (op_xor <- "^")
    | (op_assign <- "=")
    | (OL <- "<") // may mean less than or template parameter
    | (OG <- ">") // may mean greater than or template parameter
    | (ques <- "?") | (colon <- ":") | (dot <- ".");

TEXT scan= null <- macro_line | multiline_comment | singleline_comment | " " | "&t;" | eol;

TEXT scan= null <- (?r_extern r_alias | r_event | r_delegate xcep(LP | LB | semicolon) any | r_using xcep(LP | semicolon) any) *(xcep(semicolon) any) semicolon;

TEXT match= (null <- +(attribute <- (LK *(xcep(eof | RK) any) RK))) *any | *any;
TEXT scan= (semicolon | RB) (null <- +(attribute <- (LK *(xcep(eof | RK) any) RK))); // remove attribute

TEXT scan= (r_byte <- r_sbyte)
    | (r_int <- r_uint | r_short | r_ushort | r_long | r_ulong)
    | (r_double <- r_float)
    | (l_bool <- r_true | r_false);

TEXT scan= xcep(LB | RB | LP | RP) any
    | (get_set_decl <- LB r_get semicolon ?(r_set semicolon) RB | LB r_set semicolon RB)
    | (block <- LB *^ RB)
    | (param <- LP *^ RP)
    | (index <- LK *^ RK);

// remove generated code
TEXT scan= (null <- r_void m_InitializeComponent (param match LP RP) block)
    | (null <- r_void m_Dispose (param match LP r_bool (word match "disposing") RP)
        (block match LB
            r_if (param match LP (word match "disposing") RP) (block match LB
                r_if (param match LP (word match "components") op_ne r_null RP) (block match LB
                    (word match "components") dot m_Dispose (param match LP RP) semicolon
                RB)
            RB)
            (word match "base") dot m_Dispose (param match LP (word match "disposing") RP) semicolon
        RB)
    )
    | (block scan ^); // recurse into block

TEXT scan= xcep(OL | OG | block | param) any
    | (template_param <- OL *^ OG)
    | (block scan ^) | (param scan ^); // recurse into block and param

TEXT scan= ?(null <- (word match "this") dot) (id <- word *((dot | op_namespace_alias_resolution) word) ?template_param ?ques)
    | (id <- (word match "this"))
    | (string_litral <- l_string +(op_plus l_string))
    | (block scan ^) | (param scan ^); // recurse into block and param

TEXT scan= (null <- m_Nullable OL) id (null <- OG)
    | ques insert(c_cond) // insert tokens for control-flow complexity counter
    | (block scan ^) | (param scan ^); // recurse into block and param

// remove simple delegations; remove empty method definition; remove redundant paren of return statement
TEXT scan= (null <- (r_void | r_int | r_long | r_short | r_double | r_float | r_bool | r_char | r_byte | r_decimal | r_string | r_object | id) *(index match LK RK) id param ((block match LB ?r_return id dot id param semicolon RB) | (block match LB RB)))
    | r_return (param match (null <- LP) *(xcep(RP) any) (null <- RP)) semicolon
    | op_assign (initialization_block <- preq(block)) (null <- block) semicolon
    | index (initialization_block <- preq(block)) (null <- block)
    | (null <- r_enum id ?(colon any) (initialization_block <- preq(block)) (null <- block))
    | (null <- r_private | r_public | r_protected | r_internal | r_override | r_virtual | r_sealed | r_unsafe | r_static | r_partial)
    | (null <- r_get) (null <- (block match LB r_return (id | l_string | l_char | l_int | l_float | l_bool) semicolon RB)) // simple getter
    | (null <- r_set) (null <- (block match LB id op_assign id semicolon RB)) // simple setter
    | (null <- r_interface id ?(colon id *(comma id)) block)
    | (block scan ^) | (param scan ^); // recurse into block and param

TEXT scan=
    r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB))) *(r_else r_if param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))) ?(r_else ((block scan ^) | (block <- insert(LB) ^ insert(RB))))
    | r_else ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_while param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | (r_for | r_foreach) param ((block scan ^) | (block <- insert(LB) ^ insert(RB)))
    | r_do ((block scan ^)| (block <- insert(LB) ^ insert(RB))) r_while param semicolon
    | r_try (block scan ^) *((r_catch ?param | r_finally) (block scan ^))
    | (r_catch ?param | r_finally) (block scan ^)
    %(switch_statement_rule)s
    | *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_foreach | r_do | r_try | r_catch | r_finally | r_switch) any) semicolon
    | (block scan ^); // recurse into block

TEXT scan=
    r_if param block *(r_else r_if param block) ?(r_else block)
    | r_else block
    | r_while param block
    | (r_for | r_foreach) param block
    | r_do block r_while param semicolon
    | r_try block *((r_catch ?param | r_finally) block)
    | (r_catch ?param | r_finally) block
    | r_switch param block
    | +(r_using param) block
    | r_delegate ?param block
    | (simple_statement <- *(xcep(block | LB | semicolon | r_if | r_while | r_for | r_foreach | r_do | r_try | r_catch | r_finally | r_switch | r_case | r_default | r_using | r_delegate) any) semicolon)
    | (null <- (r_int | r_long | r_short | r_double | r_float | r_bool | r_char | r_byte | r_decimal | r_string | r_object | id) *(index match LK RK) id (block match LB RB)) // property without any getter/setter
    | (block scan ^); // recurse into block

%(simple_statement_removal_rule)s

// enclose class/method definition by block
TEXT scan=
    (def_block <- (r_class | r_struct) id (block scan ^))
    | (def_block <- (r_void | r_int | r_long | r_short | r_double | r_float | r_bool | r_char | r_byte | r_decimal | r_string | r_object | id) (
        (id param (block scan ^))
        | (id (block scan ^))
    ) )
    | (def_block <- (r_get | r_set) (block scan ^))
    | (block scan ^);

// insert tokens for control-flow complexity counter
TEXT scan=
    (r_if | r_switch) insert(c_cond)
    | (r_for | r_while | r_foreach) insert(c_loop)
    | id insert(c_func) (param scan ^)
    | (r_get | r_set) insert(c_func) (block scan ^)
    | (def_block scan ^) | (block scan ^) | (param scan ^) | (index scan ^) | (simple_statement scan ^);
"""
    # Explicit mapping rather than locals(): only these two names are
    # referenced by the template.
    patternStr = patternTemplate % {
        'switch_statement_rule': switch_statement_rule,
        'simple_statement_removal_rule': simple_statement_removal_rule,
    }
    self.pat = easytorq.Pattern(patternStr)

    fmt = easytorq.CngFormatter()
    # parameter by default
    fmt.addreplace('id', 'id|%s')
    # non parameter by default
    for literalName in ('l_bool', 'l_char', 'l_int', 'l_float', 'l_string'):
        fmt.addreplace(literalName, literalName + '|%s')
    fmt.addflatten('block')
    fmt.addreplace('LB', '(brace')
    fmt.addreplace('RB', ')brace')
    fmt.addflatten('word')
    fmt.addflatten('param')
    fmt.addreplace('LP', '(paren')
    fmt.addreplace('RP', ')paren')
    fmt.addflatten('index')
    fmt.addreplace('LK', '(braket')
    fmt.addreplace('RK', ')braket')
    fmt.addflatten('simple_statement')
    fmt.addreplace('semicolon', 'suffix:semicolon')
    fmt.addreplace('colon', 'suffix:colon')
    fmt.addformat('def_block', '(def_block', ')def_block')
    self.fmt = fmt