def process_number_literal(possible_number):
    """Parse a numeric literal string into a ``Number`` token.

    If *possible_number* passes ``is_number`` (and is not listed in the
    module-level ``tabs`` collection), its characters are mapped to literal
    part tokens: leading ``-`` kept as-is, a ``0x`` prefix becomes
    ``HexStart()``, ``.`` becomes ``DecimalPoint()``, and the Java suffix /
    exponent letters l/L, f/F, d/D, e/E become ``L()``/``F()``/``D()``/``E()``
    (f, d, e only for non-hex literals, since they are valid hex digits).
    Everything else is kept as a raw character.

    :param possible_number: candidate literal as a string
    :return: ``Number(parts)`` on success, otherwise the input wrapped in
        ``ParseableToken``
    """
    # Guard clause: anything that is not a number is passed through untouched.
    if not (is_number(possible_number) and possible_number not in tabs):
        return ParseableToken(possible_number)

    parts_of_number = []
    if possible_number.startswith('-'):
        parts_of_number.append('-')
        possible_number = possible_number[1:]

    # Renamed from `hex`, which shadowed the builtin of the same name.
    is_hex = possible_number.startswith("0x")
    if is_hex:
        parts_of_number.append(HexStart())
        possible_number = possible_number[2:]

    for ch in possible_number:
        if ch == '.':
            parts_of_number.append(DecimalPoint())
        elif ch in ('l', 'L'):
            # 'l' is never a hex digit, so the long suffix applies everywhere.
            parts_of_number.append(L())
        elif ch in ('f', 'F') and not is_hex:
            parts_of_number.append(F())
        elif ch in ('d', 'D') and not is_hex:
            parts_of_number.append(D())
        elif ch in ('e', 'E') and not is_hex:
            parts_of_number.append(E())
        else:
            parts_of_number.append(ch)
    return Number(parts_of_number)
def test_split_verbose_log_statement(self):
    """spl_verbose should break a log statement into punctuation, quote
    markers and word tokens, splitting the string literal's contents."""
    # NOTE(review): this fixture literal is single-line in this copy of the
    # file, while the expected tokens contain '\n' entries -- confirm the
    # literal's original line breaks against the source repository.
    snippet = ''' logger.info("The value is " + val); '''
    expected_tokens = [
        '\n',
        ParseableToken("logger"), '.', ParseableToken("info"), '(',
        Quote(),
        ParseableToken("The"), ParseableToken("value"), ParseableToken("is"),
        Quote(),
        '+', ParseableToken('val'),
        ')', ';',
        '\n',
    ]
    actual_tokens = spl_verbose([ParseableToken(snippet)], None)
    self.assertEqual(expected_tokens, actual_tokens)
def test_split_verbose2(self):
    """Array declarations, one-line comments and escaped quotes inside a
    string literal must all be tokenized by spl_verbose."""
    # NOTE(review): fixture literal appears single-line in this copy while
    # the expected list contains '\n' tokens -- confirm original line breaks.
    snippet = ''' float[] floats = {}; //floats were removed BigAWESOMEString[] a2y = "abc".doSplit("\\""); '''
    expected_tokens = [
        '\n',
        ParseableToken("float"), '[', ']', ParseableToken("floats"), '=', '{', '}', ';',
        OneLineCommentStart(),
        ParseableToken("floats"), ParseableToken("were"), ParseableToken("removed"),
        '\n',
        ParseableToken("BigAWESOMEString"), '[', ']', ParseableToken("a2y"), '=',
        Quote(), ParseableToken("abc"), Quote(),
        '.', ParseableToken("doSplit"), '(',
        Quote(), Backslash(), Quote(), Quote(),
        ')', ';',
        '\n',
    ]
    actual_tokens = spl_verbose([ParseableToken(snippet)], None)
    self.assertEqual(expected_tokens, actual_tokens)
def split_to_key_words_and_identifiers(token, multiline_comments_regex,
                                       two_char_regex, one_char_regex,
                                       to_drop):
    """Split a token into identifier/keyword tokens and delimiter tokens.

    The string form of a ``ParseableToken`` is split in three passes --
    multiline-comment delimiters first, then two-character operators, then
    one-character operators -- with ``to_drop`` fragments removed at the
    innermost level.  Recognized delimiter strings are converted to their
    token objects (``MultilineCommentStart`` etc.); strings listed in the
    module-level ``characters`` collection but with no dedicated token class
    are kept as raw strings; everything else becomes a ``ParseableToken``.

    ``ProcessableTokenContainer`` inputs are processed recursively over
    their subtokens; any other token is returned unchanged.

    Fixes over the previous revision: the loop variable ``st`` was reused
    (shadowed) across three nesting levels; ``not x in y`` replaced with
    ``x not in y``; a needless ``list(...)`` materialization removed; the
    long elif chain replaced by a dispatch mapping.
    """
    if isinstance(token, ParseableToken):
        raw_result = []
        for comment_chunk in re.split(multiline_comments_regex, str(token)):
            if re.fullmatch(multiline_comments_regex, comment_chunk):
                raw_result.append(comment_chunk)
                continue
            for two_char_chunk in re.split(two_char_regex, comment_chunk):
                if re.fullmatch(two_char_regex, two_char_chunk):
                    raw_result.append(two_char_chunk)
                    continue
                one_char_chunks = re.split(one_char_regex, two_char_chunk)
                # Drop empty strings left over after stripping `to_drop`.
                raw_result.extend(
                    filter(None,
                           itertools.chain.from_iterable(
                               re.split(to_drop, chunk)
                               for chunk in one_char_chunks)))

        # Delimiter strings that have dedicated token classes.
        special_tokens = {
            "/*": MultilineCommentStart,
            "*/": MultilineCommentEnd,
            "//": OneLineCommentStart,
            "\"": Quote,
            "\\": Backslash,
            "\t": Tab,
        }
        result = []
        for raw_str in raw_result:
            if raw_str not in characters:
                result.append(ParseableToken(raw_str))
            elif raw_str in special_tokens:
                result.append(special_tokens[raw_str]())
            else:
                result.append(raw_str)
        return result
    elif isinstance(token, ProcessableTokenContainer):
        res = []
        for subtoken in token.get_subtokens():
            res.extend(
                split_to_key_words_and_identifiers(subtoken,
                                                   multiline_comments_regex,
                                                   two_char_regex,
                                                   one_char_regex, to_drop))
        return res
    else:
        return [token]
def replace_4whitespaces_with_tabs(token_list, context):
    """Replace every run of four spaces inside parseable tokens with ``Tab()``.

    Each ``ParseableToken`` is split on groups of four spaces; the groups
    become ``Tab()`` tokens and the remaining text fragments become new
    ``ParseableToken`` objects.  ``ProcessableTokenContainer`` tokens are
    processed recursively over their subtokens; all other tokens are kept
    unchanged.

    :param token_list: iterable of tokens to process
    :param context: opaque pipeline context, passed through to recursive calls
    :return: new flat list of tokens
    """
    result = []
    for token in token_list:
        if isinstance(token, ParseableToken):
            # The capturing group keeps the four-space delimiters in the
            # split output so they can be turned into Tab tokens.
            split_line = re.split("( {4})", str(token))
            result.extend(Tab() if w == " " * 4 else ParseableToken(w)
                          for w in split_line)
        elif isinstance(token, ProcessableTokenContainer):
            # Bug fix: the previous revision called
            # replace_4whitespaces_with_tabs(subtoken) -- missing the
            # required `context` argument (TypeError) and passing a single
            # token where a token list is expected.
            result.extend(
                replace_4whitespaces_with_tabs(token.get_subtokens(),
                                               context))
        else:
            result.append(token)
    return result
def test_with_numbers_split(self):
    """simple_split must break an identifier inside a string literal into
    underscores, lowercase words, digit runs and capitalized fragments."""
    input_tokens = [StringLiteral([":", ParseableToken("_test_my123GmyClass_")])]
    split_parts = [
        Underscore(),
        Word.from_("test"),
        Underscore(),
        Word.from_("my"),
        Word.from_("123"),
        Word.from_("Gmy"),
        Word.from_("Class"),
        Underscore(),
    ]
    expected = [StringLiteral([":", SplitContainer(split_parts)])]
    actual = simple_split(input_tokens, {})
    self.assertEqual(actual, expected)
def from_list(lst):
    """Wrap every element of *lst* in a ``ParseableToken``."""
    return [ParseableToken(element) for element in lst]
def from_string(str):
    """Split *str* on single spaces and wrap each piece in a ``ParseableToken``.

    NOTE(review): the parameter name shadows the builtin ``str``; it is kept
    unchanged because it is part of the public interface.
    """
    return [ParseableToken(word) for word in str.split(" ")]
def from_file(lines):
    """Convert file lines into tokens.

    Each line yields a ``ParseableToken`` (with any trailing newline
    stripped) followed by a ``NewLine()`` marker.
    """
    result = []
    for line in lines:
        stripped = line[:-1] if line.endswith('\n') else line
        result.append(ParseableToken(stripped))
        result.append(NewLine())
    return result
def test_split_verbose3(self):
    """spl_verbose must tokenize operators, a one-line comment and multiline
    comment delimiters even when the input is not compilable Java."""
    # NOTE(review): this fixture literal is single-line in this copy of the
    # file, while the expected tokens contain '\n' entries -- the original
    # literal presumably spanned several lines; confirm against the repo.
    text = ''' // this code won't compile but the preprocessing still has to be done corrrectly 9a ** abc1 ~-|=?==!=/* gj **/ '''
    actual = spl_verbose([ParseableToken(text)], None)
    # "won't" splits at the apostrophe; trailing "**/" splits into a lone
    # '*' plus the end-of-comment marker. "corrrectly" typo is intentional
    # fixture content.
    expected = [
        '\n',
        OneLineCommentStart(), ParseableToken("this"), ParseableToken("code"),
        ParseableToken("won"), "'", ParseableToken("t"),
        ParseableToken("compile"), ParseableToken("but"),
        ParseableToken("the"), ParseableToken("preprocessing"),
        ParseableToken("still"), ParseableToken("has"), ParseableToken("to"),
        ParseableToken("be"), ParseableToken("done"),
        ParseableToken("corrrectly"),
        '\n',
        ParseableToken("9a"), '**', ParseableToken("abc1"),
        '\n',
        '~', '-', '|=', '?', '==', '!=',
        MultilineCommentStart(), ParseableToken("gj"), '*',
        MultilineCommentEnd(),
        '\n'
    ]
    self.assertEqual(expected, actual)
def test_split_verbose1(self):
    """Array declarations with multiline comments inside their initializers
    must be tokenized correctly by spl_verbose."""
    # NOTE(review): fixture literal appears single-line in this copy while
    # the expected list contains '\n' tokens -- confirm original line breaks.
    snippet = ''' long[] lovely_longs = {/* there should be some longs here*/}; int[] _my_favoRite_ints_ = {/* ints here*/}; '''
    expected_tokens = [
        '\n',
        ParseableToken("long"), '[', ']', ParseableToken("lovely_longs"),
        '=', '{',
        MultilineCommentStart(),
        ParseableToken("there"), ParseableToken("should"),
        ParseableToken("be"), ParseableToken("some"),
        ParseableToken("longs"), ParseableToken("here"),
        MultilineCommentEnd(),
        '}', ';',
        '\n',
        ParseableToken("int"), '[', ']',
        ParseableToken("_my_favoRite_ints_"), '=', '{',
        MultilineCommentStart(),
        ParseableToken("ints"), ParseableToken("here"),
        MultilineCommentEnd(),
        '}', ';',
        '\n',
    ]
    actual_tokens = spl_verbose([ParseableToken(snippet)], None)
    self.assertEqual(expected_tokens, actual_tokens)
def test_split_verbose4(self):
    """Exhaustive check of spl_verbose over two-character operators,
    single-character delimiters, tab/backslash handling and a multiline
    comment followed by a stray '/'."""
    # NOTE(review): this fixture literal is single-line in this copy of the
    # file, while the expected tokens contain '\n' entries -- the original
    # literal presumably spanned several lines; confirm against the repo.
    text = ''' a++a b--b c+=c d-=d e/=e f*=f g%=g h$h i<=i j>=j k@k l^=l m&=m n#n o>>o p<<p q&&q r||r +*!/><\t\n {}[],.-:();&|\\'~%^ /*multi-line MyComment_ *// _operations '''
    actual = spl_verbose([ParseableToken(text)], None)
    expected = [
        # each two-character operator is kept whole, with identifiers around it
        '\n', ParseableToken('a'), '++', ParseableToken('a'),
        '\n', ParseableToken('b'), '--', ParseableToken('b'),
        '\n', ParseableToken('c'), '+=', ParseableToken('c'),
        '\n', ParseableToken('d'), '-=', ParseableToken('d'),
        '\n', ParseableToken('e'), '/=', ParseableToken('e'),
        '\n', ParseableToken('f'), '*=', ParseableToken('f'),
        '\n', ParseableToken('g'), '%=', ParseableToken('g'),
        '\n', ParseableToken('h'), '$', ParseableToken('h'),
        '\n', ParseableToken('i'), '<=', ParseableToken('i'),
        '\n', ParseableToken('j'), '>=', ParseableToken('j'),
        '\n', ParseableToken('k'), '@', ParseableToken('k'),
        '\n', ParseableToken('l'), '^=', ParseableToken('l'),
        '\n', ParseableToken('m'), '&=', ParseableToken('m'),
        '\n', ParseableToken('n'), '#', ParseableToken('n'),
        '\n', ParseableToken('o'), '>>', ParseableToken('o'),
        '\n', ParseableToken('p'), '<<', ParseableToken('p'),
        '\n', ParseableToken('q'), '&&', ParseableToken('q'),
        '\n', ParseableToken('r'), '||', ParseableToken('r'),
        '\n',
        # lone single-character operators; the embedded tab becomes Tab()
        '+', '*', '!', '/', '>', '<', Tab(),
        '\n',
        '\n',
        '{', '}', '[', ']', ',', '.', '-', ':', '(', ')', ';', '&', '|',
        Backslash(), "'", '~', '%', '^',
        '\n',
        # '*//': comment end consumes '*/', leaving a stray '/'
        MultilineCommentStart(), ParseableToken("multi"), '-',
        ParseableToken("line"), ParseableToken("MyComment_"),
        '\n',
        MultilineCommentEnd(), '/',
        '\n',
        ParseableToken("_operations"),
        '\n'
    ]
    self.assertEqual(expected, actual)