# These tests assume `tokenize_cpp` and `detokenize_cpp` are importable
# from the project's C++ tokenizer module.


def test_tokenize_twice(test_examples, keep_comments=False):
    """Check that tokenization is idempotent: tokenizing the detokenized
    token sequence must reproduce the original tokens."""
    for i, (x, _) in enumerate(test_examples):
        tokenized_once = tokenize_cpp(x, keep_comments=keep_comments)
        tokenized_twice = tokenize_cpp(
            detokenize_cpp(tokenized_once), keep_comments=keep_comments
        )
        if tokenized_once != tokenized_twice:
            length = min(len(tokenized_twice), len(tokenized_once))
            char_message = ""
            for j in range(length):
                if tokenized_twice[j] != tokenized_once[j]:
                    char_message = (
                        f"expected token '{tokenized_once[j]}' at index {j} "
                        f"but found '{tokenized_twice[j]}'"
                    )
                    break  # report the first mismatching token
            if char_message == "":
                char_message = (
                    f"expected length {len(tokenized_once)}, "
                    f"found {len(tokenized_twice)}"
                )
            raise Exception(
                f"Expected:\n==========\n{tokenized_once}\n"
                f"but found:\n==========\n{tokenized_twice}\n"
                f"==========\n{char_message}"
            )

def test_detokenize_invertible(test_examples):
    """Check that detokenizing the token sequence recovers the original
    source, up to leading and trailing whitespace."""
    for i, (x, _) in enumerate(test_examples):
        x_ = detokenize_cpp(tokenize_cpp(x, keep_comments=False))
        if x_.strip() != x.strip():
            raise Exception(
                f"Expected:\n==========\n{x.strip()}\n"
                f"but found:\n==========\n{x_.strip()}"
            )

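# A minimal illustration of the invertibility property above, assuming
# the tokenizer splits C++ source on identifiers and punctuation
# (the token list shown is hypothetical):
#
#   tokens = tokenize_cpp("int x = 1 ;", keep_comments=False)
#   # e.g. ["int", "x", "=", "1", ";"]
#   detokenize_cpp(tokens)  # e.g. "int x = 1 ;"
#
# The .strip() calls in the comparison mean that only leading and
# trailing whitespace differences are tolerated.
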
def test_tokenizer(test_examples, keep_comments):
    """Check that tokenization produces exactly the expected token list."""
    for i, (x, y) in enumerate(test_examples):
        y_ = tokenize_cpp(x, keep_comments=keep_comments)
        if y_ != y:
            # Index of the first mismatching token, or -1 if the shared
            # prefix matches and only the lengths differ.
            diffs = [j for j, (tok, tok_) in enumerate(zip(y, y_)) if tok != tok_]
            line_diff = diffs[0] if len(diffs) > 0 else -1
            raise Exception(
                f"Difference at {line_diff}\n"
                f"Expected:\n==========\n{y}\n"
                f"but found:\n==========\n{y_}"
            )

def test_detokenize_non_invertible(test_examples):
    """Check detokenization against an expected normalized output `y`
    when the round trip is not expected to reproduce the input exactly."""
    for i, (x, y) in enumerate(test_examples):
        y_ = detokenize_cpp(tokenize_cpp(x, keep_comments=False))
        if y_ != y:
            length = min(len(y_), len(y))
            char_message = ""
            for j in range(length):
                if y_[j] != y[j]:
                    char_message = (
                        f"expected character '{y[j]}' at index {j} "
                        f"but found '{y_[j]}'"
                    )
                    break  # report the first mismatching character
            if char_message == "":
                char_message = f"expected length {len(y)}, found {len(y_)}"
            raise Exception(
                f"Expected:\n==========\n{y}\n"
                f"but found:\n==========\n{y_}\n"
                f"==========\n{char_message}"
            )

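# A minimal sketch of how these tests might be driven. The sample
# snippet and the assumed shape of `test_examples`, a list of
# (input, expected) pairs, are illustrative rather than fixtures from
# the real suite; the expected side is unused by the two property
# tests called here, so it is left as None.
if __name__ == "__main__":
    source = "int main ( ) { return 0 ; }"
    examples = [(source, None)]
    # Both calls are self-checking: each raises if the
    # tokenize/detokenize round trip breaks.
    test_tokenize_twice(examples)
    test_detokenize_invertible(examples)
    print("round-trip tests passed")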