# NOTE: the import paths below are assumptions for illustration; adjust them
# to match the modules where these helpers actually live in this repository.
from diff_utils import (DELETE_END, compute_code_diffs,
                        compute_code_diff_spans,
                        compute_minimal_comment_diffs)
from data_formatting_utils import (get_subtoken_labels, subtokenize_code,
                                   subtokenize_comment, tokenize_clean_code,
                                   tokenize_comment)
from data_structures import DiffASTExample, DiffExample


def build_test_example():
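    """Build a single hand-written example (a comment/code pair before and
    after an edit) for exercising the diff and tokenization utilities."""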
    example_id = 'test-id-0'
    label = 1
    comment_type = 'Return'
    old_comment_raw = '@return the highest score'
    old_comment_subtokens = subtokenize_comment(old_comment_raw).split()
    new_comment_raw = '@return the lowest score'
    new_comment_subtokens = subtokenize_comment(new_comment_raw).split()
    span_minimal_diff_comment_subtokens, _, _ = compute_minimal_comment_diffs(
        old_comment_subtokens, new_comment_subtokens)
    old_code_raw = 'public int getBestScore()\n{\n\treturn Collections.max(scores);\n}'
    old_code_subtokens = subtokenize_code(old_code_raw).split()
    new_code_raw = 'public int getBestScore()\n{\n\treturn Collections.min(scores);\n}'
    new_code_subtokens = subtokenize_code(new_code_raw).split()
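    # compute_code_diffs produces both a span-level and a token-level edit
    # sequence, interleaving edit markers with code subtokens (DELETE_END,
    # used further below, is one such marker).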
    span_diff_code_subtokens, token_diff_code_subtokens, _ = compute_code_diffs(
        old_code_subtokens, new_code_subtokens)

    # TODO: Add code for parsing ASTs
    old_ast = None
    new_ast = None
    diff_ast = None
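    # A possible shape for the TODO above (an assumption; this repository may
    # use a different parser): wrap each method in a dummy class so that the
    # third-party javalang package can parse it, e.g.
    #   import javalang
    #   old_ast = javalang.parse.parse('class Dummy { %s }' % old_code_raw)
    #   new_ast = javalang.parse.parse('class Dummy { %s }' % new_code_raw)
    # and then compute diff_ast from the two trees.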

    return DiffASTExample(example_id, label, comment_type, old_comment_raw,
                          old_comment_subtokens, new_comment_raw,
                          new_comment_subtokens,
                          span_minimal_diff_comment_subtokens, old_code_raw,
                          old_code_subtokens, new_code_raw, new_code_subtokens,
                          span_diff_code_subtokens, token_diff_code_subtokens,
                          old_ast, new_ast, diff_ast)


# Second demo: extracting tokenization features at module level. The sample
# inputs below are added so this fragment runs; the subtokenizers here are
# assumed to return (subtokens, labels, indices) triples rather than the
# plain strings used in build_test_example above.
old_nl = '@return the highest score'
new_nl = '@return the lowest score'
old_code = 'public int getBestScore()\n{\n\treturn Collections.max(scores);\n}'
new_code = 'public int getBestScore()\n{\n\treturn Collections.min(scores);\n}'
old_nl_subtokens, old_nl_subtoken_labels, old_nl_subtoken_indices = subtokenize_comment(
    old_nl)
old_code_subtokens, old_code_subtoken_labels, old_code_subtoken_indices = subtokenize_code(
    old_code)

new_nl_subtokens, new_nl_subtoken_labels, new_nl_subtoken_indices = subtokenize_comment(
    new_nl)
new_code_subtokens, new_code_subtoken_labels, new_code_subtoken_indices = subtokenize_code(
    new_code)

span_diff_tokens, span_diff_labels, span_diff_indices = compute_code_diff_spans(
    old_code_subtokens, old_code_subtoken_labels, old_code_subtoken_indices,
    new_code_subtokens, new_code_subtoken_labels, new_code_subtoken_indices)

_, token_diff_tokens, _ = compute_code_diffs(old_code_subtokens,
                                             new_code_subtokens)

comment_edit_spans, _, _ = compute_minimal_comment_diffs(
    old_nl_subtokens, new_nl_subtokens)

example = DiffExample(id='test_id',
                      old_comment=' '.join(old_nl_subtokens),
                      old_comment_tokens=old_nl_subtokens,
                      new_comment=' '.join(new_nl_subtokens),
                      new_comment_tokens=new_nl_subtokens,
                      old_code=' '.join(old_code_subtokens),
                      old_code_tokens=old_code_subtokens,
                      new_code=' '.join(new_code_subtokens),
                      new_code_tokens=new_code_subtokens,
                      span_diff_code=' '.join(span_diff_tokens),
                      span_diff_code_tokens=span_diff_tokens,
                      # the remaining fields are assumed, following the
                      # naming pattern of the fields above
                      token_diff_code=' '.join(token_diff_tokens),
                      token_diff_code_tokens=token_diff_tokens,
                      comment_edit_spans=comment_edit_spans)


def get_diff_subtoken_labels(diff_subtokens, old_subtokens, old_tokens,
                             new_subtokens, new_tokens, diff_tokens,
                             old_raw, new_raw):
    # The earlier part of this function's body (which builds diff_labels,
    # diff_indices, diff_token_map, and diff_subtoken_map) is not shown;
    # parameter names are inferred from the call in __main__ below.
    diff_subtoken_map.append([DELETE_END])

    assert len(diff_labels) == len(diff_subtokens)
    assert len(diff_indices) == len(diff_subtokens)
    assert len(diff_subtoken_map) == len(diff_subtokens)
    assert len(diff_token_map) == len(diff_tokens)
    return diff_labels, diff_indices, diff_token_map, diff_subtoken_map

if __name__ == "__main__":
    # Demo for extracting tokenization features for one example
    # Corresponds to what is written in tokenization_features.json files
    ex = build_test_example()

    old_code_tokens = tokenize_clean_code(ex.old_code_raw).split()
    new_code_tokens = tokenize_clean_code(ex.new_code_raw).split()
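    # Token-level (clean) diffs are computed alongside the subtoken diffs so
    # that get_diff_subtoken_labels below can map subtokens back to tokens.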
    span_diff_code_tokens, _, _ = compute_code_diffs(old_code_tokens, new_code_tokens)

    edit_span_subtoken_labels, edit_span_subtoken_indices, edit_span_token_map, edit_span_subtoken_map = get_diff_subtoken_labels(
        ex.span_diff_code_subtokens, ex.old_code_subtokens, old_code_tokens, ex.new_code_subtokens, new_code_tokens,
        span_diff_code_tokens, ex.old_code_raw, ex.new_code_raw)
        
    old_comment_tokens = tokenize_comment(ex.old_comment_raw).split()

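    # Prepend the comment-type tag, presumably because the comment tokenizers
    # strip @return/@param, so the labeled sequences line up with the raw tag.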
    prefix = []
    if ex.comment_type == 'Return':
        prefix = ['@return']
    elif ex.comment_type == 'Param':
        prefix = ['@param']
    
    old_nl_subtoken_labels, old_nl_subtoken_indices, old_nl_token_map, old_nl_subtoken_map = get_subtoken_labels(
        prefix + ex.old_comment_subtokens, prefix + old_comment_tokens, parse_comment=True)
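
    # Hypothetical quick inspection of the extracted features; in the real
    # pipeline these are written to the tokenization_features.json files.
    print('token-level span diff:', ' '.join(span_diff_code_tokens))
    print('edit-span subtoken labels:', edit_span_subtoken_labels)
    print('old NL subtoken labels:', old_nl_subtoken_labels)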