Example #1
def get_cores(line_code):
    line_code = round_trip(line_code)
    try:
        # If the line parses as a function header/prototype, it has no cores.
        tokens = tokenize(line_code)
        result = parse_func_header(tokens)
        return []
    except:
        # Otherwise decompose the line and collect the parsed nodes of every
        # statement segment.
        result = decompose_line(line_code)
        cores = []
        for key in result:
            if 'stmt' in key:
                stmt, depth = result[key]
                stmt = tokenize(stmt)
                segment_info = parse_chunk(stmt)
                cores += segment_info['nodes']
        return cores
Example #2
def extract_extra_braces(c):
    tokens = filter_space(tokenize(c, new_line=False))
    extra_close_curly, extra_open_curly, goto_stmt = False, False, False
    if len(tokens) >= 1 and tokens[-1].kind == 'operator' and tokens[-1].value == ':':
        goto_stmt = True
    elif len(tokens) >= 2 and tokens[-2].kind == 'operator' and tokens[-2].value == ':' \
        and (tokens[-1].kind == 'semicolon' and tokens[-1].value == ';'):
        goto_stmt = True
    if goto_stmt:
        return extra_close_curly, extra_open_curly, goto_stmt

    # Track unmatched braces: a '}' cancels the most recent unmatched '{';
    # whatever survives the scan is an "extra" brace.
    stack = []
    num_tokens = len(tokens)
    for idx, t in enumerate(tokens):
        kind = t.kind
        if kind == 'open_curly':
            stack.append((idx, kind))
        elif kind == 'close_curly':
            if len(stack) != 0 and stack[-1][1] == 'open_curly':
                del stack[-1]
            else:
                stack.append((idx, kind))

    # An extra '}' only counts if it is the first token, and an extra '{'
    # only if it is the last one; anything else is not a simple extra brace.
    for idx, kind in stack:
        if (kind == 'close_curly' and idx != 0) or (kind == 'open_curly' and idx != num_tokens - 1):
            return None
        if kind == 'close_curly':
            extra_close_curly = True
        if kind == 'open_curly':
            extra_open_curly = True
    return extra_close_curly, extra_open_curly, goto_stmt
def parse_gen(code, grammar):
    tokens = filter_space(tokenize(round_trip(code), new_line=False))
    result = OrderedDict()
    result['code'] = code
    result['start_w_close_curly'] = start_w_close_curly(tokens)
    result['end_w_open_curly'] = end_w_open_curly(tokens)
    cur_idx = 0
    if tokens[-1].value == ';':
        tokens = tokens[:-1]
    for atom in grammar:
        if cur_idx >= len(tokens):
            next_idx, content = None, None
        else:
            next_idx, content = atom.f(cur_idx, tokens)
        if next_idx is None and not atom.optional:
            raise parse_single_line_exception(
                'Error parsing atom %s, which is required' % atom.name)
        result[atom.name] = content
        if next_idx is not None:
            cur_idx = next_idx
    if 'stmt' in result:
        result['new_scope'] = result['stmt'] is None
    else:
        result['new_scope'] = False
    return result
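The grammar handed to parse_gen is a sequence of "atoms", each carrying a name, an optional flag, and a matcher f(cur_idx, tokens) that returns (next_idx, content) on a match and (None, None) otherwise. Below is a minimal sketch of that contract with plain string tokens; Atom, match_keyword, match_ident and the toy grammar are invented here for illustration and are not the project's real grammar.

# Hypothetical sketch of the atom contract assumed by parse_gen.
from collections import namedtuple

Atom = namedtuple('Atom', ['name', 'optional', 'f'])

def match_keyword(word):
    def f(cur_idx, tokens):
        if tokens[cur_idx] == word:
            return cur_idx + 1, word
        return None, None
    return f

def match_ident(cur_idx, tokens):
    tok = tokens[cur_idx]
    if tok.isidentifier():
        return cur_idx + 1, tok
    return None, None

toy_grammar = [
    Atom('type', False, match_keyword('int')),
    Atom('name', False, match_ident),
    Atom('init', True, match_keyword('=')),   # optional, may be absent
]

tokens = ['int', 'x', ';']
cur_idx, parsed = 0, {}
for atom in toy_grammar:
    next_idx, content = atom.f(cur_idx, tokens) if cur_idx < len(tokens) else (None, None)
    if next_idx is None and not atom.optional:
        # stands in for the project's parse_single_line_exception
        raise ValueError('required atom %s failed' % atom.name)
    parsed[atom.name] = content
    if next_idx is not None:
        cur_idx = next_idx
print(parsed)   # {'type': 'int', 'name': 'x', 'init': None}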
Example #4
def to_onmt(s):
    values = [
        plain2onmt_tok(t.value, t.kind) for t in tokenize(s, new_line=False)
        if t.kind != 'whitespace'
    ]
    result = ' '.join([v for v in values if v is not None])
    return result
def return_result_for_line_marker(code):
    tokens = filter_space(tokenize(round_trip(code), new_line=False))
    result = OrderedDict()
    result['code'] = code
    result['start_w_close_curly'] = start_w_close_curly(tokens)
    result['end_w_open_curly'] = end_w_open_curly(tokens)
    result['new_scope'] = result['end_w_open_curly']
    result['line_type'] = 'marker'
    return result
Example #6
def annotate_type(psu, table):
    values = [plain2onmt_tok(t.value, t.kind) for t in tokenize(psu, new_line=False)
              if t.kind != 'whitespace']
    def get_type_from_table(v):
        if v not in table:
            return 'None'
        else:
            return str(table[v][0]).replace(' ', '_').replace('\'', '').replace('\"', '')
    result = ' '.join([(v + '│' + get_type_from_table(v)) for v in values])
    return result
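For reference, a standalone sketch of the type-annotation join above, with a toy symbol table and plain token strings standing in for the output of tokenize/plain2onmt_tok:

# Toy table and token list, invented for illustration only.
table = {'x': ('unsigned int',), 'buf': ('char *',)}

def get_type_from_table(v):
    if v not in table:
        return 'None'
    return str(table[v][0]).replace(' ', '_').replace('\'', '').replace('"', '')

values = ['x', '=', 'buf', '[', '0', ']', ';']
print(' '.join(v + '│' + get_type_from_table(v) for v in values))
# x│unsigned_int =│None buf│char_* [│None 0│None ]│None ;│None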
Example #7
def type_line(line_code):
    line_code = round_trip(line_code)
    atoms_declared, atoms_used, prototype = {}, {}, None
    forest = []
    try:
        try:
            tokens = tokenize(line_code, new_line=False)
            result = parse_func_header(tokens)
            line_type = 'prototype' if result['is_prototype'] else 'function'
            prototype = adddepth2func(result, atoms_declared)
            sw_close_curly, ew_open_curly = start_w_close_curly(
                tokens), end_w_open_curly(tokens)
        except ParseChunkError:
            result = decompose_line(line_code)
            if result is None:
                return None
            line_type = result['line_type']
            sw_close_curly, ew_open_curly = result[
                'start_w_close_curly'], result['end_w_open_curly']
            for key in result:
                if 'stmt' in key:
                    stmt, depth = result[key]
                    stmt = tokenize(stmt, new_line=False)
                    segment_info = parse_chunk(stmt)
                    addvar_decl2line(segment_info, atoms_declared, depth)
                    parse_var_used(segment_info['nodes'], atoms_used, depth)
                    forest += segment_info['nodes']

        return {
            # used when reasoning about scope
            'line_type': line_type,
            'start_w_close_curly': sw_close_curly,
            'end_w_open_curly': ew_open_curly,
            'line_complete': len(line_code) > 0
            and line_code[-1] in ('}', ';'),
            'atoms_declared': atoms_declared,
            'atoms_used': atoms_used,
            'prototype': prototype,
            'forest': forest,
            'code': line_code
        }
    except ParseChunkError:
        return None
Example #8
def braces_acceptable(program_str):
    tokens = filter_space(tokenize(program_str, new_line=False))
    counter = 0
    for t in tokens:
        if t.kind == 'open_curly':
            counter += 1
        elif t.kind == 'close_curly':
            counter -= 1
        if counter < 0:
            return False
    return counter == 0
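The same balance-counter idea, sketched on raw characters instead of the project's tokens; it ignores braces inside string or character literals, so it is for illustration only:

# Character-level sketch of the balance check; quoting is ignored here.
def braces_acceptable_chars(s):
    counter = 0
    for ch in s:
        if ch == '{':
            counter += 1
        elif ch == '}':
            counter -= 1
        if counter < 0:        # a '}' appeared before its matching '{'
            return False
    return counter == 0

assert braces_acceptable_chars('if (x) { y(); }')
assert not braces_acceptable_chars('} {')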
def split_by_semicolon(code):
    tokens = filter_space(tokenize(code, new_line=False))
    semi_idxes = [
        idx for idx, token in enumerate(tokens) if token.kind == "semicolon"
    ]
    semi_idxes = [-1] + semi_idxes + [len(tokens)]
    segments = [
        join_tokens(tokens[semi_idxes[i] + 1:semi_idxes[i + 1]])
        for i in range(len(semi_idxes) - 1)
    ]
    return segments
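A self-contained sketch of the same index-based splitting on a plain list of token strings, with ' '.join standing in for join_tokens; the trailing empty segment for code ending in ';' mirrors the behaviour of the function above:

# Sketch only: string tokens and a literal ';' replace the project's
# token objects and "semicolon" kind.
def split_by_semicolon_list(tokens):
    semi_idxes = [i for i, t in enumerate(tokens) if t == ';']
    semi_idxes = [-1] + semi_idxes + [len(tokens)]
    return [' '.join(tokens[semi_idxes[i] + 1:semi_idxes[i + 1]])
            for i in range(len(semi_idxes) - 1)]

print(split_by_semicolon_list(['int', 'i', '=', '0', ';', 'i', '++', ';']))
# ['int i = 0', 'i ++', '']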
def return_result_for_trivial_code(code):
    tokens = filter_space(tokenize(round_trip(code), new_line=False))
    result = OrderedDict()
    result['code'] = code
    result['start_w_close_curly'] = start_w_close_curly(tokens)
    result['end_w_open_curly'] = end_w_open_curly(tokens)
    result['new_scope'] = result['end_w_open_curly']
    if code == '{':
        result['line_type'] = 'open_curly_only'
    elif code == '}':
        result['line_type'] = 'close_curly_only'
    elif code == '':
        result['line_type'] = 'empty'
    elif code == ';':
        result['line_type'] = 'line'
    return result
def parse_dowhile(code):
    while_idx = code.index('while')
    do_part, while_part = code[:while_idx], code[while_idx:]
    do_result = parse_doline(do_part)
    while_result = parse_whileline(while_part)
    result = OrderedDict()
    for key in do_result:
        result[key] = do_result[key]
    for key in while_result:
        result[key] = while_result[key]

    tokens = filter_space(tokenize(round_trip(code), new_line=False))
    result['code'] = code
    result['start_w_close_curly'] = start_w_close_curly(tokens)
    result['end_w_open_curly'] = end_w_open_curly(tokens)
    result['new_scope'] = False
    return result
def line_well_formed_brace(code):
    tokens = filter_space(tokenize(code, new_line=False))
    start, end = 0, len(tokens)
    if len(tokens) == 0:
        return True
    if tokens[0].kind == 'close_curly':
        start += 1
    if tokens[-1].kind == 'open_curly':
        end -= 1
    count = 0
    for token in tokens[start:end]:
        if token.kind == 'open_curly':
            count += 1
        elif token.kind == 'close_curly':
            count -= 1
        if count < 0:
            return False
    if count != 0:
        return False
    return True
Example #13
def __init__(self, program):
    self.raw_lines = program.split('\n')
    self.tokens_by_line = [
        tokenize(raw_line) for raw_line in self.raw_lines
    ]
    # Flat views over the whole program: the line id of every token and the
    # tokens themselves, in reading order.
    self.line_ids = list(
        chain(*[[line_num] * len(tokens)
                for line_num, tokens in enumerate(self.tokens_by_line)]))
    self.all_tokens = list(chain(*self.tokens_by_line))
    # Map a 1-based (line, column) position to the token's flat index.
    self.lc2token_idx = {
        (line_id + 1, token.offset + 1): token_idx
        for token_idx, (line_id, token)
        in enumerate(zip(self.line_ids, self.all_tokens))
    }
    for idx, token in enumerate(self.all_tokens):
        token.set_pid(idx)
        token.set_program(self)
        token.set_lid(self.line_ids[idx])
    self.start_idx = self.lc2token_idx[(1, 1)]
    self.covered = [False for _ in range(len(self.all_tokens))]
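A toy illustration of the (line, column) to flat-token-index mapping built in this constructor, with a naive regex tokenizer standing in for the project's tokenize(); both coordinates are 1-based, as in lc2token_idx:

# Illustrative sketch only; regex "tokens" replace the real token objects.
import re
from itertools import chain

program = 'int main ( )\n{ return 0 ; }'
tokens_by_line = [[(m.start(), m.group()) for m in re.finditer(r'\S+', line)]
                  for line in program.split('\n')]
line_ids = list(chain(*[[n] * len(toks) for n, toks in enumerate(tokens_by_line)]))
all_tokens = list(chain(*tokens_by_line))
lc2token_idx = {(lid + 1, off + 1): i
                for i, (lid, (off, _)) in enumerate(zip(line_ids, all_tokens))}
print(lc2token_idx[(2, 3)])   # 5: the 'return' token starts at line 2, column 3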
Example #14
def __init__(self, code: str):
    self._toks = list(tokenize(code))
def has_key_word(code, kword):
    tokens = tokenize(code)
    for t in tokens:
        if t.value == kword and t.kind != 'in-quote':
            return True
    return False
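A usage sketch with a hand-built token list; the Token fields and the 'in-quote' kind are assumptions about what the tokenizer emits:

# Hand-built tokens for illustration; 'in-quote' marks text inside a literal.
from collections import namedtuple

Token = namedtuple('Token', ['value', 'kind'])
toks = [Token('printf', 'name'), Token('(', 'operator'),
        Token('while', 'in-quote'), Token(')', 'operator')]

def has_key_word_toks(tokens, kword):
    return any(t.value == kword and t.kind != 'in-quote' for t in tokens)

print(has_key_word_toks(toks, 'while'))   # False: 'while' only appears inside a literal
print(has_key_word_toks(toks, 'printf'))  # True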