def _single_id_before_bracket(fix):
    """Return the one token of *fix* containing ``_<id>_`` seen before ``_<op>_[``.

    Scanning stops at the first ``_<op>_[`` token.  Returns ``None`` when no
    such token is found.  Asserts that at most one is found (the assertion
    message is the fix itself).
    """
    found = None

    for token in fix.split():
        if '_<id>_' in token:
            assert found is None, fix
            found = token
        elif token == '_<op>_[':
            break

    return found


def which_fix_goes_first(program, fix1, fix2):
    """Decide which of two fixes should be applied first and return it.

    Both fixes carry a leading token that is skipped before their line number
    is read with ``extract_line_number``.

    Decision order:
      1. If only one fix passes ``fix_ids_are_in_program`` against the part of
         the program starting at its own line, that fix is returned.
      2. If neither passes, ``CouldNotFindUsesForEitherException`` is raised.
      3. Otherwise the fix with the smaller line number is returned.
      4. On a tie, the program is scanned from that line onwards and the fix
         whose id token is met first is returned.

    Any exception raised by ``extract_line_number`` propagates unchanged.
    """
    fix1_location = extract_line_number(' '.join(fix1.split()[1:]))
    fix2_location = extract_line_number(' '.join(fix2.split()[1:]))

    # Split the program once; every check below works on slices of it.
    prog_lines = get_lines(program)

    # Evaluate each check exactly once (fix2 first, as before) instead of
    # recomputing the same slices for every branch.
    fix2_is_used = fix_ids_are_in_program(
        recompose_program(prog_lines[fix2_location:]), fix2)
    fix1_is_used = fix_ids_are_in_program(
        recompose_program(prog_lines[fix1_location:]), fix1)

    if fix1_is_used and not fix2_is_used:
        return fix1

    if fix2_is_used and not fix1_is_used:
        return fix2

    if not fix1_is_used and not fix2_is_used:
        raise CouldNotFindUsesForEitherException

    # Both fixes are usable: the earlier line wins.
    if fix1_location < fix2_location:
        return fix1
    elif fix2_location < fix1_location:
        return fix2

    # Same line: break the tie by whichever id shows up first in the program.
    id_in_fix1 = _single_id_before_bracket(fix1)
    id_in_fix2 = _single_id_before_bracket(fix2)

    assert id_in_fix1 != id_in_fix2, fix1 + ' & ' + fix2
    assert fix1_location == fix2_location

    for i in range(fix1_location, len(prog_lines)):
        for token in prog_lines[i].split():
            if token == id_in_fix1:
                return fix1
            elif token == id_in_fix2:
                return fix2

    assert False, 'unreachable code'
    # Only reached when assertions are disabled (python -O).
    raise CouldNotFindUsesForEitherException
Пример #2
0
 def find_and_replace(self, org_prog, corrupted_prog, regex, replacement='', name='', include_kind=False):
     """Replace one randomly chosen match of *regex* in *corrupted_prog*.

     Returns ``(corrupted_prog, fix, line)``, or
     ``(corrupted_prog, fix, line, name)`` when *include_kind* is true.
     When *regex* does not match, the program is returned unchanged and the
     remaining tuple members are ``None``.

     ``fix`` is whatever ``isolate_line`` returns for *org_prog* at the start
     offset of the replaced match, and ``line`` is ``extract_line_number(fix)``.
     """
     # The pointer-mutation pattern replaces only its first capture group;
     # every other pattern replaces the whole match (group 0).
     pointer_regex = r'[^)@,#\]] (_<op>_\*)(?! _<number>_)'
     group = 1 if regex == pointer_regex else 0
     spans = [match.span(group) for match in re.finditer(regex, corrupted_prog)]

     if not spans:
         if include_kind:
             return corrupted_prog, None, None, None
         return corrupted_prog, None, None

     # Draw from the RNG only when there is an actual choice to make.
     if len(spans) > 1:
         chosen = np.random.randint(len(spans))
     else:
         chosen = 0

     start, end = spans[chosen]
     corrupted_prog = corrupted_prog[:start] + replacement + corrupted_prog[end:]

     fix = isolate_line(org_prog, start)
     line = extract_line_number(fix)

     if include_kind:
         return corrupted_prog, fix, line, name
     return corrupted_prog, fix, line
Пример #3
0
def meets_criterion(incorrect_program_tokens, fix, name_dict, type_, name_seq=None, silent=True):
    """Return True when *fix* is an acceptable fix for the program.

    The fix is rejected (False) when:
      * it is a stop signal according to ``_is_stop_signal``;
      * no line number can be extracted from it;
      * its line number is past the end of the program;
      * the number of ``_<id>_`` tokens differs between the fix and the
        program line it targets;
      * *type_* is ``'replace'`` and the sequence of keyword / type / API-call /
        include tokens differs between the fix and the targeted line.

    *name_dict* and *name_seq* are accepted but not used here.  When *silent*
    is false, the reason for the last two rejections is printed.
    """
    lines = get_lines(incorrect_program_tokens)
    fix = _truncate_fix(fix)

    if _is_stop_signal(fix):
        # The fix is a stop signal.
        return False

    try:
        fix_line_number = extract_line_number(fix)
    except Exception:
        # Failed to extract a line number from the fix.
        return False

    if fix_line_number >= len(lines):
        # Localization points to a line that does not exist.
        return False

    fix_line = lines[fix_line_number]

    # Make sure number of IDs is the same
    id_regex = r'_<id>_\w*'

    if len(re.findall(id_regex, fix_line)) != len(re.findall(id_regex, fix)):
        if not silent:
            print('number of ids is not the same')
        return False

    keywords_regex = r'_<keyword>_\w+|_<type>_\w+|_<APIcall>_\w+|_<include>_\w+'

    if type_ == 'replace' and re.findall(keywords_regex, fix_line) != re.findall(keywords_regex, fix):
        if not silent:
            print('important words (keywords, etc.) change drastically')
        return False

    return True
Пример #4
0
def meets_criterion(incorrect_program_tokens, fix, type_, silent=True):
    """Return True when *fix* is an acceptable fix for the program.

    The fix is rejected (False) when:
      * it is a stop signal according to ``_is_stop_signal``;
      * ``extract_line_number`` raises ``FailedToGetLineNumberException``;
      * its line number is past the end of the program;
      * the number of ``_<id>_`` tokens differs between the fix and the
        program line it targets;
      * *type_* is ``'replace'`` and the sequence of keyword / type / API-call /
        include tokens differs between the fix and the targeted line.

    When *silent* is false, the reason for the last two rejections is printed.
    """
    lines = get_lines(incorrect_program_tokens)
    fix = _truncate_fix(fix)

    if _is_stop_signal(fix):
        return False

    try:
        fix_line_number = extract_line_number(fix)
    except FailedToGetLineNumberException:
        return False

    if fix_line_number >= len(lines):
        return False

    fix_line = lines[fix_line_number]

    # Make sure number of IDs is the same
    id_regex = r'_<id>_\w*'

    if len(re.findall(id_regex, fix_line)) != len(re.findall(id_regex, fix)):
        if not silent:
            print('number of ids is not the same')
        return False

    keywords_regex = r'_<keyword>_\w+|_<type>_\w+|_<APIcall>_\w+|_<include>_\w+'

    if type_ == 'replace' and re.findall(
            keywords_regex, fix_line) != re.findall(keywords_regex, fix):
        if not silent:
            print('important words (keywords, etc.) change drastically')
        return False

    return True
Пример #5
0
def apply_fix(program, fix, kind='replace', check_literals=False):
    """Apply *fix* to *program* and return the recomposed program.

    Args:
        program: program string, split into lines with ``get_lines``.
        fix: fix string; after truncation it must contain exactly one ``~``.
        kind: ``'replace'`` overwrites the targeted line (via ``replace_ids``);
            ``'insert'`` inserts the fix after the targeted line.
        check_literals: for ``'replace'``, also require the string / char /
            number literal-token counts of the fix and the targeted line to
            match.

    Raises:
        InvalidFixLocationException: the fix does not split into two parts on
            ``~``, no line number can be extracted, or (replace only) the line
            number is out of range.
        SubstitutionFailedException: (replace only) the ``_<id>_`` count, or a
            checked literal count, differs between fix and targeted line.
    """
    # Break up program string into lines
    lines = get_lines(program)

    # Truncate the fix
    fix = _truncate_fix(fix)

    # Make sure there are two parts
    if len(fix.split('~')) != 2:
        raise InvalidFixLocationException

    # Retrieve insertion location
    try:
        if kind == 'replace':
            fix_location = extract_line_number(fix)
        else:
            assert kind == 'insert'

            fix_tokens = fix.split()

            if fix_tokens[0] != '_<insertion>_':
                print("Warning: First token did not suggest insertion (should not happen)")

            # Skip the leading marker and pass on ALL remaining tokens.
            # Joining only the second token (a plain string) would merely
            # space-separate its characters and drop every later token.
            fix_location = extract_line_number(' '.join(fix_tokens[1:]))
    except FailedToGetLineNumberException:
        raise InvalidFixLocationException

    # Remove line number
    fix = _remove_line_number(fix)

    # Insert the fix
    if kind == 'replace':
        try:
            if lines[fix_location].count('_<id>_') != fix.count('_<id>_'):
                raise SubstitutionFailedException
            if check_literals:
                for lit in ['string', 'char', 'number']:
                    # Count the same marker on both sides.
                    marker = '_<%s>_' % lit
                    if lines[fix_location].count(marker) != fix.count(marker):
                        raise SubstitutionFailedException

            lines[fix_location] = replace_ids(fix, lines[fix_location])
        except IndexError:
            raise InvalidFixLocationException
    else:
        assert kind == 'insert'
        lines.insert(fix_location + 1, fix)

    return recompose_program(lines)
Пример #6
0
def find_and_replace(org_prog, corrupted_prog, regex, replacement, extra_ids, last_id):
    """Replace one randomly chosen match of *regex* in *corrupted_prog*.

    Returns ``(corrupted_prog, fix, line, extra_ids, last_id)``.  *extra_ids*
    and *last_id* are passed through untouched.  When *regex* does not match,
    the program is returned unchanged with ``fix`` and ``line`` set to ``None``.

    ``fix`` and ``line`` come from ``isolate_line`` and ``extract_line_number``
    called on *org_prog* with the start offset of the replaced match.
    """
    spans = [match.span() for match in re.finditer(regex, corrupted_prog)]

    if not spans:
        return corrupted_prog, None, None, extra_ids, last_id

    # Draw from the RNG only when there is an actual choice to make.
    if len(spans) > 1:
        chosen = np.random.randint(len(spans))
    else:
        chosen = 0

    start, end = spans[chosen]
    corrupted_prog = corrupted_prog[:start] + replacement + corrupted_prog[end:]

    fix = isolate_line(org_prog, start)
    line = extract_line_number(org_prog, start)

    return corrupted_prog, fix, line, extra_ids, last_id
Пример #7
0
    def find_and_replace(self, corrupted_prog, regex, replacement,
                         mutation_name):
        """Replace one randomly chosen match of *regex* in *corrupted_prog*.

        Returns ``(corrupted_prog, line_number, mutation_name)``.  When *regex*
        does not match, the program is returned unchanged and ``line_number``
        is ``None``.  The line number is read from the program as it is
        *before* the replacement is made.
        """
        spans = [match.span() for match in re.finditer(regex, corrupted_prog)]

        if not spans:
            return corrupted_prog, None, mutation_name

        # Draw from the RNG only when there is an actual choice to make.
        chosen = self.rng.randint(len(spans)) if len(spans) > 1 else 0
        start, end = spans[chosen]

        line_number = extract_line_number(isolate_line(corrupted_prog, start))

        mutated = corrupted_prog[:start] + replacement + corrupted_prog[end:]

        return mutated, line_number, mutation_name
Пример #8
0
    def find_and_replace(self, org_prog, corrupted_prog, regex, replacement, mutation_name):
        """Replace one randomly chosen match of *regex* in *corrupted_prog*.

        Returns ``(corrupted_prog, line_number, mutation_name)``.  When *regex*
        does not match, the program is returned unchanged and ``line_number``
        is ``None``.  The line number is read from *corrupted_prog* as it is
        before the replacement is made; *org_prog* is accepted but not used.
        """
        # The pointer-mutation pattern replaces only its first capture group;
        # every other pattern replaces the whole match (group 0).
        pointer_regex = r'[^)@,#\]] (_<op>_\*)(?! _<number>_)'
        group = 1 if regex == pointer_regex else 0
        spans = [match.span(group) for match in re.finditer(regex, corrupted_prog)]

        if not spans:
            return corrupted_prog, None, mutation_name

        # Draw from the RNG only when there is an actual choice to make.
        chosen = self.rng.randint(len(spans)) if len(spans) > 1 else 0
        start, end = spans[chosen]

        line_number = extract_line_number(isolate_line(corrupted_prog, start))

        mutated = corrupted_prog[:start] + replacement + corrupted_prog[end:]

        return mutated, line_number, mutation_name
Пример #9
0
    def process(self, source_code_array, max_attempts=6):
        """Repeatedly ask the network for fixes and apply them to each program.

        Every source string is tokenized, then for up to *max_attempts* rounds
        the latest version of each still-active program is vectorized, passed
        through the network, and the returned fix is applied with
        ``apply_fix``.  A program drops out of later rounds as soon as
        vectorization or fix application fails, or the fix increases its number
        of compilation errors.

        Args:
            source_code_array: iterable of source-code strings.
            max_attempts: maximum number of fix rounds.

        Returns:
            A ``(fixes_to_return, repaired_programs, error_messages)`` tuple of
            dicts keyed by the position of the program in *source_code_array*:
            human-readable descriptions of applied fixes, the regenerated
            (stripped) source of the last program version, and the list of
            status strings recorded for the program.

        ``KeyError``, ``InvalidFixLocationException`` and
        ``SubstitutionFailedException`` escaping the round loop end the loop
        early; results gathered so far are still returned.
        """
        sequences_of_programs = {}
        fixes_suggested_by_network = {}
        entries = []
        errors = {}
        fixes_to_return = {}
        error_messages = {}
        # Per-program decoding tables, so that the final regeneration uses each
        # program's own tables rather than those of the last entry processed.
        name_dicts = {}
        literal_sequences = {}

        # Wrap it up into a nice box
        for idx, source_code in enumerate(source_code_array):
            program, name_dict, name_sequence, literal_sequence = C_Tokenizer().tokenize(source_code)
            entries.append((idx, program, name_dict, name_sequence, literal_sequence))
            name_dicts[idx] = name_dict
            literal_sequences[idx] = literal_sequence
            sequences_of_programs[idx] = [program]
            fixes_suggested_by_network[idx] = []
            errors[idx], _ = compilation_errors(source_code)
            error_messages[idx] = []
            fixes_to_return[idx] = []

        network = self.network

        if self.task == 'ids':
            normalize_names = False
            fix_kind = 'insert'
        else:
            assert self.task == 'typo'
            normalize_names = True
            fix_kind = 'replace'

        try:
            for round_ in range(max_attempts):
                to_delete = []
                input_ = []

                for i, entry in enumerate(entries):
                    idx, program, name_dict, name_sequence, literal_sequence = entry

                    try:
                        program_vector = vectorize(sequences_of_programs[idx][-1], network['in_seq_length'], network['dictionary'], normalize_names=normalize_names, reverse=True, append_eos=False)
                    except VectorizationFailedException:
                        program_vector = None

                    if program_vector is not None:
                        input_.append(program_vector)
                    else:
                        to_delete.append(i)
                        error_messages[idx].append('VectorizationFailed')

                # Delete from the back so earlier indices stay valid
                for i in sorted(to_delete, reverse=True):
                    del entries[i]

                assert len(input_) == len(entries)

                if len(input_) == 0:
                    break

                # Pass it through the network
                fix_vectors = get_fixes(network['session'], input_, network)
                fixes = []

                # Devectorize them
                for i, fix_vector in enumerate(fix_vectors):
                    idx, _, _, _, _ = entries[i]

                    fix = devectorize(fix_vector, network['dictionary'])
                    fixes_suggested_by_network[idx].append(fix)
                    fixes.append(fix)

                to_delete = []

                # Apply fixes
                for i, (entry, fix) in enumerate(zip(entries, fixes)):
                    idx, program, name_dict, name_sequence, literal_sequence = entry

                    try:
                        program = sequences_of_programs[idx][-1]
                        program = apply_fix(program, fix, kind=fix_kind, check_literals=True)
                        sequences_of_programs[idx].append(program)
                        regen_source_code = tokens_to_source(program, name_dict, clang_format=True, literal_seq=literal_sequence)
                        this_errors, _ = compilation_errors(regen_source_code)

                        if len(fix.strip().split()) > 0 and len(this_errors) > len(errors[idx]):
                            to_delete.append(i)
                            error_messages[idx].append('ErrorsIncreased')
                        else:
                            errors[idx] = this_errors
                    except IndexError:
                        to_delete.append(i)
                        error_messages[idx].append('IndexError')
                    except VectorizationFailedException:
                        to_delete.append(i)
                        error_messages[idx].append('VectorizationFailed')
                    except InvalidFixLocationException:
                        to_delete.append(i)

                        # An empty fix has no first token; treat it as an
                        # invalid location instead of raising IndexError from
                        # inside this handler.
                        fix_tokens = fix.strip().split()

                        if fix_tokens and fix_tokens[0] == '_eos_':
                            error_messages[idx].append('OK')
                        else:
                            error_messages[idx].append('InvalidFixLocation')
                    except SubstitutionFailedException:
                        to_delete.append(i)
                        error_messages[idx].append('SubstitutionFailed')
                    else:
                        assert len(fix.strip().split()) == 0 or fix.strip().split()[0] != '_eos_'

                        if fix_kind == 'insert':
                            # Skip the leading token before reading the line number
                            fix_ = ' '.join(fix.split()[1:])
                            fix_line = extract_line_number(fix_) + 1
                            fixes_to_return[idx].append('%s at line %d: %s' % (fix_kind, fix_line, ''.join(fix_to_source(fix_, program, name_dict, clang_format=True).split('\n'))))
                        else:
                            fix_line = extract_line_number(fix) + 1
                            fixes_to_return[idx].append('%s at line %d: %s' % (fix_kind, fix_line, ''.join(fix_to_source(fix, program, name_dict, name_seq=name_sequence, literal_seq=literal_sequence, clang_format=True).split('\n'))))

                # Delete from the back so earlier indices stay valid
                for i in sorted(to_delete, reverse=True):
                    del entries[i]

        except (KeyError, InvalidFixLocationException, SubstitutionFailedException):
            # Best effort: stop iterating and return what has been gathered.
            pass
        # -----------

        repaired_programs = {}

        for idx in sequences_of_programs:
            regenerated = tokens_to_source(sequences_of_programs[idx][-1], name_dicts[idx], clang_format=True, literal_seq=literal_sequences[idx])
            repaired_programs[idx] = regenerated.strip()

        return fixes_to_return, repaired_programs, error_messages
Пример #10
0
def apply_fix(program, fix, kind='replace', check_literals=False):
    """Apply *fix* to *program* and return the recomposed program.

    This variant traces every step on standard output.

    Args:
        program: program string, split into lines with ``get_lines``.
        fix: fix string; after truncation it must contain exactly one ``~``.
        kind: ``'replace'`` overwrites the targeted line (via ``replace_ids``);
            ``'insert'`` inserts the fix after the targeted line.
        check_literals: currently ignored -- it is forced to ``False`` in the
            replace branch (see the note there).

    Raises:
        InvalidFixLocationException: the fix does not split into two parts on
            ``~``, no line number can be extracted, or (replace only) the line
            number is out of range.
        SubstitutionFailedException: (replace only) the ``_<id>_`` count
            differs between fix and targeted line.
    """
    stars = "*******************"

    print("apply_fix passed")
    # Break up program string into lines
    lines = get_lines(program)

    print(stars)
    print("lines =")
    print(lines)
    print(stars)
    print("lines length : %s" % len(lines))
    # Truncate the fix
    fix = _truncate_fix(fix)
    print(stars)
    print("fix =")
    print(fix)
    print(stars)
    print("fix.split('~') : %s" % (fix.split('~'),))
    print("len(fix.split('~')) : %s" % len(fix.split('~')))
    # Make sure there are two parts
    if len(fix.split('~')) != 2:
        print("InvalidFixLocationExeption")
        print("can not split 2 part")
        raise InvalidFixLocationException
    print("Retrieve insertion location")
    # Retrieve insertion location
    try:
        print("if replace 1")
        if kind == 'replace':
            fix_location = extract_line_number(fix)
            print("kind == replace")
            print(stars)
            print("fix_location =")
            print(fix_location)
            print(stars)
        else:
            assert kind == 'insert'

            fix_tokens = fix.split()

            if fix_tokens[0] != '_<insertion>_':
                print("Warning: First token did not suggest insertion (should not happen)")

            # Skip the leading marker and pass on ALL remaining tokens.
            # Joining only the second token (a plain string) would merely
            # space-separate its characters and drop every later token.
            fix_location = extract_line_number(' '.join(fix_tokens[1:]))
            print(stars)
            print("fix_location ==")
            print(fix_location)
            print(stars)
    except FailedToGetLineNumberException:
        raise InvalidFixLocationException
    print("Remove line number")
    # Remove line number
    fix = _remove_line_number(fix)

    print(stars)
    print("fix =")
    print(fix)
    print(stars)
    # Insert the fix
    if kind == 'replace':
        print("if replace 2")
        try:
            # NOTE(review): leftover debug override -- the caller's
            # check_literals argument is silently discarded, so the literal
            # check below never runs.  Confirm whether this should be removed.
            check_literals = False  #debug
            if lines[fix_location].count('_<id>_') != fix.count('_<id>_'):
                print("not include original id")
                raise SubstitutionFailedException
            if check_literals:
                print("check literals")
                for lit in ['string', 'char', 'number']:
                    # Count the same marker on both sides.
                    marker = '_<%s>_' % lit
                    if lines[fix_location].count(marker) != fix.count(marker):
                        print("not include original literal")
                        raise SubstitutionFailedException

            lines[fix_location] = replace_ids(fix, lines[fix_location])
        except IndexError:
            print("InvalidFixLocationException")
            raise InvalidFixLocationException
    else:
        assert kind == 'insert'
        lines.insert(fix_location + 1, fix)
    print("apply_fix end")
    return recompose_program(lines)