def format_program_vector(self, program_vector):
    '''Render a program vector as human-readable text.

    The vector is first passed through ``self.filter_program_vector`` and
    then cut into lines with ``split_list`` on ``self.new_line`` (the
    delimiter is requested to be kept). Each line is printed as a
    ``[index, length]`` header followed by its tokens, comma-separated.

    Returns:
        A single string with one rendered program line per text line.
    '''
    token_lines = split_list(self.filter_program_vector(program_vector),
                             delimiter=self.new_line,
                             keep_delimiter=True)
    rendered = [
        '[%-2d, %-3d]' % (row, len(tokens)) + ', '.join(map(str, tokens))
        for row, tokens in enumerate(token_lines)
    ]
    return '\n'.join(rendered)
def get_errors_from_ground_truth(self, program_vector, original_program_vector):
    '''List the indices of the lines that differ from the ground truth.

    Both vectors are filtered with ``self.filter_program_vector`` and split
    into lines on ``self.new_line``. For every pair of lines that differ,
    ``self.fix_line`` is called on deep copies of the two lines and the
    line index is appended once per unit of the value it returns.

    Returns:
        A list of line indices; an index appears as many times as the
        count returned by ``self.fix_line`` for that line.

    Raises:
        ValueError: if the two programs do not split into the same number
            of lines.
    '''
    def as_lines(vector):
        # Shared preprocessing for both inputs: filter, then split per line.
        return split_list(self.filter_program_vector(vector),
                          delimiter=self.new_line,
                          keep_delimiter=True)

    candidate_lines = as_lines(program_vector)
    reference_lines = as_lines(original_program_vector)

    n_candidate, n_reference = len(candidate_lines), len(reference_lines)
    if n_candidate != n_reference:
        raise ValueError('Program are of different lengths: %d and %d!' %
                         (n_candidate, n_reference))

    faulty_lines = []
    for index, (candidate, reference) in enumerate(
            zip(candidate_lines, reference_lines)):
        if candidate == reference:
            continue
        # Deep copies are handed over so fix_line cannot touch our lines.
        edit_count = self.fix_line(copy.deepcopy(candidate),
                                   copy.deepcopy(reference))
        faulty_lines.extend(index for _ in range(edit_count))
    return faulty_lines
def cursor_of_line(self, program, line_number, position='init'):
    '''Return the index into ``program`` where a given line starts or ends.

    Args:
        program: the program sequence; it is split into lines with
            ``split_list(program, self.new_line)``.
        line_number: line to locate; asserted to lie in
            ``[0, self.get_line_count(program)]``.
        position: ``'init'`` for the first index of the line, ``'end'`` for
            its last index.

    Returns:
        The cursor index, or ``None`` if the split lines never reach
        ``line_number``.

    Raises:
        ValueError: if the requested line is reached and ``position`` is
            neither ``'init'`` nor ``'end'``.
    '''
    total_lines = self.get_line_count(program)
    assert line_number >= 0
    assert line_number <= total_lines

    wants_start = position == 'init'
    wants_end = position == 'end'

    # Shortcut: the first line always starts at index 0.
    if wants_start and line_number == 0:
        return 0

    # Shortcut: the end of the last line is the final element of the
    # program, which is expected to be the new-line token.
    if wants_end and line_number == total_lines:
        last_index = len(program) - 1
        assert program[last_index] == self.new_line
        return last_index

    # General case: accumulate the lengths of the lines that come before
    # the requested one.
    offset = 0
    for idx, tokens in enumerate(split_list(program, self.new_line)):
        if idx == line_number:
            if wants_start:
                return offset
            if wants_end:
                return offset + len(tokens) - 1
            raise ValueError(
                'position could be either "init" or "end", not "%s"' %
                position)
        if idx < line_number:
            offset += len(tokens)
    return None
def localize_error(self, programA, programB):
    '''Locate the first difference between two programs and name its fix.

    programA is the mutated program while programB is the original one.

    Both programs are filtered with ``self.filter_program_vector`` and
    split into lines on ``self.new_line``. The first differing line is
    found, then the first differing token pair ``(a, b)`` inside it.

    Returns:
        A tuple ``(different_line_num, error_at, fix_action)`` where
        ``error_at`` indexes the differing token in the filtered programs
        and ``fix_action`` is ``'insert <name>'`` or ``'delete <name>'``,
        with ``<name>`` looked up in ``self.rev_tl_dict``.

    Raises:
        ValueError: if the programs split into different numbers of lines.
        AssertionError: on non-list inputs, when no differing line exists,
            or when neither differing token is in ``self.mutables``.
    '''
    assert type(programA) == list and type(programB) == list, \
        'types:{}, {}, programs:\n{}\n{}'.format(
            type(programA), type(programB), programA, programB)
    assert type(self.new_line) == int

    programA = self.filter_program_vector(programA)
    programB = self.filter_program_vector(programB)

    mutated_lines = split_list(programA, delimiter=self.new_line,
                               keep_delimiter=True)
    original_lines = split_list(programB, delimiter=self.new_line,
                                keep_delimiter=True)

    if len(mutated_lines) != len(original_lines):
        raise ValueError('Program are of different lengths: %d and %d!' %
                         (len(mutated_lines), len(original_lines)))

    # Index of the first line on which the two programs disagree.
    different_line_num = next(
        (idx
         for idx, (lhs, rhs) in enumerate(zip(mutated_lines, original_lines))
         if lhs != rhs),
        None)

    error_at = None
    if different_line_num is not None:
        A_line = mutated_lines[different_line_num]
        B_line = original_lines[different_line_num]
        # Walk the differing lines in lockstep up to the first token mismatch.
        for col, (tok_a, tok_b) in enumerate(zip(A_line, B_line)):
            if tok_a != tok_b:
                break
        # Absolute position = tokens on all earlier lines + column offset.
        error_at = sum(
            len(line) for line in mutated_lines[:different_line_num]) + col

    assert error_at is not None
    assert programA[error_at] == tok_a, '{},{},{},{}'.format(
        self.format_program_vector(programA), '\na:', tok_a,
        programA[error_at])
    assert programB[error_at] == tok_b, '{},{},{},{}'.format(
        self.format_program_vector(programB), '\nb:', tok_b,
        programB[error_at])

    a_is_mutable = tok_a in self.mutables
    b_is_mutable = tok_b in self.mutables

    if a_is_mutable and b_is_mutable:
        # Either edit is plausible: compare the cost of fixing the rest of
        # the line after a deletion versus after an insertion.
        del_cost = self.fix_line(A_line[col + 1:], B_line[col:],
                                 recursive=True)
        ins_cost = self.fix_line(A_line[col:], B_line[col + 1:],
                                 recursive=True)
        choice = 'delete' if del_cost <= ins_cost else 'insert'
    elif b_is_mutable:
        # b has been deleted from original line
        choice = 'insert'
    elif a_is_mutable:
        # a is a duplicated token
        choice = 'delete'
    else:
        choice = None

    fix_action = None
    if choice is not None:
        # Originally this used self.rev_tl_dict[b][-1]; the reason for that
        # is unknown.
        token = tok_b if choice == 'insert' else tok_a
        fix_action = choice + ' ' + self.rev_tl_dict[token]
    else:
        err_str = 'Should not happen:' + self.devectorize(
            A_line) + '\n' + self.devectorize(B_line)
        assert False, err_str

    return different_line_num, error_at, fix_action
def get_line(self, program, line_number):
    '''Return entry ``line_number`` of ``program`` split on ``self.new_line``.

    The split is done by ``split_list`` with its default options; an
    out-of-range ``line_number`` propagates the indexing error.
    '''
    lines = split_list(program, self.new_line)
    return lines[line_number]