def compare_pca_results():
    """Merge the original and mask PCA result files into "results-merge.csv".

    Reads "results-original.csv" and "results-mask.csv" from
    ``pca_mask_folder`` in lockstep and writes, for each pair of rows:

    * when the coder name (column 3) is "Coder" or "CoderBase": the parsed
      original row only;
    * otherwise: the original row tagged 'O', the mask row tagged 'M', and a
      third row holding the first 7 original columns followed by the
      per-column differences for columns 7 and onwards.

    Raises:
        AssertionError: if the two input files report a different
            ``total_lines``.
    """
    original = CSVReader(pca_mask_folder, "results-original.csv")
    results = CSVReader(pca_mask_folder, "results-mask.csv")
    csv_writer = CSVWriter(pca_mask_folder, "results-merge.csv")
    try:
        assert (original.total_lines == results.total_lines)
        while original.continue_reading:
            original_line = original.read_line()
            results_line = results.read_line()
            coder_name = original_line[3]
            if coder_name in ["Coder", "CoderBase"]:
                csv_writer.write_row(parse_line(original_line))
                continue

            if len(original_line[4]) > 0:  # algorithm params line
                original_line, results_line = write_params_line(
                    csv_writer, original_line, results_line)
            original_line = parse_line(original_line)
            results_line = parse_mask_line(results_line)

            csv_writer.write_row(['O'] + original_line[1:] + ['O'])
            csv_writer.write_row(['M'] + results_line[1:] + ['M'])

            diff = []
            for i in range(7, len(original_line)):
                original_value, results_value = original_line[i], results_line[i]
                if i % 2 == 0:  # floats (percentages)
                    diff_value = float(results_value) - float(original_value)
                    diff_value = diff_scenarios(diff_value)
                elif original_value == results_value:  # integers (bits), unchanged
                    diff_value = '==='
                else:  # integers (bits), changed
                    diff_value = c_int(results_value) - c_int(original_value)
                    # NOTE(review): for a negative difference str() presumably
                    # already carries a '-' sign, so the marker becomes e.g.
                    # '----5' - confirm this is the intended format.
                    if diff_value > 0:
                        diff_value = '+++' + str(diff_value)
                    else:
                        diff_value = '---' + str(diff_value)
                diff.append(diff_value)
            csv_writer.write_row(original_line[:7] + diff)
    finally:
        # Release the three files even when the assertion or a row fails.
        original.close()
        results.close()
        csv_writer.close()
def read_data(input_path, input_filename, column_index):
    """Read one column of a CSV file as a float array.

    The column name is taken from row 3 of the file; the values are read from
    the first data row (as defined by ``CSVReader.goto_first_data_row``) to the
    end of the file.

    Args:
        input_path: folder passed to ``CSVReader``.
        input_filename: file name passed to ``CSVReader``.
        column_index: index of the column to extract from every row.

    Returns:
        ``[column_name, data]`` where ``data`` is a 1-D float ``numpy`` array
        (empty when the file has no data rows).

    Raises:
        ValueError: if a cell in the column cannot be converted to ``float``.
    """
    csv_file = CSVReader(input_path, input_filename)
    try:
        csv_file.goto_row(3)
        column_name = csv_file.read_line()[column_index]
        csv_file.goto_first_data_row()
        # Accumulate in a list and convert once: np.append copies the whole
        # array on every call, which made the original loop quadratic.
        values = []
        while csv_file.continue_reading:
            values.append(float(csv_file.read_line()[column_index]))
    finally:
        csv_file.close()
    return [column_name, np.array(values, dtype=float)]
def filter_dataset(original_path, original_filename, csv_writer, algorithms_array, print_header=False):
    """Copy the rows of a results CSV that belong to the selected algorithms.

    A row with a non-empty algorithm name (column 3) opens a section: the
    section is "selected" when that name is in ``algorithms_array``. Rows with
    an empty algorithm name inherit the state of the current section.

    * The header row (column 0 equal to "Dataset") is written only when
      ``print_header`` is true.
    * Any other row is written when its filename (column 1) is non-empty or
      when the current section is selected.
    """
    reader = CSVReader(original_path, original_filename)
    in_selected_section = False
    while reader.continue_reading:
        row = reader.read_line()
        filename, algorithm_name = row[1], row[3]

        # A named algorithm row decides the state for itself and the rows below.
        if algorithm_name:
            in_selected_section = algorithm_name in algorithms_array

        if row[0] == "Dataset":
            keep_row = print_header
        else:
            keep_row = len(filename) > 0 or in_selected_section

        if keep_row:
            csv_writer.write_row(row)
    reader.close()
def compare():
    """Compare the original "vwc_1202.dat.csv" against its CoderSF-decoded copy.

    Both files are read in lockstep; every pair of rows is handed to
    ``compare_lines`` together with the current line count. "COMPARE
    SUCCESS!!" is printed once the first file has been fully read.

    Raises:
        AssertionError: if the two readers get out of step
            (different ``current_line_count``).
    """
    test_files_path = OSUtils.cpp_project_path() + "/test_files"
    csv_reader_1 = CSVReader(OSUtils.datasets_csv_path() + "[1]irkis/", "vwc_1202.dat.csv")
    csv_reader_2 = CSVReader(test_files_path, "vwc_1202.dat.csv-CoderSF-Decode.csv")
    try:
        # NOTE(review): only the first reader drives the loop, so extra
        # trailing rows in the second file would go unnoticed - confirm.
        while csv_reader_1.continue_reading:
            assert (csv_reader_1.current_line_count == csv_reader_2.current_line_count)
            current_line_count = csv_reader_1.current_line_count
            line_1 = csv_reader_1.read_line()
            line_2 = csv_reader_2.read_line()
            compare_lines(line_1, line_2, current_line_count)
        # Call form of print: valid on Python 3 and prints the same text on
        # Python 2 (the original used the Python 2-only print statement).
        print("COMPARE SUCCESS!!")
    finally:
        # Close both readers even when a comparison or assertion fails.
        csv_reader_1.close()
        csv_reader_2.close()
class GzipResultsReader(object):
    """Looks up gzip and base bit counts in a results CSV file."""

    def __init__(self, path, filename):
        self.input = CSVReader(path, filename)

    def gzip_and_base_bits(self, dataset_name, filename, column_name):
        """Return ``(gzip_bits, base_bits)`` for a dataset / file / column.

        The file is scanned from row 0: first for a row whose column 0 equals
        ``dataset_name``, then (continuing from there) for a row whose
        column 1 equals ``filename`` (which can be 'Global'), then for a row
        whose column 2 equals ``column_name``. The bit counts are read from
        columns 4 and 5 of that last row.

        Raises:
            KeyError: if the end of the file is reached before all three
                matches are found.
        """
        self.input.goto_row(0)
        search_keys = (dataset_name, filename, column_name)
        for col_index, expected in enumerate(search_keys):
            self.__find_match(col_index, expected)
        matched_row = self.line
        gzip_bits = int(matched_row[4])
        base_bits = int(float(matched_row[5]))
        return gzip_bits, base_bits

    def __find_match(self, col_index, value):
        """Advance to the next row whose ``col_index`` cell equals ``value``.

        The matching row is stored in ``self.line``.
        """
        reader = self.input
        while reader.continue_reading:
            candidate = reader.read_line()
            if candidate[col_index] != value:
                continue
            self.line = candidate
            return
        raise KeyError('Reached EOF')
class ResultsReader(object):
    """Sequential reader over a results CSV file.

    Keeps the last row read in ``self.line`` and the number of rows read since
    the last rewind in ``self.line_count``. Offers helpers to position the
    reader on a dataset / filename / threshold row and to collect or copy the
    group of rows that follows.
    """

    def __init__(self, mode, mask_mode):
        """Open the results file.

        When ``mask_mode`` is "NM" or "M", the file location is resolved with
        ``ResultsPaths.get_path_and_filename(mode, mask_mode)``. Otherwise the
        two arguments are used directly as ``(path, filename)``.
        """
        if mask_mode in ["NM", "M"]:
            input_path, input_filename = ResultsPaths.get_path_and_filename(mode, mask_mode)
        else:
            input_path, input_filename = mode, mask_mode
        self.input_file = CSVReader(input_path, input_filename)
        # Initialise the cursor state here so that methods which do not rewind
        # first (e.g. find_threshold) work on a fresh instance instead of
        # failing with AttributeError.
        self.line = None
        self.line_count = 0

    def read_line_no_count(self):
        """Read and return the next row without touching ``line``/``line_count``."""
        return self.input_file.read_line()

    def __read_line(self):
        """Read the next row, store it in ``self.line`` and count it."""
        self.line_count += 1
        self.line = self.input_file.read_line()
        return self.line

    def __goto_file_start(self):
        """Rewind the file to row 0 and reset the cursor state."""
        self.input_file.goto_row(0)
        self.line = None
        self.line_count = 0

    def full_results(self):
        """Return every row of the file except the first (headers) row."""
        self.__goto_file_start()
        self.__read_line()  # headers line
        lines_array = []
        while self.continue_reading():
            self.__read_line()
            lines_array.append(self.line)
        return lines_array

    def dataset_results(self, dataset_name, change_index=CSVConstants.INDEX_DATASET):
        """Return the rows of ``dataset_name`` (see ``add_until_change``)."""
        self.find_dataset(dataset_name)
        return ResultsReader.add_until_change(self, change_index)

    def filename_results(self, dataset_name, filename, change_index=CSVConstants.INDEX_FILENAME):
        """Return the rows of ``filename`` inside ``dataset_name``."""
        self.find_filename_in_dataset(dataset_name, filename)
        return ResultsReader.add_until_change(self, change_index)

    def find_dataset(self, dataset_name):
        """Rewind and move to the first row whose dataset cell is ``dataset_name``."""
        self.__goto_file_start()
        self.__find_next_line(CSVConstants.INDEX_DATASET, dataset_name, False)

    def find_filename(self, filename):
        """Rewind and move to the first row whose filename cell is ``filename``."""
        self.__goto_file_start()
        self.__find_next_line(CSVConstants.INDEX_FILENAME, filename, False)

    def find_threshold(self, threshold):
        """Move forward (no rewind) to the next row with the integer ``threshold``."""
        self.__find_next_line(CSVConstants.INDEX_THRESHOLD, threshold, True)

    def find_filename_in_dataset(self, dataset_name, filename):
        """Move to ``dataset_name`` and then forward to ``filename``."""
        self.find_dataset(dataset_name)
        self.__find_next_line(CSVConstants.INDEX_FILENAME, filename, False)

    def continue_reading(self):
        """Return the underlying reader's ``continue_reading`` flag."""
        return self.input_file.continue_reading

    def __find_next_line(self, index, value, is_integer):
        """Position the reader on the next row matching ``value`` at ``index``.

        The current row is accepted if it already matches. Returns True on
        success.

        Raises:
            Exception: if the end of the file is reached without a match.
        """
        if self.line_count > 0 and ResultsReader.matching_line(self.line, index, value, is_integer):
            return True
        while self.input_file.continue_reading:
            self.__read_line()
            if ResultsReader.matching_line(self.line, index, value, is_integer):
                return True
        raise Exception("ERROR: __find_next_line")

    @staticmethod
    def matching_line(line, index, value, is_integer):
        """Return True iff ``line[index]`` is non-empty and equals ``value``.

        When ``is_integer`` is true the cell is converted with ``int`` before
        the comparison.
        """
        value_in_index = line[index]
        if len(value_in_index) == 0:
            return False
        value_to_compare = int(value_in_index) if is_integer else value_in_index
        return value == value_to_compare

    @staticmethod
    def copy_until_change(results_reader, output_file, line_index):
        """Write the current row and the following rows to ``output_file``.

        Rows are written while the cell at ``line_index`` is empty (the first
        row is always written). If the end of the file is reached, the last
        row read is written as well.
        """
        first_line = True
        while results_reader.continue_reading() and (first_line or len(results_reader.line[line_index]) == 0):
            output_file.write_row(results_reader.line)
            results_reader.__read_line()
            first_line = False
        if not results_reader.continue_reading():
            output_file.write_row(results_reader.line)

    @staticmethod
    def add_until_change(results_reader, line_index):
        """Same traversal as ``copy_until_change`` but returns the rows as a list."""
        lines_array = []
        first_line = True
        while results_reader.continue_reading() and (first_line or len(results_reader.line[line_index]) == 0):
            lines_array.append(results_reader.line)
            results_reader.__read_line()
            first_line = False
        if not results_reader.continue_reading():
            lines_array.append(results_reader.line)
        return lines_array

    @staticmethod
    def set_percentages(line, line_total):
        """Fill the percentage cells of ``line`` in place and return it.

        For every index flagged by ``CSVConstants.is_percentage_index`` the
        cell is replaced with ``MathUtils.calculate_percent(total, value)``,
        where both operands come from the cell just before it (in
        ``line_total`` and ``line`` respectively).
        """
        assert (len(line) == len(line_total))
        for index in range(len(line)):
            if CSVConstants.is_percentage_index(index):
                total, value = line_total[index - 1], line[index - 1]
                percentage = MathUtils.calculate_percent(total, value)
                line[index] = percentage
        return line

    @staticmethod
    def convert_lines(lines):
        """Apply ``convert_line`` to every row."""
        return [ResultsReader.convert_line(line) for line in lines]

    @staticmethod
    def convert_line(line):
        """Return a copy of ``line`` with its cells converted from strings."""
        new_line = []

        #       0        1      2      3    4         5               6
        # Dataset, Filename, #rows, Coder, %, Error Threshold, Window Param
        #
        new_line.append(line[CSVConstants.INDEX_DATASET])
        new_line.append(line[CSVConstants.INDEX_FILENAME])
        no_rows = line[CSVConstants.INDEX_NO_ROWS]
        # NOTE(review): if rows come straight from the CSV reader, no_rows is
        # presumably a str, in which case this check never passes and '' is
        # always stored - confirm whether that is intended.
        new_line.append(MathUtils.str_to_int(no_rows) if isinstance(no_rows, int) else '')
        new_line.append(line[CSVConstants.INDEX_ALGORITHM])  # Coder
        threshold = line[CSVConstants.INDEX_THRESHOLD]
        new_line.append(int(threshold) if len(threshold) > 0 else None)  # %
        new_line.append('')  # Error Threshold
        window = line[CSVConstants.INDEX_WINDOW]
        new_line.append(int(window) if len(window) > 0 else None)  # Window Param

        #        7       8                    9                   10                    11              12
        # Size (B), CR (%), Delta - Size (data), Delta - Size (mask), Delta - Size (total), Delta - CR (%), ...
        #
        for index in range(CSVConstants.INDEX_TOTAL_SIZE, len(line)):
            if CSVConstants.is_percentage_index(index):
                value = line[index]
                if value.count('.') > 1:  # e.g. "1.145.49"
                    value = value.replace('.', '', 1)  # "1.145.49" => "1145.49"
                new_line.append(float(value))
            else:
                new_line.append(MathUtils.str_to_int(line[index]))
        return new_line
# One-off script: copies "results-mm3.csv" to "results-mm3-p.csv", rewriting
# column 4 from a fraction to an integer percentage (e.g. "0.29" -> 29).
import sys
sys.path.append('.')

from file_utils.csv_utils.csv_writer import CSVWriter
from file_utils.csv_utils.csv_reader import CSVReader

path = "/Users/pablocerve/Documents/FING/Proyecto/pc-tesis/dataset_parser/scripts/informe/results/3.1/06.2020/2-complete"
file = "results-mm3.csv"
new_file = "results-mm3-p.csv"

reader = CSVReader(path, file)
writer = CSVWriter(path, new_file)
try:
    while reader.continue_reading:
        line = reader.read_line()
        # Only cells containing the character "0" are converted.
        # NOTE(review): presumably this is meant to skip header/empty cells -
        # confirm against the input file.
        if "0" in line[4]:
            # Round before truncating: float("0.29") * 100 evaluates to
            # 28.999999999999996, so a bare int() would store 28 instead of 29.
            line[4] = int(round(float(line[4]) * 100))
        writer.write_row(line)
finally:
    # Close both files even if a row cannot be converted.
    writer.close()
    reader.close()
class CSVCompare:
    """Row-by-row comparison of two CSV files.

    Header rows must match exactly. Data rows are compared cell by cell,
    optionally allowing a per-column maximum difference.
    """

    def __init__(self, file1_path, file1_filename, file2_path, file2_filename):
        self.file1 = CSVReader(file1_path, file1_filename)
        self.file2 = CSVReader(file2_path, file2_filename)
        self.file1_first_data_row = CSVReader.first_data_row(file1_path, file1_filename)
        self.file2_first_data_row = CSVReader.first_data_row(file2_path, file2_filename)
        # Both files must start their data section on the same row.
        assert(self.file1_first_data_row == self.file2_first_data_row)

    def compare(self, error_thresholds=None, abort=True):
        """Compare both files and print the outcome.

        ``error_thresholds`` is an array with the maximum difference allowed
        between the original and the compressed values in a near-lossless
        compression schema. If it is None, a lossless compression schema is
        assumed.

        If ``abort`` is True the comparison stops as soon as an error is found.

        Returns True iff there is no error.
        """
        self.error_thresholds = self._check_error_thresholds(error_thresholds)
        self.abort = abort
        result = self._check_same_file()
        self._print_result(result)
        return result

    ####################################################################################################################

    @classmethod
    def _check_error_thresholds(cls, error_thresholds):
        """Assert every threshold is NO_DATA or a non-negative int; return the input."""
        if error_thresholds is None:
            return error_thresholds
        for threshold in error_thresholds:
            assert(threshold == PandasTools.NO_DATA or (isinstance(threshold, int) and threshold >= 0))
        return error_thresholds

    def _get_threshold(self, col_index):
        """Return the threshold for ``col_index`` (0 when no thresholds are set)."""
        thresholds = self.error_thresholds
        if thresholds is None:
            return 0
        assert(col_index < len(thresholds))
        # A GAMPS-coder specific lookup (thresholds shared by groups of
        # columns) was sketched here in commented-out form and is not active.
        return thresholds[col_index]

    def _print_result(self, same_file):
        """Print the verdict and the thresholds used for the comparison."""
        print("SAME FILES!" if same_file else "DIFFERENT FILES!")
        if self.error_thresholds is not None:
            print("Compared with thresholds = ", self.error_thresholds)
        else:
            print("Compared with all thresholds = 0.")

    def _check_same_file(self):
        """Read both files in lockstep and return True iff no mismatch is found."""
        identical = True
        self.row_count = 0
        while self.file1.continue_reading and self.file2.continue_reading:
            row1 = self.file1.read_line()
            row2 = self.file2.read_line()
            if self.row_count < self.file1_first_data_row + 1:
                # a mismatch in the header rows always ends the comparison
                identical = self._compare_header_rows(row1, row2)
                stop = not identical
            elif self._compare_data_rows(row1, row2):
                stop = False
            else:
                # a mismatch in the data rows ends the comparison only when
                # the self.abort flag is set
                identical = False
                stop = self.abort
            self.row_count += 1
            if stop:
                break
        if self._only_one_file_ends():
            identical = False
        return identical

    @classmethod
    def _compare_header_rows(cls, row1, row2):
        """Return True iff the two non-data rows match exactly; print them otherwise."""
        if row1 != row2:
            print("Difference in the header rows")
            print(row1)
            print(row2)
            return False
        return True

    def _compare_data_rows(self, row1, row2):
        """Compare two data rows cell by cell.

        Returns False iff:
        - the length of the rows does not match.
        OR
        - the error_threshold constraint does not hold.
        """
        if len(row1) != len(row2):
            print("len(row1) = %s != %s = len(row2)" % (len(row1), len(row2)))
            return False

        rows_match = True
        for col_index, (value1, value2) in enumerate(zip(row1, row2)):
            if self._compare_values(value1, value2, col_index):
                continue
            rows_match = False
            self._print_idx_error(col_index, value1, value2)
        return rows_match

    def _compare_values(self, value1, value2, col_index):
        """Return True iff the two cells are equal within the column threshold."""
        if value1 == PandasTools.NO_DATA or value2 == PandasTools.NO_DATA:
            # both values must be PandasTools.NO_DATA
            return not (value1 != PandasTools.NO_DATA or value2 != PandasTools.NO_DATA)

        error = self._get_threshold(col_index)
        assert(isinstance(error, int) and error >= 0)
        if error == 0:
            # compare strings instead of int
            return not (value1 != value2)

        # compare ints
        abs_diff = abs(int(value1) - int(value2))
        if abs_diff > error:
            print('abs_diff', abs_diff, 'error_threshold', error)
            return False
        return True

    def _print_idx_error(self, col_index, value1, value2):
        """Print the position and the values of a mismatching cell."""
        print("row_count = %s, col_index = %s, value1 = '%s', value2 = '%s'" %
              (self.row_count, col_index, value1, value2))

    def _only_one_file_ends(self):
        """Return True iff one file has more rows than the other."""
        file1_has_more = bool(self.file1.continue_reading)
        file2_has_more = bool(self.file2.continue_reading)
        if file2_has_more and not file1_has_more:
            print("file1 is shorter than file2")
            return True
        if file1_has_more and not file2_has_more:
            print("file2 is shorter than file1")
            return True
        return False