示例#1
0
def compare_pca_results():
    """Merge the original and the mask PCA result files into one CSV.

    Reads "results-original.csv" and "results-mask.csv" from
    ``pca_mask_folder`` in lockstep and writes "results-merge.csv" to the
    same folder. Both inputs must report the same ``total_lines``.

    For every row whose coder cell (index 3) is "Coder" or "CoderBase" only
    the parsed original row is written. For every other row three rows are
    written: the original row (tagged 'O'), the mask row (tagged 'M') and a
    row holding the per-column difference for the columns from index 7 on.
    """
    original = CSVReader(pca_mask_folder, "results-original.csv")
    results = CSVReader(pca_mask_folder, "results-mask.csv")
    csv_writer = CSVWriter(pca_mask_folder, "results-merge.csv")

    assert original.total_lines == results.total_lines

    while original.continue_reading:
        original_line = original.read_line()
        results_line = results.read_line()

        if original_line[3] in ["Coder", "CoderBase"]:
            # Only the original row is emitted for these coders.
            csv_writer.write_row(parse_line(original_line))
            continue

        if len(original_line[4]) > 0:  # algorithm params line
            original_line, results_line = write_params_line(
                csv_writer, original_line, results_line)
        original_line = parse_line(original_line)
        results_line = parse_mask_line(results_line)

        csv_writer.write_row(['O'] + original_line[1:] + ['O'])
        csv_writer.write_row(['M'] + results_line[1:] + ['M'])

        diff = []
        for i in range(7, len(original_line)):
            original_value = original_line[i]
            results_value = results_line[i]
            if i % 2 == 0:
                # even columns: floats (percentages)
                diff_value = diff_scenarios(
                    float(results_value) - float(original_value))
            elif original_value == results_value:
                # odd columns: integers (bits), unchanged
                diff_value = '==='
            else:
                # odd columns: integers (bits), changed; the marker is
                # followed by the signed difference as-is
                delta = c_int(results_value) - c_int(original_value)
                marker = '+++' if delta > 0 else '---'
                diff_value = marker + str(delta)
            diff.append(diff_value)
        csv_writer.write_row(original_line[:7] + diff)

    original.close()
    results.close()
    csv_writer.close()
示例#2
0
def read_data(input_path, input_filename, column_index):
    """Read one column of a CSV file as a float array.

    The column name is taken from cell ``column_index`` of the row read
    after ``goto_row(3)``; the values are read from the first data row to
    the end of the file.

    Args:
        input_path: folder passed to ``CSVReader``.
        input_filename: file name passed to ``CSVReader``.
        column_index: index of the column to extract from every row.

    Returns:
        ``[column_name, data]`` where ``data`` is a 1-D float numpy array
        (empty when the file has no data rows).

    Raises:
        ValueError: if a cell of the column cannot be converted to float.
    """
    csv_file = CSVReader(input_path, input_filename)
    try:
        csv_file.goto_row(3)
        column_name = csv_file.read_line()[column_index]
        csv_file.goto_first_data_row()

        # Collect in a list and build the array once: np.append copies the
        # whole array on every call, which made the loop quadratic.
        values = []
        while csv_file.continue_reading:
            values.append(float(csv_file.read_line()[column_index]))
    finally:
        # The reader was previously left open.
        csv_file.close()

    return [column_name, np.array(values, dtype=float)]
示例#3
0
def filter_dataset(original_path,
                   original_filename,
                   csv_writer,
                   algorithms_array,
                   print_header=False):
    """Copy the rows of a results file that belong to the given algorithms.

    Every row of the input file is read in order and written to
    ``csv_writer`` when one of the following holds:

    * its first cell is "Dataset" and ``print_header`` is true;
    * its first cell is not "Dataset" and its filename cell (index 1) is
      not empty;
    * its first cell is not "Dataset" and the most recent non-empty
      algorithm cell (index 3), including the one of this row, is in
      ``algorithms_array``.
    """
    csv_reader = CSVReader(original_path, original_filename)

    keep_algorithm_rows = False
    while csv_reader.continue_reading:
        row = csv_reader.read_line()
        filename = row[1]
        algorithm_name = row[3]

        # A non-empty algorithm cell starts a new group of rows; rows with
        # an empty cell inherit the decision taken for the group.
        if len(algorithm_name) > 0:
            keep_algorithm_rows = algorithm_name in algorithms_array

        if row[0] == "Dataset":
            should_write = print_header
        else:
            should_write = len(filename) > 0 or keep_algorithm_rows

        if should_write:
            csv_writer.write_row(row)

    csv_reader.close()
示例#4
0
def compare():
    """Compare an original dataset CSV with its decoded counterpart.

    Reads "vwc_1202.dat.csv" from the "[1]irkis/" datasets folder and
    "vwc_1202.dat.csv-CoderSF-Decode.csv" from the C++ project's test_files
    folder row by row, handing every pair of rows to ``compare_lines``.
    Prints "COMPARE SUCCESS!!" once the first file has been fully read.

    Raises:
        AssertionError: if the two readers get out of step.
    """
    test_files_path = OSUtils.cpp_project_path() + "/test_files"
    csv_reader_1 = CSVReader(OSUtils.datasets_csv_path() + "[1]irkis/",
                             "vwc_1202.dat.csv")
    csv_reader_2 = CSVReader(test_files_path,
                             "vwc_1202.dat.csv-CoderSF-Decode.csv")

    try:
        # NOTE(review): the loop only watches the first reader, so rows left
        # over in the second file are not reported - confirm this is intended.
        while csv_reader_1.continue_reading:
            assert (csv_reader_1.current_line_count ==
                    csv_reader_2.current_line_count)
            current_line_count = csv_reader_1.current_line_count

            line_1 = csv_reader_1.read_line()
            line_2 = csv_reader_2.read_line()

            compare_lines(line_1, line_2, current_line_count)

        # Call form of print: valid on both Python 2 and Python 3 (the
        # statement form is a SyntaxError on Python 3).
        print("COMPARE SUCCESS!!")
    finally:
        # Release both files even when a comparison or assertion fails.
        csv_reader_1.close()
        csv_reader_2.close()
class GzipResultsReader(object):
    """Looks up gzip results in a CSV file through a forward-only search."""

    def __init__(self, path, filename):
        """Open the CSV file ``filename`` located in ``path``."""
        self.input = CSVReader(path, filename)

    def gzip_and_base_bits(self, dataset_name, filename, column_name):
        """Return ``(gzip_bits, base_bits)`` for a dataset/file/column.

        The file is rewound and then searched three times in a row: for a
        row whose cell 0 equals ``dataset_name``, then (starting on the row
        after that match) for a row whose cell 1 equals ``filename``
        (which can be 'Global'), then for a row whose cell 2 equals
        ``column_name``. Cells 4 and 5 of the last matched row are returned
        as integers.

        Raises:
            KeyError: if the end of the file is reached during a search.
        """
        self.input.goto_row(0)
        wanted_cells = (dataset_name, filename, column_name)
        for col_index, wanted in enumerate(wanted_cells):
            self.__find_match(col_index, wanted)
        matched_row = self.line
        return int(matched_row[4]), int(float(matched_row[5]))

    def __find_match(self, col_index, value):
        """Advance to the next row whose cell ``col_index`` equals ``value``.

        The matched row is stored in ``self.line``.
        """
        reader = self.input
        while reader.continue_reading:
            candidate = reader.read_line()
            if candidate[col_index] == value:
                self.line = candidate
                return
        raise KeyError('Reached EOF')
示例#6
0
class ResultsReader(object):
    """Cursor-style reader over a results CSV file.

    The reader keeps the most recently read row in ``self.line`` and the
    number of rows read since the last rewind in ``self.line_count``. The
    ``find_*`` methods move that cursor; ``copy_until_change`` and
    ``add_until_change`` consume rows starting at the cursor.
    """

    def __init__(self, mode, mask_mode):
        """Open the results file.

        When ``mask_mode`` is "NM" or "M" the path and filename come from
        ``ResultsPaths.get_path_and_filename(mode, mask_mode)``. Otherwise
        ``mode`` is used as the path and ``mask_mode`` as the filename.
        """
        # Give the cursor a defined state from the start. Methods that do
        # not rewind first (find_threshold, copy_until_change,
        # add_until_change) used to fail with AttributeError when called
        # on a fresh reader.
        self.line = None
        self.line_count = 0

        if mask_mode in ["NM", "M"]:
            input_path, input_filename = ResultsPaths.get_path_and_filename(
                mode, mask_mode)
        else:
            input_path, input_filename = mode, mask_mode
        self.input_file = CSVReader(input_path, input_filename)

    def read_line_no_count(self):
        """Read and return the next row without touching the cursor state."""
        return self.input_file.read_line()

    def __read_line(self):
        """Read the next row into ``self.line``, count it and return it."""
        self.line_count += 1
        self.line = self.input_file.read_line()
        return self.line

    def __goto_file_start(self):
        """Rewind the file to row 0 and reset the cursor state."""
        self.input_file.goto_row(0)
        self.line = None
        self.line_count = 0

    def full_results(self):
        """Return every row after the headers row as a list."""
        self.__goto_file_start()
        self.__read_line()  # headers line
        lines_array = []
        while self.continue_reading():
            self.__read_line()
            lines_array.append(self.line)
        return lines_array

    def dataset_results(self,
                        dataset_name,
                        change_index=CSVConstants.INDEX_DATASET):
        """Return the rows from the first row of ``dataset_name`` onwards.

        Rows are collected by ``add_until_change`` using ``change_index``.
        """
        self.find_dataset(dataset_name)
        return ResultsReader.add_until_change(self, change_index)

    def filename_results(self,
                         dataset_name,
                         filename,
                         change_index=CSVConstants.INDEX_FILENAME):
        """Return the rows from the first row of ``filename`` onwards.

        The file is located inside ``dataset_name``; rows are collected by
        ``add_until_change`` using ``change_index``.
        """
        self.find_filename_in_dataset(dataset_name, filename)
        return ResultsReader.add_until_change(self, change_index)

    def find_dataset(self, dataset_name):
        """Rewind and move the cursor to the first row of ``dataset_name``."""
        self.__goto_file_start()
        self.__find_next_line(CSVConstants.INDEX_DATASET, dataset_name, False)

    def find_filename(self, filename):
        """Rewind and move the cursor to the first row of ``filename``."""
        self.__goto_file_start()
        self.__find_next_line(CSVConstants.INDEX_FILENAME, filename, False)

    def find_threshold(self, threshold):
        """Move the cursor to the next row with the integer ``threshold``.

        Unlike the other ``find_*`` methods this one does not rewind: the
        search starts at the current row.
        """
        self.__find_next_line(CSVConstants.INDEX_THRESHOLD, threshold, True)

    def find_filename_in_dataset(self, dataset_name, filename):
        """Find ``dataset_name`` and then, from there, ``filename``."""
        self.find_dataset(dataset_name)
        self.__find_next_line(CSVConstants.INDEX_FILENAME, filename, False)

    def continue_reading(self):
        """Return the underlying reader's ``continue_reading`` flag."""
        return self.input_file.continue_reading

    def __find_next_line(self, index, value, is_integer):
        """Move the cursor to the next row whose cell ``index`` is ``value``.

        The current row is accepted when at least one row has been read
        since the last rewind and it already matches.

        Returns:
            True once a matching row is the current row.

        Raises:
            Exception: if the input is exhausted without a match.
        """
        if self.line_count > 0 and ResultsReader.matching_line(
                self.line, index, value, is_integer):
            return True

        while self.input_file.continue_reading:
            self.__read_line()
            if ResultsReader.matching_line(self.line, index, value,
                                           is_integer):
                return True
        raise Exception("ERROR: __find_next_line")

    @staticmethod
    def matching_line(line, index, value, is_integer):
        """Tell whether cell ``index`` of ``line`` equals ``value``.

        An empty cell never matches. When ``is_integer`` is true the cell is
        converted with ``int`` before the comparison.
        """
        value_in_index = line[index]
        if len(value_in_index) == 0:
            return False
        value_to_compare = int(
            value_in_index) if is_integer else value_in_index
        return value == value_to_compare

    @staticmethod
    def copy_until_change(results_reader, output_file, line_index):
        """Write rows to ``output_file`` starting at the reader's cursor.

        The current row is always written first; the following rows are
        written while their cell ``line_index`` is empty. If the input is
        exhausted when the loop ends, the last row read is written too.
        """
        first_line = True
        while results_reader.continue_reading() and (first_line or len(
                results_reader.line[line_index]) == 0):
            output_file.write_row(results_reader.line)
            # Name mangling applies inside the class body, so this resolves
            # to the private _ResultsReader__read_line of the instance.
            results_reader.__read_line()
            first_line = False
        if not results_reader.continue_reading():
            output_file.write_row(results_reader.line)

    @staticmethod
    def add_until_change(results_reader, line_index):
        """Collect rows into a list starting at the reader's cursor.

        Same traversal as ``copy_until_change``, but the rows are returned
        instead of being written to a file.
        """
        lines_array = []
        first_line = True
        while results_reader.continue_reading() and (first_line or len(
                results_reader.line[line_index]) == 0):
            lines_array.append(results_reader.line)
            results_reader.__read_line()
            first_line = False
        if not results_reader.continue_reading():
            lines_array.append(results_reader.line)
        return lines_array

    @staticmethod
    def set_percentages(line, line_total):
        """Fill the percentage cells of ``line`` in place and return it.

        For every percentage index (per ``CSVConstants.is_percentage_index``)
        the cell is set to ``MathUtils.calculate_percent(total, value)``,
        where both arguments come from the cell just before it in
        ``line_total`` and ``line`` respectively.
        """
        assert (len(line) == len(line_total))
        for index in range(len(line)):
            if CSVConstants.is_percentage_index(index):
                total, value = line_total[index - 1], line[index - 1]
                percentage = MathUtils.calculate_percent(total, value)
                line[index] = percentage
        return line

    @staticmethod
    def convert_lines(lines):
        """Apply ``convert_line`` to every row and return the new rows."""
        return [ResultsReader.convert_line(line) for line in lines]

    @staticmethod
    def convert_line(line):
        """Return a copy of ``line`` with its cells converted to numbers.

        Empty threshold and window cells become None; the error threshold
        cell is always replaced by ''.
        """
        new_line = []
        #    0         1        2     3    4         5                6
        # Dataset, Filename, #rows, Coder, %, Error Threshold, Window Param
        #
        new_line.append(line[CSVConstants.INDEX_DATASET])
        new_line.append(line[CSVConstants.INDEX_FILENAME])

        # NOTE(review): the other cells of the row are handled as strings;
        # if this one is a string too the isinstance check is always false
        # and the #rows cell always becomes '' - confirm this is intended.
        no_rows = line[CSVConstants.INDEX_NO_ROWS]
        new_line.append(
            MathUtils.str_to_int(no_rows) if isinstance(no_rows, int) else '')

        new_line.append(line[CSVConstants.INDEX_ALGORITHM])  # Coder
        threshold = line[CSVConstants.INDEX_THRESHOLD]
        new_line.append(int(threshold) if len(threshold) > 0 else None)  # %
        new_line.append('')  # Error Threshold
        window = line[CSVConstants.INDEX_WINDOW]
        new_line.append(
            int(window) if len(window) > 0 else None)  # Window Param

        #    7         8             9                     10                  11                 12
        # Size (B), CR (%), Delta - Size (data), Delta - Size (mask), Delta - Size (total), Delta - CR (%), ...
        #
        for index in range(CSVConstants.INDEX_TOTAL_SIZE, len(line)):
            if CSVConstants.is_percentage_index(index):
                value = line[index]
                if value.count('.') > 1:  # e.g. "1.145.49"
                    value = value.replace('.', '',
                                          1)  # "1.145.49" => "1145.49"
                new_line.append(float(value))
            else:
                new_line.append(MathUtils.str_to_int(line[index]))
        return new_line
示例#7
0
import sys
sys.path.append('.')

from file_utils.csv_utils.csv_writer import CSVWriter
from file_utils.csv_utils.csv_reader import CSVReader

# One-off conversion script: copies results-mm3.csv to results-mm3-p.csv,
# rewriting the cell at index 4 from a fraction to a whole percentage.
path = "/Users/pablocerve/Documents/FING/Proyecto/pc-tesis/dataset_parser/scripts/informe/results/3.1/06.2020/2-complete"

file = "results-mm3.csv"

new_file = "results-mm3-p.csv"

reader = CSVReader(path, file)
writer = CSVWriter(path, new_file)

try:
    while reader.continue_reading:
        line = reader.read_line()
        # NOTE(review): any cell containing the character "0" is treated as
        # a fraction to convert - confirm no other kind of value can match.
        if "0" in line[4]:
            # Round before converting: binary floats make e.g.
            # float("0.29") * 100 == 28.999999999999996, which a bare int()
            # would truncate to 28.
            line[4] = int(round(float(line[4]) * 100))
        writer.write_row(line)
finally:
    # Close both files even if a row cannot be converted.
    writer.close()
    reader.close()
示例#8
0
class CSVCompare:
    """Row-by-row comparison of two CSV files.

    Header rows must match exactly; data rows are compared cell by cell,
    optionally allowing a per-column maximum difference.
    """

    def __init__(self, file1_path, file1_filename, file2_path, file2_filename):
        """Open both files; they must report the same first data row."""
        self.file1 = CSVReader(file1_path, file1_filename)
        self.file2 = CSVReader(file2_path, file2_filename)

        self.file1_first_data_row = CSVReader.first_data_row(file1_path, file1_filename)
        self.file2_first_data_row = CSVReader.first_data_row(file2_path, file2_filename)

        assert self.file1_first_data_row == self.file2_first_data_row

    def compare(self, error_thresholds=None, abort=True):
        """Compare the two files and print the outcome.

        Args:
            error_thresholds: array with the maximum difference allowed
                between the original and the compressed values, one entry
                per column (near-lossless compression schema). None means a
                lossless schema: every threshold is 0.
            abort: when True the comparison stops at the first data row
                that has an error.

        Returns:
            True iff there is no error.
        """
        self.error_thresholds = self._check_error_thresholds(error_thresholds)
        self.abort = abort
        outcome = self._check_same_file()
        self._print_result(outcome)
        return outcome

    ####################################################################################################################

    @classmethod
    def _check_error_thresholds(cls, error_thresholds):
        """Assert every threshold is NO_DATA or a non-negative int."""
        if error_thresholds is None:
            return error_thresholds
        for threshold in error_thresholds:
            assert threshold == PandasTools.NO_DATA or (isinstance(threshold, int) and threshold >= 0)
        return error_thresholds

    def _get_threshold(self, col_index):
        """Return the threshold of a column (0 when none were given)."""
        thresholds = self.error_thresholds
        if thresholds is None:
            return 0
        assert col_index < len(thresholds)
        return thresholds[col_index]

    def _print_result(self, same_file):
        """Print the verdict followed by the thresholds that were used."""
        verdict = "SAME FILES!" if same_file else "DIFFERENT FILES!"
        print(verdict)
        if self.error_thresholds is not None:
            print("Compared with thresholds = ", self.error_thresholds)
        else:
            print("Compared with all thresholds = 0.")

    def _check_same_file(self):
        """Walk both files in lockstep; return True iff no error is found."""
        identical = True
        self.row_count = 0

        while self.file1.continue_reading and self.file2.continue_reading:
            row1 = self.file1.read_line()
            row2 = self.file2.read_line()

            stop = False
            if self.row_count < self.file1_first_data_row + 1:
                # a mismatch in the header rows always ends the comparison
                identical = self._compare_header_rows(row1, row2)
                stop = not identical
            elif not self._compare_data_rows(row1, row2):
                # a mismatch in the data rows ends it only when aborting
                identical = False
                stop = self.abort

            self.row_count += 1
            if stop:
                break

        if self._only_one_file_ends():
            identical = False

        return identical

    @classmethod
    def _compare_header_rows(cls, row1, row2):
        """Return True iff the two non-data rows match exactly."""
        if row1 != row2:
            print("Difference in the header rows")
            print(row1)
            print(row2)
            return False
        return True

    def _compare_data_rows(self, row1, row2):
        """Compare two data rows cell by cell.

        Returns False iff the length of the rows does not match OR the
        error threshold constraint does not hold for some column. Every
        offending column is reported.
        """
        len1, len2 = len(row1), len(row2)
        if len1 != len2:
            print("len(row1) = %s != %s = len(row2)" % (len1, len2))
            return False

        row_ok = True
        for col_index, (value1, value2) in enumerate(zip(row1, row2)):
            if self._compare_values(value1, value2, col_index):
                continue
            row_ok = False
            self._print_idx_error(col_index, value1, value2)

        return row_ok

    def _compare_values(self, value1, value2, col_index):
        """Return True iff two cells are equal within the column threshold."""
        first_missing = value1 == PandasTools.NO_DATA
        second_missing = value2 == PandasTools.NO_DATA
        if first_missing or second_missing:
            # both values must be PandasTools.NO_DATA
            return first_missing and second_missing

        threshold = self._get_threshold(col_index)
        assert isinstance(threshold, int) and threshold >= 0

        if threshold == 0:
            # compare strings instead of ints
            return value1 == value2

        # compare ints
        gap = abs(int(value1) - int(value2))
        if gap <= threshold:
            return True
        print('abs_diff', gap, 'error_threshold', threshold)
        return False

    def _print_idx_error(self, col_index, value1, value2):
        """Print where a mismatch was found and the two values involved."""
        details = (self.row_count, col_index, value1, value2)
        print("row_count = %s, col_index = %s, value1 = '%s', value2 = '%s'" % details)

    def _only_one_file_ends(self):
        """Return True iff one file has more rows than the other."""
        first_has_rows = self.file1.continue_reading
        second_has_rows = self.file2.continue_reading
        if second_has_rows and not first_has_rows:
            print("file1 is shorter than file2")
            return True
        if first_has_rows and not second_has_rows:
            print("file2 is shorter than file1")
            return True
        return False