Exemplo n.º 1
0
    def analyze_file(self) -> csvhelper.Dialect:
        """Work out the structure of the file and store it on this instance.

        Determines whether or not the file is delimited, what the delimiter
        is, etc.  When a delimiter is already set on the instance the dialect
        is built from the instance's own settings; otherwise it comes from
        self._get_dialect().

        Returns:
            The dialect stored in self.dialect.

        Raises:
            IOErrorEmptyFile: if the file at self.fqfn has a size of zero.
        """
        if os.path.getsize(self.fqfn) == 0:
            raise IOErrorEmptyFile("Empty File")

        if not self._delimiter:
            # No delimiter on the instance: obtain the dialect and copy its
            # quoting and delimiter back onto the instance.
            self.dialect = self._get_dialect()
            self.dialect.lineterminator = '\n'
            self._quoting_num = self.dialect.quoting
            self._delimiter = self.dialect.delimiter
        else:
            # Delimiter already set: default the quoting to QUOTE_MINIMAL
            # when none was given, then build the dialect explicitly.
            if self._quoting_num is None:
                self._quoting_num = csv.QUOTE_MINIMAL
            delimiter = self._delimiter
            has_header = self._get_has_header()
            quoting = self._quoting_num
            quote_char = self.quote_char
            self.dialect = csvhelper.Dialect(delimiter, has_header, quoting,
                                             quote_char, None, None, '\n',
                                             False)

        self.format_type = self._get_format_type()

        # Keep the dialect and the instance in agreement about the header.
        self.dialect.has_header = self._get_has_header(self._has_header)
        self._has_header = self.dialect.has_header

        # Not dialect attributes themselves; the original note says these
        # actually use the csv dialect info.
        self.field_cnt = self._get_field_cnt()
        counts = self._count_records()
        self.record_cnt, self.record_cnt_is_est = counts

        return self.dialect
Exemplo n.º 2
0
    def setup_method(self, method):
        """Create two fixture files in a temp dir plus the Config describing them."""
        self.temp_dir = tempfile.mkdtemp(prefix='gristle_diff_')

        # The dialect is constructed with a pipe delimiter and then switched
        # to a tab before any file is generated.
        self.dialect = csvhelper.Dialect(delimiter='|',
                                         quoting=csv.QUOTE_NONE,
                                         has_header=False)
        self.dialect.delimiter = '\t'

        old_rows = [['chg-row', '4', '14'],
                    ['del-row', '6', '16'],
                    ['same-row', '8', '18']]
        new_rows = [['chg-row', '4', '1a'],
                    ['new-row', '13a', '45b'],
                    ['same-row', '8', '18']]
        self.file1 = generate_test_file(self.temp_dir, 'old_', '.csv',
                                        self.dialect, old_rows)
        self.file2 = generate_test_file(self.temp_dir, 'new_', '.csv',
                                        self.dialect, new_rows)

        self.config = Config(self.temp_dir)
        # Each entry is handed to add_property() on its own, in this order.
        properties = [
            {'delimiter': 'tab'},
            {'has_header': False},
            {'quoting': csv.QUOTE_NONE},
            {'col_names': ['col0', 'col1', 'col2']},
            {'key_cols': ['0']},
            {'compare_cols': ['2']},
            {'temp_dir': self.temp_dir},
            {'files': [self.file1, self.file2]},
        ]
        for prop in properties:
            self.config.add_property(prop)
        self.config.add_assignment('chgnew', 'col1', 'copy', None, 'old',
                                   'col0')
Exemplo n.º 3
0
 def setup_method(self, method):
     """Create the input temp dir, dialect, test file and output temp dir."""
     self.temp_dir = tempfile.mkdtemp(prefix='gristle_test_')

     dialect_settings = {
         'delimiter': ',',
         'quoting': csv.QUOTE_NONE,
         'has_header': False,
     }
     self.dialect = csvhelper.Dialect(**dialect_settings)

     self.fqfn = create_test_file(self.temp_dir)
     # Output goes to its own, separately created temp directory.
     self.out_dir = tempfile.mkdtemp(prefix='gristle_out_')
Exemplo n.º 4
0
 def setup_method(self, method):
     """Generate a 100-record test file and run a FileTyper analysis on it."""
     self.record_cnt = 100
     self.dialect = csvhelper.Dialect(
         delimiter='|', quoting=csv.QUOTE_NONE, has_header=False)
     self.test_fqfn = generate_test_file1(self.dialect, self.record_cnt)

     # Store the typer on the instance first, then analyze the file.
     typer = mod.FileTyper(self.dialect, self.test_fqfn)
     self.file_typer = typer
     typer.analyze_file()
Exemplo n.º 5
0
    def setup_method(self, method):
        """Create the temp dir, the dialect and the test files, timing the file creation."""
        self.temp_dir = tempfile.mkdtemp(prefix='gristle_diff_')
        self.dialect = csvhelper.Dialect(delimiter=',',
                                         quoting=csv.QUOTE_NONE,
                                         has_header=False)

        began = time.time()
        print('\ncreating test files - starting')
        self.files = CreateTestFiles(1000000, self.temp_dir)
        # Duration is reported in whole seconds.
        elapsed_secs = int(time.time() - began)
        print('creating test files - done with duration of %d seconds' % elapsed_secs)
Exemplo n.º 6
0
    def test_quote_all(self):
        """Analyze a file generated from a '|' / QUOTE_ALL dialect and check its counts."""
        quote_all_dialect = csvhelper.Dialect(
            delimiter='|', quoting=csv.QUOTE_ALL, has_header=False)
        self.test_fqfn = generate_test_file1(quote_all_dialect,
                                             self.record_cnt)

        typer = mod.FileTyper(quote_all_dialect, self.test_fqfn)
        typer.analyze_file()

        assert typer.record_cnt == self.record_cnt
        assert typer.field_cnt == 4
Exemplo n.º 7
0
    def test_load_and_gets(self):
        """Load a Header from a team file and exercise its lookup methods."""
        team_dialect = csvhelper.Dialect(delimiter='|',
                                         quoting=csv.QUOTE_ALL,
                                         has_header=True)
        team_fqfn = ttools.make_team_file(self.temp_dir, team_dialect, 10)

        hdr = csvhelper.Header()
        hdr.load_from_file(team_fqfn, team_dialect)

        # Both name collections must be non-empty after loading.
        assert len(hdr.raw_field_names)
        assert len(hdr.field_names)

        # Lookups by name and by position.
        assert hdr.get_field_position('role') == 2
        assert hdr.get_field_name(3) == 'name'

        # The "any" lookup accepts a position given as a string or a name.
        assert hdr.get_field_position_from_any('3') == 3
        assert hdr.get_field_position_from_any('name') == 3
Exemplo n.º 8
0
    def test_empty_file(self):
        """get_dialect() must raise EOFError for a team file made with a count of 0."""

        dialect = csvhelper.Dialect(delimiter='|', quoting=csv.QUOTE_ALL, has_header=False)
        fqfn = ttools.make_team_file(self.temp_dir, dialect, 0)

        # The call is expected to raise, so its return value is not captured.
        with pytest.raises(EOFError):
            csvhelper.get_dialect([fqfn],
                                  delimiter=None,
                                  quoting=None,
                                  quotechar=None,
                                  has_header=None,
                                  doublequote=None,
                                  escapechar=None,
                                  skipinitialspace=False,
                                  verbosity='normal')
Exemplo n.º 9
0
    def test_multiple_files(self):
        """get_dialect() over two team files (made with counts 0 and 1000) yields the expected dialect."""
        source_dialect = csvhelper.Dialect(delimiter='|',
                                           quoting=csv.QUOTE_ALL,
                                           has_header=False)
        fqfn1 = ttools.make_team_file(self.temp_dir, source_dialect, 0)
        fqfn2 = ttools.make_team_file(self.temp_dir, source_dialect, 1000)

        # No dialect setting is supplied; every one of them is passed as None.
        no_overrides = {
            'delimiter': None,
            'quoting': None,
            'quotechar': None,
            'has_header': None,
            'doublequote': None,
            'escapechar': None,
            'skipinitialspace': False,
            'verbosity': 'normal',
        }
        detected = csvhelper.get_dialect([fqfn1, fqfn2], **no_overrides)

        assert detected.delimiter == '|'
        assert detected.quoting == csv.QUOTE_ALL
        assert detected.quotechar == '"'
        assert detected.has_header is False
Exemplo n.º 10
0
 def test_nondup(self):
     """Run gristle_sorter with key 0sf and check the first field of the first five output rows."""
     in_fqfn = create_test_file(self.temp_dir, duplicate=True)
     out_fqfn = in_fqfn + '.sorted'
     dialect = csvhelper.Dialect(delimiter=',',
                                 quoting=csv.QUOTE_NONE,
                                 quotechar=None,
                                 has_header=False,
                                 doublequote=False)
     cmd = f''' {pjoin(SCRIPT_DIR, 'gristle_sorter')}   \
                 -i {in_fqfn}
                 -o {out_fqfn}
                 -k 0sf
           '''
     executor(cmd, expect_success=True)

     recs = get_file_contents(out_fqfn, dialect)
     # The value '3' is expected on two consecutive rows.
     first_fields = [recs[i][0] for i in range(5)]
     assert first_fields == ['1', '2', '3', '3', '4']
Exemplo n.º 11
0
 def test_two_keys(self):
     """Run gristle_sorter with keys '0ir 1sf' and compare the whole output to the expected rows."""
     in_fqfn = create_complex_test_file(self.temp_dir, header=False)
     out_fqfn = in_fqfn + '.sorted'
     dialect = csvhelper.Dialect(delimiter=',',
                                 quoting=csv.QUOTE_NONE,
                                 quotechar=None,
                                 has_header=False,
                                 doublequote=False)
     cmd = f''' {pjoin(SCRIPT_DIR, 'gristle_sorter')}   \
                 -i {in_fqfn}
                 -o {out_fqfn}
                 -k 0ir 1sf
                 -q quote_none -d ',' --has-no-header --no-doublequote
           '''
     executor(cmd, expect_success=True)

     expected_recs = [
         ['4', 'aaa', 'a23'],
         ['4', 'aba', 'a23'],
         ['4', 'bbb', 'a23'],
         ['3', 'aaa', 'b23'],
         ['3', 'aaa', 'b23'],
         ['1', 'aaa', 'a23'],
     ]
     actual_recs = get_file_contents(out_fqfn, dialect)
     # Print the actual rows before asserting on them.
     pp(actual_recs)
     assert actual_recs == expected_recs
Exemplo n.º 12
0
    def test_get_overridden_dialect(self):
        """Every setting passed explicitly to get_dialect() must appear in the result."""
        source_dialect = csvhelper.Dialect(delimiter='|',
                                           quoting=csv.QUOTE_ALL,
                                           has_header=False)
        fqfn = ttools.make_team_file(self.temp_dir, source_dialect, 1000)

        # These differ on purpose from the dialect used to make the file.
        overrides = {
            'delimiter': ',',
            'quoting': 'quote_none',
            'quotechar': '!',
            'has_header': True,
            'doublequote': False,
            'escapechar': '\\',
            'skipinitialspace': False,
            'verbosity': 'normal',
        }
        overridden = csvhelper.get_dialect([fqfn], **overrides)

        assert overridden.delimiter == ','
        assert overridden.quoting == csv.QUOTE_NONE
        assert overridden.quotechar == '!'
        assert overridden.has_header is True
        assert overridden.doublequote is False
        assert overridden.escapechar == '\\'
Exemplo n.º 13
0
 def test_non_override(self):
     """override_dialect() called with None overrides must keep the original settings."""
     base_dialect = csvhelper.Dialect(delimiter='|',
                                      has_header=False,
                                      quoting=csv.QUOTE_NONE,
                                      quotechar='!',
                                      doublequote=False,
                                      escapechar='\\')

     # Everything is None except skipinitialspace, which is passed as False.
     no_overrides = {
         'delimiter': None,
         'quoting': None,
         'quotechar': None,
         'has_header': None,
         'doublequote': None,
         'skipinitialspace': False,
         'escapechar': None,
     }
     result = csvhelper.override_dialect(base_dialect, **no_overrides)

     assert result.delimiter == '|'
     assert result.quoting == csv.QUOTE_NONE
     assert result.quotechar == '!'
     assert result.has_header is False
     assert result.doublequote is False
     assert result.escapechar == '\\'
Exemplo n.º 14
0
 def setup_method(self, method):
     """Create a temp directory and a '|' / QUOTE_NONE / no-header dialect."""
     self.temp_dir = tempfile.mkdtemp(prefix='gristle_diff_')
     dialect_settings = {
         'delimiter': '|',
         'quoting': csv.QUOTE_NONE,
         'has_header': False,
     }
     self.dialect = csvhelper.Dialect(**dialect_settings)