예제 #1
0
 def rows(self):
     """Yield each row of ``self.file_name`` as produced by ``DictReader``.

     A leading sample of the file is used to guess the text encoding
     (``guess_encoding``) and the delimiter (``Sniffer``); the file is then
     rewound and read from the start with those settings.

     Any exception raised while opening, sniffing or reading is logged and
     swallowed, so iteration simply ends early (possibly without yielding
     anything) instead of raising to the caller.
     """
     try:
         # Binary mode: the sample is decoded explicitly below and the
         # reader is given an ``encoding``, so both expect raw bytes.  Text
         # mode may additionally apply platform newline translation.
         # NOTE(review): DictReader accepts ``encoding``, so it is presumably
         # a bytes-oriented reader (e.g. unicodecsv) -- confirm.
         with open(self.file_name, 'rb') as fh:
             sample = fh.read(4096 * 10)
             encoding = guess_encoding(sample)
             if encoding != 'utf-8':
                 log.info("Decode [%s]: %s", self.file_name, encoding)
             # 'replace' substitutes undecodable bytes (for instance a
             # character split at the sample boundary) instead of raising.
             sample = sample.decode(encoding, 'replace')
             dialect = Sniffer().sniff(sample)
             # Rewind so the reader sees the file from its first byte.
             fh.seek(0)
             for row in DictReader(
                     fh,
                     encoding=encoding,
                     delimiter=dialect.delimiter.encode(encoding)):
                 yield row
     except Exception as exc:
         # Deliberately best-effort: report the failure and stop iterating.
         log.error('Failed reading file [%s]: %s', self.file_name, exc)
예제 #2
0
    def _get_csv_reader(self, *args, **kwargs):
        """Sniff the CSV dialect of ``self.csvfile`` and build a reader.

        When the caller supplies a ``reader_type`` keyword, that callable is
        invoked with the file, the sniffed dialect and the remaining keyword
        arguments.  Otherwise a ``DictReader`` created with
        ``errors='replace'`` is returned.
        """
        source = self.csvfile

        # The header row is skipped before sampling: headers are more likely
        # to have an unusual character distribution than the data rows.
        source.readline()

        # Sample a generous chunk to improve the odds of detecting the
        # dialect (MCM is often run on very wide csv files), then rewind.
        sniffed = Sniffer().sniff(source.read(16384))
        source.seek(0)

        if 'reader_type' in kwargs:
            factory = kwargs.pop('reader_type')
            return factory(source, sniffed, **kwargs)

        # NOTE(review): the sniffed dialect is not passed on this default
        # path -- confirm that is intentional.
        return DictReader(source, errors='replace')