示例#1
0
    def validate_text(dialect, attr):
        """Check that ``dialect.<attr>`` is a text string of exactly one character.

        Raises ``Error`` when the attribute is not a text string (bytes get a
        dedicated message) or when its length is not 1.
        """
        value = getattr(dialect, attr)

        if isinstance(value, text_type):
            # Correct type: only the length remains to be checked.
            if len(value) != 1:
                raise Error('"{0}" must be a 1-character string'.format(attr))
            return

        if type(value) == bytes:
            raise Error('"{0}" must be string, not bytes'.format(attr))

        type_name = type(value).__name__
        raise Error('"{0}" must be string, not {1}'.format(attr, type_name))
示例#2
0
    def sniff(self, sample, delimiters=None):
        """
        Guess the CSV dialect of ``sample`` and return it as a ``Dialect``
        subclass.

        Raises ``Error`` when neither guessing step yields a delimiter.
        """
        guess = self._guess_quote_and_delimiter(sample, delimiters)
        quotechar, doublequote, delimiter, skipinitialspace = guess

        if not delimiter:
            # The quote-based guess found no delimiter; try the second guesser.
            delimiter, skipinitialspace = self._guess_delimiter(
                sample, delimiters)
            if not delimiter:
                raise Error("Could not determine delimiter")

        class dialect(Dialect):
            _name = "sniffed"
            lineterminator = '\r\n'
            quoting = QUOTE_MINIMAL

        sniffed_settings = (
            ('doublequote', doublequote),
            ('delimiter', delimiter),
            # _csv.reader won't accept a quotechar of '', so fall back to '"'
            ('quotechar', quotechar or '"'),
            ('skipinitialspace', skipinitialspace),
        )
        for attr_name, attr_value in sniffed_settings:
            setattr(dialect, attr_name, attr_value)

        return dialect
示例#3
0
 def writerow(self, row):
     """Escape every field of ``row`` and forward it to the wrapped writer.

     Raises ``Error`` (chained to the original ``TypeError``) when ``row``
     is not iterable. Returns whatever the wrapped writer's ``writerow``
     returns.
     """
     try:
         iter(row)
     except TypeError as err:
         raise Error(
             "iterable expected, not %s" % type(row).__name__) from err

     escaped_fields = list(map(_escape, row))
     return self.writer.writerow(escaped_fields)
示例#4
0
 def _parse_eat_crnl(self, c):
     """Handle character ``c`` while the parser is consuming a line ending.

     CR and LF are swallowed, ``'\\0'`` switches the state to
     ``START_RECORD``, and any other character raises ``Error``.
     """
     if c in ('\n', '\r'):
         # Still inside the line terminator: nothing to record.
         return

     if c != '\0':
         raise Error('new-line character seen in unquoted field - do you '
                     'need to open the file in universal-newline mode?')

     self.state = START_RECORD
示例#5
0
    def writerow(self, row):
        """Format ``row`` according to the dialect and write it to the file object.

        Each field is passed through ``self.strategy.prepare``; the ``only``
        flag tells the strategy whether the record consists of a single field.

        Returns the return value of the underlying ``fileobj.write`` call,
        as the standard library's ``csvwriter.writerow`` does.

        Raises ``Error`` if ``row`` is ``None``.
        """
        if row is None:
            raise Error('row must be an iterable')

        # Materialise first: the length is needed before any field is prepared.
        row = list(row)
        only = len(row) == 1
        prepared = [self.strategy.prepare(field, only=only) for field in row]

        line = self.dialect.delimiter.join(prepared) + self.dialect.lineterminator
        return self.fileobj.write(line)
示例#6
0
    def validate(cls, dialect):
        """Validate the settings of ``dialect`` after passing it through ``cls.extend``.

        Checks run in a fixed order (quoting type, delimiter, lineterminator,
        quoting value, quotechar) and the first failure raises ``Error``.
        """
        dialect = cls.extend(dialect)

        if not isinstance(dialect.quoting, int):
            raise Error('"quoting" must be an integer')

        if dialect.delimiter is None:
            raise Error('delimiter must be set')
        cls.validate_text(dialect, 'delimiter')

        terminator = dialect.lineterminator
        if terminator is None:
            raise Error('lineterminator must be set')
        if not isinstance(terminator, text_type):
            raise Error('"lineterminator" must be a string')

        known_quoting_modes = (
            QUOTE_NONE, QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_ALL)
        if dialect.quoting not in known_quoting_modes:
            raise Error('Invalid quoting specified')

        if dialect.quoting != QUOTE_NONE:
            if dialect.quotechar is not None:
                cls.validate_text(dialect, 'quotechar')
            elif dialect.escapechar is None:
                # Quoting is enabled but there is neither a quotechar nor an
                # escapechar to fall back on.
                raise Error('quotechar must be set if quoting enabled')
示例#7
0
    def __next__(self):
        """
        Consume items from ``self.input_iter`` until one record is complete
        and return its fields.

        Raises ``Error`` if an item is not a text string, if a line contains
        a NUL character, or (in strict mode) if the input ends in the middle
        of a field. Re-raises ``StopIteration`` when the input is exhausted
        and no fields were collected.
        """
        self.parse_reset()

        while True:
            try:
                lineobj = next(self.input_iter)
            except StopIteration:
                # Input exhausted: flush a partially read field, if any.
                if len(self.field) != 0 or self.state == IN_QUOTED_FIELD:
                    if self.dialect.strict:
                        raise Error('unexpected end of data')
                    self.parse_save_field()
                if self.fields:
                    # A final record was collected; return it below.
                    break
                # Nothing collected: propagate the StopIteration.
                raise

            if not isinstance(lineobj, text_type):
                typ = type(lineobj)
                typ_name = 'bytes' if typ == bytes else typ.__name__
                err_str = ('iterator should return strings, not {0}'
                           ' (did you open the file in text mode?)')
                raise Error(err_str.format(typ_name))

            self.line_num += 1
            for c in lineobj:
                # Embedded NULs are rejected; '\0' is fed to the parser
                # separately after each line (see below).
                if c == '\0':
                    raise Error('line contains NULL byte')
                self.parse_process_char(c)

            # NOTE(review): '\0' appears to act as an end-of-line marker for
            # the parser -- confirm against parse_process_char.
            self.parse_process_char('\0')

            # Stop once the parser is back at the start-of-record state;
            # otherwise keep reading lines into the same record.
            if self.state == START_RECORD:
                break

        # Hand the collected fields to the caller and clear the reference.
        fields = self.fields
        self.fields = None
        return fields
示例#8
0
    def prepare(self, raw_field, only=None):
        """Convert ``raw_field`` to its escaped (and possibly quoted) text form.

        ``None`` becomes the empty string. Characters matched by the
        strategy's escape pattern are prefixed with the escape character;
        ``Error`` is raised if escaping is needed but no escape character is
        set. The result is wrapped in the dialect's quotechar when the
        strategy decides the field is quoted.
        """
        if raw_field is None:
            text = text_type('')
        else:
            text = text_type(raw_field)
        is_quoted = self.quoted(field=text, raw_field=raw_field, only=only)

        pattern = self.escape_re(quoted=is_quoted)
        esc = self.escapechar(quoted=is_quoted)

        if pattern.search(text):
            if esc == '\\':
                # A lone backslash must be doubled inside a regex
                # replacement template.
                esc = '\\\\'
            if not esc:
                raise Error('No escapechar is set')
            template = r'{escapechar}\1'.format(escapechar=esc)
            text = pattern.sub(template, text)

        if is_quoted:
            text = '{quotechar}{field}{quotechar}'.format(
                quotechar=self.dialect.quotechar, field=text)

        return text
示例#9
0
 def _parse_quote_in_quoted_field(self, c):
     """Handle character ``c`` seen right after a quote inside a quoted field.

     * another quotechar (with quoting enabled): a doubled quote, stored as
       one literal quote and the parser stays in the quoted field;
     * the delimiter: the field is saved and a new field starts;
     * ``'\\n'``, ``'\\r'`` or ``'\\0'``: the field is saved and the record ends;
     * anything else: kept as data in non-strict mode, otherwise ``Error``.
     """
     if (self.dialect.quoting != QUOTE_NONE
             and c == self.dialect.quotechar):
         # save "" as "
         self.parse_add_char(c)
         self.state = IN_QUOTED_FIELD
     elif c == self.dialect.delimiter:
         self.parse_save_field()
         self.state = START_FIELD
     elif c == '\n' or c == '\r' or c == '\0':
         # End of line = return [fields]
         self.parse_save_field()
         self.state = START_RECORD if c == '\0' else EAT_CRNL
     elif not self.dialect.strict:
         self.parse_add_char(c)
         self.state = IN_FIELD
     else:
         # illegal
         # Both characters are wrapped in balanced single quotes; the
         # previous template dropped the opening and the closing quote.
         raise Error("'{delimiter}' expected after '{quotechar}'".format(
             delimiter=self.dialect.delimiter,
             quotechar=self.dialect.quotechar,
         ))
示例#10
0
def get_dialect(name):
    """Return the dialect registered under ``name``.

    Raises ``Error`` if no dialect with that name is registered.
    """
    try:
        registered = _dialect_registry[name]
    except KeyError:
        message = 'Could not find dialect {0}'.format(name)
        raise Error(message)
    return registered
示例#11
0
def unregister_dialect(name):
    """Remove the dialect registered under ``name`` from the registry.

    Raises ``Error`` if ``name`` is not registered.
    """
    try:
        _dialect_registry.pop(name)
    except KeyError:
        message = '"{name}" not a registered dialect'.format(name=name)
        raise Error(message)
示例#12
0
 def parse_add_char(self, c):
     """Append ``c`` to the field being built.

     Raises ``Error`` when the field has already reached the size returned
     by ``field_size_limit()``.
     """
     has_room = len(self.field) < field_size_limit()
     if not has_room:
         raise Error('field size limit exceeded')
     self.field.append(c)
示例#13
0
 def quoted(self, field, only, **kwargs):
     """Report that the field is not quoted; always returns ``False``.

     Raises ``Error`` for an empty ``field`` when ``only`` is truthy.
     Extra keyword arguments are accepted and ignored.
     """
     lone_empty_field = field == '' and only
     if not lone_empty_field:
         return False
     raise Error('single empty field record must be quoted')