Exemplo n.º 1
0
def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True):
    """Read any supported table type and return the matching TableSet.

    The format is chosen by trying, in order:

    1. the given MIME type (e.g. ``mimetype='text/csv'``),
    2. the given file extension (e.g. ``extension='tsv'``),
    3. auto-detection of the MIME type from the file contents, which
       uses the magic library, looks only at the first few bytes of
       the file and is often wrong.

    The first hint that is recognised wins: the parser registered for it
    in ``parsers`` is called with ``fileobj`` and its result is returned
    immediately. Consult the source for recognised MIME types and file
    extensions.

    :param fileobj: file-like object holding the table data.
    :param mimetype: MIME type hint, or ``None`` to skip this step.
    :param extension: file extension (or filename) hint; it is
        normalised with ``clean_ext`` and skipped if that yields ``''``.
    :param auto_detect: if true, fall back to content-based detection.
    :raises messytables.ReadError: if no hint was recognised, or if no
        detection was attempted at all.
    """
    short_ext = clean_ext(extension)
    # One entry per hint that was tried and not recognised; reported
    # together if every attempt fails.
    error = []

    if mimetype is not None:
        attempt = guess_mime(mimetype)
        if attempt:
            return parsers[attempt](fileobj)
        error.append(
            'Did not recognise MIME type given: "{mimetype}".'.format(
                mimetype=mimetype))

    # Compare by value: identity comparison against a string literal is
    # implementation-dependent (and a SyntaxWarning on Python 3.8+).
    if short_ext != '':
        attempt = guess_ext(short_ext)
        if attempt:
            return parsers[attempt](fileobj)
        error.append(
            'Did not recognise extension "{ext}" (given "{full}").'.format(
                ext=short_ext, full=extension))

    # Auto-detection comes last because the detection routine is
    # pretty poor; explicit hints from the caller are preferred.
    if auto_detect:
        magic_mime = get_mime(fileobj)
        attempt = guess_mime(magic_mime)
        if attempt:
            return parsers[attempt](fileobj)
        error.append(
            'Did not recognise detected MIME type: "{mimetype}".'.format(
                mimetype=magic_mime))

    if error:
        # Prefix once, then list each failed attempt on its own line.
        raise messytables.ReadError('any: ' + '\n'.join(error))
    raise messytables.ReadError("any: Did not attempt any detection.")
Exemplo n.º 2
0
Arquivo: zip.py Projeto: bwica/dpusher
    def __init__(self, fileobj, **kw):
        """Collect the tables of every readable member of a ZIP archive.

        Each archive member is opened and handed to
        ``messytables.any.any_tableset`` together with the member's file
        extension (the text after the last ``.``, or ``None`` if the
        name has no dot). The tables of all members that load
        successfully are concatenated and stored on ``self._tables``.

        :param fileobj: the ZIP archive, as accepted by
            ``zipfile.ZipFile``.
        :param kw: extra keyword arguments forwarded unchanged to
            ``any_tableset`` for every member.
        :raises messytables.ReadError: if no member yielded any table.
        """
        tables = []
        # Per-member failure descriptions, reported if nothing loads.
        found = []
        z = zipfile.ZipFile(fileobj, 'r')
        try:
            for f in z.infolist():
                ext = None

                # ignore metadata folders added by Mac OS X
                if '__MACOSX' in f.filename:
                    continue

                if "." in f.filename:
                    ext = f.filename[f.filename.rindex(".") + 1:]

                try:
                    filetables = messytables.any.any_tableset(z.open(f),
                                                              extension=ext,
                                                              **kw)
                except ValueError as e:
                    # str(e) rather than e.message: the ``message``
                    # attribute does not exist on Python 3 exceptions.
                    found.append(f.filename + ": " + str(e))
                    continue

                tables.extend(filetables.tables)

            if not tables:
                raise messytables.ReadError('''ZIP file has no recognized
                    tables (%s).''' % ', '.join(found))
        finally:
            z.close()

        self._tables = tables
Exemplo n.º 3
0
    def raw(self, sample=False):
        """Yield the rows of the CSV data as lists of ``Cell`` objects.

        Lines are taken first from ``self._sample`` and then, unless
        ``sample`` is true, from ``self.lines``. They are parsed by
        ``csv.reader`` using ``self._dialect`` and ``self._overrides``.

        :param sample: if true, only the lines in ``self._sample`` are
            parsed.
        :raises messytables.ReadError: on a ``csv.Error`` other than the
            two cases tolerated below.
        """
        def encoded(lines):
            # On Python 2 the csv reader is fed UTF-8 byte strings;
            # on Python 3 the lines are passed through unchanged.
            for line in lines:
                yield line.encode('utf-8') if PY2 else line

        def rows():
            for line in encoded(self._sample):
                yield line
            if not sample:
                # self.lines is only touched once the sample is used up.
                for line in encoded(self.lines):
                    yield line

        # Fix the maximum field size to something a little larger.
        # NOTE: this changes the limit for the whole csv module.
        csv.field_size_limit(256000)

        try:
            for row in csv.reader(rows(),
                                  dialect=self._dialect, **self._overrides):
                yield [Cell(to_unicode_or_bust(c)) for c in row]
        except csv.Error as err:
            message = unicode_string(err)
            if u'newline inside string' in message and sample:
                # Tolerated when only sampling — presumably because the
                # sample can end in the middle of a quoted field.
                pass
            elif u'line contains NULL byte' in message:
                # Deliberately best-effort: stop quietly at a NULL byte.
                pass
            else:
                # Interpolate the error into the message; passing it as a
                # second argument would leave the '%r' unformatted.
                raise messytables.ReadError(
                    'Error reading CSV: %r' % (err,))