Пример #1
0
def maybe_read_encoded_stream(reader, encoding=None):
    """Wrap the decoded contents of ``reader`` in a text stream when needed.

    Decoding happens when running under Python 3 (``compat.PY3``) or when an
    ``encoding`` other than ``None`` is supplied. Otherwise ``reader`` is
    handed back untouched.

    Parameters
    ----------
    reader : file-like object
        Its ``read()`` result must provide a ``decode`` method.
    encoding : str, optional
        Encoding used to decode the stream. A truthy value is decoded with
        ``errors='strict'``; a falsy value falls back to ``'utf-8'`` with
        ``errors='replace'``.

    Returns
    -------
    tuple
        ``(stream, encoding)``: the (possibly re-wrapped) stream and the
        encoding that was applied, or ``None`` when nothing was decoded.
    """
    must_decode = compat.PY3 or encoding is not None  # pragma: no cover
    if not must_decode:
        # Nothing to decode: report that no encoding was applied.
        return reader, None

    if encoding:
        error_mode = 'strict'
    else:
        # No usable encoding given: decode leniently as UTF-8.
        encoding, error_mode = 'utf-8', 'replace'

    decoded_text = reader.read().decode(encoding, error_mode)
    return StringIO(decoded_text), encoding
Пример #2
0
def maybe_read_encoded_stream(reader, encoding=None):
    """Return ``reader`` as a decoded text stream if decoding is required.

    The stream is fully read and decoded when ``compat.PY3`` is true or an
    ``encoding`` other than ``None`` is passed; in every other case the
    original ``reader`` is returned as is.

    Parameters
    ----------
    reader : file-like object
        Object whose ``read()`` result exposes ``decode``.
    encoding : str, optional
        Encoding to decode with. When truthy it is used with
        ``errors='strict'``; when falsy, ``'utf-8'`` with
        ``errors='replace'`` is used instead.

    Returns
    -------
    tuple
        ``(stream, encoding)`` where ``encoding`` is the one actually used,
        or ``None`` if the stream was not decoded.
    """
    if not (compat.PY3 or encoding is not None):  # pragma: no cover
        return reader, None

    strict = bool(encoding)
    if not strict:
        # Fall back to a lenient UTF-8 decode when no encoding is usable.
        encoding = 'utf-8'
    errors = 'strict' if strict else 'replace'

    raw = reader.read()
    return StringIO(raw.decode(encoding, errors)), encoding
Пример #3
0
def _index_is_all_dates(index):
    """Return True when ``index`` consists only of dates.

    Uses ``Index.is_all_dates`` where pandas provides it and falls back to a
    ``DatetimeIndex`` type check on pandas releases that removed the
    attribute.
    """
    try:
        return bool(index.is_all_dates)
    except AttributeError:
        return isinstance(index, pd.DatetimeIndex)


def read_iso_ts(indat,
                dense=True,
                parse_dates=True,
                extended_columns=False,
                force_freq=None):
    """
    Reads the format printed by 'print_iso' and maybe other formats.

    Parameters
    ----------
    indat : pandas.Series, pandas.DataFrame, str or bytes
        * Series / DataFrame: returned after the index has been named (a
          Series is first wrapped in a DataFrame).
        * ``'-'``: read through ``openinput('-')``; a header row is assumed.
        * text containing a newline or carriage return: parsed in memory.
        * anything else is treated as a path: it is first tried as a pickle
          and, if that fails, opened with ``openinput``.
    dense : bool
        When True and the index holds only dates, the frame is passed through
        ``asbestfreq`` before being returned.
    parse_dates : bool
        When False, no index column is used for input that has a header row.
        NOTE(review): header-less input always uses column 0 as a parsed
        index regardless of this flag -- confirm whether that is intended.
    extended_columns : bool
        When True, column names are prefixed with the base name of the input
        stream (``'_'`` for stdin and for in-memory text).
    force_freq : optional
        Forwarded to ``asbestfreq``; any value other than None forces
        ``dense=True``.

    Returns
    -------
    pandas.DataFrame
        Index named ``'Datetime'`` when it holds only dates, ``'UniqueID'``
        otherwise.

    Raises
    ------
    ValueError
        If ``indat`` is neither a pandas object nor str/bytes, or if it names
        a file that does not exist.
    """
    import csv
    try:
        from pandas.compat import StringIO
    except ImportError:
        # Newer pandas releases no longer ship pandas.compat.StringIO.
        from io import StringIO

    if force_freq is not None:
        # force_freq implies a dense series
        dense = True

    index_col = 0
    if parse_dates is False:
        index_col = False

    # Handle Series by converting to DataFrame
    if isinstance(indat, pd.Series):
        indat = pd.DataFrame(indat)

    if isinstance(indat, pd.DataFrame):
        if _index_is_all_dates(indat.index):
            indat.index.name = 'Datetime'
            if dense:
                return asbestfreq(indat, force_freq=force_freq)
            return indat
        indat.index.name = 'UniqueID'
        return indat

    if not isinstance(indat, (str, bytes)):
        raise ValueError('''
*
*   Can't figure out what was passed to read_iso_ts.
*
''')

    has_header = False
    dialect = csv.excel
    fpi = None

    if isinstance(indat, bytes):
        try:
            indat = str(indat, encoding='utf-8')
        except (TypeError, UnicodeDecodeError):
            # TypeError: Python 2 ``str`` accepts no encoding argument.
            # UnicodeDecodeError: the bytes are left as they were, exactly
            # as the previous catch-all handler did.
            pass

    if indat == '-':
        # if from stdin format must be the tstoolbox standard
        has_header = True
        fpi = openinput(indat)
    elif '\n' in indat or '\r' in indat:
        # a string
        fpi = StringIO(indat)
    elif os.path.exists(indat):
        # Is it a pickled file?
        # SECURITY: unpickling can execute arbitrary code; only pass paths
        # to files that come from a trusted source.
        try:
            result = pd.io.pickle.read_pickle(indat)
        except Exception:
            # Maybe a CSV file?
            fpi = openinput(indat)
    else:
        raise ValueError('''
*
*   File {0} doesn't exist.
*
'''.format(indat))

    if fpi:
        try:
            fpi.seek(0)
            readsome = fpi.read(2048)
            fpi.seek(0)
            dialect = csv.Sniffer().sniff(readsome, delimiters=', \t:|')
            has_header = csv.Sniffer().has_header(readsome)
        except Exception:
            # Non-seekable stream or unsniffable content.
            # This is an assumption.
            has_header = True

        if extended_columns is True:
            # In-memory streams carry no ``name``; label them like stdin.
            stream_name = getattr(fpi, 'name', '<stdin>')
            fname = os.path.splitext(os.path.basename(stream_name))[0]
            fstr = '{0}.{1}'
        else:
            fname = ''
            fstr = '{1}'
        if fname == '<stdin>':
            fname = '_'
        if has_header:
            result = pd.io.parsers.read_table(fpi,
                                              header=0,
                                              dialect=dialect,
                                              index_col=index_col,
                                              parse_dates=True,
                                              skipinitialspace=True)
            result.columns = [
                fstr.format(fname, i.strip()) for i in result.columns
            ]
        else:
            result = pd.io.parsers.read_table(fpi,
                                              header=None,
                                              dialect=dialect,
                                              index_col=0,
                                              parse_dates=True,
                                              skipinitialspace=True)
            if len(result.columns) == 1:
                result.columns = [fname]
            else:
                # Without a header the column labels are integers, so they
                # must be converted to text before they can be stripped.
                result.columns = [
                    fstr.format(fname, str(i).strip())
                    for i in result.columns
                ]

    if _index_is_all_dates(result.index):
        result.index.name = 'Datetime'

        if dense:
            try:
                return asbestfreq(result, force_freq=force_freq)
            except ValueError:
                # Fall back to the frame as parsed.
                return result
    else:
        if result.index.name != 'UniqueID':
            result.reset_index(level=0, inplace=True)
        result.index.name = 'UniqueID'
    return result