def maybe_read_encoded_stream(reader, encoding=None):
    """Read a stream from *reader*, decoding bytes to unicode when required.

    Parameters
    ----------
    reader : a streamable file-like object
    encoding : optional, the encoding to attempt to read

    Returns
    -------
    tuple
        (stream of decoded text, the encoding which was used or None)
    """
    if compat.PY3 or encoding is not None:  # pragma: no cover
        # An explicit encoding is honored strictly; otherwise decode as
        # utf-8 and replace any undecodable bytes.
        errors = 'strict' if encoding else 'replace'
        if not encoding:
            encoding = 'utf-8'
        reader = StringIO(reader.read().decode(encoding, errors))
    else:
        encoding = None
    return reader, encoding
def maybe_read_encoded_stream(reader, encoding=None):
    """Read an encoded stream from the reader and transform the bytes to
    unicode if required based on the encoding.

    Parameters
    ----------
    reader : a streamable file-like object
    encoding : optional, the encoding to attempt to read

    Returns
    -------
    tuple
        (stream of decoded text, the encoding which was used or None)
    """
    # NOTE(review): this is a duplicate of the identical function defined
    # earlier in this file; Python keeps this later definition at import
    # time — consider removing one copy.
    if not compat.PY3 and encoding is None:
        return reader, None
    # pragma: no cover (decode path)
    if encoding:
        errors = 'strict'
    else:
        errors = 'replace'
        encoding = 'utf-8'
    decoded = reader.read().decode(encoding, errors)
    return StringIO(decoded), encoding
def read_iso_ts(indat, dense=True, parse_dates=True, extended_columns=False,
                force_freq=None):
    '''
    Reads the format printed by 'print_iso' and maybe other formats.

    Parameters
    ----------
    indat : pandas.Series, pandas.DataFrame, str, or bytes
        The input: an in-memory pandas object, '-' for stdin, a block of
        text containing the data, or a path to a pickled or CSV file.
    dense : bool
        When True, try to coerce a datetime index to its best frequency
        via ``asbestfreq``.
    parse_dates : bool
        When False, column 0 is not used as the index.
    extended_columns : bool
        When True, prefix column names with the source file name.
    force_freq : str or None
        Frequency to force on the result; implies ``dense=True``.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Datetime' when the index is all dates, otherwise by
        'UniqueID'.

    Raises
    ------
    ValueError
        If a path-like string does not exist, or the input type cannot
        be interpreted.
    '''
    import csv
    from pandas.compat import StringIO

    if force_freq is not None:
        # force_freq implies a dense series
        dense = True

    index_col = 0
    if parse_dates is False:
        index_col = False

    # Whitespace-only fields (optionally followed by 'nan') count as NA.
    # NOTE(review): na_values is built but never handed to the parser —
    # presumably intended for read_table below; confirm before wiring in.
    na_values = []
    for spc in range(1, 20):
        spcs = ' ' * spc
        na_values.append(spcs)
        na_values.append(spcs + 'nan')

    fpi = None
    result = None

    # Handle Series by converting to DataFrame
    if isinstance(indat, pd.Series):
        indat = pd.DataFrame(indat)

    if isinstance(indat, pd.DataFrame):
        if indat.index.is_all_dates:
            indat.index.name = 'Datetime'
            if dense:
                return asbestfreq(indat, force_freq=force_freq)
            return indat
        indat.index.name = 'UniqueID'
        return indat

    has_header = False
    dialect = csv.excel

    if isinstance(indat, (str, bytes)):
        try:
            indat = str(indat, encoding='utf-8')
        except (TypeError, UnicodeDecodeError):
            # Already text (or undecodable) — use as-is.
            pass
        if indat == '-':
            # if from stdin, format must be the tstoolbox standard
            has_header = True
            fpi = openinput(indat)
        elif '\n' in indat or '\r' in indat:
            # a multi-line string holding the data itself
            fpi = StringIO(indat)
        elif os.path.exists(indat):
            # Is it a pickled file?
            try:
                result = pd.io.pickle.read_pickle(indat)
                fpi = False
            except Exception:
                # Maybe a CSV file?
                fpi = openinput(indat)
        else:
            raise ValueError('''
*
*   File {0} doesn't exist.
*
'''.format(indat))
    else:
        raise ValueError('''
*
*   Can't figure out what was passed to read_iso_ts.
*
''')

    if fpi:
        try:
            fpi.seek(0)
            readsome = fpi.read(2048)
            fpi.seek(0)
            dialect = csv.Sniffer().sniff(readsome, delimiters=', \t:|')
            has_header = csv.Sniffer().has_header(readsome)
        except Exception:
            # This is an assumption.
            has_header = True

        # BUG FIX: the column naming and CSV parse below previously ran
        # even for the pickled-file path (where fpi is False), crashing on
        # ``fpi.name`` and discarding the unpickled result.  They now run
        # only for real file-like inputs.
        if extended_columns is True:
            fname = os.path.splitext(os.path.basename(fpi.name))[0]
            fstr = '{0}.{1}'
        else:
            fname = ''
            fstr = '{1}'
        if fname == '<stdin>':
            fname = '_'

        if has_header:
            result = pd.io.parsers.read_table(fpi, header=0, dialect=dialect,
                                              index_col=index_col,
                                              parse_dates=True,
                                              skipinitialspace=True)
            result.columns = [fstr.format(fname, i.strip())
                              for i in result.columns]
        else:
            result = pd.io.parsers.read_table(fpi, header=None,
                                              dialect=dialect, index_col=0,
                                              parse_dates=True,
                                              skipinitialspace=True)
            if len(result.columns) == 1:
                result.columns = [fname]
            else:
                result.columns = [fstr.format(fname, i.strip())
                                  for i in result.columns]

    if result.index.is_all_dates is True:
        result.index.name = 'Datetime'
        if dense:
            try:
                return asbestfreq(result, force_freq=force_freq)
            except ValueError:
                return result
    else:
        if result.index.name != 'UniqueID':
            result.reset_index(level=0, inplace=True)
            result.index.name = 'UniqueID'
    return result