Пример #1
0
    def _read_one_data(self, ftppath, params):
        """Download one index file over the FTP session and parse it.

        Zip/gz archives are unpacked by the dedicated helpers; any other
        path is retrieved line-by-line into an in-memory buffer.
        """
        if re.search(_ZIP_RE, ftppath) is not None:
            index_file = self._read_zipfile(ftppath)
        elif re.search(_GZ_RE, ftppath) is not None:
            index_file = self._read_gzfile(ftppath)
        else:
            raw_lines = []
            try:
                self._sec_ftp_session.retrlines('RETR ' + ftppath,
                                                raw_lines.append)
            except EOFError:
                raise RemoteDataError('FTP server has closed the connection.')

            index_file = StringIO()
            index_file.writelines(entry + '\n' for entry in raw_lines)
            index_file.seek(0)

        index_file = self._remove_header(index_file)
        index = read_csv(index_file, delimiter='|', header=None,
                         index_col=False, names=_COLUMNS,
                         low_memory=False, dtype=_COLUMN_TYPES)
        index['filename'] = index['filename'].map(self._fix_old_file_paths)
        return index
Пример #2
0
    def _read_one_data(self, ftppath, params):
        """Retrieve ``ftppath`` from the FTP server and parse it.

        Compressed files are delegated to the zip/gz readers; plain files
        are fetched line-by-line and joined into an in-memory buffer.
        """
        if re.search(_ZIP_RE, ftppath) is not None:
            index_file = self._read_zipfile(ftppath)
        elif re.search(_GZ_RE, ftppath) is not None:
            index_file = self._read_gzfile(ftppath)
        else:
            lines = []
            try:
                self._sec_ftp_session.retrlines('RETR ' + ftppath,
                                                lines.append)
            except EOFError:
                raise RemoteDataError('FTP server has closed the connection.')

            # Re-attach the newlines stripped by retrlines.
            index_file = StringIO(''.join(line + '\n' for line in lines))

        index_file = self._remove_header(index_file)
        index = read_csv(index_file, delimiter='|', header=None,
                         index_col=False, names=_COLUMNS,
                         low_memory=False, dtype=_COLUMN_TYPES)
        index['filename'] = index['filename'].map(self._fix_old_file_paths)
        return index
Пример #3
0
 def test_to_csv_stringio(self):
     buf = StringIO()
     self.frame.to_csv(buf)
     buf.seek(0)
     recons = read_csv(buf, index_col=0)
     # TODO to_csv drops column name
     assert_frame_equal(recons, self.frame, check_names=False)
Пример #4
0
 def test_to_csv_stringio(self):
     buf = StringIO()
     self.frame.to_csv(buf)
     buf.seek(0)
     recons = read_csv(buf, index_col=0)
     # TODO to_csv drops column name
     assert_frame_equal(recons, self.frame, check_names=False)
Пример #5
0
    def test_to_csv_unicode_index(self):
        """A Series with a unicode index survives a CSV round trip."""
        buf = StringIO()
        series = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")])

        series.to_csv(buf, encoding="UTF-8")
        buf.seek(0)

        reread = self.read_csv(buf, index_col=0, encoding="UTF-8")
        assert_series_equal(series, reread)
Пример #6
0
    def test_info_duplicate_columns_shows_correct_dtypes(self):
        # GH11761: each duplicated column label must report its own dtype
        buf = StringIO()

        frame = DataFrame([[1, 2.0]], columns=["a", "a"])
        frame.info(buf=buf)
        buf.seek(0)
        output_lines = buf.readlines()
        self.assertEqual("a    1 non-null int64\n", output_lines[3])
        self.assertEqual("a    1 non-null float64\n", output_lines[4])
Пример #7
0
    def test_info_duplicate_columns_shows_correct_dtypes(self):
        # GH11761: info() must list a dtype line per duplicated column
        stream = StringIO()

        df = DataFrame([[1, 2.0]], columns=['a', 'a'])
        df.info(buf=stream)
        stream.seek(0)
        rendered = stream.readlines()
        self.assertEqual('a    1 non-null int64\n', rendered[3])
        self.assertEqual('a    1 non-null float64\n', rendered[4])
Пример #8
0
    def _remove_header(self, data):
        """Return a buffer holding only the lines after the divider row."""
        body = StringIO()
        seen_divider = False
        for line in data:
            if seen_divider:
                body.write(line + '\n')
            elif re.search(_DIVIDER, line) is not None:
                seen_divider = True

        body.seek(0)
        return body
Пример #9
0
    def _remove_header(self, data):
        """Strip everything up to and including the divider line."""
        stream = iter(data)
        # Consume lines until the divider row is found (it is discarded too).
        for line in stream:
            if re.search(_DIVIDER, line) is not None:
                break

        cleaned = StringIO()
        for line in stream:
            cleaned.write(line + '\n')
        cleaned.seek(0)
        return cleaned
Пример #10
0
    def test_info_duplicate_columns_shows_correct_dtypes(self):
        # GH11761: duplicated column labels must each show their dtype
        out = StringIO()

        frame = DataFrame([[1, 2.0]],
                          columns=['a', 'a'])
        frame.info(buf=out)
        out.seek(0)
        report = out.readlines()
        assert 'a    1 non-null int64\n' == report[3]
        assert 'a    1 non-null float64\n' == report[4]
Пример #11
0
 def _read_url_as_StringIO(self, url, params=None):
     """
     Open url (and retry)
     """
     response = self._get_response(url, params=params)
     content = response.content
     if isinstance(content, compat.binary_type):
         content = bytes_to_str(content)
     # StringIO(initial) is already positioned at offset 0.
     return StringIO(content)
Пример #12
0
    def test_to_csv_unicode_index_col(self):
        buf = StringIO('')
        df = DataFrame(
            [["\u05d0", "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]],
            columns=["\u05d0", "\u05d1", "\u05d2", "\u05d3"],
            index=["\u05d0", "\u05d1"])

        df.to_csv(buf, encoding='UTF-8')
        buf.seek(0)

        df2 = read_csv(buf, index_col=0, encoding='UTF-8')
        assert_frame_equal(df, df2)
 def _read_url_as_StringIO(self, url, params=None):
     """
     Open url (and retry)
     """
     response = self._get_response(url, params=params)
     raw = response.content
     text = bytes_to_str(raw) if isinstance(raw, compat.binary_type) else raw
     buf = StringIO()
     buf.write(text)
     buf.seek(0)
     return buf
Пример #14
0
    def test_to_csv_unicode_index_col(self):
        """Unicode column/index labels must round-trip through to_csv."""
        sio = StringIO('')
        frame = DataFrame(
            [[u("\u05d0"), "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]],
            columns=[u("\u05d0"),
                     u("\u05d1"), u("\u05d2"), u("\u05d3")],
            index=[u("\u05d0"), u("\u05d1")])

        frame.to_csv(sio, encoding='UTF-8')
        sio.seek(0)

        restored = read_csv(sio, index_col=0, encoding='UTF-8')
        assert_frame_equal(frame, restored)
Пример #15
0
 def _read_url_as_StringIO(self, url, params=None):
     """
     Open url (and retry)
     """
     response = self._get_response(url, params=params)
     text = self._sanitize_response(response)
     if len(text) == 0:
         # An empty payload is treated as a malformed request.
         service = self.__class__.__name__
         raise IOError("{} request returned no data; check URL for invalid "
                       "inputs: {}".format(service, self.url))
     if isinstance(text, compat.binary_type):
         text = bytes_to_str(text)
     # StringIO(initial) is already positioned at offset 0.
     return StringIO(text)
Пример #16
0
    def _read_url_as_StringIO(self, url, params=None, min=0, errors='ignore'):
        """Override of the base-class method of the same name.

        Decodes the fetched text with the encoding supplied by the
        derived class (``self._encoding``).  The ``min`` and ``errors``
        parameters are accepted for signature compatibility but are not
        used by this implementation.
        """
        response = self._get_response(url, params=params)
        text = self._sanitize_response(response)
        if len(text) <= self._read_url_as_StringIO_min_len:
            if not self._read_url_as_StringIO_less_min_len:
                # Short responses are tolerated: signal "no data" quietly.
                return None
            service = self.__class__.__name__
            raise IOError("{} request returned no data; check URL for "
                          "invalid inputs: {}".format(service, self.url))
        if isinstance(text, compat.binary_type):
            text = bytes_to_str(text, encoding=self._encoding)
        # StringIO(initial) is already positioned at offset 0.
        return StringIO(text)
Пример #17
0
def read_iso_ts(indat,
                dense=True,
                parse_dates=True,
                extended_columns=False,
                force_freq=None):
    '''
    Reads the format printed by 'print_iso' and maybe other formats.

    Parameters
    ----------
    indat : pandas.Series, pandas.DataFrame, str or bytes
        Either an in-memory pandas object, '-' for stdin, a string that
        contains the data itself (detected by embedded newlines), or a
        path to a pickle/CSV file.
    dense : bool
        If True, coerce the result to its best regular frequency.
    parse_dates : bool
        If False, do not use the first column as the index.
    extended_columns : bool
        If True, prefix column names with the source file name.
    force_freq : str or None
        Frequency to force on the result; implies ``dense``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If ``indat`` names a non-existent file or is not a supported type.
    '''
    import csv
    from pandas.compat import StringIO

    if force_freq is not None:
        # force_freq implies a dense series
        dense = True

    index_col = 0
    if parse_dates is False:
        index_col = False

    # Treat runs of 1-19 spaces (optionally followed by 'nan') as missing.
    # Would want this to be more generic...
    # NOTE(review): na_values is built but never handed to read_table
    # below -- confirm whether that is intentional.
    na_values = []
    for spc in range(1, 20):
        spcs = ' ' * spc
        na_values.append(spcs)
        na_values.append(spcs + 'nan')

    fpi = None

    # Handle Series by converting to DataFrame
    if isinstance(indat, pd.Series):
        indat = pd.DataFrame(indat)

    if isinstance(indat, pd.DataFrame):
        if indat.index.is_all_dates:
            indat.index.name = 'Datetime'
            if dense:
                return asbestfreq(indat, force_freq=force_freq)
            else:
                return indat
        else:
            indat.index.name = 'UniqueID'
            return indat

    has_header = False
    dialect = csv.excel
    if isinstance(indat, (str, bytes)):
        if isinstance(indat, bytes):
            # Accept raw bytes; leave them untouched if they are not
            # valid UTF-8 (matches the old permissive behavior).
            try:
                indat = indat.decode('utf-8')
            except UnicodeDecodeError:
                pass
        if indat == '-':
            # if from stdin format must be the tstoolbox standard
            has_header = True
            fpi = openinput(indat)
        elif '\n' in indat or '\r' in indat:
            # a string containing the data itself
            fpi = StringIO(indat)
        elif os.path.exists(indat):
            # Is it a pickled file?
            try:
                result = pd.io.pickle.read_pickle(indat)
                # fpi=False marks "already parsed, skip the CSV path".
                fpi = False
            except Exception:
                # Maybe a CSV file?
                fpi = openinput(indat)
        else:
            raise ValueError('''
*
*   File {0} doesn't exist.
*
'''.format(indat))
    else:
        raise ValueError('''
*
*   Can't figure out what was passed to read_iso_ts.
*
''')

    if fpi:
        try:
            fpi.seek(0)
            readsome = fpi.read(2048)
            fpi.seek(0)
            dialect = csv.Sniffer().sniff(readsome, delimiters=', \t:|')
            has_header = csv.Sniffer().has_header(readsome)
        except Exception:
            # Sniffing failed; assume a header is present.
            has_header = True

        if extended_columns is True:
            fname = os.path.splitext(os.path.basename(fpi.name))[0]
            fstr = '{0}.{1}'
        else:
            fname = ''
            fstr = '{1}'
        if fname == '<stdin>':
            fname = '_'
        if has_header:
            result = pd.io.parsers.read_table(fpi,
                                              header=0,
                                              dialect=dialect,
                                              index_col=index_col,
                                              parse_dates=True,
                                              skipinitialspace=True)
            result.columns = [
                fstr.format(fname, i.strip()) for i in result.columns
            ]
        else:
            result = pd.io.parsers.read_table(fpi,
                                              header=None,
                                              dialect=dialect,
                                              index_col=0,
                                              parse_dates=True,
                                              skipinitialspace=True)
            if len(result.columns) == 1:
                result.columns = [fname]
            else:
                result.columns = [
                    fstr.format(fname, i.strip()) for i in result.columns
                ]

    if result.index.is_all_dates is True:
        result.index.name = 'Datetime'

        if dense:
            try:
                return asbestfreq(result, force_freq=force_freq)
            except ValueError:
                return result
    else:
        if result.index.name != 'UniqueID':
            result.reset_index(level=0, inplace=True)
        result.index.name = 'UniqueID'
    return result