Пример #1
0
    def test_to_object_array_width(self):
        # see gh-13320
        rows = [[1, 2, 3], [4, 5, 6]]

        expected = np.array(rows, dtype=object)
        out = lib.to_object_array(rows)
        tm.assert_numpy_array_equal(out, expected)

        expected = np.array(rows, dtype=object)
        out = lib.to_object_array(rows, min_width=1)
        tm.assert_numpy_array_equal(out, expected)

        expected = np.array([[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object)
        out = lib.to_object_array(rows, min_width=5)
        tm.assert_numpy_array_equal(out, expected)
Пример #2
0
    def test_to_object_array_width(self):
        # see gh-13320
        rows = [[1, 2, 3], [4, 5, 6]]

        expected = np.array(rows, dtype=object)
        out = lib.to_object_array(rows)
        tm.assert_numpy_array_equal(out, expected)

        expected = np.array(rows, dtype=object)
        out = lib.to_object_array(rows, min_width=1)
        tm.assert_numpy_array_equal(out, expected)

        expected = np.array([[1, 2, 3, None, None], [4, 5, 6, None, None]],
                            dtype=object)
        out = lib.to_object_array(rows, min_width=5)
        tm.assert_numpy_array_equal(out, expected)
Пример #3
0
    def _rows_to_cols(self, content):
        zipped_content = list(lib.to_object_array(content).T)

        col_len = len(self.orig_columns)
        zip_len = len(zipped_content)

        if self._implicit_index:
            if np.isscalar(self.index_col):
                col_len += 1
            else:
                col_len += len(self.index_col)

        if col_len != zip_len:
            row_num = -1
            i = 0
            for (i, l) in enumerate(content):
                if len(l) != col_len:
                    break

            footers = 0
            if self.skip_footer:
                footers = self.skip_footer
                if footers > 0:
                    footers = footers - self.pos
            row_num = self.pos - (len(content) - i - footers)

            msg = "Expecting %d columns, got %d in row %d" % (col_len, zip_len, row_num)
            raise ValueError(msg)

        return zipped_content
Пример #4
0
    def get_chunk(self, rows=None):
        if rows is not None and self.skip_footer:
            raise ValueError('skip_footer not supported for iteration')

        try:
            content = self._get_lines(rows)
        except StopIteration:
            if self._first_chunk:
                content = []
            else:
                raise

        # done with first read, next time raise StopIteration
        self._first_chunk = False

        if len(content) == 0: # pragma: no cover
            if self.index_col is not None:
                if np.isscalar(self.index_col):
                    index = Index([], name=self.index_name)
                else:
                    index = MultiIndex.from_arrays([[]] * len(self.index_col),
                                                   names=self.index_name)
            else:
                index = Index([])

            return DataFrame(index=index, columns=self.columns)

        zipped_content = list(lib.to_object_array(content).T)

        if not self._has_complex_date_col and self.index_col is not None:
            index = self._get_simple_index(zipped_content)
            index = self._agg_index(index)
        else:
            index = Index(np.arange(len(content)))

        col_len, zip_len = len(self.columns), len(zipped_content)
        if col_len != zip_len:
            row_num = -1
            for (i, l) in enumerate(content):
                if len(l) != col_len:
                    break

            footers = 0
            if self.skip_footer:
                footers = self.skip_footer
            row_num = self.pos - (len(content) - i + footers)

            msg = ('Expecting %d columns, got %d in row %d' %
                   (col_len, zip_len, row_num))
            raise ValueError(msg)

        data = dict((k, v) for k, v in izip(self.columns, zipped_content))

        # apply converters
        for col, f in self.converters.iteritems():
            if isinstance(col, int) and col not in self.columns:
                col = self.columns[col]
            data[col] = lib.map_infer(data[col], f)

        columns = list(self.columns)
        if self.parse_dates is not None:
            data, columns = self._process_date_conversion(data)

        data = _convert_to_ndarrays(data, self.na_values, self.verbose)

        df = DataFrame(data=data, columns=columns, index=index)
        if self._has_complex_date_col and self.index_col is not None:
            if not self._name_processed:
                self.index_name = self._get_index_name(list(columns))
                self._name_processed = True
            data = dict(((k, v) for k, v in df.iteritems()))
            index = self._get_complex_date_index(data, col_names=columns,
                                                 parse_dates=False)
            index = self._agg_index(index, False)
            data = dict(((k, v.values) for k, v in data.iteritems()))
            df = DataFrame(data=data, columns=columns, index=index)

        if self.squeeze and len(df.columns) == 1:
            return df[df.columns[0]]
        return df
Пример #5
0
    def get_chunk(self, rows=None):
        if rows is not None and self.skip_footer:
            raise ValueError('skip_footer not supported for iteration')

        try:
            content = self._get_lines(rows)
        except StopIteration:
            if self._first_chunk:
                content = []
            else:
                raise

        # done with first read, next time raise StopIteration
        self._first_chunk = False

        if len(content) == 0:  # pragma: no cover
            if self.index_col is not None:
                if np.isscalar(self.index_col):
                    index = Index([], name=self.index_name)
                else:
                    index = MultiIndex.from_arrays([[]] * len(self.index_col),
                                                   names=self.index_name)
            else:
                index = Index([])

            return DataFrame(index=index, columns=self.columns)

        zipped_content = list(lib.to_object_array(content).T)

        if not self._has_complex_date_col and self.index_col is not None:
            index = self._get_simple_index(zipped_content)
            index = self._agg_index(index)
        else:
            index = Index(np.arange(len(content)))

        col_len, zip_len = len(self.columns), len(zipped_content)
        if col_len != zip_len:
            row_num = -1
            for (i, l) in enumerate(content):
                if len(l) != col_len:
                    break

            footers = 0
            if self.skip_footer:
                footers = self.skip_footer
            row_num = self.pos - (len(content) - i + footers)

            msg = ('Expecting %d columns, got %d in row %d' %
                   (col_len, zip_len, row_num))
            raise ValueError(msg)

        data = dict((k, v) for k, v in izip(self.columns, zipped_content))

        # apply converters
        for col, f in self.converters.iteritems():
            if isinstance(col, int) and col not in self.columns:
                col = self.columns[col]
            data[col] = lib.map_infer(data[col], f)

        columns = list(self.columns)
        if self.parse_dates is not None:
            data, columns = self._process_date_conversion(data)

        data = _convert_to_ndarrays(data, self.na_values, self.verbose)

        df = DataFrame(data=data, columns=columns, index=index)
        if self._has_complex_date_col and self.index_col is not None:
            if not self._name_processed:
                self.index_name = self._get_index_name(list(columns))
                self._name_processed = True
            data = dict(((k, v) for k, v in df.iteritems()))
            index = self._get_complex_date_index(data,
                                                 col_names=columns,
                                                 parse_dates=False)
            index = self._agg_index(index, False)
            data = dict(((k, v.values) for k, v in data.iteritems()))
            df = DataFrame(data=data, columns=columns, index=index)

        if self.squeeze and len(df.columns) == 1:
            return df[df.columns[0]]
        return df