def test_to_object_array_width(self):
    """Check how ``lib.to_object_array`` reacts to the ``min_width`` argument.

    With no ``min_width``, or one smaller than the row length, the result
    must equal the plain object array built from the rows.  With a
    ``min_width`` larger than the row length, every row must be padded on
    the right with ``None`` up to that width.
    """
    # see gh-13320
    rows = [[1, 2, 3], [4, 5, 6]]

    unpadded = np.array(rows, dtype=object)
    padded = np.array(
        [[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object
    )

    # (keyword arguments, expected result) for each scenario, in the same
    # order the checks have always been run.
    scenarios = [
        ({}, unpadded),
        ({"min_width": 1}, unpadded),
        ({"min_width": 5}, padded),
    ]

    for kwargs, expected in scenarios:
        out = lib.to_object_array(rows, **kwargs)
        tm.assert_numpy_array_equal(out, expected)
def _rows_to_cols(self, content): zipped_content = list(lib.to_object_array(content).T) col_len = len(self.orig_columns) zip_len = len(zipped_content) if self._implicit_index: if np.isscalar(self.index_col): col_len += 1 else: col_len += len(self.index_col) if col_len != zip_len: row_num = -1 i = 0 for (i, l) in enumerate(content): if len(l) != col_len: break footers = 0 if self.skip_footer: footers = self.skip_footer if footers > 0: footers = footers - self.pos row_num = self.pos - (len(content) - i - footers) msg = "Expecting %d columns, got %d in row %d" % (col_len, zip_len, row_num) raise ValueError(msg) return zipped_content
def get_chunk(self, rows=None):
    """Read the next chunk of lines and assemble it into a DataFrame.

    Parameters
    ----------
    rows : optional
        Passed straight through to ``self._get_lines``; presumably the
        number of rows to read, with ``None`` meaning "everything" --
        TODO confirm against ``_get_lines``.

    Returns
    -------
    DataFrame
        The parsed chunk.  When ``self.squeeze`` is truthy and the frame
        has exactly one column, that single column (``df[df.columns[0]]``)
        is returned instead.

    Raises
    ------
    ValueError
        If ``rows`` is given while ``self.skip_footer`` is set, or if the
        number of parsed columns differs from ``len(self.columns)``.
    StopIteration
        Re-raised from ``self._get_lines`` on any call after the first.
    """
    # Chunked reading and footer skipping cannot be combined.
    if rows is not None and self.skip_footer:
        raise ValueError('skip_footer not supported for iteration')

    try:
        content = self._get_lines(rows)
    except StopIteration:
        # Exhausted input on the very first read is treated as an empty
        # chunk; on later reads the exception is propagated to the caller.
        if self._first_chunk:
            content = []
        else:
            raise

    # done with first read, next time raise StopIteration
    self._first_chunk = False

    # No data at all: return an empty frame that still carries the
    # configured columns and an empty index of the matching kind.
    if len(content) == 0:  # pragma: no cover
        if self.index_col is not None:
            if np.isscalar(self.index_col):
                index = Index([], name=self.index_name)
            else:
                # One empty level per entry in index_col.
                index = MultiIndex.from_arrays([[]] * len(self.index_col),
                                               names=self.index_name)
        else:
            index = Index([])

        return DataFrame(index=index, columns=self.columns)

    # Pack the rows into an object array and transpose it so that each
    # list element holds one column.
    zipped_content = list(lib.to_object_array(content).T)

    if not self._has_complex_date_col and self.index_col is not None:
        # NOTE(review): _get_simple_index presumably pulls the index
        # column(s) out of zipped_content -- confirm whether it mutates it.
        index = self._get_simple_index(zipped_content)
        index = self._agg_index(index)
    else:
        # Positional index for now; the complex-date case rebuilds the
        # index further down once the date columns have been produced.
        index = Index(np.arange(len(content)))

    col_len, zip_len = len(self.columns), len(zipped_content)
    if col_len != zip_len:
        row_num = -1
        # Find the first row whose length disagrees with the column count.
        # content is non-empty here, so ``i`` is always bound after the loop.
        for (i, l) in enumerate(content):
            if len(l) != col_len:
                break

        footers = 0
        if self.skip_footer:
            footers = self.skip_footer
        # Count back from self.pos by the rows from the offending one to
        # the end of the chunk, plus any footer rows.
        # NOTE(review): self.pos is presumably the parser's current line
        # position -- confirm.
        row_num = self.pos - (len(content) - i + footers)

        msg = ('Expecting %d columns, got %d in row %d' %
               (col_len, zip_len, row_num))
        raise ValueError(msg)

    # Map each column label to its column of raw values.
    data = dict((k, v) for k, v in izip(self.columns, zipped_content))

    # apply converters
    for col, f in self.converters.iteritems():
        # An integer key that is not itself a column label is treated as a
        # position into self.columns.
        if isinstance(col, int) and col not in self.columns:
            col = self.columns[col]
        # NOTE(review): lib.map_infer presumably applies f element-wise
        # and infers the result dtype -- confirm.
        data[col] = lib.map_infer(data[col], f)

    columns = list(self.columns)
    if self.parse_dates is not None:
        # Date processing returns both the data and the column list, so
        # either may differ from what went in.
        data, columns = self._process_date_conversion(data)

    # Hand the columns to _convert_to_ndarrays along with the NA markers
    # and the verbosity flag.
    data = _convert_to_ndarrays(data, self.na_values, self.verbose)

    df = DataFrame(data=data, columns=columns, index=index)
    if self._has_complex_date_col and self.index_col is not None:
        # The index name is resolved against the post-date-conversion
        # columns, and only once per parser instance.
        if not self._name_processed:
            self.index_name = self._get_index_name(list(columns))
            self._name_processed = True
        data = dict(((k, v) for k, v in df.iteritems()))
        index = self._get_complex_date_index(data, col_names=columns,
                                             parse_dates=False)
        index = self._agg_index(index, False)
        # Rebuild the frame from the columns' underlying ``.values`` and
        # the newly built index.
        data = dict(((k, v.values) for k, v in data.iteritems()))
        df = DataFrame(data=data, columns=columns, index=index)

    # Collapse a single-column frame to that column when squeeze is set.
    if self.squeeze and len(df.columns) == 1:
        return df[df.columns[0]]

    return df