def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
    """
    Convert each raw index level and combine the results into one Index.

    Parameters
    ----------
    index : iterable of array-like
        One entry per index level, in level order.
    try_parse_dates : bool, default True
        When True, every level for which ``self._should_parse_dates(i)``
        is truthy is first passed through ``self._date_conv``.

    Returns
    -------
    Index
        Whatever ``ensure_index_from_sequences`` builds from the converted
        levels, named by ``self.index_names``.
    """
    arrays = []

    for i, arr in enumerate(index):
        if try_parse_dates and self._should_parse_dates(i):
            arr = self._date_conv(arr)

        if self.na_filter:
            col_na_values = self.na_values
            col_na_fvalues = self.na_fvalues
        else:
            col_na_values = set()
            col_na_fvalues = set()

        if isinstance(self.na_values, dict):
            # Per-column NA markers: resolve the ones for this level's name.
            assert self.index_names is not None
            col_name = self.index_names[i]
            if col_name is not None:
                col_na_values, col_na_fvalues = _get_na_values(
                    col_name,
                    self.na_values,
                    self.na_fvalues,
                    self.keep_default_na,
                )
            else:
                # An unnamed level cannot be looked up in the mapping.
                # Without this fallback the mappings themselves would be
                # combined with ``|`` below and handed to ``_infer_types``
                # in place of a set of NA markers.
                col_na_values, col_na_fvalues = set(), set()

        arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
        arrays.append(arr)

    return ensure_index_from_sequences(arrays, self.index_names)
def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
    """
    Convert each raw index level and combine the results into one Index.

    Parameters
    ----------
    index : iterable of array-like
        One entry per index level, in level order.
    try_parse_dates : bool, default True
        When True, every level for which ``self._should_parse_dates(i)``
        is truthy is first passed through ``self._date_conv``.

    Returns
    -------
    Index
        Whatever ``ensure_index_from_sequences`` builds from the converted
        levels, named by ``self.index_names``.
    """
    arrays = []

    for i, arr in enumerate(index):
        if try_parse_dates and self._should_parse_dates(i):
            arr = self._date_conv(arr)

        if self.na_filter:
            col_na_values = self.na_values
            col_na_fvalues = self.na_fvalues
        else:
            col_na_values = set()
            col_na_fvalues = set()

        if isinstance(self.na_values, dict):
            # mypy sees ``index_names`` as Optional here, hence the ignore.
            col_name = self.index_names[i]  # type: ignore[index]
            if col_name is not None:
                col_na_values, col_na_fvalues = _get_na_values(
                    col_name,
                    self.na_values,
                    self.na_fvalues,
                    self.keep_default_na,
                )
            else:
                # An unnamed level cannot be looked up in the mapping.
                # Without this fallback the mappings themselves would be
                # combined with ``|`` below and handed to ``_infer_types``
                # in place of a set of NA markers.
                col_na_values, col_na_fvalues = set(), set()

        arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
        arrays.append(arr)

    return ensure_index_from_sequences(arrays, self.index_names)
def _get_empty_meta(
    self, columns, index_col, index_names, dtype: DtypeArg | None = None
):
    """
    Build the index, column labels and empty column data for a result
    that contains no rows.

    Parameters
    ----------
    columns : iterable
        Column labels; copied into a new list before any modification.
    index_col : list of int, None or False
        Positions within ``columns`` that make up the index. ``None`` or
        ``False`` means there are no index columns. The list is not
        modified.
    index_names : sequence or None
        Names of the index levels. ``None`` produces a generic empty Index.
    dtype : dict-like, single dtype, or None
        Either one dtype applied to every column or a mapping from column
        label / integer position to dtype. Columns without an entry use
        ``object``.

    Returns
    -------
    tuple
        ``(index, columns, col_dict)``: the empty index, the remaining
        column labels (index columns removed), and a dict mapping each
        remaining label to an empty Series of the requested dtype.
    """
    columns = list(columns)

    # Convert `dtype` to a defaultdict of some kind.
    # This will enable us to write `dtype[col_name]`
    # without worrying about KeyError issues later on.
    if not is_dict_like(dtype):
        # Only a missing dtype falls back to object. A truthiness test
        # would also discard valid dtypes that evaluate as false (a plain
        # NumPy dtype has length 0).
        default_dtype = object if dtype is None else dtype
        dtype = defaultdict(
            lambda: default_dtype  # type: ignore[arg-type, return-value]
        )
    else:
        dtype = cast(dict, dtype)
        # Integer keys are positions; translate them to column labels.
        dtype = defaultdict(
            lambda: object,
            {columns[k] if is_integer(k) else k: v for k, v in dtype.items()},
        )

    # Even though we have no data, the "index" of the empty DataFrame
    # could for example still be an empty MultiIndex. Thus, we need to
    # check whether we have any index columns specified, via either:
    #
    # 1) index_col (column indices)
    # 2) index_names (column names)
    #
    # Both must be non-null to ensure a successful construction. Otherwise,
    # we have to create a generic empty Index.
    if (index_col is None or index_col is False) or index_names is None:
        index = Index([])
    else:
        data = [Series([], dtype=dtype[name]) for name in index_names]
        index = ensure_index_from_sequences(data, names=index_names)

        # Walk the positions in ascending order without reordering the
        # caller's list; every earlier removal shifts the remaining
        # positions left by one, hence ``n - i``.
        for i, n in enumerate(sorted(index_col)):
            columns.pop(n - i)

    col_dict = {col_name: Series([], dtype=dtype[col_name]) for col_name in columns}

    return index, columns, col_dict
def _get_empty_meta(self, columns, index_col, index_names, dtype: DtypeArg | None = None):
    """
    Build the index, column labels and empty column data for a result
    that contains no rows.

    Parameters
    ----------
    columns : iterable
        Column labels; copied into a new list before any modification.
    index_col : list of int, None or False
        Positions within ``columns`` that make up the index. ``None`` or
        ``False`` means there are no index columns. The list is not
        modified.
    index_names : sequence or None
        Names of the index levels. ``None`` produces a generic empty Index.
    dtype : dict-like, single dtype, or None
        Either one dtype applied to every column or a mapping from column
        label / integer position to dtype. Columns without an entry use
        ``object``.

    Returns
    -------
    tuple
        ``(index, columns, col_dict)``: the empty index, the remaining
        column labels (index columns removed), and a dict mapping each
        remaining label to an empty Series of the requested dtype.
    """
    columns = list(columns)

    # Convert `dtype` to a defaultdict of some kind.
    # This will enable us to write `dtype_dict[col_name]`
    # without worrying about KeyError issues later on.
    dtype_dict: defaultdict[Hashable, Any]
    if not is_dict_like(dtype):
        # Only a missing dtype falls back to object. A truthiness test
        # would also discard valid dtypes that evaluate as false (a plain
        # NumPy dtype has length 0).
        default_dtype = object if dtype is None else dtype
        dtype_dict = defaultdict(lambda: default_dtype)
    else:
        dtype = cast(dict, dtype)
        # Integer keys are positions; translate them to column labels.
        dtype_dict = defaultdict(
            lambda: object,
            {columns[k] if is_integer(k) else k: v for k, v in dtype.items()},
        )

    # Even though we have no data, the "index" of the empty DataFrame
    # could for example still be an empty MultiIndex. Thus, we need to
    # check whether we have any index columns specified, via either:
    #
    # 1) index_col (column indices)
    # 2) index_names (column names)
    #
    # Both must be non-null to ensure a successful construction. Otherwise,
    # we have to create a generic empty Index.
    if (index_col is None or index_col is False) or index_names is None:
        index = Index([])
    else:
        data = [Series([], dtype=dtype_dict[name]) for name in index_names]
        index = ensure_index_from_sequences(data, names=index_names)

        # Walk the positions in ascending order without reordering the
        # caller's list; every earlier removal shifts the remaining
        # positions left by one, hence ``n - i``.
        for i, n in enumerate(sorted(index_col)):
            columns.pop(n - i)

    col_dict = {
        col_name: Series([], dtype=dtype_dict[col_name]) for col_name in columns
    }

    return index, columns, col_dict
def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
    """
    Convert each raw index level and combine the results into one Index.

    Parameters
    ----------
    index : iterable of array-like
        One entry per index level, in level order.
    try_parse_dates : bool, default True
        When True, every level for which ``self._should_parse_dates(i)``
        is truthy is first passed through ``self._date_conv``.

    Returns
    -------
    Index
        Whatever ``ensure_index_from_sequences`` builds from the converted
        levels, named by ``self.index_names``.

    Notes
    -----
    A level whose name has a string dtype in ``self.dtype`` or a
    converter in ``self.converters`` is passed to ``self._infer_types``
    with its third argument set to False (presumably switching off
    numeric/boolean inference; confirm against ``_infer_types``).
    """
    arrays = []

    # Neither mapping depends on the level being processed, so both are
    # resolved once instead of on every iteration.
    converters = self._clean_mapping(self.converters)
    clean_dtypes = self._clean_mapping(self.dtype)

    for i, arr in enumerate(index):
        if try_parse_dates and self._should_parse_dates(i):
            arr = self._date_conv(arr)

        if self.na_filter:
            col_na_values = self.na_values
            col_na_fvalues = self.na_fvalues
        else:
            col_na_values = set()
            col_na_fvalues = set()

        if isinstance(self.na_values, dict):
            # Per-column NA markers: resolve the ones for this level's name.
            assert self.index_names is not None
            col_name = self.index_names[i]
            if col_name is not None:
                col_na_values, col_na_fvalues = _get_na_values(
                    col_name, self.na_values, self.na_fvalues, self.keep_default_na
                )
            else:
                # An unnamed level cannot be looked up in the mapping.
                # Without this fallback the mappings themselves would be
                # combined with ``|`` below and handed to ``_infer_types``
                # in place of a set of NA markers.
                col_na_values, col_na_fvalues = set(), set()

        cast_type = None
        index_converter = False
        if self.index_names is not None:
            if isinstance(clean_dtypes, dict):
                cast_type = clean_dtypes.get(self.index_names[i], None)

            if isinstance(converters, dict):
                index_converter = converters.get(self.index_names[i]) is not None

        # Explicit grouping: (string dtype requested) OR (converter present).
        try_num_bool = not (
            (cast_type and is_string_dtype(cast_type)) or index_converter
        )

        arr, _ = self._infer_types(
            arr, col_na_values | col_na_fvalues, try_num_bool
        )
        arrays.append(arr)

    return ensure_index_from_sequences(arrays, self.index_names)
def read(self, nrows=None):
    """
    Read up to ``nrows`` rows from the underlying reader.

    Returns a ``(index, columns, data)`` tuple. If the reader is already
    exhausted on the very first call, empty metadata from
    ``self._get_empty_meta`` is returned instead; on any later call the
    parser is closed and ``StopIteration`` propagates to the caller.

    Raises
    ------
    StopIteration
        When the reader is exhausted and this is not the first chunk.
    NotImplementedError
        When the reader reports leading (implicit index) columns while
        ``self._has_complex_date_col`` is set.
    """
    try:
        data = self._reader.read(nrows)
    except StopIteration:
        # error: Cannot determine type of '_first_chunk'
        if not self._first_chunk:  # type: ignore[has-type]
            self.close()
            raise

        # Nothing could be read on the first attempt: describe an empty result.
        self._first_chunk = False
        deduped = self._maybe_dedup_names(self.orig_names)
        index, columns, col_dict = self._get_empty_meta(
            deduped,
            self.index_col,
            self.index_names,
            dtype=self.kwds.get("dtype"),
        )
        columns = self._maybe_make_multi_index_columns(columns, self.col_names)

        if self.usecols is not None:
            columns = self._filter_usecols(columns)

        col_dict = {key: val for key, val in col_dict.items() if key in columns}
        return index, columns, col_dict

    # Done with first read, next time raise StopIteration
    self._first_chunk = False

    # error: Cannot determine type of 'names'
    names = self.names  # type: ignore[has-type]

    if self._reader.leading_cols:
        if self._has_complex_date_col:
            raise NotImplementedError("file structure not yet supported")

        # implicit index, no index names: pull each leading column out of
        # ``data`` (in order) and run it through date parsing.
        arrays = [
            self._maybe_parse_dates(
                data.pop(pos if self.index_col is None else self.index_col[pos]),
                pos,
                try_parse_dates=True,
            )
            for pos in range(self._reader.leading_cols)
        ]
        index = ensure_index_from_sequences(arrays)

        if self.usecols is not None:
            names = self._filter_usecols(names)
        names = self._maybe_dedup_names(names)

        # rename dict keys: remaining columns, ordered by key, take the names
        ordered = sorted(data.items())
        data = dict(zip(names, (col for _, col in ordered)))

        names, data = self._do_date_conversions(names, data)
    else:
        # rename dict keys
        ordered = sorted(data.items())

        # ugh, mutation

        # assert for mypy, orig_names is List or None, None would error in list(...)
        assert self.orig_names is not None
        names = self._maybe_dedup_names(list(self.orig_names))

        if self.usecols is not None:
            names = self._filter_usecols(names)

        # columns as list
        alldata = [col for _, col in ordered]
        data = dict(zip(names, (col for _, col in ordered)))

        names, data = self._do_date_conversions(names, data)
        index, names = self._make_index(data, alldata, names)

    # maybe create a mi on the columns
    names = self._maybe_make_multi_index_columns(names, self.col_names)

    return index, names, data