Example #1
0
 def __init__(
     self,
     filepath_or_buffer: Optional[Any] = None,
     filter_: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
     force_valid_ids: bool = True,
     fillvalues: Optional[Dict[str, Any]] = None,
     as_array: Optional[Any] = None,
     timeout: Optional[float] = None,
     save_context: Optional[Any] = None,  # FIXME seems more like a bool
     recovery: int = 0,  # FIXME seems more like a bool
     recovery_tag: Union[str, int] = "",
     recovery_table_size: int = 3,
     save_step_size: int = 100000,
     **kwds: Any,
 ) -> None:
     """Create a progressive CSV loader module.

     Parameters
     ----------
     filepath_or_buffer:
         Path, URL or buffer ultimately handed to ``pd.read_csv``.
     filter_:
         Optional callable applied to loaded data; must be callable or
         ``None``, otherwise :class:`ProgressiveError` is raised.
     force_valid_ids:
         Stored as-is; presumably forces column names to valid
         identifiers downstream — confirm against the loading code.
     fillvalues:
         Forwarded into the output table parameters.
     as_array:
         Stored as-is for later use by the loader.
     timeout:
         Optional timeout (seconds) kept in ``_timeout_csv``.
     save_context:
         Tri-state: ``None`` means "enable context saving when the input
         is recoverable"; an explicit value forces it on or off.
     recovery:
         Non-zero to try resuming from existing recovery tables; reset
         to False when those tables do not exist.
     recovery_tag:
         Suffix distinguishing the recovery tables of several loaders.
     recovery_table_size:
         Size parameter of the recovery table.
     save_step_size:
         Number of rows between two recovery snapshots.
     kwds:
         Extra keywords; ``pd.read_csv`` options are filtered out of
         the module keywords (``chunksize`` also seeds the step size).

     Raises
     ------
     ProgressiveError
         If *filter_* is neither callable nor ``None``.
     """
     super(CSVLoader, self).__init__(**kwds)
     self.tags.add(self.TAG_SOURCE)
     self.default_step_size = kwds.get("chunksize", 1000)  # initial guess
     kwds.setdefault("chunksize", self.default_step_size)
     # Filter out the module keywords from the csv loader keywords
     csv_kwds = filter_kwds(kwds, pd.read_csv)
     # When called with a specified chunksize, it returns a parser
     self.filepath_or_buffer = filepath_or_buffer
     self.force_valid_ids = force_valid_ids
     self.parser: Optional[Parser] = None
     self.csv_kwds = csv_kwds
     # Compression and encoding are handled by this module, not by
     # pandas, so remember them and neutralize the pandas options.
     self._compression = csv_kwds.get("compression", "infer")
     csv_kwds["compression"] = None
     self._encoding = csv_kwds.get("encoding", None)
     csv_kwds["encoding"] = None
     self._rows_read = 0
     if filter_ is not None and not callable(filter_):
         raise ProgressiveError(
             "filter parameter should be callable or None")
     self._filter = filter_
     self._input_encoding = None
     self._input_compression = None
     self._input_size = 0  # length of the file or input stream when available
     self._timeout_csv = timeout
     self._table_params: Dict[str, Any] = dict(name=self.name,
                                               fillvalues=fillvalues)
     self._as_array = as_array
     # BUG FIX: the previous one-liner evaluated
     # `save_context is None and is_recoverable(...)`, so an explicitly
     # passed save_context=True was silently ignored (always False).
     # Honor an explicit value; auto-detect only when None.
     if save_context is None:
         self._save_context = bool(is_recoverable(filepath_or_buffer))
     else:
         self._save_context = bool(save_context)
     self._recovery = recovery
     self._recovery_table_size = recovery_table_size
     self._recovery_table: Optional[Table] = None
     self._recovery_table_name = f"csv_loader_recovery_{recovery_tag}"
     self._recovery_table_inv: Optional[Table] = None
     self._recovery_table_inv_name = f"csv_loader_recovery_invariant_{recovery_tag}"
     self._save_step_size = save_step_size
     self._last_saved_id = 0
     # Recovering only makes sense when the recovery tables exist.
     if self._recovery and not self.recovery_tables_exist():
         self._recovery = False
     if not self._recovery:
         self.trunc_recovery_tables()
Example #2
0
 def __init__(self, **kwds: Any) -> None:
     """Progressive wrapper around :func:`merge`.

     The keywords understood by ``merge`` (``how='inner'``, ``on``,
     ``left_on``, ``right_on``, ``left_index``, ``right_index``,
     ``sort``, ``suffixes=('_x', '_y')``, ``copy``, ``indicator``)
     are split off into ``merge_kwds``; the remainder is forwarded to
     the parent module constructor.
     """
     super().__init__(**kwds)
     # Keep only the keywords that merge() actually accepts.
     self.merge_kwds = filter_kwds(kwds, merge)
     # Working state, empty at construction (filled in elsewhere).
     self._context: Dict[str, Any] = {}
    def __init__(
        self,
        filepath_or_buffer: Optional[Any] = None,
        filter_: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None,
        force_valid_ids: bool = True,
        fillvalues: Optional[Dict[str, Any]] = None,
        throttle: Union[bool, int, float] = False,
        **kwds: Any,
    ) -> None:
        """Create a progressive CSV loader with optional throttling.

        ``chunksize`` (from *kwds*) seeds the module's initial step size
        and is forwarded to ``pd.read_csv``.  *throttle* is kept only
        when it is a truthy number; anything else disables throttling.
        Raises :class:`ProgressiveError` when *filter_* is neither
        callable nor ``None``.
        """
        super().__init__(**kwds)
        self.default_step_size = kwds.get("chunksize", 1000)  # initial guess
        kwds.setdefault("chunksize", self.default_step_size)
        # Split the pd.read_csv() options away from the module keywords.
        reader_kwds: Dict[str, Any] = filter_kwds(kwds, pd.read_csv)
        self.filepath_or_buffer = filepath_or_buffer
        self.force_valid_ids = force_valid_ids
        # Only truthy numeric throttle values are honored.
        numeric = isinstance(throttle, integer_types + (float, ))
        self.throttle = throttle if (throttle and numeric) else False
        self.parser: Optional[pd.TextReader] = None
        self.csv_kwds = reader_kwds
        # compression/encoding/nrows are managed by this module, so they
        # are remembered here and blanked out for pandas.
        self._compression: Any = reader_kwds.get("compression", "infer")
        self._encoding: Any = reader_kwds.get("encoding", None)
        self._nrows = reader_kwds.get("nrows")
        reader_kwds["compression"] = None
        reader_kwds["encoding"] = None
        reader_kwds["nrows"] = None  # nrows clashes with chunksize
        self._rows_read = 0
        if filter_ is not None and not callable(filter_):
            raise ProgressiveError(
                "filter parameter should be callable or None")
        self._filter: Optional[Callable[[pd.DataFrame],
                                        pd.DataFrame]] = filter_
        # Stream that exposes a position via tell(); opened later.
        self._input_stream: Optional[io.IOBase] = None
        self._input_encoding: Optional[str] = None
        self._input_compression: Optional[str] = None
        self._input_size = 0  # length of the file or input stream when available
        self._file_mode = False
        self._table_params: Dict[str, Any] = dict(name=self.name,
                                                  fillvalues=fillvalues)
Example #4
0
 def __init__(self, **kwds: Any) -> None:
     """Set up the binary-join module.

     The keywords accepted by ``join`` are extracted into
     ``join_kwds``; the remaining keywords go to the parent
     constructor.
     """
     super().__init__(**kwds)
     # Keep only the keywords that join() actually accepts.
     self.join_kwds = filter_kwds(kwds, join)
     self._dialog = Dialog(self)  # Dialog helper bound to this module
Example #5
0
 def __init__(self, **kwds: Any) -> None:
     """Progressive wrapper around :func:`join`.

     Supported join keywords (``on=None``, ``how='left'``,
     ``lsuffix=''``, ``rsuffix=''``, ``sort=False``, ``name=None``)
     are collected into ``join_kwds``; everything else is passed to
     the parent module constructor.
     """
     super().__init__(**kwds)
     # Keep only the keywords that join() actually accepts.
     self.join_kwds = filter_kwds(kwds, join)
Example #6
0
 def __init__(self, **kwds: Any) -> None:
     """Set up the paste module.

     Splits the keywords accepted by ``join`` into ``join_kwds`` and
     forwards the rest to the parent constructor.
     """
     super().__init__(**kwds)
     # Keep only the keywords that join() actually accepts.
     self.join_kwds = filter_kwds(kwds, join)