def test_strptime():
    """Check that ``pc.strptime`` parses ``%m/%d/%Y`` strings and keeps nulls."""
    # Build the expected second-resolution timestamps first; the null entry
    # in the input must come back as a null in the output.
    want = pa.array(
        [datetime(2020, 5, 1), None, datetime(1900, 12, 13)],
        type=pa.timestamp("s"),
    )

    raw_dates = pa.array(["5/1/2020", None, "12/13/1900"])
    parsed = pc.strptime(raw_dates, format="%m/%d/%Y", unit="s")

    assert parsed == want
def __init__(self, schema, table):
    """Build an in-memory columnar table from an Arrow-style table.

    The columns of ``table`` are copied into ``self.columns`` and normalised:

    * ``int64`` columns are cast to ``float64``.
    * ``string`` columns whose first value looks like ``YYYY-MM-DD`` are
      parsed into microsecond timestamps with ``compute.strptime``.

    Args:
        schema: Schema forwarded to the parent initializer; iterated
            afterwards (via ``self.schema``) to build ``attr_to_idx`` from
            each attribute's ``aname``.
        table: Object exposing ``num_rows`` and ``columns``.
    """
    super(InMemoryColumnarTable, self).__init__(schema)
    self.num_rows = table.num_rows
    # Copy so the in-place replacements below never touch a list that the
    # source table may still own.
    self.columns = list(table.columns)

    for idx, column in enumerate(self.columns):
        if column.type.equals(pa.int64()):
            self.columns[idx] = column.cast(pa.float64())
        elif column.type.equals(pa.string()):
            # Only the first value is sampled to decide whether this is a
            # date column. An empty column or a null first value is left
            # untouched instead of raising IndexError / TypeError.
            first = column[0].as_py() if len(column) else None
            if isinstance(first, str) and re.match(
                r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$", first
            ):
                self.columns[idx] = compute.strptime(
                    column, format="%Y-%m-%d", unit='us'
                )

    self.attr_to_idx = {a.aname: i for i, a in enumerate(self.schema)}
def to_arrow_table(self, *, skip_date_conversion: bool = False) -> pa.Table:
    """Export the report data to an Apache Arrow Table.

    Keyword Args:
        skip_date_conversion:
            Whether to skip automatically converting date columns to
            the ``timestamp[ns]`` format. Defaults to ``False``.

    Returns:
        The newly constructed Apache Arrow Table.

    Raises:
        MissingOptionalComponents:
            If ``pyarrow`` cannot be used.

    .. versionadded:: 3.2.0

    .. versionchanged:: 3.6.0
        Time series columns are now converted to ``timestamp[ns]``
        format instead of ``timestamp[us]`` format.
    """
    if analytix.can_use("pyarrow"):
        import pyarrow as pa
        import pyarrow.compute as pc
    else:
        raise errors.MissingOptionalComponents("pyarrow")

    # Rows are transposed into columns before being handed to Arrow.
    table = pa.table(list(zip(*self.data["rows"])), names=self.columns)
    if skip_date_conversion:
        return table

    # Checked in a fixed order so the choice is deterministic.
    formats = {"day": "%Y-%m-%d", "month": "%Y-%m"}
    col = next((name for name in formats if name in table.column_names), None)
    if col is not None:
        dt_series = pc.strptime(table.column(col), format=formats[col], unit="ns")
        # Replace the column where it actually lives and keep its own name;
        # a hard-coded (0, "day") would rename "month" columns and overwrite
        # whatever happens to sit at position 0.
        position = table.column_names.index(col)
        table = table.set_column(position, col, dt_series)
        _log.info(f"Converted {col!r} column to timestamp[ns] format")

    return table