Exemplo n.º 1
0
def pyarrow_transform(batch: pa.Table) -> pa.Table:
    """Filter to "Versicolor" rows and replace sepal length by a scaled copy.

    Rows whose "variety" column equals "Versicolor" are kept. A new
    "normalized.sepal.length" column is appended, holding each remaining
    "sepal.length" value divided by the maximum of that column among the
    remaining rows, and the original "sepal.length" column is dropped.
    """
    versicolor_rows = batch.filter(pac.equal(batch["variety"], "Versicolor"))

    # The maximum is taken after filtering, so scaling is relative to the
    # kept rows only.
    sepal_length = versicolor_rows["sepal.length"]
    scaled_length = pac.divide(sepal_length, pac.max(sepal_length))

    with_scaled = versicolor_rows.append_column(
        "normalized.sepal.length", scaled_length
    )
    return with_scaled.drop(["sepal.length"])
def _split_into_per_realization_tables(table: pa.Table) -> Dict[int, pa.Table]:
    """Split ``table`` into one sub-table per distinct value of "REAL".

    Returns a dict keyed by each unique "REAL" value; every entry holds the
    rows of ``table`` matching that value, with the "REAL" column removed.
    """
    real_numbers = table.column("REAL").unique().to_pylist()
    real_column = table["REAL"]

    # pylint: disable=no-member
    return {
        real_no: table.filter(
            pc.is_in(real_column, value_set=pa.array([real_no]))
        ).drop(["REAL"])
        for real_no in real_numbers
    }
Exemplo n.º 3
0
def find_intersected_dates_between_realizations(table: pa.Table) -> np.ndarray:
    """Return the dates that occur in every realization of ``table``.

    The table must have both a "REAL" and a "DATE" column. Neither column
    needs to be sorted. If the table contains no "REAL" values at all, an
    empty ``datetime64`` array is returned.
    """
    common_dates = None

    for real_no in table.column("REAL").unique().to_numpy():
        # pylint: disable=no-member
        rows_for_real = table.filter(
            pc.is_in(table["REAL"], value_set=pa.array([real_no]))
        )
        real_dates = rows_for_real.column("DATE").unique().to_numpy()

        # The first realization seeds the result; each later one narrows it.
        # Both operands are already de-duplicated, hence assume_unique=True.
        common_dates = (
            real_dates
            if common_dates is None
            else np.intersect1d(common_dates, real_dates, assume_unique=True)
        )

    if common_dates is None:
        # No realizations were seen, so there is nothing to intersect.
        return np.empty(0, dtype=np.datetime64)

    return common_dates
Exemplo n.º 4
0
def pyarrow_filter_rows(batch: pyarrow.Table) -> pyarrow.Table:
    """Return the rows of ``batch`` whose "variety" column equals "Versicolor"."""
    is_versicolor = pyarrow.compute.equal(batch["variety"], "Versicolor")
    return batch.filter(is_versicolor)