Exemplo n.º 1
0
def get_schema_df(a: int) -> FlyteSchema[superset_cols]:
    """
    Write a two-row sample dataframe into a new FlyteSchema and return it.

    ``a`` becomes the first entry of the "Age" column; every other value in
    the frame is fixed.
    """
    schema = FlyteSchema()
    writer = schema.open()
    sample = pd.DataFrame(
        {
            "Name": ["Tom", "Joseph"],
            "Age": [a, 22],
            "Height": [160, 178],
        }
    )
    writer.write(sample)
    return schema
Exemplo n.º 2
0
    def to_python_value(
        self, ctx: FlyteContext, lv: Literal,
        expected_python_type: Type[pandera.typing.DataFrame]
    ) -> pandera.typing.DataFrame:
        """
        Turn a schema literal into a dataframe wrapped by a pandera schema.

        :param ctx: context whose ``file_access`` supplies the local scratch
            directory and the directory download routine.
        :param lv: literal to convert; it must carry ``scalar.schema``.
        :param expected_python_type: type handed to ``self._pandera_schema``
            to obtain the callable applied to the loaded dataframe.
        :return: result of calling that pandera schema on the dataframe read
            from the FlyteSchema.
        :raises AssertionError: if ``lv`` is falsy or has no scalar schema.
        """
        if not (lv and lv.scalar and lv.scalar.schema):
            raise AssertionError(
                "Can only convert a literal schema to a pandera schema")

        def downloader(x, y):
            # Delegate to the context's file access; arguments are forwarded
            # unchanged in the order FlyteSchema supplies them.
            ctx.file_access.download_directory(x, y)

        schema = FlyteSchema(
            local_path=ctx.file_access.get_random_local_directory(),
            remote_path=lv.scalar.schema.uri,
            downloader=downloader,
            supported_mode=SchemaOpenMode.READ,
        )
        return self._pandera_schema(expected_python_type)(schema.open().all())
Exemplo n.º 3
0
def upload_result() -> Result:
    """
    Build a ``Result`` pointing at freshly created local artifacts.

    Flytekit will upload FlyteFile, FlyteDirectory, and FlyteSchema to blob store (GCP, S3)

    A temporary directory receives ``schema.parquet`` (a two-row sample
    dataframe) and is used for both the schema and the directory fields; a
    separate temporary file containing ``b"Hello world!"`` backs the file
    field.
    """
    import os

    df = pd.DataFrame({"Name": ["Tom", "Joseph"], "Age": [20, 22]})
    temp_dir = tempfile.mkdtemp(prefix="flyte-")
    df.to_parquet(os.path.join(temp_dir, "schema.parquet"))

    # Close the file before its path is handed out: leaving the handle open
    # leaks it and may leave the written bytes sitting in the buffer instead
    # of on disk. delete=False keeps the file around after the close.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(b"Hello world!")

    return Result(
        schema=FlyteSchema(temp_dir),
        file=FlyteFile(tmp_file.name),
        directory=FlyteDirectory(temp_dir),
    )
Exemplo n.º 4
0
def convert_timestamp_column(dataframe: FlyteSchema,
                             timestamp_column: str) -> FlyteSchema:
    """
    Parse one column of the schema's dataframe into datetimes.

    The full dataframe is read from ``dataframe``, the column named by
    ``timestamp_column`` is replaced with the output of ``pd.to_datetime``,
    and that dataframe is returned.
    """
    frame = dataframe.open().all()
    parsed = pd.to_datetime(frame[timestamp_column])
    frame[timestamp_column] = parsed
    return frame
Exemplo n.º 5
0
def print_schema(s: FlyteSchema):
    """Read the whole schema into a dataframe and print it as Markdown."""
    markdown_table = s.open().all().to_markdown()
    print(markdown_table)
Exemplo n.º 6
0
def manipulate_athena_schema(s: FlyteSchema) -> FlyteSchema:
    """
    Keep only the rows whose ``total_vaccinations`` value is not null.

    The full dataframe is read from ``s`` and the filtered dataframe is
    returned.
    """
    frame = s.open().all()
    has_value = frame.total_vaccinations.notnull()
    return frame[has_value]