Example No. 1
import pickle

from pyspark.ml import PipelineModel
from pyspark.ml.common import _py2java


def bundle(spark_session, spark_df_schema, spark_pipeline_model):
    # Serialize the DataFrame schema to JSON and pickle it next to the model.
    spark_df_schema_as_json = spark_df_schema.json()
    with open('model.schema', 'wb') as pkl_file:
        pickle.dump(spark_df_schema_as_json, pkl_file)

    spark_pipeline_model.write().overwrite().save('model.parquet')

    ## SERVE FROM HERE
    with open('model.schema', 'rb') as pkl_file:
        from pyspark.sql.types import _parse_datatype_json_string
        restored_spark_df_schema_as_json = pickle.load(pkl_file)
        restored_spark_df_schema = _parse_datatype_json_string(
            restored_spark_df_schema_as_json)
        restored_spark_df_schema_as_java = _py2java(spark_session,
                                                    restored_spark_df_schema)

    restored_spark_pipeline_model = PipelineModel.read().load('model.parquet')
    restored_spark_pipeline_model_as_java = restored_spark_pipeline_model._to_java()

    # Hand the (schema, pipeline model) pair to jpmml-sparkml for PMML conversion.
    return spark_session._jvm.org.jpmml.sparkml.ConverterUtil.toPMMLByteArray(
        restored_spark_df_schema_as_java,
        restored_spark_pipeline_model_as_java)
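
A minimal usage sketch, assuming an older jpmml-sparkml release that still ships org.jpmml.sparkml.ConverterUtil on the Spark classpath; the trivial pipeline below is hypothetical:

from pyspark.sql import SparkSession
from pyspark.ml import Pipeline
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1.0, 2.0), (2.0, 4.0)], ["x", "y"])
pipeline = Pipeline(stages=[
    VectorAssembler(inputCols=["x"], outputCol="features"),
    LinearRegression(featuresCol="features", labelCol="y"),
])
model = pipeline.fit(df)
# bundle() persists, restores, and converts the model via the JVM call above.
pmml_bytes = bundle(spark, df.schema, model)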
Example No. 2
def ext_schema_of_xml_df(df, options=None):
    # Infer the schema of a single column of XML strings via spark-xml's
    # Scala API. Assumes a global `spark` SparkSession is in scope.
    from pyspark.sql.types import _parse_datatype_json_string

    assert len(df.columns) == 1
    scala_options = spark._jvm.PythonUtils.toScalaMap(options or {})
    # Reach into the Scala package object com.databricks.spark.xml.
    java_xml_module = getattr(getattr(
        spark._jvm.com.databricks.spark.xml, "package$"), "MODULE$")
    java_schema = java_xml_module.schema_of_xml_df(df._jdf, scala_options)
    return _parse_datatype_json_string(java_schema.json())
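
A hedged usage sketch, assuming the spark-xml package is on the classpath and the global spark session mentioned above; the column name and sample document are illustrative:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
# Exactly one column of raw XML strings, as the assert requires.
xml_df = spark.createDataFrame([("<row><a>1</a></row>",)], ["xml"])
inferred = ext_schema_of_xml_df(xml_df)
print(inferred.simpleString())  # e.g. struct<a:bigint>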
Example No. 3
    def schema(self):
        """Returns the schema of this DataFrame (represented by
        a L{StructType}).

        >>> df.schema()
        StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true)))
        """
        return _parse_datatype_json_string(self._jdf.schema().json())
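
The JSON round trip that schema() performs can be exercised directly on a StructType, with no JVM involved; a minimal sketch:

from pyspark.sql.types import (StructType, StructField, IntegerType,
                               StringType, _parse_datatype_json_string)

schema = StructType([
    StructField("age", IntegerType(), True),
    StructField("name", StringType(), True),
])
# .json() serializes the type; _parse_datatype_json_string restores it.
assert _parse_datatype_json_string(schema.json()) == schema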
Example No. 4
    def check_datatype(datatype):
        pickled = pickle.loads(pickle.dumps(datatype))
        assert datatype == pickled
        scala_datatype = self.spark._jsparkSession.parseDataType(
            datatype.json())
        python_datatype = _parse_datatype_json_string(
            scala_datatype.json())
        assert datatype == python_datatype
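
Within such a test (self.spark bound to a SparkSession), the helper is exercised with concrete types; the MapType below is illustrative:

from pyspark.sql.types import MapType, StringType, LongType

# Round-trips the type through pickle and through the JVM parser.
check_datatype(MapType(StringType(), LongType(), True))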
Example No. 5
    def schema(self):
        """Returns the schema of this DataFrame (represented by
        a L{StructType}).

        >>> df.schema()
        StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true)))
        """
        return _parse_datatype_json_string(self._jdf.schema().json())
Example No. 6
    def schema(self):
        """Returns the schema of this :class:`DataFrame` as a :class:`types.StructType`.

        >>> df.schema
        StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true)))
        """
        if self._schema is None:
            self._schema = _parse_datatype_json_string(self._jdf.schema().json())
        return self._schema
Example No. 7
    def schema(self):
        """Returns the schema of this :class:`DataFrame` as a :class:`types.StructType`.

        >>> df.schema
        StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true)))
        """
        if self._schema is None:
            self._schema = _parse_datatype_json_string(self._jdf.schema().json())
        return self._schema
Example No. 8
    def schema(self):
        """Returns the schema of this :class:`DataFrame` as a :class:`types.StructType`.

        >>> df.schema
        StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true)))
        """
        if self._schema is None:
            try:
                self._schema = _parse_datatype_json_string(self._jdf.schema().json())
            except AttributeError as e:
                raise Exception("Unable to parse datatype from schema. %s" % e)
        return self._schema
Example No. 9
    def schema(self):
        """Returns the schema of this :class:`DataFrame` as a :class:`types.StructType`.

        >>> df.schema
        StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true)))
        """
        if self._schema is None:
            try:
                self._schema = _parse_datatype_json_string(
                    self._jdf.schema().json())
            except AttributeError as e:
                raise Exception("Unable to parse datatype from schema. %s" % e)
        return self._schema
Example No. 10
    def imageSchema(self):
        """
        Returns the image schema.

        :return: a :class:`StructType` with a single column of images
               named "image" (nullable).

        .. versionadded:: 2.3.0
        """

        if self._imageSchema is None:
            ctx = SparkContext._active_spark_context
            jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageSchema()
            self._imageSchema = _parse_datatype_json_string(jschema.json())
        return self._imageSchema
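
These properties are reachable through the ImageSchema singleton in pyspark.ml.image; a short sketch, assuming Spark 2.4+ so the built-in "image" data source is available (the load path is hypothetical):

from pyspark.sql import SparkSession
from pyspark.ml.image import ImageSchema

spark = SparkSession.builder.getOrCreate()
print(ImageSchema.imageSchema)   # one nullable "image" struct column
# The "image" data source yields DataFrames with this schema.
df = spark.read.format("image").load("/path/to/images")
df.printSchema()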
Example No. 11
    def columnSchema(self):
        """
        Returns the schema for the image column.

        :return: a :class:`StructType` for image column,
            ``struct<origin:string, height:int, width:int, nChannels:int, mode:int, data:binary>``.

        .. versionadded:: 2.4.0
        """

        if self._columnSchema is None:
            ctx = SparkContext._active_spark_context
            jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.columnSchema()
            self._columnSchema = _parse_datatype_json_string(jschema.json())
        return self._columnSchema
Example No. 12
    def imageSchema(self):
        """
        Returns the image schema.

        :return: a :class:`StructType` with a single column of images
               named "image" (nullable).

        .. versionadded:: 2.3.0
        """

        if self._imageSchema is None:
            ctx = SparkContext._active_spark_context
            jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageSchema()
            self._imageSchema = _parse_datatype_json_string(jschema.json())
        return self._imageSchema
Example No. 13
    def columnSchema(self):
        """
        Returns the schema for the image column.

        :return: a :class:`StructType` for image column,
            ``struct<origin:string, height:int, width:int, nChannels:int, mode:int, data:binary>``.

        .. versionadded:: 2.4.0
        """

        if self._columnSchema is None:
            ctx = SparkContext._active_spark_context
            jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.columnSchema()
            self._columnSchema = _parse_datatype_json_string(jschema.json())
        return self._columnSchema
Example No. 14
    def imageSchema(self) -> StructType:
        """
        Returns the image schema.

        Returns
        -------
        :class:`StructType`
            with a single column of images named "image" (nullable)
            and having the same type returned by :meth:`columnSchema`.

        .. versionadded:: 2.3.0
        """

        if self._imageSchema is None:
            ctx = SparkContext._active_spark_context
            assert ctx is not None and ctx._jvm is not None
            jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageSchema()
            self._imageSchema = cast(
                StructType, _parse_datatype_json_string(jschema.json()))
        return self._imageSchema
Example No. 15
    def check_datatype(datatype):
        pickled = pickle.loads(pickle.dumps(datatype))
        assert datatype == pickled
        scala_datatype = self.sqlCtx._ssql_ctx.parseDataType(datatype.json())
        python_datatype = _parse_datatype_json_string(scala_datatype.json())
        assert datatype == python_datatype
Example No. 16
    def check_datatype(datatype):
        pickled = pickle.loads(pickle.dumps(datatype))
        assert datatype == pickled
        scala_datatype = self.spark._jsparkSession.parseDataType(datatype.json())
        python_datatype = _parse_datatype_json_string(scala_datatype.json())
        assert datatype == python_datatype
Example No. 17
    def to_spark_schema(self, avsc_json: str) -> DataType:
        avsc_jvm = self.spark._jvm.org.apache.avro.Schema.Parser().parse(avsc_json)
        spark_type_jvm = self.schema_converters.toSqlType(avsc_jvm)
        return _parse_datatype_json_string(spark_type_jvm.dataType().json())
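
A usage sketch under stated assumptions: self.spark is a SparkSession, self.schema_converters wraps org.apache.spark.sql.avro.SchemaConverters, and the spark-avro jar is on the classpath; the Avro record and the converter instance are illustrative:

avsc_json = '''
{"type": "record", "name": "User", "fields": [
  {"name": "id", "type": "long"},
  {"name": "name", "type": "string"}
]}
'''
# converter: a hypothetical instance of the class the method above belongs to.
spark_type = converter.to_spark_schema(avsc_json)
print(spark_type.simpleString())  # e.g. struct<id:bigint,name:string>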
Example No. 18
import sys

from pyspark.files import SparkFiles
from pyspark.serializers import CloudPickleSerializer
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import ArrayType, DataType
from pyspark.sql.types import DoubleType, IntegerType, FloatType, LongType, StringType
from pyspark.sql.types import _parse_datatype_json_string

# argv[1]: output file, argv[2]: JSON-serialized return type, argv[3]: model archive.
f = open(sys.argv[1], 'wb')

return_type = _parse_datatype_json_string(sys.argv[2])
print("function return type: " + str(return_type))
archive_path = sys.argv[3]


def predict(*args):
    import pandas
    from mlflow.pyfunc.spark_model_cache import SparkModelCache
    from mlflow.pyfunc import load_pyfunc  # pylint: disable=cyclic-import
    # Resolve the element type for array-valued returns.
    elem_type = return_type

    if isinstance(elem_type, ArrayType):
        elem_type = elem_type.elementType

    supported_types = [
        IntegerType, LongType, FloatType, DoubleType, StringType
    ]

    if not any(isinstance(elem_type, x) for x in supported_types):
        # The original snippet is truncated here; the full script rejects
        # unsupported return types at this point.
        raise ValueError("Unsupported return type: %s" % elem_type)
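
For reference, the JSON the script expects in argv[2] is just a Spark type serialized with .json(); a minimal sketch of producing and parsing one:

from pyspark.sql.types import ArrayType, DoubleType, _parse_datatype_json_string

return_type_json = ArrayType(DoubleType()).json()
assert _parse_datatype_json_string(return_type_json) == ArrayType(DoubleType())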
Example No. 19
    def check_datatype(datatype):
        pickled = pickle.loads(pickle.dumps(datatype))
        assert datatype == pickled
        scala_datatype = self.sqlCtx._ssql_ctx.parseDataType(datatype.json())
        python_datatype = _parse_datatype_json_string(scala_datatype.json())
        assert datatype == python_datatype