Python Load примеры использования

Язык программирования: Python

Пространство имен/Пакет: optimus.io.load

Класс/Тип: Load

Примеров на hotexamples.com: 5

Python Load — найдено 5 примеров. Это лучшие примеры кода на Python для optimus.io.load.Load, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Load(5)

Основные методы

Load (5)

Пример #1

Показать файл

    def __init__(self,
                 master="local[*]",
                 app_name="optimus",
                 checkpoint=False,
                 path=None,
                 file_system="local",
                 verbose=False,
                 dl=False):
        """
        Transform and roll out

        Configure logging, create the shared Spark instance and attach the
        helper objects (``create``, ``load``, ``read``, ``profiler``, ``ml``).

        :param master: 'Master', 'local' or ip address to a cluster
        :param app_name: Spark app name
        :param checkpoint: If True create a checkpoint folder
        :param path: path to the checkpoint folder. When None the current
            working directory is used.
        :param file_system: 'local' or 'hadoop'
        :param verbose: If True, configure logging at INFO level with a
            message-only format.
        :param dl: If True, register the spark-deep-learning package before
            the Spark instance is created and expose ``self.dl``.
        """

        if verbose is True:
            logging.basicConfig(format="%(message)s", level=logging.INFO)
        elif verbose is False:
            # NOTE(review): ``propagate`` is an attribute of Logger objects;
            # assigning it on the ``logging`` module does not silence anything,
            # and ``logging.disable(logging.NOTSET)`` does not suppress any
            # level. Both calls are kept unchanged for backward compatibility.
            logging.propagate = False
            logging.disable(logging.NOTSET)

        if dl is True:
            # The package must be registered before the Spark instance exists.
            Optimus.add_spark_packages([
                "databricks:spark-deep-learning:1.1.0-spark2.3-s_2.11 pyspark-shell"
            ])

            Spark.instance = Spark(master, app_name)
            # Imported lazily so the deep learning module is only loaded on demand.
            from optimus.dl.models import DL
            self.dl = DL()
        else:
            Spark.instance = Spark(master, app_name)

        if path is None:
            path = os.getcwd()

        # Banner. A raw string is used because the ASCII art contains
        # backslashes that are not valid escape sequences.
        logging.info(r"""
                             ____        __  _                     
                            / __ \____  / /_(_)___ ___  __  _______
                           / / / / __ \/ __/ / __ `__ \/ / / / ___/
                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) 
                          \____/ .___/\__/_/_/ /_/ /_/\__,_/____/  
                              /_/                                  
                              """)

        logging.info(STARTING_OPTIMUS)
        if checkpoint is True:
            self.set_check_point_folder(path, file_system)

        logging.info(SUCCESS)

        # Helper objects exposed on the instance.
        self.create = Create()
        self.load = Load()
        self.read = self.spark.read
        self.profiler = Profiler()
        self.ml = ML()

Пример #2

Показать файл

    def __init__(self, master="local[*]", app_name="optimus", checkpoint=False, path=None, file_system="local",
                 verbose=False):
        """
        Transform and roll out

        Configure logging, create the shared Spark instance and attach the
        helper objects (``create``, ``load``, ``read``).

        :param master: 'Master', 'local' or ip address to a cluster
        :param app_name: Spark app name
        :param checkpoint: If True create a checkpoint folder
        :param path: path to the checkpoint folder. When None the current
            working directory is used.
        :param file_system: 'local' or 'hadoop'
        :param verbose: If True, configure logging at INFO level with a
            message-only format.
        """

        if verbose is True:
            logging.basicConfig(format="%(message)s", level=logging.INFO)
        elif verbose is False:
            # NOTE(review): ``propagate`` is an attribute of Logger objects;
            # assigning it on the ``logging`` module does not silence anything,
            # and ``logging.disable(logging.NOTSET)`` does not suppress any
            # level. Both calls are kept unchanged for backward compatibility.
            logging.propagate = False
            logging.disable(logging.NOTSET)

        if path is None:
            path = os.getcwd()

        # Banner. A raw string is used because the ASCII art contains
        # backslashes that are not valid escape sequences.
        logging.info(r"""
                             ____        __  _                     
                            / __ \____  / /_(_)___ ___  __  _______
                           / / / / __ \/ __/ / __ `__ \/ / / / ___/
                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) 
                          \____/ .___/\__/_/_/ /_/ /_/\__,_/____/  
                              /_/                                  
                              """)

        logging.info(STARTING_OPTIMUS)
        Spark.instance = Spark(master, app_name)
        if checkpoint is True:
            self.set_check_point_folder(path, file_system)

        logging.info(SUCCESS)

        # Helper objects exposed on the instance.
        self.create = Create()
        self.load = Load()
        self.read = self.spark.read

Пример #3

Показать файл

    def __init__(self,
                 session=None,
                 master="local[*]",
                 app_name="optimus",
                 checkpoint=False,
                 path=None,
                 file_system="local",
                 verbose=False,
                 server=False,
                 repositories=None,
                 packages=None,
                 jars=None,
                 driver_class_path=None,
                 options=None,
                 additional_options=None,
                 comm=None,
                 load_avro=False,
                 cache=True):
        """
        Transform and roll out

        Either create a new Spark session from the given settings or reuse
        the one passed in ``session``, then attach the helper objects
        (``create``, ``load``, ``read``, ``profiler``, ``ml``).

        :param session: Existing session to reuse. When given, the session
            creation settings below (master, app_name, options, packages,
            repositories, jars, driver_class_path, additional_options,
            verbose, load_avro, checkpoint, path, file_system) are not applied.
        :param master: 'Master', 'local' or ip address to a cluster
        :param app_name: Spark app name
        :param path: path to the checkpoint folder. When None the current
            working directory is used.
        :param checkpoint: If True create a checkpoint folder
        :param file_system: 'local' or 'hadoop'
        :param verbose: Value forwarded to ``self.verbose``.
        :param server: If truthy, start a ``Server`` and keep it in
            ``self.server_instance``.
        :param additional_options:


        :param options: Configuration options that are passed to spark-submit.
            See `the list of possible options
            <https://spark.apache.org/docs/2.4.1/configuration.html#available-properties>`_.
            Note that any options set already through PYSPARK_SUBMIT_ARGS will override
            these.
        :type options: (dict[str,str])
        :param repositories: List of additional maven repositories for package lookup.
        :type repositories: (list[str])

        :param packages: Spark packages that should be installed.
        :type packages: (list[str])

        :param jars: Full paths to jar files that we want to include to the session.
        :type jars: (list[str])

        :param driver_class_path: Entries stored in ``self.driver_class_path``.
        :param comm: If True a new ``Comm`` is created; any other value is
            stored as-is in ``Comm.instance``.
        :param load_avro: "2.4" or "2.3" selects the matching spark-avro
            package; any other value adds no avro package.
        :param cache: Value stored in ``Optimus.cache``.
        """

        self.preserve = False

        Optimus.cache = cache

        Comm.instance = Comm() if comm is True else comm

        if jars is None:
            jars = []

        if driver_class_path is None:
            driver_class_path = []

        if session is None:
            # Creating Spark Session
            # If a Spark session is not passed by argument create one

            self.master = master
            self.app_name = app_name

            if options is None:
                options = {}

            self.options = options

            # Initialize as lists
            self.packages = val_to_list(packages)
            self.repositories = val_to_list(repositories)
            self.jars = val_to_list(jars)
            self.driver_class_path = val_to_list(driver_class_path)

            self.additional_options = additional_options

            self.verbose(verbose)

            # Because avro depends on an external package you can decide if it should be loaded
            if load_avro == "2.4":
                self._add_spark_packages(
                    ["org.apache.spark:spark-avro_2.12:2.4.3"])

            elif load_avro == "2.3":
                self._add_spark_packages(
                    ["com.databricks:spark-avro_2.11:4.0.0"])

            # The same jar list is registered twice: once as URIs and once
            # as POSIX paths for the driver class path.
            jdbc_jars = [
                "/jars/spark-redis-2.4.1-SNAPSHOT-jar-with-dependencies.jar",
                "/jars/RedshiftJDBC42-1.2.16.1027.jar",
                "/jars/mysql-connector-java-8.0.16.jar", "/jars/ojdbc8.jar",
                "/jars/postgresql-42.2.5.jar", "/jars/presto-jdbc-0.224.jar",
                "/jars/spark-cassandra-connector_2.11-2.4.1.jar",
                "/jars/sqlite-jdbc-3.27.2.1.jar",
                "/jars/mssql-jdbc-7.4.1.jre8.jar"
            ]

            self._add_jars(absolute_path(jdbc_jars, "uri"))
            self._add_driver_class_path(absolute_path(jdbc_jars, "posix"))

            self._create_session()

            if path is None:
                path = os.getcwd()

            if checkpoint is True:
                self._set_check_point_folder(path, file_system)

        else:
            # If a session is passed by arguments just save the reference
            # logger.print("Spark session")
            Spark.instance = Spark().load(session)

        # Banner. A raw string is used because the ASCII art contains
        # backslashes that are not valid escape sequences.
        logger.print(r"""
                             ____        __  _                     
                            / __ \____  / /_(_)___ ___  __  _______
                           / / / / __ \/ __/ / __ `__ \/ / / / ___/
                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) 
                          \____/ .___/\__/_/_/ /_/ /_/\__,_/____/  
                              /_/                                  
                              """)

        logger.print(STARTING_OPTIMUS)

        # Pickling
        Spark.instance.sc.addPyFile(absolute_path("/infer.py"))

        if server:
            logger.print("Starting Optimus Server...")
            s = Server()
            s.start()
            self.server_instance = s

        logger.print(SUCCESS)

        # Helper objects exposed on the instance.
        self.create = Create()
        self.load = Load()
        self.read = self.spark.read

        # Create singleton profiler
        Profiler.instance = Profiler()
        self.profiler = Profiler.instance
        self.ml = ML()

        # Set global output as html
        self.output("html")

Пример #4

Показать файл

Файл: optimus.py Проект: schatzr/Optimus

    def __init__(self,
                 session=None,
                 master="local[*]",
                 app_name="optimus",
                 checkpoint=False,
                 path=None,
                 file_system="local",
                 verbose=False,
                 dl=False,
                 server=False,
                 repositories=None,
                 packages=None,
                 jars=None,
                 options=None,
                 additional_options=None,
                 enricher_host="localhost",
                 enricher_port=27017,
                 queue_url=None,
                 queue_exchange=None,
                 queue_routing_key="optimus"):
        """
        Transform and roll out

        Either create a new Spark session from the given settings or reuse
        the one passed in ``session``, then attach the helper objects
        (``create``, ``load``, ``read``, ``profiler``, ``ml``, ``enricher``).

        :param session: Existing session to reuse. When given it is stored
            directly in ``Spark.instance`` and the session creation settings
            (including verbose, dl and checkpoint) are not applied.
        :param master: 'Master', 'local' or ip address to a cluster
        :param app_name: Spark app name
        :param path: path to the checkpoint folder. When None the current
            working directory is used.
        :param checkpoint: If True create a checkpoint folder
        :param file_system: 'local' or 'hadoop'
        :param verbose: Value forwarded to ``self.verbose``.
        :param dl: If True, register the spark-deep-learning package before
            the session is started and expose ``self.dl``.
        :param server: If truthy, start a ``Server`` and keep it in
            ``self.server_instance``.
        :param additional_options:


        :param options: Configuration options that are passed to spark-submit.
            See `the list of possible options
            <https://spark.apache.org/docs/2.1.0/configuration.html#available-properties>`_.
            Note that any options set already through PYSPARK_SUBMIT_ARGS will override
            these.
        :type options: (dict[str,str])
        :param repositories: List of additional maven repositories for package lookup.
        :type repositories: (list[str])

        :param packages: Spark packages that should be installed.
        :type packages: (list[str])

        :param jars: Full paths to jar files that we want to include to the session.
        :type jars: (list[str])

        :param enricher_host: Host passed to ``Enricher``.
        :param enricher_port: Port passed to ``Enricher``.
        :param queue_url: Passed through to ``Profiler``.
        :param queue_exchange: Passed through to ``Profiler``.
        :param queue_routing_key: Passed through to ``Profiler``.
        """
        if session is None:
            # If a Spark session is not passed by argument create it

            self.master = master
            self.app_name = app_name

            if options is None:
                options = {}

            self.options = options

            self.packages = [] if packages is None else val_to_list(packages)
            self.repositories = repositories

            # ``jars`` is documented as list[str]; the empty default must be a
            # list as well (it used to be an empty dict).
            if jars is None:
                jars = []

            self.jars = jars
            self.additional_options = additional_options

            self.verbose(verbose)

            # Load Avro.
            # TODO: if the Spark 2.4 version is going to be used this is not necessary.
            #  Maybe we can check a priori which version of Spark is going to be used
            # self._add_spark_packages(["com.databricks:spark-avro_2.11:4.0.0"])

            if dl is True:
                # The package must be registered before the session starts.
                self._add_spark_packages(
                    ["databricks:spark-deep-learning:1.5.0-spark2.4-s_2.11"])

                self._start_session()

                # Imported lazily so the deep learning module is only loaded on demand.
                from optimus.dl.models import DL
                self.dl = DL()
            else:
                self._start_session()

            if path is None:
                path = os.getcwd()

            if checkpoint is True:
                self._set_check_point_folder(path, file_system)

        else:
            # If a session is passed by arguments just save the reference
            Spark.instance = session

        # Banner. A raw string is used because the ASCII art contains
        # backslashes that are not valid escape sequences.
        logger.print(r"""
                             ____        __  _                     
                            / __ \____  / /_(_)___ ___  __  _______
                           / / / / __ \/ __/ / __ `__ \/ / / / ___/
                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) 
                          \____/ .___/\__/_/_/ /_/ /_/\__,_/____/  
                              /_/                                  
                              """)

        logger.print(STARTING_OPTIMUS)

        if server:
            logger.print("Starting Optimus Server...")
            s = Server()
            s.start()
            self.server_instance = s

        logger.print(SUCCESS)

        # Helper objects exposed on the instance.
        self.create = Create()
        self.load = Load()
        self.read = self.spark.read
        self.profiler = Profiler(queue_url=queue_url,
                                 queue_exchange=queue_exchange,
                                 queue_routing_key=queue_routing_key)
        self.ml = ML()
        self.enricher = Enricher(
            op=self,
            host=enricher_host,
            port=enricher_port,
        )

Пример #5

Показать файл

    def __init__(self, session=None, master="local[*]", app_name="optimus", checkpoint=False, path=None,
                 file_system="local",
                 verbose=False,
                 server=False,
                 repositories=None,
                 packages=None,
                 jars=None,
                 driver_class_path=None,
                 options=None,
                 additional_options=None,
                 queue_url=None,
                 queue_exchange=None,
                 queue_routing_key="optimus"
                 ):

        """
        Transform and roll out

        Either create a new Spark session from the given settings or reuse
        the one passed in ``session``, then attach the helper objects
        (``create``, ``load``, ``read``, ``profiler``, ``ml``).

        :param session: Existing session to reuse. When given, the session
            creation settings (including verbose and checkpoint) are not
            applied.
        :param master: 'Master', 'local' or ip address to a cluster
        :param app_name: Spark app name
        :param path: path to the checkpoint folder. When None the current
            working directory is used.
        :param checkpoint: If True create a checkpoint folder
        :param file_system: 'local' or 'hadoop'
        :param verbose: Value forwarded to ``self.verbose``.
        :param server: If truthy, start a ``Server`` and keep it in
            ``self.server_instance``.
        :param additional_options:


        :param options: Configuration options that are passed to spark-submit.
            See `the list of possible options
            <https://spark.apache.org/docs/2.4.1/configuration.html#available-properties>`_.
            Note that any options set already through PYSPARK_SUBMIT_ARGS will override
            these.
        :type options: (dict[str,str])
        :param repositories: List of additional maven repositories for package lookup.
        :type repositories: (list[str])

        :param packages: Spark packages that should be installed.
        :type packages: (list[str])

        :param jars: Full paths to jar files that we want to include to the session.
        :type jars: (list[str])

        :param driver_class_path: Entries forwarded to
            ``self._add_driver_class_path``.
        :param queue_url: Passed through to ``Profiler``.
        :param queue_exchange: Passed through to ``Profiler``.
        :param queue_routing_key: Passed through to ``Profiler``.
        """
        if session is None:
            # If a Spark session is not passed by argument create it

            self.master = master
            self.app_name = app_name

            if options is None:
                options = {}

            self.options = options

            self.packages = [] if packages is None else val_to_list(packages)
            self.repositories = repositories

            # Jars
            # NOTE(review): ``jars`` and ``driver_class_path`` may still be None
            # here - confirm that the two helpers below accept None.
            self.jars = jars
            self._add_jars(jars)

            # Driver class path
            self.driver_class_path = driver_class_path
            self._add_driver_class_path(driver_class_path)

            # Additional Options
            self.additional_options = additional_options

            self.verbose(verbose)

            # Load Avro.
            # TODO:
            #  if the Spark 2.4 version is going to be used this is not necessary.
            #  Maybe we can check a priori which version of Spark is going to be used
            self._add_spark_packages(["com.databricks:spark-avro_2.11:4.0.0"])

            # Directory of this module. Kept in its own local so the ``path``
            # argument (checkpoint folder) is not overwritten.
            module_dir = os.path.dirname(os.path.abspath(__file__))

            def to_posix(files):
                # Prefix every entry with the module directory and normalise
                # the result to a POSIX style path.
                return [Path(module_dir + file).as_posix() for file in files]

            # Add databases jars
            self._add_jars(["../jars/RedshiftJDBC42-1.2.16.1027.jar", "../jars/mysql-connector-java-8.0.16.jar",
                            "../jars/ojdbc7.jar", "../jars/postgresql-42.2.5.jar"])

            self._add_driver_class_path(
                to_posix(["//jars//RedshiftJDBC42-1.2.16.1027.jar", "//jars//mysql-connector-java-8.0.16.jar",
                          "//jars//ojdbc7.jar", "//jars//postgresql-42.2.5.jar"]))

            self._start_session()

            if path is None:
                path = os.getcwd()

            if checkpoint is True:
                self._set_check_point_folder(path, file_system)

        else:
            # If a session is passed by arguments just save the reference

            Spark.instance = Spark().load(session)

        # Banner. A raw string is used because the ASCII art contains
        # backslashes that are not valid escape sequences.
        logger.print(r"""
                             ____        __  _                     
                            / __ \____  / /_(_)___ ___  __  _______
                           / / / / __ \/ __/ / __ `__ \/ / / / ___/
                          / /_/ / /_/ / /_/ / / / / / / /_/ (__  ) 
                          \____/ .___/\__/_/_/ /_/ /_/\__,_/____/  
                              /_/                                  
                              """)

        logger.print(STARTING_OPTIMUS)

        if server:
            logger.print("Starting Optimus Server...")
            s = Server()
            s.start()
            self.server_instance = s

        logger.print(SUCCESS)

        # Helper objects exposed on the instance.
        self.create = Create()
        self.load = Load()
        self.read = self.spark.read
        self.profiler = Profiler(
            queue_url=queue_url,
            queue_exchange=queue_exchange,
            queue_routing_key=queue_routing_key
        )
        self.ml = ML()

        self._load_css()

        # Set global output as html
        self.output("html")