Пример #1
0
    def load_table(config, destination_system, destination_database,
                   destination_environment, destination_table, load_type,
                   emr_cluster_id=None, spark_params=None):
        """Run a load of ``destination_table`` through the Hadoop load executor.

        A ``DataSystem`` is built from ``config`` and the destination
        system/database/environment identifiers. The load is only executed
        when that data system reports an EMR database type backed by S3
        storage; any other combination is rejected with an exception.

        Args:
            config: Forwarded to ``DataSystem`` and, as ``config_path``, to
                ``LoadExecutorHadoop.create`` (presumably a path to the
                configuration file -- confirm against callers).
            destination_system: Destination system identifier.
            destination_database: Destination database identifier.
            destination_environment: Destination environment identifier.
            destination_table: Table to load; forwarded to the executor.
            load_type: Load type; forwarded to the executor unchanged.
            emr_cluster_id: Optional EMR cluster id forwarded to the executor.
            spark_params: Optional Spark parameters, forwarded to the executor
                as ``spark_params_str`` (presumably a serialized string --
                confirm against callers).

        Raises:
            m3d_exceptions.M3DUnsupportedDatabaseTypeException: If the data
                system's database type is not EMR.
            m3d_exceptions.M3DUnsupportedStorageException: If the database
                type is EMR but the storage type is not S3.
        """
        data_system = DataSystem(
            config,
            destination_system,
            destination_database,
            destination_environment
        )

        # Reject unsupported back ends up front; the database type is
        # checked before the storage type.
        if data_system.database_type != DataSystem.DatabaseType.EMR:
            raise m3d_exceptions.M3DUnsupportedDatabaseTypeException(
                data_system.database_type
            )
        if data_system.storage_type != DataSystem.StorageType.S3:
            raise m3d_exceptions.M3DUnsupportedStorageException(
                data_system.storage_type
            )

        # hadoop: the executor module is imported only on the supported path.
        from m3d.hadoop.load.load_executor_hadoop import LoadExecutorHadoop

        executor = LoadExecutorHadoop.create(
            config_path=config,
            destination_system=destination_system,
            destination_database=destination_database,
            destination_environment=destination_environment,
            destination_table=destination_table,
            load_type=load_type,
            emr_cluster_id=emr_cluster_id,
            spark_params_str=spark_params
        )
        executor.run()
Пример #2
0
    def drop_table(config, destination_system, destination_database,
                   destination_environment, destination_table,
                   emr_cluster_id=None):
        """Drop ``destination_table`` via the EMR system.

        A ``Table`` object is created first so that the destination
        technology and storage type can be inspected. The drop is only
        performed for the HIVE technology on S3 storage; any other
        combination is rejected with an exception.

        Args:
            config: Forwarded to ``Table`` and ``EMRSystem`` (presumably a
                path to the configuration file -- confirm against callers).
            destination_system: Destination system identifier.
            destination_database: Destination database identifier.
            destination_environment: Destination environment identifier.
            destination_table: Table to drop.
            emr_cluster_id: Optional EMR cluster id passed to ``EMRSystem``.

        Raises:
            m3d_exceptions.M3DUnsupportedDestinationSystemException: If the
                table's destination technology is not HIVE.
            m3d_exceptions.M3DUnsupportedStorageException: If the technology
                is HIVE but the table's storage type is not S3.
        """
        # The table object is needed only to look up the destination
        # technology and the storage type.
        table = Table(
            config,
            destination_system,
            destination_database,
            destination_environment,
            destination_table
        )
        technology = table.get_destination_technology()

        # Reject unsupported targets up front; the technology is checked
        # before the storage type.
        if technology != DataSystem.SystemTechnology.HIVE:
            raise m3d_exceptions.M3DUnsupportedDestinationSystemException(
                technology
            )
        if table.storage_type != DataSystem.StorageType.S3:
            raise m3d_exceptions.M3DUnsupportedStorageException(
                table.storage_type
            )

        # hadoop: the EMR module is imported only on the supported path.
        from m3d.hadoop.emr.emr_system import EMRSystem

        emr = EMRSystem(
            config,
            destination_system,
            destination_database,
            destination_environment,
            emr_cluster_id
        )
        # Tag the cluster with the name of this API method before dropping.
        emr.add_cluster_tag(
            EMRSystem.EMRClusterTag.API_METHOD,
            M3D.drop_table.__name__
        )
        emr.drop_table(destination_table)
Пример #3
0
    def load_table(config, cluster_mode, destination_system,
                   destination_database, destination_environment,
                   destination_table, load_type, emr_cluster_id=None,
                   spark_params=None):
        """Run a load of ``destination_table`` through the Hadoop load executor.

        A ``Table`` object is created first so that the destination
        technology and storage type can be inspected. The load is only
        executed for the HIVE technology on S3 storage; any other
        combination is rejected with an exception.

        Args:
            config: Forwarded to ``Table`` and, as ``config_path``, to
                ``LoadExecutorHadoop.create`` (presumably a path to the
                configuration file -- confirm against callers).
            cluster_mode: Forwarded unchanged to ``Table`` and the executor.
            destination_system: Destination system identifier.
            destination_database: Destination database identifier.
            destination_environment: Destination environment identifier.
            destination_table: Table to load; forwarded to the executor.
            load_type: Load type; forwarded to the executor unchanged.
            emr_cluster_id: Optional EMR cluster id forwarded to the executor.
            spark_params: Optional Spark parameters, forwarded to the executor
                as ``spark_params_str`` (presumably a serialized string --
                confirm against callers).

        Raises:
            m3d_exceptions.M3DUnsupportedDestinationSystemException: If the
                table's destination technology is not HIVE.
            m3d_exceptions.M3DUnsupportedStorageException: If the technology
                is HIVE but the table's storage type is not S3.
        """
        # The table object is needed only to look up the destination
        # technology and the storage type.
        table = Table(
            config,
            cluster_mode,
            destination_system,
            destination_database,
            destination_environment,
            destination_table
        )
        technology = table.get_destination_technology()

        # Reject unsupported targets up front; the technology is checked
        # before the storage type.
        if technology != DataSystem.SystemTechnology.HIVE:
            raise m3d_exceptions.M3DUnsupportedDestinationSystemException(
                technology
            )
        if table.storage_type != DataSystem.StorageType.S3:
            raise m3d_exceptions.M3DUnsupportedStorageException(
                table.storage_type
            )

        # hadoop: the executor module is imported only on the supported path.
        from m3d.hadoop.load.load_executor_hadoop import LoadExecutorHadoop

        executor = LoadExecutorHadoop.create(
            config_path=config,
            cluster_mode=cluster_mode,
            destination_system=destination_system,
            destination_database=destination_database,
            destination_environment=destination_environment,
            destination_table=destination_table,
            load_type=load_type,
            emr_cluster_id=emr_cluster_id,
            spark_params_str=spark_params
        )
        executor.run()