Example #1
    def execute(self, context: Dict[str, str]):
        mssql = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
        self.log.info("Dumping Microsoft SQL Server query results to local file")
        with mssql.get_conn() as conn:
            with conn.cursor() as cursor:
                cursor.execute(self.sql)
                with NamedTemporaryFile("w") as tmp_file:
                    # The encoding keyword implies ``import unicodecsv as csv``;
                    # the stdlib csv.writer accepts no such argument.
                    csv_writer = csv.writer(tmp_file, delimiter=self.delimiter, encoding='utf-8')
                    # Build a column-name -> Hive-type mapping from the cursor
                    # metadata, inventing names for anonymous columns.
                    field_dict = OrderedDict()
                    for col_count, field in enumerate(cursor.description, start=1):
                        col_position = "Column{position}".format(position=col_count)
                        field_dict[col_position if field[0] == '' else field[0]] = self.type_map(field[1])
                    csv_writer.writerows(cursor)
                    tmp_file.flush()

                    # The Hive load must run while the NamedTemporaryFile is
                    # still open: the file is deleted as soon as the with
                    # block exits, so this cannot be dedented below it.
                    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
                    self.log.info("Loading file into Hive")
                    hive.load_file(
                        tmp_file.name,
                        self.hive_table,
                        field_dict=field_dict,
                        create=self.create,
                        partition=self.partition,
                        delimiter=self.delimiter,
                        recreate=self.recreate,
                        tblproperties=self.tblproperties,
                    )
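Example #1 relies on a `type_map` helper (called as `self.type_map(field[1])`) to translate pymssql column type codes into Hive column types. A minimal sketch of that mapping, modeled on the Airflow transfer operator but with the exact entries treated as assumptions:

import pymssql

# In the operator this lives on the class as a classmethod; shown here as
# a plain function. Unknown type codes fall back to STRING.
def type_map(mssql_type: int) -> str:
    map_dict = {
        pymssql.BINARY.value: 'INT',
        pymssql.DECIMAL.value: 'FLOAT',
        pymssql.NUMBER.value: 'INT',
    }
    return map_dict.get(mssql_type, 'STRING')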
Example #2
    def execute(self, context: 'Context') -> None:
        big_query_hook = BigQueryHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            location=self.location,
            impersonation_chain=self.impersonation_chain,
        )
        project_id, dataset_id, table_id = self.source_project_dataset_table.split('.')
        BigQueryTableLink.persist(
            context=context,
            task_instance=self,
            dataset_id=dataset_id,
            project_id=project_id,
            table_id=table_id,
        )
        mssql_hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id, schema=self.database)
        # Use the identifiers parsed from source_project_dataset_table above.
        for rows in bigquery_get_data(
            self.log,
            dataset_id,
            table_id,
            big_query_hook,
            self.batch_size,
            self.selected_fields,
        ):
            mssql_hook.insert_rows(
                table=self.mssql_table,
                rows=rows,
                target_fields=self.selected_fields,
                replace=self.replace,
            )
Example #3
    def execute(self, context):
        mssql_hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id, schema=self.database)
        for rows in self._bq_get_data():
            mssql_hook.insert_rows(self.mssql_table, rows, replace=self.replace)
Example #4
    def test_get_conn_should_return_connection(self, get_connection, mssql_get_conn):
        get_connection.return_value = PYMSSQL_CONN
        mssql_get_conn.return_value = mock.Mock()

        hook = MsSqlHook()
        conn = hook.get_conn()

        self.assertEqual(mssql_get_conn.return_value, conn)
        mssql_get_conn.assert_called_once()
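The two extra parameters (`get_connection`, `mssql_get_conn`) imply `mock.patch` decorators and a `PYMSSQL_CONN` fixture that the listing cuts off; Examples #5 and #7 share the same scaffolding. It presumably looks roughly like the sketch below; the patch targets and fixture values are assumptions, not the test suite's exact code:

import unittest
from unittest import mock

from airflow.models import Connection
from airflow.providers.microsoft.mssql.hooks.mssql import MsSqlHook

# Hypothetical fixture carrying the attributes get_conn() reads.
PYMSSQL_CONN = Connection(host='ip', schema='share', login='user', password='pw', port=8081)


class TestMsSqlHook(unittest.TestCase):
    # Decorators apply bottom-up: the first mock argument is the patched
    # get_connection, the second the patched pymssql.connect.
    @mock.patch('pymssql.connect')
    @mock.patch('airflow.providers.microsoft.mssql.hooks.mssql.MsSqlHook.get_connection')
    def test_get_conn_should_return_connection(self, get_connection, mssql_get_conn):
        ...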
Example #5
    def test_get_autocommit_should_return_autocommit_state(self, get_connection, mssql_get_conn):
        get_connection.return_value = PYMSSQL_CONN
        mssql_get_conn.return_value = mock.Mock()
        mssql_get_conn.return_value.autocommit_state = 'autocommit_state'

        hook = MsSqlHook()
        conn = hook.get_conn()

        mssql_get_conn.assert_called_once()
        self.assertEqual(hook.get_autocommit(conn), 'autocommit_state')
Example #6
    def query(self):
        """
        Queries MSSQL and returns a cursor of results.

        :return: mssql cursor
        """
        mssql = MsSqlHook(mssql_conn_id=self.mssql_conn_id)
        conn = mssql.get_conn()
        cursor = conn.cursor()
        cursor.execute(self.sql)
        return cursor
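Since `query()` hands back a live cursor, draining and closing it is the caller's job. A hypothetical caller, where `op` is an instance of the operator above and `process_row` is a placeholder:

cursor = op.query()
try:
    for row in cursor:
        process_row(row)
finally:
    # Close the cursor once the rows are consumed; the connection opened
    # inside query() is otherwise left to the hook/GC.
    cursor.close()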
Example #7
    def test_set_autocommit_should_invoke_autocommit(self, get_connection, mssql_get_conn):
        get_connection.return_value = PYMSSQL_CONN
        mssql_get_conn.return_value = mock.Mock()
        autocommit_value = mock.Mock()

        hook = MsSqlHook()
        conn = hook.get_conn()

        hook.set_autocommit(conn, autocommit_value)
        mssql_get_conn.assert_called_once()
        mssql_get_conn.return_value.autocommit.assert_called_once_with(autocommit_value)
Example #8
def insert_mssql_hook():
    mssql_hook = MsSqlHook(mssql_conn_id='airflow_mssql', schema='airflow')

    rows = [
        ('India', 'Asia'),
        ('Germany', 'Europe'),
        ('Argentina', 'South America'),
        ('Ghana', 'Africa'),
        ('Japan', 'Asia'),
        ('Namibia', 'Africa'),
    ]
    target_fields = ['name', 'continent']
    mssql_hook.insert_rows(table='Country',
                           rows=rows,
                           target_fields=target_fields)
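To actually run `insert_mssql_hook`, it still needs to be wired into a DAG. A minimal sketch using `PythonOperator`; the dag_id, dates, and schedule are illustrative:

import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator

with DAG(
    dag_id='example_mssql_insert',      # illustrative name
    start_date=datetime.datetime(2022, 1, 1),
    schedule_interval=None,             # run only when triggered manually
    catchup=False,
) as dag:
    populate_country = PythonOperator(
        task_id='populate_country',
        python_callable=insert_mssql_hook,
    )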
Example #9
    def get_hook(self) -> Optional[Union[MsSqlHook, 'DbApiHook']]:
        """
        Retrieve the hook as determined by :meth:`~.Connection.get_hook` if one is defined,
        falling back to :class:`~.MsSqlHook` otherwise.

        For example, if the connection ``conn_type`` is ``'odbc'``, :class:`~.OdbcHook` will be used.
        """
        if not self._hook:
            conn = MsSqlHook.get_connection(conn_id=self.mssql_conn_id)
            try:
                self._hook = conn.get_hook()
                self._hook.schema = self.database  # type: ignore[union-attr]
            except AirflowException:
                self._hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id,
                                       schema=self.database)
        return self._hook
Example #10
    def get_hook(self):
        """
        Retrieve the hook as determined by the Connection.

        If conn_type is ``'odbc'``, will use
        :py:class:`~airflow.providers.odbc.hooks.odbc.OdbcHook`.
        Otherwise, :py:class:`~airflow.providers.microsoft.mssql.hooks.mssql.MsSqlHook` will be used.
        """
        if not self._hook:
            conn = MsSqlHook.get_connection(conn_id=self.mssql_conn_id)
            try:
                self._hook: Union[MsSqlHook, OdbcHook] = conn.get_hook()
                self._hook.schema = self.database
            except AirflowException:
                self._hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id, schema=self.database)
        return self._hook
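In Examples #9 and #10 the branch taken depends only on the stored connection's `conn_type`. A hypothetical connection definition that would make `get_hook()` resolve to `OdbcHook` instead of `MsSqlHook`; every value here is illustrative:

from airflow.models import Connection

odbc_conn = Connection(
    conn_id='mssql_via_odbc',
    conn_type='odbc',        # this field alone drives the dispatch
    host='mssql.example.com',
    login='sa',
    schema='airflow',
    port=1433,
)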
Example #11
    def execute(self, context: 'Context') -> None:
        big_query_hook = BigQueryHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            location=self.location,
            impersonation_chain=self.impersonation_chain,
        )
        mssql_hook = MsSqlHook(mssql_conn_id=self.mssql_conn_id, schema=self.database)
        for rows in bigquery_get_data(
            self.log,
            self.dataset_id,
            self.table_id,
            big_query_hook,
            self.batch_size,
            self.selected_fields,
        ):
            mssql_hook.insert_rows(
                table=self.mssql_table,
                rows=rows,
                target_fields=self.selected_fields,
                replace=self.replace,
            )
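Examples #2 and #11 both feed their inserts from `bigquery_get_data`, a generator that pages rows out of BigQuery in `batch_size` chunks. A rough sketch of such a helper built on `BigQueryHook.list_rows`; the paging logic shown is an assumption, not the provider's exact code:

def bigquery_get_data(logger, dataset_id, table_id, big_query_hook, batch_size, selected_fields):
    # Page through the table, yielding one batch of row-value tuples at a time.
    i = 0
    while True:
        rows = big_query_hook.list_rows(
            dataset_id=dataset_id,
            table_id=table_id,
            max_results=batch_size,
            selected_fields=selected_fields,
            start_index=i * batch_size,
        )
        if not rows:
            logger.info('Job finished')
            return
        logger.info('Total extracted rows: %s', i * batch_size + len(rows))
        yield [row.values() for row in rows]
        i += 1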
Example #12
    def get_hook(self):
        if self.conn_type == 'mysql':
            from airflow.providers.mysql.hooks.mysql import MySqlHook
            return MySqlHook(mysql_conn_id=self.conn_id)
        elif self.conn_type == 'google_cloud_platform':
            from airflow.gcp.hooks.bigquery import BigQueryHook
            return BigQueryHook(bigquery_conn_id=self.conn_id)
        elif self.conn_type == 'postgres':
            from airflow.providers.postgres.hooks.postgres import PostgresHook
            return PostgresHook(postgres_conn_id=self.conn_id)
        elif self.conn_type == 'pig_cli':
            from airflow.providers.apache.pig.hooks.pig import PigCliHook
            return PigCliHook(pig_cli_conn_id=self.conn_id)
        elif self.conn_type == 'hive_cli':
            from airflow.providers.apache.hive.hooks.hive import HiveCliHook
            return HiveCliHook(hive_cli_conn_id=self.conn_id)
        elif self.conn_type == 'presto':
            from airflow.providers.presto.hooks.presto import PrestoHook
            return PrestoHook(presto_conn_id=self.conn_id)
        elif self.conn_type == 'hiveserver2':
            from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook
            return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
        elif self.conn_type == 'sqlite':
            from airflow.providers.sqlite.hooks.sqlite import SqliteHook
            return SqliteHook(sqlite_conn_id=self.conn_id)
        elif self.conn_type == 'jdbc':
            from airflow.providers.jdbc.hooks.jdbc import JdbcHook
            return JdbcHook(jdbc_conn_id=self.conn_id)
        elif self.conn_type == 'mssql':
            from airflow.providers.microsoft.mssql.hooks.mssql import MsSqlHook
            return MsSqlHook(mssql_conn_id=self.conn_id)
        elif self.conn_type == 'odbc':
            from airflow.providers.odbc.hooks.odbc import OdbcHook
            return OdbcHook(odbc_conn_id=self.conn_id)
        elif self.conn_type == 'oracle':
            from airflow.providers.oracle.hooks.oracle import OracleHook
            return OracleHook(oracle_conn_id=self.conn_id)
        elif self.conn_type == 'vertica':
            from airflow.providers.vertica.hooks.vertica import VerticaHook
            return VerticaHook(vertica_conn_id=self.conn_id)
        elif self.conn_type == 'cloudant':
            from airflow.providers.cloudant.hooks.cloudant import CloudantHook
            return CloudantHook(cloudant_conn_id=self.conn_id)
        elif self.conn_type == 'jira':
            from airflow.providers.jira.hooks.jira import JiraHook
            return JiraHook(jira_conn_id=self.conn_id)
        elif self.conn_type == 'redis':
            from airflow.providers.redis.hooks.redis import RedisHook
            return RedisHook(redis_conn_id=self.conn_id)
        elif self.conn_type == 'wasb':
            from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
            return WasbHook(wasb_conn_id=self.conn_id)
        elif self.conn_type == 'docker':
            from airflow.providers.docker.hooks.docker import DockerHook
            return DockerHook(docker_conn_id=self.conn_id)
        elif self.conn_type == 'azure_data_lake':
            from airflow.providers.microsoft.azure.hooks.azure_data_lake import AzureDataLakeHook
            return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id)
        elif self.conn_type == 'azure_cosmos':
            from airflow.providers.microsoft.azure.hooks.azure_cosmos import AzureCosmosDBHook
            return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id)
        elif self.conn_type == 'cassandra':
            from airflow.providers.apache.cassandra.hooks.cassandra import CassandraHook
            return CassandraHook(cassandra_conn_id=self.conn_id)
        elif self.conn_type == 'mongo':
            from airflow.providers.mongo.hooks.mongo import MongoHook
            return MongoHook(conn_id=self.conn_id)
        elif self.conn_type == 'gcpcloudsql':
            from airflow.gcp.hooks.cloud_sql import CloudSQLDatabaseHook
            return CloudSQLDatabaseHook(gcp_cloudsql_conn_id=self.conn_id)
        elif self.conn_type == 'grpc':
            from airflow.providers.grpc.hooks.grpc import GrpcHook
            return GrpcHook(grpc_conn_id=self.conn_id)
        raise AirflowException("Unknown hook type {}".format(self.conn_type))