Example #1
def extract_data_source(ti):
    conn = None  # make sure the name exists for the finally block
    try:
        conn = OdbcHook().get_conn()
        # task_ids as a plain string returns the single XCom value;
        # a list here would return a list and break the query parameter below.
        most_recent_date = ti.xcom_pull(key='most_recent_date',
                                        task_ids='catchup_date')
        print('most_recent_date:', most_recent_date)
        if not most_recent_date:
            print('run without xcom value')
            sql = 'SELECT TOP 10 * FROM [dbo].[MV_Amarok_DailyProdWellData]'
            df = pd.read_sql(sql, conn)
        else:
            print('run with xcom value')
            sql = 'SELECT TOP 10 * FROM [dbo].[MV_Amarok_DailyProdWellData] WHERE Date > ?'
            df = pd.read_sql(sql, conn, params=[most_recent_date])
        print(sql)
        df.to_json('/home/mdkumarmca/data/daily_production_data.json',
                   orient='records')

    except pyodbc.Error as err:
        print('Failed to read data from table', err)
    finally:
        if conn:
            conn.close()
            print('ODBC connection is closed')
Example #2
def extract_most_recent_date(ti):
    conn = None  # make sure the name exists for the finally block
    try:
        conn = OdbcHook().get_conn()
        cursor = conn.cursor()
        # task_ids as a plain string returns the single XCom value.
        most_recent_date = ti.xcom_pull(key='most_recent_date',
                                        task_ids='catchup_date')
        print(most_recent_date)
        if not most_recent_date:
            sql = 'SELECT MAX(minDate.Date) AS Date FROM (SELECT TOP 10 * FROM [dbo].[MV_Amarok_DailyProdWellData] ORDER BY Date) AS minDate'
            cursor.execute(sql)
        else:
            sql = 'SELECT MAX(minDate.Date) AS Date FROM (SELECT TOP 10 * FROM [dbo].[MV_Amarok_DailyProdWellData] WHERE Date > ? ORDER BY Date) AS minDate'
            cursor.execute(sql, most_recent_date)

        print(sql)
        row = cursor.fetchone()
        if row is not None:
            ti.xcom_push(key='most_recent_date',
                         value=row.Date.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3])
            print(row.Date)
        cursor.close()

    except pyodbc.Error as err:
        print('Failed to read data from table', err)
    finally:
        if conn:
            conn.close()
            print('ODBC connection is closed')
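Examples #1 and #2 are plain Python callables that receive the task instance (ti) from the Airflow context, so they are normally wired into a DAG with PythonOperator. Below is a minimal sketch under the assumption that the date-probing task is registered with task_id 'catchup_date' (the id both callables pull their XCom from); the DAG name and schedule are placeholders, and the imports are the ones the two functions above rely on.

import pandas as pd
import pyodbc
from datetime import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.providers.odbc.hooks.odbc import OdbcHook

with DAG(dag_id='daily_production_extract',  # placeholder name
         start_date=datetime(2021, 1, 1),
         schedule_interval='@daily',
         catchup=False) as dag:
    # Pushes the 'most_recent_date' XCom that both callables pull.
    catchup_date = PythonOperator(task_id='catchup_date',
                                  python_callable=extract_most_recent_date)
    extract = PythonOperator(task_id='extract_data_source',
                             python_callable=extract_data_source)
    catchup_date >> extract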
Example #3
 def get_data(self):
     odbcHook = OdbcHook(conn_id=self.odbc_conn_id)
     with closing(odbcHook.get_conn()) as conn:
         with closing(conn.cursor()) as cur:
             cur.execute(self.odbc_sql)
             rows = cur.fetchall()
             rows = [row[0] for row in rows]
             return rows
Example #4
def del_duplicados(**kwargs):
    try:
        # Single quotes for the T-SQL string literal.
        query = "EXEC [dbo].[Remover_Duplicados] @NOMBRE_TABLA = 'Unificado'"
        odbc_hook = OdbcHook(odbc_conn_id='con_ej_pi_mssql',
                             database='Testing_ETL',
                             driver='{ODBC Driver 17 for SQL Server}')
        odbc_hook.run(query)
    except Exception as e:
        logging.error(f'Query execution failed: {query} - {e}')
Example #5
def load_to_db(query, i):
    try:
        odbc_hook = OdbcHook(odbc_conn_id='con_ej_pi_mssql',
                             database='Testing_ETL',
                             driver='{ODBC Driver 17 for SQL Server}')
        odbc_hook.run(query)
        return i + 1
    except Exception as e:
        logging.error(f'Query execution failed: {query} - {e}')
    return i
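OdbcHook.run used by the two callables above comes from the shared DbApiHook base, so it also accepts bound parameters. A small hedged sketch follows; the connection id mirrors the examples, but the table, columns, and values are illustrative only.

odbc_hook = OdbcHook(odbc_conn_id='con_ej_pi_mssql',
                     database='Testing_ETL',
                     driver='{ODBC Driver 17 for SQL Server}')
# Placeholder table and columns, bound through pyodbc-style '?' markers.
odbc_hook.run('INSERT INTO Unificado (Nombre, Valor) VALUES (?, ?)',
              parameters=('demo', 42))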
Example #6
    def get_hook(self=None, hook_params=None, conn_params=None):
        hook_params = hook_params or {}
        conn_params = conn_params or {}
        connection = Connection(
            **{
                **dict(login='******', password='******', host='host', schema='schema', port=1234),
                **conn_params,
            }
        )

        hook = OdbcHook(**hook_params)
        hook.get_connection = mock.Mock()
        hook.get_connection.return_value = connection
        return hook
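Because get_connection is mocked, the fixture above lets a test exercise OdbcHook without a real DSN. A hedged usage sketch; the driver and host values are made up, and the assertions lean on the odbc_connection_string property exposed by the ODBC provider hook.

# Illustrative only: the values below are not from the original tests.
hook = get_hook(hook_params=dict(driver='My Driver'),
                conn_params=dict(host='my_host'))
assert 'DRIVER={My Driver}' in hook.odbc_connection_string
assert 'SERVER=my_host' in hook.odbc_connection_string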
Example #7
def sample_select():
    odbc_hook = OdbcHook()
    # above will use 'odbc_default' connection
    # which you must setup in Admin->Connections
    # http://airflow.apache.org/docs/apache-airflow-providers-odbc/stable/connections/odbc.html#configuring-the-connection
    
    cnxn = odbc_hook.get_conn()

    cursor = cnxn.cursor()
    cursor.execute("SELECT @@SERVERNAME, @@VERSION;")
    row = cursor.fetchone()
    while row:
        print("Server Name:" + row[0])
        print("Server Version:" + row[1])
        row = cursor.fetchone()
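The comments above point at the Admin UI; the same 'odbc_default' connection can also be supplied as a URI through an environment variable, which Airflow reads when no stored connection exists. A sketch with placeholder credentials; the driver query parameter ends up in the connection extras that OdbcHook reads.

import os

# Placeholder credentials and host; the driver must be installed on the worker.
os.environ['AIRFLOW_CONN_ODBC_DEFAULT'] = (
    'odbc://my_user:my_pass@my_host:1433/my_db'
    '?driver=ODBC+Driver+17+for+SQL+Server'
)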
Example #8
def _rank_movies(odbc_conn_id, wasb_conn_id, ratings_container,
                 rankings_container, **context):
    year = context["execution_date"].year
    month = context["execution_date"].month

    # Determine storage account name, needed for query source URL.
    blob_account_name = WasbHook.get_connection(wasb_conn_id).login

    query = RANK_QUERY.format(
        year=year,
        month=month,
        blob_account_name=blob_account_name,
        blob_container=ratings_container,
    )
    logging.info(f"Executing query: {query}")

    odbc_hook = OdbcHook(odbc_conn_id, driver="ODBC Driver 17 for SQL Server")

    with odbc_hook.get_conn() as conn:
        with conn.cursor() as cursor:
            cursor.execute(query)

            rows = cursor.fetchall()
            colnames = [field[0] for field in cursor.description]

    ranking = pd.DataFrame.from_records(rows, columns=colnames)
    logging.info(f"Retrieved {ranking.shape[0]} rows")

    # Write ranking to temp file.
    logging.info(
        f"Writing results to {rankings_container}/{year}/{month:02d}.csv")
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = path.join(tmp_dir, "ranking.csv")
        ranking.to_csv(tmp_path, index=False)

        # Upload file to Azure Blob.
        wasb_hook = WasbHook(wasb_conn_id)
        wasb_hook.load_file(
            tmp_path,
            container_name=rankings_container,
            blob_name=f"{year}/{month:02d}.csv",
        )
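_rank_movies takes its connection ids and container names as arguments and reads execution_date from the templated context, so it is meant to be driven by a PythonOperator inside a DAG. A hedged wiring sketch; every id and container name below is a placeholder, and RANK_QUERY itself is not reproduced here.

# Hypothetical task definition (to be placed inside a DAG block).
rank_movies = PythonOperator(
    task_id='rank_movies',
    python_callable=_rank_movies,
    op_kwargs={
        'odbc_conn_id': 'my_odbc',
        'wasb_conn_id': 'my_wasb',
        'ratings_container': 'ratings',
        'rankings_container': 'rankings',
    },
)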
Example #9
    def execute(self, context):
        mongoHook = MongoHook(conn_id=self.mongo_conn_id)
        self.mongo_db = mongoHook.connection.schema

        log.info('odbc_conn_id: %s', self.odbc_conn_id)
        log.info('postgres_conn_id: %s', self.postgres_conn_id)
        log.info('mongo_conn_id: %s', self.mongo_conn_id)
        log.info('postgres_sql: %s', self.postgres_sql)
        # log.info('prev_exec_date: %s', self.prev_exec_date)
        log.info('mongo_db: %s', self.mongo_db)
        log.info('mongo_collection: %s', self.mongo_collection)

        odbcHook = OdbcHook(self.odbc_conn_id)

        well_data = self.get_data()
        log.info('postgres well data: %s', well_data)
        most_recent_date = self.get_most_recent_date(mongoHook)
        print(most_recent_date)
        if most_recent_date:
            print('store most recent date inside airflow variable')
            Variable.set("most_recent_date", most_recent_date)

        with closing(odbcHook.get_conn()) as conn:
            for index, well in well_data.iterrows():
                print(well['well_name'], well['is_newly_added'])
                if well is not None and well['is_newly_added']:
                    sql = 'SELECT *  FROM [dbo].[MV_Amarok_DailyProdWellDemoData] where Name = ?'
                    df = pd.read_sql(sql, conn, params=[well['well_name']])
                else:
                    sql = 'SELECT *  FROM [dbo].[MV_Amarok_DailyProdWellDemoData] where Name = ? and Date > ?'
                    df = pd.read_sql(
                        sql,
                        conn,
                        params=[well['well_name'], most_recent_date])

                if not df.empty:
                    data_dict = df.to_dict("records")
                    self.insert_records(mongoHook, data_dict)
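insert_records is called above but not shown; below is a minimal, hypothetical version on the same operator class, assuming MongoHook.insert_many and the mongo_collection/mongo_db attributes logged earlier.

    # Hypothetical helper, not the original implementation.
    def insert_records(self, mongoHook, data_dict):
        mongoHook.insert_many(self.mongo_collection,
                              data_dict,
                              mongo_db=self.mongo_db)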
Example #10
 def get_hook(self):
     if self.conn_type == 'mysql':
         from airflow.providers.mysql.hooks.mysql import MySqlHook
         return MySqlHook(mysql_conn_id=self.conn_id)
     elif self.conn_type == 'google_cloud_platform':
         from airflow.gcp.hooks.bigquery import BigQueryHook
         return BigQueryHook(bigquery_conn_id=self.conn_id)
     elif self.conn_type == 'postgres':
         from airflow.providers.postgres.hooks.postgres import PostgresHook
         return PostgresHook(postgres_conn_id=self.conn_id)
     elif self.conn_type == 'pig_cli':
         from airflow.providers.apache.pig.hooks.pig import PigCliHook
         return PigCliHook(pig_cli_conn_id=self.conn_id)
     elif self.conn_type == 'hive_cli':
         from airflow.providers.apache.hive.hooks.hive import HiveCliHook
         return HiveCliHook(hive_cli_conn_id=self.conn_id)
     elif self.conn_type == 'presto':
         from airflow.providers.presto.hooks.presto import PrestoHook
         return PrestoHook(presto_conn_id=self.conn_id)
     elif self.conn_type == 'hiveserver2':
         from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook
         return HiveServer2Hook(hiveserver2_conn_id=self.conn_id)
     elif self.conn_type == 'sqlite':
         from airflow.providers.sqlite.hooks.sqlite import SqliteHook
         return SqliteHook(sqlite_conn_id=self.conn_id)
     elif self.conn_type == 'jdbc':
         from airflow.providers.jdbc.hooks.jdbc import JdbcHook
         return JdbcHook(jdbc_conn_id=self.conn_id)
     elif self.conn_type == 'mssql':
         from airflow.providers.microsoft.mssql.hooks.mssql import MsSqlHook
         return MsSqlHook(mssql_conn_id=self.conn_id)
     elif self.conn_type == 'odbc':
         from airflow.providers.odbc.hooks.odbc import OdbcHook
         return OdbcHook(odbc_conn_id=self.conn_id)
     elif self.conn_type == 'oracle':
         from airflow.providers.oracle.hooks.oracle import OracleHook
         return OracleHook(oracle_conn_id=self.conn_id)
     elif self.conn_type == 'vertica':
         from airflow.providers.vertica.hooks.vertica import VerticaHook
         return VerticaHook(vertica_conn_id=self.conn_id)
     elif self.conn_type == 'cloudant':
         from airflow.providers.cloudant.hooks.cloudant import CloudantHook
         return CloudantHook(cloudant_conn_id=self.conn_id)
     elif self.conn_type == 'jira':
         from airflow.providers.jira.hooks.jira import JiraHook
         return JiraHook(jira_conn_id=self.conn_id)
     elif self.conn_type == 'redis':
         from airflow.providers.redis.hooks.redis import RedisHook
         return RedisHook(redis_conn_id=self.conn_id)
     elif self.conn_type == 'wasb':
         from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
         return WasbHook(wasb_conn_id=self.conn_id)
     elif self.conn_type == 'docker':
         from airflow.providers.docker.hooks.docker import DockerHook
         return DockerHook(docker_conn_id=self.conn_id)
     elif self.conn_type == 'azure_data_lake':
         from airflow.providers.microsoft.azure.hooks.azure_data_lake import AzureDataLakeHook
         return AzureDataLakeHook(azure_data_lake_conn_id=self.conn_id)
     elif self.conn_type == 'azure_cosmos':
         from airflow.providers.microsoft.azure.hooks.azure_cosmos import AzureCosmosDBHook
         return AzureCosmosDBHook(azure_cosmos_conn_id=self.conn_id)
     elif self.conn_type == 'cassandra':
         from airflow.providers.apache.cassandra.hooks.cassandra import CassandraHook
         return CassandraHook(cassandra_conn_id=self.conn_id)
     elif self.conn_type == 'mongo':
         from airflow.providers.mongo.hooks.mongo import MongoHook
         return MongoHook(conn_id=self.conn_id)
     elif self.conn_type == 'gcpcloudsql':
         from airflow.gcp.hooks.cloud_sql import CloudSQLDatabaseHook
         return CloudSQLDatabaseHook(gcp_cloudsql_conn_id=self.conn_id)
     elif self.conn_type == 'grpc':
         from airflow.providers.grpc.hooks.grpc import GrpcHook
         return GrpcHook(grpc_conn_id=self.conn_id)
     raise AirflowException("Unknown hook type {}".format(self.conn_type))
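This dispatcher appears to live on the Connection model, so callers resolve a provider hook purely from conn_type instead of importing hooks directly. A short, hedged usage sketch with a made-up connection id.

from airflow.models import Connection

# Illustrative only: conn_type 'odbc' routes to OdbcHook.
conn = Connection(conn_id='my_odbc', conn_type='odbc')
hook = conn.get_hook()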
Example #11
def check_datasource_connection():
    # Opening a connection and a cursor against the default ODBC connection
    # is enough to confirm the data source is reachable.
    conn = OdbcHook().get_conn()
    cursor = conn.cursor()
    cursor.close()
    conn.close()