Пример #1
0
def initialize_etl():
    """Register the ``sql_path`` and ``data_file_path`` variables.

    A session is opened with ``Session()``, one ``models.Variable`` is stored
    per entry (each with its own commit), and the session is closed again
    whether or not storing succeeded.
    """
    logging.info('Creating connections, pool and sql path')

    session = Session()
    try:
        # One commit per variable: an earlier variable stays persisted even
        # if a later one fails.
        for key, path in (
                ("sql_path", "/usr/local/airflow/sql"),
                ("data_file_path", "/usr/local/airflow/data"),
        ):
            new_var = models.Variable()
            new_var.key = key
            new_var.set_val(path)
            session.add(new_var)
            session.commit()
    finally:
        # Always hand the session back, including when add/commit raises.
        session.close()
Пример #2
0
def update_variables(variables):
    """
    Create or overwrite variables from a mapping.

    Each key is looked up through ``models.Variable``: when a record is found
    its ``val`` is replaced, otherwise a new record is added. Everything is
    committed in a single commit after the loop, and the session is closed
    regardless of the outcome.

    :param variables: Dictionary mapping variable keys to their new values.
    :return: None.
    """
    print('Updating variables...')
    db = settings.Session()
    try:
        for name, value in variables.items():
            record = db.query(models.Variable).filter_by(key=name).first()
            if not record:
                # Unknown key: insert a fresh record.
                db.add(models.Variable(key=name, val=value))
                print('\tADDED: variable %s' % name)
                continue

            # Known key: overwrite the stored value.
            record.val = value
            db.merge(record)
            print('\tUPDATED: variable %s' % name)

        db.commit()
        print('Changes commited.')
    finally:
        db.close()
Пример #3
0
def init_datavault_example():
    """Create the connections and variable for the datavault example.

    Registers three postgres connections (``oltp``, ``dwh``, ``datavault``)
    and the ``sql_path`` variable. Every record is committed on its own, and
    the session is closed even if one of the steps raises.
    """
    logging.info('Creating connections, pool and sql path')

    session = Session()

    def create_new_conn(session, attributes):
        """Build a ``models.Connection`` from ``attributes`` and commit it."""
        new_conn = models.Connection()
        new_conn.conn_id = attributes.get("conn_id")
        new_conn.conn_type = attributes.get('conn_type')
        new_conn.host = attributes.get('host')
        new_conn.port = attributes.get('port')
        new_conn.schema = attributes.get('schema')
        new_conn.login = attributes.get('login')
        new_conn.set_password(attributes.get('password'))

        session.add(new_conn)
        session.commit()

    try:
        # The three connections differ only in their id and schema.
        for conn_id, schema in (
                ("oltp", "orders"),
                ("dwh", "dwh"),
                ("datavault", "datavault"),
        ):
            create_new_conn(
                session, {
                    "conn_id": conn_id,
                    "conn_type": "postgres",
                    "host": "postgres",
                    "port": 5432,
                    "schema": schema,
                    "login": "******",
                    "password": "******"
                })

        new_var = models.Variable()
        new_var.key = "sql_path"
        new_var.set_val("/usr/local/airflow/sql")
        session.add(new_var)
        session.commit()
    finally:
        # Always hand the session back, including when add/commit raises.
        session.close()
def initialize_etl_example():
    """Create the connections, variable and pool for the ETL example.

    Registers the ``mssql`` and ``postgres_dwh`` connections, the ``sql_path``
    variable and a 10-slot ``postgres_dwh`` pool. Every record is committed
    on its own, and the session is closed even if one of the steps raises.
    """
    logging.info('Creating connections, pool and sql path')

    session = Session()

    def create_new_conn(session, attributes):
        """Build a ``models.Connection`` from ``attributes`` and commit it."""
        new_conn = models.Connection()
        new_conn.conn_id = attributes.get("conn_id")
        new_conn.conn_type = attributes.get('conn_type')
        new_conn.host = attributes.get('host')
        new_conn.port = attributes.get('port')
        new_conn.schema = attributes.get('schema')
        new_conn.login = attributes.get('login')
        new_conn.set_password(attributes.get('password'))

        session.add(new_conn)
        session.commit()

    try:
        create_new_conn(
            session, {
                "conn_id": "mssql",
                "conn_type": "MS SQL Server",
                "host": "mssql",
                "port": 1433,
                "schema": "master",
                "login": "******",
                "password": "******"
            })

        create_new_conn(
            session, {
                "conn_id": "postgres_dwh",
                "conn_type": "postgres",
                "host": "postgres",
                "port": 5432,
                "schema": "dwh",
                "login": "******",
                "password": "******"
            })

        new_var = models.Variable()
        new_var.key = "sql_path"
        new_var.set_val("/usr/local/airflow/sql")
        session.add(new_var)
        session.commit()

        new_pool = models.Pool()
        new_pool.pool = "postgres_dwh"
        new_pool.slots = 10
        new_pool.description = "Allows max. 10 connections to the DWH"

        session.add(new_pool)
        session.commit()
    finally:
        # Always hand the session back, including when add/commit raises.
        session.close()
Пример #5
0
def initialize_example():
    """Create one connection per INI section, then the ``sql_path`` variable.

    The INI file is read with ``ConfigParser``. Each section name becomes the
    connection id, and the text before its first underscore becomes the
    connection type. The session is closed even if one of the steps raises.
    """
    logging.info('Reading config')
    session = Session()

    def create_new_conn(session, attributes):
        """Build a ``models.Connection`` from ``attributes`` and commit it."""
        new_conn = models.Connection()
        new_conn.conn_id = attributes.get("conn_id")
        new_conn.conn_type = attributes.get('conn_type')
        new_conn.host = attributes.get('host')
        new_conn.port = attributes.get('port')
        new_conn.schema = attributes.get('schema')
        new_conn.login = attributes.get('login')
        new_conn.set_password(attributes.get('password'))

        session.add(new_conn)
        session.commit()

    def config(filename='../.postgresql.ini'):
        """Create a connection for every section found in ``filename``.

        Each section must provide ``host``, ``port``, ``dbname``, ``user``
        and ``password``; a missing key raises ``KeyError``.
        """
        parser = ConfigParser(strict=False)
        # ConfigParser.read() silently skips a file it cannot open, in which
        # case there are no sections and no connections are created.
        parser.read(filename)
        logging.info('Creating connections and sql path')

        for section in parser.sections():
            # Build the option dict per section so values from a previous
            # section can never leak into this one.
            db_config = dict(parser.items(section))

            create_new_conn(
                session, {
                    "conn_id": section,
                    "conn_type": section.split('_')[0],
                    "host": db_config['host'],
                    # ConfigParser yields strings, so the port is passed on
                    # as text exactly as written in the file.
                    "port": db_config['port'],
                    "schema": db_config['dbname'],
                    "login": db_config['user'],
                    "password": db_config['password']
                })

    try:
        config()

        new_var = models.Variable()
        new_var.key = "sql_path"
        new_var.set_val("/Users/jackychu/airflow/sql")
        session.add(new_var)
        session.commit()
    finally:
        # Always hand the session back, including when add/commit raises.
        session.close()
Пример #6
0
def init_hive_example():
    """Create the connections and variables for the hive example.

    Registers the ``postgres_oltp`` and ``hive_staging`` connections, then
    the ``sql_path`` and ``hive_sql_path`` variables (both variables share a
    single commit). The session is closed even if one of the steps raises.
    """
    logging.info('Creating connections, pool and sql path')

    session = Session()

    def create_new_conn(session, attributes):
        """Build a ``models.Connection`` from ``attributes`` and commit it.

        ``extra`` is optional; when the key is absent ``None`` is passed to
        ``set_extra``.
        """
        new_conn = models.Connection()
        new_conn.conn_id = attributes.get("conn_id")
        new_conn.conn_type = attributes.get('conn_type')
        new_conn.host = attributes.get('host')
        new_conn.port = attributes.get('port')
        new_conn.schema = attributes.get('schema')
        new_conn.login = attributes.get('login')
        new_conn.set_extra(attributes.get('extra'))
        new_conn.set_password(attributes.get('password'))

        session.add(new_conn)
        session.commit()

    try:
        create_new_conn(
            session, {
                "conn_id": "postgres_oltp",
                "conn_type": "postgres",
                "host": "postgres",
                "port": 5432,
                "schema": "orders",
                "login": "******",
                "password": "******"
            })

        create_new_conn(
            session, {
                "conn_id": "hive_staging",
                "conn_type": "hive_cli",
                "host": "hive",
                "schema": "default",
                "port": 10000,
                "login": "******",
                "password": "******",
                # The extra settings are stored as a JSON string.
                "extra": json.dumps({
                    "hive_cli_params": "",
                    "auth": "none",
                    "use_beeline": "true"
                })
            })

        for key, path in (
                ("sql_path", "/usr/local/airflow/sql"),
                ("hive_sql_path", "/usr/local/airflow/hql"),
        ):
            new_var = models.Variable()
            new_var.key = key
            new_var.set_val(path)
            session.add(new_var)
        # Both variables are persisted together.
        session.commit()
    finally:
        # Always hand the session back, including when add/commit raises.
        session.close()
def generate_config():
    """Create connections, a variable and a pool unless they already exist.

    Every record is looked up first and only inserted when no row matches;
    otherwise an informational message is logged. All lookups go through the
    same session instance that performs the inserts, and that session is
    closed even if one of the steps raises.
    """
    logging.info('Creating connections, pool and sql path')

    session = Session()

    def create_new_conn(session, attributes):
        """Insert a ``models.Connection`` unless its ``conn_id`` is taken."""
        conn_id = attributes.get("conn_id")
        # Query via the session instance (not the ``Session`` factory) so
        # the check runs in the session that performs the insert.
        existing = session.query(models.Connection).filter(
            models.Connection.conn_id == conn_id).count()
        if existing:
            logging.info('Connection {} already exists'.format(conn_id))
            return

        new_conn = models.Connection()
        new_conn.conn_id = conn_id
        new_conn.conn_type = attributes.get('conn_type')
        new_conn.host = attributes.get('host')
        new_conn.port = attributes.get('port')
        new_conn.schema = attributes.get('schema')
        new_conn.login = attributes.get('login')
        new_conn.set_password(attributes.get('password'))
        session.add(new_conn)
        session.commit()

    try:
        # (conn_id, conn_type, port, schema); host and credentials are shared.
        # NOTE(review): postgres_oltp points at schema "dwh", same as
        # postgres_dwh -- possibly a copy-paste slip; confirm intended schema.
        for conn_id, conn_type, port, schema in (
                ("mysql_oltp", "mysql", 3306, "employees"),
                ("mysql_dwh", "mysql", 3306, "dwh"),
                ("postgres_oltp", "postgres", 5432, "dwh"),
                ("postgres_dwh", "postgres", 5432, "dwh"),
        ):
            create_new_conn(
                session, {
                    "conn_id": conn_id,
                    "conn_type": conn_type,
                    "host": "host.docker.internal",
                    "port": port,
                    "schema": schema,
                    "login": "******",
                    "password": "******"
                })

        if session.query(models.Variable).filter(
                models.Variable.key == "sql_template_paths").count() == 0:
            new_var = models.Variable()
            new_var.key = "sql_template_paths"
            new_var.set_val("./sql_templates")
            session.add(new_var)
            session.commit()
        else:
            logging.info('Variable sql_template_paths already exists')

        if session.query(models.Pool).filter(
                models.Pool.pool == "mysql_dwh").count() == 0:
            new_pool = models.Pool()
            new_pool.pool = "mysql_dwh"
            new_pool.slots = 10
            new_pool.description = "Allows max. 10 connections to the DWH"
            session.add(new_pool)
            session.commit()
        else:
            logging.info('Pool mysql_dwh already exists')
    finally:
        # Always hand the session back, including when a query/commit raises.
        session.close()