Example #1
def get_plugin_config():
    " Return the plugin configuration "
    config = []
    i = 0
    # Iterate over the configurations
    while True:
        suffix = str(i) if i != 0 else ''  # the first configuration doesn't have a suffix
        try:
            if not configuration.has_option(CONFIG_SECTION,
                                            'REPOSITORY_URL' + suffix):
                break
        except Exception:  # backports.configparser.NoSectionError and friends
            break
        repository_url = configuration.get(CONFIG_SECTION,
                                           'REPOSITORY_URL' + suffix)
        dag = configuration.get(CONFIG_SECTION, 'DAG' + suffix)
        token = (configuration.get(CONFIG_SECTION, 'TOKEN' + suffix)
                 if configuration.has_option(CONFIG_SECTION, 'TOKEN' + suffix)
                 else None)
        config.append({
            'i': i,
            'repository_url': repository_url,
            'dag_id': dag,
            'token': token
        })
        i = i + 1
    return config
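For context, a hedged sketch of the value this helper returns when the section defines a base set of options plus one suffixed set; the URLs, DAG names and token below are purely illustrative, not taken from a real deployment:

# With REPOSITORY_URL/DAG/TOKEN plus REPOSITORY_URL1/DAG1 (and no TOKEN1) in the
# plugin's section, get_plugin_config() would produce a list shaped like this:
example_config = [
    {'i': 0, 'repository_url': 'https://example.com/repo.git',
     'dag_id': 'deploy_dag', 'token': 'secret-token'},
    {'i': 1, 'repository_url': 'https://example.com/other.git',
     'dag_id': 'other_dag', 'token': None},
]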
Example #2
def login(self, request):
    """Login a user. A function is executed on a new user session.
    Analyze request, parse JWT and create a new user object.
    :param self: required parameter for flask_login
    :param request: a request object
    :type request: Request
    """
    if current_user.is_authenticated():
        flash("You are already logged in")
        return redirect(url_for('admin.index'))

    auth_header = request.headers.get('Authorization')
    jwt_obj = parse_jwt_header(auth_header)

    if not jwt_obj:
        response = Response('JWT header is invalid or absent.',
                            mimetype='text/plain')
        return response

    name = jwt_obj.get('name', '')
    email = jwt_obj.get('email', '')
    groups = jwt_obj.get('groups', [])

    admin_group = None
    if conf.has_option('webserver', 'dex_group_admin'):
        admin_group = conf.get('webserver', 'dex_group_admin')

    profiler_group = None
    if conf.has_option('webserver', 'dex_group_profiler'):
        profiler_group = conf.get('webserver', 'dex_group_profiler')

    admin_email = None
    if conf.has_option('webserver', 'dex_admin_email'):
        admin_email = conf.get('webserver', 'dex_admin_email')

    is_superuser = False
    is_data_profiler = False

    if admin_group or profiler_group:
        if admin_email and admin_email == email:  # if the user is an admin
            is_superuser = True
            is_data_profiler = True
        elif admin_group and admin_group in groups:
            is_superuser = True
        elif profiler_group and profiler_group in groups:
            is_data_profiler = True
        else:
            response = Response("Access denied for user '{}'".format(email),
                                mimetype='text/plain')
            return response
    else:
        is_superuser = True
        is_data_profiler = True

    flask_login.login_user(DexUser(name, email, is_superuser,
                                   is_data_profiler))
    return redirect(request.args.get("next") or url_for("admin.index"))
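For reference, a hedged sketch of the decoded payload this view expects parse_jwt_header to return, based on the .get calls above; the claim values are illustrative only:

# Illustrative JWT claims only; real values come from the Dex-issued token.
example_jwt_obj = {
    'name': 'Jane Doe',
    'email': 'jane@example.com',
    'groups': ['airflow-admins', 'airflow-profilers'],
}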
Example #3
def load_user_from_header(auth_header):
    """Reload a user data from the header.
    :param auth_header: Authorization header
    :return user object, or ``None`` if header is invalid
    :rtype DexUser
    """
    jwt_obj = parse_jwt_header(auth_header)

    if not jwt_obj:
        return None

    name = jwt_obj.get('name', '')
    email = jwt_obj.get('email', '')
    groups = jwt_obj.get('groups', [])

    admin_groups = []
    if conf.has_option('webserver', 'dex_group_admin') and \
            conf.get('webserver', 'dex_group_admin'):
        admin_groups = conf.get('webserver', 'dex_group_admin').split(' ')

    profiler_groups = []
    if conf.has_option('webserver', 'dex_group_profiler') and \
            conf.get('webserver', 'dex_group_profiler'):
        profiler_groups = conf.get('webserver',
                                   'dex_group_profiler').split(' ')

    admin_email = None
    if conf.has_option('webserver', 'dex_admin_email'):
        admin_email = conf.get('webserver', 'dex_admin_email')

    is_superuser = False
    is_data_profiler = False

    if admin_groups or profiler_groups:
        if admin_email and admin_email == email:  # if the user is an admin
            is_superuser = True
            is_data_profiler = True
        else:
            for group in groups:
                if group in admin_groups:
                    is_superuser = True
                if group in profiler_groups:
                    is_data_profiler = True
    else:
        is_superuser = True
        is_data_profiler = True

    return DexUser(name, email, is_superuser, is_data_profiler)
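A minimal usage sketch, assuming this loader is wired into Flask-Login's request loader; the login_manager instance and the callback name are assumptions, not part of the snippet above:

# Assumed wiring: login_manager is the app's flask_login.LoginManager instance.
@login_manager.request_loader
def load_user_from_request(request):
    # Delegates to the loader above; returns a DexUser, or None to reject.
    return load_user_from_header(request.headers.get('Authorization'))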
Example #4
def get_default(key, default=None):
    import airflow.configuration as conf

    if conf.has_option("airflowdocker", key):
        return conf.get("airflowdocker", key)
    else:
        return default
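A hedged usage sketch; the key name and fallback value are illustrative, not options the plugin necessarily defines:

# Returns the configured value, or the caller's default when the
# [airflowdocker] section does not define the key.
default_image = get_default("default_image", default="python:3.7-slim")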
Example #5
def get_boolean_default(key, default):
    import airflow.configuration as conf

    if conf.has_option("airflowdocker", key):
        return conf.getboolean("airflowdocker", key)
    else:
        return default
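And the boolean variant, again with an illustrative key name:

# conf.getboolean() parses values such as "True"/"false"/"1"; when the key is
# absent, the caller's default is returned unchanged.
force_pull = get_boolean_default("force_pull", False)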
Example #6
    def try_login(username, password):
        conn = get_ldap_connection(configuration.get("ldap", "bind_user"),
                                   configuration.get("ldap", "bind_password"))

        search_filter = "(&({0})({1}={2}))".format(
            configuration.get("ldap", "user_filter"),
            configuration.get("ldap", "user_name_attr"),
            username
        )

        search_scopes = {
            "LEVEL": LEVEL,
            "SUBTREE": SUBTREE,
            "BASE": BASE
        }

        search_scope = LEVEL
        if configuration.has_option("ldap", "search_scope"):
            search_scope = SUBTREE if configuration.get("ldap", "search_scope") == "SUBTREE" else LEVEL

        # todo: BASE or ONELEVEL?

        res = conn.search(native(configuration.get("ldap", "basedn")),
                          native(search_filter),
                          search_scope=native(search_scope))

        # todo: use list or result?
        if not res:
            log.info("Cannot find user %s", username)
            raise AuthenticationError("Invalid username or password")

        entry = conn.response[0]

        conn.unbind()

        if 'dn' not in entry:
            # The search filter for the user did not return any values, so an
            # invalid user was used for credentials.
            raise AuthenticationError("Invalid username or password")

        try:
            conn = get_ldap_connection(entry['dn'], password)
        except KeyError as e:
            log.error("""
            Unable to parse LDAP structure. If you're using Active Directory and not specifying an OU, you must set search_scope=SUBTREE in airflow.cfg.
            %s
            """ % traceback.format_exc())
            raise LdapException("Could not parse LDAP structure. Try setting search_scope in airflow.cfg, or check logs")

        if not conn:
            log.info("Password incorrect for user %s", username)
            raise AuthenticationError("Invalid username or password")
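A hedged sketch of a call site, assuming try_login is exposed as a static method on the auth backend's user class (called LdapUser here) and that the credentials come from a login form elsewhere:

# Illustrative credentials only.
username, password = "jdoe", "not-a-real-password"
try:
    LdapUser.try_login(username, password)
except AuthenticationError:
    # Raised above for bad credentials or a user the LDAP search cannot find.
    print("Invalid username or password")
except LdapException as error:
    # Raised above when the LDAP entry cannot be parsed, e.g. when search_scope
    # needs to be SUBTREE for Active Directory.
    print(error)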
Example #7
    def try_login(username, password):
        conn = get_ldap_connection(configuration.get("ldap", "bind_user"),
                                   configuration.get("ldap", "bind_password"))

        search_filter = "(&({0})({1}={2}))".format(
            configuration.get("ldap", "user_filter"),
            configuration.get("ldap", "user_name_attr"),
            username
        )

        search_scopes = {
            "LEVEL": LEVEL,
            "SUBTREE": SUBTREE,
            "BASE": BASE
        }

        search_scope = LEVEL
        if configuration.has_option("ldap", "search_scope"):
            search_scope = SUBTREE if configuration.get("ldap", "search_scope") == "SUBTREE" else LEVEL

        # todo: BASE or ONELEVEL?

        res = conn.search(native(configuration.get("ldap", "basedn")),
                          native(search_filter),
                          search_scope=native(search_scope))

        # todo: use list or result?
        if not res:
            _log.info("Cannot find user %s", username)
            raise AuthenticationError("Invalid username or password")

        entry = conn.response[0]

        conn.unbind()

        if 'dn' not in entry:
            # The search filter for the user did not return any values, so an
            # invalid user was used for credentials.
            raise AuthenticationError("Invalid username or password")

        try:
            conn = get_ldap_connection(entry['dn'], password)
        except KeyError as e:
            _log.error("""
            Unable to parse LDAP structure. If you're using Active Directory and not specifying an OU, you must set search_scope=SUBTREE in airflow.cfg.
            %s
            """ % traceback.format_exc())
            raise LdapException("Could not parse LDAP structure. Try setting search_scope in airflow.cfg, or check logs")

        if not conn:
            _log.info("Password incorrect for user %s", username)
            raise AuthenticationError("Invalid username or password")
Example #8
def is_super_user(self):
    airflow_version = airflow.__version__
    # Versions below 2.0 rely on the plugin's admin_users list; 2.0+ exposes a
    # superuser flag on the user model, so compare on the major version.
    if int(airflow_version.split(".")[0]) < 2:
        """
        We need to add the following configuration in the airflow.cfg file for versions < 2.0
        [user_management_plugin]
        #provide comma separated list of admin users
        #Ex: admin_users=admin,airflow
        admin_users=airflow
        """
        users = configuration.get("user_management_plugin", "admin_users") if configuration.has_option("user_management_plugin", "admin_users" ) else ""
        if users != "":
            admin_users = users.split(",")
            if admin_users.__contains__(current_user.user.username):
                return True
        else:
            logging.error("No Admin users were provided")
            return False
    else:
        # For versions >= 2.0 we check for the superuser flag
        return current_user.user.superuser
Example #9
from celery import Celery

from airflow.config_templates.default_celery import DEFAULT_CELERY_CONFIG
from airflow.exceptions import AirflowException
from airflow.executors.base_executor import BaseExecutor
from airflow import configuration
from airflow.utils.log.logging_mixin import LoggingMixin
from airflow.utils.module_loading import import_string

PARALLELISM = configuration.get('core', 'PARALLELISM')

'''
To start the celery worker, run the command:
airflow worker
'''

if configuration.has_option('celery', 'celery_config_options'):
    celery_configuration = import_string(
        configuration.get('celery', 'celery_config_options')
    )
else:
    celery_configuration = DEFAULT_CELERY_CONFIG

app = Celery(
    configuration.get('celery', 'CELERY_APP_NAME'),
    config_source=celery_configuration)


@app.task
def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
Example #10
    pass the call to the singleton (e.g. airflow.configuration.get calls conf.get). However, not all methods on conf
    are abstracted out, including the all-important "has_section" & "add_section" (if we try to set an option
    on a section that doesn't exist, NoSectionError is raised). Therefore, we break the abstraction here once to
    access the singleton directly.

    :param str section_name: the section to create if it does not already exist
    """
    # This uses the singleton described above to make sure the section exists
    if not airflow_configuration.conf.has_section(section_name):
        airflow_configuration.conf.add_section(section_name)


_ensure_section_exists("fileflow")

# Set some fileflow settings to a default if they do not already exist.
if not airflow_configuration.has_option("fileflow", "environment"):
    airflow_configuration.set("fileflow", "environment", "production")

if not airflow_configuration.has_option("fileflow", "storage_prefix"):
    airflow_configuration.set("fileflow", "storage_prefix", "storage")

if not airflow_configuration.has_option("fileflow", "storage_type"):
    airflow_configuration.set("fileflow", "storage_type", "file")

if airflow_configuration.get("fileflow", "storage_type") == "s3":
    if not airflow_configuration.has_option("fileflow", "aws_bucket_name"):
        airflow_configuration.set("fileflow", "aws_bucket_name", "mybeautifulbucket")

    # For AWS keys, check the AIRFLOW__ style environment variables first
    # Otherwise, fallback to the boto configuration
    aws_access_key_id_env_var = os.environ.get(
Example #11
import subprocess

from celery import Celery

from airflow.config_templates.default_celery import DEFAULT_CELERY_CONFIG
from airflow.exceptions import AirflowException
from airflow.executors.base_executor import BaseExecutor
from airflow import configuration
from airflow.utils.log.logging_mixin import LoggingMixin


PARALLELISM = configuration.get('core', 'PARALLELISM')

'''
To start the celery worker, run the command:
airflow worker
'''

if configuration.has_option('celery', 'celery_config_options'):
    celery_configuration = configuration.get('celery', 'celery_config_options')
else:
    celery_configuration = DEFAULT_CELERY_CONFIG

app = Celery(
    configuration.get('celery', 'CELERY_APP_NAME'),
    config_source=celery_configuration)


@app.task
def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    try:
        subprocess.check_call(command, shell=True)
Example #12
    BACKFILL_URL="/api/v1.0/backfill",
    LIST_DAGS_URL="/api/v1.0/list_dags",
    KERBEROS_URL="/api/v1.0/kerberos",
    WORKER_URL="/api/v1.0/worker",
    SCHEDULER_URL="/api/v1.0/scheduler",
    TASK_STATE_URL="/api/v1.0/task_state",
    TRIGGER_DAG_URL="/api/v1.0/trigger_dag",
    REFRESH_DAG_URL="/api/v1.0/refresh_dag",
    DEPLOY_DAG_URL="/api/v1.0/deploy_dag"
)

airflow_webserver_base_url = configuration.get('webserver', 'BASE_URL')
dags_folder = configuration.get('core', 'DAGS_FOLDER')
rest_api_plugin_http_token_header_name = "rest_api_plugin_http_token"
expected_http_token = None
if configuration.has_option("webserver", "REST_API_PLUGIN_EXPECTED_HTTP_TOKEN"):
    expected_http_token = configuration.get("webserver", "REST_API_PLUGIN_EXPECTED_HTTP_TOKEN")


def get_base_response(status="OK", call_time=None, include_arguments=True):
    # Compute the timestamp per call rather than once at import time.
    if call_time is None:
        call_time = datetime.now()
    base_response = {"status": status, "call_time": call_time}
    if include_arguments:
        base_response["arguments"] = request.args
    return base_response


def get_final_response(base_response, output=None, airflow_cmd=None):
    final_response = base_response
    final_response["response_time"] = datetime.now()
    if output:
        final_response["output"] = output
Example #13
def generate_biowardrobe_workflow():

    _settings.cursor.execute("select * from experimenttype limit 1")

    field_names = [i[0] for i in _settings.cursor.description]
    if 'workflow' not in field_names:
        apply_sql_patch("labdata_alter.sql")
        apply_sql_patch("experimenttype_alter.sql")

    apply_sql_patch("experimenttype_patch.sql")

    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_airflow_analysis import biowardrobe_workflow
dag = biowardrobe_workflow("{}")
"""
    _settings.cursor.execute("select workflow from experimenttype")
    for (workflow, ) in _settings.cursor.fetchall():
        if not workflow:
            continue

        _filename = os.path.abspath(
            os.path.join(
                DAGS_FOLDER,
                os.path.basename(os.path.splitext(workflow)[0]) + '.py'))
        with open(_filename, 'w') as generated_workflow_stream:
            generated_workflow_stream.write(_template.format(workflow))

    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_airflow_analysis import BioWardrobeDownloadDAG, BioWardrobeDownloadTriggerDAG
d = BioWardrobeDownloadDAG
dt= BioWardrobeDownloadTriggerDAG
"""
    with open(os.path.join(DAGS_FOLDER, 'biowardrobe_download.py'),
              'w') as generated_workflow_stream:
        generated_workflow_stream.write(_template)

    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_airflow_analysis import BioWardrobeForceRunDAG
d = BioWardrobeForceRunDAG
"""
    with open(os.path.join(DAGS_FOLDER, 'biowardrobe_force_run.py'),
              'w') as generated_workflow_stream:
        generated_workflow_stream.write(_template)

    merge_conn(
        models.Connection(conn_id='biowardrobe',
                          conn_type='mysql',
                          host=_settings.config[0],
                          login=_settings.config[1],
                          password=_settings.config[2],
                          schema=_settings.config[3],
                          extra="{\"cursor\":\"dictcursor\"}"))

    try:
        api_client.get_pool(name='biowardrobe_download')
    except Exception as e:
        api_client.create_pool(name='biowardrobe_download',
                               slots=5,
                               description="pool to download files")
    try:
        api_client.get_pool(name='biowardrobe_basic_analysis')
    except Exception as e:
        api_client.create_pool(name='biowardrobe_basic_analysis',
                               slots=1,
                               description="pool to run basic analysis")

    if not conf.has_option('cwl', 'tmp_folder'):
        if not os.path.exists(conf.AIRFLOW_CONFIG + '.orig'):
            copyfile(conf.AIRFLOW_CONFIG, conf.AIRFLOW_CONFIG + '.orig')
        with open(conf.AIRFLOW_CONFIG, 'w') as fp:
            # for s in ['mesos', 'kerberos', 'celery', 'smtp', 'email', 'dask', 'ldap']:
            #     conf.conf.remove_section(s)

            conf.conf.add_section('cwl')
            conf.set('cwl', 'tmp_folder', os.path.join(AIRFLOW_HOME, 'tmp'))

            conf.set('core', 'logging_level', 'WARNING')
            conf.set('core', 'load_examples', 'False')
            conf.set('webserver', 'dag_default_view', 'graph')
            conf.set('webserver', 'dag_orientation', 'TB')
            conf.set('webserver', 'web_server_worker_timeout', '120')
            conf.set('scheduler', 'job_heartbeat_sec', '20')
            conf.set('scheduler', 'scheduler_heartbeat_sec', '20')
            conf.set('scheduler', 'min_file_process_interval', '30')
            conf.conf.write(fp)

    startup_scripts = [
        'com.datirium.airflow-scheduler.plist',
        'com.datirium.airflow-webserver.plist'
    ]
    if platform == "darwin":
        _sys_dir = os.path.expanduser('~/Library/LaunchAgents')
        for scripts in startup_scripts:
            with open(os.path.join(system_folder, 'macosx', scripts),
                      'r') as s:
                data = s.read()
                # OS X
            dst = os.path.join(_sys_dir, scripts)

            if os.path.exists(dst):
                with open(dst + '.new', 'w') as w:
                    w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))
            else:
                with open(dst, 'w') as w:
                    w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))
Example #14
"""
Extend the airflow configuration and supply defaults for any missing fileflow-related configuration values.
"""
from airflow import configuration as airflow_configuration
import os
import boto

# Set some fileflow settings to a default if they do not already exist.

if not airflow_configuration.has_option('fileflow', 'environment'):
    airflow_configuration.set('fileflow', 'environment', 'production')

if not airflow_configuration.has_option('fileflow', 'storage_prefix'):
    airflow_configuration.set('fileflow', 'storage_prefix', 'storage')

if not airflow_configuration.has_option('fileflow', 'storage_type'):
    airflow_configuration.set('fileflow', 'storage_type', 'file')

if not airflow_configuration.has_option('fileflow', 'aws_bucket_name'):
    airflow_configuration.set('fileflow', 'aws_bucket_name',
                              'mybeautifulbucket')

# For AWS keys, check the AIRFLOW__ style environment variables first
# Otherwise, fallback to the boto configuration
aws_access_key_id_env_var = os.environ.get(
    'AIRFLOW__FILEFLOW__AWS_ACCESS_KEY_ID', False)
aws_secret_access_key_env_var = os.environ.get(
    'AIRFLOW__FILEFLOW__AWS_SECRET_ACCESS_KEY', False)
boto_config = boto.pyami.config.Config()

if not airflow_configuration.has_option('fileflow', 'aws_access_key_id'):
Example #15
The CLI commands this REST API exposes are defined here: http://airflow.incubator.apache.org/cli.html
"""

# Location of the REST Endpoint
# Note: Changing this will only affect where the messages are posted to on the web interface; it will not change where the endpoint actually resides
rest_api_endpoint = "/admin/rest_api/api"

# Getting Versions and Global variables
hostname = socket.gethostname()
airflow_version = airflow.__version__

# Getting configurations from airflow.cfg file
airflow_webserver_base_url = configuration.get('webserver', 'BASE_URL')
airflow_base_log_folder = configuration.get('core', 'BASE_LOG_FOLDER')
airflow_dags_folder = configuration.get('core', 'DAGS_FOLDER')
log_loading = (configuration.getboolean("rest_api_plugin", "LOG_LOADING")
               if configuration.has_option("rest_api_plugin", "LOG_LOADING")
               else False)
filter_loading_messages_in_cli_response = (
    configuration.getboolean("rest_api_plugin", "FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE")
    if configuration.has_option("rest_api_plugin", "FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE")
    else True)

# Using UTF-8 Encoding so that response messages don't have any characters in them that can't be handled
os.environ['PYTHONIOENCODING'] = 'utf-8'


"""
Metadata that defines a single API:
{
    "name": "{string}",                     # Name of the API (cli command to be executed)
    "description": "{string}",              # Description of the API
    "airflow_version": "{string}",          # Version the API was available in to allow people to better determine if the API is available. (to be displayed on the Admin page)
    "http_method": "{string}",              # HTTP method to use when calling the function. (Default: GET) (Optional)
    "background_mode": {boolean},           # Whether to run the process in the background if its a CLI API (Optional)
    "arguments": [                          # List of arguments that can be provided to the API
Example #16
def generate_biowardrobe_workflow():

    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_cwl_workflows import workflow
dag = workflow("{}")
"""
    all_workflows = available()
    for workflow in all_workflows:
        if not workflow:
            continue

        _filename = os.path.abspath(
            os.path.join(
                DAGS_FOLDER,
                os.path.basename(os.path.splitext(workflow)[0]) + '.py'))
        print(_filename)
        with open(_filename, 'w') as generated_workflow_stream:
            generated_workflow_stream.write(_template.format(workflow))

    try:
        api_client.get_pool(name='basic_analysis')
    except Exception as e:
        api_client.create_pool(name='basic_analysis',
                               slots=1,
                               description="pool to run basic analysis")

    if not conf.has_option('cwl', 'tmp_folder'):
        if not os.path.exists(conf.AIRFLOW_CONFIG + '.orig'):
            copyfile(conf.AIRFLOW_CONFIG, conf.AIRFLOW_CONFIG + '.orig')
        with open(conf.AIRFLOW_CONFIG, 'w') as fp:
            # for s in ['mesos', 'kerberos', 'celery', 'smtp', 'email', 'dask', 'ldap']:
            #     conf.conf.remove_section(s)

            conf.conf.add_section('cwl')
            conf.set('cwl', 'tmp_folder', os.path.join(AIRFLOW_HOME, 'tmp'))

            conf.set('core', 'logging_level', 'WARNING')
            conf.set('core', 'load_examples', 'False')
            conf.set('webserver', 'dag_default_view', 'graph')
            conf.set('webserver', 'dag_orientation', 'TB')
            conf.set('webserver', 'web_server_worker_timeout', '120')
            conf.set('scheduler', 'job_heartbeat_sec', '20')
            conf.set('scheduler', 'scheduler_heartbeat_sec', '20')
            conf.set('scheduler', 'min_file_process_interval', '30')
            conf.conf.write(fp)

    # startup_scripts = ['com.datirium.airflow-scheduler.plist', 'com.datirium.airflow-webserver.plist']
    # if platform == "darwin":
    #     _sys_dir = os.path.expanduser('~/Library/LaunchAgents')
    #     for scripts in startup_scripts:
    #         with open(os.path.join(system_folder, 'macosx', scripts), 'r') as s:
    #             data = s.read()
    #             # OS X
    #         dst = os.path.join(_sys_dir, scripts)
    #
    #         if os.path.exists(dst):
    #             with open(dst + '.new', 'w') as w:
    #                 w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))
    #         else:
    #             with open(dst, 'w') as w:
    #                 w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))

    # if platform == "linux" or platform == "linux2":
    # linux
    # elif platform == "win32":
    # Windows...

    # TODO: tmp, dags do not exist ???


# generate_biowardrobe_workflow()