Example #1
    def execute(self, context):
        """
        Execute the bash command in a temporary directory
        which will be cleaned up afterwards
        """
        bash_command = self.bash_command
        logging.info("tmp dir root location: \n" + gettempdir())
        with TemporaryDirectory(prefix='airflowtmp') as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir, prefix=self.task_id) as f:

                f.write(bytes(bash_command, 'utf_8'))
                f.flush()
                fname = f.name
                # f.name is already the absolute path inside tmp_dir
                script_location = fname
                logging.info("Temporary script "
                             "location: {0}".format(script_location))
                logging.info("Running command: " + bash_command)
                sp = Popen(['bash', fname],
                           stdout=PIPE,
                           stderr=STDOUT,
                           cwd=tmp_dir,
                           env=self.env)

                self.sp = sp

                logging.info("Output:")
                for line in iter(sp.stdout.readline, b''):
                    logging.info(line.strip())
                sp.wait()
                logging.info("Command exited with "
                             "return code {0}".format(sp.returncode))

                if sp.returncode:
                    raise AirflowException("Bash command failed")
Example #2
    def execute(self, context):
        logging.info('Starting docker container from image ' + self.image)

        tls_config = None
        if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
            tls_config = tls.TLSConfig(ca_cert=self.tls_ca_cert,
                                       client_cert=(self.tls_client_cert,
                                                    self.tls_client_key),
                                       verify=True,
                                       ssl_version=self.tls_ssl_version,
                                       assert_hostname=self.tls_hostname)
            self.docker_url = self.docker_url.replace('tcp://', 'https://')

        self.cli = Client(base_url=self.docker_url,
                          version=self.api_version,
                          tls=tls_config)

        if ':' not in self.image:
            image = self.image + ':latest'
        else:
            image = self.image

        if self.force_pull or len(self.cli.images(name=image)) == 0:
            logging.info('Pulling docker image ' + image)
            for line in self.cli.pull(image, stream=True):
                output = json.loads(line)
                logging.info("{}".format(output['status']))

        cpu_shares = int(round(self.cpus * 1024))

        with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
            self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
            self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

            self.container = self.cli.create_container(
                command=self.get_command(),
                cpu_shares=cpu_shares,
                environment=self.environment,
                host_config=self.cli.create_host_config(
                    binds=self.volumes, network_mode=self.network_mode),
                image=image,
                mem_limit=self.mem_limit,
                user=self.user)
            self.cli.start(self.container['Id'])

            line = ''
            for line in self.cli.logs(container=self.container['Id'],
                                      stream=True):
                logging.info("{}".format(line.strip()))

            exit_code = self.cli.wait(self.container['Id'])
            if exit_code != 0:
                raise AirflowException('docker container failed')

            if self.xcom_push:
                if self.xcom_all:
                    return self.cli.logs(container=self.container['Id'])
                return str(line.strip())
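
This example drives the legacy docker-py Client API by hand: create_container,
start, logs, wait. The modern Docker SDK for Python wraps the same choreography
in one high-level call; a hedged sketch, assuming the docker package and a
reachable daemon:

    import docker

    client = docker.from_env()  # reads DOCKER_HOST / TLS config from the environment
    # run() pulls the image if missing, waits for the container to exit, and
    # returns its logs; a non-zero exit raises docker.errors.ContainerError.
    output = client.containers.run('ubuntu:latest', 'echo hello', remove=True)
    print(output)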
Example #3
    def run_cli(self, hql, schema=None):
        """
        Run an hql statement using the hive cli

        >>> hh = HiveCliHook()
        >>> result = hh.run_cli("USE airflow;")
        >>> ("OK" in result)
        True
        """
        conn = self.conn
        schema = schema or conn.schema
        if schema:
            hql = "USE {schema};\n{hql}".format(**locals())

        with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir) as f:
                f.write(hql)
                f.flush()
                fname = f.name
                hive_bin = 'hive'
                cmd_extra = []
                if self.use_beeline:
                    hive_bin = 'beeline'
                    jdbc_url = (
                        "jdbc:hive2://"
                        "{0}:{1}/{2}"
                        ";auth=noSasl"
                    ).format(conn.host, conn.port, conn.schema)
                    cmd_extra += ['-u', jdbc_url]
                    if conn.login:
                        cmd_extra += ['-n', conn.login]
                    if conn.password:
                        cmd_extra += ['-p', conn.password]
                hive_cmd = [hive_bin, '-f', fname] + cmd_extra
                if self.hive_cli_params:
                    hive_params_list = self.hive_cli_params.split()
                    hive_cmd.extend(hive_params_list)
                logging.info(" ".join(hive_cmd))
                sp = subprocess.Popen(
                    hive_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    cwd=tmp_dir)
                self.sp = sp
                stdout = ''
                for line in iter(sp.stdout.readline, ''):
                    stdout += line
                    logging.info(line.strip())
                sp.wait()

                if sp.returncode:
                    raise AirflowException(stdout)

                return stdout
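
The beeline branch is mostly command-list assembly. The same logic isolated as
a self-contained helper (build_beeline_cmd is an illustrative name, not part of
the hook):

    def build_beeline_cmd(host, port, schema, login=None, password=None):
        # Mirror the noSasl JDBC URL built above, then append credentials.
        jdbc_url = "jdbc:hive2://{0}:{1}/{2};auth=noSasl".format(
            host, port, schema)
        cmd = ['beeline', '-u', jdbc_url]
        if login:
            cmd += ['-n', login]
        if password:
            cmd += ['-p', password]
        return cmd

    # build_beeline_cmd('localhost', 10000, 'default', login='hive')
    # -> ['beeline', '-u', 'jdbc:hive2://localhost:10000/default;auth=noSasl',
    #     '-n', 'hive']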
Example #4
    def run_cli(self, pig, verbose=True):
        """
        Run a pig script using the pig cli

        >>> ph = PigCliHook()
        >>> result = ph.run_cli("ls /;")
        >>> ("hdfs://" in result)
        True
        """

        with TemporaryDirectory(prefix='airflow_pigop_') as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir) as f:
                f.write(pig)
                f.flush()
                fname = f.name
                pig_bin = 'pig'
                cmd_extra = []

                pig_cmd = [pig_bin, '-f', fname] + cmd_extra

                if self.pig_properties:
                    pig_properties_list = self.pig_properties.split()
                    pig_cmd.extend(pig_properties_list)
                if verbose:
                    logging.info(" ".join(pig_cmd))
                sp = subprocess.Popen(pig_cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT,
                                      cwd=tmp_dir)
                self.sp = sp
                stdout = ''
                for line in iter(sp.stdout.readline, ''):
                    stdout += line
                    if verbose:
                        logging.info(line.strip())
                sp.wait()

                if sp.returncode:
                    raise AirflowException(stdout)

                return stdout
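
Examples #3 through #6 share one subprocess skeleton: merge stderr into stdout,
accumulate the output while logging it, and raise when the return code is
non-zero. A generic sketch of that skeleton (run_and_capture is an illustrative
name; universal_newlines=True makes the '' sentinel correct on Python 3):

    import subprocess

    def run_and_capture(cmd, cwd=None):
        sp = subprocess.Popen(cmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              cwd=cwd,
                              universal_newlines=True)
        stdout = ''
        for line in iter(sp.stdout.readline, ''):
            stdout += line
        sp.wait()
        if sp.returncode:
            # The hooks above raise AirflowException here instead.
            raise RuntimeError(stdout)
        return stdout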
Example #5
    def run_cli(self, hql, schema=None):
        '''
        Run an hql statement using the hive cli

        >>> hh = HiveCliHook()
        >>> result = hh.run_cli("USE airflow;")
        >>> ("OK" in result)
        True
        '''
        if schema:
            hql = "USE {schema};\n{hql}".format(**locals())

        with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir) as f:
                f.write(hql)
                f.flush()
                fname = f.name
                hive_cmd = ['hive', '-f', fname]
                if self.hive_cli_params:
                    hive_params_list = self.hive_cli_params.split()
                    hive_cmd.extend(hive_params_list)
                sp = subprocess.Popen(hive_cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT,
                                      cwd=tmp_dir)
                self.sp = sp
                stdout = ''
                for line in iter(sp.stdout.readline, ''):
                    stdout += line
                    logging.info(line.strip())
                sp.wait()

                if sp.returncode:
                    raise AirflowException(stdout)

                return stdout
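
The format(**locals()) trick used for the USE prefix works, but it silently
depends on the enclosing variable names. Passing the values explicitly is a
sturdier version of the same step (prefix_schema is an illustrative name):

    def prefix_schema(hql, schema=None):
        # Prepend a USE statement so the script runs against the right database.
        if schema:
            return "USE {0};\n{1}".format(schema, hql)
        return hql

    # prefix_schema("SHOW TABLES;", "airflow") -> 'USE airflow;\nSHOW TABLES;'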
Example #6
    def run_cli(self, hql, schema=None, verbose=True):
        """
        Run an hql statement using the hive cli

        >>> hh = HiveCliHook()
        >>> result = hh.run_cli("USE airflow;")
        >>> ("OK" in result)
        True
        """
        conn = self.conn
        schema = schema or conn.schema
        if schema:
            hql = "USE {schema};\n{hql}".format(**locals())

        with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir:
            with NamedTemporaryFile(dir=tmp_dir) as f:
                f.write(hql)
                f.flush()
                fname = f.name
                hive_bin = 'hive'
                cmd_extra = []

                if self.use_beeline:
                    hive_bin = 'beeline'
                    if conf.get('core', 'security') == 'kerberos':
                        template = conn.extra_dejson.get(
                            'principal', "hive/_HOST@EXAMPLE.COM")
                        template = utils.replace_hostname_pattern(
                            utils.get_components(template))

                        proxy_user = ""
                        if conn.extra_dejson.get('proxy_user') == "login" and conn.login:
                            proxy_user = "hive.server2.proxy.user={0}".format(conn.login)
                        elif conn.extra_dejson.get('proxy_user') == "owner" and self.run_as:
                            proxy_user = "hive.server2.proxy.user={0}".format(self.run_as)

                        jdbc_url = (
                            "jdbc:hive2://"
                            "{0}:{1}/{2}"
                            ";principal={3}{4}"
                        ).format(conn.host, conn.port, conn.schema, template, proxy_user)
                    else:
                        jdbc_url = (
                            "jdbc:hive2://"
                            "{0}:{1}/{2}"
                            ";auth=noSasl"
                        ).format(conn.host, conn.port, conn.schema)

                    cmd_extra += ['-u', jdbc_url]
                    if conn.login:
                        cmd_extra += ['-n', conn.login]
                    if conn.password:
                        cmd_extra += ['-p', conn.password]

                hive_cmd = [hive_bin, '-f', fname] + cmd_extra

                if self.hive_cli_params:
                    hive_params_list = self.hive_cli_params.split()
                    hive_cmd.extend(hive_params_list)
                if verbose:
                    logging.info(" ".join(hive_cmd))
                sp = subprocess.Popen(
                    hive_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    cwd=tmp_dir)
                self.sp = sp
                stdout = ''
                for line in iter(sp.stdout.readline, ''):
                    stdout += line
                    if verbose:
                        logging.info(line.strip())
                sp.wait()

                if sp.returncode:
                    raise AirflowException(stdout)

                return stdout
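
The Kerberos branch picks a proxy user out of the connection's extra JSON. The
same decision isolated as a sketch (resolve_proxy_user is a hypothetical
helper; the hive.server2.proxy.user key mirrors the code above):

    def resolve_proxy_user(extra, login=None, run_as=None):
        # 'login' proxies as the connection login, 'owner' as the task owner.
        mode = extra.get('proxy_user')
        if mode == 'login' and login:
            return "hive.server2.proxy.user={0}".format(login)
        if mode == 'owner' and run_as:
            return "hive.server2.proxy.user={0}".format(run_as)
        return ""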