def execute(self, context): """ Execute the bash command in a temporary directory which will be cleaned afterwards """ bash_command = self.bash_command logging.info("tmp dir root location: \n" + gettempdir()) with TemporaryDirectory(prefix='airflowtmp') as tmp_dir: with NamedTemporaryFile(dir=tmp_dir, prefix=self.task_id) as f: f.write(bytes(bash_command, 'utf_8')) f.flush() fname = f.name script_location = tmp_dir + "/" + fname logging.info("Temporary script " "location :{0}".format(script_location)) logging.info("Running command: " + bash_command) sp = Popen(['bash', fname], stdout=PIPE, stderr=STDOUT, cwd=tmp_dir, env=self.env) self.sp = sp logging.info("Output:") for line in iter(sp.stdout.readline, b''): logging.info(line.strip()) sp.wait() logging.info("Command exited with " "return code {0}".format(sp.returncode)) if sp.returncode: raise AirflowException("Bash command failed")
def execute(self, context):
    logging.info('Starting docker container from image ' + self.image)

    tls_config = None
    if self.tls_ca_cert and self.tls_client_cert and self.tls_client_key:
        tls_config = tls.TLSConfig(
            ca_cert=self.tls_ca_cert,
            client_cert=(self.tls_client_cert, self.tls_client_key),
            verify=True,
            ssl_version=self.tls_ssl_version,
            assert_hostname=self.tls_hostname)
        self.docker_url = self.docker_url.replace('tcp://', 'https://')

    self.cli = Client(base_url=self.docker_url, version=self.api_version,
                      tls=tls_config)

    if ':' not in self.image:
        image = self.image + ':latest'
    else:
        image = self.image

    if self.force_pull or len(self.cli.images(name=image)) == 0:
        logging.info('Pulling docker image ' + image)
        for l in self.cli.pull(image, stream=True):
            output = json.loads(l)
            logging.info("{}".format(output['status']))

    cpu_shares = int(round(self.cpus * 1024))

    with TemporaryDirectory(prefix='airflowtmp') as host_tmp_dir:
        self.environment['AIRFLOW_TMP_DIR'] = self.tmp_dir
        self.volumes.append('{0}:{1}'.format(host_tmp_dir, self.tmp_dir))

        self.container = self.cli.create_container(
            command=self.get_command(),
            cpu_shares=cpu_shares,
            environment=self.environment,
            host_config=self.cli.create_host_config(
                binds=self.volumes,
                network_mode=self.network_mode),
            image=image,
            mem_limit=self.mem_limit,
            user=self.user)
        self.cli.start(self.container['Id'])

        line = ''
        for line in self.cli.logs(container=self.container['Id'], stream=True):
            logging.info("{}".format(line.strip()))

        exit_code = self.cli.wait(self.container['Id'])
        if exit_code != 0:
            raise AirflowException('docker container failed')

        if self.xcom_push:
            return self.cli.logs(container=self.container['Id']) \
                if self.xcom_all else str(line.strip())
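# A minimal sketch of the same docker-py (pre-2.0 `Client`) lifecycle the operator
# uses: pull -> create_container -> start -> stream logs -> wait. It assumes a local
# Docker daemon on the default unix socket, and 'alpine:latest' is only an
# illustrative image; none of these names come from the operator above.
import json
import logging

from docker import Client

def run_in_container(command, image='alpine:latest'):
    cli = Client(base_url='unix://var/run/docker.sock')
    # pull the image only if it is not already present locally
    if len(cli.images(name=image)) == 0:
        for chunk in cli.pull(image, stream=True):
            logging.info(json.loads(chunk).get('status'))
    container = cli.create_container(image=image, command=command)
    cli.start(container['Id'])
    # stream container output while it runs
    for line in cli.logs(container=container['Id'], stream=True):
        logging.info(line.strip())
    # wait() returns the exit code; non-zero signals failure
    return cli.wait(container['Id'])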
def run_cli(self, hql, schema=None): """ Run an hql statement using the hive cli >>> hh = HiveCliHook() >>> result = hh.run_cli("USE airflow;") >>> ("OK" in result) True """ conn = self.conn schema = schema or conn.schema if schema: hql = "USE {schema};\n{hql}".format(**locals()) with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir: with NamedTemporaryFile(dir=tmp_dir) as f: f.write(hql) f.flush() fname = f.name hive_bin = 'hive' cmd_extra = [] if self.use_beeline: hive_bin = 'beeline' jdbc_url = ( "jdbc:hive2://" "{0}:{1}/{2}" ";auth=noSasl" ).format(conn.host, conn.port, conn.schema) cmd_extra += ['-u', jdbc_url] if conn.login: cmd_extra += ['-n', conn.login] if conn.password: cmd_extra += ['-p', conn.password] cmd_extra += ['-p', conn.login] hive_cmd = [hive_bin, '-f', fname] + cmd_extra if self.hive_cli_params: hive_params_list = self.hive_cli_params.split() hive_cmd.extend(hive_params_list) logging.info(" ".join(hive_cmd)) sp = subprocess.Popen( hive_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=tmp_dir) all_err = '' self.sp = sp stdout = '' for line in iter(sp.stdout.readline, ''): stdout += line logging.info(line.strip()) sp.wait() if sp.returncode: raise AirflowException(all_err) return stdout
def run_cli(self, pig, verbose=True):
    """
    Run a pig script using the pig cli

    >>> ph = PigCliHook()
    >>> result = ph.run_cli("ls /;")
    >>> ("hdfs://" in result)
    True
    """
    with TemporaryDirectory(prefix='airflow_pigop_') as tmp_dir:
        with NamedTemporaryFile(dir=tmp_dir) as f:
            f.write(pig.encode('UTF-8'))
            f.flush()
            fname = f.name
            pig_bin = 'pig'
            cmd_extra = []

            pig_cmd = [pig_bin, '-f', fname] + cmd_extra

            if self.pig_properties:
                pig_properties_list = self.pig_properties.split()
                pig_cmd.extend(pig_properties_list)
            if verbose:
                logging.info(" ".join(pig_cmd))
            sp = subprocess.Popen(
                pig_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                cwd=tmp_dir)
            self.sp = sp
            stdout = ''
            for line in iter(sp.stdout.readline, b''):
                line = line.decode('UTF-8')
                stdout += line
                if verbose:
                    logging.info(line.strip())
            sp.wait()

            if sp.returncode:
                raise AirflowException(stdout)

            return stdout
def run_cli(self, hql, schema=None):
    '''
    Run an hql statement using the hive cli

    >>> hh = HiveCliHook()
    >>> result = hh.run_cli("USE airflow;")
    >>> ("OK" in result)
    True
    '''
    if schema:
        hql = "USE {schema};\n{hql}".format(**locals())

    with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir:
        with NamedTemporaryFile(dir=tmp_dir) as f:
            f.write(hql.encode('UTF-8'))
            f.flush()
            fname = f.name
            hive_cmd = ['hive', '-f', fname]

            if self.hive_cli_params:
                hive_params_list = self.hive_cli_params.split()
                hive_cmd.extend(hive_params_list)

            sp = subprocess.Popen(
                hive_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                cwd=tmp_dir)
            self.sp = sp
            stdout = ""
            for line in iter(sp.stdout.readline, b''):
                line = line.decode('UTF-8')
                stdout += line
                logging.info(line.strip())
            sp.wait()

            if sp.returncode:
                # raise with the captured output rather than an always-empty string
                raise AirflowException(stdout)

            return stdout
def run_cli(self, hql, schema=None, verbose=True):
    """
    Run an hql statement using the hive cli

    >>> hh = HiveCliHook()
    >>> result = hh.run_cli("USE airflow;")
    >>> ("OK" in result)
    True
    """
    conn = self.conn
    schema = schema or conn.schema
    if schema:
        hql = "USE {schema};\n{hql}".format(**locals())

    with TemporaryDirectory(prefix='airflow_hiveop_') as tmp_dir:
        with NamedTemporaryFile(dir=tmp_dir) as f:
            f.write(hql.encode('UTF-8'))
            f.flush()
            fname = f.name
            hive_bin = 'hive'
            cmd_extra = []

            if self.use_beeline:
                hive_bin = 'beeline'
                if conf.get('core', 'security') == 'kerberos':
                    template = conn.extra_dejson.get(
                        'principal', "hive/[email protected]")
                    template = utils.replace_hostname_pattern(
                        utils.get_components(template))

                    # pass the effective user via the HiveServer2 proxy-user JDBC parameter
                    proxy_user = ""
                    if conn.extra_dejson.get('proxy_user') == "login" and conn.login:
                        proxy_user = ";hive.server2.proxy.user={0}".format(conn.login)
                    elif conn.extra_dejson.get('proxy_user') == "owner" and self.run_as:
                        proxy_user = ";hive.server2.proxy.user={0}".format(self.run_as)

                    jdbc_url = (
                        "jdbc:hive2://"
                        "{0}:{1}/{2}"
                        ";principal={3}{4}"
                    ).format(conn.host, conn.port, conn.schema, template, proxy_user)
                else:
                    jdbc_url = (
                        "jdbc:hive2://"
                        "{0}:{1}/{2}"
                        ";auth=noSasl"
                    ).format(conn.host, conn.port, conn.schema)

                cmd_extra += ['-u', jdbc_url]
                if conn.login:
                    cmd_extra += ['-n', conn.login]
                if conn.password:
                    cmd_extra += ['-p', conn.password]

            hive_cmd = [hive_bin, '-f', fname] + cmd_extra

            if self.hive_cli_params:
                hive_params_list = self.hive_cli_params.split()
                hive_cmd.extend(hive_params_list)

            if verbose:
                logging.info(" ".join(hive_cmd))
            sp = subprocess.Popen(
                hive_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                cwd=tmp_dir)
            self.sp = sp
            stdout = ''
            for line in iter(sp.stdout.readline, b''):
                line = line.decode('UTF-8')
                stdout += line
                if verbose:
                    logging.info(line.strip())
            sp.wait()

            if sp.returncode:
                raise AirflowException(stdout)

            return stdout