def monitor_command(self, command, sql_stmt):
    # Poll a Qubole command until it finishes, failing fast on errors and on
    # exceeding 1.5x the runtime estimate configured on the DAG.
    _command = command
    time.sleep(10)
    try:
        _command = Command.find(_command.id)
    except Exception:
        # The API can be flaky right after submission; back off and retry once.
        time.sleep(30)
        _command = Command.find(_command.id)

    total_sleep_time = 0
    retries = 1000
    command_id = _command.id
    for _ in range(retries):
        if _command.status == 'error':
            raise AirflowException(
                'Statement failed: https://api.qubole.com/v2/analyze?command_id=%s\n %s'
                % (command_id, sql_stmt))
        elif Command.is_done(_command.status):
            return
        else:
            total_sleep_time += 10
            if total_sleep_time > self.expected_runtime * 1.5:
                raise AirflowException(
                    'RS total estimated runtime was exceeded; adjust the estimate '
                    'in the DAG if the process needs more time to complete query %s'
                    % sql_stmt)
            time.sleep(10)
            _command = Command.find(command_id)
    raise AirflowException(
        'RS_monitor_command call for %s failed. '
        'https://api.qubole.com/v2/analyze?command_id=%s' % (sql_stmt, command.id))

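# monitor_command above assumes it is a method on a class exposing
# self.expected_runtime, the estimated query runtime in seconds from which the
# 1.5x timeout is computed. A minimal hypothetical host class, for
# illustration only:
class QueryMonitor:
    monitor_command = monitor_command  # reuse the function above as a method

    def __init__(self, expected_runtime):
        self.expected_runtime = expected_runtime  # seconds, tuned per DAG
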
def qubole_by_id_raw(api_token, hcid, filename):
    Qubole.configure(api_token=api_token)
    cmd = Command.find(hcid)
    out_file = filename + '.csv'
    with open(out_file, 'wb') as writer:
        cmd.get_results(writer)
    return out_file

def handle_failure_retry(context):
    ti = context['ti']
    cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

    if cmd_id is not None:
        cmd = Command.find(cmd_id)
        if cmd is not None:
            if cmd.status == 'running':
                # `log` is assumed to be a module-level logger in this variant;
                # the variants below obtain one via LoggingMixin().log instead.
                log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                cmd.cancel()

def qubole_by_id(api_token, hcid, filename):
    # Same as qubole_by_id_raw, but also load the tab-delimited results into a
    # pandas DataFrame.
    Qubole.configure(api_token=api_token)
    cmd = Command.find(hcid)
    out_file = filename + '.csv'
    with open(out_file, 'wb') as writer:
        cmd.get_results(writer)
    df = pd.read_csv(out_file, delimiter='\t')
    return df

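# A minimal usage sketch for the two fetch helpers above, assuming a valid
# Qubole API token and the numeric id of an already-finished command; both
# values below are hypothetical placeholders.
raw_path = qubole_by_id_raw(api_token='YOUR_API_TOKEN', hcid=123456,
                            filename='results')  # -> 'results.csv'
df = qubole_by_id(api_token='YOUR_API_TOKEN', hcid=123456, filename='results')
print(df.head())
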
def handle_failure_retry(context):
    ti = context['ti']
    cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

    if cmd_id is not None:
        cmd = Command.find(cmd_id)
        if cmd is not None:
            if cmd.status == 'running':
                log = LoggingMixin().log
                log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                cmd.cancel()

def run_query(self, query, user):
    qbol.configure(api_token=self.configuration['token'],
                   api_url='%s/api' % self.configuration['endpoint'])

    try:
        cls = PrestoCommand if (self.configuration['query_type'] == 'presto') else HiveCommand
        cmd = cls.create(query=query, label=self.configuration['cluster'])
        logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

        while not Command.is_done(cmd.status):
            time.sleep(qbol.poll_interval)
            cmd = Command.find(cmd.id)
            logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

        rows = []
        columns = []
        error = None

        if cmd.status == 'done':
            fp = StringIO()
            cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                            qlog=None, arguments=['true'])

            results = fp.getvalue()
            fp.close()

            data = results.split('\r\n')
            columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')])
            rows = [dict(zip((c['name'] for c in columns), row.split('\t')))
                    for row in data]

        json_data = json_dumps({'columns': columns, 'rows': rows})
    except KeyboardInterrupt:
        logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
        cmd.cancel()
        error = "Query cancelled by user."
        json_data = None

    return json_data, error

def handle_failure_retry(context) -> None:
    """Handle retries in case of failures."""
    ti = context['ti']
    cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

    if cmd_id is not None:
        cmd = Command.find(cmd_id)
        if cmd is not None:
            if cmd.status == 'done':
                log.info('Command ID: %s succeeded; marking this TI as Success.', cmd_id)
                ti.state = State.SUCCESS
            elif cmd.status == 'running':
                log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                cmd.cancel()

def handle_failure_retry(context):
    ti = context['ti']
    cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

    if cmd_id is not None:
        cmd = Command.find(cmd_id)
        if cmd is not None:
            log = LoggingMixin().log
            if cmd.status == 'done':
                log.info('Command ID: %s succeeded; marking this TI as Success.', cmd_id)
                ti.state = State.SUCCESS
            elif cmd.status == 'running':
                log.info('Cancelling the Qubole Command Id: %s', cmd_id)
                cmd.cancel()

def handle_failure_retry(context):
    ti = context['ti']
    cmd_id = ti.xcom_pull(key='qbol_cmd_id', task_ids=ti.task_id)

    if cmd_id is not None:
        logger = logging.getLogger('airflow').getChild("QuboleHook")
        cmd = Command.find(cmd_id)
        if cmd is not None:
            if cmd.status == 'done':
                logger.info('Command ID: %s succeeded; marking this TI as Success.', cmd_id)
                ti.state = State.SUCCESS
            elif cmd.status == 'running':
                logger.info('Cancelling the Qubole Command Id: %s', cmd_id)
                cmd.cancel()

def qubole(api_token, sql, replacements, filename):
    # Read a templated SQL file, apply string replacements, run it as a Hive
    # command on Qubole, and return the tab-delimited results as a DataFrame.
    Qubole.configure(api_token=api_token)
    with open(sql, 'r') as f:
        query = f.read()
    label = 'Trading-spark'
    query = find_replace_multi(query, replacements)
    # HiveCommand.run blocks until the command completes.
    hc = HiveCommand.run(query=query, label=label)
    cmd = Command.find(hc.id)
    out_file = filename + '.csv'
    with open(out_file, 'wb') as writer:
        cmd.get_results(writer)
    df = pd.read_csv(out_file, delimiter='\t')
    return df

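# A hedged usage sketch for qubole() above. find_replace_multi is assumed to
# replace each dict key found in the SQL text with its value, so a templated
# query file can be parameterised before submission; all values here are
# hypothetical placeholders.
replacements = {'{start_date}': '2020-01-01', '{end_date}': '2020-01-31'}
df = qubole(api_token='YOUR_API_TOKEN', sql='daily_trades.sql',
            replacements=replacements, filename='daily_trades')
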
def run_query(self, query, user):
    qbol.configure(api_token=self.configuration['token'],
                   api_url='%s/api' % self.configuration['endpoint'])

    try:
        cls = PrestoCommand if (self.configuration['query_type'] == 'presto') else HiveCommand
        cmd = cls.create(query=query, label=self.configuration['cluster'])
        logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

        while not Command.is_done(cmd.status):
            time.sleep(qbol.poll_interval)
            cmd = Command.find(cmd.id)
            logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

        rows = []
        columns = []
        error = None

        if cmd.status == 'done':
            fp = StringIO()
            cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                            qlog=None, arguments=['true'])

            results = fp.getvalue()
            fp.close()

            data = results.split('\r\n')
            columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')])
            rows = [dict(zip((c['name'] for c in columns), row.split('\t'))) for row in data]

        json_data = json_dumps({'columns': columns, 'rows': rows})
    except KeyboardInterrupt:
        logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
        cmd.cancel()
        error = "Query cancelled by user."
        json_data = None

    return json_data, error

def run_query(self, query, user):
    qbol.configure(
        api_token=self.configuration.get("token"),
        api_url="%s/api" % self.configuration.get("endpoint"),
    )

    try:
        query_type = self.configuration.get("query_type", "hive")

        if query_type == "quantum":
            cmd = SqlCommand.create(query=query)
        elif query_type == "hive":
            cmd = HiveCommand.create(query=query, label=self.configuration.get("cluster"))
        elif query_type == "presto":
            cmd = PrestoCommand.create(query=query, label=self.configuration.get("cluster"))
        else:
            raise Exception(
                "Invalid query type: %s. It must be hive, presto, or quantum."
                % query_type)

        logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

        while not Command.is_done(cmd.status):
            time.sleep(qbol.poll_interval)
            cmd = Command.find(cmd.id)
            logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

        rows = []
        columns = []
        error = None

        if cmd.status == "done":
            fp = StringIO()
            cmd.get_results(
                fp=fp,
                inline=True,
                delim="\t",
                fetch=False,
                qlog=None,
                arguments=["true"],
            )

            results = fp.getvalue()
            fp.close()

            data = results.split("\r\n")
            columns = self.fetch_columns(
                [(i, TYPE_STRING) for i in data.pop(0).split("\t")])
            rows = [
                dict(zip((column["name"] for column in columns), row.split("\t")))
                for row in data
            ]

        json_data = json_dumps({"columns": columns, "rows": rows})
    except KeyboardInterrupt:
        logging.info("Sending KILL signal to Qubole Command Id: %s", cmd.id)
        cmd.cancel()
        error = "Query cancelled by user."
        json_data = None

    return json_data, error

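# A sketch of the configuration dict the run_query variants above expect.
# The keys follow the code paths shown ("token", "endpoint", "query_type",
# "cluster"); the concrete values are hypothetical placeholders.
configuration = {
    "token": "YOUR_API_TOKEN",
    "endpoint": "https://api.qubole.com",  # the SDK url becomes "<endpoint>/api"
    "query_type": "presto",                # hive / presto / quantum
    "cluster": "default",                  # Qubole cluster label
}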