def done_qubole(query_id):
    """Fetch the results of an already-finished Qubole command as a DataFrame.

    :param int query_id: id of a command that has already run on Qubole
    :return: pandas DataFrame with response data (empty on any failure).
    :rtype: pandas.DataFrame
    """
    with execute_with_handling_errors(config.get_value, 'qubole', 'api_token') as api_token:
        if api_token is None:
            return pd.DataFrame([])
        Qubole.configure(api_token=api_token)
        with execute_with_handling_errors(Command().find, id=query_id) as res:
            if res is None:
                return pd.DataFrame([])
            print("Id: %s, Status: %s" % (str(res.id), res.status))
            try:
                buf = io.BytesIO()
                res.get_results(buf)
                return qubole_output_to_df(buf.getvalue())
            except Exception as err:
                print(err)
                print("Oops! There was a problem. Try again...")
                return pd.DataFrame([])
def __init__(self, table_name, expected_runtime, dag_id, task_id):
    """Record task metadata, read Redshift/S3 credentials from the
    environment, and build the boto3 S3 resource and client."""
    Qubole.configure(api_token='%s' % os.environ['QUBOLE_API_TOKEN'])
    self.table_name = table_name
    self.expected_runtime = expected_runtime
    self.dag_id = dag_id
    self.task_id = task_id
    env = os.environ
    self.host = env['RS_HOST']
    self.port = env['RS_PORT']
    self.user = env['RS_USER']
    self.password = env['RS_PASSWORD']
    self.db = env['RS_DB']
    self.s3_bucket = env['S3_BUCKET']
    self.rs_s3_auth = env['RS_S3_AUTH']
    self.connection_string = 'dbname=%s host=%s port=%s user=%s password=%s connect_timeout=1200' % \
        (self.db, self.host, self.port, self.user, self.password)
    # Both boto3 handles share the same region/credential settings.
    aws_kwargs = dict(
        region_name='us-west-2',
        aws_access_key_id=env['AWS_ACCESS_KEY_ID'],
        aws_secret_access_key=env['AWS_SECRET_ACCESS_KEY'])
    self.s3_resource = boto3.resource('s3', **aws_kwargs)
    self.s3_client = boto3.client('s3', **aws_kwargs)
def run_qds_command(env, cluster, token, qds_command):
    """Run a shell command on a Qubole cluster.

    :param env: Qubole API url
    :param cluster: cluster label to run on
    :param token: Qubole API token
    :param qds_command: inline shell command text
    :return: the ShellCommand object returned by the SDK
    :raises AfctlDeploymentException: wrapping any failure
    """
    try:
        Qubole.configure(api_token=token, api_url=env)
        return ShellCommand.run(inline=qds_command, label=cluster)
    except Exception as exc:
        raise AfctlDeploymentException(exc)
def qubole_by_id_raw(api_token, hcid, filename):
    """Download the results of Qubole command `hcid` to '<filename>.csv'.

    :return: path of the written file.
    """
    Qubole.configure(api_token=api_token)
    command = Command.find(hcid)
    out_path = filename + '.csv'
    with open(out_path, 'wb') as sink:
        command.get_results(sink)
    return out_path
def __init__(self, *args, **kwargs):
    """Configure the Qubole SDK from the named Airflow connection and cache
    the task metadata needed to build commands later."""
    connection = self.get_connection(kwargs['qubole_conn_id'])
    Qubole.configure(api_token=connection.password, api_url=connection.host)
    self.task_id = kwargs['task_id']
    self.dag_id = kwargs['dag'].dag_id
    self.kwargs = kwargs
    # Resolve the concrete command class for the requested command type.
    self.cls = COMMAND_CLASSES[self.kwargs['command_type']]
    self.cmd = None
def __init__(self, *args, **kwargs) -> None:  # pylint: disable=unused-argument
    """Configure the Qubole SDK from the Airflow connection (falling back to
    the default connection name) and cache task metadata."""
    super().__init__()
    conn_id = kwargs.get('qubole_conn_id', self.default_conn_name)
    conn = self.get_connection(conn_id)
    Qubole.configure(api_token=conn.password, api_url=conn.host)
    self.task_id = kwargs['task_id']
    self.dag_id = kwargs['dag'].dag_id
    self.kwargs = kwargs
    # Concrete SDK command class selected by the requested command type.
    self.cls = COMMAND_CLASSES[self.kwargs['command_type']]
    self.cmd = None
    self.task_instance = None
def qubole_by_id(api_token, hcid, filename):
    """Download the results of Qubole command `hcid` to '<filename>.csv' and
    return them as a tab-delimited pandas DataFrame."""
    Qubole.configure(api_token=api_token)
    command = Command.find(hcid)
    out_path = filename + '.csv'
    with open(out_path, 'wb') as sink:
        command.get_results(sink)
    return pd.read_csv(out_path, delimiter='\t')
def __init__(self, *args, **kwargs) -> None:
    """Configure the Qubole SDK from the Airflow connection (or the default
    connection name) and initialize command/task-instance state."""
    super().__init__()
    conn_id = kwargs.get('qubole_conn_id', self.default_conn_name)
    conn = self.get_connection(conn_id)
    Qubole.configure(api_token=conn.password, api_url=conn.host)
    self.task_id = kwargs['task_id']
    self.dag_id = kwargs['dag'].dag_id
    self.kwargs = kwargs
    # SDK command class for the requested command type.
    self.cls = COMMAND_CLASSES[self.kwargs['command_type']]
    self.cmd: Optional[Command] = None
    self.task_instance: Optional["TaskInstance"] = None
def get(query, delete_file=True, filepath='', delimiter=';', query_type='presto', cluster_label=None):
    """Run (or look up) a Qubole query and return an open file of its results.

    :param query: SQL string to run, or an int id of an existing command
    :param bool delete_file: delete the temp file on close (ignored when
        `filepath` is given)
    :param str filepath: optional path to write results to; a named temp file
        is used when empty
    :param str delimiter: column delimiter passed to _get_results
    :param str query_type: 'presto' or 'hive' (ignored when `query` is an id)
    :param cluster_label: Qubole cluster label to run on
    :return: file object rewound to the start of the results, or None on
        configuration/lookup errors
    :raises Exception: when the command did not finish in the 'done' state
    """
    with execute_with_handling_errors(config.get_value, 'qubole', 'api_token') as api_token:
        if api_token is None:
            return
        try:
            Qubole.configure(api_token=api_token)
        except UnauthorizedAccess:
            print("Invalid credentials were provided")
            return
        if isinstance(query, int):
            with execute_with_handling_errors(Command().find, id=query) as command:
                if command is None:
                    return
        elif query_type == 'presto':
            with execute_with_handling_errors(PrestoCommand.run, query=query, label=cluster_label) as command:
                if command is None:
                    return
        elif query_type == 'hive':
            with execute_with_handling_errors(HiveCommand.run, query=query, label=cluster_label) as command:
                if command is None:
                    return
        else:
            print('Please verify your input.')
            return
        if filepath != '':
            file = open(filepath, 'w+')
        else:
            file = tempfile.NamedTemporaryFile(mode='w+', delete=delete_file)
        if command.status == 'done':
            _get_results(command, file, delimiter)
            file.seek(0)
            return file
        else:
            # Bug fix: close the handle before raising so the descriptor is
            # released (and, for temp files with delete=True, the file itself
            # is removed) instead of leaking on the error path.
            file.close()
            raise Exception(
                'Could not retrieve query results (id: %s, status: %s)'
                % (command.id, command.status))
def run_query(self, query, user):
    """Run `query` on Qubole (Presto or Hive, per configuration), poll until
    the command reaches a terminal state, and return (json_data, error) in
    Redash's query-runner result shape.

    `user` is accepted for interface compatibility; it is not used here.
    """
    qbol.configure(api_token=self.configuration['token'],
                   api_url='%s/api' % self.configuration['endpoint'])
    try:
        cls = PrestoCommand if (self.configuration['query_type'] == 'presto') else HiveCommand
        cmd = cls.create(query=query, label=self.configuration['cluster'])
        logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)
        # Poll at the SDK's configured interval until a terminal state.
        while not Command.is_done(cmd.status):
            time.sleep(qbol.poll_interval)
            cmd = Command.find(cmd.id)
            logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)
        rows = []
        columns = []
        error = None
        if cmd.status == 'done':
            fp = StringIO()
            cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                            qlog=None, arguments=['true'])
            results = fp.getvalue()
            fp.close()
            # First CRLF-separated row of the tab-delimited payload is the header.
            data = results.split('\r\n')
            columns = self.fetch_columns([
                (i, TYPE_STRING) for i in data.pop(0).split('\t')
            ])
            rows = [
                dict(zip((c['name'] for c in columns), row.split('\t')))
                for row in data
            ]
        # NOTE(review): a non-'done' terminal status (e.g. 'error') still
        # produces an empty-but-successful result rather than an error —
        # confirm that is intended.
        json_data = json_dumps({'columns': columns, 'rows': rows})
    except KeyboardInterrupt:
        logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
        cmd.cancel()
        error = "Query cancelled by user."
        json_data = None
    return json_data, error
def __init__(self, label, program, language, arguments, expected_runtime, dag_id, task_id, ds):
    """Configure the Qubole SDK from the environment and record the program
    task's metadata."""
    Qubole.configure(api_token='%s' % os.environ['QUBOLE_API_TOKEN'])
    self.label = label
    self.program = program
    self.language = language
    self.arguments = arguments
    # Fall back to a 2-hour default when no expected runtime was supplied.
    self.expected_runtime = 7200 if expected_runtime == 0 else expected_runtime
    self.dag_id = dag_id
    self.task_id = task_id
    self.ds = ds
def __init__(self, task_run):
    """Wire the controller to the task's Qubole engine config and configure
    the SDK (token, url, cloud) before attaching the qds loggers."""
    super(QuboleCtrl, self).__init__(task_run=task_run)
    self.qubole_config = task_run.task.spark_engine  # type: QuboleConfig
    self.qubole_cmd_id = None
    self.qubole_job_url = None
    cfg = self.qubole_config
    Qubole.configure(
        api_token=cfg.api_token,
        api_url=cfg.api_url,
        cloud_name=cfg.cloud,
    )
    self._setup_qubole_loggers()
def _configure_qubole(self):
    """Set up qds logging, configure the Qubole SDK from self.config, and
    return the 'qds' logger.

    :raises Exception: when no API token is configured.
    """
    logging.basicConfig(level=logging.INFO)
    # Keep the connection logger from duplicating records up the hierarchy.
    connection_logger = logging.getLogger('qds_connection')
    connection_logger.propagate = False
    qdslog = logging.getLogger('qds')
    if not self.config.API_TOKEN:
        raise Exception("You didn't specify your QUBOLE_API_TOKEN in "
                        "your environment before running commands on "
                        "Qubole!\n. It can be found at http://api.qubole"
                        ".com/users/edit")
    Qubole.configure(api_token=self.config.API_TOKEN,
                     api_url=self.config.API_URL,
                     version=self.config.API_VERSION,
                     poll_interval=self.config.POLL_INTERVAL_SEC)
    return qdslog
def poke(self, context):
    """Check the Qubole sensor condition; return True when it holds.

    Any exception from the sensor check is logged and treated as False.
    """
    conn = BaseHook.get_connection(self.qubole_conn_id)
    Qubole.configure(api_token=conn.password, api_url=conn.host)
    # Bug fix: the original used `this.log` (a module-level name that this
    # variant never sets up — cf. the sibling that declares `global this`)
    # and mixed in the root `logging` module; use the instance logger
    # consistently, matching the other poke implementations in this file.
    self.log.info('Poking: %s', self.data)
    status = False
    try:
        status = self.sensor_class.check(self.data)
    except Exception as e:
        self.log.exception(e)
        status = False
    self.log.info('Status of this Poke: %s', status)
    return status
def poke(self, context):
    """Check the Qubole sensor condition; exceptions are logged and count
    as False."""
    conn = BaseHook.get_connection(self.qubole_conn_id)
    Qubole.configure(api_token=conn.password, api_url=conn.host)
    self.log.info('Poking: %s', self.data)
    try:
        status = self.sensor_class.check(self.data)  # pylint: disable=no-member
    except Exception as exc:  # pylint: disable=broad-except
        self.log.exception(exc)
        status = False
    self.log.info('Status of this Poke: %s', status)
    return status
def poke(self, context):
    """Check the Qubole sensor condition via the module-level `this` logger
    proxy; exceptions are logged and count as False."""
    # Module-level logger workaround; see
    # apache/incubator-airflow/pull/3297#issuecomment-385988083
    global this
    conn = BaseHook.get_connection(self.qubole_conn_id)
    Qubole.configure(api_token=conn.password, api_url=conn.host)
    this.log.info('Poking: %s', self.data)
    try:
        status = self.sensor_class.check(self.data)
    except Exception as exc:
        this.log.exception(exc)
        status = False
    this.log.info('Status of this Poke: %s', status)
    return status
def qubole(api_token, sql, replacements, filename):
    """Run the Hive query in file `sql` (after applying `replacements`) on the
    'Trading-spark' cluster, save the result to '<filename>.csv', and return
    it as a tab-delimited pandas DataFrame."""
    Qubole.configure(api_token=api_token)
    with open(sql, 'r') as query_file:
        raw_query = query_file.read()
    label = 'Trading-spark'
    templated = find_replace_multi(raw_query, replacements)
    hive_cmd = HiveCommand.run(query=templated, label=label)
    cmd = Command.find(hive_cmd.id)
    out_file = filename + '.csv'
    with open(out_file, 'wb') as writer:
        cmd.get_results(writer)
    return pd.read_csv(out_file, delimiter='\t')
def request_qubole(input_query, query_type='presto', cluster_label=None):
    """Sends SQL query to Qubole and retrieves the data as pandas DataFrame.

    :param str input_query: query in chosen language (SQL)
    :param str query_type: query language specification {'presto' (default) or 'hive'}
    :param str cluster_label: Name of the Qubole cluster
    :return: pandas DataFrame with response data (empty on any failure).
    :rtype: pandas.DataFrame
    """
    with execute_with_handling_errors(config.get_value, 'qubole', 'api_token') as api_token:
        if api_token is None:
            return pd.DataFrame([])
        Qubole.configure(api_token=api_token)
        # run query
        if query_type == 'presto':
            with execute_with_handling_errors(PrestoCommand.run, query=input_query, label=cluster_label) as hc:
                if hc is None:
                    return pd.DataFrame([])
        elif query_type == 'hive':
            with execute_with_handling_errors(HiveCommand.run, query=input_query, label=cluster_label) as hc:
                if hc is None:
                    return pd.DataFrame([])
        else:
            print('Wrong query type')
            return pd.DataFrame([])
        print("Id: %s, Status: %s" % (str(hc.id), hc.status))
        try:
            # Bug fix: the original passed open('./temp_qubole_output', 'wb')
            # straight into get_results and never closed it — a handle leak,
            # and buffered bytes could still be unflushed when re-read below.
            with open('./temp_qubole_output', 'wb') as out:
                hc.get_results(fp=out)
            with open('./temp_qubole_output', 'rb') as f:
                data = f.read()
            return qubole_output_to_df(data)
        except Exception as e:
            print(e)
            print("Oops! There was a problem. Try again...")
            return pd.DataFrame([])
def __init__(self, access=None, secret=None, testmode=False, db_parallelism=None,
             mode=None, db_table=None, db_where=None, db_columns=None,
             db_boundary_query=None, db_extract_query=None, db_split_column=None,
             hive_table=None, part_spec=None, db_user=None, db_passwd=None,
             db_host=None, db_port=None, db_type=None, db_name=None,
             api_token=None, api_url=None, fetch_size=None):
    """Prepare a sqoop import command: set up scratch directories, load the
    remaining import parameters, assemble the sqoop command line, configure
    the Qubole SDK, and resolve the local cluster's label."""
    # Scratch locations for sqoop staging data and run logs.
    self.temp_location = "/tmp/sqoop/" + uuid.uuid1().hex
    self.tmp_dir = tempfile.mkdtemp(prefix="/media/ephemeral0/logs" + "/sqoop")
    logger.info("Temp Directory is:" + self.tmp_dir)
    self.access = access
    self.secret = secret
    self.api_token = api_token
    self.api_url = api_url
    self.fetch_size = fetch_size
    self.redshift_sink = False
    # Remaining import parameters are resolved by the private loader.
    self.__loadImportParamsFromCid(testmode, db_parallelism, mode, db_table,
                                   db_where, db_columns, db_boundary_query,
                                   db_extract_query, db_split_column, hive_table,
                                   part_spec, db_user, db_passwd, db_host,
                                   db_port, db_type, db_name)
    # Build the sqoop command line incrementally via the private helpers.
    self.sqoop_cmd = ["/usr/lib/sqoop-h2/bin/sqoop"]
    self.sqoop_cmd.extend(["import"])
    self.__addBasicOptions()
    self.__extendCmdSpecificOptions()
    Qubole.configure(api_token=api_token, api_url=api_url)
    # Resolve the current cluster's label by reading cluster_id from the
    # node-info script. NOTE(review): shells out via os.popen and assumes
    # this code runs on a Qubole cluster node — confirm.
    self.cluster_label = Cluster.show(
        os.popen("cat /usr/lib/hustler/bin/nodeinfo_src.sh | grep cluster_id")
        .read().split("=")[1].strip().replace('"', ''))['cluster']['label'][0]
def poke(self, context: dict) -> bool:
    """Check the Qubole sensor condition; exceptions are logged and count
    as False."""
    conn = BaseHook.get_connection(self.qubole_conn_id)
    Qubole.configure(api_token=conn.password, api_url=conn.host)
    self.log.info('Poking: %s', self.data)
    try:
        status = self.sensor_class.check(self.data)  # type: ignore[attr-defined]
    except Exception as e:
        self.log.exception(e)
        status = False
    self.log.info('Status of this Poke: %s', status)
    return status
def execute(self):
    """Run the sqoop import, copy the staged output to S3, then run the
    follow-up Hive script; return 0 on success, non-zero on failure.

    Cleanup scripts run on every failure path.
    """
    logger.info("Running DbImportCommand " + str(self.sqoop_cmd))
    if self.api_url is None:
        Qubole.configure(api_token=self.api_token)
    else:
        Qubole.configure(api_token=self.api_token, api_url=self.api_url)
    # Stage 1: the sqoop import itself.
    p = Popen(self.sqoop_cmd, cwd=self.tmp_dir)
    retCode = p.wait()
    # NOTE(review): `a` is read but never used — looks like leftover debugging.
    a = os.popen("grep s3_default_db_location /usr/lib/hustler/bin/nodeinfo_src.sh").read()
    print(self.temp_location)
    print(self.get_s3_loc())
    # Stage 2: copy the staged output to the S3 destination.
    p = Popen(["hadoop", "dfs", "-cp", self.temp_location,
               self.get_s3_loc() + self.temp_location])
    retCode1 = p.wait()
    if retCode != 0 or retCode1 != 0:
        logger.warn("sqoop retCode = " + str(retCode))
        self.__runCleanupScript()
        self.__runDfsCleanup()
        return (retCode or retCode1)
    else:
        logger.debug("sqoop retCode = " + str(retCode))
        # Pessimistic default: only a 'done' Hive command resets this to 0.
        retCode = 1
        if self.cmd_row['test_mode']:
            logger.debug("Not running hive in test mode.")
            retCode = 0
        else:
            # Stage 3: run the generated Hive script on the cluster.
            logger.info("Running hive script.")
            self.fixHiveQuery()
            q = open(self.tmp_dir + "/hive_query.q").read()
            logger.info("Query is: " + q)
            cmd = HiveCommand.create(query=q, label=self.cluster_label)
            # Poll every 5s until the hive command reaches a terminal state.
            while not Command.is_done(cmd.status):
                time.sleep(5)
                cmd = Command.find(cmd.id)
                logger.info("Hive command id: " + str(cmd.id) + "status: " + str(cmd.status))
            logger.info(cmd.status)
            if cmd.status == "done":
                retCode = 0
    if retCode != 0:
        self.__runCleanupScript()
        self.__runDfsCleanup()
    return (retCode)
def main():
    """Run the streaming word-count example: argv = <api_token> <s3_output_path>."""
    logging.basicConfig(level=logging.INFO)
    if len(sys.argv) < 3:
        usage()
    if len(sys.argv) >= 2 and sys.argv[1] == "-h":
        usage(0)
    api_token = sys.argv[1]
    output_path = sys.argv[2]
    Qubole.configure(api_token=api_token)
    # Hadoop streaming invocation with the sample mapper/reducer from S3.
    cmdline = ("streaming -files s3n://paid-qubole/HadoopAPIExamples/WordCountPython/mapper.py,s3n://paid-qubole/HadoopAPIExamples/WordCountPython/reducer.py -mapper mapper.py -reducer reducer.py -numReduceTasks 1 -input s3n://paid-qubole/default-datasets/gutenberg -output %s" % output_path)
    args = HadoopCommand.parse(cmdline.split())
    cmd = HadoopCommand.run(**args)
    print("Streaming Job run via command id: %s, finished with status %s" % (cmd.id, cmd.status))
def main():
    """Set up logging, parse layered config + CLI args, configure the Qubole
    SDK, and run the selected subcommand handler."""
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(module)s - %(levelname)s - %(message)s')
    stream_handler.setFormatter(formatter)
    root.addHandler(stream_handler)
    # I am using this slightly complicated trick to pass config in the
    # constructor of other packages. Better way to do this?
    config_parser, argparser = setup_parsers()
    config_args, remaining_argv = config_parser.parse_known_args()
    config = load_config(config_args)
    args = argparser.parse_args(remaining_argv)
    if args.debug:
        stream_handler.setLevel(logging.DEBUG)
        root.setLevel(logging.DEBUG)
        logging.debug("Debug is ON!")
    if args.log_file is not None:
        # Mirror everything (at DEBUG) into the requested log file.
        file_handler = logging.FileHandler(args.log_file, mode='w')
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        root.setLevel(logging.DEBUG)
        root.addHandler(file_handler)
    try:
        Qubole.configure(
            api_token=config.get("default", "auth_token"),
            api_url=config.get("default", "api_url"),
            skip_ssl_cert_check=True
        )
        args.func(config, args)
    finally:
        logging.debug("Cleaning up")
def connect(api_token=None, poll_interval=None):
    """Configure the Qubole SDK from arguments or QDS_* environment variables.

    :param api_token: Qubole API token; falls back to $QDS_API_TOKEN
    :param poll_interval: polling interval in seconds; falls back to
        $QDS_POLL_INTERVAL, then 5
    """
    # Try setting from environment variables
    if api_token is None:
        api_token = os.getenv('QDS_API_TOKEN')
    if poll_interval is None:
        poll_interval = os.getenv('QDS_POLL_INTERVAL')
    api_url = os.getenv('QDS_API_URL')
    api_version = os.getenv('QDS_API_VERSION')
    # If they aren't set, resort to default values
    if api_url is None:
        api_url = "https://api.qubole.com/api/"
    if api_token is None:
        sys.stderr.write("No API Token provided\n")
    if api_version is None:
        api_version = "v1.2"
    if poll_interval is None:
        poll_interval = 5
    else:
        # Bug fix: os.getenv returns a string, but the poll interval is used
        # numerically (the CLI variants parse this option with type=int).
        poll_interval = int(poll_interval)
    Qubole.configure(api_token=api_token, api_url=api_url, version=api_version,
                     poll_interval=poll_interval, skip_ssl_cert_check=False)
def run_query(self, query, user):
    """Run `query` on Qubole (Presto or Hive, per configuration), poll until
    the command reaches a terminal state, and return (json_data, error) in
    Redash's query-runner result shape.

    `user` is accepted for interface compatibility; it is not used here.
    """
    qbol.configure(api_token=self.configuration['token'],
                   api_url='%s/api' % self.configuration['endpoint'])
    try:
        cls = PrestoCommand if(self.configuration['query_type'] == 'presto') else HiveCommand
        cmd = cls.create(query=query, label=self.configuration['cluster'])
        logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)
        # Poll at the SDK's configured interval until a terminal state.
        while not Command.is_done(cmd.status):
            time.sleep(qbol.poll_interval)
            cmd = Command.find(cmd.id)
            logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)
        rows = []
        columns = []
        error = None
        if cmd.status == 'done':
            fp = StringIO()
            cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                            qlog=None, arguments=['true'])
            results = fp.getvalue()
            fp.close()
            # First CRLF-separated row of the tab-delimited payload is the header.
            data = results.split('\r\n')
            columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')])
            rows = [dict(zip((c['name'] for c in columns), row.split('\t')))
                    for row in data]
        # NOTE(review): a non-'done' terminal status (e.g. 'error') still
        # produces an empty-but-successful result rather than an error —
        # confirm that is intended.
        json_data = json_dumps({'columns': columns, 'rows': rows})
    except KeyboardInterrupt:
        logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
        cmd.cancel()
        error = "Query cancelled by user."
        json_data = None
    return json_data, error
def __init__(self, table_name, expected_runtime, dag_id, task_id):
    """Record task metadata, read DB credentials from the environment, and
    build the boto3 S3 resource and client."""
    Qubole.configure(api_token='%s' % os.environ['QUBOLE_API_TOKEN'])
    self.table_name = table_name
    self.expected_runtime = expected_runtime
    self.dag_id = dag_id
    self.task_id = task_id
    env = os.environ
    self.host = env['DB_HOST']
    self.user = env['DB_USER']
    self.password = env['DB_PASSWORD']
    self.db = env['DB']
    self.s3_bucket = env['S3_BUCKET']
    # Both boto3 handles share the same region/credential settings.
    aws_kwargs = dict(
        region_name='us-west-2',
        aws_access_key_id=env['AWS_ACCESS_KEY_ID'],
        aws_secret_access_key=env['AWS_SECRET_ACCESS_KEY'])
    self.s3_resource = boto3.resource('s3', **aws_kwargs)
    self.s3_client = boto3.client('s3', **aws_kwargs)
def __init__(self, db_type, raw_sql, expected_runtime, dag_id, task_id, ds):
    """Map db_type to the Qubole cluster label it runs on and record the
    query's metadata.

    :raises Exception: for an unrecognized db_type.
    """
    Qubole.configure(api_token='%s' % os.environ['QUBOLE_API_TOKEN'])
    kind = db_type.upper()
    if kind == 'PRESTO_CSV':
        self.label = 'presto_no_compression'
    elif kind == 'PROD_PRESTO':
        self.label = 'Prod-Presto'
    elif kind == 'DEV_PRESTO':
        self.label = 'Dev-Presto'
    elif kind == 'HIVE':
        self.label = 'default'
    else:
        # Bug fix: the old message said "presto_csv or presto_orc", which does
        # not match the types this constructor actually accepts.
        msg = ('Need to specify correct query type: '
               'presto_csv, prod_presto, dev_presto or hive')
        raise Exception(msg)
    self.raw_sql = raw_sql
    self.db_type = db_type
    if expected_runtime == 0:
        self.expected_runtime = 7200  # 2 hour default
    else:
        self.expected_runtime = expected_runtime
    self.dag_id = dag_id
    self.task_id = task_id
    self.ds = ds
def main():
    """Run the streaming word-count example: argv = <api_token> <s3_output_path>."""
    logging.basicConfig(level=logging.INFO)
    if len(sys.argv) < 3:
        usage()
    if len(sys.argv) >= 2 and sys.argv[1] == "-h":
        usage(0)
    api_token, output_path = sys.argv[1], sys.argv[2]
    Qubole.configure(api_token=api_token)
    # Hadoop streaming invocation with the sample mapper/reducer from S3.
    streaming_spec = (
        "streaming -files s3n://paid-qubole/HadoopAPIExamples/WordCountPython/mapper.py,s3n://paid-qubole/HadoopAPIExamples/WordCountPython/reducer.py -mapper mapper.py -reducer reducer.py -numReduceTasks 1 -input s3n://paid-qubole/default-datasets/gutenberg -output %s"
        % output_path)
    parsed = HadoopCommand.parse(streaming_spec.split())
    cmd = HadoopCommand.run(**parsed)
    print("Streaming Job run via command id: %s, finished with status %s"
          % (cmd.id, cmd.status))
def main():
    """qds CLI entry point: parse global options, configure the SDK, and
    dispatch to the handler named by the first positional argument."""
    optparser = OptionParser(usage=usage_str)
    optparser.add_option("--token", dest="api_token",
                         default=os.getenv('QDS_API_TOKEN'),
                         help="api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN")
    optparser.add_option("--url", dest="api_url",
                         default=os.getenv('QDS_API_URL'),
                         help="base url for QDS REST API. defaults to https://api.qubole.com/api ")
    optparser.add_option("--version", dest="api_version",
                         default=os.getenv('QDS_API_VERSION'),
                         help="version of REST API to access. defaults to v1.2")
    optparser.add_option("--poll_interval", dest="poll_interval", type=int,
                         default=os.getenv('QDS_POLL_INTERVAL'),
                         help="interval for polling API for completion and other events. defaults to 5s")
    optparser.add_option("--skip_ssl_cert_check", dest="skip_ssl_cert_check",
                         action="store_true", default=False,
                         help="skip verification of server SSL certificate. Insecure: use with caution.")
    optparser.add_option("-v", dest="verbose", action="store_true", default=False,
                         help="verbose mode - info level logging")
    optparser.add_option("--vv", dest="chatty", action="store_true", default=False,
                         help="very verbose mode - debug level logging")
    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()
    # Logging verbosity: --vv beats -v beats the WARN default.
    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)
    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)
    # Defaults for anything not supplied on the CLI or via environment.
    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/"
    if options.api_version is None:
        options.api_version = "v1.2"
    if options.poll_interval is None:
        options.poll_interval = 5
    if options.skip_ssl_cert_check is None:
        options.skip_ssl_cert_check = False
    elif options.skip_ssl_cert_check:
        log.warn("Insecure mode enabled: skipping SSL cert verification\n")
    Qubole.configure(api_token=options.api_token, api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check)
    if len(args) < 1:
        sys.stderr.write("Missing first argument containing subcommand\n")
        usage(optparser)
    # Dispatch on the subcommand name.
    a0 = args.pop(0)
    if a0 in CommandClasses:
        return cmdmain(a0, args)
    if a0 == "account":
        return accountmain(args)
    if a0 == "cluster":
        # Cluster handling differs by API version; pass the numeric version.
        api_version_number = float(options.api_version[1:])
        return clustermain(args, api_version_number)
    if a0 == "action":
        return actionmain(args)
    if a0 == "scheduler":
        return schedulermain(args)
    if a0 == "report":
        return reportmain(args)
    if a0 == "dbtap":
        return dbtapmain(args)
    if a0 == "group":
        return groupmain(args)
    if a0 == "role":
        return rolemain(args)
    if a0 == "app":
        return appmain(args)
    cmdset = set(CommandClasses.keys())
    sys.stderr.write("First command must be one of <%s>\n" %
                     "|".join(cmdset.union(["cluster", "action", "scheduler",
                                            "report", "dbtap", "role", "group",
                                            "app", "account"])))
    usage(optparser)
def backoff_poll_interval(self, multiple=2):
    """Scale the SDK's current poll interval by `multiple` (default 2)."""
    new_interval = QDS.poll_interval * multiple
    QDS.configure(QDS.api_token, poll_interval=new_interval)
# NOTE(review): the next line is the tail of a route handler whose `def`
# lies above this chunk and is not visible here.
    return Response()


@app.route('/run_scaling', methods=['POST'])
@login_required
def run_scaling():
    """Fire 10 asynchronous Hive queries to exercise cluster autoscaling."""
    for _ in range(10):
        run_hive_query_asynchronous(
            cluster_label=config['hadoop_cluster_name'],
            query_filename='top_10_revenue_generating_products.sql',
            qubole_database_name=config['qubole_database_name'])
    return Response()


def parse_command_line_args():
    """Parse --config (required) and --extra-config command-line options."""
    parser = argparse.ArgumentParser(description='Quick start App')
    parser.add_argument('--config', required=True, help='Configuration')
    parser.add_argument('--extra-config', help='Configuration of clusters and notebooks')
    return parser.parse_args()


if __name__ == "__main__":
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    args = parse_command_line_args()
    config = read_config(args.config)
    # Random secret key: Flask sessions will not survive a restart.
    app.secret_key = os.urandom(47)
    app.config.update(config)
    Qubole.configure(api_token=config['qubole_api_token'])
    app.run(host='0.0.0.0', port=int(config['port']), threaded=True)
def main():
    """Older qds CLI entry point: parse global options, configure the SDK,
    and dispatch to a *cmd handler or the hadoop_cluster handler."""
    optparser = OptionParser(usage=usage_str)
    optparser.add_option("--token", dest="api_token",
                         default=os.getenv('QDS_API_TOKEN'),
                         help="api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN")
    optparser.add_option("--url", dest="api_url",
                         default=os.getenv('QDS_API_URL'),
                         help="base url for QDS REST API. defaults to https://api.qubole.com/api ")
    optparser.add_option("--version", dest="api_version",
                         default=os.getenv('QDS_API_VERSION'),
                         help="version of REST API to access. defaults to v1.2")
    optparser.add_option("--poll_interval", dest="poll_interval",
                         default=os.getenv('QDS_POLL_INTERVAL'),
                         help="interval for polling API for completion and other events. defaults to 5s")
    optparser.add_option("--skip_ssl_cert_check", dest="skip_ssl_cert_check",
                         action="store_true", default=False,
                         help="skip verification of server SSL certificate. Insecure: use with caution.")
    optparser.add_option("-v", dest="verbose", action="store_true", default=False,
                         help="verbose mode - info level logging")
    optparser.add_option("--vv", dest="chatty", action="store_true", default=False,
                         help="very verbose mode - debug level logging")
    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()
    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)
        pass
    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)
    # Defaults for anything not supplied via CLI or environment.
    # NOTE(review): poll_interval from the environment stays a string here
    # (no type=int on the option) — confirm the SDK tolerates that.
    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/";
    if options.api_version is None:
        options.api_version = "v1.2";
    if options.poll_interval is None:
        options.poll_interval = 5;
    if options.skip_ssl_cert_check is None:
        options.skip_ssl_cert_check = False
    elif options.skip_ssl_cert_check:
        sys.stderr.write("[WARN] Insecure mode enabled: skipping SSL cert verification\n")
    Qubole.configure(api_token=options.api_token, api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check)
    if len(args) < 1:
        sys.stderr.write("Missing first argument containing command type\n")
        usage(optparser)
    cmdsuffix = "cmd"
    # Accepted subcommands are of the form "hivecmd", "pigcmd", etc.
    cmdset = set([x + cmdsuffix for x in ["hive", "pig", "hadoop", "shell", "dbexport", "presto"]])
    a0 = args.pop(0)
    if (a0 in cmdset):
        return cmdmain(a0[:a0.find(cmdsuffix)], args)
    if (a0 == "hadoop_cluster"):
        return clustermain(a0, args)
    sys.stderr.write("First command must be one of <%s>\n" %
                     "|".join(cmdset.union(["hadoop_cluster"])))
    usage(optparser)
# SECURITY FIX: the original hard-coded a Qubole API token in source. Any
# token committed like that must be treated as compromised and rotated.
# Read it from the environment instead.
import os

from qds_sdk.qubole import Qubole

Qubole.configure(api_token=os.environ['QDS_API_TOKEN'],
                 api_url="http://localhost:3000/api/")

from qds_sdk.commands import *

# Run a trivial Hive command (with up to 4 retries) and report its status.
hc = HiveCommand.create(query='show tables', retry='4')
# print(...) of a single %-formatted string works on both Python 2 and 3
# (the original used the Python-2-only print statement).
print("Id: %s, Status: %s" % (str(hc.id), hc.status))
def main():
    """qds CLI entry point: parse global options, configure the SDK, and
    dispatch to the handler for the first positional subcommand (includes
    the newer nezha/user/template subcommands)."""
    optparser = OptionParser(usage=usage_str)
    optparser.add_option(
        "--token",
        dest="api_token",
        default=os.getenv('QDS_API_TOKEN'),
        help=
        "api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN"
    )
    optparser.add_option(
        "--url",
        dest="api_url",
        default=os.getenv('QDS_API_URL'),
        help=
        "base url for QDS REST API. defaults to https://api.qubole.com/api ")
    optparser.add_option(
        "--version",
        dest="api_version",
        default=os.getenv('QDS_API_VERSION'),
        help="version of REST API to access. defaults to v1.2")
    optparser.add_option(
        "--poll_interval",
        dest="poll_interval",
        type=int,
        default=os.getenv('QDS_POLL_INTERVAL'),
        help=
        "interval for polling API for completion and other events. defaults to 5s"
    )
    optparser.add_option(
        "--skip_ssl_cert_check",
        dest="skip_ssl_cert_check",
        action="store_true",
        default=False,
        help=
        "skip verification of server SSL certificate. Insecure: use with caution."
    )
    optparser.add_option("-v",
                         dest="verbose",
                         action="store_true",
                         default=False,
                         help="verbose mode - info level logging")
    optparser.add_option("--vv",
                         dest="chatty",
                         action="store_true",
                         default=False,
                         help="very verbose mode - debug level logging")
    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()
    # Logging verbosity: --vv beats -v beats the WARN default.
    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)
    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)
    # Defaults for anything not supplied via CLI or environment.
    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/"
    if options.api_version is None:
        options.api_version = "v1.2"
    if options.poll_interval is None:
        options.poll_interval = 5
    if options.skip_ssl_cert_check is None:
        options.skip_ssl_cert_check = False
    elif options.skip_ssl_cert_check:
        log.warn("Insecure mode enabled: skipping SSL cert verification\n")
    Qubole.configure(api_token=options.api_token, api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check)
    if len(args) < 1:
        sys.stderr.write("Missing first argument containing subcommand\n")
        usage(optparser)
    # Dispatch on the subcommand name.
    a0 = args.pop(0)
    if a0 in CommandClasses:
        return cmdmain(a0, args)
    if a0 == "account":
        return accountmain(args)
    if a0 == "cluster":
        # Cluster handling differs by API version; pass the numeric version.
        api_version_number = float(options.api_version[1:])
        return clustermain(args, api_version_number)
    if a0 == "action":
        return actionmain(args)
    if a0 == "scheduler":
        return schedulermain(args)
    if a0 == "report":
        return reportmain(args)
    if a0 == "dbtap":
        return dbtapmain(args)
    if a0 == "group":
        return groupmain(args)
    if a0 == "role":
        return rolemain(args)
    if a0 == "app":
        return appmain(args)
    if a0 == "nezha":
        return nezhamain(args)
    if a0 == "user":
        return usermain(args)
    if a0 == "template":
        return templatemain(args)
    cmdset = set(CommandClasses.keys())
    sys.stderr.write("First command must be one of <%s>\n" % "|".join(
        cmdset.union([
            "cluster", "action", "scheduler", "report", "dbtap", "role",
            "group", "app", "account", "nezha", "user", "template"
        ])))
    usage(optparser)
def main():
    """Earliest qds CLI entry point: only the hive/pig/hadoop/shell *cmd
    subcommands are supported."""
    optparser = OptionParser(usage=usage_str)
    optparser.add_option("--token", dest="api_token",
                         default=os.getenv('QDS_API_TOKEN'),
                         help="api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN")
    optparser.add_option("--url", dest="api_url",
                         default=os.getenv('QDS_API_URL'),
                         help="base url for QDS REST API. defaults to https://api.qubole.com/api ")
    optparser.add_option("--version", dest="api_version",
                         default=os.getenv('QDS_API_VERSION'),
                         help="version of REST API to access. defaults to v1.2")
    optparser.add_option("--poll_interval", dest="poll_interval",
                         default=os.getenv('QDS_POLL_INTERVAL'),
                         help="interval for polling API for completion and other events. defaults to 5s")
    optparser.add_option("-v", dest="verbose", action="store_true", default=False,
                         help="verbose mode - info level logging")
    optparser.add_option("--vv", dest="chatty", action="store_true", default=False,
                         help="very verbose mode - debug level logging")
    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()
    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)
        pass
    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)
    # Defaults for anything not supplied via CLI or environment.
    # NOTE(review): poll_interval from the environment stays a string here
    # (no type=int on the option) — confirm the SDK tolerates that.
    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/";
    if options.api_version is None:
        options.api_version = "v1.2";
    if options.poll_interval is None:
        options.poll_interval = 5;
    Qubole.configure(api_token=options.api_token, api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval)
    if len(args) < 1:
        sys.stderr.write("Missing first argument containing command type\n")
        usage()
    cmdset = set(["hive", "pig", "hadoop", "shell"])
    cmdsuffix = "cmd"
    cmd = args.pop(0)
    # A valid command ends in "cmd" and has a recognized engine prefix.
    if ((cmd.find(cmdsuffix) != len(cmd) - 3) or
            (cmd[:cmd.find(cmdsuffix)] not in cmdset)):
        sys.stderr.write("First command must be one of <%s>\n" % "|".join(cmdset))
        usage()
    return cmdmain(cmd[:cmd.find(cmdsuffix)], args)
def qb_configure(api_token, api_url):
    """Configure the Qubole SDK, forwarding whatever Qubole.configure returns."""
    result = Qubole.configure(api_token=api_token, api_url=api_url)
    return result
def __init__(self, name, context, **kwargs):
    """Initialize the cluster wrapper: S3 filesystem helper plus Qubole SDK
    configuration from the context settings."""
    super(QuboleCluster, self).__init__(name, context, kwargs=kwargs)
    self._filesystem = S3Filesystem(self.logger, context, **kwargs)
    Qubole.configure(api_token=context.settings['qds_api_token'])
def hivecommand_from_r(query=None, poll_interval=None, sample_size=None,
                       macros=None, tags=None, cluster_label=None,
                       notify=None, name=None, api_token=None):
    """Bridge for R callers: rebuild a qds-CLI style argument string from the
    keyword options, configure the Qubole SDK, and run it via hivecommand().

    Configuration falls back to the QDS_API_TOKEN / QDS_API_URL /
    QDS_API_VERSION / QDS_POLL_INTERVAL environment variables. SDK errors are
    written to stderr rather than raised.
    """
    # Environment fallbacks.
    api_url = os.getenv('QDS_API_URL')
    api_version = os.getenv('QDS_API_VERSION')
    if poll_interval is None:
        poll_interval = os.getenv('QDS_POLL_INTERVAL')
    if api_token is None:
        api_token = os.getenv('QDS_API_TOKEN')
    chatty = False
    verbose = False
    skip_ssl_cert_check = None
    # Bug fix: the original re-assigned api_url = None and api_version = None
    # at this point, silently discarding the QDS_API_URL / QDS_API_VERSION
    # environment values read above so the hard-coded defaults always won.
    queryString = ""
    # reconstruct the queryString to be parsed by the hivecommand.parse function
    if query is not None:
        queryString += " --query '%s' " % str(query)
    if macros is not None:
        queryString += " --macros '%s' " % str(macros)
    if tags is not None:
        queryString += " --tags '%s' " % str(tags)
    if sample_size is not None:
        queryString += " --sample_size '%s' " % str(sample_size)
    if cluster_label is not None:
        queryString += " --cluster-label '%s' " % str(cluster_label)
    if notify is not None:
        queryString += " --notify '%s' " % str(notify)
    if name is not None:
        queryString += " --name '%s' " % str(name)
    if chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)
    if api_token is None:
        sys.stderr.write("No API Token provided\n")
    if api_url is None:
        api_url = "https://api.qubole.com/api/"
    if api_version is None:
        api_version = "v1.2"
    if poll_interval is None:
        poll_interval = 5
    if skip_ssl_cert_check is None:
        skip_ssl_cert_check = False
    elif skip_ssl_cert_check:
        log.warn("Insecure mode enabled: skipping SSL cert verification\n")
    Qubole.configure(api_token=api_token, api_url=api_url,
                     version=api_version, poll_interval=poll_interval,
                     skip_ssl_cert_check=skip_ssl_cert_check)
    try:
        return (hivecommand(queryString))
    except qds_sdk.exception.Error as e:
        sys.stderr.write("Error: Status code %s (%s) from url %s\n" %
                         (e.request.status_code, e.__class__.__name__,
                          e.request.url))
    except qds_sdk.exception.ConfigError as e:
        sys.stderr.write("Configuration error: %s\n" % str(e))
    except qds_sdk.exception.ParseError as e:
        sys.stderr.write("Error: %s\n" % str(e))
    except Exception:
        traceback.print_exc(file=sys.stderr)
def __init__(self):
    """Install a SIGINT handler and configure the Qubole SDK with the
    class-level token."""
    signal.signal(signal.SIGINT, self.exit)
    # For non api.qubole env, set the env in configure()
    Qubole.configure(api_token=self.API_TOKEN)
def set_token(self, api_token):
    """Point the QDS SDK at the supplied API token."""
    QDS.configure(api_token=api_token)