Пример #1
0
def done_qubole(query_id):
    """Fetch the results of an existing Qubole command as a DataFrame.

    The API token is read through ``config.get_value('qubole', 'api_token')``.
    An empty DataFrame is returned when the token or the command could not
    be obtained, or when downloading/parsing the results raises.

    :param int query_id: id of a command already known to Qubole
    :return: pandas DataFrame with response data.
    :rtype: pandas.DataFrame
    """
    with execute_with_handling_errors(config.get_value, 'qubole',
                                      'api_token') as token:
        if token is None:
            return pd.DataFrame([])

    Qubole.configure(api_token=token)

    with execute_with_handling_errors(Command().find,
                                      id=query_id) as command:
        if command is None:
            return pd.DataFrame([])

    print("Id: %s, Status: %s" % (command.id, command.status))

    try:
        raw_output = io.BytesIO()
        command.get_results(raw_output)
        frame = qubole_output_to_df(raw_output.getvalue())
    except Exception as err:
        # Best effort: report the problem and hand back an empty frame.
        print(err)
        print("Oops!  There was a problem.  Try again...")
        frame = pd.DataFrame([])
    return frame
Пример #2
0
    def __init__(self, table_name, expected_runtime, dag_id, task_id):
        """Configure Qubole and collect connection settings from the environment.

        Reads QUBOLE_API_TOKEN, the RS_* variables, S3_BUCKET and the AWS
        credential variables from ``os.environ`` (a missing variable raises
        KeyError), then creates the boto3 S3 resource and client.
        """
        env = os.environ
        Qubole.configure(api_token=env['QUBOLE_API_TOKEN'])

        self.table_name = table_name
        self.expected_runtime = expected_runtime
        self.dag_id = dag_id
        self.task_id = task_id

        self.host = env['RS_HOST']
        self.port = env['RS_PORT']
        self.user = env['RS_USER']
        self.password = env['RS_PASSWORD']
        self.db = env['RS_DB']
        self.s3_bucket = env['S3_BUCKET']
        self.rs_s3_auth = env['RS_S3_AUTH']
        self.connection_string = (
            'dbname=%s host=%s port=%s user=%s password=%s connect_timeout=1200'
            % (self.db, self.host, self.port, self.user, self.password))

        # The resource and the client share the same region and credentials.
        s3_options = {
            'region_name': 'us-west-2',
            'aws_access_key_id': env['AWS_ACCESS_KEY_ID'],
            'aws_secret_access_key': env['AWS_SECRET_ACCESS_KEY'],
        }
        self.s3_resource = boto3.resource('s3', **s3_options)
        self.s3_client = boto3.client('s3', **s3_options)
Пример #3
0
    def run_qds_command(env, cluster, token, qds_command):
        """Run an inline shell command on Qubole.

        Configures the SDK with ``token`` and ``env`` (passed as the API URL)
        and returns whatever ``ShellCommand.run`` returns for ``qds_command``
        on the cluster labelled ``cluster``. Any exception is re-raised
        wrapped in AfctlDeploymentException.
        """
        try:
            Qubole.configure(api_token=token, api_url=env)
            return ShellCommand.run(inline=qds_command, label=cluster)
        except Exception as err:
            raise AfctlDeploymentException(err)
Пример #4
0
def qubole_by_id_raw(api_token,hcid,filename):
    """Download the results of command ``hcid`` into ``<filename>.csv``.

    Returns the path of the written file.
    """
    Qubole.configure(api_token=api_token)
    command = Command.find(hcid)

    csv_path = filename + '.csv'
    with open(csv_path, 'wb') as sink:
        command.get_results(sink)
    return csv_path
 def __init__(self, *args, **kwargs):
     """Configure the Qubole SDK and remember the task settings.

     Requires the keyword arguments ``qubole_conn_id``, ``task_id``,
     ``dag`` and ``command_type``; positional arguments are not used.
     """
     connection = self.get_connection(kwargs['qubole_conn_id'])
     # The connection's password carries the API token, its host the API URL.
     Qubole.configure(api_token=connection.password,
                      api_url=connection.host)

     self.task_id = kwargs['task_id']
     self.dag_id = kwargs['dag'].dag_id
     self.kwargs = kwargs
     self.cls = COMMAND_CLASSES[kwargs['command_type']]
     self.cmd = None
Пример #6
0
 def __init__(self, *args, **kwargs) -> None:  # pylint: disable=unused-argument
     """Configure the Qubole SDK and remember the task settings.

     ``qubole_conn_id`` is optional and falls back to
     ``self.default_conn_name``; ``task_id``, ``dag`` and ``command_type``
     are required keyword arguments.
     """
     super().__init__()
     conn_id = kwargs.get('qubole_conn_id', self.default_conn_name)
     connection = self.get_connection(conn_id)
     # The connection's password carries the API token, its host the API URL.
     Qubole.configure(api_token=connection.password,
                      api_url=connection.host)

     self.task_id = kwargs['task_id']
     self.dag_id = kwargs['dag'].dag_id
     self.kwargs = kwargs
     self.cls = COMMAND_CLASSES[kwargs['command_type']]
     self.cmd = None
     self.task_instance = None
Пример #7
0
def qubole_by_id(api_token,hcid,filename):
    """Download the results of command ``hcid`` and load them as a DataFrame.

    The raw results are written to ``<filename>.csv`` and then parsed as
    tab-delimited text.
    """
    Qubole.configure(api_token=api_token)
    command = Command.find(hcid)

    csv_path = filename + '.csv'
    with open(csv_path, 'wb') as sink:
        command.get_results(sink)

    return pd.read_csv(csv_path, delimiter='\t')
Пример #8
0
 def __init__(self, *args, **kwargs) -> None:
     """Configure the Qubole SDK and remember the task settings.

     ``qubole_conn_id`` is optional and falls back to
     ``self.default_conn_name``; ``task_id``, ``dag`` and ``command_type``
     are required keyword arguments.
     """
     super().__init__()
     conn_id = kwargs.get('qubole_conn_id', self.default_conn_name)
     connection = self.get_connection(conn_id)
     # The connection's password carries the API token, its host the API URL.
     Qubole.configure(api_token=connection.password,
                      api_url=connection.host)

     self.task_id = kwargs['task_id']
     self.dag_id = kwargs['dag'].dag_id
     self.kwargs = kwargs
     self.cls = COMMAND_CLASSES[kwargs['command_type']]
     self.cmd: Optional[Command] = None
     self.task_instance: Optional["TaskInstance"] = None
Пример #9
0
def get(query,
        delete_file=True,
        filepath='',
        delimiter=';',
        query_type='presto',
        cluster_label=None):
    """Run (or look up) a Qubole query and write its results to a file.

    :param query: query text to run, or the integer id of an existing command
    :param bool delete_file: passed as ``delete`` to ``NamedTemporaryFile``
        when no ``filepath`` is given
    :param str filepath: destination path; ``''`` means use a temporary file
    :param str delimiter: forwarded to ``_get_results``
    :param str query_type: ``'presto'`` or ``'hive'``; ignored for an int query
    :param cluster_label: cluster label forwarded to the command's ``run``
    :return: the open result file positioned at its start, or ``None`` when
        the token, credentials, command or ``query_type`` are not usable
    :raises Exception: when the command status is not ``'done'``
    """
    with execute_with_handling_errors(config.get_value, 'qubole',
                                      'api_token') as api_token:
        if api_token is None:
            return

    try:
        Qubole.configure(api_token=api_token)
    except UnauthorizedAccess:
        print("Invalid credentials were provided")
        return

    if isinstance(query, int):
        with execute_with_handling_errors(Command().find, id=query) as command:
            if command is None:
                return
    elif query_type == 'presto':
        with execute_with_handling_errors(PrestoCommand.run,
                                          query=query,
                                          label=cluster_label) as command:
            if command is None:
                return
    elif query_type == 'hive':
        with execute_with_handling_errors(HiveCommand.run,
                                          query=query,
                                          label=cluster_label) as command:
            if command is None:
                return
    else:
        print('Please verify your input.')
        return

    if filepath != '':
        out_file = open(filepath, 'w+')
    else:
        out_file = tempfile.NamedTemporaryFile(mode='w+', delete=delete_file)

    try:
        if command.status != 'done':
            raise Exception(
                'Could not retrieve query results (id: %s, status: %s)' %
                (command.id, command.status))
        _get_results(command, out_file, delimiter)
        out_file.seek(0)
    except BaseException:
        # The caller never receives the handle on failure, so close it here
        # instead of leaking it; the original error is re-raised unchanged.
        out_file.close()
        raise

    return out_file
Пример #10
0
    def run_query(self, query, user):
        """Run ``query`` on Qubole and return ``(json_data, error)``.

        A Presto or Hive command (chosen by ``configuration['query_type']``)
        is created and polled every ``qbol.poll_interval`` seconds until
        ``Command.is_done``. When it ends with status ``'done'`` the
        tab-separated results are converted to columns and rows; any other
        final status produces empty columns and rows. ``user`` is not used.

        :return: ``(json_data, None)`` normally, or
            ``(None, "Query cancelled by user.")`` on KeyboardInterrupt
        """
        qbol.configure(api_token=self.configuration['token'],
                       api_url='%s/api' % self.configuration['endpoint'])

        # Bound before the try block so the interrupt handler can tell whether
        # a command was actually created.
        cmd = None
        try:
            cls = PrestoCommand if (self.configuration['query_type']
                                    == 'presto') else HiveCommand
            cmd = cls.create(query=query, label=self.configuration['cluster'])
            logging.info("Qubole command created with Id: %s and Status: %s",
                         cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id,
                             cmd.status)

            rows = []
            columns = []
            error = None

            if cmd.status == 'done':
                fp = StringIO()
                cmd.get_results(fp=fp,
                                inline=True,
                                delim='\t',
                                fetch=False,
                                qlog=None,
                                arguments=['true'])

                results = fp.getvalue()
                fp.close()

                # The first line of the output is treated as the header row.
                data = results.split('\r\n')
                columns = self.fetch_columns([
                    (i, TYPE_STRING) for i in data.pop(0).split('\t')
                ])
                rows = [
                    dict(zip((c['name'] for c in columns), row.split('\t')))
                    for row in data
                ]

            json_data = json_dumps({'columns': columns, 'rows': rows})
        except KeyboardInterrupt:
            if cmd is not None:
                logging.info('Sending KILL signal to Qubole Command Id: %s',
                             cmd.id)
                cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error
Пример #11
0
 def __init__(self, label, program, language, arguments, expected_runtime,
              dag_id, task_id, ds):
     """Configure Qubole from QUBOLE_API_TOKEN and store the job settings.

     An ``expected_runtime`` of 0 is replaced by 7200.
     """
     Qubole.configure(api_token=os.environ['QUBOLE_API_TOKEN'])

     self.label = label
     self.program = program
     self.language = language
     self.arguments = arguments
     # 2 hour default
     self.expected_runtime = (
         7200 if expected_runtime == 0 else expected_runtime)
     self.dag_id = dag_id
     self.task_id = task_id
     self.ds = ds
Пример #12
0
    def __init__(self, task_run):
        """Set up the controller for ``task_run`` and configure the Qubole SDK.

        The SDK settings (token, URL, cloud) come from the task's
        ``spark_engine`` configuration.
        """
        super(QuboleCtrl, self).__init__(task_run=task_run)

        engine_config = task_run.task.spark_engine  # type: QuboleConfig
        self.qubole_config = engine_config
        self.qubole_cmd_id = None
        self.qubole_job_url = None

        Qubole.configure(
            api_token=engine_config.api_token,
            api_url=engine_config.api_url,
            cloud_name=engine_config.cloud,
        )
        self._setup_qubole_loggers()
Пример #13
0
 def _configure_qubole(self):
     """Set up logging, configure the Qubole SDK and return the 'qds' logger.

     Raises Exception when ``self.config.API_TOKEN`` is empty.
     """
     logging.basicConfig(level=logging.INFO)
     # Stop 'qds_connection' records from propagating to ancestor loggers.
     logging.getLogger('qds_connection').propagate = False
     qds_logger = logging.getLogger('qds')

     settings = self.config
     if not settings.API_TOKEN:
         raise Exception("You didn't specify your QUBOLE_API_TOKEN in "
                         "your environment before running commands on "
                         "Qubole!\n. It can be found at http://api.qubole"
                         ".com/users/edit")

     Qubole.configure(api_token=settings.API_TOKEN,
                      api_url=settings.API_URL,
                      version=settings.API_VERSION,
                      poll_interval=settings.POLL_INTERVAL_SEC)
     return qds_logger
Пример #14
0
 def _configure_qubole(self):
     """Set up logging, configure the Qubole SDK and return the 'qds' logger.

     Raises Exception when ``self.config.API_TOKEN`` is empty.
     """
     logging.basicConfig(level=logging.INFO)
     # Stop 'qds_connection' records from propagating to ancestor loggers.
     logging.getLogger('qds_connection').propagate = False
     qds_logger = logging.getLogger('qds')

     settings = self.config
     if not settings.API_TOKEN:
         raise Exception("You didn't specify your QUBOLE_API_TOKEN in "
                         "your environment before running commands on "
                         "Qubole!\n. It can be found at http://api.qubole"
                         ".com/users/edit")

     Qubole.configure(api_token=settings.API_TOKEN,
                      api_url=settings.API_URL,
                      version=settings.API_VERSION,
                      poll_interval=settings.POLL_INTERVAL_SEC)
     return qds_logger
Пример #15
0
    def poke(self, context):
        """Run the sensor check once and return its status.

        Configures the Qubole SDK from the ``qubole_conn_id`` connection,
        then returns ``self.sensor_class.check(self.data)``. A check that
        raises is logged with its traceback and reported as ``False``.
        ``context`` is not used.
        """
        conn = BaseHook.get_connection(self.qubole_conn_id)
        Qubole.configure(api_token=conn.password, api_url=conn.host)

        # Log through the instance logger so records carry this sensor's name.
        self.log.info('Poking: %s', self.data)

        status = False
        try:
            status = self.sensor_class.check(self.data)
        except Exception as e:
            self.log.exception(e)
            status = False

        self.log.info('Status of this Poke: %s', status)

        return status
Пример #16
0
    def poke(self, context):
        """Run the sensor check once and return its status.

        Configures the Qubole SDK from the ``qubole_conn_id`` connection,
        then returns ``self.sensor_class.check(self.data)``. A check that
        raises is logged with its traceback and reported as ``False``.
        ``context`` is not used.
        """
        conn = BaseHook.get_connection(self.qubole_conn_id)
        Qubole.configure(api_token=conn.password, api_url=conn.host)

        # Log through the instance logger so records carry this sensor's name.
        self.log.info('Poking: %s', self.data)

        status = False
        try:
            status = self.sensor_class.check(self.data)
        except Exception as e:
            self.log.exception(e)
            status = False

        self.log.info('Status of this Poke: %s', status)

        return status
Пример #17
0
    def poke(self, context):
        """Run the sensor check once and return its status.

        The Qubole SDK is configured from the ``qubole_conn_id`` connection
        first. A check that raises is logged and reported as ``False``.
        """
        connection = BaseHook.get_connection(self.qubole_conn_id)
        Qubole.configure(api_token=connection.password,
                         api_url=connection.host)

        self.log.info('Poking: %s', self.data)

        try:
            status = self.sensor_class.check(self.data)  # pylint: disable=no-member
        except Exception as err:  # pylint: disable=broad-except
            self.log.exception(err)
            status = False

        self.log.info('Status of this Poke: %s', status)
        return status
Пример #18
0
    def poke(self, context):
        """Run the sensor check once and return its status.

        Configures the Qubole SDK from the ``qubole_conn_id`` connection,
        then returns ``self.sensor_class.check(self.data)``. A check that
        raises is logged with its traceback and reported as ``False``.
        ``context`` is not used.
        """
        conn = BaseHook.get_connection(self.qubole_conn_id)
        Qubole.configure(api_token=conn.password, api_url=conn.host)

        # Log through the instance logger; no module-level name is needed.
        self.log.info('Poking: %s', self.data)

        status = False
        try:
            status = self.sensor_class.check(self.data)
        except Exception as e:
            self.log.exception(e)
            status = False

        self.log.info('Status of this Poke: %s', status)

        return status
Пример #19
0
def qubole(api_token,sql,replacements,filename):
    """Run the Hive query stored in file ``sql`` and return its results.

    The query text is passed through ``find_replace_multi`` with
    ``replacements``, run with the label 'Trading-spark', written to
    ``<filename>.csv`` and read back as a tab-delimited DataFrame.
    """
    Qubole.configure(api_token=api_token)

    with open(sql,'r') as sql_file:
        template = sql_file.read()
    query = find_replace_multi(template,replacements)

    submitted = HiveCommand.run(query=query, label='Trading-spark')
    command = Command.find(submitted.id)

    csv_path = filename + '.csv'
    with open(csv_path, 'wb') as sink:
        command.get_results(sink)

    return pd.read_csv(csv_path, delimiter='\t')
Пример #20
0
def request_qubole(input_query, query_type='presto', cluster_label=None):
    """Sends SQL query to Qubole and retrieves
    the data as pandas DataFrame.

    An empty DataFrame is returned when the API token or the command is
    unavailable, when ``query_type`` is not recognised, or when fetching or
    parsing the results raises.

    :param str input_query: query in chosen language (SQL)
    :param str query_type: query language specification {'presto' (default) or 'hive'}
    :param str cluster_label: Name of the Qubole cluster
    :return:  pandas DataFrame with response data.
    :rtype: pandas.DataFrame
    """
    with execute_with_handling_errors(config.get_value, 'qubole',
                                      'api_token') as api_token:
        if api_token is None:
            return pd.DataFrame([])

    Qubole.configure(api_token=api_token)

    # run query
    if query_type == 'presto':
        with execute_with_handling_errors(PrestoCommand.run,
                                          query=input_query,
                                          label=cluster_label) as hc:
            if hc is None:
                return pd.DataFrame([])
    elif query_type == 'hive':
        with execute_with_handling_errors(HiveCommand.run,
                                          query=input_query,
                                          label=cluster_label) as hc:
            if hc is None:
                return pd.DataFrame([])
    else:
        print('Wrong query type')
        return pd.DataFrame([])

    print("Id: %s, Status: %s" % (str(hc.id), hc.status))

    try:
        # Close (and therefore flush) the output file before reading it back;
        # an unclosed handle could leave buffered data unwritten.
        with open('./temp_qubole_output', 'wb') as out:
            hc.get_results(fp=out)
        with open('./temp_qubole_output', 'rb') as f:
            data = f.read()
        return qubole_output_to_df(data)

    except Exception as e:
        print(e)
        print("Oops!  There was a problem.  Try again...")
        return pd.DataFrame([])
 def __init__(self, access=None, secret=None, testmode=False,
              db_parallelism=None, mode=None, db_table=None, db_where=None,
              db_columns=None, db_boundary_query=None, db_extract_query=None,
              db_split_column=None, hive_table=None, part_spec=None,
              db_user=None, db_passwd=None, db_host=None, db_port=None,
              db_type=None, db_name=None, api_token=None, api_url=None,
              fetch_size=None):
   """Prepare the sqoop import command line and look up the cluster label.

   Creates a temporary working directory, stores the credentials and API
   settings, loads the import parameters, assembles ``self.sqoop_cmd`` and
   finally asks Qubole for the label of the current cluster.
   """
   self.temp_location = "/tmp/sqoop/"+uuid.uuid1().hex
   self.tmp_dir = tempfile.mkdtemp(prefix="/media/ephemeral0/logs"+"/sqoop")
   logger.info("Temp Directory is:" + self.tmp_dir)

   self.access = access
   self.secret = secret
   self.api_token = api_token
   self.api_url = api_url
   self.fetch_size = fetch_size
   self.redshift_sink = False

   self.__loadImportParamsFromCid(
     testmode, db_parallelism, mode, db_table, db_where, db_columns,
     db_boundary_query, db_extract_query, db_split_column, hive_table,
     part_spec, db_user, db_passwd, db_host, db_port, db_type, db_name)

   self.sqoop_cmd = ["/usr/lib/sqoop-h2/bin/sqoop", "import"]
   self.__addBasicOptions()
   self.__extendCmdSpecificOptions()

   Qubole.configure(api_token=api_token, api_url=api_url)

   # Take the text after the first "=" of the grep output, stripped of
   # whitespace and double quotes, as the cluster id.
   nodeinfo = os.popen("cat /usr/lib/hustler/bin/nodeinfo_src.sh | grep cluster_id").read()
   cluster_id = nodeinfo.split("=")[1].strip().replace('"','')
   self.cluster_label = Cluster.show(cluster_id)['cluster']['label'][0]
Пример #22
0
    def poke(self, context: dict) -> bool:
        """Run the sensor check once and return its status.

        The Qubole SDK is configured from the ``qubole_conn_id`` connection
        first. A check that raises is logged and reported as ``False``.
        """
        connection = BaseHook.get_connection(self.qubole_conn_id)
        Qubole.configure(api_token=connection.password,
                         api_url=connection.host)

        self.log.info('Poking: %s', self.data)

        try:
            status = self.sensor_class.check(self.data)  # type: ignore[attr-defined]
        except Exception as err:
            self.log.exception(err)
            status = False

        self.log.info('Status of this Poke: %s', status)
        return status
 def execute(self):
   """Run the sqoop import, copy its output, then run the Hive script.

   Steps: run ``self.sqoop_cmd``; copy ``self.temp_location`` to the
   location returned by ``get_s3_loc()`` with ``hadoop dfs -cp``; unless
   in test mode, submit the generated Hive query and poll it until done.

   Returns 0 on success. Otherwise returns the first non-zero exit code of
   sqoop/copy, or 1 when the Hive command does not end with status "done".
   """
   logger.info("Running DbImportCommand " + str(self.sqoop_cmd))
   if self.api_url is None:
     Qubole.configure(api_token=self.api_token)
   else:
     Qubole.configure(api_token=self.api_token, api_url = self.api_url)
   p = Popen(self.sqoop_cmd, cwd=self.tmp_dir)
   retCode = p.wait()
   print(self.temp_location)
   print(self.get_s3_loc())
   p = Popen(["hadoop", "dfs","-cp", self.temp_location, self.get_s3_loc() + self.temp_location])
   retCode1 = p.wait()
   if retCode != 0 or retCode1 != 0:
     logger.warning("sqoop retCode = " + str(retCode))
     self.__runCleanupScript()
     self.__runDfsCleanup()
     return(retCode or retCode1)
   else:
     logger.debug("sqoop retCode = " + str(retCode))
   # Assume failure until the Hive step (or test mode) proves otherwise.
   retCode = 1
   if self.cmd_row['test_mode']:
     logger.debug("Not running hive in test mode.")
     retCode = 0
   else:
     logger.info("Running hive script.")
     self.fixHiveQuery()
     # Read the query through a context manager so the handle is closed.
     with open(self.tmp_dir+"/hive_query.q") as query_file:
       q = query_file.read()
     logger.info("Query is: " + q)
     cmd=HiveCommand.create(query=q, label=self.cluster_label)
     while not Command.is_done(cmd.status):
       time.sleep(5)
       cmd = Command.find(cmd.id)
       logger.info("Hive command id: " + str(cmd.id) + " status: "+ str(cmd.status))
     logger.info(cmd.status)
     if cmd.status ==  "done":
       retCode = 0
   if retCode != 0:
     self.__runCleanupScript()

   self.__runDfsCleanup()
   return(retCode)
Пример #24
0
def main():
    """Run the word-count Hadoop streaming example on Qubole.

    Expects ``sys.argv[1]`` to be the API token and ``sys.argv[2]`` the
    output path; ``-h`` as the first argument calls ``usage(0)``.
    """

    logging.basicConfig(level=logging.INFO)

    # Check for -h before the argument-count check: otherwise a bare
    # "-h" is caught by the count check and reported as a usage error.
    # NOTE(review): assumes usage() terminates the process -- confirm.
    if (len(sys.argv) >= 2 and sys.argv[1] == "-h"):
        usage(0)

    if (len(sys.argv) < 3):
        usage()

    api_token = sys.argv[1]
    output_path = sys.argv[2]

    Qubole.configure(api_token=api_token)

    args = HadoopCommand.parse(("streaming -files s3n://paid-qubole/HadoopAPIExamples/WordCountPython/mapper.py,s3n://paid-qubole/HadoopAPIExamples/WordCountPython/reducer.py -mapper mapper.py -reducer reducer.py -numReduceTasks 1 -input s3n://paid-qubole/default-datasets/gutenberg -output %s" % output_path).split())

    cmd = HadoopCommand.run(**args)

    print("Streaming Job run via command id: %s, finished with status %s"
          % (cmd.id, cmd.status))
Пример #25
0
def main():
    """Set up logging, load the configuration and run the chosen sub-command.

    Logs go to stdout at INFO level; ``--debug`` raises that to DEBUG and
    a log file, when given, additionally receives DEBUG output.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    log_format = logging.Formatter('%(asctime)s - %(module)s - %(levelname)s - %(message)s')
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(log_format)
    root_logger.addHandler(console)

    # Two-stage parsing: the config parser picks out its own options first so
    # the loaded config can be handed to the sub-command together with the
    # remaining arguments.
    config_parser, argparser = setup_parsers()
    config_args, remaining_argv = config_parser.parse_known_args()
    config = load_config(config_args)
    args = argparser.parse_args(remaining_argv)

    if args.debug:
        console.setLevel(logging.DEBUG)
        root_logger.setLevel(logging.DEBUG)
        logging.debug("Debug is ON!")

    if args.log_file is not None:
        file_handler = logging.FileHandler(args.log_file, mode='w')
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(log_format)

        root_logger.setLevel(logging.DEBUG)
        root_logger.addHandler(file_handler)

    try:
        Qubole.configure(
            api_token=config.get("default", "auth_token"),
            api_url=config.get("default", "api_url"),
            skip_ssl_cert_check=True
        )
        args.func(config, args)
    finally:
        logging.debug("Cleaning up")
Пример #26
0
def main():
    """Set up logging, load the configuration and run the chosen sub-command.

    Logs go to stdout at INFO level; ``--debug`` raises that to DEBUG and
    a log file, when given, additionally receives DEBUG output.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    log_format = logging.Formatter('%(asctime)s - %(module)s - %(levelname)s - %(message)s')
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(log_format)
    root_logger.addHandler(console)

    # Two-stage parsing: the config parser picks out its own options first so
    # the loaded config can be handed to the sub-command together with the
    # remaining arguments.
    config_parser, argparser = setup_parsers()
    config_args, remaining_argv = config_parser.parse_known_args()
    config = load_config(config_args)
    args = argparser.parse_args(remaining_argv)

    if args.debug:
        console.setLevel(logging.DEBUG)
        root_logger.setLevel(logging.DEBUG)
        logging.debug("Debug is ON!")

    if args.log_file is not None:
        file_handler = logging.FileHandler(args.log_file, mode='w')
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(log_format)

        root_logger.setLevel(logging.DEBUG)
        root_logger.addHandler(file_handler)

    try:
        Qubole.configure(
            api_token=config.get("default", "auth_token"),
            api_url=config.get("default", "api_url"),
            skip_ssl_cert_check=True
        )
        args.func(config, args)
    finally:
        logging.debug("Cleaning up")
Пример #27
0
def connect(api_token=None, poll_interval=None):
    """Configure the Qubole SDK from arguments, environment, then defaults.

    :param api_token: API token; falls back to ``QDS_API_TOKEN``. A missing
        token only produces a warning on stderr.
    :param poll_interval: polling interval; falls back to
        ``QDS_POLL_INTERVAL`` and then to 5
    :raises ValueError: when the poll interval is a non-integer string
    """
    # Try setting from environment variables
    if api_token is None:
        api_token = os.getenv('QDS_API_TOKEN')
    if poll_interval is None:
        poll_interval = os.getenv('QDS_POLL_INTERVAL')
    api_url = os.getenv('QDS_API_URL')
    api_version = os.getenv('QDS_API_VERSION')
    # If they aren't set, resort to default values
    if api_url is None:
        api_url = "https://api.qubole.com/api/"
    if api_token is None:
        sys.stderr.write("No API Token provided\n")
    if api_version is None:
        api_version = "v1.2"
    if poll_interval is None:
        poll_interval = 5
    elif isinstance(poll_interval, str):
        # Environment values arrive as strings; pass a number on, like the
        # built-in default, rather than the raw text.
        poll_interval = int(poll_interval)
    Qubole.configure(api_token=api_token,
                     api_url=api_url,
                     version=api_version,
                     poll_interval=poll_interval,
                     skip_ssl_cert_check=False)
Пример #28
0
def connect(api_token=None, poll_interval=None):
    """Configure the Qubole SDK from arguments, environment, then defaults.

    :param api_token: API token; falls back to ``QDS_API_TOKEN``. A missing
        token only produces a warning on stderr.
    :param poll_interval: polling interval; falls back to
        ``QDS_POLL_INTERVAL`` and then to 5
    :raises ValueError: when the poll interval is a non-integer string
    """
    # Try setting from environment variables
    if api_token is None:
        api_token = os.getenv('QDS_API_TOKEN')
    if poll_interval is None:
        poll_interval = os.getenv('QDS_POLL_INTERVAL')
    api_url = os.getenv('QDS_API_URL')
    api_version = os.getenv('QDS_API_VERSION')
    # If they aren't set, resort to default values
    if api_url is None:
        api_url = "https://api.qubole.com/api/"
    if api_token is None:
        sys.stderr.write("No API Token provided\n")
    if api_version is None:
        api_version = "v1.2"
    if poll_interval is None:
        poll_interval = 5
    elif isinstance(poll_interval, str):
        # Environment values arrive as strings; pass a number on, like the
        # built-in default, rather than the raw text.
        poll_interval = int(poll_interval)
    Qubole.configure(api_token=api_token,
                     api_url=api_url,
                     version=api_version,
                     poll_interval=poll_interval,
                     skip_ssl_cert_check=False)
Пример #29
0
    def run_query(self, query, user):
        """Run ``query`` on Qubole and return ``(json_data, error)``.

        A Presto or Hive command (chosen by ``configuration['query_type']``)
        is created and polled every ``qbol.poll_interval`` seconds until
        ``Command.is_done``. When it ends with status ``'done'`` the
        tab-separated results are converted to columns and rows; any other
        final status produces empty columns and rows. ``user`` is not used.

        :return: ``(json_data, None)`` normally, or
            ``(None, "Query cancelled by user.")`` on KeyboardInterrupt
        """
        qbol.configure(api_token=self.configuration['token'],
                       api_url='%s/api' % self.configuration['endpoint'])

        # Bound before the try block so the interrupt handler can tell whether
        # a command was actually created.
        cmd = None
        try:
            cls = PrestoCommand if(self.configuration['query_type'] == 'presto') else HiveCommand
            cmd = cls.create(query=query, label=self.configuration['cluster'])
            logging.info("Qubole command created with Id: %s and Status: %s", cmd.id, cmd.status)

            while not Command.is_done(cmd.status):
                time.sleep(qbol.poll_interval)
                cmd = Command.find(cmd.id)
                logging.info("Qubole command Id: %s and Status: %s", cmd.id, cmd.status)

            rows = []
            columns = []
            error = None

            if cmd.status == 'done':
                fp = StringIO()
                cmd.get_results(fp=fp, inline=True, delim='\t', fetch=False,
                                qlog=None, arguments=['true'])

                results = fp.getvalue()
                fp.close()

                # The first line of the output is treated as the header row.
                data = results.split('\r\n')
                columns = self.fetch_columns([(i, TYPE_STRING) for i in data.pop(0).split('\t')])
                rows = [dict(zip((c['name'] for c in columns), row.split('\t'))) for row in data]

            json_data = json_dumps({'columns': columns, 'rows': rows})
        except KeyboardInterrupt:
            if cmd is not None:
                logging.info('Sending KILL signal to Qubole Command Id: %s', cmd.id)
                cmd.cancel()
            error = "Query cancelled by user."
            json_data = None

        return json_data, error
Пример #30
0
    def __init__(self, table_name, expected_runtime, dag_id, task_id):
        """Configure Qubole and collect connection settings from the environment.

        Reads QUBOLE_API_TOKEN, DB_HOST, DB_USER, DB_PASSWORD, DB, S3_BUCKET
        and the AWS credential variables from ``os.environ`` (a missing
        variable raises KeyError), then creates the boto3 S3 resource and
        client.
        """
        env = os.environ
        Qubole.configure(api_token=env['QUBOLE_API_TOKEN'])

        self.table_name = table_name
        self.expected_runtime = expected_runtime
        self.dag_id = dag_id
        self.task_id = task_id

        self.host = env['DB_HOST']
        self.user = env['DB_USER']
        self.password = env['DB_PASSWORD']
        self.db = env['DB']
        self.s3_bucket = env['S3_BUCKET']

        # The resource and the client share the same region and credentials.
        s3_options = {
            'region_name': 'us-west-2',
            'aws_access_key_id': env['AWS_ACCESS_KEY_ID'],
            'aws_secret_access_key': env['AWS_SECRET_ACCESS_KEY'],
        }
        self.s3_resource = boto3.resource('s3', **s3_options)
        self.s3_client = boto3.client('s3', **s3_options)
Пример #31
0
 def __init__(self, db_type, raw_sql, expected_runtime, dag_id, task_id,
              ds):
     """Configure Qubole and store the query settings.

     ``db_type`` is matched case-insensitively and selects the cluster
     label: PRESTO_CSV, PROD_PRESTO, DEV_PRESTO or HIVE. An
     ``expected_runtime`` of 0 is replaced by 7200.

     Raises Exception when ``db_type`` is not one of the accepted values.
     """
     Qubole.configure(api_token='%s' % os.environ['QUBOLE_API_TOKEN'])

     labels_by_type = {
         'PRESTO_CSV': 'presto_no_compression',
         'PROD_PRESTO': 'Prod-Presto',
         'DEV_PRESTO': 'Dev-Presto',
         'HIVE': 'default',
     }
     label = labels_by_type.get(db_type.upper())
     if label is None:
         # List the values that are actually accepted.
         msg = ('Need to specify correct query type: '
                'presto_csv, prod_presto, dev_presto or hive')
         raise Exception(msg)
     self.label = label

     self.raw_sql = raw_sql
     self.db_type = db_type
     if expected_runtime == 0:
         self.expected_runtime = 7200  # 2 hour default
     else:
         self.expected_runtime = expected_runtime
     self.dag_id = dag_id
     self.task_id = task_id
     self.ds = ds
Пример #32
0
def main():
    """Run the word-count Hadoop streaming example on Qubole.

    Expects ``sys.argv[1]`` to be the API token and ``sys.argv[2]`` the
    output path; ``-h`` as the first argument calls ``usage(0)``.
    """

    logging.basicConfig(level=logging.INFO)

    # Check for -h before the argument-count check: otherwise a bare
    # "-h" is caught by the count check and reported as a usage error.
    # NOTE(review): assumes usage() terminates the process -- confirm.
    if (len(sys.argv) >= 2 and sys.argv[1] == "-h"):
        usage(0)

    if (len(sys.argv) < 3):
        usage()

    api_token = sys.argv[1]
    output_path = sys.argv[2]

    Qubole.configure(api_token=api_token)

    args = HadoopCommand.parse((
        "streaming -files s3n://paid-qubole/HadoopAPIExamples/WordCountPython/mapper.py,s3n://paid-qubole/HadoopAPIExamples/WordCountPython/reducer.py -mapper mapper.py -reducer reducer.py -numReduceTasks 1 -input s3n://paid-qubole/default-datasets/gutenberg -output %s"
        % output_path).split())

    cmd = HadoopCommand.run(**args)

    print("Streaming Job run via command id: %s, finished with status %s" %
          (cmd.id, cmd.status))
Пример #33
0
def main():
    """Entry point of the QDS command-line client.

    Parses the global options (each defaulting to a QDS_* environment
    variable and then to a built-in value), configures the SDK and hands
    the remaining arguments to the handler of the first positional
    argument. An unknown sub-command prints the accepted names and usage.
    """
    optparser = OptionParser(usage=usage_str)

    optparser.add_option(
        "--token", dest="api_token",
        default=os.getenv('QDS_API_TOKEN'),
        help="api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN")
    optparser.add_option(
        "--url", dest="api_url",
        default=os.getenv('QDS_API_URL'),
        help="base url for QDS REST API. defaults to https://api.qubole.com/api ")
    optparser.add_option(
        "--version", dest="api_version",
        default=os.getenv('QDS_API_VERSION'),
        help="version of REST API to access. defaults to v1.2")
    optparser.add_option(
        "--poll_interval", dest="poll_interval",
        type=int,
        default=os.getenv('QDS_POLL_INTERVAL'),
        help="interval for polling API for completion and other events. defaults to 5s")
    optparser.add_option(
        "--skip_ssl_cert_check", dest="skip_ssl_cert_check",
        action="store_true", default=False,
        help="skip verification of server SSL certificate. Insecure: use with caution.")
    optparser.add_option(
        "-v", dest="verbose", action="store_true", default=False,
        help="verbose mode - info level logging")
    optparser.add_option(
        "--vv", dest="chatty", action="store_true", default=False,
        help="very verbose mode - debug level logging")

    # Everything after the sub-command belongs to the sub-command.
    optparser.disable_interspersed_args()
    options, args = optparser.parse_args()

    if options.chatty:
        log_level = logging.DEBUG
    elif options.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARN
    logging.basicConfig(level=log_level)

    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)

    # Built-in fallbacks for options given neither on the command line nor
    # in the environment.
    for option_name, fallback in (("api_url", "https://api.qubole.com/api/"),
                                  ("api_version", "v1.2"),
                                  ("poll_interval", 5)):
        if getattr(options, option_name) is None:
            setattr(options, option_name, fallback)

    if options.skip_ssl_cert_check is None:
        options.skip_ssl_cert_check = False
    elif options.skip_ssl_cert_check:
        log.warn("Insecure mode enabled: skipping SSL cert verification\n")

    Qubole.configure(api_token=options.api_token,
                     api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check)

    if not args:
        sys.stderr.write("Missing first argument containing subcommand\n")
        usage(optparser)

    subcommand = args.pop(0)
    if subcommand in CommandClasses:
        return cmdmain(subcommand, args)

    if subcommand == "cluster":
        # The cluster handler also takes the API version as a number:
        # the leading character of the version string is dropped.
        return clustermain(args, float(options.api_version[1:]))

    handlers = {
        "account": accountmain,
        "action": actionmain,
        "scheduler": schedulermain,
        "report": reportmain,
        "dbtap": dbtapmain,
        "group": groupmain,
        "role": rolemain,
        "app": appmain,
    }
    if subcommand in handlers:
        return handlers[subcommand](args)

    cmdset = set(CommandClasses.keys())
    sys.stderr.write("First command must be one of <%s>\n" %
                     "|".join(cmdset.union(["cluster", "action", "scheduler", "report",
                       "dbtap", "role", "group", "app", "account"])))

    usage(optparser)
Пример #34
0
 def backoff_poll_interval(self, multiple=2):
     """Re-configure QDS with its poll interval multiplied by ``multiple``."""
     slower_interval = QDS.poll_interval * multiple
     QDS.configure(QDS.api_token, poll_interval=slower_interval)
Пример #35
0
    return Response()


@app.route('/run_scaling', methods=['POST'])
@login_required
def run_scaling():
    """Submit the top-10 revenue Hive query ten times asynchronously.

    Returns an empty ``Response`` once all submissions have been made.
    """
    submission = {
        'cluster_label': config['hadoop_cluster_name'],
        'query_filename': 'top_10_revenue_generating_products.sql',
        'qubole_database_name': config['qubole_database_name'],
    }
    for _attempt in range(10):
        run_hive_query_asynchronous(**submission)
    return Response()


def parse_command_line_args(argv=None):
    """Parse the command-line options of the quick start app.

    :param argv: optional list of argument strings to parse instead of the
        process arguments.  ``None`` (the default) makes ``argparse`` read
        ``sys.argv[1:]``, which is what callers passing no argument get.
    :return: ``argparse.Namespace`` with ``config`` (required) and
        ``extra_config`` (``None`` when the option is not given).
    """
    parser = argparse.ArgumentParser(description='Quick start App')
    parser.add_argument('--config', required=True, help='Configuration')
    parser.add_argument('--extra-config',
                        help='Configuration of clusters and notebooks')
    return parser.parse_args(argv)


# Script entry point: load the configuration, set up the app and the Qubole
# SDK, then start serving.
if __name__ == "__main__":
    # Log to stderr at INFO level.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    args = parse_command_line_args()
    # ``config`` is bound at module level here; request handlers in this
    # file read it as a global.
    config = read_config(args.config)
    # A fresh random 47-byte secret is generated on every start.
    # NOTE(review): if ``app`` is a Flask app, existing sessions presumably
    # stop validating after a restart -- confirm this is intended.
    app.secret_key = os.urandom(47)
    app.config.update(config)
    Qubole.configure(api_token=config['qubole_api_token'])
    # Host '0.0.0.0' is conventionally "all interfaces"; the port comes from
    # the config file and is converted to int here.
    app.run(host='0.0.0.0', port=int(config['port']), threaded=True)
Пример #36
0
def main():
    """Command-line entry point: configure the Qubole SDK, then dispatch.

    Connection options default to the ``QDS_API_TOKEN``, ``QDS_API_URL``,
    ``QDS_API_VERSION`` and ``QDS_POLL_INTERVAL`` environment variables and
    then to ``https://api.qubole.com/api/``, ``v1.2`` and ``5``.

    The first positional argument selects the handler: ``<type>cmd``
    (hive, pig, hadoop, shell, dbexport, presto) is passed to ``cmdmain``
    without the ``cmd`` suffix, and ``hadoop_cluster`` goes to
    ``clustermain``.  Remaining arguments are forwarded untouched.

    :return: whatever the selected handler returns.
    """
    optparser = OptionParser(usage=usage_str)
    optparser.add_option("--token", dest="api_token",
                         default=os.getenv('QDS_API_TOKEN'),
                         help="api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN")

    optparser.add_option("--url", dest="api_url",
                         default=os.getenv('QDS_API_URL'),
                         help="base url for QDS REST API. defaults to https://api.qubole.com/api ")

    optparser.add_option("--version", dest="api_version",
                         default=os.getenv('QDS_API_VERSION'),
                         help="version of REST API to access. defaults to v1.2")

    # type=int so the interval reaches Qubole.configure as a number whether
    # it comes from the command line or from QDS_POLL_INTERVAL (optparse
    # also converts string defaults through the option type).
    optparser.add_option("--poll_interval", dest="poll_interval", type=int,
                         default=os.getenv('QDS_POLL_INTERVAL'),
                         help="interval for polling API for completion and other events. defaults to 5s")

    optparser.add_option("--skip_ssl_cert_check", dest="skip_ssl_cert_check", action="store_true",
                         default=False,
                         help="skip verification of server SSL certificate. Insecure: use with caution.")

    optparser.add_option("-v", dest="verbose", action="store_true",
                         default=False,
                         help="verbose mode - info level logging")

    optparser.add_option("--vv", dest="chatty", action="store_true",
                         default=False,
                         help="very verbose mode - debug level logging")

    # Stop option parsing at the first positional argument so the
    # subcommand's own options are left in ``args``.
    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()

    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)

    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        # NOTE(review): usage() is defined elsewhere and presumably exits;
        # otherwise execution continues without a token -- confirm.
        usage(optparser)

    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/"

    if options.api_version is None:
        options.api_version = "v1.2"

    if options.poll_interval is None:
        options.poll_interval = 5

    # A store_true option with default=False is never None, so only the
    # truthy case needs handling.
    if options.skip_ssl_cert_check:
        sys.stderr.write("[WARN] Insecure mode enabled: skipping SSL cert verification\n")

    Qubole.configure(api_token=options.api_token,
                     api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check)

    if not args:
        sys.stderr.write("Missing first argument containing command type\n")
        usage(optparser)

    cmdsuffix = "cmd"
    cmdset = set([x + cmdsuffix for x in ["hive", "pig", "hadoop", "shell", "dbexport", "presto"]])

    a0 = args.pop(0)

    if a0 in cmdset:
        # Every member of cmdset ends with the suffix, so slicing it off
        # yields the bare command type (e.g. "hivecmd" -> "hive").
        return cmdmain(a0[:-len(cmdsuffix)], args)

    if a0 == "hadoop_cluster":
        return clustermain(a0, args)

    sys.stderr.write("First command must be one of <%s>\n" %
                     "|".join(cmdset.union(["hadoop_cluster"])))
    usage(optparser)
Пример #37
0
from qds_sdk.qubole import Qubole

# NOTE(review): the API token is hard-coded in source. The localhost URL
# suggests a development token, but it should come from the environment or
# a config file before this is shared or committed -- confirm and rotate.
Qubole.configure(api_token='BjnuEktAoiyFiHZCuLSZxdUFFsrzn15h3Hj9an3xjACBkwqiYCNJcNYygsaLvAFg',
                 api_url="http://localhost:3000/api/")

from qds_sdk.commands import *

# Create a Hive command and report the id and status of the result.
hc = HiveCommand.create(query='show tables', retry='4')
# Parenthesized single-argument form prints the same text on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print("Id: %s, Status: %s" % (str(hc.id), hc.status))
Пример #38
0
def main():
    """Command-line entry point: configure the Qubole SDK, then dispatch.

    Connection options default to the ``QDS_API_TOKEN``, ``QDS_API_URL``,
    ``QDS_API_VERSION`` and ``QDS_POLL_INTERVAL`` environment variables and
    then to ``https://api.qubole.com/api/``, ``v1.2`` and ``5``.

    The first positional argument is the subcommand.  Names found in
    ``CommandClasses`` go to ``cmdmain``; ``cluster`` goes to
    ``clustermain`` together with the numeric API version; every other
    known name goes to its ``<name>main`` handler.  Remaining arguments
    are forwarded untouched.

    :return: whatever the selected handler returns.
    """
    optparser = OptionParser(usage=usage_str)

    # Options whose default is read from an environment variable, in the
    # order they are registered: (flag, dest, env var, help, extra kwargs).
    env_backed_options = (
        ("--token", "api_token", 'QDS_API_TOKEN',
         "api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN",
         {}),
        ("--url", "api_url", 'QDS_API_URL',
         "base url for QDS REST API. defaults to https://api.qubole.com/api ",
         {}),
        ("--version", "api_version", 'QDS_API_VERSION',
         "version of REST API to access. defaults to v1.2",
         {}),
        ("--poll_interval", "poll_interval", 'QDS_POLL_INTERVAL',
         "interval for polling API for completion and other events. defaults to 5s",
         {"type": int}),
    )
    for flag, dest, env_name, help_text, extra in env_backed_options:
        optparser.add_option(flag,
                             dest=dest,
                             default=os.getenv(env_name),
                             help=help_text,
                             **extra)

    # Plain on/off switches: (flag, dest, help).
    switches = (
        ("--skip_ssl_cert_check", "skip_ssl_cert_check",
         "skip verification of server SSL certificate. Insecure: use with caution."),
        ("-v", "verbose", "verbose mode - info level logging"),
        ("--vv", "chatty", "very verbose mode - debug level logging"),
    )
    for flag, dest, help_text in switches:
        optparser.add_option(flag,
                             dest=dest,
                             action="store_true",
                             default=False,
                             help=help_text)

    # Stop option parsing at the first positional argument so the
    # subcommand's own options are left in ``args``.
    optparser.disable_interspersed_args()
    options, args = optparser.parse_args()

    if options.chatty:
        log_level = logging.DEBUG
    elif options.verbose:
        log_level = logging.INFO
    else:
        log_level = logging.WARN
    logging.basicConfig(level=log_level)

    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)

    # Fill in built-in defaults for anything still unset.
    fallbacks = (
        ("api_url", "https://api.qubole.com/api/"),
        ("api_version", "v1.2"),
        ("poll_interval", 5),
        ("skip_ssl_cert_check", False),
    )
    for attr, fallback in fallbacks:
        if getattr(options, attr) is None:
            setattr(options, attr, fallback)

    if options.skip_ssl_cert_check:
        log.warn("Insecure mode enabled: skipping SSL cert verification\n")

    Qubole.configure(api_token=options.api_token,
                     api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval,
                     skip_ssl_cert_check=options.skip_ssl_cert_check)

    if not args:
        sys.stderr.write("Missing first argument containing subcommand\n")
        usage(optparser)

    subcommand = args.pop(0)
    if subcommand in CommandClasses:
        return cmdmain(subcommand, args)

    if subcommand == "cluster":
        # "v1.2" -> 1.2: drop the leading character and parse the rest.
        api_version_number = float(options.api_version[1:])
        return clustermain(args, api_version_number)

    # Subcommands whose handler takes only the remaining arguments.
    handlers = {
        "account": accountmain,
        "action": actionmain,
        "scheduler": schedulermain,
        "report": reportmain,
        "dbtap": dbtapmain,
        "group": groupmain,
        "role": rolemain,
        "app": appmain,
        "nezha": nezhamain,
        "user": usermain,
        "template": templatemain,
    }
    if subcommand in handlers:
        return handlers[subcommand](args)

    cmdset = set(CommandClasses.keys())
    known_subcommands = cmdset.union([
        "cluster", "action", "scheduler", "report", "dbtap", "role",
        "group", "app", "account", "nezha", "user", "template"
    ])
    sys.stderr.write("First command must be one of <%s>\n" %
                     "|".join(known_subcommands))
    usage(optparser)
Пример #39
0
def main():
    """Command-line entry point: configure the Qubole SDK and run a command.

    Connection options default to the ``QDS_API_TOKEN``, ``QDS_API_URL``,
    ``QDS_API_VERSION`` and ``QDS_POLL_INTERVAL`` environment variables and
    then to ``https://api.qubole.com/api/``, ``v1.2`` and ``5``.

    The first positional argument must be ``hivecmd``, ``pigcmd``,
    ``hadoopcmd`` or ``shellcmd``; the command type (without the ``cmd``
    suffix) and the remaining arguments are handed to ``cmdmain``.

    :return: whatever ``cmdmain`` returns.
    """
    optparser = OptionParser(usage=usage_str)
    optparser.add_option("--token", dest="api_token",
                         default=os.getenv('QDS_API_TOKEN'),
                         help="api token for accessing Qubole. must be specified via command line or passed in via environment variable QDS_API_TOKEN")

    optparser.add_option("--url", dest="api_url",
                         default=os.getenv('QDS_API_URL'),
                         help="base url for QDS REST API. defaults to https://api.qubole.com/api ")

    optparser.add_option("--version", dest="api_version",
                         default=os.getenv('QDS_API_VERSION'),
                         help="version of REST API to access. defaults to v1.2")

    # type=int so the interval reaches Qubole.configure as a number whether
    # it comes from the command line or from QDS_POLL_INTERVAL (optparse
    # also converts string defaults through the option type).
    optparser.add_option("--poll_interval", dest="poll_interval", type=int,
                         default=os.getenv('QDS_POLL_INTERVAL'),
                         help="interval for polling API for completion and other events. defaults to 5s")

    optparser.add_option("-v", dest="verbose", action="store_true",
                         default=False,
                         help="verbose mode - info level logging")

    optparser.add_option("--vv", dest="chatty", action="store_true",
                         default=False,
                         help="very verbose mode - debug level logging")

    # Stop option parsing at the first positional argument so the
    # command's own options are left in ``args``.
    optparser.disable_interspersed_args()
    (options, args) = optparser.parse_args()

    if options.chatty:
        logging.basicConfig(level=logging.DEBUG)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)

    if options.api_token is None:
        sys.stderr.write("No API Token provided\n")
        usage(optparser)

    if options.api_url is None:
        options.api_url = "https://api.qubole.com/api/"

    if options.api_version is None:
        options.api_version = "v1.2"

    if options.poll_interval is None:
        options.poll_interval = 5

    Qubole.configure(api_token=options.api_token,
                     api_url=options.api_url,
                     version=options.api_version,
                     poll_interval=options.poll_interval)

    if not args:
        sys.stderr.write("Missing first argument containing command type\n")
        # NOTE(review): usage() is called without the parser here and below,
        # unlike the token check above -- confirm it accepts no arguments.
        # It is also presumed to exit; it is defined outside this block.
        usage()

    cmdset = set(["hive", "pig", "hadoop", "shell"])
    cmdsuffix = "cmd"

    cmd = args.pop(0)

    # Accept only "<type>cmd" where <type> is a known command type.
    if (not cmd.endswith(cmdsuffix) or
            cmd[:-len(cmdsuffix)] not in cmdset):
        sys.stderr.write("First command must be one of <%s>\n" % "|".join(cmdset))
        usage()

    return cmdmain(cmd[:-len(cmdsuffix)], args)
def qb_configure(api_token, api_url):
    """Configure the Qubole SDK with the given token and API URL.

    :param api_token: value passed to ``Qubole.configure`` as ``api_token``.
    :param api_url: value passed to ``Qubole.configure`` as ``api_url``.
    :return: whatever ``Qubole.configure`` returns.
    """
    outcome = Qubole.configure(api_token=api_token, api_url=api_url)
    return outcome
Пример #41
0
 def __init__(self, name, context, **kwargs):
     """Initialise the cluster, its S3 filesystem and the Qubole SDK token.

     :param name: forwarded to the parent initialiser.
     :param context: forwarded to the parent and to ``S3Filesystem``; its
         ``settings['qds_api_token']`` is used as the Qubole API token.
     :param kwargs: handed to the parent as one ``kwargs`` keyword and
         expanded into ``S3Filesystem``.
     """
     # NOTE(review): the parent receives the dict as a single ``kwargs``
     # keyword, not expanded -- confirm this matches its signature.
     super(QuboleCluster, self).__init__(name, context, kwargs=kwargs)
     # ``self.logger`` is not set in this method; presumably the parent
     # initialiser provides it.
     self._filesystem = S3Filesystem(self.logger, context, **kwargs)
     qds_token = context.settings['qds_api_token']
     Qubole.configure(api_token=qds_token)
Пример #42
0
def hivecommand_from_r(query=None, poll_interval=None, sample_size=None,
                       macros=None, tags=None, cluster_label=None,
                       notify=None, name=None, api_token=None):
    """Configure the Qubole SDK and run a Hive command from keyword values.

    The non-``None`` arguments are rendered into a command-line style
    option string, which is passed to ``hivecommand``.

    Connection settings come from the arguments first, then from the
    ``QDS_API_TOKEN``, ``QDS_API_URL``, ``QDS_API_VERSION`` and
    ``QDS_POLL_INTERVAL`` environment variables, then from the built-in
    defaults (``https://api.qubole.com/api/``, ``v1.2``, ``5``).

    :param query: rendered as ``--query``.
    :param poll_interval: polling interval handed to ``Qubole.configure``.
    :param sample_size: rendered as ``--sample_size``.
    :param macros: rendered as ``--macros``.
    :param tags: rendered as ``--tags``.
    :param cluster_label: rendered as ``--cluster-label``.
    :param notify: rendered as ``--notify``.
    :param name: rendered as ``--name``.
    :param api_token: Qubole API token; falls back to ``QDS_API_TOKEN``.
    :return: the result of ``hivecommand``, or ``None`` when an exception
        was caught and reported on stderr.
    """
    # Environment values win over the built-in defaults.  They must not be
    # reset after being read, or QDS_API_URL / QDS_API_VERSION are ignored.
    api_url = os.getenv('QDS_API_URL')
    if api_url is None:
        api_url = "https://api.qubole.com/api/"

    api_version = os.getenv('QDS_API_VERSION')
    if api_version is None:
        api_version = "v1.2"

    if poll_interval is None:
        env_interval = os.getenv('QDS_POLL_INTERVAL')
        if env_interval is not None:
            # Environment values are strings; the SDK needs a number.
            try:
                poll_interval = int(env_interval)
            except ValueError:
                sys.stderr.write("Ignoring non-integer QDS_POLL_INTERVAL: %r\n"
                                 % env_interval)
    if poll_interval is None:
        poll_interval = 5

    if api_token is None:
        api_token = os.getenv('QDS_API_TOKEN')

    # Rebuild the option string that hivecommand parses, in a fixed order.
    # NOTE(review): values are wrapped in single quotes without escaping, so
    # a value containing "'" presumably breaks the parse -- confirm.
    cli_options = (
        ("--query", query),
        ("--macros", macros),
        ("--tags", tags),
        ("--sample_size", sample_size),
        ("--cluster-label", cluster_label),
        ("--notify", notify),
        ("--name", name),
    )
    queryString = "".join(" %s '%s' " % (flag, str(value))
                          for flag, value in cli_options
                          if value is not None)

    logging.basicConfig(level=logging.WARN)

    if api_token is None:
        # Reported but not fatal: configuration proceeds without a token.
        sys.stderr.write("No API Token provided\n")

    Qubole.configure(api_token=api_token,
                     api_url=api_url,
                     version=api_version,
                     poll_interval=poll_interval,
                     skip_ssl_cert_check=False)

    try:
        return hivecommand(queryString)
    except qds_sdk.exception.Error as e:
        sys.stderr.write("Error: Status code %s (%s) from url %s\n" %
                         (e.request.status_code, e.__class__.__name__,
                          e.request.url))
    except qds_sdk.exception.ConfigError as e:
        sys.stderr.write("Configuration error: %s\n" % str(e))
    except qds_sdk.exception.ParseError as e:
        sys.stderr.write("Error: %s\n" % str(e))
    except Exception:
        # Last-resort boundary: report the traceback and return None.
        traceback.print_exc(file=sys.stderr)
Пример #43
0
 def __init__(self):
     """Install ``self.exit`` as the SIGINT handler and configure Qubole.

     The Qubole SDK is configured with the ``API_TOKEN`` attribute found
     on the instance.
     """
     interrupt_handler = self.exit
     signal.signal(signal.SIGINT, interrupt_handler)
     # For non api.qubole env, set the env in configure()
     token = self.API_TOKEN
     Qubole.configure(api_token=token)
Пример #44
0
 def set_token(self, api_token):
     """Configure QDS to use ``api_token``.

     :param api_token: value passed to ``QDS.configure`` as ``api_token``.
     """
     token_settings = dict(api_token=api_token)
     QDS.configure(**token_settings)