def list(cls, label=None, cluster_id=None, state=None):
    """
    List existing clusters present in your account.

    Kwargs:
        `label`: show only the cluster with this label
        `cluster_id`: show only the cluster with this id
        `state`: list only those clusters which are in this state

    Returns:
        List of clusters satisfying the given criteria
    """
    if cluster_id is not None:
        return cls.show(cluster_id)
    if label is not None:
        return cls.show(label)
    conn = Qubole.agent(version="v2")
    cluster_list = conn.get(cls.rest_entity_path)
    if state is None:
        # No state filter requested: return the list we already fetched.
        # (Previously this issued a redundant second GET of the same path.)
        return cluster_list
    # Filter clusters by state, case-insensitively.
    wanted = state.lower()
    return [cluster for cluster in cluster_list.get('clusters', [])
            if cluster['state'].lower() == wanted]
def terminate(cls, cluster_id_label):
    """
    Terminate the cluster with id/label `cluster_id_label`.
    """
    endpoint = cls.element_path(cluster_id_label) + "/state"
    return Qubole.agent().put(endpoint, {"state": "terminate"})
def update(cls, cluster_id_label, cluster_info):
    """
    Update the cluster with id/label `cluster_id_label` using information
    provided in `cluster_info`.
    """
    api = Qubole.agent(version="v2")
    return api.put(cls.element_path(cluster_id_label), data=cluster_info)
def add_node(cls, cluster_id_label, parameters=None):
    """
    Add a node to an existing cluster
    """
    if not parameters:
        parameters = {}
    endpoint = cls.element_path(cluster_id_label) + "/nodes"
    return Qubole.agent().post(endpoint, data={"parameters" : parameters})
def find_by_name(name):
    """
    Look up a scheduler by name.

    Returns the first matching Scheduler, or None when `name` is None or no
    schedule matches.
    """
    if name is None:
        return None
    conn = Qubole.agent()
    schedjson = conn.get(Scheduler.rest_entity_path, params={"name": name})
    matches = schedjson["schedules"]
    return Scheduler(matches[0]) if matches else None
def index(cls):
    """
    Shows a list of all available reports by issuing a GET request to the
    /reports endpoint.
    """
    return Qubole.agent().get(cls.rest_entity_path)
def get_results(self, fp=sys.stdout, inline=True, delim=None):
    """
    Fetches the result for the command represented by this object

    Args:
        `fp`: a file object to write the results to directly
        `inline`: whether to ask the API to return results inline in the
                  response (larger results come back as s3 locations instead)
        `delim`: passed through to `_download_to_local` for the s3 path
    """
    result_path = self.meta_data['results_resource']
    conn = Qubole.agent()
    r = conn.get(result_path, {'inline': inline})
    if r.get('inline'):
        # Results were returned inline in the response body.
        if sys.version_info < (3, 0, 0):
            # Python 2: file objects accept encoded bytes directly.
            fp.write(r['results'].encode('utf8'))
        else:
            import io
            # Python 3: choose a write strategy matching the stream type.
            if isinstance(fp, io.TextIOBase):
                # Text stream: write bytes through the underlying buffer.
                fp.buffer.write(r['results'].encode('utf8'))
            elif isinstance(fp, io.BufferedIOBase) or isinstance(fp, io.RawIOBase):
                # Binary stream: write encoded bytes directly.
                fp.write(r['results'].encode('utf8'))
            else:
                # Can this happen? Don't know what's the right thing to do in this case.
                pass
    else:
        # Results too large for inline return: download them from s3 using
        # the account's storage credentials.
        acc = Account.find()
        boto_conn = boto.connect_s3(aws_access_key_id=acc.storage_access_key,
                                    aws_secret_access_key=acc.storage_secret_key)
        log.info("Starting download from result locations: [%s]" % ",".join(r['result_location']))
        #fetch latest value of num_result_dir
        num_result_dir = Command.find(self.id).num_result_dir
        for s3_path in r['result_location']:
            # In Python 3, in this case, `fp` should always be binary mode.
            _download_to_local(boto_conn, s3_path, fp, num_result_dir, delim=delim)
def start(cls, cluster_id_label):
    """
    Start the cluster with id/label `cluster_id_label`.
    """
    endpoint = cls.element_path(cluster_id_label) + "/state"
    return Qubole.agent().put(endpoint, {"state": "start"})
def find(cls, name="default", **kwargs):
    """
    Fetch the hadoop cluster; only the name 'default' (or None) is accepted.
    """
    # Guard clause: any name other than None/'default' is invalid.
    if name is not None and name != "default":
        raise ParseError("Bad name %s" % name,
                         "Hadoop Clusters can only be named 'default' currently")
    conn = Qubole.agent()
    return cls(conn.get(cls.rest_entity_path))
def clone(cls, cluster_id_label, cluster_info):
    """
    Clone the cluster with id/label `cluster_id_label` using information
    provided in `cluster_info`.
    """
    endpoint = cls.element_path(cluster_id_label) + '/clone'
    return Qubole.agent().post(endpoint, data=cluster_info)
def check(cls, data):
    """
    Method to call the sensors api with json payload

    :param data: valid json object
    :return: True or False
    """
    response = Qubole.agent().post(cls.rest_entity_path, data=data)
    return response['status']
def update_node(cls, cluster_id_label, command, private_dns, parameters=None):
    """
    Issue `command` against the node `private_dns` of an existing cluster.
    """
    if not parameters:
        parameters = {}
    payload = {"command" : command,
               "private_dns" : private_dns,
               "parameters" : parameters}
    endpoint = cls.element_path(cluster_id_label) + "/nodes"
    return Qubole.agent().put(endpoint, payload)
def get_log_id(cls, id):
    """
    Fetches log for the command represented by this id

    Args:
        `id`: command id
    """
    raw = Qubole.agent().get_raw(cls.element_path(id) + "/logs")
    return raw.text
def cancel_id(cls, id):
    """
    Cancels command denoted by this id

    Args:
        `id`: command id
    """
    conn = Qubole.agent()
    return conn.put(cls.element_path(id), {"status": "kill"})
def snapshot(cls, cluster_id_label, s3_location, backup_type):
    """
    Create hbase snapshot full/incremental
    """
    payload = {'s3_location': s3_location}
    if backup_type:
        payload['backup_type'] = backup_type
    endpoint = cls.element_path(cluster_id_label) + "/snapshots"
    return Qubole.agent().post(endpoint, data=payload)
def createTemplate(data):
    """
    Create a new template.

    Args:
        `data`: json data required for creating a template
    Returns:
        Dictionary containing the details of the template with its ID.
    """
    return Qubole.agent().post(Template.rest_entity_path, data)
def get_log(self):
    """
    Fetches log for the command represented by this object

    Returns:
        The log as a string
    """
    conn = Qubole.agent()
    response = conn.get_raw(self.meta_data['logs_resource'])
    return response.text
def restore_point(cls, cluster_id_label, s3_location, backup_id, table_names,
                  overwrite=True, automatic=True):
    """
    Restoring cluster from a given hbase snapshot id
    """
    payload = {
        's3_location': s3_location,
        'backup_id': backup_id,
        'table_names': table_names,
        'overwrite': overwrite,
        'automatic': automatic,
    }
    endpoint = cls.element_path(cluster_id_label) + "/restore_point"
    return Qubole.agent().post(endpoint, data=payload)
def editTemplate(id, data):
    """
    Edit an existing template.

    Args:
        `id`: ID of the template to edit
        `data`: json data to be updated
    Returns:
        Dictionary containing the updated details of the template.
    """
    return Qubole.agent().put(Template.element_path(id), data)
def viewTemplate(id):
    """
    View an existing Template details.

    Args:
        `id`: ID of the template to fetch
    Returns:
        Dictionary containing the details of the template.
    """
    return Qubole.agent().get(Template.element_path(id))
def get_results(self, fp=sys.stdout, inline=True, delim=None, fetch=True):
    """
    Fetches the result for the command represented by this object

    get_results will retrieve results of the command and write to stdout by default.
    Optionally one can write to a filestream specified in `fp`. The `inline` argument
    decides whether the result can be returned as a CRLF separated string. In cases where
    the results are greater than 20MB, get_results will attempt to read from s3 and write
    to fp. The retrieval of results from s3 can be turned off by the `fetch` argument

    Args:
        `fp`: a file object to write the results to directly
        `inline`: whether or not results are returned inline as CRLF separated string
        `fetch`: True to fetch the result even if it is greater than 20MB, False to
                 only get the result location on s3
    """
    result_path = self.meta_data["results_resource"]
    conn = Qubole.agent()
    r = conn.get(result_path, {"inline": inline})
    if r.get("inline"):
        # Results were returned inline in the response body.
        if sys.version_info < (3, 0, 0):
            # Python 2: file objects accept encoded bytes directly.
            fp.write(r["results"].encode("utf8"))
        else:
            import io
            # Python 3: choose a write strategy matching the stream type.
            if isinstance(fp, io.TextIOBase):
                # Text stream: write bytes through the underlying buffer.
                fp.buffer.write(r["results"].encode("utf8"))
            elif isinstance(fp, io.BufferedIOBase) or isinstance(fp, io.RawIOBase):
                # Binary stream: write encoded bytes directly.
                fp.write(r["results"].encode("utf8"))
            else:
                # Can this happen? Don't know what's the right thing to do in this case.
                pass
    else:
        if fetch:
            # Download from s3 using the account's storage credentials.
            acc = Account.find()
            boto_conn = boto.connect_s3(
                aws_access_key_id=acc.storage_access_key,
                aws_secret_access_key=acc.storage_secret_key
            )
            log.info("Starting download from result locations: [%s]" % ",".join(r["result_location"]))
            # fetch latest value of num_result_dir
            num_result_dir = Command.find(self.id).num_result_dir
            for s3_path in r["result_location"]:
                # In Python 3,
                # If the delim is None, fp should be in binary mode because
                # boto expects it to be.
                # If the delim is not None, then both text and binary modes
                # work.
                _download_to_local(boto_conn, s3_path, fp, num_result_dir, delim=delim)
        else:
            # Caller only wants the s3 locations, not the data itself.
            fp.write(",".join(r["result_location"]))
def show(cls, report_name, data):
    """
    Shows a report by issuing a GET request to the /reports/report_name
    endpoint.

    Args:
        `report_name`: the name of the report to show
        `data`: the parameters for the report
    """
    return Qubole.agent().get(cls.element_path(report_name), data)
def submitTemplate(id, data=None):
    """
    Submit an existing Template.

    Args:
        `id`: ID of the template to submit
        `data`: json data containing the input_vars
    Returns:
        Dictionary containing Command Object details.
    """
    # `data=None` instead of the original mutable default `data={}`, which
    # is shared across calls and can leak state between invocations.
    if data is None:
        data = {}
    conn = Qubole.agent()
    path = str(id) + "/run"
    return conn.post(Template.element_path(path), data)
def cloneTemplate(id, data=None):
    """
    Clone an existing template.

    Args:
        `id`: ID of the template to be cloned
        `data`: json data to override
    Returns:
        Dictionary containing the updated details of the template.
    """
    # `data=None` instead of the original mutable default `data={}`, which
    # is shared across calls and can leak state between invocations.
    if data is None:
        data = {}
    conn = Qubole.agent()
    path = str(id) + "/duplicate"
    return conn.post(Template.element_path(path), data)
def get_jobs_id(cls, id):
    """
    Fetches information about the hadoop jobs which were started by this
    command id. This information is only available for commands which have
    completed (i.e. Status = 'done', 'cancelled' or 'error'.) Also, the
    cluster which ran this command should be running for this information
    to be available. Otherwise only the URL and job_id is shown.

    Args:
        `id`: command id
    """
    response = Qubole.agent().get_raw(cls.element_path(id) + "/jobs")
    return response.text
def create(cls, name, config=None, kind="spark"):
    """
    Create a new app.

    Args:
        `name`: the name of the app
        `config`: a dictionary of key-value pairs
        `kind`: kind of the app (default=spark)
    """
    payload = {'name': name, 'config': config, 'kind': kind}
    return Qubole.agent().post(cls.rest_entity_path, data=payload)
def list(page=None, per_page=None):
    """
    List actions, optionally paginated via `page` and `per_page`.
    """
    conn = Qubole.agent()
    params = {}
    if page is not None:
        params['page'] = page
    if per_page is not None:
        params['per_page'] = per_page
    # TODO: page numbers are thrown away right now
    actjson = conn.get(Action.rest_entity_path, params)
    return [Action(entry) for entry in actjson["actions"]]
def reassign_label(cls, destination_cluster, label):
    """
    Reassign a label from one cluster to another.

    Args:
        `destination_cluster`: id/label of the cluster to move the label to
        `label`: label to be moved from the source cluster
    """
    payload = {
        "destination_cluster": destination_cluster,
        "label": label
    }
    return Qubole.agent().put(cls.rest_entity_path + "/reassign-label", payload)
def update_snapshot_schedule(cls, cluster_id_label, s3_location=None,
                             frequency_unit=None, frequency_num=None,
                             status=None):
    """
    Update for snapshot schedule
    """
    # Only send the fields the caller actually supplied.
    supplied = {
        "s3_location": s3_location,
        "frequency_unit": frequency_unit,
        "frequency_num": frequency_num,
        "status": status,
    }
    data = {key: value for key, value in supplied.items() if value is not None}
    endpoint = cls.element_path(cluster_id_label) + "/snapshot_schedule"
    return Qubole.agent().put(endpoint, data)
def list(page=None, per_page=None):
    """
    List groups, optionally paginated.

    Args:
        `page`: page number to fetch
        `per_page`: number of groups per page
    Returns:
        List of Group objects
    """
    conn = Qubole.agent()
    # Pass pagination as request params instead of hand-building the query
    # string — consistent with how Action.list passes its pagination.
    params = {}
    if page is not None:
        params['page'] = page
    if per_page is not None:
        params['per_page'] = per_page
    if params:
        groupjson = conn.get(Group.rest_entity_path, params)
    else:
        groupjson = conn.get(Group.rest_entity_path)
    return [Group(s) for s in groupjson["groups"]]
def unassign_role(role_id, qbol_group_id):
    """
    Unassign role `role_id` from the group `qbol_group_id`.
    """
    endpoint = "groups/%s/roles/%s/unassign" % (qbol_group_id, role_id)
    return Qubole.agent().put(endpoint)
def list_groups(role_id):
    """
    List the groups that have role `role_id` assigned.
    """
    endpoint = "roles/%s/groups" % role_id
    return Qubole.agent().get(endpoint)
def delete(cls, cluster_id_label):
    """
    Delete the cluster with id/label `cluster_id_label`.
    """
    return Qubole.agent().delete(cls.element_path(cluster_id_label))
def enable_disable(cls, path, data):
    """
    POST `data` to the sub-resource `path` to enable or disable it.
    """
    return Qubole.agent().post(cls.element_path(path), data)
def invite(cls, path, data):
    """
    POST an invite payload `data` to the sub-resource `path`.
    """
    return Qubole.agent().post(cls.element_path(path), data)
def add_user(group_id, user_id):
    """
    Add the user `user_id` to the group `group_id`.
    """
    endpoint = "groups/%s/qbol_users/%s/add" % (group_id, user_id)
    return Qubole.agent().put(endpoint)
def duplicate(group_id, **kwargs):
    """
    Duplicate the group `group_id`, optionally overriding attributes via kwargs.
    """
    endpoint = "groups/%s/duplicate" % group_id
    return Qubole.agent().post(endpoint, data=kwargs)
def delete(role_id):
    """
    Delete the role `role_id`.
    """
    endpoint = "roles/%s" % role_id
    return Qubole.agent().delete(endpoint)
def delete(self):
    """
    Delete this object via its REST element path.
    """
    return Qubole.agent().delete(self.element_path(self.id))
def show(cls, cluster_id_label):
    """
    Show information about the cluster with id/label `cluster_id_label`.
    """
    return Qubole.agent().get(cls.element_path(cluster_id_label))
def rerun(self, instance_id):
    """
    Rerun a particular instance of this scheduler.

    Args:
        `instance_id`: id of the instance to rerun
    Returns:
        The 'status' field of the API response.
    """
    conn = Qubole.agent()
    # Bug fix: the original passed the builtin `id` function to
    # element_path instead of this scheduler's own `self.id`.
    url_path = self.element_path(self.id) + "/instances/" + instance_id + "/rerun"
    return conn.post(url_path)['status']
def status(cls, cluster_id_label):
    """
    Show the status of the cluster with id/label `cluster_id_label`.
    """
    endpoint = cls.element_path(cluster_id_label) + "/state"
    return Qubole.agent().get(endpoint)
def update(group_id, **kwargs):
    """
    Update attributes of the group `group_id` from kwargs.
    """
    endpoint = "groups/%s" % group_id
    return Qubole.agent().put(endpoint, data=kwargs)
def delete(group_id):
    """
    Delete the group `group_id`.
    """
    endpoint = "groups/%s" % group_id
    return Qubole.agent().delete(endpoint)
def duplicate(role_id, **kwargs):
    """
    Duplicate the role `role_id`, optionally overriding attributes via kwargs.
    """
    endpoint = "roles/%s/duplicate" % role_id
    return Qubole.agent().post(endpoint, data=kwargs)
def tables(self):
    """
    Fetch the list of tables reachable through this object.
    """
    return Qubole.agent().get("%s/tables" % self.element_path(self.id))
def kill(self):
    """
    Kill this object by setting its status to 'kill'.
    """
    conn = Qubole.agent()
    return conn.put(self.element_path(self.id), {"status": "kill"})
def remove_user(group_id, user_id):
    """
    Remove the user `user_id` from the group `group_id`.
    """
    endpoint = "groups/%s/qbol_users/%s/remove" % (group_id, user_id)
    return Qubole.agent().put(endpoint)
def update(role_id, **kwargs):
    """
    Update attributes of the role `role_id` from kwargs.
    """
    endpoint = "roles/%s" % role_id
    return Qubole.agent().put(endpoint, data=kwargs)
def resume(self):
    """
    Resume this object by setting its status to 'resume'.
    """
    conn = Qubole.agent()
    return conn.put(self.element_path(self.id), {"status": "resume"})
def edit(self, **kwargs):
    """
    Edit this DbTap with the attributes given in kwargs; returns the
    updated DbTap.
    """
    conn = Qubole.agent()
    updated = conn.put(self.element_path(self.id), data=kwargs)
    return DbTap(updated)
def suspend(self):
    """
    Suspend this object by setting its status to 'suspend'.
    """
    conn = Qubole.agent()
    return conn.put(self.element_path(self.id), {"status": "suspend"})
def list_users(group_id):
    """
    List the users belonging to the group `group_id`.
    """
    endpoint = "groups/%s/qbol_users" % group_id
    return Qubole.agent().get(endpoint)
def list_roles(group_id):
    """
    List the roles assigned to the group `group_id`.
    """
    endpoint = "groups/%s/roles" % group_id
    return Qubole.agent().get(endpoint)
def create(cls, cluster_info):
    """
    Create a new cluster using information provided in `cluster_info`.
    """
    return Qubole.agent().post(cls.rest_entity_path, data=cluster_info)
def create(cls, pipeline_name, create_type, **kwargs):
    """
    Create a pipeline object by issuing a POST request to the
    /pipeline?mode=wizard endpoint
    Note - this creates pipeline in draft mode

    Args:
        pipeline_name: Name to be given.
        create_type: 1->Assisted, 2->Jar, 3->Code
        **kwargs: keyword arguments specific to create type

    Returns:
        response
    """
    conn = Qubole.agent()
    url = Pipelines.rest_entity_path
    if create_type is None:
        raise ParseError("Provide create_type for Pipeline.", None)
    if not kwargs or create_type == 1:
        # Assisted mode (or no extra args): create a bare DRAFT pipeline
        # through the wizard endpoint.
        data = {
            "data": {
                "attributes": {
                    "name": pipeline_name,
                    "status": "DRAFT",
                    "create_type": create_type
                },
                "type": "pipeline"
            }
        }
        url = url + "?mode=wizard"
    else:
        # Jar/Code mode: build the full JSON:API-style payload with
        # properties and alert relationships pulled from kwargs.
        data = {
            "data": {
                "type": "pipeline",
                "attributes": {
                    "name": pipeline_name,
                    "create_type": create_type,
                    "properties": {
                        "cluster_label": kwargs.get('cluster_label'),
                        "can_retry": kwargs.get('can_retry'),
                        "command_line_options": kwargs.get('command_line_options'),
                        "user_arguments": kwargs.get('user_arguments')
                    }
                },
                "relationships": {
                    "alerts": {
                        "data": {
                            "type": "pipeline/alerts",
                            "attributes": {
                                "can_notify": kwargs.get('can_notify'),
                                "notification_channels": kwargs.get('channel_ids')
                            }
                        }
                    }
                }
            }
        }
        # Type-specific properties: jar location/entry point for Jar mode,
        # inline source for Code mode.
        if create_type == 2:
            data['data']['attributes']['properties']['jar_path'] = \
                kwargs.get('jar_path')
            data['data']['attributes']['properties']['main_class_name'] = \
                kwargs.get('main_class_name')
        elif create_type == 3:
            data['data']['attributes']['properties']['code'] = \
                kwargs.get('code')
            data['data']['attributes']['properties']['language'] = \
                kwargs.get('language')
    response = conn.post(url, data)
    # NOTE(review): these assignments store the created pipeline's id/name
    # on the class itself, so they are shared across all callers.
    cls.pipeline_id = Pipelines.get_pipline_id(response)
    cls.pipeline_name = pipeline_name
    return response