def serve(port, secret):
    """Execute one doctor analysis command received over a JavaLink.

    Connects the link, reads a single command, looks its ``"command"`` name
    up among the entries returned by
    ``dataiku.doctor.commands._list_commands()`` (each wrapped with
    ``json_api``), calls it with the optional ``"arg"`` value and sends the
    result back as JSON, followed by an empty string (end of stream).

    On any failure an empty string is sent first, the traceback is printed
    and the value of ``get_json_friendly_error()`` is sent as JSON. The link
    is closed in every case once the command has been read.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    link.connect()
    request = link.get_command()
    try:
        from dataiku.doctor import commands

        # command name -> JSON-wrapped handler
        handlers = {}
        for name, method in commands._list_commands():
            handlers[name] = json_api(method)

        task = request["command"]
        arg = request.get("arg", "")
        logging.info("Running analysis command: %s" % task)

        if task not in handlers:
            raise ValueError("Command %s is unknown." % task)
        handler = handlers[task]
        link.send_json(handler(arg))
        # end of stream
        link.send_string('')
    except:
        # the failure marker goes out before the error payload
        link.send_string('')
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
def serve(port, secret):
    """Run a user-defined metric or check function received over a JavaLink.

    The command carries the user ``"code"``, which is executed in a fresh
    namespace pre-populated with ``config`` and ``plugin_config``. The
    function to call is the only function defined by that code, or else the
    one named ``process``. It is applied to a ``Dataset``, ``Model`` or
    ``Folder`` built from ``"fullName"`` according to ``"objectType"``, via
    ``compute_metric`` (command ``"compute"``) or ``run_check`` (command
    ``"check"``). The result is sent back as JSON.

    On any failure the traceback is printed, an empty string is sent to mark
    the failure and the value of ``get_json_friendly_error()`` is sent as
    JSON. The link is closed in every case once the command has been read.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # NOTE(review): this executes code received through the link as-is;
        # it is only safe as long as the peer is trusted.
        ctx = {"config": config, "plugin_config": plugin_config}
        exec(code, ctx, ctx)

        functions = [o for o in ctx.values() if inspect.isfunction(o)]
        f = functions[0] if len(functions) == 1 else ctx.get('process', None)
        if f is None:
            raise Exception('No function "process" defined')

        object_type = command['objectType']
        full_name = command['fullName']
        partition_id = command.get('partitionId', None)

        if object_type == "DATASET":
            obj_arg = Dataset(full_name)
        elif object_type == "SAVED_MODEL":
            obj_arg = Model(full_name)
        elif object_type == "MANAGED_FOLDER":
            obj_arg = Folder(full_name)
        else:
            # previously fell through and failed later with an unbound-local
            # error on obj_arg, hiding the real cause
            raise Exception("Unknown object type: %s" % object_type)

        # work and get output
        task = command['command']
        if task == 'compute':
            result = compute_metric(obj_arg, partition_id, f)
        elif task == 'check':
            last_values = command.get("lastValues", {})
            result = run_check(obj_arg, partition_id, last_values, f)
        else:
            raise Exception("Unknown command")

        if result is None:
            raise Exception("Code did not return a result")
        link.send_json(result)
    except:
        traceback.print_exc()
        link.send_string('')  # send null to mark failure
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
def serve(port, secret):
    """Run a single export task with a user-defined ``Exporter`` subclass.

    The ``Exporter`` subclass is located in the command's ``"code"`` with
    ``get_clazz_in_code`` and instantiated with as many of
    ``(config, plugin_config)`` as its ``__init__`` accepts. For the
    ``"export"`` task the link input stream is passed to ``export_rows``
    and an acknowledgment carrying the returned count is sent as JSON.

    On any failure an empty string is sent to mark the failure, the
    traceback is printed and the value of ``get_json_friendly_error()`` is
    sent as JSON. The link is closed in every case once the command has
    been read.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # get the exporter object
        clazz = get_clazz_in_code(code, Exporter)
        # inspect.getargspec rejects annotated / keyword-only signatures on
        # Python 3 and is gone in 3.11; use getfullargspec when it exists.
        getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getspec(clazz.__init__).args)  # includes self
        if arg_count == 1:
            exporter = clazz()
        elif arg_count == 2:
            exporter = clazz(config)
        elif arg_count == 3:
            exporter = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Exporter subclass: %i args" % arg_count)

        # get task and dispatch work to exporter
        task = command["task"]
        if task != "export":
            raise Exception("Unexpected task %s" % task)

        # schema is mandatory
        with link.get_input() as in_stream:
            row_count = export_rows(
                exporter,
                command["exportBehavior"],
                command["schema"],
                in_stream,
                command.get("destinationFilePath", None))

        # send ack
        link.send_json({'ok': True, 'count': row_count})
    except:
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
def serve(port, secret):
    """Stream one path from a user-defined ``FSProvider`` over a JavaLink.

    The ``FSProvider`` subclass is located in the command's ``"code"`` with
    ``get_clazz_in_code`` and instantiated with as many of
    ``(root, config, plugin_config)`` as its ``__init__`` accepts. Its
    ``read(path, output, limit)`` method is then called with the link's
    output stream. Afterwards an empty string (end of stream) and an
    ``{'ok': True}`` acknowledgment are sent.

    On any failure an empty string is sent, the traceback is printed and
    ``{'ok': False, 'error': ...}`` is sent with the value of
    ``get_json_friendly_error()``. The link is always closed.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # get work to do
        command = link.get_command()
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        root = command["root"]
        path = command["path"]
        limit = command["limit"]

        # get the fs provider object
        clazz = get_clazz_in_code(code, FSProvider)
        # inspect.getargspec rejects annotated / keyword-only signatures on
        # Python 3 and is gone in 3.11; use getfullargspec when it exists.
        getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getspec(clazz.__init__).args)  # includes self
        if arg_count == 1:
            fsprovider = clazz()
        elif arg_count == 2:
            fsprovider = clazz(root)
        elif arg_count == 3:
            fsprovider = clazz(root, config)
        elif arg_count == 4:
            fsprovider = clazz(root, config, plugin_config)
        else:
            reason = "Wrong signature of the FSProvider subclass: %i args" % arg_count
            raise Exception(reason)

        with link.get_output() as output:
            fsprovider.read(path, output, limit)

        # send end of stream
        link.send_string('')
        # send ack
        link.send_json({'ok': True})
    except:
        link.send_string('')  # end stream to send ack
        traceback.print_exc()
        error = get_json_friendly_error()
        link.send_json({'ok': False, 'error': error})
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve requests against a user-defined ``FSProvider`` over a JavaLink.

    The ``FSProvider`` subclass is located in the command's ``"code"`` with
    ``get_clazz_in_code`` and instantiated with as many of
    ``(root, config, plugin_config)`` as its ``__init__`` accepts. After an
    ``{'ok': True}`` acknowledgment, requests are read from the link and
    passed to ``handle_request``, whose response is sent back as JSON. The
    loop ends when ``handle_request`` reports closure or when a ``None``
    request is read. An empty string (end of stream) is then sent.

    A wrong constructor signature is reported with
    ``{'ok': False, 'reason': ...}`` before the generic failure path runs.
    On any failure an empty string is sent to mark the failure, the
    traceback is printed and the value of ``get_json_friendly_error()`` is
    sent as JSON. The link is always closed.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # get work to do
        command = link.get_command()
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        root = command["root"]

        # get the fs provider object
        clazz = get_clazz_in_code(code, FSProvider)
        # inspect.getargspec rejects annotated / keyword-only signatures on
        # Python 3 and is gone in 3.11; use getfullargspec when it exists.
        getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getspec(clazz.__init__).args)  # includes self
        if arg_count == 1:
            fsprovider = clazz()
        elif arg_count == 2:
            fsprovider = clazz(root)
        elif arg_count == 3:
            fsprovider = clazz(root, config)
        elif arg_count == 4:
            fsprovider = clazz(root, config, plugin_config)
        else:
            reason = "Wrong signature of the FSProvider subclass: %i args" % arg_count
            link.send_json({'ok': False, 'reason': reason})
            raise Exception(reason)

        link.send_json({'ok': True})

        # loop and process commands
        closed = False
        while not closed:
            request = link.read_json()
            if request is None:
                break
            closed, response = handle_request(request, fsprovider, closed)
            link.send_json(response)

        # send end of stream
        link.send_string('')
    except:
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve prediction requests for a user-defined function over a JavaLink.

    A ``LoadedFunction`` is built from the command's ``"codeFilePath"``,
    ``"functionName"`` and ``"resourceFolderPaths"``. After an
    ``{"ok": True}`` acknowledgment, each request read from the link is
    answered with ``{'ok': True, 'resp': ..., 'execTimeUS': ...}``, where
    ``resp`` is ``predict(request["params"])`` and ``execTimeUS`` is the
    time spent in that call, in microseconds. A ``None`` request ends the
    loop, after which an empty string (end of stream) is sent.

    While a request carrying ``"usedAPIKey"`` is processed, the key is
    exposed through the ``DKU_CURRENT_REQUEST_USED_API_KEY`` environment
    variable. It is removed again even if the request fails.

    On any failure the exception is logged, an empty string is sent to mark
    the failure and the value of ``get_json_friendly_error()`` is sent as
    JSON. The link is always closed.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    api_key_var = "DKU_CURRENT_REQUEST_USED_API_KEY"

    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # retrieve the initialization info and initiate serving
        command = link.get_command()
        function_name = command.get('functionName')
        code_file = command.get('codeFilePath')
        data_folders = command.get('resourceFolderPaths', [])

        loaded_function = LoadedFunction(code_file, function_name, data_folders)

        logging.info("Predictor ready")
        link.send_json({"ok": True})

        # loop and process commands
        while True:
            request = link.read_json()
            if request is None:
                break

            used_api_key = request.get("usedAPIKey", None)
            if used_api_key is not None:
                os.environ[api_key_var] = used_api_key
            try:
                before = time.time()
                response = loaded_function.predict(request["params"])
                after = time.time()
                link.send_json({
                    'ok': True,
                    'resp': response,
                    'execTimeUS': int(1000000 * (after - before))
                })
            finally:
                # do not leave the key of a failed request in the environment
                if used_api_key is not None:
                    os.environ.pop(api_key_var, None)

        # send end of stream
        logging.info("Work done")
        link.send_string('')
    except:
        logging.exception("Function user code failed")
        link.send_string('')  # send null to mark failure
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve prediction requests for a custom-code model over a JavaLink.

    A ``LoadedModel`` is built from the command's ``"modelType"``,
    ``"codeFilePath"`` and ``"resourceFolderPath"``. After an
    ``{"ok":True}`` acknowledgment, each request read from the link is
    answered with the result of ``handle_predict(model, request["obj"])``,
    extended with ``execTimeUS``, the time spent in that call in
    microseconds. A ``None`` request ends the loop, after which an empty
    string (end of stream) is sent.

    While a request carrying ``"usedAPIKey"`` is processed, the key is
    exposed through the ``DKU_CURRENT_REQUEST_USED_API_KEY`` environment
    variable. It is removed again even if the request fails.

    On any failure the traceback is printed, an empty string is sent to
    mark the failure and a JSON object with ``errorType``, ``message`` and
    ``traceback`` is sent. The link is always closed.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    api_key_var = "DKU_CURRENT_REQUEST_USED_API_KEY"

    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # retrieve the initialization info and initiate serving
        command = link.get_command()
        model_type = command.get('modelType')
        code_file = command.get('codeFilePath')
        data_folder = command.get('resourceFolderPath', None)

        loaded_model = LoadedModel(model_type, code_file, data_folder)

        logging.info("Predictor ready")
        link.send_json({"ok": True})

        # loop and process commands
        while True:
            request = link.read_json()
            if request is None:
                break

            used_api_key = request.get("usedAPIKey", None)
            if used_api_key is not None:
                os.environ[api_key_var] = used_api_key
            try:
                before = time.time()
                response = handle_predict(loaded_model, request["obj"])
                after = time.time()
                response["execTimeUS"] = int(1000000 * (after - before))
                link.send_json(response)
            finally:
                # do not leave the key of a failed request in the environment
                if used_api_key is not None:
                    os.environ.pop(api_key_var, None)

        # send end of stream
        logging.info("Work done")
        link.send_string('')
    except:
        ex_type, ex, tb = sys.exc_info()
        traceback.print_exc()
        link.send_string('')  # send null to mark failure
        # On Python 3 extract_tb() yields FrameSummary objects, which the
        # standard JSON encoder cannot serialize; plain lists encode to the
        # same arrays the Python 2 tuples produced.
        frames = [list(frame) for frame in traceback.extract_tb(tb)]
        link.send_json({
            'errorType': str(ex_type),
            'message': str(ex),
            'traceback': frames
        })
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve prediction requests for a saved model over a JavaLink.

    Reads ``"modelFolder"`` from the initial command, tries to load
    ``conditional_outputs.json`` from it (falling back to an empty list and
    logging the problem when that fails) and builds a predictor with
    ``build_predictor_for_saved_model``. After an ``{"ok":True}``
    acknowledgment, each request read from the link is answered with the
    result of ``handle_predict``, extended with ``execTimeUS`` (time spent
    in that call, in microseconds). A ``None`` request ends the loop, after
    which an empty string (end of stream) is sent.

    On any failure the exception is logged, an empty string is sent to mark
    the failure and the value of ``get_json_friendly_error()`` is sent as
    JSON. The link is always closed.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    link.connect()
    try:
        init = link.get_command()
        model_folder = init.get('modelFolder')

        try:
            outputs_path = osp.join(model_folder, "conditional_outputs.json")
            conditional_outputs = load_from_filepath(outputs_path)
        except Exception as e:
            logging.exception("Can't load conditional outputs: " + str(e))
            conditional_outputs = []

        predictor = build_predictor_for_saved_model(
            model_folder, "PREDICTION", conditional_outputs)
        logging.info("Predictor ready")
        link.send_json({"ok": True})

        # never read afterwards in this function
        stored_exception = None

        request = link.read_json()
        while request is not None:
            started = time.time()
            response = handle_predict(predictor, request)
            finished = time.time()
            response["execTimeUS"] = int(1000000 * (finished - started))
            link.send_json(response)
            request = link.read_json()

        logging.info("Work done")
        # end of stream
        link.send_string('')
    except:
        logging.exception("Prediction user code failed")
        # null marks the failure, then the error itself follows
        link.send_string('')
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
def serve(port, secret):
    """Start the web app backend described by a command read from a JavaLink.

    Registers a ``/__ping`` route answering ``"pong"`` on ``app``, executes
    the command's ``"code"`` in the module globals, then creates a werkzeug
    server for ``app`` on ``127.0.0.1`` with port ``0``. The server's actual
    port is reported with a ``{'type': 'STARTED', "port": ...}`` message and
    the server then runs until ``serve_forever()`` returns or raises.

    On any failure the exception is logged, ``{'type': 'ERROR'}`` is sent,
    followed by the value of ``get_json_friendly_error()``. The link is
    closed in every case once the command has been read.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    logging.info("Starting Webapp backend")

    link = JavaLink(port, secret)
    link.connect()
    logging.info("Webapp backend connected to DSS")

    startup = link.get_command()
    try:
        app_ref = (startup["projectKey"], startup["webAppId"])
        logging.info("Starting backend for web app: %s.%s" % app_ref)

        @app.route('/__ping')
        def ping():
            return "pong"

        # The user's code runs in the module globals so that flask can find
        # whatever it defines.
        module_namespace = globals()
        exec(startup["code"], module_namespace, module_namespace)

        from werkzeug.serving import make_server
        server = make_server("127.0.0.1", 0, app)
        link.send_json({'type': 'STARTED', "port": server.server_port})
        server.serve_forever()
    except:
        logging.exception("Backend main loop failed")
        link.send_json({'type': 'ERROR'})
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
def serve(port, secret):
    """Run one task with a user-defined ``Formatter`` subclass over a JavaLink.

    The ``Formatter`` subclass is located in the command's ``"code"`` with
    ``get_clazz_in_code`` and instantiated with as many of
    ``(config, plugin_config)`` as its ``__init__`` accepts. Supported
    tasks:

    * ``"read"``: ``extract_rows`` from the link input to the link output,
      with an optional schema;
    * ``"write"``: ``format_rows`` from the link input to the link output,
      with a mandatory schema;
    * ``"schema"``: ``extract_schema`` from the link input; a placeholder
      schema with a single ``__dku_empty_schema__`` column is sent when
      none is returned.

    ``"read"`` and ``"write"`` finish with an empty string (end of stream)
    and an ``{'ok': True, 'count': ...}`` acknowledgment.

    On any failure the traceback is printed and an empty string is sent.
    The error from ``get_json_friendly_error()`` is then sent bare, or
    wrapped as ``{'ok': False, 'error': ...}`` when a streaming task
    (``"read"`` or ``"write"``) had started. The link is closed in every
    case once the command has been read.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # selects the shape of the error reply in the failure path
    command_is_streaming = None
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # get the formatter object
        clazz = get_clazz_in_code(code, Formatter)
        # inspect.getargspec rejects annotated / keyword-only signatures on
        # Python 3 and is gone in 3.11; use getfullargspec when it exists.
        getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getspec(clazz.__init__).args)  # includes self
        if arg_count == 1:
            formatter = clazz()
        elif arg_count == 2:
            formatter = clazz(config)
        elif arg_count == 3:
            formatter = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Formatter subclass: %i args" % arg_count)

        # get task and dispatch work to formatter
        task = command["task"]
        if task == "read":
            # extract mode
            command_is_streaming = True
            with link.get_input() as in_stream, link.get_output() as out_stream:
                row_count = extract_rows(
                    formatter, command.get("schema", None), in_stream, out_stream)
            # send end of stream
            link.send_string('')
            # send acknowledgment
            link.send_json({'ok': True, 'count': row_count})
        elif task == "write":
            # format mode (schema is mandatory)
            command_is_streaming = True
            with link.get_input() as in_stream, link.get_output() as out_stream:
                row_count = format_rows(
                    formatter, command["schema"], in_stream, out_stream)
            # send end of stream
            link.send_string('')
            # send acknowledgment
            link.send_json({'ok': True, 'count': row_count})
        elif task == "schema":
            # read schema mode
            command_is_streaming = False
            with link.get_input() as in_stream:
                schema = extract_schema(formatter, in_stream)
            if schema is not None:
                link.send_json(schema)
            else:
                link.send_json({
                    'columns': [{
                        'name': '__dku_empty_schema__',
                        'type': 'string'
                    }]
                })
        else:
            raise Exception("Unexpected task %s" % task)
    except:
        traceback.print_exc()
        error = get_json_friendly_error()
        # send null to mark failure or to mark end of stream
        link.send_string('')
        if not command_is_streaming:
            link.send_json(error)
        else:
            link.send_json({'ok': False, 'error': error})
    finally:
        # done
        link.close()
def serve(port, secret):
    """Run one call on a user-defined ``Cluster`` subclass over a JavaLink.

    The ``Cluster`` subclass is located in the command's ``"code"`` with
    ``get_clazz_in_code`` and must have the constructor signature
    ``(self, cluster_id, name, config, plugin_config)``. The command's
    ``"type"`` selects the call:

    * ``"start"``: calls ``start()`` or ``start(report_progress)``. The
      result must be a list or tuple whose first item holds the
      ``hadoop``/``hive``/``impala``/``spark`` settings and whose optional
      second item is the cluster data. Both are sent with
      ``send_result_json``.
    * ``"stop"``: calls ``stop(data)`` or ``stop(data, report_progress)``,
      then sends ``{'ok': True}`` with ``send_result_json``.
    * anything else: must name a method of the cluster. It is called with
      ``data`` and its result is sent as ``{'ok': True, 'response': ...}``
      (``None`` becomes ``{}`` and non-dict results are wrapped as
      ``{'result': ...}``).

    An empty string (end of stream) is sent after a successful call. On any
    failure the traceback is printed and ``send_error(link)`` is called.
    The link is closed in every case once the command has been read.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    :raises: nothing directly; unsupported ``start``/``stop`` signatures are
        reported through the failure path instead of being silently skipped.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        cluster_id = command["clusterId"]
        cluster_name = command["clusterName"]

        # inspect.getargspec rejects annotated / keyword-only signatures on
        # Python 3 and is gone in 3.11; use getfullargspec when it exists.
        getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec

        # get the cluster object
        clazz = get_clazz_in_code(code, Cluster)
        arg_count = len(getspec(clazz.__init__).args)
        if arg_count != 5:
            raise Exception("Wrong signature of the Cluster subclass: %i args but expected 5 (self, cluster_id, name, config, plugin_config)" % arg_count)
        cluster = clazz(cluster_id, cluster_name, config, plugin_config)

        # work
        call_name = command["type"]
        data = command.get("data", {})

        if call_name == 'start':
            # init progress reporting if relevant
            report_progress = get_progress_callback(cluster.get_start_progress_target, link)
            arg_count = len(getspec(cluster.start).args)  # includes self
            if arg_count == 1:
                result = cluster.start()
            elif arg_count == 2:
                result = cluster.start(report_progress)
            else:
                # used to fall through to an unbound `result`
                raise Exception("Wrong signature of the start() method: %i args" % arg_count)

            if not isinstance(result, (list, tuple)):
                raise Exception("start() didn't return an object of a valid type: %s" % type(result))

            cluster_settings = result[0]
            cluster_data = result[1] if len(result) > 1 else {}
            cluster_setup = {
                'hadoopSettings': cluster_settings.get('hadoop', None),
                'hiveSettings': cluster_settings.get('hive', None),
                'impalaSettings': cluster_settings.get('impala', None),
                'sparkSettings': cluster_settings.get('spark', None),
                'data': cluster_data,
            }
            send_result_json(cluster_setup, link)
        elif call_name == 'stop':
            # init progress reporting if relevant
            report_progress = get_progress_callback(cluster.get_stop_progress_target, link)
            arg_count = len(getspec(cluster.stop).args)  # includes self
            if arg_count == 2:
                cluster.stop(data)
            elif arg_count == 3:
                cluster.stop(data, report_progress)
            else:
                # used to report success without ever calling stop()
                raise Exception("Wrong signature of the stop() method: %i args" % arg_count)
            send_result_json({'ok': True}, link)
        else:
            if not hasattr(cluster, call_name):
                raise Exception("Wrong call type : %s" % call_name)
            action_attr = getattr(cluster, call_name)
            if not inspect.ismethod(action_attr):
                raise Exception("Wrong call type : %s is not a method" % call_name)

            result = action_attr(data)
            # convert to something that is legit for a JsonObject
            if result is None:
                result = {}
            if not isinstance(result, dict):
                result = {'result': result}
            # send
            link.send_json({'ok': True, 'response': result})

        # send end of stream (data is expected as a stream)
        link.send_string('')
    except:
        traceback.print_exc()
        send_error(link)
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve tasks against a user-defined ``Connector`` over a JavaLink.

    The ``Connector`` subclass is located in the command's ``"code"`` with
    ``get_clazz_in_code`` and instantiated with as many of
    ``(config, plugin_config)`` as its ``__init__`` accepts. After an
    ``{'ok': True}`` acknowledgment, requests are read until a ``None``
    request arrives. Each is dispatched on its ``"task"``:

    * ``read_rows``: streams rows to the link output with ``read_rows``. A
      failure is logged and stored rather than raised, and an empty string
      is sent afterwards in both cases;
    * ``finish_read_session``: reports the stored outcome of the last
      ``read_rows``;
    * ``write_rows``: feeds the link input to ``write_rows`` and
      acknowledges;
    * ``get_schema``, ``get_partitioning_scheme``, ``list_partitions``,
      ``partition_exists``, ``records_count``: forward to the matching
      connector method and send its result.

    An unknown task raises. When the loop ends an empty string (end of
    stream) is sent. On any failure outside ``read_rows`` an empty string
    is sent to mark the failure, the traceback is printed and the value of
    ``get_json_friendly_error()`` is sent as JSON. The link is always
    closed.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # get work to do
        command = link.get_command()
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # get the connector object
        clazz = get_clazz_in_code(code, Connector)
        # inspect.getargspec rejects annotated / keyword-only signatures on
        # Python 3 and is gone in 3.11; use getfullargspec when it exists.
        getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getspec(clazz.__init__).args)  # includes self
        if arg_count == 1:
            connector = clazz()
        elif arg_count == 2:
            connector = clazz(config)
        elif arg_count == 3:
            connector = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Connector subclass: %i args" % arg_count)

        link.send_json({'ok': True})

        # error of the latest read_rows, reported by finish_read_session
        stored_error = None
        # loop and process commands
        while True:
            request = link.read_json()
            if request is None:
                break

            task = request["task"]
            logging.info("Processing task: %s" % task)

            if task == "read_rows":
                schema = request.get("schema", None)
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                limit = request.get("limit", None)
                stored_error = None
                try:
                    with link.get_output() as output:
                        read_rows(connector, schema, partitioning, partition_id, limit, output)
                except:
                    logging.exception("Connector send fail, storing exception")
                    stored_error = get_json_friendly_error()
                link.send_string('')
            elif task == "finish_read_session":
                if stored_error is None:
                    link.send_json({"ok": True})
                else:
                    link.send_json({"ok": False, "error": stored_error})
            elif task == "write_rows":
                schema = request.get("schema", None)
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                with link.get_input() as in_stream:
                    write_rows(connector, schema, partitioning, partition_id, in_stream)
                link.send_json({'ok': True})
            elif task == "get_schema":
                link.send_json({'schema': connector.get_read_schema()})
            elif task == "get_partitioning_scheme":
                link.send_json({'partitioning': connector.get_partitioning()})
            elif task == "list_partitions":
                partitioning = request.get("partitioning", None)
                link.send_json(
                    {'partitions': connector.list_partitions(partitioning)})
            elif task == "partition_exists":
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                link.send_json({
                    "exists": connector.partition_exists(partitioning, partition_id)
                })
            elif task == "records_count":
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                link.send_json({
                    "count": connector.get_records_count(partitioning, partition_id)
                })
            else:
                raise Exception("Unexpected task %s" % task)

        # send end of stream
        logging.info("Work done")
        link.send_string('')
    except:
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve calls to a user-defined ``do`` function over a JavaLink.

    The command's ``"code"`` is executed with ``python2_friendly_exec`` in
    a fresh namespace. The function to call is the only function that code
    defines, or else the one named ``do``. It may take up to four
    positional arguments and receives the leading ones of
    ``(payload, config, plugin_config, inputs)``. After an ``{'ok': True}``
    acknowledgment, each request read from the link is answered with the
    function's result as JSON; a ``None`` result raises. A ``None`` request
    ends the loop, after which an empty string (end of stream) is sent.

    On any failure an empty string is sent to mark the failure and the
    traceback is printed. The error from ``get_json_friendly_error()`` is
    then sent bare if serving had not started yet, or wrapped as
    ``{'ok': False, 'error': ...}`` otherwise. The link is always closed.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # selects the shape of the error reply in the failure path
    is_serving = False
    try:
        # get work to do
        command = link.get_command()
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # get the helper function
        ctx = {}
        python2_friendly_exec(code, ctx, ctx)

        functions = [o for o in ctx.values() if inspect.isfunction(o)]
        f = functions[0] if len(functions) == 1 else ctx.get('do', None)
        if f is None:
            raise Exception('No function "do" defined')

        # inspect.getargspec rejects annotated / keyword-only signatures on
        # Python 3 and is gone in 3.11; use getfullargspec when it exists.
        getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        f_args_count = len(getspec(f).args)
        if f_args_count >= 5:
            reason = "Too many arguments for the do() function : %i args" % f_args_count
            raise Exception(reason)

        link.send_json({'ok': True})

        def call_do(payload, config, plugin_config, inputs):
            # pass only as many leading arguments as f declares (0 to 4)
            available = (payload, config, plugin_config, inputs)
            return f(*available[:f_args_count])

        is_serving = True
        # loop and process commands
        while True:
            request = link.read_json()
            if request is None:
                break

            response = call_do(request.get('payload', None),
                               request.get('config', {}),
                               plugin_config,
                               request.get('inputs', []))
            if response is None:
                raise Exception("Empty response to %s" % json.dumps(request))
            link.send_json(response)

        # send end of stream
        link.send_string('')
    except:
        error = get_json_friendly_error()
        link.send_string('')  # mark failure
        traceback.print_exc()
        if not is_serving:
            link.send_json(error)
        else:
            link.send_json({'ok': False, 'error': error})
    finally:
        # done
        link.close()
def serve(port, secret):
    """Expand a notebook with a user-defined ``expand`` function over a JavaLink.

    The command's ``"code"`` is executed with ``python2_friendly_exec`` in
    a fresh namespace. The function to call is the only function that code
    defines, or else the one named ``expand``. It must take between one and
    four positional arguments and receives the leading ones of
    ``(notebook, project_key, dataset_project_key, dataset_name)``. A text
    result is encoded to UTF-8 and sent with ``send_block``, followed by an
    empty string (end of stream) and an ``{'ok': True, 'count': ...}``
    acknowledgment, where ``count`` is the length of what was sent.

    On any failure an empty string is sent to mark the failure, the
    traceback is printed and ``{'ok': False, 'error': ...}`` is sent with
    the value of ``get_json_friendly_error()``. The link is closed in every
    case once the command has been read.

    :param port: forwarded to ``JavaLink``
    :param secret: forwarded to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        code = command["code"]
        notebook = command["notebook"]
        project_key = command.get("projectKey", None)
        dataset_project_key = command.get("datasetProjectKey", None)
        dataset_name = command.get("datasetName", None)

        # get the expansion function
        ctx = {}
        python2_friendly_exec(code, ctx, ctx)

        functions = [o for o in ctx.values() if inspect.isfunction(o)]
        f = functions[0] if len(functions) == 1 else ctx.get('expand', None)
        if f is None:
            raise Exception('No function "expand" defined')

        # inspect.getargspec rejects annotated / keyword-only signatures on
        # Python 3 and is gone in 3.11; use getfullargspec when it exists.
        getspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        f_args_count = len(getspec(f).args)
        if f_args_count >= 5:
            reason = "Too many arguments for the expand() function : %i args" % f_args_count
            raise Exception(reason)
        if f_args_count < 1:
            reason = "Too few arguments for the expand() function : %i args" % f_args_count
            raise Exception(reason)

        def call_expand(notebook, project_key, dataset_project_key, dataset_name):
            # pass only as many leading arguments as f declares (1 to 4)
            available = (notebook, project_key, dataset_project_key, dataset_name)
            return f(*available[:f_args_count])

        # expand for real
        expanded = call_expand(notebook, project_key, dataset_project_key, dataset_name)

        # `unicode` only exists on Python 2; on Python 3 the text type is str
        try:
            text_type = unicode
        except NameError:
            text_type = str

        # send result
        if isinstance(expanded, text_type):
            # we need to send a str (for python streams) and the java end
            # is waiting for utf8
            expanded = expanded.encode('utf8')
        link.send_block(expanded)

        # send end of stream
        link.send_string('')
        # send ack
        link.send_json({'ok': True, 'count': len(expanded)})
    except:
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json({'ok': False, 'error': get_json_friendly_error()})
    finally:
        # done
        link.close()