def whizzml_script(args, api):
    """Look up the reify script for ``args.language`` and clear its local copy

    Unless ``args.upgrade`` is set, the script name is read from the
    ``metadata.json`` file found in the language's package directory and
    ``get_last_resource`` is queried for a script with that name. When
    nothing is retrieved (or ``--upgrade`` is used) the language's directory
    under ``BIGMLER_SCRIPTS_DIRECTORY`` is removed to force a rebuild.

    :param args: parsed command options; ``language`` and ``upgrade`` are read
    :param api: API connection handed over to ``get_last_resource``

    NOTE(review): the original summary says the script ID is returned, but
    the visible body stops after the cleanup step and has no ``return``;
    confirm whether the rest of the function is missing from this view.
    """
    # each language has its own script, so first check:
    #  - whether the script exists in the account
    #  - whether it has the same version
    # else, we act as if we wanted to upgrade the script
    language_dir = SCRIPT_FILE.get(args.language, args.language)
    script_dir = os.path.join(REIFY_PACKAGE_PATH, language_dir)
    if not args.upgrade:
        # the script is retrieved by name: reading the name of the script
        with open(os.path.join(script_dir, "metadata.json")) as meta_file:
            meta = json.load(meta_file)
        # check for the last script stored under that name
        query_string = "name=%s" % meta["name"]
        reify_script = get_last_resource(
            "script", api=api, query_string=query_string)
    else:
        reify_script = None

    # create or retrieve the script to generate the output
    # if --upgrade, we force rebuilding the scriptified script
    if reify_script is None:
        try:
            shutil.rmtree(
                os.path.join(BIGMLER_SCRIPTS_DIRECTORY, language_dir))
        except OSError:
            # best-effort cleanup: the directory may simply not exist yet
            pass
def whizzml_script(args, api):
    """Look up the reify script for ``args.language`` and clear its local copy

    Unless ``args.upgrade`` is set, the script name is read from the
    ``metadata.json`` file found in the language's package directory and
    ``get_last_resource`` is queried for a script with that name. When
    nothing is retrieved (or ``--upgrade`` is used) the language's directory
    under ``BIGMLER_SCRIPTS_DIRECTORY`` is removed to force a rebuild.

    :param args: parsed command options; ``language`` and ``upgrade`` are read
    :param api: API connection handed over to ``get_last_resource``

    NOTE(review): the original summary says the script ID is returned, but
    the visible body stops after the cleanup step and has no ``return``;
    confirm whether the rest of the function is missing from this view.
    """
    # each language has its own script, so first check:
    #  - whether the script exists in the account
    #  - whether it has the same version
    # else, we act as if we wanted to upgrade the script
    language_dir = SCRIPT_FILE.get(args.language, args.language)
    script_dir = os.path.join(REIFY_PACKAGE_PATH, language_dir)
    if not args.upgrade:
        # the script is retrieved by name: reading the name of the script
        with open(os.path.join(script_dir, "metadata.json")) as meta_file:
            meta = json.load(meta_file)
        # check for the last script stored under that name
        query_string = "name=%s" % meta["name"]
        reify_script = get_last_resource(
            "script", api=api, query_string=query_string)
    else:
        reify_script = None

    # create or retrieve the script to generate the output
    # if --upgrade, we force rebuilding the scriptified script
    if reify_script is None:
        try:
            shutil.rmtree(
                os.path.join(BIGMLER_SCRIPTS_DIRECTORY, language_dir))
        except OSError:
            # best-effort cleanup: the directory may simply not exist yet
            pass
def library_processing(api, args, session_file=None, path=None, log=None):
    """Creating or retrieving a library

    Nothing is done (and ``None`` is returned) unless ``args.code_file`` or
    ``args.code`` is set. When resuming, the library is first looked for
    through ``c.checkpoint``. Otherwise the source code is read, the project
    is processed and, with ``args.upgrade``, the last matching library is
    looked up before falling back to creating a new one.

    :param api: API connection passed on to the helper functions
    :param args: parsed command options (``resume`` and ``project_id`` may
                 be updated in place)
    :param session_file: log file passed on to the logging helpers
    :param path: output directory passed on to the helper functions
    :param log: log handle passed on to the helper functions
    :return: the library retrieved or created, or ``None`` when no code was
             provided
    """
    library = None
    resume = args.resume
    if args.code_file or args.code:
        # If resuming, try to extract args.library from log files
        if resume:
            message = u.dated("Library not found. Resuming.\n")
            resume, library = c.checkpoint(
                c.is_library_created, path, debug=args.debug,
                message=message, log_file=session_file,
                console=args.verbosity)

        if not resume:
            args.resume = resume
            if args.code_file:
                try:
                    with open(args.code_file) as code_file:
                        source_code = code_file.read()
                except IOError:
                    sys.exit("Failed to find the source code file: %s" %
                             args.code_file)
            else:
                source_code = args.code
            # Check if there's a created project for it
            args.project_id = pp.project_processing(
                api, args, resume, session_file=session_file,
                path=path, log=log)
            # Check if we are upgrading
            if args.upgrade:
                library = u.get_last_resource(
                    "library", api, build_query_string(args))
                log_created_resources("library", path, library, mode='a')
                # only announce a library when the lookup returned one;
                # otherwise a new library is created below
                if library is not None:
                    message = u.dated("Library found: %s \n"
                                      " (library ID: %s)\n" %
                                      (args.name, library))
                    u.log_message(message, log_file=session_file,
                                  console=args.verbosity)

            if library is None:
                library_args = rl.set_library_args(args)
                add_version_tag(library_args, args.name)
                library = rl.create_library(
                    source_code, library_args, args, api,
                    path, session_file, log)
    return library
def library_processing(api, args, session_file=None, path=None, log=None):
    """Creating or retrieving a library

    Nothing is done (and ``None`` is returned) unless ``args.code_file`` or
    ``args.code`` is set. When resuming, the library is first looked for
    through ``c.checkpoint``. Otherwise the source code is read, the project
    is processed and, with ``args.upgrade``, the last matching library is
    looked up before falling back to creating a new one.

    :param api: API connection passed on to the helper functions
    :param args: parsed command options (``resume`` and ``project_id`` may
                 be updated in place)
    :param session_file: log file passed on to the logging helpers
    :param path: output directory passed on to the helper functions
    :param log: log handle passed on to the helper functions
    :return: the library retrieved or created, or ``None`` when no code was
             provided
    """
    library = None
    resume = args.resume
    if args.code_file or args.code:
        # If resuming, try to extract args.library from log files
        if resume:
            message = u.dated("Library not found. Resuming.\n")
            resume, library = c.checkpoint(
                c.is_library_created, path, debug=args.debug,
                message=message, log_file=session_file,
                console=args.verbosity)

        if not resume:
            args.resume = resume
            if args.code_file:
                try:
                    with open(args.code_file) as code_file:
                        source_code = code_file.read()
                except IOError:
                    sys.exit("Failed to find the source code file: %s" %
                             args.code_file)
            else:
                source_code = args.code
            # Check if there's a created project for it
            args.project_id = pp.project_processing(
                api, args, resume, session_file=session_file,
                path=path, log=log)
            # Check if we are upgrading
            if args.upgrade:
                library = u.get_last_resource(
                    "library", api, build_query_string(args))
                r.log_created_resources("library", path, library, mode='a')
                # only announce a library when the lookup returned one;
                # otherwise a new library is created below
                if library is not None:
                    message = u.dated("Library found: %s \n"
                                      " (library ID: %s)\n" %
                                      (args.name, library))
                    u.log_message(message, log_file=session_file,
                                  console=args.verbosity)

            if library is None:
                library_args = r.set_library_args(args)
                add_version_tag(library_args, args.name)
                library = r.create_library(
                    source_code, library_args, args, api,
                    path, session_file, log)
    return library
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and execute it
    with the new provided data

    The model is taken from ``args.resource_id`` or, failing that, from the
    first ``<model type>_tag`` option that is set, querying by tag. Unless
    ``args.upgrade`` is set, a script tagged ``retrain:<model ID>`` is
    looked up first; when none is retrieved, the script is generated by
    executing the reify script on the model.

    :param args: parsed command options
    :param api: API connection passed on to the helper functions
    :param common_options: not read in the visible body
    :param session_file: log file passed on to the helper functions

    NOTE(review): the visible body ends once ``script_id`` is set and has
    no ``return``; the rest of the function may be missing from this view.
    """
    # retrieve the modeling resource to be retrained by tag or id
    resource_id = None
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            query_string = "tags=%s" % tag
            resource_id = get_last_resource(
                model_type.replace("_", ""),
                api=api, query_string=query_string)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)." %
                         (model_type.replace("_", " "), tag))
            # NOTE(review): reference_tag is not read in the visible body
            reference_tag = tag
            break

    if resource_id is None:
        # neither an ID nor a tag was provided: stop with a clear message
        # instead of failing later on an unbound name
        sys.exit("Failed to find the model to retrain."
                 " Please, provide its ID or its tag.")

    if args.upgrade:
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            # best-effort cleanup: the directory may not exist yet
            pass
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource(
            "script", api=api, query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:
        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True,
                                     api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                    "retrain", "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = ['whizzml',
                               '--package-dir', INCREMENTAL_PACKAGE_PATH,
                               '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = ['execute',
                           '--script', reify_script,
                           '--output-dir', args.output_dir]
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and execute it
    with the new provided data

    The model is taken from ``args.resource_id`` or, failing that, from the
    first ``<model type>_tag`` option that is set, querying by tag. Unless
    ``args.upgrade`` is set, a script tagged ``retrain:<model ID>`` is
    looked up first; when none is retrieved, the script is generated by
    executing the reify script on the model.

    :param args: parsed command options
    :param api: API connection passed on to the helper functions
    :param common_options: not read in the visible body
    :param session_file: log file passed on to the helper functions

    NOTE(review): the visible body ends once ``script_id`` is set and has
    no ``return``; the rest of the function may be missing from this view.
    """
    # retrieve the modeling resource to be retrained by tag or id
    resource_id = None
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            query_string = "tags=%s" % tag
            resource_id = get_last_resource(
                model_type.replace("_", ""),
                api=api, query_string=query_string)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)." %
                         (model_type.replace("_", " "), tag))
            # NOTE(review): reference_tag is not read in the visible body
            reference_tag = tag
            break

    if resource_id is None:
        # neither an ID nor a tag was provided: stop with a clear message
        # instead of failing later on an unbound name
        sys.exit("Failed to find the model to retrain."
                 " Please, provide its ID or its tag.")

    if args.upgrade:
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            # best-effort cleanup: the directory may not exist yet
            pass
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource(
            "script", api=api, query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:
        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True,
                                     api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                    "retrain", "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = [
                'whizzml',
                '--package-dir', INCREMENTAL_PACKAGE_PATH,
                '--output-dir', BIGMLER_SCRIPTS_DIRECTORY,
            ]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute',
            '--script', reify_script,
            '--output-dir', args.output_dir,
        ]
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
def retrain_model(args, api, command, session_file=None):
    """Retrieve or create the retrain script for a model and execute it
    with the new provided data

    Steps, as performed below:
      - if ``UPGRADE_FILE`` can be removed, the local scripts directory is
        deleted and the reify script is rebuilt; otherwise its ID is read
        from the local scripts directory (and built only if absent)
      - the model is taken from ``args.resource_id`` or from the first
        ``<model type>_tag`` option that is set
      - the dataset referenced by the model gets the reference tag
      - the retrain script is looked up by that tag (unless
        ``args.upgrade``) and created when none is retrieved
      - with ``args.add``, the retrain script is executed on the new data
        and the resulting model is reported through ``log_message``

    :param args: parsed command options
    :param api: API connection (rebound to the one ``create_input``
                returns when ``args.add`` is set)
    :param command: command object; ``propagate`` is called on it and it
                    is passed to ``create_input``
    :param session_file: log file passed on to the helper functions
    """
    retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                "retrain", "scripts")
    try:
        # UPGRADE_FILE being removable triggers a rebuild of the scripts
        os.remove(UPGRADE_FILE)
        reify_script = None
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass
    except OSError:
        # look for the script that creates the rebuild script.
        reify_script = get_script_id(retrain_file)

    if reify_script is None:
        # new bigmler command: creating the scriptify scripts
        whizzml_command = [
            'whizzml',
            '--package-dir', INCREMENTAL_PACKAGE_PATH,
            '--output-dir', BIGMLER_SCRIPTS_DIRECTORY,
        ]
        add_api_context(whizzml_command, args)
        whizzml_dispatcher(args=whizzml_command)
        reify_script = get_script_id(retrain_file)

    # retrieve the modeling resource to be retrained by tag or id
    resource_id = None
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            query_string = "tags=%s" % tag
            resource_id = get_first_resource(
                model_type.replace("_", ""),
                api=api, query_string=query_string)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)." %
                         (model_type.replace("_", " "), tag))
            reference_tag = tag
            break

    if resource_id is None:
        # neither an ID nor a tag was provided: stop with a clear message
        # instead of failing below on an unbound name
        sys.exit("Failed to find the model to retrain."
                 " Please, provide its ID or its tag.")

    # updating the dataset that generated the model with the reference tag
    model = api.getters[get_resource_type(resource_id)](resource_id)
    dataset_id = model["object"]["dataset"]
    dataset = api.get_dataset(dataset_id)
    tags = dataset["object"]["tags"]
    if reference_tag not in tags:
        tags.append(reference_tag)
        api.update_dataset(dataset_id, {"tags": tags})

    # if --upgrade, we force rebuilding the scriptified script
    if args.upgrade:
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=%s" % reference_tag
        script_id = get_last_resource(
            "script", api=api, query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:
        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True,
                                     api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute',
            '--script', reify_script,
            '--tag', reference_tag,
            '--output-dir', args.output_dir,
        ]
        command.propagate(execute_command)
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)

    # apply the retrain script to the new data:
    # add new data: depending on the script we will need to use
    # a source-url, a source or a dataset
    if args.add:
        script_inputs = api.get_script(script_id)['object']['inputs']
        input_type = script_inputs[0]['type']
        command_args, api, exe_session_file = create_input(
            args, api, input_type, script_id, command)

        # process the command
        execute_whizzml(command_args, api, exe_session_file)

        # the execution output file stores the new model under 'result'
        with open("%s.json" % command_args.output) as file_handler:
            model_resource_id = json.load(file_handler)['result']
        retrieve_url = last_resource_url(
            resource_id, api,
            "limit=1;full=yes;tags=%s" % reference_tag)
        message = (u'The new retrained model is: %s.\n'
                   u'You can use the\n\n%s\n\nquery to retrieve the latest'
                   u' retrained model.\n\n') % \
            (model_resource_id, retrieve_url)
        log_message(message, log_file=session_file, console=1)
def script_processing(api, args, session_file=None, path=None, log=None):
    """Creating or retrieving a script

    Three sources are considered, in this order:
      - code (``args.code_file`` or ``args.code``): the script is recovered
        through ``c.checkpoint`` when resuming, looked up with
        ``u.get_last_resource`` when ``args.upgrade`` is set, and created
        otherwise. Files in ``args.embedded_imports_`` are prepended to the
        source code.
      - ``args.script``: its ID is extracted with
        ``bigml.api.get_script_id``
      - ``args.scripts``: the IDs in ``args.script_ids`` are used

    :param api: API connection passed on to the helper functions
    :param args: parsed command options (``resume``, ``project_id`` and
                 ``script`` may be updated in place)
    :param session_file: log file passed on to the logging helpers
    :param path: output directory passed on to the helper functions
    :param log: log handle passed on to the helper functions
    :return: ``(script, scripts)`` tuple: the first script and the list of
             all of them; ``(None, [])`` when nothing was provided
    """
    script = None
    # default when neither code nor scripts are provided, so that the
    # final return never hits an unbound name
    scripts = []
    resume = args.resume
    if args.code_file or args.code:
        # If resuming, try to extract args.script from log files
        if resume:
            message = u.dated("Script not found. Resuming.\n")
            resume, args.script = c.checkpoint(
                c.are_scripts_created, path, debug=args.debug,
                message=message, log_file=session_file,
                console=args.verbosity)
            script = args.script

        if not resume:
            args.resume = resume
            imports_code = []
            if args.embedded_imports is not None:
                for import_file in args.embedded_imports_:
                    with open(import_file) as code_file:
                        imports_code.append(code_file.read())
            if args.code_file:
                try:
                    with open(args.code_file) as code_file:
                        source_code = code_file.read()
                except IOError:
                    sys.exit("Failed to find the source code file: %s" %
                             args.code_file)
            else:
                source_code = args.code
            if imports_code:
                source_code = "%s\n%s" % ("\n".join(imports_code),
                                          source_code)
            # Check if there's a created project for it
            args.project_id = pp.project_processing(
                api, args, resume, session_file=session_file,
                path=path, log=log)
            # Check if we are upgrading
            if args.upgrade:
                script = u.get_last_resource(
                    "script", api, build_query_string(args))
                r.log_created_resources("script", path, script, mode='a')
                # only announce a script when the lookup returned one;
                # otherwise a new script is created below
                if script is not None:
                    message = u.dated("Script found: %s"
                                      "\n (script ID: %s)\n" %
                                      (args.name, script))
                    u.log_message(message, log_file=session_file,
                                  console=args.verbosity)

            if script is None:
                script_args = r.set_script_args(args)
                add_version_tag(script_args, args.name)
                script = r.create_script(
                    source_code, script_args, args, api,
                    path, session_file, log)

        # NOTE(review): basestring is a Python 2 builtin; running this on
        # Python 3 needs a compatibility alias in scope
        args.script = script if isinstance(script, basestring) else \
            script.get('resource')
        scripts = [script]
    # If a script is provided either through the command line or in resume
    # steps, we use it.
    elif args.script:
        script = bigml.api.get_script_id(args.script)
        scripts = [script]
    elif args.scripts:
        scripts = list(args.script_ids)
        script = scripts[0]
    return script, scripts