示例#1
0
def whizzml_script(args, api):
    """Returns the ID of the script to be used to generate the output

    This excerpt only covers the first steps: looking for an already
    created script by name (unless `args.upgrade` is set) and removing the
    local directory of the language script when the script has to be
    (re)built. No return statement is visible in the excerpt.

    `args` provides the `language` and `upgrade` options and `api` is the
    connection handed over to `get_last_resource`.

    """
    # each language has its own script, so first check:
    # - whether the script exists in the account
    # - whether it has the same version
    # else, we act as if we wanted to upgrade the script
    language_dir = SCRIPT_FILE.get(args.language, args.language)
    script_dir = os.path.join(REIFY_PACKAGE_PATH, language_dir)

    reify_script = None
    if not args.upgrade:
        # the script is retrieved by name, and the name is read from the
        # metadata file stored in the package directory
        with open(os.path.join(script_dir, "metadata.json")) as meta_file:
            meta = json.load(meta_file)
        # check for the last script available with that name
        query_string = "name=%s" % meta["name"]
        reify_script = get_last_resource(
            "script",
            api=api,
            query_string=query_string)

    # create or retrieve the script to generate the output
    # if --upgrade, we force rebuilding the scriptified script
    if reify_script is None:
        try:
            shutil.rmtree(
                os.path.join(BIGMLER_SCRIPTS_DIRECTORY, language_dir))
        except Exception:
            # best-effort cleanup: failing to remove the directory
            # (e.g. because it does not exist yet) must not stop the process
            pass
示例#2
0
def whizzml_script(args, api):
    """Returns the ID of the script to be used to generate the output

    This excerpt only covers the first steps: looking for an already
    created script by name (unless `args.upgrade` is set) and removing the
    local directory of the language script when the script has to be
    (re)built. No return statement is visible in the excerpt.

    `args` provides the `language` and `upgrade` options and `api` is the
    connection handed over to `get_last_resource`.

    """
    # each language has its own script, so first check:
    # - whether the script exists in the account
    # - whether it has the same version
    # else, we act as if we wanted to upgrade the script
    language_dir = SCRIPT_FILE.get(args.language, args.language)
    script_dir = os.path.join(REIFY_PACKAGE_PATH, language_dir)

    if args.upgrade:
        # upgrading: ignore any script that might already exist
        reify_script = None
    else:
        # the script is retrieved by name, and the name is read from the
        # metadata file stored in the package directory
        metadata_path = os.path.join(script_dir, "metadata.json")
        with open(metadata_path) as meta_file:
            meta = json.load(meta_file)
        # check for the last script available with that name
        query_string = "name=%s" % meta["name"]
        reify_script = get_last_resource(
            "script", api=api, query_string=query_string)

    # create or retrieve the script to generate the output
    # if --upgrade, we force rebuilding the scriptified script
    if reify_script is None:
        local_scripts_dir = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                         language_dir)
        try:
            shutil.rmtree(local_scripts_dir)
        except Exception:
            # best-effort cleanup: failing to remove the directory
            # (e.g. because it does not exist yet) must not stop the process
            pass
示例#3
0
def library_processing(api, args, session_file=None, path=None, log=None):
    """Creating or retrieving a library

    Nothing is done (and None is returned) unless source code is provided
    through `args.code_file` or `args.code`. Otherwise the library is
    taken, in this order, from the checkpoint found when resuming, from
    the last existing library when `args.upgrade` is set, or from a newly
    created resource.

    As side effects, `args.resume` and `args.project_id` are updated and
    the process exits if `args.code_file` cannot be read.

    """

    library = None
    resume = args.resume
    if args.code_file or args.code:
        # If resuming, try to extract the library from the log files

        if resume:
            message = u.dated("Library not found. Resuming.\n")
            resume, library = c.checkpoint(c.is_library_created,
                                           path,
                                           debug=args.debug,
                                           message=message,
                                           log_file=session_file,
                                           console=args.verbosity)

        if not resume:
            args.resume = resume
            if args.code_file:
                try:
                    with open(args.code_file) as code_file:
                        source_code = code_file.read()
                except IOError:
                    sys.exit("Failed to find the source code file: %s" %
                             args.code_file)
            else:
                source_code = args.code
            # Check if there's a created project for it
            args.project_id = pp.project_processing(api,
                                                    args,
                                                    resume,
                                                    session_file=session_file,
                                                    path=path,
                                                    log=log)
            # Check if we are upgrading
            if args.upgrade:
                library = u.get_last_resource("library", api,
                                              build_query_string(args))
                # only report the library when the query really found one;
                # otherwise a "Library found" message with ID None would be
                # logged right before creating a new library
                if library is not None:
                    log_created_resources("library", path, library, mode='a')
                    message = u.dated("Library found: %s \n"
                                      "    (library ID: %s)\n" %
                                      (args.name, library))
                    u.log_message(message,
                                  log_file=session_file,
                                  console=args.verbosity)
            if library is None:
                library_args = rl.set_library_args(args)
                add_version_tag(library_args, args.name)
                library = rl.create_library(source_code, library_args, args,
                                            api, path, session_file, log)
    return library
示例#4
0
def library_processing(api, args,
                       session_file=None, path=None, log=None):
    """Creating or retrieving a library

    Nothing is done (and None is returned) unless source code is provided
    through `args.code_file` or `args.code`. Otherwise the library is
    taken, in this order, from the checkpoint found when resuming, from
    the last existing library when `args.upgrade` is set, or from a newly
    created resource.

    As side effects, `args.resume` and `args.project_id` are updated and
    the process exits if `args.code_file` cannot be read.

    """

    library = None
    resume = args.resume
    if args.code_file or args.code:
        # If resuming, try to extract the library from the log files

        if resume:
            message = u.dated("Library not found. Resuming.\n")
            resume, library = c.checkpoint(
                c.is_library_created, path, debug=args.debug, message=message,
                log_file=session_file, console=args.verbosity)

        if not resume:
            args.resume = resume
            if args.code_file:
                try:
                    with open(args.code_file) as code_file:
                        source_code = code_file.read()
                except IOError:
                    sys.exit("Failed to find the source code file: %s" %
                             args.code_file)
            else:
                source_code = args.code
            # Check if there's a created project for it
            args.project_id = pp.project_processing(
                api, args, resume, session_file=session_file,
                path=path, log=log)
            # Check if we are upgrading
            if args.upgrade:
                library = u.get_last_resource("library",
                                              api,
                                              build_query_string(args))
                # only report the library when the query really found one;
                # otherwise a "Library found" message with ID None would be
                # logged right before creating a new library
                if library is not None:
                    r.log_created_resources("library", path,
                                            library, mode='a')
                    message = u.dated("Library found: %s \n"
                                      "    (library ID: %s)\n" %
                                      (args.name, library))
                    u.log_message(message, log_file=session_file,
                                  console=args.verbosity)
            if library is None:
                library_args = r.set_library_args(args)
                add_version_tag(library_args, args.name)
                library = r.create_library(source_code, library_args, args, api,
                                           path, session_file, log)
    return library
示例#5
0
文件: retrain.py 项目: mmerce/bigmler
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    This excerpt covers identifying the model (by `args.resource_id` or by
    one of the `<model type>_tag` options), looking for an existing
    retrain script and, when there is none or `args.upgrade` is set,
    running the commands that create it. The process exits with an error
    message when the model cannot be identified or found.

    """

    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is not None:
                query_string = "tags=%s" % tag
                resource_id = get_last_resource(
                    model_type.replace("_", ""),
                    api=api,
                    query_string=query_string)
                if resource_id is None:
                    sys.exit("Failed to find the %s with tag %s. "
                             "Please, check the tag and"
                             " the connection info (domain and credentials)." %
                             (model_type.replace("_", " "), tag))
                reference_tag = tag
                break
        else:
            # neither an ID nor a tag was given: stop here instead of
            # failing later on an unbound resource_id
            sys.exit("Failed to find the model to retrain."
                     " Please, provide its ID or one of its tags.")
    if args.upgrade:
        # the local scripts are rebuilt from scratch; a missing directory
        # is not an error
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource(
            "script",
            api=api,
            query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                    "retrain",
                                    "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = ['whizzml',
                               '--package-dir', INCREMENTAL_PACKAGE_PATH,
                               '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = ['execute',
                           '--script', reify_script,
                           '--output-dir', args.output_dir]
        command_args, _, _, exe_session_file, _ = get_context(execute_command,
                                                              EXE_SETTINGS)
        # the model to be retrained is the only input of the script
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
示例#6
0
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    This excerpt covers identifying the model (by `args.resource_id` or by
    one of the `<model type>_tag` options), looking for an existing
    retrain script and, when there is none or `args.upgrade` is set,
    running the commands that create it. The process exits with an error
    message when the model cannot be identified or found.

    """

    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            query_string = "tags=%s" % tag
            resource_id = get_last_resource(
                model_type.replace("_", ""),
                api=api,
                query_string=query_string)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)." %
                         (model_type.replace("_", " "), tag))
            reference_tag = tag
            break
        else:
            # neither an ID nor a tag was given: stop here instead of
            # failing later on an unbound resource_id
            sys.exit("Failed to find the model to retrain."
                     " Please, provide its ID or one of its tags.")
    if args.upgrade:
        # the local scripts are rebuilt from scratch; a missing directory
        # is not an error
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource(
            "script", api=api, query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, "retrain",
                                    "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = [
                'whizzml', '--package-dir', INCREMENTAL_PACKAGE_PATH,
                '--output-dir', BIGMLER_SCRIPTS_DIRECTORY
            ]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute', '--script', reify_script, '--output-dir',
            args.output_dir
        ]
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        # the model to be retrained is the only input of the script
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
示例#7
0
def retrain_model(args, api, command, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    Steps:
    - make sure the local script that builds retrain scripts exists
      (it is rebuilt when the UPGRADE_FILE marker can be removed)
    - identify the model by `args.resource_id` or by one of the
      `<model type>_tag` options and tag its dataset with the reference tag
    - retrieve the retrain script tagged with the reference tag, or create
      it when missing or when `args.upgrade` is set
    - when `args.add` is set, execute the retrain script on the new data
      and log the ID of the resulting model

    The process exits with an error message when the model cannot be
    identified or found.

    """

    retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, "retrain",
                                "scripts")
    try:
        # removing the marker file succeeds only when an upgrade is
        # pending: in that case the local scripts are discarded
        os.remove(UPGRADE_FILE)
        reify_script = None
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass
    except OSError:
        # look for the script that creates the rebuild script.
        reify_script = get_script_id(retrain_file)

    if reify_script is None:
        # new bigmler command: creating the scriptify scripts
        whizzml_command = [
            'whizzml', '--package-dir', INCREMENTAL_PACKAGE_PATH,
            '--output-dir', BIGMLER_SCRIPTS_DIRECTORY
        ]
        add_api_context(whizzml_command, args)
        whizzml_dispatcher(args=whizzml_command)
        reify_script = get_script_id(retrain_file)

    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            tag = getattr(args, "%s_tag" % model_type, None)
            if tag is None:
                continue
            query_string = "tags=%s" % tag
            resource_id = get_first_resource(
                model_type.replace("_", ""),
                api=api,
                query_string=query_string)
            if resource_id is None:
                sys.exit("Failed to find the %s with tag %s. "
                         "Please, check the tag and"
                         " the connection info (domain and credentials)." %
                         (model_type.replace("_", " "), tag))
            reference_tag = tag
            break
        else:
            # neither an ID nor a tag was given: stop here instead of
            # failing later on an unbound resource_id
            sys.exit("Failed to find the model to retrain."
                     " Please, provide its ID or one of its tags.")
    # updating the dataset that generated the model with the reference tag
    model = api.getters[get_resource_type(resource_id)](resource_id)
    dataset_id = model["object"]["dataset"]
    dataset = api.get_dataset(dataset_id)
    tags = dataset["object"]["tags"]
    if reference_tag not in tags:
        tags.append(reference_tag)
        api.update_dataset(dataset_id, {"tags": tags})

    # if --upgrade, we force rebuilding the scriptified script
    if args.upgrade:
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=%s" % reference_tag
        script_id = get_last_resource(
            "script", api=api, query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute', '--script', reify_script, '--tag', reference_tag,
            '--output-dir', args.output_dir
        ]
        command.propagate(execute_command)
        command_args, _, _, _, _ = get_context(execute_command, EXE_SETTINGS)
        # the model to be retrained is the only input of the script
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)

    # apply the retrain script to the new data:
    # add new data: depending on the script we will need to use
    # a source-url, a source or a dataset
    if args.add:
        script_inputs = api.get_script(script_id)['object']['inputs']
        input_type = script_inputs[0]['type']
        command_args, api, exe_session_file = \
            create_input(args, api, input_type, script_id, command)

        # process the command
        execute_whizzml(command_args, api, exe_session_file)

        # the result of the execution is read from its JSON output file
        with open("%s.json" % command_args.output) as file_handler:
            model_resource_id = json.load(file_handler)['result']
        query_url = last_resource_url(
            resource_id, api,
            "limit=1;full=yes;tags=%s" % reference_tag)
        message = (u'The new retrained model is: %s.\n'
                   u'You can use the\n\n%s\n\nquery to retrieve the latest'
                   u' retrained model.\n\n') % (model_resource_id, query_url)
        log_message(message, log_file=session_file, console=1)
示例#8
0
def script_processing(api, args,
                      session_file=None, path=None, log=None):
    """Creating or retrieving a script

    Returns a `(script, scripts)` tuple, where `scripts` is a list and
    `script` its first element:
    - when source code is given (`args.code_file` or `args.code`), the
      script is taken from the checkpoint found when resuming, from the
      last existing script when `args.upgrade` is set, or is newly created
    - otherwise `args.script` or, failing that, `args.script_ids` are used
    - when none of these options is set, `(None, [])` is returned

    As side effects, `args.resume`, `args.project_id` and `args.script` can
    be updated and the process exits if `args.code_file` cannot be read.

    """
    script = None
    # defined upfront so that the final return cannot fail when no code,
    # script or scripts option is provided
    scripts = []
    resume = args.resume
    if args.code_file or args.code:
        # If resuming, try to extract args.script from log files

        if resume:
            message = u.dated("Script not found. Resuming.\n")
            resume, args.script = c.checkpoint(
                c.are_scripts_created, path, debug=args.debug, message=message,
                log_file=session_file, console=args.verbosity)
            script = args.script
        if not resume:
            args.resume = resume
            # code of the files to be embedded before the script code
            imports_code = []
            if args.embedded_imports is not None:
                for import_file in args.embedded_imports_:
                    with open(import_file) as code_file:
                        imports_code.append(code_file.read())
            if args.code_file:
                try:
                    with open(args.code_file) as code_file:
                        source_code = code_file.read()
                except IOError:
                    sys.exit("Failed to find the source code file: %s" %
                             args.code_file)
            else:
                source_code = args.code
            if imports_code:
                source_code = "%s\n%s" % ("\n".join(imports_code), source_code)
            # Check if there's a created project for it
            args.project_id = pp.project_processing(
                api, args, resume, session_file=session_file,
                path=path, log=log)

            # Check if we are upgrading
            if args.upgrade:
                script = u.get_last_resource("script",
                                             api,
                                             build_query_string(args))
                # only report the script when the query really found one;
                # otherwise a "Script found" message with ID None would be
                # logged right before creating a new script
                if script is not None:
                    r.log_created_resources("script", path,
                                            script, mode='a')
                    message = u.dated("Script found: %s"
                                      "\n    (script ID: %s)\n" %
                                      (args.name, script))
                    u.log_message(message, log_file=session_file,
                                  console=args.verbosity)
            if script is None:
                script_args = r.set_script_args(args)
                add_version_tag(script_args, args.name)
                script = r.create_script(source_code, script_args, args, api,
                                         path, session_file, log)

            # `basestring` only exists in Python 2
            try:
                string_types = basestring
            except NameError:
                string_types = str
            # the script can be either an ID or a resource structure
            if isinstance(script, string_types):
                args.script = script
            else:
                args.script = script.get('resource')
        scripts = [script]

    # If a script is provided either through the command line or in resume
    # steps, we use it.
    elif args.script:
        script = bigml.api.get_script_id(args.script)
        scripts = [script]
    elif args.scripts:
        scripts = list(args.script_ids)
        script = scripts[0]
    return script, scripts