Example #1
def create_input(args, api, input_type, script_id):
    """ Creates the resources used as input for the retrain script when adding
        new data.
        When remote sources are used, the input is usually the remote url.
        If a local source is used, then the input should be a source-id
        or a dataset-id

    """
    if input_type in ['source-id', 'dataset-id']:

        source_command = ["main", "--train", args.add,
                          "--output-dir", args.output_dir,
                          STOP_WORKFLOW[input_type]]
        command_args, _, _, main_session_file, _ = get_context( \
            source_command, MAIN_SETTINGS)
        command_args.predictions = command_args.output
        a.get_output_args(api, command_args, False)
        compute_output(api, command_args)
        resource_type = input_type[:-3]
        resource_id = getattr(command_args, resource_type)
    else:
        resource_type = "source-url"
        resource_id = args.add
    # apply the retrain script to the new resource
    execute_command = ['execute',
                       '--script', script_id,
                       '--output-dir', args.output_dir]
    add_api_context(execute_command, args)

    command_args, _, _, exe_session_file, _ = get_context( \
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["%s1" % resource_type, resource_id],
                               ["datasets-limit", args.window_size]]
    command_args.inputs = json.dumps(command_args.arguments_)
    return command_args
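A hedged sketch of how this create_input variant could be wired into the retrain flow. The api.get_script lookup and the execute_whizzml call mirror the retrain_model examples further down this page; treat the wiring itself as an assumption, not a documented entry point.

# Hedged usage sketch (assumed wiring, not confirmed by this page):
# ask the retrain script which input it expects, build the execution
# arguments with create_input, then run the script with execute_whizzml.
# args, api, script_id and session_file are assumed to come from the caller.
script_inputs = api.get_script(script_id)['object']['inputs']
input_type = script_inputs[0]['type']   # e.g. "source-id" or "dataset-id"
command_args = create_input(args, api, input_type, script_id)
execute_whizzml(command_args, api, session_file)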
Example #2
def connector_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command_args, _, api, session_file, _ = get_context(args, SETTINGS)

    path = u.check_dir(command_args.output)
    log = None
    if command_args.log_file:
        u.check_dir(command_args.log_file)
        log = command_args.log_file
        # If --clear_logs the log files are cleared
        clear_log_files([log])
    if not command_args.external_connector_id and \
            u.has_connection_info(command_args):
        # create connector
        pec.connector_processing(api,
                                 command_args,
                                 command_args.resume,
                                 session_file=session_file,
                                 path=path,
                                 log=log)
    if command_args.external_connector_id and (
            command_args.connector_attributes or command_args.name
            or command_args.tag or command_args.description
            or command_args.category):
        # update connector's attributes
        pec.update_external_connector(command_args, api, command_args.resume, \
            session_file=session_file)

    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir,
                            log_file=session_file,
                            verbosity=command_args.verbosity)
Example #3
def retrain_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer retrain

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    # parses the command line to get the context args and the log files to use
    command_args, command, api, session_file, resume = get_context(
        args, SETTINGS)

    # --id or --model-tag, --ensemble-tag, etc. is compulsory
    if check_compulsory_options(command.flags, command_args):
        retrain_model(command_args,
                      api,
                      command.common_options,
                      session_file=session_file)
        u.log_message("_" * 80 + "\n", log_file=session_file)
    else:
        sys.exit("You must provide the ID of the resource to be"
                 " retrained in the --id option or a unique tag"
                 " to retrieve such ID."
                 " Type bigmler retrain --help\n"
                 " to see all the available options.")
Example #4
def reify_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, _, api, session_file, _ = get_context(args, SETTINGS)

    def logger(message):
        """Partial to log messages according to args.verbosity

        """
        u.log_message(u.dated(message), \
            log_file=session_file, console=command_args.verbosity)

    message = "Starting reification for %s\n\n" % command_args.resource_id
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    reify_resources(command_args, api)
    message = "\nReification complete. See the results in %s\n\n" % \
        command_args.output
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    u.log_message("_" * 80 + "\n", log_file=session_file)

    u.print_generated_files(command_args.output_dir,
                            log_file=session_file,
                            verbosity=command_args.verbosity)
Example #5
def project_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    path = u.check_dir(command_args.output)
    log = None
    if command_args.log_file:
        u.check_dir(command_args.log_file)
        log = command_args.log_file
        # If --clear_logs the log files are cleared
        clear_log_files([log])
    if not command_args.project_id and command_args.name:
        command_args.project = command_args.name
    if command_args.project:
        # create project
        pp.project_processing(
            api, command_args, command_args.resume, session_file=session_file,
            path=path, log=log, create=True)
    if command_args.project_id and (
            command_args.project_attributes or
            command_args.name or command_args.tag or command_args.description
            or command_args.category):
        # update project's attributes
        pp.update_project(command_args, api, command_args.resume, \
            session_file=session_file)

    u.log_message("_" * 80 + "\n", log_file=session_file)
    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)
Example #6
def reify_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    def logger(message):
        """Partial to log messages according to args.verbosity

        """
        u.log_message(u.dated(message), \
            log_file=session_file, console=command_args.verbosity)

    print(command_args.output, command_args.output_dir)
    message = "Starting reification for %s\n\n" % command_args.resource_id
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    reify_resources(command_args, api, logger)
    message = "\nReification complete. See the results in %s\n\n" % \
        command_args.output
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    u.log_message("_" * 80 + "\n", log_file=session_file)

    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)
Example #7
def execute_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, _, api, session_file, _ = get_context(args, SETTINGS)

    # process the command
    execute_whizzml(command_args, api, session_file)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #8
def create_input(args, api, input_type, script_id, command):
    """ Creates the resources used as input for the retrain script when adding
        new data.
        When remote sources are used, the input is usually the remote url.
        If a local source is used, then the input should be a source-id
        or a dataset-id

    """
    if input_type in ['source-id', 'dataset-id']:

        source_command = [
            "main", "--train", args.add, "--output-dir", args.output_dir,
            STOP_WORKFLOW[input_type]
        ]
        command.propagate(source_command)
        command_args, _, _, main_session_file, _ = get_context( \
            source_command, MAIN_SETTINGS)
        command_args.predictions = command_args.output
        a.get_output_args(api, command_args, False)
        compute_output(api, command_args)
        resource_type = input_type[:-3]
        resource_id = getattr(command_args, resource_type)
    else:
        resource_type = "source-url"
        resource_id = args.add
    # apply the retrain script to the new resource
    execute_command = [
        'execute', '--script', script_id, '--output-dir', args.output_dir
    ]
    command.propagate(execute_command)
    command_args, _, _, exe_session_file, _ = get_context( \
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["%s1" % resource_type, resource_id],
                               ["datasets-limit", args.window_size]]
    command_args.inputs = json.dumps(command_args.arguments_)

    return command_args, api, exe_session_file
Example #9
def execute_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # process the command
    execute_whizzml(command_args, api, session_file)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #10
def association_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, _, api, session_file, resume = get_context(args, SETTINGS)

    # Selects the action to perform
    if a.has_train(command_args) or a.has_test(command_args):
        output_args = a.get_output_args(api, command_args, resume)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #11
def association_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # Selects the action to perform
    if a.has_train(command_args) or a.has_test(command_args):
        output_args = a.get_output_args(api, command_args, resume)
        compute_output(**output_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #12
def pca_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)

    command_args, _, api, session_file, _ = get_context(args, settings)

    # Selects the action to perform
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.export_fields):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #13
def fusion_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    if '--evaluate' in args:
        settings.update({"default_output": "evaluation"})

    command_args, _, api, session_file, _ = get_context(args, settings)

    # Selects the action to perform
    if a.has_value(command_args, "fusion_models_") or a.has_test(command_args):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #14
def whizzml_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer whizzml

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, _, resume = get_context(args, SETTINGS)

    # package_dir
    if command_args.package_dir is not None:
        command_args.package_dir = os.path.expanduser(command_args.package_dir)
        create_package(command_args, api, command, resume=resume)
    else:
        sys.exit("You must use the --package-dir flag pointing to the"
                 " directory where the metadata.json file is. Type\n"
                 "    bigmler whizzml --help\n"
                 " to see all the available options.")
Example #15
def pca_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   settings)

    # Selects the action to perform
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.export_fields):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #16
def main_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    if '--evaluate' in args:
        settings.update({"default_output": "evaluation"})

    command_args, _, api, session_file, _ = get_context(args, settings)
    # the predictions flag takes precedence when storing the results
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.votes_dirs):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #17
def main_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    if '--evaluate' in args:
        settings.update({"default_output": "evaluation"})

    command_args, command, api, session_file, resume = get_context(args,
                                                                   settings)
    # the predictions flag takes precedence when storing the results
    if (a.has_train(command_args) or a.has_test(command_args)
            or command_args.votes_dirs):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #18
def fusion_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different processing functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    settings = {}
    settings.update(SETTINGS)
    if '--evaluate' in args:
        settings.update({"default_output": "evaluation"})

    command_args, command, api, session_file, resume = get_context(args,
                                                                   settings)

    # Selects the action to perform
    if a.has_value(command_args, "fusion_models_") or a.has_test(command_args):
        compute_output(api, command_args)
    u.log_message("_" * 80 + "\n", log_file=session_file)
Example #19
def export_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different export functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, _, api, session_file, _ = get_context(args, SETTINGS)
    # pick the resource (ensemble or model) whose code will be exported
    resource = command_args.ensemble or command_args.model
    message = "Generating %s code for %s\n\n" % (command_args.language,
                                                 resource)
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    export_code(command_args, api)
    u.log_message("_" * 80 + "\n", log_file=session_file)

    u.print_generated_files(command_args.output_dir,
                            log_file=session_file,
                            verbosity=command_args.verbosity)
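A hypothetical call to the export dispatcher. Only the attribute names (model, ensemble, language, output_dir) are visible in the code above; the flag spellings and the values here are assumptions for illustration.

# Illustrative only: placeholder model ID; flag names assumed to mirror the
# command_args attributes read by export_dispatcher above.
export_command = ["export",
                  "--model", "model/000000000000000000000000",
                  "--language", "python",
                  "--output-dir", "export_out"]
export_dispatcher(args=export_command)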
Example #20
def export_dispatcher(args=sys.argv[1:]):
    """Parses command line and calls the different export functions

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)
    # pick the resource (ensemble or model) whose code will be exported
    resource = command_args.ensemble or command_args.model
    message = "Generating %s code for %s\n\n" % (command_args.language,
                                                 resource)
    u.log_message(message, \
        log_file=session_file, console=command_args.verbosity)
    export_code(command_args, api)
    u.log_message("_" * 80 + "\n", log_file=session_file)

    u.print_generated_files(command_args.output_dir, log_file=session_file,
                            verbosity=command_args.verbosity)
Example #21
def whizzml_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer whizzml

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # package_dir
    if command_args.package_dir is not None:
        command_args.package_dir = os.path.expanduser(command_args.package_dir)
        create_package(command_args, api, command.common_options,
                       resume=resume)
    else:
        sys.exit("You must use the --package-dir flag pointing to the"
                 " directory where the metadata.json file is. Type\n"
                 "    bigmler whizzml --help\n"
                 " to see all the available options.")
Example #22
def retrain_dispatcher(args=sys.argv[1:]):
    """Main processing of the parsed options for BigMLer retrain

    """

    # If --clear-logs the log files are cleared
    if "--clear-logs" in args:
        clear_log_files(LOG_FILES)

    # parses the command line to get the context args and the log files to use
    command_args, command, api, session_file, resume = get_context(args,
                                                                   SETTINGS)

    # --id or --model-tag, --ensemble-tag, etc. is compulsory
    if check_compulsory_options(command.flags, command_args):
        retrain_model(command_args, api, command.common_options,
                      session_file=session_file)
        u.log_message("_" * 80 + "\n", log_file=session_file)
    else:
        sys.exit("You must provide the ID of the resource to be"
                 " retrained in the --id option or a unique tag"
                 " to retrieve such ID."
                 " Type bigmler retrain --help\n"
                 " to see all the available options.")
Example #23
                           '--script', reify_script,
                           '--output-dir', args.output_dir]
        command_args, _, _, exe_session_file, _ = get_context(execute_command,
                                                              EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)

    # apply the retrain script to the new data:
    # create a source with the new data
    if args.add:
        source_command = ["main", "--train", args.add, "--no-dataset",
                          "--output-dir", args.output_dir]
        command_args, _, _, main_session_file, _ = get_context(source_command,
                                                               MAIN_SETTINGS)
        command_args.predictions = command_args.output
        a.get_output_args(api, command_args, False)
        compute_output(api, command_args)
        source_id = command_args.source
        # apply the retrain script to the new source
        execute_command = ['execute',
                           '--script', script_id,
                           '--output-dir', args.output_dir]
        command_args, _, _, exe_session_file, _ = get_context(execute_command,
                                                              EXE_SETTINGS)
        command_args.arguments_ = [["source1", source_id],
                                   ["datasets-limit", args.window_size]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
Example #24
    # check whether the resource exists
    try:
        check_resource(resource_id, raise_on_error=True, api=api)
    except Exception as exc:
        sys.exit("Failed to find the resource %s. Please, check its ID and"
                 " the connection info (domain and credentials)." %
                 resource_id)

    reify_script = whizzml_script(args, api)

    # apply the reify script to the resource
    execute_command = [
        'execute', '--script', reify_script, '--output-dir', args.output_dir
    ]
    command_args, _, _, exe_session_file, _ = get_context( \
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["res-id", resource_id]]
    command_args.inputs = json.dumps(command_args.arguments_)

    # process the command
    session_file = None
    execute_whizzml(command_args, api, session_file)
    with open("%s.json" % command_args.output) as file_handler:
        exe_output = json.load(file_handler)['result']

    if args.language == "nb":
        write_nb_output(resource_id, \
            exe_output, args.output.replace(".py", ".ipynb"), api)
        return
    elif args.language == "whizzml":
        output = exe_output["source_code"]
Example #25
    # check whether the resource exists
    try:
        check_resource(resource_id, raise_on_error=True, api=api)
    except Exception as exc:
        sys.exit("Failed to find the resource %s. Please, check its ID and"
                 " the connection info (domain and credentials)." %
                 resource_id)

    reify_script = whizzml_script(args, api)

    # apply the reify script to the resource
    execute_command = ['execute',
                       '--script', reify_script,
                       '--output-dir', args.output_dir]
    command_args, _, _, exe_session_file, _ = get_context( \
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["res-id", resource_id]]
    command_args.inputs = json.dumps(command_args.arguments_)

    # process the command
    session_file = None
    execute_whizzml(command_args, api, session_file)
    with open("%s.json" % command_args.output) as file_handler:
        exe_output = json.load(file_handler)['result']

    if args.language == "nb":
        write_nb_output(resource_id, \
            exe_output, args.output.replace(".py", ".ipynb"), api)
        return
    elif args.language == "whizzml":
        output = exe_output["source_code"]
Example #26
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)

    # apply the retrain script to the new data:
    # create a source with the new data
    if args.add:
        source_command = [
            "main", "--train", args.add, "--no-dataset", "--output-dir",
            args.output_dir
        ]
        command_args, _, _, main_session_file, _ = get_context(
            source_command, MAIN_SETTINGS)
        command_args.predictions = command_args.output
        a.get_output_args(api, command_args, False)
        compute_output(api, command_args)
        source_id = command_args.source
        # apply the retrain script to the new source
        execute_command = [
            'execute', '--script', script_id, '--output-dir', args.output_dir
        ]
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["source1", source_id],
                                   ["datasets-limit", args.window_size]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
Example #27
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    """

    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            if hasattr(args, "%s_tag" % model_type) and \
                    getattr(args, "%s_tag" % model_type) is not None:
                tag = getattr(args, "%s_tag" % model_type)
                query_string = "tags=%s" % tag
                resource_id = get_last_resource( \
                    model_type.replace("_", ""),
                    api=api,
                    query_string=query_string)
                if resource_id is None:
                    sys.exit("Failed to find the %s with tag %s. "
                             "Please, check the tag and"
                             " the connection info (domain and credentials)." %
                             (model_type.replace("_", " "), tag))
                reference_tag = tag
                break
    if args.upgrade:
        shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource( \
            "script",
            api=api,
            query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception as exc:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, "retrain",
                                    "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = [
                'whizzml', '--package-dir', INCREMENTAL_PACKAGE_PATH,
                '--output-dir', BIGMLER_SCRIPTS_DIRECTORY
            ]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute', '--script', reify_script, '--output-dir',
            args.output_dir
        ]
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)
Example #28
def retrain_model(args, api, command, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    """

    retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY, "retrain",
                                "scripts")
    try:
        os.remove(UPGRADE_FILE)
        reify_script = None
        try:
            shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        except OSError:
            pass
    except OSError:
        # look for the script that creates the rebuild script.
        reify_script = get_script_id(retrain_file)

    if reify_script is None:
        # new bigmler command: creating the scriptify scripts
        whizzml_command = [
            'whizzml', '--package-dir', INCREMENTAL_PACKAGE_PATH,
            '--output-dir', BIGMLER_SCRIPTS_DIRECTORY
        ]
        add_api_context(whizzml_command, args)
        whizzml_dispatcher(args=whizzml_command)
        reify_script = get_script_id(retrain_file)

    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            if hasattr(args, "%s_tag" % model_type) and \
                    getattr(args, "%s_tag" % model_type) is not None:
                tag = getattr(args, "%s_tag" % model_type)
                query_string = "tags=%s" % tag
                resource_id = get_first_resource( \
                    model_type.replace("_", ""),
                    api=api,
                    query_string=query_string)
                if resource_id is None:
                    sys.exit("Failed to find the %s with tag %s. "
                             "Please, check the tag and"
                             " the connection info (domain and credentials)." %
                             (model_type.replace("_", " "), tag))
                reference_tag = tag
                break
    # updating the dataset that generated the model with the reference tag
    model = api.getters[get_resource_type(resource_id)](resource_id)
    dataset_id = model["object"]["dataset"]
    dataset = api.get_dataset(dataset_id)
    tags = dataset["object"]["tags"]
    if reference_tag not in tags:
        tags.append(reference_tag)
        api.update_dataset(dataset_id, {"tags": tags})

    # if --upgrade, we force rebuilding the scriptified script
    if args.upgrade:
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=%s" % reference_tag
        script_id = get_last_resource( \
            "script",
            api=api,
            query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # new bigmler command: creating the retrain script
        execute_command = [
            'execute', '--script', reify_script, '--tag', reference_tag,
            '--output-dir', args.output_dir
        ]
        command.propagate(execute_command)
        command_args, _, _, exe_session_file, _ = get_context(
            execute_command, EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)

        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)

    # apply the retrain script to the new data:
    # add new data: depending on the script we will need to use
    # a source-url, a source or a dataset
    if args.add:
        script_inputs = api.get_script(script_id)['object']['inputs']
        input_type = script_inputs[0]['type']
        command_args, api, exe_session_file = \
            create_input(args, api, input_type, script_id, command)

        # process the command
        execute_whizzml(command_args, api, exe_session_file)

        with open("%s.json" % command_args.output) as file_handler:
            model_resource_id = json.load(file_handler)['result']
            message = (u'The new retrained model is: %s.\n'
                       u'You can use the\n\n%s\n\nquery to retrieve the latest'
                       u' retrained model.\n\n') % \
                (model_resource_id, last_resource_url( \
                resource_id, api, \
                "limit=1;full=yes;tags=%s" % reference_tag))
            log_message(message, log_file=session_file, console=1)
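A hedged sketch of how this retrain_model variant is presumably invoked from the dispatcher. The retrain_dispatcher examples above pass command.common_options to the older signature, so passing the whole command object here is an inference from the signature, not something this page shows.

# Assumed wiring, mirroring retrain_dispatcher: parse the command line and
# hand the parsed context to this variant of retrain_model. Passing the full
# command object instead of command.common_options is an assumption.
command_args, command, api, session_file, resume = get_context(args, SETTINGS)
if check_compulsory_options(command.flags, command_args):
    retrain_model(command_args, api, command, session_file=session_file)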
Example #29
def reify_resources(args, api):
    """ Extracts the properties of the created resources and generates
        code to rebuild them

    """

    resource_id = get_resource_id(args.resource_id)
    if resource_id is None:
        sys.exit("Failed to match a valid resource ID. Please, check: %s" %
                 args.resource_id)

    # check whether the resource exists
    try:
        check_resource(resource_id, raise_on_error=True, api=api)
    except Exception:
        sys.exit("Failed to find the resource %s. Please, check its ID and"
                 " the connection info (domain and credentials)." %
                 resource_id)

    reify_script = whizzml_script(args, api)

    # apply the reify script to the resource
    execute_command = [
        'execute', '--script', reify_script, '--output-dir', args.output_dir
    ]
    command_args, _, _, _, _ = get_context( \
        execute_command, EXE_SETTINGS)
    command_args.arguments_ = [["res-id", resource_id]]
    command_args.inputs = json.dumps(command_args.arguments_)

    # process the command
    session_file = None
    execute_whizzml(command_args, api, session_file)
    with open("%s.json" % command_args.output) as file_handler:
        exe_output = json.load(file_handler)['result']

    if args.language == "nb":
        write_nb_output(resource_id, \
            exe_output, args.output.replace(".py", ".ipynb"), api)
        return
    elif args.language == "whizzml":
        output = exe_output["source_code"]
        args.output = args.output.replace(".py", ".whizzml")
        exe_output["source_code"] = args.output
        exe_output["kind"] = "script"
        with open(os.path.join(os.path.dirname(args.output), "metadata.json"),
                  "w") as meta_handler:
            meta_handler.write(json.dumps(exe_output))
    else:
        output = python_output(exe_output, api)
        prefix = u"""\
#!/usr/bin/env python
# -*- coding: utf-8 -*-
\"\"\"Python code to reify %s

Generated by BigMLer
\"\"\"


def main():

""" % resource_id
        suffix = u"""\
if __name__ == "__main__":
    main()
"""
        output = "%s%s\n%s" % (prefix, output, suffix)

    write_to_utf8(args.output, output)

    sts = os.stat(args.output)
    os.chmod(args.output, sts.st_mode | stat.S_IEXEC)
Example #30
def retrain_model(args, api, common_options, session_file=None):
    """Retrieve or create the retrain script for a model and
    execute it with the new provided data

    """

    # retrieve the modeling resource to be retrained by tag or id
    if args.resource_id:
        resource_id = args.resource_id
        reference_tag = "retrain:%s" % resource_id
    else:
        for model_type in MODEL_TYPES:
            if hasattr(args, "%s_tag" % model_type) and \
                    getattr(args, "%s_tag" % model_type) is not None:
                tag = getattr(args, "%s_tag" % model_type)
                query_string = "tags=%s" % tag
                resource_id = get_last_resource( \
                    model_type.replace("_", ""),
                    api=api,
                    query_string=query_string)
                if resource_id is None:
                    sys.exit("Failed to find the %s with tag %s. "
                             "Please, check the tag and"
                             " the connection info (domain and credentials)." %
                             (model_type.replace("_", " "), tag))
                reference_tag = tag
                break
    if args.upgrade:
        shutil.rmtree(BIGMLER_SCRIPTS_DIRECTORY)
        script_id = None
    else:
        # check for the last script used to retrain the model
        query_string = "tags=retrain:%s" % resource_id
        script_id = get_last_resource( \
            "script",
            api=api,
            query_string=query_string)

    if script_id is None:
        # if the script to retrain does not exist:

        # check whether the model exists
        try:
            bigml.api.check_resource(resource_id, raise_on_error=True, api=api)
        except Exception as exc:
            sys.exit("Failed to find the model %s. Please, check its ID and"
                     " the connection info (domain and credentials)." %
                     resource_id)

        # look for the script that creates the rebuild script.
        retrain_file = os.path.join(BIGMLER_SCRIPTS_DIRECTORY,
                                    "retrain",
                                    "scripts")
        reify_script = get_script_id(retrain_file)

        if reify_script is None:
            # new bigmler command: creating the scriptify scripts
            whizzml_command = ['whizzml',
                               '--package-dir', INCREMENTAL_PACKAGE_PATH,
                               '--output-dir', BIGMLER_SCRIPTS_DIRECTORY]
            whizzml_dispatcher(args=whizzml_command)
            reify_script = get_script_id(retrain_file)

        # new bigmler command: creating the retrain script
        execute_command = ['execute',
                           '--script', reify_script,
                           '--output-dir', args.output_dir]
        command_args, _, _, exe_session_file, _ = get_context(execute_command,
                                                              EXE_SETTINGS)
        command_args.arguments_ = [["model-resource", resource_id]]
        command_args.inputs = json.dumps(command_args.arguments_)
        # process the command
        execute_whizzml(command_args, api, session_file)
        script_id = extract_retrain_id(command_args, api, session_file)