Exemplo n.º 1
0
def set_pca_args(args, name=None, fields=None, pca_fields=None):
    """Return pca arguments dict

    Starts from the basic arguments for ``name`` and adds the seeds, the
    sample rate, the sample parameters and, when a fields structure is
    available, the input fields. The 'pca' entry of ``args.json_args``
    (if any) is applied last.

    :param args: command-line options object
    :param name: resource name. Defaults to ``args.name``
    :param fields: fields structure used to work out the input fields.
                   When it is None no ``input_fields`` entry is added
    :param pca_fields: fields to be used as inputs. Defaults to
                       ``args.pca_fields_``
    :return: the pca arguments dict
    """
    if name is None:
        name = args.name
    if pca_fields is None:
        pca_fields = args.pca_fields_

    pca_args = set_basic_args(args, name)
    seed = SEED if args.seed is None else args.seed
    pca_args.update({"seed": seed, "pca_seed": seed})

    pca_args.update({"sample_rate": args.sample_rate})
    pca_args = update_sample_parameters_args(pca_args, args)

    # Input fields can only be computed when a fields structure is given.
    # Leaving them as None avoids referencing an unbound local variable.
    input_fields = None
    if fields is not None:
        if pca_fields:
            input_fields = configure_input_fields(fields, pca_fields)
        else:
            # materialize the keys: a dict view is not a plain list
            input_fields = list(fields.fields.keys())

    if input_fields is not None:
        if args.exclude_objective:
            input_fields = [field for field in input_fields
                            if field not in args.exclude_fields]
        pca_args.update(input_fields=input_fields)

    if 'pca' in args.json_args:
        update_json_args(pca_args, args.json_args.get('pca'), fields)
    return pca_args
Exemplo n.º 2
0
def set_external_connector_args(args, name=None):
    """Return external connector arguments dict

    Besides building the returned dict, this function stores the
    connection information in ``args.connection_info``. It is collected
    from the connection attributes found in ``args`` or, when none of them
    is set, from ``get_env_connection_info()``, and then completed with
    ``args.hosts`` and ``args.connection_json_``.

    :param args: command-line options object (``connection_info`` is set
                 on it as a side effect)
    :param name: resource name. Defaults to ``args.name``
    :return: the external connector arguments dict
    """
    if name is None:
        name = args.name
    external_connector_args = set_basic_args(args, name)

    source = "postgresql" if args.source is None else args.source
    external_connector_args.update({"source": source})

    # dict.values() is a view in Python 3 and has no remove() method, so
    # the keys other than "source" are collected in a new list
    connection_keys = [key for key in EXTERNAL_CONNECTION_ATTRS.values()
                       if key != "source"]

    connection_info = {}
    for key in connection_keys:
        value = getattr(args, key, None)
        if value:
            connection_info[key] = value
    if not connection_info:
        # try to read environment variables
        connection_info = get_env_connection_info()
    args.connection_info = connection_info

    if args.hosts:
        args.connection_info.update({"hosts": args.hosts.split(",")})

    # rare arguments must be provided in a JSON file
    if args.connection_json_:
        args.connection_info.update(args.connection_json_)

    if 'external_connector' in args.json_args:
        update_json_args(external_connector_args,
                         args.json_args.get('external_connector'), None)

    return external_connector_args
Exemplo n.º 3
0
def set_evaluation_args(args, fields=None,
                        dataset_fields=None, name=None):
    """Build the arguments dict used for an evaluation.

    The basic arguments are extended with the combiner (and its threshold
    when needed), the fields map, the missing strategy and the
    'evaluation' entry of ``args.json_args``. Depending on the options, a
    range (time series) or the out-of-bag sampling attributes are added.
    Note that ``args.range_`` and ``args.sample_rate`` may be modified.

    """
    evaluation_args = set_basic_args(args,
                                     args.name if name is None else name)

    if hasattr(args, 'method'):
        if args.number_of_models > 1 or args.ensemble:
            evaluation_args["combiner"] = args.method
        if args.method:
            evaluation_args["combiner"] = args.method
            if args.method == THRESHOLD_CODE:
                threshold = {}
                threshold_k = getattr(args, 'threshold', None)
                if threshold_k is not None:
                    threshold["k"] = threshold_k
                threshold_class = getattr(args, 'threshold_class', None)
                if threshold_class is not None:
                    threshold["class"] = threshold_class
                evaluation_args["threshold"] = threshold

    if args.fields_map_ and fields is not None:
        if dataset_fields is None:
            dataset_fields = fields
        evaluation_args["fields_map"] = map_fields(
            args.fields_map_, fields, dataset_fields)

    missing_strategy = getattr(args, 'missing_strategy', None)
    if missing_strategy:
        evaluation_args["missing_strategy"] = missing_strategy

    if 'evaluation' in args.json_args:
        update_json_args(
            evaluation_args, args.json_args.get('evaluation'), fields)

    # time series with no test data are evaluated on a range of rows
    if (args.subcommand == "time-series" and args.test_split == 0
            and not args.has_test_datasets_):
        first_row = int(args.max_rows * EVALUATE_SAMPLE_RATE) + 1
        args.range_ = [first_row, args.max_rows]
        evaluation_args["range"] = args.range_
        return evaluation_args

    # out_of_bag and sample_rate are only used in two cases: standard
    # evaluations where only the training set is provided, and
    # cross_validation. Any of the following combinations means that test
    # data is available, so the arguments are returned as they are:
    #   [--dataset|--test] [--model|--models|--model-tag|--ensemble]
    #       --evaluate
    #   [--train|--dataset] --test-split --evaluate
    #   --datasets --test-datasets or equivalents
    test_data_available = (
        (((hasattr(args, "dataset") and args.dataset) or args.test_set)
         and args.has_supervised_)
        or (args.test_split > 0 and (args.training_set or args.dataset))
        or (args.has_datasets_
            and (args.has_test_datasets_ or args.dataset_off)))
    if test_data_available:
        return evaluation_args

    if args.sample_rate == 1:
        args.sample_rate = EVALUATE_SAMPLE_RATE
    evaluation_args.update({"out_of_bag": True,
                            "seed": SEED,
                            "sample_rate": args.sample_rate})
    return evaluation_args
Exemplo n.º 4
0
def set_project_args(args, name=None):
    """Build the arguments dict used for a project.

    The basic arguments for the given name (``args.name`` when no name is
    given) are extended with the 'project' entry of ``args.json_args``,
    when there is one.

    """
    project_args = set_basic_args(args, args.name if name is None else name)
    json_args = args.json_args
    if 'project' in json_args:
        update_json_args(project_args, json_args.get('project'), None)
    return project_args
Exemplo n.º 5
0
def set_fusion_args(args, name=None, fields=None):
    """Build the arguments dict used for a fusion.

    The basic arguments for the given name (``args.name`` when no name is
    given) are extended with the 'fusion' entry of ``args.json_args``,
    when there is one. ``fields`` is handed over to ``update_json_args``.

    """
    fusion_args = set_basic_args(args, args.name if name is None else name)
    json_args = args.json_args
    if 'fusion' in json_args:
        update_json_args(fusion_args, json_args.get('fusion'), fields)
    return fusion_args
Exemplo n.º 6
0
def set_sample_args(args, name=None):
    """Build the arguments dict used for a sample.

    The basic arguments for the given name (``args.name`` when no name is
    given) are extended with the 'sample' entry of ``args.json_args``,
    when there is one.

    """
    sample_args = set_basic_args(args, args.name if name is None else name)
    json_args = args.json_args
    if 'sample' in json_args:
        update_json_args(sample_args, json_args.get('sample'))
    return sample_args
Exemplo n.º 7
0
def set_forecast_args(args, fields=None):
    """Build the arguments dict used for a forecast.

    The basic arguments for ``args.name`` are extended with the intervals
    option and with the 'forecast' entry of ``args.json_args``, when there
    is one. ``fields`` is handed over to ``update_json_args``.

    """
    forecast_args = set_basic_args(args, args.name)
    forecast_args["intervals"] = args.intervals

    json_args = args.json_args
    if 'forecast' in json_args:
        update_json_args(forecast_args, json_args.get('forecast'), fields)

    return forecast_args
Exemplo n.º 8
0
def set_library_args(args, name=None):
    """Build the arguments dict used for a library.

    The basic arguments for the given name (``args.name`` when no name is
    given) are extended with the project and the imports, when they are
    set, and then with the 'library' entry of ``args.json_args``.

    """
    library_args = set_basic_args(args, args.name if name is None else name)

    project_id = args.project_id
    if project_id is not None:
        library_args["project"] = project_id
    # the option is checked in args.imports, the value is in args.imports_
    if args.imports is not None:
        library_args["imports"] = args.imports_

    update_attributes(library_args, args.json_args.get('library'))
    return library_args
Exemplo n.º 9
0
def set_basic_dataset_args(args, name=None):
    """Build the basic arguments dict used for a dataset.

    The seed and sample rate are added only when the sample rate is not 1
    and ``args.no_model`` is set. The range is added when ``args`` has a
    ``range`` attribute and ``args.range_`` is not empty.

    """
    dataset_args = set_basic_args(args, args.name if name is None else name)

    if args.sample_rate != 1 and args.no_model:
        seed = args.seed
        dataset_args["seed"] = SEED if seed is None else seed
        dataset_args["sample_rate"] = args.sample_rate

    if hasattr(args, "range") and args.range_:
        dataset_args["range"] = args.range_
    return dataset_args
Exemplo n.º 10
0
def set_topic_model_args(args,
                         name=None,
                         fields=None,
                         topic_model_fields=None):
    """Build the arguments dict used for a topic model.

    The basic arguments for the given name (``args.name`` when no name is
    given) are extended with the seeds, the input fields (only when both
    ``fields`` and the topic model fields are available), the text
    analysis options, the sample parameters and finally the 'topic_model'
    entry of ``args.json_args``, when there is one.

    """
    if topic_model_fields is None:
        topic_model_fields = args.topic_model_fields_

    topic_model_args = set_basic_args(args,
                                      args.name if name is None else name)

    seed = SEED if args.seed is None else args.seed
    topic_model_args["seed"] = seed
    topic_model_args["topicmodel_seed"] = seed

    if topic_model_fields and fields is not None:
        topic_model_args["input_fields"] = configure_input_fields(
            fields, topic_model_fields)

    # options that are always sent
    for option in ("sample_rate", "bigrams", "case_sensitive"):
        topic_model_args[option] = getattr(args, option)

    # options that are sent only when the user has set them
    for option in ("number_of_topics", "term_limit", "top_n_terms",
                   "minimum_name_terms"):
        value = getattr(args, option)
        if value is not None:
            topic_model_args[option] = value

    # the option is checked in args.excluded_terms, the value is read from
    # args.excluded_terms_
    if args.excluded_terms:
        topic_model_args["excluded_terms"] = args.excluded_terms_

    topic_model_args = update_sample_parameters_args(topic_model_args, args)

    json_args = args.json_args
    if 'topic_model' in json_args:
        update_json_args(topic_model_args, json_args.get('topic_model'),
                         fields)
    return topic_model_args
Exemplo n.º 11
0
def set_script_args(args, name=None):
    """Build the arguments dict used for a script.

    The basic arguments for the given name (``args.name`` when no name is
    given) are extended with the project, imports, inputs and outputs,
    when they are set, and then with the 'script' entry of
    ``args.json_args``.

    """
    script_args = set_basic_args(args, args.name if name is None else name)

    project_id = args.project_id
    if project_id is not None:
        script_args["project"] = project_id
    # the option is checked in args.imports, the value is in args.imports_
    if args.imports is not None:
        script_args["imports"] = args.imports_
    parameters = args.parameters_
    if parameters is not None:
        script_args["inputs"] = parameters
    declared_outputs = args.declare_outputs_
    if declared_outputs:
        script_args["outputs"] = declared_outputs

    update_attributes(script_args, args.json_args.get('script'))
    return script_args
Exemplo n.º 12
0
def set_execution_args(args, name=None):
    """Returns an execution arguments dict

    Starts from the basic arguments for ``name`` and adds the project,
    inputs, creation defaults, outputs and input maps when they are set.
    The 'execution' entry of ``args.json_args`` is merged last.

    :param args: command-line options object
    :param name: resource name. Defaults to ``args.name``
    :return: the execution arguments dict
    """

    if name is None:
        name = args.name
    execution_args = set_basic_args(args, name)
    if args.project_id is not None:
        execution_args.update({"project": args.project_id})
    if args.arguments_:
        execution_args.update({"inputs": args.arguments_})
    # the option is checked in args.creation_defaults, the value is read
    # from args.creation_defaults_
    if args.creation_defaults is not None:
        execution_args.update({"creation_defaults": args.creation_defaults_})
    if args.outputs_:
        execution_args.update({"outputs": args.outputs_})
    if args.input_maps_:
        # the trailing underscore belongs to the internal option name only;
        # the key sent in the arguments is "input_maps"
        execution_args.update({"input_maps": args.input_maps_})
    update_attributes(execution_args, args.json_args.get('execution'))
    return execution_args
Exemplo n.º 13
0
def set_source_args(args,
                    name=None,
                    multi_label_data=None,
                    data_set_header=None,
                    fields=None):
    """Returns a source arguments dict

    Starts from the basic arguments for ``name`` and adds the project, the
    source parser settings (header, locale and separator), the multi-label
    user metadata and, only when a fields structure is given, the fields
    updates and the 'source' entry of ``args.json_args``.

    :param args: command-line options object
    :param name: resource name. Defaults to ``args.name``
    :param multi_label_data: multi-label information stored in the
                             ``user_metadata`` when ``args.multi_label``
                             is set
    :param data_set_header: value for the ``header`` source parser
                            attribute. Ignored when None
    :param fields: previous fields structure, needed to update fields
                   attributes or types
    :return: the source arguments dict
    """

    if name is None:
        name = args.name
    source_args = set_basic_args(args, name)
    if args.project_id is not None:
        source_args.update({"project": args.project_id})
    # if header is set, use it
    if data_set_header is not None:
        source_args.update({"source_parser": {"header": data_set_header}})
    # If user has given an OS locale, try to add the locale used in bigml.com
    if args.user_locale is not None:
        source_locale = bigml_locale(args.user_locale)
        if source_locale is None:
            log_message("WARNING: %s locale equivalence not found."
                        " Using %s instead.\n" %
                        (args.user_locale, LOCALE_DEFAULT),
                        log_file=None,
                        console=True)
            source_locale = LOCALE_DEFAULT
        # setdefault keeps a previously stored header instead of replacing
        # the whole source_parser structure
        source_args.setdefault("source_parser", {}).update(
            {'locale': source_locale})
    # If user has set a training separator, use it.
    if args.training_separator is not None:
        training_separator = decode2(args.training_separator,
                                     encoding="string_escape")
        # source_parser may not exist yet when neither header nor locale
        # have been set
        source_args.setdefault("source_parser", {}).update(
            {'separator': training_separator})
    # If uploading a multi-label file, add the user_metadata info needed to
    # manage the multi-label fields
    if (hasattr(args, 'multi_label') and args.multi_label
            and multi_label_data is not None):
        source_args.update(
            {"user_metadata": {
                "multi_label_data": multi_label_data
            }})

    # to update fields attributes or types you must have a previous fields
    # structure (at update time)
    if fields:
        if args.field_attributes_:
            update_attributes(source_args, {"fields": args.field_attributes_},
                              by_column=True,
                              fields=fields)
        if args.types_:
            update_attributes(source_args, {"fields": args.types_},
                              by_column=True,
                              fields=fields)
        if args.import_fields:
            fields_struct = fields.new_fields_structure(args.import_fields)
            check_fields_struct(fields_struct, "source")
            update_attributes(source_args, fields_struct)
        if 'source' in args.json_args:
            update_json_args(source_args, args.json_args.get('source'), fields)
    return source_args