Exemplo n.º 1
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for feature selection.

    Exposes the proportion of top input features to keep and a switch that
    restricts the selection to numeric columns.
    """

    # Fraction of features to retain; the upper bound 1.0 is inclusive and is
    # also the default value.
    proportion_of_features = hyperparams.Uniform(
        description="proportion of top features from input dataset to keep",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        default=1.0,
        lower=0.0,
        upper=1.0,
        upper_inclusive=True,
    )

    # Off by default.
    only_numeric_cols = hyperparams.UniformBool(
        description="consider only numeric columns for feature selection",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        default=False,
    )
Exemplo n.º 2
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for threshold-based feature selection.

    Exposes the PCA score threshold and a switch that restricts the selection
    to numeric columns.
    """

    # Score cut-off between 0.0 and 1.0; the upper bound is explicitly
    # excluded. Defaults to 0.0.
    threshold = hyperparams.Uniform(
        description="pca score threshold for feature selection",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        default=0.0,
        lower=0.0,
        upper=1.0,
        upper_inclusive=False,
    )

    # On by default.
    only_numeric_cols = hyperparams.UniformBool(
        description="consider only numeric columns for feature selection",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        default=True,
    )
Exemplo n.º 3
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters selecting which metrics to compute and how to report them.

    All three entries are control parameters. The element configurations
    ``MetricsHyperparams`` and ``AllLabelsHyperparams`` are defined elsewhere.
    """

    # Empty by default.
    metrics = hyperparams.Set(
        description="A set of metrics to compute.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=(),
        elements=MetricsHyperparams,
    )

    # Empty by default.
    all_labels = hyperparams.Set(
        description="All labels available in a dataset, per target column. When provided for a target column, it overrides all labels from metadata or data for that target column.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=(),
        elements=AllLabelsHyperparams,
    )

    # On by default.
    add_normalized_scores = hyperparams.UniformBool(
        description="Add additional column with normalized scores?",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=True,
    )
Exemplo n.º 4
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for inference with a pretrained model.

    Covers column selection, the choice of pretrained inference model, the
    inference batch size, spatial pooling of the returned features and
    optional LZO decompression of the input data.
    """

    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )

    # One of 'amdim' or 'moco'; 'moco' is the default.
    inference_model = hyperparams.Enumeration(
        default='moco',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['amdim', 'moco'],
        description='type pretrained inference model to use')

    # Integer between 1 and 512 (upper bound inclusive), default 256.
    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=512,
        default=256,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="inference batch size",
    )

    # On by default.
    pool_features = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to pool features across spatial dimensions in returned frame",
    )

    # Off by default.
    decompress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        # Adjacent string literals are used instead of backslash continuations
        # inside a single literal, so that source indentation does not end up
        # embedded in the description text.
        description=(
            "If True, applies LZO decompression algorithm to the data. "
            "Compressed data stores a header consisting of the dtype character and the "
            "data shape as unsigned integers. Given c struct alignment, will occupy "
            "16 bytes (1 + 4 + 4 + 4 + 3 ) padding"
        ),
    )
Exemplo n.º 5
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a DBSCAN / HDBSCAN clustering step.

    Covers the choice of algorithm, the DBSCAN neighbourhood distance, the
    minimum cluster size, the core-point sample count and whether the input
    is already in long format.
    """

    # One of 'DBSCAN' or 'HDBSCAN'; 'HDBSCAN' is the default.
    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use')

    # Float between 0 and sys.maxsize, default 0.5.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Adjacent string literals are used instead of a backslash
        # continuation inside one literal, so that source indentation does
        # not end up embedded in the description text.
        description=(
            'maximum distance between two samples for them to be considered as in the same neighborhood, '
            'used in DBSCAN algorithm'
        ))

    # Integer from 2 up to sys.maxsize, default 5.
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters')

    # Integer from 1 up to sys.maxsize, default 5.
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'The number of samples in a neighbourhood for a point to be considered a core point.'
    )

    # Off by default.
    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "whether the input dataset is already formatted in long format or not")
Exemplo n.º 6
0
class GroupUpHyperparameter(hyperparams.Hyperparams):
    """Hyper-parameters controlling column selection and result assembly.

    Exposes a verbosity flag, the columns to operate on, how results are
    combined with the input and whether primary index columns are kept.
    """

    # Off by default. Built through the ``hyperparams`` module like every
    # other entry of this class, so it does not rely on a separate bare
    # ``UniformBool`` import.
    verbose = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'http://schema.org/Boolean',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])

    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    # exclude_columns = hyperparams.Set(
    #     elements=hyperparams.Hyperparameter[int](-1),
    #     default=(),
    #     semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    #     description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    # )

    # One of 'append', 'replace' or 'new'; 'replace' is the default.
    # NOTE(review): the description refers to ``use_semantic_types``, which is
    # commented out below and therefore not exposed by this class -- confirm
    # whether the sentence still applies.
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='replace',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    # use_semantic_types = hyperparams.UniformBool(
    #     default=False,
    #     semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    #     description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    # )

    # On by default.
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
Exemplo n.º 7
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for an embedding step.

    Exposes the dimension of the embedded space and whether the input dataset
    is already in long format.
    """

    # Integer between 1 and 3 (upper bound inclusive), default 2.
    n_components = hyperparams.UniformInt(
        description='dimension of the embedded space',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=2,
        lower=1,
        upper=3,
        upper_inclusive=True,
    )

    # Off by default.
    long_format = hyperparams.UniformBool(
        description="whether the input dataset is already formatted in long format or not",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=False,
    )
Exemplo n.º 8
0
class Vgg16Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for feature extraction with a neural network.

    Exposes which layer supplies the features and whether metadata is
    generated after the extraction.
    """

    # Defaults to 0. No ``upper_inclusive`` is given; presumably the upper
    # bound 4 is therefore exclusive, which would match the four layer sizes
    # listed in the description -- TODO confirm against the library default.
    layer_index = hyperparams.UniformInt(
        semantic_types=[
            "http://schema.org/Integer",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Specify the layer of the neural network to use for features. Lower numbered layers correspond to higher-level abstract features. The number of features by layer index are [25088, 100352, 200704, 401408]",
        default=0,
        lower=0,
        upper=4,
    )

    # Off by default.
    generate_metadata = hyperparams.UniformBool(
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="A control parameter to set whether to generate metada after the feature extraction. It will be very slow if the columns length is very large. For the default condition, it will turn off to accelerate the program running.",
        default=False,
    )
Exemplo n.º 9
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a Johnson-Lindenstrauss based projection.

    eps: maximum distortion rate as defined by the Johnson-Lindenstrauss lemma.
    generate_metadata: whether metadata is generated after feature extraction.
    """

    # Float between 0.1 and 0.5, default 0.2. No description is attached to
    # the hyper-parameter itself.
    eps = hyperparams.Uniform(
        semantic_types=[
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        default=0.2,
        lower=0.1,
        upper=0.5,
    )

    # NOTE(review): the default is True, yet the description states that the
    # default condition turns generation off -- confirm which is intended.
    generate_metadata = hyperparams.UniformBool(
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="A control parameter to set whether to generate metada after the feature extraction. It will be very slow if the columns length is very large. For the default condition, it will turn off to accelerate the program running.",
        default=True,
    )
Exemplo n.º 10
0
class HorizontalConcatHyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a horizontal concatenation step.

    Exposes whether the new dataframe uses the original index, which semantic
    types are added to the output and an integer control value for the index
    name. All three are control parameters.
    """

    # On by default.
    ignore_index = hyperparams.UniformBool(
        description="Controls whether new df should use original index or not",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=True,
    )

    # Defaults to the Attribute, OrdinalData and CategoricalData types.
    to_semantic_types = hyperparams.Set(
        description="Sementic typer to add for output dataframe",
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        default=(
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            "https://metadata.datadrivendiscovery.org/types/OrdinalData",
            "https://metadata.datadrivendiscovery.org/types/CategoricalData",
        ),
        elements=hyperparams.Hyperparameter[str](""),
    )

    # Integer-typed despite the name; defaults to 100.
    column_name = hyperparams.Hyperparameter[int](
        description="the control params for index name",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=100,
    )
Exemplo n.º 11
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters of this primitive.

    Covers posterior sample count, training batch size, the number of traces
    used for inference and per training step, and whether pre-trained
    ImageNet weights are used. All entries are control parameters.
    """

    # Defaults to 10.
    num_samples = hyperparams.Hyperparameter[int](
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Samples to sample from the posterior.",
        default=10,
    )

    # Defaults to 128.
    batch_size = hyperparams.Hyperparameter[int](
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Batch size for training.",
        default=128,
    )

    # Defaults to 10.
    eval_num_traces = hyperparams.Hyperparameter[int](
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='Traces to evaluate during inference.',
        default=10,
    )

    # Defaults to 5000000.
    train_num_traces = hyperparams.Hyperparameter[int](
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description='Traces to evaluate per training step.',
        default=5000000,
    )

    # On by default.
    use_pretrained = hyperparams.UniformBool(
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Whether to use pre-trained ImageNet weights",
        default=True,
    )
Exemplo n.º 12
0
class UnfoldHyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for an unfold step.

    Exposes the semantic types that get unfolded and whether the pipeline id
    column is located through its semantic type. Both are control parameters.
    """

    # The default is an immutable tuple rather than a list, matching how the
    # other ``Set`` defaults in this file are written. The description is
    # assembled from adjacent literals so that source indentation and the
    # leading/trailing blank lines of a triple-quoted block do not end up in
    # the text.
    unfold_semantic_types = hyperparams.Set(
        elements=hyperparams.Hyperparameter[str]("str"),
        default=("https://metadata.datadrivendiscovery.org/types/PredictedTarget",),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            "A set of semantic types that the primitive will unfold.\n"
            "Only 'https://metadata.datadrivendiscovery.org/types/PredictedTarget' by default."
        ),
    )

    # Off by default. Line breaks of the original text are kept, indentation
    # is not.
    use_pipeline_id_semantic_type = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description=(
            "Controls whether semantic_type will be used for finding pipeline id column in input dataframe.\n"
            "If true, it will look for 'https://metadata.datadrivendiscovery.org/types/PipelineId' for pipeline id column,\n"
            "and create attribute columns using header: attribute_{pipeline_id}.\n"
            "eg. 'binaryClass_{a3180751-33aa-4790-9e70-c79672ce1278}'\n"
            "If false, create attribute columns using header: attribute_{0,1,2,...}.\n"
            "eg. 'binaryClass_0', 'binaryClass_1'"
        ),
    )
Exemplo n.º 13
0
class FileReaderHyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a file-reading step.

    Exposes which columns are read or excluded, how columns with read files
    are combined with the input and whether primary index columns are kept.
    All entries are control parameters.
    """

    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column does not contain filenames for supported media types, it is skipped.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # Column indices to leave out; empty by default.
    exclude_columns = hyperparams.Set(
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # One of 'append', 'replace' or 'new'; 'append' is the default.
    return_result = hyperparams.Enumeration(
        description="Should columns with read files be appended, should they replace original columns, or should only columns with read files be returned?",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default='append',
        values=['append', 'replace', 'new'],
    )

    # On by default.
    add_index_columns = hyperparams.UniformBool(
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        default=True,
    )
Exemplo n.º 14
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for resource/column selection and result assembly.

    Covers the DataFrame resource to extract, the columns to use or exclude,
    how parsed columns are combined with the input, semantic-type based
    filtering, index handling, the no-input error policy and the semantic
    type attached to generated attributes. All entries are control parameters.
    """

    # Keep previous
    # Either a string or None; None is the default.
    dataframe_resource = hyperparams.Hyperparameter[typing.Union[str, None]](
        description="Resource ID of a DataFrame to extract if there are multiple tabular resources inside a Dataset and none is a dataset entry point.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=None,
    )

    # Defaults to column index 2 only.
    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=(2, ),
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # Defaults to column indices 0, 1 and 3.
    exclude_columns = hyperparams.Set(
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=(0, 1, 3),
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # One of 'append', 'replace' or 'new'; 'append' is the default.
    return_result = hyperparams.Enumeration(
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default='append',
        values=['append', 'replace', 'new'],
    )

    # Off by default.
    use_semantic_types = hyperparams.UniformBool(
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=False,
    )

    # Off by default.
    add_index_columns = hyperparams.UniformBool(
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=False,
    )

    # On by default.
    error_on_no_input = hyperparams.UniformBool(
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=True,
    )

    # Either the Attribute or the ConstructedAttribute type; Attribute is the
    # default.
    return_semantic_type = hyperparams.Enumeration[str](
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute',
        ],
    )
Exemplo n.º 15
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for a lag-based matrix factorization forecaster.

    The first group configures the model (lags, latent dimension,
    regularization weights, iteration count and gradient step sizes for the
    matrices F, X and W). The second group controls column selection and how
    results are assembled.
    """

    # Tuning
    # NOTE(review): ``lags`` carries the ControlParameter semantic type even
    # though it is listed with the tuning group.
    lags = hyperparams.Set(
        description="Set of lag indices to use in model.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=(1, ),
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # Integer between 0 and 100000000, default 2.
    K = hyperparams.UniformInt(
        description="Length of latent embedding dimension.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=2,
        lower=0,
        upper=100000000,
    )

    # The regularization weights below share the range 0 to 100000000.
    lambda_f = hyperparams.Uniform(
        description="Regularization parameter used for matrix F.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=1.0,
        lower=0,
        upper=100000000,
    )
    lambda_x = hyperparams.Uniform(
        description="Regularization parameter used for matrix X.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=1.0,
        lower=0,
        upper=100000000,
    )
    lambda_w = hyperparams.Uniform(
        description="Regularization parameter used for matrix W.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=1.0,
        lower=0,
        upper=100000000,
    )
    alpha = hyperparams.Uniform(
        description="Regularization parameter used for make the sum of lag coefficient close to 1. That helps to avoid big deviations when forecasting.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=1000.0,
        lower=0,
        upper=100000000,
    )
    eta = hyperparams.Uniform(
        description="Regularization parameter used for X when undercovering autoregressive dependencies.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=1.0,
        lower=0,
        upper=100000000,
    )

    # Integer between 0 and 100000000, default 1000.
    max_iter = hyperparams.UniformInt(
        description="Number of iterations of updating matrices F, X and W.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=1000,
        lower=0,
        upper=100000000,
    )

    # Gradient-descent step sizes, one per matrix; each defaults to 0.0001.
    F_step = hyperparams.Uniform(
        description="Step of gradient descent when updating matrix F.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=0.0001,
        lower=0,
        upper=100000000,
    )
    X_step = hyperparams.Uniform(
        description="Step of gradient descent when updating matrix X.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=0.0001,
        lower=0,
        upper=100000000,
    )
    W_step = hyperparams.Uniform(
        description="Step of gradient descent when updating matrix W.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
        default=0.0001,
        lower=0,
        upper=100000000,
    )

    # Control
    # Column indices to operate on; empty by default.
    use_columns = hyperparams.Set(
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # Column indices to leave out; empty by default.
    exclude_columns = hyperparams.Set(
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # One of 'append', 'replace' or 'new'; 'append' is the default.
    return_result = hyperparams.Enumeration(
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default='append',
        values=['append', 'replace', 'new'],
    )

    # Off by default.
    use_semantic_types = hyperparams.UniformBool(
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=False,
    )

    # Off by default.
    add_index_columns = hyperparams.UniformBool(
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=False,
    )

    # On by default.
    error_on_no_input = hyperparams.UniformBool(
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default=True,
    )

    # Either the Attribute or the ConstructedAttribute type; Attribute is the
    # default.
    return_semantic_type = hyperparams.Enumeration[str](
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute',
        ],
    )
Exemplo n.º 16
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyper-parameters for an LSTM-based time series forecasting model.

    Covers where trained weights are saved, model sizes (categorical
    embedding and lstm cells), training settings (epochs, steps, early
    stopping, learning rate, batch size, dropout, window size, validation
    split), target labelling (``count_data``) and the configuration of the
    confidence-interval forecasts.
    """

    # Defaults to 'model_weights.h5'.
    weights_filepath = hyperparams.Hyperparameter[str](
        default='model_weights.h5',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="weights of trained model will be saved to this filepath",
    )

    # Integer between 8 and 256 (upper bound inclusive), default 32.
    emb_dim = hyperparams.UniformInt(
        lower=8,
        upper=256,
        default=32,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "number of cells to use in the categorical embedding component of the model",
    )

    # Integer between 8 and 256 (upper bound inclusive), default 32.
    lstm_dim = hyperparams.UniformInt(
        lower=8,
        upper=256,
        default=32,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of cells to use in the lstm component of the model",
    )

    # Integer from 1 up to sys.maxsize, default 10.
    epochs = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of training epochs",
    )

    # Integer between 5 and 200 (upper bound inclusive), default 10.
    steps_per_epoch = hyperparams.UniformInt(
        lower=5,
        upper=200,
        default=10,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of steps to do per epoch",
    )

    # Integer from 0 up to sys.maxsize, default 1.
    early_stopping_patience = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "number of epochs to wait before invoking early stopping criterion",
    )

    # Integer between 0 and sys.maxsize (upper bound inclusive), default 0.
    early_stopping_delta = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=0,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "early stopping will interpret change of < delta in desired direction "
        + "will increment early stopping counter state",
    )

    # Float between 0.0 and 1.0, default 1e-3.
    learning_rate = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=1e-3,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="learning rate",
    )

    # Integer between 1 and 256 (upper bound inclusive), default 64.
    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=256,
        default=64,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="batch size",
    )

    # Float between 0.0 and 1.0, default 0.2.
    dropout_rate = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.2,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "dropout to use in lstm model (input and recurrent transform)",
    )

    # Two-way choice: a user-supplied boolean ("user_selected") or None
    # ("auto_selected"); the automatic option is the default.
    count_data = hyperparams.Union[typing.Union[bool, None]](
        configuration=collections.OrderedDict(
            user_selected=hyperparams.UniformBool(default=True),
            auto_selected=hyperparams.Hyperparameter[None](default=None),
        ),
        default="auto_selected",
        description=
        "Whether we should label the target column as real or count (positive) "
        +
        "based on user input or automatic selection. For example, user might want to specify "
        +
        "positive only count data if target column is real-valued, but domain is > 0",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
    )

    # Integer between 10 and sys.maxsize (upper bound inclusive), default 20.
    window_size = hyperparams.UniformInt(
        lower=10,
        upper=sys.maxsize,
        default=20,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="window size of sampled time series in training process",
    )

    # Integer between 0 and 10 (upper bound inclusive), default 1.
    # NOTE(review): the description reads like a yes/no switch although the
    # value is an integer -- confirm the intended meaning of values above 1.
    negative_obs = hyperparams.UniformInt(
        lower=0,
        upper=10,
        default=1,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "whether to sample time series with padded observations before t=0 in training ",
    )

    # Float between 0.0 and 1.0. The default is written as a float so it has
    # the same type as the bounds and as the other Uniform defaults here.
    val_split = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=
        "proportion of training records to set aside for validation. Ignored "
        + "if iterations flag in `fit` method is not None",
    )
    # seed_predictions_with_all_data = hyperparams.UniformBool(
    #     default=True,
    #     semantic_types=[
    #         "https://metadata.datadrivendiscovery.org/types/TuningParameter"
    #     ],
    #     description="whether to pass all batches of training data through model before making test predictions "
    #     + "otherwise only one batch of training data (of length window size) will be passed through model",
    # )

    # Integer between 1 and 100, default 2.
    confidence_interval_horizon = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=2,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "horizon for confidence interval forecasts. Exposed through auxiliary "
        + "'produce_confidence_intervals' method",
    )

    # Float between 0.01 and 1, default 0.1. The description is a plain
    # string that is never %-formatted, so the percent sign is written once.
    confidence_interval_alpha = hyperparams.Uniform(
        lower=0.01,
        upper=1,
        default=0.1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "significance level for confidence interval, i.e. alpha = 0.05 " +
        "returns a 95% confidence interval from alpha / 2 to 1 - (alpha / 2) "
        + "Exposed through auxiliary 'produce_confidence_intervals' method ",
    )

    # Integer between 1 and 1000 (upper bound inclusive), default 100.
    confidence_interval_samples = hyperparams.UniformInt(
        lower=1,
        upper=1000,
        default=100,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "number of samples to draw at each timestep, which will be used to calculate "
        + "confidence intervals",
    )
Exemplo n.º 17
0
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyper-parameters for this primitive.

    Going by the field descriptions, these configure a tree ensemble that
    splits on CCA projections (CCFs) and is used for regression. The fields
    are grouped as: core forest options, numerical-stability tolerances,
    rotation-forest options, rarely-changed advanced options, randomized-CCA
    (RCCA) options, and input/output column-selection options.
    """
    # Global Hyperparams
    # `global` inside the class body makes the assignments below bind a
    # module-level name instead of a class attribute; kept as-is because other
    # code in the module may read `default_projdict`.
    global default_projdict
    default_projdict = OrderedDict()
    default_projdict['CCA'] = True

    nTrees = hyperparams.UniformInt(
        lower=1,
        upper=10000,
        default=100,
        description="Number of trees to create.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
            'https://metadata.datadrivendiscovery.org/types/ResourcesUseParameter',
        ],
    )
    parallelprocessing = hyperparams.UniformBool(
        default=True,
        description="Use multi-cpu processing.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # Trailing underscore avoids clashing with the `lambda` keyword.
    lambda_ = hyperparams.Enumeration[str](
        values=['log', 'sqrt', 'all'],
        default='log',
        description="Number of features to subsample at each node",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    splitCriterion = hyperparams.Enumeration[str](
        values=['mse'],
        default='mse',
        description=
        "Split criterion/impurity measure to use.  Default is 'mse' for Regression.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ])
    minPointsLeaf = hyperparams.Hyperparameter[int](
        default=3,
        description=
        "Minimum number of points allowed a leaf node for split to be permitted.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    bSepPred = hyperparams.UniformBool(
        default=False,
        description=
        "Whether to predict each class seperately as a multilabel classification problem (True) or treat classes within the same output as mutually exclusive (False)",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    taskWeights = hyperparams.Enumeration[str](
        values=[
            'even', 'uneven'
        ],  # TODO: Add support for inputing weights list, currently only even supported.
        default='even',
        description=
        "Weights to apply to each output task in calculating the gain.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # 'default' is a third state alongside True/False, hence the mixed
    # bool/str enumeration.
    bProjBoot = hyperparams.Enumeration[Union[bool, str]](
        values=['default', True, False],
        default='default',
        description=
        "Whether to use projection bootstrapping.  If set to default, then true unless lambda=D, i.e. we all features at each node.  In this case we resort to bagging instead of projection bootstrapping",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    bBagTrees = hyperparams.Enumeration[Union[bool, str]](
        values=['default', True, False],
        default='default',
        description=
        "Whether to use Breiman's bagging by training each tree on a bootstrap sample",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # The description used to be a verbatim copy of the `bProjBoot` text; it
    # now describes this parameter, based on its default of {'CCA': True}.
    projections = hyperparams.Hyperparameter[dict](
        default=default_projdict,
        description=
        "Projection types to use, given as a dictionary mapping each projection name to a boolean flag.  By default only 'CCA' is enabled.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    treeRotation = hyperparams.Enumeration[str](
        values=['none', 'pca', 'random', 'rotationForest'],
        default='none',
        description=
        'Pre-rotation to be applied to each tree seperately before rotating.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    propTrain = hyperparams.Bounded[float](
        lower=0.1,
        upper=1.0,
        default=1.0,
        description=
        "Proportion of the data to train each tree on, but for large datasets it may be possible to only use a subset of the data for training each tree.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # Numerical stability options. Default values works for most cases
    epsilonCCA = hyperparams.Hyperparameter[float](
        default=1.0000e-04,
        description=
        "Tolerance parameter for rank reduction during the CCA. It can be desirable to lower if the data has extreme correlation, in which this finite value could eliminate the true signal",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    mseErrorTolerance = hyperparams.Hyperparameter[float](
        default=1e-6,
        description=
        " When doing regression with mse splits, the node is made into a leaf if the mse (i.e. variance) of the data is less  than this tolerance times the mse of the full data set.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # Typed as str because the default is the sentinel 'stack' rather than a
    # number.
    maxDepthSplit = hyperparams.Hyperparameter[str](
        default='stack',
        description=
        "Maximum depth of a node when splitting is still allowed. When set to 'stack' this is set to the maximum value that prevents crashes (usually ~500 which should never really be reached in sensible scenarios)",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    XVariationTol = hyperparams.Hyperparameter[float](
        default=1.0e-10,
        description=
        "Points closer than this tolerance (after scaling the data to unit standard deviation) are considered the same the avoid splitting on numerical error.  Rare would want to change.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # Options that may want to be set if using algorithms building on CCFs
    RotForM = hyperparams.Hyperparameter[int](
        default=3,
        description=
        "Size of feature subsets taken for each rotation.  Default as per WEKA and rotation forest paper",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    RotForpS = hyperparams.Hyperparameter[float](
        default=0.7500,
        description=
        "Proportion of points to subsample for calculating each PCA projection.  Default as per WEKA but not rotation forest paper",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    RotForpClassLeaveOut = hyperparams.Hyperparameter[float](
        default=0.5000,
        description=
        "Proportion of classes to randomly eliminate for each PCA projection.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # Properties that can be set but should generally be avoided, use Default works best.
    minPointsForSplit = hyperparams.Hyperparameter[int](
        default=6,
        description="Minimum points for parent node",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    dirIfEqual = hyperparams.Enumeration[str](
        values=['first', 'rand'],
        default='first',
        description=
        " When multiple projection vectors can give equivalent split criterion scores, one can either choose which to use randomly ('rand') or take the first ('first') on the basis that the components are in decreasing order of correlation for CCA.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    bContinueProjBootDegenerate = hyperparams.UniformBool(
        default=True,
        description=
        "In the scenario where the projection bootstrap makes the local data pure or have no X variation, the algorithm can either set the node to be a leaf or resort to using the original data for the CCA",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    multiTaskGainCombination = hyperparams.Enumeration[str](
        values=['mean', 'max'],
        default='mean',
        description=
        "Method for combining multiple gain metrics in multi-output tasks. Valid options are 'mean' (default) - average of the gains which for all the considered metrics is equal to the joint gain, or the 'max' gain on any of the tasks.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    missingValuesMethod = hyperparams.Enumeration[str](
        values=['mean', 'random'],
        default='random',
        description="Method for dealing with missing values.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    bUseOutputComponentsMSE = hyperparams.UniformBool(
        default=False,
        description=
        "If true, doing regression with multiple outputs and doing CCA projections.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # Options that allow nonlinear features to be included in the CCA
    # in accordance with Lopez-Paz's randomized kernel cca.
    bRCCA = hyperparams.UniformBool(
        default=False,
        description=
        "Options that allow nonlinear features to be included in the CCA.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    rccaLengthScale = hyperparams.Hyperparameter[float](
        default=0.1000,
        description="Parameter for bRCCA, if set to True.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    rccaNFeatures = hyperparams.Hyperparameter[int](
        default=50,
        description="Parameter for bRCCA, if set to True.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    rccaRegLambda = hyperparams.Hyperparameter[float](
        default=1.0000e-03,
        description="Parameter for bRCCA, if set to True.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    rccaIncludeOriginal = hyperparams.UniformBool(
        default=False,
        description="Parameter for bRCCA, if set to True.",
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # Inputs and outputs HyperParams
    # The element hyper-parameter's default of -1 only describes the element
    # type; each set itself defaults to empty.
    use_inputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of inputs column indices to force primitive to operate on. If any specified column cannot be used, it is skipped.",
    )
    exclude_inputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of inputs column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    use_outputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of outputs column indices to force primitive to operate on. If any specified column cannot be used, it is skipped.",
    )
    exclude_outputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of outputs column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default=
        'append',  # Default value depends on the nature of the primitive.
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Should resulting columns be appended, should they replace original columns, or should only resulting columns be returned?",
    )
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Throw an exception if no column is selected/provided. Otherwise issue a warning.",
    )
Exemplo n.º 18
0
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyper-parameters for this primitive.

    Going by the field descriptions, they control the lag order used in the
    regressions, the optional seasonal component, how regression weights are
    reported, and the confidence-interval forecasts exposed through the
    auxiliary 'produce_confidence_intervals' method.
    """

    # Two-way choice: a bounded integer picked by the user, or None to let the
    # lag order be selected automatically.
    max_lag_order = hyperparams.Union[Union[int, None]](
        configuration=collections.OrderedDict(
            user_selected=hyperparams.UniformInt(lower=0, upper=100,
                                                 default=1),
            auto_selected=hyperparams.Hyperparameter[None](
                default=None,
                description="Lag order of regressions automatically selected",
            ),
        ),
        default="user_selected",
        description=(
            "The lag order to apply to regressions. If user-selected, the same lag will be "
            "applied to all regressions. If auto-selected, different lags can be selected for different "
            "regressions."
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
    )
    seasonal = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to perform ARIMA prediction with seasonal component",
    )
    seasonal_differencing = hyperparams.UniformInt(
        lower=1,
        upper=365,
        default=1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "period of seasonal differencing to use in ARIMA prediction",
    )
    dynamic = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to perform dynamic in-sample prediction with ARIMA model",
    )
    interpret_value = hyperparams.Enumeration(
        default="lag_order",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["series", "lag_order"],
        description=(
            "whether to return weight coefficients for each series or each lag order "
            "separately in the regression"
        ),
    )
    interpret_pooling = hyperparams.Enumeration(
        default="avg",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["avg", "max"],
        description="whether to pool weight coefficients via average or max",
    )
    confidence_interval_horizon = hyperparams.UniformInt(
        lower=1,
        upper=100,
        default=2,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "horizon for confidence interval forecasts. Exposed through auxiliary "
            "'produce_confidence_intervals' method"
        ),
    )
    # The description is stored verbatim and never %-formatted, so a single
    # '%' is correct here; '%%' would be shown literally.
    confidence_interval_alpha = hyperparams.Uniform(
        lower=0.01,
        upper=1,
        default=0.1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "significance level for confidence interval, i.e. alpha = 0.05 "
            "returns a 95% confidence interval from alpha / 2 to 1 - (alpha / 2). "
            "Exposed through auxiliary 'produce_confidence_intervals' method"
        ),
    )
Exemplo n.º 19
0
class Hyperparams(Hyperparams_ODBase):
    """
    Extra hyper-parameters layered on top of ``Hyperparams_ODBase``.

    Going by the field descriptions, they configure an SVD / principal
    component decomposition whose components are used to compute outlier
    scores. Several fields are two-way unions whose ``ninit`` branch means
    "not set" (``None``, or ``'auto'`` for ``iterated_power``).
    """

    svd_solver = hyperparams.Enumeration[str](
        default="auto",
        values=["auto", "full", "arpack", "randomized"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Algorithm of solver.",
    )

    # Union of an explicit integer ("init") and an unset value ("ninit").
    n_components = hyperparams.Union[Union[int, None]](
        configuration=OrderedDict([
            ("init", hyperparams.Hyperparameter[int](default=1)),
            ("ninit", hyperparams.Hyperparameter[None](default=None)),
        ]),
        default="ninit",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Number of components to keep. It should be smaller than the window_size.",
    )

    n_selected_components = hyperparams.Union[Union[int, None]](
        configuration=OrderedDict([
            ("init", hyperparams.Hyperparameter[int](default=1)),
            ("ninit", hyperparams.Hyperparameter[None](default=None)),
        ]),
        default="ninit",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Number of selected principal components for calculating the outlier scores. It is not necessarily equal to the total number of the principal components. If not set, use all principal components.",
    )

    tol = hyperparams.Hyperparameter[float](
        default=0.0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Tolerance for singular values computed by svd_solver == `arpack`.",
    )

    # Here the "ninit" branch is the string 'auto' rather than None.
    iterated_power = hyperparams.Union[Union[int, str]](
        configuration=OrderedDict([
            ("init", hyperparams.Hyperparameter[int](default=1)),
            ("ninit", hyperparams.Hyperparameter[str](default="auto")),
        ]),
        default="ninit",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Number of iterations for the power method computed by svd_solver == `randomized`.",
    )

    random_state = hyperparams.Union[Union[int, None]](
        configuration=OrderedDict([
            ("init", hyperparams.Hyperparameter[int](default=0)),
            ("ninit", hyperparams.Hyperparameter[None](default=None)),
        ]),
        default="ninit",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="the seed used by the random number generator.",
    )

    whiten = hyperparams.UniformBool(
        default=True,
        description="If True, the eigenvalues are used in score computation. The eigenvectors with small eigenvalues comes with more importance in outlier score calculation.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    standardization = hyperparams.UniformBool(
        default=True,
        description="If True, perform standardization first to convert data to zero mean and unit variance.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    hidden_layer_sizes = hyperparams.List(
        elements=hyperparams.Bounded(1, None, 100),
        default=(100, ),
        min_size=1,
        max_size=None,
        description='The ith element represents the number of neurons in the ith hidden layer.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    activation = hyperparams.Enumeration[str](
        values=['identity', 'logistic', 'tanh', 'relu'],
        default='relu',
        description='Activation function for the hidden layer.  - \'identity\', no-op activation, useful to implement linear bottleneck, returns f(x) = x  - \'logistic\', the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)).  - \'tanh\', the hyperbolic tan function, returns f(x) = tanh(x).  - \'relu\', the rectified linear unit function, returns f(x) = max(0, x)',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    solver = hyperparams.Choice(
        choices={
            'lbfgs': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'max_fun': hyperparams.Bounded[int](
                        default=15000,
                        lower=1,
                        upper=None,
                        description='Maximum number of loss function calls',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    )
                })
            ),
            'sgd': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'learning_rate': hyperparams.Enumeration[str](
                        values=['constant', 'invscaling', 'adaptive'],
                        default='constant',
                        description='Learning rate schedule for weight updates. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'learning_rate_init': hyperparams.Bounded[float](
                        lower=0,
                        upper=None,
                        default=0.001,
                        description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'power_t': hyperparams.Bounded[float](
                        lower=0,
                        upper=None,
                        default=0.5,
                        description='The exponent for inverse scaling learning rate. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'shuffle': hyperparams.UniformBool(
                        default=True,
                        description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'momentum': hyperparams.Bounded[float](
                        default=0.9,
                        lower=0,
                        upper=1,
                        description='Momentum for gradient descent update. Should be between 0 and 1. Only used when solver=’sgd’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'nesterovs_momentum': hyperparams.UniformBool(
                        default=True,
                        description='Whether to use Nesterov’s momentum. Only used when solver=’sgd’ and momentum > 0.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'early_stopping': hyperparams.UniformBool(
                        default=False,
                        description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'n_iter_no_change': hyperparams.Bounded[int](
                        default=10,
                        lower=1,
                        upper=None,
                        description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    )
                })
            ),
            'adam': hyperparams.Hyperparams.define(
                configuration=OrderedDict({
                    'learning_rate_init': hyperparams.Bounded[float](
                        lower=0,
                        upper=None,
                        default=0.001,
                        description='The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'shuffle': hyperparams.UniformBool(
                        default=True,
                        description='Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'early_stopping': hyperparams.UniformBool(
                        default=False,
                        description='Whether to use early stopping to terminate training when validation score is not improving.If set to true, it will automatically set aside 10% of training data as validation and terminate training when validation score is not improving by at least tol for n_iter_no_change consecutive epochs.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'beta_1': hyperparams.Bounded[float](
                        default=0.9,
                        lower=0,
                        upper=1,
                        description='Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1).',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'beta_2': hyperparams.Bounded[float](
                        default=0.999,
                        lower=0,
                        upper=1,
                        description='Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1).',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'epsilon': hyperparams.Bounded[float](
                        default=1e-08,
                        lower=0,
                        upper=None,
                        description='Value for numerical stability in adam. Only used when solver=’adam’',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    ),
                    'n_iter_no_change': hyperparams.Bounded[int](
                        default=10,
                        lower=1,
                        upper=None,
                        description='Maximum number of epochs to not meet tol improvement. Only effective when solver=’sgd’ or ‘adam’.',
                        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
                    )
                })
            )
        },
        default='adam',
        description='The solver for weight optimization.  - \'lbfgs\' is an optimizer in the family of quasi-Newton methods.  - \'sgd\' refers to stochastic gradient descent.  - \'adam\' refers to a stochastic gradient-based optimizer proposed by Kingma, Diederik, and Jimmy Ba  Note: The default solver \'adam\' works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, \'lbfgs\' can converge faster and perform better.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    alpha = hyperparams.Bounded[float](
        lower=0,
        upper=None,
        default=0.0001,
        description='L2 penalty (regularization term) parameter.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    batch_size = hyperparams.Union(
        configuration=OrderedDict({
            'int': hyperparams.Bounded[int](
                lower=0,
                upper=None,
                default=16,
                description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch',
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
            ),
            'auto': hyperparams.Constant(
                default='auto',
                description='When set to \'auto\', batch_size=min(200, n_samples)',
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
            )
        }),
        default='auto',
        description='Size of minibatches for stochastic optimizers. If the solver is \'lbfgs\', the classifier will not use minibatch. When set to "auto", `batch_size=min(200, n_samples)`',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    max_iter = hyperparams.Bounded[int](
        lower=0,
        upper=None,
        default=200,
        description='Maximum number of iterations. The solver iterates until convergence (determined by \'tol\') or this number of iterations. For stochastic solvers (\'sgd\', \'adam\'), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    tol = hyperparams.Bounded[float](
        default=0.0001,
        lower=0,
        upper=None,
        description='Tolerance for the optimization. When the loss or score is not improving by at least ``tol`` for ``n_iter_no_change`` consecutive iterations, unless ``learning_rate`` is set to \'adaptive\', convergence is considered to be reached and training stops.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    warm_start = hyperparams.UniformBool(
        default=False,
        description='When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution. See :term:`the Glossary <warm_start>`.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    validation_fraction = hyperparams.Bounded[float](
        default=0.1,
        lower=0,
        upper=None,
        description='The proportion of training data to set aside as validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    
    use_inputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to use as training input. If any specified column cannot be parsed, it is skipped.",
    )
    use_outputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to use as training target. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_inputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not use as training inputs. Applicable only if \"use_columns\" is not provided.",
    )
    exclude_outputs_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not use as training target. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    
    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute', 'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
        default='https://metadata.datadrivendiscovery.org/types/PredictedTarget',
        description='Decides what semantic type to attach to generated output',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
    )
Exemplo n.º 21
0
class Hyperparams(hyperparams.Hyperparams):
    """
    Configuration of a CNN-based image primitive.

    The entries fall into three groups: how the network is built (type,
    pre-trained weights, batch norm, top layers), how its input/output are
    shaped, and the settings applied while the primitive is fitted (loss,
    optimizer, minibatches, stopping criteria).
    """

    # ---- Network construction ------------------------------------------
    use_pretrained = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Whether to use pre-trained ImageNet weights",
    )
    train_endToend = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Whether to train the network end to end or fine-tune the last layer only.",
    )
    use_batch_norm = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Whether to use batch norm for VGG network",
    )
    feature_extract_only = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Whether to use CNN as feature extraction only without training",
    )
    include_top = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Whether to use top layers, i.e. final fully connected layers",
    )

    # ---- Input / output shaping ----------------------------------------
    img_resize = hyperparams.Hyperparameter[int](
        default=224,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Size to resize the input image",
    )
    output_dim = hyperparams.Hyperparameter[int](
        default=1000,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Dimensions of CNN output.",
    )
    last_activation_type = hyperparams.Enumeration[str](
        default="linear",
        values=["linear", "relu", "tanh", "sigmoid", "softmax"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Type of activation (non-linearity) following the last layer.",
    )
    # The only architecture choice exposed as a tuning parameter.
    cnn_type = hyperparams.Enumeration[str](
        default="resnet",
        values=["vgg", "googlenet", "mobilenet", "resnet"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Type of convolutional neural network to use.",
    )

    # ---- Training (fit) settings ---------------------------------------
    loss_type = hyperparams.Enumeration[str](
        default="mse",
        values=["mse", "crossentropy", "l1"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Type of loss used for the local training (fit) of this primitive.",
    )
    optimizer_type = hyperparams.Enumeration[str](
        default="adam",
        values=["adam", "sgd"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Type of optimizer used during training (fit).",
    )
    minibatch_size = hyperparams.Hyperparameter[int](
        default=32,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Minibatch size used during training (fit).",
    )
    learning_rate = hyperparams.Hyperparameter[float](
        default=0.0001,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Learning rate used during training (fit).",
    )
    momentum = hyperparams.Hyperparameter[float](
        default=0.9,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Momentum used during training (fit), only for optimizer_type sgd.",
    )
    weight_decay = hyperparams.Hyperparameter[float](
        default=0.0001,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Weight decay (L2 regularization) used during training (fit).",
    )
    shuffle = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Shuffle minibatches in each epoch of training (fit).",
    )
    fit_threshold = hyperparams.Hyperparameter[float](
        default=1e-5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Threshold of loss value to early stop training (fit).",
    )
    num_iterations = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Number of iterations to train the model.",
    )
Exemplo n.º 22
0
class Hyperparams(Hyperparams_ODBase):
    """
    Hyperparameters added on top of ``Hyperparams_ODBase`` for a tree-ensemble
    outlier detector.

    NOTE(review): the ``behaviour`` description points at PyOD's ``iforest``
    module, so this presumably configures an isolation forest - confirm
    against the primitive that consumes this class.
    """

    # ---- Ensemble size and sampling ------------------------------------
    n_estimators = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The number of base estimators in the ensemble.",
    )

    # Declared as a string enumeration; the value selects among the literal
    # choices 'auto', 'int' and 'float'.
    max_samples = hyperparams.Enumeration[str](
        default="auto",
        values=["auto", "int", "float"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The number of samples to draw from X to train each base estimator.",
    )

    max_features = hyperparams.Hyperparameter[float](
        default=1.0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="The number of features to draw from X to train each base estimator.",
    )

    bootstrap = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="If True, individual trees are fit on random subsets of the training data sampled with replacement. If False, sampling without replacement is performed.",
    )

    behaviour = hyperparams.Enumeration[str](
        default="new",
        values=["old", "new"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Refer to https://github.com/yzhao062/pyod/blob/master/pyod/models/iforest.py.",
    )

    # ---- Reproducibility and logging -----------------------------------
    # Either an explicit integer seed ('init') or no seed at all ('ninit');
    # the unseeded choice is the default.
    random_state = hyperparams.Union[Union[int, None]](
        configuration=OrderedDict([
            ("init", hyperparams.Hyperparameter[int](default=0)),
            ("ninit", hyperparams.Hyperparameter[None](default=None)),
        ]),
        default="ninit",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="the seed used by the random number generator.",
    )

    verbose = hyperparams.Hyperparameter[int](
        default=0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Controls the verbosity of the tree building process.",
    )
Exemplo n.º 23
0
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyperparameters of a primitive that joins a left and a right dataframe.

    ``left_col``, ``right_col``, ``accuracy`` and ``absolute_accuracy`` are
    unions: each accepts either a single value or a collection with one entry
    per join column. The single-value choice is the default for all of them.
    """

    n_jobs = hyperparams.Hyperparameter[int](
        default=-1,
        description='The value of the n_jobs parameter for the joblib library',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # ---- Join columns: one column name ('str') or several ('set') ------
    left_col = hyperparams.Union[typing.Union[str, typing.Sequence[str]]](
        configuration=collections.OrderedDict([
            (
                'set',
                hyperparams.Set(
                    elements=hyperparams.Hyperparameter[str](
                        default='',
                        description='Name of the column.',
                        semantic_types=[
                            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
                        ],
                    ),
                    default=(),
                    description='A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.',
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/ControlParameter',
                    ],
                ),
            ),
            (
                'str',
                hyperparams.Hyperparameter[str](
                    default='',
                    description='Name of the column.',
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/ControlParameter',
                    ],
                ),
            ),
        ]),
        default='str',
        description='Columns to join on from left dataframe',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
    right_col = hyperparams.Union[typing.Union[str, typing.Sequence[str]]](
        configuration=collections.OrderedDict([
            (
                'set',
                hyperparams.Set(
                    elements=hyperparams.Hyperparameter[str](
                        default='',
                        description='Name of the column.',
                        semantic_types=[
                            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
                        ],
                    ),
                    default=(),
                    description='A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.',
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/ControlParameter',
                    ],
                ),
            ),
            (
                'str',
                hyperparams.Hyperparameter[str](
                    default='',
                    description='Name of the column.',
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/ControlParameter',
                    ],
                ),
            ),
        ]),
        default='str',
        description='Columns to join on from right dataframe',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # ---- Matching tolerance --------------------------------------------
    # The per-column choice is keyed 'set' but is declared as an ordered List.
    accuracy = hyperparams.Union[typing.Union[float, typing.Sequence[float]]](
        configuration=collections.OrderedDict([
            (
                'set',
                hyperparams.List(
                    elements=hyperparams.Hyperparameter[float](-1),
                    default=(),
                    description='A list of accuracies, corresponding respectively to the columns to join on.',
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/ControlParameter',
                    ],
                ),
            ),
            ('float', hyperparams.Hyperparameter[float](0)),
        ]),
        default='float',
        description='Required accuracy of join ranging from 0.0 to 1.0, where 1.0 is an exact match.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
    join_type = hyperparams.Enumeration[str](
        values=('left', 'right', 'outer', 'inner', 'cross'),
        default='left',
        description='The type of join between two dataframes.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
    absolute_accuracy = hyperparams.Union[typing.Union[bool, typing.Sequence[bool]]](
        configuration=collections.OrderedDict([
            (
                'set',
                hyperparams.List(
                    elements=hyperparams.UniformBool(False),
                    default=(),
                    description='A list of flags for absolute values, corresponding respectively to the columns to join on.',
                    semantic_types=[
                        'https://metadata.datadrivendiscovery.org/types/ControlParameter',
                    ],
                ),
            ),
            ('bool', hyperparams.UniformBool(False)),
        ]),
        default='bool',
        description='Used for numeric to use absolute comparison instead of percentage.',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
Exemplo n.º 24
0
class Hyperparams(Hyperparams_ODBase):
    """
    Hyperparameters added on top of ``Hyperparams_ODBase`` for a kernel-based
    detector.

    NOTE(review): ``nu``, ``kernel`` and the libsvm remark in ``verbose``
    suggest a one-class SVM - confirm against the primitive that consumes
    this class.
    """

    # ---- Kernel definition ---------------------------------------------
    kernel = hyperparams.Enumeration[str](
        default="rbf",
        values=["linear", "poly", "rbf", "sigmoid", "precomputed"],
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Specifies the kernel type to be used in the algorithm.",
    )

    nu = hyperparams.Uniform(
        default=0.5,
        lower=0.0,
        upper=1.0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors.",
    )

    degree = hyperparams.Hyperparameter[int](
        default=3,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Degree of the polynomial kernel function (poly). Ignored by all other kernels.",
    )

    # Either a numeric coefficient ('init') or a string setting ('ninit');
    # the string choice with value 'auto' is the default.
    gamma = hyperparams.Union[Union[float, str]](
        configuration=OrderedDict([
            ("init", hyperparams.Hyperparameter[float](default=0.0)),
            ("ninit", hyperparams.Hyperparameter[str](default="auto")),
        ]),
        default="ninit",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Kernel coefficient for rbf, poly and sigmoid. If gamma is auto then 1/n_features will be used instead.",
    )

    coef0 = hyperparams.Hyperparameter[float](
        default=0.0,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Independent term in kernel function. It is only significant in poly and sigmoid.",
    )

    # ---- Solver behaviour ----------------------------------------------
    tol = hyperparams.Hyperparameter[float](
        default=0.001,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Tolerance for stopping criterion.",
    )

    shrinking = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Whether to use the shrinking heuristic.",
    )

    cache_size = hyperparams.Hyperparameter[int](
        default=200,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Specify the size of the kernel cache (in MB).",
    )

    verbose = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context.",
    )

    max_iter = hyperparams.Hyperparameter[int](
        default=-1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
        description="Hard limit on iterations within solver, or -1 for no limit.",
    )
Exemplo n.º 25
0
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyperparameters of an image model that is fine-tuned and can return
    per-class explanations and confidences.

    Control parameters fix where weights are stored, the input/feature
    dimensions and what ``produce`` returns; tuning parameters cover batch
    size, number of epochs and learning rate.
    """

    weights_filepath = hyperparams.Hyperparameter[str](
        default='model_weights.pth',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="weights of trained model will be saved to this filepath",
    )
    # A single value is used for both height and width (see description).
    image_dim = hyperparams.UniformInt(
        lower=1,
        upper=512,
        default=120,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="input dimension of image (height and width)",
    )
    # Effectively unbounded above: the upper limit is sys.maxsize.
    feature_dim = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=2048,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "feature dimension after reshaping flattened feature vector",
    )
    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=512,
        default=256,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="training and inference batch size",
    )
    # lower=0 permits zero epochs.
    epochs = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=25,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=
        'how many epochs for which to finetune classification head (happens first)'
    )
    learning_rate = hyperparams.Uniform(
        lower=0.0,
        upper=1.0,
        default=0.1,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="learning rate",
    )
    # NOTE: an `explanation_method` enumeration ('gradcam' /
    # 'gradcam-gbprop', default 'gradcam') was drafted here but is not
    # enabled; the draft has been removed because its description was
    # copied from an unrelated hyperparameter.
    explain_all_classes = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to return explanations for all classes or only the predicted class"
    )
    # Description fixed: the original read "explanations all classes"
    # (missing "for").
    all_confidences = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=
        "whether to return explanations for all classes and all confidences from produce method"
    )
Exemplo n.º 26
0
class IQRHyperparams(hyperparams.Hyperparams):
    """
    Hyperparameters of an IQR-based scaling primitive.

    Tuning parameters set the quantile range and whether the data is centered
    and/or scaled; control parameters select the columns to operate on and how
    the result is combined with the input.
    """

    # ---- Scaling behaviour (tuning) ------------------------------------
    # The lower quantile may range over [0, 25] and the upper over [75, 100];
    # the defaults give the 25-75 range.
    quantile_range_lowerbound = hyperparams.Uniform(
        default=25.0,
        lower=0.0,
        upper=25.0,
        upper_inclusive=True,
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description='IQR - Quantile range used to calculate scale',
    )

    quantile_range_upperbound = hyperparams.Uniform(
        default=75.0,
        lower=75.0,
        upper=100.0,
        upper_inclusive=True,
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description='IQR - Quantile range used to calculate scale',
    )

    with_centering = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'http://schema.org/Boolean',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description=' If True, center the data before scaling ',
    )

    with_scaling = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'http://schema.org/Boolean',
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description='If True, scale the data to unit variance (or equivalently, unit standard deviation).',
    )

    # ---- Column selection and output assembly (control) ----------------
    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.',
    )
    exclude_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
    )
    return_result = hyperparams.Enumeration(
        default='replace',
        values=['append', 'replace', 'new'],
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.',
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe',
    )
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
        description='Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
    )
Exemplo n.º 27
0
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyperparameters of an image model that is fine-tuned and can return
    per-class explanations and confidences.

    Control parameters fix where weights are stored, the input/feature
    dimensions and what ``produce`` returns; tuning parameters cover batch
    size, number of epochs and learning rate.
    """

    # ---- Model storage and dimensions (control) ------------------------
    weights_filepath = hyperparams.Hyperparameter[str](
        default='model_weights.pth',
        description='weights of trained model will be saved to this filepath',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
    image_dim = hyperparams.UniformInt(
        default=120,
        lower=1,
        upper=512,
        upper_inclusive=True,
        description='input dimension of image (height and width)',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
    # Effectively unbounded above: the upper limit is sys.maxsize.
    feature_dim = hyperparams.UniformInt(
        default=2048,
        lower=1,
        upper=sys.maxsize,
        upper_inclusive=True,
        description='feature dimension after reshaping flattened feature vector',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )

    # ---- Optimisation (tuning) -----------------------------------------
    batch_size = hyperparams.UniformInt(
        default=256,
        lower=1,
        upper=512,
        upper_inclusive=True,
        description='training and inference batch size',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )
    # lower=0 permits zero epochs.
    epochs = hyperparams.UniformInt(
        default=25,
        lower=0,
        upper=sys.maxsize,
        description='how many epochs for which to finetune classification head (happens first)',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )
    learning_rate = hyperparams.Uniform(
        default=0.1,
        lower=0.0,
        upper=1.0,
        description='learning rate',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
    )

    # ---- Output of produce (control) -----------------------------------
    explain_all_classes = hyperparams.UniformBool(
        default=False,
        description='whether to return explanations for all classes or only the predicted class',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
    all_confidences = hyperparams.UniformBool(
        default=True,
        description='whether to return explanations for all classes and all confidences from produce method',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter',
        ],
    )
Exemplo n.º 28
0
class Hyperparams(hyperparams.Hyperparams):
    """
    Hyperparameters of a missing-value imputation primitive.

    The first group configures the imputer itself (placeholder for missing
    values, strategy, indicator columns, constant fill value); the second
    group controls which columns are used and how the result is returned.

    NOTE(review): the parameter names and descriptions match scikit-learn's
    ``SimpleImputer`` - confirm against the wrapping primitive.
    """

    # ---- Imputer configuration -----------------------------------------
    # Either an integer placeholder or a float one; the float choice (NaN)
    # is the default.
    missing_values = hyperparams.Union(
        configuration=OrderedDict({
            'int': hyperparams.Hyperparameter[int](
                default=0,
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
            ),
            'float': hyperparams.Hyperparameter[float](
                default=numpy.nan,
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
            )
        }),
        default='float',
        description='The placeholder for the missing values. All occurrences of `missing_values` will be imputed.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
    )
    strategy = hyperparams.Enumeration[str](
        default='mean',
        values=['median', 'most_frequent', 'mean', 'constant'],
        description='The imputation strategy.  - If "mean", then replace missing values using the mean along each column. Can only be used with numeric data. - If "median", then replace missing values using the median along each column. Can only be used with numeric data. - If "most_frequent", then replace missing using the most frequent value along each column. Can be used with strings or numeric data. - If "constant", then replace missing values with fill_value. Can be used with strings or numeric data.  .. versionadded:: 0.20 strategy="constant" for fixed value imputation.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    # Previously the only top-level hyperparameter without a description;
    # the text follows the scikit-learn documentation for this parameter.
    add_indicator = hyperparams.UniformBool(
        default=False,
        description="If True, a `MissingIndicator` transform will stack onto output of the imputer's transform. This allows a predictive estimator to account for missingness despite imputation.",
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )
    # Either an explicit integer or None; None is the default.
    fill_value = hyperparams.Union(
        configuration=OrderedDict({
            'int': hyperparams.Hyperparameter[int](
                default=0,
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
            ),
            'none': hyperparams.Constant(
                default=None,
                semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
            )
        }),
        default='none',
        description='When strategy == "constant", fill_value is used to replace all occurrences of missing_values. If left to the default, fill_value will be 0 when imputing numerical data and "missing_value" for strings or object data types.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    )

    # ---- Column selection and output assembly --------------------------
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )

    # Semantic type attached to the generated columns.
    return_semantic_type = hyperparams.Enumeration[str](
        values=['https://metadata.datadrivendiscovery.org/types/Attribute', 'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter']
    )
Exemplo n.º 29
0
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameter declarations: one tuning parameter plus column/output controls.

    ``window_size`` is the only tuning parameter. Every other entry is a
    control parameter that governs which input columns are operated on and
    how the produced columns are returned.
    """

    # Tuning parameter.
    # The default of -1 means the entire time series is considered.
    # The structural type is spelled out explicitly to match the other
    # declarations in this class instead of relying on inference from the
    # default value.
    window_size = hyperparams.Hyperparameter[int](
        default=-1,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter',
        ],
        description="Window Size for decomposition")

    # Control parameters: column selection.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )

    # Control parameters: how results are combined with the input.
    # Explicit ``[str]`` mirrors ``return_semantic_type`` below.
    return_result = hyperparams.Enumeration[str](
        values=['append', 'replace', 'new'],
        default='append',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )

    # Semantic type attached to the columns this primitive generates.
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute'
        ],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description=
        'Decides what semantic type to attach to generated attributes',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
class CleaningFeaturizerHyperparameter(hyperparams.Hyperparams):
    """Hyperparameter declarations for a column cleaning/splitting featurizer.

    Declares the operation selector (``features``), three numeric thresholds,
    and the column-selection / result-placement control parameters. All
    entries are tagged as control parameters.
    """

    # Optional string selecting the operations to run; defaults to None.
    # NOTE(review): the description says "one or more operations" but the
    # declared type is a single optional string -- confirm the expected
    # format (e.g. a delimited list) against the code that reads it.
    features = hyperparams.Hyperparameter[Union[str, None]](
        None,
        description=
        'Select one or more operations to perform: "split_date_column", "split_phone_number_column", "split_alpha_numeric_column", "split_multi_value_column"',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])

    # Average-column-length threshold, range 10..100 (upper bound inclusive),
    # default 30.
    # NOTE(review): tagged with the schema.org Integer semantic type but
    # declared with ``Uniform`` rather than an integer-specific
    # hyperparameter -- confirm this is intended.
    split_on_column_with_avg_len = hyperparams.Uniform(
        default=30,
        lower=10,
        upper=100,
        upper_inclusive=True,
        description=
        'Threshold of avg column length for splitting punctuation or alphanumeric',
        semantic_types=[
            'http://schema.org/Integer',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])

    # Digit-density threshold, range 0.1..0.5 (upper bound inclusive); the
    # default sits at the lower bound.
    num_threshold = hyperparams.Uniform(
        default=0.1,
        lower=0.1,
        upper=0.5,
        upper_inclusive=True,
        description='Threshold for number character density of a column',
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])
    # Punctuation-row threshold, range 0.7..0.9; the default equals the
    # upper bound, which is valid because ``upper_inclusive`` is True.
    common_threshold = hyperparams.Uniform(
        default=0.9,
        lower=0.7,
        upper=0.9,
        upper_inclusive=True,
        description='Threshold for rows containing specific punctuation',
        semantic_types=[
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ])

    # Column selection: explicit include set; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    # Column selection: explicit exclude set; empty by default.
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    # Result placement; this class defaults to 'replace'.
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='replace',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    # Semantic-type based column filtering is off by default.
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe"
    )
    # Index columns are included by default in this class (default=True).
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description=
        "Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )