class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a KNN classification setup.

    Declares the neighbor count, the distance metric and the sample
    weighting scheme. All three are tagged as tuning parameters.
    """

    # NOTE(review): lower=0 admits a zero-neighbor configuration -- confirm
    # that this is intended.
    n_neighbors = hyperparams.UniformInt(
        default=5,
        lower=0,
        upper=sys.maxsize,
        description="number of neighbors on which to make classification decision",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    distance_metric = hyperparams.Enumeration(
        values=["euclidean", "dtw"],
        default="euclidean",
        description="whether to use euclidean or dynamic time warping distance metric in KNN computation",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )

    sample_weighting = hyperparams.Enumeration(
        values=["uniform", "inverse_distance"],
        default="uniform",
        description="whether to weight points uniformly or by the inverse of their distance",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering.

    ``algorithm`` and ``required_output`` are control parameters; the
    remaining entries are tuning parameters.
    """

    algorithm = hyperparams.Enumeration(
        default="HDBSCAN",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["DBSCAN", "HDBSCAN"],
        description="type of clustering algorithm to use",
    )
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        # Adjacent literals replace the in-string backslash continuation so
        # that no stray backslash / indentation ends up in the description.
        description=(
            "maximum distance between two samples for them to be considered as in "
            "the same neighborhood, used in DBSCAN algorithm"
        ),
    )
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="the minimum size of clusters",
    )
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="The number of samples in a neighbourhood for a point to be considered a core point.",
    )
    cluster_selection_method = hyperparams.Enumeration(
        default="eom",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["leaf", "eom"],
        description="Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN",
    )
    required_output = hyperparams.Enumeration(
        default="feature",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["prediction", "feature"],
        # Same continuation fix as for `eps`.
        description=(
            "Determines whether the output is a dataframe with just predictions, "
            "or an additional feature added to the input dataframe."
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering.

    ``algorithm`` and ``required_output`` are control parameters; the
    remaining entries are tuning parameters.
    """

    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use',
    )
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Adjacent literals replace the in-string backslash continuation so
        # that no stray backslash / indentation ends up in the description.
        description=(
            'maximum distance between two samples for them to be considered as in '
            'the same neighborhood, used in DBSCAN algorithm'
        ),
    )
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters',
    )
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='The number of samples in a neighbourhood for a point to be considered a core point.',
    )
    cluster_selection_method = hyperparams.Enumeration(
        default='eom',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['leaf', 'eom'],
        description='Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN',
    )
    required_output = hyperparams.Enumeration(
        default='feature',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['prediction', 'feature'],
        # Same continuation fix as for `eps`.
        description=(
            'Determines whether the output is a dataframe with just predictions, '
            'or an additional feature added to the input dataframe.'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for DBSCAN / HDBSCAN clustering with a long-format flag.

    ``algorithm`` and ``long_format`` are control parameters; the remaining
    entries are tuning parameters.
    """

    algorithm = hyperparams.Enumeration(
        default='HDBSCAN',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['DBSCAN', 'HDBSCAN'],
        description='type of clustering algorithm to use',
    )
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Adjacent literals replace the in-string backslash continuation so
        # that no stray backslash / indentation ends up in the description.
        description=(
            'maximum distance between two samples for them to be considered as in '
            'the same neighborhood, used in DBSCAN algorithm'
        ),
    )
    min_cluster_size = hyperparams.UniformInt(
        lower=2,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='the minimum size of clusters',
    )
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='The number of samples in a neighbourhood for a point to be considered a core point.',
    )
    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="whether the input dataset is already formatted in long format or not",
    )
    cluster_selection_method = hyperparams.Enumeration(
        default='eom',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['leaf', 'eom'],
        description='Determines how clusters are selected from the cluster hierarchy tree for HDBSCAN',
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for an affinity-matrix based clustering configuration.

    Covers the cluster count, k-means restarts, neighbor count, affinity
    construction method and the task type.
    """

    n_clusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=8,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='The dimension of the projection space',
    )
    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description='Number of times the k-means algorithm will be run with different centroid seeds',
    )
    n_neighbors = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'Number of neighbors when constructing the affinity matrix using '
            'n-neighbors, ignored for affinity="rbf"'
        ),
    )
    affinity = hyperparams.Enumeration(
        default='rbf',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        values=['rbf', 'nearest_neighbors'],
        description='method to construct affinity matrix',
    )
    # NOTE(review): this description talks about output shape rather than the
    # 'clustering' / 'classification' choice -- confirm the wording.
    task_type = hyperparams.Enumeration(
        default='classification',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['clustering', 'classification'],
        # Adjacent literals replace the in-string backslash continuation so
        # that no stray backslash / indentation ends up in the description.
        description=(
            'Determines whether the output is a dataframe with just predictions, '
            'or an additional feature added to the input dataframe.'
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters controlling centering/scaling and column handling.

    ``with_mean`` / ``with_std`` are tuning parameters; the remaining entries
    are control parameters for column selection and output assembly.
    """

    # Added by Guanchu
    with_mean = hyperparams.UniformBool(
        default=True,
        description='If True, center the data before scaling. This does not work (and will raise an exception) when attempted on sparse matrices, because centering them entails building a dense matrix which in common use cases is likely to be too large to fit in memory.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )
    with_std = hyperparams.UniformBool(
        default=True,
        description='If True, scale the data to unit variance (or equivalently, unit standard deviation).',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )

    # copy = hyperparams.UniformBool(
    #     default=True,
    #     description='If False, try to avoid a copy and do inplace scaling instead. This is not guaranteed to always work inplace; e.g. if the data is not a NumPy array or scipy.sparse CSR matrix, a copy may still be returned.',
    #     semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter']
    # )

    # Keep previous
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        # The literal used to be broken by a raw line break in the middle of
        # the string; it is now one logical string built from adjacent literals.
        description=(
            "Should parsed columns be appended, should they replace original columns, "
            "or should only parsed columns be returned? "
            "This hyperparam is ignored if use_semantic_types is set to false."
        ),
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute',
        ],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for feature-vector reduction and generalized mean pooling."""

    reduce_method = hyperparams.Enumeration(
        default='pca',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        values=['pca', 'svd'],
        description='dimensionality reduction method that is applied to feature vectors',
    )
    reduce_dimension = hyperparams.UniformInt(
        lower=0,
        upper=1024,
        default=128,
        upper_inclusive=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/ControlParameter"],
        description="number of dimensions in reduced feature vectors",
    )
    gem_p = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=1,
        upper_inclusive=True,
        semantic_types=["https://metadata.datadrivendiscovery.org/types/TuningParameter"],
        # Adjacent literals replace the in-string backslash continuations so
        # that no stray backslash / indentation ends up in the description.
        description=(
            "parameter p in generalized mean pooling; p > 1 increases the contrast of the "
            "pooled feature map; p = 1 equivalent to average pooling; p = +inf equivalent to "
            "max pooling."
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for time-series k-means style clustering.

    Besides the algorithm choice and its tuning knobs, this declares the
    indices of the timestamp, value and grouping columns and the name of the
    appended cluster column.
    """

    algorithm = hyperparams.Enumeration(
        default="TimeSeriesKMeans",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["GlobalAlignmentKernelKMeans", "TimeSeriesKMeans"],
        description="type of clustering algorithm to use",
    )
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="number of clusters to use in kernel kmeans algorithm",
    )
    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        # Adjacent literals replace the in-string backslash continuation so
        # that no stray backslash / indentation ends up in the description.
        description=(
            "Number of times the k-means algorithm will be run with different centroid seeds. "
            "Final result will be the best output on n_init consecutive runs in terms of inertia"
        ),
    )
    time_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Index of column in input dataframe containing timestamps.",
    )
    value_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Index of column in input dataframe containing the values associated with the timestamps.",
    )
    grouping_col_index = hyperparams.Hyperparameter[Union[int, None]](
        default=None,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Index of column in input dataframe containing the values used to mark timeseries groups",
    )
    output_col_name = hyperparams.Hyperparameter[str](
        default="__cluster",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Name to assign to cluster column that is appended to the input dataset",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a file-reading primitive with optional compression.

    All entries are control parameters: column selection, how results are
    returned, compression, the joblib ``n_jobs`` value and the band column name.
    """

    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="A set of column indices to force primitive to operate on. If any specified column does not contain filenames for supported media types, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
    )
    return_result = hyperparams.Enumeration(
        values=["append", "replace", "new"],
        default="append",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Should columns with read files be appended, should they replace original columns, or should only columns with read files be returned?",
    )
    add_index_columns = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        # The literal used to be broken by a raw line break in the middle of
        # the string; it is now one logical string built from adjacent literals.
        description=(
            'Also include primary index columns if input data has them. '
            'Applicable only if "return_result" is set to "new".'
        ),
    )
    compress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        # Adjacent literals replace the in-string backslash continuations so
        # that no stray backslash / indentation ends up in the description.
        description=(
            "If True, applies LZO compression algorithm to the data. "
            "Store a header consisting of the dtype character and the data shape as unsigned integers. "
            "Given c struct alignment, will occupy 16 bytes (1 + 4 + 4 + 4 + 3 ) padding"
        ),
    )
    n_jobs = hyperparams.Hyperparameter[int](
        default=64,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="The value of the n_jobs parameter for the joblib library",
    )
    band_column = hyperparams.Hyperparameter[str](
        default="band",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="Name of the band column used if no band semantic type is present.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for kernel / time-series k-means with a long-format flag."""

    algorithm = hyperparams.Enumeration(
        default='GlobalAlignmentKernelKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['GlobalAlignmentKernelKMeans', 'TimeSeriesKMeans'],
        description='type of clustering algorithm to use',
    )
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Adjacent literals replace the in-string backslash continuation so
        # that no stray backslash / indentation ends up in the description.
        description=(
            'number of clusters '
            'to use in kernel kmeans algorithm'
        ),
    )
    long_format = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="whether the input dataset is already formatted in long format or not",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for kernel / time-series k-means clustering."""

    algorithm = hyperparams.Enumeration(
        default='TimeSeriesKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=['GlobalAlignmentKernelKMeans', 'TimeSeriesKMeans'],
        description='type of clustering algorithm to use',
    )
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Adjacent literals replace the in-string backslash continuation so
        # that no stray backslash / indentation ends up in the description.
        description=(
            'number of clusters '
            'to use in kernel kmeans algorithm'
        ),
    )
    n_init = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=10,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Same continuation fix as for `nclusters`.
        description=(
            'Number of times the k-means algorithm '
            'will be run with different centroid seeds. '
            'Final result will be the best output on n_init consecutive runs in terms of inertia'
        ),
    )
class VoterHyperparameter(hyperparams.Hyperparams):
    """Single control parameter selecting how competing predictions are resolved."""

    classifier_voting_strategy = hyperparams.Enumeration(
        default="majority",
        values=["random", "majority"],
        description="For classification problem, pick a prediction result if there are multiple results based on which strategy",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
class UEncHyperparameter(hyperparams.Hyperparams):
    """Hyperparameters for an encoder: numeric conversion plus column handling."""

    text2int = hyperparams.UniformBool(
        default=False,
        semantic_types=[
            "http://schema.org/Boolean",
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
        description="Whether to convert everything to numerical. For text columns, each row may get converted into a column",
    )

    # Column selection.
    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
    exclude_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    # Output assembly.
    return_result = hyperparams.Enumeration(
        default="replace",
        values=["append", "replace", "new"],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        description="Controls whether semantic_types metadata will be used for filtering columns in input dataframe. Setting this to false makes the code ignore return_result and will produce only the output dataframe",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
    add_index_columns = hyperparams.UniformBool(
        default=True,
        description='Also include primary index columns if input data has them. Applicable only if "return_result" is set to "new".',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Control parameters: scoring metric, forced columns and encoder type."""

    # The allowed values are the concatenation of the three metric lists
    # defined elsewhere in this module.
    metric = hyperparams.Enumeration[str](
        default="f1Macro",
        values=classification_metrics + regression_metrics + clustering_metrics,
        description=(
            "The D3M scoring metric to use during the fit phase. This can be any of the regression, classification or "
            "clustering metrics."
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    encoder_type = hyperparams.Enumeration(
        values=["svm", "tfidf"],
        default="svm",
        description="Vectorization Strategy.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a geocoding primitive.

    Declares the geocoding resolution, the startup timeout granted to the
    backing search database and the LRU cache size.
    """

    geocoding_resolution = hyperparams.Enumeration(
        default="city",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        values=["city", "country", "state", "postcode"],
        # The previous text ("type of clustering algorithm to use") was a
        # copy-paste from a clustering primitive and did not describe this value.
        description="resolution at which to geocode: city, country, state or postcode",
    )
    rampup_timeout = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=100,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="timeout, how much time to give elastic search database to startup, may vary based on infrastructure",
    )
    cache_size = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=2000,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="LRU cache size",
    )
class EnsembleVotingHyperparams(hyperparams.Hyperparams):
    """Single control parameter choosing the ensemble combination method."""

    ensemble_method = hyperparams.Enumeration(
        default="majority",
        values=["majority", "mean", "max", "min", "median", "random"],
        description="Controls which ensemble method should be used",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters: ``endog``, a normalization norm and column handling.

    ``endog`` and ``norm`` are tuning parameters; the remaining entries are
    control parameters for column selection and output assembly.
    """

    # Added by Mia
    # NOTE(review): the description reads like an array argument while the
    # hyperparameter is a bounded int -- confirm the intended meaning.
    endog = hyperparams.Bounded[int](
        lower=2,
        upper=None,
        default=3,
        description='Array like time series.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )

    # keep previous
    norm = hyperparams.Enumeration[str](
        default='l2',
        values=['l1', 'l2', 'max'],
        description='The norm to use to normalize each non zero sample.',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/TuningParameter'],
    )
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        # The literal used to be broken by a raw line break in the middle of
        # the string; it is now one logical string built from adjacent literals.
        description=(
            "Controls whether semantic_types metadata will be used for filtering columns in input dataframe. "
            "Setting this to false makes the code ignore return_result and will produce only the output dataframe"
        ),
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute',
        ],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
class MetricsHyperparams(hyperparams.Hyperparams, set_names=False):
    """One metric entry: the metric name plus optional ``pos_label`` and ``k``."""

    metric = hyperparams.Enumeration(
        # Default is ignored.
        # TODO: Remove default. See: https://gitlab.com/datadrivendiscovery/d3m/issues/141
        default='ACCURACY',
        # One allowed value per member of problem.PerformanceMetric.
        values=[member.name for member in problem.PerformanceMetric],
    )
    pos_label = hyperparams.Hyperparameter[typing.Optional[str]](None)
    k = hyperparams.Hyperparameter[typing.Optional[int]](None)
def setup(self):
    """Create the two hyperparameter instances stored on ``self``.

    ``numerical`` is a uniform float on [0, 1) style bounds (lower=0,
    upper=1, default 0.5); ``enumeration`` offers the integers 0..999 with
    default 0.
    """
    self.numerical = hyperparams.Uniform(default=0.5, lower=0, upper=1)

    thousand_choices = list(range(1000))
    self.enumeration = hyperparams.Enumeration(default=0, values=thousand_choices)
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for a windowed computation plus column handling.

    All entries are control parameters.
    """

    window_size = hyperparams.UniformInt(
        lower=0,
        upper=100,  # TODO: Define the correct upper bound
        default=50,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="window size to calculate",
    )

    # Keep previous
    dataframe_resource = hyperparams.Hyperparameter[typing.Union[str, None]](
        default=None,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Resource ID of a DataFrame to extract if there are multiple tabular resources inside a Dataset and none is a dataset entry point.",
    )
    # NOTE(review): the non-empty defaults for use_columns / exclude_columns
    # hard-code specific column indices -- confirm they are intended.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(2,),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(0, 1, 3,),
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    use_semantic_types = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        # The literal used to be broken by a raw line break in the middle of
        # the string; it is now one logical string built from adjacent literals.
        description=(
            "Controls whether semantic_types metadata will be used for filtering columns in input dataframe. "
            "Setting this to false makes the code ignore return_result and will produce only the output dataframe"
        ),
    )
    add_index_columns = hyperparams.UniformBool(
        default=False,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Also include primary index columns if input data has them. Applicable only if \"return_result\" is set to \"new\".",
    )
    error_on_no_input = hyperparams.UniformBool(
        default=True,
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
        description="Throw an exception if no input column is selected/provided. Defaults to true to behave like sklearn. To prevent pipelines from breaking set this to False.",
    )
    return_semantic_type = hyperparams.Enumeration[str](
        values=[
            'https://metadata.datadrivendiscovery.org/types/Attribute',
            'https://metadata.datadrivendiscovery.org/types/ConstructedAttribute',
        ],
        default='https://metadata.datadrivendiscovery.org/types/Attribute',
        description='Decides what semantic type to attach to generated attributes',
        semantic_types=['https://metadata.datadrivendiscovery.org/types/ControlParameter'],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Control parameters for text summarization.

    Selects the summarization algorithm, the source type, the stemming
    language, what data is returned and how many sentences to produce.
    """

    algorithm = hyperparams.Enumeration(
        values=["luhn", "edmundson", "lsa", "text_rank", "sum_basic", "kl"],
        default="text_rank",
        description="type of summarization algorithm to use",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    source_type = hyperparams.Enumeration(
        values=["plain_text", "url"],
        default="plain_text",
        description="type of source documents to be analyzed",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    language = hyperparams.Enumeration(
        values=[
            "danish",
            "dutch",
            "english",
            "finnish",
            "french",
            "german",
            "hungarian",
            "italian",
            "norwegian",
            "porter",
            "portuguese",
            "romanian",
            "russian",
            "spanish",
            "swedish",
        ],
        default="english",
        description="language to use for the NLTK stemming process",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    return_result = hyperparams.Enumeration(
        values=["new", "all", "replace"],
        default="all",
        description="what data should be returned",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    nsentences = hyperparams.UniformInt(
        default=20,
        lower=1,
        upper=sys.maxsize,
        description="number of summary sentences to return",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for feature generation with null/correlation filtering.

    ``max_percent_null``, ``max_correlation`` and ``max_features`` are tuning
    parameters; the column-selection and ``return_result`` entries are
    control parameters.
    """

    max_percent_null = hyperparams.Bounded[float](
        default=.5,
        lower=0,
        upper=1,
        # The previous wording ("A lower value means features may have more
        # null nulls.") stated the inverse of what a maximum implies.
        description=(
            'The maximum percentage of null values allowed in returned features. '
            'A lower value means features will have fewer null values.'
        ),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
    )
    max_correlation = hyperparams.Bounded[float](
        default=.9,
        lower=0,
        upper=1,
        description='The maximum allowed correlation between any two features returned. A lower value means features will be more uncorrelated',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
    )
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
    )
    exclude_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="A set of column indices to not operate on. Applicable only if \"use_columns\" is not provided.",
    )
    return_result = hyperparams.Enumeration(
        values=['append', 'replace', 'new'],
        default='new',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        description="Should parsed columns be appended, should they replace original columns, or should only parsed columns be returned? This hyperparam is ignored if use_semantic_types is set to false.",
    )
    max_features = hyperparams.Hyperparameter[int](
        default=100,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description="Cap the number of generated features to this number. If -1, no limit.",
    )
class Hyperparams(hyperparams.Hyperparams):
    """Control parameters for column parsing.

    Selects which semantic types are parsed, which columns are considered,
    how numeric conversion errors are handled and whether time parsing is
    fuzzy.
    """

    parsing_semantics = hyperparams.Set(
        # Each element must be one of the supported semantic types.
        elements=hyperparams.Enumeration(
            default="http://schema.org/Float",
            values=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
                "http://schema.org/DateTime",
                "https://metadata.datadrivendiscovery.org/types/CategoricalData",
            ],
        ),
        default=(
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
        ),
        description="A set of semantic types to parse. One can provide a subset of supported semantic types to limit what the primitive parses.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    use_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description="A set of column indices to force primitive to operate on. If any specified column cannot be parsed, it is skipped.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    exclude_columns = hyperparams.Set(
        default=(),
        elements=hyperparams.Hyperparameter[int](-1),
        description='A set of column indices to not operate on. Applicable only if "use_columns" is not provided.',
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    error_handling = hyperparams.Enumeration[str](
        values=("ignore", "raise", "coerce"),
        default="coerce",
        description="Setting to deal with error when converting a column to numeric value.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )

    fuzzy_time_parsing = hyperparams.UniformBool(
        default=True,
        description="Use fuzzy time parsing.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter",
        ],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for feature reduction, pooling, ranking and caching.

    Declares the dimensionality reduction method and target dimension, the
    generalized mean pooling exponent, a negative-annotation threshold used
    for ranking, and the location of the dot-product cache.

    Multi-line descriptions use implicit string concatenation rather than a
    backslash continuation inside the literal, so source indentation never
    ends up in the runtime description text.
    """

    # Which reduction algorithm is applied to feature vectors.
    reduce_method = hyperparams.Enumeration(
        default="pca",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["pca", "svd"],
        description=(
            "dimensionality reduction method that is applied to feature vectors"
        ),
    )

    # Size of the reduced feature vectors, bounded to [0, 1024].
    reduce_dimension = hyperparams.UniformInt(
        lower=0,
        upper=1024,
        default=128,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description="number of dimensions in reduced feature vectors",
    )

    # Exponent p of generalized mean pooling.
    gem_p = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=1,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description=(
            "parameter p in generalized mean pooling; p > 1 increases the "
            "contrast of the pooled feature map; p = 1 equivalent to average "
            "pooling; p = +inf equivalent to max pooling."
        ),
    )

    # Number of negative annotations required before the ranking rule changes.
    denominator_min = hyperparams.UniformInt(
        lower=0,
        upper=sys.maxsize,
        default=5,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "only ranks according to positive annotations until this many "
            "negative annotations are obtained"
        ),
    )

    # Location where already computed dot products are cached.
    dot_products_cache = hyperparams.Hyperparameter[str](
        default="dot_product_cache",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "already computed dot products will be cached in this location"
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters selecting how duplicated rows are collapsed."""

    # Either keep the first duplicate ('first') or average them ('average').
    keep_option = hyperparams.Enumeration(
        description=(
            "When dropping rows, choose to keep the first one of duplicated "
            "data or calculate their average"
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        default="first",
        values=["first", "average"],
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for running a pretrained inference model over columns.

    Declares the columns to operate on, which pretrained model to use, the
    inference batch size, whether features are pooled across spatial
    dimensions, and whether input data is LZ4-decompressed first.

    Multi-line descriptions use implicit string concatenation rather than a
    backslash continuation inside the literal, so source indentation never
    ends up in the runtime description text.
    """

    # Explicit column whitelist; empty by default.
    use_columns = hyperparams.Set(
        elements=hyperparams.Hyperparameter[int](-1),
        default=(),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "A set of column indices to force primitive to operate on. "
            "If any specified column cannot be parsed, it is skipped."
        ),
    )

    # Which pretrained model produces the features.
    inference_model = hyperparams.Enumeration(
        default="moco",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=["amdim", "moco"],
        # Grammar fix: the original read "type pretrained inference model".
        description="type of pretrained inference model to use",
    )

    # Batch size used during inference, bounded to [1, 512].
    batch_size = hyperparams.UniformInt(
        lower=1,
        upper=512,
        default=256,
        upper_inclusive=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        description="inference batch size",
    )

    # Whether returned features are pooled across spatial dimensions.
    pool_features = hyperparams.UniformBool(
        default=True,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "whether to pool features across spatial dimensions in returned frame"
        ),
    )

    # Whether the input is LZ4-compressed and must be decompressed first.
    decompress_data = hyperparams.Hyperparameter[bool](
        default=False,
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        description=(
            "If True, applies LZ4 decompression algorithm to the data. "
            "Compressed data stores a header consisting of the dtype character "
            "and the data shape as unsigned integers. Given c struct alignment, "
            "will occupy 16 bytes (1 + 4 + 4 + 4 + 3 ) padding"
        ),
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters choosing between ablation and imputation of the data."""

    # Strategy selector; imputation is the default.
    continuity_option = hyperparams.Enumeration(
        description="Choose ablation or imputation the original data",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        default="imputation",
        values=["ablation", "imputation"],
    )

    # Timestamp interval, documented as used only by the imputation strategy.
    interval = hyperparams.Uniform(
        description="Only used in imputation, give the timestamp interval.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        lower=1e-9,
        upper=10000000000,
        default=1,
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for fairness pre-processing.

    Declares the pre-processing algorithm, the columns treated as protected
    attributes, and the label value considered favorable.
    """

    # Which fairness pre-processing algorithm is applied.
    algorithm = hyperparams.Enumeration(
        description="type of fairness pre-processing algorithm to use",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        values=[
            "Disparate_Impact_Remover",
            "Learning_Fair_Representations",
            "Reweighing",
        ],
        default="Disparate_Impact_Remover",
    )

    # Column indices used as protected attributes; empty by default.
    protected_attribute_cols = hyperparams.List(
        description="A set of column indices to use as protected attributes.",
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        default=[],
        elements=hyperparams.Hyperparameter[int](-1),
    )

    # Label value treated as the favorable outcome, bounded to [0, 1].
    favorable_label = hyperparams.Bounded[float](
        description=(
            "label value which is considered favorable (i.e. positive) "
            "in the binary label case"
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        lower=0.,
        upper=1.,
        default=1.,
    )
class Hyperparams(hyperparams.Hyperparams):
    """Hyperparameters for clustering.

    Declares the clustering algorithm, the number of clusters for the kernel
    k-means option, and the neighborhood distance and sample count used by
    the DBSCAN-family options.

    Multi-line descriptions use implicit string concatenation rather than a
    backslash continuation inside the literal, so source indentation never
    ends up in the runtime description text.
    """

    # Which clustering algorithm is run.
    algorithm = hyperparams.Enumeration(
        default='GlobalAlignmentKernelKMeans',
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/ControlParameter'
        ],
        values=[
            'GlobalAlignmentKernelKMeans',
            'TimeSeriesKMeans',
            'DBSCAN',
            'HDBSCAN',
        ],
        description='type of clustering algorithm to use',
    )

    # Cluster count, at least 1.
    nclusters = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=3,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Typo fix: the original read "to user in kernel kmeans algorithm".
        description='number of clusters to use in kernel kmeans algorithm',
    )

    # Neighborhood radius, documented as used by DBSCAN.
    eps = hyperparams.Uniform(
        lower=0,
        upper=sys.maxsize,
        default=0.5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        # Typo fix: "neigborhood" -> "neighborhood".
        description=(
            'maximum distance between two samples for them to be considered '
            'as in the same neighborhood, used in DBSCAN algorithm'
        ),
    )

    # Core-point sample threshold, documented as used by DBSCAN and HDBSCAN.
    min_samples = hyperparams.UniformInt(
        lower=1,
        upper=sys.maxsize,
        default=5,
        semantic_types=[
            'https://metadata.datadrivendiscovery.org/types/TuningParameter'
        ],
        description=(
            'number of samples in a neighborhood for a point to be considered '
            'as a core point, used in DBSCAN and HDBSCAN algorithms'
        ),
    )
class Hyperparams(Hyperparams_ODBase):
    """Extends Hyperparams_ODBase with neighbor-count and ABOD-method settings."""

    # Neighbor count used for k-neighbors queries.
    n_neighbors = hyperparams.Hyperparameter[int](
        description=(
            "Number of neighbors to use by default for k neighbors queries."
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/TuningParameter"
        ],
        default=10,
    )

    # ABOD variant: 'fast' or 'default', as explained in the description.
    method = hyperparams.Enumeration(
        description=(
            "'fast': fast ABOD. Only consider n_neighbors of training points "
            "'default': original ABOD with all training points, which could be slow"
        ),
        semantic_types=[
            "https://metadata.datadrivendiscovery.org/types/ControlParameter"
        ],
        default="fast",
        values=["fast", "default"],
    )