def tfidf2(table, group_by=None, **params):
    """Validate the arguments and run ``_tfidf2``, optionally per group.

    Steps, in order: ``check_required_parameters`` for ``'table'``, default
    filling through ``get_default_from_parameters_if_required``, then
    ``validate`` with ``greater_than_or_equal_to`` 0 on ``min_df``,
    ``greater_than_or_equal_to`` 2 on ``num_voca`` and ``greater_than`` 0 on
    ``max_df``.

    When ``group_by`` is given the call goes through ``_function_by_group``;
    otherwise ``_tfidf2`` is called directly with ``table``.
    """
    check_required_parameters(_tfidf2, params, ['table'])
    params = get_default_from_parameters_if_required(params, _tfidf2)

    validate(
        greater_than_or_equal_to(params, 0, 'min_df'),
        greater_than_or_equal_to(params, 2, 'num_voca'),
        greater_than(params, 0, 'max_df'),
    )

    if group_by is None:
        return _tfidf2(table, **params)
    return _function_by_group(_tfidf2, table, group_by=group_by, **params)
def gaussian_mixture_train(table, group_by=None, **params):
    """Validate the arguments and run ``_gaussian_mixture_train``.

    Required parameters are checked first, defaults are filled in, and the
    following validators are handed to ``validate``:
    ``greater_than_or_equal_to`` 1 on ``number_of_components`` and
    ``max_iteration``; ``greater_than`` 0 on ``tolerance`` and
    ``regularize_covariance``.

    A non-``None`` ``group_by`` routes the call through
    ``_function_by_group``.
    """
    check_required_parameters(_gaussian_mixture_train, params, ['table'])
    params = get_default_from_parameters_if_required(
        params, _gaussian_mixture_train)

    validate(
        greater_than_or_equal_to(params, 1, 'number_of_components'),
        greater_than(params, 0, 'tolerance'),
        greater_than(params, 0, 'regularize_covariance'),
        greater_than_or_equal_to(params, 1, 'max_iteration'),
    )

    if group_by is None:
        return _gaussian_mixture_train(table, **params)
    return _function_by_group(
        _gaussian_mixture_train, table, group_by=group_by, **params)
def friedman_test(table, group_by=None, **params):
    """Check required parameters, fill defaults and run ``_friedman_test``.

    No value validators are applied here.  When ``group_by`` is given the
    call is dispatched through ``_function_by_group``; otherwise
    ``_friedman_test`` is invoked directly with ``table``.
    """
    check_required_parameters(_friedman_test, params, ['table'])
    params = get_default_from_parameters_if_required(params, _friedman_test)

    if group_by is None:
        return _friedman_test(table, **params)
    return _function_by_group(
        _friedman_test, table, group_by=group_by, **params)
def one_sample_ttest(table, group_by=None, **params):
    """Validate the arguments and run ``_one_sample_ttest``.

    After the required-parameter check and default filling, ``conf_level``
    is checked with ``from_to(params, 0, 1, 'conf_level')``.  A non-``None``
    ``group_by`` dispatches through ``_function_by_group``.
    """
    check_required_parameters(_one_sample_ttest, params, ['table'])
    params = get_default_from_parameters_if_required(
        params, _one_sample_ttest)

    validate(from_to(params, 0, 1, 'conf_level'))

    if group_by is None:
        return _one_sample_ttest(table, **params)
    return _function_by_group(
        _one_sample_ttest, table, group_by=group_by, **params)
def autocorrelation(table, group_by=None, **params):
    """Validate the arguments and run ``_autocorrelation``.

    Validators passed to ``validate``: ``greater_than_or_equal_to`` 1 on
    ``nlags`` and ``from_under(params, 0.0, 1.0, 'conf_level')``.

    Returns the result of ``_function_by_group`` when ``group_by`` is given,
    otherwise the direct result of ``_autocorrelation``.
    """
    check_required_parameters(_autocorrelation, params, ['table'])
    params = get_default_from_parameters_if_required(params, _autocorrelation)

    validate(
        greater_than_or_equal_to(params, 1, 'nlags'),
        from_under(params, 0.0, 1.0, 'conf_level'),
    )

    if group_by is None:
        return _autocorrelation(table, **params)
    return _function_by_group(
        _autocorrelation, table, group_by=group_by, **params)
def isotonic_regression_train(table, group_by=None, **params):
    """Fill defaults, check required parameters, run the trainer.

    Note the order: defaults are filled *before* the required-parameter
    check in this wrapper.  With ``group_by`` set, the call is dispatched
    through ``_function_by_group``; otherwise ``_isotonic_regression_train``
    is called directly.
    """
    params = get_default_from_parameters_if_required(
        params, _isotonic_regression_train)
    check_required_parameters(_isotonic_regression_train, params, ['table'])

    if group_by is None:
        return _isotonic_regression_train(table, **params)
    return _function_by_group(
        _isotonic_regression_train, table, group_by=group_by, **params)
def wilcoxon_test2(table, group_by=None, **params):
    """Check required parameters, fill defaults and run ``_wilcoxon_test2``.

    No value validators are applied.  A non-``None`` ``group_by`` sends the
    call through ``_function_by_group``.
    """
    check_required_parameters(_wilcoxon_test2, params, ['table'])
    params = get_default_from_parameters_if_required(params, _wilcoxon_test2)

    if group_by is None:
        return _wilcoxon_test2(table, **params)
    return _function_by_group(
        _wilcoxon_test2, table, group_by=group_by, **params)
def holt_winters_predict(model, **params):
    """Validate the arguments and run ``_holt_winters_predict``.

    ``prediction_num`` is checked with ``greater_than_or_equal_to`` 1.  If
    ``model`` contains the key ``'_grouped_data'`` the call is dispatched
    through ``_function_by_group`` (with ``model`` passed by keyword);
    otherwise ``_holt_winters_predict`` is called directly.
    """
    check_required_parameters(_holt_winters_predict, params, ['model'])
    params = get_default_from_parameters_if_required(
        params, _holt_winters_predict)

    validate(greater_than_or_equal_to(params, 1, 'prediction_num'))

    if '_grouped_data' not in model:
        return _holt_winters_predict(model, **params)
    return _function_by_group(_holt_winters_predict, model=model, **params)
def ewma(table, group_by=None, **params):
    """Validate the arguments and run ``_ewma``, optionally per group.

    Validators passed to ``validate``: ``greater_than_or_equal_to`` 1 on
    ``period_number`` and ``from_to(params, 0, 1, 'custom_ratio')``.
    """
    check_required_parameters(_ewma, params, ['table'])
    params = get_default_from_parameters_if_required(params, _ewma)

    validate(
        greater_than_or_equal_to(params, 1, 'period_number'),
        from_to(params, 0, 1, 'custom_ratio'),
    )

    if group_by is None:
        return _ewma(table, **params)
    return _function_by_group(_ewma, table, group_by=group_by, **params)
def profile_table(table, group_by=None, **params):
    """Validate the arguments and run ``_profile_table``.

    Validators passed to ``validate``: ``greater_than_or_equal_to`` 1 on
    ``bins`` and ``greater_than`` 0.0 on ``correlation_threshold``.  A
    non-``None`` ``group_by`` dispatches through ``_function_by_group``.
    """
    check_required_parameters(_profile_table, params, ['table'])
    params = get_default_from_parameters_if_required(params, _profile_table)

    validate(
        greater_than_or_equal_to(params, 1, 'bins'),
        greater_than(params, 0.0, 'correlation_threshold'),
    )

    if group_by is None:
        return _profile_table(table, **params)
    return _function_by_group(
        _profile_table, table, group_by=group_by, **params)
def mean_shift(table, group_by=None, **params):
    """Validate the arguments and run ``_mean_shift``.

    ``bandwidth`` is checked with ``greater_than`` 0.0.  Returns the result
    of ``_function_by_group`` when ``group_by`` is given, otherwise the
    direct result of ``_mean_shift``.
    """
    check_required_parameters(_mean_shift, params, ['table'])
    params = get_default_from_parameters_if_required(params, _mean_shift)

    validate(greater_than(params, 0.0, 'bandwidth'))

    if group_by is None:
        return _mean_shift(table, **params)
    return _function_by_group(_mean_shift, table, group_by=group_by, **params)
def unit_root_test(table, group_by=None, **params):
    """Fill defaults, validate, check required parameters, run the test.

    In this wrapper ``maxlag`` is validated (``greater_than_or_equal_to`` 0)
    *before* the required-parameter check runs.  A non-``None`` ``group_by``
    dispatches through ``_function_by_group``.
    """
    params = get_default_from_parameters_if_required(params, _unit_root_test)

    validate(greater_than_or_equal_to(params, 0, 'maxlag'))
    check_required_parameters(_unit_root_test, params, ['table'])

    if group_by is None:
        return _unit_root_test(table, **params)
    return _function_by_group(
        _unit_root_test, table, group_by=group_by, **params)
def word2vec(table, **params):
    """Validate the arguments and run ``_word2vec`` on ``table``.

    Each of ``size``, ``window``, ``min_count``, ``train_epoch``,
    ``workers``, ``negative`` and ``topn`` is checked with
    ``greater_than_or_equal_to`` 1.  This wrapper has no group-by dispatch.
    """
    check_required_parameters(_word2vec, params, ['table'])
    params = get_default_from_parameters_if_required(params, _word2vec)

    # Every checked parameter shares the same lower bound of 1.
    at_least_one = ('size', 'window', 'min_count', 'train_epoch',
                    'workers', 'negative', 'topn')
    checks = [greater_than_or_equal_to(params, 1, name)
              for name in at_least_one]
    validate(*checks)

    return _word2vec(table, **params)
def knn_regression(train_table, test_table, **params):
    """Validate the arguments and run ``_knn_regression``.

    Required parameters are ``'train_table'`` and ``'test_table'``.  ``k``,
    ``leaf_size`` and ``p`` are each checked with
    ``greater_than_or_equal_to`` 1.  This wrapper has no group-by dispatch.
    """
    check_required_parameters(
        _knn_regression, params, ['train_table', 'test_table'])
    params = get_default_from_parameters_if_required(params, _knn_regression)

    validate(
        greater_than_or_equal_to(params, 1, 'k'),
        greater_than_or_equal_to(params, 1, 'leaf_size'),
        greater_than_or_equal_to(params, 1, 'p'),
    )

    return _knn_regression(train_table, test_table, **params)
def xgb_regression_train(table, group_by=None, **params):
    """Fill defaults, validate, check required parameters, run the trainer.

    Validators passed to ``validate`` (before the required-parameter check):
    ``greater_than_or_equal_to`` 1 on ``max_depth`` and ``n_estimators``,
    and ``greater_than_or_equal_to`` 0.0 on ``learning_rate``.

    Returns the result of ``_function_by_group`` when ``group_by`` is given,
    otherwise the direct result of ``_xgb_regression_train``.
    """
    params = get_default_from_parameters_if_required(
        params, _xgb_regression_train)

    validate(
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 0.0, 'learning_rate'),
        greater_than_or_equal_to(params, 1, 'n_estimators'),
    )
    check_required_parameters(_xgb_regression_train, params, ['table'])

    if group_by is None:
        return _xgb_regression_train(table, **params)
    return _function_by_group(
        _xgb_regression_train, table, group_by=group_by, **params)
def replace_missing_string(table, group_by=None, **params):
    """Validate the arguments and run ``_replace_missing_string``.

    ``limit`` is checked with ``greater_than_or_equal_to`` 1.  A
    non-``None`` ``group_by`` dispatches through ``_function_by_group``.
    """
    check_required_parameters(_replace_missing_string, params, ['table'])
    params = get_default_from_parameters_if_required(
        params, _replace_missing_string)

    validate(greater_than_or_equal_to(params, 1, 'limit'))

    if group_by is None:
        return _replace_missing_string(table, **params)
    return _function_by_group(
        _replace_missing_string, table, group_by=group_by, **params)
def holt_winters_train(table, group_by=None, **params):
    """Validate the arguments and run ``_holt_winters_train``.

    ``period`` is checked with ``greater_than_or_equal_to`` 2.  A
    non-``None`` ``group_by`` dispatches through ``_function_by_group``.
    """
    check_required_parameters(_holt_winters_train, params, ['table'])
    params = get_default_from_parameters_if_required(
        params, _holt_winters_train)

    validate(greater_than_or_equal_to(params, 2, 'period'))

    if group_by is None:
        return _holt_winters_train(table, **params)
    return _function_by_group(
        _holt_winters_train, table, group_by=group_by, **params)
def association_rule_visualization(table, group_by=None, **params):
    """Fill defaults, validate, check required parameters, run the plot.

    ``figure_size_muliplier`` (spelling as used by the parameter key),
    ``edge_length_scaling``, ``node_size_scaling`` and ``font_size`` are
    each checked with ``greater_than`` 0, before the required-parameter
    check.  A non-``None`` ``group_by`` dispatches through
    ``_function_by_group``.
    """
    params = get_default_from_parameters_if_required(
        params, _association_rule_visualization)

    validate(
        greater_than(params, 0, 'figure_size_muliplier'),
        greater_than(params, 0, 'edge_length_scaling'),
        greater_than(params, 0, 'node_size_scaling'),
        greater_than(params, 0, 'font_size'),
    )
    check_required_parameters(
        _association_rule_visualization, params, ['table'])

    if group_by is None:
        return _association_rule_visualization(table, **params)
    return _function_by_group(
        _association_rule_visualization, table, group_by=group_by, **params)
def gsdmm(table, **params):
    """Validate the arguments and run ``_gsdmm`` on ``table``.

    Validators passed to ``validate``: ``greater_than_or_equal_to`` 2 on
    ``K``; ``greater_than_or_equal_to`` 0.0 on ``alpha`` and ``beta``;
    ``greater_than_or_equal_to`` 1 on ``max_iter`` and ``num_topic_words``.
    This wrapper has no group-by dispatch.
    """
    check_required_parameters(_gsdmm, params, ['table'])
    params = get_default_from_parameters_if_required(params, _gsdmm)

    validate(
        greater_than_or_equal_to(params, 2, 'K'),
        greater_than_or_equal_to(params, 0.0, 'alpha'),
        greater_than_or_equal_to(params, 0.0, 'beta'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
        greater_than_or_equal_to(params, 1, 'num_topic_words'),
    )

    return _gsdmm(table, **params)
def term_term_mtx(table, model, group_by=None, **params):
    """Check required parameters, fill defaults and run ``_term_term_mtx``.

    Required parameters are ``'table'`` and ``'model'``.  No value
    validators are registered; ``validate`` is still called with no
    arguments.

    Dispatch is decided by ``model``, not by ``group_by``: when ``model``
    contains the key ``'_grouped_data'`` the call goes through
    ``_function_by_group`` (forwarding ``group_by``); otherwise
    ``_term_term_mtx`` is called directly.
    """
    check_required_parameters(_term_term_mtx, params, ['table', 'model'])
    params = get_default_from_parameters_if_required(params, _term_term_mtx)

    # Nothing to check for this function; the call is kept for parity.
    validate()

    if '_grouped_data' not in model:
        return _term_term_mtx(table, model, **params)
    return _function_by_group(
        _term_term_mtx, table, model, group_by=group_by, **params)
def logistic_regression_train(table, group_by=None, **params):
    """Validate the arguments and run ``_logistic_regression_train``.

    Validators passed to ``validate``: ``greater_than`` 0.0 on ``C`` and
    ``tol``; ``greater_than_or_equal_to`` 1 on ``max_iter``.

    Returns the result of ``_function_by_group`` when ``group_by`` is given,
    otherwise the direct result of ``_logistic_regression_train``.
    """
    check_required_parameters(_logistic_regression_train, params, ['table'])
    params = get_default_from_parameters_if_required(
        params, _logistic_regression_train)

    validate(
        greater_than(params, 0.0, 'C'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
        greater_than(params, 0.0, 'tol'),
    )

    if group_by is None:
        return _logistic_regression_train(table, **params)
    return _function_by_group(
        _logistic_regression_train, table, group_by=group_by, **params)
def lda(table, group_by=None, **params):
    """Validate the arguments and run ``_lda``, optionally per group.

    The validator set depends on two parameters:

    * ``tol`` (``greater_than_or_equal_to`` 0) is checked only when
      ``params['solver'] == 'svd'``.
    * ``n_components`` (``greater_than_or_equal_to`` 1) is always checked.
    * ``shrinkage_value`` (``greater_than_or_equal_to`` 0 and
      ``less_than_or_equal_to`` 1) is checked only when
      ``params['shrinkage'] == 'float'``.

    Raises:
        BrighticsFunctionException: when ``group_by`` is given and one of
            its entries equals the ``label_col`` parameter.
    """
    check_required_parameters(_lda, params, ['table'])
    params = get_default_from_parameters_if_required(params, _lda)

    # Read both switches before building any validator.
    svd_solver = params['solver'] == 'svd'
    float_shrinkage = params['shrinkage'] == 'float'

    checks = []
    if svd_solver:
        checks.append(greater_than_or_equal_to(params, 0, 'tol'))
    checks.append(greater_than_or_equal_to(params, 1, 'n_components'))
    if float_shrinkage:
        checks.append(greater_than_or_equal_to(params, 0, 'shrinkage_value'))
        checks.append(less_than_or_equal_to(params, 1, 'shrinkage_value'))
    validate(*checks)

    if group_by is None:
        return _lda(table, **params)

    # The label column must not also be used as a grouping column.
    label_col = params["label_col"] if "label_col" in params else ""
    if any(group == label_col for group in group_by):
        elist = [{
            '0100': "Group by column should be different from label column"
        }]
        print(elist)
        raise BrighticsFunctionException.from_errors(elist)

    return _function_by_group(_lda, table, group_by=group_by, **params)
def moving_average(table, group_by=None, **params):
    """Validate the arguments and run ``_moving_average``.

    ``window_size`` is checked with ``greater_than_or_equal_to`` 1.  A
    non-``None`` ``group_by`` dispatches through ``_function_by_group``.
    """
    check_required_parameters(_moving_average, params, ['table'])
    params = get_default_from_parameters_if_required(params, _moving_average)

    validate(greater_than_or_equal_to(params, 1, 'window_size'))

    if group_by is None:
        return _moving_average(table, **params)
    return _function_by_group(
        _moving_average, table, group_by=group_by, **params)
def agglomerative_clustering(table, group_by=None, **params):
    """Check required parameters, fill defaults and run the clustering.

    No value validators are applied.  Returns the result of
    ``_function_by_group`` when ``group_by`` is given, otherwise the direct
    result of ``_agglomerative_clustering``.
    """
    check_required_parameters(_agglomerative_clustering, params, ['table'])
    params = get_default_from_parameters_if_required(
        params, _agglomerative_clustering)

    if group_by is None:
        return _agglomerative_clustering(table, **params)
    return _function_by_group(
        _agglomerative_clustering, table, group_by=group_by, **params)
def lda(table, group_by=None, **params):
    """Validate the arguments and run ``_lda``, optionally per group.

    Validators passed to ``validate``: ``greater_than_or_equal_to`` 2 on
    ``num_voca`` and ``num_topic``; ``from_to`` on ``num_topic_word`` with
    bounds 2 and ``params['num_voca']``; ``greater_than_or_equal_to`` 1 on
    ``max_iter``; ``greater_than`` 1.0 on ``learning_offset``.
    """
    # NOTE(review): another wrapper named ``lda`` with different parameters
    # appears in this source - confirm the two live in separate modules.
    check_required_parameters(_lda, params, ['table'])
    params = get_default_from_parameters_if_required(params, _lda)

    validate(
        greater_than_or_equal_to(params, 2, 'num_voca'),
        greater_than_or_equal_to(params, 2, 'num_topic'),
        # Upper bound for the words-per-topic count is the vocabulary size.
        from_to(params, 2, params['num_voca'], 'num_topic_word'),
        greater_than_or_equal_to(params, 1, 'max_iter'),
        greater_than(params, 1.0, 'learning_offset'),
    )

    if group_by is None:
        return _lda(table, **params)
    return _function_by_group(_lda, table, group_by=group_by, **params)
def savgol_filter(table, group_by=None, **params):
    """Validate the arguments and run ``_savgol_filter``.

    ``window_length`` is checked with ``greater_than_or_equal_to`` 1.  A
    non-``None`` ``group_by`` dispatches through ``_function_by_group``.
    """
    check_required_parameters(_savgol_filter, params, ['table'])
    params = get_default_from_parameters_if_required(params, _savgol_filter)

    validate(greater_than_or_equal_to(params, 1, 'window_length'))

    if group_by is None:
        return _savgol_filter(table, **params)
    return _function_by_group(
        _savgol_filter, table, group_by=group_by, **params)
def bow(table, group_by=None, **params):
    """Validate the arguments and run ``_bow``, optionally per group.

    Validators passed to ``validate``: ``greater_than_or_equal_to`` 0 on
    ``no_below``; ``less_than_or_equal_to`` 1.0 and ``greater_than`` 0.0 on
    ``no_above``; ``greater_than_or_equal_to`` 1 on ``keep_n``.
    """
    check_required_parameters(_bow, params, ['table'])
    params = get_default_from_parameters_if_required(params, _bow)

    validate(
        greater_than_or_equal_to(params, 0, 'no_below'),
        less_than_or_equal_to(params, 1.0, 'no_above'),
        greater_than(params, 0.0, 'no_above'),
        greater_than_or_equal_to(params, 1, 'keep_n'),
    )

    if group_by is None:
        return _bow(table, **params)
    return _function_by_group(_bow, table, group_by=group_by, **params)
def random_forest_regression_train(table, group_by=None, **params):
    """Validate the arguments and run ``_random_forest_regression_train``.

    ``n_estimators``, ``max_depth``, ``min_samples_split`` and
    ``min_samples_leaf`` are each checked with
    ``greater_than_or_equal_to`` 1.  A non-``None`` ``group_by`` dispatches
    through ``_function_by_group``.
    """
    check_required_parameters(
        _random_forest_regression_train, params, ['table'])
    params = get_default_from_parameters_if_required(
        params, _random_forest_regression_train)

    validate(
        greater_than_or_equal_to(params, 1, 'n_estimators'),
        greater_than_or_equal_to(params, 1, 'max_depth'),
        greater_than_or_equal_to(params, 1, 'min_samples_split'),
        greater_than_or_equal_to(params, 1, 'min_samples_leaf'),
    )

    if group_by is None:
        return _random_forest_regression_train(table, **params)
    return _function_by_group(
        _random_forest_regression_train, table, group_by=group_by, **params)
def naive_bayes_train(table, group_by=None, **params):
    """Fill defaults, validate, check required parameters, run the trainer.

    ``alpha`` is checked with ``greater_than`` 0, before the
    required-parameter check runs.  A non-``None`` ``group_by`` dispatches
    through ``_function_by_group``.
    """
    params = get_default_from_parameters_if_required(
        params, _naive_bayes_train)

    validate(greater_than(params, 0, 'alpha'))
    check_required_parameters(_naive_bayes_train, params, ['table'])

    if group_by is None:
        return _naive_bayes_train(table, **params)
    return _function_by_group(
        _naive_bayes_train, table, group_by=group_by, **params)
def tukeys_range_test(table, group_by=None, **params):
    """Validate the arguments and run ``_tukeys_range_test``.

    ``alpha`` is checked with ``from_under(params, 0.001, 0.9, 'alpha')``.
    A non-``None`` ``group_by`` dispatches through ``_function_by_group``.
    """
    check_required_parameters(_tukeys_range_test, params, ['table'])
    params = get_default_from_parameters_if_required(
        params, _tukeys_range_test)

    validate(from_under(params, 0.001, 0.9, 'alpha'))

    if group_by is None:
        return _tukeys_range_test(table, **params)
    return _function_by_group(
        _tukeys_range_test, table, group_by=group_by, **params)