def get_joint_entropy(feature_class_array):
    """Profile the distribution of entropies computed per (feature, class) pair.

    Args:
        feature_class_array: Iterable of indexable pairs. Elements 0 and 1 of
            each pair are combined with translate_into_tuples.

    Returns:
        The result of profile_distribution applied to the list holding one
        get_entropy value per pair.
    """
    joint_entropies = []
    for pair in feature_class_array:
        # get_entropy is applied to the combined output, not to either
        # member of the pair on its own.
        combined = translate_into_tuples(pair[0], pair[1])
        joint_entropies.append(get_entropy(combined))
    return profile_distribution(joint_entropies)
def get_correlations_by_class(X_sample, Y_sample):
    """Profile the distribution of canonical correlations gathered per class.

    X_sample and Y_sample are concatenated column-wise and grouped by the
    column named Y_sample.name. For every unique value in Y_sample, the
    matching group (with the label column dropped) is passed to
    get_canonical_correlations and the results are pooled into one list.

    Args:
        X_sample: Feature data accepted by pd.concat along axis=1.
        Y_sample: Labels; must expose .name and .unique()
            (presumably a pandas Series - confirm with callers).

    Returns:
        The result of profile_distribution applied to the pooled correlations.
    """
    class_column = Y_sample.name
    combined = pd.concat([X_sample, Y_sample], axis=1)
    by_class = combined.groupby(class_column)

    pooled = []
    for class_label in Y_sample.unique():
        members = by_class.get_group(class_label)
        # Drop the label column so only the feature columns are correlated.
        features_only = members.drop(class_column, axis=1)
        # NOTE(review): get_correlations in this file passes column_types as a
        # second argument to get_canonical_correlations; here only the group
        # is passed. Confirm the helper's signature allows both call shapes.
        pooled.extend(get_canonical_correlations(features_only))
    return profile_distribution(pooled)
def get_numeric_kurtosis(numeric_features_array):
    """Profile the distribution of per-feature kurtosis values.

    Args:
        numeric_features_array: Iterable of objects exposing a .kurtosis()
            method (presumably pandas Series - confirm with callers).

    Returns:
        The result of profile_distribution applied to the list of kurtoses.
    """
    per_feature = []
    for column in numeric_features_array:
        per_feature.append(column.kurtosis())
    return profile_distribution(per_feature)
def get_numeric_skewness(numeric_features_array):
    """Profile the distribution of per-feature skewness values.

    Args:
        numeric_features_array: Iterable of objects exposing a .skew()
            method (presumably pandas Series - confirm with callers).

    Returns:
        The result of profile_distribution applied to the list of skews.
    """
    per_feature = []
    for column in numeric_features_array:
        per_feature.append(column.skew())
    return profile_distribution(per_feature)
def get_numeric_stdev(numeric_features_array):
    """Profile the distribution of per-feature standard deviations.

    Args:
        numeric_features_array: Iterable of objects exposing a .std()
            method (presumably pandas Series - confirm with callers).

    Returns:
        The result of profile_distribution applied to the list of
        standard deviations.
    """
    per_feature = []
    for column in numeric_features_array:
        per_feature.append(column.std())
    return profile_distribution(per_feature)
def get_correlations(X_sample, column_types):
    """Profile the distribution of canonical correlations for X_sample.

    Args:
        X_sample: Feature data forwarded to get_canonical_correlations.
        column_types: Forwarded unchanged to get_canonical_correlations as
            its second argument.

    Returns:
        The result of profile_distribution applied to the correlations
        returned by get_canonical_correlations.
    """
    correlations = get_canonical_correlations(X_sample, column_types)
    # The profile must be returned: previously the result was computed and
    # discarded, so callers always received None, unlike every sibling
    # function in this file.
    return profile_distribution(correlations)
def get_numeric_means(numeric_features_array):
    """Profile the distribution of per-feature means.

    Args:
        numeric_features_array: Iterable of objects exposing a .mean()
            method (presumably pandas Series - confirm with callers).

    Returns:
        The result of profile_distribution applied to the list of means.
    """
    per_feature = []
    for column in numeric_features_array:
        per_feature.append(column.mean())
    return profile_distribution(per_feature)
def get_string_length_kurtosis(string_lengths_array):
    """Profile the distribution of kurtosis values over string-length features.

    Args:
        string_lengths_array: Iterable of objects exposing a .kurtosis()
            method (presumably one Series of string lengths per text
            feature - confirm with callers).

    Returns:
        The result of profile_distribution applied to the list of kurtoses.
    """
    per_feature = []
    for lengths in string_lengths_array:
        per_feature.append(lengths.kurtosis())
    return profile_distribution(per_feature)
def get_string_length_skewness(string_lengths_array):
    """Profile the distribution of skewness values over string-length features.

    Args:
        string_lengths_array: Iterable of objects exposing a .skew()
            method (presumably one Series of string lengths per text
            feature - confirm with callers).

    Returns:
        The result of profile_distribution applied to the list of skews.
    """
    per_feature = []
    for lengths in string_lengths_array:
        per_feature.append(lengths.skew())
    return profile_distribution(per_feature)
def get_string_length_stdev(string_lengths_array):
    """Profile the distribution of standard deviations over string-length features.

    Args:
        string_lengths_array: Iterable of objects exposing a .std()
            method (presumably one Series of string lengths per text
            feature - confirm with callers).

    Returns:
        The result of profile_distribution applied to the list of
        standard deviations.
    """
    per_feature = []
    for lengths in string_lengths_array:
        per_feature.append(lengths.std())
    return profile_distribution(per_feature)
def get_string_length_means(string_lengths_array):
    """Profile the distribution of means over string-length features.

    Args:
        string_lengths_array: Iterable of objects exposing a .mean()
            method (presumably one Series of string lengths per text
            feature - confirm with callers).

    Returns:
        The result of profile_distribution applied to the list of means.
    """
    per_feature = []
    for lengths in string_lengths_array:
        per_feature.append(lengths.mean())
    return profile_distribution(per_feature)
def get_mutual_information(feature_class_array):
    """Profile the distribution of mutual-information scores per pair.

    Args:
        feature_class_array: Iterable of pairs; each pair is unpacked into
            the positional arguments of mutual_info_score (presumably
            sklearn.metrics.mutual_info_score - confirm against the
            file's imports).

    Returns:
        The result of profile_distribution applied to the list of scores.
    """
    scores = []
    for pair in feature_class_array:
        # Star-unpacking forwards every element of the pair positionally.
        scores.append(mutual_info_score(*pair))
    return profile_distribution(scores)
def get_attribute_entropy(feature_array):
    """Profile the distribution of per-feature entropies.

    Args:
        feature_array: Iterable of features, each passed on its own to
            get_entropy.

    Returns:
        The result of profile_distribution applied to the list of entropies.
    """
    per_feature = []
    for attribute in feature_array:
        per_feature.append(get_entropy(attribute))
    return profile_distribution(per_feature)
def get_decision_tree_level_sizes(tree):
    """Profile the distribution of a decision tree's level sizes.

    Args:
        tree: Object exposing a level_sizes attribute.

    Returns:
        The result of profile_distribution applied to tree.level_sizes.
    """
    level_sizes = tree.level_sizes
    return profile_distribution(level_sizes)
def get_decision_tree_attributes(tree):
    """Profile the distribution of the values returned by tree.get_attributes().

    Args:
        tree: Object exposing a get_attributes() method.

    Returns:
        The result of profile_distribution applied to tree.get_attributes().
    """
    attributes = tree.get_attributes()
    return profile_distribution(attributes)
def get_decision_tree_branch_lengths(tree):
    """Profile the distribution of a decision tree's branch lengths.

    Args:
        tree: Object exposing a branch_lengths attribute.

    Returns:
        The result of profile_distribution applied to tree.branch_lengths.
    """
    branch_lengths = tree.branch_lengths
    return profile_distribution(branch_lengths)