def add_metafeatures(pairs, data):
    """
    optionally extends data with metafeatures computed from pairs

    Behaviour is driven by two CONFIG flags:

    * CONFIG.ADD_METAFEATURES falsy -> data is returned untouched and
      nothing else is computed.
    * otherwise metafeatures(pairs) is computed and handed to to_2d together
      with data; when CONFIG.COMPUTE_METAFEATURE_COMBINATIONS is truthy the
      result of column_combinations(data, <metafeatures>) is passed to to_2d
      as a third argument.

    NOTE(review): to_2d presumably stacks its arguments side by side into a
    single 2-D array -- confirm against its definition.
    """
    if CONFIG.ADD_METAFEATURES:
        meta = metafeatures(pairs)
        if not CONFIG.COMPUTE_METAFEATURE_COMBINATIONS:
            return to_2d(data, meta)
        return to_2d(data, meta, column_combinations(data, meta))
    return data
def aggregate_proxy(func, data, aggregate, **kwargs):
    """
    dispatches func either directly on data or through aggregate_apply,
    depending on the truthiness of the aggregate flag

    func      -- callable taking a data item as first argument plus **kwargs
    data      -- object exposing .shape; must be 1-D when aggregate is falsy
    aggregate -- flag selecting the aggregating code path
    kwargs    -- forwarded unchanged to func

    Raises AssertionError when aggregate is falsy and data is not 1-D.
    """
    if not aggregate:
        # direct call: func gets the whole (1-D) input at once
        assert len(data.shape) == 1
        return func(data, **kwargs)
    # have each element be a column: iterating over the transpose feeds
    # aggregate_apply one column of to_2d(data) at a time
    # (assumes to_2d returns a numpy-like 2-D array -- TODO confirm)
    columns = to_2d(data).T
    return aggregate_apply(func, columns, **kwargs)
def aggregate_apply(func, items, **kwargs):
    """
    calls func(item, **kwargs) for every item in items, then reduces the
    collected results into a single flat list of features

    The per-item results are packed into an array and passed through to_2d;
    each row of the transposed result is fed to every aggregator in
    CONFIG.AGGREGATORS, and each aggregated value is added to the output via
    result_append.

    Returns an empty list when items yields nothing.

    NOTE(review): result_append presumably appends a scalar or extends with
    a sequence -- confirm against its definition.
    """
    per_item = [func(item, **kwargs) for item in items]
    features = []
    if not per_item:
        return features
    # after the transpose, each row holds one output of func across all items
    for per_feature in to_2d(np.array(per_item)).T:
        for aggregator in CONFIG.AGGREGATORS:
            result_append(features, aggregator(per_feature))
    return features
def estimator_features(A_feat, B_feat, current_type):
    """
    builds estimator-based features for predicting B_feat from A_feat

    current_type is a two-character code: the first character describes
    A_feat and the second B_feat, with "N" for numerical and "C" for
    categorical.

    * A numerical   -> A_feat is converted with to_2d
    * A categorical -> A_feat is binarized with LabelBinarizer
    * B numerical   -> result of regression_features(X, B_feat)
    * B categorical -> result of classification_features(X, B_feat)

    Raises AssertionError for any code other than "NN", "NC", "CN", "CC".
    The explicit Exception fallbacks only matter when asserts are disabled
    (python -O).
    """
    assert current_type in ("NN", "NC", "CN", "CC")
    kind_a, kind_b = current_type

    if kind_a not in ("N", "C"):
        raise Exception("improper A_type: {}".format(kind_a))
    # numerical -> 2-D matrix, categorical -> binary indicator matrix
    X = to_2d(A_feat) if kind_a == "N" else LabelBinarizer().fit_transform(A_feat)

    if kind_b == "N":
        return regression_features(X, B_feat)
    if kind_b == "C":
        return classification_features(X, B_feat)
    raise Exception("improper B_type: {}".format(kind_b))
def estimator_features_wrapper(B_data, current_type):
    """
    adapts estimator_features to a (B_data, current_type) call signature,
    supplying to_2d(A_data) as the first argument

    Raises AssertionError when B_data is not 1-D.

    NOTE(review): A_data is neither a parameter nor defined in this function;
    it must come from an enclosing scope or a module-level global -- confirm
    where it is bound, otherwise this raises NameError.
    """
    assert len(B_data.shape) == 1
    A_matrix = to_2d(A_data)
    return estimator_features(A_matrix, B_data, current_type)