def test_reduce_raises_if_combining_different_origins_or_attributes(differ: Text):
    """Check that `Features.reduce` rejects features that do not belong together.

    Two dense sentence-level features are created. They share everything
    except the field selected by `differ`, and reducing them is expected to
    raise a `ValueError` with a matching message.

    Args:
        differ: Name of the field that varies between the two features.
            "attribute" makes the attributes differ, "origin" makes the
            origins differ.
    """
    # Build two features which only disagree in the field named by `differ`.
    features_list = [
        Features(
            features=np.full(shape=(1, 1), fill_value=1),
            attribute=f"attr-{idx}" if differ == "attribute" else "fixed-attribute",
            feature_type=FEATURE_TYPE_SENTENCE,
            origin=f"origin-{idx}" if differ == "origin" else "fixed-origin",
        )
        for idx in range(2)
    ]

    # Pick the error message (used as a regex by pytest) and the origins we
    # hand to `reduce` for the chosen scenario.
    if differ == "attribute":
        expected_origin = ["origin"]
        message = "Expected all Features to describe the same attribute"
    else:
        expected_origin = ["origin-1"]
        message = "Expected 'origin-1' to be the origin of the 0-th"

    # reduce!
    with pytest.raises(ValueError, match=message):
        Features.reduce(features_list, expected_origins=expected_origin)
def test_reduce(
    shuffle_mode: Text, num_features_per_combination: Tuple[int, int, int, int]
):
    """Check that `Features.reduce` merges and orders features as expected.

    For each (feature type, sparsity) combination the requested number of
    single-column features is created, the resulting list is shuffled, and
    the reduced output is compared against the expected order.

    Args:
        shuffle_mode: "reversed" reverses the feature order; any other value
            applies a random permutation with a fixed seed.
        num_features_per_combination: Number of features to create for each
            entry of the expected combination order (sparse sequence, sparse
            sentence, dense sequence, dense sentence).
    """
    # All combinations - in the order in which `reduce` should return them
    # (i.e. all sparse before all dense and sequence before sentence).
    all_combinations = [
        (FEATURE_TYPE_SEQUENCE, True),
        (FEATURE_TYPE_SENTENCE, True),
        (FEATURE_TYPE_SEQUENCE, False),
        (FEATURE_TYPE_SENTENCE, False),
    ]

    # Repeat every combination as often as requested ...
    chosen_combinations = []
    for spec, count in zip(all_combinations, num_features_per_combination):
        chosen_combinations.extend(spec for _ in range(count))

    # ... and mess up the order.
    if shuffle_mode == "reversed":
        messed_up_order = reversed(chosen_combinations)
    else:
        # Note: permuting indices instead of the list itself, because
        # rng.permutation on the list would mess up the types.
        rng = np.random.default_rng(23452345)
        shuffled_indices = rng.permutation(len(chosen_combinations))
        messed_up_order = [chosen_combinations[pos] for pos in shuffled_indices]

    # Create one single-column feature per chosen combination.
    features_list = []
    for feature_type, is_sparse in messed_up_order:
        num_rows = 1 if feature_type == FEATURE_TYPE_SEQUENCE else 3
        matrix = np.full(shape=(num_rows, 1), fill_value=1)
        if is_sparse:
            matrix = scipy.sparse.coo_matrix(matrix)
        features_list.append(
            Features(
                features=matrix,
                attribute="fixed-attribute",  # must be the same
                feature_type=feature_type,
                origin="origin-does-matter-here",  # must be the same
            )
        )

    # reduce!
    reduced_list = Features.reduce(features_list)

    # Exactly one reduced feature per combination that was actually used.
    assert len(reduced_list) == sum(num > 0 for num in num_features_per_combination)

    position = 0
    for count, (feature_type, is_sparse) in zip(
        num_features_per_combination, all_combinations
    ):
        if count == 0:
            # Nothing to check here - the length was already checked above
            # and the existing features are checked in the other iterations.
            continue
        feature = reduced_list[position]
        assert feature.is_sparse() == is_sparse
        assert feature.type == feature_type
        # Every input contributed one column, so the widths add up.
        assert feature.features.shape[-1] == count
        position += 1
def _extract_state_features(
    self,
    sub_state: SubState,
    precomputations: Optional[MessageContainerForCoreFeaturization],
    sparse: bool = False,
) -> Dict[Text, List[Features]]:
    """Collect the features for all attributes of the given sub-state.

    Args:
        sub_state: The sub-state whose keys are the attributes to featurize;
            the `ENTITIES` key is ignored here.
        precomputations: Container from which features are collected, or
            `None`, in which case no features are collected from it.
        sparse: Passed on to `_create_features` when features for the name
            attribute have to be created as a fallback.

    Returns:
        A mapping from attribute to the reduced list of its features.
        Attributes without any collected features are left out, except for
        the name attribute, which receives fallback features.
    """
    # Entities are not part of the attributes we look up.
    attributes = {key for key in sub_state.keys() if key != ENTITIES}

    output = {}
    if precomputations is not None:
        # Collect features for all those attributes.
        attributes_to_features = precomputations.collect_features(
            sub_state, attributes=attributes
        )

        # If features for INTENT or ACTION_NAME exist, they are always sparse
        # sequence features; transform them to sentence sparse features.
        for name in (INTENT, ACTION_NAME):
            if attributes_to_features.get(name):
                attributes_to_features[name] = self._to_sparse_sentence_features(
                    attributes_to_features[name]
                )

        # Combine and sort the features:
        # Per attribute, combine features of same type and level into one
        # Feature, and (if there are any such features) store the results in
        # a list where
        # - all the sparse features are listed first and
        # - a sequence feature is always listed before the sentence feature
        #   of the same type (sparse/not sparse).
        for attribute, features_list in attributes_to_features.items():
            if len(features_list) == 0:
                # Reducing an empty list would fail, so skip it.
                continue
            output[attribute] = Features.reduce(
                features_list=features_list, expected_origins=None
            )

    # Check that the name attribute has features.
    name_attribute = self._get_name_attribute(attributes)
    if name_attribute and name_attribute not in output:
        # The nlu pipeline didn't create features for user or action. This
        # might happen, for example, when we have action_name in the state
        # but it did not get featurized because only a character level
        # CountVectorsFeaturizer was included in the config.
        output[name_attribute] = self._create_features(
            sub_state, name_attribute, sparse
        )

    return output