def build_vectorization_pipeline(
        self) -> Tuple[List[Tuple[str, Any]], Callable[[], List[str]]]:
    """Build the vectorization steps for a currency/amount value.

    Two branches are combined in a ``FeatureUnion``:

    * ``currency`` -- selects the ``'currency'`` item, replaces ``None``
      with an empty string, then applies a ``CountVectorizer`` (using
      ``vectorizers.whole_value_as_token`` as tokenizer) followed by a
      ``TfidfTransformer``.
    * ``amount`` -- selects the ``'amount'`` item and passes it through
      ``vectorizers.NumberVectorizer``.

    Returns:
        A 2-tuple ``(steps, feature_names)`` where ``steps`` is a one-item
        list holding the ``('vect', FeatureUnion)`` step and
        ``feature_names`` is a zero-argument callable producing the
        currency feature names prefixed with ``currency_`` followed by the
        amount feature names prefixed with ``amount_``.
    """
    currency_vectorizer = CountVectorizer(
        strip_accents='unicode',
        analyzer='word',
        stop_words='english',
        tokenizer=vectorizers.whole_value_as_token,
    )
    amount_vectorizer = vectorizers.NumberVectorizer()

    def feature_names() -> List[str]:
        # Evaluated lazily: the vectorizers are queried only when the
        # caller invokes this function, not when the pipeline is built.
        names = []
        for token in currency_vectorizer.get_feature_names():
            names.append('currency_' + str(token))
        for column in amount_vectorizer.get_feature_names():
            names.append('amount_' + str(column))
        return names

    currency_branch = Pipeline([
        ('selector', vectorizers.DictItemSelector(item='currency')),
        ('clean', vectorizers.ReplaceNoneTransformer('')),
        ('vect', currency_vectorizer),
        ('tfidf', TfidfTransformer()),
    ])
    amount_branch = Pipeline([
        ('selector', vectorizers.DictItemSelector(item='amount')),
        ('vect', amount_vectorizer),
    ])
    union = FeatureUnion(transformer_list=[
        ('currency', currency_branch),
        ('amount', amount_branch),
    ])
    return [('vect', union)], feature_names
def build_vectorization_pipeline(self) -> List[Tuple[str, Any]]:
    """Build the vectorization steps for a currency/amount value.

    The single returned step, named ``'vect'``, is a ``FeatureUnion`` of:

    * ``currency`` -- selects the ``'currency'`` item, replaces ``None``
      with an empty string, then applies a ``CountVectorizer`` (using
      ``vectorizers.whole_value_as_token`` as tokenizer) followed by a
      ``TfidfTransformer``.
    * ``amount`` -- selects the ``'amount'`` item and passes it through
      ``vectorizers.NumberVectorizer``.

    Returns:
        A one-item list of ``(name, transformer)`` pipeline steps.
    """
    currency_branch = Pipeline([
        ('selector', vectorizers.DictItemSelector(item='currency')),
        ('clean', vectorizers.ReplaceNoneTransformer('')),
        ('vect', CountVectorizer(
            strip_accents='unicode',
            analyzer='word',
            stop_words='english',
            tokenizer=vectorizers.whole_value_as_token,
        )),
        ('tfidf', TfidfTransformer()),
    ])
    amount_branch = Pipeline([
        ('selector', vectorizers.DictItemSelector(item='amount')),
        ('vect', vectorizers.NumberVectorizer()),
    ])
    union = FeatureUnion(transformer_list=[
        ('currency', currency_branch),
        ('amount', amount_branch),
    ])
    return [('vect', union)]
def build_vectorization_pipeline(
        self) -> Tuple[List[Tuple[str, Any]], Callable[[], List[str]]]:
    """Build the vectorization steps for a duration-like value.

    The value is fed to ``vectorizers.NumberVectorizer`` with a converter
    that calls ``total_seconds()`` on it (presumably a
    ``datetime.timedelta`` -- confirm against the caller).

    Returns:
        A 2-tuple ``(steps, feature_names)`` where ``steps`` is the
        one-item list ``[('vect', vectorizer)]`` and ``feature_names`` is
        whatever ``self._wrap_get_feature_names`` returns for that
        vectorizer.
    """
    # Any falsy value (None, or a zero-length duration) maps to 0. The
    # original expression repeated ``if d else 0`` twice; the second
    # conditional could only ever yield 0, so one conditional is enough.
    vect = vectorizers.NumberVectorizer(
        to_float_converter=lambda d: d.total_seconds() if d else 0)
    return [('vect', vect)], self._wrap_get_feature_names(vect)
def build_vectorization_pipeline(
        self) -> Tuple[List[Tuple[str, Any]], Callable[[], List[str]]]:
    """Build the vectorization steps for a plain numeric value.

    Returns:
        A 2-tuple ``(steps, feature_names)`` where ``steps`` is the
        one-item list ``[('vect', NumberVectorizer)]`` and
        ``feature_names`` is whatever ``self._wrap_get_feature_names``
        returns for that same vectorizer instance.
    """
    number_vectorizer = vectorizers.NumberVectorizer()
    steps = [('vect', number_vectorizer)]
    # The same instance is shared by the pipeline step and the wrapper.
    feature_names = self._wrap_get_feature_names(number_vectorizer)
    return steps, feature_names
def build_vectorization_pipeline(self) -> List[Tuple[str, Any]]:
    """Build the vectorization steps for a plain numeric value.

    Returns:
        A one-item list with a ``'vect'`` step holding a default
        ``vectorizers.NumberVectorizer``.
    """
    number_vectorizer = vectorizers.NumberVectorizer()
    return [('vect', number_vectorizer)]
def build_vectorization_pipeline(self) -> List[Tuple[str, Any]]:
    """Build the vectorization steps for a duration-like value.

    The value is fed to ``vectorizers.NumberVectorizer`` with a converter
    that calls ``total_seconds()`` on it (presumably a
    ``datetime.timedelta`` -- confirm against the caller).

    Returns:
        A one-item list with a ``'vect'`` step holding the configured
        ``NumberVectorizer``.
    """
    # Any falsy value (None, or a zero-length duration) maps to 0. The
    # original expression repeated ``if d else 0`` twice; the second
    # conditional could only ever yield 0, so one conditional is enough.
    vect = vectorizers.NumberVectorizer(
        to_float_converter=lambda d: d.total_seconds() if d else 0)
    return [('vect', vect)]