def get_lexicon_count_pipeline(tokenizer):
    """Return a two-step pipeline for lexicon count features.

    The first step is a ``CountBasedTransformer`` constructed with
    *tokenizer*; the second is a ``DictVectorizer`` with default settings.
    """
    feature_step = CountBasedTransformer(tokenizer)
    vector_step = dv.DictVectorizer()
    steps = [
        ('lexfeatures', feature_step),
        ('lexvect', vector_step),
    ]
    return skpipeline.Pipeline(steps)
def get_named_entity_weight_pipeline(language):
    """Return a two-step pipeline for named-entity features.

    An ``NNEsTransformer`` built from *language* is chained with a default
    ``DictVectorizer``.
    """
    # List elements are evaluated left to right, so the transformer is still
    # constructed before the vectorizer.
    return skpipeline.Pipeline([
        ('neratefeat', NNEsTransformer(language)),
        ('neratevect', dv.DictVectorizer()),
    ])
def get_keyword_pipeline(word):
    """Return a two-step pipeline for the presence of *word*.

    A ``TermPresenceTransformer`` built from *word* is chained with a default
    ``DictVectorizer``.
    """
    steps = [
        ('wordpresfeat', TermPresenceTransformer(word)),
        ('wordpresvect', dv.DictVectorizer()),
    ]
    return skpipeline.Pipeline(steps)
def get_polylglot_polarity_count_pipe(lang):
    """Return a two-step pipeline for polarity-count features.

    A ``PolyglotPolarityCountTransformer`` built from *lang* is chained with
    a default ``DictVectorizer``.
    """
    return skpipeline.Pipeline([
        ('polyglotpolaritycfeat', PolyglotPolarityCountTransformer(lang)),
        ('polyglotpolaritycvect', dv.DictVectorizer()),
    ])
def get_polylglot_polarity_value_pipe(lang):
    """Return a two-step pipeline for polarity-value features.

    A ``PolyglotPolarityValueTransformer`` built from *lang* is chained with
    a default ``DictVectorizer``.
    """
    named_steps = (
        ('polyglotpolarityvfeat', PolyglotPolarityValueTransformer(lang)),
        ('polyglotpolarityvvect', dv.DictVectorizer()),
    )
    # Pipeline receives a list, exactly as in the original call.
    return skpipeline.Pipeline(list(named_steps))
def vectorizer():
    """Demonstrate ``DictVectorizer`` on three small name/age records.

    Fits a dense (``sparse=False``) vectorizer on the records, then prints
    the learned feature names, the transformed data and the type of the
    transformed data. Returns ``None``.
    """
    records = [
        {'name': 'jiujue', 'age': 10},
        {'name': 'mmp', 'age': 11},
        {'name': 'sam', 'age': 12},
    ]
    dict_vec = dict_vectorizer.DictVectorizer(sparse=False)
    data = dict_vec.fit_transform(records)

    # NOTE(review): presumably this is scikit-learn's DictVectorizer, where
    # get_feature_names() was deprecated in 1.0 and removed in 1.2. Prefer
    # the replacement when present and fall back for older releases.
    if hasattr(dict_vec, 'get_feature_names_out'):
        # Convert to plain strings so the printed form stays a regular list,
        # as it was with the legacy method.
        feature_names = [str(name) for name in dict_vec.get_feature_names_out()]
    else:
        feature_names = dict_vec.get_feature_names()

    print(feature_names)
    print(data)
    print(type(data))