def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
        """Set up the meta-feature generator and the two vectorizers.

        Each vectorizer argument that is falsy (including the default
        ``None``) is replaced by a newly constructed ``Vectorizer``.
        ``fit_complete`` starts out ``True`` only when the caller supplied
        both vectorizers; otherwise it is ``False``.
        """
        self.mf_generator = MetaFeatureGenerator()

        # Both vectorizers handed in by the caller -> flag the fit as done.
        self.fit_complete = bool(normal_vectorizer and clean_vectorizer)

        self.normal_vectorizer = (
            normal_vectorizer if normal_vectorizer else Vectorizer()
        )
        self.clean_vectorizer = (
            clean_vectorizer if clean_vectorizer else Vectorizer()
        )
Exemplo n.º 2
0
    def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
        """Create the helper objects this instance works with.

        A ``MetaFeatureGenerator`` is always created. For each of the two
        vectorizer slots the supplied object is kept when it is truthy,
        otherwise a fresh ``Vectorizer`` is created. ``fit_complete`` is
        ``True`` exactly when both supplied vectorizers are truthy.
        """
        self.mf_generator = MetaFeatureGenerator()

        both_supplied = normal_vectorizer and clean_vectorizer
        self.fit_complete = True if both_supplied else False

        if not normal_vectorizer:
            normal_vectorizer = Vectorizer()
        self.normal_vectorizer = normal_vectorizer

        if not clean_vectorizer:
            clean_vectorizer = Vectorizer()
        self.clean_vectorizer = clean_vectorizer
Exemplo n.º 3
0
class FeatureGenerator(object):
    """Combine vectorizer features and meta features for a piece of text.

    Two vectorizers are used: ``normal_vectorizer`` receives the text as
    given, ``clean_vectorizer`` receives the output of
    ``MetaFeatureGenerator.generate_clean_stem_text``. Their outputs are
    stacked horizontally together with the meta features.
    """

    def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
        """Store the supplied vectorizers, creating defaults where missing.

        Args:
            normal_vectorizer: vectorizer for the unmodified text; a new
                ``Vectorizer`` is created when this is falsy.
            clean_vectorizer: vectorizer for the cleaned/stemmed text; a
                new ``Vectorizer`` is created when this is falsy.

        ``fit_complete`` starts as ``True`` only when both vectorizers were
        supplied (presumably already fitted -- confirm with callers).
        """
        self.mf_generator = MetaFeatureGenerator()
        self.fit_complete = bool(normal_vectorizer and clean_vectorizer)
        self.normal_vectorizer = normal_vectorizer or Vectorizer()
        self.clean_vectorizer = clean_vectorizer or Vectorizer()

    def fit(self, input_text, input_scores):
        """Fit both vectorizers and mark the generator as fitted.

        Args:
            input_text: iterable of texts; iterated more than once, so it
                must not be a one-shot iterator.
            input_scores: scores passed through unchanged to both
                vectorizers' ``fit``.
        """
        self.normal_vectorizer.fit(input_text, input_scores)
        clean_text = [
            self.mf_generator.generate_clean_stem_text(t) for t in input_text
        ]
        self.clean_vectorizer.fit(clean_text, input_scores)
        # Previously the flag stayed False even after a successful fit.
        self.fit_complete = True

    def get_features(self, text):
        """Return one row of features for ``text`` and record column names.

        As a side effect ``self.colnames`` is set to the normal vocab,
        followed by the clean vocab, followed by the sorted meta-feature
        names -- the same order as the columns of the returned matrix.

        Args:
            text: a single text to featurize.

        Returns:
            The horizontal stack of the vectorizer features and a
            one-row ``np.matrix`` of meta-feature values.
        """
        vec_feats = self.generate_vectorizer_features(text)
        # NOTE(review): assumes both ``vocab`` attributes are lists so that
        # ``+`` concatenates -- confirm against Vectorizer.
        vec_keys = self.normal_vectorizer.vocab + self.clean_vectorizer.vocab

        meta_feats = self.generate_meta_features(text)
        # sorted() returns a real list on Python 2 and 3 alike;
        # dict.keys().sort() raises AttributeError on Python 3.
        meta_keys = sorted(meta_feats)
        meta_feat_arr = np.matrix([meta_feats[k] for k in meta_keys])

        self.colnames = vec_keys + meta_keys

        return np.hstack([vec_feats, meta_feat_arr])

    def generate_meta_features(self, text):
        """Return the meta-feature mapping produced by ``mf_generator``."""
        return self.mf_generator.generate_meta_features(text)

    def generate_vectorizer_features(self, text):
        """Return normal and clean vectorizer features stacked side by side.

        Each vectorizer is called with a one-element list holding the raw
        text (normal) or the cleaned/stemmed text (clean).
        """
        clean_text = self.mf_generator.generate_clean_stem_text(text)
        feats = self.normal_vectorizer.get_features([text])
        clean_feats = self.clean_vectorizer.get_features([clean_text])
        return np.hstack([feats, clean_feats])
class FeatureGenerator(object):
    """Build a combined feature row from two vectorizers plus meta features.

    ``normal_vectorizer`` is fed the text unchanged; ``clean_vectorizer``
    is fed the result of ``MetaFeatureGenerator.generate_clean_stem_text``.
    """

    def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
        """Keep the given vectorizers or create default ``Vectorizer``s.

        Args:
            normal_vectorizer: used for the unmodified text; replaced by a
                new ``Vectorizer`` when falsy.
            clean_vectorizer: used for the cleaned/stemmed text; replaced
                by a new ``Vectorizer`` when falsy.

        ``fit_complete`` is initialised to ``True`` only if both arguments
        were supplied, and ``False`` otherwise.
        """
        self.mf_generator = MetaFeatureGenerator()
        self.fit_complete = bool(normal_vectorizer and clean_vectorizer)

        if not normal_vectorizer:
            normal_vectorizer = Vectorizer()
        self.normal_vectorizer = normal_vectorizer

        if not clean_vectorizer:
            clean_vectorizer = Vectorizer()
        self.clean_vectorizer = clean_vectorizer

    def fit(self, input_text, input_scores):
        """Fit the normal and clean vectorizers, then set ``fit_complete``.

        Args:
            input_text: iterable of texts; it is traversed more than once.
            input_scores: forwarded as-is to each vectorizer's ``fit``.
        """
        self.normal_vectorizer.fit(input_text, input_scores)
        clean_text = [self.mf_generator.generate_clean_stem_text(t) for t in input_text]
        self.clean_vectorizer.fit(clean_text, input_scores)
        # Record that fitting happened; the flag was never updated before.
        self.fit_complete = True

    def get_features(self, text):
        """Compute the feature row for ``text`` and update ``self.colnames``.

        ``self.colnames`` becomes normal vocab + clean vocab + sorted
        meta-feature names, matching the column order of the result.

        Args:
            text: a single text to featurize.

        Returns:
            ``np.hstack`` of the vectorizer features and a one-row
            ``np.matrix`` holding the meta-feature values.
        """
        vec_feats = self.generate_vectorizer_features(text)
        # NOTE(review): relies on ``vocab`` supporting list-style ``+``
        # concatenation -- verify against Vectorizer.
        vec_keys = self.normal_vectorizer.vocab + self.clean_vectorizer.vocab

        meta_feats = self.generate_meta_features(text)
        # Python 3 dict views have no .sort(); sorted() works everywhere
        # and yields a list that can be concatenated with vec_keys.
        meta_keys = sorted(meta_feats)
        meta_feat_arr = np.matrix([meta_feats[k] for k in meta_keys])

        self.colnames = vec_keys + meta_keys

        return np.hstack([vec_feats, meta_feat_arr])

    def generate_meta_features(self, text):
        """Delegate to ``mf_generator.generate_meta_features`` for ``text``."""
        return self.mf_generator.generate_meta_features(text)

    def generate_vectorizer_features(self, text):
        """Stack the normal and clean vectorizer outputs horizontally.

        The normal vectorizer receives ``[text]``; the clean vectorizer
        receives a one-element list with the cleaned/stemmed text.
        """
        clean_text = self.mf_generator.generate_clean_stem_text(text)
        feats = self.normal_vectorizer.get_features([text])
        clean_feats = self.clean_vectorizer.get_features([clean_text])
        return np.hstack([feats, clean_feats])