def generate_keras_extractor(documents):
    """Return a Keras LSTM feature extractor built from *documents*.

    The documents are first run through ``clean.DocumentCleaner.clean``
    with ``True`` as the second argument (the flag that ``generate_input``
    exposes as ``SHOULD_ADD_NEGATIONS``), and the cleaned result is used to
    build the extractor's feature set.

    Args:
        documents: The raw documents to clean and build features from.

    Returns:
        The ``KerasLSTMFeatureExtractor`` after ``build_feature_set`` has
        been called on the cleaned documents.
    """
    cleaned_docs = clean.DocumentCleaner().clean(documents, True)

    lstm_extractor = keras_extractor.KerasLSTMFeatureExtractor()
    lstm_extractor.build_feature_set(cleaned_docs)
    return lstm_extractor
def generate_bag_of_ngrams_extractor(documents, nfeatures, ngrams):
    """Return a bag-of-n-grams feature extractor built from *documents*.

    The documents are first run through ``clean.DocumentCleaner.clean``
    with ``True`` as the second argument (the flag that ``generate_input``
    exposes as ``SHOULD_ADD_NEGATIONS``), then handed to the extractor's
    ``build_feature_set`` together with ``nfeatures`` and ``ngrams``.

    Args:
        documents: The raw documents to clean and build features from.
        nfeatures: Forwarded unchanged to ``build_feature_set``.
        ngrams: Forwarded unchanged to ``build_feature_set``.

    Returns:
        The ``BagOfNGramsFeatureExtractor`` after its feature set is built.
    """
    cleaned_docs = clean.DocumentCleaner().clean(documents, True)

    ngram_extractor = bag_of_ngrams_extractor.BagOfNGramsFeatureExtractor()
    ngram_extractor.build_feature_set(cleaned_docs, nfeatures, ngrams)
    return ngram_extractor
def generate_nb_input(documents, labels, label_set):
    """Clean *documents* and partition them by class.

    Cleaning is done with the negation flag set to ``True``; the cleaned
    documents are then passed, along with ``labels`` and ``label_set``, to
    ``partition_documents_by_class``.

    Args:
        documents: The raw documents to clean.
        labels: Forwarded unchanged to ``partition_documents_by_class``.
        label_set: Forwarded unchanged to ``partition_documents_by_class``.

    Returns:
        Whatever ``partition_documents_by_class`` returns for the cleaned
        documents.
    """
    add_negations = True
    cleaned_docs = clean.DocumentCleaner().clean(documents, add_negations)
    return partition_documents_by_class(cleaned_docs, labels, label_set)
def generate_glove_extractor(documents, nfeatures):
    """Return a GloVe feature extractor built from *documents*.

    Unlike the other extractor builders in this file, cleaning here is done
    with the negation flag set to ``False``. The cleaned documents and
    ``nfeatures`` are then passed to the extractor's ``build_feature_set``.

    Args:
        documents: The raw documents to clean and build features from.
        nfeatures: Forwarded unchanged to ``build_feature_set``.

    Returns:
        The ``GloveFeatureExtractor`` after its feature set is built.
    """
    add_negations = False
    cleaned_docs = clean.DocumentCleaner().clean(documents, add_negations)

    embedding_extractor = glove_extractor.GloveFeatureExtractor()
    embedding_extractor.build_feature_set(cleaned_docs, nfeatures)
    return embedding_extractor
def generate_input(documents, extractor, SHOULD_ADD_NEGATIONS=True):
    """Clean *documents* and run them through *extractor*.

    Args:
        documents: The raw documents to clean.
        extractor: An object providing ``extract_features``; it is called
            with the cleaned documents.
        SHOULD_ADD_NEGATIONS: Passed as the second argument to
            ``clean.DocumentCleaner.clean``. Defaults to ``True``.

    Returns:
        The result of ``extractor.extract_features`` on the cleaned
        documents.
    """
    cleaned_docs = clean.DocumentCleaner().clean(documents, SHOULD_ADD_NEGATIONS)
    return extractor.extract_features(cleaned_docs)