Пример #1
0
def generate_keras_extractor(documents):
    """Return a Keras LSTM feature extractor built from cleaned documents.

    The documents are first passed through ``clean.DocumentCleaner.clean``
    with ``True`` as the second argument (the flag other helpers in this
    file name ``SHOULD_ADD_NEGATIONS``), and the cleaned result is handed to
    ``KerasLSTMFeatureExtractor.build_feature_set``.

    Args:
        documents: The raw documents to clean and build the feature set from.

    Returns:
        The ``KerasLSTMFeatureExtractor`` after ``build_feature_set`` ran.
    """
    cleaned_docs = clean.DocumentCleaner().clean(documents, True)

    lstm_extractor = keras_extractor.KerasLSTMFeatureExtractor()
    lstm_extractor.build_feature_set(cleaned_docs)
    return lstm_extractor
Пример #2
0
def generate_bag_of_ngrams_extractor(documents, nfeatures, ngrams):
    """Return a bag-of-n-grams feature extractor built from cleaned documents.

    The documents are cleaned with ``clean.DocumentCleaner.clean`` (second
    argument ``True``), then passed together with ``nfeatures`` and
    ``ngrams`` to ``BagOfNGramsFeatureExtractor.build_feature_set``.

    Args:
        documents: The raw documents to clean and build the feature set from.
        nfeatures: Forwarded unchanged to ``build_feature_set``.
        ngrams: Forwarded unchanged to ``build_feature_set``.

    Returns:
        The ``BagOfNGramsFeatureExtractor`` after ``build_feature_set`` ran.
    """
    cleaned_docs = clean.DocumentCleaner().clean(documents, True)

    ngram_extractor = bag_of_ngrams_extractor.BagOfNGramsFeatureExtractor()
    ngram_extractor.build_feature_set(cleaned_docs, nfeatures, ngrams)
    return ngram_extractor
Пример #3
0
def generate_nb_input(documents, labels, label_set):
    """Clean the documents and partition them by class label.

    Cleaning is done by ``clean.DocumentCleaner.clean`` with the
    add-negations flag set to ``True``; the cleaned documents are then
    grouped by ``partition_documents_by_class``.

    Args:
        documents: The raw documents to clean.
        labels: Forwarded unchanged to ``partition_documents_by_class``.
        label_set: Forwarded unchanged to ``partition_documents_by_class``.

    Returns:
        Whatever ``partition_documents_by_class`` returns for the cleaned
        documents, ``labels`` and ``label_set``.
    """
    add_negations = True
    cleaned_docs = clean.DocumentCleaner().clean(documents, add_negations)
    return partition_documents_by_class(cleaned_docs, labels, label_set)
Пример #4
0
def generate_glove_extractor(documents, nfeatures):
    """Return a GloVe feature extractor built from cleaned documents.

    Unlike the other extractor factories in this file, the documents are
    cleaned with the add-negations flag set to ``False`` before being passed
    to ``GloveFeatureExtractor.build_feature_set``.

    Args:
        documents: The raw documents to clean and build the feature set from.
        nfeatures: Forwarded unchanged to ``build_feature_set``.

    Returns:
        The ``GloveFeatureExtractor`` after ``build_feature_set`` ran.
    """
    add_negations = False
    cleaned_docs = clean.DocumentCleaner().clean(documents, add_negations)

    embedding_extractor = glove_extractor.GloveFeatureExtractor()
    embedding_extractor.build_feature_set(cleaned_docs, nfeatures)
    return embedding_extractor
Пример #5
0
def generate_input(documents, extractor, SHOULD_ADD_NEGATIONS=True):
    """Clean the documents and extract features from them.

    Args:
        documents: The raw documents to clean.
        extractor: Object providing ``extract_features(documents)``; it is
            called with the cleaned documents.
        SHOULD_ADD_NEGATIONS: Passed as the second argument to
            ``clean.DocumentCleaner.clean``. Defaults to ``True``. The
            upper-case name is kept for keyword-argument compatibility
            with existing callers.

    Returns:
        The value returned by ``extractor.extract_features`` for the
        cleaned documents.
    """
    cleaner = clean.DocumentCleaner()
    cleaned_documents = cleaner.clean(documents, SHOULD_ADD_NEGATIONS)
    # Named ``features`` rather than ``input`` so the builtin ``input()`` is
    # not shadowed inside this function.
    features = extractor.extract_features(cleaned_documents)
    return features