Example #1
def get_plagiarism(text, atom_type, features, cluster_method, k):
    '''
    Return a list of tuples of the form [((0, 18), .5), ((20, 45), .91), ...]
    Each tuple holds a span tuple and a confidence. The span tuple
    corresponds to an atom of the document, and the confidence value reflects
    how confident we are that that span was plagiarized.
    '''
    # Create a FeatureExtractor over the full document text
    feature_extractor = FeatureExtractor(text)
    # Get one feature vector per atom (e.g. per sentence)
    feature_vecs = feature_extractor.get_feature_vectors(features, atom_type)
    # Cluster the vectors and get a confidence per atom
    confidences = cluster(cluster_method, k, feature_vecs)
    # Pair each atom's span with its confidence
    return zip(tokenize(text, atom_type), confidences) # should feature extractor have a method that returns the spans it used instead?
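A minimal usage sketch for the function above; the feature names and the "kmeans" cluster_method are hypothetical placeholders, not names confirmed by this snippet (the "sentence" atom_type does appear in the later examples):

text = open("suspect_document.txt").read()
results = get_plagiarism(text, "sentence",
                         ["avg_word_length", "avg_sentence_length"],  # hypothetical feature names
                         "kmeans", 2)  # hypothetical cluster_method
for (start, end), confidence in results:
    if confidence > 0.8:
        print "possible plagiarism in span", (start, end), "confidence", confidence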
Example #2
    def __init__(self):
        self.articles_dir = "articles/"
        self.done_articles_file = "nn_trained_articles.pkl"
        self.dataset_file = "nn_dataset.pkl"
        self.nn_file = "nn.pkl"
        # Use every available feature function, including nested ones
        self.features = FeatureExtractor.get_all_feature_function_names(
            include_nested=True)
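The enclosing class is not shown in this snippet; a hypothetical driver, assuming the class is named NNTrainer, would tie the later examples together like this:

trainer = NNTrainer()    # hypothetical class name, not confirmed by the snippet
trainer.build_dataset()  # interactively label sentences (Example #5)
trainer.build_nn()       # train and try out the network (Example #4)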
Example #3
def get_plagiarism_passages(text, atom_type, features, cluster_method='none', k=2):
    '''
    Return a list of passage objects, each of which carries its start/end
    indices, its text, its atom_type, and a dictionary of its features.
    '''
    # Extract passage objects (including their feature vectors)
    feature_extractor = FeatureExtractor(text)
    passages = feature_extractor.get_passages(features, atom_type)
    feature_vecs = [p.features.values() for p in passages]

    # If just testing feature extraction, don't cluster passages
    if cluster_method != 'none':
        # Cluster the passages and set their confidences
        confidences = cluster(cluster_method, k, feature_vecs)
        for psg, conf in zip(passages, confidences):
            psg.set_plag_confidence(conf)
    
    # Passages, with plag. confidences set only when clustering ran
    return passages
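A minimal usage sketch; the attribute read below (plag_confidence) is an assumption inferred from set_plag_confidence, and the feature name is hypothetical:

text = open("suspect_document.txt").read()
passages = get_plagiarism_passages(text, "sentence",
                                   ["avg_word_length"],  # hypothetical feature name
                                   cluster_method="kmeans", k=2)
for p in passages:
    print p.plag_confidence  # assumed attribute; set only when clustering ran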
Example #4
    def build_nn(self):
        # One input per feature, a hidden layer half that size, one output
        nn = buildNetwork(len(self.features), len(self.features) / 2, 1)
        dataset = None

        if os.path.isfile(self.dataset_file):
            with open(self.dataset_file, "rb") as f:
                dataset = cPickle.load(f)

        if dataset:
            trainer = BackpropTrainer(nn, dataset)
            trainer.trainEpochs(epochs=1000)

        with open(self.nn_file, "wb") as f:
            cPickle.dump(nn, f)

        # Interactive test loop: an empty line exits
        while True:
            s = raw_input("Test sentence: ")
            if not s:
                break
            extractor = FeatureExtractor(s)
            vectors = extractor.get_feature_vectors(self.features, "sentence")[0]
            print nn.activate(vectors)
            print "__" * 8
Example #5
    def build_dataset(self):
        # Resume from an existing dataset if one has been saved
        if os.path.isfile(self.dataset_file):
            with open(self.dataset_file, "rb") as f:
                dataset = cPickle.load(f)
        else:
            dataset = SupervisedDataSet(len(self.features), 1)

        # Track how many sentences of each article are already labeled
        if os.path.isfile(self.done_articles_file):
            with open(self.done_articles_file, "rb") as f:
                done_articles = cPickle.load(f)
        else:
            done_articles = {}

        for file_name in os.listdir(self.articles_dir):
            print "\n\n"
            print "---" * 10
            decision = raw_input("Do another article? [y/n] ")
            if not decision or decision[0].lower() != "y":
                break

            with open(self.articles_dir + file_name) as article:
                # Skip the first line (presumably a header/title)
                text = "".join(article.readlines()[1:])
                sentences = tokenize(text, "sentence", return_spans=False)

                article_position = done_articles.get(file_name, 0)
                if article_position >= len(sentences):
                    continue

                print "Looking at:", file_name, "from position", article_position

                for sentence in sentences[article_position:]:
                    extractor = FeatureExtractor(sentence)
                    vectors = extractor.get_feature_vectors(
                        self.features, "sentence")[0]
                    print sentence

                    # Ask for a label until we get a recognized response
                    value = -1
                    while value == -1:
                        rating = raw_input("nothing=OK, space=bad, q=quit: ")
                        if rating == "":
                            value = [0]
                        elif rating[:1].lower() == "q":
                            value = None
                        elif rating[:1] == " ":
                            value = [1]

                    # quit on q
                    if value is None:
                        break

                    dataset.appendLinked(vectors, value)
                    done_articles[file_name] = done_articles.get(file_name,
                                                                 0) + 1

        with open(self.dataset_file, "wb") as f:
            cPickle.dump(dataset, f)
        with open(self.done_articles_file, "wb") as f:
            cPickle.dump(done_articles, f)
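The dataset object used above is PyBrain's SupervisedDataSet; a self-contained sketch of the two calls the method relies on:

from pybrain.datasets import SupervisedDataSet

ds = SupervisedDataSet(3, 1)             # 3 input features, 1 target value
ds.appendLinked([0.5, 0.1, 0.9], [1])    # one labeled sample: features -> "bad"
print len(ds)                            # -> 1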