def text_profiles_similarity(self):
    """Compute and return similarity scores between profiles, based on
    text features and KMeans clustering.

    Builds one aggregated document per (user, cluster-label) pair, turns
    them into TF-IDF feature vectors and returns the pairwise euclidean
    distance matrix between those vectors.
    """
    # Text (TF-IDF)
    processor = TextProcessor(store_docs=True,
                              clusters={'kmeans': lambda: KMeans(5)})
    processor.run()

    # One aggregated document per (user, cluster label); these form the
    # dataset we then use to compare the profiles.
    cluster_docs = []
    for username, cluster in processor.clusters["kmeans"].items():
        user_docs = processor.stored_docs[username]
        for label in np.unique(cluster.labels_):
            # gather only the documents carrying this label
            member_indices = np.flatnonzero(cluster.labels_ == label)
            cluster_docs.append(
                " ".join(user_docs[idx] for idx in member_indices))

    features = processor.get_features(cluster_docs)
    self._processor = processor
    return euclidean_distances(features, features)
def __init__(self):
    """Initialise output location and the lazily-populated analysis caches."""
    self.output_path = OUTPUT_PATH
    # Caches filled on demand by the analysis methods.
    for cache_attr in ("_processor", "_usernames", "_rankings"):
        setattr(self, cache_attr, None)
    # Factory producing the processor used when none is supplied.
    self._default_processor = lambda: TextProcessor(
        store_docs=True,
        clusters={"kmeans": lambda: KMeans(5)},
    )
def text_users_similarity(self):
    """Compute the similarity between users using text features"""
    self._processor = processor = TextProcessor()
    per_user_features = [processor.get_features(docs, user)
                         for user, docs in processor.iterate()]
    # draw the matrix for alexis
    first_user = per_user_features[0]
    draw_matrix(euclidean_distances(first_user, first_user),
                "text_alexis", OUTPUT_PATH)
def compare_pca(self):
    """Compare the clusters generated with different values for the
    dimensions of the PCA
    """
    # One processor per candidate dimensionality; all constructed up
    # front, before any of them runs.
    processors = tuple(TextProcessor(N=dims, algorithms=["kmeans"])
                       for dims in (50, 100, 200))

    per_user_bincounts = defaultdict(list)
    for processor in processors:
        # don't use random centers for kmeans to be able to compare them
        processor._particular_user = "******"
        processor.run()
        for user, cluster in processor.clusters['kmeans'].items():
            per_user_bincounts[user].append(np.bincount(cluster.labels_))

    for user, bincounts in per_user_bincounts.items():
        compare_pies(bincounts, "compare_%s.png" % user, self.output_path)
def get_topics(self, n_topics=4):
    """Print the topics using a RandomizedPCA"""
    # NOTE(review): n_topics is the number of topic rows printed per user.
    tp = TextProcessor("docs")
    # Invert the vectorizer vocabulary: feature index -> term, so that
    # component weights can be displayed as words.
    inverse_vocabulary = dict([(y, x) for (x, y) in tp.vec.vocabulary.items()])
    for user, docs in tp.iterate():
        transformed_docs = tp.get_features(docs, user)
        print "top %s topics for %s" % (n_topics, user)
        for i in range(n_topics):
            # NOTE(review): this treats the first n_topics rows of the
            # transformed matrix as topic/component weight vectors —
            # presumably get_features returns PCA components here;
            # verify against TextProcessor.get_features.
            # Top-10 features by descending weight for this row.
            top_words = [
                inverse_vocabulary[n]
                for n in transformed_docs[i].argsort()[-10:][::-1]
            ]
            print " - " + ", ".join(top_words)
        print "---"
def run_processors(self):
    """Instantiate the text and context processors with drawing options
    enabled, then run each of them.
    """
    options = {'draw_2d': True, 'draw_pie': True}
    # Build every processor first, then run them in order.
    instances = tuple(cls(**options)
                      for cls in (TextProcessor, ContextProcessor))
    for instance in instances:
        instance.run()