def _getLexicalStats(messages):
    """Return (tokensCount, vocabularyCount, lexicalRichness) for *messages*.

    lexicalRichness is the ratio of distinct words to total words, or 0
    when the message text yields no words at all.
    """
    tokens = statsUtil.getWords(messages)
    numTokens = len(tokens)
    numDistinct = len(set(tokens))
    # Guard the empty case so we never divide by zero.
    richness = numDistinct / numTokens if numTokens else 0
    return numTokens, numDistinct, richness
def _getLexicalStats(messages):
    """Compute token count, vocabulary size and lexical richness of *messages*.

    NOTE(review): this is a byte-for-byte duplicate of the _getLexicalStats
    defined immediately above, which it shadows at module level — confirm
    one of the two copies can be removed.
    """
    wordList = statsUtil.getWords(messages)
    total = len(wordList)
    distinct = len(set(wordList))
    if total == 0:
        # No words at all: richness is defined as zero.
        ratio = 0
    else:
        ratio = distinct / total
    return total, distinct, ratio
def _generateLexicalStatsBy(self, groupByColumns=None):
    """Generate per-sender lexical stats, optionally grouped by extra columns.

    For each (sender, *groupByColumns) group the concatenated message text is
    tokenized and three columns are produced: tokensCount, vocabularyCount and
    lexicalRichness (vocabulary/tokens, 0 for empty groups). A synthetic
    "total" sender row aggregates across senders.

    :param groupByColumns: optional list of extra self.df columns to group by.
        Defaults to no extra grouping (None, treated as []).
    :return: DataFrame with the stats columns; when no extra grouping columns
        are given it is indexed by sender and restricted to the three stats
        columns, otherwise sender/group columns are kept as regular columns.
    """
    # Fix: the original used a mutable default argument (groupByColumns=[]).
    groupByColumns = list(groupByColumns) if groupByColumns else []
    # Tokenize the concatenated text of each group.
    # TODO(review): a dedicated simpler tokenize helper in statsUtil would be
    # cleaner than joining and re-splitting here (carried over from original).
    res = self.df.groupby(['sender'] + groupByColumns, as_index=False).agg(
        {'text': lambda x: tuple(statsUtil.getWords(" ".join(x)))})
    res['tokensCount'] = res['text'].apply(len)
    res['vocabularyCount'] = res['text'].apply(lambda words: len(set(words)))
    res.drop('text', axis=1, inplace=True)
    if groupByColumns:
        # Per-group totals across senders. NOTE: summing vocabularyCount is a
        # sum of per-sender vocabularies, not a union vocabulary (unchanged
        # from the original behavior).
        tot = res.groupby(groupByColumns, as_index=False).sum()
        tot['sender'] = "total"
        res = pd.concat([res, tot])
        # Fix for the original "Missing tokencount = zero case" TODO: avoid
        # division by zero, matching the scalar _getLexicalStats behavior.
        res['lexicalRichness'] = (res['vocabularyCount'] / res['tokensCount']
                                  ).where(res['tokensCount'] != 0, 0)
        return res
    res.set_index(['sender'], inplace=True)
    res.loc['total'] = res.sum()
    res['lexicalRichness'] = (res['vocabularyCount'] / res['tokensCount']
                              ).where(res['tokensCount'] != 0, 0)
    return res[['tokensCount', 'vocabularyCount', 'lexicalRichness']]
def _computeWordsCount(msgs, groupByColumns):
    """Generate a dataframe with per-word counts for each group-by entry.

    NOTE(review): the original docstring said grouping is done on the passed
    columns "plus the sender one", but the code groups only on groupByColumns
    — confirm callers include 'sender' in that list.

    :param msgs: DataFrame with at least a 'text' column and groupByColumns.
    :param groupByColumns: columns to group the messages by.
    :return: DataFrame indexed by the group keys, one count column per word.
    """
    # Concatenate the text of every message within each group.
    grouped_msgs = msgs.groupby(groupByColumns).agg(
        {'text': lambda x: " ".join(x)})
    # Count-vectorize using the project tokenizer as the analyzer.
    vectorizer = CountVectorizer(analyzer=lambda x: statsUtil.getWords(x))
    X = vectorizer.fit_transform(grouped_msgs['text'].values)
    # Fix: get_feature_names() was deprecated in scikit-learn 1.0 and removed
    # in 1.2; prefer get_feature_names_out(), falling back for old versions.
    try:
        featureNames = vectorizer.get_feature_names_out()
    except AttributeError:
        featureNames = vectorizer.get_feature_names()
    # One column per word, rows aligned with the grouped messages.
    countMatrix = pd.DataFrame(X.toarray(), index=grouped_msgs.index,
                               columns=featureNames)
    # Join counts back, dropping the now-redundant concatenated text column.
    return grouped_msgs.drop('text', axis=1).join(countMatrix)