Python get_LIWC示例

编程语言: Python

命名空间/包名称: nlp.feature_extractor

方法/功能: get_LIWC

hotexamples.com的示例: 2

Python get_LIWC - 已找到 2 个示例。这些是从开源项目中提取的、最受好评的 nlp.feature_extractor.get_LIWC 真实 Python 用法示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： baseline.py 项目： syngeek/sarcasm

 def get_local_features(self, text):
     """Collect the per-text features for ``text`` into a fresh dict.

     The text is wrapped in a ``TextObj`` and the same dict is handed to
     each ``feature_extractor`` pass in turn (n-grams with the default
     ``n``, n-grams with ``n=2``, initialisms, basic lengths, repeated
     punctuation, LIWC). The extractors presumably fill the dict in
     place -- their return values are ignored here.

     Returns the dict that was passed to the extractors.
     """
     collected = {}
     parsed = TextObj(text)

     # Token-based passes.
     feature_extractor.get_ngrams(collected, parsed.tokens)
     feature_extractor.get_ngrams(collected, parsed.tokens, n=2)
     feature_extractor.get_initialisms(collected, parsed.tokens)

     # Passes that also (or only) look at the raw text / sentences.
     feature_extractor.get_basic_lengths(
         collected, parsed.text, parsed.sentences, parsed.tokens
     )
     feature_extractor.get_repeated_punct(collected, parsed.text)
     feature_extractor.get_LIWC(collected, parsed.text)

     return collected

示例#2

显示文件

文件： email_filter.py 项目： sampwing/cmps223

def build_features(body, subject, stemmer=None, wn=None, features=None):
    """Build the combined feature dict for one message body and subject.

    Args:
        body: Message body text.
        subject: Message subject text.
        stemmer: Object with a ``stem(text)`` method applied to both texts
            before tokenizing. When ``None`` the texts are tokenized
            unstemmed (previously the ``None`` default raised
            ``AttributeError``).
        wn: Object with a ``lookup(token)`` method returning an iterable;
            must be supplied when ``'wn'`` is in ``features``.
        features: Collection of optional feature-group names. Recognized
            names are ``'bow_desc'``, ``'liwc'``, ``'polarity'`` and
            ``'wn'``. ``None`` (the default) enables none of them.

    Returns:
        The result of ``combine_dicts`` over: the ``bow_<token>`` flags
        (always present), the ``subject_<token>`` / ``body_<token>`` flags,
        the ``WN_<element>`` flags, the LIWC dicts passed through
        ``prepend_key`` with ``'SUBJLIWC'`` / ``'BODYLIWC'``, and the
        ``'positive'`` / ``'negative'`` polarity counts. Groups that were
        not requested contribute empty dicts.
    """
    # Avoid a shared mutable default; only membership tests are performed.
    if features is None:
        features = ()

    if stemmer is None:
        stemmed_body, stemmed_subj = body, subject
    else:
        stemmed_body = stemmer.stem(body)
        stemmed_subj = stemmer.stem(subject)
    tokens_body = wordpunct_tokenize(stemmed_body)
    tokens_subj = wordpunct_tokenize(stemmed_subj)

    # Bag of words over body + subject is always emitted.
    all_bow = {'bow_{}'.format(token): True for token in tokens_body + tokens_subj}

    body_bow = dict()
    subj_bow = dict()
    # NOTE(review): an earlier comment on this function spelled this flag
    # 'bow_disc'; confirm which spelling callers actually pass.
    if 'bow_desc' in features:
        body_bow = {'body_{}'.format(token): True for token in tokens_body}
        subj_bow = {'subject_{}'.format(token): True for token in tokens_subj}

    subj_liwc = dict()
    body_liwc = dict()
    if 'liwc' in features:
        # get_LIWC's return value is unused; presumably it fills the dict
        # it is given -- verify against its definition.
        get_LIWC(subj_liwc, stemmed_subj)
        get_LIWC(body_liwc, stemmed_body)

    polarity = defaultdict(int)
    if 'polarity' in features:
        for token in tokens_subj + tokens_body:
            result = msol.lookup(token)
            if result in ('negative', 'positive'):
                polarity[result] += 1

    all_wn = dict()
    if 'wn' in features:
        for token in tokens_body + tokens_subj:
            for element in wn.lookup(token):
                all_wn['WN_{}'.format(element)] = True

    return combine_dicts(all_bow,
                         subj_bow,
                         body_bow,
                         all_wn,
                         prepend_key(subj_liwc, 'SUBJLIWC'),
                         prepend_key(body_liwc, 'BODYLIWC'),
                         polarity,
                         )