def process_comment(data, **kwargs):
    """Classify the words of a cached comment and store the result on it.

    The comment is looked up in the cache named by ``data['cache_id']``
    under the key ``data['comment_id']``.  Its ``commentBody`` is stripped
    of HTML, tokenized, and every token is passed to ``lp.parse``.  Tokens
    for which ``lp.parse`` returns a truthy result are sorted into six word
    lists according to the first three entries of that result:

    * entry 0:  1 -> ``positive``, -1 -> ``negative``
    * entry 1:  1 -> ``strong``,   -1 -> ``weak``
    * entry 2:  1 -> ``active``,   -1 -> ``passive``

    The lists are attached to the comment as ``comment['words']`` and the
    comment is written back with ``cache.set``.

    Args:
        data: mapping that provides ``cache_id`` and ``comment_id``.
        **kwargs: accepted for call compatibility; not used here.

    Returns:
        True when the comment was processed and stored, False when
        ``comment_id`` is not present in the cache's in-memory items.
    """
    logging.info("$$$$ nlp.process_comment[]")
    cache = voxpop.VPE.get_caches().get(key=data['cache_id'])
    comment_id = data['comment_id']

    # Keep the try body down to the one lookup that is expected to miss.
    try:
        comment = cache.get_cached()[comment_id]
    except KeyError:
        # Lazy %-style argument instead of string concatenation: a
        # non-string id (e.g. an int) must not turn this "not found"
        # path into a TypeError.
        logging.info(
            "$$$$ nlp.process_comment: ITEM NOT FOUND IN MEMORY [%s]",
            comment_id)
        return False

    # NOTE(review): nltk.clean_html is not usable in NLTK 3.x (it raises
    # NotImplementedError there) -- confirm the NLTK version this runs on.
    tokens = nltk.word_tokenize(nltk.clean_html(comment['commentBody']))

    # One (key for +1, key for -1) pair per entry of the parse result.
    axes = (
        ('positive', 'negative'),
        ('strong', 'weak'),
        ('active', 'passive'),
    )
    words = {
        'positive': [], 'negative': [],
        'strong': [], 'weak': [],
        'active': [], 'passive': [],
    }

    for token in tokens:
        # Only the parser call is guarded by the lock; everything below
        # works on local data.
        with lasswell_lock:
            parsed = lp.parse(token)
        if not parsed:
            continue

        logging.info("%s%s", token, parsed)
        for index, (plus_key, minus_key) in enumerate(axes):
            score = parsed[index]
            if score == 1:
                words[plus_key].append(token)
            elif score == -1:
                words[minus_key].append(token)

    comment['words'] = words
    cache.set(comment_id, comment)
    return True
def parse(self, text):
    """Return a scored entry for every token of *text* tagged ``"JJ"``.

    *text* is stripped of HTML with ``nltk.clean_html``, tokenized and
    part-of-speech tagged.  Only tokens whose tag is exactly ``"JJ"`` are
    kept (the comparison is an equality test, so other tags that merely
    start with ``JJ`` are skipped).

    Returns:
        A list with one ``[word, evaluative(word), potency(word),
        activity(word)]`` entry per kept token, in token order.  The list
        is empty when no token is tagged ``"JJ"``.
    """
    plain_text = nltk.clean_html(text)
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(plain_text))
    return [
        [word, evaluative(word), potency(word), activity(word)]
        for word, tag in tagged_tokens
        if tag == "JJ"
    ]