Пример #1
0
def process_comment(data, **kwargs):
	"""Classify the tokens of a cached comment and write the result back.

	The cache is looked up via ``data['cache_id']`` and the comment via
	``data['comment_id']``. The comment's ``commentBody`` is stripped of HTML,
	tokenized, and every token is passed to ``lp.parse`` while holding
	``lasswell_lock``. For each truthy parse result, items 0, 1 and 2 are
	compared against 1 and -1 and the token is filed under
	positive/negative, strong/weak and active/passive respectively
	(presumably evaluative, potency and activity scores -- confirm against
	the parser).

	Args:
		data: mapping providing the 'cache_id' and 'comment_id' keys.
		**kwargs: accepted for caller compatibility; not used here.

	Returns:
		True once the comment has been stored back with a 'words' entry,
		False when the comment id is not present in the cache.
	"""
	logging.info("$$$$ nlp.process_comment[]")
	cache = voxpop.VPE.get_caches().get(key=data['cache_id'])
	comment_id = data['comment_id']
	try:
		comment = cache.get_cached()[comment_id]
	except KeyError:
		# Lazy %-formatting: concatenating the id raised TypeError for
		# non-string ids, turning a cache miss into a crash.
		logging.info("$$$$ nlp.process_comment: ITEM NOT FOUND IN MEMORY [%s]", comment_id)
		return False

	# NOTE(review): nltk.clean_html raises NotImplementedError in NLTK 3.x;
	# this call presumably targets NLTK 2 -- confirm the pinned version.
	tokens = nltk.word_tokenize(nltk.clean_html(comment['commentBody']))

	# Index into the parse result -> (label for +1, label for -1).
	axes = (
		('positive', 'negative'),
		('strong', 'weak'),
		('active', 'passive'),
	)
	words = {'positive':[], 'negative':[], 'strong':[], 'weak':[], 'active':[], 'passive':[]}
	for token in tokens:
		# Only the parser call is serialized; bookkeeping runs outside the lock.
		with lasswell_lock:
			parsed = lp.parse(token)
		if not parsed:
			continue
		logging.info("%s%s", token, parsed)
		for index, (plus_label, minus_label) in enumerate(axes):
			score = parsed[index]
			if score == 1:
				words[plus_label].append(token)
			elif score == -1:
				words[minus_label].append(token)

	comment['words'] = words
	cache.set(comment_id, comment)
	return True
Пример #2
0
	def parse(self, text):
		"""Return one ``[word, evaluative, potency, activity]`` entry per adjective.

		*text* is stripped of HTML, tokenized and POS-tagged with NLTK. Only
		tokens tagged exactly "JJ" are kept; each is paired with the results
		of ``evaluative``, ``potency`` and ``activity`` for that word. The
		list is empty when no token carries the "JJ" tag.
		"""
		tagged = nltk.pos_tag(nltk.word_tokenize(nltk.clean_html(text)))
		return [
			[word, evaluative(word), potency(word), activity(word)]
			for word, tag in tagged
			if tag == "JJ"
		]