Exemplo n.º 1
0
	def highlight(self, threshold=0.5):
		"""Score the preprocessed sentences with LexRank and map the scores to chats.

		LexRank is run over ``self.preprocessed`` joined by single spaces.
		``self.ts.chat_to_sentence_mapping`` is then walked in order to line
		each score entry up with the sentence it came from: the entry's index
		is rewritten to the sentence's position in the mapping and the
		sentence's chat indices are appended to the entry.  The aligned
		entries are stored in ``self.highlight_lexrank``.

		Args:
			threshold: Not used by this method; kept so existing callers keep
				working.

		Returns:
			Whatever ``self._map_to_chat`` returns for the aligned entries.
			Items of that result whose first element equals 1 are also
			printed.
		"""
		print("highlight : return list of chats and scores ")

		lexrank = LexRank()
		lexrank.summarize(" ".join(self.preprocessed))

		# Presumably a list of (index, text, score) tuples -- TODO confirm
		# against the LexRank implementation in use.
		scores = lexrank.sentence_score_pair()

		# Same normalisation on every sentence so it can be compared with the
		# text stored in the score entries.
		preprocessed = [x.strip().strip(".").strip() for x in self.preprocessed]

		lex_idx = 0      # next score entry waiting for its sentence
		skip_amount = 0  # non-blank sentences that had no matching score entry
		jump = 0         # blank sentences seen so far
		for _ts_idx, chat_idxs, sentence in self.ts.chat_to_sentence_mapping:
			if lex_idx >= len(scores):
				break

			if not sentence.strip():
				# Blank sentences do not advance the position in
				# `preprocessed`; they only shift the rewritten index.
				jump += 1
			elif preprocessed[lex_idx + skip_amount] != scores[lex_idx][1]:
				# This sentence has no score entry (presumably dropped by
				# LexRank); move on to the next sentence.
				skip_amount += 1
			else:
				entry = list(scores[lex_idx])
				entry[0] = lex_idx + jump + skip_amount
				entry.append(chat_idxs)
				scores[lex_idx] = entry
				lex_idx += 1

		self.highlight_lexrank = scores[:]
		print("highlight result")
		return_list = self._map_to_chat(self.highlight_lexrank)
		for chat in return_list:
			if chat[0] == 1:
				print(chat)
		return return_list
Exemplo n.º 2
0
	def include_additional_frequency(self, threshold = 0.05):
		"""Blend LexRank sentence scores with a reaction-frequency weight.

		Steps:

		1. Total the reaction ("noise") counts of
		   ``self.ts.person_reaction_frequency`` per person and per reaction
		   type.
		2. Keep the reaction types whose share of all reactions is at least
		   ``threshold``.
		3. For entry ``i`` of ``self.ts.sentence_reaction_mapping``, spread
		   its counts of the kept reaction types (divided by the reacting
		   person's total) over all positions using a normal density with
		   mean ``i - 1`` and standard deviation 1.5, then normalise the
		   weights so they sum to 1.
		4. Run LexRank over ``self.preprocessed`` and align the score entries
		   with ``self.ts.chat_to_sentence_mapping``.
		5. For every position, add ``0.1 * weight`` to the first score entry
		   whose chat indices contain that position, then divide every score
		   by 1.1.

		The blended entries are stored in ``self.highlight_lexrank``.

		Args:
			threshold: Minimum share of the total reaction count a reaction
				type needs in order to be taken into account.

		Returns:
			Whatever ``self._map_to_chat`` returns for the blended entries.
			Items of that result whose first element equals 1 are also
			printed.
		"""
		reaction_freq = self.ts.person_reaction_frequency
		reaction_map = self.ts.sentence_reaction_mapping

		# --- 1. totals per person and per reaction type ---------------------
		total_noise_num_by_person = [sum(entry[1]) for entry in reaction_freq]
		total_noise_num = sum(total_noise_num_by_person)

		# Copy the first person's counts: accumulating into the original list
		# would overwrite self.ts.person_reaction_frequency[0][1] and make
		# every later call see inflated numbers.
		total_noise_num_by_noise = list(reaction_freq[0][1]) if len(reaction_freq) else []
		for person in range(1, len(reaction_freq)):
			for j, count in enumerate(reaction_freq[person][1]):
				total_noise_num_by_noise[j] += count

		# --- 2. reaction types frequent enough to be meaningful -------------
		# A reaction type that is used too rarely is treated as plain noise.
		# With no reactions at all there is nothing to divide by, so no type
		# qualifies.
		usable_noise = []
		if total_noise_num:
			usable_noise = [
				i for i, count in enumerate(total_noise_num_by_noise)
				if count / total_noise_num >= threshold
			]

		# --- 3. reaction weight per position --------------------------------
		result = [0] * len(reaction_map)
		positions = range(len(result))
		for i, mapping in enumerate(reaction_map):
			if not usable_noise:
				break

			# Find the person who reacted; falls back to the first person
			# when no name matches.
			person_index = 0
			for l, entry in enumerate(reaction_freq):
				if mapping[1] == entry[0]:
					person_index = l
					break

			# Dividing by the person's own total means someone who reacts all
			# the time contributes less per reaction.
			person_total = total_noise_num_by_person[person_index]
			if person_total == 0:
				continue

			# The density depends only on (i, k), so evaluate it once per
			# position instead of once per reaction type.
			current = scipy.stats.norm(i - 1, 1.5)
			densities = [current.pdf(k) for k in positions]
			for noise_idx in usable_noise:
				reaction_count = mapping[2][noise_idx]
				for k, density in enumerate(densities):
					result[k] += density * reaction_count / person_total

		# Normalise to a distribution; if nothing was accumulated, leave the
		# weights at zero rather than dividing by zero.
		weight_sum = sum(result)
		if weight_sum:
			result = [weight / weight_sum for weight in result]

		# --- 4. LexRank scores aligned with the chat/sentence mapping -------
		lexrank = LexRank()
		lexrank.summarize(" ".join(self.preprocessed))

		# Presumably a list of (index, text, score) tuples -- TODO confirm
		# against the LexRank implementation in use.
		scores = lexrank.sentence_score_pair()

		preprocessed = [x.strip().strip(".").strip() for x in self.preprocessed]
		lex_idx = 0      # next score entry waiting for its sentence
		skip_amount = 0  # non-blank sentences that had no matching score entry
		jump = 0         # blank sentences seen so far
		for _ts_idx, chat_idxs, sentence in self.ts.chat_to_sentence_mapping:
			if lex_idx >= len(scores):
				break

			if not sentence.strip():
				jump += 1
			elif preprocessed[lex_idx + skip_amount] != scores[lex_idx][1]:
				skip_amount += 1
			else:
				entry = list(scores[lex_idx])
				entry[0] = lex_idx + jump + skip_amount
				entry.append(chat_idxs)
				scores[lex_idx] = entry
				lex_idx += 1

		# --- 5. blend: score = (lexrank + 0.1 * reaction weight) / 1.1 ------
		# NOTE(review): this indexes element 3 (the chat indices) of every
		# score entry, so it relies on all entries having been aligned above.
		for position, weight in enumerate(result):
			for entry in scores:
				if position in entry[3]:
					entry[2] += weight * 0.1
					break
		for entry in scores:
			entry[2] = entry[2] / 1.1
		print(scores)

		self.highlight_lexrank = scores[:]
		print("highlight result")
		return_list = self._map_to_chat(self.highlight_lexrank)
		for chat in return_list:
			if chat[0] == 1:
				print(chat)
		return return_list
Exemplo n.º 3
0
	def include_additional (self, threshold=0.5):
		"""Boost the stored highlight scores with LexRank scores of reaction sentences.

		LexRank is run over the sentences of ``self.ts.with_reaction`` (each
		with a trailing "." appended).  The score entries are aligned with
		the positions of ``self.ts.with_reaction`` and stored in
		``self.additional_lexrank``.  Every entry of
		``self.highlight_lexrank`` whose index also occurs in the additional
		scores then has that score added and the sum multiplied by 0.8.

		``self.highlight_lexrank`` is read and modified in place, so it must
		already exist when this method is called, and each of its entries
		must be a mutable 4-item sequence (index, sentence, score, chat
		indices).

		Args:
			threshold: Not used by this method; kept so existing callers keep
				working.

		Returns:
			Whatever ``self._map_to_chat`` returns for the updated
			``self.highlight_lexrank``.  Items of that result whose first
			element equals 1 are also printed.
		"""
		print("additionals : ")
		lexrank = LexRank()

		# TODO(review): the original author left open whether these sentences
		# should also be stripped of surrounding whitespace and dots.
		input_seq = [x[1]+"." for x in self.ts.with_reaction]
		lexrank.summarize(" ".join(input_seq))

		# Presumably a list of (index, text, score) tuples -- TODO confirm
		# against the LexRank implementation in use.
		scores = lexrank.sentence_score_pair()

		lex_idx = 0      # next score entry waiting for its sentence
		skip_amount = 0  # scorable sentences that had no matching score entry
		jump = 0         # blank or single-word sentences seen so far

		# Rewrite each score entry's index to a position in
		# self.ts.with_reaction.
		# NOTE(review): the first element of each with_reaction item is not
		# used; the rewritten index is purely positional -- confirm that this
		# is the index space self.highlight_lexrank uses.
		for _ts_idx, sentence in self.ts.with_reaction:
			sentence = sentence.strip(" ")

			if lex_idx >= len(scores):
				break

			if not sentence.strip() or len(sentence.split(" ")) < 2:
				jump += 1
			elif sentence != scores[lex_idx][1]:
				skip_amount += 1
			else:
				entry = list(scores[lex_idx])
				entry[0] = lex_idx + jump + skip_amount
				scores[lex_idx] = entry
				lex_idx += 1

		self.additional_lexrank = scores[:]
		additional_dict = {i: score for i, _sentence, score in self.additional_lexrank}

		for entry in self.highlight_lexrank:
			i, _sentence, _score, _chat_idxs = entry
			if i in additional_dict:
				# Add the reaction-based score, then damp the sum by 0.8.
				entry[2] += additional_dict[i]
				entry[2] = entry[2] * 0.8

		print ("new highlights with reaction")
		return_list = self._map_to_chat (self.highlight_lexrank)
		for chat in return_list:
			if chat[0] == 1:
				print(chat)

		return return_list