Exemplo n.º 1
0
	def __init__(self, freq_data, selected_docs, context_doc_groups, global_scores):
		"""Aggregate phrase statistics for the selected docs and every context group.

		Args:
			freq_data: Passed through unchanged to ``agg_phrase_cnt`` and
				``agg_phrase_df``.
			selected_docs: Sized collection of selected documents; its length
				is stored as ``self.self_dc``.
			context_doc_groups: Mapping of group key -> sized collection of
				documents belonging to that context group.
			global_scores: Stored as-is on ``self.global_scores``.

		NOTE(review): the results of ``agg_phrase_cnt`` / ``agg_phrase_df`` are
		presumably dict-like (phrase -> number); only ``len()`` and ``.values()``
		are relied on here -- confirm against those helpers.
		"""
		def _largest_value(table):
			# max() raises ValueError on an empty sequence, so an empty table maps to 0.
			return max(table.values()) if len(table) > 0 else 0

		# Parenthesised single-argument form: prints the same text under the
		# Python 2 print statement and the Python 3 print function.
		print('start query')

		# Statistics over the selected documents.
		self.selected_docs = selected_docs
		self.phrase_cnt = agg_phrase_cnt(freq_data, selected_docs)
		self.phrase_df = agg_phrase_df(freq_data, selected_docs)
		self.max_df = _largest_value(self.phrase_df)
		self.self_dc = len(selected_docs)
		self.sum_cnt = sum(self.phrase_cnt.values())
		self.global_scores = global_scores

		# Per-context-group statistics, each keyed by the group key.
		self.phrase_cnt_context = {}
		self.phrase_df_context = {}
		self.max_df_context = {}
		self.dc_context = {}
		self.sum_cnt_context = {}
		for group, docs in context_doc_groups.items():
			group_cnt = agg_phrase_cnt(freq_data, docs)
			group_df = agg_phrase_df(freq_data, docs)
			self.phrase_cnt_context[group] = group_cnt
			self.phrase_df_context[group] = group_df
			self.max_df_context[group] = _largest_value(group_df)
			self.dc_context[group] = len(docs)
			self.sum_cnt_context[group] = sum(group_cnt.values())

		# added for exploration
		self.context_groups = {}
		self.ranked_list = []
Exemplo n.º 2
0
	def update_selected_docs(self, freq_data, selected_docs, phrases=()):
		"""Rebuild the selected-document statistics when no phrases are given.

		Args:
			freq_data: Passed through unchanged to ``agg_phrase_cnt`` and
				``agg_phrase_df``.
			selected_docs: Sized collection of selected documents; its length
				is stored as ``self.self_dc``.
			phrases: Optional collection. When it is empty (the default) the
				statistics below are recomputed and ``self.ranked_list`` is
				reset. The default is an immutable empty tuple so no mutable
				object is shared between calls; it is falsy and iterable just
				like the empty list it replaces.

		NOTE(review): no handling for a non-empty ``phrases`` is visible in
		this section; in that case the statements below are skipped.
		"""
		if not phrases:
			self.selected_docs = selected_docs
			self.phrase_cnt = agg_phrase_cnt(freq_data, selected_docs)
			self.phrase_df = agg_phrase_df(freq_data, selected_docs)
			# max() raises ValueError on an empty sequence, so fall back to 0.
			self.max_df = max(self.phrase_df.values()) if len(self.phrase_df) > 0 else 0
			self.self_dc = len(selected_docs)
			self.sum_cnt = sum(self.phrase_cnt.values())
			# Presumably invalidates a ranking derived from the old statistics -- confirm.
			self.ranked_list = []