def getFreAndWeight(self):
    """
    Display the frequency and the weight of a term in a document.

    The document name is read from ``documentEdit`` and the term from
    ``termEdit``. Both values are looked up through ``inverted_index``
    using the document name with ".txt" appended, then written to
    ``freqLabel`` and ``weightLabel``. If a ``FileNotFoundError`` is raised
    along the way, a popup is shown instead and the labels are left as
    they were at the moment of the failure.
    """
    try:
        doc_name = self.documentEdit.text()
        searched_term = self.termEdit.text()
        # Both lookups address the same file, so build its name once.
        file_name = doc_name + ".txt"
        term_frequency = inverted_index.frequence(searched_term, file_name)
        term_weight = inverted_index.weight(searched_term, file_name)
        self.freqLabel.setText("Frequence " + str(term_frequency))
        self.weightLabel.setText("Weight %.2f" % term_weight)
    except FileNotFoundError:
        self.popup(
            "No document found",
            "There is no document in the collectin with that name",
            "No document Found",
        )
def cosinus_measure(query, doc_name):
    """
    Return the cosine measure between a document and a query.

    Every query term has an implicit weight of 1; the document side uses
    ``inverted_index.weight(term, doc_name)``. The result is the inner
    product divided by ``sqrt(sum(query weights²) * sum(doc weights²))``.

    :param query: re-iterable collection of query terms (it is iterated
        more than once). NOTE(review): previously documented as ``str``;
        iterating a plain string yields single characters -- confirm what
        callers pass.
    :param doc_name: the document name
    :type doc_name: str
    :return: the cosine measure, or 0.0 when the denominator is zero
        (empty query, or every term has a zero weight in the document)
    :rtype: float
    """
    squared_weights = [
        pow(inverted_index.weight(term, doc_name), 2) for term in query
    ]
    # Each query term weighs 1, so the sum of squared query weights is
    # simply the number of terms.
    query_norm_sq = len(squared_weights)
    doc_norm_sq = sum(squared_weights)
    denominator = math.sqrt(query_norm_sq * doc_norm_sq)
    if denominator == 0:
        # Nothing to compare against: report "no similarity" instead of
        # raising ZeroDivisionError.
        return 0.0
    return inner_product(query, doc_name) / denominator
def inner_product(query, doc_name):
    """
    Return the inner product between a document and a query.

    Each query term contributes with a weight of 1, so the product reduces
    to the sum of ``inverted_index.weight(term, doc_name)`` over the terms
    of the query.

    :param query: iterable of query terms
    :param doc_name: the document name
    :type doc_name: str
    :return: the inner product (0 for an empty query)
    """
    query_weight = 1
    return sum(
        query_weight * inverted_index.weight(term, doc_name)
        for term in query
    )
def dice_coefficient(query, doc_name):
    """
    Return the Dice coefficient between a document and a query.

    Computed as ``2 * inner_product / (sum(query weights²) + sum(doc
    weights²))``, where every query term has an implicit weight of 1 and
    the document weights come from ``inverted_index.weight``.

    :param query: re-iterable collection of query terms (it is iterated
        more than once). NOTE(review): previously documented as ``str``;
        iterating a plain string yields single characters -- confirm what
        callers pass.
    :param doc_name: the document name
    :type doc_name: str
    :return: the Dice coefficient, or 0.0 for an empty query
    :rtype: float
    """
    squared_weights = [
        pow(inverted_index.weight(term, doc_name), 2) for term in query
    ]
    # Each query term weighs 1, so the query side contributes one unit per
    # term to the denominator.
    denominator = len(squared_weights) + sum(squared_weights)
    if denominator == 0:
        # Only reachable with an empty query; avoid ZeroDivisionError.
        return 0.0
    # The whole sum is the divisor. The previous expression
    # `2 * ip / x_sum + y_sum` divided by x_sum only and then added y_sum.
    return (2 * inner_product(query, doc_name)) / denominator
def jackard_measure(query, doc_name):
    """
    Return the Jaccard measure between a document and a query.

    Computed as ``ip / (sum(query weights²) + sum(doc weights²) - ip)``
    where ``ip`` is the inner product, every query term has an implicit
    weight of 1 and the document weights come from
    ``inverted_index.weight``.

    :param query: re-iterable collection of query terms (it is iterated
        more than once). NOTE(review): previously documented as ``str``;
        iterating a plain string yields single characters -- confirm what
        callers pass.
    :param doc_name: the document name
    :type doc_name: str
    :return: the Jaccard measure, or 0.0 for an empty query
    :rtype: float
    """
    squared_weights = [
        pow(inverted_index.weight(term, doc_name), 2) for term in query
    ]
    if not squared_weights:
        # Empty query: the denominator would be 0 + 0 - 0.
        return 0.0
    # Computed once; it is needed in both numerator and denominator.
    dot = inner_product(query, doc_name)
    # For real-valued weights each term adds 1 + w² - w > 0, so the
    # denominator cannot be zero for a non-empty query.
    denominator = len(squared_weights) + sum(squared_weights) - dot
    return dot / denominator