def __init__(self, initial_query, initial_ranked_docs, patent_info):
    """Store the query context and derive term statistics from it.

    Args:
        initial_query: Object exposing a ``terms_dictionary`` attribute;
            kept on the instance as ``initial_query``.
        initial_ranked_docs: Stored unchanged as ``initial_ranked_docs``.
        patent_info: Stored unchanged as ``patent_info``.
    """
    # Plain inputs are stored first.
    # NOTE(review): get_expanded_terms() presumably reads these
    # attributes, so the assignment order is kept as-is -- confirm.
    self.initial_query = initial_query
    self.initial_ranked_docs = initial_ranked_docs
    self.terms_dictionary = self.initial_query.terms_dictionary
    self.patent_info = patent_info

    # Derived state: terms -> frequency distribution -> tf weights.
    expanded_terms = self.get_expanded_terms()
    self.terms = expanded_terms

    frequency_dist = util.getFreqDist(expanded_terms)
    self.terms_dist = frequency_dist

    self.tf_weights = util.get_tf_weights(frequency_dist)
def __init__(self, query_file, terms_dictionary, patent_info):
    """Initialise the query from a query file plus corpus-level data.

    Args:
        query_file: Passed to ``Query.parse``, whose result is unpacked
            into ``title`` and ``desc``.
        terms_dictionary: Passed through ``get_terms_dictionary`` before
            being stored on the instance.
        patent_info: Sized container (``len()`` is taken); stored
            unchanged and also handed to ``get_avg_doc_length``.
    """
    # Corpus-level data.
    self.terms_dictionary = self.get_terms_dictionary(terms_dictionary)
    self.patent_info = patent_info
    # NOTE(review): the "- 1" presumably discounts one non-document
    # entry in patent_info -- confirm against how it is built.
    self.corpus_size = len(patent_info) - 1
    self.avg_doc_length = self.get_avg_doc_length(patent_info)

    # Query text.
    parsed_title, parsed_desc = Query.parse(query_file)
    self.title = parsed_title
    self.desc = parsed_desc

    # Derived state: terms -> frequency distribution -> tf weights.
    query_terms = self.get_terms()
    self.terms = query_terms

    frequency_dist = util.getFreqDist(query_terms)
    self.terms_dist = frequency_dist

    self.tf_weights = util.get_tf_weights(frequency_dist)

    # Built last, after every attribute above has been set.
    self.docterm_matrix = self.get_docterms_matrix()
def __init__(self, doc_file):
    """Parse ``doc_file`` and populate the document's attributes.

    Args:
        doc_file: Passed to ``self.parse``; the result must be indexable
            by the key ``"patent number"``.
    """
    # Raw fields as returned by parse() (extracted from the XML file).
    raw_fields = self.parse(doc_file)
    self.fields = raw_fields

    # Attributes derived from the raw fields; the helper call order is
    # kept unchanged.
    self.id = raw_fields["patent number"]
    self.abstract = self.get_abstract()

    doc_terms = self.get_terms()
    self.terms = doc_terms

    self.cites = self.get_cited_patents()
    self.cited_by = []  # starts empty; nothing in this constructor fills it
    self.related = self.get_related_patents()
    self.ipc = self.get_IPC_groups()
    self.upc = self.get_UPC_groups()

    # Term statistics: frequency distribution, then tf weights.
    frequency_dist = util.getFreqDist(doc_terms)
    self.terms_dist = frequency_dist
    self.tf_weights = util.get_tf_weights(frequency_dist)