예제 #1
0
파일: Query.py 프로젝트: jinified/cs3245
    def __init__(self, initial_query, initial_ranked_docs, patent_info):
        """Initialize the instance from an initial query and its ranked documents.

        Stores the three arguments, copies ``terms_dictionary`` from the
        initial query, obtains the term list from ``get_expanded_terms()``,
        and then derives the term frequency distribution and tf weights
        through the ``util`` helpers.

        Args:
            initial_query: Object exposing a ``terms_dictionary`` attribute
                (the only attribute of it that is read here).
            initial_ranked_docs: Stored unchanged on the instance.
                NOTE(review): presumably consumed by
                ``get_expanded_terms()`` -- confirm.
            patent_info: Stored unchanged on the instance; its structure is
                not visible from this method.
        """
        self.initial_query = initial_query
        self.initial_ranked_docs = initial_ranked_docs
        # Share the dictionary held by the initial query instead of taking
        # one as a separate argument.
        self.terms_dictionary = self.initial_query.terms_dictionary
        self.patent_info = patent_info
        # Called only after the attributes above are assigned.
        # NOTE(review): get_expanded_terms() presumably reads them, so keep
        # this ordering -- confirm against its definition.
        self.terms = self.get_expanded_terms()

        # Frequency distribution over self.terms, then tf weights computed
        # from that distribution.
        self.terms_dist = util.getFreqDist(self.terms)
        self.tf_weights = util.get_tf_weights(self.terms_dist)
예제 #2
0
파일: Query.py 프로젝트: jinified/cs3245
    def __init__(self, query_file, terms_dictionary, patent_info):
        """Initialize the query from a query file and corpus-level data.

        Sets up the corpus-level attributes first (terms dictionary, patent
        info, corpus size, average document length), then parses the query
        file into a title and description, derives the query terms and their
        weights, and finally builds the document-term matrix.

        Args:
            query_file: Passed straight to ``Query.parse``; whether it is a
                path or a file object is not visible from this method.
            terms_dictionary: Passed to ``get_terms_dictionary()``; the value
                that method returns is what gets stored on the instance.
            patent_info: Sized container (``len()`` is taken); stored
                unchanged and also passed to ``get_avg_doc_length()``.
        """
        self.terms_dictionary = self.get_terms_dictionary(terms_dictionary)
        self.patent_info = patent_info
        # One less than the number of entries in patent_info.
        # NOTE(review): presumably one entry is bookkeeping rather than a
        # document -- confirm against the code that builds patent_info.
        self.corpus_size = len(patent_info) - 1
        self.avg_doc_length = self.get_avg_doc_length(patent_info)

        # Query.parse returns a pair that is unpacked into title and
        # description.
        self.title, self.desc = Query.parse(query_file)
        # NOTE(review): get_terms() presumably reads self.title / self.desc,
        # so it must run after the parse above -- confirm.
        self.terms = self.get_terms()

        # Frequency distribution over self.terms, then tf weights computed
        # from that distribution.
        self.terms_dist = util.getFreqDist(self.terms)
        self.tf_weights = util.get_tf_weights(self.terms_dist)

        # Built last, once every attribute above has been assigned.
        self.docterm_matrix = self.get_docterms_matrix()
예제 #3
0
파일: Document.py 프로젝트: jinified/cs3245
    def __init__(self, doc_file):
        """Initialize the document from a document file.

        Parses ``doc_file`` into ``self.fields``, then populates the
        remaining attributes through the instance's ``get_*`` helpers and
        computes the term frequency distribution and tf weights through the
        ``util`` helpers.

        Args:
            doc_file: Passed straight to ``self.parse``; whether it is a
                path or a file object is not visible from this method.

        Raises:
            KeyError: If the parsed fields are a plain mapping without a
                ``"patent number"`` entry. NOTE(review): depends on the type
                ``parse()`` returns -- confirm.
        """
        # Raw fields extracted from XML file
        self.fields = self.parse(doc_file)

        # Initialization of fields
        # The document identifier is the "patent number" field.
        self.id = self.fields["patent number"]
        # NOTE(review): the get_* helpers below take no arguments, so they
        # presumably read self.fields (and possibly each other's results);
        # keep this ordering -- confirm against their definitions.
        self.abstract = self.get_abstract()
        self.terms = self.get_terms()
        self.cites = self.get_cited_patents()
        # Starts empty; nothing in this method fills it.
        self.cited_by = []
        self.related = self.get_related_patents()
        self.ipc = self.get_IPC_groups()
        self.upc = self.get_UPC_groups()

        # Frequency distribution over self.terms, then tf weights computed
        # from that distribution.
        self.terms_dist = util.getFreqDist(self.terms)
        self.tf_weights = util.get_tf_weights(self.terms_dist)