예제 #1
0
 def testMakeobjectsfromxml(self):
     """Build thread objects via Reader and print the text of every document.

     This is a smoke test: it makes no assertions and only exercises
     ``Reader.readfile`` and ``Reader.makeobjectsfromxml``, dumping the
     results to stdout.
     """
     xml_reader = Reader()
     parsed_input = xml_reader.readfile()
     for parsed_thread in xml_reader.makeobjectsfromxml(parsed_input):
         for doc in parsed_thread._documents:
             # NOTE(review): this reads ``doc.text`` while
             # testtokenizerfromfile reads ``document._text`` -- confirm
             # that both attributes exist on the document type.
             print(doc.text)
 def testtokenizerfromfile(self):
     """Tokenize the threads produced by Reader and print their contents.

     For each thread returned by ``Tokenizer.tokenize`` the query body is
     printed first, followed by the ``_text`` of each of its documents.
     No assertions are made; output goes to stdout.
     """
     xml_reader = Reader()
     parsed_input = xml_reader.readfile()
     raw_threads = xml_reader.makeobjectsfromxml(parsed_input)
     for tokenized_thread in Tokenizer(raw_threads).tokenize():
         print(tokenized_thread._query._body)
         for doc in tokenized_thread._documents:
             print(doc._text)
예제 #3
0
 def test_final(self):
     """Run the full pipeline: read, tokenize, build models, score relevance.

     The collection model is constructed before the document model, and
     both are handed to ``RetrievalModel`` together with the tokenized
     collection. The return value of ``calculate_relevance`` is discarded;
     the test only checks that the pipeline runs.
     """
     xml_reader = Reader()
     parsed_input = xml_reader.readfile()
     raw_threads = xml_reader.makeobjectsfromxml(parsed_input)
     tokenized = Tokenizer(raw_threads).tokenize()

     collection_model = CollectionModel(tokenized)
     document_model = DocumentModel(tokenized)
     RetrievalModel(
         tokenized, document_model, collection_model
     ).calculate_relevance()
 def testdocumentmodel(self):
     """Print the frequencies computed by the collection and document models.

     Both models are built from the same tokenized threads. The collection
     model's ``calculate_frequency`` result is printed first, then the
     document model's. No assertions are made.
     """
     xml_reader = Reader()
     parsed_input = xml_reader.readfile()
     raw_threads = xml_reader.makeobjectsfromxml(parsed_input)
     tokenized = Tokenizer(raw_threads).tokenize()

     # Collection-level frequencies are computed and printed before the
     # document model is even constructed, matching the original ordering.
     print(CollectionModel(tokenized).calculate_frequency())
     print(DocumentModel(tokenized).calculate_frequency())