Пример #1
0
 def _classify(self):
     """Classify the preprocessed contents and record the topic's plurality.

     Builds a ``Classifier`` from ``self.preprocessed_contents``,
     ``self.max_pages`` and the title returned by
     ``self._website.get_title()``, runs ``classify()`` on it, and stores
     the value of ``is_webpage_topic_plural()`` in
     ``self.webpage_topic_is_plural``.
     """
     contents = self.preprocessed_contents
     max_pages = self.max_pages
     title = self._website.get_title()

     topic_classifier = Classifier(contents, max_pages=max_pages, webpage_title=title)
     topic_classifier.classify()

     # The plurality flag is queried only after classify() has run.
     is_plural = topic_classifier.is_webpage_topic_plural()
     self.webpage_topic_is_plural = is_plural
Пример #2
0
if __name__ == "__main__":
    # Script entry point: generate configuration from the classified input,
    # classify the unclassified input, write the result and print a summary.
    # NOTE(review): `configuration_file` is not defined in this block; it is
    # presumably defined earlier in the module - confirm.

    # --- Collaborators -----------------------------------------------------
    file_handler = FileHandler()
    configuration_provider = ConfigurationProvider(configuration_file)
    tree_browser = HTMLTreeBrowser()
    specification_registry = SpecificationRegistry(configuration_provider, tree_browser)
    content_downloader = WebPageContentDownloader()
    tree_builder = HTMLTreeBuilder()

    # --- Phase 1: generate configuration from the classified input ----------
    configuration_generator = ConfigurationGenerator(
        configuration_provider,
        specification_registry,
        content_downloader,
        tree_builder,
    )
    classified_input_name = configuration_provider.get_classified_input_file_name()
    classified_url_map = file_handler.get_url_map(classified_input_name)
    configuration_generator.generate_configuration(classified_url_map)

    # --- Phase 2: classify the keys of the unclassified input map -----------
    unclassified_input_name = configuration_provider.get_unclassified_input_file_name()
    unclassified_url_map = file_handler.get_url_map(unclassified_input_name)
    classifier = Classifier(
        configuration_provider,
        specification_registry,
        content_downloader,
        tree_builder,
    )
    results = classifier.classify(unclassified_url_map.keys())

    # --- Phase 3: write the classification and print the summary ------------
    output_name = configuration_provider.get_output_file_name()
    file_handler.write_classification(output_name, results)
    SummaryPrinter().print_summary(results, unclassified_url_map)
Пример #3
0
from classification.classifier import Classifier
from classification.text_processor import TextProcessor
from preprocess.stemmer import Stemmer
from preprocess.tokenizer import Tokenizer

# Build the classifier once; it is reused for every line read below.
text_processor = TextProcessor(Stemmer(), Tokenizer())
classifier = Classifier(text_processor)

# Interactive loop: read one line from stdin, print its classification, repeat.
while True:
    try:
        user_input = input()
    except EOFError:
        # stdin was closed (Ctrl-D, or piped input exhausted): leave the loop
        # cleanly instead of dying with an unhandled-exception traceback.
        break
    print(classifier.classify(user_input))