def test_logic(backend):
    """Drive a frontier built on ``backend`` and print the crawl order.

    A ``Settings`` object is configured with ``backend`` and used to create
    a ``FrontierManager``; a ``FrontierTester`` then runs that frontier over
    the graph loaded from ``sqlite:///data/graph.db``. Finally the backend
    name and the URL of every page in ``tester.sequence`` are printed.

    :param backend: value stored in ``settings.BACKEND`` (presumably a dotted
        path to a backend class -- confirm against the callers).
    """
    # Site graph the tester will walk
    site_graph = graphs.Manager('sqlite:///data/graph.db')

    # Frontier configuration: manager/backend logging on, debug logging off
    config = Settings()
    config.BACKEND = backend
    config.LOGGING_MANAGER_ENABLED = True
    config.LOGGING_BACKEND_ENABLED = True
    config.LOGGING_DEBUGGING_ENABLED = False
    config.TEST_MODE = True
    frontier = FrontierManager.from_settings(config)

    # Run the tester over the graph
    tester = FrontierTester(frontier, site_graph)
    tester.run(add_all_pages=True)

    # Report: backend name framed by two rules, then one URL per line
    rule = '-' * 80
    print(rule)
    print(frontier.backend.name)
    print(rule)
    for visited in tester.sequence:
        print(visited.url)
def test_logic(backend):
    """Run the frontier tester with the given backend and print the sequence.

    Builds a frontier from a ``Settings`` object whose ``BACKEND`` is set to
    ``backend``, runs a ``FrontierTester`` against the graph stored at
    ``sqlite:///data/graph.db`` and prints the backend name followed by the
    URL of each page in ``tester.sequence``.

    :param backend: value assigned to ``settings.BACKEND`` (presumably a
        dotted path to a backend class -- confirm against the callers).
    """
    # Graph
    graph = graphs.Manager('sqlite:///data/graph.db')

    # Frontier
    settings = Settings()
    settings.BACKEND = backend
    settings.LOGGING_MANAGER_ENABLED = True
    settings.LOGGING_BACKEND_ENABLED = True
    settings.LOGGING_DEBUGGING_ENABLED = False
    settings.TEST_MODE = True
    frontier = FrontierManager.from_settings(settings)

    # Tester
    tester = FrontierTester(frontier, graph)
    tester.run(add_all_pages=True)

    # Show crawling sequence.
    # print() is called with a single parenthesised argument so the output is
    # the same under Python 2 (statement) and Python 3 (function).
    print('-' * 80)
    print(frontier.backend.name)
    print('-' * 80)
    for page in tester.sequence:
        print(page.url)
""" Frontier initialization from settings """ from frontera import FrontierManager, Settings, graphs, Request, Response SETTINGS = Settings() SETTINGS.BACKEND = 'frontera.contrib.backends.memory.FIFO' SETTINGS.LOGGING_MANAGER_ENABLED = True SETTINGS.LOGGING_BACKEND_ENABLED = True SETTINGS.LOGGING_DEBUGGING_ENABLED = True SETTINGS.TEST_MODE = True if __name__ == '__main__': # Create graph graph = graphs.Manager('sqlite:///data/graph.db') # Create frontier from settings frontier = FrontierManager.from_settings(SETTINGS) # Add seeds frontier.add_seeds([Request(seed.url) for seed in graph.seeds]) # Get next requests next_requests = frontier.get_next_requests() # Crawl pages for request in next_requests: # Fake page crawling crawled_page = graph.get_page(request.url)
""" Frontier tester usage example """ from frontera import FrontierManager, FrontierTester, Settings, graphs if __name__ == '__main__': # Graph graph = graphs.Manager('sqlite:///data/graph.db') # Frontier settings = Settings() settings.TEST_MODE = True settings.LOGGING_MANAGER_ENABLED = True settings.LOGGING_BACKEND_ENABLED = True settings.LOGGING_DEBUGGING_ENABLED = False frontier = FrontierManager.from_settings(settings) # Tester tester = FrontierTester(frontier, graph) # Run test tester.run() # Show crawling sequence for page in tester.sequence: print page.url
""" Frontier tester usage example """ from frontera import FrontierManager, FrontierTester, Settings, graphs if __name__ == '__main__': # Graph graph = graphs.Manager('sqlite:///data/graph.db') # Frontier settings = Settings() settings.TEST_MODE = True settings.LOGGING_MANAGER_ENABLED = True settings.LOGGING_BACKEND_ENABLED = True settings.LOGGING_DEBUGGING_ENABLED = False frontier = FrontierManager.from_settings(settings) # Tester tester = FrontierTester(frontier, graph) # Run test tester.run() # Show crawling sequence for page in tester.sequence: print(page.url)