Example #1
import StringIO
import textwrap
import unittest

from crawler_config import CrawlerConfig


# Hypothetical test-case wrapper; the snippet shows only the method itself.
class CrawlerConfigTest(unittest.TestCase):
    def testCrawlerConfigWorks(self):
        # Parse an in-memory INI file and verify every accessor.
        file_handle = StringIO.StringIO(textwrap.dedent("""
        [General]
        database_address: my_database_address
        download_folder: my_download_folder
        zip_size_limit: 30000
        """))
        config = CrawlerConfig(file_handle)
        self.assertEqual('my_database_address', config.database_address())
        self.assertEqual('my_download_folder', config.download_folder())
        self.assertEqual(30000, config.zip_size_limit())
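The test above exercises three accessors on CrawlerConfig, whose implementation is not shown on this page. Below is a minimal sketch, assuming the class simply wraps Python 2's ConfigParser and reads the [General] section; the class body is an illustration consistent with the test's expectations, not the project's actual code.

import ConfigParser


class CrawlerConfig(object):
    """Minimal sketch: wraps an INI-style file with a [General] section."""

    def __init__(self, file_handle):
        # readfp parses directly from an open file-like object,
        # which is why the test can pass a StringIO instance.
        self._parser = ConfigParser.ConfigParser()
        self._parser.readfp(file_handle)

    def database_address(self):
        return self._parser.get('General', 'database_address')

    def download_folder(self):
        return self._parser.get('General', 'download_folder')

    def zip_size_limit(self):
        # getint converts the raw string to an integer, matching
        # the assertEqual(30000, ...) check in the test.
        return self._parser.getint('General', 'zip_size_limit')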
Example #2
from crawler_config import CrawlerConfig
from crawler_thread import CrawlerThread
from database_handler import DatabaseHandler  # assumed module path, matching the other imports
from downloader_thread import DownloaderThread

import argparse
import Queue
import threading

parser = argparse.ArgumentParser(description="Crawls the web looking for 3D object models.")
parser.add_argument("--config", action="store", type=str)
parser.add_argument("--instances", action="store", type=int, default=10)

if __name__ == "__main__":
    # TODO(brunonery): verify arguments and fail gracefully if necessary.
    args = parser.parse_args()
    with open(args.config) as config_file:
        config = CrawlerConfig(config_file)
    # Prepare database and locks.
    database_handler = DatabaseHandler(config.database_address())
    database_handler.Init()
    url_lock = threading.Lock()
    # Prepare download queue.
    download_queue = Queue.Queue()
    # Start all threads.
    crawler_thread_list = []
    for i in range(args.instances):
        current_thread = CrawlerThread(database_handler, download_queue, url_lock)
        crawler_thread_list.append(current_thread)
        current_thread.start()
    downloader_thread_list = []
    # TODO(brunonery): have different number of crawler and downloader threads.
    for i in range(args.instances):