Example #1

import sys
import time

sys.path.append("../web_crawler")
from web_crawler import WebCrawler

sys.path.append("..")
from privileges import construct_full_privilege, privileges_bigger_or_equal


# The master coordinates the crawl, starting from the given URL.
master_crawler = WebCrawler.create_master(
    privileges=construct_full_privilege(),
    start_url="http://antyweb.pl/",
)


# A worker performs the actual fetching on behalf of the master.
WebCrawler.create_worker(
    privileges=construct_full_privilege(),
    master=master_crawler,
    max_internal_expansion=5,
    max_external_expansion=3,
    max_crawling_depth=100,
)

master_crawler.run()

# Let the crawl run for three days, then shut it down.
time.sleep(60 * 60 * 24 * 3)
master_crawler.terminate()
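
The fixed three-day sleep means terminate() is skipped if the script is interrupted during the wait. A minimal hardening sketch for the tail of this example, reusing the master_crawler created above; the try/finally is the only addition to the API calls already shown:

master_crawler.run()
try:
    # Crawl for up to three days.
    time.sleep(60 * 60 * 24 * 3)
finally:
    # Runs even on KeyboardInterrupt, so the master is always shut down.
    master_crawler.terminate()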

"""

import sys
import time

sys.path.append("../web_crawler")
from web_crawler import WebCrawler

sys.path.append("..")
from privileges import construct_full_privilege, privileges_bigger_or_equal


# The master coordinates the crawl, starting from the RSS portal.
master_crawler = WebCrawler.create_master(
    privileges=construct_full_privilege(),
    start_url="http://rss.wp.pl/",
)


# This worker also caps how many database updates it may perform.
WebCrawler.create_worker(
    master=master_crawler,
    privileges=construct_full_privilege(),
    max_internal_expansion=10,
    max_database_updates=10,
)

master_crawler.run()

# Let the crawl run for two minutes, then shut it down.
time.sleep(120)
master_crawler.terminate()
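
Each example attaches exactly one worker, but since create_worker takes the master as an argument, several workers could plausibly share one master. A speculative sketch; whether the library supports repeated create_worker calls per master is an assumption, not something the examples demonstrate:

# Assumption: create_worker may be called repeatedly for the same master.
for _ in range(3):
    WebCrawler.create_worker(
        master=master_crawler,
        privileges=construct_full_privilege(),
        max_internal_expansion=10,
    )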

Example #3
import sys
import time

sys.path.append("../web_crawler")
from web_crawler import WebCrawler

sys.path.append("..")
from privileges import construct_full_privilege, privileges_bigger_or_equal

# The original snippet never defines EXPORT_FILE; the value here is assumed.
EXPORT_FILE = "crawler_export.txt"

print('Output will be APPENDED to file named ' + EXPORT_FILE + '\n')

# The start URL is taken from the command line; bail out if none was given.
if len(sys.argv) == 1:
    sys.exit()

master_crawler = WebCrawler.create_master(
    privileges=construct_full_privilege(),
    start_url=str(sys.argv[1]),
)

# Export settings: write the crawled data as dicts to EXPORT_FILE.
WebCrawler.create_worker(
    privileges=construct_full_privilege(),
    master=master_crawler,
    max_external_expansion=1000,
    max_internal_expansion=4,
    max_crawling_depth=3,
    list_export=True,
    export_dicts=True,
    export_file=EXPORT_FILE,
)

master_crawler.run()

# Poll until the crawl finishes instead of sleeping for a fixed period.
while master_crawler.is_working():
    time.sleep(1)

master_crawler.terminate()
time.sleep(2)
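
Unlike the other examples, which sleep for a fixed interval and may cut a crawl short (or idle long after it finishes), this one polls is_working() once a second and shuts down as soon as the crawl completes, so the script's runtime tracks the actual work.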

Example #4

"""
This test precisely explores www.wykop.pl in search for RSS feeds.
"""

import sys
import time

sys.path.append("../web_crawler")
from web_crawler import WebCrawler

sys.path.append("..")
from privileges import construct_full_privilege, privileges_bigger_or_equal


# The master coordinates the crawl, starting from www.wykop.pl.
master_crawler = WebCrawler.create_master(
    privileges=construct_full_privilege(),
    start_url="http://www.wykop.pl/",
)


# A shallow crawl: follow links at most three levels deep.
WebCrawler.create_worker(
    privileges=construct_full_privilege(),
    master=master_crawler,
    max_crawling_depth=3,
)

master_crawler.run()

# Let the crawl run for three days, then shut it down.
time.sleep(60 * 60 * 24 * 3)
master_crawler.terminate()
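
Every example imports privileges_bigger_or_equal but none of them calls it. A minimal sketch of the comparison the name suggests; the two-argument signature and the "a grants at least b" semantics are assumptions, not documented behavior:

import sys

sys.path.append("..")
from privileges import construct_full_privilege, privileges_bigger_or_equal

full = construct_full_privilege()

# Assumption: privileges_bigger_or_equal(a, b) is True when privilege `a`
# grants at least everything `b` grants; a full privilege dominates itself.
assert privileges_bigger_or_equal(full, full)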