Пример #1
0
def run_kafka():
    """Start the Kafka server and block until it exits.

    Steps, in order:

    1. Call ``wait_for_port(2181)`` (presumably ZooKeeper's client port --
       confirm against ``wait_for_port``'s definition).
    2. Pass ``crawlers/log/`` to ``crawling_utils.check_file_path``.
    3. Change the working directory to the ``kafka_2.13-2.4.0`` folder.
       This affects the whole current process, not only the child.
    4. Run ``bin/kafka-server-start.sh`` with ``log.dirs`` overridden to
       ``kafka-logs``, appending its stdout/stderr to
       ``../crawlers/log/kafka.out`` and ``../crawlers/log/kafka.err``
       (paths relative to the Kafka folder).

    The log files are opened with a context manager so their handles are
    released as soon as the server process terminates.
    """
    wait_for_port(2181)
    crawling_utils.check_file_path("crawlers/log/")
    os.chdir('kafka_2.13-2.4.0')

    # subprocess.run blocks until the server exits, so the files stay open
    # for the child's whole lifetime and are closed right afterwards.
    with open("../crawlers/log/kafka.out", "a", buffering=1) as out_log, \
            open("../crawlers/log/kafka.err", "a", buffering=1) as err_log:
        # Starts kafka server
        subprocess.run(
            [
                'bin/kafka-server-start.sh',
                'config/server.properties',
                '--override',
                'log.dirs=kafka-logs',
            ],
            stdout=out_log,
            stderr=err_log,
        )
Пример #2
0
def run_zookeeper():
    """Start the ZooKeeper server and block until it exits.

    Steps, in order:

    1. Pass ``crawlers/log/`` to ``crawling_utils.check_file_path``.
    2. Change the working directory to the ``kafka_2.13-2.4.0`` folder.
       This affects the whole current process, not only the child.
    3. Run ``bin/zookeeper-server-start.sh`` with ``config/zoo.properties``,
       appending its stdout/stderr to ``../crawlers/log/zookeeper.out`` and
       ``../crawlers/log/zookeeper.err`` (paths relative to the Kafka
       folder).

    The log files are opened with a context manager so their handles are
    released as soon as the server process terminates.
    """
    crawling_utils.check_file_path("crawlers/log/")
    os.chdir('kafka_2.13-2.4.0')

    # subprocess.run blocks until the server exits, so the files stay open
    # for the child's whole lifetime and are closed right afterwards.
    with open("../crawlers/log/zookeeper.out", "a", buffering=1) as out_log, \
            open("../crawlers/log/zookeeper.err", "a", buffering=1) as err_log:
        # Starts zookeeper server with overridden properties
        subprocess.run(
            ['bin/zookeeper-server-start.sh', 'config/zoo.properties'],
            stdout=out_log,
            stderr=err_log,
        )
Пример #3
0
def create_folders(data_path):
    """Ensure the crawler folder layout exists under ``data_path``.

    The root itself and its ``config``, ``data``, ``flags``, ``log`` and
    ``webdriver`` sub-folders are each handed, in that order, to
    ``crawling_utils.check_file_path``.
    """
    subfolders = ("config", "data", "flags", "log", "webdriver")

    # Build every path up front, root first, then one entry per sub-folder.
    required_paths = [f"{data_path}"]
    required_paths.extend(f"{data_path}/{name}" for name in subfolders)

    for folder in required_paths:
        crawling_utils.check_file_path(folder)
Пример #4
0
def file_descriptor_process():
    """Send this process's output to log files, then run the descriptor consumer.

    ``sys.stdout`` and ``sys.stderr`` are rebound to line-buffered files under
    ``crawlers/log/`` (opened in ``w+`` mode, so earlier contents are
    truncated) before ``FileDescriptor.description_consumer()`` is invoked.
    The streams are intentionally left open: they stay in place as the
    process's standard streams.
    """
    crawling_utils.check_file_path("crawlers/log/")

    # Rebind stdout first, then stderr -- each stream is swapped as soon as
    # its file has been opened.
    out_stream = open("crawlers/log/file_descriptor.out", mode="w+", buffering=1)
    sys.stdout = out_stream
    err_stream = open("crawlers/log/file_descriptor.err", mode="w+", buffering=1)
    sys.stderr = err_stream

    FileDescriptor.description_consumer()
Пример #5
0
def file_downloader_process():
    """Send this process's output to log files, then run the downloader consumer.

    ``sys.stdout`` and ``sys.stderr`` are rebound to line-buffered files under
    ``crawlers/log/`` (opened in ``w+`` mode, so earlier contents are
    truncated) before ``FileDownloader.download_consumer()`` is invoked.
    The streams are intentionally left open: they stay in place as the
    process's standard streams.
    """
    crawling_utils.check_file_path("crawlers/log/")

    # Rebind stdout first, then stderr -- each stream is swapped as soon as
    # its file has been opened.
    out_stream = open("crawlers/log/file_downloader.out", mode="w+", buffering=1)
    sys.stdout = out_stream
    err_stream = open("crawlers/log/file_downloader.err", mode="w+", buffering=1)
    sys.stderr = err_stream

    FileDownloader.download_consumer()