Пример #1
0
def start_collector(type, workers_num, id=None):
    """Start the ingest collector for one configured pipeline.

    Args:
        type: Key of the pipeline entry in ``master_conf["pipelines"]``.
        workers_num: Passed through unchanged to ``KafkaTopic``.
        id: Optional topic name. When falsy, a name of the form
            ``ONI-INGEST-<type>_<ingest id>`` is generated instead.

    Logs an error and calls ``sys.exit(1)`` when ``type`` is not a
    configured pipeline, or when the pipeline's data source type is
    rejected by ``Util.validate_data_source``.
    """
    # Ingest id: the current time of day with ':' and '.' swapped for '_'.
    time_of_day = datetime.datetime.now().time()
    ingest_id = str(time_of_day).replace(":", "_").replace(".", "_")

    logger = Util.get_logger("ONI.INGEST")

    # The requested pipeline has to be present in the ingest configuration.
    pipelines = master_conf["pipelines"]
    if type not in pipelines:
        logger.error("'{0}' type is not a valid configuration.".format(type))
        sys.exit(1)

    # The pipeline's data source type has to map to a valid module.
    source_type = pipelines[type]["type"]
    if not Util.validate_data_source(source_type):
        logger.error(
            "'{0}' type is not configured. Please check you ingest conf file".
            format(source_type))
        sys.exit(1)

    # Kerberos authentication runs only when KRB_AUTH is set (and non-empty).
    if os.getenv('KRB_AUTH'):
        Kerberos().authenticate()

    # Kafka broker and zookeeper endpoints all live in the "kafka" section.
    logger.info("Initializing kafka instance")
    kafka_conf = master_conf["kafka"]
    broker_host = kafka_conf['kafka_server']
    broker_port = kafka_conf['kafka_port']
    zookeeper_host = kafka_conf['zookeper_server']
    zookeeper_port = kafka_conf['zookeper_port']

    # A caller-supplied id wins over the generated topic name.
    if id:
        topic = id
    else:
        topic = "ONI-INGEST-{0}_{1}".format(type, ingest_id)
    kafka = KafkaTopic(topic, broker_host, broker_port, zookeeper_host,
                       zookeeper_port, workers_num)

    # Load the collector module that matches the data source type.
    logger.info("Starting {0} ingest instance".format(topic))
    collector_module = __import__(
        "pipelines.{0}.collector".format(source_type),
        fromlist=['Collector'])

    # Build the collector and hand control over to it.
    collector = collector_module.Collector(master_conf['hdfs_app_path'],
                                           kafka, type)
    collector.start()
Пример #2
0
def start_worker(type, topic, id, processes=None):
    """Start an ingest worker that consumes the given Kafka topic.

    Args:
        type: Key of the pipeline entry in ``worker_conf["pipelines"]``.
        topic: Topic name handed to ``KafkaConsumer``.
        id: Passed as the last argument to ``KafkaConsumer``.
        processes: Optional value forwarded unchanged to the pipeline's
            ``Worker`` constructor.

    Logs an error and calls ``sys.exit(1)`` when ``type`` is not a
    configured pipeline, or when the pipeline's data source type is
    rejected by ``Util.validate_data_source``.
    """
    logger = Util.get_logger("ONI.INGEST.WORKER")

    # validate the given configuration exists in ingest_conf.json.
    if type not in worker_conf["pipelines"]:
        logger.error("'{0}' type is not a valid configuration.".format(type))
        sys.exit(1)

    # validate the type is a valid module. The error reports the data source
    # value that actually failed validation, not the pipeline key.
    data_source = worker_conf["pipelines"][type]["type"]
    if not Util.validate_data_source(data_source):
        logger.error(
            "The provided data source {0} is not valid".format(data_source))
        sys.exit(1)

    # validate if kerberos authentication is required.
    if os.getenv('KRB_AUTH'):
        kb = Kerberos()
        kb.authenticate()

    # load the worker module that matches the data source type.
    module = __import__("pipelines.{0}.worker".format(data_source),
                        fromlist=['Worker'])

    # kafka server info.
    logger.info("Initializing kafka instance")
    k_server = worker_conf["kafka"]['kafka_server']
    k_port = worker_conf["kafka"]['kafka_port']

    # required zookeeper info.
    zk_server = worker_conf["kafka"]['zookeper_server']
    zk_port = worker_conf["kafka"]['zookeper_port']

    # create kafka consumer.
    kafka_consumer = KafkaConsumer(topic, k_server, k_port, zk_server, zk_port,
                                   id)

    # start worker.
    db_name = worker_conf['dbname']
    app_path = worker_conf['hdfs_app_path']
    ingest_worker = module.Worker(db_name, app_path, kafka_consumer, type,
                                  processes)
    ingest_worker.start()
Пример #3
0
def start_worker(type, topic, id, processes=None):
    """Start an ingest worker that consumes the given Kafka topic.

    Args:
        type: Key of the pipeline entry in ``worker_conf["pipelines"]``.
        topic: Topic name handed to ``KafkaConsumer``.
        id: Passed as the last argument to ``KafkaConsumer``.
        processes: Optional value forwarded unchanged to the pipeline's
            ``Worker`` constructor.

    Logs an error and calls ``sys.exit(1)`` when ``type`` is not a
    configured pipeline, or when the pipeline's data source type is
    rejected by ``Util.validate_data_source``.
    """
    logger = Util.get_logger("ONI.INGEST.WORKER")

    # validate the given configuration exists in ingest_conf.json.
    if type not in worker_conf["pipelines"]:
        logger.error("'{0}' type is not a valid configuration.".format(type))
        sys.exit(1)

    # validate the type is a valid module. Report the data source value that
    # failed validation rather than the pipeline key.
    source_type = worker_conf["pipelines"][type]["type"]
    if not Util.validate_data_source(source_type):
        logger.error(
            "The provided data source {0} is not valid".format(source_type))
        sys.exit(1)

    # validate if kerberos authentication is required.
    if os.getenv('KRB_AUTH'):
        kb = Kerberos()
        kb.authenticate()

    # load the worker module that matches the data source type.
    module = __import__("pipelines.{0}.worker".format(source_type),
                        fromlist=['Worker'])

    # kafka server info.
    logger.info("Initializing kafka instance")
    k_server = worker_conf["kafka"]['kafka_server']
    k_port = worker_conf["kafka"]['kafka_port']

    # required zookeeper info.
    zk_server = worker_conf["kafka"]['zookeper_server']
    zk_port = worker_conf["kafka"]['zookeper_port']

    # create kafka consumer.
    kafka_consumer = KafkaConsumer(topic, k_server, k_port, zk_server,
                                   zk_port, id)

    # start worker.
    db_name = worker_conf['dbname']
    app_path = worker_conf['hdfs_app_path']
    ingest_worker = module.Worker(db_name, app_path, kafka_consumer, type,
                                  processes)
    ingest_worker.start()