Exemplo n.º 1
0
def _start_kafka_broker():
    """Launch a Kafka broker process and return its ``subprocess.Popen`` handle.

    Also sets the ``WAIT_FOR_KAFKA`` environment variable to ``'yes'`` after the
    process has been spawned (the consumer of this flag lives outside this block).
    """
    new_process = subprocess.Popen([kafka_dir + "/bin/kafka-server-start.sh",
                                    kafka_dir + "/config/server.properties"])
    os.environ['WAIT_FOR_KAFKA'] = 'yes'
    return new_process


def check_broker_id_in_zk(broker_id_policy, process, region):
    """
    Check endlessly for the ZooKeeper connection.

    This function checks endlessly if the broker is still registered in ZK
    (we observed running brokers but missing broker ids, so we implemented this check)
    and if the ZK connection string changed (e.g. due to a node restart).
    If either happens, a Kafka restart is enforced.

    :param broker_id_policy: policy name handed to ``find_out_own_id.get_broker_policy``
        to obtain the manager that resolves this broker's id.
    :param process: handle of the running Kafka broker process; it must offer
        ``terminate()`` and ``wait()``. It is replaced locally by a fresh
        ``subprocess.Popen`` handle after every restart.
    :param region: passed through to ``check_kafka`` and ``generate_zk_conn_str.run``.
    :return: never returns normally; the loop only ends when an exception propagates.
    """
    from random import randint

    from kazoo.client import KazooClient

    zk_conn_str = os.getenv('ZOOKEEPER_CONN_STRING')
    broker_id_manager = find_out_own_id.get_broker_policy(broker_id_policy)
    broker_id = broker_id_manager.get_id(kafka_data_dir)
    logging.info("check broker id... {}".format(broker_id))

    if not broker_id:
        broker_id = wait_for_broker_id(broker_id_manager, kafka_data_dir)

    while True:
        check_kafka(region)

        new_zk_conn_str = generate_zk_conn_str.run(os.getenv('ZOOKEEPER_STACK_NAME'), region)
        if zk_conn_str != new_zk_conn_str:
            # %s-style arguments keep these messages working even if the old
            # connection string is None (environment variable was not set).
            logging.warning("ZooKeeper connection string changed!")
            logging.warning("new ZK: %s", new_zk_conn_str)
            logging.warning("old ZK: %s", zk_conn_str)
            zk_conn_str = new_zk_conn_str
            os.environ['ZOOKEEPER_CONN_STRING'] = zk_conn_str
            create_broker_properties(zk_conn_str)

            # Random delays before stopping and before restarting the broker.
            # NOTE(review): presumably meant to stagger restarts across brokers - confirm.
            wait_to_stop = randint(1, 10)
            logging.info("Waiting %s seconds to stop kafka broker ...", wait_to_stop)
            sleep(wait_to_stop)
            process.terminate()
            process.wait()
            wait_to_restart = randint(10, 20)
            logging.info("Waiting %s seconds to restart kafka broker ...", wait_to_restart)
            sleep(wait_to_restart)
            logging.info("Restarting kafka broker with new ZooKeeper connection string ...")
            process = _start_kafka_broker()
            continue

        zk = KazooClient(hosts=zk_conn_str)
        zk.start()
        try:
            # format() instead of "+" so a non-string broker id cannot raise a
            # TypeError that would be mistaken for a missing registration.
            zk.get("/brokers/ids/{}".format(broker_id))
        except Exception as err:
            # Any failed lookup is still treated as "not registered", but
            # KeyboardInterrupt/SystemExit are no longer swallowed and the
            # actual reason is logged.
            registered = False
            logging.warning("I'm not in ZK registered, stopping kafka broker process!")
            logging.warning("ZK lookup for broker id %s failed: %r", broker_id, err)
        else:
            registered = True
            logging.info("I'm still in ZK registered, all good!")
        finally:
            # A new client is created on every pass: stop the session and
            # release the client's resources so handles do not accumulate.
            zk.stop()
            zk.close()

        if registered:
            # Sleep only after the ZK session has been closed.
            sleep(60)
            continue

        process.terminate()
        process.wait()
        logging.info("Restarting kafka broker ...")
        process = _start_kafka_broker()
    """Get a config from a remote location (e.g. Github)."""
    logging.info("getting " + file + " file from " + url)
    with open(file, 'w') as file_:
        config_content = requests.get(url).text
        file_.write(config_content)


# Fetch the broker and log4j configuration files into the Kafka config directory;
# the second argument of each call comes from the SERVER_PROPERTIES /
# LOG4J_PROPERTIES environment variables.
get_remote_config(kafka_dir + "/config/server.properties", os.getenv('SERVER_PROPERTIES'))
get_remote_config(kafka_dir + "/config/log4j.properties", os.getenv('LOG4J_PROPERTIES'))

# NOTE(review): zk_conn_str is defined outside this section - presumably the
# ZooKeeper connection string; confirm where it is assigned.
create_broker_properties(zk_conn_str)

# The broker id policy is read from BROKER_ID_POLICY (lower-cased), defaulting to 'ip'.
broker_policy = os.getenv('BROKER_ID_POLICY', 'ip').lower()
logging.info("broker id policy - {}".format(broker_policy))

# Resolve this broker's id through the manager selected by the policy.
broker_id_manager = find_out_own_id.get_broker_policy(broker_policy)
broker_id = broker_id_manager.get_id(kafka_data_dir)
logging.info("broker id is {}".format(broker_id))

# HealthServer is defined elsewhere; it is started before the broker itself.
HealthServer().start()

# Stays None unless partition reassignment is enabled below.
reassign_process = None

# Opt-in: run rebalance_partitions.run(region) in a separate process when
# REASSIGN_PARTITIONS is exactly 'yes'.
if os.getenv('REASSIGN_PARTITIONS') == 'yes':
    logging.info("starting reassignment script")
    reassign_process = multiprocessing.Process(target=rebalance_partitions.run, args=[region])
    reassign_process.start()

logging.info("starting kafka server ...")

# Initial value of the Kafka options string.
# NOTE(review): how it is extended and consumed lies beyond this section.
kafka_options = "-server"