Пример #1
0
def send_info(datanode):
    """
    Send this NameNode's details to the related DataNodes.

    Stores the ip -> host pairs reported by ``datanode.nodes()`` in the kv
    hosts store, sends the spec, hostname, ports, ssh key and hosts map
    over the relation, and registers the node hostnames as slaves when
    ``data_changed`` reports that the list differs.  Finishes by setting
    an 'active' status and the ``namenode.ready`` state.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    # The hostname is the local unit name with '/' swapped for '-'.
    this_host = hookenv.local_unit().replace('/', '-')
    nn_port = base.dist_config.port('namenode')
    nn_web_port = base.dist_config.port('nn_webapp_http')

    host_by_ip = {}
    for entry in datanode.nodes():
        host_by_ip[entry['ip']] = entry['host']
    utils.update_kv_hosts(host_by_ip)
    utils.manage_etc_hosts()

    datanode.send_spec(base.spec())
    datanode.send_namenodes([this_host])
    datanode.send_ports(nn_port, nn_web_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slave_hosts = []
    for entry in datanode.nodes():
        slave_hosts.append(entry['host'])
    if data_changed('namenode.slaves', slave_hosts):
        unitdata.kv().set('namenode.slaves', slave_hosts)
        hdfs_svc.register_slaves(slave_hosts)

    total = len(slave_hosts)
    plural = 's' if total > 1 else ''
    message = 'Ready ({count} DataNode{s})'.format(count=total, s=plural)
    hookenv.status_set('active', message)
    set_state('namenode.ready')
Пример #2
0
 def configure_hdfs(namenode):
     """
     Configure the HDFS base settings from the ``namenode`` relation.

     Updates the kv hosts store from ``namenode.hosts_map()``, then calls
     ``hdfs.configure_hdfs_base`` with the first reported NameNode and the
     relation's port, and sets the ``hadoop.hdfs.configured`` state.

     When ``namenode.namenodes()`` is empty, a YAML dump of the relation
     name, conversations and raw relation data is logged line by line
     before continuing.
     """
     base = get_hadoop_base()
     hdfs_svc = HDFS(base)
     utils.update_kv_hosts(namenode.hosts_map())
     utils.manage_etc_hosts()
     if not namenode.namenodes():
         relation_name = namenode.relation_name
         conversations = {
             conv.key: dict({'relation_ids': conv.relation_ids},
                            **conv.serialize(conv))
             for conv in namenode.conversations()
         }
         relation_data = {}
         for rid in hookenv.relation_ids(namenode.relation_name):
             per_unit = {}
             for unit in hookenv.related_units(rid):
                 per_unit[unit] = hookenv.relation_get(unit=unit, rid=rid)
             relation_data[rid] = per_unit
         snapshot = {
             'relation_name': relation_name,
             'conversations': conversations,
             'relation_data': relation_data,
         }
         dumped = yaml.dump(snapshot, default_flow_style=False)
         for line in dumped.splitlines():
             hookenv.log(line)
     # NOTE: with an empty NameNode list the [0] below raises IndexError;
     # the dump above is what ends up in the log in that situation.
     first_namenode = namenode.namenodes()[0]
     hdfs_svc.configure_hdfs_base(first_namenode, namenode.port())
     set_state('hadoop.hdfs.configured')
def send_info(datanode):
    """
    Send this NameNode's details to the related DataNodes.

    Merges ``datanode.hosts_map()`` into the kv hosts store, sends the
    spec, hostname, ports, ssh key and hosts map over the relation, and,
    when ``data_changed`` reports a new slave list, stores it, registers
    it and calls ``hdfs.refresh_slaves()``.  Finishes by setting an
    'active' status and the ``namenode.ready`` state.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    # The hostname is the local unit name with '/' swapped for '-'.
    this_host = hookenv.local_unit().replace('/', '-')
    nn_port = base.dist_config.port('namenode')
    nn_web_port = base.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()

    datanode.send_spec(base.spec())
    datanode.send_namenodes([this_host])
    datanode.send_ports(nn_port, nn_web_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    current_slaves = datanode.nodes()
    if data_changed('namenode.slaves', current_slaves):
        unitdata.kv().set('namenode.slaves', current_slaves)
        hdfs_svc.register_slaves(current_slaves)
        hdfs_svc.refresh_slaves()

    total = len(current_slaves)
    if total > 1:
        plural = 's'
    else:
        plural = ''
    message = 'Ready ({count} DataNode{s})'.format(count=total, s=plural)
    hookenv.status_set('active', message)
    set_state('namenode.ready')
def stop_datanode():
    """
    Stop the local DataNode and JournalNode.

    Calls ``stop_datanode`` and ``stop_journalnode`` on the HDFS wrapper,
    closes the 'datanode' ports and removes the ``datanode.started``
    state.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    hdfs_svc.stop_datanode()
    hdfs_svc.stop_journalnode()
    base.close_ports('datanode')
    remove_state('datanode.started')
Пример #5
0
 def configure_hdfs(namenode):
     """
     Configure the HDFS base settings from the ``namenode`` relation.

     Updates the kv hosts store from ``namenode.hosts_map()``, then calls
     ``hdfs.configure_hdfs_base`` with the cluster name, NameNode list,
     port and webhdfs port from the relation, and sets the
     ``hadoop.hdfs.configured`` state.

     When ``namenode.namenodes()`` is empty, a YAML dump of the relation
     name, conversations and raw relation data is logged line by line
     before continuing.
     """
     base = get_hadoop_base()
     hdfs_svc = HDFS(base)
     utils.update_kv_hosts(namenode.hosts_map())
     utils.manage_etc_hosts()
     if not namenode.namenodes():
         relation_name = namenode.relation_name
         conversations = {
             conv.key: dict({'relation_ids': conv.relation_ids},
                            **conv.serialize(conv))
             for conv in namenode.conversations()
         }
         relation_data = {}
         for rid in hookenv.relation_ids(namenode.relation_name):
             relation_data[rid] = {
                 unit: hookenv.relation_get(unit=unit, rid=rid)
                 for unit in hookenv.related_units(rid)
             }
         snapshot = {
             'relation_name': relation_name,
             'conversations': conversations,
             'relation_data': relation_data,
         }
         dumped = yaml.dump(snapshot, default_flow_style=False)
         for line in dumped.splitlines():
             hookenv.log(line)
     cluster_name = namenode.clustername()
     nn_hosts = namenode.namenodes()
     nn_port = namenode.port()
     nn_web_port = namenode.webhdfs_port()
     hdfs_svc.configure_hdfs_base(cluster_name, nn_hosts, nn_port, nn_web_port)
     set_state('hadoop.hdfs.configured')
def send_info(datanode):
    """
    Send this NameNode's details to the related DataNodes.

    Stores the ip -> host pairs reported by ``datanode.nodes()`` in the kv
    hosts store, sends the spec, hostname, ports, ssh key and hosts map
    over the relation, and registers the node hostnames as slaves when
    ``data_changed`` reports that the list differs.  Finishes by setting
    an "active" status and the ``namenode.ready`` state.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    # The hostname is the local unit name with "/" swapped for "-".
    this_host = hookenv.local_unit().replace("/", "-")
    nn_port = base.dist_config.port("namenode")
    nn_web_port = base.dist_config.port("nn_webapp_http")

    host_by_ip = {}
    for entry in datanode.nodes():
        host_by_ip[entry["ip"]] = entry["host"]
    utils.update_kv_hosts(host_by_ip)
    utils.manage_etc_hosts()

    datanode.send_spec(base.spec())
    datanode.send_namenodes([this_host])
    datanode.send_ports(nn_port, nn_web_port)
    datanode.send_ssh_key(utils.get_ssh_key("hdfs"))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slave_hosts = []
    for entry in datanode.nodes():
        slave_hosts.append(entry["host"])
    if data_changed("namenode.slaves", slave_hosts):
        unitdata.kv().set("namenode.slaves", slave_hosts)
        hdfs_svc.register_slaves(slave_hosts)

    total = len(slave_hosts)
    plural = "s" if total > 1 else ""
    message = "Ready ({count} DataNode{s})".format(count=total, s=plural)
    hookenv.status_set("active", message)
    set_state("namenode.ready")
def update_zk_config(zookeeper):
    """
    Pass the current ZooKeeper node list to the HDFS configuration.

    The nodes from ``zookeeper.zookeepers()`` are sorted by their 'host'
    entry and handed to ``hdfs.configure_zookeeper``.  If the
    ``namenode.zk.started`` state was set on entry and ``data_changed``
    reports a different node list, ``hdfs.restart_zookeeper()`` is called.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    ordered_nodes = list(zookeeper.zookeepers())
    ordered_nodes.sort(key=itemgetter('host'))
    # Sample the state before reconfiguring, as the decision below uses it.
    was_started = is_state('namenode.zk.started')
    hdfs_svc.configure_zookeeper(ordered_nodes)
    if not was_started:
        # data_changed is deliberately not consulted in this case.
        return
    if data_changed('namenode.zk', ordered_nodes):
        hdfs_svc.restart_zookeeper()
def start_datanode(namenode):
    """
    Start the local DataNode and JournalNode.

    Runs ``update_config(namenode)`` first, then starts both services,
    opens the 'datanode' ports and sets the ``datanode.started`` state.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    # Force a config update before anything is started.
    update_config(namenode)
    hdfs_svc.start_datanode()
    hdfs_svc.start_journalnode()
    base.open_ports('datanode')
    set_state('datanode.started')
def update_slaves(datanode):
    """
    Re-register the slave list when it has changed.

    If ``data_changed`` reports that ``datanode.nodes()`` differs, the
    list is stored under 'namenode.slaves' in the unit kv store,
    registered with HDFS and reloaded.  The ``namenode.ready`` state is
    set in every case.
    """
    hdfs_svc = HDFS(get_hadoop_base())
    current_slaves = datanode.nodes()
    slaves_differ = data_changed('namenode.slaves', current_slaves)
    if slaves_differ:
        unitdata.kv().set('namenode.slaves', current_slaves)
        hdfs_svc.register_slaves(current_slaves)
        hdfs_svc.reload_slaves()

    set_state('namenode.ready')
def start_datanode(namenode):
    """
    Configure and start the local DataNode.

    Configures the DataNode against the first NameNode and port from the
    relation, installs the 'hdfs' ssh key, updates the kv hosts store,
    then starts the DataNode, opens the 'datanode' ports and sets the
    ``datanode.started`` state.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    first_namenode = namenode.namenodes()[0]
    hdfs_svc.configure_datanode(first_namenode, namenode.port())
    utils.install_ssh_key('hdfs', namenode.ssh_key())
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    hdfs_svc.start_datanode()
    base.open_ports('datanode')
    set_state('datanode.started')
def start_zookeeper(zookeeper):
    """
    Start the ZooKeeper-related service on a cluster node.

    Does nothing when this unit's hostname (the local unit name with '/'
    replaced by '-') is not in ``get_cluster_nodes()``.  Otherwise it
    refreshes the ZooKeeper config, restarts the NameNode, calls
    ``hdfs.start_zookeeper()`` and sets the ``namenode.zk.started`` state.
    """
    this_host = hookenv.local_unit().replace('/', '-')
    if this_host in get_cluster_nodes():
        # Ensure the config is up to date before (re)starting anything.
        update_zk_config(zookeeper)
        hdfs_svc = HDFS(get_hadoop_base())
        hdfs_svc.restart_namenode()
        hdfs_svc.start_zookeeper()
        set_state('namenode.zk.started')
    # Otherwise: can't run zkfc on a non-cluster node, so nothing happens.
Пример #12
0
def configure_namenode():
    """
    Configure, format and start the NameNode on this unit.

    After starting the NameNode it creates the HDFS directories, opens
    the 'namenode' ports, records this unit's resolved private address
    against its hostname in the kv hosts store, and sets the
    ``namenode.started`` state.
    """
    # The hostname is the local unit name with '/' swapped for '-'.
    this_host = hookenv.local_unit().replace('/', '-')
    address = hookenv.unit_get('private-address')
    resolved_ip = utils.resolve_private_address(address)
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    hdfs_svc.configure_namenode()
    hdfs_svc.format_namenode()
    hdfs_svc.start_namenode()
    hdfs_svc.create_hdfs_dirs()
    base.open_ports('namenode')
    own_entry = {resolved_ip: this_host}
    utils.update_kv_hosts(own_entry)
    set_state('namenode.started')
def configure_namenode():
    """
    Configure, format and start the NameNode on this unit.

    The NameNode is configured with the result of ``get_cluster_nodes()``.
    After starting it, the HDFS directories are created, the 'namenode'
    ports are opened, the kv host entry is initialized and applied, and
    the ``namenode.started`` state is set.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    cluster_members = get_cluster_nodes()
    hdfs_svc.configure_namenode(cluster_members)
    hdfs_svc.format_namenode()
    hdfs_svc.start_namenode()
    hdfs_svc.create_hdfs_dirs()
    base.open_ports('namenode')
    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    set_state('namenode.started')
def configure_namenode():
    """
    Configure, format and start the NameNode on this unit.

    After starting the NameNode it creates the HDFS directories, opens
    the "namenode" ports, records this unit's resolved private address
    against its hostname in the kv hosts store, and sets the
    ``namenode.started`` state.
    """
    # The hostname is the local unit name with "/" swapped for "-".
    this_host = hookenv.local_unit().replace("/", "-")
    address = hookenv.unit_get("private-address")
    resolved_ip = utils.resolve_private_address(address)
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    hdfs_svc.configure_namenode()
    hdfs_svc.format_namenode()
    hdfs_svc.start_namenode()
    hdfs_svc.create_hdfs_dirs()
    base.open_ports("namenode")
    own_entry = {resolved_ip: this_host}
    utils.update_kv_hosts(own_entry)
    set_state("namenode.started")
def init_ha_standby(datanode, cluster):
    """
    Once initial HA setup is done, any new NameNode is started as standby.

    Does nothing unless this unit's hostname (the local unit name with
    '/' replaced by '-') is among ``get_cluster_nodes()``.  Otherwise the
    HA config is written via ``update_ha_config``, the standby is
    bootstrapped and started, ``cluster.standby_ready()`` is called, the
    'namenode' ports are opened, and the ``namenode.standby`` and
    ``namenode.started`` states are set.
    """
    local_hostname = hookenv.local_unit().replace('/', '-')
    if local_hostname not in get_cluster_nodes():
        # can't even bootstrapStandby if not in the list of chosen nodes
        return
    # Ensure the config is written.  This is called exactly once: an
    # immediate second call would find nothing changed and would only
    # repeat the same configure/register steps.
    update_ha_config(datanode)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.bootstrap_standby()
    hdfs.start_namenode()
    cluster.standby_ready()
    set_state('namenode.standby')
    hadoop.open_ports('namenode')
    set_state('namenode.started')
def unregister_datanode(datanode):
    """
    Drop departing DataNodes from the registered slave list.

    Removes the nodes reported by ``datanode.nodes()`` from the list
    stored under 'namenode.slaves', re-registers and reloads the
    remainder, and removes the departing nodes from the kv hosts store.
    The ``namenode.ready`` state is removed when no slaves remain, and
    the relation is dismissed at the end.
    """
    hdfs_svc = HDFS(get_hadoop_base())

    known = unitdata.kv().get('namenode.slaves', [])
    departing = datanode.nodes()  # only returns nodes in "leaving" state
    hookenv.log('Slaves leaving: {}'.format(departing))

    # Set difference: the order of the surviving entries is not preserved.
    survivors = list(set(known) - set(departing))
    unitdata.kv().set('namenode.slaves', survivors)
    hdfs_svc.register_slaves(survivors)
    hdfs_svc.reload_slaves()

    utils.remove_kv_hosts(departing)
    utils.manage_etc_hosts()

    if len(survivors) == 0:
        remove_state('namenode.ready')

    datanode.dismiss()
def update_ha_config(datanode):
    """
    Write the HA configuration and restart or reload as needed.

    Configures the NameNode with ``get_cluster_nodes()`` and registers
    the sorted JournalNodes with their port.  If the
    ``namenode.started`` state was set on entry, the NameNode is
    restarted when the cluster node list changed; otherwise, when only
    the JournalNode data changed, ``hdfs.reload_slaves()`` is called.
    """
    nn_cluster = get_cluster_nodes()
    journal_nodes = sorted(datanode.nodes())
    journal_port = datanode.jn_port()
    already_started = is_state('namenode.started')
    # Both change checks run up front, before any configuration is written.
    cluster_changed = data_changed('namenode.cluster-nodes', nn_cluster)
    journal_changed = data_changed(
        'namenode.jn.config', (journal_nodes, journal_port))

    hdfs_svc = HDFS(get_hadoop_base())
    hdfs_svc.configure_namenode(nn_cluster)
    hdfs_svc.register_journalnodes(journal_nodes, journal_port)

    if not already_started:
        return
    if cluster_changed:
        hdfs_svc.restart_namenode()
    elif journal_changed:
        hdfs_svc.reload_slaves()  # is this actually necessary?
Пример #18
0
def unregister_datanode(datanode):
    """
    Drop departing DataNodes from the registered slave list.

    Removes the 'host' of each node reported by ``datanode.nodes()`` from
    the list stored under 'namenode.slaves', re-registers the remainder,
    and removes the departing hosts from the kv hosts store.  When no
    slaves remain, a 'blocked' status is set and the ``namenode.ready``
    state is removed.  The relation is dismissed at the end.
    """
    hdfs_svc = HDFS(get_hadoop_base())
    departing_nodes = datanode.nodes()  # only returns nodes in "leaving" state

    known = unitdata.kv().get('namenode.slaves', [])
    departing_hosts = []
    for entry in departing_nodes:
        departing_hosts.append(entry['host'])
    hookenv.log('Slaves leaving: {}'.format(departing_hosts))

    # Set difference: the order of the surviving entries is not preserved.
    survivors = list(set(known) - set(departing_hosts))
    unitdata.kv().set('namenode.slaves', survivors)
    hdfs_svc.register_slaves(survivors)

    utils.remove_kv_hosts(departing_hosts)
    utils.manage_etc_hosts()

    if len(survivors) == 0:
        hookenv.status_set('blocked', 'Waiting for relation to DataNodes')
        remove_state('namenode.ready')

    datanode.dismiss()
def unregister_datanode(datanode):
    """
    Drop departing DataNodes from the registered slave list.

    Removes the "host" of each node reported by ``datanode.nodes()`` from
    the list stored under "namenode.slaves", re-registers the remainder,
    and removes the departing hosts from the kv hosts store.  When no
    slaves remain, a "blocked" status is set and the ``namenode.ready``
    state is removed.  The relation is dismissed at the end.
    """
    hdfs_svc = HDFS(get_hadoop_base())
    departing_nodes = datanode.nodes()  # only returns nodes in "leaving" state

    known = unitdata.kv().get("namenode.slaves", [])
    departing_hosts = []
    for entry in departing_nodes:
        departing_hosts.append(entry["host"])
    hookenv.log("Slaves leaving: {}".format(departing_hosts))

    # Set difference: the order of the surviving entries is not preserved.
    survivors = list(set(known) - set(departing_hosts))
    unitdata.kv().set("namenode.slaves", survivors)
    hdfs_svc.register_slaves(survivors)

    utils.remove_kv_hosts(departing_hosts)
    utils.manage_etc_hosts()

    if len(survivors) == 0:
        hookenv.status_set("blocked", "Waiting for relation to DataNodes")
        remove_state("namenode.ready")

    datanode.dismiss()
Пример #20
0
def ganglia_changed():
    """
    Restart every locally started Hadoop service.

    For each of the NameNode, DataNode, JournalNode, ResourceManager and
    NodeManager, the matching ``*.started`` state is checked and, when
    set, the corresponding restart method is called on the HDFS or YARN
    wrapper.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    yarn_svc = YARN(base)
    # (state, wrapper, restart method name).  The method is looked up by
    # name only once its state is found to be set.
    restart_plan = (
        ('namenode.started', hdfs_svc, 'restart_namenode'),
        ('datanode.started', hdfs_svc, 'restart_datanode'),
        ('journalnode.started', hdfs_svc, 'restart_journalnode'),
        ('resourcemanager.started', yarn_svc, 'restart_resourcemanager'),
        ('nodemanager.started', yarn_svc, 'restart_nodemanager'),
    )
    for state, service, method_name in restart_plan:
        if is_state(state):
            getattr(service, method_name)()
Пример #21
0
def ganglia_changed():
    """
    Restart every locally started Hadoop service.

    For each of the NameNode, DataNode, JournalNode, ResourceManager and
    NodeManager, the matching ``*.started`` state is checked and, when
    set, the corresponding restart method is called on the HDFS or YARN
    wrapper.
    """
    base = get_hadoop_base()
    hdfs_svc = HDFS(base)
    yarn_svc = YARN(base)
    # Component name -> owning wrapper, in the order the checks are made.
    # The restart method is resolved only when the state is set.
    owners = (
        ('namenode', hdfs_svc),
        ('datanode', hdfs_svc),
        ('journalnode', hdfs_svc),
        ('resourcemanager', yarn_svc),
        ('nodemanager', yarn_svc),
    )
    for component, service in owners:
        if is_state(component + '.started'):
            getattr(service, 'restart_' + component)()
def configure_ha(cluster, datanode):
    """
    Apply HA settings when the cluster or JournalNode data has changed.

    When ``data_changed`` reports a difference in the cluster nodes,
    JournalNodes or JournalNode port, this updates the kv hosts store,
    registers the JournalNodes, restarts the NameNode and sends the
    cluster node list to the DataNodes.  The shared edits are initialized
    once, guarded by the ``namenode.shared-edits.init`` state.
    """
    hdfs_svc = HDFS(get_hadoop_base())
    nn_cluster = cluster.nodes()
    journal_nodes = datanode.nodes()
    journal_port = datanode.jn_port()
    ha_snapshot = [nn_cluster, journal_nodes, journal_port]
    if not data_changed('namenode.ha', ha_snapshot):
        return

    utils.update_kv_hosts(cluster.hosts_map())
    utils.manage_etc_hosts()
    hdfs_svc.register_journalnodes(journal_nodes, journal_port)
    hdfs_svc.restart_namenode()
    datanode.send_namenodes(nn_cluster)
    if is_state('namenode.shared-edits.init'):
        return
    hdfs_svc.init_sharededits()
    set_state('namenode.shared-edits.init')
def init_ha_active(datanode, cluster):
    """
    Do initial HA setup on the leader.

    Stops the NameNode, sets the cluster nodes to this unit plus the
    first entry of ``cluster.nodes()``, writes the HA config, initializes
    the shared edits and starts the NameNode again.  Records
    'ha-initialized' in the leadership settings and sets the
    ``namenode.started`` state.
    """
    # The hostname is the local unit name with '/' swapped for '-'.
    this_host = hookenv.local_unit().replace('/', '-')
    hdfs_svc = HDFS(get_hadoop_base())
    hdfs_svc.stop_namenode()
    remove_state('namenode.started')
    # initial cluster is us (active) plus a standby
    first_peer = cluster.nodes()[0]
    set_cluster_nodes([this_host, first_peer])
    update_ha_config(datanode)
    hdfs_svc.init_sharededits()
    hdfs_svc.start_namenode()
    leadership.leader_set({'ha-initialized': 'true'})
    set_state('namenode.started')
def update_config(namenode):
    """
    Bring the DataNode configuration in line with the NameNode relation.

    Updates the kv hosts store, then reconfigures the DataNode when the
    cluster name, NameNode list, port or webhdfs port has changed,
    restarting the DataNode and JournalNode if ``datanode.started`` is
    set.  The 'hdfs' ssh key is reinstalled when it has changed.
    """
    hdfs_svc = HDFS(get_hadoop_base())

    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()

    cluster_name = namenode.clustername()
    nn_hosts = namenode.namenodes()
    nn_port = namenode.port()
    nn_web_port = namenode.webhdfs_port()
    nn_settings = (cluster_name, nn_hosts, nn_port, nn_web_port)
    if data_changed('datanode.namenode-data', nn_settings):
        hdfs_svc.configure_datanode(
            cluster_name, nn_hosts, nn_port, nn_web_port)
        # Re-check the state, because this may be called manually.
        if is_state('datanode.started'):
            hdfs_svc.restart_datanode()
            hdfs_svc.restart_journalnode()

    key_changed = data_changed('datanode.namenode-ssh-key', namenode.ssh_key())
    if key_changed:
        utils.install_ssh_key('hdfs', namenode.ssh_key())
def stop_zookeeper():
    """
    Call ``hdfs.stop_zookeeper()`` and remove ``namenode.zk.started``.
    """
    hdfs_svc = HDFS(get_hadoop_base())
    hdfs_svc.stop_zookeeper()
    remove_state('namenode.zk.started')
def format_zookeeper(zookeeper):
    """
    Format ZooKeeper and record that fact in the leadership settings.

    Refreshes the ZooKeeper config first, calls
    ``hdfs.format_zookeeper()``, then sets 'zk-formatted' to 'true' via
    ``leadership.leader_set``.
    """
    # Ensure the config is up to date before formatting.
    update_zk_config(zookeeper)
    hdfs_svc = HDFS(get_hadoop_base())
    hdfs_svc.format_zookeeper()
    leadership.leader_set({'zk-formatted': 'true'})