def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        # no namenodes reported yet; log the relation state to aid debugging
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port())
    set_state('hadoop.hdfs.configured')

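# The debug branch above renders the relation state as YAML and logs it one
# line at a time, since hookenv.log() takes a single message per call. A
# minimal standalone sketch of that pattern (log_yaml is a hypothetical
# helper, not part of the charm; the payload is illustrative):
import yaml
from charmhelpers.core import hookenv

def log_yaml(tag, data):
    """Log a dict as line-by-line YAML so juju debug-log stays readable."""
    for line in yaml.dump(data, default_flow_style=False).splitlines():
        hookenv.log('{}: {}'.format(tag, line))
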
def send_nm_all_info(nodemanager):
    """Send nodemanagers all mapred-slave relation data.

    At this point, the resourcemanager is ready to serve nodemanagers. Send
    all mapred-slave relation data so that our 'resourcemanager.ready' state
    becomes set.
    """
    bigtop = Bigtop()
    rm_host = get_fqdn()
    rm_ipc = get_layer_opts().port('resourcemanager')
    jh_ipc = get_layer_opts().port('jobhistory')
    jh_http = get_layer_opts().port('jh_webapp_http')

    nodemanager.send_resourcemanagers([rm_host])
    nodemanager.send_spec(bigtop.spec())
    nodemanager.send_ports(rm_ipc, jh_http, jh_ipc)

    # hosts_map and ssh_key are required by the mapred-slave interface to
    # signify RM's readiness. Send them, even though they are not utilized
    # by bigtop.
    # NB: update KV hosts with all nodemanagers prior to sending the
    # hosts_map because mapred-slave gates readiness on a NM's presence in
    # the hosts_map.
    utils.update_kv_hosts(nodemanager.hosts_map())
    nodemanager.send_hosts_map(utils.get_kv_hosts())
    nodemanager.send_ssh_key('invalid')

    # update status with slave count and report ready for yarn
    num_slaves = len(nodemanager.nodes())
    hookenv.status_set('active', 'ready ({count} nodemanager{s})'.format(
        count=num_slaves,
        s='s' if num_slaves > 1 else '',
    ))
    set_state('apache-bigtop-resourcemanager.ready')

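# For context, handlers like send_nm_all_info() are registered with
# charms.reactive decorators. A hedged sketch of plausible wiring; the state
# names here are assumptions for illustration, not taken from the charm:
from charms.reactive import when

@when('apache-bigtop-resourcemanager.installed', 'nodemanager.joined')
def handle_nodemanagers(nodemanager):
    send_nm_all_info(nodemanager)
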
def send_info(datanode): hadoop = get_hadoop_base() hdfs = HDFS(hadoop) local_hostname = hookenv.local_unit().replace("/", "-") hdfs_port = hadoop.dist_config.port("namenode") webhdfs_port = hadoop.dist_config.port("nn_webapp_http") utils.update_kv_hosts({node["ip"]: node["host"] for node in datanode.nodes()}) utils.manage_etc_hosts() datanode.send_spec(hadoop.spec()) datanode.send_namenodes([local_hostname]) datanode.send_ports(hdfs_port, webhdfs_port) datanode.send_ssh_key(utils.get_ssh_key("hdfs")) datanode.send_hosts_map(utils.get_kv_hosts()) slaves = [node["host"] for node in datanode.nodes()] if data_changed("namenode.slaves", slaves): unitdata.kv().set("namenode.slaves", slaves) hdfs.register_slaves(slaves) hookenv.status_set( "active", "Ready ({count} DataNode{s})".format(count=len(slaves), s="s" if len(slaves) > 1 else "") ) set_state("namenode.ready")
def configure_yarn(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    if not resourcemanager.resourcemanagers():
        # no resourcemanagers reported yet; log the relation state to aid
        # debugging
        data = yaml.dump({
            'relation_name': resourcemanager.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in resourcemanager.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(
                    resourcemanager.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    yarn.configure_yarn_base(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    set_state('hadoop.yarn.configured')

def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(
        {node['ip']: node['host'] for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')

def send_info(datanode):
    hadoop = get_bigtop_base()
    # hdfs = HDFS(hadoop)
    # local_hostname = hookenv.local_unit().replace('/', '-')
    # hdfs_port = hadoop.dist_config.port('namenode')
    # webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(
        {node['ip']: node['host'] for node in datanode.nodes()})
    utils.manage_etc_hosts()

    # datanode.send_spec(hadoop.spec())
    # datanode.send_namenodes([local_hostname])
    # datanode.send_ports(hdfs_port, webhdfs_port)
    # datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    # slaves = [node['host'] for node in datanode.nodes()]
    # if data_changed('namenode.slaves', slaves):
    #     unitdata.kv().set('namenode.slaves', slaves)
    #     hdfs.register_slaves(slaves)
    # hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
    #     count=len(slaves),
    #     s='s' if len(slaves) > 1 else '',
    # ))

    set_state('namenode.ready')
    hookenv.status_set('active', 'ready')

def send_dn_all_info(datanode):
    """Send datanodes all dfs-slave relation data.

    At this point, the namenode is ready to serve datanodes. Send all
    dfs-slave relation data so that our 'namenode.ready' state becomes set.
    """
    bigtop = Bigtop()
    fqdn = get_fqdn()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')

    datanode.send_spec(bigtop.spec())
    datanode.send_namenodes([fqdn])
    datanode.send_ports(hdfs_port, webhdfs_port)

    # hosts_map, ssh_key, and clustername are required by the dfs-slave
    # interface to signify NN's readiness. Send them, even though they are
    # not utilized by bigtop.
    # NB: update KV hosts with all datanodes prior to sending the hosts_map
    # because dfs-slave gates readiness on a DN's presence in the hosts_map.
    utils.update_kv_hosts(datanode.hosts_map())
    datanode.send_hosts_map(utils.get_kv_hosts())
    datanode.send_ssh_key('invalid')
    datanode.send_clustername(hookenv.service_name())

    # update status with slave count and report ready for hdfs
    num_slaves = len(datanode.nodes())
    hookenv.status_set('active', 'ready ({count} datanode{s})'.format(
        count=num_slaves,
        s='s' if num_slaves > 1 else '',
    ))
    set_state('apache-bigtop-namenode.ready')

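# The NB above says dfs-slave gates readiness on a DN's presence in the
# hosts_map. A minimal illustrative check of that gate from the datanode's
# side (names are hypothetical, not the dfs-slave interface's actual code):
def nn_ready(hosts_map, local_hostname, namenodes):
    # ready only once namenodes are published and this unit's hostname
    # appears in the hosts_map the namenode sent back
    return bool(namenodes) and local_hostname in hosts_map.values()
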
def configure_hdfs(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    if not namenode.namenodes():
        # no namenodes reported yet; log the relation state to aid debugging
        data = yaml.dump({
            'relation_name': namenode.relation_name,
            'conversations': {
                conv.key: dict({'relation_ids': conv.relation_ids},
                               **conv.serialize(conv))
                for conv in namenode.conversations()
            },
            'relation_data': {
                rid: {
                    unit: hookenv.relation_get(unit=unit, rid=rid)
                    for unit in hookenv.related_units(rid)
                } for rid in hookenv.relation_ids(namenode.relation_name)
            },
        }, default_flow_style=False)
        for line in data.splitlines():
            hookenv.log(line)
    hdfs.configure_hdfs_base(namenode.namenodes()[0], namenode.port())
    set_state('hadoop.hdfs.configured')

def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts(nodemanager.hosts_map())
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = nodemanager.nodes()
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')

def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.refresh_slaves()

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')

def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts(
        {node['ip']: node['host'] for node in nodemanager.nodes()})
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    # YARN slaves are managed via the 'yarn' user's key ('hdfs' belongs to
    # the namenode side)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in nodemanager.nodes()]
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')

def send_dn_all_info(datanode): """Send datanodes all dfs-slave relation data. At this point, the namenode is ready to serve datanodes. Send all dfs-slave relation data so that our 'namenode.ready' state becomes set. """ bigtop = Bigtop() fqdn = get_fqdn() hdfs_port = get_layer_opts().port('namenode') webhdfs_port = get_layer_opts().port('nn_webapp_http') datanode.send_spec(bigtop.spec()) datanode.send_namenodes([fqdn]) datanode.send_ports(hdfs_port, webhdfs_port) # hosts_map, ssh_key, and clustername are required by the dfs-slave # interface to signify NN's readiness. Send them, even though they are not # utilized by bigtop. # NB: update KV hosts with all datanodes prior to sending the hosts_map # because dfs-slave gates readiness on a DN's presence in the hosts_map. utils.update_kv_hosts(datanode.hosts_map()) datanode.send_hosts_map(utils.get_kv_hosts()) datanode.send_ssh_key('invalid') datanode.send_clustername(hookenv.service_name()) # update status with slave count and report ready for hdfs num_slaves = len(datanode.nodes()) hookenv.status_set('active', 'ready ({count} datanode{s})'.format( count=num_slaves, s='s' if num_slaves > 1 else '', )) set_state('apache-bigtop-namenode.ready')
def configure_resourcemanager():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_resourcemanager()
    yarn.configure_jobhistory()
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('resourcemanager.configured')

def start_datanode(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_datanode(namenode.namenodes()[0], namenode.port())
    utils.install_ssh_key('hdfs', namenode.ssh_key())
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()
    hdfs.start_datanode()
    hadoop.open_ports('datanode')
    set_state('datanode.started')

def start_nodemanager(resourcemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    yarn.configure_nodemanager(
        resourcemanager.resourcemanagers()[0], resourcemanager.port(),
        resourcemanager.hs_http(), resourcemanager.hs_ipc())
    utils.install_ssh_key('yarn', resourcemanager.ssh_key())
    utils.update_kv_hosts(resourcemanager.hosts_map())
    utils.manage_etc_hosts()
    yarn.start_nodemanager()
    hadoop.open_ports('nodemanager')
    set_state('nodemanager.started')

def configure_namenode(): local_hostname = hookenv.local_unit().replace("/", "-") private_address = hookenv.unit_get("private-address") ip_addr = utils.resolve_private_address(private_address) hadoop = get_hadoop_base() hdfs = HDFS(hadoop) hdfs.configure_namenode() hdfs.format_namenode() hdfs.start_namenode() hdfs.create_hdfs_dirs() hadoop.open_ports("namenode") utils.update_kv_hosts({ip_addr: local_hostname}) set_state("namenode.started")
def configure_namenode():
    local_hostname = hookenv.local_unit().replace('/', '-')
    private_address = hookenv.unit_get('private-address')
    ip_addr = utils.resolve_private_address(private_address)
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    hdfs.configure_namenode()
    hdfs.format_namenode()
    hdfs.start_namenode()
    hdfs.create_hdfs_dirs()
    hadoop.open_ports('namenode')
    utils.update_kv_hosts({ip_addr: local_hostname})
    set_state('namenode.started')

def configure_ha(cluster, datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    cluster_nodes = cluster.nodes()
    jn_nodes = datanode.nodes()
    jn_port = datanode.jn_port()
    if data_changed('namenode.ha', [cluster_nodes, jn_nodes, jn_port]):
        utils.update_kv_hosts(cluster.hosts_map())
        utils.manage_etc_hosts()
        hdfs.register_journalnodes(jn_nodes, jn_port)
        hdfs.restart_namenode()
        datanode.send_namenodes(cluster_nodes)
        if not is_state('namenode.shared-edits.init'):
            hdfs.init_sharededits()
            set_state('namenode.shared-edits.init')

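# The data_changed() guard above is a charms.reactive helper: it hashes the
# given value into the unit's KV store and returns True only when the value
# differs from the previous call, so restarts are skipped on no-op relation
# events. A small sketch of the idiom (maybe_restart is hypothetical):
from charms.reactive.helpers import data_changed

def maybe_restart(cluster_nodes, jn_nodes, jn_port, restart_cb):
    if data_changed('namenode.ha', [cluster_nodes, jn_nodes, jn_port]):
        restart_cb()  # only when membership or the JN port actually changed
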
def configure_hosts_file(self): """ Add the unit's private-address to /etc/hosts to ensure that Java can resolve the hostname of the server to its real IP address. We derive our hostname from the unit_id, replacing / with -. """ local_ip = utils.resolve_private_address(hookenv.unit_get('private-address')) hostname = hookenv.local_unit().replace('/', '-') utils.update_kv_hosts({local_ip: hostname}) utils.manage_etc_hosts() # update name of host to more semantically meaningful value # (this is required on some providers; the /etc/hosts entry must match # the /etc/hostname lest Hadoop get confused about where certain things # should be run) etc_hostname = Path('/etc/hostname') etc_hostname.write_text(hostname) check_call(['hostname', '-F', etc_hostname])
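# To make the KV-to-/etc/hosts flow concrete: update_kv_hosts() stores
# {ip: hostname} pairs, and manage_etc_hosts() renders them into /etc/hosts.
# A self-contained sketch of that rendering step (an assumption about the
# data shape, not the charm's actual implementation):
def render_hosts_entries(kv_hosts):
    """Render {ip: hostname} pairs as /etc/hosts lines (illustrative)."""
    return '\n'.join('{} {}'.format(ip, host)
                     for ip, host in sorted(kv_hosts.items()))

print(render_hosts_entries({'10.0.0.5': 'namenode-0', '10.0.0.7': 'datanode-1'}))
# -> 10.0.0.5 namenode-0
#    10.0.0.7 datanode-1
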
def configure_hosts_file(self): """ Add the unit's private-address to /etc/hosts to ensure that Java can resolve the hostname of the server to its real IP address. We derive our hostname from the unit_id, replacing / with -. """ local_ip = utils.resolve_private_address( hookenv.unit_get('private-address')) hostname = hookenv.local_unit().replace('/', '-') utils.update_kv_hosts({local_ip: hostname}) utils.manage_etc_hosts() # update name of host to more semantically meaningful value # (this is required on some providers; the /etc/hosts entry must match # the /etc/hostname lest Hadoop get confused about where certain things # should be run) etc_hostname = Path('/etc/hostname') etc_hostname.write_text(hostname) check_call(['hostname', '-F', etc_hostname])
def update_config(namenode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    utils.update_kv_hosts(namenode.hosts_map())
    utils.manage_etc_hosts()

    namenode_data = (
        namenode.clustername(), namenode.namenodes(),
        namenode.port(), namenode.webhdfs_port(),
    )
    if data_changed('datanode.namenode-data', namenode_data):
        hdfs.configure_datanode(*namenode_data)
        # re-check 'datanode.started' since this handler may also be
        # invoked manually
        if is_state('datanode.started'):
            hdfs.restart_datanode()
            hdfs.restart_journalnode()
    if data_changed('datanode.namenode-ssh-key', namenode.ssh_key()):
        utils.install_ssh_key('hdfs', namenode.ssh_key())

def manage_cluster_hosts(cluster):
    utils.update_kv_hosts(cluster.hosts_map())
    utils.manage_etc_hosts()

def manage_datanode_hosts(datanode):
    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()
    datanode.send_hosts_map(utils.get_kv_hosts())