def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    # keep /etc/hosts in sync with the related NodeManager units
    utils.update_kv_hosts(nodemanager.hosts_map())
    utils.manage_etc_hosts()

    # publish connection details to the NodeManagers
    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('yarn'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    # re-register slaves only when the list actually changes
    slaves = nodemanager.nodes()
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace("/", "-")
    hdfs_port = hadoop.dist_config.port("namenode")
    webhdfs_port = hadoop.dist_config.port("nn_webapp_http")

    utils.update_kv_hosts({node["ip"]: node["host"] for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key("hdfs"))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node["host"] for node in datanode.nodes()]
    if data_changed("namenode.slaves", slaves):
        unitdata.kv().set("namenode.slaves", slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        "active",
        "Ready ({count} DataNode{s})".format(count=len(slaves), s="s" if len(slaves) > 1 else "")
    )
    set_state("namenode.ready")
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(datanode.hosts_map())
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = datanode.nodes()
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)
        hdfs.refresh_slaves()

    hookenv.status_set('active', 'Ready ({count} DataNode{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('namenode.ready')
def send_info(nodemanager):
    hadoop = get_hadoop_base()
    yarn = YARN(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    port = hadoop.dist_config.port('resourcemanager')
    hs_http = hadoop.dist_config.port('jh_webapp_http')
    hs_ipc = hadoop.dist_config.port('jobhistory')

    utils.update_kv_hosts({node['ip']: node['host'] for node in nodemanager.nodes()})
    utils.manage_etc_hosts()

    nodemanager.send_spec(hadoop.spec())
    nodemanager.send_resourcemanagers([local_hostname])
    nodemanager.send_ports(port, hs_http, hs_ipc)
    nodemanager.send_ssh_key(utils.get_ssh_key('hdfs'))
    nodemanager.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in nodemanager.nodes()]
    if data_changed('resourcemanager.slaves', slaves):
        unitdata.kv().set('resourcemanager.slaves', slaves)
        yarn.register_slaves(slaves)

    hookenv.status_set('active', 'Ready ({count} NodeManager{s})'.format(
        count=len(slaves),
        s='s' if len(slaves) > 1 else '',
    ))
    set_state('resourcemanager.ready')
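# Hedged sketch (not taken from the charm itself): how a handler like the
# send_info() above is typically wired up with charms.reactive.  The state
# names passed to @when are assumptions for illustration; the relation object
# matching those states is passed to the handler automatically.
from charms.reactive import when


@when('resourcemanager.started', 'nodemanager.related')
def accept_nodemanagers(nodemanager):
    send_info(nodemanager)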
def send_info(datanode):
    hadoop = get_hadoop_base()
    hdfs = HDFS(hadoop)
    local_hostname = hookenv.local_unit().replace('/', '-')
    hdfs_port = hadoop.dist_config.port('namenode')
    webhdfs_port = hadoop.dist_config.port('nn_webapp_http')

    utils.update_kv_hosts(
        {node['ip']: node['host'] for node in datanode.nodes()})
    utils.manage_etc_hosts()

    datanode.send_spec(hadoop.spec())
    datanode.send_namenodes([local_hostname])
    datanode.send_ports(hdfs_port, webhdfs_port)
    datanode.send_ssh_key(utils.get_ssh_key('hdfs'))
    datanode.send_hosts_map(utils.get_kv_hosts())

    slaves = [node['host'] for node in datanode.nodes()]
    if data_changed('namenode.slaves', slaves):
        unitdata.kv().set('namenode.slaves', slaves)
        hdfs.register_slaves(slaves)

    hookenv.status_set(
        'active', 'Ready ({count} DataNode{s})'.format(
            count=len(slaves),
            s='s' if len(slaves) > 1 else '',
        ))
    set_state('namenode.ready')
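# Hedged sketch of the data_changed() idiom used by the send_info() handlers
# above: charms.reactive.helpers.data_changed() stores a hash of the value in
# unitdata and returns True only when the value differs from the previous
# call, so register_slaves() is re-run only when the slave list changes.
from charms.reactive.helpers import data_changed

slaves = ['slave-0', 'slave-1']  # example data, not from the charm
if data_changed('example.slaves', slaves):
    print('slave list changed; would re-register slaves here')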
def provide(self, remote_service, all_ready):
    data = super(SSHRelation, self).provide(remote_service, all_ready)
    try:
        pwd.getpwnam(self.ssh_user)
    except KeyError:
        hookenv.log('Cannot provide SSH key yet, user not available: %s' % self.ssh_user)
    else:
        data.update({
            'ssh-key': utils.get_ssh_key(self.ssh_user),
        })
    return data
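# Hedged sketch of the consuming side of this relation: install the 'ssh-key'
# value published by provide() above so the remote user can log in without a
# password.  install_ssh_key() is used by the Spark install() methods below;
# the relation_get() call and the default user here are assumptions.
from charmhelpers.core import hookenv
from jujubigdata import utils


def install_remote_ssh_key(user='ubuntu'):
    ssh_key = hookenv.relation_get('ssh-key')
    if ssh_key:
        utils.install_ssh_key(user, ssh_key)
    else:
        hookenv.log('No ssh-key available on the relation yet')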
def install(self):
    version = hookenv.config()['spark_version']
    spark_path = self.extract_spark_binary('spark-{}'.format(version), version)
    os.symlink(spark_path, self.dist_config.path('spark'))
    unitdata.kv().set('spark.version', version)

    self.dist_config.add_users()
    self.dist_config.add_dirs()
    self.dist_config.add_packages()

    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))

    unitdata.kv().set('spark.installed', True)
    unitdata.kv().flush(True)
def install(self, force=False):
    if not force and self.is_installed():
        return
    jujuresources.install(self.resources['spark'],
                          destination=self.dist_config.path('spark'),
                          skip_top_level=True)
    self.dist_config.add_users()
    self.dist_config.add_dirs()
    self.dist_config.add_packages()
    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))
    unitdata.kv().set('spark.installed', True)
    unitdata.kv().flush(True)
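# Hedged sketch of the is_installed() guard referenced above: a minimal
# version that would live on the same class and simply mirrors the
# 'spark.installed' flag set at the end of install().  The real method may
# check more than this.
from charmhelpers.core import unitdata


def is_installed(self):
    return unitdata.kv().get('spark.installed')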
def install(self):
    version = hookenv.config()['spark_version']
    spark_path = self.extract_spark_binary('spark-{}'.format(version), version)
    os.symlink(spark_path, self.dist_config.path('spark'))
    unitdata.kv().set('spark.version', version)

    self.dist_config.add_users()
    self.dist_config.add_dirs()
    self.dist_config.add_packages()

    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))

    utils.initialize_kv_host()
    utils.manage_etc_hosts()
    hostname = hookenv.local_unit().replace('/', '-')
    etc_hostname = Path('/etc/hostname')
    etc_hostname.write_text(hostname)
    check_call(['hostname', '-F', etc_hostname])

    unitdata.kv().set('spark.installed', True)
    unitdata.kv().flush(True)
def install(self):
    '''
    install Spark and add dependencies in dist-config
    '''
    self.dist_config.add_dirs()
    self.dist_config.add_packages()
    jujuresources.install(self.resources['spark'],
                          destination=self.dist_config.path('spark'),
                          skip_top_level=True)

    # allow ubuntu user to ssh to itself so spark can ssh to its worker
    # in local/standalone modes
    utils.install_ssh_key('ubuntu', utils.get_ssh_key('ubuntu'))

    # put the spark jar in hdfs
    spark_assembly_jar = glob(
        '{}/lib/spark-assembly-*.jar'.format(self.dist_config.path('spark'))
    )[0]
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/share/lib')
    try:
        utils.run_as('hdfs', 'hdfs', 'dfs', '-put', spark_assembly_jar,
                     '/user/ubuntu/share/lib/spark-assembly.jar')
    except CalledProcessError:
        hookenv.log("File exists")

    # create hdfs storage space for history server
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/directory')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 '/user/ubuntu/directory')

    # create hdfs storage space for spark-bench
    utils.run_as('hdfs', 'hdfs', 'dfs', '-mkdir', '-p',
                 '/user/ubuntu/spark-bench')
    utils.run_as('hdfs', 'hdfs', 'dfs', '-chown', '-R', 'ubuntu:hadoop',
                 '/user/ubuntu/spark-bench')
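# Hedged illustration of the utils.run_as() helper used above.  The real
# helper lives in jujubigdata.utils; this stand-in only shows the intent:
# run a command as another user and raise on a non-zero exit status.
import subprocess


def run_as(user, *command):
    subprocess.check_call(['sudo', '-H', '-u', user] + list(command))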