def install_namenode():
    """Install and configure an HDFS NameNode via the Bigtop puppet recipes.

    Renders site.yaml with this unit as the namenode, applies puppet, then
    rebinds the NN services to all interfaces and creates required users.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    nn_host = get_fqdn()
    hosts = {'namenode': nn_host}
    bigtop.render_site_yaml(hosts=hosts, roles='namenode')
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # Make our namenode listen on all interfaces.
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        for bind_key in ('dfs.namenode.rpc-bind-host',
                         'dfs.namenode.servicerpc-bind-host',
                         'dfs.namenode.http-bind-host',
                         'dfs.namenode.https-bind-host'):
            props[bind_key] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
def install_resourcemanager(namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the RM install, so poll for
    namenodes() data whenever we have a namenode relation. This allows us
    to install asap, even if 'namenode.ready' is not set yet.
    """
    if not namenode.namenodes():
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
        return

    hookenv.status_set('maintenance', 'installing resourcemanager')
    hosts = {
        'namenode': namenode.namenodes()[0],
        'resourcemanager': get_fqdn(),
    }
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts=hosts, roles='resourcemanager')
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: mapred-slave)
    # to signify RM's readiness. Set our RM info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # Add our ubuntu user to the hadoop and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-resourcemanager.installed')
    hookenv.status_set('maintenance', 'resourcemanager installed')
def configure_kafka(self, zk_units, network_interface=None):
    """Configure the kafka-server role against the connected zookeepers.

    :param list zk_units: dicts with 'host' and 'port' for each zk unit.
    :param str network_interface: optional interface to bind kafka to.
    """
    # Build a sorted ip:port connection string from the zookeeper units.
    endpoints = sorted(
        "%s:%s" % (utils.resolve_private_address(u['host']), u['port'])
        for u in zk_units)
    zk_connect = ",".join(endpoints)

    # The unit number (after the '/') becomes the kafka broker id.
    _, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)

    override = {
        'kafka::server::broker_id': unit_num,
        'kafka::server::port': self.dist_config.port('kafka'),
        'kafka::server::zookeeper_connection_string': zk_connect,
    }
    if network_interface:
        override['kafka::server::bind_addr'] = (
            Bigtop().get_ip_for_interface(network_interface))

    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['kafka-server'], overrides=override)
    bigtop.trigger_puppet()
    self.set_advertise()
    self.restart()
def install_pig(self):
    '''
    Trigger the Bigtop puppet recipe that handles the Pig service.
    '''
    # Dirs are handled by the bigtop deb. No need to call out to
    # dist_config to do that work.
    roles = ['pig-client']
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=roles)
    bigtop.trigger_puppet()

    # Set app version for juju status output; pig --version looks like:
    #   Apache Pig version 0.15.0 (r: unknown)
    #   compiled Feb 06 2016, 23:00:40
    try:
        pig_out = check_output(['pig', '-x', 'local', '--version']).decode()
    except CalledProcessError as e:
        # BUG FIX: e.output is bytes (check_output does not decode it), so
        # the parsing below would yield a bytes version string. Decode it,
        # guarding against a missing/empty output attribute.
        pig_out = e.output.decode() if e.output else ''
    lines = pig_out.splitlines()
    parts = lines[0].split() if lines else []
    if len(parts) < 4:
        hookenv.log('Error getting Pig version: {}'.format(pig_out),
                    hookenv.ERROR)
        pig_ver = ''
    else:
        pig_ver = parts[3]
    hookenv.application_version_set(pig_ver)
def trigger_bigtop(self):
    '''
    Trigger the Bigtop puppet recipe that handles the Zeppelin service.
    '''
    bigtop = Bigtop()
    # Pull any user-set zeppelin overrides out of the KV store.
    overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                       strip=True)

    # The zep deb depends on spark-core which unfortunately brings in
    # most of hadoop. Include appropriate roles here to ensure these
    # packages are configured in the same way as our other Bigtop
    # software deployed with puppet.
    bigtop.render_site_yaml(
        roles=[
            'spark-client',
            'spark-yarn-slave',
            'zeppelin-server',
        ],
        overrides=overrides,
    )

    # NB: during an upgrade, we configure the site.yaml, but do not
    # trigger puppet. The user must do that with the 'reinstall' action.
    if unitdata.kv().get('zeppelin.version.repo', False):
        hookenv.log("An upgrade is available and the site.yaml has been "
                    "configured. Run the 'reinstall' action to continue.",
                    level=hookenv.INFO)
    else:
        ####################################################################
        # BUG: BIGTOP-2742
        # Default zeppelin init script looks for the literal '$(hostname)'
        # string. Symlink it so it exists before the apt install from puppet
        # tries to start the service.
        import subprocess
        host = subprocess.check_output(['hostname']).decode('utf8').strip()
        zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
        utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
        utils.run_as('root', 'ln', '-sf', zepp_pid,
                     '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
        ####################################################################

        bigtop.trigger_puppet()
        self.wait_for_api(30)

        ####################################################################
        # BUG: BIGTOP-2742
        # Puppet apply will call systemctl daemon-reload, which removes the
        # symlink we just created. Now that the bits are on disk, update the
        # init script $(hostname) that caused this mess to begin with.
        zepp_init_script = '/etc/init.d/zeppelin'
        utils.re_edit_in_place(zepp_init_script, {
            r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
        })
        utils.run_as('root', 'systemctl', 'daemon-reload')
        self.restart()
        self.wait_for_api(30)
def install_oozie(self):
    """Install Oozie with two puppet passes.

    The oozie roles need a configured hadoop client first, so render and
    apply the hadoop-client role before the oozie client/server roles.
    """
    bigtop = Bigtop()
    for pass_roles in (['hadoop-client'], ['oozie-client', 'oozie-server']):
        bigtop.render_site_yaml(roles=pass_roles)
        bigtop.trigger_puppet()
def configure(self, available_hosts):
    """
    This is the core logic of setting up spark.

    Two flags are needed:
      * Namenode exists aka HDFS is there
      * Resource manager exists aka YARN is ready

    Both flags are inferred from the available hosts.

    :param dict available_hosts: Hosts that Spark should know about.
    """
    # One-time bootstrap, tracked in the KV store.
    if not unitdata.kv().get('spark.bootstrapped', False):
        self.setup()
        unitdata.kv().set('spark.bootstrapped', True)

    self.install_benchmark()

    hosts = {'spark': available_hosts['spark-master']}
    dc = self.dist_config
    # Default to local event logs; switch to HDFS when a namenode exists.
    events_log_dir = 'file://{}'.format(dc.path('spark_events'))
    if 'namenode' in available_hosts:
        hosts['namenode'] = available_hosts['namenode']
        events_log_dir = self.setup_hdfs_logs()
    if 'resourcemanager' in available_hosts:
        hosts['resourcemanager'] = available_hosts['resourcemanager']

    roles = self.get_roles()
    override = {
        'spark::common::master_url':
            self.get_master_url(available_hosts['spark-master']),
        'spark::common::event_log_dir': events_log_dir,
        'spark::common::history_log_dir': events_log_dir,
    }

    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    bigtop.trigger_puppet()
    # There is a race condition here: the worker role will not start on the
    # first puppet apply because the master url is not yet set, producing
    #   org.apache.spark.SparkException: Invalid master URL: spark://:7077
    # in /var/logs/spark. Apply a second time as a workaround.
    # TODO(kjackal): investigate, debug, and submit a proper patch.
    bigtop.trigger_puppet()

    if 'namenode' not in available_hosts:
        # Make sure users other than spark can access the events logs dir
        # and run jobs.
        utils.run_as('root', 'chmod', '777', dc.path('spark_events'))
def trigger_bigtop(self):
    """Render the zeppelin-server site.yaml and apply puppet."""
    overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                       strip=True)
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['zeppelin-server'], overrides=overrides)
    bigtop.trigger_puppet()
    # Wait up to 30s for the zeppelin API to come back.
    self.wait_for_api(30)
def install_mahout():
    """Run the Bigtop mahout-client recipe and export MAHOUT_HOME."""
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['mahout-client'])
    bigtop.trigger_puppet()

    # Expose the mahout install location to all users.
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    hookenv.status_set('active', 'ready')
    set_state('mahout.installed')
def install(self, hbase=None, zk_units=None):
    '''
    Trigger the Bigtop puppet recipe that handles the Hive service.
    '''
    # Dirs are handled by the bigtop deb. No need to call out to
    # dist_config to do that. We do want 'ubuntu' in the hive group though.
    self.dist_config.add_users()

    # Prep config
    roles = ['hive-client', 'hive-metastore', 'hive-server2']
    metastore = "thrift://{}:9083".format(hookenv.unit_private_ip())

    # hbase integration is optional; default to empty connection strings.
    hb_connect = zk_hbase_connect = ""
    if hbase:
        roles.append('hive-hbase')
        hb_connect = "{}:{}".format(hbase['host'], hbase['master_port'])
        zk_hbase_connect = hbase['zk_connect']

    # Concurrency support requires a zookeeper quorum.
    hive_support_concurrency = bool(zk_units)
    zk_hive_connect = self.get_zk_connect(zk_units) if zk_units else ""

    override = {
        'hadoop_hive::common_config::hbase_master': hb_connect,
        'hadoop_hive::common_config::hbase_zookeeper_quorum':
            zk_hbase_connect,
        'hadoop_hive::common_config::hive_zookeeper_quorum':
            zk_hive_connect,
        'hadoop_hive::common_config::hive_support_concurrency':
            hive_support_concurrency,
        'hadoop_hive::common_config::metastore_uris': metastore,
        'hadoop_hive::common_config::server2_thrift_port':
            self.dist_config.port('hive-thrift'),
        'hadoop_hive::common_config::server2_thrift_http_port':
            self.dist_config.port('hive-thrift-web'),
    }

    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=roles, overrides=override)
    bigtop.trigger_puppet()

    # Bigtop doesn't create a hive-env.sh, but we need it for heap config
    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    if not hive_env.exists():
        (self.dist_config.path('hive_conf') /
         'hive-env.sh.template').copy(hive_env)
def install_hadoop_client_yarn(principal, namenode, resourcemanager):
    """Install the hadoop-client role once both master FQDNs are known."""
    if not (namenode.namenodes() and resourcemanager.resourcemanagers()):
        hookenv.status_set('waiting', 'waiting for master fqdns')
        return

    hookenv.status_set('maintenance', 'installing plugin (yarn)')
    hosts = {
        'namenode': namenode.namenodes()[0],
        'resourcemanager': resourcemanager.resourcemanagers()[0],
    }
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts=hosts, roles='hadoop-client')
    bigtop.trigger_puppet()
    set_state('apache-bigtop-plugin.yarn.installed')
    hookenv.status_set('maintenance', 'plugin (yarn) installed')
def install(self, hbase=None, zk_units=None):
    '''
    Trigger the Bigtop puppet recipe that handles the Hive service.
    '''
    # Dirs are handled by the bigtop deb. No need to call out to
    # dist_config to do that. We do want 'ubuntu' in the hive group though.
    self.dist_config.add_users()

    # Base roles; the hbase integration role is added when hbase is related.
    hive_roles = ['hive-client', 'hive-metastore', 'hive-server2']

    if hbase:
        hive_roles.append('hive-hbase')
        hbase_master = "{}:{}".format(hbase['host'], hbase['master_port'])
        hbase_quorum = hbase['zk_connect']
    else:
        hbase_master = ""
        hbase_quorum = ""

    if zk_units:
        concurrency = True
        hive_quorum = self.get_zk_connect(zk_units)
    else:
        concurrency = False
        hive_quorum = ""

    dc = self.dist_config
    overrides = {
        'hadoop_hive::common_config::hbase_master': hbase_master,
        'hadoop_hive::common_config::hbase_zookeeper_quorum': hbase_quorum,
        'hadoop_hive::common_config::hive_zookeeper_quorum': hive_quorum,
        'hadoop_hive::common_config::hive_support_concurrency': concurrency,
        'hadoop_hive::common_config::metastore_uris':
            "thrift://{}:9083".format(hookenv.unit_private_ip()),
        'hadoop_hive::common_config::server2_thrift_port':
            dc.port('hive-thrift'),
        'hadoop_hive::common_config::server2_thrift_http_port':
            dc.port('hive-thrift-web'),
    }

    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=hive_roles, overrides=overrides)
    bigtop.trigger_puppet()

    # Bigtop doesn't create a hive-env.sh, but we need it for heap config
    conf_dir = dc.path('hive_conf')
    hive_env = conf_dir / 'hive-env.sh'
    if not hive_env.exists():
        (conf_dir / 'hive-env.sh.template').copy(hive_env)
def install_mahout():
    """Install mahout via the Bigtop puppet recipes."""
    hookenv.status_set('maintenance', 'installing mahout')
    mahout_roles = ['mahout-client']
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=mahout_roles)
    bigtop.trigger_puppet()

    # Make MAHOUT_HOME available system-wide.
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    hookenv.status_set('active', 'ready')
    set_state('mahout.installed')
def configure(self, hosts, zk_units):
    """Render site.yaml for the hbase roles and apply puppet.

    Enables the thrift gateway and sizes the heap from charm config.
    """
    zk_connect = self.get_zk_connect(zk_units)
    override = {
        'bigtop::hbase_thrift_port': self.dist_config.port('hbase-thrift'),
        'hadoop_hbase::client::thrift': True,
        'hadoop_hbase::common_config::heap_size': hookenv.config()['heap'],
        'hadoop_hbase::common_config::zookeeper_quorum': zk_connect,
        'hadoop_hbase::deploy::auxiliary': False,
    }
    hbase_roles = ['hbase-server', 'hbase-master', 'hbase-client']
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, hbase_roles, override)
    bigtop.trigger_puppet()
def install(self, nodes=None):
    '''
    Write out the config, then run puppet.

    After this runs, we should have a configured and running service.

    :param list nodes: accepted for interface compatibility; not used here.
    '''
    bigtop = Bigtop()
    # BUG FIX: the log message contained a stray '' ("yaml ''with").
    log("Rendering site yaml with overrides: {}".format(self._override))
    bigtop.render_site_yaml(self._hosts, self._roles, self._override)
    bigtop.trigger_puppet()
    if self.is_zk_leader():
        # Record leadership on the peer relation so peers can find us.
        zkpeer = RelationBase.from_state('zkpeer.joined')
        zkpeer.set_zk_leader()
def install_mahout():
    """Install mahout via Bigtop puppet.

    Sets MAHOUT_HOME in /etc/environment and records the installed package
    version for juju status output.
    """
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['mahout-client'])
    bigtop.trigger_puppet()

    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    set_state('mahout.installed')
    hookenv.status_set('active', 'ready')

    # set app version string for juju status output
    version = get_package_version('mahout') or 'unknown'
    hookenv.application_version_set(version)
def trigger_bigtop(self):
    '''
    Trigger the Bigtop puppet recipe that handles the Zeppelin service.
    '''
    kv = unitdata.kv()
    overrides = kv.getrange('zeppelin.bigtop.overrides.', strip=True)
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=['zeppelin-server'],
        overrides=overrides,
    )
    bigtop.trigger_puppet()
    # Block until the zeppelin API answers (30s budget).
    self.wait_for_api(30)
def install_mahout():
    """Install the mahout client bits through the Bigtop puppet recipe."""
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['mahout-client'])
    bigtop.trigger_puppet()

    # Publish MAHOUT_HOME for all login sessions.
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    set_state('mahout.installed')
    hookenv.status_set('active', 'ready')

    # Surface the installed package version in juju status.
    hookenv.application_version_set(
        get_package_version('mahout') or 'unknown')
def configure(self, hosts, zk_units):
    """Configure hbase against the connected zookeeper quorum."""
    # Build a sorted, comma-separated quorum string from the zk units.
    quorum = ",".join(sorted(
        utils.resolve_private_address(unit['host']) for unit in zk_units))
    override = {
        'hadoop_hbase::common_config::zookeeper_quorum': quorum,
        'hadoop_hbase::deploy::auxiliary': False,
    }
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts, ['hbase-server', 'hbase-master', 'hbase-client'], override)
    bigtop.trigger_puppet()
def install_hadoop_client_hdfs(principal, namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the plugin install, so poll
    for namenodes() data whenever we have a namenode relation. This allows
    us to install asap, even if 'namenode.ready' is not set yet.
    """
    if not namenode.namenodes():
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
        return

    hookenv.status_set('maintenance', 'installing plugin (hdfs)')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={'namenode': namenode.namenodes()[0]},
        roles='hadoop-client',
    )
    bigtop.trigger_puppet()
    set_state('apache-bigtop-plugin.hdfs.installed')
    hookenv.status_set('maintenance', 'plugin (hdfs) installed')
def configure(self, hosts, zk_units):
    """Point hbase at the zookeeper quorum and apply the puppet recipes."""
    # Resolve each zk unit's private address, then sort for stability.
    addresses = [utils.resolve_private_address(zk["host"])
                 for zk in zk_units]
    addresses.sort()

    hbase_roles = ["hbase-server", "hbase-master", "hbase-client"]
    overrides = {
        "hadoop_hbase::common_config::zookeeper_quorum":
            ",".join(addresses),
        "hadoop_hbase::deploy::auxiliary": False,
    }
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, hbase_roles, overrides)
    bigtop.trigger_puppet()
def install_giraph(giraph):
    """Install giraph when prerequisite states are present."""
    hookenv.status_set('maintenance', 'installing giraph')
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['giraph-client'])
    bigtop.trigger_puppet()

    # Put down the -doc subpackage so we get giraph-examples
    fetch.apt_install('giraph-doc')

    giraph_home = Path('/usr/lib/giraph')
    giraph_docdir = Path('/usr/share/doc/giraph')
    giraph_libdir = Path(giraph_home / 'lib')

    # Gather a list of all the giraph jars (needed for -libjars)
    giraph_jars = glob('{}/giraph-examples-*.jar'.format(giraph_docdir))
    giraph_jars += get_good_jars(giraph_home, prefix=True)
    giraph_jars += get_good_jars(giraph_libdir, prefix=True)

    # Update environment with appropriate giraph bits. HADOOP_CLASSPATH can
    # use wildcards (and it should for readability), but GIRAPH_JARS, which
    # is intended to be used as 'hadoop jar -libjars $GIRAPH_JARS', needs to
    # be a comma-separated list of jars.
    with utils.environment_edit_in_place('/etc/environment') as env:
        cur_cp = env['HADOOP_CLASSPATH'] if 'HADOOP_CLASSPATH' in env else ""
        env['GIRAPH_HOME'] = giraph_home
        env['HADOOP_CLASSPATH'] = "{examples}/*:{home}/*:{libs}/*:{cp}".format(
            examples=giraph_docdir, home=giraph_home, libs=giraph_libdir,
            cp=cur_cp)
        env['GIRAPH_JARS'] = ','.join(giraph_jars)

    set_state('giraph.installed')
    report_status()

    # set app version string for juju status output
    giraph_version = get_package_version('giraph') or 'unknown'
    hookenv.application_version_set(giraph_version)
def install_giraph(giraph):
    """Install giraph when prerequisite states are present."""
    hookenv.status_set('maintenance', 'installing giraph')
    Bigtop().render_site_yaml(roles=['giraph-client'])
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=['giraph-client'])
    bigtop.trigger_puppet()

    # Install the -doc subpackage too: it carries giraph-examples.
    fetch.apt_install('giraph-doc')

    home_dir = Path('/usr/lib/giraph')
    doc_dir = Path('/usr/share/doc/giraph')
    lib_dir = Path(home_dir / 'lib')

    # Collect every giraph jar; the list feeds 'hadoop jar -libjars'.
    jars = glob('{}/giraph-examples-*.jar'.format(doc_dir))
    jars.extend(get_good_jars(home_dir, prefix=True))
    jars.extend(get_good_jars(lib_dir, prefix=True))

    # HADOOP_CLASSPATH may use wildcards for readability, but GIRAPH_JARS
    # must be a comma-separated list of explicit jar paths.
    with utils.environment_edit_in_place('/etc/environment') as env:
        existing_cp = "" if 'HADOOP_CLASSPATH' not in env \
            else env['HADOOP_CLASSPATH']
        env['GIRAPH_HOME'] = home_dir
        env['HADOOP_CLASSPATH'] = "{examples}/*:{home}/*:{libs}/*:{cp}".format(
            examples=doc_dir, home=home_dir, libs=lib_dir, cp=existing_cp)
        env['GIRAPH_JARS'] = ','.join(jars)

    set_state('giraph.installed')
    report_status()

    # Record the installed package version for juju status.
    hookenv.application_version_set(
        get_package_version('giraph') or 'unknown')
def install_namenode():
    """Install an HDFS namenode via the Bigtop puppet recipes.

    The NN ports come from layer options, and the NN is bound to 0.0.0.0
    so it listens on all interfaces.
    """
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')
    # NB: We want the NN to listen on all interfaces, so bind to 0.0.0.0.
    nn_overrides = {
        'hadoop::common_hdfs::hadoop_namenode_port': hdfs_port,
        'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_http_port': webhdfs_port,
        'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
        'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
    }
    bigtop.render_site_yaml(
        hosts={'namenode': get_fqdn()},
        roles=['namenode', 'mapred-app'],
        overrides=nn_overrides,
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # We need to create the 'mapred' and 'spark' user/group since we may not
    # be installing hadoop-mapreduce or spark on this machine. This is needed
    # so the namenode can access yarn and spark job history files in hdfs.
    # Also add our ubuntu user to the hadoop, mapred, and spark groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
def trigger_bigtop(self):
    '''
    Trigger the Bigtop puppet recipe that handles the Zeppelin service.
    '''
    overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                       strip=True)
    # The zep deb depends on spark-core which unfortunately brings in
    # most of hadoop. Include appropriate roles here to ensure these
    # packages are configured in the same way as our other Bigtop
    # software deployed with puppet.
    zeppelin_roles = [
        'spark-client',
        'spark-yarn-slave',
        'zeppelin-server',
    ]
    bigtop = Bigtop()
    bigtop.render_site_yaml(roles=zeppelin_roles, overrides=overrides)
    bigtop.trigger_puppet()
    self.wait_for_api(30)
def install_namenode():
    """Install the namenode role (plus mapred-app) and bind it broadly."""
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={
            'namenode': get_fqdn(),
        },
        roles=[
            'namenode',
            'mapred-app',
        ],
        # NB: We want the NN to listen on all interfaces, so bind to 0.0.0.0.
        overrides={
            'hadoop::common_hdfs::hadoop_namenode_port':
                get_layer_opts().port('namenode'),
            'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
            'hadoop::common_hdfs::hadoop_namenode_http_port':
                get_layer_opts().port('nn_webapp_http'),
            'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
            'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
        })
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
def install_namenode():
    """Install the HDFS namenode and mapred-app roles via Bigtop puppet."""
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={'namenode': get_fqdn()},
        roles=['namenode', 'mapred-app'],
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # Make our namenode listen on all interfaces.
    bind_keys = [
        'dfs.namenode.rpc-bind-host',
        'dfs.namenode.servicerpc-bind-host',
        'dfs.namenode.http-bind-host',
        'dfs.namenode.https-bind-host',
    ]
    with utils.xmlpropmap_edit_in_place(
            Path('/etc/hadoop/conf/hdfs-site.xml')) as props:
        for key in bind_keys:
            props[key] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
def configure(self, available_hosts, zk_units, peers):
    """
    This is the core logic of setting up spark.

    Two flags are needed:
      * Namenode exists aka HDFS is there
      * Resource manager exists aka YARN is ready

    both flags are infered from the available hosts.

    :param dict available_hosts: Hosts that Spark should know about.
    """
    # Remember the connected zookeepers and spark peers for later hooks.
    unitdata.kv().set('zookeeper.units', zk_units)
    unitdata.kv().set('sparkpeer.units', peers)
    unitdata.kv().flush(True)

    # One-time bootstrap, tracked in the KV store.
    if not unitdata.kv().get('spark.bootstrapped', False):
        self.setup()
        unitdata.kv().set('spark.bootstrapped', True)

    master_ip = utils.resolve_private_address(
        available_hosts['spark-master'])
    hosts = {
        'spark': master_ip,
    }

    dc = self.dist_config
    # Default to local event logs; switch to HDFS when a namenode exists.
    events_log_dir = 'file://{}'.format(dc.path('spark_events'))
    if 'namenode' in available_hosts:
        hosts['namenode'] = available_hosts['namenode']
        events_log_dir = self.setup_hdfs_logs()

    if 'resourcemanager' in available_hosts:
        hosts['resourcemanager'] = available_hosts['resourcemanager']

    roles = self.get_roles()

    override = {
        'spark::common::master_url': self.get_master_url(master_ip),
        'spark::common::event_log_dir': events_log_dir,
        'spark::common::history_log_dir': events_log_dir,
    }

    if zk_units:
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zk_connect = ",".join(zks)
        override['spark::common::zookeeper_connection_string'] = zk_connect
    else:
        override['spark::common::zookeeper_connection_string'] = ""

    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    bigtop.trigger_puppet()

    # There is a race condition here.
    # The work role will not start the first time we trigger puppet apply.
    # The exception in /var/logs/spark:
    # Exception in thread "main" org.apache.spark.SparkException:
    #   Invalid master URL: spark://:7077
    # The master url is not set at the time the worker start the first time.
    # TODO(kjackal): ...do the needed... (investiate,debug,submit patch)
    bigtop.trigger_puppet()

    if 'namenode' not in available_hosts:
        # Local event dir (not in HDFS) needs to be 777 so non-spark
        # users can write job history there. It needs to be g+s so
        # all entries will be readable by spark (in the spark group).
        # It needs to be +t so users cannot remove files they don't own.
        dc.path('spark_events').chmod(0o3777)

    self.patch_worker_master_url(master_ip)

    # SparkBench looks for the spark master in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = self.get_master_url(master_ip)

    # Install SB (subsequent calls will reconfigure existing install)
    self.install_benchmark()
def install_resourcemanager(namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the RM install, so poll for
    namenodes() data whenever we have a namenode relation. This allows us
    to install asap, even if 'namenode.ready' is not set yet.
    """
    if namenode.namenodes():
        hookenv.status_set('maintenance', 'installing resourcemanager')

        # Hosts
        nn_host = namenode.namenodes()[0]
        rm_host = get_fqdn()

        # Ports
        rm_ipc = get_layer_opts().port('resourcemanager')
        rm_http = get_layer_opts().port('rm_webapp_http')
        jh_ipc = get_layer_opts().port('jobhistory')
        jh_http = get_layer_opts().port('jh_webapp_http')
        hdfs_port = namenode.port()
        webhdfs_port = namenode.webhdfs_port()

        bigtop = Bigtop()
        bigtop.render_site_yaml(
            hosts={
                'namenode': nn_host,
                'resourcemanager': rm_host,
            },
            roles=[
                'resourcemanager',
            ],
            # NB: When we colocate the NN and RM, the RM will run puppet apply
            # last. To ensure we don't lose any hdfs-site.xml data set by the
            # NN, override common_hdfs properties again here.
            overrides={
                'hadoop::common_yarn::hadoop_rm_port': rm_ipc,
                'hadoop::common_yarn::hadoop_rm_webapp_port': rm_http,
                'hadoop::common_yarn::hadoop_rm_bind_host': '0.0.0.0',
                'hadoop::common_mapred_app::mapreduce_jobhistory_host': '0.0.0.0',
                'hadoop::common_mapred_app::mapreduce_jobhistory_port': jh_ipc,
                'hadoop::common_mapred_app::mapreduce_jobhistory_webapp_port': jh_http,
                'hadoop::common_hdfs::hadoop_namenode_port': hdfs_port,
                'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
                'hadoop::common_hdfs::hadoop_namenode_http_port': webhdfs_port,
                'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
                'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
            }
        )
        bigtop.trigger_puppet()

        # /etc/hosts entries from the KV are not currently used for bigtop,
        # but a hosts_map attribute is required by some interfaces (eg: mapred-slave)
        # to signify RM's readiness. Set our RM info in the KV to fulfill this
        # requirement.
        utils.initialize_kv_host()

        # We need to create the 'spark' user/group since we may not be
        # installing spark on this machine. This is needed so the history
        # server can access spark job history files in hdfs. Also add our
        # ubuntu user to the hadoop, mapred, and spark groups on this machine.
        get_layer_opts().add_users()

        set_state('apache-bigtop-resourcemanager.installed')
        hookenv.status_set('maintenance', 'resourcemanager installed')
    else:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
def configure(self, available_hosts, zk_units, peers, extra_libs):
    """
    This is the core logic of setting up spark.

    :param dict available_hosts: Hosts that Spark should know about.
    :param list zk_units: List of Zookeeper dicts with host/port info.
    :param list peers: List of Spark peer tuples (unit name, IP).
    :param list extra_libs: List of extra lib paths for driver/executors.
    """
    # Set KV based on connected applications
    unitdata.kv().set('zookeeper.units', zk_units)
    unitdata.kv().set('sparkpeer.units', peers)
    unitdata.kv().flush(True)

    # Get our config ready
    dc = self.dist_config
    mode = hookenv.config()['spark_execution_mode']
    master_ip = utils.resolve_private_address(
        available_hosts['spark-master'])
    master_url = self.get_master_url(master_ip)
    req_driver_mem = hookenv.config()['driver_memory']
    req_executor_mem = hookenv.config()['executor_memory']
    # Event logs live in HDFS when running on yarn, locally otherwise.
    if mode.startswith('yarn'):
        spark_events = 'hdfs://{}'.format(dc.path('spark_events'))
    else:
        spark_events = 'file://{}'.format(dc.path('spark_events'))

    # handle tuning options that may be set as percentages
    driver_mem = '1g'
    executor_mem = '1g'
    if req_driver_mem.endswith('%'):
        if mode == 'standalone' or mode.startswith('local'):
            # Convert the requested percentage of total RAM into megabytes.
            mem_mb = host.get_total_ram() / 1024 / 1024
            req_percentage = float(req_driver_mem.strip('%')) / 100
            driver_mem = str(int(mem_mb * req_percentage)) + 'm'
        else:
            hookenv.log(
                "driver_memory percentage in non-local mode. "
                "Using 1g default.",
                level=hookenv.WARNING)
    else:
        driver_mem = req_driver_mem
    if req_executor_mem.endswith('%'):
        if mode == 'standalone' or mode.startswith('local'):
            mem_mb = host.get_total_ram() / 1024 / 1024
            req_percentage = float(req_executor_mem.strip('%')) / 100
            executor_mem = str(int(mem_mb * req_percentage)) + 'm'
        else:
            hookenv.log(
                "executor_memory percentage in non-local mode. "
                "Using 1g default.",
                level=hookenv.WARNING)
    else:
        executor_mem = req_executor_mem

    # Some spark applications look for envars in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = master_url
        env['SPARK_HOME'] = dc.path('spark_home')

    # Setup hosts dict
    hosts = {
        'spark': master_ip,
    }
    if 'namenode' in available_hosts:
        hosts['namenode'] = available_hosts['namenode']
    if 'resourcemanager' in available_hosts:
        hosts['resourcemanager'] = available_hosts['resourcemanager']

    # Setup roles dict. We always include the history server and client.
    # Determine other roles based on our execution mode.
    roles = ['spark-history-server', 'spark-client']
    if mode == 'standalone':
        roles.append('spark-master')
        roles.append('spark-worker')
    elif mode.startswith('yarn'):
        roles.append('spark-on-yarn')
        roles.append('spark-yarn-slave')

    # Setup overrides dict
    override = {
        'spark::common::master_url': master_url,
        'spark::common::event_log_dir': spark_events,
        'spark::common::history_log_dir': spark_events,
        'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
        'spark::common::driver_mem': driver_mem,
        'spark::common::executor_mem': executor_mem,
    }
    if zk_units:
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zk_connect = ",".join(zks)
        override['spark::common::zookeeper_connection_string'] = zk_connect
    else:
        override['spark::common::zookeeper_connection_string'] = None

    # Create our site.yaml and trigger puppet.
    # NB: during an upgrade, we configure the site.yaml, but do not
    # trigger puppet. The user must do that with the 'reinstall' action.
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    if unitdata.kv().get('spark.version.repo', False):
        hookenv.log(
            "An upgrade is available and the site.yaml has been "
            "configured. Run the 'reinstall' action to continue.",
            level=hookenv.INFO)
    else:
        bigtop.trigger_puppet()
        self.patch_worker_master_url(master_ip, master_url)

        # Packages don't create the event dir by default. Do it each time
        # spark is (re)installed to ensure location/perms are correct.
        self.configure_events_dir(mode)

    # Handle examples and Spark-Bench. Do this each time this method is
    # called in case we need to act on a new resource or user config.
    self.configure_examples()
    self.configure_sparkbench()
def configure(self, available_hosts, zk_units, peers):
    """
    This is the core logic of setting up spark.

    Two flags are needed:

      * Namenode exists aka HDFS is ready
      * Resource manager exists aka YARN is ready

    both flags are infered from the available hosts.

    :param dict available_hosts: Hosts that Spark should know about.
    """
    # Bootstrap spark once per unit lifetime.
    if not unitdata.kv().get('spark.bootstrapped', False):
        self.setup()
        unitdata.kv().set('spark.bootstrapped', True)

    # Set KV based on connected applications
    unitdata.kv().set('zookeeper.units', zk_units)
    unitdata.kv().set('sparkpeer.units', peers)
    unitdata.kv().flush(True)

    # Get our config ready
    dc = self.dist_config
    events_log_dir = 'file://{}'.format(dc.path('spark_events'))
    mode = hookenv.config()['spark_execution_mode']
    master_ip = utils.resolve_private_address(
        available_hosts['spark-master'])
    master_url = self.get_master_url(master_ip)

    # Setup hosts dict
    hosts = {
        'spark': master_ip,
    }
    if 'namenode' in available_hosts:
        hosts['namenode'] = available_hosts['namenode']
        # HDFS is available; store job history there instead of locally.
        events_log_dir = self.setup_hdfs_logs()
    if 'resourcemanager' in available_hosts:
        hosts['resourcemanager'] = available_hosts['resourcemanager']

    # Setup roles dict. We always include the history server and client.
    # Determine other roles based on our execution mode.
    roles = ['spark-history-server', 'spark-client']
    if mode == 'standalone':
        roles.append('spark-master')
        roles.append('spark-worker')
    elif mode.startswith('yarn'):
        roles.append('spark-on-yarn')
        roles.append('spark-yarn-slave')

    # Setup overrides dict
    override = {
        'spark::common::master_url': master_url,
        'spark::common::event_log_dir': events_log_dir,
        'spark::common::history_log_dir': events_log_dir,
    }
    if zk_units:
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zk_connect = ",".join(zks)
        override['spark::common::zookeeper_connection_string'] = zk_connect
    else:
        override['spark::common::zookeeper_connection_string'] = None

    # Create our site.yaml and trigger puppet
    bigtop = Bigtop()
    bigtop.render_site_yaml(hosts, roles, override)
    bigtop.trigger_puppet()

    # Do this after our puppet bits in case puppet overrides needed perms
    if 'namenode' not in available_hosts:
        # Local event dir (not in HDFS) needs to be 777 so non-spark
        # users can write job history there. It needs to be g+s so
        # all entries will be readable by spark (in the spark group).
        # It needs to be +t so users cannot remove files they don't own.
        dc.path('spark_events').chmod(0o3777)

    self.patch_worker_master_url(master_ip, master_url)

    # handle tuning options that may be set as percentages
    driver_mem = '1g'
    req_driver_mem = hookenv.config()['driver_memory']
    executor_mem = '1g'
    req_executor_mem = hookenv.config()['executor_memory']
    if req_driver_mem.endswith('%'):
        if mode == 'standalone' or mode.startswith('local'):
            mem_mb = host.get_total_ram() / 1024 / 1024
            req_percentage = float(req_driver_mem.strip('%')) / 100
            driver_mem = str(int(mem_mb * req_percentage)) + 'm'
        else:
            # Fix: log at WARNING (was level=None) to match the other
            # configure() implementation in this file and make the
            # fallback visible in the unit log.
            hookenv.log("driver_memory percentage in non-local mode. "
                        "Using 1g default.", level=hookenv.WARNING)
    else:
        driver_mem = req_driver_mem

    if req_executor_mem.endswith('%'):
        if mode == 'standalone' or mode.startswith('local'):
            mem_mb = host.get_total_ram() / 1024 / 1024
            req_percentage = float(req_executor_mem.strip('%')) / 100
            executor_mem = str(int(mem_mb * req_percentage)) + 'm'
        else:
            # Fix: log at WARNING (was level=None), same rationale as above.
            hookenv.log("executor_memory percentage in non-local mode. "
                        "Using 1g default.", level=hookenv.WARNING)
    else:
        executor_mem = req_executor_mem

    spark_env = '/etc/spark/conf/spark-env.sh'
    utils.re_edit_in_place(spark_env, {
        r'.*SPARK_DRIVER_MEMORY.*':
            'export SPARK_DRIVER_MEMORY={}'.format(driver_mem),
        r'.*SPARK_EXECUTOR_MEMORY.*':
            'export SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
    }, append_non_matches=True)

    # Install SB (subsequent calls will reconfigure existing install)
    # SparkBench looks for the spark master in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = master_url
    self.install_benchmark()
def configure(self, available_hosts, zk_units, peers, extra_libs):
    """
    This is the core logic of setting up spark.

    :param dict available_hosts: Hosts that Spark should know about.
    :param list zk_units: List of Zookeeper dicts with host/port info.
    :param list peers: List of Spark peer tuples (unit name, IP).
    :param list extra_libs: List of extra lib paths for driver/executors.
    """
    # Persist info about connected applications in the unit KV store.
    kv = unitdata.kv()
    kv.set('zookeeper.units', zk_units)
    kv.set('sparkpeer.units', peers)
    kv.flush(True)

    # Gather charm config and derived values.
    dist = self.dist_config
    charm_cfg = hookenv.config()
    exec_mode = charm_cfg['spark_execution_mode']
    master_ip = utils.resolve_private_address(
        available_hosts['spark-master'])
    master_url = self.get_master_url(master_ip)
    requested_driver_mem = charm_cfg['driver_memory']
    requested_executor_mem = charm_cfg['executor_memory']

    # Event logs live in HDFS for yarn modes, on the local FS otherwise.
    event_scheme = 'hdfs' if exec_mode.startswith('yarn') else 'file'
    spark_events = '{}://{}'.format(event_scheme, dist.path('spark_events'))

    # Memory settings may be given as a percentage of total RAM; resolve
    # them to concrete values in standalone/local modes, otherwise fall
    # back to the 1g default and warn.
    local_like = exec_mode == 'standalone' or exec_mode.startswith('local')

    driver_mem = '1g'
    if not requested_driver_mem.endswith('%'):
        driver_mem = requested_driver_mem
    elif local_like:
        total_mb = host.get_total_ram() / 1024 / 1024
        fraction = float(requested_driver_mem.strip('%')) / 100
        driver_mem = str(int(total_mb * fraction)) + 'm'
    else:
        hookenv.log("driver_memory percentage in non-local mode. "
                    "Using 1g default.", level=hookenv.WARNING)

    executor_mem = '1g'
    if not requested_executor_mem.endswith('%'):
        executor_mem = requested_executor_mem
    elif local_like:
        total_mb = host.get_total_ram() / 1024 / 1024
        fraction = float(requested_executor_mem.strip('%')) / 100
        executor_mem = str(int(total_mb * fraction)) + 'm'
    else:
        hookenv.log("executor_memory percentage in non-local mode. "
                    "Using 1g default.", level=hookenv.WARNING)

    # Some spark applications look for envars in /etc/environment
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MASTER'] = master_url
        env['SPARK_HOME'] = dist.path('spark_home')

    # Hosts for site.yaml: always spark; add hadoop hosts when related.
    site_hosts = {'spark': master_ip}
    for hadoop_role in ('namenode', 'resourcemanager'):
        if hadoop_role in available_hosts:
            site_hosts[hadoop_role] = available_hosts[hadoop_role]

    # We always run the history server and client; remaining roles are
    # determined by the execution mode.
    site_roles = ['spark-history-server', 'spark-client']
    if exec_mode == 'standalone':
        site_roles.extend(['spark-master', 'spark-worker'])
    elif exec_mode.startswith('yarn'):
        site_roles.extend(['spark-on-yarn', 'spark-yarn-slave'])

    # Puppet overrides for the bigtop deploy.
    overrides = {
        'spark::common::master_url': master_url,
        'spark::common::event_log_dir': spark_events,
        'spark::common::history_log_dir': spark_events,
        'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
        'spark::common::driver_mem': driver_mem,
        'spark::common::executor_mem': executor_mem,
    }
    if zk_units:
        endpoints = ["%s:%s" % (utils.resolve_private_address(z['host']),
                                z['port'])
                     for z in zk_units]
        overrides['spark::common::zookeeper_connection_string'] = \
            ",".join(endpoints)
    else:
        overrides['spark::common::zookeeper_connection_string'] = None

    # Create our site.yaml and trigger puppet.
    # NB: during an upgrade, we configure the site.yaml, but do not
    # trigger puppet. The user must do that with the 'reinstall' action.
    bigtop = Bigtop()
    bigtop.render_site_yaml(site_hosts, site_roles, overrides)
    if kv.get('spark.version.repo', False):
        hookenv.log("An upgrade is available and the site.yaml has been "
                    "configured. Run the 'reinstall' action to continue.",
                    level=hookenv.INFO)
    else:
        bigtop.trigger_puppet()

    self.patch_worker_master_url(master_ip, master_url)

    # Packages don't create the event dir by default. Do it each time
    # spark is (re)installed to ensure location/perms are correct.
    self.configure_events_dir(exec_mode)

    # Handle examples and Spark-Bench. Do this each time this method is
    # called in case we need to act on a new resource or user config.
    self.configure_examples()
    self.configure_sparkbench()
class TestBigtopUnit(Harness):
    '''
    Unit tests for the Bigtop class.

    Every test mocks out the charm helpers it touches
    (``charms.layer.apache_bigtop_base.*``) so no real puppet, apt, or
    filesystem work happens.
    '''

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def setUp(self, mock_ver, mock_hookenv):
        # Pin the bigtop version so Bigtop() init is deterministic.
        mock_ver.return_value = '1.2.0'
        super(TestBigtopUnit, self).setUp()
        self.bigtop = Bigtop()

    def test_init(self):
        '''
        Verify that the Bigtop class can init itself, and that it has some
        of the properties that we expect.
        '''
        # paths should be Path objects.
        self.assertEqual(type(self.bigtop.bigtop_base), Path)
        self.assertEqual(type(self.bigtop.site_yaml), Path)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_puppet_modules')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_reverse_dns')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_localdomain')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_java')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_swap')
    @mock.patch('charms.layer.apache_bigtop_base.is_container')
    def test_install(self, mock_container, mock_swap, mock_java, mock_pin,
                     mock_local, mock_dns, mock_fetch, mock_puppet,
                     mock_apply, mock_hiera):
        '''
        Verify install calls expected class methods.
        '''
        mock_container.return_value = False
        self.bigtop.install()
        self.assertTrue(mock_swap.called)
        self.assertTrue(mock_java.called)
        self.assertTrue(mock_pin.called)
        self.assertTrue(mock_local.called)
        self.assertTrue(mock_dns.called)
        self.assertTrue(mock_fetch.called)
        self.assertTrue(mock_puppet.called)
        self.assertTrue(mock_apply.called)
        self.assertTrue(mock_hiera.called)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    def test_refresh_bigtop_release(self, mock_pin, mock_fetch,
                                    mock_apply, mock_update):
        '''
        Verify refresh calls expected class methods.
        '''
        self.bigtop.refresh_bigtop_release()
        self.assertTrue(mock_pin.called)
        self.assertTrue(mock_fetch.called)
        self.assertTrue(mock_apply.called)
        self.assertTrue(mock_update.called)

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_get_repo_url(self, mock_ver, mock_lsb_release, mock_utils,
                          mock_hookenv):
        '''
        Verify that we setup an appropriate repository.

        Exercises every (version, distro, arch) combination handled by
        get_repo_url and checks the exact URL returned for each.
        '''
        mock_ver.return_value = '1.1.0'

        # non-ubuntu should throw an exception
        mock_lsb_release.return_value = {'DISTRIB_ID': 'centos'}
        self.assertRaises(
            BigtopError,
            self.bigtop.get_repo_url,
            '1.1.0')

        # 1.1.0 on trusty/non-power
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/trusty/foo'))

        # 1.1.0 on trusty/power (should return vivid url)
        mock_utils.cpu_arch.return_value = 'ppc64le'
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/vivid/ppc64el'))

        # 1.2.0 on xenial
        mock_ver.return_value = '1.2.0'
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.2.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.2.0/ubuntu/16.04/foo'))

        # 1.2.1 on xenial/intel
        mock_hookenv.return_value = {'name': 'foo'}
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(self.bigtop.get_repo_url('1.2.1'),
                         ('http://repos.bigtop.apache.org/releases/'
                          '1.2.1/ubuntu/16.04/x86_64'))

        # 1.2.1 on xenial/non-intel
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(self.bigtop.get_repo_url('1.2.1'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-1.2.1/'
                          'OS=ubuntu-16.04/lastSuccessfulBuild/artifact/output/apt'))

        # master on xenial/intel
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(self.bigtop.get_repo_url('master'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
                          'OS=ubuntu-16.04,label=docker-slave/ws/output/apt'))

        # master on xenial/non-intel
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(self.bigtop.get_repo_url('master'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
                          'OS=ubuntu-16.04-foo,label=docker-slave/ws/output/apt'))

        # test bad version on xenial should throw an exception
        self.assertRaises(
            BigtopError,
            self.bigtop.get_repo_url,
            '0.0.0')

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_install_swap_when_swap_exists(self, mock_sub):
        '''
        Verify we do NOT attempt to install swap space if it already exists.
        '''
        # Non-empty swapon output simulates existing swap.
        mock_sub.check_output.return_value = b"foo\nbar"
        mock_sub.reset_mock()
        self.bigtop.install_swap()
        # We reset the mock, so here we're verifying no other subprocess
        # calls were made.
        mock_sub.check_call.assert_not_called()

    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.fetch')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_install_java(self, mock_options, mock_fetch, mock_utils,
                          mock_lsb_release):
        '''
        Test to verify that we install java when requested.
        '''
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial'}

        # Should be noop if install_java layer opt is not set.
        self.bigtop.options.get.return_value = ''
        self.bigtop.install_java()
        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertFalse(mock_fetch.apt_install.called)
        self.assertFalse(mock_utils.re_edit_in_place.called)

        # Should apt install if install_java layer opt is set.
        self.bigtop.options.get.return_value = 'foo'
        print("options: {}".format(self.bigtop.options))
        self.bigtop.install_java()
        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertTrue(mock_fetch.apt_install.called)
        self.assertTrue(mock_utils.re_edit_in_place.called)

        # On trusty, should add a ppa so that we can install Java 8.
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'trusty'}
        self.bigtop.install_java()
        self.assertTrue(mock_fetch.add_source.called)
        self.assertTrue(mock_fetch.apt_update.called)

    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_pin_bigtop_packages(self, mock_path):
        '''
        Verify the apt template is opened and written to a (mocked) file.
        '''
        mock_dst = mock.Mock()
        mock_path.return_value = mock_dst
        self.bigtop.pin_bigtop_packages(priority=100)
        self.assertTrue(mock_dst.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_apt',
                new_callable=mock.PropertyMock)
    def test_update_bigtop_repo(self, mock_apt, mock_lsb_release, mock_sub):
        '''
        Verify a bigtop apt repository is added/removed.
        '''
        # non-ubuntu should not invoke a subprocess call
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'foo',
                                         'DISTRIB_ID': 'centos',
                                         'DISTRIB_RELEASE': '7'}
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_not_called()

        # verify args when adding a repo on ubuntu
        mock_apt.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial',
                                         'DISTRIB_ID': 'ubuntu',
                                         'DISTRIB_RELEASE': '16.04'}
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yu', 'deb foo bigtop contrib'])

        # verify args when removing a repo on ubuntu
        self.bigtop.update_bigtop_repo(remove=True)
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yur', 'deb foo bigtop contrib'])

        # verify we handle check_call errors
        class MockException(Exception):
            pass
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        mock_sub.check_call.side_effect = mock_raise
        # Should not propagate the exception.
        self.bigtop.update_bigtop_repo()

    @mock.patch('charms.layer.apache_bigtop_base.get_package_version')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess.Popen')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    def test_check_bigtop_repo_package(self, mock_lsb_release, mock_sub,
                                       mock_hookenv, mock_pkg_ver):
        '''
        Verify bigtop repo package queries.
        '''
        # non-ubuntu should raise an error
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'foo',
                                         'DISTRIB_ID': 'centos',
                                         'DISTRIB_RELEASE': '7'}
        self.assertRaises(BigtopError,
                          self.bigtop.check_bigtop_repo_package, 'foo')

        # reset with ubuntu
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial',
                                         'DISTRIB_ID': 'ubuntu',
                                         'DISTRIB_RELEASE': '16.04'}

        madison_proc = mock.Mock()
        grep_proc = mock.Mock()

        # simulate a missing repo pkg
        grep_attrs = {'communicate.return_value': (b'', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing repo pkg (message should be logged)
        # NOTE(review): mock_sub.return_value is assigned twice here (and
        # below); the second assignment overwrites the first, so only
        # grep_proc is ever returned by Popen. Possibly side_effect with a
        # list was intended — confirm against check_bigtop_repo_package.
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_called_once()
        mock_hookenv.reset_mock()

        # reset our grep args to simulate the repo pkg being found
        grep_attrs = {'communicate.return_value': (b'pkg|1|repo', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo and installed pkg versions are the same (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '1'
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo pkg is newer than installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '0'
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

    @mock.patch('charms.layer.apache_bigtop_base.socket')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_check_reverse_dns(self, mock_hookenv, mock_utils, mock_sub,
                               mock_socket):
        '''
        Verify that we set the reverse_dns_ok state, and handle errors
        correctly.
        '''
        # Test the case where things succeed.
        mock_sub.check_output.return_value = b'domain'
        self.bigtop.check_reverse_dns()
        self.assertTrue(unitdata.kv().get('reverse_dns_ok'))

        # Test the case where we get an exception.
        mock_sub.check_output.return_value = b'localdomain'
        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

        class MockHError(Exception):
            pass

        def raise_herror(*args, **kwargs):
            raise MockHError('test')
        # A socket.herror during lookup should also mark reverse DNS bad.
        mock_socket.herror = MockHError
        mock_socket.gethostbyaddr = raise_herror

        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_fetch_bigtop_release(self, mock_path, mock_hookenv, mock_ver):
        '''Verify we raise an exception if an invalid release is specified.'''
        mock_hookenv.resource_get.return_value = False
        mock_ver.return_value = 'foo'
        self.assertRaises(
            BigtopError,
            self.bigtop.fetch_bigtop_release)

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_install_puppet_modules(self, mock_hookenv, mock_utils):
        '''Verify that we seem to install puppet modules correctly.'''
        mock_hookenv.charm_dir.return_value = '/tmp'

        def mock_run_as(user, *args):
            '''
            Verify that we run puppet as root.
            '''
            self.assertEqual(user, 'root')

        mock_utils.run_as.side_effect = mock_run_as
        self.bigtop.install_puppet_modules()

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.glob')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    def test_apply_patches(self, mock_chdir, mock_glob, mock_utils,
                           mock_hookenv):
        '''
        Verify that we apply patches in the correct order.
        '''
        mock_hookenv.charm_dir.return_value = '/tmp'
        # pop() consumes this right-to-left, i.e. in sorted order:
        # 'bar', 'baz', 'foo'.
        reverse_sorted = ['foo', 'baz', 'bar']
        mock_glob.return_value = ['foo', 'baz', 'bar']

        def mock_run_as(*args):
            patch = args[-1]
            self.assertEqual(args[0], 'root')
            # Verify that we're running on a sorted list.
            self.assertTrue(patch.endswith(reverse_sorted.pop()))

        mock_utils.run_as.side_effect = mock_run_as
        self.bigtop.apply_patches()

    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_base')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_hiera_yaml(self, mock_path, mock_base, mock_yaml):
        '''
        Verify that we attempt to add the values that we expect to our hiera
        object, before writing it out to a (mocked) yaml file.
        '''
        def mock_dump(hiera_yaml, *args, **kwargs):
            self.assertTrue(hiera_yaml.get(':yaml'))
            self.assertTrue(':datadir' in hiera_yaml[':yaml'])

        mock_yaml.dump.side_effect = mock_dump
        mock_dst = mock.Mock()
        mock_path.return_value = mock_dst
        mock_yaml.load.return_value = defaultdict(lambda: {})
        # Support both py2 (__div__) and py3 (__truediv__) Path division.
        mock_base.__div__.side_effect = lambda rel: mock_base
        mock_base.__truediv__.side_effect = lambda rel: mock_base

        self.bigtop.render_hiera_yaml()
        # Verify that we attempt to write yaml::datadir to hieradata.
        self.assertTrue(mock_dst.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.site_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_site_yaml(self, mock_path, mock_site, mock_yaml,
                              mock_run):
        '''
        Verify that we attempt to put together a plausible site yaml
        config, before writing it out to a (mocked) yaml file.
        '''
        # Setup
        mock_yaml.load.return_value = defaultdict(lambda: {})
        config = {
            'roles': None,
            'overrides': None,
            'hosts': None
        }

        def verify_yaml(yaml, *args, **kwargs):
            '''
            Verify that the dict we are trying to dump to yaml has the
            values that we expect.
            '''
            self.assertTrue('bigtop::bigtop_repo_uri' in yaml)
            if config['roles'] is None:
                self.assertFalse('bigtop::roles_enabled' in yaml)
            else:
                self.assertTrue('bigtop::roles_enabled' in yaml)
                self.assertTrue('bigtop::roles' in yaml)
                self.assertEqual(
                    yaml['bigtop::roles'],
                    sorted(config['roles'])
                )
            if config['overrides'] is not None:
                for key in config['overrides']:
                    self.assertTrue(yaml.get(key) == config['overrides'][key])

        mock_yaml.dump.side_effect = verify_yaml

        # Test various permutations of arguments passed in.
        for config_set in [
                {'roles': ['foo', 'bar', 'baz']},  # Test roles
                {'overrides': {'foo': 'bar'}}]:  # Test override
            config.update(config_set)

            # Test
            self.bigtop.render_site_yaml(
                roles=config['roles'],
                overrides=config['overrides'],
                hosts=config['hosts'])

            # Reset
            mock_yaml.load.return_value = defaultdict(lambda: {})
            config['roles'] = None
            config['overrides'] = None
            config['hosts'] = None

    def test_queue_puppet(self):
        '''Verify that we set the expected 'puppet queued' state.'''
        self.bigtop.queue_puppet()
        self.assertTrue(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_handle_queued_puppet(self, mock_ver, mock_hookenv,
                                  mock_trigger):
        '''
        Verify that we attempt to call puppet when it has been queued, and
        then clear the queued state.
        '''
        set_state('apache-bigtop-base.puppet_queued')
        mock_ver.return_value = '1.2.0'
        Bigtop._handle_queued_puppet()
        self.assertTrue(mock_trigger.called)
        self.assertFalse(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.unitdata')
    def test_trigger_puppet(self, mock_unit, mock_chdir, mock_utils):
        '''
        Test to verify that we attempt to trigger puppet correctly.
        '''
        def verify_utils_call(user, puppet, *args):
            self.assertEqual(user, 'root')
            self.assertEqual(puppet, 'puppet')

        mock_kv = mock.Mock()
        mock_unit.kv.return_value = mock_kv
        mock_kv.get.return_value = 'foo'
        mock_utils.run_as.side_effect = verify_utils_call

        self.bigtop.trigger_puppet()
        self.assertTrue(mock_utils.run_as.called)

        # TODO: verify the Java 1.7 logic.

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    def test_check_hdfs_setup(self, mock_run, mock_sub):
        '''
        Verify that our hdfs setup check works as expected, and handles
        errors as expected.
        '''
        class MockException(Exception):
            pass
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        # 'ubuntu' (with any surrounding whitespace) means hdfs is set up.
        for s in ['ubuntu', ' ubuntu ', 'ubuntu ', ' ubuntu']:
            mock_run.return_value = s
            self.assertTrue(self.bigtop.check_hdfs_setup())

        for s in ['foo', ' ', '', ' bar', 'notubuntu', 'ubuntu not ']:
            mock_run.return_value = s
            self.assertFalse(self.bigtop.check_hdfs_setup())

        # A subprocess failure should also report hdfs as not set up.
        mock_run.side_effect = mock_raise
        self.assertFalse(self.bigtop.check_hdfs_setup())

    @unittest.skip('noop')
    def test_spec(self):
        '''Nothing to test that the linter won't handle.'''

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.chownr')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_run_smoke_tests(self, mock_options, mock_ownr, mock_chdir,
                             mock_run, mock_sub):
        '''
        Verify that we attempt to run smoke tests correctly, and handle
        exceptions as expected.
        '''
        mock_options.return_value = {}
        # Returns None if bigtop isn't available.
        remove_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Returns None if we don't pass in a 'smoke_components' arg
        set_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Should return 'success' if all went well.
        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            'success'
        )

        # Should return error message if subprocess raised an Exception.
        class MockException(Exception):
            pass
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        mock_run.side_effect = mock_raise
        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            "test output"
        )

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_reinstall_repo_packages(self, mock_sub, mock_trigger, mock_pin,
                                     mock_path, mock_hiera, mock_update):
        '''
        Verify that we attempt to trigger puppet during a reinstall, and
        handle exceptions as expected.
        '''
        class MockException(Exception):
            pass
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        # Should return error message if apt-get remove raised an Exception.
        mock_sub.check_call.side_effect = mock_raise
        self.assertEqual(
            self.bigtop.reinstall_repo_packages(remove_pkgs='foo bar-*'),
            "test output"
        )

        # Should call pin twice if trigger puppet fails (once to raise prio,
        # once again to drop it back down)
        mock_trigger.side_effect = mock_raise
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'failed')
        self.assertEqual(mock_pin.call_count, 2)

        # Should return 'success' if all went well.
        mock_trigger.side_effect = None
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'success')

    def test_get_ip_for_interface(self):
        '''
        Test to verify that our get_ip_for_interface method does sensible
        things.
        '''
        ip = self.bigtop.get_ip_for_interface('lo')
        self.assertEqual(ip, '127.0.0.1')

        ip = self.bigtop.get_ip_for_interface('127.0.0.0/24')
        self.assertEqual(ip, '127.0.0.1')

        # If passed 0.0.0.0, or something similar, the function should
        # treat it as a special case, and return what it was passed.
        for i in ['0.0.0.0', '0.0.0.0/0', '0/0', '::']:
            ip = self.bigtop.get_ip_for_interface(i)
            self.assertEqual(ip, i)

        # Unmatched CIDRs and bogus interface names raise BigtopError.
        self.assertRaises(
            BigtopError, self.bigtop.get_ip_for_interface, '2.2.2.0/24')

        self.assertRaises(
            BigtopError, self.bigtop.get_ip_for_interface, 'foo')
class TestBigtopUnit(Harness):
    '''
    Unit tests for the Bigtop class.

    All collaborators (hookenv, subprocess, utils, Path, ...) are patched at
    the charms.layer.apache_bigtop_base module level, so no test here touches
    the host system.
    '''
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def setUp(self, mock_ver, mock_hookenv):
        # Pin the bigtop version so Bigtop() can construct without charm data.
        mock_ver.return_value = '1.2.0'
        super(TestBigtopUnit, self).setUp()
        self.bigtop = Bigtop()

    def test_init(self):
        '''
        Verify that the Bigtop class can init itself, and that it has some
        of the properties that we expect..
        '''
        # paths should be Path objects.
        self.assertEqual(type(self.bigtop.bigtop_base), Path)
        self.assertEqual(type(self.bigtop.site_yaml), Path)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_puppet_modules')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_reverse_dns')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_localdomain')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_java')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_swap')
    @mock.patch('charms.layer.apache_bigtop_base.is_container')
    def test_install(self, mock_container, mock_swap, mock_java, mock_pin,
                     mock_local, mock_dns, mock_fetch, mock_puppet,
                     mock_apply, mock_hiera):
        '''
        Verify install calls expected class methods.
        '''
        mock_container.return_value = False
        self.bigtop.install()
        self.assertTrue(mock_swap.called)
        self.assertTrue(mock_java.called)
        self.assertTrue(mock_pin.called)
        self.assertTrue(mock_local.called)
        self.assertTrue(mock_dns.called)
        self.assertTrue(mock_fetch.called)
        self.assertTrue(mock_puppet.called)
        self.assertTrue(mock_apply.called)
        self.assertTrue(mock_hiera.called)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    def test_refresh_bigtop_release(self, mock_pin, mock_fetch, mock_apply,
                                    mock_update):
        '''
        Verify refresh calls expected class methods.
        '''
        self.bigtop.refresh_bigtop_release()
        self.assertTrue(mock_pin.called)
        self.assertTrue(mock_fetch.called)
        self.assertTrue(mock_apply.called)
        self.assertTrue(mock_update.called)

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_get_repo_url(self, mock_ver, mock_lsb_release, mock_utils,
                          mock_hookenv):
        '''
        Verify that we setup an appropriate repository.
        '''
        mock_ver.return_value = '1.1.0'

        # non-ubuntu should throw an exception
        mock_lsb_release.return_value = {'DISTRIB_ID': 'centos'}
        self.assertRaises(BigtopError, self.bigtop.get_repo_url, '1.1.0')

        # 1.1.0 on trusty/non-power
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/trusty/foo'))

        # 1.1.0 on trusty/power (should return vivid url)
        mock_utils.cpu_arch.return_value = 'ppc64le'
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/vivid/ppc64el'))

        # 1.2.0 on xenial
        mock_ver.return_value = '1.2.0'
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.2.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.2.0/ubuntu/16.04/foo'))

        # 1.2.1 on xenial/intel
        mock_hookenv.return_value = {'name': 'foo'}
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(self.bigtop.get_repo_url('1.2.1'),
                         ('http://repos.bigtop.apache.org/releases/'
                          '1.2.1/ubuntu/16.04/x86_64'))

        # 1.2.1 on xenial/non-intel (falls back to the upstream CI repo)
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(
            self.bigtop.get_repo_url('1.2.1'),
            ('https://ci.bigtop.apache.org/job/Bigtop-1.2.1/'
             'OS=ubuntu-16.04/lastSuccessfulBuild/artifact/output/apt'))

        # master on xenial/intel
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(
            self.bigtop.get_repo_url('master'),
            ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
             'OS=ubuntu-16.04,label=docker-slave/ws/output/apt'))

        # master on xenial/non-intel (arch is appended to the OS label)
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(
            self.bigtop.get_repo_url('master'),
            ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
             'OS=ubuntu-16.04-foo,label=docker-slave/ws/output/apt'))

        # test bad version on xenial should throw an exception
        self.assertRaises(BigtopError, self.bigtop.get_repo_url, '0.0.0')

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_install_swap_when_swap_exists(self, mock_sub):
        '''
        Verify we do not attempt to install swap space if it already exists.
        '''
        # Non-empty swapon output means swap already exists.
        mock_sub.check_output.return_value = b"foo\nbar"
        mock_sub.reset_mock()
        self.bigtop.install_swap()
        # We reset the mock, so here we're verifying no other subprocess
        # calls were made.
        mock_sub.check_call.assert_not_called()

    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.fetch')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_install_java(self, mock_options, mock_fetch, mock_utils,
                          mock_lsb_release):
        '''
        Test to verify that we install java when requested.
        '''
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial'}

        # Should be noop if bigtop_jdk not set.
        self.bigtop.options.get.return_value = ''
        self.bigtop.install_java()
        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertFalse(mock_fetch.apt_install.called)
        self.assertFalse(mock_utils.re_edit_in_place.called)

        # Should add ppa if we have set bigtop_jdk.
        self.bigtop.options.get.return_value = 'foo'
        print("options: {}".format(self.bigtop.options))
        self.bigtop.install_java()
        # On xenial, no ppa is needed; we just apt install and edit config.
        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertTrue(mock_fetch.apt_install.called)
        self.assertTrue(mock_utils.re_edit_in_place.called)

        # On trusty, should add a ppa so that we can install Java 8.
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'trusty'}
        self.bigtop.install_java()
        self.assertTrue(mock_fetch.add_source.called)
        self.assertTrue(mock_fetch.apt_update.called)

    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_pin_bigtop_packages(self, mock_path):
        '''
        Verify the apt template is opened and written to a (mocked) file.
        '''
        mock_dst = mock.Mock()
        mock_path.return_value = mock_dst
        self.bigtop.pin_bigtop_packages(priority=100)
        self.assertTrue(mock_dst.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_apt',
                new_callable=mock.PropertyMock)
    def test_update_bigtop_repo(self, mock_apt, mock_lsb_release, mock_sub):
        '''
        Verify a bigtop apt repository is added/removed.
        '''
        # non-ubuntu should not invoke a subprocess call
        mock_lsb_release.return_value = {
            'DISTRIB_CODENAME': 'foo',
            'DISTRIB_ID': 'centos',
            'DISTRIB_RELEASE': '7'
        }
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_not_called()

        # verify args when adding a repo on ubuntu
        mock_apt.return_value = 'foo'
        mock_lsb_release.return_value = {
            'DISTRIB_CODENAME': 'xenial',
            'DISTRIB_ID': 'ubuntu',
            'DISTRIB_RELEASE': '16.04'
        }
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yu', 'deb foo bigtop contrib'])

        # verify args when removing a repo on ubuntu
        self.bigtop.update_bigtop_repo(remove=True)
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yur', 'deb foo bigtop contrib'])

        # verify we handle check_call errors (update_bigtop_repo should
        # swallow them rather than propagate)
        class MockException(Exception):
            pass
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        mock_sub.check_call.side_effect = mock_raise
        self.bigtop.update_bigtop_repo()

    @mock.patch('charms.layer.apache_bigtop_base.get_package_version')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess.Popen')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    def test_check_bigtop_repo_package(self, mock_lsb_release, mock_sub,
                                       mock_hookenv, mock_pkg_ver):
        '''
        Verify bigtop repo package queries.
        '''
        # non-ubuntu should raise an error
        mock_lsb_release.return_value = {
            'DISTRIB_CODENAME': 'foo',
            'DISTRIB_ID': 'centos',
            'DISTRIB_RELEASE': '7'
        }
        self.assertRaises(BigtopError,
                          self.bigtop.check_bigtop_repo_package, 'foo')

        # reset with ubuntu
        mock_lsb_release.return_value = {
            'DISTRIB_CODENAME': 'xenial',
            'DISTRIB_ID': 'ubuntu',
            'DISTRIB_RELEASE': '16.04'
        }

        madison_proc = mock.Mock()
        grep_proc = mock.Mock()

        # simulate a missing repo pkg
        grep_attrs = {'communicate.return_value': (b'', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing repo pkg (message should be logged)
        # NOTE(review): the second assignment immediately overwrites the
        # first, so only grep_proc is ever returned by Popen; this looks
        # like it was meant to be side_effect=[madison_proc, grep_proc].
        # Confirm against check_bigtop_repo_package's Popen pipeline.
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_called_once()
        mock_hookenv.reset_mock()

        # reset our grep args to simulate the repo pkg being found
        grep_attrs = {'communicate.return_value': (b'pkg|1|repo', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo and installed pkg versions are the same (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '1'
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo pkg is newer than installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '0'
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

    @mock.patch('charms.layer.apache_bigtop_base.socket')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_check_reverse_dns(self, mock_hookenv, mock_utils, mock_sub,
                               mock_socket):
        '''
        Verify that we set the reverse_dns_ok state, and handle errors
        correctly.
        '''
        # Test the case where things succeed.
        mock_sub.check_output.return_value = b'domain'
        self.bigtop.check_reverse_dns()
        self.assertTrue(unitdata.kv().get('reverse_dns_ok'))

        # Test the case where we get an exception.
        mock_sub.check_output.return_value = b'localdomain'
        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

        # A socket.herror during reverse lookup should also leave the
        # reverse_dns_ok flag unset.
        class MockHError(Exception):
            pass

        def raise_herror(*args, **kwargs):
            raise MockHError('test')
        mock_socket.herror = MockHError
        mock_socket.gethostbyaddr = raise_herror

        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_fetch_bigtop_release(self, mock_path, mock_hookenv, mock_ver):
        '''Verify we raise an exception if an invalid release is specified.'''
        mock_hookenv.resource_get.return_value = False
        mock_ver.return_value = 'foo'
        self.assertRaises(BigtopError, self.bigtop.fetch_bigtop_release)

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_install_puppet_modules(self, mock_hookenv, mock_utils):
        '''Verify that we seem to install puppet modules correctly.'''
        mock_hookenv.charm_dir.return_value = '/tmp'

        def mock_run_as(user, *args):
            '''
            Verify that we run puppet as root.
            '''
            self.assertEqual(user, 'root')

        mock_utils.run_as.side_effect = mock_run_as
        self.bigtop.install_puppet_modules()

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.glob')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    def test_apply_patches(self, mock_chdir, mock_glob, mock_utils,
                           mock_hookenv):
        '''
        Verify that we apply patches in the correct order.
        '''
        mock_hookenv.charm_dir.return_value = '/tmp'
        # glob returns unsorted names; popping from the end of this list
        # yields 'bar', 'baz', 'foo' -- the sorted order we expect.
        reverse_sorted = ['foo', 'baz', 'bar']
        mock_glob.return_value = ['foo', 'baz', 'bar']

        def mock_run_as(*args):
            patch = args[-1]
            self.assertEqual(args[0], 'root')
            # Verify that we're running on a sorted list.
            self.assertTrue(patch.endswith(reverse_sorted.pop()))

        mock_utils.run_as.side_effect = mock_run_as
        self.bigtop.apply_patches()

    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_base')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_hiera_yaml(self, mock_path, mock_base, mock_yaml):
        '''
        Verify that we attempt to add the values that we expect our hiera
        object, before writing it out to a (mocked) yaml file.
        '''
        def mock_dump(hiera_yaml, *args, **kwargs):
            self.assertTrue(hiera_yaml.get(':yaml'))
            self.assertTrue(':datadir' in hiera_yaml[':yaml'])

        mock_yaml.dump.side_effect = mock_dump

        mock_dst = mock.Mock()
        mock_path.return_value = mock_dst

        # Stub both py2 (__div__) and py3 (__truediv__) path division.
        mock_yaml.load.return_value = defaultdict(lambda: {})
        mock_base.__div__.side_effect = lambda rel: mock_base
        mock_base.__truediv__.side_effect = lambda rel: mock_base

        self.bigtop.render_hiera_yaml()

        # Verify that we attempt to write yaml::datadir to hieradata.
        self.assertTrue(mock_dst.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.site_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_site_yaml(self, mock_path, mock_site, mock_yaml,
                              mock_run):
        '''
        Verify that we attempt to put together a plausible site yaml
        config, before writing it out to a (mocked) yaml file.
        '''
        # Setup
        mock_yaml.load.return_value = defaultdict(lambda: {})
        config = {'roles': None, 'overrides': None, 'hosts': None}

        def verify_yaml(yaml, *args, **kwargs):
            '''
            Verify that the dict we are trying to dump to yaml has the
            values that we expect.
            '''
            self.assertTrue('bigtop::bigtop_repo_uri' in yaml)
            if config['roles'] is None:
                self.assertFalse('bigtop::roles_enabled' in yaml)
            else:
                self.assertTrue('bigtop::roles_enabled' in yaml)
                self.assertTrue('bigtop::roles' in yaml)
                self.assertEqual(yaml['bigtop::roles'],
                                 sorted(config['roles']))
            if config['overrides'] is not None:
                for key in config['overrides']:
                    self.assertTrue(yaml.get(key) == config['overrides'][key])

        mock_yaml.dump.side_effect = verify_yaml

        # Test various permutations of arguments passed in.
        for config_set in [
                {'roles': ['foo', 'bar', 'baz']},      # Test roles
                {'overrides': {'foo': 'bar'}}]:        # Test override
            config.update(config_set)
            # Test
            self.bigtop.render_site_yaml(
                roles=config['roles'],
                overrides=config['overrides'],
                hosts=config['hosts'])
            # Reset
            mock_yaml.load.return_value = defaultdict(lambda: {})
            config['roles'] = None
            config['overrides'] = None
            config['hosts'] = None

    def test_queue_puppet(self):
        '''Verify that we set the expected 'puppet queued' state.'''
        self.bigtop.queue_puppet()
        self.assertTrue(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_handle_queued_puppet(self, mock_ver, mock_hookenv,
                                  mock_trigger):
        '''
        Verify that we attempt to call puppet when it has been queued, and
        then clear the queued state.
        '''
        set_state('apache-bigtop-base.puppet_queued')
        mock_ver.return_value = '1.2.0'
        Bigtop._handle_queued_puppet()
        self.assertTrue(mock_trigger.called)
        self.assertFalse(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.unitdata')
    def test_trigger_puppet(self, mock_unit, mock_chdir, mock_utils):
        '''
        Test to verify that we attempt to trigger puppet correctly.
        '''
        def verify_utils_call(user, puppet, *args):
            self.assertEqual(user, 'root')
            self.assertEqual(puppet, 'puppet')

        mock_kv = mock.Mock()
        mock_unit.kv.return_value = mock_kv
        mock_kv.get.return_value = 'foo'
        mock_utils.run_as.side_effect = verify_utils_call

        self.bigtop.trigger_puppet()
        self.assertTrue(mock_utils.run_as.called)

        # TODO: verify the Java 1.7 logic.

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    def test_check_hdfs_setup(self, mock_run, mock_sub):
        '''
        Verify that our hdfs setup check works as expected, and handles
        errors as expected.
        '''
        class MockException(Exception):
            pass
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        # Output containing 'ubuntu' (with any surrounding whitespace)
        # means hdfs is set up.
        for s in ['ubuntu', ' ubuntu ', 'ubuntu ', ' ubuntu']:
            mock_run.return_value = s
            self.assertTrue(self.bigtop.check_hdfs_setup())

        for s in ['foo', ' ', '', ' bar', 'notubuntu', 'ubuntu not ']:
            mock_run.return_value = s
            self.assertFalse(self.bigtop.check_hdfs_setup())

        # A subprocess failure is treated as "not set up".
        mock_run.side_effect = mock_raise
        self.assertFalse(self.bigtop.check_hdfs_setup())

    @unittest.skip('noop')
    def test_spec(self):
        '''Nothing to test that the linter won't handle.'''

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.chownr')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_run_smoke_tests(self, mock_options, mock_ownr, mock_chdir,
                             mock_run, mock_sub):
        '''
        Verify that we attempt to run smoke tests correctly, and handle
        exceptions as expected.
        '''
        mock_options.return_value = {}

        # Returns None if bigtop isn't available.
        remove_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Returns None if we don't pass in a 'smoke_components' arg
        set_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Should return 'success' if all went well.
        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            'success')

        # Should return error message if subprocess raised an Exception.
        class MockException(Exception):
            pass
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        mock_run.side_effect = mock_raise
        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            "test output")

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_reinstall_repo_packages(self, mock_sub, mock_trigger, mock_pin,
                                     mock_path, mock_hiera, mock_update):
        '''
        Verify that we attempt to trigger puppet during a reinstall, and
        handle exceptions as expected.
        '''
        class MockException(Exception):
            pass
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        # Should return error message if apt-get remove raised an Exception.
        mock_sub.check_call.side_effect = mock_raise
        self.assertEqual(
            self.bigtop.reinstall_repo_packages(remove_pkgs='foo bar-*'),
            "test output")

        # Should call pin twice if trigger puppet fails (once to raise prio,
        # once again to drop it back down)
        mock_trigger.side_effect = mock_raise
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'failed')
        self.assertEqual(mock_pin.call_count, 2)

        # Should return 'success' if all went well.
        mock_trigger.side_effect = None
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'success')

    def test_get_ip_for_interface(self):
        '''
        Test to verify that our get_ip_for_interface method does sensible
        things.
        '''
        ip = self.bigtop.get_ip_for_interface('lo')
        self.assertEqual(ip, '127.0.0.1')

        ip = self.bigtop.get_ip_for_interface('127.0.0.0/24')
        self.assertEqual(ip, '127.0.0.1')

        # If passed 0.0.0.0, or something similar, the function should
        # treat it as a special case, and return what it was passed.
        for i in ['0.0.0.0', '0.0.0.0/0', '0/0', '::']:
            ip = self.bigtop.get_ip_for_interface(i)
            self.assertEqual(ip, i)

        # Unmatched networks and garbage input should raise BigtopError.
        self.assertRaises(BigtopError,
                          self.bigtop.get_ip_for_interface,
                          '2.2.2.0/24')

        self.assertRaises(BigtopError,
                          self.bigtop.get_ip_for_interface,
                          'foo')
def configure(self, available_hosts, zk_units, peers, extra_libs): """ This is the core logic of setting up spark. :param dict available_hosts: Hosts that Spark should know about. :param list zk_units: List of Zookeeper dicts with host/port info. :param list peers: List of Spark peer tuples (unit name, IP). :param list extra_libs: List of extra lib paths for driver/executors. """ # Bootstrap spark if not unitdata.kv().get('spark.bootstrapped', False): self.setup() unitdata.kv().set('spark.bootstrapped', True) # Set KV based on connected applications unitdata.kv().set('zookeeper.units', zk_units) unitdata.kv().set('sparkpeer.units', peers) unitdata.kv().flush(True) # Get our config ready dc = self.dist_config events_log_dir = 'file://{}'.format(dc.path('spark_events')) mode = hookenv.config()['spark_execution_mode'] master_ip = utils.resolve_private_address( available_hosts['spark-master']) master_url = self.get_master_url(master_ip) req_driver_mem = hookenv.config()['driver_memory'] req_executor_mem = hookenv.config()['executor_memory'] # handle tuning options that may be set as percentages driver_mem = '1g' executor_mem = '1g' if req_driver_mem.endswith('%'): if mode == 'standalone' or mode.startswith('local'): mem_mb = host.get_total_ram() / 1024 / 1024 req_percentage = float(req_driver_mem.strip('%')) / 100 driver_mem = str(int(mem_mb * req_percentage)) + 'm' else: hookenv.log( "driver_memory percentage in non-local mode. Using 1g default.", level=None) else: driver_mem = req_driver_mem if req_executor_mem.endswith('%'): if mode == 'standalone' or mode.startswith('local'): mem_mb = host.get_total_ram() / 1024 / 1024 req_percentage = float(req_executor_mem.strip('%')) / 100 executor_mem = str(int(mem_mb * req_percentage)) + 'm' else: hookenv.log( "executor_memory percentage in non-local mode. 
Using 1g default.", level=None) else: executor_mem = req_executor_mem # Setup hosts dict hosts = { 'spark': master_ip, } if 'namenode' in available_hosts: hosts['namenode'] = available_hosts['namenode'] events_log_dir = self.setup_hdfs_logs() else: # Bigtop includes a default hadoop_head_node if we do not specify # any namenode info. To ensure spark standalone doesn't get # invalid hadoop config, set our NN to an empty string. hosts['namenode'] = '' if 'resourcemanager' in available_hosts: hosts['resourcemanager'] = available_hosts['resourcemanager'] # Setup roles dict. We always include the history server and client. # Determine other roles based on our execution mode. roles = ['spark-history-server', 'spark-client'] if mode == 'standalone': roles.append('spark-master') roles.append('spark-worker') elif mode.startswith('yarn'): roles.append('spark-on-yarn') roles.append('spark-yarn-slave') # Setup overrides dict override = { 'spark::common::master_url': master_url, 'spark::common::event_log_dir': events_log_dir, 'spark::common::history_log_dir': events_log_dir, 'spark::common::extra_lib_dirs': ':'.join(extra_libs) if extra_libs else None, 'spark::common::driver_mem': driver_mem, 'spark::common::executor_mem': executor_mem, } if zk_units: zks = [] for unit in zk_units: ip = utils.resolve_private_address(unit['host']) zks.append("%s:%s" % (ip, unit['port'])) zk_connect = ",".join(zks) override['spark::common::zookeeper_connection_string'] = zk_connect else: override['spark::common::zookeeper_connection_string'] = None # Create our site.yaml and trigger puppet bigtop = Bigtop() bigtop.render_site_yaml(hosts, roles, override) bigtop.trigger_puppet() # Do this after our puppet bits in case puppet overrides needed perms if 'namenode' not in available_hosts: # Local event dir (not in HDFS) needs to be 777 so non-spark # users can write job history there. It needs to be g+s so # all entries will be readable by spark (in the spark group). 
# It needs to be +t so users cannot remove files they don't own. dc.path('spark_events').chmod(0o3777) self.patch_worker_master_url(master_ip, master_url) # Install SB (subsequent calls will reconfigure existing install) # SparkBench looks for the spark master in /etc/environment with utils.environment_edit_in_place('/etc/environment') as env: env['MASTER'] = master_url self.install_benchmark()