def install_namenode():
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    nn_host = get_fqdn()
    hosts = {'namenode': nn_host}
    bigtop.render_site_yaml(hosts=hosts, roles='namenode')
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # make our namenode listen on all interfaces
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.rpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.servicerpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.http-bind-host'] = '0.0.0.0'
        props['dfs.namenode.https-bind-host'] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
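The helper utils.xmlpropmap_edit_in_place used above is part of the charm's utility layer and is not shown on this page. A minimal sketch of what such a context manager could look like for Hadoop-style XML config files (an illustration under stated assumptions, not the layer's actual implementation; it assumes plain <property><name>/<value> entries and does not preserve comments):

from contextlib import contextmanager
import xml.etree.ElementTree as ET

@contextmanager
def xmlpropmap_edit_in_place(path):
    # Load <configuration><property><name>/<value> entries into a dict.
    tree = ET.parse(str(path))
    root = tree.getroot()
    props = {p.findtext('name'): p.findtext('value')
             for p in root.findall('property')}
    yield props
    # Rewrite the file from the (possibly mutated) mapping.
    root.clear()
    for name, value in sorted(props.items()):
        prop = ET.SubElement(root, 'property')
        ET.SubElement(prop, 'name').text = name
        ET.SubElement(prop, 'value').text = str(value)
    tree.write(str(path))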
Example #2
def install_resourcemanager(namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the RM install, so poll for
    namenodes() data whenever we have a namenode relation. This allows us to
    install asap, even if 'namenode.ready' is not set yet.
    """
    if namenode.namenodes():
        hookenv.status_set('maintenance', 'installing resourcemanager')
        nn_host = namenode.namenodes()[0]
        rm_host = get_fqdn()
        bigtop = Bigtop()
        hosts = {'namenode': nn_host, 'resourcemanager': rm_host}
        bigtop.render_site_yaml(hosts=hosts, roles='resourcemanager')
        bigtop.trigger_puppet()

        # /etc/hosts entries from the KV are not currently used for bigtop,
        # but a hosts_map attribute is required by some interfaces (eg: mapred-slave)
        # to signify RM's readiness. Set our RM info in the KV to fulfill this
        # requirement.
        utils.initialize_kv_host()

        # Add our ubuntu user to the hadoop and mapred groups.
        get_layer_opts().add_users()

        set_state('apache-bigtop-resourcemanager.installed')
        hookenv.status_set('maintenance', 'resourcemanager installed')
    else:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
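utils.initialize_kv_host is referenced in several snippets but never defined on this page. A plausible sketch, assuming a charmhelpers unitdata KV keyed by the unit's private IP (the real layer-apache-bigtop-base helper may store different data):

import socket

from charmhelpers.core import hookenv, unitdata

def initialize_kv_host():
    # Record this unit's private-address -> hostname mapping in the KV
    # so interfaces that expect a hosts_map can see the master is ready.
    unitdata.kv().update(
        {hookenv.unit_private_ip(): socket.gethostname()},
        prefix='etc_host.')
    unitdata.kv().flush(True)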
Example #3
    def configure_kafka(self, zk_units, network_interface=None):
        # Get ip:port data from our connected zookeepers
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append("%s:%s" % (ip, unit['port']))
        zks.sort()
        zk_connect = ",".join(zks)
        service, unit_num = os.environ['JUJU_UNIT_NAME'].split('/', 1)
        kafka_port = self.dist_config.port('kafka')

        roles = ['kafka-server']
        override = {
            'kafka::server::broker_id': unit_num,
            'kafka::server::port': kafka_port,
            'kafka::server::zookeeper_connection_string': zk_connect,
        }
        if network_interface:
            ip = Bigtop().get_ip_for_interface(network_interface)
            override['kafka::server::bind_addr'] = ip

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles, overrides=override)
        bigtop.trigger_puppet()
        self.set_advertise()
        self.restart()
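For illustration, here is the connection-string logic above run against a hypothetical zk_units payload (resolve_private_address is skipped; the host values stand in for already-resolved IPs):

zk_units = [
    {'host': '10.0.0.3', 'port': 2181},
    {'host': '10.0.0.2', 'port': 2181},
]
zks = sorted('%s:%s' % (unit['host'], unit['port']) for unit in zk_units)
zk_connect = ','.join(zks)
print(zk_connect)  # -> 10.0.0.2:2181,10.0.0.3:2181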
Example #4
    def install_pig(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Pig service.
        '''
        # Dirs are handled by the bigtop deb. No need to call out to
        # dist_config to do that work.
        roles = ['pig-client']

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles)
        bigtop.trigger_puppet()

        # Set app version for juju status output; pig --version looks like:
        #   Apache Pig version 0.15.0 (r: unknown)
        #   compiled Feb 06 2016, 23:00:40
        try:
            pig_out = check_output(['pig', '-x', 'local', '--version']).decode()
        except CalledProcessError as e:
            pig_out = (e.output or b'').decode()
        lines = pig_out.splitlines()
        parts = lines[0].split() if lines else []
        if len(parts) < 4:
            hookenv.log('Error getting Pig version: {}'.format(pig_out),
                        hookenv.ERROR)
            pig_ver = ''
        else:
            pig_ver = parts[3]
        hookenv.application_version_set(pig_ver)
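The version parsing above can be sanity-checked against the sample output quoted in the comment:

sample = ('Apache Pig version 0.15.0 (r: unknown)\n'
          'compiled Feb 06 2016, 23:00:40')
lines = sample.splitlines()
parts = lines[0].split() if lines else []
# parts == ['Apache', 'Pig', 'version', '0.15.0', '(r:', 'unknown)']
print(parts[3] if len(parts) >= 4 else '')  # -> 0.15.0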
Example #7
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.
        '''
        bigtop = Bigtop()
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)

        # The zep deb depends on spark-core which unfortunately brings in
        # most of hadoop. Include appropriate roles here to ensure these
        # packages are configured in the same way as our other Bigtop
        # software deployed with puppet.
        bigtop.render_site_yaml(
            roles=[
                'spark-client',
                'spark-yarn-slave',
                'zeppelin-server',
            ],
            overrides=overrides,
        )

        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        if unitdata.kv().get('zeppelin.version.repo', False):
            hookenv.log("An upgrade is available and the site.yaml has been "
                        "configured. Run the 'reinstall' action to continue.",
                        level=hookenv.INFO)
        else:
            ####################################################################
            # BUG: BIGTOP-2742
            # Default zeppelin init script looks for the literal '$(hostname)'
            # string. Symlink it so it exists before the apt install from puppet
            # tries to start the service.
            import subprocess
            host = subprocess.check_output(['hostname']).decode('utf8').strip()
            zepp_pid = '/var/run/zeppelin/zeppelin-zeppelin-{}.pid'.format(host)
            utils.run_as('root', 'mkdir', '-p', '/var/run/zeppelin')
            utils.run_as('root', 'ln', '-sf',
                         zepp_pid,
                         '/var/run/zeppelin/zeppelin-zeppelin-$(hostname).pid')
            ####################################################################

            bigtop.trigger_puppet()
            self.wait_for_api(30)

            ####################################################################
            # BUG: BIGTOP-2742
            # Puppet apply will call systemctl daemon-reload, which removes the
            # symlink we just created. Now that the bits are on disk, update the
            # init script $(hostname) that caused this mess to begin with.
            zepp_init_script = '/etc/init.d/zeppelin'
            utils.re_edit_in_place(zepp_init_script, {
                r'^# pidfile.*': '# pidfile: {}'.format(zepp_pid),
            })
            utils.run_as('root', 'systemctl', 'daemon-reload')
            self.restart()
            self.wait_for_api(30)
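utils.re_edit_in_place, used above to rewrite the init script, also comes from the charm's utility layer. A minimal sketch of the idea (ignoring options such as append_non_matches seen in later snippets; the real helper may differ):

import re

def re_edit_in_place(path, subs):
    # Apply each pattern -> replacement substitution to every line,
    # then write the result back to the same file.
    with open(path) as f:
        lines = f.readlines()
    with open(path, 'w') as f:
        for line in lines:
            for pattern, repl in subs.items():
                line = re.sub(pattern, repl, line)
            f.write(line)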
Example #8
    def install_oozie(self):
        roles = ['hadoop-client']

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles)
        bigtop.trigger_puppet()

        roles = ['oozie-client', 'oozie-server']

        bigtop.render_site_yaml(roles=roles)
        bigtop.trigger_puppet()
Example #9
    def configure(self, available_hosts):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists aka HDFS is there
          * Resource manager exists aka YARN is ready

        Both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        """

        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        self.install_benchmark()

        hosts = {
            'spark': available_hosts['spark-master'],
        }

        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        roles = self.get_roles()

        override = {
            'spark::common::master_url': self.get_master_url(available_hosts['spark-master']),
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
        # There is a race condition here.
        # The worker role will not start the first time we trigger puppet apply.
        # The exception in /var/log/spark:
        # Exception in thread "main" org.apache.spark.SparkException: Invalid master URL: spark://:7077
        # The master url is not set at the time the worker starts the first time.
        # TODO(kjackal): investigate, debug, submit patch.
        bigtop.trigger_puppet()
        if 'namenode' not in available_hosts:
            # Make sure users other than spark can access the events logs dir and run jobs
            utils.run_as('root', 'chmod', '777', dc.path('spark_events'))
Example #11
    def trigger_bigtop(self):
        bigtop = Bigtop()
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)
        bigtop.render_site_yaml(
            roles=[
                'zeppelin-server',
            ],
            overrides=overrides,
        )
        bigtop.trigger_puppet()
        self.wait_for_api(30)
Example #12
def install_mahout():
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'mahout-client',
        ],
    )
    bigtop.trigger_puppet()
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    hookenv.status_set('active', 'ready')
    set_state('mahout.installed')
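utils.environment_edit_in_place appears throughout these snippets. A simplified sketch of what it could do, assuming /etc/environment holds KEY="VALUE" lines (the real helper may handle quoting, comments, and error recovery differently):

from contextlib import contextmanager

@contextmanager
def environment_edit_in_place(path='/etc/environment'):
    # Parse KEY=VALUE lines into a dict, let the caller mutate it,
    # then write the mapping back out.
    env = {}
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if line and not line.startswith('#') and '=' in line:
                key, _, value = line.partition('=')
                env[key] = value.strip('"')
    yield env
    with open(path, 'w') as f:
        for key, value in sorted(env.items()):
            f.write('{}="{}"\n'.format(key, value))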
Example #13
    def install(self, hbase=None, zk_units=None):
        '''
        Trigger the Bigtop puppet recipe that handles the Hive service.
        '''
        # Dirs are handled by the bigtop deb. No need to call out to
        # dist_config to do that. We do want 'ubuntu' in the hive group though.
        self.dist_config.add_users()

        # Prep config
        roles = ['hive-client', 'hive-metastore', 'hive-server2']
        metastore = "thrift://{}:9083".format(hookenv.unit_private_ip())

        if hbase:
            roles.append('hive-hbase')
            hb_connect = "{}:{}".format(hbase['host'], hbase['master_port'])
            zk_hbase_connect = hbase['zk_connect']
        else:
            hb_connect = ""
            zk_hbase_connect = ""

        if zk_units:
            hive_support_concurrency = True
            zk_hive_connect = self.get_zk_connect(zk_units)
        else:
            hive_support_concurrency = False
            zk_hive_connect = ""

        override = {
            'hadoop_hive::common_config::hbase_master':
            hb_connect,
            'hadoop_hive::common_config::hbase_zookeeper_quorum':
            zk_hbase_connect,
            'hadoop_hive::common_config::hive_zookeeper_quorum':
            zk_hive_connect,
            'hadoop_hive::common_config::hive_support_concurrency':
            hive_support_concurrency,
            'hadoop_hive::common_config::metastore_uris':
            metastore,
            'hadoop_hive::common_config::server2_thrift_port':
            self.dist_config.port('hive-thrift'),
            'hadoop_hive::common_config::server2_thrift_http_port':
            self.dist_config.port('hive-thrift-web'),
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles, overrides=override)
        bigtop.trigger_puppet()

        # Bigtop doesn't create a hive-env.sh, but we need it for heap config
        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        if not hive_env.exists():
            (self.dist_config.path('hive_conf') /
             'hive-env.sh.template').copy(hive_env)
Example #15
def install_hadoop_client_yarn(principal, namenode, resourcemanager):
    if namenode.namenodes() and resourcemanager.resourcemanagers():
        hookenv.status_set('maintenance', 'installing plugin (yarn)')
        nn_host = namenode.namenodes()[0]
        rm_host = resourcemanager.resourcemanagers()[0]
        bigtop = Bigtop()
        hosts = {'namenode': nn_host, 'resourcemanager': rm_host}
        bigtop.render_site_yaml(hosts=hosts, roles='hadoop-client')
        bigtop.trigger_puppet()
        set_state('apache-bigtop-plugin.yarn.installed')
        hookenv.status_set('maintenance', 'plugin (yarn) installed')
    else:
        hookenv.status_set('waiting', 'waiting for master fqdns')
Example #17
    def install(self, hbase=None, zk_units=None):
        '''
        Trigger the Bigtop puppet recipe that handles the Hive service.
        '''
        # Dirs are handled by the bigtop deb. No need to call out to
        # dist_config to do that. We do want 'ubuntu' in the hive group though.
        self.dist_config.add_users()

        # Prep config
        roles = ['hive-client', 'hive-metastore', 'hive-server2']
        metastore = "thrift://{}:9083".format(hookenv.unit_private_ip())

        if hbase:
            roles.append('hive-hbase')
            hb_connect = "{}:{}".format(hbase['host'], hbase['master_port'])
            zk_hbase_connect = hbase['zk_connect']
        else:
            hb_connect = ""
            zk_hbase_connect = ""

        if zk_units:
            hive_support_concurrency = True
            zk_hive_connect = self.get_zk_connect(zk_units)
        else:
            hive_support_concurrency = False
            zk_hive_connect = ""

        override = {
            'hadoop_hive::common_config::hbase_master': hb_connect,
            'hadoop_hive::common_config::hbase_zookeeper_quorum':
                zk_hbase_connect,
            'hadoop_hive::common_config::hive_zookeeper_quorum':
                zk_hive_connect,
            'hadoop_hive::common_config::hive_support_concurrency':
                hive_support_concurrency,
            'hadoop_hive::common_config::metastore_uris': metastore,
            'hadoop_hive::common_config::server2_thrift_port':
                self.dist_config.port('hive-thrift'),
            'hadoop_hive::common_config::server2_thrift_http_port':
                self.dist_config.port('hive-thrift-web'),
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(roles=roles, overrides=override)
        bigtop.trigger_puppet()

        # Bigtop doesn't create a hive-env.sh, but we need it for heap config
        hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
        if not hive_env.exists():
            (self.dist_config.path('hive_conf') / 'hive-env.sh.template').copy(
                hive_env)
Example #18
File: mahout.py  Project: evans-ye/bigtop
def install_mahout():
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'mahout-client',
        ],
    )
    bigtop.trigger_puppet()
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    hookenv.status_set('active', 'ready')
    set_state('mahout.installed')
Example #19
    def configure(self, hosts, zk_units):
        zk_connect = self.get_zk_connect(zk_units)
        roles = ['hbase-server', 'hbase-master', 'hbase-client']
        override = {
            'bigtop::hbase_thrift_port': self.dist_config.port('hbase-thrift'),
            'hadoop_hbase::client::thrift': True,
            'hadoop_hbase::common_config::heap_size': hookenv.config()['heap'],
            'hadoop_hbase::common_config::zookeeper_quorum': zk_connect,
            'hadoop_hbase::deploy::auxiliary': False,
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
Example #20
    def install(self, nodes=None):
        '''
        Write out the config, then run puppet.

        After this runs, we should have a configured and running service.

        '''
        bigtop = Bigtop()
        log("Rendering site yaml ''with overrides: {}".format(self._override))
        bigtop.render_site_yaml(self._hosts, self._roles, self._override)
        bigtop.trigger_puppet()
        if self.is_zk_leader():
            zkpeer = RelationBase.from_state('zkpeer.joined')
            zkpeer.set_zk_leader()
Example #22
def install_mahout():
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'mahout-client',
        ],
    )
    bigtop.trigger_puppet()
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    set_state('mahout.installed')
    hookenv.status_set('active', 'ready')
    # set app version string for juju status output
    mahout_version = get_package_version('mahout') or 'unknown'
    hookenv.application_version_set(mahout_version)
Example #23
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.
        '''
        bigtop = Bigtop()
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)
        bigtop.render_site_yaml(
            roles=[
                'zeppelin-server',
            ],
            overrides=overrides,
        )

        bigtop.trigger_puppet()
        self.wait_for_api(30)
Example #24
File: mahout.py  Project: Guavus/bigtop
def install_mahout():
    hookenv.status_set('maintenance', 'installing mahout')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'mahout-client',
        ],
    )
    bigtop.trigger_puppet()
    with utils.environment_edit_in_place('/etc/environment') as env:
        env['MAHOUT_HOME'] = '/usr/lib/mahout'

    set_state('mahout.installed')
    hookenv.status_set('active', 'ready')
    # set app version string for juju status output
    mahout_version = get_package_version('mahout') or 'unknown'
    hookenv.application_version_set(mahout_version)
Example #25
    def configure(self, hosts, zk_units):
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit['host'])
            zks.append(ip)
        zks.sort()
        zk_connect = ",".join(zks)

        roles = ['hbase-server', 'hbase-master', 'hbase-client']

        override = {
            'hadoop_hbase::common_config::zookeeper_quorum': zk_connect,
            'hadoop_hbase::deploy::auxiliary': False
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
Example #26
def install_hadoop_client_hdfs(principal, namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the plugin install, so poll for
    namenodes() data whenever we have a namenode relation. This allows us to
    install asap, even if 'namenode.ready' is not set yet.
    """
    if namenode.namenodes():
        hookenv.status_set('maintenance', 'installing plugin (hdfs)')
        nn_host = namenode.namenodes()[0]
        bigtop = Bigtop()
        hosts = {'namenode': nn_host}
        bigtop.render_site_yaml(hosts=hosts, roles='hadoop-client')
        bigtop.trigger_puppet()
        set_state('apache-bigtop-plugin.hdfs.installed')
        hookenv.status_set('maintenance', 'plugin (hdfs) installed')
    else:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
Example #27
    def configure(self, hosts, zk_units):
        zks = []
        for unit in zk_units:
            ip = utils.resolve_private_address(unit["host"])
            zks.append(ip)
        zks.sort()
        zk_connect = ",".join(zks)

        roles = ["hbase-server", "hbase-master", "hbase-client"]

        override = {
            "hadoop_hbase::common_config::zookeeper_quorum": zk_connect,
            "hadoop_hbase::deploy::auxiliary": False,
        }

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
Example #29
File: giraph.py  Project: apache/bigtop
def install_giraph(giraph):
    """Install giraph when prerequisite states are present."""
    hookenv.status_set('maintenance', 'installing giraph')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'giraph-client',
        ],
    )
    bigtop.trigger_puppet()

    # Put down the -doc subpackage so we get giraph-examples
    fetch.apt_install('giraph-doc')

    giraph_home = Path('/usr/lib/giraph')
    giraph_docdir = Path('/usr/share/doc/giraph')
    giraph_libdir = Path(giraph_home / 'lib')
    giraph_examples = glob('{}/giraph-examples-*.jar'.format(giraph_docdir))

    # Gather a list of all the giraph jars (needed for -libjars)
    giraph_jars = giraph_examples
    giraph_jars.extend(get_good_jars(giraph_home, prefix=True))
    giraph_jars.extend(get_good_jars(giraph_libdir, prefix=True))

    # Update environment with appropriate giraph bits. HADOOP_CLASSPATH can
    # use wildcards (and it should for readability), but GIRAPH_JARS, which
    # is intended to be used as 'hadoop jar -libjars $GIRAPH_JARS', needs to
    # be a comma-separated list of jars.
    with utils.environment_edit_in_place('/etc/environment') as env:
        cur_cp = env['HADOOP_CLASSPATH'] if 'HADOOP_CLASSPATH' in env else ""
        env['GIRAPH_HOME'] = giraph_home
        env['HADOOP_CLASSPATH'] = "{examples}/*:{home}/*:{libs}/*:{cp}".format(
            examples=giraph_docdir,
            home=giraph_home,
            libs=giraph_libdir,
            cp=cur_cp
        )
        env['GIRAPH_JARS'] = ','.join(j for j in giraph_jars)

    set_state('giraph.installed')
    report_status()
    # set app version string for juju status output
    giraph_version = get_package_version('giraph') or 'unknown'
    hookenv.application_version_set(giraph_version)
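The comment above draws a distinction worth a concrete example: HADOOP_CLASSPATH entries are colon-separated and may use wildcards, while -libjars needs every jar spelled out, comma-separated (the paths below are hypothetical):

giraph_jars = [
    '/usr/share/doc/giraph/giraph-examples-1.1.0.jar',
    '/usr/lib/giraph/giraph-core-1.1.0.jar',
]
hadoop_classpath = '/usr/share/doc/giraph/*:/usr/lib/giraph/*:/usr/lib/giraph/lib/*'
libjars = ','.join(giraph_jars)  # suitable for: hadoop jar ... -libjars "$GIRAPH_JARS"
print(libjars)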
Example #30
def install_giraph(giraph):
    """Install giraph when prerequisite states are present."""
    hookenv.status_set('maintenance', 'installing giraph')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        roles=[
            'giraph-client',
        ],
    )
    bigtop.trigger_puppet()

    # Put down the -doc subpackage so we get giraph-examples
    fetch.apt_install('giraph-doc')

    giraph_home = Path('/usr/lib/giraph')
    giraph_docdir = Path('/usr/share/doc/giraph')
    giraph_libdir = Path(giraph_home / 'lib')
    giraph_examples = glob('{}/giraph-examples-*.jar'.format(giraph_docdir))

    # Gather a list of all the giraph jars (needed for -libjars)
    giraph_jars = giraph_examples
    giraph_jars.extend(get_good_jars(giraph_home, prefix=True))
    giraph_jars.extend(get_good_jars(giraph_libdir, prefix=True))

    # Update environment with appropriate giraph bits. HADOOP_CLASSPATH can
    # use wildcards (and it should for readability), but GIRAPH_JARS, which
    # is intended to be used as 'hadoop jar -libjars $GIRAPH_JARS', needs to
    # be a comma-separated list of jars.
    with utils.environment_edit_in_place('/etc/environment') as env:
        cur_cp = env['HADOOP_CLASSPATH'] if 'HADOOP_CLASSPATH' in env else ""
        env['GIRAPH_HOME'] = giraph_home
        env['HADOOP_CLASSPATH'] = "{examples}/*:{home}/*:{libs}/*:{cp}".format(
            examples=giraph_docdir,
            home=giraph_home,
            libs=giraph_libdir,
            cp=cur_cp)
        env['GIRAPH_JARS'] = ','.join(j for j in giraph_jars)

    set_state('giraph.installed')
    report_status()
    # set app version string for juju status output
    giraph_version = get_package_version('giraph') or 'unknown'
    hookenv.application_version_set(giraph_version)
Example #31
File: namenode.py  Project: Guavus/bigtop
def install_namenode():
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')
    bigtop.render_site_yaml(
        hosts={
            'namenode': get_fqdn(),
        },
        roles=[
            'namenode',
            'mapred-app',
        ],
        # NB: We want the NN to listen on all interfaces, so bind to 0.0.0.0.
        overrides={
            'hadoop::common_hdfs::hadoop_namenode_port': hdfs_port,
            'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
            'hadoop::common_hdfs::hadoop_namenode_http_port': webhdfs_port,
            'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
            'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
        }
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # We need to create the 'mapred' and 'spark' user/group since we may not
    # be installing hadoop-mapreduce or spark on this machine. This is needed
    # so the namenode can access yarn and spark job history files in hdfs. Also
    # add our ubuntu user to the hadoop, mapred, and spark groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Example #32
    def trigger_bigtop(self):
        '''
        Trigger the Bigtop puppet recipe that handles the Zeppelin service.
        '''
        bigtop = Bigtop()
        overrides = unitdata.kv().getrange('zeppelin.bigtop.overrides.',
                                           strip=True)

        # The zep deb depends on spark-core which unfortunately brings in
        # most of hadoop. Include appropriate roles here to ensure these
        # packages are configured in the same way as our other Bigtop
        # software deployed with puppet.
        bigtop.render_site_yaml(
            roles=[
                'spark-client',
                'spark-yarn-slave',
                'zeppelin-server',
            ],
            overrides=overrides,
        )

        bigtop.trigger_puppet()
        self.wait_for_api(30)
Example #34
def install_namenode():
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    hdfs_port = get_layer_opts().port('namenode')
    webhdfs_port = get_layer_opts().port('nn_webapp_http')
    bigtop.render_site_yaml(
        hosts={
            'namenode': get_fqdn(),
        },
        roles=[
            'namenode',
            'mapred-app',
        ],
        # NB: We want the NN to listen on all interfaces, so bind to 0.0.0.0.
        overrides={
            'hadoop::common_hdfs::hadoop_namenode_port': hdfs_port,
            'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
            'hadoop::common_hdfs::hadoop_namenode_http_port': webhdfs_port,
            'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
            'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
        })
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Example #35
def install_namenode():
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={
            'namenode': get_fqdn(),
        },
        roles=[
            'namenode',
            'mapred-app',
        ],
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # make our namenode listen on all interfaces
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.rpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.servicerpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.http-bind-host'] = '0.0.0.0'
        props['dfs.namenode.https-bind-host'] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
Example #36
    def configure(self, available_hosts, zk_units, peers):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists aka HDFS is there
          * Resource manager exists aka YARN is ready

        Both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        """
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        hosts = {
            'spark': master_ip,
        }

        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        roles = self.get_roles()

        override = {
            'spark::common::master_url': self.get_master_url(master_ip),
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }

        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = ""

        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()
        # There is a race condition here.
        # The worker role will not start the first time we trigger puppet apply.
        # The exception in /var/log/spark:
        # Exception in thread "main" org.apache.spark.SparkException: Invalid master URL: spark://:7077
        # The master url is not set at the time the worker starts the first time.
        # TODO(kjackal): investigate, debug, submit patch.
        bigtop.trigger_puppet()
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip)

        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = self.get_master_url(master_ip)
        # Install SB (subsequent calls will reconfigure existing install)
        self.install_benchmark()
Example #37
def install_resourcemanager(namenode):
    """Install if the namenode has sent its FQDN.

    We only need the namenode FQDN to perform the RM install, so poll for
    namenodes() data whenever we have a namenode relation. This allows us to
    install asap, even if 'namenode.ready' is not set yet.
    """
    if namenode.namenodes():
        hookenv.status_set('maintenance', 'installing resourcemanager')
        # Hosts
        nn_host = namenode.namenodes()[0]
        rm_host = get_fqdn()

        # Ports
        rm_ipc = get_layer_opts().port('resourcemanager')
        rm_http = get_layer_opts().port('rm_webapp_http')
        jh_ipc = get_layer_opts().port('jobhistory')
        jh_http = get_layer_opts().port('jh_webapp_http')
        hdfs_port = namenode.port()
        webhdfs_port = namenode.webhdfs_port()

        bigtop = Bigtop()
        bigtop.render_site_yaml(
            hosts={
                'namenode': nn_host,
                'resourcemanager': rm_host,
            },
            roles=[
                'resourcemanager',
            ],
            # NB: When we colocate the NN and RM, the RM will run puppet apply
            # last. To ensure we don't lose any hdfs-site.xml data set by the
            # NN, override common_hdfs properties again here.
            overrides={
                'hadoop::common_yarn::hadoop_rm_port': rm_ipc,
                'hadoop::common_yarn::hadoop_rm_webapp_port': rm_http,
                'hadoop::common_yarn::hadoop_rm_bind_host': '0.0.0.0',
                'hadoop::common_mapred_app::mapreduce_jobhistory_host': '0.0.0.0',
                'hadoop::common_mapred_app::mapreduce_jobhistory_port': jh_ipc,
                'hadoop::common_mapred_app::mapreduce_jobhistory_webapp_port': jh_http,
                'hadoop::common_hdfs::hadoop_namenode_port': hdfs_port,
                'hadoop::common_hdfs::hadoop_namenode_bind_host': '0.0.0.0',
                'hadoop::common_hdfs::hadoop_namenode_http_port': webhdfs_port,
                'hadoop::common_hdfs::hadoop_namenode_http_bind_host': '0.0.0.0',
                'hadoop::common_hdfs::hadoop_namenode_https_bind_host': '0.0.0.0',
            }
        )
        bigtop.trigger_puppet()

        # /etc/hosts entries from the KV are not currently used for bigtop,
        # but a hosts_map attribute is required by some interfaces (eg: mapred-slave)
        # to signify RM's readiness. Set our RM info in the KV to fulfill this
        # requirement.
        utils.initialize_kv_host()

        # We need to create the 'spark' user/group since we may not be
        # installing spark on this machine. This is needed so the history
        # server can access spark job history files in hdfs. Also add our
        # ubuntu user to the hadoop, mapred, and spark groups on this machine.
        get_layer_opts().add_users()

        set_state('apache-bigtop-resourcemanager.installed')
        hookenv.status_set('maintenance', 'resourcemanager installed')
    else:
        hookenv.status_set('waiting', 'waiting for namenode fqdn')
Example #38
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(
            available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']
        if mode.startswith('yarn'):
            spark_events = 'hdfs://{}'.format(dc.path('spark_events'))
        else:
            spark_events = 'file://{}'.format(dc.path('spark_events'))

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "driver_memory percentage in non-local mode. "
                    "Using 1g default.",
                    level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. "
                    "Using 1g default.",
                    level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Some spark applications look for envars in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
            env['SPARK_HOME'] = dc.path('spark_home')

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url':
            master_url,
            'spark::common::event_log_dir':
            spark_events,
            'spark::common::history_log_dir':
            spark_events,
            'spark::common::extra_lib_dirs':
            ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem':
            driver_mem,
            'spark::common::executor_mem':
            executor_mem,
        }
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet.
        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        if unitdata.kv().get('spark.version.repo', False):
            hookenv.log(
                "An upgrade is available and the site.yaml has been "
                "configured. Run the 'reinstall' action to continue.",
                level=hookenv.INFO)
        else:
            bigtop.trigger_puppet()
            self.patch_worker_master_url(master_ip, master_url)

            # Packages don't create the event dir by default. Do it each time
            # spark is (re)installed to ensure location/perms are correct.
            self.configure_events_dir(mode)

        # Handle examples and Spark-Bench. Do this each time this method is
        # called in case we need to act on a new resource or user config.
        self.configure_examples()
        self.configure_sparkbench()
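The percentage handling above reduces to one small calculation; a standalone sketch, assuming (as the snippets do) that host.get_total_ram() reports bytes:

def mem_from_percentage(requested, total_ram_bytes):
    # '50%' of 8 GiB -> '4096m', mirroring the arithmetic above.
    mem_mb = total_ram_bytes / 1024 / 1024
    percentage = float(requested.strip('%')) / 100
    return str(int(mem_mb * percentage)) + 'm'

print(mem_from_percentage('50%', 8 * 1024 ** 3))  # -> 4096m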
Example #39
    def configure(self, available_hosts, zk_units, peers):
        """
        This is the core logic of setting up spark.

        Two flags are needed:

          * Namenode exists aka HDFS is ready
          * Resource manager exists aka YARN is ready

        Both flags are inferred from the available hosts.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        """
        # Bootstrap spark
        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()

        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url': master_url,
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
        }
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()

        # Do this after our puppet bits in case puppet overrides needed perms
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip, master_url)

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        req_driver_mem = hookenv.config()['driver_memory']
        executor_mem = '1g'
        req_executor_mem = hookenv.config()['executor_memory']
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("driver_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. Using 1g default.",
                            level=None)
        else:
            executor_mem = req_executor_mem

        spark_env = '/etc/spark/conf/spark-env.sh'
        utils.re_edit_in_place(spark_env, {
            r'.*SPARK_DRIVER_MEMORY.*': 'export SPARK_DRIVER_MEMORY={}'.format(driver_mem),
            r'.*SPARK_EXECUTOR_MEMORY.*': 'export SPARK_EXECUTOR_MEMORY={}'.format(executor_mem),
        }, append_non_matches=True)

        # Install SB (subsequent calls will reconfigure existing install)
        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
        self.install_benchmark()
Example #40
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']
        if mode.startswith('yarn'):
            spark_events = 'hdfs://{}'.format(dc.path('spark_events'))
        else:
            spark_events = 'file://{}'.format(dc.path('spark_events'))

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("driver_memory percentage in non-local mode. "
                            "Using 1g default.", level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log("executor_memory percentage in non-local mode. "
                            "Using 1g default.", level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Some spark applications look for envars in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
            env['SPARK_HOME'] = dc.path('spark_home')

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url': master_url,
            'spark::common::event_log_dir': spark_events,
            'spark::common::history_log_dir': spark_events,
            'spark::common::extra_lib_dirs':
                ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem': driver_mem,
            'spark::common::executor_mem': executor_mem,
        }
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet.
        # NB: during an upgrade, we configure the site.yaml, but do not
        # trigger puppet. The user must do that with the 'reinstall' action.
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        if unitdata.kv().get('spark.version.repo', False):
            hookenv.log("An upgrade is available and the site.yaml has been "
                        "configured. Run the 'reinstall' action to continue.",
                        level=hookenv.INFO)
        else:
            bigtop.trigger_puppet()
            self.patch_worker_master_url(master_ip, master_url)

            # Packages don't create the event dir by default. Do it each time
            # spark is (re)installed to ensure location/perms are correct.
            self.configure_events_dir(mode)

        # Handle examples and Spark-Bench. Do this each time this method is
        # called in case we need to act on a new resource or user config.
        self.configure_examples()
        self.configure_sparkbench()
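
The percentage handling above reduces to a small pure function. Below is a
minimal standalone sketch of that logic; resolve_mem and its parameters are
illustrative names, not part of the charm code.

def resolve_mem(requested, total_ram_bytes, local_mode, default='1g'):
    """Turn a '50%'-style request into an absolute size like '8192m'.

    Percentages are only honoured in standalone/local mode; otherwise
    fall back to the default, mirroring the charm logic above.
    """
    if not requested.endswith('%'):
        return requested
    if not local_mode:
        return default
    mem_mb = total_ram_bytes / 1024 / 1024
    fraction = float(requested.strip('%')) / 100
    return '{}m'.format(int(mem_mb * fraction))

# e.g. on a 16 GiB machine in standalone mode:
# resolve_mem('50%', 16 * 1024 ** 3, local_mode=True) -> '8192m'
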
class TestBigtopUnit(Harness):
    '''
    Unit tests for Bigtop class.

    '''

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def setUp(self, mock_ver, mock_hookenv):
        mock_ver.return_value = '1.2.0'
        super(TestBigtopUnit, self).setUp()
        self.bigtop = Bigtop()

    def test_init(self):
        '''
        Verify that the Bigtop class can init itself, and that it has some
        of the properties that we expect.

        '''
        # paths should be Path objects.
        self.assertEqual(type(self.bigtop.bigtop_base), Path)
        self.assertEqual(type(self.bigtop.site_yaml), Path)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_puppet_modules')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_reverse_dns')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.check_localdomain')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_java')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.install_swap')
    @mock.patch('charms.layer.apache_bigtop_base.is_container')
    def test_install(self, mock_container, mock_swap, mock_java, mock_pin,
                     mock_local, mock_dns, mock_fetch, mock_puppet, mock_apply,
                     mock_hiera):
        '''
        Verify install calls expected class methods.

        '''
        mock_container.return_value = False
        self.bigtop.install()
        self.assertTrue(mock_swap.called)
        self.assertTrue(mock_java.called)
        self.assertTrue(mock_pin.called)
        self.assertTrue(mock_local.called)
        self.assertTrue(mock_dns.called)
        self.assertTrue(mock_fetch.called)
        self.assertTrue(mock_puppet.called)
        self.assertTrue(mock_apply.called)
        self.assertTrue(mock_hiera.called)

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.apply_patches')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.fetch_bigtop_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    def test_refresh_bigtop_release(self, mock_pin, mock_fetch, mock_apply,
                                    mock_update):
        '''
        Verify refresh calls expected class methods.

        '''
        self.bigtop.refresh_bigtop_release()
        self.assertTrue(mock_pin.called)
        self.assertTrue(mock_fetch.called)
        self.assertTrue(mock_apply.called)
        self.assertTrue(mock_update.called)

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_get_repo_url(self, mock_ver, mock_lsb_release,
                          mock_utils, mock_hookenv):
        '''
        Verify that we set up an appropriate repository.

        '''
        mock_ver.return_value = '1.1.0'

        # non-ubuntu should throw an exception
        mock_lsb_release.return_value = {'DISTRIB_ID': 'centos'}
        self.assertRaises(
            BigtopError,
            self.bigtop.get_repo_url,
            '1.1.0')

        # 1.1.0 on trusty/non-power
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/trusty/foo'))

        # 1.1.0 on trusty/power (should return vivid url)
        mock_utils.cpu_arch.return_value = 'ppc64le'
        self.assertEqual(self.bigtop.get_repo_url('1.1.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.1.0/ubuntu/vivid/ppc64el'))

        # 1.2.0 on xenial
        mock_ver.return_value = '1.2.0'
        mock_utils.cpu_arch.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_ID': 'ubuntu'}
        self.assertEqual(self.bigtop.get_repo_url('1.2.0'),
                         ('http://bigtop-repos.s3.amazonaws.com/releases/'
                          '1.2.0/ubuntu/16.04/foo'))

        # 1.2.1 on xenial/intel
        mock_hookenv.return_value = {'name': 'foo'}
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(self.bigtop.get_repo_url('1.2.1'),
                         ('http://repos.bigtop.apache.org/releases/'
                          '1.2.1/ubuntu/16.04/x86_64'))

        # 1.2.1 on xenial/non-intel
        mock_ver.return_value = '1.2.1'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(self.bigtop.get_repo_url('1.2.1'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-1.2.1/'
                          'OS=ubuntu-16.04/lastSuccessfulBuild/artifact/output/apt'))

        # master on xenial/intel
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'x86_64'
        self.assertEqual(self.bigtop.get_repo_url('master'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
                          'OS=ubuntu-16.04,label=docker-slave/ws/output/apt'))

        # master on xenial/non-intel
        mock_ver.return_value = 'master'
        mock_utils.cpu_arch.return_value = 'foo'
        self.assertEqual(self.bigtop.get_repo_url('master'),
                         ('https://ci.bigtop.apache.org/job/Bigtop-trunk-repos/'
                          'OS=ubuntu-16.04-foo,label=docker-slave/ws/output/apt'))

        # test bad version on xenial should throw an exception
        self.assertRaises(
            BigtopError,
            self.bigtop.get_repo_url,
            '0.0.0')

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_install_swap_when_swap_exists(self, mock_sub):
        '''
        Verify we do not attempt to install swap space if it already exists.

        '''
        mock_sub.check_output.return_value = b"foo\nbar"
        mock_sub.reset_mock()
        self.bigtop.install_swap()

        # We reset the mock, so here we're verifying no other subprocess
        # calls were made.
        mock_sub.check_call.assert_not_called()

    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.fetch')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_install_java(self, mock_options, mock_fetch,
                          mock_utils, mock_lsb_release):
        '''
        Test to verify that we install java when requested.

        '''
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial'}

        # Should be noop if install_java layer opt is not set.
        self.bigtop.options.get.return_value = ''
        self.bigtop.install_java()

        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertFalse(mock_fetch.apt_install.called)
        self.assertFalse(mock_utils.re_edit_in_place.called)

        # Should apt install if install_java layer opt is set.
        self.bigtop.options.get.return_value = 'foo'
        print("options: {}".format(self.bigtop.options))
        self.bigtop.install_java()

        self.assertFalse(mock_fetch.add_source.called)
        self.assertFalse(mock_fetch.apt_update.called)
        self.assertTrue(mock_fetch.apt_install.called)
        self.assertTrue(mock_utils.re_edit_in_place.called)

        # On trusty, should add a ppa so that we can install Java 8.
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'trusty'}
        self.bigtop.install_java()
        self.assertTrue(mock_fetch.add_source.called)
        self.assertTrue(mock_fetch.apt_update.called)

    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_pin_bigtop_packages(self, mock_path):
        '''
        Verify the apt template is opened and written to a (mocked) file.

        '''
        mock_dst = mock.Mock()
        mock_path.return_value = mock_dst

        self.bigtop.pin_bigtop_packages(priority=100)
        self.assertTrue(mock_dst.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_apt',
                new_callable=mock.PropertyMock)
    def test_update_bigtop_repo(self, mock_apt, mock_lsb_release, mock_sub):
        '''
        Verify a bigtop apt repository is added/removed.

        '''
        # non-ubuntu should not invoke a subprocess call
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'foo',
                                         'DISTRIB_ID': 'centos',
                                         'DISTRIB_RELEASE': '7'}
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_not_called()

        # verify args when adding a repo on ubuntu
        mock_apt.return_value = 'foo'
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial',
                                         'DISTRIB_ID': 'ubuntu',
                                         'DISTRIB_RELEASE': '16.04'}
        self.bigtop.update_bigtop_repo()
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yu', 'deb foo bigtop contrib'])

        # verify args when removing a repo on ubuntu
        self.bigtop.update_bigtop_repo(remove=True)
        mock_sub.check_call.assert_called_with(
            ['add-apt-repository', '-yur', 'deb foo bigtop contrib'])

        # verify we handle check_call errors
        class MockException(Exception):
            pass
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        mock_sub.check_call.side_effect = mock_raise
        self.bigtop.update_bigtop_repo()

    @mock.patch('charms.layer.apache_bigtop_base.get_package_version')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess.Popen')
    @mock.patch('charms.layer.apache_bigtop_base.lsb_release')
    def test_check_bigtop_repo_package(self, mock_lsb_release, mock_sub,
                                       mock_hookenv, mock_pkg_ver):
        '''
        Verify bigtop repo package queries.

        '''
        # non-ubuntu should raise an error
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'foo',
                                         'DISTRIB_ID': 'centos',
                                         'DISTRIB_RELEASE': '7'}
        self.assertRaises(BigtopError,
                          self.bigtop.check_bigtop_repo_package,
                          'foo')

        # reset with ubuntu
        mock_lsb_release.return_value = {'DISTRIB_CODENAME': 'xenial',
                                         'DISTRIB_ID': 'ubuntu',
                                         'DISTRIB_RELEASE': '16.04'}

        madison_proc = mock.Mock()
        grep_proc = mock.Mock()

        # simulate a missing repo pkg
        grep_attrs = {'communicate.return_value': (b'', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing repo pkg (message should be logged)
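        # NB: mock_sub patches Popen, and assigning return_value twice means
        # the second assignment wins: both the madison and grep calls below
        # return grep_proc, whose communicate() drives the outcome.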
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_called_once()
        mock_hookenv.reset_mock()

        # reset our grep args to simulate the repo pkg being found
        grep_attrs = {'communicate.return_value': (b'pkg|1|repo', 'stderr')}
        grep_proc.configure_mock(**grep_attrs)

        # test a missing installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = ''
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo and installed pkg versions are the same (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '1'
        self.assertEqual(None, self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

        # test repo pkg is newer than installed pkg (no log message)
        mock_sub.return_value = madison_proc
        mock_sub.return_value = grep_proc
        mock_pkg_ver.return_value = '0'
        self.assertEqual('1', self.bigtop.check_bigtop_repo_package('foo'))
        mock_hookenv.log.assert_not_called()
        mock_hookenv.reset_mock()

    @mock.patch('charms.layer.apache_bigtop_base.socket')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_check_reverse_dns(self, mock_hookenv, mock_utils,
                               mock_sub, mock_socket):
        '''
        Verify that we set the reverse_dns_ok state, and handle errors
        correctly.

        '''
        # Test the case where things succeed.
        mock_sub.check_output.return_value = b'domain'
        self.bigtop.check_reverse_dns()
        self.assertTrue(unitdata.kv().get('reverse_dns_ok'))

        # Test the case where the hostname resolves to 'localdomain'.
        mock_sub.check_output.return_value = b'localdomain'
        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

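        # Test the case where the reverse lookup raises socket.herror.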
        class MockHError(Exception):
            pass

        def raise_herror(*args, **kwargs):
            raise MockHError('test')
        mock_socket.herror = MockHError
        mock_socket.gethostbyaddr = raise_herror

        self.bigtop.check_reverse_dns()
        self.assertFalse(unitdata.kv().get('reverse_dns_ok'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_fetch_bigtop_release(self, mock_path, mock_hookenv, mock_ver):
        '''Verify we raise an exception if an invalid release is specified.'''
        mock_hookenv.resource_get.return_value = False
        mock_ver.return_value = 'foo'
        self.assertRaises(
            BigtopError,
            self.bigtop.fetch_bigtop_release)

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    def test_install_puppet_modules(self, mock_hookenv, mock_utils):
        '''Verify that we seem to install puppet modules correctly.'''
        mock_hookenv.charm_dir.return_value = '/tmp'

        def mock_run_as(user, *args):
            '''
            Verify that we run puppet as root.

            '''
            self.assertEqual(user, 'root')

        mock_utils.run_as.side_effect = mock_run_as
        self.bigtop.install_puppet_modules()

    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.glob')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    def test_apply_patches(self, mock_chdir, mock_glob, mock_utils,
                           mock_hookenv):
        '''
        Verify that we apply patches in the correct order.

        '''
        mock_hookenv.charm_dir.return_value = '/tmp'

        reverse_sorted = ['foo', 'baz', 'bar']
        mock_glob.return_value = ['foo', 'baz', 'bar']

        def mock_run_as(*args):
            patch = args[-1]
            self.assertEqual(args[0], 'root')
            # Verify that we're running on a sorted list.
            self.assertTrue(patch.endswith(reverse_sorted.pop()))

        mock_utils.run_as.side_effect = mock_run_as

        self.bigtop.apply_patches()

    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_base')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_hiera_yaml(self, mock_path, mock_base, mock_yaml):
        '''
        Verify that we attempt to add the values that we expect to our hiera
        object, before writing it out to a (mocked) yaml file.

        '''
        def mock_dump(hiera_yaml, *args, **kwargs):
            self.assertTrue(hiera_yaml.get(':yaml'))
            self.assertTrue(':datadir' in hiera_yaml[':yaml'])

        mock_yaml.dump.side_effect = mock_dump

        mock_dst = mock.Mock()
        mock_path.return_value = mock_dst
        mock_yaml.load.return_value = defaultdict(lambda: {})
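        # Stub both the py2 (__div__) and py3 (__truediv__) division
        # operators so `bigtop_base / 'some/path'` returns the mock itself.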
        mock_base.__div__.side_effect = lambda rel: mock_base
        mock_base.__truediv__.side_effect = lambda rel: mock_base

        self.bigtop.render_hiera_yaml()

        # Verify that we attempt to write yaml::datadir to hieradata.
        self.assertTrue(mock_dst.write_text.called)

    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.site_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    def test_render_site_yaml(self, mock_path, mock_site, mock_yaml, mock_run):
        '''
        Verify that we attempt to put together a plausible site yaml
        config, before writing it out to a (mocked) yaml file.

        '''

        # Setup
        mock_yaml.load.return_value = defaultdict(lambda: {})
        config = {
            'roles': None,
            'overrides': None,
            'hosts': None
        }

        def verify_yaml(yaml, *args, **kwargs):
            '''
            Verify that the dict we are trying to dump to yaml has the values
            that we expect.

            '''
            self.assertTrue('bigtop::bigtop_repo_uri' in yaml)
            if config['roles'] is None:
                self.assertFalse('bigtop::roles_enabled' in yaml)
            else:
                self.assertTrue('bigtop::roles_enabled' in yaml)
                self.assertTrue('bigtop::roles' in yaml)
                self.assertEqual(
                    yaml['bigtop::roles'],
                    sorted(config['roles'])
                )
            if config['overrides'] is not None:
                for key in config['overrides']:
                    self.assertTrue(yaml.get(key) == config['overrides'][key])

        mock_yaml.dump.side_effect = verify_yaml

        # Test various permutations of arguments passed in.
        for config_set in [
                {'roles': ['foo', 'bar', 'baz']},  # Test roles
                {'overrides': {'foo': 'bar'}}]:  # Test override
            config.update(config_set)

            # Test
            self.bigtop.render_site_yaml(
                roles=config['roles'],
                overrides=config['overrides'],
                hosts=config['hosts'])

            # Reset
            mock_yaml.load.return_value = defaultdict(lambda: {})
            config['roles'] = None
            config['overrides'] = None
            config['hosts'] = None

    def test_queue_puppet(self):
        '''Verify that we set the expected 'puppet queued' state.'''

        self.bigtop.queue_puppet()
        self.assertTrue(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.hookenv')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.bigtop_version',
                new_callable=mock.PropertyMock)
    def test_handle_queued_puppet(self, mock_ver, mock_hookenv, mock_trigger):
        '''
        Verify that we attempt to call puppet when it has been queued, and
        then clear the queued state.

        '''
        set_state('apache-bigtop-base.puppet_queued')
        mock_ver.return_value = '1.2.0'
        Bigtop._handle_queued_puppet()
        self.assertTrue(mock_trigger.called)
        self.assertFalse(is_state('apache-bigtop-base.puppet_queued'))

    @mock.patch('charms.layer.apache_bigtop_base.utils')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.unitdata')
    def test_trigger_puppet(self, mock_unit, mock_chdir, mock_utils):
        '''
        Test to verify that we attempt to trigger puppet correctly.

        '''
        def verify_utils_call(user, puppet, *args):
            self.assertEqual(user, 'root')
            self.assertEqual(puppet, 'puppet')

        mock_kv = mock.Mock()
        mock_unit.kv.return_value = mock_kv
        mock_kv.get.return_value = 'foo'

        mock_utils.run_as.side_effect = verify_utils_call

        self.bigtop.trigger_puppet()

        self.assertTrue(mock_utils.run_as.called)

        # TODO: verify the Java 1.7 logic.

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    def test_check_hdfs_setup(self, mock_run, mock_sub):
        '''
        Verify that our hdfs setup check works as expected, and handles
        errors as expected.

        '''
        class MockException(Exception):
            pass
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        for s in ['ubuntu', '   ubuntu  ', 'ubuntu  ', '  ubuntu']:
            mock_run.return_value = s
            self.assertTrue(self.bigtop.check_hdfs_setup())

        for s in ['foo', '   ', '', ' bar', 'notubuntu', 'ubuntu not ']:
            mock_run.return_value = s
            self.assertFalse(self.bigtop.check_hdfs_setup())

        mock_run.side_effect = mock_raise
        self.assertFalse(self.bigtop.check_hdfs_setup())

    @unittest.skip('noop')
    def test_spec(self):
        '''Nothing to test that the linter won't handle.'''

    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    @mock.patch('charms.layer.apache_bigtop_base.utils.run_as')
    @mock.patch('charms.layer.apache_bigtop_base.chdir')
    @mock.patch('charms.layer.apache_bigtop_base.chownr')
    @mock.patch('charms.layer.apache_bigtop_base.layer.options')
    def test_run_smoke_tests(self, mock_options, mock_ownr, mock_chdir,
                             mock_run, mock_sub):
        '''
        Verify that we attempt to run smoke tests correctly, and handle
        exceptions as expected.

        '''
        mock_options.return_value = {}
        # Returns None if bigtop isn't available.
        remove_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Returns None if we don't pass in a 'smoke_components' arg
        set_state('bigtop.available')
        self.assertEqual(None, self.bigtop.run_smoke_tests())

        # Should return 'success' if all went well.
        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            'success'
        )

        # Should return error message if subprocess raised an Exception.
        class MockException(Exception):
            pass
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')
        mock_run.side_effect = mock_raise

        self.assertEqual(
            self.bigtop.run_smoke_tests(smoke_components=['foo', 'bar']),
            "test output"
        )

    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.update_bigtop_repo')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.render_hiera_yaml')
    @mock.patch('charms.layer.apache_bigtop_base.Path')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.pin_bigtop_packages')
    @mock.patch('charms.layer.apache_bigtop_base.Bigtop.trigger_puppet')
    @mock.patch('charms.layer.apache_bigtop_base.subprocess')
    def test_reinstall_repo_packages(self, mock_sub, mock_trigger, mock_pin,
                                     mock_path, mock_hiera, mock_update):
        '''
        Verify that we attempt to trigger puppet during a reinstall, and handle
        exceptions as expected.

        '''
        class MockException(Exception):
            pass
        MockException.output = "test output"
        mock_sub.CalledProcessError = MockException

        def mock_raise(*args, **kwargs):
            raise MockException('foo!')

        # Should return error message if apt-get remove raised an Exception.
        mock_sub.check_call.side_effect = mock_raise
        self.assertEqual(
            self.bigtop.reinstall_repo_packages(remove_pkgs='foo bar-*'),
            "test output"
        )

        # Should call pin twice if trigger puppet fails (once to raise prio,
        # once again to drop it back down)
        mock_trigger.side_effect = mock_raise
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'failed')
        self.assertEqual(mock_pin.call_count, 2)

        # Should return 'success' if all went well.
        mock_trigger.side_effect = None
        self.assertEqual(self.bigtop.reinstall_repo_packages(), 'success')

    def test_get_ip_for_interface(self):
        '''
        Test to verify that our get_ip_for_interface method does sensible
        things.

        '''
        ip = self.bigtop.get_ip_for_interface('lo')
        self.assertEqual(ip, '127.0.0.1')

        ip = self.bigtop.get_ip_for_interface('127.0.0.0/24')
        self.assertEqual(ip, '127.0.0.1')

        # If passed 0.0.0.0, or something similar, the function should
        # treat it as a special case, and return what it was passed.
        for i in ['0.0.0.0', '0.0.0.0/0', '0/0', '::']:
            ip = self.bigtop.get_ip_for_interface(i)
            self.assertEqual(ip, i)

        self.assertRaises(
            BigtopError,
            self.bigtop.get_ip_for_interface,
            '2.2.2.0/24')

        self.assertRaises(
            BigtopError,
            self.bigtop.get_ip_for_interface,
            'foo')
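
test_get_repo_url above pins down a (version, arch, series) -> URL mapping.
A condensed sketch of that mapping as the tests exercise it is shown below;
repo_url_for is an illustrative name, and the real logic lives in
charms.layer.apache_bigtop_base and handles more cases.

def repo_url_for(version, cpu_arch, ubuntu_release='16.04'):
    """Bigtop apt repo URL, as pinned down by test_get_repo_url."""
    s3 = 'http://bigtop-repos.s3.amazonaws.com/releases'
    ci = 'https://ci.bigtop.apache.org/job'
    if version == '1.1.0':
        # trusty era; power machines used the vivid build
        if cpu_arch == 'ppc64le':
            return '{}/1.1.0/ubuntu/vivid/ppc64el'.format(s3)
        return '{}/1.1.0/ubuntu/trusty/{}'.format(s3, cpu_arch)
    if version == '1.2.0':
        return '{}/1.2.0/ubuntu/{}/{}'.format(s3, ubuntu_release, cpu_arch)
    if version == '1.2.1':
        if cpu_arch == 'x86_64':
            return ('http://repos.bigtop.apache.org/releases/'
                    '1.2.1/ubuntu/{}/x86_64'.format(ubuntu_release))
        return ('{}/Bigtop-1.2.1/OS=ubuntu-{}/lastSuccessfulBuild/'
                'artifact/output/apt'.format(ci, ubuntu_release))
    if version == 'master':
        os_label = ('ubuntu-{}'.format(ubuntu_release)
                    if cpu_arch == 'x86_64'
                    else 'ubuntu-{}-{}'.format(ubuntu_release, cpu_arch))
        return ('{}/Bigtop-trunk-repos/OS={},label=docker-slave/'
                'ws/output/apt'.format(ci, os_label))
    raise ValueError('unsupported bigtop version: {}'.format(version))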
Example #43
0
    def configure(self, available_hosts, zk_units, peers, extra_libs):
        """
        This is the core logic of setting up spark.

        :param dict available_hosts: Hosts that Spark should know about.
        :param list zk_units: List of Zookeeper dicts with host/port info.
        :param list peers: List of Spark peer tuples (unit name, IP).
        :param list extra_libs: List of extra lib paths for driver/executors.
        """
        # Bootstrap spark
        if not unitdata.kv().get('spark.bootstrapped', False):
            self.setup()
            unitdata.kv().set('spark.bootstrapped', True)

        # Set KV based on connected applications
        unitdata.kv().set('zookeeper.units', zk_units)
        unitdata.kv().set('sparkpeer.units', peers)
        unitdata.kv().flush(True)

        # Get our config ready
        dc = self.dist_config
        events_log_dir = 'file://{}'.format(dc.path('spark_events'))
        mode = hookenv.config()['spark_execution_mode']
        master_ip = utils.resolve_private_address(
            available_hosts['spark-master'])
        master_url = self.get_master_url(master_ip)
        req_driver_mem = hookenv.config()['driver_memory']
        req_executor_mem = hookenv.config()['executor_memory']

        # handle tuning options that may be set as percentages
        driver_mem = '1g'
        executor_mem = '1g'
        if req_driver_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
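                # get_total_ram() is assumed to return bytes; convert to
                # MiB before applying the requested percentage.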
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_driver_mem.strip('%')) / 100
                driver_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "driver_memory percentage in non-local mode. "
                    "Using 1g default.", level=hookenv.WARNING)
        else:
            driver_mem = req_driver_mem

        if req_executor_mem.endswith('%'):
            if mode == 'standalone' or mode.startswith('local'):
                mem_mb = host.get_total_ram() / 1024 / 1024
                req_percentage = float(req_executor_mem.strip('%')) / 100
                executor_mem = str(int(mem_mb * req_percentage)) + 'm'
            else:
                hookenv.log(
                    "executor_memory percentage in non-local mode. "
                    "Using 1g default.", level=hookenv.WARNING)
        else:
            executor_mem = req_executor_mem

        # Setup hosts dict
        hosts = {
            'spark': master_ip,
        }
        if 'namenode' in available_hosts:
            hosts['namenode'] = available_hosts['namenode']
            events_log_dir = self.setup_hdfs_logs()
        else:
            # Bigtop includes a default hadoop_head_node if we do not specify
            # any namenode info. To ensure spark standalone doesn't get
            # invalid hadoop config, set our NN to an empty string.
            hosts['namenode'] = ''
        if 'resourcemanager' in available_hosts:
            hosts['resourcemanager'] = available_hosts['resourcemanager']

        # Setup roles dict. We always include the history server and client.
        # Determine other roles based on our execution mode.
        roles = ['spark-history-server', 'spark-client']
        if mode == 'standalone':
            roles.append('spark-master')
            roles.append('spark-worker')
        elif mode.startswith('yarn'):
            roles.append('spark-on-yarn')
            roles.append('spark-yarn-slave')

        # Setup overrides dict
        override = {
            'spark::common::master_url': master_url,
            'spark::common::event_log_dir': events_log_dir,
            'spark::common::history_log_dir': events_log_dir,
            'spark::common::extra_lib_dirs':
                ':'.join(extra_libs) if extra_libs else None,
            'spark::common::driver_mem': driver_mem,
            'spark::common::executor_mem': executor_mem,
        }
        if zk_units:
            zks = []
            for unit in zk_units:
                ip = utils.resolve_private_address(unit['host'])
                zks.append("%s:%s" % (ip, unit['port']))

            zk_connect = ",".join(zks)
            override['spark::common::zookeeper_connection_string'] = zk_connect
        else:
            override['spark::common::zookeeper_connection_string'] = None

        # Create our site.yaml and trigger puppet
        bigtop = Bigtop()
        bigtop.render_site_yaml(hosts, roles, override)
        bigtop.trigger_puppet()

        # Do this after our puppet bits in case puppet overrides needed perms
        if 'namenode' not in available_hosts:
            # Local event dir (not in HDFS) needs to be 777 so non-spark
            # users can write job history there. It needs to be g+s so
            # all entries will be readable by spark (in the spark group).
            # It needs to be +t so users cannot remove files they don't own.
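            # 0o3777 == setgid (0o2000) + sticky (0o1000) + rwxrwxrwx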
            dc.path('spark_events').chmod(0o3777)

        self.patch_worker_master_url(master_ip, master_url)

        # Install SB (subsequent calls will reconfigure existing install)
        # SparkBench looks for the spark master in /etc/environment
        with utils.environment_edit_in_place('/etc/environment') as env:
            env['MASTER'] = master_url
        self.install_benchmark()
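
The Zookeeper connect-string assembly recurs across these examples. A minimal
standalone sketch of the pattern follows, assuming each unit dict carries
'host' and 'port' keys; address resolution is left out for brevity, and
zookeeper_connect is an illustrative name.

def zookeeper_connect(zk_units):
    """Build an 'ip1:port1,ip2:port2' connect string.

    Sorting (as some of the variants above do) keeps the string stable
    across hook invocations, so config files are not needlessly rewritten
    when only the unit ordering changes.
    """
    zks = sorted('{}:{}'.format(u['host'], u['port']) for u in zk_units)
    return ','.join(zks)

# e.g. zookeeper_connect([{'host': '10.0.0.2', 'port': 2181},
#                         {'host': '10.0.0.1', 'port': 2181}])
# -> '10.0.0.1:2181,10.0.0.2:2181'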