def reinstall_spark(force=False):
    """
    Gather the state of our deployment and (re)install when leaders, hadoop,
    sparkpeers, or zookeepers change. In the future this should also fire
    when Cassandra or any other storage comes or goes.

    Config changed events will also call this method, but that is invoked
    with a separate handler below.

    Use a deployment-matrix dict to track changes and (re)install as needed.
    """
    spark_master_host = leadership.leader_get('master-fqdn')
    if not spark_master_host:
        hookenv.status_set('maintenance', 'juju leader not elected yet')
        return

    mode = hookenv.config()['spark_execution_mode']
    peers = None
    zks = None

    # If mode is standalone and ZK is ready, we are in HA. Do not consider
    # the master_host from juju leadership in our matrix. ZK handles this.
    if (mode == 'standalone' and is_state('zookeeper.ready')):
        spark_master_host = ''
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()
        # peers are only used to set our MASTER_URL in standalone HA mode
        peers = get_spark_peers()

    # Construct a deployment matrix
    sample_data = hookenv.resource_get('sample-data')
    deployment_matrix = {
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
        'peers': peers,
        'sample_data': host.file_hash(sample_data) if sample_data else None,
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'zookeepers': zks,
    }

    # No-op if we are not forcing a reinstall or our matrix is unchanged.
    if not (force or data_changed('deployment_matrix', deployment_matrix)):
        report_status()
        return

    # (Re)install based on our execution mode
    hookenv.status_set('maintenance',
                       'configuring spark in {} mode'.format(mode))
    hookenv.log("Configuring spark with deployment matrix: {}".format(
        deployment_matrix))
    if mode.startswith('yarn') and is_state('hadoop.yarn.ready'):
        install_spark_yarn()
    elif mode.startswith('local') or mode == 'standalone':
        install_spark_standalone(zks, peers)
    else:
        # Something's wrong (probably requested yarn without yarn.ready).
        remove_state('spark.started')
        report_status()
        return

    # restart services to pick up possible config changes
    spark = Spark()
    spark.stop()
    spark.start()
    set_state('spark.started')
    report_status()
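The "separate handler" that the docstring mentions is wired up with charms.reactive decorators. A minimal sketch of that wiring, assuming the usual @when decorator; the handler name and the guarded states here are illustrative, not the charm's exact code:

from charms.reactive import when

@when('config.changed', 'spark.started')
def reconfigure_spark():
    # Config changes always force a pass through reinstall_spark(), even if
    # the deployment matrix itself did not move (illustrative sketch).
    reinstall_spark(force=True)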
def reinstall_spark():
    """
    This is tricky. We want to fire on config or leadership changes, or when
    hadoop, sparkpeers, or zookeepers come and go. In the future this should
    fire when Cassandra or any other storage comes or goes. We always fire
    this method (or rather, when bigtop is ready and juju has elected a
    master). We then build a deployment-matrix and (re)install as things
    change.
    """
    spark_master_host = leadership.leader_get('master-fqdn')
    if not spark_master_host:
        hookenv.status_set('maintenance', 'juju leader not elected yet')
        return

    mode = hookenv.config()['spark_execution_mode']
    peers = None
    zks = None

    # If mode is standalone and ZK is ready, we are in HA. Do not consider
    # the master_host from juju leadership in our matrix. ZK handles this.
    if (mode == 'standalone' and is_state('zookeeper.ready')):
        spark_master_host = ''
        zk = RelationBase.from_state('zookeeper.ready')
        zks = zk.zookeepers()
        # peers are only used to set our MASTER_URL in standalone HA mode
        peers = get_spark_peers()

    deployment_matrix = {
        'spark_master': spark_master_host,
        'yarn_ready': is_state('hadoop.yarn.ready'),
        'hdfs_ready': is_state('hadoop.hdfs.ready'),
        'zookeepers': zks,
        'peers': peers,
    }

    # If neither config nor our matrix is changing, there is nothing to do.
    if not (is_state('config.changed') or
            data_changed('deployment_matrix', deployment_matrix)):
        return

    # (Re)install based on our execution mode
    hookenv.status_set('maintenance',
                       'configuring spark in {} mode'.format(mode))
    hookenv.log("Configuring spark with deployment matrix: {}".format(
        deployment_matrix))
    if mode.startswith('yarn') and is_state('hadoop.yarn.ready'):
        install_spark_yarn()
    elif mode.startswith('local') or mode == 'standalone':
        install_spark_standalone(zks, peers)
    else:
        # Something's wrong (probably requested yarn without yarn.ready).
        remove_state('spark.started')
        report_status()
        return

    # restart services to pick up possible config changes
    spark = Spark()
    spark.stop()
    spark.start()
    set_state('spark.started')
    report_status()
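Both versions gate the reinstall on data_changed from charms.reactive.helpers, which hashes a value and compares it to the hash stored in the unit's kv store on the previous call. A minimal sketch of the semantics this code relies on, assuming the stock helper behavior:

from charms.reactive.helpers import data_changed

matrix = {'spark_master': 'host-0', 'yarn_ready': False}

# First call: no stored hash yet, so this reports a change and records it.
data_changed('deployment_matrix', matrix)   # -> True

# Same value again: nothing changed, so the reinstall path is skipped.
data_changed('deployment_matrix', matrix)   # -> False

# Any difference (new peer, ZK quorum, yarn readiness) reports a change.
matrix['yarn_ready'] = True
data_changed('deployment_matrix', matrix)   # -> True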