def configure_hdfs_base(self, host, port):
    dc = self.hadoop_base.dist_config
    core_site = dc.path('hadoop_conf') / 'core-site.xml'
    with utils.xmlpropmap_edit_in_place(core_site) as props:
        if host and port:
            props['fs.defaultFS'] = "hdfs://{host}:{port}".format(host=host, port=port)
        props['hadoop.proxyuser.hue.hosts'] = "*"
        props['hadoop.proxyuser.hue.groups'] = "*"
        props['hadoop.proxyuser.oozie.groups'] = '*'
        props['hadoop.proxyuser.oozie.hosts'] = '*'
        if 'lzo' in self.hadoop_base.resources:
            props['io.compression.codecs'] = ('org.apache.hadoop.io.compress.GzipCodec, '
                                              'org.apache.hadoop.io.compress.DefaultCodec, '
                                              'org.apache.hadoop.io.compress.BZip2Codec, '
                                              'org.apache.hadoop.io.compress.SnappyCodec, '
                                              'com.hadoop.compression.lzo.LzoCodec, '
                                              'com.hadoop.compression.lzo.LzopCodec')
            props['io.compression.codec.lzo.class'] = 'com.hadoop.compression.lzo.LzoCodec'
        else:
            props['io.compression.codecs'] = ('org.apache.hadoop.io.compress.GzipCodec, '
                                              'org.apache.hadoop.io.compress.DefaultCodec, '
                                              'org.apache.hadoop.io.compress.BZip2Codec, '
                                              'org.apache.hadoop.io.compress.SnappyCodec')
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.webhdfs.enabled'] = "true"
        props['dfs.namenode.name.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
        props['dfs.datanode.data.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
        props['dfs.permissions'] = 'false'  # TODO - secure this hadoop installation!
def configure_yarn_base(self, host, port, history_http, history_ipc):
    dc = self.hadoop_base.dist_config
    yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
    with utils.xmlpropmap_edit_in_place(yarn_site) as props:
        props['yarn.nodemanager.aux-services'] = 'mapreduce_shuffle'
        props['yarn.nodemanager.vmem-check-enabled'] = 'false'
        if host:
            props['yarn.resourcemanager.hostname'] = '{}'.format(host)
            props['yarn.resourcemanager.address'] = '{}:{}'.format(host, port)
            props["yarn.log.server.url"] = "{}:{}/jobhistory/logs/".format(host, history_http)
    mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
    with utils.xmlpropmap_edit_in_place(mapred_site) as props:
        if host and history_ipc:
            props["mapreduce.jobhistory.address"] = "{}:{}".format(host, history_ipc)
        if host and history_http:
            props["mapreduce.jobhistory.webapp.address"] = "{}:{}".format(host, history_http)
        props["mapreduce.framework.name"] = 'yarn'
        props["mapreduce.jobhistory.intermediate-done-dir"] = "/mr-history/tmp"
        props["mapreduce.jobhistory.done-dir"] = "/mr-history/done"
        props["mapreduce.map.output.compress"] = 'true'
        props["mapred.map.output.compress.codec"] = 'org.apache.hadoop.io.compress.SnappyCodec'
        props["mapreduce.application.classpath"] = "$HADOOP_HOME/share/hadoop/mapreduce/*,\
def setup_hue(self, namenodes, resourcemanagers, hdfs_port, yarn_port,
              yarn_http, yarn_ipc):
    hookenv.status_set('maintenance', 'Setting up Hue')
    hue_bin = self.dist_config.path('hue') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if hue_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], hue_bin])
        env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
        env['GOBBLIN_WORK_DIR'] = self.dist_config.path('outputdir')
        hadoop_conf = env['HADOOP_CONF_DIR'] + '/core-site.xml'
        yarn_conf = env['HADOOP_CONF_DIR'] + '/yarn-site.xml'
        mapred_conf = env['HADOOP_CONF_DIR'] + '/mapred-site.xml'

    with utils.xmlpropmap_edit_in_place(hadoop_conf) as props:
        hdfs_endpoint = props['fs.defaultFS']

    with utils.xmlpropmap_edit_in_place(yarn_conf) as props:
        yarn_log_url = props['yarn.log.server.url']  # 19888
        yarn_resmgr = props['yarn.resourcemanager.address']  # 8032

    with utils.xmlpropmap_edit_in_place(mapred_conf) as props:
        mapred_jobhistory = props['mapreduce.jobhistory.address']  # 10020

    default_conf = self.dist_config.path('hue') / 'desktop/conf'
    hue_conf = self.dist_config.path('hue_conf')
    if os.path.islink('/usr/lib/hue/desktop/conf'):
        return
    else:
        hue_conf.rmtree_p()
        default_conf.copytree(hue_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        hue_conf.symlink(default_conf)

    hdfs_fulluri = hdfs_endpoint.split('/')[2]
    hdfs_hostname = hdfs_fulluri.split(':')[0]

    hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
    hue_port = self.dist_config.port('hue_web')
    # Fix following for HA: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_hadoop-ha/content/ha-nn-deploy-hue.html
    hookenv.log("Not currently supporting HA, FIX: namenodes are: " +
                str(namenodes) + " resmanagers: " + str(resourcemanagers))
    utils.re_edit_in_place(hue_config, {
        r'http_port=8888': 'http_port=%s' % hue_port,
        # r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s' % hdfs_endpoint,
        r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s:%s' % (namenodes[0], hdfs_port),
        # r'## resourcemanager_host=localhost': 'resourcemanager_host=%s' % yarn_resmgr.split(':')[0],
        r'.*resourcemanager_host=localhost': 'resourcemanager_host=%s' % resourcemanagers[0],
        # r'## resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_resmgr.split(':')[1],
        r'.*resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_port,
        r'.*webhdfs_url=http://localhost:50070/webhdfs/v1': 'webhdfs_url=http://%s:50070/webhdfs/v1' % namenodes[0],
        r'.*history_server_api_url=http://localhost:19888': 'history_server_api_url=%s' % yarn_log_url.split('/')[0],
        r'.*resourcemanager_api_url=http://localhost:8088': 'resourcemanager_api_url=http://%s:8088' % yarn_resmgr.split(':')[0],
        r'.*secret_key=.*': 'secret_key=%s' % uuid.uuid4(),
    })
    self.update_apps()
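# `utils.re_edit_in_place` is used heavily above but never shown in this
# section. The sketch below is a minimal, assumed-compatible stand-in inferred
# from the call sites (a path plus a {regex: replacement} map applied line by
# line); it is illustrative only, not the actual helper implementation.
import re

def re_edit_in_place(filename, subs):
    """Apply each {pattern: replacement} regex pair to every line of `filename`."""
    with open(filename) as f:
        lines = f.readlines()
    with open(filename, 'w') as f:
        for line in lines:
            for pattern, replacement in subs.items():
                # `.` does not match the newline, so each line's '\n' survives
                line = re.sub(pattern, replacement, line)
            f.write(line)

# e.g. re_edit_in_place('hue.ini', {r'http_port=8888': 'http_port=9999'})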
def configure_zookeeper(self, zookeepers):
    dc = self.hadoop_base.dist_config
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.ha.automatic-failover.enabled'] = 'true'
    core_site = dc.path('hadoop_conf') / 'core-site.xml'
    with utils.xmlpropmap_edit_in_place(core_site) as props:
        zk_str = ','.join('{host}:{port}'.format(**zk) for zk in zookeepers)
        hookenv.log("Zookeeper string is: %s" % zk_str)
        props['ha.zookeeper.quorum'] = zk_str
    self.hadoop_base.setup_init_script("hdfs", "zkfc")
def configure_yarn_base(self, host, port, history_http, history_ipc):
    dc = self.hadoop_base.dist_config
    yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
    with utils.xmlpropmap_edit_in_place(yarn_site) as props:
        props['yarn.nodemanager.aux-services'] = 'mapreduce_shuffle'
        props['yarn.nodemanager.vmem-check-enabled'] = 'false'
        if host:
            props['yarn.resourcemanager.hostname'] = '{}'.format(host)
            props['yarn.resourcemanager.address'] = '{}:{}'.format(host, port)
            props["yarn.log.server.url"] = "{}:{}/jobhistory/logs/".format(host, history_http)
    mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
    with utils.xmlpropmap_edit_in_place(mapred_site) as props:
        if host and history_ipc:
            props["mapreduce.jobhistory.address"] = "{}:{}".format(host, history_ipc)
        props["mapreduce.framework.name"] = 'yarn'
def configure_hive(self, mysql):
    config = hookenv.config()
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = "jdbc:mysql://{}:{}/{}".format(
            mysql.host(), mysql.port(), mysql.database())
        props['javax.jdo.option.ConnectionUserName'] = mysql.user()
        props['javax.jdo.option.ConnectionPassword'] = mysql.password()
        props['javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
        props['hive.hwi.war.file'] = "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[self.cpu_arch]
    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HADOOP_HEAPSIZE *=.*': 'export HADOOP_HEAPSIZE=%s' % config['heap'],
        r'.*export HIVE_AUX_JARS_PATH *=.*': 'export HIVE_AUX_JARS_PATH=/usr/share/java/mysql-connector-java.jar',
    })
    # Now that we have db connection info, init our schema (only once)
    if not unitdata.kv().get('hive.schema.initialized'):
        utils.run_as('hive', 'schematool', '-initSchema', '-dbType', 'mysql')
        unitdata.kv().set('hive.schema.initialized', True)
def install_namenode():
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    nn_host = get_fqdn()
    hosts = {'namenode': nn_host}
    bigtop.render_site_yaml(hosts=hosts, roles='namenode')
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # make our namenode listen on all interfaces
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.rpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.servicerpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.http-bind-host'] = '0.0.0.0'
        props['dfs.namenode.https-bind-host'] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
def configure_remote_db(self, mysql):
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    jdbc_url = "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
        mysql.host(), mysql.port(), mysql.database())
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = jdbc_url
        props['javax.jdo.option.ConnectionUserName'] = mysql.user()
        props['javax.jdo.option.ConnectionPassword'] = mysql.password()
        props['javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HIVE_AUX_JARS_PATH *=.*':
            ('export HIVE_AUX_JARS_PATH='
             '/usr/share/java/mysql-connector-java.jar'),
    })
    # Now that we have db connection info, init our schema (only once)
    remote_db = hookenv.remote_service_name()
    if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
        tool_path = "{}/bin/schematool".format(self.dist_config.path('hive'))
        utils.run_as('ubuntu', tool_path, '-initSchema', '-dbType', 'mysql')
        unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
        unitdata.kv().flush(True)
def setup_hive_config(self):
    '''
    Copy the default configuration files to the hive_conf property
    defined in dist.yaml.
    '''
    default_conf = self.dist_config.path('hive') / 'conf'
    hive_conf = self.dist_config.path('hive_conf')
    hive_conf.rmtree_p()
    default_conf.copytree(hive_conf)

    # Configure immutable bits
    hive_bin = self.dist_config.path('hive') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if hive_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], hive_bin])
        env['HIVE_CONF_DIR'] = self.dist_config.path('hive_conf')

    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    if not hive_env.exists():
        (self.dist_config.path('hive_conf') / 'hive-env.sh.template').copy(hive_env)

    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    if not hive_site.exists():
        (self.dist_config.path('hive_conf') / 'hive-default.xml.template').copy(hive_site)
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        # TODO (kwm): we should be able to export java.io.tmpdir so these 4 aren't needed
        props['hive.exec.local.scratchdir'] = "/tmp/hive"
        props['hive.downloaded.resources.dir'] = "/tmp/hive_resources"
        props['hive.querylog.location'] = "/tmp/hive"
        props['hive.server2.logging.operation.log.location'] = "/tmp/hive"
    ####

    # create hdfs storage space
    utils.run_as('hive', 'hdfs', 'dfs', '-mkdir', '-p', '/user/hive/warehouse')
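# `utils.environment_edit_in_place` wraps /etc/environment as an editable
# dict. Below is a minimal sketch under the assumption that the file holds
# simple KEY="VALUE" lines; the real helper may handle quoting, comments, and
# ordering differently.
from contextlib import contextmanager

@contextmanager
def environment_edit_in_place(filename='/etc/environment'):
    env = {}
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith('#') and '=' in line:
                key, _, value = line.partition('=')
                env[key.strip()] = value.strip().strip('"')
    yield env  # callers read and mutate this mapping
    with open(filename, 'w') as f:
        for key, value in env.items():
            f.write('{}="{}"\n'.format(key, value))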
def configure_namenode(self, secondary_host=None, secondary_port=None):
    dc = self.hadoop_base.dist_config
    host = hookenv.local_unit().replace('/', '-')
    port = dc.port('namenode')
    self.configure_hdfs_base(host, port)
    cfg = self.hadoop_base.charm_config
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.replication'] = cfg['dfs_replication']
        props['dfs.blocksize'] = int(cfg['dfs_blocksize'])
        props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'true'
        props['dfs.namenode.http-address'] = '0.0.0.0:{}'.format(dc.port('nn_webapp_http'))
        # TODO: support SSL
        # props['dfs.namenode.https-address'] = '0.0.0.0:{}'.format(dc.port('nn_webapp_https'))

        # FIXME hack-around until transition to layers is complete
        if not (secondary_host and secondary_port) and helpers:
            unit, secondary = helpers.any_ready_unit('secondary')
            if unit:
                secondary_host = secondary['hostname']
                secondary_port = secondary['port']
        if secondary_host and secondary_port:
            props['dfs.secondary.http.address'] = '{host}:{port}'.format(
                host=secondary_host,
                port=secondary_port,
            )
def configure_remote_db(self, mysql):
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    jdbc_url = "jdbc:mysql://{}:{}/{}?createDatabaseIfNotExist=true".format(
        mysql.host(), mysql.port(), mysql.database())
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = jdbc_url
        props['javax.jdo.option.ConnectionUserName'] = mysql.user()
        props['javax.jdo.option.ConnectionPassword'] = mysql.password()
        props['javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HIVE_AUX_JARS_PATH *=.*':
            ('export HIVE_AUX_JARS_PATH='
             '/usr/share/java/mysql-connector-java.jar'),
    })
    # Now that we have db connection info, init our schema (only once)
    remote_db = hookenv.remote_service_name()
    if not unitdata.kv().get('hive.schema.initialized.%s' % remote_db):
        tool_path = "{}/bin/schematool".format(self.dist_config.path('hive'))
        utils.run_as('ubuntu', tool_path, '-initSchema', '-dbType', 'mysql')
        unitdata.kv().set('hive.schema.initialized.%s' % remote_db, True)
        unitdata.kv().flush(True)
def configure_resourcemanager(self):
    self.configure_yarn_base(*self._local())
    dc = self.hadoop_base.dist_config
    yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
    with utils.xmlpropmap_edit_in_place(yarn_site) as props:
        # 0.0.0.0 will listen on all interfaces, which is what we want on the server
        props['yarn.resourcemanager.webapp.address'] = '0.0.0.0:{}'.format(dc.port('rm_webapp_http'))
def register_journalnodes(self, nodes, port):
    clustername = hookenv.service_name()
    hdfs_site = self.hadoop_base.dist_config.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.shared.edits.dir'] = 'qjournal://{}/{}'.format(
            ';'.join(['%s:%s' % (host, port) for host in nodes]),
            clustername)
def configure_hdfs_base(self, clustername, namenodes, port, webhdfs_port):
    dc = self.hadoop_base.dist_config
    core_site = dc.path('hadoop_conf') / 'core-site.xml'
    with utils.xmlpropmap_edit_in_place(core_site) as props:
        props['hadoop.proxyuser.hue.hosts'] = "*"
        props['hadoop.proxyuser.hue.groups'] = "*"
        props['hadoop.proxyuser.oozie.groups'] = '*'
        props['hadoop.proxyuser.oozie.hosts'] = '*'
        if 'lzo' in self.hadoop_base.resources:
            props['io.compression.codecs'] = (
                'org.apache.hadoop.io.compress.GzipCodec, '
                'org.apache.hadoop.io.compress.DefaultCodec, '
                'org.apache.hadoop.io.compress.BZip2Codec, '
                'org.apache.hadoop.io.compress.SnappyCodec, '
                'com.hadoop.compression.lzo.LzoCodec, '
                'com.hadoop.compression.lzo.LzopCodec')
            props['io.compression.codec.lzo.class'] = 'com.hadoop.compression.lzo.LzoCodec'
        else:
            props['io.compression.codecs'] = (
                'org.apache.hadoop.io.compress.GzipCodec, '
                'org.apache.hadoop.io.compress.DefaultCodec, '
                'org.apache.hadoop.io.compress.BZip2Codec, '
                'org.apache.hadoop.io.compress.SnappyCodec')
        props['fs.defaultFS'] = "hdfs://{clustername}".format(clustername=clustername)
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.webhdfs.enabled'] = "true"
        props['dfs.namenode.name.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
        props['dfs.datanode.data.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
        props['dfs.permissions'] = 'false'  # TODO - secure this hadoop installation!
        props['dfs.nameservices'] = clustername
        props['dfs.client.failover.proxy.provider.%s' % clustername] = \
            'org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
        props['dfs.ha.fencing.methods'] = 'sshfence\nshell(/bin/true)'
        props['dfs.ha.fencing.ssh.private-key-files'] = utils.ssh_priv_key('hdfs')
        props['dfs.ha.namenodes.%s' % clustername] = ','.join(namenodes)
        for node in namenodes:
            props['dfs.namenode.rpc-address.%s.%s' % (clustername, node)] = '%s:%s' % (node, port)
            props['dfs.namenode.http-address.%s.%s' % (clustername, node)] = '%s:%s' % (node, webhdfs_port)
def register_journalnodes(self, nodes, port):
    clustername = hookenv.service_name()
    hdfs_site = self.hadoop_base.dist_config.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.shared.edits.dir'] = 'qjournal://{}/{}'.format(
            ';'.join(['%s:%s' % (host, port) for host in nodes]),
            clustername)
def configure_journalnode(self):
    dc = self.hadoop_base.dist_config
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.journalnode.rpc-address'] = '0.0.0.0:{}'.format(dc.port('journalnode'))
        props['dfs.journalnode.http-address'] = '0.0.0.0:{}'.format(dc.port('jn_webapp_http'))
def configure_datanode(self, clustername, namenodes, port, webhdfs_port):
    self.configure_hdfs_base(clustername, namenodes, port, webhdfs_port)
    dc = self.hadoop_base.dist_config
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.datanode.http.address'] = '0.0.0.0:{}'.format(dc.port('dn_webapp_http'))
    self.hadoop_base.setup_init_script("hdfs", "datanode")
    self.hadoop_base.setup_init_script("hdfs", "journalnode")
def configure_datanode(self, host=None, port=None):
    if not (host and port):
        host, port = self._remote("datanode")
    self.configure_hdfs_base(host, port)
    dc = self.hadoop_base.dist_config
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.datanode.http.address'] = '0.0.0.0:{}'.format(dc.port('dn_webapp_http'))
def configure_jobhistory(self):
    self.configure_yarn_base(*self._local())
    dc = self.hadoop_base.dist_config
    mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
    with utils.xmlpropmap_edit_in_place(mapred_site) as props:
        # 0.0.0.0 will listen on all interfaces, which is what we want on the server
        props["mapreduce.jobhistory.address"] = "0.0.0.0:{}".format(dc.port('jobhistory'))
        props["mapreduce.jobhistory.webapp.address"] = "0.0.0.0:{}".format(dc.port('jh_webapp_http'))
def configure_datanode(self, clustername, namenodes, port, webhdfs_port):
    self.configure_hdfs_base(clustername, namenodes, port, webhdfs_port)
    dc = self.hadoop_base.dist_config
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.datanode.http.address'] = '0.0.0.0:{}'.format(dc.port('dn_webapp_http'))
    self.hadoop_base.setup_init_script("hdfs", "datanode")
    self.hadoop_base.setup_init_script("hdfs", "journalnode")
def setup_oozie_config(self):
    # copy default config into alternate dir
    conf_dir = self.dist_config.path('oozie') / 'conf'
    self.dist_config.path('oozie_conf').rmtree_p()
    conf_dir.copytree(self.dist_config.path('oozie_conf'))

    oozie_conf = self.dist_config.path('oozie_conf') / "oozie-site.xml"
    with utils.xmlpropmap_edit_in_place(oozie_conf) as e:
        e['oozie.service.ProxyUserService.proxyuser.hue.hosts'] = '*'
        e['oozie.service.ProxyUserService.proxyuser.hue.groups'] = '*'
def setup_hue(self):
    hue_bin = self.dist_config.path('hue') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if hue_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], hue_bin])
        env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
        env['GOBBLIN_WORK_DIR'] = self.dist_config.path('outputdir')
        hadoop_conf = env['HADOOP_CONF_DIR'] + '/core-site.xml'
        yarn_conf = env['HADOOP_CONF_DIR'] + '/yarn-site.xml'
        mapred_conf = env['HADOOP_CONF_DIR'] + '/mapred-site.xml'

    with utils.xmlpropmap_edit_in_place(hadoop_conf) as props:
        hdfs_endpoint = props['fs.defaultFS']

    with utils.xmlpropmap_edit_in_place(yarn_conf) as props:
        yarn_log_url = props['yarn.log.server.url']  # 19888
        yarn_resmgr = props['yarn.resourcemanager.address']  # 8032

    with utils.xmlpropmap_edit_in_place(mapred_conf) as props:
        mapred_jobhistory = props['mapreduce.jobhistory.address']  # 10020

    default_conf = self.dist_config.path('hue') / 'desktop/conf'
    hue_conf = self.dist_config.path('hue_conf')
    hue_conf.rmtree_p()
    default_conf.copytree(hue_conf)
    # Now remove the conf included in the tarball and symlink our real conf
    default_conf.rmtree_p()
    hue_conf.symlink(default_conf)

    hdfs_fulluri = hdfs_endpoint.split('/')[2]
    hdfs_hostname = hdfs_fulluri.split(':')[0]

    hue_config = ''.join((self.dist_config.path('hue'), '/desktop/conf/hue.ini'))
    hue_port = self.dist_config.port('hue_web')
    utils.re_edit_in_place(hue_config, {
        r'http_port=8888': 'http_port=%s' % hue_port,
        r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs=%s' % hdfs_endpoint,
        r'## resourcemanager_host=localhost': 'resourcemanager_host=%s' % yarn_resmgr.split(':')[0],
        r'## resourcemanager_port=8032': 'resourcemanager_port=%s' % yarn_resmgr.split(':')[1],
        r'## webhdfs_url=http://localhost:50070/webhdfs/v1': 'webhdfs_url=http://%s:50070/webhdfs/v1' % hdfs_hostname,
        r'## history_server_api_url=http://localhost:19888': 'history_server_api_url=%s' % yarn_log_url.split('/')[0],
        r'## resourcemanager_api_url=http://localhost:8088': 'resourcemanager_api_url=http://%s:8088' % yarn_resmgr.split(':')[0],
    })
def configure_namenode(self, namenodes):
    dc = self.hadoop_base.dist_config
    clustername = hookenv.service_name()
    host = hookenv.local_unit().replace('/', '-')
    self.configure_hdfs_base(clustername, namenodes,
                             dc.port('namenode'), dc.port('nn_webapp_http'))
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'true'
        props['dfs.namenode.http-address.%s.%s' % (clustername, host)] = \
            '%s:%s' % (host, dc.port('nn_webapp_http'))
    self.hadoop_base.setup_init_script("hdfs", "namenode")
def configure_zeppelin(self):
    '''
    Configure zeppelin environment for all users
    '''
    zeppelin_bin = self.dist_config.path('zeppelin') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if zeppelin_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], zeppelin_bin])
        env['ZEPPELIN_CONF_DIR'] = self.dist_config.path('zeppelin_conf')

    zeppelin_site = self.dist_config.path('zeppelin_conf') / 'zeppelin-site.xml'
    with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
        xml['zeppelin.server.port'] = self.dist_config.port('zeppelin')
        xml['zeppelin.notebook.dir'] = self.dist_config.path('zeppelin_notebooks')

    etc_env = utils.read_etc_env()
    hadoop_conf_dir = etc_env.get('HADOOP_CONF_DIR', '/etc/hadoop/conf')
    spark_home = etc_env.get('SPARK_HOME', '/usr/lib/spark')
    spark_driver_mem = etc_env.get('SPARK_DRIVER_MEMORY', '1g')
    spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
    spark_executor_mem = etc_env.get('SPARK_EXECUTOR_MEMORY', '1g')
    zeppelin_env = self.dist_config.path('zeppelin_conf') / 'zeppelin-env.sh'
    with open(zeppelin_env, "a") as f:
        f.write('export ZEPPELIN_HOME={}\n'.format(self.dist_config.path('zeppelin')))
        f.write('export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'.format(
            spark_driver_mem, spark_executor_mem))
        f.write('export ZEPPELIN_LOG_DIR={}\n'.format(self.dist_config.path('zeppelin_logs')))
        f.write('export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n')
        f.write('export ZEPPELIN_NOTEBOOK_DIR={}\n'.format(self.dist_config.path('zeppelin_notebooks')))
        f.write('export SPARK_HOME={}\n'.format(spark_home))
        f.write('export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'.format(
            spark_driver_mem, spark_executor_mem))
        f.write('export HADOOP_CONF_DIR={}\n'.format(hadoop_conf_dir))
        f.write('export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n'.format(s=spark_home))
        f.write('export MASTER={}\n'.format(spark_exe_mode))

    # User needs write access to zepp's conf to write interpreter.json
    # on server start. chown the whole conf dir, though we could probably
    # touch that file and chown it, leaving the rest owned as root:root.
    # TODO: weigh implications of having zepp's conf dir owned by non-root.
    cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path('zeppelin_conf'))
    call(cmd.split())
def configure_jobhistory(self):
    self.configure_yarn_base(*self._local())
    dc = self.hadoop_base.dist_config
    mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
    with utils.xmlpropmap_edit_in_place(mapred_site) as props:
        # 0.0.0.0 will listen on all interfaces, which is what we want on the server
        props["mapreduce.jobhistory.address"] = "0.0.0.0:{}".format(dc.port('jobhistory'))
        props["mapreduce.jobhistory.webapp.address"] = "0.0.0.0:{}".format(dc.port('jh_webapp_http'))
        props["mapreduce.jobhistory.intermediate-done-dir"] = "/mr-history/tmp"
        props["mapreduce.jobhistory.done-dir"] = "/mr-history/done"
    self.hadoop_base.setup_init_script(user='******', servicename='historyserver')
def test_xmlpropmap_edit_in_place(self):
    fd, filename = tempfile.mkstemp()
    os.close(fd)
    tmp_file = Path(filename)
    try:
        tmp_file.write_text(
            '<?xml version="1.0"?>\n'
            '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>\n'
            '\n'
            '<!-- Put site-specific property overrides in this file. -->\n'
            '\n'
            '<configuration>\n'
            '    <property>\n'
            '        <name>modify.me</name>\n'
            '        <value>1</value>\n'
            '        <description>Property to be modified</description>\n'
            '    </property>\n'
            '    <property>\n'
            '        <name>delete.me</name>\n'
            '        <value>None</value>\n'
            '        <description>Property to be removed</description>\n'
            '    </property>\n'
            '    <property>\n'
            '        <name>do.not.modify.me</name>\n'
            '        <value>0</value>\n'
            '        <description>Property to *not* be modified</description>\n'
            '    </property>\n'
            '</configuration>')
        with utils.xmlpropmap_edit_in_place(tmp_file) as props:
            del props['delete.me']
            props['modify.me'] = 'one'
            props['add.me'] = 'NEW'
        self.assertEqual(
            tmp_file.text(),
            '<?xml version="1.0" ?>\n'
            '<configuration>\n'
            '    <property>\n'
            '        <name>modify.me</name>\n'
            '        <value>one</value>\n'
            '        <description>Property to be modified</description>\n'
            '    </property>\n'
            '    <property>\n'
            '        <name>do.not.modify.me</name>\n'
            '        <value>0</value>\n'
            '        <description>Property to *not* be modified</description>\n'
            '    </property>\n'
            '    <property>\n'
            '        <name>add.me</name>\n'
            '        <value>NEW</value>\n'
            '    </property>\n'
            '</configuration>\n')
    finally:
        tmp_file.remove()
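# The test above pins down the contract of `xmlpropmap_edit_in_place`: expose
# Hadoop-style <property> entries as a name->value dict, then write updates,
# deletions, and additions back on exit. Below is a minimal sketch of such a
# context manager; it is an assumed reimplementation for illustration only
# (the real helper also preserves <description> elements and pretty-prints
# exactly as the test expects).
from contextlib import contextmanager
import xml.etree.ElementTree as ET

@contextmanager
def xmlpropmap_edit_in_place(path):
    tree = ET.parse(str(path))
    root = tree.getroot()  # the <configuration> element
    props = {p.findtext('name'): p.findtext('value')
             for p in root.findall('property')}
    yield props
    for p in list(root.findall('property')):
        name = p.findtext('name')
        if name not in props:
            root.remove(p)  # key deleted by the caller
        else:
            p.find('value').text = str(props.pop(name))  # key updated
    for name, value in props.items():  # keys added by the caller
        p = ET.SubElement(root, 'property')
        ET.SubElement(p, 'name').text = name
        ET.SubElement(p, 'value').text = str(value)
    tree.write(str(path), xml_declaration=True)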
def configure_resourcemanager(self):
    self.configure_yarn_base(*self._local())
    dc = self.hadoop_base.dist_config
    yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
    with utils.xmlpropmap_edit_in_place(yarn_site) as props:
        # 0.0.0.0 will listen on all interfaces, which is what we want on the server
        props['yarn.resourcemanager.webapp.address'] = '0.0.0.0:{}'.format(dc.port('rm_webapp_http'))
        # TODO: support SSL
        # props['yarn.resourcemanager.webapp.https.address'] = '0.0.0.0:{}'.format(dc.port('rm_webapp_https'))
    self.hadoop_base.setup_init_script(user='******', servicename='resourcemanager')
def trigger_puppet(self):
    # If we can't reverse resolve the hostname (like on azure), support DN
    # registration by IP address.
    # NB: determine this *before* updating /etc/hosts below since
    # gethostbyaddr will not fail if we have an /etc/hosts entry.
    reverse_dns_bad = False
    try:
        socket.gethostbyaddr(utils.resolve_private_address(hookenv.unit_private_ip()))
    except socket.herror:
        reverse_dns_bad = True

    # We know java7 has MAXHOSTNAMELEN of 64 char, so we cannot rely on
    # java to do a hostname lookup on clouds that have >64 char fqdns (gce).
    # Force short hostname (< 64 char) into /etc/hosts as workaround.
    # Better fix may be to move to java8. See http://paste.ubuntu.com/16230171/
    # NB: do this before the puppet apply, which may call java stuffs
    # like format namenode, which will fail if we don't get this fix
    # down early.
    short_host = subprocess.check_output(['facter', 'hostname']).strip().decode()
    private_ip = utils.resolve_private_address(hookenv.unit_private_ip())
    if short_host and private_ip:
        utils.update_kv_host(private_ip, short_host)
        utils.manage_etc_hosts()

    charm_dir = hookenv.charm_dir()
    # TODO JIRA KWM: rm does not need Hdfs_init and will fail
    rm_patch = Path(charm_dir) / 'resources/patch1_rm_init_hdfs.patch'
    # TODO JIRA KWM: nm should not *need* mapred role. we could patch it
    # with nm_patch, or adjust nm charm to include mapred role. for now,
    # we're doing the latter. todo rfc from dev@bigtop list.
    # nm_patch = Path(charm_dir) / 'resources/patch2_nm_core-site.patch'
    # TODO JIRA KWM: client role needs common_yarn for yarn-site.xml
    client_patch = Path(charm_dir) / 'resources/patch3_client_role_use_common_yarn.patch'
    with chdir("{}".format(self.bigtop_base)):
        # rm patch goes first
        utils.run_as('root', 'patch', '-p1', '-s', '-i', rm_patch)
        # skip nm_patch for now since nm charm is including mapred role
        # utils.run_as('root', 'patch', '-p1', '-s', '-i', nm_patch)
        # client patch goes last
        utils.run_as('root', 'patch', '-p1', '-s', '-i', client_patch)
    # TODO FIX ABOVE KWM

    # puppet apply needs to be run where the recipes were unpacked
    with chdir("{}".format(self.bigtop_base)):
        utils.run_as('root', 'puppet', 'apply', '-d',
                     '--modulepath="bigtop-deploy/puppet/modules:/etc/puppet/modules"',
                     'bigtop-deploy/puppet/manifests/site.pp')

    # Do any post-puppet config on the generated config files.
    if reverse_dns_bad:
        hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
        with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
            props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'false'
def configure_yarn_base(self, host, port, history_http, history_ipc):
    dc = self.hadoop_base.dist_config
    yarn_site = dc.path('hadoop_conf') / 'yarn-site.xml'
    with utils.xmlpropmap_edit_in_place(yarn_site) as props:
        props['yarn.nodemanager.aux-services'] = 'mapreduce_shuffle'
        props['yarn.nodemanager.vmem-check-enabled'] = 'false'
        if host:
            props['yarn.resourcemanager.hostname'] = '{}'.format(host)
            props['yarn.resourcemanager.address'] = '{}:{}'.format(host, port)
            props["yarn.log.server.url"] = "{}:{}/jobhistory/logs/".format(host, history_http)
    mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
    with utils.xmlpropmap_edit_in_place(mapred_site) as props:
        if host and history_ipc:
            props["mapreduce.jobhistory.address"] = "{}:{}".format(host, history_ipc)
        if host and history_http:
            props["mapreduce.jobhistory.webapp.address"] = "{}:{}".format(host, history_http)
        props["mapreduce.framework.name"] = 'yarn'
        props["mapreduce.jobhistory.intermediate-done-dir"] = "/mr-history/tmp"
        props["mapreduce.jobhistory.done-dir"] = "/mr-history/done"
        props["mapreduce.map.output.compress"] = 'true'
        props["mapred.map.output.compress.codec"] = 'org.apache.hadoop.io.compress.SnappyCodec'
        props["mapreduce.application.classpath"] = "$HADOOP_HOME/share/hadoop/mapreduce/*,\
def configure_hdfs_base(self, host, port):
    dc = self.hadoop_base.dist_config
    core_site = dc.path('hadoop_conf') / 'core-site.xml'
    with utils.xmlpropmap_edit_in_place(core_site) as props:
        if host and port:
            props['fs.defaultFS'] = "hdfs://{host}:{port}".format(host=host, port=port)
        props['hadoop.proxyuser.hue.hosts'] = "*"
        props['hadoop.proxyuser.hue.groups'] = "*"
        props['hadoop.proxyuser.oozie.groups'] = '*'
        props['hadoop.proxyuser.oozie.hosts'] = '*'
        lzo_installed = unitdata.kv().get('hadoop.lzo.installed')
        lzo_enabled = hookenv.config().get('compression') == 'lzo'
        if lzo_installed and lzo_enabled:
            props['io.compression.codecs'] = ('com.hadoop.compression.lzo.LzoCodec, '
                                              'com.hadoop.compression.lzo.LzopCodec')
            props['io.compression.codec.lzo.class'] = 'com.hadoop.compression.lzo.LzoCodec'
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.webhdfs.enabled'] = "true"
        props['dfs.namenode.name.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
        props['dfs.datanode.data.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
        props['dfs.permissions'] = 'false'  # TODO - secure this hadoop installation!
def configure_local_db(self):
    local_url = 'jdbc:derby:;databaseName=/var/lib/hive/metastore/metastore_db;create=true'
    local_driver = 'org.apache.derby.jdbc.EmbeddedDriver'
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = local_url
        props['javax.jdo.option.ConnectionUserName'] = '******'
        props['javax.jdo.option.ConnectionPassword'] = '******'
        props['javax.jdo.option.ConnectionDriverName'] = local_driver
    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HIVE_AUX_JARS_PATH *=.*': '# export HIVE_AUX_JARS_PATH=',
    })
def configure_hdfs_base(self, clustername, namenodes, port, webhdfs_port):
    dc = self.hadoop_base.dist_config
    core_site = dc.path('hadoop_conf') / 'core-site.xml'
    with utils.xmlpropmap_edit_in_place(core_site) as props:
        props['hadoop.proxyuser.hue.hosts'] = "*"
        props['hadoop.proxyuser.hue.groups'] = "*"
        props['hadoop.proxyuser.oozie.groups'] = '*'
        props['hadoop.proxyuser.oozie.hosts'] = '*'
        if 'lzo' in self.hadoop_base.resources:
            props['io.compression.codecs'] = ('org.apache.hadoop.io.compress.GzipCodec, '
                                              'org.apache.hadoop.io.compress.DefaultCodec, '
                                              'org.apache.hadoop.io.compress.BZip2Codec, '
                                              'org.apache.hadoop.io.compress.SnappyCodec, '
                                              'com.hadoop.compression.lzo.LzoCodec, '
                                              'com.hadoop.compression.lzo.LzopCodec')
            props['io.compression.codec.lzo.class'] = 'com.hadoop.compression.lzo.LzoCodec'
        else:
            props['io.compression.codecs'] = ('org.apache.hadoop.io.compress.GzipCodec, '
                                              'org.apache.hadoop.io.compress.DefaultCodec, '
                                              'org.apache.hadoop.io.compress.BZip2Codec, '
                                              'org.apache.hadoop.io.compress.SnappyCodec')
        props['fs.defaultFS'] = "hdfs://{clustername}".format(clustername=clustername)
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.webhdfs.enabled'] = "true"
        props['dfs.namenode.name.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
        props['dfs.datanode.data.dir'] = dc.path('hdfs_dir_base') / 'cache/hadoop/dfs/name'
        props['dfs.permissions'] = 'false'  # TODO - secure this hadoop installation!
        props['dfs.nameservices'] = clustername
        props['dfs.client.failover.proxy.provider.%s' % clustername] = \
            'org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider'
        props['dfs.ha.fencing.methods'] = 'sshfence\nshell(/bin/true)'
        props['dfs.ha.fencing.ssh.private-key-files'] = utils.ssh_priv_key('hdfs')
        props['dfs.ha.namenodes.%s' % clustername] = ','.join(namenodes)
        for node in namenodes:
            props['dfs.namenode.rpc-address.%s.%s' % (clustername, node)] = '%s:%s' % (node, port)
            props['dfs.namenode.http-address.%s.%s' % (clustername, node)] = '%s:%s' % (node, webhdfs_port)
def configure_jobhistory(self):
    self.configure_yarn_base(*self._local())
    dc = self.hadoop_base.dist_config
    mapred_site = dc.path('hadoop_conf') / 'mapred-site.xml'
    with utils.xmlpropmap_edit_in_place(mapred_site) as props:
        # 0.0.0.0 will listen on all interfaces, which is what we want on the server
        props["mapreduce.jobhistory.address"] = "0.0.0.0:{}".format(dc.port('jobhistory'))
        props["mapreduce.jobhistory.webapp.address"] = "0.0.0.0:{}".format(dc.port('jh_webapp_http'))
        props["mapreduce.jobhistory.intermediate-done-dir"] = "/mr-history/tmp"
        props["mapreduce.jobhistory.done-dir"] = "/mr-history/done"
    self.hadoop_base.setup_init_script(user='******', servicename='historyserver')
def configure_zeppelin(self):
    """
    Configure zeppelin environment for all users
    """
    zeppelin_bin = self.dist_config.path("zeppelin") / "bin"
    with utils.environment_edit_in_place("/etc/environment") as env:
        if zeppelin_bin not in env["PATH"]:
            env["PATH"] = ":".join([env["PATH"], zeppelin_bin])
        env["ZEPPELIN_CONF_DIR"] = self.dist_config.path("zeppelin_conf")

    zeppelin_site = self.dist_config.path("zeppelin_conf") / "zeppelin-site.xml"
    with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
        xml["zeppelin.server.port"] = self.dist_config.port("zeppelin")
        xml["zeppelin.notebook.dir"] = self.dist_config.path("zeppelin_notebooks")

    etc_env = utils.read_etc_env()
    hadoop_conf_dir = etc_env.get("HADOOP_CONF_DIR", "/etc/hadoop/conf")
    spark_home = etc_env.get("SPARK_HOME", "/usr/lib/spark")
    spark_driver_mem = etc_env.get("SPARK_DRIVER_MEMORY", "1g")
    spark_exe_mode = os.environ.get("MASTER", "yarn-client")
    spark_executor_mem = etc_env.get("SPARK_EXECUTOR_MEMORY", "1g")
    zeppelin_env = self.dist_config.path("zeppelin_conf") / "zeppelin-env.sh"
    with open(zeppelin_env, "a") as f:
        f.write("export ZEPPELIN_HOME={}\n".format(self.dist_config.path("zeppelin")))
        f.write(
            'export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'.format(
                spark_driver_mem, spark_executor_mem
            )
        )
        f.write("export ZEPPELIN_LOG_DIR={}\n".format(self.dist_config.path("zeppelin_logs")))
        f.write('export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n')
        f.write("export ZEPPELIN_NOTEBOOK_DIR={}\n".format(self.dist_config.path("zeppelin_notebooks")))
        f.write("export SPARK_HOME={}\n".format(spark_home))
        f.write(
            'export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'.format(
                spark_driver_mem, spark_executor_mem
            )
        )
        f.write("export HADOOP_CONF_DIR={}\n".format(hadoop_conf_dir))
        f.write("export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n".format(s=spark_home))
        f.write("export MASTER={}\n".format(spark_exe_mode))

    # User needs write access to zepp's conf to write interpreter.json
    # on server start. chown the whole conf dir, though we could probably
    # touch that file and chown it, leaving the rest owned as root:root.
    # TODO: weigh implications of having zepp's conf dir owned by non-root.
    cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path("zeppelin_conf"))
    call(cmd.split())
def configure_zeppelin(self):
    '''
    Configure zeppelin environment for all users
    '''
    zeppelin_bin = self.dist_config.path('zeppelin') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if zeppelin_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], zeppelin_bin])
        env['ZEPPELIN_CONF_DIR'] = self.dist_config.path('zeppelin_conf')

    zeppelin_site = self.dist_config.path('zeppelin_conf') / 'zeppelin-site.xml'
    with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
        xml['zeppelin.server.port'] = self.dist_config.port('zeppelin')
        xml['zeppelin.notebook.dir'] = self.dist_config.path('zeppelin_notebooks')

    etc_env = utils.read_etc_env()
    hadoop_conf_dir = etc_env.get('HADOOP_CONF_DIR', '/etc/hadoop/conf')
    hadoop_extra_classpath = etc_env.get('HADOOP_EXTRA_CLASSPATH', '')
    spark_home = etc_env.get('SPARK_HOME', '/usr/lib/spark')
    spark_driver_mem = etc_env.get('SPARK_DRIVER_MEMORY', '1g')
    spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
    spark_executor_mem = etc_env.get('SPARK_EXECUTOR_MEMORY', '1g')
    zeppelin_env = self.dist_config.path('zeppelin_conf') / 'zeppelin-env.sh'
    with open(zeppelin_env, "a") as f:
        f.write('export ZEPPELIN_CLASSPATH_OVERRIDES={}\n'.format(hadoop_extra_classpath))
        f.write('export ZEPPELIN_HOME={}\n'.format(self.dist_config.path('zeppelin')))
        f.write('export ZEPPELIN_JAVA_OPTS="-Dspark.driver.memory={} -Dspark.executor.memory={}"\n'.format(
            spark_driver_mem, spark_executor_mem))
        f.write('export ZEPPELIN_LOG_DIR={}\n'.format(self.dist_config.path('zeppelin_logs')))
        f.write('export ZEPPELIN_MEM="-Xms128m -Xmx1024m -XX:MaxPermSize=512m"\n')
        f.write('export ZEPPELIN_NOTEBOOK_DIR={}\n'.format(self.dist_config.path('zeppelin_notebooks')))
        f.write('export SPARK_HOME={}\n'.format(spark_home))
        f.write('export SPARK_SUBMIT_OPTIONS="--driver-memory {} --executor-memory {}"\n'.format(
            spark_driver_mem, spark_executor_mem))
        f.write('export HADOOP_CONF_DIR={}\n'.format(hadoop_conf_dir))
        f.write('export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip\n'.format(s=spark_home))
        f.write('export MASTER={}\n'.format(spark_exe_mode))

    # User needs write access to zepp's conf to write interpreter.json
    # on server start. chown the whole conf dir, though we could probably
    # touch that file and chown it, leaving the rest owned as root:root.
    # TODO: weigh implications of having zepp's conf dir owned by non-root.
    cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path('zeppelin_conf'))
    call(cmd.split())
def configure_local_db(self):
    local_url = ('jdbc:derby:;databaseName='
                 '/var/lib/hive/metastore/metastore_db;create=true')
    local_driver = 'org.apache.derby.jdbc.EmbeddedDriver'
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = local_url
        props['javax.jdo.option.ConnectionUserName'] = '******'
        props['javax.jdo.option.ConnectionPassword'] = '******'
        props['javax.jdo.option.ConnectionDriverName'] = local_driver
    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HIVE_AUX_JARS_PATH *=.*': '# export HIVE_AUX_JARS_PATH=',
    })
def configure_namenode(self, namenodes):
    dc = self.hadoop_base.dist_config
    clustername = hookenv.service_name()
    host = hookenv.local_unit().replace('/', '-')
    self.configure_hdfs_base(clustername, namenodes,
                             dc.port('namenode'), dc.port('nn_webapp_http'))
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.datanode.registration.ip-hostname-check'] = 'true'
        props['dfs.namenode.http-address.%s.%s' % (clustername, host)] = \
            '%s:%s' % (host, dc.port('nn_webapp_http'))
        props['dfs.namenode.rpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.servicerpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.http-bind-host'] = '0.0.0.0'
        props['dfs.namenode.https-bind-host'] = '0.0.0.0'
    self.hadoop_base.setup_init_script("hdfs", "namenode")
def setup_hue(self, namenodes, resourcemanagers, hdfs_port, yarn_port):
    hookenv.status_set('maintenance', 'Setting up Hue')
    hue_bin = self.dist_config.path('hue') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if hue_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], hue_bin])
        env['HADOOP_BIN_DIR'] = env['HADOOP_HOME'] + '/bin'
        env['GOBBLIN_WORK_DIR'] = self.dist_config.path('outputdir')
        yarn_conf = env['HADOOP_CONF_DIR'] + '/yarn-site.xml'

    with utils.xmlpropmap_edit_in_place(yarn_conf) as props:
        yarn_log_url = props['yarn.log.server.url']  # 19888
        yarn_resmgr = props['yarn.resourcemanager.address']  # 8032

    default_conf = self.dist_config.path('hue') / 'desktop/conf'
    hue_conf = self.dist_config.path('hue_conf')
    if os.path.islink('/usr/lib/hue/desktop/conf'):
        return
    else:
        hue_conf.rmtree_p()
        default_conf.copytree(hue_conf)
        # Now remove the conf included in the tarball and symlink our real conf
        default_conf.rmtree_p()
        hue_conf.symlink(default_conf)

    hue_port = self.dist_config.port('hue_web')
    # Fix following for HA: http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.3.0/bk_hadoop-ha/content/ha-nn-deploy-hue.html
    hookenv.log("Not currently supporting HA, FIX: namenodes are: " +
                str(namenodes) + " resmanagers: " + str(resourcemanagers))
    utils.re_edit_in_place(self.hue_config, {
        r'http_port=8888': 'http_port={}'.format(hue_port),
        r'fs_defaultfs=hdfs://localhost:8020': 'fs_defaultfs={}:{}'.format(namenodes[0], hdfs_port),
        r'.*resourcemanager_host=localhost': 'resourcemanager_host={}'.format(resourcemanagers[0]),
        r'.*resourcemanager_port=8032': 'resourcemanager_port={}'.format(yarn_port),
        r'.*webhdfs_url=http://localhost:50070/webhdfs/v1': 'webhdfs_url=http://{}:50070/webhdfs/v1'.format(namenodes[0]),
        r'.*history_server_api_url=http://localhost:19888': 'history_server_api_url={}'.format(yarn_log_url.split('/')[0]),
        r'.*resourcemanager_api_url=http://localhost:8088': 'resourcemanager_api_url=http://{}:8088'.format(yarn_resmgr.split(':')[0]),
        r'.*secret_key=.*': 'secret_key={}'.format(uuid.uuid4()),
    })
    self.update_apps()
def configure_hive(self, mysql):
    config = hookenv.config()
    hive_site = self.dist_config.path('hive_conf') / 'hive-site.xml'
    with utils.xmlpropmap_edit_in_place(hive_site) as props:
        props['javax.jdo.option.ConnectionURL'] = "jdbc:mysql://{}:{}/{}".format(
            mysql.host(), mysql.port(), mysql.database())
        props['javax.jdo.option.ConnectionUserName'] = mysql.user()
        props['javax.jdo.option.ConnectionPassword'] = mysql.password()
        props['javax.jdo.option.ConnectionDriverName'] = "com.mysql.jdbc.Driver"
        props['hive.hwi.war.file'] = "lib/hive-hwi-%s.jar" % self.HIVE_VERSION[self.cpu_arch]
    hive_env = self.dist_config.path('hive_conf') / 'hive-env.sh'
    utils.re_edit_in_place(hive_env, {
        r'.*export HADOOP_HEAPSIZE *=.*': 'export HADOOP_HEAPSIZE=%s' % config['heap'],
        r'.*export HIVE_AUX_JARS_PATH *=.*': 'export HIVE_AUX_JARS_PATH=/usr/share/java/mysql-connector-java.jar',
    })
    # Now that we have db connection info, init our schema (only once)
    if not unitdata.kv().get('hive.schema.initialized'):
        utils.run_as('hive', 'schematool', '-initSchema', '-dbType', 'mysql')
        unitdata.kv().set('hive.schema.initialized', True)
def configure_zeppelin(self):
    '''
    Configure zeppelin environment for all users
    '''
    zeppelin_bin = self.dist_config.path('zeppelin') / 'bin'
    with utils.environment_edit_in_place('/etc/environment') as env:
        if zeppelin_bin not in env['PATH']:
            env['PATH'] = ':'.join([env['PATH'], zeppelin_bin])
        env['ZEPPELIN_CONF_DIR'] = self.dist_config.path('zeppelin_conf')

    zeppelin_site = self.dist_config.path('zeppelin_conf') / 'zeppelin-site.xml'
    with utils.xmlpropmap_edit_in_place(zeppelin_site) as xml:
        xml['zeppelin.server.port'] = self.dist_config.port('zeppelin')
        xml['zeppelin.websocket.port'] = self.dist_config.port('zeppelin_web')
        xml['zeppelin.notebook.dir'] = self.dist_config.path('zeppelin_notebooks')

    hadoop_conf_dir = os.environ.get('HADOOP_CONF_DIR', '/etc/hadoop/conf')
    spark_home = os.environ.get('SPARK_HOME', '/usr/lib/spark')
    spark_exe_mode = os.environ.get('MASTER', 'yarn-client')
    zeppelin_env = self.dist_config.path('zeppelin_conf') / 'zeppelin-env.sh'
    self.re_edit_in_place(zeppelin_env, {
        r'.*export ZEPPELIN_HOME.*': 'export ZEPPELIN_HOME={}'.format(self.dist_config.path('zeppelin')),
        r'.*export ZEPPELIN_LOG_DIR.*': 'export ZEPPELIN_LOG_DIR={}'.format(self.dist_config.path('zeppelin_logs')),
        r'.*export ZEPPELIN_NOTEBOOK_DIR.*': 'export ZEPPELIN_NOTEBOOK_DIR={}'.format(self.dist_config.path('zeppelin_notebooks')),
        r'.*export SPARK_HOME.*': 'export SPARK_HOME={}'.format(spark_home),
        r'.*export HADOOP_CONF_DIR.*': 'export HADOOP_CONF_DIR={}'.format(hadoop_conf_dir),
        r'.*export PYTHONPATH.*': 'export PYTHONPATH={s}/python:{s}/python/lib/py4j-0.8.2.1-src.zip'.format(s=spark_home),
        r'.*export MASTER.*': 'export MASTER={}'.format(spark_exe_mode),
        r'.*export SPARK_YARN_USER_ENV.*': 'export SPARK_YARN_USER_ENV="PYTHONPATH=${PYTHONPATH}"',
    }, add_if_not_found=True)

    # User needs write access to zepp's conf to write interpreter.json
    # on server start. chown the whole conf dir, though we could probably
    # touch that file and chown it, leaving the rest owned as root:root.
    # TODO: weigh implications of having zepp's conf dir owned by non-root.
    cmd = "chown -R ubuntu:hadoop {}".format(self.dist_config.path('zeppelin_conf'))
    call(cmd.split())
def install_namenode():
    hookenv.status_set('maintenance', 'installing namenode')
    bigtop = Bigtop()
    bigtop.render_site_yaml(
        hosts={
            'namenode': get_fqdn(),
        },
        roles=[
            'namenode',
            'mapred-app',
        ],
    )
    bigtop.trigger_puppet()

    # /etc/hosts entries from the KV are not currently used for bigtop,
    # but a hosts_map attribute is required by some interfaces (eg: dfs-slave)
    # to signify NN's readiness. Set our NN info in the KV to fulfill this
    # requirement.
    utils.initialize_kv_host()

    # make our namenode listen on all interfaces
    hdfs_site = Path('/etc/hadoop/conf/hdfs-site.xml')
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.namenode.rpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.servicerpc-bind-host'] = '0.0.0.0'
        props['dfs.namenode.http-bind-host'] = '0.0.0.0'
        props['dfs.namenode.https-bind-host'] = '0.0.0.0'

    # We need to create the 'mapred' user/group since we are not installing
    # hadoop-mapreduce. This is needed so the namenode can access yarn
    # job history files in hdfs. Also add our ubuntu user to the hadoop
    # and mapred groups.
    get_layer_opts().add_users()

    set_state('apache-bigtop-namenode.installed')
    hookenv.status_set('maintenance', 'namenode installed')
def build_oozie_sharelib(self):
    core_conf = self.dist_config.path('hadoop_conf') / "core-site.xml"
    with utils.xmlpropmap_edit_in_place(core_conf) as e:
        namenodeURL = e['fs.defaultFS']
    slib = '/usr/lib/oozie/'
    utils.run_as('oozie', 'oozie-setup.sh', 'sharelib', 'create',
                 '-fs', namenodeURL, '-locallib', slib)
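# `utils.run_as` executes a command as another user. A minimal sketch is
# below, assuming sudo is available on the unit; the actual helper may use
# `su` or set up the environment differently.
import subprocess

def run_as(user, *command):
    """Run `command` as `user`, raising CalledProcessError on failure."""
    subprocess.check_call(['sudo', '-H', '-u', user] + [str(c) for c in command])

# e.g. run_as('hive', 'hdfs', 'dfs', '-mkdir', '-p', '/user/hive/warehouse')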
def reconfigure_hdfs():
    cfg = hookenv.config()
    hdfs_site = get_dist_config().path('hadoop_conf') / 'hdfs-site.xml'
    with xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.replication'] = cfg['dfs_replication']
        props['dfs.blocksize'] = int(cfg['dfs_blocksize'])
def configure_journalnode(self):
    dc = self.hadoop_base.dist_config
    hdfs_site = dc.path('hadoop_conf') / 'hdfs-site.xml'
    with utils.xmlpropmap_edit_in_place(hdfs_site) as props:
        props['dfs.journalnode.rpc-address'] = '0.0.0.0:{}'.format(dc.port('journalnode'))
        props['dfs.journalnode.http-address'] = '0.0.0.0:{}'.format(dc.port('jn_webapp_http'))