def metadata():
    import params

    Directory([params.pid_dir],
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.conf_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.log_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.data_dir,
              mode=0644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.expanded_war_dir,
              mode=0644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    File(format("{expanded_war_dir}/atlas.war"),
         content=StaticFile(format('{metadata_home}/server/webapp/atlas.war')))

    PropertiesFile(format('{conf_dir}/application.properties'),
                   properties=params.application_properties,
                   mode=0644,
                   owner=params.metadata_user,
                   group=params.user_group)

    File(format("{conf_dir}/atlas-env.sh"),
         owner=params.metadata_user,
         group=params.user_group,
         mode=0755,
         content=InlineTemplate(params.metadata_env_content))

    File(format("{conf_dir}/atlas-log4j.xml"),
         mode=0644,
         owner=params.metadata_user,
         group=params.user_group,
         content=StaticFile('atlas-log4j.xml'))
def setup_livy(env, type, upgrade_type=None, action=None):
    import params

    Directory([params.livy2_pid_dir, params.livy2_log_dir],
              owner=params.livy2_user,
              group=params.user_group,
              mode=0775,
              create_parents=True)

    if type == 'server' and action == 'config':
        params.HdfsResource(params.livy2_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.livy2_user,
                            mode=0775)
        params.HdfsResource(None, action="execute")

    # create livy-env.sh in etc/conf dir
    File(
        os.path.join(params.livy2_conf, 'livy-env.sh'),
        owner=params.livy2_user,
        group=params.livy2_group,
        content=InlineTemplate(params.livy2_env_sh),
        mode=0644,
    )

    # create livy.conf in etc/conf dir
    PropertiesFile(
        format("{livy2_conf}/livy.conf"),
        properties=params.config['configurations']['livy2-conf'],
        key_value_delimiter=" ",
        owner=params.livy2_user,
        group=params.livy2_group,
    )

    # create log4j.properties in etc/conf dir
    File(
        os.path.join(params.livy2_conf, 'log4j.properties'),
        owner=params.livy2_user,
        group=params.livy2_group,
        content=params.livy2_log4j_properties,
        mode=0644,
    )

    # create spark-blacklist.conf in etc/conf dir
    File(
        os.path.join(params.livy2_conf, 'spark-blacklist.conf'),
        owner=params.livy2_user,
        group=params.livy2_group,
        content=params.livy2_spark_blacklist_properties,
        mode=0644,
    )

    Directory(
        params.livy2_logs_dir,
        owner=params.livy2_user,
        group=params.livy2_group,
        mode=0755,
    )
def install_airflow(self, env):
    import params
    env.set_params(params)

    # build tools are required to compile some of the pip dependencies
    Execute('yum groupinstall -y "Development Tools"')
    Execute("/opt/anaconda/bin/pip3 install 'pyqtwebengine<5.13' --force-reinstall")
    Execute('/opt/anaconda/bin/pip3 install "pyqt5<5.13" --force-reinstall')
    Execute(
        '/opt/anaconda/bin/pip3 install apache-airflow==1.10.6 --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1-10/constraints-3.8.txt"'
    )

    Directory(params.airflow_home_directory,
              create_parents=True,
              owner='root',
              group='root',
              mode=0o0755)

    # initialize the Airflow metadata database under AIRFLOW_HOME
    Execute('export AIRFLOW_HOME={0} && /opt/anaconda/bin/airflow initdb'.format(
        params.airflow_home_directory))

    # pin SQLAlchemy to a version compatible with Airflow 1.10.6
    Execute('/opt/anaconda/bin/pip3 uninstall SQLAlchemy -y')
    Execute('/opt/anaconda/bin/pip3 install SQLAlchemy==1.3.15')

    try:
        self.configure_airflow(env)
    except ExecutionFailed as ef:
        print("Error {0}".format(ef))
    return
def install_livy():
    import params

    Directory([params.livy2_conf],
              owner=params.livy2_user,
              group=params.user_group,
              mode=0775,
              create_parents=True)

    if (not os.path.exists('/opt/' + params.livy_version_dir)
            or not os.path.exists(params.livy_install_dir)) and params.has_livyserver:
        Execute('rm -rf %s' % '/opt/' + params.livy_version_dir)
        Execute('rm -rf %s' % params.livy_install_dir)
        Execute('wget ' + params.livy_download_url + ' -O /tmp/' + params.livy_filename,
                user=params.livy2_user)
        Execute('tar -zxf /tmp/' + params.livy_filename + ' -C /opt')
        Execute('ln -s /opt/' + params.livy_version_dir + ' ' + params.livy_install_dir)
        Execute(' rm -rf ' + params.livy_install_dir + '/conf')
        Execute('ln -s ' + params.livy2_conf + ' ' + params.livy_install_dir + '/conf')
        Execute('chown -R %s:%s /opt/%s' %
                (params.livy2_user, params.livy2_group, params.livy_version_dir))
        Execute('chown -R %s:%s %s' %
                (params.livy2_user, params.livy2_group, params.livy_install_dir))
        # remove the downloaded archive once it has been unpacked
        Execute('/bin/rm -f /tmp/' + params.livy_filename)
def setup_hadoop_env():
    import params

    stackversion = params.stack_version_unformatted
    Logger.info("FS Type: {0}".format(params.dfs_type))

    if params.has_namenode or stackversion.find('Gluster') >= 0 or params.dfs_type == 'HCFS':
        if params.security_enabled:
            tc_owner = "root"
        else:
            tc_owner = params.hdfs_user

        # create /etc/hadoop
        #Directory(params.hadoop_dir, mode=0755)

        # write out hadoop-env.sh, but only if the directory exists
        if os.path.exists(params.hadoop_conf_dir):
            File(os.path.join(params.hadoop_conf_dir, 'hadoop-env.sh'),
                 owner=tc_owner,
                 group=params.user_group,
                 content=InlineTemplate(params.hadoop_env_sh_template))

        # Create tmp dir for java.io.tmpdir
        # Handle a situation when /tmp is set to noexec
        try:
            Directory(params.hadoop_java_io_tmpdir,
                      owner=params.hdfs_user,
                      group=params.user_group,
                      mode=01777)
        except Exception as e:
            Logger.info('Skipping creation of java.io.tmpdir {0}: {1}'.format(
                params.hadoop_java_io_tmpdir, str(e)))
def install_spark():
    import params

    Directory([params.spark_conf, params.spark_logs_dir],
              owner=params.spark_user,
              group=params.user_group,
              mode=0775,
              create_parents=True)

    if not os.path.exists('/opt/' + params.version_dir) or not os.path.exists(
            params.install_dir):
        Execute('rm -rf %s' % '/opt/' + params.version_dir)
        Execute('rm -rf %s' % params.install_dir)
        Execute('wget ' + params.download_url + ' -O /tmp/' + params.filename,
                user=params.spark_user)
        Execute('tar -zxf /tmp/' + params.filename + ' -C /opt')
        Execute('ln -s /opt/' + params.version_dir + ' ' + params.install_dir)
        Execute(' rm -rf ' + params.install_dir + '/conf')
        Execute('ln -s ' + params.spark_conf + ' ' + params.install_dir + '/conf')
        Execute("echo 'export PATH=%s/bin:$PATH'>>/etc/profile.d/hadoop.sh" %
                params.install_dir)
        Execute('chown -R %s:%s /opt/%s' %
                (params.spark_user, params.user_group, params.version_dir))
        Execute('chown -R %s:%s %s' %
                (params.spark_user, params.user_group, params.install_dir))
        Execute('/bin/rm -f /tmp/' + params.filename)

    if params.hive_interactive_enabled and params.spark_llap_enabled:
        # download the Spark-LLAP connector jar into the Spark jars directory
        Execute('wget ' + params.spark_llap_jar_url + ' -P ' + params.install_dir + '/jars/',
                user=params.spark_user)
def install_ac(self, env):
    import params
    env.set_params(params)

    # systemd unit definition for the JupyterLab service
    filestr = """[Unit]
Description=Jupyter-Notebook service
After=network.target
StartLimitIntervalSec=0
[Service]
Type=simple
Restart=always
RestartSec=1
User=root
ExecStart=/opt/anaconda/bin/jupyter-lab --config {0}jupyter_notebook_config.py
[Install]
WantedBy=multi-user.target""".format(params.config_dir)

    if 'anaconda' in os.listdir("/opt"):
        print("already installed")
    else:
        Execute(
            "curl -o /tmp/anaconda.sh https://repo.anaconda.com/archive/Anaconda3-2020.07-Linux-x86_64.sh"
        )
        Execute("bash /tmp/anaconda.sh -b -p /opt/anaconda")
        # note: this export only affects the shell spawned for this Execute
        Execute("export PATH=$PATH:/opt/anaconda/bin/")
        Execute('rm -f /tmp/anaconda.sh')

    # make sure /opt/anaconda/bin ends up on root's PATH
    path = os.environ['PATH']
    binary_directory = '/opt/anaconda/bin'
    print(path)
    if binary_directory not in path:
        print("Not in path")
        new_path = 'PATH=$PATH:' + binary_directory + "\n"
        with open('/root/.bash_profile') as f:
            text = f.readlines()
        if new_path not in text:
            text.insert(-2, new_path)
            text = "".join(text)
            with open('/root/.bash_profile', 'w') as f:
                f.write(text)
            print(text)
    else:
        print('already in path')
    Execute('source /root/.bash_profile')

    if 'jupyter' in os.listdir("/opt"):
        print("directory exists")
    else:
        Directory(params.config_dir, create_parents=True)

    # register and load the systemd unit
    Execute('echo "{0}" > /etc/systemd/system/jupyter.service'.format(filestr))
    Execute('sudo systemctl daemon-reload')
    self.configure_ac(env)
def metadata():
    import params

    Directory([params.pid_dir],
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True)

    Directory(params.conf_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True)

    Directory(params.log_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True)

    Directory(params.data_dir,
              mode=0644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True)

    Directory(params.expanded_war_dir,
              mode=0644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              recursive=True)

    metadata_war_file = format('{params.metadata_home}/server/webapp/metadata.war')
    if not os.path.isfile(metadata_war_file):
        raise Fail("Unable to copy {0} because it does not exist".format(metadata_war_file))

    Logger.info("Copying {0} to {1}".format(metadata_war_file, params.expanded_war_dir))
    shutil.copy2(metadata_war_file, params.expanded_war_dir)

    File(format('{conf_dir}/application.properties'),
         content=InlineTemplate(params.application_properties_content),
         mode=0644,
         owner=params.metadata_user,
         group=params.user_group)

    File(format("{conf_dir}/metadata-env.sh"),
         owner=params.metadata_user,
         group=params.user_group,
         mode=0755,
         content=InlineTemplate(params.metadata_env_content))

    File(format("{conf_dir}/log4j.xml"),
         mode=0644,
         owner=params.metadata_user,
         group=params.user_group,
         content=StaticFile('log4j.xml'))
def setup_livy(env, type, upgrade_type=None, action=None):
    import params

    Directory([params.livy2_pid_dir, params.livy2_log_dir],
              owner=params.livy2_user,
              group=params.user_group,
              mode=0775,
              cd_access='a',
              create_parents=True)

    if type == 'server' and action == 'config':
        params.HdfsResource(params.livy2_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.livy2_user,
                            mode=0775)
        params.HdfsResource(None, action="execute")

        if params.livy2_recovery_store == 'filesystem':
            params.HdfsResource(params.livy2_recovery_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.livy2_user,
                                mode=0700)
            params.HdfsResource(None, action="execute")

        generate_logfeeder_input_config(
            'spark2', Template("input.config-spark2.json.j2", extra_imports=[default]))

    # create livy-env.sh in etc/conf dir
    File(
        os.path.join(params.livy2_conf, 'livy-env.sh'),
        owner=params.livy2_user,
        group=params.livy2_group,
        content=InlineTemplate(params.livy2_env_sh),
        mode=0644,
    )

    # create livy-client.conf in etc/conf dir
    PropertiesFile(
        format("{livy2_conf}/livy-client.conf"),
        properties=params.config['configurations']['livy2-client-conf'],
        key_value_delimiter=" ",
        owner=params.livy2_user,
        group=params.livy2_group,
    )

    # create livy.conf in etc/conf dir
    PropertiesFile(
        format("{livy2_conf}/livy.conf"),
        properties=params.config['configurations']['livy2-conf'],
        key_value_delimiter=" ",
        owner=params.livy2_user,
        group=params.livy2_group,
    )

    # create log4j.properties in etc/conf dir
    File(
        os.path.join(params.livy2_conf, 'log4j.properties'),
        owner=params.livy2_user,
        group=params.livy2_group,
        content=params.livy2_log4j_properties,
        mode=0644,
    )

    # create spark-blacklist.conf in etc/conf dir
    File(
        os.path.join(params.livy2_conf, 'spark-blacklist.conf'),
        owner=params.livy2_user,
        group=params.livy2_group,
        content=params.livy2_spark_blacklist_properties,
        mode=0644,
    )

    Directory(
        params.livy2_logs_dir,
        owner=params.livy2_user,
        group=params.livy2_group,
        mode=0755,
    )
def metadata(type='server'):
    import params

    # Needed by both Server and Client
    Directory(params.conf_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    if type == "server":
        Directory([params.pid_dir],
                  mode=0755,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)
        Directory(format('{conf_dir}/solr'),
                  mode=0755,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)
        Directory(params.log_dir,
                  mode=0755,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)
        Directory(params.data_dir,
                  mode=0644,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)
        Directory(params.expanded_war_dir,
                  mode=0644,
                  cd_access='a',
                  owner=params.metadata_user,
                  group=params.user_group,
                  create_parents=True)

        File(format("{expanded_war_dir}/atlas.war"),
             content=StaticFile(format('{metadata_home}/server/webapp/atlas.war')))
        File(format("{conf_dir}/atlas-log4j.xml"),
             mode=0644,
             owner=params.metadata_user,
             group=params.user_group,
             content=InlineTemplate(params.metadata_log4j_content))
        File(format("{conf_dir}/atlas-env.sh"),
             owner=params.metadata_user,
             group=params.user_group,
             mode=0755,
             content=InlineTemplate(params.metadata_env_content))
        File(format("{conf_dir}/solr/solrconfig.xml"),
             mode=0644,
             owner=params.metadata_user,
             group=params.user_group,
             content=InlineTemplate(params.metadata_solrconfig_content))

    # Needed by both Server and Client
    PropertiesFile(format('{conf_dir}/{conf_file}'),
                   properties=params.application_properties,
                   mode=0644,
                   owner=params.metadata_user,
                   group=params.user_group)

    if type == 'server' and params.search_backend_solr and params.has_logsearch_solr:
        solr_cloud_util.setup_solr_client(params.config)

        random_num = random.random()
        upload_conf_set('basic_configs', random_num)

        create_collection('vertex_index', 'basic_configs')
        create_collection('edge_index', 'basic_configs')
        create_collection('fulltext_index', 'basic_configs')

    if params.security_enabled:
        TemplateConfig(format(params.atlas_jaas_file),
                       owner=params.metadata_user)
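# Illustrative usage only: a metadata() helper like the one above is typically driven
# from the Atlas server script's configure() method. This is a minimal sketch under
# that assumption; the module and class names (metadata.py, MetadataServer) are
# illustrative and not taken from the code above.
from resource_management.libraries.script.script import Script
from metadata import metadata


class MetadataServer(Script):
    def configure(self, env, upgrade_type=None, config_dir=None):
        import params
        env.set_params(params)
        # 'server' lays down the pid/log/data dirs, atlas.war and the Solr config
        # in addition to the client-side application.properties
        metadata('server')


if __name__ == "__main__":
    MetadataServer().execute()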
def setup_java():
    """
    Installs jdk using specific params, that comes from ambari-server
    """
    import params

    java_exec = format("{java_home}/bin/java")

    if not os.path.isfile(java_exec):
        jdk_curl_target = format("{tmp_dir}/{jdk_name}")
        java_dir = os.path.dirname(params.java_home)

        Directory(
            params.artifact_dir,
            create_parents=True,
        )

        File(jdk_curl_target,
             content=DownloadSource(format("{jdk_location}/{jdk_name}")),
             not_if=format("test -f {jdk_curl_target}"))

        File(
            jdk_curl_target,
            mode=0755,
        )

        tmp_java_dir = tempfile.mkdtemp(prefix="jdk_tmp_", dir=params.tmp_dir)
        try:
            if params.jdk_name.endswith(".bin"):
                chmod_cmd = ("chmod", "+x", jdk_curl_target)
                install_cmd = format(
                    "cd {tmp_java_dir} && echo A | {jdk_curl_target} -noregister && {sudo} cp -rp {tmp_java_dir}/* {java_dir}"
                )
            elif params.jdk_name.endswith(".gz"):
                chmod_cmd = ("chmod", "a+x", java_dir)
                install_cmd = format(
                    "cd {tmp_java_dir} && tar -xf {jdk_curl_target} && {sudo} /bin/cp -rp {tmp_java_dir}/* {java_dir}"
                )

            Directory(java_dir)
            Execute(
                chmod_cmd,
                sudo=True,
            )
            Execute(install_cmd, )

            java_version_dir = os.listdir(tmp_java_dir)[0]
            Execute("ln -s %s %s" %
                    (java_dir + '/' + java_version_dir, params.java_home))
        finally:
            Directory(tmp_java_dir, action="delete")

        File(
            format("{java_home}/bin/java"),
            mode=0755,
            cd_access="a",
        )
        Execute(
            ('chmod', '-R', '755', params.java_home),
            sudo=True,
        )
def setup_users():
    """
    Creates users before cluster installation
    """
    import params

    should_create_users_and_groups = False
    if params.host_sys_prepped:
        should_create_users_and_groups = not params.sysprep_skip_create_users_and_groups
    else:
        should_create_users_and_groups = not params.ignore_groupsusers_create

    if should_create_users_and_groups:
        for group in params.group_list:
            Group(group, )

        for user in params.user_list:
            status, result = commands.getstatusoutput('id %s' % user)
            try:
                User(user,
                     gid=params.user_to_gid_dict[user],
                     groups=params.user_to_groups_dict[user],
                     fetch_nonlocal_groups=params.fetch_nonlocal_groups)
            except Exception as e:
                Logger.info(str(e))

        if params.override_uid == "true":
            set_uid(params.smoke_user, params.smoke_user_dirs)
        else:
            Logger.info('Skipping setting uid for smoke user as host is sys prepped')
    else:
        Logger.info(
            'Skipping creation of User and Group as host is sys prepped or ignore_groupsusers_create flag is on'
        )
        pass

    # make sure every service user has a home directory seeded from /etc/skel
    for user in params.user_list:
        if not os.path.exists('/home/' + user):
            try:
                Execute(
                    'mkdir -p /home/%s && chown -R %s:hadoop /home/%s && \cp /etc/skel/.bash* /home/%s/'
                    % (user, user, user, user))
            except Exception as e:
                Logger.info('Failed to set up /home/%s: %s' % (user, str(e)))

    if params.has_hbase_masters:
        try:
            Directory(
                params.hbase_tmp_dir,
                owner=params.hbase_user,
                mode=0775,
                create_parents=True,
                cd_access="a",
            )
            if params.override_uid == "true":
                set_uid(params.hbase_user, params.hbase_user_dirs)
            else:
                Logger.info('Skipping setting uid for hbase user as host is sys prepped')
        except Exception as e:
            Logger.info('Skipping setting uid for hbase user: %s' % str(e))

    if should_create_users_and_groups:
        if params.has_namenode:
            create_dfs_cluster_admins()
    else:
        Logger.info('Skipping creation of dfs cluster admins')
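# Illustrative usage only: setup_users(), setup_hadoop_env() and setup_java() are the
# kind of helpers an Ambari before-ANY hook wires together. This is a minimal sketch
# assuming they live in a shared_initialization module; the module and class names
# here are assumptions, not part of the code above.
from resource_management.libraries.script.hook import Hook
from shared_initialization import setup_users, setup_hadoop_env, setup_java


class BeforeAnyHook(Hook):
    def hook(self, env):
        import params
        env.set_params(params)
        setup_users()        # create service users and groups first
        setup_hadoop_env()   # then hadoop-env.sh and the java.io.tmpdir directory
        setup_java()         # finally make sure the JDK is present under java_home


if __name__ == "__main__":
    BeforeAnyHook().execute()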
def metadata():
    import params

    Directory([params.pid_dir],
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.conf_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.log_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.atlas_hbase_log_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.atlas_hbase_data_dir,
              mode=0755,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.data_dir,
              mode=0644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    Directory(params.expanded_war_dir,
              mode=0644,
              cd_access='a',
              owner=params.metadata_user,
              group=params.user_group,
              create_parents=True)

    File(format("{expanded_war_dir}/atlas.war"),
         content=StaticFile(format('{metadata_home}/server/webapp/atlas.war')))

    PropertiesFile(format('{conf_dir}/{conf_file}'),
                   properties=params.application_properties,
                   mode=0644,
                   owner=params.metadata_user,
                   group=params.user_group)

    File(format("{conf_dir}/atlas-env.sh"),
         owner=params.metadata_user,
         group=params.user_group,
         mode=0755,
         content=InlineTemplate(params.metadata_env_content))

    File(format("{conf_dir}/atlas-log4j.xml"),
         mode=0644,
         owner=params.metadata_user,
         group=params.user_group,
         content=InlineTemplate(params.metadata_log4j_content))

    # hbase-site for embedded hbase used by Atlas
    XmlConfig("hbase-site.xml",
              conf_dir=params.atlas_hbase_conf_dir,
              configurations=params.config['configurations']['atlas-hbase-site'],
              configuration_attributes=params.config['configuration_attributes']['atlas-hbase-site'],
              owner=params.metadata_user,
              group=params.user_group)

    if params.security_enabled:
        TemplateConfig(format(params.atlas_jaas_file),
                       owner=params.metadata_user)
def setup_spark(env, type, upgrade_type=None, action=None, config_dir=None):
    """
    :param env: Python environment
    :param type: Spark component type
    :param upgrade_type: If in a stack upgrade, either UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
    :param action: Action to perform, such as generate configs
    :param config_dir: Optional config directory to write configs to.
    """
    import params

    if config_dir is None:
        config_dir = params.spark_conf

    Directory([params.spark_pid_dir, params.spark_log_dir],
              owner=params.spark_user,
              group=params.user_group,
              mode=0775,
              create_parents=True)

    if type == 'server' and action == 'config':
        params.HdfsResource(params.spark_history_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.spark_user,
                            mode=0775)
        params.HdfsResource(params.spark_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.spark_user,
                            mode=0775)
        params.HdfsResource(None, action="execute")

    PropertiesFile(
        os.path.join(config_dir, "spark-defaults.conf"),
        properties=params.config['configurations']['spark-defaults'],
        key_value_delimiter=" ",
        owner=params.spark_user,
        group=params.spark_group,
        mode=0644)

    # create spark-env.sh in etc/conf dir
    File(
        os.path.join(config_dir, 'spark-env.sh'),
        owner=params.spark_user,
        group=params.spark_group,
        content=InlineTemplate(params.spark_env_sh),
        mode=0644,
    )

    # create log4j.properties in etc/conf dir
    File(
        os.path.join(config_dir, 'log4j.properties'),
        owner=params.spark_user,
        group=params.spark_group,
        content=params.spark_log4j_properties,
        mode=0644,
    )

    # create metrics.properties in etc/conf dir
    File(os.path.join(config_dir, 'metrics.properties'),
         owner=params.spark_user,
         group=params.spark_group,
         content=InlineTemplate(params.spark_metrics_properties),
         mode=0644)

    if params.is_hive_installed:
        XmlConfig("hive-site.xml",
                  conf_dir=config_dir,
                  configurations=params.spark_hive_properties,
                  owner=params.spark_user,
                  group=params.spark_group,
                  mode=0644)

    if params.has_spark_thriftserver:
        PropertiesFile(params.spark_thrift_server_conf_file,
                       properties=params.config['configurations']['spark-thrift-sparkconf'],
                       owner=params.hive_user,
                       group=params.user_group,
                       key_value_delimiter=" ",
                       mode=0644)

    File(os.path.join(params.spark_conf, 'java-opts'),
         owner=params.spark_user,
         group=params.spark_group,
         content=InlineTemplate(params.spark_javaopts_properties),
         mode=0644)
    # File(os.path.join(params.spark_conf, 'java-opts'), action="delete")

    if params.spark_thrift_fairscheduler_content:
        # create spark-thrift-fairscheduler.xml
        File(os.path.join(config_dir, "spark-thrift-fairscheduler.xml"),
             owner=params.spark_user,
             group=params.spark_group,
             mode=0755,
             content=InlineTemplate(params.spark_thrift_fairscheduler_content))
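# Illustrative usage only: a setup_spark() helper in this style is normally called from
# the component scripts (history server, thrift server, client) rather than directly.
# This is a minimal sketch assuming the helper lives in setup_spark.py; the class name
# JobHistoryServer and the surrounding wiring are assumptions.
from resource_management.libraries.script.script import Script
from setup_spark import setup_spark


class JobHistoryServer(Script):
    def install(self, env):
        # install stack packages, then write the initial configs
        self.install_packages(env)
        self.configure(env)

    def configure(self, env, upgrade_type=None, config_dir=None):
        import params
        env.set_params(params)
        # type='server' with action='config' also creates the HDFS history
        # and spark user directories via HdfsResource
        setup_spark(env, 'server', upgrade_type=upgrade_type, action='config')


if __name__ == "__main__":
    JobHistoryServer().execute()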