示例#1
0
 def _send_cpplint_result_to_comments(self, issue_id):
     """Post the collected cpplint errors to a review issue as comments.

     Every entry of ``self.cpplint_err_msgs`` is split on ``:``; entries
     with fewer than three fields are skipped. The remaining ones are
     written to a temporary JSON file that is handed to ``tcr.py`` through
     its ``--comment`` option. The temporary file is always removed.

     :param issue_id: issue identifier passed to ``tcr.py -i``.
     """
     # get trunk url of current dir
     cmd = ["svn", "info"]
     svn_info, returncode = common.run_shell(cmd)
     if returncode:
         error_exit("failed to run '%s'" % " ".join(cmd))
     # Second whitespace-separated token on the third line of `svn info`.
     current_trunk_dir = svn_info.split('\n')[2].split()[1]

     comment = []
     for msg in self.cpplint_err_msgs:
         err_info = msg.split(":")
         if len(err_info) < 3:
             continue
         comment.append({
             "file": os.path.join(current_trunk_dir, err_info[0]),
             "line": err_info[1],
             "type": "3",
             "comment": "CPPLINT:" + err_info[2],
         })
     summary = ('cpplint check result: %d new errors found' %
                len(self.cpplint_err_msgs))
     json_content = {"comments": comment, "summary": summary}

     fd, comment_file = tempfile.mkstemp(suffix=".json")
     try:
         # Wrap the descriptor returned by mkstemp instead of re-opening
         # the path; otherwise the descriptor is never closed.
         with os.fdopen(fd, 'w') as json_file:
             json.dump(json_content, json_file)
         # send comment
         cmd = ["%s/app/qzap/common/tool/tcr.py" % self.current_source_dir]
         cmd.extend(["-i", issue_id, "--comment", comment_file])
         _output, ret = common.run_shell(cmd)
         if ret:
             common.warning("Failed to send comments! ret=%d" % ret)
     finally:
         # Remove the temp file even when dumping or sending raised.
         os.remove(comment_file)
示例#2
0
def update_slurm_conf(update_cmpnode_res=False, nodes_change=False):
    """Regenerate SLURM_CONF from its current content plus fresh queue info.

    The work is done on a copy under APP_HOME which is moved over
    SLURM_CONF at the end.

    :param update_cmpnode_res: when true, update_cmpnodes_res_conf() is
        applied to the working copy first.
    :param nodes_change: when true, the queue details are passed through
        update_queues_info_with_nodes_change() before being written
        (its return value is ignored, so it presumably updates in place).
    """
    # Back up the existing configuration, if there is one.
    if os_path.exists(SLURM_CONF):
        backup(BACKUP_SLURM_CONF_CMD)

    # Work on a copy of the current configuration.
    staging_conf = "{}/slurm.conf.tmp".format(APP_HOME)
    run_shell("cp -f {} {}".format(SLURM_CONF, staging_conf))

    if update_cmpnode_res:
        update_cmpnodes_res_conf(staging_conf)

    queues = get_queues_details()
    if nodes_change:
        update_queues_info_with_nodes_change(queues)
    update_queues_conf_file(staging_conf, queues)

    # Put the finished copy in place.
    run_shell("mv {} {}".format(staging_conf, SLURM_CONF))
示例#3
0
def generate_hosts():
    """Rebuild HOSTS from its own head plus the entries in HOSTS_INFO_FILE.

    The head of the existing HOSTS file is kept up to and including the
    first line containing "metadata". The lines of HOSTS_INFO_FILE are
    appended after it; in lines containing COMPUTE_HOSTNAME_PREFIX the
    second field has its numeric suffix zero-padded to three digits
    (node1 -> node001). The result is staged under APP_HOME and moved
    over HOSTS.
    """
    backup(BACKUP_HOSTS_CMD)

    kept_lines = []
    with open(HOSTS, "r") as current:
        for row in current:
            kept_lines.append(row)
            if "metadata" in row:
                break

    staged_hosts = "{}/hosts".format(APP_HOME)
    with open(staged_hosts, "w") as out:
        out.writelines(kept_lines)
        with open(HOSTS_INFO_FILE, "r") as ehpc_hosts:
            entries = ehpc_hosts.readlines()
        for idx, entry in enumerate(entries):
            if COMPUTE_HOSTNAME_PREFIX not in entry:
                continue
            # replace node1 to node001
            host = entry.split()[1]
            sid = int(host.split(COMPUTE_HOSTNAME_PREFIX)[1])
            padded = "%s%03d" % (COMPUTE_HOSTNAME_PREFIX, sid)
            entries[idx] = entry.replace(host, padded)

        out.writelines(entries)

    run_shell("mv {} {}".format(staged_hosts, HOSTS))
示例#4
0
文件: appctl.py 项目: ljlu1504/EHPC
def stop():
    """Stop every service listed in ROLE_SERVICES for this node's role.

    :return: 0 on success, 1 when the role has no entry in ROLE_SERVICES.
    """
    node_role = get_role()
    if node_role not in ROLE_SERVICES:
        logger.error("Un-support role[%s].", node_role)
        return 1

    for unit in ROLE_SERVICES[node_role]:
        run_shell("systemctl stop {}".format(unit))
    return 0
示例#5
0
def modify_user_password(username, old_password, password):
    """Set a new crypt(3) password hash for a user through ``usermod -p``.

    :param username: account name passed to ``usermod``.
    :param old_password: not used by this function; kept so that existing
        callers keep working.
    :param password: new clear-text password to hash.
    """
    alphabet = \
        './0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # bytearray yields integers on both Python 2 and Python 3, whereas
    # ord() over the items of os.urandom() only works on Python 2.
    salt = ''.join(alphabet[b % 64] for b in bytearray(os.urandom(16)))
    # "$6$" is the crypt(3) prefix selecting SHA-512.
    pwd = crypt.crypt(password, "$6$%s" % salt)
    # Escape "$" before the hash is interpolated into the command line
    # (run_shell presumably goes through a shell). The raw string avoids
    # the invalid "\$" escape sequence.
    pwd = pwd.replace("$", r"\$")
    run_shell("usermod -p %s %s" % (pwd, username))
示例#6
0
文件: appctl.py 项目: ljlu1504/EHPC
def restart():
    """Restart every service listed in ROLE_SERVICES for this node's role.

    :return: 0 on success, 1 when the role has no entry in ROLE_SERVICES.
    """
    node_role = get_role()
    if node_role not in ROLE_SERVICES:
        logger.error("Un-support role[%s].", node_role)
        return 1

    for unit in ROLE_SERVICES[node_role]:
        run_shell("systemctl restart {}".format(unit))
    logger.info("%s re-started.", node_role)
    return 0
示例#7
0
def generate_conf():
    """Render SLURM_CONF from SLURM_CONF_TMPL.

    Template placeholders are filled with the cluster name, the first
    line of RESOURCE_INFO_FILE (controller resource), the last of the
    following lines (compute resource, empty when the file has a single
    line) and a node-name expression such as ``node[1-6,8,12-15]`` built
    from the comma-separated ``sids`` of the cluster info.
    """
    cluster_info = get_cluster_info()
    cls_name = cluster_info["cluster_name"]

    first_resource = ""  # the first line in resource.info
    last_resource = ""  # the last line in resource.info
    with open(RESOURCE_INFO_FILE, "r") as info:
        for entry in info.readlines():
            if not entry:
                continue
            if first_resource:
                last_resource = entry
            else:
                first_resource = entry

    def _span(first, last):
        # A run of one sid is written as "8", a longer run as "1-6".
        if first == last:
            return "{}".format(first)
        return "{}-{}".format(first, last)

    # Collapse the sorted sids into runs of consecutive numbers.
    ordered = sorted(int(s) for s in cluster_info["sids"].split(",") if s)
    run_first = ordered[0]
    run_last = run_first
    spans = []
    for current in ordered:
        if current - run_last > 1:
            spans.append(_span(run_first, run_last))
            run_first = current
        run_last = current
    spans.append(_span(run_first, run_last))
    node_name = "{}[{}]".format(COMPUTE_HOSTNAME_PREFIX, ",".join(spans))

    # Back up the existing configuration, if there is one.
    if os_path.exists(SLURM_CONF):
        backup(BACKUP_SLURM_CONF_CMD)

    substitutions = dict(CLUSTER_NAME=cls_name,
                         CONTROLLER_RESOURCE=first_resource,
                         COMPUTE_RESOURCE=last_resource,
                         DEFAULT_NODE_NAME=node_name)

    # Render the template into a staging file, then move it into place.
    staged_conf = "{}/slurm.conf.tmp".format(APP_HOME)
    with open(staged_conf, "w") as conf:
        with open(SLURM_CONF_TMPL, "r") as tmpl:
            for raw in tmpl.readlines():
                conf.write(raw.format(**substitutions) if raw else raw)

    run_shell("mv {} {}".format(staged_conf, SLURM_CONF))
示例#8
0
def update_queues_conf(queues_info):
    """Write *queues_info* into SLURM_CONF via a staging copy.

    :param queues_info: queue description forwarded unchanged to
        update_queues_conf_file().
    :return: always 0.
    """
    # Back up the existing configuration, if there is one.
    if os_path.exists(SLURM_CONF):
        backup(BACKUP_SLURM_CONF_CMD)

    # Edit a copy, then move it over the live file.
    staged_conf = "{}/slurm.conf.tmp".format(APP_HOME)
    copy_cmd = "cp -f {} {}".format(SLURM_CONF, staged_conf)
    run_shell(copy_cmd)
    update_queues_conf_file(staged_conf, queues_info)

    move_cmd = "mv {} {}".format(staged_conf, SLURM_CONF)
    run_shell(move_cmd)
    return 0
示例#9
0
文件: appctl.py 项目: ljlu1504/EHPC
def metadata_reload():
    """Regenerate hosts and, on a controller, refresh the slurm config.

    :return: always 0.
    """
    logger.info("generate hosts for reloading..")
    generate_hosts()

    # Only controller nodes touch the slurm configuration.
    if get_role() != ROLE_CONTROLLER:
        return 0

    logger.info("update slurm conf for reloading metadata..")
    update_slurm_conf()

    # TODO: with multiple controller nodes, this command only needs to be
    # run on one master node.
    logger.info("re-config slurm configuration for cluster..")
    run_shell("scontrol reconfigure")
    return 0
def gcloud_upload(app_platform, app_path, gcloud_storage_keyword,
                  success_keyword):
    """Run a gcloud Firebase game-loop test and count successful results.

    The storage URL is taken from the first ``[...]`` segment of a stderr
    line containing *gcloud_storage_keyword*, with the leading
    ``len(gcloud_storage_keyword)`` characters removed. Every entry of
    the JSON document on stdout is then checked with check_firebase_log().

    :param app_platform: platform sub-command placed after ``test``.
    :param app_path: value for ``--app``.
    :param gcloud_storage_keyword: marker identifying the stderr line that
        carries the storage URL.
    :param success_keyword: forwarded to check_firebase_log().
    :return: tuple ``(succeeded, total)``.
    """
    cmds = [
        'gcloud',
        'beta',
        'firebase',
        'test',
        app_platform,
        'run',
        '--type game-loop',
        '--app %s' % app_path,
        '--scenario-numbers 1',
        '--format="json"',
    ]
    res = common.run_shell(cmds)
    gcloud_storage_url = ''
    # NOTE(review): stderr is drained completely before stdout is read; if
    # these are OS pipes and the child fills stdout first, this could
    # block - confirm against common.run_shell.
    for line in res.stderr.readlines():
        utf8 = line.decode('UTF-8').strip()
        if utf8:
            print('stderr:%s' % utf8)
        if gcloud_storage_keyword in utf8:
            bracketed = re.findall(r'\[(.*?)\]', utf8)
            # The keyword can show up on a line without any [...] part;
            # skip such lines instead of failing with IndexError.
            if bracketed:
                gcloud_storage_url = \
                    bracketed[0][len(gcloud_storage_keyword):]
    total = 0
    succeeded = 0
    output = res.stdout.read().decode('UTF-8')
    print('upload package output:%s' % output)
    for result in json.loads(output):
        total += 1
        if check_firebase_log(app_platform, gcloud_storage_url,
                              result['axis_value'], success_keyword):
            succeeded += 1
    return succeeded, total
示例#11
0
def list_slice(table):
    """List the slices of *table* and group them by state.

    Runs ``list slice on table <table>;`` through ``./Cli -s`` and parses
    every second output line starting at index 13 into a
    BailingSlice.Slice, provided the status field on line 7 is ``ok``.

    :param table: table name interpolated into the Cli statement.
    :return: ``[slice_list, {state: count}, {state: slice_list}]`` with
        the two dicts pre-filled for every state in g_slice_states;
        None when running or parsing raises (the error is reported
        through common.print_exc_plus()).
    """
    slice_list = []
    count_by_state = {}
    slices_by_state = {}
    for state in g_slice_states:
        count_by_state[state] = 0
        slices_by_state[state] = []
    slice_and_state_count = [slice_list, count_by_state, slices_by_state]

    try:
        shell_out = common.run_shell(
            'echo "list slice on table %s;" | ./Cli -s' % table)
        output_lines = shell_out.split('\n')
        if len(output_lines) > 7:
            status = output_lines[7].split('|')[3].strip()
            # TODO: a status other than 'ok' is currently ignored.
            if status == 'ok':
                for stmp in output_lines[13::2]:
                    if stmp != '':
                        # Named `sli` so the builtin `slice` stays usable.
                        sli = BailingSlice.Slice(stmp)
                        count_by_state[sli.state] += 1
                        slices_by_state[sli.state].append(sli)
                        slice_list.append(sli)
        return slice_and_state_count
    except Exception:
        # `except Exception` lets KeyboardInterrupt/SystemExit propagate.
        common.print_exc_plus()
        return None
示例#12
0
def _uninstall(software, software_home, uninstaller=None):
    """Uninstall one software package from *software_home*.

    :param software: software name (install dir: software_home/software)
    :param software_home: the home that software was installed into
    :param uninstaller: uninstall script to run with bash; when omitted,
        ``uninstall.sh`` inside the install dir is used if it exists,
        otherwise the install dir is removed with ``rm -rf``.
    :return: 0 on success, 1 when running the command raised.
    """
    logger.info("Do uninstall software[%s]..", software)
    software_dir = "{}/{}".format(software_home, software)
    if uninstaller:
        uninstaller = "bash {}".format(uninstaller)
    else:
        script = "{}/uninstall.sh".format(software_dir)
        # Test the script path itself; testing the whole "bash <path>"
        # command string never matches an existing file.
        if os.path.exists(script):
            uninstaller = "bash {}".format(script)
        else:
            uninstaller = "rm -rf {}".format(software_dir)

    try:
        # uninstall
        run_shell("export SOFTWARE_HOME={} && {}".format(software_home,
                                                         uninstaller))
    except Exception as e:
        # Format the exception itself: `e.message` does not exist on
        # Python 3.
        logger.error("Failed to run uninstall cmd: %s", e)
        logger.error("Error: %s", traceback.format_exc())
        return 1
    return 0
示例#13
0
def generate_hosts():
    """Rebuild HOSTS from its own head plus the content of HOSTS_INFO_FILE.

    The head of the existing HOSTS file is kept up to and including the
    first line containing "metadata"; the lines of HOSTS_INFO_FILE are
    appended unchanged. The result is staged under APP_HOME and then
    moved over HOSTS.
    """
    backup(BACKUP_HOSTS_CMD)

    kept_lines = []
    with open(HOSTS, "r") as current:
        for row in current:
            kept_lines.append(row)
            if "metadata" in row:
                break

    staged_hosts = "{}/hosts".format(APP_HOME)
    with open(staged_hosts, "w") as out:
        out.writelines(kept_lines)
        with open(HOSTS_INFO_FILE, "r") as ehpc_hosts:
            extra_lines = ehpc_hosts.readlines()
            out.writelines(extra_lines)

    run_shell("mv {} {}".format(staged_hosts, HOSTS))
示例#14
0
 def check_dependency_between_issues(self):
     """Warn when the issue being submitted may depend on other issues.

     Does nothing unless ``self.issue`` is in ``self.issue_dict`` and at
     least one other issue is tracked. The BUILD paths of the issue's
     source files are passed to ``blade query --deps``; every other
     tracked issue is then looked up at DEFAULT_REVIEW_INFO_URL. Issues
     found closed or submitted are dropped from ``self.issue_dict``; for
     an issue with a dependency relation the user is asked whether to
     continue, and any answer other than ``y`` calls error_exit().
     """
     if self.issue not in self.issue_dict or len(self.issue_dict) < 2:
         return
     build_path = set()
     current_dir = get_cwd()
     # An entry is either a plain set of files or a dict holding the
     # files under "filelist".
     issue_entry = self.issue_dict[self.issue]
     if isinstance(issue_entry, set):
         file_list = issue_entry
     else:
         file_list = issue_entry["filelist"]
     for f in file_list:
         ext = os.path.splitext(f)[1]
         # BUILD has no .h dependency specification, so not check .h
         if ext in (".c", ".cpp", ".hpp", ".C", ".cxx", ".cc"):
             find_path = self._find_build_path_contain_filename_bottom_up(f)
             if find_path:
                 build_path.add(os.path.relpath(find_path, current_dir))
     if not build_path:
         # some issues have no associated BUILD file(eg. modify script files)
         return
     blade = find_file_bottom_up('blade')
     cmd = "%s query --deps %s" % (blade, " ".join(build_path))
     (output, ret) = run_shell(cmd.split())
     if ret:
         warning('failed to run %s' % cmd)
         return
     submit_issue_deps = set(output.splitlines())
     issues_to_pop = []
     # Iterate over a snapshot of the keys; removals are deferred to the
     # end so the dict is never modified while being walked.
     for issue in list(self.issue_dict):
         if issue == self.issue:
             continue
         issue_info_url = "%s%s" % (DEFAULT_REVIEW_INFO_URL, issue)
         issue_info = json.load(urllib.urlopen(issue_info_url))
         if not issue_info['successfully']:
             warning('failed to get issue_info for issue %s' % issue)
             continue
         issue_info_detail = issue_info["requestsWithPagerInfo"][
             "requests"][0]
         issue_state = issue_info_detail['state']
         if issue_state in (_ISSUE_STATE.CLOSED, _ISSUE_STATE.SUBMITED):
             # issue already closed or submited
             issues_to_pop.append(issue)
             continue
         if self._has_dependency_relation(submit_issue_deps, issue):
             warning('the submit issue may depends on the issue %s with'
                     ' title "%s"' % (issue, issue_info_detail['name']))
             answer = raw_input('Continue?(y/N) ').strip()
             if answer != 'y':
                 error_exit('Exit')
     # An explicit loop: map() used only for side effects does nothing
     # where map is lazy.
     for issue in issues_to_pop:
         self.issue_dict.pop(issue)
示例#15
0
def install(software_list, ignore_exist=False):
    """Install every not-yet-present software of *software_list*.

    :param software_list: iterable of dicts with the keys ``name`` and
        ``source`` and, optionally, ``installer``.
    :param ignore_exist: when false, the first software whose directory
        already exists under the software home aborts the call with 55;
        when true, such software is skipped.
    :return: 0 on success, 55 as described above, otherwise the first
        non-zero result of _install().
    """
    logger.info("install software[%s]..", software_list)
    software_home = SOFTWARE_HOME_FMT.format(get_admin_user())
    if software_list:
        run_shell("mkdir -p {}".format(software_home))

    exist_info = {}
    for s in software_list:
        # software exist
        if os.path.exists("{}/{}".format(software_home, s["name"])):
            exist_info[s["name"]] = True
            logger.error("The software[%s] already exist!", s["name"])
            if not ignore_exist:
                return 55

    for s in software_list:
        if exist_info.get(s["name"], False):
            continue
        ret = _install(s["name"], s["source"], software_home,
                       s.get("installer"))
        # Compare by value; `is not 0` tests object identity.
        if ret != 0:
            return ret
    return 0
示例#16
0
def show_slice(table, slice_no):
    """Show one slice of *table* with its units and snapshots.

    Runs ``show slice <slice_no> on table <table>;`` through ``./Cli -s``.
    When the status field on line 7 is ``ok``, line 13 is parsed as the
    slice, the lines between index 19 and the "Replication List:" marker
    as units, and the lines after the "Snapshot List:" marker as
    snapshots. Each unit's ``rep_order`` is its position in the
    replication list, or 999 when it cannot be determined.

    :param table: table name interpolated into the Cli statement.
    :param slice_no: slice number; converted with int().
    :return: ``[Slice, unit_list, snapshot_list]``, an empty list when
        the output is too short or the status is not ``ok``, or None when
        running or parsing raises (reported via common.print_exc_plus()).
    """
    ret_list = []
    try:
        shell_out = common.run_shell(
            'echo "show slice %d on table %s;" | ./Cli -s' %
            (int(slice_no), table))
        output_lines = shell_out.split('\n')
        if len(output_lines) > 7:
            status = output_lines[7].split('|')[3].strip()
            # TODO: a status other than 'ok' is currently ignored.
            if status == 'ok':
                sli = BailingSlice.Slice(output_lines[13])
                ret_list.append(sli)

                # Locate the section markers (the last occurrence wins).
                rep_base = snapshot_base = None
                for idx, line in enumerate(output_lines):
                    line = line.strip()
                    if line == 'Replication List:':
                        rep_base = idx
                    elif line in ('Snapshot List:', 'Snapshot list:'):
                        snapshot_base = idx
                if rep_base is None or snapshot_base is None:
                    # Fail with a clear message rather than an unbound
                    # local; the outer handler reports it and returns
                    # None as before.
                    raise ValueError(
                        'section marker missing in "show slice" output')

                rep_list = []
                for stmp in output_lines[rep_base + 4:snapshot_base:2]:
                    if stmp != '':
                        rep_list.append(stmp.split('|')[1].strip())

                unit_list = []
                for line_idx in range(19, rep_base, 2):
                    stmp = output_lines[line_idx]
                    if stmp != '':
                        unit = BailingSlice.Unit(stmp)
                        try:
                            unit.rep_order = rep_list.index(unit.repl_addr)
                        except Exception:
                            # Not in the replication list (or address
                            # unavailable): sort it last.
                            unit.rep_order = 999
                        unit_list.append(unit)
                ret_list.append(unit_list)

                snapshot_list = []
                for line_idx in range(snapshot_base + 4, len(output_lines), 2):
                    stmp = output_lines[line_idx]
                    if stmp != '':
                        snapshot_list.append(BailingSlice.Snapshot(stmp))
                ret_list.append(snapshot_list)
        return ret_list
    except Exception:
        # `except Exception` lets KeyboardInterrupt/SystemExit propagate.
        common.print_exc_plus()
        return None
示例#17
0
def generate_slurm_conf():
    """Render SLURM_CONF from SLURM_CONF_TMPL.

    Template placeholders are filled with the cluster name, the control
    machine, the first line of RESOURCE_INFO_FILE (controller resource),
    the last of the following lines (compute resource, empty when the
    file has a single line) and the compute node list.
    """
    cluster_info = get_cluster_info()
    cls_name = cluster_info["cluster_name"]

    first_resource = ""  # the first line in resource.info
    last_resource = ""  # the last line in resource.info
    with open(RESOURCE_INFO_FILE, "r") as info:
        for entry in info.readlines():
            if not entry:
                continue
            if first_resource:
                last_resource = entry
            else:
                first_resource = entry

    node_list = get_slurm_cmpnode_list(cluster_info)
    ctl_machine = get_ctl_machine()

    # Back up the existing configuration, if there is one.
    if os_path.exists(SLURM_CONF):
        backup(BACKUP_SLURM_CONF_CMD)

    substitutions = dict(CLUSTER_NAME=cls_name,
                         CONTROL_MACHINE=ctl_machine,
                         CONTROLLER_RESOURCE=first_resource,
                         COMPUTE_RESOURCE=last_resource,
                         DEFAULT_NODE_NAME=node_list)

    # Render the template into a staging file, then move it into place.
    staged_conf = "{}/slurm.conf.tmp".format(APP_HOME)
    with open(staged_conf, "w") as conf:
        with open(SLURM_CONF_TMPL, "r") as tmpl:
            for raw in tmpl.readlines():
                conf.write(raw.format(**substitutions) if raw else raw)

    run_shell("mv {} {}".format(staged_conf, SLURM_CONF))
示例#18
0
def list_server():
    """Return the servers reported by ``list server;`` as Machine objects.

    Every second line of the ``./Cli -s`` output, starting at index 13,
    is parsed when the status field on line 7 is ``ok``.

    :return: list of MachineManager.Machine (empty when the output is
        too short or the status is not ``ok``); None when running or
        parsing raises (reported via common.print_exc_plus()).
    """
    machine_list = []
    try:
        shell_out = common.run_shell('echo "list server;" | ./Cli -s')
        output_lines = shell_out.split('\n')
        if len(output_lines) > 7:
            status = output_lines[7].split('|')[3].strip()
            # TODO: a status other than 'ok' is currently ignored.
            if status == 'ok':
                for row in output_lines[13::2]:
                    if row != '':
                        machine_list.append(MachineManager.Machine(row))
        return machine_list
    except Exception:
        # `except Exception` lets KeyboardInterrupt/SystemExit propagate.
        common.print_exc_plus()
        return None
示例#19
0
def list_table():
    """Return the tables reported by ``list table;`` as Repository objects.

    Every second line of the ``./Cli -s`` output, starting at index 13,
    is parsed when the status field on line 7 is ``ok``.

    :return: list of BailingRepository.Repository (empty when the output
        is too short or the status is not ``ok``); None when running or
        parsing raises (reported via common.print_exc_plus()).
    """
    table_list = []
    try:
        shell_out = common.run_shell('echo "list table;" | ./Cli -s')
        output_lines = shell_out.split('\n')
        if len(output_lines) > 7:
            status = output_lines[7].split('|')[3].strip()
            # TODO: a status other than 'ok' is currently ignored.
            if status == 'ok':
                for stmp in output_lines[13::2]:
                    if stmp != '':
                        table_list.append(BailingRepository.Repository(stmp))
        return table_list
    except Exception:
        # `except Exception` lets KeyboardInterrupt/SystemExit propagate.
        common.print_exc_plus()
        return None
示例#20
0
def show_snapshot(table, ss):
    """Return the rows printed by ``show snapshot <ss> on table <table>;``.

    Every second line of the ``./Cli -s`` output, starting at index 13,
    is split on ``|`` when the status field on line 7 is ``ok``; the
    first and last fields of each row are dropped.

    :param table: table name interpolated into the Cli statement.
    :param ss: snapshot identifier interpolated into the Cli statement.
    :return: list of field lists (empty when the output is too short or
        the status is not ``ok``); None when running or parsing raises
        (reported via common.print_exc_plus()).
    """
    ret_list = []
    try:
        shell_out = common.run_shell(
            'echo "show snapshot %s on table %s;" | ./Cli -s' % (ss, table))
        output_lines = shell_out.split('\n')
        if len(output_lines) > 7:
            status = output_lines[7].split('|')[3].strip()
            # TODO: a status other than 'ok' is currently ignored.
            if status == 'ok':
                for stmp in output_lines[13::2]:
                    if stmp != '':
                        ret_list.append(stmp.split('|')[1:-1])
        return ret_list
    except Exception:
        # `except Exception` lets KeyboardInterrupt/SystemExit propagate.
        common.print_exc_plus()
        return None
示例#21
0
def _install(software, source, software_home, installer=None):
    """
    Download, unpack and install one software package.

    The package is fetched into a per-software work dir and un-tarred
    there. The first directory found in the work dir is treated as the
    unpacked tree. It is installed by *installer* if given, else by its
    own ``install.sh`` if present, else by moving it into
    *software_home*. The work dir is removed afterwards.

    :param source: full url to download software, eg: root@xxx/aa/bb/cc.tar.gz
    :param software_home: the home that software would to be installed
    :param software: software name (install dir: software_home/software)
    :param installer: install script, relative to the unpacked directory
    :return: 0 on success, 1 on failure
    """
    logger.info("Do install software[%s] from source[%s]..", software, source)

    package = source.split("/")[-1]
    workdir = SOFTWARE_WORKDIR_FMT.format(get_admin_user(), software)
    package_path = "{}/{}".format(workdir, package)

    try:
        # download
        _download(source, workdir)

        # un-tar
        run_shell("tar -zxf {}".format(package_path), cwd=workdir, timeout=180)

        # install
        un_tar_dir = ""
        for entry in os.listdir(workdir):
            candidate = "{}/{}".format(workdir, entry)
            if os.path.isdir(candidate):
                un_tar_dir = candidate
                break
        if not un_tar_dir:
            # Without an unpacked directory the commands below would be
            # built from an empty path; fail explicitly instead.
            logger.error(
                "Failed to install software[%s]: no directory found in "
                "%s after un-tar", software, workdir)
            return 1

        if installer:
            installer = "bash {}/{}".format(un_tar_dir, installer)
        else:
            script = "{}/install.sh".format(un_tar_dir)
            # Test the script path itself; testing the whole
            # "bash <path>" command string never matches an existing file.
            if os.path.exists(script):
                installer = "bash {}".format(script)
            else:
                installer = "mv {} {}/".format(un_tar_dir, software_home)

        run_shell("export SOFTWARE_HOME={} && {}".format(
            software_home, installer),
                  timeout=120)

        # clean
        if os.path.exists(workdir):
            run_shell("rm -rf {}".format(workdir))
    except Exception:
        logger.error("Failed to install software[%s]: \n%s", software,
                     traceback.format_exc())
        return 1
    return 0
示例#22
0
def add_user(username, password, uid, gid, nas_mount_path=None):
    """Create a group and a user with a crypt(3) password hash.

    Runs ``groupadd``, ``useradd`` and finally ``usermod -aG wheel``.

    :param username: name used for both the new group and the new user.
    :param password: clear-text password to hash.
    :param uid: numeric user id passed to ``useradd -u``.
    :param gid: numeric group id passed to ``groupadd -g``/``useradd -g``.
    :param nas_mount_path: not used by this function; kept so that
        existing callers keep working.
    """
    run_shell("groupadd -g %s %s" % (gid, username))
    alphabet = \
        './0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    # bytearray yields integers on both Python 2 and Python 3, whereas
    # ord() over the items of os.urandom() only works on Python 2.
    salt = ''.join(alphabet[b % 64] for b in bytearray(os.urandom(16)))
    # "$6$" is the crypt(3) prefix selecting SHA-512.
    pwd = crypt.crypt(password, "$6$%s" % salt)
    # Escape "$" before the hash is interpolated into the command line
    # (run_shell presumably goes through a shell). The raw string avoids
    # the invalid "\$" escape sequence.
    pwd = pwd.replace("$", r"\$")

    run_shell("useradd -u %s -g %s -p %s %s" % (uid, gid, pwd, username))
    # /etc/sudoers is not edited here; the user is added to the wheel
    # group instead.
    run_shell("usermod -aG wheel %s" % username)
示例#23
0
def generate_hpcmodulefiles():
    """Install the default modulefile and render HPC_MODULE_FILES.

    HPC_DEFAULT_MODULE_FILE is copied to ``<nas>/opt/modulefiles/default``.
    HPC_MODULE_FILES_TMPL is then copied line by line into a staging
    file, formatting the lines that mention NAS_MOUNT_POINT with the NAS
    mount point, and the staging file is moved to HPC_MODULE_FILES.
    """
    # Prepare the modulefiles directory on the NAS.
    nas_root = get_nas_mount_point()
    run_shell("mkdir -p {}/opt/modulefiles/".format(nas_root))
    default_target = "{}/opt/modulefiles/default".format(nas_root)
    run_shell("cp {} {}".format(HPC_DEFAULT_MODULE_FILE, default_target))

    # Render the modulefiles template; only lines carrying the
    # placeholder go through str.format.
    staged_file = "{}/hpcmodulefiles.tmp".format(APP_HOME)
    with open(staged_file, "w") as conf:
        with open(HPC_MODULE_FILES_TMPL, "r") as tmpl:
            for raw in tmpl.readlines():
                if "NAS_MOUNT_POINT" in raw:
                    raw = raw.format(NAS_MOUNT_POINT=nas_root)
                conf.write(raw)

    run_shell("mv {} {}".format(staged_file, HPC_MODULE_FILES))
示例#24
0
 def expand_depended_targets(self, build_targets):
     """Expand *build_targets* with the targets that depend on them.

     Only active when ``self.options.build_dependeds`` is set. Runs
     ``blade query --depended --output-to-dot`` and reads the target
     names from the leading ``"path:target" [label = ...]`` lines of the
     dot file, which is removed afterwards.

     :param build_targets: space-separated target string.
     :return: *build_targets* unchanged when expansion is disabled, the
         query fails or yields no targets; otherwise a string of
         `` <blade_root_dir>/<target>`` entries in sorted order.
     """
     if not self.options.build_dependeds:
         return build_targets
     blade = find_file_bottom_up('blade')
     dot_file = 'depended_targets'
     cmd = "%s query --depended --output-to-dot %s %s" % (blade, dot_file,
                                                          build_targets)
     (_output, ret) = run_shell(cmd.split())
     if ret:
         return build_targets

     all_targets = set()
     try:
         with open(dot_file) as f:
             # Skip the starting line "digraph blade..."; the default
             # keeps an empty file from raising StopIteration.
             next(f, None)
             for line in f:
                 if '[label =' not in line:
                     break
                 # line: "path:target" [label = "path:target"]
                 all_targets.add(line.split()[0][1:-1])  # skip double quote
     finally:
         # Clean up the dot file even when reading it failed.
         if os.path.exists(dot_file):
             os.remove(dot_file)

     if all_targets:
         build_targets = ''.join(
             " %s/%s" % (self.blade_root_dir, target)
             for target in sorted(all_targets))
     return build_targets
示例#25
0
文件: appctl.py 项目: ljlu1504/EHPC
def check_service_status(service):
    """Check a service with ``systemctl is-active``.

    :param service: service name interpolated into the command.
    :return: the value returned by run_shell(); anything other than 0 is
        logged as an error.
    """
    status = run_shell("systemctl is-active {}".format(service),
                       without_log=True)
    if status == 0:
        return status

    logger.error("the service[%s] is not health[code: %s].", service, status)
    return status
示例#26
0
def _download(source, workdir):
    """Fetch *source* into *workdir*.

    Sources starting with "http" are fetched with wget, everything else
    with rsync; both commands run with a timeout of 600.

    :param source: URL or rsync source of the package.
    :param workdir: destination directory.
    """
    if source.startswith("http"):
        fetch_cmd = "wget -P {} -t 5 -w 60 -c {}".format(workdir, source)
    else:
        fetch_cmd = "rsync -q -aP {} {}/".format(source, workdir)
    run_shell(fetch_cmd, timeout=600)
示例#27
0
            json_message['attachments'][0]['actions'].append(deployment_button)
    headers = {'Content-Type': 'application/json', 'Accept': 'text/plain'}
    res = requests.post(slack_webhook_url,
                        data=json.dumps(json_message),
                        headers=headers)
    return res.text == 'ok'


if __name__ == '__main__':
    # Step 1: read the Slack webhook secret through the imp-ci CLI.
    common.log('generate-slack-webhook-url')
    cmds = [
        'imp-ci', 'secrets', 'read', '--environment=production',
        '--buildkite-org=improbable', '--secret-type=slack-webhook',
        '--secret-name=unreal-gdk-slack-web-hook'
    ]
    res = common.run_shell(cmds)
    # Echo every non-blank stderr line of the command.
    for line in res.stderr.readlines():
        utf8 = line.decode('UTF-8').strip()
        if len(utf8) > 0:
            print('%s' % utf8)
    # stdout is parsed as a JSON document; its 'url' key is the webhook URL.
    output = res.stdout.read().decode('UTF-8')
    slack_webhook_url = json.loads(output)['url']

    # Step 2: send the notification; the channel comes from SLACK_CHANNEL
    # with '#unreal-gdk-builds' passed as the second argument (presumably
    # the fallback - confirm against common.get_environment_variable).
    common.log('slack-notify')
    slack_channel = common.get_environment_variable('SLACK_CHANNEL',
                                                    '#unreal-gdk-builds')
    result = slack_notify(slack_channel, slack_webhook_url)

    # Exit status 0 only when slack_notify() returned a value equal to True.
    exit_value = 0 if result == True else 1
    exit(exit_value)
示例#28
0
文件: appctl.py 项目: ljlu1504/EHPC
def start():
    """Prepare the node, start the services of its role and finish setup.

    Before start: the slurm state-save directory is created on the NAS
    and linked to /opt/slurm; a controller generates the slurm
    configuration, any other role removes its existing ``clear_files``.
    After the services are started, the controller whose sid equals
    MASTER_CONTROLLER_SID also creates the admin directories, adds the
    admin user and initialises the software.

    :return: 1 when the role has no entry in ROLE_SERVICES, the result of
        init_software() on the master controller, otherwise 0.
    """
    node_role = get_role()
    nas_root = get_nas_mount_point()
    cluster = get_cluster_name()
    # mkdir /nas_mount_point/opt/slurm/state_save_loc for StateSaveLocation
    run_shell("mkdir -p {}/opt/slurm/state_save_loc/{}/".format(nas_root,
                                                                cluster))
    run_shell("ln -sf {}/opt/slurm/ /opt/slurm".format(nas_root))

    # start before
    if node_role == ROLE_CONTROLLER:
        logger.info("Generating slurm configurations...")
        generate_slurm_conf()
    else:
        for stale in clear_files[node_role]:
            if path.exists(stale):
                run_shell("rm {}".format(stale))

    # start service
    if node_role not in ROLE_SERVICES:
        logger.error("Un-support role[%s].", node_role)
        return 1
    for unit in ROLE_SERVICES[node_role]:
        logger.info("Start service {}".format(unit))
        run_shell("systemctl start {}".format(unit))

    # start post
    cluster_info = get_cluster_info()
    nas_root = get_nas_mount_point()
    is_master_controller = (
        node_role == ROLE_CONTROLLER
        and int(cluster_info["sid"]) == MASTER_CONTROLLER_SID)
    if not is_master_controller:
        logger.info("%s started.", node_role)
        return 0

    logger.info("create admin dirs..")
    run_shell("mkdir -p {}/opt".format(nas_root))
    run_shell("mkdir -p {}/home/".format(nas_root))
    run_shell("mkdir -p {}/data/".format(nas_root))

    # create admin user
    add_admin_user()

    # install software
    return init_software()
示例#29
0
def set_hostname():
    """Apply the name returned by get_hostname() with ``hostnamectl``."""
    command = "hostnamectl set-hostname {}".format(get_hostname())
    run_shell(command)