def run():
    """Generate the node section of sqconfig and run sqgen.

    Reads the comma-separated ``node_list`` from the module-level
    ``dbcfgs_json`` JSON string, asks ``lscpu`` for the CPU and socket counts
    of the local host, writes the ``begin node`` ... ``end node`` section to
    ``$TRAF_HOME/sql/scripts/sqconfig`` and then invokes ``sqgen``.

    A single-node setup gets a ``_virtualnodes 2`` entry instead of one line
    per node.
    """
    dbcfgs = json.loads(dbcfgs_json)

    nodes = dbcfgs['node_list'].split(',')

    # this script is running by trafodion user, so get sqroot from env
    # .get() so that an unset variable reaches the err() below instead of
    # dying with a bare KeyError
    traf_home = os.environ.get('TRAF_HOME', '')
    if traf_home == '':
        err('TRAF_HOME var is empty')
    sqconfig_file = traf_home + '/sql/scripts/sqconfig'

    # first output line is the CPU count, second one the socket count
    core, processor = run_cmd(r"lscpu|grep -E '(^CPU\(s\)|^Socket\(s\))'|awk '{print $2}'").split('\n')[:2]
    # highest core index written to the config is capped at 255
    core = int(core)-1 if int(core) <= 256 else 255

    lines = ['begin node\n']
    if len(nodes) == 1:
        lines.append('_virtualnodes 2\n')
    else:
        for node_id, node in enumerate(nodes):
            line = 'node-id=%s;node-name=%s;cores=0-%d;processors=%s;roles=connection,aggregation,storage\n' % (node_id, node, core, processor)
            lines.append(line)
    lines.append('end node\n')

    # write out the node section
    with open(sqconfig_file, 'w') as f:
        f.writelines(lines)

    print('sqconfig generated successfully!')

    run_cmd('sqgen')

    print('sqgen ran successfully!')
def wait_for_discovered_blades(self):
    """Wait for all blades to be discovered and record the blade/node map.

    Every id in ``self.node_ids`` is first mapped to ``None`` in
    ``self.blade_node_dict``.  The discovery loop is then run; while it
    reports incomplete discovery and restarts remain (at most
    ``BLADE_RESTART_TIMES``), the blades still missing are reset through
    ``self.dha`` and the loop is run again.  If discovery is still
    incomplete, err() is called with the summary of missing blades.

    Finally the ``blade_node_map`` entry of the YAML document in
    ``self.updated_dea_file`` is updated and the file is rewritten.
    """
    log('Wait for discovered blades')
    macs_seen = []
    restarts_left = BLADE_RESTART_TIMES

    for node_id in self.node_ids:
        self.blade_node_dict[node_id] = None

    with self.ssh:
        complete = self.discovery_waiting_loop(macs_seen)

    while not complete and restarts_left != 0:
        restarts_left -= 1
        for missing_blade in self.get_not_discovered_blades():
            self.dha.node_reset(missing_blade)
        with self.ssh:
            complete = self.discovery_waiting_loop(macs_seen)

    if not complete:
        err('Not all blades have been discovered: %s'
            % self.not_discovered_blades_summary())

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects - confirm this file is always trusted.
    with io.open(self.updated_dea_file) as stream:
        dea_content = yaml.load(stream)
    dea_content.update({'blade_node_map': self.blade_node_dict})
    with io.open(self.updated_dea_file, 'w') as stream:
        yaml.dump(dea_content, stream, default_flow_style=False)
def dumpToFile(self, msg = None):
    """Dump everything known about this benchmark run into a log file.

    The file is created as
    ``unknown-benchmarks/<tool>-<started_at>/<category>-<basename of name>.log``
    (directories are created when missing).  When ``msg`` is given it is
    written first as the ``Reason:`` line.

    If the file cannot be opened the failure is reported through err() and
    nothing is written.
    """
    # XXX make this a method of RunningTask
    d = 'unknown-benchmarks/{0}-{1}'.format(configs['tool'],
                                            configs['started_at'])
    if not os.path.exists(d):
        # creates 'unknown-benchmarks' as well when it is missing
        os.makedirs(d)

    fname = '{0}/{1}-{2}.log'.format(d, self.category,
                                     os.path.basename(self.name))
    try:
        f = open(fname, 'w')
    except IOError as e:
        err('Failed dumping benchmark to file: {0}'.format(str(e)))
        # nothing to write to; do not fall through to an unbound handle
        return

    # the context manager closes the file even when one of the writes fails
    with f:
        if msg:
            f.write('Reason: {0}\n'.format(msg))

        f.write('category: {0}\n'.format(self.category))
        f.write('name: {0}\n\n'.format(self.name))
        f.write('cmd: {0}\n'.format(self.cmd))
        f.write('machine: {0}\n'.format(self.task.getMachine()))
        f.write('params: {0}\n'.format(configs['params']))
        f.write('versions: {0}\n'.format(self.versions))
        f.write('result: {0}\n'.format(self.result))
        f.write('witness: {0}\n\n'.format(self.witness))
        f.write('memUsage: {0}\n'.format(self.memory))
        f.write('cpuUsage: {0}s\n\n'.format(self.time))
        f.write('other output:\n{0}\n\n'.format(self.output))
        f.write(str(configs))
def run_deploy(self):
    """Start the deployment of ``self.env_id`` and poll until it settles.

    The environment status is polled up to ``WAIT_LOOP`` times, sleeping
    ``SLEEP_TIME`` seconds between polls, and the second to last line of the
    deployment log is echoed on every poll.  Polling stops as soon as the
    status is 'operational', 'error' or 'stopped'.  Anything other than
    'operational' leads to log collection and an err() call.
    """
    WAIT_LOOP = 180
    SLEEP_TIME = 60
    LOG_FILE = 'cloud.log'

    log('Starting deployment of environment %s' % self.env_id)
    run_proc('fuel --env %s deploy-changes | strings | tee %s'
             % (self.env_id, LOG_FILE))

    deployed = False
    for _attempt in range(WAIT_LOOP):
        env = parse(exec_cmd('fuel env --env %s' % self.env_id))
        status = env[0][E['status']]
        log('Environment status: %s' % status)

        progress_line, _rc = exec_cmd('tail -2 %s | head -1' % LOG_FILE, False)
        if progress_line:
            log(progress_line)

        if status == 'operational':
            deployed = True
        if status in ('operational', 'error', 'stopped'):
            # final state reached, no point in polling any further
            break
        time.sleep(SLEEP_TIME)

    delete(LOG_FILE)

    if not deployed:
        self.collect_error_logs()
        err('Deployment failed, environment %s is not operational'
            % self.env_id)
    else:
        log('Environment %s successfully deployed' % self.env_id)
def _node_power_cmd(self, node_id, cmd):
    """Drive the chassis power of ``node_id`` into the state named by ``cmd``.

    Returns immediately when the node already reports the wanted state.
    Otherwise the IPMI power command is issued and the state is polled up to
    ``self.attempts`` times with ``self.delay`` seconds in between; the
    command is re-issued after every unsuccessful poll except the last one.
    err() is called when the state was never reached.
    """
    wanted_state = 'Chassis Power is %s' % cmd
    if self.node_get_state(node_id) == wanted_state:
        return

    power_cmd = '%s chassis power %s' % (self.ipmi_cmd(node_id), cmd)
    exec_cmd(power_cmd, attempts=self.attempts, delay=self.delay,
             verbose=True,
             mask_args=[8, 10])

    remaining = self.attempts
    while remaining:
        time.sleep(self.delay)
        current_state = self.node_get_state(node_id)
        remaining -= 1
        if current_state == wanted_state:
            return
        if remaining != 0:
            # reinforce our will, but allow the command to fail,
            # we know our message got across once already...
            exec_cmd(power_cmd, check=False, mask_args=[8, 10])

    err('Could not set chassis %s for node %s' % (cmd, node_id))
def health_check(self):
    """Run the Fuel sanity and smoke health checks for ``self.env_id``.

    The command output is logged; err() is called when the output contains
    the word 'failure'.
    """
    log('Now running sanity and smoke health checks')
    command = ('fuel health --env %s --check sanity,smoke --force'
               % self.env_id)
    output = exec_cmd(command)
    log(output)
    if 'failure' in output:
        err('Healthcheck failed!')
def run(user, pwd):
    """Generate a local ssh key and push it, with the package, to all nodes.

    A fresh RSA key pair is created at ``SSHKEY_FILE``; both key files and
    the ``traf_package`` named in the configuration are copied to ``/tmp``
    on every host of ``node_list``, one thread per host.  err() is called
    for each host whose copy finished with a non-zero return code.
    """
    dbcfgs = json.loads(dbcfgs_json)
    hosts = dbcfgs['node_list'].split(',')
    traf_package = dbcfgs['traf_package']

    sudo_prefix = get_sudo_prefix()
    # drop any stale key first; the piped "y" answers a possible prompt
    run_cmd('%s rm -rf %s*' % (sudo_prefix, SSHKEY_FILE))
    run_cmd('%s echo -e "y" | ssh-keygen -t rsa -N "" -f %s'
            % (sudo_prefix, SSHKEY_FILE))

    files = [SSHKEY_FILE, SSHKEY_FILE + '.pub', traf_package]

    remotes = [Remote(host, user=user, pwd=pwd) for host in hosts]
    workers = []
    for remote in remotes:
        workers.append(Thread(target=remote.copy, args=(files, '/tmp')))

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    for remote in remotes:
        if remote.rc != 0:
            err('Failed to copy files to %s' % remote.host)
def run(pwd):
    """Generate a local ssh key and copy key, package and config to all nodes.

    The database configuration is saved to ``/tmp/dbcfgs`` with every
    password entry removed, a fresh RSA key pair is generated at
    ``/tmp/id_rsa``, and key files, the ``traf_package`` and the config file
    are copied to ``/tmp`` on every host of ``node_list`` (one thread per
    host).  err() is called for each host whose copy returned non-zero.
    """
    dbcfgs = json.loads(dbcfgs_json)
    hosts = dbcfgs['node_list'].split(',')
    traf_package = dbcfgs['traf_package']

    # save db configs to a tmp file and copy to all trafodion nodes
    dbcfgs_file = '/tmp/dbcfgs'
    p = ParseJson(dbcfgs_file)
    # remove password from config file
    # Each key is removed independently: with a shared try/except a missing
    # earlier key would leave the later passwords in the saved file.
    for secret_key in ('mgr_pwd', 'traf_pwd', 'kdcadmin_pwd'):
        dbcfgs.pop(secret_key, None)
    p.save(dbcfgs)

    key_file = '/tmp/id_rsa'
    run_cmd('sudo -n rm -rf %s*' % key_file)
    run_cmd('sudo -n echo -e "y" | ssh-keygen -t rsa -N "" -f %s' % key_file)

    files = [key_file, key_file+'.pub', traf_package, dbcfgs_file]

    remote_insts = [Remote(h, pwd=pwd) for h in hosts]
    threads = [Thread(target=r.copy, args=(files, '/tmp')) for r in remote_insts]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    for r in remote_insts:
        if r.rc != 0:
            err('Failed to copy files to %s' % r.host)
def get_interface_from_network(self, interfaces, network):
    """Return the first NIC under ``self.base[interfaces]`` carrying ``network``.

    err() is called when no NIC lists the network.
    """
    nic_map = self.base[interfaces]
    for nic_name in nic_map:
        assigned_networks = nic_map[nic_name]
        if network in assigned_networks:
            return nic_name
    err('Network not found: %s' % network)
def params_from_string(pars, pard = None):
    """Parse a ``key:value,key:value`` string into a params dictionary.

    pars -- params string; items are separated by ',' and each item is
            split on its first ':'
    pard -- params dictionary to update in place; a new one is created
            when omitted

    Returns the updated (or new) dictionary.  An empty key stands for '*'.
    An item without ':' is reported through err().
    """
    # default value for all benchmarks is empty string
    # this way we avoid exceptions without explicit checks
    if pard is None:
        pard = {'*':''}

    for item in pars.split(','):
        try:
            name, setting = item.split(':', 1)
        except ValueError:
            from common import err
            err('Wrong item in params key: {0}'.format(item))

        name = name.strip()
        if not name:
            # allow omit *
            name = '*'

        pard[name] = setting.strip()

    return pard
def get_role_interfaces(self, role):
    """Return the 'interfaces' entry of the first node whose role has ``role``.

    Nodes are taken from ``self.base['nodes']``; err() is called when no
    node matches.
    """
    for candidate in self.base['nodes']:
        if role not in candidate['role']:
            continue
        return candidate['interfaces']
    err('Role not found: %s' % role)
def params_from_string(pars, pard=None):
    """Build or update a params dictionary from a params string.

    pars -- comma separated ``key:value`` items (split on the first ':')
    pard -- dictionary to update in place; created when not given

    Returns the dictionary.  Items with an empty key are stored under '*';
    an item that has no ':' is reported through err().
    """
    if pard is None:
        # default value for all benchmarks is empty string
        # this way we avoid exceptions without explicit checks
        pard = {'*': ''}

    for chunk in pars.split(','):
        try:
            key, value = chunk.split(':', 1)
        except ValueError:
            from common import err
            err('Wrong item in params key: {0}'.format(chunk))

        # allow omit *
        pard[key.strip() or '*'] = value.strip()

    return pard
def _parse_string(self, info, string):
    """Return the first line of ``info`` that contains ``string``.

    err() is called when no line matches.
    """
    try:
        candidates = [row for row in info.split('\n') if string in row]
        string_line = candidates[0]
    except IndexError:
        err('Cannot get %s info' % string)
    return string_line
def get_env(self):
    """Store the deployed Fuel environment and its id on the instance.

    err() is called when 'fuel env' lists no environment or more than one.
    """
    environments = parse(exec_cmd('fuel env'))
    count = len(environments)
    if count == 0:
        err('No environment deployed')
    elif count > 1:
        err('More than 1 environment deployed')
    self.env = environments[0]
    self.env_id = self.env[E['id']]
def translate(self, boot_order_list):
    """Map each boot device name through ``DEV`` and return the results.

    Devices missing from ``DEV`` are reported through err() and are not
    added to the returned list.
    """
    translated = []
    for boot_dev in boot_order_list:
        if boot_dev not in DEV:
            err('Boot device %s not recognized' % boot_dev)
            continue
        translated.append(DEV[boot_dev])
    return translated
def scp_put(self, local, remote='.', dir=False):
    """Upload ``local`` to ``remote`` over the transport of ``self.client``.

    ``dir`` is passed on to ``SCPClient.put`` as its third positional
    argument.  Any exception is handed to err().
    """
    try:
        transport = self.client.get_transport()
        with scp.SCPClient(transport,
                           sanitize=lambda x: x,
                           socket_timeout=TIMEOUT) as _scp:
            _scp.put(local, remote, dir)
    except Exception as e:
        err(e)
def node_power_off(self, node_id):
    """Power down ``node_id`` through AMT when it reports power state S0.

    Nothing is done for a node in any other state.  err() is called when
    the powerdown response lacks 'pt_status: success'.
    """
    log('Power OFF Node %s' % node_id)
    cmd_prefix = self.amt_cmd(node_id)

    info_out, _ret = exec_cmd('{0} info'.format(cmd_prefix), check=False)
    if "Powerstate: S0" not in info_out:
        return

    down_out, _ret = exec_cmd('{0} powerdown'.format(cmd_prefix),
                              check=False)
    if 'pt_status: success' not in down_out:
        err('Could Not Power OFF Node %s' % node_id)
def parse_include_tag(self, tag):
    """Extract the file name from an ``include(<filename>)`` tag.

    The first ``len('include(')`` characters are dropped and every trailing
    ')' is stripped.  err() is called when nothing is left.
    """
    prefix = 'include('
    # Remove 'include(' prefix and trailing ')'
    argument = tag[len(prefix):]
    filename = argument.rstrip(')')
    if not filename:
        err('No argument for include().')
    return filename
def run():
    """Install the Trafodion dependencies with yum.

    In offline mode a temporary repo file pointing at the configured
    ``repo_ip``/``repo_port`` is written, every install is restricted to the
    ``traflocal`` repo, and the repo file is removed at the end.  In online
    mode pdsh is installed from a mirror URL chosen by the EL major version
    found in the kernel release string, unless rpm already lists it.

    err() is called when the kernel release string does not identify an
    EL6 or EL7 system.
    """
    dbcfgs = json.loads(dbcfgs_json)
    offline = dbcfgs['offline_mode'] == 'Y'

    if offline:
        yum_install = r'yum install -y --disablerepo=\* --enablerepo=traflocal %s'
    else:
        yum_install = 'yum install -y %s'

    if offline:
        print('Installing pdsh in offline mode ...')

        # setup temp local repo
        repo_content = LOCAL_REPO_PTR % (dbcfgs['repo_ip'], dbcfgs['repo_port'])
        with open(REPO_FILE, 'w') as f:
            f.write(repo_content)

        run_cmd(yum_install % 'pdsh-rcmd-ssh pdsh')
    else:
        pdsh_installed = cmd_output('rpm -qa|grep -c pdsh')
        if pdsh_installed == '0':
            release = platform.release()
            # A release string without an 'elN.<arch>' part (non-EL kernel)
            # must be reported as unsupported, not crash on None.groups()
            matched = re.search(r'el(\d).(\w+)', release)
            if matched is None:
                err('Unsupported Linux version')
            releasever, arch = matched.groups()

            if releasever == '7':
                pdsh_pkg = 'http://mirrors.neusoft.edu.cn/epel/7/%s/p/pdsh-2.31-1.el7.%s.rpm' % (arch, arch)
            elif releasever == '6':
                pdsh_pkg = 'http://mirrors.neusoft.edu.cn/epel/6/%s/pdsh-2.26-4.el6.%s.rpm' % (arch, arch)
            else:
                err('Unsupported Linux version')

            print('Installing pdsh ...')
            run_cmd('yum install -y %s' % pdsh_pkg)

    package_list = [
        'apr',
        'apr-util',
        'expect',
        'gzip',
        'libiodbc-devel',
        'lzo',
        'lzop',
        'openldap-clients',
        'perl-DBD-SQLite',
        'perl-Params-Validate',
        'perl-Time-HiRes',
        'sqlite',
        'snappy',
        'unixODBC-devel',
        'unzip',
    ]

    all_pkg_list = run_cmd('rpm -qa')
    for pkg in package_list:
        # NOTE(review): this is a substring test against the whole rpm
        # listing, so e.g. 'apr' also matches an installed 'apr-util'.
        if pkg in all_pkg_list:
            print('Package %s had already been installed' % pkg)
        else:
            print('Installing %s ...' % pkg)
            run_cmd(yum_install % pkg)

    # remove temp repo file
    if offline:
        os.remove(REPO_FILE)
def node_power_on(self, node_id):
    """Power up ``node_id`` through AMT unless it already reports state S0.

    The node's boot device is passed to the powerup command.  err() is
    called when the response lacks 'pt_status: success'.
    """
    log('Power ON Node %s' % node_id)
    cmd_prefix = self.amt_cmd(node_id)

    info_out, _ret = exec_cmd('{0} info'.format(cmd_prefix), check=False)
    if 'Powerstate: S0' in info_out:
        # already powered on
        return

    boot_dev = self.node_get_boot_dev(node_id)
    up_out, _ret = exec_cmd('{0} powerup {1}'.format(cmd_prefix, boot_dev),
                            check=False)
    if 'pt_status: success' not in up_out:
        err('Could Not Power ON Node %s' % node_id)
def run():
    """Create the Trafodion HDFS directories and set their ownership/ACLs.

    The hdfs binary is chosen by distro: the parcel binary for CDH when the
    parcel HBase lib directory exists, ``<hadoop_home>/bin/hdfs`` for
    APACHE, the default otherwise.  All hdfs commands run as the configured
    hdfs user.  With ``secure_hadoop`` set to 'Y' the Trafodion user is also
    granted RWXC in HBase, and err() is called when the shell output
    contains ERROR.
    """
    dbcfgs = json.loads(dbcfgs_json)
    distro = dbcfgs['distro']

    hdfs_bin = DEF_HDFS_BIN
    if 'CDH' in distro:
        if os.path.exists(PARCEL_HBASE_LIB):
            hdfs_bin = PARCEL_HDFS_BIN
    elif 'APACHE' in distro:
        hdfs_bin = dbcfgs['hadoop_home'] + '/bin/hdfs'

    traf_loc = '/user/trafodion'
    traf_user = dbcfgs['traf_user']
    hdfs_user = dbcfgs['hdfs_user']
    hbase_user = dbcfgs['hbase_user']
    hbase_group = cmd_output('%s groups %s | cut -d" " -f3' % (hdfs_bin, hbase_user))

    # executed strictly in this order, all as the hdfs user
    hdfs_commands = [
        '%s dfsadmin -safemode wait' % hdfs_bin,
        '%s dfs -mkdir -p %s/{trafodion_backups,bulkload,lobs} /hbase/archive' % (hdfs_bin, traf_loc),
        '%s dfs -chown -R %s:%s /hbase/archive' % (hdfs_bin, hbase_user, hbase_user),
        '%s dfs -chown -R %s:%s %s %s/{trafodion_backups,bulkload,lobs}' % (hdfs_bin, traf_user, traf_user, traf_loc, traf_loc),
        '%s dfs -chmod 0755 %s' % (hdfs_bin, traf_loc),
        '%s dfs -chmod 0750 %s/{trafodion_backups,bulkload,lobs}' % (hdfs_bin, traf_loc),
        '%s dfs -chgrp %s %s/bulkload' % (hdfs_bin, hbase_group, traf_loc),
        '%s dfs -setfacl -R -m user:%s:rwx /hbase/archive' % (hdfs_bin, traf_user),
        '%s dfs -setfacl -R -m default:user:%s:rwx /hbase/archive' % (hdfs_bin, traf_user),
        '%s dfs -setfacl -R -m mask::rwx /hbase/archive' % hdfs_bin,
    ]
    for command in hdfs_commands:
        run_cmd_as_user(hdfs_user, command)

    # Grant all privileges to the Trafodion principal in HBase
    if dbcfgs['secure_hadoop'] == 'Y':
        run_cmd('echo "grant \'%s\', \'RWXC\'" | %s su - %s -s /bin/bash -c "hbase shell" > /tmp/hbase_shell.out' % (traf_user, get_sudo_prefix(), hbase_user))
        error_count = cmd_output('grep -c ERROR /tmp/hbase_shell.out')
        if int(error_count):
            err('Failed to grant HBase privileges to %s' % traf_user)

        run_cmd('rm /tmp/hbase_shell.out')
def patch_iso(self, new_iso):
    """Copy the original ISO tree and patch the copy into ``new_iso``.

    Works on ``<tmp_dir>/origiso`` and ``<tmp_dir>/newiso``.  On any
    exception the original ISO directory is unmounted with fusermount,
    the temporary directory is deleted and the exception is handed to err().
    """
    orig_tree = '%s/origiso' % self.tmp_dir
    new_tree = '%s/newiso' % self.tmp_dir
    try:
        self.copy(orig_tree, new_tree)
        self.patch(new_tree, new_iso)
    except Exception as e:
        # clean up before reporting
        exec_cmd('fusermount -u %s' % orig_tree, False)
        delete(self.tmp_dir)
        err(e)
def load_yaml(self, filespec):
    """Load and return the YAML document at ``filespec`` (URL or path).

    Any exception while opening or parsing is reported through err().
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects, and the source may be a remote URL - confirm
    # that every source is trusted.
    try:
        if self.is_url(filespec):
            remote = urllib2.urlopen(filespec)
            return yaml.load(remote)
        with io.open(filespec) as local:
            return yaml.load(local)
    except Exception as error:
        err('Error opening YAML file: %s' % error)
def check_bridge(pxe_bridge, dha_path):
    """Check that the PXE bridge is up unless the DHA adapter is libvirt.

    The adapter type is read from the YAML file at ``dha_path``.  For any
    adapter other than 'libvirt', err() is called when 'ip link show'
    reports the bridge in 'state DOWN'.
    """
    # NOTE(review): yaml.load without an explicit Loader - confirm the DHA
    # file is always trusted.
    with io.open(dha_path) as yaml_file:
        dha_struct = yaml.load(yaml_file)

    if dha_struct['adapter'] == 'libvirt':
        return

    log('Using Linux Bridge %s for booting up the Fuel Master VM'
        % pxe_bridge)
    link_info = exec_cmd('ip link show %s' % pxe_bridge)
    bridge_is_down = pxe_bridge in link_info and 'state DOWN' in link_info
    if bridge_is_down:
        err('Linux Bridge {0} is not Active, bring'
            ' it UP first: [ip link set dev {0} up]'.format(pxe_bridge))
def load_template(self, filespec):
    """Return the content of the template at ``filespec`` (URL or path).

    Any exception while opening or reading is reported through err().
    """
    # NOTE(review): the URL branch returns the whole body from read() while
    # the file branch returns a list from readlines() - confirm callers
    # handle both shapes.
    try:
        if self.is_url(filespec):
            remote = urllib2.urlopen(filespec)
            return remote.read()
        with io.open(filespec) as local:
            return local.readlines()
    except Exception as error:
        err('Error opening template file: %s' % error)
def satt_log_init(lfile):
    """Open ``lfile`` as the global log file and close it at exit.

    Must be called while no log file is open (asserted).  A failure to
    create the file is reported through err().
    """
    global log_file
    assert log_file is None

    try:
        log_file = open(lfile, 'w')
    # open() raises IOError on Python 2, which is not a subclass of OSError
    # there, so both have to be listed for the handler to ever fire
    except (IOError, OSError) as e:
        from common import err
        err('Failed creating log: {0}'.format(str(e)))

    atexit.register(lambda: log_file.close())
def patch_iso(self, new_iso):
    """Copy the original ISO tree and patch the copy into ``new_iso``.

    Works on ``<tmp_dir>/origiso`` and ``<tmp_dir>/newiso``.  On any
    exception the original ISO directory is unmounted with fusermount,
    ``MOUNT_STATE_VAR`` is dropped from the environment, the temporary
    directory is deleted and the exception is handed to err().
    """
    orig_tree = '%s/origiso' % self.tmp_dir
    new_tree = '%s/newiso' % self.tmp_dir
    try:
        self.copy(orig_tree, new_tree)
        self.patch(new_tree, new_iso)
    except Exception as e:
        # clean up before reporting
        exec_cmd('fusermount -u %s' % orig_tree, False)
        os.environ.pop(MOUNT_STATE_VAR, None)
        delete(self.tmp_dir)
        err(e)
def check_supported_release(self):
    """Verify that Fuel lists a release named ``self.wanted_release``.

    The release list is fetched with 'fuel release -l' over ``self.ssh``;
    err() is called when no entry has the wanted name.
    """
    log('Check supported release: %s' % self.wanted_release)
    release_list = parse(self.ssh.exec_cmd('fuel release -l'))
    # any() stops at the first matching release
    found = any(release[R['name']] == self.wanted_release
                for release in release_list)
    if not found:
        err('This Fuel does not contain the following release: %s'
            % self.wanted_release)
def verify_node_status(self):
    """Fail when a node assigned to a cluster is not in status 'ready'.

    Nodes whose cluster column is 'None' are ignored.  err() receives one
    '[node <id>, status <status>]' line per failing node.
    """
    node_list = parse(exec_cmd('fuel node list'))
    failed_nodes = [
        (node[N['id']], node[N['status']])
        for node in node_list
        if node[N['status']] != 'ready' and node[N['cluster']] != 'None'
    ]

    if failed_nodes:
        summary = ''.join('[node %s, status %s]\n' % (node_id, status)
                          for node_id, status in failed_nodes)
        err('Deployment failed: %s' % summary)
def node_reset(self, node_id):
    """Reset ``node_id`` through AMT; the node has to report state S0.

    err() is called when the node is not in state S0, or when the reset
    response lacks 'pt_status: success'.
    """
    log('RESET Node %s' % node_id)
    cmd_prefix = self.amt_cmd(node_id)
    boot_dev = self.node_get_boot_dev(node_id)

    resp, _ret = exec_cmd('{0} info'.format(cmd_prefix), check=False)
    if 'Powerstate: S0' not in resp:
        err("Cannot RESET Node %s because it's not Active, state: %s"
            % (node_id, resp))
        return

    resp, _ret = exec_cmd('{0} reset {1}'.format(cmd_prefix, boot_dev),
                          check=False)
    if 'pt_status: success' not in resp:
        err('Could Not RESET Node %s' % node_id)
def verify_node_status(self):
    """Fail when any node of environment ``self.env_id`` is not 'ready'.

    err() receives one '[node <id>, status <status>]' line per failing
    node, together with ``self.collect_logs`` as its second argument.
    """
    node_list = parse(exec_cmd('fuel --env %s node' % self.env_id))
    failed_nodes = [
        (node[N['id']], node[N['status']])
        for node in node_list
        if node[N['status']] != 'ready'
    ]

    if failed_nodes:
        summary = ''.join('[node %s, status %s]\n' % (node_id, status)
                          for node_id, status in failed_nodes)
        err('Deployment failed: %s' % summary, self.collect_logs)
def run():
    """Run every discovery method and print the collected results as JSON.

    The db config JSON is taken from the first command line argument
    (err() when it is missing).  Every attribute of the ``Discover``
    instance whose name starts with ``PREFIX`` is called and has to return
    a ``(key, value)`` pair.
    """
    try:
        dbcfgs_json = sys.argv[1]
    except IndexError:
        err('No db config found')

    discover = Discover(json.loads(dbcfgs_json))

    method_names = [name for name in dir(discover) if name.startswith(PREFIX)]
    result = {}
    for name in method_names:
        bound_method = getattr(discover, name)
        key, value = bound_method()  # call method
        result[key] = value

    print(json.dumps(result))
def restart(self):
    """Stop the HBASE, ZOOKEEPER and HDFS services, then start all services.

    Each service is asked to go to state 'INSTALLED' through a PUT on its
    service URL; when the PUT returns a truthy result the service state is
    polled every ``interval`` seconds until it reads 'INSTALLED'.  After
    that a single PUT on the services base URL requests state 'STARTED'
    and the returned request URL is polled until its status is
    'COMPLETED'.  err() is called when a poll counter reaches its maximum.
    """
    srv_baseurl = CLUSTER_URL_PTR % (self.url, self.cluster_name) + '/services/'
    srvs = ['HBASE', 'ZOOKEEPER', 'HDFS']

    # Stop
    print 'Restarting HDP services ...'
    for srv in srvs:
        srv_url = srv_baseurl + srv
        config = {'RequestInfo': {'context' :'Stop %s services' % srv}, 'ServiceInfo': {'state' : 'INSTALLED'}}
        rc = self.p.put(srv_url, config)

        # check stop status
        # a falsy PUT result is treated as "nothing to do" (see else branch)
        if rc:
            stat = self.p.get(srv_url)

            retry_cnt, maxcnt, interval = 0, 30, 5
            while stat['ServiceInfo']['state'] != 'INSTALLED':
                retry_cnt += 1
                # one more dot per poll; '\r' and the trailing comma keep
                # the progress output on a single terminal line
                flush_str = '.' * retry_cnt
                print '\rCheck HDP service %s stop status (timeout: %dmin) %s' % (srv, maxcnt*interval/60, flush_str),
                sys.stdout.flush()
                time.sleep(interval)
                stat = self.p.get(srv_url)
                if retry_cnt == maxcnt:
                    err('Failed to stop HDP service %s, timeout' % srv)
            # wrap line
            print
        else:
            print 'HDP service %s had already been stopped' % srv

    time.sleep(5)
    # Start
    config = {'RequestInfo': {'context' :'Start All services'}, 'ServiceInfo': {'state' : 'STARTED'}}
    rc = self.p.put(srv_baseurl, config)

    # check start status
    if rc:
        # the PUT result carries the URL of the request to poll
        result_url = rc['href']
        stat = self.p.get(result_url)
        retry_cnt, maxcnt, interval = 0, 120, 5
        while stat['Requests']['request_status'] != 'COMPLETED':
            retry_cnt += 1
            flush_str = '.' * retry_cnt
            print '\rCheck HDP services start status (timeout: %dmin) %s' % (maxcnt*interval/60, flush_str),
            sys.stdout.flush()
            time.sleep(interval)
            stat = self.p.get(result_url)
            if retry_cnt == maxcnt:
                err('Failed to start all HDP services')
        print 'HDP services started successfully!'
    else:
        print 'HDP services had already been started'
def _monitorTasks(self):
    """Poll the running benchmarks, report their output and schedule new ones.

    Loops while ``self._is_running()`` holds.  For every (fd, flags) pair
    returned by ``self._poll_wait()``:

    * POLLERR - all tasks are killed and err() is called;
    * POLLIN  - the benchmark's output is read and reported until a read
      returns nothing;
    * POLLHUP - the benchmark is unregistered; when the report does not
      accept it as done it is queued again, then the progress is updated
      and a new benchmark is started on the same task.
    """
    assert self._is_running()

    while self._is_running():
        for fd, flags in self._poll_wait():
            if flags & select.POLLERR:
                self._killTasks()
                err('Waiting for benchmark failed')

            if flags & select.POLLIN:
                bench = self._getBenchmark(fd)
                try:
                    data = bench.readOutput()
                    while data:
                        self._report.report(data, bench)
                        data = bench.readOutput()
                # While can be too fast and raise
                # EBUSY
                except IOError:
                    # note: this also skips the POLLHUP handling below for
                    # this fd in the current round
                    continue

            # is benchmark done?
            if flags & select.POLLHUP:
                # remove the old benchmark
                bench = self._unregisterFd(fd)
                if not self._report.done(bench):
                    # something went wrong - queue this one again
                    satt_log('Running benchmark again');
                    # we must take this one as it was not running yet
                    # (cancels out the unconditional increment below)
                    self._benchmarks_done -= 1

                    # XXX we do not have a mechanism how to track
                    # how many times the benchmark ran, so it may happen
                    # that it will run indefinitely many times.
                    # It seems we don't need to care about that atm, so let's
                    # ignore it for now.
                    #
                    # P. S message for future me: If you read this, we probably hit
                    # this error and you hate me and my wickedness - just sorry.
                    bench.task.readd(bench)

                self._benchmarks_done += 1
                # set progress
                if self._benchmarks_done != 0:
                    prgs = float(self._benchmarks_done) / self._benchmarks_count
                    self._report.progress(int(prgs * 100))

                # run new benchmark
                self._runBenchmark(bench.task)
def __init__(self, query_func):
    """Load the rating values of the configured year through ``query_func``.

    ``query_func`` is called with a SELECT statement; the first five
    columns of the first returned row are stored as ``unknown``,
    ``false_correct``, ``false_incorrect``, ``true_correct`` and
    ``true_incorrect``.  err() is called when the query returns nothing.
    """
    query = ('SELECT unknown, false_correct, false_incorrect,'
             'true_correct, true_incorrect '
             'FROM rating_methods INNER JOIN years '
             'ON rating_methods.year_id = years.id '
             'WHERE year = \'{0}\';'.format(configs.configs['year']))
    rows = query_func(query)
    if not rows:
        err('Failed getting rating methods')

    row = rows[0]
    self.unknown = row[0]
    self.false_correct = row[1]
    self.false_incorrect = row[2]
    self.true_correct = row[3]
    self.true_incorrect = row[4]
def restart(self):
    """Restart the CDH services, then deploy the client configuration.

    Both steps are POST requests whose returned ``id`` is handed to the
    retry check; err() is called for a step whose check returns a falsy
    value.
    """
    restart_url = RESTART_URL_PTR % (self.url, self.cluster_name)
    deploy_cfg_url = DEPLOY_CFG_URL_PTR % (self.url, self.cluster_name)

    print('Restarting CDH services ...')
    restart_rc = self.p.post(restart_url)
    if not self.__retry_check(restart_rc['id'], 40, 15, 'restart'):
        err('Failed to restart CDH, max retry count reached')
    else:
        print('Restart CDH successfully!')

    deploy_rc = self.p.post(deploy_cfg_url)
    if not self.__retry_check(deploy_rc['id'], 30, 10, 'deploy'):
        err('Failed to deploy CDH client config, max retry count reached')
    else:
        print('Deploy client config successfully!')
def configure_environment(self):
    """Create the Fuel environment and configure settings, network, nodes.

    The YAML config directory is recreated empty first.  Name and network
    segment type come from ``self.dea``.  err() is called when the
    environment does not exist after the create command.
    """
    log('Configure environment')
    delete(self.yaml_config_dir)
    create_dir_if_not_exists(self.yaml_config_dir)

    env_name = self.dea.get_env_name()
    segment_type = self.dea.get_env_net_segment_type()
    # the same three values fill both the log line and the command
    env_details = (env_name, self.release_id, segment_type)

    log('Creating environment %s release %s net-segment-type %s'
        % env_details)
    exec_cmd('fuel env create --name "%s" --release %s --net-segment-type %s'
             % env_details)

    if not self.env_exists(env_name):
        err('Failed to create environment %s' % env_name)

    self.config_settings()
    self.config_network()
    self.config_nodes()
def reap_nodes_interfaces_transformations(self):
    """Write node, interface and transformation data to the DEA/DHA files.

    The Fuel nodes are renumbered so that the lowest Fuel node id becomes
    node 1.  For every node its roles, interface layout and transformation
    are reaped; the collected data is written to ``self.dea_file`` and
    ``self.dha_file``.  err() is called for a node without a role.
    """
    node_list = parse(exec_cmd('fuel node'))
    # Sort numerically: the ids go through int() below, and a plain sort of
    # id strings would put '10' before '8', giving a wrong min_node and
    # zero or negative node numbers.
    real_node_ids = sorted((node[N['id']] for node in node_list), key=int)
    min_node = real_node_ids[0]
    interfaces = {}
    transformations = {}
    dea_nodes = []
    dha_nodes = []

    for real_node_id in real_node_ids:
        node_id = int(real_node_id) - int(min_node) + 1
        self.last_node = node_id
        node = self.get_node_by_id(node_list, real_node_id)
        roles = commafy(node[N['roles']])
        if not roles:
            err('Fuel Node %s has no role' % real_node_id)
        dea_node = {'id': node_id, 'role': roles}
        dha_node = {'id': node_id}
        if_name, mac = self.reap_interface(real_node_id, interfaces)
        log('reap transformation for node %s' % real_node_id)
        tr_name = self.reap_transformation(real_node_id, roles,
                                           transformations)
        dea_node.update(
            {'interfaces': if_name,
             'transformations': tr_name})

        dha_node.update(
            {'pxeMac': mac if mac else None,
             'ipmiIp': None,
             'ipmiUser': None,
             'ipmiPass': None,
             'libvirtName': None,
             'libvirtTemplate': None})

        dea_nodes.append(dea_node)
        dha_nodes.append(dha_node)

    self.write_yaml(self.dha_file, {'nodes': dha_nodes}, False)
    self.write_yaml(self.dea_file, {'nodes': dea_nodes})
    self.write_yaml(self.dea_file, interfaces)
    self.write_yaml(self.dea_file, transformations)
    self.reap_fuel_node_info()
    self.write_yaml(self.dha_file, {'disks': DISKS})
def cleanup_fuel_environments(self, env_list):
    """Delete every environment in ``env_list`` and wait until none is left.

    After the delete commands 'fuel env list' is polled up to ``WAIT_LOOP``
    times, ``SLEEP_TIME`` seconds apart.  err() is called with the id and
    status of the environments still listed by the last poll when the list
    never became empty.
    """
    WAIT_LOOP = 60
    SLEEP_TIME = 10

    for env in env_list:
        env_id = env[E['id']]
        log('Deleting environment %s' % env_id)
        self.ssh.exec_cmd('fuel env --env %s --delete --force' % env_id)

    for _attempt in range(WAIT_LOOP):
        env_list = parse(self.ssh.exec_cmd('fuel env list'))
        if not env_list:
            break
        time.sleep(SLEEP_TIME)
    else:
        # never saw an empty list within the allowed number of polls
        err('Could not erase these environments %s'
            % [(env[E['id']], env[E['status']]) for env in env_list])
def _updateDb(self, rb):
    """Look up the ids of the configured year and of the running tool.

    The tool is identified by name, stripped ``rb.versions``,
    ``self.tool_params`` and year; it is inserted when the database does
    not know it yet (tagged with 'tool-tag' from the configuration when
    present, with the tool name otherwise).  err() is called when the
    configured year is not in the database.

    Returns a ``(tool_id, year_id)`` tuple.
    """
    cfg = configs.configs
    ver = rb.versions.strip()

    # NOTE(review): the queries are built with str.format - confirm none of
    # the interpolated values can come from an untrusted source.
    year_query = """ SELECT id FROM years WHERE year = '{0}'; """.format(cfg['year'])
    rows = self._db.query(year_query)
    if not rows:
        err('Do not have year {0}. If this is not typo, '
            'update the database and benchmarks'.format(cfg['year']))
    year_id = rows[0][0]

    # If tool that runs in this run is not known to database, add it
    tool_query = """ SELECT id FROM tools WHERE name = '{0}' and version = '{1}' and params = '{2}' and year_id = '{3}'; """.format(cfg['tool'], ver, self.tool_params, year_id)
    rows = self._db.query(tool_query)
    if not rows:
        tag = cfg['tool-tag'] if 'tool-tag' in cfg else cfg['tool']
        insert_query = """ INSERT INTO tools (name, year_id, version, params, tag, note) VALUES('{0}', '{1}', '{2}', '{3}', '{4}', {5}); """.format(cfg['tool'], year_id, ver, self.tool_params, tag, Empty2Null(cfg['note']))
        self._db.query(insert_query)

        # get new tool_id
        rows = self._db.query(tool_query)
        assert len(rows) == 1

    tool_id = rows[0][0]

    return tool_id, year_id
def node_power_off(self, node_id):
    """Power off ``node_id`` through IPMI and wait until it reports 'off'.

    Nothing is done unless the chassis reports 'on'.  After the power off
    command the status is polled up to ``WAIT_LOOP`` times, ``SLEEP_TIME``
    seconds apart; err() is called when 'off' was never reported.
    """
    WAIT_LOOP = 200
    SLEEP_TIME = 3

    log('Power OFF Node %s' % node_id)
    cmd_prefix = self.ipmi_cmd(node_id)
    status_cmd = '%s chassis power status' % cmd_prefix

    state = exec_cmd(status_cmd)
    if state != 'Chassis Power is on':
        return

    exec_cmd('%s chassis power off' % cmd_prefix)
    for _attempt in range(WAIT_LOOP):
        state, _rc = exec_cmd(status_cmd, False)
        if state == 'Chassis Power is off':
            break
        time.sleep(SLEEP_TIME)
    else:
        # the loop ran out without ever seeing the 'off' state
        err('Could Not Power OFF Node %s' % node_id)
def parse_configs(path = 'symbiotic/config'):
    """Read ``key = value`` settings from ``path`` into the global ``configs``.

    Blank lines and lines starting with '#' are skipped.  A line ending
    with a backslash is joined with the next non-skipped line.  The value
    of the 'params' key is parsed with params_from_string().  A missing
    file is not an error: ``configs`` is returned untouched.

    err() is called when the file cannot be opened, when a line has no
    '=', and for keys that are not in ``allowed_keys``.

    Returns the ``configs`` dictionary.
    """
    from common import err

    if not os.path.exists(path):
        return configs

    try:
        f = open(path, 'r')
    except IOError as e:
        err("Failed opening configuration file ({0}): {1}"
            .format(path, e.strerror))
        # nothing to parse; do not fall through to an unbound handle
        return configs

    # the context manager makes sure the file gets closed again
    with f:
        accline = None
        for line in f:
            line = line.strip()
            if not line or line[0] == '#':
                continue

            if accline is not None:
                line = accline + line
                accline = None

            # if \ is on the end of line, append next line
            if line[-1] == '\\':
                accline = line[:-1]
                continue

            key, sep, val = line.partition('=')
            if not sep:
                # report a malformed line instead of dying on a ValueError
                err('Invalid config line (missing "="): {0}'.format(line))
            key = key.strip()
            val = val.strip()

            if key in allowed_keys:
                if key == 'params':
                    configs[key] = params_from_string(val)
                else:
                    configs[key] = val
            else:
                err('Unknown config key: {0}'.format(key))

    return configs
def run():
    """Generate sqconfig (node and overflow sections) and run sqgen.

    Reads ``node_list`` and ``scratch_locs`` (both comma separated) from
    the module-level ``dbcfgs_json`` JSON string, asks ``lscpu`` for the
    CPU and socket counts of the local host and writes
    ``$TRAF_HOME/sql/scripts/sqconfig``: a ``begin node`` section (a
    ``_virtualnodes 2`` entry for a single node, one line per node
    otherwise) followed by a ``begin overflow`` section with one ``hdd``
    line per scratch location.  Finally ``sqgen`` is invoked.
    """
    dbcfgs = json.loads(dbcfgs_json)

    nodes = dbcfgs['node_list'].split(',')
    scratch_locs = dbcfgs['scratch_locs'].split(',')

    # this script is running by trafodion user, so get sqroot from env
    # .get() so that an unset variable reaches the err() below instead of
    # dying with a bare KeyError
    traf_home = os.environ.get('TRAF_HOME', '')
    if traf_home == '':
        err('TRAF_HOME var is empty')
    sqconfig_file = traf_home + '/sql/scripts/sqconfig'

    # first output line is the CPU count, second one the socket count
    core, processor = run_cmd(r"lscpu|grep -E '(^CPU\(s\)|^Socket\(s\))'|awk '{print $2}'").split('\n')[:2]
    # highest core index written to the config is capped at 255
    core = int(core)-1 if int(core) <= 256 else 255

    lines = ['begin node\n']
    if len(nodes) == 1:
        lines.append('_virtualnodes 2\n')
    else:
        for node_id, node in enumerate(nodes):
            line = 'node-id=%s;node-name=%s;cores=0-%d;processors=%s;roles=connection,aggregation,storage\n' % (node_id, node, core, processor)
            lines.append(line)
    lines.append('end node\n')

    lines.append('\n')
    lines.append('begin overflow\n')
    for scratch_loc in scratch_locs:
        lines.append('hdd %s\n' % scratch_loc)
    lines.append('end overflow\n')

    # write out the node section
    with open(sqconfig_file, 'w') as f:
        f.writelines(lines)

    print('sqconfig generated successfully!')

    run_cmd('sqgen')

    print('sqgen ran successfully!')
def run(pwd):
    """Generate a local ssh key and copy it, with the package, to all nodes.

    A fresh RSA key pair is created at ``/tmp/id_rsa``; both key files and
    the ``traf_package`` named in the configuration are copied to ``/tmp``
    on every host of ``node_list``, one thread per host.  err() is called
    for each host whose copy finished with a non-zero return code.
    """
    dbcfgs = json.loads(dbcfgs_json)
    hosts = dbcfgs['node_list'].split(',')
    traf_package = dbcfgs['traf_package']

    key_file = '/tmp/id_rsa'
    # drop any stale key first; the piped "y" answers a possible prompt
    run_cmd('sudo -n rm -rf %s*' % key_file)
    run_cmd('sudo -n echo -e "y" | ssh-keygen -t rsa -N "" -f %s' % key_file)

    files = [key_file, key_file + '.pub', traf_package]

    remotes = [Remote(host, pwd=pwd) for host in hosts]
    copy_jobs = [Thread(target=remote.copy, args=(files, '/tmp'))
                 for remote in remotes]

    for job in copy_jobs:
        job.start()
    for job in copy_jobs:
        job.join()

    for remote in remotes:
        if remote.rc != 0:
            err('Failed to copy files to %s' % remote.host)
def run(user, pwd):
    """Create an ssh key locally and distribute key and package to all nodes.

    The key pair is generated at ``SSHKEY_FILE``.  Key files and the
    configured ``traf_package`` are copied to ``/tmp`` on each host of
    ``node_list`` in parallel threads, logging in as ``user``/``pwd``.
    err() is called for each host whose copy returned a non-zero code.
    """
    dbcfgs = json.loads(dbcfgs_json)
    hosts = dbcfgs['node_list'].split(',')
    traf_package = dbcfgs['traf_package']

    sudo_prefix = get_sudo_prefix()
    # drop any stale key first; the piped "y" answers a possible prompt
    run_cmd('%s rm -rf %s*' % (sudo_prefix, SSHKEY_FILE))
    run_cmd('%s echo -e "y" | ssh-keygen -t rsa -N "" -f %s' % (sudo_prefix, SSHKEY_FILE))

    public_key = SSHKEY_FILE + '.pub'
    files = [SSHKEY_FILE, public_key, traf_package]

    remote_insts = [Remote(host, user=user, pwd=pwd) for host in hosts]
    copiers = [Thread(target=inst.copy, args=(files, '/tmp'))
               for inst in remote_insts]

    for copier in copiers:
        copier.start()
    for copier in copiers:
        copier.join()

    for inst in remote_insts:
        if inst.rc != 0:
            err('Failed to copy files to %s' % inst.host)
def _exception_handler(args, data):
    """Handle a failed database query.

    args -- exception arguments; ``args[1]`` is the error message
    data -- ``(query, tool_id, task_id)`` tuple of the failed query

    A 'Duplicate entry' error is only logged when the 'ignore-duplicates'
    option is 'yes'; otherwise, and for every other error, err() is called.
    """
    q, tool_id, task_id = data

    if (args[1].startswith('Duplicate entry')):

        if configs.configs['ignore-duplicates'] == 'yes':
            satt_log('Already has this result for this tool, ignoring.')
        else:
            # The parenthetical explaining what identifies a tool used to
            # be detached from its sentence; the lines are in reading
            # order now.
            err('Already has result of this benchmark for this tool.\n'
                'It is only supported to have one result for each '
                'benchmark and particular tool\n'
                '(tool + version + params).\n'
                'If you want to ignore this behaviour use --ignore-duplicates.\n'
                'You can delete the old result:\n'
                '  $ ./db-cli \'DELETE from task_results WHERE tool_id={0}'
                ' and task_id={1}\'\n'
                'or you can delete all results for this tool:\n'
                '  $ ./db-cli \'DELETE from tools WHERE id={0}\'\n'
                .format(tool_id, task_id))
    else:
        err('Failed querying db: {0}\n\n{1}'.format(args[1], q))
def run():
    """Create the Trafodion HDFS directories and set their ownership/ACLs.

    The hdfs binary is chosen by distro: the parcel binary for CDH when the
    parcel HBase lib directory exists, ``<hadoop_home>/bin/hdfs`` for
    APACHE, ``/usr/bin/hdfs`` otherwise.  All hdfs commands run as the
    configured hdfs user.  With ``secure_hadoop`` set to 'Y' the Trafodion
    user is also granted RWXC in HBase, and err() is called when the shell
    output contains ERROR.
    """
    hdfs_bin = "/usr/bin/hdfs"

    dbcfgs = json.loads(dbcfgs_json)
    distro = dbcfgs["distro"]

    if "CDH" in distro:
        parcel_lib = "/opt/cloudera/parcels/CDH/lib/hbase/lib"
        if os.path.exists(parcel_lib):
            hdfs_bin = "/opt/cloudera/parcels/CDH/bin/hdfs"
    elif "APACHE" in distro:
        hdfs_bin = dbcfgs["hadoop_home"] + "/bin/hdfs"

    traf_loc = "/user/trafodion"
    traf_user = dbcfgs["traf_user"]
    hdfs_user = dbcfgs["hdfs_user"]
    hbase_user = dbcfgs["hbase_user"]

    run_cmd_as_user(hdfs_user, "%s dfsadmin -safemode wait" % hdfs_bin)
    run_cmd_as_user(
        hdfs_user,
        "%s dfs -mkdir -p %s/{trafodion_backups,bulkload,lobs} /hbase/archive /hbase-staging" % (hdfs_bin, traf_loc),
    )
    run_cmd_as_user(
        hdfs_user, "%s dfs -chown -R %s:%s /hbase/archive /hbase-staging" % (hdfs_bin, hbase_user, hbase_user)
    )
    run_cmd_as_user(
        hdfs_user,
        "%s dfs -chown -R %s:%s %s/{trafodion_backups,bulkload,lobs}" % (hdfs_bin, traf_user, traf_user, traf_loc),
    )
    run_cmd_as_user(hdfs_user, "%s dfs -setfacl -R -m user:%s:rwx /hbase/archive" % (hdfs_bin, traf_user))
    run_cmd_as_user(hdfs_user, "%s dfs -setfacl -R -m default:user:%s:rwx /hbase/archive" % (hdfs_bin, traf_user))
    run_cmd_as_user(hdfs_user, "%s dfs -setfacl -R -m mask::rwx /hbase/archive" % hdfs_bin)

    # Grant all privileges to the Trafodion principal in HBase
    if dbcfgs["secure_hadoop"] == "Y":
        # The grant statement has to be echoed into the HBase shell's stdin;
        # without "echo" the local shell tries to execute "grant" itself and
        # the HBase shell receives no input.
        run_cmd(
            "echo \"grant '%s', 'RWXC'\" | sudo -u %s hbase shell > /tmp/hbase_shell.out" % (traf_user, hbase_user)
        )
        has_err = cmd_output("grep -c ERROR /tmp/hbase_shell.out")
        if int(has_err):
            err("Failed to grant HBase privileges to %s" % traf_user)

        run_cmd("rm /tmp/hbase_shell.out")