def stop_server(self):
    """
    Stop some/all the instances on a given SciDB server.

    With --all every server in the configured list is targeted and an
    instance filter is rejected.  With a server id only that server is
    targeted; its instances come either from the user's instance filter
    or from scidb.applyInstanceFilter().

    @throw AppError if an instance filter is combined with --all, or if
           neither --all nor a server id was specified
    """
    args = self._ctx._args
    instances = None

    if args.all:
        serversToStop = self._ctx._srvList
        target = "on %d servers" % (len(serversToStop))
        if args.instance_filter:
            raise AppError("Instance filter is not allowed with --all")
    elif args.server_id:
        srvId = self.findServerIndex(int(args.server_id))
        serversToStop = [self._ctx._srvList[srvId]]
        target = "on server %d" % (srvId)
        # always need instances to support multiple servers/host
        if args.instance_filter:
            requested = args.instance_filter.split(',')
            instances = [scidb.parseServerInstanceIds(requested)]
        else:
            instances = scidb.applyInstanceFilter(serversToStop,
                                                  instances,
                                                  nonesOK=False)
    else:
        raise AppError("Invalid server ID specification")

    errStr = "Failed to stop SciDB %s" % (target)
    scidb.stopSomeServers(serversToStop, instances=instances)
    self.waitToStop(serversToStop, errStr, instances=instances)
def start_server(self):
    """
    Start some/all the instances on a given SciDB server.

    The number of processes waited for is twice the number of instances
    being started (see the XXX notes below about the watchdog).

    @throw AppError if an instance filter is combined with --all, or if
           neither --all nor a server id was specified
    """
    args = self._ctx._args
    instances = None

    if args.all:
        serversToStart = self._ctx._srvList
        target = "on %d servers" % (len(serversToStart))
        if args.instance_filter:
            raise AppError("Instance filter is not allowed with --all")
    elif args.server_id:
        srvId = self.findServerIndex(int(args.server_id))
        serversToStart = [self._ctx._srvList[srvId]]
        target = "on server %d" % (srvId)
        if args.instance_filter:
            # always need instances to support multiple servers/host
            requested = args.instance_filter.split(',')
            instances = [scidb.parseServerInstanceIds(requested)]
    else:
        raise AppError("Invalid server ID specification")

    if not instances:
        # No explicit filter was given: let the helper work out the
        # per-server instance lists.
        instances = scidb.applyInstanceFilter(serversToStart,
                                              instances,
                                              nonesOK=False)
        numberOfProcs = scidb.getInstanceCount(
            serversToStart) * 2  #XXX +1 for watchdog
    else:
        numberOfProcs = len(instances[0]) * 2  #XXX +1 for watchdog

    scidb.startSomeServers(serversToStart, instances=instances)
    errStr = "Failed to start SciDB %s" % (target)
    self.waitToStart(serversToStart,
                     numberOfProcs,
                     errStr,
                     instances=instances)
def parseServerInstanceIds(instanceList):
    '''
    Expand the instance list of a config.ini server entry, i.e.
    server-<sid>=<host>,<instance_list>
    where instance_list is of the form: 'n,m-p,q-s, ...'

    Only the first item may be a bare number 'n', which stands for the
    range 0..n; every other item must be an explicit 'lo-hi' range.
    Ranges must be given in increasing order and must not overlap.

    @param instanceList pre-split instance_list, i.e. [ 'n','m-p','q-s', ...]
    @return a single flat list of instance ids, e.g.
            ['1','3-4'] yields [0, 1, 3, 4]
    @throw AppError on a malformed item, a reversed range, or an
           overlapping range
    '''
    expanded = []
    nextFree = 0  # smallest id not yet covered by an earlier range
    for position, item in enumerate(instanceList):
        bounds = item.split("-")
        if len(bounds) == 2:
            low, high = int(bounds[0]), int(bounds[1])
        elif len(bounds) == 1 and position == 0:
            low, high = 0, int(bounds[0])
        else:
            raise AppError("Invalid server entry")

        if low > high:
            raise AppError("Invalid server instance range in server entry")
        if low < nextFree:
            raise AppError("Duplicate server instances in server entry")

        nextFree = high + 1
        expanded.extend(range(low, nextFree))
    return expanded
def validateConfigForAddDelta(self, deltaCtx):
    """
    Validate the options of a delta config that adds instances.

    Only 'server-*', 'data-dir-prefix-*' and 'db_name' options are
    accepted.  Each new data-dir-prefix option is copied into
    self._ctx._configOpts as it is seen, so options copied before a
    later failure stay in place.

    @param deltaCtx context holding the parsed delta config
    @throw AppError if a data-dir-prefix option already exists, an
           unknown option is present, or the data-dir-prefix options do
           not match the server instances listed in the delta
    """
    serverKeys = 0
    prefixKeys = 0
    for key in deltaCtx._configOpts:
        if 'server-' in key:
            serverKeys += 1
        elif 'data-dir-prefix-' in key:
            if key in self._ctx._configOpts:
                raise AppError("Option %s already exists" % (key))
            prefixKeys += 1
            self._ctx._configOpts[key] = deltaCtx._configOpts[key]
        elif key == 'db_name':
            assert deltaCtx._configOpts[key] == self._ctx._scidb_name, \
                "Unexpected SciDB name %s" % (str(deltaCtx._configOpts[key]))
        else:
            raise AppError("Unknown option with key %s" % (key))

    assert serverKeys == len(deltaCtx._srvList), "Duplicate server- entries"

    # Every data-dir-prefix option must name a (server, instance) pair
    # that the delta actually declares.
    matchedPrefixes = sum(
        1
        for deltaSrv in deltaCtx._srvList
        for liid in deltaSrv.getServerInstances()
        if 'data-dir-prefix-%d-%d' % (deltaSrv.getServerId(), liid)
        in deltaCtx._configOpts)

    if prefixKeys != matchedPrefixes:
        raise AppError("Duplicate/invalid data-dir-prefix entries")
def add_user(u):
    """
    Add a new SciDB user.

    The password is read from standard input when _args.stdin is set,
    otherwise it is obtained from getVerifiedPassword().  Its SHA-512
    digest, base64-encoded, is what gets passed to create_user().

    @param u name of the user to create
    @return 0 on success
    @throw AppError if the user already exists or the query fails
    """
    if user_exists(u):
        raise AppError("User '{0}' already exists".format(u))

    if _args.stdin:
        cleartext = raw_input()
    else:
        cleartext = getVerifiedPassword()

    digest = hashlib.sha512(cleartext).digest()
    pwhash = base64.b64encode(digest)
    query = "create_user('{0}', '{1}')".format(u, pwhash)

    out, err = _iquery(query)
    if _iquery.returncode:
        raise AppError("Cannot create user '{0}':\n{1}".format(u, err))
    print(out)
    return 0
def setUpClass(self): """Create some test data files used by all test methods.""" print "Setup ...", sys.stdout.flush() self._iquery = IQuery(afl=True, no_fetch=True) # -naq self._array_cleanups = [] self._files = {} # map array name to input file # Put all our data files in one temp directory so we can # easily remove them all during tearDown. if os.system("rm -rf {0} ; mkdir -p {0}".format(_tmpdir)): raise AppError("Trouble (re)creating %s" % _tmpdir) # Create slightly sparse 3-D input data with no collisions. self._files['nocoll_3d'] = os.path.join(_tmpdir, "nocoll_3d.bin") if boxofpoints([ 'boxofpoints', '--lower-corner', '0,0,0', '--upper-corner', '9,69,69', '--cells', '40000', # sparse: 40000 < 10x70x70 (49000) '--format', 'binary', '--output', self._files['nocoll_3d'], '--seed', '42' ]): raise AppError("box_of_points could not create %s" % self._files['nocoll_3d']) # Create dense 2-D input data with 10% collisions. self._files['coll_2d'] = os.path.join(_tmpdir, "coll_2d.bin") if boxofpoints([ 'boxofpoints', '--lower-corner', '0,0', '--upper-corner', '49,999', '--cells', '50000', # dense: 50,000 == 50x1000 '--collisions', '0.1', # 10% collision rate '--format', 'binary', '--output', self._files['coll_2d'], '--seed', '42' ]): raise AppError("box_of_points could not create %s" % self._files['coll_2d']) print "done"
def removeInstances(self, srv):
    '''
    Remove the instances of srv from this entry.
    Every instance being removed must currently exist; on failure the
    entry is left unchanged.

    @param srv server entry with the same server id as this one
    @throw AppError if the server ids differ or an instance to remove
           is not present
    '''
    otherId = srv.getServerId()
    if self._sid != otherId:
        raise AppError("Cannot remove instances. Server IDs are different %d!=%d" % \
                           (self._sid, otherId))

    remaining = set(self._instances)
    for inst in srv.getServerInstances():
        if inst not in remaining:
            raise AppError("Cannot remove non-existent instance %d" % (inst))
        remaining.discard(inst)
    self._instances = sorted(remaining)
def modify_user(u):
    """
    Change SciDB user's password.

    @param u name of the user whose password is changed
    @return 0 on success
    @throw AppError if the user does not exist or the change_user
           query reports a non-zero return code
    """
    if not user_exists(u):
        raise AppError("User '{0}' does not exist".format(u))
    # NOTE(review): the text between the prompt string and the
    # change_user query is masked ("******") in this copy of the file, so
    # the statements that read the password, compute pwhash and invoke
    # the query are missing and the next statement is not valid Python.
    # Restore them from version control before touching this function.
    prompt = "New %s password: "******"change_user('password', '{0}', '{1}')".format(
        u, pwhash))
    if _iquery.returncode:
        raise AppError("Cannot change password for user '{0}':\n{1}".format(
            u, err))
    print out
    return 0
def __enter__(self):
    """
    Make sure usable Postgres credentials exist before the with-body runs.

    Looks for an entry matching this user/db/host/port in the pgpass
    file named by $PGPASSFILE, or in <home>/.pgpass when that variable
    is not set.  If no such entry is found, self._make_pgpass() is
    called.  When self._verify is set, a trivial query is run through
    psql with --no-password to check the credentials.

    @throw AppError if the verification query fails
    """
    # If no credentials found in the default pgpass file, prompt
    # for a password and create a temporary pgpass file.
    pup = None
    try:
        pgpass = os.environ.get("PGPASSFILE")
        if pgpass is None:
            # Only consult HOME when it is actually needed, and do not
            # blow up with KeyError when it is missing from the
            # environment.
            home = os.environ.get("HOME")
            if home is None:
                home = os.path.expanduser("~")
            pgpass = os.path.join(home, '.pgpass')
        pup = PgpassUpdater(filename=pgpass)
    except PgpassError as e:
        dbg("PgpassUpdater:", e)

    # Make a pgpass file if we don't have one or it doesn't
    # contain the entry we need.
    if not (pup and pup.find(self._user, self._db, self._host, self._port)):
        self._make_pgpass()

    # Try a sure-to-work psql command, if it fails the given
    # password was probably wrong.
    if self._verify:
        psql = Psql(host=self._host,
                    port=self._port,
                    database=self._db,
                    user=self._user,
                    debug=_args.verbose,
                    options=['--no-password'])
        try:
            psql("select usename from pg_catalog.pg_user limit 1")
        except Exception as e:
            raise AppError("Password verification failed:\n%s" % e)
def parseLine(line):
    """
    Record one 'server_id,server_instance_id,instance_id' line in
    parsedIds, keyed by 'server_id,server_instance_id'.

    @param line one comma-separated line; surrounding whitespace is ignored
    @throw AppError if the line has fewer than three fields
    """
    fields = line.strip().split(',')
    if len(fields) < 3:
        raise AppError(
            "Unexpected instance [server_id,server_instance_id,instance_id]: %s "
            % (fields))
    serverKey = ','.join(fields[:2])
    parsedIds[serverKey] = int(fields[2])
def waitToStart(self,
                servers,
                procNum,
                errorStr,
                instances=None,
                maxAttempts=10):
    """
    Wait for the specified (or all) SciDB instances to be started on a given list of servers.
    An instance is considered started when 2 OS processes with the same command line
    are found by the ps command. The command line must be of the form:
    <base_path>/<server_id>/<server_instance_id>/SciDB-<server_id>-<server_instance_id>

    The process count is checked once immediately and then re-checked
    up to maxAttempts times, sleeping one second before each re-check.

    @param servers list of servers to check
    @param procNum minimum number of processes that must be found
    @param errorStr message of the AppError raised on timeout
    @param instances optional per-server instance lists passed through
           to scidb.check_scidb_running()
    @param maxAttempts number of re-checks before giving up
    @throw AppError if fewer than procNum processes are found after
           maxAttempts re-checks
    """
    conns = []
    try:
        # Connect one server at a time: if a later connect fails, the
        # connections already opened are still in conns and get closed
        # by the finally clause instead of being leaked.
        for srv in servers:
            conns.append(scidb.sshconnect(srv))

        attempts = 0
        pidCount = scidb.check_scidb_running(sshConns=conns,
                                             servers=servers,
                                             instances=instances)
        while pidCount < procNum:
            attempts += 1
            if attempts > maxAttempts:
                raise AppError(errorStr)
            time.sleep(1)
            pidCount = scidb.check_scidb_running(sshConns=conns,
                                                 servers=servers,
                                                 instances=instances)
    finally:
        scidb.sshCloseNoError(conns)
def __call__(self, sql_stmt):
    """
    Invoke psql to run an SQL statement.

    The statement is fed to psql on standard input.

    @param sql_stmt the SQL text to run
    @return the result of self._make_table() applied to psql's output,
            or the raw output if no table could be built from it
    @throw AppError if psql exits with a non-zero status or its stderr
           contains 'ERROR:'
    """
    # Build command line.
    argv = [self.prog, '--no-align', '--quiet', '--no-psqlrc']
    if self.port:
        argv += ['-p', str(self.port)]
    for flag, value in (('-h', self.host),
                        ('-U', self.user),
                        ('-d', self.database)):
        if value:
            argv += [flag, value]
    if self.options:
        argv.extend(self.options)
    cmdline = ' '.join(argv)

    # Run it!
    self._dbg("Cmd:", cmdline)
    proc = subp.Popen(argv,
                      stdin=subp.PIPE,
                      stdout=subp.PIPE,
                      stderr=subp.PIPE)
    out, err = proc.communicate(sql_stmt)

    # Sometimes psql fails but gives exit status 0, mumble....
    if proc.returncode or 'ERROR:' in err:
        report = ('Psql: "{0}" failed:'.format(sql_stmt),
                  "Cmd: {0}".format(cmdline),
                  err)
        raise AppError('\n'.join(report))

    try:
        return self._make_table(out)
    except Exception as e:
        # Couldn't make a table from the output, so just return the output.
        self._dbg("Psql._make_table failed:", e)
        return out
def status_server(self):
    """
    Check if the servers specified in the config file are up.

    Expects two processes per instance on the selected servers and
    waits for them via waitToStart() with at most two re-checks.

    @throw AppError if neither --all nor a server id was specified
    """
    args = self._ctx._args
    if args.all:
        serversToCheck = self._ctx._srvList
        target = "on %d servers" % (len(serversToCheck))
    elif args.server_id:
        srvId = self.findServerIndex(int(args.server_id))
        serversToCheck = [self._ctx._srvList[srvId]]
        target = "on server %d" % (srvId)
    else:
        raise AppError("Invalid server ID specification")

    numberOfProcs = scidb.getInstanceCount(
        serversToCheck) * 2  #XXX make sure watchdog is on
    errStr = "Failed to find %d SciDB processes %s" % (numberOfProcs, target)

    # always need instances to support multiple servers/host
    instances = scidb.applyInstanceFilter(serversToCheck,
                                          None,
                                          nonesOK=False)
    self.waitToStart(serversToCheck,
                     numberOfProcs,
                     errStr,
                     maxAttempts=2,
                     instances=instances)
def __init__(self, sid, host, instance_list):
    '''
    Build a server entry.

    @param sid server id; converted with int(), so a numeric string is accepted
    @param host host name of the server
    @param instance_list pre-split instance list, as accepted by
           parseServerInstanceIds()
    @throw AppError if the instance list is invalid; the message names
           the host and server id
    '''
    self._sid = int(sid)
    self._host = host
    try:
        self._instances = parseServerInstanceIds(instance_list)
    except AppError as e:
        # Format the converted id: the raw `sid` may be a string, which
        # '%d' rejects with a TypeError that would hide the real error.
        raise AppError("%s for host: %s with server-id: %d" %
                       (str(e), host, self._sid))
def addInstances(self, srv):
    '''
    Merge the instances of srv into this entry, keeping them sorted.
    Duplicates are rejected; on failure the entry is left unchanged.

    @param srv server entry with the same server id as this one
    @throw AppError if the server ids differ or an instance is already
           present
    '''
    otherId = srv.getServerId()
    if self._sid != otherId:
        raise AppError("Cannot add instances. Server IDs are different %d!=%d" % \
                           (self._sid, otherId))

    merged = set(self._instances)
    for inst in srv.getServerInstances():
        if inst in merged:
            raise AppError("Duplicate instance %d" % (inst))
        merged.add(inst)
    self._instances = sorted(merged)
def removeServer(self, deltaFile, force):
    """
    Remove instances on one or more servers
    in accordance with a user-specified file.

    @param deltaFile path of the config file listing what to remove
    @param force passed through to unregisterSomeServers()
    @throw AppError if a listed server does not exist, if the removal
           would empty the host running the system catalog, or if it
           would remove every instance
    """
    deltaCtx = scidb.Context()
    deltaCtx._scidb_name = self._ctx._scidb_name
    deltaCtx._config_file = deltaFile
    scidb.parseConfig(deltaCtx)

    self.validateConfigForRemoveDelta(deltaCtx)

    # weed out the server-id's
    for deltaSrv in deltaCtx._srvList:
        current = self._ctx._srvList
        pos = bisect.bisect_left(current, deltaSrv)  #binary search
        found = (pos != len(current) and
                 current[pos].getServerId() == deltaSrv.getServerId())
        if not found:
            raise AppError("Cannot remove non-existent server %s" %
                           (str(deltaSrv)))

        existing = current[pos]
        existing.removeInstances(deltaSrv)
        if (not existing.getServerInstances() and
                existing.getServerHost() == self._ctx.pgHost):
            raise AppError(
                "Cannot remove all instances from the host with " +
                "the system catalog postgres instance %s" % self._ctx.pgHost)
        self.removeDataDirPrefixes(deltaSrv)

    # Servers left without any instance drop out of the list.
    survivors = [
        srv for srv in self._ctx._srvList if srv.getServerInstances()
    ]
    if not survivors:
        raise AppError("Cannot remove all instances")
    self._ctx._srvList = survivors

    scidb.checkRedundancy(scidb.getInstanceCount(self._ctx._srvList))

    config = self.removeDeltaFromConfig()

    ret = self.unregisterSomeServers(deltaCtx._srvList, force=force)
    assert ret, "Unexpected failure in while unregistering!"

    self.depositConfigFile(self._ctx._args.output, config)
def tearDownClass(self):
    """
    Remove the temp directory and the arrays registered for cleanup,
    unless _args.keep_arrays is set.

    @throw AppError if the temp directory cannot be removed
    """
    print "Teardown ...",
    sys.stdout.flush()
    if not _args.keep_arrays:
        if os.system("rm -rf {0}".format(_tmpdir)):
            raise AppError("Trouble cleaning up %s" % _tmpdir)
        # NOTE(review): assumes the array removal below is also guarded
        # by keep_arrays; the nesting is not recoverable from this copy
        # of the file -- confirm against version control.
        for a in self._array_cleanups:
            self._iquery("remove(%s)" % a)
    print "done"
def findServerIndex(self, srvId):
    """
    Find the server entry index for a given server_id
    in the list of servers specified in the config.ini

    The lookup is a binary search, so it relies on the server list
    being ordered by server id.

    @param srvId integer server id to look for
    @return index of the matching entry in self._ctx._srvList
    @throw AppError if no entry has that server id
    """
    knownIds = [srv.getServerId() for srv in self._ctx._srvList]
    pos = bisect.bisect_left(knownIds, srvId)  #binary search
    found = pos < len(knownIds) and knownIds[pos] == srvId
    if not found:
        raise AppError("Invalid server ID=%d" % srvId)
    return pos
def validateConfigForRemoveDelta(self, deltaCtx):
    """
    Validate the options of a delta config that removes instances.

    Only 'server-*' and 'db_name' options are accepted, db_name must
    match this context's SciDB name, and the number of server- options
    must equal the number of parsed server entries.

    @param deltaCtx context holding the parsed delta config
    @throw AppError if an unknown option is present
    """
    serverKeys = 0
    for key in deltaCtx._configOpts:
        if 'server-' in key:
            serverKeys += 1
            continue
        if key != 'db_name':
            raise AppError("Unknown option with key %s" % (key))
        assert deltaCtx._configOpts[key] == self._ctx._scidb_name, \
            "Unexpected SciDB name %s" % (str(deltaCtx._configOpts[key]))

    assert serverKeys == len(deltaCtx._srvList), "Duplicate server- entries"
def parseConfig(ctx):
    '''
    Parse config.ini file

    Reads ctx._config_file into a RawConfigParser.  If ctx._scidb_name
    is empty, the name of the first section in the file is used as the
    database name.

    @param ctx context providing _config_file and _scidb_name
    @throw AppError if the file cannot be opened or parsed, or if it
           has no section to take the database name from
    '''
    config = RawConfigParser()
    try:
        printDebug("Parsing config file = %s" % (ctx._config_file))
        # Close the file as soon as it has been parsed instead of
        # leaving the handle to the garbage collector.
        with open(ctx._config_file, 'r') as configFile:
            config.readfp(configFile)
        # If _scidb_name has not been assigned, use the first section name as the dbname.
        if ctx._scidb_name == '':
            ctx._scidb_name = config.sections()[0]
    except Exception as e:
        raise AppError("Cannot read config file: %s" % e)
def service_register(self):
    """
    Register new SciDB database cluster with the SciDB-<version> SysV service
    on the servers specified in a config.ini file

    @throw AppError if neither --all nor a server id was specified
    """
    args = self._ctx._args
    if not args.all and not args.server_id:
        raise AppError("Invalid server ID specification")

    if args.all:
        selected = self._ctx._srvList
    else:
        srvId = self.findServerIndex(int(args.server_id))
        selected = [self._ctx._srvList[srvId]]
    self.registerSomeServices(selected)
def addServer(self, deltaFile, force):
    """
    Add instances on one or more servers
    in accordance with a user-specified file.

    The new config file is written out before the Postgres connection
    limit and redundancy checks are made.

    @param deltaFile path of the config file listing what to add
    @param force if set, a failed Postgres connection check is only
           reported as a warning; also passed to
           scidb.initSomeInParallel() as both force and remove
    @throw AppError if the Postgres connection check fails and force is
           not set
    """
    deltaCtx = scidb.Context()
    deltaCtx._scidb_name = self._ctx._scidb_name
    deltaCtx._config_file = deltaFile
    scidb.parseConfig(deltaCtx)

    self.validateConfigForAddDelta(deltaCtx)

    # merge server-id's while detecting duplicates
    brandNewInstances = 0
    for deltaSrv in deltaCtx._srvList:
        known = self._ctx._srvList
        pos = bisect.bisect_left(known, deltaSrv)  #binary search
        alreadyKnown = (pos != len(known) and
                        known[pos].getServerId() == deltaSrv.getServerId())
        if alreadyKnown:
            known[pos].addInstances(deltaSrv)
        else:
            brandNewInstances += len(deltaSrv.getServerInstances())

    numInstances = brandNewInstances + scidb.getInstanceCount(
        self._ctx._srvList)

    config = self.addDeltaToConfig(deltaCtx)

    # write out the new config file
    self.depositConfigFile(self._ctx._args.output, config)

    try:
        scidb.checkMaxPostgresConns(numInstances)
    except Exception as pgException:
        if not force:
            raise AppError("Postgres exception: %s" % pgException)
        printWarn(pgException)

    scidb.checkRedundancy(numInstances)

    # try to register the instances
    # the operation may partially succeed/fail
    scidb.initSomeInParallel(deltaCtx._srvList,
                             instances=None,
                             force=force,
                             remove=force,
                             initialize=False,
                             online="infinity")
def runRemote(self,
              servers,
              func,
              opStr,
              remoteUsers=None,
              remotePwds=None,
              conns=None):
    """
    Run remote commands generate by a given functor on a list of remote servers

    @param servers the servers to run on
    @param func callable taking (servers, conns) and returning the
           commands to execute
    @param opStr description of the operation, used in error messages
    @param remoteUsers optional per-server user names for connecting
    @param remotePwds optional per-server passwords, used together with
           remoteUsers
    @param conns optional already-open connections; when omitted (or
           empty) connections are opened here and closed before returning
    @throw AppError if any remote command returns a non-zero code
    """
    closeSSH = not conns
    if closeSSH:
        # Start from a real list so the finally clause always has
        # something to close, even if connecting fails part-way.
        conns = []
    try:
        if closeSSH:
            if not remoteUsers:
                for srv in servers:
                    conns.append(scidb.sshconnect(srv))
            else:
                for srv, user, pwd in zip(servers, remoteUsers, remotePwds):
                    conns.append(
                        scidb.sshconnect(srv, username=user, password=pwd))
        # generate remote commands
        cmds = func(servers, conns)
        # execute
        (ret, out, err) = scidb.parallelRemoteExec(conns, cmds)
        # NOTE(review): this also closes connections supplied by the
        # caller -- kept as it was, confirm callers expect that.
        for conn in conns:
            conn.close()
    finally:
        if closeSSH:
            scidb.sshCloseNoError(conns)

    scidb.printDebug("parallelRemoteExec out's: %s" % (str(out)))
    scidb.printDebug("parallelRemoteExec err's: %s" % (str(err)))
    for i, rc in enumerate(ret):
        if rc != 0:
            raise AppError("Failed to %s on server %d, errors: %s" %
                           (opStr, servers[i][0], str(err)))
def user_exists(u):
    """
    Return True iff user 'u' already exists in SciDB.

    The user names are taken from the output of a list('users') query,
    one name per output line.

    @throw AppError if the query reports a non-zero return code
    """
    listing, errors = _iquery("project(list('users'), name)")
    queryFailed = _iquery.returncode
    if queryFailed:
        raise AppError("Cannot list users:\n%s" % errors)
    knownUsers = listing.splitlines()
    return u in knownUsers
Parse config.ini file ''' config = RawConfigParser() try: printDebug("Parsing config file = %s" % (ctx._config_file)) config.readfp(open(ctx._config_file, 'r')) # If _scidb_name has not been assigned, use the first section name as the dbname. if ctx._scidb_name == '': ctx._scidb_name = config.sections()[0] except Exception, e: raise AppError("Cannot read config file: %s" % e) section_name = ctx._scidb_name # Check for upper case letters in database name. if not section_name.islower(): raise AppError( "Invalid specification for database name = %s; uppercase letters are not allowed!" % section_name) # First process the "global" section. try: ctx._srvList = [] srvIdSet = set() for (key, value) in config.items(section_name): ctx._configOpts[key] = value # make a srv & instance list # format: server-N=ip(,n)|(,m-p) [,q-s] number of local workers if key.startswith('server-'): srvId = int(key.split('-')[1]) valueSplit = value.split(',')
def unregisterSomeServers(self, deltaSrvs, force):
    """
    Unregister the instances on the specified servers from SciDB
    using the unregister_instances() operator
    @param deltaSrvs the servers whose instances to be unregistered
    @param force if set the existence of data directories is not checked and
    the absence of the specified instances in SciDB is ignored.
    If unset, the above conditions will cause an exception
    @return True if the unregister query returned 0, or if force is set
    and there was nothing to unregister
    @throw scidblib.AppError
    """
    if not force:
        self.checkDirs(deltaSrvs)

    # All queries go through the first instance of the first server.
    coordinator = self._ctx._srvList[0]  #coordinator

    iqueryPrefix = [self._ctx._installPath + "/bin/iquery"]
    if self._ctx._args.auth_file:
        iqueryPrefix.extend(['--auth-file', self._ctx._args.auth_file])

    iid = coordinator.getServerInstances()[0]

    scidb.printDebug("Coordinator srv: %s" % str(coordinator))
    scidb.printDebug("Coordinator instance %s" % str(iid))

    iqueryPrefix.extend([
        "-c",
        coordinator.getServerHost(), "-p",
        str(self._ctx._basePort + iid)
    ])
    iqueryPrefix.extend(["-o", 'csv'])

    # First query: list every registered instance as
    # server_id,server_instance_id,instance_id.
    cmdList = [i for i in iqueryPrefix]
    cmdList.extend([
        "-aq",
        "\"project(list_instances(), server_id, server_instance_id, instance_id);\""
    ])

    # NOTE(review): sout=os.tmpfile() presumably makes executeLocal
    # capture stdout through a temp file -- confirm against executeLocal.
    (ret, out, err) = scidb.executeLocal(cmdList,
                                         None,
                                         nocwd=True,
                                         useConnstr=False,
                                         ignoreError=False,
                                         useShell=True,
                                         sout=os.tmpfile())
    assert ret == 0, "Unexpected error from: %s " % str(cmdList)
    scidb.printDebug("Instances: %s" % out)
    lines = out.splitlines()
    out = None

    # Maps 'server_id,server_instance_id' to the integer instance_id.
    parsedIds = {}

    def parseLine(line):
        ids = line.lstrip().rstrip().split(',')
        if len(ids) < 3:
            raise AppError(
                "Unexpected instance [server_id,server_instance_id,instance_id]: %s "
                % (ids))
        parsedIds[','.join(ids[0:2])] = int(ids[2])

    # NOTE(review): used only for its side effect on parsedIds; this
    # relies on map() being eager, as it is in Python 2.
    map(parseLine, lines)
    scidb.printDebug("Parsed IDs: %s " % str(parsedIds))

    # Translate the (server, local instance) pairs being removed into
    # instance ids.
    instance_ids = []
    for srv in deltaSrvs:
        for liid in srv.getServerInstances():
            key = ','.join([str(srv.getServerId()), str(liid)])
            if key in parsedIds:
                instance_ids.append(parsedIds[key])
            else:
                msg = "Unknown instance: %s " % (scidb.validateInstance(
                    srv, liid))
                # An unknown instance is only a warning when force is set.
                if force:
                    scidb.printWarn(msg)
                else:
                    raise AppError(msg)
    ret = 0
    if len(instance_ids) > 0:
        # Second query: unregister everything that was found.
        cmdList = [i for i in iqueryPrefix]
        cmdList.extend([
            "-naq",
            "\"unregister_instances(%s);\"" %
            (",".join(map(str, instance_ids)))
        ])
        (ret, out, err) = scidb.executeLocal(cmdList,
                                             None,
                                             nocwd=True,
                                             useConnstr=False,
                                             ignoreError=False,
                                             useShell=True)
    elif not force:
        raise AppError("No instances to unregister")

    return (ret == 0)