def testAddressAlreadyInUseRoundRobinPortReservation():
    """
    Ticket #38 - "Address already in use" reported by FDT Java.
    https://trac.hep.caltech.edu/trac/fdtcp/ticket/38

    The error was observed during #5:comment:20
    (https://trac.hep.caltech.edu/trac/fdtcp/ticket/5#comment:20),
    2 times out of 338 transfer attempts. Probably, under traffic, a port
    cannot be bound again immediately even though the previous process
    released it only a moment ago.

    This test could not reproduce the problem when the same port number
    was reused immediately, so FDTD.getFreePort() was reimplemented to
    reserve ports on a round-robin basis. The test starts a receiving
    server, cleans it up, starts it again and checks that the second
    start gets the next port of the range rather than the released one.

    """
    testName = inspect.stack()[0][3]
    localHost = os.uname()[1]

    confFile = getTempFile(functionalFDTDConfiguration)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    conf.sanitize()
    logger = Logger(name=testName,
                    logFile="/tmp/fdtdtest-%s.log" % testName,
                    level=logging.DEBUG)
    fdtd = FDTD(conf, None, logger)

    # two ReceivingServerAction requests are issued one after another;
    # the second would likely fail to bind a port released only a
    # moment before
    serverId = "%s" % testName
    initAction = TestAction(localHost, localHost)
    initAction.id = serverId
    # TestAction goes first
    reply = fdtd.service.service(initAction)
    assert reply.status == 0

    serverOptions = dict(gridUserDest="someuserDest",
                         clientIP=os.uname()[1],
                         destFiles=[])
    startServer = ReceivingServerAction(initAction.id, serverOptions)
    # first ReceivingServerAction - starts the FDT Java server
    reply = fdtd.service.service(startServer)
    print(reply.msg)
    assert reply.status == 0
    assert reply.serverPort == 54321

    # CleanupProcessesAction - shuts the FDT Java server down, the port
    # shall be released
    stopServer = CleanupProcessesAction(serverId,
                                        timeout=0,
                                        waitTimeout=False)
    reply = fdtd.service.service(stopServer)
    print(reply.msg)
    assert reply.status == 0

    # second ReceivingServerAction - starts the FDT Java server again
    reply = fdtd.service.service(startServer)
    print(reply.msg)
    assert reply.status == 0
    # the released port is not handed out again, the next one in the
    # range is
    assert reply.serverPort == 54322

    # if separate log files are enabled, one is open after the last
    # ReceivingServerAction; taking the service down should close it as
    # well (related to the open files problem #41)
    fdtd.shutdown()
    fdtd.pyroDaemon.closedown()
    logger.close()
def testReleasePort():
    """
    Reserve and release FDT server ports on a three-port range.

    Checks that getFreePort() hands ports out in order, that releasing a
    value which is not a reserved port raises PortReservationException,
    that released ports become available again and that an exhausted
    range raises PortReservationException.

    """
    config = """
[general]
port = 5000
portRangeFDTServer = 54321,54323
"""
    # the configured range offers just 54321, 54322 and 54323
    confFile = getTempFile(config)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    for expectedPort in (54321, 54322):
        assert daemon.getFreePort() == expectedPort

    # values that are not reserved ports are rejected
    for bogus in (20, "aa"):
        py.test.raises(PortReservationException, daemon.releasePort, bogus)

    assert daemon.getFreePort() == 54323

    for reserved in (54321, 54322):
        daemon.releasePort(reserved)
    for expectedPort in (54321, 54322):
        assert daemon.getFreePort() == expectedPort

    # the whole range is reserved again now
    py.test.raises(PortReservationException, daemon.getFreePort)

    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testFDTDKillProcess2():
    """
    Start a long-running, non-blocking dd process through an Executor
    registered with the daemon, kill it via FDTD.killProcess() and check
    that no process with that PID exists afterwards.

    """
    config = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    confFile = getTempFile(config)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    # the command has to run long enough to still be alive when killed
    ddCommand = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    executor = Executor("some_id",
                        ddCommand,
                        blocking=False,
                        caller=daemon,
                        logger=logger,
                        killTimeout=0)
    try:
        executor.execute()
        daemon.killProcess("some_id", logger)
    finally:
        # the daemon port is released whatever happened above
        daemon.shutdown()
        daemon.pyroDaemon.closedown()

    try:
        # expected to raise NoSuchProcess once the process is gone
        leftover = Process(executor.proc.pid)
        failure = ("FAIL: Process PID:%s should have been "
                   "killed." % executor.proc.pid)
        logger.debug(failure)
        py.test.fail(failure)
    except NoSuchProcess as err:
        logger.debug("OK: Process PID:%s doesn't exist now." %
                     executor.proc.pid)
def testReleasePort():
    """
    Reserve and release FDT server ports on a three-port range and follow
    the size of the daemon's _portsTaken container along the way.

    In this variant releasing a value that is not a reserved port is
    expected to be a silent no-op.

    """
    config = """
[general]
port = 5000
portRangeFDTServer = 54321,54323
"""
    # the configured range offers just 54321, 54322 and 54323
    confFile = getTempFile(config)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    logger = Logger("test logger", level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    def assertPortsTaken(count):
        assert len(daemon._portsTaken) == count

    for expectedPort in (54321, 54322):
        assert daemon.getFreePort() == expectedPort
    assertPortsTaken(2)

    # releasing unknown values leaves the bookkeeping untouched
    for bogus in (20, "aa"):
        daemon.releasePort(bogus)
    assertPortsTaken(2)

    assert daemon.getFreePort() == 54323
    assertPortsTaken(3)

    for reserved in (54321, 54322):
        daemon.releasePort(reserved)
    assertPortsTaken(1)

    for expectedPort in (54321, 54322):
        assert daemon.getFreePort() == expectedPort
    # the whole range is reserved again now
    py.test.raises(PortReservationException, daemon.getFreePort)
    assertPortsTaken(3)

    daemon.shutdown()
def testFDTDWaitingTimeoutWhenCleanup():
    """
    Implementation of #33 - CleanupProcessesAction - attribute to ignore
    any wait-to-finish timeouts.

    The test starts a long running job (dd copy) in the background
    (non-blocking). While the job is being killed, the kill timeout is
    longer than the delay of a scheduled ALARM signal. It is checked that
    the ALARM signal handler ran during the waiting; as implemented, the
    signal is otherwise ignored by the cleanup. This is more obvious when
    killTimeout is set much higher.

    The SIGALRM handler that was installed before the test is put back
    and any pending alarm is cancelled in the cleanup path, so a failure
    in the middle of the test does not leak signal state into later
    tests.

    """
    class Handler(object):
        """Record on the instance that the signal handler was invoked."""

        def __init__(self, flag, testName):
            self.flag = flag
            self.testName = testName

        def signalHandler(self, signum, frame):
            print("test %s signal handler called (sig: %s)" %
                  (self.testName, signum))
            # sets flag to check whether some reaction was successfully
            # invoked
            self.flag = True

    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    f = getTempFile(c)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    fdtd = FDTD(conf, apMon, logger)

    # need long running job
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    # set long timeout (will be interrupted sooner by alarm - while
    # waiting on kill timeout)
    e = Executor("some_id",
                 command,
                 caller=fdtd,
                 blocking=False,
                 killTimeout=2,
                 logger=logger)

    # remembered so that it can be restored afterwards; getsignal()
    # returns None when the current handler was not installed from Python
    previousHandler = signal.getsignal(signal.SIGALRM)
    try:
        e.execute()  # the command remains running now

        # try killing the command
        # since waitTimeout = True, kill will be waiting
        cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=True)
        handler = Handler(False, testName)
        signal.signal(signal.SIGALRM, handler.signalHandler)
        assert not handler.flag
        print("test %s is waiting here ..." % testName)
        signal.alarm(1)  # raise alarm in timeout seconds
        cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger)
        signal.alarm(0)  # disable alarm

        # but the alarm was called during this waiting (test flag value)
        assert handler.flag
    finally:
        # no alarm may stay pending and the test's handler must not
        # outlive the test
        signal.alarm(0)
        if previousHandler is not None:
            signal.signal(signal.SIGALRM, previousHandler)
        fdtd.shutdown()
        fdtd.pyroDaemon.closedown()
def testFDTDAndExecutorContainerHandling():
    """
    Exercise the daemon's executor container: addExecutor(),
    getExecutor(), removeExecutor() and killProcess().

    A finished (blocking) executor can be added once, adding it again
    under the same id raises FDTDException, and it can be removed. A
    still running (non-blocking) executor is expected to stay in the
    container on removeExecutor() and to disappear from it only after
    killProcess().

    """
    c = """
[general]
port = 6700
debug = DEBUG
portRangeFDTServer = 54321,54323
killCommand = "kill -9 %(pid)s"
killCommandSudo = "kill -9 %(pid)s"
"""
    f = getTempFile(c)
    inputOptions = "-d DEBUG -p 6700 --config=%s" % f.name
    conf = ConfigFDTD(inputOptions.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    assert len(daemon._executors) == 0

    # needs to be blocking command, since simple ls finishes too quickly
    # for default blocking it would be considered that it failed
    # just needed existing process and executor container handling with it
    executor = Executor("some_id", "ls /tmp", logger=logger, blocking=True)
    executor.execute()
    daemon.addExecutor(executor)
    # subsequent adding of the same executor shall raise exception
    # (the same id)
    py.test.raises(FDTDException, daemon.addExecutor, executor)
    daemon.removeExecutor(executor)
    assert len(daemon._executors) == 0

    # need long running job
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    # caller is not explicitly specified, so all executors container
    # manipulation is done from this test
    executor = Executor("some_id",
                        command,
                        blocking=False,
                        killTimeout=0,
                        logger=logger)
    executor.execute()
    daemon.addExecutor(executor)
    assert len(daemon._executors) == 1

    # check getting the executor reference from the container
    ex = daemon.getExecutor("some_id_nonsence")  # doesn't exist
    # identity check - an == comparison could be answered by a custom
    # __eq__ of the returned object
    assert ex is None
    ex = daemon.getExecutor("some_id")
    assert ex == executor

    # this in fact should discover that the process is still running
    # and not remove it
    daemon.removeExecutor(executor)
    assert len(daemon._executors) == 1

    # now kill the process and try to remove it afterwards - test
    # different branch of removeExecutor
    daemon.killProcess(executor.id, logger)
    assert len(daemon._executors) == 0  # should have been removed

    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testFDTDDesiredPortOccupiedRaisesException():
    """
    Creating a second FDTD from the same configuration (port 6700) while
    the first one exists must raise FDTDException.

    """
    conf = ConfigFDTD("-d DEBUG -p 6700".split())
    logger = Logger("test logger", level=logging.DEBUG)
    firstDaemon = FDTD(conf, None, logger)
    # the second instance is built with exactly the same arguments
    py.test.raises(FDTDException, FDTD, conf, None, logger)
    firstDaemon.shutdown()
def testFDTDServiceOpenFiles():
    """
    #41 - Too many open files (fdtd side)

    Counts the open files of the current process around service calls:
    TestAction requests must leave the count unchanged, a
    ReceivingServerAction is expected to add exactly one open file (its
    separate log file) and the following CleanupProcessesAction is
    expected to bring the count back to the initial value.

    """
    hostName = os.uname()[1]
    f = getTempFile(functionalFDTDConfiguration)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    conf.sanitize()
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName,
                    logFile="/tmp/fdtdtest-%s.log" % testName,
                    level=logging.DEBUG)
    apMon = None
    fdtd = FDTD(conf, apMon, logger)

    proc = Process(os.getpid())
    initStateNumOpenFiles = len(proc.get_open_files())

    for testAction in [TestAction("fakeSrc", "fakeDst") for i in range(3)]:
        r = fdtd.service.service(testAction)
        logger.debug("Result: %s" % r)
        assert r.status == 0

    # after TestAction, there should not be left behind any open files
    numOpenFilesNow = len(proc.get_open_files())
    assert initStateNumOpenFiles == numOpenFilesNow

    # test on ReceivingServerAction - it's action after which the
    # separate logger is not closed, test the number of open files went +1,
    # send CleanupProcessesAction and shall again remain
    # initStateNumOpenFiles send appropriate TestAction first (like in real)
    serverId = "server-id"
    testAction = TestAction(hostName, hostName)
    testAction.id = serverId
    r = fdtd.service.service(testAction)
    assert r.status == 0

    options = dict(gridUserDest="someuserDest",
                   clientIP=os.uname()[1],
                   destFiles=[])
    recvServerAction = ReceivingServerAction(testAction.id, options)
    r = fdtd.service.service(recvServerAction)
    # print() call form is valid on both Python 2 and Python 3
    print(r.msg)
    assert r.status == 0
    numOpenFilesNow = len(proc.get_open_files())
    # there should be only 1 extra opened file now
    assert initStateNumOpenFiles == numOpenFilesNow - 1

    cleanupAction = CleanupProcessesAction(serverId, timeout=2)
    r = fdtd.service.service(cleanupAction)
    print(r.msg)
    assert r.status == 0
    numOpenFilesNow = len(proc.get_open_files())
    assert initStateNumOpenFiles == numOpenFilesNow

    fdtd.shutdown()
    fdtd.pyroDaemon.closedown()
    logger.close()
def testFDTDKillProcess():
    """
    Call FDTD.killProcess() with an id that was never registered; the
    call is expected to return without raising.

    """
    conf = ConfigFDTD("-d DEBUG -p 6700".split())
    logger = Logger("test logger", level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)
    # no longer fails - it only reports that the "some_id" process is
    # not present in the executors container
    daemon.killProcess("some_id", logger)
    daemon.shutdown()
def testAuthService():
    """
    Constructing AuthService on top of a running daemon is expected to
    raise AuthServiceException.

    """
    conf = ConfigFDTD("-d DEBUG -p 6700".split())
    logger = Logger("test logger", level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)
    # grid credentials are unavailable when running locally, hence the
    # expected failure
    py.test.raises(AuthServiceException, AuthService, daemon, conf, logger)
    daemon.shutdown()
def testFDTDDesiredPortOccupiedRaisesException():
    """
    Creating a second FDTD from the same configuration (port 6700 on
    localhost) while the first one exists must raise FDTDException.

    """
    testName = inspect.stack()[0][3]
    conf = ConfigFDTD("-d DEBUG -p 6700 -H localhost".split())
    logger = Logger(name=testName, level=logging.DEBUG)
    firstDaemon = FDTD(conf, None, logger)
    # the second instance is built with exactly the same arguments
    py.test.raises(FDTDException, FDTD, conf, None, logger)
    firstDaemon.shutdown()
    firstDaemon.pyroDaemon.closedown()
def testFDTDAndExecutorContainerHandling():
    """
    Walk through the daemon's executor container operations:
    addExecutor(), getExecutor(), removeExecutor() and killProcess(),
    once with an already finished command and once with a command that
    is still running.

    """
    config = """
[general]
port = 6700
debug = DEBUG
portRangeFDTServer = 54321,54323
killCommand = "kill -9 %(pid)s"
killCommandSudo = "kill -9 %(pid)s"
"""
    confFile = getTempFile(config)
    cmdLine = "-d DEBUG -p 6700 --config=%s" % confFile.name
    conf = ConfigFDTD(cmdLine.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    def assertContainerSize(expected):
        assert len(daemon._executors) == expected

    assertContainerSize(0)

    # a plain ls ends almost instantly, so it is run as a blocking
    # command (otherwise it would be taken as failed); all that is
    # needed is an existing process to put into the container
    finished = Executor("some_id", "ls /tmp", logger=logger, blocking=True)
    finished.execute()
    daemon.addExecutor(finished)
    # the same id can't be registered twice
    py.test.raises(FDTDException, daemon.addExecutor, finished)
    daemon.removeExecutor(finished)
    assertContainerSize(0)

    # long running job; no caller is passed to the executor, so the
    # container is manipulated from this test only
    ddCommand = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    running = Executor("some_id",
                       ddCommand,
                       blocking=False,
                       killTimeout=0,
                       logger=logger)
    running.execute()
    daemon.addExecutor(running)
    assertContainerSize(1)

    # lookups: unknown id first, then the registered one
    found = daemon.getExecutor("some_id_nonsence")
    assert found is None
    found = daemon.getExecutor("some_id")
    assert found == running

    # the process is still alive, so removal is expected to be refused
    daemon.removeExecutor(running)
    assertContainerSize(1)

    # killing the process exercises the other branch of removeExecutor
    # and should empty the container
    daemon.killProcess(running.id, logger)
    assertContainerSize(0)

    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testAuthService():
    """
    Constructing AuthService on top of a running daemon is expected to
    raise AuthServiceException.

    """
    testName = inspect.stack()[0][3]
    conf = ConfigFDTD("-d DEBUG -p 6700 -H localhost".split())
    logger = Logger(name=testName, level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)
    # grid credentials are unavailable when running locally, hence the
    # expected failure
    py.test.raises(AuthServiceException, AuthService, daemon, conf, logger)
    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testFDTDServiceOpenFiles():
    """
    #41 - Too many open files (fdtd side)

    Watches the number of files held open by the current process while
    TestAction, ReceivingServerAction and CleanupProcessesAction
    requests are served by the daemon.

    """
    testName = inspect.stack()[0][3]
    localHost = os.uname()[1]

    confFile = getTempFile(functionalFDTDConfiguration)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    conf.sanitize()
    logger = Logger(name=testName,
                    logFile="/tmp/fdtdtest-%s.log" % testName,
                    level=logging.DEBUG)
    fdtd = FDTD(conf, None, logger)

    thisProcess = Process(os.getpid())

    def countOpenFiles():
        return len(thisProcess.get_open_files())

    baseline = countOpenFiles()

    fakeActions = [TestAction("fakeSrc", "fakeDst") for _ in range(3)]
    for action in fakeActions:
        reply = fdtd.service.service(action)
        logger.debug("Result: %s" % reply)
        assert reply.status == 0

    # TestAction requests must not leave any file open behind them
    assert baseline == countOpenFiles()

    # ReceivingServerAction keeps its separate logger open, so the count
    # shall go up by one; CleanupProcessesAction shall bring it back to
    # the baseline. As in real operation, a matching TestAction is sent
    # first.
    serverId = "server-id"
    initAction = TestAction(localHost, localHost)
    initAction.id = serverId
    reply = fdtd.service.service(initAction)
    assert reply.status == 0

    serverOptions = dict(gridUserDest="someuserDest",
                         clientIP=os.uname()[1],
                         destFiles=[])
    startServer = ReceivingServerAction(initAction.id, serverOptions)
    reply = fdtd.service.service(startServer)
    print(reply.msg)
    assert reply.status == 0
    # exactly one extra open file at this point
    assert baseline == countOpenFiles() - 1

    stopServer = CleanupProcessesAction(serverId, timeout=2)
    reply = fdtd.service.service(stopServer)
    print(reply.msg)
    assert reply.status == 0
    assert baseline == countOpenFiles()

    fdtd.shutdown()
    fdtd.pyroDaemon.closedown()
    logger.close()
def testFDTDKillProcess1():
    """
    Call FDTD.killProcess() with an id that was never registered; the
    call is expected to return without raising.

    """
    testName = inspect.stack()[0][3]
    conf = ConfigFDTD("-d DEBUG -p 6700 -H localhost".split())
    logger = Logger(name=testName, level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)
    # no longer fails - it only reports that the "some_id" process is
    # not present in the executors container
    daemon.killProcess("some_id", logger)
    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testKillProcessTimeout():
    """
    Check the effect of the Executor killTimeout on FDTD.killProcess().

    A bash script sleeping for the given number of seconds is started in
    the background twice:

    1) the script runs for 5s with killTimeout=3: the timeout elapses
       while the script is still running, so the process is expected to
       end up killed (poll() == -9);
    2) the script runs for 2s with killTimeout=4: the script finishes
       within the timeout, so a normal exit is expected (poll() == 0).

    """
    script = """
c=$1
while [ $c -gt 0 ]
do
sleep 1
let "c -= 1"
done
exit 0
"""
    inputOptions = "-d DEBUG -p 6700"
    conf = ConfigFDTD(inputOptions.split())
    logger = Logger("test logger", level = logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)

    f = getTempFile(script)
    # script run time 5s
    command = "bash %s 5" % f.name
    # wait only 3s (killTimeout) before killing
    e = Executor("some_id", command, caller = daemon, blocking = False,
                 killTimeout = 3)
    try:
        e.execute()
        daemon.killProcess("some_id", logger)
    finally:
        daemon.shutdown()
    # the process was still running, timeout elapsed and was killed
    assert e.proc.poll() == -9

    # try different port, even if the previous was released,
    # immediate rebinding attempt makes PYRO fail
    inputOptions = "-d DEBUG -p 6701"
    conf = ConfigFDTD(inputOptions.split())
    apMon = None
    daemon = FDTD(conf, apMon, logger)

    # script run time 2s
    command = "bash %s 2" % f.name
    e = Executor("some_id", command, caller = daemon, blocking = False,
                 killTimeout = 4)
    try:
        e.execute()
        daemon.killProcess("some_id", logger)
    finally:
        daemon.shutdown()
    # the process should have normally finished, waiting killTimeout
    assert e.proc.poll() == 0
def testCorrectPortRange():
    """
    The portRangeFDTServer option "54321,54400" must give a daemon port
    range with both boundaries included and no port taken initially.

    """
    config = """
[general]
port = 5000
portRangeFDTServer = 54321,54400
"""
    confFile = getTempFile(config)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    logger = Logger("test logger", level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    firstPort, lastPort = 54321, 54400
    # the upper boundary is part of the range
    assert daemon._portRange == range(firstPort, lastPort + 1)
    assert len(daemon._portsTaken) == 0

    daemon.shutdown()
def testCorrectPortRange():
    """
    The portRangeFDTServer option "54321,54400" must be reflected by the
    first and the last entry of the daemon's port management container.

    """
    config = """
[general]
port = 5000
portRangeFDTServer = 54321,54400
"""
    confFile = getTempFile(config)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    managedPorts = daemon._portMgmt._ports
    assert managedPorts[0]._port == 54321
    assert managedPorts[-1]._port == 54400

    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testFDTD():
    """
    Basic executor container handling of the daemon: an executor can be
    added once, adding it again raises FDTDException and removing it
    leaves the container empty.

    """
    conf = ConfigFDTD("-d DEBUG -p 6700".split())
    logger = Logger("test logger", level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)
    # start is deliberately not called - the call would stop here ...

    def assertNoExecutors():
        assert len(daemon._executors) == 0

    assertNoExecutors()
    worker = Executor("some_id", logger)
    daemon.addExecutor(worker)
    # the same id can't be registered twice
    py.test.raises(FDTDException, daemon.addExecutor, worker)
    daemon.removeExecutor(worker)
    assertNoExecutors()

    daemon.shutdown()
def testGetFreePort():
    """
    getFreePort() must hand out all ports of a three-port range in
    order and raise PortReservationException once the range is used up.

    """
    config = """
[general]
port = 5000
portRangeFDTServer = 54321,54323
"""
    confFile = getTempFile(config)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    for expectedPort in (54321, 54322, 54323):
        assert daemon.getFreePort() == expectedPort
    # nothing left in the range
    py.test.raises(PortReservationException, daemon.getFreePort)

    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testGetFreePort():
    """
    getFreePort() must hand out all ports of a three-port range in
    order and raise PortReservationException once the range is used up;
    the daemon's _portsTaken container grows from 0 to 3 entries.

    """
    config = """
[general]
port = 5000
portRangeFDTServer = 54321,54323
"""
    confFile = getTempFile(config)
    conf = ConfigFDTD(("--config=%s" % confFile.name).split())
    logger = Logger("test logger", level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    assert len(daemon._portsTaken) == 0
    for expectedPort in (54321, 54322, 54323):
        assert daemon.getFreePort() == expectedPort
    # nothing left in the range
    py.test.raises(PortReservationException, daemon.getFreePort)
    assert len(daemon._portsTaken) == 3

    daemon.shutdown()
def testFDTDKillProcess2():
    """
    Start a long-running, non-blocking dd process through an Executor
    registered with the daemon, kill it via FDTD.killProcess() and check
    that no process with that PID exists afterwards.

    The check relies on Process(pid) raising NoSuchProcess for a PID
    that does not exist; if the lookup succeeds, the test fails.

    """
    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    f = getTempFile(c)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)

    # need long-running, non blocking process
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    executor = Executor("some_id",
                        command,
                        blocking=False,
                        caller=daemon,
                        logger=logger,
                        killTimeout=0)
    try:
        executor.execute()
        daemon.killProcess("some_id", logger)
    finally:
        # definitely release port and kill the process
        daemon.shutdown()
        daemon.pyroDaemon.closedown()

    try:
        # only the lookup matters here, the returned object is not used
        Process(executor.proc.pid)
        m = ("FAIL: Process PID:%s should have been "
             "killed." % executor.proc.pid)
        logger.debug(m)
        py.test.fail(m)
    except NoSuchProcess:
        # "except X, name" is Python 2-only syntax; the exception object
        # was never used, so no name is bound at all
        logger.debug("OK: Process PID:%s doesn't exist now." %
                     executor.proc.pid)
def testFDTDKillProcess():
    """
    Start a long-running, non-blocking dd process through an Executor
    registered with the daemon and kill it via FDTD.killProcess().

    Whatever happens, the daemon is shut down and a SIGKILL is sent to
    the process as a best-effort fallback.

    """
    conf = ConfigFDTD("-d DEBUG -p 6700".split())
    logger = Logger("test logger", level=logging.DEBUG)
    daemon = FDTD(conf, None, logger)

    # the command has to run long enough to still be alive when killed
    ddCommand = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    executor = Executor("some_id",
                        ddCommand,
                        blocking=False,
                        caller=daemon,
                        logger=logger)
    try:
        executor.execute()
        daemon.killProcess("some_id", logger)
    finally:
        # the daemon port is released whatever happened above
        daemon.shutdown()
        # fallback kill in case the one above did not succeed; an
        # OSError from os.kill is deliberately ignored
        try:
            os.kill(executor.proc.pid, signal.SIGKILL)
        except OSError:
            pass
def testFDTDServiceOpenFilesFullTransfer():
    """
    #41:comment:8 - Too many open files (fdtd side)

    SendingClient actually removed itself from the executors container
    once it finished, so a subsequent CleanupProcessesAction did not know
    about this process, nor about its open separate log file, which did
    not get closed.

    The test simulates a simple successful transfer between two daemons
    (a "writer" on port 10001 and a "reader" on port 10002), sends all
    actions the way fdtd.service() receives them and checks the number of
    open files reported by getOpenFilesList() after every step.

    """
    hostName = os.uname()[1]
    testName = inspect.stack()[0][3]
    initStateNumOpenFilesTestStart, filesStr = getOpenFilesList()
    print("%s: test 0: open files: %s items:\n%s" %
          (testName, initStateNumOpenFilesTestStart, filesStr))
    # there should not be any open files now
    assert initStateNumOpenFilesTestStart == 0

    f = getTempFile(functionalFDTDConfiguration)
    inputOption = "--config=%s --port=10001" % f.name
    confServer = ConfigFDTD(inputOption.split())
    confServer.sanitize()
    loggerServer = Logger(name=testName,
                          logFile="/tmp/fdtdtest-%s-writer.log" % testName,
                          level=logging.DEBUG)
    apMon = None
    fdtdServer = FDTD(confServer, apMon, loggerServer)

    inputOption = "--config=%s --port=10002" % f.name
    confReader = ConfigFDTD(inputOption.split())
    confReader.sanitize()
    loggerReader = Logger(name=testName,
                          logFile="/tmp/fdtdtest-%s-reader.log" % testName,
                          level=logging.DEBUG)
    apMon = None
    fdtdReader = FDTD(confReader, apMon, loggerReader)

    # -2 open log files, additional -1 is the temp config file
    initStateNumOpenFiles, filesStr = getOpenFilesList()
    print("%s: test 1: open files: %s items:\n%s" %
          (testName, initStateNumOpenFiles, filesStr))
    assert initStateNumOpenFilesTestStart == initStateNumOpenFiles - 2 - 1

    testActionServer = TestAction(hostName, hostName)
    testActionServer.id = testActionServer.id + "-writer"
    r = fdtdServer.service.service(testActionServer)
    assert r.status == 0

    options = dict(gridUserDest="someuserDest",
                   clientIP=os.uname()[1],
                   destFiles=["/dev/null"])
    recvServerAction = ReceivingServerAction(testActionServer.id, options)
    r = fdtdServer.service.service(recvServerAction)
    # print() call form is valid on both Python 2 and Python 3
    print(r.msg)
    assert r.status == 0
    serverFDTPort = r.serverPort

    # there should be only 1 extra opened file now - ReceivingServerAction
    # separate log
    numOpenFilesNow, filesStr = getOpenFilesList()
    print("%s: test 2: open files: %s items:\n%s" %
          (testName, numOpenFilesNow, filesStr))
    assert initStateNumOpenFiles == numOpenFilesNow - 1

    testActionReader = TestAction(hostName, hostName)
    testActionReader.id = testActionReader.id + "-reader"
    r = fdtdReader.service.service(testActionReader)
    assert r.status == 0

    files = [TransferFile("/etc/passwd", "/dev/null")]  # list of TransferFile
    options = dict(port=serverFDTPort,
                   hostDest=os.uname()[1],
                   transferFiles=files,
                   gridUserSrc="soemuserSrc")
    sndClientAction = SendingClientAction(testActionReader.id, options)
    r = fdtdReader.service.service(sndClientAction)
    assert r.status == 0

    # there should be +2 extra - for separate both server and client
    numOpenFilesNow, filesStr = getOpenFilesList()
    print("%s: test 3: open files: %s items:\n%s" %
          (testName, numOpenFilesNow, filesStr))
    # 2 extra files - separate transfer log at both ends
    assert initStateNumOpenFiles == numOpenFilesNow - 2

    # now the transfer is over, both server (writer) and sender (reader)
    # parties kept their separate log files open, CleanupProcessesAction
    # will close them
    print("going to clean up")
    cl = CleanupProcessesAction(testActionReader.id, waitTimeout=False)
    r = fdtdReader.service.service(cl)
    assert r.status == 0

    # one shall be closed now
    numOpenFilesNow, filesStr = getOpenFilesList()
    print("%s: test 4: open files: %s items:\n%s" %
          (testName, numOpenFilesNow, filesStr))
    assert initStateNumOpenFiles == numOpenFilesNow - 1

    cl = CleanupProcessesAction(testActionServer.id, waitTimeout=False)
    r = fdtdServer.service.service(cl)
    assert r.status == 0

    # both separate log files should be closed now
    # problem #41:comment:8 was here - server behaved correctly, but
    # reader kept its separate log file open
    numOpenFilesNow, filesStr = getOpenFilesList()
    print("%s: test 5: open files: %s items:\n%s" %
          (testName, numOpenFilesNow, filesStr))
    assert initStateNumOpenFiles == numOpenFilesNow

    fdtdServer.shutdown()
    fdtdServer.pyroDaemon.closedown()
    loggerServer.close()

    fdtdReader.shutdown()
    fdtdReader.pyroDaemon.closedown()
    loggerReader.close()

    # after even log files were closed, etc
    numOpenFilesNow, filesStr = getOpenFilesList()
    print("%s: test 6: open files: %s items:\n%s" %
          (testName, numOpenFilesNow, filesStr))
    # -1: the temp configuration file is still open
    assert initStateNumOpenFilesTestStart == numOpenFilesNow - 1
def testReceivingServerAddressAlreadyInUse():
    """
    Start real FDT Java servers through ReceivingServerAction and check
    how the daemon's executor container evolves when the start succeeds,
    when the port is already occupied and when a privileged port is
    requested.

    To force particular port numbers, the daemon's getFreePort() is
    replaced by a stub returning a fixed value.

    """
    config = """
[general]
port = 6700
debug = DEBUG
portRangeFDTServer = 54321,54323
fdtReceivingServerCommand = java -jar ../fdtjava/fdt.jar -bs 64K -p %(port)s -wCount 5 -S -noupdates
fdtServerLogOutputTimeout = 2
fdtServerLogOutputToWaitFor = "FDTServer start listening on port: %(port)s"
fdtReceivingServerKillTimeout = 1
killCommand = "kill -9 %(pid)s"
killCommandSudo = "kill -9 %(pid)s"
"""
    confFile = getTempFile(config)
    cmdLine = "-d DEBUG -p 6700 --config=%s" % confFile.name
    conf = ConfigFDTD(cmdLine.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)

    destFiles = ["/mnt/data", "/etc/passwd", "/etc/something/nonsence",
                 "/tmp"]

    def assertExecutors(expected):
        assert len(daemon._executors) == expected

    def newAction(actionId):
        # every action gets its own, freshly built options dictionary
        return ReceivingServerAction(
            actionId, dict(gridUserDest="someuser", destFiles=destFiles))

    def fixedPortStub(port, log):
        # stands in for caller.getFreePort(), which is what
        # ReceivingServerAction.execute() asks for a port
        def stub():
            log.debug("myFoolGetFreePort called, returning %s" % port)
            return port
        return stub

    assertExecutors(0)

    # the error check helper is first called directly with a few
    # message / port combinations
    action = newAction("some_id")
    for message, port in (("some message", 222),
                          ("some message", 25),
                          ("Address already in use", 25),
                          ("Address already in use", 22225)):
        action._checkForAddressAlreadyInUseError(message, port, logger)

    logger.info("Now real FDT Java server real attempts")
    logger.info('#' * 78)

    # 1) successful attempt
    logger.info('1' * 78)
    action = newAction("some_id")
    assertExecutors(0)
    result = action.execute(conf=conf,
                            caller=daemon,
                            apMon=apMon,
                            logger=logger)
    assertExecutors(1)
    assert result.status == 0
    assert result.serverPort == 54321
    assert result.msg == "FDT server is running"

    # 2) this attempt shall fail with Address already in use - the stub
    #    hands out port 54321 again, which the previous process holds
    daemon.getFreePort = fixedPortStub(54321, logger)
    logger.info('2' * 78)
    action = newAction("some_id-2")
    py.test.raises(FDTDException,
                   action.execute,
                   conf=conf,
                   caller=daemon,
                   apMon=apMon,
                   logger=logger)
    # the FDT Java command failed to start, yet the request stays in
    # the executor container for later cleanup
    assertExecutors(2)

    # 3) kill both executors / processes - one running, other failed
    logger.info('3' * 78)
    for remaining, actionId in ((1, "some_id"), (0, "some_id-2")):
        daemon.killProcess(actionId, logger, waitTimeout=False)
        assertExecutors(remaining)

    # 4) starting the FDT Java server on a privileged port - will fail
    logger.info('4' * 78)
    action = newAction("some_id")
    daemon.getFreePort = fixedPortStub(999, logger)
    py.test.raises(FDTDException,
                   action.execute,
                   conf=conf,
                   caller=daemon,
                   apMon=apMon,
                   logger=logger)
    assertExecutors(1)
    daemon.killProcess("some_id", logger, waitTimeout=False)
    assertExecutors(0)

    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testAddressAlreadyInUseRoundRobinPortReservation():
    """
    #38 - Address already in use FDT Java
    https://trac.hep.caltech.edu/trac/fdtcp/ticket/38

    Address already in use problem was seen during #5:comment:20
    https://trac.hep.caltech.edu/trac/fdtcp/ticket/5#comment:20
    2 times out of 338 transfer (attempts).
    Probably, when there is traffic the port can't be bound immediately
    even if it was released very short ago by the previous process.
    This test could not reproduce the problem (when reusing immediately
    the same port number for the next request), so FDTD.getFreePort()
    was reimplemented to reserve ports on round-robin basis.

    The test starts a receiving server (expected on port 54321), cleans
    it up and starts another one, which is expected on the next port
    (54322) rather than on the one just released.

    """
    hostName = os.uname()[1]
    f = getTempFile(functionalFDTDConfiguration)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    conf.sanitize()
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName,
                    logFile="/tmp/fdtdtest-%s.log" % testName,
                    level=logging.DEBUG)
    apMon = None
    fdtd = FDTD(conf, apMon, logger)

    try:
        # launch two subsequent ReceivingServerAction; with plain port
        # reuse the second would likely fail to bind the same, just very
        # short ago released, port
        serverId = "%s" % testName
        testAction = TestAction(hostName, hostName)
        testAction.id = serverId
        # do TestAction
        r = fdtd.service.service(testAction)
        assert r.status == 0

        options = dict(gridUserDest="someuserDest",
                       clientIP=os.uname()[1],
                       destFiles=[])
        recvServerAction = ReceivingServerAction(testAction.id, options)
        # do ReceivingServerAction - start FDT Java server
        r = fdtd.service.service(recvServerAction)
        print(r.msg)
        assert r.status == 0
        assert r.serverPort == 54321

        cleanupAction = CleanupProcessesAction(serverId, timeout=0,
                                               waitTimeout=False)
        # do CleanupProcessesAction - shut FDT Java server, port shall be
        # released
        r = fdtd.service.service(cleanupAction)
        print(r.msg)
        assert r.status == 0

        # do another ReceivingServerAction - start FDT Java server
        r = fdtd.service.service(recvServerAction)
        print(r.msg)
        assert r.status == 0
        # will not get the same port, but the next one in the range
        assert r.serverPort == 54322
    finally:
        # in fact, if separate log files are enabled, after this last
        # ReceivingServerAction, there is a separate log file open.
        # taking the service down should also close it - related to the
        # open files problem #41
        fdtd.shutdown()
        fdtd.pyroDaemon.closedown()
        logger.close()
def testFDTDNotWaitingTimeoutWhenCleanupForced():
    """
    Forced cleanup must not wait for the kill timeout.

    A long running command (dd copy) is started non-blocking with a high
    killTimeout (100). CleanupProcessesAction is then executed with
    waitTimeout=False while a SIGALRM is scheduled 1 second ahead. The
    cleanup is expected to return before the alarm fires, so the flag set
    by the signal handler must stay False.

    Implementation of #33 - CleanupProcessesAction - attribute to ignore
    any wait-to-finish timeouts

    NOTE(review): a function with the same name is defined again further
    down in this file; that later definition rebinds the name, so this
    version is presumably never collected - confirm and drop one of them.

    """
    class Handler:
        def __init__(self, flag, testName):
            self.flag = flag
            self.testName = testName

        def signalHandler(self, signum, frame):
            print("test %s signal handler called (sig: %s)" %
                  (self.testName, signum))
            # sets flag to check whether some reaction was successfully
            # invoked
            self.flag = True

    configText = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    configFile = getTempFile(configText)
    conf = ConfigFDTD(("--config=%s" % configFile.name).split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    fdtd = FDTD(conf, None, logger)

    # long running job; the long kill timeout shall be ignored anyway
    longCommand = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    executor = Executor("some_id",
                        longCommand,
                        caller=fdtd,
                        blocking=False,
                        killTimeout=100,
                        logger=logger)
    try:
        # the command keeps running on the background from now on
        executor.execute()

        # waitTimeout=False - the command shall be killed immediately
        cleanup = CleanupProcessesAction("some_id", timeout=1,
                                         waitTimeout=False)
        handler = Handler(False, testName)
        signal.signal(signal.SIGALRM, handler.signalHandler)
        assert not handler.flag
        # schedule the alarm; cleanup should finish before it is raised
        signal.alarm(1)
        cleanup.execute(conf=conf, caller=fdtd, apMon=None, logger=logger)
        # disable the alarm
        signal.alarm(0)
        # the handler must not have been called - flag is unchanged
        assert not handler.flag
    finally:
        fdtd.shutdown()
        fdtd.pyroDaemon.closedown()
def testFDTDWaitingTimeoutWhenCleanup():
    """
    Test issues long running job (dd copy) on the background
    (non-blocking) and when killing the job, the kill timeout (2) is set
    higher than the delay of the ALARM signal (1).
    It's tested that the ALARM signal was raised while the cleanup was
    waiting, i.e. the flag set by the signal handler is True afterwards.

    Implementation of #33 - CleanupProcessesAction - attribute to ignore
    any wait-to-finish timeouts

    """
    class Handler:
        def __init__(self, flag, testName):
            self.flag = flag
            self.testName = testName

        def signalHandler(self, signum, frame):
            print("test %s signal handler called (sig: %s)" %
                  (self.testName, signum))
            # sets flag to check whether some reaction was successfully
            # invoked
            self.flag = True

    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    f = getTempFile(c)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    fdtd = FDTD(conf, apMon, logger)

    # need long running job
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    # kill timeout is longer than the alarm delay, so the alarm is raised
    # while the cleanup is still waiting on the kill timeout
    e = Executor("some_id",
                 command,
                 caller=fdtd,
                 blocking=False,
                 killTimeout=2,
                 logger=logger)
    try:
        e.execute()  # command remains running now

        # try killing the command
        # since waitTimeout = True, kill will be waiting
        cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=True)
        handler = Handler(False, testName)
        signal.signal(signal.SIGALRM, handler.signalHandler)
        assert handler.flag == False
        # print() call form, consistent with the rest of the file
        print("test %s is waiting here ..." % testName)
        signal.alarm(1)  # raise alarm in timeout seconds
        cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger)
        signal.alarm(0)  # disable alarm
        # but the alarm was called during this waiting (test flag value)
        assert handler.flag == True
    finally:
        fdtd.shutdown()
        fdtd.pyroDaemon.closedown()
def testKillProcessTimeout():
    """
    Check killProcess() against a script outliving / not outliving the
    kill timeout.

    First run: the script runs 5s while killTimeout is 3 - the process is
    expected to be killed (poll() == -9). Second run: the script runs 2s
    while killTimeout is 4 - the process is expected to finish normally
    (poll() == 0).

    """
    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    script = """
c=$1
while [ $c -gt 0 ]
do
    sleep 1
    let "c -= 1"
done
exit 0
"""

    def startThenKill(daemon, command, killTimeout):
        """Run command non-blocking, kill it, always shut the daemon."""
        executor = Executor("some_id",
                            command,
                            caller=daemon,
                            blocking=False,
                            killTimeout=killTimeout,
                            logger=logger)
        try:
            executor.execute()
            daemon.killProcess("some_id", logger)
        finally:
            daemon.shutdown()
            daemon.pyroDaemon.closedown()
        return executor

    tmpFile = getTempFile(c)
    conf = ConfigFDTD(("--config=%s" % tmpFile.name).split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)

    tmpFile = getTempFile(script)
    # script waits 5s, kill timeout is only 3s
    e = startThenKill(daemon, "bash %s 5" % tmpFile.name, 3)
    # the process was still running, timeout elapsed and was killed
    assert e.proc.poll() == -9

    # try different port, even if the previous was released,
    # immediate rebinding attempt makes PYRO fail
    conf = ConfigFDTD("-d DEBUG -p 6701".split())
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    # script waits 2s, kill timeout is 4s
    e = startThenKill(daemon, "bash %s 2" % tmpFile.name, 4)
    # the process should have normally finished, waiting killTimeout
    assert e.proc.poll() == 0
def testFDTDServiceOpenFilesFullTransfer():
    """
    #41:comment:8 - Too many open files (fdtd side)

    SendingClient actually removed itself from the executors container
    once it finished, so a subsequent CleanupProcessesAction didn't know
    about this process, nor about its open separate log file, which did
    not get closed.

    Simulates a simple successful transfer between two daemons (writer on
    port 10001, reader on port 10002), sends all actions through
    fdtd.service and checks the number of open files after every step.

    """
    hostName = os.uname()[1]
    testName = inspect.stack()[0][3]

    def countOpenFiles(template):
        """Print the current open files via template, return their count."""
        numOpen, listing = getOpenFilesList()
        print(template % (testName, numOpen, listing))
        return numOpen

    startCount = countOpenFiles("%s: test 0: open files: %s items:\n%s")
    # there should not be any open files now
    assert startCount == 0

    f = getTempFile(functionalFDTDConfiguration)

    # writer (server) side daemon
    confServer = ConfigFDTD(("--config=%s --port=10001" % f.name).split())
    confServer.sanitize()
    loggerServer = Logger(name=testName,
                          logFile="/tmp/fdtdtest-%s-writer.log" % testName,
                          level=logging.DEBUG)
    apMon = None
    fdtdServer = FDTD(confServer, apMon, loggerServer)

    # reader (client) side daemon
    confReader = ConfigFDTD(("--config=%s --port=10002" % f.name).split())
    confReader.sanitize()
    loggerReader = Logger(name=testName,
                          logFile="/tmp/fdtdtest-%s-reader.log" % testName,
                          level=logging.DEBUG)
    apMon = None
    fdtdReader = FDTD(confReader, apMon, loggerReader)

    # 2 open log files plus 1 temp config file on top of the start state
    baseCount = countOpenFiles("%s: test 1: open files: %s items:\n%s")
    assert baseCount == startCount + 2 + 1

    testActionServer = TestAction(hostName, hostName)
    testActionServer.id = testActionServer.id + "-writer"
    r = fdtdServer.service.service(testActionServer)
    assert r.status == 0

    recvOptions = dict(gridUserDest="someuserDest",
                       clientIP=hostName,
                       destFiles=["/dev/null"])
    recvServerAction = ReceivingServerAction(testActionServer.id,
                                             recvOptions)
    r = fdtdServer.service.service(recvServerAction)
    print(r.msg)
    assert r.status == 0
    serverFDTPort = r.serverPort

    # there should be only 1 extra opened file now - ReceivingServerAction
    # separate log
    nowCount = countOpenFiles("%s: test 2: open files: %s items:\n%s")
    assert nowCount == baseCount + 1

    testActionReader = TestAction(hostName, hostName)
    testActionReader.id = testActionReader.id + "-reader"
    r = fdtdReader.service.service(testActionReader)
    assert r.status == 0

    # list of TransferFile
    transferFiles = [TransferFile("/etc/passwd", "/dev/null")]
    sendOptions = dict(port=serverFDTPort,
                       hostDest=hostName,
                       transferFiles=transferFiles,
                       gridUserSrc="soemuserSrc")
    sndClientAction = SendingClientAction(testActionReader.id, sendOptions)
    r = fdtdReader.service.service(sndClientAction)
    assert r.status == 0

    # 2 extra files - separate transfer log at both ends
    nowCount = countOpenFiles("%s: test 3: open files: %s items:\n%s")
    assert nowCount == baseCount + 2

    # now the transfer is over, both server (writer) and sender (reader)
    # parties kept their separate log files open, CleanupProcessesAction
    # will close them
    print("going to clean up")
    cleanup = CleanupProcessesAction(testActionReader.id, waitTimeout=False)
    r = fdtdReader.service.service(cleanup)
    assert r.status == 0

    # one shall be closed now
    nowCount = countOpenFiles("%s: test 4: open files: %s items:\n%s")
    assert nowCount == baseCount + 1

    cleanup = CleanupProcessesAction(testActionServer.id, waitTimeout=False)
    r = fdtdServer.service.service(cleanup)
    assert r.status == 0

    # both separate log files should be closed now
    # problem #41:comment:8 was here - server behaved correctly, but
    # reader kept its separate log file open
    nowCount = countOpenFiles("%s: test 5: open files: %s items:\n%s")
    assert nowCount == baseCount

    fdtdServer.shutdown()
    fdtdServer.pyroDaemon.closedown()
    loggerServer.close()

    fdtdReader.shutdown()
    fdtdReader.pyroDaemon.closedown()
    loggerReader.close()

    # after even log files were closed, etc
    nowCount = countOpenFiles("%s: test 6: open files: %s items:\n%s")
    # +1: the temp configuration file is still open
    assert nowCount == startCount + 1
def testFDTDNotWaitingTimeoutWhenCleanupForced():
    """
    Test issues long running job (dd copy) on the background
    (non-blocking) and when killing the job, the timeout is set high.
    But no timeout is waited and the command is killed immediately.
    The ALARM scheduled in 1s is expected never to be delivered since the
    cleanup shall finish immediately (the flag changed in the signal
    handler keeps its initial value).

    Implementation of #33 - CleanupProcessesAction - attribute to ignore
    any wait-to-finish timeouts

    The pending alarm is cancelled and the previously installed SIGALRM
    handler is restored on exit, so the test does not leak its handler
    into whatever runs afterwards in the same process.

    """
    class Handler(object):
        def __init__(self, flag, testName):
            self.flag = flag
            self.testName = testName

        def signalHandler(self, signum, frame):
            print("test %s signal handler called (sig: %s)" %
                  (self.testName, signum))
            # sets flag to check whether some reaction was successfully
            # invoked
            self.flag = True

    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    f = getTempFile(c)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    fdtd = FDTD(conf, apMon, logger)

    # need long running job
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    # set long timeout, shall be killed immediately anyway
    e = Executor("some_id",
                 command,
                 caller=fdtd,
                 blocking=False,
                 killTimeout=100,
                 logger=logger)
    try:
        e.execute()  # command remains running now

        # try killing the command
        # since waitTimeout = False, shall be killed immediately
        cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=False)
        handler = Handler(False, testName)
        # signal.signal() returns the handler that was installed before
        previousHandler = signal.signal(signal.SIGALRM,
                                        handler.signalHandler)
        try:
            assert not handler.flag
            signal.alarm(1)  # raise alarm in timeout seconds
            # should happen immediately so that ALARM is not raised
            cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger)
            signal.alarm(0)  # disable alarm
            # the alarm shouldn't have been called - value should have
            # remained the same
            assert not handler.flag
        finally:
            # cancel a possibly pending alarm first - restoring the
            # handler while an alarm is pending could deliver SIGALRM
            # to the restored (possibly default) handler
            signal.alarm(0)
            # None means the previous handler was not installed from
            # Python and cannot be passed back to signal.signal()
            if previousHandler is not None:
                signal.signal(signal.SIGALRM, previousHandler)
    finally:
        fdtd.shutdown()
        fdtd.pyroDaemon.closedown()