def setUp(self):
    """Create a temporary commands file holding three commands and manage it.

    The file ``commands.txt`` is written inside a fresh temporary directory
    (kept in ``self._base_dir``) and handed to a ``CommandManager`` stored in
    ``self.command_manager``.
    """
    self._base_dir = tmp.mkdtemp()

    # One newline-terminated command per attribute; `nb_commands` is the
    # number of commands written to the file below.
    self.command1, self.command2, self.command3 = "1\n", "2\n", "3\n"
    self.nb_commands = 3

    command_filename = os.path.join(self._base_dir, "commands.txt")
    initial_content = "".join([self.command1, self.command2, self.command3])
    with open(command_filename, "w+") as commands_file:
        commands_file.write(initial_content)

    self.command_manager = CommandManager(command_filename)
def setUp(self):
    """Prepare the worker script path, a commands file and a logs directory.

    Four ``echo`` commands are registered with a ``CommandManager`` backed by
    ``commands.txt`` in a temporary directory. ``self.commands_uid`` holds the
    UID generated from each command, in the same order as ``self.commands``.
    """
    self.base_worker_script = os.path.join(
        os.path.dirname(smartdispatch.__file__), 'workers', 'base_worker.py')
    self.commands = ["echo 1", "echo 2", "echo 3", "echo 4"]

    self._commands_dir = tempfile.mkdtemp()
    self.logs_dir = tempfile.mkdtemp()

    self.command_manager = CommandManager(
        os.path.join(self._commands_dir, "commands.txt"))
    self.command_manager.set_commands_to_run(self.commands)

    # Build a real list rather than relying on `map`: on Python 3 `map`
    # returns a one-shot iterator without `.index()`, whereas a list behaves
    # the same on every interpreter version.
    self.commands_uid = [utils.generate_uid_from_string(command)
                         for command in self.commands]
def main():
    """Run pending commands one after the other until none is left.

    Each command obtained from the ``CommandManager`` is executed through the
    shell. Its stdout and stderr are appended to ``<uid>.out`` and
    ``<uid>.err`` inside ``args.logs_dir``, every run being preceded by a
    SMART-DISPATCH header ("Started" for an empty log, "Resumed" otherwise).
    The command's exit code is then reported back to the ``CommandManager``.
    """
    # Necessary if we want 'logging.info' to appear in stderr.
    logging.root.setLevel(logging.INFO)

    args = parse_arguments()
    command_manager = CommandManager(args.commands_filename)

    while True:
        command = command_manager.get_command_to_run()
        if command is None:
            break

        uid = utils.generate_uid_from_string(command)
        stdout_filename = os.path.join(args.logs_dir, uid + ".out")
        stderr_filename = os.path.join(args.logs_dir, uid + ".err")

        # Get job and node ID
        job_id = os.environ.get('PBS_JOBID', 'undefined')
        node_name = os.environ.get('HOSTNAME', 'undefined')

        with open(stdout_filename, 'a') as stdout_file:
            with open(stderr_filename, 'a') as stderr_file:
                # Format the timestamp on its own. Passing the environment
                # values through strftime would let a stray '%' in them be
                # interpreted as a formatting directive.
                timestamp = t.strftime("%Y-%m-%d %H:%M:%S")
                if stdout_file.tell() > 0:  # Not the first line in the log file.
                    header = "\n## SMART-DISPATCH - Resumed on: {timestamp} - In job: {job_id} - On nodes: {node_name} ##\n"
                else:
                    header = "## SMART-DISPATCH - Started on: {timestamp} - In job: {job_id} - On nodes: {node_name} ##\n"
                log_datetime = header.format(timestamp=timestamp, job_id=job_id, node_name=node_name)

                log_command = "## SMART-DISPATCH - Command: " + command + '\n'

                # Flush the headers before the child process starts writing
                # to the same files, so they appear ahead of its output.
                stdout_file.write(log_datetime + log_command)
                stdout_file.flush()
                stderr_file.write(log_datetime + log_command)
                stderr_file.flush()

                error_code = subprocess.call(command, stdout=stdout_file, stderr=stderr_file, shell=True)

        command_manager.set_running_command_as_finished(command, error_code)
def setUp(self):
    """Prepare the worker script path, a commands file and a logs directory.

    Four ``echo`` commands are registered with a ``CommandManager`` backed by
    ``commands.txt`` in a temporary directory. ``self.commands_uid`` holds the
    UID generated from each command, in the same order as ``self.commands``.
    """
    self.base_worker_script = os.path.join(os.path.dirname(smartdispatch.__file__), 'workers', 'base_worker.py')
    self.commands = ["echo 1", "echo 2", "echo 3", "echo 4"]

    self._commands_dir = tempfile.mkdtemp()
    self.logs_dir = tempfile.mkdtemp()

    self.command_manager = CommandManager(os.path.join(self._commands_dir, "commands.txt"))
    self.command_manager.set_commands_to_run(self.commands)

    # A list (not a lazy `map` object) supports `.index()` and repeated
    # iteration on every Python version.
    self.commands_uid = [utils.generate_uid_from_string(command) for command in self.commands]
class CommandFilesTests(unittest.TestCase):
    """Exercise CommandManager against a commands file pre-filled with three commands."""

    def setUp(self):
        """Write three commands to a fresh ``commands.txt`` and manage it."""
        self._base_dir = tmp.mkdtemp()
        self.command1, self.command2, self.command3 = "1\n", "2\n", "3\n"
        self.nb_commands = 3

        command_filename = os.path.join(self._base_dir, "commands.txt")
        with open(command_filename, "w+") as commands_file:
            commands_file.write("".join([self.command1, self.command2, self.command3]))

        self.command_manager = CommandManager(command_filename)

    def tearDown(self):
        """Delete the temporary directory created by ``setUp``."""
        shutil.rmtree(self._base_dir)

    @staticmethod
    def _content_of(filename):
        """Return the full text content of ``filename``."""
        with open(filename, "r") as stream:
            return stream.read()

    def test_set_commands_to_run(self):
        # SetUp
        extra_commands = ["4", "5", "6"]

        # The function to test
        self.command_manager.set_commands_to_run(extra_commands)

        # Test validation: new commands are appended after the existing ones.
        manager = self.command_manager
        expected = self.command1 + self.command2 + self.command3 + "4\n5\n6\n"
        assert_equal(self._content_of(manager._commands_filename), expected)
        assert_true(not os.path.isfile(manager._running_commands_filename))
        assert_true(not os.path.isfile(manager._finished_commands_filename))

    def test_get_failed_commands(self):
        # Setup: finish the first command with a non-zero error code.
        command = self.command_manager.get_command_to_run()
        self.command_manager.set_running_command_as_finished(command, 1)

        # The function to test
        failed = self.command_manager.get_failed_commands()

        # Test validation
        assert_equal(len(failed), 1)
        assert_equal(failed[0], self.command1)

    def test_get_failed_commands_empty(self):
        # The function to test
        failed = self.command_manager.get_failed_commands()

        # Test validation
        assert_equal(len(failed), 0)

    def test_get_command_to_run(self):
        # The function to test
        fetched = self.command_manager.get_command_to_run()

        # Test validation: the first command moved from pending to running.
        manager = self.command_manager
        assert_equal(fetched, self.command1.strip())
        assert_equal(self._content_of(manager._commands_filename), self.command2 + self.command3)
        assert_equal(self._content_of(manager._running_commands_filename), self.command1)
        assert_true(not os.path.isfile(manager._finished_commands_filename))

    def test_get_nb_commands_to_run(self):
        remaining = self.command_manager.get_nb_commands_to_run()
        assert_equal(remaining, self.nb_commands)

    def test_set_running_command_as_finished(self):
        # SetUp
        command = self.command_manager.get_command_to_run()

        # The function to test
        self.command_manager.set_running_command_as_finished(command)

        # Test validation
        manager = self.command_manager
        assert_equal(self._content_of(manager._commands_filename), self.command2 + self.command3)
        assert_equal(self._content_of(manager._running_commands_filename), "")
        assert_equal(self._content_of(manager._finished_commands_filename), self.command1)
        assert_true(not os.path.isfile(manager._failed_commands_filename))

    def test_set_running_command_as_pending(self):
        # SetUp
        command = self.command_manager.get_command_to_run()

        # The function to test
        self.command_manager.set_running_command_as_pending(command)

        # Test validation: the command goes back to the end of the pending list.
        manager = self.command_manager
        assert_equal(self._content_of(manager._commands_filename),
                     self.command2 + self.command3 + self.command1)
        assert_equal(self._content_of(manager._running_commands_filename), "")
        assert_true(not os.path.isfile(manager._finished_commands_filename))
        assert_true(not os.path.isfile(manager._failed_commands_filename))

    def test_set_running_command_as_failed(self):
        # SetUp
        command = self.command_manager.get_command_to_run()
        error_code = 1

        # The function to test
        self.command_manager.set_running_command_as_finished(command, error_code)

        # Test validation
        manager = self.command_manager
        assert_equal(self._content_of(manager._commands_filename), self.command2 + self.command3)
        assert_equal(self._content_of(manager._running_commands_filename), "")
        assert_equal(self._content_of(manager._failed_commands_filename), self.command1)
        assert_true(not os.path.isfile(manager._finished_commands_filename))

    def test_reset_running_commands(self):
        # SetUp
        self.command_manager.get_command_to_run()

        # The function to test
        self.command_manager.reset_running_commands()

        # Test validation: the running command is pending again, in first position.
        manager = self.command_manager
        assert_equal(self._content_of(manager._commands_filename),
                     self.command1 + self.command2 + self.command3)
        assert_equal(self._content_of(manager._running_commands_filename), "")
        assert_true(not os.path.isfile(manager._finished_commands_filename))
def main():
    """Run pending commands one after the other until none is left.

    Each command obtained from the ``CommandManager`` is executed through the
    shell. Its stdout and stderr are appended to ``<uid>.out`` and
    ``<uid>.err`` inside ``args.logs_dir``, every run being preceded by a
    SMART-DISPATCH header ("Started" for an empty log, "Resumed" otherwise).
    The command's exit code is then reported back to the ``CommandManager``.

    When ``args.assumeResumable`` is set, a SIGTERM handler is installed that
    waits for the current child process, hands the current command back to
    the ``CommandManager`` as pending and exits with status 0.
    """
    # Necessary if we want 'logging.info' to appear in stderr.
    logging.root.setLevel(logging.INFO)

    args = parse_arguments()
    command_manager = CommandManager(args.commands_filename)

    if args.assumeResumable:
        # Handle TERM signal gracefully by sending running commands back to
        # the list of pending commands.
        # NOTE: There are several cases when the handler will not have
        #       up-to-date information on running the command and/or process,
        #       but chances of that happening are VERY slim and the
        #       consequences are not fatal.
        # The first parameter is named `signum` so that the `signal` module
        # is not shadowed inside the handler.
        def sigterm_handler(signum, frame):
            # Only the first TERM signal is acted upon.
            if sigterm_handler.triggered:
                return
            sigterm_handler.triggered = True

            if sigterm_handler.proc is not None:
                sigterm_handler.proc.wait()
            if sigterm_handler.command is not None:
                command_manager.set_running_command_as_pending(
                    sigterm_handler.command)
            sys.exit(0)

        # State shared between the main loop and the handler lives on the
        # function object itself.
        sigterm_handler.triggered = False
        sigterm_handler.command = None
        sigterm_handler.proc = None
        signal.signal(signal.SIGTERM, sigterm_handler)

    while True:
        command = command_manager.get_command_to_run()

        if args.assumeResumable:
            sigterm_handler.proc = None
            sigterm_handler.command = command

        if command is None:
            break

        uid = utils.generate_uid_from_string(command)
        stdout_filename = os.path.join(args.logs_dir, uid + ".out")
        stderr_filename = os.path.join(args.logs_dir, uid + ".err")

        # Get job and node ID
        job_id = os.environ.get('PBS_JOBID', 'undefined')
        node_name = os.environ.get('HOSTNAME', 'undefined')

        with open(stdout_filename, 'a') as stdout_file:
            with open(stderr_filename, 'a') as stderr_file:
                # Format the timestamp on its own. Passing the environment
                # values through strftime would let a stray '%' in them be
                # interpreted as a formatting directive.
                timestamp = t.strftime("%Y-%m-%d %H:%M:%S")
                if stdout_file.tell() > 0:  # Not the first line in the log file.
                    header = "\n## SMART-DISPATCH - Resumed on: {timestamp} - In job: {job_id} - On nodes: {node_name} ##\n"
                else:
                    header = "## SMART-DISPATCH - Started on: {timestamp} - In job: {job_id} - On nodes: {node_name} ##\n"
                log_datetime = header.format(
                    timestamp=timestamp, job_id=job_id, node_name=node_name)

                log_command = "## SMART-DISPATCH - Command: " + command + '\n'

                # Flush the headers before the child process starts writing
                # to the same files, so they appear ahead of its output.
                stdout_file.write(log_datetime + log_command)
                stdout_file.flush()
                stderr_file.write(log_datetime + log_command)
                stderr_file.flush()

                proc = subprocess.Popen(command,
                                        stdout=stdout_file,
                                        stderr=stderr_file,
                                        shell=True)
                if args.assumeResumable:
                    sigterm_handler.proc = proc

                error_code = proc.wait()

        command_manager.set_running_command_as_finished(command, error_code)
class TestSmartWorker(unittest.TestCase):
    """End-to-end tests that launch ``base_worker.py`` as a subprocess."""

    def setUp(self):
        """Register four ``echo`` commands and create a temporary logs directory."""
        self.base_worker_script = os.path.join(
            os.path.dirname(smartdispatch.__file__), 'workers',
            'base_worker.py')
        self.commands = ["echo 1", "echo 2", "echo 3", "echo 4"]

        self._commands_dir = tempfile.mkdtemp()
        self.logs_dir = tempfile.mkdtemp()

        self.command_manager = CommandManager(
            os.path.join(self._commands_dir, "commands.txt"))
        self.command_manager.set_commands_to_run(self.commands)

        # Must be a real list: `test_main` calls `.index()` on it, which a
        # Python 3 `map` object does not provide.
        self.commands_uid = [utils.generate_uid_from_string(command)
                             for command in self.commands]

    def tearDown(self):
        """Remove the temporary commands and logs directories."""
        shutil.rmtree(self._commands_dir)
        shutil.rmtree(self.logs_dir)

    def _check_logs(self, extension, expect_output):
        """Validate every log in ``self.logs_dir`` whose name ends with ``extension``.

        Each log is expected to contain two runs of the same command (a
        "Started" one followed by a "Resumed" one). When ``expect_output`` is
        true, the command's echoed digit must follow each header.
        """
        filenames = os.listdir(self.logs_dir)
        log_filenames = [os.path.join(self.logs_dir, filename)
                         for filename in filenames
                         if filename.endswith(extension)]

        for log_filename in log_filenames:
            with open(log_filename) as logfile:
                # From log's filename (i.e. uid) retrieve executed command associated with this log
                uid = os.path.splitext(os.path.basename(log_filename))[0]
                executed_command = self.commands[self.commands_uid.index(uid)]

                # Since the command was run twice.
                for i in range(2):
                    # First line is the datetime of the executed command in comment.
                    line = logfile.readline().strip()

                    if i == 0:
                        assert_true("Started" in line)
                    else:
                        assert_true("Resumed" in line)

                    assert_true(line.startswith("## SMART-DISPATCH"))
                    assert_true(time.strftime("%Y-%m-%d %H:%M:") in line)  # Don't check seconds.

                    # Second line is the executed command in comment.
                    line = logfile.readline().strip()
                    assert_true(executed_command in line)

                    if expect_output:
                        # Next should be the command's output
                        line = logfile.readline().strip()
                        assert_equal(line, executed_command[-1])  # We know those are 'echo' of a digit

                    # Empty line
                    assert_equal(logfile.readline().strip(), "")

                # Log should be empty now
                assert_equal("", logfile.read())

    def test_main(self):
        command = [
            'python2', self.base_worker_script,
            self.command_manager._commands_filename, self.logs_dir
        ]
        assert_equal(call(command), 0)

        # Simulate a resume, i.e. re-run the command, the output/error should be concatenated.
        self.command_manager.set_commands_to_run(self.commands)
        assert_equal(call(command), 0)

        # Check output logs
        self._check_logs(".out", expect_output=True)

        # Check error logs
        self._check_logs(".err", expect_output=False)

    def test_lock(self):
        command = [
            'python2', self.base_worker_script,
            self.command_manager._commands_filename, self.logs_dir
        ]

        # Lock the commands file before running 'base_worker.py'
        with open_with_lock(self.command_manager._commands_filename, 'r+'):
            # `universal_newlines=True` makes the captured streams text on
            # every Python version, so they can be compared with str below.
            process = Popen(command, stdout=PIPE, stderr=PIPE,
                            universal_newlines=True)
            time.sleep(1)

        stdout, stderr = process.communicate()
        assert_equal(stdout, "")
        assert_true(
            "write-lock" in stderr,
            msg="Forcing a race condition, try increasing sleeping time above."
        )
        assert_true("Traceback" not in stderr)  # Check that there are no errors.
class CommandFilesTests(unittest.TestCase):
    """Check how CommandManager moves commands between the files it maintains."""

    def setUp(self):
        """Create ``commands.txt`` with three commands in a temporary directory."""
        self._base_dir = tmp.mkdtemp()
        self.nb_commands = 3
        self.command1 = "1\n"
        self.command2 = "2\n"
        self.command3 = "3\n"

        command_filename = os.path.join(self._base_dir, "commands.txt")
        all_commands = self.command1 + self.command2 + self.command3
        with open(command_filename, "w+") as commands_file:
            commands_file.write(all_commands)

        self.command_manager = CommandManager(command_filename)

    def tearDown(self):
        """Remove the temporary directory and everything in it."""
        shutil.rmtree(self._base_dir)

    def _assert_content(self, filename, expected):
        """Assert that the text stored in ``filename`` equals ``expected``."""
        with open(filename, "r") as stream:
            assert_equal(stream.read(), expected)

    def _assert_absent(self, filename):
        """Assert that ``filename`` is not an existing regular file."""
        assert_true(not os.path.isfile(filename))

    def test_set_commands_to_run(self):
        # SetUp
        commands = ["4", "5", "6"]

        # The function to test
        self.command_manager.set_commands_to_run(commands)

        # Test validation
        self._assert_content(
            self.command_manager._commands_filename,
            self.command1 + self.command2 + self.command3 + "4\n5\n6\n")
        self._assert_absent(self.command_manager._running_commands_filename)
        self._assert_absent(self.command_manager._finished_commands_filename)

    def test_get_failed_commands(self):
        # Setup
        running = self.command_manager.get_command_to_run()
        self.command_manager.set_running_command_as_finished(running, 1)

        # The function to test
        failed_commands = self.command_manager.get_failed_commands()

        # Test validation
        assert_equal(len(failed_commands), 1)
        assert_equal(failed_commands[0], self.command1)

    def test_get_failed_commands_empty(self):
        # The function to test
        failed_commands = self.command_manager.get_failed_commands()

        # Test validation
        assert_equal(len(failed_commands), 0)

    def test_get_command_to_run(self):
        # The function to test
        command = self.command_manager.get_command_to_run()

        # Test validation
        assert_equal(command, self.command1.strip())
        self._assert_content(self.command_manager._commands_filename,
                             self.command2 + self.command3)
        self._assert_content(self.command_manager._running_commands_filename,
                             self.command1)
        self._assert_absent(self.command_manager._finished_commands_filename)

    def test_get_nb_commands_to_run(self):
        assert_equal(self.command_manager.get_nb_commands_to_run(),
                     self.nb_commands)

    def test_set_running_command_as_finished(self):
        # SetUp
        running = self.command_manager.get_command_to_run()

        # The function to test
        self.command_manager.set_running_command_as_finished(running)

        # Test validation
        self._assert_content(self.command_manager._commands_filename,
                             self.command2 + self.command3)
        self._assert_content(self.command_manager._running_commands_filename, "")
        self._assert_content(self.command_manager._finished_commands_filename,
                             self.command1)
        self._assert_absent(self.command_manager._failed_commands_filename)

    def test_set_running_command_as_pending(self):
        # SetUp
        running = self.command_manager.get_command_to_run()

        # The function to test
        self.command_manager.set_running_command_as_pending(running)

        # Test validation
        self._assert_content(self.command_manager._commands_filename,
                             self.command2 + self.command3 + self.command1)
        self._assert_content(self.command_manager._running_commands_filename, "")
        self._assert_absent(self.command_manager._finished_commands_filename)
        self._assert_absent(self.command_manager._failed_commands_filename)

    def test_set_running_command_as_failed(self):
        # SetUp
        running = self.command_manager.get_command_to_run()
        error_code = 1

        # The function to test
        self.command_manager.set_running_command_as_finished(running, error_code)

        # Test validation
        self._assert_content(self.command_manager._commands_filename,
                             self.command2 + self.command3)
        self._assert_content(self.command_manager._running_commands_filename, "")
        self._assert_content(self.command_manager._failed_commands_filename,
                             self.command1)
        self._assert_absent(self.command_manager._finished_commands_filename)

    def test_reset_running_commands(self):
        # SetUp
        self.command_manager.get_command_to_run()

        # The function to test
        self.command_manager.reset_running_commands()

        # Test validation
        self._assert_content(self.command_manager._commands_filename,
                             self.command1 + self.command2 + self.command3)
        self._assert_content(self.command_manager._running_commands_filename, "")
        self._assert_absent(self.command_manager._finished_commands_filename)
def main():
    """Run pending commands one after the other until none is left.

    Each command obtained from the ``CommandManager`` is executed through the
    shell. Its stdout and stderr are appended to ``<uid>.out`` and
    ``<uid>.err`` inside ``args.logs_dir``, every run being preceded by a
    SMART-DISPATCH header ("Started" for an empty log, "Resumed" otherwise).
    The command's exit code is then reported back to the ``CommandManager``.

    When ``args.assumeResumable`` is set, a SIGTERM handler is installed that
    waits for the current child process, hands the current command back to
    the ``CommandManager`` as pending and exits with status 0.
    """
    # Necessary if we want 'logging.info' to appear in stderr.
    logging.root.setLevel(logging.INFO)

    args = parse_arguments()
    command_manager = CommandManager(args.commands_filename)

    if args.assumeResumable:
        # Handle TERM signal gracefully by sending running commands back to
        # the list of pending commands.
        # NOTE: There are several cases when the handler will not have
        #       up-to-date information on running the command and/or process,
        #       but chances of that happening are VERY slim and the
        #       consequences are not fatal.
        # The first parameter is named `signum` so that the `signal` module
        # is not shadowed inside the handler.
        def sigterm_handler(signum, frame):
            # Only the first TERM signal is acted upon.
            if sigterm_handler.triggered:
                return
            sigterm_handler.triggered = True

            if sigterm_handler.proc is not None:
                sigterm_handler.proc.wait()
            if sigterm_handler.command is not None:
                command_manager.set_running_command_as_pending(sigterm_handler.command)
            sys.exit(0)

        # State shared between the main loop and the handler lives on the
        # function object itself.
        sigterm_handler.triggered = False
        sigterm_handler.command = None
        sigterm_handler.proc = None
        signal.signal(signal.SIGTERM, sigterm_handler)

    while True:
        command = command_manager.get_command_to_run()

        if args.assumeResumable:
            sigterm_handler.proc = None
            sigterm_handler.command = command

        if command is None:
            break

        uid = utils.generate_uid_from_string(command)
        stdout_filename = os.path.join(args.logs_dir, uid + ".out")
        stderr_filename = os.path.join(args.logs_dir, uid + ".err")

        # Get job and node ID
        job_id = os.environ.get('PBS_JOBID', 'undefined')
        node_name = os.environ.get('HOSTNAME', 'undefined')

        with open(stdout_filename, 'a') as stdout_file:
            with open(stderr_filename, 'a') as stderr_file:
                # Format the timestamp on its own. Passing the environment
                # values through strftime would let a stray '%' in them be
                # interpreted as a formatting directive.
                timestamp = t.strftime("%Y-%m-%d %H:%M:%S")
                if stdout_file.tell() > 0:  # Not the first line in the log file.
                    header = "\n## SMART-DISPATCH - Resumed on: {timestamp} - In job: {job_id} - On nodes: {node_name} ##\n"
                else:
                    header = "## SMART-DISPATCH - Started on: {timestamp} - In job: {job_id} - On nodes: {node_name} ##\n"
                log_datetime = header.format(timestamp=timestamp, job_id=job_id, node_name=node_name)

                log_command = "## SMART-DISPATCH - Command: " + command + '\n'

                # Flush the headers before the child process starts writing
                # to the same files, so they appear ahead of its output.
                stdout_file.write(log_datetime + log_command)
                stdout_file.flush()
                stderr_file.write(log_datetime + log_command)
                stderr_file.flush()

                proc = subprocess.Popen(command, stdout=stdout_file, stderr=stderr_file, shell=True)
                if args.assumeResumable:
                    sigterm_handler.proc = proc

                error_code = proc.wait()

        command_manager.set_running_command_as_finished(command, error_code)
class TestSmartWorker(unittest.TestCase):
    """End-to-end tests that launch ``base_worker.py`` as a subprocess."""

    def setUp(self):
        """Register four ``echo`` commands and create a temporary logs directory."""
        self.base_worker_script = os.path.join(os.path.dirname(smartdispatch.__file__), 'workers', 'base_worker.py')
        self.commands = ["echo 1", "echo 2", "echo 3", "echo 4"]

        self._commands_dir = tempfile.mkdtemp()
        self.logs_dir = tempfile.mkdtemp()

        self.command_manager = CommandManager(os.path.join(self._commands_dir, "commands.txt"))
        self.command_manager.set_commands_to_run(self.commands)

        # Must be a real list: `test_main` calls `.index()` on it, which a
        # Python 3 `map` object does not provide.
        self.commands_uid = [utils.generate_uid_from_string(command) for command in self.commands]

    def tearDown(self):
        """Remove the temporary commands and logs directories."""
        shutil.rmtree(self._commands_dir)
        shutil.rmtree(self.logs_dir)

    def _check_logs(self, extension, expect_output):
        """Validate every log in ``self.logs_dir`` whose name ends with ``extension``.

        Each log is expected to contain two runs of the same command (a
        "Started" one followed by a "Resumed" one). When ``expect_output`` is
        true, the command's echoed digit must follow each header.
        """
        filenames = os.listdir(self.logs_dir)
        log_filenames = [os.path.join(self.logs_dir, filename) for filename in filenames if filename.endswith(extension)]

        for log_filename in log_filenames:
            with open(log_filename) as logfile:
                # From log's filename (i.e. uid) retrieve executed command associated with this log
                uid = os.path.splitext(os.path.basename(log_filename))[0]
                executed_command = self.commands[self.commands_uid.index(uid)]

                # Since the command was run twice.
                for i in range(2):
                    # First line is the datetime of the executed command in comment.
                    line = logfile.readline().strip()

                    if i == 0:
                        assert_true("Started" in line)
                    else:
                        assert_true("Resumed" in line)

                    assert_true(line.startswith("## SMART-DISPATCH"))
                    assert_true(time.strftime("%Y-%m-%d %H:%M:") in line)  # Don't check seconds.

                    # Second line is the executed command in comment.
                    line = logfile.readline().strip()
                    assert_true(executed_command in line)

                    if expect_output:
                        # Next should be the command's output
                        line = logfile.readline().strip()
                        assert_equal(line, executed_command[-1])  # We know those are 'echo' of a digit

                    # Empty line
                    assert_equal(logfile.readline().strip(), "")

                # Log should be empty now
                assert_equal("", logfile.read())

    def test_main(self):
        command = ['python2', self.base_worker_script, self.command_manager._commands_filename, self.logs_dir]
        assert_equal(call(command), 0)

        # Simulate a resume, i.e. re-run the command, the output/error should be concatenated.
        self.command_manager.set_commands_to_run(self.commands)
        assert_equal(call(command), 0)

        # Check output logs
        self._check_logs(".out", expect_output=True)

        # Check error logs
        self._check_logs(".err", expect_output=False)

    def test_lock(self):
        command = ['python2', self.base_worker_script, self.command_manager._commands_filename, self.logs_dir]

        # Lock the commands file before running 'base_worker.py'
        with open_with_lock(self.command_manager._commands_filename, 'r+'):
            # `universal_newlines=True` makes the captured streams text on
            # every Python version, so they can be compared with str below.
            process = Popen(command, stdout=PIPE, stderr=PIPE, universal_newlines=True)
            time.sleep(1)

        stdout, stderr = process.communicate()
        assert_equal(stdout, "")
        assert_true("write-lock" in stderr, msg="Forcing a race condition, try increasing sleeping time above.")
        assert_true("Traceback" not in stderr)  # Check that there are no errors.