def test_send_file(self):
    """Check that a file listed in ``distributed_files`` is visible to a remote actor.

    Starts a master on port 1239 and one worker, creates an empty
    ``rom_files/pong.bin`` locally, connects with that file distributed,
    and polls ``Actor.check_local_file()`` until it reports truthy.
    """
    port = 1239
    master = Master(port=port)
    th = threading.Thread(target=master.run)
    th.start()
    worker = Worker('localhost:{}'.format(port), 1)
    time.sleep(2)

    tmp_dir = 'rom_files'
    tmp_file = os.path.join(tmp_dir, 'pong.bin')
    # Create the directory and an empty file with the standard library
    # instead of shelling out to platform-specific commands.
    if not os.path.isdir(tmp_dir):
        os.makedirs(tmp_dir)
    # Append mode creates the file if missing and leaves existing content
    # untouched, matching `touch` / `type NUL >>`.
    with open(tmp_file, 'a'):
        pass
    # unittest assertion instead of a bare assert, which is stripped under -O.
    self.assertTrue(os.path.exists(tmp_file))

    parl.connect('localhost:{}'.format(port), distributed_files=[tmp_file])
    time.sleep(5)
    actor = Actor()
    # Poll up to 10 times, 10 seconds apart, for the file to show up remotely.
    for _ in range(10):
        if actor.check_local_file():
            break
        time.sleep(10)
    self.assertEqual(True, actor.check_local_file())
    del actor
    time.sleep(10)
    worker.exit()
    master.exit()
def test_sync_config_file(self):
    """Check that distributed data/config files remain readable by the actor.

    Writes ``random.npy`` and ``config.json`` locally, connects with both
    files distributed, deletes the local copies, and then verifies the
    actor still reports the same array sum and config value.
    """
    cluster_addr = 'localhost:1335'
    master = Master(port=1335)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)
    worker = Worker(cluster_addr, 1)

    # Prepare the local files that will be shipped with the connection.
    npy_path = 'random.npy'
    local_array = np.random.randn(3, 5)
    np.save(npy_path, local_array)
    expected_sum = local_array.sum()

    config_file = {'test': 1000}
    with open('config.json', 'w') as config_fp:
        json.dump(config_file, config_fp)

    parl.connect(cluster_addr, ['random.npy', 'config.json'])
    actor = Actor('random.npy', 'config.json')
    time.sleep(5)

    # Remove the local copies before querying the actor.
    for local_copy in ('./random.npy', './config.json'):
        os.remove(local_copy)

    self.assertEqual(actor.random_sum(), expected_sum)
    time.sleep(10)
    self.assertEqual(config_file['test'], actor.read_config())

    del actor
    worker.exit()
    master.exit()
def test_connect_and_create_actor_in_multiprocessing_with_connected_in_main_process(
        self):
    """Connect in the main process, then connect again from two child processes.

    After both children have finished, the main process creates an actor
    to confirm that its own client still works.
    """
    cluster_addr = 'localhost:8238'

    # Start the master.
    master = Master(port=8238)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)

    worker1 = Worker(cluster_addr, 4)
    parl.connect(cluster_addr)

    children = [
        multiprocessing.Process(
            target=self._connect_and_create_actor, args=(cluster_addr, ))
        for _ in range(2)
    ]
    for child in children:
        child.start()
    for child in children:
        child.join()

    # Make sure that the client of the main process still works.
    self._create_actor()

    worker1.exit()
    master.exit()
def test_create_actor_in_multiprocessing(self):
    """Create actors from child processes that reuse the main process's connection.

    The child-process part is skipped on Windows; the main process always
    creates one actor at the end to confirm its client still works.
    """
    cluster_addr = 'localhost:8240'

    # Start the master.
    master = Master(port=8240)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)

    worker1 = Worker(cluster_addr, 4)
    parl.connect(cluster_addr)

    if not _IS_WINDOWS:
        # In windows, fork process cannot access client created in main process.
        children = [
            multiprocessing.Process(target=self._create_actor)
            for _ in range(2)
        ]
        for child in children:
            child.start()
        for child in children:
            child.join()
        print("[test_create_actor_in_multiprocessing] Join")

    # Make sure that the client of the main process still works.
    self._create_actor()

    worker1.exit()
    master.exit()
def test_max_memory(self):
    """Check the monitor's actor count before and after running ``self.actor``.

    First asserts that one actor is reported for the first client, then
    releases it, runs ``self.actor`` in a separate process, and polls the
    monitor until the reported actor count is 0.

    Raises:
        ValueError: if the last polled actor count is still 1.
    """
    port = 3001
    master = Master(port=port)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(5)

    cluster_addr = 'localhost:{}'.format(port)
    worker = Worker(cluster_addr, 1)
    cluster_monitor = ClusterMonitor(cluster_addr)
    time.sleep(5)

    parl.connect(cluster_addr)
    actor = Actor()
    time.sleep(20)
    self.assertEqual(1, cluster_monitor.data['clients'][0]['actor_num'])
    del actor
    time.sleep(10)

    p = Process(target=self.actor, args=(cluster_addr, ))
    p.start()

    # Poll up to 6 times, 10 seconds apart, for the count to drop to 0.
    for _ in range(6):
        actor_num = cluster_monitor.data['clients'][0]['actor_num']
        if actor_num == 0:
            break
        time.sleep(10)
    if actor_num == 1:
        raise ValueError("Actor max memory test failed.")
    self.assertEqual(0, cluster_monitor.data['clients'][0]['actor_num'])

    p.terminate()
    worker.exit()
    master.exit()
def test_job_exit_exceptionally(self):
    """Kill every ``remote/job.py`` process and check an actor can still be created.

    After the worker's job buffer is full, all job processes are killed
    with a platform-specific shell command; the test then connects and
    verifies ``Actor.add_one`` still works.
    """
    cluster_addr = 'localhost:1334'
    master = Master(port=1334)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)

    worker1 = Worker(cluster_addr, 4)
    time.sleep(10)
    self.assertEqual(worker1.job_buffer.full(), True)
    time.sleep(1)
    self.assertEqual(master.cpu_num, 4)

    print("We are going to kill all the jobs.")
    if not _IS_WINDOWS:
        kill_cmd = (
            "ps aux | grep remote/job.py | awk '{print $2}' | xargs kill -9"
        )
        subprocess.call([kill_cmd], shell=True)
    else:
        kill_cmd = r'''for /F "skip=2 tokens=2 delims=," %a in ('wmic process where "commandline like '%remote\\job.py%'" get processid^,status /format:csv') do taskkill /F /T /pid %a'''
        print(os.popen(kill_cmd).read())

    parl.connect(cluster_addr)
    actor = Actor()
    self.assertEqual(actor.add_one(1), 2)
    time.sleep(20)

    master.exit()
    worker1.exit()
def test_actor_exception_2(self):
    """Check that a second actor still works after a remote method raises.

    Calls ``will_raise_exception_func`` on one actor, ignores the error,
    then creates a second actor and verifies ``add_one`` works and that
    ``master.cpu_num`` reaches 0.
    """
    logger.info("running: test_actor_exception_2")
    master = Master(port=8236)
    th = threading.Thread(target=master.run)
    th.start()
    time.sleep(3)
    worker1 = Worker('localhost:8236', 1)
    self.assertEqual(1, master.cpu_num)
    parl.connect('localhost:8236')
    actor = Actor()
    try:
        actor.will_raise_exception_func()
    except Exception:
        # The failure itself is expected and deliberately ignored. Catching
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit still stop the test run.
        pass
    actor2 = Actor()
    # Poll up to 5 times, 10 seconds apart, for cpu_num to reach 0.
    for _ in range(5):
        if master.cpu_num == 0:
            break
        time.sleep(10)
    self.assertEqual(actor2.add_one(1), 2)
    self.assertEqual(0, master.cpu_num)
    del actor
    del actor2
    worker1.exit()
    master.exit()
def test_actor_exception(self):
    """Check that a failing actor construction raises ``RemoteError``.

    Constructs an actor with the keyword ``abcd='a bug'`` and expects
    ``exceptions.RemoteError``; a second, normally-constructed actor must
    then work and ``master.cpu_num`` must reach 0.
    """

    def wait_for_cpu_num(expected):
        # Poll up to 3 times, 10 seconds apart.
        for _ in range(3):
            if master.cpu_num == expected:
                return
            time.sleep(10)

    cluster_addr = 'localhost:8235'
    logger.info("running:test_actor_exception")
    master = Master(port=8235)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(3)
    worker1 = Worker(cluster_addr, 1)

    wait_for_cpu_num(1)
    self.assertEqual(1, master.cpu_num)

    logger.info("running:test_actor_exception: 0")
    parl.connect(cluster_addr)
    logger.info("running:test_actor_exception: 1")

    with self.assertRaises(exceptions.RemoteError):
        actor = Actor(abcd='a bug')
    logger.info("running:test_actor_exception: 2")

    actor2 = Actor()
    wait_for_cpu_num(0)
    self.assertEqual(actor2.add_one(1), 2)
    self.assertEqual(0, master.cpu_num)

    master.exit()
    worker1.exit()
def test_send_file2(self):
    """Check that ``parl.connect`` raises when given the path ``rom_files/no_pong.bin``.

    This test does not create that file before connecting.
    """
    port = 1240
    cluster_addr = 'localhost:{}'.format(port)
    master = Master(port=port)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    worker = Worker(cluster_addr, 1)
    time.sleep(2)

    missing_file = os.path.join('rom_files', 'no_pong.bin')
    with self.assertRaises(Exception):
        parl.connect(cluster_addr, [missing_file])

    worker.exit()
    master.exit()
def test_add_worker(self):
    """Check that ``master.cpu_num`` follows workers joining and leaving.

    Expected values: 4 after the first worker, 8 after the second, and 4
    again once the second worker has exited.
    """

    def wait_for_cpu_num(expected, attempts):
        # Poll `attempts` times, 10 seconds apart.
        for _ in range(attempts):
            if master.cpu_num == expected:
                return
            time.sleep(10)

    cluster_addr = 'localhost:8234'
    logger.info("running: test_add_worker")
    master = Master(port=8234)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)

    worker1 = Worker(cluster_addr, 4)
    wait_for_cpu_num(4, 3)
    self.assertEqual(master.cpu_num, 4)

    worker2 = Worker(cluster_addr, 4)
    wait_for_cpu_num(8, 3)
    self.assertEqual(master.cpu_num, 8)

    worker2.exit()
    wait_for_cpu_num(4, 10)
    self.assertEqual(master.cpu_num, 4)

    master.exit()
    worker1.exit()
def test_one_worker(self):
    """Check that the cluster monitor lists one worker, then none after it exits."""
    port = 1439
    cluster_addr = 'localhost:{}'.format(port)

    master = Master(port=port)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)

    worker = Worker(cluster_addr, 1)
    cluster_monitor = ClusterMonitor(cluster_addr)
    time.sleep(1)
    self.assertEqual(1, len(cluster_monitor.data['workers']))

    worker.exit()
    # Fixed 40-second wait before re-checking the worker list.
    time.sleep(40)
    self.assertEqual(0, len(cluster_monitor.data['workers']))

    master.exit()
def test_get_attribute(self):
    """Check that constructor arguments can be read back as actor attributes.

    Passes a random int, a random float and a random 3x3 array to
    ``Actor`` and compares each with ``actor.arg1`` .. ``actor.arg3``.
    """
    # Fixed the misspelled test name in the log message.
    logger.info("running:test_get_attribute")
    master = Master(port=8507)
    th = threading.Thread(target=master.run)
    th.start()
    time.sleep(3)
    worker1 = Worker('localhost:8507', 1)
    arg1 = np.random.randint(100)
    arg2 = np.random.randn()
    arg3 = np.random.randn(3, 3)
    parl.connect('localhost:8507')
    actor = Actor(arg1, arg2, arg3)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(arg1, actor.arg1)
    self.assertEqual(arg2, actor.arg2)
    # Element-wise comparison for the array, reduced with .all().
    self.assertTrue((arg3 == actor.arg3).all())
    master.exit()
    worker1.exit()
def test_cluster_status(self):
    """Check the master's status string before and after creating one actor."""
    port = 4321
    cluster_addr = 'localhost:{}'.format(port)

    master = Master(port=port)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(5)

    worker = Worker(cluster_addr, 1)
    time.sleep(5)
    self.assertEqual(master.cluster_monitor.get_status_info(),
                     'has 0 used cpus, 1 vacant cpus.')

    parl.connect(cluster_addr)
    actor = Actor()
    # Fixed 50-second wait before reading the status again.
    time.sleep(50)
    self.assertEqual(master.cluster_monitor.get_status_info(),
                     'has 1 used cpus, 0 vacant cpus.')

    worker.exit()
    master.exit()
def test_log_server(self):
    """Query the worker's log server for every job of the current client.

    For each job the test checks that ``/get-log`` answers 400 without a
    ``job_id`` and 200 with one, and that both the returned log and the
    ``/download-log`` body are contained in the actor outputs.
    """
    master_port = 8401
    log_server_port = 8402
    cluster_addr = 'localhost:{}'.format(master_port)

    # Start the master.
    master = Master(port=master_port)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)

    worker = Worker(cluster_addr, 4, log_server_port=log_server_port)
    outputs = self._connect_and_create_actor(cluster_addr)

    # Get status.
    client_jobs = pickle.loads(master._get_status()).get('client_jobs')
    self.assertIsNotNone(client_jobs)

    # Get the jobs that belong to this client.
    jobs = client_jobs.get(get_global_client().client_id)
    self.assertIsNotNone(jobs)

    for job_id, log_server_addr in jobs.items():
        log_url = "http://{}/get-log".format(log_server_addr)
        query = {'job_id': job_id}

        # Response without job_id.
        response = requests.get(log_url)
        self.assertEqual(response.status_code, 400)

        # Normal response.
        response = requests.get(log_url, params=query)
        self.assertEqual(response.status_code, 200)
        log_text = json.loads(response.text).get('log')
        self.assertIsNotNone(log_text)
        # Normalise Windows line endings before comparing.
        self.assertIn(log_text.replace('\r\n', '\n'), outputs)

        # Download endpoint.
        download_url = "http://{}/download-log".format(log_server_addr)
        response = requests.get(download_url, params=query)
        self.assertEqual(response.status_code, 200)
        self.assertIn(response.text.replace('\r\n', '\n'), outputs)

    disconnect()
    worker.exit()
    master.exit()
def test_connect_and_create_actor_in_multiprocessing_without_connected_in_main_process(
        self):
    """Connect only from two child processes, never from the main process.

    After the children finish, ``self._create_actor`` in the main process
    is expected to raise ``AssertionError``.
    """
    cluster_addr = 'localhost:8239'

    # Start the master.
    master = Master(port=8239)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)

    worker1 = Worker(cluster_addr, 4)

    children = [
        multiprocessing.Process(
            target=self._connect_and_create_actor, args=(cluster_addr, ))
        for _ in range(2)
    ]
    for child in children:
        child.start()
    for child in children:
        child.join()

    with self.assertRaises(AssertionError):
        self._create_actor()

    worker1.exit()
    master.exit()
def test_acor_exit_exceptionally(self):
    """Kill a separate client process and check an actor can still be created.

    Launches the sibling ``simulate_client`` script as a subprocess, waits
    for ``master.cpu_num`` to reach 0, kills the subprocess, then connects
    from this process and creates an actor.
    """
    port = 1337
    master = Master(port)
    th = threading.Thread(target=master.run)
    th.start()
    time.sleep(1)
    worker1 = Worker('localhost:{}'.format(port), 1)

    # The client script path is derived from this test file's own path.
    file_path = __file__.replace('reset_job_test', 'simulate_client')
    command = [sys.executable, file_path]
    proc = subprocess.Popen(command)
    try:
        # Poll up to 6 times, 10 seconds apart, for cpu_num to reach 0.
        for _ in range(6):
            if master.cpu_num == 0:
                break
            time.sleep(10)
        self.assertEqual(master.cpu_num, 0)
    finally:
        # Always kill and reap the client, even when the assertion fails,
        # so it is neither left running nor left as a zombie.
        proc.kill()
        proc.wait()

    parl.connect('localhost:{}'.format(port))
    # Creating the actor is the check itself; the object is not used further.
    actor = Actor()
    master.exit()
    worker1.exit()
    disconnect()
def test_reset_actor(self):
    """Create ten actors in sequence and check ``master.cpu_num`` returns to 4."""
    cluster_addr = 'localhost:8237'
    logger.info("running: test_reset_actor")

    # Start the master.
    master = Master(port=8237)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(3)

    worker1 = Worker(cluster_addr, 4)
    parl.connect(cluster_addr)

    # Rebinding `actor` drops the reference to the previous one.
    for _ in range(10):
        actor = Actor()
        self.assertEqual(actor.add_one(1), 2)
    del actor

    # Poll up to 10 times, 10 seconds apart, for cpu_num to reach 4.
    attempts_left = 10
    while attempts_left > 0 and master.cpu_num != 4:
        time.sleep(10)
        attempts_left -= 1
    self.assertEqual(master.cpu_num, 4)

    worker1.exit()
    master.exit()
def test_twenty_worker(self):
    """Start twenty workers and check the monitor's count as they exit in two halves."""

    def worker_count_reaches(expected):
        # Poll up to 10 times, 10 seconds apart; report whether the
        # monitor's worker list reached the expected length.
        for _ in range(10):
            if expected == len(cluster_monitor.data['workers']):
                return True
            time.sleep(10)
        return False

    port = 1440
    cluster_addr = 'localhost:{}'.format(port)

    master = Master(port=port)
    master_thread = threading.Thread(target=master.run)
    master_thread.start()
    time.sleep(1)

    # Workers are started one second apart.
    workers = []
    for _ in range(20):
        new_worker = Worker(cluster_addr, 1)
        time.sleep(1)
        workers.append(new_worker)

    cluster_monitor = ClusterMonitor(cluster_addr)
    time.sleep(1)
    self.assertEqual(20, len(cluster_monitor.data['workers']))

    # Check if the number of workers drops by 10.
    for first_half_worker in workers[:10]:
        first_half_worker.exit()
    self.assertTrue(worker_count_reaches(10))

    # Check if the number of workers drops to 0.
    for second_half_worker in workers[10:20]:
        second_half_worker.exit()
    self.assertTrue(worker_count_reaches(0))

    master.exit()
# NOTE(review): this span was collapsed onto one line; the block nesting
# below (what belongs to the __main__ guard, and the separate test class)
# is a best-effort reconstruction - confirm against the original file.
# NOTE(review): `parl`, `unittest` and `disconnect` are used below but not
# imported in the visible span - presumably imported elsewhere; verify.
from parl.remote.master import Master
from parl.remote.worker import Worker
import time
import threading

# Module-level value added into the result of Actor.add below.
c = 10
# Port the master is started on.
port = 3002
if __name__ == '__main__':
    # Run the master in a background daemon thread.
    master = Master(port=port)
    th = threading.Thread(target=master.run)
    # NOTE(review): Thread.setDaemon is deprecated since Python 3.10 in
    # favour of the `daemon` attribute.
    th.setDaemon(True)
    th.start()
    time.sleep(5)
    cluster_addr = 'localhost:{}'.format(port)
    # The connection is made before the worker is created.
    parl.connect(cluster_addr)
    worker = Worker(cluster_addr, 1)

    @parl.remote_class
    class Actor(object):
        def add(self, a, b):
            # Returns the sum of both arguments plus the module-level `c`.
            return a + b + c

    actor = Actor()


class TestRecursive_actor(unittest.TestCase):
    # Only tearDown is visible here; any test methods lie outside this span.
    def tearDown(self):
        # Calls disconnect() after every test.
        disconnect()
def test_monitor_query_log_server(self):
    """Query job logs through the URLs returned by the monitor's ``get-jobs`` API.

    Starts a master, launches ``monitor.py`` as a subprocess, starts a
    worker with a log server, then fetches the job list for the current
    client and checks that each job's log and download URLs answer 200
    with content contained in the actor outputs.
    """
    master_port = 8403
    monitor_port = 8404

    # Start the master.
    master = Master(port=master_port, monitor_port=monitor_port)
    th = threading.Thread(target=master.run)
    th.start()
    time.sleep(1)

    # Locate monitor.py relative to this test file (.pyc or .py).
    monitor_file = __file__.replace(
        os.path.join('tests', 'log_server_test.pyc'), 'monitor.py')
    monitor_file = monitor_file.replace(
        os.path.join('tests', 'log_server_test.py'), 'monitor.py')
    command = [
        sys.executable, monitor_file, "--monitor_port",
        str(monitor_port), "--address", "localhost:" + str(master_port)
    ]

    # Sink for the monitor's output: a temp file on Windows, os.devnull
    # elsewhere.
    if _IS_WINDOWS:
        FNULL = tempfile.TemporaryFile()
    else:
        FNULL = open(os.devnull, 'w')

    monitor_proc = None
    try:
        # Start the cluster monitor.
        monitor_proc = subprocess.Popen(
            command,
            stdout=FNULL,
            stderr=subprocess.STDOUT,
        )

        # Start worker.
        cluster_addr = 'localhost:{}'.format(master_port)
        log_server_port = 8405
        worker = Worker(cluster_addr, 4, log_server_port=log_server_port)

        # Test monitor API.
        outputs = self._connect_and_create_actor(cluster_addr)
        time.sleep(5)  # Wait for the status update
        client = get_global_client()
        jobs_url = "{}/get-jobs?client_id={}".format(master.monitor_url,
                                                     client.client_id)
        r = requests.get(jobs_url)
        self.assertEqual(r.status_code, 200)
        data = json.loads(r.text)
        for job in data:
            log_url = job.get('log_url')
            self.assertIsNotNone(log_url)
            r = requests.get(log_url)
            self.assertEqual(r.status_code, 200)
            log_content = json.loads(r.text).get('log')
            self.assertIsNotNone(log_content)
            # Normalise Windows line endings before comparing.
            log_content = log_content.replace('\r\n', '\n')
            self.assertIn(log_content, outputs)

            # Test download.
            download_url = job.get('download_url')
            r = requests.get(download_url)
            self.assertEqual(r.status_code, 200)
            log_content = r.text.replace('\r\n', '\n')
            self.assertIn(log_content, outputs)
    finally:
        # Clean context: stop and reap the monitor and close the output
        # sink even when an assertion above fails.
        if monitor_proc is not None:
            monitor_proc.kill()
            monitor_proc.wait()
        FNULL.close()

    disconnect()
    worker.exit()
    master.exit()