Пример #1
0
    def test_send_file(self):
        """Check that a file passed via ``distributed_files`` is visible to an actor.

        Starts a master on port 1239 and a one-CPU worker, creates an empty
        ``rom_files/pong.bin`` locally, connects with that file listed in
        ``distributed_files`` and polls ``Actor.check_local_file()`` (up to
        ten times, ten seconds apart) until it returns a truthy value.
        """
        port = 1239
        master = Master(port=port)
        th = threading.Thread(target=master.run)
        th.start()
        worker = None
        try:
            worker = Worker('localhost:{}'.format(port), 1)
            time.sleep(2)

            tmp_dir = 'rom_files'
            tmp_file = os.path.join(tmp_dir, 'pong.bin')
            # Build the fixture with the standard library rather than shelling
            # out: no platform branch, and no error when the directory is
            # already there from a previous run.
            if not os.path.isdir(tmp_dir):
                os.makedirs(tmp_dir)
            # Append mode creates the file when missing and never truncates it.
            open(tmp_file, 'a').close()
            self.assertTrue(os.path.exists(tmp_file))
            parl.connect(
                'localhost:{}'.format(port), distributed_files=[tmp_file])
            time.sleep(5)
            actor = Actor()
            for _ in range(10):
                if actor.check_local_file():
                    break
                time.sleep(10)
            self.assertEqual(True, actor.check_local_file())
            del actor
            time.sleep(10)
        finally:
            # Always stop the cluster; otherwise a failed assertion leaves the
            # non-daemon master thread running.
            if worker is not None:
                worker.exit()
            master.exit()
Пример #2
0
    def test_sync_config_file(self):
        """Check that distributed files stay readable remotely after local deletion.

        Writes ``random.npy`` and ``config.json`` locally, connects with both
        files distributed, removes the local copies, then asserts that the
        actor's ``random_sum()`` matches the local array sum and that
        ``read_config()`` returns the stored ``'test'`` value.
        """
        cluster_addr = 'localhost:1335'
        master = Master(port=1335)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)
        worker = Worker(cluster_addr, 1)

        # Local fixtures: a random array and a small JSON config.
        array_path = 'random.npy'
        config_path = 'config.json'
        local_array = np.random.randn(3, 5)
        np.save(array_path, local_array)
        expected_sum = local_array.sum()

        expected_config = {'test': 1000}
        with open(config_path, 'w') as config_fp:
            json.dump(expected_config, config_fp)

        parl.connect(cluster_addr, [array_path, config_path])
        actor = Actor(array_path, config_path)
        time.sleep(5)

        # Delete the local copies before querying the actor.
        os.remove('./random.npy')
        os.remove('./config.json')

        self.assertEqual(actor.random_sum(), expected_sum)
        time.sleep(10)

        self.assertEqual(expected_config['test'], actor.read_config())

        del actor
        worker.exit()
        master.exit()
    def test_connect_and_create_actor_in_multiprocessing_with_connected_in_main_process(
            self):
        """Connect in the main process, then connect again from two child processes.

        Each child runs ``self._connect_and_create_actor`` against the same
        cluster address; after both have joined, the main process creates an
        actor through ``self._create_actor`` to confirm its own client still
        works.
        """
        cluster_addr = 'localhost:8238'

        # start the master
        master = Master(port=8238)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)

        worker1 = Worker(cluster_addr, 4)
        parl.connect(cluster_addr)

        children = [
            multiprocessing.Process(
                target=self._connect_and_create_actor, args=(cluster_addr, ))
            for _ in range(2)
        ]
        for child in children:
            child.start()
        for child in children:
            child.join()

        # make sure that the client of the main process still works
        self._create_actor()

        worker1.exit()
        master.exit()
    def test_create_actor_in_multiprocessing(self):
        """Create actors from child processes that reuse the main-process client.

        The child-process part is skipped on Windows; the main process always
        creates one actor at the end through ``self._create_actor``.
        """
        cluster_addr = 'localhost:8240'

        # start the master
        master = Master(port=8240)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)

        worker1 = Worker(cluster_addr, 4)
        parl.connect(cluster_addr)

        # In windows, fork process cannot access client created in main process.
        if not _IS_WINDOWS:
            children = [
                multiprocessing.Process(target=self._create_actor)
                for _ in range(2)
            ]
            for child in children:
                child.start()
            for child in children:
                child.join()
            print("[test_create_actor_in_multiprocessing]  Join")

        # make sure that the client of the main process still works
        self._create_actor()

        worker1.exit()
        master.exit()
Пример #5
0
    def test_max_memory(self):
        """Check that the monitor's actor count returns to zero.

        Starts a master (port 3001), a one-CPU worker and a ``ClusterMonitor``.
        An ``Actor`` is created and the first client's ``actor_num`` is
        expected to be 1. The actor is then deleted, ``self.actor`` is run in
        a separate process, and ``actor_num`` is polled (six checks, ten
        seconds apart) until it reaches 0.

        NOTE(review): ``self.actor`` is defined outside this block; presumably
        it creates an actor that exceeds its memory limit - confirm.

        Raises:
            ValueError: if ``actor_num`` is still 1 after polling.
        """
        port = 3001
        master = Master(port=port)
        th = threading.Thread(target=master.run)
        th.start()
        worker = None
        try:
            time.sleep(5)
            cluster_addr = 'localhost:{}'.format(port)
            worker = Worker(cluster_addr, 1)
            cluster_monitor = ClusterMonitor(cluster_addr)
            time.sleep(5)
            parl.connect(cluster_addr)
            actor = Actor()
            time.sleep(20)
            self.assertEqual(1,
                             cluster_monitor.data['clients'][0]['actor_num'])
            del actor
            time.sleep(10)
            p = Process(target=self.actor, args=(cluster_addr, ))
            p.start()
            try:
                for _ in range(6):
                    x = cluster_monitor.data['clients'][0]['actor_num']
                    if x == 0:
                        break
                    else:
                        time.sleep(10)
                if x == 1:
                    raise ValueError("Actor max memory test failed.")
                self.assertEqual(
                    0, cluster_monitor.data['clients'][0]['actor_num'])
            finally:
                # Stop the helper process even when the checks above fail.
                p.terminate()
        finally:
            # Always shut the cluster down so a failure does not leave the
            # non-daemon master thread running.
            if worker is not None:
                worker.exit()
            master.exit()
Пример #6
0
    def test_job_exit_exceptionally(self):
        """Kill every job process, then check that an actor can still be created.

        Starts a master on port 1334 and a four-CPU worker, waits for the
        worker's job buffer to fill, force-kills all processes whose command
        line matches ``remote/job.py`` and finally asserts that a freshly
        created ``Actor`` still answers ``add_one(1) == 2``.
        """
        master = Master(port=1334)
        th = threading.Thread(target=master.run)
        th.start()
        time.sleep(1)
        worker1 = Worker('localhost:1334', 4)
        time.sleep(10)
        self.assertEqual(worker1.job_buffer.full(), True)
        time.sleep(1)
        self.assertEqual(master.cpu_num, 4)
        print("We are going to kill all the jobs.")
        if _IS_WINDOWS:
            # Use wmic to list processes whose command line contains
            # remote\job.py and taskkill each of them (with its process tree).
            command = r'''for /F "skip=2 tokens=2 delims=," %a in ('wmic process where "commandline like '%remote\\job.py%'" get processid^,status /format:csv') do taskkill /F /T /pid %a'''
            print(os.popen(command).read())
        else:
            # The grep pattern also matches the grep process itself; the PIDs
            # (second column of ``ps aux``) are passed to ``kill -9``.
            command = (
                "ps aux | grep remote/job.py | awk '{print $2}' | xargs kill -9"
            )
            # With shell=True the single list element is the shell command line.
            subprocess.call([command], shell=True)
        parl.connect('localhost:1334')
        actor = Actor()
        self.assertEqual(actor.add_one(1), 2)
        time.sleep(20)

        master.exit()
        worker1.exit()
Пример #7
0
 def test_actor_exception_2(self):
     """Check that a second actor works after the first one's method call fails.

     Starts a master on port 8236 with a one-CPU worker, calls
     ``will_raise_exception_func`` on a first actor (any ``Exception`` it
     raises is logged and tolerated), then creates a second actor and
     asserts that it answers ``add_one(1) == 2`` while the master reports
     zero free CPUs.
     """
     logger.info("running: test_actor_exception_2")
     master = Master(port=8236)
     th = threading.Thread(target=master.run)
     th.start()
     time.sleep(3)
     worker1 = Worker('localhost:8236', 1)
     self.assertEqual(1, master.cpu_num)
     parl.connect('localhost:8236')
     actor = Actor()
     try:
         actor.will_raise_exception_func()
     except Exception as e:
         # Catch ``Exception`` rather than using a bare ``except`` so that
         # KeyboardInterrupt/SystemExit still propagate, and record what
         # happened instead of silently discarding it.
         logger.info(
             "test_actor_exception_2: will_raise_exception_func raised: {}".
             format(e))
     actor2 = Actor()
     for _ in range(5):
         if master.cpu_num == 0:
             break
         time.sleep(10)
     self.assertEqual(actor2.add_one(1), 2)
     self.assertEqual(0, master.cpu_num)
     del actor
     del actor2
     worker1.exit()
     master.exit()
Пример #8
0
    def test_actor_exception(self):
        """Check that a failing actor construction raises ``RemoteError``.

        After ``Actor(abcd='a bug')`` raises, a second, valid actor is created
        and must answer ``add_one(1) == 2`` while the master reports zero
        free CPUs.
        """
        logger.info("running:test_actor_exception")
        cluster_addr = 'localhost:8235'
        master = Master(port=8235)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(3)
        worker1 = Worker(cluster_addr, 1)

        # Wait (at most three checks) for the worker's CPU to be registered.
        checks = 0
        while checks < 3 and master.cpu_num != 1:
            time.sleep(10)
            checks += 1
        self.assertEqual(1, master.cpu_num)

        logger.info("running:test_actor_exception: 0")
        parl.connect(cluster_addr)
        logger.info("running:test_actor_exception: 1")

        with self.assertRaises(exceptions.RemoteError):
            _ = Actor(abcd='a bug')
        logger.info("running:test_actor_exception: 2")

        actor2 = Actor()
        # Wait (at most three checks) for the single CPU to be taken.
        checks = 0
        while checks < 3 and master.cpu_num != 0:
            time.sleep(10)
            checks += 1
        self.assertEqual(actor2.add_one(1), 2)
        self.assertEqual(0, master.cpu_num)

        master.exit()
        worker1.exit()
Пример #9
0
    def test_send_file2(self):
        """Check that connecting with the path ``rom_files/no_pong.bin`` raises.

        ``parl.connect`` is called with that path as its second argument and
        is expected to raise an ``Exception``.
        """
        port = 1240
        cluster_addr = 'localhost:{}'.format(port)
        master = Master(port=port)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        worker = Worker(cluster_addr, 1)
        time.sleep(2)

        missing_file = os.path.join('rom_files', 'no_pong.bin')
        with self.assertRaises(Exception):
            parl.connect(cluster_addr, [missing_file])

        worker.exit()
        master.exit()
Пример #10
0
    def test_add_worker(self):
        """Check that ``master.cpu_num`` tracks workers joining and leaving.

        Expects 4 CPUs after the first worker, 8 after the second, and 4
        again once the second worker has exited.
        """
        logger.info("running: test_add_worker")
        cluster_addr = 'localhost:8234'
        master = Master(port=8234)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)

        def wait_for_cpu_num(expected, max_checks):
            # Poll master.cpu_num, sleeping ten seconds after each miss.
            for _ in range(max_checks):
                if master.cpu_num == expected:
                    return
                time.sleep(10)

        worker1 = Worker(cluster_addr, 4)
        wait_for_cpu_num(4, 3)
        self.assertEqual(master.cpu_num, 4)

        worker2 = Worker(cluster_addr, 4)
        wait_for_cpu_num(8, 3)
        self.assertEqual(master.cpu_num, 8)

        worker2.exit()

        wait_for_cpu_num(4, 10)
        self.assertEqual(master.cpu_num, 4)

        master.exit()
        worker1.exit()
Пример #11
0
 def test_one_worker(self):
     """Check that the cluster monitor sees one worker appear and disappear.

     The monitor's ``data['workers']`` list must have one entry after the
     worker starts and none 40 seconds after it exits.
     """
     port = 1439
     cluster_addr = 'localhost:{}'.format(port)

     master = Master(port=port)
     master_thread = threading.Thread(target=master.run)
     master_thread.start()
     time.sleep(1)

     worker = Worker(cluster_addr, 1)
     cluster_monitor = ClusterMonitor(cluster_addr)
     time.sleep(1)
     worker_count = len(cluster_monitor.data['workers'])
     self.assertEqual(1, worker_count)

     worker.exit()
     time.sleep(40)
     worker_count = len(cluster_monitor.data['workers'])
     self.assertEqual(0, worker_count)

     master.exit()
Пример #12
0
 def test_get_attribute(self):
     """Check that constructor arguments can be read back as actor attributes.

     Creates an ``Actor`` from a random int, a random float and a random
     3x3 array, then compares ``actor.arg1`` / ``arg2`` / ``arg3`` with the
     local values.
     """
     # Fixed the misspelled test name in the log line.
     logger.info("running:test_get_attribute")
     master = Master(port=8507)
     th = threading.Thread(target=master.run)
     th.start()
     time.sleep(3)
     worker1 = Worker('localhost:8507', 1)
     arg1 = np.random.randint(100)
     arg2 = np.random.randn()
     arg3 = np.random.randn(3, 3)
     parl.connect('localhost:8507')
     actor = Actor(arg1, arg2, arg3)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(arg1, actor.arg1)
     self.assertEqual(arg2, actor.arg2)
     # Element-wise comparison; every entry of the array must match.
     self.assertTrue((arg3 == actor.arg3).all())
     master.exit()
     worker1.exit()
Пример #13
0
 def test_cluster_status(self):
     """Check the status string of the master's cluster monitor.

     The string must report one vacant CPU before an actor exists and one
     used CPU 50 seconds after an actor has been created.
     """
     port = 4321
     cluster_addr = 'localhost:{}'.format(port)

     master = Master(port=port)
     master_thread = threading.Thread(target=master.run)
     master_thread.start()
     time.sleep(5)
     worker = Worker(cluster_addr, 1)
     time.sleep(5)

     idle_status = master.cluster_monitor.get_status_info()
     self.assertEqual(idle_status, 'has 0 used cpus, 1 vacant cpus.')

     parl.connect(cluster_addr)
     # Keep the reference so the actor stays alive while the status is read.
     actor = Actor()
     time.sleep(50)
     busy_status = master.cluster_monitor.get_status_info()
     self.assertEqual(busy_status, 'has 1 used cpus, 0 vacant cpus.')

     worker.exit()
     master.exit()
Пример #14
0
    def test_log_server(self):
        """Exercise the worker log server's ``get-log`` and ``download-log`` endpoints.

        For every job registered for the current client in the master's
        status, the test expects HTTP 400 when ``job_id`` is omitted, HTTP 200
        when it is given, and the returned log text to be contained in the
        outputs returned by ``self._connect_and_create_actor``.
        """
        master_port = 8401
        log_server_port = 8402
        cluster_addr = 'localhost:{}'.format(master_port)

        # start the master
        master = Master(port=master_port)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)

        worker = Worker(cluster_addr, 4, log_server_port=log_server_port)
        outputs = self._connect_and_create_actor(cluster_addr)

        # Get status
        raw_status = master._get_status()
        client_jobs = pickle.loads(raw_status).get('client_jobs')
        self.assertIsNotNone(client_jobs)

        # Get job id
        client_id = get_global_client().client_id
        jobs = client_jobs.get(client_id)
        self.assertIsNotNone(jobs)

        for job_id, log_server_addr in jobs.items():
            log_url = "http://{}/get-log".format(log_server_addr)

            # Test response without job_id
            missing_id_resp = requests.get(log_url)
            self.assertEqual(missing_id_resp.status_code, 400)

            # Test normal response
            log_resp = requests.get(log_url, params={'job_id': job_id})
            self.assertEqual(log_resp.status_code, 200)
            log_text = json.loads(log_resp.text).get('log')
            self.assertIsNotNone(log_text)
            self.assertIn(log_text.replace('\r\n', '\n'), outputs)

            # Test download
            download_url = "http://{}/download-log".format(log_server_addr)
            download_resp = requests.get(
                download_url, params={'job_id': job_id})
            self.assertEqual(download_resp.status_code, 200)
            self.assertIn(download_resp.text.replace('\r\n', '\n'), outputs)

        disconnect()
        worker.exit()
        master.exit()
    def test_connect_and_create_actor_in_multiprocessing_without_connected_in_main_process(
            self):
        """Connect only from two child processes, never from the main process.

        After both children have joined, ``self._create_actor`` in the main
        process is expected to raise ``AssertionError``.
        """
        cluster_addr = 'localhost:8239'

        # start the master
        master = Master(port=8239)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)

        worker1 = Worker(cluster_addr, 4)

        children = [
            multiprocessing.Process(
                target=self._connect_and_create_actor, args=(cluster_addr, ))
            for _ in range(2)
        ]
        for child in children:
            child.start()
        for child in children:
            child.join()

        with self.assertRaises(AssertionError):
            self._create_actor()

        worker1.exit()
        master.exit()
Пример #16
0
    def test_acor_exit_exceptionally(self):
        """Check the cluster after an external client process is killed.

        Launches the sibling ``simulate_client`` script (path derived from
        this file's name) as a subprocess, waits until the master reports
        zero free CPUs, kills the subprocess, then connects and creates a
        new ``Actor``.
        """
        port = 1337
        master = Master(port)
        th = threading.Thread(target=master.run)
        th.start()
        time.sleep(1)
        worker1 = Worker('localhost:{}'.format(port), 1)

        file_path = __file__.replace('reset_job_test', 'simulate_client')
        command = [sys.executable, file_path]
        proc = subprocess.Popen(command)
        try:
            for _ in range(6):
                if master.cpu_num == 0:
                    break
                else:
                    time.sleep(10)
            self.assertEqual(master.cpu_num, 0)
        finally:
            # Kill the client even if the assertion fails, and reap it so no
            # zombie process is left behind.
            proc.kill()
            proc.wait()

        parl.connect('localhost:{}'.format(port))
        actor = Actor()
        master.exit()
        worker1.exit()
        disconnect()
Пример #17
0
    def test_reset_actor(self):
        """Check that CPUs are returned to the master after actors are dropped.

        Creates ten actors one after another (each rebinding the same name),
        verifies ``add_one(1) == 2`` on each, deletes the last one and waits
        for ``master.cpu_num`` to reach 4.
        """
        logger.info("running: test_reset_actor")
        cluster_addr = 'localhost:8237'

        # start the master
        master = Master(port=8237)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(3)

        worker1 = Worker(cluster_addr, 4)
        parl.connect(cluster_addr)
        for _ in range(10):
            actor = Actor()
            self.assertEqual(actor.add_one(1), 2)
        del actor

        # Up to ten checks, ten seconds apart, for all four CPUs to be free.
        checks = 0
        while checks < 10 and master.cpu_num != 4:
            time.sleep(10)
            checks += 1

        self.assertEqual(master.cpu_num, 4)
        worker1.exit()
        master.exit()
Пример #18
0
    def test_twenty_worker(self):
        """Check the monitor's worker count with twenty workers leaving in two waves.

        Twenty one-CPU workers are started one second apart. The first ten
        are stopped and the monitor must report 10 workers; then the rest
        are stopped and it must report 0.
        """
        port = 1440
        cluster_addr = 'localhost:{}'.format(port)

        master = Master(port=port)
        master_thread = threading.Thread(target=master.run)
        master_thread.start()
        time.sleep(1)

        workers = []
        for _ in range(20):
            new_worker = Worker(cluster_addr, 1)
            time.sleep(1)
            workers.append(new_worker)

        cluster_monitor = ClusterMonitor(cluster_addr)
        time.sleep(1)
        self.assertEqual(20, len(cluster_monitor.data['workers']))

        def worker_count_reaches(target):
            # Up to ten checks, sleeping ten seconds after each miss.
            for _ in range(10):
                if target == len(cluster_monitor.data['workers']):
                    return True
                time.sleep(10)
            return False

        # check if the number of workers drops by 10
        for leaving in workers[:10]:
            leaving.exit()
        self.assertTrue(worker_count_reaches(10))

        # check if the number of workers drops to 0
        for leaving in workers[10:20]:
            leaving.exit()
        self.assertTrue(worker_count_reaches(0))

        master.exit()
Пример #19
0
from parl.remote.master import Master
from parl.remote.worker import Worker
import time
import threading

# Module-level constant that Actor.add reads as a global.
c = 10
port = 3002
if __name__ == '__main__':
    # Run the master in a daemon thread so it does not keep the interpreter
    # alive once the main thread finishes.
    master = Master(port=port)
    th = threading.Thread(target=master.run)
    # Set the attribute directly; Thread.setDaemon() is deprecated.
    th.daemon = True
    th.start()
# NOTE(review): the lines below run on import as well as when executed as a
# script, whereas the master above is only started under __main__ - confirm
# this is intended.
time.sleep(5)
cluster_addr = 'localhost:{}'.format(port)
parl.connect(cluster_addr)
worker = Worker(cluster_addr, 1)


@parl.remote_class
class Actor(object):
    """Remote class whose ``add`` also uses the module-level global ``c``."""

    def add(self, a, b):
        """Return ``a + b`` plus the module-level ``c``."""
        partial = a + b
        return partial + c


# Module-level Actor instance, created right after the class definition.
actor = Actor()


class TestRecursive_actor(unittest.TestCase):
    # NOTE(review): only tearDown is visible in this excerpt; the test
    # methods are presumably defined further down - confirm.
    def tearDown(self):
        """Call ``disconnect()`` after every test."""
        disconnect()
Пример #20
0
    def test_monitor_query_log_server(self):
        """Query job logs through the URLs returned by the monitor's ``get-jobs`` API.

        Starts a master (port 8403) with a monitor port (8404), launches
        ``monitor.py`` as a subprocess, starts a four-CPU worker with a log
        server on port 8405, then requests ``get-jobs`` for the current
        client. For every returned job, both ``log_url`` and ``download_url``
        must answer HTTP 200 with content contained in the outputs returned
        by ``self._connect_and_create_actor``.
        """
        master_port = 8403
        monitor_port = 8404
        # start the master
        master = Master(port=master_port, monitor_port=monitor_port)
        th = threading.Thread(target=master.run)
        th.start()
        time.sleep(1)
        # start the cluster monitor
        # Handle both the compiled (.pyc) and the source (.py) file name.
        monitor_file = __file__.replace(
            os.path.join('tests', 'log_server_test.pyc'), 'monitor.py')
        monitor_file = monitor_file.replace(
            os.path.join('tests', 'log_server_test.py'), 'monitor.py')
        command = [
            sys.executable, monitor_file, "--monitor_port",
            str(monitor_port), "--address", "localhost:" + str(master_port)
        ]
        if _IS_WINDOWS:
            FNULL = tempfile.TemporaryFile()
        else:
            FNULL = open(os.devnull, 'w')
        try:
            monitor_proc = subprocess.Popen(
                command,
                stdout=FNULL,
                stderr=subprocess.STDOUT,
            )
            try:
                # Start worker
                cluster_addr = 'localhost:{}'.format(master_port)
                log_server_port = 8405
                worker = Worker(
                    cluster_addr, 4, log_server_port=log_server_port)

                # Test monitor API
                outputs = self._connect_and_create_actor(cluster_addr)
                time.sleep(5)  # Wait for the status update
                client = get_global_client()
                jobs_url = "{}/get-jobs?client_id={}".format(
                    master.monitor_url, client.client_id)
                r = requests.get(jobs_url)
                self.assertEqual(r.status_code, 200)
                data = json.loads(r.text)
                for job in data:
                    log_url = job.get('log_url')
                    self.assertIsNotNone(log_url)
                    r = requests.get(log_url)
                    self.assertEqual(r.status_code, 200)
                    log_content = json.loads(r.text).get('log')
                    self.assertIsNotNone(log_content)
                    log_content = log_content.replace('\r\n', '\n')
                    self.assertIn(log_content, outputs)

                    # Test download
                    download_url = job.get('download_url')
                    r = requests.get(download_url)
                    self.assertEqual(r.status_code, 200)
                    log_content = r.text.replace('\r\n', '\n')
                    self.assertIn(log_content, outputs)
            finally:
                # Clean context: stop and reap the monitor process even when
                # an assertion above fails.
                monitor_proc.kill()
                monitor_proc.wait()
        finally:
            # Release the handle used as the monitor's stdout/stderr sink.
            FNULL.close()
        disconnect()
        worker.exit()
        master.exit()