def test_submit_models(self):
    """Submit the models from ``_load_mnist(2)`` via a CGO engine and relay trial metrics.

    The protocol output and input are both pointed at the same file under
    ``generated/`` (presumably so the command written during submission can
    be read back by ``protocol.receive()`` -- confirm against the protocol
    module). The trial is only executed when CUDA is available with at least
    two devices; the advisor workers and the engine are joined either way.

    Both file handles are closed when the test finishes, so the protocol
    module is left referring to closed files; later users must install
    their own handles.
    """
    _reset()
    nni.retiarii.debug_configs.framework = 'pytorch'
    os.makedirs('generated', exist_ok=True)
    import nni.runtime.platform.test as tt

    protocol_path = 'generated/debug_protocol_out_file.py'
    # ``with`` guarantees both handles are released, even if the test fails.
    with open(protocol_path, 'wb') as out_file, open(protocol_path, 'rb') as in_file:
        protocol._set_out_file(out_file)
        protocol._set_in_file(in_file)

        models = _load_mnist(2)

        advisor = RetiariiAdvisor('ws://_unittest_placeholder_')
        advisor._channel = protocol.LegacyCommandChannel()
        advisor.default_worker.start()
        advisor.assessor_worker.start()

        remote = RemoteConfig(machine_list=[])
        remote.machine_list.append(
            RemoteMachineConfig(host='test', gpu_indices=[0, 1, 2, 3]))
        cgo_engine = CGOExecutionEngine(training_service=remote, batch_waiting_time=0)
        set_execution_engine(cgo_engine)
        submit_models(*models)
        time.sleep(3)

        if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
            _, data = protocol.receive()
            tt.init_params(nni.load(data))

            trial_thread = threading.Thread(
                target=CGOExecutionEngine.trial_execute_graph)
            trial_thread.start()

            last_metric = None
            while True:
                time.sleep(1)
                # Sample liveness *before* polling so that a metric emitted
                # right before the thread exits is still picked up below.
                finished = not trial_thread.is_alive()
                if tt._last_metric:
                    metric = tt.get_last_metric()
                    if metric != last_metric:
                        last_metric = metric
                        # Serialise 'value' on a copy: mutating ``metric``
                        # would make the duplicate check above compare an
                        # encoded value against a raw one.
                        report = dict(metric)
                        if 'value' in report:
                            report['value'] = json.dumps(report['value'])
                        advisor.handle_report_metric_data(report)
                # Always reached, even when the metric is unchanged, so the
                # loop cannot spin forever after the trial has ended.
                if finished:
                    break
            trial_thread.join()

        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo_engine.join()
def test_submit_models(self):
    """Submit the models from ``_load_mnist(2)`` via a CGO engine and relay trial metrics.

    A ``CGOExecutionEngine`` is built over four ``GPUDevice("test", i)``
    entries and installed as the execution engine. The protocol output and
    input are both pointed at the same file under ``generated/`` (presumably
    so the submitted command can be read back by ``protocol.receive()`` --
    confirm against the protocol module). The trial is only executed when
    CUDA is available with at least two devices.

    Both file handles are closed when the test finishes, so the protocol
    module is left referring to closed files; later users must install
    their own handles.
    """
    _reset()
    nni.retiarii.debug_configs.framework = 'pytorch'
    os.makedirs('generated', exist_ok=True)
    from nni.runtime import protocol
    import nni.runtime.platform.test as tt

    protocol_path = 'generated/debug_protocol_out_file.py'
    # ``with`` guarantees both handles are released, even if the test fails.
    with open(protocol_path, 'wb') as out_file, open(protocol_path, 'rb') as in_file:
        protocol._set_out_file(out_file)
        protocol._set_in_file(in_file)

        models = _load_mnist(2)
        advisor = RetiariiAdvisor()

        devices = [GPUDevice("test", gpu_index) for gpu_index in range(4)]
        cgo_engine = CGOExecutionEngine(devices=devices, batch_waiting_time=0)
        set_execution_engine(cgo_engine)
        submit_models(*models)
        time.sleep(3)

        if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
            _, data = protocol.receive()
            tt.init_params(nni.load(data))

            trial_thread = threading.Thread(
                target=CGOExecutionEngine.trial_execute_graph)
            trial_thread.start()

            last_metric = None
            while True:
                time.sleep(1)
                # Sample liveness *before* polling so that a metric emitted
                # right before the thread exits is still picked up below.
                finished = not trial_thread.is_alive()
                if tt._last_metric:
                    metric = tt.get_last_metric()
                    if metric != last_metric:
                        last_metric = metric
                        # Serialise 'value' on a copy: mutating ``metric``
                        # would make the duplicate check above compare an
                        # encoded value against a raw one.
                        report = dict(metric)
                        if 'value' in report:
                            report['value'] = json.dumps(report['value'])
                        advisor.handle_report_metric_data(report)
                # Always reached, even when the metric is unchanged, so the
                # loop cannot spin forever after the trial has ended.
                if finished:
                    break
            trial_thread.join()

        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo_engine.join()
def test_submit_models(self):
    """Submit the models from ``_load_mnist(2)`` and relay trial metrics to the advisor.

    Sets ``CGO=true`` in the process environment (not restored afterwards)
    and points the protocol's ``_out_file`` / ``_in_file`` at the same file
    under ``generated/`` (presumably so the submitted command can be read
    back by ``protocol.receive()`` -- confirm against the protocol module).
    The trial is only executed when CUDA is available with at least two
    devices; the advisor workers are joined either way.

    Both file handles are closed when the test finishes, so the protocol
    module is left referring to closed files; later users must install
    their own handles.
    """
    os.environ['CGO'] = 'true'
    os.makedirs('generated', exist_ok=True)
    from nni.runtime import protocol
    import nni.runtime.platform.test as tt

    protocol_path = 'generated/debug_protocol_out_file.py'
    # ``with`` guarantees both handles are released, even if the test fails.
    with open(protocol_path, 'wb') as out_file, open(protocol_path, 'rb') as in_file:
        protocol._out_file = out_file
        protocol._in_file = in_file

        models = _load_mnist(2)
        advisor = RetiariiAdvisor()
        submit_models(*models)

        if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
            _, data = protocol.receive()
            params = json.loads(data)
            # Override the step budget (presumably to keep the test short).
            params['parameters']['training_kwargs']['max_steps'] = 100
            tt.init_params(params)

            # Pass the callable itself: calling it here would run the whole
            # trial on this thread and hand its return value to Thread as
            # the target, leaving the worker thread with nothing to do.
            trial_thread = threading.Thread(
                target=CGOExecutionEngine.trial_execute_graph)
            trial_thread.start()

            last_metric = None
            while True:
                time.sleep(1)
                # Sample liveness *before* polling so that a metric emitted
                # right before the thread exits is still picked up below.
                finished = not trial_thread.is_alive()
                if tt._last_metric:
                    metric = tt.get_last_metric()
                    if metric != last_metric:
                        advisor.handle_report_metric_data(metric)
                        last_metric = metric
                # Always reached, even when the metric is unchanged, so the
                # loop cannot spin forever after the trial has ended.
                if finished:
                    break
            trial_thread.join()

        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()