示例#1
0
 def test_retrieve_when_version_mismatch(self, app_client, body_data, expect_file):
     """Compare the 'retrieve' response with ``expect_file`` under a version mismatch.

     Args:
         app_client: Test client forwarded to the request helpers.
         body_data: Request body sent to the 'retrieve' endpoint.
         expect_file: Expected-result file name handed to the comparison helper.
     """
     with self._debugger_client.get_thread_instance():
         # The state check is made against the MISMATCH status rather than the default.
         check_state(app_client, ServerStatus.MISMATCH.value)
         send_and_compare_result(app_client, 'retrieve', body_data, expect_file)
         send_terminate_cmd(app_client)
示例#2
0
 def test_next_node_on_gpu(self, app_client):
     """Continue to a named node with a GPU MockDebuggerClient, then compare the full 'retrieve' result.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     client = MockDebuggerClient(backend='GPU')
     expected_response = {
         'metadata': {
             'state': 'sending',
             'enable_recheck': False
         }
     }
     with client.get_thread_instance():
         check_state(app_client)
         # Node-level 'continue' command targeting a specific node name.
         control_body = {
             'mode': 'continue',
             'level': 'node',
             'name': 'Default/TransData-op99'
         }
         response = get_request_result(app_client, 'control', control_body)
         assert response == expected_response
         # Check the state again, then fetch everything and compare with the stored result.
         check_state(app_client)
         send_and_compare_result(app_client, 'retrieve', {'mode': 'all'}, 'retrieve_next_node_on_gpu.json')
         send_terminate_cmd(app_client)
示例#3
0
 def test_compare_tensor_value(self, app_client):
     """Compare the 'tensor-comparisons' response for ``Default/args0:0`` with ``compare_tensors.json``.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     node_name = 'Default/args0'
     tensor_name = node_name + ':0'
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         # Continue for two steps, then request the tensor history of the node.
         get_request_result(app_client, 'control', {'mode': 'continue', 'steps': 2})
         check_state(app_client)
         get_request_result(app_client, 'tensor-history', {'name': node_name})
         polled = get_request_result(app_client, 'poll-data', {'pos': 0}, method='get')
         received_node = polled.get('receive_tensor', {}).get('node_name')
         assert received_node == node_name
         # Query the comparison and check it against the expected file.
         comparison_query = {
             'name': tensor_name,
             'detail': 'data',
             'shape': quote('[:, :]'),
             'tolerance': 1
         }
         send_and_compare_result(
             app_client, 'tensor-comparisons', comparison_query, 'compare_tensors.json', method='get')
         send_terminate_cmd(app_client)
示例#4
0
 def test_search_by_category(self, app_client, filter_condition, expect_file):
     """Send a GET 'search' request with ``filter_condition`` and compare with ``expect_file``.

     Args:
         app_client: Test client forwarded to the request helpers.
         filter_condition: Request body for the 'search' endpoint.
         expect_file: Expected-result file name handed to the comparison helper.
     """
     endpoint = 'search'
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         send_and_compare_result(
             app_client, endpoint, filter_condition, expect_file, method='get')
         send_terminate_cmd(app_client)
示例#5
0
 def test_continue_on_gpu(self, app_client, params, expect_file):
     """Send a 'continue' command with a two-graph GPU MockDebuggerClient and compare 'retrieve'.

     Args:
         app_client: Test client forwarded to the request helpers.
         params: Extra entries merged into the 'continue' request body.
         expect_file: Expected-result file name handed to the comparison helper.
     """
     client = MockDebuggerClient(backend='GPU', graph_num=2)
     saved_flag = settings.ENABLE_RECOMMENDED_WATCHPOINTS
     settings.ENABLE_RECOMMENDED_WATCHPOINTS = True
     try:
         with client.get_thread_instance():
             check_state(app_client)
             # Build the control request: the base 'continue' mode plus the caller's params.
             control_body = {'mode': 'continue'}
             control_body.update(params)
             response = get_request_result(app_client, 'control', control_body)
             assert response == {'metadata': {'state': 'sending', 'enable_recheck': False}}
             # Check the state again, then fetch everything and compare.
             check_state(app_client)
             send_and_compare_result(app_client, 'retrieve', {'mode': 'all'}, expect_file)
             send_terminate_cmd(app_client)
     finally:
         # Put the module-level flag back so the override does not outlive this test.
         settings.ENABLE_RECOMMENDED_WATCHPOINTS = saved_flag
示例#6
0
 def test_compare_tensor_value(self, app_client):
     """Compare the 'tensor-comparisons' response for ``Default/args0:0`` (rank 0) with the stored result.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     node_name = 'Default/args0'

     def assert_node_received():
         # Poll once and verify the reported tensor belongs to the expected node.
         polled = get_request_result(app_client, 'poll-data', {'pos': 0}, method='get')
         assert polled.get('receive_tensor', {}).get('node_name') == node_name, 'Node name unmatched.'

     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         # Continue for two steps, then request the tensor history of the node.
         get_request_result(app_client, 'control', {'mode': 'continue', 'steps': 2})
         check_state(app_client)
         get_request_result(app_client, 'tensor-history', {'name': node_name, 'rank_id': 0})
         assert_node_received()
         # First comparison request; its result is not inspected.
         comparison_query = {
             'name': node_name + ':0',
             'detail': 'data',
             'shape': quote('[:, :]'),
             'tolerance': 1,
             'rank_id': 0}
         get_request_result(app_client, 'tensor-comparisons', comparison_query, method='GET')
         # Give the tensor update a moment to arrive before polling again.
         time.sleep(0.01)
         assert_node_received()
         request_args = (app_client, 'tensor-comparisons', comparison_query, 'compare_tensors.json')
         if self.save_results:
             send_and_save_result(*request_args, method='get')
         send_and_compare_result(*request_args, method='get')
         send_terminate_cmd(app_client)
示例#7
0
 def test_retrieve_tensor_value(self, app_client):
     """Request a slice of ``Default/TransData-op99:0`` and compare it with ``retrieve_tensor_value.json``.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     node_name = 'Default/TransData-op99'
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         # Request the tensor history, then issue one poll-data request whose result is ignored.
         get_request_result(app_client, 'tensor-history', {'name': node_name, 'rank_id': 0}, method='post')
         get_request_result(app_client, 'poll-data', {'pos': 0}, method='get')
         tensor_query = {
             'name': node_name + ':0',
             'detail': 'data',
             'shape': quote('[1, 1:3]')
         }
         get_request_result(app_client, 'tensors', tensor_query, method='GET')
         # Give the tensor update a moment to arrive before polling.
         time.sleep(0.01)
         polled = get_request_result(app_client, 'poll-data', {'pos': 0}, method='get')
         assert polled.get('receive_tensor', {}).get('node_name') == node_name, 'Node name unmatched.'
         request_args = (app_client, 'tensors', tensor_query, 'retrieve_tensor_value.json')
         if self.save_results:
             send_and_save_result(*request_args, method='get')
         send_and_compare_result(*request_args, method='get')
         send_terminate_cmd(app_client)
示例#8
0
 def test_update_watchpoint(self, app_client):
     """Create a watchpoint, update its watch nodes, then compare the 'search' result for the leaf node.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     watch_point_id = 1
     leaf_node_name = 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias'
     condition = {
         'id': 'tensor_too_large',
         'params': [{
             'name': 'max_gt',
             'value': 1.0
         }]
     }
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         create_watchpoint(app_client, condition, watch_point_id)
         # Update the watchpoint's node list with the leaf node.
         update_body = {
             'watch_point_id': watch_point_id,
             'watch_nodes': [leaf_node_name],
             'mode': 1
         }
         get_request_result(app_client, 'update-watchpoint', update_body)
         # Search for the leaf node under this watchpoint and compare with the stored result.
         search_body = {
             'name': leaf_node_name,
             'watch_point_id': watch_point_id
         }
         send_and_compare_result(
             app_client, 'search', search_body, 'search_unwatched_leaf_node.json', method='get')
         send_terminate_cmd(app_client)
示例#9
0
 def test_create_and_delete_watchpoint(self, app_client):
     """Create six watchpoints, delete the one with id 4, then compare the watchpoint list.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     # (condition id, parameter name, parameter value) for each watchpoint, in creation order.
     condition_specs = (
         ('tensor_too_large', 'max_gt', 1.0),
         ('tensor_too_small', 'max_lt', -1.0),
         ('tensor_too_large', 'min_gt', 1e+32),
         ('tensor_too_small', 'min_lt', -1e+32),
         ('tensor_too_large', 'mean_gt', 0),
         ('tensor_too_small', 'mean_lt', 0),
     )
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         # Watchpoint ids are assigned sequentially starting from 1.
         for watch_point_id, (condition_id, param_name, threshold) in enumerate(condition_specs, start=1):
             condition = {'id': condition_id, 'params': [{'name': param_name, 'value': threshold}]}
             create_watchpoint(app_client, condition, watch_point_id)
         # Remove the fourth watchpoint.
         get_request_result(app_client, 'delete-watchpoint', {'watch_point_id': 4})
         # Retrieve the watchpoint list and compare it with the stored result.
         request_args = (app_client, 'retrieve', {'mode': 'watchpoint'}, 'create_and_delete_watchpoint.json')
         if self.save_results:
             send_and_save_result(*request_args)
         send_and_compare_result(*request_args)
         send_terminate_cmd(app_client)
示例#10
0
 def test_search_by_category_with_multi_graph(self, app_client, filter_condition, expect_file):
     """Send a GET 'search' request with ``filter_condition`` and compare with ``expect_file``.

     When ``self.save_results`` is truthy the response is also saved via
     ``send_and_save_result`` before the comparison.

     Args:
         app_client: Test client forwarded to the request helpers.
         filter_condition: Request body for the 'search' endpoint.
         expect_file: Expected-result file name handed to the helpers.
     """
     request_args = (app_client, 'search', filter_condition, expect_file)
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         if self.save_results:
             send_and_save_result(*request_args, method='get')
         send_and_compare_result(*request_args, method='get')
         send_terminate_cmd(app_client)
示例#11
0
 def test_retrieve_when_train_begin(self, app_client, body_data, expect_file):
     """Send ``body_data`` to the 'retrieve' endpoint and compare with ``expect_file``.

     Args:
         app_client: Test client forwarded to the request helpers.
         body_data: Request body sent to the 'retrieve' endpoint.
         expect_file: Expected-result file name handed to the comparison helper.
     """
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         send_and_compare_result(app_client, 'retrieve', body_data, expect_file)
         send_terminate_cmd(app_client)
示例#12
0
 def test_retrieve_tensor_hits(self, app_client, body_data, expect_file):
     """Send a GET 'tensor-hits' request with ``body_data`` and compare with ``expect_file``.

     When ``self.save_results`` is truthy the response is also saved via
     ``send_and_save_result`` before the comparison.

     Args:
         app_client: Test client forwarded to the request helpers.
         body_data: Request body for the 'tensor-hits' endpoint.
         expect_file: Expected-result file name handed to the helpers.
     """
     request_args = (app_client, 'tensor-hits', body_data, expect_file)
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         if self.save_results:
             send_and_save_result(*request_args, method='GET')
         send_and_compare_result(*request_args, method='GET')
         send_terminate_cmd(app_client)
示例#13
0
 def test_multi_retrieve_when_train_begin(self, app_client, body_data, expect_file):
     """Compare the 'retrieve' response using a two-graph Ascend MockDebuggerClient.

     Args:
         app_client: Test client forwarded to the request helpers.
         body_data: Request body sent to the 'retrieve' endpoint.
         expect_file: Expected-result file name handed to the comparison helper.
     """
     multi_graph_client = MockDebuggerClient(backend='Ascend', graph_num=2)
     with multi_graph_client.get_thread_instance():
         check_state(app_client)
         send_and_compare_result(app_client, 'retrieve', body_data, expect_file)
         send_terminate_cmd(app_client)
示例#14
0
 def test_get_conditions(self, app_client):
     """Fetch the session's condition collections by full URL and compare with the Ascend result file.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     # The URL is already complete, so the helpers are called with full_url=True.
     request_args = (
         app_client,
         '/v1/mindinsight/debugger/sessions/0/condition-collections',
         {},
         'get_conditions_for_ascend.json',
     )
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         if self.save_results:
             send_and_save_result(*request_args, method='get', full_url=True)
         send_and_compare_result(*request_args, method='get', full_url=True)
         send_terminate_cmd(app_client)
示例#15
0
 def test_retrieve_bfs_node(self, app_client, body_data, expect_file):
     """Send a GET 'retrieve_node_by_bfs' request with ``body_data`` and compare with ``expect_file``.

     Args:
         app_client: Test client forwarded to the request helpers.
         body_data: Request body for the 'retrieve_node_by_bfs' endpoint.
         expect_file: Expected-result file name handed to the comparison helper.
     """
     endpoint = 'retrieve_node_by_bfs'
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         send_and_compare_result(app_client, endpoint, body_data, expect_file, method='get')
         send_terminate_cmd(app_client)
示例#16
0
 def test_recommend_watchpoints(self, app_client):
     """Compare the watchpoint list with ``settings.ENABLE_RECOMMENDED_WATCHPOINTS`` switched on.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     saved_flag = settings.ENABLE_RECOMMENDED_WATCHPOINTS
     settings.ENABLE_RECOMMENDED_WATCHPOINTS = True
     try:
         with self._debugger_client.get_thread_instance():
             check_state(app_client)
             send_and_compare_result(
                 app_client,
                 'retrieve',
                 {'mode': 'watchpoint'},
                 'recommended_watchpoints_at_startup.json',
                 method='post')
             send_terminate_cmd(app_client)
     finally:
         # Put the module-level flag back so the override does not outlive this test.
         settings.ENABLE_RECOMMENDED_WATCHPOINTS = saved_flag
示例#17
0
 def test_get_conditions(self, app_client):
     """Fetch the condition collections by full conditionmgr URL and compare with the GPU result file.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     # The URL is already complete, so the helper is called with full_url=True.
     full_url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/condition-collections'
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         send_and_compare_result(
             app_client, full_url, {}, 'get_conditions_for_gpu.json', method='get', full_url=True)
         send_terminate_cmd(app_client)
示例#18
0
 def test_retrieve_tensor_graph(self, app_client, body_data, expect_file):
     """Send a GET 'tensor-graphs' request with ``body_data`` and compare with ``expect_file``.

     Args:
         app_client: Test client forwarded to the request helpers.
         body_data: Request body; its 'tensor_name' entry is checked against the polled data.
         expect_file: Expected-result file name handed to the helpers.
     """
     request_args = (app_client, 'tensor-graphs', body_data, expect_file)
     with self._debugger_client.get_thread_instance():
         create_watchpoint_and_wait(app_client)
         # First request; its result is not inspected.
         get_request_result(app_client, 'tensor-graphs', body_data, method='GET')
         # Give the tensor update a moment to arrive before polling.
         time.sleep(0.01)
         polled = get_request_result(app_client, 'poll-data', {'pos': 0}, method='get')
         received_tensor = polled.get('receive_tensor', {}).get('tensor_name')
         assert received_tensor == body_data.get('tensor_name')
         if self.save_results:
             send_and_save_result(*request_args, method='GET')
         send_and_compare_result(*request_args, method='GET')
         send_terminate_cmd(app_client)
示例#19
0
 def test_retrieve_tensor_history(self, app_client):
     """Compare 'tensor-history' for ``Default/TransData-op99`` before and after a poll-data request.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     node_name = 'Default/TransData-op99'
     history_query = {'name': node_name, 'rank_id': 0}

     def save_then_compare(expect_file):
         # Optionally save the response, then compare it with the named result file.
         if self.save_results:
             send_and_save_result(app_client, 'tensor-history', history_query, expect_file)
         send_and_compare_result(app_client, 'tensor-history', history_query, expect_file)

     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         # The first response is compared with the "empty" history file.
         save_then_compare('retrieve_empty_tensor_history.json')
         # Poll once and verify the reported tensor belongs to the expected node.
         polled = get_request_result(app_client, 'poll-data', {'pos': 0}, method='get')
         assert polled.get('receive_tensor', {}).get('node_name') == node_name, 'Node name unmatched.'
         # The same query is then compared with the "full" history file.
         save_then_compare('retrieve_full_tensor_history.json')
         send_terminate_cmd(app_client)
示例#20
0
 def test_retrieve_tensor_value(self, app_client):
     """Compare the tensor history and a tensor slice of ``Default/TransData-op99`` with stored results.

     Args:
         app_client: Test client forwarded to the request helpers.
     """
     node_name = 'Default/TransData-op99'
     history_query = {'name': node_name}
     with self._debugger_client.get_thread_instance():
         check_state(app_client)
         # The first history response is compared with the "empty" history file.
         send_and_compare_result(app_client, 'tensor-history', history_query, 'retrieve_empty_tensor_history.json')
         # Poll once and verify the reported tensor belongs to the expected node.
         polled = get_request_result(app_client, 'poll-data', {'pos': 0}, method='get')
         received_node = polled.get('receive_tensor', {}).get('node_name')
         assert received_node == node_name
         # The same query is then compared with the "full" history file.
         send_and_compare_result(app_client, 'tensor-history', history_query, 'retrieve_full_tensor_history.json')
         # Finally request a slice of the tensor value and compare it.
         tensor_query = {
             'name': node_name + ':0',
             'detail': 'data',
             'shape': quote('[1, 1:3]')
         }
         send_and_compare_result(app_client, 'tensors', tensor_query, 'retrieve_tensor_value.json', method='get')
         send_terminate_cmd(app_client)
示例#21
0
 def test_before_train_begin(self, app_client):
     """Compare the 'retrieve' response for mode 'all' with ``before_train_begin.json``.

     Unlike the other tests here, this one does not enter a debugger client thread context.

     Args:
         app_client: Test client forwarded to the comparison helper.
     """
     send_and_compare_result(app_client, 'retrieve', {'mode': 'all'}, 'before_train_begin.json')
示例#22
0
def send_terminate_cmd(app_client):
    """Send the terminate command to the debugger 'control' endpoint.

    Args:
        app_client: Test client forwarded to ``send_and_compare_result``.
    """
    # URL segments must be joined with forward slashes. ``os.path.join`` uses
    # the host path separator (a backslash on Windows), so ``posixpath`` is
    # used instead; on POSIX systems the result is unchanged.
    import posixpath

    # NOTE(review): the URL is pre-joined with DEBUGGER_BASE_URL but passed
    # without full_url=True -- confirm the helper accepts that form.
    url = posixpath.join(DEBUGGER_BASE_URL, 'control')
    body_data = {'mode': 'terminate'}
    send_and_compare_result(app_client, url, body_data)