Exemplo n.º 1
0
 def test_normal_c2(self):
     """Run the unmodified base config (RankDNN) and verify a clean run.

     The YAML file name is derived from this method's own name. After the
     run the return code must be 0, stderr must not contain 'Traceback',
     and ``self.epoch_re`` is checked against stdout with expected count 2.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'
     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, failure_msg)
Exemplo n.º 2
0
 def test_mmoe_normal(self):
     """Run the unmodified base config (MultiTaskMMOE) and verify it.

     Checks a zero return code, no 'Traceback' in stderr, and the
     'epoch.+done' pattern against stdout with expected count 2.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'
     self.run_yaml()

     msg = self.err_msg
     stdout_text = self.out
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len(stdout_text, 'epoch.+done', 2, msg)
Exemplo n.º 3
0
 def test_runner_no_phases_c2(self):
     """Remove "phases" from the first runner and run the config.

     Per the original description, a runner without phases runs all
     phases; the epoch pattern is checked with expected count 4.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     # Drop the explicit phase list from runner 0.
     first_runner = self.yaml_content["runner"][0]
     first_runner.pop("phases")

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len(self.out, self.epoch_re, 4, failure_msg)
Exemplo n.º 4
0
 def test_workspace_abs_c2(self):
     """Run with the workspace set to './PaddleRec/models/rank/dnn'.

     NOTE(review): the test name says "abs" but the path written here
     starts with './' -- confirm this is the intended workspace value.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     workspace_dir = './PaddleRec/models/rank/dnn'
     self.yaml_content["workspace"] = workspace_dir

     self.run_yaml()

     msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, msg)
Exemplo n.º 5
0
 def test_debug_open_c2(self):
     """Run with the top-level "debug" key set to the string 'True'.

     Expects return code 0, no 'Traceback' in stderr, and the epoch
     pattern checked against stdout with expected count 2.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'

     debug_flag = 'True'
     self.yaml_content["debug"] = debug_flag

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, failure_msg)
Exemplo n.º 6
0
 def test_optimizer_adam_c2(self):
     """Run with the optimizer class switched to 'Adam'.

     Expects return code 0, no 'Traceback' in stderr, and the epoch
     pattern checked against stdout with expected count 2.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     optimizer_cfg = self.yaml_content["hyper_parameters"]['optimizer']
     optimizer_cfg['class'] = 'Adam'

     self.run_yaml()

     msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, msg)
Exemplo n.º 7
0
 def test_optimizer_sgd_reg(self):
     """Run with the 'SGD' optimizer and ``reg`` set to 0.1.

     Expects return code 0, no 'Traceback' in stderr, and the
     'epoch.+done' pattern checked against stdout with expected count 2.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'

     hyper_params = self.yaml_content["hyper_parameters"]
     hyper_params['optimizer']['class'] = 'SGD'
     hyper_params['reg'] = 0.1

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len(self.out, 'epoch.+done', 2, failure_msg)
Exemplo n.º 8
0
 def test_single_infer_in_epochs_dir_c2(self):
     """Run the config in mode 'runner2' (single infer on one epoch).

     Expects return code 0, no 'Traceback' in stderr, and
     ``self.run_time_re`` checked against stdout with expected count 1.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     selected_mode = 'runner2'
     self.yaml_content['mode'] = selected_mode

     self.run_yaml()

     msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len(self.out, self.run_time_re, 1, msg)
Exemplo n.º 9
0
 def test_single_infer_gpu(self):
     """Run mode 'runner2' with the third runner's device set to 'gpu'.

     Expects return code 0, no 'Traceback' in stderr, and
     ``self.run_time_re`` checked against stdout with expected count 1.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'

     config = self.yaml_content
     config['mode'] = 'runner2'
     config["runner"][2]["device"] = 'gpu'

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len(self.out, self.run_time_re, 1, failure_msg)
Exemplo n.º 10
0
 def test_optimizer_sgd_gpu(self):
     """Run with runner 0 on 'gpu' and the optimizer class set to 'SGD'.

     Expects return code 0, no 'Traceback' in stderr, and the epoch
     pattern checked against stdout with expected count 2.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     config = self.yaml_content
     config["runner"][0]["device"] = 'gpu'
     config["hyper_parameters"]['optimizer']['class'] = 'SGD'

     self.run_yaml()

     msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, msg)
Exemplo n.º 11
0
 def test_increment_train(self):
     """Run incremental training by selecting mode 'runner1'.

     Expects return code 0, no 'Traceback' in stderr, the epoch pattern
     checked against stdout with expected count 2, and the value captured
     after the 'mode' label in stdout to equal 'runner1'.
     """
     self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
     self.yaml_content['mode'] = 'runner1'
     self.run_yaml()
     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, self.err_msg)
     # Raw string avoids the invalid '\s' / '\S' escape sequences; the regex
     # engine reads '\n' as a newline, so the same text is matched
     # (assuming the helper hands the pattern to `re` -- confirm).
     built_in.regex_match_equal(self.out, r'\nmode\s+(\S+)\s+\n', 'runner1',
                                self.err_msg)
Exemplo n.º 12
0
 def test_two_phase_train_c2(self):
     """Run single training with runner 0 configured for two phases.

     Runner 0's "phases" is set to ['phase1', 'phase2']; the epoch
     pattern is then checked against stdout with expected count 4.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'

     phase_names = ['phase1', 'phase2']
     self.yaml_content["runner"][0]["phases"] = phase_names

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len(self.out, self.epoch_re, 4, failure_msg)
Exemplo n.º 13
0
    def test_single_infer(self):
        """Run single inference by selecting mode 'runner2'.

        Expects return code 0, no 'Traceback' in stderr, and the
        'Infer.+done' pattern checked against stdout with expected count 1.
        """
        case_name = sys._getframe().f_code.co_name
        self.yaml_config_name = case_name + '.yaml'

        selected_mode = 'runner2'
        self.yaml_content['mode'] = selected_mode
        self.run_yaml()

        msg = self.err_msg
        built_in.equals(self.pro.returncode, 0, msg)
        built_in.not_contains(self.err, 'Traceback', msg)
        built_in.regex_match_len(self.out, 'Infer.+done', 1, msg)
Exemplo n.º 14
0
 def test_mode_str_ps_local_cluster_1p_1t_geo_c2(self):
     """Run runner0 as 'local_cluster_train' with the 'geo' strategy.

     Expects return code 0; 'logs/worker.0' is passed to the helpers for
     the epoch pattern (expected count 2) and for the presence of
     'GeoSgdCommunicator Initialized'.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     self.yaml_content["mode"] = "runner0"
     runner_cfg = self.yaml_content["runner"][0]
     runner_cfg["class"] = "local_cluster_train"
     runner_cfg["distribute_strategy"] = "geo"

     self.run_yaml()

     failure_msg = self.err_msg
     worker_log = 'logs/worker.0'
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.regex_match_len(worker_log, self.epoch_re, 2, failure_msg)
     built_in.contains(worker_log, 'GeoSgdCommunicator Initialized',
                       failure_msg)
Exemplo n.º 15
0
 def test_increment_train_c2(self):
     """Run 'runner0' followed by 'runner1' via a list-valued mode.

     Per the original description both runners are single-machine
     training runners. The epoch pattern is checked against stdout with
     expected count 4.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'

     run_modes = ['runner0', 'runner1']
     self.yaml_content['mode'] = run_modes

     self.run_yaml()

     msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len(self.out, self.epoch_re, 4, msg)
Exemplo n.º 16
0
 def test_single_infer_in_base_dir_c2(self):
     """Run mode 'runner2' with its init_model_path set to 'increment_dnn'.

     Expects return code 0, no 'Traceback' in stderr, and
     ``self.run_time_re`` checked against stdout with expected count 2.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     config = self.yaml_content
     config['mode'] = 'runner2'
     infer_runner = config['runner'][2]
     infer_runner['init_model_path'] = 'increment_dnn'

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len(self.out, self.run_time_re, 2, failure_msg)
Exemplo n.º 17
0
 def test_mode_list_ps_local_cluster_and_increment_c2(self):
     """Run two local-cluster runners listed in ``mode``.

     Runners 0 and 1 are both switched to 'local_cluster_train'; per the
     original description one trains and the other trains incrementally.
     'logs/worker.0' is passed to the epoch check with expected count 2.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'

     runners = self.yaml_content["runner"]
     for idx in (0, 1):
         runners[idx]["class"] = "local_cluster_train"
     self.yaml_content["mode"] = ["runner0", "runner1"]

     self.run_yaml()

     msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len('logs/worker.0', self.epoch_re, 2, msg)
Exemplo n.º 18
0
 def test_mode_str_ps_local_cluster_1p_2t_geo_c2(self):
     """Run runner0 as a 'geo' local cluster with 2 workers and 1 server.

     The first dataset's data_path is set to "criteo_data". After the
     run, return code 0 is expected and both 'logs/worker.0' and
     'logs/worker.1' are passed to the epoch check with expected count 2.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     config = self.yaml_content
     config["mode"] = "runner0"
     runner_cfg = config["runner"][0]
     runner_cfg["class"] = "local_cluster_train"
     runner_cfg["distribute_strategy"] = "geo"
     runner_cfg["worker_num"] = 2
     runner_cfg["server_num"] = 1
     config["dataset"][0]["data_path"] = "criteo_data"

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     for worker_log in ('logs/worker.0', 'logs/worker.1'):
         built_in.regex_match_len(worker_log, self.epoch_re, 2, failure_msg)
 def test_QueueDataset_train_c2(self):
     """Train with the first dataset's type set to "QueueDataset".

     Runs the config and checks return code 0, no 'Traceback' in stderr,
     and the epoch pattern against stdout with expected count 2.
     """
     self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
     self.yaml_content["dataset"][0]["type"] = "QueueDataset"
     self.run_yaml()
     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, self.err_msg)
     # NOTE: on Windows and macOS the dataset type is forcibly switched to
     # DataLoader. The condition below also picks "DataLoader" whenever the
     # interpreter is not Python 2.
     if utils.get_platform() != "LINUX" or not six.PY2:
         check_type = "DataLoader"
     else:
         check_type = "QueueDataset"
     # NOTE(review): check_type is never used in the visible lines; presumably
     # a final assertion on the reported dataset type is missing -- confirm.
 def test_mode_list_single_selected_gpus_1card_c2(self):
     """Run runner 0 on 'gpu' with selected_gpus "0" (one card).

     Expects return code 0, no 'Traceback' in stderr, the epoch pattern
     checked against stdout with expected count 2, and the value captured
     after 'train.trainer.engine' in stdout to equal "single".
     """
     self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
     self.yaml_content["runner"][0]["device"] = 'gpu'
     self.yaml_content["runner"][0]["selected_gpus"] = "0"
     self.run_yaml()
     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, self.err_msg)
     # Raw string avoids the invalid '\s' / '\S' escape sequences; the regex
     # engine reads '\n' as a newline, so the same text is matched
     # (assuming the helper hands the pattern to `re` -- confirm).
     built_in.regex_match_equal(self.out,
                                r'\ntrain.trainer.engine\s+(\S+)\s+\n',
                                "single",
                                self.err_msg)
Exemplo n.º 21
0
 def test_two_phase_train(self):
     """Append a second phase ('phase2') and run training.

     The added phase uses '{workspace}/model.py', the dataset named
     'dataset_infer' and one thread. The 'epoch.+done' pattern is checked
     against stdout with expected count 4.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     second_phase = {
         'name': 'phase2',
         'model': '{workspace}/model.py',  # user-defined model
         'dataset_name': 'dataset_infer',  # dataset is selected by name
         'thread_num': 1
     }
     self.yaml_content['phase'].append(second_phase)

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len(self.out, 'epoch.+done', 4, failure_msg)
Exemplo n.º 22
0
 def test_optimizer_lr_le(self):
     """Run 'SGD' with learning_rate 2e-2 and ``reg`` 0.1.

     Expects return code 0, no 'Traceback' in stderr, the epoch pattern
     checked against stdout with expected count 2, and the value captured
     after 'hyper_parameters.optimizer.learning_rate' in stdout to equal
     '0.02'.
     """
     self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
     self.yaml_content["hyper_parameters"]['optimizer']['class'] = 'SGD'
     self.yaml_content["hyper_parameters"]['optimizer']['learning_rate'] = 2e-2
     self.yaml_content["hyper_parameters"]['reg'] = 0.1
     self.run_yaml()
     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.regex_match_len(self.out, self.epoch_re, 2, self.err_msg)
     # Raw string avoids the invalid '\s' / '\S' escape sequences; the regex
     # engine reads '\n' as a newline, so the same text is matched
     # (assuming the helper hands the pattern to `re` -- confirm).
     built_in.regex_match_equal(
         self.out,
         r'\nhyper_parameters.optimizer.learning_rate\s+(\S+)\s+\n',
         '0.02',
         self.err_msg)
 def test_mode_list_collective_selected_gpus_2f_2cards_c2(self):
     """Run runner 0 on GPUs "0,1" as a collective local cluster.

     Runner 0 is set to 'local_cluster_train' with fleet_mode
     'collective', and the first dataset reads from "criteo_data".
     Expects return code 0, no 'Traceback' in stderr, the epoch check on
     'logs/worker.0' with expected count 2, and 'logs/server.0' to be
     reported as not existing.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'

     runner_cfg = self.yaml_content["runner"][0]
     runner_cfg["device"] = 'gpu'
     runner_cfg["selected_gpus"] = "0,1"
     runner_cfg["class"] = "local_cluster_train"
     runner_cfg["fleet_mode"] = "collective"
     self.yaml_content["dataset"][0]["data_path"] = "criteo_data"

     self.run_yaml()

     msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len('logs/worker.0', self.epoch_re, 2, msg)
     built_in.path_not_exist('logs/server.0', msg)
Exemplo n.º 24
0
 def test_mode_str_ps_local_cluster_1p_2t_2f_async_c2(self):
     """Run runner0 as a local cluster with worker_num 10 and 1 server.

     Per the original note, worker_num is overridden by the number of
     files under data_path (set here to "criteo_data"). Expects return
     code 0, no 'Traceback' for 'logs/server.0' and 'logs/worker.0', and
     the epoch check on 'logs/worker.1' with expected count 2.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     config = self.yaml_content
     config["mode"] = "runner0"
     runner_cfg = config["runner"][0]
     runner_cfg["class"] = "local_cluster_train"
     runner_cfg["worker_num"] = 10
     runner_cfg["server_num"] = 1
     config["dataset"][0]["data_path"] = "criteo_data"

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     for log_path in ('logs/server.0', 'logs/worker.0'):
         built_in.not_contains(log_path, 'Traceback', failure_msg)
     built_in.regex_match_len('logs/worker.1', self.epoch_re, 2, failure_msg)
Exemplo n.º 25
0
 def test_thread_num(self):
     """Append a second phase ('phase2') that uses two threads and run.

     Expects return code 0, no 'Traceback' in stderr,
     ``self.run_time_re`` checked against stdout with expected count 4,
     and the value captured after 'phase.phase2.thread_num' in stdout to
     equal 2.
     """
     self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
     self.yaml_content['phase'].append({
         'name': 'phase2',
         'model': '{workspace}/model.py',  # user-defined model
         'dataset_name': 'dataset_infer',  # select dataset by name
         'thread_num': 2
     })
     self.run_yaml()
     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.regex_match_len(self.out, self.run_time_re, 4, self.err_msg)
     # Raw string avoids the invalid '\s' / '\S' escape sequences; the regex
     # engine reads '\n' as a newline, so the same text is matched
     # (assuming the helper hands the pattern to `re` -- confirm).
     # NOTE(review): the expected value is the int 2 while a regex capture is
     # a string -- confirm the helper normalises types before comparing.
     built_in.regex_match_equal(self.out,
                                r'\nphase.phase2.thread_num\s+(\S+)\s+\n', 2,
                                self.err_msg)
 def test_mode_list_collective_selected_gpus_2f_4cards_c2(self):
     """Run runner 0 as a collective local cluster on selected GPUs.

     Per the original note, the number of trainers started is decided
     from the file count, gpu_nums and worker_num. Expects return code 0,
     no 'Traceback' in stderr, the epoch check on 'logs/worker.1' with
     expected count 2, and 'logs/worker.2' reported as not existing.

     NOTE(review): the name says "4cards" but selected_gpus is "0,1" --
     confirm the intended GPU list.
     """
     config_stem = sys._getframe().f_code.co_name
     self.yaml_config_name = config_stem + '.yaml'

     runner_cfg = self.yaml_content["runner"][0]
     runner_cfg["device"] = 'gpu'
     runner_cfg["selected_gpus"] = "0,1"
     runner_cfg["class"] = "local_cluster_train"
     runner_cfg["fleet_mode"] = "collective"
     self.yaml_content["dataset"][0]["data_path"] = "criteo_data"

     self.run_yaml()

     msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, msg)
     built_in.not_contains(self.err, 'Traceback', msg)
     built_in.regex_match_len('logs/worker.1', self.epoch_re, 2, msg)
     built_in.path_not_exist('logs/worker.2', msg)
Exemplo n.º 27
0
 def test_mode_list_ps_selected_gpus_2f_2card_c2(self):
     """Run runner 0 on GPUs "0,1" without setting a fleet mode.

     The first dataset reads from "criteo_data". Per the original
     description the run switches from ps to collective and uses the
     local cluster engine. Expects return code 0, no 'Traceback' in
     stderr, 'logs/server.0' reported as not existing, the value captured
     after 'train.trainer.engine' in stdout to equal "local_cluster", and
     ``self.auc_re`` checked for 'logs/worker.1' with expected count 6.
     """
     self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
     self.yaml_content["runner"][0]["device"] = 'gpu'
     self.yaml_content["runner"][0]["selected_gpus"] = "0,1"
     self.yaml_content["dataset"][0]["data_path"] = "criteo_data"
     self.run_yaml()
     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.path_not_exist('logs/server.0', self.err_msg)
     # Raw string avoids the invalid '\s' / '\S' escape sequences; the regex
     # engine reads '\n' as a newline, so the same text is matched
     # (assuming the helper hands the pattern to `re` -- confirm).
     built_in.regex_match_equal(self.out,
                                r'\ntrain.trainer.engine\s+(\S+)\s+\n',
                                "local_cluster",
                                self.err_msg)
     built_in.regex_match_len('logs/worker.1', self.auc_re, 6, self.err_msg)
Exemplo n.º 28
0
 def test_QueueDataset_train(self):
     """Train with a dataset type chosen by the interpreter version.

     On Python 3 the first dataset's type is set to "DataLoader",
     otherwise to "QueueDataset". After the run, expects return code 0,
     no 'Traceback' in stderr, the 'epoch.+done' pattern checked against
     stdout with expected count 2, and the value captured after
     'dataset.dataset_train.type' in stdout to equal the expected type.
     """
     self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
     if six.PY3:
         self.yaml_content["dataset"][0]["type"] = "DataLoader"
     else:
         self.yaml_content["dataset"][0]["type"] = "QueueDataset"
     self.run_yaml()
     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.regex_match_len(self.out, 'epoch.+done', 2, self.err_msg)
     # NOTE: Windows and macOS are forcibly switched to DataLoader, and
     # Python 3 always expects DataLoader. This single condition replaces a
     # dead first assignment that was immediately overwritten by an
     # equivalent if/else.
     if six.PY3 or utils.get_platform() != "LINUX":
         check_type = "DataLoader"
     else:
         check_type = "QueueDataset"
     # Raw string avoids the invalid '\s' / '\S' escape sequences; the regex
     # engine reads '\n' as a newline, so the same text is matched
     # (assuming the helper hands the pattern to `re` -- confirm).
     built_in.regex_match_equal(
         self.out, r'\ndataset.dataset_train.type\s+(\S+)\s+\n', check_type,
         self.err_msg)
Exemplo n.º 29
0
 def test_mode_list_collective_local_cluster_and_increment_c2(self):
     """Run two collective local-cluster runners listed in ``mode``.

     Runners 0 and 1 are both set to GPUs "0,1", 'local_cluster_train'
     and fleet_mode 'collective'; datasets 0 and 1 read "criteo_data".
     Per the original description one runner trains and the other trains
     incrementally. Expects return code 0, no 'Traceback' in stderr, the
     epoch check on 'logs/worker.0' and 'logs/worker.1' (expected count 2
     each), and the '.+load.+increment_dnn' pattern on 'logs/worker.1'
     with expected count 1.
     """
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     config = self.yaml_content
     for idx in (0, 1):
         # Apply the same collective GPU setup to each runner, then point
         # the dataset with the same index at the shared data directory.
         runner_cfg = config["runner"][idx]
         runner_cfg["device"] = 'gpu'
         runner_cfg["selected_gpus"] = "0,1"
         runner_cfg["class"] = "local_cluster_train"
         runner_cfg["fleet_mode"] = "collective"
         config["dataset"][idx]["data_path"] = "criteo_data"
     config["mode"] = ["runner0", "runner1"]

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     for worker_log in ('logs/worker.0', 'logs/worker.1'):
         built_in.regex_match_len(worker_log, self.epoch_re, 2, failure_msg)
     built_in.regex_match_len('logs/worker.1', '.+load.+increment_dnn', 1,
                              failure_msg)