def get_test_numpy(self):
        """Materialise the whole test dataset as numpy data and cache it on ``self``.

        On the first successful call the one-shot iterator built from
        ``self.test_tfds`` is drained inside a fresh ``tf.Session``; examples are
        collected into ``self.accum_test_x`` and labels into ``self.accum_test_y``
        (converted to an ``np.ndarray`` at the end).  Later calls skip the read
        whenever ``self.accum_test_x`` is already non-empty.

        Returns:
            ``self.accum_test_x`` on the normal path.
            NOTE(review): when ``self.test_tfds`` is None the tuple
            ``(self.accum_test_x, self.accum_test_y)`` is returned instead; this
            asymmetry is kept as-is because callers are not visible here.
        """
        if self.test_tfds is None:
            error("Error: test_tfds is None.")
            return self.accum_test_x, self.accum_test_y

        if len(self.accum_test_x) == 0:
            time_test_np_start = time.time()
            tfds_test_os_iterator = self.test_tfds.make_one_shot_iterator()
            as_timer("tfds_test_ositer")
            tfds_test_iter_next = tfds_test_os_iterator.get_next()
            time_test_os_iterator_end = time.time()
            info(
                "note: now take time_test_os_iterator_end cost_time={}s".format(
                    round(time_test_os_iterator_end - time_test_np_start, 3)
                )
            )

            with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
                if self.domain == "text":
                    # Text data arrives in batches: flatten each batch and
                    # extend the accumulators row by row.
                    while True:
                        try:
                            example, labels = sess.run(tfds_test_iter_next)
                            # Drop the singleton axes 2 and 3, then the trailing one.
                            example = np.squeeze(example, (2, 3))
                            example = np.squeeze(example, axis=-1)
                            # Builtin int replaces the ``np.int`` alias, which was
                            # removed in NumPy 1.24; the resulting dtype is the same.
                            example = example.astype(int)

                            self.accum_test_x.extend(example)
                            self.accum_test_y.extend(labels)
                            self.accm_test_cnt += example.shape[0]
                        except tf.errors.OutOfRangeError:
                            # Iterator exhausted: every test batch has been read.
                            break
                else:
                    # Other domains yield one example per run; store them as-is.
                    while True:
                        try:
                            example, labels = sess.run(tfds_test_iter_next)
                            self.accum_test_x.append(example)
                            self.accum_test_y.append(labels)
                            self.accm_test_cnt += 1

                        except tf.errors.OutOfRangeError:
                            as_timer("tfds_test_run_OOR_{}".format(self.accm_test_cnt))
                            break

            time_test_np_end = time.time()
            info(
                "note: now take test accm_test_cnt={}, cost_time={}s".format(
                    self.accm_test_cnt, round(time_test_np_end - time_test_np_start, 3)
                )
            )
            self.accum_test_y = np.array(self.accum_test_y)

        return self.accum_test_x
Пример #2
0
    def test(self, dataset, remaining_time_budget=None):
        """Test method of domain-specific model.

        Prepares the test dataset through ``self.tf_dataset_trainsformer``, fills in
        ``domain_metadata["test_num"]`` when it is not yet a non-negative count,
        and then predicts with either ``self.domain_model`` or
        ``self.speech_widsom_model`` depending on ``self.main_train_loop_num``.

        Args:
            dataset: raw test dataset handed to ``init_test_tfds``.
            remaining_time_budget: forwarded unchanged to the model's ``test``.

        Returns:
            The predictions of the model that was used, or ``None`` when the
            domain is not speech (an error is logged in that case).
        """
        as_timer("test_start")
        # init tf_test_dataset for the first time.
        self.tf_dataset_trainsformer.init_test_tfds(dataset)

        self.domain_dataset_test, self.X_test = self.tf_dataset_trainsformer.get_speech_test_dataset()

        # As the original metadata doesn't contain number of test examples, we
        # need to add this information
        if self.domain in ["text", "speech"] and (not self.domain_metadata["test_num"] >= 0):
            self.domain_metadata["test_num"] = len(self.X_test)
        logger.info("Note:test_process test domain metadata is {}".format(self.domain_metadata))

        # Bind the result up front: the non-speech branch only logs an error, and
        # without this default the final return would raise UnboundLocalError.
        y_pred = None

        # Make predictions
        if self.domain in ["speech"]:
            # The light model keeps serving predictions until the train loop has
            # passed the mid-weight start loop plus the configured block loops.
            light_model_last_loop = (
                speech_ms_mlp_conf.midwei_train_start_loop + speech_ms_mlp_conf.midwei_predict_block_loop
            )
            if self.main_train_loop_num <= light_model_last_loop:
                y_pred = self.domain_model.test(self.domain_dataset_test, remaining_time_budget=remaining_time_budget)
                logger.info(
                    "Note: speech pasa_model, speech_main_train_loop={}, speech_main_test_loop={}".format(
                        self.main_train_loop_num, self.main_test_loop_num
                    )
                )
                # Update self.done_training
                self.done_training = self.domain_model.done_training

            else:
                y_pred = self.speech_widsom_model.test(
                    self.domain_dataset_test, remaining_time_budget=remaining_time_budget
                )
                logger.info(
                    "Note: speech dw_model, train_loop={}, test_loop={}".format(
                        self.main_train_loop_num, self.main_test_loop_num
                    )
                )
                # Update self.done_training
                self.done_training = self.speech_widsom_model.done_training

            as_timer("test_end")
            logger.info(as_timer)
        else:
            logger.error("Note: Domain is not Speech!")

        self.main_test_loop_num += 1

        return y_pred
    def init_test_tfds(self, test_tfds):
        """Store the test dataset on ``self.test_tfds`` the first time it is offered.

        Does nothing when ``self.test_tfds`` is already set.  For the "text"
        domain the dataset is wrapped in a padded batch of 20 with -1 as the
        padding value; for every other domain it is stored unchanged unless the
        local cutoff switch is enabled.

        Args:
            test_tfds: dataset object; must provide ``padded_batch`` (text) or
                ``map`` (cutoff enabled).
        """
        if self.test_tfds is not None:
            # Already initialised by an earlier call.
            return

        if self.domain == "text":
            batched = test_tfds.padded_batch(
                20,
                padded_shapes=([None, 1, 1, 1], [None]),
                padding_values=(tf.constant(-1, dtype=tf.float32), tf.constant(-1, dtype=tf.float32)),
            )
            self.test_tfds = batched
        else:
            # config: test_if_map_cutoff
            tfds_if_test_cutoff = False
            self.test_tfds = (
                test_tfds.map(lambda x, y: (x[:800000], y), num_parallel_calls=4)
                if tfds_if_test_cutoff
                else test_tfds
            )

        as_timer("tfds_cvtr_init_tfds")
Пример #4
0
    def __init__(self, metadata):
        """Build the speech models, loop counters and the tf dataset transformer.

        Args:
          metadata: an AutoDLMetadata object. Its definition can be found in
              AutoDL_ingestion_program/dataset.py
        """
        self.metadata = metadata
        self.done_training = False
        # Domain inference is bypassed here; the domain is pinned to "speech".
        self.domain = "speech"
        logger.info("Note:The inferred domain of current dataset is: {}.".format(self.domain))

        # Domain recognition and model initialisation.
        model_cls = meta_domain_2_model(self.domain)
        domain_meta = get_domain_metadata(metadata, self.domain)
        self.domain_metadata = domain_meta
        self.class_num = domain_meta["class_num"]
        self.train_num = domain_meta["train_num"]

        logger.info("Note:The domain metadata is {}".format(self.domain_metadata))
        self.domain_model = model_cls(self.domain_metadata)

        # fixme: add data updating.
        self.speech_widsom_model = ASpeechWidsomModel(self.domain_metadata)
        self.speech_wisdom_dataset_train = None
        logger.info("Note:Init Speech Wisdom solution, is {}".format(self.domain_metadata))

        # Counters for how many times train() / test() have been run.
        self.main_train_loop_num = 0
        self.main_test_loop_num = 0

        # Dataset bookkeeping, populated later.
        self.raw_tf_train_dataset = None
        self.dataset_sample_size = None
        self.dataset_read_num_second = None
        self.data_all_np_x_list = []
        self.data_all_np_y_array = None
        # dataset sampling if still remain to be sampled incrementally.
        self.ds_incr_flag = True
        self.domain_dataset_train = None
        self.domain_dataset_test = None

        # for tf_dataset.
        shuffle_train = speech_ds_tds_conf.if_shuffle
        self.tf_dataset_trainsformer = TfDatasetTransformer(if_train_shuffle=shuffle_train)
        as_timer("model_speech_init")
Пример #5
0
    def train(self, dataset, remaining_time_budget=None):
        """Run one training round of the speech models.

        The train tf dataset is initialised through the dataset transformer, then:
        while the loop counter is below ``lightwei_train_end_loop`` the light
        ``domain_model`` is trained on a sampled subset, and once the counter has
        reached ``midwei_train_start_loop`` the ``speech_widsom_model`` is trained
        on the full train dataset.  Both steps can run in the same round.

        Args:
            dataset: raw training dataset handed to ``init_train_tfds``.
            remaining_time_budget: forwarded unchanged to the models' ``train``.
        """
        logger.info("Note: speech_train_process  model.py starts train")
        as_timer("train_start")

        # load tf_train_dataset for first time.
        self.tf_dataset_trainsformer.init_train_tfds(dataset, self.train_num)

        if self.domain not in ["speech"]:
            logger.error("Note: Domain is not Speech!")
            return

        # Train the model with light model.
        if self.main_train_loop_num < speech_ms_mlp_conf.lightwei_train_end_loop:
            # fixme: need to be autotuned.
            ratio_for_loop = speech_ds_tds_conf.sample_ratio[self.main_train_loop_num]
            # Sample size is the ratio-based count, capped at 50 examples per class.
            ds_take_size = min(int(self.train_num * ratio_for_loop), self.class_num * 50)

            self.domain_model.train(
                self.tf_dataset_trainsformer.get_speech_train_dataset(ds_take_size),
                remaining_time_budget=remaining_time_budget,
            )

            light_done_msg = "Note: domain={}, main_train_loop_num={}, light_model train finished."
            logger.info(light_done_msg.format(self.domain, self.main_train_loop_num))
            as_timer("speech_model_basic_train")

        if self.main_train_loop_num >= speech_ms_mlp_conf.midwei_train_start_loop:
            self.speech_widsom_model.train(
                self.tf_dataset_trainsformer.get_speech_train_dataset_full(),
                remaining_time_budget,
            )
            logger.info("Note: start wisdom at np, main_train_loop_num={}".format(self.main_train_loop_num))
            as_timer("speech_tr34_train")

        logger.info("Note:time_train model.py domain_model train finished.")

        # Update self.done_training
        self.done_training = self.domain_model.done_training
        self.main_train_loop_num += 1
        as_timer("train_end")
        logger.info(as_timer)
    def get_train_numpy(self, update_train_num):
        """Fetch the next increment of training data from the tf dataset as numpy.

        A one-shot iterator over ``self.train_tfds`` is created lazily on the
        first call.  While ``self.accm_train_cnt`` is below ``self.train_num`` the
        iterator is run in ``self.tfds_convertor_sess`` until this call has
        fetched at least ``update_train_num`` examples, the running total reaches
        ``self.train_num``, or the iterator is exhausted.  The fetched examples
        are appended to ``self.accum_train_x`` and their labels concatenated onto
        ``self.accum_train_y``.  Once everything has been consumed, the session
        is closed and nothing is read.

        Args:
            update_train_num: number of new examples requested for this call.

        Returns:
            ``(cur_incre_train_x, labels)``: the list of examples fetched by this
            call and an ``np.ndarray`` of their labels (both empty when nothing
            was read).  When ``self.train_tfds`` is None, the accumulated
            ``(self.accum_train_x, self.accum_train_y)`` is returned instead.
        """
        as_timer("tfdscvtr_get_train_np_start")
        if self.train_tfds is None:
            error("Error: train_tfds is None.")
            return self.accum_train_x, self.accum_train_y

        if self.tfds_train_os_iterator is None:
            time_mosi_start = time.time()
            self.tfds_train_os_iterator = self.train_tfds.make_one_shot_iterator()
            as_timer("tfds_train_os_iterator_make")
            self.tfds_train_iter_next = self.tfds_train_os_iterator.get_next()
            time_mosi_end = time.time()
            info("note: train_os_iterator done, cost_time={}s".format(round(time_mosi_end - time_mosi_start, 3)))

        cur_get_cnt = 0
        cur_data_y = []
        cur_incre_train_x = []

        if self.accm_train_cnt >= self.train_num:
            # Every training example has already been read: release the session.
            self.tfds_convertor_sess.close()
            return cur_incre_train_x, np.array(cur_data_y)

        time_train_np_start = time.time()
        if self.domain == "text":
            info("note: domain={}".format(self.domain))
            while True:
                try:
                    example, labels = self.tfds_convertor_sess.run(self.tfds_train_iter_next)
                    # Drop the singleton axes 2 and 3, then the trailing one.
                    example = np.squeeze(example, (2, 3))
                    example = np.squeeze(example, axis=-1)
                    # Builtin int replaces the ``np.int`` alias, which was
                    # removed in NumPy 1.24; the resulting dtype is the same.
                    example = example.astype(int)
                    # fixme: note that example and labels are both batches here, batch_size=20
                    cur_incre_train_x.extend(example)
                    cur_data_y.extend(labels)
                    cur_get_cnt += example.shape[0]
                    self.accm_train_cnt += example.shape[0]

                    if cur_get_cnt >= update_train_num or self.accm_train_cnt >= self.train_num:
                        time_train_np_end = time.time()
                        info(
                            "note: now text extend batch domain={} take train update={}, accm_train_cnt={}, cost_time={}s".format(
                                self.domain,
                                cur_get_cnt,
                                self.accm_train_cnt,
                                round(time_train_np_end - time_train_np_start, 3)
                            )
                        )
                        break

                except tf.errors.OutOfRangeError:
                    info("train out of range, cur_get_cnt={}".format(cur_get_cnt))
                    break

        else:
            while True:
                try:
                    example, labels = self.tfds_convertor_sess.run(self.tfds_train_iter_next)
                    # Non-text domains yield one example per run.
                    cur_incre_train_x.append(example)
                    cur_data_y.append(labels)
                    cur_get_cnt += 1
                    self.accm_train_cnt += 1
                    if cur_get_cnt >= update_train_num or self.accm_train_cnt >= self.train_num:
                        time_train_np_end = time.time()
                        info(
                            "note: now append domain={} take train update={}, accm_train_cnt={}, train_num={}, cost_time={}s".format(
                                self.domain,
                                cur_get_cnt,
                                self.accm_train_cnt,
                                self.train_num,
                                round(time_train_np_end - time_train_np_start, 3)
                            )
                        )
                        as_timer("tfds_get_train_np_update={}".format(cur_get_cnt))
                        break

                except tf.errors.OutOfRangeError:
                    # Iterator exhausted before the requested amount was reached.
                    break

        # Fold the increment into the accumulated train x / y.
        self.accum_train_x.extend(cur_incre_train_x)
        as_timer("tfds_get_train_np_accum_train_x_{}".format(len(self.accum_train_x)))

        if self.accum_train_y is None:
            self.accum_train_y = np.array(cur_data_y)
        else:
            self.accum_train_y = np.concatenate((self.accum_train_y, np.array(cur_data_y)))

        info("note: self.accum_train_x num_new={}, incre_train_num={}, self.accum_train_y shape={}, cur_data_y shape={}".format(
            len(self.accum_train_x),
            len(cur_incre_train_x),
            self.accum_train_y.shape,
            np.array(cur_data_y).shape
        ))

        return cur_incre_train_x, np.array(cur_data_y)