Пример #1
0
 def get_test_examples(self, data_dir):
     """Create the test examples from ``dstc2_test_en.json`` in ``data_dir``.

     Args:
       data_dir: Directory that contains ``dstc2_test_en.json``.

     Returns:
       The result of ``dataset_dstc2.create_examples`` for that file, called
       with ``self.slot_list``, the literal ``'test'``, ``use_asr_hyp=1`` and
       ``exclude_unpointable=False``.
     """
     test_file = os.path.join(data_dir, 'dstc2_test_en.json')
     return dataset_dstc2.create_examples(
         test_file,
         self.slot_list,
         'test',
         use_asr_hyp=1,
         exclude_unpointable=False)
Пример #2
0
 def get_dev_examples(self, data_dir):
     """Create the dev examples from ``woz_validate_en.json`` in ``data_dir``.

     Args:
       data_dir: Directory that contains ``woz_validate_en.json``.

     Returns:
       The result of ``dataset_dstc2.create_examples`` for that file, called
       with ``self.slot_list``, the literal ``'dev'``, ``use_asr_hyp=0`` and
       ``exclude_unpointable=False``.
     """
     # NOTE(review): a ``woz_*`` file is loaded through the ``dataset_dstc2``
     # loader here -- confirm this pairing is intended.
     dev_file = os.path.join(data_dir, 'woz_validate_en.json')
     return dataset_dstc2.create_examples(
         dev_file,
         self.slot_list,
         'dev',
         use_asr_hyp=0,
         exclude_unpointable=False)
Пример #3
0
 def get_train_examples(self, data_dir):
     """Create the training examples from ``dstc2_train_en.json`` in ``data_dir``.

     Args:
       data_dir: Directory that contains ``dstc2_train_en.json``.

     Returns:
       The result of ``dataset_dstc2.create_examples`` for that file, called
       with ``self.slot_list`` and the literal ``'train'``; every other
       argument is left at the callee's default.
     """
     train_file = os.path.join(data_dir, 'dstc2_train_en.json')
     return dataset_dstc2.create_examples(train_file, self.slot_list, 'train')
Пример #4
0
                        prediction["slot_prediction_%s" % slot] = ' '.join(
                            input_tokens[start_pd:end_pd + 1])
                        prediction["slot_groundtruth_%s" % slot] = ' '.join(
                            input_tokens[start_gt:end_gt + 1])
                    list_prediction.append(prediction)
                    if i >= num_actual_predict_examples:
                        break
                    num_written_ex += 1
                json.dump(list_prediction, f, indent=2)
            assert num_written_ex == num_actual_predict_examples


if __name__ == "__main__":
    # NOTE(review): the flag-driven entry point below is commented out; the
    # script instead runs a fixed conversion with hard-coded paths.
    #flags.mark_flag_as_required("data_dir")
    #flags.mark_flag_as_required("task_name")
    #flags.mark_flag_as_required("vocab_file")
    #flags.mark_flag_as_required("bert_config_file")
    #flags.mark_flag_as_required("output_dir")
    #tf.app.run()

    # Hard-coded inputs and output for this ad-hoc run.
    vocab_path = '/ml/uncased_L-12_H-768_A-12/vocab.txt'
    train_json = '/ml/woz/woz_train_en.json'
    output_file = 'train.tfr'
    # Presumably the maximum sequence length -- confirm against the signature
    # of file_based_convert_examples_to_features.
    max_seq_length = 128

    tokenizer = tokenization.FullTokenizer(
        vocab_file=vocab_path,
        do_lower_case=True,
    )

    class_types = ['none', 'dontcare', 'copy_value', 'unpointable']
    slot_list = ['area', 'food', 'price range']

    examples = dataset_dstc2.create_examples(train_json, slot_list, 'train')

    # Arguments are passed positionally, in the same order as before.
    file_based_convert_examples_to_features(
        examples,
        slot_list,
        class_types,
        max_seq_length,
        tokenizer,
        output_file,
    )