Пример #1
0
 def _makeToyEnDeData(self, with_alignments=False):
   """Creates a three-sentence English/German toy dataset for tests.

   The sentences are handed to `test_util.make_data_file` and the
   vocabularies are derived from the resulting files with
   `test_util.make_vocab_from_file`. Every file is placed under
   `self.get_temp_dir()`.

   Args:
     with_alignments: When true, an additional alignment file is created and
       registered under the "train_alignments" key of the configuration.

   Returns:
     A tuple `(features_file, labels_file, data_config)`. The first two are
     whatever `test_util.make_data_file` returned for the source and target
     sentences (presumably the file paths); `data_config` is a dict holding
     "source_vocabulary", "target_vocabulary" and, optionally,
     "train_alignments".
   """
   def _in_temp_dir(filename):
     # The temp directory is queried for each path rather than cached.
     return os.path.join(self.get_temp_dir(), filename)

   english_sentences = [
       "Parliament Does Not Support Amendment Freeing Tymoshenko",
       "Today , the Ukraine parliament dismissed , within the Code of Criminal Procedure "
       "amendment , the motion to revoke an article based on which the opposition leader , "
       "Yulia Tymoshenko , was sentenced .",
       "The amendment that would lead to freeing the imprisoned former Prime Minister was "
       "revoked during second reading of the proposal for mitigation of sentences for "
       "economic offences .",
   ]
   german_sentences = [
       "Keine befreiende Novelle für Tymoshenko durch das Parlament",
       "Das ukrainische Parlament verweigerte heute den Antrag , im Rahmen einer Novelle "
       "des Strafgesetzbuches denjenigen Paragrafen abzuschaffen , auf dessen Grundlage die "
       "Oppositionsführerin Yulia Timoshenko verurteilt worden war .",
       "Die Neuregelung , die den Weg zur Befreiung der inhaftierten Expremierministerin hätte "
       "ebnen können , lehnten die Abgeordneten bei der zweiten Lesung des Antrags auf Milderung "
       "der Strafen für wirtschaftliche Delikte ab .",
   ]

   features_file = test_util.make_data_file(_in_temp_dir("src.txt"), english_sentences)
   labels_file = test_util.make_data_file(_in_temp_dir("tgt.txt"), german_sentences)

   # Source entry first, then target: same creation order as the data files.
   data_config = {
       "source_vocabulary": test_util.make_vocab_from_file(
           _in_temp_dir("src_vocab.txt"), features_file),
       "target_vocabulary": test_util.make_vocab_from_file(
           _in_temp_dir("tgt_vocab.txt"), labels_file),
   }

   if with_alignments:
     # Dummy and incomplete alignments.
     alignment_lines = [
         "0-0 1-0 2-2 3-4 4-4 5-6",
         "0-1 1-1 1-3 2-3 4-4",
         "0-0 1-0 2-2 3-4 4-4 5-6",
     ]
     data_config["train_alignments"] = test_util.make_data_file(
         _in_temp_dir("aligne.txt"), alignment_lines)

   return features_file, labels_file, data_config
Пример #2
0
 def _makeToyTaggerData(self):
     """Creates a two-sentence toy dataset for sequence tagging tests.

     Token sentences and their per-token tag sequences are handed to
     `test_util.make_data_file`. The source vocabulary is derived from the
     sentence file with `test_util.make_vocab_from_file`, while the tag
     vocabulary is an explicit list passed to `test_util.make_data_file`.
     Every file is placed under `self.get_temp_dir()`.

     Returns:
       A tuple `(features_file, labels_file, data_config)`. The first two
       are whatever `test_util.make_data_file` returned for the sentences
       and tags (presumably the file paths); `data_config` is a dict holding
       "source_vocabulary" and "target_vocabulary".
     """
     def _in_temp_dir(filename):
         # The temp directory is queried for each path rather than cached.
         return os.path.join(self.get_temp_dir(), filename)

     sentences = ["M . Smith went to Washington .", "I live in New Zealand ."]
     tag_sequences = ["B-PER I-PER E-PER O O S-LOC O", "O O O B-LOC E-LOC O"]
     # "O" first, then the B/I/E/S tags for LOC followed by those for PER.
     tag_vocabulary = [
         "O",
         "B-LOC",
         "I-LOC",
         "E-LOC",
         "S-LOC",
         "B-PER",
         "I-PER",
         "E-PER",
         "S-PER",
     ]

     features_file = test_util.make_data_file(_in_temp_dir("src.txt"), sentences)
     labels_file = test_util.make_data_file(_in_temp_dir("labels.txt"), tag_sequences)

     data_config = {
         "source_vocabulary": test_util.make_vocab_from_file(
             _in_temp_dir("src_vocab.txt"), features_file),
         "target_vocabulary": test_util.make_data_file(
             _in_temp_dir("labels_vocab.txt"), tag_vocabulary),
     }
     return features_file, labels_file, data_config
Пример #3
0
 def _makeToyClassifierData(self):
   """Creates a three-example toy dataset for text classification tests.

   Three review-like sentences and one class name per sentence are handed to
   `test_util.make_data_file`. The source vocabulary is derived from the
   sentence file with `test_util.make_vocab_from_file`; the label vocabulary
   is created from the same three class names. Every file is placed under
   `self.get_temp_dir()`.

   Returns:
     A tuple `(features_file, labels_file, data_config)`. The first two are
     whatever `test_util.make_data_file` returned for the sentences and the
     labels (presumably the file paths); `data_config` is a dict holding
     "source_vocabulary" and "target_vocabulary".
   """
   def _in_temp_dir(filename):
     # The temp directory is queried for each path rather than cached.
     return os.path.join(self.get_temp_dir(), filename)

   reviews = [
       "This product was not good at all , it broke on the first use !",
       "Perfect , it does everything I need .",
       "How do I change the battery ?",
   ]
   # One class per review; the same names, in the same order, form the
   # label vocabulary.
   class_names = ("negative", "positive", "neutral")

   features_file = test_util.make_data_file(_in_temp_dir("src.txt"), reviews)
   # A fresh list per call, so the two files never share a list object.
   labels_file = test_util.make_data_file(_in_temp_dir("labels.txt"), list(class_names))

   data_config = {
       "source_vocabulary": test_util.make_vocab_from_file(
           _in_temp_dir("src_vocab.txt"), features_file),
       "target_vocabulary": test_util.make_data_file(
           _in_temp_dir("labels_vocab.txt"), list(class_names)),
   }
   return features_file, labels_file, data_config