def test_source_en_contains_en(self):
     """
     The source_en return actually all the whole english sentence.
     :return:
     """
     sources_en, sources_de, targets_de, bases_de, pos_de = generate_train_data(
         'Record file and application usage',
         'Die Nutzung von Dateien und Anwendungen aufzeichnen')
     for source_en in sources_en:
         self.assertEqual(source_en, 'Record file and application usage\n')
 def test_bases_de_contains_de(self):
     """
     The base_de return actually the whole german sentence.
     :return:
     """
     sources_en, sources_de, targets_de, bases_de, pos_de = generate_train_data(
         'Record file and application usage',
         'Die Nutzung von Dateien und Anwendungen aufzeichnen')
     for base_de in bases_de:
         self.assertEqual(
             base_de,
             'Die Nutzung von Dateien und Anwendungen aufzeichnen\n')
    def test_empty_string_parameter(self):
        """
        Empty strings should return empty lists back
        :return:
        """
        sources_en, sources_de, targets_de, bases_de, pos_de = generate_train_data(
            '', '')

        self.assertEqual(len(sources_en), 0, '"sources_en" has lenght of 0.')
        self.assertEqual(len(sources_de), 0, '"sources_de" has lenght of 0.')
        self.assertEqual(len(targets_de), 0, '"targets_de" has lenght of 0.')
        self.assertEqual(len(bases_de), 0, '"bases_de" has lenght of 0.')
        self.assertEqual(len(pos_de), 0, '"pos_de" has lenght of 0.')
    def test_generation_of_train_data(self):
        """
        Checks if the returning values have all the length as expected.
        :return:
        """
        sources_en, sources_de, targets_de, bases_de, pos_de = generate_train_data(
            'Record file and application usage',
            'Die Nutzung von Dateien und Anwendungen aufzeichnen')

        self.assertEqual(len(sources_en), 7, '"sources_en" has lenght of 7.')
        self.assertEqual(len(sources_de), 7, '"sources_de" has lenght of 7.')
        self.assertEqual(len(targets_de), 7, '"targets_de" has lenght of 7.')
        self.assertEqual(len(bases_de), 7, '"bases_de" has lenght of 7.')
        self.assertEqual(len(pos_de), 7, '"pos_de" has lenght of 7.')
    def test_unequal_token_length(self):
        """
        The lines of test data produces must match the number of tokens generated from the german sentence,
        even if the
        :return:
        """
        sources_en, sources_de, targets_de, bases_de, pos_de = generate_train_data(
            'test short translation',
            'Dieser Schlüssel legt fest, ob die Karten per Ziehen-und-Ablegen oder per Anklicken gelegt werden sollen.'
        )

        self.assertEqual(len(sources_en), 15, '"sources_en" has lenght of 15.')
        self.assertEqual(len(sources_de), 15, '"sources_de" has lenght of 15.')
        self.assertEqual(len(targets_de), 15, '"targets_de" has lenght of 15.')
        self.assertEqual(len(bases_de), 15, '"bases_de" has lenght of 15.')
        self.assertEqual(len(pos_de), 15, '"pos_de" has lenght of 15.')
    def test_sources_de_contains_de(self):
        """
        The sources_de contains one token as the base form
         of the corresponding word of the whole sentence at each position.
        :return:
        """
        sources_en, sources_de, targets_de, bases_de, pos_de = generate_train_data(
            'Record file and application usage',
            'Die Nutzung von Dateien und Anwendungen aufzeichnen')

        de_tokens = [
            'der\n', 'Nutzung\n', 'von\n', 'Datei\n', 'und\n', 'Anwendung\n',
            'aufzeichnen\n'
        ]
        for i in range(0, len(sources_de)):
            self.assertEqual(
                sources_de[i], de_tokens[i],
                'source_de entry: "{0}" matches with token "{1}" at position {2}'
                .format(sources_de[i], de_tokens[i], i))
    def test_targets_de_contains_de(self):
        """
        The targets_de contains the actual german token inflected in their original form. at each position
        :return:
        """
        sources_en, sources_de, targets_de, bases_de, pos_de = generate_train_data(
            'Record file and application usage',
            'Die Nutzung von Dateien und Anwendungen aufzeichnen')

        targets_tokens = [
            'die\n', 'Nutzung\n', 'von\n', 'Dateien\n', 'und\n',
            'Anwendungen\n', 'aufzeichnen\n'
        ]

        for i in range(0, len(targets_de)):
            self.assertEqual(
                targets_de[i], targets_tokens[i],
                'targets_de entry: "{0}" matches with token "{1}" at position {2}'
                .format(targets_de[i], targets_tokens[i], i))