Пример #1
0
    def test_strip_word_data_large_string(self):
        """
        Runs strip_word_data over an inline multi-verse sample (1 Chronicles 1:1-10 with
        \\w ...|strong="..."\\w* word markers) and compares the result with the
        contents of the uwapi_1ch.usfm resource file.
        :return:
        """
        # The sample mixes header markers, \\s5 chunk markers, blank lines and
        # words carrying one or two attributes (strong, x-morph).
        input = u'''\\id 1CH
\\h PREMIER LIVRE DES CHRONIQUES
\\toc1 PREMIER LIVRE DES CHRONIQUES
\\toc2 1 Chroniques
\\toc3 1 Ch
\\mt1 LES LIVRES DES CHRONIQUES
\\mt1 PREMIER LIVRE DES CHRONIQUES

\\s5
\\c 1
\\p
\\v 1  \\w Adam|strong="H121"\\w*, \\w Seth|strong="H8352"\\w*, \\w Énosch|strong="H583"\\w*,
\\v 2  \\w Kénan|strong="H7018"\\w*, \\w Mahalaleel|strong="H4111"\\w*, \\w Jéred|strong="H3382"\\w*,
\\v 3  \\w Hénoc|strong="H2585"\\w*, \\w Metuschélah|strong="H4968"\\w*, \\w Lémec|strong="H3929"\\w*,
\\v 4  \\w Noé|strong="H5146"\\w*, \\w Sem|strong="H8035"\\w*, \\w Cham|strong="H2526"\\w* et \\w Japhet|strong="H3315"\\w*.

\\s5
\\v 5  \\w Fils|strong="H1121"\\w* de \\w Japhet|strong="H3315"\\w*: \\w Gomer|strong="H1586"\\w*, \\w Magog|strong="H4031"\\w*, \\w Madaï|strong="H4074"\\w*, \\w Javan|strong="H3120"\\w*, \\w Tubal|strong="H8422"\\w*, \\w Méschec|strong="H4902"\\w* et \\w Tiras|strong="H8494"\\w*. -
\\v 6  \\w Fils|strong="H1121"\\w* de \\w Gomer|strong="H1586"\\w*: \\w Aschkenaz|strong="H813"\\w*, \\w Diphat|strong="H7384"\\w* et \\w Togarma|strong="H8425"\\w*. -
\\v 7  \\w Fils|strong="H1121"\\w* de \\w Javan|strong="H3120"\\w*: \\w Élischa|strong="H473"\\w*, \\w Tarsisa|strong="H8659"\\w*, \\w Kittim|strong="H3794"\\w* et \\w Rodanim|strong="H1721"\\w*.

\\s5
\\v 8  \\w Fils|strong="H1121"\\w* de \\w Cham|strong="H2526"\\w*: \\w Cusch|strong="H3568"\\w*, \\w Mitsraïm|strong="H4714"\\w*, \\w Puth|strong="H6316"\\w* et \\w Canaan|strong="H3667"\\w*. -
\\v 9  \\w Fils|strong="H1121"\\w* de \\w Cusch|strong="H3568"\\w*: \\w Saba|strong="H5434"\\w*, \\w Havila|strong="H2341"\\w*, \\w Sabta|strong="H5454"\\w*, \\w Raema|strong="H7484"\\w* et \\w Sabteca|strong="H5455"\\w*. -\\w Fils|strong="H1121"\\w* de \\w Raema|strong="H7484"\\w*: \\w Séba|strong="H7614"\\w* et \\w Dedan|strong="H1719"\\w*.
\\v 10  \\w Cusch|strong="H3568"\\w* \\w engendra|strong="H3205" x-morph="strongMorph:TH8804"\\w* \\w Nimrod|strong="H5248"\\w*; c'est lui qui \\w commença|strong="H2490" x-morph="strongMorph:TH8689"\\w* à être \\w puissant|strong="H1368"\\w* sur la \\w terre|strong="H776"\\w*. -
'''
        # The expected text lives in a resource file rather than inline.
        expected = read_file(os.path.join(self.resources_dir, 'uwapi_1ch.usfm'))
        output = strip_word_data(input)
        self.assertEqual(expected, output)
 def _process_usfm(self, format):
     """
     Fetches the usfm referenced by a format entry and returns it after
     the custom clean-up steps have been applied.
     :param format: mapping that provides the source location under the 'url' key
     :return: the result of running strip_word_data, convert_chunk_markers and
              remove_unknown_markers (in that order) over the fetched content
     """
     source_url = format['url']
     # the local copy is named after the md5 hex digest of the url
     local_path = os.path.join(self.temp_dir, md5(source_url).hexdigest())
     self.download_file(source_url, local_path)

     without_word_data = strip_word_data(read_file(local_path))
     with_converted_chunks = convert_chunk_markers(without_word_data)
     return remove_unknown_markers(with_converted_chunks)
Пример #3
0
 def test_usfm3_file_to_usfm2(self):
     """
     Regression test for content that was not converted correctly in the
     past: strip_word_data applied to the fr_gen.usfm3 resource must equal
     the fr_gen.usfm2 resource.
     :return:
     """
     usfm3_path = os.path.join(self.resources_dir, 'fr_gen.usfm3')
     usfm3 = read_file(usfm3_path)
     usfm2_path = os.path.join(self.resources_dir, 'fr_gen.usfm2')
     usfm2 = read_file(usfm2_path)
     self.assertEqual(usfm2, strip_word_data(usfm3))
Пример #4
0
 def test_strip_word_data_from_file(self):
     """
     Regression test for content that was not converted correctly in the
     past: strip_word_data applied to the apiv3_1ch.usfm resource must equal
     the uwapi_1ch.usfm resource.
     :return:
     """
     source_path = os.path.join(self.resources_dir, 'apiv3_1ch.usfm')
     source_usfm = read_file(source_path)
     expected_path = os.path.join(self.resources_dir, 'uwapi_1ch.usfm')
     expected_usfm = read_file(expected_path)
     self.assertEqual(expected_usfm, strip_word_data(source_usfm))
Пример #5
0
def build_usx(usfm_dir, usx_dir):
    """
    Rewrites every file found in the usfm directory with its processed
    content and then hands both directories to UsfmTransform.buildUSX.
    :param usfm_dir: directory whose entries are read, processed and overwritten in place
    :param usx_dir: second argument given to UsfmTransform.buildUSX (presumably the output directory)
    :return:
    """
    # drop word data and convert chunk markers, file by file
    for entry in os.listdir(usfm_dir):
        usfm_path = os.path.join(usfm_dir, entry)
        original = read_file(usfm_path)
        processed = convert_chunk_markers(strip_word_data(original))
        write_file(usfm_path, processed)

    UsfmTransform.buildUSX(usfm_dir, usx_dir, '', True)
Пример #6
0
def build_usx(usfm_dir, usx_dir, logger=None):
    """
    Rewrites every file found in the usfm directory with its processed
    content and then hands both directories to UsfmTransform.buildUSX.
    :param usfm_dir: directory whose entries are read, processed and overwritten in place
    :param usx_dir: second argument given to UsfmTransform.buildUSX (presumably the output directory)
    :param logger: optional object with a debug() method; when truthy it is told the conversion is starting
    :return:
    """
    # drop word data, convert chunk markers and remove unknown markers, file by file
    for entry in os.listdir(usfm_dir):
        usfm_path = os.path.join(usfm_dir, entry)
        original = read_file(usfm_path)
        without_word_data = strip_word_data(original)
        with_converted_chunks = convert_chunk_markers(without_word_data)
        write_file(usfm_path, remove_unknown_markers(with_converted_chunks))

    if logger:
        message = "Actual USX conversion into {}".format(usx_dir)
        logger.debug(message)
    UsfmTransform.buildUSX(usfm_dir, usx_dir, '', True)
Пример #7
0
    def test_strip_usfm_mixed_word_data(self):
        """
        Ensures strip_word_data correctly handles input whose "blank" lines contain spaces.

        The line following verse 7 in the input holds only spaces, while the
        matching line in the expected text is empty. The sample also includes
        words with one or two attributes and a \\w marker that directly follows
        a "-" without a separating space (verse 9).
        :return:
        """
        # NOTE: the whitespace-only line after verse 7 is significant; do not strip it.
        input = u'''\\v 7  \\w Fils|strong="H1121"\\w* de \\w Javan|strong="H3120"\\w*: \\w Élischa|strong="H473"\\w*, \\w Tarsisa|strong="H8659"\\w*, \\w Kittim|strong="H3794"\\w* et \\w Rodanim|strong="H1721"\\w*.
  
\\s5
\\v 8  \\w Fils|strong="H1121"\\w* de \\w Cham|strong="H2526"\\w*: \\w Cusch|strong="H3568"\\w*, \\w Mitsraïm|strong="H4714"\\w*, \\w Puth|strong="H6316"\\w* et \\w Canaan|strong="H3667"\\w*. -
\\v 9  \\w Fils|strong="H1121"\\w* de \\w Cusch|strong="H3568"\\w*: \\w Saba|strong="H5434"\\w*, \\w Havila|strong="H2341"\\w*, \\w Sabta|strong="H5454"\\w*, \\w Raema|strong="H7484"\\w* et \\w Sabteca|strong="H5455"\\w*. -\\w Fils|strong="H1121"\\w* de \\w Raema|strong="H7484"\\w*: \\w Séba|strong="H7614"\\w* et \\w Dedan|strong="H1719"\\w*.
\\v 10  \\w Cusch|strong="H3568"\\w* \\w engendra|strong="H3205" x-morph="strongMorph:TH8804"\\w* \\w Nimrod|strong="H5248"\\w*; c'est lui qui \\w commença|strong="H2490" x-morph="strongMorph:TH8689"\\w* à être \\w puissant|strong="H1368"\\w* sur la \\w terre|strong="H776"\\w*. -'''
        expected = u'''\\v 7 Fils de Javan: Élischa, Tarsisa, Kittim et Rodanim.

\\s5
\\v 8 Fils de Cham: Cusch, Mitsraïm, Puth et Canaan. -
\\v 9 Fils de Cusch: Saba, Havila, Sabta, Raema et Sabteca. - Fils de Raema: Séba et Dedan.
\\v 10 Cusch engendra Nimrod; c'est lui qui commença à être puissant sur la terre. -'''
        output = strip_word_data(input)
        self.assertEqual(expected, output)
Пример #8
0
 def test_strip_usfm_word_data(self):
     """
     Checks a single verse whose word markers carry attributes and have a
     space before the closing marker: only the plain words must remain.
     :return:
     """
     tagged_verse = u'\\v 1 Ce \\w qui|strong="G3739" \\w* \\w était|strong="G2258" x-morph="strongMorph:TG5713" \\w* \\w dès|strong="G575" \\w*'
     plain_verse = u'\\v 1 Ce qui était dès'
     self.assertEqual(plain_verse, strip_word_data(tagged_verse))