def tr(text, scr, titled=True, fontize=False):
    """Transliterate a '|'-separated Harvard-Kyoto string into script ``scr``.

    Each '|'-delimited piece is handled independently after trailing
    '~', digits and spaces are stripped from it:

    * A piece starting with ``'ta:'`` has that prefix removed, is
      transliterated, title-cased and passed through ``dn2tam``.  When both
      ``titled`` and ``fontize`` are true the result is additionally wrapped
      in ``\\tamil{...}``; ``fontize`` has no effect when ``titled`` is false.
    * A piece containing ``'RIGHTarrow'`` must split on backslashes into
      exactly four fields; only the first field is transliterated and the
      four are re-joined with backslashes.
    * Any other piece is transliterated as a whole.

    For the last two cases the transliterated text is title-cased only when
    ``titled`` is true.

    Args:
        text: The source string; ``''`` short-circuits to ``''``.
        scr: Target scheme name; ``'hk'`` is an alias for ``'harvardkyoto'``.
        titled: Whether to title-case the non-Tamil output.
        fontize: Whether to wrap forced-Tamil output in ``\\tamil{}``.

    Returns:
        The converted pieces joined with '|'.

    Raises:
        ValueError: If a 'RIGHTarrow' piece does not split into four fields.
    """
    if text == '':
        return ''
    target = 'harvardkyoto' if scr == 'hk' else scr

    def convert(chunk):
        # The transliterator's result is decoded as UTF-8 into a str.
        return str(transliterate(chunk, 'harvardkyoto', target), 'utf8')

    pieces = []
    for raw in text.split('|'):
        bit = raw.rstrip('~0123456789 ')
        if bit[:3] == 'ta:':
            # Force Tamil!
            tamil = dn2tam(convert(bit[3:]).title())
            if titled and fontize:
                tamil = '\\tamil{%s}' % tamil
            pieces.append(tamil)
        elif 'RIGHTarrow' not in bit:
            plain = convert(bit)
            pieces.append(plain.title() if titled else plain)
        else:
            # Unpack before converting so a malformed piece fails first.
            head, t1, arrow, t2 = bit.split('\\')
            shown = convert(head)
            if titled:
                shown = shown.title()
            pieces.append('\\'.join([shown, t1, arrow, t2]))
    return '|'.join(pieces)
def i2d(text):
    """Transliterate ``text`` from the 'itrans' scheme to 'devanagari'.

    Leading and trailing '|' characters are stripped, a final 'M' on the
    remaining text is replaced by 'm', and the same number of '|' that were
    stripped is re-attached at the end.  The result is then split on
    whitespace and each word is transliterated on its own.

    Args:
        text: The input string.  Empty or all-'|' input is accepted.

    Returns:
        The transliterated words joined by single spaces.  A word that fails
        to transliterate is reported on stderr and emitted as ``##word##``.
    """
    newtext = text.strip('|')
    # print(newtext,file=sys.stderr)
    # endswith() is safe on an empty string, unlike indexing with [-1].
    if newtext.endswith('M'):
        newtext = newtext[:-1] + 'm'
    text = newtext + '|' * (len(text) - len(newtext))
    out_text_parts = []
    for t in text.split():
        try:
            out_text = transliterator.transliterate(t, 'itrans', 'devanagari')
        except Exception:
            # Best effort: flag the word and keep going, but let
            # KeyboardInterrupt / SystemExit propagate.
            sys.stderr.write(
                'Error transliterating the string "%s"...\n' % (t))
            out_text = '##%s##' % t
        out_text_parts.append(out_text)
    return ' '.join(out_text_parts)
def i2d(text):
    """Transliterate ``text`` from the 'itrans' scheme to 'devanagari'.

    Leading and trailing '|' characters are stripped, a final 'M' on the
    remaining text is replaced by 'm', and the same number of '|' that were
    stripped is re-attached at the end.  The result is then split on
    whitespace and each word is transliterated on its own.

    Args:
        text: The input string.  Empty or all-'|' input is accepted.

    Returns:
        The transliterated words joined by single spaces.  A word that fails
        to transliterate is reported on stderr and emitted as ``##word##``.
    """
    newtext = text.strip('|')
    # print(newtext,file=sys.stderr)
    # endswith() is safe on an empty string, unlike indexing with [-1].
    if newtext.endswith('M'):
        newtext = newtext[:-1] + 'm'
    text = newtext + '|' * (len(text) - len(newtext))
    out_text_parts = []
    for t in text.split():
        try:
            out_text = transliterator.transliterate(t, 'itrans', 'devanagari')
        except Exception:
            # Best effort: flag the word and keep going, but let
            # KeyboardInterrupt / SystemExit propagate.
            sys.stderr.write('Error transliterating the string "%s"...\n' % (t))
            out_text = '##%s##' % t
        out_text_parts.append(out_text)
    return ' '.join(out_text_parts)
def test_sequence_darija(self):
    # A multi-word Latin-script phrase, with digits standing in for letters,
    # must come back as the expected Arabic-script string.
    expected = "نت حامض و معقد"
    self.assertEqual(transliterate("nta 7aamed w m3e9ed"), expected)
def test_sequence(self):
    # A two-word Latin-script phrase must come back as the expected
    # Arabic-script string.
    expected = "يا حمار"
    self.assertEqual(transliterate("ya 7mar"), expected)
# loop through the files for f in files: with open(corpus_folder + f) as text: for line in text: try: # exclude RN, ABB tags and words with w or q or y word, pos = re.findall( "(^.*?(?=\t)|(?<=\t)[A-PS-VZ][AC-NT-VZ]*(?=\t[A-Ža-ž])|(?<=[0-9]\t)[A-Ža-ž]+(?=\t<))", line) # exclude NUM @card@, SENT, PUNCT and ? tags except: pass # to single-character Cyrillic word = transliterate(word) # find letter indices nuclei_positions = find_syllable_nuclei(word) sonant_positions, fricative_positions, africate_positions, nasal_positions, plosive_positions = find_consonants( word) # sillabify syllabified_word = syllabify(word, nuclei_positions, sonant_positions, fricative_positions, africate_positions, nasal_positions, plosive_positions) # get structure syllable_structure = get_syllable_structure(
# NEED TO USE PYTHON 2.7 FOR THIS SCRIPT
#
# Renames files in the current working directory whose names look like
# "<d>.<d>.<d> <name>.xml": <name> is passed through
# transliterator.transliterate(..., 'devanagari', 'iast') and the file is
# renamed to "<d>.<d>.<d> <result>.xml".
import os
import re

import transliterator

# Raw string so the regex escapes reach `re` unchanged.  The dot before "xml"
# is escaped and the pattern is end-anchored so that only names really ending
# in ".xml" are picked up (the new name always ends in ".xml", so anything
# after it would otherwise be silently dropped).
name = re.compile(r'(\d\.\d\.\d )(\D*)\.xml$', re.U)

files = [f for f in os.listdir(os.getcwd()) if name.match(f)]
files.sort()
for f in files:
    # Always matches: `files` was filtered with the same pattern above.
    match = name.match(f)
    try:
        trans = transliterator.transliterate(
            match.group(2), 'devanagari', 'iast')
    except Exception:
        # Best effort: report the failure and leave this file untouched.
        print('derped!')
        continue
    newName = match.group(1) + trans + '.xml'
    os.rename(f, newName)