def translate(bicorpus, file=sys.stdin):
    """
    Translate by analogy every sentence read from `file`.

    input: Ds in the source language, one sentence per line of `file`
    output: candidate translations Dt in the target language, printed on
            standard output as 'Ds<TAB>Dt' lines (several lines may be
            printed for the same Ds, one per successful triple)
    data (passed as argument):
            bicorpus = collection of pairs (As, At) where At is the
            translation of As.  It is indexed by source sentence
            (bicorpus[As] gives At) and its iter() method enumerates the
            triples (As, Bs, Cs) of source sentences to try.

    For each triple, if Bs is Ds itself its stored translation is printed.
    Otherwise, if As : Bs :: Cs : Ds holds on the source side, the equation
    At : Bt :: Ct : x is solved on the target side and its solution is
    printed.  Triples whose target equation has no solution are skipped.
    """
    for Ds in file:
        Ds = Ds.rstrip('\n')
        if __verbose__:
            sys.stderr.write('\n# Translating sentence: {}\n'.format(Ds))
        triples = bicorpus.iter(string=Ds, strategy='closest', method='indirect')
        for (As, Bs, Cs) in triples:
            if Ds == Bs:
                # The sentence is in the bicorpus: output its translation.
                print('{}\t{}'.format(Ds, bicorpus[Bs]))
                continue
            if not verifnlg(As, Bs, Cs, Ds):
                continue
            At, Bt, Ct = bicorpus[As], bicorpus[Bs], bicorpus[Cs]
            Dt = solvenlg(At, Bt, Ct)
            if Dt is None:
                # solvenlg() returns None when the equation has no solution:
                # there is nothing to measure or to output for this triple.
                continue
            lA, lB, lC, lD = lengths([As, Bs, Cs, Ds], [At, Bt, Ct, Dt])
            if __verbose__:
                # Fill the template first and append Dt afterwards, so that
                # braces inside Dt are never interpreted as format fields.
                analogy = two_line_nlg_fmt.format(
                    As, Bs, Cs, Ds, At, Bt, Ct, 'x',
                    lA=lA, lB=lB, lC=lC, lD=lD)
                sys.stderr.write('{} => x = {}\n\n'.format(analogy, Dt))
            print('{}\t{}'.format(Ds, Dt))
def indirect_iteration_strategy(self, string=None, strategy='naive'):
    """
    Generate triples (As, Bs, Cs) of source sentences of the bicorpus to be
    tried against the sentence Ds to translate (passed as `string`).

    If there is no Ds, just output all triples of sentences in the order of
    the bicorpus.  Else, apply the strategy selected to enumerate the source
    sentences in the bicorpus.  There are 3 strategies implemented:
        naive:
            no sort is performed.  Just output all triples of sentences
            in the order of the bicorpus.
        by distance:
            keep the 100 sentences closest to Ds (memo_fast_distance) and
            output, in sorted order, all triples made of three different
            sentences among them.
        closest:
            for each of the 3 sentences Bs closest to Ds, and each of the
            3 sentences As ranked first relatively to Bs
            (memo_fast_similitude), solve Bs : As :: Ds : x.  When there is
            a solution, output (As, Bs, Cs) for the 3 sentences Cs of the
            bicorpus closest to that solution.
    Any other strategy name outputs nothing.

    Nothing is written on standard output.  Traces go to standard error
    when __verbose__ is set.
    """
    Ds = string
    if __verbose__:
        sys.stderr.write('Ds = "%s", strategy = %s\n' % (Ds, strategy))

    if Ds is None or strategy == 'naive':
        for triple in itertools.product(self, repeat=3):
            yield triple
        return

    # Distances are measured relatively to Ds from here on.
    # NOTE(review): assumes init_memo_fast_distance() is a plain reset, so
    # that calling it once here is enough for both strategies below.
    init_memo_fast_distance(Ds)

    if strategy == 'by distance':
        closest_As = sorted(self.keys(), key=memo_fast_distance)[:100]
        for triple in sorted(itertools.product(closest_As, repeat=3)):
            As, Bs, Cs = triple
            if As != Bs and As != Cs and Bs != Cs:
                if __verbose__:
                    sys.stderr.write('# {} : {} :: {} : {}\n'.format(
                        As, Bs, Cs, Ds))
                yield triple

    elif strategy == 'closest':
        first_N = 3
        # Rank the whole bicorpus once by distance to Ds.
        ranked = sorted(self.keys(), key=memo_fast_distance)
        if __verbose__:
            sys.stderr.write('RESULT {}\n'.format(ranked))
        for Bs in ranked[:first_N]:
            # The memo is now relative to Bs, in order to rank the As.
            init_memo_fast_distance(Bs)
            candidate_As = sorted(
                self.keys(),
                key=lambda Xs: len(Bs) - memo_fast_similitude(Xs))[:first_N]
            for As in candidate_As:
                if __verbose__:
                    sys.stderr.write('# {} : {} :: {} : x\n'.format(
                        Bs, As, Ds))
                CCs = solvenlg(Bs, As, Ds)
                if CCs is None:
                    # No solution for this pair (Bs, As).
                    continue
                if __verbose__:
                    sys.stderr.write('# {} : {} :: {} : {}\n'.format(
                        Bs, As, Ds, CCs))
                # The memo is now relative to the solution CCs, in order
                # to find the actual sentences of the bicorpus close to it.
                init_memo_fast_distance(CCs)
                closest_Cs = sorted(self.keys(),
                                    key=memo_fast_distance)[:first_N]
                for Cs in closest_Cs:
                    if __verbose__:
                        sys.stderr.write('# {} : {} :: {} : {}\n'.format(
                            Bs, As, Ds, Cs))
                    yield (As, Bs, Cs)
def main(repeat=1000, vocsize=8, strlen=8):
    """
    Measure solvenlg() on random analogical equations.

    Draw `repeat` triples of random words (A, B, C) and solve the equation
    A : B :: C : x for each of them.  Each word is built by
    random_word(vocsize, length) with a length drawn between 1 and `strlen`.

    Return a pair of integers:
        - the percentage of equations which have a solution,
        - the total time spent in solvenlg(), in milliseconds.
    The percentage is 0 if `repeat` is not positive.

    The rest of this documentation is a series of tests for solvenlg(),
    verifnlg(), is_analogy() and confirm_analogy().

    >>> solvenlg('aslama','muslim','arsala')
    'mursil'
    >>> solvenlg('abc','aabbcc','aaabbbccc')
    'aaaabbbbcccc'
    >>> '' == solvenlg('aaa','aa','a')
    True
    >>> None == solvenlg('aaa','aa','a')
    False
    >>> solvenlg('aaa','aa','a')
    ''
    >>> print solvenlg('aaa','aa','a')
    <BLANKLINE>
    >>> None == solvenlg('aaac','aa','a')
    True
    >>> '' == solvenlg('aaac','aa','a')
    False
    >>> solvenlg('aaac','aa','a')
    >>> print solvenlg('aaac','aa','a')
    None
    >>> verifnlg('aslama','muslim','arsala','mursil')
    1
    >>> verifnlg('abc','aabbcc','aaabbbccc','aaaabbbbcccc')
    1
    >>> verifnlg('aaa','aa','a','')
    1
    >>> verifnlg('aaac','aa','a','')
    0

    The following instruction causes segmentation fault because None is not a character string.
    *** >>> verifnlg('aaac','aa','a',None)
    *** 1

    *** LINGUISTIC EXAMPLES ***

    *** In German ***

    >>> is_analogy( 'setzen : setzte :: lachen : lachte' )
    True
    >>> is_analogy( 'lang : längste :: scharf : schärfste' )
    True
    >>> is_analogy( 'sprechen : wir sprächen :: nehmen : wir nähmen' )
    True
    >>> is_analogy( 'sprechen : er spräche :: nehmen : er nähme' )
    True
    >>> is_analogy( 'sprechen : du sprächest :: nehmen : du nähmest' )
    True
    >>> is_analogy( 'sprechen : ihr sprächet :: nehmen : ihr nähmet' )
    True
    >>> is_analogy( 'sprechen : ihr aussprächet :: nehmen : ihr ausnähmet' )
    True
    >>> is_analogy( 'fliehen : er floh :: schließen : er schloß' )
    True
    >>> is_analogy( 'sprechen : aussprächet :: nehmen : ausnähmet' )
    True

    *** In Arabic ***

    >>> is_analogy( 'ضسمضكض : ضقسضمض :: كعسمهكعر : كعقسهمعر' )
    True
    >>> is_analogy( 'ضسمضكض : ضقسضمض :: كعسمهك : كعقسهم' )
    True
    >>> is_analogy( 'نضفضزض : نضضفهز :: سضنضوض : سضضنهر' )
    True
    >>> is_analogy( 'huzila : huzAl :: Sudi`a : SudA`' )
    True
    >>> is_analogy( 'kalb : kulaib :: masjid : musaijid' )
    True
    >>> is_analogy( 'yaSilu : yaSala :: yasimu : yasama' )
    True
    >>> is_analogy( 'aslama : arsala :: muslimun : mursilun' )
    True
    >>> is_analogy( 'aslama : arsala :: muslim : mursil' )
    True
    >>> is_analogy( 'kataba : kAtib :: sakana : sAkin' )
    True
    >>> is_analogy( 'huzila : huzAl :: Sudi`a : SudA`' )
    True

    *** Akkadian ***

    >>> is_analogy( 'ukaSSad : uktanaSSad :: uSakSad : uStanakSad' )
    True

    *** Hebrew ***

    >>> is_analogy( 'iahmod : mahmAd :: ia`abor : ma`abAr' )
    True

    *** Proto-Semitic ***

    >>> is_analogy( 'yaqtilu : qatil :: yuqtilu : qutil' )
    True
    >>> is_analogy( 'yasriqu : sariq :: yanqimu : naqim' )
    True

    *** In Mandarin Chinese ***

    >>> is_analogy( '一年,是我国社会主义 : 一年是我国 :: ,社会主义 : ' )
    True
    >>> is_analogy( '科学 : 科学家 :: 政治 : 政治家' )
    True
    >>> is_analogy( '我 : 我们 :: 他 : 他们' )
    True
    >>> is_analogy( '今年 : 今天 :: 明年 : 明天' )
    True
    >>> is_analogy( '读 : 读者 :: 学 : 学者' )
    True
    >>> is_analogy( '勇 : 勇者 :: 强 : 强者' )
    True
    >>> is_analogy( '车 : 车行 :: 药 : 药行' )
    True
    >>> is_analogy( '学 : 学院 :: 医 : 医院' )
    True
    >>> is_analogy( '工程 : 工程师 :: 理发 : 理发师' )
    True
    >>> is_analogy( 'kexue : kexuejia :: zhengzhi : zhengzhijia' )
    True
    >>> is_analogy( 'wo : women :: ta : tamen' )
    True
    >>> is_analogy( 'jinnian : jintian :: mingnian : mingtian' )
    True
    >>> is_analogy( 'du : duzhe :: xue : xuezhe' )
    True
    >>> is_analogy( 'AduA : AduzheA :: AxueA : AxuezheA' )
    True
    >>> is_analogy( 'yong : yongzhe :: qiang : qiangzhe' )
    True
    >>> is_analogy( 'che : chehang :: yao : yaohang' )
    True
    >>> is_analogy( 'xue : xueyuan :: yi : yiyuan' )
    True
    >>> is_analogy( 'gongcheng : gongchengshi :: lifa : lifashi' )
    True

    *** In French ***

    >>> is_analogy( 'dues : indu :: nées : inné' )
    True
    >>> is_analogy( 'inné : nées :: indu : dues' )
    True
    >>> is_analogy( 'réaction : réactionnaire :: répression : répressionnaire' )
    True
    >>> is_analogy( 'aimer : ils aimaient :: marcher : ils marchaient' )
    True
    >>> is_analogy( 'pardonner : impardonnable :: décorer : imdécorable' )
    True
    >>> is_analogy( 'joindre : je joins :: oindre : je oins' )
    True
    >>> is_analogy( 'logique : logiciel :: ludique : ludiciel' )
    True
    >>> is_analogy( 'prendrai : prendre :: viendrai : viendre' )
    True
    >>> is_analogy( 'changer : tu changes :: observer : tu observes' )
    True
    >>> is_analogy( 'marcher : tu marches :: démenager : tu démenages' )
    True
    >>> is_analogy( 'préférer : je préfère :: vénérer : je vénère' )
    True
    >>> is_analogy( 'préférer : je préfère :: révérer : je révère' )
    True
    >>> is_analogy( 'préférer : je préfère :: zébrer : je zèbre' )
    True
    >>> is_analogy( 'fini : infini :: exact : inexact' )
    True
    >>> is_analogy( "recevoir : j'ai reçu :: percevoir : j'ai perçu" )
    True
    >>> is_analogy( "décevoir : j'ai déçu :: percevoir : j'ai perçu" )
    True
    >>> is_analogy( "concevoir : j'ai conçu :: percevoir : j'ai perçu" )
    True

    *** In Japanese ***

    >>> is_analogy( '食べる : 食べます :: 認める : 認めます' )
    True
    >>> is_analogy( '痛い : 痛む :: 親しい : 親しむ' )
    True
    >>> is_analogy( 'あれ : これ :: あっち : こっち' )
    True
    >>> is_analogy( '乗る : 乗せる :: 寄る : 寄せる' )
    True
    >>> is_analogy( '自由 : 不自由な :: 用意 : 不用意な' )
    True
    >>> is_analogy( '飛びます : 飛ぶ :: 選びます : 選ぶ' )
    True
    >>> is_analogy( '飲む : 飲みます :: 進む : 進みます' )
    True
    >>> is_analogy( '飲みます : 飲んでみます :: 進みます : 進んでみます' )
    True
    >>> is_analogy( '冷める : 冷まる :: 決める : 決まる' )
    True
    >>> is_analogy( '乗る : 乗せる :: 寄る : 寄せる' )
    True
    >>> is_analogy( '新しい : 新しかった :: 嬉しい : 嬉しかった' )
    True
    >>> is_analogy( '新しい : 新しかった :: きれい : きれかった' )
    True

    *** In Latin ***

    >>> is_analogy( 'oratorem : orator :: honorem : honor' )
    True
    >>> is_analogy( 'facio : conficio :: capio : concipio' )
    True
    >>> is_analogy( 'amo : amas :: oro : oras' )
    True
    >>> is_analogy( 'amo : amat :: oro : orat' )
    True
    >>> is_analogy( 'amo : amamus :: oro : oramus' )
    True

    *** In Malay ***

    >>> is_analogy( 'tinggal : ketinggalan :: duduk : kedudukan' )
    True
    >>> is_analogy( 'pekerja : kerja :: pelawat : lawat' )
    True
    >>> is_analogy( 'kawan : mengawani :: keliling : mengelilingi' )
    True
    >>> is_analogy( 'isteri : beristeri :: ladang : berladang' )
    True
    >>> is_analogy( 'keras : mengeraskan :: kena : mengenakan' )
    True

    *** In Polish ***

    >>> is_analogy( 'biorąc : bierzesz :: piorąc : pierzesz' )
    True
    >>> is_analogy( 'ubezpieczony : ubezpieczeni :: obrażony : obrażeni' )
    True
    >>> is_analogy( 'spiewać : spiewaczka :: łechtać : łechtaczka' )
    True
    >>> is_analogy( 'wyszedłem : wyszłaS :: poszedłem : poszłaS' )
    True
    >>> is_analogy( 'rozproszyć : rozpraszać :: rozmnożyć się : rozmnażać się' )
    True
    >>> is_analogy( 'stworzyć : stwarzać :: rozmnożyć się : rozmnażać się' )
    True
    >>> is_analogy( 'stworzyć : stwarzać :: mnożyć się : mnażać się' )
    True
    >>> is_analogy( 'wyszedłeś : wyszłaś :: poszedłeś : poszłaś' )
    True
    >>> is_analogy( 'zgubiony : zgubieni :: zmartwiony : zmartwieni' )
    True
    >>> is_analogy( 'ṥpiewać : ṥpiewaczka :: biegać : biegaczka' )
    True

    *** In English ***

    >>> is_analogy( 'wolf : wolves :: leaf : leaves' )
    True
    >>> is_analogy( 'wolf : wolves :: calf : calves' )
    True

    *** FORMAL EXAMPLES ***

    >>> is_analogy( 'bb : ab :: ba : aa' )
    True
    >>> is_analogy( 'a : aa :: aaa : aaaa' )
    True
    >>> is_analogy( 'b : ab :: aab : aaab' )
    True
    >>> is_analogy( 'b : ba :: baa : baaa' )
    True
    >>> is_analogy( 'ab : aabb :: aaabbb : aaaabbbb' )
    True
    >>> is_analogy( 'ab : abab :: ababab : abababab' )
    True
    >>> is_analogy( 'aab : aaaabb :: aaaaaabbb : aaaaaaaabbbb' )
    True
    >>> is_analogy( 'aba : aabbaa :: aaabbbaaa : aaaabbbbaaaa' )
    True
    >>> is_analogy( 'ab : aabb :: aaaaaaabbbbbbb : aaaaaaaabbbbbbbb' )
    True
    >>> is_analogy( 'abc : aabbcc :: aaabbbccc : aaaabbbbcccc' )
    True
    >>> is_analogy( 'a : aa :: aaaaaaa : aaaaaaaa' )
    True
    >>> is_analogy( 'b : ab :: aaaaaaab : aaaaaaaab' )
    True
    >>> is_analogy( 'ab : aabb :: aaaaaaabbbbbbb : aaaaaaaabbbbbbbb' )
    True
    >>> is_analogy( 'ab : abab :: ababababababab : abababababababab' )
    True
    >>> is_analogy( 'aab : aaaabb :: aaaaaaaaaaaaaabbbbbbb : aaaaaaaaaaaaaaaabbbbbbbb' )
    True
    >>> is_analogy( 'aba : aabbaa :: aaaaaaabbbbbbbaaaaaaa : aaaaaaaabbbbbbbbaaaaaaaa' )
    True
    >>> is_analogy( 'aab : aaaabb :: aaaaaaaaaaaaaabbbbbbb : aaaaaaaaaaaaaaaabbbbbbbb' )
    True
    >>> is_analogy( 'abc : aabbcc :: aaaaaaabbbbbbbccccccc : aaaaaaaabbbbbbbbcccccccc' )
    True

    *** FORMAL COUNTER-EXAMPLES ***

    >>> is_analogy( 'a : ab :: c : bc' )
    False
    >>> is_analogy( 'abcde : edcba :: abc : cba' )
    False
    >>> is_analogy( 'b : b :: ba : bb' )
    False
    >>> is_analogy( 'b : ab :: aab : abaa' )
    False
    >>> is_analogy( 'a : aa :: aaa : aaaaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aaabbbba' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aaabbbab' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aaabbbaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aaabbabb' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aabbbaba' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aabbbaab' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbbbaaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aaababbb' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aaababbb' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aabbabba' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aabbabab' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbbabaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbbaabb' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbbaaba' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : bbbbaaaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aababbba' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aababbab' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbabbaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aabababb' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbababa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : bbbabaaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : aabaabbb' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbaabba' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbaabab' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : bbbaabaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : abbaaabb' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : bbbaaaba' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : bbabbbba' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : bbabbbab' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : bababbaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaabbb : baaabbba' )
    False
    >>> is_analogy( 'ab : abab :: ababab : ababbaab' )
    False
    >>> is_analogy( 'ab : abab :: ababab : bbababaa' )
    False
    >>> is_analogy( 'aab : aaaabb :: aaaaaabbb : aaabaaaababb' )
    False
    >>> is_analogy( 'aba : aabbaa :: aaabbbaaa : aababbabaaaa' )
    False
    >>> is_analogy( 'ab : aabb :: aaaaaaabbbbbbb : aabaaaaababbbbbb' )
    False
    >>> is_analogy( 'abc : aabbcc :: aaabbbccc : aababcbbcacc' )
    False
    >>> is_analogy( 'ab : aabb :: ab : abba' )
    False
    >>> is_analogy( 'ab : ab :: aabb : abba' )
    False
    >>> is_analogy( 'ab : abab :: abab : abbaab' )
    False
    >>> is_analogy( 'abbaab : abab :: abab : ab' )
    False
    >>> is_analogy( 'ab : aabb :: aabb : aababb' )
    False

    *** Tests from Baptiste Jonglez ([email protected]). ***

    >>> confirm_analogy( 'eue : rue :: nous devons : nous drvons' )
    True
    >>> confirm_analogy( 'sue : rue :: nous devons : nous desons' )
    True
    >>> confirm_analogy( 'eue : rue :: nous devons : nous devons' )
    False
    >>> confirm_analogy( 'sus : vus :: nous devons : nous devons' )
    False
    >>> confirm_analogy( 'tata : tàtà :: haha : hàhà' )
    True
    >>> confirm_analogy( 'tété : tete :: héhé : hehe' )
    True
    >>> confirm_analogy( 'tete : tété :: hehe : héhé' )
    True
    >>> confirm_analogy( 'aaaa : à :: aaaàa : àà' )
    True
    >>> confirm_analogy( '決める : 決めます :: 食べる : 食べます' )
    True
    >>> confirm_analogy( '一年,是我国社会主义 : ,社会主义 :: 一年是我国 : ' )
    True
    """
    successes, total_t = 0, 0
    for _ in xrange(repeat):
        # The three lengths are drawn before the three words, so that a
        # seeded random generator gives the same equations as before.
        lenA = random.randint(1, strlen)
        lenB = random.randint(1, strlen)
        lenC = random.randint(1, strlen)
        A = random_word(vocsize, lenA)
        B = random_word(vocsize, lenB)
        C = random_word(vocsize, lenC)
        # Only the resolution itself is timed.
        t1 = time.time()
        D = solvenlg(A, B, C)
        total_t += time.time() - t1
        if D is not None:
            if __verbose__:
                sys.stderr.write(__nlg_fmt__ % (A, B, C, D) + '\n')
            successes += 1
    # Round the ratio itself: rounding 100 * successes (a whole number)
    # before the division left the percentage truncated.
    percentage = int(round(100.0 * successes / repeat)) if repeat > 0 else 0
    return percentage, int(round(1000 * total_t))
def confirm_analogy(s):
    """
    Check the last term of an analogy against the solution of solvenlg().

    `s` is written 'A : B :: C : D'.  It is cut at every colon, so that the
    '::' in the middle leaves an empty field between B and C, which is
    ignored.  Each term is stripped of its surrounding whitespace.

    Return the result of comparing D with solvenlg(A, B, C).
    Raise ValueError if cutting `s` at colons does not give 5 fields.
    """
    fields = [field.strip() for field in s.split(':')]
    A, B, _, C, D = fields
    solution = solvenlg(A, B, C)
    return D == solution