示例#1
0
def translate(bicorpus, file=sys.stdin):
    """
	input: Ds in the source language
	output: list of Bt in the target language, candidate translations
	data (passed as argument):
		bicorpus = list of pairs (As, At) where At is the translation of As.
	"""
    for Ds in file:
        Ds = Ds.rstrip('\n')
        if __verbose__:
            print >> sys.stderr, '\n# Translating sentence: {}'.format(Ds)
        #		import itertools
        #		for (As, Bs, Cs) in itertools.product(bicorpus, repeat=3):
        for (As, Bs, Cs) in bicorpus.iter(string=Ds,
                                          strategy='closest',
                                          method='indirect'):
            if Ds == Bs:
                print '{}\t{}'.format(Ds, bicorpus[Bs])
            else:
                if verifnlg(As, Bs, Cs, Ds):
                    At, Bt, Ct = bicorpus[As], bicorpus[Bs], bicorpus[Cs]
                    Dt = solvenlg(At, Bt, Ct)
                    lA, lB, lC, lD = lengths([As, Bs, Cs, Ds],
                                             [At, Bt, Ct, Dt])
                    if __verbose__:                        print >> sys.stderr, \
         ('{}  =>  x = {}\n'.format(two_line_nlg_fmt, Dt)).\
          format(As, Bs, Cs, Ds, At, Bt, Ct, 'x', lA=lA, lB=lB, lC=lC, lD=lD)
                    print '{}\t{}'.format(Ds, Dt)
示例#2
0
def indirect_iteration_strategy(self, string=None, strategy='naive'):
    """
    Generate triples (As, Bs, Cs) of source sentences of the bicorpus.

    If no Ds (string is None), then just output all triples of sentences
        in the order of the bicorpus.
    Else, apply the strategy selected to enumerate the source sentences
        in the bicorpus.
    There are 3 possible strategies implemented:
        naive: no sort is performed.
                Just output all triples of sentences in the order
                of the bicorpus.
        by distance: the 100 first sentences when ranked by
                memo_fast_distance (initialised with Ds) are kept, and
                all triples of pairwise different sentences among them
                are output.
        closest: for each of the 3 best ranked Bs (same ranking as
                above), for each of the 3 As ranked first by
                len(Bs) - memo_fast_similitude (initialised with Bs),
                solve Bs : As :: Ds : x; if there is a solution CCs,
                output (As, Bs, Cs) for the 3 Cs ranked first by
                memo_fast_distance (initialised with CCs).
    Any other strategy name outputs nothing.

    Diagnostic messages are written on standard error only, and only when
    __verbose__ is set, so that standard output stays free for results.
    """
    Ds = string
    if __verbose__:
        print >> sys.stderr, 'Ds = "%s", strategy = %s' % (Ds, strategy)
    if Ds is None or strategy == 'naive':
        for triple in itertools.product(self, repeat=3):
            yield triple
        return

    init_memo_fast_distance(Ds)
    if strategy == 'by distance':
        closest_As = sorted(self.keys(), key=memo_fast_distance)[:100]
        # NOTE(review): sorted() puts the triples in lexicographic order,
        # not in order of increasing distance to Ds. Kept as is because
        # callers may rely on the current order -- confirm the intent.
        for triple in sorted(itertools.product(closest_As, repeat=3)):
            As, Bs, Cs = triple
            if As != Bs and As != Cs and Bs != Cs:
                if __verbose__:
                    print >> sys.stderr, '# {} : {} :: {} : {}'.format(
                        As, Bs, Cs, Ds)
                yield triple
    elif strategy == 'closest':
        first_N = 3
        # Rank once; the same ranking serves the trace and the Bs loop.
        ranked = sorted(self.keys(), key=memo_fast_distance)
        if __verbose__:
            print >> sys.stderr, 'RESULT', ranked, '\n'
        for Bs in ranked[:first_N]:
            init_memo_fast_distance(Bs)
            # sorted() is evaluated completely before the loop starts, so
            # re-initialising the memo inside the loop does not disturb it.
            candidate_As = sorted(
                self.keys(),
                key=lambda Xs: len(Bs) - memo_fast_similitude(Xs))[:first_N]
            for As in candidate_As:
                if __verbose__:
                    print >> sys.stderr, '# {} : {} :: {} : x'.format(
                        Bs, As, Ds)
                CCs = solvenlg(Bs, As, Ds)
                if CCs is None:
                    continue
                if __verbose__:
                    print >> sys.stderr, '# {} : {} :: {} : {}'.format(
                        Bs, As, Ds, CCs)
                init_memo_fast_distance(CCs)
                candidate_Cs = sorted(self.keys(),
                                      key=memo_fast_distance)[:first_N]
                for Cs in candidate_Cs:
                    if __verbose__:
                        print >> sys.stderr, '# {} : {} :: {} : {}'.format(
                            Bs, As, Ds, Cs)
                    yield (As, Bs, Cs)
示例#3
0
def main(repeat=1000, vocsize=8, strlen=8):
    """
	Time solvenlg on random analogical equations.

	`repeat` times, three random words A, B and C are drawn with
	random_word(vocsize, length), each length being drawn in 1..strlen,
	and the equation A : B :: C : x is submitted to solvenlg.

	Returns a pair:
		- the percentage of equations which had a solution, rounded to
		  the nearest integer (0 if repeat is 0),
		- the total time spent in solvenlg, in milliseconds, rounded to
		  the nearest integer.

	The rest of this docstring is the doctest suite of the module.

	>>> solvenlg('aslama','muslim','arsala')
	'mursil'
	>>> solvenlg('abc','aabbcc','aaabbbccc')
	'aaaabbbbcccc'
	>>> '' == solvenlg('aaa','aa','a')
	True
	>>> None == solvenlg('aaa','aa','a')
	False
	>>> solvenlg('aaa','aa','a')
	''
	>>> print solvenlg('aaa','aa','a')
	<BLANKLINE>
	>>> None == solvenlg('aaac','aa','a')
	True
	>>> '' == solvenlg('aaac','aa','a')
	False
	>>> solvenlg('aaac','aa','a')

	>>> print solvenlg('aaac','aa','a')
	None
	>>> verifnlg('aslama','muslim','arsala','mursil')
	1
	>>> verifnlg('abc','aabbcc','aaabbbccc','aaaabbbbcccc')
	1
	>>> verifnlg('aaa','aa','a','')
	1
	>>> verifnlg('aaac','aa','a','')
	0
	
	The following instruction causes segmentation fault because None is not a character string.
	*** >>> verifnlg('aaac','aa','a',None)
	***	1
	
	*** LINGUISTIC EXAMPLES ***
	
	*** auf Deutsch ***
	
	>>> is_analogy( 'setzen : setzte :: lachen : lachte' )
	True
	>>> is_analogy( 'lang : längste :: scharf : schärfste' )
	True
	>>> is_analogy( 'sprechen : wir sprächen :: nehmen : wir nähmen' )
	True
	>>> is_analogy( 'sprechen : er spräche :: nehmen : er nähme' )
	True
	>>> is_analogy( 'sprechen : du sprächest :: nehmen : du nähmest'  )
	True
	>>> is_analogy( 'sprechen : ihr sprächet :: nehmen : ihr nähmet' )
	True
	>>> is_analogy( 'sprechen : ihr aussprächet :: nehmen : ihr ausnähmet' )
	True
	>>> is_analogy( 'fliehen : er floh :: schließen : er schloß' )
	True
	>>> is_analogy( 'sprechen : aussprächet :: nehmen : ausnähmet' )
	True

	*** bi llugha 'l 3arabiya ***
	
	>>> is_analogy( 'ضسمضكض : ضقسضمض :: كعسمهكعر : كعقسهمعر' )
	True
	>>> is_analogy( 'ضسمضكض : ضقسضمض :: كعسمهك : كعقسهم' )
	True
	>>> is_analogy( 'نضفضزض : نضضفهز :: سضنضوض : سضضنهر' )
	True
	>>> is_analogy( 'huzila : huzAl :: Sudi`a : SudA`' )
	True
	>>> is_analogy( 'kalb : kulaib :: masjid : musaijid' )
	True
	>>> is_analogy( 'yaSilu : yaSala :: yasimu : yasama' )
	True
	>>> is_analogy( 'aslama : arsala :: muslimun : mursilun' )
	True
	>>> is_analogy( 'aslama : arsala :: muslim : mursil' )
	True
	>>> is_analogy( 'kataba : kAtib :: sakana : sAkin' )
	True
	>>> is_analogy( 'huzila : huzAl :: Sudi`a : SudA`' )
	True

	*** Akkadien ***
	
	>>> is_analogy( 'ukaSSad : uktanaSSad :: uSakSad : uStanakSad' )
	True

	*** Hébreu ***
	
	>>> is_analogy( 'iahmod : mahmAd :: ia`abor : ma`abAr' )
	True

	*** Proto-sémitique ***
	
	>>> is_analogy( 'yaqtilu : qatil :: yuqtilu : qutil' )
	True
	>>> is_analogy( 'yasriqu : sariq :: yanqimu : naqim' )
	True

	*** 普通话 ***

	>>> is_analogy( '一年,是我国社会主义 : 一年是我国 :: ,社会主义 : ' )
	True
	>>> is_analogy( '科学 : 科学家 :: 政治 : 政治家' )
	True
	>>> is_analogy( '我 : 我们 :: 他 : 他们' )
	True
	>>> is_analogy( '今年 : 今天 :: 明年 : 明天' )
	True
	>>> is_analogy( '读 : 读者 :: 学 : 学者' )
	True
	>>> is_analogy( '勇 : 勇者 :: 强 : 强者' )
	True
	>>> is_analogy( '车 : 车行 :: 药 : 药行' )
	True
	>>> is_analogy( '学 : 学院 :: 医 : 医院' )
	True
	>>> is_analogy( '工程 : 工程师 :: 理发 : 理发师' )
	True
	>>> is_analogy( 'kexue : kexuejia :: zhengzhi : zhengzhijia' )
	True
	>>> is_analogy( 'wo : women :: ta : tamen' )
	True
	>>> is_analogy( 'jinnian : jintian :: mingnian : mingtian' )
	True
	>>> is_analogy( 'du : duzhe :: xue : xuezhe' )
	True
	>>> is_analogy( 'AduA : AduzheA :: AxueA : AxuezheA' )
	True
	>>> is_analogy( 'yong : yongzhe :: qiang : qiangzhe' )
	True
	>>> is_analogy( 'che : chehang :: yao : yaohang' )
	True
	>>> is_analogy( 'xue : xueyuan :: yi : yiyuan' )
	True
	>>> is_analogy( 'gongcheng : gongchengshi :: lifa : lifashi' )
	True

	*** en français ***

	>>> is_analogy( 'dues : indu :: nées : inné' )
	True
	>>> is_analogy( 'inné : nées :: indu : dues' )
	True
	>>> is_analogy( 'réaction : réactionnaire :: répression : répressionnaire' )
	True
	>>> is_analogy( 'aimer : ils aimaient :: marcher : ils marchaient' )
	True
	>>> is_analogy( 'pardonner : impardonnable :: décorer : imdécorable' )
	True
	>>> is_analogy( 'joindre : je joins :: oindre : je oins' )
	True
	>>> is_analogy( 'logique : logiciel :: ludique : ludiciel' )
	True
	>>> is_analogy( 'prendrai : prendre :: viendrai : viendre' )
	True
	>>> is_analogy( 'changer : tu changes :: observer : tu observes' )
	True
	>>> is_analogy( 'marcher : tu marches :: démenager : tu démenages' )
	True
	>>> is_analogy( 'préférer : je préfère :: vénérer : je vénère' )
	True
	>>> is_analogy( 'préférer : je préfère :: révérer : je révère' )
	True
	>>> is_analogy( 'préférer : je préfère :: zébrer : je zèbre' )
	True
	>>> is_analogy( 'fini : infini :: exact : inexact' )
	True
	>>> is_analogy( "recevoir : j'ai reçu :: percevoir : j'ai perçu" )
	True
	>>> is_analogy( "décevoir : j'ai déçu :: percevoir : j'ai perçu" )
	True
	>>> is_analogy( "concevoir : j'ai conçu :: percevoir : j'ai perçu" )
	True

	*** 日本語で ***

	>>> is_analogy( '食べる : 食べます :: 認める : 認めます' )
	True
	>>> is_analogy( '痛い : 痛む :: 親しい : 親しむ' )
	True
	>>> is_analogy( 'あれ : これ :: あっち : こっち' )
	True
	>>> is_analogy( '乗る : 乗せる :: 寄る : 寄せる' )
	True
	>>> is_analogy( '自由 : 不自由な :: 用意 : 不用意な' )
	True
	>>> is_analogy( '飛びます : 飛ぶ :: 選びます : 選ぶ' )
	True
	>>> is_analogy( '飲む : 飲みます :: 進む : 進みます' )
	True
	>>> is_analogy( '飲みます : 飲んでみます :: 進みます : 進んでみます' )
	True
	>>> is_analogy( '冷める : 冷まる :: 決める : 決まる' )
	True
	>>> is_analogy( '乗る : 乗せる :: 寄る : 寄せる' )
	True
	>>> is_analogy( '新しい : 新しかった :: 嬉しい : 嬉しかった' )
	True
	>>> is_analogy( '新しい : 新しかった :: きれい : きれかった' )
	True

	*** lingua latine ***

	>>> is_analogy( 'oratorem : orator :: honorem : honor' )
	True
	>>> is_analogy( 'facio : conficio :: capio : concipio' )
	True
	>>> is_analogy( 'amo : amas :: oro : oras' )
	True
	>>> is_analogy( 'amo : amat :: oro : orat' )
	True
	>>> is_analogy( 'amo : amamus :: oro : oramus' )
	True

	*** dalam bahasa melayu ***

	>>> is_analogy( 'tinggal : ketinggalan :: duduk : kedudukan' )
	True
	>>> is_analogy( 'pekerja : kerja :: pelawat : lawat' )
	True
	>>> is_analogy( 'kawan : mengawani :: keliling : mengelilingi' )
	True
	>>> is_analogy( 'isteri : beristeri :: ladang : berladang' )
	True
	>>> is_analogy( 'keras : mengeraskan :: kena : mengenakan' )
	True

	*** po polsku ***

	True
	>>> is_analogy( 'biorąc : bierzesz :: piorąc : pierzesz' )
	True
	>>> is_analogy( 'ubezpieczony : ubezpieczeni :: obrażony : obrażeni' )
	True
	>>> is_analogy( 'spiewać : spiewaczka :: łechtać : łechtaczka' )
	True
	>>> is_analogy( 'wyszedłem : wyszłaS :: poszedłem : poszłaS' )
	True
	>>> is_analogy( 'rozproszyć : rozpraszać :: rozmnożyć się : rozmnażać się' )
	True
	>>> is_analogy( 'stworzyć : stwarzać :: rozmnożyć się : rozmnażać się' )
	True
	>>> is_analogy( 'stworzyć : stwarzać :: mnożyć się : mnażać się' )
	True
	>>> is_analogy( 'wyszedłeś : wyszłaś :: poszedłeś : poszłaś' )
	True
	>>> is_analogy( 'zgubiony : zgubieni :: zmartwiony : zmartwieni' )
	True
	>>> is_analogy( 'ṥpiewać : ṥpiewaczka :: biegać : biegaczka' )
	True

	*** in English ***

	>>> is_analogy( 'wolf : wolves :: leaf : leaves' )
	True
	>>> is_analogy( 'wolf : wolves :: calf : calves' )
	True

	*** EXEMPLES FORMELS ***

	>>> is_analogy( 'bb : ab :: ba : aa' )
	True

	>>> is_analogy( 'a : aa :: aaa : aaaa' )
	True
	>>> is_analogy( 'b : ab :: aab : aaab' )
	True
	>>> is_analogy( 'b : ba :: baa : baaa' )
	True
	>>> is_analogy( 'ab : aabb :: aaabbb : aaaabbbb' )
	True
	>>> is_analogy( 'ab : abab :: ababab : abababab' )
	True
	>>> is_analogy( 'aab : aaaabb :: aaaaaabbb : aaaaaaaabbbb' )
	True
	>>> is_analogy( 'aba : aabbaa :: aaabbbaaa : aaaabbbbaaaa' )
	True
	>>> is_analogy( 'ab : aabb :: aaaaaaabbbbbbb : aaaaaaaabbbbbbbb' )
	True
	>>> is_analogy( 'abc : aabbcc :: aaabbbccc : aaaabbbbcccc' )
	True

	>>> is_analogy( 'a : aa :: aaaaaaa : aaaaaaaa' )
	True
	>>> is_analogy( 'b : ab :: aaaaaaab : aaaaaaaab' )
	True
	>>> is_analogy( 'ab : aabb :: aaaaaaabbbbbbb : aaaaaaaabbbbbbbb' )
	True
	>>> is_analogy( 'ab : abab :: ababababababab : abababababababab' )
	True
	>>> is_analogy( 'aab : aaaabb :: aaaaaaaaaaaaaabbbbbbb : aaaaaaaaaaaaaaaabbbbbbbb' )
	True
	>>> is_analogy( 'aba : aabbaa :: aaaaaaabbbbbbbaaaaaaa : aaaaaaaabbbbbbbbaaaaaaaa' )
	True
	>>> is_analogy( 'aab : aaaabb :: aaaaaaaaaaaaaabbbbbbb : aaaaaaaaaaaaaaaabbbbbbbb' )
	True
	>>> is_analogy( 'abc : aabbcc :: aaaaaaabbbbbbbccccccc : aaaaaaaabbbbbbbbcccccccc' )
	True

	*** CONTRE-EXEMPLES FORMELS ***

	>>> is_analogy( 'a : ab :: c : bc' )
	False
	>>> is_analogy( 'abcde : edcba :: abc : cba' )
	False
	>>> is_analogy( 'b : b :: ba : bb' )
	False
	>>> is_analogy( 'b : ab :: aab : abaa' )
	False
	>>> is_analogy( 'a : aa :: aaa : aaaaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aaabbbba' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aaabbbab' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aaabbbaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aaabbabb' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aabbbaba' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aabbbaab' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbbbaaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aaababbb' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aaababbb' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aabbabba' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aabbabab' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbbabaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbbaabb' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbbaaba' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : bbbbaaaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aababbba' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aababbab' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbabbaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aabababb' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbababa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : bbbabaaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : aabaabbb' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbaabba' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbaabab' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : bbbaabaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : abbaaabb' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : bbbaaaba' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : bbabbbba' )
	False
	>>> is_analogy( 'ab : aabb :: aaabbb : bbabbbab' )
	False

	>>> is_analogy( 'ab : aabb :: aaabbb : bababbaa' )
	False

	>>> is_analogy( 'ab : aabb :: aaabbb : baaabbba' )
	False
	>>> is_analogy( 'ab : abab :: ababab : ababbaab' )
	False
	>>> is_analogy( 'ab : abab :: ababab : bbababaa' )
	False
	>>> is_analogy( 'aab : aaaabb :: aaaaaabbb : aaabaaaababb' )
	False
	>>> is_analogy( 'aba : aabbaa :: aaabbbaaa : aababbabaaaa' )
	False
	>>> is_analogy( 'ab : aabb :: aaaaaaabbbbbbb : aabaaaaababbbbbb' )
	False
	>>> is_analogy( 'abc : aabbcc :: aaabbbccc : aababcbbcacc' )
	False
	>>> is_analogy( 'ab : aabb :: ab : abba' )
	False
	>>> is_analogy( 'ab : ab :: aabb : abba' )
	False
	>>> is_analogy( 'ab : abab :: abab : abbaab' )
	False
	>>> is_analogy( 'abbaab : abab :: abab : ab' )
	False
	>>> is_analogy( 'ab : aabb :: aabb : aababb' )
	False
	
	*** Tests from Baptiste Jonglez ([email protected]). ***
	
	>>> confirm_analogy( 'eue : rue :: nous devons : nous drvons' )
	True
	>>> confirm_analogy( 'sue : rue :: nous devons : nous desons' )
	True
	>>> confirm_analogy( 'eue : rue :: nous devons : nous devons' )
	False
	>>> confirm_analogy( 'sus : vus :: nous devons : nous devons' )
	False
	
	>>> confirm_analogy( 'tata : tàtà :: haha : hàhà' )
	True
	>>> confirm_analogy( 'tété : tete :: héhé : hehe' )
	True
	>>> confirm_analogy( 'tete : tété :: hehe : héhé' )
	True
	>>> confirm_analogy( 'aaaa : à :: aaaàa : àà' )
	True
	>>> confirm_analogy( '決める : 決めます :: 食べる : 食べます' )
	True
	>>> confirm_analogy( '一年,是我国社会主义 : ,社会主义 :: 一年是我国 : ' )
	True
	"""

    successes, total_t = 0, 0
    for _ in xrange(repeat):
        # The three lengths are drawn before the three words, so that the
        # sequence of calls to the random generator stays the same.
        lenA = random.randint(1, strlen)
        lenB = random.randint(1, strlen)
        lenC = random.randint(1, strlen)
        A = random_word(vocsize, lenA)
        B = random_word(vocsize, lenB)
        C = random_word(vocsize, lenC)
        # Only the call to solvenlg is timed.
        t1 = time.time()
        D = solvenlg(A, B, C)
        total_t += time.time() - t1
        if D is not None:
            if __verbose__:
                print >> sys.stderr, __nlg_fmt__ % (A, B, C, D)
            successes += 1
    if repeat:
        # Divide first, round afterwards: rounding before the division
        # truncated the percentage (2 out of 3 gave 66 instead of 67).
        success_rate = int(round(100.0 * successes / repeat))
    else:
        # No equation tried: avoid dividing by zero.
        success_rate = 0
    return success_rate, int(round(1000 * total_t))
示例#4
0
def confirm_analogy(s):
    """
    Tell whether the last term of the analogy written in s is the
    solution that solvenlg computes from the first three terms.

    s is cut at each ':' and every piece is stripped of surrounding
    whitespace, so 'A : B :: C : D' gives the five pieces A, B, '', C, D.
    The middle piece (between the two colons of '::') is ignored.
    Unpacking fails with ValueError if s does not give exactly 5 pieces.
    """
    pieces = [piece.strip() for piece in s.split(':')]
    A, B, _, C, D = pieces
    solution = solvenlg(A, B, C)
    return D == solution