Exemplo n.º 1
0
    def normalize(self):
        """
        Put the ratios held by this container into canonical form, in place.

        The container is read as a sequence of ratios ``[A, B]``.  Two
        rewrites are applied, after which ``is_normalized`` is set so that
        later calls return immediately:

        1. If, in the first ratio, B is shorter than A, the two terms of
           every ratio are exchanged (so that As are smaller than Bs).
        2. If there are exactly two ratios, B (second term of the first
           ratio) and C (first term of the second ratio) are exchanged
           when ``memo_fast_similitude(B) < memo_fast_similitude(C)``.
           The memo is initialised with A just before, so the two values
           are presumably similitudes with A -- TODO confirm against
           the helpers' definitions.

        An empty container has no first ratio to inspect: it is left
        untouched and is not flagged as normalized, so that it can still
        be normalized once ratios have been added.
        """
        if self.is_normalized:
            return
        if len(self) == 0:
            # Nothing to look at; self[0] below would raise IndexError.
            return

        first_A, first_B = self[0][0], self[0][1]
        if len(first_B) < len(first_A):
            # The first ratio decides for all of them.
            self[:] = [[ratio[1], ratio[0]] for ratio in self]

        if len(self) == 2:
            # Re-read the terms: the exchange above may have moved them.
            A, B, C = self[0][0], self[0][1], self[1][0]
            init_memo_fast_distance(A)
            if memo_fast_similitude(B) < memo_fast_similitude(C):
                self[0][1], self[1][0] = C, B

        self.is_normalized = True
Exemplo n.º 2
0
def indirect_iteration_strategy(self, string=None, strategy='naive'):
    """
    Enumerate triples (As, Bs, Cs) of source sentences of the bicorpus.

    `string` is Ds, the sentence to be translated.  If it is None, or if
    the strategy is 'naive', no sort is performed: every triple of
    sentences is output in the order of the bicorpus (triples with
    repeated sentences included).

    Otherwise the selected strategy is applied:
        by distance: only the 100 sentences closest to Ds according to
                memo_fast_distance are considered.  Every triple of
                pairwise different sentences among them is output, in
                the sorted order of the triples themselves.
        closest: for each of the 3 sentences Bs closest to Ds, and for
                each of the 3 sentences As ranked first by
                len(Bs) - memo_fast_similitude(As), the equation
                Bs : As :: Ds : x is solved with solvenlg.  When it
                returns a solution, (As, Bs, Cs) is output for each of
                the 3 sentences Cs closest to that solution.

    Any other strategy outputs nothing.

    When __verbose__ is set, traces are written to standard error;
    nothing is ever written to standard output.
    """
    Ds = string
    if __verbose__:
        sys.stderr.write('Ds = "%s", strategy = %s\n' % (Ds, strategy))

    if Ds is None or strategy == 'naive':
        for triple in itertools.product(self, repeat=3):
            yield triple
        return

    # A single initialisation serves every strategy below.
    init_memo_fast_distance(Ds)

    if strategy == 'by distance':
        closest_As = sorted(self.keys(), key=memo_fast_distance)[:100]
        # NOTE(review): sorted() orders the triples by comparing the
        # sentences themselves, not by their distance to Ds -- confirm
        # that this is the intended enumeration order.
        for triple in sorted(itertools.product(closest_As, repeat=3)):
            As, Bs, Cs = triple
            if As != Bs and As != Cs and Bs != Cs:
                if __verbose__:
                    sys.stderr.write('# {} : {} :: {} : {}\n'.format(
                        As, Bs, Cs, Ds))
                yield triple

    elif strategy == 'closest':
        first_N = 3
        # Sorted once, while the memo still refers to Ds: the memo is
        # re-initialised with other sentences inside the loops.
        by_distance_to_Ds = sorted(self.keys(), key=memo_fast_distance)
        if __verbose__:
            sys.stderr.write('RESULT %s\n' % (by_distance_to_Ds,))
        for Bs in by_distance_to_Ds[:first_N]:
            init_memo_fast_distance(Bs)
            candidate_As = sorted(
                self.keys(),
                key=lambda Xs: len(Bs) - memo_fast_similitude(Xs))[:first_N]
            for As in candidate_As:
                if __verbose__:
                    sys.stderr.write('# {} : {} :: {} : x\n'.format(
                        Bs, As, Ds))
                CCs = solvenlg(Bs, As, Ds)
                if CCs is None:
                    continue
                if __verbose__:
                    sys.stderr.write('# {} : {} :: {} : {}\n'.format(
                        Bs, As, Ds, CCs))
                init_memo_fast_distance(CCs)
                candidate_Cs = sorted(self.keys(),
                                      key=memo_fast_distance)[:first_N]
                for Cs in candidate_Cs:
                    if __verbose__:
                        sys.stderr.write('# {} : {} :: {} : {}\n'.format(
                            Bs, As, Ds, Cs))
                    yield (As, Bs, Cs)
Exemplo n.º 3
0
def direct_iteration_strategy(self, string=None, strategy='naive'):
    """
    Enumerate the (source sentence, entry) pairs of the bicorpus.

    `string` is Bs, the sentence to be translated.  If it is None, the
    keys are just iterated over in the order of the dictionary, whatever
    the strategy.

    Else, the strategy selected is applied to enumerate the source
    sentences.  There are 3 possible strategies implemented:
        naive: no sort is performed.
                The sentences are just enumerated in the order
                in which they appear in the bicorpus.
        by distance: the sentences are enumerated by increasing
                distance to the sentence to be translated
                (memo_fast_distance).
        by similitude: the sentences are enumerated by decreasing
                similarity with the sentence to be translated
                (memo_fast_similitude).

    When Bs is given, the enumeration stops right after the first
    sentence at distance 0 from Bs has been output.  Only the generator
    ends; the interpreter is not terminated.

    Raises ValueError, on the first iteration, if Bs is given together
    with an unknown strategy.
    """
    Bs = string
    if __verbose__:
        sys.stderr.write('Bs = "%s", strategy = %s\n' % (Bs, strategy))

    if Bs is None:
        # Nothing to compare with: the memo is not initialised, so it
        # must not be consulted for an exact match either.
        for As in self.keys():
            yield As, self[As]
        return

    if strategy not in ('naive', 'by distance', 'by similitude'):
        raise ValueError('unknown strategy: %r' % (strategy,))

    # Needed by every strategy, 'naive' included, for the exact-match
    # test in the loop below.
    init_memo_fast_distance(Bs)
    if strategy == 'by distance':
        candidates = sorted(self.keys(), key=memo_fast_distance)
    elif strategy == 'by similitude':
        candidates = sorted(self.keys(), key=memo_fast_similitude,
                            reverse=True)
    else:
        candidates = self.keys()

    for As in candidates:
        # Evaluated before yielding: the consumer may re-initialise the
        # memo with another sentence while the generator is suspended.
        is_exact_match = memo_fast_distance(As) == 0
        yield As, self[As]
        if is_exact_match:
            return
Exemplo n.º 4
0
def memo_fast_similitude(word2):
    """
    Return `_fast_distance.memo_fast_similitude(word2)`.

    Module-level shortcut that forwards its single argument unchanged
    and hands back whatever the underlying call returns.
    Presumably the similitude is measured against the string last given
    to the memo initialisation -- TODO confirm in _fast_distance.
    """
    similitude = _fast_distance.memo_fast_similitude(word2)
    return similitude