def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True): """Do the actual pretranslation of a whole store.""" #preperation matchers = [] #prepare template if template_store is not None: template_store.makeindex() #template preparation based on type prepare_template = "prepare_template_%s" % template_store.__class__.__name__ if prepare_template in globals(): globals()[prepare_template](template_store) if fuzzymatching: #create template matcher #FIXME: max_length hardcoded matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True) matcher.addpercentage = False matchers.append(matcher) #prepare tm #create tm matcher if tm and fuzzymatching: #FIXME: max_length hardcoded matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000) matcher.addpercentage = False matchers.append(matcher) #main loop match_locations = isinstance(input_store, po.pofile) and input_store.parseheader().get('X-Accelerator-Marker') in ('&', '~') for input_unit in input_store.units: if input_unit.istranslatable(): input_unit = pretranslate_unit(input_unit, template_store, matchers, match_locations=match_locations) return input_store
def convert_stores(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True, **kwargs): """Actual conversion function, works on stores not files, returns a properly initialized pretranslated output store, with structure based on input_store, metadata based on template_store, migrates old translations from template_store and pretranslating from tm""" #prepare for merging output_store = type(input_store)() #create fuzzy matchers to be used by pretranslate.pretranslate_unit matchers = [] _prepare_merge(input_store, output_store, template_store) if fuzzymatching: if template_store: matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True) matcher.addpercentage = False matchers.append(matcher) if tm: matcher = pretranslate.memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000) matcher.addpercentage = False matchers.append(matcher) #initialize store _store_pre_merge(input_store, output_store, template_store) # Do matching for input_unit in input_store.units: if input_unit.istranslatable(): input_unit = pretranslate.pretranslate_unit(input_unit, template_store, matchers, mark_reused=True) _unit_post_merge(input_unit, input_store, output_store, template_store) output_store.addunit(input_unit) #finalize store _store_post_merge(input_store, output_store, template_store) return output_store
def memory(tmfile, max_candidates=4, min_similarity=75, max_length=1000):
    """Return the shared TM matcher, building it on first use.

    After the first call the cached matcher is returned unchanged,
    regardless of the arguments passed.
    """
    global tmmatcher
    if tmmatcher is not None:
        # Already initialised on an earlier call; reuse it.
        return tmmatcher
    tmstore = factory.getobject(tmfile)
    tmmatcher = match.matcher(tmstore, max_candidates=max_candidates, min_similarity=min_similarity, max_length=max_length)
    return tmmatcher
def __init__(self, addr, storage):
    """Loads the initial tbx file from the given filename.

    :param addr: ``(host, port)`` tuple the XML-RPC server binds to.
    :param storage: a translation store; it is indexed and wrapped in a
        fuzzy matcher for lookups.
    """
    SimpleXMLRPCServer.__init__(self, addr, requestHandler=lookupRequestHandler, logRequests=1)
    self.storage = storage
    self.storage.makeindex()
    self.matcher = match.matcher(storage)
    # print() as a function keeps this valid on Python 3 as well as 2.
    print("Performing lookup from %d units" % len(storage.units))
    print("Translation memory using %d units" % len(self.matcher.candidates.units))
def convert_stores(input_store, template_store, temp_store=None, tm=None, min_similarity=75, fuzzymatching=True, **kwargs):
    """Actual conversion function, works on stores not files, returns
    a properly initialized pretranslated output store, with structure
    based on input_store, metadata based on template_store, migrates
    old translations from template_store and pretranslating from TM.

    :param temp_store: optional working store; when omitted,
        ``input_store`` itself is pretranslated in place and returned.
    """
    if temp_store is None:
        temp_store = input_store
    # Create fuzzy matchers to be used by pretranslate.pretranslate_unit
    matchers = []
    _prepare_merge(input_store, temp_store, template_store)
    if fuzzymatching:
        if template_store:
            matcher = match.matcher(
                template_store,
                max_candidates=1,
                min_similarity=min_similarity,
                max_length=3000,
                usefuzzy=True,
            )
            matcher.addpercentage = False
            matchers.append(matcher)
        if tm:
            matcher = pretranslate.memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
            matcher.addpercentage = False
            matchers.append(matcher)
    # initialize store
    _store_pre_merge(input_store, temp_store, template_store)
    # Do matching
    for input_unit in temp_store.units:
        if input_unit.istranslatable():
            input_unit = pretranslate.pretranslate_unit(
                input_unit,
                template_store,
                matchers,
                mark_reused=True,
                merge_on=input_store.merge_on,
            )
            _unit_post_merge(input_unit, input_store, temp_store, template_store)
    # finalize store
    _store_post_merge(input_store, temp_store, template_store)
    return temp_store
def test_multiple_store(self):
    """Test using multiple datastores"""
    first = self.buildcsv(["hand", "asdf", "fdas"])
    second = self.buildcsv(["haas", "pond"])
    matcher = match.matcher([first, second])
    found = sorted(self.candidatestrings(matcher.matches("hond")))
    assert found == ["hand", "pond"]

    message = "Ek skop die bal"
    first = self.buildcsv(["Hy skop die bal", message, "Jannie skop die bal"])
    second = self.buildcsv(["Ek skop die balle", "Niemand skop die bal nie"])
    matcher = match.matcher([first, second])
    found = self.candidatestrings(matcher.matches(message))
    assert len(found) == 3
    # the 100% match must be ranked first
    assert found[0] == message
    found.sort()
    assert found[1:] == ["Ek skop die balle", "Hy skop die bal"]
def test_matching(self):
    """Test basic matching"""
    store = self.buildcsv(["hand", "asdf", "fdas", "haas", "pond"])
    matcher = match.matcher(store)
    found = sorted(self.candidatestrings(matcher.matches("hond")))
    assert found == ["hand", "pond"]

    message = "Ek skop die bal"
    store = self.buildcsv([
        "Hy skop die bal",
        message,
        "Jannie skop die bal",
        "Ek skop die balle",
        "Niemand skop die bal nie",
    ])
    matcher = match.matcher(store)
    found = self.candidatestrings(matcher.matches(message))
    assert len(found) == 3
    # the 100% match must be ranked first
    assert found[0] == message
    found.sort()
    assert found[1:] == ["Ek skop die balle", "Hy skop die bal"]
def test_extendtm(self):
    """Test that we can extend the TM after creation."""
    message = "Open file..."
    matcher = match.matcher([self.buildcsv(["Close application", "Do something"])])
    # nothing similar in the initial TM
    assert len(self.candidatestrings(matcher.matches(message))) == 0
    extra = self.buildcsv(["Open file"])
    matcher.extendtm(extra.units, store=extra)
    # after extending, the new unit is found
    results = self.candidatestrings(matcher.matches(message))
    assert len(results) == 1
    assert results[0] == "Open file"
def test_matching(self):
    """Test basic matching"""
    csvstore = self.buildcsv(["hand", "asdf", "fdas", "haas", "pond"])
    fuzzy = match.matcher(csvstore)
    hits = self.candidatestrings(fuzzy.matches("hond"))
    hits.sort()
    assert hits == ["hand", "pond"]

    message = "Ek skop die bal"
    sentences = [
        "Hy skop die bal",
        message,
        "Jannie skop die bal",
        "Ek skop die balle",
        "Niemand skop die bal nie",
    ]
    fuzzy = match.matcher(self.buildcsv(sentences))
    hits = self.candidatestrings(fuzzy.matches(message))
    assert len(hits) == 3
    # the exact match is ranked first
    assert hits[0] == message
    hits.sort()
    assert hits[1:] == ["Ek skop die balle", "Hy skop die bal"]
def recreate_matcher(self, storecontroller):
    """Build the fuzzy matcher on first call, extend it afterwards."""
    store = storecontroller.get_store()._trans_store
    if self.matcher is None:
        # First time: build a matcher from the controller's settings.
        self.matcher = match.matcher(
            store,
            max_length=int(self.config['max_length']),
            max_candidates=self.controller.max_matches,
            min_similarity=self.controller.min_quality,
        )
    else:
        # Matcher already exists: just feed it the new store's units.
        self.matcher.extendtm(store.units)
    # Previous lookups may now be stale.
    self.cache = {}
def __init__(self, addr, storage):
    """Loads the initial tbx file from the given filename.

    :param addr: ``(host, port)`` tuple the XML-RPC server binds to.
    :param storage: a translation store; it is indexed and wrapped in a
        fuzzy matcher for lookups.
    """
    SimpleXMLRPCServer.__init__(self, addr, requestHandler=lookupRequestHandler, logRequests=1)
    self.storage = storage
    self.storage.makeindex()
    self.matcher = match.matcher(storage)
    # print() as a function keeps this valid on Python 3 as well as 2.
    print("Performing lookup from %d units" % len(storage.units))
    print("Translation memory using %d units" % len(
        self.matcher.candidates.units))
def test_extendtm(self):
    """Test that we can extend the TM after creation."""
    needle = "Open file..."
    fuzzy = match.matcher([self.buildcsv(["Close application", "Do something"])])
    before = self.candidatestrings(fuzzy.matches(needle))
    assert len(before) == 0
    # grow the TM with a store containing a near match
    addition = self.buildcsv(["Open file"])
    fuzzy.extendtm(addition.units, store=addition)
    after = self.candidatestrings(fuzzy.matches(needle))
    assert len(after) == 1
    assert after[0] == "Open file"
def recreate_matcher(self, storecontroller):
    # Build the fuzzy matcher on first call; extend it with the new
    # store's units on later calls. Always clears the lookup cache.
    store = storecontroller.get_store()._trans_store
    if self.matcher is None:
        # matcher settings come from plugin config and the controller
        options = {
            'max_length': int(self.config['max_length']),
            'max_candidates': self.controller.max_matches,
            'min_similarity': self.controller.min_quality
        }
        self.matcher = match.matcher(store, **options)
    else:
        self.matcher.extendtm(store.units)
    # previous cached lookups may now be stale
    self.cache = {}
def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True):
    """Do the actual pretranslation of a whole store."""
    matchers = []
    if template_store is not None:
        template_store.makeindex()
        # Run the format-specific template hook when one is defined at
        # module level (e.g. prepare_template_pofile).
        hook = globals().get("prepare_template_%s" % template_store.__class__.__name__)
        if hook is not None:
            hook(template_store)
        if fuzzymatching:
            # FIXME: max_length hardcoded
            template_matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True)
            template_matcher.addpercentage = False
            matchers.append(template_matcher)
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        tm_matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        tm_matcher.addpercentage = False
        matchers.append(tm_matcher)

    # Location-based matching only applies to PO files using & or ~ as
    # the accelerator marker.
    match_locations = (isinstance(input_store, po.pofile) and
                       input_store.parseheader().get('X-Accelerator-Marker') in ('&', '~'))
    for unit in input_store.units:
        if unit.istranslatable():
            unit = pretranslate_unit(unit, template_store, matchers,
                                     match_locations=match_locations)
    return input_store
def test_multiple_store(self):
    """Test using multiple datastores"""
    store_a = self.buildcsv(["hand", "asdf", "fdas"])
    store_b = self.buildcsv(["haas", "pond"])
    fuzzy = match.matcher([store_a, store_b])
    hits = self.candidatestrings(fuzzy.matches("hond"))
    hits.sort()
    assert hits == ["hand", "pond"]

    message = "Ek skop die bal"
    store_a = self.buildcsv(
        ["Hy skop die bal", message, "Jannie skop die bal"])
    store_b = self.buildcsv(
        ["Ek skop die balle", "Niemand skop die bal nie"])
    fuzzy = match.matcher([store_a, store_b])
    hits = self.candidatestrings(fuzzy.matches(message))
    assert len(hits) == 3
    # the exact match is ranked first
    assert hits[0] == message
    hits.sort()
    assert hits[1:] == ["Ek skop die balle", "Hy skop die bal"]
def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True):
    """Do the actual pretranslation of a whole store."""
    matchers = []
    if template_store is not None:
        template_store.makeindex()
        # Invoke the format-specific preparation hook if one is defined
        # at module level for this store class.
        hook = globals().get("prepare_template_%s" % template_store.__class__.__name__)
        if hook is not None:
            hook(template_store)
        if fuzzymatching:
            # FIXME: max_length hardcoded
            template_matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True)
            template_matcher.addpercentage = False
            matchers.append(template_matcher)
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        tm_matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        tm_matcher.addpercentage = False
        matchers.append(tm_matcher)

    # Pretranslate every translatable unit in place.
    for unit in input_store.units:
        if unit.istranslatable():
            unit = pretranslate_unit(unit, template_store, matchers,
                                     merge_on=input_store.merge_on)
    return input_store
def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True): """Do the actual pretranslation of a whole store.""" #preperation matchers = [] #prepare template if template_store is not None: template_store.makeindex() #template preparation based on type prepare_template = "prepare_template_%s" % template_store.__class__.__name__ if prepare_template in globals(): globals()[prepare_template](template_store) if fuzzymatching: #create template matcher #FIXME: max_length hardcoded matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True) matcher.addpercentage = False matchers.append(matcher) #prepare tm #create tm matcher if tm and fuzzymatching: #FIXME: max_length hardcoded matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000) matcher.addpercentage = False matchers.append(matcher) # Main loop for input_unit in input_store.units: if input_unit.istranslatable(): input_unit = pretranslate_unit(input_unit, template_store, matchers, merge_on=input_store.merge_on) return input_store
def convert_stores(input_store, template_store, temp_store=None, tm=None, min_similarity=75, fuzzymatching=True, **kwargs):
    """Actual conversion function, works on stores not files, returns a
    properly initialized pretranslated output store, with structure based on
    input_store, metadata based on template_store, migrates old translations
    from template_store and pretranslating from tm"""
    if temp_store is None:
        # no working copy supplied: pretranslate input_store in place
        temp_store = input_store
    # create fuzzy matchers to be used by pretranslate.pretranslate_unit
    matchers = []
    _prepare_merge(input_store, temp_store, template_store)
    if fuzzymatching:
        if template_store:
            matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True)
            matcher.addpercentage = False
            matchers.append(matcher)
        if tm:
            matcher = pretranslate.memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
            matcher.addpercentage = False
            matchers.append(matcher)
    # initialize store
    _store_pre_merge(input_store, temp_store, template_store)
    # Do matching
    # location-based matching is only enabled for PO files whose
    # X-Accelerator-Marker header is & or ~
    match_locations = isinstance(input_store, po.pofile) and input_store.parseheader().get('X-Accelerator-Marker') in ('&', '~')
    for input_unit in temp_store.units:
        if input_unit.istranslatable():
            input_unit = pretranslate.pretranslate_unit(input_unit, template_store, matchers, mark_reused=True, match_locations=match_locations)
            _unit_post_merge(input_unit, input_store, temp_store, template_store)
    # finalize store
    _store_post_merge(input_store, temp_store, template_store)
    return temp_store
def get_matcher(self):
    """Builds a TM matcher from current translations and obsolete units."""
    # FIXME: should we cache this?
    tm_matcher = match.matcher(self, max_candidates=1, usefuzzy=True)
    # obsolete units are still useful as fuzzy-match candidates
    tm_matcher.extendtm(self.unit_set.filter(state=OBSOLETE))
    return tm_matcher