示例#1
0
def pretranslate_store(input_store, template_store, tm=None,
                       min_similarity=75, fuzzymatching=True):
    """Pretranslate every translatable unit of ``input_store``.

    :param input_store: store whose ``units`` are passed one by one to
        ``pretranslate_unit``.
    :param template_store: template store, or ``None``. When given it is
        indexed and, if this module defines a function named
        ``prepare_template_<ClassName>``, that hook is run on it.
    :param tm: translation memory handed to ``memory()``; only consulted
        when it is truthy and ``fuzzymatching`` is enabled.
    :param min_similarity: similarity threshold forwarded to both matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created.
    :return: the very same ``input_store`` object that was passed in.
    """
    fuzzy_matchers = []

    # --- template ---------------------------------------------------------
    if template_store is not None:
        template_store.makeindex()
        # Type-specific preparation: look for a module-level hook named
        # after the concrete class of the template store.
        hook_name = "prepare_template_%s" % template_store.__class__.__name__
        module_names = globals()
        if hook_name in module_names:
            module_names[hook_name](template_store)

        if fuzzymatching:
            # FIXME: max_length hardcoded
            template_matcher = match.matcher(
                template_store,
                max_candidates=1,
                min_similarity=min_similarity,
                max_length=3000,
                usefuzzy=True)
            template_matcher.addpercentage = False
            fuzzy_matchers.append(template_matcher)

    # --- translation memory -------------------------------------------------
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        tm_matcher = memory(
            tm,
            max_candidates=1,
            min_similarity=min_similarity,
            max_length=1000)
        tm_matcher.addpercentage = False
        fuzzy_matchers.append(tm_matcher)

    # --- main loop ----------------------------------------------------------
    # Location matching is only switched on for PO files whose header
    # declares '&' or '~' as the accelerator marker.
    match_locations = False
    if isinstance(input_store, po.pofile):
        marker = input_store.parseheader().get('X-Accelerator-Marker')
        match_locations = marker in ('&', '~')

    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        # The return value is not needed here; the store itself is returned.
        pretranslate_unit(unit, template_store, fuzzy_matchers,
                          match_locations=match_locations)

    return input_store
示例#2
0
def convert_stores(input_store, template_store, tm=None, min_similarity=75,
                   fuzzymatching=True, **kwargs):
    """Build a pretranslated output store from ``input_store``.

    Works on store objects, not files. A new, empty store of the same
    class as ``input_store`` is created, the merge hooks of this module
    are run around the conversion, and every translatable input unit is
    pretranslated (using ``template_store`` and, optionally, the
    translation memory ``tm``) before being added to the output.

    :param input_store: store providing the units and the output type.
    :param template_store: store with existing translations; may be falsy.
    :param tm: translation memory passed to ``pretranslate.memory``.
    :param min_similarity: similarity threshold forwarded to the matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created.
    :param kwargs: accepted and ignored.
    :return: the newly created output store.
    """
    # Prepare for merging: the output is a fresh instance of the input type.
    output_store = type(input_store)()
    _prepare_merge(input_store, output_store, template_store)

    # Fuzzy matchers consumed by pretranslate.pretranslate_unit.
    fuzzy_matchers = []
    if fuzzymatching and template_store:
        template_matcher = match.matcher(
            template_store,
            max_candidates=1,
            min_similarity=min_similarity,
            max_length=3000,
            usefuzzy=True)
        template_matcher.addpercentage = False
        fuzzy_matchers.append(template_matcher)
    if fuzzymatching and tm:
        tm_matcher = pretranslate.memory(
            tm,
            max_candidates=1,
            min_similarity=min_similarity,
            max_length=1000)
        tm_matcher.addpercentage = False
        fuzzy_matchers.append(tm_matcher)

    # Initialize store
    _store_pre_merge(input_store, output_store, template_store)

    # Do matching
    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        merged_unit = pretranslate.pretranslate_unit(
            unit, template_store, fuzzy_matchers, mark_reused=True)
        _unit_post_merge(merged_unit, input_store, output_store, template_store)
        output_store.addunit(merged_unit)

    # Finalize store
    _store_post_merge(input_store, output_store, template_store)

    return output_store
def memory(tmfile, max_candidates=4, min_similarity=75, max_length=1000):
    """Return the shared TM matcher, creating it on the first call only.

    The matcher is kept in the module-level ``tmmatcher`` variable. Once
    that variable is set, every later call returns the cached matcher
    and all arguments are ignored.

    :param tmfile: passed to ``factory.getobject`` to load the TM store.
    :param max_candidates: forwarded to ``match.matcher`` on first call.
    :param min_similarity: forwarded to ``match.matcher`` on first call.
    :param max_length: forwarded to ``match.matcher`` on first call.
    :return: the (possibly cached) matcher object.
    """
    global tmmatcher
    if tmmatcher is not None:
        # Already initialised by an earlier call.
        return tmmatcher

    store = factory.getobject(tmfile)
    tmmatcher = match.matcher(
        store,
        max_candidates=max_candidates,
        min_similarity=min_similarity,
        max_length=max_length)
    return tmmatcher
示例#4
0
 def __init__(self, addr, storage):
     """Set up the XML-RPC lookup server on top of an already loaded store.

     :param addr: server address, passed straight to
         ``SimpleXMLRPCServer.__init__``.
     :param storage: store object (not a filename); it is indexed here and
         used to build the matcher kept in ``self.matcher``.
     """
     SimpleXMLRPCServer.__init__(self, addr, requestHandler=lookupRequestHandler, logRequests=1)
     self.storage = storage
     self.storage.makeindex()
     self.matcher = match.matcher(storage)
     # Parenthesised single-argument form: prints the same text on
     # Python 2 and is valid syntax on Python 3 as well.
     print("Performing lookup from %d units" % len(storage.units))
     print("Translation memory using %d units" % len(self.matcher.candidates.units))
示例#5
0
def convert_stores(input_store, template_store, temp_store=None, tm=None,
                   min_similarity=75, fuzzymatching=True, **kwargs):
    """Pretranslate ``temp_store`` using the template store and the TM.

    Works on store objects, not files. The merge hooks of this module are
    run around the conversion and every translatable unit of
    ``temp_store`` is pretranslated.

    :param input_store: source store; also supplies ``merge_on``.
    :param template_store: store with existing translations; may be falsy.
    :param temp_store: store that is processed and returned; defaults to
        ``input_store`` itself when ``None``.
    :param tm: translation memory passed to ``pretranslate.memory``.
    :param min_similarity: similarity threshold forwarded to the matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created.
    :param kwargs: accepted and ignored.
    :return: ``temp_store`` (which is ``input_store`` if none was given).
    """
    if temp_store is None:
        temp_store = input_store

    _prepare_merge(input_store, temp_store, template_store)

    # Fuzzy matchers consumed by pretranslate.pretranslate_unit.
    fuzzy_matchers = []
    if fuzzymatching and template_store:
        template_matcher = match.matcher(
            template_store, max_candidates=1, min_similarity=min_similarity,
            max_length=3000, usefuzzy=True)
        template_matcher.addpercentage = False
        fuzzy_matchers.append(template_matcher)
    if fuzzymatching and tm:
        tm_matcher = pretranslate.memory(
            tm, max_candidates=1, min_similarity=min_similarity,
            max_length=1000)
        tm_matcher.addpercentage = False
        fuzzy_matchers.append(tm_matcher)

    # Initialize store
    _store_pre_merge(input_store, temp_store, template_store)

    # Do matching
    for unit in temp_store.units:
        if not unit.istranslatable():
            continue
        merged_unit = pretranslate.pretranslate_unit(
            unit, template_store, fuzzy_matchers,
            mark_reused=True, merge_on=input_store.merge_on)
        _unit_post_merge(merged_unit, input_store, temp_store, template_store)

    # Finalize store
    _store_post_merge(input_store, temp_store, template_store)

    return temp_store
示例#6
0
 def test_multiple_store(self):
     """Matching must draw candidates from every store given to the matcher."""
     # Single words spread over two stores.
     first_store = self.buildcsv(["hand", "asdf", "fdas"])
     second_store = self.buildcsv(["haas", "pond"])
     tm_matcher = match.matcher([first_store, second_store])
     found = self.candidatestrings(tm_matcher.matches("hond"))
     assert sorted(found) == ["hand", "pond"]

     # Whole sentences spread over two stores.
     message = "Ek skop die bal"
     first_store = self.buildcsv(
         ["Hy skop die bal", message, "Jannie skop die bal"])
     second_store = self.buildcsv(
         ["Ek skop die balle", "Niemand skop die bal nie"])
     tm_matcher = match.matcher([first_store, second_store])
     found = self.candidatestrings(tm_matcher.matches(message))
     assert len(found) == 3
     # The exact (100%) match has to be ranked first.
     assert found[0] == message
     assert sorted(found)[1:] == ["Ek skop die balle", "Hy skop die bal"]
示例#7
0
 def test_matching(self):
     """Check basic matching against a single store."""
     # Single words.
     word_store = self.buildcsv(["hand", "asdf", "fdas", "haas", "pond"])
     found = self.candidatestrings(match.matcher(word_store).matches("hond"))
     assert sorted(found) == ["hand", "pond"]

     # Whole sentences.
     message = "Ek skop die bal"
     sentences = [
         "Hy skop die bal",
         message,
         "Jannie skop die bal",
         "Ek skop die balle",
         "Niemand skop die bal nie",
     ]
     sentence_store = self.buildcsv(sentences)
     found = self.candidatestrings(
         match.matcher(sentence_store).matches(message))
     assert len(found) == 3
     # The exact (100%) match has to be ranked first.
     assert found[0] == message
     assert sorted(found)[1:] == ["Ek skop die balle", "Hy skop die bal"]
示例#8
0
 def test_extendtm(self):
     """Units added through ``extendtm`` must become matchable."""
     query = "Open file..."
     initial_store = self.buildcsv(["Close application", "Do something"])
     tm_matcher = match.matcher([initial_store])
     # Before extending, the query yields no candidates.
     found = self.candidatestrings(tm_matcher.matches(query))
     assert len(found) == 0

     extra_store = self.buildcsv(["Open file"])
     tm_matcher.extendtm(extra_store.units, store=extra_store)
     # After extending, exactly the newly added unit is found.
     found = self.candidatestrings(tm_matcher.matches(query))
     assert len(found) == 1
     assert found[0] == "Open file"
示例#9
0
 def test_matching(self):
     """Check basic matching against a single store."""
     # Single words.
     word_store = self.buildcsv(["hand", "asdf", "fdas", "haas", "pond"])
     word_matcher = match.matcher(word_store)
     found = self.candidatestrings(word_matcher.matches("hond"))
     assert sorted(found) == ["hand", "pond"]

     # Whole sentences.
     message = "Ek skop die bal"
     sentence_store = self.buildcsv([
         "Hy skop die bal", message, "Jannie skop die bal",
         "Ek skop die balle", "Niemand skop die bal nie",
     ])
     sentence_matcher = match.matcher(sentence_store)
     found = self.candidatestrings(sentence_matcher.matches(message))
     assert len(found) == 3
     # The exact (100%) match has to be ranked first.
     assert found[0] == message
     assert sorted(found)[1:] == ["Ek skop die balle", "Hy skop die bal"]
示例#10
0
 def recreate_matcher(self, storecontroller):
     """Create the matcher, or feed the current store into the existing one.

     The translation store is read from
     ``storecontroller.get_store()._trans_store``. If ``self.matcher`` is
     still ``None`` a new matcher is built from that store with the
     configured limits; otherwise the store's units are added to the
     existing matcher. In both cases ``self.cache`` is reset afterwards.
     """
     trans_store = storecontroller.get_store()._trans_store
     if self.matcher is not None:
         self.matcher.extendtm(trans_store.units)
     else:
         self.matcher = match.matcher(
             trans_store,
             max_length=int(self.config['max_length']),
             max_candidates=self.controller.max_matches,
             min_similarity=self.controller.min_quality)
     # Drop the cache after the matcher was created or extended.
     self.cache = {}
示例#11
0
 def __init__(self, addr, storage):
     """Set up the XML-RPC lookup server on top of an already loaded store.

     :param addr: server address, passed straight to
         ``SimpleXMLRPCServer.__init__``.
     :param storage: store object (not a filename); it is indexed here and
         used to build the matcher kept in ``self.matcher``.
     """
     SimpleXMLRPCServer.__init__(self,
                                 addr,
                                 requestHandler=lookupRequestHandler,
                                 logRequests=1)
     self.storage = storage
     self.storage.makeindex()
     self.matcher = match.matcher(storage)
     # Parenthesised single-argument form: prints the same text on
     # Python 2 and is valid syntax on Python 3 as well.
     print("Performing lookup from %d units" % len(storage.units))
     print("Translation memory using %d units" % len(
         self.matcher.candidates.units))
示例#12
0
 def test_extendtm(self):
     """Units added through ``extendtm`` must become matchable."""
     query = "Open file..."
     initial_store = self.buildcsv(["Close application", "Do something"])
     tm_matcher = match.matcher([initial_store])
     # Before extending, the query yields no candidates.
     before = self.candidatestrings(tm_matcher.matches(query))
     assert len(before) == 0

     extra_store = self.buildcsv(["Open file"])
     tm_matcher.extendtm(extra_store.units, store=extra_store)
     # After extending, exactly the newly added unit is found.
     after = self.candidatestrings(tm_matcher.matches(query))
     assert len(after) == 1
     assert after[0] == "Open file"
示例#13
0
 def recreate_matcher(self, storecontroller):
     """Create the matcher, or feed the current store into the existing one.

     The translation store is read from
     ``storecontroller.get_store()._trans_store``. If ``self.matcher`` is
     still ``None`` a new matcher is built from that store with the
     configured limits; otherwise the store's units are added to the
     existing matcher. In both cases ``self.cache`` is reset afterwards.
     """
     trans_store = storecontroller.get_store()._trans_store
     if self.matcher is not None:
         self.matcher.extendtm(trans_store.units)
     else:
         self.matcher = match.matcher(
             trans_store,
             max_length=int(self.config['max_length']),
             max_candidates=self.controller.max_matches,
             min_similarity=self.controller.min_quality)
     # Drop the cache after the matcher was created or extended.
     self.cache = {}
示例#14
0
def pretranslate_store(input_store, template_store, tm=None,
                       min_similarity=75, fuzzymatching=True):
    """Pretranslate every translatable unit of ``input_store``.

    :param input_store: store whose ``units`` are passed one by one to
        ``pretranslate_unit``.
    :param template_store: template store, or ``None``. When given it is
        indexed and, if this module defines a function named
        ``prepare_template_<ClassName>``, that hook is run on it.
    :param tm: translation memory handed to ``memory()``; only consulted
        when it is truthy and ``fuzzymatching`` is enabled.
    :param min_similarity: similarity threshold forwarded to both matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created.
    :return: the very same ``input_store`` object that was passed in.
    """
    fuzzy_matchers = []

    # Prepare the template.
    if template_store is not None:
        template_store.makeindex()
        # Type-specific preparation: look for a module-level hook named
        # after the concrete class of the template store.
        hook_name = "prepare_template_%s" % template_store.__class__.__name__
        module_names = globals()
        if hook_name in module_names:
            module_names[hook_name](template_store)

        if fuzzymatching:
            # FIXME: max_length hardcoded
            template_matcher = match.matcher(
                template_store, max_candidates=1,
                min_similarity=min_similarity, max_length=3000,
                usefuzzy=True)
            template_matcher.addpercentage = False
            fuzzy_matchers.append(template_matcher)

    # Prepare the translation memory matcher.
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        tm_matcher = memory(
            tm, max_candidates=1, min_similarity=min_similarity,
            max_length=1000)
        tm_matcher.addpercentage = False
        fuzzy_matchers.append(tm_matcher)

    # Location matching is only switched on for PO files whose header
    # declares '&' or '~' as the accelerator marker.
    match_locations = False
    if isinstance(input_store, po.pofile):
        marker = input_store.parseheader().get('X-Accelerator-Marker')
        match_locations = marker in ('&', '~')

    # Main loop.
    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        # The return value is not needed here; the store itself is returned.
        pretranslate_unit(unit, template_store, fuzzy_matchers,
                          match_locations=match_locations)

    return input_store
示例#15
0
 def test_multiple_store(self):
     """Matching must draw candidates from every store given to the matcher."""
     # Single words spread over two stores.
     stores = [
         self.buildcsv(["hand", "asdf", "fdas"]),
         self.buildcsv(["haas", "pond"]),
     ]
     found = self.candidatestrings(match.matcher(stores).matches("hond"))
     assert sorted(found) == ["hand", "pond"]

     # Whole sentences spread over two stores.
     message = "Ek skop die bal"
     stores = [
         self.buildcsv(["Hy skop die bal", message, "Jannie skop die bal"]),
         self.buildcsv(["Ek skop die balle", "Niemand skop die bal nie"]),
     ]
     found = self.candidatestrings(match.matcher(stores).matches(message))
     assert len(found) == 3
     # The exact (100%) match has to be ranked first.
     assert found[0] == message
     assert sorted(found)[1:] == ["Ek skop die balle", "Hy skop die bal"]
示例#16
0
def pretranslate_store(input_store, template_store, tm=None,
                       min_similarity=75, fuzzymatching=True):
    """Pretranslate every translatable unit of ``input_store``.

    :param input_store: store whose ``units`` are passed one by one to
        ``pretranslate_unit``; its ``merge_on`` attribute is forwarded too.
    :param template_store: template store, or ``None``. When given it is
        indexed and, if this module defines a function named
        ``prepare_template_<ClassName>``, that hook is run on it.
    :param tm: translation memory handed to ``memory()``; only consulted
        when it is truthy and ``fuzzymatching`` is enabled.
    :param min_similarity: similarity threshold forwarded to both matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created.
    :return: the very same ``input_store`` object that was passed in.
    """
    fuzzy_matchers = []

    # Prepare the template.
    if template_store is not None:
        template_store.makeindex()
        # Type-specific preparation: look for a module-level hook named
        # after the concrete class of the template store.
        hook_name = "prepare_template_%s" % template_store.__class__.__name__
        module_names = globals()
        if hook_name in module_names:
            module_names[hook_name](template_store)

        if fuzzymatching:
            # FIXME: max_length hardcoded
            template_matcher = match.matcher(
                template_store, max_candidates=1,
                min_similarity=min_similarity, max_length=3000,
                usefuzzy=True)
            template_matcher.addpercentage = False
            fuzzy_matchers.append(template_matcher)

    # Prepare the translation memory matcher.
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        tm_matcher = memory(
            tm, max_candidates=1, min_similarity=min_similarity,
            max_length=1000)
        tm_matcher.addpercentage = False
        fuzzy_matchers.append(tm_matcher)

    # Main loop.
    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        # The return value is not needed here; the store itself is returned.
        pretranslate_unit(unit, template_store, fuzzy_matchers,
                          merge_on=input_store.merge_on)

    return input_store
示例#17
0
def pretranslate_store(input_store,
                       template_store,
                       tm=None,
                       min_similarity=75,
                       fuzzymatching=True):
    """Pretranslate every translatable unit of ``input_store``.

    :param input_store: store whose ``units`` are passed one by one to
        ``pretranslate_unit``; its ``merge_on`` attribute is forwarded too.
    :param template_store: template store, or ``None``. When given it is
        indexed and, if this module defines a function named
        ``prepare_template_<ClassName>``, that hook is run on it.
    :param tm: translation memory handed to ``memory()``; only consulted
        when it is truthy and ``fuzzymatching`` is enabled.
    :param min_similarity: similarity threshold forwarded to both matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created.
    :return: the very same ``input_store`` object that was passed in.
    """
    fuzzy_matchers = []

    # Prepare the template.
    if template_store is not None:
        template_store.makeindex()
        # Type-specific preparation: look for a module-level hook named
        # after the concrete class of the template store.
        hook_name = "prepare_template_%s" % template_store.__class__.__name__
        module_names = globals()
        if hook_name in module_names:
            module_names[hook_name](template_store)

        if fuzzymatching:
            # FIXME: max_length hardcoded
            template_matcher = match.matcher(
                template_store,
                max_candidates=1,
                min_similarity=min_similarity,
                max_length=3000,
                usefuzzy=True)
            template_matcher.addpercentage = False
            fuzzy_matchers.append(template_matcher)

    # Prepare the translation memory matcher.
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        tm_matcher = memory(
            tm,
            max_candidates=1,
            min_similarity=min_similarity,
            max_length=1000)
        tm_matcher.addpercentage = False
        fuzzy_matchers.append(tm_matcher)

    # Main loop.
    for unit in input_store.units:
        if not unit.istranslatable():
            continue
        # The return value is not needed here; the store itself is returned.
        pretranslate_unit(
            unit,
            template_store,
            fuzzy_matchers,
            merge_on=input_store.merge_on)

    return input_store
示例#18
0
文件: pot2po.py 项目: Esya/translate
def convert_stores(input_store, template_store, temp_store=None, tm=None,
                   min_similarity=75, fuzzymatching=True, **kwargs):
    """Pretranslate ``temp_store`` using the template store and the TM.

    Works on store objects, not files. The merge hooks of this module are
    run around the conversion and every translatable unit of
    ``temp_store`` is pretranslated.

    :param input_store: source store; its type and header decide whether
        location matching is enabled.
    :param template_store: store with existing translations; may be falsy.
    :param temp_store: store that is processed and returned; defaults to
        ``input_store`` itself when ``None``.
    :param tm: translation memory passed to ``pretranslate.memory``.
    :param min_similarity: similarity threshold forwarded to the matchers.
    :param fuzzymatching: when false, no fuzzy matchers are created.
    :param kwargs: accepted and ignored.
    :return: ``temp_store`` (which is ``input_store`` if none was given).
    """
    if temp_store is None:
        temp_store = input_store

    _prepare_merge(input_store, temp_store, template_store)

    # Fuzzy matchers consumed by pretranslate.pretranslate_unit.
    fuzzy_matchers = []
    if fuzzymatching and template_store:
        template_matcher = match.matcher(
            template_store,
            max_candidates=1,
            min_similarity=min_similarity,
            max_length=3000,
            usefuzzy=True)
        template_matcher.addpercentage = False
        fuzzy_matchers.append(template_matcher)
    if fuzzymatching and tm:
        tm_matcher = pretranslate.memory(
            tm,
            max_candidates=1,
            min_similarity=min_similarity,
            max_length=1000)
        tm_matcher.addpercentage = False
        fuzzy_matchers.append(tm_matcher)

    # Initialize store
    _store_pre_merge(input_store, temp_store, template_store)

    # Location matching is only switched on for PO files whose header
    # declares '&' or '~' as the accelerator marker.
    match_locations = False
    if isinstance(input_store, po.pofile):
        marker = input_store.parseheader().get('X-Accelerator-Marker')
        match_locations = marker in ('&', '~')

    # Do matching
    for unit in temp_store.units:
        if not unit.istranslatable():
            continue
        merged_unit = pretranslate.pretranslate_unit(
            unit, template_store, fuzzy_matchers,
            mark_reused=True, match_locations=match_locations)
        _unit_post_merge(merged_unit, input_store, temp_store, template_store)

    # Finalize store
    _store_post_merge(input_store, temp_store, template_store)

    return temp_store
示例#19
0
 def get_matcher(self):
     """Return a fresh TM matcher over this object plus its obsolete units.

     A new matcher is built on every call (nothing is cached): it is
     created from ``self`` with fuzzy units enabled and a single
     candidate per lookup, then extended with the units of ``unit_set``
     whose state is ``OBSOLETE``.
     """
     # FIXME: should we cache this?
     tm_matcher = match.matcher(self, max_candidates=1, usefuzzy=True)
     obsolete_units = self.unit_set.filter(state=OBSOLETE)
     tm_matcher.extendtm(obsolete_units)
     return tm_matcher