示例#1
0
 def reverseDic(self, wordsArg=None, opt=None):
     """
     Reverse this glossary: look every input word up in the definitions.

     For each word, self.searchWordInDef(word, opt) is called; a non-empty
     result is joined into one definition string and stored together with
     the word.  Progress is reported through self.ui, and the run can be
     interrupted through ui.reverseStop and resumed later from
     self.continueFrom.

     wordsArg -- source of the input words:
         None           use self.takeOutputWords()
         file object    read it and split on newlines
         list / tuple   a shallow copy is used
         string         treated as a path; the file is read and split on
                        newlines
     opt -- dict of options; missing keys are filled by addDefaultOptions().
         Keys read here:
         'includeDefs'   selects how the search results are joined
         'autoSaveStep'  > 0: write every entry straight to the save file and
                         re-open that file every autoSaveStep words;
                         0: collect entries in memory and write them once at
                         the end with writeTabfile()
         'savePath'      output path; defaults to '<glossary name>.txt'
         The whole dict is also forwarded to searchWordInDef().

     Returns True when all words were processed, False when joining a search
     result failed, and None when the run was skipped (continueFrom == -1) or
     stopped through ui.reverseStop.
     Raises TypeError when wordsArg is of an unsupported type.
     """
     # A shared mutable default would leak the resolved 'savePath' of one
     # call into the next one, so the default dict is created per call.
     if opt is None:
         opt = {}
     opt = addDefaultOptions(opt, {
         'matchWord': True,
         'showRel': 'None',
         'includeDefs': False,
         'background': False,
         'reportStep': 300,
         'autoSaveStep': 1000, ## set this to zero to disable auto saving.
         'savePath': '',
     })
     self.stoped = False
     ui = self.ui
     ## index of the first word to process; set by a previously stopped run
     c = getattr(self, 'continueFrom', 0)
     if c == -1:
         log.debug('c=%s'%c)
         return
     if c == 0:
         ui.progressStart()
         ui.progress(0.0, 'Starting...')
     ## Resolve the word list before touching the save file, so an invalid
     ## wordsArg can not truncate an existing file.
     if wordsArg is None:
         words = self.takeOutputWords()
     elif isinstance(wordsArg, file):
         words = wordsArg.read().split('\n')
     elif isinstance(wordsArg, (list, tuple)):
         words = wordsArg[:]
     elif isinstance(wordsArg, basestring):
         with open(wordsArg) as wordsFile:
             words = wordsFile.read().split('\n')
     else:
         raise TypeError('Argumant wordsArg to function reverseDic is not valid!')
     autoSaveStep = opt['autoSaveStep']
     ## The default file name must be applied before the file is opened.
     if not opt['savePath']:
         opt['savePath'] = self.getInfo('name')+'.txt'
     savePath = opt['savePath']
     revG = Glossary(
         info = self.info[:],
     )
     revG.setInfo('name', self.getInfo('name')+'_reversed')
     revG.setInfo('inputlang', self.getInfo('outputlang'))
     revG.setInfo('outputlang', self.getInfo('inputlang'))
     if not ui:
         log.info('passed ratio\ttime:\tpassed\tremain\ttotal\tprocess')
     n = len(words)
     ## append when resuming, start a fresh file otherwise
     saveFile = open(savePath, 'ab' if c > 0 else 'wb')
     try:
         for i in xrange(c, n):
             word = words[i]
             ui.progress(float(i+1)/n, '%d / %d words completed'%(i, n))
             if ui.reverseStop:
                 ## remember where to resume; the file is closed in `finally`
                 self.continueFrom = i
                 self.stoped = True
                 return
             self.i = i
             if autoSaveStep>0 and i>0 and i%autoSaveStep==0:
                 ## periodic save: close and re-open in append mode
                 saveFile.close()
                 saveFile = open(savePath, 'ab')
             result = self.searchWordInDef(word, opt)
             if len(result)==0:
                 continue
             try:
                 if opt['includeDefs']:
                     ## entries are separated by two literal backslash-n pairs
                     defi = '\\n\\n'.join(result)
                 else:
                     defi = ', '.join(result) + '.'
             except Exception:
                 ## dump the offending result for inspection, then give up
                 with open('result', 'wb') as dumpFile:
                     dumpFile.write(str(result))
                 log.exception('')
                 return False
             if autoSaveStep>0:
                 saveFile.write('%s\t%s\n'%(word, defi))
             else:
                 revG.data.append((word, defi))
     finally:
         ## runs on normal completion, on every early return and on errors
         saveFile.close()
     if autoSaveStep==0:
         revG.writeTabfile(savePath)
     ui.r_finished()
     ui.progressEnd()
     return True
示例#2
0
 def searchWordInDef(self, st, opt):
     """
     Search for the word `st` in the definitions of this glossary.

     Every entry of self.data whose definition contains `st` gets a relation
     value between 0 and 1: the definition is split on opt['sep'], and the
     best-matching part determines the value.  With opt['matchWord'] the
     value of a part is (words equal to st) / (words in the part); otherwise
     it is (matched characters) / (non-space characters in the part).

     st  -- the string to look for
     opt -- dict of options; missing keys are filled by addDefaultOptions():
         'minRel'       entries with relation <= minRel are dropped
         'maxNum'       keep at most this many entries (only when > 0)
         'sep'          separator used to split a definition into parts
         'matchWord'    compare whole words instead of substrings
         'showRel'      'Percent': append the relation to every entry,
                        'Percent At First': only when it differs from the
                        previous entry, anything else: never
         'includeDefs'  also append the (escaped) definition to every entry

     Returns a list of strings sorted by descending relation.
     """
     opt = addDefaultOptions(opt, {
         'minRel': 0.0,
         'maxNum': 100,
         'sep': commaFa,
         'matchWord': True,
         'showRel': 'Percent',
         ## read below; without a default a direct call raised KeyError
         'includeDefs': False,
     })
     sep = opt['sep']
     matchWord = opt['matchWord']
     maxNum = opt['maxNum']
     minRel = opt['minRel']
     defs = opt['includeDefs']
     showRel = opt['showRel']
     outRel = []
     for item in self.data:
         (word, defi) = item[:2]
         if defi.find(st) == -1:
             continue
         rel = 0 ## relation value of word (a number between 0 and 1)
         for part in defi.split(sep):
             for ch in sch:
                 part = part.replace(ch, ' ')
             if matchWord:
                 partWords = takeStrWords(part)
                 pLen = len(partWords)
                 if pLen==0:
                     continue
                 pNum = sum(1 for pw in partWords if pw == st)
             else:
                 pLen = len(part.replace(' ', ''))
                 if pLen==0:
                     continue
                 pNum = len(findAll(part, st))*len(st)
             pRel = float(pNum)/pLen ## part relation
             if pRel > rel:
                 rel = pRel
         if rel <= minRel:
             continue
         outRel.append((word, rel, defi))
     ## `key` must be a callable; the sort is stable, so entries with equal
     ## relation keep their order from self.data.
     outRel.sort(key=lambda entry: entry[1], reverse=True)
     if len(outRel) > maxNum > 0:
         outRel = outRel[:maxNum]
     out = []
     prevRel = 0 ## relation of the previous entry, for 'Percent At First'
     for (w, rel, defi) in outRel:
         ## NOTE(review): int(1.0/rel) == 1 holds for every rel in (0.5, 1],
         ## so the percentage is hidden for all of them, not only for 100%.
         ## Kept as is; confirm whether only a full match was intended.
         ## rel can only be 0 here when minRel is negative.
         if rel > 0 and int(1.0/rel) == 1:
             withRel = False
         elif showRel == 'Percent':
             withRel = True
         elif showRel == 'Percent At First':
             withRel = (rel != prevRel)
         else:
             withRel = False
         prevRel = rel
         if withRel:
             label = '%s(%%%d)'%(w, 100*rel)
         else:
             label = w
         if defs:
             ## keep the entry on one line: escape real newlines and tabs
             m = defi.replace('\n', '\\n').replace('\t', '\\t')
             out.append('%s\\n%s'%(label, m))
         else:
             out.append(label)
     return out
示例#3
0
 def reverseDic(self, wordsArg=None, opt=None):
     """
     Reverse this glossary: look every input word up in the definitions.

     For each word, self.searchWordInDef(word, opt) is called; a non-empty
     result is joined into one definition string and stored together with
     the word.  Progress is reported through self.ui, and the run can be
     interrupted through ui.reverseStop and resumed later from
     self.continueFrom.

     wordsArg -- source of the input words:
         None           use self.takeOutputWords()
         file object    read it and split on newlines
         list / tuple   a shallow copy is used
         string         treated as a path; the file is read and split on
                        newlines
     opt -- dict of options; missing keys are filled by addDefaultOptions().
         Keys read here:
         'includeDefs'   selects how the search results are joined
         'autoSaveStep'  > 0: write every entry straight to the save file and
                         re-open that file every autoSaveStep words;
                         0: collect entries in memory and write them once at
                         the end with writeTabfile()
         'savePath'      output path; defaults to '<glossary name>.txt'
         The whole dict is also forwarded to searchWordInDef().

     Returns True when all words were processed, False when joining a search
     result failed, and None when the run was skipped (continueFrom == -1) or
     stopped through ui.reverseStop.
     Raises TypeError when wordsArg is of an unsupported type.
     """
     # A shared mutable default would leak the resolved 'savePath' of one
     # call into the next one, so the default dict is created per call.
     if opt is None:
         opt = {}
     opt = addDefaultOptions(opt, {
         'matchWord': True,
         'showRel': 'None',
         'includeDefs': False,
         'background': False,
         'reportStep': 300,
         'autoSaveStep': 1000, ## set this to zero to disable auto saving.
         'savePath': '',
     })
     self.stoped = False
     ui = self.ui
     ## index of the first word to process; set by a previously stopped run
     c = getattr(self, 'continueFrom', 0)
     if c == -1:
         print('c=%s'%c)
         return
     if c == 0:
         ui.progressStart()
         ui.progress(0.0, 'Starting...')
     ## Resolve the word list before touching the save file, so an invalid
     ## wordsArg can not truncate an existing file.
     if wordsArg is None:
         words = self.takeOutputWords()
     elif isinstance(wordsArg, file):
         words = wordsArg.read().split('\n')
     elif isinstance(wordsArg, (list, tuple)):
         words = wordsArg[:]
     elif isinstance(wordsArg, basestring):
         with open(wordsArg) as wordsFile:
             words = wordsFile.read().split('\n')
     else:
         raise TypeError('Argumant wordsArg to function reverseDic is not valid!')
     autoSaveStep = opt['autoSaveStep']
     ## The default file name must be applied before the file is opened.
     if not opt['savePath']:
         opt['savePath'] = self.getInfo('name')+'.txt'
     savePath = opt['savePath']
     revG = Glossary(self.info[:])
     revG.setInfo('name', self.getInfo('name')+'_reversed')
     revG.setInfo('inputlang' , self.getInfo('outputlang'))
     revG.setInfo('outputlang', self.getInfo('inputlang'))
     if not ui:
         print('passed ratio\ttime:\tpassed\tremain\ttotal\tprocess')
     n = len(words)
     ## append when resuming, start a fresh file otherwise
     saveFile = open(savePath, 'ab' if c > 0 else 'wb')
     try:
         for i in xrange(c, n):
             word = words[i]
             ui.progress(float(i+1)/n, '%d / %d words completed'%(i,n))
             if ui.reverseStop:
                 ## remember where to resume; the file is closed in `finally`
                 self.continueFrom = i
                 self.stoped = True
                 return
             self.i = i
             if autoSaveStep>0 and i>0 and i%autoSaveStep==0:
                 ## periodic save: close and re-open in append mode
                 saveFile.close()
                 saveFile = open(savePath, 'ab')
             result = self.searchWordInDef(word, opt)
             if len(result)==0:
                 continue
             try:
                 if opt['includeDefs']:
                     ## entries are separated by two literal backslash-n pairs
                     defi = '\\n\\n'.join(result)
                 else:
                     defi = ', '.join(result) + '.'
             except Exception:
                 ## dump the offending result for inspection, then give up
                 with open('result', 'wb') as dumpFile:
                     dumpFile.write(str(result))
                 myRaise(__file__)
                 return False
             if autoSaveStep>0:
                 saveFile.write('%s\t%s\n'%(word, defi))
             else:
                 revG.data.append((word, defi))
     finally:
         ## runs on normal completion, on every early return and on errors
         saveFile.close()
     if autoSaveStep==0:
         revG.writeTabfile(savePath)
     ui.r_finished()
     ui.progressEnd()
     return True
示例#4
0
 def searchWordInDef(self, st, opt):
     """
     Search for the word `st` in the definitions of this glossary.

     Every entry of self.data whose definition contains `st` gets a relation
     value between 0 and 1: the definition is split on opt['sep'], and the
     best-matching part determines the value.  With opt['matchWord'] the
     value of a part is (words equal to st) / (words in the part); otherwise
     it is (matched characters) / (non-space characters in the part).

     st  -- the string to look for
     opt -- dict of options; missing keys are filled by addDefaultOptions():
         'minRel'       entries with relation <= minRel are dropped
         'maxNum'       keep at most this many entries (only when > 0)
         'sep'          separator used to split a definition into parts
         'matchWord'    compare whole words instead of substrings
         'showRel'      'Percent': append the relation to every entry,
                        'Percent At First': only when it differs from the
                        previous entry, anything else: never
         'includeDefs'  also append the (escaped) definition to every entry

     Returns a list of strings sorted by descending relation.
     """
     opt = addDefaultOptions(opt, {
         'minRel': 0.0,
         'maxNum': 100,
         'sep': commaFa,
         'matchWord': True,
         'showRel': 'Percent',
         ## read below; without a default a direct call raised KeyError
         'includeDefs': False,
     })
     sep = opt['sep']
     matchWord = opt['matchWord']
     maxNum = opt['maxNum']
     minRel = opt['minRel']
     defs = opt['includeDefs']
     showRel = opt['showRel']
     outRel = []
     for item in self.data:
         (word, defi) = item[:2]
         if defi.find(st) == -1:
             continue
         rel = 0 ## relation value of word (a number between 0 and 1)
         for part in defi.split(sep):
             for ch in sch:
                 part = part.replace(ch, ' ')
             if matchWord:
                 partWords = takeStrWords(part)
                 pLen = len(partWords)
                 if pLen==0:
                     continue
                 pNum = sum(1 for pw in partWords if pw == st)
             else:
                 pLen = len(part.replace(' ', ''))
                 if pLen==0:
                     continue
                 pNum = len(findAll(part, st))*len(st)
             pRel = float(pNum)/pLen ## part relation
             if pRel > rel:
                 rel = pRel
         if rel <= minRel:
             continue
         outRel.append((word, rel, defi))
     ## `key` must be a callable; the sort is stable, so entries with equal
     ## relation keep their order from self.data.
     outRel.sort(key=lambda entry: entry[1], reverse=True)
     if len(outRel) > maxNum > 0:
         outRel = outRel[:maxNum]
     out = []
     prevRel = 0 ## relation of the previous entry, for 'Percent At First'
     for (w, rel, defi) in outRel:
         ## NOTE(review): int(1.0/rel) == 1 holds for every rel in (0.5, 1],
         ## so the percentage is hidden for all of them, not only for 100%.
         ## Kept as is; confirm whether only a full match was intended.
         ## rel can only be 0 here when minRel is negative.
         if rel > 0 and int(1.0/rel) == 1:
             withRel = False
         elif showRel == 'Percent':
             withRel = True
         elif showRel == 'Percent At First':
             withRel = (rel != prevRel)
         else:
             withRel = False
         prevRel = rel
         if withRel:
             label = '%s(%%%d)'%(w, 100*rel)
         else:
             label = w
         if defs:
             ## keep the entry on one line: escape real newlines and tabs
             m = defi.replace('\n', '\\n').replace('\t', '\\t')
             out.append('%s\\n%s'%(label, m))
         else:
             out.append(label)
     return out
示例#5
0
 def reverseDic_ext(self, wordsArg=None, opt=None):
     """
     Reverse this glossary using the external `_reverse_dic.search` function.

     The glossary is first rendered to a tab-file string with
     self.writeTabfile(filename=None); every input word is then passed to
     search() together with that string.  A non-empty result is stored with
     the word.  Progress is reported through self.ui, and the run can be
     interrupted through ui.reverseStop and resumed later from
     self.continueFrom.

     wordsArg -- source of the input words:
         None           use self.takeOutputWords()
         file object    one word per line
         list / tuple   a shallow copy is used
         string         treated as a path to a file with one word per line
     opt -- dict of options; missing keys are filled by addDefaultOptions().
         'minRel', 'maxNum', 'sep', 'matchWord' and 'showRel' are passed to
         search(); 'autoSaveStep' > 0 writes every entry straight to the save
         file (re-opened every autoSaveStep words), 0 collects entries in
         memory and writes them once at the end; 'savePath' defaults to
         '<glossary name>.txt'.

     Returns True when all words were processed, and None when the run was
     skipped (continueFrom == -1) or stopped through ui.reverseStop.
     Raises TypeError when wordsArg is of an unsupported type.
     """
     from _reverse_dic import search
     tabStr = self.writeTabfile(filename=None)
     # A shared mutable default would leak the resolved 'savePath' of one
     # call into the next one, so the default dict is created per call.
     if opt is None:
         opt = {}
     opt = addDefaultOptions(opt, {
         'matchWord': True,
         'showRel': 'None',
         'background': False,
         'reportStep': 300,
         'autoSaveStep': 1000, ## set this to zero to disable auto saving.
         'savePath': '',
         'sep': commaFa,
         ## both are read below; without defaults they raised KeyError
         'minRel': 0.0,
         'maxNum': 100,
     })
     self.stoped = False
     ui = self.ui
     ## index of the first word to process; set by a previously stopped run
     c = getattr(self, 'continueFrom', 0)
     if c == -1:
         print('c=%s'%c)
         return
     if c == 0:
         ui.progress(0, 'Starting....')
     if wordsArg is None:
         words = self.takeOutputWords()
     elif isinstance(wordsArg, file):
         ## strip only the line terminator, so a last line without a
         ## trailing newline keeps its final character
         words = [w.rstrip('\n') for w in wordsArg.readlines()]
     elif isinstance(wordsArg, (list, tuple)):
         words = wordsArg[:]
     elif isinstance(wordsArg, basestring):
         with open(wordsArg) as fp:
             words = [w.rstrip('\n') for w in fp.readlines()]
     else:
         raise TypeError('Argumant wordsArg to function reverseDic is not valid!')
     autoSaveStep = opt['autoSaveStep']
     if not opt['savePath']:
         opt['savePath'] = self.getInfo('name')+'.txt'
     savePath = opt['savePath']
     if c > 0:
         saveFile = open(savePath, 'ab')
     else:
         saveFile = open(savePath, 'wb')
     try:
         if not c > 0:
             ui.progressStart()
         revG = Glossary(self.info[:])
         revG.setInfo('name', self.getInfo('name')+'_reversed')
         revG.setInfo('inputlang' , self.getInfo('outputlang'))
         revG.setInfo('outputlang', self.getInfo('inputlang'))
         n = len(words)
         if c == 0:
             ## formatted explicitly: print('...', n) prints a tuple on Python 2
             print('Number of input words: %s'%n)
             print('Reversing glossary...')
         else:
             print('continue reversing from index %d ...'%c)
         if ui is None:
             print('passed ratio         time:  passed       remain          total       process')
         for i in xrange(c, n):
             word = words[i]
             ui.progress(float(i+1)/n, '%d / %d words completed'%(i,n))
             if ui.reverseStop:
                 ## remember where to resume; the file is closed in `finally`
                 self.continueFrom = i
                 self.stoped = True
                 return
             if autoSaveStep>0 and i>0 and i%autoSaveStep==0:
                 ## periodic save: close and re-open in append mode
                 saveFile.close()
                 saveFile = open(savePath, 'ab')
             result = search(
                 tabStr,
                 word,
                 opt['minRel'],
                 opt['maxNum'],
                 opt['sep'],
                 opt['matchWord'],
                 opt['showRel'],
             )
             if len(result)==0:
                 continue
             new = (word, result)
             if autoSaveStep>0:
                 saveFile.write('%s\t%s\n'%new)
             else:
                 revG.data.append(new)
     finally:
         ## runs on normal completion, on every early return and on errors
         saveFile.close()
     if autoSaveStep==0:
         revG.writeTabfile(savePath)
     ui.r_finished()
     ui.progressEnd()
     return True
示例#6
0
 def reverseDic_ext(self, wordsArg=None, opt=None):
     """
     Reverse this glossary using the external `_reverse_dic.search` function.

     The glossary is first rendered to a tab-file string with
     self.writeTabfile(filename=None); every input word is then passed to
     search() together with that string.  A non-empty result is stored with
     the word.  Progress is reported through self.ui, and the run can be
     interrupted through ui.reverseStop and resumed later from
     self.continueFrom.

     wordsArg -- source of the input words:
         None           use self.takeOutputWords()
         file object    one word per line
         list / tuple   a shallow copy is used
         string         treated as a path to a file with one word per line
     opt -- dict of options; missing keys are filled by addDefaultOptions().
         'minRel', 'maxNum', 'sep', 'matchWord' and 'showRel' are passed to
         search(); 'autoSaveStep' > 0 writes every entry straight to the save
         file (re-opened every autoSaveStep words), 0 collects entries in
         memory and writes them once at the end; 'savePath' defaults to
         '<glossary name>.txt'.

     Returns True when all words were processed, and None when the run was
     skipped (continueFrom == -1) or stopped through ui.reverseStop.
     Raises TypeError when wordsArg is of an unsupported type.
     """
     from _reverse_dic import search
     tabStr = self.writeTabfile(filename=None)
     # A shared mutable default would leak the resolved 'savePath' of one
     # call into the next one, so the default dict is created per call.
     if opt is None:
         opt = {}
     defOpt = {
         'matchWord': True,
         'showRel': 'None',
         'background': False,
         'reportStep': 300,
         'autoSaveStep': 1000,  ## set this to zero to disable auto saving.
         'savePath': '',
         'sep': commaFa,
         ## both are read below; without defaults they raised KeyError
         'minRel': 0.0,
         'maxNum': 100,
     }
     opt = addDefaultOptions(opt, defOpt)
     self.stoped = False
     ui = self.ui
     ## index of the first word to process; set by a previously stopped run
     c = getattr(self, 'continueFrom', 0)
     if c == -1:
         print('c=%s' % c)
         return
     if c == 0:
         ui.progress(0, 'Starting....')
     if wordsArg is None:
         words = self.takeOutputWords()
     elif isinstance(wordsArg, file):
         ## strip only the line terminator, so a last line without a
         ## trailing newline keeps its final character
         words = [w.rstrip('\n') for w in wordsArg.readlines()]
     elif isinstance(wordsArg, (list, tuple)):
         words = wordsArg[:]
     elif isinstance(wordsArg, basestring):
         with open(wordsArg) as fp:
             words = [w.rstrip('\n') for w in fp.readlines()]
     else:
         raise TypeError(
             'Argumant wordsArg to function reverseDic is not valid!'
         )
     autoSaveStep = opt['autoSaveStep']
     if not opt['savePath']:
         opt['savePath'] = self.getInfo('name') + '.txt'
     savePath = opt['savePath']
     if c > 0:
         saveFile = open(savePath, 'ab')
     else:
         saveFile = open(savePath, 'wb')
     try:
         if not c > 0:
             ui.progressStart()
         revG = Glossary(self.info[:])
         revG.setInfo('name', self.getInfo('name') + '_reversed')
         revG.setInfo('inputlang', self.getInfo('outputlang'))
         revG.setInfo('outputlang', self.getInfo('inputlang'))
         n = len(words)
         if c == 0:
             ## formatted explicitly: print('...', n) prints a tuple on Python 2
             print('Number of input words: %s' % n)
             print('Reversing glossary...')
         else:
             print('continue reversing from index %d ...' % c)
         if ui is None:
             print(
                 'passed ratio         time:  passed       remain          total       process'
             )
         for i in xrange(c, n):
             word = words[i]
             ui.progress(float(i + 1) / n, '%d / %d words completed' % (i, n))
             if ui.reverseStop:
                 ## remember where to resume; the file is closed in `finally`
                 self.continueFrom = i
                 self.stoped = True
                 return
             if autoSaveStep > 0 and i > 0 and i % autoSaveStep == 0:
                 ## periodic save: close and re-open in append mode
                 saveFile.close()
                 saveFile = open(savePath, 'ab')
             result = search(
                 tabStr,
                 word,
                 opt['minRel'],
                 opt['maxNum'],
                 opt['sep'],
                 opt['matchWord'],
                 opt['showRel'],
             )
             if len(result) == 0:
                 continue
             new = (word, result)
             if autoSaveStep > 0:
                 saveFile.write('%s\t%s\n' % new)
             else:
                 revG.data.append(new)
     finally:
         ## runs on normal completion, on every early return and on errors
         saveFile.close()
     if autoSaveStep == 0:
         revG.writeTabfile(savePath)
     ui.r_finished()
     ui.progressEnd()
     return True