def reverseDic(self, wordsArg=None, opt=None):
    """Reverse this glossary word by word, writing the result to a tab file.

    Every word is looked up in the definitions of this glossary with
    ``self.searchWordInDef`` and the matches are joined into one line.

    wordsArg selects the words to look up:
        None           -> ``self.takeOutputWords()``
        file object    -> its content split on newlines
        list or tuple  -> a copy of the sequence
        string         -> treated as a path, content split on newlines
    Anything else raises TypeError.

    opt is an options dict completed through ``addDefaultOptions``. Keys
    read here: 'autoSaveStep' (0 disables incremental saving), 'savePath'
    (empty means "<glossary name>.txt") and 'includeDefs'. The whole dict is
    also passed on to ``searchWordInDef``.

    Returns True when all words were processed, False when the matches of a
    word could not be joined, and None when ``self.continueFrom`` is -1 or
    the ui asked to stop (``ui.reverseStop``). On a stop, the current index
    is stored in ``self.continueFrom`` and ``self.stoped`` is set.
    """
    # A literal {} default would be shared between calls, and this method
    # writes opt['savePath'] below.
    if opt is None:
        opt = {}
    opt = addDefaultOptions(opt, {
        'matchWord': True,
        'showRel': 'None',
        'includeDefs': False,
        'background': False,
        'reportStep': 300,
        'autoSaveStep': 1000,  # set this to zero to disable auto saving.
        'savePath': '',
    })
    self.stoped = False
    ui = self.ui
    try:
        c = self.continueFrom
    except AttributeError:
        c = 0
    if c == -1:
        log.debug('c=%s'%c)
        return
    if c == 0:
        ui.progressStart()
        ui.progress(0.0, 'Starting...')

    if wordsArg is None:
        words = self.takeOutputWords()
    elif isinstance(wordsArg, file):
        words = wordsArg.read().split('\n')
    elif isinstance(wordsArg, (list, tuple)):
        words = wordsArg[:]
    elif isinstance(wordsArg, basestring):
        with open(wordsArg) as wordsFile:
            words = wordsFile.read().split('\n')
    else:
        raise TypeError('Argumant wordsArg to function reverseDic is not valid!')

    autoSaveStep = opt['autoSaveStep']
    # The default path has to be resolved BEFORE the save file is opened,
    # otherwise an empty 'savePath' ends up in open('').
    if not opt['savePath']:
        opt['savePath'] = self.getInfo('name')+'.txt'
    savePath = opt['savePath']

    revG = Glossary(
        info = self.info[:],
    )
    revG.setInfo('name', self.getInfo('name')+'_reversed')
    revG.setInfo('inputlang', self.getInfo('outputlang'))
    revG.setInfo('outputlang', self.getInfo('inputlang'))

    if not ui:
        log.info('passed ratio\ttime:\tpassed\tremain\ttotal\tprocess')
    n = len(words)
    # Append when resuming, start a fresh file otherwise.
    if c > 0:
        saveFile = open(savePath, 'ab')
    else:
        saveFile = open(savePath, 'wb')
    try:
        for i in xrange(c, n):
            word = words[i]
            ui.progress(float(i+1)/n, '%d / %d words completed'%(i, n))
            if ui.reverseStop:
                # Remember where to resume; the file is closed in `finally`.
                self.continueFrom = i
                self.stoped = True
                return
            self.i = i
            if autoSaveStep > 0 and i > 0 and i % autoSaveStep == 0:
                # Reopen periodically so that the lines written so far
                # are flushed to disk.
                saveFile.close()
                saveFile = open(savePath, 'ab')
            result = self.searchWordInDef(word, opt)
            if len(result) == 0:
                continue
            try:
                if opt['includeDefs']:
                    defi = '\\n\\n'.join(result)
                else:
                    defi = ', '.join(result) + '.'
            except Exception:
                # Keep the offending result for inspection, then give up.
                with open('result', 'wb') as dumpFile:
                    dumpFile.write(str(result))
                log.exception('')
                return False
            if autoSaveStep > 0:
                saveFile.write('%s\t%s\n'%(word, defi))
            else:
                revG.data.append((word, defi))
    finally:
        # Closes the handle on every exit path (finished, stopped, error),
        # and before writeTabfile() below touches the same path.
        saveFile.close()
    if autoSaveStep == 0:
        revG.writeTabfile(opt['savePath'])
    ui.r_finished()
    ui.progressEnd()
    return True
def searchWordInDef(self, st, opt):
    """Search for the word `st` in the definitions of this glossary.

    For every entry of ``self.data`` whose definition contains `st`, the
    definition is split on opt['sep'] and a relation value is computed for
    each part; the entry keeps the highest value:
        matchWord true  -> (words of the part equal to st) / (words in part)
        matchWord false -> (characters covered by occurrences of st) /
                           (non-space characters in part)
    Entries whose relation is not greater than opt['minRel'] are dropped.
    The rest are sorted by relation, highest first, and cut to opt['maxNum']
    items when that is positive.

    opt is completed through ``addDefaultOptions``; keys used: 'minRel',
    'maxNum', 'sep', 'matchWord', 'showRel', 'includeDefs'.

    Returns a list of strings, one per kept entry: the entry word,
    optionally followed by "(%NN)" depending on opt['showRel'] ('Percent',
    'Percent At First', anything else shows none), and, when
    opt['includeDefs'] is true, a literal backslash-n plus the definition
    with its newlines and tabs escaped.
    """
    opt = addDefaultOptions(opt, {
        'minRel': 0.0,
        'maxNum': 100,
        'sep': commaFa,
        'matchWord': True,
        'showRel': 'Percent',
        # Read below; without a default, callers that omit it hit KeyError.
        'includeDefs': False,
    })
    sep = opt['sep']
    matchWord = opt['matchWord']
    maxNum = opt['maxNum']
    minRel = opt['minRel']
    defs = opt['includeDefs']
    showRel = opt['showRel']

    outRel = []
    for item in self.data:
        (word, defi) = item[:2]
        if st not in defi:
            continue
        rel = 0  # relation value of word (a float number between 0 and 1)
        for part in defi.split(sep):
            for ch in sch:
                part = part.replace(ch, ' ')
            if matchWord:
                partWords = takeStrWords(part)
                pLen = len(partWords)
                if pLen == 0:
                    continue
                pNum = sum(1 for pw in partWords if pw == st)
            else:
                pLen = len(part.replace(' ', ''))
                if pLen == 0:
                    continue
                pNum = len(findAll(part, st))*len(st)
            pRel = float(pNum)/pLen  # part relation
            if pRel > rel:
                rel = pRel
        if rel <= minRel:
            continue
        if defs:
            outRel.append((word, rel, defi))
        else:
            outRel.append((word, rel))

    # `key` must be a callable; the relation value is the second field.
    outRel.sort(key=lambda entry: entry[1], reverse=True)
    if len(outRel) > maxNum > 0:
        outRel = outRel[:maxNum]

    out = []
    prevRel = 0
    for entry in outRel:
        (w, num) = entry[:2]
        label = w
        # int(1.0/num) == 1 holds for relations above one half; those are
        # shown without a percentage. The `num > 0` guard avoids a division
        # by zero, which is reachable when minRel is negative.
        if not (num > 0 and int(1.0/num) == 1):
            if showRel == 'Percent':
                label = '%s(%%%d)'%(w, 100*num)
            elif showRel == 'Percent At First' and num != prevRel:
                # Only the first entry of a run of equal relations is tagged.
                label = '%s(%%%d)'%(w, 100*num)
        prevRel = num
        if defs:
            m = entry[2].replace('\n', '\\n').replace('\t', '\\t')
            out.append('%s\\n%s'%(label, m))
        else:
            out.append(label)
    return out
def reverseDic(self, wordsArg=None, opt=None):
    """Reverse this glossary word by word, writing the result to a tab file.

    Every word is looked up in the definitions of this glossary with
    ``self.searchWordInDef`` and the matches are joined into one line.

    wordsArg selects the words to look up:
        None           -> ``self.takeOutputWords()``
        file object    -> its content split on newlines
        list or tuple  -> a copy of the sequence
        string         -> treated as a path, content split on newlines
    Anything else raises TypeError.

    opt is an options dict completed through ``addDefaultOptions``. Keys
    read here: 'autoSaveStep' (0 disables incremental saving), 'savePath'
    (empty means "<glossary name>.txt") and 'includeDefs'. The whole dict is
    also passed on to ``searchWordInDef``.

    Returns True when all words were processed, False when the matches of a
    word could not be joined, and None when ``self.continueFrom`` is -1 or
    the ui asked to stop (``ui.reverseStop``). On a stop, the current index
    is stored in ``self.continueFrom`` and ``self.stoped`` is set.
    """
    # A literal {} default would be shared between calls, and this method
    # writes opt['savePath'] below.
    if opt is None:
        opt = {}
    opt = addDefaultOptions(opt, {
        'matchWord': True,
        'showRel': 'None',
        'includeDefs': False,
        'background': False,
        'reportStep': 300,
        'autoSaveStep': 1000,  # set this to zero to disable auto saving.
        'savePath': '',
    })
    self.stoped = False
    ui = self.ui
    try:
        c = self.continueFrom
    except AttributeError:
        c = 0
    if c == -1:
        print('c=%s'%c)
        return
    if c == 0:
        ui.progressStart()
        ui.progress(0.0, 'Starting...')

    if wordsArg is None:
        words = self.takeOutputWords()
    elif isinstance(wordsArg, file):
        words = wordsArg.read().split('\n')
    elif isinstance(wordsArg, (list, tuple)):
        words = wordsArg[:]
    elif isinstance(wordsArg, basestring):
        with open(wordsArg) as wordsFile:
            words = wordsFile.read().split('\n')
    else:
        raise TypeError('Argumant wordsArg to function reverseDic is not valid!')

    autoSaveStep = opt['autoSaveStep']
    # The default path has to be resolved BEFORE the save file is opened,
    # otherwise an empty 'savePath' ends up in open('').
    if not opt['savePath']:
        opt['savePath'] = self.getInfo('name')+'.txt'
    savePath = opt['savePath']

    revG = Glossary(self.info[:])
    revG.setInfo('name', self.getInfo('name')+'_reversed')
    revG.setInfo('inputlang', self.getInfo('outputlang'))
    revG.setInfo('outputlang', self.getInfo('inputlang'))

    if not ui:
        print('passed ratio\ttime:\tpassed\tremain\ttotal\tprocess')
    n = len(words)
    # Append when resuming, start a fresh file otherwise.
    if c > 0:
        saveFile = open(savePath, 'ab')
    else:
        saveFile = open(savePath, 'wb')
    try:
        for i in xrange(c, n):
            word = words[i]
            ui.progress(float(i+1)/n, '%d / %d words completed'%(i,n))
            if ui.reverseStop:
                # Remember where to resume; the file is closed in `finally`.
                self.continueFrom = i
                self.stoped = True
                return
            self.i = i
            if autoSaveStep > 0 and i > 0 and i % autoSaveStep == 0:
                # Reopen periodically so that the lines written so far
                # are flushed to disk.
                saveFile.close()
                saveFile = open(savePath, 'ab')
            result = self.searchWordInDef(word, opt)
            if len(result) == 0:
                continue
            try:
                if opt['includeDefs']:
                    defi = '\\n\\n'.join(result)
                else:
                    defi = ', '.join(result) + '.'
            except Exception:
                # Keep the offending result for inspection, then give up.
                with open('result', 'wb') as dumpFile:
                    dumpFile.write(str(result))
                myRaise(__file__)
                return False
            if autoSaveStep > 0:
                saveFile.write('%s\t%s\n'%(word, defi))
            else:
                revG.data.append((word, defi))
    finally:
        # Closes the handle on every exit path (finished, stopped, error),
        # and before writeTabfile() below touches the same path.
        saveFile.close()
    if autoSaveStep == 0:
        revG.writeTabfile(opt['savePath'])
    ui.r_finished()
    ui.progressEnd()
    return True
def searchWordInDef(self, st, opt):
    """Search for the word `st` in the definitions of this glossary.

    For every entry of ``self.data`` whose definition contains `st`, the
    definition is split on opt['sep'] and a relation value is computed for
    each part; the entry keeps the highest value:
        matchWord true  -> (words of the part equal to st) / (words in part)
        matchWord false -> (characters covered by occurrences of st) /
                           (non-space characters in part)
    Entries whose relation is not greater than opt['minRel'] are dropped.
    The rest are sorted by relation, highest first, and cut to opt['maxNum']
    items when that is positive.

    opt is completed through ``addDefaultOptions``; keys used: 'minRel',
    'maxNum', 'sep', 'matchWord', 'showRel', 'includeDefs'.

    Returns a list of strings, one per kept entry: the entry word,
    optionally followed by "(%NN)" depending on opt['showRel'] ('Percent',
    'Percent At First', anything else shows none), and, when
    opt['includeDefs'] is true, a literal backslash-n plus the definition
    with its newlines and tabs escaped.
    """
    opt = addDefaultOptions(opt, {
        'minRel': 0.0,
        'maxNum': 100,
        'sep': commaFa,
        'matchWord': True,
        'showRel': 'Percent',
        # Read below; without a default, callers that omit it hit KeyError.
        'includeDefs': False,
    })
    sep = opt['sep']
    matchWord = opt['matchWord']
    maxNum = opt['maxNum']
    minRel = opt['minRel']
    defs = opt['includeDefs']
    showRel = opt['showRel']

    outRel = []
    for item in self.data:
        (word, defi) = item[:2]
        if st not in defi:
            continue
        rel = 0  # relation value of word (a float number between 0 and 1)
        for part in defi.split(sep):
            for ch in sch:
                part = part.replace(ch, ' ')
            if matchWord:
                partWords = takeStrWords(part)
                pLen = len(partWords)
                if pLen == 0:
                    continue
                pNum = sum(1 for pw in partWords if pw == st)
            else:
                pLen = len(part.replace(' ', ''))
                if pLen == 0:
                    continue
                pNum = len(findAll(part, st))*len(st)
            pRel = float(pNum)/pLen  # part relation
            if pRel > rel:
                rel = pRel
        if rel <= minRel:
            continue
        if defs:
            outRel.append((word, rel, defi))
        else:
            outRel.append((word, rel))

    # `key` must be a callable; the relation value is the second field.
    outRel.sort(key=lambda entry: entry[1], reverse=True)
    if len(outRel) > maxNum > 0:
        outRel = outRel[:maxNum]

    out = []
    prevRel = 0
    for entry in outRel:
        (w, num) = entry[:2]
        label = w
        # int(1.0/num) == 1 holds for relations above one half; those are
        # shown without a percentage. The `num > 0` guard avoids a division
        # by zero, which is reachable when minRel is negative.
        if not (num > 0 and int(1.0/num) == 1):
            if showRel == 'Percent':
                label = '%s(%%%d)'%(w, 100*num)
            elif showRel == 'Percent At First' and num != prevRel:
                # Only the first entry of a run of equal relations is tagged.
                label = '%s(%%%d)'%(w, 100*num)
        prevRel = num
        if defs:
            m = entry[2].replace('\n', '\\n').replace('\t', '\\t')
            out.append('%s\\n%s'%(label, m))
        else:
            out.append(label)
    return out
def reverseDic_ext(self, wordsArg=None, opt=None):
    """Reverse this glossary using ``search`` from the ``_reverse_dic`` module.

    The glossary is first rendered to a tab-file string with
    ``self.writeTabfile(filename=None)``; every word is then passed to
    ``search`` together with that string and the matching options.

    wordsArg selects the words to look up:
        None           -> ``self.takeOutputWords()``
        file object    -> its lines without the trailing newline
        list or tuple  -> a copy of the sequence
        string         -> treated as a path, lines without trailing newline
    Anything else raises TypeError.

    opt is an options dict completed through ``addDefaultOptions``. Keys
    read here: 'autoSaveStep' (0 disables incremental saving), 'savePath'
    (empty means "<glossary name>.txt"), 'minRel', 'maxNum', 'sep',
    'matchWord' and 'showRel'.

    Returns True when all words were processed and None when
    ``self.continueFrom`` is -1 or the ui asked to stop
    (``ui.reverseStop``). On a stop, the current index is stored in
    ``self.continueFrom`` and ``self.stoped`` is set.
    """
    from _reverse_dic import search
    tabStr = self.writeTabfile(filename=None)
    # A literal {} default would be shared between calls, and this method
    # writes opt['savePath'] below.
    if opt is None:
        opt = {}
    defOpt = {
        'matchWord': True,
        'showRel': 'None',
        'background': False,
        'reportStep': 300,
        'autoSaveStep': 1000,  # set this to zero to disable auto saving.
        'savePath': '',
        'sep': commaFa,
        # Both are passed to search() below; the values mirror the
        # defaults used by searchWordInDef.
        'minRel': 0.0,
        'maxNum': 100,
    }
    opt = addDefaultOptions(opt, defOpt)
    self.stoped = False
    ui = self.ui
    try:
        c = self.continueFrom
    except AttributeError:
        c = 0
    if c == -1:
        print('c=%s'%c)
        return
    elif c == 0:
        ui.progress(0, 'Starting....')

    if wordsArg is None:
        words = self.takeOutputWords()
    elif isinstance(wordsArg, file):
        # rstrip instead of w[:-1]: a last line without a newline must not
        # lose its final character.
        words = [w.rstrip('\n') for w in wordsArg.readlines()]
    elif isinstance(wordsArg, (list, tuple)):
        words = wordsArg[:]
    elif isinstance(wordsArg, basestring):
        with open(wordsArg) as fp:
            words = [w.rstrip('\n') for w in fp.readlines()]
    else:
        raise TypeError('Argumant wordsArg to function reverseDic is not valid!')

    autoSaveStep = opt['autoSaveStep']
    if opt['savePath'] == '':
        opt['savePath'] = self.getInfo('name')+'.txt'
    savePath = opt['savePath']
    ui.progressStart()

    revG = Glossary(self.info[:])
    revG.setInfo('name', self.getInfo('name')+'_reversed')
    revG.setInfo('inputlang', self.getInfo('outputlang'))
    revG.setInfo('outputlang', self.getInfo('inputlang'))

    n = len(words)
    if c == 0:
        # One formatted string prints the same text whether `print` is the
        # Python 2 statement or the function.
        print('Number of input words: %s'%n)
        print('Reversing glossary...')
    else:
        print('continue reversing from index %d ...'%c)

    # Append when resuming, start a fresh file otherwise.
    if c > 0:
        saveFile = open(savePath, 'ab')
    else:
        saveFile = open(savePath, 'wb')
    try:
        for i in xrange(c, n):
            word = words[i]
            ui.progress(float(i+1)/n, '%d / %d words completed'%(i,n))
            if ui.reverseStop:
                # Remember where to resume; the file is closed in `finally`.
                self.continueFrom = i
                self.stoped = True
                return
            if autoSaveStep > 0 and i > 0 and i % autoSaveStep == 0:
                # Reopen periodically so that the lines written so far
                # are flushed to disk.
                saveFile.close()
                saveFile = open(savePath, 'ab')
            result = search(
                tabStr,
                word,
                opt['minRel'],
                opt['maxNum'],
                opt['sep'],
                opt['matchWord'],
                opt['showRel'],
            )
            if len(result) > 0:
                new = (word, result)
                if autoSaveStep > 0:
                    saveFile.write('%s\t%s\n'%new)
                else:
                    revG.data.append(new)
    finally:
        # Closes the handle on every exit path, and before writeTabfile()
        # below touches the same path.
        saveFile.close()
    if autoSaveStep == 0:
        revG.writeTabfile(opt['savePath'])
    ui.r_finished()
    ui.progressEnd()
    return True
def reverseDic_ext(self, wordsArg=None, opt=None):
    """Reverse this glossary using ``search`` from the ``_reverse_dic`` module.

    The glossary is first rendered to a tab-file string with
    ``self.writeTabfile(filename=None)``; every word is then passed to
    ``search`` together with that string and the matching options.

    wordsArg selects the words to look up:
        None           -> ``self.takeOutputWords()``
        file object    -> its lines without the trailing newline
        list or tuple  -> a copy of the sequence
        string         -> treated as a path, lines without trailing newline
    Anything else raises TypeError.

    opt is an options dict completed through ``addDefaultOptions``. Keys
    read here: 'autoSaveStep' (0 disables incremental saving), 'savePath'
    (empty means "<glossary name>.txt"), 'minRel', 'maxNum', 'sep',
    'matchWord' and 'showRel'.

    Returns True when all words were processed and None when
    ``self.continueFrom`` is -1 or the ui asked to stop
    (``ui.reverseStop``). On a stop, the current index is stored in
    ``self.continueFrom`` and ``self.stoped`` is set.
    """
    from _reverse_dic import search
    tabStr = self.writeTabfile(filename=None)
    # A literal {} default would be shared between calls, and this method
    # writes opt['savePath'] below.
    if opt is None:
        opt = {}
    defOpt = {
        'matchWord': True,
        'showRel': 'None',
        'background': False,
        'reportStep': 300,
        'autoSaveStep': 1000,  # set this to zero to disable auto saving.
        'savePath': '',
        'sep': commaFa,
        # Both are passed to search() below; the values mirror the
        # defaults used by searchWordInDef.
        'minRel': 0.0,
        'maxNum': 100,
    }
    opt = addDefaultOptions(opt, defOpt)
    self.stoped = False
    ui = self.ui
    try:
        c = self.continueFrom
    except AttributeError:
        c = 0
    if c == -1:
        print('c=%s' % c)
        return
    elif c == 0:
        ui.progress(0, 'Starting....')

    if wordsArg is None:
        words = self.takeOutputWords()
    elif isinstance(wordsArg, file):
        # rstrip instead of w[:-1]: a last line without a newline must not
        # lose its final character.
        words = [w.rstrip('\n') for w in wordsArg.readlines()]
    elif isinstance(wordsArg, (list, tuple)):
        words = wordsArg[:]
    elif isinstance(wordsArg, basestring):
        with open(wordsArg) as fp:
            words = [w.rstrip('\n') for w in fp.readlines()]
    else:
        raise TypeError('Argumant wordsArg to function reverseDic is not valid!')

    autoSaveStep = opt['autoSaveStep']
    if opt['savePath'] == '':
        opt['savePath'] = self.getInfo('name') + '.txt'
    savePath = opt['savePath']
    ui.progressStart()

    revG = Glossary(self.info[:])
    revG.setInfo('name', self.getInfo('name') + '_reversed')
    revG.setInfo('inputlang', self.getInfo('outputlang'))
    revG.setInfo('outputlang', self.getInfo('inputlang'))

    n = len(words)
    if c == 0:
        # One formatted string prints the same text whether `print` is the
        # Python 2 statement or the function.
        print('Number of input words: %s' % n)
        print('Reversing glossary...')
    else:
        print('continue reversing from index %d ...' % c)

    # Append when resuming, start a fresh file otherwise.
    if c > 0:
        saveFile = open(savePath, 'ab')
    else:
        saveFile = open(savePath, 'wb')
    try:
        for i in xrange(c, n):
            word = words[i]
            ui.progress(float(i + 1) / n, '%d / %d words completed' % (i, n))
            if ui.reverseStop:
                # Remember where to resume; the file is closed in `finally`.
                self.continueFrom = i
                self.stoped = True
                return
            if autoSaveStep > 0 and i > 0 and i % autoSaveStep == 0:
                # Reopen periodically so that the lines written so far
                # are flushed to disk.
                saveFile.close()
                saveFile = open(savePath, 'ab')
            result = search(
                tabStr,
                word,
                opt['minRel'],
                opt['maxNum'],
                opt['sep'],
                opt['matchWord'],
                opt['showRel'],
            )
            if len(result) > 0:
                new = (word, result)
                if autoSaveStep > 0:
                    saveFile.write('%s\t%s\n' % new)
                else:
                    revG.data.append(new)
    finally:
        # Closes the handle on every exit path, and before writeTabfile()
        # below touches the same path.
        saveFile.close()
    if autoSaveStep == 0:
        revG.writeTabfile(opt['savePath'])
    ui.r_finished()
    ui.progressEnd()
    return True