import util
import morphemes as m


def pre(ed):
    """Build the shared state for the 'Set unknowns' action.

    Returns the string 'BAIL' when util.requireKnownDb() is falsy
    (presumably the abort signal for addDoOnSelectionBtn -- confirm in
    util).  Otherwise returns a dict with the blacklist ('bs'), the db
    loaded from util.knownDbPath ('kdb') and a mecab handle ('mp').
    """
    if not util.requireKnownDb():
        return 'BAIL'
    blacklist = util.getBlacklist(ed)
    knownDb = m.loadDb(util.knownDbPath)
    parser = m.mecab(None)
    return {'bs': blacklist, 'kdb': knownDb, 'mp': parser}


def per(st, f):
    """Fill the fact's 'unknowns' field and hand the state back.

    The field receives a comma-joined list made of element [0] of every
    morpheme of f['Expression'] that is not contained in st['kdb'].
    """
    morphs = m.getMorphemes(st['mp'], f['Expression'], bs=st['bs'])
    missing = [morph[0] for morph in morphs if morph not in st['kdb']]
    f['unknowns'] = u','.join(missing)
    return st


def post(st):
    """Kill the mecab handle that pre() stored under st['mp']."""
    parser = st['mp']
    parser.kill()


util.addDoOnSelectionBtn(
    'Set unknowns', 'unknowns set', 'Analyzing...', pre, per, post)
from PyQt4.QtCore import *
from PyQt4.QtGui import *
import morphemes as m
import util


def pre(ed):
    """Build the shared state for the 'View morphemes' action.

    Returns the string 'BAIL' when util.requireKnownDb() is falsy
    (presumably the abort signal for addDoOnSelectionBtn -- confirm in
    util).  Otherwise returns a dict with the editor ('ed'), an empty text
    accumulator ('txt') and the blacklist ('bs').
    """
    if not util.requireKnownDb():
        return 'BAIL'
    bs = util.getBlacklist(ed)
    return {'ed': ed, 'txt': '', 'bs': bs}


def per(st, f):
    """Append the fact's 'Expression' text to st['txt'] and return st."""
    st['txt'] += f['Expression']
    return st


def post(st):
    """Show all morphemes of the accumulated text, and the new ones.

    Parses st['txt'] with a fresh mecab handle, then displays a message box
    (parented to st['ed']) listing every morpheme followed by those that are
    not contained in the db loaded from util.knownDbPath.
    """
    mp = m.mecab(None)
    try:
        ms = m.getMorphemes(mp, st['txt'], bs=st['bs'])
    finally:
        # Always release the mecab handle, even when parsing raises;
        # otherwise the handle created above is never killed.
        mp.kill()

    txt = m.ms2str(ms).decode('utf-8')
    kdb = m.loadDb(util.knownDbPath)
    newMs = [x for x in ms if x not in kdb]
    newTxt = m.ms2str(newMs).decode('utf-8')
    txt = '-----All-----\n' + txt + '\n-----New-----\n' + newTxt
    QMessageBox.information(st['ed'], 'Morphemes', txt)


util.addDoOnSelectionBtn(
    'View morphemes', 'View morphemes', 'Viewing...',
    pre, per, post, shortcut='Ctrl+V')
from PyQt4.QtCore import *
from PyQt4.QtGui import *
import util
import rankVocab as R
import morphemes as M


def pre(ed):
    """Build the shared state for the 'Set vocabRank' action.

    Returns the string 'BAIL' when util.requireKnownDb() is falsy
    (presumably the abort signal for addDoOnSelectionBtn -- confirm in
    util).  Otherwise returns a dict with the rank db built from the known
    db ('rdb') and a mecab handle ('mp').
    """
    if not util.requireKnownDb():
        return 'BAIL'
    rankDb = R.mkRankDb(M.loadDb(util.knownDbPath))
    parser = M.mecab(None)
    return {'rdb': rankDb, 'mp': parser}


def per(st, f):
    """Store R.rankFact(st, f), formatted as an integer, in f['vocabRank']."""
    rank = R.rankFact(st, f)
    f['vocabRank'] = u'%d' % rank
    return st


def post(st):
    """Kill the mecab handle that pre() stored under st['mp']."""
    parser = st['mp']
    parser.kill()


util.addDoOnSelectionBtn(
    'Set vocabRank', 'vocabRank set', 'Ranking...', pre, per, post)
A, B = [], [] for (a, b) in pairs: if a not in A: A.append(a) if b not in B: B.append(b) g = ML.Graph() g.mkMatch(pairs) infoMsg( '%d possible pairings for %d/%d morphemes to %d/%d facts, %s. Calculating now...' % (len(pairs), len(A), len(allM), len(B), len(allF), g.complexity()), p=st['ed']) return g.doMatch() def post(st): util.killMecab(st) allM = st['db'].keys() # morphemes to learn allF = uniqueFlatten(st['mfmap'].values()) # facts to learn from ps = getMatches(st, allM, allF) infoMsg('Successfully matched %d pairs.' % len(ps), p=st['ed']) for m, f in ps: f['matchedMorpheme'] = u'%s' % m.base infoMsg('Saved') util.addDoOnSelectionBtn('Morph match', 'Match morphs', 'Generating match db...', pre, per, post)
directory=util.knownDbPath) if not path: return 'BAIL' bs = util.getBlacklist(ed) db = M.MorphDb(path) return { 'mp': M.mecab(), 'db': db, 'tags': unicode(tags), 'bs': bs, 'ed': ed } def per(st, f): ms = M.getMorphemes(st['mp'], f['Expression'], bs=st['bs']) for m in ms: if m in st['db'].db: st['ed'].deck.addTags([f.id], st['tags']) return st return st def post(st): util.killMecab(st) st['ed'].deck.reset() util.addDoOnSelectionBtn('Mass tagging', 'Mass tagging', 'Tagging...', pre, per, post)
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from anki.utils import addTags, canonifyTags
import util
from util import infoMsg, errorMsg
import morphemes as M


def pre(ed):
    """Ask for the tags and the morpheme db used by 'Mass tagging'.

    Returns the string 'BAIL' when the tag dialog is cancelled, the tag text
    is empty, or no db file is chosen.  Otherwise returns the state dict:
    mecab handle ('mp'), opened MorphDb ('db'), tags as unicode ('tags'),
    blacklist ('bs') and the editor ('ed').
    """
    tags, ok = QInputDialog.getText(
        ed, 'Enter Tags', 'Tags', QLineEdit.Normal, 'myMorph')
    if not (ok and tags):
        return 'BAIL'

    path = QFileDialog.getOpenFileName(
        caption='Open db', directory=util.knownDbPath)
    if not path:
        return 'BAIL'

    blacklist = util.getBlacklist(ed)
    morphDb = M.MorphDb(path)
    return {
        'mp': M.mecab(),
        'db': morphDb,
        'tags': unicode(tags),
        'bs': blacklist,
        'ed': ed,
    }


def per(st, f):
    """Tag the fact when any of its morphemes is in the chosen db.

    The tags are added at most once per fact: the search stops at the first
    morpheme of f['Expression'] found in st['db'].db.
    """
    morphs = M.getMorphemes(st['mp'], f['Expression'], bs=st['bs'])
    if any(morph in st['db'].db for morph in morphs):
        st['ed'].deck.addTags([f.id], st['tags'])
    return st


def post(st):
    """Release mecab via util.killMecab and reset the deck."""
    util.killMecab(st)
    deck = st['ed'].deck
    deck.reset()


util.addDoOnSelectionBtn(
    'Mass tagging', 'Mass tagging', 'Tagging...', pre, per, post)
# get maximum cardinality morpheme<->fact matching
def getMatches(st, allM, allF):
    """Pair morphemes with facts and return the result of Graph.doMatch().

    st   -- state dict; reads st['mfmap'] (morpheme -> facts) and st['ed']
    allM -- morphemes we want to learn
    allF -- facts to learn from (only its length is used, for the message)

    Every (morpheme, fact) pair reachable through st['mfmap'] is fed to an
    ML.Graph; a summary is shown through infoMsg before matching starts.
    """
    # One candidate pair per fact each wanted morpheme can be learned from.
    pairs = [
        (morph, fact)
        for morph in allM if morph in st['mfmap']
        for fact in st['mfmap'][morph]
    ]

    # Distinct morphemes / facts that take part in at least one pair.
    usedMorphs, usedFacts = [], []
    for morph, fact in pairs:
        if morph not in usedMorphs:
            usedMorphs.append(morph)
        if fact not in usedFacts:
            usedFacts.append(fact)

    graph = ML.Graph()
    graph.mkMatch(pairs)
    summary = '%d possible pairings for %d/%d morphemes to %d/%d facts, %s. Calculating now...' % (
        len(pairs),
        len(usedMorphs), len(allM),
        len(usedFacts), len(allF),
        graph.complexity(),
    )
    infoMsg(summary, p=st['ed'])
    return graph.doMatch()


def post(st):
    """Match morphemes to facts and record the match on each fact.

    After releasing mecab, every (morpheme, fact) pair returned by
    getMatches gets the morpheme's base written to the fact's
    'matchedMorpheme' field.
    """
    util.killMecab(st)
    wanted = st['db'].keys()  # morphemes to learn
    sources = uniqueFlatten(st['mfmap'].values())  # facts to learn from
    matched = getMatches(st, wanted, sources)
    infoMsg('Successfully matched %d pairs.' % len(matched), p=st['ed'])
    for morph, fact in matched:
        fact['matchedMorpheme'] = u'%s' % morph.base
    infoMsg('Saved')


util.addDoOnSelectionBtn(
    'Morph match', 'Match morphs', 'Generating match db...', pre, per, post)
def getCards(deck, fids):
    """Return the Card objects whose factId is one of fids."""
    query = 'select id from cards where factId in %s' % ids2str(fids)
    cardIds = deck.s.column0(query)
    return [deck.s.query(Card).get(cardId) for cardId in cardIds]


def pre(ed):
    """Ask for the source field and the output file of the export.

    Returns the string 'BAIL' when the field dialog is cancelled or no save
    path is chosen.  Otherwise returns the state dict: editor ('ed'), field
    name ('fieldName'), output path as str ('filePath'), an empty MorphDb
    ('db') and a mecab handle ('mp').
    """
    field, ok = QInputDialog.getText(
        ed, 'Enter name of field to extract from', 'Field Name',
        QLineEdit.Normal, 'Expression')
    if not ok:
        return 'BAIL'

    suggested = util.dbPath + 'mySelection.db'
    path = QFileDialog.getSaveFileName(
        caption='Save morpheme db to?', directory=suggested)
    if not path:
        return 'BAIL'

    return {
        'ed': ed,
        'fieldName': field,
        'filePath': str(path),
        'db': M.MorphDb(),
        'mp': M.mecab(),
    }


def per(st, f):
    """Add the morphemes of the chosen field of fact f to st['db'].

    Each morpheme is recorded with an M.AnkiDeck location built from the
    fact id, field name and text, the deck path and name, and the intervals
    of the fact's cards.
    """
    deck = st['ed'].deck
    fieldName = st['fieldName']
    intervals = [card.interval for card in getCards(deck, [f.id])]
    morphs = M.getMorphemes(st['mp'], f[fieldName])
    location = M.AnkiDeck(
        f.id, fieldName, f[fieldName], deck.path, deck.name(), intervals)
    st['db'].addMsL(morphs, location)
    return st


def post(st):
    """Save the collected db and optionally merge it into the known db."""
    util.killMecab(st)
    st['db'].save(st['filePath'])

    answer = QMessageBox.question(
        st['ed'], 'Query', 'Would you like to merge with known db?',
        QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
    if answer != QMessageBox.Yes:
        return
    M.MorphDb.mergeFiles(st['filePath'], util.knownDbPath, util.knownDbPath)


util.addDoOnSelectionBtn(
    'Export Morphemes', 'Morpheme export', 'Exporting...', pre, per, post)
import util
import morphemes as m


def pre(ed):
    """Build the shared state for the 'Set iPlusN' action.

    Returns the string 'BAIL' when util.requireKnownDb() is falsy
    (presumably the abort signal for addDoOnSelectionBtn -- confirm in
    util).  Otherwise returns a dict with the blacklist ('bs'), the db
    loaded from util.knownDbPath ('kdb') and a mecab handle ('mp').
    """
    if not util.requireKnownDb():
        return 'BAIL'
    blacklist = util.getBlacklist(ed)
    knownDb = m.loadDb(util.knownDbPath)
    parser = m.mecab(None)
    return {'bs': blacklist, 'kdb': knownDb, 'mp': parser}


def per(st, f):
    """Store in f['iPlusN'] how many of the fact's morphemes are unknown.

    A morpheme of f['Expression'] counts as unknown when it is not
    contained in st['kdb'].
    """
    morphs = m.getMorphemes(st['mp'], f['Expression'], bs=st['bs'])
    unknownCount = len([morph for morph in morphs if morph not in st['kdb']])
    f['iPlusN'] = u'%d' % unknownCount
    return st


def post(st):
    """Kill the mecab handle that pre() stored under st['mp']."""
    parser = st['mp']
    parser.kill()


util.addDoOnSelectionBtn(
    'Set iPlusN', 'iPlusN set', 'Analyzing...', pre, per, post)
from PyQt4.QtCore import *
from PyQt4.QtGui import *
import morphemes as M
import util


def pre(ed):
    """Build the shared state for the 'View morphemes' action.

    Returns the string 'BAIL' when util.requireKnownDb() is falsy
    (presumably the abort signal for addDoOnSelectionBtn -- confirm in
    util).  Otherwise returns a dict with the editor ('ed'), an empty text
    accumulator ('txt'), the blacklist ('bs') and a mecab handle ('mp').
    """
    if not util.requireKnownDb():
        return 'BAIL'
    bs = util.getBlacklist(ed)
    return {'ed': ed, 'txt': '', 'bs': bs, 'mp': M.mecab()}


def per(st, f):
    """Append the fact's 'Expression' text to st['txt'] and return st."""
    st['txt'] += f['Expression']
    return st


def post(st):
    """Show all morphemes of the accumulated text, and the new ones.

    Parses st['txt'] with the mecab handle from pre(), then displays a
    message box (parented to st['ed']) listing every morpheme followed by
    those that are not in the .db of the MorphDb at util.knownDbPath.
    """
    try:
        ms = M.getMorphemes(st['mp'], st['txt'], bs=st['bs'])
    finally:
        # Release mecab even when parsing raises; previously the cleanup
        # call was skipped on that path.
        util.killMecab(st)

    txt = M.ms2str(ms)
    kdb = M.MorphDb(util.knownDbPath)
    newMs = [m for m in ms if m not in kdb.db]
    newTxt = M.ms2str(newMs)
    txt = '-----All-----\n' + txt + '\n-----New-----\n' + newTxt
    QMessageBox.information(st['ed'], 'Morphemes', txt)


util.addDoOnSelectionBtn(
    'View morphemes', 'View morphemes', 'Viewing...',
    pre, per, post, shortcut='Ctrl+V')