예제 #1
0
import util
import morphemes as m

def pre(ed):
    """Build the shared state for the 'Set unknowns' run.

    Returns the string 'BAIL' when util.requireKnownDb() is falsy.
    Otherwise returns a dict with the blacklist for `ed` ('bs'), the db
    loaded from util.knownDbPath ('kdb') and a mecab object ('mp').
    """
    if util.requireKnownDb():
        blacklist = util.getBlacklist(ed)
        known = m.loadDb(util.knownDbPath)
        tagger = m.mecab(None)
        return {'bs': blacklist, 'kdb': known, 'mp': tagger}
    return 'BAIL'

def per(st, f):
    """Write the unknown morphemes of f['Expression'] into f['unknowns'].

    A morpheme counts as unknown when it is not in st['kdb']; the first
    element of each such morpheme is collected and the results are joined
    with commas. Returns `st` unchanged.
    """
    morphs = m.getMorphemes(st['mp'], f['Expression'], bs=st['bs'])
    f['unknowns'] = u','.join([x[0] for x in morphs if x not in st['kdb']])
    return st

def post(st):
    """Kill the mecab object stored under st['mp']."""
    tagger = st['mp']
    tagger.kill()

# Hand the pre/per/post callbacks above to util.addDoOnSelectionBtn as 'Set unknowns'
# (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn( 'Set unknowns', 'unknowns set', 'Analyzing...', pre, per, post )
예제 #2
0
from PyQt4.QtCore import *
from PyQt4.QtGui import *

import morphemes as m
import util

def pre(ed):
    """Build the shared state for the 'View morphemes' run.

    Returns 'BAIL' when util.requireKnownDb() is falsy. Otherwise returns
    a dict with the editor ('ed'), an empty text accumulator ('txt') and
    the blacklist for `ed` ('bs').
    """
    if util.requireKnownDb():
        blacklist = util.getBlacklist(ed)
        return {'ed': ed, 'txt': '', 'bs': blacklist}
    return 'BAIL'

def per(st, f):
    """Append f['Expression'] to the text accumulated in st['txt']; return st."""
    expression = f['Expression']
    st['txt'] += expression
    return st

def post( st ):
    """Show all morphemes of the accumulated text, and those not in the known db.

    Analyzes st['txt'] with a fresh mecab object (honouring st['bs']),
    loads the db at util.knownDbPath, and displays both the full morpheme
    list and the subset missing from that db in a QMessageBox parented to
    st['ed'].
    """
    import morphemes as m
    mp = m.mecab( None )
    try:
        ms = m.getMorphemes( mp, st['txt'], bs=st['bs'] )
    finally:
        # Release the mecab object even when analysis raises, so it is not leaked.
        mp.kill()
    txt = m.ms2str( ms ).decode('utf-8')

    kdb = m.loadDb( util.knownDbPath )
    newMs = [ x for x in ms if x not in kdb ]
    newTxt = m.ms2str( newMs ).decode('utf-8')

    txt = '-----All-----\n' + txt + '\n-----New-----\n' + newTxt
    QMessageBox.information( st['ed'], 'Morphemes', txt )

# Hand the pre/per/post callbacks above to util.addDoOnSelectionBtn as 'View morphemes',
# bound to the Ctrl+V shortcut (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn( 'View morphemes', 'View morphemes', 'Viewing...', pre, per, post, shortcut='Ctrl+V' )
예제 #3
0
from PyQt4.QtCore import *
from PyQt4.QtGui import *
import util
import rankVocab as R
import morphemes as M

def pre(ed):
    """Build the shared state for the 'Set vocabRank' run.

    Returns 'BAIL' when util.requireKnownDb() is falsy. Otherwise returns
    a dict with the rank db built by R.mkRankDb from the db at
    util.knownDbPath ('rdb') and a mecab object ('mp').
    """
    if util.requireKnownDb():
        rankDb = R.mkRankDb(M.loadDb(util.knownDbPath))
        tagger = M.mecab(None)
        return {'rdb': rankDb, 'mp': tagger}
    return 'BAIL'

def per(st, f):
    """Store R.rankFact(st, f), formatted with %d, in f['vocabRank']; return st."""
    rank = R.rankFact(st, f)
    f['vocabRank'] = u'%d' % rank
    return st

def post(st):
    """Kill the mecab object stored under st['mp']."""
    tagger = st['mp']
    tagger.kill()

# Hand the pre/per/post callbacks above to util.addDoOnSelectionBtn as 'Set vocabRank'
# (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn( 'Set vocabRank', 'vocabRank set', 'Ranking...', pre, per, post )
예제 #4
0
    A, B = [], []
    for (a, b) in pairs:
        if a not in A: A.append(a)
        if b not in B: B.append(b)

    g = ML.Graph()
    g.mkMatch(pairs)
    infoMsg(
        '%d possible pairings for %d/%d morphemes to %d/%d facts, %s. Calculating now...'
        % (len(pairs), len(A), len(allM), len(B), len(allF), g.complexity()),
        p=st['ed'])
    return g.doMatch()


def post(st):
    """Match morphemes to facts and record each match on its fact.

    Calls util.killMecab(st), then matches the keys of st['db'] against
    the facts gathered from st['mfmap'] via getMatches. For every
    (morpheme, fact) pair returned, the morpheme's `base` is written to
    the fact's 'matchedMorpheme' field. Progress is reported via infoMsg.
    """
    util.killMecab(st)

    wanted = st['db'].keys()  # morphemes to learn
    sources = uniqueFlatten(st['mfmap'].values())  # facts to learn from
    matches = getMatches(st, wanted, sources)

    infoMsg('Successfully matched %d pairs.' % len(matches), p=st['ed'])
    for morph, fact in matches:
        fact['matchedMorpheme'] = u'%s' % morph.base
    infoMsg('Saved')


# Hand the pre/per/post callbacks to util.addDoOnSelectionBtn as 'Morph match'
# (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn('Morph match', 'Match morphs',
                         'Generating match db...', pre, per, post)
예제 #5
0
                                       directory=util.knownDbPath)
    if not path: return 'BAIL'
    bs = util.getBlacklist(ed)

    db = M.MorphDb(path)
    return {
        'mp': M.mecab(),
        'db': db,
        'tags': unicode(tags),
        'bs': bs,
        'ed': ed
    }


def per(st, f):
    """Tag fact `f` with st['tags'] if any of its morphemes is in st['db'].db.

    Morphemes come from f['Expression'] (honouring the blacklist st['bs']).
    The tags are added at most once, on the first matching morpheme.
    Returns `st`.
    """
    morphs = M.getMorphemes(st['mp'], f['Expression'], bs=st['bs'])
    if any(morph in st['db'].db for morph in morphs):
        st['ed'].deck.addTags([f.id], st['tags'])
    return st


def post(st):
    """Call util.killMecab(st), then reset the editor's deck."""
    util.killMecab(st)
    deck = st['ed'].deck
    deck.reset()


# Hand the pre/per/post callbacks to util.addDoOnSelectionBtn as 'Mass tagging'
# (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn('Mass tagging', 'Mass tagging', 'Tagging...', pre,
                         per, post)
예제 #6
0
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from anki.utils import addTags, canonifyTags
import util
from util import infoMsg, errorMsg
import morphemes as M

def pre(ed):
    """Prompt for tags and a morpheme db, then build the 'Mass tagging' state.

    Returns 'BAIL' when the tag dialog is cancelled or left empty, or when
    no db file is chosen. Otherwise returns a dict with a mecab object
    ('mp'), the opened M.MorphDb ('db'), the tags as unicode ('tags'), the
    blacklist for `ed` ('bs') and the editor itself ('ed').
    """
    tags, ok = QInputDialog.getText(ed, 'Enter Tags', 'Tags', QLineEdit.Normal, 'myMorph')
    if not (ok and tags):
        return 'BAIL'

    path = QFileDialog.getOpenFileName(caption='Open db', directory=util.knownDbPath)
    if not path:
        return 'BAIL'

    blacklist = util.getBlacklist(ed)
    morphDb = M.MorphDb(path)
    tagger = M.mecab()
    return {'mp': tagger, 'db': morphDb, 'tags': unicode(tags), 'bs': blacklist, 'ed': ed}

def per(st, f):
    """Add st['tags'] to fact `f` when one of its morphemes is in st['db'].db.

    Morphemes are extracted from f['Expression'] with the blacklist
    st['bs']; tagging happens once, at the first hit. Returns `st`.
    """
    found = False
    for morph in M.getMorphemes(st['mp'], f['Expression'], bs=st['bs']):
        if morph in st['db'].db:
            found = True
            break
    if found:
        st['ed'].deck.addTags([f.id], st['tags'])
    return st

def post(st):
    """Call util.killMecab(st), then reset the editor's deck."""
    util.killMecab(st)
    editor = st['ed']
    editor.deck.reset()

# Hand the pre/per/post callbacks above to util.addDoOnSelectionBtn as 'Mass tagging'
# (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn( 'Mass tagging', 'Mass tagging', 'Tagging...', pre, per, post )
예제 #7
0
# get maximum cardinality morpheme<->fact matching
def getMatches(st, allM, allF):
    """Pair morphemes with facts and return the result of ML.Graph.doMatch().

    For every morpheme in `allM` that is a key of st['mfmap'], one
    (morpheme, fact) pair is produced per fact listed there. The pairs are
    handed to an ML.Graph via mkMatch. A summary (pair count, distinct
    morphemes and facts versus the totals in `allM`/`allF`, and the graph's
    complexity()) is shown through infoMsg before matching.
    """
    pairs = [(morph, fact)
             for morph in allM if morph in st['mfmap']
             for fact in st['mfmap'][morph]]

    # Distinct morphemes / facts that actually take part in a pair.
    usedM, usedF = [], []
    for morph, fact in pairs:
        if morph not in usedM:
            usedM.append(morph)
        if fact not in usedF:
            usedF.append(fact)

    graph = ML.Graph()
    graph.mkMatch(pairs)
    summary = '%d possible pairings for %d/%d morphemes to %d/%d facts, %s. Calculating now...' % (
        len(pairs), len(usedM), len(allM), len(usedF), len(allF), graph.complexity())
    infoMsg(summary, p=st['ed'])
    return graph.doMatch()

def post(st):
    """Match morphemes to facts and write each match onto its fact.

    After util.killMecab(st), the keys of st['db'] are matched against the
    facts collected from st['mfmap'] using getMatches. Each returned
    (morpheme, fact) pair sets the fact's 'matchedMorpheme' field to the
    morpheme's `base`. Progress is reported through infoMsg.
    """
    util.killMecab(st)

    morphsToLearn = st['db'].keys()
    factsToLearnFrom = uniqueFlatten(st['mfmap'].values())
    matched = getMatches(st, morphsToLearn, factsToLearnFrom)

    infoMsg('Successfully matched %d pairs.' % len(matched), p=st['ed'])
    for morph, fact in matched:
        fact['matchedMorpheme'] = u'%s' % morph.base
    infoMsg('Saved')

# Hand the pre/per/post callbacks above to util.addDoOnSelectionBtn as 'Morph match'
# (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn( 'Morph match', 'Match morphs', 'Generating match db...', pre, per, post )
def getCards(deck, fids):
    """Return the Card objects of `deck` whose factId is one of `fids`.

    Card ids are selected with a single query; each Card is then fetched
    individually through deck.s.query(Card).get().
    """
    sql = 'select id from cards where factId in %s' % ids2str(fids)
    cards = []
    for cardId in deck.s.column0(sql):
        cards.append(deck.s.query(Card).get(cardId))
    return cards

def pre( ed ):
    """Prompt for a field name and an output path, then build the export state.

    Returns 'BAIL' when the field dialog is cancelled or left empty, or
    when no save path is chosen. Otherwise returns a dict with the editor
    ('ed'), the field name ('fieldName'), the save path as unicode
    ('filePath'), a new empty M.MorphDb ('db') and a mecab object ('mp').
    """
    field, ok = QInputDialog.getText( ed, 'Enter name of field to extract from', 'Field Name', QLineEdit.Normal, 'Expression' )
    # per() indexes every fact with this name, so an empty name is presumably
    # unusable; bail out as on cancel instead of failing later.
    if not ok or not field: return 'BAIL'
    defPath = util.dbPath + 'mySelection.db'
    path = QFileDialog.getSaveFileName( caption='Save morpheme db to?', directory=defPath )
    if not path: return 'BAIL'
    # unicode() rather than str(): under Python 2, str() on a QString that
    # holds non-ASCII characters raises UnicodeEncodeError.
    return { 'ed':ed, 'fieldName':field, 'filePath':unicode(path), 'db':M.MorphDb(), 'mp':M.mecab() }

def per(st, f):
    """Add the morphemes of one fact to st['db'], with the fact as location.

    The field named st['fieldName'] is analyzed with st['mp']. The
    location is an M.AnkiDeck built from the fact id, field name, field
    value, deck path, deck name and the intervals of the fact's cards.
    Returns `st`.
    """
    deck = st['ed'].deck
    fieldName = st['fieldName']

    intervals = []
    for card in getCards(deck, [f.id]):
        intervals.append(card.interval)

    morphs = M.getMorphemes(st['mp'], f[fieldName])
    location = M.AnkiDeck(f.id, fieldName, f[fieldName], deck.path, deck.name(), intervals)
    st['db'].addMsL(morphs, location)
    return st

def post(st):
    """Save the collected db and optionally merge it into the known db.

    After util.killMecab(st), st['db'] is saved to st['filePath']. The
    user is then asked whether to merge. On Yes, the saved file and the db
    at util.knownDbPath are merged back into util.knownDbPath.
    """
    util.killMecab(st)
    st['db'].save(st['filePath'])

    answer = QMessageBox.question(
        st['ed'], 'Query', 'Would you like to merge with known db?',
        QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
    if answer != QMessageBox.Yes:
        return
    M.MorphDb.mergeFiles(st['filePath'], util.knownDbPath, util.knownDbPath)

# Hand the pre/per/post callbacks above to util.addDoOnSelectionBtn as 'Export Morphemes'
# (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn( 'Export Morphemes', 'Morpheme export', 'Exporting...', pre, per, post )
예제 #9
0
import util
import morphemes as m

def pre(ed):
    """Build the shared state for the 'Set iPlusN' run.

    Returns 'BAIL' when util.requireKnownDb() is falsy. Otherwise returns
    a dict with the blacklist for `ed` ('bs'), the db loaded from
    util.knownDbPath ('kdb') and a mecab object ('mp').
    """
    if util.requireKnownDb():
        state = {'bs': util.getBlacklist(ed)}
        state['kdb'] = m.loadDb(util.knownDbPath)
        state['mp'] = m.mecab(None)
        return state
    return 'BAIL'

def per(st, f):
    """Store in f['iPlusN'] how many morphemes of f['Expression'] are not in st['kdb'].

    The count is written as a %d-formatted unicode string. Returns `st`.
    """
    morphs = m.getMorphemes(st['mp'], f['Expression'], bs=st['bs'])
    unknownCount = sum(1 for x in morphs if x not in st['kdb'])
    f['iPlusN'] = u'%d' % unknownCount
    return st

def post(st):
    """Kill the mecab object stored under st['mp']."""
    tagger = st['mp']
    tagger.kill()

# Hand the pre/per/post callbacks above to util.addDoOnSelectionBtn as 'Set iPlusN'
# (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn( 'Set iPlusN', 'iPlusN set', 'Analyzing...', pre, per, post )
예제 #10
0
from PyQt4.QtCore import *
from PyQt4.QtGui import *

import morphemes as M
import util

def pre(ed):
    """Build the shared state for the 'View morphemes' run.

    Returns 'BAIL' when util.requireKnownDb() is falsy. Otherwise returns
    a dict with the editor ('ed'), an empty text accumulator ('txt'), the
    blacklist for `ed` ('bs') and a mecab object ('mp').
    """
    if util.requireKnownDb():
        blacklist = util.getBlacklist(ed)
        tagger = M.mecab()
        return {'ed': ed, 'txt': '', 'bs': blacklist, 'mp': tagger}
    return 'BAIL'

def per(st, f):
    """Append f['Expression'] to the text accumulated in st['txt']; return st."""
    expression = f['Expression']
    st['txt'] += expression
    return st

def post( st ):
    """Show all morphemes of the accumulated text, and those not in the known db.

    Analyzes st['txt'] with st['mp'] (honouring st['bs']), opens the
    M.MorphDb at util.knownDbPath, and displays both the full morpheme list
    and the subset missing from that db in a QMessageBox parented to
    st['ed'].
    """
    try:
        ms = M.getMorphemes( st['mp'], st['txt'], bs=st['bs'] )
    finally:
        # Run the mecab cleanup even when analysis raises, so it is not skipped.
        util.killMecab( st )
    txt = M.ms2str( ms )

    kdb = M.MorphDb( util.knownDbPath )
    newMs = [ m for m in ms if m not in kdb.db ]
    newTxt = M.ms2str( newMs )

    txt = '-----All-----\n' + txt + '\n-----New-----\n' + newTxt
    QMessageBox.information( st['ed'], 'Morphemes', txt )

# Hand the pre/per/post callbacks above to util.addDoOnSelectionBtn as 'View morphemes',
# bound to the Ctrl+V shortcut (presumably a run-on-selected-facts action; confirm in util).
util.addDoOnSelectionBtn( 'View morphemes', 'View morphemes', 'Viewing...', pre, per, post, shortcut='Ctrl+V' )