Пример #1
0
    def words_by_reading(self, char, reading, **kwargs):
        """Return the words in which char is used with the given reading.

        The (char, reading) pair is resolved to a reading id through
        reading_query.get_id.  That id, together with any extra keyword
        arguments, is passed on to words_by_reading_id, and whatever that
        method produces (a list of Word objects from the solutions
        database) is returned unchanged.
        """
        reading_id = reading_query.get_id(char, reading)
        matches = self.words_by_reading_id(reading_id, **kwargs)
        return matches
Пример #2
0
def db_populate_kanji_readings():
    """Fill the reading table from kanjidic and from the extra readings file.

    Two sources are loaded, each with its own executemany + commit on the
    connection returned by get_connection():

    1. Every row of the kanjidic database (KANJIDIC_PATH) whose r_type is
       'ja_on' or 'ja_kun'.  One trailing '-' (or, failing that, one
       leading '-') is removed from the reading.  The reading itself is
       always queued; the part before the first '.' is queued as well, but
       only once per run of consecutive rows for the same character.
    2. The "character,reading" lines of OTHER_READINGS_PATH (UTF-8), queued
       with type 'other' unless reading_query.get_id already returns an id
       for the pair.

    A start message and the elapsed time are printed to stdout.
    """
    insert_sql = '''INSERT INTO reading(character, reading, type)
                     VALUES (?,?,?)'''

    print("Filling database with kanji/reading data...")
    conn = get_connection()
    c = conn.cursor()

    kd_conn = sqlite3.connect(KANJIDIC_PATH)
    try:
        kd_conn.row_factory = sqlite3.Row
        query = "SELECT * FROM reading WHERE r_type='ja_on' OR r_type='ja_kun'"
        start = time.time()
        # fetchall() materialises every row, so the kanjidic connection can
        # be released before the rows are processed.
        readings = kd_conn.cursor().execute(query).fetchall()
    finally:
        kd_conn.close()

    reading_l = []
    bases_used = set()
    lastchar = None
    for r in readings:
        char = r['character_literal']
        if char != lastchar:
            # A different character than the previous row: forget the
            # bases recorded so far.
            bases_used.clear()
            lastchar = char

        reading = r['reading']
        if not reading:
            # Nothing to store for an empty reading (and indexing into it
            # would raise).
            continue
        if reading.endswith(u"-"):
            reading = reading[:-1]
        elif reading.startswith(u"-"):
            reading = reading[1:]

        # Text before the first '.'; equals the whole reading when there is
        # no '.' in it.
        base = reading.partition('.')[0]
        r_type = r['r_type']

        if base not in bases_used:
            bases_used.add(base)
            reading_l.append([char, base, r_type])
        # NOTE(review): when the reading contains no '.', base == reading and
        # the same row is queued twice here.  Presumably the reading table
        # tolerates or drops the duplicate -- confirm against the schema.
        reading_l.append([char, reading, r_type])

    c.executemany(insert_sql, reading_l)
    conn.commit()

    conn = get_connection()
    c = conn.cursor()
    reading_l = []

    # Now add our own, non-kanjidic entries.
    f = codecs.open(OTHER_READINGS_PATH, encoding='utf-8')
    try:
        for line in f:
            # Drop the line terminator, including the '\r' of CRLF files,
            # so it does not end up inside the stored reading.
            line = line.rstrip('\r\n')
            if not line:
                continue
            char, _sep, reading = line.partition(",")

            # Search the existing entries for the same entry, which might
            # be in a different character set (so a db constraint won't
            # pick up on it).  We don't want to add any duplicates.
            existing_id = reading_query.get_id(char, reading)
            if existing_id is None:
                reading_l.append([char, reading, 'other'])
    finally:
        f.close()

    c.executemany(insert_sql, reading_l)
    conn.commit()

    print('Filling database with kanji/reading data took '
          '%s seconds' % (time.time() - start))
Пример #3
0
        in_keb = '%%'
        if count > 0:
            for n in range(0, count):
                in_keb += '%s%%' % char
            s = 'select word, reading from word where word.word like ?' 
            
            results = self.conn.execute(s, [in_keb]).fetchall()
            for r in results:
                word_list.append(Word(self.conn, r['word'], r['reading']))
        return word_list

    def clear_words(self):
        """Delete every row of the word table through self.conn."""
        delete_all_sql = 'delete from word'
        self.conn.execute(delete_all_sql)

      
if __name__ == '__main__':
    # Ad-hoc manual check: print every word that words_by_reading_id returns
    # for one character/reading pair, followed by the number of results.
    #dbpath = 'dbs/test_query.sqlite'
    dbpath = 'dbs/jmdict_solutions.sqlite'
    #word_db.create_db(dbpath)
    q = WordQuery(dbpath)
#    words = q.contains_char(u'帰')
#    for w in words:
#        print('%s %s' % (w.word, w.reading))

    reading_id = reading_query.get_id(u'帰', u'かえ.る')
    words = q.words_by_reading_id(reading_id)
    for w in words:
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (w.word, w.reading))
    print('found %d' % len(words))