示例#1
0
文件: create.py 项目: zsmahi/ArArud
import sys
sys.path.insert(0,'../')

import db.litebase

if __name__ == '__main__':

	# `import db.litebase` binds only the top-level name `db`, so the bare
	# `litebase.` references below would raise NameError. Alias the submodule
	# explicitly.
	litebase = db.litebase

	# The source dictionary can be built from the Arramooz database
	# (http://arramooz.sourceforge.net/), or you can find it by downloading
	# "http://sourceforge.net/projects/mishkal/files/mishkal2013-05-18.tar.bz2/download"
	# and taking ./lib/qalsadi/data/arabicdictionary.sqlite from the archive.
	srcdb = litebase.liteBase('arabicdictionary.sqlite')

	dstdb = litebase.liteBase('words.sqlite')

	# Describe the destination "nouns" table: an auto-increment integer
	# primary key plus two VARCHAR(20) columns.
	tab = litebase.liteTable()
	tab.beginTable("nouns")
	tab.addColumn('id', litebase.litePK_INT_INC(), u'', False)
	tab.addColumn('word', litebase.liteVARCHAR(20), u'DEFAULT NULL', True)
	tab.addColumn('pattern', litebase.liteVARCHAR(20), u'DEFAULT NULL', True)
	tab.endTable()
	# Show the generated SQL for the table definition.
	print(tab.getSqlQuery())

	dstdb.addTable(tab)

	# Read every row of the source "nouns" table.
	src = srcdb.getTable('nouns')
	rows = src.getData()
	
	first = True
	for row in rows:
		if first:
示例#2
0
from db.litebase import liteBase, liteINTEGER, liteVARCHAR
from db.litebase import liteTable

if __name__ == '__main__':
	# Load the Arabic, French, English and Japanese sentences from the
	# tab-separated file sentences.csv into tatoeba.db, one table per
	# language code.

	dstdb = liteBase('tatoeba.db')

	i = 0  # inserts performed since the last commit
	# The context manager closes the input file even if an insert raises.
	with codecs.open("sentences.csv", "r", "utf-8") as f:
		for line in f:
			e = line.split("\t")
			if len(e) < 3:
				# Fewer than three fields: not a usable record, skip it.
				continue
			if e[1] not in ("ara", "fra", "eng", "jpn"):
				continue
			if not dstdb.containsTable(e[1]):
				# First sentence seen for this language: create its table.
				tab = liteTable()
				tab.beginTable(e[1])
				tab.addColumn('id', liteINTEGER(), u'', False)
				tab.addColumn('sent', liteVARCHAR(60), u'DEFAULT NULL', True)
				tab.endTable()
				dstdb.addTable(tab)
			# Double every embedded double quote so a sentence containing '"'
			# cannot end the quoted value early.
			# NOTE(review): presumably insertData splices this string into an
			# INSERT ... VALUES (...) statement - confirm against litebase.
			# NOTE(review): e[2] still carries the line terminator of the
			# input line, as in the original - confirm whether that is wanted.
			sent = e[2].replace(u'"', u'""')
			data = u'%s, "%s"' % (e[0], sent)
			print(data)
			dst = dstdb.getTable(e[1])
			dst.insertData(data, u'id, sent')
			i = i + 1
			if i >= 1000:
				# Commit in batches of 1000 inserts.
				dstdb.commit()
				i = 0
	# Commit whatever is left from the last partial batch.
	dstdb.commit()
示例#3
0
from trans.buckwalter import Buckwalter
import pattern

if __name__ == '__main__':

    # Disabled debugging snippet, kept as an inert string literal.
    '''theword = "AbotAE"
	theword_u = "AbtAE"
	print distance(theword, "AfotaAEa")
	print getTemplate(theword, theword_u)
	exit()'''
    # Source dictionary and destination word database; os.path.realpath
    # resolves the relative paths against the current working directory.
    srcdb = litebase.liteBase(os.path.realpath('../test/ardic.sqlite'))
    dstdb = litebase.liteBase(os.path.realpath('../test/words.sqlite'))

    # Describe the destination "words" table: an auto-increment integer
    # primary key followed by three identically-typed VARCHAR(20) columns.
    tab = litebase.liteTable()
    tab.beginTable("words")
    tab.addColumn('id', litebase.litePK_INT_INC(), u'', False)
    for text_column in ('word', 'pattern', 'vocalized'):
        tab.addColumn(text_column, litebase.liteVARCHAR(20),
                      u'DEFAULT NULL', True)
    tab.endTable()
    dstdb.addTable(tab)

    # Show the SQL for the source table, then read all of its rows.
    src = srcdb.getTable('ardict')
    print(src.getSqlQuery())
    rows = src.getData()
示例#4
0
# 
#  You should have received a copy of the GNU Affero General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.
#  

import sys
import os
import codecs
from db.litebase import liteBase, liteINTEGER, liteVARCHAR
from db.litebase import liteTable

if __name__ == '__main__':
	# Script prelude: open tatoeba.db, make sure it has a "links" table,
	# open links.csv and fetch the "jpn" table before the line loop below.
	
	# Resolve the database file name against the current working directory.
	path = os.path.realpath("tatoeba.db")
	tatodb = liteBase(path)
	linksTable = liteTable()
	if not tatodb.containsTable("links"):
		# No "links" table yet: define it with two integer columns
		# ('jpnId' and 'id') and add it to the database.
		linksTable.beginTable("links")
		linksTable.addColumn('jpnId', liteINTEGER(), u'', True)
		linksTable.addColumn('id', liteINTEGER(), u'', False)
		linksTable.endTable()
		tatodb.addTable(linksTable)
	else:
		# The table already exists: use the stored one instead of the
		# blank liteTable created above.
		linksTable = tatodb.getTable("links")
	
	# links.csv is read as UTF-8 text.
	f = codecs.open("links.csv", "r", "utf-8")
	# NOTE(review): lastskip and i are only initialised here; they are
	# presumably state for the loop over f, whose body is not visible.
	lastskip = "0"

	jpnTable = tatodb.getTable('jpn')
	i = 0
	for line in f: