def bulkcorrectDB(project, treeids=None, commit=True):
    """
    Bulk correction of a whole project. Very slow!

    Every selected row of the "trees" table is loaded with sql.gettree, its
    tree is passed to correctLowerProperNouns, and when that call reports a
    change the new tree is written back with sql.enterTree.

    For simple changes it is better to work directly in SQL, for example to
    change all functions:
        update links set function='dep' where function='det';

    Args:
        project: value handed to SQL() to select the project database.
        treeids: optional values passed to sql.getall together with the
            column name "rowid"; when empty or None, getall is called
            without any filter.
        commit: when false, db.commit() is never called before the
            connection is closed.
    """

    sql = SQL(project)
    db, cursor = sql.open()

    try:
        if treeids:
            a, v = ["rowid"], treeids
        else:
            a, v = [], []

        allt = list(sql.getall(cursor, "trees", a, v))
        total = len(allt)
        print("nb trees: %s" % total)
        ti = time()
        pending = False  # True while there are written but uncommitted trees

        for nr, (treeid, sid, uid, annotype, status, comment,
                 timestamp) in enumerate(allt):

            dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
            if dic:
                newtree, changed = correctLowerProperNouns(dic["tree"])

                if changed:
                    print("________________________________\n")
                    _, sentence, _ = sql.enterTree(cursor,
                                                   newtree,
                                                   sid,
                                                   uid,
                                                   tokensChanged=True)
                    print(sentence)
                    print("changed %s" % (changed,))
                    pending = True

                if not nr % 100:
                    done = nr + 1
                    # Clamp so a zero (or negative) clock delta on a coarse
                    # timer cannot raise ZeroDivisionError.
                    elapsed = max(time() - ti, 1e-6)
                    rate = float(done) / elapsed
                    # Trees still to process after this one.
                    eta = (total - done) / rate
                    print("_____ treeid %s nr %s / %s --- %s trees per second "
                          "%s seconds ( %s minutes) to go" %
                          (treeid, done, total, int(rate), int(eta),
                           round(eta / 60, 1)))

            # Flush every 100 trees, whichever tree the change happened on,
            # and only announce a commit that is actually performed.
            if commit and pending and not nr % 100:
                print("committing...")
                db.commit()
                pending = False

        if commit:
            db.commit()
    finally:
        # Always release the connection, even if a correction step raised.
        db.close()
# Example no. 2
# 0
def bulkcorrectDB(project, treeids=None):
	"""
	Bulk correction of a whole project. Very slow!

	Every selected row of the "trees" table is loaded with sql.gettree and
	its tree is passed to correctfeatures together with a hard-coded
	two-node pattern. When that call reports a change, the new tree is
	written back with sql.enterTree and committed immediately.

	For simple changes it is better to work directly in SQL, for example to
	change all functions:
		update links set function='dep' where function='det';

	Args:
		project: value handed to SQL() to select the project database.
		treeids: optional values passed to sql.getall together with the
			column name "rowid"; when empty or None, getall is called
			without any filter.
	"""

	sql = SQL(project)
	db, cursor = sql.open()

	try:
		if treeids:
			a, v = ["rowid"], treeids
		else:
			a, v = [], []

		# Materialise the rows so len() works whatever getall returns.
		# NOTE(review): confirm whether getall yields a list or an iterator.
		allt = list(sql.getall(cursor, "trees", a, v))
		total = len(allt)

		ti = time()

		for nr, (treeid, sid, uid, annotype, status, comment, timestamp) in enumerate(allt):

			done = nr + 1
			# Clamp so a zero (or negative) clock delta on a coarse timer
			# cannot raise ZeroDivisionError on the first iterations.
			elapsed = max(time() - ti, 1e-6)
			rate = float(done) / elapsed
			# Rough estimate: trees left after this one, at the current rate.
			eta = (total - done) / rate
			print("_____ treeid %s nr %s / %s --- %s trees per second %s seconds to go %s minutes to go" % (treeid, done, total, rate, eta, eta / 60))

			dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
			if not dic:
				continue
			sentencetree = dic["tree"]

			# Other correction passes that have been plugged in here:
			#   complexbulkcorrectdic(sentencetree)
			#   simplebulkcorrectdic(sentencetree)
			#   correctfeatures(sentencetree)
			#   correctfeatures(sentencetree,4,{})
			# NOTE(review): the meaning of 4 and of the pattern dict is
			# defined by correctfeatures - see that function.
			newdic, changed = correctfeatures(sentencetree,4,{
				'i1':{ u'person': u'3', u'number': u'sg', u'cat': u'V', u'lemma': u'\xeatre', u'token': u'est', u'tense': u'present', u'mode': u'indicative', 'gov': {'i0':'root'}, u't': u'A'},
				'i2':{u'cat': u'Cl', u'lemma': u'c', u'token': u'-ce', u't': u'B', 'gov': {'i1': u'sub'}, 'child':None}
			})

			if changed:
				print("________________________________\n")
				_, sentence, _ = sql.enterTree(cursor, newdic, sid, uid, tokensChanged=True)
				print(sentence)
				print("changed")
				db.commit()
	finally:
		# Always release the connection, even if a correction step raised.
		db.close()
# Example no. 3
# 0
def bulkcorrectDB(project, treeids=None):
    """
    Bulk correction of a whole project. Very slow!

    Every selected row of the "trees" table is loaded with sql.gettree and
    its tree is passed to correctfeatures together with a hard-coded
    two-node pattern. When that call reports a change, the new tree is
    written back with sql.enterTree and committed immediately.

    For simple changes it is better to work directly in SQL, for example to
    change all functions:
        update links set function='dep' where function='det';

    Args:
        project: value handed to SQL() to select the project database.
        treeids: optional values passed to sql.getall together with the
            column name "rowid"; when empty or None, getall is called
            without any filter.
    """

    sql = SQL(project)
    db, cursor = sql.open()

    try:
        if treeids:
            a, v = ["rowid"], treeids
        else:
            a, v = [], []

        # Materialise the rows so len() works whatever getall returns.
        # NOTE(review): confirm whether getall yields a list or an iterator.
        allt = list(sql.getall(cursor, "trees", a, v))
        total = len(allt)

        ti = time()

        for nr, (treeid, sid, uid, annotype, status, comment,
                 timestamp) in enumerate(allt):

            done = nr + 1
            # Clamp so a zero (or negative) clock delta on a coarse timer
            # cannot raise ZeroDivisionError on the first iterations.
            elapsed = max(time() - ti, 1e-6)
            rate = float(done) / elapsed
            # Rough estimate: trees left after this one, at the current rate.
            eta = (total - done) / rate
            print("_____ treeid %s nr %s / %s --- %s trees per second "
                  "%s seconds to go %s minutes to go" %
                  (treeid, done, total, rate, eta, eta / 60))

            dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
            if not dic:
                continue
            sentencetree = dic["tree"]

            # Other correction passes that have been plugged in here:
            #   complexbulkcorrectdic(sentencetree)
            #   simplebulkcorrectdic(sentencetree)
            #   correctfeatures(sentencetree)
            #   correctfeatures(sentencetree,4,{})
            # NOTE(review): the meaning of 4 and of the pattern dict is
            # defined by correctfeatures - see that function.
            newdic, changed = correctfeatures(
                sentencetree, 4, {
                    'i1': {
                        u'person': u'3',
                        u'number': u'sg',
                        u'cat': u'V',
                        u'lemma': u'\xeatre',
                        u'token': u'est',
                        u'tense': u'present',
                        u'mode': u'indicative',
                        'gov': {
                            'i0': 'root'
                        },
                        u't': u'A'
                    },
                    'i2': {
                        u'cat': u'Cl',
                        u'lemma': u'c',
                        u'token': u'-ce',
                        u't': u'B',
                        'gov': {
                            'i1': u'sub'
                        },
                        'child': None
                    }
                })

            if changed:
                print("________________________________\n")
                _, sentence, _ = sql.enterTree(cursor,
                                               newdic,
                                               sid,
                                               uid,
                                               tokensChanged=True)
                print(sentence)
                print("changed")
                db.commit()
    finally:
        # Always release the connection, even if a correction step raised.
        db.close()