def exportUniqueSentences(project, mode="lasttree", pattern=False):
    """ exports one tree per sentence: the first time a sentence is found, its newest tree is kept """
    sql = SQL(project)
    db, cursor = sql.open()
    sentences = {}  # toks -> tree
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    outfile = os.path.join(outdir, "allSentences.conll")
    if pattern:
        command = """select trees.rowid,userid,max(timestamp) from trees, sentences, texts
                where texts.rowid=sentences.textid and sentences.rowid=trees.sentenceid and textname like "{pattern}"
                group by sentenceid order by trees.rowid;""".format(pattern=pattern)
    else:
        command = """select trees.rowid,userid,max(timestamp) from trees, sentences, texts
                where texts.rowid=sentences.textid and sentences.rowid=trees.sentenceid
                group by sentenceid order by trees.rowid;"""
    for i, (treeid, userid, timestamp) in enumerate(cursor.execute(command).fetchall()):
        tree = sql.gettree(treeid=treeid, indb=db, incursor=cursor)["tree"]
        toks = tuple(tree[i]["t"] for i in tree)
        print "___", i, "\r",
        if toks not in sentences:
            sentences[toks] = tree
    print "writing file with", len(sentences), "sentences..."
    conll.trees2conllFile([sentences[toks] for toks in sorted(sentences)], outfile=outfile, columns=10)
    return outfile
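# A minimal usage sketch (the project name "MyProject" is hypothetical; the pattern
# is an SQL LIKE expression, as the query above suggests).
if __name__ == "__main__":
    outfile = exportUniqueSentences("MyProject", pattern="UD_%")
    print "wrote", outfile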
def extractConllFiles(project, outfolder):
    """ creates the empty files with a word per line as a first step to mate parsing """
    if outfolder[-1] != "/":
        outfolder = outfolder + "/"
    texts = {}
    sql = SQL(project)
    db, cursor = sql.open()
    command = """select distinct texts.textname, sentences.nr, features.nr, features.value
            from features, trees, texts, sentences, users
            where attr = "t"
            and trees.rowid=features.treeid
            --and sentences.textid=13
            and trees.sentenceid=sentences.rowid
            and sentences.textid=texts.rowid
            and users.user="******";"""
    cursor.execute(command)
    a = cursor.fetchall()
    #print a
    for nr, (textname, snr, num, token) in enumerate(a):
        #sql.exportAnnotations(textid, textname, "lastconll")
        texts[textname] = texts.get(textname, []) + [(snr, num, token)]
    newfiles = []
    for textname in texts:
        print "processing", textname
        f = codecs.open(outfolder + textname, "w", "utf-8")
        for c, (snr, num, tok) in enumerate(sorted(texts[textname])):
            if num == 1 and c > 0:
                f.write('\n')
            f.write("\t".join([str(num), tok] + ["_"] * 12) + '\n')
        print c + 1, "tokens"
        f.close()
        newfiles += [outfolder + textname]
    return newfiles
def directDatabaseChangeForForgottenCorrection():
    from database import SQL
    sql = SQL("Platinum")
    db, cursor = sql.open()
    cursor.execute('update links set function="comp" where function="aff";')
    db.commit()
    db.close()
    print "changed"
def lastTreeForAllSamples(project, onlyHuman=True, combine=False):
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    sql = SQL(project)
    db, cursor = sql.open()
    if onlyHuman:
        parserid = 0
        for pid, in cursor.execute("select rowid from users where user='******';"):
            parserid = pid
    else:
        parserid = -1
    sents = sorted(cursor.execute(
        "select texts.textname, sentences.rowid, sentences.nr from sentences, texts where texts.rowid=sentences.textid;"
    ).fetchall())
    print "todo:", len(sents), "sentences"
    pbar = tqdm.tqdm(total=len(sents))
    annotators = {}
    if combine:
        trees = []
        getTreesForSents(sents, trees, annotators, parserid, cursor, db, sql, pbar, project=project)
        outfile = os.path.join(outdir, project + ".lastHumanTreeForAllSamples.conllu")
        conll.trees2conllFile(trees, outfile=outfile)
        print "wrote", outfile
    else:
        for tid, textname, nrtokens in list(cursor.execute("select rowid, * from texts;")):
            print tid, textname, nrtokens
            sents = list(cursor.execute("select rowid, * from sentences where textid=?;", (tid, )).fetchall())
            trees = []
            getTreesForSents(sents, trees, annotators, parserid, cursor, db, sql, pbar)
            if textname.endswith(".conll_parse"):
                textname = textname[:-len(".conll_parse")]  # strip the extension (the original sliced [:len(...)], keeping only a prefix)
            outfile = os.path.join(outdir, textname + ".lastHumanTrees.conllu")
            conll.trees2conllFile(trees, outfile=outfile)
            print "wrote", outfile
    for a in annotators:
        print a, annotators[a]
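# A minimal usage sketch (the project name "MyProject" is hypothetical). With
# combine=True a single .conllu file covering the whole project is written;
# otherwise one .lastHumanTrees.conllu file per text lands in the export folder.
if __name__ == "__main__":
    lastTreeForAllSamples("MyProject", onlyHuman=True, combine=False)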
def exportConllByAnnotators(project, annotators=["prof", "Sy", "parser"]):
    """ exports complete project
    for every sentence, trees of annotators in given order.
    if no tree: throw error
    """
    outfiles = []
    sql = SQL(project)
    db, cursor = sql.open()
    goodTexts = {}
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    annotatorIds = tuple(a for (a, ) in [
        list(cursor.execute("select rowid from users where user =?;", (annotator, )))[0]
        for annotator in annotators
    ])
    #print annotators, annotatorIds
    for textid, textname, nrtokens in list(cursor.execute("select rowid, * from texts;")):  # for each text
        print "doing", textname, "with", nrtokens, "tokens"
        nrutids = {}
        for nr, userid, treeid in list(cursor.execute(
                "select nr,userid,trees.rowid as treeid from trees, sentences where sentenceid=sentences.rowid and userid in {annotatorIds} and textid = ? order by nr;"
                .format(annotatorIds=annotatorIds), (textid, ))):
            nrutids[nr] = nrutids.get(nr, {})
            nrutids[nr][userid] = treeid
        trees = []
        for nr in sorted(nrutids):  # for each sentence
            tree = None
            for aid in annotatorIds:  # for each interesting annotator id
                if aid in nrutids[nr]:
                    tree = sql.gettree(treeid=nrutids[nr][aid], indb=db, incursor=cursor)["tree"]
                    trees += [tree]
                    #print "atree:",tree
                    break
            if not tree:
                print "problem: no tree for nr", nr, "type", type(nr)
                print "annotatorIds", annotatorIds
                raise Exception('no tree', nr)
        if textname.endswith(".conll"):
            textname = textname[:-len(".conll")]
        outfile = os.path.join(outdir, textname)
        conll.trees2conllFile(trees, outfile=outfile, columns=10)
        print len(trees), "trees"
        outfiles += [outfile]
    return outfiles
def exportConllByAnnotators(project, annotators=["prof", "Sy", "parser"], fileExtension=".conllu"):
    """ exports complete project
    for every sentence, trees of annotators in given order.
    if no tree: throw error
    """
    outfiles = []
    sql = SQL(project)
    db, cursor = sql.open()
    goodTexts = {}
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    try:
        annotatorIds = tuple(a for (a, ) in [
            list(cursor.execute("select rowid from users where user =?;", (annotator, )))[0]
            for annotator in annotators
        ])
    except:
        print "some required annotator IDs are not in the database"
        return
    print annotators, annotatorIds
    for textid, textname, nrtokens in list(cursor.execute("select rowid, * from texts;")):  # for each text
        print "doing", textname, "with", nrtokens, "tokens"
        nrutids = {}
        for nr, userid, treeid in list(cursor.execute(
                "select nr,userid,trees.rowid as treeid from trees, sentences where sentenceid=sentences.rowid and userid in {annotatorIds} and textid = ? order by nr;"
                .format(annotatorIds=annotatorIds if len(annotatorIds) > 1 else '(' + str(annotatorIds[0]) + ')'),
                (textid, ))):
            nrutids[nr] = nrutids.get(nr, {})
            nrutids[nr][userid] = treeid
        trees = getSpecificTrees(sql, db, cursor, nrutids, annotatorIds)
        if trees:
            if textname.endswith(".conll"):
                textname = textname[:-len(".conll")]
            if textname.endswith(".conllu"):
                textname = textname[:-len(".conllu")]
            outfile = os.path.join(outdir, textname + fileExtension)
            conll.trees2conllFile(trees, outfile=outfile, columns=10)
            print len(trees), "trees"
            outfiles += [outfile]
        else:
            print "skipped", textname
    return outfiles
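# A minimal usage sketch (project name and annotator logins are hypothetical).
# The annotator list is a priority order: for each sentence, the first listed
# annotator who has a tree wins.
if __name__ == "__main__":
    files = exportConllByAnnotators("MyProject", annotators=["prof", "Sy", "parser"], fileExtension=".conllu")
    print "exported", len(files), "files"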
def printTree(project, treeid):
    sql = SQL(project)
    db, cursor = sql.open()
    dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)  # dic -> get tree (we fetch the tree)
    #print dic
    if dic and dic["tree"]:
        sentencetree = dic["tree"]
        #sentencetree=corrigerNumerotation(sentencetree)
        for i in sorted(sentencetree):
            print i, sentencetree[i]
def _re_init(self, confirm_name):
    if confirm_name != self.name:
        return False
    if not self.valied:
        return False
    sql = SQL().Delete('label').Where(data_set_id=self.id).sql
    mysql.run(sql)
    sql = SQL().Delete('image').Where(data_set_id=self.id).sql
    mysql.run(sql)
    sql = SQL().Delete('model').Where(data_set_id=self.id).sql
    mysql.run(sql)
    return True
def load(self, limit=None):
    if not self.valied:
        return -1
    sql = SQL().Select('image').Where(data_set_id=self.id)
    sql.sql = sql.sql + "and label_id is not null "
    if limit is not None:
        sql = sql.Limit(limit)
    sql = sql.sql
    result = mysql.query(sql)
    if result is None:
        return -1
    self.data = list(result)
    self.data_count = len(self.data)
    return len(self.data)
def _add_img(self, data_set_name, path, idx=None):
    result = mysql.find('data_set', name=data_set_name)
    if result:
        data_set_id = result[0]['id']
        exist = mysql.find('label', data_set_id=data_set_id, idx=idx)
        if exist:
            sql = SQL().Insert('image').Values(path=path, data_set_id=data_set_id, label_id=idx).sql
            return mysql.run(sql)
    return False
def save_model(self, name, path, gpu_counts=None, is_half=None):
    if not self.valied:
        return False
    cols = {
        'name': name,
        'path': path,
        'data_set_id': self.id,
        'gpu_count': gpu_counts,
        'is_half': is_half
    }
    sql = SQL().Insert('model').Values(**cols).sql
    return mysql.run(sql)
def _get_classes(self, data_set_id=None):
    id = self.id if data_set_id is None else data_set_id
    sql = SQL().Select('label', ['idx', 'value']).Where(data_set_id=id).sql
    result = mysql.query(sql)
    idx_to_class = {}
    class_to_idx = {}
    if result is not None:
        for label in result:
            idx_to_class[label['idx']] = label['value']
        for (idx, cla) in idx_to_class.items():
            class_to_idx[cla] = idx
    return class_to_idx, idx_to_class
def __getitem__(self, index):
    #path = os.path.join(self.root_path, self.data[index]['PATH'])
    path = self.root_path + self.data[index]['path']
    img = self.__pil_loader(path)
    if self.id_to_idx.get(self.data[index]['label_id']) is None:
        sql = SQL().Select('label').Where(id=self.data[index]['label_id'])
        result = mysql.query(sql.sql)
        self.id_to_idx[self.data[index]['label_id']] = result[0]['idx']
    target = self.id_to_idx[self.data[index]['label_id']]
    if self.transform is not None:
        img = self.transform(img)
    if self.target_transform is not None:
        target = self.target_transform(target)
    return img, target
def _add_data_set_from_folder(self, name, root_path=None):
    result = mysql.find('data_set', name=name)
    if result is False:
        sql = SQL().Insert('data_set').Values(name=name, root_path=root_path).sql
        if not mysql.run(sql):
            return False
        if root_path is None:
            return False
        return self._add_data_set_from_folder(name, root_path)
    data_set_id = result[0]['id']
    root = root_path if root_path is not None else result[0]['root_path']
    sql = SQL().Update('data_set').Set(root_path=root).Where(id=data_set_id).sql
    mysql.run(sql)
    classes, _ = self.__find_classes(root)
    tmp = mysql.is_print_log
    mysql.is_print_log = False
    count, classes_count = self._add_labels(classes, name)
    class_to_idx, idx_to_class = self._get_classes(data_set_id=data_set_id)
    if tmp:
        print('>>add {} labels done, total({})'.format(count, classes_count))
    count = 0
    for label in classes:
        pre_path = os.path.join(root, label)
        files = self.__find_files(pre_path)
        for f in files:
            path = '/%s/%s' % (label, f)
            if self.__is_img_file(path):
                self._add_img(name, path, class_to_idx[label])
                count += 1
                if tmp and count % 1000 == 0:
                    print('>>insert {} imgs'.format(count))
    if tmp:
        print('>>insert {} imgs done'.format(count))
    mysql.is_print_log = tmp
    return True
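# A sketch of the flow these helpers suggest (the class name "MySQLImageDataSet"
# and the folder layout are hypothetical assumptions, not part of the source):
# register a folder whose subdirectories are class names, then load the labelled
# images and the idx/class mappings.
ds = MySQLImageDataSet(name='cats_vs_dogs')  # hypothetical constructor
ds._add_data_set_from_folder('cats_vs_dogs', '/data/cats_vs_dogs')
n = ds.load(limit=1000)  # only rows whose label_id is not null
class_to_idx, idx_to_class = ds._get_classes()
print('loaded {} images over {} classes'.format(n, len(class_to_idx)))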
def get_model(self, name=None, id=None):
    if not self.valied:
        return False
    if id is not None:
        result = mysql.find('model', id=id, data_set_id=self.id, name=name)
        if result:
            return result[0]
        return None
    sql = SQL().Select('model').Where(data_set_id=self.id, name=name).OrderBy(id='DESC').Limit(1).sql
    result = mysql.query(sql)
    if (result is not None) and (len(result) > 0):
        return result[0]
    return None
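# A sketch pairing save_model with get_model (the class name "MySQLImageDataSet",
# the model name and the paths are hypothetical). Without an id, get_model returns
# the most recently inserted row for that name, via ORDER BY id DESC ... LIMIT 1.
ds = MySQLImageDataSet(name='cats_vs_dogs')  # hypothetical constructor
ds.save_model('resnet18_run1', '/models/resnet18_run1.pth', gpu_counts=1, is_half=False)
latest = ds.get_model(name='resnet18_run1')
if latest is not None:
    print('latest checkpoint:', latest['path'])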
def bulkcorrectDB(project, treeids=[], commit=True):
    """
    bulk correction of a whole project!
    very slow! better to do directly in sql, for example:
    #change all functions:
    #update links set function='dep' where function='det';
    """
    sql = SQL(project)
    db, cursor = sql.open()
    if treeids:
        a, v = ["rowid"], treeids
    else:
        a, v = [], []
    allt = list(sql.getall(cursor, "trees", a, v))
    print "nb trees:", len(allt)
    ti = time()
    for nr, (treeid, sid, uid, annotype, status, comment, timestamp) in enumerate(allt):
        dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
        if dic:
            tree = dic["tree"]
            newtree, changed = correctLowerProperNouns(tree)
            if changed:
                print "________________________________\n"
                ws, sentence, _ = sql.enterTree(cursor, newtree, sid, uid, tokensChanged=True)
                print sentence
                print "changed", changed
        if not nr % 100:
            print "committing..."
            if commit:
                db.commit()
        if not nr % 100:
            print "_____ treeid", treeid, "nr", nr + 1, "/", len(allt), "---", \
                int(float(nr + 1) / (time() - ti)), "trees per second", \
                int(float(len(allt) - nr + 1) / (float(nr + 1) / (time() - ti))), "seconds (", \
                round(float(len(allt) - nr + 1) / (float(nr + 1) / (time() - ti)) / 60, 1), "minutes) to go"
    if commit:
        db.commit()
    db.close()
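# A minimal usage sketch (the project name "MyProject" is hypothetical). Passing
# commit=False makes this a dry run: corrections are computed and printed but the
# periodic db.commit() calls are skipped.
if __name__ == "__main__":
    bulkcorrectDB("MyProject", treeids=[], commit=False)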
def _add_labels(self, labels, data_set_name=None):
    if data_set_name is None:
        data_set_name = self.name
    result_ds = mysql.find('data_set', name=data_set_name)
    count = 0
    start_idx = 0  # initialized here so the return below is safe even if the data set is missing
    if result_ds:
        data_set_id = result_ds[0]['id']
        result_lb_list = mysql.find('label', data_set_id=data_set_id)
        label_value_list = [x['value'] for x in result_lb_list] if result_lb_list else []
        if result_lb_list:
            start_idx = len(result_lb_list)
        for label in labels:
            if label not in label_value_list:
                sql = SQL().Insert('label').Values(data_set_id=data_set_id, value=label, idx=(start_idx + count)).sql
                if mysql.run(sql):
                    count += 1
    return count, start_idx + count
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys, cgi, cgitb, codecs
from database import SQL

#cgitb.enable()
#project = form.getvalue('project',"").decode("utf-8")
project = "depexo"
#form = cgi.FieldStorage()
#uid = form.getvalue('uid',None)
uid = 1  # admin
#print 'Content-type: text/html\n\n'
sql = SQL(project)
#print project.encode('utf-8'),"uuuuuuuuuuuuuuuuuuuuu",uid
tid = 4
uids = [uid for uid, in sql.uidForText(tid)]
print uids
with codecs.open(project + str(tid) + ".results.html", "w", "utf-8") as out:
    for i, uid in enumerate(uids):
        print "__________________", uid, i, float(i) / len(uids) * 100, "%"
        out.write(sql.evaluateUser(uid, tid, consolidateLine=True) + "\n")
def getValidatedTrees(project, folder, whoseTrees="validator"):
    sql = SQL(project)
    db, cursor = sql.open()
    sentenceValidationInValidatedText(cursor, sql, db)
    # fetch the new trees
    b = databaseQuery(cursor, table=whoseTrees)
    print len(b), u"trees to extract"
    sids2all = {}
    trees = []
    error_trees = []
    textnames = {}
    for nr, (treeid, textname, user, snr, sid, uid, annotype, status, comment, timestamp) in enumerate(b):
        # TODO: remove:
        #if textname.startswith("mandarinParsed"):continue
        sids2all[sid] = sids2all.get(sid, []) + [(timestamp, textname, user, snr, treeid)]
        textnames[textname] = None
    #print len(sids2all)
    print u"trees extracted from the samples", ", ".join(sorted(textnames))
    lastpourc = -1
    for c, sid in enumerate(sids2all):
        pourc = int(float(c) / len(sids2all) * 100)
        if pourc != lastpourc:
            sys.stdout.write("{pourc}%\r".format(pourc=pourc))
            sys.stdout.flush()
        snr, treeid2get = sorted(sids2all[sid])[0][-2:]
        #print treeid2get, type(treeid2get)
        dic = sql.gettree(None, None, treeid2get, indb=db, incursor=cursor)  # dic -> get tree
        #if treeid2get==9669: print 9669,dic
        if dic:
            sentencetree = dic["tree"]
            sentencetree = corrigerNumerotation(sentencetree)
            trees.append(sentencetree)
            #print " ".join(node["t"] for i,node in sentencetree.iteritems())
            if checkTree(sentencetree)[0] == False:
                if checkTree(sentencetree)[1] == "self":
                    error_trees += ["\t".join([textname, str(snr), user,
                                               "node " + str(checkTree(sentencetree)[2]) + " points to itself"])]
                else:
                    error_trees += ["\t".join([textname, str(snr), user,
                                               "no gov at node " + str(checkTree(sentencetree)[2])])]
                trees.remove(sentencetree)
        #print "nr arbres",len(trees)
        lastpourc = pourc
    print len(error_trees), "arbre(s) avec erreurs."
    if len(error_trees) > 0:
        print "\t".join(["Texte", "num phrase", "correcteur", "cause"])
        for x in sorted(list(set(error_trees))):
            print x
        f = codecs.open(folder + "logs/log_erreurs." + datetime.datetime.now().strftime('%Y-%m-%d') + ".tsv", "w", "utf-8")
        f.write("\t".join(["Texte", "num phrase", "correcteur", "cause"]) + '\n')
        for e in error_trees:
            f.write(e + '\n')
        f.close()
        print "Erreurs dans", f.name
    print len(trees), "arbres restants pour entrainement"
    # creation of a log file
    db.commit()
    db.close()
    return trees
def fusionForgottenTrees(project="Platinum", fusdir="../projects/OrfeoGold2016/platinum/*", annotators=["admin"]): """ takes trees from project ordered by annotators. if they exist fuse them into the fusdir result has the extension "cool.conll" ,"Sy","Marion" """ #print lemmacorrection sys.path.insert(0, '../tools') import difflib outfiles = [] sql = SQL(project) db, cursor = sql.open() goodTexts = {} outdir = os.path.join("..", "projects", project, "exportcool") try: os.mkdir(outdir) except OSError: pass for annotator in annotators: print[ list( cursor.execute("select rowid from users where user =?;", (annotator, ))) ] annotatorIds = tuple(a for (a, ) in [ list( cursor.execute("select rowid from users where user =?;", ( annotator, )))[0] for annotator in annotators ]) print annotators, annotatorIds for textid, textname, nrtokens in list( cursor.execute("select rowid, * from texts;")): # for each text print "\n__________________________doing", textname, "with", nrtokens, "tokens" nrutids = {} for nr, userid, treeid in list( cursor.execute( "select nr,userid,trees.rowid as treeid from trees, sentences where sentenceid=sentences.rowid and userid in {annotatorIds} and textid = ? order by nr;" .format(annotatorIds=annotatorIds), (textid, ))): nrutids[nr] = nrutids.get(nr, {}) nrutids[nr][userid] = treeid trees = {} for nr in sorted(nrutids): # for each sentence tree = None for aid in annotatorIds: # for each interesting annotator id if aid in nrutids[nr]: tree = sql.gettree(treeid=nrutids[nr][aid], indb=db, incursor=cursor)["tree"] trees[nr] = tree #print "atree:",tree break #if not tree: #print "problem: no tree for nr",nr,"type",type(nr) #print "annotatorIds",annotatorIds #raise Exception('no tree', nr) #print trees print len(trees), "trees from", project print textname, textname.split(".")[0] btextname = os.path.basename(textname).split(".")[0] if btextname.endswith("-one-word-per-line"): btextname = btextname[:-len("-one-word-per-line")] #print glob.glob(fusdir),[os.path.basename(fi).split(".")[0] for fi in glob.glob(fusdir)] cooltrees = [] ptrees, ftrees = 0, 0 for fi in glob.glob(fusdir): if btextname == os.path.basename(fi).split(".")[0]: print "yes", btextname fustrees = conll.conllFile2trees(fi) print len(fustrees), "ftrees", fi for nr, ftree in enumerate(fustrees): if nr + 1 in trees: #print "added tree",nr+1,"from database" #ptree=platinum(trees[nr+1]) ptree = trees[nr + 1] for iii in ptree: ptree[iii]["tag2"] = "_" if ptree[iii]["lemma"] in lemmacorrection: ptree[iii]["lemma"] = lemmacorrection[ ptree[iii]["lemma"]] cooltrees += [ptree] #print nr+1,"tree from",project#,tree ptrees += 1 if ftree.sentence() != u" ".join( [ptree[i].get("t", "") for i in sorted(ptree)]): print "\n_________", nr + 1 print ftree.sentence() print u" ".join( [ptree[i].get("t", "") for i in sorted(ptree)]) #for l in difflib.context_diff(ftree.sentence() ,u" ".join([ptree[i].get("t","") for i in sorted(ptree)])):print l #print "dbtree",platinum(trees[nr+1]) else: for iii in ftree: ftree[iii]["tag2"] = "_" if ftree[iii]["lemma"] in lemmacorrection: ftree[iii]["lemma"] = lemmacorrection[ ftree[iii]["lemma"]] #print nr+1,"tree from",fusdir#,tree ftrees += 1 cooltrees += [ftree] #print "added tree",nr+1,"from fustrees",fi outfile = os.path.join(outdir, textname + ".cool.conll") conll.trees2conllFile(cooltrees, outfile=outfile, columns=10) print "wrote", outfile print ptrees, "ptrees, ", ftrees, "ftrees" break if len(cooltrees) == 0: print "nothing for", btextname outfiles += [outfile] #qsdf return outfiles
def exportGoodTexts(project, lastHuman=False, onlyValidated=True, pattern=False):
    """
    TODO:
    - add a parameter to select a text, e.g. "UD_ZH_[number]"
    """
    outfiles = []
    sql = SQL(project)
    db, cursor = sql.open()
    goodTexts = {}
    if onlyValidated:
        onlyValidated = "and todos.status=1"
    else:
        onlyValidated = ""
    # take all texts where a validator has validated
    if pattern:
        command = "select distinct * from texts, todos, users where texts.rowid=todos.textid and users.rowid=todos.userid and texts.textname {pattern};".format(
            pattern=pattern)  # like 'UD_ZH%'
    else:
        command = "select distinct * from texts, todos, users where texts.rowid=todos.textid and todos.type=1 {onlyValidated} and users.rowid=todos.userid;".format(
            onlyValidated=onlyValidated)
    for row in cursor.execute(command):
        textname, nrtokens, userid, textid, validator, status, comment, user, realname = row
        goodTexts[textid] = (textname, userid, user)
        print "i'll take", textname, "validated by", user, "with", nrtokens, "tokens"
    sentenceValidationInValidatedText(cursor, sql, db)
    outdir = os.path.join("..", "projects", project, "export")
    try:
        os.mkdir(outdir)
    except OSError:
        pass
    for textid, (textname, userid, user) in goodTexts.iteritems():
        textname = textname.replace("-one-word-per-line.conll14_Parse", "")
        if lastHuman:
            outfile = os.path.join(outdir, textname + ".lastHuman.conll")
        else:
            outfile = os.path.join(outdir, "validated." + textname + "." + user + ".conll")
        print "doing", textname, textid
        trees = []
        if lastHuman:
            snr2all = {}
            for row in cursor.execute(
                    """select sentences.nr as snr, trees.rowid as treeid, users.user, trees.timestamp
                    from sentences, trees, users
                    where sentences.textid=? and sentences.rowid=trees.sentenceid and users.rowid = trees.userid;""",
                    (textid, )):
                snr, treeid, user, timestamp = row
                snr2all[snr] = snr2all.get(snr, []) + [(timestamp, user, treeid)]
            lastpourc = -1
            for c, snr in enumerate(sorted(snr2all)):
                pourc = int(float(c) / len(snr2all) * 100)
                if pourc != lastpourc:
                    print "___{pourc}%___\r".format(pourc=pourc),
                lastusersnotparser = sorted([(timestamp, user, treeid)
                                             for (timestamp, user, treeid) in snr2all[snr]
                                             if user not in ["parser", "mate"]])
                # "ts" renamed from "time" to avoid shadowing the time module
                if len(lastusersnotparser) > 0:
                    ts, u, tid = lastusersnotparser[-1]  # last tree by human
                else:
                    ts, u, tid = sorted(snr2all[snr])[-1]  # last tree by whoever
                #print "je prends l'arbre de",u
                # fetch the selected tree id (the original passed the loop-leftover "treeid" here)
                trees += [sql.gettree(treeid=tid, indb=db, incursor=cursor)["tree"]]
        else:
            for (treeid, sentencenr, ) in cursor.execute(
                    "select trees.rowid, sentences.nr from texts, trees, sentences where texts.rowid=? and trees.userid=? and trees.sentenceid = sentences.rowid and sentences.textid=texts.rowid order by sentences.nr;",
                    (textid, userid, )).fetchall():
                #print "ooo",sentencenr,"\r",
                print "nr", sentencenr, "_____\r",
                trees += [sql.gettree(treeid=treeid, indb=db, incursor=cursor)["tree"]]
        print "exporting", len(trees), "trees into", outfile
        outfiles += [outfile]
        conll.trees2conllFile(trees, outfile, columns=10)
    return outfiles
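# A minimal usage sketch (the project name is hypothetical). Note that the pattern
# is pasted verbatim into the SQL after "texts.textname", so it has to carry the
# LIKE keyword itself, as in the inline comment above.
if __name__ == "__main__":
    exportGoodTexts("MyProject", lastHuman=True, onlyValidated=True, pattern="like 'UD_ZH%'")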
def edituser(theform, userdir, thisscript, userconfig, action, newcookie, msg=None, success=None):
    """Display the screen to edit or delete users."""
    config = ConfigObj(userdir + 'config.ini')
    templatedir = config['templatedir']
    realadminlev = int(userconfig['admin'])
    adminpage = readfile(templatedir + adminpage_file)
    adminpage = adminpage.replace('**this script**', thisscript + '?action=' + action)
    adminpage = adminpage.replace('**admin menu**', thisscript + '?login=admin' + '&action=' + action)
    userlist = [entry[:-4] for entry in os.listdir(userdir)
                if os.path.isfile(userdir + entry) and entry[:-4] not in RESERVEDNAMES]
    mainadmin = config['adminuser']
    username = userconfig['username']
    if mainadmin in userlist:
        userlist.remove(mainadmin)
    if username in userlist:
        userlist.remove(username)
    userlist = cSort(userlist)
    start = int(theform.getfirst('start', '1'))
    length = len(userlist)
    if start * numonpage > length:
        start = length // numonpage + 1
    url = '<a href="' + thisscript + '?start=%s&login=admin&admin=edituser&action=' + action + '">%s</a>'
    indexline = '<div style="text-align:center;">%s</div>' % makeindexline(url, start, length, numonpage)
    # need to be able to edit -
    # username, realname, new password, confirm password, adminlev, email, max-age, editable
    index = (start - 1) * numonpage + 1
    last = min(length + 1, index + numonpage)
    usermenu = indexline + '<br>' + edit_table_s
    while index < last:  # go through all the users
        thisuser = userlist[index - 1]
        index += 1
        thisuserc = ConfigObj(userdir + thisuser + '.ini')
        #print "Content-Type: text/html\n"    # blank line: end of headers
        #print userdir,thisuser,thisuserc,"admin" in thisuserc, "admin" in thisuserc.keys()
        if thisuserc == {}:
            continue
        adminlev = thisuserc.get('admin', 0)
        if realadminlev <= int(adminlev):
            continue
        loginname = thisuser
        realname = thisuserc['realname']
        email = thisuserc['email']
        maxage = thisuserc['max-age']
        editable = ''
        if istrue(thisuserc['editable']):
            editable = 'checked'
        if theform.getfirst('username') == loginname and msg and not success:
            realname = theform.getfirst('realname', '')
            realname = theform.getfirst('realname', '')
            email = theform.getfirst('email', '')
            adminlev = theform.getfirst('adminlev', '')
            maxage = theform.getfirst('maxage', '')
            editable = theform.getfirst('editable', '')
            if editable:
                editable = 'checked'
        thevals = (thisscript, action, 'doedituser', start, loginname, loginname, realname, email,
                   adminlev, maxage, editable, thisscript, action, 'deluser', start, loginname)
        usermenu += elem_h + str(index - 1) + (account_table % thevals) + elem_f
        # kim's stuff!!!!!
        realname = realname.decode("utf-8")
        project = unicode(action.split("_")[-1].decode("utf-8"))
        sql = SQL(project)
        uid = sql.userid(thisuser, realname)
        #print "Content-Type: text/html\n"    # blank line: end of headers
        #print "kim<br>", project,thisuser,uid
        # kim's stuff ends....
    usermenu += table_e + '<br>' + indexline
    eduserpage = readfile(templatedir + adminuser_file)
    eduserpage = eduserpage.replace('**account table**', usermenu)
    if msg:
        adminpage = adminpage.replace('<br><!- message --> ', '<h2>%s</h2>' % msg)
    adminpage = adminpage.replace('**admin**', eduserpage)
    print newcookie
    print serverline
    print
    print adminpage
    sys.exit()
def _add_data_set(self, name, root_path=None):
    result = mysql.find('data_set', name=name)
    if result is False:
        sql = SQL().Insert('data_set').Values(name=name, root_path=root_path).sql
        return mysql.run(sql)
    return False
# tail of the reorder function: sortable (rank-ordered (nr, tree) pairs), prefix and
# outfile are defined in the earlier, omitted part of reorder()
new_trees = list()
for nr, tree in sortable:
    # adding metadata (presumably renaming sent_id, starting from 0)
    tree.sentencefeatures["text"] = tree.sentence()
    tree.sentencefeatures["sent_id"] = prefix + "_" + str(nr - 1)
    # removing useless metadata
    del tree.sentencefeatures["nr"]
    new_trees.append(tree)
conll.trees2conllFile(new_trees, outfile)


if __name__ == "__main__":
    ## Open project database
    sql = SQL("NaijaSUD")  # project name
    db, cursor = sql.open()
    ## Use 2 functions :
    # - exportLastBestAnnotations in lib/database.py -> writes a file with trees and their rank
    # - reorder in lib/yuchen.py -> reorder trees based on their rank, write a file with the output
    users, c = sql.exportLastBestAnnotations(115, "P_ABJ_GWA_06_Ugo-lifestory_PRO")
    # textid and text name; the textid can be seen via
    # https://arborator.ilpga.fr/editor.cgi?project=NaijaSUD&textid=74&opensentence=1
    print(users, c)
    fpath = "E:/TAL/Stage/arborator/projects/NaijaSUD/export/P_ABJ_GWA_06_Ugo.lifestory_PRO.most.recent.trees.with.feats.conllu"  # path of the exported file
    trees = conll.conllFile2trees(fpath)
    # reorder the conll trees and rename sent_id
    reorder(trees, fpath + "_reordered")
from config import bot
from database import SQL
import random

db = SQL('localhost', 'domaildo_usrbbd6', 'e5svFRqYtG8SdB^P=)', 'domaildo_boottrg06')


def check_win(room):
    a = []
    for i in db.list_players(room):
        a.append(int(i[0]))
    result = random.choice(a)
    return result
def bulkcorrectDB(project, treeids=[]):
    """
    bulk correction of a whole project!
    very slow! better to do directly in sql, for example:
    #change all functions:
    #update links set function='dep' where function='det';
    """
    sql = SQL(project)
    db, cursor = sql.open()
    if treeids:
        a, v = ["rowid"], treeids
    else:
        a, v = [], []
    allt = sql.getall(cursor, "trees", a, v)
    ti = time()
    for nr, (treeid, sid, uid, annotype, status, comment, timestamp) in enumerate(allt):
        print "_____ treeid", treeid, "nr", nr + 1, "/", len(allt), "---", \
            float(nr + 1) / (time() - ti), "trees per second", \
            float(len(allt) - nr + 1) / (float(nr + 1) / (time() - ti)), "seconds to go", \
            float(len(allt) - nr + 1) / (float(nr + 1) / (time() - ti)) / 60, "minutes to go"
        dic = sql.gettree(None, None, treeid, indb=db, incursor=cursor)
        if dic:
            sentencetree = dic["tree"]
            #newdic, changed = complexbulkcorrectdic(sentencetree)
            #newdic, changed = simplebulkcorrectdic(sentencetree)
            #newdic, changed = correctfeatures(sentencetree)
            #newdic, changed = correctfeatures(sentencetree,4,{})
            newdic, changed = correctfeatures(sentencetree, 4, {
                'i1': {
                    u'person': u'3',
                    u'number': u'sg',
                    u'cat': u'V',
                    u'lemma': u'\xeatre',
                    u'token': u'est',
                    u'tense': u'present',
                    u'mode': u'indicative',
                    'gov': {'i0': 'root'},
                    u't': u'A'
                },
                'i2': {
                    u'cat': u'Cl',
                    u'lemma': u'c',
                    u'token': u'-ce',
                    u't': u'B',
                    'gov': {'i1': u'sub'},
                    'child': None
                }
            })
            #break
            if changed:
                print "________________________________\n"
                #for i,node in newdic.iteritems():
                #    print i, node["t"], node
                tokensChanged = True
                ws, sentence, _ = sql.enterTree(cursor, newdic, sid, uid, tokensChanged=tokensChanged)
                print sentence
                print "changed"
                db.commit()
    #db.commit()
    db.close()
import logging  # needed for logging.basicConfig below
from httplib2 import Http  # needed for creds.authorize(Http()) below

from oauth2client import file, client, tools
from oauth2client.file import Storage
from googleapiclient.discovery import build
import config
import telebot
from telebot.types import InputMediaPhoto, InputMediaVideo
from telebot import types
from database import SQL
from users import Seeker

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)
logger = logging.getLogger(__name__)

db = SQL()

SCOPES = 'https://www.googleapis.com/auth/drive'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_secrets.json', SCOPES)
    creds = tools.run_flow(flow, store)
# build() is the function imported above (the original called discovery.build without importing discovery)
drive_service = build('drive', 'v3', http=creds.authorize(Http()), cache_discovery=False)


def download_photo(file_id):
    request = drive_service.files().get_media(fileId=file_id)
#!c:/Python27/python.exe
# -*- coding: utf-8 -*-
import sys, cgi, cgitb, codecs
sys.path.insert(0, '../lib')
from database import SQL

#cgitb.enable()
#project = form.getvalue('project',"").decode("utf-8")
project = "linguistiqueCorpus-Exo"
#form = cgi.FieldStorage()
#uid = form.getvalue('uid',None)
uid = 1  # admin
#print 'Content-type: text/html\n\n'
sql = SQL(project)
#print project.encode('utf-8'),"uuuuuuuuuuuuuuuuuuuuu",uid
tid = 13
uids = [uid for uid, in sql.uidForText(tid)]
print uids
with codecs.open(project + str(tid) + ".results.html", "w", "utf-8") as out:
    for i, uid in enumerate(uids):
        print "__________________", uid, i, float(i) / len(uids) * 100, "%"
        out.write(sql.evaluateUser(uid, tid, consolidateLine=True) + "\n")
import psycopg2
from database import SQL

db = SQL()
db.drop_tables()
db.create_tables()
db.close()
def edituser(theform, userdir, thisscript, userconfig, action, newcookie, msg=None, success=None):
    """Display the screen to edit or delete users."""
    config = ConfigObj(userdir + 'config.ini')
    templatedir = config['templatedir']
    realadminlev = int(userconfig['admin'])
    adminpage = readfile(templatedir + adminpage_file)
    adminpage = adminpage.replace('**this script**', thisscript + '?action=' + action)
    adminpage = adminpage.replace('**admin menu**', thisscript + '?login=admin' + '&action=' + action)
    userlist = [entry[:-4] for entry in os.listdir(userdir)
                if os.path.isfile(userdir + entry) and entry[:-4] not in RESERVEDNAMES]
    mainadmin = config['adminuser']
    username = userconfig['username']
    if mainadmin in userlist:
        userlist.remove(mainadmin)
    if username in userlist:
        userlist.remove(username)
    userlist = cSort(userlist)
    start = int(theform.getfirst('start', '1'))
    length = len(userlist)
    if start * numonpage > length:
        start = length // numonpage + 1
    url = '<a href="' + thisscript + '?start=%s&login=admin&admin=edituser&action=' + action + '">%s</a>'
    indexline = '<div style="text-align:center;">%s</div>' % makeindexline(url, start, length, numonpage)
    # need to be able to edit -
    # username, realname, new password, confirm password, adminlev, email, max-age, editable
    index = (start - 1) * numonpage + 1
    last = min(length + 1, index + numonpage)
    usermenu = indexline + '<br>' + edit_table_s
    while index < last:  # go through all the users
        thisuser = userlist[index - 1]
        index += 1
        thisuserc = ConfigObj(userdir + thisuser + '.ini')
        #print "Content-Type: text/html\n"    # blank line: end of headers
        #print userdir,thisuser,thisuserc
        adminlev = thisuserc['admin']
        if realadminlev <= int(adminlev):
            continue
        loginname = thisuser
        realname = thisuserc['realname']
        email = thisuserc['email']
        maxage = thisuserc['max-age']
        editable = ''
        if istrue(thisuserc['editable']):
            editable = 'checked'
        if theform.getfirst('username') == loginname and msg and not success:
            realname = theform.getfirst('realname', '')
            realname = theform.getfirst('realname', '')
            email = theform.getfirst('email', '')
            adminlev = theform.getfirst('adminlev', '')
            maxage = theform.getfirst('maxage', '')
            editable = theform.getfirst('editable', '')
            if editable:
                editable = 'checked'
        thevals = (thisscript, action, 'doedituser', start, loginname, loginname, realname, email,
                   adminlev, maxage, editable, thisscript, action, 'deluser', start, loginname)
        usermenu += elem_h + (account_table % thevals) + elem_f
        # kim's stuff!!!!!
        realname = realname.decode("utf-8")
        project = unicode(action.split("_")[-1].decode("utf-8"))
        sql = SQL(project)
        uid = sql.userid(thisuser, realname)
        #print "Content-Type: text/html\n"    # blank line: end of headers
        #print "kim<br>", project,thisuser,uid
        # kim's stuff ends....
    usermenu += table_e + '<br>' + indexline
    eduserpage = readfile(templatedir + adminuser_file)
    eduserpage = eduserpage.replace('**account table**', usermenu)
    if msg:
        adminpage = adminpage.replace('<br><!- message --> ', '<h2>%s</h2>' % msg)
    adminpage = adminpage.replace('**admin**', eduserpage)
    print newcookie
    print serverline
    print
    print adminpage
    sys.exit()
from database import SQL, REDIS
import json

try:
    REDIS.ping()
except:
    print("Failed to connect to redis! a connection is required...")

# Get current position for the current day. this cron runs every day ONCE!
users = SQL.query("SELECT Id FROM Users")

for user in users:
    user_lb = SQL.query("SELECT * FROM Leaderboard WHERE OwnerId = %s LIMIT 1", (user['Id']))
    if (len(user_lb) == 0):
        continue
    user_lb = user_lb[0]
    for mode in [0, 1, 2, 3]:
        key = f"sora:performance:{user['Id']}_{mode}"
        print(key)
        last = REDIS.get(key)
        if last is None:
            last = []
        else:
            last = json.loads(last)
        if (len(last) >= 60):
            last.pop(0)
        position = 0
def test_instance(self):
    sql = SQL()
    self.assertIsInstance(sql, SQL)