def exact_search(word, docs, flag, expand, page, per_page):
    """Find sentences containing the exact token ``word``, one page at a time.

    Args:
        word: token text compared with ``annotator_token.token``.
        docs: document ids as strings; only read when ``flag`` is true.
        flag: when true, every query is restricted to the ids in ``docs``.
        expand: passed through unchanged to ``ShowSentence``.
        page: 1-based page number.
        per_page: number of sentences per page.

    Returns:
        A tuple ``(jq, sent_list, word, docs_len, sent_num)``: the external
        ``jquery`` template with ``***`` replaced by each sentence id, the
        ``ShowSentence`` objects of this page, the searched word, the number
        of distinct matching documents and of distinct matching sentences.
    """
    # NOTE(review): ``word`` and ``docs`` are concatenated straight into the
    # SQL text.  Validate them upstream, or bind them as parameters if
    # ``Database.execute`` supports that -- TODO confirm.
    db = Database()
    token_clause = 'FROM `annotator_token` WHERE token="' + word + '" '
    doc_clause = ('AND doc_id IN (' + ','.join(docs) + ')') if flag else ''
    terminator = ';' if flag else ''

    docs_len = int(db.execute(
        'SELECT COUNT(DISTINCT doc_id) ' + token_clause
        + doc_clause + terminator)[0][0])
    sent_num = int(db.execute(
        'SELECT COUNT(DISTINCT sent_id) ' + token_clause
        + doc_clause + terminator)[0][0])

    page_query = ('SELECT DISTINCT sent_id ' + token_clause + doc_clause
                  + ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page))
    sent_ids = [str(row[0]) for row in db.execute(page_query)]

    if sent_ids:
        tokens = db.execute(
            'SELECT sent_id, num ' + token_clause
            + 'AND sent_id IN (' + ', '.join(sent_ids) + ')')
    else:
        # An empty page would otherwise produce ``sent_id IN ()``, which is
        # not valid SQL.
        tokens = []

    # sentence id -> positions of the matching tokens inside that sentence
    positions = defaultdict(list)
    for sent_id, num in tokens:
        positions[sent_id].append(num)

    sent_list = [ShowSentence(sent_id, positions[sent_id], expand)
                 for sent_id in positions]
    jq = [jquery.replace('***', str(sent.id)) for sent in sent_list]
    return jq, sent_list, word, docs_len, sent_num
def search(parameter, value):
    """Find users and friends whose ``parameter`` attribute equals ``value``.

    Args:
        parameter: one of ``'name'``, ``'surname'``, ``'city'`` or ``'age'``.
        value: value to look for; converted with ``int()`` for ``'age'``
            when the fetched rows are filtered.

    Returns:
        A list of distinct 4-item lists taken from columns 1-4 and 7-10 of
        every joined row (presumably name/surname/city/age of the user and
        of the friend -- verify against the schema), keeping only those that
        contain the searched value.  An unsupported ``parameter`` yields an
        empty list.
    """
    if parameter not in ('name', 'surname', 'city', 'age'):
        # The previous code fell through with ``res = 0`` and crashed while
        # iterating over it.
        return []

    db = Database()
    # ``parameter`` is whitelisted above, so formatting it into the column
    # names is safe; the searched value is still passed as a bound parameter.
    sql = ('SELECT * FROM user_info JOIN friendship '
           'ON (user_{0} = %s OR f_{0} = %s)').format(parameter)
    rows = db.execute(sql, (value, value))

    people = []
    for row in rows:
        people.append([row[1], row[2], row[3], row[4]])
        people.append([row[7], row[8], row[9], row[10]])

    # The join returns both sides of every pair; keep only the side that
    # actually carries the searched value.
    needle = int(value) if parameter == 'age' else value

    unique = []
    seen = set()
    for person in people:
        key = tuple(person)
        if needle in person and key not in seen:
            seen.add(key)
            unique.append(person)
    return unique
def download_file(request, doc_id, doc_type):
    """Build a downloadable export of one document as an ``HttpResponse``.

    Args:
        request: the incoming request; not used by this function.
        doc_id: id of the document; must be convertible to ``int``.
        doc_type: ``'ann'`` exports the annotations as tab-separated rows,
            ``u'text'`` exports the sentence texts joined by spaces and
            encoded as cp1251, anything else exports the token table.

    Returns:
        The ``HttpResponse`` carrying the export and a ``Content-Disposition``
        header with a file name derived from ``doc_id``.

    Raises:
        ValueError: if ``doc_id`` is not an integer value.
    """
    # ``doc_id`` is interpolated unquoted into the SQL text and into response
    # headers below, so only numeric ids ever worked; forcing it to an
    # integer keeps arbitrary request input out of both.
    doc_id = int(doc_id)
    db = Database()

    if doc_type == 'ann':
        req = "SELECT `username`, `data`, `tag`, `start`, `end` FROM `annotator_annotation` LEFT JOIN `auth_user` ON annotator_annotation.owner_id=auth_user.id WHERE `document_id` in (SELECT id FROM `annotator_sentence` WHERE `doc_id_id`=%s)" % doc_id
        # Header row, then one row per annotation.
        lines = [u'Разметчик\tОшибка\tИсправление\tТэг\tНачало ошибки (номер слова от начала предложения)\tКонец ошибки (номер слова от начала предложения)\r\n']
        for row in db.execute(req):
            data = json.loads(row[1])  # the ``data`` column holds JSON
            lines.append('\t'.join([
                str(row[0]), data['quote'], data['corrs'],
                row[2], str(row[3]), str(row[4]),
            ]) + '\r\n')
        response = HttpResponse(u''.join(lines),
                                content_type='text/csv; charset=utf-8')
        response['Content-Disposition'] = (
            'attachment; filename="annotation_text_%s.csv"' % doc_id)
        return response

    if doc_type == u'text':
        req = "SELECT text FROM `annotator_sentence` WHERE `doc_id_id`=%s" % doc_id
        text = ' '.join(
            h.unescape(row[0]).encode('cp1251') for row in db.execute(req))
        response = HttpResponse(text, content_type='text/plain')
        response['Content-Disposition'] = 'filename="text_%s.txt"' % doc_id
        return response

    req = "SELECT `token`,`num`, `sent_id` FROM `annotator_token` WHERE `doc_id`=%s" % doc_id
    header = u'Номер предложения в базе данных\tСлово\tНомер слова в предложении\tТэги\tИсправление\tРазметчик\r\n'
    # The last three columns are left empty in every exported row.
    body = u'\r\n'.join(
        u'\t'.join([str(row[2]), row[0], str(row[1]), '', '', ''])
        for row in db.execute(req))
    response = HttpResponse(header + body, content_type='text/csv')
    response['Content-Disposition'] = (
        'attachment; filename="tokens_text_%s.txt"' % doc_id)
    return response
def exact_search(word, docs, flag, expand, page, per_page):
    """Search for the first whitespace-separated token of ``word``.

    Args:
        word: query text; only its first whitespace-separated part is used.
        docs: document ids as strings; only read when ``flag`` is true.
        flag: when true, every query is restricted to the ids in ``docs``.
        expand: passed through unchanged to ``ShowSentence``.
        page: 1-based page number.
        per_page: number of sentences per page.

    Returns:
        A tuple ``(jq, sent_list, word, docs_len, sent_num)`` where ``word``
        is the token actually searched for.
    """
    # NOTE(review): the token and the document ids are concatenated into the
    # SQL text unescaped.
    db = Database()
    word = word.split()[0]

    where_token = 'FROM `annotator_token` WHERE token="' + word + '" '
    in_docs = ('AND doc_id IN (' + ','.join(docs) + ')') if flag else ''
    closing = ';' if flag else ''

    count_docs = 'SELECT COUNT(DISTINCT doc_id) ' + where_token + in_docs + closing
    docs_len = int(db.execute(count_docs)[0][0])

    count_sents = 'SELECT COUNT(DISTINCT sent_id) ' + where_token + in_docs + closing
    sent_num = int(db.execute(count_sents)[0][0])

    offset = (page - 1) * per_page
    page_ids = ('SELECT DISTINCT sent_id ' + where_token + in_docs
                + ' LIMIT %d,%d;' % (offset, per_page))
    sentences = '(' + ', '.join([str(row[0]) for row in db.execute(page_ids)]) + ')'

    if sentences == '()':
        # Nothing on this page: skip the lookup instead of sending ``IN ()``.
        tokens = []
    else:
        tokens = db.execute(
            'SELECT sent_id, num ' + where_token + 'AND sent_id IN ' + sentences)

    # sentence id -> positions of the matching tokens
    hits = defaultdict(list)
    for sent_id, position in tokens:
        hits[sent_id].append(position)

    sent_list = []
    for sent_id in sorted(hits):
        sent_list.append(ShowSentence(sent_id, hits[sent_id], expand))
    ShowSentence.empty()

    jq = [jquery.replace('***', str(sentence.id)) for sentence in sent_list]
    return jq, sent_list, word, docs_len, sent_num
def get_subcorpus(query):
    """Select the documents matching the subcorpus filters in ``query``.

    Args:
        query: mapping offering ``in``, ``get`` and ``getlist`` (looks like a
            Django ``QueryDict`` -- TODO confirm) with the keys ``mode``,
            ``background``, ``gender``, ``date1``, ``date2``, optionally
            ``rulec`` and the list ``language[]``.

    Returns:
        A tuple ``(docs, sentences, words, flag)``: the matching document ids
        as strings, the summed ``sentences`` and ``words`` columns over those
        documents, and ``flag`` telling whether any filter was applied.
    """
    # NOTE(review): all filter values are concatenated into the SQL text
    # unescaped.
    base = 'SELECT id FROM `annotator_document` WHERE 1 '
    parts = [base]

    if u'rulec' in query:
        parts.append('AND subcorpus="RULEC" ')

    # Single-choice filters; the value ``any`` means "do not filter".
    for key, column in ((u'mode', 'mode'),
                        (u'background', 'language_background'),
                        (u'gender', 'gender')):
        choice = query.get(key).encode('utf-8')
        if choice != u'any':
            parts.append('AND ' + column + '="' + choice + '" ')

    # Date bounds are inserted unquoted; an empty string means "no bound".
    for key, prefix in ((u'date1', 'AND date1>='), (u'date2', 'AND date2<=')):
        bound = query.get(key)
        if bound != u'':
            parts.append(prefix + bound.encode('utf-8') + ' ')

    languages = query.getlist(u'language[]')
    if languages != []:
        alternatives = ['native="' + lang.encode('utf-8') + '"'
                        for lang in languages]
        if len(alternatives) == 1:
            parts.append('AND ' + alternatives[0])
        else:
            parts.append('AND (' + ' OR '.join(alternatives) + ')')

    req = ''.join(parts)
    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    subsum = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` '
        'WHERE id IN (' + req + ')')
    # The statement is unchanged only when no filter was added.
    flag = req != base
    return docs, subsum[0][0], subsum[0][1], flag
def get_subcorpus(query):
    """Select the documents matching the subcorpus filters in ``query``.

    Args:
        query: mapping offering ``in``, ``get`` and ``getlist`` (looks like a
            Django ``QueryDict`` -- TODO confirm) with the keys ``gender``,
            ``date1``, ``date2``, optionally ``checked`` / ``annotated`` and
            the lists ``genre[]``, ``major[]``, ``course[]``.

    Returns:
        A tuple ``(docs, sentences, words, flag)``: the matching document ids
        as strings, the summed ``sentences`` and ``words`` columns over those
        documents, and ``flag`` telling whether any filter was applied.
    """
    # NOTE(review): filter values are concatenated into the SQL text
    # unescaped.
    unfiltered = 'SELECT id FROM `annotator_document` WHERE 1 '
    req = unfiltered

    # Presence of the key alone switches these two filters on.
    for key, clause in ((u'checked', 'AND checked=True '),
                        (u'annotated', 'AND annotated=True ')):
        if key in query:
            req += clause

    gender = query.get(u'gender').encode('utf-8')
    if gender != u'any':
        req += 'AND gender="' + gender + '" '

    # Date bounds are inserted unquoted; an empty string means "no bound".
    for key, prefix in ((u'date1', 'AND date1>='), (u'date2', 'AND date2<=')):
        bound = query.get(key)
        if bound != u'':
            req += prefix + bound + ' '

    # Multi-choice filters are delegated to ``make_small_query``.
    for key, column in ((u'genre[]', 'genre'),
                        (u'major[]', 'major'),
                        (u'course[]', 'course')):
        chosen = query.getlist(key)
        if chosen != []:
            req += make_small_query(chosen, column)

    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    subsum = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` '
        'WHERE id IN (' + req + ')')
    flag = req != unfiltered
    return docs, subsum[0][0], subsum[0][1], flag
def insert_user_info(name, surname, city, user_age):
    """Insert one row into ``user_info`` and commit.

    Args:
        name: value for the ``user_name`` column.
        surname: value for the ``user_surname`` column.
        city: value for the ``user_city`` column.
        user_age: value for the ``user_age`` column.
    """
    statement = (
        ' INSERT INTO user_info (user_name, user_surname, user_city, user_age)'
        ' VALUES (%s, %s, %s, %s) '
    )
    values = (name, surname, city, user_age)
    connection = Database()
    connection.execute(statement, values)
    connection.commit()
def get_subcorpus(query):
    """Select the documents matching the subcorpus filters in ``query``.

    Args:
        query: mapping offering ``in``, ``get`` and ``getlist`` (looks like a
            Django ``QueryDict`` -- TODO confirm) with the keys ``mode``,
            ``background``, ``gender``, ``date1``, ``date2``, optionally
            ``rulec`` and the lists ``language[]``, ``generallevel[]``,
            ``level[]``.

    Returns:
        A tuple ``(docs, sentences, words, flag)``: the matching document ids
        as strings, the summed ``sentences`` and ``words`` columns over those
        documents, and ``flag`` which is false only when the number of
        matching documents equals ``Document.objects.count()``.
    """
    # NOTE(review): all filter values are concatenated into the SQL text
    # unescaped.

    def any_of(column, values):
        # One equality test, or a parenthesised OR-group for several values.
        tests = [column + '="' + value.encode('utf-8') + '"' for value in values]
        if len(tests) == 1:
            return 'AND ' + tests[0]
        return 'AND (' + ' OR '.join(tests) + ')'

    req = 'SELECT id FROM `annotator_document` WHERE 1 '
    if u'rulec' in query:
        req += 'AND subcorpus="RULEC" '

    # Single-choice filters; the value ``any`` means "do not filter".
    for key, column in ((u'mode', 'mode'),
                        (u'background', 'language_background'),
                        (u'gender', 'gender')):
        choice = query.get(key).encode('utf-8')
        if choice != u'any':
            req += 'AND ' + column + '="' + choice + '" '

    # Date bounds are inserted unquoted; an empty string means "no bound".
    for key, prefix in ((u'date1', 'AND date1>='), (u'date2', 'AND date2<=')):
        bound = query.get(key)
        if bound != u'':
            req += prefix + bound.encode('utf-8') + ' '

    for key, column in ((u'language[]', 'native'),
                        (u'generallevel[]', 'general_level'),
                        (u'level[]', 'level')):
        selected = query.getlist(key)
        if selected != []:
            req += any_of(column, selected)

    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    num_docs = Document.objects.count()
    subsum = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` '
        'WHERE id IN (' + req + ')')
    flag = num_docs != len(docs)
    return docs, subsum[0][0], subsum[0][1], flag
def exact_full_search(word, docs, flag, expand, page, per_page):
    """Search for a sequence of exact tokens and return one page of hits.

    ``word`` is split on single spaces; the token rows of every part are
    collected into a ``SentBag`` whose ``finalize`` result maps sentence ids
    to the values handed to ``ShowSentence``.

    Args:
        word: query text, possibly several space-separated tokens.
        docs: document ids as strings; only read when ``flag`` is true.
        flag: when true, every query is restricted to the ids in ``docs``.
        expand: passed through unchanged to ``ShowSentence``.
        page: 1-based page number.
        per_page: number of sentences per page.

    Returns:
        A tuple ``(jq, sent_list, word, d_num, sent_num)``: snippets and
        sentences of the requested page, the original query text, and the
        number of distinct documents / sentences over all hits.
    """
    # NOTE(review): tokens and document ids are concatenated into the SQL
    # text unescaped.
    db = Database()
    words = word.split(' ')
    total = len(words)
    bag = {}

    for position, token in enumerate(words):
        query = ('SELECT sent_id, num FROM `annotator_token` WHERE token="'
                 + token + '" ')
        if flag:
            query += 'AND doc_id IN (' + ','.join(docs) + ')'
        rows = db.execute(query)

        # sentence id -> positions of this token
        found = defaultdict(list)
        if rows:
            for sent_id, num in rows:
                found[sent_id].append(num)

        if not bag:
            bag = SentBag(found, total)
        else:
            bag.update(found, position, position)

    bag = bag.finalize(total)
    all_sentences = [ShowSentence(sent_id, bag[sent_id], expand)
                     for sent_id in sorted(bag)]
    ShowSentence.empty()

    # Totals are taken over every hit, before the page is cut out.
    sent_num = len(all_sentences)
    d_num = len(set(item.doc_id for item in all_sentences))

    first = per_page * (page - 1)
    last = per_page * page
    sent_list = sorted(all_sentences, key=lambda item: item.id)[first:last]
    jq = [jquery.replace('***', str(item.id)) for item in sent_list]
    return jq, sent_list, word, d_num, sent_num
def complex_search(age, city, f_surname):
    """Query ``friendship`` for friends by city, owner surname and minimum age.

    Args:
        age: lower bound (exclusive) for ``f_age``.
        city: required value of ``f_city``.
        f_surname: ``user_surname`` of the users whose ids are accepted in
            the ``friend_1`` column.

    Returns:
        Whatever ``Database.execute`` returns for the query; each row holds
        ``f_name, f_surname, f_age, f_city``.
    """
    statement = (
        'SELECT f_name, f_surname, f_age, f_city FROM friendship'
        ' WHERE f_city = %s'
        ' AND friend_1 IN (SELECT id FROM user_info WHERE user_surname = %s)'
        ' HAVING f_age > %s'
    )
    # Parameter order follows the placeholders: city, surname, age.
    bound = (city, f_surname, age)
    return Database().execute(statement, bound)
def insert_friend_info(user_name, user_surname, user_city, user_age,
                       name, surname, city, age):
    """Store a friend, linked to the matching user when one exists.

    The user is looked up in ``user_info`` by all four ``user_*`` values.
    When a row is found its id is stored in ``friendship.friend_1``;
    otherwise the friend is stored without that link.  The change is
    committed in both cases.

    Args:
        user_name: ``user_name`` of the owning user.
        user_surname: ``user_surname`` of the owning user.
        user_city: ``user_city`` of the owning user.
        user_age: ``user_age`` of the owning user.
        name: friend's name (``f_name``).
        surname: friend's surname (``f_surname``).
        city: friend's city (``f_city``).
        age: friend's age (``f_age``).
    """
    db = Database()
    res = db.execute('''
        SELECT id FROM user_info
        WHERE user_name = %s AND user_surname = %s
          AND user_city = %s AND user_age = %s
        ''', (user_name, user_surname, user_city, user_age))

    # Only the row lookup may legitimately fail; keeping the INSERT outside
    # the ``try`` stops an unrelated IndexError from being swallowed.
    try:
        user_id = res[0][0]
    except IndexError:
        user_id = None

    if user_id is not None:
        db.execute('''
            INSERT INTO friendship (friend_1, f_name, f_surname, f_city, f_age)
            VALUES (%s, %s, %s, %s, %s)
            ''', (user_id, name, surname, city, age))
    else:
        # The fallback used to write to ``friend_name``; every other
        # statement on this table uses ``f_name``.
        db.execute('''
            INSERT INTO friendship (f_name, f_surname, f_city, f_age)
            VALUES (%s, %s, %s, %s)
            ''', (name, surname, city, age))
    db.commit()
def get_orig_sent(doc_id, num):
    """Return the ``text`` of one row of ``annotator_originalsentence``.

    Args:
        doc_id: value compared with the ``doc_id_id`` column.
        num: value compared with the ``num`` column.

    Returns:
        The first column of the first row returned by the query.

    Raises:
        IndexError: if indexing the first row fails, e.g. when
            ``Database.execute`` returned an empty sequence.
    """
    # NOTE(review): both values are formatted into the SQL text unquoted.
    template = ('SELECT text FROM `annotator_originalsentence` '
                'WHERE doc_id_id={} AND num={}')
    statement = template.format(doc_id, num)
    first_row = Database().execute(statement)[0]
    return first_row[0]
def view_all():
    """List every distinct person found in ``user_info JOIN friendship``.

    Returns:
        A list of 4-item lists taken from columns 1-4 and 7-10 of every
        joined row, without duplicates, in order of first appearance.
    """
    rows = Database().execute('SELECT * FROM user_info JOIN friendship', 0)

    people = []
    for row in rows:
        people.extend((
            [row[1], row[2], row[3], row[4]],
            [row[7], row[8], row[9], row[10]],
        ))

    unique = []
    for person in people:
        if person in unique:
            continue
        unique.append(person)
    return unique
def get_subcorpus(query):
    """Return every document id together with corpus-wide totals.

    Args:
        query: request parameters; currently not read at all.

    Returns:
        A tuple ``(docs, sentences, words, flag)``: all document ids as
        strings, the summed ``sentences`` and ``words`` columns, and ``flag``
        telling whether the statement differs from the unfiltered one.
    """
    # Every optional filter (subcorpus, mode, background, gender, date range,
    # native language) is disabled here, so the statement is never extended
    # and ``flag`` always comes out false.
    unfiltered = 'SELECT id FROM `annotator_document` WHERE 1 '
    req = unfiltered

    db = Database()
    docs = [str(row[0]) for row in db.execute(req)]
    subsum = db.execute(
        'SELECT SUM(sentences), SUM(words) FROM `annotator_document` '
        'WHERE id IN (' + req + ')')
    flag = req != unfiltered
    return docs, subsum[0][0], subsum[0][1], flag
def download_file(request, doc_id, doc_type):
    """Build a downloadable export of one document as an ``HttpResponse``.

    Args:
        request: the incoming request; not used by this function.
        doc_id: id of the document; must be convertible to ``int``.
        doc_type: ``'ann'`` exports the annotations as tab-separated rows,
            ``u'text'`` exports the sentence texts joined by spaces and
            encoded as cp1251, anything else exports the token table.

    Returns:
        The ``HttpResponse`` carrying the export and a ``Content-Disposition``
        header with a file name derived from ``doc_id``.

    Raises:
        ValueError: if ``doc_id`` is not an integer value.
    """
    # The id ends up unquoted inside SQL text and inside response headers, so
    # only numeric ids ever worked; coercing it to an integer keeps arbitrary
    # request input out of both.
    doc_id = int(doc_id)
    db = Database()

    if doc_type == 'ann':
        req = "SELECT `username`, `data`, `tag`, `start`, `end` FROM `annotator_annotation` LEFT JOIN `auth_user` ON annotator_annotation.owner_id=auth_user.id WHERE `document_id` in (SELECT id FROM `annotator_sentence` WHERE `doc_id_id`=%s)" % doc_id
        # Header row first, then one row per annotation.
        chunks = [u'Разметчик\tОшибка\tИсправление\tТэг\tНачало ошибки (номер слова от начала предложения)\tКонец ошибки (номер слова от начала предложения)\r\n']
        for row in db.execute(req):
            data = json.loads(row[1])  # the ``data`` column holds JSON
            fields = [str(row[0]), data['quote'], data['corrs'],
                      row[2], str(row[3]), str(row[4])]
            chunks.append('\t'.join(fields) + '\r\n')
        response = HttpResponse(u''.join(chunks),
                                content_type='text/csv; charset=utf-8')
        response['Content-Disposition'] = (
            'attachment; filename="annotation_text_%s.csv"' % doc_id)
    elif doc_type == u'text':
        req = "SELECT text FROM `annotator_sentence` WHERE `doc_id_id`=%s" % doc_id
        text = ' '.join(
            h.unescape(row[0]).encode('cp1251') for row in db.execute(req))
        response = HttpResponse(text, content_type='text/plain')
        response['Content-Disposition'] = 'filename="text_%s.txt"' % doc_id
    else:
        req = "SELECT `token`,`num`, `sent_id` FROM `annotator_token` WHERE `doc_id`=%s" % doc_id
        header = u'Номер предложения в базе данных\tСлово\tНомер слова в предложении\tТэги\tИсправление\tРазметчик\r\n'
        # The last three columns are left empty in every exported row.
        body = u'\r\n'.join(
            u'\t'.join([str(row[2]), row[0], str(row[1]), '', '', ''])
            for row in db.execute(req))
        response = HttpResponse(header + body, content_type='text/csv')
        response['Content-Disposition'] = (
            'attachment; filename="tokens_text_%s.txt"' % doc_id)
    return response
def orig_exact_search(word, docs, flag, expand, page, per_page):
    """Search the original sentences with ``REGEXP`` for each part of ``word``.

    ``word`` is split on single spaces and one query is run per part.  The
    result mapping is rebuilt for every part, so only the rows of the last
    part are returned.

    Args:
        word: query text; each space-separated part is used as a pattern.
        docs: document ids as strings; only read when ``flag`` is true.
        flag: when true, every query is restricted to the ids in ``docs``.
        expand: passed through unchanged to ``ShowSentence1``.
        page: accepted for signature compatibility; not used.
        per_page: accepted for signature compatibility; not used.

    Returns:
        A tuple ``(jq, sent_list, word, d_num, sent_num)`` where ``sent_list``
        maps each result row to its ``ShowSentence1`` object, ``jq`` holds the
        ``jquery`` template with ``***`` replaced by the second column of
        each row, and ``d_num`` / ``sent_num`` count the distinct first
        columns / the rows.
    """
    # NOTE(review): the patterns and document ids are concatenated into the
    # SQL text unescaped.
    # NOTE(review): the source this was recovered from had lost its
    # indentation; everything up to ``ShowSentence.empty()`` is assumed to
    # run once per query part -- confirm against the original file.
    db = Database()
    sent_list = {}

    for part in word.split(' '):
        req4 = ('SELECT doc_id_id, num, text FROM `annotator_originalsentence` '
                'WHERE text REGEXP "' + part + '" ')
        if flag:
            req4 += 'AND doc_id_id IN (' + ','.join(docs) + ')'
        rows = db.execute(req4)

        # Debug trace of the raw rows; the context manager closes the file
        # even when writing fails.
        with open('l.txt', 'a') as log:
            log.write('\n')
            log.write(str(rows))

        sent_list = {}
        if rows:
            for sent in rows:
                sent_list[sent] = ShowSentence1(sent[0], sent[1], expand)
            ShowSentence.empty()

    sent_num = len(sent_list)
    d_num = len(set(sent[0] for sent in sent_list))
    jq = [jquery.replace('***', str(sent[1])) for sent in sent_list]
    return jq, sent_list, word, d_num, sent_num
def collect_data(arr):
    """Run the lemma / lexis / grammar / error-tag search.

    Args:
        arr: six items ``(word, lex, gram, err, docs, flag)``; the first four
            are search strings (empty string = not used), ``docs`` holds
            document ids as strings and ``flag`` switches the restriction to
            those documents on.

    Returns:
        ``[]`` when all four search strings are empty, otherwise whatever
        ``Database.execute`` returns for the assembled statement.
    """
    # NOTE(review): all search values are concatenated into the SQL text
    # unescaped.
    word, lex, gram, err, docs, flag = arr
    if [word, lex, gram, err] == ["", "", "", ""]:
        return []

    def error_tags():
        # Tags are separated by ",", ":,", "|", "(" or ")"; blanks dropped.
        return [tag for tag in re.split(':?,|\\||\\(|\\)', err.lower())
                if tag != '']

    if [word, lex, gram] == ["", "", ""] and err != '':
        # Error tags only: search the annotations directly.
        pieces = [
            'SELECT DISTINCT document_id, start, end FROM annotator_annotation'
            ' LEFT JOIN annotator_sentence'
            ' ON annotator_annotation.document_id = annotator_sentence.id'
            ' WHERE 1 '
        ]
        for tag in error_tags():
            pieces.append('AND tag REGEXP "[[:<:]]' + tag + '[[:>:]]" ')
        if flag:
            pieces.append('AND doc_id_id IN (' + ','.join(docs) + ');')
    else:
        if err != '':
            # Token search restricted to tokens inside a tagged span.
            pieces = [
                'SELECT DISTINCT sent_id, num FROM annotator_token'
                ' LEFT JOIN annotator_morphology'
                ' ON annotator_token.id = annotator_morphology.token_id'
                ' LEFT JOIN annotator_annotation'
                ' ON annotator_token.sent_id = annotator_annotation.document_id'
                ' WHERE 1 '
            ]
            for tag in error_tags():
                pieces.append('AND tag LIKE "%' + tag + '%" ')
            pieces.append('AND num>= annotator_annotation.start'
                          ' AND num <= annotator_annotation.end ')
        else:
            pieces = [
                'SELECT DISTINCT sent_id, num FROM annotator_token'
                ' LEFT JOIN annotator_morphology'
                ' ON annotator_token.id = annotator_morphology.token_id'
                ' WHERE 1 '
            ]
        if word != '':
            pieces.append('AND lem="' + word + '" ')
        if lex != '':
            pieces.append('AND lex LIKE "%' + lex + '%" ')
        if gram != '':
            pieces.append(parse_gram(gram))
        if flag:
            pieces.append('AND doc_id IN (' + ','.join(docs) + ');')

    return Database().execute(''.join(pieces))
def make_tables():
    """Drop and recreate the ``word_info``, ``examples`` and ``updaters`` tables.

    Any existing data in those three tables is discarded.  The statements
    run in a fixed order and are committed together at the end.
    """
    statements = (
        "DROP TABLE IF EXISTS word_info;",
        "CREATE TABLE word_info (word_id INTEGER PRIMARY KEY AUTOINCREMENT,"
        " word TEXT, definition TEXT, updater TEXT); ",
        "DROP TABLE IF EXISTS examples;",
        "CREATE TABLE examples (word_id INTEGER, example TEXT); ",
        "DROP TABLE IF EXISTS updaters;",
        "CREATE TABLE updaters (author_id INTEGER PRIMARY KEY AUTOINCREMENT,"
        " city TEXT, date DATE); ",
    )
    connection = Database()
    for statement in statements:
        # The second argument is passed exactly as the callers elsewhere do.
        connection.execute(statement, 0)
    connection.commit()
def collect_full_data(arr):
    """Run the lemma / lexis / grammar / error-tag search without paging.

    Args:
        arr: eight items ``(word, lex, gram, err, docs, flag, page,
            per_page)``.  The first four are search strings, ``docs`` holds
            document ids as strings and ``flag`` switches the restriction to
            those documents on.  ``page`` and ``per_page`` are unpacked but
            not used.

    Returns:
        A tuple ``(rows, 0, 0)``.  ``rows`` is ``[]`` when ``bincode``
        reports no criterion (``'0000'``) or when ``flag`` is set with no
        documents; otherwise it is the result of the query: ``document_id,
        start, end`` for an error-tag-only search, ``sent_id, num`` for
        everything else.
    """
    # NOTE(review): ``word`` and the document ids are concatenated into the
    # SQL text unescaped.
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    # ``bincode`` presumably returns four 0/1 characters for word, lex, gram
    # and err, in that order -- inferred from which fields each code used.
    s = bincode(word, lex, gram, err)
    if s == '0000' or (flag and len(docs) == 0):
        # An empty document list would otherwise produce ``IN ()``.
        return [], 0, 0

    morph_join = 'ON annotator_token.id = annotator_morphology.token_id'

    if s == '0001':
        # Error tags only: search the annotations directly.
        columns, doc_col = 'document_id, start, end', 'doc_id_id'
        source = ('annotator_annotation LEFT JOIN annotator_sentence '
                  'ON annotator_annotation.document_id = annotator_sentence.id')
        conditions = [parse_gram(err, 'tag')]
    else:
        columns, doc_col = 'sent_id, num', 'doc_id'
        known = ('0010', '0011', '0100', '0101', '0110', '0111', '1000',
                 '1001', '1010', '1011', '1100', '1101', '1110')
        # Any code not listed was handled like "all four criteria present".
        bits = s if s in known else '1111'
        use_word, use_lex, use_gram, use_err = [bit == '1' for bit in bits]

        if use_err:
            source = ('annotator_token LEFT JOIN annotator_morphology '
                      + morph_join + ' LEFT JOIN annotator_annotation '
                      'ON annotator_token.sent_id = annotator_annotation.document_id')
        elif s == '1110':
            # This combination started from the token table; kept as it was.
            source = 'annotator_token LEFT JOIN annotator_morphology ' + morph_join
        else:
            source = 'annotator_morphology LEFT JOIN annotator_token ' + morph_join

        conditions = []
        if use_word:
            conditions.append('AND lem="%s"' % (word,))
        if use_err:
            # The token has to lie inside the annotated span.
            in_span = ('AND num>= annotator_annotation.start '
                       'AND num <= annotator_annotation.end')
            tag = parse_gram(err, 'tag')
            conditions.extend([in_span, tag] if use_word else [tag, in_span])
        if use_lex:
            conditions.append(parse_lex(lex))
        if use_gram:
            conditions.append(parse_gram(gram, 'gram'))

    if flag:
        conditions.append('AND %s IN (%s)' % (doc_col, ','.join(docs)))

    # ``WHERE 1`` lets every condition start with ``AND``; the error-tag-only
    # query used to omit the ``1`` and produced ``WHERE AND ...``.
    req = 'SELECT DISTINCT %s FROM %s WHERE 1 %s' % (
        columns, source, ' '.join(conditions))
    rows = db.execute(req)
    return rows, 0, 0
def view_registered():
    """Return the result of ``SELECT * FROM user_info``."""
    return Database().execute('''SELECT * FROM user_info''', 0)
def collect_data(arr):
    """Run the lemma / lexis / grammar / error-tag search for one page.

    Four statements are executed in this order: the ids of the sentences on
    the requested page, the matching rows inside those sentences, the total
    number of matching sentences and the total number of matching documents.

    Args:
        arr: nine items ``(word, lex, gram, err, comment, docs, flag, page,
            per_page)``.  The first four are search strings, ``comment`` is
            unpacked but not used, ``docs`` holds document ids as strings,
            ``flag`` switches the restriction to those documents on, ``page``
            is 1-based.

    Returns:
        A tuple ``(rows, sent_num, d_num)``.  It is ``([], 0, 0)`` when
        ``bincode`` reports no criterion (``'0000'``), when ``flag`` is set
        with no documents, or when the requested page is empty.  ``rows``
        holds ``document_id, start, end`` for an error-tag-only search and
        ``sent_id, num`` otherwise.
    """
    # NOTE(review): ``word`` and the document ids are concatenated into the
    # SQL text unescaped.
    db = Database()
    word, lex, gram, err, comment, docs, flag, page, per_page = arr
    err = err.strip()
    # ``bincode`` presumably returns four 0/1 characters for word, lex, gram
    # and err, in that order -- inferred from which fields each code used.
    s = bincode(word, lex, gram, err)
    if s == '0000' or (flag and len(docs) == 0):
        return [], 0, 0

    morph_join = 'ON annotator_token.id = annotator_morphology.token_id'

    if s == '0001':
        # Error tags only: search the annotations directly.
        sent_col, doc_col = 'document_id', 'doc_id_id'
        columns = 'document_id, start, end'
        source = ('annotator_annotation LEFT JOIN annotator_sentence '
                  'ON annotator_annotation.document_id = annotator_sentence.id')
        conditions = [parse_gram(err, 'tag')]
    else:
        sent_col, doc_col = 'sent_id', 'doc_id'
        columns = 'sent_id, num'
        known = ('0010', '0011', '0100', '0101', '0110', '0111', '1000',
                 '1001', '1010', '1011', '1100', '1101', '1110')
        # Any code not listed was handled like "all four criteria present".
        bits = s if s in known else '1111'
        use_word, use_lex, use_gram, use_err = [bit == '1' for bit in bits]

        if use_err:
            source = ('annotator_token LEFT JOIN annotator_morphology '
                      + morph_join + ' LEFT JOIN annotator_annotation '
                      'ON annotator_token.sent_id = annotator_annotation.document_id')
        elif s == '1110':
            # This combination started from the token table; kept as it was.
            source = 'annotator_token LEFT JOIN annotator_morphology ' + morph_join
        else:
            source = 'annotator_morphology LEFT JOIN annotator_token ' + morph_join

        conditions = []
        if use_word:
            conditions.append('AND lem="%s"' % (word,))
        if use_err:
            # The token has to lie inside the annotated span.
            in_span = ('AND num>= annotator_annotation.start '
                       'AND num <= annotator_annotation.end')
            tag = parse_gram(err, 'tag')
            conditions.extend([in_span, tag] if use_word else [tag, in_span])
        if use_lex:
            conditions.append(parse_lex(lex))
        if use_gram:
            conditions.append(parse_gram(gram, 'gram'))

    if flag:
        conditions.append('AND %s IN (%s)' % (doc_col, ','.join(docs)))

    # Shared tail of all four statements; ``WHERE 1`` lets every condition
    # start with ``AND``.
    req_template = ' FROM %s WHERE 1 %s' % (source, ' '.join(conditions))
    n_req = 'SELECT COUNT(DISTINCT %s)' % sent_col + req_template
    d_req = 'SELECT COUNT(DISTINCT %s)' % doc_col + req_template
    req1 = ('SELECT DISTINCT ' + sent_col + req_template
            + ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page))

    page_ids = [str(row[0]) for row in db.execute(req1)]
    if not page_ids:
        return [], 0, 0

    req = ('SELECT DISTINCT ' + columns + req_template
           + ' AND ' + sent_col + ' IN (' + ', '.join(page_ids) + ')')
    rows = db.execute(req)
    sent_num = int(db.execute(n_req)[0][0])
    d_num = int(db.execute(d_req)[0][0])
    return rows, sent_num, d_num
def _full_data_request(s, word, lex, gram, err, docs, flag):
    """Assemble the SELECT statement executed by ``collect_full_data``.

    ``s`` is the code returned by ``bincode(word, lex, gram, err)``.  Its four
    characters are read, in that order, as "criterion present" flags: '1000'
    is a word-only query, '0100' lex-only, '0010' gram-only and '0001'
    err-only.  Any code other than the fifteen non-empty combinations is
    treated like '1111'.

    The FROM clause and the order of the WHERE conditions are kept distinct
    per code on purpose: the fragments returned by ``parse_gram`` and
    ``parse_lex`` are spliced in verbatim, so reordering them could change
    the meaning of the statement.

    NOTE(review): ``word`` and ``docs`` are interpolated into the SQL text
    without escaping; if they can carry user input this should move to a
    parameterized query.
    """
    if s not in [format(i, '04b') for i in range(1, 16)]:
        s = '1111'
    has_word, has_lex, has_gram, has_err = [bit == '1' for bit in s]

    morphology_link = 'annotator_token.id = annotator_morphology.token_id'

    if s == '0001':
        # Error tag only: annotations joined to sentences; the annotation's
        # start/end are selected instead of a token number.
        columns = 'document_id, start, end'
        doc_column = 'doc_id_id'
        source = (' FROM annotator_annotation'
                  ' LEFT JOIN annotator_sentence ON'
                  ' annotator_annotation.document_id = annotator_sentence.id')
        conditions = [parse_gram(err, 'tag')]
    else:
        columns = 'sent_id, num'
        doc_column = 'doc_id'
        conditions = []
        if has_word:
            conditions.append('AND lem="%s"' % (word,))
        if has_err:
            source = (' FROM annotator_token'
                      ' LEFT JOIN annotator_morphology ON ' + morphology_link +
                      ' LEFT JOIN annotator_annotation ON'
                      ' annotator_token.sent_id ='
                      ' annotator_annotation.document_id')
            in_span = ('AND num>= annotator_annotation.start'
                       ' AND num <= annotator_annotation.end')
            tag_sql = parse_gram(err, 'tag')
            # With a word the span test precedes the tag fragment; without
            # one the tag fragment comes first.
            if has_word:
                conditions += [in_span, tag_sql]
            else:
                conditions += [tag_sql, in_span]
        elif s == '1110':
            # The only error-free combination that starts from the token table.
            source = (' FROM annotator_token'
                      ' LEFT JOIN annotator_morphology ON ' + morphology_link)
        else:
            source = (' FROM annotator_morphology'
                      ' LEFT JOIN annotator_token ON ' + morphology_link)
        if has_lex:
            conditions.append(parse_lex(lex))
        if has_gram:
            conditions.append(parse_gram(gram, 'gram'))

    request = ('SELECT DISTINCT ' + columns + source +
               ' WHERE 1 ' + ' '.join(conditions))
    if flag:
        # Leading space so the filter never fuses with the preceding fragment.
        request += ' AND ' + doc_column + ' IN (' + ','.join(docs) + ')'
    return request


def collect_full_data(arr):
    """Run an unpaged search and return every matching row.

    ``arr`` unpacks to ``(word, lex, gram, err, docs, flag, page, per_page)``.
    ``page`` and ``per_page`` are accepted for signature parity with
    ``collect_data`` but are not used here.  When ``flag`` is true the search
    is restricted to the document ids in ``docs`` (joined with commas, so
    they must be strings).

    Returns ``(rows, 0, 0)``; the two counters are always zero.  When no
    criterion is set (code '0000') or ``flag`` is true with no documents,
    returns ``([], 0, 0)`` without querying.
    """
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)
    if s == '0000' or (flag and not docs):
        return [], 0, 0
    rows = db.execute(_full_data_request(s, word, lex, gram, err, docs, flag))
    return rows, 0, 0
def _paged_query_parts(s, word, lex, gram, err, docs, flag):
    """Build the shared pieces of the statements run by ``collect_data``.

    ``s`` is the code returned by ``bincode(word, lex, gram, err)``.  Its four
    characters are read, in that order, as "criterion present" flags: '1000'
    is a word-only query, '0100' lex-only, '0010' gram-only and '0001'
    err-only.  Any code other than the fifteen non-empty combinations is
    treated like '1111'.

    Returns ``(id_column, columns, doc_column, template)`` where ``template``
    is the `` FROM ... WHERE ...`` tail shared by all statements,
    ``id_column`` is the column used for paging and counting hits,
    ``columns`` is the select list of the row query and ``doc_column`` is the
    column used for the document filter and document count.

    The FROM clause and the order of the WHERE conditions are kept distinct
    per code on purpose: the fragments returned by ``parse_gram`` and
    ``parse_lex`` are spliced in verbatim, so reordering them could change
    the meaning of the statement.

    NOTE(review): ``word`` and ``docs`` are interpolated into the SQL text
    without escaping; if they can carry user input this should move to a
    parameterized query.
    """
    if s not in [format(i, '04b') for i in range(1, 16)]:
        s = '1111'
    has_word, has_lex, has_gram, has_err = [bit == '1' for bit in s]

    morphology_link = 'annotator_token.id = annotator_morphology.token_id'

    if s == '0001':
        # Error tag only: annotations joined to sentences; the annotation's
        # start/end are selected instead of a token number.
        id_column = 'document_id'
        columns = 'document_id, start, end'
        doc_column = 'doc_id_id'
        source = (' FROM annotator_annotation'
                  ' LEFT JOIN annotator_sentence ON'
                  ' annotator_annotation.document_id = annotator_sentence.id')
        conditions = [parse_gram(err, 'tag')]
    else:
        id_column = 'sent_id'
        columns = 'sent_id, num'
        doc_column = 'doc_id'
        conditions = []
        if has_word:
            conditions.append('AND lem="%s"' % (word,))
        if has_err:
            source = (' FROM annotator_token'
                      ' LEFT JOIN annotator_morphology ON ' + morphology_link +
                      ' LEFT JOIN annotator_annotation ON'
                      ' annotator_token.sent_id ='
                      ' annotator_annotation.document_id')
            in_span = ('AND num>= annotator_annotation.start'
                       ' AND num <= annotator_annotation.end')
            tag_sql = parse_gram(err, 'tag')
            # With a word the span test precedes the tag fragment; without
            # one the tag fragment comes first.
            if has_word:
                conditions += [in_span, tag_sql]
            else:
                conditions += [tag_sql, in_span]
        elif s == '1110':
            # The only error-free combination that starts from the token table.
            source = (' FROM annotator_token'
                      ' LEFT JOIN annotator_morphology ON ' + morphology_link)
        else:
            source = (' FROM annotator_morphology'
                      ' LEFT JOIN annotator_token ON ' + morphology_link)
        if has_lex:
            conditions.append(parse_lex(lex))
        if has_gram:
            conditions.append(parse_gram(gram, 'gram'))

    template = source + ' WHERE 1 ' + ' '.join(conditions)
    if flag:
        # Leading space so the filter never fuses with the preceding fragment.
        template += ' AND ' + doc_column + ' IN (' + ','.join(docs) + ')'
    return id_column, columns, doc_column, template


def collect_data(arr):
    """Run a paged search and return one page of rows plus total counts.

    ``arr`` unpacks to ``(word, lex, gram, err, docs, flag, page, per_page)``.
    When ``flag`` is true the search is restricted to the document ids in
    ``docs`` (joined with commas, so they must be strings).  ``page`` is
    1-based: the page starts at offset ``(page - 1) * per_page``.

    Four statements are executed: one selecting the ids on the requested
    page, one fetching the rows for those ids, and two counting the distinct
    hits and distinct documents over the whole result.

    Returns ``(rows, sent_num, d_num)``.  Returns ``([], 0, 0)`` when the
    requested page is empty or when ``flag`` is true with no documents (the
    latter would otherwise produce an empty ``IN ()`` list).

    NOTE(review): when no criterion is set (code '0000') a bare ``[]`` is
    returned rather than a 3-tuple; kept as-is because callers are not
    visible from here.
    """
    db = Database()
    word, lex, gram, err, docs, flag, page, per_page = arr
    err = err.strip()
    s = bincode(word, lex, gram, err)
    if s == '0000':
        return []
    if flag and not docs:
        return [], 0, 0

    id_column, columns, doc_column, template = _paged_query_parts(
        s, word, lex, gram, err, docs, flag)
    n_req = 'SELECT COUNT(DISTINCT ' + id_column + ')' + template
    d_req = 'SELECT COUNT(DISTINCT ' + doc_column + ')' + template
    req1 = ('SELECT DISTINCT ' + id_column + template +
            ' LIMIT %d,%d;' % ((page - 1) * per_page, per_page))

    sentences = '(' + ', '.join([str(i[0]) for i in db.execute(req1)]) + ')'
    if sentences == '()':
        return [], 0, 0
    req = ('SELECT DISTINCT ' + columns + template +
           ' AND ' + id_column + ' IN ' + sentences)

    # Dump of the three statements to a hard-coded path, overwritten on each
    # call; ``with`` guarantees the handle is closed even if the write fails.
    with codecs.open('/home/elmira/learner_corpus/tempfiles/s.txt', 'w') as f:
        f.write(req + '\r\n' + n_req + '\r\n' + d_req)

    rows = db.execute(req)
    sent_num = int(db.execute(n_req)[0][0])
    d_num = int(db.execute(d_req)[0][0])
    return rows, sent_num, d_num