# NOTE(review): the line below is a code fragment whose line breaks and
# indentation were lost; the nesting of its statements cannot be confirmed
# from here, and the trailing `try:` has no visible `except`/`finally`.
#
# What the visible statements do, in order:
#   * For every item in `batchiter`, item[4] is taken as the doc_id.
#     Candidates are fetched with controls.pull_candidates() using the part
#     of doc_id before the first '_'. The sentence stored for that doc_id in
#     esl_sentences is appended to `sents`, and the doc_id to `sentids`.
#   * When `candidates` is non-empty, controls.best_control(..., nbest=5) is
#     iterated. Element [0] of each result is passed through
#     generrors.randerr() and stored with controls.insert_into_db(). Unless
#     that call returns -1, the controls.touni() form is written to `outfile`
#     (tab-terminated) and a row is inserted into esl_hits_data.
#   * `i` is set to 0 and no increment is visible in this fragment, so every
#     esl_hits_data row inserted here gets sentence_num 0 -- TODO confirm
#     whether that is intended.
#   * The `else` branch (presumably paired with the candidates check --
#     verify against the original layout) opens 'controls.log.bk' as UTF-8
#     and, for each line, calls the add_hit SQL function with a fresh UUID.
#     No close of that file is visible in this fragment.
sents = [] sentids = [] for item in batchiter: doc_id = item[4] candidates = controls.pull_candidates(doc_id.split('_')[0]) idsql = 'SELECT sentence from esl_sentences where doc_id=%s;' cur2.execute(idsql, (doc_id,)) sents.append(cur2.fetchone()[0]) sentids.append(doc_id) if(len(candidates) > 0): i = 0 b = controls.best_control(sents, candidates, dfs, nbest=5) for bb in b: bbuni = controls.touni(bb[0]) newb = generrors.randerr(bb[0]) cid = controls.insert_into_db(hit_id, newb, cur2) if(not(cid == -1)): outfile.write(bbuni+'\t') sql="INSERT INTO esl_hits_data(hit_id,esl_sentence_id,language_id,sentence_num)VALUES(%s,%s,%s,%s);" cur2.execute(sql,(hit_id, cid, lang_id, i)) outfile.write('\n') conn.commit() else: check = 0 cachedsents = codecs.open('controls.log.bk', encoding='utf-8', mode='r') for hit in cachedsents.readlines(): guid=str(uuid.uuid4()) try: sql="SELECT add_hit(%s, %s, %s, %s, %s, %s, %s);" cur2.execute(sql,("", guid, hittype_id, lang_id, 0, 0, 0))
# NOTE(review): the line below is a code fragment whose line breaks and
# indentation were lost; the nesting of its statements cannot be confirmed
# from here.
# NOTE(review): because everything sits on one physical line, the inline
# `#outfile.write(...)` comment also swallows the `for s in enumerate(...)`
# loop that follows it. Presumably only the outfile.write call was meant to
# be commented out -- verify against the original layout.
#
# What the visible statements do, in order:
#   * `hit_id` is appended to `sentcounts` if it is not already in it, and
#     "Batch added" is logged at info level.
#   * For every item in `batchiter`, item[4] is taken as the doc_id.
#     Candidates are fetched with controls.pull_candidates() using the part
#     of doc_id before the first '_'. The sentence stored for that doc_id in
#     esl_sentences is appended to `sents`, and the doc_id to `sentids`.
#   * controls.best_control(sents, candidates, dfs) yields `b`, which is
#     printed and then stored, prefixed with "CONTROL ", through
#     controls.insert_into_db(); the transaction is committed.
#   * For each (index, doc_id) pair of `sentids`: at index == qcnum the
#     control id `cid` is written to esl_hits_data; at every other index the
#     sentence id is looked up in esl_sentences by doc_id and written
#     instead. The index is used as sentence_num in both cases.
if(not(hit_id in sentcounts)): sentcounts.append(hit_id) logging.info("Batch added") sents = [] sentids = [] for item in batchiter: doc_id = item[4] candidates = controls.pull_candidates(doc_id.split('_')[0]) idsql = 'SELECT sentence from esl_sentences where doc_id=%s;' cur2.execute(idsql, (doc_id,)) sents.append(cur2.fetchone()[0]) sentids.append(doc_id) b = controls.best_control(sents, candidates, dfs) print b cid = controls.insert_into_db("CONTROL "+b, cur2) conn.commit() #outfile.write(b+'\n') for s in enumerate(sentids): print s i = s[0] if(i == qcnum): sql="INSERT INTO esl_hits_data (hit_id, esl_sentence_id, language_id, sentence_num) VALUES (%s,%s,%s,%s);" cur2.execute(sql,(hit_id, cid, lang_id, i)) else: idsql = 'SELECT id from esl_sentences where doc_id=%s;' cur2.execute(idsql, (s[1],)) eslid = cur2.fetchone()[0] sql="INSERT INTO esl_hits_data (hit_id, esl_sentence_id, language_id, sentence_num) VALUES (%s,%s,%s,%s);" cur2.execute(sql,(hit_id, eslid, lang_id, i))
# NOTE(review): the line below is a code fragment whose line breaks and
# indentation were lost; the nesting of its statements cannot be confirmed
# from here, and it is cut off right after the final `sql=` assignment.
#
# What the visible statements do, in order:
#   * item[4] is appended to `sentnums`.
#   * All (sentence, doc_id) rows of esl_sentences with qc = 1 for `doc` are
#     fetched into `refs`, and one is chosen with random.randint().
#     NOTE(review): random.randint(0, len(refs)-1) raises ValueError when
#     `refs` is empty -- confirm that at least one qc row always exists.
#   * While the chosen row's doc_id is already in `sentnums`, another row is
#     drawn, at most 20 times. After 20 attempts the last draw is kept even
#     if its doc_id is still in `sentnums`.
#   * The chosen sentence is passed through generrors.randerr() and stored
#     with controls.insert_into_db(hit_id, newb, b, cur2, qcnum). A return of
#     -1 prints an error and breaks out of the enclosing loop (not visible
#     in this fragment).
#   * Otherwise, for i in range(num_unknowns + num_knowns): at i == qcnum the
#     control id is inserted into esl_hits_data with sentence_num `n`, and
#     `n` is incremented. At any other i, if `sents` is non-empty, its first
#     entry is read (sid = sent[0]), printed, and an INSERT statement is
#     prepared.
sentnums.append(item[4]) #Insert 1 control sentence ctrlsql = 'SELECT sentence, doc_id from esl_sentences where qc = 1 and doc=%s' cur2.execute(ctrlsql, (doc,)) refs = cur2.fetchall() controlnum = random.randint(0, len(refs)-1) b = list(refs)[controlnum][0] sentnum = list(refs)[controlnum][1] timeout = 0 while(timeout < 20 and (sentnum in sentnums)): controlnum = random.randint(0, len(refs)-1) b = list(refs)[controlnum][0] sentnum = list(refs)[controlnum][1] timeout += 1 newb = generrors.randerr(b) cid = controls.insert_into_db(hit_id, newb, b, cur2, qcnum) if(cid == -1): print "Error inserting control sentence to DB" break; else: for i in range(0, settings['num_unknowns']+settings['num_knowns']): if(i == qcnum): sql="INSERT INTO esl_hits_data(hit_id,esl_sentence_id,language_id,sentence_num)VALUES(%s,%s,%s,%s);" cur2.execute(sql,(hit_id, cid, lang_id, n)) n += 1 else: if(len(sents) > 0): sent = sents[0] sid = sent[0] print sent sql="INSERT INTO esl_hits_data(hit_id,esl_sentence_id,language_id,sentence_num)VALUES(%s,%s,%s,%s);"