def pre_process(document_id: int) -> None:
    """Run recognition and pre-annotation for one submission.

    Looks up the submission's UID, runs ``recognize_file`` from the NEW file
    into the RECOGNIZED file, stores the returned token count together with
    the RECOGNIZED status, then runs name-entry detection and rule
    application and finally marks the submission as PRE_ANNOTATED.

    :param document_id: value of ``submission.id`` to process
    """
    # The UID identifies the submission's files
    with get_cursor() as cursor:
        cursor.execute("SELECT uid FROM submission WHERE id = %s",
                       (document_id, ))
        uid = cursor.fetchone()["uid"]

    # Recognition step: NEW file in, RECOGNIZED file out
    source_path = get_submission_file(uid, SubmissionStatus.NEW)
    recognized_path = get_submission_file(uid, SubmissionStatus.RECOGNIZED)
    token_count = recognize_file(source_path, recognized_path)

    with get_cursor() as cursor:
        # Persist the recognition result before annotating
        recognized_params = (SubmissionStatus.RECOGNIZED.value, token_count,
                             document_id)
        cursor.execute(
            "UPDATE submission SET status = %s, num_tokens = %s WHERE id = %s",
            recognized_params)
        commit()

        ctl = controller.Controller(cursor, document_id)

        # Pre-annotation first, then the rule-based re-annotation
        detect_recognized_name_entries(recognized_path, ctl)
        apply_rules(recognized_path, ctl)

        # Mark the document as pre-annotated
        annotated_params = (SubmissionStatus.PRE_ANNOTATED.value, document_id)
        cursor.execute("UPDATE submission SET status = %s WHERE id = %s",
                       annotated_params)
        commit()
def test_rule_db(client: FlaskClient) -> None:
    """Check that the ``rule`` table can be written and read within the app context."""
    delete_sql = "DELETE FROM rule WHERE type = %s and condition = %s"

    with app.app_context():
        condition = ["test_word"]
        confidence = random.randint(10, 10000)

        # Insert a fresh row (after removing a possible leftover)
        with get_cursor() as cursor:
            cursor.execute(delete_sql, (RuleType.WORD_TYPE.value, condition))
            cursor.execute(
                "INSERT INTO rule (type, condition, confidence) VALUES (%s, %s, %s)",
                (RuleType.WORD_TYPE.value, condition, confidence))
            commit()

        # Read the row back, verify it and clean up
        with get_cursor() as cursor:
            cursor.execute(
                "SELECT confidence FROM rule WHERE type = %s and condition = %s",
                (RuleType.WORD_TYPE.value, condition))
            stored = cursor.fetchone()
            assert stored is not None
            assert stored["confidence"] == confidence

            cursor.execute(delete_sql, (RuleType.WORD_TYPE.value, condition))
            commit()
def window():
    """Return the transformed XML for a text window of a submission.

    Reads ``doc_id``, ``start`` and ``end`` from the query string, checks
    permissions and answers HTTP 304 when the client's ``If-None-Match``
    already holds the ETag of this window. Otherwise the RECOGNIZED file is
    passed through ``RecognizedTagFilter`` and the result is returned with
    the ETag set.
    """
    # Parse input params
    submission_id = request.args.get("doc_id", type=int)
    start = request.args.get("start", type=int)
    end = request.args.get("end", type=int)
    _check_permissinns(start, end, submission_id)

    etag = f"{submission_id}-{start}-{end}"

    # Cached by the browser: nothing to regenerate
    if request.if_none_match and etag in request.if_none_match:
        return Response(status=304)  # HTTP 304 (Not modified)

    # Locate the recognized file through the submission UID
    with get_cursor() as cursor:
        cursor.execute("SELECT uid FROM submission WHERE id = %s",
                       (submission_id, ))
        submission_uid = cursor.fetchone()["uid"]
    filename = get_submission_file(submission_uid,
                                   SubmissionStatus.RECOGNIZED)

    # Stream the file through the tag filter into an in-memory buffer
    buffer = StringIO()
    tag_filter = RecognizedTagFilter(start, end, make_parser())
    tag_filter.setContentHandler(XMLGenerator(buffer))
    sax.parse(filename, tag_filter)
    tag_filter.appendNeTypes()

    # Build the response and enable browser caching
    response = make_response(buffer.getvalue())
    response.set_etag(etag)
    return response
def user_remove(user_id: int):
    """Delete the account with the given id and return an OK reply.

    :param user_id: value of ``account.id`` to delete
    :return: JSON response ``{"status": "ok"}``
    """
    with get_cursor() as cursor:
        cursor.execute("DELETE FROM account WHERE id = %s", (user_id, ))
        commit()

    # The key was previously misspelled as "stutus"; the other handlers in
    # this file reply with {"status": "ok"}.
    return jsonify({"status": "ok"})
def next():
    """Release the current window when enough was annotated, then redirect.

    The window stored in the session is dropped when fewer than three
    annotations are missing or when the current user has made at least one
    annotation inside it; otherwise an error message is flashed. In every
    case the client is redirected to ``.index``.
    """
    if "permitted_doc_id" in session:
        # Window info
        doc_id = session["permitted_doc_id"]
        win_start = session["permitted_win_start"]
        win_end = session["permitted_win_end"]

        # Number of annotations made by the user inside the window
        with get_cursor() as cursor:
            query_params = (g.account["id"], doc_id, win_start, win_end)
            cursor.execute(
                "SELECT count(*) as done FROM annotation"
                " WHERE author = %s and submission=%s and %s <= ref_start and ref_end <= %s",
                query_params)
            done = cursor.fetchone()["done"]

        # Number of annotations still missing in the window
        missing = session['permitted_missing']

        if (missing < 3) or (done > 0):
            for key in ("permitted_doc_id", "permitted_win_start",
                        "permitted_win_end", "permitted_missing"):
                session.pop(key)
        else:
            flash(_("You should annotate more to move to the next window."),
                  category="error")

    # Show another window
    return redirect(url_for(".index"))
def label():
    """Annotate an interval as secret, set its label and add a candidate rule.

    Form fields read: ``doc_id``, ``ref_start``, ``ref_end``, ``types``
    (JSON), ``label`` and ``rule``. When a candidate rule was added the
    re-annotation task is triggered.
    """
    form = request.form

    # Window detail
    doc_id = form.get("doc_id", type=int)
    ref_start = form.get("ref_start", type=int)
    ref_end = form.get("ref_end", type=int)
    types = json.loads(form.get("types", type=str))
    label_id = form.get("label", type=int)
    save_as_rule = form.get("rule", type=bool)
    _check_permissinns(ref_start, ref_end, doc_id)

    span = Interval(ref_start, ref_end)
    with get_cursor() as cursor:
        ctl = Controller(cursor, doc_id, g.account["id"])

        # Token level annotation followed by the label itself
        annotation_id = ctl.token_annotation(span, AnnotationDecision.SECRET)
        ctl.set_label(span, label_id)

        # Improved search for candidates
        candidate = ctl.add_candidate_rule(types, annotation_id)

        # Optionally store the label on the rule as well
        if save_as_rule:
            ctl.set_rule_label(types, label_id)

        commit()

    # Annotate rest of file using new candidate
    if candidate:
        _call_re_annotate()

    return jsonify({"status": "ok"})
def login():
    """Log in a registered account by storing its id in the session.

    On a POST with a valid form the account is looked up by e-mail and the
    password hash is checked; on success the session is reset and the client
    is redirected to ``account.index``. In all other cases the login page is
    rendered (with a flashed error where applicable).
    """
    form = LoginForm(request.form)

    if request.method != "POST":
        return render_template("auth/login.html", form=form)

    if not form.validate():
        flash(_("Form content is not valid."), category="error")
        return render_template("auth/login.html", form=form)

    with get_cursor() as cursor:
        cursor.execute("SELECT * FROM account WHERE email = %s",
                       (form.email.data, ))
        account = cursor.fetchone()

    if account is None:
        flash(_("Incorrect e-mail."), category="error")
    elif not check_password_hash(account["password"], form.password.data):
        flash(_("Incorrect password."), category="error")
    else:
        # Store the account id in a new session and return to the index
        session.clear()
        session["account_id"] = account["id"]
        return redirect(url_for("account.index"))

    return render_template("auth/login.html", form=form)
def data():
    """Return rules as JSON, optionally filtered by the ``search`` parameter.

    The reply carries ``total`` (row count of the last query),
    ``totalNotFiltered`` (row count of the whole table) and ``rows``.
    """

    def _as_output(record):
        # NE_TYPE rules get a textual prefix in front of the condition
        condition_str = ' '.join(record["condition"])
        if record["type"] == RuleType.NE_TYPE.value:
            condition_str = "NE_TYPE:" + condition_str
        return {
            "id": record["id"],
            "type": record["type"],
            "condition": condition_str,
            "decision": record["confidence"]
        }

    # GET params
    search = request.args.get("search", type=str)

    with get_cursor() as cursor:
        if not search:
            cursor.execute("SELECT * FROM rule")
            not_filtered = cursor.rowcount
        else:
            cursor.execute("SELECT count(*) FROM rule")
            not_filtered = cursor.fetchone()[0]
            cursor.execute(
                "SELECT * FROM rule WHERE array_to_string(condition, ' ') LIKE %s",
                (search, ))

        # Prepare data
        rows = [_as_output(record) for record in cursor]

        # Return output
        return jsonify({
            "total": cursor.rowcount,
            "totalNotFiltered": not_filtered,
            "rows": rows
        })
def remove(rule_id: int):
    """Delete the rule with the given id and return an OK reply.

    :param rule_id: value of ``rule.id`` to delete
    """
    delete_sql = "DELETE FROM rule WHERE id = %s"
    with get_cursor() as cursor:
        cursor.execute(delete_sql, (rule_id, ))
        commit()

    ok_reply = {"status": "ok"}
    return jsonify(ok_reply)
def re_annotate_all(skip_doc_id: int) -> None:
    """Apply rules to every PRE_ANNOTATED submission except one.

    :param skip_doc_id: submission id that is left untouched
    """
    with get_cursor() as cursor:
        # All submissions that are already pre-annotated
        cursor.execute("SELECT id,uid FROM submission WHERE status = %s",
                       (SubmissionStatus.PRE_ANNOTATED.value, ))

        for record in cursor:
            doc_id, uid = record["id"], record["uid"]
            if skip_doc_id != doc_id:
                submission_file = get_submission_file(
                    uid, SubmissionStatus.RECOGNIZED)

                # A separate cursor is used for each document
                with get_cursor() as cur:
                    ctl = controller.Controller(cur, doc_id)
                    # Parse file and apply rules
                    apply_rules(submission_file, ctl)
                    commit()
def decision():
    """Store a PUBLIC/SECRET decision for an interval, optionally as a rule.

    Form fields read: ``doc_id``, ``ref_start``, ``ref_end``, ``decision``,
    ``kind`` and, depending on the kind, ``ne_type`` or ``tokens`` (JSON).
    When a rule was set or a candidate rule was added, the re-annotation
    task is triggered for the document.
    """
    form = request.form

    # Window detail
    doc_id = form.get("doc_id", type=int)
    ref_start = form.get("ref_start", type=int)
    ref_end = form.get("ref_end", type=int)
    _check_permissinns(ref_start, ref_end, doc_id)

    # Anything other than "PUBLIC" is treated as SECRET
    verdict = (AnnotationDecision.PUBLIC
               if form["decision"] == "PUBLIC" else AnnotationDecision.SECRET)

    # Rule type and its condition (both stay None for other kinds)
    rule_type, condition = None, None
    if form["kind"] == "NE_TYPE":
        rule_type, condition = RuleType.NE_TYPE, [form["ne_type"]]
    elif form["kind"] == "WORD_TYPE":
        rule_type, condition = RuleType.WORD_TYPE, json.loads(form["tokens"])

    span = Interval(ref_start, ref_end)
    rule_confidence = -1
    if verdict == AnnotationDecision.PUBLIC:
        rule_confidence = 1

    # Save result to db
    with get_cursor() as cursor:
        ctl = Controller(cursor, doc_id, g.account["id"])

        # Decision connected with rules
        if rule_type:
            rule = ctl.set_rule(rule_type, condition, rule_confidence)

        # Token level annotation
        annotation_id = ctl.token_annotation(span, verdict)

        # Improved search for candidates
        candidate = None
        if rule_type != RuleType.NE_TYPE:
            if verdict == AnnotationDecision.SECRET:
                candidate = ctl.add_candidate_rule(
                    json.loads(form["tokens"]), annotation_id)
            else:
                ctl.drop_candidate_rule(annotation_id)

        # (Pre) connect rule to annotation
        if rule_type:
            ctl.connect(annotation_id, rule)

        # Commit changes to database
        commit()

    # Annotate rest using background task
    if rule_type or candidate:
        _call_re_annotate(doc_id)

    # Send OK reply
    return jsonify({"status": "ok"})
def new():
    """Insert a label built from form fields ``label`` and ``replacement``."""
    values = (request.form["label"], request.form["replacement"])

    with get_cursor() as cursor:
        cursor.execute("INSERT INTO label (name, replacement) VALUES (%s, %s)",
                       values)
        commit()

    # Return OK reply
    return jsonify({"status": "ok"})
def load_logged_in_account():
    """Populate ``g.account`` from the account id kept in the session.

    ``g.account`` is ``None`` when the session holds no account id; otherwise
    it is the row fetched from the ``account`` table for that id.
    """
    account_id = session.get("account_id")
    g.account = None

    if not account_id:
        return

    with get_cursor() as cursor:
        cursor.execute("SELECT * FROM account WHERE id = %s", (account_id, ))
        g.account = cursor.fetchone()
def upload():
    """Import rules from an uploaded CSV file or from pasted CSV text.

    The CSV must contain the columns ``type``, ``condition`` and
    ``decision``; an ``author`` column is accepted but ignored. Conditions
    are split on ``=``. Existing rules with the same ``(type, condition)``
    get their confidence overwritten. On success the re-annotation task is
    triggered and the client is redirected to ``.index``; on any problem the
    import page is rendered again with a flashed message.
    """
    form = UploadForm()
    if not form.validate_on_submit():
        return render_template("rule/import.html", form=form)

    if not form.file.data and not form.text.data:
        flash(_("File or text input required."), category="error")
        return render_template("rule/import.html", form=form)

    # Load input
    if form.file.data:
        csv_input = csv.DictReader(TextIOWrapper(form.file.data, "UTF8"))
    else:
        csv_input = csv.DictReader(form.text.data.splitlines())

    # Check header format; ``fieldnames`` is None for an empty input, which
    # is reported like any other missing header instead of raising TypeError
    fieldnames = csv_input.fieldnames or []
    if any(field not in fieldnames
           for field in ["type", "condition", "decision"]):
        flash(_("Some header columns are missing"), category="warning")
        return render_template("rule/import.html", form=form)
    if "author" in fieldnames:
        flash(_("Rule authors were ignored in import"), category="warning")

    # Parse input; ``row_num`` counts data rows only (header excluded)
    row_num = 0
    with get_cursor() as cursor:
        for row_num, row in enumerate(csv_input, start=1):
            try:
                # Short rows are padded with None by DictReader
                if (row["type"] is None or row["condition"] is None
                        or row["decision"] is None):
                    raise IndexError
                # Import data
                cursor.execute(
                    "INSERT INTO rule (type, condition, confidence) VALUES(%s, %s, %s)"
                    " ON CONFLICT (type, condition) DO UPDATE SET confidence = EXCLUDED.confidence",
                    (row["type"], row["condition"].split('='),
                     row["decision"]))
            except (IndexError, DataError):
                flash(_("Illegal format on line %(line_num)s.",
                        line_num=row_num),
                      category="error")
                return render_template("rule/import.html", form=form)
        commit()

    # Report the number of imported rows. ``csv_input.line_num`` would also
    # count the header line (and every physical line of multi-line fields).
    flash(_("%(num)s rules imported", num=row_num), category="message")
    _call_re_annotate()
    return redirect(url_for(".index"))
def re_annotate(doc_id: int) -> None:
    """Apply rules to the RECOGNIZED file of a single submission.

    :param doc_id: value of ``submission.id`` to re-annotate
    """
    with get_cursor() as cursor:
        # Resolve the submission file through its UID
        cursor.execute("SELECT uid FROM submission WHERE id = %s",
                       (doc_id, ))
        record = cursor.fetchone()
        submission_file = get_submission_file(record["uid"],
                                              SubmissionStatus.RECOGNIZED)

        # Parse file and apply rules
        apply_rules(submission_file, controller.Controller(cursor, doc_id))
        commit()
def update():
    """Update the name or the replacement of a label.

    Form fields read: ``pk`` (label id), ``value`` (new value) and ``name``;
    when ``name`` equals ``"label"`` the label name is updated, otherwise the
    replacement is.
    """
    # Prepare requests
    row_id = int(request.form["pk"])
    new_value = request.form["value"]

    with get_cursor() as cursor:
        # Choose the statement for the edited column
        if request.form["name"] == "label":
            statement = "UPDATE label SET name = %s WHERE id = %s"
        else:
            statement = "UPDATE label SET replacement = %s WHERE id = %s"
        cursor.execute(statement, (new_value, row_id))
        commit()

    # Return OK reply
    return jsonify({"status": "ok"})
def export():
    """Return all labels as a CSV attachment named ``export.csv``.

    The CSV has the columns ``label`` and ``replacement``.
    """
    buffer = StringIO()
    writer = csv.DictWriter(buffer, fieldnames=["label", "replacement"])
    writer.writeheader()

    # One CSV row for each label in the database
    with get_cursor() as cursor:
        cursor.execute("SELECT * FROM label")
        writer.writerows({
            "label": record["name"],
            "replacement": record["replacement"]
        } for record in cursor)

    # Prepare output
    response = make_response(buffer.getvalue())
    response.headers["Content-Disposition"] = "attachment; filename=export.csv"
    response.headers["Content-type"] = "text/csv"
    return response
def delete_account():
    """Delete the logged-in account after confirming its password.

    On a valid submission with the correct password the account row is
    deleted, the session is cleared and the client is redirected to
    ``index``. Otherwise the delete page is rendered, with a field error
    when the password did not match.
    """
    delete_form = DeleteAccountForm(request.form)

    if not delete_form.validate_on_submit():
        return render_template("account/delete.html", form=delete_form)

    password_ok = check_password_hash(g.account["password"],
                                      delete_form.password.data)
    if not password_ok:
        delete_form.password.errors.append(_("Incorrect password."))
        return render_template("account/delete.html", form=delete_form)

    with get_cursor() as cursor:
        cursor.execute("DELETE FROM account WHERE id = %s",
                       (g.account["id"], ))
        commit()
    session.clear()

    # Show confirmation UI
    flash(_("Account was deleted."), category="message")
    return redirect(url_for("index"))
def users():
    """Return all accounts as JSON rows with ``total``/``totalNotFiltered`` counts."""
    with get_cursor() as cursor:
        cursor.execute(
            "SELECT id, full_name, email, type, window_size FROM account")

        # One output row for each account
        rows = [{
            "id": record["id"],
            "name": f"{record['full_name']} ({record['email']})",
            "type": record["type"],
            "window_size": record["window_size"]
        } for record in cursor]

        # No filtering here, so both totals are the same
        count = cursor.rowcount
        return jsonify({
            "total": count,
            "totalNotFiltered": count,
            "rows": rows
        })
def output():
    """Return the processed text of a submission as a ``.out.txt`` attachment.

    The submission is selected by the ``doc_uid`` query parameter. Decisions
    are obtained from the controller, reduced to the SECRET ones and handed
    to ``OutputTagFilter``, which writes the output while the RECOGNIZED file
    is parsed.
    """
    # Parse input params
    submission_uid = request.args.get("doc_uid", type=str)
    min_confidence = current_app.config["RULE_AUTOAPPLY_CONFIDENCE"]

    with get_cursor() as cursor:
        # Find ID and document name
        cursor.execute("SELECT id, name FROM submission WHERE uid = %s",
                       (submission_uid, ))
        submission = cursor.fetchone()
        submission_name = submission["name"]
        submission_id = submission["id"]

        # Find decisions and keep only the secret ones
        ctl = Controller(cursor, submission_id, g.account["id"])
        found = ctl.get_decisions(None, min_confidence, True)
        secret_value = AnnotationDecision.SECRET.value
        secret_decisions = [
            item for item in found if item["decision"] == secret_value
        ]

    # Transform output
    buffer = StringIO()
    filename = get_submission_file(submission_uid,
                                   SubmissionStatus.RECOGNIZED)
    parser = xml.sax.make_parser()  # nosec - parse only internal XML
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    parser.setContentHandler(OutputTagFilter(secret_decisions, buffer))
    parser.parse(filename)

    # Prepare response
    response = make_response(buffer.getvalue())
    response.headers[
        "Content-Disposition"] = f"attachment; filename={submission_name}.out.txt"
    response.headers["Content-type"] = "text/plain"
    return response
def detail():
    """Return annotation details and connected rules for one interval as JSON.

    The interval is given by the query parameters ``doc_id``, ``start`` and
    ``end``. The reply contains ``token_author``, ``token_level`` and
    ``rules`` (ordered by ascending confidence).
    """
    # Parse input params
    submission_id = request.args.get("doc_id", type=int)
    start = request.args.get("start", type=int)
    end = request.args.get("end", type=int)
    _check_permissinns(start, end, submission_id)

    with get_cursor() as cursor:
        # Annotation info
        cursor.execute(
            "SELECT a.id, token_level, account.full_name AS annotation_author"
            " FROM annotation a"
            " LEFT JOIN account ON a.author = account.id"
            " WHERE submission = %s and ref_start = %s and ref_end = %s",
            (submission_id, start, end))
        annotation = cursor.fetchone()
        annotation_id = annotation["id"]
        token_author = annotation["annotation_author"]
        token_level = annotation["token_level"]

        # Rules info
        cursor.execute(
            "SELECT r.type, condition, confidence, account.full_name AS rule_author"
            " FROM rule r"
            " JOIN annotation_rule ar ON r.id = ar.rule AND ar.annotation = %s"
            " LEFT JOIN account ON r.author=account.id"
            " ORDER BY confidence ASC", (annotation_id, ))
        rules = [{
            "type": record["type"],
            "condition": record["condition"],
            "confidence": record["confidence"],
            "author": record["rule_author"]
        } for record in cursor]

    return jsonify({
        "token_author": token_author,
        "token_level": token_level,
        "rules": rules
    })
def data():
    """Return labels as JSON, optionally filtered by the ``search`` parameter.

    The ``replacement`` column is included only when the current account is
    of the ADMIN type. The reply carries ``total``, ``totalNotFiltered`` and
    ``rows``.
    """
    # GET params
    search = request.args.get("search", type=str)

    with get_cursor() as cursor:
        if not search:
            cursor.execute("SELECT * FROM label")
            not_filtered = cursor.rowcount
        else:
            cursor.execute("SELECT count(*) FROM label")
            not_filtered = cursor.fetchone()[0]
            cursor.execute(
                "SELECT * FROM label WHERE name LIKE %s OR replacement LIKE %s",
                (search, search))

        # Prepare data
        rows = []
        for record in cursor:
            entry = {"id": record["id"], "label": record["name"]}
            # Replacements are shown to administrators only
            if g.account["type"] == AccountType.ADMIN.value:
                entry["replacement"] = record["replacement"]
            rows.append(entry)

        # Return output
        payload = {
            "total": cursor.rowcount,
            "totalNotFiltered": not_filtered,
            "rows": rows
        }
        return jsonify(payload)
def change_password():
    """Change a password for the logged-in user or for a reset-token holder.

    With a ``token`` query parameter the account id is read from the token
    and the old password is not checked; an expired token redirects to
    ``auth.reset`` and an invalid one raises ``BadRequest``. Without a token
    the user must be logged in and must supply the correct old password.
    """
    # Access checks
    token = request.args.get("token")
    if token:
        # Token is used for password resetting using email
        try:
            account_id = read_reset_token(token)
        except BadTimeSignature:
            flash(_("Link has expired. Create a new one."), category="error")
            return redirect(url_for("auth.reset"))
        except BadSignature:
            raise BadRequest("Invalid token")
    else:
        # Without token the user has to be logged in
        if g.account is None:
            return redirect(url_for("auth.login"))
        account_id = g.account["id"]

    # Web page
    form = ChangePasswordForm(request.form)
    has_token = token is not None

    if not form.validate_on_submit():
        return render_template("account/password.html",
                               form=form,
                               token=has_token)

    if not (token or check_password_hash(g.account["password"],
                                         form.old_password.data)):
        form.old_password.errors.append(_("Incorrect password."))
        return render_template("account/password.html",
                               form=form,
                               token=has_token)

    # Update db
    with get_cursor() as cursor:
        new_hash = generate_password_hash(form.new_password.data)
        cursor.execute("UPDATE account SET password = %s WHERE id = %s",
                       (new_hash, account_id))
        commit()

    # Notify user
    flash(_("Password was changed."), category="message")
    return redirect(url_for(".index"))
def register():
    """Register a new account.

    On POST the form is validated and the e-mail is checked for uniqueness
    before the account is inserted with a hashed password; afterwards the
    client is redirected to ``account.index`` when somebody is logged in and
    to ``auth.login`` otherwise. On other methods the password field is
    pre-filled with a random 8-character password.
    """
    form = AccountRegisterForm(request.form)

    if request.method == "POST":
        if not form.validate():
            flash(_("Form content is not valid."), category="error")
        else:
            # The cursor is opened only when needed and is closed by the
            # ``with`` block on every path. Previously it was opened before
            # validation and never closed when the form was invalid or when
            # a database call raised.
            with get_cursor() as cursor:
                email_is_free = is_email_unique(cursor, form.email.data)
                if email_is_free:
                    cursor.execute(
                        "INSERT INTO account (full_name, type, window_size, email, password) "
                        "VALUES (%s, %s, %s, %s, %s)",
                        (form.full_name.data, form.type.data,
                         form.window_size.data, form.email.data,
                         generate_password_hash(form.password.data)),
                    )
                    commit()

            if email_is_free:
                # Notify user
                flash(_("Registration was successful."), category="message")
                # Redirect to correct page
                if g.account:
                    return redirect(url_for("account.index"))
                return redirect(url_for("auth.login"))

            form.email.errors.append(_("Value is already taken."))
    else:
        # Suggest a random password in the empty form
        form.password.data = ''.join(
            secrets.choice(string.ascii_letters + string.digits)
            for _ in range(8))

    return render_template("auth/register.html", form=form)
def reset():
    """Send an e-mail with a password reset link.

    On a POST with a valid form the account is looked up by e-mail; when it
    exists ``password_reset`` is called and the client is redirected to
    ``index``. In all other cases the reset page is rendered.
    """
    form = PasswordResetForm(request.form)

    if request.method != "POST":
        return render_template("auth/reset.html", form=form)

    if not form.validate():
        flash(_("Form content is not valid."), category="error")
        return render_template("auth/reset.html", form=form)

    with get_cursor() as cursor:
        cursor.execute("SELECT * FROM account WHERE email = %s",
                       (form.email.data, ))
        account = cursor.fetchone()

    if account is None:
        flash(_("Unknown e-mail."), category="error")
        return render_template("auth/reset.html", form=form)

    password_reset(request.remote_addr, account["id"])
    flash(_(
        "Login and password reset link was sent to your e-mail address."
    ),
          category="message")
    return redirect(url_for("index"))
def index():
    """Show the window kept in the session, or move on to the next window.

    The stored window is rendered only when the next annotation of the
    document ends within it; otherwise ``_next_window()`` decides what to
    show.
    """
    if "permitted_doc_id" not in session:
        return _next_window()

    # Window
    doc_id = session["permitted_doc_id"]
    win_start = session["permitted_win_start"]
    win_end = session["permitted_win_end"]

    # Selection
    with get_cursor() as cursor:
        selection = _next_annotation_for_window(cursor, doc_id)
    ref_start, ref_end = selection

    # Show window if next selection is within window
    if ref_end <= win_end:
        return render_template(
            "annotate/index.html",
            submission_id=doc_id,
            win_start=win_start,
            win_end=win_end,
            highlight_start=ref_start,
            highlight_end=ref_end,
            is_admin=(g.account["type"] == AccountType.ADMIN.value))

    return _next_window()
def decisions():
    """Return the decisions for a window as JSON and count the missing ones.

    The count stored in ``session["permitted_missing"]`` covers entries with
    no decision and SECRET entries without a label.
    """

    def _is_missing(entry):
        # Undecided, or secret without a label
        if entry["decision"] is None:
            return True
        return (entry["decision"] == AnnotationDecision.SECRET.value
                and entry["label"] is None)

    # Parse input params
    submission_id = request.args.get("doc_id", type=int)
    window_start = request.args.get("start", type=int)
    window_end = request.args.get("end", type=int)
    _check_permissinns(window_start, window_end, submission_id)
    min_confidence = current_app.config["RULE_AUTOAPPLY_CONFIDENCE"]

    # Decisions in the defined interval
    with get_cursor() as cursor:
        ctl = Controller(cursor, submission_id, g.account["id"])
        found = ctl.get_decisions(Interval(window_start, window_end),
                                  min_confidence)

    # Window annotations missing
    session["permitted_missing"] = sum(1 for entry in found
                                       if _is_missing(entry))

    return jsonify(found)
def export():
    """Return all rules as a CSV attachment named ``export.csv``.

    The CSV has the columns ``type``, ``condition`` (items joined with
    ``=``), ``decision`` (the rule confidence) and ``author`` (full name of
    the joined account).
    """
    buffer = StringIO()
    writer = csv.DictWriter(
        buffer, fieldnames=["type", "condition", "decision", "author"])
    writer.writeheader()

    # One CSV row for each rule in the database
    with get_cursor() as cursor:
        cursor.execute(
            "SELECT rule.type, rule.condition, rule.confidence, account.full_name FROM rule"
            " LEFT JOIN account ON rule.author = account.id")
        writer.writerows({
            "type": record["type"],
            "condition": '='.join(record["condition"]),
            "decision": record["confidence"],
            "author": record["full_name"]
        } for record in cursor)

    # Prepare output
    response = make_response(buffer.getvalue())
    response.headers["Content-Disposition"] = "attachment; filename=export.csv"
    response.headers["Content-type"] = "text/csv"
    return response
def password_reset(client_ip: str, account_id: int) -> None:
    """Send a password reset e-mail to the given account.

    The message contains a link to ``account.change_password`` carrying a
    token built for the account id, and is sent through the SMTP host
    configured as ``TOKEN_SMTP_HOST``.

    :param client_ip: passed to the message template as ``ip``
    :param account_id: value of ``account.id`` of the recipient
    """
    # Account info
    with get_cursor() as cursor:
        cursor.execute("SELECT * FROM account WHERE id = %s", (account_id, ))
        account = cursor.fetchone()

    reset_url = url_for("account.change_password",
                        token=build_token((account_id, ), "reset"),
                        _external=True)

    # Email message
    msg = email.message.EmailMessage()
    msg['Subject'] = _("Password reset for PSAN")
    msg['From'] = current_app.config["TOKEN_FROM_EMAIL"]
    msg['To'] = account["email"]
    body = reset_password_text.format(
        email=account["email"],
        ip=client_ip,
        reset_url=reset_url,
        max_age=current_app.config["TOKEN_MAX_AGE"] / 60,
        server_root=url_for("index", _external=True))
    msg.set_content(body, cte='quoted-printable')

    # The context manager issues QUIT and closes the connection even when
    # sending fails, so the socket is not leaked on errors.
    with smtplib.SMTP(current_app.config["TOKEN_SMTP_HOST"]) as smtp:
        smtp.send_message(msg)
def _next_window():
    """Pick a random submission that still needs annotation and show a window.

    Two queries are tried in turn, each returning at most one random
    PRE_ANNOTATED submission. When a submission is found, the result of
    ``_show_window`` for its next annotation is returned; otherwise the
    ``annotate/empty.html`` template is rendered.
    """
    # Find a random submission from db (ORDER BY random() LIMIT 1)
    with get_cursor() as cursor:
        min_confidence = current_app.config["RULE_AUTOAPPLY_CONFIDENCE"]
        # Faster method: a submission with an annotation that has no token
        # level decision and a rule level below the auto-apply confidence
        cursor.execute(
            "SELECT submission.id as id FROM submission "
            " WHERE status = %s AND EXISTS ( SELECT 1 FROM"
            " annotation WHERE submission.id=annotation.submission and"
            " (token_level IS NULL and ABS(rule_level) < %s))"
            " ORDER BY random() LIMIT 1",
            (SubmissionStatus.PRE_ANNOTATED.value, min_confidence))
        document = cursor.fetchone()

        if not document:
            # Much slower method for missing labels: also matches secret
            # annotations that have no label of their own nor one from a
            # connected rule
            cursor.execute(
                "SELECT s.id as id FROM submission s "
                " WHERE s.status = %s AND EXISTS ("
                " SELECT 1 FROM annotation a"
                " LEFT JOIN (annotation_rule ar "
                " INNER JOIN rule r ON r.id = ar.rule AND r.label IS NOT NULL AND r.confidence < 0)"
                " ON ar.annotation = a.id"
                " WHERE a.submission = s.id and ((token_level IS NULL AND ABS(rule_level) < %s)"  # not decided
                " OR ((token_level = %s OR (token_level IS NULL AND rule_level < %s))"  # or (secret
                " and COALESCE(a.label, r.label) IS NULL)))"
                " ORDER BY random() LIMIT 1",
                (SubmissionStatus.PRE_ANNOTATED.value, min_confidence,
                 AnnotationDecision.SECRET.value, min_confidence))
            document = cursor.fetchone()

        if document:
            doc_id = document["id"]
            # Show first candidate of submission
            ref_start, ref_end = _next_annotation_for_window(cursor, doc_id)
            return _show_window(doc_id, ref_start, ref_end)
        else:
            # Nothing left to annotate
            return render_template("annotate/empty.html")