def delete_metadata():
    """Delete one metadata record and print a JSON status object.

    Takes no arguments: ``id``, ``docid`` and ``corpus`` are read from the
    enclosing (module) scope. ``id`` and ``docid`` are converted with
    ``int()`` before being handed to ``delete_meta``; afterwards the "meta"
    cache entry for ``docid`` is invalidated.

    Prints exactly one JSON object to stdout:
    ``{"Result": "OK"}`` on success, or ``Result`` = ``"Error"`` plus a
    ``Message`` when any step raised. Returns ``None``.
    """
    resp = {}
    try:
        delete_meta(int(id), int(docid), corpus=corpus)
        resp['Result'] = 'OK'
        cache.invalidate_by_doc(docid, "meta")
    except Exception:
        # Deliberately broad: the caller only ever sees the JSON status.
        # ``Exception`` (rather than a bare ``except``) lets SystemExit and
        # KeyboardInterrupt propagate instead of being reported as a
        # metadata failure.
        resp['Result'] = 'Error'
        resp['Message'] = 'Could not delete metadata'
    # Single parenthesised argument: identical output under the Python 2
    # print statement and the Python 3 print function.
    print(json.dumps(resp))
def create_metadata():
    """Create one metadata record and print a JSON status object.

    Takes no arguments: ``docid``, ``key``, ``value`` and ``corpus`` are read
    from the enclosing (module) scope. ``key`` and ``value`` are decoded as
    UTF-8 before being passed to ``save_meta``; the undecoded values are
    echoed back in the ``Record`` entry of the response. After saving, the
    "meta" cache entry for ``docid`` is invalidated.

    Prints exactly one JSON object to stdout. On success it carries
    ``Result`` = ``"OK"`` and ``Record``; if any step raised, ``Result`` is
    ``"Error"`` and ``Message`` explains. Returns ``None``.
    """
    resp = {}
    try:
        # Named ``new_id`` so the builtin ``id`` is not shadowed locally.
        new_id = save_meta(int(docid), key.decode("utf8"),
                           value.decode("utf8"), corpus=corpus)
        resp['Result'] = 'OK'
        resp['Record'] = {
            'id': new_id,
            'docid': docid,
            'key': key,
            'value': value,
        }
        cache.invalidate_by_doc(docid, "meta")
    except Exception:
        # Deliberately broad: the caller only ever sees the JSON status.
        # ``Exception`` (rather than a bare ``except``) lets SystemExit and
        # KeyboardInterrupt propagate.
        resp['Result'] = 'Error'
        resp['Message'] = 'Could not create metadata'
    # Single parenthesised argument: identical output under the Python 2
    # print statement and the Python 3 print function.
    print(json.dumps(resp))
def load_page(user, admin, theform):
    """Process an editor form submission and render the "editor" template.

    Depending on which fields ``theform`` carries, this creates or updates
    the document row, copies metadata from another document, calls the
    configured NLP services, pushes the document to git, and imports an
    uploaded spreadsheet/SGML file. It then fills ``render_data`` and
    returns ``render("editor", render_data)``.

    Args:
        user: Name of the current user. Several write paths are skipped when
            this equals the literal ``"demo"``.
        admin: Admin level. Converted with ``int()`` for comparisons and also
            compared against the string ``"3"``.
        theform: The submitted form. Accessed through ``getvalue()``,
            ``keys()``, ``in`` and ``theform["file"]``.

    Returns:
        The result of ``render("editor", render_data)``. When no ``id`` field
        was submitted the function returns early, before the per-document
        keys are added to ``render_data``.

    Side effects:
        Rebinds the module globals ``code_2fa`` and (in ether mode)
        ``ether_url``; writes to the database, cache, file system and remote
        services through the helpers it calls.
    """
    global ether_url
    global code_2fa

    if theform.getvalue("2fa"):
        code_2fa = theform.getvalue("2fa")
    else:
        code_2fa = ""

    max_id = generic_query("SELECT MAX(id) AS max_id FROM docs", "")[0][0]
    if not max_id:  # This is for the initial case after init db
        max_id = 0

    text_content = ""
    repo_name = ""
    corpus = ""
    status = ""
    assignee = ""
    mode = "xml"
    schema = ""
    doc_id = ""  # Should only remain so if someone navigated directly to editor.py
    docname = ""
    old_docname = old_corpus = old_repo = old_status = ""
    old_assignee = old_mode = old_schema = ""

    if int(admin) > 0:
        git_username, git_token, git_2fa = get_git_credentials(
            user, admin, code_2fa)
    else:
        git_username, git_token, git_2fa = (None, None, None)

    # dict of variables we'll need to render the html
    render_data = {}

    if theform.getvalue('id'):
        doc_id = theform.getvalue('id')
        if int(doc_id) > int(max_id):
            # Creating new doc case, assign some default values
            docname = "new_document"
            repo_name = "account/repo_name"
            status = "editing"
            assignee = "default_user"
            corpus = "default_corpus"
            schema = ""
            text_content = ""

            # If one of the forms is edited or we're cloning a doc, then we
            # create the doc, otherwise nothing happens (user cannot fill in
            # nothing and create the doc).
            #
            # Each branch below creates the row the first time through and
            # then sets max_id = doc_id, so later branches in the same
            # request fall through to the matching update_* call instead.
            # The comparisons go through int() because doc_id is a form
            # string while max_id starts out as a number.
            if theform.getvalue('edit_docname') and user != "demo":
                # Read the submitted name first; without this the comparison
                # below always sees the "new_document" default and the
                # branch can never fire.
                docname = theform.getvalue('edit_docname')
                if docname != 'new_document':
                    if int(doc_id) > int(max_id):
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_docname(doc_id, docname)

            if theform.getvalue('edit_filename') and user != "demo":
                repo_name = theform.getvalue('edit_filename')
                if repo_name != 'account/repo_name':
                    if int(doc_id) > int(max_id):
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_filename(doc_id, repo_name)

            if theform.getvalue('edit_corpusname') and user != "demo":
                corpus = theform.getvalue('edit_corpusname')
                if corpus != 'default_corpus':
                    if int(doc_id) > int(max_id):
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_corpus(doc_id, corpus)

            if theform.getvalue('edit_status') and user != "demo":
                status = theform.getvalue('edit_status')
                if status != 'editing':
                    if int(doc_id) > int(max_id):
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_status(doc_id, status)

            if theform.getvalue('edit_assignee') and user != "demo":
                assignee = theform.getvalue('edit_assignee')
                if assignee != "default_user":
                    if int(doc_id) > int(max_id):
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_assignee(doc_id, assignee)

            # cloning metadata from an existing doc into a new doc
            if theform.getvalue('source_doc'):
                source_meta = get_doc_meta(theform.getvalue('source_doc'))
                if int(doc_id) > int(max_id):
                    create_document(doc_id, docname, corpus, status,
                                    assignee, repo_name, text_content)
                    max_id = doc_id
                # NOTE(review): this path decodes key/value before saving,
                # the existing-document path further down does not - confirm
                # which one matches what get_doc_meta returns.
                for meta in source_meta:
                    m_key, m_val = meta[2:4]
                    save_meta(int(doc_id), m_key.decode("utf8"),
                              m_val.decode("utf8"))
                    cache.invalidate_by_doc(doc_id, "meta")
        else:
            # Get previous values from DB
            (old_docname, old_corpus, old_repo, old_status, old_assignee,
             old_mode, old_schema) = get_doc_info(doc_id)
            # Assume new values are same, overwrite with different form
            # values and update DB if new values found
            docname, corpus, repo_name = old_docname, old_corpus, old_repo
            status, assignee = old_status, old_assignee
            mode, schema = old_mode, old_schema

            # Handle switch to spreadsheet mode if NLP spreadsheet service
            # is called
            if (theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet"
                    and mode == "xml" and user != "demo"):
                data_to_process = generic_query(
                    "SELECT content FROM docs WHERE id=?", (doc_id, ))[0][0]
                api_call = spreadsheet_nlp_api
                if api_call != "":
                    nlp_user, nlp_password = get_nlp_credentials()
                    data = {
                        "data": data_to_process,
                        "lb": "line",
                        "format": "sgml_no_parse"
                    }
                    resp = requests.post(
                        api_call, data,
                        auth=HTTPBasicAuth(nlp_user, nlp_password))
                    sgml = resp.text.encode("utf8")
                else:
                    # No service configured: the stored content is used as-is.
                    sgml = data_to_process.encode("utf8")
                make_spreadsheet(
                    sgml, ether_url + "_/gd_" + corpus + "_" + docname,
                    "sgml")
                mode = "ether"

            # handle copying metadata
            if theform.getvalue('source_doc'):
                source_meta = get_doc_meta(theform.getvalue('source_doc'))
                existing_meta_keys = [x[2] for x in get_doc_meta(doc_id)]
                # don't overwrite existing keys
                meta_to_write = [
                    x for x in source_meta if x[2] not in existing_meta_keys
                ]
                for meta in meta_to_write:
                    m_key, m_val = meta[2], meta[3]
                    save_meta(int(doc_id), m_key, m_val)
                    cache.invalidate_by_doc(doc_id, "meta")

    # Form values win over stored values for the two fields that make up the
    # spreadsheet name.
    if theform.getvalue('edit_docname'):
        docname = theform.getvalue('edit_docname')
    elif old_docname != "":
        docname = old_docname
    if theform.getvalue('edit_corpusname'):
        corpus = theform.getvalue('edit_corpusname')
    elif old_corpus != "":
        corpus = old_corpus

    if theform.getvalue('id'):
        if int(doc_id) <= int(max_id):
            # After clicking edit in landing page, editing existing doc
            # case, get the values from the db. pull the content from db to
            # be displayed in the editor window.
            if theform.getvalue('edit_docname'):
                docname = theform.getvalue('edit_docname')
                if docname != old_docname and user != "demo":
                    update_docname(doc_id, docname)
            if theform.getvalue('edit_filename'):
                repo_name = theform.getvalue('edit_filename')
                if repo_name != old_repo and user != "demo":
                    update_filename(doc_id, repo_name)
            if theform.getvalue('edit_corpusname'):
                corpus = theform.getvalue('edit_corpusname')
                if corpus != old_corpus and user != "demo":
                    update_corpus(doc_id, corpus)
            if theform.getvalue('edit_status'):
                status = theform.getvalue('edit_status')
                if status != old_status and user != "demo":
                    update_status(doc_id, status)
            if theform.getvalue('edit_assignee'):
                assignee = theform.getvalue('edit_assignee')
                if assignee != old_assignee and user != "demo":
                    update_assignee(doc_id, assignee)
            if theform.getvalue('edit_mode'):
                mode = theform.getvalue('edit_mode')
                if mode != old_mode and user != "demo":
                    update_mode(doc_id, mode)
            if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet":
                # mode has been changed to spreadsheet via NLP
                update_mode(doc_id, "ether")
                mode = "ether"
            if old_docname != docname or old_corpus != corpus:
                # The sheet name is built from corpus and docname, so a
                # rename means copying the sheet to the new name and then
                # removing the old one.
                old_sheet_name = "gd" + "_" + old_corpus + "_" + old_docname
                # Check if there is an ether sheet to copy
                if sheet_exists(ether_url, old_sheet_name):
                    old_socialcalc = get_socialcalc(ether_url, old_sheet_name)
                    out, _err = make_spreadsheet(
                        old_socialcalc,
                        ether_url + "_/gd_" + corpus + "_" + docname,
                        "socialcalc")
                    if out == "OK":
                        delete_spreadsheet(ether_url, old_sheet_name)

            text_content = generic_query(
                "SELECT content FROM docs WHERE id=?", (doc_id, ))[0][0]

    # In the case of reloading after hitting 'save', either create new doc
    # into db, or update db.
    # CodeMirror sends the form with its code content in it before 'save' so
    # we just display it again
    if theform.getvalue('code'):
        text_content = theform.getvalue('code')
        text_content = text_content.replace("\r", "")
        # NOTE(review): meant to escape unescaped XML ampersands, but the
        # replacement is the same character that was matched, so as written
        # this substitution changes nothing. Left untouched because a naive
        # fix would also rewrite other entities - confirm the intended
        # replacement text.
        text_content = re.sub(r'&(?!amp;)', r'&', text_content)
        text_content = unicode(text_content.decode("utf8"))
        if user != "demo":
            if int(doc_id) > int(max_id):
                create_document(doc_id, docname, corpus, status, assignee,
                                repo_name, text_content)
            else:
                save_changes(doc_id, text_content)
                cache.invalidate_by_doc(doc_id, "xml")

    git_status = False

    commit_message = ""
    if theform.getvalue('commit_msg'):
        commit_message = theform.getvalue('commit_msg')

    # NOTE(review): unlike the edits above, this path is not guarded by
    # user != "demo" - confirm that is intended.
    if theform.getvalue('push_git') == "push_git":
        repo_name = generic_query("SELECT filename FROM docs WHERE id=?",
                                  (doc_id, ))[0][0]
        file_name = generic_query("SELECT name FROM docs WHERE id=?",
                                  (doc_id, ))[0][0]
        repo_info = repo_name.split('/')
        git_account, git_repo = repo_info[0], repo_info[1]
        if len(repo_info) > 2:
            subdir = '/'.join(repo_info[2:]) + "/"
        else:
            subdir = ""

        # The user will indicate the subdir in the repo_name stored in the
        # db. Therefore, a file may be associated with the target repo
        # subdir zangsir/coptic-xml-tool/uploaded_commits, and that is fine,
        # but we will need to make this uploaded_commits subdir first to
        # create our file.
        if not os.path.isdir(prefix + subdir) and subdir != "":
            # subdir ends with a separator, so the last split item is empty.
            path_so_far = ""
            for dir_name in subdir.split(os.sep)[:-1]:
                target = prefix + path_so_far + dir_name + os.sep
                if not os.path.isdir(target):
                    os.mkdir(target, 0o755)
                path_so_far += dir_name + os.sep

        if mode == "xml":
            text_content = generic_query(
                "SELECT content FROM docs WHERE id=?", (doc_id, ))[0][0]
            serializable_content = (build_meta_tag(doc_id) +
                                    text_content.strip() + "\n</meta>\n")
            serializable_content = serializable_content.encode('utf8')
            file_name = file_name.replace(" ", "_") + ".xml"
        else:  # (mode == "ether")
            text_content = ether_to_sgml(
                get_socialcalc(ether_url,
                               "gd" + "_" + corpus + "_" + docname), doc_id)
            serializable_content = text_content
            file_name = file_name.replace(" ", "_") + "_ether.sgml"

        saved_file = subdir + file_name
        serialize_file(serializable_content, saved_file)
        git_status = push_update_to_git(git_username, git_token, saved_file,
                                        git_account, git_repo, commit_message)

        # File system cleanup
        if subdir == "":
            # Delete a file
            os.remove(prefix + file_name)
        else:
            # Delete a subdirectory
            shutil.rmtree(prefix + subdir)

    if theform.getvalue('nlp_xml') == "do_nlp_xml" and mode == "xml":
        api_call = xml_nlp_api
        if api_call != "":
            nlp_user, nlp_password = get_nlp_credentials()
            data = {"data": text_content, "format": "pipes"}
            resp = requests.post(api_call, data,
                                 auth=HTTPBasicAuth(nlp_user, nlp_password))
            text_content = resp.text

    # Editing options
    # Context manager so the status file handle is closed promptly instead
    # of being left to the garbage collector.
    with open(prefix + "status.tab") as status_file:
        status_list = status_file.read().replace("\r", "").split("\n")
    render_data['status_options'] = [{
        'text': x,
        'selected': x == status
    } for x in status_list]
    render_data['assignee_options'] = [{
        'text': x,
        'selected': x == assignee
    } for x in get_user_list()]
    render_data['mode_options'] = [{
        'text': x,
        'selected': x == mode
    } for x in ["xml", "ether"]]
    render_data['nlp_service'] = {
        'xml_button_html': xml_nlp_button.decode("utf8"),
        'spreadsheet_button_html': spreadsheet_nlp_button.decode("utf8"),
        'disabled': user == "demo" or mode == "ether"
    }
    render_data['git_2fa'] = git_2fa == "true"
    if git_status:
        # Angle brackets are dropped before the message reaches the template.
        render_data['git_commit_response'] = git_status.replace(
            '<', '').replace('>', '')

    # prepare embedded editor html
    if mode == "ether":
        render_data['ether_mode'] = True
        # This rebinds the module-level ether_url for the rest of the run.
        ether_url += "gd_" + corpus + "_" + docname
        render_data['ether_url'] = ether_url
        render_data['ether_stylesheets'] = get_ether_stylesheets()

        if "file" in theform and user != "demo":
            fileitem = theform["file"]
            if len(fileitem.filename) > 0:
                # strip leading path from file name to avoid directory
                # traversal attacks
                fn = os.path.basename(fileitem.filename)
                # NOTE(review): hard-coded host with credentials embedded in
                # the URL; the other spreadsheet calls build the address
                # from ether_url - confirm and move to configuration.
                upload_base = "https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_"
                sheet_target = upload_base + corpus + "_" + docname
                if fn.endswith((".xls", ".xlsx")):
                    make_spreadsheet(fileitem.file.read(), sheet_target,
                                     "excel")
                else:
                    sgml = fileitem.file.read()
                    meta_key_val = harvest_meta(sgml)
                    make_spreadsheet(sgml, sheet_target)
                    for (key, value) in iteritems(meta_key_val):
                        key = key.replace("@", "_")
                        save_meta(int(doc_id), key.decode("utf8"),
                                  value.decode("utf8"))
                        cache.invalidate_by_doc(doc_id, "meta")
    else:
        render_data['ether_mode'] = False

    # stop here if no doc selected
    if not doc_id:
        return render("editor", render_data)
    render_data['doc_is_selected'] = len(doc_id) != 0

    render_data['id'] = doc_id
    render_data['mode'] = mode
    render_data['schema'] = schema
    render_data['docname'] = docname
    render_data['corpusname'] = corpus
    render_data['text_content'] = text_content
    render_data['repo'] = repo_name
    render_data["admin_gt_zero"] = int(admin) > 0
    render_data["admin_eq_three"] = admin == "3"

    # handle clone meta button, and allow github pushing
    if int(admin) > 0:
        doc_list = generic_query(
            "SELECT id,corpus,name,status,assignee_username,mode FROM docs ORDER BY corpus, name COLLATE NOCASE",
            ())
        # Only id, corpus and name are passed on to the template.
        render_data["docs"] = [{
            "id": str(doc[0]),
            "corpus": doc[1],
            "name": doc[2]
        } for doc in doc_list]

    render_data["can_save"] = int(admin) >= 3
    render_data["editor_help_link_html"] = editor_help_link
    render_data["first_load"] = len(theform.keys()) == 1

    return render("editor", render_data)