def export_all_docs(config=None, corpus_filter=None, status=None, extension="sgml"):
    """Zip every matching document and stream the archive to stdout as a CGI download.

    Documents come from ``get_all_docs(corpus_filter, status)``. For each one:

    * mode ``"xml"`` (unless ``config == "[CSV]"``): the content is wrapped with
      ``build_meta_tag(doc_id)`` and a closing ``</meta>`` and stored as ``.xml``.
    * mode ``"ether"``: the spreadsheet is exported either as CSV (when
      ``config == "[CSV]"``) or as SGML with the given ``extension``.

    One ``_meta_<corpus>.tab`` file of tab-separated corpus metadata is added
    for every corpus that has metadata.

    Args:
        config: export configuration handed to ``ether_to_sgml``; the special
            value ``"[CSV]"`` selects CSV export of spreadsheets.
        corpus_filter: when not None, also used as the zip file name and
            documents are named without a corpus prefix.
        status: passed through to ``get_all_docs``.
        extension: file extension used for SGML spreadsheet exports.
    """
    docs = get_all_docs(corpus_filter, status)
    files = []
    all_corpus_meta = defaultdict(dict)
    for doc_id, docname, corpus, mode, content in docs:
        if corpus not in all_corpus_meta:
            corpus_meta = get_doc_meta(doc_id, corpus=True)
            for md in corpus_meta:
                key, val = md[2], md[3]
                all_corpus_meta[corpus][key] = val

        if corpus_filter is None:
            # All documents exported, use corpus prefix to avoid name clashes
            filename = corpus + "_" + docname
        else:
            # Only exporting one user specified corpus, name documents without prefix
            filename = docname

        if mode == "xml" and config != "[CSV]":
            content = build_meta_tag(doc_id) + content.strip() + "\n</meta>\n"
            files.append((content, filename + ".xml"))
        elif mode == "ether":
            ether_name = "_".join(["gd", corpus, docname])
            if config == "[CSV]":
                csv_text = ether_to_csv(ether_url, ether_name)
                files.append((csv_text, filename + ".csv"))
            else:
                sgml = ether_to_sgml(get_socialcalc(ether_url, ether_name), doc_id, config=config)
                files.append((sgml, filename + "." + extension))

    for corp in all_corpus_meta:
        serialized_meta = "".join(
            key + "\t" + val + "\n" for key, val in all_corpus_meta[corp].items()
        )
        files.append((serialized_meta.encode("utf8"), "_meta_" + corp + ".tab"))

    zip_io = create_zip(files)

    if corpus_filter is not None:
        zipname = corpus_filter + ".zip"
    else:
        zipname = "export.zip"

    temp = tempfile.NamedTemporaryFile(delete=False, mode='w+b')
    try:
        temp.write(zip_io.getvalue())
        temp.close()

        print("Content-type: application/download")
        print("Content-Disposition: attachment; filename=" + zipname)
        print("")

        # Make sure the headers precede the binary payload on stdout.
        sys.stdout.flush()

        with open(temp.name, 'rb') as z:
            copyfileobj(z, sys.stdout)
    finally:
        # delete=False means nobody else cleans up: always close and remove
        # the temp file, even when writing or streaming failed.
        temp.close()
        os.remove(temp.name)
def validate_doc_export(doc_id, rules, timestamps=None):
    """Run the export validation rules for one document and return an HTML report.

    A cached report is returned when the spreadsheet has not been edited since
    the last cached "export" validation; otherwise every applicable rule is
    run and the fresh report is cached together with the edit timestamp.

    Args:
        doc_id: id of the document to validate.
        rules: iterable of rule objects exposing ``applies(name, corpus)`` and
            ``validate(socialcalc, doc_id)``.
        timestamps: optional mapping of sheet name to last-edit time; fetched
            with ``get_timestamps(ether_url)`` when falsy.

    Returns:
        The HTML report string.
    """
    info = get_doc_info(doc_id)
    doc_name, doc_corpus = info[0], info[1]
    doc_content = get_doc_content(doc_id)  # fetched but not used below

    sheet_name = "gd_" + doc_corpus + "_" + doc_name
    if not timestamps:
        timestamps = get_timestamps(ether_url)
    last_edit = int(timestamps[sheet_name])

    # Nothing changed since the cached validation: reuse its report.
    if last_edit <= int(cache.get_timestamp(doc_id, "export")):
        return cache.get_report(doc_id, "export")

    socialcalc = get_socialcalc(ether_url, sheet_name)

    # One entry per applicable rule, in rule order.
    findings = [
        rule.validate(socialcalc, doc_id)
        for rule in rules
        if rule.applies(doc_name, doc_corpus)
    ]
    problems = "".join(findings)

    if not findings:
        report = "<strong>No applicable export schemas</strong><br>"
    elif problems:
        report = "<strong>Export problems:</strong><br>" + problems
    else:
        report = "<strong>Export is valid</strong><br>"

    cache.cache_timestamped_validation_result(doc_id, "export", report, last_edit)
    return report
def validate_doc_ether(doc_id, rules, timestamps=None, editor=False):
    """Run the spreadsheet validation rules for one document and return an HTML report.

    A cached report is returned when the spreadsheet has not been edited since
    the last cached "ether" validation. Otherwise the sheet is parsed, every
    applicable rule is run, and the report is cached with the edit timestamp.

    Args:
        doc_id: id of the document to validate.
        rules: iterable of rule objects exposing ``applies(name, corpus)`` and
            ``validate(parsed_ether)``; ``validate`` returns a mapping with the
            keys ``'report'``, ``'tooltip'`` and ``'cells'``.
        timestamps: optional mapping of sheet name to last-edit time; fetched
            with ``get_timestamps(ether_url)`` when falsy.
        editor: when true, the offending cells are highlighted in the sheet.

    Returns:
        The HTML report string.
    """
    info = get_doc_info(doc_id)
    doc_name, doc_corpus = info[0], info[1]

    sheet_name = "gd_" + doc_corpus + "_" + doc_name
    if not timestamps:
        timestamps = get_timestamps(ether_url)
    last_edit = int(timestamps[sheet_name])

    # Nothing changed since the cached validation: reuse its report.
    if last_edit <= int(cache.get_timestamp(doc_id, "ether")):
        return cache.get_report(doc_id, "ether")

    socialcalc = get_socialcalc(ether_url, sheet_name)
    parsed_ether = parse_ether(socialcalc, doc_id=doc_id)

    fragments = []
    cells = []
    any_rule_applied = False
    for rule in rules:
        if not rule.applies(doc_name, doc_corpus):
            continue
        any_rule_applied = True
        outcome = rule.validate(parsed_ether)

        if len(outcome['tooltip']) > 0:
            # Drop the last five characters of the rule's report and wrap the
            # rest in a tooltip container carrying the extra message.
            fragments.append(
                """<div class="tooltip">"""
                + outcome['report'][:-5]
                + """ <i class="fa fa-ellipsis-h"></i>"""
                + "<span class=\"msg\">"
                + outcome['tooltip']
                + "</span>"
                + "</div>"
            )
        else:
            fragments.append(outcome['report'])
        cells.extend(outcome['cells'])

    problems = ''.join(fragments)
    if not any_rule_applied:
        report = "<strong>No applicable spreadsheet validation rules</strong><br>"
    elif problems:
        report = "<strong>Spreadsheet Problems:</strong><br>" + problems
    else:
        report = "<strong>Spreadsheet is valid</strong><br>"

    cache.cache_timestamped_validation_result(doc_id, "ether", report, last_edit)

    if editor:
        highlight_cells(cells, ether_url, sheet_name)
    return report
def export_doc(doc_id, stylesheet=None):
    """Print one spreadsheet document as an SGML download (CGI headers + body).

    Args:
        doc_id: id of the document to export.
        stylesheet: export configuration passed to ``ether_to_sgml`` as ``config``.
    """
    info = get_doc_info(doc_id)
    docname, corpus, filename, status, assignee_username, mode, schema = info

    sheet_name = "_".join(["gd", corpus, docname])
    socialcalc = get_socialcalc(ether_url, sheet_name)
    sgml = ether_to_sgml(socialcalc, doc_id, config=stylesheet)

    header = (
        "Content-Type: application/download\n"
        + "Content-Disposition: attachment; filename=" + corpus + "_" + docname + ".sgml\n\n"
    )
    # Corpus/document names may have made the header unicode; emit it as UTF-8 bytes.
    if isinstance(header, unicode):
        header = str(header.encode("utf8"))

    print(header + sgml)
def highlight_cells(cells, ether_url, ether_doc_name):
    """Rewrite a sheet so that exactly the given cells carry the highlight background.

    The sheet's socialcalc text is fetched, each ``cell:`` line gets its
    ``:bg:`` attribute added, swapped or removed as needed, and the result is
    written back with ``make_spreadsheet``.

    Args:
        cells: container of cell references (e.g. ``"K15"``) to highlight.
        ether_url: base URL of the spreadsheet server.
        ether_doc_name: name of the sheet to rewrite.
    """
    highlight_rgb = 'rgb(242, 242, 142)'
    source_lines = get_socialcalc(ether_url, ether_doc_name).splitlines()

    # Pass 1: find the color number(s) already bound to the highlight color,
    # or pick the number following the last other color definition seen.
    highlight_ids = []
    target_id = '1'
    for line in source_lines:
        color_def = re.match(r'color:(\d+):(rgb.*$)', line)
        if color_def is None:
            continue
        if color_def.group(2) == highlight_rgb:
            highlight_ids.append(color_def.group(1))
        else:
            target_id = str(1 + int(color_def.group(1)))
    if highlight_ids:
        target_id = highlight_ids[0]

    # Pass 2: rewrite the sheet line by line.
    output = []
    for line in source_lines:
        parts = line.split(":")
        # Pure formatting cells without content, e.g. cell:K15:f:1, are dropped.
        if len(parts) == 4 and parts[2] == "f":
            continue

        cell = re.match(r'cell:([A-Z]+)(\d+)(:.*)$', line)
        if cell is None:
            output.append(line)
            # Declare the highlight color right after the sheet line if the
            # sheet does not define it yet.
            if re.match(r'sheet:', line) is not None and target_id not in highlight_ids:
                output.append('color:' + target_id + ':' + highlight_rgb)
            continue

        col, row, rest = cell.group(1), cell.group(2), cell.group(3)
        bg_match = re.search(r':bg:(\d+)($|:)', rest)
        bg = None if bg_match is None else bg_match.group(1)
        span = int(parts[-1]) if "rowspan:" in line else 1

        # A cell counts as highlighted if any row it spans is listed in `cells`.
        wanted = any(col + str(int(row) + offset) in cells for offset in range(span))

        if wanted:
            if bg is None:
                line = line + ':bg:' + target_id
            elif bg != target_id:
                line = re.sub(r':bg:' + bg, r':bg:' + target_id, line)
        elif bg is not None and bg in highlight_ids:
            # Stale highlight from an earlier run: strip it.
            line = re.sub(r':bg:' + bg, r'', line)
        output.append(line)

    make_spreadsheet('\n'.join(output), ether_url + "_/" + ether_doc_name, "socialcalc")
def load_page(user, admin, theform):
    """Process one editor request and render the editor page.

    Reads the submitted form, creates or updates the document it refers to
    (name, repo path, corpus, status, assignee, mode, content, metadata),
    optionally calls the NLP services, pushes to git and handles spreadsheet
    uploads, then renders the "editor" template.

    Args:
        user: current user name. Write operations guarded by
            ``user != "demo"`` are skipped for the demo user.
        admin: admin level; converted with ``int()`` for comparisons and
            compared to ``"3"`` for ``admin_eq_three``.
        theform: submitted form; must support ``getvalue``, ``keys``,
            ``in`` and item access (used for the ``"file"`` upload field).

    Returns:
        The result of ``render("editor", render_data)``.

    Side effects:
        Rebinds the module globals ``code_2fa`` and, in spreadsheet mode,
        ``ether_url`` (the sheet name is appended to it).
    """
    global ether_url
    global code_2fa

    if theform.getvalue("2fa"):
        code_2fa = theform.getvalue("2fa")
    else:
        code_2fa = ""
    max_id = generic_query("SELECT MAX(id) AS max_id FROM docs", "")[0][0]
    if not max_id:  # This is for the initial case after init db
        max_id = 0
    text_content = ""
    repo_name = ""
    corpus = ""
    status = ""
    assignee = ""
    mode = "xml"
    schema = ""
    doc_id = ""  # Should only remain so if someone navigated directly to editor.py
    docname = ""
    old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema = [
        "", "", "", "", "", "", ""
    ]

    if int(admin) > 0:
        git_username, git_token, git_2fa = get_git_credentials(user, admin, code_2fa)
    else:
        git_username, git_token, git_2fa = (None, None, None)

    # dict of variables we'll need to render the html
    render_data = {}

    if theform.getvalue('id'):
        doc_id = theform.getvalue('id')
        if int(doc_id) > int(max_id):
            # Creating new doc case, assign some default values
            docname = "new_document"
            repo_name = "account/repo_name"
            status = "editing"
            assignee = "default_user"
            corpus = "default_corpus"
            schema = ""
            text_content = ""
            # If one of the four forms is edited or we're cloning a doc, then we create the doc,
            # otherwise nothing happens (user cannot fill in nothing and create the doc)
            # NOTE(review): the `doc_id > max_id` checks below compare the raw form value
            # with max_id without int() conversion - confirm this is intended.
            if theform.getvalue('edit_docname') and user != "demo":
                # NOTE(review): docname still holds the default assigned above at this
                # point, so this inner branch cannot run - confirm whether the form
                # value was meant to be read first, as in the sibling branches.
                if docname != 'new_document':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_docname(doc_id, docname)

            if theform.getvalue('edit_filename') and user != "demo":
                repo_name = theform.getvalue('edit_filename')
                if repo_name != 'account/repo_name':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_filename(doc_id, repo_name)

            if theform.getvalue('edit_corpusname') and user != "demo":
                corpus = theform.getvalue('edit_corpusname')
                if corpus != 'default_corpus':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_corpus(doc_id, corpus)

            if theform.getvalue('edit_status') and user != "demo":
                status = theform.getvalue('edit_status')
                if status != 'editing':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_status(doc_id, status)

            if theform.getvalue('edit_assignee') and user != "demo":
                assignee = theform.getvalue('edit_assignee')
                if assignee != "default_user":
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_assignee(doc_id, assignee)

            # cloning metadata from an existing doc into a new doc
            if theform.getvalue('source_doc'):
                source_meta = get_doc_meta(theform.getvalue('source_doc'))
                if doc_id > max_id:
                    create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                    max_id = doc_id
                for meta in source_meta:
                    m_key, m_val = meta[2:4]
                    save_meta(int(doc_id), m_key.decode("utf8"), m_val.decode("utf8"))
                    cache.invalidate_by_doc(doc_id, "meta")

        else:
            # Get previous values from DB
            old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema = get_doc_info(
                doc_id)
            # Assume new values are same, overwrite with different form values and update DB if new values found
            docname, corpus, repo_name, status, assignee, mode, schema = old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema
            docname = old_docname

            # Handle switch to spreadsheet mode if NLP spreadsheet service is called
            if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet" and mode == "xml" and user != "demo":
                data_to_process = generic_query("SELECT content FROM docs WHERE id=?", (doc_id, ))[0][0]
                api_call = spreadsheet_nlp_api
                if api_call != "":
                    nlp_user, nlp_password = get_nlp_credentials()
                    data = {
                        "data": data_to_process,
                        "lb": "line",
                        "format": "sgml_no_parse"
                    }
                    resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user, nlp_password))
                    sgml = resp.text.encode("utf8")
                else:
                    # No NLP service configured: build the sheet from the stored content as is.
                    sgml = data_to_process.encode("utf8")
                out, err = make_spreadsheet(sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml")
                mode = "ether"

            # handle copying metadata
            if theform.getvalue('source_doc'):
                source_meta = get_doc_meta(theform.getvalue('source_doc'))
                existing_meta_keys = [x[2] for x in get_doc_meta(doc_id)]
                # don't overwrite existing keys
                meta_to_write = [x for x in source_meta if x[2] not in existing_meta_keys]
                for meta in meta_to_write:
                    m_key, m_val = meta[2], meta[3]
                    save_meta(int(doc_id), m_key, m_val)
                    cache.invalidate_by_doc(doc_id, "meta")

    if theform.getvalue('edit_docname'):
        docname = theform.getvalue('edit_docname')
    elif old_docname != "":
        docname = old_docname
    if theform.getvalue('edit_corpusname'):
        corpus = theform.getvalue('edit_corpusname')
    elif old_corpus != "":
        corpus = old_corpus

    if theform.getvalue('id'):
        if int(doc_id) <= int(max_id):
            # After clicking edit in landing page, editing existing doc case, get the values from the db.
            # pull the content from db to be displayed in the editor window.
            if theform.getvalue('edit_docname'):
                docname = theform.getvalue('edit_docname')
                if docname != old_docname and user != "demo":
                    update_docname(doc_id, docname)
            if theform.getvalue('edit_filename'):
                repo_name = theform.getvalue('edit_filename')
                if repo_name != old_repo and user != "demo":
                    update_filename(doc_id, repo_name)
            if theform.getvalue('edit_corpusname'):
                corpus = theform.getvalue('edit_corpusname')
                if corpus != old_corpus and user != "demo":
                    update_corpus(doc_id, corpus)
            if theform.getvalue('edit_status'):
                status = theform.getvalue('edit_status')
                if status != old_status and user != "demo":
                    update_status(doc_id, status)
            if theform.getvalue('edit_assignee'):
                assignee = theform.getvalue('edit_assignee')
                if assignee != old_assignee and user != "demo":
                    update_assignee(doc_id, assignee)
            if theform.getvalue('edit_mode'):
                mode = theform.getvalue('edit_mode')
                if mode != old_mode and user != "demo":
                    update_mode(doc_id, mode)
            if theform.getvalue('nlp_spreadsheet') == "do_nlp_spreadsheet":
                # mode has been changed to spreadsheet via NLP
                update_mode(doc_id, "ether")
                mode = "ether"
            if old_docname != docname or old_corpus != corpus:
                # The sheet name is derived from corpus and docname, so a rename
                # means copying the sheet to the new name and deleting the old one.
                old_sheet_name = "gd" + "_" + old_corpus + "_" + old_docname
                if sheet_exists(ether_url, old_sheet_name):  # Check if there is an ether sheet to copy
                    old_socialcalc = get_socialcalc(ether_url, old_sheet_name)
                    out, err = make_spreadsheet(
                        old_socialcalc,
                        ether_url + "_/gd_" + corpus + "_" + docname,
                        "socialcalc")
                    if out == "OK":
                        delete_spreadsheet(ether_url, old_sheet_name)

            text_content = generic_query("SELECT content FROM docs WHERE id=?", (doc_id, ))[0][0]

    # In the case of reloading after hitting 'save', either create new doc into db, or update db
    # CodeMirror sends the form with its code content in it before 'save' so we just display it again
    if theform.getvalue('code'):
        text_content = theform.getvalue('code')
        text_content = text_content.replace("\r", "")
        # Escape unescaped XML &. The lookahead leaves an existing "&amp;" untouched.
        # NOTE(review): other entity references (e.g. "&lt;") also match and are
        # escaped again - confirm that is acceptable for stored content.
        text_content = re.sub(r'&(?!amp;)', r'&amp;', text_content)
        text_content = unicode(text_content.decode("utf8"))
        if user != "demo":
            if int(doc_id) > int(max_id):
                create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
            else:
                save_changes(doc_id, text_content)
                cache.invalidate_by_doc(doc_id, "xml")

    git_status = False

    commit_message = ""
    if theform.getvalue('commit_msg'):
        commit_message = theform.getvalue('commit_msg')

    if theform.getvalue('push_git') == "push_git":
        repo_name = generic_query("SELECT filename FROM docs WHERE id=?", (doc_id, ))[0][0]
        file_name = generic_query("SELECT name FROM docs WHERE id=?", (doc_id, ))[0][0]
        repo_info = repo_name.split('/')
        git_account, git_repo = repo_info[0], repo_info[1]
        if len(repo_info) > 2:
            subdir = '/'.join(repo_info[2:]) + "/"
        else:
            subdir = ""

        # The user will indicate the subdir in the repo_name stored in the db.
        # Therefore, a file may be associated with the target repo subdir zangsir/coptic-xml-tool/uploaded_commits,
        # and that is fine, but we will need to make this uploaded_commits subdir first to create our file.
        if not os.path.isdir(prefix + subdir) and subdir != "":
            dirs = subdir.split(os.sep)[:-1]
            path_so_far = ""
            for dir_name in dirs:
                if not os.path.isdir(prefix + path_so_far + dir_name + os.sep):
                    os.mkdir(prefix + path_so_far + dir_name + os.sep, 0o755)
                path_so_far += dir_name + os.sep

        if mode == "xml":
            text_content = generic_query("SELECT content FROM docs WHERE id=?", (doc_id, ))[0][0]
            serializable_content = build_meta_tag(doc_id) + text_content.strip() + "\n</meta>\n"
            serializable_content = serializable_content.encode('utf8')
            file_name = file_name.replace(" ", "_") + ".xml"
        else:  # (mode == "ether")
            text_content = ether_to_sgml(
                get_socialcalc(ether_url, "gd" + "_" + corpus + "_" + docname),
                doc_id)
            serializable_content = text_content
            file_name = file_name.replace(" ", "_") + "_ether.sgml"
        saved_file = subdir + file_name
        serialize_file(serializable_content, saved_file)
        git_status = push_update_to_git(git_username, git_token, saved_file, git_account, git_repo, commit_message)

        # File system cleanup
        if subdir == "":
            # Delete a file
            os.remove(prefix + file_name)
        else:
            # Delete a subdirectory
            shutil.rmtree(prefix + subdir)

    if theform.getvalue('nlp_xml') == "do_nlp_xml" and mode == "xml":
        api_call = xml_nlp_api
        if api_call != "":
            nlp_user, nlp_password = get_nlp_credentials()
            data = {"data": text_content, "format": "pipes"}
            resp = requests.post(api_call, data, auth=HTTPBasicAuth(nlp_user, nlp_password))
            text_content = resp.text

    # Editing options
    # Docname
    # Filename
    with open(prefix + "status.tab") as status_file:
        status_list = status_file.read().replace("\r", "").split("\n")
    render_data['status_options'] = [{
        'text': x,
        'selected': x == status
    } for x in status_list]
    render_data['assignee_options'] = [{
        'text': x,
        'selected': x == assignee
    } for x in get_user_list()]
    render_data['mode_options'] = [{
        'text': x,
        'selected': x == mode
    } for x in ["xml", "ether"]]
    render_data['nlp_service'] = {
        'xml_button_html': xml_nlp_button.decode("utf8"),
        'spreadsheet_button_html': spreadsheet_nlp_button.decode("utf8"),
        'disabled': user == "demo" or mode == "ether"
    }
    render_data['git_2fa'] = git_2fa == "true"
    if git_status:
        # Strip angle brackets from the push result before handing it to the template.
        render_data['git_commit_response'] = git_status.replace('<', '').replace('>', '')

    # prepare embedded editor html
    if mode == "ether":
        render_data['ether_mode'] = True
        ether_url += "gd_" + corpus + "_" + docname
        render_data['ether_url'] = ether_url
        render_data['ether_stylesheets'] = get_ether_stylesheets()

        if "file" in theform and user != "demo":
            fileitem = theform["file"]
            if len(fileitem.filename) > 0:
                # strip leading path from file name to avoid directory traversal attacks
                fn = os.path.basename(fileitem.filename)
                if fn.endswith(".xls") or fn.endswith(".xlsx"):
                    make_spreadsheet(
                        fileitem.file.read(),
                        "https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_" + corpus + "_" + docname,
                        "excel")
                else:
                    sgml = fileitem.file.read()
                    meta_key_val = harvest_meta(sgml)
                    make_spreadsheet(
                        sgml,
                        "https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_" + corpus + "_" + docname)
                    for (key, value) in iteritems(meta_key_val):
                        key = key.replace("@", "_")
                        save_meta(int(doc_id), key.decode("utf8"), value.decode("utf8"))
                        cache.invalidate_by_doc(doc_id, "meta")
    else:
        render_data['ether_mode'] = False

    # stop here if no doc selected
    if doc_id:
        render_data['doc_is_selected'] = len(doc_id) != 0
    else:
        return render("editor", render_data)

    render_data['id'] = doc_id
    render_data['mode'] = mode
    render_data['schema'] = schema
    render_data['docname'] = docname
    render_data['corpusname'] = corpus

    render_data['text_content'] = text_content
    render_data['repo'] = repo_name

    render_data["admin_gt_zero"] = int(admin) > 0
    render_data["admin_eq_three"] = admin == "3"

    # handle clone meta button, and allow github pushing
    if int(admin) > 0:
        doc_list = generic_query(
            "SELECT id,corpus,name,status,assignee_username,mode FROM docs ORDER BY corpus, name COLLATE NOCASE",
            ())
        render_data["docs"] = []
        for doc in doc_list:
            doc_vars = {}
            doc_vars["id"] = str(doc[0])
            doc_vars["corpus"] = doc[1]
            doc_vars["name"] = doc[2]
            render_data['docs'].append(doc_vars)

    render_data["can_save"] = not (int(admin) < 3)
    render_data["editor_help_link_html"] = editor_help_link
    render_data["first_load"] = len(theform.keys()) == 1

    return render("editor", render_data)
def load_page(user, admin, theform):
    """Process one editor request and build the editor HTML page.

    Reads the submitted form, creates or updates the document it refers to
    (name, repo path, corpus, status, assignee, mode, content, metadata),
    optionally pushes to git or calls the NLP service, handles spreadsheet
    uploads and fills the ``editor.html`` template.

    Args:
        user: current user name, passed to ``get_git_credentials``.
        admin: admin level; converted with ``int()`` to decide whether the
            git commit controls are shown.
        theform: submitted form; must support ``getvalue``, ``in`` and item
            access (used for the ``"file"`` upload field).

    Returns:
        The full response as a string, starting with the
        ``Content-type:text/html`` header.

    Side effects:
        In spreadsheet mode the module global ``ether_url`` is rebound with
        the sheet name appended.
    """
    global ether_url
    max_id = generic_query("SELECT MAX(id) AS max_id FROM docs", "")[0][0]
    if not max_id:  # This is for the initial case after init db
        max_id = 0
    text_content = ""
    repo_name = ""
    corpus = ""
    status = ""
    assignee = ""
    mode = "xml"
    doc_id = ""  # Should only remain so if someone navigated directly to editor.py
    docname = ""
    mymsg = ""
    old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode = ["", "", "", "", "", ""]

    if theform.getvalue('id'):
        doc_id = theform.getvalue('id')
        if int(doc_id) > int(max_id):
            # Creating new doc case, assign some default values
            docname = "new_document"
            repo_name = "account/repo_name"
            status = "editing"
            assignee = "default_user"
            corpus = "default_corpus"
            text_content = ""
            # If one of the four forms is edited, then we create the doc, otherwise nothing happens
            # (user cannot fill in nothing and create the doc)
            # NOTE(review): the `doc_id > max_id` checks below compare the raw form value
            # with max_id without int() conversion - confirm this is intended.
            if theform.getvalue('edit_docname'):
                # NOTE(review): docname still holds the default assigned above at this
                # point, so this inner branch cannot run - confirm whether the form
                # value was meant to be read first, as in the sibling branches.
                if docname != 'new_document':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_docname(doc_id, docname)

            if theform.getvalue('edit_filename'):
                repo_name = theform.getvalue('edit_filename')
                if repo_name != 'account/repo_name':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_filename(doc_id, repo_name)

            if theform.getvalue('edit_corpusname'):
                corpus = theform.getvalue('edit_corpusname')
                if corpus != 'default_corpus':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_corpus(doc_id, corpus)

            if theform.getvalue('edit_status'):
                status = theform.getvalue('edit_status')
                if status != 'editing':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_status(doc_id, status)

            if theform.getvalue('edit_assignee'):
                assignee = theform.getvalue('edit_assignee')
                if assignee != "default_user":
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_assignee(doc_id, assignee)
        else:
            # Get previous values from DB
            old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode = get_doc_info(doc_id)
            # Assume new value are same, overwrite with different form values and update DB if new values found
            docname, corpus, repo_name, status, assignee, mode = old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode
            docname = old_docname

    if theform.getvalue('edit_docname'):
        docname = theform.getvalue('edit_docname')
    elif old_docname != "":
        docname = old_docname
    if theform.getvalue('edit_corpusname'):
        corpus = theform.getvalue('edit_corpusname')
    elif old_corpus != "":
        corpus = old_corpus

    if theform.getvalue('id'):
        if int(doc_id) <= int(max_id):
            # After clicking edit in landing page, editing existing doc case, get the values from the db.
            # pull the content from db to be displayed in the editor window.
            if theform.getvalue('edit_docname'):
                docname = theform.getvalue('edit_docname')
                if docname != old_docname:
                    update_docname(doc_id, docname)
            if theform.getvalue('edit_filename'):
                repo_name = theform.getvalue('edit_filename')
                if repo_name != old_repo:
                    update_filename(doc_id, repo_name)
            if theform.getvalue('edit_corpusname'):
                corpus = theform.getvalue('edit_corpusname')
                if corpus != old_corpus:
                    update_corpus(doc_id, corpus)
            if theform.getvalue('edit_status'):
                status = theform.getvalue('edit_status')
                if status != old_status:
                    update_status(doc_id, status)
            if theform.getvalue('edit_assignee'):
                assignee = theform.getvalue('edit_assignee')
                if assignee != old_assignee:
                    update_assignee(doc_id, assignee)
            if theform.getvalue('edit_mode'):
                mode = theform.getvalue('edit_mode')
                if mode != old_mode:
                    update_mode(doc_id, mode)
            if old_docname != docname or old_corpus != corpus:
                # The sheet name is derived from corpus and docname, so a rename
                # means copying the sheet to the new name and deleting the old one.
                old_sheet_name = "gd" + "_" + old_corpus + "_" + old_docname
                if sheet_exists(ether_url, old_sheet_name):  # Check if there is an ether sheet to copy
                    old_socialcalc = get_socialcalc(ether_url, old_sheet_name)
                    out, err = make_spreadsheet(old_socialcalc, ether_url + "_/gd_" + corpus + "_" + docname, "socialcalc")
                    if out == "OK":
                        out, err = delete_spreadsheet(ether_url, old_sheet_name)
                    else:
                        mymsg += "out was: " + out + " err was" + err

            text_content = generic_query("SELECT content FROM docs WHERE id=?", (doc_id,))[0][0]

    # In the case of reloading after hitting 'save', either create new doc into db, or update db
    # CodeMirror sends the form with its code content in it before 'save' so we just display it again
    if theform.getvalue('code'):
        text_content = theform.getvalue('code')
        text_content = text_content.replace("\r", "")
        text_content = unicode(text_content.decode("utf8"))
        if int(doc_id) > int(max_id):
            create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
        else:
            save_changes(doc_id, text_content)

    git_status = False

    # Default to an empty message so a push without 'commit_msg' does not
    # hit an unbound name below.
    commit_message = ""
    if theform.getvalue('commit_msg'):
        commit_message = theform.getvalue('commit_msg')

    if theform.getvalue('push_git') == "push_git" and mode == "xml":
        text_content = generic_query("SELECT content FROM docs WHERE id=?", (doc_id,))[0][0]
        repo_name = generic_query("SELECT filename FROM docs WHERE id=?", (doc_id,))[0][0]
        file_name = generic_query("SELECT name FROM docs WHERE id=?", (doc_id,))[0][0]
        file_name = file_name.replace(" ", "_") + ".xml"
        repo_info = repo_name.split('/')
        git_account, git_repo = repo_info[0], repo_info[1]
        if len(repo_info) > 2:
            subdir = '/'.join(repo_info[2:]) + "/"
        else:
            subdir = ""
        if not os.path.isdir(prefix + subdir) and subdir != "":
            os.mkdir(prefix + subdir, 0o755)

        # The user will indicate the subdir in the repo_name stored in the db.
        # Therefore, a file may be associated with the target repo subdir zangsir/coptic-xml-tool/uploaded_commits,
        # and that is fine, but we will need to make this uploaded_commits subdir first to create our file.
        saved_file = subdir + file_name
        serialize_file(text_content, saved_file)
        git_username, git_password = get_git_credentials(user, admin)
        git_status = push_update_to_git(git_username, git_password, saved_file, git_account, git_repo, commit_message)
        if subdir == "":
            # Delete a file
            os.remove(prefix + file_name)
        else:
            shutil.rmtree(prefix + subdir)

    if theform.getvalue('nlp_service') == "do_nlp" and mode == "xml":
        # Pass the document through `params` so it is URL-encoded; raw
        # interpolation breaks on characters such as '&' or '#'.
        # NOTE(review): the service credentials are hard-coded here; consider
        # moving them to configuration.
        resp = requests.get(
            "https://corpling.uis.georgetown.edu/coptic-nlp/api",
            params={"data": text_content, "lb": "line", "format": "pipes"},
            auth=HTTPBasicAuth('coptic_client', 'kz7hh2'))
        text_content = resp.text

    # Editing options
    # Docname
    # Filename
    push_git = (
        """<input type="hidden" name="push_git" id="push_git" value=""> """
        """<input type="text" name="commit_msg" placeholder = "commit message here" style="width:140px"> """
        """<div name="push_git" class="button" onclick="document.getElementById('push_git').value='push_git'; document.getElementById('editor_form').submit();"> """
        """<i class="fa fa-github"></i> Commit </div> """
    )
    if git_status:
        # Remove some html keyword symbols in the commit message returned by github3
        push_msg = git_status.replace('<', '')
        push_msg = push_msg.replace('>', '')
        push_git += """<p style='color:red;'>""" + push_msg + ' successful' + """</p>"""

    with open(prefix + "status.tab") as status_file:
        status_list = status_file.read().replace("\r", "").split("\n")

    options = ""
    for stat in status_list:
        options += '<option value="' + stat + '">' + stat + '</option>\n'
    # Mark the current status as the selected option.
    options = options.replace('">' + status, '" selected="selected">' + status)

    edit_status = """<select name="edit_status" onchange='this.form.submit()'>"""
    edit_status += options + "</select>"

    # Get user_list from the logintools
    user_list = []
    scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep
    userdir = scriptpath + "users" + os.sep

    userfiles = [f for f in listdir(userdir) if isfile(join(userdir, f))]
    for userfile in sorted(userfiles):
        if userfile != "config.ini" and userfile != "default.ini" and userfile != "admin.ini" and userfile.endswith(".ini"):
            userfile = userfile.replace(".ini", "")
            user_list.append(userfile)

    edit_assignee = """<select name="edit_assignee" onchange="this.form.submit()">"""
    for user_name in user_list:
        assignee_select = "selected" if user_name == assignee else ""
        # Built by concatenation so a '%' in a user name cannot break formatting.
        edit_assignee += ("""<option value='""" + user_name + "' " + assignee_select + ">"
                          + user_name + """</option>""")
    edit_assignee += "</select>"

    edit_mode = '''<select name="edit_mode" onchange="this.form.submit()">\n<option value="xml">xml</option>\n<option value="ether">spreadsheet</option>\n</select>'''
    edit_mode = edit_mode.replace(mode + '"', mode + '" selected="selected"')

    # Metadata
    if theform.getvalue('metakey'):
        metakey = theform.getvalue('metakey')
        metavalue = theform.getvalue('metavalue')
        save_meta(doc_id, metakey, metavalue)
    if theform.getvalue('metaid'):
        metaid = theform.getvalue('metaid')
        delete_meta(metaid)
    metadata = print_meta(doc_id)

    nlp_service = (
        """ <div class="button" name="nlp_button" onclick="document.getElementById('nlp_service').value='do_nlp'; document.getElementById('editor_form').submit();"> """
        """<i class="fa fa-cogs"></i> NLP </div> """
    )

    page = "Content-type:text/html\r\n\r\n"
    #page += str(theform)
    page += urllib.urlopen(prefix + "templates" + os.sep + "editor.html").read()
    page += mymsg
    if mode == "ether":
        embedded_editor = urllib.urlopen(prefix + "templates" + os.sep + "ether.html").read()
        ether_url += "gd_" + corpus + "_" + docname

        if "file" in theform:
            fileitem = theform["file"]
            if len(fileitem.filename) > 0:
                # strip leading path from file name to avoid directory traversal attacks
                fn = os.path.basename(fileitem.filename)
                if fn.endswith(".xls") or fn.endswith(".xlsx"):
                    make_spreadsheet(fileitem.file.read(), "https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_" + corpus + "_" + docname, "excel")
                else:
                    sgml = fileitem.file.read()
                    meta_key_val = harvest_meta(sgml)
                    make_spreadsheet(sgml, "https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_" + corpus + "_" + docname)
                    for key, value in meta_key_val.iteritems():
                        key = key.replace("@", "_")
                        save_meta(doc_id, key, value)

        embedded_editor = embedded_editor.replace("**source**", ether_url)
    else:
        embedded_editor = urllib.urlopen(prefix + "templates" + os.sep + "codemirror.html").read()

    page = page.replace("**embedded_editor**", embedded_editor)

    if len(doc_id) == 0:
        # No document selected: replace the whole article with a pointer back to the list.
        exp = re.compile(r"<article>.*</article>", re.DOTALL)
        page = exp.sub("""<h2>No document selected | <a href="index.py">back to document list</a> </h2>""", page)
    else:
        page = page.replace("**content**", text_content)
        page = page.replace("**docname**", docname)
        page = page.replace("**corpusname**", corpus)
        page = page.replace("**edit_status**", edit_status)
        page = page.replace("**repo**", repo_name)
        page = page.replace("**edit_assignee**", edit_assignee)
        page = page.replace("**edit_mode**", edit_mode)
        page = page.replace("**metadata**", metadata)
        page = page.replace("**NLP**", nlp_service)
        page = page.replace("**id**", doc_id)
    if int(admin) > 0:
        page = page.replace("**github**", push_git)
    else:
        page = page.replace("**github**", '')

    return page