Пример #1
0
def export_all_docs(config=None,
                    corpus_filter=None,
                    status=None,
                    extension="sgml"):
    """Export every matching document as a zip archive written to stdout.

    Documents are fetched with ``get_all_docs(corpus_filter, status)``. Each
    one is serialized according to its mode and ``config``:

    * mode ``"xml"`` (and ``config`` is not ``"[CSV]"``): the stored content
      wrapped in the document's meta tag, saved as ``<name>.xml``;
    * mode ``"ether"`` with ``config == "[CSV]"``: the spreadsheet as CSV,
      saved as ``<name>.csv``;
    * mode ``"ether"`` otherwise: the spreadsheet converted to SGML with
      ``config`` as the export configuration, saved as ``<name>.<extension>``.

    Documents matching none of these cases (for example XML documents when
    ``config == "[CSV]"``) are left out of the archive.

    One ``_meta_<corpus>.tab`` file of tab-separated key/value lines is added
    for every corpus that has corpus-level metadata.

    The archive is emitted as a CGI download response: headers are printed,
    followed by the zip bytes.

    :param config: export configuration name, or ``"[CSV]"`` for CSV export.
    :param corpus_filter: corpus name to restrict the export to; when ``None``
        all corpora are exported and file names get a ``<corpus>_`` prefix.
    :param status: passed through to ``get_all_docs``.
    :param extension: file extension used for SGML exports of spreadsheets.
    """
    docs = get_all_docs(corpus_filter, status)
    files = []
    all_corpus_meta = defaultdict(dict)
    # Corpora whose metadata has already been looked up. Tracked separately
    # from all_corpus_meta so that a corpus without any metadata is queried
    # only once, yet still produces no (empty) _meta_ file below.
    # Assumes corpus-level metadata is the same for every document of a
    # corpus, as the original per-corpus caching already did.
    seen_corpora = set()
    for doc_id, docname, corpus, mode, content in docs:
        if corpus not in seen_corpora:
            seen_corpora.add(corpus)
            for md in get_doc_meta(doc_id, corpus=True):
                key, val = md[2], md[3]
                all_corpus_meta[corpus][key] = val

        if corpus_filter is None:
            # All documents exported, use corpus prefix to avoid name clashes
            filename = corpus + "_" + docname
        else:
            # Only exporting one user specified corpus, no prefix needed
            filename = docname

        if mode == "xml" and config != "[CSV]":
            content = build_meta_tag(doc_id) + content.strip() + "\n</meta>\n"
            files.append((content, filename + ".xml"))
        elif mode == "ether":
            ether_name = "_".join(["gd", corpus, docname])
            if config == "[CSV]":
                csv = ether_to_csv(ether_url, ether_name)
                files.append((csv, filename + ".csv"))
            else:
                sgml = ether_to_sgml(get_socialcalc(ether_url, ether_name),
                                     doc_id,
                                     config=config)
                files.append((sgml, filename + "." + extension))

    for corp, corp_meta in all_corpus_meta.items():
        serialized_meta = "".join(key + "\t" + val + "\n"
                                  for key, val in corp_meta.items())
        files.append(
            (serialized_meta.encode("utf8"), "_meta_" + corp + ".tab"))

    zip_io = create_zip(files)

    if corpus_filter is not None:
        zipname = corpus_filter + ".zip"
    else:
        zipname = "export.zip"

    temp = tempfile.NamedTemporaryFile(delete=False, mode='w+b')
    try:
        temp.write(zip_io.getvalue())
        temp.close()

        print("Content-type: application/download")
        print("Content-Disposition: attachment; filename=" + zipname)
        print("")

        # Make sure the headers reach the client before the raw zip bytes
        sys.stdout.flush()

        with open(temp.name, 'rb') as z:
            copyfileobj(z, sys.stdout)
    finally:
        # Always clean up the temporary file, even if streaming failed
        temp.close()
        os.remove(temp.name)
Пример #2
0
def validate_doc_export(doc_id, rules, timestamps=None):
    """Validate a document's spreadsheet against the export rules.

    The result is cached per document under the ``"export"`` key together
    with the spreadsheet's last-edit timestamp; when the spreadsheet has not
    been edited since the cached validation, the cached report is returned
    without fetching the spreadsheet.

    :param doc_id: id of the document to validate.
    :param rules: iterable of rule objects providing
        ``applies(doc_name, doc_corpus)`` and ``validate(socialcalc, doc_id)``;
        ``validate`` returns a report fragment that is concatenated into the
        final report (an empty fragment means no problem).
    :param timestamps: optional mapping of spreadsheet name to last-edit
        timestamp; fetched with ``get_timestamps(ether_url)`` when falsy.
    :return: HTML report string.
    """
    doc_info = get_doc_info(doc_id)
    doc_name, doc_corpus = doc_info[0], doc_info[1]

    ether_doc_name = "gd_" + doc_corpus + "_" + doc_name
    if not timestamps:
        timestamps = get_timestamps(ether_url)
    last_edit = int(timestamps[ether_doc_name])
    if last_edit <= int(cache.get_timestamp(doc_id, "export")):
        return cache.get_report(doc_id, "export")

    socialcalc = get_socialcalc(ether_url, ether_doc_name)

    problems = []
    export_rule_fired = False
    for rule in rules:
        if not rule.applies(doc_name, doc_corpus):
            continue
        export_rule_fired = True
        problems.append(rule.validate(socialcalc, doc_id))
    report = "".join(problems)

    if not export_rule_fired:
        report = "<strong>No applicable export schemas</strong><br>"
    elif report:
        report = "<strong>Export problems:</strong><br>" + report
    else:
        report = "<strong>Export is valid</strong><br>"

    cache.cache_timestamped_validation_result(doc_id, "export", report, last_edit)

    return report
Пример #3
0
def validate_doc_ether(doc_id, rules, timestamps=None, editor=False):
    """Run the spreadsheet validation rules against a document's sheet.

    A cached report (cache key ``"ether"``) is returned when the sheet has
    not been edited since it was last validated. Otherwise the sheet is
    fetched and parsed, every applicable rule is run, and the fresh report is
    cached together with the sheet's last-edit timestamp.

    :param doc_id: id of the document to validate.
    :param rules: iterable of rule objects providing
        ``applies(doc_name, doc_corpus)`` and ``validate(parsed_ether)``; the
        latter returns a dict with ``'report'``, ``'tooltip'`` and ``'cells'``.
    :param timestamps: optional mapping of sheet name to last-edit timestamp;
        fetched with ``get_timestamps(ether_url)`` when falsy.
    :param editor: when true, the cells reported by the rules are highlighted
        in the sheet via ``highlight_cells``.
    :return: HTML report string.
    """
    info = get_doc_info(doc_id)
    doc_name, doc_corpus = info[0], info[1]
    ether_doc_name = "gd_" + doc_corpus + "_" + doc_name

    if not timestamps:
        timestamps = get_timestamps(ether_url)
    last_edit = int(timestamps[ether_doc_name])
    cached_at = int(cache.get_timestamp(doc_id, "ether"))
    if last_edit <= cached_at:
        # Sheet unchanged since the last validation: reuse the cached report
        return cache.get_report(doc_id, "ether")

    parsed_ether = parse_ether(get_socialcalc(ether_url, ether_doc_name),
                               doc_id=doc_id)

    fragments = []
    flagged_cells = []
    any_rule_applied = False
    for rule in rules:
        if rule.applies(doc_name, doc_corpus):
            any_rule_applied = True
            outcome = rule.validate(parsed_ether)
            tooltip = outcome['tooltip']
            if len(tooltip) > 0:
                # Drop the last five characters of the report fragment and
                # wrap it together with its tooltip message
                fragments.append("""<div class="tooltip">""" +
                                 outcome['report'][:-5] +
                                 """ <i class="fa fa-ellipsis-h"></i>""" +
                                 "<span class=\"msg\">" + tooltip +
                                 "</span>" + "</div>")
            else:
                fragments.append(outcome['report'])
            flagged_cells += outcome['cells']

    problems = "".join(fragments)
    if not any_rule_applied:
        report = "<strong>No applicable spreadsheet validation rules</strong><br>"
    elif problems:
        report = "<strong>Spreadsheet Problems:</strong><br>" + problems
    else:
        report = "<strong>Spreadsheet is valid</strong><br>"

    cache.cache_timestamped_validation_result(doc_id, "ether", report,
                                              last_edit)

    if editor:
        highlight_cells(flagged_cells, ether_url, ether_doc_name)
    return report
Пример #4
0
def export_doc(doc_id, stylesheet=None):
    """Print a single spreadsheet document as an SGML download response.

    The document's sheet is fetched, converted to SGML using ``stylesheet``
    as the export configuration, and printed after the download headers.

    :param doc_id: id of the document to export.
    :param stylesheet: export configuration passed to ``ether_to_sgml``.
    """
    # Only the name and corpus are needed; the remaining fields are ignored
    (docname, corpus, _filename, _status, _assignee, _mode,
     _schema) = get_doc_info(doc_id)
    sheet_name = "_".join(["gd", corpus, docname])

    sgml = ether_to_sgml(get_socialcalc(ether_url, sheet_name),
                         doc_id,
                         config=stylesheet)

    headers = "".join([
        "Content-Type: application/download\n",
        "Content-Disposition: attachment; filename=",
        corpus,
        "_",
        docname,
        ".sgml\n\n",
    ])

    # Headers become unicode when corpus/docname are unicode; encode them
    # before appending the SGML payload
    if isinstance(headers, unicode):
        headers = str(headers.encode("utf8"))

    print(headers + sgml)
Пример #5
0
def highlight_cells(cells, ether_url, ether_doc_name):
    """Highlight the given cells in a spreadsheet and clear stale highlights.

    The sheet is fetched in socialcalc format and rewritten line by line:

    * cell lines whose cell (or any row covered by its rowspan) is listed in
      ``cells`` get the highlight background colour;
    * other cell lines lose a background that refers to the highlight colour;
    * lines with exactly four colon-separated fields whose third field is
      ``f`` (pure formatting cells, e.g. ``cell:K15:f:1``) are dropped;
    * if the highlight colour is not yet defined in the sheet, its ``color:``
      definition is added right after the ``sheet:`` line.

    The rewritten sheet is uploaded with ``make_spreadsheet``.

    Compared with reading the rowspan from the last colon-separated field,
    the rowspan is parsed from its own ``:rowspan:N`` attribute, so it is
    still found when other attributes follow it (this function itself appends
    ``:bg:N`` at the end of a line). The ``:bg:N`` substitutions are anchored
    so that a colour number never matches a prefix of a longer number.

    :param cells: collection of cell names such as ``"A1"`` to highlight.
    :param ether_url: base URL of the spreadsheet server.
    :param ether_doc_name: name of the sheet to modify.
    """
    highlight_rgb = 'rgb(242, 242, 142)'

    old_ether = get_socialcalc(ether_url, ether_doc_name)
    old_ether_lines = old_ether.splitlines()
    new_ether_lines = []

    # Find the colour number to use: reuse an existing definition of the
    # highlight colour, otherwise take the number after the last colour seen.
    old_color_numbers = []
    new_color_number = '1'
    for line in old_ether_lines:
        color_line = re.match(r'color:(\d+):(rgb.*$)', line)
        if color_line is None:
            continue
        if color_line.group(2) == highlight_rgb:
            old_color_numbers.append(color_line.group(1))
        else:
            new_color_number = str(1 + int(color_line.group(1)))
    if old_color_numbers:
        new_color_number = old_color_numbers[0]

    for line in old_ether_lines:
        parts = line.split(":")
        # Pure formatting cell with no content, e.g. cell:K15:f:1
        if len(parts) == 4 and parts[2] == "f":
            continue

        parsed_cell = re.match(r'cell:([A-Z]+)(\d+)(:.*)$', line)
        if parsed_cell is None:
            new_ether_lines.append(line)
            if (line.startswith('sheet:')
                    and new_color_number not in old_color_numbers):
                new_ether_lines.append('color:' + new_color_number + ':' +
                                       highlight_rgb)
            continue

        col, row, other = parsed_cell.groups()

        bg_match = re.search(r':bg:(\d+)($|:)', other)
        bg = bg_match.group(1) if bg_match is not None else None

        # Read the rowspan from its own attribute rather than from the last
        # field of the line, which may belong to a different attribute.
        span_match = re.search(r':rowspan:(\d+)(?=:|$)', other)
        span = int(span_match.group(1)) if span_match is not None else 1

        spanned_rows = [col + str(int(row) + x) for x in range(span)]
        is_highlighted = any(x in cells for x in spanned_rows)

        new_line = line
        if is_highlighted:
            if bg is None:
                new_line = line + ':bg:' + new_color_number
            elif bg != new_color_number:
                new_line = re.sub(r':bg:' + bg + r'(?=:|$)',
                                  ':bg:' + new_color_number, line)
        elif bg is not None and bg in old_color_numbers:
            # No longer flagged: remove the stale highlight background
            new_line = re.sub(r':bg:' + bg + r'(?=:|$)', '', line)
        new_ether_lines.append(new_line)

    new_ether = '\n'.join(new_ether_lines)
    make_spreadsheet(new_ether, ether_url + "_/" + ether_doc_name,
                     "socialcalc")
Пример #6
0
def load_page(user, admin, theform):
    """Handle one editor request: apply submitted edits and render the page.

    Depending on the submitted form values this function creates a new
    document or updates an existing one (name, repository path, corpus,
    status, assignee, mode, content, metadata), optionally converts the
    document to a spreadsheet via the NLP service, pushes the document to
    git, runs the XML NLP service, and handles file uploads in spreadsheet
    mode. It then collects the values needed by the template in
    ``render_data`` and returns ``render("editor", render_data)``.

    Module globals written here: ``code_2fa`` is set from the form, and in
    spreadsheet ("ether") mode the sheet name is appended to ``ether_url``.

    :param user: current user name; most writes below are skipped when it is
        the literal ``"demo"``.
    :param admin: admin level; compared through ``int(admin)`` and against
        the string ``"3"``.
    :param theform: submitted form; used through ``getvalue()``, ``keys()``,
        ``in`` and item access.
    :return: the result of ``render("editor", render_data)``.
    """
    global ether_url
    global code_2fa

    if theform.getvalue("2fa"):
        code_2fa = theform.getvalue("2fa")
    else:
        code_2fa = ""
    max_id = generic_query("SELECT MAX(id) AS max_id FROM docs", "")[0][0]
    if not max_id:  # This is for the initial case after init db
        max_id = 0
    text_content = ""
    repo_name = ""
    corpus = ""
    status = ""
    assignee = ""
    mode = "xml"
    schema = ""
    doc_id = ""  # Should only remain so if someone navigated directly to editor.py
    docname = ""
    old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema = [
        "", "", "", "", "", "", ""
    ]

    # Git credentials are only looked up for users with a positive admin level
    if int(admin) > 0:
        git_username, git_token, git_2fa = get_git_credentials(
            user, admin, code_2fa)
    else:
        git_username, git_token, git_2fa = (None, None, None)

    # dict of variables we'll need to render the html
    render_data = {}

    if theform.getvalue('id'):
        doc_id = theform.getvalue('id')
        if int(doc_id) > int(max_id):
            # Creating new doc case, assign some default values
            docname = "new_document"
            repo_name = "account/repo_name"
            status = "editing"
            assignee = "default_user"
            corpus = "default_corpus"
            schema = ""
            text_content = ""
            # If one of the four forms is edited or we're cloning a doc, then we create the doc, otherwise nothing happens (user cannot fill in nothing and create the doc)
            # NOTE(review): in the `doc_id > max_id` checks of this branch,
            # doc_id is the raw form value (no int() conversion, unlike the
            # comparison above) - confirm the comparison behaves as intended.
            # NOTE(review): docname was just set to 'new_document' and is not
            # read from the form in this branch, so the inner check of the
            # 'edit_docname' case below cannot pass as written.
            if theform.getvalue('edit_docname') and user != "demo":
                if docname != 'new_document':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_docname(doc_id, docname)

            if theform.getvalue('edit_filename') and user != "demo":
                repo_name = theform.getvalue('edit_filename')
                if repo_name != 'account/repo_name':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_filename(doc_id, repo_name)

            if theform.getvalue('edit_corpusname') and user != "demo":
                corpus = theform.getvalue('edit_corpusname')
                if corpus != 'default_corpus':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_corpus(doc_id, corpus)

            if theform.getvalue('edit_status') and user != "demo":
                status = theform.getvalue('edit_status')
                if status != 'editing':
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_status(doc_id, status)

            if theform.getvalue('edit_assignee') and user != "demo":
                assignee = theform.getvalue('edit_assignee')
                if assignee != "default_user":
                    if doc_id > max_id:
                        create_document(doc_id, docname, corpus, status,
                                        assignee, repo_name, text_content)
                        max_id = doc_id
                    else:
                        update_assignee(doc_id, assignee)

            # cloning metadata from an existing doc into a new doc
            # NOTE(review): unlike the edits above, this path is not guarded
            # by the user != "demo" check.
            if theform.getvalue('source_doc'):
                source_meta = get_doc_meta(theform.getvalue('source_doc'))
                if doc_id > max_id:
                    create_document(doc_id, docname, corpus, status, assignee,
                                    repo_name, text_content)
                    max_id = doc_id
                for meta in source_meta:
                    m_key, m_val = meta[2:4]
                    save_meta(int(doc_id), m_key.decode("utf8"),
                              m_val.decode("utf8"))
                    cache.invalidate_by_doc(doc_id, "meta")

        else:
            # Get previous values from DB
            old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema = get_doc_info(
                doc_id)
            # Assume new values are same, overwrite with different form values and update DB if new values found
            docname, corpus, repo_name, status, assignee, mode, schema = old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode, old_schema
            # (docname was already set to old_docname by the line above)
            docname = old_docname

            # Handle switch to spreadsheet mode if NLP spreadsheet service is called
            if theform.getvalue(
                    'nlp_spreadsheet'
            ) == "do_nlp_spreadsheet" and mode == "xml" and user != "demo":
                data_to_process = generic_query(
                    "SELECT content FROM docs WHERE id=?", (doc_id, ))[0][0]
                api_call = spreadsheet_nlp_api
                if api_call != "":
                    # Send the stored content to the configured NLP service
                    nlp_user, nlp_password = get_nlp_credentials()
                    data = {
                        "data": data_to_process,
                        "lb": "line",
                        "format": "sgml_no_parse"
                    }
                    resp = requests.post(api_call,
                                         data,
                                         auth=HTTPBasicAuth(
                                             nlp_user, nlp_password))
                    sgml = resp.text.encode("utf8")
                else:
                    # No NLP service configured: use the stored content as is
                    sgml = data_to_process.encode("utf8")
                out, err = make_spreadsheet(
                    sgml, ether_url + "_/gd_" + corpus + "_" + docname, "sgml")
                mode = "ether"

            # handle copying metadata
            if theform.getvalue('source_doc'):
                source_meta = get_doc_meta(theform.getvalue('source_doc'))
                existing_meta_keys = [x[2] for x in get_doc_meta(doc_id)]
                # don't overwrite existing keys
                meta_to_write = [
                    x for x in source_meta if x[2] not in existing_meta_keys
                ]
                for meta in meta_to_write:
                    m_key, m_val = meta[2], meta[3]
                    save_meta(int(doc_id), m_key, m_val)
                    cache.invalidate_by_doc(doc_id, "meta")

    # Submitted name/corpus take precedence over the stored values
    if theform.getvalue('edit_docname'):
        docname = theform.getvalue('edit_docname')
    elif old_docname != "":
        docname = old_docname
    if theform.getvalue('edit_corpusname'):
        corpus = theform.getvalue('edit_corpusname')
    elif old_corpus != "":
        corpus = old_corpus

    if theform.getvalue('id'):
        if int(doc_id) <= int(max_id):
            # After clicking edit in landing page, editing existing doc case, get the values from the db. pull the content from db to be displayed in the editor window.
            # Each field is written to the DB only when it differs from the
            # stored value and the user is not "demo".
            if theform.getvalue('edit_docname'):
                docname = theform.getvalue('edit_docname')
                if docname != old_docname and user != "demo":
                    update_docname(doc_id, docname)
            if theform.getvalue('edit_filename'):
                repo_name = theform.getvalue('edit_filename')
                if repo_name != old_repo and user != "demo":
                    update_filename(doc_id, repo_name)
            if theform.getvalue('edit_corpusname'):
                corpus = theform.getvalue('edit_corpusname')
                if corpus != old_corpus and user != "demo":
                    update_corpus(doc_id, corpus)
            if theform.getvalue('edit_status'):
                status = theform.getvalue('edit_status')
                if status != old_status and user != "demo":
                    update_status(doc_id, status)
            if theform.getvalue('edit_assignee'):
                assignee = theform.getvalue('edit_assignee')
                if assignee != old_assignee and user != "demo":
                    update_assignee(doc_id, assignee)
            if theform.getvalue('edit_mode'):
                mode = theform.getvalue('edit_mode')
                if mode != old_mode and user != "demo":
                    update_mode(doc_id, mode)
            # NOTE(review): this mode update is not guarded by the
            # user != "demo" check used for the other fields.
            if theform.getvalue(
                    'nlp_spreadsheet'
            ) == "do_nlp_spreadsheet":  # mode has been changed to spreadsheet via NLP
                update_mode(doc_id, "ether")
                mode = "ether"
            # The sheet name is derived from corpus and docname, so a rename
            # means copying the sheet to its new name and deleting the old one
            if old_docname != docname or old_corpus != corpus:
                old_sheet_name = "gd" + "_" + old_corpus + "_" + old_docname
                if sheet_exists(ether_url, old_sheet_name
                                ):  # Check if there is an ether sheet to copy
                    old_socialcalc = get_socialcalc(ether_url, old_sheet_name)
                    out, err = make_spreadsheet(
                        old_socialcalc,
                        ether_url + "_/gd_" + corpus + "_" + docname,
                        "socialcalc")
                    # Only delete the old sheet once the copy succeeded
                    if out == "OK":
                        delete_spreadsheet(ether_url, old_sheet_name)

            text_content = generic_query("SELECT content FROM docs WHERE id=?",
                                         (doc_id, ))[0][0]

    # In the case of reloading after hitting 'save', either create new doc into db, or update db
    # CodeMirror sends the form with its code content in it before 'save' so we just display it again
    if theform.getvalue('code'):
        text_content = theform.getvalue('code')
        text_content = text_content.replace("\r", "")
        text_content = re.sub(r'&(?!amp;)', r'&amp;',
                              text_content)  # Escape unescaped XML &
        text_content = unicode(text_content.decode("utf8"))
        if user != "demo":
            if int(doc_id) > int(max_id):
                create_document(doc_id, docname, corpus, status, assignee,
                                repo_name, text_content)
            else:
                save_changes(doc_id, text_content)
                cache.invalidate_by_doc(doc_id, "xml")

    git_status = False

    commit_message = ""
    if theform.getvalue('commit_msg'):
        commit_message = theform.getvalue('commit_msg')

    if theform.getvalue('push_git') == "push_git":
        repo_name = generic_query("SELECT filename FROM docs WHERE id=?",
                                  (doc_id, ))[0][0]
        file_name = generic_query("SELECT name FROM docs WHERE id=?",
                                  (doc_id, ))[0][0]
        # repo_name has the form account/repo[/sub/dirs]
        repo_info = repo_name.split('/')
        git_account, git_repo = repo_info[0], repo_info[1]
        if len(repo_info) > 2:
            subdir = '/'.join(repo_info[2:]) + "/"
        else:
            subdir = ""

        # The user will indicate the subdir in the repo_name stored in the db.
        # Therefore, a file may be associated with the target repo subdir zangsir/coptic-xml-tool/uploaded_commits,
        # and that is fine, but we will need to make this uploaded_commits subdir first to create our file.
        # NOTE(review): subdir is joined with '/' above but split on os.sep
        # here - confirm this is intended on platforms where they differ.
        if not os.path.isdir(prefix + subdir) and subdir != "":
            dirs = subdir.split(os.sep)[:-1]
            path_so_far = ""
            for dir in dirs:
                if not os.path.isdir(prefix + path_so_far + dir + os.sep):
                    # 0755 is a Python 2 octal literal (rwxr-xr-x)
                    os.mkdir(prefix + path_so_far + dir + os.sep, 0755)
                path_so_far += dir + os.sep

        if mode == "xml":
            text_content = generic_query("SELECT content FROM docs WHERE id=?",
                                         (doc_id, ))[0][0]
            serializable_content = build_meta_tag(
                doc_id) + text_content.strip() + "\n</meta>\n"
            serializable_content = serializable_content.encode('utf8')
            file_name = file_name.replace(" ", "_") + ".xml"
        else:  # (mode == "ether")
            text_content = ether_to_sgml(
                get_socialcalc(ether_url, "gd" + "_" + corpus + "_" + docname),
                doc_id)
            serializable_content = text_content
            file_name = file_name.replace(" ", "_") + "_ether.sgml"
        saved_file = subdir + file_name
        serialize_file(serializable_content, saved_file)
        git_status = push_update_to_git(git_username, git_token, saved_file,
                                        git_account, git_repo, commit_message)

        # File system cleanup
        if subdir == "":
            # Delete a file
            os.remove(prefix + file_name)
        else:
            # Delete a subdirectory
            shutil.rmtree(prefix + subdir)

    # Run the XML NLP service on the current content, if one is configured
    if theform.getvalue('nlp_xml') == "do_nlp_xml" and mode == "xml":
        api_call = xml_nlp_api
        if api_call != "":
            nlp_user, nlp_password = get_nlp_credentials()
            data = {"data": text_content, "format": "pipes"}
            resp = requests.post(api_call,
                                 data,
                                 auth=HTTPBasicAuth(nlp_user, nlp_password))
            text_content = resp.text

    # Editing options
    # Docname
    # Filename
    # Status options are read from status.tab, one per line
    status_list = open(prefix + "status.tab").read().replace("\r",
                                                             "").split("\n")
    render_data['status_options'] = [{
        'text': x,
        'selected': x == status
    } for x in status_list]
    render_data['assignee_options'] = [{
        'text': x,
        'selected': x == assignee
    } for x in get_user_list()]
    render_data['mode_options'] = [{
        'text': x,
        'selected': x == mode
    } for x in ["xml", "ether"]]
    render_data['nlp_service'] = {
        'xml_button_html': xml_nlp_button.decode("utf8"),
        'spreadsheet_button_html': spreadsheet_nlp_button.decode("utf8"),
        'disabled': user == "demo" or mode == "ether"
    }
    render_data['git_2fa'] = git_2fa == "true"
    if git_status:
        # Angle brackets are stripped from the git response before rendering
        render_data['git_commit_response'] = git_status.replace('<',
                                                                '').replace(
                                                                    '>', '')

    # prepare embedded editor html
    if mode == "ether":
        render_data['ether_mode'] = True
        # This modifies the module-level ether_url (declared global above):
        # from here on it includes the sheet name
        ether_url += "gd_" + corpus + "_" + docname
        render_data['ether_url'] = ether_url
        render_data['ether_stylesheets'] = get_ether_stylesheets()

        if "file" in theform and user != "demo":
            fileitem = theform["file"]
            if len(fileitem.filename) > 0:
                #  strip leading path from file name to avoid directory traversal attacks
                fn = os.path.basename(fileitem.filename)
                # NOTE(review): both uploads below go to a hard-coded server
                # URL instead of ether_url - confirm this is intended.
                if fn.endswith(".xls") or fn.endswith(".xlsx"):
                    make_spreadsheet(
                        fileitem.file.read(),
                        "https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_"
                        + corpus + "_" + docname, "excel")
                else:
                    sgml = fileitem.file.read()
                    meta_key_val = harvest_meta(sgml)
                    make_spreadsheet(
                        sgml,
                        "https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_"
                        + corpus + "_" + docname)
                    # Store the metadata harvested from the uploaded file,
                    # with "@" in keys replaced by "_"
                    for (key, value) in iteritems(meta_key_val):
                        key = key.replace("@", "_")
                        save_meta(int(doc_id), key.decode("utf8"),
                                  value.decode("utf8"))
                        cache.invalidate_by_doc(doc_id, "meta")
    else:
        render_data['ether_mode'] = False

    # stop here if no doc selected
    if doc_id:
        render_data['doc_is_selected'] = len(doc_id) != 0
    else:
        return render("editor", render_data)

    render_data['id'] = doc_id
    render_data['mode'] = mode
    render_data['schema'] = schema
    render_data['docname'] = docname
    render_data['corpusname'] = corpus

    render_data['text_content'] = text_content
    render_data['repo'] = repo_name

    render_data["admin_gt_zero"] = int(admin) > 0
    render_data["admin_eq_three"] = admin == "3"

    # handle clone meta button, and allow github pushing
    if int(admin) > 0:
        doc_list = generic_query(
            "SELECT id,corpus,name,status,assignee_username,mode FROM docs ORDER BY corpus, name COLLATE NOCASE",
            ())
        render_data["docs"] = []
        for doc in doc_list:
            doc_vars = {}
            doc_vars["id"] = str(doc[0])
            doc_vars["corpus"] = doc[1]
            doc_vars["name"] = doc[2]
            render_data['docs'].append(doc_vars)

    # Saving is offered only at admin level 3 or above
    render_data["can_save"] = not (int(admin) < 3)
    render_data["editor_help_link_html"] = editor_help_link
    # A form with a single key is treated as the first load of the page
    render_data["first_load"] = len(theform.keys()) == 1

    return render("editor", render_data)
Пример #7
0
def load_page(user,admin,theform):
	"""Build the complete CGI response for the document editor page.

	The function processes every action the editor form can submit and then
	renders the page:

	* creates a new document when the submitted ``id`` is greater than the
	  highest id in the ``docs`` table and a field differs from its default
	  (or editor content is posted in ``code``);
	* otherwise loads the stored document info and writes changed fields
	  (name, repo, corpus, status, assignee, mode) back to the database;
	* copies the spreadsheet to a new name and deletes the old one when the
	  document name or corpus changed and a sheet exists under the old name;
	* saves posted editor content, pushes to git (``push_git``), calls the
	  NLP service (``nlp_service``), and adds/deletes metadata entries;
	* fills the ``**placeholder**`` markers of ``templates/editor.html``.

	The code targets Python 2 (it relies on ``unicode`` and ``urllib.urlopen``).

	NOTE(review): form values, document content and metadata are inserted
	into the HTML without escaping; confirm this is acceptable for the
	deployment or escape them in the templates.

	:param user: user name, passed to ``get_git_credentials``.
	:param admin: admin level; converted with ``int()``, a value above 0
		enables the git commit controls.
	:param theform: submitted form; must support ``getvalue()``, ``in`` and
		item access (presumably a ``cgi.FieldStorage`` -- confirm with caller).
	:return: the page as a string, starting with the
		``Content-type:text/html`` header.
	"""
	max_id = generic_query("SELECT MAX(id) AS max_id FROM docs","")[0][0]
	if not max_id:  # This is for the initial case after init db
		max_id = 0
	text_content = ""
	repo_name = ""
	corpus = ""
	status = ""
	assignee = ""
	mode = "xml"
	doc_id = ""  # Should only remain so if someone navigated directly to editor.py
	docname = ""
	mymsg = ""
	old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode = ["","","","","",""]

	if theform.getvalue('id'):
		doc_id = theform.getvalue('id')
		if int(doc_id) > int(max_id):
			# Creating new doc case, assign some default values
			docname = "new_document"
			repo_name = "account/repo_name"
			status = "editing"
			assignee = "default_user"
			corpus = "default_corpus"
			text_content = ""
			# If one of the fields is edited away from its default, the doc is
			# created; later edited fields in the same request update it.
			# Nothing happens while every field still holds its default.
			# Ids are compared as integers: doc_id arrives as a string.
			if theform.getvalue('edit_docname'):
				# The submitted name must be read before comparing it with
				# the default, otherwise this branch can never fire.
				docname = theform.getvalue('edit_docname')
				if docname != 'new_document':
					if int(doc_id) > int(max_id):
						create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
						max_id = doc_id
					else:
						update_docname(doc_id, docname)

			if theform.getvalue('edit_filename'):
				repo_name = theform.getvalue('edit_filename')
				if repo_name != 'account/repo_name':
					if int(doc_id) > int(max_id):
						create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
						max_id = doc_id
					else:
						update_filename(doc_id, repo_name)

			if theform.getvalue('edit_corpusname'):
				corpus = theform.getvalue('edit_corpusname')
				if corpus != 'default_corpus':
					if int(doc_id) > int(max_id):
						create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
						max_id = doc_id
					else:
						update_corpus(doc_id, corpus)

			if theform.getvalue('edit_status'):
				status = theform.getvalue('edit_status')
				if status != 'editing':
					if int(doc_id) > int(max_id):
						create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
						max_id = doc_id
					else:
						update_status(doc_id, status)

			if theform.getvalue('edit_assignee'):
				assignee = theform.getvalue('edit_assignee')
				if assignee != "default_user":
					if int(doc_id) > int(max_id):
						create_document(doc_id, docname, corpus, status, assignee, repo_name, text_content)
						max_id = doc_id
					else:
						update_assignee(doc_id, assignee)
		else:
			# Get previous values from DB
			old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode = get_doc_info(doc_id)
			# Assume new values are the same; form values overwrite them below
			# and the DB is updated where they differ.
			docname, corpus, repo_name, status, assignee, mode = old_docname, old_corpus, old_repo, old_status, old_assignee, old_mode

	if theform.getvalue('edit_docname'):
		docname = theform.getvalue('edit_docname')
	elif old_docname != "":
		docname = old_docname
	if theform.getvalue('edit_corpusname'):
		corpus = theform.getvalue('edit_corpusname')
	elif old_corpus != "":
		corpus = old_corpus

	if theform.getvalue('id'):
		if int(doc_id) <= int(max_id):
			# Editing an existing doc (also reached right after a doc was
			# created above): store changed fields, then pull the content
			# from the db to be displayed in the editor window.
			if theform.getvalue('edit_docname'):
				docname = theform.getvalue('edit_docname')
				if docname != old_docname:
					update_docname(doc_id,docname)
			if theform.getvalue('edit_filename'):
				repo_name=theform.getvalue('edit_filename')
				if repo_name != old_repo:
					update_filename(doc_id,repo_name)
			if theform.getvalue('edit_corpusname'):
				corpus = theform.getvalue('edit_corpusname')
				if corpus != old_corpus:
					update_corpus(doc_id,corpus)
			if theform.getvalue('edit_status'):
				status = theform.getvalue('edit_status')
				if status != old_status:
					update_status(doc_id,status)
			if theform.getvalue('edit_assignee'):
				assignee = theform.getvalue('edit_assignee')
				if assignee != old_assignee:
					update_assignee(doc_id,assignee)
			if theform.getvalue('edit_mode'):
				mode = theform.getvalue('edit_mode')
				if mode != old_mode:
					update_mode(doc_id,mode)
			if old_docname != docname or old_corpus != corpus:
				# Sheet names embed corpus and docname, so a rename means
				# copying the sheet to the new name and deleting the old one.
				old_sheet_name = "gd" + "_" + old_corpus + "_" + old_docname
				if sheet_exists(ether_url, old_sheet_name):  # Check if there is an ether sheet to copy
					old_socialcalc = get_socialcalc(ether_url, old_sheet_name)
					out, err = make_spreadsheet(old_socialcalc, ether_url + "_/gd_" + corpus + "_" + docname, "socialcalc")
					if out == "OK":
						out, err = delete_spreadsheet(ether_url,old_sheet_name)
					else:
						mymsg += "out was: " + out + " err was" + err

			text_content = generic_query("SELECT content FROM docs WHERE id=?",(doc_id,))[0][0]

	# In the case of reloading after hitting 'save', either create new doc into db, or update db
	# CodeMirror sends the form with its code content in it before 'save' so we just display it again
	if theform.getvalue('code'):
		text_content = theform.getvalue('code')
		text_content = text_content.replace("\r","")
		text_content = unicode(text_content.decode("utf8"))
		# Without a document id there is nothing to save to; int("") would raise.
		if doc_id:
			if int(doc_id)>int(max_id):
				create_document(doc_id, docname,corpus,status,assignee,repo_name,text_content)
			else:
				save_changes(doc_id,text_content)

	git_status=False

	# Default to an empty message so a push without 'commit_msg' does not
	# hit an unbound name.
	commit_message = ""
	if theform.getvalue('commit_msg'):
		commit_message = theform.getvalue('commit_msg')

	if theform.getvalue('push_git') == "push_git" and mode == "xml":
		text_content = generic_query("SELECT content FROM docs WHERE id=?", (doc_id,))[0][0]
		repo_name = generic_query("SELECT filename FROM docs WHERE id=?", (doc_id,))[0][0]
		file_name = generic_query("SELECT name FROM docs WHERE id=?", (doc_id,))[0][0]
		file_name = file_name.replace(" ","_") + ".xml"
		# repo_name is "account/repo" optionally followed by a sub-directory path
		repo_info = repo_name.split('/')
		git_account, git_repo = repo_info[0], repo_info[1]
		if len(repo_info)>2:
			subdir = '/'.join(repo_info[2:]) + "/"
		else:
			subdir = ""
		if not os.path.isdir(prefix+subdir) and subdir != "":
			os.mkdir(prefix+subdir, 0o755)

		# The user will indicate the subdir in the repo_name stored in the db.
		# Therefore, a file may be associated with the target repo subdir zangsir/coptic-xml-tool/uploaded_commits,
		# and that is fine, but we will need to make this uploaded_commits subdir first to create our file.
		saved_file = subdir + file_name
		serialize_file (text_content,saved_file)
		git_username,git_password=get_git_credentials(user,admin)
		git_status = push_update_to_git(git_username, git_password, saved_file, git_account, git_repo, commit_message)
		# Clean up the temporary local copy once the push has been attempted
		if subdir == "":
			# Delete a file
			os.remove(prefix+file_name)
		else:
			shutil.rmtree(prefix+subdir)

	if theform.getvalue('nlp_service') == "do_nlp" and mode == "xml":
		# Pass the document through `params` so that characters such as
		# '&', '#' or '=' in the text are percent-encoded instead of
		# corrupting the query string.
		# NOTE(review): the service credentials are hard-coded here; consider
		# moving them to configuration.
		api_call = "https://corpling.uis.georgetown.edu/coptic-nlp/api"
		api_params = [("data", text_content), ("lb", "line"), ("format", "pipes")]
		resp = requests.get(api_call, params=api_params, auth=HTTPBasicAuth('coptic_client', 'kz7hh2'))
		text_content=resp.text


	# Editing options
	# Docname
	# Filename
	push_git = """<input type="hidden" name="push_git" id="push_git" value="">
	<input type="text" name="commit_msg" placeholder = "commit message here" style="width:140px">
	<div name="push_git" class="button" onclick="document.getElementById('push_git').value='push_git'; document.getElementById('editor_form').submit();"> <i class="fa fa-github"></i> Commit </div>
	"""

	if git_status:
		# Remove some html keyword symbols in the commit message returned by github3
		push_msg=git_status.replace('<','')
		push_msg=push_msg.replace('>','')
		push_git+="""<p style='color:red;'>""" + push_msg + ' successful' + """</p>"""

	# Status drop-down: one option per line of status.tab
	with open(prefix+"status.tab") as status_file:
		status_list = status_file.read().replace("\r","").split("\n")

	options = ""
	for stat in status_list:
		# Exact comparison, so a status that is a prefix of another one
		# does not mark both as selected.
		selected = ' selected="selected"' if stat == status else ''
		options += '<option value="' + stat + '"' + selected + '>' + stat + '</option>\n'

	edit_status="""<select name="edit_status" onchange='this.form.submit()'>"""

	edit_status += options+"</select>"

	# Get user_list from the logintools
	user_list=[]
	scriptpath = os.path.dirname(os.path.realpath(__file__)) + os.sep
	userdir = scriptpath + "users" + os.sep

	# Every .ini file in the users directory is a user, except these three
	non_user_files = ("config.ini", "default.ini", "admin.ini")
	userfiles = [ f for f in listdir(userdir) if isfile(join(userdir,f)) ]
	for userfile in sorted(userfiles):
		if userfile.endswith(".ini") and userfile not in non_user_files:
			user_list.append(userfile.replace(".ini",""))

	# Assignee drop-down; built by concatenation so that a '%' in a user
	# name cannot break string formatting.
	edit_assignee="""<select name="edit_assignee" onchange="this.form.submit()">"""
	for user_name in user_list:
		assignee_select=""
		if user_name==assignee:
			assignee_select="selected"
		edit_assignee+="""<option value='""" + user_name + "' " + assignee_select + ">" + user_name + """</option>"""
	edit_assignee+="</select>"

	edit_mode = '''<select name="edit_mode" onchange="this.form.submit()">\n<option value="xml">xml</option>\n<option value="ether">spreadsheet</option>\n</select>'''
	edit_mode = edit_mode.replace(mode+'"', mode+'" selected="selected"')

	# Metadata
	if theform.getvalue('metakey'):
		metakey = theform.getvalue('metakey')
		metavalue = theform.getvalue('metavalue')
		save_meta(doc_id,metakey,metavalue)
	if theform.getvalue('metaid'):
		metaid = theform.getvalue('metaid')
		delete_meta(metaid)
	metadata = print_meta(doc_id)

	nlp_service = """

	<div class="button" name="nlp_button" onclick="document.getElementById('nlp_service').value='do_nlp'; document.getElementById('editor_form').submit();"> <i class="fa fa-cogs"></i> NLP </div>


	"""


	page= "Content-type:text/html\r\n\r\n"
	#page += str(theform)
	page += urllib.urlopen(prefix + "templates" + os.sep + "editor.html").read()

	page += mymsg
	if mode == "ether":
		embedded_editor = urllib.urlopen(prefix + "templates" + os.sep + "ether.html").read()
		# Use a local URL instead of appending to the module-level ether_url,
		# which would otherwise keep growing with every call.
		sheet_url = ether_url + "gd_" + corpus + "_" + docname

		if "file" in theform:
			fileitem = theform["file"]
			# filename is empty/None when the field carried no uploaded file
			if fileitem.filename:
				#  strip leading path from file name to avoid directory traversal attacks
				fn = os.path.basename(fileitem.filename)
				if fn.endswith((".xls", ".xlsx")):
					make_spreadsheet(fileitem.file.read(),"https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_" + corpus + "_" + docname,"excel")
				else:
					sgml = fileitem.file.read()
					meta_key_val = harvest_meta(sgml)
					make_spreadsheet(sgml,"https://*****:*****@corpling.uis.georgetown.edu/ethercalc/_/gd_" + corpus + "_" + docname)
					# Store metadata harvested from the upload with the document
					for key, value in meta_key_val.items():
						key = key.replace("@","_")
						save_meta(doc_id,key,value)

		embedded_editor = embedded_editor.replace("**source**",sheet_url)
	else:
		embedded_editor = urllib.urlopen(prefix + "templates" + os.sep + "codemirror.html").read()

	page = page.replace("**embedded_editor**",embedded_editor)

	if not doc_id:
		# No document selected: replace the whole editor article with a notice
		exp = re.compile(r"<article>.*</article>",re.DOTALL)
		page = exp.sub("""<h2>No document selected | <a href="index.py">back to document list</a> </h2>""",page)
	else:
		page=page.replace("**content**",text_content)
		page=page.replace("**docname**",docname)
		page=page.replace("**corpusname**",corpus)
		page=page.replace("**edit_status**",edit_status)
		page=page.replace("**repo**",repo_name)
		page=page.replace("**edit_assignee**",edit_assignee)
		page=page.replace("**edit_mode**",edit_mode)
		page=page.replace("**metadata**",metadata)
		page=page.replace("**NLP**",nlp_service)
		page=page.replace("**id**",doc_id)
		# Only users with an admin level above 0 get the git commit controls
		if int(admin)>0:
			page=page.replace("**github**",push_git)
		else:
			page = page.replace("**github**", '')

	return page