def get_file_tree(project, file_path=None):
    """ Get a file listing from the git remote """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    # Refresh the local index from remote (skipped under test), then list
    # tracked files with git ls-files, optionally narrowed to one path.
    try:
        if not is_a_test(project):
            run_git_command(project, ["fetch"])
        ls_args = ["ls-files"] if file_path is None else ["ls-files", file_path]
        raw_listing = run_git_command(project, ls_args)
        listed_files = [
            line.strip().decode('utf-8', 'ignore')
            for line in raw_listing.splitlines()
        ]
    except subprocess.CalledProcessError as e:
        return jsonify({
            "msg": "Git file listing failed.",
            "reason": str(e.output)
        }), 500
    return jsonify(path_list_to_tree(listed_files))
def get_gallery_image(project, collection_id, file_name):
    """
    Serve a gallery image file for the given media collection.

    Looks up the collection's image_path in the database, then streams
    <file_root>/media/<image_path>/<file_name> as a JPEG response.
    Returns 404 if the collection row or the image file cannot be found.
    """
    logger.info("Getting galleries")
    try:
        project_id = get_project_id_from_name(project)
        config = get_project_config(project)
        connection = db_engine.connect()
        try:
            sql = sqlalchemy.sql.text(
                "SELECT image_path as image_path from media_collection WHERE project_id = :p_id AND id = :id "
            ).bindparams(p_id=project_id, id=collection_id)
            result = connection.execute(sql).fetchone()
        finally:
            # Close even if the query raises; the original leaked the
            # connection when no row was found (dict(None) raised first).
            connection.close()
        if result is None:
            return Response("Couldn't get gallery file.",
                            status=404,
                            content_type="text/json")
        result = dict(result)
        file_path = safe_join(config["file_root"], "media",
                              str(result['image_path']),
                              "{}".format(str(file_name)))
        try:
            with open(file_path, mode="rb") as img_file:
                content = img_file.read()
            return Response(content, status=200, content_type="image/jpeg")
        except Exception:
            logger.exception(f"Failed to read from image file at {file_path}")
            return Response("File not found: " + file_path,
                            status=404,
                            content_type="text/json")
    except Exception:
        logger.exception("Failed to parse gallery image request.")
        return Response("Couldn't get gallery file.",
                        status=404,
                        content_type="text/json")
def get_title(project, collection_id, publication_id, lang="swe"):
    """ Get title page for a given publication
    @TODO: remove publication_id, it is not needed?
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    can_show, message = get_collection_published_status(project, collection_id)
    if not can_show:
        return jsonify({
            "id": "{}_{}".format(collection_id, publication_id),
            "error": message
        }), 403
    logger.info("Getting XML for {} and transforming...".format(
        request.full_path))
    version = "int" if config["show_internally_published"] else "ext"
    # TODO get original_filename from publication_collection_title table? how handle language/version
    filename = "{}_tit_{}_{}.xml".format(collection_id, lang, version)
    xsl_file = "title.xsl"
    content = get_content(project, "tit", filename, xsl_file, None)
    return jsonify({
        "id": "{}_{}_tit".format(collection_id, publication_id),
        "content": content.replace(" id=", " data-id=")
    }), 200
def get_song_file(project, file_type, file_name):
    """
    Retrieve a single file from project root that belongs to a song
    It can be musicxml, midi
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    requested_type = str(file_type)
    file_path = ""
    if 'musicxml' in requested_type:
        # Download name gains the .xml suffix as well.
        file_name = "{}.xml".format(str(file_name))
        file_path = safe_join(config["file_root"], "musicxml", file_name)
    elif 'midi' in requested_type:
        file_path = safe_join(config["file_root"], "midi-files",
                              "{}.mid".format(str(file_name)))
    try:
        return send_file(file_path,
                         as_attachment=True,
                         mimetype='application/octet-stream',
                         attachment_filename=file_name)
    except Exception:
        logger.exception(f"Failed sending file from {file_path}")
        return Response("File not found.", status=404,
                        content_type="text/json")
def file_exists_in_file_root(project, file_path):
    """ Check if the given file exists in the webfiles repository for the given project
    Returns True if the file exists, otherwise False.
    """
    config = get_project_config(project)
    if config is not None:
        return os.path.exists(safe_join(config["file_root"], file_path))
    return False
def is_a_test(project):
    """
    Return True if running in debug mode and the project git_repository is
    not configured, indicating that this is a test; otherwise return False.

    Fix: the original implicitly returned None in the "not a test" case;
    this returns False explicitly, which is equivalent for every boolean
    use but clearer and type-consistent.
    """
    debug_mode = int(os.environ.get("FLASK_DEBUG", 0)) == 1
    if not debug_mode:
        return False
    config = get_project_config(project)
    # Either a missing project config or a config without a git repository
    # counts as "test" when in debug mode.
    return config is None or config["git_repository"] is None
def update_config(project):
    """Overwrite the project's config.json with the JSON body of the request."""
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    payload = request.get_json()
    target_path = os.path.join(config["file_root"], "config.json")
    with open(target_path, "w") as f:
        json.dump(payload, f)
    return jsonify({"msg": "received"})
def get_config_file(project):
    """Return the project's config.json contents (empty JSON object if absent)."""
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    file_path = os.path.join(config["file_root"], "config.json")
    if not os.path.exists(file_path):
        return jsonify({})
    with open(file_path) as f:
        return jsonify(json.load(f))
def run_git_command(project, command):
    """ Helper method to run arbitrary git commands as if in the project's webfiles repository root folder
    @type project: str
    @type command: list
    @return: combined stdout/stderr of the git command, as bytes
    @raise subprocess.CalledProcessError: if git exits with non-zero status
    """
    config = get_project_config(project)
    git_root = config["file_root"]
    # Build the full argv in one expression instead of appending in a loop.
    # Passing a list (shell=False) avoids any shell-injection concerns.
    git_command = ["git", "-C", git_root] + list(command)
    return subprocess.check_output(git_command, stderr=subprocess.STDOUT)
def check_project_config(project):
    """ Check the config file for project webfiles repository configuration.
    Returns True if config okay, otherwise False and a message
    """
    config = get_project_config(project)
    if config is None:
        return False, "Project config not found."
    # git_repository is only mandatory outside of test mode.
    if not is_a_test(project) and "git_repository" not in config:
        return False, "git_repository not in project config."
    for key, failure_msg in (
        ("git_branch", "git_branch information not in project config."),
        ("file_root", "file_root information not in project config."),
    ):
        if key not in config:
            return False, failure_msg
    return True, "Project config OK."
def get_pdf_file(project, collection_id, file_type, download_name, use_download_name):
    """
    Retrieve a single file from project root
    Currently only PDF or ePub
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    connection = db_engine.connect()
    try:
        # Check that the collection exists
        statement = sqlalchemy.sql.text(
            "SELECT * FROM publication_collection WHERE id=:coll_id"
        ).bindparams(coll_id=collection_id)
        row = connection.execute(statement).fetchone()
    finally:
        # Always release the connection; the original leaked it when the
        # collection was missing.
        connection.close()
    if row is None:
        return jsonify({
            "msg": "Desired publication collection was not found in database!"
        }), 404
    file_path = ""
    if use_download_name and 'pdf' in str(file_type):
        # Strip a trailing ".pdf" from the requested download name, if any.
        if '.pdf' in str(download_name):
            direct_download_name = download_name.split('.pdf')[0]
        else:
            direct_download_name = download_name
        file_path = safe_join(config["file_root"], "downloads", collection_id,
                              "{}.pdf".format(direct_download_name))
    elif 'pdf' in str(file_type):
        file_path = safe_join(config["file_root"], "downloads", collection_id,
                              "{}.pdf".format(int(collection_id)))
    elif 'epub' in str(file_type):
        file_path = safe_join(config["file_root"], "downloads", collection_id,
                              "{}.epub".format(int(collection_id)))
    try:
        return send_file(file_path,
                         attachment_filename=download_name,
                         conditional=True)
    except Exception:
        logger.exception(f"Failed sending file from {file_path}")
        return Response("File not found.", status=404,
                        content_type="text/json")
def get_static_pages_as_json(project, language):
    """Return the markdown static-page hierarchy for the given language as JSON."""
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    logger.info("Getting static content from /{}/static-pages-toc/{}".format(
        project, language))
    folder_path = safe_join(config["file_root"], "md", language)
    if not os.path.exists(folder_path):
        logger.info("did not find {}".format(folder_path))
        abort(404)
    return jsonify(path_hierarchy(project, folder_path, language)), 200
def get_json_file(project, folder, file_name):
    """Read <file_root>/<folder>/<file_name>.json and return it as JSON."""
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    file_path = safe_join(config["file_root"], folder,
                          "{}.json".format(str(file_name)))
    try:
        with open(file_path) as f:
            payload = json.load(f)
    except Exception:
        logger.exception(f"Failed to read JSON file at {file_path}")
        return Response("File not found.", status=404,
                        content_type="text/json")
    return jsonify(payload), 200
def get_html_contents_as_json(project, filename):
    """Return <file_root>/html/<filename>.html wrapped in a JSON envelope."""
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    logger.info("Getting static content from /{}/html/{}".format(
        project, filename))
    file_path = safe_join(config["file_root"], "html",
                          "{}.html".format(filename))
    if not os.path.exists(file_path):
        abort(404)
    with io.open(file_path, encoding="UTF-8") as html_file:
        contents = html_file.read()
    return jsonify({"filename": filename, "content": contents}), 200
def get_type_gallery_image(project, connection_type, connection_id):
    """
    Serve the front thumbnail for a tag/location/subject media connection.

    Joins the media tables to resolve the image path and front-image
    filename for the connection, then streams the "_thumb" variant as JPEG.
    Returns 404 for unknown connection types or missing rows/files.
    """
    logger.info("Getting gallery file")
    if connection_type not in ['tag', 'location', 'subject']:
        return Response("Couldn't get media connection data.",
                        status=404,
                        content_type="text/json")
    type_column = "{}_id".format(connection_type)
    try:
        project_id = get_project_id_from_name(project)
        config = get_project_config(project)
        connection = db_engine.connect()
        try:
            # connection_type is whitelisted above, so interpolating it into
            # the SQL text cannot inject arbitrary SQL; values are bound.
            sql = f"SELECT mcol.image_path, m.image_filename_front FROM media_connection mcon " \
                  f"JOIN {connection_type} t ON t.id = mcon.{type_column} " \
                  f"JOIN media m ON m.id = mcon.media_id " \
                  f"JOIN media_collection mcol ON mcol.id = m.media_collection_id " \
                  f"WHERE t.id = :id " \
                  f"AND t.project_id = :p_id " \
                  f"AND mcol.deleted != 1 AND t.deleted != 1 AND m.deleted != 1 AND mcon.deleted != 1 LIMIT 1"
            sql = sqlalchemy.sql.text(sql).bindparams(p_id=project_id,
                                                      id=connection_id)
            result = connection.execute(sql).fetchone()
        finally:
            # Close even on error; the original leaked the connection when
            # no row was found (dict(None) raised before close()).
            connection.close()
        if result is None:
            return Response("Couldn't get type file.",
                            status=404,
                            content_type="text/json")
        result = dict(result)
        file_path = safe_join(
            config["file_root"], "media", str(result['image_path']),
            str(result['image_filename_front']).replace(".jpg", "_thumb.jpg"))
        try:
            with open(file_path, mode="rb") as img_file:
                content = img_file.read()
            return Response(content, status=200, content_type="image/jpeg")
        except Exception:
            logger.exception(f"Failed to read from image file at {file_path}")
            return Response("File not found: " + file_path,
                            status=404,
                            content_type="text/json")
    except Exception:
        logger.exception("Failed to parse gallery image request.")
        return Response("Couldn't get type file.",
                        status=404,
                        content_type="text/json")
def get_collections(project):
    """List the publication collections visible for this project as JSON."""
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    logger.info("Getting collections /{}/collections".format(project))
    # Projects that show internally published material include published>=1,
    # otherwise only fully published (>=2) collections are listed.
    status = 1 if config["show_internally_published"] else 2
    project_id = get_project_id_from_name(project)
    statement = sqlalchemy.sql.text(
        """ SELECT id, name as title, published, date_created, date_modified, date_published_externally,
        legacy_id, project_id, publication_collection_title_id, publication_collection_introduction_id, name
        FROM publication_collection WHERE project_id = :p_id AND published>=:p_status ORDER BY name """
    ).bindparams(p_status=status, p_id=project_id)
    connection = db_engine.connect()
    results = [dict(row) for row in connection.execute(statement).fetchall()]
    connection.close()
    return jsonify(results)
def update_files_in_git_repo(project, specific_file=False):
    """ Helper method to sync local repositories with remote to get latest changes """
    config = get_project_config(project)
    if config is None:
        return False, "No such project."
    git_branch = config["git_branch"]
    # First, fetch latest changes from remote, but don't update local
    try:
        run_git_command(project, ["fetch"])
    except subprocess.CalledProcessError as e:
        return False, str(e.output)
    if specific_file:
        # Only one file requested: check out just that path from the remote
        # branch, ignoring the rest. Faster when other changes don't matter.
        try:
            run_git_command(project, [
                "checkout", "origin/{}".format(git_branch), "--", specific_file
            ])
        except subprocess.CalledProcessError as e:
            return False, str(e.output)
        return True, specific_file
    # Full update: list what changed upstream, then merge it into local.
    try:
        raw = run_git_command(project, [
            "show", "--pretty=format:", "--name-only",
            "..origin/{}".format(git_branch)
        ])
        new_and_changed_files = [
            line.strip().decode('utf-8', 'ignore')
            for line in raw.splitlines()
        ]
    except subprocess.CalledProcessError as e:
        return False, str(e.output)
    try:
        run_git_command(project, ["merge", "origin/{}".format(git_branch)])
    except subprocess.CalledProcessError as e:
        return False, str(e.output)
    return True, new_and_changed_files
def get_md_contents_as_json(project, fileid):
    """Locate a markdown file by its hyphen-separated fileid and return its contents as JSON.

    The fileid segments become a glob pattern under <file_root>/md/
    (each "-" turns into a directory-level wildcard); the first matching
    file on disk wins. Responds 404 if nothing matches or reading fails.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    else:
        parts = fileid.split("-")
        pathTmp = fileid
        # Legacy id normalization for ids with more than 4 segments: if the
        # 5th segment contains a "0" the id is kept as-is (first 5 segments);
        # otherwise segment 4 is dropped and segment 5 gets a "0" prefix.
        # NOTE(review): intent unclear from here -- looks like compensation
        # for inconsistently zero-padded legacy ids; confirm against data.
        if len(parts) > 4:
            if "0" in parts[4]:
                pathTmp = parts[0] + "-" + parts[1] + "-" + parts[
                    2] + "-" + parts[3] + "-" + parts[4]
            else:
                pathTmp = parts[0] + "-" + parts[1] + "-" + parts[
                    2] + "-0" + parts[4]
        # Each "-" becomes "*/" so segments match nested folder-name prefixes.
        path = "*/".join(pathTmp.split("-")) + "*"
        file_path_query = safe_join(config["file_root"], "md", path)
        try:
            file_path_full = [f for f in glob.iglob(file_path_query)]
            if len(file_path_full) <= 0:
                logger.info(
                    "Not found {} (md_contents fetch)".format(file_path_full))
                abort(404)
            else:
                # Use the first glob match only.
                file_path = file_path_full[0]
                logger.info("Finding {} (md_contents fetch)".format(file_path))
                if os.path.exists(file_path):
                    with io.open(file_path, encoding="UTF-8") as md_file:
                        contents = md_file.read()
                    data = {"fileid": fileid, "content": contents}
                    return jsonify(data), 200
                else:
                    abort(404)
        except Exception:
            logger.exception("Error fetching: {}".format(file_path_query))
            abort(404)
def get_file(project, file_path):
    """ Get latest file from git remote """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    # TODO swift and/or S3 support for large files (images/facsimiles)
    config_okay = check_project_config(project)
    if not config_okay[0]:
        return jsonify({
            "msg": "Error in git configuration, check configuration file.",
            "reason": config_okay[1]
        }), 500
    if not is_a_test(project):
        # Sync the desired file from remote repository to local API repository
        update_repo = update_files_in_git_repo(project, file_path)
        if not update_repo[0]:
            return jsonify({
                "msg": "Git update failed to execute properly.",
                "reason": update_repo[1]
            }), 500
    if not file_exists_in_file_root(project, file_path):
        return jsonify({
            "msg": "The requested file was not found in the git repository."
        }), 404
    # Read the file, encode as a base64 string, return to the user as JSON.
    with io.open(safe_join(config["file_root"], file_path), mode="rb") as f:
        encoded = base64.b64encode(f.read())
    return jsonify({
        "file": encoded.decode("utf-8"),
        "filepath": file_path
    })
def get_facsimile_page_image(project, facsimile_type, facs_id, facs_nr):
    """Serve one facsimile (or song-example) page image as a JPEG response."""
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    logger.info("Getting facsimile page image")
    try:
        zoom_level = "4"
        if facsimile_type == 'facsimile':
            file_path = safe_join(config["file_root"], "facsimiles", facs_id,
                                  zoom_level, "{}.jpg".format(int(facs_nr)))
        elif facsimile_type == 'song-example':
            file_path = safe_join(config["file_root"], "song-example-images",
                                  facs_id, "{}.jpg".format(int(facs_nr)))
        else:
            # TODO placeholder page image file?
            file_path = ""
        try:
            with open(file_path, mode="rb") as img_file:
                content = img_file.read()
            return Response(content, status=200, content_type="image/jpeg")
        except Exception:
            logger.exception(f"Failed to read facsimile page from {file_path}")
            return Response("File not found: " + file_path,
                            status=404,
                            content_type="text/json")
    except Exception:
        logger.exception(
            f"Failed to interpret facsimile page image request {request.url}")
        return Response("Couldn't get facsimile page.",
                        status=404,
                        content_type="text/json")
def upload_facsimile_file(project, collection_id, page_number):
    """ Upload a facsimile file in image format.
    Endpoint accepts requests with enctype=multipart/form-data
    Endpoint assumes facsimile is provided as form parameter named 'facsimile'
    (for example, curl -F 'facsimile=@path/to/local/file' https://api.sls.fi/digitaledition/<project>/facsimiles/<collection_id>/<page_number>)
    ---
    First and foremost, only accept images. Reject with 400 anything that allowed_facsimile() doesn't accept.
    Then, attempt to convert image to 4 different "zoom levels" of .jpg with imagemagick
    Lastly, store the images in root/facsimiles/<collection_id>/<zoom_level>/<page_number>.jpg
    Where zoom_level is determined by FACSIMILE_IMAGE_SIZES in generics.py (1-4)
    """
    # TODO OpenStack Swift support for ISILON file storage - config param for root 'facsimiles' path
    # ensure temporary facsimile upload folder exists
    os.makedirs(FACSIMILE_UPLOAD_FOLDER, exist_ok=True)
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    if request.files is None:
        return jsonify({"msg": "Request.files is none!"}), 400
    if "facsimile" not in request.files:
        return jsonify({"msg": "No file provided in request (facsimile)!"}), 400
    # get a folder path for the facsimile collection from the database if set, otherwise use project file root
    connection = db_engine.connect()
    try:
        collection_check_statement = sqlalchemy.sql.text(
            "SELECT * FROM publication_facsimile_collection WHERE deleted != 1 AND id=:coll_id"
        ).bindparams(coll_id=collection_id)
        row = connection.execute(collection_check_statement).fetchone()
    finally:
        # Always release the connection; the original leaked it when the
        # collection was missing.
        connection.close()
    if row is None:
        return jsonify(
            {"msg": "Desired facsimile collection was not found in database!"}), 404
    elif row.folder_path != '' and row.folder_path is not None:
        collection_folder_path = safe_join(row.folder_path, collection_id)
    else:
        collection_folder_path = safe_join(config["file_root"], "facsimiles",
                                           collection_id)
    # handle received file
    uploaded_file = request.files["facsimile"]
    # if user selects no file, some libraries send a POST with an empty file and filename
    if uploaded_file.filename == "":
        return jsonify({"msg": "No file provided in uploaded_file.filename!"}), 400
    if uploaded_file and allowed_facsimile(uploaded_file.filename):
        # handle potentially malicious filename and save file to temp folder
        temp_path = os.path.join(FACSIMILE_UPLOAD_FOLDER,
                                 secure_filename(uploaded_file.filename))
        uploaded_file.save(temp_path)
        # resize file using imagemagick
        resize = convert_resize_uploaded_facsimile(temp_path,
                                                   collection_folder_path,
                                                   page_number)
        if resize:
            return jsonify({"msg": "OK"})
        else:
            return jsonify({"msg": "Failed to resize uploaded facsimile!"}), 500
    else:
        return jsonify({
            "msg": f"Invalid facsimile provided. Allowed filetypes are {ALLOWED_EXTENSIONS_FOR_FACSIMILE_UPLOAD}. TIFF files are preferred."
        }), 400
def handle_toc(project, collection_id):
    """
    GET: return the stored table-of-contents JSON for a collection.
    PUT: replace it (requires a JWT identity with access to the project),
    writing atomically via a .new temp file, then commit the result to the
    project's git repository.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    if request.method == "GET":
        logger.info(
            f"Getting table of contents for /{project}/toc/{collection_id}")
        file_path_query = safe_join(config["file_root"], "toc",
                                    f'{collection_id}.json')
        try:
            file_path = [f for f in glob.iglob(file_path_query)][0]
            logger.info(f"Finding {file_path} (toc collection fetch)")
            if os.path.exists(file_path):
                with io.open(file_path, encoding="UTF-8") as json_file:
                    contents = json_file.read()
                return contents, 200
            else:
                abort(404)
        except IndexError:
            logger.warning(f"File {file_path_query} not found on disk.")
            abort(404)
        except Exception:
            logger.exception(f"Error fetching {file_path_query}")
            abort(404)
    elif request.method == "PUT":
        # uploading a new table of contents requires authorization and project permission
        identity = get_jwt_identity()
        if identity is None:
            return jsonify({"msg": "Missing Authorization Header"}), 403
        authorized = False
        # in debug mode, test user has access to every project
        if int(os.environ.get("FLASK_DEBUG", 0)) == 1 and identity["sub"] == "*****@*****.**":
            authorized = True
        elif identity["projects"] is not None and project in identity["projects"]:
            authorized = True
        if not authorized:
            return jsonify({"msg": "No access to this project."}), 403
        logger.info(
            f"Processing new table of contents for /{project}/toc/{collection_id}")
        data = request.get_json()
        if not data:
            return jsonify({"msg": "No JSON in payload."}), 400
        file_path = safe_join(config["file_root"], "toc",
                              f"{collection_id}.json")
        try:
            # save new toc as file_path.new
            with open(f"{file_path}.new", "w", encoding="utf-8") as outfile:
                json.dump(data, outfile)
        except Exception as ex:
            # if we fail to save the file, make sure it doesn't exist before returning an error
            try:
                os.remove(f"{file_path}.new")
            except FileNotFoundError:
                pass
            # Fix: the exception object itself is not JSON serializable,
            # which would make this error response raise; send str(ex).
            return jsonify({
                "msg": "Failed to save JSON data to disk.",
                "reason": str(ex)
            }), 500
        else:
            # if we succeed, remove the old file and rename file_path.new to file_path
            # (could be combined into just os.rename, but some OSes don't like that)
            os.rename(f"{file_path}.new", file_path)
            # get author and construct git commit message
            author_email = get_jwt_identity()["sub"]
            author = "{} <{}>".format(author_email.split("@")[0], author_email)
            message = "TOC update by {}".format(author_email)
            # git commit (and possibly push) file
            commit_result = git_commit_and_push_file(project, author, message,
                                                     file_path)
            if commit_result:
                return jsonify({"msg": f"Saved new toc as {file_path}"})
            else:
                return jsonify({
                    "msg": "git commit failed! Possible configuration fault or git conflict."
                }), 500
def get_facsimile_file(project, collection_id, number, zoom_level):
    """
    Retrieve a single facsimile image file from project root
    Facsimile files are stored as follows: root/facsimiles/<collection_id>/<zoom_level>/<page_number>.jpg
    The collection_id these are sorted by is the publication_facsimile_collection id, stored as publication_id in the old database structure?
    However, the first page of a publication is not necessarily 1.jpg, as facsimiles often contain title pages and blank pages
    Thus, calling for facsimiles/1/1/1 may require fetching a file from root/facsimiles/1/1/5.jpg
    """
    # TODO OpenStack Swift support for ISILON file storage - config param for root 'facsimiles' path
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    connection = db_engine.connect()
    try:
        # Resolve the publication's published status for this collection.
        check_statement = sqlalchemy.sql.text(
            "SELECT published FROM publication WHERE deleted != 1 AND id = "
            "(SELECT publication_id FROM publication_facsimile WHERE deleted != 1 AND publication_facsimile_collection_id=:coll_id LIMIT 1)"
        ).bindparams(coll_id=collection_id)
        row = connection.execute(check_statement).fetchone()
        if row is None:
            return jsonify(
                {"msg": "Desired facsimile file not found in database."}), 404
        try:
            status = int(row[0])
        except ValueError:
            logger.exception(f"Couldn't convert {row[0]} to integer.")
            return jsonify(
                {"msg": "Desired facsimile file not found in database."}), 404
        except Exception:
            logger.exception(
                f"Unknown exception handling {row} during facsimile file fetch."
            )
            return jsonify(
                {"msg": "Desired facsimile file not found in database."}), 404
        # status 0 = unpublished; status 1 = internally published and only
        # visible when the project allows it.
        if status == 0:
            return jsonify(
                {"msg": "Desired facsimile file not found in database."}), 404
        elif status == 1 and not config["show_internally_published"]:
            return jsonify(
                {"msg": "Desired facsimile file not found in database."}), 404
        statement = sqlalchemy.sql.text(
            "SELECT * FROM publication_facsimile_collection WHERE deleted != 1 AND id=:coll_id"
        ).bindparams(coll_id=collection_id)
        row = connection.execute(statement).fetchone()
        if row is None:
            return jsonify({
                "msg": "Desired facsimile collection was not found in database!"
            }), 404
        elif row.folder_path != '' and row.folder_path is not None:
            file_path = safe_join(row.folder_path, collection_id, zoom_level,
                                  "{}.jpg".format(int(number)))
        else:
            file_path = safe_join(config["file_root"], "facsimiles",
                                  collection_id, zoom_level,
                                  "{}.jpg".format(int(number)))
    finally:
        # Fix: the original leaked the DB connection on every early-return
        # path above; always release it.
        connection.close()
    try:
        with open(file_path, mode="rb") as img_file:
            content = img_file.read()
        return Response(content, status=200, content_type="image/jpeg")
    except Exception:
        logger.exception(f"Exception reading facsimile at {file_path}")
        return jsonify({"msg": "Desired facsimile file not found."}), 404
def get_facsimiles(project, publication_id, section_id=None):
    """List facsimile entries (with page ranges and start URLs) for a publication.

    Builds the SQL incrementally based on project publish settings and the
    optional section_id, then computes first/last page numbers per facsimile.
    NOTE(review): returns a (False, message) tuple for a bad publication_id
    but a Flask response otherwise -- callers must handle both shapes.
    """
    config = get_project_config(project)
    if publication_id is None or str(publication_id) == "undefined":
        return False, "No such publication_id."
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    else:
        logger.info("Getting facsimiles /{}/facsimiles/{}".format(
            project, publication_id))
        connection = db_engine.connect()
        sql = 'select *, f.id as publication_facsimile_id from publication_facsimile as f \
            left join publication_facsimile_collection as fc on fc.id=f.publication_facsimile_collection_id \
            left join publication p on p.id=f.publication_id \
            where f.deleted != 1 and fc.deleted != 1 and f.publication_id=:p_id \
            '
        # Widen or narrow by publish status depending on project settings.
        if config["show_internally_published"]:
            sql = " ".join([sql, "and p.published>0"])
        elif config["show_unpublished"]:
            sql = " ".join([sql, "and p.published>2"])
        if section_id is not None:
            sql = " ".join([sql, "and f.section_id = :section"])
        sql = " ".join([sql, "ORDER BY f.priority"])
        # Legacy ids may arrive as "<coll>_<pub>"; use the publication part.
        if '_' in publication_id:
            pub_id = publication_id.split('_')[1]
        else:
            pub_id = publication_id
        if section_id is not None:
            # Section ids may carry a "ch" prefix from the frontend.
            section_id = str(section_id).replace('ch', '')
            statement = sqlalchemy.sql.text(sql).bindparams(
                p_id=pub_id, section=section_id)
        else:
            statement = sqlalchemy.sql.text(sql).bindparams(p_id=pub_id)
        result = []
        for row in connection.execute(statement).fetchall():
            facsimile = dict(row)
            if row.folder_path != '' and row.folder_path is not None:
                facsimile["start_url"] = row.folder_path
            else:
                facsimile["start_url"] = safe_join(
                    "digitaledition", project, "facsimile",
                    str(row["publication_facsimile_collection_id"]))
            # start_page_number offsets for title/blank pages before page 1.
            pre_pages = row["start_page_number"] or 0
            facsimile["first_page"] = pre_pages + row["page_nr"]
            # last_page is one before the next facsimile's first page in the
            # same collection, else the collection's total page count.
            sql2 = "SELECT * FROM publication_facsimile WHERE deleted != 1 AND publication_facsimile_collection_id=:fc_id AND page_nr>:page_nr ORDER BY page_nr ASC LIMIT 1"
            statement2 = sqlalchemy.sql.text(sql2).bindparams(
                fc_id=row["publication_facsimile_collection_id"],
                page_nr=row["page_nr"])
            for row2 in connection.execute(statement2).fetchall():
                facsimile["last_page"] = pre_pages + row2["page_nr"] - 1
            if "last_page" not in facsimile.keys():
                facsimile["last_page"] = row["number_of_pages"]
            result.append(facsimile)
        connection.close()
        return_data = result
        return jsonify(return_data), 200
def update_file(project, file_path):
    """ Add new or update existing file in git remote.

    PUT data MUST be in JSON format

    PUT data MUST contain the following:
    file: xml file data in base64, to be created or updated in git repository

    PUT data MAY contain the following override information:
    author: email of the person authoring this change, if not given, JWT identity is used instead
    message: commit message for this change, if not given, generic "File update by <author>" message is used instead
    force: boolean value, if True uses force-push to override errors and possibly mangle the git remote to get the update through
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400
    # Check if request has valid JSON and set author/message/force accordingly
    request_data = request.get_json()
    if not request_data:
        return jsonify({"msg": "No JSON in PUT request."}), 400
    elif "file" not in request_data:
        return jsonify({"msg": "No file in JSON data."}), 400
    author_email = request_data.get("author", get_jwt_identity()["sub"])
    message = request_data.get("message",
                               "File update by {}".format(author_email))
    force = bool(request_data.get("force", False))
    # git commit requires author info to be in the format "Name <email>"
    # As we only have an email address to work with, split email on @ and give first part as name
    # - [email protected] becomes "foo <*****@*****.**>"
    author = "{} <{}>".format(author_email.split("@")[0], author_email)
    # Read the file from request and decode the base64 string into raw binary data
    file = io.BytesIO(base64.b64decode(request_data["file"]))
    # verify git config
    config_okay = check_project_config(project)
    if not config_okay[0]:
        return jsonify({
            "msg": "Error in git configuration, check configuration file.",
            "reason": config_okay[1]
        }), 500
    # fetch latest changes from remote
    if not is_a_test(project):
        try:
            run_git_command(project, ["fetch"])
        except subprocess.CalledProcessError as e:
            return jsonify({
                "msg": "Git fetch failed to execute properly.",
                "reason": str(e.output)
            }), 500
    # check if desired file has changed in remote since last update
    # if so, fail and return both user file and repo file to user, unless force=True
    try:
        output = run_git_command(project, [
            "show", "--pretty=format:", "--name-only",
            "..origin/{}".format(config["git_branch"])
        ])
        new_and_changed_files = [
            s.strip().decode('utf-8', 'ignore') for s in output.splitlines()
        ]
    except subprocess.CalledProcessError as e:
        return jsonify({
            "msg": "Git show failed to execute properly.",
            "reason": str(e.output)
        }), 500
    if safe_join(config["file_root"], file_path) in new_and_changed_files and not force:
        with io.open(safe_join(config["file_root"], file_path),
                     mode="rb") as repo_file:
            file_bytestring = base64.b64encode(repo_file.read())
        # Fix: the original message contained a literal "{}" because it was
        # never formatted with the file path (cf. the parallel message in
        # git_commit_and_push_file, which does format it).
        return jsonify({
            "msg": "File {} has been changed in git repository since last update, please manually check file changes.".format(file_path),
            "your_file": request_data["file"],
            "repo_file": file_bytestring.decode("utf-8")
        }), 409
    # merge in latest changes so that the local repository is updated
    try:
        run_git_command(
            project, ["merge", "origin/{}".format(config["git_branch"])])
    except subprocess.CalledProcessError as e:
        return jsonify({
            "msg": "Git merge failed to execute properly.",
            "reason": str(e.output)
        }), 500
    # check the status of the git repo, so we know if we need to git add later
    file_exists = file_exists_in_file_root(project, file_path)
    # Secure filename and save new file to local repo
    # Could be more secure...
    pos = file_path.find('.xml')
    if pos > 0:
        filename = safe_join(config["file_root"], file_path)
        if file and filename:
            with io.open(filename, mode="wb") as new_file:
                new_file.write(file.getvalue())
    else:
        return jsonify({"msg": "File path error"}), 500
    # Add file to local repo if it wasn't already in the repository
    if not file_exists:
        try:
            run_git_command(project, ["add", filename])
        except subprocess.CalledProcessError as e:
            return jsonify({
                "msg": "Git add failed to execute properly.",
                "reason": str(e.output)
            }), 500
    # Commit changes to local repo, noting down user and commit message
    try:
        run_git_command(
            project, ["commit", "--author={}".format(author), "-m", message])
    except subprocess.CalledProcessError as e:
        return jsonify({
            "msg": "Git commit failed to execute properly.",
            "reason": str(e.output)
        }), 500
    # push new commit to remote repository
    if not is_a_test(project):
        try:
            if force:
                run_git_command(project, ["push", "-f"])
            else:
                run_git_command(project, ["push"])
        except subprocess.CalledProcessError as e:
            return jsonify({
                "msg": "Git push failed to execute properly.",
                "reason": str(e.output)
            }), 500
    return jsonify({"msg": "File updated successfully in repository."})
def git_commit_and_push_file(project, author, message, file_path, force: bool = False) -> bool:
    """
    Commit a single file to the project's local git repository and push it
    to the remote.

    Sequence: verify project git config -> fetch (skipped for test projects)
    -> refuse if the file changed on the remote since the last update (unless
    force) -> merge origin -> add -> commit (with the given author and commit
    message) -> push (force-push when force=True; skipped for test projects).

    Returns True when the file has been committed (and, outside tests,
    pushed); False on any failed git step, logging the failure reason.
    """
    # verify git config
    config_okay = check_project_config(project)
    if not config_okay[0]:
        logger.error("Error in git config, check project configuration!")
        return False
    config = get_project_config(project)
    # fetch latest changes from remote
    if not is_a_test(project):
        try:
            run_git_command(project, ["fetch"])
        except subprocess.CalledProcessError:
            logger.exception("Git fetch failed to execute properly.")
            return False
    # check if desired file has changed in remote since last update
    # if so, fail and return both user file and repo file to user, unless force=True
    try:
        # "git show --name-only ..origin/<branch>" lists files touched by
        # commits that exist on the remote but not locally.
        output = run_git_command(project, [
            "show", "--pretty=format:", "--name-only",
            "..origin/{}".format(config["git_branch"])
        ])
        new_and_changed_files = [
            s.strip().decode('utf-8', 'ignore') for s in output.splitlines()
        ]
    except subprocess.CalledProcessError as e:
        logger.error("Git show failed to execute properly.")
        logger.error(str(e.output))
        return False
    # NOTE(review): safe_join(file_root, file_path) yields a path rooted at
    # file_root, while `git show --name-only` emits repository-relative
    # paths — confirm these actually match, otherwise this conflict check
    # can never trigger.
    if safe_join(config["file_root"], file_path) in new_and_changed_files and not force:
        logger.error(
            "File {} has been changed in git repository since last update, please manually check file changes."
            .format(file_path))
        return False
    # merge in latest changes so that the local repository is updated
    try:
        run_git_command(
            project, ["merge", "origin/{}".format(config["git_branch"])])
    except subprocess.CalledProcessError as e:
        logger.error("Git merge failed to execute properly.")
        logger.error(str(e.output))
        return False
    # git add file
    try:
        run_git_command(project, ["add", file_path])
    except subprocess.CalledProcessError as e:
        logger.error("Git add failed to execute properly!")
        logger.error(str(e.output))
        return False
    # Commit changes to local repo, noting down user and commit message
    try:
        run_git_command(
            project, ["commit", "--author={}".format(author), "-m", message])
    except subprocess.CalledProcessError as e:
        # NOTE(review): unlike every other error path this does NOT return
        # False — presumably so a commit that fails because there is nothing
        # to commit still proceeds to push; confirm this is intentional.
        logger.error("Git commit failed to execute properly.")
        logger.error(str(e.output))
    else:
        logger.info("git commit of {} succeeded".format(file_path))
    # push new commit to remote repository
    if not is_a_test(project):
        try:
            if force:
                run_git_command(project, ["push", "-f"])
            else:
                run_git_command(project, ["push"])
        except subprocess.CalledProcessError as e:
            logger.error("Git push failed to execute properly.")
            logger.error(str(e.output))
            return False
        else:
            logger.info("git push of {} succeeded".format(file_path))
    # if we reach this point, the file has been commited (and possibly pushed)
    return True
def get_comments(project, collection_id, publication_id, note_id=None, section_id=None):
    """
    Get comments file text for a given publication.

    Looks up a possible legacy filename for the publication's comment XML,
    transforms it with the appropriate XSLT stylesheet (notes.xsl when
    note_id is given without section_id, com.xsl otherwise) and returns the
    transformed content as JSON.

    Returns (response, status): 200 with {"id", "content"}, 400 for an
    unknown project, 403 when the publication may not be shown.
    """
    config = get_project_config(project)
    if config is None:
        return jsonify({"msg": "No such project."}), 400

    can_show, message = get_published_status(project, collection_id, publication_id)
    if not can_show:
        return jsonify({
            "id": "{}_{}".format(collection_id, publication_id),
            "error": message
        }), 403

    logger.info("Getting XML for {} and transforming...".format(request.full_path))

    # Fetch a possible legacy_id for the comment file. Close the connection
    # in a finally block: the original leaked it when the query raised and
    # redundantly closed it twice on the success path.
    connection = db_engine.connect()
    try:
        select = "SELECT legacy_id FROM publication_comment WHERE id IN (SELECT publication_comment_id FROM publication WHERE id = :p_id) \
            AND legacy_id IS NOT NULL AND original_filename IS NULL"
        statement = sqlalchemy.sql.text(select).bindparams(p_id=publication_id)
        result = connection.execute(statement).fetchone()
    finally:
        connection.close()

    bookId = get_collection_legacy_id(collection_id)
    if bookId is None:
        bookId = collection_id
    bookId = '"{}"'.format(bookId)

    if result is not None:
        filename = "{}_com.xml".format(result["legacy_id"])
    else:
        filename = "{}_{}_com.xml".format(collection_id, publication_id)
    logger.debug("Filename (com) for {} is {}".format(publication_id, filename))

    # Parameters shared by all stylesheets; the "est" document sits beside
    # the "com" document, with "com" swapped for "est" in the filename.
    params = {
        "estDocument": '"file://{}"'.format(
            safe_join(config["file_root"], "xml", "est",
                      filename.replace("com", "est"))),
        "bookId": bookId
    }
    if note_id is not None and section_id is None:
        params["noteId"] = '"{}"'.format(note_id)
        xsl_file = "notes.xsl"
    else:
        xsl_file = "com.xsl"
    if section_id is not None:
        # The original rebuilt the whole parameter dict inline here,
        # duplicating estDocument/bookId; reuse params instead (noteId is
        # never set when section_id is given, so the contents are identical).
        params["sectionId"] = '"{}"'.format(section_id)
    content = get_content(project, "com", filename, xsl_file, params)

    data = {
        "id": "{}_{}_com".format(collection_id, publication_id),
        "content": content
    }
    return jsonify(data), 200