def test_lifecycle(self, db_session, create_user):
    """
    Advance a dataset through the entire lifecycle using the state
    transition dict.
    """
    ds = Dataset(owner=create_user.username, controller="frodo", name="fio")
    ds.add()
    assert ds.state == States.UPLOADING
    beenthere = [ds.state]
    while ds.state in Dataset.transitions:
        advances = Dataset.transitions[ds.state]
        for n in advances:
            if n not in beenthere:
                next = n
                break
        else:
            break  # avoid infinite reindex loop!
        beenthere.append(next)
        ds.advance(next)
        assert ds.state == next
    lifecycle = ",".join([s.name for s in beenthere])
    assert (
        lifecycle
        == "UPLOADING,UPLOADED,UNPACKING,UNPACKED,INDEXING,INDEXED,EXPIRING,EXPIRED"
    )
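# Illustrative sketch only, not used by the tests: a plausible shape for the
# Dataset.transitions mapping that test_lifecycle walks. The linear chain is
# inferred from the asserted lifecycle string; the INDEXED -> INDEXING
# back-edge is an assumption suggested by the "avoid infinite reindex loop"
# guard above, and the real mapping lives in the Dataset model.
EXAMPLE_TRANSITIONS = {
    States.UPLOADING: [States.UPLOADED],
    States.UPLOADED: [States.UNPACKING],
    States.UNPACKING: [States.UNPACKED],
    States.UNPACKED: [States.INDEXING],
    States.INDEXING: [States.INDEXED],
    States.INDEXED: [States.EXPIRING, States.INDEXING],  # re-index edge is assumed
    States.EXPIRING: [States.EXPIRED],
    # EXPIRED has no entry: it is terminal, which is what ends the while loop.
}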
def test_advanced_bad_state(self, db_session, create_user):
    """Test with a non-States state value"""
    ds = Dataset(owner=create_user.username, controller="frodo", name="fio")
    ds.add()
    with pytest.raises(DatasetBadParameterType):
        ds.advance("notStates")
def test_advanced_illegal(self, db_session, create_user):
    """
    Test that we can't advance to a state that's not a successor to the
    initial state.
    """
    ds = Dataset(owner=create_user.username, controller="frodo", name="fio")
    ds.add()
    with pytest.raises(DatasetBadStateTransition):
        ds.advance(States.EXPIRED)
def test_advanced_good(self, db_session, create_user):
    """Test advancing the state of a dataset"""
    ds = Dataset(owner=create_user.username, controller="frodo", name="fio")
    ds.add()
    ds.advance(States.UPLOADED)
    assert ds.state == States.UPLOADED
    assert ds.created <= ds.transition
def test_advanced_terminal(self, db_session, create_user):
    """Test that we can't advance from a terminal state"""
    ds = Dataset(
        owner=create_user.username,
        controller="frodo",
        name="fio",
        state=States.EXPIRED,
    )
    ds.add()
    with pytest.raises(DatasetTerminalStateViolation):
        ds.advance(States.UPLOADING)
def put(self, filename: str):
    try:
        username = Auth.token_auth.current_user().username
    except Exception as exc:
        self.logger.error("Error verifying the username: '{}'", exc)
        abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    if os.path.basename(filename) != filename:
        msg = "File must not contain a path"
        self.logger.warning(
            "{} for user = {}, file = {!a}", msg, username, filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    if not self.supported_file_extension(filename):
        msg = f"File extension not supported, must be {self.ALLOWED_EXTENSION}"
        self.logger.warning(
            "{} for user = {}, file = {!a}", msg, username, filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    controller = request.headers.get("controller")
    if not controller:
        msg = "Missing required controller header"
        self.logger.warning(
            "{} for user = {}, file = {!a}", msg, username, filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    if validate_hostname(controller) != 0:
        msg = "Invalid controller header"
        self.logger.warning(
            "{} for user = {}, ctrl = {!a}, file = {!a}",
            msg,
            username,
            controller,
            filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    md5sum = request.headers.get("Content-MD5")
    if not md5sum:
        msg = "Missing required Content-MD5 header"
        self.logger.warning(
            "{} for user = {}, ctrl = {!a}, file = {!a}",
            msg,
            username,
            controller,
            filename,
        )
        abort(HTTPStatus.BAD_REQUEST, message=msg)

    status = HTTPStatus.OK
    try:
        content_length = int(request.headers["Content-Length"])
    except KeyError:
        msg = "Missing required Content-Length header"
        status = HTTPStatus.LENGTH_REQUIRED
    except ValueError:
        msg = (
            "Invalid Content-Length header, not an integer"
            f" ({request.headers['Content-Length']})"
        )
        status = HTTPStatus.BAD_REQUEST
    else:
        if not (0 < content_length <= self.max_content_length):
            msg = "Content-Length ({}) must be greater than 0 and no greater than {}".format(
                content_length, humanize.naturalsize(self.max_content_length)
            )
            status = (
                HTTPStatus.REQUEST_ENTITY_TOO_LARGE
                if 0 < content_length
                else HTTPStatus.BAD_REQUEST
            )
    if status != HTTPStatus.OK:
        self.logger.warning(
            "{} for user = {}, ctrl = {!a}, file = {!a}",
            msg,
            username,
            controller,
            filename,
        )
        abort(status, message=msg)

    path = self.upload_directory / controller
    path.mkdir(exist_ok=True)
    tar_full_path = Path(path, filename)
    md5_full_path = Path(path, f"{filename}.md5")
    bytes_received = 0

    # Create a tracking dataset object; it'll begin in UPLOADING state
    try:
        dataset = Dataset(
            owner=username, controller=controller, path=tar_full_path, md5=md5sum
        )
        dataset.add()
    except DatasetDuplicate:
        self.logger.info(
            "Dataset already exists, user = {}, ctrl = {!a}, file = {!a}",
            username,
            controller,
            filename,
        )
        response = jsonify(dict(message="Dataset already exists"))
        response.status_code = HTTPStatus.OK
        return response
    except Exception as exc:
        self.logger.error(
            "unable to create dataset, '{}', for user = {}, ctrl = {!a}, file = {!a}",
            exc,
            username,
            controller,
            filename,
        )
        abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    if tar_full_path.is_file() or md5_full_path.is_file():
        self.logger.error(
            "Dataset, or corresponding md5 file, already present; tar {} ({}), md5 {} ({})",
            tar_full_path,
            "present" if tar_full_path.is_file() else "missing",
            md5_full_path,
            "present" if md5_full_path.is_file() else "missing",
        )
        abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    self.logger.info(
        "Uploading file {!a} (user = {}, ctrl = {!a}) to {}",
        filename,
        username,
        controller,
        dataset,
    )

    with tempfile.NamedTemporaryFile(mode="wb", dir=path) as ofp:
        hash_md5 = hashlib.md5()

        try:
            while True:
                chunk = request.stream.read(self.CHUNK_SIZE)
                bytes_received += len(chunk)
                if len(chunk) == 0 or bytes_received > content_length:
                    break
                ofp.write(chunk)
                hash_md5.update(chunk)
        except OSError as exc:
            if exc.errno == errno.ENOSPC:
                self.logger.error(
                    "Not enough space on volume, {}, for upload:"
                    " user = {}, ctrl = {!a}, file = {!a}",
                    path,
                    username,
                    controller,
                    filename,
                )
                abort(HTTPStatus.INSUFFICIENT_STORAGE)
            else:
                msg = "Unexpected error encountered during file upload"
                self.logger.error(
                    "{}, {}, for user = {}, ctrl = {!a}, file = {!a}",
                    msg,
                    exc,
                    username,
                    controller,
                    filename,
                )
                abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")
        except Exception as exc:
            msg = "Unexpected error encountered during file upload"
            self.logger.error(
                "{}, {}, for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                exc,
                username,
                controller,
                filename,
            )
            abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

        if bytes_received != content_length:
            msg = (
                "Bytes received do not match Content-Length header"
                f" (expected {content_length}; received {bytes_received})"
            )
            self.logger.warning(
                "{} for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                username,
                controller,
                filename,
            )
            abort(HTTPStatus.BAD_REQUEST, message=msg)
        elif hash_md5.hexdigest() != md5sum:
            msg = (
                "MD5 checksum does not match Content-MD5 header"
                f" ({hash_md5.hexdigest()} != {md5sum})"
            )
            self.logger.warning(
                "{} for user = {}, ctrl = {!a}, file = {!a}",
                msg,
                username,
                controller,
                filename,
            )
            abort(HTTPStatus.BAD_REQUEST, message=msg)

        # First write the .md5
        try:
            md5_full_path.write_text(f"{md5sum} {filename}\n")
        except Exception as exc:
            try:
                md5_full_path.unlink(missing_ok=True)
            except Exception as md5_exc:
                self.logger.error(
                    "Failed to remove .md5 {} when trying to clean up: '{}'",
                    md5_full_path,
                    md5_exc,
                )
            self.logger.error(
                "Failed to write .md5 file, '{}': '{}'", md5_full_path, exc
            )
            abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

        # Then create the final filename link to the temporary file.
        try:
            os.link(ofp.name, tar_full_path)
        except Exception as exc:
            try:
                md5_full_path.unlink()
            except Exception as md5_exc:
                self.logger.error(
                    "Failed to remove .md5 {} when trying to clean up: {}",
                    md5_full_path,
                    md5_exc,
                )
            self.logger.error(
                "Failed to link tar ball '{}' to '{}': '{}'",
                ofp.name,
                tar_full_path,
                exc,
            )
            abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    try:
        dataset.advance(States.UPLOADED)
    except Exception as exc:
        self.logger.error("Unable to finalize {}, '{}'", dataset, exc)
        abort(HTTPStatus.INTERNAL_SERVER_ERROR, message="INTERNAL ERROR")

    response = jsonify(dict(message="File successfully uploaded"))
    response.status_code = HTTPStatus.CREATED
    return response
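# Illustrative client-side sketch (not part of the server resource above).
# It shows how a caller might satisfy the checks in put(): the "controller",
# "Content-MD5", and "Content-Length" headers come straight from the
# validation logic above. The route ("{base_url}/upload/{name}") and the
# bearer-token Authorization scheme are assumptions, not taken from this file.
import hashlib
import os

import requests


def upload_tarball(base_url: str, token: str, controller: str, tarball: str):
    """Sketch of a PUT upload against the endpoint defined above."""
    with open(tarball, "rb") as f:
        data = f.read()
    headers = {
        "Authorization": f"Bearer {token}",  # assumed auth scheme
        "controller": controller,
        "Content-MD5": hashlib.md5(data).hexdigest(),
        "Content-Length": str(len(data)),
    }
    name = os.path.basename(tarball)
    return requests.put(f"{base_url}/upload/{name}", headers=headers, data=data)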