def __init__(self) -> None:
    """Load geodata.tsv into neo4j as GeoData nodes (upsert).

    The first TSV row is the header and provides the attribute names;
    every following row is upserted as a GeoData node keyed on the
    first column. Existing nodes are updated only when a value differs.
    """
    # enter GeoData in neo4j
    attributes: Optional[List[str]] = None
    graph = neo4j.get_instance()
    with open(DATA_PATH.joinpath("geodata.tsv")) as fd:
        rd = csv.reader(fd, delimiter="\t", quotechar='"')
        for row in rd:
            if not attributes:
                # use the first row to get the list of attributes
                attributes = row
                continue
            props = dict(zip(attributes, row))
            # the first attribute is used as the lookup key
            geodata = graph.GeoData.nodes.get_or_none(**{attributes[0]: row[0]})
            if not geodata:
                # create a new one
                graph.GeoData(**props).save()
            else:
                # check if an update is needed; collect all changes and
                # save at most once per node (the original saved once per
                # changed attribute, issuing redundant writes)
                changed = False
                for key, value in props.items():
                    if getattr(geodata, key) != value:
                        setattr(geodata, key, value)
                        changed = True
                if changed:
                    geodata.save()
    log.info("GeoData nodes successfully created")
def put(self, force: bool = False) -> Response:
    # Restrict uploads to .txt unless force is set.
    # This is just to test the allowed exts without adding a new parameter..
    if not force:
        self.set_allowed_exts(["txt"])
    return self.upload(
        subfolder=DATA_PATH.joinpath("fixsubfolder"),
        force=force,
    )
def put(self, filename: str, force: bool = False) -> Response:
    # Chunked uploads always land in the fixed subfolder.
    upload_dir = DATA_PATH.joinpath("fixed")
    is_complete, resp = self.chunk_upload(upload_dir, filename)
    if is_complete:
        log.info("Upload completed")
    return resp
def get(self, folder: str, fname: str, stream: bool = False) -> Response:
    # Serve a previously uploaded file, streamed or as plain content.
    # The same as defined in test_upload
    target_dir = DATA_PATH.joinpath(folder)
    sender = (
        Downloader.send_file_streamed if stream else Downloader.send_file_content
    )
    return sender(fname, subfolder=target_dir)
def post(
    self,
    name: str,
    mimeType: str,
    size: int,
    lastModified: int,
    force: bool = False,
) -> Response:
    # Initialize a chunked upload into the fixed subfolder.
    # This is just to test the allowed exts without adding a new parameter..
    if not force:
        self.set_allowed_exts(["txt"])
    return self.init_chunk_upload(DATA_PATH.joinpath("fixed"), name, force=force)
def launch_pipeline(
    self: Task,
    dataset_list: List[str],
    snakefile: str = "Single_Sample.smk",
    force: bool = False,
) -> None:
    """Prepare the snakemake working directory for the given datasets.

    For each dataset uuid: locate its input folder, collect the contained
    files, mark the dataset as RUNNING, and build the fastq rows
    (sample label, fragment, input path, output path) for fastq.csv.
    """
    task_id = self.request.id
    log.info("Start task [{}:{}]", task_id, self.name)
    # create a unique workdir for every celery task / and snakemake launch)
    wrkdir = DATA_PATH.joinpath("jobs", task_id)
    wrkdir.mkdir(parents=True, exist_ok=True)
    # copy the files used by snakemake in the work dir
    source_dir = Path("/snakemake")
    for snk_file in source_dir.glob("*"):
        if snk_file.is_file():
            shutil.copy(snk_file, wrkdir)
    # get the file list from the dataset list
    file_list = []
    graph = neo4j.get_instance()
    for d in dataset_list:
        # get the path of the dataset directory
        dataset = graph.Dataset.nodes.get_or_none(uuid=d)
        if dataset is None:
            # get_or_none returns None for unknown uuids: skip the entry
            # instead of crashing with an AttributeError below
            log.warning("Dataset {} not found", d)
            continue
        owner = dataset.ownership.single()
        group = owner.belongs_to.single()
        study = dataset.parent_study.single()
        datasetDirectory = INPUT_ROOT.joinpath(group.uuid, study.uuid, dataset.uuid)
        # check if the directory exists
        if not datasetDirectory.exists():
            # an error should be raised?
            log.warning("Folder for dataset {} not found", d)
            continue
        # append the contained files in the file list
        for f in datasetDirectory.iterdir():
            file_list.append(f)
        # mark the dataset as running
        dataset.status = "RUNNING"
        dataset.save()
    # create a list of fastq files as csv file: fastq.csv
    fastq = []
    # the pattern is checked also in the file upload endpoint.
    # This is an additional check.
    # NOTE: the dots are escaped so that ".fastq.gz" is matched literally,
    # per the SampleName_R1/R2.fastq.gz naming convention asserted below
    # (the previous pattern's bare dots matched any character)
    pattern = r"([a-zA-Z0-9_-]+)_(R[12])\.fastq\.gz"
    for filepath in file_list:
        fname = filepath.name
        if match := re.match(pattern, fname):
            file_label = match.group(1)
            fragment = match.group(2)
            # get the input path
            input_path = filepath.parent
            # create the output path, mirroring the input tree
            output_path = OUTPUT_ROOT.joinpath(input_path.relative_to(INPUT_ROOT))
            output_path.mkdir(parents=True, exist_ok=True)
            # symlink the fastq into the output tree, once
            if not output_path.joinpath(fname).exists():
                output_path.joinpath(fname).symlink_to(filepath)
            # create row for csv
            fastq_row = [file_label, fragment, input_path, output_path]
            fastq.append(fastq_row)
        else:
            log.info(
                "fastq {} should follow correct naming convention: "
                "SampleName_R1/R2.fastq.gz",
                filepath,
            )
def test_simple_upload_and_download(self, client: FlaskClient, faker: Faker) -> None:
    """End-to-end check of the plain (non-chunked) upload/download endpoints.

    Covers: extension filtering, read-only destination permissions (0440),
    409 on duplicate upload, forced overwrite, and plain + streamed
    downloads of the uploaded content.
    """
    warnings.filterwarnings(
        "ignore", message="unclosed file <_io.BufferedReader name=")
    self.fcontent = faker.paragraph()
    self.save("fcontent", self.fcontent)
    # as defined in test_upload.py for normal uploads
    upload_folder = "fixsubfolder"
    # a disallowed extension must be rejected when force is off
    self.fname = f"{faker.pystr()}.notallowed"
    r = client.put(
        f"{API_URI}/tests/upload",
        data={
            "file": (io.BytesIO(str.encode(self.fcontent)), self.fname),
            # By setting force False only txt files will be allowed for upload
            # Strange, but it is how the endpoint is configured to improve the tests
            "force": False,
        },
    )
    assert r.status_code == 400
    assert self.get_content(r) == "File extension not allowed"
    # a .txt file is allowed and must be stored read-only (0440)
    self.fname = f"{faker.pystr()}.txt"
    self.save("fname", self.fname)
    r = client.put(
        f"{API_URI}/tests/upload",
        data={
            "file": (io.BytesIO(str.encode(self.fcontent)), self.fname),
            # By setting force False only txt files will be allowed for upload
            # Strange, but it is how the endpoint is configured to improve the tests
            "force": False,
        },
    )
    assert r.status_code == 200
    destination_path = DATA_PATH.joinpath(upload_folder, self.fname)
    assert destination_path.exists()
    assert oct(os.stat(destination_path).st_mode & 0o777) == "0o440"
    # re-uploading the same name without force must conflict
    r = client.put(
        f"{API_URI}/tests/upload",
        data={"file": (io.BytesIO(str.encode(self.fcontent)), self.fname)},
    )
    assert r.status_code == 409
    err = f"File '{self.fname}' already exists, use force parameter to overwrite"
    assert self.get_content(r) == err
    # with force=True the overwrite is accepted and permissions are kept
    r = client.put(
        f"{API_URI}/tests/upload",
        data={
            "file": (io.BytesIO(str.encode(self.fcontent)), self.fname),
            "force": True,
        },
    )
    assert r.status_code == 200
    destination_path = DATA_PATH.joinpath(upload_folder, self.fname)
    assert destination_path.exists()
    assert oct(os.stat(destination_path).st_mode & 0o777) == "0o440"
    # the response payload reports filename and detected metadata
    c = self.get_content(r)
    assert isinstance(c, dict)
    assert c.get("filename") == self.fname
    meta = c.get("meta")
    assert meta is not None
    assert meta.get("charset") is not None
    assert meta.get("type") is not None
    # restore the values persisted above before the download checks
    self.fname = self.get("fname")
    self.fcontent = self.get("fcontent")
    # as defined in test_upload.py for normal uploads
    upload_folder = "fixsubfolder"
    # downloading a missing file must return 404
    r = client.get(f"{API_URI}/tests/download/folder/doesnotexist")
    assert r.status_code == 404
    assert self.get_content(r) == "The requested file does not exist"
    # downloading the uploaded file returns its content verbatim
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{self.fname}")
    assert r.status_code == 200
    content = r.data.decode("utf-8")
    assert content == self.fcontent
    # overwrite the file and verify the download returns the new content
    new_content = "new content"
    r = client.put(
        f"{API_URI}/tests/upload",
        data={
            "file": (io.BytesIO(str.encode(new_content)), self.fname),
            "force": True,
        },
    )
    assert r.status_code == 200
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{self.fname}")
    assert r.status_code == 200
    content = r.data.decode("utf-8")
    assert content != self.fcontent
    assert content == new_content
    # the streamed download must return the same content
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{self.fname}",
        query_string={"stream": True},
    )
    assert r.status_code == 200
    content = r.data.decode("utf-8")
    assert content == new_content
    # streaming a missing file must return 404 as well
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/doesnotexist",
        query_string={"stream": True},
    )
    assert r.status_code == 404
def test_chunked_upload_and_download(self, client: FlaskClient, faker: Faker) -> None:
    """End-to-end check of the chunked upload endpoint and ranged downloads.

    Covers: init validation, Content-Range parsing, multi-chunk assembly,
    final read-only permissions (0440), HTTP Range downloads (206/416),
    re-upload on a read-only file, and uploads to an uninitialized endpoint.
    """
    warnings.filterwarnings(
        "ignore", message="unclosed file <_io.BufferedReader name=")
    self.fname = self.get("fname")
    self.fcontent = self.get("fcontent")
    # as defined in test_upload.py for chunked uploads
    upload_folder = "fixed"
    # init without the required metadata fields must fail
    r = client.post(f"{API_URI}/tests/chunkedupload", data={"force": True})
    assert r.status_code == 400
    filename = "fixed.filename.txt"
    data = {
        "force": True,
        "name": filename,
        "size": "999",
        "mimeType": "application/zip",
        "lastModified": 1590302749209,
    }
    # a valid init returns 201 with the upload URL in the Location header
    r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
    assert r.status_code == 201
    assert self.get_content(r) == ""
    upload_endpoint = get_location_header(
        r.headers, expected=f"{API_URI}/tests/chunkedupload/(unknown)")
    # re-initializing the same name without force must conflict
    data["force"] = False
    r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
    assert r.status_code == 409
    assert self.get_content(r) == f"File '(unknown)' already exists"
    # a PUT without a Content-Range header is rejected
    with io.StringIO(faker.text()) as f:
        r = client.put(upload_endpoint, data=f)
    assert r.status_code == 400
    assert self.get_content(r) == "Invalid request"
    # a malformed Content-Range header is rejected too
    with io.StringIO(faker.text()) as f:
        r = client.put(
            upload_endpoint,
            data=f,
            headers={"Content-Range": "!"},
        )
    assert r.status_code == 400
    assert self.get_content(r) == "Invalid request"
    up_data = faker.pystr(min_chars=24, max_chars=48).lower()
    STR_LEN = len(up_data)
    # first chunk: a partial upload answers 206
    with io.StringIO(up_data[0:5]) as f:
        r = client.put(
            upload_endpoint,
            data=f,
            headers={"Content-Range": f"bytes 0-5/{STR_LEN}"},
        )
    assert r.status_code == 206
    assert self.get_content(r) == "partial"
    destination_path = DATA_PATH.joinpath(upload_folder, filename)
    assert destination_path.exists()
    # The file is still writeable because the upload is in progress
    assert oct(os.stat(destination_path).st_mode & 0o777) != "0o440"
    # final chunk: the upload completes with 200
    with io.StringIO(up_data[5:]) as f:
        r = client.put(
            upload_endpoint,
            data=f,
            headers={"Content-Range": f"bytes 5-{STR_LEN}/{STR_LEN}"},
        )
    assert r.status_code == 200
    # the completion payload reports filename and detected metadata
    c = self.get_content(r)
    assert isinstance(c, dict)
    assert c.get("filename") is not None
    uploaded_filename = c.get("filename")
    meta = c.get("meta")
    assert meta is not None
    assert meta.get("charset") == "us-ascii"
    assert meta.get("type") == "text/plain"
    destination_path = DATA_PATH.joinpath(upload_folder, filename)
    assert destination_path.exists()
    # once completed the file becomes read-only
    assert oct(os.stat(destination_path).st_mode & 0o777) == "0o440"
    # full download returns the assembled content (checked twice)
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}")
    assert r.status_code == 200
    content = r.data.decode("utf-8")
    assert content == up_data
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}")
    assert r.status_code == 200
    content = r.data.decode("utf-8")
    assert content == up_data
    # an empty Range header is unsatisfiable
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
        headers={"Range": ""},
    )
    assert r.status_code == 416
    # a Range without the "bytes=" unit is unsatisfiable
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
        headers={"Range": f"0-{STR_LEN - 1}"},
    )
    assert r.status_code == 416
    # an out-of-bounds end is tolerated and answered with 206
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
        headers={"Range": "bytes=0-9999999999999999"},
    )
    assert r.status_code == 206
    # partial ranges return the matching slices of the content
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
        headers={"Range": "bytes=0-4"},
    )
    assert r.status_code == 206
    content = r.data.decode("utf-8")
    assert content == up_data[0:5]
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
        headers={"Range": f"bytes=5-{STR_LEN - 1}"},
    )
    assert r.status_code == 206
    content = r.data.decode("utf-8")
    assert content == up_data[5:]
    # a full-span range also answers 206 with the whole content
    r = client.get(
        f"{API_URI}/tests/download/{upload_folder}/{uploaded_filename}",
        headers={"Range": f"bytes=0-{STR_LEN - 1}"},
    )
    assert r.status_code == 206
    content = r.data.decode("utf-8")
    assert content == up_data
    # Send a new string as content file. Will be appended as prefix
    up_data2 = faker.pystr(min_chars=24, max_chars=48)
    STR_LEN = len(up_data2)
    # the completed file is read-only, so the write must fail with 503
    with io.StringIO(up_data2) as f:
        r = client.put(
            upload_endpoint,
            data=f,
            headers={"Content-Range": f"bytes */{STR_LEN}"},
        )
    assert r.status_code == 503
    assert self.get_content(
        r) == "Permission denied: failed to write the file"
    # force the file to be writeable again
    destination_path = DATA_PATH.joinpath(upload_folder, filename)
    # -rw-rw----
    destination_path.chmod(0o660)
    # with the file writeable the same upload now succeeds
    with io.StringIO(up_data2) as f:
        r = client.put(
            upload_endpoint,
            data=f,
            headers={"Content-Range": f"bytes */{STR_LEN}"},
        )
    assert r.status_code == 200
    destination_path = DATA_PATH.joinpath(upload_folder, filename)
    assert destination_path.exists()
    # File permissions are restored
    assert oct(os.stat(destination_path).st_mode & 0o777) == "0o440"
    # c = self.get_content(r)
    # assert c.get('filename') is not None
    # uploaded_filename = c.get('filename')
    # meta = c.get('meta')
    # assert meta is not None
    # assert meta.get('charset') == 'us-ascii'
    # assert meta.get('type') == 'text/plain'
    # r = client.get(
    #     f'{API_URI}/tests/download/{upload_folder}/{uploaded_filename}'
    # )
    # assert r.status_code == 200
    # content = r.data.decode('utf-8')
    # # Uhmmm... should not be up_data2 + up_data ??
    # assert content == up_data + up_data2
    # re-initializing the (now completed) upload without force conflicts
    data["force"] = False
    r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
    assert r.status_code == 409
    err = f"File '{uploaded_filename}' already exists"
    assert self.get_content(r) == err
    # with force the init is accepted again
    data["force"] = True
    r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
    assert r.status_code == 201
    assert self.get_content(r) == ""
    upload_endpoint = get_location_header(
        r.headers, expected=f"{API_URI}/tests/chunkedupload/(unknown)")
    # a disallowed extension is rejected at init time
    data["name"] = "fixed.filename.notallowed"
    data["force"] = False
    r = client.post(f"{API_URI}/tests/chunkedupload", data=data)
    assert r.status_code == 400
    assert self.get_content(r) == "File extension not allowed"
    # Send an upload on a file endpoint not previously initialized
    filename = f"{faker.pystr()}.txt"
    with io.StringIO(up_data2) as f:
        r = client.put(
            f"{API_URI}/tests/chunkedupload/(unknown)",
            data=f,
            headers={"Content-Range": f"bytes */{STR_LEN}"},
        )
    assert r.status_code == 503
    error = "Permission denied: the destination file does not exist"
    assert self.get_content(r) == error
    destination_path = DATA_PATH.joinpath(upload_folder, filename)
    assert not destination_path.exists()
from pathlib import Path from typing import Any, List, Optional from restapi.config import DATA_PATH from restapi.exceptions import BadRequest, NotFound from restapi.rest.definition import EndpointResource from restapi.services.authentication import User from restapi.utilities.logs import log INPUT_ROOT = DATA_PATH.joinpath("input") OUTPUT_ROOT = DATA_PATH.joinpath("output") STUDY_NOT_FOUND = "This study cannot be found or you are not authorized to access" DATASET_NOT_FOUND = "This dataset cannot be found or you are not authorized to access" PHENOTYPE_NOT_FOUND = ( "This phenotype cannot be found or you are not authorized to access") TECHMETA_NOT_FOUND = ( "This set of technical metadata cannot be found or you are not authorized to access" ) FILE_NOT_FOUND = "This file cannot be found or you are not authorized to access" # Should be the class models, but can't be imported here Study = Any Dataset = Any File = Any class NIGEndpoint(EndpointResource): # group used for test or, in general, groups we don't want to be counted in stats GROUPS_TO_FILTER: List[str] = []