@pytest.fixture
def new_dummy_folder():
    new_dummy_folder_name = "Dummy folder"
    _new_dummy_folder = mc.add_folder(
        name=new_dummy_folder_name,
        folder_type=Folder.FolderType.home,
        description="I am a dummy folder for testing purpose",
    )
    return _new_dummy_folder


@pytest.fixture
def new_folder():
    new_folder_name = "New folder"
    _new_folder = mc.add_folder(
        name=new_folder_name,
        folder_type=Folder.FolderType.folder,
        description="Test new folder description",
    )
    return _new_folder
def test_edit_owned(session: SessionBase):
    new_user = mc.add_user("test", "*****@*****.**")
    flask.g.current_user = new_user

    # new_folder_owned = FolderFactory(creator=new_user)
    new_folder_owned = mc.add_folder(
        name="Test folder", folder_type=Folder.FolderType.folder, description=""
    )

    right_owned = mc.get_rights(new_folder_owned.id)
    assert right_owned == EntryRightsEnum.can_edit
def test_view_not_owned(session: SessionBase):
    new_user = mc.add_user("test", "*****@*****.**")
    flask.g.current_user = new_user

    new_user_not_tested = mc.add_user("test_useless", "*****@*****.**")
    # new_folder_not_owned = FolderFactory(creator=new_user_not_tested)
    new_folder_not_owned = mc.add_folder(
        name="folder_not_owned", folder_type=Folder.FolderType.folder, description=""
    )
    new_folder_not_owned.creator = new_user_not_tested

    right_not_owned = mc.get_rights(new_folder_not_owned.id)
    assert right_not_owned == EntryRightsEnum.can_view
def test_get_parent_folders(session: SessionBase, new_folder, new_dummy_folder):
    folder_in_dummy_and_new_folder_folders = mc.add_folder(
        name="Inception",
        folder_type=Folder.FolderType.folder,
        description="Folder inside two folders",
    )
    # new_folder.entries.append(new_dummy_folder)
    new_folder.entries.append(folder_in_dummy_and_new_folder_folders)
    new_dummy_folder.entries.append(folder_in_dummy_and_new_folder_folders)

    parent_folders = mc.get_parent_folders(folder_in_dummy_and_new_folder_folders.id)

    assert len(parent_folders) == 2
    assert new_folder in parent_folders
    assert new_dummy_folder in parent_folders
def create_folder(metadata):
    """Create a folder given a metadata dictionary of this form:
    metadata['name'], metadata['description'], metadata['parentId']"""
    # TODO: Add the add_folder_entry inside the add_folder function?
    folder_name = metadata["name"]
    folder_description = metadata["description"]
    parent_id = metadata["parentId"]

    new_folder = models_controller.add_folder(
        name=folder_name,
        folder_type=models_controller.Folder.FolderType.folder,
        description=folder_description,
    )
    models_controller.add_folder_entry(parent_id, new_folder.id)

    folder_named_id_schema = schemas.FolderNamedIdSchema()
    json_folder_named_id = folder_named_id_schema.dump(new_folder).data

    return flask.jsonify(json_folder_named_id)
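# Illustrative only: a minimal sketch of the metadata payload create_folder expects,
# following its docstring. The values below are hypothetical placeholders; "parentId"
# would normally be the id of an existing folder (e.g. the caller's home folder).
#
# example_metadata = {
#     "name": "New analysis folder",
#     "description": "Scratch space for exploratory work",
#     "parentId": "<existing-folder-id>",
# }
# response = create_folder(metadata=example_metadata)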
def populate_db(dataset_csv_path, dataset_version_with_datafile_csv_path):
    # TODO: We should handle the Public folder properly, instead of adding it to Philip's account
    # Summary
    nb_user_created = 0
    nb_user_skipped = 0

    nb_dataset_created = 0
    nb_row_dataset_skipped = 0

    nb_datafile_created = 0
    nb_datafile_skipped = 0
    nb_row_datafile_skipped = 0

    nb_dataset_version_created = 0
    nb_dataset_version_skipped = 0

    # Dictionary to find the dataset matching each datafile via its permaname,
    # so we can later create the dataset versions
    # Dict<String, Array<DataFileInfo>>
    dict_permaname_datafile_ids = {}

    # We first manage the dataset creation
    with open(dataset_csv_path) as dataset_file:
        print("Creating the users and the datasets")
        reader = csv.DictReader(dataset_file)
        for row in reader:
            is_public = False
            if not row["permaname"]:
                print(
                    "Warning: We found an empty permaname entry: {}. Skipping it.".format(row)
                )
                nb_row_dataset_skipped += 1
                continue

            dataset_name = row["name"]
            dataset_permaname = row["permaname"]
            dataset_description = row["description"]

            if row["folder"].startswith("home"):
                dataset_folder_user = row["folder"]
                # To get the user from dataset_folder_user, we extract the user from the parenthesis
                dataset_user_email = dataset_folder_user[
                    dataset_folder_user.find("(") + 1 : dataset_folder_user.find(")")
                ]

                # Handle the case where user email is None
                if dataset_user_email == "None":
                    print(
                        "Warning: We found a row with folder {}. Skipping it.".format(row["folder"])
                    )
                    nb_user_skipped += 1
                    continue

                # To get the target folder, we take the string before the parenthesis
                dataset_folder_name = dataset_folder_user.split("(")[0]
            else:
                # For now, we store all the others into [email protected]
                is_public = True
                dataset_folder_name = row["folder"]
                dataset_user_email = "*****@*****.**"

            # Setting up the user
            try:
                dataset_current_user = models_controller.get_user_by_email(dataset_user_email)
            except NoResultFound:
                # User does not exist yet, so we create it
                dataset_user_name = dataset_user_email[: dataset_user_email.find("@")]
                dataset_current_user = models_controller.add_user(
                    name=dataset_user_name, email=dataset_user_email
                )
                print("User with email: {} created".format(dataset_user_email))
                nb_user_created += 1

            flask.g.current_user = dataset_current_user

            # TODO: We should not create the dataset if it already exists
            new_dataset = models_controller.add_dataset(
                name=dataset_name,
                permaname=dataset_permaname,
                description=dataset_description,
            )

            try:
                # TODO: Check it is case insensitive
                if str.lower(dataset_folder_name) == "home":
                    dataset_folder = dataset_current_user.home_folder
                elif str.lower(dataset_folder_name) == "trash":
                    dataset_folder = dataset_current_user.trash_folder
                else:
                    dataset_folder = models_controller.get_folder_by_name(dataset_folder_name)
            except NoResultFound:
                # If no result, it means we need to create the folder in the user space or in public
                dataset_folder = models_controller.add_folder(
                    name=dataset_folder_name,
                    folder_type=models_controller.Folder.FolderType.folder,
                    description=None,
                )
                if is_public:
                    models_controller.move_to_folder(
                        entry_ids=[dataset_folder.id],
                        current_folder_id=None,
                        target_folder_id=models_controller.get_public_folder().id,
                    )
                else:
                    models_controller.move_to_folder(
                        entry_ids=[dataset_folder.id],
                        current_folder_id=None,
                        target_folder_id=dataset_current_user.home_folder_id,
                    )

            # Now we can move the dataset to the folder
            models_controller.move_to_folder([new_dataset.id], None, dataset_folder.id)

            # We add the dataset_permaname as key with value an empty array
            # so we can add each matching datafile
            dict_permaname_datafile_ids[dataset_permaname] = []

            nb_dataset_created += 1

    # We then manage the attribution of the dataset_version to the freshly created datasets
    with open(dataset_version_with_datafile_csv_path) as dataset_version_with_datafile_csv:
        print("")
        print("Creating the datafiles")
        reader = csv.DictReader(dataset_version_with_datafile_csv)
        for row in reader:
            if not row["permaname"]:
                print("We found an empty permaname entry: {}. Skipping it.".format(row))
                nb_row_datafile_skipped += 1
                nb_datafile_skipped += 1
                continue

            # We first create the datafiles
            datafile_type = row["type"]
            datafile_name = row.get("name", "data")
            datafile_s3_location = urlparse(row["s3_location"])
            datafile_short_summary = row["short_desc"]
            datafile_long_summary = row.get("long_desc", "")
            datafile_id = row["id"]
            datafile_creation_date = row["created_timestamp"]
            datafile_version = row["version"]
            datafile_created_by = row["created_by"]
            dataset_permaname = row["permaname"]

            # s3://taiga2/imported/4bb2169e-5b87-4d1c-a78e-3e6006316561.hdf5
            datafile_s3_bucket = datafile_s3_location.netloc
            datafile_s3_key = datafile_s3_location.path[1:]  # We remove the first '/'

            # Set the user to the one in the row to make the manipulations under his name
            try:
                current_user = models_controller.get_user_by_email(datafile_created_by)
            except NoResultFound:
                print(
                    "Warning: The user email found in 'created_by' column ({}) was not found in the dataset side. "
                    "Creating one.".format(datafile_created_by)
                )
                datafile_created_by_name = datafile_created_by[: datafile_created_by.find("@")]
                current_user = models_controller.add_user(
                    name=datafile_created_by_name, email=datafile_created_by
                )
                nb_user_created += 1

            flask.g.current_user = current_user

            # TODO: We should not create the datafile if it already exists: ie s3_bucket/s3_key exists
            new_datafile = models_controller.add_s3_datafile(
                s3_bucket=datafile_s3_bucket,
                s3_key=datafile_s3_key,
                name=datafile_name,
                type=datafile_type,
                short_summary=datafile_short_summary,
                long_summary=datafile_long_summary,
            )

            # We register the datafile with its permaname dataset to later create the dataset version
            # with all the datafiles
            if dataset_permaname in dict_permaname_datafile_ids:
                datafile_info = DataFileInfo(
                    id=datafile_id,
                    datafile=new_datafile,
                    version=datafile_version,
                    creation_date=datafile_creation_date,
                    owner_email=datafile_created_by,
                )
                dict_permaname_datafile_ids[dataset_permaname].append(datafile_info)
            else:
                print(
                    "Warning: We found a datafile ({}) without a matching dataset ({}). Skipping it.".format(
                        datafile_id, dataset_permaname
                    )
                )
                nb_datafile_skipped += 1
                continue

            nb_datafile_created += 1

    # Then we create the dataset_version with the taiga id, linking with the dataset using its permaname
    print("")
    print("Linking the datafiles with the datasets")
    for dataset_permaname, array_data_file_info in dict_permaname_datafile_ids.items():
        dataset = models_controller.get_dataset_from_permaname(dataset_permaname)

        # Get the creation date from the first dataset_version
        for datafile_info in array_data_file_info:
            flask.g.current_user = models_controller.get_user_by_email(
                datafile_info.owner_email
            )

            # TODO: We should not create the dataset_version if it already exists. ie version already exists for this dataset
            dataset_version = models_controller.add_dataset_version(
                dataset_id=dataset.id,
                datafiles_ids=[datafile_info.datafile.id],
                anterior_creation_date=datafile_info.creation_date,
                forced_id=datafile_info.id,
            )

            # Then we set the dataset creation_date to the date of its first version
            if int(datafile_info.version) == 1:
                models_controller.update_dataset_creation_date(
                    dataset_id=dataset.id, new_date=datafile_info.creation_date
                )

            nb_dataset_version_created += 1

    print("")
    print("Done! Here is the summary:")
    print("\tLines skipped in dataset file: {}".format(nb_row_dataset_skipped))
    print("\tLines skipped in datafile file: {}".format(nb_row_datafile_skipped))
    print("")
    print("\tDatasets created: {}".format(nb_dataset_created))
    print("\tUsers created: {}".format(nb_user_created))
    print("\tUsers skipped: {}".format(nb_user_skipped))
    print("")
    print("\tDatafiles created: {}".format(nb_datafile_created))
    print("\tDatafiles skipped: {}".format(nb_datafile_skipped))
    print("")
    print("\tDatasetVersions created: {}".format(nb_dataset_version_created))
    print("\tDatasetVersions skipped and datasets cleaned: {}".format(nb_dataset_version_skipped))
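# A hedged sketch of the DataFileInfo container used in populate_db above. Its real
# definition lives elsewhere in this codebase and is not shown here; the field names
# below are inferred from the constructor call and the attribute accesses in the loop.
#
# class DataFileInfo(NamedTuple):
#     id: str              # forced id reused when creating the dataset_version
#     datafile: object     # the s3 datafile created for this CSV row
#     version: str         # dataset_version number, read from the CSV
#     creation_date: str   # original creation timestamp carried over from the CSV
#     owner_email: str     # email used to restore flask.g.current_user later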
def test_create_virtual_dataset_endpoint(session: SessionBase):
    folder = models_controller.add_folder(
        "folder", models_controller.Folder.FolderType.folder, "folder desc"
    )

    dataset1 = _create_dataset_with_a_file("datafile")
    data_file_1 = "{}.1/datafile".format(dataset1.permaname)
    session.flush()

    upload_session_1 = new_upload_session()
    _add_virtual_file_to_upload_session(upload_session_1.id, "alias", data_file_1)

    sessionDatasetInfo = {
        "sessionId": upload_session_1.id,
        "datasetName": "version-1-name",
        "datasetDescription": "version-1-desc",
        "currentFolderId": folder.id,
    }

    response_json_create_dataset = endpoint.create_dataset(
        sessionDatasetInfo=sessionDatasetInfo
    )
    virtual_dataset_id = get_data_from_flask_jsonify(response_json_create_dataset)

    # versionInfo = {
    #     "description": "updated desc",
    #     "files": [
    #         {"name": "alias", "datafile": data_file_2}
    #     ],
    # }

    # now update with a new version
    upload_session_2 = new_upload_session()
    dataset2 = _create_dataset_with_a_file()
    data_file_2 = "{}.1/datafile".format(dataset2.permaname)
    _add_virtual_file_to_upload_session(upload_session_2.id, "alias", data_file_2)

    datasetVersionMetadata = {
        "sessionId": upload_session_2.id,
        "datasetId": virtual_dataset_id,
        "newDescription": "version-2-desc",
    }
    endpoint.create_new_dataset_version(datasetVersionMetadata=datasetVersionMetadata)

    v = models_controller.get_dataset(virtual_dataset_id)
    latest_dataset_version = models_controller.get_latest_dataset_version(v.id)
    assert v.name == "version-1-name"
    assert v.description is None
    assert latest_dataset_version.description == "version-2-desc"
    assert len(v.dataset_versions) == 2

    # check each version
    version = v.dataset_versions[0]
    assert version.version == 1
    assert len(version.datafiles) == 1
    entry = version.datafiles[0]
    assert entry.name == "alias"
    data_file_id_1 = entry.underlying_file_id

    version = v.dataset_versions[1]
    assert version.version == 2
    assert len(version.datafiles) == 1
    entry = version.datafiles[0]
    assert entry.name == "alias"
    assert entry.underlying_file_id != data_file_id_1
def test_dataset_endpoints_on_virtual_dataset(session: SessionBase):
    dataset1 = _create_dataset_with_a_file()
    data_file_1 = dataset1.dataset_versions[0].datafiles[0]
    data_file_1_label = "{}.1/datafile".format(dataset1.permaname)

    folder = models_controller.add_folder(
        "folder", models_controller.Folder.FolderType.folder, "folder desc"
    )
    folder_id = folder.id

    vdatafile_name = "alias"
    vdataset = _create_dataset_with_a_virtual_file(
        folder_id=folder_id, files=[(vdatafile_name, data_file_1.id)]
    )
    vdataset_id = vdataset.id

    folder_contents = get_data_from_flask_jsonify(endpoint.get_folder(folder.id))
    assert len(folder_contents["entries"]) == 1
    assert folder_contents["entries"][0]["type"] == "dataset"

    # verify that get_dataset accommodates virtual_dataset_ids the same as real datasets
    dataset = get_data_from_flask_jsonify(endpoint.get_dataset(vdataset_id))
    assert dataset["name"] == "virtual"
    assert len(dataset["versions"]) == 1
    vdataset_permaname = dataset["permanames"][0]
    vdataset_version_id = dataset["versions"][0]["id"]

    # verify get_datasets is also sane
    datasets = get_data_from_flask_jsonify(
        endpoint.get_datasets(dict(datasetIds=[vdataset_id]))
    )
    assert len(datasets) == 1

    # make sure we can get it by permaname too
    dataset = get_data_from_flask_jsonify(endpoint.get_dataset(vdataset_permaname))
    assert dataset["name"] == "virtual"
    assert len(dataset["versions"]) == 1

    # run through all the dataset endpoints and just make sure we don't get any exceptions
    get_data_from_flask_jsonify(endpoint.get_dataset_last(vdataset_id))
    get_data_from_flask_jsonify(
        endpoint.update_dataset_name(vdataset_id, {"name": "new name"})
    )
    get_data_from_flask_jsonify(
        endpoint.update_dataset_description(
            vdataset_id, {"description": "new description"}
        )
    )

    dataset_version = get_data_from_flask_jsonify(
        endpoint.get_dataset_version(vdataset_version_id)
    )
    assert len(dataset_version["datafiles"]) == 1
    datafile = dataset_version["datafiles"][0]
    assert datafile["name"] == vdatafile_name
    assert datafile["type"] == "Raw"
    assert datafile["underlying_file_id"] == data_file_1_label

    # skipping get_dataset_versions because I don't know what uses it
    # endpoint.get_dataset_versions()

    get_data_from_flask_jsonify(
        endpoint.get_dataset_version_from_dataset(vdataset_id, vdataset_version_id)
    )
    get_data_from_flask_jsonify(
        endpoint.update_dataset_version_description(
            vdataset_version_id, {"description": "new description"}
        )
    )
    get_data_from_flask_jsonify(
        endpoint.deprecate_dataset_version(
            vdataset_version_id, {"deprecationReason": "reason"}
        )
    )
    get_data_from_flask_jsonify(
        endpoint.de_deprecate_dataset_version(vdataset_version_id)
    )
    get_data_from_flask_jsonify(endpoint.delete_dataset_version(vdataset_version_id))
    get_data_from_flask_jsonify(endpoint.de_delete_dataset_version(vdataset_version_id))

    version = 1
    format = "raw"
    get_data_from_flask_jsonify(
        endpoint.get_datafile(
            format, dataset_version_id=vdataset_version_id, datafile_name=vdatafile_name
        )
    )
    get_data_from_flask_jsonify(
        endpoint.get_datafile(
            format,
            dataset_permaname=vdataset_permaname,
            version=version,
            datafile_name=vdatafile_name,
        )
    )
    get_data_from_flask_jsonify(
        endpoint.get_datafile_short_summary(
            dataset_permaname=vdataset_permaname,
            version=version,
            datafile_name=vdatafile_name,
        )
    )

    get_data_from_flask_jsonify(endpoint.search_within_folder(folder_id, "description"))

    folder2 = models_controller.add_folder(
        "folder2", models_controller.Folder.FolderType.folder, "folder desc"
    )
    get_data_from_flask_jsonify(
        endpoint.move_to_folder(
            dict(
                entryIds=[vdataset_id],
                currentFolderId=folder_id,
                targetFolderId=folder2.id,
            )
        )
    )
def create_db_and_populate():
    create_db()

    admin_group = models_controller.get_group_by_name("Admin")

    # Create the Admin user
    admin_user = models_controller.add_user(
        name="admin", email="*****@*****.**", token="test-token"
    )
    admin_group.users.append(admin_user)
    home_folder_admin = admin_user.home_folder

    # Setting up the flask user
    flask.g.current_user = admin_user

    # Create a session where all this is happening
    upload_session_origin = models_controller.add_new_upload_session()

    # Create the origin data
    upload_session_file_origin = models_controller.add_upload_session_s3_file(
        session_id=upload_session_origin.id,
        filename="origin",
        s3_bucket=bucket_name,
        initial_file_type=models.InitialFileType.Raw,
        initial_s3_key="x",
        encoding="UTF-8",
    )
    origin_dataset = models_controller.add_dataset_from_session(
        session_id=upload_session_origin.id,
        dataset_name="origin",
        dataset_description="No description",
        current_folder_id=home_folder_admin.id,
    )

    # Create the Folder A folder
    folderA = models_controller.add_folder(
        name="Folder A",
        folder_type=models.Folder.FolderType.folder,
        description="desc",
    )
    models_controller.add_folder_entry(folder_id=home_folder_admin.id, entry_id=folderA.id)

    # Create Folder B inside Folder A
    folderB = models_controller.add_folder(
        name="Folder B", folder_type=models.Folder.FolderType.folder, description=""
    )
    models_controller.add_folder_entry(folder_id=folderA.id, entry_id=folderB.id)

    # Create Data inside Folder B
    upload_session_data = models_controller.add_new_upload_session()
    upload_session_file_data = models_controller.add_upload_session_s3_file(
        session_id=upload_session_data.id,
        filename="Data",
        s3_bucket=bucket_name,
        initial_file_type=models.InitialFileType.Raw,
        initial_s3_key="y",
        encoding="UTF-8",
    )
    data = models_controller.add_dataset_from_session(
        session_id=upload_session_data.id,
        dataset_name="Data",
        dataset_description="No description",
        current_folder_id=folderB.id,
    )

    data_datafiles = get_latest_version_datafiles_from_dataset(data.id)
    temp_data_datafiles = copy.copy(data_datafiles)

    # Create A1 Data/A2 Data/A3 Data inside Folder A
    for i in range(1, 4):
        name = "".join(["A", str(i), " DatasetVersion"])

        # We need now to generate new datafiles
        if i >= 1:
            loop_datafiles = []
            for datafile in temp_data_datafiles:
                loop_datafile = models_controller.add_s3_datafile(
                    name=datafile.name + "v" + str(i),
                    s3_bucket=bucket_name,
                    s3_key=models_controller.generate_convert_key(),
                    compressed_s3_key=models_controller.generate_compressed_key(),
                    type=datafile.format,
                    encoding="UTF-8",
                    short_summary="short summary",
                    long_summary="long_summary",
                )
                loop_datafiles.append(loop_datafile)
            temp_data_datafiles = loop_datafiles

        datafiles_id = [datafile.id for datafile in temp_data_datafiles]
        dataAX = models_controller.add_dataset_version(
            dataset_id=origin_dataset.id, datafiles_ids=datafiles_id
        )

    # create a sample dataset in a known location with a known permaname
    create_sample_dataset(forced_permaname="sample-1", folder_id="public")
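# Illustrative only: a rough sketch of how populate_db might be driven once the
# database and Flask application context are set up (that setup is omitted here).
# The CSV paths are hypothetical placeholders, not real export files.
#
# if __name__ == "__main__":
#     populate_db(
#         dataset_csv_path="exports/datasets.csv",
#         dataset_version_with_datafile_csv_path="exports/dataset_versions_with_datafiles.csv",
#     )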