示例#1
0
def new_dummy_folder():
    """Add a home-type folder named "Dummy folder" through ``mc.add_folder`` and return it."""
    return mc.add_folder(
        name="Dummy folder",
        folder_type=Folder.FolderType.home,
        description="I am a dummy folder for testing purpose",
    )
示例#2
0
def new_folder():
    """Add a regular folder named "New folder" through ``mc.add_folder`` and return it."""
    return mc.add_folder(
        name="New folder",
        folder_type=Folder.FolderType.folder,
        description="Test new folder description",
    )
示例#3
0
def test_edit_owned(session: SessionBase):
    """Check that ``mc.get_rights`` reports ``can_edit`` for a folder added
    while the freshly created user is ``flask.g.current_user``."""
    owner = mc.add_user("test", "*****@*****.**")
    flask.g.current_user = owner

    owned_folder = mc.add_folder(
        name="Test folder",
        folder_type=Folder.FolderType.folder,
        description="",
    )

    assert mc.get_rights(owned_folder.id) == EntryRightsEnum.can_edit
示例#4
0
def test_view_not_owned(session: SessionBase):
    """Check that ``mc.get_rights`` reports ``can_view`` for a folder whose
    creator has been set to a user other than ``flask.g.current_user``."""
    acting_user = mc.add_user("test", "*****@*****.**")
    flask.g.current_user = acting_user

    other_user = mc.add_user("test_useless", "*****@*****.**")

    foreign_folder = mc.add_folder(
        name="folder_not_owned",
        folder_type=Folder.FolderType.folder,
        description="",
    )
    # Hand the folder over to the other user before asking for the rights.
    foreign_folder.creator = other_user

    assert mc.get_rights(foreign_folder.id) == EntryRightsEnum.can_view
示例#5
0
def test_get_parent_folders(session: SessionBase, new_folder,
                            new_dummy_folder):
    """Check that ``mc.get_parent_folders`` returns both folders that hold the
    same child folder in their ``entries``."""
    expected_parents = (new_folder, new_dummy_folder)

    shared_child = mc.add_folder(
        name="Inception",
        folder_type=Folder.FolderType.folder,
        description="Folder inside two folders",
    )
    # Register the same child under each of the two parents.
    for parent in expected_parents:
        parent.entries.append(shared_child)

    found_parents = mc.get_parent_folders(shared_child.id)

    assert len(found_parents) == 2
    for parent in expected_parents:
        assert parent in found_parents
示例#6
0
def create_folder(metadata):
    """Create a folder and attach it to its parent.

    ``metadata`` is a dictionary providing the keys ``name``, ``description``
    and ``parentId``. The new folder is serialized with
    ``schemas.FolderNamedIdSchema`` and returned through ``flask.jsonify``.
    """
    # TODO: Add the add_folder_entry inside the add_folder function?
    # Read every key up front (in this order) so a missing one fails before
    # anything is created.
    name, description, parent_id = (
        metadata[key] for key in ("name", "description", "parentId"))

    created = models_controller.add_folder(
        name=name,
        folder_type=models_controller.Folder.FolderType.folder,
        description=description,
    )
    models_controller.add_folder_entry(parent_id, created.id)

    payload = schemas.FolderNamedIdSchema().dump(created).data
    return flask.jsonify(payload)
示例#7
0
def populate_db(dataset_csv_path, dataset_version_with_datafile_csv_path):
    """Populate the database from a datasets CSV and a datafiles CSV.

    The import runs in three passes:

    1. Each row of ``dataset_csv_path`` creates a dataset (and, when missing,
       its owning user and its folder) and moves the dataset into that folder.
    2. Each row of ``dataset_version_with_datafile_csv_path`` creates an S3
       datafile (and, when missing, the user named in ``created_by``) and
       registers it under the permaname of its dataset.
    3. Each registered datafile gets its own dataset version; a datafile whose
       version is 1 also sets the creation date of the dataset.

    A summary of the counters is printed at the end.

    Args:
        dataset_csv_path: Path of the datasets CSV. Rows are read through the
            columns ``name``, ``permaname``, ``description`` and ``folder``.
        dataset_version_with_datafile_csv_path: Path of the datafiles CSV.
            Rows are read through the columns ``permaname``, ``type``,
            ``s3_location``, ``short_desc``, ``id``, ``created_timestamp``,
            ``version`` and ``created_by``; ``name`` and ``long_desc`` fall
            back to ``"data"`` and ``""`` when the column is absent.
    """
    # TODO: We should handle the Public folder properly, instead of adding it to Philip's account
    # Summary counters, printed once the import is over
    nb_user_created = 0
    nb_user_skipped = 0
    nb_dataset_created = 0
    nb_row_dataset_skipped = 0
    nb_datafile_created = 0
    nb_datafile_skipped = 0
    nb_row_datafile_skipped = 0
    nb_dataset_version_created = 0
    # Never incremented below: the matching summary line always reports 0
    nb_dataset_version_skipped = 0

    # Maps each dataset permaname to the DataFileInfo records registered for
    # it, so the dataset versions can be created once all datafiles are known
    # Dict<String, Array<DataFileInfo>>
    dict_permaname_datafile_ids = {}

    # We first manage the dataset creation
    # newline="" lets the csv module handle line breaks inside quoted fields
    with open(dataset_csv_path, newline="") as dataset_file:
        print("Creating the users and the datasets")
        reader = csv.DictReader(dataset_file)

        for row in reader:
            is_public = False

            if not row["permaname"]:
                print(
                    "Warning: We found an empty permaname entry: {}. Skipping it."
                    .format(row))
                nb_row_dataset_skipped += 1
                continue

            dataset_name = row["name"]
            dataset_permaname = row["permaname"]
            dataset_description = row["description"]

            if row["folder"].startswith("home"):
                dataset_folder_user = row["folder"]

                # To get the user from dataset_folder_user, we extract the user from the parenthesis
                dataset_user_email = dataset_folder_user[
                    dataset_folder_user.find("(") +
                    1:dataset_folder_user.find(")")]

                # Handle the case where user email is None
                if dataset_user_email == "None":
                    print(
                        "Warning: We found a row with folder {}. Skipping it.".
                        format(row["folder"]))
                    nb_user_skipped += 1
                    continue

                # To get the target folder, we take the string before the parenthesis
                dataset_folder_name = dataset_folder_user.split("(")[0]
            else:
                # For now, every other folder is stored under one fixed account
                is_public = True
                dataset_folder_name = row["folder"]
                dataset_user_email = "*****@*****.**"

            # Setting up the user
            try:
                dataset_current_user = models_controller.get_user_by_email(
                    dataset_user_email)
            except NoResultFound:
                # User does not exist yet, so we create it
                dataset_user_name = dataset_user_email[:dataset_user_email.
                                                       find("@")]
                dataset_current_user = models_controller.add_user(
                    name=dataset_user_name, email=dataset_user_email)
                print("User with email: {} created".format(dataset_user_email))
                nb_user_created += 1

            # The controller calls below act on behalf of this user
            flask.g.current_user = dataset_current_user

            # TODO: We should not create the dataset if it already exists
            new_dataset = models_controller.add_dataset(
                name=dataset_name,
                permaname=dataset_permaname,
                description=dataset_description,
            )
            try:
                # TODO: Check it is case insensitive
                folder_key = dataset_folder_name.lower()
                if folder_key == "home":
                    dataset_folder = dataset_current_user.home_folder
                elif folder_key == "trash":
                    dataset_folder = dataset_current_user.trash_folder
                else:
                    dataset_folder = models_controller.get_folder_by_name(
                        dataset_folder_name)
            except NoResultFound:
                # If no result, it means we need to create the folder in the user space or in public
                dataset_folder = models_controller.add_folder(
                    name=dataset_folder_name,
                    folder_type=models_controller.Folder.FolderType.folder,
                    description=None,
                )

                if is_public:
                    models_controller.move_to_folder(
                        entry_ids=[dataset_folder.id],
                        current_folder_id=None,
                        target_folder_id=models_controller.get_public_folder().
                        id,
                    )
                else:
                    models_controller.move_to_folder(
                        entry_ids=[dataset_folder.id],
                        current_folder_id=None,
                        target_folder_id=dataset_current_user.home_folder_id,
                    )

            # Now we can move the dataset to the folder
            models_controller.move_to_folder([new_dataset.id], None,
                                             dataset_folder.id)

            # We add the dataset_permaname as key with value an empty array so we can add each matching datafile
            dict_permaname_datafile_ids[dataset_permaname] = []

            nb_dataset_created += 1

    # We then manage the attribution of the dataset_version to the freshly created datasets
    with open(dataset_version_with_datafile_csv_path,
              newline="") as dataset_version_with_datafile_csv:
        print("")
        print("Creating the datafiles")
        reader = csv.DictReader(dataset_version_with_datafile_csv)

        for row in reader:
            if not row["permaname"]:
                print("We found an empty permaname entry: {}. Skipping it.".
                      format(row))
                nb_row_datafile_skipped += 1
                nb_datafile_skipped += 1
                continue

            # We first create the datafiles
            datafile_type = row["type"]
            datafile_name = row.get("name", "data")
            datafile_s3_location = urlparse(row["s3_location"])
            datafile_short_summary = row["short_desc"]
            datafile_long_summary = row.get("long_desc", "")
            datafile_id = row["id"]
            datafile_creation_date = row["created_timestamp"]
            datafile_version = row["version"]
            datafile_created_by = row["created_by"]

            dataset_permaname = row["permaname"]

            # s3://taiga2/imported/4bb2169e-5b87-4d1c-a78e-3e6006316561.hdf5
            datafile_s3_bucket = datafile_s3_location.netloc
            datafile_s3_key = datafile_s3_location.path[
                1:]  # We remove the first '/'

            # Set the user to the one in the row to make the manipulations under his name
            try:
                current_user = models_controller.get_user_by_email(
                    datafile_created_by)
            except NoResultFound:
                print(
                    "Warning: The user email found in 'created_by' column ({}) was not found in the dataset side. "
                    "Creating one.".format(datafile_created_by))
                datafile_created_by_name = datafile_created_by[:
                                                               datafile_created_by
                                                               .find("@")]
                current_user = models_controller.add_user(
                    name=datafile_created_by_name, email=datafile_created_by)
                nb_user_created += 1

            flask.g.current_user = current_user

            # TODO: We should not create the datafile if it already exists: ie s3_bucket/s3_key exists
            new_datafile = models_controller.add_s3_datafile(
                s3_bucket=datafile_s3_bucket,
                s3_key=datafile_s3_key,
                name=datafile_name,
                type=datafile_type,
                short_summary=datafile_short_summary,
                long_summary=datafile_long_summary,
            )

            # We register the datafile with its permaname dataset to later create the dataset version
            # with all the datafiles
            if dataset_permaname in dict_permaname_datafile_ids:
                datafile_info = DataFileInfo(
                    id=datafile_id,
                    datafile=new_datafile,
                    version=datafile_version,
                    creation_date=datafile_creation_date,
                    owner_email=datafile_created_by,
                )
                dict_permaname_datafile_ids[dataset_permaname].append(
                    datafile_info)
            else:
                # NOTE(review): the datafile was already added above even
                # though it is counted as skipped here -- confirm this is wanted
                print(
                    "Warning: We found a datafile ({}) without a matching dataset ({}). Skipping it."
                    .format(datafile_id, dataset_permaname))
                nb_datafile_skipped += 1
                continue

            nb_datafile_created += 1

    # Then we create the dataset_version with the taiga id, linking with the dataset using its permaname
    print("")
    print("Linking the datafiles with the datasets")
    for dataset_permaname, array_data_file_info in dict_permaname_datafile_ids.items(
    ):
        dataset = models_controller.get_dataset_from_permaname(
            dataset_permaname)

        for datafile_info in array_data_file_info:
            # Create the version under the name of the datafile owner
            flask.g.current_user = models_controller.get_user_by_email(
                datafile_info.owner_email)
            # TODO: We should not create the dataset_version if it already exists. ie version already exists for this dataset
            models_controller.add_dataset_version(
                dataset_id=dataset.id,
                datafiles_ids=[datafile_info.datafile.id],
                anterior_creation_date=datafile_info.creation_date,
                forced_id=datafile_info.id,
            )
            # One version is created per datafile, so count it here
            nb_dataset_version_created += 1

            # The first version carries the creation date of the dataset itself
            if int(datafile_info.version) == 1:
                models_controller.update_dataset_creation_date(
                    dataset_id=dataset.id,
                    new_date=datafile_info.creation_date)

    print("")
    print("Done! Here is the summary:")
    print("\tLines skipped in dataset file: {}".format(nb_row_dataset_skipped))
    print(
        "\tLines skipped in datafile file: {}".format(nb_row_datafile_skipped))
    print("")
    print("\tDatasets created: {}".format(nb_dataset_created))
    print("\tUsers created: {}".format(nb_user_created))
    print("\tUsers skipped: {}".format(nb_user_skipped))
    print("")
    print("\tDatafiles created: {}".format(nb_datafile_created))
    print("\tDatafiles skipped: {}".format(nb_datafile_skipped))
    print("")
    print("\tDatasetVersions created: {}".format(nb_dataset_version_created))
    print("\tDatasetVersions skipped and datasets cleaned: {}".format(
        nb_dataset_version_skipped))
示例#8
0
def test_create_virtual_dataset_endpoint(session: SessionBase):
    """Create a virtual dataset through the endpoint, add a second version to
    it, and check the name, descriptions and datafiles of both versions."""
    folder = models_controller.add_folder(
        "folder", models_controller.Folder.FolderType.folder, "folder desc"
    )

    dataset1 = _create_dataset_with_a_file("datafile")
    data_file_1 = "{}.1/datafile".format(dataset1.permaname)

    session.flush()

    # First version: one virtual file named "alias" pointing at dataset1's file
    upload_session_1 = new_upload_session()
    _add_virtual_file_to_upload_session(upload_session_1.id, "alias", data_file_1)

    sessionDatasetInfo = {
        "sessionId": upload_session_1.id,
        "datasetName": "version-1-name",
        "datasetDescription": "version-1-desc",
        "currentFolderId": folder.id,
    }

    response_json_create_dataset = endpoint.create_dataset(
        sessionDatasetInfo=sessionDatasetInfo
    )
    virtual_dataset_id = get_data_from_flask_jsonify(response_json_create_dataset)

    # now update with a new version: same alias, pointing at another dataset's file
    upload_session_2 = new_upload_session()
    dataset2 = _create_dataset_with_a_file()
    data_file_2 = "{}.1/datafile".format(dataset2.permaname)
    _add_virtual_file_to_upload_session(upload_session_2.id, "alias", data_file_2)

    datasetVersionMetadata = {
        "sessionId": upload_session_2.id,
        "datasetId": virtual_dataset_id,
        "newDescription": "version-2-desc",
    }

    endpoint.create_new_dataset_version(datasetVersionMetadata=datasetVersionMetadata)

    v = models_controller.get_dataset(virtual_dataset_id)
    latest_dataset_version = models_controller.get_latest_dataset_version(v.id)
    assert v.name == "version-1-name"
    # The description is expected on the version, not on the dataset itself
    assert v.description is None
    assert latest_dataset_version.description == "version-2-desc"

    assert len(v.dataset_versions) == 2

    # check each version
    version = v.dataset_versions[0]
    assert version.version == 1
    assert len(version.datafiles) == 1
    entry = version.datafiles[0]
    assert entry.name == "alias"
    data_file_id_1 = entry.underlying_file_id

    version = v.dataset_versions[1]
    assert version.version == 2
    assert len(version.datafiles) == 1
    entry = version.datafiles[0]
    assert entry.name == "alias"
    # The second version must point at a different underlying file
    assert entry.underlying_file_id != data_file_id_1
示例#9
0
def test_dataset_endpoints_on_virtual_dataset(session: SessionBase):
    """Run the dataset endpoints against a dataset holding a virtual file.

    A few responses are asserted on; most endpoints are only called to make
    sure they do not raise.
    """
    unwrap = get_data_from_flask_jsonify

    source_dataset = _create_dataset_with_a_file()
    source_datafile = source_dataset.dataset_versions[0].datafiles[0]
    expected_underlying_id = "{}.1/datafile".format(source_dataset.permaname)
    folder = models_controller.add_folder(
        "folder", models_controller.Folder.FolderType.folder, "folder desc"
    )
    folder_id = folder.id

    alias = "alias"
    virtual_dataset = _create_dataset_with_a_virtual_file(
        folder_id=folder_id, files=[(alias, source_datafile.id)]
    )
    virtual_id = virtual_dataset.id

    folder_entries = unwrap(endpoint.get_folder(folder.id))["entries"]
    assert len(folder_entries) == 1
    assert folder_entries[0]["type"] == "dataset"

    # get_dataset must accept a virtual dataset id the same way as a real one
    by_id = unwrap(endpoint.get_dataset(virtual_id))
    assert by_id["name"] == "virtual"
    assert len(by_id["versions"]) == 1

    virtual_permaname = by_id["permanames"][0]
    virtual_version_id = by_id["versions"][0]["id"]

    # get_datasets should also be sane
    listed = unwrap(endpoint.get_datasets(dict(datasetIds=[virtual_id])))
    assert len(listed) == 1

    # lookup by permaname has to work too
    by_permaname = unwrap(endpoint.get_dataset(virtual_permaname))
    assert by_permaname["name"] == "virtual"
    assert len(by_permaname["versions"]) == 1

    # from here on, most calls only verify that no exception is raised
    unwrap(endpoint.get_dataset_last(virtual_id))
    unwrap(endpoint.update_dataset_name(virtual_id, {"name": "new name"}))
    unwrap(
        endpoint.update_dataset_description(
            virtual_id, {"description": "new description"}
        )
    )

    version_payload = unwrap(endpoint.get_dataset_version(virtual_version_id))
    assert len(version_payload["datafiles"]) == 1
    listed_datafile = version_payload["datafiles"][0]
    assert listed_datafile["name"] == alias
    assert listed_datafile["type"] == "Raw"
    assert listed_datafile["underlying_file_id"] == expected_underlying_id

    # skipping get_dataset_versions because I don't know what uses it
    # endpoint.get_dataset_versions()
    unwrap(endpoint.get_dataset_version_from_dataset(virtual_id, virtual_version_id))
    unwrap(
        endpoint.update_dataset_version_description(
            virtual_version_id, {"description": "new description"}
        )
    )
    unwrap(
        endpoint.deprecate_dataset_version(
            virtual_version_id, {"deprecationReason": "reason"}
        )
    )
    unwrap(endpoint.de_deprecate_dataset_version(virtual_version_id))
    unwrap(endpoint.delete_dataset_version(virtual_version_id))
    unwrap(endpoint.de_delete_dataset_version(virtual_version_id))

    version_number = 1
    datafile_format = "raw"
    unwrap(
        endpoint.get_datafile(
            datafile_format,
            dataset_version_id=virtual_version_id,
            datafile_name=alias,
        )
    )
    unwrap(
        endpoint.get_datafile(
            datafile_format,
            dataset_permaname=virtual_permaname,
            version=version_number,
            datafile_name=alias,
        )
    )
    unwrap(
        endpoint.get_datafile_short_summary(
            dataset_permaname=virtual_permaname,
            version=version_number,
            datafile_name=alias,
        )
    )
    unwrap(endpoint.search_within_folder(folder_id, "description"))

    # finally move the virtual dataset into a second folder
    destination = models_controller.add_folder(
        "folder2", models_controller.Folder.FolderType.folder, "folder desc"
    )
    move_request = dict(
        entryIds=[virtual_id],
        currentFolderId=folder_id,
        targetFolderId=destination.id,
    )
    unwrap(endpoint.move_to_folder(move_request))
def create_db_and_populate():
    """Create the database and fill it with a small set of fixture data.

    In order: an "admin" user added to the "Admin" group, an "origin" dataset
    in the admin home folder, "Folder A" in the home folder, "Folder B" inside
    "Folder A", a "Data" dataset inside "Folder B", three extra versions of the
    origin dataset, and finally a sample dataset with the permaname
    "sample-1".
    """
    create_db()

    admin_group = models_controller.get_group_by_name("Admin")

    # Create the Admin user
    admin_user = models_controller.add_user(name="admin",
                                            email="*****@*****.**",
                                            token="test-token")
    admin_group.users.append(admin_user)
    home_folder_admin = admin_user.home_folder

    # Setting up the flask user
    flask.g.current_user = admin_user

    # Create a session where all this is happening
    upload_session_origin = models_controller.add_new_upload_session()

    # Create the origin data
    models_controller.add_upload_session_s3_file(
        session_id=upload_session_origin.id,
        filename="origin",
        s3_bucket=bucket_name,
        initial_file_type=models.InitialFileType.Raw,
        initial_s3_key="x",
        encoding="UTF-8",
    )

    origin_dataset = models_controller.add_dataset_from_session(
        session_id=upload_session_origin.id,
        dataset_name="origin",
        dataset_description="No description",
        current_folder_id=home_folder_admin.id,
    )

    # Create the Folder A folder
    folderA = models_controller.add_folder(
        name="Folder A",
        folder_type=models.Folder.FolderType.folder,
        description="desc")
    models_controller.add_folder_entry(folder_id=home_folder_admin.id,
                                       entry_id=folderA.id)

    # Create Folder B inside Folder A
    folderB = models_controller.add_folder(
        name="Folder B",
        folder_type=models.Folder.FolderType.folder,
        description="")
    models_controller.add_folder_entry(folder_id=folderA.id,
                                       entry_id=folderB.id)

    # Create Data inside Folder B
    upload_session_data = models_controller.add_new_upload_session()
    models_controller.add_upload_session_s3_file(
        session_id=upload_session_data.id,
        filename="Data",
        s3_bucket=bucket_name,
        initial_file_type=models.InitialFileType.Raw,
        initial_s3_key="y",
        encoding="UTF-8",
    )

    data = models_controller.add_dataset_from_session(
        session_id=upload_session_data.id,
        dataset_name="Data",
        dataset_description="No description",
        current_folder_id=folderB.id,
    )

    data_datafiles = get_latest_version_datafiles_from_dataset(data.id)

    # Work on a shallow copy so the list returned above is never iterated
    # while new datafiles are being added
    temp_data_datafiles = copy.copy(data_datafiles)

    # Add three versions to the origin dataset. Each round derives fresh S3
    # datafiles from the previous round's datafiles, so names gain one more
    # "v<i>" suffix per round.
    for i in range(1, 4):
        temp_data_datafiles = [
            models_controller.add_s3_datafile(
                name=datafile.name + "v" + str(i),
                s3_bucket=bucket_name,
                s3_key=models_controller.generate_convert_key(),
                compressed_s3_key=models_controller.
                generate_compressed_key(),
                type=datafile.format,
                encoding="UTF-8",
                short_summary="short summary",
                long_summary="long_summary",
            )
            for datafile in temp_data_datafiles
        ]
        datafiles_id = [datafile.id for datafile in temp_data_datafiles]
        models_controller.add_dataset_version(
            dataset_id=origin_dataset.id, datafiles_ids=datafiles_id)

    # create a sample dataset in a known location with a known permaname
    create_sample_dataset(forced_permaname="sample-1", folder_id="public")