Example #1
def test_add_dataset_version(session: SessionBase):
    new_dataset_version_name = "New Dataset Version"

    new_dataset_name = "New Dataset for test_add_dataset_version"
    new_dataset_description = "New description for test_add_dataset_version"
    _new_datafile = mc.add_s3_datafile(
        name="Datafile for test_add_dataset_version",
        s3_bucket="broadtaiga2prototype",
        s3_key=mc.generate_convert_key(),
        compressed_s3_key=mc.generate_compressed_key(),
        type=mc.S3DataFile.DataFileFormat.Raw,
        encoding="UTF-8",
        short_summary="short",
        long_summary="long",
    )

    _new_dataset = mc.add_dataset(
        name=new_dataset_version_name,
        description=new_dataset_description,
        datafiles_ids=[_new_datafile.id],
    )

    # add_dataset creates an initial version; attach the new datafile to it
    _new_dataset_version = _new_dataset.dataset_versions[0]
    _new_dataset_version.datafiles.append(_new_datafile)

    db.session.add(_new_dataset_version)
    db.session.commit()

    assert _new_dataset_version.name == new_dataset_version_name
    assert _new_dataset_version.creator == flask.g.current_user
    assert _new_dataset_version.dataset == _new_dataset
    assert _new_dataset_version.state == DatasetVersion.DatasetVersionState.approved
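The tests on this page receive a session: SessionBase argument from the surrounding test suite. Below is a minimal sketch of a pytest fixture that could supply it, assuming the suite simply exposes the same Flask-SQLAlchemy db.session the snippet above commits through; the fixture name and the rollback strategy are assumptions, not the project's actual conftest.

import pytest


@pytest.fixture
def session():
    # Hypothetical fixture: db is the application's Flask-SQLAlchemy handle
    # used in the example above (its import path is project-specific and
    # omitted here). Yield its session and roll back afterwards so each test
    # starts from a clean state.
    yield db.session
    db.session.rollback()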
Example #2
def new_datafile():
    # TODO: These tests should be using the endpoint and not the model
    new_datafile_name = "New Datafile"

    _new_datafile = models_controller.add_s3_datafile(
        name=new_datafile_name,
        s3_bucket="broadtaiga2prototype",
        s3_key=models_controller.generate_convert_key(),
        compressed_s3_key=models_controller.generate_compressed_key(),
        type=S3DataFile.DataFileFormat.Raw,
        encoding="UTF-8",
        short_summary="short",
        long_summary="long",
    )

    return _new_datafile
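A short usage sketch for the helper above. Wiring it up as a pytest fixture and the attributes checked below (id, name) are assumptions based on how the other examples on this page use the returned datafile.

import pytest


@pytest.fixture(name="datafile")
def datafile_fixture():
    # Reuse the helper above as a fixture (assumed wiring).
    return new_datafile()


def test_new_datafile_is_persisted(datafile):
    # The controller should return a persisted datafile carrying the values
    # passed to add_s3_datafile.
    assert datafile.id is not None
    assert datafile.name == "New Datafile"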
Example #3
def new_datafile():
    new_datafile_name = "New Datafile"

    _new_datafile = mc.add_s3_datafile(
        name=new_datafile_name,
        s3_bucket="broadtaiga2prototype",
        s3_key=mc.generate_convert_key(),
        compressed_s3_key=mc.generate_compressed_key(),
        type=S3DataFile.DataFileFormat.Raw,
        encoding="UTF-8",
        short_summary="short",
        long_summary="long",
    )

    return _new_datafile
Example #4
def test_upload_session_file(filename, initial_file_type, session: SessionBase,
                             user_id):
    print("initial_file_type", initial_file_type, filename)

    new_bucket = aws.s3.Bucket("bucket")
    new_bucket.create()

    converted_s3_key = models_controller.generate_convert_key()
    compressed_s3_key = models_controller.generate_compressed_key()

    with open(filename, "rb") as data:
        aws.s3.Bucket(new_bucket.name).put_object(Key=converted_s3_key,
                                                  Body=data)
    s3_raw_uploaded_file = aws.s3.Object(new_bucket.name, converted_s3_key)

    bucket_name = "bucket"
    initial_s3_key = s3_raw_uploaded_file.key

    # use a distinct name so the `session: SessionBase` fixture argument is not shadowed
    upload_session = models_controller.add_new_upload_session()
    upload_session_file = models_controller.add_upload_session_s3_file(
        upload_session.id,
        os.path.basename(filename),
        initial_file_type,
        initial_s3_key,
        bucket_name,
        "UTF-8",
    )

    background_process_new_upload_session_file.delay(
        upload_session_file.id,
        initial_s3_key,
        initial_file_type,
        bucket_name,
        converted_s3_key,
        compressed_s3_key,
        upload_session_file.encoding,
    ).wait()

    # confirm the converted object was published back to s3 (read it back
    # through the standard boto3 get()/read() calls)
    converted_body = aws.s3.Object(bucket_name, converted_s3_key).get()["Body"]
    assert converted_body.read() is not None

    # Check updated UploadSessionFile
    updated_upload_session_file = models_controller.get_upload_session_file(
        upload_session_file.id)

    assert updated_upload_session_file.column_types_as_json is None
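The final S3 assertion reads the converted object back out of the bucket. Here is a standalone sketch of that upload/read-back round trip using a plain boto3 resource, on the assumption that the test's aws.s3 behaves like a standard (for example moto-backed) boto3 S3 resource.

import boto3


def s3_round_trip(filename: str, bucket_name: str, key: str) -> bytes:
    # Upload a local file to S3 and read it straight back, mirroring the
    # put_object / Object(...).get() calls in the test above.
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(bucket_name)
    bucket.create()

    with open(filename, "rb") as data:
        bucket.put_object(Key=key, Body=data)

    return s3.Object(bucket_name, key).get()["Body"].read()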
Example #5
def _create_dataset_with_a_file(name="datafile") -> Dataset:
    _new_datafile = models_controller.add_s3_datafile(
        name=name,
        s3_bucket="broadtaiga2prototype",
        s3_key=models_controller.generate_convert_key(),
        compressed_s3_key=models_controller.generate_compressed_key(),
        type=models_controller.S3DataFile.DataFileFormat.Raw,
        encoding="UTF-8",
        short_summary="short",
        long_summary="long",
    )

    dataset = models_controller.add_dataset(
        name="dataset", description="", datafiles_ids=[_new_datafile.id]
    )

    return dataset
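A short usage sketch for the helper above; the traversal through dataset_versions and datafiles mirrors the assertions in the virtual-dataset example below, and the attribute names are otherwise assumptions.

def test_create_dataset_with_a_file():
    dataset = _create_dataset_with_a_file(name="my-datafile")

    # add_dataset creates the dataset with a single initial version
    assert dataset.name == "dataset"
    version = dataset.dataset_versions[0]
    assert version.datafiles[0].name == "my-datafile"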
Example #6
def test_basic_create_virtual_dataset(session: SessionBase):
    # create mock data of a single dataset and a virtual dataset which references the files but with a different name
    _new_datafile = mc.add_s3_datafile(
        name="underlying-datafile",
        s3_bucket="broadtaiga2prototype",
        s3_key=mc.generate_convert_key(),
        compressed_s3_key=mc.generate_compressed_key(),
        type=mc.S3DataFile.DataFileFormat.Raw,
        encoding="UTF-8",
        short_summary="short",
        long_summary="long",
    )

    mc.add_dataset(name="underlying-dataset",
                   description="",
                   datafiles_ids=[_new_datafile.id])

    virtual_datafile = mc.add_virtual_datafile(name="alias",
                                               datafile_id=_new_datafile.id)

    virtual_dataset = mc.add_dataset(name="virtual-dataset",
                                     description="desc",
                                     datafiles_ids=[virtual_datafile.id])

    # make sure the subsequent queries can find new objects
    session.flush()

    assert virtual_dataset.id is not None

    v = mc.get_dataset(virtual_dataset.id)
    assert v.name == "virtual-dataset"

    assert len(v.dataset_versions) == 1

    version = v.dataset_versions[0]
    assert len(version.datafiles)

    entry = version.datafiles[0]
    assert entry.name == "alias"
    assert entry.underlying_data_file.id == _new_datafile.id

def create_db_and_populate():
    create_db()

    admin_group = models_controller.get_group_by_name("Admin")

    # Create the Admin user
    admin_user = models_controller.add_user(name="admin",
                                            email="*****@*****.**",
                                            token="test-token")
    admin_group.users.append(admin_user)
    home_folder_admin = admin_user.home_folder

    # Setting up the flask user
    flask.g.current_user = admin_user

    # Create a session where all this is happening
    upload_session_origin = models_controller.add_new_upload_session()

    # Create the origin data
    upload_session_file_origin = models_controller.add_upload_session_s3_file(
        session_id=upload_session_origin.id,
        filename="origin",
        s3_bucket=bucket_name,
        initial_file_type=models.InitialFileType.Raw,
        initial_s3_key="x",
        encoding="UTF-8",
    )

    origin_dataset = models_controller.add_dataset_from_session(
        session_id=upload_session_origin.id,
        dataset_name="origin",
        dataset_description="No description",
        current_folder_id=home_folder_admin.id,
    )

    # Create the Folder A folder
    folderA = models_controller.add_folder(
        name="Folder A",
        folder_type=models.Folder.FolderType.folder,
        description="desc")
    models_controller.add_folder_entry(folder_id=home_folder_admin.id,
                                       entry_id=folderA.id)

    # Create Folder B inside Folder A
    folderB = models_controller.add_folder(
        name="Folder B",
        folder_type=models.Folder.FolderType.folder,
        description="")
    models_controller.add_folder_entry(folder_id=folderA.id,
                                       entry_id=folderB.id)

    # Create Data inside Folder B
    upload_session_data = models_controller.add_new_upload_session()
    upload_session_file_data = models_controller.add_upload_session_s3_file(
        session_id=upload_session_data.id,
        filename="Data",
        s3_bucket=bucket_name,
        initial_file_type=models.InitialFileType.Raw,
        initial_s3_key="y",
        encoding="UTF-8",
    )

    data = models_controller.add_dataset_from_session(
        session_id=upload_session_data.id,
        dataset_name="Data",
        dataset_description="No description",
        current_folder_id=folderB.id,
    )

    data_datafiles = get_latest_version_datafiles_from_dataset(data.id)

    temp_data_datafiles = copy.copy(data_datafiles)

    # Create A1 Data/A2 Data/A3 Data inside Folder A
    for i in range(1, 4):
        name = "".join(["A", str(i), " DatasetVersion"])

        # Generate a fresh set of datafiles for each new dataset version
        loop_datafiles = []
        for datafile in temp_data_datafiles:
            loop_datafile = models_controller.add_s3_datafile(
                name=datafile.name + "v" + str(i),
                s3_bucket=bucket_name,
                s3_key=models_controller.generate_convert_key(),
                compressed_s3_key=models_controller.generate_compressed_key(),
                type=datafile.format,
                encoding="UTF-8",
                short_summary="short summary",
                long_summary="long_summary",
            )
            loop_datafiles.append(loop_datafile)
        temp_data_datafiles = loop_datafiles

        datafiles_id = [datafile.id for datafile in temp_data_datafiles]
        dataAX = models_controller.add_dataset_version(
            dataset_id=origin_dataset.id, datafiles_ids=datafiles_id)

    # create a sample dataset in a known location with a known permaname
    create_sample_dataset(forced_permaname="sample-1", folder_id="public")