Example #1
def create_user(user_handle: str, config_file: str, force: bool) -> None:
    """Create user.

    Create the user(s) defined in `config_file`, using the password
    stored for `user_handle` in the users JSON file.
    """
    # Init
    if config.PRODUCTION and not force:
        print(
            "Create user on a PRODUCTION instance not allowed. Use --force to force it."
        )
        sys.exit()
    users = read_json(config.USER_FILENAME)
    workflow = read_json(os.path.join(ROOT_DIR, config_file))

    # Users
    for user in workflow["users"]:
        if "create" in user:
            filename = os.path.join(ROOT_DIR, user["create"]["filename"])
            data = read_json(filename)
            if "update" in user["create"]:
                for key, val in user["create"]["update"].items():
                    data[key] = val
            # Create the user via the Dataverse builtin-users endpoint.
            requests.post(
                f"{config.BASE_URL}/api/builtin-users",
                params={"password": users[user_handle]["password"],
                        "key": config.BUILTIN_USER_KEY},
                json=data,
            )
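For orientation, here is a minimal sketch of the workflow config this function walks. The key names mirror what `create_user` reads above; the concrete filename, e-mail, and override are illustrative placeholders, not taken from the source.

# Hypothetical workflow config consumed by create_user (shape inferred from the
# loop above; the filename and the override value are placeholders).
example_workflow = {
    "users": [
        {
            "create": {
                # JSON payload sent to the builtin-users endpoint
                "filename": "data/users/jane_doe.json",
                # optional per-key overrides applied to that payload
                "update": {"email": "jane.doe@example.org"},
            }
        }
    ]
}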
Example #2
def generate_data(tree: dict,
                  user_handle: str,
                  filename: str = "tree.json") -> None:
    """Pre-process data coming from collect data.

    Generates lists of Dataverses (`dataverses.json`),
    Datasets (`datasets.json`) and Datafiles
    (`datafiles.json`) from the tree structure (`tree.json`).
    The created lists are then used for tests
    (`test_all_dataverses()`, `test_all_datasets()`,
    `test_all_datafiles()`). The generated JSON files
    are stored inside `utils/` in the related instance
    folder.

    """
    # Note: the tree is re-read from the JSON file written by collect_data,
    # rather than taken from the `tree` argument.
    data = read_json(os.path.join(UTILS_DATA_DIR, user_handle, filename))
    dataverses, datasets, datafiles = dataverse_tree_walker(data)
    filename_dv = os.path.join(UTILS_DATA_DIR, user_handle,
                               config.FILENAME_DATAVERSES)
    # Ensure the output directory exists; makedirs creates any missing
    # intermediate directories and is a no-op for existing ones.
    os.makedirs(os.path.join(ROOT_DIR, "data", "utils", user_handle),
                exist_ok=True)
    if os.path.isfile(filename_dv):
        os.remove(filename_dv)
    filename_ds = os.path.join(UTILS_DATA_DIR, user_handle,
                               config.FILENAME_DATASETS)
    if os.path.isfile(filename_ds):
        os.remove(filename_ds)
    filename_df = os.path.join(UTILS_DATA_DIR, user_handle,
                               config.FILENAME_DATAFILES)
    if os.path.isfile(filename_df):
        os.remove(filename_df)
    write_json(filename_dv, dataverses)
    write_json(filename_ds, datasets)
    write_json(filename_df, datafiles)
    metadata = {
        "dataverses": len(dataverses),
        "datasets": len(datasets),
        "datafiles": len(datafiles),
    }
    write_json(os.path.join(UTILS_DATA_DIR, config.FILENAME_METADATA),
               metadata)
    print(f"- Dataverses: {len(dataverses)}")
    print(f"- Datasets: {len(datasets)}")
    print(f"- Datafiles: {len(datafiles)}")
Example #3
def collect_data(
    user_handle: str,
    parent: str,
    data_types: List[str],
    filename: str,
    create_json: bool,
) -> None:
    """Collect data of a Dataverse installation.

    Collect data from a given node downwards through the
    Dataverse tree-like data structure.

    Collects the complete data of a Dataverse instance in
    a tree structure (`tree.json`), containing all
    Dataverses, Datasets and Datafiles. The file is
    stored in your instance directory (e.g.
    `utils/data/instances/dataverse_production`).

    """

    if user_handle == "public":
        api = NativeApi(config.BASE_URL)
    else:
        users = read_json(config.USER_FILENAME)
        api = NativeApi(config.BASE_URL, users[user_handle]["api-token"])
    tree = api.get_children(parent, children_types=data_types)
    # Ensure the output directory exists; makedirs creates any missing
    # intermediate directories and is a no-op for existing ones.
    os.makedirs(os.path.join(ROOT_DIR, "data", "utils", user_handle),
                exist_ok=True)
    write_json(os.path.join(UTILS_DATA_DIR, user_handle, filename), tree)
    if create_json:
        generate_data(tree, user_handle, filename)
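A minimal usage sketch, assuming `collect_data` is importable from this repository's utils module; the import path, user handle, and parent alias are assumptions.

# Usage sketch; the import path, user handle and parent alias are assumptions.
from utils import collect_data

collect_data(
    user_handle="dataverse_admin",                       # handle from the users JSON file
    parent="science",                                    # Dataverse alias to start walking from
    data_types=["dataverses", "datasets", "datafiles"],  # children types to collect
    filename="tree.json",
    create_json=True,                                    # also run generate_data on the result
)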
Example #4
def remove_testdata(
    user_handle: str,
    parent: str,
    data_types: List[str] = ["dataverses", "datasets"],
    force: bool = False,
    parent_data_type: str = "dataverse",
    remove_parent: bool = False,
) -> None:
    """Remove testdata.

    Removes all data created by `create-testdata`.
    It recursively collects all Dataverses and Datasets
    from the passed parent Dataverse down (`science` by
    default). If `PRODUCTION` is `true`, this function
    will not execute unless you add `--force` to the
    function call. This protects a production instance
    from unwanted changes.

    """
    if config.PRODUCTION and not force:
        print(
            "Delete testdata on a PRODUCTION instance not allowed. Use --force to force it."
        )
        sys.exit()

    user = read_json(config.USER_FILENAME)[user_handle]
    api = NativeApi(config.BASE_URL, user["api-token"])

    # Clean up
    data = api.get_children(parent, children_types=data_types)
    # dataverse_tree_walker also returns the datafiles list; it is not needed
    # here, as destroying a dataset removes its files as well.
    dataverses, datasets, _ = dataverse_tree_walker(data)
    if parent_data_type == "dataverse" and remove_parent:
        dataverses.append({"dataverse_alias": parent})
    for ds in datasets:
        api.destroy_dataset(ds["pid"])
    for dv in dataverses:
        api.delete_dataverse(dv["dataverse_alias"])
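A minimal usage sketch, again assuming an importable utils module; the parent alias follows the `science` default mentioned in the docstring, and the user handle is a placeholder.

# Usage sketch; the import path and user handle are assumptions.
from utils import remove_testdata

# Remove all Dataverses and Datasets below "science", keeping "science" itself.
remove_testdata(user_handle="dataverse_admin", parent="science")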
Example #5
    def test_dataverse_tree_walker_valid_default(self):
        dv_ids = [1, 2, 3]
        dv_aliases = ["parent_dv_1", "parent_dv_1_sub_dv_1", "parent_dv_2"]
        ds_ids = ["1AB23C", "4DE56F", "7GH89I", "0JK1LM", "2NO34P"]
        ds_pids = [
            "doi:12.34567/1AB23C",
            "doi:12.34567/4DE56F",
            "doi:12.34567/7GH89I",
            "doi:12.34567/0JK1LM",
            "doi:12.34567/2NO34P",
        ]
        df_ids = [1, 2, 3, 4, 5, 6, 7]
        df_filenames = [
            "appendix.pdf",
            "survey.zsav",
            "manual.pdf",
            "study.zsav",
            "documentation.pdf",
            "data.R",
            "summary.md",
        ]
        df_labels = [
            "appendix.pdf",
            "survey.zsav",
            "manual.pdf",
            "study.zsav",
            "documentation.pdf",
            "data.R",
            "summary.md",
        ]
        df_pids = [
            "doi:12.34567/1AB23C/ABC123",
            "doi:12.34567/1AB23C/DEF456",
            "doi:12.34567/4DE56F/GHI789",
            "doi:12.34567/7GH89I/JKL012",
            "doi:12.34567/0JK1LM/MNO345",
            "doi:12.34567/0JK1LM/PQR678",
            "doi:12.34567/2NO34P/STU901",
        ]

        data = read_json(test_config["tree_filename"])
        dataverses, datasets, datafiles = dataverse_tree_walker(data)

        assert isinstance(dataverses, list)
        assert isinstance(datasets, list)
        assert isinstance(datafiles, list)
        assert len(dataverses) == 3
        assert len(datasets) == 5
        assert len(datafiles) == 7

        for dv in dataverses:
            assert "dataverse_alias" in dv
            assert "dataverse_id" in dv
            assert dv["dataverse_alias"] in dv_aliases
            dv_aliases.pop(dv_aliases.index(dv["dataverse_alias"]))
            assert dv["dataverse_id"] in dv_ids
            dv_ids.pop(dv_ids.index(dv["dataverse_id"]))
        assert len(dv_aliases) == 0
        assert len(dv_ids) == 0

        for ds in datasets:
            assert "dataset_id" in ds
            assert "pid" in ds
            assert ds["dataset_id"] in ds_ids
            ds_ids.pop(ds_ids.index(ds["dataset_id"]))
            assert ds["pid"] in ds_pids
            ds_pids.pop(ds_pids.index(ds["pid"]))
        assert len(ds_ids) == 0
        assert len(ds_pids) == 0

        for df in datafiles:
            assert "datafile_id" in df
            assert "filename" in df
            assert "label" in df
            assert "pid" in df
            assert df["datafile_id"] in df_ids
            df_ids.pop(df_ids.index(df["datafile_id"]))
            assert df["filename"] in df_filenames
            df_filenames.pop(df_filenames.index(df["filename"]))
            assert df["label"] in df_labels
            df_labels.pop(df_labels.index(df["label"]))
            assert df["pid"] in df_pids
            df_pids.pop(df_pids.index(df["pid"]))
        assert len(df_ids) == 0
        assert len(df_filenames) == 0
        assert len(df_labels) == 0
        assert len(df_pids) == 0
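For reference, these are the record shapes the assertions above imply for the three lists returned by `dataverse_tree_walker`; the concrete values are taken from the test fixtures above and are only illustrative.

# Record shapes implied by the assertions above; values are illustrative.
dataverse_record = {"dataverse_id": 1, "dataverse_alias": "parent_dv_1"}
dataset_record = {"dataset_id": "1AB23C", "pid": "doi:12.34567/1AB23C"}
datafile_record = {
    "datafile_id": 1,
    "filename": "appendix.pdf",
    "label": "appendix.pdf",
    "pid": "doi:12.34567/1AB23C/ABC123",
}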
Example #6
def create_testdata(config_file: str, force: bool) -> None:
    """Create testdata defined in a config file.

    Creates a pre-defined set of testdata on your
    instance. By default, the function uses the
    AUSSDA test data repository, which is so far not
    publicly available. If `PRODUCTION` is `true`,
    this function will not execute unless you add
    `--force` to the function call. This protects a
    production instance from unwanted changes.

    """
    # Init
    if config.PRODUCTION and not force:
        print(
            "Create testdata on a PRODUCTION instance not allowed. Use --force to force it."
        )
        sys.exit()
    pid_idx = []
    users = read_json(config.USER_FILENAME)
    workflow = read_json(os.path.join(ROOT_DIR, config_file))

    # Dataverses
    for dv_conf in workflow["dataverses"]:
        dv_alias = None
        if "create" in dv_conf:
            api = NativeApi(
                config.BASE_URL,
                users[dv_conf["create"]["user-handle"]]["api-token"])
            dv = Dataverse()
            dv_filename = os.path.join(ROOT_DIR,
                                       dv_conf["create"]["metadata-filename"])
            dv.from_json(read_file(dv_filename))
            if "update" in dv_conf["create"]:
                for key, val in dv_conf["create"]["update"].items():
                    kwargs = {key: val}
                    dv.set(kwargs)
            dv_alias = dv.get()["alias"]
            resp = api.create_dataverse(dv_conf["create"]["parent"], dv.json())

        if "publish" in dv_conf:
            api = NativeApi(
                config.BASE_URL,
                users[dv_conf["publish"]["user-handle"]]["api-token"])
            if not dv_alias and "alias" in dv_conf["publish"]:
                dv_alias = dv_conf["publish"]["alias"]
            resp = api.publish_dataverse(dv_alias)

    # Datasets
    for ds_conf in workflow["datasets"]:
        pid = None
        if "create" in ds_conf:
            api = NativeApi(
                config.BASE_URL,
                users[ds_conf["create"]["user-handle"]]["api-token"])
            ds = Dataset()
            ds_filename = os.path.join(ROOT_DIR,
                                       ds_conf["create"]["metadata-filename"])
            ds.from_json(read_file(ds_filename))
            if "update" in ds_conf["create"]:
                for key, val in ds_conf["create"]["update"].items():
                    kwargs = {key: val}
                    ds.set(kwargs)
            resp = api.create_dataset(dv_alias, ds.json())
            pid = resp.json()["data"]["persistentId"]
            pid_idx.append(pid)

        if "publish" in ds_conf:
            if not pid:
                print("ERROR: PID missing!")
                sys.exit()
            api = NativeApi(
                config.BASE_URL,
                users[ds_conf["publish"]["user-handle"]]["api-token"])
            resp = api.publish_dataset(pid, release_type="major")

    # Datafiles
    for dataset_id, ds_datafiles in workflow["datafiles"].items():
        # Sanity check: the datafiles key must match the "id" of the dataset
        # at the same position in the workflow, so the right PID is used.
        if int(dataset_id) == workflow["datasets"][int(dataset_id)]["id"]:
            pid = pid_idx[int(dataset_id)]
        else:
            print("ERROR: Dataset ID not matching.")
            sys.exit()
        for df_conf in ds_datafiles:
            if "upload" in df_conf:
                api = NativeApi(
                    config.BASE_URL,
                    users[df_conf["upload"]["user-handle"]]["api-token"],
                )
                metadata = read_json(df_conf["upload"]["metadata-filename"])
                df = Datafile()
                df.set(metadata)
                if "update" in df_conf["upload"]:
                    for key, val in df_conf["upload"]["update"].items():
                        kwargs = {key: val}
                        df.set(kwargs)
                df.set({"pid": pid})
                filename = df_conf["upload"]["filename"]
                resp = api.upload_datafile(pid, filename, df.json())
                # Tabular files (.sav, .dta) take longer to process on the
                # server, so wait before continuing.
                if filename.endswith((".sav", ".dta")):
                    sleep(30)
                else:
                    sleep(3)
        if "publish-dataset" in df_conf:
            api = NativeApi(
                config.BASE_URL,
                users[df_conf["publish-dataset"]["user-handle"]]["api-token"],
            )
            if df_conf["publish-dataset"]:
                resp = api.publish_dataset(pid, release_type="major")
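Finally, a minimal sketch of the workflow config `create_testdata` walks. The key names mirror what the code above reads; the paths, aliases, and user handles are placeholders, not taken from the source.

# Hypothetical workflow config for create_testdata (shape inferred from the
# code above; paths, aliases and handles are placeholders).
example_workflow = {
    "dataverses": [
        {
            "create": {
                "user-handle": "dataverse_admin",
                "parent": "root",
                "metadata-filename": "data/dataverse.json",
                "update": {"alias": "science"},     # optional per-key overrides
            },
            "publish": {"user-handle": "dataverse_admin"},
        }
    ],
    "datasets": [
        {
            "id": 0,                                # must match its key in "datafiles"
            "create": {
                "user-handle": "dataverse_admin",
                "metadata-filename": "data/dataset.json",
            },
            "publish": {"user-handle": "dataverse_admin"},
        }
    ],
    "datafiles": {
        "0": [                                      # keyed by the dataset "id"
            {
                "upload": {
                    "user-handle": "dataverse_admin",
                    "metadata-filename": "data/datafile.json",
                    "filename": "data/appendix.pdf",
                }
            }
        ]
    },
}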