Example #1
import os
import shutil
import tempfile

import dtoolcore
import pytest


@pytest.fixture
def collection_fixture(request):
    """Return path to a temporary collection containing three datasets."""
    collection_path = tempfile.mkdtemp()

    collection = dtoolcore.Collection()
    collection.persist_to_path(collection_path)

    for ds_name in ["rice", "wheat", "barley"]:

        ds_path = os.path.join(collection_path, ds_name)
        os.mkdir(ds_path)

        dataset = dtoolcore.DataSet(ds_name, "data")
        dataset.persist_to_path(ds_path)

        for s in ["sow", "grow", "harvest"]:
            fname = s + ".txt"
            fpath = os.path.join(ds_path, "data", fname)
            with open(fpath, "w") as fh:
                fh.write("{} {}\n".format(s, ds_name))

        dataset.update_manifest()

    # Remove the temporary directory once the test using the fixture is done.
    @request.addfinalizer
    def teardown():
        shutil.rmtree(collection_path)
    return collection_path
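
A minimal sketch of how a test might consume this fixture (the test name and assertions are illustrative, not from the source):

def test_collection_contains_three_datasets(collection_fixture):
    # The fixture returns the path to the persisted collection.
    for ds_name in ["rice", "wheat", "barley"]:
        assert os.path.isdir(os.path.join(collection_fixture, ds_name))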
Example #2
import os
import shutil
import tempfile

import dtoolcore
import pytest


@pytest.fixture
def dataset_fixture(request):
    """Return path to a temporary dataset containing two text files."""
    d = tempfile.mkdtemp()

    dataset = dtoolcore.DataSet("test", "data")
    dataset.persist_to_path(d)

    for s in ["hello", "world"]:
        fname = s + ".txt"
        fpath = os.path.join(d, "data", fname)
        with open(fpath, "w") as fh:
            fh.write(s)

    dataset.update_manifest()

    @request.addfinalizer
    def teardown():
        shutil.rmtree(d)
    return d
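
As an illustration, a test could reload the persisted dataset with dtoolcore.DataSet.from_path (used in Example #3 below) and inspect its manifest; the test name and assertion are assumptions:

def test_dataset_manifest_lists_files(dataset_fixture):
    dataset = dtoolcore.DataSet.from_path(dataset_fixture)
    # Manifest entries carry a "path" relative to the data directory.
    paths = [entry["path"] for entry in dataset.manifest["file_list"]]
    assert sorted(paths) == ["hello.txt", "world.txt"]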
Example #3
import datetime
import os

import click
import dtoolcore
import yaml


def template(dataset_path, new_dataset_path):
    """Create new empty dataset with metadata from existing dataset."""
    parent_dataset = dtoolcore.DataSet.from_path(dataset_path)

    new_dataset_path = os.path.abspath(new_dataset_path)
    output_dir, dataset_name = os.path.split(new_dataset_path)

    # Click validation callbacks could report these errors while highlighting
    # the specific offending argument; see:
    # http://click.pocoo.org/5/options/#callbacks-for-validation
    if os.path.exists(new_dataset_path):
        raise click.BadParameter(
            "Path already exists: {}".format(new_dataset_path)
        )
    if not os.path.isdir(output_dir):
        raise click.BadParameter(
            "Output directory does not exist: {}".format(output_dir)
        )

    # Create empty dataset
    new_dataset = dtoolcore.DataSet(dataset_name, data_directory="data")
    os.mkdir(new_dataset_path)
    new_dataset.persist_to_path(new_dataset_path)

    # Template the descriptive metadata.
    with open(parent_dataset.abs_readme_path) as fh:
        descriptive_metadata = yaml.safe_load(fh)

    # Need explicit call to str() to ensure pyyaml does not mark up data with
    # Python types.
    descriptive_metadata["dataset_name"] = str(dataset_name)
    descriptive_metadata["creation_date"] = str(datetime.date.today())

    descriptive_metadata["parent_dataset"] = dict(path=parent_dataset._abs_path,
                                                  uuid=str(parent_dataset.uuid))

    with open(new_dataset.abs_readme_path, "w") as fh:
        yaml.dump(
            descriptive_metadata,
            fh,
            explicit_start=True,
            default_flow_style=False)
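
The click.BadParameter exceptions suggest this function serves as the body of a click command. A possible wiring, with the command and argument names assumed rather than taken from the source:

@click.command()
@click.argument("dataset_path", type=click.Path(exists=True))
@click.argument("new_dataset_path", type=click.Path())
def template_cli(dataset_path, new_dataset_path):
    # Delegate to the function above; click checks that dataset_path exists.
    template(dataset_path, new_dataset_path)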
Example #4
import os

import click
import dtoolcore


def dminify(dataset_path, new_dataset_path, n):
    """Create a new dataset by minifying the fastq files of an existing dataset."""

    parent_dataset = dtoolcore.DataSet.from_path(dataset_path)

    output_dir, dataset_name = os.path.split(new_dataset_path)

    # Click validation callbacks could report these errors while highlighting
    # the specific offending argument; see:
    # http://click.pocoo.org/5/options/#callbacks-for-validation
    if os.path.exists(new_dataset_path):
        raise click.BadParameter(
            "Path already exists: {}".format(new_dataset_path)
        )
    if not os.path.isdir(output_dir):
        raise click.BadParameter(
            "Output directory does not exist: {}".format(output_dir)
        )

    output_dataset_data_dir = os.path.join(new_dataset_path, 'data')

    # is_file_extension_in_list() and minify() are helpers defined elsewhere
    # in the module; a hypothetical sketch of both is given after this example.
    for entry in parent_dataset.manifest['file_list']:
        if is_file_extension_in_list(entry['path'], ['.fq', '.fq.gz']):
            output_file_path = os.path.join(
                output_dataset_data_dir,
                entry['path']
            )
            identifier = entry['hash']
            input_file_path = parent_dataset.abspath_from_identifier(
                identifier
            )
            minify(input_file_path, output_file_path, n)

    output_dataset = dtoolcore.DataSet(dataset_name, 'data')
    output_dataset.persist_to_path(new_dataset_path)
    output_dataset.update_manifest()

    # Copy the parent's descriptive metadata and record provenance.
    with open(parent_dataset.abs_readme_path, 'r') as ifh:
        with open(output_dataset.abs_readme_path, 'w') as ofh:
            ofh.write(ifh.read())
            ofh.write("minified_from: {}\n".format(parent_dataset.uuid))