Example #1
    def save_outputs(self, run):
        output_path = os.path.join(run.full_sandbox_path, 'output')
        upload_path = os.path.join(run.full_sandbox_path, 'upload')
        os.mkdir(upload_path)
        for argument in run.app.arguments.filter(type=ContainerArgument.OUTPUT):
            argument_path = os.path.join(output_path, argument.name)
            dataset_name = self.build_dataset_name(run, argument.name)
            new_argument_path = os.path.join(upload_path, dataset_name)
            try:
                os.rename(argument_path, new_argument_path)
                dataset = Dataset.create_dataset(new_argument_path,
                                                 name=dataset_name,
                                                 user=run.user)
                dataset.copy_permissions(run)
                run.datasets.create(dataset=dataset,
                                    argument=argument)
            except (OSError, IOError) as ex:
                if ex.errno != errno.ENOENT:
                    raise
        logs_path = os.path.join(run.full_sandbox_path, 'logs')
        for file_name, log_type in (('stdout.txt', ContainerLog.STDOUT),
                                    ('stderr.txt', ContainerLog.STDERR)):
            run.load_log(os.path.join(logs_path, file_name), log_type)

        run.set_md5()
        run.state = (ContainerRun.COMPLETE
                     if run.return_code == 0
                     else ContainerRun.FAILED)
        run.end_time = timezone.now()
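
The build_dataset_name helper called above is not shown on this page. A minimal sketch of what such a helper might look like, purely as an assumption about its behaviour rather than Kive's actual implementation:

def build_dataset_name(run_id, argument_name):
    # Hypothetical sketch only: prefix the output argument's name with the run id
    # so files uploaded from different runs cannot collide in the upload directory.
    return 'run{}_{}'.format(run_id, argument_name)

print(build_dataset_name(42, 'greetings_csv'))  # -> 'run42_greetings_csv'
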
Example #2
    @classmethod
    def _save_output_directory_argument(cls, run: ContainerRun,
                                        argument: ContainerArgument,
                                        output_path: str,
                                        upload_path: str) -> None:
        output_path = pathlib.Path(output_path).absolute()
        dirarg_path = output_path / argument.name
        for dirpath, _, filenames in os.walk(dirarg_path):
            dirpath = pathlib.Path(dirpath)
            for filename in filenames:
                datafile_path: pathlib.Path = (dirpath / filename).absolute()
                dataset_filename = cls._build_directory_file_name(
                    run.id, output_path, datafile_path)
                destination_path = os.path.join(upload_path, dataset_filename)
                dataset_name = cls._build_directory_dataset_name(
                    run.id, output_path, datafile_path)
                try:
                    os.rename(datafile_path, destination_path)
                    dataset = Dataset.create_dataset(
                        destination_path,
                        name=dataset_name,
                        user=run.user,
                    )
                    dataset.copy_permissions(run)
                    run.datasets.create(dataset=dataset, argument=argument)
                except (OSError, IOError) as ex:
                    if ex.errno != errno.ENOENT:
                        raise
Example #3
    def create(self, validated_data):
        """
        Create a Dataset object from deserialized and validated data.
        """
        # The default behaviour for keep_file depends on the mode of creation.
        keep_file = True
        file_path = validated_data.get("external_path", "")
        efd = validated_data.get("externalfiledirectory", None)
        # Both or neither are specified (this is enforced in serializer validation).
        if file_path:
            file_path = os.path.join(efd.path, file_path)
            keep_file = False  # don't retain a copy by default

        # Override the default if specified.
        keep_file = validated_data.get("save_in_db", keep_file)

        dataset = Dataset.create_dataset(
            is_uploaded=True,  # Assume serializer is only used for uploads.
            file_path=file_path,
            user=self.context["request"].user,
            users_allowed=validated_data["users_allowed"],
            groups_allowed=validated_data["groups_allowed"],
            keep_file=keep_file,
            name=validated_data["name"],
            description=validated_data.get("description"),
            file_source=None,
            check=True,
            file_handle=validated_data.get(
                "dataset_file",
                None),  # should be freshly opened so cursor is at start
            externalfiledirectory=efd)
        return dataset
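
The keep_file default above flips depending on whether the dataset points at an external file. A standalone restatement of just that logic, runnable outside Django for illustration:

def resolve_keep_file(validated_data):
    # Restates the default handling above: an external_path means the serializer
    # should reference the external file instead of keeping a copy, unless the
    # optional save_in_db flag overrides the default.
    keep_file = True
    if validated_data.get("external_path", ""):
        keep_file = False
    return validated_data.get("save_in_db", keep_file)

print(resolve_keep_file({"dataset_file": "upload"}))                      # True
print(resolve_keep_file({"external_path": "batch1/reads.csv"}))           # False
print(resolve_keep_file({"external_path": "batch1/reads.csv",
                         "save_in_db": True}))                            # True
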
Example #4
    def save_outputs(self, run):
        output_path = os.path.join(run.full_sandbox_path, 'output')
        upload_path = os.path.join(run.full_sandbox_path, 'upload')
        os.mkdir(upload_path)
        for argument in run.app.arguments.filter(
                type=ContainerArgument.OUTPUT):
            argument_path = os.path.join(output_path, argument.name)
            dataset_name = self.build_dataset_name(run, argument.name)
            new_argument_path = os.path.join(upload_path, dataset_name)
            try:
                os.rename(argument_path, new_argument_path)
                dataset = Dataset.create_dataset(new_argument_path,
                                                 name=dataset_name,
                                                 user=run.user)
                dataset.copy_permissions(run)
                run.datasets.create(dataset=dataset, argument=argument)
            except (OSError, IOError) as ex:
                if ex.errno != errno.ENOENT:
                    raise
        logs_path = os.path.join(run.full_sandbox_path, 'logs')
        for file_name, log_type in (('stdout.txt', ContainerLog.STDOUT),
                                    ('stderr.txt', ContainerLog.STDERR)):
            run.load_log(os.path.join(logs_path, file_name), log_type)

        run.set_md5()
        run.state = (ContainerRun.COMPLETE
                     if run.return_code == 0 else ContainerRun.FAILED)
        run.end_time = timezone.now()
Example #5
    def create_datasets(self, user):
        """
        Creates the Datasets and the corresponding SymbolicDatasets in same order as cleaned_data["dataset_files"].
        Will still save successful Datasets to database even if some of the Datasets fail to create.

        :return:  None and a list of the created Dataset objects in the same order
            as cleaned_data["dataset_files"].
            If particular Dataset failed to create, then the list element contains a dict that can be
        used to inform the user about the file.
        """
        results = []
        for file_size, uploaded_file in self.cleaned_data['dataset_file']:
            # Note that uploaded_file should be seek'd to the beginning.  It was presumably
            # just opened so that should be OK but if this ever changes we will have to fix this.
            dataset = error_str = auto_name = None
            try:
                # TODO:  use correct unique constraints
                name_prefix = ""
                if self.cleaned_data["name_prefix"]:
                    name_prefix = self.cleaned_data["name_prefix"] + "_"
                auto_name = (name_prefix + uploaded_file.name + "_" +
                             datetime.now().strftime('%Y%m%d%H%M%S%f'))

                if self.cleaned_data["description"]:
                    auto_description = self.cleaned_data["description"]
                else:
                    auto_description = "Bulk Uploaded File " + uploaded_file.name

                dataset = Dataset.create_dataset(is_uploaded=True,
                                                 file_path=None,
                                                 user=user,
                                                 keep_file=True,
                                                 name=auto_name,
                                                 description=auto_description,
                                                 file_source=None,
                                                 check=True,
                                                 file_handle=uploaded_file)
                dataset.grant_from_json(self.cleaned_data["permissions"])

            except Exception as e:
                error_str = str(e)
                LOGGER.exception(
                    "Error while creating Dataset for file with original file name="
                    + str(uploaded_file.name) +
                    " and autogenerated Dataset name = " + str(auto_name))

            if dataset and error_str is None:
                results.append(dataset)
            elif error_str and dataset is None:
                results.append({
                    "name": uploaded_file.name,
                    "errstr": error_str,
                    "size": file_size
                })
            else:
                raise ValueError(
                    "Invalid situation.  Must either have a dataset or error.  Can not have both or none."
                )

        return None, results
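
The list returned above mixes Dataset objects with error dicts. A hedged sketch of how a caller might separate them (the actual view code is not shown on this page):

def split_results(results):
    # dict entries describe failed files ("name", "errstr", "size"); anything
    # else is a successfully created Dataset.
    created = [r for r in results if not isinstance(r, dict)]
    failed = [r for r in results if isinstance(r, dict)]
    return created, failed

created, failed = split_results([{"name": "bad.csv", "errstr": "boom", "size": 12}])
print(len(created), len(failed))  # -> 0 1
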
Example #6
File: tests_rm.py  Project: cfe-lab/Kive
    def setUp(self):
        super(RawTests, self).setUp()

        self.addTypeEqualityFunc(str, self.assertMultiLineEqual)
        self.pipeline_raw = tools.make_first_pipeline(
            "raw noop", "a pipeline to do nothing to raw data",
            self.user_bob)
        tools.create_linear_pipeline(self.pipeline_raw, [self.method_noop_raw], "raw_in", "raw_out")
        self.pipeline_raw.create_outputs()

        self.dataset_raw = Dataset.create_dataset(
            "/usr/share/dict/words",
            user=self.user_bob,
            cdt=None,
            keep_file=True,
            name="raw",
            description="some raw data"
        )
Example #7
    def build(self):
        user = User.objects.first()
        assert user is not None
        input_path = os.path.abspath(
            os.path.join(
                __file__,
                '../../../../../samplecode/singularity/host_input/example_names.csv'
            ))
        family = ContainerFamily.objects.create(name='fixture family',
                                                user=user)
        container_path = os.path.abspath(
            os.path.join(
                __file__,
                '../../../../../samplecode/singularity/python2-alpine-trimmed.simg'
            ))
        with open(container_path, "rb") as f:
            container_md5 = compute_md5(f)
        container = family.containers.create(
            tag='vFixture',
            user=user,
            file='Containers/kive-default.simg',
            md5=container_md5)
        app = container.apps.create()
        arg1 = app.arguments.create(type=ContainerArgument.INPUT,
                                    name='names_csv',
                                    position=1)
        app.arguments.create(type=ContainerArgument.OUTPUT,
                             name='greetings_csv',
                             position=2)
        dataset = Dataset.create_dataset(input_path,
                                         name='names.csv',
                                         user=user)
        run = app.runs.create(name='fixture run', user=user)
        run.sandbox_path = ""  # blank this out as it won't be accessible in testing anyway
        run.slurm_job_id = None  # this also would cause tests to fail on a fresh system
        run.save(schedule=False)  # scheduling would overwrite sandbox_path
        run.datasets.create(argument=arg1, dataset=dataset)

        upload_path = os.path.join(settings.MEDIA_ROOT, Container.UPLOAD_DIR)
        readme_path = os.path.join(upload_path, 'README.md')
        os.makedirs(upload_path)
        with open(readme_path, 'w') as f:
            f.write('Just a placeholder to create the folder for containers.')
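
The compute_md5 helper used on the container file above is not shown here. A hypothetical sketch of such a helper, hashing the file in chunks so large container images do not need to fit in memory (an assumption, not Kive's actual implementation):

import hashlib

def compute_md5(file_handle, chunk_size=64 * 1024):
    # Read the already-open binary file handle chunk by chunk and return the
    # hex digest, mirroring how the fixture above calls compute_md5(f).
    digest = hashlib.md5()
    for chunk in iter(lambda: file_handle.read(chunk_size), b''):
        digest.update(chunk)
    return digest.hexdigest()
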
Example #8
    def _save_output_argument(
            self,
            run: ContainerRun,
            argument: ContainerArgument,
            output_path: str,
            upload_path: str,
    ):
        argument_path = os.path.join(output_path, argument.name)
        dataset_name = self.build_dataset_name(run, argument.name)
        new_argument_path = os.path.join(upload_path, dataset_name)
        try:
            os.rename(argument_path, new_argument_path)
            dataset = Dataset.create_dataset(new_argument_path,
                                             name=dataset_name,
                                             user=run.user)
            dataset.copy_permissions(run)
            run.datasets.create(dataset=dataset, argument=argument)
        except (OSError, IOError) as ex:
            if ex.errno != errno.ENOENT:
                raise
Example #9
    def create(self, validated_data):
        """
        Create a Dataset object from deserialized and validated data.
        """
        cdt = None
        if "structure" in validated_data:
            cdt = validated_data["structure"].get("compounddatatype", None)

        # The default behaviour for keep_file depends on the mode of creation.
        keep_file = True
        file_path = validated_data.get("external_path", "")
        efd = validated_data.get("externalfiledirectory", None)
        # Both or neither are specified (this is enforced in serializer validation).
        if file_path:
            file_path = os.path.join(efd.path, file_path)
            keep_file = False  # don't retain a copy by default

        # Override the default if specified.
        keep_file = validated_data.get("save_in_db", keep_file)

        dataset = Dataset.create_dataset(
            is_uploaded=True,  # Assume serializer is only used for uploads.
            file_path=file_path,
            user=self.context["request"].user,
            users_allowed=validated_data["users_allowed"],
            groups_allowed=validated_data["groups_allowed"],
            cdt=cdt,
            keep_file=keep_file,
            name=validated_data["name"],
            description=validated_data.get("description"),
            file_source=None,
            check=True,
            file_handle=validated_data.get("dataset_file", None),  # should be freshly opened so cursor is at start
            externalfiledirectory=efd
        )
        return dataset
Example #10
File: forms.py  Project: cfe-lab/Kive
    def create_datasets(self, user):
        """
        Creates the Datasets and the corresponding SymbolicDatasets in same order as cleaned_data["dataset_files"].
        Will still save successful Datasets to database even if some of the Datasets fail to create.

        :return:  CDT object and a list of the created Dataset objects in the same order
            as cleaned_data["dataset_files"].
            If particular Dataset failed to create, then the list element contains a dict that can be
        used to inform the user about the file.
        """
        compound_datatype_obj = None
        if self.cleaned_data['compound_datatype'] != CompoundDatatype.RAW_ID:
            compound_datatype_obj = CompoundDatatype.objects.get(pk=self.cleaned_data['compound_datatype'])

        results = []
        for file_size, uploaded_file in self.cleaned_data['dataset_file']:
            # Note that uploaded_file should be seek'd to the beginning.  It was presumably
            # just opened so that should be OK but if this ever changes we will have to fix this.
            dataset = error_str = auto_name = None
            try:
                # TODO:  use correct unique constraints
                name_prefix = ""
                if self.cleaned_data["name_prefix"]:
                    name_prefix = self.cleaned_data["name_prefix"] + "_"
                auto_name = name_prefix + uploaded_file.name + "_" + datetime.now().strftime('%Y%m%d%H%M%S%f')

                if self.cleaned_data["description"]:
                    auto_description = self.cleaned_data["description"]
                else:
                    auto_description = "Bulk Uploaded File " + uploaded_file.name

                dataset = Dataset.create_dataset(
                    is_uploaded=True,
                    file_path=None,
                    user=user,
                    cdt=compound_datatype_obj,
                    keep_file=True,
                    name=auto_name,
                    description=auto_description,
                    file_source=None,
                    check=True,
                    file_handle=uploaded_file
                )
                dataset.grant_from_json(self.cleaned_data["permissions"])

            except Exception as e:
                error_str = str(e)
                LOGGER.exception("Error while creating Dataset for file with original file name=" +
                                 str(uploaded_file.name) +
                                 " and autogenerated Dataset name = " +
                                 str(auto_name))

            if dataset and error_str is None:
                results.append(dataset)
            elif error_str and dataset is None:
                results.append({"name": uploaded_file.name,
                                "errstr": error_str,
                                "size": file_size})
            else:
                raise ValueError("Invalid situation.  Must either have a dataset or error.  Can not have both or none.")

        return compound_datatype_obj, results
Example #11
# A dummy Datatype with a prototype.
with tempfile.TemporaryFile() as f:
    f.write("""example,valid
True,True
true,False
y,False
n,False
False,False
false,false"""
    )
    f.seek(0)
    proto_SD = Dataset.create_dataset(
        file_path=None,
        user=kive_user(),
        cdt=CompoundDatatype.objects.get(pk=CDTs.PROTOTYPE_PK),
        name="AlwaysTruePrototype",
        description="Prototype for dummy Datatype",
        file_handle=f
    )

always_true = Datatype(
    user=kive_user(),
    name="Python True",
    description="True in python",
    proto_SD=proto_SD
)
always_true.save()
always_true.restricts.add(Datatype.objects.get(pk=datatypes.BOOL_PK))

always_true.basic_constraints.create(
    ruletype=BasicConstraint.REGEXP,
    rule="True")
Example #12
prototype_CDT = CompoundDatatype.objects.get(pk=CDTs.PROTOTYPE_PK)

# A dummy Datatype with a prototype.
with tempfile.TemporaryFile() as f:
    f.write("""example,valid
True,True
true,False
y,False
n,False
False,False
false,false""")
    f.seek(0)
    proto_SD = Dataset.create_dataset(
        file_path=None,
        user=kive_user(),
        cdt=CompoundDatatype.objects.get(pk=CDTs.PROTOTYPE_PK),
        name="AlwaysTruePrototype",
        description="Prototype for dummy Datatype",
        file_handle=f)

always_true = Datatype(user=kive_user(),
                       name="Python True",
                       description="True in python",
                       proto_SD=proto_SD)
always_true.save()
always_true.restricts.add(Datatype.objects.get(pk=datatypes.BOOL_PK))

always_true.basic_constraints.create(ruletype=BasicConstraint.REGEXP,
                                     rule="True")
Example #13
from django.core.files import File
from django.contrib.auth.models import User

import metadata.models
from librarian.models import Dataset
import method.models
import kive.testing_utils as tools

# This comes from the initial_user fixture.
kive_user = User.objects.get(pk=1)

test_fasta = Dataset.create_dataset(
    file_path="../samplecode/step_0_raw.fasta",
    user=kive_user,
    cdt=None,
    keep_file=True,
    name="TestFASTA",
    description="Toy FASTA file for testing pipelines"
)

# Set up a test Pipeline.
resource = method.models.CodeResource(name="Fasta2CSV", description="FASTA converter script", filename="Fasta2CSV.py")
resource.clean()
resource.save()
with open("../samplecode/fasta2csv.py", "rb") as f:
    revision = method.models.CodeResourceRevision(
        coderesource=resource,
        revision_name="v1",
        revision_desc="First version",
        content_file=File(f))
    revision.clean()
Example #14
from django.core.files import File
from django.contrib.auth.models import User

import metadata.models
from librarian.models import Dataset
import method.models
import kive.testing_utils as tools

# This comes from the initial_user fixture.
kive_user = User.objects.get(pk=1)

test_fasta = Dataset.create_dataset(
    file_path="../samplecode/step_0_raw.fasta",
    user=kive_user,
    cdt=None,
    keep_file=True,
    name="TestFASTA",
    description="Toy FASTA file for testing pipelines")

# Set up a test Pipeline.
resource = method.models.CodeResource(name="Fasta2CSV",
                                      description="FASTA converter script",
                                      filename="Fasta2CSV.py")
resource.clean()
resource.save()
with open("../samplecode/fasta2csv.py", "rb") as f:
    revision = method.models.CodeResourceRevision(
        coderesource=resource,
        revision_name="v1",
        revision_desc="First version",