Example #1
    def test_get_dataset_metadata(self):
        import pbtestdata
        # Metadata is read from the dataset XML header alone.
        md = get_dataset_metadata(pbtestdata.get_file("subreads-xml"))
        assert md.metatype == "PacBio.DataSet.SubreadSet"

        # The UUID should match what pbcore parses from the same file.
        from pbcore.io import SubreadSet
        ds = SubreadSet(pbtestdata.get_file("subreads-xml"))
        assert md.uuid == ds.uuid

        # An invalid input should raise (assumes `import pytest` at module level).
        with pytest.raises(Exception):
            get_dataset_metadata(None)
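All of the snippets on this page exercise the same helper. For orientation, a minimal standalone call might look like the sketch below; the import path (pbcommand.utils) and the metatype/uuid attributes are assumptions inferred from the usage in these examples, and the file name is hypothetical.

from pbcommand.utils import get_dataset_metadata  # assumed import path

# Reads only the dataset XML header; the underlying BAM data is not loaded.
md = get_dataset_metadata("movie.subreadset.xml")  # hypothetical path
print(md.metatype)  # e.g. "PacBio.DataSet.SubreadSet"
print(md.uuid)      # UniqueId recorded in the dataset XML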
Example #2
def dataset_to_datastore(dataset_file,
                         datastore_file,
                         source_id="dataset_to_datastore"):
    """Copied from pbcoretools.tasks.barcoding"""
    # FIXME: replace barcoding
    # Wrap the dataset in a one-entry DataStore and serialize it to JSON.
    dsmd = get_dataset_metadata(dataset_file)
    ds_file = DataStoreFile(dsmd.uuid, source_id, dsmd.metatype, dataset_file)
    ds_out = DataStore([ds_file])
    ds_out.write_json(datastore_file)
    return 0
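A call site for this helper might look like the following sketch; the file names and source_id are hypothetical, and DataStore/DataStoreFile are assumed to come from pbcommand.models.

# Hypothetical usage: wrap a single dataset XML in a one-entry datastore.
dataset_to_datastore("movie.subreadset.xml",
                     "movie.datastore.json",
                     source_id="import_subreads")

The resulting JSON records the dataset's UUID, metatype, and path for downstream consumers.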
Example #3
def gather_report(json_files, output_file, dataset_xml=None):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    reports = [load_report_from_json(fn) for fn in json_files]
    merged = Report.merge(reports)
    if dataset_xml is not None:
        # Tag the merged report with the UUID of the source dataset
        # (written directly to the private attribute here).
        ds_md = get_dataset_metadata(dataset_xml)
        merged._dataset_uuids = [ds_md.uuid]
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
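A sketch of how this gather step might be driven, assuming per-chunk report JSON files produced upstream (all file names here are hypothetical):

chunk_reports = ["chunk-0.report.json", "chunk-1.report.json"]
gather_report(chunk_reports, "merged.report.json",
              dataset_xml="movie.subreadset.xml")

Passing dataset_xml ties the merged report to the UUID of the dataset it describes.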
Example #4
    def test_get_dataset_metadata(self):
        try:
            import pbcore.io
            import pbcore.data
        except ImportError:
            raise unittest.SkipTest("pbcore not available, skipping")
        else:
            ds = pbcore.io.SubreadSet(pbcore.data.getUnalignedBam())
            # NamedTemporaryFile is used only to generate a unique path;
            # ds.write() then creates the real file at that location.
            ds_file = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
            ds.write(ds_file)
            md = get_dataset_metadata(ds_file)
            self.assertEqual(md.metatype, "PacBio.DataSet.SubreadSet")
            self.assertEqual(md.uuid, ds.uuid)
Example #5
    def test_get_dataset_metadata(self):
        try:
            import pbtestdata
        except ImportError:
            raise unittest.SkipTest("pbtestdata not available, skipping")
        else:
            md = get_dataset_metadata(pbtestdata.get_file("subreads-xml"))
            self.assertEqual(md.metatype, "PacBio.DataSet.SubreadSet")
            try:
                from pbcore.io import SubreadSet
            except ImportError:
                raise unittest.SkipTest("pbcore not available, skipping")
            else:
                ds = SubreadSet(pbtestdata.get_file("subreads-xml"))
                self.assertEqual(md.uuid, ds.uuid)
Example #6
    def run_import_local_dataset(self, path):
        """Import a file from FS that is local to where the services are running

        Returns a JobResult instance

        :rtype: JobResult
        """
        dataset_meta_type = get_dataset_metadata(path)
        result = self.get_dataset_by_uuid(dataset_meta_type.uuid)
        if result is None:
            log.info("Importing dataset {p}".format(p=path))
            return self.run_import_dataset_by_type(dataset_meta_type.metatype, path)
        else:
            log.debug("{f} already imported; skipping import. {r}".format(
                r=result, f=dataset_meta_type.metatype))
            # need to clean this up
            return JobResult(self.get_job_by_id(result['jobId']), 0, "")
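Example #7 below extends this method with explicit post-import verification against the dataset listing.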
Example #7
    def run_import_local_dataset(self, path):
        """Import a file from FS that is local to where the services are running

        Returns a JobResult instance

        :rtype: JobResult
        """
        dataset_meta_type = get_dataset_metadata(path)

        def _verify_dataset_in_list():
            file_type = FileTypes.ALL()[dataset_meta_type.metatype]
            ds_endpoint = _get_endpoint_or_raise(file_type)

            # all datasets for a specific type
            datasets = self._get_datasets_by_type(ds_endpoint)

            uuids = {ds['uuid'] for ds in datasets}
            if dataset_meta_type.uuid not in uuids:
                raise JobExeError(("Dataset {u} was imported but does not " +
                                   "appear in the dataset list; this may " +
                                   "indicate XML schema errors.").format(
                                       u=dataset_meta_type.uuid))

        result = self.get_dataset_by_uuid(dataset_meta_type.uuid,
                                          ignore_errors=True)
        if result is None:
            log.info("Importing dataset {p}".format(p=path))
            job_result = self.run_import_dataset_by_type(
                dataset_meta_type.metatype, path)
            log.info("Confirming database update")
            # validation 1: attempt to retrieve dataset info
            result_new = self.get_dataset_by_uuid(dataset_meta_type.uuid)
            if result_new is None:
                raise JobExeError(
                    ("Dataset {u} was imported but could " +
                     "not be retrieved; this may indicate " +
                     "XML schema errors.").format(u=dataset_meta_type.uuid))
            # validation 2: make sure it shows up in the listing
            _verify_dataset_in_list()
            return job_result
        else:
            log.info("{f} already imported. Skipping importing. {r}".format(
                r=result, f=dataset_meta_type.metatype))
            _verify_dataset_in_list()
            # need to clean this up
            return JobResult(self.get_job_by_id(result['jobId']), 0, "")
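The notable design choice here is the _verify_dataset_in_list closure: an import job can report success while the dataset still fails to appear in the type-specific listing (the error messages attribute this to XML schema problems), so the method checks both the direct UUID lookup and the listing before returning.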