Пример #1
0
    def test_001_create_df_status(self, mock_stat):
        """When a new datafile record is verified, metadata for it's
        online/offline status should be created and populated with the
        current online status"""
        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=100,
                                       st_mtime=datetime.now())

        temp = tempfile.NamedTemporaryFile(dir=tempfile.gettempdir())
        temp_name = os.path.basename(temp.name)
        df2 = DataFile(dataset=self.dataset, filename=temp_name)
        df2.save()
        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri=temp_name)
        dfo2.save()
        df2.verify()

        param_name = ParameterName.objects.get(
            schema__namespace=HSM_DATAFILE_NAMESPACE, name="online")

        paramset = DatafileParameterSet.objects.get(
            schema__namespace=HSM_DATAFILE_NAMESPACE, datafile=df2)

        param = DatafileParameter.objects.get(parameterset=paramset,
                                              name=param_name)

        self.assertEquals(param.string_value, "True")
        temp.close()
    def test_000_update_df_status_offline(self, mock_stat):
        """update_df_status should check the online status of
        preferred DFOs for all previously online datafiles and
        update online Parameter to 'False' for any offline files."""
        df1 = DataFile(dataset=self.dataset,
                       filename="test_df.jpg")
        df1.save()
        dfo1 = DataFileObject(datafile=df1,
                              storage_box=self.sbox1,
                              uri="stream/test.jpg",
                              verified=True)
        dfo1.save()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps = DatafileParameterSet(schema=schema, datafile=df1)
        ps.save()

        param_name = ParameterName.objects.get(schema=schema, name="online")
        param = DatafileParameter(parameterset=ps, name=param_name)
        param.string_value = True
        param.save()

        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=0,
                                       st_mtime=datetime.now())
        update_df_status()

        params = DatafileParameter.objects.filter(
            parameterset__schema=schema,
            parameterset__datafile=df1)

        self.assertEquals(params.count(), 1)
        self.assertEquals(params[0].string_value, "False")
Пример #3
0
def register_squashfile(exp_id, epn, sq_dir, sq_filename, namespace):
    '''
    example:
    register_squashfile(456, '1234A', '/srv/squashstore', '1234A.squashfs',
        'http://synchrotron.org.au/mx/squashfsarchive/1')
    '''
    dfs = DataFile.objects.filter(filename=sq_filename,
                                  dataset__experiments__id=exp_id)
    if len(dfs) == 1:
        return dfs[0]
    e = Experiment.objects.get(id=exp_id)
    ds = Dataset(description="01 SquashFS Archive")
    ds.save()
    ds.experiments.add(e)
    filepath = os.path.join(sq_dir, sq_filename)
    try:
        md5sum = open(filepath + '.md5sum', 'r').read().strip()[:32]
    except:
        print 'no md5sum file found'
        return None
    size = os.path.getsize(filepath)
    df = DataFile(md5sum=md5sum,
                  filename=sq_filename,
                  size=str(size),
                  dataset=ds)
    df.save()
    schema = Schema.objects.filter(namespace=namespace)[0]
    ps = DatafileParameterSet(schema=schema, datafile=df)
    ps.save()
    ps.set_param('EPN', epn)
    sbox = StorageBox.objects.get(name='squashstore')
    dfo = DataFileObject(storage_box=sbox, datafile=df, uri=sq_filename)
    dfo.save()
    return df
    def test_003_update_df_status_skip_offline(self, mock_stat, mock_df_online):
        """update_df_status should skip any files that have previously
        marked as offline."""
        df2 = DataFile(dataset=self.dataset,
                       filename="test_df2.jpg")
        df2.save()
        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri="stream/test_df2.jpg",
                              verified=True)
        dfo2.save()
        # df2.verify()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps2 = DatafileParameterSet(schema=schema, datafile=df2)
        ps2.save()

        param_name = ParameterName.objects.get(schema=schema, name="online")
        param2 = DatafileParameter(parameterset=ps2, name=param_name)
        param2.string_value = False
        param2.save()

        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=100,
                                       st_mtime=datetime.now())
        update_df_status()

        # assert that the df_online method wasn't called
        self.assertEquals(mock_df_online.call_count, 0)
Пример #5
0
 def create_dfo(self, top, filename, dataset=None):
     '''
     create dfo and datafile if necessary
     '''
     df, df_data = self.find_datafile(top, filename)
     if df is None and df_data is None:
         return True  # is a link
     if df:
         if dataset is not None and df.dataset.id != dataset.id:
             # olddataset_id = df.dataset.id
             df.dataset = dataset
             df.save()
             # oldds = Dataset.objects.get(id=olddataset_id)
             # if oldds.datafile_set.count() == 0:
             #     oldds.delete()
         elif dataset is None and top.startswith('frames'):
             prefix = 'Raw data for'
             prefix_dataset(df.dataset, prefix)
         self.update_dataset(df.dataset, top)
     else:
         if dataset is None:
             dataset = self.get_or_create_dataset('lost and found')
         df = DataFile(dataset=dataset,
                       filename=filename,
                       directory=top,
                       **df_data)
         df.save()
     dfo = DataFileObject(datafile=df,
                          storage_box=self.s_box,
                          uri=os.path.join(top, filename))
     dfo.save()
     return True
Пример #6
0
    def test_002_no_duplicate_params(self, mock_stat):
        """Datafile should only ever have one online param"""
        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=100,
                                       st_mtime=datetime.now())

        df1 = DataFile(dataset=self.dataset, filename="test_df.jpg")
        df1.save()
        dfo1 = DataFileObject(datafile=df1,
                              storage_box=self.sbox1,
                              uri="stream/test.jpg",
                              verified=True)
        dfo1.save()
        df1.verify()

        param_name = ParameterName.objects.get(
            schema__namespace=HSM_DATAFILE_NAMESPACE, name="online")

        paramset = DatafileParameterSet.objects.get(
            schema__namespace=HSM_DATAFILE_NAMESPACE, datafile=df1)

        params = DatafileParameter.objects.filter(parameterset=paramset,
                                                  name=param_name)

        self.assertEquals(params.count(), 1)

        self.assertRaises(OnlineParamExistsError,
                          create_df_status(df1, HSM_DATAFILE_NAMESPACE, 500))

        params = DatafileParameter.objects.filter(parameterset=paramset,
                                                  name=param_name)

        self.assertEquals(params.count(), 1)
Пример #7
0
    def setUp(self):
        """Setup test fixtures if needed."""
        self.user = User.objects.create_user("doctor", '', "pwd")

        self.exp = Experiment(title="Wonderful",
                              institution_name="Monash University",
                              created_by=self.user)
        self.exp.save()

        group = Group(name="Group1")
        group.save()

        facility = Facility(name="Test Facility", manager_group=group)
        facility.save()

        self.inst = Instrument(name="Test Instrument1", facility=facility)
        self.inst.save()

        self.dataset = Dataset(description="Dataset1", instrument=self.inst)
        self.dataset.save()

        storage_classes = getattr(settings, "HSM_STORAGE_CLASSES",
                                  DEFAULT_HSM_CLASSES)
        self.sbox1 = StorageBox(name="SBOX1",
                                django_storage_class=storage_classes[0],
                                status='online',
                                max_size=256)
        self.sbox1.save()
        sbox1_attr = StorageBoxAttribute(storage_box=self.sbox1,
                                         key='type',
                                         value=StorageBox.DISK)
        sbox1_attr.save()
        sbox1_loc_opt = StorageBoxOption(storage_box=self.sbox1,
                                         key="location",
                                         value="/dummy/path")
        sbox1_loc_opt.save()

        self.sbox2 = StorageBox(
            name="SBOX2",
            django_storage_class="any.non.disk.StorageSystem",
            status='offline',
            max_size=256)
        self.sbox2.save()
        sbox2_attr = StorageBoxAttribute(storage_box=self.sbox2,
                                         key='type',
                                         value=StorageBox.TAPE)
        sbox2_attr.save()

        self.df1 = DataFile(dataset=self.dataset, filename="test_df.jpg")
        self.df1.save()
        self.dfo1 = DataFileObject(datafile=self.df1,
                                   storage_box=self.sbox1,
                                   uri="stream/test.jpg",
                                   verified=True)
        self.dfo1.save()
        self.df1.verify()
Пример #8
0
    def setUp(self):
        raise SkipTest  # temporarily disabling this feature, needs coding
        from tempfile import mkdtemp, mktemp
        from django.conf import settings
        import os

        # Disconnect post_save signal
        from django.db.models.signals import post_save
        from tardis.tardis_portal.models import Experiment, \
            staging_hook, Dataset, DataFile, DataFileObject, StorageBox
        post_save.disconnect(staging_hook, sender=DataFileObject)

        from django.contrib.auth.models import User
        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        try:
            os.makedirs(settings.GET_FULL_STAGING_PATH_TEST)
        except OSError:
            pass
        self.temp = mkdtemp(dir=settings.GET_FULL_STAGING_PATH_TEST)

        self.filepath = mktemp(dir=self.temp)
        content = 'test file'
        with open(self.filepath, "w+b") as f:
            f.write(content)

        # make datafile
        exp = Experiment(title='test exp1',
                         institution_name='monash',
                         created_by=self.user)
        exp.save()

        # make dataset
        dataset = Dataset(description="dataset description...")
        dataset.save()
        dataset.experiments.add(exp)
        dataset.save()

        # create datafile
        df = DataFile(dataset=dataset, size=len(content),
                      filename=path.basename(self.file),
                      md5sum='f20d9f2072bbeb6691c0f9c5099b01f3')
        df.save()

        # create replica
        base_url = settings.GET_FULL_STAGING_PATH_TEST
        s_box = StorageBox.get_default_storage(location=base_url)
        dfo = DataFileObject(datafile=df,
                             uri=self.filepath,
                             storage_box=s_box)
        dfo.save()
        self.dfo = dfo
Пример #9
0
    def test_005_dfo_non_disk(self):
        """Files in StorageBoxes with a django_storage_class other than
        those specified in settings should not be processed"""
        dfo2 = DataFileObject(datafile=self.df1,
                              storage_box=self.sbox2,
                              uri="stream/test.jpg",
                              verified=True)
        self.assertRaises(StorageClassNotSupportedError, dfo_online, dfo2)

        with self.settings(HSM_STORAGE_CLASSES=["random.storage.CLASS"]):
            self.assertRaises(StorageClassNotSupportedError, dfo_online,
                              self.dfo1)
Пример #10
0
 def _build(dataset, filename, url):
     datafile_content = b"\n".join([b'some data %d' % i for i in range(1000)])
     filesize = len(datafile_content)
     datafile = DataFile(
         dataset=dataset, filename=filename, size=filesize)
     datafile.save()
     dfo = DataFileObject(
         datafile=datafile,
         storage_box=datafile.get_default_storage_box(),
         uri=url)
     dfo.file_object = BytesIO(datafile_content)
     dfo.save()
     return datafile
Пример #11
0
    def test_007_dfo_unverified(self):
        """df_online and dfo_online should raise Exception for an unverfied DataFile or
        DataFileObject, respectively"""
        df2 = DataFile(dataset=self.dataset, filename="test_df.jpg")
        df2.save()
        self.assertRaises(DataFileNotVerified, df_online, df2)

        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri="stream/test.jpg",
                              verified=False)
        dfo2.save()

        self.assertRaises(DataFileObjectNotVerified, dfo_online, dfo2)
Пример #12
0
 def _build(dataset, filename, url=None):
     datafile = DataFile(dataset=dataset, filename=filename)
     datafile.save()
     if url is None:
         datafile.file_object = StringIO('bla')
         return datafile
     from tardis.tardis_portal.models import \
         DataFileObject
     dfo = DataFileObject(
         datafile=datafile,
         storage_box=datafile.get_default_storage_box(),
         uri=url)
     dfo.save()
     return datafile
Пример #13
0
    def test_003_offline_dataset(self, mock_stat):
        """A dataset should be offline if any datafiles are offline"""
        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=0,
                                       st_mtime=datetime.now())
        ds = Dataset(description="Dataset2", instrument=self.inst)
        ds.save()

        df2 = DataFile(dataset=ds, filename="test_file.jpg")
        df2.save()
        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri=df2.filename)
        dfo2.save()
        df2.verify()

        self.assertFalse(dataset_online(ds))
Пример #14
0
 def _build(dataset, filename, url=None):
     datafile = DataFile(dataset=dataset, filename=filename)
     datafile.save()
     if url is None:
         datafile.file_object = StringIO(u'bla')
         return datafile
     dfo = DataFileObject(
         datafile=datafile,
         storage_box=datafile.get_default_storage_box(),
         uri=url)
     dfo.save()
     # Tests are run with CELERY_ALWAYS_EAGER = True,
     # so saving a DFO will trigger an immediate attempt
     # to verify the DFO which will trigger an attempt
     # to apply filters because we are overriding the
     # USE_FILTERS setting to True in this test:
     self.assertNotEqual(mock_send_task.call_count, 0)
     return datafile
Пример #15
0
 def _build_datafile(self,
                     testfile,
                     filename,
                     dataset,
                     checksum=None,
                     size=None,
                     mimetype=''):
     filesize, sha512sum = get_size_and_sha512sum(testfile)
     datafile = DataFile(dataset=dataset,
                         filename=filename,
                         mimetype=mimetype,
                         size=size if size is not None else filesize,
                         sha512sum=(checksum if checksum else sha512sum))
     datafile.save()
     dfo = DataFileObject(datafile=datafile,
                          storage_box=datafile.get_default_storage_box())
     dfo.save()
     with open(testfile, 'r') as sourcefile:
         dfo.file_object = sourcefile
     return DataFile.objects.get(pk=datafile.pk)
Пример #16
0
 def test_deleting_dfo_without_uri(self):
     dataset = Dataset(description="dataset description")
     dataset.save()
     save1 = settings.REQUIRE_DATAFILE_SIZES
     save2 = settings.REQUIRE_DATAFILE_CHECKSUMS
     try:
         settings.REQUIRE_DATAFILE_SIZES = False
         settings.REQUIRE_DATAFILE_CHECKSUMS = False
         datafile = DataFile(dataset=dataset, filename='test1.txt')
         datafile.save()
     finally:
         settings.REQUIRE_DATAFILE_SIZES = save1
         settings.REQUIRE_DATAFILE_CHECKSUMS = save2
     dfo = DataFileObject(
             datafile=datafile,
             storage_box=datafile.get_default_storage_box(),
             uri=None)
     dfo.save()
     self.assertIsNone(dfo.uri)
     self.assertIsNotNone(dfo.id)
     dfo.delete()
     self.assertIsNone(dfo.id)
    def test_002_update_df_status_skip_unverified(self, mock_stat, df_online):
        """update_df_status should skip files that are unverified"""
        df2 = DataFile(dataset=self.dataset,
                       filename="test_df2.jpg")
        df2.save()
        dfo2 = DataFileObject(datafile=df2,
                              storage_box=self.sbox1,
                              uri="stream/test_df2.jpg")
        dfo2.save()

        schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
        ps2 = DatafileParameterSet(schema=schema, datafile=df2)
        ps2.save()

        param_name = ParameterName.objects.get(schema=schema, name="online")
        param2 = DatafileParameter(parameterset=ps2, name=param_name)
        param2.string_value = True
        param2.save()

        mock_stat.return_value = Stats(st_size=10000,
                                       st_blocks=100,
                                       st_mtime=datetime.now())
        update_df_status()
        df_online.assert_not_called()
Пример #18
0
    def setUp(self):
        from django.contrib.auth.models import User
        from tempfile import mkdtemp

        user = '******'
        pwd = 'secret'
        email = ''
        self.user = User.objects.create_user(user, email, pwd)

        self.test_dir = mkdtemp()

        self.exp = Experiment(title='test exp1',
                              institution_name='monash',
                              created_by=self.user)
        self.exp.save()

        self.dataset = Dataset(description="dataset description...")
        self.dataset.save()
        self.dataset.experiments.add(self.exp)
        self.dataset.save()

        self.datafile = DataFile(dataset=self.dataset,
                                 filename="testfile.txt",
                                 size="42",
                                 md5sum='bogus')
        self.datafile.save()

        self.dfo = DataFileObject(
            datafile=self.datafile,
            storage_box=self.datafile.get_default_storage_box(),
            uri="1/testfile.txt")
        self.dfo.save()

        self.schema = Schema(namespace="http://localhost/psmtest/df/",
                             name="Parameter Set Manager",
                             type=3)
        self.schema.save()

        self.parametername1 = ParameterName(schema=self.schema,
                                            name="parameter1",
                                            full_name="Parameter 1")
        self.parametername1.save()

        self.parametername2 = ParameterName(schema=self.schema,
                                            name="parameter2",
                                            full_name="Parameter 2",
                                            data_type=ParameterName.NUMERIC)
        self.parametername2.save()

        self.parametername3 = ParameterName(schema=self.schema,
                                            name="parameter3",
                                            full_name="Parameter 3",
                                            data_type=ParameterName.DATETIME)
        self.parametername3.save()

        self.datafileparameterset = DatafileParameterSet(
            schema=self.schema, datafile=self.datafile)
        self.datafileparameterset.save()

        self.datafileparameter1 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername1,
            string_value="test1")
        self.datafileparameter1.save()

        self.datafileparameter2 = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername2,
            numerical_value=2)
        self.datafileparameter2.save()

        # Create a ParameterName and Parameter of type LINK to an experiment
        self.parametername_exp_link = ParameterName(
            schema=self.schema,
            name="exp_link",
            full_name="This parameter is a experiment LINK",
            data_type=ParameterName.LINK)
        self.parametername_exp_link.save()

        self.exp_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_exp_link)
        exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
        self.exp_link_param.set_value(exp_url)
        self.exp_link_param.save()

        # Create a ParameterName and Parameter of type LINK to a dataset
        self.parametername_dataset_link = ParameterName(
            schema=self.schema,
            name="dataset_link",
            full_name="This parameter is a dataset LINK",
            data_type=ParameterName.LINK)
        self.parametername_dataset_link.save()

        self.dataset_link_param = DatafileParameter(
            parameterset=self.datafileparameterset,
            name=self.parametername_dataset_link)
        dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
        self.dataset_link_param.set_value(dataset_url)
        self.dataset_link_param.save()

        # Create a ParameterName type LINK to an unresolvable (non-URL)
        # free-text value
        self.parametername_unresolvable_link = ParameterName(
            schema=self.schema,
            name="freetext_link",
            full_name="This parameter is a non-URL LINK",
            data_type=ParameterName.LINK)
        self.parametername_unresolvable_link.save()