def test_001_create_df_status(self, mock_stat):
    """When a new datafile record is verified, metadata for its
    online/offline status should be created and populated with the
    current online status.
    """
    # st_blocks=100 for a 10000-byte file: enough allocated blocks
    # for the file to be considered online.
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())
    temp = tempfile.NamedTemporaryFile(dir=tempfile.gettempdir())
    try:
        temp_name = os.path.basename(temp.name)
        df2 = DataFile(dataset=self.dataset, filename=temp_name)
        df2.save()
        dfo2 = DataFileObject(datafile=df2, storage_box=self.sbox1,
                              uri=temp_name)
        dfo2.save()
        # Verification should trigger creation of the HSM "online"
        # parameter for this datafile.
        df2.verify()
        param_name = ParameterName.objects.get(
            schema__namespace=HSM_DATAFILE_NAMESPACE, name="online")
        paramset = DatafileParameterSet.objects.get(
            schema__namespace=HSM_DATAFILE_NAMESPACE, datafile=df2)
        param = DatafileParameter.objects.get(parameterset=paramset,
                                              name=param_name)
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(param.string_value, "True")
    finally:
        # Close (and thereby delete) the temp file even if an
        # assertion above fails; the original leaked it on failure.
        temp.close()
def test_000_update_df_status_offline(self, mock_stat):
    """update_df_status should check the online status of preferred
    DFOs for all previously online datafiles and update the online
    Parameter to 'False' for any offline files.
    """
    df1 = DataFile(dataset=self.dataset, filename="test_df.jpg")
    df1.save()
    dfo1 = DataFileObject(datafile=df1, storage_box=self.sbox1,
                          uri="stream/test.jpg", verified=True)
    dfo1.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps = DatafileParameterSet(schema=schema, datafile=df1)
    ps.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param = DatafileParameter(parameterset=ps, name=param_name)
    # Store the string "True" rather than the boolean True:
    # string_value is a text field and the assertion below compares
    # against the string "False".
    param.string_value = "True"
    param.save()
    # st_blocks=0 marks the file as offline (no blocks on disk).
    mock_stat.return_value = Stats(st_size=10000, st_blocks=0,
                                   st_mtime=datetime.now())
    update_df_status()
    params = DatafileParameter.objects.filter(
        parameterset__schema=schema, parameterset__datafile=df1)
    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(params.count(), 1)
    self.assertEqual(params[0].string_value, "False")
def register_squashfile(exp_id, epn, sq_dir, sq_filename, namespace):
    """Register a SquashFS archive file as a DataFile.

    example:
    register_squashfile(456, '1234A', '/srv/squashstore',
                        '1234A.squashfs',
                        'http://synchrotron.org.au/mx/squashfsarchive/1')

    Returns the existing DataFile if one is already registered for
    this experiment, the newly created DataFile on success, or None
    when no ``.md5sum`` sidecar file exists for the archive.
    """
    dfs = DataFile.objects.filter(filename=sq_filename,
                                  dataset__experiments__id=exp_id)
    if len(dfs) == 1:
        # Already registered for this experiment.
        return dfs[0]
    e = Experiment.objects.get(id=exp_id)
    ds = Dataset(description="01 SquashFS Archive")
    ds.save()
    ds.experiments.add(e)
    filepath = os.path.join(sq_dir, sq_filename)
    try:
        # Use a context manager so the sidecar file is closed (the
        # original leaked the handle), and catch only I/O errors
        # instead of a bare except that would hide real bugs.
        with open(filepath + '.md5sum', 'r') as md5file:
            # First 32 hex chars are the md5 digest.
            md5sum = md5file.read().strip()[:32]
    except IOError:
        print('no md5sum file found')
        return None
    size = os.path.getsize(filepath)
    df = DataFile(md5sum=md5sum, filename=sq_filename,
                  size=str(size), dataset=ds)
    df.save()
    schema = Schema.objects.filter(namespace=namespace)[0]
    ps = DatafileParameterSet(schema=schema, datafile=df)
    ps.save()
    ps.set_param('EPN', epn)
    sbox = StorageBox.objects.get(name='squashstore')
    dfo = DataFileObject(storage_box=sbox, datafile=df, uri=sq_filename)
    dfo.save()
    return df
def test_003_update_df_status_skip_offline(self, mock_stat, mock_df_online):
    """update_df_status should skip any files that have previously
    been marked as offline.
    """
    df2 = DataFile(dataset=self.dataset, filename="test_df2.jpg")
    df2.save()
    dfo2 = DataFileObject(datafile=df2, storage_box=self.sbox1,
                          uri="stream/test_df2.jpg", verified=True)
    dfo2.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    ps2 = DatafileParameterSet(schema=schema, datafile=df2)
    ps2.save()
    param_name = ParameterName.objects.get(schema=schema, name="online")
    param2 = DatafileParameter(parameterset=ps2, name=param_name)
    # Mark the file as already offline so update_df_status should
    # not bother re-checking it.
    param2.string_value = False
    param2.save()
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())
    update_df_status()
    # Assert that the df_online method wasn't called for the
    # already-offline file (assertEqual, not deprecated assertEquals).
    self.assertEqual(mock_df_online.call_count, 0)
def create_dfo(self, top, filename, dataset=None):
    """Create a DataFileObject (and a DataFile if necessary) for the
    file at ``top``/``filename``.

    Returns True on every non-exceptional path, including when the
    entry is a link (find_datafile returned None for both values).
    """
    df, df_data = self.find_datafile(top, filename)
    if df is None and df_data is None:
        return True  # is a link
    if df:
        # An existing DataFile was found: reconcile its dataset.
        if dataset is not None and df.dataset.id != dataset.id:
            # Move the datafile to the requested dataset.
            # Cleanup of the now-possibly-empty old dataset is
            # deliberately disabled (commented out) below.
            # olddataset_id = df.dataset.id
            df.dataset = dataset
            df.save()
            # oldds = Dataset.objects.get(id=olddataset_id)
            # if oldds.datafile_set.count() == 0:
            #     oldds.delete()
        elif dataset is None and top.startswith('frames'):
            # Frame data with no explicit dataset: tag the existing
            # dataset's description and refresh it from the directory.
            prefix = 'Raw data for'
            prefix_dataset(df.dataset, prefix)
            self.update_dataset(df.dataset, top)
    else:
        # No DataFile exists yet; create one, falling back to a
        # catch-all dataset when none was supplied.
        if dataset is None:
            dataset = self.get_or_create_dataset('lost and found')
        df = DataFile(dataset=dataset, filename=filename,
                      directory=top, **df_data)
        df.save()
    # Always attach a DataFileObject in this importer's storage box,
    # keyed by the file's relative path.
    dfo = DataFileObject(datafile=df, storage_box=self.s_box,
                         uri=os.path.join(top, filename))
    dfo.save()
    return True
def test_002_no_duplicate_params(self, mock_stat):
    """Datafile should only ever have one online param."""
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())
    df1 = DataFile(dataset=self.dataset, filename="test_df.jpg")
    df1.save()
    dfo1 = DataFileObject(datafile=df1, storage_box=self.sbox1,
                          uri="stream/test.jpg", verified=True)
    dfo1.save()
    # Verification creates the first (and only allowed) online param.
    df1.verify()
    param_name = ParameterName.objects.get(
        schema__namespace=HSM_DATAFILE_NAMESPACE, name="online")
    paramset = DatafileParameterSet.objects.get(
        schema__namespace=HSM_DATAFILE_NAMESPACE, datafile=df1)
    params = DatafileParameter.objects.filter(parameterset=paramset,
                                              name=param_name)
    self.assertEqual(params.count(), 1)
    # Pass the callable and its arguments separately: the original
    # invoked create_df_status(...) eagerly, so assertRaises never
    # actually checked that the exception was raised.
    self.assertRaises(OnlineParamExistsError, create_df_status,
                      df1, HSM_DATAFILE_NAMESPACE, 500)
    # The failed second attempt must not have added a duplicate.
    params = DatafileParameter.objects.filter(parameterset=paramset,
                                              name=param_name)
    self.assertEqual(params.count(), 1)
def setUp(self): """Setup test fixtures if needed.""" self.user = User.objects.create_user("doctor", '', "pwd") self.exp = Experiment(title="Wonderful", institution_name="Monash University", created_by=self.user) self.exp.save() group = Group(name="Group1") group.save() facility = Facility(name="Test Facility", manager_group=group) facility.save() self.inst = Instrument(name="Test Instrument1", facility=facility) self.inst.save() self.dataset = Dataset(description="Dataset1", instrument=self.inst) self.dataset.save() storage_classes = getattr(settings, "HSM_STORAGE_CLASSES", DEFAULT_HSM_CLASSES) self.sbox1 = StorageBox(name="SBOX1", django_storage_class=storage_classes[0], status='online', max_size=256) self.sbox1.save() sbox1_attr = StorageBoxAttribute(storage_box=self.sbox1, key='type', value=StorageBox.DISK) sbox1_attr.save() sbox1_loc_opt = StorageBoxOption(storage_box=self.sbox1, key="location", value="/dummy/path") sbox1_loc_opt.save() self.sbox2 = StorageBox( name="SBOX2", django_storage_class="any.non.disk.StorageSystem", status='offline', max_size=256) self.sbox2.save() sbox2_attr = StorageBoxAttribute(storage_box=self.sbox2, key='type', value=StorageBox.TAPE) sbox2_attr.save() self.df1 = DataFile(dataset=self.dataset, filename="test_df.jpg") self.df1.save() self.dfo1 = DataFileObject(datafile=self.df1, storage_box=self.sbox1, uri="stream/test.jpg", verified=True) self.dfo1.save() self.df1.verify()
def setUp(self):
    """Build a staging-area fixture: a temp file on disk backed by a
    DataFile/DataFileObject pair.

    Currently skipped: the staging feature is temporarily disabled
    and needs coding, so everything after the raise is dead code.
    """
    raise SkipTest  # temporarily disabling this feature, needs coding
    from tempfile import mkdtemp, mktemp
    from django.conf import settings
    import os
    # Disconnect post_save signal so saving a DFO does not trigger
    # the staging hook during fixture creation.
    from django.db.models.signals import post_save
    from tardis.tardis_portal.models import Experiment, \
        staging_hook, Dataset, DataFile, DataFileObject, StorageBox
    post_save.disconnect(staging_hook, sender=DataFileObject)
    from django.contrib.auth.models import User
    user = '******'
    pwd = 'secret'
    email = ''
    self.user = User.objects.create_user(user, email, pwd)
    try:
        os.makedirs(settings.GET_FULL_STAGING_PATH_TEST)
    except OSError:
        # Directory already exists.
        pass
    self.temp = mkdtemp(dir=settings.GET_FULL_STAGING_PATH_TEST)
    self.filepath = mktemp(dir=self.temp)
    content = 'test file'
    with open(self.filepath, "w+b") as f:
        f.write(content)
    # make datafile
    exp = Experiment(title='test exp1', institution_name='monash',
                     created_by=self.user)
    exp.save()
    # make dataset
    dataset = Dataset(description="dataset description...")
    dataset.save()
    dataset.experiments.add(exp)
    dataset.save()
    # create datafile
    # Fixed: the original referenced the undefined attribute
    # `self.file` and the unimported name `path`; use
    # os.path.basename(self.filepath) for the name of the temp file
    # created above.
    df = DataFile(dataset=dataset, size=len(content),
                  filename=os.path.basename(self.filepath),
                  md5sum='f20d9f2072bbeb6691c0f9c5099b01f3')
    df.save()
    # create replica
    base_url = settings.GET_FULL_STAGING_PATH_TEST
    s_box = StorageBox.get_default_storage(location=base_url)
    dfo = DataFileObject(datafile=df, uri=self.filepath,
                         storage_box=s_box)
    dfo.save()
    self.dfo = dfo
def test_005_dfo_non_disk(self):
    """Files in StorageBoxes with a django_storage_class other than
    those specified in settings should not be processed."""
    tape_dfo = DataFileObject(datafile=self.df1,
                              storage_box=self.sbox2,
                              uri="stream/test.jpg", verified=True)
    with self.assertRaises(StorageClassNotSupportedError):
        dfo_online(tape_dfo)
    # Even the disk-backed fixture DFO is rejected once the
    # configured HSM storage classes no longer include its class.
    with self.settings(HSM_STORAGE_CLASSES=["random.storage.CLASS"]):
        with self.assertRaises(StorageClassNotSupportedError):
            dfo_online(self.dfo1)
def _build(dataset, filename, url):
    """Create a DataFile with generated byte content and a DFO at url."""
    content = b"\n".join(b'some data %d' % i for i in range(1000))
    datafile = DataFile(
        dataset=dataset,
        filename=filename,
        size=len(content))
    datafile.save()
    dfo = DataFileObject(
        datafile=datafile,
        storage_box=datafile.get_default_storage_box(),
        uri=url)
    # Attach the in-memory content before saving the DFO.
    dfo.file_object = BytesIO(content)
    dfo.save()
    return datafile
def test_007_dfo_unverified(self):
    """df_online and dfo_online should raise an exception for an
    unverified DataFile or DataFileObject, respectively."""
    unverified_df = DataFile(dataset=self.dataset,
                             filename="test_df.jpg")
    unverified_df.save()
    # A DataFile with no verified DFO at all.
    self.assertRaises(DataFileNotVerified, df_online, unverified_df)
    # A DFO explicitly flagged as unverified.
    unverified_dfo = DataFileObject(datafile=unverified_df,
                                    storage_box=self.sbox1,
                                    uri="stream/test.jpg",
                                    verified=False)
    unverified_dfo.save()
    self.assertRaises(DataFileObjectNotVerified, dfo_online,
                      unverified_dfo)
def _build(dataset, filename, url=None):
    """Create a DataFile; attach in-memory content when no url is
    given, otherwise create a DataFileObject pointing at url."""
    df = DataFile(dataset=dataset, filename=filename)
    df.save()
    if url is None:
        df.file_object = StringIO('bla')
        return df
    # Imported lazily, matching the original's deferred import.
    from tardis.tardis_portal.models import DataFileObject
    dfo = DataFileObject(
        datafile=df,
        storage_box=df.get_default_storage_box(),
        uri=url)
    dfo.save()
    return df
def test_003_offline_dataset(self, mock_stat):
    """A dataset should be offline if any datafiles are offline."""
    # st_blocks=0 means the file's content is not on disk => offline.
    mock_stat.return_value = Stats(st_size=10000, st_blocks=0,
                                   st_mtime=datetime.now())
    offline_ds = Dataset(description="Dataset2", instrument=self.inst)
    offline_ds.save()
    datafile = DataFile(dataset=offline_ds, filename="test_file.jpg")
    datafile.save()
    dfo = DataFileObject(datafile=datafile, storage_box=self.sbox1,
                         uri=datafile.filename)
    dfo.save()
    datafile.verify()
    self.assertFalse(dataset_online(offline_ds))
def _build(dataset, filename, url=None):
    """Create a DataFile; with no url, attach in-memory content,
    otherwise create a DFO (which triggers filter tasks eagerly)."""
    df = DataFile(dataset=dataset, filename=filename)
    df.save()
    if url is None:
        df.file_object = StringIO(u'bla')
        return df
    dfo = DataFileObject(
        datafile=df,
        storage_box=df.get_default_storage_box(),
        uri=url)
    dfo.save()
    # Tests are run with CELERY_ALWAYS_EAGER = True, so saving a DFO
    # triggers an immediate verification attempt, which in turn
    # attempts to apply filters because this test overrides the
    # USE_FILTERS setting to True:
    self.assertNotEqual(mock_send_task.call_count, 0)
    return df
def _build_datafile(self, testfile, filename, dataset, checksum=None,
                    size=None, mimetype=''):
    """Create a DataFile from a file on disk and return a fresh copy
    of the saved record.

    The datafile's size and sha512 checksum are computed from
    ``testfile`` unless explicit ``size``/``checksum`` overrides are
    supplied (used by tests to create deliberately mismatched files).
    """
    filesize, sha512sum = get_size_and_sha512sum(testfile)
    datafile = DataFile(dataset=dataset, filename=filename,
                        mimetype=mimetype,
                        size=size if size is not None else filesize,
                        sha512sum=(checksum if checksum else sha512sum))
    datafile.save()
    dfo = DataFileObject(datafile=datafile,
                         storage_box=datafile.get_default_storage_box())
    dfo.save()
    # NOTE(review): the source file is opened in text mode ('r')
    # although its sha512 was computed over raw bytes — presumably
    # fine for the ASCII fixtures used here, but 'rb' looks more
    # correct; TODO confirm against the test data.
    with open(testfile, 'r') as sourcefile:
        # Assigning file_object streams the content into the DFO.
        dfo.file_object = sourcefile
    # Re-fetch so callers see the post-save state from the database.
    return DataFile.objects.get(pk=datafile.pk)
def test_deleting_dfo_without_uri(self):
    """A DFO saved with uri=None can still be deleted cleanly."""
    dataset = Dataset(description="dataset description")
    dataset.save()
    # Temporarily relax the size/checksum requirements so a bare
    # DataFile can be saved, restoring the originals afterwards.
    orig_sizes = settings.REQUIRE_DATAFILE_SIZES
    orig_checksums = settings.REQUIRE_DATAFILE_CHECKSUMS
    try:
        settings.REQUIRE_DATAFILE_SIZES = False
        settings.REQUIRE_DATAFILE_CHECKSUMS = False
        datafile = DataFile(dataset=dataset, filename='test1.txt')
        datafile.save()
    finally:
        settings.REQUIRE_DATAFILE_SIZES = orig_sizes
        settings.REQUIRE_DATAFILE_CHECKSUMS = orig_checksums
    dfo = DataFileObject(
        datafile=datafile,
        storage_box=datafile.get_default_storage_box(),
        uri=None)
    dfo.save()
    self.assertIsNone(dfo.uri)
    self.assertIsNotNone(dfo.id)
    dfo.delete()
    # Django clears the pk on delete.
    self.assertIsNone(dfo.id)
def test_002_update_df_status_skip_unverified(self, mock_stat, df_online):
    """update_df_status should skip files that are unverified."""
    unverified_df = DataFile(dataset=self.dataset,
                             filename="test_df2.jpg")
    unverified_df.save()
    # Deliberately NOT verified: no verified=True, no .verify() call.
    unverified_dfo = DataFileObject(datafile=unverified_df,
                                    storage_box=self.sbox1,
                                    uri="stream/test_df2.jpg")
    unverified_dfo.save()
    schema = Schema.objects.get(namespace=HSM_DATAFILE_NAMESPACE)
    paramset = DatafileParameterSet(schema=schema,
                                    datafile=unverified_df)
    paramset.save()
    online_name = ParameterName.objects.get(schema=schema,
                                            name="online")
    online_param = DatafileParameter(parameterset=paramset,
                                     name=online_name)
    online_param.string_value = True
    online_param.save()
    mock_stat.return_value = Stats(st_size=10000, st_blocks=100,
                                   st_mtime=datetime.now())
    update_df_status()
    # Unverified files must never reach the online check.
    df_online.assert_not_called()
def setUp(self):
    """Build the ParameterSetManager fixture: a datafile with a
    parameter set containing STRING, NUMERIC, DATETIME and LINK
    parameters.
    """
    from django.contrib.auth.models import User
    from tempfile import mkdtemp
    # Basic user / experiment / dataset / datafile object graph.
    user = '******'
    pwd = 'secret'
    email = ''
    self.user = User.objects.create_user(user, email, pwd)
    self.test_dir = mkdtemp()
    self.exp = Experiment(title='test exp1',
                          institution_name='monash',
                          created_by=self.user)
    self.exp.save()
    self.dataset = Dataset(description="dataset description...")
    self.dataset.save()
    self.dataset.experiments.add(self.exp)
    self.dataset.save()
    self.datafile = DataFile(dataset=self.dataset,
                             filename="testfile.txt",
                             size="42", md5sum='bogus')
    self.datafile.save()
    self.dfo = DataFileObject(
        datafile=self.datafile,
        storage_box=self.datafile.get_default_storage_box(),
        uri="1/testfile.txt")
    self.dfo.save()
    # A schema with one parameter name of each basic data type.
    self.schema = Schema(namespace="http://localhost/psmtest/df/",
                         name="Parameter Set Manager", type=3)
    self.schema.save()
    self.parametername1 = ParameterName(schema=self.schema,
                                        name="parameter1",
                                        full_name="Parameter 1")
    self.parametername1.save()
    self.parametername2 = ParameterName(schema=self.schema,
                                        name="parameter2",
                                        full_name="Parameter 2",
                                        data_type=ParameterName.NUMERIC)
    self.parametername2.save()
    self.parametername3 = ParameterName(schema=self.schema,
                                        name="parameter3",
                                        full_name="Parameter 3",
                                        data_type=ParameterName.DATETIME)
    self.parametername3.save()
    # A parameter set on the datafile with one string and one
    # numeric parameter populated.
    self.datafileparameterset = DatafileParameterSet(
        schema=self.schema, datafile=self.datafile)
    self.datafileparameterset.save()
    self.datafileparameter1 = DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername1, string_value="test1")
    self.datafileparameter1.save()
    self.datafileparameter2 = DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername2, numerical_value=2)
    self.datafileparameter2.save()
    # Create a ParameterName and Parameter of type LINK to an experiment
    self.parametername_exp_link = ParameterName(
        schema=self.schema, name="exp_link",
        full_name="This parameter is a experiment LINK",
        data_type=ParameterName.LINK)
    self.parametername_exp_link.save()
    self.exp_link_param = DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername_exp_link)
    exp_url = self.exp.get_absolute_url()  # /experiment/view/1/
    self.exp_link_param.set_value(exp_url)
    self.exp_link_param.save()
    # Create a ParameterName and Parameter of type LINK to a dataset
    self.parametername_dataset_link = ParameterName(
        schema=self.schema, name="dataset_link",
        full_name="This parameter is a dataset LINK",
        data_type=ParameterName.LINK)
    self.parametername_dataset_link.save()
    self.dataset_link_param = DatafileParameter(
        parameterset=self.datafileparameterset,
        name=self.parametername_dataset_link)
    dataset_url = self.dataset.get_absolute_url()  # /dataset/1/
    self.dataset_link_param.set_value(dataset_url)
    self.dataset_link_param.save()
    # Create a ParameterName type LINK to an unresolvable (non-URL)
    # free-text value
    self.parametername_unresolvable_link = ParameterName(
        schema=self.schema, name="freetext_link",
        full_name="This parameter is a non-URL LINK",
        data_type=ParameterName.LINK)
    self.parametername_unresolvable_link.save()