示例#1
0
    def test_ensure_res_metadata_enhanced_refs(self):
        """
        Check that the ``enrich_refs`` config flag controls whether the
        resource's reference gains a ``citation`` property.

        With the flag off, the single DOI reference in ``resmd`` carries no
        ``citation``.  With the flag on, ``resmd`` and the bag's annotation
        metadata carry one, while ``nerd_metadata_for('', False)`` does not.
        """
        def assert_single_doi_ref(refs, with_citation):
            # every view inspected below must hold exactly one doi.org reference
            self.assertEqual(len(refs), 1)
            self.assertIn('doi.org', refs[0]['location'])
            if with_citation:
                self.assertIn('citation', refs[0])
            else:
                self.assertNotIn('citation', refs[0])

        # precondition: nothing has been written or loaded yet
        self.assertFalse(os.path.exists(self.bagdir))
        self.assertIsNone(self.bagr.inpodfile)

        cfg = {
            'enrich_refs': False,
            'doi_resolver': {'client_info': self.doiclientinfo},
        }

        # enrichment disabled: reference is left without a citation
        self.bagr = midas.MIDASMetadataBagger(
            self.midasid, self.bagparent, self.revdir, self.upldir, cfg
        )
        self.bagr.ensure_res_metadata()
        assert_single_doi_ref(self.bagr.resmd['references'],
                              with_citation=False)

        # enrichment enabled: rebuild the bagger with the same (updated) dict
        cfg['enrich_refs'] = True
        self.bagr = midas.MIDASMetadataBagger(
            self.midasid, self.bagparent, self.revdir, self.upldir, cfg
        )
        # NOTE(review): the True argument presumably forces regeneration --
        # confirm against ensure_res_metadata()'s signature
        self.bagr.ensure_res_metadata(True)
        assert_single_doi_ref(self.bagr.resmd['references'],
                              with_citation=True)

        bag = self.bagr.bagbldr.bag

        # NOTE(review): the False argument presumably excludes annotations
        # from the returned metadata -- confirm
        assert_single_doi_ref(bag.nerd_metadata_for('', False)['references'],
                              with_citation=False)

        # the citation shows up in the annotation metadata
        assert_single_doi_ref(bag.annotations_metadata_for('')['references'],
                              with_citation=True)
示例#2
0
    def test_fileExaminer_autolaunch(self):
        """
        Show that the asynchronous file examiner does its work when it is
        left to launch on its own.

        The bagger is created with ``asyncexamine=True`` and
        ``fileExaminer_autolaunch`` is not touched.  After ``prepare()``,
        trial1.json must already have a checksum; once the examiner thread
        has finished (polled for up to about 2 seconds), trial2.json must
        have one as well.
        """
        self.bagr = midas.MIDASMetadataBagger(self.midasid,
                                              self.bagparent,
                                              self.revdir,
                                              asyncexamine=True)
        self.assertIsNotNone(self.bagr.fileExaminer)

        self.assertFalse(os.path.exists(self.bagdir))
        self.assertIsNone(self.bagr.datafiles)

        try:
            self.bagr.prepare()
        except Exception:
            # wait for the examiner thread to finish before propagating the
            # failure (presumably so it does not outlive the test)
            self.bagr.fileExaminer.thread.join()
            raise
        self.assertTrue(os.path.exists(self.bagdir))
        fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial1.json")
        self.assertIn('checksum', fmd)  # because there's a .sha256 file

        if self.bagr.fileExaminer.thread.is_alive():
            print("waiting for file examiner thread")
            polls_left = 20
            while polls_left > 0 and self.bagr.fileExaminer.thread.is_alive():
                polls_left -= 1
                time.sleep(0.1)
            # Decide on the thread's actual state rather than the counter:
            # the thread may finish during the final sleep, in which case
            # the counter is 0 but the examiner did complete in time.
            if self.bagr.fileExaminer.thread.is_alive():
                self.fail("file examiner is taking too long")
        fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial2.json")
        self.assertIn('checksum', fmd)
示例#3
0
    def test_fileExaminer(self):
        """
        Show that the file examiner fills in the checksum for trial2.json
        when it is launched explicitly.

        asyncexamine is turned on but autolaunch is turned off so that the
        state before and after the examiner runs can be inspected: before
        launch trial2.json has a ``_status`` but no ``checksum``; after the
        thread is joined it has a ``checksum`` and no ``_status``.
        """
        self.bagr = midas.MIDASMetadataBagger(self.midasid,
                                              self.bagparent,
                                              self.revdir,
                                              asyncexamine=True)
        self.assertIsNotNone(self.bagr.fileExaminer)
        self.bagr.fileExaminer_autolaunch = False

        self.assertFalse(os.path.exists(self.bagdir))
        self.assertIsNone(self.bagr.datafiles)

        self.bagr.prepare()
        self.assertTrue(os.path.exists(self.bagdir))
        fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial1.json")
        self.assertIn('checksum', fmd)  # because there's a .sha256 file
        self.assertIn('_status', fmd)
        fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial2.json")
        self.assertIn('_status', fmd)
        self.assertNotIn('checksum', fmd)

        # run the examiner and wait for it to finish before re-checking
        self.bagr.fileExaminer.launch()
        self.bagr.fileExaminer.thread.join()
        fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial2.json")
        self.assertIn('checksum', fmd)
        self.assertNotIn('_status', fmd)
示例#4
0
 def setUp(self):
     """
     Create a temporary parent directory for bags and a bagger that is
     given only a review directory (no upload directory argument).
     """
     tempfiles = Tempfiles()
     self.tf = tempfiles
     self.bagparent = tempfiles.mkdir("bagger")

     # the review area sits directly under the test SIP directory
     self.revdir = os.path.join(self.testsip, "review")

     self.bagr = midas.MIDASMetadataBagger(
         self.midasid, self.bagparent, self.revdir
     )

     # path the tests check for the bag: <bagparent>/<midasid>
     self.bagdir = os.path.join(self.bagparent, self.midasid)
示例#5
0
    def test_ensure_file_metadata_resmd(self):
        """
        Check the file-level metadata written by ``ensure_file_metadata()``
        when the resource metadata is passed in.

        Two files are examined: ``trial3/trial3a.json`` from the upload area
        (expected to get no ``description``) and ``trial2.json`` from the
        review area (expected to get one).
        """
        # fix the config and recreate
        cfg = self.bagr.cfg
        builder_cfg = cfg.setdefault('bag_builder', {})
        builder_cfg['distrib_service_baseurl'] = \
                        "https://testdata.nist.gov/od/ds"
        self.bagr = midas.MIDASMetadataBagger(
            self.midasid, self.bagparent, self.revdir, self.upldir, cfg
        )
        self.assertFalse(os.path.exists(self.bagdir))

        self.bagr.ensure_res_metadata()

        # NOTE(review): midasid[32:] is used as the name of the record's
        # subdirectory under the upload/review areas -- confirm the id layout
        recdir = self.midasid[32:]
        sha256_alg = {"@type": "Thing", "tag": "sha256"}

        # --- a file found in the upload area ---
        filepath = os.path.join("trial3", "trial3a.json")
        srcfile = os.path.join(self.upldir, recdir, filepath)
        self.bagr.ensure_file_metadata(srcfile, filepath, self.bagr.resmd)

        nerdfile = os.path.join(self.bagdir, 'metadata', filepath,
                                "nerdm.json")
        self.assertTrue(os.path.exists(self.bagdir))
        self.assertTrue(os.path.exists(nerdfile))

        fmd = midas.read_nerd(nerdfile)
        self.assertEqual(fmd['size'], 70)
        self.assertTrue(fmd['checksum']['hash'])
        self.assertEqual(fmd['checksum']['algorithm'], sha256_alg)
        self.assertTrue(fmd['downloadURL'])
        self.assertTrue(
            fmd['downloadURL'].startswith('https://testdata.nist.gov/'),
            "Unexpected downloadURL: " + fmd['downloadURL']
        )

        # trial3a.json has no matching distribution in _pod.json; thus, no desc
        self.assertNotIn('description', fmd)

        # --- a file found in the review area ---
        filepath = "trial2.json"
        srcfile = os.path.join(self.revdir, recdir, filepath)
        self.bagr.ensure_file_metadata(srcfile, filepath, self.bagr.resmd)

        nerdfile = os.path.join(self.bagdir, 'metadata', filepath,
                                "nerdm.json")
        self.assertTrue(os.path.exists(self.bagdir))
        self.assertTrue(os.path.exists(nerdfile))

        fmd = midas.read_nerd(nerdfile)
        self.assertEqual(fmd['size'], 69)
        self.assertTrue(fmd['checksum']['hash'])
        self.assertEqual(fmd['checksum']['algorithm'], sha256_alg)
        self.assertTrue(fmd['downloadURL'])
        self.assertTrue(fmd['description'])
示例#6
0
    def test_ark_ediid(self):
        """
        Check that the bagger handles an ARK-style identifier as the EDI ID.

        The test SIP is copied into the temporary area, its ``_pod.json``
        files are rewritten to carry ``self.arkid`` as the identifier, and
        the bagger's names, input directories, bag directory and resulting
        NERDm ``ediid``/``@id`` are checked against that identifier.
        """
        # work on a private copy of the SIP since its POD files get rewritten
        sipcopy = os.path.join(self.bagparent, os.path.basename(self.testsip))
        shutil.copytree(self.testsip, sipcopy)
        self.upldir = os.path.join(sipcopy, "upload")
        self.revdir = os.path.join(sipcopy, "review")

        # load the upload POD, swap in the ARK id, and write the result to
        # both the upload and review areas (in that order)
        with open(os.path.join(self.upldir, "1491", "_pod.json")) as fd:
            pod = json.load(fd)
        pod['identifier'] = self.arkid
        for sipdir in (self.upldir, self.revdir):
            with open(os.path.join(sipdir, "1491", "_pod.json"), 'w') as fd:
                json.dump(pod, fd, indent=2)

        cfg = {'bag_builder': {'validate_id': r'(pdr\d)|(mds[01])'}}

        self.bagr = midas.MIDASMetadataBagger(self.arkid,
                                              self.bagparent,
                                              self.revdir,
                                              self.upldir,
                                              config=cfg)

        # NOTE(review): offsets 11 and 16 presumably strip the ARK prefix and
        # the prefix plus a record-number lead-in, respectively -- confirm
        # against the value of self.arkid
        bagname = self.arkid[11:]
        recdir = self.arkid[16:]

        self.assertEqual(self.bagr.midasid, self.arkid)
        self.assertEqual(self.bagr.name, bagname)
        self.assertEqual(self.bagr._indirs[0],
                         os.path.join(self.revdir, recdir))
        self.assertEqual(self.bagr._indirs[1],
                         os.path.join(self.upldir, recdir))

        self.assertEqual(os.path.basename(self.bagr.bagbldr.bagdir), bagname)

        self.bagr.ensure_base_bag()
        self.assertEqual(self.bagr.bagbldr.id, self.arkid)
        self.assertEqual(os.path.basename(self.bagr.bagbldr.bag.dir), bagname)

        self.bagr.ensure_res_metadata(True)
        resmd = self.bagr.bagbldr.bag.nerd_metadata_for('')
        self.assertEqual(resmd['ediid'], self.arkid)
        self.assertEqual(resmd['@id'], self.arkid)
示例#7
0
 def setUp(self):
     """
     Create a temporary parent directory for bags and a bagger that is
     given only an upload directory (``None`` is passed for the review
     directory).
     """
     tempfiles = Tempfiles()
     self.tf = tempfiles
     self.bagparent = tempfiles.mkdir("bagger")

     # the upload area sits directly under the test SIP directory
     self.upldir = os.path.join(self.testsip, "upload")

     self.bagr = midas.MIDASMetadataBagger(
         self.midasid, self.bagparent, None, self.upldir
     )
示例#8
0
 def test_wrong_ediid(self):
     """
     Constructing a bagger with ``self.wrongid`` must raise
     ``midas.SIPDirectoryNotFound``.
     """
     expected_error = midas.SIPDirectoryNotFound
     with self.assertRaises(expected_error):
         self.bagr = midas.MIDASMetadataBagger(
             self.wrongid, self.bagparent, self.revdir, self.upldir
         )