Пример #1
0
 def test_5_harvest_multi(self):
     '''Harvest all datasets matching the pattern .* with two workers.

     Once harvesting has returned, the destination directory is expected
     to contain exactly two entries.
     '''
     destdir = '/gridgroup/cms/cbernet/unittests/multi'
     selected, _ = harvest.get_ds_infos('.*')
     harvest.harvest(selected, destdir, ntgzs=2, nworkers=2, delete='y')
     self.assertEqual(len(os.listdir(destdir)), 2)
Пример #2
0
 def test_6_harvest_sequential(self):
     '''Check that harvesting can be split over successive calls.

     The first two selected datasets are harvested one after the other
     into the same destination directory, each call using a single
     worker. The first call passes delete='y', the second delete='n'.
     Two entries are then expected in the destination directory.
     '''
     destdir = '/gridgroup/cms/cbernet/unittests/multi'
     selected, _ = harvest.get_ds_infos('.*')
     # one single-dataset batch per call, with its delete flag
     batches = ((selected[:1], 'y'), (selected[1:2], 'n'))
     for batch, delete_flag in batches:
         harvest.harvest(batch, destdir, ntgzs=2, nworkers=1,
                         delete=delete_flag)
     self.assertEqual(len(os.listdir(destdir)), 2)
Пример #3
0
 def test_1_ds_info(self):
     '''Test that dataset info can be read out and filtered.

     Steps:
     - two datasets match the DY1Jets pattern and none is reported as done;
     - after harvesting information is stored for one of them, it is
       reported as done instead of selected;
     - after one more tgz is stored for that dataset, it is selected again.
     '''
     infos, done = harvest.get_ds_infos('.*DY1Jets.*')
     self.assertEqual(len(infos), 2)
     self.assertEqual(len(done), 0)
     for info in infos:
         # dedicated assertions report the offending values on failure
         self.assertIn('path', info)
         self.assertGreater(len(info['tgzs']['0000']), 1)
     # test that datasets already harvested are masked
     # create dummy harvesting information:
     harv_info = {
         'time': time.time(),
         'parent': None,
         'dir': 'foo',
         'tgzs': infos[1]['tgzs']
     }
     infos[1]['harvesting'] = harv_info
     harvest.datasetdb.insert('se', infos[1])
     infos, done = harvest.get_ds_infos('.*DY1Jets.*')
     print('selected')
     pprint.pprint(infos)
     print('done')
     pprint.pprint(done)
     self.assertEqual(len(infos), 1)
     self.assertEqual(len(done), 1)
     # add one more tgz to a sample that has already been harvested.
     # deepcopy is needed here: a shallow copy shares the nested 'tgzs'
     # containers with done[0], so the append below would modify done[0]
     # as well.
     newinfo = copy.deepcopy(done[0])
     newinfo['tgzs']['0000'].append('heppyOutput_666.tgz')
     harvest.datasetdb.insert('se', newinfo)
     infos2, done2 = harvest.get_ds_infos('.*DY1Jets.*')
     self.assertEqual(len(infos2), 2)
     self.assertEqual(len(done2), 0)
     # add one more subdir
     # NOTE(review): newinfo is not inserted into the database in this
     # step, so the query below sees the state left by the previous step.
     # Confirm whether an insert was intended.
     newinfo = copy.deepcopy(done[0])
     newinfo['tgzs']['0001'] = []
     infos2, done2 = harvest.get_ds_infos('.*DY1Jets.*')
     self.assertEqual(len(infos2), 2)
     self.assertEqual(len(done2), 0)
Пример #4
0
 def test_4_harvest_one(self):
     '''Harvest one dataset, then check the destination and the database.'''
     destdir = '/gridgroup/cms/cbernet/unittests/single'
     selected, _ = harvest.get_ds_infos('.*DY1Jets.*_ext')
     dataset = selected[0]
     # reference timestamp taken right before harvesting
     start = time.time()
     harvested = harvest.harvest_one(dataset, destdir, ntgzs=2)
     harvest.datasetdb.insert('se', harvested)
     # the destination must hold a single entry named after the dataset
     entries = os.listdir(destdir)
     self.assertEqual(len(entries), 1)
     self.assertEqual(entries[0], dataset['name'])
     # the harvesting time stored in the db must be later than the start
     records = harvest.datasetdb.find('se', {'name': dataset['name']})
     self.assertEqual(len(records), 1)
     harvesting_time = records[0]['harvesting']['time']
     self.assertTrue(harvesting_time > start)
Пример #5
0
 def test_2_fetch(self):
     '''Run fetch, unpack and hadd on one dataset in a temporary directory.

     The directory content is checked after unpack (two chunk entries)
     and after hadd (a single entry). The temporary directory is removed
     at the end of the test, whether the test passes or fails.
     '''
     infos, _ = harvest.get_ds_infos('.*DY1Jets.*_ext')
     outdir = tempfile.mkdtemp()
     # registered right away so that the directory is also removed when
     # one of the calls or assertions below fails
     self.addCleanup(shutil.rmtree, outdir)
     ntgzs = 2
     harvest.fetch(infos[0], outdir, ntgzs)
     harvest.unpack(infos[0], outdir, ntgzs)
     # os.listdir returns entries in arbitrary order, so the listing is
     # sorted before being compared to the (sorted) expected names
     chunks = sorted(os.listdir(outdir))
     self.assertListEqual(chunks, [
         '190503%DY1JetsToLL_M50_LO_ext%tt_DY_nominal_Chunk1',
         '190503%DY1JetsToLL_M50_LO_ext%tt_DY_nominal_Chunk10'
     ])
     harvest.hadd(outdir)
     results = os.listdir(outdir)
     self.assertEqual(len(results), 1)
     self.assertEqual(results[0],
                      '190503%DY1JetsToLL_M50_LO_ext%tt_DY_nominal')