# One-off migration of the 'se' collection: every entry carrying a non-empty
# 'tiers' field gets its 'harvesting' tier promoted to a top-level
# 'harvesting' key (with 'parent' set to None); 'tiers' is then dropped and
# the entry is handed back to dsdb.insert.
from datasetdb import DatasetDB
import pprint
from getpass import getpass
import os
import copy

pwd = getpass()
dsdb = DatasetDB('writer', pwd, db='datasets')

for entry in dsdb.find('se', {}):
    tiers = entry.get('tiers')
    if tiers:
        # Same sub-document object that lives under entry['tiers'].
        harvesting = tiers['harvesting']
        harvesting['parent'] = None
        entry['harvesting'] = harvesting
        # '_id' stays on the entry (its removal is disabled here).
        del entry['tiers']
        dsdb.insert('se', entry)
def test_1_mode(self):
    """Check that DatasetDB('foo', 'thepass') raises ValueError.

    Presumably 'foo' is rejected because it is not a supported mode;
    confirm against the DatasetDB constructor.
    """
    self.assertRaises(ValueError, DatasetDB, 'foo', 'thepass')
from datasetdb import DatasetDB
from getpass import getpass
from htt_plot.tools.dataset import Dataset

# NOTE(review): the prompt / getpass / connection statements below were
# rebuilt from a span that had been masked out with '******'; confirm they
# match the intended start-up sequence.
print('Dataset reader password:')
pwd = getpass()
dsdb = DatasetDB('reader', pwd, db='datasets')


def get_nchunks(info):
    '''returns number of chunks for a dataset info.

    Sums up all chunks in subdirectories 0000, 0001, etc.

    info: mapping read with info.get('tgzs'); 'tgzs' is expected to map
          each subdirectory to a sized collection of its chunks.
    returns: total number of chunks, or 0 when info has no 'tgzs' entry
             (or it is None).
    '''
    tgzs = info.get('tgzs')
    if tgzs is None:
        return 0
    # values() exists on both Python 2 and 3 dicts, unlike iteritems().
    return sum(len(subdchunks) for subdchunks in tgzs.values())


def efficiency(name):
    '''returns efficiency for dataset with this name.

    name should be like
    190503%HiggsSUSYGG1400%tt_mssm_signals_CMS_scale_j_RelativeSample_13TeV_up

    raises ValueError if no entry with this name is found in the 'se'
    collection.
    '''
    infos = dsdb.find('se', {'name': name})
    if not infos:
        raise ValueError(name + ' not found in the database')
    # Only the first matching entry is considered.
    info = infos[0]
    nchunks = get_nchunks(info)
    # NOTE(review): the visible code stops here, so nchunks is computed but
    # nothing is returned yet; the efficiency computation itself is not
    # visible and still has to be supplied.
# One-off fix-up of the 'se' collection: for every entry that has a
# 'harvesting' field whose 'dir' contains a '%' character, replace 'dir' by
# its parent directory and hand the entry back to dsdb.insert. Both the old
# and the new directory are printed for each modified entry.
from datasetdb import DatasetDB
import pprint
from getpass import getpass
import os
import copy

pwd = getpass()
dsdb = DatasetDB('writer', pwd, db='datasets')

harvested_query = {'harvesting': {'$exists': 1}}
for entry in dsdb.find('se', harvested_query):
    harvesting = entry['harvesting']
    old_dir = harvesting['dir']
    if '%' in old_dir:
        parent_dir = os.path.dirname(old_dir)
        print(old_dir)
        print(parent_dir)
        harvesting['dir'] = parent_dir
        dsdb.insert('se', entry)
# Re-root every entry of the 'harvested' collection under a test directory:
# the last '/'-separated component of 'harv_dir' is kept and appended to
# basedir, and the modified entry is inserted without its '_id'.
from datasetdb import DatasetDB
import pprint

dsdb = DatasetDB(mode='writer', db='datasets')
basedir = '/gridgroup/cms/cbernet/test/'

for record in dsdb.find('harvested', {}):
    leaf = record['harv_dir'].rsplit('/', 1)[-1]
    # Presumably dropped so that the insert creates a new document instead of
    # clashing with the stored one; confirm against DatasetDB.insert.
    del record['_id']
    record['harv_dir'] = basedir + leaf
    dsdb.insert('harvested', record)
def setUpClass(cls):
    """Create a 'writer' DatasetDB on 'datasets_unittests' and store it as cls.db."""
    # NOTE(review): unittest expects setUpClass to be a classmethod; the
    # decorator is not visible here, so confirm it is present.
    connection = DatasetDB('writer', pwd, db='datasets_unittests')
    cls.db = connection
# For every entry of the 'harvested' collection, look up the single 'se'
# entry with the same name and build a harvesting tier description
# ('dir', 'time', 'tgzs') for it.
from datasetdb import DatasetDB
import pprint
from getpass import getpass
import os
import copy

pwd = getpass()
dsdb = DatasetDB('writer', pwd, db='datasets')

for hinfo in dsdb.find('harvested', {}):
    # The tier directory is the parent of the harvested directory.
    basedir = os.path.dirname(hinfo['harv_dir'])
    matches = dsdb.find('se', {'name': hinfo['name']})
    # Exactly one 'se' entry is expected per harvested dataset name.
    assert len(matches) == 1
    info = matches[0]
    tier_info = dict(
        dir=basedir,
        time=hinfo['harv_time'],
        tgzs=hinfo['tgzs'],
    )
    tiers = info.setdefault('tiers', {})
    tiers['harvesting'] = tier_info
    # NOTE(review): 'tiers' is removed right after being filled and the
    # write-back below is disabled, so as written nothing is stored; confirm
    # whether this is an intentional dry run.
    del info['tiers']
    # dsdb.insert('se', info)
# Consistency check on the 'se' collection: for every entry that has a 'path'
# field, the top-level 'tgzs' must be equal to the 'tgzs' stored under
# tiers -> harvesting. On the first mismatch both values are printed and the
# script aborts with an AssertionError.
from datasetdb import DatasetDB
import pprint
from getpass import getpass
import os
import copy

pwd = getpass()
dsdb = DatasetDB('reader', pwd, db='datasets')
infos = dsdb.find('se', {'path': {'$exists': 1}})
for info in infos:
    tgzs = info['tgzs']
    htgzs = info['tiers']['harvesting']['tgzs']
    # Plain inequality replaces cmp(), which no longer exists in Python 3.
    if tgzs != htgzs:
        pprint.pprint(tgzs)
        pprint.pprint(htgzs)
        # Raised explicitly: an assert statement would be stripped under -O
        # and the mismatch would go unnoticed.
        raise AssertionError('tgzs differ from tiers/harvesting tgzs')