import logging
import traceback

from dbs.apis.dbsClient import DbsApi


def uploadWorker(input, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the input
    Get confirmation in the output
    """
    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s" % dbsUrl)
    dbsApi = DbsApi(url=dbsUrl)

    while True:
        try:
            work = input.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name = work.get('name', None)
        block = work.get('block', None)

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s" % block)
            dbsApi.insertBulkBlock(blockDump=block)
            results.put({'name': name, 'success': "uploaded"})
        except Exception as ex:
            exString = str(ex)
            if 'Block %s already exists' % name in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.error("Had duplicate entry for block %s. Ignoring for now." % name)
                logging.debug("Exception: %s" % exString)
                logging.debug("Traceback: %s" % str(traceback.format_exc()))
                results.put({'name': name, 'success': "uploaded"})
            elif 'Proxy Error' in exString:
                # This is probably a successful insertion that went bad.
                # Put it on the check list
                msg = "Got a proxy error for block (%s)." % name
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                results.put({'name': name, 'success': "check"})
            else:
                msg = "Error trying to process block %s through DBS.\n" % name
                msg += exString
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                logging.debug("block: %s \n" % block)
                results.put({'name': name, 'success': "error", 'error': msg})
    return
def uploadWorker(workInput, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the workInput
    Get confirmation in the output
    """
    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s", dbsUrl)
    dbsApi = DbsApi(url=dbsUrl)

    while True:
        try:
            work = workInput.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name = work.get('name', None)
        block = work.get('block', None)

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s", block)
            dbsApi.insertBulkBlock(blockDump=block)
            results.put({'name': name, 'success': "uploaded"})
        except Exception as ex:
            exString = str(ex)
            if 'Block %s already exists' % name in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.error("Had duplicate entry for block %s. Ignoring for now.", name)
                logging.debug("Exception: %s", exString)
                logging.debug("Traceback: %s", str(traceback.format_exc()))
                results.put({'name': name, 'success': "uploaded"})
            elif 'Proxy Error' in exString:
                # This is probably a successful insertion that went bad.
                # Put it on the check list
                msg = "Got a proxy error for block (%s)." % name
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                results.put({'name': name, 'success': "check"})
            else:
                msg = "Error trying to process block %s through DBS.\n" % name
                msg += exString
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                logging.debug("block: %s \n", block)
                results.put({'name': name, 'success': "error", 'error': msg})
    return
def uploadWorker(workInput, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the workInput
    Get confirmation in the output
    """
    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s", dbsUrl)
    dbsApi = DbsApi(url=dbsUrl)

    while True:
        try:
            work = workInput.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name = work.get('name', None)  # this is the block name
        block = work.get('block', None)  # this is the block data structure

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s", block)
            dbsApi.insertBulkBlock(blockDump=block)
            results.put({'name': name, 'success': "uploaded"})
        except Exception as ex:
            exString = str(ex)
            if 'Block %s already exists' % name in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.warning("Block %s already exists. Marking it as uploaded.", name)
                logging.debug("Exception: %s", exString)
                results.put({'name': name, 'success': "uploaded"})
            elif 'Proxy Error' in exString:
                # This is probably a successful insertion that went bad.
                # Put it on the check list
                msg = "Got a proxy error for block %s." % name
                logging.warning(msg)
                results.put({'name': name, 'success': "check"})
            elif 'Missing data when inserting to dataset_parents' in exString:
                msg = "Parent dataset is not inserted yet for block %s." % name
                logging.warning(msg)
                results.put({'name': name, 'success': "error", 'error': msg})
            else:
                msg = "Error trying to process block %s through DBS. Error: %s" % (name, exString)
                logging.exception(msg)
                logging.debug("block info: %s \n", block)
                results.put({'name': name, 'success': "error", 'error': msg})
    return
def uploadWorker(input, results, dbsUrl):
    """
    _uploadWorker_

    Put JSONized blocks in the input
    Get confirmation in the output
    """
    # Init DBS Stuff
    logging.debug("Creating dbsAPI with address %s" % dbsUrl)
    dbsApi = DbsApi(url=dbsUrl)

    while True:
        try:
            work = input.get()
        except (EOFError, IOError):
            crashMessage = "Hit EOF/IO in getting new work\n"
            crashMessage += "Assuming this is a graceful break attempt.\n"
            logging.error(crashMessage)
            break

        if work == 'STOP':
            # Then halt the process
            break

        name = work.get('name', None)
        block = work.get('block', None)

        # Do stuff with DBS
        try:
            logging.debug("About to call insert block with block: %s" % block)
            dbsApi.insertBulkBlock(blockDump=block)
            results.put({'name': name, 'success': True})
        except Exception, ex:
            exString = str(ex)
            if 'Duplicate entry' in exString:
                # Then this is probably a duplicate
                # Ignore this for now
                logging.error("Had duplicate entry for block %s\n" % name)
                logging.error("Ignoring for now.\n")
                logging.error("Exception: %s\n" % exString)
                logging.error("Traceback: %s\n" % str(traceback.format_exc()))
                results.put({'name': name, 'success': True})
            else:
                msg = "Error trying to process block %s through DBS.\n" % name
                msg += exString
                logging.error(msg)
                logging.error(str(traceback.format_exc()))
                results.put({'name': name, 'success': False, 'error': msg})
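# The four uploadWorker variants above all share the same queue protocol:
# each work item is a dict with 'name' (the block name) and 'block' (the
# dump handed to insertBulkBlock), the string 'STOP' is the shutdown
# sentinel, and every work item produces exactly one result dict. A
# minimal driver sketch, assuming the Python 3 variants; the function and
# parameter names here are illustrative, not the actual component code.
import multiprocessing


def runUploadPool(blockDumps, dbsUrl, nWorkers=2):
    """Feed a dict of {blockName: blockDump} through uploadWorker processes."""
    workInput = multiprocessing.Queue()
    results = multiprocessing.Queue()
    workers = [multiprocessing.Process(target=uploadWorker,
                                       args=(workInput, results, dbsUrl))
               for _ in range(nWorkers)]
    for w in workers:
        w.start()
    for name, block in blockDumps.items():
        workInput.put({'name': name, 'block': block})
    for _ in workers:
        workInput.put('STOP')  # one sentinel per worker
    # one result per submitted block, drained before joining the workers
    outcome = [results.get() for _ in blockDumps]
    for w in workers:
        w.join()
    return outcome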
import json
import pprint

from dbs.apis.dbsClient import DbsApi


def upload_to_dbs(dataset_info_file, file_info_file, origin_site_name, dry=False):
    print("Uploading to DBS3...")
    with open(dataset_info_file, "r") as f:
        dataset_info = json.loads(f.read())
    with open(file_info_file, "r") as f:
        file_info = json.loads(f.read())
    phy3WriteUrl = "https://cmsweb.cern.ch/dbs/prod/phys03/DBSWriter"
    writeApi = DbsApi(url=phy3WriteUrl, debug=1)
    total_files = 0
    total_events = 0
    print("insert block in DBS3: %s" % writeApi.url)
    print("Preparing upload for {}".format(dataset_info["processed_ds"]))
    print("Blocks to be processed: {}".format(len(file_info["blocks"])))
    # "asdf" is a throwaway block id, used only to extract the dataset name
    print("DatasetName: {}".format(
        createEmptyBlock(dataset_info, origin_site_name, "asdf")["dataset"]["dataset"]))
    for block in file_info["blocks"]:
        blockid = block["blockid"]
        filedata = block["files"]
        filelist = []
        print("Processing block {} - Number of files: {}".format(
            blockid, len(filedata)))
        total_files += len(filedata)
        blockDict = createEmptyBlock(dataset_info, origin_site_name, blockid)
        for file in filedata:
            fileDic = {}
            lfn = file["name"]
            fileDic["file_type"] = "EDM"
            fileDic["logical_file_name"] = lfn
            for key in ["check_sum", "adler32", "file_size", "event_count"]:
                fileDic[key] = file[key]
            total_events += file["event_count"]
            fileDic["file_lumi_list"] = file["lumis"]
            fileDic["auto_cross_section"] = 0.0
            fileDic["last_modified_by"] = "sbrommer"
            filelist.append(fileDic)
        # now upload the block
        blockDict = addFilesToBlock(blockDict, filelist)
        if not dry:
            writeApi.insertBulkBlock(blockDict)
        else:
            print("Dry run, not inserting block into DBS3")
            pprint.pprint(blockDict)
            exit()  # the dry run stops after dumping the first block
    print("Total files: {} // Total Events: {}".format(total_files, total_events))
    # (inside the loop over input files)
    fileDic = {}
    lfn = common_lfn_prefix + directory_path + file['name']
    fileDic['file_type'] = common_file_type
    fileDic['logical_file_name'] = lfn
    for key in ['check_sum', 'adler32', 'file_size', 'event_count']:
        fileDic[key] = file[key]
    fileDic['file_lumi_list'] = common_dummy_lumi
    files.append(fileDic)
    files_in_block += 1
    print "file count %d" % files_in_block
    if files_in_block == max_files_in_block:
        blockDict = addFilesToBlock(blockDict, files)
        print "insert block in DBS3: %s" % writeApi.url
        print "ALAN: just before writing to DBS."
        pprint.pprint(blockDict)
        sys.exit(0)  # debugging stop: the insert below is never reached
        writeApi.insertBulkBlock(blockDict)
        files_in_block = 0
# end loop on input Files

# any leftovers ?
if files_in_block:
    blockDict = addFilesToBlock(blockDict, files)
    print "insert block in DBS3: %s" % writeApi.url
    print "ALAN: leftovers just before writing to DBS."
    pprint.pprint(blockDict)
    sys.exit(0)  # debugging stop: the insert below is never reached
    writeApi.insertBulkBlock(blockDict)
    # (inside the loop over input files)
    fileDic = {}
    lfn = common_lfn_prefix + directory_path + file['name']
    print "inserting file:", lfn
    fileDic['file_type'] = common_file_type
    fileDic['logical_file_name'] = lfn
    for key in ['check_sum', 'adler32', 'file_size', 'event_count']:
        fileDic[key] = file[key]
    fileDic['file_lumi_list'] = common_dummy_lumi
    files.append(fileDic)
    files_in_block += 1
    if files_in_block == max_files_in_block:
        blockDict = addFilesToBlock(blockDict, files)
        # print "insert block in DBS3: %s" % writeApi.url
        # pprint.pprint(blockDict)
        writeApi.insertBulkBlock(blockDict)
        files_in_block = 0
# end loop on input Files

# any leftovers ?
if files_in_block:
    blockDict = addFilesToBlock(blockDict, files)
    # print "insert block in DBS3: %s" % writeApi.url
    # pprint.pprint(blockDict)
    writeApi.insertBulkBlock(blockDict)

print "/%s/%s/%s" % (dataset_info['primary_ds'], dataset_info['processed_ds'], dataset_info['tier'])
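# createEmptyBlock and addFilesToBlock are used above but not defined in
# this section. A minimal sketch of what they might look like, assuming
# dataset_info carries 'primary_ds', 'processed_ds' and 'tier' keys (as
# printed at the end of the previous snippet) and the blockDict layout
# assembled in insertFilesToBlock further below; any field defaults here
# are illustrative guesses, not the confirmed implementation.
def createEmptyBlock(dataset_info, origin_site_name, blockid):
    """Build an empty block skeleton in the shape insertBulkBlock expects."""
    dataset_name = "/%s/%s/%s" % (dataset_info['primary_ds'],
                                  dataset_info['processed_ds'],
                                  dataset_info['tier'])
    return {
        'primds': {'primary_ds_name': dataset_info['primary_ds'],
                   'primary_ds_type': 'mc'},
        'dataset': {'dataset': dataset_name,
                    'processed_ds_name': dataset_info['processed_ds'],
                    'data_tier_name': dataset_info['tier'],
                    'dataset_access_type': 'VALID'},
        'block': {'block_name': "%s#%s" % (dataset_name, blockid),
                  'origin_site_name': origin_site_name,
                  'open_for_writing': 0},
        'processing_era': {'processing_version': 1, 'description': ''},
        'acquisition_era': {'acquisition_era_name': dataset_info.get('acquisition_era', 'LHE'),
                            'start_date': 0},
        'dataset_conf_list': [],
        'file_conf_list': [],
        'file_parent_list': [],
        'files': [],
    }


def addFilesToBlock(blockDict, files):
    """Attach the file list and update the block-level bookkeeping,
    mirroring the file_count/block_size assignments in insertFilesToBlock."""
    blockDict['files'] = files
    blockDict['block']['file_count'] = len(files)
    blockDict['block']['block_size'] = sum(int(f['file_size']) for f in files)
    return blockDict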
import glob
from ast import literal_eval
from optparse import OptionParser

from dbs.apis.dbsClient import DbsApi


def get_command_line_options():
    parser = OptionParser(usage='%prog --in MyBlock.txt --url=<DBS_Instance_URL>')
    parser.add_option("-i", "--in", dest="input",
                      help="Input file containing the block dump. Wildcard support.",
                      metavar="MyBlock*.txt")
    parser.add_option("-u", "--url", dest="url",
                      help="DBS Instance url", metavar="DBS_Instance_URL")

    (options, args) = parser.parse_args()

    if not (options.input and options.url):
        parser.print_help()
        parser.error('Mandatory options are --in and --url')

    return options, args


if __name__ == '__main__':
    options, args = get_command_line_options()

    input_files = glob.glob(options.input)
    # create the API client once, outside the file loop
    api = DbsApi(url=options.url)

    for input_file in input_files:
        with open(input_file, 'r') as f:
            block_dump = literal_eval(f.read())

        try:
            api.insertBulkBlock(block_dump)
        except:
            raise
        else:
            print("Successfully inserted block!")
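# The script above parses each input file with ast.literal_eval, so the
# file must contain a Python dict literal rather than JSON. A toy example
# of such a dump, truncated to the top-level keys that insertBulkBlock
# consumes elsewhere in this section (real dumps carry full per-file
# records), followed by an illustrative invocation; the script name is a
# placeholder:
#
#   {'dataset': {...}, 'primds': {...}, 'block': {...},
#    'processing_era': {...}, 'acquisition_era': {...},
#    'dataset_conf_list': [...], 'file_conf_list': [...],
#    'file_parent_list': [...], 'files': [...]}
#
#   python insertBlock.py --in 'MyBlock*.txt' \
#       --url=https://cmsweb.cern.ch/dbs/prod/phys03/DBSWriter
# (the wildcard is quoted so the shell does not expand it before glob does)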
import logging
import pprint
import time
import uuid

from dbs.apis.dbsClient import DbsApi
# assumed WMCore imports for the PhEDEx injection part below
from WMCore.Services.PhEDEx import XMLDrop
from WMCore.Services.PhEDEx.PhEDEx import PhEDEx


def insertFilesToBlock(files, injectNode, injectSE, mode, commit):
    # pick a DBS3 instance
    # instance = 'dev'
    instance = 'int'
    # instance = 'prod'

    if instance == 'dev':
        # host = 'dbs3-dev01.cern.ch'
        host = 'cmsweb-dev.cern.ch'
    if instance == 'int':
        host = 'cmsweb-testbed.cern.ch'
    if instance == 'prod':
        host = 'cmsweb.cern.ch'

    globReadUrl = 'https://%s/dbs/%s/global/DBSReader' % (host, instance)
    globWriteUrl = 'https://%s/dbs/%s/global/DBSWriter' % (host, instance)
    phy3ReadUrl = 'https://%s/dbs/%s/phys03/DBSReader' % (host, instance)
    phy3WriteUrl = 'https://%s/dbs/%s/phys03/DBSWriter' % (host, instance)

    readApi = DbsApi(url=globReadUrl)
    writeApi = DbsApi(url=globWriteUrl)
    # readApi = DbsApi(url=phy3ReadUrl)
    # writeApi = DbsApi(url=phy3WriteUrl)

    if mode == "lhe":
        ds_info = {'data_type': 'mc',
                   'acquisition_era': 'LHE',
                   'primary_ds': 'QCD_HT-100To250_8TeV-madgraph',
                   'processed_ds': 'LHE-testAlan_Attempt3-v2',
                   'data_tier': 'LHE',
                   'physics_group': 'GEN',
                   'application': 'Madgraph',
                   'app_version': 'Mad_5_1_3_30',
                   'proc_version': 1,
                   'proc_descript': 'test_LHE_injection'}
    elif mode == "pixel":
        ds_info = {'data_type': 'data',
                   'acquisition_era': 'Run2012',
                   'primary_ds': 'QCD_HT-100To250_8TeV-madgraph',
                   'processed_ds': 'LHE-testAlan_Attempt3-v2',
                   'data_tier': 'LHE',
                   'physics_group': None,
                   'application': 'Madgraph',
                   'app_version': 'Mad_5_1_3_30',
                   'proc_version': 1,
                   'proc_descript': 'test_LHE_injection'}

    acquisition_era_config = {'acquisition_era_name': ds_info['acquisition_era'],
                              'start_date': int(time.time())}
    processing_era_config = {'processing_version': ds_info['proc_version'],
                             'description': ds_info['proc_descript']}
    primds_config = {'primary_ds_type': ds_info['data_type'],
                     'primary_ds_name': ds_info['primary_ds']}

    dataset_name = "/%s/%s/%s" % (ds_info['primary_ds'],
                                  ds_info['processed_ds'],
                                  ds_info['data_tier'])
    dataset_config = {'physics_group_name': ds_info['physics_group'],
                      'dataset_access_type': 'VALID',
                      'data_tier_name': ds_info['data_tier'],
                      'processed_ds_name': ds_info['processed_ds'],
                      'dataset': dataset_name}

    block_name = "%s#%s" % (dataset_name, str(uuid.uuid4()))
    block_config = {'block_name': block_name,
                    'origin_site_name': injectSE,
                    'open_for_writing': 0}

    dataset_conf_list = [{'app_name': ds_info['application'],
                          'global_tag': 'dummytag',
                          'output_module_label': 'out',
                          'pset_hash': 'dummyhash',
                          'release_version': ds_info['app_version']}]

    blockDict = {'files': files,
                 'processing_era': processing_era_config,
                 'primds': primds_config,
                 'dataset': dataset_config,
                 'dataset_conf_list': dataset_conf_list,
                 'acquisition_era': acquisition_era_config,
                 'block': block_config,
                 'file_parent_list': [],
                 'file_conf_list': []}
    blockDict['block']['file_count'] = len(files)
    blockDict['block']['block_size'] = sum(int(file['file_size']) for file in files)

    if commit:
        logging.info("inserted block into DBS : %s" % writeApi.url)
        logging.debug(pprint.pformat(blockDict))
        writeApi.insertBulkBlock(blockDict)
    else:
        logging.info("dry run, this block would have been inserted into DBS : %s" % writeApi.url)
        logging.info(pprint.pformat(blockDict))

    injectionSpec = XMLDrop.XMLInjectionSpec(writeApi.url)
    datasetSpec = injectionSpec.getDataset(dataset_name)
    blockSpec = datasetSpec.getFileblock(block_name, "n")
    for f in files:
        blockSpec.addFile(f['logical_file_name'], {'adler32': f['adler32']}, f['file_size'])
    xmlData = injectionSpec.save()

    # SELECT PHEDEX URL
    phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/dev/"
    # phedexURL = "https://cmsweb.cern.ch/phedex/datasvc/json/prod/"

    if commit:
        logging.info("inserting block into PhEDEx : %s" % phedexURL)
        logging.debug(pprint.pformat(xmlData))
        # 'config' is expected to come from the surrounding component configuration
        phedex = PhEDEx({"endpoint": config.PhEDExInjector.phedexurl}, "json")
        injectRes = phedex.injectBlocks(injectNode, xmlData)
    else:
        logging.info("dry run, this block would have been inserted into PhEDEx : %s" % phedexURL)
        logging.info(pprint.pformat(xmlData))
    return
    blockDict['block']['block_size'] = sum([int(file['file_size']) for file in files])

    if options.verbose:
        pprint.pprint(blockDict)

    if not options.publish:
        print "Dry run ended. Please use --publish option if you want to publish files in DBS"
        sys.exit()

    # Insert primary dataset name. It's safe to do it for already existing primary datasets
    primds_config = {'primary_ds_name': primary_ds_name, 'primary_ds_type': 'mc'}
    dbsWriter.insertPrimaryDataset(primds_config)

    # Insert block of files
    try:
        dbsWriter.insertBulkBlock(blockDict)
    except HTTPError, he:
        print he

#
# Info
#
# Missing: file_lumi_list
# Example:
# 'file_lumi_list': [{u'lumi_section_num': 4027414, u'run_num': 1},
#                    {u'lumi_section_num': 26422, u'run_num': 2},
#                    {u'lumi_section_num': 29838, u'run_num': 3}]
# https://cmsweb.cern.ch/dbs/prod/phys03/DBSWriter/
# https://github.com/dmwm/AsyncStageout/blob/master/src/python/AsyncStageOut/PublisherWorker.py#L743
#
# xrd eoscms stat /store/user/dmytro/lhe/DM_ttbar01j/DMScalar_ttbar01j_mphi_200_mchi_150_gSM_1p0_gDM_1p0.root
# xrd cms-xrd-global.cern.ch stat /store/user/dmytro/lhe/DM_ttbar01j/DMScalar_ttbar01j_mphi_200_mchi_150_gSM_1p0_gDM_1p0.root
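# The Info comment above notes that file_lumi_list was missing. A minimal
# helper sketch for building it from (run, lumi) pairs, matching the
# layout in the example; the helper name is illustrative:
def make_file_lumi_list(run_lumi_pairs):
    """Turn [(run, lumi), ...] into the DBS file_lumi_list structure."""
    return [{'run_num': run, 'lumi_section_num': lumi}
            for run, lumi in run_lumi_pairs]

# e.g. make_file_lumi_list([(1, 4027414), (2, 26422), (3, 29838)])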
import argparse
import os

from dbs.apis.dbsClient import DbsApi


def main():
    # get and validate a file name from args
    parser = argparse.ArgumentParser()
    parser.add_argument('--file', help='file containing the dump of the block', default=None, required=True)
    args = parser.parse_args()
    fileName = args.file
    #fileName = 'failed-block-at-1611258668.34.txt'  # just an example
    failedBlocksDir = '/data/srv/Publisher_files/FailedBlocks/'
    filePath = failedBlocksDir + fileName
    if not os.path.isfile(filePath):
        print("File %s not found in %s" % (fileName, failedBlocksDir))
        return

    # initialize DBS access
    # if X509 vars are not defined, use default Publisher location
    userProxy = os.getenv('X509_USER_PROXY')
    if userProxy:
        os.environ['X509_USER_CERT'] = userProxy
        os.environ['X509_USER_KEY'] = userProxy
    if not os.getenv('X509_USER_CERT'):
        os.environ['X509_USER_CERT'] = '/data/certs/servicecert.pem'
    if not os.getenv('X509_USER_KEY'):
        os.environ['X509_USER_KEY'] = '/data/certs/servicekey.pem'
    #migUrl = 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSMigrate'
    phy3Url = 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader'
    #globUrl = 'https://cmsweb.cern.ch/dbs/prod/global/DBSReader'
    destUrl = 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSWriter'
    #apiG = DbsApi(url=globUrl)
    apiP3 = DbsApi(url=phy3Url)
    #apiMig = DbsApi(url=migUrl)
    apiDest = DbsApi(url=destUrl)

    with open(filePath) as fp:
        blockData = fp.read()
    # from pprint.pprint format to a dictionary (slow, unsafe, but handy)
    block = eval(blockData)  # pylint: disable=eval-used
    targetDataset = block['dataset']['dataset']
    print('Block is meant to be added to dataset\n%s' % targetDataset)

    # look for files already present in DBS phys03
    alreadyPresentFile = False
    lfns = [f['logical_file_name'] for f in block['files']]
    print('Block contains %d files' % len(lfns))
    numPresent = 0
    sameDSet = 0
    otherDSet = 0
    otherDSlist = set()
    for lfn in lfns:
        ret = apiP3.listFiles(logical_file_name=lfn)
        if ret:
            alreadyPresentFile = True
            numPresent += 1
            if numPresent < 5:
                print('file %s found in DBS' % lfn)
            if numPresent == 5:
                print('more files found ...')
            #details = apiP3.listFiles(logical_file_name=lfn, detail=True)
            #print(details)
            lfnDSet = apiP3.listDatasets(logical_file_name=lfn)[0]['dataset']
            if lfnDSet == targetDataset:
                sameDSet += 1
                if sameDSet < 5:
                    print('this lfn is already in target dataset')
            else:
                otherDSet += 1
                if otherDSet < 5:
                    print('this lfn belongs to another dataset:\n%s' % lfnDSet)
                otherDSlist.add(lfnDSet)
            #lfnBlock = apiP3.listBlocks(logical_file_name=lfn)
            #print('in block:\n%s' % lfnBlock[0]['block_name'])
    if alreadyPresentFile:
        print('%d/%d file(s) from input blocks are already in DBS/phys03. Publication will fail' %
              (numPresent, len(lfns)))
        print('files already present in target dataset: %d' % sameDSet)
        print('files present in DBS in another dataset: %d' % otherDSet)
        if otherDSet:
            print('other datasets containing files from this block:\n%s' % otherDSlist)
        return

    print('No obvious reason for Publication failure found, try to insert again')
    try:
        apiDest.insertBulkBlock(block)
    except Exception as ex:
        print("Publication failed with exception:\n%s" % str(ex))
        return
    print("Block publication done OK")
    return


if __name__ == '__main__':
    main()
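# Example invocation, assuming the script above is saved as
# recoverFailedBlock.py (the name is illustrative) and a failed block
# dump sits in the hard-coded FailedBlocks directory:
#
#   python recoverFailedBlock.py --file failed-block-at-1611258668.34.txt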