def e3crawl(endDate=None, daysSpanned=2, minSize=0, blackList=None,
            overwrite=False, maxNumRuns=None, dryRun=False):
    """ Crawl the raw data and process the files.

    Arguments
    ---------
    endDate : the last date (inclusive) to crawl; forwarded to the crawler.
    daysSpanned : the number of days spanned by the crawl.
    minSize : the minimum raw-file size for a file to be considered.
    blackList : a list of items to be skipped (defaults to an empty list).
    overwrite : if True, reprocess files that already have data products.
    maxNumRuns : optional cap on the number of files to process.
    dryRun : if True, only log what would be done and return.
    """
    # Resolve the default at call time: the original signature used
    # blackList=[], a mutable default evaluated once at import time and
    # shared across calls.
    if blackList is None:
        blackList = []
    # Set up a per-invocation log file named after the current timestamp.
    logDate = datetime.datetime.today()
    datestr = date2str(logDate)
    timestr = logDate.strftime('%Y-%m-%d-%H-%M-%S-%f')
    logFilePath = os.path.join(E3PIPE_LOG_BASE, datestr, '%s.log' % timestr)
    logFolder = os.path.dirname(logFilePath)
    __utils__.createFolder(logFolder)
    logFileHandler = E3FileHandler(logFilePath)
    try:
        crawler = E3RunDbRawFileCrawler(endDate, daysSpanned, minSize,
                                        blackList, overwrite)
        logger.info(crawler)
        if dryRun:
            logger.info('Just kidding, dry run :-)')
            return
        # Cap the displayed total at the actual number of available files,
        # so the "i/N" progress message never over-reports N.
        numFiles = len(crawler)
        if maxNumRuns is not None:
            numFiles = min(numFiles, maxNumRuns)
        curFile = 1
        for filePath in crawler:
            logger.info('Processing file %d/%d: %s' %
                        (curFile, numFiles, filePath))
            _cmd = 'e3process.py %s' % filePath
            exitCode = __utils__.cmd(_cmd)
            if exitCode:
                # Surface failures instead of silently dropping the code.
                logger.error('Command "%s" returned exit code %s.' %
                             (_cmd, exitCode))
            if maxNumRuns is not None and curFile >= maxNumRuns:
                break
            curFile += 1
    finally:
        # Close the log handler on every exit path (the original leaked it
        # on the dry-run early return).
        logFileHandler.close()
def e3mergeTimeSpan(outputFilePath, station, endDate, daysSpanned=1, **kwargs):
    """ Merge the DST ROOT files for a given station over a time span.

    The granularity is one day: the span covers daysSpanned days ending at
    endDate (inclusive). Sub-daily granularity might come in the future if
    needed.
    """
    dstCrawler = E3RunDbDstFileCrawler(station, endDate, daysSpanned)
    dstFiles = dstCrawler.fileList()
    # The crawler already returns an ordered file list: skip the extra sort.
    kwargs['sort'] = False
    # Decorate the merge with the span and the station name.
    firstDate = endDate - datetime.timedelta(daysSpanned - 1)
    kwargs['date'] = '%s--%s' % (date2str(firstDate), date2str(endDate))
    kwargs['station'] = station
    return e3mergeFiles(outputFilePath, *dstFiles, **kwargs)
def folderPath(self, station, date):
    """ Return the path to the data folder for a given station and date
    for the data products of interest.

    Derived classes may overload this to change the folder layout.
    """
    dateFolder = date2str(date)
    return os.path.join(self.ROOT_FOLDER, station, dateFolder)
def e3crawl(endDate=None, daysSpanned=2, minSize=0, blackList=None,
            overwrite=False, maxNumRuns=None, dryRun=False):
    """ Crawl the raw data and process the files.

    Arguments
    ---------
    endDate : the last date (inclusive) to crawl; forwarded to the crawler.
    daysSpanned : the number of days spanned by the crawl.
    minSize : the minimum raw-file size for a file to be considered.
    blackList : a list of items to be skipped (defaults to an empty list).
    overwrite : if True, reprocess files that already have data products.
    maxNumRuns : optional cap on the number of files to process.
    dryRun : if True, only log what would be done and return.
    """
    # Resolve the default at call time: the original signature used
    # blackList=[], a mutable default evaluated once at import time and
    # shared across calls.
    if blackList is None:
        blackList = []
    # Set up a per-invocation log file named after the current timestamp.
    logDate = datetime.datetime.today()
    datestr = date2str(logDate)
    timestr = logDate.strftime('%Y-%m-%d-%H-%M-%S-%f')
    logFilePath = os.path.join(E3PIPE_LOG_BASE, datestr, '%s.log' % timestr)
    logFolder = os.path.dirname(logFilePath)
    __utils__.createFolder(logFolder)
    logFileHandler = E3FileHandler(logFilePath)
    try:
        crawler = E3RunDbRawFileCrawler(endDate, daysSpanned, minSize,
                                        blackList, overwrite)
        logger.info(crawler)
        if dryRun:
            logger.info('Just kidding, dry run :-)')
            return
        # Cap the displayed total at the actual number of available files,
        # so the "i/N" progress message never over-reports N.
        numFiles = len(crawler)
        if maxNumRuns is not None:
            numFiles = min(numFiles, maxNumRuns)
        curFile = 1
        for filePath in crawler:
            logger.info('Processing file %d/%d: %s' %
                        (curFile, numFiles, filePath))
            _cmd = 'e3process.py %s' % filePath
            exitCode = __utils__.cmd(_cmd)
            if exitCode:
                # Surface failures instead of silently dropping the code.
                logger.error('Command "%s" returned exit code %s.' %
                             (_cmd, exitCode))
            if maxNumRuns is not None and curFile >= maxNumRuns:
                break
            curFile += 1
    finally:
        # Close the log handler on every exit path (the original leaked it
        # on the dry-run early return).
        logFileHandler.close()
def e3mergeTimeSpan(outputFilePath, station, endDate, daysSpanned=1, **kwargs):
    """ Merge the DST ROOT files for a given station over a time span.

    The granularity is one day: the span covers daysSpanned days ending at
    endDate (inclusive). Sub-daily granularity might come in the future if
    needed.
    """
    dstCrawler = E3RunDbDstFileCrawler(station, endDate, daysSpanned)
    dstFiles = dstCrawler.fileList()
    # The crawler already returns an ordered file list: skip the extra sort.
    kwargs['sort'] = False
    # Decorate the merge with the span and the station name.
    firstDate = endDate - datetime.timedelta(daysSpanned - 1)
    kwargs['date'] = '%s--%s' % (date2str(firstDate), date2str(endDate))
    kwargs['station'] = station
    return e3mergeFiles(outputFilePath, *dstFiles, **kwargs)
def e3report2(station, endDate=None, daysSpanned=2, outputFolder=None, **kwargs):
    """ Read the DST and run the data quality monitoring.

    The merged summary file for the span is written into outputFolder
    (defaulting to the standard DQM report location for the station and
    end date) and then dumped to text and rendered as a DQM report.
    """
    if endDate is None:
        endDate = datetime.date.today()
    startDate = endDate - datetime.timedelta(daysSpanned - 1)
    if outputFolder is None:
        outputFolder = os.path.join(E3PIPE_DQM_REPORT_BASE, station,
                                    date2str(endDate))
    summaryName = '%s_%s_%s_summary.root' % (station, startDate, endDate)
    summaryPath = os.path.join(outputFolder, summaryName)
    e3mergeTimeSpan2(summaryPath, station, endDate, daysSpanned,
                     mergeEvents=False)
    # Dump the relevant trees to text for the report machinery.
    for treeName in ('Header', 'Trending', 'Weather'):
        e3root2text(summaryPath, treeName)
    dqmReport = E3DqmReport(summaryPath, outputFolder)
    dqmReport.fill()
def __init__(self, filePath):
    """ Constructor.

    Parse the raw binary file path and populate the dictionary with the
    run identity (station, date, run number) and the derived paths of all
    the data products (DST, calibration, DQM, log and lock files).
    Aborts if the path does not end in '.bin'.
    """
    if not filePath.endswith('.bin'):
        abort('%s does not look like a raw binary file' % filePath)
    dict.__init__(self)
    self['RawFilePath'] = filePath
    self['RawDirName'], self['RawFileName'] = os.path.split(filePath)
    # splitFilePath() is expected to return (station, year, month, day,
    # run string) -- presumably parsed from the folder layout; confirm
    # against its definition.
    data = splitFilePath(filePath)
    self['Station'] = data[0]
    year, month, day = [int(item) for item in data[1:4]]
    self['RunString'] = data[4]
    # NOTE(review): self.RunString / self.Date attribute access on a dict
    # subclass relies on the class exposing keys as attributes (e.g. via
    # __getattr__ defined elsewhere) -- confirm.
    self['RunNumber'] = int(self.RunString)
    self['Date'] = datetime.date(year, month, day)
    self['DateString'] = date2str(self.Date)
    # Derived data-product paths come from private (name-mangled) helpers
    # defined elsewhere in the class.
    self['DstFilePath'] = self.__dstFilePath()
    self['CalibFilePath'] = self.__calibFilePath()
    self['DqmFolderPath'] = self.__dqmFolderPath()
    self['LogFilePath'] = self.__logFilePath()
    self['LockFilePath'] = self.__lockFilePath()
def e3report2(station, endDate=None, daysSpanned=2, outputFolder=None, **kwargs):
    """ Read the DST and run the data quality monitoring.

    The merged summary file for the span is written into outputFolder
    (defaulting to the standard DQM report location for the station and
    end date) and then dumped to text and rendered as a DQM report.
    """
    if endDate is None:
        endDate = datetime.date.today()
    startDate = endDate - datetime.timedelta(daysSpanned - 1)
    if outputFolder is None:
        outputFolder = os.path.join(E3PIPE_DQM_REPORT_BASE, station,
                                    date2str(endDate))
    summaryName = '%s_%s_%s_summary.root' % (station, startDate, endDate)
    summaryPath = os.path.join(outputFolder, summaryName)
    e3mergeTimeSpan2(summaryPath, station, endDate, daysSpanned,
                     mergeEvents=False)
    # Dump the relevant trees to text for the report machinery.
    for treeName in ('Header', 'Trending', 'Weather'):
        e3root2text(summaryPath, treeName)
    dqmReport = E3DqmReport(summaryPath, outputFolder)
    dqmReport.fill()
def rawDataFolder(station, date=None):
    """ Return the base folder with the raw data for a given station and date.

    Arguments
    ---------
    station : the station name.
    date : the date of interest (defaults to today).

    Bug fix: the original default (date=datetime.date.today()) was evaluated
    once at import time, so a long-running process would keep using a stale
    date. The default is now resolved at call time.
    """
    if date is None:
        date = datetime.date.today()
    return os.path.join(E3PIPE_RAW_BASE, station, 'data', date2str(date))
def rawDataFolder(station, date=None):
    """ Return the base folder with the raw data for a given station and date.

    Arguments
    ---------
    station : the station name.
    date : the date of interest (defaults to today).

    Bug fix: the original default (date=datetime.date.today()) was evaluated
    once at import time, so a long-running process would keep using a stale
    date. The default is now resolved at call time.
    """
    if date is None:
        date = datetime.date.today()
    return os.path.join(E3PIPE_RAW_BASE, station, 'data', date2str(date))
def folderPath(self, station, date):
    """ Overloaded class method.

    Same layout as the base version, with an extra 'data' level between
    the station and the date folder.
    """
    dateFolder = date2str(date)
    return os.path.join(self.ROOT_FOLDER, station, 'data', dateFolder)
def dstDataFolder(station, date=None):
    """ Return the base folder for the DST products.

    Arguments
    ---------
    station : the station name.
    date : the date of interest (defaults to today).

    Bug fix: the original default (date=datetime.date.today()) was evaluated
    once at import time, so a long-running process would keep using a stale
    date. The default is now resolved at call time.
    """
    if date is None:
        date = datetime.date.today()
    return os.path.join(E3PIPE_RECON_BASE, station, date2str(date))
def e3trending2(dstFilePath):
    """ Parse a DST root file produced by the new analyzer and add the bits
    that are missing for the DQM (e.g., the trending).

    NOTE(review): this function looks like work in progress -- most of the
    body is commented out (apparently copied from e3dst) and, as it stands,
    it cannot run to completion (see the notes below).
    """
    chrono = E3Chrono()
    #uniqueId = uniqueRunIdFromFilePath(baseFilePath)
    #station, year, month, day, runId = splitFilePath(baseFilePath)
    #date = datetime.date(int(year), int(month), int(day))
    #logger.info('Unique run ID is %s.' % uniqueId)
    logger.info('Opening output ROOT file %s...' % dstFilePath)
    # NOTE(review): `date` and `station` are not defined in this scope --
    # the lines deriving them are commented out above, so this call raises
    # a NameError at runtime.
    rootFile = E3OutputRootFile(dstFilePath, 'e3dst', date2str(date), station)
    logger.info('Initializing event tree...')
    # NOTE(review): the tree is freshly created and never filled here, so
    # GetEntries() below presumably returns 0 and abort() would fire --
    # confirm before enabling this code path. Also note rootFile is never
    # closed on this path.
    eventTree = E3DstEventTree()
    #eventTree.setUniqueRunId(uniqueId)
    #logger.info('Filling event tree...')
    #for row in outFile:
    #    eventTree.fillRow(row)
    #eventStat = outFile.eventStat()
    #tmin = outFile.minTimestamp()
    #tmax = outFile.maxTimestamp()
    # If we have less than two good events there is nothing we could
    # possibly do, here.
    # Close all files and remove the output dst ROOT file so that
    # we know the run has not been processed.
    #if eventStat['hits'] < 2:
    #    logger.info('Closing all files...')
    #    rootFile.Close()
    #    outFile.close()
    #    sumFile.close()
    #    __utils__.rm(dstFilePath)
    #    logger.info('No events with hits, processing terminated after %.3f s.' %\
    #                chrono.stop())
    #    sys.exit(E3PIPE_EXIT_CODE_NO_HITS_EVENTS)
    #logger.info('Event stats: %s' % eventStat)
    #logger.info('Range of timestamps in the output files: %.3f--%.3f' %\
    #            (tmin, tmax))
    #duration = tmax - tmin
    #logger.info('Corresponding run duration: %.3f s' % duration)
    #if duration > MAX_RUN_DURATION:
    #    logger.error('Run looks way too long, something must be wrong.')
    #    sys.exit(E3PIPE_EXIT_CODE_RUN_TOO_LONG)
    #eventTree.Write()
    logger.info('Done, %d event(s) filled in.' % eventTree.GetEntries())
    if eventTree.GetEntries() == 0:
        abort('No events found (maybe an issue with eee_calib.txt?)')
    logger.info('Creating monitoring plots...')
    #eventTree.doMonitoring()
    #logger.info('Initializing weather tree...')
    #weatherTree = E3DstWeatherTree()
    #weatherTree.setUniqueRunId(uniqueId)
    #binFile = E3BinFile('%s.bin' % baseFilePath)
    #weatherRecord = binFile.weatherStationRecord()
    #if weatherRecord is not None:
    #    logger.info('Filling weather tree...')
    #    weatherTree.fillRow(weatherRecord.data())
    #weatherTree.Write()
    #logger.info('Creating trending data products...')
    #trendingTree = eventTree.doTrending(TRENDING_TIME_BIN, tmin, tmax,
    #                                    weatherRecord)
    #logger.info('Writing trending tree...')
    #trendingTree.Write()
    #logger.info('Writing monitoring/trending plots...')
    #for plot in eventTree.plots():
    #    plot.Write()
    #logger.info('Initializing header tree...')
    #headerTree = E3DstHeaderTree()
    #headerTree.setUniqueRunId(uniqueId)
    #logger.info('Filling header tree...')
    #data = sumFile.data()
    # Mind we need to add a few things "by hand", here, as not all the
    # information that we want in the header is really coming from the
    # sum file.
    #data['RunNumber'] = row['RunNumber']
    #data['RunStart'] = tmin
    #data['RunStop'] = tmax
    #data['RunDuration'] = tmax - tmin
    #data['NumHitEvents'] = eventStat['hits']
    #data['NumTrackEvents'] = eventStat['track']
    #data['NumNoHitsEvents'] = eventStat['no_hits']
    #data['NumNoHitEvents'] = eventStat['no_hit']
    #data['NumMalformedEvents'] = eventStat['malformed']
    #data['NumBackwardEvents'] = eventStat['backward']
    #headerTree.fillRow(data)
    #headerTree.Write()
    #logger.info('Creating histograms...')
    #for key in ['HitMultBot', 'HitMultMid', 'HitMultTop',
    #            'ClusterMultBot', 'ClusterMultMid', 'ClusterMultTop']:
    #    h = data2hist(data, key, xmax = 15.5)
    #    h.Write()
    #for key in ['HitMultTotal', 'ClusterMultTotal']:
    #    h = data2hist(data, key, xmax = 35.5)
    #    h.Write()
    #logger.info('Closing all files...')
    #rootFile.Close()
    #outFile.close()
    #sumFile.close()
    logger.info('DST created in %.3f s.' % chrono.stop())
    listTemp()
    logger.info('Returning DST path: %s...' % dstFilePath)
    return dstFilePath
def e3dst(baseFilePath):
    """ Parse all the output text files from the analyzer and build the actual
    DST in ROOT format.

    baseFilePath is the path to the run data products without extension:
    '<base>.out' and '<base>.sum' are read, '<base>.bin' is probed for the
    weather-station record, and '<base>_dst.root' is written.

    Returns the path to the DST ROOT file. Exits the process (sys.exit)
    when there are fewer than two hit events or the run duration exceeds
    MAX_RUN_DURATION.
    """
    chrono = E3Chrono()
    logger.info('Collecting input files for the DST...')
    outFile = E3AnalyzerOutFile('%s.out' % baseFilePath)
    sumFile = E3AnalyzerSumFile('%s.sum' % baseFilePath)
    dstFilePath = '%s_dst.root' % baseFilePath
    # Run identity (station, date, run number) is encoded in the file path.
    uniqueId = uniqueRunIdFromFilePath(baseFilePath)
    station, year, month, day, runId = splitFilePath(baseFilePath)
    date = datetime.date(int(year), int(month), int(day))
    logger.info('Unique run ID is %s.' % uniqueId)
    logger.info('Opening output ROOT file %s...' % dstFilePath)
    rootFile = E3OutputRootFile(dstFilePath, 'e3dst', date2str(date), station)
    logger.info('Initializing event tree...')
    eventTree = E3DstEventTree()
    eventTree.setUniqueRunId(uniqueId)
    logger.info('Filling event tree...')
    for row in outFile:
        eventTree.fillRow(row)
    eventStat = outFile.eventStat()
    tmin = outFile.minTimestamp()
    tmax = outFile.maxTimestamp()
    # If we have less than two good events there is nothing we could
    # possibly do, here.
    # Close all files and remove the output dst ROOT file so that
    # we know the run has not been processed.
    if eventStat['hits'] < 2:
        logger.info('Closing all files...')
        rootFile.Close()
        outFile.close()
        sumFile.close()
        __utils__.rm(dstFilePath)
        logger.info('No events with hits, processing terminated after %.3f s.' %\
                    chrono.stop())
        sys.exit(E3PIPE_EXIT_CODE_NO_HITS_EVENTS)
    logger.info('Event stats: %s' % eventStat)
    logger.info('Range of timestamps in the output files: %.3f--%.3f' %\
                (tmin, tmax))
    duration = tmax - tmin
    logger.info('Corresponding run duration: %.3f s' % duration)
    # Sanity check against corrupted timestamps.
    if duration > MAX_RUN_DURATION:
        logger.error('Run looks way too long, something must be wrong.')
        sys.exit(E3PIPE_EXIT_CODE_RUN_TOO_LONG)
    eventTree.Write()
    logger.info('Done, %d event(s) filled in.' % eventTree.GetEntries())
    if eventTree.GetEntries() == 0:
        abort('No events found (maybe an issue with eee_calib.txt?)')
    logger.info('Creating monitoring plots...')
    eventTree.doMonitoring()
    logger.info('Initializing weather tree...')
    weatherTree = E3DstWeatherTree()
    weatherTree.setUniqueRunId(uniqueId)
    # The weather record (if any) lives in the original binary file.
    binFile = E3BinFile('%s.bin' % baseFilePath)
    weatherRecord = binFile.weatherStationRecord()
    if weatherRecord is not None:
        logger.info('Filling weather tree...')
        weatherTree.fillRow(weatherRecord.data())
    weatherTree.Write()
    logger.info('Creating trending data products...')
    trendingTree = eventTree.doTrending(TRENDING_TIME_BIN, tmin, tmax,
                                        weatherRecord)
    logger.info('Writing trending tree...')
    trendingTree.Write()
    logger.info('Writing monitoring/trending plots...')
    for plot in eventTree.plots():
        plot.Write()
    logger.info('Initializing header tree...')
    headerTree = E3DstHeaderTree()
    headerTree.setUniqueRunId(uniqueId)
    logger.info('Filling header tree...')
    data = sumFile.data()
    # Mind we need to add a few things "by hand", here, as not all the
    # information that we want in the header is really coming from the
    # sum file.
    # NOTE(review): `row` is the leftover loop variable from the event-fill
    # loop above, i.e. the *last* row of the out file; the hits >= 2 guard
    # ensures the loop ran at least once, but verify that the last row's
    # RunNumber is really the intended value.
    data['RunNumber'] = row['RunNumber']
    data['RunStart'] = tmin
    data['RunStop'] = tmax
    data['RunDuration'] = tmax - tmin
    data['NumHitEvents'] = eventStat['hits']
    data['NumTrackEvents'] = eventStat['track']
    data['NumNoHitsEvents'] = eventStat['no_hits']
    data['NumNoHitEvents'] = eventStat['no_hit']
    data['NumMalformedEvents'] = eventStat['malformed']
    data['NumBackwardEvents'] = eventStat['backward']
    headerTree.fillRow(data)
    headerTree.Write()
    logger.info('Creating histograms...')
    # Per-plane multiplicities use a tighter x-range than the totals.
    for key in ['HitMultBot', 'HitMultMid', 'HitMultTop',
                'ClusterMultBot', 'ClusterMultMid', 'ClusterMultTop']:
        h = data2hist(data, key, xmax = 15.5)
        h.Write()
    for key in ['HitMultTotal', 'ClusterMultTotal']:
        h = data2hist(data, key, xmax = 35.5)
        h.Write()
    logger.info('Closing all files...')
    rootFile.Close()
    outFile.close()
    sumFile.close()
    logger.info('DST created in %.3f s.' % chrono.stop())
    listTemp()
    logger.info('Returning DST path: %s...' % dstFilePath)
    return dstFilePath
def dstDataFolder(station, date=None):
    """ Return the base folder for the DST products.

    Arguments
    ---------
    station : the station name.
    date : the date of interest (defaults to today).

    Bug fix: the original default (date=datetime.date.today()) was evaluated
    once at import time, so a long-running process would keep using a stale
    date. The default is now resolved at call time.
    """
    if date is None:
        date = datetime.date.today()
    return os.path.join(E3PIPE_RECON_BASE, station, date2str(date))