示例#1
0
def e3crawl(endDate = None, daysSpanned = 2, minSize = 0,
            blackList = None, overwrite = False, maxNumRuns = None,
            dryRun = False):
    """ Crawl the raw data and process the files.

    Each raw file returned by the crawler is handed to an `e3process.py`
    subprocess; a per-invocation log file is written underneath
    E3PIPE_LOG_BASE/<date>/<timestamp>.log.

    Arguments:
        endDate -- last date covered by the crawl (None lets the crawler
            pick its own default).
        daysSpanned -- number of days spanned backward from endDate.
        minSize -- minimum size for a raw file to be considered.
        blackList -- items to be skipped (None is treated as an empty list).
        overwrite -- reprocess files whose outputs already exist.
        maxNumRuns -- optional cap on the number of files to process.
        dryRun -- log what would be done and return without processing.
    """
    # Fix: a mutable default argument ([]) is shared across calls; use
    # None as the placeholder and create a fresh list here instead.
    if blackList is None:
        blackList = []
    logDate = datetime.datetime.today()
    datestr = date2str(logDate)
    timestr = logDate.strftime('%Y-%m-%d-%H-%M-%S-%f')
    logFilePath = os.path.join(E3PIPE_LOG_BASE, datestr, '%s.log' % timestr)
    logFolder = os.path.dirname(logFilePath)
    __utils__.createFolder(logFolder)
    logFileHandler = E3FileHandler(logFilePath)
    crawler = E3RunDbRawFileCrawler(endDate, daysSpanned, minSize,
                                    blackList, overwrite)
    logger.info(crawler)
    if dryRun:
        logger.info('Just kidding, dry run :-)')
        # Fix: do not leak the log-file handler on the early return.
        logFileHandler.close()
        return
    numFiles = maxNumRuns or len(crawler)
    curFile = 1
    for filePath in crawler:
        logger.info('Processing file %d/%d: %s' % (curFile, numFiles, filePath))
        _cmd = 'e3process.py %s' % filePath
        exitCode = __utils__.cmd(_cmd)
        # A failing run should not stop the crawl, but do flag it instead
        # of silently dropping the exit code.
        if exitCode:
            logger.error('"%s" returned exit code %s.' % (_cmd, exitCode))
        if maxNumRuns is not None and curFile >= maxNumRuns:
            break
        curFile += 1
    logFileHandler.close()
示例#2
0
def e3mergeTimeSpan(outputFilePath, station, endDate, daysSpanned=1, **kwargs):
    """ Merge the DST ROOT files for a given station over a time span.

    Sub-daily granularity is not supported at this point; it might be
    added in the future if needed.
    """
    fileList = E3RunDbDstFileCrawler(station, endDate, daysSpanned).fileList()
    startDate = endDate - datetime.timedelta(daysSpanned - 1)
    # The file list coming out of the crawler is already time-ordered,
    # so we can spare the extra sort in the merging stage.
    kwargs.update(sort=False,
                  date='%s--%s' % (date2str(startDate), date2str(endDate)),
                  station=station)
    return e3mergeFiles(outputFilePath, *fileList, **kwargs)
示例#3
0
    def folderPath(self, station, date):
        """ Return the path to the data folder for the given station and
        date for the data products of interest.

        Note that derived classes can overload this method to point at a
        different folder layout.
        """
        dateFolder = date2str(date)
        return os.path.join(self.ROOT_FOLDER, station, dateFolder)
示例#4
0
def e3crawl(endDate=None,
            daysSpanned=2,
            minSize=0,
            blackList=None,
            overwrite=False,
            maxNumRuns=None,
            dryRun=False):
    """ Crawl the raw data and process the files.

    Each raw file returned by the crawler is handed to an `e3process.py`
    subprocess; a per-invocation log file is written underneath
    E3PIPE_LOG_BASE.

    Args:
        endDate: last date covered by the crawl (None lets the crawler
            pick its own default).
        daysSpanned: number of days spanned backward from endDate.
        minSize: minimum size for a raw file to be considered.
        blackList: items to be skipped (None is treated as an empty list).
        overwrite: reprocess files whose outputs already exist.
        maxNumRuns: optional cap on the number of files to process.
        dryRun: log what would be done and return without processing.
    """
    # Fix: a mutable default argument ([]) is shared across calls; use
    # None as the placeholder instead.
    if blackList is None:
        blackList = []
    logDate = datetime.datetime.today()
    datestr = date2str(logDate)
    timestr = logDate.strftime('%Y-%m-%d-%H-%M-%S-%f')
    logFilePath = os.path.join(E3PIPE_LOG_BASE, datestr, '%s.log' % timestr)
    logFolder = os.path.dirname(logFilePath)
    __utils__.createFolder(logFolder)
    logFileHandler = E3FileHandler(logFilePath)
    crawler = E3RunDbRawFileCrawler(endDate, daysSpanned, minSize, blackList,
                                    overwrite)
    logger.info(crawler)
    if dryRun:
        logger.info('Just kidding, dry run :-)')
        # Fix: do not leak the log-file handler on the early return.
        logFileHandler.close()
        return
    numFiles = maxNumRuns or len(crawler)
    curFile = 1
    for filePath in crawler:
        logger.info('Processing file %d/%d: %s' %
                    (curFile, numFiles, filePath))
        _cmd = 'e3process.py %s' % filePath
        exitCode = __utils__.cmd(_cmd)
        # A failing run should not stop the crawl, but do flag it instead
        # of silently dropping the exit code.
        if exitCode:
            logger.error('"%s" returned exit code %s.' % (_cmd, exitCode))
        if maxNumRuns is not None and curFile >= maxNumRuns:
            break
        curFile += 1
    logFileHandler.close()
示例#5
0
def e3mergeTimeSpan(outputFilePath, station, endDate, daysSpanned = 1,
                    **kwargs):
    """ Merge the DST ROOT files for a given station in a given time span.

    We do not offer a sub-daily granularity at this point; that might
    come in the future if needed.
    """
    crawler = E3RunDbDstFileCrawler(station, endDate, daysSpanned)
    dstFiles = crawler.fileList()
    firstDate = endDate - datetime.timedelta(daysSpanned - 1)
    timeSpan = '%s--%s' % (date2str(firstDate), date2str(endDate))
    # The crawler output is already time-ordered: disable the extra sort
    # downstream and attach a few more details for the merged file.
    kwargs['sort'] = False
    kwargs['date'] = timeSpan
    kwargs['station'] = station
    return e3mergeFiles(outputFilePath, *dstFiles, **kwargs)
示例#6
0
def e3report2(station, endDate = None, daysSpanned = 2, outputFolder = None,
             **kwargs):
    """ Read the DST and run the data quality monitoring.
    """
    if endDate is None:
        endDate = datetime.date.today()
    startDate = endDate - datetime.timedelta(daysSpanned - 1)
    # Default report location: E3PIPE_DQM_REPORT_BASE/<station>/<endDate>.
    reportFolder = outputFolder
    if reportFolder is None:
        reportFolder = os.path.join(E3PIPE_DQM_REPORT_BASE, station,
                                    date2str(endDate))
    summaryName = '%s_%s_%s_summary.root' % (station, startDate, endDate)
    summaryPath = os.path.join(reportFolder, summaryName)
    # Merge the DSTs over the time span (events excluded) and dump the
    # relevant trees to text before building the actual report.
    e3mergeTimeSpan2(summaryPath, station, endDate, daysSpanned,
                     mergeEvents = False)
    for treeName in ['Header', 'Trending', 'Weather']:
        e3root2text(summaryPath, treeName)
    report = E3DqmReport(summaryPath, reportFolder)
    report.fill()
示例#7
0
 def __init__(self, filePath):
     """ Constructor.

     Parse the path to a raw binary (.bin) data file and populate the
     underlying dictionary with the run information derived from it
     (station, date, run number) and the paths of the associated data
     products (DST, calibration, DQM, log and lock files).

     Arguments:
         filePath -- path to the raw .bin data file; anything not ending
             in '.bin' aborts.
     """
     # Refuse anything that does not look like a raw binary file.
     if not filePath.endswith('.bin'):
         abort('%s does not look like a raw binary file' % filePath)
     # This class is a dict subclass: all run info is stored as items.
     dict.__init__(self)
     self['RawFilePath'] = filePath
     self['RawDirName'], self['RawFileName'] = os.path.split(filePath)
     # splitFilePath() presumably yields (station, year, month, day,
     # run string) -- TODO confirm against its definition.
     data = splitFilePath(filePath)
     self['Station'] = data[0]
     year, month, day = [int(item) for item in data[1:4]]
     self['RunString'] = data[4]
     # NOTE(review): attribute-style access (self.RunString, self.Date)
     # implies the dict subclass maps attributes onto its keys -- confirm
     # in the base class.
     self['RunNumber'] = int(self.RunString)
     self['Date'] = datetime.date(year, month, day)
     self['DateString'] = date2str(self.Date)
     # Derived paths for the downstream data products.
     self['DstFilePath'] = self.__dstFilePath()
     self['CalibFilePath'] = self.__calibFilePath()
     self['DqmFolderPath'] = self.__dqmFolderPath()
     self['LogFilePath'] = self.__logFilePath()
     self['LockFilePath'] = self.__lockFilePath()
示例#8
0
 def __init__(self, filePath):
     """ Constructor.

     Build all the run bookkeeping from the path of a raw .bin file:
     the parsed pieces (station, date, run number) are stored as items
     of the dictionary, along with the derived paths to the DST,
     calibration, DQM, log and lock data products.

     Arguments:
         filePath -- path to the raw .bin data file (aborts otherwise).
     """
     # Only raw binary files are accepted.
     if not filePath.endswith('.bin'):
         abort('%s does not look like a raw binary file' % filePath)
     # dict subclass: everything is keyed into the dictionary itself.
     dict.__init__(self)
     self['RawFilePath'] = filePath
     self['RawDirName'], self['RawFileName'] = os.path.split(filePath)
     # Expected layout from splitFilePath(): station, year, month, day,
     # run string -- TODO confirm against its definition.
     data = splitFilePath(filePath)
     self['Station'] = data[0]
     year, month, day = [int(item) for item in data[1:4]]
     self['RunString'] = data[4]
     # NOTE(review): self.RunString / self.Date attribute access suggests
     # keys are exposed as attributes by the base class -- verify.
     self['RunNumber'] = int(self.RunString)
     self['Date'] = datetime.date(year, month, day)
     self['DateString'] = date2str(self.Date)
     # Paths of the downstream data products for this run.
     self['DstFilePath'] = self.__dstFilePath()
     self['CalibFilePath'] = self.__calibFilePath()
     self['DqmFolderPath'] = self.__dqmFolderPath()
     self['LogFilePath'] = self.__logFilePath()
     self['LockFilePath'] = self.__lockFilePath()
示例#9
0
def e3report2(station,
              endDate=None,
              daysSpanned=2,
              outputFolder=None,
              **kwargs):
    """ Read the DST and run the data quality monitoring.

    The DST files over the time span are merged (events excluded), the
    relevant trees are dumped to text and a DQM report is filled in the
    output folder.
    """
    endDate = endDate or datetime.date.today()
    startDate = endDate - datetime.timedelta(daysSpanned - 1)
    if outputFolder is None:
        # Default to E3PIPE_DQM_REPORT_BASE/<station>/<endDate>.
        outputFolder = os.path.join(E3PIPE_DQM_REPORT_BASE, station,
                                    date2str(endDate))
    summaryName = '%s_%s_%s_summary.root' % (station, startDate, endDate)
    summaryPath = os.path.join(outputFolder, summaryName)
    e3mergeTimeSpan2(summaryPath,
                     station,
                     endDate,
                     daysSpanned,
                     mergeEvents=False)
    e3root2text(summaryPath, 'Header')
    e3root2text(summaryPath, 'Trending')
    e3root2text(summaryPath, 'Weather')
    dqmReport = E3DqmReport(summaryPath, outputFolder)
    dqmReport.fill()
示例#10
0
def rawDataFolder(station, date=None):
    """ Return the base folder with the raw data for a given station and
    date.

    Fix: the previous default (`date=datetime.date.today()`) was evaluated
    once at import time, so a long-running process would keep using a stale
    date forever. Passing None (the new default) resolves to today's date
    at call time; explicit dates behave exactly as before.
    """
    if date is None:
        date = datetime.date.today()
    return os.path.join(E3PIPE_RAW_BASE, station, 'data', date2str(date))
示例#11
0
def rawDataFolder(station, date = None):
    """ Return the base folder with the raw data for a given station and
    date.

    Fix: the previous default (`date = datetime.date.today()`) was
    evaluated once at import time, so a long-running process would keep
    using a stale date forever. Passing None (the new default) resolves
    to today's date at call time; explicit dates behave exactly as before.
    """
    if date is None:
        date = datetime.date.today()
    return os.path.join(E3PIPE_RAW_BASE, station, 'data', date2str(date))
示例#12
0
 def folderPath(self, station, date):
     """ Overloaded class method.

     This layout inserts an extra 'data' level between the station name
     and the date folder.
     """
     pieces = (self.ROOT_FOLDER, station, 'data', date2str(date))
     return os.path.join(*pieces)
示例#13
0
def dstDataFolder(station, date = None):
    """ Return the base folder for the DST products.

    Fix: the previous default (`date = datetime.date.today()`) was
    evaluated once at import time, so a long-running process would keep
    using a stale date forever. Passing None (the new default) resolves
    to today's date at call time; explicit dates behave exactly as before.
    """
    if date is None:
        date = datetime.date.today()
    return os.path.join(E3PIPE_RECON_BASE, station, date2str(date))
示例#14
0
 def folderPath(self, station, date):
     """ Overloaded class method.

     Resolve the folder holding the data products for the given station
     and date, with the 'data' subfolder specific to this layout.
     """
     dateFolder = date2str(date)
     return os.path.join(self.ROOT_FOLDER, station, 'data', dateFolder)
示例#15
0
def e3trending2(dstFilePath):
    """ Parse a DST root file produced by the new analyzer and add the bits that
    are missing for the DQM (e.g., the trending).

    NOTE(review): this function looks like a work in progress -- most of
    the body (copied from e3dst, by the look of it) is commented out and
    the remaining code does not run as-is; see the inline notes below.

    Arguments:
        dstFilePath -- path to the DST ROOT file to be augmented; the
            same path is returned on success.
    """
    chrono = E3Chrono()
    #uniqueId = uniqueRunIdFromFilePath(baseFilePath)
    #station, year, month, day, runId = splitFilePath(baseFilePath)
    #date = datetime.date(int(year), int(month), int(day))
    #logger.info('Unique run ID is %s.' % uniqueId)
    logger.info('Opening output ROOT file %s...' % dstFilePath)
    # NOTE(review): `date` and `station` are undefined at this point (the
    # lines deriving them are commented out above), so this line raises
    # NameError when executed. Either restore the derivation -- adapted
    # to dstFilePath -- or pass the values in as arguments.
    rootFile = E3OutputRootFile(dstFilePath, 'e3dst', date2str(date), station)
    logger.info('Initializing event tree...')
    eventTree = E3DstEventTree()
    #eventTree.setUniqueRunId(uniqueId)
    #logger.info('Filling event tree...')
    #for row in outFile:
    #    eventTree.fillRow(row)
    #eventStat = outFile.eventStat()
    #tmin = outFile.minTimestamp()
    #tmax = outFile.maxTimestamp()
    # If we have less than two good events there is nothing we could
    # possibly do, here.
    # Close all files and remove the output dst ROOT file so that
    # we know the run has not been processed.
    #if eventStat['hits'] < 2:
    #    logger.info('Closing all files...')
    #    rootFile.Close()
    #    outFile.close()
    #    sumFile.close()
    #    __utils__.rm(dstFilePath)
    #    logger.info('No events with hits, processing terminated after %.3f s.' %\
    #                chrono.stop())
    #    sys.exit(E3PIPE_EXIT_CODE_NO_HITS_EVENTS)
    #logger.info('Event stats: %s' % eventStat)
    #logger.info('Range of timestamps in the output files: %.3f--%.3f' %\
    #            (tmin, tmax))
    #duration = tmax - tmin
    #logger.info('Corresponding run duration: %.3f s' % duration)
    #if duration > MAX_RUN_DURATION:
    #    logger.error('Run looks way too long, something must be wrong.')
    #    sys.exit(E3PIPE_EXIT_CODE_RUN_TOO_LONG)
    #eventTree.Write()
    # NOTE(review): the tree is freshly created and the filling loop is
    # commented out, so GetEntries() is 0 here and the abort() below
    # always fires -- confirm this is the intended placeholder behavior.
    logger.info('Done, %d event(s) filled in.' % eventTree.GetEntries())
    if eventTree.GetEntries() == 0:
        abort('No events found (maybe an issue with eee_calib.txt?)')
    logger.info('Creating monitoring plots...')
    #eventTree.doMonitoring()
    #logger.info('Initializing weather tree...')
    #weatherTree = E3DstWeatherTree()
    #weatherTree.setUniqueRunId(uniqueId)
    #binFile = E3BinFile('%s.bin' % baseFilePath)
    #weatherRecord = binFile.weatherStationRecord()
    #if weatherRecord is not None:
    #    logger.info('Filling weather tree...')
    #    weatherTree.fillRow(weatherRecord.data())
    #weatherTree.Write()
    #logger.info('Creating trending data products...')
    #trendingTree = eventTree.doTrending(TRENDING_TIME_BIN, tmin, tmax,
    #                                    weatherRecord)
    #logger.info('Writing trending tree...')
    #trendingTree.Write()
    #logger.info('Writing monitoring/trending plots...')
    #for plot in eventTree.plots():
    #    plot.Write()
    #logger.info('Initializing header tree...')
    #headerTree = E3DstHeaderTree()
    #headerTree.setUniqueRunId(uniqueId)
    #logger.info('Filling header tree...')
    #data = sumFile.data()
    # Mind we need to add a few things "by hand", here, as not all the
    # information that we want in the header is really coming from the
    # sum file.
    #data['RunNumber'] = row['RunNumber']
    #data['RunStart'] = tmin
    #data['RunStop'] = tmax
    #data['RunDuration'] = tmax - tmin
    #data['NumHitEvents'] = eventStat['hits']
    #data['NumTrackEvents'] = eventStat['track']
    #data['NumNoHitsEvents'] = eventStat['no_hits']
    #data['NumNoHitEvents'] = eventStat['no_hit']
    #data['NumMalformedEvents'] = eventStat['malformed']
    #data['NumBackwardEvents'] = eventStat['backward']
    #headerTree.fillRow(data)
    #headerTree.Write()
    #logger.info('Creating histograms...')
    #for key in ['HitMultBot', 'HitMultMid', 'HitMultTop',
    #            'ClusterMultBot', 'ClusterMultMid', 'ClusterMultTop']:
    #    h = data2hist(data, key, xmax = 15.5)
    #    h.Write()
    #for key in ['HitMultTotal', 'ClusterMultTotal']:
    #    h = data2hist(data, key, xmax = 35.5)
    #    h.Write()
    #logger.info('Closing all files...')
    # NOTE(review): rootFile is opened above but never closed in the
    # active code path (the Close() call is commented out).
    #rootFile.Close()
    #outFile.close()
    #sumFile.close()
    logger.info('DST created in %.3f s.' % chrono.stop())
    listTemp()
    logger.info('Returning DST path: %s...' % dstFilePath)
    return dstFilePath
示例#16
0
def e3dst(baseFilePath):
    """ Parse all the output text files from the analyzer and build the
    actual DST in ROOT format.

    The function reads <baseFilePath>.out and <baseFilePath>.sum (plus
    the original .bin file for the weather-station record), fills the
    event, weather, trending and header trees along with a set of
    summary histograms, and writes everything to <baseFilePath>_dst.root,
    whose path is returned.

    Note that runs with fewer than two events with hits terminate the
    process via sys.exit(E3PIPE_EXIT_CODE_NO_HITS_EVENTS) after removing
    the partial DST, and runs whose duration exceeds MAX_RUN_DURATION
    terminate via sys.exit(E3PIPE_EXIT_CODE_RUN_TOO_LONG).
    """
    chrono = E3Chrono()
    logger.info('Collecting input files for the DST...')
    outFile = E3AnalyzerOutFile('%s.out' % baseFilePath)
    sumFile = E3AnalyzerSumFile('%s.sum' % baseFilePath)
    dstFilePath = '%s_dst.root' % baseFilePath
    uniqueId = uniqueRunIdFromFilePath(baseFilePath)
    station, year, month, day, runId = splitFilePath(baseFilePath)
    date = datetime.date(int(year), int(month), int(day))
    logger.info('Unique run ID is %s.' % uniqueId)
    logger.info('Opening output ROOT file %s...' % dstFilePath)
    rootFile = E3OutputRootFile(dstFilePath, 'e3dst', date2str(date), station)
    logger.info('Initializing event tree...')
    eventTree = E3DstEventTree()
    eventTree.setUniqueRunId(uniqueId)
    logger.info('Filling event tree...')
    for row in outFile:
        eventTree.fillRow(row)
    eventStat = outFile.eventStat()
    tmin = outFile.minTimestamp()
    tmax = outFile.maxTimestamp()
    # If we have less than two good events there is nothing we could
    # possibly do, here.
    # Close all files and remove the output dst ROOT file so that
    # we know the run has not been processed.
    if eventStat['hits'] < 2:
        logger.info('Closing all files...')
        rootFile.Close()
        outFile.close()
        sumFile.close()
        __utils__.rm(dstFilePath)
        logger.info('No events with hits, processing terminated after %.3f s.' %\
                    chrono.stop())
        sys.exit(E3PIPE_EXIT_CODE_NO_HITS_EVENTS)
    logger.info('Event stats: %s' % eventStat)
    logger.info('Range of timestamps in the output files: %.3f--%.3f' %\
                (tmin, tmax))
    duration = tmax - tmin
    logger.info('Corresponding run duration: %.3f s' % duration)
    if duration > MAX_RUN_DURATION:
        # NOTE(review): unlike the no-hits branch above, this exit path
        # does not close the files nor remove the partial DST -- confirm
        # whether that is intentional.
        logger.error('Run looks way too long, something must be wrong.')
        sys.exit(E3PIPE_EXIT_CODE_RUN_TOO_LONG)
    eventTree.Write()
    logger.info('Done, %d event(s) filled in.' % eventTree.GetEntries())
    if eventTree.GetEntries() == 0:
        abort('No events found (maybe an issue with eee_calib.txt?)')
    logger.info('Creating monitoring plots...')
    eventTree.doMonitoring()
    logger.info('Initializing weather tree...')
    weatherTree = E3DstWeatherTree()
    weatherTree.setUniqueRunId(uniqueId)
    binFile = E3BinFile('%s.bin' % baseFilePath)
    weatherRecord = binFile.weatherStationRecord()
    if weatherRecord is not None:
        logger.info('Filling weather tree...')
        weatherTree.fillRow(weatherRecord.data())
    weatherTree.Write()
    logger.info('Creating trending data products...')
    trendingTree = eventTree.doTrending(TRENDING_TIME_BIN, tmin, tmax,
                                        weatherRecord)
    logger.info('Writing trending tree...')
    trendingTree.Write()
    logger.info('Writing monitoring/trending plots...')
    for plot in eventTree.plots():
        plot.Write()
    logger.info('Initializing header tree...')
    headerTree = E3DstHeaderTree()
    headerTree.setUniqueRunId(uniqueId)
    logger.info('Filling header tree...')
    data = sumFile.data()
    # Mind we need to add a few things "by hand", here, as not all the
    # information that we want in the header is really coming from the
    # sum file.
    # NOTE(review): `row` here is the last row of the event loop above;
    # it is presumably always bound at this point because an empty .out
    # file would have been caught by the hits < 2 guard -- confirm.
    data['RunNumber'] = row['RunNumber']
    data['RunStart'] = tmin
    data['RunStop'] = tmax
    data['RunDuration'] = tmax - tmin
    data['NumHitEvents'] = eventStat['hits']
    data['NumTrackEvents'] = eventStat['track']
    data['NumNoHitsEvents'] = eventStat['no_hits']
    data['NumNoHitEvents'] = eventStat['no_hit']
    data['NumMalformedEvents'] = eventStat['malformed']
    data['NumBackwardEvents'] = eventStat['backward']
    headerTree.fillRow(data)
    headerTree.Write()

    logger.info('Creating histograms...')
    # Per-plane multiplicities fit in 16 bins, the totals need more room.
    for key in ['HitMultBot', 'HitMultMid', 'HitMultTop',
                'ClusterMultBot', 'ClusterMultMid', 'ClusterMultTop']:
        h = data2hist(data, key, xmax = 15.5)
        h.Write()
    for key in ['HitMultTotal', 'ClusterMultTotal']:
        h = data2hist(data, key, xmax = 35.5)
        h.Write()
    logger.info('Closing all files...')
    rootFile.Close()
    outFile.close()
    sumFile.close()
    logger.info('DST created in %.3f s.' % chrono.stop())
    listTemp()
    logger.info('Returning DST path: %s...' % dstFilePath)
    return dstFilePath
示例#17
0
def dstDataFolder(station, date=None):
    """ Return the base folder for the DST products.

    Fix: the previous default (`date=datetime.date.today()`) was evaluated
    once at import time, so a long-running process would keep using a stale
    date forever. Passing None (the new default) resolves to today's date
    at call time; explicit dates behave exactly as before.
    """
    if date is None:
        date = datetime.date.today()
    return os.path.join(E3PIPE_RECON_BASE, station, date2str(date))