Exemplo n.º 1
0
def fixSubsetCol( var, nSorts = 1, nEta = 1, nEt = 1, level = None ):
  """
    Helper method to correct variable to be a looping bound collection
    correctly represented by a LoopingBoundsCollection instance.
  """
  tree_types = (SubsetGeneratorCollection, SubsetGeneratorPatterns, list, tuple )
  try: 
    # Retrieve collection maximum depth
    _, _, _, _, depth = traverse(var, tree_types = tree_types).next()
  except GeneratorExit:
    depth = 0
  if depth < 5:
    if depth == 0:
      var = [[[[var]]]]
    elif depth == 1:
      var = [[[var]]]
    elif depth == 2:
      var = [[var]]
    elif depth == 3:
      var = [var]
    # We also want to be sure that they are in correct type and correct size:
    from RingerCore import inspect_list_attrs
    var = inspect_list_attrs(var, 3, SubsetGeneratorPatterns  , tree_types = tree_types,                                level = level   )
    var = inspect_list_attrs(var, 2, SubsetGeneratorCollection, tree_types = tree_types, dim = nSorts, name = "nSorts",                 )
    var = inspect_list_attrs(var, 1, SubsetGeneratorCollection, tree_types = tree_types, dim = nEta,   name = "nEta",                   )
    var = inspect_list_attrs(var, 0, SubsetGeneratorCollection, tree_types = tree_types, dim = nEt,    name = "nEt",    deepcopy = True )
  else:
    raise ValueError("subset generator dimensions size is larger than 5.")

  return var
Exemplo n.º 2
0
def select(fl, filters, popListInCaseOneItem=True):
    """
  Return a selection from fl maching f

  WARNING: This selection method retrieves the same string contained in fl
  if it matches two different filters.
  """
    try:
        iter(filters)
        if isinstance(filters, basestring): raise Exception
    except:
        filters = [filters]
    ret = []
    from RingerCore import traverse
    for filt in filters:
        taken = filter(lambda obj: type(obj) in (str, unicode) and filt in obj,
                       traverse(fl, simple_ret=True))
        ret.append(taken)
    if popListInCaseOneItem and len(ret) == 1: ret = ret[0]
    return ret
Exemplo n.º 3
0
     [0.9892, 0.9869, 0.9431, 0.9834, 0.9633],
     [0.9914, 0.9904, 0.9724, 0.9885, 0.9680],
     [0.9930, 0.9921, 0.9778, 0.9763, 0.9533],
     [0.9938, 0.9933, 0.9794, 0.9925, 0.9826]]) * 100.

#etaBins      = [0, 0.8]

#for ref in (veryloose20160701, loose20160701, medium20160701, tight20160701):
ref = tight20160701
from RingerCore import traverse
pdrefs = ref
#print pdrefs
pfrefs = np.array([[0.05] * len(etaBins)] * len(etBins)) * 100.  # 3 5 7 10
efficiencyValues = np.array([
    np.array([refs])
    for refs in zip(traverse(pdrefs, tree_types=(np.ndarray), simple_ret=True),
                    traverse(pfrefs, tree_types=(np.ndarray), simple_ret=True))
]).reshape(pdrefs.shape + (2, ))

basePath = '/eos/user/j/jodafons/CERN-DATA/data/data17_13TeV/'
sgnInputFile = 'EGAM1'
bkgInputFile = 'EGAM7'
outputFile = 'sample'
treePath = ["*/HLT/Physval/Egamma/probes", "*/HLT/Physval/Egamma/fakes"]

import os.path
from TuningTools import Reference, RingerOperation, Detector
from TuningTools import createData
from RingerCore import LoggingLevel
from TuningTools.dataframe import Dataframe
from RingerCore.Configure import Development
Exemplo n.º 4
0
  def __call__( self, fList, ringerOperation, **kw):
    """
      Read ntuple and return patterns and efficiencies.
      Arguments:
        - fList: The file path or file list path. It can be an argument list of
        two types:
          o List: each element is a string path to the file;
          o Comma separated string: each path is separated via a comma
          o Folders: Expand folders recursively adding also files within them to analysis
        - ringerOperation: Set Operation type. It can be both a string or the
          RingerOperation
      Optional arguments:
        - filterType [None]: whether to filter. Use FilterType enumeration
        - reference [Truth]: set reference for targets. Use Reference enumeration
        - treePath [Set using operation]: set tree name on file, this may be set to
          use different sources then the default.
            Default for:
              o Offline: Offline/Egamma/Ntuple/electron
              o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH
        - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the trigger
        - l2EtCut [None]: Set L2 cluster energy cut value if operating on the trigger
        - offEtCut [None]: Set Offline cluster energy cut value
        - nClusters [None]: Read up to nClusters. Use None to run for all clusters.
        - getRatesOnly [False]: Read up to nClusters. Use None to run for all clusters.
        - etBins [None]: E_T bins (GeV) where the data should be segmented
        - etaBins [None]: eta bins where the data should be segmented
        - ringConfig [100]: A list containing the number of rings available in the data
          for each eta bin.
        - crossVal [None]: Whether to measure benchmark efficiency splitting it
          by the crossVal-validation datasets
        - extractDet [None]: Which detector to export (use Detector enumeration).
          Defaults are:
            o L2Calo: Calorimetry
            o L2: Tracking
            o Offline: Calorimetry
            o Others: CaloAndTrack
        - standardCaloVariables [False]: Whether to extract standard track variables.
        - useTRT [False]: Whether to export TRT information when dumping track
          variables.
        - supportTriggers [True]: Whether reading data comes from support triggers
    """
    # Offline information branches:
    __offlineBranches = ['el_et',
                         'el_eta',
                         #'el_loose',
                         #'el_medium',
                         #'el_tight',
                         'el_lhLoose',
                         'el_lhMedium',
                         'el_lhTight',
                         'mc_hasMC',
                         'mc_isElectron',
                         'mc_hasZMother',
                         'el_nPileupPrimaryVtx',
                         ]
    # Online information branches
    __onlineBranches = []
    __l2stdCaloBranches = ['trig_L2_calo_et',
                           'trig_L2_calo_eta',
                           'trig_L2_calo_phi',
                           'trig_L2_calo_e237', # rEta
                           'trig_L2_calo_e277', # rEta
                           'trig_L2_calo_fracs1', # F1: fraction sample 1
                           'trig_L2_calo_weta2', # weta2
                           'trig_L2_calo_ehad1', # energy on hadronic sample 1
                           'trig_L2_calo_emaxs1', # eratio
                           'trig_L2_calo_e2tsts1', # eratio
                           'trig_L2_calo_wstot',] # wstot
    __l2trackBranches = [ # Do not add non patter variables on this branch list
                         #'trig_L2_el_pt',
                         #'trig_L2_el_eta',
                         #'trig_L2_el_phi',
                         #'trig_L2_el_caloEta',
                         #'trig_L2_el_charge',
                         #'trig_L2_el_nTRTHits',
                         #'trig_L2_el_nTRTHiThresholdHits',
                         'trig_L2_el_etOverPt',
                         'trig_L2_el_trkClusDeta',
                         'trig_L2_el_trkClusDphi',]
    # Retrieve information from keyword arguments
    filterType            = retrieve_kw(kw, 'filterType',            FilterType.DoNotFilter )
    reference             = retrieve_kw(kw, 'reference',             Reference.Truth        )
    l1EmClusCut           = retrieve_kw(kw, 'l1EmClusCut',           None                   )
    l2EtCut               = retrieve_kw(kw, 'l2EtCut',               None                   )
    efEtCut               = retrieve_kw(kw, 'efEtCut',               None                   )
    offEtCut              = retrieve_kw(kw, 'offEtCut',              None                   )
    treePath              = retrieve_kw(kw, 'treePath',              None                   )
    nClusters             = retrieve_kw(kw, 'nClusters',             None                   )
    getRates              = retrieve_kw(kw, 'getRates',              True                   )
    getRatesOnly          = retrieve_kw(kw, 'getRatesOnly',          False                  )
    etBins                = retrieve_kw(kw, 'etBins',                None                   )
    etaBins               = retrieve_kw(kw, 'etaBins',               None                   )
    crossVal              = retrieve_kw(kw, 'crossVal',              None                   )
    ringConfig            = retrieve_kw(kw, 'ringConfig',            100                    )
    extractDet            = retrieve_kw(kw, 'extractDet',            None                   )
    standardCaloVariables = retrieve_kw(kw, 'standardCaloVariables', False                  )
    useTRT                = retrieve_kw(kw, 'useTRT',                False                  )
    supportTriggers       = retrieve_kw(kw, 'supportTriggers',       True                   )
    monitoring            = retrieve_kw(kw, 'monitoring',            None                   )
    pileupRef             = retrieve_kw(kw, 'pileupRef',             NotSet                 )
    import ROOT, pkgutil
    #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C");
    #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C')
    if not( bool( pkgutil.find_loader( 'libTuningTools' ) ) and ROOT.gSystem.Load('libTuningTools') >= 0 ) and \
       not( bool( pkgutil.find_loader( 'libTuningToolsLib' ) ) and ROOT.gSystem.Load('libTuningToolsLib') >= 0 ):
        #ROOT.gSystem.Load('libTuningToolsPythonLib') < 0:
      self._fatal("Could not load TuningTools library", ImportError)

    if 'level' in kw: self.level = kw.pop('level')
    # and delete it to avoid mistakes:
    checkForUnusedVars( kw, self._warning )
    del kw
    ### Parse arguments
    # Mutual exclusive arguments:
    if not getRates and getRatesOnly:
      self._logger.error("Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True.")
      getRates = True
    # Also parse operation, check if its type is string and if we can
    # transform it to the known operation enum:
    fList = csvStr2List ( fList )
    fList = expandFolders( fList )
    ringerOperation = RingerOperation.retrieve(ringerOperation)
    reference = Reference.retrieve(reference)
    if isinstance(l1EmClusCut, str):
      l1EmClusCut = float(l1EmClusCut)
    if l1EmClusCut:
      l1EmClusCut = 1000.*l1EmClusCut # Put energy in MeV
      __onlineBranches.append( 'trig_L1_emClus'  )
    if l2EtCut:
      l2EtCut = 1000.*l2EtCut # Put energy in MeV
      __onlineBranches.append( 'trig_L2_calo_et' )
    if efEtCut:
      efEtCut = 1000.*efEtCut # Put energy in MeV
      __onlineBranches.append( 'trig_EF_calo_et' )
    if offEtCut:
      offEtCut = 1000.*offEtCut # Put energy in MeV
      __offlineBranches.append( 'el_et' )
    if not supportTriggers:
      __onlineBranches.append( 'trig_L1_accept' )
    # Check if treePath is None and try to set it automatically
    if treePath is None:
      treePath = 'Offline/Egamma/Ntuple/electron' if ringerOperation < 0 else \
                 'Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH'
    # Check whether using bins
    useBins=False; useEtBins=False; useEtaBins=False
    nEtaBins = 1; nEtBins = 1
    # Set the detector which we should extract the information:
    if extractDet is None:
      if ringerOperation < 0:
        extractDet = Detector.Calorimetry
      elif ringerOperation is RingerOperation.L2Calo:
        extractDet = Detector.Calorimetry
      elif ringerOperation is RingerOperation.L2:
        extractDet = Detector.Tracking
      else:
        extractDet = Detector.CaloAndTrack
    else:
      extractDet = Detector.retrieve( extractDet )

    if etaBins is None: etaBins = npCurrent.fp_array([])
    if type(etaBins) is list: etaBins=npCurrent.fp_array(etaBins)
    if etBins is None: etBins = npCurrent.fp_array([])
    if type(etBins) is list: etBins=npCurrent.fp_array(etBins)

    if etBins.size:
      etBins = etBins * 1000. # Put energy in MeV
      nEtBins  = len(etBins)-1
      if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
        self._fatal(('Number of et bins (%d) is larger or equal than maximum '
            'integer precision can hold (%d). Increase '
            'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtBins,
            np.iinfo(npCurrent.scounter_dtype).max)
      # Flag that we are separating data through bins
      useBins=True
      useEtBins=True
      self._debug('E_T bins enabled.')

    if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
      ringConfig = [ringConfig] * (len(etaBins) - 1) if etaBins.size else 1
    if type(ringConfig) is list: ringConfig=npCurrent.int_array(ringConfig)
    if not len(ringConfig):
      self._fatal('Rings size must be specified.');

    if etaBins.size:
      nEtaBins = len(etaBins)-1
      if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
        self._fatal(('Number of eta bins (%d) is larger or equal than maximum '
            'integer precision can hold (%d). Increase '
            'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'), nEtaBins,
            np.iinfo(npCurrent.scounter_dtype).max)
      if len(ringConfig) != nEtaBins:
        self._fatal(('The number of rings configurations (%r) must be equal than '
                            'eta bins (%r) region config'),ringConfig, etaBins)
      useBins=True
      useEtaBins=True
      self._debug('eta bins enabled.')
    else:
      self._debug('eta/et bins disabled.')

    ### Prepare to loop:
    # Open root file
    t = ROOT.TChain(treePath)
    for inputFile in progressbar(fList, len(fList),
                                 logger = self._logger,
                                 prefix = "Creating collection tree "):

      # Check if file exists
      f  = ROOT.TFile.Open(inputFile, 'read')
      if not f or f.IsZombie():
        self._warning('Couldn''t open file: %s', inputFile)
        continue
      # Inform user whether TTree exists, and which options are available:
      self._debug("Adding file: %s", inputFile)
      obj = f.Get(treePath)
      if not obj:
        self._warning("Couldn't retrieve TTree (%s)!", treePath)
        self._info("File available info:")
        f.ReadAll()
        f.ReadKeys()
        f.ls()
        continue
      elif not isinstance(obj, ROOT.TTree):
        self._fatal("%s is not an instance of TTree!", treePath, ValueError)
      t.Add( inputFile )

    # Turn all branches off.
    t.SetBranchStatus("*", False)

    # RingerPhysVal hold the address of required branches
    event = ROOT.RingerPhysVal()

    # Add offline branches, these are always needed
    cPos = 0
    for var in __offlineBranches:
      self.__setBranchAddress(t,var,event)

    # Add online branches if using Trigger
    if ringerOperation > 0:
      for var in __onlineBranches:
        self.__setBranchAddress(t,var,event)


    ## Allocating memory for the number of entries
    entries = t.GetEntries()
    nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                                                                else nClusters

    ## Retrieve the dependent operation variables:
    if useEtBins:
      etBranch = 'el_et' if ringerOperation < 0 else 'trig_L2_calo_et'
      self.__setBranchAddress(t,etBranch,event)
      self._debug("Added branch: %s", etBranch)
      if not getRatesOnly:
        npEt    = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
        self._debug("Allocated npEt    with size %r", npEt.shape)

    if useEtaBins:
      etaBranch    = "el_eta" if ringerOperation < 0 else "trig_L2_calo_eta"
      self.__setBranchAddress(t,etaBranch,event)
      self._debug("Added branch: %s", etaBranch)
      if not getRatesOnly:
        npEta    = npCurrent.scounter_zeros(shape=npCurrent.shape(npat = 1, nobs = nobs))
        self._debug("Allocated npEta   with size %r", npEta.shape)

    # The base information holder, such as et, eta and pile-up
    if pileupRef is NotSet:
      if ringerOperation > 0:
        pileupRef = PileupReference.avgmu
      else:
        pileupRef = PileupReference.nvtx

    pileupRef = PileupReference.retrieve( pileupRef )

    self._info("Using '%s' as pile-up reference.", PileupReference.tostring( pileupRef ) )

    if pileupRef is PileupReference.nvtx:
      pileupBranch = 'el_nPileupPrimaryVtx'
      pileupDataType = np.uint16
    elif pileupRef is PileupReference.avgmu:
      pileupBranch = 'avgmu'
      pileupDataType = np.float32
    else:
      raise NotImplementedError("Pile-up reference %r is not implemented." % pileupRef)
    baseInfoBranch = BaseInfo((etBranch, etaBranch,  pileupBranch, 'el_phi' if ringerOperation < 0 else 'trig_L2_el_phi',),
                              (npCurrent.fp_dtype, npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType) )
    baseInfo = [None, ] * baseInfoBranch.nInfo

    # Make sure all baseInfoBranch information is available:
    for idx in baseInfoBranch:
      self.__setBranchAddress(t,baseInfoBranch.retrieveBranch(idx),event)

    # Allocate numpy to hold as many entries as possible:
    if not getRatesOnly:
      # Retrieve the rings information depending on ringer operation
      ringerBranch = "el_ringsE" if ringerOperation < 0 else \
                     "trig_L2_calo_rings"
      self.__setBranchAddress(t,ringerBranch,event)
      if ringerOperation > 0:
        if ringerOperation is RingerOperation.L2:
          for var in __l2trackBranches:
            self.__setBranchAddress(t,var,event)
      if standardCaloVariables:
        if ringerOperation in (RingerOperation.L2, RingerOperation.L2Calo,):
          for var in __l2stdCaloBranches:
            self.__setBranchAddress(t, var, event)
        else:
          self._warning("Unknown standard calorimeters for Operation:%s. Setting operation back to use rings variables.",
                               RingerOperation.tostring(ringerOperation))
      t.GetEntry(0)
      npat = 0
      if extractDet in (Detector.Calorimetry,
                        Detector.CaloAndTrack,
                        Detector.All):
        if standardCaloVariables:
          npat+= 6
        else:
          npat += ringConfig.max()
      if extractDet in (Detector.Tracking,
                       Detector.CaloAndTrack,
                       Detector.All):
        if ringerOperation is RingerOperation.L2:
          if useTRT:
            self._info("Using TRT information!")
            npat += 2
            __l2trackBranches.append('trig_L2_el_nTRTHits')
            __l2trackBranches.append('trig_L2_el_nTRTHiThresholdHits')
          npat += 3
          for var in __l2trackBranches:
            self.__setBranchAddress(t,var,event)
          self.__setBranchAddress(t,"trig_L2_el_pt",event)
        elif ringerOperation < 0: # Offline
          self._warning("Still need to implement tracking for the ringer offline.")
      npPatterns = npCurrent.fp_zeros( shape=npCurrent.shape(npat=npat, #getattr(event, ringerBranch).size()
                                                   nobs=nobs)
                                     )
      self._debug("Allocated npPatterns with size %r", npPatterns.shape)

      # Add E_T, eta and luminosity information
      npBaseInfo = [npCurrent.zeros( shape=npCurrent.shape(npat=1, nobs=nobs ), dtype=baseInfoBranch.dtype(idx) )
                                    for idx in baseInfoBranch]
    else:
      npPatterns = npCurrent.fp_array([])
      npBaseInfo = [deepcopy(npCurrent.fp_array([])) for _ in baseInfoBranch]

    ## Allocate the branch efficiency collectors:
    if getRates:
      if ringerOperation < 0:
        benchmarkDict = OrderedDict(
          [(  RingerOperation.Offline_CutBased_Loose  , 'el_loose'            ),
           (  RingerOperation.Offline_CutBased_Medium , 'el_medium'           ),
           (  RingerOperation.Offline_CutBased_Tight  , 'el_tight'            ),
           (  RingerOperation.Offline_LH_Loose        , 'el_lhLoose'          ),
           (  RingerOperation.Offline_LH_Medium       , 'el_lhMedium'         ),
           (  RingerOperation.Offline_LH_Tight        , 'el_lhTight'          ),
          ])
      else:
        benchmarkDict = OrderedDict(
          [( RingerOperation.L2Calo                  , 'trig_L2_calo_accept' ),
           ( RingerOperation.L2                      , 'trig_L2_el_accept'   ),
           ( RingerOperation.EFCalo                  , 'trig_EF_calo_accept' ),
           ( RingerOperation.HLT                     , 'trig_EF_el_accept'   ),
          ])


      from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
      branchEffCollectors = OrderedDict()
      branchCrossEffCollectors = OrderedDict()
      for key, val in benchmarkDict.iteritems():
        branchEffCollectors[key] = list()
        branchCrossEffCollectors[key] = list()
        # Add efficincy branch:
        if getRates or getRatesOnly:
          self.__setBranchAddress(t,val,event)
        for etBin in range(nEtBins):
          if useBins:
            branchEffCollectors[key].append(list())
            branchCrossEffCollectors[key].append(list())
          for etaBin in range(nEtaBins):
            etBinArg = etBin if useBins else -1
            etaBinArg = etaBin if useBins else -1
            argList = [ RingerOperation.tostring(key), val, etBinArg, etaBinArg ]
            branchEffCollectors[key][etBin].append(BranchEffCollector( *argList ) )
            if crossVal:
              branchCrossEffCollectors[key][etBin].append(BranchCrossEffCollector( entries, crossVal, *argList ) )
          # etBin
        # etaBin
      # benchmark dict
      if self._logger.isEnabledFor( LoggingLevel.DEBUG ):
        self._debug( 'Retrieved following branch efficiency collectors: %r',
            [collector[0].printName for collector in traverse(branchEffCollectors.values())])
    # end of (getRates)

    etaBin = 0; etBin = 0
    step = int(entries/100) if int(entries/100) > 0 else 1
    ## Start loop!
    self._info("There is available a total of %d entries.", entries)

    for entry in progressbar(range(entries), entries,
                             step = step, logger = self._logger,
                             prefix = "Looping over entries "):

      #self._verbose('Processing eventNumber: %d/%d', entry, entries)
      t.GetEntry(entry)

      # Check if it is needed to remove energy regions (this means that if not
      # within this range, it will be ignored as well for efficiency measuremnet)
      if event.el_et < offEtCut:
        self._verbose("Ignoring entry due to offline E_T cut.")
        continue
      # Add et distribution for all events

      if not monitoring is None:
        # Book all distribtions before the event selection
        self.__fillHistograms(monitoring,filterType,event,False)

      if ringerOperation > 0:
        # Remove events which didn't pass L1_calo
        if not supportTriggers and not event.trig_L1_accept:
          #self._verbose("Ignoring entry due to L1Calo cut (trig_L1_accept = %r).", event.trig_L1_accept)
          continue
        if event.trig_L1_emClus  < l1EmClusCut:
          #self._verbose("Ignoring entry due to L1Calo E_T cut (%d < %r).", event.trig_L1_emClus, l1EmClusCut)
          continue
        if event.trig_L2_calo_et < l2EtCut:
          #self._verbose("Ignoring entry due to L2Calo E_T cut.")
          continue
        if  efEtCut is not None and event.trig_L2_calo_accept :
          # EF calo is a container, search for electrons objects with et > cut
          trig_EF_calo_et_list = stdvector_to_list(event.trig_EF_calo_et)
          found=False
          for v in trig_EF_calo_et_list:
            if v < efEtCut:  found=True
          if found:
            #self._verbose("Ignoring entry due to EFCalo E_T cut.")
            continue

      # Set discriminator target:
      target = Target.Unknown
      if reference is Reference.Truth:
        if event.mc_isElectron and event.mc_hasZMother:
          target = Target.Signal
        elif not (event.mc_isElectron and (event.mc_hasZMother or event.mc_hasWMother) ):
          target = Target.Background
      elif reference is Reference.Off_Likelihood:
        if event.el_lhTight: target = Target.Signal
        elif not event.el_lhLoose: target = Target.Background
      elif reference is Reference.AcceptAll:
        target = Target.Signal if filterType is FilterType.Signal else Target.Background
      else:
        if event.el_tight: target = Target.Signal
        elif not event.el_loose: target = Target.Background

      # Run filter if it is defined
      if filterType and \
         ( (filterType is FilterType.Signal and target != Target.Signal) or \
           (filterType is FilterType.Background and target != Target.Background) or \
           (target == Target.Unknown) ):
        #self._verbose("Ignoring entry due to filter cut.")
        continue

      # Add et distribution for all events
      if not monitoring is None:
        # Book all distributions after the event selection
        self.__fillHistograms(monitoring,filterType,event,True)

      # Retrieve base information:
      for idx in baseInfoBranch:
        lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
        baseInfo[idx] = lInfo
        if not getRatesOnly: npBaseInfo[idx][cPos] = lInfo
      # Retrieve dependent operation region
      if useEtBins:
        etBin  = self.__retrieveBinIdx( etBins, baseInfo[0] )
      if useEtaBins:
        etaBin = self.__retrieveBinIdx( etaBins, np.fabs( baseInfo[1]) )


      # Check if bin is within range (when not using bins, this will always be true):
      if (etBin < nEtBins and etaBin < nEtaBins):
        # Retrieve patterns:
        if not getRatesOnly:
          if useEtBins:  npEt[cPos] = etBin
          if useEtaBins: npEta[cPos] = etaBin
          ## Retrieve calorimeter information:
          cPat = 0
          caloAvailable = True
          if extractDet in (Detector.Calorimetry,
                           Detector.CaloAndTrack,
                           Detector.All):
            if standardCaloVariables:
              patterns = []
              if ringerOperation is RingerOperation.L2Calo:
                from math import cosh
                cosh_eta = cosh( event.trig_L2_calo_eta )
                # second layer ratio between 3x7 7x7
                rEta = event.trig_L2_calo_e237 / event.trig_L2_calo_e277
                base = event.trig_L2_calo_emaxs1 + event.trig_L2_calo_e2tsts1
                # Ratio between first and second highest energy cells
                eRatio = ( event.trig_L2_calo_emaxs1 - event.trig_L2_calo_e2tsts1 ) / base if base > 0 else 0
                # ratio of energy in the first layer (hadronic particles should leave low energy)
                F1 = event.trig_L2_calo_fracs1 / ( event.trig_L2_calo_et * cosh_eta )
                # weta2 is calculated over the middle layer using 3 x 5
                weta2 = event.trig_L2_calo_weta2
                # wstot is calculated over the first layer using (typically) 20 strips
                wstot = event.trig_L2_calo_wstot
                # ratio between EM cluster and first hadronic layers:
                Rhad1 = ( event.trig_L2_calo_ehad1 / cosh_eta ) / event.trig_L2_calo_et
                # allocate patterns:
                patterns = [rEta, eRatio, F1, weta2, wstot, Rhad1]
                for pat in patterns:
                  npPatterns[npCurrent.access( pidx=cPat, oidx=cPos) ] = pat
                  cPat += 1
              # end of ringerOperation
            else:
              # Remove events without rings
              if getattr(event,ringerBranch).empty():
                caloAvailable = False
              # Retrieve rings:
              if caloAvailable:
                try:
                  patterns = stdvector_to_list( getattr(event,ringerBranch) )
                  lPat = len(patterns)
                  if lPat == ringConfig[etaBin]:
                    npPatterns[npCurrent.access(pidx=slice(cPat,ringConfig[etaBin]),oidx=cPos)] = patterns
                  else:
                    oldEtaBin = etaBin
                    if etaBin > 0 and ringConfig[etaBin - 1] == lPat:
                      etaBin -= 1
                    elif etaBin + 1 < len(ringConfig) and ringConfig[etaBin + 1] == lPat:
                      etaBin += 1
                    npPatterns[npCurrent.access(pidx=slice(cPat, ringConfig[etaBin]),oidx=cPos)] = patterns
                    self._warning(("Recovered event which should be within eta bin (%d: %r) "
                                          "but was found to be within eta bin (%d: %r). "
                                          "Its read eta value was of %f."),
                                          oldEtaBin, etaBins[oldEtaBin:oldEtaBin+2],
                                          etaBin, etaBins[etaBin:etaBin+2],
                                          np.fabs( getattr(event,etaBranch)))
                except ValueError:
                  self._logger.error(("Patterns size (%d) do not match expected "
                                    "value (%d). This event eta value is: %f, and ringConfig is %r."),
                                    lPat, ringConfig[etaBin], np.fabs( getattr(event,etaBranch)), ringConfig
                                    )
                  continue
              else:
                if extractDet is Detector.Calorimetry:
                  # Also display warning when extracting only calorimetry!
                  self._warning("Rings not available")
                  continue
                self._warning("Rings not available")
                continue
              cPat += ringConfig.max()
            # which calo variables
          # end of (extractDet needed calorimeter)
          # And track information:
          if extractDet in (Detector.Tracking,
                           Detector.CaloAndTrack,
                           Detector.All):
            if caloAvailable or extractDet is Detector.Tracking:
              if ringerOperation is RingerOperation.L2:
                # Retrieve nearest deta/dphi only, so we need to find each one is the nearest:
                if event.trig_L2_el_trkClusDeta.size():
                  clusDeta = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDeta ) )
                  clusDphi = npCurrent.fp_array( stdvector_to_list( event.trig_L2_el_trkClusDphi ) )
                  bestTrackPos = int( np.argmin( clusDeta**2 + clusDphi**2 ) )
                  for var in __l2trackBranches:
                    npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = getattr(event, var)[bestTrackPos]
                    cPat += 1
                else:
                  #self._verbose("Ignoring entry due to track information not available.")
                  continue
                  #for var in __l2trackBranches:
                  #  npPatterns[npCurrent.access( pidx=cPat,oidx=cPos) ] = np.nan
                  #  cPat += 1
              elif ringerOperation < 0: # Offline
                pass
            # caloAvailable or only tracking
          # end of (extractDet needs tracking)
        # end of (getRatesOnly)

        ## Retrieve rates information:
        if getRates:
          for branch in branchEffCollectors.itervalues():
            if not useBins:
              branch.update(event)
            else:
              branch[etBin][etaBin].update(event)
          if crossVal:
            for branchCross in branchCrossEffCollectors.itervalues():
              if not useBins:
                branchCross.update(event)
              else:
                branchCross[etBin][etaBin].update(event)
        # end of (getRates)

        # We only increment if this cluster will be computed
        cPos += 1
      # end of (et/eta bins)

      # Limit the number of entries to nClusters if desired and possible:
      if not nClusters is None and cPos >= nClusters:
        break
    # for end

    ## Treat the rings information
    if not getRatesOnly:

      ## Remove not filled reserved memory space:
      if npPatterns.shape[npCurrent.odim] > cPos:
        npPatterns = np.delete( npPatterns, slice(cPos,None), axis = npCurrent.odim)

      ## Segment data over bins regions:
      # Also remove not filled reserved memory space:
      if useEtBins:
        npEt  = npCurrent.delete( npEt, slice(cPos,None))
      if useEtaBins:
        npEta = npCurrent.delete( npEta, slice(cPos,None))
      # Treat
      npObject = self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                  nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                  npPatterns, )
      data = [self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins,
                                                      nEtBins, nEtaBins, standardCaloVariables, ringConfig,
                                                      npData) for npData in npBaseInfo]
      npBaseInfo = npCurrent.array( data, dtype=np.object )
    else:
      npObject = npCurrent.array([], dtype=npCurrent.dtype)
    # not getRatesOnly

    if getRates:
      if crossVal:
        for etBin in range(nEtBins):
          for etaBin in range(nEtaBins):
            for branchCross in branchCrossEffCollectors.itervalues():
              if not useBins:
                branchCross.finished()
              else:
                branchCross[etBin][etaBin].finished()

      # Print efficiency for each one for the efficiency branches analysed:
      for etBin in range(nEtBins) if useBins else range(1):
        for etaBin in range(nEtaBins) if useBins else range(1):
          for branch in branchEffCollectors.itervalues():
            lBranch = branch if not useBins else branch[etBin][etaBin]
            self._info('%s',lBranch)
          if crossVal:
            for branchCross in branchCrossEffCollectors.itervalues():
              lBranchCross = branchCross if not useBins else branchCross[etBin][etaBin]
              lBranchCross.dump(self._debug, printSort = True,
                                 sortFcn = self._verbose)
          # for branch
        # for eta
      # for et
    # end of (getRates)

    outputs = []
    #if not getRatesOnly:
    outputs.extend((npObject, npBaseInfo))
    #if getRates:
    outputs.extend((branchEffCollectors, branchCrossEffCollectors))
    #outputs = tuple(outputs)
    return outputs
Exemplo n.º 5
0
from zipfile import BadZipfile
from copy import deepcopy

for f in files:
    logger.info("Turning numpy matrix file '%s' into pre-processing file...", f)
    fileparts = f.split("/")
    folder = "/".join(fileparts[0:-1]) + "/"
    fname = fileparts[-1]
    try:
        data = dict(load(f))
    except BadZipfile, e:
        logger.warning("Couldn't load file '%s'. Reason:\n%s", f, str(e))
        continue
    logger.debug("Finished loading file '%s'...", f)
    for key in data:
        if key == "W":
            ppCol = deepcopy(data["W"])
            from TuningTools.PreProc import *

            for obj, idx, parent, _, _ in traverse(ppCol, tree_types=(np.ndarray,), max_depth=3):
                parent[idx] = PreProcChain(RemoveMean(), Projection(matrix=obj), UnitaryRMS())
            # Turn arrays into mutable objects:
            ppCol = ppCol.tolist()
            ppCol = fixPPCol(ppCol, len(ppCol[0][0]), len(ppCol[0]), len(ppCol))
    if fname.endswith(".npz"):
        fname = fname[:-4]
    newFilePath = folder + fname + ".pic"
    logger.info('Saving to: "%s"...', newFilePath)
    place = PreProcArchieve(newFilePath, ppCol=ppCol).save(compress=False)
    logger.info("File saved at path: '%s'", place)
Exemplo n.º 6
0
    def __call__(self, fList, ringerOperation, **kw):
        """
      Read ntuple and return patterns and efficiencies.
      Arguments:
        - fList: The file path or file list path. It can be an argument list of 
        two types:
          o List: each element is a string path to the file;
          o Comma separated string: each path is separated via a comma
          o Folders: Expand folders recursively adding also files within them to analysis
        - ringerOperation: Set Operation type. It can be both a string or the
          RingerOperation
      Optional arguments:
        - filterType [None]: whether to filter. Use FilterType enumeration
        - reference [Truth]: set reference for targets. Use Reference enumeration
        - treePath [Set using operation]: set tree name on file, this may be set to
          use different sources then the default.
            Default for:
              o Offline: Offline/Egamma/Ntuple/electron
              o L2: Trigger/HLT/Egamma/TPNtuple/e24_medium_L1EM18VH
        - l1EmClusCut [None]: Set L1 cluster energy cut if operating on the trigger
        - l2EtCut [None]: Set L2 cluster energy cut value if operating on the trigger
        - offEtCut [None]: Set Offline cluster energy cut value
        - nClusters [None]: Read up to nClusters. Use None to run for all clusters.
        - getRatesOnly [False]: Read up to nClusters. Use None to run for all clusters.
        - etBins [None]: E_T bins (GeV) where the data should be segmented
        - etaBins [None]: eta bins where the data should be segmented
        - ringConfig [100]: A list containing the number of rings available in the data
          for each eta bin.
        - crossVal [None]: Whether to measure benchmark efficiency splitting it
          by the crossVal-validation datasets
        - extractDet [None]: Which detector to export (use Detector enumeration).
          Defaults are:
            o L2Calo: Calorimetry
            o L2: Tracking
            o Offline: Calorimetry
            o Others: CaloAndTrack
        - standardCaloVariables [False]: Whether to extract standard track variables.
        - useTRT [False]: Whether to export TRT information when dumping track
          variables.
        - supportTriggers [True]: Whether reading data comes from support triggers
    """

        __eventBranches = [
            'EventNumber', 'RunNumber', 'RandomRunNumber', 'MCChannelNumber',
            'RandomLumiBlockNumber', 'MCPileupWeight', 'VertexZPosition',
            'Zcand_M', 'Zcand_pt', 'Zcand_eta', 'Zcand_phi', 'Zcand_y',
            'isTagTag'
        ]

        __trackBranches = [
            'elCand2_deltaeta1', 'elCand2_DeltaPOverP',
            'elCand2_deltaphiRescaled', 'elCand2_d0significance',
            'elCand2_trackd0pvunbiased', 'elCand2_eProbabilityHT'
        ]

        __monteCarloBranches = [
            'type',
            'origin',
            'originbkg',
            'typebkg',
            'isTruthElectronFromZ',
            'TruthParticlePdgId',
            'firstEgMotherPdgId',
            'TruthParticleBarcode',
            'firstEgMotherBarcode',
            'MotherPdgId',
            'MotherBarcode',
            'FirstEgMotherTyp',
            'FirstEgMotherOrigin',
            'dRPdgId',
        ]

        __onlineBranches = ['match', 'ringerMatch', 'ringer_rings']

        __offlineBranches = ['et', 'eta']

        # The current pid map used as offline reference
        pidConfigs = {
            key: value
            for key, value in RingerOperation.efficiencyBranches().iteritems()
            if key in (RingerOperation.Offline_LH_Tight,
                       RingerOperation.Offline_LH_Medium,
                       RingerOperation.Offline_LH_Loose,
                       RingerOperation.Offline_LH_VeryLoose)
        }

        # Retrieve information from keyword arguments
        filterType = retrieve_kw(kw, 'filterType', FilterType.DoNotFilter)
        reference = retrieve_kw(kw, 'reference', Reference.AcceptAll)
        offEtCut = retrieve_kw(kw, 'offEtCut', None)
        l2EtCut = retrieve_kw(kw, 'l2EtCut', None)
        treePath = retrieve_kw(kw, 'treePath', 'ZeeCandidate')
        nClusters = retrieve_kw(kw, 'nClusters', None)
        etBins = retrieve_kw(kw, 'etBins', None)
        etaBins = retrieve_kw(kw, 'etaBins', None)
        crossVal = retrieve_kw(kw, 'crossVal', None)
        ringConfig = retrieve_kw(kw, 'ringConfig', 100)
        monitoring = retrieve_kw(kw, 'monitoring', None)
        pileupRef = retrieve_kw(kw, 'pileupRef', NotSet)
        getRates = retrieve_kw(kw, 'getRates', True)
        getRatesOnly = retrieve_kw(kw, 'getRatesOnly', False)
        getTagsOnly = retrieve_kw(kw, 'getTagsOnly', False)
        extractDet = retrieve_kw(kw, 'extractDet', None)

        import ROOT
        #gROOT.ProcessLine (".x $ROOTCOREDIR/scripts/load_packages.C");
        #ROOT.gROOT.Macro('$ROOTCOREDIR/scripts/load_packages.C')
        if ROOT.gSystem.Load('libTuningTools') < 0:
            self._fatal("Could not load TuningTools library", ImportError)

        if 'level' in kw: self.level = kw.pop('level')
        # and delete it to avoid mistakes:
        checkForUnusedVars(kw, self._warning)
        del kw

        ### Parse arguments
        # Also parse operation, check if its type is string and if we can
        # transform it to the known operation enum:
        fList = csvStr2List(fList)
        fList = expandFolders(fList)
        ringerOperation = RingerOperation.retrieve(ringerOperation)
        reference = Reference.retrieve(reference)

        # Offline E_T cut
        if offEtCut:
            offEtCut = 1000. * offEtCut  # Put energy in MeV

        # Check whether using bins
        useBins = False
        useEtBins = False
        useEtaBins = False
        nEtaBins = 1
        nEtBins = 1

        if etaBins is None: etaBins = npCurrent.fp_array([])
        if type(etaBins) is list: etaBins = npCurrent.fp_array(etaBins)
        if etBins is None: etBins = npCurrent.fp_array([])
        if type(etBins) is list: etBins = npCurrent.fp_array(etBins)

        if etBins.size:
            etBins = etBins * 1000.  # Put energy in MeV
            nEtBins = len(etBins) - 1
            if nEtBins >= np.iinfo(npCurrent.scounter_dtype).max:
                self._fatal((
                    'Number of et bins (%d) is larger or equal than maximum '
                    'integer precision can hold (%d). Increase '
                    'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
                ), nEtBins,
                            np.iinfo(npCurrent.scounter_dtype).max)
            # Flag that we are separating data through bins
            useBins = True
            useEtBins = True
            self._debug('E_T bins enabled.')

        if not type(ringConfig) is list and not type(ringConfig) is np.ndarray:
            ringConfig = [ringConfig] * (len(etaBins) -
                                         1) if etaBins.size else 1
        if type(ringConfig) is list:
            ringConfig = npCurrent.int_array(ringConfig)
        if not len(ringConfig):
            self._fatal('Rings size must be specified.')

        if etaBins.size:
            nEtaBins = len(etaBins) - 1
            if nEtaBins >= np.iinfo(npCurrent.scounter_dtype).max:
                self._fatal((
                    'Number of eta bins (%d) is larger or equal than maximum '
                    'integer precision can hold (%d). Increase '
                    'TuningTools.coreDef.npCurrent scounter_dtype number of bytes.'
                ), nEtaBins,
                            np.iinfo(npCurrent.scounter_dtype).max)
            if len(ringConfig) != nEtaBins:
                self._fatal((
                    'The number of rings configurations (%r) must be equal than '
                    'eta bins (%r) region config'), ringConfig, etaBins)
            useBins = True
            useEtaBins = True
            self._debug('eta bins enabled.')
        else:
            self._debug('eta/et bins disabled.')

        # The base information holder, such as et, eta and pile-up
        if pileupRef is NotSet:
            if ringerOperation > 0:
                pileupRef = PileupReference.avgmu
            else:
                pileupRef = PileupReference.nvtx

        pileupRef = PileupReference.retrieve(pileupRef)
        self._info("Using '%s' as pile-up reference.",
                   PileupReference.tostring(pileupRef))

        # Candidates: (1) is tags and (2) is probes. Default is probes
        self._candIdx = 1 if getTagsOnly else 2

        # Mutual exclusive arguments:
        if not getRates and getRatesOnly:
            self._logger.error(
                "Cannot run with getRates set to False and getRatesOnly set to True. Setting getRates to True."
            )
            getRates = True

        ### Prepare to loop:
        t = ROOT.TChain(treePath)
        for inputFile in progressbar(fList,
                                     len(fList),
                                     logger=self._logger,
                                     prefix="Creating collection tree "):
            # Check if file exists
            f = ROOT.TFile.Open(inputFile, 'read')
            if not f or f.IsZombie():
                self._warning('Couldn' 't open file: %s', inputFile)
                continue
            # Inform user whether TTree exists, and which options are available:
            self._debug("Adding file: %s", inputFile)
            obj = f.Get(treePath)
            if not obj:
                self._warning("Couldn't retrieve TTree (%s)!", treePath)
                self._info("File available info:")
                f.ReadAll()
                f.ReadKeys()
                f.ls()
                continue
            elif not isinstance(obj, ROOT.TTree):
                self._fatal("%s is not an instance of TTree!", treePath,
                            ValueError)
            t.Add(inputFile)
        # Turn all branches off.
        t.SetBranchStatus("*", False)
        # RingerPhysVal hold the address of required branches
        event = ROOT.SkimmedNtuple()
        # Ready to retrieve the total number of events
        t.GetEntry(0)
        ## Allocating memory for the number of entries
        entries = t.GetEntries()
        nobs = entries if (nClusters is None or nClusters > entries or nClusters < 1) \
                                                                    else nClusters
        ## Retrieve the dependent operation variables:
        if useEtBins:
            etBranch = ('elCand%d_et') % (
                self._candIdx) if ringerOperation < 0 else ('fcCand%d_et') % (
                    self._candIdx)
            self.__setBranchAddress(t, etBranch, event)
            self._debug("Added branch: %s", etBranch)
            npEt = npCurrent.scounter_zeros(
                shape=npCurrent.shape(npat=1, nobs=nobs))
            self._debug("Allocated npEt    with size %r", npEt.shape)

        if useEtaBins:
            etaBranch = ('elCand%d_eta') % (
                self._candIdx) if ringerOperation < 0 else ('fcCand%d_eta') % (
                    self._candIdx)
            self.__setBranchAddress(t, etaBranch, event)
            self._debug("Added branch: %s", etaBranch)
            npEta = npCurrent.scounter_zeros(
                shape=npCurrent.shape(npat=1, nobs=nobs))
            self._debug("Allocated npEta   with size %r", npEta.shape)

        if reference is Reference.Truth:
            self.__setBranchAddress(t, ('elCand%d_isTruthElectronFromZ') %
                                    (self._candIdx), event)

        for var in __offlineBranches:
            self.__setBranchAddress(t, ('elCand%d_%s') % (self._candIdx, var),
                                    event)
        #for var in pidConfigs.values():
        #  self.__setBranchAddress(t,var,event)

        for var in __trackBranches:
            self.__setBranchAddress(t, var, event)

        # Add online branches if using Trigger
        if ringerOperation > 0:
            for var in __onlineBranches:
                self.__setBranchAddress(t,
                                        ('fcCand%d_%s') % (self._candIdx, var),
                                        event)
        else:
            self.__setBranchAddress(t, ('elCand%d_%s') %
                                    (self._candIdx, 'ringer_rings'), event)

        if pileupRef is PileupReference.nvtx:
            pileupBranch = 'Nvtx'
            pileupDataType = np.uint16
        elif pileupRef is PileupReference.avgmu:
            pileupBranch = 'averageIntPerXing'
            pileupDataType = np.float32
        else:
            raise NotImplementedError(
                "Pile-up reference %r is not implemented." % pileupRef)

        #for var in __eventBranches +
        for var in [pileupBranch]:
            self.__setBranchAddress(t, var, event)

        ### Allocate memory
        if extractDet == (Detector.Calorimetry):
            npat = ringConfig.max()
        elif extractDet == (Detector.Tracking):
            npat = len(__trackBranches)
        # NOTE: Check if pat is correct for both Calo and track data
        elif extractDet in (Detector.CaloAndTrack, Detector.All):
            npat = ringConfig.max() + len(__trackBranches)

        npPatterns = npCurrent.fp_zeros(shape=npCurrent.shape(
            npat=npat,  #getattr(event, ringerBranch).size()
            nobs=nobs))
        self._debug("Allocated npPatterns with size %r", npPatterns.shape)

        baseInfoBranch = BaseInfo(
            (etBranch, etaBranch, pileupBranch),
            (npCurrent.fp_dtype, npCurrent.fp_dtype, pileupDataType))

        baseInfo = [
            None,
        ] * baseInfoBranch.nInfo
        # Add E_T, eta and luminosity information
        npBaseInfo = [
            npCurrent.zeros(shape=npCurrent.shape(npat=1, nobs=nobs),
                            dtype=baseInfoBranch.dtype(idx))
            for idx in baseInfoBranch
        ]

        from TuningTools.CreateData import BranchEffCollector, BranchCrossEffCollector
        branchEffCollectors = OrderedDict()
        branchCrossEffCollectors = OrderedDict()

        if ringerOperation < 0:
            from operator import itemgetter
            benchmarkDict = OrderedDict(
                sorted([(key, value) for key, value in
                        RingerOperation.efficiencyBranches().iteritems()
                        if key < 0 and not (isinstance(value, (list, tuple)))],
                       key=itemgetter(0)))
        else:
            benchmarkDict = OrderedDict()

        for key, val in benchmarkDict.iteritems():
            branchEffCollectors[key] = list()
            branchCrossEffCollectors[key] = list()
            # Add efficincy branch:
            if ringerOperation < 0:
                self.__setBranchAddress(t, val, event)

            for etBin in range(nEtBins):
                if useBins:
                    branchEffCollectors[key].append(list())
                    branchCrossEffCollectors[key].append(list())
                for etaBin in range(nEtaBins):
                    etBinArg = etBin if useBins else -1
                    etaBinArg = etaBin if useBins else -1
                    argList = [
                        RingerOperation.tostring(key), val, etBinArg, etaBinArg
                    ]
                    branchEffCollectors[key][etBin].append(
                        BranchEffCollector(*argList))
                    if crossVal:
                        branchCrossEffCollectors[key][etBin].append(
                            BranchCrossEffCollector(entries, crossVal,
                                                    *argList))
                # etBin
            # etaBin
        # benchmark dict

        if self._logger.isEnabledFor(LoggingLevel.DEBUG):
            self._debug(
                'Retrieved following branch efficiency collectors: %r', [
                    collector[0].printName
                    for collector in traverse(branchEffCollectors.values())
                ])

        etaBin = 0
        etBin = 0
        step = int(entries / 100) if int(entries / 100) > 0 else 1

        ## Start loop!
        self._info("There is available a total of %d entries.", entries)
        cPos = 0

        ### Loop over entries
        for entry in progressbar(range(entries),
                                 entries,
                                 step=step,
                                 logger=self._logger,
                                 prefix="Looping over entries "):

            self._verbose('Processing eventNumber: %d/%d', entry, entries)
            t.GetEntry(entry)

            #print self.__getEt(event)
            if event.elCand2_et < offEtCut:
                self._debug(
                    "Ignoring entry due to offline E_T cut. E_T = %1.3f < %1.3f MeV",
                    event.elCand2_et, offEtCut)
                continue
            # Add et distribution for all events

            if ringerOperation > 0:
                if event.fcCand2_et < l2EtCut:
                    self._debug("Ignoring entry due Fast Calo E_T cut.")
                    continue
                # Add et distribution for all events

            # Set discriminator target:
            target = Target.Unknown
            # Monte Carlo cuts
            if reference is Reference.Truth:
                if getattr(event, ('elCand%d_isTruthElectronFromZ') %
                           (self._candIdx)):
                    target = Target.Signal
                elif not getattr(event, ('elCand%d_isTruthElectronFromZ') %
                                 (self._candIdx)):
                    target = Target.Background
            # Offline Likelihood cuts
            elif reference is Reference.Off_Likelihood:
                if getattr(event,
                           pidConfigs[RingerOperation.Offline_LH_Tight]):
                    target = Target.Signal
                elif not getattr(
                        event,
                        pidConfigs[RingerOperation.Offline_LH_VeryLoose]):
                    target = Target.Background
            # By pass everything (Default)
            elif reference is Reference.AcceptAll:
                target = Target.Signal if filterType is FilterType.Signal else Target.Background

            # Run filter if it is defined
            if filterType and \
               ( (filterType is FilterType.Signal and target != Target.Signal) or \
                 (filterType is FilterType.Background and target != Target.Background) or \
                 (target == Target.Unknown) ):
                #self._verbose("Ignoring entry due to filter cut.")
                continue

            ## Retrieve base information and rings:
            for idx in baseInfoBranch:
                lInfo = getattr(event, baseInfoBranch.retrieveBranch(idx))
                baseInfo[idx] = lInfo
            # Retrieve dependent operation region
            if useEtBins:
                etBin = self.__retrieveBinIdx(etBins, baseInfo[0])
            if useEtaBins:
                etaBin = self.__retrieveBinIdx(etaBins, np.fabs(baseInfo[1]))

            # Check if bin is within range (when not using bins, this will always be true):
            if (etBin < nEtBins and etaBin < nEtaBins):

                if useEtBins: npEt[cPos] = etBin
                if useEtaBins: npEta[cPos] = etaBin
                # Online operation
                cPat = 0
                caloAvailable = True
                if ringerOperation > 0 and self.__get_ringer_onMatch(
                        event) < 1:
                    continue
                # TODO Treat case where we don't use rings energy
                # Check if the rings empty
                if self.__get_rings_energy(event, ringerOperation).empty():
                    self._debug(
                        'No rings available in this event. Skipping...')
                    caloAvailable = False

                # Retrieve rings:
                if extractDet in (Detector.Calorimetry, Detector.CaloAndTrack,
                                  Detector.All):
                    if caloAvailable:
                        try:
                            pass
                            patterns = stdvector_to_list(
                                self.__get_rings_energy(
                                    event, ringerOperation))
                            lPat = len(patterns)
                            if lPat == ringConfig[etaBin]:
                                npPatterns[npCurrent.access(
                                    pidx=slice(cPat, ringConfig[etaBin]),
                                    oidx=cPos)] = patterns
                            else:
                                oldEtaBin = etaBin
                                if etaBin > 0 and ringConfig[etaBin -
                                                             1] == lPat:
                                    etaBin -= 1
                                elif etaBin + 1 < len(
                                        ringConfig) and ringConfig[etaBin +
                                                                   1] == lPat:
                                    etaBin += 1
                                npPatterns[npCurrent.access(
                                    pidx=slice(cPat, ringConfig[etaBin]),
                                    oidx=cPos)] = patterns
                                self._warning((
                                    "Recovered event which should be within eta bin (%d: %r) "
                                    "but was found to be within eta bin (%d: %r). "
                                    "Its read eta value was of %f."),
                                              oldEtaBin,
                                              etaBins[oldEtaBin:oldEtaBin + 2],
                                              etaBin,
                                              etaBins[etaBin:etaBin + 2],
                                              np.fabs(getattr(
                                                  event, etaBranch)))
                        except ValueError:
                            self._logger.error((
                                "Patterns size (%d) do not match expected "
                                "value (%d). This event eta value is: %f, and ringConfig is %r."
                            ), lPat, ringConfig[etaBin],
                                               np.fabs(
                                                   getattr(event, etaBranch)),
                                               ringConfig)
                            continue
                        cPat += ringConfig[etaBin]
                    else:
                        # Also display warning when extracting only calorimetry!
                        self._warning("Rings not available")
                        continue

                if extractDet in (Detector.Tracking, Detector.CaloAndTrack,
                                  Detector.All):
                    for var in __trackBranches:
                        npPatterns[npCurrent.access(pidx=cPat,
                                                    oidx=cPos)] = getattr(
                                                        event, var)
                        if var == 'elCand2_eProbabilityHT':
                            from math import log
                            TRT_PID = npPatterns[npCurrent.access(pidx=cPat,
                                                                  oidx=cPos)]
                            epsilon = 1e-99
                            if TRT_PID >= 1.0: TRT_PID = 1.0 - 1.e-15
                            elif TRT_PID <= 0.0: TRT_PID = epsilon
                            tau = 15.0
                            TRT_PID = -(1 / tau) * log((1.0 / TRT_PID) - 1.0)
                            npPatterns[npCurrent.access(pidx=cPat,
                                                        oidx=cPos)] = TRT_PID
                        cPat += 1

                ## Retrieve rates information:
                if getRates and ringerOperation < 0:
                    #event.elCand2_isEMVerLoose2015 = not( event.elCand2_isEMVeryLoose2015 & 34896 )
                    event.elCand2_isEMLoose2015 = not (
                        event.elCand2_isEMLoose2015 & 34896)
                    event.elCand2_isEMMedium2015 = not (
                        event.elCand2_isEMMedium2015 & 276858960)
                    event.elCand2_isEMTight2015 = not (
                        event.elCand2_isEMTight2015 & 281053264)

                    for branch in branchEffCollectors.itervalues():
                        if not useBins:
                            branch.update(event)
                        else:
                            branch[etBin][etaBin].update(event)
                    if crossVal:
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            if not useBins:
                                branchCross.update(event)
                            else:
                                branchCross[etBin][etaBin].update(event)
                # end of (getRates)

                if not monitoring is None:
                    self.__fillHistograms(monitoring, filterType, pileupRef,
                                          pidConfigs, event)

                # We only increment if this cluster will be computed
                cPos += 1
            # end of (et/eta bins)

            # Limit the number of entries to nClusters if desired and possible:
            if not nClusters is None and cPos >= nClusters:
                break
        # for end

        ## Treat the rings information
        ## Remove not filled reserved memory space:
        if npPatterns.shape[npCurrent.odim] > cPos:
            npPatterns = np.delete(npPatterns,
                                   slice(cPos, None),
                                   axis=npCurrent.odim)

        ## Segment data over bins regions:
        # Also remove not filled reserved memory space:
        if useEtBins:
            npEt = npCurrent.delete(npEt, slice(cPos, None))
        if useEtaBins:
            npEta = npCurrent.delete(npEta, slice(cPos, None))

        # Treat
        standardCaloVariables = False
        npObject = self.treatNpInfo(
            cPos,
            npEt,
            npEta,
            useEtBins,
            useEtaBins,
            nEtBins,
            nEtaBins,
            standardCaloVariables,
            ringConfig,
            npPatterns,
        )

        data = [
            self.treatNpInfo(cPos, npEt, npEta, useEtBins, useEtaBins, nEtBins,
                             nEtaBins, standardCaloVariables, ringConfig,
                             npData) for npData in npBaseInfo
        ]
        npBaseInfo = npCurrent.array(data, dtype=np.object)

        if getRates:
            if crossVal:
                for etBin in range(nEtBins):
                    for etaBin in range(nEtaBins):
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            if not useBins:
                                branchCross.finished()
                            else:
                                branchCross[etBin][etaBin].finished()

            # Print efficiency for each one for the efficiency branches analysed:
            for etBin in range(nEtBins) if useBins else range(1):
                for etaBin in range(nEtaBins) if useBins else range(1):
                    for branch in branchEffCollectors.itervalues():
                        lBranch = branch if not useBins else branch[etBin][
                            etaBin]
                        self._info('%s', lBranch)
                    if crossVal:
                        for branchCross in branchCrossEffCollectors.itervalues(
                        ):
                            lBranchCross = branchCross if not useBins else branchCross[
                                etBin][etaBin]
                            lBranchCross.dump(self._debug,
                                              printSort=True,
                                              sortFcn=self._verbose)
                    # for branch
                # for eta
            # for et
        else:
            branchEffCollectors = None
            branchCrossEffCollectors = None
        # end of (getRates)

        outputs = []
        outputs.extend((npObject, npBaseInfo))
        if getRates:
            outputs.extend((branchEffCollectors, branchCrossEffCollectors))

        return outputs
Exemplo n.º 7
0
    refIdx = etIdx + 3
    if etIdx == 0: # 15 up to 20
      shorterEffTable[etIdx,etaIdx] = val[refIdx,etaIdx]
    if etIdx == 1: # merge 20, 25
      shorterEffTable[etIdx,etaIdx] = (val[refIdx,etaIdx]*.4 + val[refIdx+1,etaIdx]*.6)
    if etIdx == 2: # merge 30, 35
      shorterEffTable[etIdx,etaIdx] = (val[refIdx+1,etaIdx]*.48 + val[refIdx+2,etaIdx]*.52)
    if etIdx == 3: # merge 40, 45
      shorterEffTable[etIdx,etaIdx] = (val[refIdx+2,etaIdx]*.5 + val[refIdx+3,etaIdx]*.5)
  return shorterEffTable

from RingerCore import traverse
pdrefs = mergeEffTable( medium20160701 )
print pdrefs
pfrefs = np.array( [[0.05]*len(etaBins)]*len(etBins) )*100. # 3 5 7 10
efficiencyValues = np.array([np.array([refs]) for refs in zip(traverse(pdrefs,tree_types=(np.ndarray),simple_ret=True)
                                                 ,traverse(pfrefs,tree_types=(np.ndarray),simple_ret=True))]).reshape(pdrefs.shape + (2,) )
print efficiencyValues
basePath     = '/home/wsfreund/CERN-DATA'
sgnInputFile = 'user.jodafons.mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.AOD.e3601_s2876_r7917_r7676.dump.trigPB.p0200_GLOBAL/'
bkgInputFile = 'user.jodafons.mc15_13TeV.423300.Pythia8EvtGen_A14NNPDF23LO_perf_JF17.merge.AOD.e3848_s2876_r7917_r7676.dump.trigEL.p0201_GLOBAL/'
outputFile   = 'mc15_13TeV.361106.423300.sgn.trigegprobes.bkg.vetotruth.trig.l2calo.eg.std.grid.medium'
treePath     = ["HLT/Egamma/Expert/support/probes",
                "HLT/Egamma/Expert/support/trigger"]
#crossValPath = 'crossValid_5sorts.pic.gz'


#from TuningTools  import CrossValidArchieve
#with CrossValidArchieve( crossValPath ) as CVArchieve:
#  crossVal = CVArchieve
#  del CVArchieve
Exemplo n.º 8
0
  ,[ 0.95465, 0.94708108, 0.8706,  0.93477684] # Et 20
  ,[ 0.96871, 0.96318919, 0.87894, 0.95187642] # Et 30
  ,[ 0.97425, 0.97103378, 0.884,   0.96574474] # Et 40
  ,[ 0.97525, 0.97298649, 0.887,   0.96703158]])*100. # Et 50
  
veryloose20160701 = np.array(
  [[ 0.978  ,    0.96458108, 0.9145 ,    0.95786316]
  ,[ 0.98615,    0.97850541, 0.9028 ,    0.96738947]
  ,[ 0.99369,    0.9900427 , 0.90956,    0.97782105]
  ,[ 0.995  ,    0.99293919, 0.917  ,    0.98623421]
  ,[ 0.99525,    0.99318919, 0.9165 ,    0.98582632]])*100.

from RingerCore import traverse
pdrefs = medium20160701
pfrefs = np.array( [[0.05]*len(etaBins)]*len(etBins) )*100.
efficiencyValues = np.array([np.array([refs]) for refs in zip(traverse(pdrefs,tree_types=(np.ndarray),simple_ret=True)
                                                 ,traverse(pfrefs,tree_types=(np.ndarray),simple_ret=True))]).reshape(pdrefs.shape + (2,) )
basePath     = '/home/jodafons/CERN-DATA/backup/old_samples/mc15_13TeV/'
sgnInputFile = 'user.jodafons.mc15_13TeV.361106.Zee.merge.SelectionZee.PhysVal.r0005_GLOBAL'
bkgInputFile = 'user.jodafons.mc15_13TeV.423300.JF17.merge.SelectionFakes.PhysVal.r0005_GLOBAL'
outputFile   = 'mc15_13TeV.361106.423300.sgn.trigegprobes.bkg.vetotruth.trig.l2calo.medium'
treePath     = ["run_284500/HLT/Egamma/Expert/support/probes",
                "run_284500/HLT/Egamma/Expert/support/fakes"]
#crossValPath = 'crossValid_5sorts.pic.gz'


#from TuningTools  import CrossValidArchieve
#with CrossValidArchieve( crossValPath ) as CVArchieve:
#  crossVal = CVArchieve
#  del CVArchieve
Exemplo n.º 9
0
  def getModels(self, summaryInfoList,  **kw):
        
    refBenchCol         = kw.pop( 'refBenchCol',       None              )
    configCol           = kw.pop( 'configCol',         []                )
    muBin               = kw.pop( 'muBin',             [-999,9999]       )
    checkForUnusedVars( kw, self._logger.warning )

    # Treat the summaryInfoList
    if not isinstance( summaryInfoList, (list,tuple)):
      summaryInfoList = [ summaryInfoList ]
    summaryInfoList = list(traverse(summaryInfoList,simple_ret=True))
    nSummaries = len(summaryInfoList)
    if not nSummaries:
      logger.fatal("Summary dictionaries must be specified!")

    if refBenchCol is None:
      refBenchCol = summaryInfoList[0].keys()

    # Treat the reference benchmark list
    if not isinstance( refBenchCol, (list,tuple)):
      refBenchCol = [ refBenchCol ] * nSummaries

    if len(refBenchCol) == 1:
      refBenchCol = refBenchCol * nSummaries

    nRefs = len(list(traverse(refBenchCol,simple_ret=True)))

    # Make sure that the lists are the same size as the reference benchmark:
    nConfigs = len(list(traverse(configCol,simple_ret=True)))
    if nConfigs == 0:
      configCol = [None for i in range(nRefs)]
    elif nConfigs == 1:
      configCol = configCol * nSummaries
    nConfigs = len(list(traverse(configCol,simple_ret=True)))

    if nConfigs != nRefs:
      logger.fatal("Summary size is not equal to the configuration list.", ValueError)
    
    if nRefs == nConfigs == nSummaries:
      # If user input data without using list on the configuration, put it as a list:
      for o, idx, parent, _, _ in traverse(configCol):
        parent[idx] = [o]
      for o, idx, parent, _, _ in traverse(refBenchCol):
        parent[idx] = [o]

    configCol   = list(traverse(configCol,max_depth_dist=1,simple_ret=True))
    refBenchCol = list(traverse(refBenchCol,max_depth_dist=1,simple_ret=True))
    nConfigs = len(configCol)
    nSummary = len(refBenchCol)

    if nRefs != nConfigs != nSummary:
      logger.fatal("Number of references, configurations and summaries do not match!", ValueError)

    discrList = []

    from itertools import izip, count
    for summaryInfo, refBenchmarkList, configList in zip(summaryInfoList,refBenchCol,configCol):

      if type(summaryInfo) is str:
        self._logger.info('Loading file "%s"...', summaryInfo)
        summaryInfo = load(summaryInfo)
      elif type(summaryInfo) is dict:
        pass
      else:
        logger.fatal("Cross-valid summary info is not string and not a dictionary.", ValueError)
    

      for idx, refBenchmarkName, config in izip(count(), refBenchmarkList, configList):
        
        try:
          key = filter(lambda x: refBenchmarkName in x, summaryInfo)[0]
          refDict = summaryInfo[ key ]
        except IndexError :
          self._logger.fatal("Could not find reference %s in summaryInfo. Available options are: %r", refBenchmarkName, summaryInfo.keys())
        
        self._logger.info("Using Reference key: %s", key )
        

        ppInfo = summaryInfo['infoPPChain']
        etBinIdx = refDict['etBinIdx']
        etaBinIdx = refDict['etaBinIdx']
        etBin = refDict['etBin']

        etaBin = refDict['etaBin']
        info   = refDict['infoOpBest'] if config is None else refDict['config_' + str(config).zfill(3)]['infoOpBest']
            
        # Check if user specified parameters for exporting discriminator
        # operation information:
        sort =  info['sort']
        init =  info['init']
        pyThres = info['cut']
        
        from RingerCore import retrieveRawDict
        if isinstance( pyThres, float ):
          pyThres = RawThreshold( thres = pyThres
                                , etBinIdx = etBinIdx, etaBinIdx = etaBinIdx
                                , etBin = etBin, etaBin =  etaBin)

        else:
          # Get the object from the raw dict
          pyThres = retrieveRawDict( pyThres )

        if pyThres.etBin is None:
          pyThres.etBin = etBin
        elif pyThres.etBin is '':
          pyThres.etBin = etBin
        elif isinstance( pyThres.etBin, (list,tuple)):
          pyThres.etBin = np.array( pyThres.etBin)

        if not(np.array_equal( pyThres.etBin, etBin )):
          self._logger.fatal("etBin does not match for threshold! Should be %r, is %r", pyThres.etBin, etBin )
        if pyThres.etaBin is None:
          pyThres.etaBin = etaBin
        elif pyThres.etaBin is '':
          pyThres.etaBin = etaBin
        elif isinstance( pyThres.etaBin, (list,tuple)):
          pyThres.etaBin = np.array( pyThres.etaBin)
        if not(np.array_equal( pyThres.etaBin, etaBin )):
          self._logger.fatal("etaBin does not match for threshold! Should be %r, is %r", pyThres.etaBin, etaBin )

        if type(pyThres) is RawThreshold:
          thresValues = [pyThres.thres]
        else:
          thresValues = [pyThres.slope, pyThres.intercept, pyThres.rawThres]

        pyPreProc = ppInfo['sort_'+str(sort).zfill(3)]['items'][0]
        pyPreProc = retrieveRawDict( pyPreProc )

        useCaloRings=False; useTrack=False; useShowerShape=False

        if type(pyPreProc) is Norm1:
          useCaloRings=True
        elif type(pyPreProc) is TrackSimpleNorm:
          useTrack=True
        elif type(pyPreProc) is ShowerShapesSimpleNorm:
          useShowerShape=True
        elif type(pyPreProc) is ExpertNetworksSimpleNorm:
          useCaloRings=True; useTrack=True
        elif type(pyPreProc) is ExpertNetworksShowerShapeSimpleNorm:
          useCaloRings=True; useShowerShape=True
        elif type(pyPreProc) is ExpertNetworksShowerShapeAndTrackSimpleNorm:
          useCaloRings=True; useTrack=True; useShowerShape=True
        elif type(pyPreProc) is PreProcMerge:
          for slot in pyPreProc.slots:
            if type(pyPreProc) is Norm1:
              useCaloRings=True
            elif type(pyPreProc) is TrackSimpleNorm:
              useTrack=True
            elif type(pyPreProc) is ShowerShapesSimpleNorm:
              useShowerShape=True
            elif type(pyPreProc) is ExpertNetworksSimpleNorm:
              useCaloRings=True; useTrack=True
            elif type(pyPreProc) is ExpertNetworksShowerShapeSimpleNorm:
              useCaloRings=True; useShowerShape=True
            elif type(pyPreProc) is ExpertNetworksShowerShapeAndTrackSimpleNorm:
              useCaloRings=True; useTrack=True; useShowerShape=True
        else:
          self._logger.fatal('PrepProc strategy not found...') 


        discrDict = info['discriminator']
        model   = { 
                  'discriminator' : discrDict,
                  'threshold'     : thresValues,
                  'etBin'         : etBin,
                  'etaBin'        : etaBin,
                  'muBin'         : muBin,
                  'etBinIdx'      : etBinIdx,
                  'etaBinIdx'     : etaBinIdx,
 
                  }
      
        removeOutputTansigTF = refDict.get('removeOutputTansigTF', None )
        
        model['removeOutputTansigTF'] = removeOutputTansigTF
        model['useCaloRings']         = useCaloRings
        model['useShowerShape']       = useShowerShape
        model['useTrack']             = useTrack

                 
        discrList.append( model )
  
        self._logger.info('neuron = %d, sort = %d, init = %d',
                     info['neuron'],
                     info['sort'],
                     info['init'])
        
      # for benchmark
    # for summay in list

    return discrList
Exemplo n.º 10
0
from RingerCore import Logger, LoggingLevel, save, load, expandFolders, traverse
import numpy as np
from TuningTools.coreDef import retrieve_npConstants
npCurrent, _ = retrieve_npConstants()
npCurrent.level = args.output_level
logger = Logger.getModuleLogger( __name__, args.output_level )

files = expandFolders( args.inputs ) # FIXME *.npz

from zipfile import BadZipfile
for f in files:
  logger.info("Changing representation of file '%s'...", f)
  try:
    data = dict(load(f))
  except BadZipfile, e:
    logger.warning("Couldn't load file '%s'. Reason:\n%s", f, str(e))
    continue
  logger.debug("Finished loading file '%s'...", f)
  for key in data:
    if key == 'W':
      for obj, idx,  parent, _, _ in traverse(data[key],
                                              tree_types = (np.ndarray,),
                                              max_depth = 3):
        parent[idx] = obj.T
    elif type(data[key]) is np.ndarray:
      logger.debug("Checking key '%s'...", key)
      data[key] = npCurrent.toRepr(data[key])
  path = save(data, f, protocol = 'savez_compressed')
  logger.info("Overwritten file '%s'",f)
Exemplo n.º 11
0
 from TuningTools.PreProc import *
 from TuningTools import CrossValid 
 if args.pp_nEtaBins is NotSet:
   raise NoBinInfo('eta')
 if args.pp_nEtBins is NotSet:
   raise NoBinInfo('et')
 # Retrieve information:
 import ast, re
 replacer = re.compile("(\w+(\(.*?\))?)")
 args.ppCol = replacer.sub(r'"\1"', args.ppCol)
 ppCol = ast.literal_eval(args.ppCol)
 from RingerCore import traverse
 class_str_re = re.compile("(\w+)(\(.*?\))?")
 fix_args = re.compile("(\([^{}]+)((?<!,)\))")
 dict_args = re.compile("(\{.*\})")
 for str_, idx, parent, _, _ in traverse(ppCol):
   m = class_str_re.match(str_)
   if m:
     # The class representation in string:
     class_str = m.group(1)
     # Retrieve the class itself (must be from PreProc module)
     tClass = str_to_class( "TuningTools.PreProc", class_str)
     # Retrieve the arguments, if available
     class_attr = m.group(2)
     if class_attr:
       # Fix the arguments:
       m2 = fix_args.search( m.group(2) )
       if m2:
         class_attr = m2.expand(r'\1,\2')
         # Parse it:
         class_attr_parsed = ast.literal_eval( class_attr )
Exemplo n.º 12
0
                f)
    fileparts = f.split('/')
    folder = '/'.join(fileparts[0:-1]) + '/'
    fname = fileparts[-1]
    try:
        data = dict(load(f))
    except BadZipfile, e:
        logger.warning("Couldn't load file '%s'. Reason:\n%s", f, str(e))
        continue
    logger.debug("Finished loading file '%s'...", f)
    for key in data:
        if key == 'W':
            ppCol = deepcopy(data['W'])
            from TuningTools.PreProc import *
            for obj, idx, parent, _, _ in traverse(ppCol,
                                                   tree_types=(np.ndarray, ),
                                                   max_depth=3):
                parent[idx] = PreProcChain(RemoveMean(),
                                           Projection(matrix=obj),
                                           UnitaryRMS())
            # Turn arrays into mutable objects:
            ppCol = ppCol.tolist()
            ppCol = fixPPCol(ppCol, len(ppCol[0][0]), len(ppCol[0]),
                             len(ppCol))
    if fname.endswith('.npz'):
        fname = fname[:-4]
    newFilePath = folder + fname + '.pic'
    logger.info('Saving to: "%s"...', newFilePath)
    place = PreProcArchieve(newFilePath, ppCol=ppCol).save(compress=False)
    logger.info("File saved at path: '%s'", place)
Exemplo n.º 13
0
args = parser.parse_args(namespace=LoggerNamespace())

from RingerCore import Logger, LoggingLevel, save, load, expandFolders, traverse
import numpy as np
from TuningTools.coreDef import npCurrent
npCurrent.level = args.output_level
logger = Logger.getModuleLogger(__name__, args.output_level)

files = expandFolders(args.inputs)  # FIXME *.npz

from zipfile import BadZipfile
for f in files:
    logger.info("Changing representation of file '%s'...", f)
    try:
        data = dict(load(f))
    except BadZipfile, e:
        logger.warning("Couldn't load file '%s'. Reason:\n%s", f, str(e))
        continue
    logger.debug("Finished loading file '%s'...", f)
    for key in data:
        if key == 'W':
            for obj, idx, parent, _, _ in traverse(data[key],
                                                   tree_types=(np.ndarray, ),
                                                   max_depth=3):
                parent[idx] = obj.T
        elif type(data[key]) is np.ndarray:
            logger.debug("Checking key '%s'...", key)
            data[key] = npCurrent.toRepr(data[key])
    path = save(data, f, protocol='savez_compressed')
    logger.info("Overwritten file '%s'", f)
Exemplo n.º 14
0
     args.pp_nEtaBins = 1
     args.pp_nEtBins = 1
 elif args.pp_nEtBins is NotSet:
     raise NoBinInfo('et')
 elif args.pp_nEtaBins is NotSet:
     raise NoBinInfo('eta')
 # Retrieve information:
 import ast, re
 replacer = re.compile("(\w+(\(.*?\))?)")
 args.ppCol = replacer.sub(r'"\1"', args.ppCol)
 ppCol = ast.literal_eval(args.ppCol)
 from RingerCore import traverse
 class_str_re = re.compile("(\w+)(\(.*?\))?")
 fix_args = re.compile("(\([^{}]+)((?<!,)\))")
 dict_args = re.compile("(\{.*\})")
 for str_, idx, parent, _, _ in traverse(ppCol):
     m = class_str_re.match(str_)
     if m:
         # The class representation in string:
         class_str = m.group(1)
         # Retrieve the class itself (must be from PreProc module)
         tClass = str_to_class("TuningTools.PreProc", class_str)
         # Retrieve the arguments, if available
         class_attr = m.group(2)
         if class_attr:
             # Fix the arguments:
             m2 = fix_args.search(m.group(2))
             if m2:
                 class_attr = m2.expand(r'\1,\2')
                 # Parse it:
                 class_attr_parsed = ast.literal_eval(class_attr)
Exemplo n.º 15
0
def expandFolders(pathList, filters=None, logger=None, level=None):
    """
    Expand all folders to the contained files using the filters on pathList

    Input arguments:

    -> pathList: a list containing paths to files and folders;
    filters;
    -> filters: return a list for each filter with the files contained on the
    list matching the filter glob.
    -> logger: whether to print progress using logger;
    -> level: logging level to print messages with logger;

    WARNING: This function is extremely slow and will severely decrease
    performance if used to expand base paths with several folders in it.
  """
    if not isinstance(pathList, (
            list,
            tuple,
    )):
        pathList = [pathList]
    from glob import glob
    if filters is None:
        filters = ['*']
    if not (type(filters) in (
            list,
            tuple,
    )):
        filters = [filters]
    retList = [[] for idx in range(len(filters))]
    from RingerCore import progressbar, traverse
    pathList = list(
        traverse([
            glob(path) if '*' in path else path
            for path in traverse(pathList, simple_ret=True)
        ],
                 simple_ret=True))
    for path in progressbar(pathList,
                            len(pathList),
                            'Expanding folders: ',
                            60,
                            50,
                            True if logger is not None else False,
                            logger=logger,
                            level=level):
        path = expandPath(path)
        if not os.path.exists(path):
            raise ValueError("Cannot reach path '%s'" % path)
        if os.path.isdir(path):
            for idx, filt in enumerate(filters):
                cList = filter(lambda x: not (os.path.isdir(x)),
                               [f for f in glob(os.path.join(path, filt))])
                if cList:
                    retList[idx].extend(cList)
            folders = [
                os.path.join(path, f) for f in os.listdir(path)
                if os.path.isdir(os.path.join(path, f))
            ]
            if folders:
                recList = expandFolders(folders, filters)
                if len(filters) is 1:
                    recList = [recList]
                for l in recList:
                    retList[idx].extend(l)
        else:
            for idx, filt in enumerate(filters):
                if path in glob(os.path.join(os.path.dirname(path), filt)):
                    retList[idx].append(path)
    if len(filters) is 1:
        retList = retList[0]
    return retList