Example #1
    def get_best_models(self, best_sorts, remove_last=True, with_history=False):
        '''
        This method will load the best models.

        Arguments:

        - best_sorts: the table that contains the best sorts;
        - remove_last: a boolean flag to remove (or not) the tanh in the output layer;
        - with_history: unused variable.
        '''
        from tensorflow.keras.models import Model, model_from_json
        from Gaugi import load
        import json

        models = [[ None for _ in range(len(self.__etabins)-1)] for __ in range(len(self.__etbins)-1)]
        for et_bin in range(len(self.__etbins)-1):
            for eta_bin in range(len(self.__etabins)-1):
                d_tuned = {}
                best = best_sorts.loc[(best_sorts.et_bin==et_bin) & (best_sorts.eta_bin==eta_bin)]
                tuned = load(best.file_name.values[0])['tunedData'][best.model_idx.values[0]]
                model = model_from_json( json.dumps(tuned['sequence'], separators=(',', ':')) ) #custom_objects={'RpLayer':RpLayer} )
                model.set_weights( tuned['weights'] )
                new_model = Model(model.inputs, model.layers[-2].output) if remove_last else model
                #new_model.summary()
                d_tuned['model']    = new_model
                d_tuned['etBin']    = [self.__etbins[et_bin], self.__etbins[et_bin+1]]
                d_tuned['etaBin']   = [self.__etabins[eta_bin], self.__etabins[eta_bin+1]]
                d_tuned['etBinIdx'] = et_bin
                d_tuned['etaBinIdx']= eta_bin
                d_tuned['history']  = tuned['history']
                models[et_bin][eta_bin] = d_tuned
        return models
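A minimal usage sketch for the method above (hypothetical: `summary` names an instance of the owning class, and `best_sorts` is a pandas DataFrame holding one best row per (et, eta) bin with the `et_bin`, `eta_bin`, `file_name` and `model_idx` columns used in the code):

    models = summary.get_best_models(best_sorts, remove_last=True)
    # models is indexed as models[et_bin][eta_bin]; each entry is a dict
    d_tuned = models[0][0]
    print(d_tuned['etBin'], d_tuned['etaBin'])
    d_tuned['model'].summary()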
Example #2
 def generator_for_signal(path):
     from Gaugi import load
     raw = load(path)
     features = raw['features'].tolist()
     data = raw['data']
     target = raw['target']
     return data[target==1,:], features
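A short usage sketch (the file name is hypothetical): the `target==1` mask keeps only the signal rows, so the returned matrix contains signal events only.

    sig_data, features = generator_for_signal('sample.npz')
    print(sig_data.shape, len(features))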
Example #3
    def dump_all_history( self, table, output_path , tag):
        '''
        This method will dump the training history, making it easier to
        retrieve this information when plotting the training evolution.

        Arguments:

        - table: a table with the path information;
        - output_path: the path where the histories will be saved;
        - tag: the train tag.
        '''
        import os, json, joblib
        from Gaugi import load
        if not os.path.exists( output_path ):
          os.mkdir( output_path )
        for _ , row in table.iterrows():
            if row.train_tag != tag:
              continue
            # Load history
            history = load( row.file_name )['tunedData'][row.tuned_idx]['history']
            history['loc'] = {'et_bin' : row.et_bin, 'eta_bin' : row.eta_bin, 'sort' : row.sort, 'model_idx' : row.model_idx}
            name = 'history_et_%i_eta_%i_model_%i_sort_%i.json' % (row.et_bin,row.eta_bin,row.model_idx,row.sort)
            jbl_name = 'history_et_%i_eta_%i_model_%i_sort_%i.joblib' % (row.et_bin,row.eta_bin,row.model_idx,row.sort)
            joblib.dump(history['summary'], os.path.join(output_path, jbl_name))
            history.pop('summary')
            with open(os.path.join(output_path, name), 'w', encoding='utf-8') as fp:
                #json.dump(transform_serialize(history), fp)
                json.dump(str(history), fp)
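A minimal usage sketch (hypothetical: `summary` is an instance of the owning class and `table` is a DataFrame with the `train_tag`, `file_name`, `tuned_idx`, `et_bin`, `eta_bin`, `sort` and `model_idx` columns read above):

    summary.dump_all_history(table, 'histories', tag='v1')
    # writes one history_et_*_eta_*_model_*_sort_*.json file, plus a .joblib
    # dump of the 'summary' entry, per matching row into the 'histories' folder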
Example #4
  def __call__( self , generator, tunedFile, outputfile, crossval, decorators):

    import json
    from tensorflow.keras.models import model_from_json
    from Gaugi import load

    context = Context()

    MSG_INFO( self, "Opening file %s...", tunedFile )
    raw = load(tunedFile)

    tunedData = TunedData_v1()

    for idx, tuned in enumerate(raw['tunedData']):

      # ensure the context is empty for each iteration
      context.clear()


      sort = tuned['sort']
      init = tuned['init']
      imodel = tuned['imodel']
      history = tuned['history']

      # get the current kfold and train, val sets
      x_train, x_val, y_train, y_val, index_from_cv = self.pattern_g( generator, crossval, sort )

      # recover keras model
      model = model_from_json( json.dumps(tuned['sequence'], separators=(',', ':')) )#, custom_objects={'RpLayer':RpLayer} )
      model.set_weights( tuned['weights'] )


      # These should not be stored in the file
      context.setHandler( "valData" , (x_val, y_val)       )
      context.setHandler( "trnData" , (x_train, y_train)   )
      context.setHandler( "index"   , index_from_cv        )
      context.setHandler( "crossval", crossval             )


      # These will be stored into the file
      context.setHandler( "model"   , model         )
      context.setHandler( "sort"    , sort          )
      context.setHandler( "init"    , init          )
      context.setHandler( "imodel"  , imodel        )
      context.setHandler( "time"    , tuned['time'] )
      context.setHandler( "history" , history       )


      for tool in decorators:
        #MSG_INFO( self, "Executing the pos processor %s", tool.name() )
        tool.decorate( history, context )

      tunedData.attach_ctx( context )


    try:
      MSG_INFO( self, "Saving file..." )
      tunedData.save( outputfile+'/'+ tunedFile.split('/')[-1] )
    except Exception as e:
      MSG_FATAL( self, "It's not possible to save the tuned data: %s" , e )


    return StatusCode.SUCCESS
Example #5
    def generator( path ):

        def norm1( data ):
            norms = np.abs( data.sum(axis=1) )
            norms[norms==0] = 1
            return data/norms[:,None]
        from Gaugi import load
        import numpy as np
        d = load(path)
        feature_names = d['features'].tolist()
        data = norm1(d['data'][:,1:101])
        target = d['target']
        avgmu = d['data'][:,0]
        references = ['T0HLTElectronT2CaloTight','T0HLTElectronT2CaloMedium','T0HLTElectronT2CaloLoose','T0HLTElectronT2CaloVLoose']
        ref_dict = {}
        for ref in references:
            answers = d['data'][:, feature_names.index(ref)]
            signal_passed = sum(answers[target==1])
            signal_total = len(answers[target==1])
            background_passed = sum(answers[target==0])
            background_total = len(answers[target==0])
            pd = signal_passed/signal_total
            fa = background_passed/background_total
            ref_dict[ref] = {'signal_passed': signal_passed, 'signal_total': signal_total, 'pd' : pd,
                             'background_passed': background_passed, 'background_total': background_total, 'fa': fa}

        # note: ref_dict is built above but never returned; kept only for inspection
        return data, target, avgmu
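A small worked example of the `norm1` normalization used above: each row is divided by the absolute value of its own sum, and all-zero rows are left untouched because their divisor is forced to 1.

    import numpy as np
    rings = np.array([[1.0, 2.0, 3.0],
                      [0.0, 0.0, 0.0]])
    norms = np.abs(rings.sum(axis=1))
    norms[norms == 0] = 1
    print(rings / norms[:, None])
    # [[0.16666667 0.33333333 0.5       ]
    #  [0.         0.         0.        ]]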
Example #6
    def load(self, fList):
        from Gaugi import load
        from Gaugi import csvStr2List, expandFolders, progressbar
        fList = csvStr2List(fList)
        fList = expandFolders(fList)
        from saphyra import TunedData_v1
        self._obj = TunedData_v1()

        for inputFile in progressbar(fList,
                                     len(fList),
                                     prefix="Reading tuned data collection...",
                                     logger=self._logger):

            raw = load(inputFile)
            # get the file version
            version = raw['__version']
            # the current file version
            if version == 1:
                obj = TunedData_v1.fromRawObj(raw)
                self._obj.merge(obj)
            else:
                # abort: the file version is not supported
                self._logger.fatal('File version (%d) not supported in (%s)',
                                   version, inputFile)

        # return the merged TunedData object
        return self._obj
Example #7
def getPatterns(path, cv, sort):
    def norm1(data):
        norms = np.abs(data.sum(axis=1))
        norms[norms == 0] = 1
        return data / norms[:, None]

    from Gaugi import load
    import numpy as np
    d = load(path)
    # ------------------------------------------------------- #
    # remove zero rings
    #m_rings = list(range(8,80)) + list(range(88,100))
    #data = norm1(d['data'][:,m_rings])
    data = norm1(d['data'][:, 1:101])
    # ------------------------------------------------------- #
    target = d['target']
    target[target != 1] = -1
    splits = [(train_index, val_index)
              for train_index, val_index in cv.split(data, target)]

    x_train = data[splits[sort][0]]
    y_train = target[splits[sort][0]]
    x_val = data[splits[sort][1]]
    y_val = target[splits[sort][1]]

    return x_train, x_val, y_train, y_val, splits, []  #d['features']
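A minimal usage sketch (hypothetical file name; `StratifiedKFold` stands in for whichever cross-validation object the training pipeline actually passes as `cv`):

    from sklearn.model_selection import StratifiedKFold
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=512)
    x_train, x_val, y_train, y_val, splits, _ = getPatterns('sample.npz', cv, sort=0)
    print(x_train.shape, x_val.shape)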
Example #8
    def fill(self, path, tag):
        '''
        This method will fill the information dictionary and convert it into a pandas DataFrame.

        Arguments:

        - path: the path to the tuned files;
        - tag: the training tag used.
        '''
        paths = expandFolders( path )
        MSG_INFO(self, "Reading file for %s tag from %s", tag , path)

        # Creating the dataframe
        dataframe = collections.OrderedDict({
                              'train_tag'      : [],
                              'et_bin'         : [],
                              'eta_bin'        : [],
                              'model_idx'      : [],
                              'sort'           : [],
                              'init'           : [],
                              'file_name'      : [],
                              'tuned_idx'      : [],
                          })


        # Complete the dataframe for each varname in the config dict
        for varname in self.__config_dict.keys():
            dataframe[varname] = []

        MSG_INFO(self, 'There are %i files for this task...' %(len(paths)))
        MSG_INFO(self, 'Filling the table... ')

        for ituned_file_name in paths:
            gfile = load(ituned_file_name)
            tuned_file = gfile['tunedData']

            for idx, ituned in enumerate(tuned_file):
                history = ituned['history']
                #model = model_from_json( json.dumps(ituned['sequence'], separators=(',', ':')) , custom_objects={'RpLayer':RpLayer} )
                #model.set_weights( ituned['weights'] )

                # get the basic from model
                dataframe['train_tag'].append(tag)
                #dataframe['model'].append(model)
                dataframe['model_idx'].append(ituned['imodel'])
                dataframe['sort'].append(ituned['sort'])
                dataframe['init'].append(ituned['init'])
                dataframe['et_bin'].append(self.get_etbin(ituned_file_name))
                dataframe['eta_bin'].append(self.get_etabin(ituned_file_name))
                dataframe['file_name'].append(ituned_file_name)
                dataframe['tuned_idx'].append( idx )

                # Get the value for each wanted key passed by the user in the constructor args.
                for key, local  in self.__config_dict.items():
                    dataframe[key].append( self.__get_value( history, local ) )

        # Append to the existing table if needed,
        # ignoring the index to avoid duplicated entries in the dataframe.
        # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
        if self.__table is not None:
            self.__table = pd.concat([self.__table, pd.DataFrame(dataframe)], ignore_index=True)
        else:
            self.__table = pd.DataFrame(dataframe)
        MSG_INFO(self, 'End of fill step, a pandas DataFrame was created...')
Example #9
 def get_history(self, path, index):
     tuned_list = load(path)['tunedData']
     for tuned in tuned_list:
         if tuned['imodel'] == index:
             return tuned['history']
     MSG_FATAL(self,
               "It's not possible to find the history for model index %d",
               index)
Example #10
 def __call__(self, inputFiles):
     obj = None
     for idx, f in progressbar(enumerate(inputFiles),
                               len(inputFiles),
                               'Reading...: ',
                               60,
                               logger=self._logger):
         #d = dict(np.load(f,allow_pickle=True))
         d = dict(load(f))
         obj = self.merge(d, obj, self._skip_these_keys) if obj else d
     return obj
Example #11
 def dump_all_history( self, table, output_path , tag):
     import os, json
     from Gaugi import load
     if not os.path.exists( output_path ):
       os.mkdir( output_path )
     for _ , row in table.iterrows():
         if row.train_tag != tag:
           continue
         # Load history
         history = load( row.file_name )['tunedData'][row.tuned_idx]['history']
         history['loc'] = {'et_bin' : row.et_bin, 'eta_bin' : row.eta_bin, 'sort' : row.sort, 'model_idx' : row.model_idx}
         name = 'history_et_%i_eta_%i_model_%i_sort_%i.json' % (row.et_bin,row.eta_bin,row.model_idx,row.sort)
         with open(os.path.join(output_path, name), 'w') as fp:
             #json.dump(transform_serialize(history), fp)
             json.dump(str(history), fp)
Example #12
  def load( self, ofile ):

    from Gaugi import load
    raw = load( ofile )
    # get the file version
    version = raw['__version']
    # the current file version
    if version == 1:
      # assumption: Job_v1 is provided by the saphyra package, mirroring the
      # TunedData_v1 import pattern used elsewhere in this code base
      from saphyra import Job_v1
      self._obj = Job_v1.fromRawObj( raw )
    else:
      # abort: the file version is not supported
      self._logger.fatal( 'File version (%d) not supported in (%s)', version, ofile)

    # return the loaded job object
    return self._obj
Example #13
def getPatterns(path, cv, sort):

    from Gaugi import load
    import numpy as np

    def norm1(data):
        norms = np.abs(data.sum(axis=1))
        norms[norms == 0] = 1
        #return np.expand_dims( data/norms[:,None], axis=2 )
        return data / norms[:, None]

    # Load data
    d = load(path)
    feature_names = d['features'].tolist()

    # Get the normalized rings
    data_rings = norm1(d['data'][:, 1:101])

    # How many events?
    n = data_rings.shape[0]

    # extract all shower shapes
    data_reta = d['data'][:, feature_names.index('reta')].reshape((n, 1))
    data_rphi = d['data'][:, feature_names.index('rphi')].reshape((n, 1))
    data_eratio = d['data'][:, feature_names.index('eratio')].reshape((n, 1))
    data_weta2 = d['data'][:, feature_names.index('weta2')].reshape((n, 1))
    data_f1 = d['data'][:, feature_names.index('f1')].reshape((n, 1))

    # Get the mu average
    data_mu = d['data'][:, feature_names.index('avgmu')].reshape((n, 1))
    target = d['target']

    # This is mandatory
    splits = [(train_index, val_index)
              for train_index, val_index in cv.split(data_mu, target)]

    data_shower_shapes = np.concatenate(
        (data_reta, data_rphi, data_eratio, data_weta2, data_f1), axis=1)

    # split for this sort
    x_train = [
        data_rings[splits[sort][0]], data_shower_shapes[splits[sort][0]]
    ]
    x_val = [data_rings[splits[sort][1]], data_shower_shapes[splits[sort][1]]]
    y_train = target[splits[sort][0]]
    y_val = target[splits[sort][1]]

    return x_train, x_val, y_train, y_val, splits
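A minimal usage sketch (hypothetical file name). Note that `x_train` and `x_val` are two-element lists, rings first and shower shapes second, as expected by a two-input Keras model:

    from sklearn.model_selection import StratifiedKFold
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=512)
    x_train, x_val, y_train, y_val, splits = getPatterns('sample.npz', cv, sort=0)
    print(x_train[0].shape, x_train[1].shape)  # (n_train, 100) and (n_train, 5)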
Example #14
    def fill(self, path, tag):

        paths = expandFolders(path)
        MSG_INFO(self, "Reading file for %s tag from %s", tag, path)

        # Creating the dataframe
        dataframe = collections.OrderedDict({
            'train_tag': [],
            'et_bin': [],
            'eta_bin': [],
            'model_idx': [],
            'sort': [],
            'init': [],
            'file_name': [],
            'tuned_idx': [],
        })

        # Complete the dataframe for each varname in the config dict
        for varname in self.__config_dict.keys():
            dataframe[varname] = []

        MSG_INFO(self, 'There are %i files for this task...' % (len(paths)))
        MSG_INFO(self, 'Filling the table... ')

        for ituned_file_name in paths:
            gfile = load(ituned_file_name)
            tuned_file = gfile['tunedData']

            for idx, ituned in enumerate(tuned_file):
                history = ituned['history']
                # get the basic from model
                dataframe['train_tag'].append(tag)
                dataframe['model_idx'].append(ituned['imodel'])
                dataframe['sort'].append(ituned['sort'])
                dataframe['init'].append(ituned['init'])
                dataframe['et_bin'].append(self.get_etbin(ituned_file_name))
                dataframe['eta_bin'].append(self.get_etabin(ituned_file_name))
                dataframe['file_name'].append(ituned_file_name)
                dataframe['tuned_idx'].append(idx)
                # Get the value for each wanted key passed by the user in the constructor args.
                for key, local in self.__config_dict.items():
                    dataframe[key].append(self.__get_value(history, local))

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        if self.__table is not None:
            self.__table = pd.concat([self.__table, pd.DataFrame(dataframe)])
        else:
            self.__table = pd.DataFrame(dataframe)
        MSG_INFO(self, 'End of fill step, a pandas DataFrame was created...')
Example #15
from Gaugi import load
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

data = load('e.npz')
features = data['features'].tolist()


def plot_layers(sampling, d, norm=False, evt=None):
    def plot_cells(ax, cells, vmin=None, vmax=None, label='', norm=False):

        ax.imshow(cells,
                  interpolation='nearest',
                  aspect=cells.shape[1] / cells.shape[0],
                  norm=LogNorm(vmin=vmin, vmax=vmax) if norm else None)
        ax.set(xlabel=r'$\eta$', ylabel=r'$\phi$', title=label)
        ax.tick_params(axis='both',
                       which='both',
                       bottom=False,
                       top=False,
                       left=False,
                       right=False,
                       labelbottom=False,
                       labelleft=False)

    import random
    f, axarr = plt.subplots(1, 3, figsize=(18, 10))
    if evt is None:
        evt = 1  # default event index
Example #16
    def fill(self, path, tag):
        '''
        This method will fill the information dictionary and convert it into a pandas DataFrame.

        Arguments:

        - path: the path to the tuned files;
        - tag: the training tag used.
        '''
        paths = expand_folders(path)
        MSG_INFO(self, "Reading file for %s tag from %s", tag, path)

        # Creating the dataframe
        dataframe = collections.OrderedDict({
            'train_tag': [],
            'et_bin': [],
            'eta_bin': [],
            'model_idx': [],
            'sort': [],
            'init': [],
            'file_name': [],
            'tuned_idx': [],
            'op_name': [],
        })

        MSG_INFO(self, 'There are %i files for this task...' % (len(paths)))
        MSG_INFO(self, 'Filling the table... ')

        for ituned_file_name in progressbar(paths, 'Reading %s...' % tag):

            try:
                gfile = load(ituned_file_name)
            except Exception:
                #MSG_WARNING(self, "File %s not open. skip.", ituned_file_name)
                continue
            tuned_file = gfile['tunedData']

            for idx, ituned in enumerate(tuned_file):

                history = ituned['history']

                for op, config_dict in self.__config_dict.items():

                    # get the basic from model
                    dataframe['train_tag'].append(tag)
                    dataframe['model_idx'].append(ituned['imodel'])
                    dataframe['sort'].append(ituned['sort'])
                    dataframe['init'].append(ituned['init'])
                    dataframe['et_bin'].append(
                        self.get_etbin(ituned_file_name))
                    dataframe['eta_bin'].append(
                        self.get_etabin(ituned_file_name))
                    dataframe['file_name'].append(ituned_file_name)
                    dataframe['tuned_idx'].append(idx)
                    dataframe['op_name'].append(op)

                    # Get the value for each wanted key passed by the user in the constructor args.
                    for key, local in config_dict.items():
                        if key not in dataframe:
                            dataframe[key] = [self.__get_value(history, local)]
                        else:
                            dataframe[key].append(
                                self.__get_value(history, local))

        # Append to the existing table if needed,
        # ignoring the index to avoid duplicated entries in the dataframe.
        # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
        if self.__table is not None:
            self.__table = pd.concat([self.__table, pd.DataFrame(dataframe)], ignore_index=True)
        else:
            self.__table = pd.DataFrame(dataframe)
        MSG_INFO(self, 'End of fill step, a pandas DataFrame was created...')
Example #17
def getJobConfigId(path):
    from Gaugi import load
    return dict(load(path))['id']
Example #18
def generator(path):
    def norm1(data):
        norms = np.abs(data.sum(axis=1))
        norms[norms == 0] = 1
        return data / norms[:, None]

    from Gaugi import load
    import numpy as np
    d = load(path)
    feature_names = d['features'].tolist()

    #n = d['data'].shape[0]

    # extract all shower shapes
    #data_reta   = d['data'][:, feature_names.index('L2Calo_reta')].reshape((n,1)) / 1.0
    #data_eratio = d['data'][:, feature_names.index('L2Calo_eratio')].reshape((n,1)) / 1.0
    #data_f1     = d['data'][:, feature_names.index('L2Calo_f1')].reshape((n,1)) / 0.6
    #data_f3     = d['data'][:, feature_names.index('f3')].reshape((n,1)) / 0.04
    #data_weta2  = d['data'][:, feature_names.index('weta2')].reshape((n,1)) / 0.02
    #data_wstot  = d['data'][:, feature_names.index('wtots1')].reshape((n,1)) / 1.0

    #target = d['target']

    # Fix all shower shapes variables
    #print( 'eratio > [10,inf[ = %d'%len(data_eratio[data_eratio>10.0]) )
    #data_eratio[data_eratio>10.0]=0
    #print( 'eratio > [1,10[ = %d'%len(data_eratio[data_eratio>1.0]) )
    #data_eratio[data_eratio>1.]=1.0
    #print ('wstor < -99 =  %d'%len(data_wstot[data_wstot<-99]))
    #data_wstot[data_wstot<-99]=0

    # This is mandatory
    #splits = [(train_index, val_index) for train_index, val_index in cv.split(data_reta,target)]
    #dataRings = norm1(d['data'][:,1:101])
    #data_shower = np.concatenate( (data_reta,data_eratio,data_f1,data_f3,data_weta2, data_wstot), axis=1)
    #dataSS = np.transpose(data_shower)
    #data = np.concatenate((dataRings,data_shower),axis=1)
    data = norm1(d['data'][:, 1:101])

    target = d['target']
    avgmu = d['data'][:, 0]
    references = [
        'T0HLTElectronT2CaloTight', 'T0HLTElectronT2CaloMedium',
        'T0HLTElectronT2CaloLoose', 'T0HLTElectronT2CaloVLoose'
    ]
    ref_dict = {}

    for ref in references:
        answers = d['data'][:, feature_names.index(ref)]
        signal_passed = sum(answers[target == 1])
        signal_total = len(answers[target == 1])
        background_passed = sum(answers[target == 0])
        background_total = len(answers[target == 0])
        pd = signal_passed / signal_total
        fa = background_passed / background_total
        ref_dict[ref] = {
            'signal_passed': signal_passed,
            'signal_total': signal_total,
            'pd': pd,
            'background_passed': background_passed,
            'background_total': background_total,
            'fa': fa
        }

    # note: ref_dict is built above but never returned; kept only for inspection
    return data, target, avgmu
Example #19
def getPileup(path):
    from Gaugi import load
    return load(path)['data'][:, 0]
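A short usage sketch (hypothetical file name): column 0 of 'data' holds the average pile-up, matching the `avgmu = d['data'][:, 0]` convention used by the generators above.

    avgmu = getPileup('sample.npz')
    print(avgmu.shape, avgmu.mean())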