Exemplo n.º 1
0
 def invokeGenerator(self):
     generator = TrainDataGenerator()
     generator.setBatchSize(self.__batchsize)
     generator.setSquaredElementsLimit(self.batch_uses_sum_of_squares)
     generator.setFileList([self.dataDir + "/" + s for s in self.samples])
     return generator
Exemplo n.º 2
0
        if args.unbuffered:
            td.readFromFile(use_inputdir + "/" + inputfile)
        else:
            td.readFromFileBuffered(use_inputdir + "/" + inputfile)
    else:
        print('converting ' + inputfile)
        td.readFromSourceFile(use_inputdir + "/" + inputfile,
                              dc.weighterobjects,
                              istraining=False)

    gen = TrainDataGenerator()
    if batchsize < 1:
        batchsize = dc.getBatchSize()
    print('batch size', batchsize)
    gen.setBatchSize(batchsize)
    gen.setSquaredElementsLimit(dc.batch_uses_sum_of_squares)
    gen.setSkipTooLargeBatches(False)
    gen.setBuffer(td)

    predicted = model.predict_generator(gen.feedNumpyData(),
                                        steps=gen.getNBatches(),
                                        max_queue_size=1,
                                        use_multiprocessing=False,
                                        verbose=1)

    x = td.transferFeatureListToNumpy()
    w = td.transferWeightListToNumpy()
    y = td.transferTruthListToNumpy()

    td.clear()
    gen.clear()
Exemplo n.º 3
0
    def predict(self, model=None, model_path=None, output_to_file=True):
        if model_path == None:
            model_path = self.model_path

        if model is None:
            if not os.path.exists(model_path):
                raise FileNotFoundError('Model file not found')

        assert model_path is not None or model is not None

        outputs = []
        if output_to_file:
            os.system('mkdir -p ' + self.predict_dir)

        if model is None:
            model = load_model(model_path)

        all_data = []
        for inputfile in self.input_data_files:

            use_inputdir = self.inputdir
            if inputfile[0] == "/":
                use_inputdir = ""
            outfilename = "pred_" + os.path.basename(inputfile)

            print('predicting ', use_inputdir + '/' + inputfile)

            td = self.dc.dataclass()

            #also allows for inheriting classes now, like with tracks or special PU
            if not isinstance(td, TrainData_NanoML) and type(
                    td) is not TrainData_TrackML:
                raise RuntimeError(
                    "TODO: make sure this works for other traindata formats")

            if inputfile[-5:] == 'djctd':
                if self.unbuffered:
                    td.readFromFile(use_inputdir + "/" + inputfile)
                else:
                    td.readFromFileBuffered(use_inputdir + "/" + inputfile)
            else:
                print('converting ' + inputfile)
                td.readFromSourceFile(use_inputdir + "/" + inputfile,
                                      self.dc.weighterobjects,
                                      istraining=False)

            gen = TrainDataGenerator()
            # the batch size must be one otherwise we need to play tricks with the row splits later on
            gen.setBatchSize(1)
            gen.setSquaredElementsLimit(False)
            gen.setSkipTooLargeBatches(False)
            gen.setBuffer(td)

            num_steps = gen.getNBatches()
            generator = gen.feedNumpyData()

            dumping_data = []

            thistime = time.time()
            for _ in range(num_steps):
                data_in = next(generator)
                predictions_dict = model(data_in[0])
                for k in predictions_dict.keys():
                    predictions_dict[k] = predictions_dict[k].numpy()
                features_dict = td.createFeatureDict(data_in[0])
                truth_dict = td.createTruthDict(data_in[0])

                dumping_data.append(
                    [features_dict, truth_dict, predictions_dict])

            totaltime = time.time() - thistime
            print('took approx', totaltime / num_steps,
                  's per endcap (also includes dict building)')

            td.clear()
            gen.clear()
            outfilename = os.path.splitext(outfilename)[0] + '.bin.gz'
            if output_to_file:
                td.writeOutPredictionDict(dumping_data,
                                          self.predict_dir + "/" + outfilename)
            outputs.append(outfilename)
            if not output_to_file:
                all_data.append(dumping_data)

        if output_to_file:
            with open(self.predict_dir + "/outfiles.txt", "w") as f:
                for l in outputs:
                    f.write(l + '\n')

        if not output_to_file:
            return all_data