Example #1
    def hydra_base(self, nepochs, algorithm):

        trainData = json.load(open(self.params.files['train_struct']))

        evalData = json.load(open(self.params.files['test_struct']))

        model = get_cnn_model(self.params, algorithm)

        if self.params.num_gpus > 1:
            model = make_parallel(model, self.params.num_gpus)

        model.compile(optimizer=Adam(lr=self.params.cnn_adam_learning_rate),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        train_datagen = read_data(self.params, trainData)

        eval_datagen = read_data(self.params, evalData)

        def lr_scheduler(epoch):
            # Fall back to the configured Adam rate when lr_mode is not
            # 'progressive_drops', so `lr` is always bound.
            lr = self.params.cnn_adam_learning_rate
            if self.params.lr_mode == 'progressive_drops':
                lr = 1e-4
            print('lr_scheduler (epoch: %d, lr: %f)' % (epoch, lr))
            return lr

        lr_decay = LearningRateScheduler(lr_scheduler)

        print("Hydra base (%d epochs):" % nepochs)

        callbacks_list = [lr_decay]

        model.fit_generator(
            train_datagen,
            steps_per_epoch=(len(trainData) // self.params.batch_size_cnn + 1),
            epochs=nepochs,
            callbacks=callbacks_list,
            validation_data=eval_datagen,
            validation_steps=(len(evalData) // self.params.batch_size_cnn + 1))

        fileName = 'weights.hydra.base.' + algorithm + '.hdf5'

        filePath = os.path.join(
            self.params.directories['cnn_checkpoint_weights'], fileName)

        model.save(filePath)
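
A minimal usage sketch for the method above; the HydraTrainer wrapper name and the epoch count are illustrative assumptions, not part of the original source:

trainer = HydraTrainer(params)  # hypothetical class holding these methods
for algorithm in ('densenet', 'resnet50'):
    # One shared Hydra base per backbone; weights are saved to
    # cnn_checkpoint_weights as weights.hydra.base.<algorithm>.hdf5.
    trainer.hydra_base(nepochs=6, algorithm=algorithm)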
Example #2
    def test_ensemble(self):

        algorithm = 'densenet'
        weights_densenet = glob.glob(
            self.params.directories['cnn_checkpoint_weights'] +
            '/weights.hydra.head.' + algorithm + '*.hdf5')

        algorithm = 'resnet50'
        weights_resnet = glob.glob(
            self.params.directories['cnn_checkpoint_weights'] +
            '/weights.hydra.head.' + algorithm + '*.hdf5')

        print('List size:', len(weights_densenet) + len(weights_resnet))

        ListModel = [0] * (len(weights_densenet) + len(weights_resnet))

        i = 0
        for weight_path in weights_densenet:
            print("Weight list number: %s" % weight_path)
            cnnModel = get_cnn_model(self.params, 'densenet')
            if self.params.num_gpus > 1:
                cnnModel = make_parallel(cnnModel, self.params.num_gpus)
            cnnModel.load_weights(weight_path)
            cnnModel = cnnModel.layers[-2]
            ListModel[i] = cnnModel
            i += 1

        j = i
        for i, weight_path in enumerate(weights_resnet):
            print("Weight list number: %s" % weight_path)
            cnnModel = get_cnn_model(self.params, 'resnet50')
            # Mirror the densenet branch: parallelize only on multi-GPU
            # setups, and always load the weights (the original gated
            # load_weights behind num_gpus > 1 and hard-coded 4 GPUs).
            if self.params.num_gpus > 1:
                cnnModel = make_parallel(cnnModel, self.params.num_gpus)
            cnnModel.load_weights(weight_path)
            cnnModel = cnnModel.layers[-2]
            print('Adding descriptor: ', i, ' in position: ', j)
            ListModel[j] = cnnModel
            j += 1

        timestr = time.strftime("%Y%m%d-%H%M%S")

        fileCNN1 = open(
            os.path.join(self.params.directories['predictions'],
                         'predictions-clas-cnn-%s.txt' % (timestr)), 'w')
        fileCNN2 = open(
            os.path.join(self.params.directories['predictions'],
                         'predictions-vect-cnn-%s.txt' % (timestr)), 'w')
        fileCNN3 = open(
            os.path.join(self.params.directories['predictions'],
                         'predictions-all-cnn-%s.txt' % (timestr)), 'w')

        currBatchSize = 1
        ind = 0
        hit = 0
        miss = 0
        total = 0
        for root, dirs, files in tqdm(
                os.walk(os.path.join(self.params.directories['test_data']))):
            if len(files) > 0:
                slashes = [i for i, ltr in enumerate(root) if ltr == '/']

            for file in files:
                if file.endswith('.jpg'):
                    baseName = file[:-4]
                    true_category = root[slashes[-1] + 1:]
                    filename = os.path.join(root, file)
                    img = image.load_img(filename)
                    img = image.img_to_array(img)
                    img.setflags(write=True)
                    imgdata = np.zeros(
                        (currBatchSize, self.params.target_img_size[0],
                         self.params.target_img_size[1],
                         self.params.num_channels))
                    imgdata[ind, :, :, :] = img
                    imgdata = imagenet_utils.preprocess_input(imgdata)
                    imgdata = imgdata / 255.0

                    predictionsCNN = []

                    fileCNN3.write("%s;%s;" % (baseName, true_category))
                    for i in range(
                            len(weights_densenet) + len(weights_resnet)):

                        cnnModel = ListModel[i]

                        predictionsPartial = cnnModel.predict(
                            imgdata, batch_size=currBatchSize)
                        predCNNPartial = np.argmax(predictionsPartial)
                        classification = self.params.category_names[
                            predCNNPartial]
                        fileCNN3.write('%s;' % (classification))

                        predictionsCNN.append(predictionsPartial[0])

                    fileCNN3.write("\n")

                    predFinal = np.sum(predictionsCNN, axis=0)
                    predCNN = np.argmax(predFinal)
                    oursCNNStr = self.params.category_names[predCNN]
                    fileCNN1.write('%s;%s;%s;\n' %
                                   (baseName, true_category, oursCNNStr))

                    fileCNN2.write("%s %s %s " %
                                   (baseName, true_category, oursCNNStr)),
                    for pred in predFinal:
                        fileCNN2.write("%5.12f " % (pred)),
                    fileCNN2.write("\n")

                    if true_category == oursCNNStr:
                        hit += 1
                    else:
                        miss += 1
                    total += 1
        print('hit: ', hit, ' miss: ', miss, ' total: ', total,
              ' percentage: ',
              float(hit) / float(total))

        fileCNN1.close()
        fileCNN2.close()
        fileCNN3.close()
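
The ensembling above is an unweighted sum of per-head softmax vectors, so the final argmax picks the class with the highest average probability. A self-contained numpy illustration with invented scores:

import numpy as np

# Softmax outputs of two hypothetical heads for one image (3 classes).
head_a = np.array([0.2, 0.5, 0.3])
head_b = np.array([0.7, 0.2, 0.1])

predFinal = np.sum([head_a, head_b], axis=0)  # [0.9, 0.7, 0.4]
print(np.argmax(predFinal))                   # -> 0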
Example #3
    def test_models(self, algorithm, model_weights):

        cnnModel = get_cnn_model(self.params, algorithm)
        if self.params.num_gpus > 1:
            cnnModel = make_parallel(cnnModel, self.params.num_gpus)
        cnnModel.load_weights(model_weights)
        cnnModel = cnnModel.layers[-2]

        index = 0
        timestr = time.strftime("%Y%m%d-%H%M%S")

        fidCNN1 = open(
            os.path.join(
                self.params.directories['predictions'],
                'predictions-%s-clas-cnn-%s.txt' % (algorithm, timestr)), 'w')
        fidCNN2 = open(
            os.path.join(
                self.params.directories['predictions'],
                'predictions-%s-vect-cnn-%s.txt' % (algorithm, timestr)), 'w')

        currBatchSize = 1
        ind = 0
        hit = 0
        miss = 0
        total = 0
        for root, dirs, files in tqdm(
                os.walk(os.path.join(self.params.directories['test_data']))):
            if len(files) > 0:
                slashes = [i for i, ltr in enumerate(root) if ltr == '/']

            for file in files:
                if file.endswith('.jpg'):
                    baseName = file[:-4]
                    category = root[slashes[-1] + 1:]
                    filename = os.path.join(root, file)
                    img = image.load_img(filename)
                    img = image.img_to_array(img)
                    img.setflags(write=True)
                    imgdata = np.zeros(
                        (currBatchSize, self.params.target_img_size[0],
                         self.params.target_img_size[1],
                         self.params.num_channels))
                    imgdata[ind, :, :, :] = img
                    imgdata = imagenet_utils.preprocess_input(imgdata)
                    imgdata = imgdata / 255.0
                    predictionsCNN = cnnModel.predict(imgdata,
                                                      batch_size=currBatchSize)
                    predCNN = np.argmax(predictionsCNN)
                    oursCNNStr = self.params.category_names[predCNN]
                    print('%s;%s;%s;\n' % (baseName, category, oursCNNStr))
                    fidCNN1.write('%s;%s;%s;\n' %
                                  (baseName, category, oursCNNStr))

                    fidCNN2.write("%s %s %s " %
                                  (baseName, category, oursCNNStr)),
                    for pred in predictionsCNN[0]:
                        fidCNN2.write("%5.12f " % (pred)),
                    fidCNN2.write("\n")

                    if category == oursCNNStr:
                        hit += 1
                    else:
                        miss += 1
                    total += 1
        print('hit: ', hit, ' miss: ', miss, ' total: ', total,
              ' percentage: ',
              float(hit) / float(total))

        fidCNN1.close()
        fidCNN2.close()
Example #4
    def hydra_head(self, nepochs, weight_name, algorithm, prefix,
                   augmentation):

        trainData = json.load(open(self.params.files['train_struct']))

        evalData = json.load(open(self.params.files['test_struct']))

        model = get_cnn_model(self.params, algorithm)

        if self.params.num_gpus > 1:
            model = make_parallel(model, self.params.num_gpus)

        print('Loading weights: ', weight_name)
        model.load_weights(weight_name, by_name=True)

        model.compile(optimizer=Adam(lr=self.params.cnn_adam_learning_rate),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        if (augmentation == 'no'):
            train_datagen = read_data(self.params, trainData)
        else:
            train_datagen = img_generator(self.params, trainData, augmentation)

        eval_datagen = read_data(self.params, evalData)

        def lr_scheduler(epoch):
            # Fall back to the configured Adam rate when lr_mode is not
            # 'progressive_drops', so `lr` is always bound.
            lr = self.params.cnn_adam_learning_rate
            if self.params.lr_mode == 'progressive_drops':
                if epoch >= 0.75 * nepochs:
                    lr = 1e-6
                elif epoch >= 0.15 * nepochs:
                    lr = 1e-5
                else:
                    lr = 1e-4
            print('lr_scheduler (epoch: %d, lr: %f)' % (epoch, lr))
            return lr

        lr_decay = LearningRateScheduler(lr_scheduler)

        print("Hydra head (%d epochs):" % nepochs)

        #fileName = 'weights.' + algorithm + '.' + self.params.prefix + '.{epoch:02d}.hdf5'
        #filePath = os.path.join(self.params.directories['cnn_checkpoint_weights'], fileName)
        #checkpoint = ModelCheckpoint(filepath=filePath, monitor='loss', verbose=0, save_best_only=False,save_weights_only=False, mode='auto', period=1)

        #checkpoint = ModelCheckpoint(monitor='loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto')
        #callbacks_list = [checkpoint,lr_decay]
        callbacks_list = [lr_decay]

        model.fit_generator(
            train_datagen,
            steps_per_epoch=(len(trainData) // self.params.batch_size_cnn + 1),
            epochs=nepochs,
            callbacks=callbacks_list,
            validation_data=eval_datagen,
            validation_steps=(len(evalData) // self.params.batch_size_cnn + 1))

        fileName = 'weights.hydra.head.' + algorithm + '.' + prefix + '.hdf5'

        filePath = os.path.join(
            self.params.directories['cnn_checkpoint_weights'], fileName)

        model.save(filePath)
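
Taken together with hydra_base from Example #1, the intended flow appears to be: train a shared base per backbone, then fine-tune one head per augmentation. A hedged sketch; the wrapper class, epoch counts, and augmentation names are assumptions:

import os

trainer = HydraTrainer(params)  # hypothetical class holding these methods
trainer.hydra_base(nepochs=6, algorithm='densenet')

base_weights = os.path.join(params.directories['cnn_checkpoint_weights'],
                            'weights.hydra.base.densenet.hdf5')
for aug in ('no', 'flip', 'zoom'):  # augmentation names are invented here
    trainer.hydra_head(nepochs=4, weight_name=base_weights,
                       algorithm='densenet', prefix=aug, augmentation=aug)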
Example #5
    def test_models(self):

        if self.params.database == 'v1':
            metadataStats = json.load(open(
                self.params.files['dataset_stats1']))
        elif self.params.database == 'v2':
            metadataStats = json.load(open(
                self.params.files['dataset_stats2']))
        elif self.params.database == 'v3':
            metadataStats = json.load(open(
                self.params.files['dataset_stats3']))
        else:
            # Fail fast: metadataStats would otherwise be unbound below.
            raise ValueError('Error: define a dataset!')

        metadataMean = np.array(metadataStats['metadata_mean'])
        metadataMax = np.array(metadataStats['metadata_max'])

        cnnModel = get_cnn_model(self.params, self.params.algorithm)
        if self.params.num_gpus > 1:
            cnnModel = make_parallel(cnnModel, self.params.num_gpus)
        cnnModel.load_weights(self.params.model_weights)
        cnnModel = cnnModel.layers[-2]

        index = 0
        timestr = time.strftime("%Y%m%d-%H%M%S")

        fidCNN1 = open(
            os.path.join(
                self.params.directories['predictions'],
                'predictions-challenge-%s-%s-%s-clas-cnn-%s.txt' %
                (self.params.algorithm, self.params.database,
                 self.params.prefix, timestr)), 'w')
        fidCNN2 = open(
            os.path.join(
                self.params.directories['predictions'],
                'predictions-challenge-%s-%s-%s-vect-cnn-%s.txt' %
                (self.params.algorithm, self.params.database,
                 self.params.prefix, timestr)), 'w')

        def walkdir(folder):
            for root, dirs, files in os.walk(folder):
                if len(files) > 0:
                    yield (root, dirs, files)

        num_sequences = 0
        for _ in walkdir(self.params.directories['test_data']):
            num_sequences += 1

        for root, dirs, files in tqdm(walkdir(
                self.params.directories['test_data']),
                                      total=num_sequences):
            if len(files) > 0:
                imgPaths = []
                metadataPaths = []
                slashes = [i for i, ltr in enumerate(root) if ltr == '/']
                bbID = int(root[slashes[-1] + 1:])

            for file in files:
                if (self.params.database
                        == 'v1') and file.endswith('_rgba.jpg'):
                    imgPaths.append(os.path.join(root, file))
                    metadataPaths.append(
                        os.path.join(root, file[:-4] + '_features.json'))
                elif (self.params.database
                      == 'v2') and file.endswith('_rgbb.jpg'):
                    imgPaths.append(os.path.join(root, file))
                    metadataPaths.append(
                        os.path.join(root, file[:-4] + '_features.json'))
                elif (self.params.database
                      == 'v3') and (file.endswith('_rgba.jpg')
                                    or file.endswith('_msrgba.jpg')):
                    imgPaths.append(os.path.join(root, file))
                    metadataPaths.append(
                        os.path.join(root, file[:-4] + '_features.json'))

            if len(files) > 0:
                inds = []
                for metadataPath in metadataPaths:
                    underscores = [
                        ind for ind, ltr in enumerate(metadataPath)
                        if ltr == '_'
                    ]
                    inds.append(
                        int(metadataPath[underscores[-3] + 1:underscores[-2]]))
                inds = np.argsort(np.array(inds)).tolist()

                currBatchSize = len(inds)
                imgdata = np.zeros(
                    (currBatchSize, self.params.target_img_size[0],
                     self.params.target_img_size[1], self.params.num_channels))
                metadataFeatures = np.zeros(
                    (currBatchSize, self.params.metadata_length))

                for ind in inds:
                    img = image.load_img(imgPaths[ind])
                    img = image.img_to_array(img)
                    img.setflags(write=True)
                    imgdata[ind, :, :, :] = img

                    features = np.array(json.load(open(metadataPaths[ind])))
                    features = np.divide(features - metadataMean, metadataMax)
                    metadataFeatures[ind, :] = features

                imgdata = imagenet_utils.preprocess_input(imgdata)
                imgdata = imgdata / 255.0

                if self.params.use_metadata:
                    predictionsCNN = np.sum(cnnModel.predict(
                        [imgdata, metadataFeatures], batch_size=currBatchSize),
                                            axis=0)
                else:
                    predictionsCNN = np.sum(cnnModel.predict(
                        imgdata, batch_size=currBatchSize),
                                            axis=0)

            if len(files) > 0:
                if self.params.test_cnn:
                    predCNN = np.argmax(predictionsCNN)
                    oursCNNStr = self.params.category_names[predCNN]

                    fidCNN1.write('%d;%s;\n' % (bbID, oursCNNStr))

                    fidCNN2.write("%d " % (bbID)),
                    for pred in predictionsCNN:
                        fidCNN2.write("%5.12f " % (pred)),
                    fidCNN2.write("\n")

                index += 1

        fidCNN1.close()
        fidCNN2.close()
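
Two details above are worth flagging: metadata features are mean-shifted and then scaled by the per-dimension maxima, and the per-view softmax outputs of one bounding box are summed so the argmax picks the class with the highest pooled score. A toy check of the normalization with invented values:

import numpy as np

features = np.array([4.0, 10.0])
metadata_mean = np.array([2.0, 5.0])
metadata_max = np.array([8.0, 20.0])

normalized = np.divide(features - metadata_mean, metadata_max)
print(normalized)  # -> [0.25 0.25]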
Example #6
    def train_cnn(self):
        """
        Train CNN with or without metadata depending on setting of 'use_metadata' in params.py.
        :param: 
        :return: 
        """

        if self.params.database == 'v1':
            print('self.params.database = v1')
            trainData = json.load(open(self.params.files['training_struct1']))
            metadataStats = json.load(open(
                self.params.files['dataset_stats1']))
        elif self.params.database == 'v2':
            print('self.params.database = v2')
            trainData = json.load(open(self.params.files['training_struct2']))
            metadataStats = json.load(open(
                self.params.files['dataset_stats2']))
        elif self.params.database == 'v3':
            print('self.params.database = v3')
            trainData = json.load(open(self.params.files['training_struct3']))
            metadataStats = json.load(open(
                self.params.files['dataset_stats3']))
        else:
            # Fail fast: trainData and metadataStats would be unbound below.
            raise ValueError('Error: define a dataset!')

        model = get_cnn_model(self.params, self.params.algorithm)
        if self.params.num_gpus > 1:
            model = make_parallel(model, self.params.num_gpus)
        if (self.params.model_weights != ''):
            model.load_weights(self.params.model_weights, by_name=True)
        model.compile(optimizer=Adam(lr=self.params.cnn_adam_learning_rate),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        if self.params.class_weights == 'no_weights':
            print('self.params.class_weights = no_weights')
            # All 63 classes weighted equally.
            classWeights = {i: 1.0 for i in range(63)}
        elif self.params.class_weights == 'class_weights':
            print('self.params.class_weights = class_weights')
            # Hand-set weights: 1.0 for every class except the overrides below.
            classWeights = {i: 1.0 for i in range(63)}
            classWeights.update({
                1: 0.6, 8: 1.4, 11: 1.4, 12: 0.6, 14: 0.6, 15: 1.4,
                17: 1.4, 18: 1.4, 19: 0.6, 21: 1.4, 29: 0.6, 31: 0.6,
                37: 1.4, 44: 1.4, 48: 0.6, 49: 1.4, 50: 0.6, 57: 1.4,
                58: 0.6, 61: 0.6
            })
        elif (self.params.class_weights == 'class_pond'):
            print('self.params.class_weights = class_pond')
            classWeights = {
                0: 0.65,
                1: 0.95,
                2: 0.83,
                3: 0.84,
                4: 0.80,
                5: 0.94,
                6: 0.91,
                7: 0.78,
                8: 0.95,
                9: 0.85,
                10: 0.82,
                11: 0.88,
                12: 0.30,
                13: 0.85,
                14: 0.98,
                15: 0.58,
                16: 0.81,
                17: 0.87,
                18: 0.82,
                19: 0.98,
                20: 0.80,
                21: 0.81,
                22: 0.87,
                23: 0.62,
                24: 0.85,
                25: 0.81,
                26: 0.87,
                27: 0.97,
                28: 0.87,
                29: 0.56,
                30: 0.73,
                31: 0.99,
                32: 0.80,
                33: 0.84,
                34: 0.83,
                35: 0.53,
                36: 0.40,
                37: 0.82,
                38: 0.94,
                39: 0.85,
                40: 0.81,
                41: 0.84,
                42: 0.20,
                43: 0.95,
                44: 0.82,
                45: 0.91,
                46: 0.98,
                47: 0.80,
                48: 0.63,
                49: 0.82,
                50: 0.82,
                51: 0.99,
                52: 0.80,
                53: 0.81,
                54: 0.82,
                55: 0.71,
                56: 0.81,
                57: 0.74,
                58: 0.78,
                59: 0.85,
                60: 0.81,
                61: 0.82,
                62: 0.94
            }
        elif (self.params.class_weights == 'sklearn_class_weight'):
            print('self.params.class_weights = sklearn_class_weight')
            train_qtd = [
                10381, 1660, 5675, 5179, 6715, 1887, 3044, 7079, 1753, 4847,
                5999, 3828, 28445, 4946, 2433, 13875, 6486, 4210, 6067, 2399,
                6616, 6417, 4447, 12504, 5090, 6144, 4383, 998, 4267, 14729,
                8935, 255, 6676, 5165, 5727, 15404, 22198, 6063, 2140, 5064,
                6402, 5452, 33114, 1559, 6120, 2848, 758, 6794, 12386, 5738,
                5791, 258, 6778, 6338, 5877, 9703, 6228, 8487, 7270, 4917,
                6263, 5921, 1862
            ]

            Y = []
            for i in range(len(train_qtd)):
                for j in range(train_qtd[i]):
                    Y.append(i)
            print(len(Y))

            # compute_class_weight returns an ndarray; Keras expects
            # class_weight as a dict mapping class index to weight.
            classWeights = dict(enumerate(class_weight.compute_class_weight(
                'balanced', np.unique(Y), Y)))

            print(classWeights)
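
            # For reference, the 'balanced' heuristic assigns class c the
            # weight n_samples / (n_classes * count_c), so rare classes
            # (e.g. the 255-sample class above) receive the largest weights.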

        train_datagen = img_metadata_generator(self.params, trainData,
                                               metadataStats)

        def lr_scheduler1(epoch):
            # Fall back to the configured Adam rate so `lr` is always bound.
            lr = self.params.cnn_adam_learning_rate
            if self.params.lr_mode == 'progressive_drops':
                if epoch >= 0.75 * self.params.cnn_epochs:
                    lr = 1e-6
                elif epoch >= 0.15 * self.params.cnn_epochs:
                    lr = 1e-5
                else:
                    lr = 1e-4
            print('lr_scheduler1 - epoch: %d, lr: %f' % (epoch, lr))
            return lr

        def lr_scheduler2(epoch):
            # Same fallback; note the later second drop (45% vs. 15%).
            lr = self.params.cnn_adam_learning_rate
            if self.params.lr_mode == 'progressive_drops':
                if epoch > 0.75 * self.params.cnn_epochs:
                    lr = 1e-6
                elif epoch > 0.45 * self.params.cnn_epochs:
                    lr = 1e-5
                else:
                    lr = 1e-4
            print('lr_scheduler2 - epoch: %d, lr: %f' % (epoch, lr))
            return lr

        if (self.params.fine_tunning):
            lr_decay = LearningRateScheduler(lr_scheduler1)
        else:
            lr_decay = LearningRateScheduler(lr_scheduler2)

        print("training")
        fileName = 'weights.' + self.params.database + '.' + self.params.algorithm + '.' + self.params.prefix + '.{epoch:02d}.hdf5'
        filePath = os.path.join(
            self.params.directories['cnn_checkpoint_weights'], fileName)
        checkpoint = ModelCheckpoint(filepath=filePath,
                                     monitor='loss',
                                     verbose=0,
                                     save_best_only=False,
                                     save_weights_only=False,
                                     mode='auto',
                                     period=1)

        callbacks_list = [checkpoint, lr_decay]

        model.fit_generator(
            train_datagen,
            steps_per_epoch=(len(trainData) // self.params.batch_size_cnn + 1),
            epochs=self.params.cnn_epochs,
            class_weight=classWeights,
            callbacks=callbacks_list)

        fileNameEnd = 'weights.final.' + self.params.database + '.' + self.params.algorithm + '.' + self.params.prefix + '.hdf5'
        filePathEnd = os.path.join(
            self.params.directories['cnn_checkpoint_weights'], fileNameEnd)
        model.save(filePathEnd)
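
To close the loop, the final checkpoint saved above can be fed back into the challenge test path of Example #5; a hedged sketch, where the CnnTrainer wrapper is hypothetical and the weight file name simply mirrors the fileNameEnd pattern above:

import os

trainer = CnnTrainer(params)  # hypothetical class holding these methods
trainer.train_cnn()

weights = os.path.join(params.directories['cnn_checkpoint_weights'],
                       'weights.final.%s.%s.%s.hdf5' %
                       (params.database, params.algorithm, params.prefix))
params.model_weights = weights
trainer.test_models()  # Example #5 reads params.model_weights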