Example no. 1
def show_worst(model_file, n, attribute, mapping, dk2ak=None):
    # Dataset for analysis
    val_partition, val_labels = bu.get_ids_labels(val_labels_csv, class_map_file)
    pr_file = '.'.join(model_file.split('.')[:-1]) + '_predictions.csv'
    predictions, y_scores, img_ids = dt.get_scores_from_file(csv_path + pr_file, val_labels)
    args_sorted_scores = np.argsort(y_scores)

    shift = 10
    worst_ids = []
    i = 0
    while len(worst_ids) < n + shift:
        if dk2ak:
            key = dk2ak(img_ids[args_sorted_scores[i]])
        else:
            key = img_ids[args_sorted_scores[i]]
        if mapping[key] == attribute:
            worst_ids.append(img_ids[args_sorted_scores[i]])
        i += 1

    data = []
    tag_list = []
    for img_id in worst_ids:
        print(img_id)
        data.append(cv2.imread(img_id))
        tag_list.append(str(np.argmax(predictions[img_id])) + ' (' + str(val_labels[img_id]) + ')')

    print(np.array(data).shape)
    plotting.show_imgs(range(shift, n + shift), 'Worst of ' + attribute, data, tag_list)

    return worst_ids
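
A minimal call sketch for the function above. It assumes the module-level path globals the other examples rely on (csv_path, val_labels_csv, class_map_file, attr_val_file) and mirrors the commented-out call in Example no. 3; the checkpoint filename is taken from the model list in Example no. 6.

# Sketch only: adjust the checkpoint name and attribute to your own setup.
weather, scene, timeofday, wst_dk2ak = bu.wst_attribute_mapping(attr_val_file)
worst = show_worst('densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22.hdf5',
                   10, 'daytime', timeofday, dk2ak=wst_dk2ak)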
Example no. 2
def subset_selection_test():
    labels_path = '../../bdd100k/classification/labels/'
    bdd100k_labels_path = "../../bdd100k/labels/"
    val_labels_csv = '../../bdd100k/classification/labels/val_ground_truth.csv'
    class_map_file = labels_path + 'class_mapping.csv'
    val_json = '../../bdd100k/labels/bdd100k_labels_images_val.json'
    attr_val_file = bdd100k_labels_path + 'bdd100k_labels_images_val_attributes.csv'
    attr_tr_file = bdd100k_labels_path + 'bdd100k_labels_images_train_attributes.csv'
    train_labels = '../../bdd100k/classification/labels/train_ground_truth.csv'

    class_map_file = bu.class_mapping(input_json=val_json,
                                      output_csv=class_map_file)

    # Dataset for analysis
    tr_partition, tr_labels = bu.get_ids_labels(train_labels, class_map_file)

    w, s, tod, wst_dk2ak = bu.wst_attribute_mapping(attr_tr_file)

    d_tod = analyse.DiscreteAttribute(tod)
    d_s = analyse.DiscreteAttribute(s)

    scene_tod_distrib = np.zeros(
        (len(d_tod.get_labels()), len(d_s.get_labels())))
    print(scene_tod_distrib)
    for data_key in tr_partition:
        attr_key = wst_dk2ak(data_key)
        x = d_tod.index_of(d_tod.labelof(attr_key))
        y = d_s.index_of(d_s.labelof(attr_key))
        scene_tod_distrib[x][y] += 1

    print("        " + " / ".join(d_s.get_labels()))
    for k in range(len(scene_tod_distrib)):
        print(k)
        print(d_tod.get_labels()[k] + "\t" +
              " ".join([str(val) for val in scene_tod_distrib[k]]))
Example no. 3
def bdd100k_analysis(model_file, do_plot_boxes=False):
    class_map_file = bu.class_mapping(input_json=val_json, output_csv=labels_path + 'class_mapping.csv')

    # Dataset for analysis
    val_partition, val_labels = bu.get_ids_labels(val_labels_csv, class_map_file)

    # Attribute mapping and data_key to attr_key function (dk2ak)
    weather, scene, timeofday, wst_dk2ak = bu.wst_attribute_mapping(attr_val_file)
    box_size, box_size_dk2ak = bu.box_size_attribute_mapping(box_val_file, box_val_json)

    # show_worst(model_file, 10, 'daytime', timeofday, dk2ak=wst_dk2ak)

    # for attr in attributes.values():
    #     print(attr['d_attribute'].get_labels())
    #     print(attr['d_attribute'].get_distribution())

    attributes = {'weather': {'name': 'weather',
                              'map': weather,
                              'dk2ak': wst_dk2ak,
                              'd_attribute': None,
                              'metrics': ['score', 'acc'],
                              'weaks': [[], []]},
                  'scene': {'name': 'scene',
                            'map': scene,
                            'dk2ak': wst_dk2ak,
                            'd_attribute': None,
                            'metrics': ['score', 'acc'],
                            'weaks': [[], []]},
                  'timeofday': {'name': 'timeofday',
                                'map': timeofday,
                                'dk2ak': wst_dk2ak,
                                'd_attribute': None,
                                'metrics': ['score', 'acc'],
                                'weaks': [[], []]},
                  'box_size': {'name': 'box_size',
                               'map': box_size,
                               'dk2ak': box_size_dk2ak,
                               'd_attribute': None,
                               'metrics': ['score', 'acc'],
                               'weaks': [[], []]},
                  }

    bdd100k_model_analysis(model_file, attributes, val_labels)

    if do_plot_boxes:
        plotting.plot_discrete_attribute_scores(attributes, 'score', model_file)
        # plotting.plot_discrete_attribute_scores(attributes, 'acc', model_file)

    return attributes
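
A usage sketch, assuming the module-level globals this function reads (val_json, labels_path, val_labels_csv, attr_val_file, box_val_file, box_val_json) are defined; the checkpoint name is the one listed in Example no. 6, and get_labels() is the accessor already used in this file.

# Sketch: run the per-attribute analysis for one checkpoint and inspect weather labels.
attrs = bdd100k_analysis('densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22.hdf5',
                         do_plot_boxes=True)
print(attrs['weather']['d_attribute'].get_labels())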
Example no. 4
def bdd100k_compare(model_file, ref_file, attribute, metric):
    class_map_file = bu.class_mapping(input_json=val_json, output_csv=labels_path + 'class_mapping.csv')

    # Dataset for analysis
    val_partition, val_labels = bu.get_ids_labels(val_labels_csv, class_map_file)

    # Attribute mapping and data_key to attr_key function (dk2ak)
    weather, scene, timeofday, wst_dk2ak = bu.wst_attribute_mapping(attr_val_file)
    box_size, box_size_dk2ak = bu.box_size_attribute_mapping(box_val_file, box_val_json)

    attributes = {'weather': {'name': 'weather',
                              'map': weather,
                              'dk2ak': wst_dk2ak,
                              'd_attribute': None,
                              'metrics': ['score', 'acc'],
                              'weaks': [[], []]},
                  'scene': {'name': 'scene',
                            'map': scene,
                            'dk2ak': wst_dk2ak,
                            'd_attribute': None,
                            'metrics': ['score', 'acc'],
                            'weaks': [[], []]},
                  'timeofday': {'name': 'timeofday',
                                'map': timeofday,
                                'dk2ak': wst_dk2ak,
                                'd_attribute': None,
                                'metrics': ['score', 'acc'],
                                'weaks': [[], []]},
                  'box_size': {'name': 'box_size',
                               'map': box_size,
                               'dk2ak': box_size_dk2ak,
                               'd_attribute': None,
                               'metrics': ['score', 'acc'],
                               'weaks': [[], []]},
                  }

    bdd100k_model_analysis(model_file, attributes, val_labels)
    labels = attributes[attribute]['d_attribute'].get_labels()
    distrib = [int(v) for v in attributes[attribute]['d_attribute'].get_distribution()]
    series = [attributes[attribute]['d_attribute'].get_metric_value_list(metric, label) for label in labels]
    series_names = ["%s (%1.2f)" % (labels[k], distrib[k] / sum(distrib)) for k in xrange(len(labels))]
    attributes[attribute]['d_attribute'].flush()

    bdd100k_model_analysis(ref_file, attributes, val_labels)
    series_ref = [attributes[attribute]['d_attribute'].get_metric_value_list(metric, label) for label in labels]

    plotting.n_box_plot_compare(series, series_ref, series_names, metric)
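
A call sketch for the comparison above, assuming the same module-level globals as Example no. 3. The reference checkpoint name is the one from Example no. 6; the fine-tuned filename here is hypothetical and stands in for whatever checkpoint the fine-tuning runs produce.

# Sketch: compare a fine-tuned checkpoint against its reference on time of day.
bdd100k_compare('densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22_ftep15_vl0.20.hdf5',
                'densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22.hdf5',
                'timeofday', 'score')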
Example no. 5
def bdd100k_sel_partition_test():
    labels_path = '../../bdd100k/classification/labels/'
    train_labels = '../../bdd100k/classification/labels/train_ground_truth.csv'
    val_json = '../../bdd100k/labels/bdd100k_labels_images_val.json'

    class_map_file = bu.class_mapping(input_json=val_json,
                                      output_csv=labels_path +
                                      'class_mapping.csv')

    # Datasets
    tr_partition, tr_labels = bu.get_ids_labels(train_labels, class_map_file)
    ft_partition = tr_partition[500000:1000000]
    sel_partition = analyse.select_ft_data(
        'densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22.hdf5',
        ft_partition)

    print('selection res=', len(sel_partition))
Example no. 6
def bdd100k_cc_analysis():
    model_files = ['densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22.hdf5',
                   # 'resnet50_bdd100k_cl0-500k_20ep_woda_ep13_vl0.27.hdf5',
                   # 'mobilenet_bdd100k_cl0-500k_20ep_woda_ep15_vl0.24.hdf5',
                   # 'mobilenetv2_bdd100k_cl0-500k_20ep_woda_ep17_vl0.22.hdf5',
                   # 'nasnet_bdd100k_cl0-500k_20ep_woda_ep17_vl0.24.hdf5',
                   ]

    class_map_file = bu.class_mapping(input_json=val_json, output_csv=labels_path + 'class_mapping.csv')

    # Dataset for analysis
    val_partition, val_labels = bu.get_ids_labels(val_labels_csv, class_map_file)

    for m in model_files:

        # test_subset creation
        pr_file = '.'.join(m.split('.')[:-1]) + '_predictions.csv'
        predictions, y_scores, img_ids = dt.get_scores_from_file(csv_path + pr_file, val_labels)
        top_n_args, bot_n_args = dt.get_topbot_n_args(20000, y_scores)

        cc_high = metrics_color.ColorDensityCube(resolution=4)
        for arg in top_n_args:
            cc_high.feed(cv2.imread(img_ids[arg]))
        print('high sum', np.sum(cc_high.get_cube().flatten()))
        cc_high.normalize()
        cc_high.plot_cube()

        cc_low = metrics_color.ColorDensityCube(resolution=4)
        for arg in bot_n_args:
            cc_low.feed(cv2.imread(img_ids[arg]))
        print('low sum', np.sum(cc_low.get_cube().flatten()))
        cc_low.normalize()
        cc_low.plot_cube()

        cc_diff = cc_high.substract(cc_low, 'value')
        print('diff sum', np.sum(cc_diff.get_cube().flatten()))
        print('diff mean', np.mean(cc_diff.get_cube().flatten()))
        cc_diff.normalize()
        # cc_diff.plot_cube()

        # cc_diff.normalize()
        cc_diff.plot_cube(title='Color cube analysis difference (' + str(20000) + ' images/series)', normalize=True,
                          save=True)
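
The two color cubes are fed from the 20,000 best- and worst-scored images. A self-contained sketch of the top/bottom split that dt.get_topbot_n_args is assumed to return (indices of the n highest- and lowest-scoring images, by plain argsort):

# Sketch only: this is an assumption about get_topbot_n_args' behaviour.
import numpy as np

y_scores = np.array([0.9, 0.1, 0.5, 0.7, 0.3])
n = 2
order = np.argsort(y_scores)   # ascending
bot_n_args = order[:n]         # worst-scored images
top_n_args = order[-n:]        # best-scored images
print(top_n_args, bot_n_args)  # [3 0] [1 4]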
Example no. 7
def bdd100k_local_finetune_test(model_files):
    labels_path = '../../bdd100k/classification/labels/'
    train_labels_csv = '../../bdd100k/classification/labels/train_ground_truth.csv'
    val_labels_csv = '../../bdd100k/classification/labels/val_ground_truth.csv'
    val_json = '../../bdd100k/labels/bdd100k_labels_images_val.json'

    # Parameters
    params = {
        'dim': (64, 64, 3),
        'batch_size': 32,
        'n_classes': 10,
        'shuffle': True
    }
    epochs = 30
    class_map_file = bu.class_mapping(input_json=val_json,
                                      output_csv=labels_path +
                                      'class_mapping.csv')

    # Datasets
    tr_partition, tr_labels = bu.get_ids_labels(train_labels_csv,
                                                class_map_file)
    val_partition, val_labels = bu.get_ids_labels(val_labels_csv,
                                                  class_map_file)

    for model_file in model_files:
        ft_partition = tr_partition[500000:1000000]

        if 'densenet121' in model_file:
            ref_generator = mt.DataGenerator(tr_partition[:300000], tr_labels,
                                             **params)
            val_generator = mt.DataGenerator(val_partition[:100000],
                                             val_labels, **params)

            mt.ft(h5_path + model_file,
                  ref_generator,
                  val_generator,
                  epochs,
                  save_history=True,
                  tag='ref2')
            mt.ft(h5_path + model_file,
                  ref_generator,
                  val_generator,
                  epochs,
                  save_history=True,
                  tag='ref3')

        else:

            ref_generator = mt.DataGenerator(tr_partition[:300000], tr_labels,
                                             **params)
            val_generator = mt.DataGenerator(val_partition[:100000],
                                             val_labels, **params)

            mt.ft(h5_path + model_file,
                  ref_generator,
                  val_generator,
                  epochs,
                  save_history=True,
                  tag='ref2')
            mt.ft(h5_path + model_file,
                  ref_generator,
                  val_generator,
                  epochs,
                  save_history=True,
                  tag='ref3')

            epochs = 15
            # daytime timeofday finetuning
            # Selected data partition
            day_sel_partition = analyse.select_ft_data(model_file,
                                                       ft_partition,
                                                       'timeofday',
                                                       'daytime',
                                                       do_plot_boxes=False)
            # Generators
            sp = 4 * len(day_sel_partition) // 5  # split point
            day_ft_generator = mt.DataGenerator(day_sel_partition[:sp],
                                                tr_labels, **params)
            day_val_generator = mt.DataGenerator(day_sel_partition[sp:],
                                                 tr_labels, **params)

            mt.ft(h5_path + model_file,
                  day_ft_generator,
                  day_val_generator,
                  epochs,
                  save_history=True,
                  tag='daytime2')
            mt.ft(h5_path + model_file,
                  day_ft_generator,
                  day_val_generator,
                  epochs,
                  save_history=True,
                  tag='daytime3')

            # Night timeofday finetuning
            night_sel_partition = analyse.select_ft_data(
                model_file, ft_partition, 'timeofday', 'night')
            sp = 4 * len(night_sel_partition) // 5  # split point
            night_ft_generator = mt.DataGenerator(night_sel_partition[:sp],
                                                  tr_labels, **params)
            night_val_generator = mt.DataGenerator(night_sel_partition[sp:],
                                                   tr_labels, **params)

            mt.ft(h5_path + model_file,
                  night_ft_generator,
                  night_val_generator,
                  epochs,
                  save_history=True,
                  tag='night2')
            mt.ft(h5_path + model_file,
                  night_ft_generator,
                  night_val_generator,
                  epochs,
                  save_history=True,
                  tag='night3')

            # Highway scene finetuning
            highway_sel_partition = analyse.select_ft_data(
                model_file, ft_partition, 'scene', 'highway')
            sp = 4 * len(highway_sel_partition) // 5  # split point
            highway_ft_generator = mt.DataGenerator(highway_sel_partition[:sp],
                                                    tr_labels, **params)
            highway_val_generator = mt.DataGenerator(
                highway_sel_partition[sp:], tr_labels, **params)

            mt.ft(h5_path + model_file,
                  highway_ft_generator,
                  highway_val_generator,
                  epochs,
                  save_history=True,
                  tag='highway2')
            mt.ft(h5_path + model_file,
                  highway_ft_generator,
                  highway_val_generator,
                  epochs,
                  save_history=True,
                  tag='highway3')

            # City street scene finetuning
            city_street_sel_partition = analyse.select_ft_data(
                model_file, ft_partition, 'scene', 'city street')
            sp = 4 * len(city_street_sel_partition) // 5  # split point
            city_street_ft_generator = mt.DataGenerator(
                city_street_sel_partition[:sp], tr_labels, **params)
            city_street_val_generator = mt.DataGenerator(
                city_street_sel_partition[sp:], tr_labels, **params)

            mt.ft(h5_path + model_file,
                  city_street_ft_generator,
                  city_street_val_generator,
                  epochs,
                  save_history=True,
                  tag='city_street2')
            mt.ft(h5_path + model_file,
                  city_street_ft_generator,
                  city_street_val_generator,
                  epochs,
                  save_history=True,
                  tag='city_street3')
Example no. 8
def bdd100k_global_finetune_test(model_files):
    labels_path = '../../bdd100k/classification/labels/'
    train_labels = '../../bdd100k/classification/labels/train_ground_truth.csv'
    val_labels_csv = '../../bdd100k/classification/labels/val_ground_truth.csv'
    # class_map_file = labels_path + 'class_mapping.csv'
    val_json = '../../bdd100k/labels/bdd100k_labels_images_val.json'

    # Parameters
    params = {
        'dim': (64, 64, 3),
        'batch_size': 32,
        'n_classes': 10,
        'shuffle': False
    }

    n_test_data = 100000
    epochs = 30

    class_map_file = bu.class_mapping(input_json=val_json,
                                      output_csv=labels_path +
                                      'class_mapping.csv')

    # Datasets
    tr_partition, tr_labels = bu.get_ids_labels(train_labels, class_map_file)
    val_partition, val_labels = bu.get_ids_labels(val_labels_csv,
                                                  class_map_file)

    for model_file in model_files:
        ft_partition = tr_partition[500000:1000000]
        n_sel_data = 300000
        sel_partition = analyse.select_ft_data(
            model_file, ft_partition)  # Selected data partition

        # Generators
        finetune_generator = mt.DataGenerator(sel_partition[:n_sel_data],
                                              tr_labels, **params)
        reference_generator = mt.DataGenerator(
            tr_partition[500000:500000 + len(ft_partition)], tr_labels,
            **params)
        validation_generator = mt.DataGenerator(val_partition[:n_test_data],
                                                val_labels, **params)

        # finetune
        model = load_model(h5_path + model_file)
        model.compile('adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        checkpoint = ModelCheckpoint(model_file.replace('.hdf5', '') +
                                     '_ftep{epoch:02d}_vl{val_loss:.2f}.hdf5',
                                     monitor='val_acc',
                                     verbose=0,
                                     save_best_only=True,
                                     save_weights_only=False,
                                     mode='auto')

        # Train model on selected dataset
        ft_history = model.fit_generator(generator=finetune_generator,
                                         validation_data=validation_generator,
                                         verbose=1,
                                         epochs=epochs,
                                         use_multiprocessing=True,
                                         workers=6,
                                         callbacks=[checkpoint])

        with open(model_file.replace('.hdf5', '') + '_ft_hist.pkl', 'wb') as fd:
            pickle.dump(ft_history.history, fd)

        # reference
        model = load_model(h5_path + model_file)
        model.compile('adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        checkpoint = ModelCheckpoint(model_file.replace('.hdf5', '') +
                                     '_refep{epoch:02d}_vl{val_loss:.2f}.hdf5',
                                     monitor='val_acc',
                                     verbose=0,
                                     save_best_only=True,
                                     save_weights_only=False,
                                     mode='auto')

        # Train model on ref dataset
        ref_history = model.fit_generator(generator=reference_generator,
                                          validation_data=validation_generator,
                                          verbose=1,
                                          epochs=epochs,
                                          use_multiprocessing=True,
                                          workers=6,
                                          callbacks=[checkpoint])

        with open(model_file.replace('.hdf5', '') + '_ref_hist.pkl', 'wb') as fd:
            pickle.dump(ref_history.history, fd)
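
Reading the two pickled histories back makes the finetune-vs-reference comparison easy to plot. A small sketch, assuming the pickles hold the history.history dicts as written above and the densenet121 checkpoint name from Example no. 6:

# Sketch: compare validation loss of the finetuned and reference runs.
import pickle
import matplotlib.pyplot as plt

with open('densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22_ft_hist.pkl', 'rb') as fd:
    ft_hist = pickle.load(fd)
with open('densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22_ref_hist.pkl', 'rb') as fd:
    ref_hist = pickle.load(fd)

plt.plot(ft_hist['val_loss'], label='selected data (ft)')
plt.plot(ref_hist['val_loss'], label='reference data')
plt.xlabel('epoch')
plt.ylabel('val_loss')
plt.legend()
plt.show()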
Example no. 9
def load_model_test(model_files, overwrite=False):
    # Early exit: only print the model summaries; the prediction code below
    # is left in place but never reached.
    for model_file in model_files:
        m = load_model(h5_path + model_file)
        m.summary()
    return
    labels_path = '../../bdd100k/classification/labels/'
    val_labels_csv = '../../bdd100k/classification/labels/val_ground_truth.csv'
    val_json = '../../bdd100k/labels/bdd100k_labels_images_val.json'

    # Parameters
    params = {
        'dim': (64, 64, 3),
        'batch_size': 32,
        'n_classes': 10,
        'shuffle': False
    }

    n_test_data = 100000

    class_map_file = bu.class_mapping(input_json=val_json,
                                      output_csv=labels_path +
                                      'class_mapping.csv')

    # Datasets
    val_partition, val_labels = bu.get_ids_labels(val_labels_csv,
                                                  class_map_file)

    # Generators
    validation_generator = mt.DataGenerator(val_partition[:n_test_data],
                                            val_labels, **params)

    label_distrib = [
        list(val_labels.values())[:n_test_data].count(k) / float(n_test_data)
        for k in range(params['n_classes'])
    ]
    print(label_distrib)

    for model_file in model_files:
        predictions_file = '.'.join(
            model_file.split('.')[:-1]) + '_predictions.csv'

        if os.path.isfile(csv_path + predictions_file) and not overwrite:
            print('File ' + predictions_file + ' already exists. Not written.')
            continue

        start_time = datetime.now()
        m = load_model(h5_path + model_file)
        print('File successfully loaded', model_file, 'in',
              str(datetime.now() - start_time))

        # print("Validation ")
        # start_time = datetime.now()
        # print(m.metrics_names)
        # print(m.evaluate_generator(validation_generator))
        # print('Model successfully evaluated', model_file, 'in (s)', str(datetime.now() - start_time))

        print('Writing predictions')
        out_pr = open(csv_path + predictions_file, 'w')

        start_time = datetime.now()
        y_predicted = m.predict_generator(validation_generator)

        # prediction
        for i in range(len(y_predicted)):
            out_pr.write(val_partition[i] + ',' +
                         str(y_predicted[i].tolist()) + '\n')
        out_pr.close()

        predicted_classes = np.argmax(y_predicted, axis=1)

        print('Predictions successfully written', model_file, 'in',
              str(datetime.now() - start_time))
        true_classes = [
            val_labels[img_id] for img_id in val_partition[:len(y_predicted)]
        ]
        acc = metrics.accuracy(predicted_classes, true_classes)
        print('acc=', acc)
        print(sk_metrics.confusion_matrix(true_classes, predicted_classes))
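
The prediction CSV written above has one line per image: the image id, a comma, then the stringified list of class probabilities. A sketch for reading it back, based only on that write format (the filename and the csv_path global are the same assumptions as above):

# Sketch: parse one of the prediction CSVs written by the loop above.
import ast

predictions = {}
with open(csv_path + 'densenet121_bdd100k_cl0-500k_20ep_woda_ep20_vl0.22_predictions.csv') as fd:
    for line in fd:
        img_id, scores = line.split(',', 1)   # split off the id; the list keeps its commas
        predictions[img_id] = ast.literal_eval(scores)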
Example no. 10
def train_bdd100k_cl():
    labels_path = '../../bdd100k/classification/labels/'
    train_labels = '../../bdd100k/classification/labels/train_ground_truth.csv'
    val_labels_csv = '../../bdd100k/classification/labels/val_ground_truth.csv'
    # class_map_file = labels_path + 'class_mapping.csv'
    val_json = '../../bdd100k/labels/bdd100k_labels_images_val.json'

    epochs = 20

    # Parameters
    params = {
        'dim': (64, 64, 3),
        'batch_size': 32,
        'n_classes': 10,
        'shuffle': True
    }

    class_map_file = bu.class_mapping(input_json=val_json,
                                      output_csv=labels_path +
                                      'class_mapping.csv')

    # Datasets
    val_partition, val_labels = bu.get_ids_labels(val_labels_csv, class_map_file)
    tr_partition, tr_labels = bu.get_ids_labels(train_labels, class_map_file)

    # Generators
    training_generator = mt.DataGenerator(tr_partition[:500000], tr_labels,
                                          **params)
    validation_generator = mt.DataGenerator(val_partition[:100000], val_labels,
                                            **params)
    print(len(training_generator))

    for m in models:

        weight_file = mt.weight_file_name(m,
                                          'bdd100k_cl0-500k',
                                          epochs,
                                          data_augmentation=False)
        weight_file = h5_path + weight_file
        print("Building: " + weight_file)
        if m in ('mobilenet', 'mobilenetv2', 'nasnet'):
            ###
            model = mt.model_struct(m, (224, 224, 3),
                                    params['n_classes'],
                                    weights='imagenet',
                                    include_top=False)
            new_model = mt.model_struct(m,
                                        params['dim'],
                                        params['n_classes'],
                                        weights=None,
                                        include_top=False)
            print("Loading weights...")

            for new_layer, layer in zip(new_model.layers[1:],
                                        model.layers[1:]):
                new_layer.set_weights(layer.get_weights())
            base_model = new_model
            ###
        else:
            base_model = mt.model_struct(m,
                                         params['dim'],
                                         params['n_classes'],
                                         weights='imagenet',
                                         include_top=False)

        print("Configuring top layers")
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(1024, activation='relu')(x)
        predictions = Dense(10, activation='softmax')(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.summary()
        # for layer in base_model.layers:
        #     layer.trainable = False

        model.compile('adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        checkpoint = ModelCheckpoint(weight_file.rstrip('.h5') +
                                     '_ep{epoch:02d}_vl{val_loss:.2f}.hdf5',
                                     monitor='val_acc',
                                     verbose=0,
                                     save_best_only=True,
                                     save_weights_only=False,
                                     mode='auto')

        # Train model on dataset
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            verbose=1,
                            epochs=epochs,
                            use_multiprocessing=True,
                            workers=6,
                            callbacks=[checkpoint])
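
The mobilenet/nasnet branch above transfers ImageNet weights into a model built for the smaller 64x64 input by copying weights layer by layer. A self-contained toy of that pattern (the tiny architecture is illustrative only); it works because convolutional and dense weight shapes here do not depend on the spatial input size:

# Toy of the layer-by-layer weight copy used above.
from keras.models import Sequential
from keras.layers import Conv2D, GlobalAveragePooling2D, Dense

def tiny_cnn(input_shape):
    m = Sequential()
    m.add(Conv2D(8, (3, 3), activation='relu', input_shape=input_shape))
    m.add(GlobalAveragePooling2D())
    m.add(Dense(10, activation='softmax'))
    return m

src = tiny_cnn((224, 224, 3))   # stands in for the ImageNet-resolution model
dst = tiny_cnn((64, 64, 3))     # same layers at the training resolution
for dst_layer, src_layer in zip(dst.layers, src.layers):
    dst_layer.set_weights(src_layer.get_weights())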