def script_setUpFilesForTrainingOnSubset(params):
    """Write val/train lists and synset files for a subset of classes.

    The class ids listed in ``params.to_exclude_text_file`` (one per line)
    are passed to ``imagenet.removeImagesFromListByClass`` together with the
    parent validation and training lists; the kept (image, class) pairs are
    written with ``imagenet.writeNewDataClassFile``. The class ids returned
    for the training list are then written to ``params.new_synset_file``
    (one id per line) and to ``params.new_synset_words_file``
    (``"<id> <label>"`` per line, labels taken from
    ``params.parent_synset_words_file``).

    Finally the new synset file is read back and every excluded id that
    still occurs in it is reported on stdout as ``FOUND ERROR <id>``.

    Args:
        params: object exposing the attributes ``parent_synset_words_file``,
            ``parent_val_file``, ``parent_train_file``,
            ``parent_synset_file``, ``new_val_file``, ``new_train_file``,
            ``new_synset_file``, ``new_synset_words_file`` and
            ``to_exclude_text_file`` (all file paths).

    Raises:
        AssertionError: if the classes returned for the validation list and
            for the training list differ (compared via ``str``).
        ValueError: if a kept class id has no entry in the parent
            synset-words file.
    """
    # Read every attribute up front so a missing one fails before any I/O.
    parent_synset_words_file = params.parent_synset_words_file
    parent_val_file = params.parent_val_file
    parent_train_file = params.parent_train_file
    parent_synset_file = params.parent_synset_file
    new_val_file = params.new_val_file
    new_train_file = params.new_train_file
    new_synset_file = params.new_synset_file
    new_synset_words_file = params.new_synset_words_file
    to_exclude_text_file = params.to_exclude_text_file

    # id -> label lookup; setdefault keeps the first entry for a duplicated
    # id, which is what the former list.index() lookup returned.
    label_by_id = {}
    for entry in imagenet.readLabelsFile(parent_synset_words_file):
        label_by_id.setdefault(entry[0], entry[1])

    # Text mode works on both Python 2 and 3. Blank lines are dropped: an
    # empty id is a substring of everything and would always trip the
    # sanity check at the end.
    with open(to_exclude_text_file, "r") as f:
        to_exclude = [line.strip() for line in f]
    to_exclude = [class_id for class_id in to_exclude if class_id]

    ims_to_keep, _, classes_to_keep = imagenet.removeImagesFromListByClass(
        parent_val_file, parent_synset_file, to_exclude
    )
    # list(...) so the helper receives a real list on Python 3 as well.
    classes_uni_val = imagenet.writeNewDataClassFile(
        new_val_file, list(zip(ims_to_keep, classes_to_keep))
    )

    ims_to_keep, _, classes_to_keep = imagenet.removeImagesFromListByClass(
        parent_train_file, parent_synset_file, to_exclude
    )
    classes_uni_train = imagenet.writeNewDataClassFile(
        new_train_file, list(zip(ims_to_keep, classes_to_keep))
    )

    # Explicit raise (instead of a bare assert) so the check survives -O
    # while callers still see the same exception type.
    if str(classes_uni_val) != str(classes_uni_train):
        raise AssertionError(
            "validation and training lists produced different classes"
        )

    # Resolve all labels before touching the synset files so a missing
    # label cannot leave a half-written file behind.
    synset_word_lines = []
    for class_id in classes_uni_train:
        if class_id not in label_by_id:
            raise ValueError(
                "%s has no entry in %s" % (class_id, parent_synset_words_file)
            )
        synset_word_lines.append(
            class_id + " " + label_by_id[class_id] + "\n"
        )

    with open(new_synset_file, "w") as f:
        f.writelines(class_id + "\n" for class_id in classes_uni_train)

    with open(new_synset_words_file, "w") as f:
        f.writelines(synset_word_lines)

    with open(new_synset_file, "r") as f:
        content = f.read()

    # sanity check: no excluded id may appear in the file just written
    for id_to_exclude in to_exclude:
        if id_to_exclude in content:
            print("%s %s" % ("FOUND ERROR", id_to_exclude))
# Example #2
# 0
def script_setUpPascalExcludedTextFiles():
    """Write val/train lists and a synset file with the Pascal overlap removed.

    Class ids are read from the ILSVRC12 ``synset_words.txt`` and from
    ``pascal_classes.txt``; ``imagenet.removeClassesWithOverlap`` turns the
    two id lists into the list of classes to exclude. The ILSVRC12 ``val.txt``
    and ``train.txt`` lists are filtered with
    ``imagenet.removeImagesFromListByClass`` and written to the
    ``nn_performance_without_pascal`` directory, followed by a ``synsets.txt``
    holding the class ids returned for the training list (one per line).

    Finally the new training list is read back and every excluded id that
    occurs in it is reported on stdout as ``FOUND ERROR <id>``.

    All input and output paths are hard-coded in the body.

    Raises:
        AssertionError: if the classes returned for the validation list and
            for the training list differ (compared via ``str``).
    """
    out_dir = '/disk2/octoberExperiments/nn_performance_without_pascal'

    path_to_file = '../../data/ilsvrc12/synset_words.txt'
    val_ids = imagenet.readLabelsFile(path_to_file)
    # First field of every entry; a comprehension works on Python 2 and 3
    # (indexing the result of zip() does not on Python 3).
    val_just_ids = [entry[0] for entry in val_ids]

    pascal_ids_file = out_dir + '/pascal_classes.txt'
    pascal_ids = imagenet.readLabelsFile(pascal_ids_file)
    pascal_just_ids = [entry[0] for entry in pascal_ids]

    to_exclude = imagenet.removeClassesWithOverlap(val_just_ids,
                                                   pascal_just_ids)

    mapping_file = '../../data/ilsvrc12/synsets.txt'

    im_list_file = '../../data/ilsvrc12/val.txt'
    ims_to_keep, _, classes_to_keep = imagenet.removeImagesFromListByClass(
        im_list_file, mapping_file, to_exclude)

    new_file_val = out_dir + '/val.txt'
    # Qualified with the module name, matching the training call below;
    # list(...) so the helper receives a real list on Python 3 as well.
    classes_uni_val = imagenet.writeNewDataClassFile(
        new_file_val, list(zip(ims_to_keep, classes_to_keep)))

    im_list_file = '../../data/ilsvrc12/train.txt'
    ims_to_keep, _, classes_to_keep = imagenet.removeImagesFromListByClass(
        im_list_file, mapping_file, to_exclude)

    new_file_train = out_dir + '/train.txt'
    classes_uni_train = imagenet.writeNewDataClassFile(
        new_file_train, list(zip(ims_to_keep, classes_to_keep)))

    # Explicit raise (instead of a bare assert) so the check survives -O
    # while callers still see the same exception type.
    if str(classes_uni_val) != str(classes_uni_train):
        raise AssertionError(
            'validation and training lists produced different classes')

    class_file = out_dir + '/synsets.txt'

    # Text mode so that writing str works on both Python 2 and 3.
    with open(class_file, 'w') as f:
        f.writelines(class_id + '\n' for class_id in classes_uni_train)

    # NOTE(review): the check reads back the training list (the file the
    # original code had last bound to its output-path variable), not
    # synsets.txt. Kept as is -- confirm which file was intended.
    with open(new_file_train, 'r') as f:
        content = f.read()

    # sanity check: no excluded id may appear in the file read above
    for id_to_exclude in to_exclude:
        if id_to_exclude in content:
            print('%s %s' % ('FOUND ERROR', id_to_exclude))