Exemplo n.º 1
0
def read_train(f):
    """
    Reads train data

    Rows whose image name does not start with ``w_`` and end with ``.jpg``
    are skipped. Blank rows and rows with fewer than two columns are skipped
    as well, and columns after the second are ignored (the header check
    likewise only looks at the first two columns).

    :param f: csv file (any iterable of lines accepted by csv.reader) whose
        header starts with ``Image,whaleID`` and whose rows are
        image name - whale name pairs
    :rtype : tuple of (numpy array built from the list of
        (image id, whale id) pairs, set of the distinct whale ids)
    :raises AssertionError: if the first two header cells are not
        ``Image`` and ``whaleID``, or a kept row's whale name does not start
        with ``whale_``
    :raises StopIteration: if f contains no header row at all
    """
    reader = csv.reader(f)

    header = next(reader)
    assert (header[0], header[1]) == ('Image', 'whaleID')

    whale_ids = set()
    image_ids_whale_ids = []

    for row in reader:
        # csv.reader yields [] for a blank line; unpacking such a row (or one
        # with extra columns) used to raise ValueError.
        if len(row) < 2:
            continue
        image_name, whale_name = row[0], row[1]

        if not (image_name.startswith('w_') and image_name.endswith('.jpg')):
            continue
        assert whale_name.startswith('whale_')

        image_id = ImagesReader.get_image_id(image_name)
        # The whale id is the token right after the "whale_" prefix.
        whale_id = whale_name.split("_")[1]
        whale_ids.add(whale_id)

        image_ids_whale_ids.append((image_id, whale_id))

    return np.array(image_ids_whale_ids), whale_ids
Exemplo n.º 2
0
def main():
    """
    Trains the CNN on the images listed in the train csv and writes a
    submission for every other image known to the images reader

    Steps: read the train csv, pre-process the images directory, load the
    train vectors and their whale ids, fit the classifier, predict
    probabilities for the remaining images and write them to
    SUBMISSION_FILENAME.

    :rtype : int, always 0
    """
    with open(TRAIN_FILENAME) as train_data_file:
        labelled_pairs, whale_ids = utility.read_train(train_data_file)

        reader = ImagesReader(IMAGES_DIR)
        reader.pre_process(
            image_processors.region_crop_gray_downscale,
            rewrite=False,
            threads=1,
        )

        # Column 0 of the pairs array holds the image ids, one per csv row.
        train_ids = labelled_pairs[:, 0]
        seen_in_train = set(train_ids)

        # Images the reader knows about that have no row in the train csv.
        known_ids = set(reader.image_ids)
        result_images_ids = known_ids.difference(seen_in_train)
        label_by_image = dict(labelled_pairs)

        print('Reading train data\n')
        x_train = np.asarray(list(map(reader.read_image_vector, train_ids)))
        train_labels = []
        for image_id in train_ids:
            train_labels.append(label_by_image[image_id])
        y_train = np.asarray(train_labels)

        # Network size is derived from the data: the input width is the
        # length of one image vector, the output width is the number of
        # distinct labels.
        cnn_options = {
            "num_epochs": 10,
            "fresh_start": False,
            "dump_dir": "network_weights/",
            "filename_to_dump": "net.w",
        }
        clf = simple_cnn.CNN(len(x_train[0]), len(set(y_train)), **cnn_options)
        # Alternative classifier kept for reference: SVC(probability=True)

        print('Fitting\n')
        clf.fit(x_train, y_train)

        print('Reading test data\n')
        x_test = np.array(list(map(reader.read_image_vector, result_images_ids)))

        print('Predicting\n')
        y_predicted = clf.predict_proba(x_test)

        print('Writing submission')

    with open(SUBMISSION_FILENAME, 'w') as submission_file:
        utility.write_submission(whale_ids, result_images_ids, y_predicted, submission_file)

    return 0
Exemplo n.º 3
0
def write_submission(whale_ids, image_ids, whale_probs, submission_file, expected_count=6925):
    """
    Writes one csv row of whale probabilities per image to submission_file

    The header row is ``Image`` followed by ``whale_<id>`` for every id in
    sorted(whale_ids). Each following row is the image name for an image id
    followed by that image's probabilities, in the order they are given.
    NOTE(review): nothing here checks that the probability columns are in
    the same order as sorted(whale_ids) - confirm against the caller.

    :param whale_ids: iterable of whale ids to be written to the csv header
    :param image_ids: sized iterable of image ids, paired positionally with
        whale_probs
    :param whale_probs: sized iterable holding one sequence of probabilities
        per image
    :param submission_file: file to write the submission to
    :param expected_count: number of rows both image_ids and whale_probs must
        have; defaults to 6925, the value that used to be hard-coded. Pass
        None to only require that the two lengths are equal.
    :raises AssertionError: if the lengths do not satisfy the check above
    """
    if expected_count is None:
        assert len(image_ids) == len(whale_probs)
    else:
        assert len(image_ids) == expected_count and len(whale_probs) == expected_count

    header = ["Image"] + ['whale_' + str(whale_id) for whale_id in sorted(whale_ids)]
    submission_file.write(",".join(header))
    submission_file.write("\n")

    # A separate loop name keeps the whale_probs parameter from being shadowed.
    for image_id, image_probs in zip(image_ids, whale_probs):
        row = [ImagesReader.get_image_name(image_id)] + [str(prob) for prob in image_probs]
        submission_file.write(",".join(row))
        submission_file.write("\n")