def create_test_generator(testdf, categories, batch_size):
    """Build a DICOM image generator over the test dataframe.

    NOTE(review): the ``categories`` argument is immediately overwritten
    by the project-wide category list (with the 'any' label included),
    so the passed-in value is never used -- presumably kept for
    signature compatibility; confirm with callers.
    """
    categories = utils.define_categories(include_any=True)

    # Restrict the frame to the filename plus the label columns the
    # generator will read.
    selected = testdf[['filename'] + categories]
    return utils.Dicom_Image_Generator(
        selected,
        ycols=categories,
        desired_size=512,
        batch_size=batch_size,
        subset='test',
    )
# 예제 #2 (Example #2)
# 0
def create_test_generator(testdf, categories, batch_size):
    """Build a three-channel (RGB) image generator over the test dataframe.

    NOTE(review): the ``categories`` argument is recomputed from
    ``testdf`` (without the 'any' label), so the passed-in value is
    ignored -- confirm with callers.
    """
    categories = utils.define_categories(testdf, include_any=False)

    # Keep only the filename column plus the label columns the generator
    # consumes; no augmentation on test data.
    selected = testdf[['filename'] + categories]
    generator = utils.Three_Channel_Generator(
        selected,
        ycols=categories,
        desired_size=512,
        batch_size=batch_size,
        subset='test',
        random_transform=False,
        rgb=True,
    )
    return generator
# 예제 #3 (Example #3)
# 0
def build_submission(testdf, y_pred, dataloc):
    """Assemble a Kaggle submission CSV from per-image predictions.

    Parameters
    ----------
    testdf : pandas.DataFrame
        Test metadata; must contain an 'ID' column plus filename info.
    y_pred : numpy.ndarray
        Predicted probabilities, one row per test image and one column
        per category. May be short by up to one batch (see below).
    dataloc : str
        Directory holding 'stage_1_sample_submission.csv'; the output
        CSV is written there as well.

    Returns
    -------
    str
        Path of the submission CSV that was written.
    """
    # build output dataframe
    df_output = testdf.copy()

    categories = utils.define_categories(df_output)

    if len(y_pred) < len(df_output):
        mismatch = len(df_output) - len(y_pred)
        # The number of test images isn't evenly divisible by the batch
        # size, so the predict generator stops early; pad the tail with
        # zero-probability rows.
        warnings.warn(
            'y_pred is {} entries too short. Filling with zeros'.format(
                mismatch))
        # Pad in a single vstack. (The previous per-row loop was O(n^2)
        # and called np.zeros_like on the list of category *names*,
        # producing a string-dtype row that upcast the whole stacked
        # array to strings instead of numeric zeros.)
        y_pred = np.vstack(
            (y_pred, np.zeros((mismatch, len(categories)))))

    # populate columns of df_output with predictions
    for ii, cat in enumerate(categories):
        df_output[cat] = y_pred[:len(df_output), ii]

    # using the sample submission as the prototype, iterate through and
    # fill with actual predictions
    df_output.set_index('ID', inplace=True)
    sample_submission = pd.read_csv(
        os.path.join(dataloc, 'stage_1_sample_submission.csv'))
    submission = sample_submission.copy()

    for idx, row in submission.iterrows():
        # Sample-submission IDs look like 'ID_<hash>_<hem_type>'.
        img_id = 'ID_' + row['ID'].split('_')[1]
        if img_id == 'ID_ffffcbff8':
            # Known bad/missing image -- leave its sample value untouched.
            continue
        hem_type = row['ID'].split('_')[2]
        submission.at[idx, 'Label'] = df_output.at[img_id, hem_type]

    # Timestamped filename, with characters unsafe for filenames replaced.
    datestamp = str(datetime.datetime.now()).replace(':',
                                                     '_').replace(' ', 'T')
    submission_filename = os.path.join(dataloc,
                                       'submission_{}.csv'.format(datestamp))
    submission.to_csv(submission_filename, index=False)
    return submission_filename
    def build(self):
        """Load training data, build generators, model, and callbacks.

        Side effects: sets ``self.tdf``, ``self.train_df``,
        ``self.validate_df``, ``self.categories``,
        ``self.train_generator``, ``self.validate_generator``,
        ``self.model``, and ``self.callbacks``.
        """

        # load training df
        self.tdf = utils.load_training_data(self.dataloc, stage=2)

        # drop missing image (known-bad file ID_33fcf4d99)
        drop_idx = [
            i for i, row in self.tdf['filename'].iteritems()
            if fnmatch.fnmatch(row, '*ID_33fcf4d99*')
        ]
        self.tdf = self.tdf.drop(drop_idx)

        # Sample the configured training fraction, then split 90/10 into
        # train and validate dataframes.
        shuff = self.tdf.sample(frac=self.training_fraction,
                                random_state=self.random_state)
        split = int(0.90 * len(shuff))
        self.train_df = shuff.iloc[:split]
        self.validate_df = shuff.iloc[split:]
        # (removed a leftover no-op expression that merely evaluated the
        # three lengths without using them -- notebook residue)

        # set up generators
        self.categories = utils.define_categories(self.train_df)

        self.train_generator = utils.Three_Channel_Generator(
            self.train_df.reset_index(),
            ycols=self.categories,
            desired_size=self.img_size,
            batch_size=self.batch_size,
            random_transform=self.random_transform,
            rgb=True)

        self.validate_generator = utils.Three_Channel_Generator(
            self.validate_df.reset_index(),
            ycols=self.categories,
            desired_size=self.img_size,
            batch_size=self.batch_size,
            random_transform=False,  # never augment validation data
            rgb=True)

        # load model
        self.model = models(self.model_name,
                            input_image_size=self.img_size,
                            number_of_output_categories=len(self.categories))

        if self.weights_path is not None:
            self.model.load_weights(self.weights_path)

        # setup callbacks
        earlystop = EarlyStopping(patience=10)

        # NOTE(review): this LR scheduler is configured but never added
        # to self.callbacks below -- confirm whether omitting it is
        # intentional before removing it.
        learning_rate_reduction = ReduceLROnPlateau(
            monitor='categorical_accuracy',
            patience=2,
            verbose=1,
            factor=0.5,
            min_lr=0.00001)

        checkpoint_name = "model_weights_vgg19_{}.h5".format(self.datestamp)
        checkpoint = ModelCheckpoint(checkpoint_name,
                                     monitor='val_acc',
                                     verbose=0,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     mode='auto')

        self.callbacks = [earlystop, checkpoint]
def main(dataloc, path_to_weights, model='vgg', batch_size=16):
    
    categories = utils.define_categories(include_any=True)