Example #1
    def test_learning_set(self):
        # Directory iterator option
        train, val = learning_set(path, split=split,
                                  batch_size=batch_size,
                                  iterator_mode=None,
                                  classes=classes)
        assert isinstance(path, str), 'the path should be in a string format'
        assert isinstance(split, (float, np.float32, int)), \
            'the data split should be a number'
        assert isinstance(classes, list), \
            'the classes should be inputted as a list'
        for item in classes:
            assert isinstance(item, str), 'the class should be a string'
        assert isinstance(train, keras.preprocessing.image.DirectoryIterator),\
            'the training set should be an image iterator type of object'
        assert isinstance(val, keras.preprocessing.image.DirectoryIterator),\
            'the validation set should be an image iterator type of object'
        assert isinstance(batch_size, int), \
            'the batch size should be an integer'
        # array iterator option
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'
        catalogue.rgb_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        train, val = learning_set(image_list=plot_tups, split=split,
                                  batch_size=batch_size,
                                  iterator_mode='arrays',
                                  classes=classes)
        assert isinstance(train, keras.preprocessing.image.NumpyArrayIterator
                          ), \
            'the training set should be an image iterator type of object'
        assert isinstance(val, keras.preprocessing.image.NumpyArrayIterator),\
            'the validation set should be an image iterator type of object'
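
The module-level fixtures used above (path, split, batch_size, classes,
data_path) are defined elsewhere in the test module. A purely hypothetical
set of values, for orientation only:

    path = './hardy/test/test_image/'      # hypothetical image folder
    split = 0.25                           # hypothetical split value
    batch_size = 4                         # hypothetical batch size
    classes = ['noise', 'one']             # labels as used in Example #2
    data_path = './hardy/test/test_data/'  # hypothetical raw-data folder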
Example #2
    def test_model_analysis(self):

        num_files = 1
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'
        catalogue.rgb_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        test_set_filenames = preprocessing.hold_out_test_set(
            data_path, number_of_files_per_class=num_files)

        test_set_list, learning_set_list = catalogue.data_set_split(
            plot_tups, test_set_filenames)
        train, val = catalogue.learning_set(image_list=learning_set_list,
                                            split=split,
                                            classes=['noise', 'one'],
                                            iterator_mode='arrays')
        testing_set = catalogue.test_set(image_list=test_set_list,
                                         classes=['noise', 'one'],
                                         iterator_mode='arrays')
        model, history = cnn.build_model(train,
                                         val,
                                         config_path='./hardy/test/')

        result = reporting.model_analysis(model, testing_set, test_set_list)

        assert isinstance(result, pd.DataFrame)
Example #3
    def test_rgb_visualize(self):
        """
        Individual dataframe-to-image maker. It is already exercised by the
            prior wrappers, but it is tested directly here to keep the
            coverage report explicit.
        """
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)

        fdata = data_tups[0][1]
        assert type(fdata) is pd.DataFrame, "Need testing dataframe"

        image_arr = catalogue.rgb_visualize(fdata)

        assert image_arr.shape[2] == 3,\
            "Expected NxNx3 Image. Instead got {}".format(image_arr.shape)
Example #4
    def test_save_load_data(self):
        # Simple pickling save/load function
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'

        catalogue.rgb_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        data = catalogue.save_load_data('test_pickled_data', data=plot_tups,
                                        save=True, location='./hardy/test/')
        assert data == 'Successfully Pickled'
        data = catalogue.save_load_data('test_pickled_data', data=plot_tups,
                                        load=True, location='./hardy/test/')
        assert isinstance(data, list), 'the loaded data should be a list'
        os.remove('./hardy/test/test_pickled_data.npy')
        print('compressed file correctly removed after testing')
        pass
Example #5
    def test_data_split(self):
        num_files = 3
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'
        catalogue.rgb_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        test_set_filenames = preprocessing.hold_out_test_set(
            data_path, number_of_files_per_class=num_files)
        test_set_list, learning_set_list = catalogue.data_set_split(
            plot_tups, test_set_filenames)
        assert isinstance(test_set_filenames, list), 'format should be a list'
        assert len(test_set_filenames) == 2*num_files, \
            'the test set is not the correct length'
        assert isinstance(test_set_list, list), 'format should be a list'
        assert isinstance(learning_set_list, list), 'format should be a list'
        pass
Example #6
    def test_data_tuples_from_fnames(self):
        """
        Test for the "List-Of-Tuples" function wrapper, the largest wrapper
        of this file set.
        (Given just the folder with CSV files in it, it will generate the
             designated "List-Of-Tuples" of raw data.)
        """
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path,
                                                       skiprows=6,
                                                       classes=None)
        assert type(data_tups) is list,\
            "Data List-of-Tuples did not return a List"

        for row in data_tups:
            assert type(row) is tuple, "List-of-Tuples has non-tuple?"
            assert type(row[0]) is str, "File Name in Tuple is wrong format."
            assert type(row[1]) is pd.DataFrame,\
                "List-of-Tuples improperly importing data"
            assert type(row[2]) is str, "Class label is not a string?"
        pass
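
For orientation, each entry of the returned list unpacks as (file name,
dataframe, class label). A hypothetical row, with illustrative values:

    fname, fdata, label = data_tups[0]
    # e.g. fname = 'sample_01_noise', fdata = <pandas.DataFrame>,
    #      label = 'noise'  (illustrative values only)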
Example #7
    def test_regular_plot_list(self):
        """
        Test for the tuple-list image visualization wrapper.
            It takes the raw tuple-list from the prior wrapper
            and performs the visualizations as called in the "standard"
            methods.
        """

        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'
        catalogue.regular_plot_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        for row in plot_tups:
            assert type(row) is tuple, "List-of-Tuples has non-tuple?"
            assert type(row[0]) is str, "File Name in Tuple is wrong format."
            assert type(row[1]) is np.ndarray,\
                "List-of-image-Tuples is not in np.ndarray format??"
            assert type(row[2]) is str, "Class label is not a string?"
        pass
Example #8
    def test_test_set(self):
        # Directory iterator option
        testing = test_set(path, batch_size=batch_size, iterator_mode=None,
                           classes=classes)
        assert isinstance(path, str), 'the path should be in a string format'
        assert isinstance(classes, list), \
            'the classes should be inputted as a list'
        for item in classes:
            assert isinstance(item, str), 'the class should be a string'
        assert isinstance(testing, keras.preprocessing.image.DirectoryIterator
                          ), 'the testing set should be an image iterator'
        # array iterator option
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'
        catalogue.rgb_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        testing = test_set(image_list=plot_tups, batch_size=batch_size,
                           iterator_mode='arrays', classes=classes)
        assert isinstance(testing, keras.preprocessing.image.NumpyArrayIterator
                          ), 'the testing set should be an image iterator'
Example #9
def data_wrapper(run_name=None,
                 raw_datapath='./',
                 tform_command_dict=None,
                 classes=None,
                 plot_format="RGBrgb",
                 iterator_mode='arrays',
                 print_out=True,
                 project_name=None,
                 skiprows=0,
                 scale=1.0):
    """
    Overall "one-click" wrapper to create the three Keras-ready datasets
        needed to train the model ("Training Set", "Validation Set" and
        "Test Set"), all in the same format as produced by the
        keras.preprocessing.image iterators.
    """
    # tform_commands was replaced with tform_command_dict in the arguments
    # to make parallel processing possible

    tform_commands = tform_command_dict[run_name]

    if print_out:
        clock = time.perf_counter()
        print("Processing Data...\t", end="")
    # Make the raw Dataframe Tuples List
    raw_tuples_list = to_catalogue._data_tuples_from_fnames(raw_datapath,
                                                            classes=classes,
                                                            skiprows=skiprows)

    # Now perform the transform, if given
    if tform_commands is None:
        tform_tuples_list = raw_tuples_list
    else:
        tform_tuples_list = arbitrage.tform_tuples(raw_tuples_list,
                                                   tform_commands,
                                                   rgb_format=plot_format)
    # save the transformation info in a yaml file for the final report
    if project_name and run_name:
        output = [[i, name.split('__')[0],
                   name.split('__')[-1]]
                  for i, name in enumerate(list(tform_tuples_list[0][1]))
                  if isinstance(name, str)]
        # save the transform info in a dictionary
        run_tform = {'run_name': run_name}
        for i in range(len(output)):
            run_tform['tform_' + str(i)] = output[i]
        # generate a yaml file to store the transformation info
        output_path = preprocessing.save_to_folder(raw_datapath, project_name,
                                                   run_name)
        report_location = output_path + '/report/'
        if not os.path.exists(report_location):
            os.makedirs(report_location)
        with open(report_location + 'run_tform_config.yaml', 'w') as yaml_file:
            yaml.dump(run_tform, yaml_file)
    else:
        pass
    # Next, make the image tuples list (RGB or regular plot format)
    if plot_format == 'RGBrgb':
        data_store = raw_datapath + run_name + '.pkl'
        to_catalogue.rgb_list(tform_tuples_list,
                              scale=scale,
                              plot_format=plot_format,
                              storage_location=data_store)
    else:
        data_store = raw_datapath + run_name + '.pkl'
        to_catalogue.regular_plot_list(tform_tuples_list,
                                       scale=scale,
                                       storage_location=data_store)

    # The image arrays are now finished!
    #     EITHER return 0 (arrays mode: the image tuples were pickled above)
    #     OR save the images and return the path to those folders
    if iterator_mode == 'arrays':
        if print_out:
            print_time(time.perf_counter() - clock)
        return 0
    else:
        # TODO: write an optional split based on iterator_mode to
        # optionally use the "to_dirFlow" path options
        # (already partly written)
        return os.path.join(raw_datapath, "images")
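
A minimal usage sketch, not taken from the source; the run name, data folder
and transform dictionary below are illustrative:

    # Hypothetical call: a single run with no transforms applied.
    tform_dict = {'run_1': None}
    data_wrapper(run_name='run_1',
                 raw_datapath='./data/',
                 tform_command_dict=tform_dict,
                 classes=['noise', 'one'],
                 plot_format='RGBrgb',
                 iterator_mode='arrays',
                 skiprows=6)
    # In 'arrays' mode the image tuples are pickled to './data/run_1.pkl'
    # and the function returns 0.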