def test_load_dataset_with_oneHot(self):
     """Check the training split size when loading with one_hot=True.

     Loads ``self.path`` through ``GalaxyDataSetLabelStrategy`` with a 0.2
     validation fraction and expects 25091 training examples.
     """
     label_strategy = GalaxyDataSetLabelStrategy()
     context = Context(label_strategy)
     dataset = context.load_dataset(csv_file=self.path,
                                    one_hot=True,
                                    validation_size=np.float32(0.2))
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(dataset.train._num_examples, 25091)
 def test_load_dataset_with_oneHot(self):
     """Check the training split size when loading with one_hot=True.

     Loads ``self.path`` through ``MusicGenreJMIRMFCCsStrategy`` with a 0.2
     validation fraction and expects 52400 training examples.
     """
     music_genre_strategy = MusicGenreJMIRMFCCsStrategy()
     context = Context(music_genre_strategy)
     dataset = context.load_dataset(csv_file=self.path,
                                    one_hot=True,
                                    validation_size=np.float32(0.2))
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(dataset.train._num_examples, 52400)
示例#3
0
class TestDataSet(TestCase):
    """Tests for the galaxy image data set loaded through ``Context``."""

    def setUp(self):
        """Load the galaxy image data set with a 0.2 validation fraction."""
        # Test batch size.
        self.batch_size = 64

        # Test CSV path.
        self.path = os.environ["VIRTUAL_ENV"] + "/data/csv/galaxy/galaxy.csv"

        # Load data set.
        galaxy_data_set_strategy = GalaxyDataSetImageStrategy()
        self.context = Context(galaxy_data_set_strategy)
        self.dataset = self.context.load_dataset(
            csv_file=self.path, one_hot=True, validation_size=np.float32(0.2))

    def test_load_images(self):
        """Compare the first loaded image with the same file read by OpenCV."""
        # Get a batch of data; the labels are not needed here.
        x_batch, _ = self.dataset.train.next_image_batch(self.batch_size)

        # Load images in this batch.
        self.dataset.train.load_images(x_batch)

        # Get the first image ID in the training data set.
        first_img_name = self.dataset.train._img_names[0][0]

        # Get the image the data set holds at the first position.
        loaded_image = self.dataset.train._images[0]

        # Get the path of the first image ID.
        path = os.environ["VIRTUAL_ENV"] + "/data/images/" + str(first_img_name) + ".jpg"

        # Load image. cv2.imread returns None instead of raising when the
        # file cannot be read, so fail here with a clear message.
        expected_image = cv2.imread(path)
        self.assertIsNotNone(expected_image, "Could not read image: " + path)

        # Transform image as a 32-bit numpy array.
        expected_image = expected_image.astype(np.float32)

        # Normalize the image.
        expected_image = np.multiply(expected_image, 1.0 / 255.0)

        np.testing.assert_array_equal(loaded_image, expected_image)

    def test_next_batch(self):
        """Two consecutive batches must not contain the same data."""
        # First batch
        x_batch1, _ = self.dataset.train.next_image_batch(self.batch_size)

        # Second batch
        x_batch2, _ = self.dataset.train.next_image_batch(self.batch_size)

        # assert_array_equal is expected to fail, i.e. the batches differ.
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, x_batch1, x_batch2)

    def test_validation_size(self):
        """Check a zero validation fraction and reject a negative one."""
        dataset = self.context.load_dataset(csv_file=self.path, one_hot=True, validation_size=np.float32(0))

        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(dataset.train._num_examples, 31364)

        with self.assertRaises(Exception):
            self.context.load_dataset(csv_file=self.path, one_hot=True, validation_size=np.float32(-0.1))
 def test_load_dataset_with_oneHot(self):
     """Check the train/validation split sizes for a 0.2 validation fraction.

     Loads ``self.path`` through ``GalaxyDataSetImageStrategy`` with
     one_hot=True and expects the 31364 examples to be split 80% / 20%
     (each rounded to the nearest integer).
     """
     image_strategy = GalaxyDataSetImageStrategy()
     context = Context(image_strategy)
     dataset = context.load_dataset(csv_file=self.path,
                                    one_hot=True,
                                    validation_size=np.float32(0.2))
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(
         dataset.train._num_examples, int(np.round(31364 * 0.8)))
     self.assertEqual(
         dataset.valid._num_examples, int(np.round(31364 * 0.2)))
示例#5
0
    def setUp(self):
        """Prepare the batch size, CSV path, context and loaded data set."""
        # Number of examples requested per batch by the tests.
        self.batch_size = 64

        # The galaxy CSV is located relative to the VIRTUAL_ENV directory.
        virtual_env_root = os.environ["VIRTUAL_ENV"]
        self.path = virtual_env_root + "/data/csv/galaxy/galaxy.csv"

        # Build the context around the image strategy, then load the data
        # with one-hot labels and a 0.2 validation fraction.
        self.context = Context(GalaxyDataSetImageStrategy())
        self.dataset = self.context.load_dataset(
            csv_file=self.path,
            one_hot=True,
            validation_size=np.float32(0.2),
        )
示例#6
0
    def setUp(self):
        """Load the galaxy label data set with one_hot=False for the tests."""
        validation_fraction = np.float32(0.2)

        # Locations are built relative to the VIRTUAL_ENV directory.
        virtual_env_root = os.environ["VIRTUAL_ENV"]
        self.galaxy_csv_file = virtual_env_root + "/data/csv/galaxy/galaxy.csv"
        self.galaxy_images_path = virtual_env_root + "/data/images/"

        # Create the label loading strategy and a context using it.
        label_strategy = GalaxyDataSetLabelStrategy()
        context = Context(label_strategy)
        # NOTE(review): the context was already constructed with this
        # strategy; the explicit set_strategy call looks redundant - confirm.
        context.set_strategy(label_strategy)

        self.label_dataset = context.load_dataset(
            csv_file=self.galaxy_csv_file,
            one_hot=False,
            validation_size=validation_fraction,
        )
 def test_load_dataset_no_oneHot(self):
     """Check the training split size when loading with one_hot=False.

     Loads ``self.path`` through ``GalaxyDataSetFeatureStrategy`` using
     ``self.validation_size`` and expects 25091 training examples.
     """
     feature_strategy = GalaxyDataSetFeatureStrategy()
     context = Context(feature_strategy)
     dataset = context.load_dataset(csv_file=self.path, one_hot=False, validation_size=self.validation_size)
     # assertEqual reports both values on failure, unlike assertTrue(a == b).
     self.assertEqual(dataset.train._num_examples, 25091)
示例#8
0
def main():
    """
        Program's entry point.

        Loads the galaxy image, feature and label data sets and the spam
        feature data set, discretizes the galaxy feature data set with the
        supervised and unsupervised strategies, then processes the galaxy
        label data set with ``GalaxyProcessor`` and saves the resulting
        features as a comma-separated file.
    """
    # The desired validation size, shared by every load/discretize call.
    validation_size = np.float32(0.2)

    # Build every input/output path relative to the VIRTUAL_ENV directory.
    virtual_env = os.environ["VIRTUAL_ENV"]
    galaxy_csv_file = virtual_env + "/data/csv/galaxy/galaxy.csv"
    galaxy_feature_csv_file = (
        virtual_env + "/data/csv/galaxy/galaxy_feature_vectors.csv")
    spam_feature_csv_file = virtual_env + "/data/csv/spam/spam.csv"
    galaxy_images_path = virtual_env + "/data/images/"
    galaxy_feature_vector_export_path = (
        virtual_env
        + "/data/csv/galaxy/exported_personal_galaxy_feature_vectors.csv")

    # Create instance of data set loading strategies.
    galaxy_image_data_set_strategy = GalaxyDataSetImageStrategy()
    galaxy_feature_data_set_strategy = GalaxyDataSetFeatureStrategy()
    galaxy_label_data_set_strategy = GalaxyDataSetLabelStrategy()
    spam_feature_dataset_strategy = SpamDataSetFeatureStrategy()

    # NOTE(review): several data sets below are loaded but not used further
    # in this function; the calls are kept in case loading has side effects.

    # Set the context to galaxy image data set loading strategy.
    context = Context(galaxy_image_data_set_strategy)
    img_dataset = context.load_dataset(
        csv_file=galaxy_csv_file,
        one_hot=True,
        validation_size=validation_size)

    # Set the context to galaxy feature data set loading strategy.
    context.set_strategy(galaxy_feature_data_set_strategy)
    feature_oneHot_dataset = context.load_dataset(
        csv_file=galaxy_feature_csv_file,
        one_hot=True,
        validation_size=validation_size)

    feature_dataset = context.load_dataset(
        csv_file=galaxy_feature_csv_file,
        one_hot=False,
        validation_size=validation_size)

    # Set the context to galaxy label data set loading strategy.
    context.set_strategy(galaxy_label_data_set_strategy)
    label_dataset = context.load_dataset(
        csv_file=galaxy_csv_file,
        one_hot=False,
        validation_size=validation_size)

    # Set the context to spam feature data set loading strategy.
    context.set_strategy(spam_feature_dataset_strategy)
    spam_feature_dataset = context.load_dataset(
        csv_file=spam_feature_csv_file,
        one_hot=False,
        validation_size=validation_size)

    # For TP02, set the discretization strategy and discretize data.
    preprocessor_context = DiscretizerContext(
        SupervisedDiscretizationStrategy())

    supervised_discretised_dataset = preprocessor_context.discretize(
        data_set=feature_dataset, validation_size=validation_size)

    preprocessor_context.set_strategy(UnsupervisedDiscretizationStrategy())

    unsupervised_discretised_dataset = preprocessor_context.discretize(
        data_set=feature_dataset, validation_size=validation_size)

    # Process galaxies.
    galaxy_processor = GalaxyProcessor(galaxy_images_path)
    features = galaxy_processor.process_galaxy(label_dataset)

    # Save extracted features to file.
    np.savetxt(galaxy_feature_vector_export_path, features, delimiter=",")
    print("File saved in directory " + galaxy_feature_vector_export_path)