Exemplo n.º 1
0
    def test_display_dimensions(self):
        """Display the glass data set three dimensions at a time, then reduced.

        Loads the glass data set, splits off its id column and its target
        column, reduces the remaining data with ``algorithms.Isomap`` and
        loads both the original dimensions (in groups of up to three) and the
        reduced data into a single ``Displayer`` before showing it.
        """
        data_dir = 'datasets/'
        data_set = 'glass/glass.data'
        path = os.path.join(data_dir, data_set)

        print('Displaying data set {%s} in the Rn' % path)

        glass = Retriever(path, delimiter=',')

        # Glass has the samples' ids in the first column.
        glass.split_column(0)
        # Additionally, its last column represents the target feature.
        glass.split_target()

        data, c = glass.retrieve()
        reduced_data = algorithms.Isomap(data, e=20).run()

        d = Displayer(title=data_set)

        # Scatter all dimensions (3-by-3), using as many graphs as necessary.
        for begin in range(0, glass.features_count, 3):
            # The last group may hold fewer than three dimensions.
            end = min(glass.features_count, begin + 3)
            d.load(data[:, begin:end],
                   color=c,
                   title='Dimensions: d e [%i, %i]' % (begin + 1, end))

        # Same argument layout as the calls above: data first, then color
        # and title by keyword (the title used to be passed in the data slot).
        d \
            .load(reduced_data, color=c, title='Reduced glass data-set') \
            .show()
Exemplo n.º 2
0
    def test_split_target(self, data, target_column, expected_data,
                          expected_target):
        """Check that ``split_target`` separates the target column from the data.

        :param data: raw samples assigned to the retriever before splitting.
        :param target_column: column index passed to ``split_target``.
        :param expected_data: data expected to remain after the split.
        :param expected_target: target values expected to be split off.
        """
        r = Retriever('nonexistentfile.data')
        # Assign the samples directly to the retriever's private storage;
        # presumably this keeps the (non-existent) file from being read.
        r._data = np.array(data)
        actual_data, actual_target = r.split_target(target_column).retrieve()

        testing.assert_array_equal(actual_data, np.array(expected_data))
        # Compare the *actual* target; the previous assertion compared
        # `expected_target` with itself and therefore could never fail.
        testing.assert_array_equal(actual_target, np.array(expected_target))
Exemplo n.º 3
0
    def _load_data(self):
        """Load the wdbc data file into `data`, `target` and `feature_names`.

        The id column is split off first, then the target column; the
        remaining data is cast to float.
        """
        retriever = Retriever('../../datasets/breast-cancer/wdbc.data',
                              delimiter=',')

        # Column 0 holds the ids, which are not wanted.
        retriever.split_column(0)

        # The target feature sits in the 2nd column of the file, but with the
        # ids already removed it is now found at index 0.
        samples, labels = retriever.split_target(0).retrieve()

        self.data = samples.astype(float)
        self.target = labels
        self.feature_names = ['radius', 'texture', 'perimeter']
Exemplo n.º 4
0
    def _load_data(self):
        """Load the Pima Indians diabetes file and configure the displayer.

        Sets `data`, `target` and `feature_names`, and restricts the
        displayer's colors to the viridis color map.
        """
        retriever = Retriever(
            '../../datasets/diabetes/pima-indians-diabetes.data',
            delimiter=',')

        # Split the target off (default column) and fetch both parts.
        samples, labels = retriever.split_target().retrieve()
        self.data = samples
        self.target = labels

        self.feature_names = [
            'Number of times pregnant',
            'Plasma glucose concentration',
            'Diastolic blood pressure (mm Hg)',
        ]

        self.displayer.colors = [plt.cm.viridis]
Exemplo n.º 5
0
    def _run(self):
        """Display the data set three features at a time and print correlations.

        Loads the comma-delimited file at `self.data_set_file`, splits off
        the target, loads each group of up to three feature columns into the
        displayer (colored by the target), prints the correlation matrix of
        the features and finally saves the display as 'displaying_ilpd'.
        """
        # The data set file is delimited by commas.
        retriever = Retriever(self.data_set_file, delimiter=',')
        features, target = retriever.split_target().retrieve()

        axis_names = [
            '\nAge',
            '\nGender',
            '\nTotal Bilirubin (TB)',
            '\nDirect Bilirubin (DB)',
            '\nAlkaline Phosphotase (Alk.)',
            '\nAlamine Aminotransferase (Sgpt)',
            '\nAspartate Aminotransferase (Sgot)',
            '\nTotal Protiens (TP)',
            '\nAlbumin (ALB)',
            '\nRatio Albumin and Globulin Ratio (A/G)',
        ]

        # One graph per group of (at most) three consecutive columns.
        column_count = features.shape[1]
        for start in range(0, column_count, 3):
            stop = start + 3
            self.displayer.load(features[:, start:stop],
                                target,
                                axis_labels=axis_names[start:stop])

        print('Correlation matrix:')
        # rowvar=0: each column is treated as a variable.
        print(np.corrcoef(features, rowvar=0))

        self.displayer.save('displaying_ilpd')
Exemplo n.º 6
0
 def _load_data(self):
     """Load `self.file` into `data`/`target` and remember the loaded data.

     The first column (ids) is split off before the target is separated.
     `original_data` is bound to the same object as `data`.
     """
     retriever = Retriever(self.file, delimiter=',')

     # Column 0 holds the ids, which are not wanted.
     retriever.split_column(0)

     samples, labels = retriever.split_target().retrieve()
     self.data = samples
     self.target = labels
     self.original_data = self.data