Exemplo n.º 1
0
 def test_plot_correlation_matrix(self):
     """Plot a correlation matrix for a small three-column table.

     The table has a base column (0..9), an exact linear transform of it
     (3 * base + 1), and a hand-written column labelled 'no_correlation'.
     The figure returned by the plotting call is passed to
     ``add_fig_to_report`` under the name 'plot_correlation_matrix'.
     """
     base = range(10)
     linear_trans = [3 * value + 1 for value in base]
     no_correlation = [1, 5, 8, 4, 1, 8, 5, 9, 0, 1]
     column_names = ['base', 'linear_trans', 'no_correlation']
     table = utils.convert_to_sa(
         zip(base, linear_trans, no_correlation),
         col_names=column_names)
     figure = dsp.plot_correlation_matrix(table, verbose=False)
     self.add_fig_to_report(figure, 'plot_correlation_matrix')
Exemplo n.º 2
0
# The data is multi-class; keep only the rows whose label is not 2.
# The same index is applied to labels and to M so they stay aligned.
keep_index = np.where(labels != 2)
labels = labels[keep_index]

# Convert M with cast_np_nd_to_sa first, then apply the row filter.
M = cast_np_nd_to_sa(M)
M = M[keep_index]

# Optional exploration steps. Each guard is hard-coded to False; flip it to
# True to run that step.

# Print every item yielded by describe_cols(M), one per line.
if False:
    for x in describe_cols(M):
        # Call form instead of the Python 2 print statement: with a single
        # argument it prints the same on Python 2 and is valid on Python 3.
        print(x)

# Draw the plots for M as a whole and for its 'f0' column.
if False:
    plot_correlation_scatter_plot(M)
    plot_correlation_matrix(M)
    plot_kernel_density(M['f0'])  #no designation of col name
    plot_box_plot(M['f0'])  #no designation of col name

# Optional step (guard hard-coded to False): rebuild M from a composite rule.
if False:
    # Imported here because only this optional step uses these names.
    from diogenes.generate import (val_between, choose_rows_where,
                                   append_cols)  #val_btwn, where
    #generate a composite rule
    # Two val_between conditions, one on 'f0' and one on 'f1'.
    # NOTE(review): the third positional argument looks like a name for a
    # generated column -- confirm against choose_rows_where's signature.
    M = choose_rows_where(
        M,
        [
            {'func': val_between, 'col_name': 'f0', 'vals': (3.5, 5.0)},
            {'func': val_between, 'col_name': 'f1', 'vals': (2.7, 3.1)},
        ],
        'a new col_name')
Exemplo n.º 3
0
                               plot_kernel_density,
                               plot_box_plot)

from diogenes.grid_search import Experiment 
from diogenes.grid_search import std_clfs as std_clfs
from diogenes.utils import remove_cols


# Fetch the UCI white-wine quality table; the file is semicolon-delimited.
data = open_csv_url(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/'
    'wine-quality/winequality-white.csv',
    delimiter=';')

# 'quality' is the target column; M is data with that column removed.
y = data['quality']
M = remove_cols(data, 'quality')

# Binarize the target: True where quality is below the average quality.
y = y < np.average(y)


# Optional exploration steps. Each guard is hard-coded to False; flip it to
# True to run that step.

# Print every item yielded by describe_cols(M), one per line.
if False:
    for x in describe_cols(M):
        # Call form instead of the Python 2 print statement: with a single
        # argument it prints the same on Python 2 and is valid on Python 3.
        print(x)

# Draw the plots for M as a whole and for its 'f0' column.
# NOTE(review): 'f0' is numpy's default name for an unnamed first field; if
# the CSV header supplies the column names this key may not exist -- confirm
# before enabling this step.
if False:
    plot_correlation_scatter_plot(M)
    plot_correlation_matrix(M)
    plot_kernel_density(M['f0'])  #no designation of col name
    plot_box_plot(M['f0'])  #no designation of col name

# Build an Experiment from the feature table M and the boolean labels y,
# passing std_clfs as the clfs argument.
exp = Experiment(M, y, clfs=std_clfs)
# presumably writes the experiment's results out as CSV -- confirm the output
# location against Experiment.make_csv
exp.make_csv()