# Required imports
import numpy as np
import eights.investigate as inv

# Investigate
# NOTE(review): `file_loc` is not defined anywhere in this outline; the
# reader is presumably expected to supply a path -- confirm.
# NOTE(review): `open_cvs` may be a misspelling of `open_csv`; verify the
# name against eights.investigate before relying on it.
M, labels = inv.open_cvs(file_loc)
# Original note: "choose to Numpy Structures Arrays" -- presumably the data
# is meant to be held as a NumPy structured array; TODO confirm.

# Descriptive statistics
# NOTE(review): `data` is not defined in this outline; the loaded array
# above is named `M` -- confirm which was intended.
inv.describe_cols(data)
# The names below are only referenced, not called: they list the available
# helpers without passing any arguments.
inv.cross_tabs
inv.plot_correlation_matrix
inv.plot_correlation_scatter_plot
inv.plot_box_plot

# Decontaminate Data
# NOTE(review): the `dec` alias is bound to the same module as `inv`; the
# heading suggests a different module may have been intended -- confirm.
import eights.investigate as dec
# NOTE(review): these two names are bare (not qualified with `dec.`) and are
# not bound anywhere in this outline.
replace_with_n_bins
replace_missing_vals


# Generate features
def is_this_word_in(a_text, word):
    """Return the result of the membership test ``word in a_text``."""
    found = word in a_text
    return found
# import numpy array M = sklearn.datasets.load_iris().data labels = sklearn.datasets.load_iris().target M = cast_np_nd_to_sa(M) # M is multi class, we want to remove those rows. keep_index = np.where(labels != 2) labels = labels[keep_index] M = M[keep_index] if False: for x in describe_cols(M): print x if False: plot_correlation_scatter_plot(M) plot_correlation_matrix(M) plot_kernel_density(M["f0"]) # no designation of col name plot_box_plot(M["f0"]) # no designation of col name if False: from eights.generate import val_between, where_all_are_true, append_cols # val_btwn, where # generate a composite rule M = where_all_are_true( M,
plot_box_plot) #import numpy array M = sklearn.datasets.load_iris().data labels = sklearn.datasets.load_iris().target M = cast_np_nd_to_sa(M) #M is multi class, we want to remove those rows. keep_index = np.where(labels != 2) labels = labels[keep_index] M = M[keep_index] if False: for x in describe_cols(M): print x if False: plot_correlation_scatter_plot(M) plot_correlation_matrix(M) plot_kernel_density(M['f0']) #no designation of col name plot_box_plot(M['f0']) #no designation of col name if False: from eights.generate import val_between, where_all_are_true, append_cols #val_btwn, where #generate a composite rule M = where_all_are_true(M, [{ 'func': val_between, 'col_name': 'f0', 'vals': (3.5, 5.0)