#!/usr/bin/env python2 from __future__ import print_function from sklearn.decomposition import PCA #from sklearn import svm import numpy as np import funct from sys import argv script, d_f, l_f = argv data = funct.Data(d_f, l_f, w0=False) labels = data.test_labels training_data = data.training_rows #print(labels) training_data = [data.data[i] for i in training_data] #print(training_data) training_labels = [data.labels[i] for i in data.training_rows] test_data = [data.data[i] for i in labels] data = np.asarray(training_data) #print("Original training data") #print(data) pca = PCA(n_components=2) pca.fit(data) #print(pca.components_) data_applied = pca.transform(data) #print("Training data")
from sys import argv import funct as stat script, df, lf = argv data_arr = stat.Data(df, lf, w0 = False) for j in range(data_arr.cols):
#!/usr/bin/env python3
# Report the variance of every column of the data matrix, one
# "<column-index> <variance>" pair per output line.
#
# Usage: script <data_file> <labels_file>
from sys import argv
import funct as stat

script, d_f, l_f = argv

dataset = stat.Data(d_f, l_f, w0=False)
for col_idx in range(dataset.cols):
    # Variance of a single feature column, printed alongside its index.
    print("%d %f" % (col_idx, stat.variance(dataset.get_col(col_idx))))
### cs675 machine learning ### assignment 5: CART from sys import argv from re import split import funct ### functions for handling raw data if len(argv) != 4: print("""Please run in the format script data training_labels out""") quit() script, data_f, labels_f, out_f = argv data = funct.Data(data_f, labels_f, w0=False) ncol = data.cols nc1 = data.n_case nc2 = data.n_control train_size = nc1 + nc2 best_splits = [0] * ncol best_gini = [0] * ncol out = open(out_f, "w") for j in range(ncol): col_vect = data.get_col(j) split_hash = dict() for split in [0.5, 1.5]: