Exemplo n.º 1
0
#!/usr/bin/env python2

from __future__ import print_function
from sklearn.decomposition import PCA
#from sklearn import svm
import numpy as np
import funct
from sys import argv

# Command line: <script> <data file> <labels file>
script, d_f, l_f = argv

# Load the data set via funct.Data; w0=False is passed through unchanged.
dataset = funct.Data(d_f, l_f, w0=False)

# NOTE(review): despite its name, `labels` holds `test_labels`, which is used
# below as indices into `dataset.data` -- presumably test-row indices; confirm
# against funct.Data.
labels = dataset.test_labels
train_idx = dataset.training_rows

# Look up the entries selected for training, their labels, and the test entries.
training_data = [dataset.data[i] for i in train_idx]
training_labels = [dataset.labels[i] for i in dataset.training_rows]
test_data = [dataset.data[i] for i in labels]

# From here on `data` is the training entries converted to a NumPy array.
data = np.asarray(training_data)

# Fit a two-component PCA on the training array, then project that same array.
pca = PCA(n_components=2)
pca.fit(data)
data_applied = pca.transform(data)
#print("Training data")
Exemplo n.º 2
0
from sys import argv
import funct as stat

# Command line: <script> <data file> <labels file>
(script, df, lf) = argv

# Build the data set object; w0=False is passed through to funct.Data as-is.
data_arr = stat.Data(df, lf, w0=False)

for j in range(data_arr.cols):
    
#!/usr/bin/env python3

from sys import argv
import funct as stat

# Command line: <script> <data file> <labels file>
(script, d_f, l_f) = argv

data_arr = stat.Data(d_f, l_f, w0=False)

# Print one "<column index> <variance>" line for every column index below
# data_arr.cols.
for col_idx in range(data_arr.cols):
    column = data_arr.get_col(col_idx)
    print("%d %f" % (col_idx, stat.variance(column)))
Exemplo n.º 4
0
### cs675 machine learning
### assignment 5: CART

from sys import argv
from re import split
import funct

### functions for handling raw data

# Expect exactly three user arguments after the script name:
# data file, training-labels file, output file.
if len(argv) != 4:
    # Raise SystemExit directly instead of calling quit(): quit() is injected
    # by the site module for interactive use and is missing under `python -S`.
    # A string argument is written to stderr and the exit status becomes 1,
    # so a usage error is reported to the calling shell as a failure.
    raise SystemExit("""Please run in the format
    script data training_labels out""")
script, data_f, labels_f, out_f = argv

# Load the data set via funct.Data; w0=False is passed through unchanged.
data = funct.Data(data_f, labels_f, w0=False)

# Column count plus the two class sizes reported by the data set object.
ncol, nc1, nc2 = data.cols, data.n_case, data.n_control
train_size = nc1 + nc2

# One slot per column, initialised to 0.
best_splits = [0 for _ in range(ncol)]
best_gini = [0 for _ in range(ncol)]

# NOTE(review): `out` is not closed anywhere in the visible code -- confirm
# that the rest of the script closes it.
out = open(out_f, "w")

for j in range(ncol):
    col_vect = data.get_col(j)
    split_hash = dict()
    for split in [0.5, 1.5]: