Пример #1
0
def evaluate(fmat, labels, targets, method, k, c_param, nu_param, learn_rate,
             n_estimators):
    """Score a binary classifier with stratified k-fold cross-validation.

    For each fold returned by ``mlpy.cv_kfold`` a classifier is built by
    ``train_classifier`` on the training rows and asked to predict the test
    rows. Predictions are cast with ``int()`` and compared to the labels.

    Args:
        fmat: feature matrix; indexed with the fold index arrays.
        labels: class labels, indexed the same way. Only the values 0 and 1
            are counted, and each label is used directly as a list index.
        targets: one identifier per sample (converted to a NumPy array).
        method, c_param, nu_param, learn_rate, n_estimators: forwarded
            unchanged to ``train_classifier``.
        k: number of folds.

    Returns:
        A pair ``(out, detailed)``. ``out`` holds one
        ``(precision, recall, accuracy)`` tuple per fold; a metric is
        ``None`` when its denominator is zero. ``detailed[actual][predicted]``
        is the list of targets that fell into that confusion-matrix cell,
        accumulated over all folds.
    """
    nptargets = np.array(targets)
    out = []
    detailed = [[[], []], [[], []]]
    for tr, ts in mlpy.cv_kfold(len(labels), k, strat=labels):
        clas = train_classifier(fmat[tr], labels[tr], method, c_param,
                                nu_param, learn_rate, n_estimators)
        # The classifier may expose either pred() or predict(); try both.
        try:
            pred = clas.pred(fmat[ts])
        except AttributeError:
            pred = clas.predict(fmat[ts])

        # Single pass: fill the detailed table and the confusion counts.
        tp = tn = fp = fn = 0
        for target, actual, guess in zip(nptargets[ts], labels[ts], pred):
            guess = int(guess)
            detailed[actual][guess].append(target)
            if actual == 1 and guess == 1:
                tp += 1
            elif actual == 0 and guess == 0:
                tn += 1
            elif actual == 0 and guess == 1:
                fp += 1
            elif actual == 1 and guess == 0:
                fn += 1

        # float() keeps the ratios fractional even under Python 2 integer
        # division; only an empty denominator is treated as "undefined".
        try:
            precision = float(tp) / (tp + fp)
        except ZeroDivisionError:
            precision = None
        try:
            recall = float(tp) / (tp + fn)
        except ZeroDivisionError:
            recall = None
        try:
            accuracy = float(tp + tn) / (tp + tn + fp + fn)
        except ZeroDivisionError:
            accuracy = None
        out.append((precision, recall, accuracy))
    return out, detailed
Пример #2
0
def cross_validation(data,y):
    """Cross-validate ``train`` with one fold per row of ``data``.

    ``mlpy.cv_kfold`` is asked for as many folds as ``data`` has rows
    (fixed seed 4, no stratification). For every fold a model is trained
    with ``train`` and scored with ``evaluate`` on both the training rows
    and the held-out rows.

    Args:
        data: 2-D array of samples, one row per sample.
        y: labels, indexable by the fold index arrays.

    Returns:
        ``(confusion, mean_train_accuracy, mean_test_accuracy)`` where
        ``confusion`` starts as a 3x3 zero matrix and is replaced by the
        value ``evaluate`` returns for each test fold.

    Raises:
        ZeroDivisionError: if ``mlpy.cv_kfold`` yields no folds.
    """
    n_samples = numpy.size(data, 0)
    folds = mlpy.cv_kfold(n_samples, n_samples, strat=None, seed=4)
    confusion = numpy.zeros((3, 3))
    train_acc_sum = 0
    test_acc_sum = 0
    n_folds = 0
    for tr, ts in folds:
        X_tr, Y_tr = data[tr, :], y[tr]
        X_ts, Y_ts = data[ts, :], y[ts]

        model = train(X_tr, Y_tr)
        train_pred = model.pred(X_tr)
        test_pred = model.pred(X_ts)

        # Training-set score: only the accuracy is kept.
        train_acc, _ = evaluate(train_pred, Y_tr)
        train_acc_sum = train_acc_sum + train_acc

        # Test-set score: the running confusion matrix is passed in and
        # replaced by whatever evaluate() hands back.
        test_acc, confusion = evaluate(test_pred, Y_ts, confusion)
        test_acc_sum = test_acc_sum + test_acc
        n_folds = n_folds + 1

    return (confusion,
            train_acc_sum / float(n_folds),
            test_acc_sum / float(n_folds))
Пример #3
0
def kfold(vectors, labels, count):
    """Split ``vectors`` and ``labels`` into ``count`` cross-validation folds.

    Returns a 4-tuple of lists, each with one entry per fold:
    (training_x, training_y, testing_x, testing_y).
    """
    def take(sequence, indices):
        # Pick the items of ``sequence`` at the given positions, in order.
        return [sequence[position] for position in indices]

    folds = list(mlpy.cv_kfold(n=len(vectors), k=count))
    training_x = [take(vectors, train_idx) for train_idx, _ in folds]
    training_y = [take(labels, train_idx) for train_idx, _ in folds]
    testing_x = [take(vectors, test_idx) for _, test_idx in folds]
    testing_y = [take(labels, test_idx) for _, test_idx in folds]
    return (training_x, training_y, testing_x, testing_y)
Пример #4
0
def kfold(vectors, labels, count):
    """Build per-fold training and testing subsets using ``mlpy.cv_kfold``.

    The result is the tuple
    (training_x, training_y, testing_x, testing_y); every member is a list
    holding one sub-list per fold.
    """
    training_x, training_y = [], []
    testing_x, testing_y = [], []
    for train_idx, test_idx in mlpy.cv_kfold(n=len(vectors), k=count):
        # (destination list, source sequence, indices to pull from it)
        plan = (
            (training_x, vectors, train_idx),
            (training_y, labels, train_idx),
            (testing_x, vectors, test_idx),
            (testing_y, labels, test_idx),
        )
        for bucket, source, indices in plan:
            bucket.append([source[position] for position in indices])
    return (training_x, training_y, testing_x, testing_y)
Пример #5
0
def evaluate(fmat, labels, targets, method, k, c_param, nu_param,
             learn_rate, n_estimators):
    """Score a binary classifier with stratified k-fold cross-validation.

    For each fold returned by ``mlpy.cv_kfold`` a classifier is built by
    ``train_classifier`` on the training rows and asked to predict the test
    rows. Predictions are cast with ``int()`` and compared to the labels.

    Args:
        fmat: feature matrix; indexed with the fold index arrays.
        labels: class labels, indexed the same way. Only the values 0 and 1
            are counted, and each label is used directly as a list index.
        targets: one identifier per sample (converted to a NumPy array).
        method, c_param, nu_param, learn_rate, n_estimators: forwarded
            unchanged to ``train_classifier``.
        k: number of folds.

    Returns:
        A pair ``(out, detailed)``. ``out`` holds one
        ``(precision, recall, accuracy)`` tuple per fold; a metric is
        ``None`` when its denominator is zero. ``detailed[actual][predicted]``
        is the list of targets that fell into that confusion-matrix cell,
        accumulated over all folds.
    """
    nptargets = np.array(targets)
    out = []
    detailed = [[[], []], [[], []]]
    for tr, ts in mlpy.cv_kfold(len(labels), k, strat=labels):
        clas = train_classifier(fmat[tr], labels[tr], method, c_param,
                                nu_param, learn_rate, n_estimators)
        # The classifier may expose either pred() or predict(); try both.
        try:
            pred = clas.pred(fmat[ts])
        except AttributeError:
            pred = clas.predict(fmat[ts])

        # Single pass: fill the detailed table and the confusion counts.
        tp = tn = fp = fn = 0
        for target, actual, guess in zip(nptargets[ts], labels[ts], pred):
            guess = int(guess)
            detailed[actual][guess].append(target)
            if actual == 1 and guess == 1:
                tp += 1
            elif actual == 0 and guess == 0:
                tn += 1
            elif actual == 0 and guess == 1:
                fp += 1
            elif actual == 1 and guess == 0:
                fn += 1

        # float() keeps the ratios fractional even under Python 2 integer
        # division; only an empty denominator is treated as "undefined".
        try:
            precision = float(tp) / (tp + fp)
        except ZeroDivisionError:
            precision = None
        try:
            recall = float(tp) / (tp + fn)
        except ZeroDivisionError:
            recall = None
        try:
            accuracy = float(tp + tn) / (tp + tn + fp + fn)
        except ZeroDivisionError:
            accuracy = None
        out.append((precision, recall, accuracy))
    return out, detailed
Пример #6
0
# Build one label vector per cross-validation repetition (CV_N entries).
ys=[]

if random_labels:
    # Permutation run: shuffle a copy of y once with a fixed seed.
    # NOTE(review): the same shuffled array object is appended CV_N times,
    # so every repetition shares one permutation -- confirm this is intended.
    np.random.seed(0)
    tmp = y.copy()
    np.random.shuffle(tmp)
    for i in range(CV_N):
        ys.append(tmp)
else:
    # Regular run: every repetition uses the original labels.
    for i in range(CV_N):
        ys.append(y)

for n in range(CV_N):
    # Each repetition starts from seed n and re-draws the folds until no
    # training split contains a zero-variance feature column.
    seed = n
    while True:
        idx = mlpy.cv_kfold(n=x.shape[0], k=CV_K, strat=ys[n], seed=seed)

        for i, (idx_tr, idx_ts) in enumerate(idx):
            x_tr, x_ts = x[idx_tr], x[idx_ts]
            if any(np.var(x_tr, axis=0) == 0):
                # Degenerate split: bump the seed by CV_N and try again.
                seed += CV_N
                break
        else:
            # No fold triggered the break above: accept this split.
            break

        # Reached only after a rejected split; give up once the seed has
        # moved past n + CV_N * KFOLD_TRY.
        if seed > n + CV_N * KFOLD_TRY:
            raise IOError, 'filter threshold should be more higher'

        # NOTE(review): this print is inside the retry loop, so it runs only
        # when a split was rejected -- confirm the indentation is intended.
        print "%d over %d experiments" % (n+1, CV_N)

    for i, (idx_tr, idx_ts) in enumerate(idx):
## run CV for transitions

## IN CONTROL PATIENTS
from operator import itemgetter
from sklearn import metrics
import mlpy

x = feature_matrix_med_counts_IN_CONTROL_FROM_ALL  # feature matrix
# Class vector (HTN control status), cast to a NumPy array.
y = np.array(y_IN_CONTROL_TRANSITION_FROM_ALL)

# Cross-validation parameters.
numsamples = len(y)
numfolds = 10
# Use numfolds here so the split always matches d_results['NUM_FOLD'].
idx = mlpy.cv_kfold(n=numsamples, k=numfolds)
#for tr, ts in idx: print(tr, ts) #print out the indexes for CV

# Run the k-fold CV. d_results holds the overall metrics (initialised to
# None) and the per-fold series (initialised to empty lists).
d_results = {'ACCURACY': None, 'AUC': None, 'SENSITIVITY': None, 'SPECIFICITY' :None,  'CORRECT_PER_FOLD' : [],
             'NUM_FOLD': numfolds,'AUC_PER_FOLD': [], 'FPR_PER_FOLD':[],
             'TPR_PER_FOLD':[], 'ACC_PER_FOLD':[],
             'SENS_PER_FOLD': [] ,'SPEC_PER_FOLD':[],
             'REPORT_PER_FOLD': [],
             'PPV_PER_FOLD':[], 'NPV_PER_FOLD':[],
             'PPV': None, 'NPV': None}
for tr, ts in idx:
    # itemgetter(*indices) pulls the selected entries out of y / x.
    trainset_samples = itemgetter(*tr)(y)
    testset_samples = itemgetter(*ts)(y)
    trainset_features = itemgetter(*tr)(x)
    testset_features = itemgetter(*ts)(x)
    # Build the logistic regression model.
    model_logistic = mlpy.LibLinear(solver_type='l2r_lr') #default: mlpy.LibLinear(solver_type='l2r_lr', C=1, eps=0.01, weight={})
Пример #8
0
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn import neighbors, datasets
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix


# Load the comma-separated dataset: columns 0-9 are the features and
# column 10 is the class label.
datos = np.loadtxt('datasetnew.txt', delimiter=',')
# Builtin int replaces the np.int alias, which was removed in NumPy 1.24.
x, y = datos[:, :10], datos[:, 10].astype(int)

# x = x - np.mean(x, axis=0)
# Scale every feature column to unit Euclidean norm.
xn = x / np.sqrt(np.sum(x**2, axis=0))
# One fold per sample (k equals the number of rows).
kfold = mlpy.cv_kfold(len(xn), len(xn))

#-----------------------------------------------------------
# percent = []
# # 0.722916666667
# result = []
# classifier = LDA()
# classifier.fit(xn, y)
# for crossval in range(len(kfold)):
#     trainindex = kfold[crossval][0].tolist()
#     evaluateindex = kfold[crossval][1].tolist()
#     trainx = xn[trainindex]
#     trainy = y[trainindex]
#     evaluatex = xn[evaluateindex]
#     evaluatey = y[evaluateindex]
#
Пример #9
0
        v.append(str(m).rpartition(",")[2])
    #close the file object
    fo.close()
    return r

def correctVals(array):
    """Replace every entry of ``array`` with ``abs(int(entry))`` in place.

    The same (mutated) container is returned for convenience.
    """
    for position, raw in enumerate(array):
        array[position] = abs(int(raw))
    return array

# Load the dataset; the first element of the result is used as the samples
# and the second as the labels.
data = openFile()
x = data[0]
y = data[1]
# 10-fold split over the samples; the loop below consumes it with pop().
i = mlpy.cv_kfold(n=len(x), k=10)
# Counter incremented once per iteration of the loop below.
m = 0

# Result accumulators -- presumably one per classifier (KNN, decision tree,
# logistic regression); they are filled outside the visible code. TODO confirm
KNNRESULTS = []
DTRESULTS = []
LRRESULTS = []

while (len(i)>0):
    m += 1
    trainx = []
    trainy = []
    testx = []
    testy = []
    indx = i.pop()
    
    for c in indx[0]: