示例#1
0
 def testLoad(self):
     """Load the Pima dataset from disk and check that split() honours the requested sizes.

     The test asserts that the loaded dataset has at least two items and at
     least one feature, then splits off one third of the items as the train
     sample and checks that train and test sizes add up to the full dataset.
     """
     # NOTE(review): absolute, machine-specific path -- the test can only
     # pass on a machine that has the data file at this exact location.
     filename = 'C:\\Storage\\vft11ccas\\Source\\DataUCI\\pima\\pima-indians-diabetes.data'
     dataset = lsData.loadFromFile(filename)

     # Dedicated comparison assertions report both operands on failure.
     self.assertGreaterEqual(dataset.nItems, 2)
     self.assertGreater(dataset.nFeatures, 0)

     # Floor division keeps the sample size an integer regardless of
     # whether "/" means integer or true division in the running interpreter.
     trainSampleSize = dataset.nItems // 3
     samples = dataset.split(trainSampleSize)

     self.assertEqual(samples.TrainSample.nItems, trainSampleSize)
     self.assertEqual(samples.TestSample.nItems, dataset.nItems - trainSampleSize)
示例#2
0
__author__ = 'Alexander Frey'

# Import modules
import numpy as np
import statsmodels.api as sm
import LinearSampling.api as lsApi
import LinearSampling.dataset as lsData
# Seed NumPy's global random generator (np.random.seed is the public alias
# of np.random.mtrand.seed); presumably this makes the split below repeatable.
np.random.seed(10)

# Load dataset (Pima from UCI repository; 768 items, 8 features)
dataset = lsData.loadFromFile('pima-indians-diabetes.data')
# Append a constant column (prepend=False puts it last) so the model has an intercept
dataset.X = sm.add_constant(dataset.X, prepend=False)

# Split dataset into train and test samples.
# Floor division keeps the requested train size an integer under both
# integer-division and true-division semantics of "/".
samples = dataset.split(dataset.nItems // 2)
trainSample = samples.TrainSample
testSample = samples.TestSample

# Tune logistic regression and show the model
logisticRegression = sm.GLM(trainSample.target, trainSample.X, family=sm.families.Binomial())
model = logisticRegression.fit().params
# Single-argument parenthesized print produces the same output as the
# print statement and is also valid where print is a function.
print("Model : [ " + ", ".join(format(x, ".3f") for x in model) + " ]")

# Check error rate on train and test sample:
# predictions above 0.5 are compared against the target and the fraction
# of mismatches is taken as the error rate.
trainPredictions = logisticRegression.predict(model, trainSample.X)
trainErrorRate = float(sum((trainPredictions > 0.5) != trainSample.target)) / trainSample.nItems

testPredictions = logisticRegression.predict(model, testSample.X)
testErrorRate = float(sum((testPredictions > 0.5) != testSample.target)) / testSample.nItems

print("Train error rate : " + '%.3f' % (100 * trainErrorRate) + " %")