# For dat file, use the input file name with the .csv extension tokens = test.split('.') testFileRoot = str.join('.', tokens[:-1]) datFileName = testFileRoot + '.csv' jp_results = [] ps_results = [] fp_results = [] jp_run = [] ps_run = [] fp_run = [] for i in range(tries): sdg = synthDataGen.run(test, datSize) d = getData.DataReader(datFileName) data = d.read() prob = ProbSpace(data) lim = 3 # Std's from the mean to test conditionals numPts = 30 # How many eval points for each conditional print('Test Limit = ', lim, 'standard deviations from mean') print('Dimensions = ', dims, '. Conditionals = ', dims - 1) print('Number of points to test for each conditional = ', numPts) N = prob.N evalpts = int( sqrt(N) ) # How many target points to sample for expected value: E(Z | X=x. Y=y) print('JPROB points for mean evaluation = ', evalpts) vars = prob.fieldList cond = [] # Get the conditional variables for i in range(len(vars)):
print('Testing: ', test, '--', testDescript) # For dat file, use the input file name with the .csv extension tokens = test.split('.') testFileRoot = str.join('.', tokens[:-1]) datFileName = testFileRoot + '.csv' jp_results = [] ps_results = [] jp_run = [] ps_run = [] for i in range(tries): sdg = synthDataGen.run(test, datSize) d = getData.DataReader(datFileName) data = d.read() prob = ProbSpace(data) lim = 3 # Std's from the mean to test conditionals numPts = 30 # How many eval points for each conditional print('Test Limit = ', lim, 'standard deviations from mean') print('Dimensions = ', dims, '. Conditionals = ', dims - 1) print('Number of points to test for each conditional = ', numPts) N = prob.N evalpts = int( sqrt(N) ) # How many target points to sample for expected value: E(Z | X=x. Y=y) print('JPROB points for mean evaluation = ', evalpts) vars = prob.fieldList cond = [] # Get the conditional variables for i in range(len(vars)):
print('Testing: ', test, '--', testDescript) # For dat file, use the input file name with the .csv extension tokens = test.split('.') testFileRoot = str.join('.', tokens[:-1]) datFileName = testFileRoot + '.csv' jp_results = [] ps_results = [] jp_run = [] ps_run = [] for i in range(tries): sdg = synthDataGen.run(test, datSize) d = getData.DataReader(datFileName) data = d.read() prob = ProbSpace(data) lim = 3 # Std's from the mean to test conditionals numPts = 30 # How many eval points for each conditional print('Test Limit = ', lim, 'standard deviations from mean') print('Dimensions = ', dims, '. Conditionals = ', dims - 1) print('Number of points to test for each conditional = ', numPts) N = prob.N cond = 'B' target = 'A2' R1 = RKHS(prob.ds, delta=None, includeVars=[target, cond], s=smoothness) # Do some general assessment of cumulative probabilities # Do some univariate CDF calculations. F is normal(0,1)
sum2 += t1*t2 if sum1 ==0: return 0 return sum2/sum1 if __name__ == '__main__': path = '../models/Cprobdata.csv' d = getData.DataReader(path) data = d.read() X1 = data['X'] Y1 = data['Y'] X = X1[:7500] Y = Y1[:7500] ps = ProbSpace(data) size = len(X) testPoints = [] testMin = 5 testMax = 7 tp = testMin numTP = 100 interval = (testMax - testMin) / numTP sq = [] probpy = [] cond = [] Perror = 0.0 Pdev = []
def condP(self, Vals, K=None):
    """Return the conditional probability for Vals, or None.

    Vals is a list of (x1, x2, ..., xn) such that P(X1=x1 | X2=x2, ...),
    same calling convention as rkhsmv.  When K is given it replaces
    self.k.  self.k is used as a percentage: the trailing
    floor((len(self.includeVars) - 1) * self.k / 100) variables are
    conditioned on by filtering the data through ProbSpace.filter, and the
    remaining variables are evaluated with an RKHS model kept in self.R1.

    Returns:
        The value produced by the RKHS model when it is > 0, otherwise
        None.  In the all-variables-filtered branch, the first filter level
        that yields enough points returns self.R1.P(...) unchanged.

    Side effects:
        May update self.k, self.rangeFactor, self.minPoints,
        self.maxPoints, self.R1 and self.r1filters; prints diagnostics.
    """
    if K is not None:
        self.k = K
    filter_len = floor((len(self.includeVars) - 1) * self.k * 0.01)
    dims = len(Vals)
    print(dims, filter_len)
    # NOTE(review): condE tests rangeFactor against None, this tests
    # against 0 -- confirm which sentinel the constructor uses.
    if self.rangeFactor == 0:
        self.rangeFactor = 0.5
    if filter_len != 0:
        filter_vars = self.includeVars[-filter_len:]
        filter_vals = Vals[-filter_len:]
        include_vars = self.includeVars[:-filter_len]
        scaled_n = ceil(self.N ** ((dims - filter_len) / dims))
        self.minPoints = scaled_n * self.rangeFactor
        self.maxPoints = scaled_n / self.rangeFactor
    else:
        filter_vars = []
        filter_vals = []
        include_vars = self.includeVars

    # Calculating R1
    zDim = floor(self.k * (dims - 1) * 0.01)
    minminpoints = 10
    if filter_len == (len(self.includeVars) - 1):
        # Every conditional variable is filtered: try progressively fewer
        # filter dimensions until enough data points survive.
        for i in range(zDim, 0, -1):
            print("runing i=", i)
            filter_len = floor(i * self.k * 0.01)
            # NOTE(review): this uses N * ratio, while the code above uses
            # N ** ratio -- confirm which is intended.
            self.minPoints = ceil(self.N * ((dims - i) / dims)) * self.rangeFactor
            self.maxPoints = ceil(self.N * ((dims - i) / dims)) / self.rangeFactor
            rkhsminpoints = minminpoints * (dims - i)
            if self.minPoints < rkhsminpoints:
                print("minpoints < minminpoints", self.minPoints, rkhsminpoints)
                continue
            P = ProbSpace(self.data)
            # NOTE(review): this takes the FIRST filter_len entries of
            # filter_vars while the model below keeps the leading
            # includeVars -- verify the two selections are meant to overlap.
            filter_data = [(filter_vars[j], filter_vals[j])
                           for j in range(filter_len)]
            print("filter metrics = ", filter_data)
            FilterData, parentProb, finalQuery = P.filter(
                filter_data, self.minPoints, self.maxPoints)
            # Count surviving rows through the first included variable
            # instead of a hard-coded 'B' column, which raised KeyError on
            # datasets that have no variable named B.
            n_filtered = len(FilterData[self.includeVars[0]])
            if n_filtered < self.minPoints:
                print("not enough filter points for filterlen =", i,
                      n_filtered, self.minPoints, self.maxPoints)
                continue
            print("filter len", filter_len)
            print("filtered datapoints:", n_filtered)
            print("include vars:", self.includeVars[:-filter_len])
            self.R1 = RKHS(FilterData,
                           includeVars=self.includeVars[:(dims - filter_len)],
                           delta=self.delta, s=self.s)
            self.r1filters = filter_vals
            return self.R1.P(Vals[:dims - filter_len])
        # No filter level produced enough points: fall back to the
        # unfiltered model over all variables.
        print("running empty")
        self.R1 = RKHS(self.data, includeVars=self.includeVars,
                       delta=self.delta, s=self.s)
        p = self.R1.condP(Vals)
        return p if p > 0 else None
    elif filter_len != 0 and self.r1filters != filter_vals:
        # Filter values changed since the cached model was built.
        P = ProbSpace(self.data)
        filter_data = [(filter_vars[i], filter_vals[i])
                       for i in range(filter_len)]
        FilterData, parentProb, finalQuery = P.filter(
            filter_data, self.minPoints, self.maxPoints)
        self.R1 = RKHS(FilterData, includeVars=self.includeVars[:-filter_len],
                       delta=self.delta, s=self.s)
        self.r1filters = filter_vals
    elif self.R1 is None:
        print("running empty")
        self.R1 = RKHS(self.data, includeVars=self.includeVars,
                       delta=self.delta, s=self.s)
    elif self.R1.varNames != include_vars:
        self.R1 = RKHS(self.data, includeVars=self.includeVars,
                       delta=self.delta, s=self.s)

    if filter_len != 0:
        p = self.R1.condP(Vals[:-filter_len])
    else:
        p = self.R1.condP(Vals)
    return p if p > 0 else None
def condE(self, target, Vals, K=None):
    """Return the conditional expectation E(Y | X1=x1, X2=x2, ...).

    Vals is a list of (x1, x2, ..., xn) such that E(Y|X1=x1,X2=x2.....),
    same calling convention as rkhsmv.  K (default self.k) is used as a
    percentage: the trailing floor((len(self.includeVars) - 1) * K / 100)
    conditional variables are applied by filtering the data through
    ProbSpace.filter; the rest are passed to an RKHS model kept in self.R2.

    When filtering leaves too few points the method calls itself again
    with a smaller K (one fewer filtered dimension).

    Returns:
        * all conditionals filtered: the mean of the filtered values of
          self.includeVars[0], or 0 when nothing survives the filter;
        * otherwise: whatever self.R2.condE(target, ...) returns.

    Side effects:
        May update self.rangeFactor, self.minPoints, self.maxPoints,
        self.R2 and self.r2filters; prints diagnostics.
    """
    if K is None:
        K = self.k
    filter_len = floor((len(self.includeVars) - 1) * K * 0.01)
    dims = len(Vals) + 1
    if self.rangeFactor is None:
        self.rangeFactor = 0.8
    minminpoints = 5
    if filter_len != 0:
        filter_vars = self.includeVars[-filter_len:]
        filter_vals = Vals[-filter_len:]
        include_vars = self.includeVars[1:-filter_len]
        scaled_n = self.N ** ((dims - filter_len) / dims)
        self.minPoints = scaled_n * self.rangeFactor
        self.maxPoints = scaled_n / self.rangeFactor
    else:
        filter_vars = []
        filter_vals = []
        # The unfiltered model below is always built with includeVars[1:]
        # (the target is excluded).  Comparing against the full includeVars
        # list made the cache check always fail, rebuilding the model on
        # every call.  Presumably RKHS.varNames mirrors the includeVars it
        # was given -- verify against the RKHS class.
        include_vars = self.includeVars[1:]

    if filter_len == (len(self.includeVars) - 1):
        # Every conditional is filtered: the answer is the plain mean of
        # the target values that survive the filter.
        P = ProbSpace(self.data)
        filter_vars = self.includeVars[1:]
        filter_vals = Vals
        filter_data = [(filter_vars[i], filter_vals[i])
                       for i in range(filter_len)]
        FilterData, parentProb, finalQuery = P.filter(
            filter_data, self.minPoints, self.maxPoints)
        X = FilterData[self.includeVars[0]]
        if len(X) < self.minPoints:
            # Lower K so that one fewer dimension is filtered, then retry.
            newk = ceil((((K * (dims - 1) * 0.01) - 1) / (dims - 1)) * 100)
            print("not enough datapoints, newk=", newk)
            return self.condE(target, Vals, newk)
        if len(X) != 0:
            return sum(X) / len(X)
        return 0
    elif filter_len != 0 and self.r2filters != filter_vals:
        # Filter values changed since the cached model was built.
        P = ProbSpace(self.data)
        filter_data = [(filter_vars[i], filter_vals[i])
                       for i in range(filter_len)]
        FilterData, parentProb, finalQuery = P.filter(
            filter_data, self.minPoints, self.maxPoints)
        X = FilterData[self.includeVars[0]]
        print("filter len", filter_len)
        # Count rows through the target column rather than a hard-coded
        # 'B' column, which raised KeyError on datasets without a B.
        print("filtered datapoints:", len(X))
        print("include vars:", self.includeVars[:-filter_len])
        if len(X) < self.minPoints or len(X) <= minminpoints:
            newk = ceil((((K * (dims - 1) * 0.01) - 1) / (dims - 1)) * 100)
            print("not enough datapoints, newk=", newk)
            return self.condE(target, Vals, newk)
        self.R2 = RKHS(FilterData, includeVars=self.includeVars[1:-filter_len],
                       delta=self.delta, s=self.s)
        self.r2filters = filter_vals
    elif self.R2 is None:
        self.R2 = RKHS(self.data, includeVars=self.includeVars[1:],
                       delta=self.delta, s=self.s)
    elif self.R2.varNames != include_vars:
        self.R2 = RKHS(self.data, includeVars=self.includeVars[1:],
                       delta=self.delta, s=self.s)

    if filter_len != 0:
        return self.R2.condE(target, Vals[:-filter_len])
    return self.R2.condE(target, Vals)
else: v3 = 'C' f = open(test, 'r') exec(f.read(), globals()) print('Testing: ', test, '--', testDescript) # For dat file, use the input file name with the .csv extension tokens = test.split('.') testFileRoot = str.join('.', tokens[:-1]) datFileName = testFileRoot + '.csv' d = getData.DataReader(datFileName) data = d.read() prob = ProbSpace(data) traces = {} traces['X'] = [] traces['Y'] = [] traces['Z'] = [] v1distr = prob.distr(v1) v2distr = prob.distr(v2) v3distr = prob.distr(v3) v1mean = v1distr.E() v2mean = v2distr.E() v3mean = v3distr.E() #ymin = targdistr.minVal() #ymax = targdistr.maxVal() #xmin = conddistr.minVal() #xmax = conddistr.maxVal() lim = 3
def run(filename):
    """Exercise the probability module against the dataset in *filename*.

    Reads the file with getData.DataReader, builds a ProbSpace over it and
    prints a series of measured values next to their expected values
    ('Exp:').  Nothing is asserted and nothing is returned; the output is
    meant to be inspected by eye.  The queries reference the variables
    A, B, C, N, N2, EXP, IVA, IVB and IVC, so the dataset must provide them.
    """
    r = getData.DataReader(filename)
    dat = r.read()
    ps = ProbSpace(dat, density=1, power=1)
    start = time.time()
    print()
    print('Testing probability module.')
    print()
    print('Testing basic statistics for various types of distribution:')
    print('stats(A) = ', ps.fieldStats('A'))
    print('stats(C) = ', ps.fieldStats('C'))
    a = ps.distr('A')
    mean = a.mean()
    std = a.stDev()
    print('stats(dice1): mean, std, skew, kurtosis, median, mode = ', mean, std, a.skew(), a.kurtosis(), ' Exp: (3.5, ?, 0, ?)')
    c = ps.distr('C')
    print('stats(d1 + d2): mean, std, skew, kurtosis, median, mode = ', c.E(), c.stDev(), c.skew(), c.kurtosis(), c.median(), c.mode(), ' Exp: (7, ?, 0, ?, 7, 7)')
    d = ps.distr('EXP')
    print('stats(Exponential): mean, std, skew, kurtosis = ', d.E(), d.stDev(), d.skew(), d.kurtosis(), ' Exp: (1, 1, 2, 6)')
    d = ps.distr('IVB')
    print('stats(Logistic): mean, std, skew, kurtosis = ', d.E(), d.stDev(), d.skew(), d.kurtosis(), ' Exp: (0, 1.8138, 0, 1.2)')
    d = ps.distr('N')
    print('stats(Normal): mean, std, skew, kurtosis, median = ', d.E(), d.stDev(), d.skew(), d.kurtosis(), d.median(), 'Exp: (0, 1, 0, 0)')
    d = ps.distr('N2')
    print('stats(N2: sum of normals): mean, std, skew, kurtosis = ', d.E(), d.stDev(), d.skew(), d.kurtosis(), 'Exp: (1, 1.414, 0, 0)')

    print()
    print('Testing discrete deterministic probabilities (2-dice -- ala Craps):')
    print('A is Die #1. B is Die #2. C is the total of the 2 dice.')
    print('E(B) = ', ps.distr('B').E(), ' Exp: 3.5')
    print('P(B=0) = ', ps.P(('B', 0)), ' Exp: 0')
    print('P(B=1) = ', ps.P(('B', 1)), ' Exp: 1/6 = .166...')
    print('P(B=2) = ', ps.P(('B', 2)), ' Exp: 1/6 = .166...')
    print('P(B >= 0) = ', ps.P(('B', 0, None)), ' Exp: 1.0')
    print('P(B < 0) = ', ps.P(('B', None, 0)), ' Exp: 0.0')
    print('P(-inf <= B > inf) = ', ps.P(('B', None, None)), ' Exp: 1.0')
    print('P(-1 <= B < 3) = ', ps.P(('B', -1, 3)), ' Exp: 1/3')
    print('P(C = 2) =', ps.P(('C', 2)), ' Exp: 1/36 = .0277...')
    print('P(C = 3) =', ps.P(('C', 3)), ' Exp: 1/18 = .055...')
    print('P( 2 <= C < 4) = ', ps.P(('C', 2, 4)), ' Exp: 3/36 = .0833...')
    # Condition on A, as the label states (the original queried B; the
    # expected value is the same by symmetry of the two dice).
    print('P( 2 <= C < 4 | A = 1) = ', ps.P(('C', 2, 4), ('A', 1)), ' Exp: 1/3')
    print('P( C = 7) = ', ps.P(('C', 7)), ' Exp: 1/6 = .166...')
    print('P( C = 7 | A = 1, B = 6) = ', ps.P(('C', 7), [('A', 1), ('B', 6)]), ' Exp: 1.0')
    print('P( C = 7 | A >= 2, B < 5) = ', ps.P(('C', 7), [('A', 2, None), ('B', None, 5)]), ' Exp: 1/5 = .2')
    print('P(-inf <= A < inf | B >= 1) = ', ps.P(('A', None, None), ('B', 1, None)), ' Exp: 1.0')
    print('P( A >= 3, B >= 3) = ', ps.P([('A', 3, None), ('B', 3, None)]), 'Exp: 4/9 (.444...)')
    print('P( C = 7, A = 5) = ', ps.P([('C', 7), ('A', 5)]), ' Exp: 1/36 (.0277...)')
    print('P( C = 7, A >= 5) = ', ps.P([('C', 7), ('A', 5, None)]), ' Exp: 1/18 (.0555...)')
    print('P( A = 2 | B = 5, C= 7) = ', ps.P(('A', 2), [('B', 5), ('C', 7)]), ' Exp: 1.0')
    # NOTE(review): the call below passes ('C', 7) as the second argument,
    # which elsewhere in this function is the conditional form P(B=5 | C=7);
    # the label reads as a joint probability -- confirm which is intended.
    print('P( B = 5, C= 7) = ', ps.P(('B', 5), ('C', 7)), ' Exp: 1/6 (.166...)')
    print('P( A = 2, B = 5) = ', ps.P([('A', 2), ('B', 5)]), ' Exp: 1/36 (.0277...)')
    print('P( A = 2, B = 5 | C = 7) = ', ps.P([('A', 2), ('B', 5)], ('C', 7)), ' Exp: 1/6 (.166...)')
    print('P( A = 2, B = 5, N < 0| C = 7) = ', ps.P([('A', 2), ('B', 5), ('N', None, 0)], ('C', 7)), ' Exp: 1/12 (.08333...)')
    print('E( C | A = 1, B = 6) = ', ps.distr('C', [('A', 1), ('B', 6)]).E(), ' Exp: 7')
    print('E( C | A = 1, B >= 5) = ', ps.distr('C', [('A', 1), ('B', 5, None)]).E(), ' Exp: 6')

    print()
    print('Testing continuous distributions. Using N = normal(0, 1)')
    n = ps.distr('N')
    mu1 = n.mean()
    mu2 = n.stDev()
    print('stats(N): mean, std, skew, kurtosis = ', mu1, mu2, n.skew(), n.kurtosis(), 'Exp: (0, 1, 0, 0)')
    print('P( -1 >= N > 1) = ', n.P((-1, 1)), 'Exp: .682')
    print('P( -2 >= N > 2) = ', n.P((-2, 2)), 'Exp: .954')
    print('P( -3 >= N > 3) = ', n.P((-3, 3)), 'Exp: .997')
    print('P( -inf >= N > 0) = ', n.P((None, 0)), 'Exp: .5')
    print('P( 0 >= N > inf) = ', n.P((0, None)), 'Exp: .5')
    print('P( -inf >= N > inf) = ', n.P((None, None)), 'Exp: 1.0')
    print('E( N2 | N = 1) = ', ps.distr('N2', ('N', 1)).E(), ' Exp: 2.0')
    print('E( N2 | 1 <= N < 2) = ', ps.distr('N2', ('N', 1, 2)).E())

    print()
    print('Dependence testing. Note: values < .5 are considered independent')
    print('A _||_ B = ', ps.dependence('A', 'B'), ' Exp: < .5')
    print('A _||_ C = ', ps.dependence('A', 'C'), ' Exp: > .5')
    print('B _||_ C = ', ps.dependence('B', 'C'), ' Exp: > .5')
    print('N _||_ N2 = ', ps.dependence('N', 'N2'), ' Exp: > .5')
    print('N _||_ C = ', ps.dependence('N', 'C'), ' Exp: < .5')
    print('C _||_ N = ', ps.dependence('C', 'N'), ' Exp: < .5')
    print('A _||_ B | C >= 8 = ', ps.dependence('A', 'B', [('C', 8, None)]), ' Exp: > .5')
    print('A _||_ B | C < 7 = ', ps.dependence('A', 'B', [('C', None, 7)]), ' Exp: > .5')
    print('A _||_ B | C = 7 = ', ps.dependence('A', 'B', [('C', 7)]), ' Exp: > .5')
    print('A _||_ B | C = 6 = ', ps.dependence('A', 'B', [('C', 6)]), ' Exp: > .5')
    print('A _||_ B | C = 5 = ', ps.dependence('A', 'B', [('C', 5)]), ' Exp: > .5')
    print('A _||_ B | C = 4 = ', ps.dependence('A', 'B', [('C', 4)]), ' Exp: > .5')
    print('A _||_ B | C = 3 = ', ps.dependence('A', 'B', [('C', 3)]), ' Exp: > .5')
    print('A _||_ B | C = 2 = ', ps.dependence('A', 'B', [('C', 2)]), ' Exp: < .5')
    print('A _||_ B | C = 12 = ', ps.dependence('A', 'B', [('C', 12)]), ' Exp: < .5')
    print('A _||_ B | C = ', ps.dependence('A', 'B', ['C']), ' Exp: > .5')

    print()
    print('Independence testing (values > .5 are considered independent):')
    print('A _||_ B = ', ps.independence('A', 'B'), ps.isIndependent('A', 'B'), ' Exp: > .5, True')
    print('A _||_ C = ', ps.independence('A', 'C'), ps.isIndependent('A', 'C'), ' Exp: < .5, False')
    print('A _||_ B | C = ', ps.independence('A', 'B', 'C'), ps.isIndependent('A', 'B', 'C'), ' Exp: < .5, False')
    print('A _||_ N = ', ps.independence('A', 'N'), ps.isIndependent('A', 'N'), ' Exp: > .5, True')

    print()
    print('Testing Conditionalization:')
    ivaDist = ps.distr('IVA')
    ivaMean = ivaDist.E()
    ivaStd = ivaDist.stDev()
    # Probe IVC at half a standard deviation either side of the IVA mean.
    upper = ivaMean + .5 * ivaStd
    lower = ivaMean - .5 * ivaStd
    diff = upper - lower
    pwr = 2
    print('test interval = ', upper - lower)
    ivcGupper = ps.E('IVC', ('IVA', upper), power=pwr)
    print('E( IVC | IVA = upper)', ivcGupper)
    ivcGlower = ps.E('IVC', ('IVA', lower), power=pwr)
    # (The original printed the 'upper' line a second time here.)
    print('E( IVC | IVA = lower)', ivcGlower)
    ivcGupper = ps.E('IVC', [('IVA', upper), 'IVB'], power=pwr)
    print('E( IVC | IVA = upper, IVB)', ivcGupper)
    ivcGlower = ps.E('IVC', [('IVA', lower), 'IVB'], power=pwr)
    print('E( IVC | IVA = lower, IVB)', ivcGlower)
    print('ACE(A,C) = ', (ivcGupper - ivcGlower) / diff, ' Exp: ~ 0')

    print()
    print('Testing continuous causal dependence:')
    print('IVB _||_ IVA = ', ps.dependence('IVB', 'IVA'), ' Exp: > .5')
    print('IVA _||_ IVB = ', ps.dependence('IVA', 'IVB'), ' Exp: > .5')
    print('IVB _||_ IVC = ', ps.dependence('IVB', 'IVC'), ' Exp: > .5')
    print('IVA _||_ IVC = ', ps.dependence('IVA', 'IVC'), ' Exp: > .5')
    print('IVA _||_ IVC | IVB = ', ps.dependence('IVA', 'IVC', 'IVB'), ' Exp: < .5')
    print('IVA _||_ IVC | IVB, N = ', ps.dependence('IVA', 'IVC', ['IVB', 'N']), ' Exp: < .5')

    print()
    print('Testing Bayesian Relationships:')
    # Bayes: P(C=7 | A=5) = P(A=5 | C=7) * P(C=7) / P(A=5)
    pA_C = ps.P(('A', 5), ('C', 7))
    pA = ps.P(('A', 5))
    pC = ps.P(('C', 7))
    pC_A = ps.P(('C', 7), ('A', 5))
    # The original multiplied by P(A) and divided by P(C); that only
    # passed because both are 1/6 for fair dice.
    invpC_A = pA_C * pC / pA
    err = abs(invpC_A - pC_A)
    print('Inverse P(C=7 | A=5) vs measured (Bayes(P(C | A)), P(C | A), diff): ', invpC_A, pC_A, err, ' Exp: ~ 0')
    # P(0 <= IVB < 1 | 1 <= IVA < 2) = P(1 <= IVA < 2 | 0 <= IVB < 1) * P(0 <= IVB < 1) / P(1 <= IVA < 2)
    pA_B = ps.P(('IVA', 1, 2), ('IVB', 0, 1))
    pB = ps.P(('IVB', 0, 1))
    pA = ps.P(('IVA', 1, 2))
    pB_A = ps.P(('IVB', 0, 1), ('IVA', 1, 2))
    invpB_A = pA_B * pB / pA
    err = abs(invpB_A - pB_A)
    print('Inverse P(0 <= IVB < 1 | 1 <= IVA < 2) vs measured (Bayes(P(IVB | IVA)), P(IVB | IVA), diff): ', invpB_A, pB_A, err, ' Exp: ~ 0')

    print()
    print('Testing Prediction and Classification:')
    testDat = {'A': [2, 3, 6], 'B': [5, 2, 6]}
    predDat = ps.Predict('C', testDat)
    for p, val in enumerate(predDat):
        a = testDat['A'][p]
        b = testDat['B'][p]
        print('Prediction(C) for A = ', a, ', B = ', b, ', = pred(C) = ', val, ' Exp:', a + b)
    predDat = ps.Classify('C', testDat)
    for p, val in enumerate(predDat):
        a = testDat['A'][p]
        b = testDat['B'][p]
        print('Classification(C) for A = ', a, ', B = ', b, ', = pred(C) = ', val, ' Exp:', a + b)
    testDat = {'N': [.5, 1, 1.5, 2, 2.5, 3], 'B': [1, 2, 3, 4, 5, 6]}
    predDists = ps.PredictDist('N2', testDat)
    for p, d in enumerate(predDists):
        n = testDat['N'][p]
        b = testDat['B'][p]
        print('Prediction(N2) for N = ', n, ', B = ', b, ', = pred(N2 (mean, std)) = ', d.E(), d.stDev(), ' Exp:', n + 1, ', 1')
    print()
    end = time.time()
    duration = end - start
    print('Test Time = ', round(duration))
def run(filename):
    """Exercise ProbSpace.Predict and ProbSpace.Classify on *filename*.

    Reads the dataset with getData.DataReader, holds out the last 100
    samples of every variable as test data and builds the ProbSpace from
    the rest.  It then prints:

    * R2 of Predict('Y', ...) against the held-out Y values, and
    * accuracy of Classify('DY', ...) against the held-out DY values.

    Nothing is returned.  'nan' is printed instead of raising
    ZeroDivisionError when the held-out Y values have no variance or when
    no results come back.
    """
    r = getData.DataReader(filename)
    dat = r.read()
    start = time.time()

    # Split data between 'training' and test.
    fieldNames = list(dat.keys())
    datLen = len(dat[fieldNames[0]])
    trainLen = datLen - 100
    tr = {}
    te = {}
    for var in fieldNames:
        datL = list(dat[var])
        tr[var] = datL[:trainLen]
        te[var] = datL[trainLen:]

    print()
    print("Testing probability module's prediction capabilities.")
    ps = ProbSpace(tr, density=1, power=1)

    print()
    print('Testing non-linear regression with continuous variables.')
    d = ps.distr('Y')
    print('stats(Y) = ', d.mean(), d.stDev(), d.skew(), d.kurtosis())
    Ymean = d.mean()
    # Note: Predict will automatically remove Y from the test data, so the
    # expected values are captured before calling it.
    expected = te['Y']
    results = ps.Predict('Y', te)
    SSE = 0.0  # Sum of squared error
    SST = 0.0  # Sum of squared deviation
    for val, exp in zip(results, expected):
        SSE += (val - exp) ** 2
        SST += (exp - Ymean) ** 2
    print('R2 = ', 1 - SSE / SST if SST else float('nan'))

    print()
    print('Testing Classification with discontinuous discrete data')
    d = ps.distr('DY')
    print('stats(DY) = ', d.minVal(), d.maxVal(), d.mean(), d.stDev(), d.skew(), d.kurtosis())
    expected = te['DY']
    results = ps.Classify('DY', te)
    cumErr = 0
    for val, exp in zip(results, expected):
        if val != exp:
            cumErr += 1
    numResults = len(results)
    print('Accuracy = ', 1 - (cumErr / numResults) if numResults else float('nan'))

    end = time.time()
    duration = end - start
    print('Test Time = ', round(duration))