示例#1
0
# For dat file, use the input file name with the .csv extension:
# everything before the last '.' in `test` is kept and '.csv' is appended.
tokens = test.split('.')
testFileRoot = str.join('.', tokens[:-1])
datFileName = testFileRoot + '.csv'
# Empty accumulators created before the per-try loop that follows.
# NOTE(review): the jp_/ps_/fp_ prefixes presumably name three estimation
# methods being compared, with *_results holding scores and *_run holding
# timings -- confirm against the code that fills them.
jp_results = []
ps_results = []
fp_results = []
jp_run = []
ps_run = []
fp_run = []
for i in range(tries):
    sdg = synthDataGen.run(test, datSize)
    d = getData.DataReader(datFileName)
    data = d.read()

    prob = ProbSpace(data)

    lim = 3  # Std's from the mean to test conditionals
    numPts = 30  # How many eval points for each conditional
    print('Test Limit = ', lim, 'standard deviations from mean')
    print('Dimensions = ', dims, '.  Conditionals = ', dims - 1)
    print('Number of points to test for each conditional = ', numPts)
    N = prob.N
    evalpts = int(
        sqrt(N)
    )  # How many target points to sample for expected value:  E(Z | X=x. Y=y)
    print('JPROB points for mean evaluation = ', evalpts)
    vars = prob.fieldList
    cond = []
    # Get the conditional variables
    for i in range(len(vars)):
示例#2
0
# Announce which test definition file is being run and its description.
print('Testing: ', test, '--', testDescript)

# For dat file, use the input file name with the .csv extension:
# everything before the last '.' in `test` is kept and '.csv' is appended.
tokens = test.split('.')
testFileRoot = str.join('.', tokens[:-1])
datFileName = testFileRoot + '.csv'
# Empty accumulators created before the per-try loop that follows.
# NOTE(review): the jp_/ps_ prefixes presumably name the two estimation
# methods being compared -- confirm against the code that fills them.
jp_results = []
ps_results = []
jp_run = []
ps_run = []
for i in range(tries):
    sdg = synthDataGen.run(test, datSize)
    d = getData.DataReader(datFileName)
    data = d.read()

    prob = ProbSpace(data)

    lim = 3  # Std's from the mean to test conditionals
    numPts = 30  # How many eval points for each conditional
    print('Test Limit = ', lim, 'standard deviations from mean')
    print('Dimensions = ', dims, '.  Conditionals = ', dims - 1)
    print('Number of points to test for each conditional = ', numPts)
    N = prob.N
    evalpts = int(
        sqrt(N)
    )  # How many target points to sample for expected value:  E(Z | X=x. Y=y)
    print('JPROB points for mean evaluation = ', evalpts)
    vars = prob.fieldList
    cond = []
    # Get the conditional variables
    for i in range(len(vars)):
示例#3
0
# Announce which test definition file is being run and its description.
print('Testing: ', test, '--', testDescript)

# For dat file, use the input file name with the .csv extension:
# everything before the last '.' in `test` is kept and '.csv' is appended.
tokens = test.split('.')
testFileRoot = str.join('.', tokens[:-1])
datFileName = testFileRoot + '.csv'
# Empty accumulators created before the per-try loop.
jp_results = []
ps_results = []
jp_run = []
ps_run = []
# Each try regenerates the data, reloads it and builds a fresh ProbSpace.
for i in range(tries):
    # NOTE(review): presumably synthDataGen.run writes the .csv that is read
    # back on the next line -- confirm; its return value is not used here.
    sdg = synthDataGen.run(test, datSize)
    d = getData.DataReader(datFileName)
    data = d.read()

    prob = ProbSpace(data)

    lim = 3  # Std's from the mean to test conditionals
    numPts = 30  # How many eval points for each conditional
    print('Test Limit = ', lim, 'standard deviations from mean')
    print('Dimensions = ', dims, '.  Conditionals = ', dims - 1)
    print('Number of points to test for each conditional = ', numPts)
    N = prob.N
    # Fixed variable names: 'B' is the conditioning variable, 'A2' the target.
    cond = 'B'

    target = 'A2'
    # RKHS model restricted to the target and the conditioning variable.
    R1 = RKHS(prob.ds, delta=None, includeVars=[target, cond], s=smoothness)

    # Do some general assessment of cumulative probabilities

    # Do some univariate CDF calculations.  F is normal(0,1)
示例#4
0
            sum2 += t1*t2
    if sum1 ==0:
        return 0
    return sum2/sum1



# Script entry point: load the sample data set and set up the evaluation grid.
if __name__ == '__main__':
    path = '../models/Cprobdata.csv'
    d = getData.DataReader(path)
    data = d.read()        
    X1 = data['X']
    Y1 = data['Y']
    # Work with the first 7500 samples of each variable only.
    X = X1[:7500]
    Y = Y1[:7500]
    # Note: the ProbSpace is built from the full data, not the truncated X/Y.
    ps = ProbSpace(data)

    size = len(X)    
   
    # Evaluation grid: numTP steps of width `interval` between testMin and
    # testMax, starting at tp = testMin.
    testPoints = []
    testMin = 5
    testMax = 7
    tp = testMin
    numTP = 100
    interval = (testMax - testMin) / numTP
    # Empty result accumulators; they are not filled within this excerpt.
    sq = []
    probpy = []
    cond = []
    
    # Error accumulators; they are not updated within this excerpt.
    Perror = 0.0    
    Pdev = []
示例#5
0
    def condP(self, Vals, K=None):
        """Estimate P(X1=x1 | X2=x2, ...) using filtering plus an RKHS model.

        The last ``filter_len`` entries of ``self.includeVars`` are handled by
        filtering the data on the matching trailing entries of ``Vals``; the
        remaining variables are evaluated with an RKHS model that is cached in
        ``self.R1`` (``self.r1filters`` remembers the filter values used).

        Args:
            Vals: list (x1, x2, ..., xn) of values aligned with
                ``self.includeVars`` (same calling convention as rkhsmv).
            K: optional percentage of the conditioning variables to filter
                on. When given it overwrites ``self.k`` for later calls too.

        Returns:
            The probability reported by the model when it is > 0, otherwise
            None. The progressive-filter branch returns ``self.R1.P(...)``
            unchanged (no > 0 check), as in the original implementation.

        Side effects:
            May update ``self.k``, ``self.rangeFactor``, ``self.minPoints``,
            ``self.maxPoints``, ``self.R1`` and ``self.r1filters``; prints
            progress information to stdout.
        """
        if K is not None:
            self.k = K
        filter_len = floor((len(self.includeVars)-1)*self.k*0.01)
        dims = len(Vals)
        print(dims,filter_len)
        # A zero range factor would zero minPoints and divide by zero below.
        if self.rangeFactor == 0:
            self.rangeFactor = 0.5
        if filter_len != 0:
            filter_vars = self.includeVars[-filter_len:]
            filter_vals = Vals[-filter_len:]
            include_vars = self.includeVars[:-filter_len]
            self.minPoints = ceil(self.N**((dims-filter_len)/dims))*self.rangeFactor
            self.maxPoints = ceil(self.N**((dims-filter_len)/dims))/self.rangeFactor
        else:
            filter_vars = []
            filter_vals = []
            include_vars = self.includeVars

        # Calculating R1
        zDim = floor(self.k * (dims-1)* 0.01)
        minminpoints = 10
        if filter_len == (len(self.includeVars)-1):
            # Every conditioning variable is to be filtered: try progressively
            # fewer filter dimensions until enough data points survive.
            for i in range(zDim,0,-1):
                print("runing i=",i)
                filter_len = floor(i*self.k*0.01)
                # NOTE(review): these bounds use N * ratio while the bounds
                # computed above use N ** ratio -- confirm which is intended.
                self.minPoints = ceil(self.N*((dims-i)/dims))*self.rangeFactor
                self.maxPoints = ceil(self.N*((dims-i)/dims))/self.rangeFactor
                rkhsminpoints = minminpoints*(dims-i)
                if self.minPoints < rkhsminpoints:
                    print("minpoints < minminpoints",self.minPoints,rkhsminpoints)
                    continue
                P = ProbSpace(self.data)
                filter_data = [(filter_vars[j], filter_vals[j])
                               for j in range(filter_len)]
                print("filter metrics = ",filter_data)
                FilterData, parentProb, finalQuery = P.filter(filter_data,self.minPoints,self.maxPoints)
                numFiltered = len(FilterData[self.includeVars[0]])
                if numFiltered < self.minPoints:
                    print("not enough filter points for filterlen =",i,numFiltered,self.minPoints,self.maxPoints)
                    continue
                print("filter len",filter_len)
                # Count rows via the first included variable rather than a
                # hard-coded 'B' column, which not every data set contains.
                print("filtered datapoints:",numFiltered)
                print("include vars:",self.includeVars[:-filter_len])
                self.R1 = RKHS(FilterData,includeVars=self.includeVars[:(dims-filter_len)],delta=self.delta,s=self.s)
                self.r1filters = filter_vals
                return self.R1.P(Vals[:dims-filter_len])
            # No filter size produced enough points: use an unfiltered model.
            print("running empty")
            self.R1 = RKHS(self.data,includeVars=self.includeVars,delta=self.delta,s=self.s)
            p = self.R1.condP(Vals)
            return p if p > 0 else None

        elif filter_len != 0 and self.r1filters != filter_vals:
            # Filter values changed since the cached model was built: rebuild.
            P = ProbSpace(self.data)
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(filter_data,self.minPoints,self.maxPoints)
            self.R1 = RKHS(FilterData,includeVars=self.includeVars[:-filter_len],delta=self.delta,s=self.s)
            self.r1filters = filter_vals

        elif self.R1 is None:
            print("running empty")
            self.R1 = RKHS(self.data,includeVars=self.includeVars,delta=self.delta,s=self.s)

        elif self.R1.varNames != include_vars:
            # Cached model covers a different variable set: rebuild unfiltered.
            self.R1 = RKHS(self.data,includeVars=self.includeVars,delta=self.delta,s=self.s)

        # Filtered values are already fixed by the data selection, so only the
        # remaining leading values are passed to the model.
        if filter_len != 0:
            p = self.R1.condP(Vals[:-filter_len])
        else:
            p = self.R1.condP(Vals)
        return p if p > 0 else None
示例#6
0
    def condE(self, target, Vals, K=None):
        """Estimate E(Y | X1=x1, X2=x2, ...) using filtering plus an RKHS model.

        ``self.includeVars[0]`` is the variable whose expectation is taken and
        ``Vals`` lines up with ``self.includeVars[1:]``. The last
        ``filter_len`` conditioning variables are handled by filtering the
        data; the rest are evaluated with an RKHS model cached in ``self.R2``
        (``self.r2filters`` remembers the filter values used).

        Args:
            target: name passed through to ``self.R2.condE``. It is not used
                when every conditioning variable is filtered.
            Vals: list (x1, ..., xn) of conditioning values (same calling
                convention as rkhsmv).
            K: optional percentage of conditioning variables to filter on;
                defaults to ``self.k``. ``self.k`` itself is not modified.

        Returns:
            The conditional expectation. When all conditioning variables are
            filtered this is the mean of the surviving samples of
            ``self.includeVars[0]`` (0 if none survive). When too few points
            survive, the method calls itself with a smaller K.

        Side effects:
            May update ``self.rangeFactor``, ``self.minPoints``,
            ``self.maxPoints``, ``self.R2`` and ``self.r2filters``; prints
            progress information to stdout.
        """
        if K is None:
            K = self.k
        filter_len = floor((len(self.includeVars)-1)*K*0.01)
        dims = len(Vals) + 1
        if self.rangeFactor is None:
            self.rangeFactor = 0.8
        minminpoints = 5

        if filter_len != 0:
            filter_vars = self.includeVars[-filter_len:]
            filter_vals = Vals[-filter_len:]
            include_vars = self.includeVars[1:-filter_len]
            self.minPoints = self.N**((dims-filter_len)/dims)*self.rangeFactor
            self.maxPoints = self.N**((dims-filter_len)/dims)/self.rangeFactor
        else:
            filter_vars = []
            filter_vals = []
            # The unfiltered model below is built from includeVars[1:], so the
            # cache check must compare against that same list; comparing with
            # the full includeVars forced a rebuild on every call.
            include_vars = self.includeVars[1:]

        if filter_len == (len(self.includeVars)-1):
            # Every conditioning variable is filtered: no model is needed, the
            # answer is the mean of the surviving samples.
            P = ProbSpace(self.data)
            filter_vars = self.includeVars[1:]
            filter_vals = Vals
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(filter_data,self.minPoints,self.maxPoints)
            X = FilterData[self.includeVars[0]]
            if len(X) < self.minPoints:
                # Retry with one fewer filtered variable (e.g. K=100 -> K=80
                # for five conditioning variables).
                newk = ceil((((K*(dims-1)*0.01)-1)/(dims-1))*100)
                print("not enough datapoints, newk=",newk)
                return self.condE(target, Vals, newk)
            if len(X) != 0:
                return sum(X)/len(X)
            else:
                return 0

        elif filter_len != 0 and self.r2filters != filter_vals:
            # Filter values changed since the cached model was built: rebuild.
            P = ProbSpace(self.data)
            filter_data = [(filter_vars[i], filter_vals[i])
                           for i in range(filter_len)]
            FilterData, parentProb, finalQuery = P.filter(filter_data,self.minPoints,self.maxPoints)
            X = FilterData[self.includeVars[0]]
            print("filter len",filter_len)
            # Count rows via the first included variable rather than a
            # hard-coded 'B' column, which not every data set contains.
            print("filtered datapoints:",len(X))
            print("include vars:",self.includeVars[:-filter_len])
            if len(X) < self.minPoints or len(X) <= minminpoints:
                newk = ceil((((K*(dims-1)*0.01)-1)/(dims-1)) * 100)
                print("not enough datapoints, newk=",newk)
                return self.condE(target, Vals, newk)
            self.R2 = RKHS(FilterData,includeVars=self.includeVars[1:-filter_len],delta=self.delta,s=self.s)
            self.r2filters = filter_vals

        elif self.R2 is None:
            self.R2 = RKHS(self.data,includeVars=self.includeVars[1:],delta=self.delta,s=self.s)

        elif self.R2.varNames != include_vars:
            # Cached model covers a different variable set: rebuild unfiltered.
            self.R2 = RKHS(self.data,includeVars=self.includeVars[1:],delta=self.delta,s=self.s)

        # Filtered values are already fixed by the data selection, so only the
        # remaining leading values are passed to the model.
        if filter_len != 0:
            return self.R2.condE(target,Vals[:-filter_len])
        else:
            return self.R2.condE(target, Vals)
示例#7
0
else:
    v3 = 'C'
# Load the test definition file and execute it in this module's global
# namespace; names such as testDescript used below come from that file.
# NOTE(review): exec() runs arbitrary code from `test` -- only use this with
# trusted test definition files.
with open(test, 'r') as f:
    exec(f.read(), globals())

print('Testing: ', test, '--', testDescript)

# For dat file, use the input file name with the .csv extension
tokens = test.split('.')
testFileRoot = str.join('.', tokens[:-1])
datFileName = testFileRoot + '.csv'

d = getData.DataReader(datFileName)
data = d.read()

prob = ProbSpace(data)
# One empty trace list per plot axis.
traces = {}
traces['X'] = []
traces['Y'] = []
traces['Z'] = []
# Unconditional distribution and mean of each of the three variables.
v1distr = prob.distr(v1)
v2distr = prob.distr(v2)
v3distr = prob.distr(v3)
v1mean = v1distr.E()
v2mean = v2distr.E()
v3mean = v3distr.E()
#ymin = targdistr.minVal()
#ymax = targdistr.maxVal()
#xmin = conddistr.minVal()
#xmax = conddistr.maxVal()
lim = 3
示例#8
0
def run(filename):
    """Run the probability-module smoke test and print results to stdout.

    Loads the data set at ``filename`` with ``getData.DataReader``, builds a
    ``ProbSpace`` from it and prints a series of measured values next to the
    value expected for that data set ("Exp: ..."). Nothing is asserted and
    nothing is returned; the output is meant to be inspected by eye.

    The queries reference the variables A, B, C, EXP, N, N2, IVA, IVB and
    IVC, so the data set must contain them.

    Args:
        filename: path of the data file to load.
    """
    r = getData.DataReader(filename)
    dat = r.read()
    ps = ProbSpace(dat, density=1, power=1)
    start = time.time()
    print()
    print('Testing probability module.')
    print()
    print('Testing basic statistics for various types of distribution:')
    print('stats(A) =  ', ps.fieldStats('A'))
    print('stats(C) = ', ps.fieldStats('C'))
    a = ps.distr('A')
    mean = a.mean()
    std = a.stDev()
    # Only four statistics are printed here, so the label lists four.
    print('stats(dice1): mean, std, skew, kurtosis = ', mean,
          std, a.skew(), a.kurtosis(), ' Exp: (3.5, ?, 0, ?)')
    c = ps.distr('C')
    print('stats(d1 + d2): mean, std, skew, kurtosis, median, mode = ', c.E(),
          c.stDev(), c.skew(), c.kurtosis(), c.median(), c.mode(),
          ' Exp: (7, ?, 0, ?, 7, 7)')
    d = ps.distr('EXP')
    print('stats(Exponential): mean, std, skew, kurtosis = ', d.E(), d.stDev(),
          d.skew(), d.kurtosis(), ' Exp: (1, 1, 2, 6)')
    d = ps.distr('IVB')
    print('stats(Logistic): mean, std, skew, kurtosis = ', d.E(), d.stDev(),
          d.skew(), d.kurtosis(), ' Exp: (0, 1.8138, 0, 1.2)')
    d = ps.distr('N')
    print('stats(Normal):  mean, std, skew, kurtosis, median = ', d.E(),
          d.stDev(), d.skew(), d.kurtosis(), d.median(), 'Exp: (0, 1, 0, 0)')
    d = ps.distr('N2')
    print('stats(N2: sum of normals):  mean, std, skew, kurtosis = ', d.E(),
          d.stDev(), d.skew(), d.kurtosis(), 'Exp: (1, 1.414, 0, 0)')
    print()
    print(
        'Testing discrete deterministic probabilities (2-dice -- ala Craps):')
    print('A is Die #1.  B is Die #2.  C is the total of the 2 dice.')
    print('E(B) = ', ps.distr('B').E(), ' Exp: 3.5')
    print('P(B=0) = ', ps.P(('B', 0)), ' Exp: 0')
    print('P(B=1) = ', ps.P(('B', 1)), ' Exp: 1/6 = .166...')
    print('P(B=2) = ', ps.P(('B', 2)), ' Exp: 1/6 = .166...')
    print('P(B >= 0) = ', ps.P(('B', 0, None)), ' Exp: 1.0')
    print('P(B < 0) = ', ps.P(('B', None, 0)), ' Exp: 0.0')
    print('P(-inf <= B < inf) = ', ps.P(('B', None, None)), ' Exp: 1.0')
    print('P(-1 <= B < 3) = ', ps.P(('B', -1, 3)), ' Exp: 1/3')
    print('P(C = 2) =', ps.P(('C', 2)), ' Exp: 1/36 = .0277...')
    print('P(C = 3) =', ps.P(('C', 3)), ' Exp: 1/18 = .055...')
    print('P( 2 <= C < 4) = ', ps.P(('C', 2, 4)), ' Exp: 3/36 = .0833...')
    # The query conditions on B, so the label names B.
    print('P( 2 <= C < 4 | B = 1) = ', ps.P(('C', 2, 4), ('B', 1)),
          ' Exp: 1/3')
    print('P( C = 7) = ', ps.P(('C', 7)), ' Exp: 1/6 = .166...')
    print('P( C = 7 | A = 1, B = 6) = ', ps.P(('C', 7), [('A', 1), ('B', 6)]),
          ' Exp: 1.0')
    print('P( C = 7 | A >= 2, B < 5) = ',
          ps.P(('C', 7), [('A', 2, None), ('B', None, 5)]), ' Exp: 1/5 = .2')
    print('P(-inf <= A < inf | B >= 1) = ',
          ps.P(('A', None, None), ('B', 1, None)), ' Exp: 1.0')
    print('P( A >= 3, B >= 3) = ', ps.P([('A', 3, None), ('B', 3, None)]),
          'Exp: 4/9 (.444...)')
    print('P( C = 7, A = 5) = ', ps.P([('C', 7), ('A', 5)]),
          ' Exp: 1/36 (.0277...)')
    print('P( C = 7, A >= 5) = ', ps.P([('C', 7), ('A', 5, None)]),
          ' Exp: 1/18 (.0555...)')
    print('P( A = 2 | B = 5, C= 7) = ', ps.P(('A', 2), [('B', 5), ('C', 7)]),
          ' Exp: 1.0')
    # Two-argument form is a conditional (the expected 1/6 is P(B=5 | C=7);
    # the joint would be 1/36), so the label uses '|'.
    print('P( B = 5 | C= 7) = ', ps.P(('B', 5), ('C', 7)),
          ' Exp: 1/6 (.166...)')
    print('P( A = 2, B = 5) = ', ps.P([('A', 2), ('B', 5)]),
          ' Exp: 1/36 (.0277...)')
    print('P( A = 2, B = 5 | C = 7) = ', ps.P([('A', 2), ('B', 5)], ('C', 7)),
          ' Exp: 1/6 (.166...)')
    print('P( A = 2, B = 5, N < 0| C = 7) = ',
          ps.P([('A', 2), ('B', 5), ('N', None, 0)], ('C', 7)),
          ' Exp: 1/12 (.08333...)')
    print('E( C | A = 1, B = 6) = ',
          ps.distr('C', [('A', 1), ('B', 6)]).E(), ' Exp: 7')
    print('E( C | A = 1, B >= 5) = ',
          ps.distr('C', [('A', 1), ('B', 5, None)]).E(), ' Exp: 6')
    print()
    print('Testing continuous distributions.  Using N = normal(0, 1)')
    n = ps.distr('N')
    mu1 = n.mean()
    mu2 = n.stDev()
    print('stats(N):  mean, std, skew, kurtosis = ', mu1, mu2, n.skew(),
          n.kurtosis(), 'Exp: (0, 1, 0, 0)')
    # Interval labels follow the lower <= N < upper form used for the
    # discrete queries above.
    print('P( -1 <= N < 1) = ', n.P((-1, 1)), 'Exp: .682')
    print('P( -2 <= N < 2) = ', n.P((-2, 2)), 'Exp: .954')
    print('P( -3 <= N < 3) = ', n.P((-3, 3)), 'Exp: .997')
    print('P( -inf <= N < 0) = ', n.P((None, 0)), 'Exp: .5')
    print('P( 0 <= N < inf) = ', n.P((0, None)), 'Exp: .5')
    print('P( -inf <= N < inf) = ', n.P((None, None)), 'Exp: 1.0')
    print('E( N2 | N = 1) = ', ps.distr('N2', ('N', 1)).E(), ' Exp: 2.0')
    print('E( N2 | 1 <= N < 2) = ', ps.distr('N2', ('N', 1, 2)).E())
    print()
    print('Dependence testing.  Note: values < .5 are considered independent')
    print('A _||_ B = ', ps.dependence('A', 'B'), ' Exp: < .5')
    print('A _||_ C = ', ps.dependence('A', 'C'), ' Exp: > .5')
    print('B _||_ C = ', ps.dependence('B', 'C'), ' Exp: > .5')
    print('N _||_ N2 = ', ps.dependence('N', 'N2'), ' Exp: > .5')
    print('N _||_ C = ', ps.dependence('N', 'C'), ' Exp: < .5')
    print('C _||_ N = ', ps.dependence('C', 'N'), ' Exp: < .5')
    print('A _||_ B | C >= 8 = ', ps.dependence('A', 'B', [('C', 8, None)]),
          ' Exp: > .5')
    print('A _||_ B | C < 7 = ', ps.dependence('A', 'B', [('C', None, 7)]),
          ' Exp: > .5')
    print('A _||_ B | C = 7 = ', ps.dependence('A', 'B', [('C', 7)]),
          ' Exp: > .5')
    print('A _||_ B | C = 6 = ', ps.dependence('A', 'B', [('C', 6)]),
          ' Exp: > .5')
    print('A _||_ B | C = 5 = ', ps.dependence('A', 'B', [('C', 5)]),
          ' Exp: > .5')
    print('A _||_ B | C = 4 = ', ps.dependence('A', 'B', [('C', 4)]),
          ' Exp: > .5')
    print('A _||_ B | C = 3 = ', ps.dependence('A', 'B', [('C', 3)]),
          ' Exp: > .5')
    print('A _||_ B | C = 2 = ', ps.dependence('A', 'B', [('C', 2)]),
          ' Exp: < .5')
    print('A _||_ B | C = 12 = ', ps.dependence('A', 'B', [('C', 12)]),
          ' Exp: < .5')
    print('A _||_ B | C = ', ps.dependence('A', 'B', ['C']), ' Exp: > .5')
    print()
    print('Independence testing (values > .5 are considered independent):')
    print('A _||_ B = ', ps.independence('A', 'B'), ps.isIndependent('A', 'B'),
          ' Exp: > .5, True')
    print('A _||_ C = ', ps.independence('A', 'C'), ps.isIndependent('A', 'C'),
          ' Exp: < .5, False')
    print('A _||_ B | C = ', ps.independence('A', 'B', 'C'),
          ps.isIndependent('A', 'B', 'C'), ' Exp: < .5, False')
    print('A _||_ N = ', ps.independence('A', 'N'), ps.isIndependent('A', 'N'),
          ' Exp: > .5, True')
    print()
    print('Testing Conditionalization:')
    ivaDist = ps.distr('IVA')
    ivaMean = ivaDist.E()
    ivaStd = ivaDist.stDev()
    # Evaluate half a standard deviation either side of the mean of IVA.
    upper = ivaMean + .5 * ivaStd
    lower = ivaMean - .5 * ivaStd
    diff = upper - lower
    pwr = 2
    print('test interval = ', upper - lower)
    ivcGupper = ps.E('IVC', ('IVA', upper), power=pwr)
    print('E( IVC | IVA = upper)', ivcGupper)
    ivcGlower = ps.E('IVC', ('IVA', lower), power=pwr)
    print('E( IVC | IVA = lower)', ivcGlower)
    ivcGupper = ps.E('IVC', [('IVA', upper), 'IVB'], power=pwr)
    print('E( IVC | IVA = upper, IVB)', ivcGupper)
    ivcGlower = ps.E('IVC', [('IVA', lower), 'IVB'], power=pwr)
    print('E( IVC | IVA = lower, IVB)', ivcGlower)
    # Slope of E(IVC) with respect to IVA once IVB is conditioned on.
    print('ACE(A,C) = ', (ivcGupper - ivcGlower) / diff, ' Exp: ~ 0')
    print()
    print('Testing continuous causal dependence:')
    print('IVB _||_ IVA = ', ps.dependence('IVB', 'IVA'), ' Exp: > .5')
    print('IVA _||_ IVB = ', ps.dependence('IVA', 'IVB'), ' Exp: > .5')
    print('IVB _||_ IVC = ', ps.dependence('IVB', 'IVC'), ' Exp: > .5')
    print('IVA _||_ IVC = ', ps.dependence('IVA', 'IVC'), ' Exp: > .5')
    print('IVA _||_ IVC | IVB = ', ps.dependence('IVA', 'IVC', 'IVB'),
          ' Exp: < .5')
    print('IVA _||_ IVC | IVB, N = ',
          ps.dependence('IVA', 'IVC', ['IVB', 'N']), ' Exp: < .5')
    print()
    print('Testing Bayesian Relationships:')
    # Bayes: P(C=7 | A=5) = P(A=5 | C=7) * P(C=7) / P(A=5)
    pA_C = ps.P(('A', 5), ('C', 7))
    pA = ps.P(('A', 5))
    pC = ps.P(('C', 7))
    pC_A = ps.P(('C', 7), ('A', 5))
    # The prior of the inferred event (C) goes in the numerator. The swapped
    # form only appeared to work because P(A=5) == P(C=7) == 1/6 for dice.
    invpC_A = pA_C * pC / pA
    err = abs(invpC_A - pC_A)
    print(
        'Inverse P(C=7 | A=5) vs measured (Bayes(P(C | A)), P(C | A), diff): ',
        invpC_A, pC_A, err, ' Exp: ~ 0')
    # P(0 <= IVB < 1 | 1 <= IVA < 2) = P(1 <= IVA < 2 | 0 <= IVB < 1) * P(0 <= IVB < 1) / P(1 <= IVA < 2)
    pA_B = ps.P(('IVA', 1, 2), ('IVB', 0, 1))
    pB = ps.P(('IVB', 0, 1))
    pA = ps.P(('IVA', 1, 2))
    pB_A = ps.P(('IVB', 0, 1), ('IVA', 1, 2))
    invpB_A = pA_B * pB / pA
    err = abs(invpB_A - pB_A)
    print(
        'Inverse P(0 <= IVB < 1 | 1 <= IVA < 2) vs measured (Bayes(P(IVB | IVA)), P(IVB | IVA), diff): ',
        invpB_A, pB_A, err, ' Exp: ~ 0')
    print()
    print('Testing Prediction and Classification:')
    testDat = {'A': [2, 3, 6], 'B': [5, 2, 6]}
    predDat = ps.Predict('C', testDat)
    for val, a, b in zip(predDat, testDat['A'], testDat['B']):
        print('Prediction(C) for A = ', a, ', B = ', b, ', = pred(C) = ', val,
              ' Exp:', a + b)
    predDat = ps.Classify('C', testDat)
    for val, a, b in zip(predDat, testDat['A'], testDat['B']):
        print('Classification(C) for A = ', a, ', B = ', b, ', = pred(C) = ',
              val, ' Exp:', a + b)
    testDat = {'N': [.5, 1, 1.5, 2, 2.5, 3], 'B': [1, 2, 3, 4, 5, 6]}
    predDists = ps.PredictDist('N2', testDat)
    for d, n, b in zip(predDists, testDat['N'], testDat['B']):
        print('Prediction(N2) for N = ', n, ', B = ', b,
              ', = pred(N2 (mean, std)) = ', d.E(), d.stDev(), ' Exp:', n + 1,
              ', 1')
    print()
    end = time.time()
    duration = end - start
    print('Test Time = ', round(duration))
示例#9
0
def run(filename):
    """Run the prediction/classification smoke test and print the scores.

    Loads the data set at ``filename``, holds out the last 100 samples of
    every variable as test data, builds a ``ProbSpace`` from the rest and
    prints:

    * R2 of ``Predict('Y', ...)`` against the held-out Y values, and
    * accuracy of ``Classify('DY', ...)`` against the held-out DY values.

    Nothing is returned. The data set must contain the variables Y and DY.
    NOTE(review): a data set with 100 or fewer samples leaves the training
    split empty -- confirm that inputs are always larger than that.

    Args:
        filename: path of the data file to load.
    """
    r = getData.DataReader(filename)
    dat = r.read()
    start = time.time()
    # split data between 'training' and test
    varNames = list(dat.keys())
    datLen = len(dat[varNames[0]])
    trainLen = datLen - 100
    tr = {}
    te = {}
    for var, values in dat.items():
        datL = list(values)
        tr[var] = datL[:trainLen]
        te[var] = datL[trainLen:]
    print()
    print('Testing probability module\'s prediction capabilities.')
    ps = ProbSpace(tr, density=1, power=1)
    print()
    print('Testing non-linear regression with continuous variables.')
    d = ps.distr('Y')
    print('stats(Y) = ', d.mean(), d.stDev(), d.skew(), d.kurtosis())
    # Note: Predict will automatically remove Y from the test data
    Ymean = d.mean()
    expected = te['Y']
    results = ps.Predict('Y', te)
    SSE = 0.0  # Sum of squared error
    SST = 0.0  # Sum of squared deviation
    for val, actual in zip(results, expected):
        SSE += (val - actual)**2
        SST += (actual - Ymean)**2
    # R2 is undefined when the held-out Y values have no spread around the
    # training mean; report nan instead of dividing by zero.
    r2 = 1 - SSE / SST if SST else float('nan')
    print('R2 = ', r2)
    print()
    print('Testing Classification with discontinuous discrete data')
    d = ps.distr('DY')
    print('stats(DY) = ', d.minVal(), d.maxVal(), d.mean(), d.stDev(),
          d.skew(), d.kurtosis())
    expected = te['DY']
    results = ps.Classify('DY', te)
    numResults = len(results)
    cumErr = 0
    for val, actual in zip(results, expected):
        if val != actual:
            cumErr += 1
    # Accuracy is undefined without any predictions; report nan instead of
    # dividing by zero.
    accuracy = 1 - (cumErr / numResults) if numResults else float('nan')
    print('Accuracy = ', accuracy)

    end = time.time()
    duration = end - start
    print('Test Time = ', round(duration))