Example #1
0
def determineDifficulty(fileName, samples):
	"""Return the normalized difficulty of the SEM described by fileName.

	Analyzes `samples` data points (without regenerating data) against the
	current validation data and reports the 'normDifficulty' statistic.
	"""
	import analyzeSEM
	analyzer = analyzeSEM.SemAnalyzer(fileName)
	results = analyzer.analyzeOne(samples, gen=False, valData=getValidation())
	return results['normDifficulty']
 def calibrateOneCITest(self, testType, filePath):
     """Calibrate one conditional-independence (CI) test module.

     Regenerates synthetic data for `filePath`, derives the conditional
     dependencies/independencies implied by the SEM, and runs the CI test
     named by `testType` against each triple, printing any disagreements.

     Parameters:
         testType: importable module name exposing isIndependent(X, Y, Z).
         filePath: path root; '<filePath>.py' is the SEM, '<filePath>.csv'
             the generated data.

     Side effects: increments self.err1Count / self.err2Count /
     self.testCount and prints an error summary for the file.
     """
     # importlib avoids exec/eval on a constructed string and returns the
     # (possibly dotted) module object directly.
     import importlib
     synthDataGen.run(filePath + '.py', samples=self.datacount)
     module = importlib.import_module(testType)
     SA = analyzeSEM.SemAnalyzer(filePath + '.py', self.datacount)
     reader = getData.DataReader(filePath + '.csv', self.datacount)
     dependencies, independencies = SA.getCondDependencies()
     errors = 0
     errorTerms = {}
     for item in dependencies:
         x, y, z = item
         X = reader.getSeries(x)
         Y = reader.getSeries(y)
         Z = reader.getSeries(z)
         # Type 1 error: test claims independence where the SEM implies
         # dependence.
         if module.isIndependent(X, Y, Z):
             print('Error -- ', x, 'and', y, 'Should be dependent given', z)
             self.err1Count += 1
             errors += 1
             errorTerms[item] = 1
         self.testCount += 1
     for item in independencies:
         x, y, z = item
         X = reader.getSeries(x)
         Y = reader.getSeries(y)
         Z = reader.getSeries(z)
         # Type 2 error: test claims dependence where the SEM implies
         # independence.
         if not module.isIndependent(X, Y, Z):
             print('Error -- ', x, 'and', y, 'Should be independent given',
                   z)
             self.err2Count += 1
             errors += 1
             errorTerms[item] = 1
         self.testCount += 1
     print('Errors for file: ', filePath, '=', errors,
           list(errorTerms.keys()))
     return
Example #3
0
def CaLingamTest(testType, paramSet):
    """Run the CaLingam causal-discovery algorithm repeatedly and report stats.

    Parameters:
        testType: 'synth' to (re)generate synthetic data each run, or 'live'
            to use the data file as-is.  Anything else aborts with a message.
        paramSet: sequence whose first four items are
            (datafile, runs, dataPoints, validation).  `validation` is the
            name of a module-level validation function taking
            (paramSet, dag, valData), or '' to instead judge success by
            stability of the recovered causal order.

    Side effects: resets the module-level VALIDATION_ERRORS and
    CAUSAL_ORDERS dicts and prints per-run progress plus a summary
    (reliability, avg duration, difficulty, strength) to stdout.
    """
    global VALIDATION_ERRORS, CAUSAL_ORDERS
    VALIDATION_ERRORS = {}
    CAUSAL_ORDERS = {}
    maxDifficulty = MAX_DIFFICULTY
    fails = 0.0
    datafile, runs, dataPoints, validation = paramSet[:4]
    valData = None
    totalDuration = 0
    # Strip the extension and any (Windows-style) directory prefix to get
    # the SEM root name.
    datafileRootName = datafile.split('.')[-2].split('\\')[-1]
    sa = analyzeSEM.SemAnalyzer(datafileRootName)
    difficulty = 0
    diffNorm = 0
    for i in range(runs):
        # Regenerate the SEM on the first run and then every RESET_COUNT
        # runs.  Exact modulo replaces the old float-division equality test,
        # which was both roundabout and precision-fragile.
        reset = i == 0
        if RESET_COUNT > 0 and i > 0 and i % RESET_COUNT == 0:
            reset = True
            print()
            print('Previous SEM:')
            print(synthDataGen.getSEM())
        # Suppress pruning to increase perf when only testing causal order.
        prune = validation != 'SynthOrderVal'
        if testType == 'synth':
            outFile = synthDataGen.run(datafile,
                                       samples=dataPoints,
                                       reset=reset,
                                       maxDifficulty=maxDifficulty)
            valData = synthDataGen.getValidation()
            # reset is always True on i == 0, so a separate i == 0 check is
            # redundant.
            if reset:
                saStats = sa.analyzeOne(dataPoints, gen=False, valData=valData)
                difficulty = round(saStats['difficulty'], 2)
                diffNorm = round(saStats['normDifficulty'], 2)
                print('difficulty = ', difficulty, ', norm difficulty = ',
                      diffNorm)
        elif testType == 'live':
            outFile = datafile
        else:
            print('*** Invalid Test Type = ', testType)
            return
        startTime = time.time()
        c = CaLingam.IC(outFile, limit=dataPoints, prune=prune)
        dag = c.getDAG()
        totalDuration += time.time() - startTime
        if len(validation) > 0:
            # Resolve the validator by name instead of eval'ing a
            # constructed expression string.
            result = globals()[validation](paramSet, dag, valData)
        else:
            # No validator: success means the (truncated) causal order has
            # been seen before; a second distinct order is an error.
            corder = str.join('||', dag.getVarOrder()[:5])
            if corder not in CAUSAL_ORDERS:
                CAUSAL_ORDERS[corder] = 1
                result = 0 if len(CAUSAL_ORDERS) > 1 else 1
            else:
                CAUSAL_ORDERS[corder] += 1
                result = 1
        if result:
            print('.', end='', flush=True)
        else:
            print('x', end='', flush=True)
            fails += 1
    print()
    reliability = round(1.0 - (fails / float(runs)), 2)
    stats = ('Errors: {} / {} -- Reliability: {}% avg duration: {} sec'
             ' difficulty: {} diffNorm: {} strength: {}').format(
                 int(fails), int(runs), reliability * 100,
                 round(totalDuration / runs, 2), difficulty, diffNorm,
                 round(reliability * diffNorm, 2))
    print('Stats = ', stats)
    if fails > 0:
        if len(validation) > 0:
            # Show the most common validation errors first (count desc,
            # then key desc, matching the old (count, key) tuple sort).
            ordered = sorted(VALIDATION_ERRORS.items(),
                             key=lambda kv: (kv[1], kv[0]), reverse=True)
            # Join once, after the loop; the old code re-joined on every
            # iteration and raised NameError when there were no errors.
            errStr = str.join(', ',
                              [key + ':' + str(count)
                               for key, count in ordered])
            print('ValidationErrors = ', errStr)
        else:
            # Report the most frequently observed causal order.  max()
            # returns the first maximum, matching the old strictly-greater
            # scan.
            totalKeyCount = sum(CAUSAL_ORDERS.values())
            maxKey = max(CAUSAL_ORDERS, key=CAUSAL_ORDERS.get)
            maxKeyCount = CAUSAL_ORDERS[maxKey]
            print('Most Common Order (', maxKeyCount / totalKeyCount * 100,
                  '%) = ', maxKey)
            print()
            print('CausalOrders = ', str(CAUSAL_ORDERS))
    return