def determineDifficulty(fileName, samples):
    """Return the normalized difficulty score for the SEM described by fileName.

    Runs SemAnalyzer over `samples` data points (without regenerating data)
    against the current validation data and extracts 'normDifficulty'.
    """
    import analyzeSEM
    analyzer = analyzeSEM.SemAnalyzer(fileName)
    results = analyzer.analyzeOne(samples, gen=False, valData=getValidation())
    return results['normDifficulty']
def calibrateOneCITest(self, testType, filePath):
    """Calibrate one conditional-independence (CI) test module against a synthetic SEM.

    Generates a synthetic dataset from <filePath>.py, derives the expected
    conditional dependencies / independencies from the SEM, and checks the
    CI test module's verdict for each (x, y, z) triple, tallying error
    counters on self (err1Count / err2Count / testCount).

    Args:
        testType: Name of the CI-test module to import; it must expose
            isIndependent(X, Y, Z).
        filePath: Path prefix of the SEM definition (<filePath>.py) and the
            generated data file (<filePath>.csv).
    """
    import importlib
    # Generate the synthetic data for this SEM definition.
    synthDataGen.run(filePath + '.py', samples=self.datacount)
    # importlib.import_module replaces the original
    # exec('import ' + testType) / eval(testType) pair -- same effect,
    # without executing a constructed source string.
    module = importlib.import_module(testType)
    SA = analyzeSEM.SemAnalyzer(filePath + '.py', self.datacount)
    reader = getData.DataReader(filePath + '.csv', self.datacount)
    # Ground truth implied by the SEM: two lists of (x, y, z) triples.
    dependencies, independencies = SA.getCondDependencies()
    errors = 0
    errorTerms = {}  # triples the CI test got wrong (dict used as ordered set)
    for item in dependencies:
        x, y, z = item
        X = reader.getSeries(x)
        Y = reader.getSeries(y)
        Z = reader.getSeries(z)
        ind = module.isIndependent(X, Y, Z)
        if ind:
            # CI test claims independence where the SEM implies dependence.
            print('Error -- ', x, 'and', y, 'Should be dependent given', z)
            self.err1Count += 1
            errors += 1
            errorTerms[item] = 1
        self.testCount += 1
    for item in independencies:
        x, y, z = item
        X = reader.getSeries(x)
        Y = reader.getSeries(y)
        Z = reader.getSeries(z)
        ind = module.isIndependent(X, Y, Z)
        if not ind:
            # CI test claims dependence where the SEM implies independence.
            print('Error -- ', x, 'and', y, 'Should be independent given', z)
            self.err2Count += 1
            errors += 1
            errorTerms[item] = 1
        self.testCount += 1
    print('Errors for file: ', filePath, '=', errors, list(errorTerms.keys()))
    return
def CaLingamTest(testType, paramSet):
    """Run the CaLingam causal-discovery algorithm `runs` times and report reliability.

    For testType 'synth', data is (re)generated from the SEM definition in
    `datafile`; for 'live', `datafile` is used as-is.  Each run either calls
    the named validation function (when `validation` is non-empty) or checks
    that the recovered causal order stays consistent across runs.  Prints a
    summary of errors, reliability, average duration, and difficulty.

    Args:
        testType: 'synth' or 'live'.
        paramSet: Sequence whose first four items are
            (datafile, runs, dataPoints, validation-function-name-or-'').
    """
    global VALIDATION_ERRORS, CAUSAL_ORDERS
    VALIDATION_ERRORS = {}
    CAUSAL_ORDERS = {}
    maxDifficulty = MAX_DIFFICULTY
    fails = 0.0
    datafile, runs, dataPoints, validation = paramSet[:4]
    valData = None
    totalDuration = 0
    # e.g. 'dir\\model.py' -> 'model' (Windows-style path separator assumed here)
    datafileRootName = datafile.split('.')[-2].split('\\')[-1]
    sa = analyzeSEM.SemAnalyzer(datafileRootName)
    difficulty = 0
    diffNorm = 0
    for i in range(runs):
        # Regenerate the SEM on the first run and on every RESET_COUNT-th run.
        # (The original tested i / RESET_COUNT == int(i / RESET_COUNT) -- an
        # inexact float comparison; i % RESET_COUNT == 0 is the exact form.
        # It also set reset=True for i == 0 twice; one guard suffices.)
        if i == 0:
            reset = True
        elif RESET_COUNT > 0 and i % RESET_COUNT == 0:
            reset = True
            print()
            print('Previous SEM:')
            print(synthDataGen.getSEM())
        else:
            reset = False
        prune = True
        if validation == 'SynthOrderVal':
            # Suppress pruning in order to increase perf when only testing order.
            prune = False
        if testType == 'synth':
            outFile = synthDataGen.run(datafile, samples=dataPoints, reset=reset, maxDifficulty=maxDifficulty)
            valData = synthDataGen.getValidation()
            if i == 0 or reset:
                saStats = sa.analyzeOne(dataPoints, gen=False, valData=valData)
                difficulty = round(saStats['difficulty'], 2)
                diffNorm = round(saStats['normDifficulty'], 2)
                print('difficulty = ', difficulty, ', norm difficulty = ', diffNorm)
        elif testType == 'live':
            outFile = datafile
        else:
            print('*** Invalid Test Type = ', testType)
            return
        startTime = time.time()
        c = CaLingam.IC(outFile, limit=dataPoints, prune=prune)
        dag = c.getDAG()
        duration = time.time() - startTime
        totalDuration += duration
        if len(validation) > 0:
            # Dispatch to the named module-level validation function.
            # (Replaces eval(validation + '(paramSet, dag, valData)') -- the
            # same globals lookup, without eval on a constructed string.)
            result = globals()[validation](paramSet, dag, valData)
        else:
            corder = str.join('||', dag.getVarOrder()[:5])
            if corder not in CAUSAL_ORDERS:
                CAUSAL_ORDERS[corder] = 1
                # A new causal order after the first one is considered an error.
                result = 0 if len(CAUSAL_ORDERS) > 1 else 1
            else:
                # Got an existing causal order: count it as a success.
                CAUSAL_ORDERS[corder] += 1
                result = 1
        if result:
            print('.', end='', flush=True)
        else:
            print('x', end='', flush=True)
            fails += 1
    print()
    reliability = round(1.0 - (fails / float(runs)), 2)
    stats = 'Errors: ' + str(int(fails)) + ' / ' + str(
        int(runs)) + ' -- Reliability: ' + str(
            reliability * 100) + '% avg duration: ' + str(
                round(totalDuration / runs, 2)) + ' sec' + ' difficulty: ' + str(
                    difficulty) + ' diffNorm: ' + str(
                        diffNorm) + ' strength: ' + str(
                            round(reliability * diffNorm, 2))
    print('Stats = ', stats)
    if fails > 0:
        if len(validation) > 0:
            # Sort validation errors to show the most common first.
            # (count, key) tuples are unique, so descending sort reproduces
            # the original sort()+reverse() ordering exactly.
            ordered = sorted(((count, key) for key, count in VALIDATION_ERRORS.items()), reverse=True)
            errStrs = [key + ':' + str(VALIDATION_ERRORS[key]) for (count, key) in ordered]
            errStr = str.join(', ', errStrs)
            print('ValidationErrors = ', errStr)
        else:
            # Report the most frequently recovered causal order.
            maxKey = None
            maxKeyCount = 0
            totalKeyCount = 0
            for key in CAUSAL_ORDERS:
                keyCount = CAUSAL_ORDERS[key]
                if keyCount > maxKeyCount:
                    maxKeyCount = keyCount
                    maxKey = key
                totalKeyCount += keyCount
            print('Most Common Order (', maxKeyCount / totalKeyCount * 100, '%) = ', maxKey)
    print()
    print('CausalOrders = ', str(CAUSAL_ORDERS))
    return