Exemplo n.º 1
0
def _delayEmbed(series, tau):
	"""Return three equal-length slices of *series* lagged by 0, tau and 2*tau.

	The slice bounds are computed from the series length rather than with
	negative indices, so tau == 0 yields three full copies instead of empty
	slices. A series shorter than 2*tau yields three empty slices.
	"""
	span = max(len(series) - 2 * tau, 0)
	return series[:span], series[tau:tau + span], series[2 * tau:]


def run(dataDefFile, var1, var2, datapoints):
	"""Plot 3D manifolds of the series found in a data file.

	dataDefFile -- either a '.py' data definition file, which is handed to
	               synthDataGen.run() to produce the file to read, or a
	               '.csv' data file that is read directly.
	var1, var2  -- names of the two series to draw as shadow manifolds.
	               When var1 is None, the sorted series names are instead
	               taken three at a time (at most four groups) and each
	               group is plotted as one 3D curve.
	datapoints  -- passed to the generator and used as the reader's limit.

	Reads the module-level names `standard` (standardize each series before
	plotting) and `TAU` (embedding lag, only needed for shadow manifolds).
	Returns None; an unsupported file extension is reported and nothing is
	plotted.
	"""
	print('Generating data using: ', dataDefFile)
	fileExt = dataDefFile.split('.')[-1]
	if fileExt == 'py':
		# Data Definition File
		outFile = synthDataGen.run(dataDefFile, datapoints)
	elif fileExt == 'csv':
		# Data file
		outFile = dataDefFile
	else:
		print('*** Invalid file type = ', fileExt)
		return
	d = getData.DataReader(outFile, limit=datapoints)
	fig = plt.figure()
	# projection='3d' makes `ax` a 3D-aware axes instance.
	ax = fig.add_axes([.1, .1, .8, .8], projection='3d')

	if var1 is None:
		# Master manifolds: one curve per consecutive triple of series names.
		# sorted() leaves the list returned by the reader untouched.
		names = sorted(d.getSeriesNames())
		print('Vars = ', names)
		for start in range(0, 12, 3):
			triple = names[start:start + 3]
			if len(triple) < 3:
				break
			X, Y, Z = (d.getSeries(name) for name in triple)
			if standard:
				X = standardize.standardize(X)
				Y = standardize.standardize(Y)
				Z = standardize.standardize(Z)
			# No explicit colour: each curve takes the next colour of the
			# axes' default cycle.
			ax.plot(X, Y, Z)
	else:
		# Shadow manifolds: each series plotted against its own lagged copies.
		tau = TAU
		var1D = d.getSeries(var1)
		if standard:
			var1D = standardize.standardize(var1D)
		X1, Y1, Z1 = _delayEmbed(var1D, tau)

		var2D = d.getSeries(var2)
		if standard:
			var2D = standardize.standardize(var2D)
		X2, Y2, Z2 = _delayEmbed(var2D, tau)

		ax.plot(X1, Y1, Z1)
		ax.plot(X2, Y2, Z2, 'r')

	plt.show()
Exemplo n.º 2
0
def run(s1, s2):
    """Regenerate the data set and score the dependence between two series.

    s1, s2 -- names of the series to read from FILE + '.csv'.

    Fits a first-degree polynomial of s2 on s1, splits s2 into the fitted
    part and the residual, and returns the value of scoreDependence():
    computed on (s1, s2) when IND_TYPE is 'PL' or 'CCM', otherwise on
    (s1, residual).

    Module-level state updated on every call:
      firstTime       -- cleared; only the first call passes reset=True to
                         the generator.
      CUM_DEPENDENCE  -- incremented by IndHSIC.scoreDependence(s1, s2).
      SNR, SNR_DB     -- lowered to this pair's signal-to-noise ratio (and
                         its value in dB) when it is smaller than SNR.
      CUM_TIME        -- incremented by the time spent in scoreDependence().
    """
    global firstTime, CUM_DEPENDENCE
    global SNR, SNR_DB, CUM_TIME
    reset = bool(firstTime)
    firstTime = False
    try:
        synthDataGen.run(FILE + '.py',
                         samples=DATA_POINTS,
                         maxDifficulty=MAX_DIFFICULTY,
                         reset=reset)
    except Exception as err:
        # Generation stays best-effort (the existing CSV is still read), but
        # KeyboardInterrupt/SystemExit now propagate and the failure is
        # visible in the debug output.
        Dprint('synthDataGen.run failed: ', err)
    dr = getData.DataReader(input=FILE + '.csv', limit=DATA_POINTS)
    s1D = np.array(dr.getSeries(s1))
    s2D = np.array(dr.getSeries(s2))
    dependence = IndHSIC.scoreDependence(s1D, s2D)
    CUM_DEPENDENCE = CUM_DEPENDENCE + dependence

    # Linear fit s2 ~ x1coef * s1 + x0coef.
    coefs = np.polyfit(s1D, s2D.T, 1)
    x1coef, x0coef = coefs
    Dprint('coefs = ', coefs)
    cs2D = s1D * x1coef + x0coef

    res = s2D - cs2D
    signal = cs2D.var(ddof=1)
    noise = res.var(ddof=1)
    snr = signal / noise
    if snr < SNR:
        SNR = snr
        # A zero ratio has no finite dB value; record -inf instead of
        # raising a math domain error.
        SNR_DB = 10 * math.log10(snr) if snr > 0 else float('-inf')

    start = time.time()
    if IND_TYPE == 'PL' or IND_TYPE == 'CCM':
        dep = scoreDependence(s1D, s2D)
    else:
        dep = scoreDependence(s1D, res)
    end = time.time()
    CUM_TIME += end - start
    Dprint('Residual Dependence for ', s1, '-->', s2, ' = ', dep)
    return dep
Exemplo n.º 3
0
	def analyzeOneLingam(self, dataCount, gen=True, valData=None):
		"""Measure how hard each predecessor/successor pair is to separate.

		dataCount -- passed to the generator and to the data reader.
		gen       -- when true, synthDataGen.run(self.genFile, dataCount) is
		             called first.
		valData   -- iterable of (successor, predecessors) pairs; defaults to
		             synthDataGen.getValidation().

		For every pair a first-degree polynomial of the successor on the
		predecessor is fitted, and the ratio residual variance / fitted
		variance is taken as that pair's SNR.

		Returns a dict with the keys 'minSnr' and 'avgSnr' (dB),
		'weakestPair' and 'weakestPairing' (the pair with the lowest SNR),
		'difficulty' (dB, floored at 0), 'normDifficulty'
		(100 * difficulty / sqrt(dataCount)) and 'variableDifficulty'
		({predecessor: {successor: dB}}).

		Raises ZeroDivisionError when valData contains no pair at all, since
		the average SNR is then undefined.
		"""
		if gen:
			synthDataGen.run(self.genFile, dataCount)
		dr = getData.DataReader(self.testFile, dataCount)
		if valData is None:
			valData = synthDataGen.getValidation()
		lowestSNR = 10**100
		cumSNR = 0
		pairCount = 0
		lowestPair = None
		nodeDiffs = {}
		for successor, predecessors in valData:
			if len(predecessors) == 0:
				continue
			# The successor series is the same for all of its predecessors.
			sD = np.array(dr.getSeries(successor))
			for predecessor in predecessors:
				pD = np.array(dr.getSeries(predecessor))
				x1coef, x0coef = np.polyfit(pD, sD.T, 1)
				cs2D = pD * x1coef + x0coef
				res = sD - cs2D
				# Note that signal and noise are reversed from what you might expect.
				# In this case, the variance of the error term is the signal we are looking for,
				# while the linear component is actually the noise
				noise = cs2D.var(ddof=1)
				signal = res.var(ddof=1)
				snr = signal / noise
				# setdefault keeps the entries already recorded for a
				# predecessor that feeds more than one successor.
				nodeDiffs.setdefault(predecessor, {})[successor] = 10 * math.log10(1.0 / snr)
				if snr < lowestSNR:
					lowestSNR = snr
					lowestPair = (predecessor, successor)
				cumSNR += snr
				pairCount += 1
		avgSNR = cumSNR / float(pairCount)
		difficulty = max(10 * math.log10(1.0 / lowestSNR), 0.0)
		stats = {}
		stats['minSnr'] = 10 * math.log10(lowestSNR)
		stats['avgSnr'] = 10 * math.log10(avgSNR)
		stats['weakestPair'] = lowestPair
		stats['difficulty'] = difficulty
		stats['weakestPairing'] = lowestPair
		stats['variableDifficulty'] = nodeDiffs
		stats['normDifficulty'] = 100.0 * difficulty / (dataCount**.5)
		return stats
Exemplo n.º 4
0
    def __init__(self, dataFile, limit=None, prune=True, adjustData=True):
        """Store the configuration and open a reader on *dataFile*.

        dataFile   -- passed to getData.DataReader and kept as self.dataFile.
        limit      -- reader limit; the module-level `dataLimit` is used when
                      None.
        prune      -- stored as self.doPrune.
        adjustData -- stored as self.adjustData.

        The data cache, variable index map and correlation matrix start out
        empty / unset.
        """
        effectiveLimit = dataLimit if limit is None else limit

        # Configuration supplied by the caller.
        self.dataFile = dataFile
        self.limit = effectiveLimit
        self.doPrune = prune
        self.adjustData = adjustData

        # Working state.
        self.dataCache = {}
        self.data = getData.DataReader(dataFile, limit=effectiveLimit)
        self.varIndexes = {}
        self.corrMatrix = None
Exemplo n.º 5
0
def run(dataDefFile, datapoints):
    """Load a data set, list its series names and plot it.

    dataDefFile -- a '.py' data definition file (handed to synthDataGen.run()
                   to obtain the file to read) or a '.csv' data file (read
                   as is). Any other extension is reported and nothing else
                   happens.
    datapoints  -- passed to the generator, the reader limit and plotData().

    Returns None.
    """
    print('Generating data using: ', dataDefFile)
    suffix = dataDefFile.rsplit('.', 1)[-1]

    # Guard clause: reject anything that is neither a definition nor a data file.
    if suffix not in ('py', 'csv'):
        print('*** Invalid file type = ', suffix)
        return

    if suffix == 'py':
        # Definition file: generate the data first.
        source = synthDataGen.run(dataDefFile, datapoints)
    else:
        # Already a data file.
        source = dataDefFile

    reader = getData.DataReader(source, limit=datapoints)
    print('Vars = ', reader.getSeriesNames())
    plotData(reader, datapoints)
Exemplo n.º 6
0
def run():
    """Print autocorrelation and stationarity diagnostics for series 'Io'.

    Reads up to 2000 points of series 'Io' from ..\\data\\experiment1.csv and
    prints the results of acf(), pacf(), arma_order_select_ic() and
    adfuller(). When the second element of the adfuller() result (used here
    as the p-value) exceeds 0.05, the series is first-differenced and
    adfuller() is run again on the differences.
    """
    # Raw string: the backslashes are literal path separators, not escapes.
    d = getData.DataReader(input=r'..\data\experiment1.csv', limit=2000)
    X = d.getSeries('Io')
    print('acf = ', acf(X), ', pacf = ', pacf(X))
    print('arma = ', arma_order_select_ic(X))
    result = adfuller(X)
    pvalue = result[1]
    print('result = ', result)
    if pvalue > .05:
        # Non stationary -- Difference the series and try again.
        # np.diff yields X[i+1] - X[i] only, so the retest is not skewed by a
        # leading term equal to the raw level X[0].
        differenced = np.diff(np.array(X))
        result = adfuller(differenced)
        print('result2 = ', result)
Exemplo n.º 7
0
 def calibrateOneCITest(self, testType, filePath):
     """Check one conditional-independence test module against a model.

     testType -- importable name of a module exposing isIndependent(X, Y, Z).
     filePath -- path without extension; filePath + '.py' is the model
                 definition and filePath + '.csv' the data read back.

     Data is regenerated with self.datacount samples, then every
     (x, y, z) triple returned by SemAnalyzer.getCondDependencies() is
     tested. A triple listed as dependent but reported independent
     increments self.err1Count; one listed as independent but reported
     dependent increments self.err2Count. self.testCount is incremented for
     every triple. The mismatches are printed; nothing is returned.

     Raises ImportError when testType cannot be imported.
     """
     import importlib

     synthDataGen.run(filePath + '.py', samples=self.datacount)
     # import_module returns the module object directly, so the name is
     # never executed as code.
     module = importlib.import_module(testType)
     SA = analyzeSEM.SemAnalyzer(filePath + '.py', self.datacount)
     reader = getData.DataReader(filePath + '.csv', self.datacount)
     dependencies, independencies = SA.getCondDependencies()
     errors = 0
     errorTerms = {}
     # Dependent triples first, then independent ones.
     for expectIndependent, triples in ((False, dependencies),
                                        (True, independencies)):
         for item in triples:
             x, y, z = item
             X = reader.getSeries(x)
             Y = reader.getSeries(y)
             Z = reader.getSeries(z)
             ind = module.isIndependent(X, Y, Z)
             if bool(ind) != expectIndependent:
                 if expectIndependent:
                     verdict = 'Should be independent given'
                     self.err2Count += 1
                 else:
                     verdict = 'Should be dependent given'
                     self.err1Count += 1
                 print('Error -- ', x, 'and', y, verdict, z)
                 errors += 1
                 errorTerms[item] = 1
             self.testCount += 1
     print('Errors for file: ', filePath, '=', errors,
           list(errorTerms.keys()))