def genFakeData(td, method="genX", decay=0): for sub in subs: fake_data[sub] = [] fake_irts[sub] = [] for setnum in range(numsets): dataset = [] irtset = [] for listnum, trimval in enumerate(listlengths[sub]): td.trim = trimval ## SEED START # find animal key in USFanimals or substitute with closest animal startAnimal = real_data[sub][listnum][0] if startAnimal == 'polarbear': startAnimal = 'bear' if startAnimal == 'beagle': startAnimal = 'dog' if startAnimal == 'bulldog': startAnimal = 'dog' if startAnimal == 'cheetah': startAnimal = 'tiger' if startAnimal == 'python': startAnimal = 'snake' if startAnimal == 'ferret': startAnimal = 'possum' if startAnimal == 'hedgehog': startAnimal = 'chipmunk' for key, val in USFanimals.iteritems(): if val == startAnimal: td.startX = ('specific', key) ## SEED END print str(sub), str(td.trim) if method == "genX": [data, irts, alter_graph] = rw.genX(USFnet, td) irtset.append(irts) elif method == "frequency": data = [rw.nodeDegreeSearch(USFnet, td) for i in range(3)] elif method == "cbdfs": data = [rw.cbdfs(USFnet, td) for i in range(3)] elif method == "sa": data = [ rw.spreadingActivationSearch(USFnet, td, decay) for i in range(3) ] data = numToAnimal(data, USFanimals)[0] dataset.append(data) fake_data[sub].append(dataset) fake_irts[sub].append(irtset) return fake_data
# Simulate toy free-association lists from the USF network and count, per
# (numlists, listlength) combination, how many true edges can never be
# recovered because a node was never produced ("misses"), plus the
# corresponding correct rejections.
usfg=nx.to_networkx_graph(usf)   # USF adjacency matrix -> networkx graph
numnodes=len(items)
usfsize=160                      # node count used for pair/edge bookkeeping below
misses=[]                        # NOTE(review): dead — shadowed by misses=0 inside the loop
f=open('usf_sims_misses.csv','a', 0) # write/append to file with no buffering
for numlists in range(2,18):
    for listlength in [15,30,50,70]:
        misses=0                 # edge misses accumulated over all samples of this combo
        crs=0                    # correct rejections accumulated over all samples
        for gnum in range(100): # how many samples for each numlists/listlength combo
            print numlists, listlength, gnum
            # generate toy lists (random walks truncated to listlength items)
            Xs=[rw.genX(usfg)[0:listlength] for i in range(numlists)]
            itemsinXs=np.unique(rw.flatten_list(Xs)) # list of items that appear in toy data
            notinx=[]            # nodes not in trimmed X
            for i in range(usfsize):
                if i not in itemsinXs:
                    notinx.append(i)
            # every true edge touching an unobserved node is unrecoverable;
            # j > i counts each undirected edge once
            miss=sum([len([j for j in usfg.neighbors(i) if j > i]) for i in notinx])
            misses=misses+miss
            # presumably the count of node pairs involving unobserved nodes
            # (each pair once); non-edges among them are correct rejections
            # -- TODO confirm against the analysis that reads this CSV
            cr=sum([(usfsize-i-1) for i in range(len(notinx))])
            cr=cr-miss           # subtract the pairs that are real (missed) edges
            crs=crs+cr
import rw
import numpy as np
import random
import math
import sys

# toy-graph parameters
numnodes=15      # nodes in toy graph
numlinks=4       # initial edges per node
probRewire=.3    # edge re-wiring probability
numx=3           # number of observed lists
graph_seed=1
x_seed=graph_seed
randomseed=1

# toy graph (g networkx graph, a adjacency matrix) and observed walks
g,a=rw.genG(numnodes,numlinks,probRewire,seed=graph_seed)
Xs=[rw.genX(g, seed=x_seed+i) for i in range(numx)]

def PfromB(b):
    """Set P from b: transition matrix P[to, from] as column-wise softmax of b.

    P[i, j] = exp(b[i, j]) / sum_k exp(b[k, j]), so each column sums to 1.

    Generalized: the matrix size is inferred from b itself rather than from
    the module-level `numnodes`, so any square weight matrix works; the
    per-element math.exp loops are replaced by one vectorized computation
    with identical results.
    """
    expb = np.exp(np.asarray(b, dtype=float))
    return expb / expb.sum(axis=0)   # normalize each column

def regml():
    #random.seed(randomseed) # only for replicability
    #np.random.seed(randomseed)
    # free parameters
    c=.75 # value used by Kwang, and Bottou, 2012
numedges = numnodes * (numlinks / 2) # number of edges in graph numx = 3 # How many observed lists? trim = 1 # ~ What proportion of graph does each list cover? # PARAMETERS FOR RECONTRUCTING GRAPH jeff = 0.9 # 1-IRT weight beta = (1 / 1.1 ) # for gamma distribution when generating IRTs from hidden nodes #graph_seed=None #x_seed=None graph_seed = 65 # make sure same toy network is generated every time x_seed = 65 # make sure same Xs are generated every time # toy data g, a = rw.genG(numnodes, numlinks, probRewire, seed=graph_seed) [Xs, steps] = zip(*[rw.genX(g, seed=x_seed + i, use_irts=1) for i in range(numx)]) Xs = list(Xs) steps = list(steps) irts = rw.stepsToIRT(steps, beta, seed=x_seed) [Xs, irts, alter_graph] = rw.trimX(trim, Xs, irts, g) # Find best graph! #best_graph, bestval=rw.findBestGraph(Xs, irts, jeff, beta, numnodes) best_graph, bestval = rw.findBestGraph(Xs, numnodes=numnodes) # convert best graph to networkX graph, write to file #g=nx.to_networkx_graph(best_graph) #nx.write_dot(g,subj+".dot")
# generate data for `numsub` participants, each having `numlists` lists of `listlengths` items seednum = 0 # seednum=150 (numsubs*numlists) means start at second sim, etc. with open('gradualpluscutoff7.csv', 'w', 0) as fh: fh.write("method,simnum,listnum,hit,miss,fa,cr,cost,startseed\n") for simnum in range(numsims): data = [] # Xs using usf_item indices datab = [ ] # Xs using ss_item indices (nodes only generated by subject) numnodes = [] items = [] # ss_items startseed = seednum # for recording for sub in range(numsubs): Xs = rw.genX(usf_graph_nx, toydata, seed=seednum)[0] data.append(Xs) # renumber dictionary and item list itemset = set(rw.flatten_list(Xs)) numnodes.append(len(itemset)) ss_items = {} convertX = {} for itemnum, item in enumerate(itemset): ss_items[itemnum] = usf_items[item] convertX[item] = itemnum items.append(ss_items) Xs = [[convertX[i] for i in x] for x in Xs]
numlinks=4 # initial number of edges per node (must be even) probRewire=.3 # probability of re-wiring an edge numedges=numnodes*(numlinks/2) # number of edges in graph numx=3 # How many observed lists? trim=1 # ~ What proportion of graph does each list cover? # PARAMETERS FOR RECONTRUCTING GRAPH jeff=0.9 # 1-IRT weight beta=(1/1.1) # for gamma distribution when generating IRTs from hidden nodes #graph_seed=None #x_seed=None graph_seed=65 # make sure same toy network is generated every time x_seed=65 # make sure same Xs are generated every time # toy data g,a=rw.genG(numnodes,numlinks,probRewire,seed=graph_seed) [Xs,steps]=zip(*[rw.genX(g, seed=x_seed+i,use_irts=1) for i in range(numx)]) Xs=list(Xs) steps=list(steps) irts=rw.stepsToIRT(steps, beta, seed=x_seed) [Xs,irts,alter_graph]=rw.trimX(trim,Xs,irts,g) # Find best graph! best_graph, bestval=rw.findBestGraph(Xs, irts, jeff, beta, numnodes) # convert best graph to networkX graph, write to file #g=nx.to_networkx_graph(best_graph) #nx.write_dot(g,subj+".dot")
import rw import numpy as np steps=[] ehs=[] meaneh=[] meanstep=[] fh1 = open('ehmean.csv','w') fh2 = open('eh.csv','w') for i in range(10000): print i g,a=rw.genG(20,4,.3) X,step=rw.genX(g,use_irts=1) eh=rw.expectedHidden([X],a,len(a))[0] steps.append(step) ehs.append(eh) meaneh.append(np.mean(eh)) meanstep.append(np.mean(step)) for num, i in enumerate(meaneh): fh1.write(str(i) + "," + str(meanstep[num-1]) + "\n") ehs=rw.flatten_list(ehs) steps=rw.flatten_list(steps) for num, i in enumerate(ehs): fh2.write(str(i) + "," + str(steps[num-1]) + "\n") fh1.close()
# For each toy-data condition, generate lists from a Steyvers toy graph and
# fit U-INVITE twice: with priming as generated, then with priming disabled,
# comparing reconstruction cost against the true adjacency matrix `a`.
toygraph = rw.Toygraphs({
    'graphtype': "steyvers",
    'numnodes': 50,
    'numlinks': 6
})
fh = open('priming_test.csv', 'w')
seed = 15  # same seed for graph, data, and fits, for reproducibility
for td in toydata:
    print "numx: ", td.numx
    # generate data with priming and fit best graph
    g, a = rw.genG(toygraph, seed=seed)
    [Xs, irts, alter_graph] = rw.genX(g, td, seed=seed)
    bestgraph_priming, ll = rw.uinvite(Xs, td, toygraph.numnodes, fitinfo=fitinfo,
                                       seed=seed, debug=True)
    priming_cost = rw.cost(bestgraph_priming, a)  # edge errors vs true graph
    print priming_cost
    td.priming = 0.0
    # fit best graph assuming no priming
    # NOTE(review): this call is truncated in this chunk; it continues below
    bestgraph_nopriming, ll = rw.uinvite(Xs, td, toygraph.numnodes, fitinfo=fitinfo,
# IRT and fit configuration, then toy data generation and matrix drawing.
irts = rw.Irts({
    'data': [],
    'irttype': "gamma",
    'beta': (1 / 1.1),    # gamma parameter for IRTs (matches other scripts' beta)
    'irt_weight': 0.9,    # presumably the 1-IRT weight ("jeff" elsewhere) — verify
    'rcutoff': 20
})
fitinfo = rw.Fitinfo({
    'tolerance': 1500,
    'startGraph': "naiverw",
    'prob_multi': 1.0,
    'prob_overlap': 0.5
})
x_seed = 1
graph_seed = 1
td = toydata[0]
g, a = rw.genG(toygraphs, seed=graph_seed)
# one list per td.numx; genX returns (X, irt) pairs which zip(*...) splits
[Xs, irts.data] = zip(*[rw.genX(g, td, seed=x_seed + i) for i in range(td.numx)])
Xs = list(Xs)               # zip(*...) yields tuples; later code expects lists
irts.data = list(irts.data)
[Xs, irts.data, alter_graph] = rw.trimX(td.trim, Xs, irts.data)  # trim data when necessary
rw.drawMat(a, cmap=plt.cm.BuGn)
newmat = rw.drawMatChange(Xs, a, td, (0, 1), keep=0)  # e.g.
## generate fake_ data and irts yoked to real_ data numsets = 100 # number of sets of fake data per SS fake_data = {} fake_irts = {} for subj in subs: graph, items = rw.read_csv(real_graphs, cols=('node1', 'node2'), header=1, filters={ 'subj': str(subj), 'uinvite': '1' }) graph = nx.from_numpy_matrix(graph) fake_data[subj] = [] fake_irts[subj] = [] for setnum in range(numsets): dataset = [] irtset = [] for trimval in listlengths[subj]: toydata.trim = trimval print str(subj), str(toydata.trim) [data, irts, alter_graph] = rw.genX(graph, toydata) data = numToAnimal(data, items)[0] dataset.append(data) irtset.append(irts) fake_data[subj].append(dataset) fake_irts[subj].append(irtset)