def ssNumClusters(data, datatype):
    """Mean number of semantic clusters per list, one value per subject.

    Relies on module-level `subs` (subject ids), `scheme` (cluster labeling
    scheme) and, for simulated data, `numsets` (number of simulated sets).

    data     -- dict keyed by subject; for "real" a list of lists, for "fake"
                a list (per set) of lists of lists
    datatype -- "real" or "fake"; "fake" additionally averages over sets
    Returns a list of per-subject means, in `subs` order.
    """
    result = []
    for sub in subs:
        if datatype == "real":
            labeled = rw.labelClusters(data[sub], scheme)
            # distinct cluster labels per list (labels may be ';'-joined)
            per_list = [len(set(rw.flatten_list([lab.split(';') for lab in lst])))
                        for lst in labeled]
            result.append(np.mean(per_list))
        elif datatype == "fake":
            set_means = []
            for setnum in range(numsets):
                labeled = rw.labelClusters(data[sub][setnum], scheme)
                per_list = [len(set(rw.flatten_list([lab.split(';') for lab in lst])))
                            for lst in labeled]
                set_means.append(np.mean(per_list))
            # average over simulated sets
            result.append(np.mean(set_means))
    return result
def ssUniqueAnimalsNamed(data, datatype, ngram=1):
    """Count distinct n-grams (e.g. unique animals) named by each subject.

    Relies on module-level `subs` (subject ids), `find_ngrams`, and, for
    simulated data, `numsets` (number of simulated sets).

    data     -- dict keyed by subject; for "real" a list of lists, for "fake"
                a list (per set) of lists of lists
    datatype -- "real" or "fake"; "fake" averages the count over sets
    ngram    -- n-gram size passed through to `find_ngrams` (default 1)
    Returns a list of per-subject counts (floats for "fake"), in `subs` order.
    """
    returnval = []
    for sub in subs:
        if datatype == "fake":
            simAnimalsNamed = []
            for setnum in range(numsets):
                # n-grams from every list in this simulated set
                animallists = [find_ngrams(lst, ngram) for lst in data[sub][setnum]]
                simAnimalsNamed.append(len(set(rw.flatten_list(animallists))))
            # BUG FIX: the original statement ended in a stray trailing comma
            # (`returnval.append(...),`), silently building a throwaway tuple.
            returnval.append(np.mean(simAnimalsNamed))
        elif datatype == "real":
            animallists = [find_ngrams(lst, ngram) for lst in data[sub]]
            returnval.append(len(set(rw.flatten_list(animallists))))
    return returnval
usfsize=160 misses=[] f=open('usf_sims_misses.csv','a', 0) # write/append to file with no buffering for numlists in range(2,18): for listlength in [15,30,50,70]: misses=0 crs=0 for gnum in range(100): # how many samples for each numlists/listlength combo print numlists, listlength, gnum # generate toy lists Xs=[rw.genX(usfg)[0:listlength] for i in range(numlists)] itemsinXs=np.unique(rw.flatten_list(Xs)) # list of items that appear in toy data notinx=[] # nodes not in trimmed X for i in range(usfsize): if i not in itemsinXs: notinx.append(i) miss=sum([len([j for j in usfg.neighbors(i) if j > i]) for i in notinx]) misses=misses+miss cr=sum([(usfsize-i-1) for i in range(len(notinx))]) cr=cr-miss crs=crs+cr misses=misses/100.0 # 100 = gnum crs=crs/100.0
# Sweep over (numlists, listlength): generate toy fluency lists from the USF
# graph, reconstruct the network with both the naive random-walk method and
# U-INVITE, then score each reconstruction against the USF gold standard
# (restricted to observed items).  NOTE(review): chunk appears to continue
# past this view — sdt_uis is filled and f is written to elsewhere, presumably.
sdt_uis = []
f = open('usf_sims.csv', 'a', 0)  # write/append to file with no buffering
for numlists in range(2, 50):
    for listlength in [15, 30, 50, 70]:
        for gnum in range(
                10):  # how many samples for each numlists/listlength combo
            print numlists, listlength, gnum
            # generate toy lists
            #numlists=5
            #listlength=50 # must be <= numnodes
            Xs = [rw.genX(usfg)[0:listlength] for i in range(numlists)]
            itemsinXs = np.unique(
                rw.flatten_list(Xs))  # list of items that appear in toy data
            # reconstruct graph
            rw_graph = rw.noHidden(Xs, numnodes)
            ui_graph, bestval = rw.findBestGraph(Xs, numnodes=numnodes)
            # remove nodes not in X from all graphs before comparison
            # (row slice first, then column slice — keeps only observed items)
            rw_graph = rw_graph[itemsinXs, ]
            rw_graph = rw_graph[:, itemsinXs]
            ui_graph = ui_graph[itemsinXs, ]
            ui_graph = ui_graph[:, itemsinXs]
            usfcopy = np.copy(usf)  # copy so the gold-standard matrix survives slicing
            usfcopy = usfcopy[itemsinXs, ]
            usfcopy = usfcopy[:, itemsinXs]
            # signal-detection cost of the naive reconstruction vs gold standard
            sdt_rw = rw.costSDT(rw_graph, usfcopy)
# For every subject, load the graphs inferred by each method and write one CSV
# row per edge in the union of those graphs, with a 0/1 column per method
# indicating whether that method's graph contains the edge.
allsubs = ["S101", "S102", "S103", "S104", "S105", "S106", "S107", "S108",
           "S109", "S110", "S111", "S112", "S113", "S114", "S115", "S116",
           "S117", "S118", "S119", "S120"]
types = ['rw', 'invite', 'irt5', 'irt9']
onezero = {True: '1', False: '0'}  # edge-presence flag written to the CSV

path = 'human_graphs/vegetables_1500'
outfile = 'vegetables_1500.csv'
f = open(outfile, 'w', 0)  # write with no buffering

for sub in allsubs:
    # one inferred graph per method for this subject
    graphs = [nx.read_dot(path + '/' + sub + '_' + typ + '.dot') for typ in types]
    # union of edges across ALL methods' graphs
    edges = set(rw.flatten_list([g.edges() for g in graphs]))
    for edge in edges:
        flags = ""
        for g in graphs:
            flags = flags + "," + onezero[g.has_edge(edge[0], edge[1])]
        f.write(sub + "," + edge[0] + "," + edge[1] + flags + "\n")
# Fixed-size simulation: generate 11 toy lists of 160 items from the USF
# graph, reconstruct the network naively, trim both the reconstruction and
# the gold standard to observed items, and renumber the lists to match.
usfg = nx.to_networkx_graph(usf)
numnodes = len(items)
sdt_rws = []
sdt_uis = []
f = open('usf_sims.csv', 'a', 0)  # write/append to file with no buffering

for gnum in range(100):
    # generate toy lists
    numlists = 11
    listlength = 160  # must be <= numnodes
    Xs = [rw.genX(usfg)[0:listlength] for i in range(numlists)]
    itemsinXs = np.unique(rw.flatten_list(Xs))  # items that appear in toy data
    # reconstruct graph from the censored walks
    rw_graph = rw.noHidden(Xs, numnodes)
    # restrict reconstruction and gold standard to observed items
    # (rows first, then columns)
    rw_graph = rw_graph[itemsinXs, ][:, itemsinXs]
    usfcopy = np.copy(usf)
    usfcopy = usfcopy[itemsinXs, ][:, itemsinXs]
    # remap every item id in Xs to its position in the compressed index space
    observed = list(itemsinXs)
    for xnum, x in enumerate(Xs):
        for itemnum, item in enumerate(x):
            Xs[xnum][itemnum] = observed.index(item)
import ExGUtils.exgauss as exg

# Fit an ex-Gaussian distribution to each subject's pooled inter-response
# times (IRTs) and collect the flattened IRTs for later use.
subs = ['S101', 'S102', 'S103', 'S104', 'S105', 'S106', 'S107', 'S108',
        'S109', 'S110', 'S111', 'S112', 'S113', 'S114', 'S115', 'S116',
        'S117', 'S118', 'S119', 'S120']

data = []
category = "animals"
for sub in subs:
    Xs, items, irts, numnodes = rw.readX(sub, category,
                                         './Spring2015/results_cleaned.csv',
                                         ignorePerseverations=True)
    # ex-Gaussian fit over ALL of this subject's IRTs pooled together
    [mu, sig, tau, aa] = exg.fit_exgauss(rw.flatten_list(irts))
    data.append(rw.flatten_list(irts))
    #print sub, sig, tau
# Recorded per-subject (sigma, tau) fits:
#S101 6.44853818847 0.715309248564
#S102 0.169641810849 2.59713557468
#S103 0.190086798166 2.57079135211
#S104 3.68198689247 0.465869916984
#S105 -14.5108699657 2.68546950516
#S106 -2.82975386342 11430177.9084
#S107 0.809851426574 1.85289167062
#S108 0.159991621663 1.59469307533
#S109 0.344188580293 2.13119136732
#S110 0.209335112985 1.06848210514
#S111 0.0150209747958 3.5679534317
#S112 -2.69435183698 3.28844605905
# Fit U-INVITE graphs for each subject under several IRT-weighting schemes.
# NOTE: the same `irts` object is mutated between rw.uinvite calls, so the
# statement order below is load-bearing.
irts = rw.Irts({
    'data': [],
    'irttype': "exgauss",
    'exgauss_lambda': 0.721386887,   # group-level defaults, overwritten per subject below
    'exgauss_sigma': 6.58655566,
    'irt_weight': 0.95,
    'rcutoff': 20})
for subj in subs:
    print subj
    category = "animals"
    Xs, items, irts.data, numnodes = rw.readX(subj, category, './Spring2015/results_cleaned.csv', ignorePerseverations=True)
    toydata.numx = len(Xs)
    # per-subject ex-Gaussian parameters estimated from pooled IRTs
    [mu, sig, lambd] = rw.mexgauss(rw.flatten_list(irts.data))
    irts.exgauss_sigma = sig
    irts.exgauss_lambda = lambd
    # u-invite (no IRT information)
    uinvite_graph, bestval = rw.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo)
    # irt95: U-INVITE with IRT weight 0.95
    irts.irt_weight = 0.95
    irt95_graph, bestval = rw.uinvite(Xs, toydata, numnodes, irts=irts, fitinfo=fitinfo)
    # irt5: U-INVITE with IRT weight 0.5
    irts.irt_weight = 0.5
    irt5_graph, bestval = rw.uinvite(Xs, toydata, numnodes, irts=irts, fitinfo=fitinfo)
    # rw
# Write the results-file header, then for each simulation generate one censored
# walk per simulated subject and build per-subject renumbered copies.
fh.write("method,simnum,listnum,hit,miss,fa,cr,cost,startseed\n")
for simnum in range(numsims):
    data = []      # Xs using usf_item indices
    datab = []     # Xs using ss_item indices (only nodes the subject generated)
    numnodes = []  # distinct-item count per subject
    items = []     # per-subject item dictionaries (ss_items)
    startseed = seednum  # seed at the start of this sim, for recording
    for sub in range(numsubs):
        Xs = rw.genX(usf_graph_nx, toydata, seed=seednum)[0]
        data.append(Xs)
        # build this subject's renumbering: usf index -> compact local index
        observed = set(rw.flatten_list(Xs))
        numnodes.append(len(observed))
        label_of = {}   # local index -> item label
        local_of = {}   # usf index -> local index
        for pos, usf_idx in enumerate(observed):
            label_of[pos] = usf_items[usf_idx]
            local_of[usf_idx] = pos
        items.append(label_of)
        Xs = [[local_of[i] for i in x] for x in Xs]
        datab.append(Xs)
        seednum += numlists
from itertools import *
import random

# One row per edge in the union of each subject's method graphs; a 0/1
# column per method marks whether that method inferred the edge.
allsubs = [
    "S101", "S102", "S103", "S104", "S105", "S106", "S107", "S108", "S109",
    "S110", "S111", "S112", "S113", "S114", "S115", "S116", "S117", "S118",
    "S119", "S120"
]
types = ['rw', 'invite', 'irt5', 'irt9']
onezero = {True: '1', False: '0'}  # edge-presence flag for the CSV

# write edges from all graphs to file with no buffering
path = 'human_graphs/vegetables_1500'
outfile = 'vegetables_1500.csv'
f = open(outfile, 'w', 0)

for sub in allsubs:
    gs = []
    for typ in types:
        gs.append(nx.read_dot(path + '/' + sub + '_' + typ + '.dot'))
    # union of edges over every method's graph for this subject
    edges = set(rw.flatten_list([g.edges() for g in gs]))
    # write ALL edges
    for edge in edges:
        cols = [onezero[g.has_edge(edge[0], edge[1])] for g in gs]
        edgelist = ""
        for c in cols:
            edgelist = edgelist + "," + c
        f.write(sub + "," + edge[0] + "," + edge[1] + edgelist + "\n")
# generate data for `numsub` participants, each having `numlists` lists of `listlengths` items
seednum = 0  # seednum=150 (numsubs*numlists) means start at second sim, etc.
for simnum in range(numsims):
    data = []      # Xs using usf_item indices
    datab = []     # Xs using ss_item indices (nodes only generated by subject)
    numnodes = []
    items = []     # ss_items
    startseed = seednum  # for recording
    for sub in range(numsubs):
        Xs = rw.genX(usf_graph_nx, toydata, seed=seednum)[0]
        data.append(Xs)
        # renumber dictionary and item list: usf index -> compact local index
        itemset = set(rw.flatten_list(Xs))
        numnodes.append(len(itemset))
        to_local = {item: pos for pos, item in enumerate(itemset)}
        items.append({pos: usf_items[item] for item, pos in to_local.items()})
        Xs = [[to_local[i] for i in x] for x in Xs]
        datab.append(Xs)
        seednum += numlists
steps=[] ehs=[] meaneh=[] meanstep=[] fh1 = open('ehmean.csv','w') fh2 = open('eh.csv','w') for i in range(10000): print i g,a=rw.genG(20,4,.3) X,step=rw.genX(g,use_irts=1) eh=rw.expectedHidden([X],a,len(a))[0] steps.append(step) ehs.append(eh) meaneh.append(np.mean(eh)) meanstep.append(np.mean(step)) for num, i in enumerate(meaneh): fh1.write(str(i) + "," + str(meanstep[num-1]) + "\n") ehs=rw.flatten_list(ehs) steps=rw.flatten_list(steps) for num, i in enumerate(ehs): fh2.write(str(i) + "," + str(steps[num-1]) + "\n") fh1.close() fh2.close()
#orig=rw.probX(Xs, uinvite_graph, toydata) extra_data={} for inum, i in enumerate(uinvite_graph): for jnum, j in enumerate(i): if uinvite_graph[inum,jnum]==1: uinvite_graph[inum,jnum]=0 uinvite_graph[jnum,inum]=0 result=rw.probX(Xs, uinvite_graph, toydata, forceCompute=True) if items[inum] not in extra_data: extra_data[items[inum]]={} if items[jnum] not in extra_data: extra_data[items[jnum]]={} extra_data[items[inum]][items[jnum]] = (result[0], np.mean(rw.flatten_list(result[1]))) extra_data[items[jnum]][items[inum]] = (result[0], np.mean(rw.flatten_list(result[1]))) uinvite_graph[inum,jnum]=1 uinvite_graph[jnum,inum]=1 g=nx.to_networkx_graph(uinvite_graph) nx.relabel_nodes(g, items, copy=False) rw.write_csv(g,subj+".csv",subj,extra_data=extra_data) # write multiple graphs #@joe #what if we only include edges that are either bidirectional or uni-directional AND in the undirected graph #there's also a standard procedure for converting directed graphs into undirected graphs #it's called moralization # match edge density of USF network (i.e., prior on edges)
'other_limit': np.inf }) seednum =0 data=[] datab=[] numnodes=[] items=[] for sub in range(numsubs): Xs = rw.genX(usf_graph_nx, toydata, seed=seednum)[0] data.append(Xs) # renumber dictionary and item listuinvite_group_graph = rw.priorToGraph(priordict, usf_items) itemset = set(rw.flatten_list(Xs)) numnodes.append(len(itemset)) ss_items = {} convertX = {} for itemnum, item in enumerate(itemset): ss_items[itemnum] = usf_items[item] convertX[item] = itemnum items.append(ss_items) Xs = [[convertX[i] for i in x] for x in Xs] datab.append(Xs) seednum += numlists