Exemplo n.º 1
0
def ssNumClusters(data, datatype):
    """Return one value per subject: the mean number of unique cluster
    labels per fluency list.

    Relies on module-level `subs`, `scheme`, `numsets`, `rw`, and `np`.

    Parameters:
        data: dict keyed by subject ID. For datatype "real", data[sub] is a
            set of lists; for "fake", data[sub][setnum] is one simulated set.
        datatype: "real" or "fake". For "fake", the per-set means are
            averaged over `numsets` simulated sets.
    """
    def _mean_num_clusters(lists):
        # Each labeled item may carry several ';'-joined cluster labels;
        # count the distinct labels per list, then average across lists.
        return np.mean([
            len(set(rw.flatten_list([label.split(';') for label in labeled])))
            for labeled in rw.labelClusters(lists, scheme)
        ])

    returnval = []
    for sub in subs:
        if datatype == "real":
            returnval.append(_mean_num_clusters(data[sub]))
        elif datatype == "fake":
            per_set = [_mean_num_clusters(data[sub][setnum])
                       for setnum in range(numsets)]
            returnval.append(np.mean(per_set))
    return returnval
Exemplo n.º 2
0
def ssUniqueAnimalsNamed(data, datatype, ngram=1):
    """Return one value per subject: the number of unique animal n-grams
    named across that subject's lists.

    Relies on module-level `subs`, `numsets`, `rw`, `np`, and `find_ngrams`.

    Parameters:
        data: dict keyed by subject ID. For datatype "real", data[sub] is a
            set of lists; for "fake", data[sub][setnum] is one simulated set.
        datatype: "real" or "fake". For "fake", the unique count is averaged
            over `numsets` simulated sets.
        ngram: n-gram size passed to `find_ngrams` (default 1 = single items).
    """
    returnval = []
    for sub in subs:
        if datatype == "fake":
            simAnimalsNamed = []
            for setnum in range(numsets):
                # Iterate the lists directly instead of range(len(...)).
                animallists = [find_ngrams(lst, ngram)
                               for lst in data[sub][setnum]]
                simAnimalsNamed.append(len(set(rw.flatten_list(animallists))))
            # Fixed: removed stray trailing comma that built and discarded
            # a one-element tuple around this append.
            returnval.append(np.mean(simAnimalsNamed))
        elif datatype == "real":
            animallists = [find_ngrams(lst, ngram) for lst in data[sub]]
            returnval.append(len(set(rw.flatten_list(animallists))))
    return returnval
Exemplo n.º 3
0
# Monte-Carlo sweep: for each (numlists, listlength) combination, estimate
# how many USF-network edges touch nodes that never appear in generated toy
# lists ("misses") versus correctly-rejected absent pairs ("crs").
# NOTE(review): Python 2 script (print statement; unbuffered text-mode open).
usfsize=160

misses=[]

# NOTE(review): `f` is opened here but never written in this excerpt —
# presumably the per-combination results are written further down; verify.
f=open('usf_sims_misses.csv','a', 0)                # write/append to file with no buffering

for numlists in range(2,18):
    for listlength in [15,30,50,70]:
        misses=0        # NOTE(review): shadows the module-level `misses` list above
        crs=0
        for gnum in range(100):  # how many samples for each numlists/listlength combo
            print numlists, listlength, gnum
            # generate toy lists
            Xs=[rw.genX(usfg)[0:listlength] for i in range(numlists)]

            itemsinXs=np.unique(rw.flatten_list(Xs))    # list of items that appear in toy data

            notinx=[]       # nodes not in trimmed X
            for i in range(usfsize):
                if i not in itemsinXs:
                    notinx.append(i)

            # Edges incident to an unobserved node (each counted once via j > i)
            # cannot be recovered from the data: count them as misses.
            miss=sum([len([j for j in usfg.neighbors(i) if j > i]) for i in notinx])
            misses=misses+miss

            # Remaining pairs involving unobserved nodes are correct rejections.
            # NOTE(review): this sums over positions 0..len(notinx)-1, not over
            # the actual node ids in `notinx` — confirm this is intended.
            cr=sum([(usfsize-i-1) for i in range(len(notinx))])
            cr=cr-miss
            crs=crs+cr

        # Average over the 100 samples drawn above.
        misses=misses/100.0 # 100 = gnum
        crs=crs/100.0
Exemplo n.º 4
0
# Monte-Carlo sweep comparing graph-reconstruction methods (naive random-walk
# `noHidden` vs. U-INVITE `findBestGraph`) against the USF network, over a
# grid of numlists x listlength.
# NOTE(review): Python 2 script; this excerpt ends mid-iteration, and `f` is
# opened but never written within the visible span.
sdt_uis = []

f = open('usf_sims.csv', 'a', 0)  # write/append to file with no buffering

for numlists in range(2, 50):
    for listlength in [15, 30, 50, 70]:
        for gnum in range(
                10):  # how many samples for each numlists/listlength combo
            print numlists, listlength, gnum
            # generate toy lists
            #numlists=5
            #listlength=50   # must be <= numnodes
            Xs = [rw.genX(usfg)[0:listlength] for i in range(numlists)]

            itemsinXs = np.unique(
                rw.flatten_list(Xs))  # list of items that appear in toy data

            # reconstruct graph
            rw_graph = rw.noHidden(Xs, numnodes)
            ui_graph, bestval = rw.findBestGraph(Xs, numnodes=numnodes)

            # remove nodes not in X from all graphs before comparison:
            # keep only rows, then columns, indexed by the observed items.
            rw_graph = rw_graph[itemsinXs, ]
            rw_graph = rw_graph[:, itemsinXs]
            ui_graph = ui_graph[itemsinXs, ]
            ui_graph = ui_graph[:, itemsinXs]
            usfcopy = np.copy(usf)  # copy so the original USF matrix is untouched
            usfcopy = usfcopy[itemsinXs, ]
            usfcopy = usfcopy[:, itemsinXs]

            # Signal-detection comparison of reconstructed vs. true graph.
            sdt_rw = rw.costSDT(rw_graph, usfcopy)
Exemplo n.º 5
0
# For each subject, load the four estimated semantic networks (random walk,
# U-INVITE, and two IRT-weighted variants) from DOT files and write one CSV
# row per edge in the union, with a 0/1 flag per graph type indicating
# whether that graph contains the edge.
allsubs=["S101","S102","S103","S104","S105","S106","S107","S108","S109","S110",
         "S111","S112","S113","S114","S115","S116","S117","S118","S119","S120"]
types=['rw','invite','irt5','irt9']
onezero={True: '1', False: '0'}   # bool -> CSV flag character

# write edges from all graphs to file with no buffering
# NOTE(review): the 3rd positional arg (buffering=0) on a text-mode open is
# Python 2 only; also `f` is never closed in this excerpt.
path='human_graphs/vegetables_1500'
outfile='vegetables_1500.csv'
f=open(outfile,'w', 0)

for sub in allsubs:
    gs=[]

    # One graph per estimation method, read from <path>/<sub>_<type>.dot.
    for typ in types:
        gs.append(nx.read_dot(path+'/'+sub+'_'+typ+'.dot'))

    # Union of edges across the four graphs for this subject.
    edges=set(rw.flatten_list([gs[i].edges() for i in range(len(gs))]))

    # write ALL edges
    for edge in edges:
        edgelist=""
        for g in gs:
            edgelist=edgelist+","+onezero[g.has_edge(edge[0],edge[1])]
        f.write(sub      + "," +
                edge[0]  + "," +
                edge[1]  +
                edgelist + "\n")
    
    
Exemplo n.º 6
0
# Monte-Carlo runs at a fixed design (11 lists of up to 160 items): generate
# toy census data from the USF network, reconstruct a graph with the naive
# random-walk estimator, and trim everything to the observed items.
# NOTE(review): Python 2 script; this excerpt ends mid-iteration, and `f` is
# opened but never written within the visible span.
usfg = nx.to_networkx_graph(usf)
numnodes = len(items)

sdt_rws = []
sdt_uis = []

f = open('usf_sims.csv', 'a', 0)  # write/append to file with no buffering

for gnum in range(100):
    # generate toy lists
    numlists = 11
    listlength = 160  # must be <= numnodes
    Xs = [rw.genX(usfg)[0:listlength] for i in range(numlists)]

    itemsinXs = np.unique(
        rw.flatten_list(Xs))  # list of items that appear in toy data

    # reconstruct graph
    rw_graph = rw.noHidden(Xs, numnodes)

    # remove nodes not in X from all graphs before comparison:
    # keep only rows, then columns, indexed by the observed items.
    rw_graph = rw_graph[itemsinXs, ]
    rw_graph = rw_graph[:, itemsinXs]
    usfcopy = np.copy(usf)
    usfcopy = usfcopy[itemsinXs, ]
    usfcopy = usfcopy[:, itemsinXs]

    # Renumber every item in Xs to its position within itemsinXs so indices
    # match the trimmed matrices.
    # NOTE(review): `list(itemsinXs).index(item)` rebuilds the list and does a
    # linear search on every inner iteration — a dict lookup would be O(1).
    for xnum, x in enumerate(Xs):
        for itemnum, item in enumerate(x):
            Xs[xnum][itemnum] = list(itemsinXs).index(item)
Exemplo n.º 7
0
import ExGUtils.exgauss as exg  # ex-Gaussian distribution fitting

# Subject IDs for the Spring 2015 fluency study.
subs = [
    'S101', 'S102', 'S103', 'S104', 'S105', 'S106', 'S107', 'S108', 'S109',
    'S110', 'S111', 'S112', 'S113', 'S114', 'S115', 'S116', 'S117', 'S118',
    'S119', 'S120'
]

# Pool each subject's inter-response times (IRTs) and fit an ex-Gaussian.
data = []
for sub in subs:
    category = "animals"
    Xs, items, irts, numnodes = rw.readX(sub,
                                         category,
                                         './Spring2015/results_cleaned.csv',
                                         ignorePerseverations=True)
    # NOTE(review): the fitted parameters (mu, sig, tau, aa) are computed but
    # unused in this excerpt — only the flattened IRTs are kept (see the
    # per-subject sig/tau values recorded in the comments below).
    [mu, sig, tau, aa] = exg.fit_exgauss(rw.flatten_list(irts))
    data.append(rw.flatten_list(irts))
    #print sub, sig, tau

#S101 6.44853818847 0.715309248564
#S102 0.169641810849 2.59713557468
#S103 0.190086798166 2.57079135211
#S104 3.68198689247 0.465869916984
#S105 -14.5108699657 2.68546950516
#S106 -2.82975386342 11430177.9084
#S107 0.809851426574 1.85289167062
#S108 0.159991621663 1.59469307533
#S109 0.344188580293 2.13119136732
#S110 0.209335112985 1.06848210514
#S111 0.0150209747958 3.5679534317
#S112 -2.69435183698 3.28844605905
Exemplo n.º 8
0
# Per-subject graph estimation: read each subject's fluency data, fit
# ex-Gaussian IRT parameters, then run U-INVITE without IRTs and with two
# IRT weightings (0.95 and 0.5).
# NOTE(review): Python 2 script; `toydata` and `fitinfo` are defined outside
# this excerpt, and the trailing `# rw` marks a section cut off here.
irts=rw.Irts({
        'data': [],
        'irttype': "exgauss",
        'exgauss_lambda': 0.721386887,   # initial values; refit per subject below
        'exgauss_sigma': 6.58655566,
        'irt_weight': 0.95,
        'rcutoff': 20})

for subj in subs:
    print subj
    category="animals"
    # readX fills irts.data with this subject's inter-response times.
    Xs, items, irts.data, numnodes=rw.readX(subj,category,'./Spring2015/results_cleaned.csv',ignorePerseverations=True)

    toydata.numx = len(Xs)
    # Refit ex-Gaussian parameters to this subject's pooled IRTs.
    [mu, sig, lambd] = rw.mexgauss(rw.flatten_list(irts.data))
    irts.exgauss_sigma = sig
    irts.exgauss_lambda = lambd

    # u-invite (no IRT information)
    uinvite_graph, bestval=rw.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo)

    # irt95
    irts.irt_weight=0.95
    irt95_graph, bestval=rw.uinvite(Xs, toydata, numnodes, irts=irts, fitinfo=fitinfo)

    # irt5
    irts.irt_weight=0.5
    irt5_graph, bestval=rw.uinvite(Xs, toydata, numnodes, irts=irts, fitinfo=fitinfo)

    # rw
Exemplo n.º 9
0
    # Fragment of a larger routine (enclosing def not visible here): writes a
    # CSV header, then for each simulation generates `numsubs` subjects' worth
    # of toy lists from the USF graph and renumbers their items per subject.
    fh.write("method,simnum,listnum,hit,miss,fa,cr,cost,startseed\n")

    for simnum in range(numsims):
        data = []  # Xs using usf_item indices
        datab = [
        ]  # Xs using ss_item indices (nodes only generated by subject)
        numnodes = []
        items = []  # ss_items
        startseed = seednum  # for recording

        for sub in range(numsubs):
            # Generate one subject's lists from the USF graph; advance the
            # seed below so each subject gets distinct randomness.
            Xs = rw.genX(usf_graph_nx, toydata, seed=seednum)[0]
            data.append(Xs)

            # renumber dictionary and item list
            itemset = set(rw.flatten_list(Xs))
            numnodes.append(len(itemset))

            # ss_items: subject-local index -> USF item label;
            # convertX: USF index -> subject-local index.
            ss_items = {}
            convertX = {}
            for itemnum, item in enumerate(itemset):
                ss_items[itemnum] = usf_items[item]
                convertX[item] = itemnum

            items.append(ss_items)

            # Re-express the lists in subject-local indices.
            Xs = [[convertX[i] for i in x] for x in Xs]
            datab.append(Xs)

            seednum += numlists
Exemplo n.º 10
0
# For each subject, load the four estimated semantic networks (random walk,
# U-INVITE, and two IRT-weighted variants) from DOT files and write one CSV
# row per edge in the union, with a 0/1 flag per graph type.
# NOTE(review): star import is discouraged, and `random` appears unused in
# this excerpt; `open(..., 'w', 0)` (unbuffered text mode) is Python 2 only.
from itertools import *
import random

allsubs = [
    "S101", "S102", "S103", "S104", "S105", "S106", "S107", "S108", "S109",
    "S110", "S111", "S112", "S113", "S114", "S115", "S116", "S117", "S118",
    "S119", "S120"
]
types = ['rw', 'invite', 'irt5', 'irt9']
onezero = {True: '1', False: '0'}  # bool -> CSV flag character

# write edges from all graphs to file with no buffering
path = 'human_graphs/vegetables_1500'
outfile = 'vegetables_1500.csv'
f = open(outfile, 'w', 0)

for sub in allsubs:
    gs = []

    # One graph per estimation method, read from <path>/<sub>_<type>.dot.
    for typ in types:
        gs.append(nx.read_dot(path + '/' + sub + '_' + typ + '.dot'))

    # Union of edges across the four graphs for this subject.
    edges = set(rw.flatten_list([gs[i].edges() for i in range(len(gs))]))

    # write ALL edges
    for edge in edges:
        edgelist = ""
        for g in gs:
            edgelist = edgelist + "," + onezero[g.has_edge(edge[0], edge[1])]
        f.write(sub + "," + edge[0] + "," + edge[1] + edgelist + "\n")
Exemplo n.º 11
0
# generate data for `numsub` participants, each having `numlists` lists of `listlengths` items
# NOTE(review): `numsims`, `numsubs`, `numlists`, `usf_graph_nx`, `toydata`,
# and `usf_items` are defined outside this excerpt.
seednum = 0  # seednum=150 (numsubs*numlists) means start at second sim, etc.

for simnum in range(numsims):
    data = []  # Xs using usf_item indices
    datab = []  # Xs using ss_item indices (nodes only generated by subject)
    numnodes = []
    items = []  # ss_items
    startseed = seednum  # for recording

    for sub in range(numsubs):
        # Generate one subject's lists from the USF graph; the seed advances
        # below so each subject gets distinct randomness.
        Xs = rw.genX(usf_graph_nx, toydata, seed=seednum)[0]
        data.append(Xs)

        # renumber dictionary and item list
        itemset = set(rw.flatten_list(Xs))
        numnodes.append(len(itemset))

        # ss_items: subject-local index -> USF item label;
        # convertX: USF index -> subject-local index.
        ss_items = {}
        convertX = {}
        for itemnum, item in enumerate(itemset):
            ss_items[itemnum] = usf_items[item]
            convertX[item] = itemnum

        items.append(ss_items)

        # Re-express the lists in subject-local indices.
        Xs = [[convertX[i] for i in x] for x in Xs]
        datab.append(Xs)

        seednum += numlists
Exemplo n.º 12
0
# Simulate 10,000 random graphs, generate a censored walk on each, and record
# the expected number of hidden nodes alongside the observed step counts —
# one CSV of per-trial means (ehmean.csv) and one of per-item values (eh.csv).
steps = []
ehs = []
meaneh = []
meanstep = []

fh1 = open('ehmean.csv', 'w')
fh2 = open('eh.csv', 'w')

for i in range(10000):
    print(i)  # progress indicator (parenthesized so it parses on Py2 and Py3)
    g, a = rw.genG(20, 4, .3)           # random graph (networkx g, adjacency a)
    X, step = rw.genX(g, use_irts=1)    # censored walk + per-item step counts
    eh = rw.expectedHidden([X], a, len(a))[0]
    steps.append(step)
    ehs.append(eh)
    meaneh.append(np.mean(eh))
    meanstep.append(np.mean(step))

# BUG FIX: the original wrote meanstep[num-1] / steps[num-1], pairing each
# expected-hidden value with the PREVIOUS row (and, for num == 0, with the
# LAST row via negative indexing). Pair index-for-index instead.
for m_eh, m_step in zip(meaneh, meanstep):
    fh1.write(str(m_eh) + "," + str(m_step) + "\n")

ehs = rw.flatten_list(ehs)
steps = rw.flatten_list(steps)

for e, s in zip(ehs, steps):
    fh2.write(str(e) + "," + str(s) + "\n")

fh1.close()
fh2.close()
Exemplo n.º 13
0
    #orig=rw.probX(Xs, uinvite_graph, toydata)

    # Edge-knockout analysis (fragment of a per-subject loop; enclosing scope
    # not visible here): for every edge in the symmetric adjacency matrix,
    # temporarily delete it, recompute the data likelihood, record the result
    # under both endpoints' labels, then restore the edge. Order matters:
    # uinvite_graph is mutated in place and restored before the next edge.
    extra_data={}

    for inum, i in enumerate(uinvite_graph):
        for jnum, j in enumerate(i):
            if uinvite_graph[inum,jnum]==1:
                # Remove both directions of the undirected edge.
                uinvite_graph[inum,jnum]=0
                uinvite_graph[jnum,inum]=0
                # forceCompute bypasses any cached likelihood for the mutated graph.
                result=rw.probX(Xs, uinvite_graph, toydata, forceCompute=True)
                if items[inum] not in extra_data:
                    extra_data[items[inum]]={}
                if items[jnum] not in extra_data:
                    extra_data[items[jnum]]={}
                # Store (log-likelihood, mean of flattened per-list values)
                # symmetrically under both endpoint labels.
                extra_data[items[inum]][items[jnum]] = (result[0], np.mean(rw.flatten_list(result[1])))
                extra_data[items[jnum]][items[inum]] = (result[0], np.mean(rw.flatten_list(result[1])))
                # Restore the edge before testing the next one.
                uinvite_graph[inum,jnum]=1
                uinvite_graph[jnum,inum]=1

    g=nx.to_networkx_graph(uinvite_graph)
    nx.relabel_nodes(g, items, copy=False)  # in-place relabel: indices -> item names
    rw.write_csv(g,subj+".csv",subj,extra_data=extra_data) # write multiple graphs

#@joe
#what if we only include edges that are either bidirectional or uni-directional AND in the undirected graph
#there's also a standard procedure for converting directed graphs into undirected graphs
#it's called moralization

# match edge density of USF network (i.e., prior on edges)
Exemplo n.º 14
0
        'other_limit': np.inf })


# Generate toy lists for `numsubs` simulated participants from the USF graph
# and renumber each subject's items to subject-local indices.
# NOTE(review): `numsubs`, `numlists`, `usf_graph_nx`, `toydata`, and
# `usf_items` are defined outside this excerpt.
seednum =0

data=[]      # Xs using usf_item indices
datab=[]     # Xs using subject-local indices
numnodes=[]  # per-subject count of distinct items produced
items=[]     # per-subject dict: local index -> USF item label

for sub in range(numsubs):
    # Seed advances below so each subject gets distinct randomness.
    Xs = rw.genX(usf_graph_nx, toydata, seed=seednum)[0]
    data.append(Xs)

    # Renumber dictionary and item list.
    itemset = set(rw.flatten_list(Xs))
    numnodes.append(len(itemset))

    # ss_items: local index -> USF item label; convertX: USF index -> local index.
    ss_items = {}
    convertX = {}
    for itemnum, item in enumerate(itemset):
        ss_items[itemnum] = usf_items[item]
        convertX[item] = itemnum

    items.append(ss_items)

    # Re-express the lists in subject-local indices.
    Xs = [[convertX[i] for i in x] for x in Xs]
    datab.append(Xs)

    seednum += numlists