def data_properties(command):
    """Compute descriptive statistics for one participant's fluency data.

    Reads the participant's lists via rw.readX, converts numeric codes to
    item labels, and returns a dict of summary measures (cluster sizes and
    switches, intrusions, perseverations, list lengths).

    Parameters
    ----------
    command : dict
        Must contain a 'data_parameters' dict with keys 'subject',
        'category', 'fullpath', 'spellfile', 'cluster_scheme', and
        'cluster_type'.

    Returns
    -------
    dict with "type": "data_properties" plus the computed measures.
    """

    def _cluster_scheme_path(label):
        # Resolve a scheme label to a CSV path relative to the app directory.
        app_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
        scheme_files = {
            "Troyer": "/../schemes/troyer_animals.csv",
            "Troyer-Hills": "/../schemes/troyer_hills_animals.csv",
            "Troyer-Hills-Zemla": "/../schemes/troyer_hills_zemla_animals.csv",
        }
        return app_dir + scheme_files[label]

    def _spellfile_path(label):
        # Resolve a spellfile label; the "None" label maps to None (no file),
        # in which case no directory prefix is appended.
        app_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
        spell_files = {"Zemla": "/../schemes/zemla_spellfile.csv", "None": None}
        relative = spell_files[label]
        return app_dir + relative if relative else relative

    params = command['data_parameters']
    command = params  # keep the original local name bound as before

    Xs, items, irts, numnodes = rw.readX(
        command['subject'], command['category'], command['fullpath'],
        spellfile=_spellfile_path(command['spellfile']))
    Xs = rw.numToAnimal(Xs, items)

    # Resolve the scheme file once; it is used for both clustering and
    # intrusion detection.
    scheme_file = _cluster_scheme_path(command['cluster_scheme'])

    cluster_sizes = rw.clusterSize(Xs, scheme_file,
                                   clustertype=command['cluster_type'])
    intrusions = rw.intrusions(Xs, scheme_file)
    perseverations = rw.perseverations(Xs)

    # NOTE(review): avgNumPerseverations is passed the raw lists (Xs), not
    # the `perseverations` result — confirm against rw's API that this is
    # intended.
    return {
        "type": "data_properties",
        "num_lists": len(Xs),
        "avg_items_listed": np.mean([len(i) for i in Xs]),
        "intrusions": intrusions,
        "perseverations": perseverations,
        "avg_num_intrusions": rw.avgNumIntrusions(intrusions),
        "avg_num_perseverations": rw.avgNumPerseverations(Xs),
        "avg_unique_items_listed": np.mean([len(set(i)) for i in Xs]),
        "avg_num_cluster_switches": rw.avgNumClusterSwitches(cluster_sizes),
        "avg_cluster_size": rw.avgClusterSize(cluster_sizes)
    }
irts=rw.Irts({ 'data': [], 'irttype': "exgauss", 'lambda': 0.721386887, 'sigma': 6.58655566, 'irt_weight': 0.95, 'rcutoff': 20}) # USF prior #usfnet, usfitems = rw.read_csv('./snet/USF_animal_subset.snet') #priordict = rw.genGraphPrior([usfnet], [usfitems]) for subj in subs: print subj category="animals" Xs, items, irts.data, numnodes=rw.readX(subj,category,'./Spring2015/results_cleaned.csv',ignorePerseverations=True) # uinvite prior priorgraphs=[] prioritems=[] for osub in subs: if osub != subj: g, i = rw.read_csv('Sdirected2015.csv',cols=("node1","node2"),header=True,filters={"subj": osub, "uinvite": "1"},undirected=False) priorgraphs.append(g) prioritems.append(i) priordict = rw.genGraphPrior(priorgraphs, prioritems) toydata.numx = len(Xs) prior = (priordict, items) # u-invite
def network_properties(command):
    """Estimate a semantic network from fluency data and report its properties.

    Fits a network with the requested method ("RW", "Goni", or "U-INVITE"),
    then computes mean node degree, average clustering coefficient, and
    average shortest path length.

    Parameters
    ----------
    command : dict
        Must contain 'data_parameters' (with 'subject', 'category',
        'fullpath') and 'network_parameters' (with 'network_method').

    Returns
    -------
    dict with "type": "network_properties", the graph measures, and a
    JSON-serializable form of the graph.
    """
    subj_props = command['data_parameters']
    command = command['network_parameters']
    Xs, items, irts, numnodes = rw.readX(subj_props['subject'],
                                         subj_props['category'],
                                         subj_props['fullpath'])

    def no_persev(x):
        # Remove repeated items while preserving first-occurrence order.
        seen = set()
        seen_add = seen.add
        return [i for i in x if not (i in seen or seen_add(i))]

    toydata = rw.Data({
        'numx': len(Xs),
        'trim': 1,
        'jump': 0.0,
        'jumptype': "stationary",
        'priming': 0.0,
        'startX': "stationary"
    })
    fitinfo = rw.Fitinfo({
        'startGraph': "goni_valid",
        'goni_size': 2,
        'goni_threshold': 2,
        'followtype': "avg",
        'prior_samplesize': 10000,
        'recorddir': "records/",
        'prune_limit': 100,
        'triangle_limit': 100,
        'other_limit': 100
    })

    if command['network_method'] == "RW":
        bestgraph = rw.noHidden(Xs, numnodes)
    elif command['network_method'] == "Goni":
        bestgraph = rw.goni(Xs, numnodes, td=toydata, valid=0,
                            fitinfo=fitinfo)
    elif command['network_method'] == "U-INVITE":
        # U-INVITE doesn't work with perseverations
        no_persev_Xs = [no_persev(x) for x in Xs]
        bestgraph, ll = rw.uinvite(no_persev_Xs, toydata, numnodes,
                                   fitinfo=fitinfo, debug=False)

    nxg = nx.to_networkx_graph(bestgraph)
    # NOTE(review): dict-style degree() is the networkx 1.x API; under
    # networkx 2.x this needs dict(nxg.degree()).values() — confirm the
    # pinned networkx version.
    node_degree = np.mean(nxg.degree().values())
    nxg_json = jsonGraph(nxg, items)
    clustering_coefficient = nx.average_clustering(nxg)
    try:
        aspl = nx.average_shortest_path_length(nxg)
    except nx.NetworkXError:
        # Raised when the graph is not connected; previously a bare
        # `except:` here would also have hidden unrelated errors.
        aspl = "disjointed graph"
    return {
        "type": "network_properties",
        "node_degree": node_degree,
        "clustering_coefficient": clustering_coefficient,
        "aspl": aspl,
        "graph": nxg_json
    }
# read in data from similarity experiment
sims = {}


def loadRatings(f, sims):
    """Accumulate pairwise similarity ratings from a ratings CSV into sims.

    Each CSV row is split on commas; column 2 and 3 are the two items,
    column 4 the numeric rating, and column 5 a "knowboth" flag. The item
    pair is sorted alphabetically so each unordered pair is stored once as
    sims[itemA][itemB] = (mean_rating, num_ratings).

    Returns the (mutated) sims dict.
    """
    with open(f, 'r') as ratings:
        for line in ratings:
            line = line.split(',')
            knowboth = line[5]
            # NOTE(review): only rows where knowboth == "false" are kept —
            # confirm the flag's polarity against the experiment code.
            if knowboth == "false":
                items = np.sort([line[2], line[3]])
                rating = line[4]
                if items[0] not in sims:
                    sims[items[0]] = {}
                if items[1] not in sims[items[0]]:
                    sims[items[0]][items[1]] = (int(rating), 1)
                else:
                    currentrating, numratings = sims[items[0]][items[1]]
                    numratings += 1
                    # Incremental mean: re-weight the stored mean by its old
                    # count before folding in the new rating. (The previous
                    # code computed (old_mean + rating) / new_count with
                    # integer division, which understated every rating after
                    # the second and truncated the result.)
                    newmean = (currentrating * (numratings - 1)
                               + int(rating)) / float(numratings)
                    sims[items[0]][items[1]] = (newmean, numratings)
    return sims


# human ratings
sims = loadRatings('joe1.csv', sims)
sims = loadRatings('jeff1.csv', sims)
sims = loadRatings('joe2.csv', sims)

subj = "S101"
category = "animals"
Xs, items, irts, numnodes = rw.readX(subj, category, '/Users/jcz/Dropbox/projects/semnet/rw/Spring2015/results_cleaned.csv')
import networkx as nx #import graphviz import pygraphviz from itertools import * import random allsubs=["S101","S102","S103","S104","S105","S106","S107","S108","S109","S110", "S111","S112","S113","S114","S115","S116","S117","S118","S119","S120"] # free parameters jeff=0.9 # 1-IRT weight beta=1.1 # for gamma distribution when generating IRTs from hidden nodes subj="S105" category="animals" Xs, items, irts, numnodes=rw.readX(subj,category,'exp/results_cleaned.csv') # Find best graph! best_graph, bestval=rw.findBestGraph(Xs, irts, jeff, beta) best_rw=rw.noHidden(Xs, numnodes) # convert best graph to networkX graph, add labels, write to file g=nx.to_networkx_graph(best_graph) g2=nx.to_networkx_graph(best_rw) nx.relabel_nodes(g, items, copy=False) nx.relabel_nodes(g2, items, copy=False) #nx.write_dot(g,subj+".dot") # write to DOT #rw.write_csv(g,subj+".csv",subj) # write single graph rw.write_csv([g, g2],subj+".csv",subj) # write multiple graphs
import random

# Subject pool (note: no "S6" in this set).
allsubs = [
    "S1", "S2", "S3", "S4", "S5", "S7", "S8", "S9", "S10", "S11", "S12", "S13"
]

# free parameters
#jeff=0.9 # 1-IRT weight
#beta=1.1 # for gamma distribution when generating IRTs from hidden nodes

category = "animals"
vals = []
# NOTE(review): `vals` is initialized but never appended to below —
# possibly a missing vals.append(val); confirm intent.

# Compare the default starting point against the "windowgraph" starting
# point of findBestGraph for every subject.
# NOTE(review): `rw` and `nx` are imported elsewhere in this file.
for subj in allsubs:
    Xs, items, irts, numnodes = rw.readX(subj, category,
                                         'data/raw/data_cleaned.csv')
    # Find best graph!
    best_graph, bestval = rw.findBestGraph(Xs)
    best_graph2, bestval2 = rw.findBestGraph(Xs, startingpoint="windowgraph")
    #best_rw=rw.noHidden(Xs, numnodes)
    g = nx.to_networkx_graph(best_graph)
    #g2=nx.to_networkx_graph(best_rw)
    #nx.relabel_nodes(g, items, copy=False)
    #nx.relabel_nodes(g2, items, copy=False)
    #rw.write_csv([g, g2],subj+".csv",subj) # write multiple graphs
    val = [bestval, bestval2]
    print val  # Python 2 print statement
from itertools import *
import random

# Subject pool for the Spring 2015 experiment.
allsubs = [
    "S101", "S102", "S103", "S104", "S105", "S106", "S107", "S108", "S109",
    "S110", "S111", "S112", "S113", "S114", "S115", "S116", "S117", "S118",
    "S119", "S120"
]

# free parameters
jeff = 0.9  # 1-IRT weight
beta = 1.1  # for gamma distribution when generating IRTs from hidden nodes

# NOTE(review): subj "S001" does not appear in allsubs above — confirm this
# is the intended subject id for s1_data.csv.
subj = "S001"
category = "animals"

# NOTE(review): `rw` and `nx` are imported elsewhere in this file.
Xs, items, irts, numnodes = rw.readX(subj, category, 's1_data.csv')

# Find best graph!
#best_graph, bestval=rw.findBestGraph(Xs, irts, jeff, beta)
best_graph, bestval = rw.findBestGraph(Xs, numnodes=numnodes)
best_rw = rw.noHidden(Xs, numnodes)

# convert best graph to networkX graph, add labels, write to file
g = nx.to_networkx_graph(best_graph)
g2 = nx.to_networkx_graph(best_rw)
nx.relabel_nodes(g, items, copy=False)   # relabel in place: node ids -> item names
nx.relabel_nodes(g2, items, copy=False)
#nx.write_dot(g,subj+".dot") # write to DOT
#rw.write_csv(g,subj+".csv",subj) # write single graph
'numlinks': 6, 'prob_rewire': .3 }) irts = rw.Irts({ 'data': [], 'irttype': "exgauss", 'lambda': 0.721386887, 'sigma': 6.58655566, 'irt_weight': 0.9, 'rcutoff': 20 }) for subj in subs: category = "animals" Xs, items, irts.data, numnodes = rw.readX( subj, category, './Spring2015/results_cleaned.csv') uinvite_irt9, bestval = rw.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo, irts=irts) irts.irt_weight = 0.95 uinvite_irt95, bestval = rw.uinvite(Xs, toydata, numnodes, fitinfo=fitinfo, irts=irts) irts.irt_weight = 0.5 uinvite_irt5, bestval = rw.uinvite(Xs, toydata, numnodes,
# from http://locallyoptimal.com/blog/2013/01/20/elegant-n-gram-generation-in-python/ def find_ngrams(input_list, n): return zip(*[input_list[i:] for i in range(n)]) real_lists = './Spring2015/results_cleaned.csv' real_graphs = './Spring2015/s2015_combined.csv' scheme = './categories/troyer_hills_extended.csv' ## import real data real_data = {} real_irts = {} listlengths = {} for sub in subs: data, items, irts, numnodes = rw.readX(sub, "animals", real_lists) listlengths[sub] = [len(x) for x in data] data = numToAnimal(data, items) real_data[sub] = data real_irts[sub] = irts ## generate fake_data and irts yoked to real_data numsets = 100 # number of sets of fake data per SS fake_data = {} fake_irts = {} USFnet, USFanimals = rw.read_csv('./snet/USF_animal_subset.snet') USFnet = nx.to_networkx_graph(USFnet) # methods: genX, frequency, cbdfs, sa
import random

# Subject pool for the experiment.
allsubs = [
    "S101", "S102", "S103", "S104", "S105", "S106", "S107", "S108", "S109",
    "S110", "S111", "S112", "S113", "S114", "S115", "S116", "S117", "S118",
    "S119", "S120"
]

# free parameters
beta = 1.1  # for gamma distribution when generating IRTs from hidden nodes

subj = "S120"  # immediately shadowed by the loop variable below
# NOTE(review): category is "vegetables" while other chunks use "animals" —
# confirm the data file contains vegetable lists for these subjects.
category = "vegetables"

# For each subject, fit three graphs (naive random walk, U-INVITE, and
# IRT-weighted U-INVITE) and write the first two to Graphviz DOT files.
# NOTE(review): `rw` and `nx` are imported elsewhere in this file;
# nx.write_dot is the networkx 1.x API.
for subj in allsubs:
    Xs, items, irts, numnodes = rw.readX(subj, category,
                                         'exp/results_cleaned.csv')
    # Find best graphs!
    rw_graph = rw.noHidden(Xs, numnodes)
    rw_g = nx.to_networkx_graph(rw_graph)
    # use rw.drawDot instead
    nx.relabel_nodes(rw_g, items, copy=False)  # relabel in place: ids -> names
    nx.write_dot(rw_g, subj + "_rw.dot")
    invite_graph, val = rw.findBestGraph(Xs)
    invite_g = nx.to_networkx_graph(invite_graph)
    nx.relabel_nodes(invite_g, items, copy=False)
    nx.write_dot(invite_g, subj + "_invite.dot")
    jeff = 0.9  # IRT weight
    irt_graph, val = rw.findBestGraph(Xs, irts, jeff, beta)
    irt_g = nx.to_networkx_graph(irt_graph)