def wordSummary(db, table):
    """Write per-word reaction-time statistics for ``table`` to a text file.

    One comma-separated line is written to ``wordSummary_<table>.txt`` for
    every distinct ``word`` in ``table``: the word, its length, the
    ``stats.mean`` and ``stats.samplestdev`` of ``RT`` over the rows with
    ``incorrect = 0``, the total number of rows for the word, and the
    percentage of those rows that were correct.  The counts and each output
    line are also echoed to stdout.

    Args:
        db: Object exposing ``query(sql)``; the result must be iterable and
            indexable, yielding rows that are indexable by column position.
        table: Name of the table to summarise.  It is interpolated into the
            SQL text unescaped, so it must come from a trusted source.
    """
    # ``with`` guarantees the output file is closed even when a query or a
    # stats call raises part-way through.
    with open("wordSummary_%s.txt" % table, 'w') as f:
        f.write("word, length, rtAVG, rtSTD, total, percCorrect\n")

        # Materialise the word list before issuing any further queries.
        wordList = [row[0] for row in db.query("SELECT DISTINCT(word) FROM %s" % table)]

        for word in wordList:
            # Double embedded single quotes so a word such as "don't" does
            # not terminate the SQL string literal early.
            quoted = word.replace("'", "''")

            sql = "SELECT RT FROM %s WHERE word = '%s' AND incorrect = 0" % (table, quoted)
            rtList = [row[0] for row in db.query(sql)]

            if rtList:
                rtAVG = "%f" % stats.mean(rtList)
                rtSTD = "%f" % stats.samplestdev(rtList)
            else:
                # A word that was never answered correctly has no RT sample.
                # Write NA (as writeByCategory does for empty categories)
                # instead of handing an empty list to the stats functions.
                rtAVG = rtSTD = "NA"

            total = db.query("SELECT COUNT(*) FROM %s WHERE word = '%s'" % (table, quoted))[0][0]
            percCorrect = float(len(rtList)) / float(total) * 100.0

            # Progress output: number of correct rows and total rows.
            print("%s %s" % (len(rtList), total))

            myString = "%s, %i, %s, %s, %i, %f\n" % (
                word, len(word), rtAVG, rtSTD, total, percCorrect)
            print(myString)
            f.write(myString)
def writeByCategory(db, table):
    """Write one CSV line of per-category RT statistics for each subject.

    Output goes to ``<table>_by_category.txt``.  Each line holds the subject
    id, a sex code (2 for "male", 1 for "female", ``NA`` for anything else),
    then, for every category seen for that subject in sorted order, the
    ``stats.mean`` and ``stats.samplestdev`` of RT, and finally a percentage
    column.  Only rows with ``incorrect == 0`` and a z-score within [-3, 3]
    contribute to the statistics; a category with no such rows is written as
    ``NA,NA``.  Category names are echoed to stdout as they are written.

    The category columns are derived per subject, so subjects with different
    category sets produce lines with different column layouts.

    Args:
        db: Object exposing ``getSubjects(table)``, ``getSex(subject, table)``
            and ``query(sql)`` (rows indexable by column position).
        table: Table name.  It is interpolated into the SQL text unescaped,
            so it must come from a trusted source.
    """
    sexCodes = {"male": 2, "female": 1}

    # ``with`` guarantees the file is closed even if a query or stats call
    # raises part-way through.
    with open("%s_by_category.txt" % table, "w") as f:
        for subject in db.getSubjects(table):
            # Any value other than male/female is written as NA rather than
            # raising NameError (first subject) or silently reusing the
            # previous subject's code (later subjects).
            se = sexCodes.get(db.getSex(subject, table), "NA")
            f.write(str(subject) + "," + str(se) + ",")

            total = db.query(
                "SELECT COUNT(*) FROM %s WHERE subject = %s" % (table, subject))[0][0]
            incorrectSum = db.query(
                "SELECT SUM(incorrect) FROM %s WHERE subject = %s" % (table, subject))[0][0]
            # NOTE(review): SUM(incorrect) adds up the ``incorrect`` column.
            # If that column is a 0/1 flag with 0 meaning "correct" (as the
            # filter below assumes), this value is the percentage of
            # INCORRECT trials despite its name -- confirm the intent.  The
            # computed value is deliberately left unchanged here.
            if incorrectSum:
                percCorrect = float(incorrectSum) / float(total) * 100.0
            else:
                percCorrect = 0.0

            sql = "SELECT category, RT, zscore, incorrect FROM %s WHERE subject = %s" % (
                table, subject)
            rtByCategory = {}
            for cat, RT, zscore, incorrect in db.query(sql):
                # Register the category even when this row is filtered out,
                # so that it still gets an NA,NA column pair.
                samples = rtByCategory.setdefault(cat, [])
                if incorrect == 0 and -3 <= zscore <= 3:
                    samples.append(RT)

            # Every pair is prefixed with a comma; combined with the trailing
            # comma written after the sex code this leaves an empty field
            # before the first category.  Kept so that existing consumers of
            # the file see the same layout.
            parts = []
            for cat in sorted(rtByCategory):
                print(cat)
                samples = rtByCategory[cat]
                if samples:
                    avg = stats.mean(samples)
                    std = stats.samplestdev(samples)
                else:
                    avg = std = "NA"
                parts.append("," + str(avg) + "," + str(std))

            f.write("".join(parts))
            f.write("," + str(percCorrect) + "\n")
def writeByCategory(db, table):
    """Write one CSV line of per-category RT statistics for each subject.

    Output goes to ``<table>_by_category.txt``.  Each line holds the subject
    id, a sex code (2 for "male", 1 for "female", ``NA`` for anything else),
    then, for every category seen for that subject in sorted order, the
    ``stats.mean`` and ``stats.samplestdev`` of RT, and finally a percentage
    column.  Only rows with ``incorrect == 0`` and a z-score within [-3, 3]
    contribute to the statistics; a category with no such rows is written as
    ``NA,NA``.  Category names are echoed to stdout as they are written.

    The category columns are derived per subject, so subjects with different
    category sets produce lines with different column layouts.

    Args:
        db: Object exposing ``getSubjects(table)``, ``getSex(subject, table)``
            and ``query(sql)`` (rows indexable by column position).
        table: Table name.  It is interpolated into the SQL text unescaped,
            so it must come from a trusted source.
    """
    sexCodes = {"male": 2, "female": 1}

    # ``with`` guarantees the file is closed even if a query or stats call
    # raises part-way through.
    with open("%s_by_category.txt" % table, "w") as f:
        for subject in db.getSubjects(table):
            # Any value other than male/female is written as NA rather than
            # raising NameError (first subject) or silently reusing the
            # previous subject's code (later subjects).
            se = sexCodes.get(db.getSex(subject, table), "NA")
            f.write(str(subject) + "," + str(se) + ",")

            total = db.query(
                "SELECT COUNT(*) FROM %s WHERE subject = %s" % (table, subject))[0][0]
            incorrectSum = db.query(
                "SELECT SUM(incorrect) FROM %s WHERE subject = %s" % (table, subject))[0][0]
            # NOTE(review): SUM(incorrect) adds up the ``incorrect`` column.
            # If that column is a 0/1 flag with 0 meaning "correct" (as the
            # filter below assumes), this value is the percentage of
            # INCORRECT trials despite its name -- confirm the intent.  The
            # computed value is deliberately left unchanged here.
            if incorrectSum:
                percCorrect = float(incorrectSum) / float(total) * 100.0
            else:
                percCorrect = 0.0

            sql = "SELECT category, RT, zscore, incorrect FROM %s WHERE subject = %s" % (
                table, subject)
            rtByCategory = {}
            for cat, RT, zscore, incorrect in db.query(sql):
                # Register the category even when this row is filtered out,
                # so that it still gets an NA,NA column pair.
                samples = rtByCategory.setdefault(cat, [])
                if incorrect == 0 and -3 <= zscore <= 3:
                    samples.append(RT)

            # Every pair is prefixed with a comma; combined with the trailing
            # comma written after the sex code this leaves an empty field
            # before the first category.  Kept so that existing consumers of
            # the file see the same layout.
            parts = []
            for cat in sorted(rtByCategory):
                print(cat)
                samples = rtByCategory[cat]
                if samples:
                    avg = stats.mean(samples)
                    std = stats.samplestdev(samples)
                else:
                    avg = std = "NA"
                parts.append("," + str(avg) + "," + str(std))

            f.write("".join(parts))
            f.write("," + str(percCorrect) + "\n")
def calculateZ(db, table):
    """Store a per-subject z-score for every correct trial in ``table``.

    For each subject returned by ``db.getSubjects(table)``, the RTs of the
    rows whose ``incorrect`` value is falsy are standardised against that
    subject's own ``stats.mean`` and ``stats.samplestdev`` and written back
    to the ``zscore`` column with one UPDATE per row.  Rows with a truthy
    ``incorrect`` value are not updated.

    Args:
        db: Object exposing ``getSubjects(table)`` and ``query(sql)``; query
            results must yield ``(ROWID, RT, incorrect)`` rows.
        table: Table name.  It is interpolated into the SQL text unescaped,
            so it must come from a trusted source.
    """
    for s in db.getSubjects(table):
        sql = "SELECT ROWID, RT, incorrect FROM %s WHERE subject = %s" % (table, s)

        # Collect everything first so the UPDATEs below never run while the
        # SELECT result is still being consumed.
        rtByRow = {}
        for rowid, RT, incorrect in db.query(sql):
            if not incorrect:
                rtByRow[rowid] = RT

        if not rtByRow:
            # No correct trials for this subject: there is no sample to
            # standardise against and no row to update.
            continue

        rts = list(rtByRow.values())
        avg = stats.mean(rts)
        std = stats.samplestdev(rts)

        for rowid, RT in rtByRow.items():
            if std:
                # z = (x - mean) / sd.  The parentheses matter: without them
                # only the mean would be divided by the deviation.
                z = (RT - avg) / std
            else:
                # Zero spread (a single trial, or all RTs identical): every
                # RT equals the mean, so store a deviation of 0 instead of
                # dividing by zero.
                z = 0.0
            db.query("UPDATE %s SET zscore = %f WHERE ROWID = %s" % (table, z, rowid))
def calculateZ(db, table):
    """Store a per-subject z-score for every correct trial in ``table``.

    For each subject returned by ``db.getSubjects(table)``, the RTs of the
    rows whose ``incorrect`` value is falsy are standardised against that
    subject's own ``stats.mean`` and ``stats.samplestdev`` and written back
    to the ``zscore`` column with one UPDATE per row.  Rows with a truthy
    ``incorrect`` value are not updated.

    Args:
        db: Object exposing ``getSubjects(table)`` and ``query(sql)``; query
            results must yield ``(ROWID, RT, incorrect)`` rows.
        table: Table name.  It is interpolated into the SQL text unescaped,
            so it must come from a trusted source.
    """
    for s in db.getSubjects(table):
        sql = "SELECT ROWID, RT, incorrect FROM %s WHERE subject = %s" % (table, s)

        # Collect everything first so the UPDATEs below never run while the
        # SELECT result is still being consumed.
        rtByRow = {}
        for rowid, RT, incorrect in db.query(sql):
            if not incorrect:
                rtByRow[rowid] = RT

        if not rtByRow:
            # No correct trials for this subject: there is no sample to
            # standardise against and no row to update.
            continue

        rts = list(rtByRow.values())
        avg = stats.mean(rts)
        std = stats.samplestdev(rts)

        for rowid, RT in rtByRow.items():
            if std:
                # z = (x - mean) / sd.  The parentheses matter: without them
                # only the mean would be divided by the deviation.
                z = (RT - avg) / std
            else:
                # Zero spread (a single trial, or all RTs identical): every
                # RT equals the mean, so store a deviation of 0 instead of
                # dividing by zero.
                z = 0.0
            db.query("UPDATE %s SET zscore = %f WHERE ROWID = %s" % (table, z, rowid))
def make_subtrees_stddev(graph, ratio, distance, relabel=1, lab="cluster."):
    """Cut edges whose cost stands out from nearby lengths, then relabel.

    For every edge of ``graph``, ``get_lengths`` is called on the edge's
    ``from_node`` (``distance`` is passed through) with a fresh list, and the
    edge's own cost is then removed from that list.  When more than one
    length is left, ``stats.samplestdev`` of the pair (mean of the remaining
    lengths, edge cost) is compared against ``ratio``; the edge is removed
    from the graph if the value is greater.

    If ``relabel`` is truthy, every node is first reset with
    ``classify_manual("")`` and ``label`` is then invoked, with an increasing
    counter, for each node whose main id is still the empty string.

    Args:
        graph: Graph exposing ``get_edges``, ``get_nodes`` and
            ``remove_edge``; its nodes are callables returning the node
            object.
        ratio: Threshold above which an edge is cut.
        distance: Forwarded unchanged to ``get_lengths``.
        relabel: Relabel the graph after cutting when truthy.
        lab: Label prefix forwarded to ``label``.

    Returns:
        A list with the object returned by calling each node of the graph.
    """
    import stats

    doomed = []
    for edge in graph.get_edges():
        # get_lengths is expected to fill ``nearby`` in place; the dict is
        # its per-call scratch argument.
        nearby = []
        get_lengths(edge.from_node, distance, nearby, 0, {})
        nearby.remove(edge.cost)

        if len(nearby) <= 1:
            continue

        spread = stats.samplestdev([stats.mean(nearby), edge.cost])
        if spread > ratio:
            doomed.append(edge)

    # Edges are only collected above and removed here, after the scan over
    # graph.get_edges() has finished.
    for edge in doomed:
        graph.remove_edge(edge)

    if relabel:
        next_label = 0
        for node in graph.get_nodes():
            node().classify_manual("")
        for node in graph.get_nodes():
            if node().get_main_id() == "":
                label(graph, node, lab, next_label)
                next_label += 1

    return [node() for node in graph.get_nodes()]
# Relative frequencies for each of the four fixtures; l, lf, a and af are
# bound earlier in the script, outside this excerpt.
print('relfreq:')
for _sample in (l, lf, a, af):
    print(stats.relfreq(_sample))

print('\nVARIATION')
print('obrientransform:')

# From here on the list fixture is 1..20 and the array fixture mirrors it.
l = range(1, 21)
a = N.array(l)
ll = [l] * 5
aa = N.array(ll)

for _sample in (l, a):
    print(stats.obrientransform(_sample, _sample, _sample, _sample, _sample))

# Each of these statistics is printed for the list and the array side by
# side, prefixed with the function's own name.
for _stat_name in ('samplevar', 'samplestdev', 'var', 'stdev', 'sterr', 'sem'):
    _stat_fn = getattr(stats, _stat_name)
    print(_stat_name + ':', _stat_fn(l), _stat_fn(a))

print('z:', stats.z(l, 4), stats.z(a, 4))

print('zs:')
for _sample in (l, a):
    print(stats.zs(_sample))

print('\nTRIMMING')
print('trimboth:')
for _sample in (l, lf, a, af):
    print(stats.trimboth(_sample, .2))

print('trim1:')