def demographics(con):
    """Plot ride counts per birth year, one scatter chart per gender.

    Queries the ``trips`` table for the number of trips per
    (gender, birth_year) pair, ignoring rows where either column is NULL,
    then writes one ``gender.<gender>.pdf`` file per gender (relative path).

    Args:
        con: Open database connection. Rows are read by column name
            (``row["gender"]``), so the connection must produce rows that
            support name lookup -- presumably ``sqlite3.Row``; confirm
            against the caller.
    """
    counts = {}
    cur = con.cursor()
    try:
        cur.execute("SELECT gender, birth_year, COUNT(trip_id) trip_count "
                    "FROM trips WHERE gender NOT NULL AND birth_year NOT NULL "
                    "GROUP BY gender, birth_year ORDER BY trip_count DESC")
        # Fetch in batches of 1000 rows until the cursor is exhausted.
        while True:
            rows = cur.fetchmany(1000)
            if not rows:
                break
            for row in rows:
                # NOTE(review): put_val presumably stores
                # counts[gender][birth_year] = trip_count -- verify helper.
                put_val(counts, row["gender"], row["birth_year"],
                        row["trip_count"])
    finally:
        # Release the cursor even when the query or accumulation fails.
        cur.close()

    # NOTE(review): traverse(counts, "gender") is used here as yielding
    # (keys, mapping) pairs where keys["gender"] names the group and the
    # mapping goes from birth year to ride count -- verify helper.
    for kvs, by_year in traverse(counts, "gender"):
        gender = kvs["gender"]
        xs = sorted(by_year.keys())
        ys = [by_year[x] for x in xs]
        total = sum(ys)

        fig = plt.figure()
        try:
            plt.scatter(xs, ys)
            plt.yscale("log")
            plt.xlabel("birth year")
            plt.ylabel("number of rides")
            plt.grid(axis="y")
            plt.title("%s n=%d" % (gender, total))
            plt.savefig("gender.%s.pdf" % gender, bbox_inches="tight")
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # close it so figures do not pile up across genders.
            plt.close(fig)
def collect_nested_loops(dostmt):
    """Build a nested ordered mapping of the loops nested under ``dostmt``.

    Starting from ``dostmt.parent``, ``traverse`` is run with
    ``search_nested_loops`` and a shared ``bag``. The bag is then walked
    breadth-first to assemble the result.

    NOTE(review): this relies on ``search_nested_loops`` filling
    ``bag[parent]`` with the parents of the loops nested directly inside
    ``parent`` and ``bag["dopair"][parent]`` with the do-statement belonging
    to ``parent`` -- verify against its definition.

    Args:
        dostmt: Outermost do-statement node; must expose ``.parent``.

    Returns:
        An ``odict`` mapping each do-statement to an ``odict`` of the
        do-statements nested inside it (recursively), with ``dostmt`` as the
        single top-level key.
    """
    # Function-scope import: a deque gives O(1) pops from the left, whereas
    # list.pop(0) shifts every remaining element.
    from collections import deque

    top = dostmt.parent
    bag = odict({"top": top, "dopair": {top: dostmt}})
    # Called for its effect on ``bag``; the return value is not used.
    traverse(top, search_nested_loops, bag, subnode="content", prerun=True)

    nested = odict()
    queue = deque([(top, nested)])
    while queue:
        parent, container = queue.popleft()
        children = odict()
        container[bag["dopair"][parent]] = children
        if parent in bag:
            # Children are attached to this loop's own (initially empty) odict.
            queue.extend((child, children) for child in bag[parent])
    return nested
def write_test_set(self, f, test_set):
    """Write the tokenised form of every item in ``test_set`` to ``f``.

    Each item handed over by ``traverse`` is split with
    ``nltk.word_tokenize``, its tokens are joined with single spaces, and the
    resulting line is passed to ``util.write_line``.

    Args:
        f: Output handle forwarded unchanged to ``util.write_line``.
        test_set: Collection passed to ``traverse``; how it is walked is
            decided by ``traverse`` (not visible here).
    """
    def emit(text):
        tokens = nltk.word_tokenize(text)
        return util.write_line(f, str(' '.join(tokens)))

    traverse(emit, test_set)