# NOTE(review): this line is a whitespace-collapsed Python 2 fragment
# (`print extension`, `except Exception, e`); the original indentation is
# lost, so statement nesting below is ambiguous -- confirm against the
# unmangled file before editing.
#
# First part: tail of a function whose header is not visible here. It checks
# `extension` against csv/html/xls/json/xlsx and reports an invalid filename
# on stderr, prints `extension`, then writes the attribute of `ds` named by
# `extension` to `filename`, reporting "saved ..." on stdout or the text of
# any caught exception on stderr.
#
# Second part: the `__main__` script. It opens the 'mongolab' connection via
# `mc.open_connection`, loads at most LIMIT documents with `find({})`,
# collects the union of all document keys into a set, converts them to a list
# of `str`, and pretty-prints that list.
if extension not in ('csv', 'html', 'xls', 'json', 'xlsx'): sys.stderr.write("invalid filename: %s filename\n" % filename) print extension try: open(filename,'w').write(ds.__getattribute__(extension)) sys.stdout.write("saved %s \n" % filename) except Exception, e: sys.stderr.write(str(e)) if __name__ == '__main__': c = mc.open_connection('mongolab') sys.stderr.write('loading events..') a = list(c.find({}).limit(LIMIT)) sys.stderr.write('loaded %d events\n' % len(a)) unique_keys = set() for doc in a: for k in doc.keys(): unique_keys.add(k) headers = list(map(str,list(unique_keys))) pprint.pprint(headers)
import sys
from collections import defaultdict

import mongo_connect as mc


def key_count(collection):
    """Count how many documents in *collection* contain each top-level key.

    Progress messages go to stderr; every 1000th document (starting with
    index 0) is echoed to stdout together with its index.

    Args:
        collection: Object exposing ``find(query)``; it is called as
            ``find({})`` and the result is iterated as dict-like documents.

    Returns:
        A plain ``dict`` mapping each key to the number of documents in
        which that key appears.
    """
    sys.stderr.write('loading collection...')
    # Materialised up front so the record count can be reported before the
    # counting pass starts.
    docs = list(collection.find({}))
    sys.stderr.write('loaded %d records\n' % len(docs))

    sys.stderr.write('counting keys...')
    keycount = defaultdict(int)
    for i, doc in enumerate(docs):
        # Iterating the document yields its keys directly, so no
        # per-document key list has to be built first.
        for key in doc:
            keycount[key] += 1
        if i % 1000 == 0:
            # Same text the Python 2 statement `print i, doc` produced, but
            # written through sys.stdout so it also runs on Python 3.
            sys.stdout.write('%s %s\n' % (i, doc))
    return dict(keycount)


if __name__ == '__main__':
    c = mc.open_connection(account='mongolab')
    # NOTE(review): the result is bound but never printed or saved here --
    # presumably inspected interactively (e.g. `python -i`); confirm.
    keycount = key_count(c)