def Run(json_path):
  """Summarize Gmail chat logs from a JSON dump of message pairs.

  Args:
    json_path: path to a JSON file holding a list of raw message pairs.

  Side effects: for each day, writes one tally summary plus one HTML
  transcript per chat buddy via utils.Write*; honors the module-level
  dry_run flag.
  """
  # Bucket parsed messages by calendar day.
  acc = utils.EntryAccumulator(lambda x: x['date'].date())
  with open(json_path) as f:  # 'with' closes the handle; file() leaked it
    raw_msgs = json.load(f)
  for raw_msg in raw_msgs:
    # BUG FIX: the original rebound 'data' (the list being iterated) to
    # each parsed message; use a distinct name to avoid the shadowing.
    parsed = gmail_utils.ParseMessagePair(raw_msg, 'text/html')
    acc.add(parsed)
  for day, entries in acc.iteritems():
    day_counts = defaultdict(int)   # sender address -> count of <div> chat lines
    user_chats = defaultdict(list)  # buddy (address local part) -> HTML transcripts
    for msg in entries:
      # The local part of the sender's address identifies the buddy.
      m = re.search(r'([0-9a-zA-Z.]+)@', msg['from'])
      assert m, msg['from']
      buddy = m.group(1)
      day_counts[msg['from']] += msg['contents'].count('<div>')
      user_chats[buddy].append(msg['contents'])
    summary = utils.OrderedTallyStr(day_counts)
    utils.WriteSingleSummary(day, 'gmail-chat', summary=summary, dry_run=dry_run)
    for user, chats in user_chats.iteritems():
      filename = user + '.html'
      contents = '<html>' + '<hr/>\n'.join(chats) + '</html>'
      utils.WriteOriginal(day, 'gmail-chat', filename=filename,
                          contents=contents.encode('utf8'), dry_run=dry_run)
def Run(journal_file): raw_entries = plistlib.readPlist(journal_file) acc = utils.EntryAccumulator(lambda x: x['date']) for k, v in raw_entries.iteritems(): if not v: continue # 12/29/2001 -> 2001-12-29 new_k = re.sub(r'(\d\d)/(\d\d)/(\d\d\d\d)', r'\3-\1-\2', k) d = parser.parse(new_k) if isinstance(v, plistlib.Data): f = StringIO.StringIO(v.data) try: doc = Rtf15Reader.read(f) except ValueError as e: print v.data raise e txt = PlaintextWriter.write(doc).getvalue() acc.add({'date': d, 'rtf': v.data, 'text': txt}) else: acc.add({'date': d, 'text': v}) for day, entries in acc.iteritems(): assert len(entries) == 1 entry = entries[0] if not entry['text']: continue summary = utils.SummarizeText(entry['text']) utils.WriteSingleSummary(day, maker='osxapp', summary=summary, dry_run=dry_run) if 'rtf' in entry: utils.WriteOriginal(day, maker='osxapp', contents=entry['rtf'], filename='journal.rtf', dry_run=dry_run) else: utils.WriteOriginal(day, maker='osxapp', contents=entry['text'].encode('utf8'), filename='journal.txt', dry_run=dry_run)
def Run():
  """Import the summer-2001 journal JSON into per-day summary/original files.

  The JSON file maps 'YYYY-MM-DD' date strings to journal-entry text; the
  format allows at most one entry per day. Honors the module-level dry_run
  flag via utils.Write*.
  """
  path = '~/Dropbox/Treasure Trove/misc journals/summer 2001/journal.json'
  path = os.path.expanduser(path)
  # BUG FIX: json.load(file(path)) leaked the handle; 'with' closes it.
  with open(path) as f:
    entries = json.load(f)
  # Key each (date_string, text) pair by its parsed date.
  acc = utils.EntryAccumulator(lambda kv: datetime.strptime(kv[0], '%Y-%m-%d'))
  for kv in entries.iteritems():
    if not kv[1]:  # skip dates with empty entries
      continue
    acc.add(kv)
  for day, kv in acc.iteritems():
    assert len(kv) == 1  # one entry per day in this source
    entry = kv[0][1]
    utils.WriteSingleSummary(day, maker='journwfork',
                             summary=utils.SummarizeText(entry),
                             dry_run=dry_run)
    utils.WriteOriginal(day, maker='journwfork',
                        contents=entry.encode('utf8'),
                        filename='journal.txt', dry_run=dry_run)
if key == 'To' and ',' in value: value = re.sub(r', (.*)', r' <\1>', value) headers[key] = value.strip() last_key = key if not in_headers: msg += line + '\n' for k in ['Received', 'Sent']: if k in headers: headers[k] = parser.parse(headers[k]) return headers, msg acc = utils.EntryAccumulator(lambda x: x[0].date()) for f in sys.argv[1:]: tf = tarfile.open(f, 'r') for member in tf.getmembers(): if not member.isfile(): continue if '.DS_Store' in member.name: continue s = tf.extractfile(member) if not s: sys.stderr.write('Could not extract %s\n' % member.name) continue contents = s.read() contents = re.sub(r'\r\n', '\n', contents) contents = re.sub(r'\r', '\n', contents) headers, msg = ParseMessage(contents) # NOTE(review): collapsed fragment -- tail of a ParseMessage() whose def lies above this chunk, followed by the top-level driver that walks tar archives, skips non-files and .DS_Store, normalizes CRLF/CR to LF, and parses each message; 'Received'/'Sent' headers are run through dateutil's parser. The original line structure (nesting of the header-handling statements) is lost here, so the code is left byte-identical -- reconstruct against the full file before editing.
sys.exit(1) sms_file = sys.arv[1] data = csv.DictReader(file(sms_file)) # Prepopulated with a few special numbers that have never texted me back. # Sample google-voice.config.json file: # { # "extra-numbers": { # "+19876543210": "John Doe", # "+11234567890": "Jane Doe" # } # } config = json.load(file("google-voice.config.json")) number_to_name = config['extra-numbers'] acc = utils.EntryAccumulator(lambda row: parser.parse(row['date'])) for idx, row in enumerate(data): # i.e. "Me:" -> "Me" row['from'] = re.sub(r':$', '', row['from']) num, name = row['phone'], row['from'] if name == '': continue acc.add(row) if name == 'Me': continue if num in number_to_name: assert number_to_name[num] == name, '%5d %s: %s vs %s (%s)' % ( idx, num, name, number_to_name[num], row) else: # NOTE(review): collapsed fragment truncated at the dangling 'else:' -- left byte-identical. BUG to fix with the full file in view: 'sys.arv[1]' is a typo for 'sys.argv[1]' and will raise AttributeError at runtime.
from datetime import date, datetime dry_run = False assert len(sys.argv) == 2, ('Usage: %s foursquare.checkins.json' % sys.argv[0]) datas = json.load(file(sys.argv[1])) checkins = [] for data in datas: batch_checkins = data['response']['checkins']['items'] checkins += batch_checkins checkins = sorted(checkins, key=lambda x: x['createdAt']) acc = utils.EntryAccumulator(lambda x: date.fromtimestamp(x['time_t'])) for x in checkins: if 'venue' not in x: # mysterious! continue assert 'checkin' == x['type'] try: venue = x['venue'] loc = venue['location'] c = { 'time_t': x['createdAt'], 'tz': x['timeZoneOffset'], 'name': venue['name'], 'location': { # NOTE(review): collapsed fragment truncated mid dict-literal -- left byte-identical. The accumulator keys on x['time_t'] while raw checkins carry 'createdAt'; presumably the built 'c' dicts (which rename createdAt to time_t) are what get acc.add()'d below the cut -- confirm against the full file.
assert len(sys.argv) == 2 json_file = sys.argv[1] parsestr = email.Parser.Parser().parsestr raw_msgs = json.load(file(json_file)) msgs = [] for raw_msg in raw_msgs: m = parsestr(raw_msg.encode('utf8')) if 'Date' not in m: print m sys.exit(0) msgs.append(m) acc = utils.EntryAccumulator(lambda x: dateutil.parser.parse(x['Date'])) for msg in msgs: if 'Date' not in msg: print msg sys.exit(1) acc.add(msg) for day, msgs in acc.iteritems(): def all_to(msg): return [to.strip() for to in msg['To'].split(',')] summary = utils.OrderedTallyStr( itertools.chain.from_iterable([all_to(msg) for msg in msgs])) summary = 'Sent emails to ' + summary utils.WriteSingleSummary(day, # NOTE(review): collapsed fragment truncated mid WriteSingleSummary() call -- left byte-identical. Two issues to fix with the full file in view: 'sys.exit(0)' on the missing-Date path signals success despite being an error exit (should be nonzero), and the second 'Date' check can never fire because the first loop already exits on any message lacking it.