Example #1
def Run(json_path):
    acc = utils.EntryAccumulator(lambda x: x['date'].date())

    data = json.load(file(json_path))
    for msg in data:
        parsed = gmail_utils.ParseMessagePair(msg, 'text/html')
        acc.add(parsed)

    for day, entries in acc.iteritems():
        day_counts = defaultdict(int)
        user_chats = defaultdict(list)

        for msg in entries:
            m = re.search(r'([0-9a-zA-Z.]+)@', msg['from'])
            assert m, msg['from']
            buddy = m.group(1)
            day_counts[msg['from']] += msg['contents'].count('<div>')
            user_chats[buddy].append(msg['contents'])

        summary = utils.OrderedTallyStr(day_counts)
        utils.WriteSingleSummary(day,
                                 'gmail-chat',
                                 summary=summary,
                                 dry_run=dry_run)
        for user, chats in user_chats.iteritems():
            filename = user + '.html'
            contents = '<html>' + '<hr/>\n'.join(chats) + '</html>'
            utils.WriteOriginal(day,
                                'gmail-chat',
                                filename=filename,
                                contents=contents.encode('utf8'),
                                dry_run=dry_run)
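
All of these snippets funnel their records through utils.EntryAccumulator and then
walk the per-day buckets via iteritems(). A minimal sketch of that helper, assuming
only the interface the call sites use (a key-function constructor, add(), and sorted
per-key iteration); the real utils module may do more:

from collections import defaultdict

class EntryAccumulator(object):
    """Buckets entries by the key returned by key_fn (a date in these scripts)."""

    def __init__(self, key_fn):
        self._key_fn = key_fn
        self._buckets = defaultdict(list)

    def add(self, entry):
        self._buckets[self._key_fn(entry)].append(entry)

    def iteritems(self):
        # Yield (key, entries) pairs in ascending key order, matching the
        # day-by-day iteration the examples rely on.
        for key in sorted(self._buckets):
            yield key, self._buckets[key]
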
Example #2
def Run(journal_file):
    raw_entries = plistlib.readPlist(journal_file)

    acc = utils.EntryAccumulator(lambda x: x['date'])
    for k, v in raw_entries.iteritems():
        if not v: continue
        # 12/29/2001 -> 2001-12-29
        new_k = re.sub(r'(\d\d)/(\d\d)/(\d\d\d\d)', r'\3-\1-\2', k)
        d = parser.parse(new_k)

        if isinstance(v, plistlib.Data):
            f = StringIO.StringIO(v.data)
            try:
                doc = Rtf15Reader.read(f)
            except ValueError:
                print v.data
                raise
            txt = PlaintextWriter.write(doc).getvalue()
            acc.add({'date': d, 'rtf': v.data, 'text': txt})
        else:
            acc.add({'date': d, 'text': v})

    for day, entries in acc.iteritems():
        assert len(entries) == 1
        entry = entries[0]

        if not entry['text']:
            continue

        summary = utils.SummarizeText(entry['text'])
        utils.WriteSingleSummary(day,
                                 maker='osxapp',
                                 summary=summary,
                                 dry_run=dry_run)
        if 'rtf' in entry:
            utils.WriteOriginal(day,
                                maker='osxapp',
                                contents=entry['rtf'],
                                filename='journal.rtf',
                                dry_run=dry_run)
        else:
            utils.WriteOriginal(day,
                                maker='osxapp',
                                contents=entry['text'].encode('utf8'),
                                filename='journal.txt',
                                dry_run=dry_run)
Example #3
def Run():
  path = '~/Dropbox/Treasure Trove/misc journals/summer 2001/journal.json'
  path = os.path.expanduser(path)

  entries = json.load(file(path))
  acc = utils.EntryAccumulator(lambda kv: datetime.strptime(kv[0], '%Y-%m-%d'))
  for kv in entries.iteritems():
    if not kv[1]: continue
    acc.add(kv)

  for day, entries in acc.iteritems():
    assert len(entries) == 1
    entry = entries[0][1]
    utils.WriteSingleSummary(day, maker='journwfork',
        summary=utils.SummarizeText(entry), dry_run=dry_run)
    utils.WriteOriginal(day, maker='journwfork', contents=entry.encode('utf8'),
        filename='journal.txt', dry_run=dry_run)
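
Example #3 assumes journal.json maps ISO-formatted date strings to plain-text entries,
one per day (that is what the '%Y-%m-%d' key function and the len == 1 assert imply).
A hypothetical illustration of that shape:

# Assumed journal.json layout (illustrative only):
# {
#   "2001-06-14": "entry text for that day...",
#   "2001-06-15": ""
# }
# Empty entries are skipped by the `if not kv[1]: continue` check above.
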
Example #4
            if key == 'To' and ',' in value:
                value = re.sub(r', (.*)', r' <\1>', value)
            headers[key] = value.strip()
            last_key = key

        if not in_headers:
            msg += line + '\n'

    for k in ['Received', 'Sent']:
        if k in headers:
            headers[k] = parser.parse(headers[k])

    return headers, msg


acc = utils.EntryAccumulator(lambda x: x[0].date())

for f in sys.argv[1:]:
    tf = tarfile.open(f, 'r')
    for member in tf.getmembers():
        if not member.isfile(): continue
        if '.DS_Store' in member.name: continue

        s = tf.extractfile(member)
        if not s:
            sys.stderr.write('Could not extract %s\n' % member.name)
            continue
        contents = s.read()
        contents = re.sub(r'\r\n', '\n', contents)
        contents = re.sub(r'\r', '\n', contents)
        headers, msg = ParseMessage(contents)
Example #5
    sys.exit(1)

sms_file = sys.argv[1]
data = csv.DictReader(file(sms_file))

# Prepopulated with a few special numbers that have never texted me back.
# Sample google-voice.config.json file:
# {
#   "extra-numbers": {
#     "+19876543210": "John Doe",
#     "+11234567890": "Jane Doe"
#   }
# }
config = json.load(file("google-voice.config.json"))
number_to_name = config['extra-numbers']
acc = utils.EntryAccumulator(lambda row: parser.parse(row['date']))

for idx, row in enumerate(data):
    # e.g. "Me:" -> "Me"
    row['from'] = re.sub(r':$', '', row['from'])

    num, name = row['phone'], row['from']
    if name == '': continue

    acc.add(row)
    if name == 'Me': continue

    if num in number_to_name:
        assert number_to_name[num] == name, '%5d %s: %s vs %s (%s)' % (
            idx, num, name, number_to_name[num], row)
    else:
Example #6
from datetime import date, datetime

dry_run = False

assert len(sys.argv) == 2, ('Usage: %s foursquare.checkins.json' % sys.argv[0])

datas = json.load(file(sys.argv[1]))

checkins = []
for data in datas:
    batch_checkins = data['response']['checkins']['items']
    checkins += batch_checkins

checkins = sorted(checkins, key=lambda x: x['createdAt'])

acc = utils.EntryAccumulator(lambda x: date.fromtimestamp(x['time_t']))

for x in checkins:
    if 'venue' not in x:
        # mysterious!
        continue

    assert 'checkin' == x['type']
    try:
        venue = x['venue']
        loc = venue['location']
        c = {
            'time_t': x['createdAt'],
            'tz': x['timeZoneOffset'],
            'name': venue['name'],
            'location': {
Example #7
assert len(sys.argv) == 2
json_file = sys.argv[1]

parsestr = email.Parser.Parser().parsestr
raw_msgs = json.load(file(json_file))
msgs = []
for raw_msg in raw_msgs:
    m = parsestr(raw_msg.encode('utf8'))
    if 'Date' not in m:
        print m
        sys.exit(0)

    msgs.append(m)

acc = utils.EntryAccumulator(lambda x: dateutil.parser.parse(x['Date']))
for msg in msgs:
    acc.add(msg)

for day, day_msgs in acc.iteritems():

    def all_to(msg):
        return [to.strip() for to in msg['To'].split(',')]

    summary = utils.OrderedTallyStr(
        itertools.chain.from_iterable([all_to(msg) for msg in day_msgs]))
    summary = 'Sent emails to ' + summary
    utils.WriteSingleSummary(day,