示例#1
0
# Choose the stream URL. When latest_datetime[0] is truthy it is formatted as
# a timestamp and appended as a `since` query parameter.
# NOTE(review): the literal 'Z' suffix assumes the stored datetime is UTC, and
# `since` presumably makes the stream replay events from that point - confirm
# both against the stream API and the code that fills latest_datetime.
if latest_datetime[0]:
    latest_date_formatted = latest_datetime[0].strftime('%Y-%m-%dT%H:%M:%SZ')

    url = base_stream_url + '?since={date}'.format(
        date=latest_date_formatted)
else:
    url = base_stream_url

# A first command-line argument of 'nohistorical' forces the plain stream URL,
# dropping any `since` parameter chosen above.
if len(sys.argv) > 1 and sys.argv[1] == 'nohistorical':
    url = base_stream_url

for event in EventSource(url):
    if event.event == 'message':
        try:
            change = json.loads(event.data)
        except ValueError:
            # Payload is not valid JSON; ignore this event.
            continue

        hashtag_matches = hashtag_match(change['comment'])
        if hashtag_matches and valid_edit(change):
            # The duplicate check and its log message both need the change
            # ID. Skip events without one instead of letting a KeyError
            # terminate the whole stream consumer.
            if 'id' not in change:
                print("Couldn't find recent changes ID in data. Skipping.")
                continue

            change_id = change['id']
            for hashtag in hashtag_matches:
                if db.is_duplicate(hashtag, change_id):
                    print("Skipped duplicate {hashtag} ({id})".format(
                        hashtag=hashtag, id=change_id))

                elif valid_hashtag(hashtag):
                    # Check edit_summary length, truncate if necessary.
                    # Comments longer than 800 characters are cut down to
                    # their first 799 characters before being stored.
                    if len(change['comment']) > 800:
                        change['comment'] = change['comment'][:799]
                    db.insert_db(hashtag, change)
示例#2
0
        retry=300000,
        # The timeout argument gets passed to requests.get.
        # An integer value sets connect (socket connect) and
        # read (time to first byte / since last byte) timeout values.
        # A tuple value sets each respective value independently.
        # https://requests.readthedocs.io/en/latest/user/advanced/#timeouts
        timeout=(3.05, 30)):
    if event.event == 'message':
        try:
            change = json.loads(event.data)
        except ValueError:
            continue

        hashtag_matches = hashtag_match(change['comment'])
        if hashtag_matches and valid_edit(change):
            for hashtag in hashtag_matches:
                if 'id' not in change:
                    print("Couldn't find recent changes ID in data. Skipping.")
                    continue
                if db.is_duplicate(hashtag, change['id']):
                    print("Skipped duplicate {hashtag} (rc_id = {id})".format(
                        hashtag=hashtag, id=change['id']))
                    continue
                if not valid_hashtag(hashtag):
                    continue
                # Check edit_summary length, truncate if necessary
                if len(change['comment']) > 800:
                    change['comment'] = change['comment'][:799]
                populate_media_information(change)
                db.insert_db(hashtag, change)