def process_likes(request_identifier, likes_raw):
    """Record a data point and an engagement event for every liked tweet.

    ``likes_raw`` is the text of the likes file. The
    ``window.YTD.like.part0 = `` assignment prefix is removed so the rest can
    be parsed as JSON. Each entry yields one ``pdk-external-twitter-like``
    data point and one ``reaction`` engagement event for
    ``request_identifier``.
    """
    json_text = likes_raw.replace('window.YTD.like.part0 = ', '')

    for entry in json.loads(json_text):
        liked = entry['like']

        pdk_like = {
            'pdk_hashed_tweetId': hash_content(liked['tweetId']),
            'pdk_encrypted_tweetId': encrypt_content(liked['tweetId'].encode('utf-8')),
            'pdk_encrypted_fullText': encrypt_content(liked['fullText'].encode('utf-8')),
        }

        annotate_field(pdk_like, 'fullText', liked['fullText'])

        # No timestamp available in this file, so the import time is used.
        created = timezone.now()

        DataPoint.objects.create_data_point(
            'pdk-external-twitter-like',
            request_identifier,
            pdk_like,
            user_agent='Passive Data Kit External Importer',
            created=created)

        create_engagement_event(
            source='twitter',
            identifier=request_identifier,
            outgoing_engagement=0.5,
            engagement_type='reaction',
            start=created)
def process_ad_impressions(request_identifier, ads_raw):
    """Record every ad impression found in the ad-impressions file.

    The ``window.YTD.ad_impressions.part0 = `` prefix is stripped before the
    text is parsed as JSON. For each impression accepted by
    ``include_data`` a ``pdk-external-twitter-ad-viewed`` data point and an
    ``advertising`` engagement event (outgoing engagement 0.0) are created.
    """
    json_text = ads_raw.replace('window.YTD.ad_impressions.part0 = ', '')

    for ad_view in json.loads(json_text):
        impressions = ad_view['ad']['adsUserData']['adImpressions']['impressions']

        for impression in impressions:
            created = arrow.get(impression['impressionTime']).datetime

            if not include_data(request_identifier, created, impression):
                continue

            # Only promoted tweets carry text to annotate.
            if 'promotedTweetInfo' in impression:
                annotate_field(
                    impression,
                    'tweet_text',
                    impression['promotedTweetInfo']['tweetText'])

            DataPoint.objects.create_data_point(
                'pdk-external-twitter-ad-viewed',
                request_identifier,
                impression,
                user_agent='Passive Data Kit External Importer',
                created=created)

            create_engagement_event(
                source='twitter',
                identifier=request_identifier,
                outgoing_engagement=0.0,
                engagement_type='advertising',
                start=created)
def process_search_history(request_identifier, searches_raw):
    """Queue a data point for every search text found in the export.

    ``searches_raw`` is parsed as JSON and its ``'searches'`` list is walked.
    For every ``'text'`` item inside a search's attachments, a ``search``
    engagement event is created and a ``pdk-external-facebook-search`` data
    point is queued for batch insertion.
    """
    parsed = json.loads(searches_raw)

    for search in parsed['searches']:  # pylint: disable=too-many-nested-blocks
        created = None

        try:
            created = arrow.get(search['timestamp']).datetime
        except ValueError:
            # Retry with the value divided by 1000 (presumably the timestamp
            # was expressed in milliseconds -- confirm against the export).
            try:
                created = arrow.get(search['timestamp'] / 1000).datetime
            except ValueError:
                pass

        if created is None or not include_data(request_identifier, created, search):
            continue

        if 'attachments' not in search:
            continue

        for attachment in search['attachments']:
            if 'data' not in attachment:
                continue

            for item in attachment['data']:
                if 'text' not in item:
                    continue

                payload = {
                    'pdk_encrypted_query': encrypt_content(item['text'].encode('utf-8'))
                }

                annotate_field(payload, 'query', item['text'])

                create_engagement_event(
                    source='facebook',
                    identifier=request_identifier,
                    outgoing_engagement=0.5,
                    engagement_type='search',
                    start=created)

                queue_batch_insert(DataPoint.objects.create_data_point(
                    'pdk-external-facebook-search',
                    request_identifier,
                    payload,
                    user_agent='Passive Data Kit External Importer',
                    created=created,
                    skip_save=True,
                    skip_extract_secondary_identifier=True))
def process_tweets(request_identifier, tweets_raw):
    """Record a data point and a ``post`` engagement event for each tweet.

    The ``window.YTD.tweet.part0 = `` prefix is stripped from ``tweets_raw``
    before it is parsed as JSON. For every tweet accepted by
    ``include_data`` the identifying and content fields (``id``, ``id_str``,
    ``full_text``, ``entities``, ``urls``) are replaced in place by
    ``pdk_hashed_*`` / ``pdk_encrypted_*`` counterparts before the tweet is
    stored as a ``pdk-external-twitter-tweet`` data point.
    """
    tweets_raw = tweets_raw.replace('window.YTD.tweet.part0 = ', '')

    tweets = json.loads(tweets_raw)

    for tweet in tweets:
        # Some entries wrap the actual tweet under a 'tweet' key.
        if 'tweet' in tweet:
            tweet = tweet['tweet']

        created = arrow.get(tweet['created_at'], 'ddd MMM DD HH:mm:ss Z YYYY').datetime

        if not include_data(request_identifier, created, tweet):
            continue

        if 'id' in tweet:
            tweet['pdk_hashed_id'] = hash_content(tweet['id'])
            tweet['pdk_encrypted_id'] = encrypt_content(tweet['id'].encode('utf-8'))
            del tweet['id']

        if 'id_str' in tweet:
            tweet['pdk_hashed_id_str'] = hash_content(tweet['id_str'])
            tweet['pdk_encrypted_id_str'] = encrypt_content(tweet['id_str'].encode('utf-8'))
            del tweet['id_str']

        if 'full_text' in tweet:
            tweet['pdk_encrypted_full_text'] = encrypt_content(tweet['full_text'].encode('utf-8'))
            annotate_field(tweet, 'full_text', tweet['full_text'])
            del tweet['full_text']

        if 'entities' in tweet:
            entities_str = json.dumps(tweet['entities'], indent=2)
            tweet['pdk_encrypted_entities'] = encrypt_content(entities_str.encode('utf-8'))
            del tweet['entities']

        if 'urls' in tweet:
            urls_str = json.dumps(tweet['urls'], indent=2)
            # Encrypt the serialized URLs themselves. The previous code called
            # the string ``urls_str`` as a function and encoded
            # ``entities_str``, which raised TypeError (or NameError when the
            # tweet had no entities).
            tweet['pdk_encrypted_urls'] = encrypt_content(urls_str.encode('utf-8'))
            del tweet['urls']

        DataPoint.objects.create_data_point(
            'pdk-external-twitter-tweet',
            request_identifier,
            tweet,
            user_agent='Passive Data Kit External Importer',
            created=created)

        create_engagement_event(
            source='twitter',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='post',
            start=created)
def process_messages_new(request_identifier, username, messages_raw):
    """Queue data points for the messages of one conversation export.

    ``messages_raw`` is parsed as JSON; nothing happens unless the result is
    a dict. Each entry of its ``'messages'`` list accepted by
    ``include_data`` is queued as a ``pdk-external-instagram-direct-message``
    data point and logged as a ``message`` engagement event: outgoing when
    the sender name equals ``username``, incoming otherwise.
    """
    thread = json.loads(messages_raw)

    if not isinstance(thread, dict):
        return

    for message in thread['messages']:
        # timestamp_ms is divided by 1000 before being handed to arrow.
        created = arrow.get(message['timestamp_ms'] / 1000).datetime

        if not include_data(request_identifier, created, message):
            continue

        sender = message['sender_name']

        pdk_message = {
            # Every participant except one is counted as a recipient.
            'pdk_recipients_count': len(thread['participants']) - 1,
            'pdk_hashed_senderId': hash_content(sender.encode('utf-8')),
            'pdk_encrypted_sender': encrypt_content(sender.encode('utf-8')),
            'created_at': message['timestamp_ms'],
        }

        if message.get('content') is not None:
            annotate_field(pdk_message, 'content', message['content'])
            pdk_message['pdk_encrypted_content'] = encrypt_content(
                message['content'].encode('utf-8'))

        if 'share' in message:
            shared = message['share']

            pdk_message['pdk_encrypted_media_url'] = encrypt_content(
                shared['link'].encode('utf-8'))

            if 'share_text' in shared:
                annotate_field(pdk_message, 'share_text', shared['share_text'])

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-instagram-direct-message',
            request_identifier,
            pdk_message,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

        if sender == username:
            create_engagement_event(
                source='instagram',
                identifier=request_identifier,
                outgoing_engagement=1.0,
                engagement_type='message',
                start=created)
        else:
            create_engagement_event(
                source='instagram',
                identifier=request_identifier,
                incoming_engagement=1.0,
                engagement_type='message',
                start=created)
def process_post_comments(request_identifier, post_comments_raw):
    """Queue data points for the comments listed in a comments export.

    ``post_comments_raw`` is parsed as JSON. The function returns without
    doing anything unless the result is a dict containing
    ``'comments_media_comments'``. For each comment the ``title`` and the
    ``string_list_data`` value are replaced by encrypted counterparts; if
    ``include_data`` accepts the comment, it is queued as a
    ``pdk-external-instagram-comment-posted`` data point together with a
    ``comment`` engagement event.

    Entries whose structure triggers a ``TypeError`` are skipped; the first
    such entry is printed once as a diagnostic.
    """
    post_comments = json.loads(post_comments_raw)

    if not isinstance(post_comments, dict):
        return

    if 'comments_media_comments' not in post_comments:
        return

    warned = False

    for post_comment in post_comments['comments_media_comments']:
        try:
            title = post_comment['title']
            post_comment['encrypted_title'] = encrypt_content(title.encode('utf-8'))
            del post_comment['title']

            string_data = post_comment['string_list_data']
            value = string_data['value']
            string_data['encrypted_value'] = encrypt_content(value.encode('utf-8'))
            annotate_field(string_data, 'value', value)
            del string_data['value']

            created = arrow.get(
                post_comment['string_map_data']['Time']['timestamp']).datetime

            if include_data(request_identifier, created, post_comment):
                queue_batch_insert(DataPoint.objects.create_data_point(
                    'pdk-external-instagram-comment-posted',
                    request_identifier,
                    post_comment,
                    user_agent='Passive Data Kit External Importer',
                    created=created,
                    skip_save=True,
                    skip_extract_secondary_identifier=True))

                create_engagement_event(
                    source='instagram',
                    identifier=request_identifier,
                    outgoing_engagement=1.0,
                    engagement_type='comment',
                    start=created)
        except TypeError:
            # Report only the first malformed entry to avoid flooding output.
            if not warned:
                # The message names this function; it previously reported
                # process_liked_comments, which sent readers to the wrong code.
                print('Unexpected structure encountered (process_post_comments): %s'
                      % json.dumps(post_comment, indent=2))

                warned = True
def process_unfollows(request_identifier, unfollows):
    """Record a data point and an engagement event for each unfollow item.

    ``unfollows`` is an iterable of mappings with ``'timestamp'`` and
    ``'blog_name'`` keys. Items accepted by ``include_data`` are stored as
    ``pdk-external-tumblr-unfollow`` data points and logged with engagement
    type ``'follow'``.
    """
    for item in unfollows:
        created = arrow.get(item['timestamp']).datetime

        if not include_data(request_identifier, created, item):
            continue

        blog_name = item['blog_name']

        pdk_item = {
            'pdk_hashed_blog_name': hash_content(blog_name),
            'pdk_encrypted_blog_name': encrypt_content(blog_name.encode('utf-8')),
            'timestamp': item['timestamp'],
        }

        annotate_field(pdk_item, 'blog_name', blog_name)

        DataPoint.objects.create_data_point(
            'pdk-external-tumblr-unfollow',
            request_identifier,
            pdk_item,
            user_agent='Passive Data Kit External Importer',
            created=created)

        create_engagement_event(
            source='tumblr',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='follow',
            start=created)
def process_messages(request_identifier, file_html):
    """Queue a data point for each chat message found in an HTML export.

    Every ``<li>`` element is scanned for a child starting with
    ``'Sent at '``; the remainder of that text (minus ``'while watching '``)
    is parsed as the message time. When a time was found, ``include_data``
    accepts the element, and the element's last child is not a tag, that
    child is queued as a ``pdk-external-youtube-chat-message`` data point
    with a ``chatroom`` engagement event.
    """
    document = bs4.BeautifulSoup(file_html, features='lxml')

    for item in document.findAll('li'):
        created = None

        for node in item.contents:
            try:
                if node.startswith('Sent at '):
                    stamp = node.replace('Sent at ', '').replace('while watching ', '')

                    created = arrow.get(stamp).datetime
            except TypeError:
                pass  # Not a string

        if created is None or not include_data(request_identifier, created, item):
            continue

        message = item.contents[-1]

        # Only plain (non-tag) trailing content is treated as message text.
        if isinstance(message, bs4.element.Tag):
            continue

        if message is None:
            message = ''

        payload = {
            'pdk_encrypted_message': encrypt_content(message.encode('utf-8'))
        }

        annotate_field(payload, 'message', message)

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-youtube-chat-message',
            request_identifier,
            payload,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='chatroom',
            start=created)
def process_messages(request_identifier, messages_raw, full_names):
    """Queue data points for the messages of one conversation export.

    ``messages_raw`` is parsed as JSON and its ``'messages'`` list is
    walked. Each message is deep-copied before modification. For messages
    with a usable timestamp that ``include_data`` accepts, ``content`` and
    any shared ``link`` are replaced by encrypted counterparts,
    ``pdk_direction`` is set to ``'outgoing'`` when the sender name is in
    ``full_names`` (``'incoming'`` otherwise), a matching ``message``
    engagement event is created, and the message is queued as a
    ``pdk-external-facebook-message`` data point.
    """
    conversation = json.loads(messages_raw)

    for original in conversation['messages']:
        message = copy.deepcopy(original)

        created = None

        try:
            created = arrow.get(message['timestamp_ms']).datetime
        except ValueError:
            # Retry with the value scaled down by 1000 when arrow rejects it.
            try:
                created = arrow.get(message['timestamp_ms'] / 1000).datetime
            except ValueError:
                pass

        if created is None or not include_data(request_identifier, created, message):
            continue

        if 'content' in message:
            message['pdk_encrypted_content'] = encrypt_content(message['content'].encode('utf-8'))
            annotate_field(message, 'content', message['content'])
            del message['content']

        if 'share' in message:
            share = message['share']

            # Direct membership test; the previous code deep-copied the share
            # and scanned every key just to find 'link'.
            if 'link' in share:
                share['pdk_encrypted_link'] = encrypt_content(share['link'].encode('utf-8'))
                annotate_field(share, 'link', share['link'])
                del share['link']

        if message['sender_name'] in full_names:
            message['pdk_direction'] = 'outgoing'

            create_engagement_event(
                source='facebook',
                identifier=request_identifier,
                outgoing_engagement=1.0,
                engagement_type='message',
                start=created)
        else:
            message['pdk_direction'] = 'incoming'

            create_engagement_event(
                source='facebook',
                identifier=request_identifier,
                incoming_engagement=1.0,
                engagement_type='message',
                start=created)

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-facebook-message',
            request_identifier,
            message,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))
def process_likes(request_identifier, file_json):
    """Queue a data point and a ``reaction`` event for each liked item.

    ``file_json`` is parsed as a JSON list. For every entry accepted by
    ``include_data`` the snippet title is encrypted and its length recorded,
    the ``snippet`` and ``contentDetails`` objects are replaced by encrypted
    JSON dumps, and the entry is queued as a ``pdk-external-youtube-like``
    data point.
    """
    serialized_fields = (
        ('snippet', 'pdk_encrypted_snippet'),
        ('contentDetails', 'pdk_encrypted_contentDetails'),
    )

    for like in json.loads(file_json):
        created = arrow.get(like['snippet']['publishedAt']).datetime

        if not include_data(request_identifier, created, like):
            continue

        like['pdk_encrypted_title'] = encrypt_content(like['snippet']['title'].encode('utf-8'))
        like['pdk_length_title'] = len(like['snippet']['title'])

        annotate_field(like, 'title', like['snippet']['title'])

        # Swap each nested object for an encrypted JSON dump of itself.
        for plain_key, encrypted_key in serialized_fields:
            if plain_key in like:
                dumped = json.dumps(like[plain_key], indent=2)
                like[encrypted_key] = encrypt_content(dumped.encode('utf-8'))
                del like[plain_key]

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-youtube-like',
            request_identifier,
            like,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=0.5,
            engagement_type='reaction',
            start=created)
def process_uploads(request_identifier, file_json):
    """Queue a data point and an ``upload`` event for each uploaded item.

    ``file_json`` is parsed as a JSON list. For every entry accepted by
    ``include_data`` the snippet title is encrypted, the ``snippet`` and
    ``contentDetails`` objects are replaced by encrypted JSON dumps, and the
    entry is queued as a ``pdk-external-youtube-upload`` data point.
    """
    serialized_fields = (
        ('snippet', 'pdk_encrypted_snippet'),
        ('contentDetails', 'pdk_encrypted_contentDetails'),
    )

    for upload in json.loads(file_json):
        created = arrow.get(upload['snippet']['publishedAt']).datetime

        if not include_data(request_identifier, created, upload):
            continue

        upload['pdk_encrypted_title'] = encrypt_content(upload['snippet']['title'].encode('utf-8'))

        annotate_field(upload, 'title', upload['snippet']['title'])

        # Swap each nested object for an encrypted JSON dump of itself.
        for plain_key, encrypted_key in serialized_fields:
            if plain_key in upload:
                dumped = json.dumps(upload[plain_key], indent=2)
                upload[encrypted_key] = encrypt_content(dumped.encode('utf-8'))
                del upload[plain_key]

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-youtube-upload',
            request_identifier,
            upload,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='upload',
            start=created)
def process_page_reactions(request_identifier, reactions_raw):
    """Queue a ``like`` reaction data point for each liked page.

    ``reactions_raw`` is parsed as JSON and its ``'page_likes'`` list is
    walked. Entries accepted by ``include_data`` have their ``name``
    replaced by an encrypted counterpart, are tagged with
    ``content_type='page'`` and ``reaction='like'``, and are queued as
    ``pdk-external-facebook-reaction`` data points with a ``reaction``
    engagement event.
    """
    page_likes = json.loads(reactions_raw)['page_likes']

    for reaction in page_likes:
        created = arrow.get(reaction['timestamp']).datetime

        if not include_data(request_identifier, created, reaction):
            continue

        if 'name' in reaction:
            reaction['pdk_encrypted_name'] = encrypt_content(reaction['name'].encode('utf-8'))
            annotate_field(reaction, 'name', reaction['name'])
            del reaction['name']

        reaction.update({'content_type': 'page', 'reaction': 'like'})

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-facebook-reaction',
            request_identifier,
            reaction,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='facebook',
            identifier=request_identifier,
            outgoing_engagement=0.5,
            engagement_type='reaction',
            start=created)
def process_comments(request_identifier, comments_raw):
    """Queue a data point and a ``comment`` event for each listed comment.

    ``comments_raw`` is parsed as JSON; nothing happens unless the result is
    a dict. Each value of that dict is treated as a list of comments, and
    each comment as a sequence whose items 0, 1 and 2 are used as the time,
    the comment text and the profile respectively. The time has its tzinfo
    replaced with US/Pacific.
    """
    parsed = json.loads(comments_raw)

    if not isinstance(parsed, dict):
        return

    for comment_list in parsed.values():
        for comment in comment_list:
            created = arrow.get(comment[0]).replace(
                tzinfo=pytz.timezone('US/Pacific')).datetime

            if not include_data(request_identifier, created, comment):
                continue

            comment_point = {
                'pdk_encrypted_comment': encrypt_content(comment[1].encode('utf-8')),
            }

            annotate_field(comment_point, 'comment', comment[1])

            comment_point['pdk_hashed_profile'] = hash_content(comment[2])
            comment_point['pdk_encrypted_profile'] = encrypt_content(comment[2].encode('utf-8'))

            queue_batch_insert(DataPoint.objects.create_data_point(
                'pdk-external-instagram-comment',
                request_identifier,
                comment_point,
                user_agent='Passive Data Kit External Importer',
                created=created,
                skip_save=True,
                skip_extract_secondary_identifier=True))

            create_engagement_event(
                source='instagram',
                identifier=request_identifier,
                outgoing_engagement=1.0,
                engagement_type='comment',
                start=created)
def process_search_history(request_identifier, file_json):
    """Queue a data point and a ``search`` event for each search entry.

    ``file_json`` is parsed as a JSON list. For every entry accepted by
    ``include_data`` the ``title`` is replaced by an encrypted counterpart;
    ``titleUrl``, when present, is replaced by an encrypted counterpart and
    its length. The entry is then queued as a
    ``pdk-external-youtube-search`` data point.
    """
    for search in json.loads(file_json):
        created = arrow.get(search['time']).datetime

        if not include_data(request_identifier, created, search):
            continue

        search['pdk_encrypted_title'] = encrypt_content(search['title'].encode('utf-8'))
        annotate_field(search, 'title', search['title'])
        del search['title']

        if 'titleUrl' in search:
            title_url = search['titleUrl']

            search['pdk_encrypted_titleUrl'] = encrypt_content(title_url.encode('utf-8'))
            search['pdk_length_titleUrl'] = len(title_url)

            del search['titleUrl']

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-youtube-search',
            request_identifier,
            search,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='search',
            start=created)
def process_watch_history(request_identifier, file_json):
    """Queue a data point and a ``watch`` event for each watch entry.

    ``file_json`` is parsed as a JSON list. Entries accepted by
    ``include_data`` are annotated on their ``title`` and queued as
    ``pdk-external-youtube-watch`` data points. The entry is stored as-is;
    no field is encrypted or removed here.
    """
    for watch in json.loads(file_json):
        created = arrow.get(watch['time']).datetime

        if not include_data(request_identifier, created, watch):
            continue

        annotate_field(watch, 'title', watch['title'])

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-youtube-watch',
            request_identifier,
            watch,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='youtube',
            identifier=request_identifier,
            outgoing_engagement=0.5,
            engagement_type='watch',
            start=created)
def process_posts_made(request_identifier, posts_made_raw):
    """Queue a data point and a ``post`` event for each post in the export.

    ``posts_made_raw`` is parsed as JSON; nothing happens unless the result
    is a list. A post's time is taken from the ``creation_timestamp`` of its
    first media item. For posts accepted by ``include_data`` every media
    item has its ``title`` replaced by an encrypted counterpart and any
    photo ``exif_data`` removed before the post is queued as a
    ``pdk-external-instagram-post`` data point.
    """
    parsed = json.loads(posts_made_raw)

    if not isinstance(parsed, list):
        return

    for post in parsed:
        created = arrow.get(post['media'][0]['creation_timestamp']).datetime

        if not include_data(request_identifier, created, post):
            continue

        for media in post['media']:
            media['encrypted_title'] = encrypt_content(media['title'].encode('utf-8'))
            annotate_field(media, 'title', media['title'])
            del media['title']

            # EXIF data is dropped when present; its absence is not an error.
            try:
                del media['media_metadata']['photo_metadata']['exif_data']
            except KeyError:
                pass

        queue_batch_insert(DataPoint.objects.create_data_point(
            'pdk-external-instagram-post',
            request_identifier,
            post,
            user_agent='Passive Data Kit External Importer',
            created=created,
            skip_save=True,
            skip_extract_secondary_identifier=True))

        create_engagement_event(
            source='instagram',
            identifier=request_identifier,
            outgoing_engagement=1.0,
            engagement_type='post',
            start=created)
def process_comments(request_identifier, comments_raw):  # pylint: disable=too-many-branches
    """Queue a data point and a ``comment`` event for each exported comment.

    ``comments_raw`` is parsed as JSON. Comments are read from the
    ``'comments'`` list and then from the ``'comments_v2'`` list, whichever
    are present; both are handled identically. Each comment is deep-copied
    before modification. For comments accepted by ``include_data`` the
    ``title`` and the nested comment text and author are replaced by
    ``pdk_encrypted_*`` / ``pdk_hashed_*`` counterparts, and the comment is
    queued as a ``pdk-external-facebook-comment`` data point.
    """
    comments = json.loads(comments_raw)

    # One loop serves both export layouts; previously the whole body was
    # duplicated for each key.
    for list_key in ('comments', 'comments_v2'):  # pylint: disable=too-many-nested-blocks
        if list_key not in comments:
            continue

        for original in comments[list_key]:
            comment = copy.deepcopy(original)

            created = arrow.get(comment['timestamp']).datetime

            if not include_data(request_identifier, created, comment):
                continue

            if 'title' in comment:
                comment['pdk_encrypted_title'] = encrypt_content(comment['title'].encode('utf-8'))
                annotate_field(comment, 'title', comment['title'])
                del comment['title']

            if 'data' in comment:
                for datum in comment['data']:
                    if 'comment' not in datum:
                        continue

                    comment_obj = datum['comment']

                    if 'comment' in comment_obj:
                        comment_obj['pdk_encrypted_comment'] = encrypt_content(
                            comment_obj['comment'].encode('utf-8'))
                        annotate_field(comment_obj, 'comment', comment_obj['comment'])
                        del comment_obj['comment']

                    if 'author' in comment_obj:
                        comment_obj['pdk_hashed_author'] = hash_content(comment_obj['author'])
                        comment_obj['pdk_encrypted_author'] = encrypt_content(
                            comment_obj['author'].encode('utf-8'))
                        del comment_obj['author']

            queue_batch_insert(DataPoint.objects.create_data_point(
                'pdk-external-facebook-comment',
                request_identifier,
                comment,
                user_agent='Passive Data Kit External Importer',
                created=created,
                skip_save=True,
                skip_extract_secondary_identifier=True))

            create_engagement_event(
                source='facebook',
                identifier=request_identifier,
                outgoing_engagement=1.0,
                engagement_type='comment',
                start=created)
def process_visited(request_identifier, viewed_raw):
    """Queue data points for profile, page, event and group visits.

    ``viewed_raw`` is parsed as JSON and its ``'visited_things'`` list is
    walked. Groups named ``'Profile visits'``, ``'Page visits'``,
    ``'Events visited'`` or ``'Groups visited'`` are processed; any other
    group is ignored. For each entry accepted by ``include_data`` the
    ``uri`` and ``name`` under ``entry['data']`` are replaced by encrypted
    and hashed counterparts, the entry is queued under the data point name
    for its group, and an engagement event with outgoing engagement 0.0 is
    created.
    """
    # Group name -> (data point generator, engagement type). The four groups
    # were previously handled by four copies of the same code.
    visit_kinds = {
        'Profile visits': ('pdk-external-facebook-profile-visit', 'profile'),
        'Page visits': ('pdk-external-facebook-page-visit', 'page'),
        'Events visited': ('pdk-external-facebook-event-visit', 'event'),
        'Groups visited': ('pdk-external-facebook-group-visit', 'group'),
    }

    metadata = json.loads(viewed_raw)

    for thing in metadata['visited_things']:
        group_name = thing['name']

        # Non-string names cannot match (and may be unhashable), so they are
        # skipped just as the former equality checks skipped them.
        kind = visit_kinds.get(group_name) if isinstance(group_name, str) else None

        if kind is None:
            continue

        generator, engagement_type = kind

        for entry in thing['entries']:
            created = arrow.get(entry['timestamp']).datetime

            if not include_data(request_identifier, created, entry):
                continue

            details = entry['data']

            details['pdk_encrypted_uri'] = encrypt_content(details['uri'].encode('utf-8'))
            details['pdk_hashed_uri'] = hash_content(details['uri'].encode('utf-8'))
            del details['uri']

            details['pdk_encrypted_name'] = encrypt_content(details['name'].encode('utf-8'))
            details['pdk_hashed_name'] = hash_content(details['name'].encode('utf-8'))
            annotate_field(entry, 'name', details['name'])
            del details['name']

            queue_batch_insert(DataPoint.objects.create_data_point(
                generator,
                request_identifier,
                entry,
                user_agent='Passive Data Kit External Importer',
                created=created,
                skip_save=True,
                skip_extract_secondary_identifier=True))

            create_engagement_event(
                source='facebook',
                identifier=request_identifier,
                outgoing_engagement=0.0,
                engagement_type=engagement_type,
                start=created)
def process_media(request_identifier, media_raw):
    """Queue data points for the photos and videos listed in a media export.

    ``media_raw`` is parsed as JSON. The ``'photos'`` list is processed
    first, then the ``'videos'`` list, whichever are present. Each item's
    time comes from ``taken_at`` with its tzinfo replaced by US/Pacific.
    For items accepted by ``include_data`` the ``caption`` (required) and
    ``location`` (optional) are replaced by encrypted counterparts, and the
    item is queued as a ``pdk-external-instagram-photo`` or
    ``pdk-external-instagram-video`` data point with a matching engagement
    event.
    """
    media = json.loads(media_raw)

    # (export key, data point generator, engagement type). Photos and videos
    # were previously handled by two copies of the same code.
    media_kinds = (
        ('photos', 'pdk-external-instagram-photo', 'photo'),
        ('videos', 'pdk-external-instagram-video', 'video'),
    )

    for list_key, generator, engagement_type in media_kinds:
        if list_key not in media:
            continue

        for item in media[list_key]:
            created = arrow.get(item['taken_at']).replace(
                tzinfo=pytz.timezone('US/Pacific')).datetime

            if not include_data(request_identifier, created, item):
                continue

            item['pdk_encrypted_caption'] = encrypt_content(item['caption'].encode('utf-8'))
            annotate_field(item, 'caption', item['caption'])
            del item['caption']

            if 'location' in item:
                item['pdk_encrypted_location'] = encrypt_content(item['location'].encode('utf-8'))
                annotate_field(item, 'location', item['location'])
                del item['location']

            queue_batch_insert(DataPoint.objects.create_data_point(
                generator,
                request_identifier,
                item,
                user_agent='Passive Data Kit External Importer',
                created=created,
                skip_save=True,
                skip_extract_secondary_identifier=True))

            create_engagement_event(
                source='instagram',
                identifier=request_identifier,
                outgoing_engagement=1.0,
                engagement_type=engagement_type,
                start=created)
def process_post_comment_reactions(request_identifier, reactions_raw):  # pylint: disable=too-many-branches
    """Queue a data point and a ``reaction`` event for each exported reaction.

    ``reactions_raw`` is parsed as JSON. Reactions are read from the
    ``'reactions'`` list and then from the ``'reactions_v2'`` list,
    whichever are present; both are handled identically. For reactions
    accepted by ``include_data``:

    * the ``title`` is replaced by an encrypted counterpart and used to set
      ``content_type`` to ``post``, ``comment``, ``photo``, ``video`` or
      ``unknown``, depending on which possessive phrase it contains;
    * each nested reaction name is lower-cased and its ``actor`` replaced by
      an encrypted counterpart.

    The reaction is then queued as a ``pdk-external-facebook-reaction`` data
    point.
    """
    # Checked in order; the first phrase found in the title wins.
    title_markers = (
        ("'s post", 'post'),
        ("'s comment", 'comment'),
        ("'s photo", 'photo'),
        ("'s video", 'video'),
    )

    reactions = json.loads(reactions_raw)

    # One loop serves both export layouts; previously the whole body was
    # duplicated for each key.
    for list_key in ('reactions', 'reactions_v2'):  # pylint: disable=too-many-nested-blocks
        if list_key not in reactions:
            continue

        for reaction in reactions[list_key]:
            created = arrow.get(reaction['timestamp']).datetime

            if not include_data(request_identifier, created, reaction):
                continue

            if 'title' in reaction:
                reaction['pdk_encrypted_title'] = encrypt_content(reaction['title'].encode('utf-8'))
                annotate_field(reaction, 'title', reaction['title'])

                content_type = 'unknown'

                for marker, marker_type in title_markers:
                    if marker in reaction['title']:
                        content_type = marker_type
                        break

                reaction['content_type'] = content_type

                del reaction['title']

            if 'data' in reaction:
                for data_item in reaction['data']:
                    if 'reaction' not in data_item:
                        continue

                    inner = data_item['reaction']

                    inner['reaction'] = inner['reaction'].lower()

                    if 'actor' in inner:
                        inner['pdk_encrypted_actor'] = encrypt_content(inner['actor'].encode('utf-8'))
                        annotate_field(inner, 'actor', inner['actor'])
                        del inner['actor']

            queue_batch_insert(DataPoint.objects.create_data_point(
                'pdk-external-facebook-reaction',
                request_identifier,
                reaction,
                user_agent='Passive Data Kit External Importer',
                created=created,
                skip_save=True,
                skip_extract_secondary_identifier=True))

            create_engagement_event(
                source='facebook',
                identifier=request_identifier,
                outgoing_engagement=0.5,
                engagement_type='reaction',
                start=created)
def process_posts(request_identifier, posts_raw): # pylint: disable=too-many-branches, too-many-statements
    """Queue a data point and a ``post`` engagement event for each post.

    ``posts_raw`` is parsed as JSON. A top-level dict is labelled as source
    ``'others'`` (and unwrapped from
    ``wall_posts_sent_to_you.activity_log_data`` when present); anything
    else is labelled ``'user'``. Each dict post accepted by ``include_data``
    is deep-copied, has its text-bearing fields (title, post text, and the
    event / external_context / media / place attachment data) replaced by
    ``pdk_encrypted_*`` counterparts, is tagged with
    ``pdk_facebook_source``, and is queued as a
    ``pdk-external-facebook-post`` data point.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-flattened source -- confirm against the original file.
    """
    posts = json.loads(posts_raw)

    source = 'user'

    if isinstance(posts, dict):
        source = 'others'

        if 'wall_posts_sent_to_you' in posts and 'activity_log_data' in posts['wall_posts_sent_to_you']:
            posts = posts['wall_posts_sent_to_you']['activity_log_data']

    # A container holding 'timestamp' directly is treated as a single post
    # and wrapped so the loop below can handle it.
    if 'timestamp' in posts:
        posts = [posts]

    for post in posts: # pylint: disable=too-many-nested-blocks
        # Work on a copy so the parsed input is not modified.
        post = copy.deepcopy(post)

        # Non-dict items are skipped.
        if isinstance(post, dict):
            created = arrow.get(post['timestamp']).datetime

            if include_data(request_identifier, created, post):
                if 'title' in post:
                    post['pdk_encrypted_title'] = encrypt_content(post['title'].encode('utf-8'))

                    annotate_field(post, 'title', post['title'])

                    del post['title']

                if 'data' in post:
                    for datum in post['data']:
                        if 'post' in datum:
                            datum['pdk_encrypted_post'] = encrypt_content(datum['post'].encode('utf-8'))

                            annotate_field(datum, 'post', datum['post'])

                            del datum['post']

                if 'attachments' in post:
                    for attachment in post['attachments']:
                        if 'data' in attachment:
                            for datum in attachment['data']:
                                if 'event' in datum:
                                    event = datum['event']

                                    if 'name' in event:
                                        event['pdk_encrypted_name'] = encrypt_content(event['name'].encode('utf-8'))

                                        annotate_field(event, 'name', event['name'])

                                        del event['name']

                                    if 'description' in event:
                                        event['pdk_encrypted_description'] = encrypt_content(event['description'].encode('utf-8'))

                                        annotate_field(event, 'description', event['description'])

                                        del event['description']

                                    if 'place' in event:
                                        # The place object is serialized to JSON text before encryption.
                                        place_str = json.dumps(event['place'], indent=2)

                                        event['pdk_encrypted_place'] = encrypt_content(place_str.encode('utf-8'))

                                        annotate_field(event, 'place', place_str)

                                        del event['place']

                                if 'external_context' in datum:
                                    external_context = datum['external_context']

                                    if 'url' in external_context:
                                        external_context['pdk_encrypted_url'] = encrypt_content(external_context['url'].encode('utf-8'))

                                        annotate_field(external_context, 'url', external_context['url'])

                                        del external_context['url']

                                if 'media' in datum:
                                    media = datum['media']

                                    if 'title' in media:
                                        media['pdk_encrypted_title'] = encrypt_content(media['title'].encode('utf-8'))

                                        annotate_field(media, 'title', media['title'])

                                        del media['title']

                                    if 'description' in media:
                                        media['pdk_encrypted_description'] = encrypt_content(media['description'].encode('utf-8'))

                                        annotate_field(media, 'description', media['description'])

                                        del media['description']

                                    if 'uri' in media:
                                        media['pdk_encrypted_uri'] = encrypt_content(media['uri'].encode('utf-8'))

                                        annotate_field(media, 'uri', media['uri'])

                                        del media['uri']

                                    if 'media_metadata' in media:
                                        # Metadata is encrypted as JSON text; it is not annotated.
                                        metadata_str = json.dumps(media['media_metadata'], indent=2)

                                        media['pdk_encrypted_media_metadata'] = encrypt_content(metadata_str.encode('utf-8'))

                                        del media['media_metadata']

                                if 'place' in datum:
                                    # Unlike an event's place, this one is encrypted without annotation.
                                    place_str = json.dumps(datum['place'], indent=2)

                                    datum['pdk_encrypted_place'] = encrypt_content(place_str.encode('utf-8'))

                                    del datum['place']

                post['pdk_facebook_source'] = source

                queue_batch_insert(DataPoint.objects.create_data_point('pdk-external-facebook-post', request_identifier, post, user_agent='Passive Data Kit External Importer', created=created, skip_save=True, skip_extract_secondary_identifier=True))

                create_engagement_event(source='facebook', identifier=request_identifier, outgoing_engagement=1.0, engagement_type='post', start=created)
def _infer_twitter_account_id(conversations):
    """Return the ID shared by the conversation IDs, or None without conversations.

    Conversation IDs are split on ``-`` and the candidate list is narrowed by
    intersecting it with each further conversation until one ID remains.
    The previous implementation used a set *union*, which can only grow the
    candidate list and left the chosen ID dependent on set ordering.

    NOTE(review): assumes the archive owner's ID is one of the dash-separated
    parts of every one-to-one conversation ID -- confirm against the archive
    format.
    """
    candidates = []

    for conversation in conversations:
        if len(candidates) == 1:
            break

        # dict.fromkeys de-duplicates while keeping order (e.g. "42-42").
        tokens = list(dict.fromkeys(conversation['dmConversation']['conversationId'].split('-')))

        if not candidates:
            candidates = tokens
            continue

        shared = [token for token in candidates if token in tokens]

        # An ID with nothing in common is ignored rather than emptying the list.
        if shared:
            candidates = shared

    return candidates[0] if candidates else None


def _record_twitter_dm_engagement(request_identifier, my_id, sender_id, weight, engagement_type, created):
    """Create an outgoing event when ``sender_id`` is ``my_id``, else an incoming one."""
    direction = 'outgoing_engagement' if my_id == sender_id else 'incoming_engagement'

    create_engagement_event(source='twitter', identifier=request_identifier, engagement_type=engagement_type, start=created, **{direction: weight})


def _import_twitter_dm(request_identifier, my_id, conversation_id, msg_data):
    """Store one ``messageCreate`` payload and its ``message`` engagement event."""
    created = arrow.get(msg_data['createdAt']).datetime

    if not include_data(request_identifier, created, msg_data):
        return

    pdk_message = {
        'pdk_hashed_recipientId': hash_content(msg_data['recipientId']),
        'pdk_encrypted_recipientId': encrypt_content(msg_data['recipientId'].encode('utf-8')),
        'pdk_hashed_senderId': hash_content(msg_data['senderId']),
        'pdk_encrypted_senderId': encrypt_content(msg_data['senderId'].encode('utf-8')),
        'pdk_encrypted_text': encrypt_content(msg_data['text'].encode('utf-8')),
        'id': msg_data['id'],
        'conversationId': conversation_id,
        'createdAt': msg_data['createdAt'],
    }

    annotate_field(pdk_message, 'text', msg_data['text'])

    if msg_data['mediaUrls']:
        media_urls_str = json.dumps(msg_data['mediaUrls'], indent=2)

        pdk_message['pdk_encrypted_mediaUrls'] = encrypt_content(media_urls_str.encode('utf-8'))

    DataPoint.objects.create_data_point('pdk-external-twitter-direct-message', request_identifier, pdk_message, user_agent='Passive Data Kit External Importer', created=created)

    _record_twitter_dm_engagement(request_identifier, my_id, msg_data['senderId'], 1.0, 'message', created)


def _import_twitter_dm_reaction(request_identifier, my_id, msg_data):
    """Store one ``reactionCreate`` payload and its ``reaction`` engagement event."""
    created = arrow.get(msg_data['createdAt']).datetime

    if not include_data(request_identifier, created, msg_data):
        return

    pdk_message = {
        'pdk_hashed_senderId': hash_content(msg_data['senderId']),
        'pdk_encrypted_senderId': encrypt_content(msg_data['senderId'].encode('utf-8')),
        'eventId': msg_data['eventId'],
        'reactionKey': msg_data['reactionKey'],
        'createdAt': msg_data['createdAt'],
    }

    DataPoint.objects.create_data_point('pdk-external-twitter-direct-message-reaction', request_identifier, pdk_message, user_agent='Passive Data Kit External Importer', created=created)

    _record_twitter_dm_engagement(request_identifier, my_id, msg_data['senderId'], 0.5, 'reaction', created)


def process_direct_messages(request_identifier, messages_raw):
    """Import Twitter direct messages and reactions from an archive file.

    The ``window.YTD.direct_message(s).part0 = `` JavaScript prefix is removed
    before the remainder is parsed as JSON. Each ``messageCreate`` and
    ``reactionCreate`` entry accepted by ``include_data`` becomes a data point
    plus an engagement event, outgoing when the sender is the inferred account
    ID and incoming otherwise. Other entry types are printed for inspection.

    Args:
        request_identifier: Identifier passed to ``include_data``, the data
            points and the engagement events.
        messages_raw: Text content of the archive's direct-message file.
    """
    for prefix in ('window.YTD.direct_message.part0 = ', 'window.YTD.direct_messages.part0 = '):
        messages_raw = messages_raw.replace(prefix, '')

    conversations = json.loads(messages_raw)

    my_id = _infer_twitter_account_id(conversations)

    if my_id is None:
        return

    for conversation in conversations:
        details = conversation['dmConversation']

        for message in details['messages']:
            if 'messageCreate' in message:
                _import_twitter_dm(request_identifier, my_id, details['conversationId'], message['messageCreate'])
            elif 'reactionCreate' in message:
                _import_twitter_dm_reaction(request_identifier, my_id, message['reactionCreate'])
            else:
                print('TWITTER/MSG: ' + json.dumps(message, indent=2))
def _seal_viewed_field(entry, key, annotate=False):
    """Replace ``entry['data'][key]`` with encrypted and hashed counterparts.

    Writes ``pdk_encrypted_<key>`` and ``pdk_hashed_<key>`` into
    ``entry['data']``, optionally passes the clear text to ``annotate_field``
    (called on ``entry`` itself, not on ``entry['data']``), then deletes the
    original key.
    """
    data = entry['data']
    value = data[key]

    data['pdk_encrypted_' + key] = encrypt_content(value.encode('utf-8'))
    data['pdk_hashed_' + key] = hash_content(value.encode('utf-8'))

    if annotate:
        annotate_field(entry, key, value)

    del data[key]


def _import_viewed_entries(request_identifier, entries, generator, engagement_type, extra_fields=(), uri_required=True, duration_key=None): # pylint: disable=too-many-arguments
    """Queue one data point and one engagement event per accepted entry.

    Args:
        request_identifier: Identifier passed to ``include_data``, the data
            point and the engagement event.
        entries: Iterable of entry dicts with ``timestamp`` and ``data`` keys.
        generator: Data point generator identifier.
        engagement_type: ``engagement_type`` of the engagement event.
        extra_fields: Additional ``data`` keys to protect after ``uri``.
        uri_required: When false, a missing ``uri`` is tolerated.
        duration_key: Optional ``data`` key whose value is passed as the
            event's ``duration``.
    """
    for entry in entries:
        created = arrow.get(entry['timestamp']).datetime

        if not include_data(request_identifier, created, entry):
            continue

        if uri_required or 'uri' in entry['data']:
            _seal_viewed_field(entry, 'uri')

        for key in extra_fields:
            _seal_viewed_field(entry, key)

        _seal_viewed_field(entry, 'name', annotate=True)

        queue_batch_insert(DataPoint.objects.create_data_point(generator, request_identifier, entry, user_agent='Passive Data Kit External Importer', created=created, skip_save=True, skip_extract_secondary_identifier=True))

        event_options = {}

        # Looked up only after the data point is queued, as before.
        if duration_key is not None:
            event_options['duration'] = entry['data'][duration_key]

        create_engagement_event(source='facebook', identifier=request_identifier, outgoing_engagement=0.0, engagement_type=engagement_type, start=created, **event_options)


def process_viewed(request_identifier, viewed_raw):
    """Import the "viewed things" section of a Facebook export.

    Walks ``viewed_things`` and imports the recognized categories: watched
    shows and watch time, live videos, articles, marketplace items and ads.
    Categories and children with other names are ignored.

    Args:
        request_identifier: Identifier passed to ``include_data``, the data
            points and the engagement events.
        viewed_raw: JSON text containing a top-level ``viewed_things`` list.
    """
    metadata = json.loads(viewed_raw)

    for thing in metadata['viewed_things']:
        category = thing['name']

        if category == 'Facebook Watch Videos and Shows':
            for child in thing['children']:
                if child['name'] == 'Shows':
                    _import_viewed_entries(request_identifier, child['entries'], 'pdk-external-facebook-watch', 'video')
                elif child['name'] == 'Time Viewed':
                    _import_viewed_entries(request_identifier, child['entries'], 'pdk-external-facebook-watch', 'video', duration_key='watch_position_seconds')
        elif category == 'Facebook Live Videos':
            _import_viewed_entries(request_identifier, thing['entries'], 'pdk-external-facebook-watch', 'video')
        elif category == 'Articles':
            _import_viewed_entries(request_identifier, thing['entries'], 'pdk-external-facebook-link', 'link', extra_fields=('share',))
        elif category == 'Marketplace Interactions':
            for child in thing['children']:
                if child['name'] == 'Marketplace Items':
                    _import_viewed_entries(request_identifier, child['entries'], 'pdk-external-facebook-market', 'shopping')
        elif category == 'Ads':
            # Ad entries may lack a uri; every other category requires one.
            _import_viewed_entries(request_identifier, thing['entries'], 'pdk-external-facebook-ad-viewed', 'advertising', uri_required=False)