def load_feedbackmodule_impression_events(events, from_date, to_date):
    """Build a DataFrame of non-test feedback-module impression events.

    Rows of ``events`` whose ``event`` column equals
    ``"contentitem_feedbackmodule"`` and whose ``time`` column lies in the
    inclusive range ``[from_date, to_date]`` are selected. Each selected
    row's ``properties`` column is parsed as JSON, and the nested
    ``'properties'`` object is kept only when its ``'istest'`` value is
    ``False`` and its ``'messageid'`` is not ``None``.

    Args:
        events: Raw event source exposing ``event``, ``time`` and
            ``properties`` columns (presumably a Spark DataFrame, given the
            ``.filter`` / ``.rdd`` usage -- confirm against the caller).
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        The result of ``toDF(sampleRatio=0.2)`` over rows with the fields
        ``feedback_module_impression_message_id`` and
        ``feedback_module_impression_timestamp``.
    """
    def extract_properties(record):
        return json.loads(record.properties)['properties']

    def is_not_test(props):
        # Plain subscript: a payload without an 'istest' key raises instead
        # of being skipped (assuming the payload is a dict).
        return props['istest'] is False

    def has_message_id(props):
        return props.get('messageid') is not None

    def to_row(props):
        return Row(
            feedback_module_impression_message_id=props['messageid'],
            feedback_module_impression_timestamp=to_timestamp(props['timestamp']),
        )

    of_this_type = events.filter(events.event == "contentitem_feedbackmodule")
    in_window = of_this_type.filter(
        (events.time >= from_date) & (events.time <= to_date)
    )

    payloads = in_window.rdd.map(extract_properties)
    payloads = payloads.filter(is_not_test)
    payloads = payloads.filter(has_message_id)

    feedbackmodule_impression_events = payloads.map(to_row).toDF(sampleRatio=0.2)
    print('FeedbackModule impression events loaded')
    return feedbackmodule_impression_events
def load_dismiss_events(events, from_date, to_date):
    """Build a DataFrame of non-test ``contentitem_dismiss`` events.

    Selects rows of ``events`` whose ``event`` column equals
    ``"contentitem_dismiss"`` and whose ``time`` column lies in the inclusive
    range ``[from_date, to_date]``. The ``properties`` column of each row is
    parsed as JSON; the nested ``'properties'`` object is kept only when its
    ``'istest'`` value is ``False`` and its ``'messageid'`` is not ``None``.

    Args:
        events: Raw event source exposing ``event``, ``time`` and
            ``properties`` columns (presumably a Spark DataFrame -- confirm
            against the caller).
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        The result of ``toDF(sampleRatio=0.2)`` over rows with the fields
        ``dismiss_message_id`` and ``dismiss_timestamp``.
    """
    def parse_payload(record):
        return json.loads(record.properties)['properties']

    def is_usable(payload):
        # 'istest' is checked first, exactly as in a two-step filter chain:
        # test payloads are rejected before 'messageid' is looked at.
        if payload['istest'] is not False:
            return False
        return payload.get('messageid') is not None

    def as_dismiss_row(payload):
        return Row(
            dismiss_message_id=payload['messageid'],
            dismiss_timestamp=to_timestamp(payload['timestamp']),
        )

    selected = events.filter(events.event == "contentitem_dismiss")
    selected = selected.filter(
        (events.time >= from_date) & (events.time <= to_date)
    )

    dismiss_rows = selected.rdd.map(parse_payload).filter(is_usable).map(as_dismiss_row)
    dismiss_events = dismiss_rows.toDF(sampleRatio=0.2)
    print('Dismiss events loaded')
    return dismiss_events
def load_view_events(events, from_date, to_date):
    """Build a DataFrame of non-test ``contentitem_view`` events.

    Selects rows of ``events`` whose ``event`` column equals
    ``"contentitem_view"`` and whose ``time`` column lies in the inclusive
    range ``[from_date, to_date]``. The ``properties`` column of each row is
    parsed as JSON; the nested ``'properties'`` object is kept only when its
    ``'istest'`` value is ``False`` and both ``'daysfromeulaapproval'`` and
    ``'messageid'`` are not ``None``.

    Args:
        events: Raw event source exposing ``event``, ``time`` and
            ``properties`` columns (presumably a Spark DataFrame -- confirm
            against the caller).
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        The result of ``toDF(sampleRatio=0.2)`` over rows with the fields
        ``view_message_id`` and ``view_timestamp``.
    """
    view_events = (
        events.filter(events.event == "contentitem_view")
        .filter((events.time >= from_date) & (events.time <= to_date))
        .rdd.map(lambda x: json.loads(x.properties)['properties'])
        # Plain subscript: a payload without an 'istest' key raises instead
        # of being skipped (assuming the payload is a dict).
        .filter(lambda x: x['istest'] is False)
        # Identity comparison with None, matching the sibling loaders.
        .filter(lambda x: x.get('daysfromeulaapproval') is not None)
        .filter(lambda x: x.get('messageid') is not None)
        # 'messageid' is guaranteed present and non-None by the filter above.
        .map(lambda x: Row(view_message_id=x['messageid'],
                           view_timestamp=to_timestamp(x['timestamp'])))
        .toDF(sampleRatio=0.2)
    )
    print('View events loaded')
    return view_events
def load_received_events(events, from_date, to_date):
    """Build a DataFrame of non-test ``contentitem_received`` events.

    Selects rows of ``events`` whose ``event`` column equals
    ``"contentitem_received"`` and whose ``time`` column lies in the
    inclusive range ``[from_date, to_date]``. The ``properties`` column of
    each row is parsed as JSON; the nested ``'properties'`` object is kept
    only when its ``'istest'`` value is ``False`` and its ``'messageid'`` is
    not ``None``.

    Args:
        events: Raw event source exposing ``event``, ``time`` and
            ``properties`` columns (presumably a Spark DataFrame -- confirm
            against the caller).
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        The result of ``toDF(sampleRatio=0.2)`` over rows with the fields
        ``received_message_id``, ``agent_locale``, ``device_locale`` and
        ``received_timestamp``. The two locale fields fall back to ``''``
        when the key is absent from the payload.
    """
    received_events = (
        events.filter(events.event == "contentitem_received")
        .filter((events.time >= from_date) & (events.time <= to_date))
        .rdd.map(lambda x: json.loads(x.properties)['properties'])
        # Plain subscript: a payload without an 'istest' key raises instead
        # of being skipped (assuming the payload is a dict).
        .filter(lambda x: x['istest'] is False)
        # Identity comparison with None, matching the sibling loaders.
        .filter(lambda x: x.get('messageid') is not None)
        .map(lambda x: Row(received_message_id=x.get('messageid', ''),
                           agent_locale=x.get('agentlocale', ''),
                           device_locale=x.get('devicelocale', ''),
                           received_timestamp=to_timestamp(x['timestamp'])))
        .toDF(sampleRatio=0.2)
    )
    print('Received events loaded')
    return received_events
def load_click_events(events, from_date, to_date):
    """Build a DataFrame of non-test click actions grouped by message id.

    Selects rows of ``events`` whose ``event`` column equals
    ``"contentitem_click"`` and whose ``time`` column lies in the inclusive
    range ``[from_date, to_date]``. The ``properties`` column of each row is
    parsed as JSON; the nested ``'properties'`` object is kept only when its
    ``'istest'`` value is ``False`` and both ``'messageid'`` and
    ``'actionid'`` are not ``None``. The surviving clicks are grouped by
    message id, and each group's ``(action, timestamp)`` pairs are collected
    into a list.

    Args:
        events: Raw event source exposing ``event``, ``time`` and
            ``properties`` columns (presumably a Spark DataFrame -- confirm
            against the caller).
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        The result of ``toDF(sampleRatio=0.2)`` over rows with the fields
        ``click_message_id`` and ``actions``, where ``actions`` is the
        collected list of ``(action, timestamp)`` structs for that message.
    """
    click_events = (
        events.filter(events.event == "contentitem_click")
        .filter((events.time >= from_date) & (events.time <= to_date))
        .rdd.map(lambda x: json.loads(x.properties)['properties'])
        # Plain subscript: a payload without an 'istest' key raises instead
        # of being skipped (assuming the payload is a dict).
        .filter(lambda x: x['istest'] is False)
        .filter(lambda x: x.get('messageid', None) is not None)
        .filter(lambda x: x.get('actionid', None) is not None)
        .map(lambda x: Row(click_message_id=x['messageid'],
                           action=x['actionid'],
                           timestamp=to_timestamp(x['timestamp'])))
        .toDF(sampleRatio=0.2)
    )

    grouped_click_events_by_messageid = (
        click_events.groupBy('click_message_id')
        # Name the aggregate explicitly. Renaming Spark's auto-generated
        # column name afterwards is fragile: that name is an implementation
        # detail, and renaming a column that does not exist is a silent
        # no-op, which would make the 'actions' lookup below fail.
        .agg(F.collect_list(F.struct("action", "timestamp")).alias('actions'))
        # The Row round trip is kept so the returned DataFrame is produced
        # the same way as before (schema inferred from Row objects).
        .rdd.map(lambda x: Row(click_message_id=x['click_message_id'],
                               actions=x['actions']))
        .toDF(sampleRatio=0.2)
    )
    print('click events loaded')
    return grouped_click_events_by_messageid
def load_sent_events(events, from_date, to_date, record_example, sqlContext):
    """Build the wide DataFrame of non-test ``contentitem_sent`` events.

    Selects rows of ``events`` whose ``event`` column equals
    ``"contentitem_sent"`` and whose ``time`` column lies in the inclusive
    range ``[from_date, to_date]``. The ``properties`` column of each row is
    parsed as JSON; the nested ``'properties'`` object is kept only when its
    ``'istest'`` value is ``False`` and ``'daysfromeulaapproval'``,
    ``'messageid'`` and ``'deviceid'`` are all not ``None``. Each surviving
    payload is mapped to a ``Row`` of renamed fields, converted to a dict,
    and handed to ``rdd_to_df``.

    Args:
        events: Raw event source exposing ``event``, ``time`` and
            ``properties`` columns (presumably a Spark DataFrame -- confirm
            against the caller).
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.
        record_example: Passed through unchanged to ``rdd_to_df``.
        sqlContext: Passed through unchanged to ``rdd_to_df``.

    Returns:
        Whatever ``rdd_to_df(sent_events, record_example, sqlContext)``
        returns for the RDD of per-event dicts.
    """
    sent_events = (
        events.filter(events.event == "contentitem_sent")
        .filter((events.time >= from_date) & (events.time <= to_date))
        .rdd.map(lambda x: json.loads(x.properties)['properties'])
        # Plain subscript: a payload without an 'istest' key raises instead
        # of being skipped (assuming the payload is a dict).
        .filter(lambda x: x['istest'] is False)
        # Identity comparison with None, matching the sibling loaders.
        .filter(lambda x: x.get('daysfromeulaapproval') is not None)
        .filter(lambda x: x.get('messageid') is not None)
        .filter(lambda x: x.get('deviceid') is not None)
        .map(lambda x: Row(
            device_os_version=x.get('deviceosversion', None),
            device_id=x['deviceid'],
            message_id=x['messageid'],
            days_from_eula_approval=x.get('daysfromeulaapproval', None),
            program_name=x.get('programname', None),
            carrier_name=x.get('carriername', None),
            # NOTE(review): 'Category' is the only capitalised payload key
            # besides the deviceType_* ones -- confirm it matches the data.
            content_item_category=x.get('Category', None),
            chosen_conversation_rank=x.get('chosenconversationrank', None),
            device_model=x.get('devicemodel', None),
            device_os_type=x.get('deviceostype', None),
            device_vendor=x.get('devicevendor', None),
            expanded_notification_exists=x.get('expandednotification_exists', None),
            expanded_notification_is_enabled=x.get('expandednotification_isenabled', False),
            form_factor=x.get('formfactor', None),
            home_tier=x.get('hometier', None),
            installation_source=x.get('installationsource', None),
            is_pos_customer=x.get('isposcustomer', False),
            item_subject=to_list(x.get('itemsubject', '')),
            is_user_in_new_content_exploration_group=x.get('isuserinnewcontentexplorationgroup', False),
            sent_local_day_of_week=x.get('localdayofweek', None),
            sent_localtime=to_timestamp(x.get('localtime', None)),
            message_type=x.get('messagetype', None),
            min_days_between_notifications=x.get('mindaysbetweennotifications', None),
            model_fallback_level=x.get('modelfallbacklevel', None),
            model_id=x.get('modelid', None),
            model_id_version=x.get('modelidversion', None),
            model_recommendation_probability=to_float(x.get('modelrecommendationprobability', None)),
            model_original_model=x.get('modeloriginalmodel', None),
            notification_copy_original_variant_count=x.get('notificationcopyoriginalvariantcount', None),
            notification_copy_attributes=x.get('notificationcopyattributes', None),
            # NOTE(review): "cariant" looks like a typo for "variant"; the
            # field name is kept because it is part of the output schema.
            notification_copy_cariant_label=x.get('notificationcopyvariantlabel', None),
            notification_frequency_model_id=x.get('notificationfrequencymodelid', None),
            notification_loading_method=x.get('notificationloadingmethod', 'fullappinitialization'),
            notification_preview_headline=x.get('notificationpreviewheadline', None),
            notification_preview_message=x.get('notificationpreviewmessage', None),
            notification_render_type=x.get('notificationrendertype', None),
            possible_conversations_count=x.get('possibleconversationscount', None),
            priority_method=x.get('prioritymethod', None),
            sales_source=x.get('salessource', None),
            segments_attribute=to_list(x.get('segments_attribute', '')),
            sent_as_high_priority=x.get('sentashighpriority', False),
            show_footer=x.get('showfooter', None),
            source_sender=x.get('source_sender', None),
            timeline_enabled=x.get('timeline_enabled', False),
            word_count=x.get('wordcount', None),
            card_background_attribute=x.get('cardbackground_attribute', None),
            complexity_attribute=to_int(x.get('complexity_attribute', None)),
            content_channels_card=x.get('content_channels_card', False),
            content_channels_notification=x.get('content_channels_notification', False),
            content_channels_search_results=x.get('content_channels_searchresults', False),
            contenttype_attribute=to_list(x.get('contenttype_attribute', [])),
            device_type_mobile=x.get('deviceType_mobile', False),
            device_type_tablet=x.get('deviceType_Tablet', False),
            device_features_attribute=to_list(x.get('devicefeatures_attribute', [])),
            format_attribute=x.get('format_attribute', None),
            notificationcopy_attribute=to_list(x.get('notificationcopy_attribute', [])),
            subject_attribute=to_list(x.get('subject_attribute', [])),
            date=timestamp_str_to_date_str(x.get('timestamp', '')),
            sent_timestamp=to_timestamp(x.get('timestamp', None)),
            app_badge_count=x.get('appbadgecount', 0),
            app_badge_count_method=x.get('appbadgecountmethod', None),
            app_version=x.get('appversion', None),
            background_image_attribute=x.get('backgroundimageattribute', None),
            background_image_original_variant_count=x.get('backgroundimageoriginalvariantcount', None),
            registration_region=x.get('registrationregion', None),
            registration_city=x.get("registrationcity", None),
            registration_country=x.get('registrationcountry', None)))
        # rdd_to_df receives plain dicts rather than Row objects.
        .map(lambda x: x.asDict())
    )
    print('sent events loaded')
    return rdd_to_df(sent_events, record_example, sqlContext)