示例#1
0
def load_feedbackmodule_impression_events(events, from_date, to_date):
    """Build a DataFrame of feedback-module impression events in a time window.

    Args:
        events: DataFrame exposing ``event``, ``time`` and ``properties``
            columns. Each ``properties`` value is parsed as JSON and must
            contain a nested ``'properties'`` object.
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        DataFrame with the columns ``feedback_module_impression_message_id``
        and ``feedback_module_impression_timestamp``.
    """

    def _is_usable(payload):
        # Keep non-test payloads that carry a message id. 'istest' is read
        # with a plain lookup, so a payload without that key raises KeyError.
        return payload['istest'] is False and payload.get('messageid') is not None

    def _as_row(payload):
        # to_timestamp is a helper defined elsewhere in this file.
        return Row(
            feedback_module_impression_message_id=payload['messageid'],
            feedback_module_impression_timestamp=to_timestamp(payload['timestamp']),
        )

    within_window = (events.time >= from_date) & (events.time <= to_date)
    matching = events.filter(events.event == "contentitem_feedbackmodule").filter(within_window)
    payloads = matching.rdd.map(lambda record: json.loads(record.properties)['properties'])

    # NOTE(review): per the PySpark docs, sampleRatio is the fraction of rows
    # used for schema inference; it is not expected to subsample the result.
    feedbackmodule_impression_events = payloads.filter(_is_usable).map(_as_row).toDF(sampleRatio=0.2)
    print('FeedbackModule impression events loaded')
    return feedbackmodule_impression_events
示例#2
0
def load_dismiss_events(events, from_date, to_date):
    """Build a DataFrame of content-item dismiss events in a time window.

    Args:
        events: DataFrame exposing ``event``, ``time`` and ``properties``
            columns. Each ``properties`` value is parsed as JSON and must
            contain a nested ``'properties'`` object.
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        DataFrame with the columns ``dismiss_message_id`` and
        ``dismiss_timestamp``.
    """
    is_dismiss = events.event == "contentitem_dismiss"
    in_range = (events.time >= from_date) & (events.time <= to_date)

    # Decode the JSON payload of every matching record.
    decoded = events.filter(is_dismiss).filter(in_range).rdd.map(
        lambda rec: json.loads(rec.properties)['properties'])

    # Drop test traffic first, then payloads without a message id.
    real_traffic = decoded.filter(lambda props: props['istest'] is False)
    identified = real_traffic.filter(lambda props: props.get('messageid') is not None)

    rows = identified.map(
        lambda props: Row(dismiss_message_id=props['messageid'],
                          dismiss_timestamp=to_timestamp(props['timestamp'])))

    # NOTE(review): per the PySpark docs, sampleRatio is the fraction of rows
    # used for schema inference; it is not expected to subsample the result.
    dismiss_events = rows.toDF(sampleRatio=0.2)
    print('Dismiss events loaded')
    return dismiss_events
示例#3
0
def load_view_events(events, from_date, to_date):
    """Build a DataFrame of content-item view events in a time window.

    Only non-test payloads that carry both ``daysfromeulaapproval`` and
    ``messageid`` are kept.

    Args:
        events: DataFrame exposing ``event``, ``time`` and ``properties``
            columns. Each ``properties`` value is parsed as JSON and must
            contain a nested ``'properties'`` object.
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        DataFrame with the columns ``view_message_id`` and ``view_timestamp``.
    """
    view_events = events.filter(events.event == "contentitem_view") \
        .filter((events.time >= from_date) & (events.time <= to_date)) \
        .rdd.map(lambda x: json.loads(x.properties)['properties']) \
        .filter(lambda x: x['istest'] is False) \
        .filter(lambda x: x.get('daysfromeulaapproval') is not None) \
        .filter(lambda x: x.get('messageid') is not None) \
        .map(lambda x: Row(view_message_id=x['messageid'],  # guaranteed present by the filter above
                           view_timestamp=to_timestamp(x['timestamp']))) \
        .toDF(sampleRatio=0.2)  # NOTE(review): sampleRatio is for schema inference per the PySpark docs
    print('View events loaded')
    return view_events
示例#4
0
def load_received_events(events, from_date, to_date):
    """Build a DataFrame of content-item received events in a time window.

    Only non-test payloads that carry a ``messageid`` are kept. Missing
    locale fields default to the empty string.

    Args:
        events: DataFrame exposing ``event``, ``time`` and ``properties``
            columns. Each ``properties`` value is parsed as JSON and must
            contain a nested ``'properties'`` object.
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        DataFrame with the columns ``received_message_id``, ``agent_locale``,
        ``device_locale`` and ``received_timestamp``.
    """
    received_events = events.filter(events.event == "contentitem_received") \
        .filter((events.time >= from_date) & (events.time <= to_date)) \
        .rdd.map(lambda x: json.loads(x.properties)['properties']) \
        .filter(lambda x: x['istest'] is False) \
        .filter(lambda x: x.get('messageid') is not None) \
        .map(lambda x: Row(received_message_id=x['messageid'],  # guaranteed present by the filter above
                           agent_locale=x.get('agentlocale', ''),
                           device_locale=x.get('devicelocale', ''),
                           received_timestamp=to_timestamp(x['timestamp']))) \
        .toDF(sampleRatio=0.2)  # NOTE(review): sampleRatio is for schema inference per the PySpark docs
    print('Received events loaded')
    return received_events
示例#5
0
def load_click_events(events, from_date, to_date):
    """Build a DataFrame of click actions grouped by message id.

    Only non-test payloads that carry both ``messageid`` and ``actionid`` are
    kept. The clicks for each message are collected into one list.

    Args:
        events: DataFrame exposing ``event``, ``time`` and ``properties``
            columns. Each ``properties`` value is parsed as JSON and must
            contain a nested ``'properties'`` object.
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.

    Returns:
        DataFrame with one row per ``click_message_id`` and an ``actions``
        column holding the collected ``(action, timestamp)`` structs.
    """
    click_events = events.filter(events.event == "contentitem_click") \
        .filter((events.time >= from_date) & (events.time <= to_date)) \
        .rdd.map(lambda x: json.loads(x.properties)['properties']) \
        .filter(lambda x: x['istest'] is False) \
        .filter(lambda x: x.get('messageid') is not None) \
        .filter(lambda x: x.get('actionid') is not None) \
        .map(lambda x: Row(click_message_id=x['messageid'],
                           action=x['actionid'],
                           timestamp=to_timestamp(x['timestamp']))) \
        .toDF(sampleRatio=0.2)

    # Alias the aggregate directly instead of renaming Spark's auto-generated
    # column name. That name varies between Spark versions, and a rename that
    # does not match is silently skipped, which breaks the 'actions' lookup below.
    grouped = click_events.groupBy('click_message_id') \
        .agg(F.collect_list(F.struct("action", "timestamp")).alias('actions'))

    # NOTE(review): the Row round-trip is kept so the resulting column layout
    # and inferred schema stay exactly as before; it may be removable.
    grouped_click_events_by_messageid = grouped.rdd.map(
        lambda x: Row(click_message_id=x['click_message_id'], actions=x['actions'])).toDF(sampleRatio=0.2)
    print('click events loaded')
    return grouped_click_events_by_messageid
示例#6
0
def load_sent_events(events, from_date, to_date, record_example, sqlContext):
    """Build a DataFrame of content-item sent events in a time window.

    Only non-test payloads that carry ``daysfromeulaapproval``, ``messageid``
    and ``deviceid`` are kept. Each payload is flattened into a dict of
    snake_case fields and handed to ``rdd_to_df``.

    The helpers ``to_list``, ``to_int``, ``to_float``, ``to_timestamp``,
    ``timestamp_str_to_date_str`` and ``rdd_to_df`` are defined outside this
    block.

    Args:
        events: DataFrame exposing ``event``, ``time`` and ``properties``
            columns. Each ``properties`` value is parsed as JSON and must
            contain a nested ``'properties'`` object.
        from_date: Inclusive lower bound compared against ``events.time``.
        to_date: Inclusive upper bound compared against ``events.time``.
        record_example: Passed through unchanged to ``rdd_to_df``.
        sqlContext: Passed through unchanged to ``rdd_to_df``.

    Returns:
        Whatever ``rdd_to_df`` returns for the RDD of per-event dicts.
    """

    def _to_row(x):
        # Output field names and payload keys are part of the downstream
        # schema, so they are reproduced exactly, including their spelling.
        # NOTE(review): 'notification_copy_cariant_label' looks like a typo
        # for "variant", and 'Category' / 'deviceType_mobile' /
        # 'deviceType_Tablet' use different casing from the other keys.
        # Confirm against the event producer before changing any of them.
        return Row(device_os_version=x.get('deviceosversion', None),
                   device_id=x['deviceid'],
                   message_id=x['messageid'],
                   days_from_eula_approval=x.get('daysfromeulaapproval', None),
                   program_name=x.get('programname', None),
                   carrier_name=x.get('carriername', None),
                   content_item_category=x.get('Category', None),
                   chosen_conversation_rank=x.get('chosenconversationrank', None),
                   device_model=x.get('devicemodel', None),
                   device_os_type=x.get('deviceostype', None),
                   device_vendor=x.get('devicevendor', None),
                   expanded_notification_exists=x.get('expandednotification_exists', None),
                   expanded_notification_is_enabled=x.get('expandednotification_isenabled', False),
                   form_factor=x.get('formfactor', None),
                   home_tier=x.get('hometier', None),
                   installation_source=x.get('installationsource', None),
                   is_pos_customer=x.get('isposcustomer', False),
                   item_subject=to_list(x.get('itemsubject', '')),
                   is_user_in_new_content_exploration_group=x.get('isuserinnewcontentexplorationgroup', False),
                   sent_local_day_of_week=x.get('localdayofweek', None),
                   sent_localtime=to_timestamp(x.get('localtime', None)),
                   message_type=x.get('messagetype', None),
                   min_days_between_notifications=x.get('mindaysbetweennotifications', None),
                   model_fallback_level=x.get('modelfallbacklevel', None),
                   model_id=x.get('modelid', None),
                   model_id_version=x.get('modelidversion', None),
                   model_recommendation_probability=to_float(x.get('modelrecommendationprobability', None)),
                   model_original_model=x.get('modeloriginalmodel', None),
                   notification_copy_original_variant_count=x.get('notificationcopyoriginalvariantcount', None),
                   notification_copy_attributes=x.get('notificationcopyattributes', None),
                   notification_copy_cariant_label=x.get('notificationcopyvariantlabel', None),
                   notification_frequency_model_id=x.get('notificationfrequencymodelid', None),
                   notification_loading_method=x.get('notificationloadingmethod', 'fullappinitialization'),
                   notification_preview_headline=x.get('notificationpreviewheadline', None),
                   notification_preview_message=x.get('notificationpreviewmessage', None),
                   notification_render_type=x.get('notificationrendertype', None),
                   possible_conversations_count=x.get('possibleconversationscount', None),
                   priority_method=x.get('prioritymethod', None),
                   sales_source=x.get('salessource', None),
                   segments_attribute=to_list(x.get('segments_attribute', '')),
                   sent_as_high_priority=x.get('sentashighpriority', False),
                   show_footer=x.get('showfooter', None),
                   source_sender=x.get('source_sender', None),
                   timeline_enabled=x.get('timeline_enabled', False),
                   word_count=x.get('wordcount', None),
                   card_background_attribute=x.get('cardbackground_attribute', None),
                   complexity_attribute=to_int(x.get('complexity_attribute', None)),
                   content_channels_card=x.get('content_channels_card', False),
                   content_channels_notification=x.get('content_channels_notification', False),
                   content_channels_search_results=x.get('content_channels_searchresults', False),
                   contenttype_attribute=to_list(x.get('contenttype_attribute', [])),
                   device_type_mobile=x.get('deviceType_mobile', False),
                   device_type_tablet=x.get('deviceType_Tablet', False),
                   device_features_attribute=to_list(x.get('devicefeatures_attribute', [])),
                   format_attribute=x.get('format_attribute', None),
                   notificationcopy_attribute=to_list(x.get('notificationcopy_attribute', [])),
                   subject_attribute=to_list(x.get('subject_attribute', [])),
                   date=timestamp_str_to_date_str(x.get('timestamp', '')),
                   sent_timestamp=to_timestamp(x.get('timestamp', None)),
                   app_badge_count=x.get('appbadgecount', 0),
                   app_badge_count_method=x.get('appbadgecountmethod', None),
                   app_version=x.get('appversion', None),
                   background_image_attribute=x.get('backgroundimageattribute', None),
                   background_image_original_variant_count=x.get('backgroundimageoriginalvariantcount', None),
                   registration_region=x.get('registrationregion', None),
                   registration_city=x.get("registrationcity", None),
                   registration_country=x.get('registrationcountry', None))

    sent_events = events.filter(events.event == "contentitem_sent") \
        .filter((events.time >= from_date) & (events.time <= to_date)) \
        .rdd.map(lambda x: json.loads(x.properties)['properties']) \
        .filter(lambda x: x['istest'] is False) \
        .filter(lambda x: x.get('daysfromeulaapproval') is not None) \
        .filter(lambda x: x.get('messageid') is not None) \
        .filter(lambda x: x.get('deviceid') is not None) \
        .map(_to_row) \
        .map(lambda row: row.asDict())
    print('sent events loaded')
    return rdd_to_df(sent_events, record_example, sqlContext)