def get_new_signup(yesterday):
    """
    Get yesterday new signup.
    
    
    Parameters
    ----------
    
    yesterday: date
        Yesterday's date.
        
        
    Global Variables
    ----------
    
    api_creator_secret: Mixpanel Creator Project API key.
        Use to make API calls to Mixpanel Creator Project.
        
    
    Returns
    ----------
    
    dataframe
        Dataframe containing user IDs and emails of creators who signed up yesterday.
    
    """

    # New Signup Web
    query = JQL(api_creator_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "New Signup Web"
                    }],
                    'from_date': yesterday,
                    'to_date': yesterday
                }),
                people=People({'user_selectors': []}),
                join_params={
                    'type': 'full',
                    'selectors': [{
                        'event': "New Signup Web"
                    }]
                })

    #store email, user id, and sign up events
    email_list = []
    userid_list = []
    for row in query.send():
        if 'user' in list(row.keys()):
            if '$username' in list(row['user']['properties'].keys()):
                userid_list.append(row['user']['properties']['$username'])
            if '$email' in list(row['user']['properties'].keys()):
                email_list.append(row['user']['properties']['$email'])

    #create dataframe
    data = {'user_id': userid_list, 'email': email_list}
    df_new_users = pd.DataFrame(data=data)

    return df_new_users
def AppStart_pull(from_date, to_date):
    """
    
    Pull app user AppStart events in Mixpanel.
    
    
    Parameters
    ----------
    
    from_date: date
        Start date of query.
        
    to_date: date
        End date of query.
        
        
    Global Variables
    ----------
    
    api_user_secret: str
        Client secret used to make calls to Mixpanel User Project.
        
        
    Returns
    ----------
    
    dataframe
        Dataframe contains user IDs and AppStart event count for app users in Mixpanel.
           
    """

    #generate JQL query
    query = JQL(api_user_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "AppStart"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(keys=["e.properties.zUserId"],
                             accumulator=Reducer.count())

    #initalize lists to record user ID and AppStarts
    user_id_list = []
    AppStart_list = []

    #process query results
    for row in query.send():
        if row['key'][0] is not None:
            user_id_list.append(int(row['key'][0]))
            AppStart_list.append(row['value'])

    #generate dataframe
    data = {'user_id': user_id_list, 'AppStart': AppStart_list}
    df_AppStart = pd.DataFrame(data)
    df_AppStart = df_AppStart.dropna()
    df_AppStart.user_id = df_AppStart.user_id.astype(int)

    return df_AppStart
示例#3
0
 def _try_invalid_join(self, params):
     try:
         JQL(api_secret="asas",
             events=Events(),
             people=People(),
             join_params=params)
         self.fail("Expected Events syntax error with params: %s" % params)
     except JQLSyntaxError as e:
         return e
def New_Signup_Web_pull(from_date, to_date):
    """
    Pull app creator sign up events in Mixpanel.
    
    
    Parameters
    ----------
    
    from_date: date
        Start date of query.
        
    to_date: date
        End date of query.
        
        
    Global Variables
    ----------
    
    api_creator_secret: str
        Client secret used to make calls to Mixpanel Creator Project.
        
        
    Returns
    ----------
    
    dataframe
        Dataframe contains user IDs and new sign up event count for app creators in Mixpanel.
           
    """

    #generate JQL query
    query = JQL(api_creator_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "New Signup Web"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(
                    keys=[
                        "e.properties.userId",  #use userId
                    ],
                    accumulator=Reducer.count())

    #initialize lists to record user IDs and New Signup Web
    user_id_list = []
    new_sign_up_list = []
    for row in query.send():
        if row['key'][0] is not None:
            user_id_list.append(int(row['key'][0]))
            new_sign_up_list.append(row['value'])

    #generate dataframe
    data = {'user_id': user_id_list, 'new_sign_up': new_sign_up_list}
    df_New_Signup_Web = pd.DataFrame(data)

    return df_New_Signup_Web
示例#5
0
    def test_event_date_keys(self):
        for k in ('to_date', 'from_date'):
            for v in ('2017-10-19', datetime(2017, 10,
                                             19), datetime(2017, 10,
                                                           19).date()):
                q = Events({k: v})
                self.assertIn('2017-10-19', str(q))

        # Now a bad key.
        e = self._try_invalid_events({'to_date': 232})
        self.assertEqual(str(e),
                         'to_date must be datetime, datetime.date, or str')
示例#6
0
    def test_join_types(self):
        # Good types
        for t in ('full', 'left', 'right', 'inner'):
            JQL('some_key',
                events=Events(),
                people=People(),
                join_params={'type': t})

        # Bad type
        e = self._try_invalid_join({'type': 'mew'})
        self.assertEqual(
            '"mew" is not a valid join type (valid types: full, left, right, inner)',
            str(e))
示例#7
0
    def test_join_selectors(self):
        def good_params():
            return {
                'selectors': [{
                    'event': 'my_event',
                    'selector': 'my selector'
                }]
            }

        # Test valid
        JQL('some_api_key',
            events=Events(),
            people=People(),
            join_params=good_params())

        # Bad array
        bad_params = good_params()
        bad_params['selectors'] = 3
        e = self._try_invalid_join(bad_params)
        self.assertEqual(str(e), "join_params['selectors'] must be iterable")

        # Bad key types
        for key in ('event', 'selector'):
            bad_params = good_params()
            bad_params['selectors'][0][key] = 3
            e = self._try_invalid_join(bad_params)
            self.assertEqual(
                str(e),
                "join_params['selectors'][0].%s must be a string" % key)

        # Bad key
        bad_params = good_params()
        bad_params['selectors'][0]['mrao'] = 3
        e = self._try_invalid_join(bad_params)
        self.assertEqual(
            str(e), "'mrao' is not a valid key in join_params['selectors'][0]")
示例#8
0
    def test_event_event_selectors(self):
        def good_params():
            return {
                'event_selectors': [{
                    'event': 'my_event',
                    'selector': 'my selector',
                    'label': 'my label'
                }]
            }

        # Test valid
        Events(good_params())

        # Bad array
        bad_params = good_params()
        bad_params['event_selectors'] = 3
        e = self._try_invalid_events(bad_params)
        self.assertEqual(str(e),
                         "event_params['event_selectors'] must be iterable")

        # Bad key types
        for key in ('event', 'selector', 'label'):
            bad_params = good_params()
            bad_params['event_selectors'][0][key] = 3
            e = self._try_invalid_events(bad_params)
            self.assertEqual(
                str(e),
                "event_params['event_selectors'][0].%s must be a string" % key)

        # Bad key
        bad_params = good_params()
        bad_params['event_selectors'][0]['mrao'] = 3
        e = self._try_invalid_events(bad_params)
        self.assertEqual(
            str(e),
            "'mrao' is not a valid key in event_params['event_selectors'][0]")
示例#9
0
def user_appstart_pull(from_date, to_date):
    """

    Pull app user AppStart events in Mixpanel.



    Parameters
    ----------

    from_date: date
        Start date of query.

    to_date: date
        End date of query.


    Global Variables
    ----------

    api_user_secret: str
        Client secret used to make calls to Mixpanel User Project.


    Returns
    ----------

    df_user_AppStart: dataframe
        Dataframe contains app creator user ID and number of app users.



    """

    #generate JQL query
    query = JQL(api_user_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "AppStart"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(
                    keys=["e.properties.zUserId", "e.properties.zAppOwnerId"],
                    accumulator=Reducer.count())

    #initialize lists to record app user IDs, app creator IDs, app name, and number of AppStarts
    app_user_id_list = []
    owner_id_list = []
    AppStart_list = []

    #process query results
    for row in query.send():
        if (row['key'][0] is not None) & (row['key'][1] is not None):
            app_user_id_list.append(int(row['key'][0]))
            owner_id_list.append(int(row['key'][1]))
            AppStart_list.append(row['value'])

    #generate email
    data = {
        'app_user_id': app_user_id_list,
        'creator_id': owner_id_list,
        'AppStart': AppStart_list
    }
    df_AppStart = pd.DataFrame(data)

    #make sure IDs are valid
    df_user_AppStart = df_AppStart[(df_AppStart.app_user_id > 1)
                                   & (df_AppStart.creator_id > 1)]

    return df_user_AppStart
示例#10
0
 def test_query_plan(self):
     with warnings.catch_warnings(record=True) as w:
         q = JQL('key', events=Events(), people=People())
         q.query_plan()
         self.assertIs(w[-1].category, DeprecationWarning)
         self.assertIn('query_plan', str(w[-1].message))
def EditorAction_pull(from_date, to_date):
    """
    Pull app creator EditorAction-Save events in Mixpanel.
    
    
    Parameters
    ----------
    
    from_date: date
        Start date of query.
        
    to_date: date
        End date of query.
        
        
    Global Variables
    ----------
    
    api_creator_secret: str
        Client secret used to make calls to Mixpanel Creator Project.
        
        
    Returns
    ----------
    
    dataframe
        Dataframe contains app creator emails, number of EditorAction events, and zEvent = Save filter.
           
    """

    #generate JQL query
    query = JQL(api_creator_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "EditorAction"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(
                    keys=["e.properties.zUserEmail", "e.properties.zEvent"],
                    accumulator=Reducer.count())

    #initialize lists to record emails, EditorAction events, and zEvent
    user_email_list = []
    EditorAction_list = []
    zevent_list = []

    #process query results
    for row in query.send():
        if row['key'][0] is not None:
            user_email_list.append(row['key'][0])
            zevent_list.append(row['key'][1])
            EditorAction_list.append(row['value'])

    #create dataframe
    data = {
        'email': user_email_list,
        'zevent': zevent_list,
        'EditorAction': EditorAction_list
    }
    df_editor_action = pd.DataFrame(data)

    #filter to only include "Save" editor events
    df_editor_action = df_editor_action[df_editor_action.zevent == 'Save']

    return df_editor_action
def user_appstart_pull(from_date, to_date, df_app_users):
    """
    
    Pull app user AppStart events in Mixpanel.
    
    
    
    Parameters
    ----------
    
    from_date: date
        Start date of query.
        
    to_date: date
        End date of query.
        
        
    Global Variables
    ----------
    
    api_user_secret: str
        Client secret used to make calls to Mixpanel User Project.
        
        
    Returns
    ----------
    
    df_user_AppStart: dataframe
        Dataframe contains app creator user ID, list of app user email domains, and number of app users
        
    df_top_appstarts: dataframe
        Dataframe contains app creator user ID and number of AppStarts from top apps
    
           
    """

    #generate JQL query
    query = JQL(api_user_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "AppStart"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(keys=[
                    "e.properties.zUserId", "e.properties.zAppOwnerId",
                    "e.properties.zAppName"
                ],
                             accumulator=Reducer.count())

    #initialize lists to record app user IDs, app creator IDs, app name, and number of AppStarts
    app_user_id_list = []
    owner_id_list = []
    app_name_list = []
    AppStart_list = []

    #process query results
    for row in query.send():
        if row['key'][0] is not None:
            app_user_id_list.append(row['key'][0])
            owner_id_list.append(row['key'][1])
            app_name_list.append(row['key'][2])
            AppStart_list.append(row['value'])

    #generate email
    data = {
        'app_user_id': app_user_id_list,
        'user_id': owner_id_list,
        'app_name': app_name_list,
        'AppStart': AppStart_list
    }
    df_AppStart = pd.DataFrame(data)

    #merge AppStart and app user dataframes to associate app user emails to number of AppStarts
    df_AppStart = pd.merge(df_AppStart,
                           df_app_users,
                           on='app_user_id',
                           how='left')

    #get total users and list of user emails for each app creator
    df_user_AppStart = df_AppStart.groupby(
        ['user_id', 'app_user_id'])['app_name'].count().reset_index()
    df_user_AppStart = df_user_AppStart.groupby(
        'user_id')['app_user_id'].count().reset_index()
    df_user_AppStart = df_user_AppStart.rename(
        columns={'app_user_id': 'num_app_users'})

    #get app user domain list for each app creator
    df_AppStart_temp = df_AppStart.groupby(
        ['user_id', 'app_user_id',
         'user_email'])['app_name'].count().reset_index()
    df_AppStart_temp['user_email_domains'] = df_AppStart_temp[
        'user_email'].str.split('@').str[1].fillna('')
    df_AppStart_temp = df_AppStart_temp[
        df_AppStart_temp['user_email_domains'] != '']
    df_user_domains_by_creators = df_AppStart_temp.groupby(
        'user_id')['user_email_domains'].apply(list).reset_index()
    df_user_AppStart = pd.merge(df_user_AppStart,
                                df_user_domains_by_creators,
                                on='user_id',
                                how='left')
    df_user_AppStart.loc[df_user_AppStart['user_email_domains'].isnull(),
                         ['user_email_domains']] = df_user_AppStart.loc[
                             df_user_AppStart['user_email_domains'].isnull(),
                             'user_email_domains'].apply(lambda x: [])

    #only keep the app with the highest number of AppStarts for each app creator
    df_top_appstarts = df_AppStart.groupby(
        ['user_id', 'app_name']).AppStart.sum().reset_index().sort_values(
            'AppStart', ascending=False).drop_duplicates('user_id',
                                                         keep='first')
    df_top_appstarts = df_top_appstarts.rename(
        columns={'AppStart': 'appstart_by_top_app'})

    return df_user_AppStart, df_top_appstarts
示例#13
0
 def _try_invalid_events(self, params):
     try:
         Events(params)
         self.fail("Expected Events syntax error with params: %s" % params)
     except JQLSyntaxError as e:
         return e
示例#14
0
def Editor_pull(from_date, to_date):
    """
    Pull app creator Editor events in Mixpanel.
    
    
    Parameters
    ----------
    
    from_date: date
        Start date of query.
        
    to_date: date
        End date of query.
        
        
    Global Variables
    ----------
    
    api_creator_secret: str
        Client secret used to make calls to Mixpanel Creator Project.
        
        
    Returns
    ----------
    
    dataframe
        Dataframe contains user IDs, Editor event datetime, and Editor event count for app creators in Mixpanel.
           
    """

    #generate JQL query
    query = JQL(api_creator_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "Editor"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(keys=[
                    "e.properties.zUserId", "new Date(e.time).toISOString()"
                ],
                             accumulator=Reducer.count())

    #store user IDs, Editor count, and Editor datetime
    user_id_list = []
    datetime_list = []
    Editor_list = []
    for row in query.send():
        if row['key'][0] is not None:
            user_id_list.append(int(row['key'][0]))
            datetime_list.append(
                datetime.strptime(row['key'][1][:10], '%Y-%m-%d'))
            Editor_list.append(row['value'])

    #generate dataframe
    data = {
        'user_id': user_id_list,
        'Editor_datetime': datetime_list,
        'Editor': Editor_list
    }
    df_Editor = pd.DataFrame(data)

    return df_Editor
示例#15
0
def user_appstart_pull(from_date, to_date):
    """

    Pull app user AppStart events in Mixpanel.



    Parameters
    ----------

    from_date: date
        Start date of query.

    to_date: date
        End date of query.


    Global Variables
    ----------

    api_user_secret: str
        Client secret used to make calls to Mixpanel User Project.


    Returns
    ----------

    df_user_AppStart: dataframe
        Dataframe contains app creator user ID and number of app users.



    """

    #generate JQL query
    query = JQL(api_user_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "AppStart"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(keys=[
                    "e.properties.zUserId", "e.properties.zAppOwnerId",
                    "new Date(e.time).toISOString()"
                ],
                             accumulator=Reducer.count())

    #initialize lists to record app user IDs, app creator IDs, app name, and number of AppStarts
    app_user_id_list = []
    owner_id_list = []
    AppStart_list = []
    date_list = []

    #process query results
    for row in query.send():
        if (row['key'][0] is not None) & (row['key'][1] is not None):
            app_user_id_list.append(int(row['key'][0]))
            owner_id_list.append(int(row['key'][1]))
            date_list.append(datetime.strptime(row['key'][2][:10], '%Y-%m-%d'))
            AppStart_list.append(row['value'])

    #generate email
    data = {
        'date': date_list,
        'app_user_id': app_user_id_list,
        'user_id': owner_id_list,
        'AppStart': AppStart_list
    }
    df_AppStart = pd.DataFrame(data)

    #make sure IDs are valid
    df_AppStart = df_AppStart[(df_AppStart.app_user_id > 1)
                              & (df_AppStart.user_id > 1)]

    #remove duplicate users on the same day. Since it was pulled based on timestamp, AppStarts in one day can be on multiple roads
    df_AppStart = df_AppStart.drop_duplicates(
        ['user_id', 'date', 'app_user_id'])

    #get total users and list of user emails for each app creator
    df_user_AppStart = df_AppStart.groupby(
        ['user_id', 'date']).app_user_id.count().reset_index()
    df_user_AppStart = df_user_AppStart.rename(
        columns={'app_user_id': 'num_app_users'})

    return df_user_AppStart
def New_Signup_Web_pull(from_date, to_date):
    """
    Pull app creator sign up events in Mixpanel.
    
    
    Parameters
    ----------
    
    from_date: date
        Start date of query.
        
    to_date: date
        End date of query.
        
        
    Global Variables
    ----------
    
    api_creator_secret: str
        Client secret used to make calls to Mixpanel Creator Project.
        
        
    Returns
    ----------
    
    dataframe
        Dataframe contains user IDs, sign up datetime, new sign up event count, and country for app creators in Mixpanel.
           
    """

    #generate JQL query
    query = JQL(api_creator_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "New Signup Web"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(keys=[
                    "e.properties.userId", "new Date(e.time).toISOString()"
                ],
                             accumulator=Reducer.count())

    #store emails, user IDs, sign up datetime, and country
    user_id_list = []
    datetime_list = []
    new_sign_up_list = []
    for row in query.send():
        if row['key'][0] is not None:
            user_id_list.append(int(row['key'][0]))
            datetime_list.append(
                datetime.strptime(row['key'][1][:10], '%Y-%m-%d'))
            new_sign_up_list.append(row['value'])

    #generate dataframe
    data = {
        'app_owner_id': user_id_list,
        'date': datetime_list,
        'new_sign_up': new_sign_up_list
    }
    df_New_Signup_Web = pd.DataFrame(data)

    #only keeping ones with actual IDs
    df_New_Signup_Web = df_New_Signup_Web[df_New_Signup_Web.app_owner_id > 1]

    return df_New_Signup_Web
示例#17
0
 def setUp(self):
     self.query = JQL(api_secret=None, events=Events())
def pull_usage(from_date, to_date):
    """
    Pull usage events in Mixpanel.
    
    
    Parameters
    ----------
    
    from_date: date
        Start date of query.
        
    to_date: date
        End date of query.
        
        
    Global Variables
    ----------
    
    api_creator_secret: str
        Client secret used to make calls to Mixpanel Creator Project.
        
        
    Returns
    ----------
    
    dataframe
        Dataframe contains app ID, user IDs, owner ID, and usage event count in Mixpanel.
           
    """

    #generate JQL query
    query = JQL(api_creator_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "Usage"
                    }],
                    'from_date': from_date,
                    'to_date': to_date
                })).group_by(
                    keys=["e.properties.OwnerId", "e.properties.UserId"],
                    accumulator=Reducer.count())

    #store app owner, app, and app user IDs
    app_owner_id_list = []
    app_user_id_list = []
    usage_list = []
    for row in query.send():
        if row['key'][0] is not None:
            app_owner_id_list.append(int(row['key'][0]))
            app_user_id_list.append(int(row['key'][1]))
            usage_list.append(row['value'])

    #generate dataframe
    data = {
        'app_owner_id': app_owner_id_list,
        'app_user_id': app_user_id_list,
        'usage': usage_list
    }
    df_usage = pd.DataFrame(data)

    #only keep app owners and users with proper IDs
    df_usage = df_usage[(df_usage.app_owner_id > 1)
                        & (df_usage.app_user_id > 1)]
    df_usage = df_usage[~df_usage.app_owner_id.isin(
        [10305, 71626])]  #remove for demo accounts

    return df_usage
示例#19
0
def New_Signup_App_pull(yesterday):
    """
    
    Pull app user sign up events in Mixpanel.
    
    
    Parameters
    ----------
    
    yesterday: date
        Yesterday's date.
        
        
    Global Variables
    ----------
    
    api_user_secret: str
        Client secret used to make calls to Mixpanel User Project.
        
        
    Returns
    ----------
    
    dataframe
        Dataframe contains user IDs, sign up datetime, and new sign up event count for app users in Mixpanel.
           
    """

    #generate JQL query
    query = JQL(api_user_secret,
                events=Events({
                    'event_selectors': [{
                        'event': "New Signup App"
                    }],
                    'from_date': yesterday,
                    'to_date': yesterday
                })).group_by(keys=[
                    "e.properties.zUserId", "new Date(e.time).toISOString()"
                ],
                             accumulator=Reducer.count())

    #initialize lists to store emails, user IDs, and sign up datetime
    user_id_list = []
    datetime_list = []
    new_sign_up_list = []

    #process query results
    for row in query.send():
        if row['key'][0] is not None:
            user_id_list.append(int(row['key'][0]))
            datetime_list.append(
                datetime.strptime(row['key'][1][:10], '%Y-%m-%d'))
            new_sign_up_list.append(row['value'])

    #generate dataframe
    data = {
        'app_user_id': user_id_list,
        'sign_up_datetime': datetime_list,
        'new_signup_app': new_sign_up_list
    }
    df_New_Signup_App = pd.DataFrame(data)

    return df_New_Signup_App