def get_new_signup(yesterday): """ Get yesterday new signup. Parameters ---------- yesterday: date Yesterday's date. Global Variables ---------- api_creator_secret: Mixpanel Creator Project API key. Use to make API calls to Mixpanel Creator Project. Returns ---------- dataframe Dataframe containing user IDs and emails of creators who signed up yesterday. """ # New Signup Web query = JQL(api_creator_secret, events=Events({ 'event_selectors': [{ 'event': "New Signup Web" }], 'from_date': yesterday, 'to_date': yesterday }), people=People({'user_selectors': []}), join_params={ 'type': 'full', 'selectors': [{ 'event': "New Signup Web" }] }) #store email, user id, and sign up events email_list = [] userid_list = [] for row in query.send(): if 'user' in list(row.keys()): if '$username' in list(row['user']['properties'].keys()): userid_list.append(row['user']['properties']['$username']) if '$email' in list(row['user']['properties'].keys()): email_list.append(row['user']['properties']['$email']) #create dataframe data = {'user_id': userid_list, 'email': email_list} df_new_users = pd.DataFrame(data=data) return df_new_users
def AppStart_pull(from_date, to_date): """ Pull app user AppStart events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_user_secret: str Client secret used to make calls to Mixpanel User Project. Returns ---------- dataframe Dataframe contains user IDs and AppStart event count for app users in Mixpanel. """ #generate JQL query query = JQL(api_user_secret, events=Events({ 'event_selectors': [{ 'event': "AppStart" }], 'from_date': from_date, 'to_date': to_date })).group_by(keys=["e.properties.zUserId"], accumulator=Reducer.count()) #initalize lists to record user ID and AppStarts user_id_list = [] AppStart_list = [] #process query results for row in query.send(): if row['key'][0] is not None: user_id_list.append(int(row['key'][0])) AppStart_list.append(row['value']) #generate dataframe data = {'user_id': user_id_list, 'AppStart': AppStart_list} df_AppStart = pd.DataFrame(data) df_AppStart = df_AppStart.dropna() df_AppStart.user_id = df_AppStart.user_id.astype(int) return df_AppStart
def _try_invalid_join(self, params): try: JQL(api_secret="asas", events=Events(), people=People(), join_params=params) self.fail("Expected Events syntax error with params: %s" % params) except JQLSyntaxError as e: return e
def New_Signup_Web_pull(from_date, to_date): """ Pull app creator sign up events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_creator_secret: str Client secret used to make calls to Mixpanel Creator Project. Returns ---------- dataframe Dataframe contains user IDs and new sign up event count for app creators in Mixpanel. """ #generate JQL query query = JQL(api_creator_secret, events=Events({ 'event_selectors': [{ 'event': "New Signup Web" }], 'from_date': from_date, 'to_date': to_date })).group_by( keys=[ "e.properties.userId", #use userId ], accumulator=Reducer.count()) #initialize lists to record user IDs and New Signup Web user_id_list = [] new_sign_up_list = [] for row in query.send(): if row['key'][0] is not None: user_id_list.append(int(row['key'][0])) new_sign_up_list.append(row['value']) #generate dataframe data = {'user_id': user_id_list, 'new_sign_up': new_sign_up_list} df_New_Signup_Web = pd.DataFrame(data) return df_New_Signup_Web
def test_event_date_keys(self): for k in ('to_date', 'from_date'): for v in ('2017-10-19', datetime(2017, 10, 19), datetime(2017, 10, 19).date()): q = Events({k: v}) self.assertIn('2017-10-19', str(q)) # Now a bad key. e = self._try_invalid_events({'to_date': 232}) self.assertEqual(str(e), 'to_date must be datetime, datetime.date, or str')
def test_join_types(self): # Good types for t in ('full', 'left', 'right', 'inner'): JQL('some_key', events=Events(), people=People(), join_params={'type': t}) # Bad type e = self._try_invalid_join({'type': 'mew'}) self.assertEqual( '"mew" is not a valid join type (valid types: full, left, right, inner)', str(e))
def test_join_selectors(self): def good_params(): return { 'selectors': [{ 'event': 'my_event', 'selector': 'my selector' }] } # Test valid JQL('some_api_key', events=Events(), people=People(), join_params=good_params()) # Bad array bad_params = good_params() bad_params['selectors'] = 3 e = self._try_invalid_join(bad_params) self.assertEqual(str(e), "join_params['selectors'] must be iterable") # Bad key types for key in ('event', 'selector'): bad_params = good_params() bad_params['selectors'][0][key] = 3 e = self._try_invalid_join(bad_params) self.assertEqual( str(e), "join_params['selectors'][0].%s must be a string" % key) # Bad key bad_params = good_params() bad_params['selectors'][0]['mrao'] = 3 e = self._try_invalid_join(bad_params) self.assertEqual( str(e), "'mrao' is not a valid key in join_params['selectors'][0]")
def test_event_event_selectors(self): def good_params(): return { 'event_selectors': [{ 'event': 'my_event', 'selector': 'my selector', 'label': 'my label' }] } # Test valid Events(good_params()) # Bad array bad_params = good_params() bad_params['event_selectors'] = 3 e = self._try_invalid_events(bad_params) self.assertEqual(str(e), "event_params['event_selectors'] must be iterable") # Bad key types for key in ('event', 'selector', 'label'): bad_params = good_params() bad_params['event_selectors'][0][key] = 3 e = self._try_invalid_events(bad_params) self.assertEqual( str(e), "event_params['event_selectors'][0].%s must be a string" % key) # Bad key bad_params = good_params() bad_params['event_selectors'][0]['mrao'] = 3 e = self._try_invalid_events(bad_params) self.assertEqual( str(e), "'mrao' is not a valid key in event_params['event_selectors'][0]")
def user_appstart_pull(from_date, to_date): """ Pull app user AppStart events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_user_secret: str Client secret used to make calls to Mixpanel User Project. Returns ---------- df_user_AppStart: dataframe Dataframe contains app creator user ID and number of app users. """ #generate JQL query query = JQL(api_user_secret, events=Events({ 'event_selectors': [{ 'event': "AppStart" }], 'from_date': from_date, 'to_date': to_date })).group_by( keys=["e.properties.zUserId", "e.properties.zAppOwnerId"], accumulator=Reducer.count()) #initialize lists to record app user IDs, app creator IDs, app name, and number of AppStarts app_user_id_list = [] owner_id_list = [] AppStart_list = [] #process query results for row in query.send(): if (row['key'][0] is not None) & (row['key'][1] is not None): app_user_id_list.append(int(row['key'][0])) owner_id_list.append(int(row['key'][1])) AppStart_list.append(row['value']) #generate email data = { 'app_user_id': app_user_id_list, 'creator_id': owner_id_list, 'AppStart': AppStart_list } df_AppStart = pd.DataFrame(data) #make sure IDs are valid df_user_AppStart = df_AppStart[(df_AppStart.app_user_id > 1) & (df_AppStart.creator_id > 1)] return df_user_AppStart
def test_query_plan(self): with warnings.catch_warnings(record=True) as w: q = JQL('key', events=Events(), people=People()) q.query_plan() self.assertIs(w[-1].category, DeprecationWarning) self.assertIn('query_plan', str(w[-1].message))
def EditorAction_pull(from_date, to_date): """ Pull app creator EditorAction-Save events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_creator_secret: str Client secret used to make calls to Mixpanel Creator Project. Returns ---------- dataframe Dataframe contains app creator emails, number of EditorAction events, and zEvent = Save filter. """ #generate JQL query query = JQL(api_creator_secret, events=Events({ 'event_selectors': [{ 'event': "EditorAction" }], 'from_date': from_date, 'to_date': to_date })).group_by( keys=["e.properties.zUserEmail", "e.properties.zEvent"], accumulator=Reducer.count()) #initialize lists to record emails, EditorAction events, and zEvent user_email_list = [] EditorAction_list = [] zevent_list = [] #process query results for row in query.send(): if row['key'][0] is not None: user_email_list.append(row['key'][0]) zevent_list.append(row['key'][1]) EditorAction_list.append(row['value']) #create dataframe data = { 'email': user_email_list, 'zevent': zevent_list, 'EditorAction': EditorAction_list } df_editor_action = pd.DataFrame(data) #filter to only include "Save" editor events df_editor_action = df_editor_action[df_editor_action.zevent == 'Save'] return df_editor_action
def user_appstart_pull(from_date, to_date, df_app_users): """ Pull app user AppStart events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_user_secret: str Client secret used to make calls to Mixpanel User Project. Returns ---------- df_user_AppStart: dataframe Dataframe contains app creator user ID, list of app user email domains, and number of app users df_top_appstarts: dataframe Dataframe contains app creator user ID and number of AppStarts from top apps """ #generate JQL query query = JQL(api_user_secret, events=Events({ 'event_selectors': [{ 'event': "AppStart" }], 'from_date': from_date, 'to_date': to_date })).group_by(keys=[ "e.properties.zUserId", "e.properties.zAppOwnerId", "e.properties.zAppName" ], accumulator=Reducer.count()) #initialize lists to record app user IDs, app creator IDs, app name, and number of AppStarts app_user_id_list = [] owner_id_list = [] app_name_list = [] AppStart_list = [] #process query results for row in query.send(): if row['key'][0] is not None: app_user_id_list.append(row['key'][0]) owner_id_list.append(row['key'][1]) app_name_list.append(row['key'][2]) AppStart_list.append(row['value']) #generate email data = { 'app_user_id': app_user_id_list, 'user_id': owner_id_list, 'app_name': app_name_list, 'AppStart': AppStart_list } df_AppStart = pd.DataFrame(data) #merge AppStart and app user dataframes to associate app user emails to number of AppStarts df_AppStart = pd.merge(df_AppStart, df_app_users, on='app_user_id', how='left') #get total users and list of user emails for each app creator df_user_AppStart = df_AppStart.groupby( ['user_id', 'app_user_id'])['app_name'].count().reset_index() df_user_AppStart = df_user_AppStart.groupby( 'user_id')['app_user_id'].count().reset_index() df_user_AppStart = df_user_AppStart.rename( columns={'app_user_id': 'num_app_users'}) #get app user domain list for each app creator df_AppStart_temp = df_AppStart.groupby( ['user_id', 'app_user_id', 'user_email'])['app_name'].count().reset_index() df_AppStart_temp['user_email_domains'] = df_AppStart_temp[ 'user_email'].str.split('@').str[1].fillna('') df_AppStart_temp = df_AppStart_temp[ df_AppStart_temp['user_email_domains'] != ''] df_user_domains_by_creators = df_AppStart_temp.groupby( 'user_id')['user_email_domains'].apply(list).reset_index() df_user_AppStart = pd.merge(df_user_AppStart, df_user_domains_by_creators, on='user_id', how='left') df_user_AppStart.loc[df_user_AppStart['user_email_domains'].isnull(), ['user_email_domains']] = df_user_AppStart.loc[ df_user_AppStart['user_email_domains'].isnull(), 'user_email_domains'].apply(lambda x: []) #only keep the app with the highest number of AppStarts for each app creator df_top_appstarts = df_AppStart.groupby( ['user_id', 'app_name']).AppStart.sum().reset_index().sort_values( 'AppStart', ascending=False).drop_duplicates('user_id', keep='first') df_top_appstarts = df_top_appstarts.rename( columns={'AppStart': 'appstart_by_top_app'}) return df_user_AppStart, df_top_appstarts
def _try_invalid_events(self, params): try: Events(params) self.fail("Expected Events syntax error with params: %s" % params) except JQLSyntaxError as e: return e
def Editor_pull(from_date, to_date): """ Pull app creator Editor events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_creator_secret: str Client secret used to make calls to Mixpanel Creator Project. Returns ---------- dataframe Dataframe contains user IDs, Editor event datetime, and Editor event count for app creators in Mixpanel. """ #generate JQL query query = JQL(api_creator_secret, events=Events({ 'event_selectors': [{ 'event': "Editor" }], 'from_date': from_date, 'to_date': to_date })).group_by(keys=[ "e.properties.zUserId", "new Date(e.time).toISOString()" ], accumulator=Reducer.count()) #store user IDs, Editor count, and Editor datetime user_id_list = [] datetime_list = [] Editor_list = [] for row in query.send(): if row['key'][0] is not None: user_id_list.append(int(row['key'][0])) datetime_list.append( datetime.strptime(row['key'][1][:10], '%Y-%m-%d')) Editor_list.append(row['value']) #generate dataframe data = { 'user_id': user_id_list, 'Editor_datetime': datetime_list, 'Editor': Editor_list } df_Editor = pd.DataFrame(data) return df_Editor
def user_appstart_pull(from_date, to_date): """ Pull app user AppStart events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_user_secret: str Client secret used to make calls to Mixpanel User Project. Returns ---------- df_user_AppStart: dataframe Dataframe contains app creator user ID and number of app users. """ #generate JQL query query = JQL(api_user_secret, events=Events({ 'event_selectors': [{ 'event': "AppStart" }], 'from_date': from_date, 'to_date': to_date })).group_by(keys=[ "e.properties.zUserId", "e.properties.zAppOwnerId", "new Date(e.time).toISOString()" ], accumulator=Reducer.count()) #initialize lists to record app user IDs, app creator IDs, app name, and number of AppStarts app_user_id_list = [] owner_id_list = [] AppStart_list = [] date_list = [] #process query results for row in query.send(): if (row['key'][0] is not None) & (row['key'][1] is not None): app_user_id_list.append(int(row['key'][0])) owner_id_list.append(int(row['key'][1])) date_list.append(datetime.strptime(row['key'][2][:10], '%Y-%m-%d')) AppStart_list.append(row['value']) #generate email data = { 'date': date_list, 'app_user_id': app_user_id_list, 'user_id': owner_id_list, 'AppStart': AppStart_list } df_AppStart = pd.DataFrame(data) #make sure IDs are valid df_AppStart = df_AppStart[(df_AppStart.app_user_id > 1) & (df_AppStart.user_id > 1)] #remove duplicate users on the same day. Since it was pulled based on timestamp, AppStarts in one day can be on multiple roads df_AppStart = df_AppStart.drop_duplicates( ['user_id', 'date', 'app_user_id']) #get total users and list of user emails for each app creator df_user_AppStart = df_AppStart.groupby( ['user_id', 'date']).app_user_id.count().reset_index() df_user_AppStart = df_user_AppStart.rename( columns={'app_user_id': 'num_app_users'}) return df_user_AppStart
def New_Signup_Web_pull(from_date, to_date): """ Pull app creator sign up events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_creator_secret: str Client secret used to make calls to Mixpanel Creator Project. Returns ---------- dataframe Dataframe contains user IDs, sign up datetime, new sign up event count, and country for app creators in Mixpanel. """ #generate JQL query query = JQL(api_creator_secret, events=Events({ 'event_selectors': [{ 'event': "New Signup Web" }], 'from_date': from_date, 'to_date': to_date })).group_by(keys=[ "e.properties.userId", "new Date(e.time).toISOString()" ], accumulator=Reducer.count()) #store emails, user IDs, sign up datetime, and country user_id_list = [] datetime_list = [] new_sign_up_list = [] for row in query.send(): if row['key'][0] is not None: user_id_list.append(int(row['key'][0])) datetime_list.append( datetime.strptime(row['key'][1][:10], '%Y-%m-%d')) new_sign_up_list.append(row['value']) #generate dataframe data = { 'app_owner_id': user_id_list, 'date': datetime_list, 'new_sign_up': new_sign_up_list } df_New_Signup_Web = pd.DataFrame(data) #only keeping ones with actual IDs df_New_Signup_Web = df_New_Signup_Web[df_New_Signup_Web.app_owner_id > 1] return df_New_Signup_Web
def setUp(self): self.query = JQL(api_secret=None, events=Events())
def pull_usage(from_date, to_date): """ Pull usage events in Mixpanel. Parameters ---------- from_date: date Start date of query. to_date: date End date of query. Global Variables ---------- api_creator_secret: str Client secret used to make calls to Mixpanel Creator Project. Returns ---------- dataframe Dataframe contains app ID, user IDs, owner ID, and usage event count in Mixpanel. """ #generate JQL query query = JQL(api_creator_secret, events=Events({ 'event_selectors': [{ 'event': "Usage" }], 'from_date': from_date, 'to_date': to_date })).group_by( keys=["e.properties.OwnerId", "e.properties.UserId"], accumulator=Reducer.count()) #store app owner, app, and app user IDs app_owner_id_list = [] app_user_id_list = [] usage_list = [] for row in query.send(): if row['key'][0] is not None: app_owner_id_list.append(int(row['key'][0])) app_user_id_list.append(int(row['key'][1])) usage_list.append(row['value']) #generate dataframe data = { 'app_owner_id': app_owner_id_list, 'app_user_id': app_user_id_list, 'usage': usage_list } df_usage = pd.DataFrame(data) #only keep app owners and users with proper IDs df_usage = df_usage[(df_usage.app_owner_id > 1) & (df_usage.app_user_id > 1)] df_usage = df_usage[~df_usage.app_owner_id.isin( [10305, 71626])] #remove for demo accounts return df_usage
def New_Signup_App_pull(yesterday): """ Pull app user sign up events in Mixpanel. Parameters ---------- yesterday: date Yesterday's date. Global Variables ---------- api_user_secret: str Client secret used to make calls to Mixpanel User Project. Returns ---------- dataframe Dataframe contains user IDs, sign up datetime, and new sign up event count for app users in Mixpanel. """ #generate JQL query query = JQL(api_user_secret, events=Events({ 'event_selectors': [{ 'event': "New Signup App" }], 'from_date': yesterday, 'to_date': yesterday })).group_by(keys=[ "e.properties.zUserId", "new Date(e.time).toISOString()" ], accumulator=Reducer.count()) #initialize lists to store emails, user IDs, and sign up datetime user_id_list = [] datetime_list = [] new_sign_up_list = [] #process query results for row in query.send(): if row['key'][0] is not None: user_id_list.append(int(row['key'][0])) datetime_list.append( datetime.strptime(row['key'][1][:10], '%Y-%m-%d')) new_sign_up_list.append(row['value']) #generate dataframe data = { 'app_user_id': user_id_list, 'sign_up_datetime': datetime_list, 'new_signup_app': new_sign_up_list } df_New_Signup_App = pd.DataFrame(data) return df_New_Signup_App