def handle(self, *args, **options):
        """Print new-user and old-user retention curves built from fact rows.

        Positional ``args`` are all optional, are parsed with ``int`` and
        default to 1 (days):

        * ``args[0]`` -- signup window
        * ``args[1]`` -- retention window
        * ``args[2]`` -- new-user window

        The windows are laid end to end and finish at the current UTC time,
        in the order: new-user window, signup window, retention window.  All
        rows returned by ``fact_query.iterator`` from the start of the
        new-user window up to now are scanned once.

        A signup window of 0 is not supported: the averaging step divides by
        it.  Empty data, on the other hand, is reported as ``n/a`` in the two
        summary lines rather than raising ``ZeroDivisionError``.
        """
        SIGNUP_WINDOW = int(args[0]) if len(args) > 0 else 1
        RETENTION_WINDOW = int(args[1]) if len(args) > 1 else 1
        NEW_USER_WINDOW = int(args[2]) if len(args) > 2 else 1
        SECONDS_PER_DAY = 24 * 60 * 60

        def print_windows():
            # A single-argument print(...) produces the same output whether it
            # is parsed as a Python 2 print statement or a Python 3 call.
            print("Signup window: %s days" % SIGNUP_WINDOW)
            print("Retention window: %s days" % RETENTION_WINDOW)
            print("New user window: %s days" % NEW_USER_WINDOW)

        def ratio(numerator, denominator):
            # With no matching rows the denominator stays 0; report that
            # instead of crashing after the whole scan has been done.
            if not denominator:
                return "n/a"
            return float(numerator) / denominator

        print_windows()

        stop = datetime.datetime.utcnow()
        retention_start = stop - datetime.timedelta(days=RETENTION_WINDOW)
        signup_start = retention_start - datetime.timedelta(days=SIGNUP_WINDOW)
        new_user_start = signup_start - datetime.timedelta(days=NEW_USER_WINDOW)

        users = collections.defaultdict(User)

        # Smallest user id seen in a signup row so far.  It starts at +inf, so
        # until the first signup row is seen every user passes the
        # "old user" test below.
        old_user_max = float("inf")
        # "ts" of the first row whose "dt" is at or after signup_start; day
        # offsets of old-user visits are measured from it.
        retained_first_ts = None

        for row in fact_query.iterator(start=new_user_start, stop=stop):
            if not retained_first_ts and row["dt"] >= signup_start:
                retained_first_ts = row["ts"]

            # Metric rows carry their real event name under "metric".
            key = row.get("type")
            if key == "metric":
                key = row.get("metric")

            user = users[row["user"]] if "user" in row else None
            if not user:
                continue

            if key == "signup":
                old_user_max = min(old_user_max, row["user"])
                user.signup_ts = row["ts"]
                user.signup_dt = row["dt"]
            elif key == "view":
                if not user.first_view_ts:
                    user.first_view_ts = row["ts"]
                    user.first_view_dt = row["dt"]

                # Mark the whole-day offset (relative to the stored signup_ts)
                # on which this user viewed something.
                # NOTE(review): for users with no signup row in range this
                # relies on User's default signup_ts -- confirm it is numeric.
                user.came_back[(row["ts"] - user.signup_ts) // SECONDS_PER_DAY] = 1

                if row["dt"] >= signup_start and row["user"] <= old_user_max:
                    user.retained_visits[(row["ts"] - retained_first_ts) // SECONDS_PER_DAY] = 1

        user_retention = collections.Counter()
        retained_visits = collections.Counter()
        for user in users.values():
            retained_visits += user.retained_visits
            # Only users who signed up inside the signup window feed the
            # new-user curve.
            if user.signup_dt and signup_start <= user.signup_dt <= retention_start:
                user_retention += user.came_back

        print("")
        print("")
        print_windows()
        print("")

        print("New user retention curve")
        for key, value in sorted(user_retention.items(), key=lambda item: item[0]):
            if key < NEW_USER_WINDOW:
                print("%s : %s" % (key, value))
        print("")

        print("Old user retention curve")
        retained_visits_list = [retained_visits[day] for day in range(RETENTION_WINDOW + SIGNUP_WINDOW)]
        for day, value in enumerate(retained_visits_list):
            print("%s : %s" % (day, value))
        print("")
        print("")

        print("Old user retention curve averaged over signup window days")
        # Sliding mean over SIGNUP_WINDOW consecutive days.
        retained_average = [
            sum(retained_visits_list[day : day + SIGNUP_WINDOW]) / float(SIGNUP_WINDOW)
            for day in range(RETENTION_WINDOW)
        ]
        for day, value in enumerate(retained_average):
            print("%s : %s" % (day, value))

        print("")
        print("=" * 100)
        print("")

        n = user_retention[NEW_USER_WINDOW - 1]
        d = user_retention[0]
        print("New user activation %% over new user window: %s / %s = %s" % (n, d, ratio(n, d)))

        # A retention window of 0 leaves the averaged curve empty; fall back
        # to zeros so the summary line is still printed.
        if retained_average:
            d = retained_average[0]
            n = retained_average[-1]
        else:
            d = n = 0
        print("Retained %% over retained window: %s / %s = %s" % (n, d, ratio(n, d)))
Exemplo n.º 2
0
    def handle(self, *args, **options):
        """Print new-user and old-user retention curves built from fact rows.

        Positional ``args`` are all optional, are parsed with ``int`` and
        default to 1 (days):

        * ``args[0]`` -- signup window
        * ``args[1]`` -- retention window
        * ``args[2]`` -- new-user window

        The windows are laid end to end and finish at the current UTC time,
        in the order: new-user window, signup window, retention window.  All
        rows returned by ``fact_query.iterator`` from the start of the
        new-user window up to now are scanned once.

        A signup window of 0 is not supported: the averaging step divides by
        it.  Empty data, on the other hand, is reported as ``n/a`` in the two
        summary lines rather than raising ``ZeroDivisionError``.
        """
        SIGNUP_WINDOW = int(args[0]) if len(args) > 0 else 1
        RETENTION_WINDOW = int(args[1]) if len(args) > 1 else 1
        NEW_USER_WINDOW = int(args[2]) if len(args) > 2 else 1
        SECONDS_PER_DAY = 24 * 60 * 60

        def print_windows():
            # A single-argument print(...) produces the same output whether it
            # is parsed as a Python 2 print statement or a Python 3 call.
            print("Signup window: %s days" % SIGNUP_WINDOW)
            print("Retention window: %s days" % RETENTION_WINDOW)
            print("New user window: %s days" % NEW_USER_WINDOW)

        def ratio(numerator, denominator):
            # With no matching rows the denominator stays 0; report that
            # instead of crashing after the whole scan has been done.
            if not denominator:
                return "n/a"
            return float(numerator) / denominator

        print_windows()

        stop = datetime.datetime.utcnow()
        retention_start = stop - datetime.timedelta(days=RETENTION_WINDOW)
        signup_start = retention_start - datetime.timedelta(days=SIGNUP_WINDOW)
        new_user_start = signup_start - datetime.timedelta(days=NEW_USER_WINDOW)

        users = collections.defaultdict(User)

        # Smallest user id seen in a signup row so far.  It starts at +inf, so
        # until the first signup row is seen every user passes the
        # "old user" test below.
        old_user_max = float("inf")
        # 'ts' of the first row whose 'dt' is at or after signup_start; day
        # offsets of old-user visits are measured from it.
        retained_first_ts = None

        for row in fact_query.iterator(start=new_user_start, stop=stop):
            if not retained_first_ts and row['dt'] >= signup_start:
                retained_first_ts = row['ts']

            # Metric rows carry their real event name under 'metric'.
            key = row.get('type')
            if key == 'metric':
                key = row.get('metric')

            user = users[row['user']] if 'user' in row else None
            if not user:
                continue

            if key == 'signup':
                old_user_max = min(old_user_max, row['user'])
                user.signup_ts = row['ts']
                user.signup_dt = row['dt']
            elif key == 'view':
                if not user.first_view_ts:
                    user.first_view_ts = row['ts']
                    user.first_view_dt = row['dt']

                # Mark the whole-day offset (relative to the stored signup_ts)
                # on which this user viewed something.
                # NOTE(review): for users with no signup row in range this
                # relies on User's default signup_ts -- confirm it is numeric.
                user.came_back[(row['ts'] - user.signup_ts) // SECONDS_PER_DAY] = 1

                if row['dt'] >= signup_start and row['user'] <= old_user_max:
                    user.retained_visits[(row['ts'] - retained_first_ts) // SECONDS_PER_DAY] = 1

        user_retention = collections.Counter()
        retained_visits = collections.Counter()
        for user in users.values():
            retained_visits += user.retained_visits
            # Only users who signed up inside the signup window feed the
            # new-user curve.
            if user.signup_dt and signup_start <= user.signup_dt <= retention_start:
                user_retention += user.came_back

        print("")
        print("")
        print_windows()
        print("")

        print("New user retention curve")
        for key, value in sorted(user_retention.items(), key=lambda item: item[0]):
            if key < NEW_USER_WINDOW:
                print("%s : %s" % (key, value))
        print("")

        print("Old user retention curve")
        retained_visits_list = [retained_visits[day] for day in range(RETENTION_WINDOW + SIGNUP_WINDOW)]
        for day, value in enumerate(retained_visits_list):
            print("%s : %s" % (day, value))
        print("")
        print("")

        print("Old user retention curve averaged over signup window days")
        # Sliding mean over SIGNUP_WINDOW consecutive days.
        retained_average = [
            sum(retained_visits_list[day:day + SIGNUP_WINDOW]) / float(SIGNUP_WINDOW)
            for day in range(RETENTION_WINDOW)
        ]
        for day, value in enumerate(retained_average):
            print("%s : %s" % (day, value))

        print("")
        print("=" * 100)
        print("")

        n = user_retention[NEW_USER_WINDOW - 1]
        d = user_retention[0]
        print("New user activation %% over new user window: %s / %s = %s" % (n, d, ratio(n, d)))

        # A retention window of 0 leaves the averaged curve empty; fall back
        # to zeros so the summary line is still printed.
        if retained_average:
            d = retained_average[0]
            n = retained_average[-1]
        else:
            d = n = 0
        print("Retained %% over retained window: %s / %s = %s" % (n, d, ratio(n, d)))
Exemplo n.º 3
0
    def handle(self, *args, **options):
        """Print virality statistics for users who signed up recently.

        Positional ``args`` are optional, are parsed with ``float`` and
        default to 0.1 (days):

        * ``args[0]`` -- signup window
        * ``args[1]`` -- virality window

        The signup window is immediately followed by the virality window,
        which ends at the current UTC time.  All rows returned by
        ``fact_query.iterator`` from the start of the signup window up to now
        are scanned once; rows whose ``ip`` is flagged by ``is_bot`` are
        ignored.

        Signup rows that lack a ``user`` are skipped, and signup rows that
        lack an ``ip`` are recorded without session attribution, instead of
        failing with ``AttributeError``.
        """
        SIGNUP_WINDOW = float(args[0]) if len(args) > 0 else 0.1
        VIRALITY_WINDOW = float(args[1]) if len(args) > 1 else 0.1

        # Single-argument print(...) calls give the same output on Python 2
        # (print statement) and Python 3 (print function).
        print("Signup window: %s days" % SIGNUP_WINDOW)
        print("Virality window: %s days" % VIRALITY_WINDOW)

        virality_span = datetime.timedelta(days=VIRALITY_WINDOW)
        stop = datetime.datetime.utcnow()
        virality_start = stop - virality_span
        signup_start = virality_start - datetime.timedelta(days=SIGNUP_WINDOW)

        sessions = collections.defaultdict(Session)
        users = collections.defaultdict(User)

        all_shares = 0

        for row in fact_query.iterator(start=signup_start, stop=stop):
            has_ip = 'ip' in row
            if has_ip and is_bot(row['ip']):
                continue

            session = None
            if has_ip:
                if 'utma' in row:
                    if row['utma'] not in sessions:
                        # First time we've seen this google session
                        # Let's tie any previous requests from this IP to this utma session
                        sessions[row['utma']] = sessions[row['ip']]
                        del sessions[row['ip']]

                    session = sessions[row['utma']]
                else:
                    # We don't have any google session, use the ip instead
                    session = sessions[row['ip']]

            user = users[row['user']] if 'user' in row else None

            # Metric rows carry their real event name under 'metric'.
            key = row.get('type')
            if key == 'metric':
                key = row.get('metric')

            # A signup row without 'user' cannot be attributed to anyone, and
            # one without 'ip' has no session to attach to.
            if key == 'signup' and user is not None:
                user.id = row['user']
                user.signup_dt = row['dt']
                user.in_signup_window = signup_start <= user.signup_dt < virality_start
                if session is not None:
                    session.users.add(user.id)

            if key == 'create_share_url':
                all_shares += 1
                # Within our window of consideration for user-attributable shares
                if user and user.signup_dt and row['dt'] < user.signup_dt + virality_span:
                    user.shares += 1

            if session and key == 'share_redirect':
                session.share_views += 1
                session.sharers.add(row['sharer'])

        signups_initial = {user for user in users.values() if user.in_signup_window}

        viral_sessions = set()
        outside_viral_sessions = set()
        for session in sessions.values():
            # NOTE(review): session.sharers holds row['sharer'] values while
            # signups_initial holds User objects; this intersection only
            # matches if User hashes/compares equal to those values -- confirm.
            if session.sharers & signups_initial:
                viral_sessions.add(session)
            elif session.sharers:
                print(session.sharers)
                outside_viral_sessions.add(session)

        viral_signups = set()
        for session in viral_sessions:
            viral_signups |= session.users

        outside_signups = set()
        for session in outside_viral_sessions:
            outside_signups |= session.users

        print("")
        print("")
        print("CORE STATS:")
        print("Initial Signups (IS): %s" % len(signups_initial))
        print("Shares from IS: %s" % sum(user.shares for user in signups_initial))
        print("Sessions from shares: %s" % len(viral_sessions))
        print("Signups from share sessions: %s" % len(viral_signups))
        print("")
        print("")
        print("JUST FOR REFERENCE:")
        print("All shares (new and old users): %s" % all_shares)
        print("Outside sessions from shares: %s" % len(outside_viral_sessions))
        print("Outside signups from shares: %s" % len(outside_signups))
    def handle(self, *args, **options):
        """Print virality statistics for users who signed up recently.

        Positional ``args`` are optional, are parsed with ``float`` and
        default to 0.1 (days):

        * ``args[0]`` -- signup window
        * ``args[1]`` -- virality window

        The signup window is immediately followed by the virality window,
        which ends at the current UTC time.  All rows returned by
        ``fact_query.iterator`` from the start of the signup window up to now
        are scanned once; rows whose ``ip`` is flagged by ``is_bot`` are
        ignored.

        Signup rows that lack a ``user`` are skipped, and signup rows that
        lack an ``ip`` are recorded without session attribution, instead of
        failing with ``AttributeError``.
        """
        SIGNUP_WINDOW = float(args[0]) if len(args) > 0 else 0.1
        VIRALITY_WINDOW = float(args[1]) if len(args) > 1 else 0.1

        # Single-argument print(...) calls give the same output on Python 2
        # (print statement) and Python 3 (print function).
        print("Signup window: %s days" % SIGNUP_WINDOW)
        print("Virality window: %s days" % VIRALITY_WINDOW)

        virality_span = datetime.timedelta(days=VIRALITY_WINDOW)
        stop = datetime.datetime.utcnow()
        virality_start = stop - virality_span
        signup_start = virality_start - datetime.timedelta(days=SIGNUP_WINDOW)

        sessions = collections.defaultdict(Session)
        users = collections.defaultdict(User)

        all_shares = 0

        for row in fact_query.iterator(start=signup_start, stop=stop):
            has_ip = "ip" in row
            if has_ip and is_bot(row["ip"]):
                continue

            session = None
            if has_ip:
                if "utma" in row:
                    if row["utma"] not in sessions:
                        # First time we've seen this google session
                        # Let's tie any previous requests from this IP to this utma session
                        sessions[row["utma"]] = sessions[row["ip"]]
                        del sessions[row["ip"]]

                    session = sessions[row["utma"]]
                else:
                    # We don't have any google session, use the ip instead
                    session = sessions[row["ip"]]

            user = users[row["user"]] if "user" in row else None

            # Metric rows carry their real event name under "metric".
            key = row.get("type")
            if key == "metric":
                key = row.get("metric")

            # A signup row without "user" cannot be attributed to anyone, and
            # one without "ip" has no session to attach to.
            if key == "signup" and user is not None:
                user.id = row["user"]
                user.signup_dt = row["dt"]
                user.in_signup_window = signup_start <= user.signup_dt < virality_start
                if session is not None:
                    session.users.add(user.id)

            if key == "create_share_url":
                all_shares += 1
                # Within our window of consideration for user-attributable shares
                if user and user.signup_dt and row["dt"] < user.signup_dt + virality_span:
                    user.shares += 1

            if session and key == "share_redirect":
                session.share_views += 1
                session.sharers.add(row["sharer"])

        signups_initial = {user for user in users.values() if user.in_signup_window}

        viral_sessions = set()
        outside_viral_sessions = set()
        for session in sessions.values():
            # NOTE(review): session.sharers holds row["sharer"] values while
            # signups_initial holds User objects; this intersection only
            # matches if User hashes/compares equal to those values -- confirm.
            if session.sharers & signups_initial:
                viral_sessions.add(session)
            elif session.sharers:
                print(session.sharers)
                outside_viral_sessions.add(session)

        viral_signups = set()
        for session in viral_sessions:
            viral_signups |= session.users

        outside_signups = set()
        for session in outside_viral_sessions:
            outside_signups |= session.users

        print("")
        print("")
        print("CORE STATS:")
        print("Initial Signups (IS): %s" % len(signups_initial))
        print("Shares from IS: %s" % sum(user.shares for user in signups_initial))
        print("Sessions from shares: %s" % len(viral_sessions))
        print("Signups from share sessions: %s" % len(viral_signups))
        print("")
        print("")
        print("JUST FOR REFERENCE:")
        print("All shares (new and old users): %s" % all_shares)
        print("Outside sessions from shares: %s" % len(outside_viral_sessions))
        print("Outside signups from shares: %s" % len(outside_signups))
Exemplo n.º 5
0
 def __init__(self, **kwargs):
     """Remember the query bounds and prepare a re-runnable fact iterator.

     ``kwargs`` are forwarded unchanged to ``fact_query.iterator`` each time
     ``self.fact_iter()`` is called, and the resulting iterator is passed
     through ``_wrapped_iter``.  ``start`` and ``stop`` are copied from
     ``kwargs`` (``None`` when absent) and ``_phase`` begins at 1.
     """
     def fresh_fact_iter():
         # Build a new underlying iterator on every call so the facts can
         # be walked more than once.
         return _wrapped_iter(fact_query.iterator(**kwargs))

     self.fact_iter = fresh_fact_iter
     self._phase = 1
     self.start = kwargs.get('start', None)
     self.stop = kwargs.get('stop', None)