Пример #1
0
  def test_stream_inversion_timestamps(self):
    """
    Check the timestamps reported by an inverted funnel step.

    Both users '0' and '1' enter the step with the same last-action
    timestamp.  After running the inverted 'stream5' step, exactly one user
    is expected in the output, and user '1' must still carry the timestamp
    it had in the previous funnel step.
    """
    from analysis import IdentityDict
    from analysis import _stream_earliest_action

    # The client hands back the stream5 fixture on its single `get` call.
    fetch = Mock(side_effect=[self.get_stream5()])
    kronos_client = Mock()
    kronos_client.get = fetch

    window_start = datetime.datetime(2014, 3, 20)
    window_end = datetime.datetime(2014, 3, 21)
    inverted_step = FunnelStep('stream5', invert=True)

    previous_actions = {user: datetime_to_kronos_time(window_start)
                        for user in ('0', '1')}
    fuzzy_window = timedelta_to_kronos_time(datetime.timedelta(minutes=5))
    id_mappings = {'userId': IdentityDict()}

    result = _stream_earliest_action(kronos_client, inverted_step,
                                     window_start, window_end,
                                     fuzzy_window, previous_actions,
                                     id_mappings)

    surviving = result['user_action']
    self.assertEqual(len(surviving), 1)
    self.assertEqual(surviving['1'], datetime_to_kronos_time(window_start))
Пример #2
0
    def test_stream_inversion_timestamps(self):
        """
        Check the timestamps reported by an inverted funnel step.

        Users '0' and '1' both arrive with the same last-action timestamp.
        Once the inverted 'stream5' step has run, the output should contain
        exactly one user, and user '1' should keep the timestamp of its
        last action from the previous funnel step.
        """
        from analysis import IdentityDict
        from analysis import _stream_earliest_action

        # The mocked client returns the stream5 fixture on its one `get`.
        stream_fetch = Mock(side_effect=[self.get_stream5()])
        mock_client = Mock()
        mock_client.get = stream_fetch

        range_start = datetime.datetime(2014, 3, 20)
        range_end = datetime.datetime(2014, 3, 21)
        funnel_step = FunnelStep('stream5', invert=True)

        prior_actions = {}
        for user_id in ('0', '1'):
            prior_actions[user_id] = datetime_to_kronos_time(range_start)

        five_minutes = datetime.timedelta(minutes=5)
        fuzzy = timedelta_to_kronos_time(five_minutes)
        mappings = {'userId': IdentityDict()}

        output = _stream_earliest_action(mock_client, funnel_step,
                                         range_start, range_end, fuzzy,
                                         prior_actions, mappings)

        actions = output['user_action']
        self.assertEqual(len(actions), 1)
        self.assertEqual(actions['1'], datetime_to_kronos_time(range_start))
Пример #3
0
def funnel_analyze(client,
                   streams,
                   start,
                   end,
                   end_first_funnel_step,
                   user_id_mappers,
                   user_filter,
                   fuzzy_time=timedelta(minutes=5)):
    """Run a funnel analysis across an ordered list of funnel steps.

    Each step is evaluated in order with `_stream_earliest_action`; the
    `user_action` output of one step is fed into the next step as the
    users' last known actions.

    `client`: the client object handed to `_stream_earliest_action` to
    fetch each step's events.

    `streams`: a list of FunnelStep objects, each representing a step in
    the funnel.  The funnel is composed from these objects.

    `start`/`end`: the start and end datetimes to analyze.

    `end_first_funnel_step`: the end time of the first funnel step. You
    sometimes want this to be earlier than the rest of the other steps
    so you can study how a cohort takes certain actions down the line.
    Must not be later than `end`.

    `user_id_mappers`: a dictionary of the form
      {user_id_field: user_id_mapping_function}.
    A user_id_field entry should exist for
    any user_id fieldname of `streams` subsequent to the first stream
    in the funnel.  For example, if `streams` is:
      [(s1, f1, 'userId'), (s2, f2, 'userId'), (s3, f3, 'username')],
    then `user_id_mappers` should be:
      {'username': function_from_userId_to_username(userId)}

    `user_filter`: a function that returns True/False depending on
    whether an event from a user should be considered (for segmentation,
    for instance).  If user_filter is None, all users will be accepted.

    `fuzzy_time`: a timedelta representing the time that two events in
    subsequent streams can be out-of-order with one-another.

    Returns the FunnelOutput to which every step's output has been added.

    Raises AssertionError if `end` is earlier than `end_first_funnel_step`.
    """
    assert end >= end_first_funnel_step
    streams, user_id_mappers = _sanity_check_args(streams, user_id_mappers)
    # Before the first step there are no previous actions; start from a
    # FilterCache built around `user_filter`.
    last_user_action = FilterCache(user_filter)
    fuzzy_time = timedelta_to_kronos_time(fuzzy_time)
    funnel_output = FunnelOutput()
    user_id_mappings = {}

    for idx, stream in enumerate(streams):
        is_first_step = idx == 0
        # Pass the stream name as a lazy %-argument; without a placeholder
        # in the message, logging fails to format the record.
        log.debug('Processing stream %s', stream.stream_name)
        step_end = end
        if is_first_step:
            # The first step's own user field needs no translation.
            user_id_mappings[stream.user_field] = IdentityDict()
            step_end = end_first_funnel_step
        output = _stream_earliest_action(client, stream, start, step_end,
                                         fuzzy_time, last_user_action,
                                         user_id_mappings)
        funnel_output.add(output)
        last_user_action = output['user_action']
        # For the first stream in the funnel, load the mappings to other
        # user_id formats we'll find in subsequent streams.
        if is_first_step:
            log.debug('Loading user_id mappings')
            _load_user_id_mappings(user_id_mappings, user_id_mappers,
                                   last_user_action)

    return funnel_output
  def filter_and_sum(self, start_time, end_time):
    """Bin `self.stream` into buckets, yielding the sum of `b` when `a` == 2.

    Fetches the events of `self.stream` between `start_time` and `end_time`
    through `self.client.get`.  Events whose `a` equals 2 are grouped into
    buckets of width `self.bucket_width`; a bucket is identified by the
    event's `@time` rounded down to a multiple of that width.

    This is a generator.  For every bucket that received at least one
    matching event it yields, in increasing `@time` order, a dict of the
    form {'@time': <bucket start>, 'b_sum': <sum of `b` in the bucket>}.
    """
    events = self.client.get(self.stream, start_time, end_time)
    counts = defaultdict(int)
    # Bucket width converted to the same units as the events' `@time`.
    bucket_width = timedelta_to_kronos_time(self.bucket_width)
    for event in events:
      if event['a'] == 2:
        event_time = event['@time']
        bucket_start = event_time - (event_time % bucket_width)
        counts[bucket_start] += event['b']
    # Iterating the dict directly yields its keys on both Python 2 and 3;
    # `dict.iterkeys()` does not exist on Python 3.
    for group_time in sorted(counts):
      yield {'@time': group_time, 'b_sum': counts[group_time]}