Пример #1
0
    def render(self, params, request_params):
        """Return one page of customer journeys matching the request filters.

        Builds a ``$match`` / ``$skip`` / ``$limit`` aggregation pipeline from
        the query produced by ``self.prepare_query`` and serializes every
        returned document through ``CustomerJourney(...).to_dict()``.

        Args:
            params: validated parameters; this method reads ``offset``,
                ``limit`` and ``short_fields``.
            request_params: raw request parameters, forwarded untouched to
                ``self.prepare_query``.

        Returns:
            dict with ``ok``, ``list`` (the serialized journeys), ``limit``,
            ``offset`` and ``more_data_available``.
        """
        # prepare_query runs first because it can change self.model, which is
        # read below when the aggregation is executed.
        query = self.prepare_query(params, request_params)

        pipeline = [{'$match': query}]
        if params['offset']:
            pipeline.append({'$skip': params['offset']})
        if params['limit']:
            pipeline.append({'$limit': params['limit']})

        app.logger.debug(pipeline)
        agg_results = self.model.objects.coll.aggregate(pipeline)['result']
        # NOTE: params['step'] / params['step_name'] are not applied here; the
        # former in-Python post-filter on stage_sequence_names was disabled.

        short_fields = frozenset([
            'id', 'journey_type_name', 'customer_id', 'customer_name',
            'status', 'segment_names', 'total_effort', 'journey_tags',
            'start_date', 'last_event_date', 'journey_attributes',
        ])

        journeys = [CustomerJourney(data).to_dict() for data in agg_results]
        for journey in journeys:
            journey['customer_id'] = str(journey['customer_id'])
            journey['journey_type_name'] = JourneyType.objects.get(
                journey['journey_type_id']).display_name
            if params['short_fields']:
                # Snapshot the keys first: deleting while iterating a live
                # key view raises RuntimeError on Python 3.
                for key in list(journey.keys()):
                    if key not in short_fields:
                        del journey[key]

        return dict(
            ok=True,
            list=journeys,
            limit=params['limit'],
            offset=params['offset'],
            # A full page suggests there may be more rows after this one.
            more_data_available=len(journeys) == params['limit'],
        )
Пример #2
0
    def format_customers(self, customers):
        """Serialize customers together with per-customer journey statistics.

        Every journey whose ``customer_id`` belongs to one of ``customers`` is
        scanned once; for each customer the number of journeys, the distinct
        agents and the distinct journey type ids are accumulated.

        Args:
            customers: iterable of customer objects exposing ``id`` and
                ``to_dict()``.

        Returns:
            ``map(...)`` over ``customers`` yielding each ``to_dict()`` result
            extended with ``journeys_count``, ``agents_count`` and
            ``journey_types_display``.
        """
        def _empty_stats():
            # Fresh containers per customer so sets are never shared.
            return {
                'journeys_count': 0,
                'agents': set(),
                'journey_type_ids': set()
            }

        ids = [each.id for each in customers]
        matching_journeys = CustomerJourney.objects(customer_id__in=ids)
        stats_by_customer = defaultdict(_empty_stats)

        for journey in matching_journeys:
            stats_by_customer[journey.customer_id]['journeys_count'] += 1
            stats_by_customer[journey.customer_id]['agents'].update(
                journey.agents)
            stats_by_customer[journey.customer_id]['journey_type_ids'].add(
                journey.journey_type_id)

        def serialize(customer):
            payload = customer.to_dict()
            stats = stats_by_customer[customer.id]
            journey_types = JourneyType.objects.cached_find_by_ids(
                stats['journey_type_ids'])
            type_names = [jt.display_name for jt in journey_types]

            payload['journeys_count'] = stats['journeys_count']
            payload['agents_count'] = len(stats['agents'])
            payload['journey_types_display'] = ', '.join(type_names)
            return payload

        return map(serialize, customers)
Пример #3
0
    def get_customer_journeys(self, account):
        """Return the journeys of this object's customer profile in ``account``.

        Args:
            account: passed to ``self.customer_profile`` to resolve the
                customer profile.

        Returns:
            A slice (``[:]``) of the ``CustomerJourney`` query filtered by the
            customer's id, or an empty list when no profile is found.
        """
        # Imported locally, as in the rest of this module's journey helpers.
        from solariat_bottle.db.journeys.customer_journey import CustomerJourney

        profile = self.customer_profile(account)
        if not profile:
            return []
        return CustomerJourney.objects(customer_id=profile.id)[:]
Пример #4
0
    def prepare_plot_result(self, params, result):
        """Convert aggregation rows into the series structure used for plots.

        Args:
            params: reads ``plot_type`` (required), ``computed_metric``
                (defaults to ``'count'``) and ``group_by``; for timelines
                also ``level``, ``from`` and ``to``.
            result: iterable of aggregation rows; each row holds the metric
                under the ``computed_metric`` key and, when grouping, the
                group value under ``row['_id'][group_by]``.

        Returns:
            For ``'timeline'``: a list of ``{'label', 'data'}`` dicts, one per
            label. For ``'avg_distributions'``: a list of ``{'label', 'value'}``
            dicts, one per row. For any other plot type: an empty list.
        """
        metric = params.get('computed_metric', 'count')
        grouping = params.get('group_by')
        plot_type = params['plot_type']

        def label_for(row):
            # Ungrouped rows all share a single generic label.
            if not grouping:
                return "All journeys' %s" % metric
            return CustomerJourney.metric_label(
                metric, grouping, row['_id'][grouping])

        if plot_type == 'avg_distributions':
            # TODO UI is not showing float values, so rounding to integer for the time being
            return [
                dict(label=label_for(row), value=int(row[metric]))
                for row in result
            ]

        if plot_type != 'timeline':
            return []

        # Collect [timestamp, value] points per label.
        points_by_label = defaultdict(list)
        for row in result:
            series_label = label_for(row)
            moment = self.get_timestamp(row)
            points_by_label[series_label].append([moment, row[metric]])

        series = []
        for series_label, points in points_by_label.iteritems():
            if params['level'] == 'hour':
                data = self.fill_with_zeroes(
                    points, params['from'], params['to'])
            else:
                data = sorted(points, key=lambda point: point[0])
            series.append(dict(label=series_label, data=data))

        self.fill_multi_series_with_zeroes(series)
        return series
Пример #5
0
    def format_agents(self, agents):
        """Serialize agents together with the number of distinct customers.

        Args:
            agents: iterable of agent objects exposing ``id`` and
                ``to_dict()``.

        Returns:
            ``map(...)`` over ``agents`` yielding each ``to_dict()`` result
            extended with ``customers_count``.
        """
        requested_ids = [each.id for each in agents]
        customers_by_agent = defaultdict(set)

        # NOTE(review): the map is filled with the elements of
        # ``journey.agents`` but read back with ``agent.id`` below, so this
        # presumably relies on ``journey.agents`` holding agent ids - confirm.
        for journey in CustomerJourney.objects(agent_ids__in=requested_ids):
            for journey_agent in journey.agents:
                customers_by_agent[journey_agent].add(journey.customer_id)

        def serialize(agent):
            payload = agent.to_dict()
            payload['customers_count'] = len(customers_by_agent[agent.id])
            return payload

        return map(serialize, agents)
Пример #6
0
    def __setattr__(self, k, v):
        """Validate journey attribute expressions before storing an attribute.

        On every assignment the class-level
        ``JourneyType.journey_attributes_context_vars`` list is rebuilt. When
        the assignment carries a journey attributes schema (either directly
        via ``journey_attributes_schema`` or inside ``data`` under the schema's
        db field name), each entry's ``field_expr`` is handed to
        ``BaseParser``; any error it raises propagates to the caller.

        Args:
            k: attribute name being set.
            v: value being assigned.

        Returns:
            Whatever the parent ``__setattr__`` returns.
        """
        # TODO: set journey_attributes_context_vars below class definition, but cyclic import won't allow
        from solariat_bottle.db.journeys.customer_journey import CustomerJourney
        reserved_keys = [
            JourneyType.stage_key, JourneyType.event_key,
            JourneyType.events_key
        ]
        JourneyType.journey_attributes_context_vars = (
            reserved_keys + CustomerJourney.field_names +
            CustomerJourney.get_properties())

        schema_key = JourneyType.journey_attributes_schema.db_field
        schema = None
        if k == 'data':
            if schema_key in v:
                schema = v[schema_key]
        elif k == 'journey_attributes_schema':
            schema = v

        # schema is a meta schema for CustomerJourney, test if expression
        # defined in the schema compiles successfully
        for definition in schema or ():
            # Context vars are intentionally not passed yet:
            # JourneyType.journey_attributes_context_vars
            BaseParser(definition['field_expr'], [])

        return super(JourneyType, self).__setattr__(k, v)
Пример #7
0
def process_event_batch(account_id, batch_size):
    """Process a batch of unprocessed events for one account into journeys.

    Steps, in order:

    1. Load up to ``batch_size`` events with ``_was_processed=False`` on the
       account's current channels, sorted by ``_created``.
    2. Load the customer profiles, in-progress journeys, the events already
       belonging to those journeys, and the agent profiles involved.
    3. Feed every batch event into the matching journeys of its customer,
       creating new in-memory journeys when a stage matches and no journey
       of that journey type exists yet for the customer.
    4. Write agents, customers, events and journeys back with unordered bulk
       operations, then refresh each journey type's cardinalities and
       ``journeys_num``.

    Progress and timing information is printed to stdout.

    Args:
        account_id: id of the account whose events are processed.
        batch_size: maximum number of events taken from the unprocessed set.

    Returns:
        None. Returns early (after printing a message) when there are no
        unprocessed events.
    """
    start_time = datetime.now()
    # TODO: Once it works, break this up in specific sub-calls to make code more readable.
    account = Account.objects.get(account_id)
    CustomerProfile = account.get_customer_profile_class()
    AgentProfile = account.get_agent_profile_class()
    # TODO: account_id should be used for account specific collections
    # actor_num -> [(event, agent_id or None)] for the events in this batch
    customer_event_map = defaultdict(list)
    # customer id -> [journey]; holds loaded journeys and ones created below
    journey_event_map = defaultdict(list)

    customer_profile_map = dict()   # actor_num -> customer profile
    agent_profile_map = dict()      # agent id -> agent profile
    journey_type_map = dict()       # journey type display_name -> journey type
    num_to_id_customer = dict()     # actor_num -> customer id
    customer_id_to_num = dict()     # customer id -> actor_num

    for journey_type in JourneyType.objects.find(account_id=account_id):
        journey_type_map[journey_type.display_name] = journey_type

    agent_ids = set()
    # TODO: After account specific collection is done this should work just fine / uncomment
    # event_batch = Event.objects.find(_was_processed=False).sort(_created=1)[:batch_size]
    event_batch = Event.objects.find(
        channels__in=[c.id for c in account.get_current_channels()],
        _was_processed=False).sort(_created=1)[:batch_size]

    if not event_batch:
        print "No new events found"
        return
    # Group batch events by the actor number packed into the event id; for
    # non-inbound events the actor is recorded as the agent.
    for event in event_batch:
        actor_num, _ = unpack_event_id(event.id)
        agent_id = None
        if not event.is_inbound:
            agent_id = event.actor_id
            agent_ids.add(agent_id)
        customer_event_map[actor_num].append((event, agent_id))

    all_customers = CustomerProfile.objects.find(actor_num__in=customer_event_map.keys())[:]
    for customer in all_customers:
        customer_profile_map[customer.actor_num] = customer
        num_to_id_customer[customer.actor_num] = customer.id
        customer_id_to_num[customer.id] = customer.actor_num
    all_active_journeys = CustomerJourney.objects.find(account_id=account.id,
                                                       customer_id__in=num_to_id_customer.values(),
                                                       status=JourneyStageType.IN_PROGRESS)[:]

    # One event-id range per active journey, combined into a single $or query
    # so the existing events of all journeys are fetched in one call.
    event_sequence_query = []
    for journey in all_active_journeys:
        journey._event_sequence = []
        journey_event_map[journey.customer_id].append(journey)
        for agent in journey.agent_ids:
            agent_ids.add(agent)

        actor_num = customer_id_to_num[journey.customer_id]
        id_lower_bound = pack_event_id(actor_num, utc(journey.first_event_date))
        id_upper_bound = pack_event_id(actor_num, utc(journey.last_event_date))
        event_sequence_query.append({'_id': {'$gte': id_lower_bound, '$lte': id_upper_bound}})

    # customer id -> previously stored events, in created_at order
    actor_id_events = defaultdict(list)
    if event_sequence_query:
        all_required_events = Event.objects.find(**{'$or': event_sequence_query})[:]
        for event in sorted(all_required_events, key=lambda x: x.created_at):
            actor_num, _ = unpack_event_id(event.id)
            customer_id = num_to_id_customer[actor_num]
            # Attach the event to every journey whose date window contains it.
            for journey in journey_event_map[customer_id]:
                if utc(journey.first_event_date) <= utc(event.created_at) <= utc(journey.last_event_date):
                    journey._event_sequence.append(event)
            actor_id_events[customer_id].append(event)

    all_agents = AgentProfile.objects.find(id__in=agent_ids)[:]
    for agent in all_agents:
        agent_profile_map[agent.id] = agent
    print "Finished loading all the required data in " + str(datetime.now() - start_time)
    start_time = datetime.now()
    # All ongoing journeys for these customers are considered. For customers
    # that have no journey of a given type, a new journey of that type is
    # started when the event matches one of the type's stages.
    for customer_num, customer_events in customer_event_map.iteritems():
        # TODO: If we need to, this would be a point where we can split based on customer id
        if customer_num not in customer_profile_map:
            continue    # Events from different account. Will be fixed by account specific collections
        customer = customer_profile_map[customer_num]
        for (event, agent) in customer_events:
            event._was_processed = True
            actor_num, _ = unpack_event_id(event.id)
            customer_id = num_to_id_customer[actor_num]
            journey_candidates = journey_event_map[customer.id]

            # Explicit "<journey type name>__<stage name>" mappings carried by
            # the event take precedence over stage evaluation.
            direct_mappings = dict()
            for mapping in event.journey_mapping:
                journey_type_name, journey_stage_name = mapping.split('__')
                direct_mappings[journey_type_name] = journey_stage_name

            for journey_type in journey_type_map.values():
                found_journey_stage = None
                if journey_type.display_name in direct_mappings:
                    # Raises IndexError when the mapped stage name is not
                    # among the journey type's available stages.
                    found_journey_stage = [stage for stage in journey_type.available_stages if
                                           stage.display_name == direct_mappings[journey_type.display_name]][0]
                else:
                    # First stage whose evaluate_event accepts the event wins.
                    for journey_stage in journey_type.available_stages:
                        if journey_stage.evaluate_event(event, customer, actor_id_events.get(customer_id, [])):
                            found_journey_stage = journey_stage
                            break

                found_match = False
                # First step is to try and find it in existing journeys
                for journey in journey_candidates:
                    # All the currently in progress or completed journeys that are matched to same stage
                    if journey.journey_type_id == journey_type.id: # and (journey.status == JourneyStageType.IN_PROGRESS or
                                                                   #    journey.f_current_stage == found_journey_stage):
                        found_match = True
                        journey.agents = [agent_profile_map[a_id] for a_id in journey.agent_ids]
                        journey.customer_profile = customer
                        journey.current_event = event
                        journey.journey_type = journey_type

                        # Note: called even when found_journey_stage is None.
                        journey.process_event(event, customer, agent_profile_map[agent] if agent else None,
                                              found_journey_stage)
                        journey.event_sequence = journey.event_sequence + [event]

                if found_journey_stage:
                    # If we didn't find any match in existing journeys, create a new one. We create it in memory
                    # So as to not do any extra mongo calls.
                    if not found_match:
                        journey = CustomerJourney(customer_id=customer.id,
                                                  journey_type_id=journey_type.id,
                                                  first_event_date=event.created_at,
                                                  account_id=account_id,
                                                  status=JourneyStageType.IN_PROGRESS,
                                                  node_sequence=[],
                                                  node_sequence_agr=[],
                                                  journey_attributes_schema=journey_type.journey_attributes_schema)
                        journey._event_sequence = []
                        # Appending to journey_candidates also registers the
                        # journey in journey_event_map (same list object), so
                        # later events and the bulk write below see it.
                        journey_candidates.append(journey)
                        journey.agents = [agent_profile_map[a_id] for a_id in journey.agent_ids]
                        journey.customer_profile = customer
                        journey.current_event = event
                        journey.journey_type = journey_type

                        journey.process_event(event, customer, agent_profile_map[agent] if agent else None,
                                              found_journey_stage)
                        journey.event_sequence = journey.event_sequence + [event]    # TODO: As it is, it will still be one call per journey
                        journey_type.journeys_num += 1

    print "Finished computing journey info in " + str(datetime.now() - start_time)
    start_time = datetime.now()
    # Upsert all journeys, all customer profiles, all agent profiles, all events
    if all_agents:
        bulk_agents = AgentProfile.objects.coll.initialize_unordered_bulk_op()
        for agent in all_agents:
            bulk_agents.find({"_id": agent.id}).upsert().update({'$set': agent.data})
        bulk_agents.execute()

    if all_customers:
        bulk_customers = CustomerProfile.objects.coll.initialize_unordered_bulk_op()
        for customer in all_customers:
            bulk_customers.find({"_id": customer.id}).upsert().update({'$set': customer.data})
        bulk_customers.execute()

    if event_batch:
        bulk_events = Event.objects.coll.initialize_unordered_bulk_op()
        for event in event_batch:
            bulk_events.find({"_id": event.id}).upsert().update({'$set': event.data})
        bulk_events.execute()

    # bulk_journeys is only created when the map has at least one entry; the
    # loop below only uses it in that same case.
    if journey_event_map.values():
        bulk_journeys = CustomerJourney.objects.coll.initialize_unordered_bulk_op()
    # Tracks whether any operation was queued; entries in the map can be
    # empty lists, in which case execute() is skipped.
    have_journeys = False
    for customer_journeys in journey_event_map.values():
        for journey in customer_journeys:
            have_journeys = True
            if journey.id:
                # Journey already has an id: update it in place.
                bulk_journeys.find({"_id": journey.id}).upsert().update({'$set': journey.data})
            else:
                # Journey created in memory above: insert it.
                bulk_journeys.insert(journey.data)
    if have_journeys:
        bulk_journeys.execute()
    else:
        print "No journeys to upsert"

    print "Finished all the bulk inserts in " + str(datetime.now() - start_time)

    # Persist the per-type counters that were incremented in memory above.
    for journey_type in journey_type_map.values():
        journey_type.compute_cardinalities()
        journey_type.update(journeys_num=journey_type.journeys_num)
Пример #8
0
def get_journey_expression_context():
    """Return the result of ``CustomerJourney.get_properties()``.

    Judging by the function name, the value serves as the context for
    journey expressions. ``CustomerJourney`` is imported inside the function
    rather than at module level.
    """
    from solariat_bottle.db.journeys.customer_journey import CustomerJourney
    return CustomerJourney.get_properties()
Пример #9
0
    def add_stages_and_stats(self, path_id, aggr_func, params, result):
        """Format one aggregated journey path for the front-end.

        Args:
            path_id: identifier of the path; also used as ``group_by`` and
                ``label`` in the output.
            aggr_func: name of the aggregation measure, echoed as ``measure``.
            params: request parameters; ``params['journey_type'][0]`` is the
                journey type id used for the counts and stage lookups.
            result: one aggregation row with ``_id.node_sequence_agr`` (list of
                ``"<stage name>:<event type name>"`` strings), ``node_stats``
                (list of traces, each a list of single-value dicts aligned with
                the node sequence), ``status`` and ``journeys_count``.

        Returns:
            dict describing the path: journey counts, the percentage of all
            finished journeys of this type that follow the path, and the
            stages with their nodes and the per-node average counts.
        """
        account = self.user.account
        node_sequence = result['_id']['node_sequence_agr']

        # basic template
        formatted_result = {
            'path_id': path_id,
            'group_by': path_id,
            'measure': aggr_func,
            'label': path_id,
            'no_of_journeys': 0,
            'metrics': {
                'percentage': {
                    'label': '% of paths',
                    'value': 0
                },
            },
            'stages': [],
            'node_sequence_agr': node_sequence,  # this will be used for drilldown
        }

        # Collect, per node position, the count reported by every trace.
        counts_per_node = [[] for _ in node_sequence]
        for trace in result['node_stats']:
            for i, item in enumerate(trace):
                counts_per_node[i].append(list(item.values())[0])

        # Average per node, rounded up. float() forces true division: with
        # integer counts on Python 2 the quotient would be floored before
        # math.ceil could round it up. Nodes without any trace count as 0
        # instead of raising ZeroDivisionError.
        node_stats = [
            int(math.ceil(float(sum(counts)) / len(counts))) if counts else 0
            for counts in counts_per_node
        ]

        # getting basic counts
        journey_type_id = params['journey_type'][0]
        no_of_all_journeys = CustomerJourney.objects(
            status__in=[
                JourneyStageType.COMPLETED, JourneyStageType.TERMINATED
            ],
            journey_type_id=journey_type_id).count()
        no_of_abandoned_journeys = result['status'].count(
            JourneyStageType.TERMINATED)

        formatted_result['no_of_journeys'] = result['journeys_count']
        formatted_result['no_of_abandoned_journeys'] = no_of_abandoned_journeys

        # percentage of all finished journeys that follow this path
        if no_of_all_journeys:
            percentage = "%.1f" % (
                float(result['journeys_count']) / no_of_all_journeys * 100)
        else:
            percentage = '0'
        formatted_result['metrics']['percentage']['value'] = percentage

        # populating stage and nodes info for the front-end
        stages = formatted_result['stages']
        for i, node in enumerate(node_sequence):
            stage_type_name, event_type_name = node.split(':')

            # Consecutive nodes of the same stage are grouped together, so the
            # stage type only has to be fetched when a new group starts.
            if not stages or stages[-1]['label'] != stage_type_name:
                stage_type = JourneyStageType.objects.get(
                    journey_type_id=journey_type_id,
                    display_name=stage_type_name)
                stages.append({
                    'label': stage_type_name,
                    'stage_type_id': str(stage_type.id),
                    'nodes': []
                })

            event_type = BaseEventType.objects.get_by_display_name(
                account.id,
                event_type_name)  # event_type_name from Event.event_type

            stages[-1]['nodes'].append({
                'label': "%s %s" % (node_stats[i], event_type_name),
                'name': event_type_name,
                'count': node_stats[i],
                'platform': event_type.platform,
                'event_type_id': str(event_type.id)
            })
        return formatted_result