Exemplo n.º 1
0
    def _get_geoip_data_json(self):
        """
        Build a copy of ``self.geoip_data`` whose values have each been passed
        through ``utils.u_clean``, so the result is safe for JSON encoding.

        Returns an empty dict when ``self.geoip_data`` is empty or unset.
        """
        if not self.geoip_data:
            return {}

        # keys are kept as-is; only the values are cleaned
        return dict(
            (field, utils.u_clean(raw))
            for field, raw in self.geoip_data.items()
        )
Exemplo n.º 2
0
    def _get_geoip_data_json(self):
        """
        Build a copy of ``self.geoip_data`` whose values have each been passed
        through ``utils.u_clean``, so the result is safe for JSON encoding.

        Returns an empty dict when ``self.geoip_data`` is empty or unset.
        """
        if not self.geoip_data:
            return {}

        # keys are kept as-is; only the values are cleaned
        return dict(
            (field, utils.u_clean(raw))
            for field, raw in self.geoip_data.items()
        )
Exemplo n.º 3
0
    def process_request(self, request):
        """
        Create or refresh the ``Visitor`` row that describes this request.

        Nothing is recorded (and the method returns early) when the request is
        AJAX, when the user agent contains one of the untracked keywords, when
        the path starts with one of ``self.prefixes``, or when the visitor
        lookup fails for a reason other than "no such visitor".

        Always returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache (refreshed hourly)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if ua.keyword in user_agent:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key, cut to 40 characters
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff
            )

            if len(visitors):
                # adopt the recent visitor and move it onto the new session key
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
        except Exception:
            # Any other lookup failure still ends tracking for this request,
            # but it is logged rather than swallowed silently.
            log.error('There was a problem looking up visitor information:\n%s' % traceback.format_exc())
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            visitor.save()
        except DatabaseError:
            # tracking is best-effort: log the failure and let the request go on
            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
Exemplo n.º 4
0
    def process_request(self, request):
        """
        Track the visitor behind ``request`` and keep their data in redis.

        A ``Visitor`` row is looked up (or created and saved) only when the
        session holds no ``visitor_id`` yet. The per-visit details (user agent,
        referrer, page views, timestamps, Google Analytics values and
        acquisition parameters) are stored as JSON under the redis key
        ``visitor_data_<visitor_id>``.

        Returns early, recording nothing, for the skipped paths below, for
        untracked user agents, for paths under ``self.prefixes``, and when the
        visitor lookup fails for a reason other than "no such visitor".

        Always returns ``None``.
        """
        # Skip asset/admin/favicon paths outright; AJAX requests are skipped
        # too, except those under "/o/".
        skipped_prefixes = ("/s/", "/static/", "/admin/", "/favicon.ico")
        if request.path.startswith(skipped_prefixes) or (
                request.is_ajax() and not request.path.startswith('/o/')):
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                             errors='ignore')

        # retrieve untracked user agents from cache (refreshed hourly)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if ua.keyword in user_agent:
                log.debug('Not tracking UA "%s" because of keyword: %s' %
                          (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key, cut to 40 characters
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {'session_key': session_key, 'ip_address': ip_address}

        visitor_id = request.session.get('visitor_id', None)
        if not visitor_id:
            # for some reason, Visitor.objects.get_or_create was not working here
            try:
                visitor = Visitor.objects.only('id').get(**attrs)
            except Visitor.DoesNotExist:
                request.session.set_test_cookie()
                # see if there's a visitor with the same IP and user agent
                # within the last 5 minutes
                cutoff = now - timedelta(minutes=5)
                visitors = Visitor.objects.only('id').filter(
                    ip_address=ip_address,
                    user_agent=user_agent,
                    last_update__gte=cutoff)

                if len(visitors):
                    visitor = visitors[0]
                    visitor.session_key = session_key
                    log.debug('Using existing visitor for IP %s / UA %s: %s' %
                              (ip_address, user_agent, visitor.id))
                else:
                    # it's probably safe to assume that the visitor is brand new
                    visitor = Visitor(**attrs)
                    log.debug('Created a new visitor: %s' % attrs)
                try:
                    visitor.save()
                except DatabaseError:
                    # NOTE(review): after a failed save of a brand-new visitor
                    # ``visitor.id`` is presumably still None, so the code
                    # below would use the key "visitor_data_None" -- confirm
                    # whether this path should return instead.
                    print_stack_trace()
                    log.error(
                        'There was a problem saving visitor information:\n%s\n\n%s'
                        % (traceback.format_exc(), locals()))
            except Exception:
                # Any other lookup failure still ends tracking for this
                # request, but it is logged rather than swallowed silently.
                log.error(
                    'There was a problem looking up visitor information:\n%s'
                    % traceback.format_exc())
                return

            # remember the visitor so later requests skip the database lookup
            request.session['visitor_id'] = visitor_id = visitor.id

        redis_key = 'visitor_data_%s' % visitor_id
        redis_data = redis.get(redis_key) or '{}'
        visitor_data = json.loads(redis_data)
        visitor_data['visitor_id'] = visitor_id

        # update the tracking information
        visitor_data['user_agent'] = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
        # TODO: ensure that we are on the same time zone - I just put UTC for now
        # to get it working
        last_update = visitor_data.get('last_update', None)
        if not last_update or last_update <= time.mktime(
                one_hour_ago.timetuple()):
            visitor_data['referrer'] = utils.u_clean(
                request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor_data['page_views'] = 0
            visitor_data['session_start'] = time.mktime(now.timetuple())

        visitor_data['url'] = request.path
        page_views = visitor_data.get('page_views', 0) + 1
        visitor_data['page_views'] = page_views
        visitor_data['last_update'] = time.mktime(now.timetuple())

        try:
            # Extracting visitor data from GA cookie
            cookie = request.COOKIES.get('__utmz')
            if cookie:
                try:
                    # the campaign values follow the fourth "." of the cookie
                    data = cookie.split('.', 4)[-1]
                    data = dict(match.groups() for match in re.finditer(
                        r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
                except (ValueError, IndexError):
                    log.error('Malformed GA cookie: {0!r}'.format(cookie))
                else:
                    visitor_data['source'] = normalize_ga_value(
                        data.get('utmcsr'))
                    visitor_data['medium'] = normalize_ga_value(
                        data.get('utmcmd'))
                    visitor_data['campaign'] = normalize_ga_value(
                        data.get('utmccn'))
                    # The regex above yields the key "utmctr"; the previous
                    # lookup of "utm.ctr" could never match.
                    visitor_data['keywords'] = normalize_ga_value(
                        data.get('utmctr'))

            utm_source = request.GET.get("utm_source", "unknown")
            request.session['acquisition_source_name'] = utm_source

            if utm_source != "unknown":
                # utm_source: Identify the advertiser, site, publication, etc. that is sending traffic to your property, e.g. google, citysearch, newsletter4, billboard.
                # utm_medium: The advertising or marketing medium, e.g.: cpc, banner, email newsletter.
                # utm_campaign: The individual campaign name, slogan, promo code, etc. for a product.
                # utm_term: Identify paid search keywords. If you're manually tagging paid keyword campaigns, you should also use utm_term to specify the keyword.
                # utm_content: Used to differentiate similar content, or links within the same ad. For example, if you have two call-to-action links within the same email message, you can use utm_content and set different values for each so you can tell which version is more effective.

                # update the tracking info with the latest and bump the old
                # one to be stored in the history
                past_acquisition_info = visitor_data.get(
                    'past_acquisition_info', [])
                if visitor_data.get('acquisition_source', None):
                    old_visitor_data = {'date_valid_until': time.time()}
                    for k in VISITOR_PARAMS_MAPPING:
                        old_visitor_data[k] = visitor_data.get(k, None)
                    past_acquisition_info.append(old_visitor_data)
                    visitor_data[
                        'past_acquisition_info'] = past_acquisition_info
                for k, v in VISITOR_PARAMS_MAPPING.items():
                    value = request.GET.get(v, 'unknown')[:255]
                    visitor_data[k] = value

        except Exception:
            # acquisition tracking is best-effort; whatever was collected
            # before the failure is still written to redis below
            print_stack_trace()
        redis.set(redis_key, json.dumps(visitor_data))
    def process_request(self, request):
        """
        Create or refresh the ``Visitor`` row that describes this request.

        The session is saved first when it has no key yet, so a session key is
        available for the lookup. Nothing is recorded when the user agent
        contains one of the untracked keywords, when the path starts with one
        of ``self.prefixes``, or when the visitor lookup fails for a reason
        other than "no such visitor".

        Always returns ``None``.
        """
        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache (refreshed hourly)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if ua.keyword in user_agent:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        # saving the session when it has no key yet
        if not request.session.session_key:
            request.session.save()
        session_key = request.session.session_key

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        # For now we have made a fork that works with MySQL backed Django that
        # also has the setting USE_TZ set to False (we are no longer calling `localtime()`)
        now = timezone.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff
            )

            if len(visitors):
                # adopt the recent visitor and move it onto the new session key
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                visitor, created = Visitor.objects.get_or_create(**attrs)
                if created:
                    log.debug('Created a new visitor: %s' % attrs)
        except Exception:
            # Any other lookup failure still ends tracking for this request,
            # but it is logged rather than swallowed silently.
            log.error('There was a problem looking up visitor information:\n%s' % traceback.format_exc())
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            # The savepoint is opened before the inner ``try`` so ``sid`` is
            # always bound when the IntegrityError handler rolls back to it.
            sid = transaction.savepoint()
            try:
                visitor.save()
                transaction.savepoint_commit(sid)
            except IntegrityError:
                # discard just this save and leave the surrounding
                # transaction usable
                transaction.savepoint_rollback(sid)
        except DatabaseError:
            # tracking is best-effort: log the failure and let the request go on
            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
Exemplo n.º 6
0
    def process_request(self, request):
        """
        Create or refresh the ``Visitor`` row that describes this request.

        Nothing is recorded (and the method returns early) when the request is
        AJAX, when the user agent contains one of the untracked keywords, when
        the path starts with one of ``self.prefixes``, or when the visitor
        lookup fails for a reason other than "no such visitor".

        Always returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache (refreshed hourly)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if ua.keyword in user_agent:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key, cut to 40 characters
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = right_now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff
            )

            if len(visitors):
                # adopt the recent visitor and move it onto the new session key
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
        except Exception:
            # Any other lookup failure still ends tracking for this request,
            # but it is logged rather than swallowed silently.
            log.error('There was a problem looking up visitor information:\n%s' % traceback.format_exc())
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            visitor.save()
        except DatabaseError:
            # tracking is best-effort: log the failure and let the request go on
            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
Exemplo n.º 7
0
    def process_request(self, request):
        """
        Create or refresh the ``Visitor`` row for this request and attach it to
        the request as ``request.visitor``.

        The visitor is looked up by the ``visitor_id`` cookie when present,
        otherwise by session key and IP address. When no row matches, a new
        ``Visitor`` is built; a ``tid`` (or ``fb_source``) query parameter, if
        given, is stored on it as ``tid``.

        Nothing is recorded when the request is AJAX, when the user agent
        contains one of the untracked keywords, when the path starts with one
        of ``self.prefixes``, or when the visitor lookup fails for a reason
        other than "no such visitor".

        Always returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = request.META.get('HTTP_USER_AGENT', '')[:255]

        # retrieve untracked user agents from cache (refreshed hourly)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = list(UntrackedUserAgent.objects.all())
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if ua.keyword in unicode(user_agent, errors='ignore'):
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session'):
            # saving the session when it has no key yet
            if not request.session.session_key:
                request.session.save()
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        # Attributes we use when creating a new user
        new_attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # If we have a visitor_id cookie, use it
        visitor_id = request.COOKIES.get('visitor_id')

        if visitor_id:
            attrs = {'id': visitor_id}
        else:
            attrs = new_attrs

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # add tracking ID to model if specified in the URL
            tid = request.GET.get('tid') or request.GET.get('fb_source')
            if tid:
                # NOTE(review): swapping in a copy of request.GET looks
                # vestigial; it is kept so later code sees the same object
                # type as before -- confirm whether it can be dropped.
                get = request.GET.copy()
                # The visitor is built from ``new_attrs``; writing the tid to
                # ``attrs`` lost it whenever a visitor_id cookie was present.
                new_attrs['tid'] = tid
                request.GET = get

            visitor = Visitor(**new_attrs)
            log.debug('Created a new visitor: %s' % new_attrs)
        except Exception:
            # Any other lookup failure still ends tracking for this request,
            # but it is logged rather than swallowed silently.
            log.error('There was a problem looking up visitor information:\n%s' % traceback.format_exc())
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, update their referrer URL
        if not visitor.last_update:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            visitor.save()
        except DatabaseError:
            # tracking is best-effort: log the failure and let the request go on
            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))

        request.visitor = visitor
        # Requests without a session are supported above, so they must not
        # fail here either.
        if hasattr(request, 'session'):
            request.session['visitor_id'] = visitor.pk
Exemplo n.º 8
0
    def process_request(self, request):
        """
        Create or refresh the ``Visitor`` row that describes this request.

        Nothing is recorded (and the method returns early) when the request is
        AJAX, when ``utils.user_agent_is_untracked`` flags the user agent, when
        the path starts with one of ``NO_TRACKING_PREFIXES``, or when the
        visitor lookup fails for a reason other than "no such visitor".

        Always returns ``None``. Unlike the lookup, a failing
        ``visitor.save()`` is not caught here and propagates to the caller.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = request.META.get('HTTP_USER_AGENT', '')

        if utils.user_agent_is_untracked(user_agent):
            # The matching keyword is not available in this scope; the old
            # message referenced an undefined ``ua`` and raised NameError.
            log.debug('Not tracking untracked UA "%s"' % user_agent)
            return

        if hasattr(request, 'session'):
            # use the current session key if we can
            # NOTE(review): the key is used as-is, even when it is empty --
            # confirm that an unsaved session cannot reach this point.
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = md5('%s:%s' % (ip_address, user_agent)).hexdigest()

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in NO_TRACKING_PREFIXES:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent[:255],
                last_update__gte=cutoff
            )

            if len(visitors):
                # adopt the recent visitor and move it onto the new session key
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
        except Exception:
            # Any other lookup failure still ends tracking for this request;
            # narrowed from a bare ``except`` so interpreter-exit exceptions
            # are no longer swallowed.
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent[:255]

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path[:255]
        visitor.page_views += 1
        visitor.last_update = now
        visitor.save()
Exemplo n.º 9
0
def log_click_track(request, coupon=None):
    try:
        referer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.path
        source_url_type='landing'

        if re.search('/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search('/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        elif re.search('/categories/[A-z0-9-]+/', referer):
            source_url_type = 'category'

        merchant=None

        if "/o/" in clicked_link:
            source_url = clicked_link
            merchant = coupon.merchant
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)

        merchant_domain = shorten_to_domain(target_url)
        visitor = Visitor.objects.get(pk=request.session['visitor_id'])
        click_track = ClickTrack(visitor=visitor, user_agent=visitor.user_agent[:255], referer=referer[:255],
                                 target_url=target_url[:255], source_url_type=source_url_type[:255],
                                 source_url=source_url[:255], merchant=merchant, coupon=coupon, 
                                 merchant_domain=merchant_domain[:255])

        try:
            click_track.acquisition_source      = visitor.acquisition_source
            click_track.acquisition_medium      = visitor.acquisition_medium
            click_track.acquisition_term        = visitor.acquisition_term
            click_track.acquisition_content     = visitor.acquisition_content
            click_track.acquisition_campaign    = visitor.acquisition_campaign
            click_track.acquisition_gclid       = visitor.acquisition_gclid
        except:
            print_stack_trace()

        try:
            click_track.save()
        except:
            try:
                print "Visitor ID", click_track.visitor
                print "User Agent", click_track.user_agent
                print "Referer", click_track.referer
                print "target_url", click_track.target_url
                print "source_url_type", click_track.source_url_type
                print "merchant", click_track.merchant
                print "coupon", click_track.coupon
                print "merchant_domain", click_track.merchant_domain
                print merchant.name, merchant.id
            except:
                pass
            print_stack_trace()
    except:
        print_stack_trace()
Exemplo n.º 10
0
def click_track(request, clicked_link_path=None):
    try:
        referer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.POST["clicked"][:255]
        try:
            clicked_link=clicked_link.lower()
        except:
            print_stack_trace()

        source_url_type='landing'
        if re.search('/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search('/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        elif re.search('/categories/[A-z0-9-]+/', referer):
            source_url_type = 'category'

        coupon=None
        merchant=None

        if "/coupon/" in clicked_link:
            #skimlinks will assume the source url to be the /coupon/ url
            if clicked_link.endswith("/"):
                coupon_id = clicked_link.split("/")[-2] #assumes trailing '/'
            else:
                coupon_id = clicked_link.split("/")[-1]
            source_url = clicked_link
            coupon = Coupon.active_objects.get(id=int(coupon_id))
            try:
                merchant = Merchant.objects.get(id=coupon.merchant.id)
            except:
                merchant = None
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)

        merchant_domain = shorten_to_domain(target_url)
        visitor = Visitor.objects.get(pk=request.session['visitor_id'])
        click_track                 = ClickTrack()
        click_track.visitor         = visitor
        click_track.user_agent      = visitor.user_agent[:255]
        click_track.referer         = referer[:255]
        click_track.target_url      = target_url[:255]
        click_track.source_url_type = source_url_type[:255]
        click_track.source_url      = source_url[:255]
        click_track.merchant        = merchant
        click_track.coupon          = coupon
        click_track.merchant_domain = merchant_domain[:255]

        try:
            click_track.acquisition_source      = visitor.acquisition_source
            click_track.acquisition_medium      = visitor.acquisition_medium
            click_track.acquisition_term        = visitor.acquisition_term
            click_track.acquisition_content     = visitor.acquisition_content
            click_track.acquisition_campaign    = visitor.acquisition_campaign
            click_track.acquisition_gclid       = visitor.acquisition_gclid
        except:
            print_stack_trace()

        try:
            click_track.save()
        except:
            try:
                print "Visitor ID", click_track.visitor
                print "User Agent", click_track.user_agent
                print "Referer", click_track.referer
                print "target_url", click_track.target_url
                print "source_url_type", click_track.source_url_type
                print "merchant", click_track.merchant
                print "coupon", click_track.coupon
                print "merchant_domain", click_track.merchant_domain
                print merchant.name, merchant.id
            except:
                pass
            print_stack_trace()
    except:
        print_stack_trace()
    return success()
Exemplo n.º 11
0
    def process_request(self, request):
        """
        Identify the visitor behind ``request`` and refresh their tracking data.

        Requests for /s/, /static/, /admin/ and /favicon.ico, AJAX requests
        outside /o/, user agents containing an untracked keyword and paths
        starting with one of ``self.prefixes`` are ignored.  For everything
        else the visitor id is taken from the session, or resolved/created
        through the ``Visitor`` model, and the JSON record stored in redis
        under ``visitor_data_<id>`` is updated with the user agent, referrer,
        page-view counter, timestamps, Google Analytics cookie values and the
        acquisition parameters named by ``VISITOR_PARAMS_MAPPING``.

        Always returns None.
        """
        # skip static/admin/favicon requests, and AJAX requests other than
        # those under /o/
        path = request.path
        if path.startswith(("/s/", "/static/", "/admin/", "/favicon.ico")) \
                or (request.is_ajax() and not path.startswith('/o/')):
            return
        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if ua.keyword in user_agent:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()
        now_ts = time.mktime(now.timetuple())

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        visitor_id = request.session.get('visitor_id', None)
        if not visitor_id:
            # for some reason, Visitor.objects.get_or_create was not working here
            try:
                visitor = Visitor.objects.only('id').get(**attrs)
            except Visitor.DoesNotExist:
                request.session.set_test_cookie()
                # see if there's a visitor with the same IP and user agent
                # within the last 5 minutes
                cutoff = now - timedelta(minutes=5)
                visitors = Visitor.objects.only('id').filter(
                    ip_address=ip_address,
                    user_agent=user_agent,
                    last_update__gte=cutoff
                )

                if len(visitors):
                    visitor = visitors[0]
                    visitor.session_key = session_key
                    log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
                else:
                    # it's probably safe to assume that the visitor is brand new
                    visitor = Visitor(**attrs)
                    log.debug('Created a new visitor: %s' % attrs)
                try:
                    visitor.save()
                except DatabaseError:
                    print_stack_trace()
                    log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
            except Exception:
                # any other lookup failure: give up on tracking this request
                return

            if visitor.id is None:
                # a brand new visitor could not be saved, so there is no id to
                # key the session or the redis record on; without this guard
                # every such visitor would share the 'visitor_data_None' record
                return

            request.session['visitor_id'] = visitor_id = visitor.id

        redis_key = 'visitor_data_%s' % visitor_id
        redis_data = redis.get(redis_key) or '{}'
        visitor_data = json.loads(redis_data)
        visitor_data['visitor_id'] = visitor_id

        # update the tracking information
        visitor_data['user_agent'] = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL.
        # The threshold is computed in epoch seconds, on the same scale as the
        # stored 'last_update'.  Converting a UTC-localized datetime through
        # timetuple()/mktime() instead sets tm_isdst=0, which shifts the result
        # by an hour whenever local DST is in effect.
        one_hour_ago_ts = now_ts - 3600
        last_update = visitor_data.get('last_update', None)
        if not last_update or last_update <= one_hour_ago_ts:
            visitor_data['referrer'] = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor_data['page_views'] = 0
            visitor_data['session_start'] = now_ts

        visitor_data['url'] = request.path
        page_views = visitor_data.get('page_views', 0) + 1
        visitor_data['page_views'] = page_views
        visitor_data['last_update'] = now_ts

        try:
            # Extracting visitor data from GA cookie
            cookie = request.COOKIES.get('__utmz')
            if cookie:
                try:
                    data = cookie.split('.', 4)[-1]
                    data = dict(match.groups() for match in re.finditer(
                        r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
                except (ValueError, IndexError):
                    log.error('Malformed GA cookie: {0!r}'.format(cookie))
                else:
                    visitor_data['source'] = normalize_ga_value(data.get('utmcsr'))
                    visitor_data['medium'] = normalize_ga_value(data.get('utmcmd'))
                    visitor_data['campaign'] = normalize_ga_value(data.get('utmccn'))
                    # the regex above captures the key as 'utmctr'
                    visitor_data['keywords'] = normalize_ga_value(data.get('utmctr'))

            utm_source = request.GET.get("utm_source", "unknown")
            request.session['acquisition_source_name'] = utm_source

            if utm_source != "unknown":
                # utm_source: Identify the advertiser, site, publication, etc. that is sending traffic to your property, e.g. google, citysearch, newsletter4, billboard.
                # utm_medium: The advertising or marketing medium, e.g.: cpc, banner, email newsletter.
                # utm_campaign: The individual campaign name, slogan, promo code, etc. for a product.
                # utm_term: Identify paid search keywords. If you're manually tagging paid keyword campaigns, you should also use utm_term to specify the keyword.
                # utm_content: Used to differentiate similar content, or links within the same ad. For example, if you have two call-to-action links within the same email message, you can use utm_content and set different values for each so you can tell which version is more effective.

                # update the tracking info with the latest and bump the old one to be stored in the history
                past_acquisition_info = visitor_data.get('past_acquisition_info', [])
                if visitor_data.get('acquisition_source', None):
                    old_visitor_data = {'date_valid_until': time.time()}
                    for k in VISITOR_PARAMS_MAPPING:
                        old_visitor_data[k] = visitor_data.get(k, None)
                    past_acquisition_info.append(old_visitor_data)
                    visitor_data['past_acquisition_info'] = past_acquisition_info
                for k, v in VISITOR_PARAMS_MAPPING.items():
                    value = request.GET.get(v, 'unknown')[:255]
                    visitor_data[k] = value

        except Exception:
            # acquisition data is best effort; still persist what we have
            print_stack_trace()
        redis.set(redis_key, json.dumps(visitor_data))
Exemplo n.º 12
0
def log_click_track(request, coupon=None):
    try:
        referer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.path
        source_url_type = 'landing'

        if re.search('/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search('/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        elif re.search('/categories/[A-z0-9-]+/', referer):
            source_url_type = 'category'

        merchant = None

        if "/o/" in clicked_link:
            source_url = clicked_link
            merchant = coupon.merchant
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)

        merchant_domain = shorten_to_domain(target_url)
        visitor = Visitor.objects.get(pk=request.session['visitor_id'])
        click_track = ClickTrack(visitor=visitor,
                                 user_agent=visitor.user_agent[:255],
                                 referer=referer[:255],
                                 target_url=target_url[:255],
                                 source_url_type=source_url_type[:255],
                                 source_url=source_url[:255],
                                 merchant=merchant,
                                 coupon=coupon,
                                 merchant_domain=merchant_domain[:255])

        try:
            click_track.acquisition_source = visitor.acquisition_source
            click_track.acquisition_medium = visitor.acquisition_medium
            click_track.acquisition_term = visitor.acquisition_term
            click_track.acquisition_content = visitor.acquisition_content
            click_track.acquisition_campaign = visitor.acquisition_campaign
            click_track.acquisition_gclid = visitor.acquisition_gclid
        except:
            print_stack_trace()

        try:
            click_track.save()
        except:
            try:
                print "Visitor ID", click_track.visitor
                print "User Agent", click_track.user_agent
                print "Referer", click_track.referer
                print "target_url", click_track.target_url
                print "source_url_type", click_track.source_url_type
                print "merchant", click_track.merchant
                print "coupon", click_track.coupon
                print "merchant_domain", click_track.merchant_domain
                print merchant.name, merchant.id
            except:
                pass
            print_stack_trace()
    except:
        print_stack_trace()
Exemplo n.º 13
0
def click_track(request, clicked_link_path=None):
    try:
        referer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.POST["clicked"][:255]
        try:
            clicked_link = clicked_link.lower()
        except:
            print_stack_trace()

        source_url_type = 'landing'
        if re.search('/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search('/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        elif re.search('/categories/[A-z0-9-]+/', referer):
            source_url_type = 'category'

        coupon = None
        merchant = None

        if "/coupon/" in clicked_link:
            #skimlinks will assume the source url to be the /coupon/ url
            if clicked_link.endswith("/"):
                coupon_id = clicked_link.split("/")[-2]  #assumes trailing '/'
            else:
                coupon_id = clicked_link.split("/")[-1]
            source_url = clicked_link
            coupon = Coupon.active_objects.get(id=int(coupon_id))
            try:
                merchant = Merchant.objects.get(id=coupon.merchant.id)
            except:
                merchant = None
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)

        merchant_domain = shorten_to_domain(target_url)
        visitor = Visitor.objects.get(pk=request.session['visitor_id'])
        click_track = ClickTrack()
        click_track.visitor = visitor
        click_track.user_agent = visitor.user_agent[:255]
        click_track.referer = referer[:255]
        click_track.target_url = target_url[:255]
        click_track.source_url_type = source_url_type[:255]
        click_track.source_url = source_url[:255]
        click_track.merchant = merchant
        click_track.coupon = coupon
        click_track.merchant_domain = merchant_domain[:255]

        try:
            click_track.acquisition_source = visitor.acquisition_source
            click_track.acquisition_medium = visitor.acquisition_medium
            click_track.acquisition_term = visitor.acquisition_term
            click_track.acquisition_content = visitor.acquisition_content
            click_track.acquisition_campaign = visitor.acquisition_campaign
            click_track.acquisition_gclid = visitor.acquisition_gclid
        except:
            print_stack_trace()

        try:
            click_track.save()
        except:
            try:
                print "Visitor ID", click_track.visitor
                print "User Agent", click_track.user_agent
                print "Referer", click_track.referer
                print "target_url", click_track.target_url
                print "source_url_type", click_track.source_url_type
                print "merchant", click_track.merchant
                print "coupon", click_track.coupon
                print "merchant_domain", click_track.merchant_domain
                print merchant.name, merchant.id
            except:
                pass
            print_stack_trace()
    except:
        print_stack_trace()
    return success()