def _get_geoip_data_json(self):
    """
    Return a copy of ``self.geoip_data`` that is safe for JSON encoding.

    Each value is run through ``utils.u_clean`` to strip dirty unicode
    characters.  When there is no GeoIP data an empty dict is returned.
    """
    if self.geoip_data:
        # rebuild the mapping with every value cleaned, keys untouched
        return dict(
            (field, utils.u_clean(raw))
            for field, raw in self.geoip_data.items()
        )
    return {}
def process_request(self, request):
    """
    Record (or refresh) the ``Visitor`` row for an incoming request.

    Nothing is tracked, and the method returns early, for AJAX requests,
    for user agents containing an ``UntrackedUserAgent`` keyword, for paths
    starting with one of ``self.prefixes``, and when the visitor lookup
    fails with anything other than ``Visitor.DoesNotExist``.  Otherwise the
    visitor's user, user agent, URL, page count and last-update time are
    updated and the row is saved.  Always returns ``None``.

    NOTE: ``locals()`` is dumped into the error log when saving fails, so
    the local variable names below are part of the diagnostic output.
    """
    # AJAX traffic is never tracked
    if request.is_ajax():
        return

    # basic facts about the client
    ip_address = utils.get_ip(request)
    user_agent = unicode(
        request.META.get('HTTP_USER_AGENT', '')[:255],
        errors='ignore'
    )

    # the untracked user agents are cached for an hour
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # stop as soon as one untracked keyword occurs in the user agent
    for ua in untracked:
        if ua.keyword in user_agent:
            log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
            return

    if not (hasattr(request, 'session') and request.session.session_key):
        # no usable session: fake a key from IP and user agent (40 chars max)
        session_key = ('%s:%s' % (ip_address, user_agent))[:40]
    else:
        # a real session key is available, so use it
        session_key = request.session.session_key

    # requests below any of the configured prefixes are not tracked
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # from here on the URL is going to be tracked
    now = datetime.now()
    attrs = {'session_key': session_key, 'ip_address': ip_address}

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # fall back to a visitor with the same IP and user agent that was
        # seen within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent,
            last_update__gte=cutoff
        )
        if not len(visitors):
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s' % attrs)
        else:
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
    except:
        # any other lookup problem silently disables tracking for this hit
        return

    # anonymous users are stored as "no user"
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # a new visitor, or one that has been away for at least an hour,
    # starts a fresh session: new referrer, page counter back to zero
    one_hour_ago = now - timedelta(hours=1)
    if not visitor.last_update or visitor.last_update <= one_hour_ago:
        visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now

    try:
        visitor.save()
    except DatabaseError:
        log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
def process_request(self, request):
    """
    Track the visitor behind ``request`` and keep their data in redis.

    The ``Visitor`` row is only used to obtain an id, which is remembered in
    ``request.session['visitor_id']``.  Everything else (user agent,
    referrer, page views, timestamps, Google Analytics ``__utmz`` cookie
    fields and ``utm_*`` acquisition parameters) is kept in the JSON blob
    stored under the redis key ``visitor_data_<visitor_id>``.

    Tracking is skipped for static/admin/favicon paths, for AJAX requests
    outside ``/o/``, for untracked user agents, for paths starting with one
    of ``self.prefixes`` and when the visitor lookup fails unexpectedly.
    Always returns ``None``.

    NOTE: ``locals()`` is dumped into the error log when saving fails, so
    the local variable names are part of the diagnostic output.
    """
    # don't process static assets, the admin, or AJAX requests (AJAX
    # requests below /o/ are still tracked)
    if request.path.startswith(("/s/", "/static/", "/admin/", "/favicon.ico")) or (
            request.is_ajax() and not request.path.startswith('/o/')):
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

    # retrieve untracked user agents from cache (refreshed hourly)
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # see if the user agent is not supposed to be tracked
    for ua in untracked:
        # if the keyword is found in the user agent, stop tracking
        if user_agent.find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)
        session_key = session_key[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    attrs = {'session_key': session_key, 'ip_address': ip_address}

    visitor_id = request.session.get('visitor_id', None)
    if not visitor_id:
        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.only('id').get(**attrs)
        except Visitor.DoesNotExist:
            request.session.set_test_cookie()
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.only('id').filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff)

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)

            try:
                visitor.save()
            except DatabaseError:
                print_stack_trace()
                log.error(
                    'There was a problem saving visitor information:\n%s\n\n%s'
                    % (traceback.format_exc(), locals()))
        except Exception:
            # Any other lookup failure disables tracking for this request.
            # ``Exception`` (not a bare except) so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            return

        # NOTE(review): if the save above failed for a brand-new visitor,
        # ``visitor.id`` is None here and the redis key below becomes
        # "visitor_data_None" -- confirm whether that is acceptable.
        request.session['visitor_id'] = visitor_id = visitor.id

    redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
    visitor_data = json.loads(redis_data)

    visitor_data['visitor_id'] = visitor_id

    # update the tracking information
    visitor_data['user_agent'] = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
    # TODO: ensure that we are on the same time zone - I just put UTC for now
    # to get it working
    last_update = visitor_data.get('last_update', None)
    if not last_update or last_update <= time.mktime(one_hour_ago.timetuple()):
        visitor_data['referrer'] = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor_data['page_views'] = 0
        visitor_data['session_start'] = time.mktime(now.timetuple())

    visitor_data['url'] = request.path
    page_views = visitor_data.get('page_views', 0) + 1
    visitor_data['page_views'] = page_views
    visitor_data['last_update'] = time.mktime(now.timetuple())

    try:
        # Extracting visitor data from GA cookie
        cookie = request.COOKIES.get('__utmz')
        if cookie:
            try:
                # the campaign fields are whatever follows the fourth dot
                data = cookie.split('.', 4)[-1]
                data = dict(match.groups() for match in re.finditer(
                    r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
            except (ValueError, IndexError):
                log.error('Malformed GA cookie: {0!r}'.format(cookie))
            else:
                visitor_data['source'] = normalize_ga_value(data.get('utmcsr'))
                visitor_data['medium'] = normalize_ga_value(data.get('utmcmd'))
                visitor_data['campaign'] = normalize_ga_value(data.get('utmccn'))
                # The regex above captures the key "utmctr"; looking up
                # "utm.ctr" could never match, so keywords were always lost.
                visitor_data['keywords'] = normalize_ga_value(data.get('utmctr'))

        utm_source = request.GET.get("utm_source", "unknown")
        request.session['acquisition_source_name'] = utm_source

        if utm_source != "unknown":
            # utm_source: Identify the advertiser, site, publication, etc.
            #   that is sending traffic to your property, e.g. google,
            #   citysearch, newsletter4, billboard.
            # utm_medium: The advertising or marketing medium, e.g.: cpc,
            #   banner, email newsletter.
            # utm_campaign: The individual campaign name, slogan, promo
            #   code, etc. for a product.
            # utm_term: Identify paid search keywords. If you're manually
            #   tagging paid keyword campaigns, you should also use utm_term
            #   to specify the keyword.
            # utm_content: Used to differentiate similar content, or links
            #   within the same ad. For example, if you have two
            #   call-to-action links within the same email message, you can
            #   use utm_content and set different values for each so you can
            #   tell which version is more effective.

            # update the tracking info with the latest and bump the old one
            # to be stored in the history
            past_acquisition_info = visitor_data.get('past_acquisition_info', [])
            if visitor_data.get('acquisition_source', None):
                old_visitor_data = {'date_valid_until': time.time()}
                for k in VISITOR_PARAMS_MAPPING.keys():
                    old_visitor_data[k] = visitor_data.get(k, None)

                past_acquisition_info.append(old_visitor_data)
                visitor_data['past_acquisition_info'] = past_acquisition_info

            for k, v in VISITOR_PARAMS_MAPPING.items():
                value = request.GET.get(v, 'unknown')[:255]
                visitor_data[k] = value
    except Exception:
        # acquisition data is best-effort; never break the request over it
        print_stack_trace()

    redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
def process_request(self, request):
    """
    Record (or refresh) the ``Visitor`` row for an incoming request.

    The session is saved first when it has no key yet, so every visitor is
    identified by a real session key plus IP address.  Tracking is skipped
    for user agents containing an ``UntrackedUserAgent`` keyword, for paths
    starting with one of ``self.prefixes`` and when the visitor lookup fails
    unexpectedly.  The final save runs inside a savepoint, which is rolled
    back if the database rejects the write.  Always returns ``None``.

    NOTE: ``locals()`` is dumped into the error log when saving fails, so
    the local variable names are part of the diagnostic output.
    """
    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

    # retrieve untracked user agents from cache (refreshed hourly)
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # see if the user agent is not supposed to be tracked
    for ua in untracked:
        # if the keyword is found in the user agent, stop tracking
        if user_agent.find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
            return

    # saving the session here ensures that a session key is available
    if not request.session.session_key:
        request.session.save()
    session_key = request.session.session_key

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    # For now we have made a fork that works with MySQL backed Django that
    # also has the setting USE_TZ set to False (we are no longer calling
    # `localtime()`)
    now = timezone.now()

    attrs = {
        'session_key': session_key,
        'ip_address': ip_address
    }

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent,
            last_update__gte=cutoff
        )

        if len(visitors):
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
        else:
            visitor, created = Visitor.objects.get_or_create(**attrs)
            if created:
                log.debug('Created a new visitor: %s' % attrs)
    except Exception:
        # Any other lookup failure disables tracking for this request.
        # ``Exception`` (not a bare except) so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    one_hour_ago = now - timedelta(hours=1)
    if not visitor.last_update or visitor.last_update <= one_hour_ago:
        visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now

    # ``sid`` is bound before the try so the handlers can always refer to it,
    # even when creating the savepoint itself is what failed.
    sid = None
    try:
        sid = transaction.savepoint()
        visitor.save()
        transaction.savepoint_commit(sid)
    except IntegrityError:
        # integrity conflicts are dropped silently; just undo the write
        if sid is not None:
            transaction.savepoint_rollback(sid)
    except DatabaseError:
        log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
        # Undo the failed write as well, so the surrounding transaction is
        # not left with a dangling savepoint.  The rollback is best-effort:
        # tracking must never break the request.
        if sid is not None:
            try:
                transaction.savepoint_rollback(sid)
            except DatabaseError:
                log.error('Could not roll back the visitor savepoint:\n%s' % traceback.format_exc())
def process_request(self, request):
    """
    Record (or refresh) the ``Visitor`` row for an incoming request.

    The current time comes from ``right_now()``.  Nothing is tracked, and
    the method returns early, for AJAX requests, for user agents containing
    an ``UntrackedUserAgent`` keyword, for paths starting with one of
    ``self.prefixes``, and when the visitor lookup fails with anything other
    than ``Visitor.DoesNotExist``.  Always returns ``None``.

    NOTE: ``locals()`` is dumped into the error log when saving fails, so
    the local variable names below are part of the diagnostic output.
    """
    # AJAX traffic is never tracked
    if request.is_ajax():
        return

    # basic facts about the client
    ip_address = utils.get_ip(request)
    user_agent = unicode(
        request.META.get('HTTP_USER_AGENT', '')[:255],
        errors='ignore'
    )

    # the untracked user agents are cached for an hour
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # stop as soon as one untracked keyword occurs in the user agent
    for ua in untracked:
        if ua.keyword in user_agent:
            log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
            return

    if not (hasattr(request, 'session') and request.session.session_key):
        # no usable session: fake a key from IP and user agent (40 chars max)
        session_key = ('%s:%s' % (ip_address, user_agent))[:40]
    else:
        # a real session key is available, so use it
        session_key = request.session.session_key

    # requests below any of the configured prefixes are not tracked
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # from here on the URL is going to be tracked
    now = right_now()
    attrs = {'session_key': session_key, 'ip_address': ip_address}

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # fall back to a visitor with the same IP and user agent that was
        # seen within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent,
            last_update__gte=cutoff
        )
        if not len(visitors):
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s' % attrs)
        else:
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
    except:
        # any other lookup problem silently disables tracking for this hit
        return

    # anonymous users are stored as "no user"
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # a new visitor, or one that has been away for at least an hour,
    # starts a fresh session: new referrer, page counter back to zero
    one_hour_ago = now - timedelta(hours=1)
    if not visitor.last_update or visitor.last_update <= one_hour_ago:
        visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now

    try:
        visitor.save()
    except DatabaseError:
        log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
def process_request(self, request):
    """
    Record (or refresh) the ``Visitor`` row for an incoming request.

    A visitor is looked up by the ``visitor_id`` cookie when one is sent,
    otherwise by session key and IP address.  When no row matches, a new
    ``Visitor`` is created from session key and IP address, plus the
    tracking id taken from the ``tid`` or ``fb_source`` query parameter when
    present.  The visitor is exposed as ``request.visitor`` and its primary
    key is stored in ``request.session['visitor_id']`` when a session exists.

    Tracking is skipped for AJAX requests, for user agents containing an
    ``UntrackedUserAgent`` keyword, for paths starting with one of
    ``self.prefixes`` and when the visitor lookup fails unexpectedly.
    Always returns ``None``.

    NOTE: ``locals()`` is dumped into the error log when saving fails, so
    the local variable names are part of the diagnostic output.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = request.META.get('HTTP_USER_AGENT', '')[:255]

    # Decode the user agent once.  The decoded text is used both for keyword
    # matching and for the log message: formatting the raw byte string
    # together with a unicode keyword can raise UnicodeDecodeError when the
    # user agent contains non-ASCII bytes.
    user_agent_text = unicode(user_agent, errors='ignore')

    # retrieve untracked user agents from cache (refreshed hourly)
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = list(UntrackedUserAgent.objects.all())
        cache.set(ua_key, untracked, 3600)

    # see if the user agent is not supposed to be tracked
    for ua in untracked:
        # if the keyword is found in the user agent, stop tracking
        if user_agent_text.find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent_text, ua.keyword))
            return

    if hasattr(request, 'session'):
        # saving the session ensures that a session key is available
        if not request.session.session_key:
            request.session.save()

        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    # Attributes we use when creating a new user
    new_attrs = {
        'session_key': session_key,
        'ip_address': ip_address
    }

    # If we have a visitor_id cookie, use it
    # NOTE(review): the id is read from a cookie but written to the session
    # at the end of this method -- confirm where the cookie is set.
    visitor_id = request.COOKIES.get('visitor_id')
    if visitor_id:
        attrs = {'id': visitor_id}
    else:
        attrs = new_attrs

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # add tracking ID to model if specified in the URL
        tid = request.GET.get('tid') or request.GET.get('fb_source')
        if tid:
            get = request.GET.copy()
            # The new visitor is built from ``new_attrs``.  Writing the
            # tracking id to ``attrs`` lost it whenever the lookup had been
            # made by cookie id, because ``attrs`` is a different dict then.
            new_attrs['tid'] = tid
            # presumably swapped in so later code gets a mutable copy of the
            # query dict -- TODO confirm
            request.GET = get

        visitor = Visitor(**new_attrs)
        log.debug('Created a new visitor: %s' % new_attrs)
    except Exception:
        # Any other lookup failure (e.g. a malformed cookie value) disables
        # tracking for this request.  ``Exception`` (not a bare except) so
        # that SystemExit and KeyboardInterrupt are not swallowed.
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # if the visitor record is new, update their referrer URL
    if not visitor.last_update:
        visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now
    try:
        visitor.save()
    except DatabaseError:
        log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))

    request.visitor = visitor
    # A missing session is tolerated when the session key is chosen above,
    # so it must be tolerated here as well.
    if hasattr(request, 'session'):
        request.session['visitor_id'] = visitor.pk
def process_request(self, request):
    """
    Record (or refresh) the ``Visitor`` row for an incoming request.

    Tracking is skipped for AJAX requests, for user agents rejected by
    ``utils.user_agent_is_untracked``, for paths starting with one of
    ``NO_TRACKING_PREFIXES`` and when the visitor lookup fails unexpectedly.
    Otherwise the visitor's user, user agent, URL, page count and
    last-update time are updated and the row is saved; errors raised by the
    save are not caught here.  Always returns ``None``.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = request.META.get('HTTP_USER_AGENT', '')

    if utils.user_agent_is_untracked(user_agent):
        # The matching keyword is not known at this point; the old message
        # referenced an undefined ``ua`` and raised NameError instead of
        # logging.
        log.debug('Not tracking UA "%s" because it matches an untracked keyword' % user_agent)
        return

    if hasattr(request, 'session'):
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = md5('%s:%s' % (ip_address, user_agent)).hexdigest()

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in NO_TRACKING_PREFIXES:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    attrs = {
        'session_key': session_key,
        'ip_address': ip_address
    }

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent[:255],
            last_update__gte=cutoff
        )

        if len(visitors):
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
        else:
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s' % attrs)
    except Exception:
        # Any other lookup failure disables tracking for this request.
        # ``Exception`` (not a bare except) so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent[:255]

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    if not visitor.last_update or visitor.last_update <= (now - timedelta(hours=1)):
        visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path[:255]
    visitor.page_views += 1
    visitor.last_update = now
    visitor.save()
def log_click_track(request, coupon=None):
    """
    Store a ``ClickTrack`` row for the current request.

    The referer decides the source type ('coupon', 'company', 'category',
    default 'landing').  For paths containing "/o/" the merchant and target
    URL come from ``coupon``; otherwise the referer is the source and the
    request path is the target.  The visitor is loaded through
    ``request.session['visitor_id']`` and its acquisition fields are copied
    onto the click.

    This is best-effort logging: every failure is reported through
    ``print_stack_trace`` and nothing is raised.  Returns ``None``.
    """
    try:
        referer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.path

        # classify the page the click came from
        source_url_type = 'landing'
        if re.search(r'/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search(r'/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        # "A-z" also covered the punctuation between "Z" and "a"
        # ([ \ ] ^ `); spell the range out and keep the underscore, which
        # the old range matched too.
        elif re.search(r'/categories/[A-Za-z0-9_-]+/', referer):
            source_url_type = 'category'

        merchant = None
        if "/o/" in clicked_link:
            source_url = clicked_link
            merchant = coupon.merchant
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        # strip the skimlinks redirect before working out the domain
        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)
        merchant_domain = shorten_to_domain(target_url)

        visitor = Visitor.objects.get(pk=request.session['visitor_id'])

        click_track = ClickTrack(visitor=visitor,
                                 user_agent=visitor.user_agent[:255],
                                 referer=referer[:255],
                                 target_url=target_url[:255],
                                 source_url_type=source_url_type[:255],
                                 source_url=source_url[:255],
                                 merchant=merchant,
                                 coupon=coupon,
                                 merchant_domain=merchant_domain[:255])

        try:
            click_track.acquisition_source = visitor.acquisition_source
            click_track.acquisition_medium = visitor.acquisition_medium
            click_track.acquisition_term = visitor.acquisition_term
            click_track.acquisition_content = visitor.acquisition_content
            click_track.acquisition_campaign = visitor.acquisition_campaign
            click_track.acquisition_gclid = visitor.acquisition_gclid
        except Exception:
            print_stack_trace()

        try:
            click_track.save()
        except Exception:
            # Report the save failure first: the diagnostics below may raise
            # and handle an exception of their own (e.g. ``merchant`` is
            # None).  Presumably print_stack_trace reports the exception
            # currently being handled -- TODO confirm.
            print_stack_trace()
            try:
                # single-argument print works as statement and as function
                print("Visitor ID %s" % (click_track.visitor,))
                print("User Agent %s" % (click_track.user_agent,))
                print("Referer %s" % (click_track.referer,))
                print("target_url %s" % (click_track.target_url,))
                print("source_url_type %s" % (click_track.source_url_type,))
                print("merchant %s" % (click_track.merchant,))
                print("coupon %s" % (click_track.coupon,))
                print("merchant_domain %s" % (click_track.merchant_domain,))
                print("%s %s" % (merchant.name, merchant.id))
            except Exception:
                # the dump is purely diagnostic
                pass
    except Exception:
        print_stack_trace()
def click_track(request, clicked_link_path=None):
    """
    Store a ``ClickTrack`` row for the link posted as ``clicked``.

    The referer decides the source type ('coupon', 'company', 'category',
    default 'landing').  When the lower-cased link contains "/coupon/", its
    last path segment is taken as the id of an active ``Coupon``, which then
    supplies merchant and target URL; otherwise the referer is the source
    and the clicked link is the target.  ``clicked_link_path`` is accepted
    but not used.

    This is best-effort logging: every failure is reported through
    ``print_stack_trace``, and ``success()`` is returned in all cases.
    """
    try:
        referer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.POST["clicked"][:255]
        try:
            clicked_link = clicked_link.lower()
        except Exception:
            print_stack_trace()

        # classify the page the click came from
        source_url_type = 'landing'
        if re.search(r'/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search(r'/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        # "A-z" also covered the punctuation between "Z" and "a"
        # ([ \ ] ^ `); spell the range out and keep the underscore, which
        # the old range matched too.
        elif re.search(r'/categories/[A-Za-z0-9_-]+/', referer):
            source_url_type = 'category'

        coupon = None
        merchant = None
        if "/coupon/" in clicked_link:
            # skimlinks will assume the source url to be the /coupon/ url
            if clicked_link.endswith("/"):
                coupon_id = clicked_link.split("/")[-2]  # assumes trailing '/'
            else:
                coupon_id = clicked_link.split("/")[-1]
            source_url = clicked_link
            coupon = Coupon.active_objects.get(id=int(coupon_id))
            try:
                merchant = Merchant.objects.get(id=coupon.merchant.id)
            except Exception:
                merchant = None
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        # strip the skimlinks redirect before working out the domain
        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)
        merchant_domain = shorten_to_domain(target_url)

        visitor = Visitor.objects.get(pk=request.session['visitor_id'])

        click_track = ClickTrack()
        click_track.visitor = visitor
        click_track.user_agent = visitor.user_agent[:255]
        click_track.referer = referer[:255]
        click_track.target_url = target_url[:255]
        click_track.source_url_type = source_url_type[:255]
        click_track.source_url = source_url[:255]
        click_track.merchant = merchant
        click_track.coupon = coupon
        click_track.merchant_domain = merchant_domain[:255]

        try:
            click_track.acquisition_source = visitor.acquisition_source
            click_track.acquisition_medium = visitor.acquisition_medium
            click_track.acquisition_term = visitor.acquisition_term
            click_track.acquisition_content = visitor.acquisition_content
            click_track.acquisition_campaign = visitor.acquisition_campaign
            click_track.acquisition_gclid = visitor.acquisition_gclid
        except Exception:
            print_stack_trace()

        try:
            click_track.save()
        except Exception:
            # Report the save failure first: the diagnostics below may raise
            # and handle an exception of their own (e.g. ``merchant`` is
            # None).  Presumably print_stack_trace reports the exception
            # currently being handled -- TODO confirm.
            print_stack_trace()
            try:
                # single-argument print works as statement and as function
                print("Visitor ID %s" % (click_track.visitor,))
                print("User Agent %s" % (click_track.user_agent,))
                print("Referer %s" % (click_track.referer,))
                print("target_url %s" % (click_track.target_url,))
                print("source_url_type %s" % (click_track.source_url_type,))
                print("merchant %s" % (click_track.merchant,))
                print("coupon %s" % (click_track.coupon,))
                print("merchant_domain %s" % (click_track.merchant_domain,))
                print("%s %s" % (merchant.name, merchant.id))
            except Exception:
                # the dump is purely diagnostic
                pass
    except Exception:
        print_stack_trace()

    return success()
def process_request(self, request):
    """
    Track the visitor behind ``request`` and keep their data in redis.

    The ``Visitor`` row is only used to obtain an id, which is remembered in
    ``request.session['visitor_id']``.  Everything else (user agent,
    referrer, page views, timestamps, Google Analytics ``__utmz`` cookie
    fields and ``utm_*`` acquisition parameters) is kept in the JSON blob
    stored under the redis key ``visitor_data_<visitor_id>``.

    Tracking is skipped for static/admin/favicon paths, for AJAX requests
    outside ``/o/``, for untracked user agents, for paths starting with one
    of ``self.prefixes`` and when the visitor lookup fails unexpectedly.
    Always returns ``None``.

    NOTE: ``locals()`` is dumped into the error log when saving fails, so
    the local variable names are part of the diagnostic output.
    """
    # don't process static assets, the admin, or AJAX requests (AJAX
    # requests below /o/ are still tracked)
    if request.path.startswith(("/s/", "/static/", "/admin/", "/favicon.ico")) or (
            request.is_ajax() and not request.path.startswith('/o/')):
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

    # retrieve untracked user agents from cache (refreshed hourly)
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # see if the user agent is not supposed to be tracked
    for ua in untracked:
        # if the keyword is found in the user agent, stop tracking
        if user_agent.find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)
        session_key = session_key[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    attrs = {
        'session_key': session_key,
        'ip_address': ip_address
    }

    visitor_id = request.session.get('visitor_id', None)
    if not visitor_id:
        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.only('id').get(**attrs)
        except Visitor.DoesNotExist:
            request.session.set_test_cookie()
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.only('id').filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff
            )

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)

            try:
                visitor.save()
            except DatabaseError:
                print_stack_trace()
                log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
        except Exception:
            # Any other lookup failure disables tracking for this request.
            # ``Exception`` (not a bare except) so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            return

        # NOTE(review): if the save above failed for a brand-new visitor,
        # ``visitor.id`` is None here and the redis key below becomes
        # "visitor_data_None" -- confirm whether that is acceptable.
        request.session['visitor_id'] = visitor_id = visitor.id

    redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
    visitor_data = json.loads(redis_data)

    visitor_data['visitor_id'] = visitor_id

    # update the tracking information
    visitor_data['user_agent'] = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
    # TODO: ensure that we are on the same time zone - I just put UTC for now
    # to get it working
    last_update = visitor_data.get('last_update', None)
    if not last_update or last_update <= time.mktime(one_hour_ago.timetuple()):
        visitor_data['referrer'] = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor_data['page_views'] = 0
        visitor_data['session_start'] = time.mktime(now.timetuple())

    visitor_data['url'] = request.path
    page_views = visitor_data.get('page_views', 0) + 1
    visitor_data['page_views'] = page_views
    visitor_data['last_update'] = time.mktime(now.timetuple())

    try:
        # Extracting visitor data from GA cookie
        cookie = request.COOKIES.get('__utmz')
        if cookie:
            try:
                # the campaign fields are whatever follows the fourth dot
                data = cookie.split('.', 4)[-1]
                data = dict(match.groups() for match in re.finditer(
                    r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
            except (ValueError, IndexError):
                log.error('Malformed GA cookie: {0!r}'.format(cookie))
            else:
                visitor_data['source'] = normalize_ga_value(data.get('utmcsr'))
                visitor_data['medium'] = normalize_ga_value(data.get('utmcmd'))
                visitor_data['campaign'] = normalize_ga_value(data.get('utmccn'))
                # The regex above captures the key "utmctr"; looking up
                # "utm.ctr" could never match, so keywords were always lost.
                visitor_data['keywords'] = normalize_ga_value(data.get('utmctr'))

        utm_source = request.GET.get("utm_source", "unknown")
        request.session['acquisition_source_name'] = utm_source

        if utm_source != "unknown":
            # utm_source: Identify the advertiser, site, publication, etc.
            #   that is sending traffic to your property, e.g. google,
            #   citysearch, newsletter4, billboard.
            # utm_medium: The advertising or marketing medium, e.g.: cpc,
            #   banner, email newsletter.
            # utm_campaign: The individual campaign name, slogan, promo
            #   code, etc. for a product.
            # utm_term: Identify paid search keywords. If you're manually
            #   tagging paid keyword campaigns, you should also use utm_term
            #   to specify the keyword.
            # utm_content: Used to differentiate similar content, or links
            #   within the same ad. For example, if you have two
            #   call-to-action links within the same email message, you can
            #   use utm_content and set different values for each so you can
            #   tell which version is more effective.

            # update the tracking info with the latest and bump the old one
            # to be stored in the history
            past_acquisition_info = visitor_data.get('past_acquisition_info', [])
            if visitor_data.get('acquisition_source', None):
                old_visitor_data = {'date_valid_until': time.time()}
                for k in VISITOR_PARAMS_MAPPING.keys():
                    old_visitor_data[k] = visitor_data.get(k, None)

                past_acquisition_info.append(old_visitor_data)
                visitor_data['past_acquisition_info'] = past_acquisition_info

            for k, v in VISITOR_PARAMS_MAPPING.items():
                value = request.GET.get(v, 'unknown')[:255]
                visitor_data[k] = value
    except Exception:
        # acquisition data is best-effort; never break the request over it
        print_stack_trace()

    redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
def log_click_track(request, coupon=None):
    """
    Store a ``ClickTrack`` row for the current request.

    The referer decides the source type ('coupon', 'company', 'category',
    default 'landing').  For paths containing "/o/" the merchant and target
    URL come from ``coupon``; otherwise the referer is the source and the
    request path is the target.  The visitor is loaded through
    ``request.session['visitor_id']`` and its acquisition fields are copied
    onto the click.

    This is best-effort logging: every failure is reported through
    ``print_stack_trace`` and nothing is raised.  Returns ``None``.
    """
    try:
        referer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.path

        # classify the page the click came from
        source_url_type = 'landing'
        if re.search(r'/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search(r'/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        # "A-z" also covered the punctuation between "Z" and "a"
        # ([ \ ] ^ `); spell the range out and keep the underscore, which
        # the old range matched too.
        elif re.search(r'/categories/[A-Za-z0-9_-]+/', referer):
            source_url_type = 'category'

        merchant = None
        if "/o/" in clicked_link:
            source_url = clicked_link
            merchant = coupon.merchant
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        # strip the skimlinks redirect before working out the domain
        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)
        merchant_domain = shorten_to_domain(target_url)

        visitor = Visitor.objects.get(pk=request.session['visitor_id'])

        click_track = ClickTrack(visitor=visitor,
                                 user_agent=visitor.user_agent[:255],
                                 referer=referer[:255],
                                 target_url=target_url[:255],
                                 source_url_type=source_url_type[:255],
                                 source_url=source_url[:255],
                                 merchant=merchant,
                                 coupon=coupon,
                                 merchant_domain=merchant_domain[:255])

        try:
            click_track.acquisition_source = visitor.acquisition_source
            click_track.acquisition_medium = visitor.acquisition_medium
            click_track.acquisition_term = visitor.acquisition_term
            click_track.acquisition_content = visitor.acquisition_content
            click_track.acquisition_campaign = visitor.acquisition_campaign
            click_track.acquisition_gclid = visitor.acquisition_gclid
        except Exception:
            print_stack_trace()

        try:
            click_track.save()
        except Exception:
            # Report the save failure first: the diagnostics below may raise
            # and handle an exception of their own (e.g. ``merchant`` is
            # None).  Presumably print_stack_trace reports the exception
            # currently being handled -- TODO confirm.
            print_stack_trace()
            try:
                # single-argument print works as statement and as function
                print("Visitor ID %s" % (click_track.visitor,))
                print("User Agent %s" % (click_track.user_agent,))
                print("Referer %s" % (click_track.referer,))
                print("target_url %s" % (click_track.target_url,))
                print("source_url_type %s" % (click_track.source_url_type,))
                print("merchant %s" % (click_track.merchant,))
                print("coupon %s" % (click_track.coupon,))
                print("merchant_domain %s" % (click_track.merchant_domain,))
                print("%s %s" % (merchant.name, merchant.id))
            except Exception:
                # the dump is purely diagnostic
                pass
    except Exception:
        print_stack_trace()
def click_track(request, clicked_link_path=None):
    """
    Store a ``ClickTrack`` row for the link posted as ``clicked``.

    The referer decides the source type ('coupon', 'company', 'category',
    default 'landing').  When the lower-cased link contains "/coupon/", its
    last path segment is taken as the id of an active ``Coupon``, which then
    supplies merchant and target URL; otherwise the referer is the source
    and the clicked link is the target.  ``clicked_link_path`` is accepted
    but not used.

    This is best-effort logging: every failure is reported through
    ``print_stack_trace``, and ``success()`` is returned in all cases.
    """
    try:
        referer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])
        clicked_link = request.POST["clicked"][:255]
        try:
            clicked_link = clicked_link.lower()
        except Exception:
            print_stack_trace()

        # classify the page the click came from
        source_url_type = 'landing'
        if re.search(r'/coupons/[a-z0-9-]+/[a-z0-9-]+/[\d]+/', referer):
            source_url_type = 'coupon'
        elif re.search(r'/coupons/[a-z0-9-]+/', referer):
            source_url_type = 'company'
        # "A-z" also covered the punctuation between "Z" and "a"
        # ([ \ ] ^ `); spell the range out and keep the underscore, which
        # the old range matched too.
        elif re.search(r'/categories/[A-Za-z0-9_-]+/', referer):
            source_url_type = 'category'

        coupon = None
        merchant = None
        if "/coupon/" in clicked_link:
            # skimlinks will assume the source url to be the /coupon/ url
            if clicked_link.endswith("/"):
                coupon_id = clicked_link.split("/")[-2]  # assumes trailing '/'
            else:
                coupon_id = clicked_link.split("/")[-1]
            source_url = clicked_link
            coupon = Coupon.active_objects.get(id=int(coupon_id))
            try:
                merchant = Merchant.objects.get(id=coupon.merchant.id)
            except Exception:
                merchant = None
            target_url = coupon.get_retailer_link()
        else:
            source_url = referer
            target_url = clicked_link
            merchant = None

        # strip the skimlinks redirect before working out the domain
        if 'go.redirectingat.com' in target_url:
            target_url = _remove_skimlinks(target_url)
        merchant_domain = shorten_to_domain(target_url)

        visitor = Visitor.objects.get(pk=request.session['visitor_id'])

        click_track = ClickTrack()
        click_track.visitor = visitor
        click_track.user_agent = visitor.user_agent[:255]
        click_track.referer = referer[:255]
        click_track.target_url = target_url[:255]
        click_track.source_url_type = source_url_type[:255]
        click_track.source_url = source_url[:255]
        click_track.merchant = merchant
        click_track.coupon = coupon
        click_track.merchant_domain = merchant_domain[:255]

        try:
            click_track.acquisition_source = visitor.acquisition_source
            click_track.acquisition_medium = visitor.acquisition_medium
            click_track.acquisition_term = visitor.acquisition_term
            click_track.acquisition_content = visitor.acquisition_content
            click_track.acquisition_campaign = visitor.acquisition_campaign
            click_track.acquisition_gclid = visitor.acquisition_gclid
        except Exception:
            print_stack_trace()

        try:
            click_track.save()
        except Exception:
            # Report the save failure first: the diagnostics below may raise
            # and handle an exception of their own (e.g. ``merchant`` is
            # None).  Presumably print_stack_trace reports the exception
            # currently being handled -- TODO confirm.
            print_stack_trace()
            try:
                # single-argument print works as statement and as function
                print("Visitor ID %s" % (click_track.visitor,))
                print("User Agent %s" % (click_track.user_agent,))
                print("Referer %s" % (click_track.referer,))
                print("target_url %s" % (click_track.target_url,))
                print("source_url_type %s" % (click_track.source_url_type,))
                print("merchant %s" % (click_track.merchant,))
                print("coupon %s" % (click_track.coupon,))
                print("merchant_domain %s" % (click_track.merchant_domain,))
                print("%s %s" % (merchant.name, merchant.id))
            except Exception:
                # the dump is purely diagnostic
                pass
    except Exception:
        print_stack_trace()

    return success()