Пример #1
0
    def post(self):
        """Recompute ``normalized_url`` for one batch of page views.

        Fetches up to 100 ``PageView`` entities ordered by ``-created_at``,
        resuming from the ``cursor`` request parameter when one is supplied.
        For every entity the URL is rebuilt as scheme + host + path followed
        by its query parameters sorted by name, leaving out each name listed
        in the comma-separated ``filtered_params`` request parameter. The
        result is stored in ``normalized_url`` and the entity is saved.

        Writes a JSON object with ``status``, the follow-up ``cursor`` and the
        number of entities processed (``count``) to the response.
        """
        query = PageView.all()
        query.order('-created_at')

        # The original test was `cursor != None`, which cannot distinguish a
        # missing parameter if the request object reports it as '' (webapp's
        # default -- TODO confirm for this handler). Only resume when a cursor
        # value is actually present.
        cursor = self.request.get('cursor')
        if cursor:
            query.with_cursor(cursor)

        # Names of query parameters to drop from the normalized URL.
        raw_filtered = str(self.request.get('filtered_params'))
        filtered_params = set(name.strip() for name in raw_filtered.split(','))

        page_views = query.fetch(100)

        for page_view in page_views:
            parsed = urlparse(page_view.url)
            params = cgi.parse_qs(parsed.query)

            # NOTE(review): parse_qs hands back decoded values and they are
            # joined here without re-encoding, so values containing '&', '='
            # or spaces yield an ambiguous query string. Left unchanged so
            # newly written values stay comparable with stored ones.
            pairs = []
            for key in sorted(params):
                if key in filtered_params:
                    continue
                for val in params[key]:
                    pairs.append(key + "=" + val)

            new_url = parsed.scheme + "://" + parsed.netloc + parsed.path
            if pairs:
                new_url += "?" + "&".join(pairs)

            page_view.normalized_url = new_url
            page_view.put()

        self.response.out.write(simplejson.dumps({
            'status': 'ok',
            'cursor': str(query.cursor()),
            'count': len(page_views),
        }))
Пример #2
0
    def get(self):
        """Write the distinct normalized URLs that start with ``q``.

        Runs a prefix query on ``PageView.normalized_url`` using the ``q``
        request parameter, fetches up to 1000 matching entities, removes
        duplicate URLs and writes them to the response in ascending order,
        one per line.
        """
        search_term = self.request.get('q')
        query = PageView.all()
        query.order('-normalized_url')
        query.filter('normalized_url >=', search_term)
        # The upper bound of a prefix range must be the prefix followed by the
        # character U+FFFD. That needs a unicode literal: in a Python 2 byte
        # string "\ufffd" is six literal characters starting with a backslash,
        # which would exclude every URL whose next character sorts above '\'
        # (for example all lowercase letters).
        query.filter('normalized_url <', search_term + u"\ufffd")
        page_views = query.fetch(1000)

        # Deduplicate, then sort ascending for stable output.
        unique_urls = sorted(
            set(page_view.normalized_url for page_view in page_views))

        self.response.out.write("\n".join(unique_urls))
Пример #3
0
 def post(self):
     """Count page views per session for one normalized URL.

     Fetches up to 1000 ``PageView`` entities whose ``normalized_url`` equals
     the ``url`` request parameter, ordered by ``session_id`` and resuming
     from the ``cursor`` request parameter when one is supplied.

     Writes a JSON object with ``status``, ``results`` (session id -> number
     of page views in this batch), the follow-up ``cursor``,
     ``total_pageviews`` and ``total_sessions`` to the response.
     """
     url = self.request.get('url')
     query = PageView.all()
     query.order('session_id')
     query.filter('normalized_url =', url)

     # The original `cursor != None` test is true even for an absent
     # parameter if the request reports it as '' (webapp's default -- TODO
     # confirm). Only resume when a cursor value is actually present.
     cursor = self.request.get('cursor')
     if cursor:
         query.with_cursor(cursor)
     page_views = query.fetch(1000)

     page_views_by_session_id = {}
     for page_view in page_views:
         session_id = page_view.session_id
         page_views_by_session_id[session_id] = (
             page_views_by_session_id.get(session_id, 0) + 1)

     self.response.out.write(simplejson.dumps({
         'status': 'ok',
         'results': page_views_by_session_id,
         'cursor': str(query.cursor()),
         'total_pageviews': len(page_views),
         'total_sessions': len(page_views_by_session_id),
     }))
Пример #4
0
 def post(self):
     """Tally the pages seen just before or just after a given URL.

     Request parameters:
       mode   -- 'entrances' looks at the previous page view in the session
                 (``session_order - 1``); any other value looks at the next
                 one (``session_order + 1``).
       url    -- the ``normalized_url`` whose page views are examined.
       cursor -- optional cursor to resume a previous batch.

     For each of up to 1000 matching page views the neighbouring page view
     in the same session is looked up and its ``normalized_url`` counted.
     In 'entrances' mode a page view with ``session_order == 0`` is counted
     as '[direct]'; in 'exits' mode a page view with no following page view
     is counted as '[leave-site]'.

     Writes a JSON object with ``status``, ``mode``, the follow-up
     ``cursor``, ``results``, ``total_entrances`` and
     ``total_entrance_pageviews`` to the response.
     """
     mode = self.request.get('mode')
     # Offset applied to session_order to reach the neighbouring page view.
     mode_session_order_add_amount = -1 if mode == 'entrances' else 1

     url = self.request.get('url')
     query = PageView.all()
     query.order('session_id')
     query.filter('normalized_url =', url)

     # The original `cursor != None` test is true even for an absent
     # parameter if the request reports it as '' (webapp's default -- TODO
     # confirm). Only resume when a cursor value is actually present.
     cursor = self.request.get('cursor')
     if cursor:
         query.with_cursor(cursor)
     page_views = query.fetch(1000)

     entrances_hash = {}

     def count(key):
         # Increment the tally for `key`, starting from zero.
         entrances_hash[key] = entrances_hash.get(key, 0) + 1

     q_s = "SELECT * FROM PageView WHERE session_id = :1 AND session_order = :2"
     for page_view in page_views:
         if mode == 'entrances' and page_view.session_order == 0:
             # First page view of the session: nothing came before it.
             count('[direct]')
             continue

         # NOTE(review): this issues one datastore query per page view.
         entrance_page_view = db.GqlQuery(
             q_s,
             page_view.session_id,
             page_view.session_order + mode_session_order_add_amount).get()
         if entrance_page_view:
             count(entrance_page_view.normalized_url)
         elif mode == 'exits':
             # No later page view in this session.
             count('[leave-site]')

     self.response.out.write(simplejson.dumps({
         'status': 'ok',
         'mode': mode,
         'cursor': str(query.cursor()),
         'results': entrances_hash,
         'total_entrances': len(entrances_hash),
         'total_entrance_pageviews': sum(entrances_hash.values()),
     }))
Пример #5
0
    def post(self):
        """Group page views at one position in the session by normalized URL.

        Request parameters:
          session_order -- integer position within the session to select.
          session_ids   -- optional JSON list; when non-empty the query is
                           restricted to these session ids.
          cursor        -- optional cursor to resume a previous batch.

        Fetches up to 1000 matching ``PageView`` entities (ordered by
        ``-created_at``), sorts them by ``normalized_url`` and collapses
        equal URLs into entries of the form
        ``{'url': ..., 'count': ..., 'session_ids': [...]}``.

        Writes a JSON object with ``status``, the follow-up ``cursor``,
        ``results`` and ``total_pageviews_in_query`` to the response.

        Raises:
            ValueError: if ``session_order`` is not an integer string.
        """
        query = PageView.all()
        query.order('-created_at')
        query.filter('session_order =', int(self.request.get('session_order')))
        if self.request.get('session_ids') != '':
            # NOTE(review): the datastore documentation says cursors are not
            # supported for queries using IN -- confirm that combining
            # session_ids with cursor paging behaves as intended.
            query.filter('session_id IN',
                         simplejson.loads(self.request.get('session_ids')))

        # The original `cursor != None` test is true even for an absent
        # parameter if the request reports it as '' (webapp's default -- TODO
        # confirm). Only resume when a cursor value is actually present.
        cursor = self.request.get('cursor')
        if cursor:
            query.with_cursor(cursor)

        page_views = query.fetch(1000)
        # Sort so that equal URLs are adjacent and can be merged in one pass.
        page_views.sort(key=lambda page_view: page_view.normalized_url)

        collapsed_page_views = []
        for page_view in page_views:
            last = collapsed_page_views[-1] if collapsed_page_views else None
            if last is not None and last['url'] == page_view.normalized_url:
                last['count'] += 1
                last['session_ids'].append(page_view.session_id)
            else:
                collapsed_page_views.append({
                    'url': page_view.normalized_url,
                    'count': 1,
                    'session_ids': [page_view.session_id],
                })

        self.response.out.write(simplejson.dumps({
            'status': 'ok',
            'cursor': str(query.cursor()),
            'results': collapsed_page_views,
            'total_pageviews_in_query': len(page_views),
        }))