def extractor(self, line):
    """Accumulate page views, unique visitors and referer counts for 'aoyou' users.

    The request is parsed with ``TongjicnzzRequestParser.parse``. A record is
    counted only when the parsed ``userid`` contains ``'aoyou'`` and
    ``Domain_Parser.parse`` accepts ``line.http_referer``.

    Args:
        line: Log record exposing ``request`` and ``http_referer``.

    Returns:
        None. Counters are accumulated in ``self.data['aoyou']``.
    """
    info = TongjicnzzRequestParser.parse(line.request)
    if not info:
        return
    userid = info.get('userid')
    if not userid or 'aoyou' not in userid:
        return
    if not Domain_Parser.parse(line.http_referer):
        return

    stats = self.data['aoyou']
    stats['pv'] += 1
    stats['uv'].add(userid)

    refer = info.get('refer')
    if refer is None:
        # No 'refer' field at all: pv/uv are counted, referer stats are not.
        return

    source = 'direct_or_other'
    if refer != '':
        # '-' is swapped back to '%' before unquoting (presumably the tracker
        # escapes '%' as '-' -- TODO confirm against the client script).
        referer = Domain_Parser.parse(unquote(refer.replace('-', '%')).strip())
        if referer:
            source = referer.SLD

    # .get() keeps this safe whether the referer map is a plain dict or a
    # pre-seeded/default dict.
    referers = stats['referer']
    referers[source] = referers.get(source, 0) + 1
# Example #2
# 0
 def extractor(self, line):
     """Count normalised referer URLs for the sites listed in ``self.criteria``.

     For every criteria key that contains the referer's SLD, the first
     matching ``match_patterns`` regex trims the URL to its matched text,
     every ``sub_patterns`` regex is then stripped from it, and the result
     is counted in ``self.data`` together with the site's ``site_id``.

     Args:
         line: Log record exposing ``http_referer``.

     Returns:
         None. Counts are accumulated in ``self.data``.
     """
     url = line.http_referer
     parsed_url = Domain_Parser.parse(url)
     if not parsed_url:
         return
     sld = parsed_url.SLD
     for site_url in self.criteria:
         if sld not in site_url:
             continue
         rules = self.criteria[site_url]
         for matcher in rules['match_patterns']:
             hit = matcher.search(url)
             if hit:
                 # NOTE: the trimmed URL is kept in ``url`` and therefore
                 # carried into any later site iterations as well.
                 url = hit.group(0)
                 break
         else:
             # No match pattern fired for this site; nothing to count.
             continue
         for cleaner in rules['sub_patterns']:
             url = cleaner.sub('', url)
         entry = self.data.setdefault(
             url, dict(site_id=rules['site_id'], count=0))
         entry['count'] += 1
    def extractor(self, line):
        """Accumulate keyword, traffic, funnel and referer statistics per site.

        The request is parsed with ``TongjicnzzRequestParser.parse`` and
        handled according to its ``kind``:

        * ``'keywords_log'``: the URL-quoted keyword is decoded (UTF-8, then
          GBK) and counted under ``info['site']`` when that site is in
          ``self.sites``; ``'taobao'`` keywords are also counted under
          ``'tmall'``.
        * ``'visit'``: the site is the first element of ``info['action']``
          found in ``self.sites``; ``info['action'][0]`` selects which
          counter (pv, category, cart, order, vip, ...) is updated, with
          extra Beijing-only counters based on the IP location.

        When a site was determined and the record carries a ``refer`` field,
        the referer's SLD (or ``'direct_or_other'``) is counted as well.

        Args:
            line: Log record exposing ``request``, ``remote_addr`` and
                ``http_referer``.

        Returns:
            None. Counters are accumulated in ``self.data``.
        """
        info = TongjicnzzRequestParser.parse(line.request)
        if not info:
            return
        kind = info.get('kind')
        site = ''
        if kind == 'keywords_log':
            raw_keyword = info.get('keyword')
            if not raw_keyword:
                # Nothing to count; an empty keyword never selects a site.
                return
            try:
                try:
                    keyword = unquote(raw_keyword.encode('utf-8')).decode('utf-8')
                except UnicodeDecodeError:
                    # Fall back to GBK when the bytes are not valid UTF-8.
                    keyword = unquote(raw_keyword.encode('utf-8')).decode('gbk')
            except UnicodeDecodeError:
                return
            if keyword and info.get('site'):
                site = info['site']
                if site == 'taobao':
                    # taobao keywords are counted under 'tmall' too.
                    self.data['tmall']['keyword'].setdefault(keyword, 0)
                    self.data['tmall']['keyword'][keyword] += 1
                if site in self.sites:
                    self.data[site]['keyword'].setdefault(keyword, 0)
                    self.data[site]['keyword'][keyword] += 1
        elif kind == 'visit':
            actions = info.get('action')
            if not actions or len(actions) < 2:
                return
            for candidate in actions:
                if candidate in self.sites:
                    site = candidate
                    break
            if site:
                location = Ip_Locator.locate(line.remote_addr)
                location = location[0] if location else ''
                action = actions[0]
                userid = info.get('userid')
                if userid:
                    self.data[site]['uv'].add(userid)
                    if '北京' in location:
                        self.data[site]['beijing_uv'].add(userid)
                if action == 'page_view':
                    self.data[site]['pv'] += 1
                    if '北京' in location:
                        self.data[site]['beijing_pv'] += 1
                elif action == 'category':
                    # The category name is the third action element; skip the
                    # counters when it is absent instead of raising IndexError.
                    if len(actions) > 2:
                        category = actions[2]
                        self.data[site]['category'].setdefault(category, 0)
                        self.data[site]['category'][category] += 1
                        if '北京' in location:
                            self.data[site]['beijing_category'].setdefault(
                                category, dict(url=line.http_referer, count=0))
                            self.data[site]['beijing_category'][category]['count'] += 1
                elif action == 'product_page_visit':
                    self.data[site]['product_page'] += 1
                elif action == 'click' and actions[1] == 'collection_click':
                    self.data[site]['collection'] += 1
                elif action in ('productaddcart_success_page',
                                'otheraddcart_success_page'):
                    self.data[site]['addcart'] += 1
                elif action in ('productcart_page_visit',
                                'othercart_page_visit'):
                    self.data[site]['cart'] += 1
                elif action in ('productorderwrite_page_visit',
                                'otherorderwrite_page_visit'):
                    self.data[site]['orderwrite'] += 1
                elif action in ('productordersuccess_page_visit',
                                'otherordersuccess_page_visit'):
                    self.data[site]['ordersuccess'] += 1
                elif action in ('productvip_page_visit',
                                'othervip_page_visit'):
                    self.data[site]['vip_pv'] += 1
                    # Only identified visitors count as unique VIP visitors,
                    # matching how 'uv' is guarded above.
                    if userid:
                        self.data[site]['vip_uv'].add(userid)

        if not site:
            return
        refer = info.get('refer')
        if refer is None:
            # No 'refer' field at all: referer stats are left untouched.
            return
        source = 'direct_or_other'
        if refer != '':
            # '-' is swapped back to '%' before unquoting (presumably the
            # tracker escapes '%' as '-' -- TODO confirm).
            referer = Domain_Parser.parse(unquote(refer.replace('-', '%')).strip())
            if referer:
                if site == '360buy' and referer.original_url == 'u.gwdang.com':
                    # Visits to 360buy referred by u.gwdang.com are not counted.
                    return
                source = referer.SLD
        referers = self.data[site]['referer']
        referers.setdefault(source, 0)
        referers[source] += 1