def extractor(self, line):
    """Count one hit for the domain of the line's HTTP referer.

    ``line.http_referer`` is handed to ``Domain_Parser.parse``. When the
    parser returns a falsy result the hit is recorded under the sentinel
    key ``'__NOT_RECOGNIZED'``; otherwise it is recorded under the parsed
    result's ``SLD`` attribute (presumably the second-level domain --
    confirm against ``Domain_Parser``).

    The per-key counter lives in ``self.data`` and is incremented by one.
    """
    parsed = Domain_Parser.parse(line.http_referer)
    # Unparseable referers are still counted, all under one sentinel bucket.
    key = parsed.SLD if parsed else '__NOT_RECOGNIZED'
    self.data[key] = self.data.get(key, 0) + 1
def extractor(self, line):
    """Aggregate a line's hit count and URL under its parsed ``SLD``.

    ``line.url`` is parsed with ``Domain_Parser.parse``. Lines whose URL
    makes the parser raise, or for which it returns a falsy result, are
    skipped silently (best-effort extraction).

    For every other line, ``self.data[sld]`` is a dict with two keys:

    * ``'count'`` -- running sum of ``int(line.count)``.
    * ``'second_level_domain'`` -- list of ``parsed_domain.original_url``
      values seen for that key, in arrival order.

    Raises:
        ValueError / TypeError: if ``line.count`` cannot be converted to
            ``int``. ``self.data`` is left untouched in that case.
    """
    try:
        parsed_domain = Domain_Parser.parse(line.url)
    except Exception:
        # Deliberately best-effort: a URL the parser chokes on is skipped.
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
        # and SystemExit still propagate.
        return
    if not parsed_domain:
        return

    # Read and convert everything before touching self.data, so a bad
    # ``line.count`` cannot leave an empty, half-initialized entry behind.
    sld = parsed_domain.SLD
    hits = int(line.count)
    original_url = parsed_domain.original_url

    entry = self.data.setdefault(
        sld, {'count': 0, 'second_level_domain': []}
    )
    entry['count'] += hits
    entry['second_level_domain'].append(original_url)