def __init__(self):
     """Initialize the NYTimesBestSellers source and its parsing patterns.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512, then stores the site base URL, an empty
     ``seen`` set and two pre-compiled regular expressions.
     """
     AExternalEntitySource.__init__(self, "NYTimesBestSellers", self.TYPES, 512)
     self.base = 'http://www.nytimes.com'
     # Starts empty; presumably tracks already-handled items (usage is not
     # visible in this block -- confirm against the crawler methods).
     self.seen = set()
     
     # Raw string literals keep regex escapes such as ``\.`` and ``\d`` from
     # being parsed as (invalid) string escapes; the pattern text itself is
     # unchanged.
     #
     # details_re groups: (1) text after "___by " up to ". (",
     # (2) the parenthesized text, (3) the remainder after ") ".
     self.details_re = re.compile(r'.*___by ([^(]*)\. \(([^)]*)\) (.*)')
     # date_re groups: 4-digit, 2-digit and 2-digit runs of a
     # "/NNNN-NN-NN/" path segment.
     self.date_re    = re.compile(r'.*/(\d\d\d\d)-(\d\d)-(\d\d)/.*')
示例#2
0
 def __init__(self):
     """Initialize the TheTVDBCrawler source, its API client and its regexes.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512, then stores the site base URL, a ``TheTVDB``
     instance and four pre-compiled regular expressions.
     """
     source_name = "TheTVDBCrawler"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     self.base = "http://thetvdb.com"
     self._thetvdb = TheTVDB()

     compile_pattern = re.compile
     # Captures the digits that follow "&id=".
     self._id_re = compile_pattern('.*&id=([0-9]+).*')
     # Captures two underscore-free runs laid out as "___<a>___as <b>___".
     self._actor_re = compile_pattern('.*___([^_]+)___as ([^_]+)___.*')
     # Captures two numbers separated by " - ".
     self._season_re = compile_pattern('([0-9]+) - ([0-9]+)')
     # Captures three hyphen-separated numbers.
     self._date_re = compile_pattern('([0-9]+)-([0-9]+)-([0-9]+)')
示例#3
0
 def __init__(self):
     """Initialize the Yelp source, its parsing patterns and paging settings.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512, then stores the site base URL, five
     pre-compiled regular expressions, the results-per-page value and an
     empty ``seen`` set.
     """
     AExternalEntitySource.__init__(self, "Yelp", self.TYPES, 512)
     self.base = 'http://www.yelp.com'
     
     # Raw string literals keep the regex escape ``\.`` from being parsed as
     # an (invalid) string escape; the pattern text itself is unchanged.
     #
     # title_re: optional digits, ". ", then the captured remainder.
     self.title_re          = re.compile(r'[0-9]*\. (.*)')
     # address_re: three underscore-free runs separated by runs of "_".
     self.address_re        = re.compile(r'([^_]*)___*([^_]*)___*([^_]*)_*')
     # rating_reviews_re: captures a leading run of digits/dots before a space.
     self.rating_reviews_re = re.compile(r'([0-9.]*) .*')
     # start_re: captures the digits that follow "start=".
     self.start_re          = re.compile(r'.*start=([0-9]*).*')
     # category_re: matches strings beginning with "refine_category_".
     self.category_re       = re.compile(r'refine_category_.*')
     self.results_per_page  = 40
     
     # Starts empty; presumably tracks already-handled items (usage is not
     # visible in this block -- confirm against the crawler methods).
     self.seen = set()
示例#4
0
 def __init__(self):
     """Initialize the BostonMag source with an empty ``_seen`` set.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512.
     """
     source_name = "BostonMag"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     # Starts empty; how it is populated is not visible in this block.
     self._seen = set()
示例#5
0
 def __init__(self):
     """Initialize the LATimes source with empty ``_count`` and ``_seen``.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512, then creates an empty dict and an empty set.
     """
     source_name = "LATimes"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     # Both containers start empty; their use is not visible in this block.
     self._count = dict()
     self._seen = set()
示例#6
0
 def __init__(self):
     """Initialize the TimeOutLA source with an empty ``_seen`` set.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512.
     """
     source_name = "TimeOutLA"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     # Starts empty; how it is populated is not visible in this block.
     self._seen = set()
示例#7
0
 def __init__(self):
     """Initialize the SeattleTimes source, its base URL and page regex.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512, then stores the site base URL, an empty
     ``seen`` set and one pre-compiled regular expression.
     """
     source_name = "SeattleTimes"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     self.base = "http://www.seattletimes.com"
     # Starts empty; how it is populated is not visible in this block.
     self.seen = set()

     # Captures the digits that follow "&page=".
     page_pattern = '.*&page=([0-9]*)'
     self.page_re = re.compile(page_pattern)
示例#8
0
 def __init__(self):
     """Initialize the NYMag source.

     Only delegates to ``AExternalEntitySource.__init__`` with the source
     name, ``self.TYPES`` and 512; no additional state is set here.
     """
     source_name = "NYMag"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)
示例#9
0
 def __init__(self):
     """Initialize the Urbanspoon source and record its base URL.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512.
     """
     source_name = "Urbanspoon"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     self.base = "http://www.urbanspoon.com"
示例#10
0
 def __init__(self):
     """Initialize the AwardAnnals source, its base URL and page regex.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512, then stores the site base URL, an empty
     ``seen`` set and one pre-compiled regular expression.
     """
     source_name = "AwardAnnals"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     self.base = "http://www.awardannals.com"
     # Starts empty; how it is populated is not visible in this block.
     self.seen = set()

     # Captures the digits that follow the literal text "Page 1 of ".
     page_pattern = 'Page 1 of ([0-9]*)'
     self.page_re = re.compile(page_pattern)
 def __init__(self):
     """Initialize the AmazonBestSellerBookFeeds source and its settings.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512, then stores the site base URL, an empty
     ``seen`` set and a ``max_depth`` of 2.
     """
     source_name = "AmazonBestSellerBookFeeds"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     self.base = "http://www.amazon.com"
     # Starts empty; how it is populated is not visible in this block.
     self.seen = set()
     # NOTE(review): presumably a crawl/recursion depth limit -- confirm
     # against the code that reads it.
     self.max_depth = 2
示例#12
0
 def __init__(self):
     """Initialize the SFWeekly source with an empty ``_seen`` set.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512.
     """
     source_name = "SFWeekly"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     # Starts empty; how it is populated is not visible in this block.
     self._seen = set()
示例#13
0
 def __init__(self):
     """Initialize the Zagat source and record its base URL.

     Delegates to ``AExternalEntitySource.__init__`` with the source name,
     ``self.TYPES`` and 512.
     """
     source_name = "Zagat"
     AExternalEntitySource.__init__(self, source_name, self.TYPES, 512)

     self.base = "http://www.zagat.com"