def __init__(self):
    """Initialise the "NYTimesBestSellers" source.

    Delegates to ``AExternalEntitySource.__init__`` and then sets the base
    URL, an empty ``seen`` set and the precompiled regular expressions
    stored on the instance.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(self, "NYTimesBestSellers", self.TYPES, 512)

    self.base = 'http://www.nytimes.com'
    self.seen = set()  # starts empty; presumably tracks already-handled items

    # Raw string literals: the patterns contain regex escapes (\. \( \) \d)
    # that are invalid *string* escapes and trigger a warning in non-raw
    # literals on modern Python. The compiled patterns are unchanged.

    # Groups: (1) text after "___by " up to the first "(" and the literal
    # ". " before it, (2) the parenthesised text, (3) the remainder.
    self.details_re = re.compile(r'.*___by ([^(]*)\. \(([^)]*)\) (.*)')

    # Groups: a "/dddd-dd-dd/" path segment split into its three digit runs
    # (looks like year, month, day -- confirm against the crawled URLs).
    self.date_re = re.compile(r'.*/(\d\d\d\d)-(\d\d)-(\d\d)/.*')
def __init__(self):
    """Initialise the "TheTVDBCrawler" source.

    Delegates to ``AExternalEntitySource.__init__``, then stores the base
    URL, a ``TheTVDB`` instance and the precompiled regular expressions.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "TheTVDBCrawler",
        self.TYPES,
        512,
    )

    self.base = 'http://thetvdb.com'
    self._thetvdb = TheTVDB()

    # Captures the run of digits following "&id=".
    self._id_re = re.compile('.*&id=([0-9]+).*')

    # Captures two underscore-free runs: one between "___" and "___as ",
    # and one between "___as " and the next "___".
    self._actor_re = re.compile('.*___([^_]+)___as ([^_]+)___.*')

    # Captures two integers separated by " - ".
    self._season_re = re.compile('([0-9]+) - ([0-9]+)')

    # Captures three integers separated by hyphens.
    self._date_re = re.compile('([0-9]+)-([0-9]+)-([0-9]+)')
def __init__(self):
    """Initialise the "Yelp" source.

    Delegates to ``AExternalEntitySource.__init__``, then stores the base
    URL, the precompiled regular expressions, the page size and an empty
    ``seen`` set.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(self, "Yelp", self.TYPES, 512)

    self.base = 'http://www.yelp.com'

    # Raw string literal: "\." is a regex escape but an invalid *string*
    # escape, which triggers a warning in a non-raw literal on modern
    # Python. The compiled pattern is unchanged.
    # Captures everything after an optional run of digits, a period and a
    # single space.
    self.title_re = re.compile(r'[0-9]*\. (.*)')

    # Captures three underscore-free runs separated by two or more
    # underscores ("___*" is two underscores plus any number more).
    self.address_re = re.compile('([^_]*)___*([^_]*)___*([^_]*)_*')

    # Captures the leading run of digits/periods before the first space.
    self.rating_reviews_re = re.compile('([0-9.]*) .*')

    # Captures the (possibly empty) run of digits following "start=".
    self.start_re = re.compile('.*start=([0-9]*).*')

    # Matches "refine_category_" followed by anything.
    self.category_re = re.compile('refine_category_.*')

    self.results_per_page = 40
    self.seen = set()  # starts empty; presumably tracks already-handled items
def __init__(self):
    """Initialise the "BostonMag" source with an empty ``_seen`` set.

    Delegates to ``AExternalEntitySource.__init__`` first.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "BostonMag",
        self.TYPES,
        512,
    )

    # Starts empty; presumably records items already handled -- confirm
    # against the methods that use it.
    self._seen = set()
def __init__(self):
    """Initialise the "LATimes" source.

    Delegates to ``AExternalEntitySource.__init__``, then creates an empty
    ``_count`` dict and an empty ``_seen`` set on the instance.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "LATimes",
        self.TYPES,
        512,
    )

    # Both containers start empty; how they are keyed/filled is decided by
    # code outside this method.
    self._count = {}
    self._seen = set()
def __init__(self):
    """Initialise the "TimeOutLA" source with an empty ``_seen`` set.

    Delegates to ``AExternalEntitySource.__init__`` first.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "TimeOutLA",
        self.TYPES,
        512,
    )

    # Starts empty; presumably records items already handled -- confirm
    # against the methods that use it.
    self._seen = set()
def __init__(self):
    """Initialise the "SeattleTimes" source.

    Delegates to ``AExternalEntitySource.__init__``, then stores the base
    URL, an empty ``seen`` set and the precompiled page-number pattern.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "SeattleTimes",
        self.TYPES,
        512,
    )

    self.base = 'http://www.seattletimes.com'
    self.seen = set()

    # Captures the (possibly empty) run of digits following "&page=".
    self.page_re = re.compile('.*&page=([0-9]*)')
def __init__(self):
    """Initialise the "NYMag" source.

    Only delegates to ``AExternalEntitySource.__init__``; no additional
    instance state is created here.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "NYMag",
        self.TYPES,
        512,
    )
def __init__(self):
    """Initialise the "Urbanspoon" source and record its base URL.

    Delegates to ``AExternalEntitySource.__init__`` first.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "Urbanspoon",
        self.TYPES,
        512,
    )

    self.base = 'http://www.urbanspoon.com'
def __init__(self):
    """Initialise the "AwardAnnals" source.

    Delegates to ``AExternalEntitySource.__init__``, then stores the base
    URL, an empty ``seen`` set and the precompiled page-count pattern.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "AwardAnnals",
        self.TYPES,
        512,
    )

    self.base = 'http://www.awardannals.com'
    self.seen = set()

    # Captures the (possibly empty) run of digits after "Page 1 of ".
    self.page_re = re.compile('Page 1 of ([0-9]*)')
def __init__(self):
    """Initialise the "AmazonBestSellerBookFeeds" source.

    Delegates to ``AExternalEntitySource.__init__``, then stores the base
    URL, an empty ``seen`` set and a ``max_depth`` of 2.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "AmazonBestSellerBookFeeds",
        self.TYPES,
        512,
    )

    self.base = 'http://www.amazon.com'
    self.seen = set()

    # NOTE(review): presumably bounds how deep the crawl recurses -- confirm
    # against the code that reads max_depth.
    self.max_depth = 2
def __init__(self):
    """Initialise the "SFWeekly" source with an empty ``_seen`` set.

    Delegates to ``AExternalEntitySource.__init__`` first.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "SFWeekly",
        self.TYPES,
        512,
    )

    # Starts empty; presumably records items already handled -- confirm
    # against the methods that use it.
    self._seen = set()
def __init__(self):
    """Initialise the "Zagat" source and record its base URL.

    Delegates to ``AExternalEntitySource.__init__`` first.
    """
    # NOTE(review): the meaning of the trailing 512 is defined by
    # AExternalEntitySource.__init__, which is not visible here -- confirm.
    AExternalEntitySource.__init__(
        self,
        "Zagat",
        self.TYPES,
        512,
    )

    self.base = 'http://www.zagat.com'