def start_requests(self):
    """Yield one Request per mission fetched with sum_mark='dx', child_mark='dx'.

    Each mission ``m`` is indexed positionally; ``m[0]`` is used as the
    request URL.  The callback is looked up on the spider by name as
    ``parse_<domain>``, where ``<domain>`` is whatever
    ``urltools.get_domain(m[0])`` returns.

    Missions for which the request cannot be built (for example no matching
    ``parse_<domain>`` method exists) are logged and skipped so that one bad
    record does not stop the remaining missions from being scheduled.
    """
    import logging

    log = logging.getLogger(__name__)

    for m in session.getMission(sum_mark='dx', child_mark='dx'):
        try:
            callback = getattr(self, 'parse_%s' % urltools.get_domain(m[0]))
            request = Request(
                url=m[0],
                meta={'mission': m, 'spider': self.name},
                callback=callback,
            )
        except Exception:
            # Best-effort: skip this mission, but leave a trace of why.
            log.warning('skipping mission %r: could not build request',
                        m, exc_info=True)
            continue
        # Yield outside the try block so that GeneratorExit (raised here when
        # the consumer closes the generator) is never intercepted.
        yield request
def start_requests(self):
    """Yield one Request per mission fetched with sum_mark='ghb', child_mark='gp'.

    For each mission ``m`` the value ``m[8]`` is appended to
    ``http://hq.sinajs.cn/list=`` to form the request URL, and every response
    is handled by ``self.parse_sinajs``.  The request meta carries the spider
    name, the result of ``urltools.get_domain(m[8])`` under ``'domain'``, and
    the mission itself.

    Missions for which the request cannot be built are logged and skipped so
    that one bad record does not stop the remaining missions from being
    scheduled.
    """
    import logging

    log = logging.getLogger(__name__)

    for m in session.getMission(sum_mark='ghb', child_mark='gp'):
        try:
            meta = {
                'spider': self.name,
                'domain': urltools.get_domain(m[8]),
                'mission': m,
            }
            request = Request(
                url='http://hq.sinajs.cn/list=' + m[8],
                meta=meta,
                callback=self.parse_sinajs,
            )
        except Exception:
            # Best-effort: skip this mission, but leave a trace of why.
            log.warning('skipping mission %r: could not build request',
                        m, exc_info=True)
            continue
        # Yield outside the try block so that GeneratorExit (raised here when
        # the consumer closes the generator) is never intercepted.
        yield request
def start_requests(self):
    """Yield one Request per mission fetched with sum_mark='wb'.

    ``m[0]`` of each mission is used as the request URL.  URLs containing
    ``weibo.com`` are handled by ``self.parse_sina``; every other URL is
    handled by ``self.parse_qq``.  The mission is passed along in the
    request meta under ``'mission'``.
    """
    for mission in session.getMission(sum_mark='wb'):
        request_meta = {'mission': mission}
        target_url = mission[0]
        # Only the selected parser attribute is looked up, as in an if/else.
        handler = self.parse_sina if 'weibo.com' in target_url else self.parse_qq
        yield Request(url=target_url, meta=request_meta, callback=handler)
def start_requests(self):
    """Yield one Request per mission fetched with sum_mark='xw', child_mark='ly'.

    ``m[0]`` of each mission is used as the request URL.  The request meta
    carries the mission, the spider name and the domain returned by
    ``urltools.get_domain(m[0])``; when that domain equals ``'aoyou'`` the
    meta additionally contains ``'simulate': True``.  The callback is looked
    up on the spider by name as ``parse_<domain>``.

    Missions for which the request cannot be built (for example no matching
    ``parse_<domain>`` method exists) are logged and skipped so that one bad
    record does not stop the remaining missions from being scheduled.
    """
    import logging

    log = logging.getLogger(__name__)

    for m in session.getMission(sum_mark='xw', child_mark='ly'):
        try:
            # Resolve the domain once; it drives both the meta and the callback.
            domain = urltools.get_domain(m[0])
            meta = {'mission': m, 'spider': self.name, 'domain': domain}
            if domain == 'aoyou':
                meta['simulate'] = True
            request = Request(
                url=m[0],
                meta=meta,
                callback=getattr(self, 'parse_%s' % domain),
            )
        except Exception:
            # Best-effort: skip this mission, but leave a trace of why.
            log.warning('skipping mission %r: could not build request',
                        m, exc_info=True)
            continue
        # Yield outside the try block so that GeneratorExit (raised here when
        # the consumer closes the generator) is never intercepted.
        yield request
def start_requests(self):
    """Yield one Request per mission fetched with sum_mark='xw', child_mark='ss'.

    ``m[0]`` of each mission is used as the request URL.  The request meta
    carries the spider name, the mission and a ``'domain'`` entry taken from
    ``urltools.get_domain(m[0])``.  The callback is looked up on the spider
    by name as ``parse_<domain>``.

    Missions for which the request cannot be built (for example no matching
    ``parse_<domain>`` method exists) are logged and skipped so that one bad
    record does not stop the remaining missions from being scheduled.
    """
    import logging

    log = logging.getLogger(__name__)

    for m in session.getMission(sum_mark='xw', child_mark='ss'):
        try:
            domain = urltools.get_domain(m[0])
            if 'chinawatch-clock' in domain:
                # A hyphen cannot appear in a method name, so this domain is
                # mapped to the underscore form used for its parse_* callback.
                domain = "chinawatch_clock"
            meta = {'spider': self.name, 'domain': domain, 'mission': m}
            request = Request(
                url=m[0],
                meta=meta,
                callback=getattr(self, 'parse_%s' % domain),
            )
        except Exception:
            # Best-effort: skip this mission, but leave a trace of why.
            log.warning('skipping mission %r: could not build request',
                        m, exc_info=True)
            continue
        # Yield outside the try block so that GeneratorExit (raised here when
        # the consumer closes the generator) is never intercepted.
        yield request