Exemplo n.º 1
0
 def start_requests(self):
     """Yield one Request per 'dx'/'dx' mission, dispatched by domain.

     Missions come from ``session.getMission(sum_mark='dx', child_mark='dx')``
     and ``m[0]`` of each mission is used as the request URL.  The callback
     is the spider attribute named ``parse_<domain>``, where ``<domain>`` is
     ``urltools.get_domain(m[0])``.

     A mission whose request cannot be built (for example, because no
     matching ``parse_<domain>`` attribute exists) is logged and skipped.
     """
     import logging

     missions = session.getMission(sum_mark='dx', child_mark='dx')
     for m in missions:
         try:
             url = m[0]
             request = Request(
                 url=url,
                 meta={'mission': m, 'spider': self.name},
                 callback=getattr(self, 'parse_%s' % urltools.get_domain(url)),
             )
         except Exception:
             # Best-effort: one bad mission must not stop the others.
             logging.getLogger(__name__).exception(
                 'Skipping mission %r: could not build request', m)
             continue
         # Yield outside the try block so GeneratorExit (raised at the yield
         # when the generator is closed) and exceptions thrown in by the
         # consumer are never swallowed by the handler above.
         yield request
Exemplo n.º 2
0
 def start_requests(self):
     """Yield one sinajs Request per 'ghb'/'gp' mission.

     Missions come from ``session.getMission(sum_mark='ghb', child_mark='gp')``.
     ``m[8]`` of each mission is appended to ``http://hq.sinajs.cn/list=`` to
     form the request URL (presumably a quote/stock code -- TODO confirm
     against the mission schema).  Every response is handled by
     ``self.parse_sinajs``.

     The request meta carries the spider name, the value of
     ``urltools.get_domain(m[8])`` under ``'domain'``, and the mission itself.
     A mission whose request cannot be built is logged and skipped.
     """
     import logging

     missions = session.getMission(sum_mark='ghb', child_mark='gp')
     for m in missions:
         try:
             code = m[8]
             meta = {
                 'spider': self.name,
                 'domain': urltools.get_domain(code),
                 'mission': m,
             }
             request = Request(
                 url='http://hq.sinajs.cn/list=' + code,
                 meta=meta,
                 callback=self.parse_sinajs,
             )
         except Exception:
             # Best-effort: one bad mission must not stop the others.
             logging.getLogger(__name__).exception(
                 'Skipping mission %r: could not build request', m)
             continue
         # Yield outside the try block so GeneratorExit (raised at the yield
         # when the generator is closed) and exceptions thrown in by the
         # consumer are never swallowed by the handler above.
         yield request
Exemplo n.º 3
0
    def start_requests(self):
        """Yield one Request per 'wb' mission, routed by URL.

        Missions come from ``session.getMission(sum_mark='wb')`` and ``m[0]``
        of each mission is the request URL.  URLs containing ``'weibo.com'``
        are handled by ``self.parse_sina``; all others by ``self.parse_qq``.
        The mission is passed along in the request meta under ``'mission'``.
        """
        for mission in session.getMission(sum_mark='wb'):
            target = mission[0]
            # Only the selected handler attribute is looked up on the spider.
            handler = self.parse_sina if 'weibo.com' in target else self.parse_qq
            yield Request(url=target,
                          meta={'mission': mission},
                          callback=handler)
Exemplo n.º 4
0
 def start_requests(self):
     """Yield one Request per 'xw'/'ly' mission, dispatched by domain.

     Missions come from ``session.getMission(sum_mark='xw', child_mark='ly')``
     and ``m[0]`` of each mission is used as the request URL.  The callback
     is the spider attribute named ``parse_<domain>``, where ``<domain>`` is
     ``urltools.get_domain(m[0])``.

     The request meta carries the mission, the spider name and the domain.
     For the ``'aoyou'`` domain it additionally carries ``'simulate': True``
     (how that flag is consumed is not visible here).
     A mission whose request cannot be built (for example, because no
     matching ``parse_<domain>`` attribute exists) is logged and skipped.
     """
     import logging

     missions = session.getMission(sum_mark='xw', child_mark='ly')
     for m in missions:
         try:
             url = m[0]
             # Resolve the domain once; it drives both meta and the callback.
             domain = urltools.get_domain(url)
             meta = {'mission': m, 'spider': self.name, 'domain': domain}
             if domain == 'aoyou':
                 meta['simulate'] = True
             request = Request(
                 url=url,
                 meta=meta,
                 callback=getattr(self, 'parse_%s' % domain),
             )
         except Exception:
             # Best-effort: one bad mission must not stop the others.
             logging.getLogger(__name__).exception(
                 'Skipping mission %r: could not build request', m)
             continue
         # Yield outside the try block so GeneratorExit (raised at the yield
         # when the generator is closed) and exceptions thrown in by the
         # consumer are never swallowed by the handler above.
         yield request
Exemplo n.º 5
0
 def start_requests(self):
     """Yield one Request per 'xw'/'ss' mission, dispatched by domain.

     Missions come from ``session.getMission(sum_mark='xw', child_mark='ss')``
     and ``m[0]`` of each mission is used as the request URL.  The callback
     is the spider attribute named ``parse_<domain>``, where ``<domain>`` is
     ``urltools.get_domain(m[0])``, except that any domain containing
     ``'chinawatch-clock'`` is replaced by ``'chinawatch_clock'``.

     The request meta carries the spider name, the (possibly replaced)
     domain and the mission.  A mission whose request cannot be built (for
     example, because no matching ``parse_<domain>`` attribute exists) is
     logged and skipped.
     """
     import logging

     missions = session.getMission(sum_mark='xw', child_mark='ss')
     for m in missions:
         try:
             url = m[0]
             domain = urltools.get_domain(url)
             if 'chinawatch-clock' in domain:
                 # Presumably because a hyphen cannot appear in a method
                 # name written with ``def``; the underscore form is used
                 # for both the meta value and the callback lookup.
                 domain = 'chinawatch_clock'
             meta = {'spider': self.name, 'domain': domain, 'mission': m}
             request = Request(
                 url=url,
                 meta=meta,
                 callback=getattr(self, 'parse_%s' % domain),
             )
         except Exception:
             # Best-effort: one bad mission must not stop the others.
             logging.getLogger(__name__).exception(
                 'Skipping mission %r: could not build request', m)
             continue
         # Yield outside the try block so GeneratorExit (raised at the yield
         # when the generator is closed) and exceptions thrown in by the
         # consumer are never swallowed by the handler above.
         yield request