Example #1
File: parse.py Project: serkanh/scrapy
    def run(self, args, opts):
        if not args:
            print "An URL is required"
            return

        # download every URL passed on the command line
        for response in fetch(args):
            # look up the spider that handles this URL
            spider = spiders.fromurl(response.url)
            if not spider:
                log.msg('Cannot find spider for "%s"' % response.url)
                continue

            # callbacks requested explicitly on the command line take precedence
            if self.callbacks:
                for callback in self.callbacks:
                    items, links = self.run_callback(spider, response, callback, args, opts)
                    self.print_results(items, links, callback, opts)

            # with the rules option, use the first rule whose link extractor matches the URL
            elif opts.rules:
                rules = getattr(spider, "rules", None)
                if rules:
                    items, links = [], []
                    for rule in rules:
                        if rule.callback and rule.link_extractor.matches(response.url):
                            items, links = self.run_callback(spider, response, rule.callback, args, opts)
                            self.print_results(items, links, rule.callback, opts)
                            break
                else:
                    log.msg(
                        'No rules found for spider "%s", please specify a callback for parsing' % spider.domain_name
                    )
                    continue

            # otherwise fall back to the spider's default parse() callback
            else:
                items, links = self.run_callback(spider, response, "parse", args, opts)
                self.print_results(items, links, "parse", opts)
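
To make the branch order above easier to follow, here is a minimal, self-contained sketch of the same callback selection: callbacks passed explicitly are used first; otherwise, when the rules option is set, the first rule whose link extractor matches the URL is used; otherwise the spider's default "parse" callback is assumed. Every name in it (DummyLinkExtractor, DummyRule, choose_callbacks, and the sample data) is invented for illustration and is not part of Scrapy's API.

import re


class DummyLinkExtractor:
    """Stand-in link extractor: matches URLs against a regular expression."""

    def __init__(self, pattern):
        self.pattern = re.compile(pattern)

    def matches(self, url):
        return bool(self.pattern.search(url))


class DummyRule:
    """Stand-in crawling rule: a link extractor plus an optional callback name."""

    def __init__(self, link_extractor, callback=None):
        self.link_extractor = link_extractor
        self.callback = callback


def choose_callbacks(url, explicit_callbacks, use_rules, rules):
    """Return the callback names to run for a URL, mirroring the branches above."""
    if explicit_callbacks:
        # callbacks requested explicitly win
        return list(explicit_callbacks)
    if use_rules:
        # first rule whose link extractor matches the URL wins
        for rule in rules or []:
            if rule.callback and rule.link_extractor.matches(url):
                return [rule.callback]
        return []  # no matching rule: nothing to run for this URL
    # default: the spider's parse() callback
    return ["parse"]


rules = [DummyRule(DummyLinkExtractor(r"/item/"), callback="parse_item")]
print(choose_callbacks("http://example.com/item/1", [], True, rules))             # ['parse_item']
print(choose_callbacks("http://example.com/about", [], False, rules))             # ['parse']
print(choose_callbacks("http://example.com/about", ["parse_page"], True, rules))  # ['parse_page']
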
Example #2
File: fetch.py Project: serkanh/scrapy
    def run(self, args, opts):
        if len(args) != 1:
            print "One URL is required"
            return

        responses = fetch(args)
        if responses:
            # with the headers option, pretty-print the response headers instead of the body
            if opts.headers:
                pprint.pprint(responses[0].headers)
            else:
                print responses[0].body
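
The fetch command above just downloads one URL and either pretty-prints the response headers (when the headers option is set) or prints the raw body. Below is a tiny sketch of that presentation choice using a stand-in response object; FakeResponse and show_response are invented for illustration and are not Scrapy classes.

import pprint


class FakeResponse:
    """Stand-in exposing only the .headers and .body attributes used above."""

    def __init__(self, headers, body):
        self.headers = headers
        self.body = body


def show_response(response, headers_only=False):
    # headers_only mirrors the command's headers option
    if headers_only:
        pprint.pprint(response.headers)
    else:
        print(response.body)


resp = FakeResponse({"Content-Type": ["text/html; charset=utf-8"]}, "<html>...</html>")
show_response(resp, headers_only=True)  # pretty-print the headers
show_response(resp)                     # print the body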