def test_extract_repeated_field(self):
     # Part 1: build a sample for https://stackoverflow.com from html_page
     # (with its data-scrapy-annotate attributes stripped by the regex
     # below), attach the annotations stored in so_annotations.json and
     # check what IblSpider extracts from that page.
     body_without_annotations = re.sub(
         'data-scrapy-annotate=".*"', '', html_page._body)
     spec = open_spec('so_annotations.json')
     annotations = spec['annos']
     item_schemas = spec['items']
     expected = spec['results']
     sample = {
         'plugins': {'annotations-plugin': {'extracts': annotations}},
         'url': 'https://stackoverflow.com',
         'original_body': body_without_annotations,
         'scrapes': 'default',
         'page_id': '507f520c3bf361f4c5cd55c44307a271bccb2218',
         'version': '0.13.0',
     }
     spider = IblSpider(
         'so', make_spider(sample=sample), item_schemas, {}, Settings())
     response = HtmlResponse(
         'http://url', body=sample['original_body'], encoding='utf-8')
     # Only extracted items are of interest; Request objects are dropped.
     extracted = [
         entry for entry in spider.parse(response)
         if not isinstance(entry, Request)
     ]

     # All items must share one and the same sorted tuple of field names.
     expected_keys = {(u'_index', u'_template', u'_type', u'answered',
                       u'tags', u'title', 'url')}
     actual_keys = {tuple(sorted(entry.keys())) for entry in extracted}
     self.assertEqual(actual_keys, expected_keys)
     # Spot-check three positions against the stored results, then the
     # total number of items.
     self.assertEqual(
         [extracted[0], extracted[52], extracted[-1]], expected)
     self.assertEqual(len(extracted), 96)

     # Part 2: the autoevolution fixture must match its results in full.
     spider, response, expected = open_spider_page_and_results(
         'autoevolution.json')
     extracted = [
         entry for entry in spider.parse(response)
         if not isinstance(entry, Request)
     ]
     self.assertEqual(extracted, expected)
 def test_extract_repeated_field(self):
     # Runs IblSpider over two fixtures (so_annotations.json and
     # autoevolution.json) and compares the non-Request parse output with
     # the results stored alongside each fixture.

     def items_only(outputs):
         # Discard Request objects from the parse output, keep the rest.
         return [out for out in outputs if not isinstance(out, Request)]

     sample = {
         'plugins': {'annotations-plugin': {}},
         'url': 'https://stackoverflow.com',
         'original_body': re.sub(
             'data-scrapy-annotate=".*"', '', html_page._body),
         'scrapes': 'default',
         'page_id': '507f520c3bf361f4c5cd55c44307a271bccb2218',
         'version': '0.13.0',
     }
     spec = open_spec('so_annotations.json')
     annotations, schemas, expected = (
         spec['annos'], spec['items'], spec['results'])
     sample['plugins']['annotations-plugin'].update(extracts=annotations)

     so_spider = IblSpider(
         'so', make_spider(sample=sample), schemas, {}, Settings())
     so_page = HtmlResponse(
         'http://url', body=sample['original_body'], encoding='utf-8')
     so_items = items_only(so_spider.parse(so_page))

     # A single key tuple is expected: every item carries the same fields.
     wanted_keys = {(u'_index', u'_template', u'_type', u'answered',
                     u'tags', u'title', 'url')}
     seen_keys = {tuple(sorted(item.keys())) for item in so_items}
     self.assertEqual(seen_keys, wanted_keys)
     # The stored results hold the items at indexes 0, 52 and -1.
     sampled = [so_items[0], so_items[52], so_items[-1]]
     self.assertEqual(sampled, expected)
     self.assertEqual(len(so_items), 96)

     ae_spider, ae_page, ae_expected = open_spider_page_and_results(
         'autoevolution.json')
     self.assertEqual(items_only(ae_spider.parse(ae_page)), ae_expected)
 def test_extract_multiple_item_types(self):
     # Parse the body of the first xceed template and compare the first six
     # parse outputs with the results stored in the xceed_spider fixture.
     spider = IblSpider(
         'xceed', xceed_spider, xceed_spider['items'], {}, Settings())
     template_body = xceed_spider['templates'][0]['original_body']
     response = HtmlResponse(
         'http://url', body=template_body, encoding='utf-8')
     # The parse output is fully materialised before slicing.
     outputs = list(spider.parse(response))
     first_six = outputs[:6]
     self.assertEqual(first_six, xceed_spider['results'])
 def test_extract_multiple_item_types(self):
     # Parse the body of the first xceed template and check that the
     # non-Request outputs equal the results stored in the fixture.
     spider = IblSpider(
         'xceed', xceed_spider, xceed_spider['items'], {}, Settings())
     template_body = xceed_spider['templates'][0]['original_body']
     response = HtmlResponse(
         'http://url', body=template_body, encoding='utf-8')
     outputs = list(spider.parse(response))
     # Request objects in the output are not part of the comparison.
     extracted = [out for out in outputs if not isinstance(out, Request)]
     self.assertEqual(extracted, xceed_spider['results'])
 def test_extract_multiple_item_types(self):
     # Feed the first xceed template's original body to the spider and
     # require the leading six parse outputs to equal the stored results.
     xceed = IblSpider(
         'xceed', xceed_spider, xceed_spider['items'], {}, Settings())
     first_template = xceed_spider['templates'][0]
     page = HtmlResponse(
         'http://url',
         body=first_template['original_body'],
         encoding='utf-8',
     )
     parsed = list(xceed.parse(page))
     self.assertEqual(parsed[:6], xceed_spider['results'])
 def test_extract_multiple_item_types(self):
     # Feed the first xceed template's original body to the spider and
     # compare everything it yields, apart from Request objects, with the
     # stored results.
     xceed = IblSpider(
         'xceed', xceed_spider, xceed_spider['items'], {}, Settings())
     first_template = xceed_spider['templates'][0]
     page = HtmlResponse(
         'http://url',
         body=first_template['original_body'],
         encoding='utf-8',
     )
     parsed = list(xceed.parse(page))
     kept = []
     for output in parsed:
         if isinstance(output, Request):
             continue
         kept.append(output)
     self.assertEqual(kept, xceed_spider['results'])
 def test_extract_multiple_item_types(self):
     # Parse the first xceed template body, drop Request objects, order the
     # remaining items by their '_type' and compare with the stored results.

     def type_rank(item):
         # Position of the item's '_type' in the fixed ordering below.
         return ('ticket', 'venue', 'event').index(item['_type'])

     spider = IblSpider(
         'xceed', xceed_spider, xceed_spider['items'], {}, Settings())
     template_body = xceed_spider['templates'][0]['original_body']
     response = HtmlResponse(
         'http://url', body=template_body, encoding='utf-8')
     outputs = list(spider.parse(response))
     extracted = [out for out in outputs if not isinstance(out, Request)]
     # Stable sort: items of the same type keep their parse order.
     extracted.sort(key=type_rank)
     self.assertEqual(extracted, xceed_spider['results'])
 def test_extract_multiple_item_types(self):
     # The spider's non-Request output for the first xceed template, once
     # grouped as ticket, venue, event via '_type', must equal the stored
     # results.
     type_order = ('ticket', 'venue', 'event')
     xceed = IblSpider(
         'xceed', xceed_spider, xceed_spider['items'], {}, Settings())
     page = HtmlResponse(
         'http://url',
         body=xceed_spider['templates'][0]['original_body'],
         encoding='utf-8',
     )
     parsed = list(xceed.parse(page))
     # sorted() is stable, so parse order is preserved within each type.
     ordered = sorted(
         (out for out in parsed if not isinstance(out, Request)),
         key=lambda out: type_order.index(out['_type']),
     )
     self.assertEqual(ordered, xceed_spider['results'])