Exemplo n.º 1
0
 def test_site_samples(self):
     """Test parse_html against samples recorded from real cases.

     Every line of the gzipped samples file is decoded as one JSON
     document with ``_decode_element`` as the object hook. All lines are
     decoded before the first sample is handed to ``_test_sample``.
     """
     samples_file = GzipFile(os.path.join(path, SAMPLES_FILE), "r")
     try:
         samples = [json.loads(line, object_hook=_decode_element)
                    for line in samples_file.readlines()]
     finally:
         # Release the file handle even when a line fails to decode.
         samples_file.close()
     for sample in samples:
         self._test_sample(sample)
Exemplo n.º 2
0
 def run(self, args, opts):
     """List targets, list a target's projects, or upload an egg.

     * ``opts.list_targets``: print the name and URL of every target
       returned by ``_get_targets()`` and return.
     * ``opts.list_projects``: print the ``projects`` entry of the target's
       ``listprojects.json`` response, one per line, and return.
     * otherwise: upload ``opts.egg`` if given, or a freshly built egg, to
       the target named by ``args``.

     Raises:
         UsageError: if setuptools cannot be imported.
     """
     try:
         # Availability check only; the name is not used in this method.
         import setuptools
     except ImportError:
         raise UsageError("setuptools not installed")
     if opts.list_targets:
         for name, target in _get_targets().items():
             # Parenthesised so the statement parses on Python 2 and 3 alike.
             print("%-20s %s" % (name, target['url']))
         return
     if opts.list_projects:
         target = _get_target(opts.list_projects)
         req = urllib2.Request(_url(target, 'listprojects.json'))
         _add_auth_header(req, target)
         f = urllib2.urlopen(req)
         try:
             projects = json.loads(f.read())['projects']
         finally:
             # Do not leave the HTTP response open if decoding fails.
             f.close()
         print(os.linesep.join(projects))
         return
     target_name = _get_target_name(args)
     target = _get_target(target_name)
     project = _get_project(target, opts)
     version = _get_version(target, opts)
     tmpdir = None
     if opts.egg:
         _log("Using egg: %s" % opts.egg)
         egg = open(opts.egg, 'rb')
     else:
         _log("Building egg of %s-%s" % (project, version))
         egg, tmpdir = _build_egg()
     try:
         _upload_egg(target, egg, project, version)
     finally:
         # Clean up the egg handle and the build directory even when the
         # upload raises.
         egg.close()
         if tmpdir:
             shutil.rmtree(tmpdir)
Exemplo n.º 3
0
 def _list(self):
     """Read messages from ``self.queue`` until a read returns a falsy value.

     Each read uses ``visibility_timeout=100``. Returns the list of
     JSON-decoded message bodies in the order they were read.
     """
     decoded = []
     while True:
         message = self.queue.read(visibility_timeout=100)
         if not message:
             break
         decoded.append(json.loads(message.get_body()))
     return decoded
 def test_two_items(self):
     # Export the same item twice; the decoded output must be a list
     # holding two plain-dict copies of it.
     exporter = self.ie
     exporter.start_exporting()
     for _ in range(2):
         exporter.export_item(self.i)
     exporter.finish_exporting()
     decoded = json.loads(self.output.getvalue())
     self.assertEqual(decoded, [dict(self.i) for _ in range(2)])
Exemplo n.º 5
0
 def test_jsonrpc_client_call_request(self):
     # Decode what the urllib_mock stand-in recorded as ``request`` and
     # check the URL plus the JSON-RPC envelope fields.
     fake_urllib = urllib_mock(1)
     jsonrpc_client_call('url', 'test', 'one', 2, _urllib=fake_urllib)
     payload = json.loads(fake_urllib.request)
     assert 'id' in payload
     self.assertEqual(fake_urllib.url, 'url')
     for key, expected in (('jsonrpc', '2.0'),
                           ('method', 'test'),
                           ('params', ['one', 2])):
         self.assertEqual(payload[key], expected)
Exemplo n.º 6
0
 def _append_next_from_sqs(self):
     """Request one message from the SQS queue and pass it on.

     The message body is decoded as JSON; its ``name`` entry is passed to
     ``append_spider_name`` as the spider name and the remaining entries
     as keyword arguments. Does nothing when no message is returned.
     """
     queue = self._get_sqs_queue()
     received = queue.get_messages(1, visibility_timeout=self.visibility_timeout)
     if not received:
         return
     message = received[0]
     # Note: the message is deleted before its body is decoded.
     message.delete()
     spider_args = json.loads(message.get_body())
     spider_name = spider_args.pop('name')
     self.append_spider_name(spider_name, **spider_args)
Exemplo n.º 7
0
 def result(self):
     """Return the ``result`` member of the JSON-RPC response in ``self.body``.

     Raises:
         JsonRpcError: if the response carries an ``error`` member.
         ValueError: if the response has neither ``result`` nor ``error``.
     """
     res = json.loads(self.body)
     if 'result' in res:
         return res['result']
     if 'error' in res:
         er = res['error']
         # JSON-RPC 2.0 allows the error object to omit "data"; pass None
         # in that case instead of failing with an unrelated KeyError.
         raise JsonRpcError(er['code'], er['message'], er.get('data'))
     msg = "JSON-RPC response must contain 'result' or 'error': %s" % res
     raise ValueError(msg)
Exemplo n.º 8
0
 def test_site_samples(self):
     """Test parse_html against samples recorded from real cases.

     Sample ``N`` is the pair ``<SAMPLES_FILE_PREFIX>_N.html`` (source) and
     ``<SAMPLES_FILE_PREFIX>_N.json`` (parsed form, decoded with
     ``_decode_element``). Numbering starts at 0 and stops at the first
     missing ``.json`` file.
     """
     def read_unicode(fname):
         # Close the handle explicitly rather than leaving it to the GC.
         fobj = open(fname, "rb")
         try:
             return str_to_unicode(fobj.read())
         finally:
             fobj.close()

     count = 0
     fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
     while os.path.exists(fname):
         source = read_unicode("%s_%d.html" % (SAMPLES_FILE_PREFIX, count))
         parsed = json.loads(read_unicode(fname), object_hook=_decode_element)
         self._test_sample(source, parsed, count)
         count += 1
         fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
Exemplo n.º 9
0
def jsonrpc_client_call(url, method, *args, **kwargs):
    """Execute a JSON-RPC call on the given url.

    Positional ``args`` are sent as the ``params`` list; when there are
    none, the remaining keyword arguments are sent instead. The special
    keyword ``_urllib`` replaces the ``urllib`` module used for the request.

    Returns the ``result`` member of the response.

    Raises:
        JsonRpcError: if the response carries an ``error`` member.
        ValueError: if the response has neither ``result`` nor ``error``.
    """
    _urllib = kwargs.pop('_urllib', urllib)
    req = {'jsonrpc': '2.0', 'method': method, 'params': args or kwargs, 'id': 1}
    res = json.loads(_urllib.urlopen(url, json.dumps(req)).read())
    if 'result' in res:
        return res['result']
    if 'error' in res:
        er = res['error']
        # JSON-RPC 2.0 allows the error object to omit "data"; pass None
        # in that case instead of failing with an unrelated KeyError.
        raise JsonRpcError(er['code'], er['message'], er.get('data'))
    msg = "JSON-RPC response must contain 'result' or 'error': %s" % res
    raise ValueError(msg)
Exemplo n.º 10
0
def add_sample(source):
    """
    Method for adding samples to test samples file
    (use from console)

    Existing samples are read back, a new entry holding ``source`` and
    ``list(parse_html(source))`` is appended, and the whole gzipped file is
    rewritten with one JSON document per line.
    """
    samples_path = os.path.join(path, SAMPLES_FILE)
    samples = []
    # Check the same path that is read and rewritten below. Testing the bare
    # SAMPLES_FILE name resolves against the current directory, so stored
    # samples could be skipped and then overwritten.
    if os.path.exists(samples_path):
        existing = GzipFile(samples_path, "r")
        try:
            for line in existing.readlines():
                samples.append(json.loads(line))
        finally:
            existing.close()

    new_sample = {"source": source}
    new_sample["parsed"] = list(parse_html(source))
    samples.append(new_sample)
    samples_file = GzipFile(samples_path, "wb")
    try:
        for sample in samples:
            samples_file.write(json.dumps(sample, default=_encode_element) + "\n")
    finally:
        # Always flush and close the archive, even if serialisation fails.
        samples_file.close()
Exemplo n.º 11
0
 def test_site_pages(self):
     """
     Tests from real pages. More reliable and easy to build for more complicated structures

     Each line of ``samples_pageparsing.json.gz`` is a JSON document with an
     ``annotated`` page source and the ``annotations`` expected from it.
     """
     # Let GzipFile open the archive itself (binary mode) and close it once
     # every line is decoded, instead of reading it through a text-mode
     # handle that was never closed.
     samples_file = GzipFile(os.path.join(path, "samples_pageparsing.json.gz"), "rb")
     try:
         samples = [json.loads(line) for line in samples_file.readlines()]
     finally:
         samples_file.close()
     for sample in samples:
         source = sample["annotated"]
         annotations = sample["annotations"]
         template = HtmlPage(body=str_to_unicode(source))
         parser = TemplatePageParser(TokenDict())
         parser.feed(template)
         for annotation in parser.annotations:
             # Expected annotations are consumed front to back, in step with
             # the parser's annotations.
             test_annotation = annotations.pop(0)
             for s in annotation.__slots__:
                 if s == "tag_attributes":
                     for pair in getattr(annotation, s):
                         self.assertEqual(list(pair), test_annotation[s].pop(0))
                 else:
                     self.assertEqual(getattr(annotation, s), test_annotation[s])
         # No expected annotation may be left unconsumed.
         self.assertEqual(annotations, [])
Exemplo n.º 12
0
 def test_site_pages(self):
     """
     Tests from real pages. More reliable and easy to build for more complicated structures

     Page ``N`` is read from ``samples/samples_pageparsing_N.html`` and its
     expected annotations from ``samples/samples_pageparsing_N.json``.
     Numbering starts at 0 and stops at the first missing ``.json`` file.
     """
     def read_unicode(fname):
         # Close the handle explicitly rather than leaving it to the GC.
         fobj = open(fname, "rb")
         try:
             return str_to_unicode(fobj.read())
         finally:
             fobj.close()

     SAMPLES_FILE_PREFIX = os.path.join(path, "samples/samples_pageparsing")
     count = 0
     fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
     while os.path.exists(fname):
         source = read_unicode("%s_%d.html" % (SAMPLES_FILE_PREFIX, count))
         annotations = json.loads(read_unicode(fname))
         template = HtmlPage(body=source)
         parser = TemplatePageParser(TokenDict())
         parser.feed(template)
         for annotation in parser.annotations:
             # Expected annotations are consumed front to back, in step with
             # the parser's annotations.
             test_annotation = annotations.pop(0)
             for s in annotation.__slots__:
                 if s == "tag_attributes":
                     for pair in getattr(annotation, s):
                         self.assertEqual(list(pair), test_annotation[s].pop(0))
                 else:
                     self.assertEqual(getattr(annotation, s), test_annotation[s])
         # No expected annotation may be left unconsumed.
         self.assertEqual(annotations, [])
         count += 1
         fname = "%s_%d.json" % (SAMPLES_FILE_PREFIX, count)
Exemplo n.º 13
0
 def decode(self, text):
     """Parse ``text`` as JSON and return the resulting object."""
     parsed = json.loads(text)
     return parsed
Exemplo n.º 14
0
def json_get(opts, path):
    """Fetch the URL built by ``get_wsurl(opts, path)`` and return its body decoded as JSON."""
    url = get_wsurl(opts, path)
    response = urllib.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        # Close the response promptly instead of relying on garbage
        # collection to release the connection.
        response.close()
Exemplo n.º 15
0
 def _read_template_annotation(html_tag):
     """Return the JSON-decoded ``data-scrapy-annotate`` attribute of ``html_tag``.

     Returns None when the tag has no such attribute.
     """
     template_attr = html_tag.attributes.get('data-scrapy-annotate')
     if template_attr is None:
         return None
     # The JSON lives inside an HTML attribute, so its double quotes may be
     # written as &quot; entities. The previous replace('"', '"') swapped a
     # quote for itself and therefore never unescaped anything.
     unescaped = template_attr.replace('&quot;', '"')
     return json.loads(unescaped)
Exemplo n.º 16
0
 def decode(self, text):
     """Deserialize the JSON document ``text`` and return the result."""
     loads = json.loads
     return loads(text)
Exemplo n.º 17
0
 def _check_output(self):
     """Assert that the output decodes to a one-element list with the item.

     The text from ``self.output`` is stripped of surrounding whitespace,
     decoded as JSON and compared against ``[dict(self.i)]``.
     """
     raw = self.output.getvalue()
     decoded = json.loads(raw.strip())
     expected = [dict(self.i)]
     self.assertEqual(decoded, expected)
Exemplo n.º 18
0
 def _read_template_annotation(html_tag):
     """Decode the ``data-scrapy-annotate`` attribute of ``html_tag`` as JSON.

     Returns:
         The decoded value, or None when the attribute is absent.
     """
     template_attr = html_tag.attributes.get('data-scrapy-annotate')
     if template_attr is None:
         return None
     # Turn &quot; entities back into double quotes before decoding. The
     # earlier replace('"', '"') mapped a quote to itself and was a no-op.
     unescaped = template_attr.replace('&quot;', '"')
     return json.loads(unescaped)
Exemplo n.º 19
0
 def _check_output(self):
     """Compare the JSON-decoded, whitespace-stripped output with ``[dict(self.i)]``."""
     text = self.output.getvalue().strip()
     exported_items = json.loads(text)
     self.assertEqual(exported_items, [dict(self.i)])
Exemplo n.º 20
0
def json_get(opts, path):
    """Return the JSON-decoded body of the URL given by ``get_wsurl(opts, path)``."""
    url = get_wsurl(opts, path)
    handle = urllib.urlopen(url)
    try:
        body = handle.read()
    finally:
        # Always release the response, including when the read fails.
        handle.close()
    return json.loads(body)
Exemplo n.º 21
0
 def _pop(self):
     """Request one message from the queue, delete it and return its body decoded as JSON.

     Returns None when no message is returned by the queue.
     """
     received = self.queue.get_messages(1, visibility_timeout=self.visibility_timeout)
     if not received:
         return
     message = received[0]
     # Note: the message is deleted before its body is decoded.
     message.delete()
     return json.loads(message.get_body())