示例#1
0
 def test_url_save_guess_file(self):
     """Check the first element returned by ``url_save_guess_file``.

     The first line of the ``MD5SUMS`` listing is split on whitespace into
     a checksum and a file name; the file name is then expected to equal
     the first element of ``url_save_guess_file``'s result for that file's
     URL.
     """
     checksum_url = URL_DEBIAN_CD_PATH.format('MD5SUMS')
     listing = url_get_content(checksum_url, fake_headers())
     # Only the first listing entry is used; it must hold exactly two fields.
     first_entry = listing.splitlines()[0]
     expected_md5, image_name = first_entry.split()
     Log.d(TAG, 'md5={}, file={}'.format(expected_md5, image_name))
     saved = url_save_guess_file(URL_DEBIAN_CD_PATH.format(image_name))
     self.assertEqual(image_name, saved[0])
示例#2
0
 def test_url_save(self):
     """Save a file via ``url_save`` and compare its MD5 with the listing.

     The expected checksum and the file name come from the first line of
     the ``MD5SUMS`` listing. The file written by ``url_save`` is hashed in
     512 KiB chunks and the hex digest must equal the listed checksum.
     """
     listing = url_get_content(URL_DEBIAN_CD_PATH.format('MD5SUMS'),
                               fake_headers())
     expected_md5, image_name = listing.splitlines()[0].split()
     Log.d(TAG, 'md5={}, file={}'.format(expected_md5, image_name))

     def log_progress(a, b):
         # Logs a * 100 / b rounded to one decimal place.
         return Log.d(
             TAG, '{:>5}% downloaded'.format(round(a * 100 / b, 1)))

     saved_path, size = url_save(URL_DEBIAN_CD_PATH.format(image_name),
                                 reporthook=log_progress)
     Log.d(TAG, 'file size: {} MiB'.format(round(size / 1024 / 1024, 1)))

     digest = hashlib.md5()
     chunk_size = 512 * 1024
     with open(saved_path, 'rb') as f:
         # An empty bytes object marks end of file.
         for chunk in iter(lambda: f.read(chunk_size), b''):
             digest.update(chunk)
     self.assertEqual(expected_md5, digest.hexdigest())
示例#3
0
 def get_card(self, word: str) -> Tuple[str, List[str]]:
     """Query ``word`` and return the resolved entry name with its fields.

     The resolved name is the last path segment of the response's final
     URL with hyphens turned into spaces. When it differs from the
     normalized query (slashes, hyphens and apostrophes replaced by
     spaces, lower-cased, whitespace collapsed) the redirect is logged.

     Args:
         word: The term to look up; ``/`` is replaced by a space before
             the term is URL-quoted.

     Returns:
         A tuple of the resolved name and the value produced by
         ``self._extract_fields`` for the fetched content.

     Raises:
         WordNotFoundError: If the final URL's last path segment is empty.
     """
     Log.d(TAG, 'querying "{}"'.format(word))
     quoted = urllib.parse.quote(word.replace('/', ' '))
     response = urlopen_with_retry(URL_QUERY.format(quoted), fake_headers())

     final_path = urllib.parse.urlsplit(response.geturl()).path
     slug = final_path.rsplit('/', 1)[-1]
     actual = slug.replace('-', ' ')
     if not actual:
         raise WordNotFoundError('can\'t find: "{}"'.format(word))

     # Normalize the query the same way the slug was flattened so that a
     # mere formatting difference is not reported as a redirect.
     normalized = word
     for separator in ('/', '-', '\''):
         normalized = normalized.replace(separator, ' ')
     normalized = ' '.join(normalized.lower().split())
     if actual != normalized:
         Log.i(TAG, 'redirected "{}" to: "{}"'.format(word, actual))

     fields = self._extract_fields(
         url_get_content(response, fake_headers()))
     Log.d(TAG, 'parsed: "{}"'.format(actual))
     return actual, fields
示例#4
0
 def test_url_get_content(self):
     """Log whatever ``url_get_content`` returns for the ``MD5SUMS`` URL."""
     checksum_url = URL_DEBIAN_CD_PATH.format('MD5SUMS')
     content = url_get_content(checksum_url, fake_headers())
     Log.d(TAG, content)
示例#5
0
 def test_urlopen_with_retry(self):
     """Open the query URL for 'cater to' and log headers, status and URL."""
     quoted = urllib.parse.quote('cater to')
     url = URL_CAMBRIDGE_QUERY.format(quoted)
     with urlopen_with_retry(url, fake_headers()) as response:
         Log.d(TAG, 'headers={}'.format(response.headers))
         summary = 'status={}, url={}'.format(response.status, response.url)
         Log.d(TAG, summary)
示例#6
0
 def collapse2(h):
     """Wrap the second matched group of ``h`` in ``HTML_COLLAPSE``.

     ``h`` is matched with ``parse_tag``; the three groups are logged and
     the result is group 1, the formatted group 2, and group 3 joined.
     """
     m = parse_tag.match(h)
     # Read each group once and reuse it for both the log and the result.
     head, inner, tail = (m.group(index) for index in (1, 2, 3))
     Log.d(TAG, '{}\n{}\n{}'.format(head, inner, tail))
     return head + HTML_COLLAPSE.format(inner) + tail
示例#7
0
 def test_removeall(self):
     """Log the result of ``htmls.removeall(self.HTML, 'a')``."""
     result = htmls.removeall(self.HTML, 'a')
     Log.d(TAG, result)
示例#8
0
    def test_sub(self):
        """Log ``htmls.sub`` applied with a callback that unwraps markup.

        The callback replaces a match of the wrapping pattern with its
        captured inner text (group 1).
        """
        wrapper = re.compile(r'<[\s\S]*?>([\s\S]*)<[\s\S]*>')

        def unwrap(s):
            return wrapper.sub(r'\g<1>', s)

        result = htmls.sub(self.HTML, unwrap, 'a',
                           'href="http://example.com/"')
        Log.d(TAG, result)
示例#9
0
 def test_findall(self):
     """Log every item yielded by ``htmls.findall(self.HTML, 'a')``."""
     found = htmls.findall(self.HTML, 'a')
     for item in found:
         Log.d(TAG, item)
示例#10
0
 def test_find_positions(self):
     """Log each pair yielded by ``htmls.find_positions`` for the fixture."""
     pairs = htmls.find_positions(self.HTML, 'a',
                                  'href="http://example.org/"')
     for pos_i, pos_j in pairs:
         Log.d(TAG, 'i={}, j={}'.format(pos_i, pos_j))