def test_url_save_guess_file(self):
    # Take the first "<md5> <file>" entry of the remote MD5SUMS listing and
    # check that url_save_guess_file() reports that same file name as the
    # first element of its result.
    listing = url_get_content(
        URL_DEBIAN_CD_PATH.format('MD5SUMS'), fake_headers())
    first_entry = listing.splitlines()[0]
    checksum, filename = first_entry.split()
    Log.d(TAG, 'md5={}, file={}'.format(checksum, filename))

    guessed = url_save_guess_file(URL_DEBIAN_CD_PATH.format(filename))
    self.assertEqual(filename, guessed[0])
def test_url_save(self):
    # Download the first file listed in the remote MD5SUMS and verify that
    # the MD5 digest of the saved file matches the listed one.
    chunk_size = 512 * 1024

    def log_progress(a, b):
        # Logs a as a percentage of b, rounded to one decimal place.
        Log.d(TAG, '{:>5}% downloaded'.format(round(a * 100 / b, 1)))

    listing = url_get_content(
        URL_DEBIAN_CD_PATH.format('MD5SUMS'), fake_headers())
    expected_md5, filename = listing.splitlines()[0].split()
    Log.d(TAG, 'md5={}, file={}'.format(expected_md5, filename))

    saved_path, size = url_save(
        URL_DEBIAN_CD_PATH.format(filename), reporthook=log_progress)
    Log.d(TAG, 'file size: {} MiB'.format(round(size / 1024 / 1024, 1)))

    # Hash the saved file chunk by chunk; iter() stops at the empty read
    # that marks end of file.
    digest = hashlib.md5()
    with open(saved_path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    self.assertEqual(expected_md5, digest.hexdigest())
def get_card(self, word: str) -> Tuple[str, List[str]]:
    """Query the dictionary for *word* and return the parsed card.

    The entry actually served is derived from the last path segment of
    the final response URL, with ``-`` replaced by spaces. It is compared
    against a normalised form of *word* only to log a redirect.

    Args:
        word: The word or phrase to look up. ``/`` is replaced by a space
            before the query is built.

    Returns:
        A ``(actual, fields)`` tuple: the entry name taken from the final
        URL and the result of ``self._extract_fields`` on the page
        content.

    Raises:
        WordNotFoundError: If the final URL has an empty last path
            segment.
    """
    Log.d(TAG, 'querying "{}"'.format(word))
    query = word.replace('/', ' ')

    # Use the response as a context manager so it is released on every
    # path, including the WordNotFoundError raised below.
    with urlopen_with_retry(
            URL_QUERY.format(urllib.parse.quote(query)),
            fake_headers()) as response:
        final_path = urllib.parse.urlsplit(response.geturl()).path
        actual = ' '.join(final_path.rsplit('/', 1)[-1].split('-'))
        if not actual:
            raise WordNotFoundError('can\'t find: "{}"'.format(word))

        # Normalise the request the same way the URL slug was normalised
        # so that only a real redirect gets logged.
        requested = ' '.join(
            query.replace('-', ' ').replace('\'', ' ').lower().split())
        if actual != requested:
            Log.i(TAG, 'redirected "{}" to: "{}"'.format(word, actual))

        content = url_get_content(response, fake_headers())

    fields = self._extract_fields(content)
    Log.d(TAG, 'parsed: "{}"'.format(actual))
    return actual, fields
def test_url_get_content(self):
    # Smoke test: fetch the remote MD5SUMS listing and log its content.
    md5sums_url = URL_DEBIAN_CD_PATH.format('MD5SUMS')
    content = url_get_content(md5sums_url, fake_headers())
    Log.d(TAG, content)
def test_urlopen_with_retry(self):
    # Smoke test: open a query URL for a phrase containing a space and log
    # the response headers, status and final URL.
    quoted_phrase = urllib.parse.quote('cater to')
    url = URL_CAMBRIDGE_QUERY.format(quoted_phrase)
    with urlopen_with_retry(url, fake_headers()) as response:
        Log.d(TAG, 'headers={}'.format(response.headers))
        status_line = 'status={}, url={}'.format(
            response.status, response.url)
        Log.d(TAG, status_line)
def collapse2(h):
    """Return *h* with its second ``parse_tag`` group wrapped in HTML_COLLAPSE.

    Groups 1 and 3 of the match are kept unchanged on either side of the
    wrapped group 2.
    """
    m = parse_tag.match(h)
    head, body, tail = m.group(1, 2, 3)
    Log.d(TAG, '{}\n{}\n{}'.format(head, body, tail))
    return head + HTML_COLLAPSE.format(body) + tail
def test_removeall(self):
    # Smoke test: log the result of htmls.removeall() for 'a' on the fixture.
    stripped = htmls.removeall(self.HTML, 'a')
    Log.d(TAG, stripped)
def test_sub(self):
    # Smoke test: run htmls.sub() on the fixture with a callback that
    # replaces a tag by its inner text, targeting 'a' with the given href
    # attribute string, and log the result.
    def strip_tag(s):
        tag_pattern = r'<[\s\S]*?>([\s\S]*)<[\s\S]*>'
        inner_text = r'\g<1>'
        return re.sub(tag_pattern, inner_text, s)

    result = htmls.sub(
        self.HTML, strip_tag, 'a', 'href="http://example.com/"')
    Log.d(TAG, result)
def test_findall(self):
    # Smoke test: log every item htmls.findall() yields for 'a'.
    matches = htmls.findall(self.HTML, 'a')
    for element in matches:
        Log.d(TAG, element)
def test_find_positions(self):
    # Smoke test: log each pair htmls.find_positions() yields for 'a' with
    # the given href attribute string.
    positions = htmls.find_positions(
        self.HTML, 'a', 'href="http://example.org/"')
    for first, second in positions:
        Log.d(TAG, 'i={}, j={}'.format(first, second))