def load_etext(etextno, refresh_cache=False, mirror=None, prefer_ascii=False):
    """Return the full body of a Project Gutenberg text as unicode.

    The text is fetched from a Gutenberg mirror on first use and then
    persisted locally as a gzipped file, so repeat calls are served from
    the on-disk cache.
    """
    etextno = validate_etextno(etextno)
    cache_path = os.path.join(_TEXT_CACHE, '{}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(cache_path)

    if not os.path.exists(cache_path):
        makedirs(os.path.dirname(cache_path))
        download_uri = _format_download_uri(etextno, mirror, prefer_ascii)
        response = requests.get(download_uri)
        # Some ebooks/mirrors advertise a broken charset (e.g. #55517 on
        # aleph.gutenberg.org declares ISO-8859-1 for a UTF-8 file), which
        # breaks downstream usages.  When the declared and detected
        # encodings disagree, trust the detected one before decoding.
        if response.encoding != response.apparent_encoding:
            response.encoding = response.apparent_encoding
        body = response.text
        with closing(gzip.open(cache_path, 'w')) as cache_file:
            cache_file.write(body.encode('utf-8'))

    with closing(gzip.open(cache_path, 'r')) as cache_file:
        text = cache_file.read().decode('utf-8')
    return text
def load_etext(etextno, refresh_cache=False, mirror=None, prefer_ascii=False):
    """Fetch the full body of a Project Gutenberg text as unicode.

    After an initial remote call to Project Gutenberg's servers the text
    is persisted locally (gzipped), so later calls hit the local cache.
    """
    etextno = validate_etextno(etextno)
    cache_path = os.path.join(_TEXT_CACHE, '{}.txt.gz'.format(etextno))
    if refresh_cache:
        remove(cache_path)
    if not os.path.exists(cache_path):
        makedirs(os.path.dirname(cache_path))
        response = requests.get(_format_download_uri(etextno, mirror, prefer_ascii))
        # Guard against mirrors that advertise a wrong charset.  For
        # example #55517 from aleph.gutenberg.org reports ISO-8859-1
        # while the payload is UTF-8, which would garble the decoded
        # text; prefer the encoding detected from the payload itself.
        if response.encoding != response.apparent_encoding:
            response.encoding = response.apparent_encoding
        with closing(gzip.open(cache_path, 'w')) as gzip_file:
            gzip_file.write(response.text.encode('utf-8'))
    with closing(gzip.open(cache_path, 'r')) as gzip_file:
        text = gzip_file.read().decode('utf-8')
    return text


# def _main():
#     """Command line interface to the module.
#     """
#     from argparse import ArgumentParser, FileType
#     from gutenberg import Error
#     from gutenberg._util.os import reopen_encoded
#
#     parser = ArgumentParser(description='Download a Project Gutenberg text')
#     parser.add_argument('etextno', type=int)
#     parser.add_argument('outfile', type=FileType('w'))
#     parser.add_argument('--mirror', '-m', type=str, default=None)
#     parser.add_argument('--prefer-ascii', '-a', type=bool, default=False)
#     args = parser.parse_args()
#
#     try:
#         text = load_etext(args.etextno,
#                           mirror=args.mirror,
#                           prefer_ascii=args.prefer_ascii)
#         with reopen_encoded(args.outfile, 'w', 'utf8') as outfile:
#             outfile.write(text)
#     except Error as error:
#         parser.error(str(error))
#
#
# if __name__ == '__main__':
#     _main()
def _populate_setup(self):
    """Write a local marker file recording the remote cache URI.

    The actual database already lives on the Fuseki server, so locally
    we only persist a marker noting which URI this cache corresponds
    to, then open the graph against that URI.
    """
    marker_path = self._cache_marker
    makedirs(os.path.dirname(marker_path))
    with codecs.open(marker_path, 'w', encoding='utf-8') as marker_file:
        marker_file.write(self.cache_uri)
    self.graph.open(self.cache_uri)
def _open_or_create_metadata_graph():
    """Connects to the persistent RDF graph (creating the graph if necessary).

    """
    global _METADATA_DATABASE_SINGLETON

    # Build the graph object first; population only happens when the
    # on-disk cache does not exist yet.
    _METADATA_DATABASE_SINGLETON = _create_metadata_graph()
    if not os.path.exists(_METADATA_CACHE):
        makedirs(_METADATA_CACHE)
        _populate_metadata_graph(_METADATA_DATABASE_SINGLETON)
    # NOTE(review): nesting reconstructed from a whitespace-mangled
    # source; assumes open() and the return run unconditionally while
    # makedirs/populate run only on a cache miss — confirm upstream.
    _METADATA_DATABASE_SINGLETON.open(_METADATA_CACHE, create=False)
    return _add_namespaces(_METADATA_DATABASE_SINGLETON)
def load_etext(etextno, refresh_cache=False):
    """Returns a unicode representation of the full body of a Project
    Gutenberg text. After making an initial remote call to Project
    Gutenberg's servers, the text is persisted locally as a gzipped file.

    Args:
        etextno: Identifier of the Gutenberg text to load.
        refresh_cache: When True, discard any cached copy and re-download.

    Returns:
        The full text of the ebook as a unicode string.
    """
    etextno = validate_etextno(etextno)
    cached = os.path.join(_TEXT_CACHE, '{0}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(cached)
    if not os.path.exists(cached):
        makedirs(os.path.dirname(cached))
        download_uri = _format_download_uri(etextno)
        response = requests.get(download_uri)
        # Fix: some mirrors advertise a wrong charset (e.g. ISO-8859-1
        # for a UTF-8 payload), which garbles the decoded text.  Prefer
        # the encoding detected from the payload when the two disagree,
        # matching the newer load_etext variant in this codebase.
        if response.encoding != response.apparent_encoding:
            response.encoding = response.apparent_encoding
        text = response.text
        with closing(gzip.open(cached, 'w')) as cache:
            cache.write(text.encode('utf-8'))

    with closing(gzip.open(cached, 'r')) as cache:
        text = cache.read().decode('utf-8')
    return text
def load_etext(etextno, refresh_cache=False):
    """Return the full body of a Project Gutenberg text as unicode.

    The text is downloaded on first access and persisted to a gzipped
    cache file; subsequent calls read from that cache.
    """
    etextno = validate_etextno(etextno)
    cache_path = os.path.join(_TEXT_CACHE, '{0}.txt.gz'.format(etextno))
    if refresh_cache:
        remove(cache_path)
    if os.path.exists(cache_path):
        # Cache hit: serve the locally persisted copy.
        with contextlib.closing(gzip.open(cache_path, 'r')) as gzip_file:
            text = gzip_file.read().decode('utf-8')
    else:
        makedirs(os.path.dirname(cache_path))
        response = requests.get(_format_download_uri(etextno))
        # Decode the payload as UTF-8 regardless of the advertised charset.
        response.encoding = 'utf-8'
        text = response.text
        with contextlib.closing(gzip.open(cache_path, 'w')) as gzip_file:
            gzip_file.write(text.encode('utf-8'))
    return text
def test_makedirs(self):
    """makedirs should create the whole missing directory chain."""
    nested_path = os.path.join(self.temporary_directory, 'foo', 'bar', 'baz')
    self.assertFalse(os.path.exists(nested_path))
    makedirs(nested_path)
    self.assertTrue(os.path.exists(nested_path))
def _populate_setup(self):
    """Create the cache directory and open the graph store, creating it on disk."""
    store_location = self.cache_uri
    makedirs(store_location)
    self.graph.open(store_location, create=True)
def _populate_setup(self):
    """Ensure the cache directory for this store exists."""
    makedirs(self.cache_uri)
def test_makedirs_does_not_swallow_exception(self):
    """makedirs must propagate OSErrors from the underlying os.makedirs."""
    original_makedirs = os.makedirs
    os.makedirs = always_throw(OSError)
    try:
        with self.assertRaises(OSError):
            makedirs('/some/path')
    finally:
        # Fix: restore the patched os.makedirs even when the assertion
        # fails; previously a failure here leaked the throwing stub into
        # every subsequent test in the process.
        os.makedirs = original_makedirs