def _reset_metadata_graph():
    """Forget the loaded metadata database and discard its cache.

    The module-level ``_METADATA_DATABASE_SINGLETON`` is reset to ``None``
    and whatever lives at ``_METADATA_CACHE`` is handed to ``remove``.

    """
    global _METADATA_DATABASE_SINGLETON

    # Clear the in-memory reference before touching the on-disk cache, so
    # the singleton is already gone even if the removal fails.
    _METADATA_DATABASE_SINGLETON = None

    remove(_METADATA_CACHE)
def load_etext(etextno, refresh_cache=False, mirror=None, prefer_ascii=False):
    """Return the full body of a Project Gutenberg text as unicode.

    The text is downloaded on first use and stored gzip-compressed under
    ``_TEXT_CACHE``; later calls are served from that local copy.

    Args:
        etextno: identifier of the text; normalised by ``validate_etextno``.
        refresh_cache: when true, the cached copy is removed first so the
            text is fetched again.
        mirror: forwarded to ``_format_download_uri``.
        prefer_ascii: forwarded to ``_format_download_uri``.

    Returns:
        The text decoded from the UTF-8 encoded cache file.

    """
    etextno = validate_etextno(etextno)
    cache_path = os.path.join(_TEXT_CACHE, '{}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(cache_path)

    cache_missing = not os.path.exists(cache_path)
    if cache_missing:
        makedirs(os.path.dirname(cache_path))
        reply = requests.get(
            _format_download_uri(etextno, mirror, prefer_ascii))

        # Some ebooks or mirrors advertise a broken encoding, which would
        # corrupt the cached text. For example, #55517 from
        # aleph.gutenberg.org is served as 'ISO-8859-1' although the file is
        # UTF-8:
        #
        #     from gutenberg.acquire import load_etext
        #     print(load_etext(55517, refresh_cache=True)[0:1000])
        #
        # Prefer the encoding detected from the content over the declared one.
        detected = reply.apparent_encoding
        if reply.encoding != detected:
            reply.encoding = detected

        body = reply.text
        with closing(gzip.open(cache_path, 'w')) as sink:
            sink.write(body.encode('utf-8'))

    # Always answer from the cache file, including right after a download.
    with closing(gzip.open(cache_path, 'r')) as source:
        return source.read().decode('utf-8')
def load_etext(etextno, refresh_cache=False, mirror=None, prefer_ascii=False):
    """Fetch a Project Gutenberg text and return it as a unicode string.

    The first request for a text goes to the Project Gutenberg servers; the
    result is written gzip-compressed below ``_TEXT_CACHE`` and every later
    request is answered from that file.

    Args:
        etextno: identifier of the text; passed through ``validate_etextno``.
        refresh_cache: if true, drop the cached copy before looking it up.
        mirror: handed on to ``_format_download_uri``.
        prefer_ascii: handed on to ``_format_download_uri``.

    Returns:
        The contents of the cache file, decoded as UTF-8.

    """
    etextno = validate_etextno(etextno)
    archive_name = '{}.txt.gz'.format(etextno)
    archive_path = os.path.join(_TEXT_CACHE, archive_name)

    if refresh_cache:
        remove(archive_path)

    if not os.path.exists(archive_path):
        makedirs(os.path.dirname(archive_path))
        source_uri = _format_download_uri(etextno, mirror, prefer_ascii)
        reply = requests.get(source_uri)

        # Ensure proper UTF-8 saving. Some ebooks or mirrors advertise a
        # broken encoding, which breaks downstream usages. For example,
        # #55517 from aleph.gutenberg.org:
        #
        #     from gutenberg.acquire import load_etext
        #     print(load_etext(55517, refresh_cache=True)[0:1000])
        #
        # reports 'ISO-8859-1' as its encoding while the file is UTF-8.
        guessed_encoding = reply.apparent_encoding
        if reply.encoding != guessed_encoding:
            reply.encoding = guessed_encoding

        downloaded = reply.text
        with closing(gzip.open(archive_path, 'w')) as writer:
            writer.write(downloaded.encode('utf-8'))

    with closing(gzip.open(archive_path, 'r')) as reader:
        raw = reader.read()
    return raw.decode('utf-8')


# Disabled command line entry point, kept for reference.
# NOTE(review): `type=bool` makes argparse treat any non-empty value
# (including "False") as True; use `action='store_true'` if this is revived.
#
# def _main():
#     """Command line interface to the module.
#
#     """
#     from argparse import ArgumentParser, FileType
#     from gutenberg import Error
#     from gutenberg._util.os import reopen_encoded
#
#     parser = ArgumentParser(description='Download a Project Gutenberg text')
#     parser.add_argument('etextno', type=int)
#     parser.add_argument('outfile', type=FileType('w'))
#     parser.add_argument('--mirror', '-m', type=str, default=None)
#     parser.add_argument('--prefer-ascii', '-a', type=bool, default=False)
#     args = parser.parse_args()
#
#     try:
#         text = load_etext(args.etextno,
#                           mirror=args.mirror,
#                           prefer_ascii=args.prefer_ascii)
#         with reopen_encoded(args.outfile, 'w', 'utf8') as outfile:
#             outfile.write(text)
#     except Error as error:
#         parser.error(str(error))
#
#
# if __name__ == '__main__':
#     _main()
def _download_metadata_archive(self):
    """Download the Project Gutenberg meta-data catalog to a temporary file.

    The catalog found at ``self.catalog_source`` (RDF descriptions of the
    texts on Project Gutenberg) is copied into a named temporary file.

    Yields:
        The path of the temporary file as a string -- not an open file
        object. The file is closed, and therefore fully flushed, before the
        path is handed out.

    The temporary file is removed once the consumer is done with it, and
    also when the consumer or the download itself raises.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` where
    it is defined -- the decorator is not visible here, confirm.

    """
    from contextlib import closing

    archive_path = None
    try:
        # delete=False: the file has to outlive this ``with`` so that the
        # consumer can reopen it by name; cleanup happens in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False) as metadata_archive:
            archive_path = metadata_archive.name
            # Close the remote stream as soon as the copy is finished.
            with closing(urlopen(self.catalog_source)) as catalog:
                shutil.copyfileobj(catalog, metadata_archive)

        yield archive_path
    finally:
        if archive_path is not None:
            remove(archive_path)
def _download_metadata_archive():
    """Download the Project Gutenberg meta-data catalog to a temporary file.

    The catalog (RDF descriptions of the texts on Project Gutenberg) is
    fetched from the hard-coded feed URL and copied into a named temporary
    file.

    Yields:
        The path of the temporary file as a string -- not an open file
        object. The file is closed, and therefore fully flushed, before the
        path is handed out.

    The temporary file is removed once the consumer is done with it, and
    also when the consumer or the download itself raises.

    NOTE(review): presumably wrapped by ``contextlib.contextmanager`` where
    it is defined -- the decorator is not visible here, confirm.

    """
    from contextlib import closing

    data_url = r'http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2'
    archive_path = None
    try:
        # delete=False: the file has to outlive this ``with`` so that the
        # consumer can reopen it by name; cleanup happens in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False) as metadata_archive:
            archive_path = metadata_archive.name
            # Close the remote stream as soon as the copy is finished.
            with closing(urllib2.urlopen(data_url)) as catalog:
                shutil.copyfileobj(catalog, metadata_archive)

        yield archive_path
    finally:
        if archive_path is not None:
            remove(archive_path)
def load_etext(etextno, refresh_cache=False):
    """Returns a unicode representation of the full body of a Project
    Gutenberg text. After making an initial remote call to Project Gutenberg's
    servers, the text is persisted locally.

    Args:
        etextno: identifier of the text; normalised by ``validate_etextno``.
        refresh_cache: when true, the cached copy is removed first so the
            text is fetched again.

    Returns:
        The text decoded from the UTF-8 encoded, gzip-compressed cache file
        stored under ``_TEXT_CACHE``.

    """
    etextno = validate_etextno(etextno)
    cached = os.path.join(_TEXT_CACHE, '{0}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(cached)
    if not os.path.exists(cached):
        makedirs(os.path.dirname(cached))
        download_uri = _format_download_uri(etextno)
        response = requests.get(download_uri)
        # The declared encoding cannot be trusted: a server may advertise
        # e.g. 'ISO-8859-1' for a file that is really UTF-8, and decoding
        # with the wrong codec would store garbled text in the cache. Prefer
        # the encoding detected from the content when one is available.
        detected_encoding = response.apparent_encoding
        if detected_encoding and response.encoding != detected_encoding:
            response.encoding = detected_encoding
        text = response.text
        with closing(gzip.open(cached, 'w')) as cache:
            cache.write(text.encode('utf-8'))

    # Always answer from the cache file, including right after a download.
    with closing(gzip.open(cached, 'r')) as cache:
        text = cache.read().decode('utf-8')
    return text
def load_etext(etextno, refresh_cache=False):
    """Return the full body of a Project Gutenberg text as unicode.

    A cached copy under ``_TEXT_CACHE`` is used when present; otherwise the
    text is downloaded, decoded as UTF-8, stored gzip-compressed and
    returned.

    Args:
        etextno: identifier of the text; normalised by ``validate_etextno``.
        refresh_cache: when true, the cached copy is removed first so the
            text is fetched again.

    Returns:
        The text, either decoded from the cache file or as just downloaded.

    """
    etextno = validate_etextno(etextno)
    cache_path = os.path.join(_TEXT_CACHE, '{0}.txt.gz'.format(etextno))

    if refresh_cache:
        remove(cache_path)

    # Cache hit: serve the stored copy and stop here.
    if os.path.exists(cache_path):
        with contextlib.closing(gzip.open(cache_path, 'r')) as archive:
            return archive.read().decode('utf-8')

    # Cache miss: download, persist, and return the downloaded text itself.
    makedirs(os.path.dirname(cache_path))
    reply = requests.get(_format_download_uri(etextno))
    reply.encoding = 'utf-8'
    body = reply.text
    with contextlib.closing(gzip.open(cache_path, 'w')) as archive:
        archive.write(body.encode('utf-8'))
    return body
def delete(self):
    """Close the cache, then remove its local storage.

    """
    # Close first; the storage path is only looked up afterwards.
    self.close()

    storage_path = self._local_storage_path
    remove(storage_path)
def tearDown(self):
    """Best-effort removal of ``self.temporary_path`` after a test.

    """
    try:
        remove(self.temporary_path)
    except OSError:
        # Cleanup is deliberately best-effort: an OSError while removing
        # the path must not turn into a test failure.
        return
def tearDown(self):
    """Remove ``self.temporary_path`` after a test.

    """
    leftover = self.temporary_path
    remove(leftover)
def test_remove(self):
    """``remove`` deletes both the temporary file and the temporary
    directory: each exists before the call and is gone after it.

    """
    targets = (self.temporary_file, self.temporary_directory)
    for target in targets:
        self.assertTrue(os.path.exists(target))
        remove(target)
        self.assertFalse(os.path.exists(target))