from __future__ import absolute_import

import contextlib
import gzip
import os

import requests

from gutenberg._domain_model.persistence import local_path
from gutenberg._domain_model.types import validate_etextno
from gutenberg._util.os import makedirs
from gutenberg._util.os import remove

# Directory (under the local application data dir) where downloaded
# texts are cached on disk.
_TEXT_CACHE = local_path('text')


def _format_download_uri(etextno):
    """Returns the download location on the Project Gutenberg servers for a
    given text.
    """
    # Mirror host used to build per-text download URLs.
    uri_root = r'http://www.gutenberg.lib.md.us'

    if 0 < etextno < 10:
        # Texts 1-9 pre-date the numeric directory scheme and live under
        # fixed legacy file names.
        # NOTE(review): definition is truncated at the visible chunk
        # boundary; the remainder of this tuple/function is not shown here.
        oldstyle_files = (
            'when11',
            'bill11',
            'jfk11',
            'getty11',
try:
    import urllib2
except ImportError:  # Python 3: urllib2 was merged into urllib.request
    import urllib.request as urllib2

from rdflib.graph import Graph
from rdflib.term import URIRef

from gutenberg._domain_model.persistence import local_path
from gutenberg._domain_model.vocabulary import DCTERMS
from gutenberg._domain_model.vocabulary import PGTERMS
from gutenberg._util.logging import disable_logging
from gutenberg._util.os import makedirs
from gutenberg._util.os import remove

# Default on-disk location of the metadata database.
# BUG FIX: this previously pointed at '~/Desktop/machineLearning/metadata' --
# os.path.join does NOT expand '~', so a literal '~' directory would be
# created, and the path was a machine-specific personal location anyway.
# Keep the cache relative to the application data dir like the text cache.
_METADATA_CACHE = local_path(os.path.join('metadata', 'metadata.db'))

# Lazily-initialized module-wide database handle.
_METADATA_DATABASE_SINGLETON = None


@contextlib.contextmanager
def _download_metadata_archive():
    """Makes a remote call to the Project Gutenberg servers and downloads the
    entire Project Gutenberg meta-data catalog. The catalog describes the
    texts on Project Gutenberg in RDF.

    Yields:
        str: Path of a temporary file containing the downloaded catalog.
        The file is deleted when the context manager exits.
    """
    data_url = r'http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2'
    # delete=False so the archive can be re-opened by name later (an open
    # NamedTemporaryFile cannot be opened a second time on Windows).
    with tempfile.NamedTemporaryFile(delete=False) as metadata_archive:
        # BUG FIX: close the HTTP response explicitly instead of leaking
        # the underlying socket.
        response = urllib2.urlopen(data_url)
        try:
            shutil.copyfileobj(response, metadata_archive)
        finally:
            response.close()
    try:
        yield metadata_archive.name
    finally:
        # BUG FIX: the temporary archive (the full catalog) was never
        # deleted, leaking one copy per call.
        remove(metadata_archive.name)
try:
    import urllib2
except ImportError:  # Python 3: urllib2 was merged into urllib.request
    import urllib.request as urllib2

from rdflib.graph import Graph
from rdflib.term import URIRef

from gutenberg._domain_model.persistence import local_path
from gutenberg._domain_model.vocabulary import DCTERMS
from gutenberg._domain_model.vocabulary import PGTERMS
from gutenberg._util.logging import disable_logging
from gutenberg._util.os import makedirs
from gutenberg._util.os import remove

# Default on-disk location of the metadata database.
_METADATA_CACHE = local_path(os.path.join('metadata', 'metadata.db'))

# Lazily-initialized module-wide database handle.
_METADATA_DATABASE_SINGLETON = None


@contextlib.contextmanager
def _download_metadata_archive():
    """Makes a remote call to the Project Gutenberg servers and downloads the
    entire Project Gutenberg meta-data catalog. The catalog describes the
    texts on Project Gutenberg in RDF.

    Yields:
        str: Path of a temporary file containing the downloaded catalog.
        The file is deleted when the context manager exits.
    """
    data_url = r'http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2'
    # delete=False so the archive can be re-opened by name later (an open
    # NamedTemporaryFile cannot be opened a second time on Windows).
    with tempfile.NamedTemporaryFile(delete=False) as metadata_archive:
        # BUG FIX: close the HTTP response explicitly instead of leaking
        # the underlying socket.
        response = urllib2.urlopen(data_url)
        try:
            shutil.copyfileobj(response, metadata_archive)
        finally:
            response.close()
    try:
        yield metadata_archive.name
    finally:
        # BUG FIX: the temporary archive (the full catalog) was never
        # deleted, leaking one copy per call.
        remove(metadata_archive.name)
from six import with_metaclass

from gutenberg._domain_model.exceptions import CacheAlreadyExistsException
from gutenberg._domain_model.exceptions import InvalidCacheException
from gutenberg._domain_model.persistence import local_path
from gutenberg._domain_model.vocabulary import DCTERMS
from gutenberg._domain_model.vocabulary import PGTERMS
from gutenberg._util.logging import disable_logging
from gutenberg._util.os import makedirs
from gutenberg._util.os import remove
from gutenberg._util.url import urlopen

# Remote location of the complete Project Gutenberg RDF catalog.
_GUTENBERG_CATALOG_URL = \
    r'http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2'
# Identifier under which the metadata graph is stored in its backing store.
_DB_IDENTIFIER = 'urn:gutenberg:metadata'
# Default on-disk location of the metadata database.
_DB_PATH = local_path(os.path.join('metadata', 'metadata.db'))


class MetadataCache(with_metaclass(abc.ABCMeta, object)):
    """Super-class for all metadata cache implementations.
    """

    def __init__(self, store, cache_uri):
        # store: rdflib store backend; cache_uri: location of the cache.
        self.store = store
        self.cache_uri = cache_uri
        # All metadata triples are read/written through this graph.
        self.graph = Graph(store=self.store, identifier=_DB_IDENTIFIER)
        # Tracks whether the cache has been opened for reading.
        self.is_open = False
        # Where catalog data is fetched from when populating the cache.
        self.catalog_source = _GUTENBERG_CATALOG_URL

    @property
    def exists(self):
"""Module to deal with text acquisition."""

from __future__ import absolute_import

import contextlib
import gzip
import os

import requests

from gutenberg._domain_model.persistence import local_path
from gutenberg._domain_model.types import validate_etextno
from gutenberg._util.os import makedirs
from gutenberg._util.os import remove

# Directory (under the local application data dir) where downloaded
# texts are cached on disk.
_TEXT_CACHE = local_path('text')


def _format_download_uri(etextno):
    """Returns the download location on the Project Gutenberg servers for a
    given text.
    """
    # Mirror host used to build per-text download URLs.
    uri_root = r'http://www.gutenberg.lib.md.us'

    if 0 < etextno < 10:
        # Texts 1-9 pre-date the numeric directory scheme and live under
        # fixed legacy file names.
        # NOTE(review): definition is truncated at the visible chunk
        # boundary; the remainder of this tuple/function is not shown here.
        oldstyle_files = (
            'when11',
            'bill11',
            'jfk11',
            'getty11',
            'const11',
import urllib2 except ImportError: import urllib.request as urllib2 from rdflib.graph import Graph from rdflib.term import URIRef from gutenberg._domain_model.persistence import local_path from gutenberg._domain_model.vocabulary import DCTERMS from gutenberg._domain_model.vocabulary import PGTERMS from gutenberg._util.logging import disable_logging from gutenberg._util.os import makedirs from gutenberg._util.os import remove _METADATA_CACHE = local_path(os.path.join('metadata', 'metadata.db')) _METADATA_DATABASE_SINGLETON = None @contextlib.contextmanager def _download_metadata_archive(): """Makes a remote call to the Project Gutenberg servers and downloads the entire Project Gutenberg meta-data catalog. The catalog describes the texts on Project Gutenberg in RDF. The function returns a file-pointer to the catalog. """ data_url = r'http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2' with tempfile.NamedTemporaryFile(delete=False) as metadata_archive: shutil.copyfileobj(urllib2.urlopen(data_url), metadata_archive) yield metadata_archive.name
import urllib2 except ImportError: import urllib.request as urllib2 from rdflib.graph import Graph from rdflib.term import URIRef from gutenberg._domain_model.persistence import local_path from gutenberg._domain_model.vocabulary import DCTERMS from gutenberg._domain_model.vocabulary import PGTERMS from gutenberg._util.logging import disable_logging from gutenberg._util.os import makedirs from gutenberg._util.os import remove _METADATA_CACHE = local_path(os.path.join('~/Desktop/machineLearning/metadata', 'metadata.db')) _METADATA_DATABASE_SINGLETON = None @contextlib.contextmanager def _download_metadata_archive(): """Makes a remote call to the Project Gutenberg servers and downloads the entire Project Gutenberg meta-data catalog. The catalog describes the texts on Project Gutenberg in RDF. The function returns a file-pointer to the catalog. """ data_url = r'http://www.gutenberg.org/cache/epub/feeds/rdf-files.tar.bz2' with tempfile.NamedTemporaryFile(delete=False) as metadata_archive: shutil.copyfileobj(urllib2.urlopen(data_url), metadata_archive) yield metadata_archive.name
from __future__ import absolute_import

import gzip
import os
from contextlib import closing

import requests

from gutenberg._domain_model.exceptions import UnknownDownloadUriException
from gutenberg._domain_model.persistence import local_path
from gutenberg._domain_model.types import validate_etextno
from gutenberg._util.os import makedirs
from gutenberg._util.os import remove

# Directory (under the local application data dir) where downloaded
# texts are cached on disk.
_TEXT_CACHE = local_path("text")


def _format_download_uri(etextno):
    """Returns the download location on the Project Gutenberg servers for a
    given text.

    Raises:
        UnknownDownloadUri: If no download location can be found for the text.
    """
    # Mirror host used to build per-text download URLs.
    uri_root = r"http://www.gutenberg.lib.md.us"

    if 0 < etextno < 10:
        # Texts 1-9 pre-date the numeric directory scheme and live under
        # fixed legacy file names.
        oldstyle_files = ("when11", "bill11", "jfk11", "getty11", "const11",
                          "liber11", "mayfl11", "linc211", "linc111")
        # NOTE(review): definition is truncated at the visible chunk
        # boundary; the rest of this function is not shown here.
        etextno = int(etextno)