class Book(models.Model):
    """A book record tied to a GitHub repository under ``gh_org``.

    Holds the numeric book id (also used to build the Project Gutenberg
    URL), the repository name, a few descriptive fields, and the raw YAML
    text that ``metadata()`` parses on demand.
    """

    book_id = models.IntegerField(unique=True)
    repo_name = models.CharField(max_length=255, null=True, blank=True)
    title = models.CharField(max_length=1000, default="", db_index=True)
    language = models.CharField(
        max_length=5, default="en", null=False, db_index=True)
    description = models.TextField(default="", null=True, blank=True)
    # Raw YAML text; parsed lazily by metadata().
    yaml = models.TextField(null=True, default="")

    def __unicode__(self):
        # repo_name is nullable; returning None here would raise when the
        # object is coerced to text, so fall back to an empty string.
        return self.repo_name or ''

    @property
    def repo_url(self):
        """URL of the book's GitHub repository."""
        return 'https://github.com/{}/{}'.format(gh_org, self.repo_name)

    @property
    def issues_url(self):
        """URL of the repository's issue tracker."""
        return 'https://github.com/{}/{}/issues'.format(
            gh_org, self.repo_name)

    @property
    def downloads_url(self):
        """URL of the repository's releases page."""
        # Single slash before "releases"; the previous format string
        # produced ".../<repo>//releases".
        return 'https://github.com/{}/{}/releases'.format(
            gh_org, self.repo_name)

    @property
    def pg_url(self):
        """URL of the Project Gutenberg ebook page for ``book_id``."""
        return 'https://www.gutenberg.org/ebooks/{}'.format(self.book_id)

    # Pandata built from ``yaml`` on the first metadata() call.
    _pandata = None

    def metadata(self):
        """Return the metadata parsed from ``self.yaml``.

        The Pandata object is created and loaded on first use and kept on
        the instance for later calls.
        """
        if not self._pandata:
            self._pandata = Pandata()
            self._pandata.load(self.yaml)
        return self._pandata.metadata
def test_smart_properties(self):
    """Check derived properties of the Pandata loaded from TESTDATA_FILENAME.

    Verifies the publication date, that the date changes once
    ``gutenberg_issued`` is cleared, the edition name, and the scheme of
    the first subject.
    """
    pandata = Pandata(TESTDATA_FILENAME)
    self.assertEqual(pandata.publication_date, '2007-03-03')
    # With the issued date cleared, publication_date must no longer
    # report the original value.
    pandata.metadata["gutenberg_issued"] = None
    self.assertNotEqual(pandata.publication_date, '2007-03-03')
    self.assertEqual(pandata._edition, 'Space-Viking')
    # assertIn reports the offending value on failure, unlike assertTrue.
    self.assertIn(pandata.subjects[0][0], ('lcsh', 'lcc'))
def test_smart_properties(self):
    """Check derived properties of the Pandata loaded from TESTDATA_FILENAME.

    Covers the publication date (before and after ``gutenberg_issued`` is
    cleared), the edition name, and the scheme of the first subject.
    """
    pandata = Pandata(TESTDATA_FILENAME)
    self.assertEqual(pandata.publication_date, '2007-03-03')
    # Clearing the issued date must change the reported publication date.
    pandata.metadata["gutenberg_issued"] = None
    self.assertNotEqual(pandata.publication_date, '2007-03-03')
    self.assertEqual(pandata._edition, 'Space-Viking')
    # assertIn gives a failure message that names the unexpected scheme.
    self.assertIn(pandata.subjects[0][0], ('lcsh', 'lcc'))
def add_by_webpage(url, work=None, user=None):
    """Load the editions scraped from ``url`` and return the last one.

    The scraper's metadata is wrapped in a Pandata; each entry of its
    edition list is loaded through a BasePandataLoader, with the work of
    the previously loaded edition passed along to the next load.
    Returns the last edition loaded, or None when there is none.
    """
    page_scraper = get_scraper(url)
    pandata_loader = BasePandataLoader(url)
    page_pandata = Pandata()
    page_pandata.metadata = page_scraper.metadata

    last_edition = None
    for edition_metadata in page_pandata.get_edition_list():
        last_edition = pandata_loader.load_from_pandata(edition_metadata, work)
        work = last_edition.work

    # Ebooks are loaded once, from the full pandata, for the last edition.
    pandata_loader.load_ebooks(page_pandata, last_edition, user=user)

    if last_edition:
        return last_edition
    return None
def test_editions(self):
    """Check the two editions in EDITIONTEST_FILENAME and the remote sample.

    The local file must yield exactly two editions with the expected
    publisher, ISBN and edition identifier; the remote sample must yield
    exactly one edition.
    """
    local_pandata = Pandata(EDITIONTEST_FILENAME)
    first, second = local_pandata.get_edition_list()

    self.assertEqual(first.publisher, "Project Gutenberg")
    self.assertEqual(second.publisher, "Recovering the Classics")
    self.assertEqual(second.isbn, "9781111122223")
    self.assertEqual(first.isbn, "")

    first_id = first.edition_identifiers['edition_id']
    self.assertEqual(first_id, "repo:Space-Viking_20728#default")
    second_id = second.edition_identifiers['edition_id']
    self.assertEqual(second_id, u'repo:Space-Viking_20728#9781111122223')

    remote_pandata = Pandata('https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml')
    # Unpacking into a one-element tuple fails unless there is exactly
    # one edition in the list.
    (only_edition,) = remote_pandata.get_edition_list()
def load_from_yaml(yaml_url, test_mode=False):
    """Load every edition described by the YAML at ``yaml_url``.

    This really should be called 'load_from_github_yaml'.

    ``test_mode`` is forwarded to ``GithubLoader.load_ebooks``. Per the
    original note on this function, when it is true the list of ebooks is
    not constructed from a release; an epub is used instead (that logic
    lives in the loader, not here).

    Returns the ``work_id`` of the last edition loaded, or None when the
    YAML yields no editions.
    """
    all_metadata = Pandata(yaml_url)
    loader = GithubLoader(yaml_url)
    # Bind up front so an empty edition list returns None instead of
    # raising on an unbound name in the return expression.
    edition = None
    for metadata in all_metadata.get_edition_list():
        edition = loader.load_from_pandata(metadata)
        loader.load_ebooks(metadata, edition, test_mode)
    return edition.work_id if edition else None
def repo_metadata():
    """Summarize ``metadata.yaml`` as a flat dict.

    Keys: ``repo_name``, ``version``, ``title``, ``author`` (names joined
    with "; "), ``author_for_calibre`` (names joined with " & "),
    ``cover`` and ``book_id``.
    """
    md = Pandata("metadata.yaml")

    # Keep the image path of the last listed cover; None if there are none.
    cover_path = None
    for cover_entry in md.covers:
        cover_path = cover_entry.get('image_path', None)

    summary = {}
    summary['repo_name'] = md._repo
    summary['version'] = md._version
    summary['title'] = md.title
    summary['author'] = "; ".join(md.authnames())
    summary['author_for_calibre'] = " & ".join(md.authnames())
    summary['cover'] = cover_path
    summary['book_id'] = md.identifiers.get('gutenberg', '0')
    return summary
def add_by_sitemap(url, maxnum=None):
    """Load editions for every book scraped from the sitemap at ``url``.

    ``maxnum`` is forwarded to ``scrape_sitemap``. For each scraped book
    the editions are loaded in order, each load receiving the work of the
    previous edition. Returns the list of last-loaded editions, one per
    book that produced an edition.
    """
    loaded = []
    for bookdata in scrape_sitemap(url, maxnum=maxnum):
        book_loader = BasePandataLoader(bookdata.base)
        book_pandata = Pandata()
        book_pandata.metadata = bookdata.metadata

        work = None
        edition = None
        for edition_metadata in book_pandata.get_edition_list():
            edition = book_loader.load_from_pandata(edition_metadata, work)
            work = edition.work

        # Ebooks are loaded once per book, for the last edition.
        book_loader.load_ebooks(book_pandata, edition)
        if not edition:
            continue
        loaded.append(edition)
    return loaded
def add_from_bookdatas(bookdatas):
    """Load editions for each scraper in ``bookdatas``.

    ``bookdatas`` is an iterable of scrapers, each exposing ``base`` and
    ``metadata``. Returns the list of last-loaded editions, one per
    scraper that produced an edition.
    """
    results = []
    for scraped in bookdatas:
        scraped_loader = BasePandataLoader(scraped.base)
        scraped_pandata = Pandata()
        scraped_pandata.metadata = scraped.metadata

        current_work = None
        current_edition = None
        for edition_metadata in scraped_pandata.get_edition_list():
            current_edition = scraped_loader.load_from_pandata(
                edition_metadata, current_work)
            current_work = current_edition.work

        # Ebooks are loaded once per scraper, for the last edition.
        scraped_loader.load_ebooks(scraped_pandata, current_edition)
        if not current_edition:
            continue
        results.append(current_edition)
    return results
class Book(models.Model):
    """A book record tied to a GitHub repository under ``gh_org``.

    Holds the numeric book id (also used to build the Project Gutenberg
    URL), the repository name, a few descriptive fields, and the raw YAML
    text that ``metadata()`` parses on demand.
    """

    book_id = models.IntegerField(unique=True)
    repo_name = models.CharField(max_length=255, null=True, blank=True)
    title = models.CharField(
        max_length=1000,
        default="",
        db_index=True,
    )
    language = models.CharField(
        max_length=5,
        default="en",
        null=False,
        db_index=True,
    )
    description = models.TextField(
        default="",
        null=True,
        blank=True,
    )
    # Raw YAML text; parsed lazily by metadata().
    yaml = models.TextField(null=True, default="")

    def __unicode__(self):
        # repo_name is nullable; returning None here would raise when the
        # object is coerced to text, so fall back to an empty string.
        return self.repo_name or ''

    @property
    def repo_url(self):
        """URL of the book's GitHub repository."""
        return 'https://github.com/{}/{}'.format(gh_org, self.repo_name)

    @property
    def issues_url(self):
        """URL of the repository's issue tracker."""
        return 'https://github.com/{}/{}/issues'.format(
            gh_org, self.repo_name)

    @property
    def downloads_url(self):
        """URL of the repository's releases page."""
        # Single slash before "releases"; the previous format string
        # produced ".../<repo>//releases".
        return 'https://github.com/{}/{}/releases'.format(
            gh_org, self.repo_name)

    @property
    def pg_url(self):
        """URL of the Project Gutenberg ebook page for ``book_id``."""
        return 'https://www.gutenberg.org/ebooks/{}'.format(self.book_id)

    # Pandata built from ``yaml`` on the first metadata() call.
    _pandata = None

    def metadata(self):
        """Return the metadata parsed from ``self.yaml``.

        The Pandata object is created and loaded on first use and kept on
        the instance for later calls.
        """
        if not self._pandata:
            self._pandata = Pandata()
            self._pandata.load(self.yaml)
        return self._pandata.metadata
def metadata(self):
    """Return the metadata parsed from ``self.yaml``.

    A Pandata is created and loaded the first time this is called and is
    stored on ``self._pandata`` for reuse.
    """
    parsed = self._pandata
    if not parsed:
        parsed = Pandata()
        # Store before loading so the instance holds the same object
        # that load() fills in.
        self._pandata = parsed
        parsed.load(self.yaml)
    return parsed.metadata
def metadata(self):
    """Return the parsed metadata for this object's ``yaml`` text.

    The Pandata holding it is built on first access and cached on
    ``self._pandata``.
    """
    cached = self._pandata
    if not cached:
        cached = Pandata()
        # Cache first, then load, so self._pandata and the loaded object
        # are one and the same.
        self._pandata = cached
        cached.load(self.yaml)
    return cached.metadata
def test_load_from_string(self):
    """Loading TEST_YAML_STRING gives 'Kafka, Franz' as the first author name."""
    loaded = Pandata()
    loaded.load(TEST_YAML_STRING)
    first_author = loaded.authnames()[0]
    self.assertEqual(first_author, 'Kafka, Franz')
def make_gitberg_info():
    """Render the ABOUT template with the contents of ``metadata.yaml``.

    Templates are looked up in the ``templates/`` directory next to this
    module, then relative to the filesystem root. Returns the rendered
    text.
    """
    metadata = Pandata("metadata.yaml")

    module_dir = os.path.dirname(__file__)
    template_dir = os.path.join(module_dir, 'templates/')
    search_path = [template_dir, '/']

    jinja_env = Environment(loader=FileSystemLoader(search_path))
    about_template = jinja_env.get_template(ABOUT)
    return about_template.render(metadata=metadata)
def setUp(self):
    """Build a Pandata from TESTDATA_FILENAME and store it on ``self.pandata``."""
    fixture = Pandata(TESTDATA_FILENAME)
    self.pandata = fixture
def test_load_from_string(self):
    """A Pandata loaded from TEST_YAML_STRING lists 'Kafka, Franz' first."""
    from_string = Pandata()
    from_string.load(TEST_YAML_STRING)
    author_names = from_string.authnames()
    self.assertEqual(author_names[0], 'Kafka, Franz')
def test_load_from_url(self):
    """A Pandata built from the sample URL reports edition 'Space-Viking'."""
    sample_url = 'https://github.com/gitenberg-dev/metadata/raw/master/samples/pandata.yaml'
    remote = Pandata(sample_url)
    self.assertEqual(remote._edition, 'Space-Viking')
def test_conversion(self):
    """Convert the test PG RDF file to YAML and check the resulting Pandata.

    The YAML text is written to TESTDATA_YAMLFILENAME and read back
    through Pandata; the edition name and the scheme of the first subject
    are then verified.
    """
    yaml = pg_rdf_to_yaml(TESTDATA_PGRDFFILENAME)
    # Close the file before reading it back so the written YAML is
    # flushed to disk and the handle is not leaked.
    with open(TESTDATA_YAMLFILENAME, "w+") as yaml_file:
        yaml_file.write(yaml)
    pandata = Pandata(TESTDATA_YAMLFILENAME)
    self.assertEqual(pandata._edition, 'book')
    # assertIn reports the offending value on failure, unlike assertTrue.
    self.assertIn(pandata.subjects[0][0], ('lcsh', 'lcc'))