def load_moby(self):
    """Load the Moby edition metadata and register its cached resources.

    Opens ``/moby/metadata.txt`` from the ``shksprdata`` package and hands it
    to ``model.load_material`` together with a work-name normaliser.  For
    every returned material that has no resources yet, one ``html`` and one
    ``pdf`` ``model.Resource`` with a ``cache`` locator is created.  The
    session is committed once after all materials have been processed.
    """
    import shakespeare.model as model

    pkg = 'shksprdata'

    # Applied strictly in this order: 'part_iii' must be rewritten before
    # 'part_ii', and 'part_ii' before 'part_i', because each shorter pattern
    # is a prefix of the longer one.
    replacements = (
        ('_moby', ''),
        ('life_and_death_of_king_john', 'john'),
        ('labor', 'labour'),
        ('part_iii', 'part_3'),
        ('part_ii', 'part_2'),
        ('part_i', 'part_1'),
    )

    def norm_work_name(out):
        """Return ``out`` with the Moby-specific substitutions applied.

        Passed to ``model.load_material`` as ``norm_work_name``; presumably
        it is applied to the work names found in the metadata -- confirm
        against ``load_material``.
        """
        for old, new in replacements:
            out = out.replace(old, new)
        return out

    fileobj = pkg_resources.resource_stream(pkg, '/moby/metadata.txt')
    try:
        material = model.load_material(fileobj, norm_work_name=norm_work_name)
        for item in material:
            # Only attach resources to materials that have none, so that
            # running the loader again does not create duplicates.
            if not item.resources:
                model.Resource(
                    locator_type=u'cache',
                    locator=u'moby/html/%s.html' % item.name,
                    format=u'html',
                    material=item,
                )
                model.Resource(
                    locator_type=u'cache',
                    locator=u'moby/pdf/%s.pdf' % item.name,
                    format=u'pdf',
                    material=item,
                )
    finally:
        # Closed only after the loop, in case load_material reads the
        # stream lazily while the materials are being iterated.
        fileobj.close()
    model.Session.commit()
def load_gutenberg(self):
    """Load the Gutenberg edition metadata and register its text resources.

    Opens ``/gutenberg/metadata.txt`` from the ``shksprdata`` package and
    hands it to ``model.load_material`` together with a work-name normaliser.
    For every returned material that has no resources yet, one ``txt``
    ``model.Resource`` with a ``package`` locator of the form
    ``shksprdata::/gutenberg/<name>.txt`` is created.  The session is
    committed once after all materials have been processed.
    """
    import shakespeare.model as model

    pkg = 'shksprdata'

    # Applied in this order after the optional '_f' suffix has been removed.
    replacements = (
        ('_gut', ''),
        ('anthonie', 'antony'),
        ('errours', 'errors'),
        ('all_is', "alls"),
        ('loves_labour_', 'loves_labours_'),
        ('dreame', 'dream'),
        ('twelfe-', 'twelfth_'),
        ('tragedy_of_', ''),
    )

    def norm_work_name(out):
        """Return ``out`` with the Gutenberg-specific substitutions applied.

        A trailing ``_f`` is dropped first, then the substitutions above are
        applied.  Passed to ``model.load_material`` as ``norm_work_name``;
        presumably it is applied to the work names found in the metadata --
        confirm against ``load_material``.
        """
        if out.endswith('_f'):
            out = out[:-2]
        for old, new in replacements:
            out = out.replace(old, new)
        return out

    fileobj = pkg_resources.resource_stream(pkg, '/gutenberg/metadata.txt')
    try:
        material = model.load_material(fileobj, norm_work_name=norm_work_name)
        for item in material:
            # Only attach a resource to materials that have none, so that
            # running the loader again does not create duplicates.
            if not item.resources:
                locator = u'%s::/gutenberg/%s.txt' % (pkg, item.name)
                model.Resource(
                    locator_type=u'package',
                    locator=locator,
                    format=u'txt',
                    material=item,
                )
    finally:
        # Closed only after the loop, in case load_material reads the
        # stream lazily while the materials are being iterated.
        fileobj.close()
    model.Session.commit()