def get(id):
    """
    Download every page image of a document from the Austrian National
    Library (digital.onb.ac.at).

    The viewer "flavour" is derived from the id prefix:
    "ABO..." -> OnbViewer, "DTL..." -> RepViewer; any other prefix is an error.
    """
    # Folder names cannot contain slashes; normalise the id first.
    id = id.replace("/", "_")

    if id.startswith("ABO"):
        flavour = "OnbViewer"
    elif id.startswith("DTL"):
        flavour = "RepViewer"
    else:
        raise RuntimeError(f"Can not determine flavour for {id}")

    # Visiting the viewer page yields a JSESSIONID cookie that the
    # image/metadata services require.
    viewer_url = f"http://digital.onb.ac.at/{flavour}/viewer.faces?doc={id}"
    session_cookies = requests.get(viewer_url).cookies

    metadata_url = f"http://digital.onb.ac.at/{flavour}/service/viewer/imageData?doc={id}&from=1&to=1000"
    metadata = utils.get_json(metadata_url, cookies=session_cookies)

    output_folder = utils.make_output_folder("onb", id)
    image_data = metadata["imageData"]
    print(f"Going to download {len(image_data)} images")

    for image in image_data:
        query_args = image["queryArgs"]
        image_id = image["imageID"]
        image_url = f"http://digital.onb.ac.at/{flavour}/image?{query_args}&s=1.0&q=100"
        output_filename = utils.make_output_filename(output_folder, image_id, extension=None)
        if os.path.isfile(output_filename):
            print(f"Skip downloading existing image {image_id}")
            continue
        print(f"Downloading {image_id}")
        utils.get_binary(output_filename, image_url, cookies=session_cookies)
def get(id):
    """Download all page images of a Kramerius document from kramerius.difmoe.eu."""
    children_url = f"https://kramerius.difmoe.eu/search/api/v5.0/item/uuid:{id}/children"
    children = utils.get_json(children_url)
    print(f"Downloading {len(children)} images from kramerius.difmoe.eu")
    output_folder = utils.make_output_folder("difmoe", id)
    # Pages are numbered from 1 in the order the API lists the children.
    for page_number, child in enumerate(children, start=1):
        pid = child["pid"]
        image_url = f"https://kramerius.difmoe.eu/search/img?pid={pid}&stream=IMG_FULL"
        target = utils.make_output_filename(output_folder, page=page_number, extension="jpg")
        utils.get_binary(target, image_url)
def get_book(id):
    """
    Download a book from the Wolfenbüttel Digital Library (diglib.hab.de).

    The library exposes no page count, so pages are probed sequentially;
    downloading stops at the first missing page (utils.get_binary is
    assumed to raise ValueError for it — TODO confirm against utils).
    """
    output_folder = utils.make_output_folder("hab", id)
    # NOTE: removed a dead `page = 0` assignment that was immediately
    # overwritten by the for loop.
    for page in range(1, 1000):
        url = f"http://diglib.hab.de/{id}/max/{page:05d}.jpg"
        output_filename = utils.make_output_filename(output_folder, page=page, extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:05d}")
            continue
        try:
            print(f"Downloading page #{page:05d} from {url}")
            utils.get_binary(output_filename, url)
        except ValueError:
            # First missing page marks the end of the book.
            break
def get(id):
    """
    Download a book from the Fulda University library (fuldig.hs-fulda.de).

    Fulda does not appear to publish a manifest.json, so the number of
    pages cannot be known up front; pages are probed one by one and the
    loop ends when a fetch fails with ValueError.
    """
    output_folder = utils.make_output_folder("fulda", id)
    for page in range(1, 1000):
        image_url = f"https://fuldig.hs-fulda.de/viewer/rest/image/{id}/{page:08d}.tif/full/10000,/0/default.jpg"
        output_filename = utils.make_output_filename(output_folder, page, extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        print(f"Downloading page {page} to {output_filename}")
        try:
            utils.get_binary(output_filename, image_url)
        except ValueError:
            # A failed fetch is taken as "past the last page".
            break
def get(id):
    """
    Download a book from HathiTrust (babel.hathitrust.org).

    The page count is read from the imgsrv metadata endpoint, so no
    probing is needed.
    """
    output_folder = utils.make_output_folder("hathitrust", id)
    metadata_url = f"https://babel.hathitrust.org/cgi/imgsrv/meta?id={id}"
    metadata = utils.get_json(metadata_url)
    total_pages = metadata["total_items"]
    print(f"Going to download {total_pages} pages to {output_folder}")
    for page in range(1, total_pages + 1):
        # width=1000000 requests the largest rendition the server will give.
        url = f"https://babel.hathitrust.org/cgi/imgsrv/image?id={id};seq={page};width=1000000"
        output_filename = utils.make_output_filename(output_folder, page, extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        print(f"Downloading page {page} to {output_filename}")
        utils.get_binary(output_filename, url)
def get(id):
    """
    Download a book from Internet Culturale (www.internetculturale.it).

    Pages are fetched sequentially; an empty response body marks the end
    of the book (the partially-written empty file is removed).
    """
    full_id = f"oai:www.internetculturale.sbn.it/{id}"
    # FIXME: this xpath is just broken
    # metadata_url = f"http://www.internetculturale.it/jmms/magparser?id={full_id}&teca=MagTeca+-+ICCU&mode=all"
    # metadata = utils.get_xml(metadata_url)
    # page_nodes = metadata.findall("./package/medias/media[1]/pages")
    # page_count = int(page_nodes[0].attrib("count"))
    page_url_base = f"http://www.internetculturale.it/jmms/objdownload?id={full_id}&teca=MagTeca%20-%20ICCU&resource=img&mode=raw"
    output_folder = utils.make_output_folder("iculturale", id)
    for page in range(1, 1000):
        page_url = f"{page_url_base}&start={page}"
        output_filename = utils.make_output_filename(output_folder, page=page, extension="jpg")
        # Check for an existing file BEFORE logging "Downloading" — the
        # previous order printed a misleading download message for pages
        # that were then skipped (every sibling downloader checks first).
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        print(f"Downloading page #{page} from {page_url}")
        data_size = utils.get_binary(output_filename, page_url)
        if data_size == 0:
            # Empty payload: we read past the last page.
            os.remove(output_filename)
            break
def get_book(id):
    """Download a book from the British Library via its IIIF manifest."""
    manifest_url = f"https://api.bl.uk/metadata/iiif/ark:/81055/{id}.0x000001/manifest.json"
    iiif.download_book_fast(manifest_url, utils.make_output_folder("bl", id))
def get(id):
    """Download a book from TU Darmstadt (tudigit.ulb.tu-darmstadt.de) via IIIF."""
    manifest_url = f"http://tudigit.ulb.tu-darmstadt.de/show/iiif/{id}/manifest.json"
    iiif.download_book_fast(manifest_url, utils.make_output_folder("darmstadt", id))
help='content weight') parser.add_argument('--beta', type=float, default=1000000, help='style weight') return parser if __name__ == '__main__': parser = arg_parser() args = parser.parse_args() args_dict = vars(args) #make output directory folder_name = utils.make_output_folder(args_dict['content'], args_dict['style'], args_dict['output_folder']) #down-sample image content, style, height, width = utils.down_sample(args_dict['content'], args_dict['style'], args_dict['max_pixel']) assert content.mode == 'RGB', 'content image not in RGB format' assert style.mode == 'RGB', 'style image not in RGB format' # input tensor: input image with shape of [batch, height, width, colors=3] f_img_reshape = lambda x: np.reshape(np.asarray(x), newshape=(-1, height, width, 3)) imgs = {'content': content, 'style': style} imgs_reshaped = {key: f_img_reshape(img) for key, img in imgs.items()} vgg_input = tf.Variable(initial_value=np.zeros(shape=[1, height, width, 3],