Example #1
File: onb.py Project: lapsh/dancebooks
def get(id):
    # First, normalizing id
    id = id.replace('/', '_')
    if id.startswith("ABO"):
        flavour = "OnbViewer"
    elif id.startswith("DTL"):
        flavour = "RepViewer"
    else:
        raise RuntimeError(f"Can not determine flavour for {id}")

    # Second, obtaining JSESSIONID cookie value
    viewer_url = f"http://digital.onb.ac.at/{flavour}/viewer.faces?doc={id}"
    viewer_response = requests.get(viewer_url)
    cookies = viewer_response.cookies
    metadata_url = f"http://digital.onb.ac.at/{flavour}/service/viewer/imageData?doc={id}&from=1&to=1000"
    metadata = utils.get_json(metadata_url, cookies=cookies)
    output_folder = utils.make_output_folder("onb", id)
    image_data = metadata["imageData"]
    print(f"Going to download {len(image_data)} images")
    for image in image_data:
        query_args = image["queryArgs"]
        image_id = image["imageID"]
        image_url = f"http://digital.onb.ac.at/{flavour}/image?{query_args}&s=1.0&q=100"
        output_filename = utils.make_output_filename(output_folder,
                                                     image_id,
                                                     extension=None)
        if os.path.isfile(output_filename):
            print(f"Skip downloading existing image {image_id}")
            continue
        print(f"Downloading {image_id}")
        utils.get_binary(output_filename, image_url, cookies=cookies)
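Note: every snippet on this page assumes the standard os and requests imports plus a project-local utils module that is not shown here. As a rough, hypothetical sketch (the real helpers may differ), the utils functions used in these examples could look like this:

import os

import requests

def make_output_folder(prefix, id):
    # Hypothetical: build and create a per-download folder such as "onb_<id>".
    folder = f"{prefix}_{id}".replace("/", "_")
    os.makedirs(folder, exist_ok=True)
    return folder

def make_output_filename(folder, page, extension="jpg"):
    # Hypothetical: zero-pad integer page numbers, keep string ids as-is,
    # and append the extension only when one is given (extension=None is allowed).
    name = f"{page:08d}" if isinstance(page, int) else str(page)
    if extension:
        name = f"{name}.{extension}"
    return os.path.join(folder, name)

def get_json(url, **kwargs):
    # Hypothetical: GET a URL (optionally with cookies=...) and decode the body as JSON.
    response = requests.get(url, **kwargs)
    response.raise_for_status()
    return response.json()

def get_binary(output_filename, url, **kwargs):
    # Hypothetical: download a URL to a file and return the number of bytes written;
    # raise ValueError on a non-200 response, matching the "except ValueError: break"
    # probing loops in the later examples.
    response = requests.get(url, **kwargs)
    if response.status_code != 200:
        raise ValueError(f"Got HTTP {response.status_code} for {url}")
    with open(output_filename, "wb") as f:
        f.write(response.content)
    return len(response.content)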
Example #2
def get(id):
	children_url = f"https://kramerius.difmoe.eu/search/api/v5.0/item/uuid:{id}/children"
	children = utils.get_json(children_url)
	print(f"Downloading {len(children)} images from kramerius.difmoe.eu")
	
	output_folder = utils.make_output_folder("difmoe", id)
	for page, child in enumerate(children, start=1):
		child_pid = child["pid"]
		image_url = f"https://kramerius.difmoe.eu/search/img?pid={child_pid}&stream=IMG_FULL"
		output_filename = utils.make_output_filename(output_folder, page=page, extension="jpg")
		utils.get_binary(output_filename, image_url)
Example #3
def get_book(id):
	output_folder = utils.make_output_folder("hab", id)
	for page in range(1, 1000):
		url = f"http://diglib.hab.de/{id}/max/{page:05d}.jpg"
		output_filename = utils.make_output_filename(output_folder, page=page, extension="jpg")
		if os.path.exists(output_filename):
			print(f"Skip downloading existing page #{page:05d}")
			continue
		try:
			print(f"Downloading page #{page:05d} from {url}")
			utils.get_binary(output_filename, url)
		except ValueError:
			break
Example #4
def get(id):
    output_folder = utils.make_output_folder("fulda", id)
    # The Fulda library does not seem to expose a manifest.json,
    # so the number of pages cannot be determined in advance.
    for page in range(1, 1000):
        image_url = f"https://fuldig.hs-fulda.de/viewer/rest/image/{id}/{page:08d}.tif/full/10000,/0/default.jpg"
        output_filename = utils.make_output_filename(output_folder,
                                                     page,
                                                     extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        print(f"Downloading page {page} to {output_filename}")
        try:
            utils.get_binary(output_filename, image_url)
        except ValueError:
            break
Example #5
def get(id):
    output_folder = utils.make_output_folder("hathitrust", id)
    metadata_url = f"https://babel.hathitrust.org/cgi/imgsrv/meta?id={id}"
    metadata = utils.get_json(metadata_url)
    total_pages = metadata["total_items"]
    print(f"Going to download {total_pages} pages to {output_folder}")
    for page in range(1, total_pages + 1):
        url = f"https://babel.hathitrust.org/cgi/imgsrv/image?id={id};seq={page};width=1000000"
        output_filename = utils.make_output_filename(output_folder,
                                                     page,
                                                     extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        print(f"Downloading page {page} to {output_filename}")
        utils.get_binary(output_filename, url)
Example #6
def get(id):
    full_id = f"oai:www.internetculturale.sbn.it/{id}"
    # FIXME: this xpath is just broken
    # metadata_url = f"http://www.internetculturale.it/jmms/magparser?id={full_id}&teca=MagTeca+-+ICCU&mode=all"
    # metadata = utils.get_xml(metadata_url)
    # page_nodes = metadata.findall("./package/medias/media[1]/pages")
    # page_count = int(page_nodes[0].attrib("count"))
    page_url_base = f"http://www.internetculturale.it/jmms/objdownload?id={full_id}&teca=MagTeca%20-%20ICCU&resource=img&mode=raw"

    output_folder = utils.make_output_folder("iculturale", id)
    for page in range(1, 1000):
        page_url = f"{page_url_base}&start={page}"
        print(f"Downloading page #{page} from {page_url}")
        output_filename = utils.make_output_filename(output_folder,
                                                     page=page,
                                                     extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        data_size = utils.get_binary(output_filename, page_url)
        if data_size == 0:
            os.remove(output_filename)
            break
Example #7
def get_book(id):
	output_folder = utils.make_output_folder("bl", id)
	manifest_url = f"https://api.bl.uk/metadata/iiif/ark:/81055/{id}.0x000001/manifest.json"
	iiif.download_book_fast(manifest_url, output_folder)
Example #8
def get(id):
    output_folder = utils.make_output_folder("darmstadt", id)
    manifest_url = f"http://tudigit.ulb.tu-darmstadt.de/show/iiif/{id}/manifest.json"
    iiif.download_book_fast(manifest_url, output_folder)
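Examples #7 and #8 hand the actual work to iiif.download_book_fast, which is not shown on this page. Assuming it walks a IIIF Presentation 2.x manifest (sequences -> canvases -> images -> resource/@id), a minimal sequential stand-in might look like the sketch below; the real helper presumably adds parallel downloads and the utils conventions used above:

import os

import requests

def download_book_from_manifest(manifest_url, output_folder):
    # Hypothetical stand-in for iiif.download_book_fast: fetch the IIIF manifest
    # and download the full image referenced by every canvas, one page at a time.
    manifest = requests.get(manifest_url).json()
    canvases = manifest["sequences"][0]["canvases"]
    print(f"Going to download {len(canvases)} images to {output_folder}")
    for page, canvas in enumerate(canvases, start=1):
        image_url = canvas["images"][0]["resource"]["@id"]
        output_filename = os.path.join(output_folder, f"{page:08d}.jpg")
        if os.path.isfile(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        print(f"Downloading page #{page:08d} from {image_url}")
        response = requests.get(image_url)
        response.raise_for_status()
        with open(output_filename, "wb") as f:
            f.write(response.content)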
Example #9
                        help='content weight')
    parser.add_argument('--beta',
                        type=float,
                        default=1000000,
                        help='style weight')
    return parser


if __name__ == '__main__':
    parser = arg_parser()
    args = parser.parse_args()
    args_dict = vars(args)

    # make output directory
    folder_name = utils.make_output_folder(args_dict['content'],
                                           args_dict['style'],
                                           args_dict['output_folder'])

    # down-sample image
    content, style, height, width = utils.down_sample(args_dict['content'],
                                                      args_dict['style'],
                                                      args_dict['max_pixel'])
    assert content.mode == 'RGB', 'content image not in RGB format'
    assert style.mode == 'RGB', 'style image not in RGB format'

    # input tensor: input image with shape of [batch, height, width, colors=3]
    f_img_reshape = lambda x: np.reshape(np.asarray(x),
                                         newshape=(-1, height, width, 3))
    imgs = {'content': content, 'style': style}
    imgs_reshaped = {key: f_img_reshape(img) for key, img in imgs.items()}
    vgg_input = tf.Variable(initial_value=np.zeros(shape=[1, height, width, 3],