Example #1
0
def main(argv):
    """Delete files from an archive.org item.

    argv: command-line argument list parsed by docopt against the module
    docstring.  Exits the process with status 1 on the first failure.
    """
    args = docopt(__doc__, argv=argv)
    verbose = args['--verbose']
    item = Item(args['<identifier>'])

    # Files that cannot be deleted via S3.
    no_delete = ['_meta.xml', '_files.xml', '_meta.sqlite']

    if verbose:
        stdout.write('Deleting files from {0}\n'.format(item.identifier))

    if args['--all']:
        files = list(item.files())
        # Deleting everything implies cascading to derivatives.
        # BUG FIX: this key was misspelled '--cacade', so the flag set
        # here was never read by the delete call below.
        args['--cascade'] = True
    else:
        files = []
        for name in args['<file>']:
            f = item.file(name)
            # item.file() returns a falsy value when the file is missing.
            # BUG FIX: the old code deferred this check to the delete loop
            # and then dereferenced f.name on the falsy object, raising
            # AttributeError instead of reporting the requested name.
            if not f:
                if verbose:
                    stderr.write(' error: "{0}" does not exist\n'.format(name))
                exit(1)
            files.append(f)

    for f in files:
        # Skip the metadata files that S3 refuses to delete.
        if any(f.name.endswith(s) for s in no_delete):
            continue
        resp = f.delete(verbose=args['--verbose'], cascade_delete=args['--cascade'])
        if resp.status_code != 204:
            # Archive.org returns an XML error document; surface its
            # <Message> element along with the HTTP status code.
            error = parseString(resp.content)
            msg = get_xml_text(error.getElementsByTagName('Message'))
            stderr.write(' error: {0} ({1})\n'.format(msg, resp.status_code))
            exit(1)
Example #2
0
 def _metadata_getter(self):
     """Worker loop: fetch Item metadata for identifiers on input_queue.

     Pulls (index, identifier) pairs off self.input_queue forever and
     puts (index, Item) results onto self.json_queue.  On failure the
     pair is put back on the input queue for another attempt.
     """
     while True:
         i, identifier = self.input_queue.get()
         try:
             item = Item(identifier)
             self.json_queue.put((i, item))
         except Exception:
             # BUG FIX: narrowed from a bare `except:` so that
             # KeyboardInterrupt/SystemExit can still stop the worker.
             # NOTE(review): failures are retried forever with no backoff
             # or retry cap — a permanently bad identifier spins here.
             self.input_queue.put((i, identifier))
         finally:
             # task_done() must run exactly once per get(), success or not,
             # so input_queue.join() in the producer can unblock.
             self.input_queue.task_done()
Example #3
0
#!/usr/bin/env python

import os
import json
import ptree

from internetarchive import search_items, Item

total_bytes = 0

for result in search_items('collection:usda-nurseryandseedcatalog'):
    id = result['identifier']
    item = Item(id)
    metadata = item.get_metadata()
    item_dir = os.path.join('items', ptree.id2ptree(id).lstrip("/"))
    if not os.path.isdir(item_dir):
        os.makedirs(item_dir)
    with open(os.path.join(item_dir, 'metadata.json'), 'w') as fh:
        fh.write(json.dumps(metadata, indent=2))

    total_bytes += sum([f.size for f in item.iter_files()])
    print item_dir

print total_bytes
Example #4
0
#!/usr/bin/env python

"""
fetch.py will fetch metadata for the Seed Catalog collection at 
Internet Archive.
"""

import os
import json
import ptree

from internetarchive import search_items, Item

for result in search_items('collection:usda-nurseryandseedcatalog'):
    id = result['identifier']
    item = Item(id)
    
    metadata = item.get_metadata()
    item_dir = os.path.join('items', ptree.id2ptree(id).lstrip("/"))

    if not os.path.isdir(item_dir):
        os.makedirs(item_dir)

    with open(os.path.join(item_dir, 'metadata.json'), 'w') as fh:
        fh.write(json.dumps(metadata, indent=2))

    print item_dir