示例#1
0
def main(argv=None):
    """Report blobs under ``/asset-library/UCM`` that share a digest.

    Every blob yielded by ``blob_from_doc`` is grouped by its ``digest``;
    digests seen more than once are pretty-printed, followed by how many
    such digests there are.

    Args:
        argv: Already-parsed argument namespace exposing ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    documents = nx.nxql(
        'select * from Document where ecm:path startswith "/asset-library/UCM"'
    )

    # digest -> [(uid, "path#xpath" encoded as UTF-8), ...]
    by_digest = defaultdict(list)

    for row, document in enumerate(documents):
        for blob in blob_from_doc(document):
            if blob:
                location = u'{0}#{1}'.format(blob['path'], blob['xpath'])
                by_digest[blob['digest']].append(
                    (blob['uid'], location.encode('utf-8')))
        if row % 25000 == 0:
            # Single-argument print(...) is valid on Python 2 and 3 alike;
            # the original print statement was a SyntaxError on Python 3.
            print('{0} blobs checked'.format(row))

    # Keep only the digests that occur more than once.
    duplicates = {
        digest: hits for digest, hits in by_digest.items() if len(hits) > 1
    }
    pp(duplicates)
    print(len(duplicates))
示例#2
0
def main(argv=None):
    """Download the file behind a Nuxeo document path into the current dir.

    The document's uid is looked up, a download URL is obtained from
    ``get_download_url`` and the file is saved under the basename of the
    Nuxeo path by ``download_nuxeo_file``.

    Args:
        argv: Already-parsed argument namespace exposing ``path``,
            ``rcfile`` and ``loglevel``.  When ``None`` the command line
            is parsed.
    """
    # NOTE(review): the description below talks about checking for a jp2 on
    # S3, while this function downloads a file -- looks copy-pasted; confirm.
    parser = argparse.ArgumentParser(description='check for existence of jp2 file on s3 for given nuxeo path')
    parser.add_argument('path', help="Nuxeo document path")

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nuxeo_path = argv.path

    # Single-argument print(...) calls give the same output on Python 2 and
    # Python 3; the original print statements did not parse on Python 3.
    print("\nnuxeo_path: %s" % (nuxeo_path,))

    # get the Nuxeo ID
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nuxeo_id = nx.get_uid(nuxeo_path)
    print("nuxeo_id: %s" % (nuxeo_id,))

    download_url = get_download_url(nuxeo_id, nuxeo_path, nx)
    print("%s \n" % (download_url,))

    filename = os.path.basename(nuxeo_path)
    filepath = os.path.join(os.getcwd(), filename)
    download_nuxeo_file(download_url, filepath, nx)

    print("\nDone\n")
def main(argv=None):
    """Check every child of a Nuxeo path against an S3 bucket.

    For each child returned by ``nx.children`` the uid is resolved and
    handed to ``check_object_on_s3`` together with the bucket argument.

    Args:
        argv: Already-parsed argument namespace exposing ``path``,
            ``bucket``, ``pynuxrc`` and ``loglevel``.  When ``None`` the
            command line is parsed.
    """
    parser = argparse.ArgumentParser(description='check for existence of jp2 file on s3 for given nuxeo path')
    parser.add_argument('path', help="Nuxeo document path")
    parser.add_argument('bucket', help="S3 bucket name")
    parser.add_argument('--pynuxrc', default='~/.pynux-prod', help="rc file for use by pynux")

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nuxeo_path = argv.path
    bucketpath = argv.bucket

    nx = utils.Nuxeo(rcfile=argv.pynuxrc, loglevel=argv.loglevel.upper())
    # just for simple objects for now
    objects = nx.children(argv.path)
    # print(...) with a single argument works on Python 2 and Python 3;
    # the original print statements were Python 2 only.
    print("\nFound objects at {}.\nChecking S3 bucket {} for existence of corresponding files.\nThis could take a while...".format(nuxeo_path, bucketpath))
    checked = 0
    for obj in objects:
        nuxeo_id = nx.get_uid(obj['path'])
        check_object_on_s3(nuxeo_id, bucketpath)
        checked += 1

    print("Done. Checked {} objects".format(checked))
示例#4
0
def main(argv=None):
    """Print the requested Nuxeo path and open a Nuxeo client.

    Args:
        argv: Already-parsed argument namespace exposing ``path``,
            ``rcfile`` and ``loglevel``.  When ``None`` the command line
            is parsed.
    """
    parser = argparse.ArgumentParser(description='convert an object to jp2')
    parser.add_argument('path', help="Nuxeo document path")

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # print(...) instead of the Python 2-only print statement; output is
    # identical on both interpreter lines.
    print(argv.path)
    # NOTE(review): the client is created but not used in the visible code;
    # kept because constructing it may have side effects -- confirm.
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
示例#5
0
def main(argv=None):
    """Print the result of the file importer ``status`` API call.

    Args:
        argv: Already-parsed argument namespace exposing ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='nuxeo platform importer status')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    # print(...) replaces the Python 2-only print statement.
    print(nx.call_file_importer_api('status'))
def main(argv):
    """Load metadata rows from a tab-delimited sheet into Nuxeo.

    The data file is parsed by ``Csv2Dict``, post-processed by
    ``process_rows`` and each resulting payload is printed and, unless
    ``--dry-run`` is given, pushed with ``nx.update_nuxeo_properties``.

    Args:
        argv: Not used; options are always read from the process command
            line by ``parser.parse_args()``.

    Exits with status 2 when ``--datafile`` is not a file, or with the
    ``Csv2Dict`` status when its constructor reports an error.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--datafile",
        type=str,
        required=True,
        help="tab-delimited spreadsheet input file -- required")

    parser.add_argument('-d', '--dry-run', action='store_true', help='dry run')

    parser.add_argument('--blankout',
                        action='store_true',
                        help='blank out all fields not set in sheet')

    utils.get_common_options(parser)

    args = parser.parse_args()

    # Explicit check rather than try/assert: assert statements are removed
    # under ``python -O``, which silently disabled this validation.
    if not os.path.isfile(args.datafile):
        print("Not a file: ", args.datafile)
        sys.exit(2)

    csv_data_file = args.datafile
    print(csv_data_file)
    print(args.rcfile)
    print(args.loglevel)

    nx = utils.Nuxeo(rcfile=args.rcfile, loglevel=args.loglevel.upper())

    # get an instance of the Csv2Dict class, which must be initialized
    # with the name of an input data (csv) file
    csv2dict = Csv2Dict(csv_data_file, blankout=args.blankout)

    if csv2dict.status != 0:
        print('The Csv2Dict constructor reported and error (%d).' %
              csv2dict.status)
        sys.exit(csv2dict.status)

    process_rows(csv2dict)

    for n in range(csv2dict.get_meta_dict_length()):
        print("Loading payload %d" % n)
        payload = csv2dict.get_meta_dict(n)
        print(payload)
        print(payload['path'])
        if not args.dry_run:
            uid = nx.get_uid(payload['path'])
            print("Returned UID: %d) %s." % (n, uid))
            nx.update_nuxeo_properties(payload, path=payload['path'])
示例#7
0
def main(argv=None):
    """Print a summary of every document returned by ``nx.all()``.

    Args:
        argv: Already-parsed argument namespace exposing ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.
    """
    cli = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    # todo: expose the connection defaults as parameters as well as env
    level = argv.loglevel.upper()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=level)
    nx.print_document_summary(nx.all())
示例#8
0
文件: qa.py 项目: ucldc/extent_stats
def main(argv=None):
    """Write ``qa.xlsx`` listing the children of a Nuxeo document.

    Row 0 holds the headers, row 1 the document named on the command line
    (uid and path only) and each following row one child: uid, first local
    identifier, filename (when present), path relative to the root, title.

    Args:
        argv: Already-parsed argument namespace exposing ``path`` (a
            one-element list), ``rcfile`` and ``loglevel``.  When ``None``
            the command line is parsed.
    """
    cli = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    cli.add_argument('path', nargs=1, help="nuxeo document path")
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    root_path = argv.path[0]
    children = nx.children(root_path)

    # open the workbook
    workbook = xlsxwriter.Workbook('qa.xlsx')
    bold = workbook.add_format({'bold': True, })
    sheet = workbook.add_worksheet()

    # (first column, last column, width)
    for first_col, last_col, width in ((0, 0, 10), (1, 2, 40), (3, 4, 80)):
        sheet.set_column(first_col, last_col, width)

    headings = (
        'nuxeo-uid',
        'ucldc_schema:localidentifier',
        'filename',
        'nuxeo-path',
        'title',
    )
    for col, heading in enumerate(headings):
        sheet.write(0, col, heading, bold)

    # document specified on command line
    root_doc = nx.get_metadata(path=root_path)
    sheet.write(1, 0, root_doc['uid'])
    sheet.write(1, 3, root_path)

    for row, child in enumerate(children, 2):
        props = child['properties']

        sheet.write(row, 0, child['uid'])
        sheet.write(row, 1, props['ucldc_schema:localidentifier'][0])
        if 'file:filename' in props:
            sheet.write(row, 2, props['file:filename'])
        # path shown relative to the root given on the command line
        sheet.write(row, 3, child['path'].replace(root_path, '', 1))
        sheet.write(row, 4, child['title'])

    workbook.close()
示例#9
0
文件: nxls.py 项目: ngeraci/pynux
def main(argv=None):
    """List Nuxeo documents at a path, optionally recursively.

    The selected documents are either copied to ``--outdir`` as local
    metadata files, printed by path or uid, handed to a custom mapper
    module, or summarised with ``nx.print_document_summary``.

    Args:
        argv: Already-parsed argument namespace with the options defined
            below plus ``rcfile`` and ``loglevel``.  When ``None`` the
            command line is parsed.
    """
    cli = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    cli.add_argument('path',
                     nargs=1,
                     help='nuxeo document path',
                     type=utf8_arg)
    cli.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    recursion = cli.add_mutually_exclusive_group(required=False)
    recursion.add_argument('--recursive-folders',
                           help='recursively list project folders/Organzation',
                           action='store_true')
    recursion.add_argument('--recursive-objects',
                           help='recursively list objects',
                           action='store_true')
    display = cli.add_mutually_exclusive_group(required=False)
    display.add_argument('--show-only-uid', action='store_true')
    display.add_argument('--show-only-path', action='store_true')
    display.add_argument('--show-custom-function')
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    target = argv.path[0]

    # Pick the document source.
    if argv.recursive_folders:
        documents = nx.recursive_project_folders(target)
    elif argv.recursive_objects:
        documents = nx.recursive_objects(target)
    else:
        # the document itself followed by its direct children
        itself = nx.nxql(
            u'select * from Document where ecm:path="{}"'.format(target))
        documents = itertools.chain(itself, nx.children(target))

    # Pick the output; the first matching option wins.
    if argv.outdir:
        # Expand user- and relative-paths
        expanded = os.path.expanduser(argv.outdir)
        nx.copy_metadata_to_local(documents, os.path.abspath(expanded))
        return
    if argv.show_only_path is True:
        for doc in documents:
            print(doc['path'])
        return
    if argv.show_only_uid is True:
        for doc in documents:
            print(doc['uid'])
        return
    if argv.show_custom_function:
        # the option value names an importable module providing nuxeo_mapper
        mapper = importlib.import_module(argv.show_custom_function)
        mapper.nuxeo_mapper(documents, nx)
        return
    nx.print_document_summary(documents)
示例#10
0
文件: pilog.py 项目: barbarahui/pynux
def main(argv=None):
    """Call the importer log endpoint, or activate logging.

    With ``--activate`` this calls ``nx.import_log_activate()``; otherwise
    it calls ``nx.import_log()``.

    Args:
        argv: Already-parsed argument namespace exposing ``activate``,
            ``rcfile`` and ``loglevel``.  When ``None`` the command line
            is parsed.
    """
    cli = argparse.ArgumentParser(
        description='nuxeo platform importer log/logActivate')
    cli.add_argument('--activate', action='store_true')
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    action = nx.import_log_activate if argv.activate else nx.import_log
    action()
示例#11
0
def main(argv=None):
    """Resolve a Nuxeo path to its uid and fetch that document's metadata.

    Args:
        argv: Already-parsed argument namespace exposing ``path``,
            ``rcfile`` and ``loglevel``.  When ``None`` the command line
            is parsed.
    """
    cli = argparse.ArgumentParser(
        description='Print nuxeo json metadata for object.')
    cli.add_argument('path', help="Nuxeo document path")

    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    document_path = argv.path

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    # NOTE(review): the metadata is fetched but not printed in the visible
    # code, although the description promises output -- confirm.
    metadata = nx.get_metadata(uid=nx.get_uid(document_path))
示例#12
0
文件: nxls.py 项目: tingletech/pynux
def main(argv=None):
    """List Nuxeo documents at a path, optionally recursively.

    The selected documents are either copied to ``--outdir`` as local
    metadata files, printed by path or uid, handed to a custom mapper
    module, or summarised with ``nx.print_document_summary``.

    Args:
        argv: Already-parsed argument namespace with the options defined
            below plus ``rcfile`` and ``loglevel``.  When ``None`` the
            command line is parsed.
    """
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    parser.add_argument('path', nargs=1, help='nuxeo document path', type=utf8_arg)
    parser.add_argument('--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    rstyle = parser.add_mutually_exclusive_group(required=False)
    rstyle.add_argument('--recursive-folders',
                        help='recursively list project folders/Organzation',
                        action='store_true')
    rstyle.add_argument('--recursive-objects',
                        help='recursively list objects',
                        action='store_true')
    show = parser.add_mutually_exclusive_group(required=False)
    show.add_argument('--show-only-uid', action='store_true')
    show.add_argument('--show-only-path', action='store_true')
    show.add_argument('--show-custom-function')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    if argv.recursive_folders:
        documents = nx.recursive_project_folders(argv.path[0])
    elif argv.recursive_objects:
        documents = nx.recursive_objects(argv.path[0])
    else:
        # the document itself followed by its direct children
        documents = itertools.chain(
            nx.nxql(u'select * from Document where ecm:path="{}"'.format(argv.path[0])),
            nx.children(argv.path[0])
        )

    if argv.outdir:
        # Expand user- and relative-paths
        outdir = os.path.abspath(os.path.expanduser(argv.outdir))
        nx.copy_metadata_to_local(documents, outdir)
    # store_true flags are plain booleans, so test their truthiness rather
    # than comparing with ``== True`` (PEP 8).
    elif argv.show_only_path:
        for document in documents:
            print(document['path'])
    elif argv.show_only_uid:
        for document in documents:
            print(document['uid'])
    elif argv.show_custom_function:
        # the option value names an importable module providing nuxeo_mapper
        mapper = importlib.import_module(argv.show_custom_function)
        mapper.nuxeo_mapper(documents, nx)
    else:
        nx.print_document_summary(documents)
def main(argv=None):
    """Print the Nuxeo path that belongs to a uid, followed by the uid.

    Args:
        argv: Already-parsed argument namespace exposing ``uid``,
            ``rcfile`` and ``loglevel``.  When ``None`` the command line
            is parsed.
    """
    parser = argparse.ArgumentParser(
        description='Print nuxeo path for given uid.')
    parser.add_argument('uid', help="Nuxeo uid")

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    uid = argv.uid

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    metadata = nx.get_metadata(uid=uid)
    path = metadata['path']
    # Single-argument print(...) yields "path uid" on Python 2 and 3; the
    # original ``print path, uid`` statement was a SyntaxError on Python 3.
    print("%s %s" % (path, uid))
示例#14
0
def main(argv=None):
    """Report collection items whose ``<uid>-media.json`` is missing on S3.

    Args:
        argv: Already-parsed argument namespace exposing ``path``,
            ``bucket``, ``rcfile`` and ``loglevel``.  When ``None`` the
            command line is parsed.

    Raises:
        SystemExit: with status 1 when the bucket cannot be fetched.
    """
    parser = argparse.ArgumentParser(
        description='print info for items in collection where media.json '
                    'file is missing.'
    )
    parser.add_argument('path', help="Nuxeo document path for collection")
    parser.add_argument('bucket', help="S3 bucket name")

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nuxeo_path = argv.path
    bucketpath = argv.bucket

    # All output uses single-argument print(...) calls, which behave the
    # same on Python 2 and Python 3 (the print statements were py2-only).
    print("collection nuxeo_path: %s" % (nuxeo_path,))

    # get the Nuxeo ID for the collection
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nuxeo_id = nx.get_uid(nuxeo_path)
    print("collection nuxeo_id: %s" % (nuxeo_id,))

    # connect to S3
    conn = connect_s3(calling_format=OrdinaryCallingFormat())
    bucketpath = bucketpath.strip("/")
    bucketbase = bucketpath.split("/")[0]
    print("bucketpath: %s" % (bucketpath,))
    print("bucketbase: %s" % (bucketbase,))

    try:
        bucket = conn.get_bucket(bucketbase)
    except boto.exception.S3ResponseError:
        print("bucket doesn't exist on S3: %s" % (bucketbase,))
        # Stop here: ``bucket`` was never bound, so carrying on used to
        # fail with a NameError at the first get_key() call.
        raise SystemExit(1)

    items = nx.children(nuxeo_path)
    for item in items:
        obj_key = "{0}-media.json".format(item['uid'])
        s3_url = "s3://{0}/{1}".format(bucketpath, obj_key)
        # only the path component of the URL is used as the key name
        parts = urlparse.urlsplit(s3_url)

        if not bucket.get_key(parts.path):
            print("object doesn't exist on S3: %s" % (parts.path,))
        '''
示例#15
0
def main(argv=None):
    """Print the ``<uid>-media.json`` file stored on S3 for a Nuxeo path.

    Args:
        argv: Already-parsed argument namespace exposing ``path``,
            ``bucket``, ``rcfile`` and ``loglevel``.  When ``None`` the
            command line is parsed.

    Raises:
        SystemExit: with status 1 when the bucket cannot be fetched.
    """
    parser = argparse.ArgumentParser(
        description='get media.json file for given nuxeo path')
    parser.add_argument('path', help="Nuxeo document path")
    parser.add_argument('bucket', help="S3 bucket name")

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nuxeo_path = argv.path
    bucketpath = argv.bucket

    # All output uses single-argument print(...) calls, which behave the
    # same on Python 2 and Python 3 (the print statements were py2-only).
    print("nuxeo_path: %s" % (nuxeo_path,))

    # get the Nuxeo ID
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nuxeo_id = nx.get_uid(nuxeo_path)
    print("nuxeo_id: %s" % (nuxeo_id,))

    # see if a media.json file exists on S3 for this object
    conn = connect_s3(calling_format=OrdinaryCallingFormat())
    bucketpath = bucketpath.strip("/")
    bucketbase = bucketpath.split("/")[0]
    obj_key = "{0}-media.json".format(nuxeo_id)
    s3_url = "s3://{0}/{1}".format(bucketpath, obj_key)
    print("s3_url: %s" % (s3_url,))
    # only the path component of the URL is used as the key name
    parts = urlparse.urlsplit(s3_url)
    print("bucketpath: %s" % (bucketpath,))
    print("bucketbase: %s" % (bucketbase,))

    try:
        bucket = conn.get_bucket(bucketbase)
    except boto.exception.S3ResponseError:
        print("bucket doesn't exist on S3: %s" % (bucketbase,))
        # Stop here: ``bucket`` was never bound, so carrying on used to
        # fail with a NameError at the get_key() call below.
        raise SystemExit(1)

    if not bucket.get_key(parts.path):
        print("object doesn't exist on S3: %s" % (parts.path,))
    else:
        print("yup the object exists!: %s" % (parts.path,))
        k = Key(bucket)
        k.key = parts.path
        print("\nfile contents:")
        print(k.get_contents_as_string())
示例#16
0
def main(argv=None):
    """Import one folder into Nuxeo and print the importer's responses.

    Prints, in order, the results of ``import_log_activate``,
    ``import_one_folder``, the importer ``status`` call and ``import_log``.

    Args:
        argv: Already-parsed argument namespace with the options defined
            below plus ``rcfile`` and ``loglevel``.  When ``None`` the
            command line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='run import of a folder into nuxeo')
    utils.get_common_options(parser)
    required_flags = parser.add_argument_group('there are four required arguments')
    required_flags.add_argument('--leaf_type',
        help="nuxeo document type for imported leaf nodes",
        required=True)
    required_flags.add_argument('--input_path',
        help="unix path to files",
        required=True)
    required_flags.add_argument('--target_path',
        help="target document for import in nuxeo (parent folder where new folder will be created)",
        required=True)
    required_flags.add_argument('--folderish_type',
        help="nuxeo document type for imported folder",
        required=True)
    # store_false: the attribute is True unless --no_wait is given, so it
    # can be passed straight through as ``wait=`` below.
    parser.add_argument('--no_wait',
        help="don't poll/wait for the job to finish",
        dest="no_wait",
        action="store_false")
    parser.add_argument('--poll_interval',
        help="seconds to sleep for if waiting",
        dest="sleep",
        default=20,
        type=int)
    parser.add_argument('--skip_root_folder_creation',
        help="don't create root folder on import",
        dest="skip_root_folder_creation",
        action="store_true")
    if argv is None:
        argv = parser.parse_args()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    # print(...) with one argument replaces the Python 2-only print
    # statements and produces the same output on Python 2 and 3.
    print(nx.import_log_activate())
    print(nx.import_one_folder(argv.leaf_type,
        argv.input_path,
        argv.target_path,
        argv.folderish_type,
        wait=argv.no_wait,
        sleep=argv.sleep,
        skip_root_folder_creation=argv.skip_root_folder_creation))
    print(nx.call_file_importer_api('status'))
    print(nx.import_log())
示例#17
0
文件: nxls.py 项目: aturner/pynux
def main(argv=None):
    """Summarise, or copy locally, the children of a Nuxeo document.

    Args:
        argv: Already-parsed argument namespace exposing ``path`` (a
            one-element list), ``outdir``, ``rcfile`` and ``loglevel``.
            When ``None`` the command line is parsed.
    """
    cli = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    cli.add_argument('path', nargs=1, help="nuxeo document path")
    cli.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files")
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    children = nx.children(argv.path[0])

    if not argv.outdir:
        nx.print_document_summary(children)
        return

    # Expand user- and relative-paths
    destination = os.path.abspath(os.path.expanduser(argv.outdir))
    nx.copy_metadata_to_local(children, destination)
示例#18
0
def main(argv=None):
    """Summarise, or copy locally, every document from ``nx.all()``.

    Args:
        argv: Already-parsed argument namespace exposing ``outdir``,
            ``rcfile`` and ``loglevel``.  When ``None`` the command line
            is parsed.
    """
    cli = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    cli.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    # todo: expose the connection defaults as parameters as well as env
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    everything = nx.all()

    if not argv.outdir:
        nx.print_document_summary(everything)
        return

    # Expand user- and relative-paths
    destination = os.path.abspath(os.path.expanduser(argv.outdir))
    nx.copy_metadata_to_local(everything, destination)
示例#19
0
文件: lija.py 项目: ucldc/nuxeo-load
def _immediate_subdirs(path):
    """Return the names of the directories directly inside *path*."""
    # Only the first os.walk() step is needed; the original list
    # comprehension walked the entire tree just to take element [0].
    for _root, dirs, _files in os.walk(path):
        return dirs
    # os.walk yields nothing when the directory cannot be listed; keep
    # raising IndexError as the original ``[...][0]`` expression did.
    raise IndexError('cannot list directory: %s' % (path,))


def main(argv=None):
    """Process every ``<naan>/<ark>/<ark>.mets.xml`` below ``metadata_dir``.

    Each file path is handed to ``process_object``; afterwards the entries
    collected in the module-level ``toolong`` list are printed.

    Args:
        argv: Already-parsed argument namespace exposing ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.
    """
    parser = argparse.ArgumentParser(description='Import metadata into Nuxeo for LIJA2 (mets) collection.')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    for naan in _immediate_subdirs(metadata_dir):
        naan_dir = os.path.join(metadata_dir, naan)
        for ark in _immediate_subdirs(naan_dir):
            filepath = os.path.join(metadata_dir, naan, ark, ark + '.mets.xml')
            process_object(filepath, nx)

    # Single-argument print(...) calls replace the Python 2-only print
    # statements; the output is unchanged.
    print("\n\nPath components over Nuxeo length limit (" + str(nuxeo_limit) + "):")
    print("TOTAL: %s" % (len(toolong),))
    # ``entry`` instead of ``long``, which shadowed the Python 2 builtin
    for entry in toolong:
        print(entry)
示例#20
0
文件: nxql_all.py 项目: ngeraci/pynux
def main(argv=None):
    """Summarise, or copy locally, every document from ``nx.all()``.

    Args:
        argv: Already-parsed argument namespace exposing ``outdir``,
            ``rcfile`` and ``loglevel``.  When ``None`` the command line
            is parsed.
    """
    cli = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    cli.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    # todo: expose the connection defaults as parameters as well as env
    level = argv.loglevel.upper()
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=level)
    docs = nx.all()

    if argv.outdir:
        # Expand user- and relative-paths
        expanded = os.path.expanduser(argv.outdir)
        nx.copy_metadata_to_local(docs, os.path.abspath(expanded))
        return
    nx.print_document_summary(docs)
示例#21
0
文件: nxql.py 项目: ngeraci/pynux
def main(argv=None):
    """Run an NXQL query and summarise, or copy locally, the results.

    Args:
        argv: Already-parsed argument namespace exposing ``nxql`` (a
            one-element list), ``outdir``, ``rcfile`` and ``loglevel``.
            When ``None`` the command line is parsed.
    """
    cli = argparse.ArgumentParser(description='nxql via REST API')
    cli.add_argument('nxql', nargs=1, help="nxql query", type=utf8_arg)
    cli.add_argument(
        '--outdir',
        help="directory to hold application/json+nxentity .json files",
        type=utf8_arg)
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    query = argv.nxql[0]
    results = nx.nxql(query)

    if not argv.outdir:
        nx.print_document_summary(results)
        return

    # Expand user- and relative-paths
    destination = os.path.abspath(os.path.expanduser(argv.outdir))
    nx.copy_metadata_to_local(results, destination)
示例#22
0
def main(argv=None):
    """Report blobs under ``/asset-library/UCM`` that share a digest.

    Every blob yielded by ``blob_from_doc`` is grouped by its ``digest``;
    digests seen more than once are pretty-printed, followed by how many
    such digests there are.

    Args:
        argv: Already-parsed argument namespace exposing ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.
    """
    parser = argparse.ArgumentParser(description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    documents = nx.nxql('select * from Document where ecm:path startswith "/asset-library/UCM"')

    # digest -> [(uid, "path#xpath" encoded as UTF-8), ...]
    by_digest = defaultdict(list)

    for row, document in enumerate(documents):
        for blob in blob_from_doc(document):
            if blob:
                location = u'{0}#{1}'.format(blob['path'], blob['xpath'])
                by_digest[blob['digest']].append(
                    (blob['uid'], location.encode('utf-8')))
        if row % 25000 == 0:
            # Single-argument print(...) is valid on Python 2 and 3 alike;
            # the original print statement was a SyntaxError on Python 3.
            print('{0} blobs checked'.format(row))

    # Keep only the digests that occur more than once.
    duplicates = {
        digest: hits for digest, hits in by_digest.items() if len(hits) > 1
    }
    pp(duplicates)
    print(len(duplicates))
示例#23
0
def main(argv=None):
    """Compare every Nuxeo blob with the object of the same name on S3.

    All keys of the ``data.nuxeo.cdlib.org.oregon`` bucket are read into a
    name -> size mapping first; each blob digest is then looked up in it
    and missing objects or size mismatches are printed.

    Args:
        argv: Already-parsed argument namespace exposing ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.
    """
    parser = argparse.ArgumentParser(description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    file_dict = {}
    conn = s3.connect_to_region('us-west-2', calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
    for count, key in enumerate(bucket.list()):
        file_dict[key.name] = key.size
        if count % 50000 == 0:
            print('{0} s3 files memorized'.format(count))

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    documents = nx.nxql('select * from Document')

    row = 0
    for document in documents:
        for blob in blob_from_doc(document):
            if blob:
                digest = blob['digest']
                s3_size = file_dict.get(digest, None)
                # ``is None`` rather than falsiness, so a zero-byte S3
                # object is not reported as missing.
                if s3_size is None:
                    print('{0} from {1} {2} not found in S3'
                          .format(digest, blob['path'], blob['xpath']))
                if file_dict.get(digest, 0) != int(blob['length']):
                    # The last field is {4}; the original repeated {3} and
                    # therefore printed the S3 size twice.
                    print('{0} from {1} {2} s3 size {3} does not match nuxeo size {4}'
                          .format(digest,
                                  blob['path'],
                                  blob['xpath'],
                                  s3_size,
                                  blob['length']))
                if row % 25000 == 0:
                    print('{0} nuxeo blobs checked'.format(row))
                row = row + 1
示例#24
0
文件: nxup1.py 项目: ngeraci/pynux
def main(argv=None):
    """Update one Nuxeo document from a JSON file and print the result.

    The target is ``--uid`` or ``--path`` when given; otherwise the uid is
    resolved from the file's ``"path"`` entry, falling back to its
    ``"uid"`` entry.

    Args:
        argv: Already-parsed argument namespace exposing ``file`` (a
            one-element list), ``uid``, ``path``, ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.

    Raises:
        SystemExit: with status 1 when the update returns nothing.
    """
    parser = argparse.ArgumentParser(
        description='nuxeo metadata via REST API, one record')
    parser.add_argument('file', nargs=1, help="application/json+nxentity")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--uid', help="update specific nuxeo uid")
    group.add_argument(
        '--path', help="update specific nuxeo path", type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # todo; add these defaults as parameters as well as env
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    pp(argv.file[0])
    jfile = argv.file[0]
    uid = argv.uid
    path = argv.path
    # ``with`` closes the handle; the original left the file open.
    with open(jfile) as json_data:
        data = json.load(json_data)
    ret = {}
    if uid:  # use uid supplied at command line
        ret = nx.update_nuxeo_properties(data, uid=uid)
    elif path:  # use path supplied at command line
        ret = nx.update_nuxeo_properties(data, path=path)
    # if no uid nor path was specified on the command line, then
    # prefer "path": to "uid": when importing files because the file may have
    # come from another machine where the uuids are different
    else:
        uid = nx.get_uid(data.get('path')) or data.get('uid')
        ret = nx.update_nuxeo_properties(data, uid=uid)
    if not ret:
        print("no uid found, specify --uid or --path")
        # same effect as exit(1) without relying on the site-provided
        # ``exit`` helper being installed
        raise SystemExit(1)
    pp(ret)
示例#25
0
文件: nxup1.py 项目: tingletech/pynux
def main(argv=None):
    """Update one Nuxeo document from a JSON file and print the result.

    The target is ``--uid`` or ``--path`` when given; otherwise the uid is
    resolved from the file's ``"path"`` entry, falling back to its
    ``"uid"`` entry.

    Args:
        argv: Already-parsed argument namespace exposing ``file`` (a
            one-element list), ``uid``, ``path``, ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.

    Raises:
        SystemExit: with status 1 when the update returns nothing.
    """
    parser = argparse.ArgumentParser(
        description='nuxeo metadata via REST API, one record'
    )
    parser.add_argument('file', nargs=1, help="application/json+nxentity")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--uid', help="update specific nuxeo uid")
    group.add_argument('--path', help="update specific nuxeo path", type=utf8_arg)
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # todo; add these defaults as parameters as well as env
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    pp(argv.file[0])
    jfile = argv.file[0]
    uid = argv.uid
    path = argv.path
    # ``with`` closes the handle; the original left the file open.
    with open(jfile) as json_data:
        data = json.load(json_data)
    ret = {}
    if uid:  # use uid supplied at command line
        ret = nx.update_nuxeo_properties(data, uid=uid)
    elif path:  # use path supplied at command line
        ret = nx.update_nuxeo_properties(data, path=path)
    # if no uid nor path was specified on the command line, then
    # prefer "path": to "uid": when importing files because the file may have
    # come from another machine where the uuids are different
    else:
        uid = nx.get_uid(data.get('path')) or data.get('uid')
        ret = nx.update_nuxeo_properties(data, uid=uid)
    if not ret:
        # print(...) with one argument works on Python 2 and Python 3; the
        # original print statement was Python 2 only.
        print("no uid found, specify --uid or --path")
        # same effect as exit(1) without relying on the site-provided
        # ``exit`` helper being installed
        raise SystemExit(1)
    pp(ret)
示例#26
0
def main(argv=None):
    """Compare every Nuxeo blob with the object of the same name on S3.

    All keys of the ``data.nuxeo.cdlib.org.oregon`` bucket are read into a
    name -> size mapping first; each blob digest is then looked up in it
    and missing objects or size mismatches are printed.

    Args:
        argv: Already-parsed argument namespace exposing ``rcfile`` and
            ``loglevel``.  When ``None`` the command line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    file_dict = {}
    conn = s3.connect_to_region('us-west-2',
                                calling_format=OrdinaryCallingFormat())
    bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
    for count, key in enumerate(bucket.list()):
        file_dict[key.name] = key.size
        if count % 50000 == 0:
            print('{0} s3 files memorized'.format(count))

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    documents = nx.nxql('select * from Document')

    row = 0
    for document in documents:
        for blob in blob_from_doc(document):
            if blob:
                digest = blob['digest']
                s3_size = file_dict.get(digest, None)
                # ``is None`` rather than falsiness, so a zero-byte S3
                # object is not reported as missing.
                if s3_size is None:
                    print('{0} from {1} {2} not found in S3'.format(
                        digest, blob['path'], blob['xpath']))
                if file_dict.get(digest, 0) != int(blob['length']):
                    # The last field is {4}; the original repeated {3} and
                    # therefore printed the S3 size twice.
                    print(
                        '{0} from {1} {2} s3 size {3} does not match nuxeo size {4}'
                        .format(digest, blob['path'], blob['xpath'],
                                s3_size, blob['length']))
                if row % 25000 == 0:
                    print('{0} nuxeo blobs checked'.format(row))
                row = row + 1
示例#27
0
def main(argv=None):
    """Write ``<today>-summary.xlsx`` with per-campus extent statistics.

    ``forCampus`` is called once per campus and its four results (file
    count, byte total, deduplicated count, deduplicated bytes) fill one
    row each; a final row holds the grand totals and, unless
    ``--no-s3-check`` is given, the number and size of the S3 objects.

    Args:
        argv: Already-parsed argument namespace exposing ``outdir`` (a
            one-element list), ``s3_check``, ``rcfile`` and ``loglevel``.
            When ``None`` the command line is parsed.
    """
    cli = argparse.ArgumentParser(
        description="extent stats via Nuxeo REST API")
    cli.add_argument(
        "outdir",
        nargs=1,
    )
    cli.add_argument("--no-s3-check", dest="s3_check", action="store_false")
    utils.get_common_options(cli)
    if argv is None:
        argv = cli.parse_args()

    outdir = argv.outdir[0]
    os.makedirs(outdir, exist_ok=True)

    # Memorize every S3 key up front so each Nuxeo file can be
    # double-checked against it while looping through Nuxeo.
    file_check = None
    s3_bytes = 0
    s3_count = 0
    if argv.s3_check:
        from boto import s3
        from boto.s3.connection import OrdinaryCallingFormat

        file_check = {}
        conn = s3.connect_to_region("us-west-2",
                                    calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket("data.nuxeo.cdlib.org.oregon")
        for index, key in enumerate(bucket.list()):
            file_check[key.name] = key.size
            if index % 50000 == 0:
                print("{0} s3 files memorized".format(index), file=sys.stderr)
            s3_bytes += key.size
        s3_count = len(file_check)

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    campuses = ["UCB", "UCD", "UCI", "UCLA", "UCM", "UCOP",
                "UCR", "UCSB", "UCSC", "UCSD", "UCSF"]

    workbook = xlsxwriter.Workbook(
        os.path.join(outdir, "{}-summary.xlsx".format(today)))
    # cell formats
    bold = workbook.add_format({
        "bold": True,
    })
    thousands = workbook.add_format()
    thousands.set_num_format("#,##0")

    sheet = workbook.add_worksheet("summary")

    # headers: (column, title)
    header_cells = [
        (1, "deduplicated files"),
        (2, "deduplicated bytes"),
        (4, "total files"),
        (5, "total bytes"),
    ]
    for col, title in header_cells:
        sheet.write(0, col, title, bold)
    if argv.s3_check:
        sheet.write(0, 7, "files on S3", bold)
        sheet.write(0, 8, "bytes on S3", bold)

    # widths: (first column, last column, width)
    column_widths = [
        (0, 1, 10),
        (2, 2, 25),
        (3, 4, 10),
        (5, 5, 25),
        (6, 7, 10),
        (8, 8, 25),
        (9, 9, 10),
    ]
    for first_col, last_col, width in column_widths:
        sheet.set_column(first_col, last_col, width)

    def write_stats(row, label, dedup_files, dedup_size, files, size):
        """Fill columns 0-6 of *row* with a label and its statistics."""
        sheet.write(row, 0, label)
        sheet.write(row, 1, dedup_files, thousands)
        sheet.write(row, 2, dedup_size, thousands)
        sheet.write(row, 3, sizeof_fmt(dedup_size))
        sheet.write(row, 4, files, thousands)
        sheet.write(row, 5, size, thousands)
        sheet.write(row, 6, sizeof_fmt(size))

    # running totals across all campuses
    all_files = 0         # number of files
    all_bytes = 0         # number of bytes
    all_dedup_files = 0   # number of deduplicated files
    all_dedup_bytes = 0   # number of deduplicated bytes

    for row, campus in enumerate(campuses, 1):
        files, size, dedup_files, dedup_size = forCampus(
            campus, file_check, outdir, nx)
        write_stats(row, campus, dedup_files, dedup_size, files, size)

        all_files += files
        all_bytes += size
        all_dedup_files += dedup_files
        all_dedup_bytes += dedup_size

    # totals go on the row right after the last campus
    totals_row = len(campuses) + 1
    write_stats(totals_row, "{}".format(today), all_dedup_files,
                all_dedup_bytes, all_files, all_bytes)
    if argv.s3_check:
        sheet.write(totals_row, 7, s3_count, thousands)
        sheet.write(totals_row, 8, s3_bytes, thousands)
        sheet.write(totals_row, 9, sizeof_fmt(s3_bytes))
    workbook.close()
示例#28
0
def main(argv=None):
    """Report collection items whose ``<uid>-media.json`` is missing on S3.

    The children of the given Nuxeo collection path are listed and, for
    every child whose type is not ``Organization``, the S3 bucket is asked
    for the key ``<bucket path>/<uid>-media.json``.  Missing objects are
    printed; with ``--stash`` a ``NuxeoStashMediaJson`` run is triggered
    for the item as well.

    :param argv: pre-parsed options namespace; when ``None`` the command
        line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='print info for items in collection where media.json '
                    'file is missing.'
    )
    parser.add_argument('path', help="Nuxeo document path for collection")
    parser.add_argument('bucket', help="S3 bucket name")
    parser.add_argument("--pynuxrc", default='~/.pynuxrc',
                        help="rc file for use by pynux")
    parser.add_argument(
        '--stash',
        action="store_true",
        help="create and stash missing media.json file")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nuxeo_path = argv.path
    bucketpath = argv.bucket
    pynuxrc = argv.pynuxrc
    stash = argv.stash

    print("collection nuxeo_path:", nuxeo_path)

    # get the Nuxeo ID for the collection
    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    nuxeo_id = nx.get_uid(nuxeo_path)
    print("collection nuxeo_id:", nuxeo_id)

    # connect to S3; the first path segment of the argument is the bucket
    # name, the whole (slash-stripped) value is the key prefix
    conn = connect_s3(calling_format=OrdinaryCallingFormat())
    bucketpath = bucketpath.strip("/")
    bucketbase = bucketpath.split("/")[0]
    print("bucketpath:", bucketpath)
    print("bucketbase:", bucketbase)

    try:
        bucket = conn.get_bucket(bucketbase)
    except boto.exception.S3ResponseError:
        print("bucket doesn't exist on S3:", bucketbase)
        # Without a bucket there is nothing to check; carrying on would
        # only fail later with a NameError on the unbound ``bucket``.
        return

    items = nx.children(nuxeo_path)

    for item in items:
        obj_key = "{0}-media.json".format(item['uid'])
        s3_url = "s3://{0}/{1}".format(bucketpath, obj_key)
        parts = urlparse.urlsplit(s3_url)

        if item['type'] != 'Organization' and not (bucket.get_key(parts.path)):
            print("object doesn't exist on S3:", parts.path, item['path'])
            if stash:
                # NOTE(review): the stash targets MEDIA_JSON_BUCKET /
                # MEDIA_JSON_REGION, not the bucket given on the command
                # line -- confirm that this is intended.
                nxstash = NuxeoStashMediaJson(
                    item['path'],
                    MEDIA_JSON_BUCKET,
                    MEDIA_JSON_REGION,
                    pynuxrc,
                    True)
                nxstash.nxstashref()
                print("stashed for {0}".format(item['path']))
        '''
示例#29
0
def main(argv=None):
    """Build the ``<today>-summary.xlsx`` extent report, one row per campus.

    Every child of the root Nuxeo path is treated as a campus: its
    documents are fetched with an NXQL query and handed to ``forCampus``,
    and the four figures it returns are written to the summary worksheet.
    A final row carries the grand totals.  Unless ``--no-s3-check`` is
    given, the S3 bucket is listed first; its key sizes are passed to
    ``forCampus`` and its totals are added to the last row.

    :param argv: pre-parsed options namespace; when ``None`` the command
        line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    parser.add_argument('path', nargs=1, help="root path")
    parser.add_argument('outdir', nargs=1)
    parser.add_argument('--no-s3-check', dest='s3_check', action='store_false')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # Memorize every key in the S3 bucket (name -> size) so that the
    # per-campus pass can double check the files while looping over Nuxeo.
    file_check = None
    s3_bytes = 0
    s3_count = 0
    if argv.s3_check:
        from boto import s3
        from boto.s3.connection import OrdinaryCallingFormat
        file_check = {}
        conn = s3.connect_to_region(
            'us-west-2', calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
        for seen, key in enumerate(bucket.list()):
            file_check[key.name] = key.size
            if seen % 50000 == 0:
                print('{0} s3 files memorized'.format(seen), file=sys.stderr)
            s3_bytes += key.size
        s3_count = len(file_check)

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    campuses = nx.children(argv.path[0])

    out_dir = argv.outdir[0]
    summary_workbook = xlsxwriter.Workbook(
        os.path.join(out_dir, '{}-summary.xlsx'.format(today)))

    # cell formats
    header_format = summary_workbook.add_format({'bold': True})
    number_format = summary_workbook.add_format()
    number_format.set_num_format('#,##0')

    summary_worksheet = summary_workbook.add_worksheet('summary')

    # header row
    header_cells = [
        (1, 'deduplicated files'),
        (2, 'deduplicated bytes'),
        (4, 'total files'),
        (5, 'total bytes'),
    ]
    if argv.s3_check:
        header_cells.append((7, 'files on S3'))
        header_cells.append((8, 'bytes on S3'))
    for column, title in header_cells:
        summary_worksheet.write(0, column, title, header_format)

    # column widths as (first column, last column, width)
    column_widths = (
        (0, 1, 10),
        (2, 2, 25),
        (3, 4, 10),
        (5, 5, 25),
        (6, 7, 10),
        (8, 8, 25),
        (9, 9, 10),
    )
    for first_col, last_col, width in column_widths:
        summary_worksheet.set_column(first_col, last_col, width)

    def write_stats_row(index, label, dedup_files, dedup_size, files, size):
        # label, then deduplicated figures, then raw figures; each byte
        # count is followed by its human readable rendering
        summary_worksheet.write(index, 0, label)
        summary_worksheet.write(index, 1, dedup_files, number_format)
        summary_worksheet.write(index, 2, dedup_size, number_format)
        summary_worksheet.write(index, 3, sizeof_fmt(dedup_size))
        summary_worksheet.write(index, 4, files, number_format)
        summary_worksheet.write(index, 5, size, number_format)
        summary_worksheet.write(index, 6, sizeof_fmt(size))

    all_dedup_files = 0
    all_dedup_bytes = 0
    all_files = 0
    all_bytes = 0
    row = 1
    for campus in campuses:
        campus_path = campus['path']
        basename = os.path.basename(campus_path)
        query = 'select * from Document where ecm:path startswith"{0}"'.format(
            campus_path)
        documents = nx.nxql(query)
        files, size, dedup_files, dedup_size = forCampus(
            documents, basename, file_check, out_dir)
        write_stats_row(row, basename, dedup_files, dedup_size, files, size)

        # running totals for the final row
        all_files += files
        all_bytes += size
        all_dedup_files += dedup_files
        all_dedup_bytes += dedup_size
        row += 1

    # grand totals, labelled with the report date
    write_stats_row(row, '{}'.format(today), all_dedup_files, all_dedup_bytes,
                    all_files, all_bytes)
    if argv.s3_check:
        summary_worksheet.write(row, 7, s3_count, number_format)
        summary_worksheet.write(row, 8, s3_bytes, number_format)
        summary_worksheet.write(row, 9, sizeof_fmt(s3_bytes))
    summary_workbook.close()
示例#30
0
文件: nxid.py 项目: ngeraci/pynux
def main(argv=None):
    """Sync top level Nuxeo objects under a path with EZID.

    For every matching document the ARK is looked up in its
    ``ucldc_schema:identifier`` property and, when one is found, checked
    against EZID.  Depending on what exists, and on the ``--mint`` /
    ``--create`` / ``--update`` flags, an ARK is minted and written back
    to Nuxeo, an EZID record is created, or an existing one is updated.
    When the matching flag is absent the action is only reported as a
    noop (unless ``--no-noop-report`` is given).

    :param argv: pre-parsed options namespace; when ``None`` the command
        line is parsed.
    """
    # local import: only the emptiness peek below needs it
    import itertools

    parser = argparse.ArgumentParser(
        description=
        'nxid finds top level objects in Nuxeo and syncs them up with EZID')

    parser.add_argument('path',
                        nargs=1,
                        help='nuxeo path (folder or object)',
                        type=utf8_arg)

    ezid_group = parser.add_argument_group('minting behaviour flags')
    ezid_group.add_argument(
        '--mint',
        '-m',
        action='store_true',
        help='when an ARK is missing, mint and bind new ARK in EZID')
    ezid_group.add_argument(
        '--create',
        '-c',
        action='store_true',
        help='when an ARK is found in Nuxeo but not EZID, create EZID')
    ezid_group.add_argument(
        '--update',
        '-u',
        action='store_true',
        help='when an ARK is found in Nuxeo and EZID, update EZID')
    ezid_group.add_argument(
        '--no-noop-report',
        action='store_true',
        help='override default behaviour of reporting on noops')
    ezid_group.add_argument('--show-erc',
                            action='store_true',
                            help='show ANVL record that will be sent to EZID')

    conf_group = parser.add_argument_group('EZID configuration and metadata')
    conf_group.add_argument('--ezid-username',
                            help='username for EZID API (overrides rcfile)',
                            type=utf8_arg)
    conf_group.add_argument('--ezid-password',
                            help='password for EZID API (overrides rc file)',
                            type=utf8_arg)
    conf_group.add_argument('--shoulder',
                            help='shoulder (overrides rcfile)',
                            type=utf8_arg)
    conf_group.add_argument('--owner',
                            help='set as _owner for EZID',
                            type=utf8_arg)
    conf_group.add_argument(
        '--status',
        help='set as _status for EZID (public|reserved|unavailable)',
        type=utf8_arg)
    conf_group.add_argument('--publisher',
                            help='set as dc.publisher for EZID',
                            type=utf8_arg)
    conf_group.add_argument('--location',
                            help='set location URL prefix for EZID',
                            type=utf8_arg)

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    # read config out of .pynuxrc file; command line values win
    username = argv.ezid_username or nx.ezid_conf['username']
    password = argv.ezid_password or nx.ezid_conf['password']
    shoulder = argv.shoulder or nx.ezid_conf['shoulder']
    ezid = EZID.EZIDClient(
        credentials=dict(username=username, password=password))

    # query to select all parent level objects
    documents = nx.nxql(u'''
SELECT * FROM SampleCustomPicture, CustomFile, CustomVideo, CustomAudio
WHERE ecm:path STARTSWITH "{}"
AND ecm:currentLifeCycleState != "deleted"
AND ecm:pos is NULL'''.format(argv.path[0]))

    # Peek at the first result to find out whether the folder query
    # matched anything.  The element is chained back in front of the
    # remaining results: probing with ``any(...)`` would consume it and
    # the main loop would then skip the first document whenever
    # ``nx.nxql`` hands back a one-shot iterator (presumably it does --
    # confirm against utils.Nuxeo.nxql).
    documents = iter(documents)
    nothing = object()
    first_document = next(documents, nothing)
    if first_document is nothing:
        # if the user gives the full path to a document
        documents = nx.nxql(u'''
SELECT * FROM SampleCustomPicture, CustomFile, CustomVideo, CustomAudio
WHERE ecm:path = "{}"
AND ecm:currentLifeCycleState != "deleted"
AND ecm:pos is NULL'''.format(argv.path[0]))
    else:
        documents = itertools.chain([first_document], documents)

    report = not (argv.no_noop_report)

    # main loop
    for item in documents:
        # check id for ARK
        ark = find_ark(item['properties']['ucldc_schema:identifier'])
        path = item['path']

        # if there is an ARK, check for a record in EZID
        ezid_status = None
        if ark is not None:
            ezid_status = check_ezid(ark, ezid)

        ezdata = item_erc_dict(
            item,
            owner=argv.owner,  # _owner
            status=argv.status,  # _status
            publisher=argv.publisher,  # dc.publisher
            location=argv.location  # _target
        )

        if argv.show_erc:
            print(EZID.formatAnvlFromDict(ezdata))
            print('')

        # mint: no ARK in Nuxeo, hence nothing in EZID either
        if not (ark) and not (ezid_status):
            if argv.mint:
                new_ark = ezid.mint(shoulder, ezdata)
                update_nuxeo(item, nx, new_ark)
                print('✓ mint "{}" {}'.format(path, new_ark))
            elif report:
                print('ℹ noop mint "{}"'.format(path))

        # create: ARK recorded in Nuxeo but unknown to EZID
        if ark and not (ezid_status):
            if argv.create:
                ezid.create(ark, ezdata)
                print('✓ create "{}" {}'.format(path, ark))
            elif report:
                print('ℹ noop create "{}" {}'.format(path, ark))

        # update: ARK known on both sides
        if ark and ezid_status:
            owner = get_owner(ezid_status)
            if argv.update:
                ezid.update(ark, ezdata)
                print('✓ update "{}" {}'.format(path, ark))
            elif report:
                print('ℹ noop update "{}" {} {}'.format(path, ark, owner))
示例#31
0
def main(argv=None):
    """Dump basic metadata for the children of a Nuxeo path into ``qa.xlsx``.

    Row 0 holds the column titles, row 1 the document named on the command
    line (uid and path only), and every following row one child document
    with its uid, first local identifier, file name (when present), path
    with the first occurrence of the root path removed, and title.

    :param argv: pre-parsed options namespace; when ``None`` the command
        line is parsed.
    """
    parser = argparse.ArgumentParser(description='nuxeo metadata via REST API')
    parser.add_argument('path', nargs=1, help="nuxeo document path")
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())
    root_path = argv.path[0]
    documents = nx.children(root_path)

    # open the workbook
    workbook = xlsxwriter.Workbook('qa.xlsx')
    header_format = workbook.add_format({'bold': True})
    report = workbook.add_worksheet()

    # column widths as (first column, last column, width)
    for first_col, last_col, width in ((0, 0, 10), (1, 2, 40), (3, 4, 80)):
        report.set_column(first_col, last_col, width)

    titles = (
        'nuxeo-uid',
        'ucldc_schema:localidentifier',
        'filename',
        'nuxeo-path',
        'title',
    )
    for column, title in enumerate(titles):
        report.write(0, column, title, header_format)

    # document specified on command line
    root_doc = nx.get_metadata(path=root_path)
    report.write(1, 0, root_doc['uid'])
    report.write(1, 3, root_path)

    # one row per child, starting below the root document's row
    for line, child in enumerate(documents, 2):
        properties = child['properties']

        report.write(line, 0, child['uid'])
        report.write(line, 1, properties['ucldc_schema:localidentifier'][0])
        if 'file:filename' in properties:
            report.write(line, 2, properties['file:filename'])
        relative_path = child['path'].replace(root_path, '', 1)
        report.write(line, 3, relative_path)
        report.write(line, 4, child['title'])

    workbook.close()
示例#32
0
def main(argv=None):
    """Mint a batch of ARKs with EZID and write them, one per line, to a file.

    With ``--mint`` the batch is minted straight away; otherwise the user
    is asked for confirmation first and the program exits with status 1
    when the answer does not start with ``y``.

    :param argv: pre-parsed options namespace; when ``None`` the command
        line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='nxidbatch mints a batch of ARKs')

    parser.add_argument('batchsize',
                        nargs=1,
                        help='size of ARK batch',
                        type=int)

    ezid_group = parser.add_argument_group('minting behaviour flags')
    ezid_group.add_argument('--mint',
                            '-m',
                            action='store_true',
                            help='mint ARKs without prompt')
    ezid_group.add_argument('--output',
                            '-o',
                            type=lambda x: is_valid_file(parser, x),
                            required=True)

    conf_group = parser.add_argument_group('EZID configuration and metadata')
    conf_group.add_argument('--ezid-username',
                            help='username for EZID API (overrides rcfile)',
                            type=utf8_arg)
    conf_group.add_argument('--ezid-password',
                            help='password for EZID API (overrides rc file)',
                            type=utf8_arg)
    conf_group.add_argument('--shoulder',
                            help='shoulder (overrides rcfile)',
                            type=utf8_arg)
    conf_group.add_argument('--owner',
                            help='set as _owner for EZID',
                            type=utf8_arg)
    conf_group.add_argument(
        '--status',
        help=
        'set as _status for EZID (default reserved, or public|unavailable)',
        default="reserved",
        type=utf8_arg)
    conf_group.add_argument('--publisher',
                            help='set as dc.publisher for EZID',
                            type=utf8_arg)

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    # read config out of .pynuxrc file; command line values win
    username = argv.ezid_username or nx.ezid_conf['username']
    password = argv.ezid_password or nx.ezid_conf['password']
    shoulder = argv.shoulder or nx.ezid_conf['shoulder']
    ezid = EZID.EZIDClient(
        credentials=dict(username=username, password=password))

    # ``batchsize`` is declared with nargs=1, so argparse stores a
    # one-element list; unwrap it once for both the prompt and the loop.
    batch_size = argv.batchsize[0]

    if not argv.mint:
        # https://stackoverflow.com/a/26514097/1763984
        answer = raw_input(
            'Mint a batch {} of {} ARKs with prefix {} with EZID? [y/n]'.
            format(argv.output, batch_size, shoulder))
        if not answer or answer[0].lower() != 'y':
            print('You did not indicate approval')
            exit(1)

    # The context manager makes sure the ARKs minted so far are flushed
    # and the file is closed even when a mint call fails part way through.
    with open(argv.output, 'w') as output:
        for __ in range(batch_size):
            # mint
            new_ark = ezid.mint(shoulder)
            print(new_ark, file=output)

    if not (argv.mint):
        print('done')
示例#33
0
def main(argv=None):
    """Build the ``<today>-summary.xlsx`` extent report, one row per campus.

    Every child of the root Nuxeo path is treated as a campus: its
    documents are fetched with an NXQL query and handed to ``forCampus``,
    and the four figures it returns are written to the summary worksheet.
    A final row carries the grand totals.  Unless ``--no-s3-check`` is
    given, the S3 bucket is listed first; its key sizes are passed to
    ``forCampus`` and its totals are added to the last row.

    :param argv: pre-parsed options namespace; when ``None`` the command
        line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='extent stats via Nuxeo REST API')
    parser.add_argument('path', nargs=1, help="root path")
    parser.add_argument('outdir', nargs=1)
    parser.add_argument('--no-s3-check', dest='s3_check', action='store_false')
    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    # Memorize every key in the S3 bucket (name -> size) so that the
    # per-campus pass can double check the files while looping over Nuxeo.
    file_check = None
    s3_bytes = 0
    s3_count = 0
    if argv.s3_check:
        from boto import s3
        from boto.s3.connection import OrdinaryCallingFormat
        file_check = {}
        conn = s3.connect_to_region(
            'us-west-2', calling_format=OrdinaryCallingFormat())
        bucket = conn.get_bucket('data.nuxeo.cdlib.org.oregon')
        for seen, key in enumerate(bucket.list()):
            file_check[key.name] = key.size
            if seen % 50000 == 0:
                print('{0} s3 files memorized'.format(seen), file=sys.stderr)
            s3_bytes += key.size
        s3_count = len(file_check)

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    campuses = nx.children(argv.path[0])

    out_dir = argv.outdir[0]
    summary_workbook = xlsxwriter.Workbook(
        os.path.join(out_dir, '{}-summary.xlsx'.format(today)))

    # cell formats
    header_format = summary_workbook.add_format({'bold': True})
    number_format = summary_workbook.add_format()
    number_format.set_num_format('#,##0')

    summary_worksheet = summary_workbook.add_worksheet('summary')

    # header row
    header_cells = [
        (1, 'deduplicated files'),
        (2, 'deduplicated bytes'),
        (4, 'total files'),
        (5, 'total bytes'),
    ]
    if argv.s3_check:
        header_cells.append((7, 'files on S3'))
        header_cells.append((8, 'bytes on S3'))
    for column, title in header_cells:
        summary_worksheet.write(0, column, title, header_format)

    # column widths as (first column, last column, width)
    column_widths = (
        (0, 1, 10),
        (2, 2, 25),
        (3, 4, 10),
        (5, 5, 25),
        (6, 7, 10),
        (8, 8, 25),
        (9, 9, 10),
    )
    for first_col, last_col, width in column_widths:
        summary_worksheet.set_column(first_col, last_col, width)

    def write_stats_row(index, label, dedup_files, dedup_size, files, size):
        # label, then deduplicated figures, then raw figures; each byte
        # count is followed by its human readable rendering
        summary_worksheet.write(index, 0, label)
        summary_worksheet.write(index, 1, dedup_files, number_format)
        summary_worksheet.write(index, 2, dedup_size, number_format)
        summary_worksheet.write(index, 3, sizeof_fmt(dedup_size))
        summary_worksheet.write(index, 4, files, number_format)
        summary_worksheet.write(index, 5, size, number_format)
        summary_worksheet.write(index, 6, sizeof_fmt(size))

    all_dedup_files = 0
    all_dedup_bytes = 0
    all_files = 0
    all_bytes = 0
    row = 1
    for campus in campuses:
        campus_path = campus['path']
        basename = os.path.basename(campus_path)
        query = 'select * from Document where ecm:path startswith"{0}"'.format(
            campus_path)
        documents = nx.nxql(query)
        files, size, dedup_files, dedup_size = forCampus(
            documents, basename, file_check, out_dir)
        write_stats_row(row, basename, dedup_files, dedup_size, files, size)

        # running totals for the final row
        all_files += files
        all_bytes += size
        all_dedup_files += dedup_files
        all_dedup_bytes += dedup_size
        row += 1

    # grand totals, labelled with the report date
    write_stats_row(row, '{}'.format(today), all_dedup_files, all_dedup_bytes,
                    all_files, all_bytes)
    if argv.s3_check:
        summary_worksheet.write(row, 7, s3_count, number_format)
        summary_worksheet.write(row, 8, s3_bytes, number_format)
        summary_worksheet.write(row, 9, sizeof_fmt(s3_bytes))
    summary_workbook.close()
示例#34
0
文件: nxidbatch.py 项目: ucldc/pynux
def main(argv=None):
    """Mint a batch of ARKs with EZID and write them, one per line, to a file.

    With ``--mint`` the batch is minted straight away; otherwise the user
    is asked for confirmation first and the program exits with status 1
    when the answer does not start with ``y``.

    :param argv: pre-parsed options namespace; when ``None`` the command
        line is parsed.
    """
    parser = argparse.ArgumentParser(
        description='nxidbatch mints a batch of ARKs')

    parser.add_argument(
        'batchsize', nargs=1, help='size of ARK batch', type=int)

    ezid_group = parser.add_argument_group('minting behaviour flags')
    ezid_group.add_argument(
        '--mint', '-m', action='store_true', help='mint ARKs without prompt')
    ezid_group.add_argument(
        '--output',
        '-o',
        type=lambda x: is_valid_file(parser, x),
        required=True)

    conf_group = parser.add_argument_group('EZID configuration and metadata')
    conf_group.add_argument(
        '--ezid-username',
        help='username for EZID API (overrides rcfile)',
        type=utf8_arg)
    conf_group.add_argument(
        '--ezid-password',
        help='password for EZID API (overrides rc file)',
        type=utf8_arg)
    conf_group.add_argument(
        '--shoulder', help='shoulder (overrides rcfile)', type=utf8_arg)
    conf_group.add_argument(
        '--owner', help='set as _owner for EZID', type=utf8_arg)
    conf_group.add_argument(
        '--status',
        help=
        'set as _status for EZID (default reserved, or public|unavailable)',
        default="reserved",
        type=utf8_arg)
    conf_group.add_argument(
        '--publisher', help='set as dc.publisher for EZID', type=utf8_arg)

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    # read config out of .pynuxrc file; command line values win
    username = argv.ezid_username or nx.ezid_conf['username']
    password = argv.ezid_password or nx.ezid_conf['password']
    shoulder = argv.shoulder or nx.ezid_conf['shoulder']
    ezid = EZID.EZIDClient(credentials=dict(
        username=username, password=password))

    # ``batchsize`` is declared with nargs=1, so argparse stores a
    # one-element list; unwrap it once for both the prompt and the loop.
    batch_size = argv.batchsize[0]

    if not argv.mint:
        # https://stackoverflow.com/a/26514097/1763984
        answer = raw_input(
            'Mint a batch {} of {} ARKs with prefix {} with EZID? [y/n]'.
            format(argv.output, batch_size, shoulder))
        if not answer or answer[0].lower() != 'y':
            print('You did not indicate approval')
            exit(1)

    # The context manager makes sure the ARKs minted so far are flushed
    # and the file is closed even when a mint call fails part way through.
    with open(argv.output, 'w') as output:
        for __ in range(batch_size):
            # mint
            new_ark = ezid.mint(shoulder)
            print(new_ark, file=output)

    if not (argv.mint):
        print('done')
示例#35
0
文件: nxid.py 项目: ucldc/pynux
def main(argv=None):
    """Sync top level Nuxeo objects under a path with EZID.

    For every matching document the ARK is looked up in its
    ``ucldc_schema:identifier`` property and, when one is found, checked
    against EZID.  Depending on what exists, and on the ``--mint`` /
    ``--create`` / ``--update`` flags, an ARK is minted and written back
    to Nuxeo, an EZID record is created, or an existing one is updated.
    When the matching flag is absent the action is only reported as a
    noop (unless ``--no-noop-report`` is given).

    :param argv: pre-parsed options namespace; when ``None`` the command
        line is parsed.
    """
    # local import: only the emptiness peek below needs it
    import itertools

    parser = argparse.ArgumentParser(
        description='nxid finds top level objects in Nuxeo and syncs them up with EZID')

    parser.add_argument(
        'path', nargs=1, help='nuxeo path (folder or object)', type=utf8_arg)

    ezid_group = parser.add_argument_group('minting behaviour flags')
    ezid_group.add_argument(
        '--mint', '-m',
        action='store_true',
        help='when an ARK is missing, mint and bind new ARK in EZID')
    ezid_group.add_argument(
        '--create', '-c',
        action='store_true',
        help='when an ARK is found in Nuxeo but not EZID, create EZID')
    ezid_group.add_argument(
        '--update', '-u',
        action='store_true',
        help='when an ARK is found in Nuxeo and EZID, update EZID')
    ezid_group.add_argument(
        '--no-noop-report',
        action='store_true',
        help='override default behaviour of reporting on noops')
    ezid_group.add_argument(
        '--show-erc',
        action='store_true',
        help='show ANVL record that will be sent to EZID')

    conf_group = parser.add_argument_group('EZID configuration and metadata')
    conf_group.add_argument(
        '--ezid-username', help='username for EZID API (overrides rcfile)', type=utf8_arg)
    conf_group.add_argument(
        '--ezid-password', help='password for EZID API (overrides rc file)', type=utf8_arg)
    conf_group.add_argument(
        '--shoulder', help='shoulder (overrides rcfile)', type=utf8_arg)
    conf_group.add_argument(
        '--owner', help='set as _owner for EZID', type=utf8_arg)
    conf_group.add_argument(
        '--status', help='set as _status for EZID (public|reserved|unavailable)', type=utf8_arg)
    conf_group.add_argument(
        '--publisher', help='set as dc.publisher for EZID', type=utf8_arg)
    conf_group.add_argument(
        '--location', help='set location URL prefix for EZID', type=utf8_arg)

    utils.get_common_options(parser)
    if argv is None:
        argv = parser.parse_args()

    nx = utils.Nuxeo(rcfile=argv.rcfile, loglevel=argv.loglevel.upper())

    # read config out of .pynuxrc file; command line values win
    username = argv.ezid_username or nx.ezid_conf['username']
    password = argv.ezid_password or nx.ezid_conf['password']
    shoulder = argv.shoulder or nx.ezid_conf['shoulder']
    ezid = EZID.EZIDClient(credentials=dict(username=username, password=password))

    # query to select all parent level objects
    documents = nx.nxql(u'''
SELECT * FROM SampleCustomPicture, CustomFile, CustomVideo, CustomAudio
WHERE ecm:path STARTSWITH "{}"
AND ecm:currentLifeCycleState != "deleted"
AND ecm:pos is NULL'''.format(argv.path[0]))

    # Peek at the first result to find out whether the folder query
    # matched anything.  The element is chained back in front of the
    # remaining results: probing with ``any(...)`` would consume it and
    # the main loop would then skip the first document whenever
    # ``nx.nxql`` hands back a one-shot iterator (presumably it does --
    # confirm against utils.Nuxeo.nxql).
    documents = iter(documents)
    nothing = object()
    first_document = next(documents, nothing)
    if first_document is nothing:
        # if the user gives the full path to a document
        documents = nx.nxql(u'''
SELECT * FROM SampleCustomPicture, CustomFile, CustomVideo, CustomAudio
WHERE ecm:path = "{}"
AND ecm:currentLifeCycleState != "deleted"
AND ecm:pos is NULL'''.format(argv.path[0]))
    else:
        documents = itertools.chain([first_document], documents)

    report = not(argv.no_noop_report)

    # main loop
    for item in documents:
        # check id for ARK
        ark = find_ark(item['properties']['ucldc_schema:identifier'])
        path = item['path']

        # if there is an ARK, check for a record in EZID
        ezid_status = None
        if ark is not None:
            ezid_status = check_ezid(ark, ezid)

        ezdata = item_erc_dict(
            item,
            owner=argv.owner,            # _owner
            status=argv.status,          # _status
            publisher=argv.publisher,    # dc.publisher
            location=argv.location       # _target
        )

        if argv.show_erc:
            print(EZID.formatAnvlFromDict(ezdata))
            print('')

        # mint: no ARK in Nuxeo, hence nothing in EZID either
        if not(ark) and not(ezid_status):
            if argv.mint:
                new_ark = ezid.mint(shoulder, ezdata)
                update_nuxeo(item, nx, new_ark)
                print('✓ mint "{}" {}'.format(path, new_ark))
            elif report:
                print('ℹ noop mint "{}"'.format(path))

        # create: ARK recorded in Nuxeo but unknown to EZID
        if ark and not(ezid_status):
            if argv.create:
                ezid.create(ark, ezdata)
                print('✓ create "{}" {}'.format(path, ark))
            elif report:
                print('ℹ noop create "{}" {}'.format(path, ark))

        # update: ARK known on both sides
        if ark and ezid_status:
            owner = get_owner(ezid_status)
            if argv.update:
                ezid.update(ark, ezdata)
                print('✓ update "{}" {}'.format(path, ark))
            elif report:
                print('ℹ noop update "{}" {} {}'.format(path, ark, owner))