def main(arglist=None):
    """Parse the command-line options and load the versions file.

    The statements visible here build the argument parser, parse
    ``arglist`` and parse the versions XML file found in the
    configuration directory.

    Args:
        arglist: sequence of command-line tokens. When ``None``,
            ``sys.argv[1:]`` is used. When empty, ``-h`` is substituted
            so that the usage text is shown.

    Raises:
        SystemExit: with status 1 if the versions file cannot be parsed
            (argparse itself also exits on ``-h`` or on invalid options).
    """
    # NOTE(review): t0 and versions are not consumed by the statements
    # below; presumably later code uses them -- confirm before removing.
    # NOTE: time.clock() exists on Python 2 but was removed in Python 3.8.
    t0 = time.clock()

    # The argparse package was introduced in 2.7, hence the bundled copy.
    from argparselocal import ArgumentParser, RawTextHelpFormatter

    if arglist is None:
        arglist = sys.argv[1:]
    if not arglist:
        # Rebind instead of appending so a caller-supplied list is never
        # mutated behind the caller's back.
        arglist = ['-h']

    parser = ArgumentParser(description=defaults['description'],
                            epilog=defaults['epilog'],
                            fromfile_prefix_chars='@',
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('-v', default=False, action='store_true',
                        help='show version information')
    parser.add_argument('-q', default=False, action='store_true',
                        help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true',
                        help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true',
                        help='recurse into zip and tar files')
    parser.add_argument('-nocontainer', default=False, action='store_true',
                        help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')
    parser.add_argument('-pronom_only', default=False, action='store_true',
                        help='disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results')

    # A list file (-input) and explicit FILE arguments exclude each other.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-input', default=False,
                       help='file containing a list of files to check, one per line. - means stdin')
    group.add_argument('files', nargs='*', default=[], metavar='FILE',
                       help='files to check. If the file is -, then read content from stdin. '
                            'In this case, python must be invoked with -u or it may convert the line terminators.')

    parser.add_argument('-filename', default=None,
                        help='filename if file contents passed through STDIN')
    parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None,
                        help='comma separated string of formats to use in identification')
    parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None,
                        help='comma separated string of formats not to use in identification')
    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None,
                        help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None,
                        help='format string (Python style) to use if no match. See README.txt')
    parser.add_argument('-bufsize', type=int, default=None,
                        help='size (in bytes) of the buffer to match against (default='
                             + str(defaults['bufsize']) + ' bytes)')
    # NOTE(review): this help text is the same as -bufsize's apart from the
    # default shown; presumably it should mention containers -- confirm.
    parser.add_argument('-container_bufsize', type=int, default=None,
                        help='size (in bytes) of the buffer to match against (default='
                             + str(defaults['container_bufsize']) + ' bytes)')
    parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn',
                        help='comma separated string of XML format files to add.')
    parser.add_argument('-confdir', default=None,
                        help='configuration directory to load_fido_xml, for example, the format specifications from.')

    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)

    # The versions file lives in the directory given by -confdir, or in
    # the default configuration directory when -confdir is absent.
    if args.confdir:
        conf_dir = args.confdir
    else:
        conf_dir = defaults['conf_dir']
    versionsFile = os.path.join(os.path.abspath(conf_dir),
                                defaults['versions_file'])

    try:
        versions = VET.parse(versionsFile)
    except Exception as e:
        sys.stderr.write("An error occured loading versions.xml:\n{0}".format(e))
        # Exit non-zero: a bare sys.exit() would report success to the
        # calling shell even though start-up failed.
        sys.exit(1)
if arg != None: arglist = arg else: arglist = sys.argv[1:] #print arglist #exit() mydir = os.path.abspath(os.path.dirname(__file__)) # parse version file to fetch versions versionsFile = os.path.join(mydir, 'conf', 'versions.xml') try: versions = VET.parse(versionsFile) except Exception, e: sys.stderr.write("An error occured loading versions.xml:\n{0}".format(e)) sys.exit() xml_pronomSignature = os.path.join(mydir, 'conf', versions.find('pronomSignature').text) xml_pronomZipFile = os.path.join(mydir, 'conf', "pronom-xml-v{0}.zip".format(versions.find('pronomVersion').text)) parser = ArgumentParser(description='Produce the fido format xml that is loaded at run-time') parser.add_argument('-input', default=xml_pronomZipFile, help='input file, a zip containing Pronom xml files') parser.add_argument('-output', default=xml_pronomSignature, help='output file') parser.add_argument('-puid', default=None, help='a particular PUID record to extract') # PROCESS ARGUMENTS args = parser.parse_args(arglist) # print os.path.abspath(args.input), os.path.abspath(args.output) info = FormatInfo(args.input) info.load_pronom_xml(args.puid) info.save(args.output) print >> sys.stderr, 'Converted {0} PRONOM formats to FIDO signatures'.format(len(info.formats)) if __name__ == '__main__': main()