def main():
    """Build the FIDO format XML from a zip of PRONOM XML records.

    Options are read from ``sys.argv``:

    * ``-input``  zip containing Pronom xml files
      (default: ``conf/pronom-xml.zip`` next to this module)
    * ``-output`` file to write
      (default: ``conf/formats.xml`` next to this module)
    * ``-puid``   a particular PUID record to extract (default: ``None``)

    After saving, the number of loaded formats is reported on stderr.
    """
    import sys
    from argparselocal import ArgumentParser

    arglist = sys.argv[1:]
    # Defaults are resolved relative to the directory holding this module.
    mydir = os.path.abspath(os.path.dirname(__file__))

    parser = ArgumentParser(description='Produce the fido format xml that is loaded at run-time')
    parser.add_argument('-input',
                        default=os.path.join(mydir, 'conf', 'pronom-xml.zip'),
                        help='input file, a zip containing Pronom xml files')
    parser.add_argument('-output',
                        default=os.path.join(mydir, 'conf', 'formats.xml'),
                        help='output file')
    parser.add_argument('-puid',
                        default=None,
                        help='a particular PUID record to extract')

    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)

    info = FormatInfo(args.input)
    info.load_pronom_xml(args.puid)
    info.save(args.output)

    # sys.stderr.write replaces the Python 2-only ``print >>`` statement;
    # the text written (including the trailing newline) is unchanged.
    sys.stderr.write('Converted {0} PRONOM formats to FIDO signatures'.format(len(info.formats)) + '\n')
buf.write('.{' + offset) if maxoffset != None: buf.write(',' + maxoffset) buf.write('}') elif maxoffset != None: buf.write('.{0,' + maxoffset + '}') buf.write('\\Z') val = buf.getvalue() buf.close() return val if __name__ == '__main__': import sys from argparselocal import ArgumentParser arglist = sys.argv[1:] mydir = os.path.abspath(os.path.dirname(__file__)) parser = ArgumentParser(description='Produce the fido format xml that is loaded at run-time') parser.add_argument('-input', default=os.path.join(mydir, 'conf', 'pronom-xml.zip'), help='input file, a zip containing Pronom xml files') parser.add_argument('-output', default=os.path.join(mydir, 'conf', 'formats.xml'), help='output file') parser.add_argument('-puid', default=None, help='a particular PUID record to extract') # PROCESS ARGUMENTS args = parser.parse_args(arglist) # print os.path.abspath(args.input), os.path.abspath(args.output) info = FormatInfo(args.input) info.load_pronom_xml(args.puid) info.save(args.output) print >> sys.stderr, 'FIDO: {0} formats'.format(len(info.formats))
def main(arglist=None):
    """Command-line entry point: build the option parser and load versions.xml.

    :param arglist: list of command-line arguments; when ``None`` the
        arguments are taken from ``sys.argv[1:]``. An empty list is treated
        as a request for help (``-h``).

    Exits with status 1 if the versions file cannot be parsed.

    NOTE(review): the part of this function visible here ends after the
    versions file is loaded -- confirm how the parser and ``versions`` are
    used afterwards.
    """
    # The argparse package was introduced in 2.7
    t0 = time.clock()
    from argparselocal import ArgumentParser

    if arglist is None:
        arglist = sys.argv[1:]
    if not arglist:
        # No arguments at all: show the usage text. Rebinding (instead of
        # appending) avoids modifying a list object owned by the caller.
        arglist = ["-h"]

    parser = ArgumentParser(description=defaults['description'],
                            epilog=defaults['epilog'],
                            fromfile_prefix_chars='@')
    parser.add_argument('-v', default=False, action='store_true',
                        help='show version information')
    parser.add_argument('-q', default=False, action='store_true',
                        help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true',
                        help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true',
                        help='recurse into zip and tar files')
    parser.add_argument('-nocontainer', default=False, action='store_true',
                        help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')

    # A list file (-input) and positional FILE arguments exclude each other.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-input', default=False,
                       help='file containing a list of files to check, one per line. - means stdin')
    group.add_argument('files', nargs='*', default=[], metavar='FILE',
                       help='files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')

    parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None,
                        help='comma separated string of formats to use in identification')
    parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None,
                        help='comma separated string of formats not to use in identification')
    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None,
                        help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None,
                        help='format string (Python style) to use if no match. See README.txt')
    parser.add_argument('-bufsize', type=int, default=None,
                        help='size (in bytes) of the buffer to match against (default='+str(defaults['bufsize'])+' bytes)')
    parser.add_argument('-container_bufsize', type=int, default=None,
                        help='size (in bytes) of the buffer to match against (default='+str(defaults['container_bufsize'])+' bytes)')
    parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn',
                        help='comma separated string of XML format files to add.')
    parser.add_argument('-confdir', default=None,
                        help='configuration directory to load_fido_xml, for example, the format specifications from.')

    mydir = os.path.abspath(os.path.dirname(__file__))

    # The versions file is looked up in the default configuration directory.
    versionsFile = os.path.join(os.path.abspath(defaults['conf_dir']), defaults['versions_file'])
    try:
        versions = VET.parse(versionsFile)
    except Exception as e:
        sys.stderr.write("An error occured loading versions.xml:\n{0}".format(e))
        # Non-zero status: a failed load must not be reported as success.
        sys.exit(1)
if offset != '0': buf.write('.{' + offset) if maxoffset != None: buf.write(',' + maxoffset) buf.write('}') elif maxoffset != None: buf.write('.{0,' + maxoffset + '}') buf.write('\\Z') val = buf.getvalue() buf.close() return val if __name__ == '__main__': import sys from argparselocal import ArgumentParser arglist = sys.argv[1:] mydir = os.path.abspath(os.path.dirname(__file__)) parser = ArgumentParser(description='Produce the fido format xml that is loaded at run-time') parser.add_argument('-input', default=os.path.join(mydir, 'conf', 'pronom-xml.zip'), help='input file, a zip containing Pronom xml files') parser.add_argument('-output', default=os.path.join(mydir, 'conf', 'formats.xml'), help='output file') # PROCESS ARGUMENTS args = parser.parse_args(arglist) # print os.path.abspath(args.input), os.path.abspath(args.output) info = FormatInfo(args.input) info.load_pronom_xml() info.save(args.output) print >> sys.stderr, 'FIDO: {0} formats'.format(len(info.formats))
def main(arglist=None):
    """Command-line entry point: parse options and identify the given files.

    :param arglist: list of command-line arguments; when ``None`` the
        arguments are taken from ``sys.argv[1:]``.

    Behaviour, in order:

    * ``-v`` prints the version and exits with status 0.
    * ``-show defaults`` prints every entry of ``defaults`` and exits 0.
    * A ``Fido`` instance is created from the options; extra format files
      (``-xmlformats``) are loaded and the format list is narrowed by
      ``-formats`` or, failing that, ``-excludeformats``.
    * ``-show formats`` prints the remaining formats as XML and exits 0.
    * Input is identified from stdin (a single ``-`` file argument), from
      a list file (``-input``), or from the FILE arguments.
    * Unless ``-q`` is given, a summary is printed at the end.

    :raises RuntimeError: when content is read from stdin with ``-zip``.

    Exits with status 1 if interrupted from the keyboard.
    """
    # The argparse package was introduced in 2.7
    from argparselocal import ArgumentParser

    if arglist is None:
        arglist = sys.argv[1:]

    parser = ArgumentParser(description=defaults['description'],
                            epilog=defaults['epilog'],
                            fromfile_prefix_chars='@')
    parser.add_argument('-v', default=False, action='store_true',
                        help='show version information')
    parser.add_argument('-q', default=False, action='store_true',
                        help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true',
                        help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true',
                        help='recurse into zip files')

    # A list file (-input) and positional FILE arguments exclude each other.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-input', default=False,
                       help='file containing a list of files to check, one per line. - means stdin')
    group.add_argument('files', nargs='*', default=[], metavar='FILE',
                       help='files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')

    parser.add_argument('-formats', metavar='PUIDS', default=None,
                        help='comma separated string of formats to use in identification')
    parser.add_argument('-excludeformats', metavar='PUIDS', default=None,
                        help='comma separated string of formats not to use in identification')
    parser.add_argument('-extension', default=False, action='store_true',
                        help='use file extensions if the patterns fail. May return many matches.')
    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None,
                        help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None,
                        help='format string (Python style) to use if no match. See README.txt')
    parser.add_argument('-bufsize', type=int, default=None,
                        help='size of the buffer to match against')
    parser.add_argument('-show', default=False,
                        help='show "format" or "defaults"')
    parser.add_argument('-xmlformats', default=None, metavar='XML1,...,XMLn',
                        help='comma separated string of XML format files to add.')
    parser.add_argument('-confdir', default=None,
                        help='configuration directory to load, for example, the format specifications from.')

    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)

    if args.v:
        sys.stdout.write("fido/" + version + "\n")
        sys.exit(0)
    if args.show == 'defaults':
        for (k, v) in defaults.items():
            sys.stdout.write("{0} = {1}\n".format(k, repr(v)))
        sys.exit(0)

    # Turn escape sequences typed on the command line (e.g. "\n") into the
    # characters they stand for.
    if args.matchprintf is not None:
        args.matchprintf = args.matchprintf.decode('string_escape')
    if args.nomatchprintf is not None:
        args.nomatchprintf = args.nomatchprintf.decode('string_escape')

    t0 = time.clock()
    fido = Fido(quiet=args.q, bufsize=args.bufsize, extension=args.extension,
                printmatch=args.matchprintf, printnomatch=args.nomatchprintf,
                zip=args.zip, conf_dir=args.confdir)

    #TODO: Allow conf options to be dis-included
    if args.xmlformats:
        for xml_file in args.xmlformats.split(','):
            fido.load(xml_file)

    #TODO: remove from maps
    if args.formats:
        args.formats = args.formats.split(',')
        fido.formats = [f for f in fido.formats if f.find('puid').text in args.formats]
    elif args.excludeformats:
        args.excludeformats = args.excludeformats.split(',')
        # Compare the puid *text*: comparing the element object itself
        # against the list of strings never matches, so nothing would
        # ever be excluded.
        fido.formats = [f for f in fido.formats if f.find('puid').text not in args.excludeformats]

    if args.show == 'formats':
        for fmt in fido.formats:
            sys.stdout.write(ET.tostring(fmt, encoding='UTF-8') + "\n")
        sys.exit(0)

    # Set up to use stdin, or open the file that lists the inputs.
    list_handle = None
    if args.input == '-':
        args.files = sys.stdin
    elif args.input:
        list_handle = open(args.input, 'r')
        args.files = list_handle

    # RUN
    try:
        if (not args.input) and len(args.files) == 1 and args.files[0] == '-':
            if fido.zip == True:
                # Nothing placed after this raise could ever run, so the
                # statements that used to follow it have been dropped.
                raise RuntimeError("Multiple content read from stdin not yet supported.")
            else:
                fido.identify_stream(sys.stdin)
        else:
            for path in list_files(args.files, args.recurse):
                fido.identify_file(path)
    except KeyboardInterrupt:
        msg = "FIDO: Interrupt during:\n File: {0}\n Format: Puid={1.Identifier} [{1.FormatName}]\n Sig: ID={2.SignatureID} [{2.SignatureName}]\n Pat={3.ByteSequenceID} {3.regexstring!r}"
        sys.stderr.write(msg.format(fido.current_file, fido.current_format, fido.current_sig, fido.current_pat) + "\n")
        sys.exit(1)
    finally:
        # Close the list file opened above (stdin is left alone).
        if list_handle is not None:
            list_handle.close()

    if not args.q:
        sys.stdout.flush()
        fido.print_summary(time.clock() - t0)
def main(arglist=None):
    """Command-line entry point: parse the options and load versions.xml.

    :param arglist: list of command-line arguments; when ``None`` the
        arguments are taken from ``sys.argv[1:]``. An empty list is treated
        as a request for help (``-h``).

    The versions file is read from ``-confdir`` when given, otherwise from
    the default configuration directory. Exits with status 1 if it cannot
    be parsed.

    NOTE(review): the part of this function visible here ends after the
    versions file is loaded -- confirm how ``args`` and ``versions`` are
    used afterwards.
    """
    # The argparse package was introduced in 2.7
    t0 = time.clock()
    from argparselocal import ArgumentParser, RawTextHelpFormatter

    if arglist is None:
        arglist = sys.argv[1:]
    if not arglist:
        # No arguments at all: show the usage text. Rebinding (instead of
        # appending) avoids modifying a list object owned by the caller.
        arglist = ["-h"]

    parser = ArgumentParser(description=defaults['description'],
                            epilog=defaults['epilog'],
                            fromfile_prefix_chars='@',
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('-v', default=False, action='store_true',
                        help='show version information')
    parser.add_argument('-q', default=False, action='store_true',
                        help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true',
                        help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true',
                        help='recurse into zip and tar files')
    parser.add_argument(
        '-nocontainer', default=False, action='store_true',
        help='disable deep scan of container documents, increases speed but may reduce accuracy with big files'
    )
    parser.add_argument(
        '-pronom_only', default=False, action='store_true',
        help='disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results'
    )

    # A list file (-input) and positional FILE arguments exclude each other.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-input', default=False,
        help='file containing a list of files to check, one per line. - means stdin'
    )
    group.add_argument(
        'files', nargs='*', default=[], metavar='FILE',
        help='files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.'
    )

    parser.add_argument('-filename', default=None,
                        help='filename if file contents passed through STDIN')
    parser.add_argument(
        '-useformats', metavar='INCLUDEPUIDS', default=None,
        help='comma separated string of formats to use in identification')
    parser.add_argument(
        '-nouseformats', metavar='EXCLUDEPUIDS', default=None,
        help='comma separated string of formats not to use in identification')
    parser.add_argument(
        '-matchprintf', metavar='FORMATSTRING', default=None,
        help='format string (Python style) to use on match. See nomatchprintf, README.txt.'
    )
    parser.add_argument(
        '-nomatchprintf', metavar='FORMATSTRING', default=None,
        help='format string (Python style) to use if no match. See README.txt')
    parser.add_argument(
        '-bufsize', type=int, default=None,
        help='size (in bytes) of the buffer to match against (default=' +
        str(defaults['bufsize']) + ' bytes)')
    parser.add_argument(
        '-container_bufsize', type=int, default=None,
        help='size (in bytes) of the buffer to match against (default=' +
        str(defaults['container_bufsize']) + ' bytes)')
    parser.add_argument(
        '-loadformats', default=None, metavar='XML1,...,XMLn',
        help='comma separated string of XML format files to add.')
    parser.add_argument(
        '-confdir', default=None,
        help='configuration directory to load_fido_xml, for example, the format specifications from.'
    )

    # what is this doing here only once?
    #mydir = os.path.abspath(os.path.dirname(__file__))

    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)
    # print args
    # sys.exit()

    # process confdir
    # load versions.xml
    # and stick it in defaults
    if args.confdir:
        versionsFile = os.path.join(os.path.abspath(args.confdir),
                                    defaults['versions_file'])
    else:
        versionsFile = os.path.join(os.path.abspath(defaults['conf_dir']),
                                    defaults['versions_file'])
    try:
        versions = VET.parse(versionsFile)
    except Exception as e:
        sys.stderr.write(
            "An error occured loading versions.xml:\n{0}".format(e))
        # Non-zero status: a failed load must not be reported as success.
        sys.exit(1)
if arg != None: arglist = arg else: arglist = sys.argv[1:] #print arglist #exit() mydir = os.path.abspath(os.path.dirname(__file__)) # parse version file to fetch versions versionsFile = os.path.join(mydir, 'conf', 'versions.xml') try: versions = VET.parse(versionsFile) except Exception, e: sys.stderr.write("An error occured loading versions.xml:\n{0}".format(e)) sys.exit() xml_pronomSignature = os.path.join(mydir, 'conf', versions.find('pronomSignature').text) xml_pronomZipFile = os.path.join(mydir, 'conf', "pronom-xml-v{0}.zip".format(versions.find('pronomVersion').text)) parser = ArgumentParser(description='Produce the fido format xml that is loaded at run-time') parser.add_argument('-input', default=xml_pronomZipFile, help='input file, a zip containing Pronom xml files') parser.add_argument('-output', default=xml_pronomSignature, help='output file') parser.add_argument('-puid', default=None, help='a particular PUID record to extract') # PROCESS ARGUMENTS args = parser.parse_args(arglist) # print os.path.abspath(args.input), os.path.abspath(args.output) info = FormatInfo(args.input) info.load_pronom_xml(args.puid) info.save(args.output) print >> sys.stderr, 'Converted {0} PRONOM formats to FIDO signatures'.format(len(info.formats)) if __name__ == '__main__': main()
def main(arglist=None):
    """Command-line entry point: parse options and identify the given files.

    :param arglist: list of command-line arguments; when ``None`` the
        arguments are taken from ``sys.argv[1:]``. An empty list is treated
        as a request for help (``-h``).

    Behaviour, in order:

    * ``-v`` writes the version to stdout and exits with status 0.
    * A ``Fido`` instance is created from the options; extra format files
      (``-loadformats``) are loaded and the format list is narrowed by
      ``-useformats`` or, failing that, ``-nouseformats``.
    * Input is identified from stdin (a single ``-`` file argument), from
      a list file (``-input``), or from the FILE arguments.
    * Unless ``-q`` is given, a summary is printed at the end.

    :raises RuntimeError: when content is read from stdin with ``-zip``.

    Exits with status 1 if interrupted from the keyboard.
    """
    # The argparse package was introduced in 2.7
    t0 = time.clock()
    from argparselocal import ArgumentParser

    if arglist is None:
        arglist = sys.argv[1:]
    if not arglist:
        # No arguments at all: show the usage text. Rebinding (instead of
        # appending) avoids modifying a list object owned by the caller.
        arglist = ["-h"]

    parser = ArgumentParser(description=defaults['description'],
                            epilog=defaults['epilog'],
                            fromfile_prefix_chars='@')
    parser.add_argument('-v', default=False, action='store_true',
                        help='show version information')
    parser.add_argument('-q', default=False, action='store_true',
                        help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true',
                        help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true',
                        help='recurse into zip and tar files')
    parser.add_argument('-nocontainer', default=False, action='store_true',
                        help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')

    # A list file (-input) and positional FILE arguments exclude each other.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-input', default=False,
                       help='file containing a list of files to check, one per line. - means stdin')
    group.add_argument('files', nargs='*', default=[], metavar='FILE',
                       help='files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')

    parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None,
                        help='comma separated string of formats to use in identification')
    parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None,
                        help='comma separated string of formats not to use in identification')
    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None,
                        help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None,
                        help='format string (Python style) to use if no match. See README.txt')
    parser.add_argument('-bufsize', type=int, default=None,
                        help='size (in bytes) of the buffer to match against (default='+str(defaults['bufsize'])+' bytes)')
    parser.add_argument('-container_bufsize', type=int, default=None,
                        help='size (in bytes) of the buffer to match against (default='+str(defaults['container_bufsize'])+' bytes)')
    parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn',
                        help='comma separated string of XML format files to add.')
    parser.add_argument('-confdir', default=None,
                        help='configuration directory to load_fido_xml, for example, the format specifications from.')

    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)

    if args.v:
        sys.stdout.write("fido/" + version + "\n")
        sys.exit(0)

    # Turn escape sequences typed on the command line (e.g. "\n") into the
    # characters they stand for.
    if args.matchprintf is not None:
        args.matchprintf = args.matchprintf.decode('string_escape')
    if args.nomatchprintf is not None:
        args.nomatchprintf = args.nomatchprintf.decode('string_escape')

    # NOTE(review): -container_bufsize is parsed above but not forwarded
    # here -- confirm whether Fido should receive it.
    fido = Fido(quiet=args.q, bufsize=args.bufsize,
                printmatch=args.matchprintf, printnomatch=args.nomatchprintf,
                zip=args.zip, nocontainer=args.nocontainer,
                conf_dir=args.confdir)

    #TODO: Allow conf options to be dis-included
    if args.loadformats:
        for xml_file in args.loadformats.split(','):
            fido.load_fido_xml(xml_file)

    #TODO: remove from maps
    if args.useformats:
        args.useformats = args.useformats.split(',')
        fido.formats = [f for f in fido.formats if f.find('puid').text in args.useformats]
    elif args.nouseformats:
        args.nouseformats = args.nouseformats.split(',')
        fido.formats = [f for f in fido.formats if f.find('puid').text not in args.nouseformats]

    # Set up to use stdin, or open input files:
    list_handle = None
    if args.input == '-':
        args.files = sys.stdin
    elif args.input:
        list_handle = open(args.input, 'r')
        args.files = list_handle

    # RUN
    try:
        if (not args.input) and len(args.files) == 1 and args.files[0] == '-':
            if fido.zip == True:
                # Nothing placed after this raise could ever run, so the
                # statements that used to follow it have been dropped.
                raise RuntimeError("Multiple content read from stdin not yet supported.")
            else:
                fido.identify_stream(sys.stdin)
        else:
            for path in list_files(args.files, args.recurse):
                fido.identify_file(path)
    except KeyboardInterrupt:
        # MdR: this seems to be broken?
        msg = "FIDO: Interrupt during:\n File: {0}\n Format: Puid={1.Identifier} [{1.FormatName}]\n Sig: ID={2.SignatureID} [{2.SignatureName}]\n Pat={3.ByteSequenceID} {3.regexstring!r}"
        sys.stderr.write(msg.format(fido.current_file, fido.current_format, fido.current_sig, fido.current_pat))
        sys.exit(1)
    finally:
        # Close the list file opened above (stdin is left alone).
        if list_handle is not None:
            list_handle.close()

    if not args.q:
        sys.stdout.flush()
        fido.print_summary(time.clock() - t0)
        sys.stderr.flush()
buf.write(calculate_repetition(".", pos, offset, maxoffset)) buf.write("\\Z") val = buf.getvalue() buf.close() return val if __name__ == "__main__": import sys from argparselocal import ArgumentParser arglist = sys.argv[1:] mydir = os.path.abspath(os.path.dirname(__file__)) parser = ArgumentParser(description="Produce the fido format xml that is loaded at run-time") parser.add_argument( "-input", default=os.path.join(mydir, "conf", "pronom-xml.zip"), help="input file, a zip containing Pronom xml files", ) parser.add_argument("-output", default=os.path.join(mydir, "conf", "formats.xml"), help="output file") parser.add_argument("-puid", default=None, help="a particular PUID record to extract") # PROCESS ARGUMENTS args = parser.parse_args(arglist) # print os.path.abspath(args.input), os.path.abspath(args.output) info = FormatInfo(args.input) info.load_pronom_xml(args.puid) info.save(args.output) print >> sys.stderr, "FIDO: {0} formats".format(len(info.formats))