Пример #1
0
def main():
    """Build the fido format xml from a zip of Pronom xml files.

    Options are taken from ``sys.argv[1:]``:

    -input   input zip (default: ``conf/pronom-xml.zip`` beside this file)
    -output  output file (default: ``conf/formats.xml`` beside this file)
    -puid    a particular PUID record to extract (default: None)

    After saving, the size of ``info.formats`` is reported on stderr.
    """
    import sys
    from argparselocal import ArgumentParser

    cli_args = sys.argv[1:]

    # Default locations are resolved relative to the directory of this file.
    here = os.path.abspath(os.path.dirname(__file__))
    default_input = os.path.join(here, 'conf', 'pronom-xml.zip')
    default_output = os.path.join(here, 'conf', 'formats.xml')

    cli = ArgumentParser(
        description='Produce the fido format xml that is loaded at run-time')
    cli.add_argument(
        '-input',
        default=default_input,
        help='input file, a zip containing Pronom xml files')
    cli.add_argument(
        '-output',
        default=default_output,
        help='output file')
    cli.add_argument(
        '-puid',
        default=None,
        help='a particular PUID record to extract')

    # Parse the command line, then load the records and write the output.
    options = cli.parse_args(cli_args)
    format_info = FormatInfo(options.input)
    format_info.load_pronom_xml(options.puid)
    format_info.save(options.output)

    converted = len(format_info.formats)
    print >> sys.stderr, 'Converted {0} PRONOM formats to FIDO signatures'.format(converted)
Пример #2
0
            buf.write('.{' + offset)
            if maxoffset != None:
                buf.write(',' + maxoffset)
            buf.write('}')
        elif maxoffset != None:
            buf.write('.{0,' + maxoffset + '}')
        buf.write('\\Z')
    val = buf.getvalue()
    buf.close()
    return val
    
if __name__ == '__main__':
    # Script mode: read a zip of Pronom xml files and write the fido format xml.
    import sys
    from argparselocal import ArgumentParser

    arglist = sys.argv[1:]

    # Default input/output paths are resolved relative to this file.
    mydir = os.path.abspath(os.path.dirname(__file__))

    parser = ArgumentParser(
        description='Produce the fido format xml that is loaded at run-time')
    parser.add_argument(
        '-input',
        help='input file, a zip containing Pronom xml files',
        default=os.path.join(mydir, 'conf', 'pronom-xml.zip'))
    parser.add_argument(
        '-output',
        help='output file',
        default=os.path.join(mydir, 'conf', 'formats.xml'))
    parser.add_argument(
        '-puid',
        help='a particular PUID record to extract',
        default=None)

    # Parse the command line, load the requested records, write the output
    # file and report the number of formats on stderr.
    args = parser.parse_args(arglist)
    info = FormatInfo(args.input)
    info.load_pronom_xml(args.puid)
    info.save(args.output)
    print >> sys.stderr, 'FIDO: {0} formats'.format(len(info.formats))
    
Пример #3
0
def main(arglist=None):
    """Set up the fido command-line parser and load versions.xml.

    arglist -- list of command-line arguments; when None, sys.argv[1:] is
               used.  An empty list is treated as a request for help ("-h").

    NOTE(review): the visible part of this function never calls
    parser.parse_args(); the body appears to continue beyond this excerpt.
    """
    # The argparse package was introduced in 2.7
    # Start time; not read anywhere in the visible part of the function.
    t0 = time.clock() 
    from argparselocal import ArgumentParser
    if arglist == None:
        arglist = sys.argv[1:]
    # len(arglist) == False holds only for an empty list (0 == False).
    # "-h" is appended in place, so an empty list passed by a caller is mutated.
    if len(arglist) == False:
        arglist.append("-h")        
    # fromfile_prefix_chars='@': arguments starting with '@' name a file from
    # which further arguments are read (argparse feature).
    parser = ArgumentParser(description=defaults['description'], epilog=defaults['epilog'], fromfile_prefix_chars='@')
    parser.add_argument('-v', default=False, action='store_true', help='show version information')
    parser.add_argument('-q', default=False, action='store_true', help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true', help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true', help='recurse into zip and tar files')
    parser.add_argument('-nocontainer', default=False, action='store_true', help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')
    # -input and the positional FILE list are mutually exclusive.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-input', default=False, help='file containing a list of files to check, one per line. - means stdin')
    group.add_argument('files', nargs='*', default=[], metavar='FILE', help='files to check.  If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')
    parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None, help='comma separated string of formats to use in identification')
    parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None, help='comma separated string of formats not to use in identification')
    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use if no match. See README.txt')
    # Both buffer-size options default to None here; the help text quotes the
    # values from the module-level defaults dict.
    parser.add_argument('-bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default='+str(defaults['bufsize'])+' bytes)')
    parser.add_argument('-container_bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default='+str(defaults['container_bufsize'])+' bytes)')
    
    parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn', help='comma separated string of XML format files to add.')
    parser.add_argument('-confdir', default=None, help='configuration directory to load_fido_xml, for example, the format specifications from.')
       
    # Directory containing this file; not read in the visible part of the function.
    mydir = os.path.abspath(os.path.dirname(__file__))

    # The versions file is looked up in defaults['conf_dir']; the -confdir
    # option is not consulted here.
    versionsFile = os.path.join(os.path.abspath(defaults['conf_dir']), defaults['versions_file'])
    try:
        versions = VET.parse(versionsFile)
    except Exception, e:
        sys.stderr.write("An error occured loading versions.xml:\n{0}".format(e))
        # NOTE(review): sys.exit() without an argument exits with status 0,
        # although this is an error path.
        sys.exit()
Пример #4
0
        if offset != '0':
            buf.write('.{' + offset)
            if maxoffset != None:
                buf.write(',' + maxoffset)
            buf.write('}')
        elif maxoffset != None:
            buf.write('.{0,' + maxoffset + '}')
        buf.write('\\Z')
    val = buf.getvalue()
    buf.close()
    return val
    
if __name__ == '__main__':
    # Script mode: read a zip of Pronom xml files and write the fido format xml.
    import sys
    from argparselocal import ArgumentParser

    arglist = sys.argv[1:]

    # Default input/output paths are resolved relative to this file.
    mydir = os.path.abspath(os.path.dirname(__file__))

    parser = ArgumentParser(
        description='Produce the fido format xml that is loaded at run-time')
    parser.add_argument(
        '-input',
        help='input file, a zip containing Pronom xml files',
        default=os.path.join(mydir, 'conf', 'pronom-xml.zip'))
    parser.add_argument(
        '-output',
        help='output file',
        default=os.path.join(mydir, 'conf', 'formats.xml'))

    # Parse the command line, load the records, write the output file and
    # report the number of formats on stderr.
    args = parser.parse_args(arglist)
    info = FormatInfo(args.input)
    info.load_pronom_xml()
    info.save(args.output)
    print >> sys.stderr, 'FIDO: {0} formats'.format(len(info.formats))
    
Пример #5
0
def main(arglist=None):
    """Command-line entry point: parse options, build a Fido and identify inputs.

    Parameters:
        arglist: list of command-line argument strings; when None,
            sys.argv[1:] is used.

    Exits:
        sys.exit(0) after handling -v, '-show defaults' or '-show formats';
        sys.exit(1) when interrupted by KeyboardInterrupt.

    Raises:
        RuntimeError: when the only FILE is '-' (stdin) and fido.zip is True.
    """
    # The argparse package was introduced in 2.7
    from argparselocal import ArgumentParser
    if arglist is None:
        arglist = sys.argv[1:]

    parser = ArgumentParser(description=defaults['description'], epilog=defaults['epilog'], fromfile_prefix_chars='@')
    parser.add_argument('-v', default=False, action='store_true', help='show version information')
    parser.add_argument('-q', default=False, action='store_true', help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true', help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true', help='recurse into zip files')
    # -input and the positional FILE list are mutually exclusive.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-input', default=False, help='file containing a list of files to check, one per line. - means stdin')
    group.add_argument('files', nargs='*', default=[], metavar='FILE', help='files to check.  If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')
    parser.add_argument('-formats', metavar='PUIDS', default=None, help='comma separated string of formats to use in identification')
    parser.add_argument('-excludeformats', metavar='PUIDS', default=None, help='comma separated string of formats not to use in identification')
    parser.add_argument('-extension', default=False, action='store_true', help='use file extensions if the patterns fail.  May return many matches.')
    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use if no match. See README.txt')
    parser.add_argument('-bufsize', type=int, default=None, help='size of the buffer to match against')
    # The accepted values are the ones tested below: 'formats' and 'defaults'.
    parser.add_argument('-show', default=False, help='show "formats" or "defaults"')
    parser.add_argument('-xmlformats', default=None, metavar='XML1,...,XMLn', help='comma separated string of XML format files to add.')
    parser.add_argument('-confdir', default=None, help='configuration directory to load, for example, the format specifications from.')

    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)

    if args.v:
        sys.stdout.write("fido/" + version + "\n")
        sys.exit(0)
    if args.show == 'defaults':
        for (k, v) in defaults.iteritems():
            sys.stdout.write("{0} = {1!r}\n".format(k, v))
        sys.exit(0)
    # Interpret backslash escapes (e.g. "\n") typed literally on the command line.
    if args.matchprintf is not None:
        args.matchprintf = args.matchprintf.decode('string_escape')
    if args.nomatchprintf is not None:
        args.nomatchprintf = args.nomatchprintf.decode('string_escape')
    t0 = time.clock()
    fido = Fido(quiet=args.q, bufsize=args.bufsize, extension=args.extension,
                printmatch=args.matchprintf, printnomatch=args.nomatchprintf, zip=args.zip, conf_dir=args.confdir)

    #TODO: Allow conf options to be dis-included
    if args.xmlformats:
        for xml_file in args.xmlformats.split(','):
            fido.load(xml_file)

    #TODO: remove from maps
    if args.formats:
        args.formats = args.formats.split(',')
        fido.formats = [f for f in fido.formats if f.find('puid').text in args.formats]
    elif args.excludeformats:
        args.excludeformats = args.excludeformats.split(',')
        # Compare the puid *text*: the element object itself is never equal to
        # a string, so without .text no format would ever be excluded.
        fido.formats = [f for f in fido.formats if f.find('puid').text not in args.excludeformats]

    if args.show == 'formats':
        for fmt in fido.formats:
            sys.stdout.write(ET.tostring(fmt, encoding='UTF-8') + "\n")
        sys.exit(0)

    # Set up the list of inputs: stdin, a list file, or the positional FILEs.
    list_handle = None
    if args.input == '-':
        args.files = sys.stdin
    elif args.input:
        list_handle = open(args.input, 'r')
        args.files = list_handle

    # RUN
    try:
        if (not args.input) and len(args.files) == 1 and args.files[0] == '-':
            if fido.zip == True:
                raise RuntimeError("Multiple content read from stdin not yet supported.")
            fido.identify_stream(sys.stdin)
        else:
            for path in list_files(args.files, args.recurse):
                fido.identify_file(path)
    except KeyboardInterrupt:
        msg = "FIDO: Interrupt during:\n  File: {0}\n  Format: Puid={1.Identifier} [{1.FormatName}]\n  Sig: ID={2.SignatureID} [{2.SignatureName}]\n  Pat={3.ByteSequenceID} {3.regexstring!r}"
        sys.stderr.write(msg.format(fido.current_file, fido.current_format, fido.current_sig, fido.current_pat) + "\n")
        sys.exit(1)
    finally:
        # Close the list file opened for -input (stdin is left alone).
        if list_handle is not None:
            list_handle.close()

    if not args.q:
        sys.stdout.flush()
        fido.print_summary(time.clock() - t0)
Пример #6
0
Файл: fido.py Проект: 3dox/fido
def main(arglist=None):
    """Parse the fido command line and load versions.xml.

    arglist -- list of command-line arguments; when None, sys.argv[1:] is
               used.  An empty list is treated as a request for help ("-h").

    The versions file is read from the -confdir directory when given,
    otherwise from defaults['conf_dir'].  If it cannot be parsed, a message
    is written to stderr and the process exits.

    NOTE(review): the function body appears to continue beyond this excerpt;
    only the part up to loading versions.xml is documented here.
    """
    # The argparse package was introduced in 2.7
    # Start time; not read anywhere in the visible part of the function.
    t0 = time.clock()
    from argparselocal import ArgumentParser, RawTextHelpFormatter
    if arglist == None:
        arglist = sys.argv[1:]
    # len(arglist) == False holds only for an empty list (0 == False).
    # "-h" is appended in place, so an empty list passed by a caller is mutated.
    if len(arglist) == False:
        arglist.append("-h")
    # fromfile_prefix_chars='@': arguments starting with '@' name a file from
    # which further arguments are read (argparse feature).
    parser = ArgumentParser(description=defaults['description'],
                            epilog=defaults['epilog'],
                            fromfile_prefix_chars='@',
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('-v',
                        default=False,
                        action='store_true',
                        help='show version information')
    parser.add_argument('-q',
                        default=False,
                        action='store_true',
                        help='run (more) quietly')
    parser.add_argument('-recurse',
                        default=False,
                        action='store_true',
                        help='recurse into subdirectories')
    parser.add_argument('-zip',
                        default=False,
                        action='store_true',
                        help='recurse into zip and tar files')
    parser.add_argument(
        '-nocontainer',
        default=False,
        action='store_true',
        help=
        'disable deep scan of container documents, increases speed but may reduce accuracy with big files'
    )
    parser.add_argument(
        '-pronom_only',
        default=False,
        action='store_true',
        help=
        'disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results'
    )
    # -input and the positional FILE list are mutually exclusive.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-input',
        default=False,
        help=
        'file containing a list of files to check, one per line. - means stdin'
    )
    group.add_argument(
        'files',
        nargs='*',
        default=[],
        metavar='FILE',
        help=
        'files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.'
    )
    parser.add_argument('-filename',
                        default=None,
                        help='filename if file contents passed through STDIN')
    parser.add_argument(
        '-useformats',
        metavar='INCLUDEPUIDS',
        default=None,
        help='comma separated string of formats to use in identification')
    parser.add_argument(
        '-nouseformats',
        metavar='EXCLUDEPUIDS',
        default=None,
        help='comma separated string of formats not to use in identification')
    parser.add_argument(
        '-matchprintf',
        metavar='FORMATSTRING',
        default=None,
        help=
        'format string (Python style) to use on match. See nomatchprintf, README.txt.'
    )
    parser.add_argument(
        '-nomatchprintf',
        metavar='FORMATSTRING',
        default=None,
        help='format string (Python style) to use if no match. See README.txt')
    # Both buffer-size options default to None here; the help text quotes the
    # values from the module-level defaults dict.
    parser.add_argument(
        '-bufsize',
        type=int,
        default=None,
        help='size (in bytes) of the buffer to match against (default=' +
        str(defaults['bufsize']) + ' bytes)')
    parser.add_argument(
        '-container_bufsize',
        type=int,
        default=None,
        help='size (in bytes) of the buffer to match against (default=' +
        str(defaults['container_bufsize']) + ' bytes)')

    parser.add_argument(
        '-loadformats',
        default=None,
        metavar='XML1,...,XMLn',
        help='comma separated string of XML format files to add.')
    parser.add_argument(
        '-confdir',
        default=None,
        help=
        'configuration directory to load_fido_xml, for example, the format specifications from.'
    )

    # what is this doing here only once?
    #mydir = os.path.abspath(os.path.dirname(__file__))

    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)
    #    print args
    #    sys.exit()
    # process confdir
    # load versions.xml
    # and stick it in defaults
    # An explicit -confdir takes precedence over defaults['conf_dir'].
    if args.confdir:
        versionsFile = os.path.join(os.path.abspath(args.confdir),
                                    defaults['versions_file'])
    else:
        versionsFile = os.path.join(os.path.abspath(defaults['conf_dir']),
                                    defaults['versions_file'])
    try:
        versions = VET.parse(versionsFile)
    except Exception, e:
        sys.stderr.write(
            "An error occured loading versions.xml:\n{0}".format(e))
        # NOTE(review): sys.exit() without an argument exits with status 0,
        # although this is an error path.
        sys.exit()
Пример #7
0
    if arg != None:
        arglist = arg
    else:
        arglist = sys.argv[1:]
    #print arglist
    #exit()
    mydir = os.path.abspath(os.path.dirname(__file__))
    # parse version file to fetch versions
    versionsFile = os.path.join(mydir, 'conf', 'versions.xml')
    try:
        versions = VET.parse(versionsFile)
    except Exception, e:
        sys.stderr.write("An error occured loading versions.xml:\n{0}".format(e))
        sys.exit()
    xml_pronomSignature = os.path.join(mydir, 'conf', versions.find('pronomSignature').text)
    xml_pronomZipFile = os.path.join(mydir, 'conf', "pronom-xml-v{0}.zip".format(versions.find('pronomVersion').text))
    parser = ArgumentParser(description='Produce the fido format xml that is loaded at run-time')
    parser.add_argument('-input', default=xml_pronomZipFile, help='input file, a zip containing Pronom xml files')
    parser.add_argument('-output', default=xml_pronomSignature, help='output file')
    parser.add_argument('-puid', default=None, help='a particular PUID record to extract')
    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)
    # print os.path.abspath(args.input), os.path.abspath(args.output)
    info = FormatInfo(args.input)
    info.load_pronom_xml(args.puid)
    info.save(args.output)
    print >> sys.stderr, 'Converted {0} PRONOM formats to FIDO signatures'.format(len(info.formats))
    
# Call main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()    
Пример #8
0
def main(arglist=None):
    """Command-line entry point: parse options, build a Fido and identify inputs.

    Parameters:
        arglist: list of command-line argument strings; when None,
            sys.argv[1:] is used.  An empty argument list is treated as
            a request for help ("-h"); the caller's list is not modified.

    Exits:
        sys.exit(0) after handling -v; sys.exit(1) when interrupted by
        KeyboardInterrupt.

    Raises:
        RuntimeError: when the only FILE is '-' (stdin) and fido.zip is True.
    """
    # The argparse package was introduced in 2.7
    t0 = time.clock()
    from argparselocal import ArgumentParser
    if arglist is None:
        arglist = sys.argv[1:]
    if not arglist:
        # Use a fresh list instead of appending, so a list passed in by the
        # caller is never mutated.
        arglist = ["-h"]
    parser = ArgumentParser(description=defaults['description'], epilog=defaults['epilog'], fromfile_prefix_chars='@')
    parser.add_argument('-v', default=False, action='store_true', help='show version information')
    parser.add_argument('-q', default=False, action='store_true', help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true', help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true', help='recurse into zip and tar files')
    parser.add_argument('-nocontainer', default=False, action='store_true', help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')
    # -input and the positional FILE list are mutually exclusive.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-input', default=False, help='file containing a list of files to check, one per line. - means stdin')
    group.add_argument('files', nargs='*', default=[], metavar='FILE', help='files to check.  If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')
    parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None, help='comma separated string of formats to use in identification')
    parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None, help='comma separated string of formats not to use in identification')
    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use if no match. See README.txt')
    parser.add_argument('-bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default='+str(defaults['bufsize'])+' bytes)')
    parser.add_argument('-container_bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default='+str(defaults['container_bufsize'])+' bytes)')

    parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn', help='comma separated string of XML format files to add.')
    parser.add_argument('-confdir', default=None, help='configuration directory to load_fido_xml, for example, the format specifications from.')

    # PROCESS ARGUMENTS
    args = parser.parse_args(arglist)

    if args.v:
        sys.stdout.write("fido/" + version + "\n")
        sys.exit(0)
    # Interpret backslash escapes (e.g. "\n") typed literally on the command line.
    if args.matchprintf is not None:
        args.matchprintf = args.matchprintf.decode('string_escape')
    if args.nomatchprintf is not None:
        args.nomatchprintf = args.nomatchprintf.decode('string_escape')
    fido = Fido(quiet=args.q, bufsize=args.bufsize,
                printmatch=args.matchprintf, printnomatch=args.nomatchprintf, zip=args.zip, nocontainer=args.nocontainer, conf_dir=args.confdir)

    #TODO: Allow conf options to be dis-included
    if args.loadformats:
        for xml_file in args.loadformats.split(','):
            fido.load_fido_xml(xml_file)

    #TODO: remove from maps
    if args.useformats:
        args.useformats = args.useformats.split(',')
        fido.formats = [f for f in fido.formats if f.find('puid').text in args.useformats]
    elif args.nouseformats:
        args.nouseformats = args.nouseformats.split(',')
        fido.formats = [f for f in fido.formats if f.find('puid').text not in args.nouseformats]

    # Set up to use stdin, or open input files:
    list_handle = None
    if args.input == '-':
        args.files = sys.stdin
    elif args.input:
        list_handle = open(args.input, 'r')
        args.files = list_handle

    # RUN
    try:
        if (not args.input) and len(args.files) == 1 and args.files[0] == '-':
            if fido.zip == True:
                raise RuntimeError("Multiple content read from stdin not yet supported.")
            fido.identify_stream(sys.stdin)
        else:
            for path in list_files(args.files, args.recurse):
                fido.identify_file(path)
    except KeyboardInterrupt:
        # MdR: this seems to be broken?
        msg = "FIDO: Interrupt during:\n  File: {0}\n  Format: Puid={1.Identifier} [{1.FormatName}]\n  Sig: ID={2.SignatureID} [{2.SignatureName}]\n  Pat={3.ByteSequenceID} {3.regexstring!r}"
        sys.stderr.write(msg.format(fido.current_file, fido.current_format, fido.current_sig, fido.current_pat))
        sys.exit(1)
    finally:
        # Close the list file opened for -input (stdin is left alone).
        if list_handle is not None:
            list_handle.close()

    if not args.q:
        sys.stdout.flush()
        fido.print_summary(time.clock() - t0)
        sys.stderr.flush()
Пример #9
0
        buf.write(calculate_repetition(".", pos, offset, maxoffset))
        buf.write("\\Z")

    val = buf.getvalue()
    buf.close()
    return val


if __name__ == "__main__":
    # Script mode: read a zip of Pronom xml files and write the fido format xml.
    import sys
    from argparselocal import ArgumentParser

    arglist = sys.argv[1:]

    # Default input/output paths are resolved relative to this file.
    mydir = os.path.abspath(os.path.dirname(__file__))

    parser = ArgumentParser(description="Produce the fido format xml that is loaded at run-time")
    parser.add_argument("-input",
                        help="input file, a zip containing Pronom xml files",
                        default=os.path.join(mydir, "conf", "pronom-xml.zip"))
    parser.add_argument("-output",
                        help="output file",
                        default=os.path.join(mydir, "conf", "formats.xml"))
    parser.add_argument("-puid",
                        help="a particular PUID record to extract",
                        default=None)

    # Parse the command line, load the requested records, write the output
    # file and report the number of formats on stderr.
    args = parser.parse_args(arglist)
    info = FormatInfo(args.input)
    info.load_pronom_xml(args.puid)
    info.save(args.output)
    print >> sys.stderr, "FIDO: {0} formats".format(len(info.formats))