示例#1
0
def main(argv=None):
    """
    Run the convert command over every INPUT given on the command line.

    argv is handed directly to docopt together with this module's __doc__.
    Each INPUT is classified by its text: a name ending in '.xml' is used as
    a local file, a 'doi:' prefix is resolved through input_utils.doi_input,
    and an 'http:' or 'https:' prefix through input_utils.url_input. Any
    other input terminates the program through sys.exit with a message.

    The located XML is parsed into an Article and passed to make_EPUB. Unless
    disabled by the corresponding options, the output directory is removed
    afterwards and epubcheck is run on '<output_directory>.epub' when
    make_EPUB reported success.

    The program exits with status 1 when the parsed article has no publisher.
    """
    args = docopt(__doc__,
                  argv=argv,
                  options_first=True,
                  version='OpenAccess_EPUB v.' + __version__)

    #None is used when neither version flag was supplied
    epub_version = 3 if args['--epub3'] else (2 if args['--epub2'] else None)

    #The logging related options are read once and reused below
    no_log_file = args['--no-log-file']
    log_to = args['--log-to']
    log_level = args['--log-level']

    #Basic logging configuration
    oae_logging.config_logging(no_log_file, log_to, log_level,
                               args['--silent'], args['--verbosity'])

    #The 'openaccess_epub' logger was set up by the call above
    command_log = logging.getLogger('openaccess_epub.commands.convert')

    #The config module is loaded only after logging has been configured
    config = openaccess_epub.utils.load_config_module()

    current_dir = os.getcwd()

    #A log file next to each input is kept only if log files are enabled and
    #no explicit log destination was requested
    per_input_log = not (no_log_file or log_to)

    for inpt in args['INPUT']:
        #Log to a temporary file while the input utilities do their work
        if per_input_log:
            oae_logging.replace_filehandler(logname='openaccess_epub',
                                            new_file='temp.log',
                                            level=log_level,
                                            frmt=oae_logging.STANDARD_FORMAT)

        command_log.info('Processing input: {0}'.format(inpt))

        #Work out the root name of the input and where its XML file is
        lowered = inpt.lower()
        if lowered.endswith('.xml'):  # Direct XML file
            root_name = openaccess_epub.utils.file_root_name(inpt)
            abs_input_path = openaccess_epub.utils.get_absolute_path(inpt)
        else:
            if lowered.startswith('doi:'):  # DOI
                root_name = input_utils.doi_input(inpt)
            elif lowered.startswith(('http:', 'https:')):  # URL
                root_name = input_utils.url_input(inpt)
            else:
                sys.exit('{0} not recognized as XML, DOI, or URL'.format(inpt))
            #DOI and URL inputs are both located in the starting directory
            abs_input_path = os.path.join(current_dir, root_name + '.xml')

        if per_input_log:
            log_path = os.path.join(os.path.dirname(abs_input_path),
                                    root_name + '.log')

            #Re-base the log file to its location beside the input
            oae_logging.replace_filehandler(logname='openaccess_epub',
                                            new_file=log_path,
                                            level=log_level,
                                            frmt=oae_logging.STANDARD_FORMAT)
            #Carry the temporary log over to the new file, then discard it
            shutil.copy2('temp.log', log_path)
            os.remove('temp.log')

        #Logging is configured at this point, so the article can be parsed
        parsed_article = Article(abs_input_path,
                                 validation=not args['--no-validate'])

        if parsed_article.publisher is None:
            command_log.critical('Publisher support was not established, aborting')
            sys.exit(1)

        #Select the base output directory
        explicit_output = args['--output']
        if explicit_output is not None:
            base_directory = openaccess_epub.utils.get_absolute_path(explicit_output)
        elif os.path.isabs(config.default_output):  # Absolute remains so
            base_directory = config.default_output
        else:  # A relative default is rendered relative to the input
            input_dirname = os.path.dirname(abs_input_path)
            base_directory = os.path.normpath(
                os.path.join(input_dirname, config.default_output))

        #The root name must be added on for output
        output_directory = os.path.join(base_directory, root_name)

        success = make_EPUB(parsed_article,
                            output_directory,
                            abs_input_path,
                            args['--images'],
                            config_module=config,
                            epub_version=epub_version)

        #Cleanup removes the produced output directory, keeps the EPUB
        if not args['--no-cleanup']:
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

        #Running epubcheck on the output verifies the validity of the EPUB,
        #requires a local installation of java and epubcheck.
        if not args['--no-epubcheck'] and success:
            epub_name = '{0}.epub'.format(output_directory)
            openaccess_epub.utils.epubcheck(epub_name, config)
示例#2
0
def get_output_directory(args):
    """
    Determination of the directory for output placement involves possibilities
    for explicit user instruction (absolute path or relative to execution) and
    implicit default configuration (absolute path or relative to input) from
    the system global configuration file. This function is responsible for
    returning the appropriate output directory.

    It utilizes the parsed args, passed as an object, and is self-sufficient in
    accessing the config file, which is loaded from 'config.py' inside the
    directory returned by cache_location().

    The attributes read from args are output, input, batch, zip and
    collection. The order of precedence is:

    1. args.output, returned as is when absolute, otherwise passed through
       evaluate_relative_path.
    2. config.default_output when it is absolute.
    3. config.default_output evaluated relative to the parent directory of
       the input. Inputs starting with 'http:' or 'https:' are named through
       url_input and inputs starting with 'doi:' through doi_input (both
       without downloading, both matched case-insensitively) and are located
       in the current working directory; any other input is treated as a
       local path.
    4. The batch path, the parent of the zip file combined with
       config.default_output, or the current working directory for a
       collection.

    Returned paths are intended to be absolute; this relies on
    evaluate_relative_path and get_absolute_path returning absolute paths.

    If the config file cannot be read, or none of the cases above apply, the
    program exits through sys.exit with an explanatory message, which gives a
    non-zero exit status.
    """
    #Import the global config file as a module. importlib is used because the
    #imp module is deprecated and was removed in Python 3.12.
    import importlib.util
    config_path = os.path.join(cache_location(), 'config.py')
    spec = importlib.util.spec_from_file_location('config', config_path)
    config = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(config)
    except IOError:
        #Passing the message to sys.exit reports failure to the caller
        #instead of exiting with a success status
        sys.exit('Could not find {0}, please run oae-quickstart'.format(
            config_path))
    #imp.load_source registered the loaded module under this name; keep doing
    #so for any code that relies on it
    sys.modules['config'] = config

    #args.output is the explicit user instruction, None if unspecified
    if args.output:
        #args.output may be an absolute path
        if os.path.isabs(args.output):
            return args.output  # return as is
        #or args.output may be a relative path, relative to cwd
        return evaluate_relative_path(relative=args.output)

    #config.default_output for default behavior without explicit instruction
    #config.default_output may be an absolute path
    if os.path.isabs(config.default_output):
        return config.default_output

    #or config.default_output may be a relative path, relative to input
    if args.input:  # The case of single input
        #Prefixes are matched case-insensitively and https is accepted,
        #consistent with how the convert command classifies its inputs
        lowered = args.input.lower()
        if lowered.startswith(('http:', 'https:')):
            #Fetched from internet by URL
            raw_name = url_input(args.input, download=False)
            abs_input_path = os.path.join(os.getcwd(), raw_name + '.xml')
        elif lowered.startswith('doi:'):
            #Fetched from internet by DOI
            raw_name = doi_input(args.input, download=False)
            abs_input_path = os.path.join(os.getcwd(), raw_name + '.xml')
        else:
            #Local option, could be anywhere
            abs_input_path = get_absolute_path(args.input)
        return evaluate_relative_path(os.path.dirname(abs_input_path),
                                      config.default_output)

    if args.batch:  # The case of Batch Mode
        #Batch should only work on a supplied directory
        return get_absolute_path(args.batch)

    if args.zip:
        #Zip is a local-only option, behaves just like local xml
        abs_input_path = get_absolute_path(args.zip)
        return evaluate_relative_path(os.path.dirname(abs_input_path),
                                      config.default_output)

    if args.collection:
        return os.getcwd()

    #Un-handled or currently unsupported options
    sys.exit('The output location could not be determined...')
示例#3
0
def get_output_directory(args):
    """
    Determination of the directory for output placement involves possibilities
    for explicit user instruction (absolute path or relative to execution) and
    implicit default configuration (absolute path or relative to input) from
    the system global configuration file. This function is responsible for
    returning the appropriate output directory.

    It utilizes the parsed args, passed as an object, and is self-sufficient in
    accessing the config file, which is loaded from 'config.py' inside the
    directory returned by cache_location().

    The attributes read from args are output, input, batch, zip and
    collection. The order of precedence is:

    1. args.output, returned as is when absolute, otherwise passed through
       evaluate_relative_path.
    2. config.default_output when it is absolute.
    3. config.default_output evaluated relative to the parent directory of
       the input. Inputs starting with 'http:' or 'https:' are named through
       url_input and inputs starting with 'doi:' through doi_input (both
       without downloading, both matched case-insensitively) and are located
       in the current working directory; any other input is treated as a
       local path.
    4. The batch path, the parent of the zip file combined with
       config.default_output, or the current working directory for a
       collection.

    Returned paths are intended to be absolute; this relies on
    evaluate_relative_path and get_absolute_path returning absolute paths.

    If the config file cannot be read, or none of the cases above apply, the
    program exits through sys.exit with an explanatory message, which gives a
    non-zero exit status.
    """
    #Import the global config file as a module. importlib is used because the
    #imp module is deprecated and was removed in Python 3.12.
    import importlib.util
    config_path = os.path.join(cache_location(), 'config.py')
    spec = importlib.util.spec_from_file_location('config', config_path)
    config = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(config)
    except IOError:
        #Passing the message to sys.exit reports failure to the caller
        #instead of exiting with a success status
        sys.exit('Could not find {0}, please run oae-quickstart'.format(config_path))
    #imp.load_source registered the loaded module under this name; keep doing
    #so for any code that relies on it
    sys.modules['config'] = config

    #args.output is the explicit user instruction, None if unspecified
    if args.output:
        #args.output may be an absolute path
        if os.path.isabs(args.output):
            return args.output  # return as is
        #or args.output may be a relative path, relative to cwd
        return evaluate_relative_path(relative=args.output)

    #config.default_output for default behavior without explicit instruction
    #config.default_output may be an absolute path
    if os.path.isabs(config.default_output):
        return config.default_output

    #or config.default_output may be a relative path, relative to input
    if args.input:  # The case of single input
        #Prefixes are matched case-insensitively and https is accepted,
        #consistent with how the convert command classifies its inputs
        lowered = args.input.lower()
        if lowered.startswith(('http:', 'https:')):
            #Fetched from internet by URL
            raw_name = url_input(args.input, download=False)
            abs_input_path = os.path.join(os.getcwd(), raw_name + '.xml')
        elif lowered.startswith('doi:'):
            #Fetched from internet by DOI
            raw_name = doi_input(args.input, download=False)
            abs_input_path = os.path.join(os.getcwd(), raw_name + '.xml')
        else:
            #Local option, could be anywhere
            abs_input_path = get_absolute_path(args.input)
        abs_input_parent = os.path.dirname(abs_input_path)
        return evaluate_relative_path(abs_input_parent, config.default_output)

    if args.batch:  # The case of Batch Mode
        #Batch should only work on a supplied directory
        return get_absolute_path(args.batch)

    if args.zip:
        #Zip is a local-only option, behaves just like local xml
        abs_input_parent = os.path.dirname(get_absolute_path(args.zip))
        return evaluate_relative_path(abs_input_parent, config.default_output)

    if args.collection:
        return os.getcwd()

    #Un-handled or currently unsupported options
    sys.exit('The output location could not be determined...')
示例#4
0
def main(argv=None):
    """
    Convert every INPUT named on the command line by calling make_EPUB.

    The argument list argv is parsed by docopt against this module's __doc__.
    For each INPUT the function determines a root name and the absolute path
    of its XML file: inputs ending in '.xml' are taken as local files, inputs
    starting with 'doi:' go through input_utils.doi_input, and inputs
    starting with 'http:' or 'https:' go through input_utils.url_input. An
    input matching none of these ends the program through sys.exit with a
    message.

    When log files are enabled and no explicit log destination was given,
    logging is directed to 'temp.log' while the input is being identified and
    is then re-based to '<root name>.log' beside the input XML.

    The XML is parsed into an Article; if its publisher is None the program
    exits with status 1. After make_EPUB runs, the output directory is
    removed unless '--no-cleanup' was given, and epubcheck is run on
    '<output_directory>.epub' unless '--no-epubcheck' was given or make_EPUB
    did not report success.
    """
    args = docopt(__doc__, argv=argv, options_first=True,
                  version='OpenAccess_EPUB v.' + __version__)

    #Start from "unspecified" and override for whichever flag was given,
    #with '--epub3' taking precedence over '--epub2'
    epub_version = None
    if args['--epub3']:
        epub_version = 3
    elif args['--epub2']:
        epub_version = 2

    #Option values needed repeatedly for log handling
    skip_log_file = args['--no-log-file']
    log_target = args['--log-to']
    level = args['--log-level']

    #Basic logging configuration
    oae_logging.config_logging(skip_log_file,
                               log_target,
                               level,
                               args['--silent'],
                               args['--verbosity'])

    #Get a logger, the 'openaccess_epub' logger was set up above
    command_log = logging.getLogger('openaccess_epub.commands.convert')

    #Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    start_dir = os.getcwd()

    #True when a log file should be written beside each input
    rebase_log = not skip_log_file and not log_target

    #Our basic flow is to iterate over the args['INPUT'] list
    for inpt in args['INPUT']:
        #We have to temporarily re-base our log while input utils do some work
        if rebase_log:
            oae_logging.replace_filehandler(
                logname='openaccess_epub',
                new_file='temp.log',
                level=level,
                frmt=oae_logging.STANDARD_FORMAT)

        command_log.info('Processing input: {0}'.format(inpt))

        #First we need to know the name of the file and where it is
        inpt_lower = inpt.lower()
        is_xml = inpt_lower.endswith('.xml')
        if is_xml:  # This is direct XML file
            root_name = openaccess_epub.utils.file_root_name(inpt)
        elif inpt_lower.startswith('doi:'):  # This is a DOI
            root_name = input_utils.doi_input(inpt)
        elif inpt_lower.startswith(('http:', 'https:')):  # This is a URL
            root_name = input_utils.url_input(inpt)
        else:
            sys.exit('{0} not recognized as XML, DOI, or URL'.format(inpt))

        if is_xml:
            abs_input_path = openaccess_epub.utils.get_absolute_path(inpt)
        else:
            #DOI and URL inputs are located in the starting directory
            abs_input_path = os.path.join(start_dir, root_name + '.xml')

        input_dir = os.path.dirname(abs_input_path)

        if rebase_log:
            log_path = os.path.join(input_dir, root_name + '.log')

            #Re-base the log file to the new file location
            oae_logging.replace_filehandler(
                logname='openaccess_epub',
                new_file=log_path,
                level=level,
                frmt=oae_logging.STANDARD_FORMAT)
            #Now we move over to the new log file
            shutil.copy2('temp.log', log_path)
            os.remove('temp.log')

        #Now that we should be done configuring logging, let's parse the article
        parsed_article = Article(abs_input_path,
                                 validation=not args['--no-validate'])

        if parsed_article.publisher is None:
            command_log.critical(
                'Publisher support was not established, aborting')
            sys.exit(1)

        #Get the output directory
        if args['--output'] is not None:
            output_parent = openaccess_epub.utils.get_absolute_path(
                args['--output'])
        elif os.path.isabs(config.default_output):  # Absolute remains so
            output_parent = config.default_output
        else:  # Else rendered relative to input
            output_parent = os.path.normpath(
                os.path.join(input_dir, config.default_output))

        #The root name must be added on for output
        output_directory = os.path.join(output_parent, root_name)

        #Make the call to make_EPUB
        success = make_EPUB(parsed_article,
                            output_directory,
                            abs_input_path,
                            args['--images'],
                            config_module=config,
                            epub_version=epub_version)

        #Cleanup removes the produced output directory, keeps the EPUB
        if not args['--no-cleanup']:
            command_log.info('Removing {0}'.format(output_directory))
            shutil.rmtree(output_directory)

        #Running epubcheck on the output verifies the validity of the EPUB,
        #requires a local installation of java and epubcheck.
        if not args['--no-epubcheck'] and success:
            openaccess_epub.utils.epubcheck(
                '{0}.epub'.format(output_directory), config)