def collection_input(args, config=None):
    """
    Compile multiple articles into a single composite ePub.

    Collection Input Mode is akin to such formats as Collections, Issues, and
    Omnibus; it may also be useful for those interested in the simple
    distribution of a reading list, personal publications, or topic reference.

    Collection Input Mode produces output that is necessarily unlike the
    output generated by Single or Batch (which is just sequential Single)
    input modes. The primary difference in output lies with the ePub metadata,
    as applying metadata from any single article to the whole would be
    inappropriate.

    Unlike other input modes, Collection Mode is strictly dependent on the
    local directory of execution. If there is a file named "order.txt" in the
    local directory, this file should contain the name of one input XML file
    on each line; the files will be added to the ePub output by line-order.
    If there is no "order.txt" file, Collection Mode will assume that all XML
    files are input and the order of the articles in the collection will be
    random.

    The collection title is taken from args.collection when it is not the
    literal True; otherwise it is read from the first line of "title.txt",
    falling back to the name of the current directory.

    Collection Input Mode has default epubcheck behavior: it will place a
    system call to epubcheck unless specified otherwise (--no-epubcheck or -N
    flags).

    Parameters:
      args
          Parsed command-line arguments. The attributes read here are
          collection, no_dtd_validation and no_epubcheck.
      config
          Configuration module. The attributes read here are image_cache and
          use_image_cache. When None, get_config_module() supplies it.

    Exits the interpreter with status 1 if an article's images cannot be
    found locally or in the cache and the user declines to download them.
    """
    if config is None:
        config = get_config_module()

    try:
        order = open('order.txt', 'r')
    except IOError:  # No order.txt
        xml_files = list_xml_files(dir=os.getcwd())
    else:
        # Add all nonempty lines, in order, to the xml_files list; the with
        # block guarantees the handle is closed even if reading fails
        with order:
            xml_files = [line.strip() for line in order if line.strip()]

    # The output name will be the same as the parent directory name
    # This will also serve as the dc:title
    output_name = os.path.split(os.getcwd())[1]

    # The standard make_epub() method will not work for Collection Mode
    # So the work done here is an adaptation of it
    print('Processing output to {0}.epub'.format(output_name))

    # Copy files from base_epub to the new output
    if os.path.isdir(output_name):
        dir_exists(output_name)
    epub_base = os.path.join(CACHE_LOCATION, 'base_epub')
    shutil.copytree(epub_base, output_name)

    if args.collection is True:
        try:
            title_txt = open('title.txt', 'r')
        except IOError:  # No title.txt
            title = output_name
        else:
            with title_txt:
                title = title_txt.readline().strip()
            if not title:
                title = output_name
                print('title.txt was empty or title was not on first line!')
                print('Defaulting to name of parent directory. {0}'.format(title))
    else:
        title = args.collection

    toc = ncx.NCX(oae_version=__version__, location=output_name,
                  collection_mode=True)
    myopf = opf.OPF(location=output_name, collection_mode=True, title=title)

    # Now it is time to operate on each of the xml files
    for xml_file in xml_files:
        # raw_name is used below to locate a local "images-<name>" directory
        raw_name = u_input.local_input(xml_file)
        # NOTE(review): the value of no_dtd_validation is passed straight
        # through as "validation"; presumably the option is stored with
        # inverted polarity (True unless the flag is given) -- confirm.
        parsed_article = Article(xml_file, validation=args.no_dtd_validation)
        toc.take_article(parsed_article)
        myopf.take_article(parsed_article)

        # Get the Digital Object Identifier
        doi = parsed_article.get_DOI()
        # Expects exactly one '/' separating the two parts of the DOI
        journal_doi, article_doi = doi.split('/')

        # Check for images: local directory first, then the image cache,
        # and finally an interactive offer to download them
        img_dir = os.path.join(output_name, 'OPS',
                               'images-{0}'.format(article_doi))
        expected_local = 'images-{0}'.format(raw_name)
        if os.path.isdir(expected_local):
            utils.images.local_images(expected_local, img_dir)
        else:
            article_cache = os.path.join(config.image_cache, journal_doi,
                                         article_doi)
            if os.path.isdir(article_cache):
                utils.images.image_cache(article_cache, img_dir)
            else:
                print('Images for {0} (DOI: {1}) could not be found!'.format(xml_file, doi))
                r = input('Try to download them? [Y/n]')
                if r in ['y', 'Y', '']:
                    os.mkdir(img_dir)
                    utils.images.fetch_plos_images(article_doi, img_dir,
                                                   parsed_article)
                    if config.use_image_cache:
                        utils.images.move_images_to_cache(img_dir,
                                                          article_cache)
                else:
                    sys.exit(1)

        # TODO: Content stuff
        if journal_doi == '10.1371':  # PLoS's publisher DOI
            # The instance is not used afterwards; presumably construction
            # itself produces the article content in the output -- confirm.
            ops.OPSPLoS(parsed_article, output_name)
            # TODO: Workflow change, parse table of contents from OPS
            # processed document

    toc.write()
    myopf.write()
    utils.epub_zip(output_name)

    # Running epubcheck on the output verifies the validity of the ePub,
    # requires a local installation of java and epubcheck.
    # NOTE(review): epubcheck runs when no_epubcheck is truthy; presumably the
    # option is stored with inverted polarity (True unless -N is given) --
    # confirm against the argument parser.
    if args.no_epubcheck:
        epubcheck('{0}.epub'.format(output_name), config)
def main(argv=None):
    """
    Entry point for the collection command: build one EPUB from the articles
    listed in a collection file.

    The command line is parsed by docopt against the module docstring. The
    options read here are COLLECTION_FILE, --log-to, --no-log-file,
    --log-level, --silent, --verbosity, --output, --no-validate, --epub2,
    --epub3, --images, --no-cleanup and --no-epubcheck.

    The collection file holds one input XML path per line; blank lines are
    ignored. Each path is evaluated relative to the directory of the
    collection file. The output directory is --output when given, otherwise
    the configured default_output (made relative to the collection file when
    it is not absolute), with the collection file's root name appended.

    The EPUB version is decided once, from --epub2 / --epub3 or else from the
    first article's publisher default, and is used for every article.

    Parameters:
      argv
          Argument list handed to docopt; None lets docopt use its default.

    Exits via sys.exit('Unable to continue') when the collection file does
    not exist.
    """
    # Local import keeps this block self-contained; only needed to name the
    # "already exists" error code below
    import errno

    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    c_file = args['COLLECTION_FILE']
    c_file_root = utils.file_root_name(c_file)
    abs_input_path = utils.get_absolute_path(c_file)
    input_dirname = os.path.dirname(abs_input_path)

    # Default the log file to sit next to the collection file
    if not args['--log-to']:
        log_to = os.path.join(input_dirname, c_file_root + '.log')
    else:
        log_to = args['--log-to']

    # Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               log_to,
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    command_log = logging.getLogger('openaccess_epub.commands.collection')

    # Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    # Quit if the collection file is not there
    if not os.path.isfile(c_file):
        command_log.critical('File does not exist {0}'.format(c_file))
        sys.exit('Unable to continue')

    command_log.info('Parsing collection file: {0}'.format(c_file))
    with open(c_file, 'r') as f:
        # Skip blank lines: an empty entry would otherwise resolve to the
        # collection file's directory and be handed to Article as an input
        inputs = [line.strip() for line in f if line.strip()]

    # Get the output directory
    if args['--output'] is not None:
        output_directory = utils.get_absolute_path(args['--output'])
    elif os.path.isabs(config.default_output):  # Absolute remains so
        output_directory = config.default_output
    else:  # Else rendered relative to input
        output_directory = os.path.normpath(
            os.path.join(input_dirname, config.default_output))

    output_directory = os.path.join(output_directory, c_file_root)
    command_log.info(
        'Processing collection output in {0}'.format(output_directory))

    if os.path.isdir(output_directory):
        utils.dir_exists(output_directory)
    try:
        os.makedirs(output_directory)
    except OSError as err:
        # An already-existing directory is acceptable; anything else is
        # logged and processing continues, as before
        if err.errno != errno.EEXIST:
            command_log.exception(
                'Unable to recursively create output directories')

    # Instantiate collection NCX and OPF
    navigation = Navigation(collection=True)
    package = Package(collection=True, title=c_file_root)

    # Copy over the basic epub directory
    make_epub_base(output_directory)

    epub_version = None

    # Iterate over the inputs
    for xml_file in inputs:
        xml_path = utils.evaluate_relative_path(input_dirname, xml_file)
        parsed_article = Article(xml_path,
                                 validation=not args['--no-validate'])

        if epub_version is None:  # Only set this once, no mixing!
            if args['--epub2']:
                epub_version = 2
            elif args['--epub3']:
                epub_version = 3
            else:
                epub_version = parsed_article.publisher.epub_default

        navigation.process(parsed_article)
        package.process(parsed_article)

        # Get the images
        openaccess_epub.utils.images.get_images(output_directory,
                                                args['--images'],
                                                xml_path,
                                                config,
                                                parsed_article)

        parsed_article.publisher.render_content(output_directory,
                                                epub_version)

    # epub_version is still None when there were no inputs; in that case
    # nothing is rendered and only the base directory is zipped
    if epub_version == 2:
        navigation.render_EPUB2(output_directory)
        package.render_EPUB2(output_directory)
    elif epub_version == 3:
        navigation.render_EPUB3(output_directory)
        package.render_EPUB3(output_directory)
    epub_zip(output_directory)

    # Cleanup removes the produced output directory, keeps the EPUB
    if not args['--no-cleanup']:
        command_log.info('Removing {0}'.format(output_directory))
        shutil.rmtree(output_directory)

    # Running epubcheck on the output verifies the validity of the ePub,
    # requires a local installation of java and epubcheck.
    if not args['--no-epubcheck']:
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)
def main(argv=None):
    """
    Entry point for the collection command: build one EPUB from the articles
    listed in a collection file.

    The command line is parsed by docopt against the module docstring. The
    options read here are COLLECTION_FILE, --log-to, --no-log-file,
    --log-level, --silent, --verbosity, --output, --no-validate, --epub2,
    --epub3, --images, --no-cleanup and --no-epubcheck.

    The collection file holds one input XML path per line; blank lines are
    ignored. Each path is evaluated relative to the directory of the
    collection file. The output directory is --output when given, otherwise
    the configured default_output (made relative to the collection file when
    it is not absolute), with the collection file's root name appended.

    The EPUB version is decided once, from --epub2 / --epub3 or else from the
    first article's publisher default, and is used for every article.

    Parameters:
      argv
          Argument list handed to docopt; None lets docopt use its default.

    Exits via sys.exit('Unable to continue') when the collection file does
    not exist.
    """
    # Local import keeps this block self-contained; only needed to name the
    # "already exists" error code below
    import errno

    args = docopt(__doc__,
                  argv=argv,
                  version='OpenAccess_EPUB v.' + __version__,
                  options_first=True)

    c_file = args['COLLECTION_FILE']
    c_file_root = utils.file_root_name(c_file)
    abs_input_path = utils.get_absolute_path(c_file)
    input_dirname = os.path.dirname(abs_input_path)

    # Default the log file to sit next to the collection file
    if not args['--log-to']:
        log_to = os.path.join(input_dirname, c_file_root + '.log')
    else:
        log_to = args['--log-to']

    # Basic logging configuration
    oae_logging.config_logging(args['--no-log-file'],
                               log_to,
                               args['--log-level'],
                               args['--silent'],
                               args['--verbosity'])

    command_log = logging.getLogger('openaccess_epub.commands.collection')

    # Load the config module, we do this after logging configuration
    config = openaccess_epub.utils.load_config_module()

    # Quit if the collection file is not there
    if not os.path.isfile(c_file):
        command_log.critical('File does not exist {0}'.format(c_file))
        sys.exit('Unable to continue')

    command_log.info('Parsing collection file: {0}'.format(c_file))
    with open(c_file, 'r') as f:
        # Skip blank lines: an empty entry would otherwise resolve to the
        # collection file's directory and be handed to Article as an input
        inputs = [line.strip() for line in f if line.strip()]

    # Get the output directory
    if args['--output'] is not None:
        output_directory = utils.get_absolute_path(args['--output'])
    elif os.path.isabs(config.default_output):  # Absolute remains so
        output_directory = config.default_output
    else:  # Else rendered relative to input
        output_directory = os.path.normpath(
            os.path.join(input_dirname, config.default_output))

    output_directory = os.path.join(output_directory, c_file_root)
    command_log.info(
        'Processing collection output in {0}'.format(output_directory))

    if os.path.isdir(output_directory):
        utils.dir_exists(output_directory)
    try:
        os.makedirs(output_directory)
    except OSError as err:
        # An already-existing directory is acceptable; anything else is
        # logged and processing continues, as before
        if err.errno != errno.EEXIST:
            command_log.exception(
                'Unable to recursively create output directories')

    # Instantiate collection NCX and OPF
    navigation = Navigation(collection=True)
    package = Package(collection=True, title=c_file_root)

    # Copy over the basic epub directory
    make_epub_base(output_directory)

    epub_version = None

    # Iterate over the inputs
    for xml_file in inputs:
        xml_path = utils.evaluate_relative_path(input_dirname, xml_file)
        parsed_article = Article(xml_path,
                                 validation=not args['--no-validate'])

        if epub_version is None:  # Only set this once, no mixing!
            if args['--epub2']:
                epub_version = 2
            elif args['--epub3']:
                epub_version = 3
            else:
                epub_version = parsed_article.publisher.epub_default

        navigation.process(parsed_article)
        package.process(parsed_article)

        # Get the images
        openaccess_epub.utils.images.get_images(output_directory,
                                                args['--images'],
                                                xml_path,
                                                config,
                                                parsed_article)

        parsed_article.publisher.render_content(output_directory,
                                                epub_version)

    # epub_version is still None when there were no inputs; in that case
    # nothing is rendered and only the base directory is zipped
    if epub_version == 2:
        navigation.render_EPUB2(output_directory)
        package.render_EPUB2(output_directory)
    elif epub_version == 3:
        navigation.render_EPUB3(output_directory)
        package.render_EPUB3(output_directory)
    epub_zip(output_directory)

    # Cleanup removes the produced output directory, keeps the EPUB
    if not args['--no-cleanup']:
        command_log.info('Removing {0}'.format(output_directory))
        shutil.rmtree(output_directory)

    # Running epubcheck on the output verifies the validity of the ePub,
    # requires a local installation of java and epubcheck.
    if not args['--no-epubcheck']:
        epub_name = '{0}.epub'.format(output_directory)
        openaccess_epub.utils.epubcheck(epub_name, config)