def getVariables(self):
    """
    Collect the settings this step needs and store them on the instance.

    Read from the config file: the OJS server, the server user, the OJS
    application (admin) user and the path to the import tool. The journal
    path is then resolved, either from the ISSN given on the command line
    or, for older processes without one, from the journal-path setting or
    the issue title. Finally ``self.xml_path`` is built in the form
    <upload_dir>/<journal_name>/<process_name>/<process_name>.xml

    Lookup errors are not caught here; only a missing ``issn`` attribute
    on the command line is handled, by switching to the legacy lookup.
    """
    # Plain config values are stored under the same name as the config item.
    for item in ("ojs_server", "ojs_server_user", "ojs_app_user", "tool_path"):
        setattr(self, item, self.getConfigItem(item))

    # Temporary: new processes should always carry an ISSN, at which point
    # this probe belongs with the essential command line arguments.
    issn_value = ""
    try:
        issn_value = self.command_line.issn
    except AttributeError as missing:
        self.debug_message("Warning, missing attribute. Details: {0}".format(missing))
        # No ISSN on the command line, so fall back to the old code path.
        self.issn_missing = True
    else:
        self.issn_missing = False

    if self.issn_missing:
        # Old method: journal dir comes from the setting, or from the issue
        # title when the setting is left at "system".
        mets_path = os.path.join(
            self.command_line.process_path,
            self.getConfigItem("metadata_goobi_file", None, "process_files"),
        )
        issue_data = mets_tools.getIssueData(mets_path)
        self.ojs_journal_path = self.getSetting("ojs_journal_path", default="system")
        if self.ojs_journal_path != "system":
            self.volume_title = self.ojs_journal_path
        else:
            self.volume_title = tools.parseTitle(issue_data["TitleDocMain"])
        journal_dir = self.volume_title
    else:
        # New method: look the journal path up from the ISSN.
        self.ojs_journal_path = ojs.getJournalPath(self.ojs_server, issn_value)
        self.debug_message("Journal path is %s" % self.ojs_journal_path)
        journal_dir = self.ojs_journal_path

    # The upload_dir config item is a template taking the journal dir and
    # the process title.
    upload_template = self.getConfigItem("upload_dir")
    upload_dir = upload_template.format(journal_dir, self.command_line.process_title)
    self.xml_path = os.path.join(
        upload_dir, "{0}.xml".format(self.command_line.process_title)
    )
    self.debug_message("XML path is %s" % self.xml_path)
def createXML(self):
    """
    Build the OJS XML for the issue and write it to disk.

    Issue data and the article lists for the front matter, articles and
    back matter sections are read from ``self.mets_file``. One section is
    appended to the document for every non-empty article list, in that
    order, and the document is written to
    ``<self.ojs_metadata_dir>/<process_title>.xml``.

    Also sets ``self.ojs_dir``, the directory files will be uploaded to.

    Raises:
        ValueError: if the issue's PublicationYear is not a four digit
            number starting with 17, 18, 19 or 20.
    """
    data = minidom.parse(self.mets_file)
    issue_data = mets_tools.getIssueData(self.mets_file)
    article_data = mets_tools.getArticleData(data, ['FrontMatter', 'Articles', 'BackMatter'])

    # This is the dir where files will be uploaded to.
    if self.ojs_journal_path == 'system':
        # TODO: write this one back as a process property ('ojs_journal_path')?
        journal_title_path = tools.parseTitle(issue_data['TitleDocMain'])
    else:
        journal_title_path = self.ojs_journal_path
    self.ojs_dir = os.path.join(self.ojs_root, journal_title_path,
                                self.command_line.process_title)

    # ------------------------------------------------------------------
    # Get and validate PublicationYear,
    # i.e. it is exactly four digits and starts with 17, 18, 19 or 20.
    # ------------------------------------------------------------------
    err = ('Publiceringsåret ("{0}") for hæftet skal være et firecifret tal '
           'begyndende med enten 17, 18, 19 eller 20, f.eks. 1814, 1914 '
           'eller 2014. {1}. Åben metadata-editor og ret metadata for '
           'hæftet og afslut opgaven.')
    pub_year = issue_data['PublicationYear'].strip()  # drop surrounding spaces
    if not pub_year.isdigit():
        raise ValueError(err.format(pub_year, 'Det indtastede i feltet årstal er ikke et korrekt tal'))
    if len(pub_year) != 4:
        raise ValueError(err.format(pub_year, 'Tallet er ikke præcis fire cifre langt'))
    # Floor division gives the century prefix on both Python 2 and 3.
    if int(pub_year) // 100 not in (17, 18, 19, 20):
        raise ValueError(err.format(pub_year, 'Tallet starter ikke med 17, 18, 19 eller 20'))
    date_published = "{0}-01-01".format(pub_year)

    # ------------------------------------------------------------------
    # Create base xml for issue
    # ------------------------------------------------------------------
    impl = minidom.getDOMImplementation()
    doc = impl.createDocument(None, "issue", None)
    doc = self.createHeadMaterial(doc, issue_data)

    # Sections are emitted in this fixed order; a section without articles
    # is left out of the document entirely.
    section_builders = (
        ('FrontMatter', self.createFrontSectionXML),
        ('Articles', self.createArticleSectionXML),
        ('BackMatter', self.createBackSectionXML),
    )
    for section_key, build_section in section_builders:
        articles = article_data[section_key]
        if not articles:
            continue
        section = build_section(doc, issue_data)
        section = self.createArticlesForSection(articles, section, doc, date_published)
        doc.documentElement.appendChild(section)

    # Save the xml content to the correct file. The context manager makes
    # sure the handle is flushed and closed even if the write fails.
    # NOTE(review): toxml() is called without an encoding and the file is
    # opened in default text mode, so the platform's default encoding is
    # used for the Danish characters - confirm this is what OJS expects.
    output_name = os.path.join(self.ojs_metadata_dir,
                               self.command_line.process_title + '.xml')
    with open(output_name, 'w') as output:
        output.write(doc.toxml())
def getVariables(self):
    """
    Collect the paths this step needs and make sure the directories exist.

    Values come from the command line and the config file; lookup errors
    are not caught here. Only a missing ``issn`` attribute on the command
    line is handled, by falling back to the legacy title-based lookup.

    Sets on the instance:
        issn              -- the command line ISSN (only when one is given)
        ojs_metadata_dir  -- process path + config item 'metadata_ojs_path'
        pdf_input_dir     -- process path + config item 'doc_pdf_path'
        ojs_journal_path  -- the 'ojs_journal_path' setting (legacy) or the
                             journal path looked up from the ISSN
        ojs_dest_dir      -- <ojs_mount>/<journal dir>/<process_title>
    """
    # Temporary: new processes should always carry an ISSN, at which point
    # this probe belongs with the essential command line arguments.
    issn_missing = False
    try:
        self.issn = self.command_line.issn
    except AttributeError as e:
        self.debug_message("Warning, missing attribute. Details: {0}".format(e))
        # No ISSN on the command line, so use the old code path.
        issn_missing = True

    process_path = self.command_line.process_path
    ojs_mount = self.getConfigItem('ojs_mount')

    ojs_metadata_dir = self.getConfigItem('metadata_ojs_path',
                                          section=self.folder_structure_section)
    self.ojs_metadata_dir = os.path.join(process_path, ojs_metadata_dir)

    pdf_path = self.getConfigItem('doc_pdf_path',
                                  section=self.folder_structure_section)
    self.pdf_input_dir = os.path.join(process_path, pdf_path)

    # Resolve the journal directory name in one place so nothing below
    # depends on a matching earlier conditional having run.
    if issn_missing:
        # Temporary branch, until all new processes use an ISSN.
        mets_file_name = self.getConfigItem('metadata_goobi_file', None, 'process_files')
        mets_file = os.path.join(process_path, mets_file_name)
        issue_data = mets_tools.getIssueData(mets_file)
        # 'system' means "derive the journal dir from the issue title".
        self.ojs_journal_path = self.getSetting('ojs_journal_path', default='system')
        if self.ojs_journal_path == 'system':
            # TODO: write this one back as a process property ('ojs_journal_path')?
            journal_dir_name = tools.parseTitle(issue_data['TitleDocMain'])
        else:
            journal_dir_name = self.ojs_journal_path
    else:
        # NOTE(review): self.ojs_server is not assigned in this method; it
        # has to be set before getVariables runs - confirm.
        # Stored on the instance so both branches leave the same attribute.
        self.ojs_journal_path = ojs.getJournalPath(self.ojs_server, self.issn)
        journal_dir_name = self.ojs_journal_path

    ojs_journal_folder = os.path.join(ojs_mount, journal_dir_name)

    # Create the folders and set owner to gid 1000 => ojs-group.
    ojs_group_gid = 1000
    tools.find_or_create_dir(ojs_journal_folder, change_owner=ojs_group_gid)
    self.ojs_dest_dir = os.path.join(ojs_journal_folder, self.command_line.process_title)
    tools.find_or_create_dir(self.ojs_dest_dir, change_owner=ojs_group_gid)

    # presumably fails if one of the dirs is missing; verify in tools.
    tools.ensureDirsExist(self.ojs_metadata_dir, self.pdf_input_dir, self.ojs_dest_dir)

    # Temporary condition; in the future an ISSN is always available.
    # NOTE(review): as written the legacy path logs none of these - confirm
    # that is intended.
    if not issn_missing:
        self.debug_message("metadata_dir is %s" % self.ojs_metadata_dir)
        self.debug_message("pdf_input_dir is %s" % self.pdf_input_dir)
        self.debug_message("dest_dir is %s" % self.ojs_dest_dir)