def createXML(self):
    '''
    Build the OJS import XML for the issue and write it to disk.

    Reads issue-level data and per-section article data from
    self.mets_file, validates the publication year, assembles an
    <issue> document with one section element per non-empty section
    (front matter, articles, back matter -- in that order) and writes
    the result to <self.ojs_metadata_dir>/<process_title>.xml.

    Side effects:
        Sets self.ojs_dir, the directory the files will be uploaded to.

    Raises:
        ValueError: if the publication year is not a four-digit number
            starting with 17, 18, 19 or 20.
    '''
    data = minidom.parse(self.mets_file)
    issue_data = mets_tools.getIssueData(self.mets_file)
    article_data = mets_tools.getArticleData(
        data, ['FrontMatter', 'Articles', 'BackMatter'])

    # This is the dir where files will be uploaded to. With the special
    # value 'system' the journal path is derived from the issue's main title.
    if self.ojs_journal_path == 'system':
        journal_title_path = tools.parseTitle(issue_data['TitleDocMain'])
        # TODO: write the derived path back as the Goobi process property
        # 'ojs_journal_path'?
    else:
        journal_title_path = self.ojs_journal_path
    self.ojs_dir = os.path.join(self.ojs_root,
                                journal_title_path,
                                self.command_line.process_title)

    #=======================================================================
    # Get and validate PublicationYear
    # I.e. it is exactly four digits and starts with 17, 18, 19 or 20
    #=======================================================================
    err = ('Publiceringsåret ("{0}") for hæftet skal være et firecifret tal '
           'begyndende med enten 17, 18, 19 eller 20, f.eks. 1814, 1914 '
           'eller 2014. {1}. Åben metadata-editor og ret metadata for '
           'hæftet og afslut opgaven.')
    # Remove leading and trailing spaces before validating.
    pub_year = issue_data['PublicationYear'].strip()
    if not pub_year.isdigit():
        raise ValueError(err.format(
            pub_year, 'Det indtastede i feltet årstal er ikke et korrekt tal'))
    if len(pub_year) != 4:
        raise ValueError(err.format(
            pub_year, 'Tallet er ikke præcis fire cifre langt'))
    # Floor division yields the two leading digits of the four-digit year.
    if int(pub_year) // 100 not in (17, 18, 19, 20):
        raise ValueError(err.format(
            pub_year, 'Tallet starter ikke med 17, 18, 19 eller 20'))
    # Only the year is known, so the issue is dated to January 1st.
    date_published = "{0}-01-01".format(pub_year)

    #=======================================================================
    # Create base xml for issue
    #=======================================================================
    impl = minidom.getDOMImplementation()
    doc = impl.createDocument(None, "issue", None)
    doc = self.createHeadMaterial(doc, issue_data)

    # Each section is only emitted when it actually contains articles.
    # The tuple order is the order in which sections are appended.
    section_builders = (
        ('FrontMatter', self.createFrontSectionXML),
        ('Articles', self.createArticleSectionXML),
        ('BackMatter', self.createBackSectionXML),
    )
    for section_name, create_section in section_builders:
        articles = article_data[section_name]
        if not articles:
            continue
        section = create_section(doc, issue_data)
        section = self.createArticlesForSection(articles, section,
                                                doc, date_published)
        doc.documentElement.appendChild(section)

    # Save the xml content to the correct file. The context manager makes
    # sure the file is flushed and closed even if the write fails.
    # NOTE(review): no encoding is passed to open() or toxml(), so the
    # interpreter default applies -- confirm this is safe for non-ASCII
    # metadata.
    output_name = os.path.join(self.ojs_metadata_dir,
                               self.command_line.process_title + '.xml')
    with open(output_name, 'w') as output:
        output.write(doc.toxml())
def getArticles(self):
    '''
    Parse self.mets_file and store the article data for the front
    matter, articles and back matter sections in self.article_data.
    '''
    section_names = ["FrontMatter", "Articles", "BackMatter"]
    mets_dom = minidom.parse(self.mets_file)
    self.article_data = mets_tools.getArticleData(mets_dom, section_names)