Exemplo n.º 1
0
	def extractAndCleanCorpus(self):
		"""Strip tags from each matching corpus file and pass the result on.

		Every path in ``self.dirCorpus`` matched by the ``*xml`` glob pattern
		is read as UTF-8 text, run through ``FormatEval.strip_tags`` together
		with ``self.corpusTag``, and the returned value is handed to
		``self._saveFile`` along with ``self.dirLabel`` and the file's base
		name.
		"""
		# NOTE(review): the pattern is "*xml" rather than "*.xml", so any name
		# that merely ends in "xml" is matched -- confirm this is intended.
		pattern = os.path.join(self.dirCorpus, "*xml")
		for path in glob.glob(pattern):
			# Read the whole file first; the handle is closed before processing.
			with open(path, 'r', encoding='utf-8') as handle:
				rawText = handle.read()
			cleaned = FormatEval.strip_tags(rawText, self.corpusTag)
			fileName = os.path.basename(path)
			self._saveFile(cleaned, self.dirLabel, fileName)