def test_1stpara(self):
    """Load serious_poem.odt and compare the text of its body with the expected poem."""
    poem_odt = os.path.join(os.path.dirname(__file__), "examples", "serious_poem.odt")
    d = load(poem_odt)
    shouldbe = u"The boy stood on the burning deck,Whence allbuthim had fled.The flames that litthe battle'swreck,Shone o'er him, round the dead. "
    # assertEqual replaces the deprecated assertEquals alias, which newer
    # unittest versions no longer provide.
    self.assertEqual(shouldbe, unicode(d.body))
    self.assertEqual(shouldbe, str(d.body))
def test_metagenerator(self):
    """Inspect the meta:generator element of parastyles.odt after loading."""
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    d = load(os.path.join(examples_dir, "parastyles.odt"))
    meta = unicode(d.metaxml(), 'utf-8')
    original_generator = u"""<meta:generator>OpenOffice.org/2.3$Linux OpenOffice.org_project/680m6$Build-9226"""
    # NOTE(review): this asserts the original generator string is ABSENT
    # (find() == -1), while the failure message says it must be used --
    # confirm which of the two is intended.
    position = meta.find(original_generator)
    self.assertEqual(-1, position, "Must use the original generator string")
def merge(inputfile, textdoc):
    """Load ``inputfile`` and move its top-level content into ``textdoc``.

    The children of every document section are re-parented onto the section
    of the same name in ``textdoc``, the pictures of the loaded document
    replace those of ``textdoc``, and ``textdoc`` is returned.
    """
    inputtextdoc = load(inputfile)
    sections = (
        "meta",
        "fontfacedecls",
        "styles",
        "automaticstyles",
        "scripts",
        "settings",
        "masterstyles",
        "body",
    )
    for section in sections:
        source = getattr(inputtextdoc, section)
        target = getattr(textdoc, section)
        # Iterate over a copy: addElement unlinks the node from its original parent.
        for node in source.childNodes[:]:
            target.addElement(node)
    textdoc.Pictures = inputtextdoc.Pictures
    return textdoc
def test_percentage(self):
    """An automatic style can refer to a PercentageStyle through datastylename."""
    doc = OpenDocumentSpreadsheet()

    # Percentage data style "N11"
    nonze = PercentageStyle(name='N11')
    nonze.addElement(Number(decimalplaces='2', minintegerdigits='1'))
    nonze.addElement(Text(text='%'))
    doc.automaticstyles.addElement(nonze)

    # Cell style that points at the data style above
    pourcent = Style(name='pourcent', family='table-cell', datastylename='N11')
    pourcent.addElement(ParagraphProperties(textalign='center'))
    text_attributes = {'fontsize':"10pt",'fontweight':"bold", 'color':"#000000" }
    pourcent.addElement(TextProperties(attributes=text_attributes))
    doc.automaticstyles.addElement(pourcent)

    # One sheet holding a single percentage cell
    table = Table(name='sheet1')
    tr = TableRow()
    tc = TableCell(formula='=AVERAGE(C4:CB62)/2',stylename='pourcent', valuetype='percentage')
    tr.addElement(tc)
    table.addElement(tr)
    doc.spreadsheet.addElement(table)

    doc.save("TEST.odt")
    self.saved = True

    d = load("TEST.odt")
    result = d.contentxml()
    expected_fragments = (
        u'''<number:percentage-style''',
        u'''style:data-style-name="N11"''',
        u'''style:name="pourcent"''',
    )
    for fragment in expected_fragments:
        self.assertNotEqual(-1, result.find(fragment))
def __init__(self, input_file_name, output_file_name, processAnnotations=False):
    """Set up the RDF graph and load the input workbook.

    Stores the arguments, creates a ConjunctiveGraph with the namespace
    prefixes bound, enables compression, loads ``input_file_name`` and
    builds ``self.stylesnames``, a mapping from each style name to its
    parent style name (only for styles that declare a parent).
    """
    # Save the arguments
    self.input_file_name = input_file_name
    self.output_file_name = output_file_name
    self.processAnnotations = processAnnotations

    # Create the graph
    self.graph = ConjunctiveGraph()
    self.graph.bind('tablinker', TABLINKER)
    self.graph.bind('prov', PROV)
    self.graph.bind('dcat', DCAT)
    self.graph.bind('oa', OA)
    self.graph.bind('dcterms', DCTERMS)

    # Set a default namespace
    self.data_ns = Namespace("http://example.org/")
    self.graph.bind('data', self.data_ns)

    # Compress by default
    self.set_compress(True)

    # Everything before the first dot of the file name is used as log prefix
    self.basename = os.path.basename(input_file_name).split('.')[0]
    logger.info('[{}] Loading {}'.format(self.basename, input_file_name))

    self.book = load(unicode(input_file_name))

    self.stylesnames = {}
    for style in self.book.getElementsByType(Style):
        parentname = style.getAttrNS(STYLENS, 'parent-style-name')
        name = style.getAttrNS(STYLENS, 'name')
        # Identity comparison is the idiomatic (PEP 8) way to test for None.
        if parentname is not None:
            self.stylesnames[name] = parentname
def test_extract_with_span(self):
    """Extract the text of a document that uses bold/italic spans."""
    examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
    d = load(os.path.join(examples_dir, u"simplestyles.odt"))
    # The first extraction result is discarded, as in the original test.
    teletype.extractText(d.body)
    expected = u'Plain textBoldItalicBold italicUnderlineUnderline italicUnderline bold italicKm2 - superscriptH2O - subscript'
    self.assertEqual(expected, teletype.extractText(d.body))
def parse_opendocument(fin):
    """Read every row of the first sheet of the spreadsheet ``fin``.

    Returns a 5-tuple ``(cabecera, res, numentradas, numsalidas,
    cabecera_superior)``: the last header row seen, the list of non-empty
    data rows, the number of inputs and outputs reported by
    ``find_ins_outs`` for the In/Out header, and that In/Out header itself.
    """
    doc = load(fin)
    # Only the first sheet is read.
    table = doc.spreadsheet.getElementsByType(Table)[0]

    res = []
    cabecera = ()
    cabecera_superior = ()
    numentradas = numsalidas = 0  # defaults in case there are no rows
    for row in table.getElementsByType(TableRow):
        fila = convert_odrow(row)
        if not es_cabecera(fila):
            # Empty rows are skipped.
            if fila:
                res.append(fila)
            continue
        # A later header overwrites an earlier one; the In/Out header is
        # additionally kept in cabecera_superior.
        cabecera = fila
        if esta_en("In", cabecera) or esta_en("Out", cabecera):
            # This row tells how many inputs and outputs the table has.
            numentradas, numsalidas = find_ins_outs(cabecera)
            cabecera_superior = cabecera
    return cabecera, res, numentradas, numsalidas, cabecera_superior
def test_simplelist(self):
    """Lists in simplelist.odt survive loading and re-serialisation."""
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    d = load(os.path.join(examples_dir, "simplelist.odt"))
    result = unicode(d.contentxml(), 'utf-8')
    expected = u"""<text:list text:style-name="L1"><text:list-item><text:p text:style-name="P1">Item A</text:p></text:list-item><text:list-item>"""
    self.assertNotEqual(-1, result.find(expected))
def test_chinese(self):
    """Load a spreadsheet with Chinese content and look for a sheet name."""
    examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
    d = load(os.path.join(examples_dir, u"chinese_spreadsheet.ods"))
    result = unicode(d.contentxml(), 'utf-8')
    position = result.find(u'''工作表1''')
    self.assertNotEqual(-1, position)
def import_ods(path):
    """Load an ODS file into ``{sheet name: [[cell value, ...], ...]}``.

    Each cell value is a float when the cell text parses as one, otherwise
    the UTF-8 encoded text with the ellipsis character replaced by ``...``
    and zero-width spaces removed. Cells carrying a column-repeat count are
    expanded, except for the last cell of a row, which is kept once.
    """
    doc = load(path)
    db = {}
    for table in doc.spreadsheet.getElementsByType(Table):
        db_table = []
        db[table.getAttribute('name')] = db_table
        for row in table.getElementsByType(TableRow):
            db_row = []
            db_table.append(db_row)
            for cell in row.getElementsByType(TableCell):
                db_value = '\n'.join(map(str, cell.getElementsByType(P))).decode('utf-8')
                db_value = db_value.strip()
                try:
                    db_value = float(db_value)
                except ValueError:
                    # Not a number: keep it as cleaned-up text.
                    db_value = db_value.replace(u'\u2026', '...')
                    db_value = db_value.replace(u'\u200b', '')
                    db_value = db_value.encode('utf-8')
                try:
                    repeat_count = int(cell.getAttribute('numbercolumnsrepeated'))
                except (TypeError, ValueError):
                    # Attribute missing (None) or not an integer.
                    repeat_count = 1
                # A trailing cell is never expanded.
                if not cell.nextSibling:
                    repeat_count = 1
                db_row.extend([db_value] * repeat_count)
    return db
def __init__( self, template=None ):
    """Start from ``template`` when one is given, else from a new text document."""
    self.doc = load( template ) if template else OpenDocumentText()
    self.cur_par = None
    self._create_styles()
def process_workbook(self, input_file_name, output_file_name):
    """Run ``_process_sheet`` on every sheet of the workbook and save the result.

    An exception raised while processing one sheet is logged and does not
    stop the remaining sheets from being processed.
    """
    # Base name for logging
    basename = os.path.basename(input_file_name)

    log.info('[{}] Loading {}'.format(basename, input_file_name))
    book = load(unicode(input_file_name))

    log.debug('[{}] Starting RulesInjector'.format(basename))
    sheets = book.getElementsByType(Table)
    log.info('[{}] Found {} sheets to process'.format(basename, len(sheets)))

    for n, sheet in enumerate(sheets):
        log.debug('[{}] Processing sheet {}'.format(basename, n))
        try:
            self._process_sheet(basename, n, sheet)
        except Exception as detail:
            log.error("[{}] Error processing sheet {} : {}".format(basename, n, detail))

    book.save(unicode(output_file_name))
def test_metagenerator(self):
    """The meta.xml produced for emb_spreadsheet.odp carries an ODFPY generator."""
    examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
    d = load(os.path.join(examples_dir, u"emb_spreadsheet.odp"))
    meta = d.metaxml()
    position = meta.find(u"""<meta:generator>ODFPY""")
    self.assertNotEqual(-1, position, "Must not use the original generator string")
def load_styles(path_or_doc):
    """Map style names to the style nodes under ``doc.styles``.

    ``path_or_doc`` is either a path (any ``string_types`` instance), which
    is loaded first, or an already loaded document.
    """
    doc = load(path_or_doc) if isinstance(path_or_doc, string_types) else path_or_doc
    styles = {}
    for style in doc.styles.childNodes:
        styles[_style_name(style)] = style
    return styles
def odf_load(odf_file):
    """Load an ODF file and attach its styles dictionary to the document."""
    odfdoc = load(odf_file)
    # Embed the styles dict into the document object so that the odf_xxx
    # functions can reach it from any node via
    # node.ownerdocument.my_readable_styles.
    odfdoc.my_readable_styles = odf_get_styles(odfdoc)
    return odfdoc
def test_formulas_ooo(self):
    """Formulas without a namespace prefix are understood."""
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    d = load(os.path.join(examples_dir, "pythagoras-kspread.ods"))
    result = unicode(d.contentxml(), 'utf-8')
    expected_formulas = (
        u'''table:formula="=SQRT([.A1]*[.A1]+[.A2]*[.A2])"''',
        u'''table:formula="=SUM([.A1]:[.A2])"''',
    )
    for formula in expected_formulas:
        self.assertNotEqual(-1, result.find(formula))
def test_spreadsheet(self):
    """Load a presentation with one embedded sub-object and print its folder."""
    examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
    d = load(os.path.join(examples_dir, u"emb_spreadsheet.odp"))
    self.assertEqual(1, len(d.childobjects))
    for child in d.childobjects:
        print (child.folder)
def test_extract(self):
    """Convert the first paragraph of serious_poem.odt to plain text."""
    examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
    d = load(os.path.join(examples_dir, u"serious_poem.odt"))
    allparas = d.getElementsByType(P)
    # Paragraph markup kept for reference; it is not used by the assertion.
    content = u"""<text:p text:style-name="Standard">The boy stood <text:s text:c="3"/>on the burning deck,<text:line-break/><text:tab/>Whence all<text:tab/>but<text:tab/><text:tab/>him had fled.<text:line-break/>The flames <text:s text:c="2"/>that lit<text:tab/>the battle's<text:tab/>wreck,<text:line-break/> <text:s text:c="11"/>Shone o'er him, round the dead. <text:s text:c="2"/></text:p>"""
    expected = u"The boy stood on the burning deck,\n\tWhence all\tbut\t\thim had fled.\nThe flames that lit\tthe battle's\twreck,\n Shone o'er him, round the dead. "
    self.assertEqual(expected, teletype.extractText(allparas[0]))
def test_body(self):
    """The document body is an <office:body> below the office:document top node."""
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    d = load(os.path.join(examples_dir, "serious_poem.odt"))
    body = d.body
    self.assertTrue(body.isInstanceOf(office.Body))
    self.assertFalse(body.isInstanceOf(text.P))
    self.assertTrue(body.parentNode.isInstanceOf(office.Document))
    self.assertTrue(d.topnode.isInstanceOf(office.Document))
def test_paras(self):
    """Every element returned for text.P is a text.P instance."""
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    d = load(os.path.join(examples_dir, "serious_poem.odt"))
    for paragraph in d.getElementsByType(text.P):
        self.assertTrue(paragraph.isInstanceOf(text.P))
def test_formulas_ooo(self):
    """Formula namespace prefixes are preserved on load."""
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    d = load(os.path.join(examples_dir, "pythagoras.ods"))
    result = unicode(d.contentxml(), 'utf-8')
    expected_fragments = (
        u'''xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"''',
        u'''table:formula="of:=SQRT([.A1]*[.A1]+[.A2]*[.A2])"''',
        u'''table:formula="of:=SUM([.A1:.A2])"''',
    )
    for fragment in expected_fragments:
        self.assertNotEqual(-1, result.find(fragment))
def __init__(self, file=None, content=None, clonespannedcolumns=None):
    """Read every sheet of a spreadsheet.

    The document is ``content`` when it is truthy; otherwise it is loaded
    from ``file``. Each table is handed to ``readSheet``.
    """
    self.clonespannedcolumns = clonespannedcolumns
    self.doc = content if content else opendocument.load(file)
    self.SHEETS = {}
    for sheet in self.doc.spreadsheet.getElementsByType(Table):
        self.readSheet(sheet)
def loaddoc(self):
    """Load ``self.src_file`` into ``self.document``.

    A string source must name a zip file, otherwise TypeError is raised.
    A ``None`` source is replaced by ``sys.stdin``.
    """
    source = self.src_file
    if isinstance(source, str):
        # A filename was given: it has to be a zip archive.
        if not zipfile.is_zipfile(source):
            raise TypeError("%s is no odt file." % source)
    elif source is None:
        # No file given: read from stdin.
        self.src_file = sys.stdin
    self.document = load(self.src_file)
def test_headerfooter(self):
    """Styles referenced from master pages are renamed in OOo 2.x documents."""
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    d = load(os.path.join(examples_dir, "headerfooter.odt"))
    result = unicode(d.stylesxml(), 'utf-8')
    expected_fragments = (
        u'''style:name="MP1"''',
        u'''style:name="MP2"''',
        u"""<style:header><text:p text:style-name="MP1">Header<text:tab/>""",
        u"""<style:footer><text:p text:style-name="MP2">Footer<text:tab/>""",
    )
    for fragment in expected_fragments:
        self.assertNotEqual(-1, result.find(fragment))
def loaddoc(self):
    """Load ``self.src_file`` into ``self.document`` (Python 2 and 3).

    On Python 3 a ``str`` or ``io.IOBase`` source, and on Python 2 a
    ``basestring`` source, must be a zip file, otherwise TypeError is
    raised. A ``None`` source is replaced by ``sys.stdin``.
    """
    major = sys.version_info[0]
    if major == 3:
        needs_zip_check = isinstance(self.src_file, (str, io.IOBase))
    elif major == 2:
        # basestring only exists on Python 2, so it is only named here.
        needs_zip_check = isinstance(self.src_file, basestring)
    else:
        needs_zip_check = False

    if needs_zip_check:
        if not zipfile.is_zipfile(self.src_file):
            raise TypeError(u"%s is no odt file." % self.src_file)
    elif self.src_file is None:
        # No file given: read from stdin.
        self.src_file = sys.stdin
    self.document = load(self.src_file)
def test_headings(self):
    """Create a document with headings, save it and load it back."""
    textdoc = OpenDocumentText()
    elements = (
        H(outlinelevel=1, text=u"Heading 1"),
        P(text=u"Hello World!"),
        H(outlinelevel=2, text=u"Heading 2"),
    )
    for element in elements:
        textdoc.text.addElement(element)
    textdoc.save(u"TEST.odt")
    self.saved = True

    d = load(u"TEST.odt")
    result = d.contentxml()  # contentxml() is supposed to yield bytes
    expected = b"""<text:h text:outline-level="1">Heading 1</text:h><text:p>Hello World!</text:p><text:h text:outline-level="2">Heading 2</text:h>"""
    self.assertNotEqual(-1, result.find(expected))
def odt_to_str(path):
    """Return the paragraphs of an ODT file joined by newlines.

    Only acts when ``options.use_odfpy`` is set; each paragraph is passed
    through ``utf8``.
    """
    if options.use_odfpy:
        from odf.opendocument import load
        from odf import text
        from odf.element import Text

        document = load(utf8(path))
        paragraphs = document.getElementsByType(text.P)
        return "\n".join([utf8(para.__str__()) for para in paragraphs])
def test_cli_2odt(self):
    """Running the CLI with ``-f odt`` downloads example.odt with the expected text."""
    argv = ['-f', 'odt', '-o', self.out_dir, self.FIXTURE_FILE]
    with cli(argv=argv, credentials=self.credentials) as app:
        app.run()

    downloaded = os.path.join(self.out_dir, 'example.odt')
    # check that file downloaded
    self.assertTrue(os.path.isfile(downloaded))

    # check that file has correct content
    doc = opendocument.load(os.path.join(self.out_dir, 'example.odt'))
    xml_bytes = doc.toXml().encode('utf-8')
    root = ElementTree.fromstring(xml_bytes)
    self.assertRegexpMatches(GDocDown.get_element_text(root), 'gdoc_down example file')
def test_linebreak(self):
    """An (empty) line-break element is serialised correctly."""
    textdoc = OpenDocumentText()
    paragraph = P(text=u"Hello World!")
    textdoc.text.addElement(paragraph)
    paragraph.addElement(LineBreak())
    paragraph.addText(u"Line 2")
    textdoc.save(u"TEST.odt")
    self.saved = True

    d = load(u"TEST.odt")
    result = d.contentxml()  # contentxml() is supposed to yield bytes
    expected = b"""<text:p>Hello World!<text:line-break/>Line 2</text:p>"""
    self.assertNotEqual(-1, result.find(expected))
def load_images(input_file, ods):
    """Load the images of ``input_file`` into the ``ods`` document.

    The input can be any format pandoc reads, so instead of extracting the
    images ourselves pandoc converts the input to a temporary ``tmp.odt``
    next to the script, and the pictures are read from that file.

    Args:
        input_file: path of the original input document.
        ods: the ods document the pictures are added to.

    Returns:
        hr_list: list of references returned by ``ods.addPicture``, in
            reverse iteration order of ``img_dict``.
        []: when the temporary .odt file could not be created.
    """
    cur_dir = str(sys.argv[0])
    cur_dir = cur_dir.replace('odswriter.py', '')
    output_file = cur_dir + 'tmp.odt'

    # Pass an argument list instead of a shell string: paths containing
    # spaces or shell metacharacters are handed to pandoc unchanged.
    command = ['pandoc', input_file, '-o', output_file]
    try:
        proc = Popen(command, stdout=PIPE, stderr=PIPE)
    except OSError as exc:
        # pandoc is not installed / not executable.
        print('Images can not be loaded, Error:\n', exc)
        return []
    out, err = proc.communicate()
    if out or proc.returncode != 0:
        print('Images can not be loaded, Error:\n', out or err)
        return []

    odffile = load(output_file)
    # img_dict is not defined in this function; it is expected to be a
    # dict available at module level.
    for k in odffile.Pictures.keys():
        img_dict[k] = odffile.Pictures[k][1]

    # The order matters because it is the only way images are identified
    # (the input and tmp.odt use different file names): pictures are added
    # in iteration order and the references are returned reversed.
    hr_list = [
        ods.addPicture(filename=img_name, content=img_dict[img_name])
        for img_name in img_dict
    ]
    hr_list.reverse()
    return hr_list
def parse_opendocument(self, file_path, entity):
    """Load an OpenDocument file and copy its metadata onto ``entity``.

    Raises ProcessingException when the document cannot be loaded.
    Returns the loaded document.
    """
    try:
        doc = load(file_path)
    except Exception as exc:
        raise ProcessingException("Cannot open document.") from exc

    # Metadata tag -> entity property, split by whether the value is a timestamp.
    plain_fields = {
        'dc:title': 'title',
        'dc:description': 'summary',
        'dc:creator': 'author',
        'meta:generator': 'generator',
    }
    timestamp_fields = {
        'dc:date': 'date',
        'meta:creation-date': 'authoredAt',
    }
    for child in doc.meta.childNodes:
        value = str(child)
        tag = child.tagName
        if tag in plain_fields:
            entity.add(plain_fields[tag], value)
        elif tag in timestamp_fields:
            entity.add(timestamp_fields[tag], self.parse_timestamp(value))
    return doc
def parse_odp(presentation_filepath):
    """Return one ``{'title': ..., 'words': ...}`` dict per slide of a presentation.

    Title nodes (as decided by ``_is_title``) feed the title, joined by
    newlines; every other node contributes one space-joined line to ``words``.
    """
    presentation = opendocument.load(presentation_filepath)
    slides = []
    for slide in presentation.getElementsByType(draw.Page):
        title = []
        texts = []
        for node in slide.childNodes:
            if _is_title(node):
                _walk_children(node, title)
                continue
            node_text = []
            _walk_children(node, node_text)
            texts.append(node_text)
        slides.append({
            'title': "\n".join(title),
            'words': "".join(" ".join(words) + "\n" for words in texts),
        })
    return slides
def parse_opendocument(self, file_path, entity):
    """Load an OpenDocument file and record its metadata on ``entity``.

    Raises ProcessingException when loading fails; returns the document.
    """
    try:
        doc = load(file_path)
    except Exception as exc:
        raise ProcessingException("Cannot open document.") from exc

    # tag name -> (entity property, whether the value is a timestamp)
    fields = {
        "dc:title": ("title", False),
        "dc:description": ("summary", False),
        "dc:creator": ("author", False),
        "dc:date": ("date", True),
        "meta:creation-date": ("authoredAt", True),
        "meta:generator": ("generator", False),
    }
    for child in doc.meta.childNodes:
        value = str(child)
        entry = fields.get(child.tagName)
        if entry is None:
            continue
        prop, is_timestamp = entry
        if is_timestamp:
            value = self.parse_timestamp(value)
        entity.add(prop, value)
    return doc
def reader(filename, fileobj, **kwargs):
    """Read the rows of a spreadsheet into a list of lists.

    ``fileobj`` should be in binary, and at the beginning of the stream;
    when it is falsy, ``filename`` is loaded instead. Each cell yields the
    ``data`` of the first child of its first paragraph, or ``None`` when the
    cell has no paragraph. Trailing ``None`` values are dropped from every
    row, so an empty row yields ``[]``.
    """
    book = load(fileobj or filename)
    sheet = book.spreadsheet
    results = []
    for tr in sheet.getElementsByType(TableRow):
        row = []
        for tc in tr.getElementsByType(TableCell):
            value = None
            for item in tc.getElementsByType(P):
                value = item.firstChild.data
                break  # only the first paragraph of a cell is used
            row.append(value)
        # Guard on ``row``: a row without cells, or with only empty cells,
        # would otherwise raise IndexError once the list is exhausted.
        while row and row[-1] is None:
            row.pop()
        results.append(row)
    return results
def extract(self, row_proc=list.append):
    """Parse the header and the data of the first sheet of ``self.spreadsheet``.

    Sets ``self.header`` and ``self.data`` from ``parse_range``, warns about
    every key left in ``self.unused_keys``, and returns ``self.data``.
    Asserts that the workbook has a spreadsheet with at least one sheet.
    """
    # Find the sheet inside the document.
    # For now we just use the first sheet and ignore the rest.
    workbook = opendocument.load(self.spreadsheet)
    try:
        workbook.spreadsheet
    except AttributeError:
        # A missing attribute raises AttributeError (not NameError), so
        # this is the exception that must be caught for the check to fire.
        assert False, (
            "instance.extract: Workbook %s does not contain a spreadsheet!" % workbook)
    sheets = workbook.spreadsheet.getElementsByType(Table)
    assert (len(sheets) > 0), (
        "instance.extract: Workbook %s does not contain any sheets!" % workbook)
    sheet1 = sheets[0]

    # Read the header.
    # Get an array of cell validators of the correct type for that column or row.
    self.header = self.parse_range(sheet1, self.metadata.header, self.parse_header_cell)

    # Emit warnings for any keys declared in the metadata that were not
    # used in the spreadsheet.
    for (key, value) in self.unused_keys.iteritems():
        warn("Header %s of type %s was declared but not used!" % (key, value))

    # Read the data
    self.data = self.parse_range(sheet1, self.metadata.data, self.parse_data_cell, row_proc)
    return self.data
def parse_opendocument(self, file_path):
    """Load an OpenDocument file and record its metadata via ``self.update``.

    Raises ProcessingException when loading fails; returns the document.
    """
    try:
        doc = load(file_path)
    except Exception:
        raise ProcessingException("Cannot open document.")

    # Metadata tag -> result field, split by whether the value is a date.
    plain_fields = {
        'dc:title': 'title',
        'dc:description': 'summary',
        'dc:creator': 'author',
        'meta:generator': 'generator',
    }
    date_fields = {
        'dc:date': 'date',
        'meta:creation-date': 'created_at',
    }
    for child in doc.meta.childNodes:
        value = str(child)
        tag = child.tagName
        if tag in plain_fields:
            self.update(plain_fields[tag], value)
        elif tag in date_fields:
            self.update(date_fields[tag], self.parse_odf_date(value))
    return doc
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2009 Søren Roug, European Environment Agency
#
# This is free software.  You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
#
# This script simply loads a document into memory and saves it again.
# It takes the filename as argument
import os
import sys

from odf.opendocument import load

if len(sys.argv) < 2:
    sys.exit("Usage: %s <document>" % sys.argv[0])

infile = sys.argv[1]
doc = load(infile)
# Split on the real extension instead of assuming it is exactly three
# letters long, so e.g. "x.fodt" becomes "x-bak.fodt".
stem, ext = os.path.splitext(infile)
outfile = stem + "-bak" + ext
doc.save(outfile)
def import_odf(request, slug):
    """
    Import a colloscope in OpenDocument format.

    The file must have the same layout as the one produced by the
    colloscope_odf view. The user is only expected to have filled in the
    group numbers for each slot and each week.
    """
    classe = get_object_or_404(Classe, slug=slug)
    if not request.user.has_perm('pykol.change_colloscope', classe):
        raise PermissionDenied

    semaines = list(classe.semaine_set.order_by('debut'))
    creneaux = dict([(c.pk, c) for c in classe.creneau_set.all()])
    # Groups by name, for each of the two periods (year-long groups are
    # included in both).
    groupes = {
        constantes.PERIODE_PREMIERE: dict([(g.nom, g) for g in
            classe.trinomes.filter(
                periode__in=(constantes.PERIODE_ANNEE, constantes.PERIODE_PREMIERE)
            )]),
        constantes.PERIODE_DEUXIEME: dict([(g.nom, g) for g in
            classe.trinomes.filter(
                periode__in=(constantes.PERIODE_ANNEE, constantes.PERIODE_DEUXIEME)
            )]),
    }

    # List of the errors met while importing the file. Each entry is a
    # triple (error_code, (row, column), message), where row, column
    # and/or the pair itself may be None when the error does not concern
    # a particular position in the file.
    import_erreurs = []

    if request.method == 'POST':
        form = ColloscopeImportForm(request.POST, request.FILES)

        if form.is_valid():
            # Delete all the old colles that have not taken place yet
            if form.cleaned_data.get('supprimer'):
                Colle.objects.filter(classe=classe,
                    etat__in=(Colle.ETAT_PREVUE, Colle.ETAT_BROUILLON)).delete()

            # One big try catches any import error that escaped the
            # handling below.
            try:
                # Create the colles from the file
                colloscope_ods = load(request.FILES['colloscope_ods'])
                table = colloscope_ods.spreadsheet.getElementsByType(Table)[0]
                lignes = table.getElementsByType(TableRow)

                # Columns fixed by the ODF export
                nb_entetes_fixes = 5

                # The first two rows are skipped; numbering starts at 2.
                for ligne_num, ligne in enumerate(lignes[2:], 2):
                    cells = iter_columns(ligne)

                    try:
                        # Rows starting with an empty number are ignored.
                        creneau_text = tablecell_to_text(next(cells)).strip()
                        if not creneau_text:
                            continue

                        id_creneau = int(creneau_text)
                        creneau = creneaux[id_creneau]
                    except:
                        import_erreurs.append(('creneau_invalide',
                            (ligne_num, 0),
                            "La valeur n'est pas un numéro de créneau "
                            "valide pour cette classe."))
                        continue

                    try:
                        # Skip the following fixed columns
                        for _ in range(nb_entetes_fixes):
                            next(cells)
                    except:
                        # If there is no cell left to read, the row is
                        # considered empty and we move on to the next one.
                        continue

                    # And now the weeks
                    for sem_num, (sem_cell, semaine) in enumerate(zip_longest(cells, semaines)):
                        # Get the cell content and try to work out the
                        # week. The handling differs for each of the four
                        # possible cases of this pair of values (each one
                        # empty or not).
                        if sem_cell is None:
                            groupes_text = None
                        else:
                            groupes_text = tablecell_to_text(sem_cell).strip()

                        if semaine is None:
                            # Content was found in a cell that matches no
                            # week of the colloscope: report the error.
                            # If the cell is empty nothing is reported, it
                            # is just a leftover of the spreadsheet program.
                            if groupes_text:
                                import_erreurs.append(('semaine_invalide',
                                    (ligne_num, sem_num + nb_entetes_fixes),
                                    "Case située au-delà de la dernière "
                                    "semaine de colles."))

                            # In any case move on to the next row, no more
                            # relevant week can follow on this row.
                            break

                        elif groupes_text:
                            # A list of groups was found for a known
                            # week: update the colles.
                            groupes_colles = [g.strip() for g in groupes_text.split(",") if g.strip()]
                            for num_groupe in groupes_colles:
                                try:
                                    groupe = groupes[semaine.periode][num_groupe]
                                except:
                                    # Report the groups that do not exist
                                    # and go on with the next ones.
                                    import_erreurs.append(('groupe_invalide',
                                        (ligne_num, sem_num + nb_entetes_fixes),
                                        "Identifiant de groupe de colle "
                                        "inconnu."))
                                    continue

                                try:
                                    Colle.objects.update_or_create_from_creneau(creneau, semaine, groupe)
                                except:
                                    import_erreurs.append(('update_echoue',
                                        (ligne_num, sem_num + nb_entetes_fixes),
                                        "Échec de la mise à jour de cette "
                                        "colle."))

                        else:
                            # The cell of this week is empty: delete the
                            # colles already stored for it in the database.
                            # NOTE(review): nesting of colle.delete() under
                            # the est_effectuee test is reconstructed from a
                            # flattened source -- confirm.
                            for colle in Colle.objects.filter(creneau=creneau,
                                    semaine=semaine):
                                if not colle.est_effectuee:
                                    colle.annuler_mouvement()
                                    colle.delete()

                # Redirect only when the whole file was imported cleanly.
                if not import_erreurs:
                    return redirect('colloscope', slug=classe.slug)

            except Exception as e:
                import_erreurs.append(('fichier_invalide', None,
                    "Votre fichier n'est pas au format demandé."))
                logger.exception("Erreur inconnue lors de l'importation d'un colloscope", exc_info=e)
    else:
        form = ColloscopeImportForm()

    return render(request, 'pykol/colloscope/import_odf.html',
        context={
            'classe': classe,
            'form': form,
            'import_erreurs': import_erreurs,
        })
from generatedata import textfiles
from odf import text, teletype
from odf.opendocument import load
from pathlib import Path

# textfiles[1] holds the .odt paths, textfiles[0] the .txt paths.
numbers_from_odt = []
odtfiles = textfiles[1]
newodtfiles = [str(odtfile) for odtfile in odtfiles]  # paths as strings

for item in newodtfiles:
    doc = load(item)  # load the odt document with the odf library
    allrows = doc.getElementsByType(text.P)
    # collect the text of every paragraph
    numbers_from_odt.extend(teletype.extractText(row) for row in allrows)

numbers_from_txt = []
for item in textfiles[0]:
    # The context manager closes the file even if reading raises.
    with open(str(item), "r") as sourceFile:
        numbers_from_txt.extend(sourceFile)  # one entry per line

all_numbers = numbers_from_odt + numbers_from_txt
def addtolist(self, path, list, textlist):
    """Walk ``path`` and append one entry per file to ``list`` (and ``textlist``).

    Every file adds ``<width/height>_<path>`` to ``list``, or
    ``noimage_<path>`` when ``get_image_size`` raises UnknownImageFormat.
    When ``textlist`` is not None, one string per file is appended to it:
    the text extracted from non-image .txt/.docx/.odt/.xlsx/.ods files, or
    ``''`` when nothing was extracted.

    Returns ``(list, textlist)`` when ``textlist`` is not None, else ``list``.
    """
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            for filename in files:
                # os.path.join inserts the separator that plain string
                # concatenation of root and filename would miss.
                pathandname = os.path.join(root, filename)
                try:
                    width, height = get_image_size(pathandname)
                    wh = str(width / height) + '_'
                except UnknownImageFormat:
                    wh = 'noimage_'
                if textlist is not None:
                    tx = ''
                    if wh == 'noimage_':
                        print('path: ', pathandname)
                        try:
                            if '.pdf' in filename:
                                pass  # handled with lazypdf
                            elif '.txt' in filename:
                                with open(pathandname, encoding='utf-8') as txf:
                                    tx = txf.read()
                            elif '.docx' in filename:
                                tx = docx2txt.process(pathandname)
                            elif '.odt' in filename:
                                textdoc = load(pathandname)
                                tx = teletype.extractText(textdoc.body)
                            elif '.xlsx' in filename:
                                wb = xlrd.open_workbook(pathandname)  # xls file to read from
                                sh1 = wb.sheet_by_index(0)  # first sheet in workbook
                                for rownum in range(sh1.nrows):
                                    onerow = ' '.join(sh1.row_values(rownum))
                                    tx = tx + onerow + '\n'
                            elif '.ods' in filename:
                                doc = ODSReader(pathandname, clonespannedcolumns=True)
                                table = doc.getSheet(u'Sheet1')
                                for i in range(len(table)):
                                    for j in range(len(table[i])):
                                        tx = tx + ' ' + table[i][j]
                        except Exception:
                            # Best effort: a file that cannot be read keeps
                            # whatever text was gathered so far.
                            pass
                    textlist.append(tx)
                list.append(wh + pathandname)
    # NOTE(review): return placement is reconstructed from a flattened
    # source; the lists are returned unchanged for a non-directory path.
    if textlist is not None:
        return list, textlist
    else:
        return list
def load(self, filename):
    """Load ``filename`` and remember the document together with its tables."""
    document = load(filename)
    self.filename = filename
    self.document = document
    self.tables = document.getElementsByType(Table)
def read_file(self):
    """Store the text of all paragraphs of ``self.path``, space-joined, in ``self.content``."""
    odtfile = load(self.path)
    paragraphs = odtfile.getElementsByType(text.P)
    extracted = [teletype.extractText(paragraph) for paragraph in paragraphs]
    self.content = " ".join(extracted)
def test_spreadsheet(self):
    """A presentation with an embedded spreadsheet has exactly one child object."""
    examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
    d = load(os.path.join(examples_dir, u"emb_spreadsheet.odp"))
    self.assertEqual(1, len(d.childobjects))
def open_odt(file) -> str:
    """Open an odt document and return the text of all its paragraphs, one per line."""
    document = load(file)
    lines = []
    for paragraph in document.getElementsByType(odf.text.P):
        lines.append(teletype.extractText(paragraph))
    return "\n".join(lines)
def test_simple(self):
    """A simple load works and the content contains the greeting."""
    document = load(u"TEST.odt")
    result = document.contentxml()  # contentxml() is supposed to yield bytes
    position = result.find(b"""Hello World!""")
    self.assertNotEqual(-1, position)
def odf_question_file(filesName):
    """Print the text extracted from the body of the given document."""
    document = load(filesName)
    print(teletype.extractText(document.body))
def parse(document):
    """Return the string form of every paragraph of ``document.file``, joined by ``"\\n "``."""
    doc = load(document.file)
    content = [str(element) for element in doc.getElementsByType(text.P)]
    return "\n ".join(content)
def _load_from_memory(self):
    """Load the book from ``self._file_stream`` into ``self._native_book``."""
    stream = self._file_stream
    self._native_book = load(stream)
upd_dict = {} for (k, v) in config['data'].items(): #print(k, v) upd_dict[k] = v #.decode('utf-8') #print(upd_dict) templ_dir = u'/mnt/storage/tmp' templ_filename = u"{0}/{1}".format(templ_dir, 'DogTemp-full.odt') out_dir = templ_dir out_filename = u"{0}/{1}".format(out_dir, 'DogOutput.odt').decode('utf-8') #print('t={0}, o={1}'.format(templ_filename, out_filename)) doc = load(templ_filename) img_path = '/smb/it/tmp/imgStampКИПСПБ.jpg' href = doc.addPicture(img_path) img_sign = Image(href=href, type="simple", show="embed", actuate="onLoad") for f in doc.getElementsByType(Frame): if f.getAttribute('name') == 'img_stamp_sign': for chld in f.childNodes: if u'image' in chld.qname: for img in chld.getElementsByType(Image): print('--- img ---') #print(img.getAttribute('href')) img.setAttribute('href', href) #print(img.getAttribute('href')) """ for tbl in doc.getElementsByType(Table):
def _load_from_file(self):
    """Load the book named by ``self._file_name`` into ``self._native_book``."""
    file_name = self._file_name
    self._native_book = load(file_name)
def __init__(self, presentation_name):
    """Load the presentation, then parse its styles and add its slides."""
    PresentationBasic.__init__(self, presentation_name)
    # Starts empty before parse_styles() runs.
    self.auto_styles = {}
    self.prs = opendocument.load(presentation_name)
    self.parse_styles()
    self.add_slides()
def test_simple(self):
    """A simple load works and the content contains the greeting."""
    document = load("TEST.odt")
    result = document.contentxml()
    position = result.find(u"""Hello World!""")
    self.assertNotEqual(-1, position)
def __init__(self):
    """Load the document template found below ``root``."""
    template_path = root + r"/documentTemplates/Шаблон.odt"
    self.doc = load(template_path)
import random
import os
from odf.opendocument import load
from odf.text import *

# Text that is appended to the document.
with open('wals_intro', 'r', encoding='utf-8') as f:
    pl = f.read()

# First .odt file of the current directory.
odt_names = [name for name in os.listdir() if name.endswith('.odt')]
filename = odt_names[0]
doc = load(filename)

# Append a level-1 heading followed by the text.
h = H(outlinelevel=1, text="Plagiarism")
doc.text.addElement(h)
p = P(text=pl)
doc.text.addElement(p)

# The document is only saved when the random draw from 0..5 is 4.
pool = range(6)
if random.choice(pool) == 4:
    doc.save(filename)
from os import listdir
from os.path import isfile, join
from odf.opendocument import load
from odf import teletype
import sys

# Count the characters of every .odt file in docs/<argv[1]>, with and
# without whitespace.
count_with_space = 0
count = 0
nb_fic = 0
for file in listdir('docs/' + sys.argv[1]):
    filename = join('docs/' + sys.argv[1], file)
    if not (isfile(filename) and filename.endswith('.odt')):
        continue
    nb_fic += 1
    doc = load(filename).text
    txt = teletype.extractText(doc)
    count_with_space += len(txt)
    # Spaces, tabs, newlines and non-breaking spaces are not counted.
    count += sum(1 for car in txt if car not in (' ', '\t', '\n', u'\u00A0'))

print(f'Total pour {sys.argv[1]} :')
print(f' {nb_fic} fichiers')
print(f' Avec espaces : {count_with_space} caractères')
print(f' Sans espaces : {count} caractères')
from odf.opendocument import load
from odf import text

# Open a document
doc = load("Copia.odt")

# Get the text part of the document and print it
conteudo = doc.text
print(conteudo)
# Pick up the output file name from the parsed options, if given.
outputfile = None
for o, a in opts:
    if o in ("-o", "--output"):
        outputfile = a

# Exactly one input file is expected.
if len(args) != 1:
    usage()
    sys.exit(2)

inputfile = args[0]
if outputfile is None:
    # Replace the extension by ".odt". When the name has no dot, rfind()
    # returns -1 and slicing would drop the last character, so the whole
    # name is kept in that case.
    dot = inputfile.rfind('.')
    stem = inputfile if dot == -1 else inputfile[:dot]
    outputfile = stem + ".odt"

spreadsheetdoc = load(inputfile)

textdoc = OpenDocumentText()

# Need to make a copy of the list because addElement unlinks from the original
for meta in spreadsheetdoc.meta.childNodes[:]:
    textdoc.meta.addElement(meta)

for font in spreadsheetdoc.fontfacedecls.childNodes[:]:
    textdoc.fontfacedecls.addElement(font)

for style in spreadsheetdoc.styles.childNodes[:]:
    textdoc.styles.addElement(style)

for autostyle in spreadsheetdoc.automaticstyles.childNodes[:]:
    textdoc.automaticstyles.addElement(autostyle)
def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
    """Return the document that odfpy loads from ``filepath_or_buffer``."""
    # Imported inside the method, so odfpy is only imported when it runs.
    from odf.opendocument import load as load_document

    return load_document(filepath_or_buffer)
def get_paragraphs_odt(doc_path):
    """Return all ``odf.text.P`` elements of the document at ``doc_path``."""
    paragraphs = load(doc_path).getElementsByType(odf.text.P)
    return paragraphs
def __init__(self, fname='Purdue-FTA.ods'):
    """Open the ODS file.

    @param fname : Path of the file.
    """
    # NOTE(review): the loaded document is not stored anywhere -- confirm
    # whether it should be kept on the instance.
    load(fname)