Python load примеры, odf.opendocument.load Python примеры использования

Пример #1

0

Показать файл

Файл: testtext.py Проект: bufke/odfpy

 def test_1stpara(self):
     """Load the sample poem and compare the body's string forms with a reference text.

     Both ``unicode(d.body)`` and ``str(d.body)`` must equal ``shouldbe``.
     """
     poem_odt = os.path.join(os.path.dirname(__file__), "examples", "serious_poem.odt")
     d = load(poem_odt)
     shouldbe = u"The boy stood on the burning deck,Whence allbuthim had fled.The flames that litthe battle'swreck,Shone o'er him, round the dead. "
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(shouldbe, unicode(d.body))
     self.assertEqual(shouldbe, str(d.body))

Пример #2

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_metagenerator(self):
     """Assert the OpenOffice.org generator string is absent from the decoded meta XML.

     NOTE(review): the failure message says the original generator "must" be
     used, while the assertion requires find() to return -1 -- confirm intent.
     """
     here = os.path.dirname(__file__)
     source_path = os.path.join(here, "examples", "parastyles.odt")
     document = load(source_path)
     meta_xml = unicode(document.metaxml(), 'utf-8')
     original_generator = u"""<meta:generator>OpenOffice.org/2.3$Linux OpenOffice.org_project/680m6$Build-9226"""
     position = meta_xml.find(original_generator)
     self.assertEqual(-1, position, "Must use the original generator string")

Пример #3

0

Показать файл

Файл: odtmerge.py Проект: eea/odfpy

def merge(inputfile, textdoc):
    """Move the children of each section of the loaded *inputfile* into *textdoc*.

    The sections are processed in a fixed order (meta, font face declarations,
    styles, automatic styles, scripts, settings, master styles, body). Finally
    ``textdoc.Pictures`` is replaced by the loaded document's ``Pictures`` and
    *textdoc* is returned.
    """
    source = load(inputfile)

    section_names = (
        "meta",
        "fontfacedecls",
        "styles",
        "automaticstyles",
        "scripts",
        "settings",
        "masterstyles",
        "body",
    )
    for section in section_names:
        # Iterate over a copy of the list because addElement unlinks the node
        # from its original parent.
        for node in getattr(source, section).childNodes[:]:
            getattr(textdoc, section).addElement(node)

    textdoc.Pictures = source.Pictures
    return textdoc

Пример #4

0

Показать файл

Файл: testdatastyles.py Проект: agiacomolli/odfpy

    def test_percentage(self):
        """Save a spreadsheet whose cell style references a PercentageStyle, then reload it.

        The reloaded content XML must contain the percentage style element,
        the data-style-name reference and the cell style name.
        """
        doc = OpenDocumentSpreadsheet()

        # Percentage data style named "N11".
        percent_format = PercentageStyle(name='N11')
        percent_format.addElement(Number(decimalplaces='2', minintegerdigits='1'))
        percent_format.addElement(Text(text='%'))
        doc.automaticstyles.addElement(percent_format)

        # Table-cell style that points at the data style above.
        cell_style = Style(name='pourcent', family='table-cell', datastylename='N11')
        cell_style.addElement(ParagraphProperties(textalign='center'))
        text_attrs = {'fontsize': "10pt", 'fontweight': "bold", 'color': "#000000"}
        cell_style.addElement(TextProperties(attributes=text_attrs))
        doc.automaticstyles.addElement(cell_style)

        sheet = Table(name='sheet1')
        row = TableRow()
        cell = TableCell(formula='=AVERAGE(C4:CB62)/2', stylename='pourcent', valuetype='percentage')
        row.addElement(cell)
        sheet.addElement(row)
        doc.spreadsheet.addElement(sheet)

        doc.save("TEST.odt")
        self.saved = True

        reloaded = load("TEST.odt")
        content = reloaded.contentxml()
        expected_fragments = (
            u'''<number:percentage-style''',
            u'''style:data-style-name="N11"''',
            u'''style:name="pourcent"''',
        )
        for fragment in expected_fragments:
            self.assertNotEqual(-1, content.find(fragment))

Пример #5

0

Показать файл

Файл: tablinker.py Проект: CEDAR-project/Integrator

    def __init__(self, input_file_name, output_file_name, processAnnotations=False):
        """
        Store the arguments, set up the RDF graph and load the input document.

        After loading, ``self.stylesnames`` maps each style name to its
        parent style name for every style that declares a parent.

        :param input_file_name: path of the document passed to ``load``
        :param output_file_name: stored on the instance; not used here
        :param processAnnotations: stored on the instance; not used here
        """
        # Save the arguments
        self.input_file_name = input_file_name
        self.output_file_name = output_file_name
        self.processAnnotations = processAnnotations

        # Create the graph
        self.graph = ConjunctiveGraph()
        self.graph.bind('tablinker', TABLINKER)
        self.graph.bind('prov', PROV)
        self.graph.bind('dcat', DCAT)
        self.graph.bind('oa', OA)
        self.graph.bind('dcterms', DCTERMS)

        # Set a default namespace
        self.data_ns = Namespace("http://example.org/")
        self.graph.bind('data', self.data_ns)

        # Compress by default
        self.set_compress(True)

        # Everything before the first dot of the file name is used as log prefix
        self.basename = os.path.basename(input_file_name).split('.')[0]

        logger.info('[{}] Loading {}'.format(self.basename, input_file_name))
        self.book = load(unicode(input_file_name))
        self.stylesnames = {}
        for style in self.book.getElementsByType(Style):
            parentname = style.getAttrNS(STYLENS, 'parent-style-name')
            name = style.getAttrNS(STYLENS, 'name')
            # Identity test is the correct way to compare against None
            if parentname is not None:
                self.stylesnames[name] = parentname

Пример #6

0

Показать файл

Файл: testwhitespace.py Проект: abiapp/odfpy

 def test_extract_with_span(self):
     """Extract the text of a document containing bold/italic spans and compare it."""
     poem_odt = os.path.join(
         os.path.dirname(__file__), u"examples", u"simplestyles.odt")
     d = load(poem_odt)
     # extractText is called once; the earlier duplicate call discarded its result.
     self.assertEqual(u'Plain textBoldItalicBold italicUnderlineUnderline italicUnderline bold italicKm2 - superscriptH2O - subscript', teletype.extractText(d.body))

Пример #7

0

Показать файл

Файл: pythonize_tablacalc.py Проект: Geotexan/calculinn

def parse_opendocument(fin):
    """
    Read every row of the first sheet of the spreadsheet loaded from ``fin``.

    Returns a 5-tuple ``(header, rows, n_inputs, n_outputs, top_header)``:
    the last header row seen, the list of non-empty data rows, the input and
    output counts reported by ``find_ins_outs`` and the header row that
    contained "In"/"Out". The counts stay 0 and the headers stay ``()`` when
    no matching row is found.
    """
    doc = load(fin)
    # The documents only have one sheet.
    first_sheet = doc.spreadsheet.getElementsByType(Table)[0]

    data_rows = []
    header = ()
    top_header = ()
    n_inputs = n_outputs = 0    # In case there are no rows at all
    for od_row in first_sheet.getElementsByType(TableRow):
        cells = convert_odrow(od_row)
        if not es_cabecera(cells):
            # Empty rows are skipped.
            if cells:
                data_rows.append(cells)
            continue
        # A later header row overwrites an earlier one; the (In, Out) row
        # additionally provides the number of inputs and outputs.
        header = cells
        if esta_en("In", header) or esta_en("Out", header):
            n_inputs, n_outputs = find_ins_outs(header)
            top_header = header
    return header, data_rows, n_inputs, n_outputs, top_header

Пример #8

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_simplelist(self):
     """Load simplelist.odt and look for the first list item in the content XML."""
     examples_dir = os.path.join(os.path.dirname(__file__), "examples")
     document = load(os.path.join(examples_dir, "simplelist.odt"))
     content = unicode(document.contentxml(), 'utf-8')
     expected = u"""<text:list text:style-name="L1"><text:list-item><text:p text:style-name="P1">Item A</text:p></text:list-item><text:list-item>"""
     self.assertNotEqual(-1, content.find(expected))

Пример #9

0

Показать файл

Файл: testload.py Проект: eea/odfpy

 def test_chinese(self):
     """Load a spreadsheet with Chinese content and look for its sheet name."""
     examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
     document = load(os.path.join(examples_dir, u"chinese_spreadsheet.ods"))
     content = unicode(document.contentxml(), 'utf-8')
     position = content.find(u'''工作表1''')
     self.assertNotEqual(-1, position)

Пример #10

0

Показать файл

Файл: odsimport.py Проект: aholkner/PoliticalRPG

def import_ods(path):
    """Load the spreadsheet at *path* into a dict of tables.

    Returns a dict mapping each table's ``name`` attribute to a list of rows;
    each row is a list of cell values. A value is a ``float`` when the cell
    text parses as one, otherwise the UTF-8 encoded text with U+2026 replaced
    by ``...`` and U+200B removed. A cell is repeated according to its
    ``numbercolumnsrepeated`` attribute, except for the last cell of a row,
    which is always stored once.

    The body relies on Python 2 string handling (``str.decode``).
    """
    doc = load(path)

    db = {}

    tables = doc.spreadsheet.getElementsByType(Table)
    for table in tables:
        db_table = []
        db[table.getAttribute('name')] = db_table
        for row in table.getElementsByType(TableRow):
            db_row = []
            db_table.append(db_row)
            for cell in row.getElementsByType(TableCell):
                db_value = '\n'.join(map(str, cell.getElementsByType(P))).decode('utf-8')
                db_value = db_value.strip()
                try:
                    db_value = float(db_value)
                except ValueError:
                    # Not numeric: keep the text, normalising two characters.
                    db_value = db_value.replace(u'\u2026', '...')
                    db_value = db_value.replace(u'\u200b', '')
                    db_value = db_value.encode('utf-8')

                try:
                    repeat_count = int(cell.getAttribute('numbercolumnsrepeated'))
                except (AttributeError, TypeError, ValueError):
                    # Attribute unavailable or not an integer: single occurrence.
                    repeat_count = 1

                # The last cell of a row is never expanded.
                if not cell.nextSibling:
                    repeat_count = 1
                db_row.extend([db_value] * repeat_count)

    return db

Пример #11

0

Показать файл

Файл: aozora2odt.py Проект: dmishin/aozora2odt

 def __init__( self, template=None ):
     """Start from the loaded *template* when one is given, else from a new text document."""
     if template:
         self.doc = load( template )
     else:
         self.doc = OpenDocumentText()
     # No paragraph is current yet.
     self.cur_par = None
     self._create_styles()

Пример #12

0

Показать файл

Файл: rulesinject.py Проект: CEDAR-project/Integrator

    def process_workbook(self, input_file_name, output_file_name):
        """
        Load the workbook, run ``_process_sheet`` on every table and save the result.

        A failure in one sheet is logged and does not stop the remaining sheets.
        """
        # File name without directories, used as the log prefix
        basename = os.path.basename(input_file_name)

        log.info('[{}] Loading {}'.format(basename, input_file_name))
        book = load(unicode(input_file_name))

        log.debug('[{}] Starting RulesInjector'.format(basename))
        sheets = book.getElementsByType(Table)
        log.info('[{}] Found {} sheets to process'.format(basename, len(sheets)))

        for index, sheet in enumerate(sheets):
            log.debug('[{}] Processing sheet {}'.format(basename, index))
            try:
                self._process_sheet(basename, index, sheet)
            except Exception as detail:
                log.error("[{}] Error processing sheet {} : {}".format(basename, index, detail))

        book.save(unicode(output_file_name))

Пример #13

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_metagenerator(self):
     """Assert the meta XML of the loaded presentation carries an ODFPY generator tag."""
     examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
     document = load(os.path.join(examples_dir, u"emb_spreadsheet.odp"))
     meta_xml = document.metaxml()
     position = meta_xml.find(u"""<meta:generator>ODFPY""")
     self.assertNotEqual(-1, position, "Must not use the original generator string")

Пример #14

0

Показать файл

Файл: opendocument.py Проект: jairideout/ipymd

def load_styles(path_or_doc):
    """Map style names to style nodes for an ODF document.

    *path_or_doc* is loaded when it is a string; any other value is used
    as the document directly.
    """
    doc = load(path_or_doc) if isinstance(path_or_doc, string_types) else path_or_doc
    styles = {}
    for node in doc.styles.childNodes:
        styles[_style_name(node)] = node
    return styles

Пример #15

0

Показать файл

Файл: odt2markdown.py Проект: PatriceBertrand/odt2markdown

def odf_load(odf_file):
    """Load *odf_file* and attach its styles dict as ``my_readable_styles``."""
    document = load(odf_file)
    # Embed the styles dict in the document object so that the odf_xxx
    # functions can reach it from a node via node.ownerdocument.my_readable_styles.
    document.my_readable_styles = odf_get_styles(document)
    return document

Пример #16

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_formulas_ooo(self):
     """Load pythagoras-kspread.ods and look for its two unprefixed formulas."""
     examples_dir = os.path.join(os.path.dirname(__file__), "examples")
     document = load(os.path.join(examples_dir, "pythagoras-kspread.ods"))
     content = unicode(document.contentxml(), 'utf-8')
     expected_formulas = (
         u'''table:formula="=SQRT([.A1]*[.A1]+[.A2]*[.A2])"''',
         u'''table:formula="=SUM([.A1]:[.A2])"''',
     )
     for formula in expected_formulas:
         self.assertNotEqual(-1, content.find(formula))

Пример #17

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_spreadsheet(self):
     """Load a presentation and check that it has exactly one child object."""
     examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
     document = load(os.path.join(examples_dir, u"emb_spreadsheet.odp"))
     self.assertEqual(1, len(document.childobjects))
     # Print the folder of each embedded object.
     for child in document.childobjects:
         print(child.folder)

Пример #18

0

Показать файл

Файл: testwhitespace.py Проект: abiapp/odfpy

    def test_extract(self):
        """Convert the first paragraph of the sample poem to plain text.

        The expected text contains runs of spaces, tabs and newlines, which
        ``teletype.extractText`` must reproduce.
        """
        poem_odt = os.path.join(
            os.path.dirname(__file__), u"examples", u"serious_poem.odt")
        d = load(poem_odt)
        allparas = d.getElementsByType(P)
        # The unused reference copy of the paragraph's XML markup was removed.

        self.assertEqual(u"The boy stood    on the burning deck,\n\tWhence all\tbut\t\thim had fled.\nThe flames   that lit\tthe battle's\twreck,\n           Shone o'er him, round the dead.   ", teletype.extractText(allparas[0]))

Пример #19

0

Показать файл

Файл: testtypes.py Проект: BrickXu/odfpy

 def test_body(self):
     """Check the element types of the body, its parent and the top node."""
     examples_dir = os.path.join(os.path.dirname(__file__), "examples")
     document = load(os.path.join(examples_dir, "serious_poem.odt"))
     body = document.body
     self.assertTrue(body.isInstanceOf(office.Body))
     self.assertFalse(body.isInstanceOf(text.P))
     self.assertTrue(body.parentNode.isInstanceOf(office.Document))
     self.assertTrue(document.topnode.isInstanceOf(office.Document))

Пример #20

0

Показать файл

Файл: testtypes.py Проект: BrickXu/odfpy

    def test_paras(self):
        """Every element returned by getElementsByType(text.P) must be a text.P."""
        examples_dir = os.path.join(os.path.dirname(__file__), "examples")
        document = load(os.path.join(examples_dir, "serious_poem.odt"))

        for paragraph in document.getElementsByType(text.P):
            self.assertTrue(paragraph.isInstanceOf(text.P))

Пример #21

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_formulas_ooo(self):
     """Load pythagoras.ods and look for the ``of:`` namespace and prefixed formulas."""
     examples_dir = os.path.join(os.path.dirname(__file__), "examples")
     document = load(os.path.join(examples_dir, "pythagoras.ods"))
     content = unicode(document.contentxml(), 'utf-8')
     expected_fragments = (
         u'''xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"''',
         u'''table:formula="of:=SQRT([.A1]*[.A1]+[.A2]*[.A2])"''',
         u'''table:formula="of:=SUM([.A1:.A2])"''',
     )
     for fragment in expected_fragments:
         self.assertNotEqual(-1, content.find(fragment))

Пример #22

0

Показать файл

Файл: odf_ods_reader.py Проект: RoganW/odoo

 def __init__(self, file=None, content=None, clonespannedcolumns=None):
     """Use *content* as the document when it is truthy, otherwise load *file*.

     Every table of the document's spreadsheet is then passed to ``readSheet``.
     """
     # Set in both cases, before any loading happens.
     self.clonespannedcolumns = clonespannedcolumns
     if content:
         self.doc = content
     else:
         self.doc = opendocument.load(file)
     self.SHEETS = {}
     sheets = self.doc.spreadsheet.getElementsByType(Table)
     for sheet in sheets:
         self.readSheet(sheet)

Пример #23

0

Показать файл

Файл: userfield.py Проект: 18600597055/hue

    def loaddoc(self):
        """Load ``self.src_file`` into ``self.document``.

        A ``str`` source must be a zip file, otherwise ``TypeError`` is
        raised. A ``None`` source is replaced by ``sys.stdin``.
        """
        source = self.src_file
        if source is None:
            # use stdin if no file given
            self.src_file = sys.stdin
        elif isinstance(source, str) and not zipfile.is_zipfile(source):
            # src_file is a filename that is not a zip-file
            raise TypeError("%s is no odt file." % self.src_file)

        self.document = load(self.src_file)

Пример #24

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_headerfooter(self):
     """Load headerfooter.odt and look for the MP1/MP2 styles in the styles XML."""
     examples_dir = os.path.join(os.path.dirname(__file__), "examples")
     document = load(os.path.join(examples_dir, "headerfooter.odt"))
     styles_xml = unicode(document.stylesxml(), 'utf-8')
     expected_fragments = (
         u'''style:name="MP1"''',
         u'''style:name="MP2"''',
         u"""<style:header><text:p text:style-name="MP1">Header<text:tab/>""",
         u"""<style:footer><text:p text:style-name="MP2">Footer<text:tab/>""",
     )
     for fragment in expected_fragments:
         self.assertNotEqual(-1, styles_xml.find(fragment))

Пример #25

0

Показать файл

Файл: userfield.py Проект: TenKeyAngle/manuskript

    def loaddoc(self):
        """Load ``self.src_file`` into ``self.document``.

        On Python 3 a ``str`` or ``io.IOBase`` source, and on Python 2 a
        ``basestring`` source, must pass ``zipfile.is_zipfile`` or
        ``TypeError`` is raised. A ``None`` source is replaced by ``sys.stdin``.
        """
        source = self.src_file
        major = sys.version_info[0]
        if major == 3:
            needs_zip_check = isinstance(source, (str, io.IOBase))
        elif major == 2:
            # basestring only exists on Python 2, so it is referenced here only
            needs_zip_check = isinstance(source, basestring)
        else:
            needs_zip_check = False

        if needs_zip_check:
            if not zipfile.is_zipfile(source):
                raise TypeError(u"%s is no odt file." % self.src_file)
        elif source is None:
            # use stdin if no file given
            self.src_file = sys.stdin

        self.document = load(self.src_file)

Пример #26

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_headings(self):
     """Build a document with two headings and a paragraph, save it and reload it."""
     textdoc = OpenDocumentText()
     elements = (
         H(outlinelevel=1, text=u"Heading 1"),
         P(text=u"Hello World!"),
         H(outlinelevel=2, text=u"Heading 2"),
     )
     for element in elements:
         textdoc.text.addElement(element)
     textdoc.save(u"TEST.odt")
     self.saved = True

     reloaded = load(u"TEST.odt")
     content = reloaded.contentxml()  # contentxml() is supposed to yield bytes
     expected = b"""<text:h text:outline-level="1">Heading 1</text:h><text:p>Hello World!</text:p><text:h text:outline-level="2">Heading 2</text:h>"""
     self.assertNotEqual(-1, content.find(expected))

Пример #27

0

Показать файл

Файл: documents.py Проект: gkunter/coquery

def odt_to_str(path):
    """Return the string form of every paragraph of the ODT file at *path*, joined by newlines.

    Raises:
        RuntimeError: if ``options.use_odfpy`` is false. The odfpy names are
            only imported under that flag, so the function previously failed
            with an UnboundLocalError in that case.
    """
    if not options.use_odfpy:
        raise RuntimeError("odt_to_str() requires odfpy (options.use_odfpy is not set)")

    from odf.opendocument import load
    from odf import text

    document = load(utf8(path))
    paragraphs = document.getElementsByType(text.P)
    return "\n".join([utf8(para.__str__()) for para in paragraphs])

Пример #28

0

Показать файл

Файл: test_gdoc_down.py Проект: KarrLab/gdoc2text

    def test_cli_2odt(self):
        """Run the CLI with ``-f odt`` and check the downloaded file's text."""
        cli_args = ['-f', 'odt', '-o', self.out_dir, self.FIXTURE_FILE]
        with cli(argv=cli_args, credentials=self.credentials) as app:
            app.run()

        downloaded = os.path.join(self.out_dir, 'example.odt')

        # check that file downloaded
        self.assertTrue(os.path.isfile(downloaded))

        # check that file has correct content
        doc = opendocument.load(downloaded)
        xml_bytes = doc.toXml().encode('utf-8')
        root = ElementTree.fromstring(xml_bytes)
        self.assertRegexpMatches(GDocDown.get_element_text(root), 'gdoc_down example file')

Пример #29

0

Показать файл

Файл: testload.py Проект: abiapp/odfpy

 def test_linebreak(self):
     """Save and reload a paragraph containing a line break, then check its XML."""
     textdoc = OpenDocumentText()
     paragraph = P(text=u"Hello World!")
     textdoc.text.addElement(paragraph)
     paragraph.addElement(LineBreak())
     paragraph.addText(u"Line 2")
     textdoc.save(u"TEST.odt")
     self.saved = True

     reloaded = load(u"TEST.odt")
     content = reloaded.contentxml()  # contentxml() is supposed to yield bytes
     expected = b"""<text:p>Hello World!<text:line-break/>Line 2</text:p>"""
     self.assertNotEqual(-1, content.find(expected))

Пример #30

0

Показать файл

def load_images(input_file, ods):
    """Load the images of the input file into the ods document.

    pandoc converts ``input_file`` into a temporary ``tmp.odt`` next to the
    script, so the images can be read from that document's ``Pictures``
    instead of being extracted by hand.

    Args:
        input_file - path of the original input file.
        ods - the ods document the images are added to.

    Returns:
        hr_list - references returned by ``ods.addPicture``, in reverse order
            of insertion.
        [] - empty list if pandoc could not be started or exited with an error.

    """
    cur_dir = str(sys.argv[0])
    cur_dir = cur_dir.replace('odswriter.py', '')
    output_file = cur_dir + 'tmp.odt'

    # An argument list (no shell) keeps spaces and shell metacharacters in the
    # file names from being interpreted.
    command = ['pandoc', input_file, '-o', output_file]
    try:
        proc = Popen(command, stdout=PIPE, stderr=PIPE)
    except OSError as exc:
        print('Images can not be loaded, Error:\n', exc)
        return []
    _, err = proc.communicate()
    # The exit status is the failure signal; stdout is not an error channel.
    if proc.returncode != 0:
        print('Images can not be loaded, Error:\n', err)
        return []

    odffile = load(output_file)
    for k in odffile.Pictures.keys():
        img_dict[k] = odffile.Pictures[k][1]

    # The order of the references is the only way images are identified (the
    # input and the temporary document use different file names), so the list
    # is returned reversed relative to the insertion order.
    hr_list = [ods.addPicture(filename=img_name, content=img_dict[img_name])
               for img_name in img_dict]
    hr_list.reverse()
    return hr_list

Пример #31

0

Показать файл

Файл: opendoc.py Проект: x0rzkov/aleph

    def parse_opendocument(self, file_path, entity):
        """Load the document and copy selected metadata children onto *entity*.

        Raises ``ProcessingException`` (chained to the cause) when the
        document cannot be loaded. Returns the loaded document.
        """
        try:
            doc = load(file_path)
        except Exception as exc:
            raise ProcessingException("Cannot open document.") from exc

        # Tag name -> entity property, stored as plain text.
        text_props = {
            'dc:title': 'title',
            'dc:description': 'summary',
            'dc:creator': 'author',
            'meta:generator': 'generator',
        }
        # Tag name -> entity property, passed through parse_timestamp first.
        time_props = {
            'dc:date': 'date',
            'meta:creation-date': 'authoredAt',
        }

        for child in doc.meta.childNodes:
            value = str(child)
            tag = child.tagName
            if tag in text_props:
                entity.add(text_props[tag], value)
            elif tag in time_props:
                entity.add(time_props[tag], self.parse_timestamp(value))

        return doc

Пример #32

0

Показать файл

Файл: odp_parser.py Проект: OSLL/web_speech_trainer

def parse_odp(presentation_filepath):
    """Return one ``{'title', 'words'}`` dict per page of the presentation.

    Title nodes contribute to ``title`` (joined by newlines); every other
    child node contributes one space-joined line to ``words``.
    """
    presentation = opendocument.load(presentation_filepath)

    slides = []
    for page in presentation.getElementsByType(draw.Page):
        title_parts = []
        body_groups = []
        for node in page.childNodes:
            if _is_title(node):
                _walk_children(node, title_parts)
                continue
            group = []
            _walk_children(node, group)
            body_groups.append(group)

        slides.append({
            'title': "\n".join(title_parts),
            'words': "".join(" ".join(group) + "\n" for group in body_groups),
        })
    return slides

Пример #33

0

Показать файл

Файл: opendoc.py Проект: vishalbelsare/ingestors

    def parse_opendocument(self, file_path, entity):
        """Load the document and copy selected metadata children onto *entity*.

        Raises ``ProcessingException`` (chained to the cause) when the
        document cannot be loaded. Returns the loaded document.
        """
        try:
            doc = load(file_path)
        except Exception as exc:
            raise ProcessingException("Cannot open document.") from exc

        # Tag name -> (entity property, whether the value is a timestamp).
        mapping = {
            "dc:title": ("title", False),
            "dc:description": ("summary", False),
            "dc:creator": ("author", False),
            "dc:date": ("date", True),
            "meta:creation-date": ("authoredAt", True),
            "meta:generator": ("generator", False),
        }

        for child in doc.meta.childNodes:
            value = str(child)
            target = mapping.get(child.tagName)
            if target is None:
                continue
            prop, is_timestamp = target
            if is_timestamp:
                entity.add(prop, self.parse_timestamp(value))
            else:
                entity.add(prop, value)

        return doc

Пример #34

0

Показать файл

Файл: format_odf.py Проект: dgabrielson/python-spreadsheet

def reader(filename, fileobj, **kwargs):
    """
    Read the spreadsheet into a list of rows of cell values.

    ``fileobj`` should be in binary, and at the beginning of the stream.
    It is passed to ``load`` when truthy; otherwise ``filename`` is used.

    Each cell value is the ``data`` of the first child of the cell's first
    paragraph, or ``None`` when the cell has no paragraph. Trailing ``None``
    values are stripped from every row, so an empty row yields ``[]``.
    """
    book = load(fileobj or filename)
    sheet = book.spreadsheet

    results = []
    for tr in sheet.getElementsByType(TableRow):
        row = []
        for tc in tr.getElementsByType(TableCell):
            value = None
            for item in tc.getElementsByType(P):
                value = item.firstChild.data
                break  # only the first paragraph of a cell is used
            row.append(value)
        # Guard on `row`: a row without cells, or with only empty cells,
        # must end up as [] instead of raising IndexError.
        while row and row[-1] is None:
            row.pop()
        results.append(row)
    return results

Пример #35

0

Показать файл

Файл: slang.py Проект: alphagov-mirror/metadata-standards-description-language

    def extract(self, row_proc=list.append):
        """Parse the header and data ranges of the first sheet of ``self.spreadsheet``.

        Sets ``self.header`` and ``self.data`` and returns ``self.data``.
        ``row_proc`` is forwarded to ``parse_range`` for the data range.
        Warns about every entry left in ``self.unused_keys`` after the header
        has been parsed.

        Raises:
            AssertionError: if the workbook has no ``spreadsheet`` attribute
                or contains no sheets.
        """
        # Find the sheet inside the document.
        # For now we just use the first sheet and ignore the rest.
        workbook = opendocument.load(self.spreadsheet)
        try:
            workbook.spreadsheet

        except AttributeError:
            # A missing attribute raises AttributeError, not NameError.
            assert False, (
                "instance.extract: Workbook %s does not contain a spreadsheet!"
                % workbook)

        sheets = workbook.spreadsheet.getElementsByType(Table)
        assert (len(sheets) > 0), (
            "instance.extract: Workbook %s does not contain any sheets!" %
            workbook)

        sheet1 = sheets[0]

        # Read the header.
        # Get an array of cell validators of the correct type for that column or row.
        self.header = self.parse_range(sheet1, self.metadata.header,
                                       self.parse_header_cell)

        # Emit warnings for any keys declared in the metadata that were not
        # used in the spreadsheet.
        for (key, value) in self.unused_keys.iteritems():
            warn("Header %s of type %s was declared but not used!" %
                 (key, value))

        # Read the data
        self.data = self.parse_range(sheet1, self.metadata.data,
                                     self.parse_data_cell, row_proc)

        return self.data

Пример #36

0

Показать файл

    def parse_opendocument(self, file_path):
        """Load the document and record selected metadata children via ``self.update``.

        Raises ``ProcessingException`` when the document cannot be loaded.
        Returns the loaded document.
        """
        try:
            doc = load(file_path)
        except Exception:
            raise ProcessingException("Cannot open document.")

        # Tag name -> result key, stored as plain text.
        text_keys = {
            'dc:title': 'title',
            'dc:description': 'summary',
            'dc:creator': 'author',
            'meta:generator': 'generator',
        }
        # Tag name -> result key, passed through parse_odf_date first.
        date_keys = {
            'dc:date': 'date',
            'meta:creation-date': 'created_at',
        }

        for child in doc.meta.childNodes:
            value = str(child)
            tag = child.tagName
            if tag in text_keys:
                self.update(text_keys[tag], value)
            elif tag in date_keys:
                self.update(date_keys[tag], self.parse_odf_date(value))

        return doc

Пример #37

0

Показать файл

Файл: loadsave.py Проект: uk-gov-mirror/alphagov.odfpy

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2009 Søren Roug, European Environment Agency
#
# This is free software.  You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Contributor(s):
#

#
# This script simply loads a document into memory and saves it again.
# It takes the filename as argument
import os
import sys

from odf.opendocument import load

infile = sys.argv[1]
doc = load(infile)
# splitext keeps the real extension whatever its length, so "-bak" always
# lands just before it ("a.odt" -> "a-bak.odt", "a.fodt" -> "a-bak.fodt").
root, ext = os.path.splitext(infile)
outfile = root + "-bak" + ext
doc.save(outfile)

Пример #38

0

Показать файл

Файл: colloscope.py Проект: pykol/pykhollet

def import_odf(request, slug):
	"""
	Import the colloscope from an OpenDocument file.

	The file must have the same format as the one produced by the
	colloscope_odf view. The user is only expected to have filled in the
	group numbers for each slot and each week.

	On POST with a valid form the uploaded spreadsheet is walked row by
	row; problems are collected in ``import_erreurs``. When no error was
	collected the view redirects to 'colloscope', otherwise (and on any
	other request) the import page is rendered with the form and errors.
	"""
	classe = get_object_or_404(Classe, slug=slug)

	if not request.user.has_perm('pykol.change_colloscope', classe):
		raise PermissionDenied

	semaines = list(classe.semaine_set.order_by('debut'))
	creneaux = dict([(c.pk, c) for c in classe.creneau_set.all()])
	# Groups by name, for each of the two periods; year-long groups are
	# included in both.
	groupes = {
		constantes.PERIODE_PREMIERE:
			dict([(g.nom, g) for g in classe.trinomes.filter(
				periode__in=(constantes.PERIODE_ANNEE,
					constantes.PERIODE_PREMIERE)
			)]),
		constantes.PERIODE_DEUXIEME:
			dict([(g.nom, g) for g in classe.trinomes.filter(
				periode__in=(constantes.PERIODE_ANNEE,
					constantes.PERIODE_DEUXIEME)
			)]),
	}

	# List of the errors encountered while importing the file. Each is a
	# triple (error_code, (row, column), message), where row, column
	# and/or the pair may be None if the error does not concern a
	# particular position in the file.
	import_erreurs = []

	if request.method == 'POST':
		form = ColloscopeImportForm(request.POST, request.FILES)

		if form.is_valid():
			# Delete all the old colles that have not been carried out yet
			if form.cleaned_data.get('supprimer'):
				Colle.objects.filter(classe=classe,
						etat__in=(Colle.ETAT_PREVUE,
							Colle.ETAT_BROUILLON)).delete()

			# One big try catches any import error that would have
			# slipped through inside the processing.
			try:
				# Create the colles from the file
				colloscope_ods = load(request.FILES['colloscope_ods'])
				table = colloscope_ods.spreadsheet.getElementsByType(Table)[0]
				lignes = table.getElementsByType(TableRow)

				# Columns fixed by the ODF export
				nb_entetes_fixes = 5

				# The first two rows are skipped; numbering starts at 2.
				for ligne_num, ligne in enumerate(lignes[2:], 2):
					cells = iter_columns(ligne)

					try:
						# Rows that start with an empty number are
						# ignored.
						creneau_text = tablecell_to_text(next(cells)).strip()
						if not creneau_text:
							continue
						id_creneau = int(creneau_text)
						creneau = creneaux[id_creneau]
					# NOTE(review): bare except -- any failure here is
					# reported as an invalid slot.
					except:
						import_erreurs.append(('creneau_invalide',
							(ligne_num, 0),
							"La valeur n'est pas un numéro de créneau "
							"valide pour cette classe."))
						continue

					try:
						# Skip the following fixed columns
						for _ in range(nb_entetes_fixes):
							next(cells)
					except:
						# If there is no cell left to walk through, the
						# row is considered empty and we move on to the
						# next one.
						continue

					# And now we reach the weeks
					for sem_num, (sem_cell, semaine) in enumerate(zip_longest(cells, semaines)):
						# Fetch the content of the cell and try to guess
						# the week. Depending on which of the four possible
						# cases applies to this pair of values (each one
						# empty or not), the handling differs.
						if sem_cell is None:
							groupes_text = None
						else:
							groupes_text = tablecell_to_text(sem_cell).strip()

						if semaine is None:
							# Content was found in a cell that matches no
							# week of the colloscope. Report the error. If
							# the cell content is empty, nothing is
							# reported: it is just a ghost leftover from
							# the spreadsheet.
							if groupes_text:
								import_erreurs.append(('semaine_invalide',
									(ligne_num, sem_num + nb_entetes_fixes),
									"Case située au-delà de la dernière "
									"semaine de colles."))

							# In every case move on to the next row, there
							# is no further interesting week to expect on
							# this row.
							break

						elif groupes_text:
							# Case where a list of groups is found for a
							# known week. Update the colles.
							groupes_colles = [g.strip()
									for g in groupes_text.split(",")
									if g.strip()]

							for num_groupe in groupes_colles:
								try:
									groupe = groupes[semaine.periode][num_groupe]
								except:
									# Report the groups that do not exist
									# and move on to the next ones.
									import_erreurs.append(('groupe_invalide',
										(ligne_num, sem_num + nb_entetes_fixes),
										"Identifiant de groupe de colle "
										"inconnu."))
									continue

								try:
									Colle.objects.update_or_create_from_creneau(creneau, semaine, groupe)
								except:
									import_erreurs.append(('update_echoue',
										(ligne_num, sem_num + nb_entetes_fixes),
										"Échec de la mise à jour de cette "
										"colle."))

						else:
							# Case where the cell of the week is empty.
							# Delete the colles that may already be there
							# in the database.
							for colle in Colle.objects.filter(creneau=creneau, semaine=semaine):
								if not colle.est_effectuee:
									colle.annuler_mouvement()
									colle.delete()

				if not import_erreurs:
					return redirect('colloscope', slug=classe.slug)

			except Exception as e:
				import_erreurs.append(('fichier_invalide', None,
					"Votre fichier n'est pas au format demandé."))
				logger.exception("Erreur inconnue lors de l'importation d'un colloscope",
						exc_info=e)
	else:
		form = ColloscopeImportForm()

	return render(request, 'pykol/colloscope/import_odf.html', context={
		'classe': classe,
		'form': form,
		'import_erreurs': import_erreurs,
	})

Пример #39

0

Показать файл

from generatedata import textfiles
from odf import text, teletype
from odf.opendocument import load
from pathlib import Path

# Paths of the .odt inputs, converted to plain strings.
odtfiles = textfiles[1]
newodtfiles = [str(odt_path) for odt_path in odtfiles]

# Extracted text of every paragraph (text.P element) of every .odt document.
numbers_from_odt = []
for item in newodtfiles:
    doc = load(item)
    allrows = doc.getElementsByType(text.P)
    numbers_from_odt.extend(teletype.extractText(row) for row in allrows)

# Every line of every plain-text input, line endings included.
numbers_from_txt = []
for item in textfiles[0]:
    # The context manager closes the file even when reading fails part-way.
    with open(str(item), "r") as sourceFile:
        numbers_from_txt.extend(sourceFile)

# Built once, after both loops have finished: inside the loop it was rebuilt
# for each text file and stayed undefined when textfiles[0] was empty.
all_numbers = numbers_from_odt + numbers_from_txt

Пример #40

0

Показать файл

    def addtolist(self, path, list, textlist):
        """Walk ``path`` and record every file found beneath it.

        For each file, ``<prefix><file path>`` is appended to ``list``. The
        prefix is ``str(width / height) + '_'`` when ``get_image_size`` can
        read the file, and ``'noimage_'`` when it raises
        ``UnknownImageFormat``.

        When ``textlist`` is not None, one string per file is appended to it
        as well: the text extracted from .txt, .docx, .odt, .xlsx and .ods
        files, or ``''`` for images, PDFs and other formats. A failed
        extraction is reported on stdout and keeps whatever text had been
        collected up to the failure.

        Nothing is appended when ``path`` is not a directory.

        Returns ``(list, textlist)`` when ``textlist`` is not None, otherwise
        ``list`` alone.
        """
        if os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                for filename in files:
                    # os.walk yields directory names without a trailing
                    # separator, so the parts must be joined rather than
                    # concatenated to get a usable path.
                    pathandname = os.path.join(root, filename)

                    try:
                        width, height = get_image_size(pathandname)
                        wh = str(width / height) + '_'
                    except UnknownImageFormat:
                        wh = 'noimage_'

                    if textlist is not None:
                        tx = ''
                        if wh == 'noimage_':
                            print('path: ', pathandname)
                            try:
                                if '.pdf' in filename:
                                    pass  # handled with lazypdf

                                elif '.txt' in filename:
                                    with open(pathandname,
                                              encoding='utf-8') as txf:
                                        tx = txf.read()

                                elif '.docx' in filename:
                                    tx = docx2txt.process(pathandname)

                                elif '.odt' in filename:
                                    textdoc = load(pathandname)
                                    tx = teletype.extractText(textdoc.body)

                                elif '.xlsx' in filename:
                                    wb = xlrd.open_workbook(pathandname)
                                    sh1 = wb.sheet_by_index(0)  # first sheet
                                    for rownum in range(sh1.nrows):
                                        onerow = ' '.join(
                                            sh1.row_values(rownum))
                                        tx = tx + onerow + '\n'

                                elif '.ods' in filename:
                                    doc = ODSReader(pathandname,
                                                    clonespannedcolumns=True)
                                    table = doc.getSheet(u'Sheet1')
                                    for row in table:
                                        for cell in row:
                                            tx = tx + ' ' + cell

                            except Exception as exc:
                                # Extraction stays best-effort, but name the
                                # file that failed instead of hiding the error.
                                print('text extraction failed: ',
                                      pathandname, exc)

                        textlist.append(tx)

                    list.append(wh + pathandname)

        if textlist is not None:
            return list, textlist
        else:
            return list

Пример #41

0

Показать файл

 def load(self, filename):
     """Open the document at ``filename`` and collect its tables.

     Stores the name in ``self.filename``, the loaded document in
     ``self.document`` and that document's ``Table`` elements in
     ``self.tables``.
     """
     self.filename = filename
     document = load(filename)
     self.document = document
     self.tables = document.getElementsByType(Table)

Пример #42

0

Показать файл

 def read_file(self):
     """Load the document at ``self.path`` and store its paragraph text.

     The text of every ``text.P`` element is extracted and the pieces are
     joined with single spaces into ``self.content``.
     """
     paragraphs = load(self.path).getElementsByType(text.P)
     extracted = (teletype.extractText(paragraph) for paragraph in paragraphs)
     self.content = " ".join(extracted)

Пример #43

0

Показать файл

Файл: testload.py Проект: uk-gov-mirror/alphagov.odfpy

 def test_spreadsheet(self):
     """ The example document must expose exactly one child object """
     examples_dir = os.path.join(os.path.dirname(__file__), u"examples")
     document = load(os.path.join(examples_dir, u"emb_spreadsheet.odp"))
     self.assertEqual(1, len(document.childobjects))

Пример #44

0

Показать файл

def open_odt(file) -> str:
    """Open an ODT document and return all of its text.

    The text of each ``odf.text.P`` element is extracted and the results are
    joined with newline characters.
    """
    document = load(file)
    paragraphs = document.getElementsByType(odf.text.P)
    return "\n".join(teletype.extractText(paragraph) for paragraph in paragraphs)

Пример #45

0

Показать файл

Файл: testload.py Проект: uk-gov-mirror/alphagov.odfpy

 def test_simple(self):
     """ Loading TEST.odt must give content XML that contains the greeting """
     # contentxml() is supposed to yield bytes, hence the bytes needle.
     content = load(u"TEST.odt").contentxml()
     position = content.find(b"Hello World!")
     self.assertNotEqual(-1, position)

Пример #46

0

Показать файл

def odf_question_file(filesName):
    """Print the text extracted from the body of the document ``filesName``."""
    body = load(filesName).body
    print(teletype.extractText(body))

Пример #47

0

Показать файл

def parse(document):
    """Return the paragraphs of ``document.file`` as a single string.

    Every ``text.P`` element of the loaded document is converted with ``str``
    and the results are joined with a newline followed by one space.
    """
    paragraphs = load(document.file).getElementsByType(text.P)
    return "\n ".join(str(paragraph) for paragraph in paragraphs)

Пример #48

0

Показать файл

 def _load_from_memory(self):
     """Load ``self._file_stream`` with ``load`` and keep the result in ``self._native_book``."""
     self._native_book = load(self._file_stream)

Пример #49

0

Показать файл

# Copy of the key/value pairs found in config['data'].
upd_dict = {k: v for (k, v) in config['data'].items()}

templ_dir = u'/mnt/storage/tmp'
templ_filename = u"{0}/{1}".format(templ_dir, 'DogTemp-full.odt')

out_dir = templ_dir
# The formatted value is already text: calling .decode() on it raises
# AttributeError on Python 3 and changes nothing for this ASCII name on
# Python 2, so the call is dropped.
out_filename = u"{0}/{1}".format(out_dir, 'DogOutput.odt')

doc = load(templ_filename)

# Register the stamp picture with the document; href is what addPicture
# returns for it.
img_path = '/smb/it/tmp/imgStampКИПСПБ.jpg'
href = doc.addPicture(img_path)
img_sign = Image(href=href, type="simple", show="embed", actuate="onLoad")

# Point every Image found inside the frame named 'img_stamp_sign' at the
# newly added picture.
for f in doc.getElementsByType(Frame):
    if f.getAttribute('name') == 'img_stamp_sign':
        for chld in f.childNodes:
            if u'image' in chld.qname:
                for img in chld.getElementsByType(Image):
                    print('--- img ---')
                    img.setAttribute('href', href)
"""        
for tbl in doc.getElementsByType(Table):

Пример #50

0

Показать файл

 def _load_from_file(self):
     """Load ``self._file_name`` with ``load`` and keep the result in ``self._native_book``."""
     self._native_book = load(self._file_name)

Пример #51

0

Показать файл

 def __init__(self, presentation_name):
     """Load the presentation ``presentation_name`` and build this object from it.

     After the ``PresentationBasic`` initialisation, ``self.auto_styles`` is
     reset to an empty dict and the document is loaded into ``self.prs``;
     ``parse_styles()`` and then ``add_slides()`` are run afterwards.
     """
     PresentationBasic.__init__(self, presentation_name)
     self.auto_styles = {}
     self.prs = opendocument.load(presentation_name)
     self.parse_styles()
     self.add_slides()

Пример #52

0

Показать файл

Файл: testload.py Проект: bufke/odfpy

 def test_simple(self):
     """ Loading TEST.odt must give content XML that contains the greeting """
     content = load("TEST.odt").contentxml()
     position = content.find(u"Hello World!")
     self.assertNotEqual(-1, position)

Пример #53

0

Показать файл

Файл: ProcessingOdt.py Проект: wecallhimbruceu/documentcreator

 def __init__(self):
     """Load the template document found under ``root`` into ``self.doc``."""
     self.doc = load(root + r"/documentTemplates/Шаблон.odt")

Пример #54

0

Показать файл

import random
import os
from odf.opendocument import load
from odf.text import *

# Read the text that will be inserted into the document.
with open('wals_intro', 'r', encoding='utf-8') as f:
    pl = f.read()

# First .odt file listed in the current directory (IndexError if there is none).
filename = [f for f in os.listdir() if f.endswith('.odt')][0]

doc = load(filename)

# Append a level-1 heading, then a paragraph holding the text read above.
h = H(outlinelevel=1, text="Plagiarism")
doc.text.addElement(h)

p = P(text=pl)
doc.text.addElement(p)

# The modified document overwrites the original only when the value drawn
# from 0..5 happens to be 4.
pool = range(6)
if random.choice(pool) == 4:
    doc.save(filename)

Пример #55

0

Показать файл

from os import listdir
from os.path import isfile, join
from odf.opendocument import load
from odf import teletype
import sys

# Running totals over every .odt file of the chosen sub-directory of docs/.
count_with_space = 0
count = 0
nb_fic = 0

directory = 'docs/' + sys.argv[1]
for entry in listdir(directory):
    filename = join(directory, entry)
    # Only regular files with the .odt extension are counted.
    if not (isfile(filename) and filename.endswith('.odt')):
        continue
    nb_fic += 1
    txt = teletype.extractText(load(filename).text)
    # Every character counts here; the second total leaves out spaces, tabs,
    # newlines and non-breaking spaces.
    count_with_space += len(txt)
    count += sum(1 for car in txt if car not in (' ', '\t', '\n', u'\u00A0'))

print(f'Total pour {sys.argv[1]} :')
print(f'    {nb_fic} fichiers')
print(f'    Avec espaces : {count_with_space} caractères')
print(f'    Sans espaces : {count} caractères')

Пример #56

0

Показать файл

from odf.opendocument import load
from odf import text

# Open a document
doc = load("Copia.odt")
# Take the document's ``text`` attribute (its whole text content) and print it
conteudo = doc.text
print(conteudo)

Пример #57

0

Показать файл

Файл: ods2odt.py Проект: uk-gov-mirror/alphagov.odfpy

    outputfile = None

    for o, a in opts:
        if o in ("-o", "--output"):
            outputfile = a

    if len(args) != 1:
        usage()
        sys.exit(2)

    inputfile = args[0]
    if outputfile is None:
        outputfile = inputfile[:inputfile.rfind('.')] + ".odt"

    spreadsheetdoc = load(inputfile)

    textdoc = OpenDocumentText()

    # Need to make a copy of the list because addElement unlinks from the original
    for meta in spreadsheetdoc.meta.childNodes[:]:
        textdoc.meta.addElement(meta)

    for font in spreadsheetdoc.fontfacedecls.childNodes[:]:
        textdoc.fontfacedecls.addElement(font)

    for style in spreadsheetdoc.styles.childNodes[:]:
        textdoc.styles.addElement(style)

    for autostyle in spreadsheetdoc.automaticstyles.childNodes[:]:
        textdoc.automaticstyles.addElement(autostyle)

Пример #58

0

Показать файл

Файл: _odfreader.py Проект: 701789262a/arbobotti

    def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
        """Return ``filepath_or_buffer`` loaded with ``odf.opendocument.load``."""
        # Imported here rather than at module level, so odf is only needed
        # once this method is actually called.
        from odf.opendocument import load

        return load(filepath_or_buffer)

Пример #59

0

Показать файл

def get_paragraphs_odt(doc_path):
    """Return the ``odf.text.P`` elements of the document stored at ``doc_path``."""
    return load(doc_path).getElementsByType(odf.text.P)

Пример #60

-1

Показать файл

Файл: purdue_fta.py Проект: rgiot/biometric_evaluation

    def __init__(self, fname='Purdue-FTA.ods'):
        """Open the ODS file.

        @param fname : Path of the file.
        """

        # NOTE(review): the document returned by load() is not stored
        # anywhere, so within the visible code the call only checks that the
        # file can be loaded. Confirm whether it was meant to be kept on self.
        load(fname)

Python load примеры использования