Exemplo n.º 1
0
 def get_relationships(self):
     """Return the document relationships, loading them on first use.

     A truthy ``self.relationships`` is returned unchanged. Otherwise the
     part ``word/_rels/document.xml.rels`` below ``self.file_path`` is
     read, parsed as XML, wrapped in ``Relationships`` and stored on the
     instance before being returned.
     """
     if not self.relationships:
         rels_path = os.path.join(self.file_path,
                                  "word/_rels/document.xml.rels")
         with open(rels_path, encoding="UTF-8") as rels_file:
             soup = BeautifulSoup(rels_file.read(), "xml")
         self.relationships = Relationships(soup)
     return self.relationships
Exemplo n.º 2
0
    def _write_chartsheet_rels_files(self):
        """Write one .rels file per chartsheet that has drawing links.

        Chartsheets are numbered from 1 in workbook order; the number is
        consumed even by chartsheets without links, so file names stay
        aligned with the chartsheet position. Non-chartsheets are ignored.
        """
        chartsheets = (sheet for sheet in self.workbook.worksheets()
                       if sheet.is_chartsheet)

        for sheet_number, chartsheet in enumerate(chartsheets, 1):
            links = chartsheet.external_drawing_links

            if links:
                rels = Relationships()

                for link in links:
                    rels._add_worksheet_relationship(*link)

                # e.g. xl/chartsheets/_rels/sheet1.xml.rels
                rels_name = ('xl/chartsheets/_rels/sheet'
                             + str(sheet_number) + '.xml.rels')
                rels._set_xml_writer(self._filename(rels_name))
                rels._assemble_xml_file()
Exemplo n.º 3
0
    def _write_worksheet_rels_files(self):
        """Write one .rels file per worksheet that has external links.

        Chartsheets are skipped. Worksheets are numbered from 1 in workbook
        order, and the number advances even for worksheets without links.
        The links are the hyperlink, drawing, VML, table and comment link
        collections of the worksheet, concatenated in that order.
        """
        plain_sheets = (sheet for sheet in self.workbook.worksheets()
                        if not sheet.is_chartsheet)

        for sheet_number, sheet in enumerate(plain_sheets, 1):
            links = sheet.external_hyper_links + sheet.external_drawing_links
            links = links + sheet.external_vml_links
            links = links + sheet.external_table_links
            links = links + sheet.external_comment_links

            if links:
                rels = Relationships()

                for link in links:
                    rels._add_worksheet_relationship(*link)

                # e.g. xl/worksheets/_rels/sheet1.xml.rels
                rels_name = ('xl/worksheets/_rels/sheet'
                             + str(sheet_number) + '.xml.rels')
                rels._set_xml_writer(self._filename(rels_name))
                rels._assemble_xml_file()
Exemplo n.º 4
0
    def _write_vml_drawing_rels_file(self, worksheet, index):
        """Write the vmlDrawing .rels file for ``worksheet``.

        Every entry of ``worksheet.vml_drawing_links`` becomes a document
        relationship. ``index`` is the number used in the output file name,
        e.g. xl/drawings/_rels/vmlDrawing1.vml.rels for ``index`` 1.
        """
        rels = Relationships()

        for link in worksheet.vml_drawing_links:
            rels._add_document_relationship(*link)

        rels_name = 'xl/drawings/_rels/vmlDrawing' + str(index) + '.vml.rels'
        rels._set_xml_writer(self._filename(rels_name))
        rels._assemble_xml_file()
Exemplo n.º 5
0
	def _find_relationships(self, list_tagged, global_entities):
		"""Collect relationships between consecutive entities of a sentence.

		``list_tagged`` is iterated as groups of sentences; each sentence is a
		sequence of tokens where ``token[0]`` is used as the text, ``token[1]``
		as the tag and ``token[2]`` is handed to ``_search_parent_entity``.

		Every relation added to the result is the tuple
		``(label, first_id, first_text, second_id, second_text)``.

		Returns the ``Relationships`` collection that was filled.
		"""
		found = Relationships()
		breaking_tags = ['CONJ', 'WPRO', ',', '(', ')']
		ignored_words = ['ex']

		for tagged in list_tagged:
			for sentence in tagged:
				# State is reset at the start of every sentence.
				prev_entity = None
				prev_entity_pos = 0
				pending_relation = None

				for pos, token in enumerate(sentence):
					text = token[0]

					# Single-character tokens and stop words are ignored.
					if len(text) == 1 or text.lower() in ignored_words:
						continue

					tag = token[1]

					if tag == 'NE':
						if prev_entity is not None and self._contain_main_entity(prev_entity[0], text):
							# Exactly one multi-character token between the two
							# entities: that token is the label. Otherwise fall
							# back to the relation collected so far, if any.
							if pos - prev_entity_pos == 2 and len(sentence[pos - 1][0]) > 1:
								label = sentence[pos - 1][0]
							else:
								label = pending_relation

							if label is not None:
								first_id = self._search_parent_entity(prev_entity[2], global_entities).id()
								second_id = self._search_parent_entity(token[2], global_entities).id()
								found.add((label, first_id, prev_entity[0], second_id, text))

						# The current entity becomes the left-hand side.
						prev_entity = token
						prev_entity_pos = pos
						pending_relation = None

					# Relations are only tracked once an entity has been seen.
					if prev_entity is None:
						continue

					if 'N' in tag:
						# Relation made of a verb and a noun.
						pending_relation = self._compose_verb_noun(sentence, pos, prev_entity_pos, breaking_tags)
					elif 'VB' in tag:
						# Relation made of one or more verbs.
						pending_relation = self._get_composed_verbs(sentence, pos, prev_entity_pos, breaking_tags)
					elif tag in breaking_tags:
						# A breaking tag discards entity and relation.
						pending_relation = None
						prev_entity = None
						prev_entity_pos = 0

					# Drop a relation whose first element is upper case.
					if pending_relation is not None and pending_relation[0].isupper():
						pending_relation = None

		return found
Exemplo n.º 6
0
    def _write_drawing_rels_files(self):
        """Write one drawing .rels file per worksheet with drawing links.

        Only worksheets with a non-empty ``drawing_links`` are counted, so
        the files are numbered consecutively from 1.
        """
        sheets_with_drawings = (sheet for sheet in self.workbook.worksheets()
                                if sheet.drawing_links)

        for drawing_number, sheet in enumerate(sheets_with_drawings, 1):
            rels = Relationships()

            for link in sheet.drawing_links:
                rels._add_document_relationship(*link)

            # e.g. xl/drawings/_rels/drawing1.xml.rels
            rels_name = ('xl/drawings/_rels/drawing'
                         + str(drawing_number) + '.xml.rels')
            rels._set_xml_writer(self._filename(rels_name))
            rels._assemble_xml_file()
Exemplo n.º 7
0
    def _write_workbook_rels_file(self):
        """Write the xl/_rels/workbook.xml.rels file.

        Relationships are added in this order: one per sheet in workbook
        order (worksheets and chartsheets are numbered independently from
        1), then theme and styles, then sharedStrings if the string table
        is non-empty, then vbaProject if the workbook has one.
        """
        rels = Relationships()

        next_worksheet = 1
        next_chartsheet = 1

        for sheet in self.workbook.worksheets():
            if not sheet.is_chartsheet:
                target = 'worksheets/sheet' + str(next_worksheet) + '.xml'
                rels._add_document_relationship('/worksheet', target)
                next_worksheet += 1
                continue

            target = 'chartsheets/sheet' + str(next_chartsheet) + '.xml'
            rels._add_document_relationship('/chartsheet', target)
            next_chartsheet += 1

        rels._add_document_relationship('/theme', 'theme/theme1.xml')
        rels._add_document_relationship('/styles', 'styles.xml')

        # sharedStrings is only referenced when the workbook holds strings.
        has_strings = self.workbook.str_table.count
        if has_strings:
            rels._add_document_relationship('/sharedStrings',
                                            'sharedStrings.xml')

        # The VBA project is optional.
        has_vba = self.workbook.vba_project
        if has_vba:
            rels._add_ms_package_relationship('/vbaProject', 'vbaProject.bin')

        rels_path = self._filename('xl/_rels/workbook.xml.rels')
        rels._set_xml_writer(rels_path)
        rels._assemble_xml_file()
Exemplo n.º 8
0
    def _write_root_rels_file(self):
        """Write the package-level _rels/.rels file.

        It references, in order, the workbook part, the core properties
        part and the extended (app) properties part.
        """
        rels = Relationships()

        # (method used to add the relationship, type, target)
        entries = (
            (rels._add_document_relationship,
             '/officeDocument', 'xl/workbook.xml'),
            (rels._add_package_relationship,
             '/metadata/core-properties', 'docProps/core.xml'),
            (rels._add_document_relationship,
             '/extended-properties', 'docProps/app.xml'),
        )
        for add_relationship, rel_type, target in entries:
            add_relationship(rel_type, target)

        rels._set_xml_writer(self._filename('_rels/.rels'))
        rels._assemble_xml_file()
Exemplo n.º 9
0
class Docx(IdAble):
    """A .docx package unpacked into a private temporary directory.

    The archive is extracted below ``TEMP_BASE_DIR`` into a directory named
    after a fresh UUID. The main parts (document, content types, numbering,
    relationships, styles) are parsed on construction, can be modified or
    merged with another ``Docx``, and are written back and re-zipped by
    ``save``. Call ``close`` to remove the temporary directory.
    """

    def __init__(self, path):
        """Extract the archive at ``path`` and load its main parts.

        Raises:
            Exception: if ``path`` is not a ``str`` (this includes ``None``).
        """
        super().__init__()
        if not isinstance(path, str):
            raise Exception("Path is not allowed None")
        # exist_ok avoids the check-then-create race between processes.
        os.makedirs(TEMP_BASE_DIR, exist_ok=True)
        self.document = None
        self.content_types = None
        self.relationships = None
        self.numbering = None
        self.styles = None
        self.base_dir = uuid1().hex
        # Open the archive first so that an invalid path leaves no directory
        # behind; the context manager closes it even if extraction fails.
        with ZipFile(path) as archive:
            self.file_path = os.path.join(TEMP_BASE_DIR, self.base_dir)
            os.mkdir(self.file_path)
            archive.extractall(self.file_path)
        self.get_document()
        self.get_content_types()
        self.get_numbering()
        self.get_relationships()
        self.get_styles()

    def _read_part(self, part_name):
        """Parse the extracted part ``part_name`` as XML and return the soup."""
        with open(os.path.join(self.file_path, part_name),
                  encoding="UTF-8") as f:
            return BeautifulSoup(f.read(), "xml")

    def _write_part(self, part_name, dom):
        """Write ``str(dom)`` to the extracted part ``part_name`` as UTF-8."""
        with open(os.path.join(self.file_path, part_name),
                  mode="w",
                  encoding="UTF-8") as f:
            f.write(str(dom))

    def get_numbering(self):
        """Return the numbering part, loading it on first use.

        When the package has no ``word/numbering.xml`` an empty
        ``Numbering()`` is stored and returned instead.
        """
        if self.numbering:
            return self.numbering
        numbering_path = os.path.join(self.file_path, "word/numbering.xml")
        if not os.path.exists(numbering_path):
            self.numbering = Numbering()
            return self.numbering
        self.numbering = Numbering(self._read_part("word/numbering.xml"))
        return self.numbering

    def get_document(self):
        """Return the main document part, loading it on first use."""
        if self.document:
            return self.document
        self.document = Document(self._read_part("word/document.xml"))
        return self.document

    def get_relationships(self):
        """Return the document relationships, loading them on first use."""
        if self.relationships:
            return self.relationships
        self.relationships = Relationships(
            self._read_part("word/_rels/document.xml.rels"))
        return self.relationships

    def get_content_types(self):
        """Return the content types part, loading it on first use."""
        if self.content_types:
            return self.content_types
        self.content_types = ContentTypes(
            self._read_part("[Content_Types].xml"))
        return self.content_types

    def get_styles(self):
        """Return the styles part, loading it on first use."""
        if self.styles:
            return self.styles
        self.styles = Styles(self._read_part("word/styles.xml"))
        return self.styles

    def extract_media_files(self, path):
        """Copy the files named by the relationships into ``path``.

        The relationship file mapping maps a source name (relative to this
        package's ``word`` directory) to a destination name (relative to
        ``path``). Missing destination directories are created. Sources that
        are not existing regular files are skipped, which keeps the previous
        best-effort behaviour where a failing copy was silently ignored.
        """
        import shutil

        file_mapping = self.get_relationships().get_file_mapping()
        base_dir = os.path.join(self.file_path, "word")
        for source_name, target_name in file_mapping.items():
            from_file = os.path.join(base_dir, source_name)
            to_file = os.path.join(path, target_name)

            dir_name = os.path.dirname(to_file)
            if not os.path.exists(dir_name):
                os.makedirs(dir_name)
            # Copy without a shell: safe for paths containing spaces or
            # shell metacharacters, and portable across platforms.
            if os.path.isfile(from_file):
                shutil.copy(from_file, to_file)

    def merge(self, doc, page=False):
        """Merge the parts of ``doc`` into this document.

        Content types, relationships (including the referenced files),
        styles, numbering and the document body of ``doc`` are merged in
        that order. The parts of ``doc`` get new ids generated from
        ``doc.id`` / ``doc.num`` first, so ``doc`` itself is modified.
        ``page`` is forwarded to the document merge.

        Raises:
            Exception: if ``doc`` is not a ``Docx``.
        """
        if not isinstance(doc, Docx):
            raise Exception("merge parameter is not docx")
        source_content_types = doc.get_content_types()
        self.get_content_types().merge_content_types(source_content_types)

        source_relationships = doc.get_relationships()
        source_relationships.generate_id(doc.id)
        doc.extract_media_files(os.path.join(self.file_path, "word"))
        self.get_relationships().merge_relationships(source_relationships)

        source_styles = doc.get_styles()
        source_styles.generate_id(doc.id)
        self.styles.merge(source_styles)

        source_numberings = doc.get_numbering()
        source_numberings.generate_id(doc.num)
        self.numbering.merge(source_numberings)

        source_document = doc.get_document()
        source_document.generate_id(doc.id, doc.num)
        self.get_document().merge(source_document, page)

    def save(self, name):
        """Write all parts back to disk and zip the package to ``name``."""
        import zipfile

        self._save_document()
        self._save_content_types()
        self._save_relationships()
        self._save_numbering()
        self._save_styles()

        with ZipFile(name, "w", compression=zipfile.ZIP_DEFLATED) as archive:
            for base, _children, files in os.walk(self.file_path):
                for filename in files:
                    real_path = os.path.join(base, filename)
                    # Archive names are relative to the extraction root.
                    zip_path = os.path.relpath(real_path, self.file_path)
                    archive.write(real_path, zip_path)

    def _save_document(self):
        """Write the document DOM to ``word/document.xml``."""
        self._write_part("word/document.xml", self.document.get_dom())

    def _save_content_types(self):
        """Write the content types DOM to ``[Content_Types].xml``."""
        self._write_part("[Content_Types].xml",
                         self.content_types.get_dom())

    def _save_relationships(self):
        """Write the relationships DOM to ``word/_rels/document.xml.rels``."""
        self._write_part("word/_rels/document.xml.rels",
                         self.relationships.get_dom())

    def _save_numbering(self):
        """Write the numbering DOM to ``word/numbering.xml`` if it is truthy."""
        numbering = self.numbering.get_dom()
        if not numbering:
            return
        self._write_part("word/numbering.xml", numbering)

    def _save_styles(self):
        """Write the styles DOM to ``word/styles.xml``."""
        self._write_part("word/styles.xml", self.styles.get_dom())

    def append_paragraph(self, text, align="left"):
        """Append a paragraph with ``text`` to the document."""
        self.document.append_paragraph(text, align)

    def append_picture(self, filepath, align="left"):
        """Append the image at ``filepath`` to the document.

        Does nothing when ``filepath`` does not exist. Otherwise the file
        extension is registered with the content types, a relationship is
        created for it, the file is copied into ``word/media`` under the
        name chosen by the relationship, and a picture element sized from
        the image dimensions is appended.
        """
        import shutil

        if not os.path.exists(filepath):
            return
        media_dir = os.path.join(self.file_path, "word/media")
        os.makedirs(media_dir, exist_ok=True)
        suffix = filepath.split(".")[-1]
        self.content_types.append_extension(suffix)
        id_file = self.relationships.append_relationship(suffix)
        file_path = os.path.join(
            self.file_path,
            "word/media/{filename}".format(filename=id_file["filename"]))
        # Copy without a shell so unusual file names cannot break or inject
        # into a command line.
        shutil.copy(filepath, file_path)
        with Image.open(file_path) as img:
            width, height = img.size
        # NOTE(review): 6350 looks like a pixel-to-EMU factor (914400 / 144)
        # - confirm the intended resolution.
        self.document.append_picture(id_file["rid"], width * 6350,
                                     height * 6350, align)

    def close(self):
        """Remove the temporary extraction directory, ignoring errors."""
        import shutil

        shutil.rmtree(self.file_path, ignore_errors=True)
0
from relationships import Relationships
from relationshipstats import RelationshipStats
from history import History

# Create the table of every model at import time, in the original order.
# NOTE(review): fail_silently=True presumably tolerates tables that already
# exist - confirm against the ORM version in use.
for _model in (Relationships, RelationshipStats, History):
    _model.create_table(fail_silently=True)