Exemplo n.º 1
0
	def __init__(self, inputfile):
		"""Remember the input path and start with an empty reading state.

		Args:
			inputfile: Path of the file this object will work on.
		"""
		# Reading state: nothing has been read yet.
		self.current_page = ""
		self.current_line = 0
		self.lines = list()

		# Helper used later to decode the blocks of the file.
		self.block = Block()

		self.filename = inputfile
Exemplo n.º 2
0
class OermDataBase(object):
	"""Sequential reader of the blocks stored in an OERM file.

	Meant to be used as a context manager and iterated: every iteration
	step reads one length-prefixed block from the file and returns the
	result of ``Block.load`` for it.

	Args:
		inputfile: Path of the OERM file to read.
	"""

	def __init__(self, inputfile):

		self.filename		= inputfile
		self.lines			= []
		self.current_line	= 0
		self.current_page	= ""
		self.block			= Block()
		# Set by __enter__; kept as None so __exit__ is safe when open() fails.
		self.open_file		= None

	def __enter__(self):
		"""Open the file and validate its header.

		Returns:
			The reader itself, or ``None`` when the file cannot be opened or
			does not start with the ``oerm`` magic number (the error is
			printed instead of raised).
		"""
		try:
			self.open_file = open(self.filename, mode="rb")

			# Header: 4 bytes of magic number followed by 1 byte (version, unused here).
			header = struct.Struct(">4sB")
			data = self.open_file.read(header.size)

			magic_number = header.unpack_from(data)[0].decode("utf-8")

			if magic_number != "oerm":
				raise ValueError(_('{0} no es un archivo oerm válido!').format(self.filename))

		except Exception as err:
			print(_("Error al abrir el archivo: {0}").format(err))
			return None

		return self

	def __exit__(self, *args):
		"""Close the file if it was opened.

		Returns:
			Always ``True``.
		"""
		# The file may never have been opened (open() failed in __enter__).
		handle = getattr(self, "open_file", None)
		if handle is not None:
			handle.close()
		# NOTE(review): returning True suppresses every exception raised inside
		# the ``with`` body. It is kept because __enter__ returns None on
		# failure, which makes the body fail on first use - confirm whether
		# callers rely on that error being swallowed before changing it.
		return True

	def __iter__(self):
		return self

	def __next__(self):
		"""Read the next block from the file.

		Returns:
			Whatever ``self.block.load`` returns for the raw block bytes.

		Raises:
			StopIteration: When no more data can be read.
		"""
		# Every block starts with its own total length as a big-endian unsigned 32-bit value.
		length_prefix = struct.Struct('>L')

		data = self.open_file.read(length_prefix.size)
		if not data:
			raise StopIteration

		longitud_bloque = length_prefix.unpack_from(data)[0]

		# Rewind over the prefix: the length includes it and the block is loaded whole.
		self.open_file.seek(self.open_file.tell() - length_prefix.size)
		data = self.open_file.read(longitud_bloque)
		if not data:
			raise StopIteration

		return self.block.load(data)
Exemplo n.º 3
0
	def test_query(self):
		"""Check the number of results returned when querying a report in the repository"""
		client = OermClient(self._configfile)
		client.open_catalog("catalogo1")
		client.open_repo(1)
		found = client.query_reports(reporte="Reporte 2", returntype="list")

		# The query must return as many results as compression types the Block offers.
		compress_types = Block().compressor.available_types

		self.assertEqual(len(found), len(compress_types))
Exemplo n.º 4
0
    def __init__(self, database, idrpt):
        """Build the report from its entry in the database index.

        Args:
            database: Object exposing ``Index.reports`` and ``_file``.
            idrpt: Key of the report inside ``database.Index.reports``.
        """
        # The id is prepended so the positions below start with the report id.
        record = (idrpt, ) + database.Index.reports[idrpt]

        self.file = database._file

        # Values taken from the index record
        self.id, self.nombre = record[0], record[1]  #: report id / report name
        self.metadata_offset = record[2]
        self.max_pages_in_container = record[3]
        self.first_p_container = record[4]
        self.containers_offset = record[5]
        self.total_containers = len(self.containers_offset)

        # Reading position; -1 means that no container has been loaded yet
        self.current_page = 1
        self.current_container = -1
        self.current_block_data = None

        # Helpers used to decode blocks and containers
        self.block = Block()
        self.pagecontainer = PageContainer()
        self.metadatacontainer = MetadataContainer()

        # Filled in by _get_report_data()
        self.metadata = dict()  #: Report metadata
        self.total_pages = 0  #: Total number of pages in the report

        self._get_report_data()
Exemplo n.º 5
0
	def test_database_find_text(self):
		"""Search a text in the database generated for each compression type

		For every compression type the matching test database is opened,
		"Pagina" is searched in report 1 and the (report, page, position)
		part of every match is compared with the expected list.
		"""

		block    = Block()  # Generic
		esperado = [(1, 1, 0), (1, 1, 12028), (1, 2, 0), (1, 2, 12028), (1, 3, 0), (1, 3, 12028), (1, 4, 0), (1, 4, 12028), (1, 5, 0), (1, 5, 12028), (1, 6, 0), (1, 6, 12028),
				(1, 7, 0), (1, 7, 12028), (1, 8, 0), (1, 8, 12028), (1, 9, 0), (1, 9, 12028), (1, 10, 0), (1, 10, 12029)]

		for item in block.compressor.available_types:

			filename = os.path.join(self._repopath, "test.{0}-{1}.oerm".format(item[0], 0))
			db       = Database(file=filename, mode="rb")
			try:
				matches = db.find_text("Pagina", reports=[1])

				self.assertEqual([(x[0], x[1], x[2]) for x in matches], esperado)
			finally:
				# Release the file even when the assertion fails.
				db.close()
Exemplo n.º 6
0
	def test_read_database(self):
		"""Read every page back from each test database and compare with the pages written"""
		compress_types = Block().compressor.available_types

		for compress_type in compress_types:
			db_file = os.path.join(self._repopath, "test.{0}-{1}.oerm".format(compress_type[0], 0))
			db = Database(file=db_file, mode="rb")

			# Pages of all the reports, in the order in which they are read.
			paginas_leidas = [page for report in db.reports() for page in report]

			db.close()
			self.assertEqual(self._paginas_escritas, paginas_leidas)
Exemplo n.º 7
0
    def setUpClass(cls):
        """Create the fixtures shared by the tests.

        Builds a temporary working directory, the random pages to be written,
        one database per available compression type, an Oerm configuration
        file and a catalog that points to the generated repository.
        """

        def rnd_generator(size=1024,
                          chars=string.ascii_uppercase + string.digits):
            # Text of `size` characters, each one picked at random from `chars`.
            picked = []
            for _unused in range(size):
                picked.append(random.choice(chars))
            return ''.join(picked)

        # Working directories and files
        cls._startpath = tempfile.mkdtemp()
        cls._configfile = os.path.join(cls._startpath, "test.yaml")
        cls._repopath = os.path.join(cls._startpath, "repo1")
        cls._dbpath = os.path.join(cls._repopath, "testdb")

        cls._reports = [(1, 'Reporte 1'), (2, 'Reporte 2')]
        cls._total_pages = 20

        # Every page is random text wrapped between two "Pagina <n>" lines.
        page_template = "Pagina {0} -----------------\n{1}\nPagina {0} -----------------\n"
        cls._paginas_escritas = [
            page_template.format(number, rnd_generator(size=200 * 60))
            for number in range(1, cls._total_pages + 1)
        ]

        os.makedirs(cls._repopath)

        # One database for each available compression type
        for compress_type in Block().compressor.available_types:
            cls._generate_db(compress_method=compress_type[0])

        # Configuration file for Oerm, with no catalogs yet
        with open(cls._configfile, 'w') as outfile:
            yaml.dump({"catalogs": {}}, outfile, default_flow_style=True)

        # Catalog that contains the generated repository
        cls.catalog_config = {
            "catalogo1": {
                "name": "Ejemplo catalogo local",
                "type": "path",
                "enabled": True
            }
        }
        client = OermClient(cls._configfile)
        client.catalog_create(cls.catalog_config)
        client.add_repo("catalogo1", cls._repopath)
Exemplo n.º 8
0
    def test_reports_find_text(self):
        """Search a text through the reports of each test database

        For every compression type the matching test database is opened,
        "Pagina" is searched in report 2 and the (report, page, position)
        part of every match is compared with the expected list.
        """
        block = Block()  # Generic

        esperado = [(2, 1, 0), (2, 1, 12029), (2, 2, 0), (2, 2, 12029),
                    (2, 3, 0), (2, 3, 12029), (2, 4, 0), (2, 4, 12029),
                    (2, 5, 0), (2, 5, 12029), (2, 6, 0), (2, 6, 12029),
                    (2, 7, 0), (2, 7, 12029), (2, 8, 0), (2, 8, 12029),
                    (2, 9, 0), (2, 9, 12029), (2, 10, 0), (2, 10, 12029)]

        for item in block.compressor.available_types:

            filename = os.path.join(self._repopath,
                                    "test.{0}-{1}.oerm".format(item[0], 0))
            db = Database(file=filename, mode="rb")
            try:
                matches = db.reports().find_text(text="Pagina",
                                                 search_in_reports=[2])

                self.assertEqual([(x[0], x[1], x[2]) for x in matches],
                                 esperado)
            finally:
                # Release the file even when the assertion fails.
                db.close()
Exemplo n.º 9
0
class Report(object):
    """Handle a single report stored inside an OERM Database.

    Args:
        database: :class:`openerm.Database` object
        idrpt (int): Unique identifier of the report within the Database

    Example:
        >>> from openerm.Database import Database
        >>> from openerm.Report import Report
        >>> db = Database(file = "out/zstd-level-3-1-22.test.oerm", mode="rb")
        >>> r = Report(db, 1)
        >>> for page in r:
        ...     print(page[0:10])
        ...
        Pagina 1 -
        Pagina 2 -
        Pagina 3 -
        Pagina 4 -
        Pagina 5 -
        Pagina 6 -
        Pagina 7 -
        Pagina 8 -
        Pagina 9 -
        Pagina 10
        Pagina 11

    **data**:
        ========= ================================================
        Type      Detail
        ========= ================================================
        int       Report id
        string    Report name
        long      Offset of the metadata container
        long      Maximum number of pages in the PageContainers
        long      Offset of the first PageContainer
        list      List of offsets of the PageContainers
        ========= ================================================

    """

    def __init__(self, database, idrpt):

        # Index entry prefixed with the id, laid out as the **data** table above.
        data = (idrpt, ) + database.Index.reports[idrpt]

        self.file = database._file
        self.id = data[0]  #: Report id
        self.nombre = data[1]  #: Report name
        self.metadata_offset = data[2]
        self.max_pages_in_container = data[3]
        self.first_p_container = data[4]
        self.containers_offset = data[5]
        self.total_containers = len(self.containers_offset)
        self.current_page = 1
        # -1: no container has been loaded into self.pagecontainer by get_page yet
        self.current_container = -1
        self.current_block_data = None
        self.block = Block()
        self.pagecontainer = PageContainer()
        self.metadatacontainer = MetadataContainer()
        self.metadata = {}  #: Report metadata
        self.total_pages = 0  #: Total number of pages in the report

        self._get_report_data()

    def _get_block_data_from_container(self, container):
        """Return the block data of the container at index ``container``."""
        container_offset = self.containers_offset[container]
        return self._get_block_data_from_offset(container_offset)

    def __len__(self):
        return self.total_pages

    def __iter__(self):
        # The report is its own iterator: the position lives in
        # self.current_page and is only reset when iteration runs to the end.
        return self

    def __next__(self):
        p = self.get_page(self.current_page)
        if not p:
            # Rewind so the report can be iterated again.
            self.current_page = 1
            raise StopIteration
        else:
            self.current_page += 1
        return p

    def __str__(self):
        return "Report: {0} ".format(self.nombre)

    def get_page(self, pagenum):
        """Return a page of the report

        Args:
            pagenum(int): Page number

        Example:
            >>> from openerm.Database import Database
            >>> from openerm.Report import Report
            >>> db = Database(file = "out/zstd-level-3-1-22.test.oerm", mode="rb")
            >>> r = Report(db, 1)
            >>> p = r.get_page(5)
            >>> print(p[0:30])
            Pagina 5 -----------------
            ZSV
            >>>

        Return:
            string: Full text of the page, or ``None`` when the page falls
            beyond the last container

        """
        # Index of the container that holds the page (truncating division).
        container = int((pagenum - 1) / self.max_pages_in_container)
        if container > self.total_containers - 1:
            return None

        # Only read and decode the container when it is not the one already loaded.
        if container != self.current_container:
            self.current_block_data = self._get_block_data_from_container(
                container)
            # (block_length, block_type, compression_type, encryption_type, data_length, data, variable_data)
            self.pagecontainer.load(
                (self.current_block_data[5], self.current_block_data[6]))
            self.current_container = container

        relative_pagenum = pagenum - (container * self.max_pages_in_container)

        return self.pagecontainer.get_page(relative_pagenum)

    def _get_report_data(self):
        """Compute ``total_pages`` and load the report metadata."""
        # All containers but the last one contribute max_pages_in_container pages each
        last_container_offset = self.total_containers - 1
        total = (self.total_containers - 1) * self.max_pages_in_container
        # Read the last container to find out how many pages were left in it
        data = self._get_block_data_from_container(last_container_offset)
        self.pagecontainer.load((data[5], data[6]))
        total += len(self.pagecontainer)

        self.total_pages = total

        # Metadata
        _, _, _, _, _, data, _ = self._get_block_data_from_offset(
            self.metadata_offset)
        self.metadata = self.metadatacontainer.load(data)
        # NOTE: every metadata key becomes an instance attribute and replaces
        # any existing attribute with the same name.
        self.__dict__.update(self.metadata)

    def _get_block_data_from_offset(self, container_offset):
        """Read the block that starts at ``container_offset``.

        Returns:
            The result of ``self.block.load`` for the raw block, or ``None``
            when nothing can be read at that position.
        """
        self.file.seek(container_offset)

        # The block starts with its own total length as a big-endian unsigned 32-bit value
        struct_fmt = '>L'
        struct_len = struct.calcsize(struct_fmt)
        struct_unpack = struct.Struct(struct_fmt).unpack_from

        data = self.file.read(struct_len)
        if not data:
            return None

        longitud_bloque = struct_unpack(data)[0]

        # Rewind over the prefix: the length includes it and the block is loaded whole
        self.file.seek(self.file.tell() - struct_len)
        data = self.file.read(longitud_bloque)
        if not data:
            return None

        # (block_length, block_type, compression_type, encryption_type, data_length, data, variable_data)
        return self.block.load(data)

    def find_text(self, text):
        """Search a text inside the report

        Args:
            text (string): Text to search for

        Example:
            >>> from openerm.Database import Database
            >>> from openerm.Report import Report
            >>> db = Database(file = "out/.sin_compression_sin_encriptacion.oerm")
            >>> r = Report(db, 1)
            >>> r.find_text("IWY3")  # doctest: +SKIP
            [(1, 10, 991, 'AGH8B2NULTCTJ0L-[IWY3]-X4K6D8RRBYCRQCH')]

        Return:
            List of occurrences, one tuple for each of them
                * Report id
                * Page
                * Position in the page
                * Excerpt around the occurrence (up to 15 characters on each
                  side, line breaks removed)
        """
        def sample(find, text, pos, lfind):
            # Up to 15 characters of context on each side, clamped to the page.
            start = max(pos - 15, 0)
            end = min(pos + lfind + 15, len(text))

            # The trailing context starts right after the match, so that the
            # character that follows it is not dropped.
            return text[start:pos] + "-[" + find + "]-" + text[pos + lfind:end]

        lfind = len(text)
        ocurrences = []
        for np in range(1, self.total_pages + 1):
            p = self.get_page(np)
            if p:
                pos = p.find(text)
                while pos >= 0:
                    sampletext = sample(text, p, pos, lfind)
                    ocurrences.append(
                        (self.id, np, pos, sampletext.replace("\n", "")))
                    pos = p.find(text, pos + 1)

        return ocurrences
Exemplo n.º 10
0
    from openerm.Database import Database
    from openerm.SpoolHostReprint import SpoolHostReprint
    from openerm.Block import Block
    from openerm.tabulate import tabulate

except ImportError as err:
    modulename = err.args[0].partition("'")[-1].rpartition("'")[0]
    print(_("No fue posible importar el modulo: %s") % modulename)
    sys.exit(-1)

if __name__ == "__main__":

    resultados = []
    encriptado = 0

    block = Block()  # Generic

    test_file = "d:/mfw/test.txt"
    size_test_file = os.path.getsize(test_file)

    for item in block.compressor.available_types:

        print("Procesando: [{0}] {1}".format(item[0], item[1]))

        start = time.time()
        paginas = 0

        file_name = "out\{0}.prueba.oerm".format(item[1])

        db = Database(file=file_name,
                      default_compress_method=item[0],
Exemplo n.º 11
0
def process_file(configfile, inputfile, outputfile,  compressiontype, complevel, ciphertype, testall, append, pagesingroups) :
	"""Load a spool file into OERM databases and report the statistics.

	One database is written for every cipher/compression combination
	selected. Each database is then read back completely to time the
	decompression.

	Args:
		configfile: Configuration passed to ``ReportMatcher``.
		inputfile: Path of the spool file to load.
		outputfile: Prefix of the name of the generated database files.
		compressiontype: Id of the compression type to use.
		complevel: Compression level.
		ciphertype: Id of the cipher type to use.
		testall: When it contains 'e' every available cipher type is used,
			when it contains 'c' every available compression type is used.
		append: If true the databases are opened in "ab" mode, else in "wb".
		pagesingroups: Number of pages per container.

	Returns:
		The table of results rendered by ``tabulate``.
	"""

	block					= Block(default_compress_level=complevel)  # Generic
	resultados				= []
	size_test_file			= os.path.getsize(inputfile)

	compresiones = [e for e in block.compressor.available_types if e[0] == compressiontype]
	encriptados = [e for e in block.cipher.available_types if e[0] == ciphertype]

	if testall:
		if 'e' in testall:
			encriptados = block.cipher.available_types

		if 'c' in testall:
			compresiones = block.compressor.available_types

	mode = "ab" if append else "wb"

	r = ReportMatcher(configfile)
	for encriptado in encriptados:
		for compress in compresiones:

			print("Procesando: {2} Compresión: [{0}] {1} Cifrado: {3}".format(compress[0], compress[1], inputfile, encriptado[1]))

			start		= time.time()
			paginas		= 0

			file_name	= "{0}.{1}.oerm".format(outputfile, slugify("{0}.{1}".format(compress[1], encriptado[1]), "_"))

			db	= Database(
				file=file_name,
				mode=mode,
				default_compress_method=compress[0],
				default_compress_level=complevel,
				default_encription_method=encriptado[0],
				pages_in_container=pagesingroups)

			reportname_anterior = ""

			spool = SpoolFixedRecordLength(inputfile, buffer_size=102400, encoding="cp500", newpage_code="NEVADO" )

			with spool as s:
				for page in s:
					data = r.match(page)
					reportname = data[0]
					# A new report is added whenever the matched name changes.
					if reportname != reportname_anterior:
						db.add_report(reporte=reportname, sistema=data[1], aplicacion=data[2], departamento=data[3], fecha=data[4])
						reportname_anterior = reportname

					paginas = paginas + 1
					db.add_page(page)

			db.close()

			compress_time	= time.time() - start
			compress_size	= os.path.getsize(file_name)

			# Read the whole database back to time the decompression.
			start = time.time()
			db	= Database(file=file_name, mode="rb")
			try:
				for report in db.reports():
					try:
						for page in report:
							pass
					except Exception as err:
						print("Error: {0} al descomprimir reporte".format(err))
						break

				uncompress_time		= time.time() - start
				container_size 		= compress_size / (db.Index.container_objects + db.Index.metadata_objects)
			finally:
				# The read handle was previously never released.
				db.close()

			resultados.append([
				"[{0}] {1} ({2}p/cont.)".format(compress[0], compress[1], pagesingroups),
				("" if encriptado[0] == 0 else encriptado[1]),
				float(size_test_file),
				float(compress_size),
				(compress_size/size_test_file)*100,
				paginas/compress_time,
				paginas/uncompress_time,
				container_size
			])


	tablestr = tabulate(
					tabular_data		= resultados,
					headers				= ["Algoritmo", "Encript.", "Real (bytes)", "Compr. (bytes)", "Ratio", "Compr. Pg/Seg", "Descompr. Pg/Seg", "BSize (Prom.)" ],
					floatfmt			= "8.2f",
					tablefmt			= "psql",
					numalign			= "right",
					stralign			= "left",
					override_cols_fmt	= [None, None, ",.0f", ",.0f",",.2f", ",.2f", ",.2f", ",.2f", ",.2f" ]
	 )
	return tablestr
Exemplo n.º 12
0
	test_file		= args.inputfile

	if not file_accessible(test_file, "rb"):
		print("Error: El archivo {0} no se ha encontrado o no es accesible para lectura".format(test_file))
		sys.exit(-1)

	if test_file:
		size_test_file	= os.path.getsize(test_file)

		resultados = []
		totales = {}
		bloques = 0
		paginas = 0

		start = time.time()
		b = Block()
		pg = PageContainer()
		with OermDataBase(test_file) as bloques:
			for bloque in bloques:

				longitud_bloque, tipo_bloque, tipo_compresion, tipo_encriptacion, longitud_datos, data, variable_data = bloque
				resultados.append([longitud_bloque, tipo_bloque, tipo_compresion, tipo_encriptacion, longitud_datos])
				if tipo_bloque == 2:
					pg.load(data)
					paginas += pg.max_page_count
					k = "{0}. {1} comprimido con {2} (páginas: {3})".format(tipo_bloque, b.block_types[tipo_bloque], b.compressor.available_types[tipo_compresion][1], pg.max_page_count)
				else:
					k = "{0}. {1} comprimido con {2}".format(tipo_bloque, b.block_types[tipo_bloque], b.compressor.available_types[tipo_compresion][1])
				totales[k] = tuple(map(lambda x, y: x + y,  totales.get(k, (0, 0)), (longitud_bloque, 1)))

		elapsed = time.time() - start
Exemplo n.º 13
0
    def process_file(self, input_file):
        """Load the pages of a spool file into an OERM database.

        The spool reader is selected by ``self.config.file_type`` and every
        page is assigned to a report through ``ReportMatcher``. One row of
        results is produced for each cipher/compression combination that
        matches the configuration.

        Args:
            input_file: Path of the spool file to process.

        Return:
            string: Table of results rendered by ``tabulate``.
        """
        block = Block(default_compress_level=self.config.compress_level)
        rows = []
        self.input_file = input_file
        input_size = os.path.getsize(self.input_file)

        # Both readers are created here; the one to use is picked further down.
        fixed_reader = SpoolFixedRecordLength(
            self.input_file,
            buffer_size=self.config.buffer_size,
            encoding=self.config.encoding,
            newpage_code=self.config.EOP)
        fcfc_reader = SpoolHostReprint(self.input_file,
                                       buffer_size=self.config.buffer_size,
                                       encoding=self.config.encoding)
        self.spool_types = {"fixed": fixed_reader, "fcfc": fcfc_reader}

        # Keep only the compression and cipher types set in the configuration.
        compress_choices = []
        for entry in block.compressor.available_types:
            if entry[0] == self.config.compress_type:
                compress_choices.append(entry)

        cipher_choices = []
        for entry in block.cipher.available_types:
            if entry[0] == self.config.cipher_type:
                cipher_choices.append(entry)

        open_mode = "ab"

        matcher = ReportMatcher(self.config.report_cfg)
        seen_reports = []
        for cipher in cipher_choices:
            for compress in compress_choices:

                started = time.time()
                page_count = 0

                target = os.path.join(
                    self.config.output_path,
                    generate_filename(self.config.file_mask) + ".oerm")

                db = Database(
                    file=target,
                    mode=open_mode,
                    default_compress_method=compress[0],
                    default_compress_level=self.config.compress_level,
                    default_encription_method=cipher[0],
                    pages_in_container=self.config.pages_in_group)

                # Size before loading, subtracted afterwards to get what was added.
                size_before = os.path.getsize(target)
                previous_name = ""

                widgets = [
                    os.path.basename(self.input_file), ': ',
                    FormatLabel(
                        '%(value)d bytes de %(max_value)d (%(percentage)0.2f)'
                    ),
                    Bar(marker='#', left='[', right=']'), ' ',
                    ETA(), ' ',
                    FileTransferSpeed()
                ]

                bytes_done = 0
                with ProgressBar(max_value=input_size,
                                 widgets=widgets) as bar:
                    reader = self.spool_types[self.config.file_type]
                    with reader as pages:
                        for page in pages:
                            bytes_done += len(page)
                            bar.update(bytes_done)
                            matched = matcher.match(page)
                            name = matched[0]

                            if name not in seen_reports:
                                seen_reports.append(name)

                            # On a change of report: select it when the
                            # database already has it, otherwise add it.
                            if name != previous_name:
                                if db.get_report(name):
                                    db.set_report(name)
                                else:
                                    db.add_report(reporte=name,
                                                  sistema=matched[1],
                                                  departamento=matched[2],
                                                  fecha=matched[3])
                                previous_name = name

                            page_count += 1
                            db.add_page(page)

                    db.close()

                elapsed = time.time() - started
                added_size = os.path.getsize(target) - size_before

                label = "[{0}] {1} ({2}p/cont.)".format(
                    compress[0], compress[1], self.config.pages_in_group)
                cipher_label = "" if cipher[0] == 0 else cipher[1]
                rows.append([
                    label,
                    cipher_label,
                    float(input_size),
                    float(added_size),
                    (added_size / input_size) * 100,
                    page_count / elapsed,
                    len(seen_reports)
                ])

        headers = [
            "Algoritmo", "Encript.", "Real (bytes)", "Compr. (bytes)",
            "Ratio", "Compr. Pg/Seg", "Reportes"
        ]
        column_formats = [
            None, None, ",.0f", ",.0f", ",.2f", ",.2f", ",.2f"
        ]
        return tabulate(tabular_data=rows,
                        headers=headers,
                        floatfmt="8.2f",
                        tablefmt="psql",
                        numalign="right",
                        stralign="left",
                        override_cols_fmt=column_formats)