def __init__(self, inputfile):
    """Record the path of *inputfile* and reset reader state; no I/O yet."""
    self.filename = inputfile
    self.block = Block()
    # Cursor/state used while walking through the file.
    self.lines = []
    self.current_line = 0
    self.current_page = ""
class OermDataBase(object):
    """Low-level, iterable reader for a raw .oerm file.

    Opens the file on ``__enter__`` (verifying the ``"oerm"`` magic number)
    and, when iterated, yields one decoded block per step via
    ``self.block.load``.
    """

    def __init__(self, inputfile):
        self.filename = inputfile  # path of the .oerm file to read
        self.lines = []
        self.current_line = 0
        self.current_page = ""
        self.block = Block()  # generic block decoder

    def __enter__(self):
        """Open the file and validate its header (4-byte magic + version byte).

        Returns:
            self on success, or None when the file cannot be opened or is
            not a valid oerm file (an error message is printed in that case).
        """
        try:
            self.open_file = open(self.filename, mode="rb")
            # Header layout: 4-byte magic string + 1-byte version, big-endian.
            struct_fmt = ">4sB"
            struct_len = struct.calcsize(struct_fmt)
            data = self.open_file.read(struct_len)
            struct_unpack = struct.Struct(struct_fmt).unpack_from
            magic_number = struct_unpack(data)[0].decode("utf-8")
            # version = struct_unpack(data)[1]
            if magic_number != "oerm":
                raise ValueError(_('{0} no es un archivo oerm válido!').format(self.filename))
        except Exception as err:
            # NOTE(review): original best-effort behavior preserved — print the
            # error and return None, so "with ... as x" binds x = None on failure.
            print(_("Error al abrir el archivo: {0}").format(err))
            return None
        return self

    def __exit__(self, *args):
        self.open_file.close()
        # BUG FIX: this method returned True, which told Python to suppress
        # *every* exception raised inside the "with" body (including real
        # errors from callers). Returning False lets exceptions propagate.
        return False

    def __iter__(self):
        return self

    def __next__(self):
        """Read the next length-prefixed block; raise StopIteration at EOF."""
        # Peek the 4-byte big-endian block length...
        struct_fmt = '>L'
        struct_len = struct.calcsize(struct_fmt)
        struct_unpack = struct.Struct(struct_fmt).unpack_from
        data = self.open_file.read(struct_len)
        if not data:
            raise StopIteration
        longitud_bloque = struct_unpack(data)[0]
        # ...then seek back and re-read the whole block, prefix included,
        # because Block.load expects the complete raw block.
        self.open_file.seek(self.open_file.tell() - struct_len)
        data = self.open_file.read(longitud_bloque)
        if not data:
            raise StopIteration
        return self.block.load(data)
def test_query(self):
    """Query the repository and expect one match per available compressor."""
    client = OermClient(self._configfile)
    client.open_catalog("catalogo1")
    client.open_repo(1)
    found = client.query_reports(reporte="Reporte 2", returntype="list")
    # One database was generated per compression type, so "Reporte 2"
    # should be found once in each of them.
    expected_count = len(Block().compressor.available_types)
    self.assertEqual(len(found), expected_count)
def __init__(self, database, idrpt):
    """Bind this report object to entry *idrpt* of *database*'s index."""
    index_entry = database.Index.reports[idrpt]
    self.file = database._file
    self.id = idrpt  # report id
    self.nombre = index_entry[0]  # report name
    self.metadata_offset = index_entry[1]
    self.max_pages_in_container = index_entry[2]
    self.first_p_container = index_entry[3]
    self.containers_offset = index_entry[4]
    self.total_containers = len(self.containers_offset)
    # Iteration / container-caching state.
    self.current_page = 1
    self.current_container = -1
    self.current_block_data = None
    self.block = Block()
    self.pagecontainer = PageContainer()
    self.metadatacontainer = MetadataContainer()
    self.metadata = {}  # report metadata
    self.total_pages = 0  # total number of pages in the report
    self._get_report_data()
def test_database_find_text(self):
    """Build a database with random content and run a text search on it."""
    # "Pagina" matches at offsets 0 and 12028 on pages 1-9 of report 1;
    # on page 10 the second match lands one byte later (12029).
    expected = [hit for page in range(1, 10)
                for hit in ((1, page, 0), (1, page, 12028))]
    expected += [(1, 10, 0), (1, 10, 12029)]
    for comp_type in Block().compressor.available_types:
        filename = os.path.join(
            self._repopath, "test.{0}-{1}.oerm".format(comp_type[0], 0))
        db = Database(file=filename, mode="rb")
        matches = db.find_text("Pagina", reports=[1])
        self.assertEqual([(m[0], m[1], m[2]) for m in matches], expected)
def test_read_database(self):
    """Read back a randomly generated database and verify all its pages."""
    for comp_type in Block().compressor.available_types:
        filename = os.path.join(
            self._repopath, "test.{0}-{1}.oerm".format(comp_type[0], 0))
        db = Database(file=filename, mode="rb")
        # Flatten every page of every report, preserving order.
        pages_read = [page for report in db.reports() for page in report]
        db.close()
        self.assertEqual(self._paginas_escritas, pages_read)
def setUpClass(cls):
    """Create a temp repository with random page data shared by all tests."""
    def random_chunk(size=1024, chars=string.ascii_uppercase + string.digits):
        # Random uppercase/digit filler of the requested length.
        return ''.join(random.choice(chars) for _ in range(size))

    # Scratch directories / paths.
    cls._startpath = tempfile.mkdtemp()
    cls._configfile = os.path.join(cls._startpath, "test.yaml")
    cls._repopath = os.path.join(cls._startpath, "repo1")
    cls._dbpath = os.path.join(cls._repopath, "testdb")
    cls._total_pages = 20
    cls._reports = [(1, 'Reporte 1'), (2, 'Reporte 2')]

    # Pre-compute the pages every generated database will contain.
    cls._paginas_escritas = []
    for num in range(1, cls._total_pages + 1):
        filler = random_chunk(size=200 * 60)
        cls._paginas_escritas.append(
            "Pagina {0} -----------------\n{1}\nPagina {0} -----------------\n".format(num, filler))

    os.makedirs(cls._repopath)

    # Generate one database per supported compression type.
    for comp_type in Block().compressor.available_types:
        cls._generate_db(compress_method=comp_type[0])

    # Minimal Oerm configuration file.
    with open(cls._configfile, 'w') as outfile:
        yaml.dump({"catalogs": {}}, outfile, default_flow_style=True)

    # Register the generated repository in a fresh catalog.
    cls.catalog_config = {
        "catalogo1": {
            "name": "Ejemplo catalogo local",
            "type": "path",
            "enabled": True
        }
    }
    client = OermClient(cls._configfile)
    client.catalog_create(cls.catalog_config)
    client.add_repo("catalogo1", cls._repopath)
def test_reports_find_text(self):
    """Search for a text pattern restricted to report 2."""
    # "Pagina" matches at offsets 0 and 12029 on every page of report 2.
    expected = [hit for page in range(1, 11)
                for hit in ((2, page, 0), (2, page, 12029))]
    for comp_type in Block().compressor.available_types:
        filename = os.path.join(
            self._repopath, "test.{0}-{1}.oerm".format(comp_type[0], 0))
        db = Database(file=filename, mode="rb")
        matches = db.reports().find_text(text="Pagina", search_in_reports=[2])
        self.assertEqual([(m[0], m[1], m[2]) for m in matches], expected)
class Report(object):
    """Handling of one OERM report.

    Args:
        database: :class:`openerm.Database` object
        idrpt (int): Unique identifier of the report inside the Database

    Example:
        >>> from openerm.Database import Database
        >>> from openerm.Report import Report
        >>> db = Database(file = "out/zstd-level-3-1-22.test.oerm", mode="rb")
        >>> r = Report(db, 1)
        >>> for page in r:
        ...     print(page[0:10])
        ...
        Pagina 1 -
        Pagina 2 -
        Pagina 3 -
        Pagina 4 -
        Pagina 5 -
        Pagina 6 -
        Pagina 7 -
        Pagina 8 -
        Pagina 9 -
        Pagina 10
        Pagina 11

    **data**:

    =========  ================================================
    Type       Detail
    =========  ================================================
    int        Report id
    string     Report name
    long       Offset of the metadata container
    long       Max number of pages per PageContainer
    long       Offset of the first PageContainer
    list       List of offsets to the PageContainers
    =========  ================================================
    """
    # def __init__(self, file, data):
    def __init__(self, database, idrpt):
        # Prepend the id so data[0..5] maps onto (id, name, metadata offset,
        # pages per container, first container offset, container offsets).
        data = (idrpt, ) + database.Index.reports[idrpt]
        self.file = database._file
        self.id = data[0]  #: report id
        self.nombre = data[1]  #: report name
        self.metadata_offset = data[2]
        self.max_pages_in_container = data[3]
        self.first_p_container = data[4]
        self.containers_offset = data[5]
        self.total_containers = len(self.containers_offset)
        # Iteration state: page cursor plus a one-container read cache.
        self.current_page = 1
        self.current_container = -1
        self.current_block_data = None
        self.block = Block()
        self.pagecontainer = PageContainer()
        self.metadatacontainer = MetadataContainer()
        self.metadata = {}  #: report metadata
        self.total_pages = 0  #: total number of pages in the report
        self._get_report_data()

    def _get_block_data_from_container(self, container):
        # Translate a container *index* into its file offset and read the block.
        container_offset = self.containers_offset[container]
        return self._get_block_data_from_offset(container_offset)

    def __len__(self):
        # Number of pages in the report.
        return self.total_pages

    def __iter__(self):
        return self

    def __next__(self):
        # Sequential page iteration; the cursor is reset on exhaustion so the
        # report can be iterated again from the start.
        p = self.get_page(self.current_page)
        if not p:
            self.current_page = 1
            raise StopIteration
        else:
            self.current_page += 1
            return p

    def __str__(self):
        return "Report: {0} ".format(self.nombre)

    def get_page(self, pagenum):
        """Return one page of the report.

        Args:
            pagenum (int): Page number (1-based)

        Example:
            >>> from openerm.Database import Database
            >>> from openerm.Report import Report
            >>> db = Database(file = "out/zstd-level-3-1-22.test.oerm", mode="rb")
            >>> r = Report(db, 1)
            >>> p = r.get_page(5)
            >>> print(p[0:30])
            Pagina 5 -----------------
            ZSV
            >>>

        Return:
            string: Full text of the page (None when pagenum is past the
            last container)
        """
        # Which container holds this page.
        container = int((pagenum - 1) / self.max_pages_in_container)
        if container > self.total_containers - 1:
            return None
        if container != self.current_container:
            # Cache miss: read and decode the whole container once.
            self.current_block_data = self._get_block_data_from_container(
                container)
            # (block length, block type, compression, encryption, data length, data, variable data)
            self.pagecontainer.load(
                (self.current_block_data[5], self.current_block_data[6]))
            self.current_container = container
        relative_pagenum = pagenum - (container * self.max_pages_in_container)
        return self.pagecontainer.get_page(relative_pagenum)

    def _get_report_data(self):
        # Every container except the last holds exactly max_pages_in_container
        # pages, so (total_containers - 1) * that gives the base count.
        last_container_offset = self.total_containers - 1  # NOTE(review): this is a container *index*, not an offset, despite the name
        total = (self.total_containers - 1) * self.max_pages_in_container
        # Read the last container to learn how many pages it actually holds.
        data = self._get_block_data_from_container(last_container_offset)
        self.pagecontainer.load((data[5], data[6]))
        total += len(self.pagecontainer)
        self.total_pages = total
        # Metadata block: decoded and merged into the instance's attributes.
        _, _, tipo_compresion, _, _, data, _ = self._get_block_data_from_offset(
            self.metadata_offset)
        self.metadata = self.metadatacontainer.load(data)
        self.__dict__.update(self.metadata)

    def _get_block_data_from_offset(self, container_offset):
        # Read a length-prefixed block: first the 4-byte big-endian length,
        # then seek back and read the whole block (prefix included).
        self.file.seek(container_offset)
        struct_fmt = '>L'
        struct_len = struct.calcsize(struct_fmt)
        struct_unpack = struct.Struct(struct_fmt).unpack_from
        data = self.file.read(struct_len)
        if not data:
            return None
        longitud_bloque = struct_unpack(data)[0]
        self.file.seek(self.file.tell() - struct_len)
        data = self.file.read(longitud_bloque)
        if not data:
            return None
        # (block length, block type, compression, encryption, data length, data, variable data)
        return self.block.load(data)

    def find_text(self, text):
        """Search for a text pattern inside the report.

        Args:
            text (string): Text pattern to look for

        Example:
            >>> from openerm.Database import Database
            >>> from openerm.Report import Report
            >>> db = Database(file = "out/.sin_compression_sin_encriptacion.oerm")
            >>> r = Report(db, 1)
            >>> report.find_text("IWY3")
            [(2, 10, 991, 'AGH8B2NULTCTJ0L-[IWY3]-4K6D8RRBYCRQCH')]

        Return:
            List of occurrences, each a tuple of:

            * report id
            * page number
            * position within the page
            * short excerpt of the occurrence
        """
        def sample(find, text, pos, lfind):
            # Build an excerpt around the match: 15 chars of context each side.
            start = pos - 15
            end = pos + lfind + 15
            if start < 0:
                start = 0
            if end > len(text):
                end = len(text)
            # NOTE(review): the right-hand slice starts at pos + lfind + 1,
            # skipping the character right after the match; the docstring
            # example matches this behavior, so it may be deliberate — confirm.
            return text[start:pos] + "-[" + find + "]-" + text[pos + lfind + 1:end]

        lfind = len(text)
        ocurrences = []
        for np in range(1, self.total_pages + 1):
            p = self.get_page(np)
            if p:
                # Scan for every occurrence on this page.
                pos = p.find(text)
                while pos >= 0:
                    sampletext = sample(text, p, pos, lfind)
                    ocurrences.append(
                        (self.id, np, pos, sampletext.replace("\n", "")))
                    pos = p.find(text, pos + 1)
        return ocurrences
from openerm.Database import Database from openerm.SpoolHostReprint import SpoolHostReprint from openerm.Block import Block from openerm.tabulate import tabulate except ImportError as err: modulename = err.args[0].partition("'")[-1].rpartition("'")[0] print(_("No fue posible importar el modulo: %s") % modulename) sys.exit(-1) if __name__ == "__main__": resultados = [] encriptado = 0 block = Block() # Generic test_file = "d:/mfw/test.txt" size_test_file = os.path.getsize(test_file) for item in block.compressor.available_types: print("Procesando: [{0}] {1}".format(item[0], item[1])) start = time.time() paginas = 0 file_name = "out\{0}.prueba.oerm".format(item[1]) db = Database(file=file_name, default_compress_method=item[0],
def process_file(configfile, inputfile, outputfile, compressiontype, complevel, ciphertype, testall, append, pagesingroups):
    """Compress *inputfile* into .oerm databases and benchmark the result.

    For each selected (cipher, compression) combination the spool file is
    written to its own database, then read back, and a row with size/ratio/
    throughput figures is accumulated. Returns the formatted results table.

    Args:
        configfile: ReportMatcher configuration file.
        inputfile: Spool file to process.
        outputfile: Base name for the generated .oerm files.
        compressiontype / complevel: Compression method id and level.
        ciphertype: Cipher method id.
        testall: Optional string; 'c' tests all compressors, 'e' all ciphers.
        append: When truthy, open databases in append ("ab") mode.
        pagesingroups: Pages per container.

    Return:
        string: Table (via tabulate) with one row per tested combination.
    """
    block = Block(default_compress_level=complevel)  # Generic
    resultados = []
    size_test_file = os.path.getsize(inputfile)
    # Default: only the requested compression/cipher; testall widens either list.
    compresiones = [e for e in block.compressor.available_types if e[0] == compressiontype]
    encriptados = [e for e in block.cipher.available_types if e[0] == ciphertype]
    if testall:
        if 'e' in testall:
            encriptados = block.cipher.available_types
        if 'c' in testall:
            compresiones = block.compressor.available_types
    mode = "ab" if append else "wb"
    r = ReportMatcher(configfile)
    for encriptado in encriptados:
        for compress in compresiones:
            print("Procesando: {2} Compresión: [{0}] {1} Cifrado: {3}".format(compress[0], compress[1], inputfile, encriptado[1]))
            start = time.time()
            paginas = 0
            file_name = "{0}.{1}.oerm".format(outputfile, slugify("{0}.{1}".format(compress[1], encriptado[1]), "_"))
            db = Database(
                file=file_name,
                mode=mode,
                default_compress_method=compress[0],
                default_compress_level=complevel,
                default_encription_method=encriptado[0],
                pages_in_container=pagesingroups)
            reportname_anterior = ""
            # spool = SpoolHostReprint(inputfile, buffer_size=102400, encoding="Latin1")
            # NOTE(review): encoding and newpage_code are hard-coded here —
            # confirm "cp500"/"NEVADO" are correct for every input file.
            spool = SpoolFixedRecordLength(inputfile, buffer_size=102400, encoding="cp500", newpage_code="NEVADO")
            # with SpoolHostReprint(inputfile, buffer_size=102400, encoding="Latin1") as s:
            with spool as s:
                for page in s:
                    # Start a new report section whenever the matcher reports
                    # a different report name than the previous page.
                    data = r.match(page)
                    reportname = data[0]
                    if reportname != reportname_anterior:
                        db.add_report(reporte=reportname, sistema=data[1], aplicacion=data[2], departamento=data[3], fecha=data[4])
                        reportname_anterior = reportname
                    paginas = paginas + 1
                    db.add_page(page)
            db.close()
            compress_time = time.time() - start
            compress_size = os.path.getsize(file_name)
            # Read-back pass to measure decompression throughput.
            start = time.time()
            db = Database(file=file_name, mode="rb")
            for report in db.reports():
                try:
                    for page in report:
                        pass
                except Exception as err:
                    print("Error: {0} al descomprimir reporte".format(err))
                    break
            uncompress_time = time.time() - start
            # Average block size across page and metadata containers.
            container_size = compress_size / (db.Index.container_objects + db.Index.metadata_objects)
            resultados.append([
                "[{0}] {1} ({2}p/cont.)".format(compress[0], compress[1], pagesingroups),
                ("" if encriptado[0] == 0 else encriptado[1]),
                float(size_test_file),
                float(compress_size),
                (compress_size / size_test_file) * 100,
                paginas / compress_time,
                paginas / uncompress_time,
                container_size
            ])
    # NOTE(review): override_cols_fmt has 9 entries for 8 columns — harmless
    # if tabulate ignores extras, but worth confirming.
    tablestr = tabulate(
        tabular_data=resultados,
        headers=["Algoritmo", "Encript.", "Real (bytes)", "Compr. (bytes)", "Ratio", "Compr. Pg/Seg", "Descompr. Pg/Seg", "BSize (Prom.)"],
        floatfmt="8.2f",
        tablefmt="psql",
        numalign="right",
        stralign="left",
        override_cols_fmt=[None, None, ",.0f", ",.0f", ",.2f", ",.2f", ",.2f", ",.2f", ",.2f"]
    )
    return tablestr
# Script section: walk every block of an OERM file, tally sizes and counts
# per block kind, and time the whole pass.
test_file = args.inputfile
if not file_accessible(test_file, "rb"):
    print("Error: El archivo {0} no se ha encontrado o no es accesible para lectura".format(test_file))
    sys.exit(-1)
if test_file:
    size_test_file = os.path.getsize(test_file)
    resultados = []  # one row per block: [length, type, compression, encryption, data length]
    totales = {}     # per block-kind label -> (accumulated bytes, block count)
    bloques = 0      # NOTE(review): immediately shadowed by the "with ... as bloques" below
    paginas = 0
    start = time.time()
    b = Block()          # generic block, used only for its type/compressor name tables
    pg = PageContainer()
    with OermDataBase(test_file) as bloques:
        for bloque in bloques:
            # Block tuple layout: (length, block type, compression, encryption,
            # data length, data, variable data).
            longitud_bloque, tipo_bloque, tipo_compresion, tipo_encriptacion, longitud_datos, data, variable_data = bloque
            resultados.append([longitud_bloque, tipo_bloque, tipo_compresion, tipo_encriptacion, longitud_datos])
            if tipo_bloque == 2:
                # Page container: count its pages and tag the bucket label
                # with the container's page capacity.
                pg.load(data)
                paginas += pg.max_page_count
                k = "{0}. {1} comprimido con {2} (páginas: {3})".format(tipo_bloque, b.block_types[tipo_bloque], b.compressor.available_types[tipo_compresion][1], pg.max_page_count)
            else:
                k = "{0}. {1} comprimido con {2}".format(tipo_bloque, b.block_types[tipo_bloque], b.compressor.available_types[tipo_compresion][1])
            # Element-wise accumulate (bytes, count) for this block kind.
            totales[k] = tuple(map(lambda x, y: x + y, totales.get(k, (0, 0)), (longitud_bloque, 1)))
    elapsed = time.time() - start
def process_file(self, input_file):
    """Compress *input_file* into .oerm databases, with a progress bar.

    For each configured (cipher, compression) combination the spool file is
    appended to a database whose name comes from ``self.config.file_mask``,
    reusing existing report sections when the matcher sees a known name.

    Args:
        input_file: Path of the spool file to process.

    Return:
        string: Table (via tabulate) with one row per tested combination.
    """
    block = Block(default_compress_level=self.config.compress_level)
    resultados = []
    self.input_file = input_file
    size_test_file = os.path.getsize(self.input_file)
    # Both spool readers are constructed up front; config.file_type picks one.
    self.spool_types = {
        "fixed": SpoolFixedRecordLength(self.input_file,
                                        buffer_size=self.config.buffer_size,
                                        encoding=self.config.encoding,
                                        newpage_code=self.config.EOP),
        "fcfc": SpoolHostReprint(self.input_file,
                                 buffer_size=self.config.buffer_size,
                                 encoding=self.config.encoding)
    }
    compresiones = [
        e for e in block.compressor.available_types
        if e[0] == self.config.compress_type
    ]
    encriptados = [
        e for e in block.cipher.available_types
        if e[0] == self.config.cipher_type
    ]
    mode = "ab"  # always append to the target database
    r = ReportMatcher(self.config.report_cfg)
    reports = []  # distinct report names seen so far
    for encriptado in encriptados:
        for compress in compresiones:
            start = time.time()
            paginas = 0
            # file_name = "{0}.{1}.oerm".format(self.config.output_path, slugify("{0}.{1}".format(compress[1], encriptado[1]), "_"))
            file_name = os.path.join(
                self.config.output_path,
                generate_filename(self.config.file_mask) + ".oerm")
            db = Database(
                file=file_name,
                mode=mode,
                default_compress_method=compress[0],
                default_compress_level=self.config.compress_level,
                default_encription_method=encriptado[0],
                pages_in_container=self.config.pages_in_group)
            # Size before writing, so compress_size below measures only the delta.
            file_size = os.path.getsize(file_name)
            reportname_anterior = ""
            widgets = [
                os.path.basename(self.input_file), ': ',
                FormatLabel('%(value)d bytes de %(max_value)d (%(percentage)0.2f)'),
                Bar(marker='#', left='[', right=']'), ' ',
                ETA(), ' ',
                FileTransferSpeed()
            ]
            p_size = 0
            with ProgressBar(max_value=size_test_file, widgets=widgets) as bar:
                spool = self.spool_types[self.config.file_type]
                with spool as s:
                    for page in s:
                        p_size += len(page)
                        bar.update(p_size)
                        data = r.match(page)
                        reportname = data[0]
                        if reportname not in reports:
                            reports.append(reportname)
                        if reportname != reportname_anterior:
                            # Reuse an existing report section when the
                            # database already knows this name.
                            rpt_id = db.get_report(reportname)
                            if rpt_id:
                                db.set_report(reportname)
                            else:
                                db.add_report(reporte=reportname,
                                              sistema=data[1],
                                              departamento=data[2],
                                              fecha=data[3])
                            reportname_anterior = reportname
                        paginas = paginas + 1
                        db.add_page(page)
            db.close()
            compress_time = time.time() - start
            compress_size = os.path.getsize(file_name) - file_size
            resultados.append([
                "[{0}] {1} ({2}p/cont.)".format(
                    compress[0], compress[1], self.config.pages_in_group),
                ("" if encriptado[0] == 0 else encriptado[1]),
                float(size_test_file),
                float(compress_size),
                (compress_size / size_test_file) * 100,
                paginas / compress_time,
                len(reports)
            ])
    tablestr = tabulate(tabular_data=resultados,
                        headers=[
                            "Algoritmo", "Encript.", "Real (bytes)",
                            "Compr. (bytes)", "Ratio", "Compr. Pg/Seg",
                            "Reportes"
                        ],
                        floatfmt="8.2f",
                        tablefmt="psql",
                        numalign="right",
                        stralign="left",
                        override_cols_fmt=[
                            None, None, ",.0f", ",.0f", ",.2f", ",.2f", ",.2f"
                        ])
    return tablestr