def load_norm_ivecs(self):
    """ Load normalization i-vectors, scale and shift files and also pretrained model.

        This is a generator. The i-vectors are yielded one by one while the
        normalization list is read; `self.scale`, `self.shift` and (when the
        file exists) `self.model` are only assigned once the generator has
        been fully consumed. They are looked up in the directory of the last
        non-empty entry of the normalization list.

        :returns: generator over flattened i-vectors
        :rtype: generator of numpy.array
        :raises DiarizationException: when the normalization list has no entries
    """
    last_entry = None
    with open(self.norm_list, 'r') as f:
        for line in f:
            line = line.rstrip()
            if not line:
                # A blank line names no file; skipping it also keeps
                # `last_entry` pointing at a real entry for the lookups below.
                continue
            last_entry = line
            loginfo(
                '[Diarization.load_norm_ivecs] Loading npy file {} ...'.format(line))
            try:
                ivec = np.load('{}.npy'.format(
                    os.path.join(self.ivecs_dir, line))).flatten()
            except IOError:
                logwarning(
                    '[Diarization.load_norm_ivecs] No pickle file found for {}.'
                    .format(line))
                continue
            yield ivec
    if last_entry is None:
        # Without any entry there is no directory to load scale/shift from.
        raise DiarizationException(
            '[Diarization.load_norm_ivecs] Normalization list {} is empty.'
            .format(self.norm_list))
    norm_dir = os.path.join(self.ivecs_dir, os.path.dirname(last_entry))
    self.scale = np.load(os.path.join(norm_dir, 'scale.npy'))
    self.shift = np.load(os.path.join(norm_dir, 'shift.npy'))
    try:
        # Pickle data is binary: text mode fails on Python 3.
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(os.path.join(norm_dir, 'model.pkl'), 'rb') as f:
            self.model = pickle.load(f)
    except IOError:
        logwarning(
            '[Diarization.load_norm_ivecs] No pretrained model found.')
def load_ivecs(self):
    """ Load the i-vector sets named in the input list.

        Every non-empty line of `self.input_list` holds either `<name>` or
        `<name> <number of speakers>`. The pickle `<name>.pkl` is read from
        `self.ivecs_dir`; when the second column is present it is stored in
        the `num_speakers` attribute of the loaded set. Missing pickle files
        are logged and skipped.

        :returns: generator over the unpickled i-vector sets
        :raises DiarizationException: when a line has more than two columns
    """
    with open(self.input_list, 'r') as f:
        for line in f:
            columns = line.split()
            if not columns:
                # Blank line: nothing to load.
                continue
            file_name = columns[0]
            loginfo('[Diarization.load_ivecs] Loading pickle file {} ...'.
                    format(file_name))
            if len(columns) > 2:
                raise DiarizationException(
                    '[Diarization.load_ivecs] Unexpected number of columns in input list {}.'
                    .format(self.input_list))
            num_spks = int(columns[1]) if len(columns) == 2 else None
            try:
                # Pickle data is binary: text mode fails on Python 3.
                # NOTE: pickle.load can execute arbitrary code; only load trusted files.
                with open(os.path.join(self.ivecs_dir, file_name + '.pkl'),
                          'rb') as i:
                    ivec_set = pickle.load(i)
            except IOError:
                logwarning(
                    '[Diarization.load_ivecs] No pickle file found for {}.'
                    .format(file_name))
                continue
            if num_spks is not None:
                ivec_set.num_speakers = num_spks
            yield ivec_set
def score(self):
    """ Score every loaded i-vector set against its speaker centroids.

        When a set carries a speaker count, that count (limited to the number
        of i-vectors in the set) is used for clustering; otherwise the count
        and the centroids come from `self.get_num_speakers`. Sets without
        i-vectors are logged and left out of the result.

        :returns: scores keyed by the normalized name of each i-vector set
        :rtype: dict
    """
    results = {}
    for ivec_set in self.ivecs:
        set_name = os.path.normpath(ivec_set.name)
        vectors = ivec_set.get_all()
        loginfo('[Diarization.score] Scoring {} ...'.format(set_name))
        count = ivec_set.size()
        if not count > 0:
            logwarning(
                '[Diarization.score] No i-vectors to score in {}.'.format(
                    ivec_set.name))
            continue

        if ivec_set.num_speakers is None:
            num_speakers, centroids = self.get_num_speakers(vectors)
        else:
            num_speakers = min(ivec_set.num_speakers, count)
            # scikit-learn's k-means supplies the initial centres for the
            # PLDA-based KMeans.
            initial = sklearnKMeans(n_clusters=num_speakers).fit(vectors)
            centroids = KMeans(initial.cluster_centers_, num_speakers,
                               self.plda).fit(vectors)

        if self.norm_list is not None:
            results[set_name] = self.s_norm(vectors, centroids)
        else:
            results[set_name] = self.plda.score(
                vectors, centroids, self.scale, self.shift)
    return results
def get_der(self, ref_file, scores):
    """ Compute Diarization Error Rate from reference and scores.

        The per-recording and the average DER are logged and the values are
        handed to `Diarization.plot_der`. When there is no reference at all,
        a warning is logged and nothing is plotted.

        :param ref_file: path to file with diarization reference; every
            non-empty line must have nine whitespace separated fields
        :type ref_file: str
        :param scores: scores indexed by i-vector set name; column `i` of an
            entry is used to pick the speaker of the i-th i-vector
        :type scores: dict
    """
    ref, hyp = self.init_annotations()
    with open(ref_file, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            _, name, _, start, duration, _, _, speaker, _ = line.split()
            begin = float(start)
            ref[name][Segment(begin, begin + float(duration))] = speaker
    for ivecset in self.ivecs:
        if ivecset.size() > 0:
            # NOTE(review): score() keys its result by os.path.normpath(name)
            # while the raw name is used here -- confirm the two always agree.
            name, reg_name = ivecset.name, ivecset.name
            # dirty trick, will be removed, watch out
            if 'beamformed' in name:
                reg_name = re.sub('beamformed/', '', name)
            # # # # # # # # # # # # # # # # # # # # #
            reg_name = re.sub('/.*', '', reg_name)
            for i, ivec in enumerate(ivecset.ivecs):
                # Window bounds are divided by 1000 before building the segment.
                start, end = ivec.window_start / 1000.0, ivec.window_end / 1000.0
                hyp[reg_name][Segment(start, end)] = np.argmax(scores[name].T[i])
        else:
            logwarning(
                '[Diarization.get_der] No i-vectors to dump in {}.'.format(
                    ivecset.name))
    der = DiarizationErrorRate()
    der.collar = 0.25
    names, values = [], []
    for name in ref:
        der_num = der(ref[name], hyp[name]) * 100
        names.append(name)
        values.append(der_num)
        loginfo('[Diarization.get_der] {} DER = {}'.format(
            name, '{0:.3f}'.format(der_num)))
    if not values:
        # An empty reference used to end in a ZeroDivisionError below.
        logwarning(
            '[Diarization.get_der] No reference found in {}.'.format(ref_file))
        return
    loginfo('[Diarization.get_der] Average DER = {}'.format(
        '{0:.3f}'.format(sum(values) / float(len(values)))))
    Diarization.plot_der(names, values)
def score(self):
    """ Score i-vectors against speaker clusters.

        When an i-vector set carries a speaker count it is used for
        clustering, limited to the number of i-vectors in the set because
        k-means cannot build more clusters than it has samples. Otherwise the
        count is estimated with `self.get_num_speakers`, which requires
        normalization i-vectors. Sets without i-vectors are logged and left
        out of the result.

        :returns: scores keyed by the normalized name of each i-vector set
        :rtype: dict
        :raises DiarizationException: when the number of speakers is unknown
            and `self.norm_ivecs` is None
    """
    scores_dict = {}
    for ivecset in self.ivecs:
        name = os.path.normpath(ivecset.name)
        ivecs = ivecset.get_all()
        loginfo('[Diarization.score] Scoring {} ...'.format(name))
        size = ivecset.size()
        if not size > 0:
            logwarning(
                '[Diarization.score] No i-vectors to score in {}.'.format(
                    ivecset.name))
            continue
        if ivecset.num_speakers is not None:
            # Asking for more clusters than samples makes scikit-learn's
            # KMeans raise, so never request more speakers than i-vectors.
            num_speakers = min(ivecset.num_speakers, size)
            sklearnkmeans = sklearnKMeans(
                n_clusters=num_speakers).fit(ivecs)
            centroids = KMeans(sklearnkmeans.cluster_centers_,
                               num_speakers, self.plda).fit(ivecs)
        elif self.norm_ivecs is not None:
            num_speakers, centroids = self.get_num_speakers(ivecs)
        else:
            raise DiarizationException(
                '[Diarization.score] Can not estimate number of speakers without training set.'
            )
        if self.norm_list is None:
            scores_dict[name] = self.plda.score(
                ivecs, centroids, self.scale, self.shift)
        else:
            scores_dict[name] = self.s_norm(ivecs, centroids)
    return scores_dict
def get_main_area(img, acta):
    """Crop an acta image down to the area that holds its content.

    The bounding box of everything darker than the threshold is computed and
    grown by a small safety margin. The final crop is the union of that box
    and the image with the `cfg.remove_pixels` margins taken off, so the
    margins are only kept where content reaches into them.

    img: <numpy.ndarray> three-dimensional BGR image to crop
    acta: acta identifier, only used in the log output

    Returns the cropped region of `img`.
    """
    remove = cfg.remove_pixels if cfg.remove_pixels else [0, 0, 0, 0]

    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    _, thresh = cv.threshold(gray, 127, 255, cv.THRESH_BINARY_INV)
    # Dilate so that nearby strokes merge into a few large blobs.
    kernel = np.ones((7, 7), np.uint8)
    gray = cv.dilate(thresh, kernel, iterations=1)
    ctrs, _ = cv.findContours(gray.copy(), cv.RETR_EXTERNAL,
                              cv.CHAIN_APPROX_SIMPLE)

    # Bounding boxes converted from (x, y, w, h) to corners (x1, y1, x2, y2).
    areas = [(x, y, x + w, y + h)
             for x, y, w, h in (cv.boundingRect(ctr) for ctr in ctrs)]

    height, width, _ = img.shape
    if not areas:
        # No regions of interest, possibly a blank image.
        areas = [(0, 0, width, height)]

    lmin = list(map(min, zip(*areas)))
    lmax = list(map(max, zip(*areas)))

    add = 3
    seguridad = (max(lmin[0] - add, 0),
                 max(lmin[1] - add, 0),
                 min(lmax[2] + add, width),
                 min(lmax[3] + add, height))

    x1 = min(seguridad[0], remove[0])
    y1 = min(seguridad[1], remove[1])
    x2 = max(seguridad[2], width - remove[2])
    y2 = max(seguridad[3], height - remove[3])
    crop = (x1, y1, x2, y2)

    loginfo("Acta : {0}".format(acta))
    loginfo("Recorte seguridad: {0}".format(str(seguridad)))
    loginfo("Recorte final : {0}".format(str(crop)))

    # `crop` holds corner coordinates, so slice corner to corner. Slicing it
    # as (x, y, w, h) kept the right/bottom margins whenever x1/y1 were > 0.
    roi = img[y1:y2, x1:x2]
    return roi
def logos_from_pdf(cfg, pdf_file, quiet=False):
    """logos_from_pdf

    Processes a trademark bulletin PDF file and extracts the logo and the
    text of every acta into `<cfg.outputdir>/<pdf name>/logos` and
    `<cfg.outputdir>/<pdf name>/txt`.

    cfg: <Config> process configuration object
    pdf_file: <str> path to the bulletin PDF file
    quiet: <bool> when true no progress bar is used and the status lines are
           logged with printmsg=False
    """
    print("\nExtracción de logos y textos\n")

    workpath = tempfile.mkdtemp()
    boletin = os.path.splitext(os.path.basename(pdf_file))[0]
    outputpath_logos = os.path.join(cfg.outputdir, boletin, "logos")
    outputpath_txt = os.path.join(cfg.outputdir, boletin, "txt")
    os.makedirs(outputpath_txt, exist_ok=True)
    os.makedirs(outputpath_logos, exist_ok=True)

    dte = DataExtractor(workpath)
    total_logos = 0
    total_textos = 0

    with open(pdf_file, 'rb') as fp:
        rsrcmgr = PDFResourceManager()
        device = PDFPageAggregator(rsrcmgr, laparams=LAParams())
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        pages = list(PDFPage.get_pages(fp))
        total_pages = len(pages)
        loginfo("Total de páginas : {0}".format(total_pages))

        if cfg.debug_page:
            # Debugging a single page: process only that one.
            firstp = endp = cfg.debug_page
            num_bars = 1
        else:
            firstp = cfg.from_page or (cfg.ignore_first_pages + 1)
            # NOTE(review): the "+ 1" puts endp one page past
            # total_pages - ignore_last_pages -- confirm this is intended.
            endp = cfg.to_page or (total_pages - cfg.ignore_last_pages) + 1
            if cfg.detect_export_pages and not (cfg.from_page or cfg.to_page):
                firstp, endp = 1, total_pages
            num_bars = (endp - firstp) + 1

        if not quiet:
            widgets = [
                FormatLabel(''), ' ',
                Percentage(), ' ',
                Bar('#'), ' ',
                ETA(), ' ',
                RotatingMarker()
            ]
            bar = ProgressBar(widgets=widgets, maxval=num_bars)

        # Pages are numbered from 1; `i` counts the pages actually processed.
        selected = (p for n, p in enumerate(pages, start=1)
                    if firstp <= n <= endp)
        for i, page in enumerate(selected, start=1):
            interpreter.process_page(page)
            objetos = dte.get_data_from_layout(device.get_result())

            total_logos += sum(1 for e in objetos if e[2] is not None)
            total_textos += len(objetos)

            for acta, texto, logo_file in objetos:
                if logo_file:
                    file_extension = os.path.splitext(logo_file)[1]
                    logo_dst = os.path.join(
                        outputpath_logos,
                        "{0}{1}".format(acta, file_extension))
                    shutil.copyfile(logo_file, logo_dst)

                txt_file = os.path.join(outputpath_txt,
                                        "{0}.{1}".format(acta, "txt"))
                # Round trip through latin-1 bytes, dropping whatever does not
                # survive the re-decoding as UTF-8.
                texto = texto.encode('latin-1', 'ignore').decode('utf-8', 'ignore')
                with open(txt_file, 'w', errors="ignore") as f:
                    f.write(texto)

            if not quiet:
                widgets[0] = FormatLabel('[Página {0} de {1}]'.format(
                    i, total_pages))
                bar.update(i)

    if not quiet:
        bar.finish()

    printmsg = not quiet
    loginfo("", printmsg=printmsg)
    loginfo("-- Estatus -------------------------------------------",
            printmsg=printmsg)
    loginfo("Carpeta temporal de trabajo : {0}".format(workpath),
            printmsg=printmsg)
    loginfo("Total de logos extraídos : {0}".format(total_logos),
            printmsg=printmsg)
    loginfo("Total de textos extraídos : {0}".format(total_textos),
            printmsg=printmsg)

    # The working folder is kept when debugging a page.
    if not cfg.debug_page:
        loginfo("Eliminamos carpeta de trabajo")
        shutil.rmtree(workpath)
os.path.join(application_path, 'pboletin.ini')) else: configfile = args.configfile if args.pdffile: if args.logfile: log_level = getattr(logging, args.loglevel.upper(), None) logging.basicConfig(filename=args.logfile, level=log_level, format='%(asctime)s|%(levelname)s|%(message)s', datefmt='%Y/%m/%d %I:%M:%S', filemode='w') try: loginfo("Configuración : {0}".format(configfile)) cfg.set_file(configfile) except IOError as msg: cmdparser.error(str(msg)) sys.exit(-1) if args.debug_page: cfg.save_process_files = True cfg.debug_page = args.debug_page cfg.from_page = args.from_page cfg.to_page = args.to_page if args.inputpath: cfg.inputdir = args.inputpath args.pdffile = os.path.join(cfg.inputdir, args.pdffile)
def process_pdf(cfg, pdf_file, quiet=False):
    """process_pdf

    Exports the selected pages of a bulletin PDF with the configured external
    tools, reads the metadata of the actas from the exported HTML of each
    page and hands the page image to crop_regions. A status summary is logged
    at the end.

    cfg: <Config> process configuration object
    pdf_file: <str> path to the bulletin PDF file
    quiet: <bool> when true no progress bar is used
    """
    lista_actas = []
    total_actas = 0
    total_regions = 0
    total_pages = pdf_count_pages(pdf_file, cfg.pdfinfo_bin, cfg.rxcountpages)
    print("\nRecorte de actas\n")
    loginfo("{0} has {1} pages".format(pdf_file, total_pages))
    if not cfg.debug_page:
        firstp = cfg.from_page if cfg.from_page else (cfg.ignore_first_pages + 1)
        # NOTE(review): the "+ 1" puts endp one page past
        # total_pages - ignore_last_pages, and the loop below runs up to endp
        # inclusive -- confirm this is intended.
        endp = cfg.to_page if cfg.to_page else (total_pages - cfg.ignore_last_pages) + 1
        if cfg.detect_export_pages and (not cfg.from_page and not cfg.to_page):
            # No explicit range: walk every page and let the metadata decide
            # which ones get cropped.
            firstp = 1
            endp = total_pages
        num_bars = (endp - firstp) + 1
    else:
        # Debugging a single page: process only that one.
        firstp = cfg.debug_page
        endp = cfg.debug_page
        num_bars = 1
    if not quiet:
        widgets = [
            FormatLabel(''), ' ',
            Percentage(), ' ',
            Bar('#'), ' ',
            ETA(), ' ',
            RotatingMarker()
        ]
        bar = ProgressBar(widgets=widgets, maxval=num_bars)
    loginfo("Create temp dir")
    workpath = tempfile.mkdtemp()
    filename, _ = os.path.splitext(os.path.basename(pdf_file))
    outputpath = os.path.join(cfg.outputdir, filename)
    loginfo("Create outputp dir")
    os.makedirs(outputpath, exist_ok=True)
    loginfo("Extract PDF pages form {0}".format(pdf_file))
    # Width used to zero-pad the page number in the exported image name.
    maxz = len(str(total_pages))
    i = 1
    ############################################################################
    # Create the sub-folders where the images are stored, one per extension
    ############################################################################
    for ext in cfg.imgext:
        opath = os.path.join(outputpath, ext, "check")
        os.makedirs(opath, exist_ok=True)
    for p in range(firstp, endp + 1):
        loginfo("Extract page {0} of {1}".format(i, num_bars))
        # NOTE(review): the commands below are built by string formatting and
        # run with shell=True, so paths containing spaces or shell
        # metacharacters will not be passed through intact.
        if cfg.export_logos:
            cmdline = '{0} -png -q -f {3} -l {4} {1} {2}/pagina-{5}'.format(
                cfg.pdfimages_bin, pdf_file, workpath, p, p, p)
            loginfo(cmdline)
            # Leaving the with-block waits for the command to finish.
            with subprocess.Popen(cmdline, shell=True) as _:
                pass
        cmdline = '{0} -q -png -f {3} -l {4} -r {5} {1} {2}/pagina'.format(
            cfg.pdftoppm_bin, pdf_file, workpath, p, p, cfg.resolution)
        loginfo(cmdline)
        with subprocess.Popen(cmdline, shell=True) as _:
            pass
        cmdline = '{0} -q -c -f {3} -l {4} {1} {2}/pagina'.format(
            cfg.pdftohtml_bin, pdf_file, workpath, p, p)
        loginfo(cmdline)
        with subprocess.Popen(cmdline, shell=True) as _:
            pass
        with open(os.path.join(workpath, 'pagina-{0}.html'.format(str(p))),
                  'r', encoding="Latin1") as f:
            html = f.read()
        img_file = "pagina-{0}.png".format(str(p).zfill(maxz))
        img_file = os.path.join(workpath, img_file)
        actas = get_metadata(cfg, html)
        loginfo("Actas encontradas: {0}".format(str(actas)))
        if not cfg.detect_export_pages or (cfg.detect_export_pages and len(actas[2]) > 0):
            # Last acta seen so far; passed on to crop_regions.
            last_acta = lista_actas[-1] if lista_actas else None
            actas_pagina = [a[2] for a in actas[2]]
            lista_actas.extend(actas_pagina)
            # NOTE(review): actas[2] is already used above, so the None check
            # here cannot be what protects against actas being None.
            total_actas = total_actas + (len(actas[2]) if actas is not None else 0)
            try:
                total_regions = total_regions + crop_regions(
                    img_file, workpath, outputpath,
                    last_acta=last_acta, metadata=actas)
            except Exception as msg:
                # A failing page is logged and the remaining pages still run.
                logerror("Error:" + str(msg))
        if not quiet:
            widgets[0] = FormatLabel('[Página {0} de {1}]'.format(
                i, num_bars))
        if not quiet:
            bar.update(i)
        i = i + 1
    loginfo("Remove temp dir")
    if not quiet:
        bar.finish()
    printmsg = True if not quiet else False
    loginfo("", printmsg=True)
    loginfo("-- Estatus -------------------------------------------",
            printmsg=printmsg)
    loginfo("Carpeta temporal de trabajo : {0}".format(workpath),
            printmsg=printmsg)
    loginfo("Carpeta de salida : {0}".format(outputpath),
            printmsg=printmsg)
    # The working folder is kept when debugging a page.
    if not cfg.debug_page:
        loginfo("Eliminamos carpeta de trabajo")
        shutil.rmtree(workpath)
    # Actas listed in the metadata for which no image file exists in the
    # folder of the first configured extension.
    actas_error = []
    for a in lista_actas:
        f = os.path.join(outputpath, cfg.imgext[0],
                         '{0}.{1}'.format(a, cfg.imgext[0]))
        if not os.path.isfile(f):
            actas_error.append(a)
    loginfo("Total de actas : {0}".format(total_actas), printmsg=True)
    loginfo("Total de regiones recortadas : {0}".format(total_regions),
            printmsg=True)
    if actas_error:
        loginfo("Actas no encontradas : {0}".format(
            ",".join(actas_error)), printmsg=True)
    if not cfg.debug_page:
        # loginfo("Actas encontradas : {0}".format(",".join(set(lista_actas)-set(actas_error))), printmsg=True)
        loginfo("-- Configuración -------------------------------------",
                printmsg=True)
        for linea in str(cfg).split("\n"):
            loginfo(linea, printmsg=True)
        loginfo("------------------------------------------------------",
                printmsg=True)
    loginfo("Finish process", printmsg=True)
    if cfg.debug_page:
        show_results(workpath, outputpath, lista_actas)
def save_crop(acta, crop, outputpath, boletin, index, last_acta):
    """save_crop

    Writes a cropped region to disk once per extension in `cfg.imgext`.

    When the crop has no acta but there is a previous acta, the crop is also
    stacked below the image saved for that acta; the stacked image then takes
    the place of the previous file and the previous file is moved to the
    `check` sub-folder.

    acta: acta identifier of the crop, falsy when the region has none
    crop: <numpy.ndarray> three-dimensional image of the region
    outputpath: <str> base output folder, with one sub-folder per extension
    boletin: name used for crops without acta (`<boletin>_crop_<index>`)
    index: number of the crop, used in the name of crops without acta
    last_acta: identifier of the previous acta, or a falsy value

    Returns 0 when the crop holds at most one distinct colour (nothing is
    written) and 1 otherwise.
    """
    loginfo("save_crop")
    unique_colors = len(np.unique(crop.reshape(-1, crop.shape[2]), axis=0))
    compression = [int(cv.IMWRITE_JPEG_QUALITY), cfg.jpg_compression]
    fmerged = None

    if unique_colors <= 1:
        return 0

    if not acta and last_acta:
        loginfo("merging")
        ########################################################################
        # This is a merge
        ########################################################################
        ext_compat = [e for e in cfg.imgext if e not in ['pcx']][0]
        last_file = os.path.join(outputpath, ext_compat,
                                 '{0}.{1}'.format(last_acta, ext_compat))
        last_img = cv.imread(last_file)
        if last_img is None:
            # cv.imread signals an unreadable file with None; fail with a
            # clear message instead of an AttributeError further down.
            raise IOError(
                "Cannot read previous image {0}".format(last_file))

        # Stack both images vertically on a white canvas.
        images = [last_img, crop]
        max_width = max(img.shape[1] for img in images)
        total_height = sum(img.shape[0] for img in images)
        merged = np.full((total_height, max_width, 3), 255, dtype=np.uint8)

        current_y = 0  # row where the next image is placed
        for image in images:
            merged[current_y:image.shape[0] + current_y,
                   :image.shape[1], :] = image
            current_y += image.shape[0]

        unique_colors = len(
            np.unique(merged.reshape(-1, merged.shape[2]), axis=0))

        for ext in cfg.imgext:
            fmerged = os.path.join(outputpath, ext, 'check',
                                   '{0}.merged.{1}'.format(last_acta, ext))
            if ext.lower() == 'pcx':
                # HACK: the pcx file is converted from the file already
                # written for the first extension.
                src = fmerged.replace(ext, cfg.imgext[0])
                Image.open(src).save(fmerged)
            else:
                # Convert only once: running BGR2GRAY on an image that is
                # already single-channel makes OpenCV raise.
                if unique_colors <= 256 and merged.ndim == 3:
                    merged = cv.cvtColor(merged, cv.COLOR_BGR2GRAY)
                loginfo("Saving : {0}".format(fmerged))
                cv.imwrite(fmerged, merged, compression)
                if ext.lower() == 'jpg':
                    add_resolution_to_jpg(fmerged, cfg.resolution)

    for ext in cfg.imgext:
        opath = os.path.join(outputpath, ext)
        if acta:
            f = os.path.join(opath, '{0}.{1}'.format(acta, ext))
        else:
            # Crops without acta go to the "check" folder.
            f = os.path.join(opath, 'check',
                             '{0}_crop_{1}.{2}'.format(boletin, index, ext))
        loginfo("Saving : {0}".format(f))
        if ext.lower() == 'pcx':
            # HACK: same conversion from the first extension as above.
            src = f.replace(ext, cfg.imgext[0])
            Image.open(src).save(f)
        else:
            # Same single-conversion guard as for the merged image.
            if unique_colors <= 256 and crop.ndim == 3:
                crop = cv.cvtColor(crop, cv.COLOR_BGR2GRAY)
            cv.imwrite(f, crop, compression)
            if ext.lower() == 'jpg':
                add_resolution_to_jpg(f, cfg.resolution)

    if fmerged:
        for ext in cfg.imgext:
            last_file = os.path.join(outputpath, ext,
                                     '{0}.{1}'.format(last_acta, ext))
            # Move the previous file to "check"
            outpath = os.path.join(outputpath, ext, 'check')
            dst_filename = os.path.join(outpath, os.path.basename(last_file))
            loginfo("Moving : {0} to {1}".format(
                last_file, dst_filename))
            shutil.move(last_file, outpath)

            # The merged image takes the place of the previous file.
            fmerged = os.path.join(outputpath, ext, 'check',
                                   '{0}.merged.{1}'.format(last_acta, ext))
            dst_filename = os.path.join(outpath, os.path.basename(fmerged))
            loginfo("Moving : {0} to {1}".format(
                fmerged, dst_filename))
            shutil.move(fmerged, last_file)

    return 1
def crop_regions(filepath, workpath, outputpath, last_acta, metadata=None):
    """crop_regions

    Detects the straight lines of a page image, uses them to split the page
    into rectangular regions and passes every region whose area lies between
    the configured limits to get_main_area and save_crop.

    filepath: <str> path of the page image
    workpath: <str> folder where the intermediate images are written
    outputpath: <str> output folder handed to save_crop
    last_acta: identifier of the previous acta, handed to save_crop
    metadata: optional tuple (x, y, actas) used to look up the acta of each
              region with get_acta; without it regions are saved with no acta

    Returns the number of regions handed to save_crop, 0 when no lines were
    detected and -1 when the image could not be read.
    """
    filename, _ = os.path.splitext(os.path.basename(filepath))

    ############################################################################
    # All the calculations are based on 300 dpi
    # They have to be compensated when the resolution is different
    ############################################################################
    cfg.compensation = (cfg.resolution / 300)

    ############################################################################
    # Initial read of the image
    ############################################################################
    loginfo("Abriendo archivo: {0}".format(filepath))
    src = cv.imread(filepath)
    if src is None:
        logerror('opening {0}!'.format(filepath))
        return -1
    height, width, channels = src.shape

    ############################################################################
    # Keep only the colour of the straight lines and the text (negative b/w)
    ############################################################################
    loginfo("Mask image (from {0} to {1})".format(cfg.linecolor_from,
                                                  cfg.linecolor_to))
    mask_bw_negative = cv.inRange(src, cfg.linecolor_from, cfg.linecolor_to)

    ############################################################################
    # Remove artifacts smaller than the configured size
    ############################################################################
    loginfo("Remove artifacts")
    nb_components, output, stats, centroids = cv.connectedComponentsWithStats(
        mask_bw_negative, connectivity=8)
    # The first component is skipped; the last stats column is used as size.
    sizes = stats[1:, -1]
    nb_components = nb_components - 1
    clean_mask = np.zeros((output.shape[0], output.shape[1], 3), dtype="uint8")
    for i in range(0, nb_components):
        if sizes[i] >= cfg.artifact_min_size * cfg.compensation:
            clean_mask[output == i + 1] = 255

    ############################################################################
    loginfo("Copy original")
    original_con_lineas = np.copy(src)
    original_original_con_lineas = np.copy(src)
    final = src

    ############################################################################
    # Thicken the mask so that no straight lines are lost
    ############################################################################
    loginfo("Dilate")
    clean_mask_gray = cv.Canny(clean_mask, 50, 150, apertureSize=3)
    kernel = cv.getStructuringElement(cv.MORPH_CROSS, (2, 2))
    clean_mask_gray = cv.dilate(clean_mask_gray, kernel, iterations=1)

    ############################################################################
    # Straight line detection and creation of the crop mask
    ############################################################################
    height, width, channels = final.shape
    crop_mask = np.zeros((height, width, 3), np.uint8)
    minLineLength = int(cfg.line_min_length * cfg.compensation)
    maxLineGap = int(cfg.line_max_gap * cfg.compensation)
    theta = int(cfg.theta)
    thres = int(cfg.line_thres * cfg.compensation)
    rho = cfg.line_rho

    loginfo("Lines detection")
    linesP = cv.HoughLinesP(clean_mask_gray, rho, np.pi / theta, thres,
                            minLineLength=minLineLength,
                            maxLineGap=maxLineGap)

    cv.imwrite(os.path.join(workpath, '01.original.png'), src)
    cv.imwrite(os.path.join(workpath, '02.mask_bw_negative.png'),
               mask_bw_negative)
    cv.imwrite(os.path.join(workpath, '03.clean_mask.png'), clean_mask)
    cv.imwrite(os.path.join(workpath, '04.clean_mask_gray.png'),
               clean_mask_gray)

    if linesP is not None:
        llorig = [e[0] for e in np.array(linesP).tolist()]
        for linea in llorig:
            cv.line(original_original_con_lineas, (linea[0], linea[1]),
                    (linea[2], linea[3]), (0, 0, 255), 3, cv.LINE_AA)

        ll = process_lines(src, llorig, cfg.resolution, cfg.only_horizontal,
                           cfg.h_line_gap, cfg.v_line_gap, cfg.compensation)
        for linea in ll:
            cv.line(original_con_lineas, (linea[0], linea[1]),
                    (linea[2], linea[3]), (0, 0, 255), 3, cv.LINE_AA)
            cv.line(crop_mask, (linea[0], linea[1]),
                    (linea[2], linea[3]), (0, 0, 255), 3, cv.LINE_AA)

        if cfg.save_process_files:
            cv.imwrite(os.path.join(workpath, '05.crop_mask.png'), crop_mask)
            cv.imwrite(os.path.join(workpath, '06.original_con_lineas.png'),
                       original_con_lineas)

        ########################################################################
        # Get the rectangles of interest from the mask
        ########################################################################
        loginfo("Contours")
        gray = cv.cvtColor(crop_mask, cv.COLOR_BGR2GRAY)  # convert to grayscale
        retval, thresh_gray = cv.threshold(gray, thresh=1, maxval=255,
                                           type=cv.THRESH_BINARY_INV)
        contours, hierarchy = cv.findContours(thresh_gray, cv.RETR_CCOMP,
                                              cv.CHAIN_APPROX_SIMPLE)

        ########################################################################
        # Crop the rectangles
        # If the coordinates of some acta fall inside the cropped area
        # Good! the area can be associated with the acta number
        ########################################################################
        max_area = cfg.max_area * cfg.compensation
        min_area = cfg.min_area * cfg.compensation

        # Without metadata there is nothing to look the acta up in; these
        # names used to stay unbound and the first accepted region raised
        # NameError.
        actas, relation = None, None
        if metadata:
            (meta_x, meta_y, actas) = metadata
            relation = sum([height / meta_y, width / meta_x]) / 2

        contornos = []
        for cont in contours:
            x, y, w, h = cv.boundingRect(cont)
            area = w * h
            contornos.append((x, y, w, h, area))

        final = final.astype(np.uint8)
        contornos.sort(key=lambda c: c[4])

        i = 1
        adj = 3  # So that the straight lines are left out of the crop
        # The two largest rectangles are left out.
        for recorte in contornos[:-2]:
            x, y, w, h, area = recorte
            x = x + adj
            y = y + adj
            w = w - (adj * 2)
            h = h - (adj * 2)
            if area < max_area and area > min_area:
                if metadata:
                    acta = get_acta(actas, (x, y, x + w, y + h), relation)
                else:
                    acta = None
                roi = final[y:y + h, x:x + w]
                roi = get_main_area(roi, acta)
                save_crop(acta, roi, outputpath, filename, i, last_acta)
                i = i + 1

        loginfo("End crop_regions")
        return i - 1

    return 0
os.path.join(application_path, 'pboletin.ini')) else: configfile = args.configfile if args.pdffile: if args.logfile: log_level = getattr(logging, args.loglevel.upper(), None) logging.basicConfig(filename=args.logfile, level=log_level, format='%(asctime)s|%(levelname)s|%(message)s', datefmt='%Y/%m/%d %I:%M:%S', filemode='w') try: loginfo("Config file: {0}".format(configfile)) cfg.set_file(configfile) except IOError as msg: cmdparser.error(str(msg)) sys.exit(-1) if args.debug_page: cfg.save_process_files = True cfg.debug_page = args.debug_page cfg.from_page = args.from_page cfg.to_page = args.to_page if args.inputpath: cfg.inputdir = args.inputpath args.pdffile = os.path.join(cfg.inputdir, args.pdffile)