Пример #1
0
    def __init__(self, file_name):
        """Set up the processor and open the document with poppler.

        Delegates to ``DocumentProcessor.__init__``, switches on FreeType,
        antialiasing and vector antialiasing on poppler's global parameter
        object, and then opens ``self._file_name`` as a ``poppler.PDFDoc``.

        Args:
            file_name: Forwarded unchanged to ``DocumentProcessor.__init__``.
                Presumably that call is what populates ``self._file_name``
                (the base class is not visible here -- confirm).
        """
        DocumentProcessor.__init__(self, file_name)

        # The rendering switches are set on poppler's module-level
        # globalParams object rather than on this instance.
        render_params = poppler.cvar.globalParams
        for enable in (
            render_params.setEnableFreeType,
            render_params.setAntialias,
            render_params.setVectorAntialias,
        ):
            enable("yes")

        self._doc = poppler.PDFDoc(self._file_name)
        self._index = None

        # NOTE: the base name of the given file is assumed to be the md5
        # of the url, so it is stored as-is.
        self._url_md5 = os.path.basename(self._file_name)

        # keyword for boolean search
        self._AND = ' and'
    def __init__(self, file_name):
        """Initialise the base processor and load the document via poppler.

        After calling ``DocumentProcessor.__init__``, this configures
        poppler's global parameter object (FreeType on, error-quiet flag set
        to ``True``, antialiasing and vector antialiasing on) and opens
        ``self._file_name`` as a ``poppler.PDFDoc``.

        Args:
            file_name: Handed to ``DocumentProcessor.__init__`` unchanged.
                ``self._file_name`` is read afterwards; presumably the base
                initialiser sets it (not visible from here -- confirm).
        """
        DocumentProcessor.__init__(self, file_name)

        # Configure poppler through its module-level globalParams object.
        global_params = poppler.cvar.globalParams
        global_params.setEnableFreeType("yes")
        global_params.setErrQuiet(True)
        global_params.setAntialias("yes")
        global_params.setVectorAntialias("yes")

        pdf_path = self._file_name
        self._doc = poppler.PDFDoc(pdf_path)
        self._index = None

        # NOTE: the url md5 is taken from the file's base name, on the
        # assumption that the file was named after the md5 of its url.
        self._url_md5 = os.path.basename(pdf_path)

        # keyword for boolean search
        self._AND = ' and'
Пример #3
0
 def __init__(self, file_name):
     """Initialise the base processor and open ``file_name`` as an image.

     Args:
         file_name: Forwarded to ``DocumentProcessor.__init__`` and then
             passed to ``Image.open``; the result is kept in ``self._img``.
     """
     DocumentProcessor.__init__(self, file_name)
     opened_image = Image.open(file_name)
     self._img = opened_image
Пример #4
0
 def __init__(self, file_name):
     """Run the ``DocumentProcessor`` initialiser, then open the image.

     Args:
         file_name: Given first to ``DocumentProcessor.__init__`` and then
             to ``Image.open``, whose return value is stored on
             ``self._img``.
     """
     DocumentProcessor.__init__(self, file_name)
     img = Image.open(file_name)
     self._img = img