Пример #1
0
 def download(self):
     """Download the source PDF and convert it to XML with ``pdftohtml``.

     The file at ``self._url`` is fetched into the library download cache
     and converted with ``pdftohtml -xml`` into the bundle build
     filesystem. The conversion is skipped when the expected XML file
     already exists.

     ``self.fatal`` is called when the ``pdftohtml`` program is not
     available. A non-zero exit status from the conversion is only
     reported on stdout; the output path is returned regardless.

     Returns:
         The system path of the XML file (the output base name + ".xml").
     """
     import os.path
     import subprocess

     from ambry_sources.fetch import download

     if not self.cmd_exists('pdftohtml'):
         self.fatal("pdftohtml program does not exist")

     # The second element of the returned pair is not needed here.
     pdf_file_name, _ = download(self._url, self._library.download_cache)

     # Drop only a trailing '.pdf'; a '.pdf' elsewhere in the name is kept.
     xml_file_name = os.path.split(pdf_file_name)[-1]
     if xml_file_name.endswith('.pdf'):
         xml_file_name = xml_file_name[:-len('.pdf')]

     pdf_path = self._library.download_cache.getsyspath(pdf_file_name)
     xml_base = self._bundle.build_fs.getsyspath(xml_file_name)

     # pdftohtml is given the base name; this code expects the output to
     # land at "<base>.xml".
     xml_path = xml_base + '.xml'

     if not os.path.exists(xml_path):
         exit_code = subprocess.call(['pdftohtml', '-xml', pdf_path, xml_base])
         if exit_code != 0:
             print('Errors while converting pdf to xml.')

     return xml_path
Пример #2
0
    def _download(self):
        """Fetch ``self._url`` and store the result in ``self._path``.

        Calls ``ambry_sources.fetch.download`` with the instance's
        filesystem, account accessor and logger. The first element of the
        returned pair is kept in ``self._path``; the second is discarded.
        """
        from ambry_sources.fetch import download

        fetched = download(
            self._url,
            self._fs,
            self._account_accessor,
            logger=self._logger,
        )
        # Only the first element of the pair is used.
        self._path, _unused = fetched