示例#1
0
 def _get_link(self):
     """Return the download link produced by ``UrlProvider``.

     Reads ``server_url``, ``base_url`` and ``file_name`` from
     ``self.config_web``, passes them as keyword arguments to
     ``UrlProvider`` and returns the value of its ``get_download_link()``.
     """
     web_cfg = self.config_web
     logger.info("STEP: DOWNLOAD - START")
     # Attribute names on the config double as the provider's keyword names.
     provider_kwargs = {
         name: getattr(web_cfg, name)
         for name in ('server_url', 'base_url', 'file_name')
     }
     return UrlProvider(**provider_kwargs).get_download_link()
示例#2
0
 def get_converter(self):
     """Run the conversion step and report whether it succeeded.

     Copies ``temp_dir``, ``work_dir`` and ``output_dir`` from
     ``self.config`` into ``self.down_dict`` (as ``dest_dir``, ``work_dir``
     and ``output_dir``) and passes that dict to ``DataConverter`` as
     keyword arguments.

     Returns:
         True when ``DataConverter(...).converter()`` finished without
         raising; False when any exception was raised (it is logged as a
         warning).
     """
     logger.info("Starting convert part")
     settings = self.config
     # (key stored in down_dict, attribute read from the config)
     directory_keys = (
         ('dest_dir', 'temp_dir'),
         ('work_dir', 'work_dir'),
         ('output_dir', 'output_dir'),
     )
     for target_key, config_attr in directory_keys:
         self.down_dict[target_key] = getattr(settings, config_attr)
     try:
         DataConverter(**self.down_dict).converter()
     except Exception as error:
         logger.warning('FAILED TO PROCESS STEP; {}'.format(error))
         return False
     else:
         logger.info("STEP: CONVERT  - DONE")
         return True
示例#3
0
    def download_file(self):
        """Run the download step and return the downloader's result.

        Stores ``temp_dir`` and ``own_name`` from ``self.config`` plus the
        link from ``self._get_link()`` in ``self.down_dict``, then passes
        that dict to ``HttpDownloader`` as keyword arguments.

        Returns:
            Whatever ``HttpDownloader(...).download()`` returns, or False
            when constructing the downloader or downloading raised (the
            error is logged as a warning). Exceptions raised by
            ``self._get_link()`` are not caught here.
        """
        logger.info("Starting Download")
        settings = self.config
        params = self.down_dict
        params['dest_dir'] = settings.temp_dir
        params['own_name'] = settings.own_name
        params['url'] = self._get_link()
        try:
            outcome = HttpDownloader(**params).download()
        except Exception as error:
            logger.warning('FAILED TO PROCESS STEP; {}'.format(error))
            return False
        else:
            logger.info("STEP: DOWNLOAD ARCHIVE - DONE")
            return outcome
示例#4
0
    def get_download_link(self):
        """Scrape the rendered download page and return the full file URL.

        Opens ``self._base_url`` in headless Chrome, pauses three seconds
        (presumably to let the page's scripts render -- confirm), then looks
        inside the ``s-downloadable-resources__results`` container for the
        first anchor whose text matches ``self.latest_pattern``.

        Returns:
            ``self._server_url`` concatenated with the anchor's ``href``, or
            ``False`` when an ``AttributeError`` is raised while searching
            (for example when the results container is missing).

        Raises:
            SystemError: if the Chrome driver could not be started.
            ConnectionError: if no anchor matched ``self.latest_pattern``.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")

        try:
            driver = webdriver.Chrome(
                options=chrome_options,
                executable_path=ChromeDriverManager().install())
        except SystemError:
            # Without a driver nothing below can work. Previously execution
            # fell through and died with an unrelated UnboundLocalError.
            logger.warning("Need to updated Chrome")
            raise

        try:
            try:
                driver.get(self._base_url)
            except SystemError:
                # Best effort, as before: continue with whatever was loaded.
                logger.warning("Need to updated Chrome")
            time.sleep(3)
            res = driver.execute_script("return document.body.innerHTML")
        finally:
            # Always shut the browser down so no Chrome process is leaked.
            driver.quit()

        soup = BeautifulSoup(res, 'lxml')
        page_content = soup.find(
            'div', {'class': 's-downloadable-resources__results'})

        partial_url = None
        logger.info("Searching link for {}".format(self._file_name[:-2]))
        try:
            for tag in page_content.find_all(["a"]):
                if self.latest_pattern.search(tag.text):
                    partial_url = tag.get('href')
                    break
        except AttributeError:
            # Typically page_content is None because the container was not
            # found; callers receive False rather than an exception.
            logger.warning("We weren't able to get link for {}".format(
                self._file_name[:-2]))
            return False

        if partial_url is None:
            raise ConnectionError(
                "We weren't able to  get  link for  {}".format(
                    self._file_name[:-2]))

        new_url = self._server_url + str(partial_url)
        return new_url
示例#5
0
    def download(self):
        """Stream ``self._url`` into a file and return that file's path.

        The target path is ``self._own_name`` joined onto ``self._dest_dir``.
        The body is written in 64 KiB chunks and progress is logged after
        each chunk.

        Returns:
            The path of the written file.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status.
        """
        filename = os.path.join(self._dest_dir, self._own_name)
        cnk_size = 1024 * 8 * 8  # bytes per chunk (64 KiB)

        logger.info("Starting download from url {} to {}".format(
            self._url, filename))
        # The context manager releases the streamed connection even when
        # writing fails part-way through.
        with requests.get(self._url, stream=True) as response:
            # Fail before opening the file so an HTTP error page is never
            # saved as if it were the requested download.
            response.raise_for_status()
            written = 0
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=cnk_size):
                    if chunk:
                        f.write(chunk)
                        # Count real bytes: chunks may be shorter than
                        # cnk_size, and the log unit is KB, not bytes.
                        written += len(chunk)
                        logger.info(
                            'downloaded {} KB'.format(written // 1024))
        logger.info('Finished file download: {}'.format(filename))
        return filename