Пример #1
0
    def __init__(self,
                 db_connection,
                 logger,
                 date,
                 max_downloads=None,
                 max_upload_workers=20,
                 allow_repeat=False):
        """Store the configuration and build the helper objects.

        Creates serializers for ``job`` and ``granule`` on ``db_connection``,
        a downloader wired to this object's completion/error callbacks, and a
        Copernicus connector covering ``date`` up to ``date`` plus one day.
        """
        # Plain configuration and bookkeeping state.
        self.max_downloads = max_downloads
        self.max_upload_workers = max_upload_workers
        self.allow_repeat = allow_repeat
        self.total_downloads = 0
        self.upload_queue = Queue()
        self.lock = Lock()

        # Database access and logging.
        self.db_connection = db_connection
        self.logger = logger
        self.date = date
        self.job_serializer = Serializer(db_connection, job)
        self.granule_serializer = Serializer(db_connection, granule)

        log_info = self.logger.info
        log_info('Creating a workflow')

        # The downloader reports back through the two callbacks below; the
        # lock is handed to them as an extra callback argument.
        log_info('Creating aria2 downloader client')
        self.downloader = Downloader(
            on_download_error=self._on_download_error,
            on_download_complete=self._on_download_complete,
            callback_args=(self.lock, ),
        )

        # The search window starts at the given date and ends one day later.
        log_info('Creating copernicus API connector')
        window_start = self.date
        window_end = window_start + timedelta(days=1)
        self.copernicus = Copernicus(
            start_date=window_start.isoformat(),
            end_date=window_end.isoformat(),
            rows_per_query=30,
        )
Пример #2
0
async def _upload(c, m):
    """Handle an upload command issued as a reply to a media message.

    Replies with the matching notice and does nothing else when the
    credentials file is missing, the command is not a reply, the replied-to
    message has no media, or ``valid_media`` rejects it. Otherwise a status
    message is posted, the download and then the upload are run, and the
    status message is edited with the text returned by the last stage that
    ran (the download's result when the download reports failure).
    """
    if not os.path.isfile(Config.CRED_FILE):
        await m.reply_text(text=tr.NOT_AUTHENTICATED_MSG)
    elif not m.reply_to_message:
        await m.reply_text(text=tr.NOT_A_REPLY_MSG)
    elif not m.reply_to_message.media:
        await m.reply_text(text=tr.NOT_A_MEDIA_MSG)
    elif not valid_media(m.reply_to_message):
        await m.reply_text(text=tr.NOT_A_VALID_MEDIA_MSG)
    else:
        status_message = await c.send_message(
            chat_id=m.chat.id,
            text=tr.PROCESSING,
            reply_to_message_id=m.message_id,
        )

        downloaded, outcome = await Downloader(m).start(progress,
                                                        status_message)
        if downloaded:
            # Everything after the command word becomes the upload title.
            upload_title = ' '.join(m.command[1:])
            uploader = Uploader(outcome, upload_title)
            _, outcome = await uploader.start(progress, status_message)

        # ``outcome`` is either the download's failure text or the text
        # returned by the upload.
        await status_message.edit_text(text=outcome, parse_mode='markdown')
Пример #3
0
class Alojz(Reaction):
    """Reaction that looks up the forecast for a city on alojz.cz."""

    help = "Weather forecast from https://alojz.cz - only in czech!\nRun: $ alojz [city=praha]"
    # NOTE(review): the attribute name is spelled "aliasses"; presumably this
    # is what the Reaction base class reads - confirm before renaming.
    aliasses = ["alojz"]

    attributes = ["city"]

    # Both helpers are created in ``init``; they stay None until then.
    downloader = None
    parser = None

    def init(self):
        """Create the downloader and parser and store the default city."""
        self.downloader = Downloader()
        self.parser = Parser()
        default_city = "praha"
        self.ini.set("city", default_city)

    def do(self, city=None):
        """Return the forecast for ``city``.

        Falls back to the city stored in ``self.ini`` when ``city`` is None
        or an empty string. The city name is lower-cased before the lookup.
        """
        use_stored = city is None or city == ""
        chosen = self.ini.get("city") if use_stored else city
        return self.weather(chosen.lower())

    def weather(self, city):
        """Download the alojz.cz page for ``city`` and parse the forecast.

        Returns the result of ``parse_class`` for tag ``h2`` with class
        ``actual-forecast``.
        """
        url = "https://alojz.cz/" + city
        html = self.downloader.download(url)
        return self.parser.parse_class(html, "h2", "actual-forecast")
Пример #4
0
class Workflow:
    """Coordinate one Sentinel-2 download job for a single date.

    The workflow records a job through the job serializer, reads the product
    feed from the Copernicus connector, starts a download for every product
    that needs one, and puts each finished file on a queue consumed by the
    upload worker processes.
    """

    # Maximum number of downloads that may be started but not yet finished
    # before ``start`` pauses and waits for some of them to complete.
    MAX_PENDING_DOWNLOADS = 30

    # Seconds to sleep between checks while waiting for pending downloads.
    PENDING_POLL_SECONDS = 5

    def __init__(self,
                 db_connection,
                 logger,
                 date,
                 max_downloads=None,
                 max_upload_workers=20,
                 allow_repeat=False):
        """Store the configuration and build the helper objects.

        Args:
            db_connection: Connection handed to the job and granule
                serializers.
            logger: Logger providing ``info``, ``error`` and ``set_job_id``.
            date: Day to process; must support ``+ timedelta`` and
                ``isoformat()``.
            max_downloads: Optional cap on the number of downloads started.
                ``None`` means no cap. ``start`` overwrites this attribute
                with the number of downloads it actually started.
            max_upload_workers: Number of upload processes to create.
            allow_repeat: When true, granules that already exist in the
                database are downloaded again whatever their status;
                otherwise only those whose status is ``'ERROR'`` or
                ``'INVALID'`` are retried.
        """
        self.max_downloads = max_downloads
        self.max_upload_workers = max_upload_workers
        self.total_downloads = 0
        self.upload_queue = Queue()
        self.lock = Lock()
        self.allow_repeat = allow_repeat

        # Populated by ``start``; defined here so the attributes always exist.
        self.job_id = None
        self.upload_processes = []
        self.at_least_one_failed_download = False

        # Setup the database connection
        self.db_connection = db_connection
        self.job_serializer = Serializer(self.db_connection, job)
        self.granule_serializer = Serializer(self.db_connection, granule)
        self.logger = logger
        self.date = date

        self.logger.info('Creating a workflow')

        # Downloader that handles asynchronous downloads. The lock is passed
        # to both callbacks as their extra argument.
        self.logger.info('Creating aria2 downloader client')
        self.downloader = Downloader(
            on_download_error=self._on_download_error,
            on_download_complete=self._on_download_complete,
            callback_args=(self.lock, ))

        # Copernicus Search API: query from the given date to one day later.
        self.logger.info('Creating copernicus API connector')
        end_date = (self.date + timedelta(days=1))
        start_date = self.date

        self.copernicus = Copernicus(
            start_date=start_date.isoformat(),
            end_date=end_date.isoformat(),
            rows_per_query=30,
        )

    def start(self, parent_state):
        """Run the job: start downloads, wait for the uploads, close the job.

        Args:
            parent_state: Object that receives ``job_id`` once the job record
                has been created and ``completed = True`` after all upload
                processes have been joined.
        """
        # Start a new job in the database.
        self.job_id = self.job_serializer.post({
            'job_name': 'Sentinel-2 Downloader',
            'start_time': datetime.now(),
            'date_handled': self.date,
            'status': JobStatus.STARTED,
        })

        self.logger.set_job_id(self.job_id)
        self.logger.info('Starting workflow')

        # Upload workers
        self.logger.info('Creating S3 upload processes')
        self.upload_processes = [
            Process(target=upload_worker,
                    args=(self.upload_queue, self.job_id, i))
            for i in range(self.max_upload_workers)
        ]

        parent_state.job_id = self.job_id
        self.at_least_one_failed_download = False

        # Start processes responsible for uploading downloaded files to S3
        # in a concurrent fashion.
        for upload_process in self.upload_processes:
            upload_process.start()
        self.logger.info('S3 upload processes started')

        # Let's read in each url and download them.
        count = 0
        self.logger.info('Fetching granules info from Copernicus',
                         f'Reading for date: {self.date.isoformat()}')

        for product in self.copernicus.read_feed():
            if not self._prepare_granule(product):
                continue

            # And start the download.
            url = product.get_download_link()
            self.logger.info(f'Starting granule download {product.id}',
                             f'URL: {url}')

            self.downloader.start_download(product)
            self.granule_serializer.put(
                product.id, {'download_status': DownloadStatus.DOWNLOADING})

            count += 1
            if self.max_downloads is not None and count >= self.max_downloads:
                break

            # Throttle: do not let too many unfinished downloads pile up.
            while count - self.total_downloads >= self.MAX_PENDING_DOWNLOADS:
                sleep(self.PENDING_POLL_SECONDS)

        # Set max_downloads to actual number of downloads triggered.
        # This is needed so that we can send a DONE message to S3 uploader
        # when all downloads complete.
        with self.lock:
            self.max_downloads = count
            # Of course, if all downloads have already been completed at this
            # point, we need to handle that case as well.
            # NOTE(review): when the caller-supplied max_downloads equals the
            # number of downloads started, a callback may already have queued
            # 'DONE', so it can be queued a second time here - confirm that
            # upload_worker tolerates this.
            if self.total_downloads == self.max_downloads:
                self.upload_queue.put('DONE')

        # Join the separately running upload processes.
        for upload_process in self.upload_processes:
            upload_process.join()

        self.logger.info('All granules downloaded',
                         f'Total granules processed: {self.total_downloads}')
        parent_state.completed = True

        # At this point, we assume that all downloads and uploads have been
        # completed for this job.

        # Stop listening for any download notifications.
        self.downloader.stop_listening()

        # Set the job status to success.
        # NOTE(review): the status is SUCCESS even when
        # at_least_one_failed_download is True - confirm this is intended.
        self.job_serializer.put(self.job_id, {
            'end_time': datetime.now(),
            'status': JobStatus.SUCCESS
        })

        self.logger.info('Stopping workflow')
        self.logger.set_job_id(None)

    def _prepare_granule(self, product):
        """Record ``product`` if it is new and tell whether to download it.

        Returns:
            True when the product was not in the database yet (it is added),
            when ``allow_repeat`` is set, or when its stored download status
            is ``'ERROR'`` or ``'INVALID'``; False otherwise.
        """
        # Check if granule is already in the database.
        existing = self.granule_serializer.get(product.id)

        if existing is None:
            # If not, add it to the database.
            self.granule_serializer.post({
                'uuid': product.id,
                'title': product.title,
                'copernicus_ingestion_date': product.ingestion_date,
                'validated': False,
                'downloader_job_id': self.job_id,
                'download_status': DownloadStatus.NOT_STARTED,
            })
            return True

        if self.allow_repeat:
            return True

        # If it was not a failed download, just skip.
        return existing['download_status'] in ('ERROR', 'INVALID')

    def _count_finished_download(self, lock):
        """Count one finished download and queue 'DONE' after the last one.

        The lock is held through a ``with`` block so that it is released even
        if putting on the queue raises.
        """
        with lock:
            self.total_downloads += 1
            # If these are all the downloads, trigger the upload processes
            # to quit.
            if self.total_downloads == self.max_downloads:
                self.upload_queue.put('DONE')

    def _on_download_error(self, downloader, gid, lock):
        """Downloader callback: log the failure and mark the granule ERROR.

        Args:
            downloader: Downloader that reported the error.
            gid: Identifier of the failed download within ``downloader``.
            lock: Lock guarding the download counters.
        """
        product = downloader.get_download_product(gid)

        error_message, error_code = downloader.get_download_error(gid)
        self.logger.error(f'Download error {product.id}',
                          f'Error Code: {error_code}\n{error_message}')
        self.at_least_one_failed_download = True

        # Download status = ERROR
        self.granule_serializer.put(product.id,
                                    {'download_status': DownloadStatus.ERROR})

        self._count_finished_download(lock)

    def _on_download_complete(self, downloader, gid, lock):
        """Downloader callback: queue the finished file for upload.

        Args:
            downloader: Downloader that finished the download.
            gid: Identifier of the finished download within ``downloader``.
            lock: Lock guarding the download counters.
        """
        product = downloader.get_download_product(gid)
        self.logger.info(f'Download complete {product.id}')

        # After each file downloads, we want to upload it to S3 bucket.
        filename = downloader.get_download_filename(gid)
        self.upload_queue.put((product.id, filename))

        self._count_finished_download(lock)
Пример #5
0
 def init(self):
     """Create the downloader and parser and store the default city."""
     self.downloader = Downloader()
     self.parser = Parser()
     # The city used when none is supplied.
     default_city = "praha"
     self.ini.set("city", default_city)