Пример #1
0
    def get(self, url, query=None, **kwargs):
        """Send a GET request to the node and decode the response.

        Args:
            url: path handed to ``self.url`` to build the request URL.
            query: optional dict of query parameters handed to ``self.url``.
                A fresh dict is created on every call when omitted, so
                nothing ``self.url`` might add to it can leak into later
                calls (a shared ``{}`` default would be reused).
            **kwargs: forwarded unchanged to ``requests.get``.

        Returns:
            The decoded JSON payload when the response Content-Type contains
            ``application/json``, otherwise the raw ``requests`` response.

        Raises:
            NodeResponseError: on HTTP 401, or when the JSON object contains
                an ``error`` key.
            NodeServerError: on a status code other than 200, 403 or 206, or
                when the body cannot be decoded as JSON.
            NodeConnectionError: on a timeout or connection failure.
        """
        if query is None:
            query = {}

        # Built outside the try block: the handlers below only translate
        # failures of the HTTP call and of the JSON decoding.
        full_url = self.url(url, query)

        try:
            res = requests.get(full_url, timeout=self.timeout, **kwargs)
            if res.status_code == 401:
                raise NodeResponseError(
                    "Unauthorized. Do you need to set a token?")
            elif res.status_code not in (200, 403, 206):
                raise NodeServerError("Unexpected status code: %s" %
                                      res.status_code)

            if "Content-Type" in res.headers and "application/json" in res.headers[
                    'Content-Type']:
                result = res.json()
                # Only JSON objects can carry an error message; lists,
                # strings, numbers and null are returned untouched instead
                # of failing with a TypeError on the membership test.
                if isinstance(result, dict) and 'error' in result:
                    raise NodeResponseError(result['error'])
                return result
            else:
                return res
        except json.decoder.JSONDecodeError as e:
            raise NodeServerError(str(e))
        except (requests.exceptions.Timeout,
                requests.exceptions.ConnectionError) as e:
            raise NodeConnectionError(str(e))
Пример #2
0
    def post(self, url, data=None, headers=None):
        """Send a POST request to the node and decode the response.

        Args:
            url: path handed to ``self.url`` to build the request URL.
            data: request body, forwarded to ``requests.post``.
            headers: optional dict of HTTP headers. A fresh dict is used on
                every call when omitted.

        Returns:
            The decoded JSON payload when the response Content-Type contains
            ``application/json``, otherwise the raw ``requests`` response.

        Raises:
            NodeResponseError: on HTTP 401, or when the JSON object contains
                an ``error`` key.
            NodeServerError: on a status code other than 200 or 403, or when
                the body cannot be decoded as JSON.
            NodeConnectionError: on a timeout or connection failure.
        """
        if headers is None:
            headers = {}

        try:
            res = requests.post(self.url(url),
                                data=data,
                                headers=headers,
                                timeout=self.timeout)

            if res.status_code == 401:
                raise NodeResponseError(
                    "Unauthorized. Do you need to set a token?")
            elif res.status_code not in (200, 403):
                # Same wording as get(), instead of an exception whose only
                # argument is a bare integer.
                raise NodeServerError("Unexpected status code: %s" %
                                      res.status_code)

            if "Content-Type" in res.headers and "application/json" in res.headers[
                    'Content-Type']:
                result = res.json()
                # Only JSON objects can carry an error message; any other
                # JSON value is returned untouched instead of failing with a
                # TypeError on the membership test.
                if isinstance(result, dict) and 'error' in result:
                    raise NodeResponseError(result['error'])
                return result
            else:
                return res
        except json.decoder.JSONDecodeError as e:
            raise NodeServerError(str(e))
        except (requests.exceptions.Timeout,
                requests.exceptions.ConnectionError) as e:
            raise NodeConnectionError(str(e))
Пример #3
0
    def handle_import(self):
        """
        Download (when needed) and extract the assets of an imported task.

        When ``import_url`` is set and assets/all.zip is not on disk yet, the
        archive is streamed to disk in 4096-byte chunks. At most every 2
        seconds the download progress (scaled to 0-0.9) is written to
        ``running_progress`` and ``check_if_canceled()`` is called. The
        archive is then extracted via ``extract_assets_and_complete()``,
        ``images_count`` is read from images.json if that file exists, and
        the pending action is cleared.

        Raises NodeServerError when the download times out or the connection
        fails, and when the archive is not a valid zip file.
        """
        self.console_output += gettext("Importing assets...") + "\n"
        self.save()

        zip_path = self.assets_path("all.zip")

        if self.import_url and not os.path.exists(zip_path):
            try:
                # TODO: this is potentially vulnerable to a zip bomb attack
                #       mitigated by the fact that a valid account is needed to
                #       import tasks
                logger.info("Importing task assets from {} for {}".format(self.import_url, self))
                download_stream = requests.get(self.import_url, stream=True, timeout=10)
                try:
                    content_length = download_stream.headers.get('content-length')
                    total_length = int(content_length) if content_length is not None else None
                    downloaded = 0
                    last_update = 0

                    with open(zip_path, 'wb') as fd:
                        for chunk in download_stream.iter_content(4096):
                            downloaded += len(chunk)

                            if time.time() - last_update >= 2:
                                # Update progress (skipped when the length is
                                # unknown or reported as 0, which would
                                # otherwise divide by zero)
                                if total_length:
                                    Task.objects.filter(pk=self.id).update(running_progress=(float(downloaded) / total_length) * 0.9)

                                self.check_if_canceled()
                                last_update = time.time()

                            fd.write(chunk)
                finally:
                    # A streamed response keeps its connection open until it
                    # is closed explicitly
                    download_stream.close()

            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, ReadTimeoutError) as e:
                # NOTE(review): a partially written all.zip stays on disk here,
                # so a later run skips the download -- confirm this is intended
                raise NodeServerError(e) from e

        self.refresh_from_db()

        try:
            self.extract_assets_and_complete()
        except zipfile.BadZipFile:
            raise NodeServerError(gettext("Invalid zip file"))

        images_json = self.assets_path("images.json")
        if os.path.exists(images_json):
            try:
                with open(images_json) as f:
                    images = json.load(f)
                    self.images_count = len(images)
            except Exception:
                # Best effort: the image count is informational only, so an
                # unreadable file is logged and ignored. SystemExit and
                # KeyboardInterrupt are no longer swallowed.
                logger.warning("Cannot read images count from imported task {}".format(self))

        self.pending_action = None
        self.processing_time = 0
        self.save()
Пример #4
0
 def handle_task_new_response(self, result):
     """Turn the node's reply to a task creation request into a Task.

     Returns a ``Task`` built from ``result['uuid']`` when the reply is a
     dict with a ``uuid`` key. Raises NodeResponseError with the reported
     message when the dict has an ``error`` key instead, and
     NodeServerError for any other reply.
     """
     if not isinstance(result, dict):
         raise NodeServerError('Invalid response: ' + str(result))

     # 'uuid' takes precedence over 'error' when both keys are present
     if 'uuid' in result:
         return Task(self, result['uuid'])

     if 'error' in result:
         raise NodeResponseError(result['error'])

     raise NodeServerError('Invalid response: ' + str(result))
Пример #5
0
    def extract_assets_and_complete(self):
        """
        Unpack assets/all.zip, fill in the raster extent fields and mark the
        task as completed.
        A zipfile.BadZipFile exception is raised if the archive is corrupted.
        A NodeServerError is raised if a raster's SRID has no row in
        spatial_ref_sys.
        :return:
        """
        target_dir = self.assets_path("")
        archive_path = self.assets_path("all.zip")

        # Unpack the whole archive into the assets directory
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(target_dir)

        logger.info("Extracted all.zip for {}".format(self))

        # (sub-directory, file name, model field) for each raster with an extent
        raster_sources = (
            ("odm_orthophoto", "odm_orthophoto.tif", 'orthophoto_extent'),
            ("odm_dem", "dsm.tif", 'dsm_extent'),
            ("odm_dem", "dtm.tif", 'dtm_extent'),
        )
        candidates = [
            (os.path.realpath(self.assets_path(folder, filename)), attr)
            for folder, filename, attr in raster_sources
        ]

        for tif_path, attr in candidates:
            if not os.path.exists(tif_path):
                # This asset was not produced; leave its field untouched
                continue

            # Read the bounding box and SRID from the raster
            raster = GDALRaster(tif_path)
            bbox = OGRGeometry.from_bbox(raster.extent)

            # Refuse rasters whose SRID is missing from spatial_ref_sys
            with connection.cursor() as cursor:
                cursor.execute(
                    "SELECT SRID FROM spatial_ref_sys WHERE SRID = %s",
                    [raster.srid])
                srid_known = cursor.rowcount != 0
                if not srid_known:
                    raise NodeServerError(
                        "Unsupported SRS {}. Please make sure you picked a supported SRS."
                        .format(raster.srid))

            # The original author notes that the geometry is implicitly
            # transformed into the SRID of the model's field on assignment
            geometry = GEOSGeometry(bbox.wkt, srid=raster.srid)
            setattr(self, attr, geometry)

            logger.info("Populated extent field with {} for {}".format(
                tif_path, self))

        self.update_available_assets_field()
        self.running_progress = 1.0
        self.console_output += "Done!\n"
        self.status = status_codes.COMPLETED
        self.save()

        # Imported inside the function, as in the original code
        from app.plugins import signals as plugin_signals
        plugin_signals.task_completed.send_robust(sender=self.__class__,
                                                  task_id=self.id)
Пример #6
0
            def worker():
                """Upload queued files until a stop marker or an error is seen.

                Takes tasks from ``q`` (names such as ``q``, ``nonloc``,
                ``uuid``, ``read_file``, ``progress_event``, ``max_retries``
                and ``retry_timeout`` come from the enclosing scope). A
                ``None`` task, or an error already recorded in
                ``nonloc.error``, ends the loop. A failed upload is put back
                in the queue with an increasing delay until ``max_retries``
                is reached or the node answered with ``noRetry``; after that
                the error is stored in ``nonloc.error``.
                """
                while True:
                    task = q.get()
                    if task is None or nonloc.error is not None:
                        q.task_done()
                        break

                    # Honor the retry delay. total_seconds() keeps the
                    # fractional part and stays negative for a deadline that
                    # has just passed, whereas .seconds would report 86399
                    # for a slightly negative delta.
                    delay = (task['wait_until'] -
                             datetime.datetime.now()).total_seconds()
                    if delay > 0:
                        time.sleep(delay)

                    # Reset on every attempt so the retry check below never
                    # reads an unbound name or the reply of a previous upload
                    result = None

                    try:
                        file = task['file']
                        fields = {
                            'images':
                            [(os.path.basename(file), read_file(file),
                              (mimetypes.guess_type(file)[0] or "image/jpg"))]
                        }

                        encoder = MultipartEncoder(fields=fields)
                        result = self.post(
                            '/task/new/upload/{}'.format(uuid),
                            data=encoder,
                            headers={'Content-Type': encoder.content_type})

                        if isinstance(
                                result, dict
                        ) and 'success' in result and result['success']:
                            nonloc.uploaded_files.increment()
                            if progress_event is not None:
                                progress_event.set()
                        else:
                            if isinstance(result, dict) and 'error' in result:
                                raise NodeResponseError(result['error'])
                            else:
                                raise NodeServerError(
                                    "Failed upload with unexpected result: %s"
                                    % str(result))
                    except OdmError as e:
                        if task['retries'] < max_retries and not (
                                isinstance(result, dict)
                                and 'noRetry' in result and result['noRetry']):
                            # Put task back in queue
                            task['retries'] += 1
                            task['wait_until'] = datetime.datetime.now(
                            ) + datetime.timedelta(seconds=task['retries'] *
                                                   retry_timeout)
                            q.put(task)
                        else:
                            nonloc.error = e
                    except Exception as e:
                        nonloc.error = e
                    finally:
                        q.task_done()
Пример #7
0
    def extract_assets_and_complete(self):
        """
        Unpack assets/all.zip, fill in the raster extent fields and mark the
        task as completed.
        A NodeServerError is raised if the archive is not a valid zip file.
        :return:
        """
        destination = self.assets_path("")
        archive_path = self.assets_path("all.zip")

        # Unpack the archive, reporting a corrupted file as a node error
        try:
            with zipfile.ZipFile(archive_path, "r") as archive:
                archive.extractall(destination)
        except zipfile.BadZipFile:
            raise NodeServerError("Invalid zip file")

        logger.info("Extracted all.zip for {}".format(self))

        # Resolve the rasters that may carry an extent
        orthophoto_tif = os.path.realpath(
            self.assets_path("odm_orthophoto", "odm_orthophoto.tif"))
        dsm_tif = os.path.realpath(self.assets_path("odm_dem", "dsm.tif"))
        dtm_tif = os.path.realpath(self.assets_path("odm_dem", "dtm.tif"))

        rasters = (
            (orthophoto_tif, 'orthophoto_extent'),
            (dsm_tif, 'dsm_extent'),
            (dtm_tif, 'dtm_extent'),
        )

        for tif_path, extent_attr in rasters:
            if not os.path.exists(tif_path):
                # This asset was not produced; leave its field untouched
                continue

            # Read the bounding box and SRID from the raster
            raster = GDALRaster(tif_path)
            bbox = OGRGeometry.from_bbox(raster.extent)

            # The original author notes that the geometry is implicitly
            # transformed into the SRID of the model's field on assignment
            geometry = GEOSGeometry(bbox.wkt, srid=raster.srid)
            setattr(self, extent_attr, geometry)

            logger.info("Populated extent field with {} for {}".format(
                tif_path, self))

        self.update_available_assets_field()
        self.running_progress = 1.0
        self.console_output += "Done!\n"
        self.status = status_codes.COMPLETED
        self.save()

        # Imported inside the function, as in the original code
        from app.plugins import signals as plugin_signals
        plugin_signals.task_completed.send_robust(sender=self.__class__,
                                                  task_id=self.id)
Пример #8
0
    def get(self, url, query=None, **kwargs):
        """Send a GET request to the node and decode the response.

        This variant prints a timestamped trace line for every call and the
        reported content length of every JSON response.

        Args:
            url: path handed to ``self.url`` to build the request URL.
            query: optional dict of query parameters handed to ``self.url``.
                A fresh dict is created on every call when omitted, so
                nothing ``self.url`` might add to it can leak into later
                calls (a shared ``{}`` default would be reused).
            **kwargs: forwarded unchanged to ``requests.get``.

        Returns:
            The decoded JSON payload when the response Content-Type contains
            ``application/json``, otherwise the raw ``requests`` response.

        Raises:
            NodeResponseError: on HTTP 401, or when the JSON object contains
                an ``error`` key.
            NodeServerError: on a status code other than 200, 403 or 206, or
                when the body cannot be decoded as JSON.
            NodeConnectionError: on a timeout or connection failure.
        """
        if query is None:
            query = {}

        print("***** PYODM get method " +  str(datetime.datetime.now()) + " url " + str(url))

        # Built outside the try block: the handlers below only translate
        # failures of the HTTP call and of the JSON decoding.
        full_url = self.url(url, query)

        try:
            res = requests.get(full_url, timeout=self.timeout, **kwargs)
            if res.status_code == 401:
                raise NodeResponseError("Unauthorized. Do you need to set a token?")
            elif res.status_code not in (200, 403, 206):
                raise NodeServerError("Unexpected status code: %s" % res.status_code)

            if "Content-Type" in res.headers and "application/json" in res.headers['Content-Type']:
                print("get size " + str(res.headers.get('content-length')) + ' url ' + str(url))
                result = res.json()
                # Only JSON objects can carry an error message; any other
                # JSON value is returned untouched instead of failing with a
                # TypeError on the membership test.
                if isinstance(result, dict) and 'error' in result:
                    raise NodeResponseError(result['error'])
                return result
            else:
                return res
        except json.decoder.JSONDecodeError as e:
            raise NodeServerError(str(e))
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
            raise NodeConnectionError(str(e))
Пример #9
0
    def post(self, url, data=None, headers=None):
        """Send a POST request to the node and decode the response.

        This variant prints a timestamped trace line for every call and the
        reported content length of every JSON response.

        Args:
            url: path handed to ``self.url`` to build the request URL.
            data: request body, forwarded to ``requests.post``.
            headers: optional dict of HTTP headers. A fresh dict is used on
                every call when omitted.

        Returns:
            The decoded JSON payload when the response Content-Type contains
            ``application/json``, otherwise the raw ``requests`` response.

        Raises:
            NodeResponseError: on HTTP 401, or when the JSON object contains
                an ``error`` key.
            NodeServerError: on a status code other than 200 or 403, or when
                the body cannot be decoded as JSON.
            NodeConnectionError: on a timeout or connection failure.
        """
        if headers is None:
            headers = {}

        try:
            print("***** PYODM post method " +  str(datetime.datetime.now()) + " url " + str(url))
            res = requests.post(self.url(url), data=data, headers=headers, timeout=self.timeout)

            if res.status_code == 401:
                raise NodeResponseError("Unauthorized. Do you need to set a token?")
            elif res.status_code not in (200, 403):
                # Same wording as get(), instead of an exception whose only
                # argument is a bare integer.
                raise NodeServerError("Unexpected status code: %s" % res.status_code)

            if "Content-Type" in res.headers and "application/json" in res.headers['Content-Type']:
                print("response size " + str(res.headers.get('content-length')) + ' url ' + str(url))

                result = res.json()
                # Only JSON objects can carry an error message; any other
                # JSON value is returned untouched instead of failing with a
                # TypeError on the membership test.
                if isinstance(result, dict) and 'error' in result:
                    raise NodeResponseError(result['error'])
                return result
            else:
                return res
        except json.decoder.JSONDecodeError as e:
            raise NodeServerError(str(e))
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
            raise NodeConnectionError(str(e))
Пример #10
0
    def process(self):
        """
        This method contains the logic for processing tasks asynchronously
        from a background thread or from a worker. Here tasks that are
        ready to be processed execute some logic. This could be communication
        with a processing node or executing a pending action.

        The steps run in this order on every call:

        1. IMPORT and RESIZE pending actions.
        2. Automatic processing node assignment, and handling of an assigned
           node that is offline.
        3. Upload of the images to the processing node when the task has no
           UUID, no pending action and no status yet.
        4. CANCEL, RESTART and REMOVE pending actions (REMOVE deletes the
           task and returns immediately).
        5. Status update from the processing node; when the task has just
           completed, its assets are downloaded and extracted.

        NodeServerError and NodeResponseError raised by any step are recorded
        through set_failure(). NodeConnectionError and TaskInterruptedException
        are only logged as warnings.
        """

        try:
            if self.pending_action == pending_actions.IMPORT:
                self.handle_import()

            if self.pending_action == pending_actions.RESIZE:
                resized_images = self.resize_images()
                self.refresh_from_db()
                self.resize_gcp(resized_images)
                self.pending_action = None
                self.save()

            if self.auto_processing_node and not self.status in [status_codes.FAILED, status_codes.CANCELED]:
                # No processing node assigned and need to auto assign
                if self.processing_node is None:
                    # Assign first online node with lowest queue count
                    self.processing_node = ProcessingNode.find_best_available_node()
                    if self.processing_node:
                        self.processing_node.queue_count += 1 # Doesn't have to be accurate, it will get overridden later
                        self.processing_node.save()

                        logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                        self.save()

                # Processing node assigned, but is offline and no errors
                if self.processing_node and not self.processing_node.is_online():
                    # If we are queued up
                    # detach processing node, and reassignment
                    # will be processed at the next tick
                    if self.status == status_codes.QUEUED:
                        logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                        self.uuid = ''
                        self.processing_node = None
                        self.status = None
                        self.save()

                    elif self.status == status_codes.RUNNING:
                        # Task was running and processing node went offline
                        # It could have crashed due to low memory
                        # or perhaps it went offline due to network errors.
                        # We can't easily differentiate between the two, so we need
                        # to notify the user because if it crashed due to low memory
                        # the user might need to take action (or be stuck in an infinite loop)
                        raise NodeServerError("Processing node went offline. This could be due to insufficient memory or a network error.")

            if self.processing_node:
                # Need to process some images (UUID not yet set and task doesn't have pending actions)?
                if not self.uuid and self.pending_action is None and self.status is None:
                    logger.info("Processing... {}".format(self))

                    images = [image.path() for image in self.imageupload_set.all()]

                    # Track upload progress, but limit the number of DB updates
                    # to every 2 seconds. This callback does not special-case
                    # 100%; the final value is written after the upload
                    # returns (upload_progress = 1.0 below).
                    last_update = 0
                    def callback(progress):
                        nonlocal last_update

                        time_has_elapsed = time.time() - last_update >= 2
                        if time_has_elapsed:
                            # NOTE(review): testWatch is not defined in this
                            # method; presumably test instrumentation -- confirm
                            testWatch.manual_log_call("Task.process.callback")
                            self.check_if_canceled()
                            Task.objects.filter(pk=self.id).update(upload_progress=float(progress) / 100.0)
                            last_update = time.time()

                    # This takes a while
                    try:
                        uuid = self.processing_node.process_new_task(images, self.name, self.options, callback)
                    except NodeConnectionError as e:
                        # If we can't create a task because the node is offline
                        # We want to fail instead of trying again
                        raise NodeServerError(gettext('Connection error: %(error)s') % {'error': str(e)})

                    # Refresh task object before committing change
                    self.refresh_from_db()
                    self.upload_progress = 1.0
                    self.uuid = uuid
                    self.save()

                    # TODO: log process has started processing

            if self.pending_action is not None:
                if self.pending_action == pending_actions.CANCEL:
                    # Do we need to cancel the task on the processing node?
                    logger.info("Canceling {}".format(self))
                    if self.processing_node and self.uuid:
                        # Attempt to cancel the task on the processing node
                        # We don't care if this fails (we tried)
                        try:
                            self.processing_node.cancel_task(self.uuid)
                        except OdmError:
                            logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))

                        self.status = status_codes.CANCELED
                        self.pending_action = None
                        self.save()
                    else:
                        # Tasks with no processing node or UUID need no special action
                        self.status = status_codes.CANCELED
                        self.pending_action = None
                        self.save()

                elif self.pending_action == pending_actions.RESTART:
                    logger.info("Restarting {}".format(self))
                    if self.processing_node:

                        # Check if the UUID is still valid, as processing nodes purge
                        # results after a set amount of time, the UUID might have been eliminated.
                        uuid_still_exists = False

                        if self.uuid:
                            try:
                                info = self.processing_node.get_task_info(self.uuid)
                                uuid_still_exists = info.uuid == self.uuid
                            except OdmError:
                                # Any node error is treated as "UUID no longer exists"
                                pass

                        need_to_reprocess = False

                        if uuid_still_exists:
                            # Good to go
                            try:
                                self.processing_node.restart_task(self.uuid, self.options)
                            except (NodeServerError, NodeResponseError) as e:
                                # Something went wrong
                                logger.warning("Could not restart {}, will start a new one".format(self))
                                need_to_reprocess = True
                        else:
                            need_to_reprocess = True

                        if need_to_reprocess:
                            logger.info("{} needs to be reprocessed".format(self))

                            # Task has been purged (or processing node is offline)
                            # Process this as a new task
                            # Removing its UUID will cause the scheduler
                            # to process this the next tick
                            self.uuid = ''

                            # We also remove the "rerun-from" parameter if it's set
                            self.options = list(filter(lambda d: d['name'] != 'rerun-from', self.options))
                            self.upload_progress = 0

                        # Reset the task state in both the restart and the
                        # reprocess case
                        self.console_output = ""
                        self.processing_time = -1
                        self.status = None
                        self.last_error = None
                        self.pending_action = None
                        self.running_progress = 0
                        self.save()
                    else:
                        raise NodeServerError(gettext("Cannot restart a task that has no processing node"))

                elif self.pending_action == pending_actions.REMOVE:
                    logger.info("Removing {}".format(self))
                    if self.processing_node and self.uuid:
                        # Attempt to delete the resources on the processing node
                        # We don't care if this fails, as resources on processing nodes
                        # Are expected to be purged on their own after a set amount of time anyway
                        try:
                            self.processing_node.remove_task(self.uuid)
                        except OdmError:
                            pass

                    # What's more important is that we delete our task properly here
                    self.delete()

                    # Stop right here!
                    return

            if self.processing_node:
                # Need to update status (first time, queued or running?)
                if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                    # Update task info from processing node
                    # The number of console lines already stored is passed to
                    # get_task_info() along with the UUID
                    if not self.console_output:
                        current_lines_count = 0
                    else:
                        current_lines_count = len(self.console_output.split("\n"))

                    info = self.processing_node.get_task_info(self.uuid, current_lines_count)

                    self.processing_time = info.processing_time
                    self.status = info.status.value

                    if len(info.output) > 0:
                        self.console_output += "\n".join(info.output) + '\n'

                    # Update running progress
                    self.running_progress = (info.progress / 100.0) * self.TASK_PROGRESS_LAST_VALUE

                    if info.last_error != "":
                        self.last_error = info.last_error

                    # Has the task just been canceled, failed, or completed?
                    if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                        logger.info("Processing status: {} for {}".format(self.status, self))

                        if self.status == status_codes.COMPLETED:
                            assets_dir = self.assets_path("")

                            # Remove previous assets directory
                            if os.path.exists(assets_dir):
                                logger.info("Removing old assets directory: {} for {}".format(assets_dir, self))
                                shutil.rmtree(assets_dir)

                            os.makedirs(assets_dir)

                            # Download and try to extract the results. A corrupted
                            # archive is retried up to 5 times (6 attempts in total);
                            # the number of parallel downloads is halved on each
                            # retry (16, 8, 4, 2, 1, 1).
                            # (~5% of the times, on large downloads, the archive could be corrupted)
                            retry_num = 0
                            extracted = False
                            last_update = 0

                            def callback(progress):
                                nonlocal last_update

                                time_has_elapsed = time.time() - last_update >= 2

                                # Throttled to one DB update every 2 seconds,
                                # except that 100% is always recorded
                                if time_has_elapsed or int(progress) == 100:
                                    Task.objects.filter(pk=self.id).update(running_progress=(
                                        self.TASK_PROGRESS_LAST_VALUE + (float(progress) / 100.0) * 0.1))
                                    last_update = time.time()

                            while not extracted:
                                last_update = 0
                                logger.info("Downloading all.zip for {}".format(self))

                                # Download all assets
                                zip_path = self.processing_node.download_task_assets(self.uuid, assets_dir, progress_callback=callback, parallel_downloads=max(1, int(16 / (2 ** retry_num))))

                                # Rename to all.zip
                                all_zip_path = self.assets_path("all.zip")
                                os.rename(zip_path, all_zip_path)

                                logger.info("Extracting all.zip for {}".format(self))

                                try:
                                    self.extract_assets_and_complete()
                                    extracted = True
                                except zipfile.BadZipFile:
                                    if retry_num < 5:
                                        logger.warning("{} seems corrupted. Retrying...".format(all_zip_path))
                                        retry_num += 1
                                        os.remove(all_zip_path)
                                    else:
                                        raise NodeServerError(gettext("Invalid zip file"))
                        else:
                            # FAILED, CANCELED
                            self.save()
                    else:
                        # Still waiting...
                        self.save()

        except (NodeServerError, NodeResponseError) as e:
            self.set_failure(str(e))
        except NodeConnectionError as e:
            logger.warning("{} connection/timeout error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))
        except TaskInterruptedException as e:
            # Task was interrupted during image resize / upload
            # NOTE: the template has a single placeholder, so str(e) is passed
            # to format() but does not appear in the logged message
            logger.warning("{} interrupted".format(self, str(e)))
Пример #11
0
    def create_task(self,
                    files,
                    options={},
                    name=None,
                    progress_callback=None,
                    skip_post_processing=False,
                    webhook=None,
                    outputs=[],
                    parallel_uploads=10,
                    max_retries=5,
                    retry_timeout=5):
        """Start processing a new task.
        At a minimum you need to pass a list of image paths. All other parameters are optional.

        >>> n = Node('localhost', 3000)
        >>> t = n.create_task(['examples/images/image_1.jpg', 'examples/images/image_2.jpg'], \
                          {'orthophoto-resolution': 2, 'dsm': True})
        >>> info = t.info()
        >>> info.status
        <TaskStatus.RUNNING: 20>
        >>> info.last_error
        ''
        >>> t.info().images_count
        2
        >>> t.output()[0:2]
        ['DJI_0131.JPG - DJI_0313.JPG has 1 candidate matches', 'DJI_0131.JPG - DJI_0177.JPG has 3 candidate matches']

        Args:
            files (list): list of image paths + optional GCP file path.
            options (dict): options to use, for example {'orthophoto-resolution': 3, ...}
            name (str): name for the task
            progress_callback (function): callback reporting upload progress percentage
            skip_post_processing  (bool): When true, skips generation of map tiles, derivate assets, point cloud tiles.
            webhook (str): Optional URL to call when processing has ended (either successfully or unsuccessfully).
            outputs (list): Optional paths relative to the project directory that should be included in the all.zip result file, overriding the default behavior.
            parallel_uploads (int): Number of parallel uploads.
            max_retries (int): Number of attempts to make before giving up on a file upload.
            retry_timeout (int): Wait at least these many seconds before attempting to upload a file a second time, multiplied by the retry number.
        Returns:
            :func:`~Task`
        """
        if not self.version_greater_or_equal_than("1.4.0"):
            return self.create_task_fallback(files, options, name,
                                             progress_callback)

        if len(files) == 0:
            raise NodeResponseError("Not enough images")

        fields = {
            'name': name,
            'options': options_to_json(options),
        }

        if skip_post_processing:
            fields['skipPostProcessing'] = 'true'

        if webhook is not None:
            fields['webhook'] = webhook

        if outputs:
            fields['outputs'] = json.dumps(outputs)

        e = MultipartEncoder(fields=fields)

        result = self.post('/task/new/init',
                           data=e,
                           headers={'Content-Type': e.content_type})
        if isinstance(result, dict) and 'error' in result:
            raise NodeResponseError(result['error'])

        if isinstance(result, dict) and 'uuid' in result:
            uuid = result['uuid']
            progress_event = None

            class nonloc:
                uploaded_files = AtomicCounter(0)
                error = None

            # Equivalent as passing the open file descriptor, since requests
            # eventually calls read(), but this way we make sure to close
            # the file prior to reading the next, so we don't run into open file OS limits
            def read_file(file_path):
                """Return the raw bytes of ``file_path``.

                Paths matching ``Node.prefixHttp`` / ``Node.prefixHttps`` are
                downloaded with ``requests``; anything else is read from the
                local filesystem and the handle is closed before returning.
                """
                is_remote = (Node.prefixHttp.match(file_path)
                             or Node.prefixHttps.match(file_path))
                if is_remote:
                    response = requests.get(file_path)
                    return response.content

                with open(file_path, 'rb') as handle:
                    contents = handle.read()
                return contents

            # Upload
            def worker():
                """Upload queued files until told to stop.

                Runs in its own thread. Each queue item is a dict with the keys
                ``file``, ``wait_until`` and ``retries``. The loop ends when a
                ``None`` sentinel is dequeued or when ``nonloc.error`` has been
                set (by this or any other thread).

                A failed upload that raises an ``OdmError`` is put back on the
                queue with a delay of ``retries * retry_timeout`` seconds, up
                to ``max_retries`` times, unless the node's response carries a
                truthy ``noRetry`` flag. Any other failure is stored in
                ``nonloc.error`` for the caller to re-raise.
                """
                while True:
                    task = q.get()
                    if task is None or nonloc.error is not None:
                        q.task_done()
                        break

                    # Honor the retry back-off. The remaining delay is computed
                    # from a single clock read: comparing against one now() and
                    # subtracting another could yield a negative timedelta,
                    # whose `.seconds` attribute is 86399 (a ~24h sleep).
                    # total_seconds() also keeps the sub-second part.
                    delay = (task['wait_until'] -
                             datetime.datetime.now()).total_seconds()
                    if delay > 0:
                        time.sleep(delay)

                    # Reset for every task so the OdmError handler below never
                    # reads an unbound name (self.post may raise before the
                    # assignment) or a stale response from a previous upload.
                    result = None
                    try:
                        file_path = task['file']
                        mime_type = (mimetypes.guess_type(file_path)[0]
                                     or "image/jpg")
                        encoder = MultipartEncoder(fields={
                            'images': [(os.path.basename(file_path),
                                        read_file(file_path), mime_type)]
                        })
                        result = self.post(
                            '/task/new/upload/{}'.format(uuid),
                            data=encoder,
                            headers={'Content-Type': encoder.content_type})

                        if isinstance(result, dict) and result.get('success'):
                            nonloc.uploaded_files.increment()
                            # Wake up the progress loop, if one is running
                            if progress_event is not None:
                                progress_event.set()
                        elif isinstance(result, dict) and 'error' in result:
                            raise NodeResponseError(result['error'])
                        else:
                            raise NodeServerError(
                                "Failed upload with unexpected result: %s"
                                % str(result))
                    except OdmError as err:
                        no_retry = bool(isinstance(result, dict)
                                        and result.get('noRetry'))
                        if task['retries'] < max_retries and not no_retry:
                            # Put task back in queue, waiting longer each time
                            task['retries'] += 1
                            task['wait_until'] = (
                                datetime.datetime.now() + datetime.timedelta(
                                    seconds=task['retries'] * retry_timeout))
                            q.put(task)
                        else:
                            nonloc.error = err
                    except Exception as err:
                        # Non-ODM failures (e.g. unreadable file) are not
                        # retried; hand them to the caller.
                        nonloc.error = err
                    finally:
                        q.task_done()

            q = queue.Queue()
            threads = []
            for i in range(parallel_uploads):
                t = threading.Thread(target=worker)
                t.start()
                threads.append(t)

            if progress_callback is not None:
                progress_event = threading.Event()

            now = datetime.datetime.now()
            for file in files:
                q.put({'file': file, 'wait_until': now, 'retries': 0})

            # Wait for progress updates
            if progress_event is not None:
                current_progress = 0
                while not q.empty():
                    if progress_event.wait(0.1):
                        progress_event.clear()
                        current_progress = 100.0 * nonloc.uploaded_files.value / len(
                            files)
                        try:
                            progress_callback(current_progress)
                        except Exception as e:
                            nonloc.error = e
                    if nonloc.error is not None:
                        break

                # Make sure to report 100% complete
                if current_progress != 100 and nonloc.error is None:
                    try:
                        progress_callback(100.0)
                    except Exception as e:
                        nonloc.error = e

            # block until all tasks are done
            if nonloc.error is None:
                q.join()

            # stop workers
            for i in range(parallel_uploads):
                q.put(None)
            for t in threads:
                t.join()

            if nonloc.error is not None:
                raise nonloc.error

            result = self.post('/task/new/commit/{}'.format(uuid))
            return self.handle_task_new_response(result)
        else:
            raise NodeServerError("Invalid response from /task/new/init: %s" %
                                  result)
Пример #12
0
    def process(self):
        """
        Advance this task by one step of its processing lifecycle.

        This method contains the logic for processing tasks asynchronously
        from a background thread or from a worker. Here tasks that are
        ready to be processed execute some logic. This could be communication
        with a processing node or executing a pending action.

        The steps below run in order on each call; each one only acts when
        its condition holds:

        1. A pending RESIZE action resizes the images (and the GCP file).
        2. If automatic node selection is enabled, a processing node is
           assigned, or detached again when it went offline while the task
           was still queued.
        3. If the task has a node but no UUID, pending action or status,
           the images are uploaded and the returned UUID is stored.
        4. Pending CANCEL / RESTART / REMOVE actions are executed. REMOVE
           deletes the task and returns immediately.
        5. If the task has a UUID and is new, queued or running, its status,
           processing time, console output and last error are refreshed from
           the node. On completion the assets are downloaded and extracted,
           the raster extents are populated and the ``task_completed`` plugin
           signal is sent.

        Errors are handled here rather than propagated:
        ``NodeServerError`` / ``NodeResponseError`` mark the task as failed
        via ``set_failure``; ``NodeConnectionError`` and
        ``TaskInterruptedException`` are only logged.
        """

        try:
            # Step 1: images have to be resized before anything else happens
            if self.pending_action == pending_actions.RESIZE:
                resized_images = self.resize_images()
                # Reload the row before saving (the resize may take a while
                # and the record could have changed meanwhile -- presumably)
                self.refresh_from_db()
                self.resize_gcp(resized_images)
                self.pending_action = None
                self.save()

            # Step 2: automatic processing node management
            if self.auto_processing_node and not self.status in [status_codes.FAILED, status_codes.CANCELED]:
                # No processing node assigned and need to auto assign
                if self.processing_node is None:
                    # Assign first online node with lowest queue count
                    self.processing_node = ProcessingNode.find_best_available_node()
                    if self.processing_node:
                        self.processing_node.queue_count += 1 # Doesn't have to be accurate, it will get overridden later
                        self.processing_node.save()

                        logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                        self.save()

                # Processing node assigned, but is offline and no errors
                if self.processing_node and not self.processing_node.is_online():
                    # If we are queued up
                    # detach processing node, and reassignment
                    # will be processed at the next tick
                    if self.status == status_codes.QUEUED:
                        logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                        self.uuid = ''
                        self.processing_node = None
                        self.status = None
                        self.save()

                    elif self.status == status_codes.RUNNING:
                        # Task was running and processing node went offline
                        # It could have crashed due to low memory
                        # or perhaps it went offline due to network errors.
                        # We can't easily differentiate between the two, so we need
                        # to notify the user because if it crashed due to low memory
                        # the user might need to take action (or be stuck in an infinite loop)
                        raise NodeServerError("Processing node went offline. This could be due to insufficient memory or a network error.")

            # Step 3: create the task on the processing node by uploading the images
            if self.processing_node:
                # Need to process some images (UUID not yet set and task doesn't have pending actions)?
                if not self.uuid and self.pending_action is None and self.status is None:
                    logger.info("Processing... {}".format(self))

                    images = [image.path() for image in self.imageupload_set.all()]

                    # Track upload progress, but limit the number of DB updates
                    # to every 2 seconds (the final 100% value is written
                    # explicitly after the upload returns, see below)
                    last_update = 0
                    def callback(progress):
                        nonlocal last_update

                        time_has_elapsed = time.time() - last_update >= 2

                        if time_has_elapsed:
                            # NOTE(review): looks like a test instrumentation hook -- confirm
                            testWatch.manual_log_call("Task.process.callback")
                            # Presumably raises TaskInterruptedException when the
                            # task was canceled, which aborts the upload -- confirm
                            self.check_if_canceled()
                            # Update only this column so other fields are not overwritten
                            Task.objects.filter(pk=self.id).update(upload_progress=float(progress) / 100.0)
                            last_update = time.time()

                    # This takes a while
                    try:
                        uuid = self.processing_node.process_new_task(images, self.name, self.options, callback)
                    except NodeConnectionError as e:
                        # If we can't create a task because the node is offline
                        # We want to fail instead of trying again
                        # (a NodeServerError is turned into a failure by the
                        # handler at the bottom of this method)
                        raise NodeServerError('Connection error: ' + str(e))

                    # Refresh task object before committing change
                    self.refresh_from_db()
                    self.upload_progress = 1.0
                    self.uuid = uuid
                    self.save()

                    # TODO: log process has started processing

            # Step 4: execute a pending user action
            if self.pending_action is not None:
                if self.pending_action == pending_actions.CANCEL:
                    # Do we need to cancel the task on the processing node?
                    logger.info("Canceling {}".format(self))
                    if self.processing_node and self.uuid:
                        # Attempt to cancel the task on the processing node
                        # We don't care if this fails (we tried)
                        try:
                            self.processing_node.cancel_task(self.uuid)
                        except OdmError:
                            logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))

                        self.status = status_codes.CANCELED
                        self.pending_action = None
                        self.save()
                    else:
                        raise NodeServerError("Cannot cancel a task that has no processing node or UUID")

                elif self.pending_action == pending_actions.RESTART:
                    logger.info("Restarting {}".format(self))
                    if self.processing_node:

                        # Check if the UUID is still valid, as processing nodes purge
                        # results after a set amount of time, the UUID might have been eliminated.
                        uuid_still_exists = False

                        if self.uuid:
                            try:
                                info = self.processing_node.get_task_info(self.uuid)
                                uuid_still_exists = info.uuid == self.uuid
                            except OdmError:
                                # Treated the same as "UUID no longer exists"
                                pass

                        need_to_reprocess = False

                        if uuid_still_exists:
                            # Good to go
                            try:
                                self.processing_node.restart_task(self.uuid, self.options)
                            except (NodeServerError, NodeResponseError) as e:
                                # Something went wrong
                                logger.warning("Could not restart {}, will start a new one".format(self))
                                need_to_reprocess = True
                        else:
                            need_to_reprocess = True

                        if need_to_reprocess:
                            logger.info("{} needs to be reprocessed".format(self))

                            # Task has been purged (or processing node is offline)
                            # Process this as a new task
                            # Removing its UUID will cause the scheduler
                            # to process this the next tick
                            self.uuid = ''

                            # We also remove the "rerun-from" parameter if it's set
                            self.options = list(filter(lambda d: d['name'] != 'rerun-from', self.options))
                            self.upload_progress = 0

                        # Reset the per-run state in both the restart and the reprocess case
                        self.console_output = ""
                        self.processing_time = -1
                        self.status = None
                        self.last_error = None
                        self.pending_action = None
                        self.running_progress = 0
                        self.save()
                    else:
                        raise NodeServerError("Cannot restart a task that has no processing node")

                elif self.pending_action == pending_actions.REMOVE:
                    logger.info("Removing {}".format(self))
                    if self.processing_node and self.uuid:
                        # Attempt to delete the resources on the processing node
                        # We don't care if this fails, as resources on processing nodes
                        # Are expected to be purged on their own after a set amount of time anyway
                        try:
                            self.processing_node.remove_task(self.uuid)
                        except OdmError:
                            pass

                    # What's more important is that we delete our task properly here
                    self.delete()

                    # Stop right here!
                    return

            # Step 5: synchronize status and results with the processing node
            if self.processing_node:
                # Need to update status (first time, queued or running?)
                if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                    # Update task info from processing node
                    info = self.processing_node.get_task_info(self.uuid)

                    self.processing_time = info.processing_time
                    self.status = info.status.value

                    # Number of console lines already stored; passed to the node
                    # so that (presumably) only newer lines are returned -- confirm
                    current_lines_count = len(self.console_output.split("\n"))
                    console_output = self.processing_node.get_task_console_output(self.uuid, current_lines_count)
                    if len(console_output) > 0:
                        self.console_output += "\n".join(console_output) + '\n'

                        # Update running progress
                        # (for each new line, the first milestone substring it
                        # contains sets the progress value)
                        for line in console_output:
                            for line_match, value in self.TASK_OUTPUT_MILESTONES.items():
                                if line_match in line:
                                    self.running_progress = value
                                    break

                    # Keep the previously stored error when the node reports none
                    if info.last_error != "":
                        self.last_error = info.last_error

                    # Has the task just been canceled, failed, or completed?
                    if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                        logger.info("Processing status: {} for {}".format(self.status, self))

                        if self.status == status_codes.COMPLETED:
                            assets_dir = self.assets_path("")

                            # Remove previous assets directory
                            if os.path.exists(assets_dir):
                                logger.info("Removing old assets directory: {} for {}".format(assets_dir, self))
                                shutil.rmtree(assets_dir)

                            os.makedirs(assets_dir)

                            logger.info("Downloading all.zip for {}".format(self))

                            # Download all assets
                            last_update = 0

                            # Throttled like the upload callback above, but the
                            # 100% value is always written
                            def callback(progress):
                                nonlocal last_update

                                time_has_elapsed = time.time() - last_update >= 2

                                if time_has_elapsed or int(progress) == 100:
                                    # Download progress is mapped onto a 0.1-wide range that
                                    # starts at TASK_OUTPUT_MILESTONES_LAST_VALUE
                                    Task.objects.filter(pk=self.id).update(running_progress=(
                                    self.TASK_OUTPUT_MILESTONES_LAST_VALUE + (float(progress) / 100.0) * 0.1))
                                    last_update = time.time()

                            zip_path = self.processing_node.download_task_assets(self.uuid, assets_dir, progress_callback=callback)

                            logger.info("Extracting all.zip for {}".format(self))

                            with zipfile.ZipFile(zip_path, "r") as zip_h:
                                zip_h.extractall(assets_dir)

                            # Rename to all.zip
                            os.rename(zip_path, os.path.join(os.path.dirname(zip_path), 'all.zip'))

                            # Populate *_extent fields
                            # (pairs of raster path and the model field receiving its extent)
                            extent_fields = [
                                (os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto.tif")),
                                 'orthophoto_extent'),
                                (os.path.realpath(self.assets_path("odm_dem", "dsm.tif")),
                                 'dsm_extent'),
                                (os.path.realpath(self.assets_path("odm_dem", "dtm.tif")),
                                 'dtm_extent'),
                            ]

                            for raster_path, field in extent_fields:
                                # Rasters that were not produced are simply skipped
                                if os.path.exists(raster_path):
                                    # Read extent and SRID
                                    raster = GDALRaster(raster_path)
                                    extent = OGRGeometry.from_bbox(raster.extent)

                                    # It will be implicitly transformed into the SRID of the model's field
                                    # self.field = GEOSGeometry(...)
                                    setattr(self, field, GEOSGeometry(extent.wkt, srid=raster.srid))

                                    logger.info("Populated extent field with {} for {}".format(raster_path, self))

                            self.update_available_assets_field()
                            self.running_progress = 1.0
                            self.console_output += "Done!\n"
                            self.save()

                            # Imported locally rather than at module level
                            # (presumably to avoid a circular import -- confirm)
                            from app.plugins import signals as plugin_signals
                            plugin_signals.task_completed.send_robust(sender=self.__class__, task_id=self.id)
                        else:
                            # FAILED, CANCELED
                            self.save()
                    else:
                        # Still waiting...
                        self.save()

        except (NodeServerError, NodeResponseError) as e:
            # Errors reported by the node are recorded on the task as a failure
            self.set_failure(str(e))
        except NodeConnectionError as e:
            # Not recorded as a failure; only logged
            logger.warning("{} connection/timeout error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))
        except TaskInterruptedException as e:
            # Task was interrupted during image resize / upload
            # NOTE(review): str(e) is passed to format() but the template has a
            # single placeholder, so the exception text is never logged
            logger.warning("{} interrupted".format(self, str(e)))