def process(self):
    """
    This method contains the logic for processing tasks asynchronously
    from a background thread or from the scheduler. Here tasks that are
    ready to be processed execute some logic. This could be communication
    with a processing node or executing a pending action.
    """

    try:
        if self.auto_processing_node and not self.status in [status_codes.FAILED, status_codes.CANCELED]:
            # No processing node assigned and need to auto assign
            if self.processing_node is None:
                # Assign first online node with lowest queue count
                self.processing_node = ProcessingNode.find_best_available_node()
                if self.processing_node:
                    self.processing_node.queue_count += 1 # Doesn't have to be accurate, it will get overridden later
                    self.processing_node.save()

                    logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                    self.save()

            # Processing node assigned, but is offline and no errors
            if self.processing_node and not self.processing_node.is_online():
                # Detach processing node, will be processed at the next tick
                logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                self.uuid = ''
                self.processing_node = None
                self.save()

        if self.processing_node:
            # Need to process some images (UUID not yet set and task doesn't have pending actions)?
            if not self.uuid and self.pending_action is None and self.status is None:
                logger.info("Processing... {}".format(self))

                images = [image.path() for image in self.imageupload_set.all()]

                # This takes a while
                uuid = self.processing_node.process_new_task(images, self.name, self.options)

                # Refresh task object before committing change
                self.refresh_from_db()
                self.uuid = uuid
                self.save()

                # TODO: log process has started processing

        if self.pending_action is not None:
            if self.pending_action == pending_actions.CANCEL:
                # Do we need to cancel the task on the processing node?
                logger.info("Canceling {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to cancel the task on the processing node
                    # We don't care if this fails (we tried)
                    try:
                        self.processing_node.cancel_task(self.uuid)
                        self.status = None
                    except ProcessingException:
                        logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))

                    self.status = status_codes.CANCELED
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot cancel a task that has no processing node or UUID")

            elif self.pending_action == pending_actions.RESTART:
                logger.info("Restarting {}".format(self))
                if self.processing_node:

                    # Check if the UUID is still valid, as processing nodes purge
                    # results after a set amount of time, the UUID might have been eliminated.
                    uuid_still_exists = False

                    if self.uuid:
                        try:
                            info = self.processing_node.get_task_info(self.uuid)
                            uuid_still_exists = info['uuid'] == self.uuid
                        except ProcessingException:
                            pass

                    if uuid_still_exists:
                        # Good to go
                        try:
                            self.processing_node.restart_task(self.uuid)
                        except ProcessingError as e:
                            # Something went wrong
                            logger.warning("Could not restart {}, will start a new one".format(self))
                            self.uuid = ''
                    else:
                        # Task has been purged (or processing node is offline)
                        # Process this as a new task
                        # Removing its UUID will cause the scheduler
                        # to process this the next tick
                        self.uuid = ''

                    self.console_output = ""
                    self.processing_time = -1
                    self.status = None
                    self.last_error = None
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot restart a task that has no processing node")

            elif self.pending_action == pending_actions.REMOVE:
                logger.info("Removing {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to delete the resources on the processing node
                    # We don't care if this fails, as resources on processing nodes
                    # Are expected to be purged on their own after a set amount of time anyway
                    try:
                        self.processing_node.remove_task(self.uuid)
                    except ProcessingException:
                        pass

                # What's more important is that we delete our task properly here
                self.delete()

                # Stop right here!
                return

        if self.processing_node:
            # Need to update status (first time, queued or running?)
            if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                # Update task info from processing node
                info = self.processing_node.get_task_info(self.uuid)

                self.processing_time = info["processingTime"]
                self.status = info["status"]["code"]

                current_lines_count = len(self.console_output.split("\n")) - 1
                self.console_output += self.processing_node.get_task_console_output(self.uuid, current_lines_count)

                if "errorMessage" in info["status"]:
                    self.last_error = info["status"]["errorMessage"]

                # Has the task just been canceled, failed, or completed?
                if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                    logger.info("Processing status: {} for {}".format(self.status, self))

                    if self.status == status_codes.COMPLETED:
                        assets_dir = self.assets_path("")

                        # Remove previous assets directory
                        if os.path.exists(assets_dir):
                            logger.info("Removing old assets directory: {} for {}".format(assets_dir, self))
                            shutil.rmtree(assets_dir)

                        os.makedirs(assets_dir)

                        logger.info("Downloading all.zip for {}".format(self))

                        # Download all assets
                        zip_stream = self.processing_node.download_task_asset(self.uuid, "all.zip")
                        zip_path = os.path.join(assets_dir, "all.zip")
                        with open(zip_path, 'wb') as fd:
                            for chunk in zip_stream.iter_content(4096):
                                fd.write(chunk)

                        logger.info("Done downloading all.zip for {}".format(self))

                        # Extract from zip
                        with zipfile.ZipFile(zip_path, "r") as zip_h:
                            zip_h.extractall(assets_dir)

                        logger.info("Extracted all.zip for {}".format(self))

                        # Populate *_extent fields
                        extent_fields = [
                            (os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto.tif")),
                             'orthophoto_extent'),
                            (os.path.realpath(self.assets_path("odm_dem", "dsm.tif")),
                             'dsm_extent'),
                            (os.path.realpath(self.assets_path("odm_dem", "dtm.tif")),
                             'dtm_extent'),
                        ]

                        for raster_path, field in extent_fields:
                            if os.path.exists(raster_path):
                                # Read extent and SRID
                                raster = GDALRaster(raster_path)
                                extent = OGRGeometry.from_bbox(raster.extent)

                                # It will be implicitly transformed into the SRID of the model's field
                                # self.field = GEOSGeometry(...)
                                setattr(self, field, GEOSGeometry(extent.wkt, srid=raster.srid))

                                logger.info("Populated extent field with {} for {}".format(raster_path, self))

                        self.update_available_assets_field()
                        self.save()
                    else:
                        # FAILED, CANCELED
                        self.save()
                else:
                    # Still waiting...
                    self.save()

    except ProcessingError as e:
        self.set_failure(str(e))
    except (ConnectionRefusedError, ConnectionError) as e:
        logger.warning("{} cannot communicate with processing node: {}".format(self, str(e)))
    except ProcessingTimeout as e:
        logger.warning("{} timed out with error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))

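
# --- Illustrative sketch (not part of the original source) ---
# The docstring above says process() is driven by a background thread or the
# scheduler. The loop below is a minimal, hypothetical example of such a driver:
# the tick interval and the filter used to pick "tasks that need attention"
# are assumptions for illustration, not WebODM's actual scheduler code.
import logging
import time

from django.db.models import Q

example_logger = logging.getLogger(__name__)
TASKS_PROCESS_INTERVAL = 5  # seconds between ticks (assumed value)

def example_scheduler_loop():
    """Periodically invoke process() on tasks that still need attention."""
    while True:
        # Hypothetical selection: tasks with a pending action or no final status yet
        pending_tasks = Task.objects.filter(Q(pending_action__isnull=False) | Q(status__isnull=True))

        for task in pending_tasks:
            try:
                task.process()
            except Exception as e:
                # process() already handles its known exceptions; anything else
                # is logged so one bad task does not stop the whole tick
                example_logger.exception("Unexpected error while processing {}: {}".format(task, e))

        time.sleep(TASKS_PROCESS_INTERVAL)
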
def process(self):
    """
    This method contains the logic for processing tasks asynchronously
    from a background thread or from a worker. Here tasks that are
    ready to be processed execute some logic. This could be communication
    with a processing node or executing a pending action.
    """

    try:
        if self.pending_action == pending_actions.IMPORT:
            self.handle_import()

        if self.pending_action == pending_actions.RESIZE:
            resized_images = self.resize_images()
            self.refresh_from_db()
            self.resize_gcp(resized_images)
            self.pending_action = None
            self.save()

        if self.auto_processing_node and not self.status in [status_codes.FAILED, status_codes.CANCELED]:
            # No processing node assigned and need to auto assign
            if self.processing_node is None:
                # Assign first online node with lowest queue count
                self.processing_node = ProcessingNode.find_best_available_node()
                if self.processing_node:
                    self.processing_node.queue_count += 1 # Doesn't have to be accurate, it will get overridden later
                    self.processing_node.save()

                    logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                    self.save()

            # Processing node assigned, but is offline and no errors
            if self.processing_node and not self.processing_node.is_online():
                # If we are queued up
                # detach processing node, and reassignment
                # will be processed at the next tick
                if self.status == status_codes.QUEUED:
                    logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                    self.uuid = ''
                    self.processing_node = None
                    self.status = None
                    self.save()

                elif self.status == status_codes.RUNNING:
                    # Task was running and processing node went offline
                    # It could have crashed due to low memory
                    # or perhaps it went offline due to network errors.
                    # We can't easily differentiate between the two, so we need
                    # to notify the user because if it crashed due to low memory
                    # the user might need to take action (or be stuck in an infinite loop)
                    raise NodeServerError("Processing node went offline. This could be due to insufficient memory or a network error.")

        if self.processing_node:
            # Need to process some images (UUID not yet set and task doesn't have pending actions)?
            if not self.uuid and self.pending_action is None and self.status is None:
                logger.info("Processing... {}".format(self))

                images = [image.path() for image in self.imageupload_set.all()]

                # Track upload progress, but limit the number of DB updates
                # to every 2 seconds (and always record the 100% progress)
                last_update = 0
                def callback(progress):
                    nonlocal last_update

                    time_has_elapsed = time.time() - last_update >= 2

                    if time_has_elapsed:
                        testWatch.manual_log_call("Task.process.callback")
                        self.check_if_canceled()
                        Task.objects.filter(pk=self.id).update(upload_progress=float(progress) / 100.0)
                        last_update = time.time()

                # This takes a while
                try:
                    uuid = self.processing_node.process_new_task(images, self.name, self.options, callback)
                except NodeConnectionError as e:
                    # If we can't create a task because the node is offline
                    # We want to fail instead of trying again
                    raise NodeServerError(gettext('Connection error: %(error)s') % {'error': str(e)})

                # Refresh task object before committing change
                self.refresh_from_db()
                self.upload_progress = 1.0
                self.uuid = uuid
                self.save()

                # TODO: log process has started processing

        if self.pending_action is not None:
            if self.pending_action == pending_actions.CANCEL:
                # Do we need to cancel the task on the processing node?
                logger.info("Canceling {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to cancel the task on the processing node
                    # We don't care if this fails (we tried)
                    try:
                        self.processing_node.cancel_task(self.uuid)
                    except OdmError:
                        logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))

                    self.status = status_codes.CANCELED
                    self.pending_action = None
                    self.save()
                else:
                    # Tasks with no processing node or UUID need no special action
                    self.status = status_codes.CANCELED
                    self.pending_action = None
                    self.save()

            elif self.pending_action == pending_actions.RESTART:
                logger.info("Restarting {}".format(self))
                if self.processing_node:

                    # Check if the UUID is still valid, as processing nodes purge
                    # results after a set amount of time, the UUID might have been eliminated.
                    uuid_still_exists = False

                    if self.uuid:
                        try:
                            info = self.processing_node.get_task_info(self.uuid)
                            uuid_still_exists = info.uuid == self.uuid
                        except OdmError:
                            pass

                    need_to_reprocess = False

                    if uuid_still_exists:
                        # Good to go
                        try:
                            self.processing_node.restart_task(self.uuid, self.options)
                        except (NodeServerError, NodeResponseError) as e:
                            # Something went wrong
                            logger.warning("Could not restart {}, will start a new one".format(self))
                            need_to_reprocess = True
                    else:
                        need_to_reprocess = True

                    if need_to_reprocess:
                        logger.info("{} needs to be reprocessed".format(self))

                        # Task has been purged (or processing node is offline)
                        # Process this as a new task
                        # Removing its UUID will cause the scheduler
                        # to process this the next tick
                        self.uuid = ''

                        # We also remove the "rerun-from" parameter if it's set
                        self.options = list(filter(lambda d: d['name'] != 'rerun-from', self.options))
                        self.upload_progress = 0

                    self.console_output = ""
                    self.processing_time = -1
                    self.status = None
                    self.last_error = None
                    self.pending_action = None
                    self.running_progress = 0
                    self.save()
                else:
                    raise NodeServerError(gettext("Cannot restart a task that has no processing node"))

            elif self.pending_action == pending_actions.REMOVE:
                logger.info("Removing {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to delete the resources on the processing node
                    # We don't care if this fails, as resources on processing nodes
                    # Are expected to be purged on their own after a set amount of time anyway
                    try:
                        self.processing_node.remove_task(self.uuid)
                    except OdmError:
                        pass

                # What's more important is that we delete our task properly here
                self.delete()

                # Stop right here!
                return

        if self.processing_node:
            # Need to update status (first time, queued or running?)
            if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                # Update task info from processing node
                if not self.console_output:
                    current_lines_count = 0
                else:
                    current_lines_count = len(self.console_output.split("\n"))

                info = self.processing_node.get_task_info(self.uuid, current_lines_count)

                self.processing_time = info.processing_time
                self.status = info.status.value

                if len(info.output) > 0:
                    self.console_output += "\n".join(info.output) + '\n'

                # Update running progress
                self.running_progress = (info.progress / 100.0) * self.TASK_PROGRESS_LAST_VALUE

                if info.last_error != "":
                    self.last_error = info.last_error

                # Has the task just been canceled, failed, or completed?
                if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                    logger.info("Processing status: {} for {}".format(self.status, self))

                    if self.status == status_codes.COMPLETED:
                        assets_dir = self.assets_path("")

                        # Remove previous assets directory
                        if os.path.exists(assets_dir):
                            logger.info("Removing old assets directory: {} for {}".format(assets_dir, self))
                            shutil.rmtree(assets_dir)

                        os.makedirs(assets_dir)

                        # Download and try to extract results up to 4 times
                        # (~5% of the times, on large downloads, the archive could be corrupted)
                        retry_num = 0
                        extracted = False
                        last_update = 0

                        def callback(progress):
                            nonlocal last_update

                            time_has_elapsed = time.time() - last_update >= 2

                            if time_has_elapsed or int(progress) == 100:
                                Task.objects.filter(pk=self.id).update(running_progress=(
                                    self.TASK_PROGRESS_LAST_VALUE + (float(progress) / 100.0) * 0.1))
                                last_update = time.time()

                        while not extracted:
                            last_update = 0
                            logger.info("Downloading all.zip for {}".format(self))

                            # Download all assets
                            zip_path = self.processing_node.download_task_assets(self.uuid, assets_dir,
                                                                                 progress_callback=callback,
                                                                                 parallel_downloads=max(1, int(16 / (2 ** retry_num))))

                            # Rename to all.zip
                            all_zip_path = self.assets_path("all.zip")
                            os.rename(zip_path, all_zip_path)

                            logger.info("Extracting all.zip for {}".format(self))

                            try:
                                self.extract_assets_and_complete()
                                extracted = True
                            except zipfile.BadZipFile:
                                if retry_num < 5:
                                    logger.warning("{} seems corrupted. Retrying...".format(all_zip_path))
                                    retry_num += 1
                                    os.remove(all_zip_path)
                                else:
                                    raise NodeServerError(gettext("Invalid zip file"))
                    else:
                        # FAILED, CANCELED
                        self.save()
                else:
                    # Still waiting...
                    self.save()

    except (NodeServerError, NodeResponseError) as e:
        self.set_failure(str(e))
    except NodeConnectionError as e:
        logger.warning("{} connection/timeout error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))
    except TaskInterruptedException as e:
        # Task was interrupted during image resize / upload
        logger.warning("{} interrupted: {}".format(self, str(e)))

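
# --- Illustrative sketch (not part of the original source) ---
# The upload and download callbacks above throttle database writes to at most
# one every 2 seconds while always recording the final 100% value. The helper
# below isolates that pattern; make_throttled_callback and its parameters are
# hypothetical names used only for illustration.
import time

def make_throttled_callback(write, min_interval=2):
    """Wrap write(progress) so it runs at most every min_interval seconds,
    but always runs when progress reaches 100."""
    last_update = 0

    def callback(progress):
        nonlocal last_update
        if time.time() - last_update >= min_interval or int(progress) == 100:
            write(progress)  # e.g. a Task.objects.filter(...).update(...) call
            last_update = time.time()

    return callback

# Usage sketch: persist progress as a 0..1 fraction, as the code above does
# progress_cb = make_throttled_callback(
#     lambda p: Task.objects.filter(pk=task.id).update(upload_progress=float(p) / 100.0))
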
def process(self):
    """
    This method contains the logic for processing tasks asynchronously
    from a background thread or from a worker. Here tasks that are
    ready to be processed execute some logic. This could be communication
    with a processing node or executing a pending action.
    """

    try:
        if self.pending_action == pending_actions.RESIZE:
            resized_images = self.resize_images()
            self.refresh_from_db()
            self.resize_gcp(resized_images)
            self.pending_action = None
            self.save()

        if self.auto_processing_node and not self.status in [status_codes.FAILED, status_codes.CANCELED]:
            # No processing node assigned and need to auto assign
            if self.processing_node is None:
                # Assign first online node with lowest queue count
                self.processing_node = ProcessingNode.find_best_available_node()
                if self.processing_node:
                    self.processing_node.queue_count += 1 # Doesn't have to be accurate, it will get overridden later
                    self.processing_node.save()

                    logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                    self.save()

            # Processing node assigned, but is offline and no errors
            if self.processing_node and not self.processing_node.is_online():
                # If we are queued up
                # detach processing node, and reassignment
                # will be processed at the next tick
                if self.status == status_codes.QUEUED:
                    logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                    self.uuid = ''
                    self.processing_node = None
                    self.status = None
                    self.save()

                elif self.status == status_codes.RUNNING:
                    # Task was running and processing node went offline
                    # It could have crashed due to low memory
                    # or perhaps it went offline due to network errors.
                    # We can't easily differentiate between the two, so we need
                    # to notify the user because if it crashed due to low memory
                    # the user might need to take action (or be stuck in an infinite loop)
                    raise ProcessingError("Processing node went offline. This could be due to insufficient memory or a network error.")

        if self.processing_node:
            # Need to process some images (UUID not yet set and task doesn't have pending actions)?
            if not self.uuid and self.pending_action is None and self.status is None:
                logger.info("Processing... {}".format(self))

                images = [image.path() for image in self.imageupload_set.all()]

                # Track upload progress, but limit the number of DB updates
                # to every 2 seconds (and always record the 100% progress)
                last_update = 0
                def callback(progress):
                    nonlocal last_update
                    if time.time() - last_update >= 2 or (progress >= 1.0 - 1e-6 and progress <= 1.0 + 1e-6):
                        Task.objects.filter(pk=self.id).update(upload_progress=progress)
                        last_update = time.time()

                # This takes a while
                uuid = self.processing_node.process_new_task(images, self.name, self.options, callback)

                # Refresh task object before committing change
                self.refresh_from_db()
                self.uuid = uuid
                self.save()

                # TODO: log process has started processing

        if self.pending_action is not None:
            if self.pending_action == pending_actions.CANCEL:
                # Do we need to cancel the task on the processing node?
                logger.info("Canceling {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to cancel the task on the processing node
                    # We don't care if this fails (we tried)
                    try:
                        self.processing_node.cancel_task(self.uuid)
                    except ProcessingException:
                        logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))

                    self.status = status_codes.CANCELED
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot cancel a task that has no processing node or UUID")

            elif self.pending_action == pending_actions.RESTART:
                logger.info("Restarting {}".format(self))
                if self.processing_node:

                    # Check if the UUID is still valid, as processing nodes purge
                    # results after a set amount of time, the UUID might have been eliminated.
                    uuid_still_exists = False

                    if self.uuid:
                        try:
                            info = self.processing_node.get_task_info(self.uuid)
                            uuid_still_exists = info['uuid'] == self.uuid
                        except ProcessingException:
                            pass

                    need_to_reprocess = False

                    if uuid_still_exists:
                        # Good to go
                        try:
                            self.processing_node.restart_task(self.uuid, self.options)
                        except ProcessingError as e:
                            # Something went wrong
                            logger.warning("Could not restart {}, will start a new one".format(self))
                            need_to_reprocess = True
                    else:
                        need_to_reprocess = True

                    if need_to_reprocess:
                        logger.info("{} needs to be reprocessed".format(self))

                        # Task has been purged (or processing node is offline)
                        # Process this as a new task
                        # Removing its UUID will cause the scheduler
                        # to process this the next tick
                        self.uuid = ''

                        # We also remove the "rerun-from" parameter if it's set
                        self.options = list(filter(lambda d: d['name'] != 'rerun-from', self.options))
                        self.upload_progress = 0

                    self.console_output = ""
                    self.processing_time = -1
                    self.status = None
                    self.last_error = None
                    self.pending_action = None
                    self.running_progress = 0
                    self.save()
                else:
                    raise ProcessingError("Cannot restart a task that has no processing node")

            elif self.pending_action == pending_actions.REMOVE:
                logger.info("Removing {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to delete the resources on the processing node
                    # We don't care if this fails, as resources on processing nodes
                    # Are expected to be purged on their own after a set amount of time anyway
                    try:
                        self.processing_node.remove_task(self.uuid)
                    except ProcessingException:
                        pass

                # What's more important is that we delete our task properly here
                self.delete()

                # Stop right here!
                return

        if self.processing_node:
            # Need to update status (first time, queued or running?)
            if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                # Update task info from processing node
                info = self.processing_node.get_task_info(self.uuid)

                self.processing_time = info["processingTime"]
                self.status = info["status"]["code"]

                current_lines_count = len(self.console_output.split("\n"))
                console_output = self.processing_node.get_task_console_output(self.uuid, current_lines_count)

                if len(console_output) > 0:
                    self.console_output += "\n".join(console_output) + '\n'

                    # Update running progress
                    for line in console_output:
                        for line_match, value in self.TASK_OUTPUT_MILESTONES.items():
                            if line_match in line:
                                self.running_progress = value
                                break

                if "errorMessage" in info["status"]:
                    self.last_error = info["status"]["errorMessage"]

                # Has the task just been canceled, failed, or completed?
                if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                    logger.info("Processing status: {} for {}".format(self.status, self))

                    if self.status == status_codes.COMPLETED:
                        assets_dir = self.assets_path("")

                        # Remove previous assets directory
                        if os.path.exists(assets_dir):
                            logger.info("Removing old assets directory: {} for {}".format(assets_dir, self))
                            shutil.rmtree(assets_dir)

                        os.makedirs(assets_dir)

                        logger.info("Downloading all.zip for {}".format(self))

                        # Download all assets
                        zip_stream = self.processing_node.download_task_asset(self.uuid, "all.zip")
                        zip_path = os.path.join(assets_dir, "all.zip")
                        with open(zip_path, 'wb') as fd:
                            for chunk in zip_stream.iter_content(4096):
                                fd.write(chunk)

                        logger.info("Done downloading all.zip for {}".format(self))

                        # Extract from zip
                        with zipfile.ZipFile(zip_path, "r") as zip_h:
                            zip_h.extractall(assets_dir)

                        logger.info("Extracted all.zip for {}".format(self))

                        # Populate *_extent fields
                        extent_fields = [
                            (os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto.tif")),
                             'orthophoto_extent'),
                            (os.path.realpath(self.assets_path("odm_dem", "dsm.tif")),
                             'dsm_extent'),
                            (os.path.realpath(self.assets_path("odm_dem", "dtm.tif")),
                             'dtm_extent'),
                        ]

                        for raster_path, field in extent_fields:
                            if os.path.exists(raster_path):
                                # Read extent and SRID
                                raster = GDALRaster(raster_path)
                                extent = OGRGeometry.from_bbox(raster.extent)

                                # It will be implicitly transformed into the SRID of the model's field
                                # self.field = GEOSGeometry(...)
                                setattr(self, field, GEOSGeometry(extent.wkt, srid=raster.srid))

                                logger.info("Populated extent field with {} for {}".format(raster_path, self))

                        self.update_available_assets_field()
                        self.running_progress = 1.0
                        self.save()

                        from app.plugins import signals as plugin_signals
                        plugin_signals.task_completed.send_robust(sender=self.__class__, task_id=self.id)
                    else:
                        # FAILED, CANCELED
                        self.save()
                else:
                    # Still waiting...
                    self.save()

    except ProcessingError as e:
        self.set_failure(str(e))
    except (ConnectionRefusedError, ConnectionError) as e:
        logger.warning("{} cannot communicate with processing node: {}".format(self, str(e)))
    except ProcessingTimeout as e:
        logger.warning("{} timed out with error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))

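
# --- Illustrative sketch (not part of the original source) ---
# The status-update branch above maps console output lines to coarse progress
# values via TASK_OUTPUT_MILESTONES. The standalone function below shows the
# same scan with a hypothetical milestones dictionary; the substrings and
# values are made up for illustration and are not WebODM's real milestone table.
EXAMPLE_MILESTONES = {
    'Running ODM Load Dataset Cell': 0.10,
    'Running ODM Meshing Cell': 0.60,
    'Compressing all.zip': 0.95,
}

def scan_progress(lines, milestones, current=0.0):
    """Return the progress value of the last milestone matched in lines."""
    progress = current
    for line in lines:
        for line_match, value in milestones.items():
            if line_match in line:
                progress = value
                break  # first matching milestone wins for this line
    return progress

# scan_progress(["... Running ODM Meshing Cell ..."], EXAMPLE_MILESTONES)  # -> 0.6
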
def process(self):
    """
    This method contains the logic for processing tasks asynchronously
    from a background thread or from a worker. Here tasks that are
    ready to be processed execute some logic. This could be communication
    with a processing node or executing a pending action.
    """

    try:
        if self.pending_action == pending_actions.RESIZE:
            resized_images = self.resize_images()
            self.resize_gcp(resized_images)
            self.pending_action = None
            self.save()

        if self.auto_processing_node and not self.status in [status_codes.FAILED, status_codes.CANCELED]:
            # No processing node assigned and need to auto assign
            if self.processing_node is None:
                # Assign first online node with lowest queue count
                self.processing_node = ProcessingNode.find_best_available_node()
                if self.processing_node:
                    self.processing_node.queue_count += 1 # Doesn't have to be accurate, it will get overridden later
                    self.processing_node.save()

                    logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                    self.save()

            # Processing node assigned, but is offline and no errors
            if self.processing_node and not self.processing_node.is_online():
                # If we are queued up
                # detach processing node, and reassignment
                # will be processed at the next tick
                if self.status == status_codes.QUEUED:
                    logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                    self.uuid = ''
                    self.processing_node = None
                    self.status = None
                    self.save()

                elif self.status == status_codes.RUNNING:
                    # Task was running and processing node went offline
                    # It could have crashed due to low memory
                    # or perhaps it went offline due to network errors.
                    # We can't easily differentiate between the two, so we need
                    # to notify the user because if it crashed due to low memory
                    # the user might need to take action (or be stuck in an infinite loop)
                    raise ProcessingError("Processing node went offline. This could be due to insufficient memory or a network error.")

        if self.processing_node:
            # Need to process some images (UUID not yet set and task doesn't have pending actions)?
            if not self.uuid and self.pending_action is None and self.status is None:
                logger.info("Processing... {}".format(self))

                images = [image.path() for image in self.imageupload_set.all()]

                # This takes a while
                uuid = self.processing_node.process_new_task(images, self.name, self.options)

                # Refresh task object before committing change
                self.refresh_from_db()
                self.uuid = uuid
                self.save()

                # TODO: log process has started processing

        if self.pending_action is not None:
            if self.pending_action == pending_actions.CANCEL:
                # Do we need to cancel the task on the processing node?
                logger.info("Canceling {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to cancel the task on the processing node
                    # We don't care if this fails (we tried)
                    try:
                        self.processing_node.cancel_task(self.uuid)
                    except ProcessingException:
                        logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))

                    self.status = status_codes.CANCELED
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot cancel a task that has no processing node or UUID")

            elif self.pending_action == pending_actions.RESTART:
                logger.info("Restarting {}".format(self))
                if self.processing_node:

                    # Check if the UUID is still valid, as processing nodes purge
                    # results after a set amount of time, the UUID might have been eliminated.
                    uuid_still_exists = False

                    if self.uuid:
                        try:
                            info = self.processing_node.get_task_info(self.uuid)
                            uuid_still_exists = info['uuid'] == self.uuid
                        except ProcessingException:
                            pass

                    need_to_reprocess = False

                    if uuid_still_exists:
                        # Good to go
                        try:
                            self.processing_node.restart_task(self.uuid, self.options)
                        except ProcessingError as e:
                            # Something went wrong
                            logger.warning("Could not restart {}, will start a new one".format(self))
                            need_to_reprocess = True
                    else:
                        need_to_reprocess = True

                    if need_to_reprocess:
                        logger.info("{} needs to be reprocessed".format(self))

                        # Task has been purged (or processing node is offline)
                        # Process this as a new task
                        # Removing its UUID will cause the scheduler
                        # to process this the next tick
                        self.uuid = ''

                        # We also remove the "rerun-from" parameter if it's set
                        self.options = list(filter(lambda d: d['name'] != 'rerun-from', self.options))

                    self.console_output = ""
                    self.processing_time = -1
                    self.status = None
                    self.last_error = None
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot restart a task that has no processing node")

            elif self.pending_action == pending_actions.REMOVE:
                logger.info("Removing {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to delete the resources on the processing node
                    # We don't care if this fails, as resources on processing nodes
                    # Are expected to be purged on their own after a set amount of time anyway
                    try:
                        self.processing_node.remove_task(self.uuid)
                    except ProcessingException:
                        pass

                # What's more important is that we delete our task properly here
                self.delete()

                # Stop right here!
                return

        if self.processing_node:
            # Need to update status (first time, queued or running?)
            if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                # Update task info from processing node
                info = self.processing_node.get_task_info(self.uuid)

                self.processing_time = info["processingTime"]
                self.status = info["status"]["code"]

                current_lines_count = len(self.console_output.split("\n"))
                console_output = self.processing_node.get_task_console_output(self.uuid, current_lines_count)
                if len(console_output) > 0:
                    self.console_output += console_output + '\n'

                if "errorMessage" in info["status"]:
                    self.last_error = info["status"]["errorMessage"]

                # Has the task just been canceled, failed, or completed?
                if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                    logger.info("Processing status: {} for {}".format(self.status, self))

                    if self.status == status_codes.COMPLETED:
                        assets_dir = self.assets_path("")

                        # Remove previous assets directory
                        if os.path.exists(assets_dir):
                            logger.info("Removing old assets directory: {} for {}".format(assets_dir, self))
                            shutil.rmtree(assets_dir)

                        os.makedirs(assets_dir)

                        logger.info("Downloading all.zip for {}".format(self))

                        # Download all assets
                        zip_stream = self.processing_node.download_task_asset(self.uuid, "all.zip")
                        zip_path = os.path.join(assets_dir, "all.zip")
                        with open(zip_path, 'wb') as fd:
                            for chunk in zip_stream.iter_content(4096):
                                fd.write(chunk)

                        logger.info("Done downloading all.zip for {}".format(self))

                        # Extract from zip
                        with zipfile.ZipFile(zip_path, "r") as zip_h:
                            zip_h.extractall(assets_dir)

                        logger.info("Extracted all.zip for {}".format(self))

                        # Populate *_extent fields
                        extent_fields = [
                            (os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto.tif")),
                             'orthophoto_extent'),
                            (os.path.realpath(self.assets_path("odm_dem", "dsm.tif")),
                             'dsm_extent'),
                            (os.path.realpath(self.assets_path("odm_dem", "dtm.tif")),
                             'dtm_extent'),
                        ]

                        for raster_path, field in extent_fields:
                            if os.path.exists(raster_path):
                                # Read extent and SRID
                                raster = GDALRaster(raster_path)
                                extent = OGRGeometry.from_bbox(raster.extent)

                                # It will be implicitly transformed into the SRID of the model's field
                                # self.field = GEOSGeometry(...)
                                setattr(self, field, GEOSGeometry(extent.wkt, srid=raster.srid))

                                logger.info("Populated extent field with {} for {}".format(raster_path, self))

                        self.update_available_assets_field()
                        self.save()

                        from app.plugins import signals as plugin_signals
                        plugin_signals.task_completed.send_robust(sender=self.__class__, task_id=self.id)
                    else:
                        # FAILED, CANCELED
                        self.save()
                else:
                    # Still waiting...
                    self.save()

    except ProcessingError as e:
        self.set_failure(str(e))
    except (ConnectionRefusedError, ConnectionError) as e:
        logger.warning("{} cannot communicate with processing node: {}".format(self, str(e)))
    except ProcessingTimeout as e:
        logger.warning("{} timed out with error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))

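
# --- Illustrative sketch (not part of the original source) ---
# The COMPLETED branch above emits app.plugins.signals.task_completed via
# send_robust(sender=..., task_id=...). A plugin could listen for it with a
# standard Django signal receiver; the receiver function name below is
# hypothetical and shown only to illustrate how the signal can be consumed.
from django.dispatch import receiver
from app.plugins import signals as plugin_signals

@receiver(plugin_signals.task_completed)
def on_task_completed(sender, task_id, **kwargs):
    # send_robust() swallows receiver exceptions, so keep handlers defensive
    logger.info("Task {} completed (notified via plugin signal)".format(task_id))
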
def process(self):
    """
    This method contains the logic for processing tasks asynchronously
    from a background thread or from the scheduler. Here tasks that are
    ready to be processed execute some logic. This could be communication
    with a processing node or executing a pending action.
    """

    try:
        if self.auto_processing_node and not self.status in [status_codes.FAILED, status_codes.CANCELED]:
            # No processing node assigned and need to auto assign
            if self.processing_node is None:
                # Assign first online node with lowest queue count
                self.processing_node = ProcessingNode.find_best_available_node()
                if self.processing_node:
                    self.processing_node.queue_count += 1 # Doesn't have to be accurate, it will get overridden later
                    self.processing_node.save()

                    logger.info("Automatically assigned processing node {} to {}".format(self.processing_node, self))
                    self.save()

            # Processing node assigned, but is offline and no errors
            if self.processing_node and not self.processing_node.is_online():
                # Detach processing node, will be processed at the next tick
                logger.info("Processing node {} went offline, reassigning {}...".format(self.processing_node, self))
                self.uuid = ''
                self.processing_node = None
                self.save()

        if self.processing_node:
            # Need to process some images (UUID not yet set and task doesn't have pending actions)?
            if not self.uuid and self.pending_action is None and self.status is None:
                logger.info("Processing... {}".format(self))

                images = [image.path() for image in self.imageupload_set.all()]

                # This takes a while
                uuid = self.processing_node.process_new_task(images, self.name, self.options)

                # Refresh task object before committing change
                self.refresh_from_db()
                self.uuid = uuid
                self.save()

                # TODO: log process has started processing

        if self.pending_action is not None:
            if self.pending_action == pending_actions.CANCEL:
                # Do we need to cancel the task on the processing node?
                logger.info("Canceling {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to cancel the task on the processing node
                    # We don't care if this fails (we tried)
                    try:
                        self.processing_node.cancel_task(self.uuid)
                        self.status = None
                    except ProcessingException:
                        logger.warning("Could not cancel {} on processing node. We'll proceed anyway...".format(self))

                    self.status = status_codes.CANCELED
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot cancel a task that has no processing node or UUID")

            elif self.pending_action == pending_actions.RESTART:
                logger.info("Restarting {}".format(self))
                if self.processing_node:

                    # Check if the UUID is still valid, as processing nodes purge
                    # results after a set amount of time, the UUID might have been eliminated.
                    uuid_still_exists = False

                    if self.uuid:
                        try:
                            info = self.processing_node.get_task_info(self.uuid)
                            uuid_still_exists = info['uuid'] == self.uuid
                        except ProcessingException:
                            pass

                    if uuid_still_exists:
                        # Good to go
                        try:
                            self.processing_node.restart_task(self.uuid)
                        except ProcessingError as e:
                            # Something went wrong
                            logger.warning("Could not restart {}, will start a new one".format(self))
                            self.uuid = ''
                    else:
                        # Task has been purged (or processing node is offline)
                        # Process this as a new task
                        # Removing its UUID will cause the scheduler
                        # to process this the next tick
                        self.uuid = ''

                    self.console_output = ""
                    self.processing_time = -1
                    self.status = None
                    self.last_error = None
                    self.pending_action = None
                    self.save()
                else:
                    raise ProcessingError("Cannot restart a task that has no processing node")

            elif self.pending_action == pending_actions.REMOVE:
                logger.info("Removing {}".format(self))
                if self.processing_node and self.uuid:
                    # Attempt to delete the resources on the processing node
                    # We don't care if this fails, as resources on processing nodes
                    # Are expected to be purged on their own after a set amount of time anyway
                    try:
                        self.processing_node.remove_task(self.uuid)
                    except ProcessingException:
                        pass

                # What's more important is that we delete our task properly here
                self.delete()

                # Stop right here!
                return

        if self.processing_node:
            # Need to update status (first time, queued or running?)
            if self.uuid and self.status in [None, status_codes.QUEUED, status_codes.RUNNING]:
                # Update task info from processing node
                info = self.processing_node.get_task_info(self.uuid)

                self.processing_time = info["processingTime"]
                self.status = info["status"]["code"]

                current_lines_count = len(self.console_output.split("\n")) - 1
                self.console_output += self.processing_node.get_task_console_output(self.uuid, current_lines_count)

                if "errorMessage" in info["status"]:
                    self.last_error = info["status"]["errorMessage"]

                # Has the task just been canceled, failed, or completed?
                if self.status in [status_codes.FAILED, status_codes.COMPLETED, status_codes.CANCELED]:
                    logger.info("Processing status: {} for {}".format(self.status, self))

                    if self.status == status_codes.COMPLETED:
                        assets_dir = self.assets_path("")
                        if not os.path.exists(assets_dir):
                            os.makedirs(assets_dir)

                        logger.info("Downloading all.zip for {}".format(self))

                        # Download all assets
                        zip_stream = self.processing_node.download_task_asset(self.uuid, "all.zip")
                        zip_path = os.path.join(assets_dir, "all.zip")
                        with open(zip_path, 'wb') as fd:
                            for chunk in zip_stream.iter_content(4096):
                                fd.write(chunk)

                        logger.info("Done downloading all.zip for {}".format(self))

                        # Extract from zip
                        with zipfile.ZipFile(zip_path, "r") as zip_h:
                            zip_h.extractall(assets_dir)

                        logger.info("Extracted all.zip for {}".format(self))

                        # Add to database orthophoto
                        orthophoto_path = os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto.tif"))
                        if os.path.exists(orthophoto_path):
                            orthophoto = GDALRaster(orthophoto_path, write=True)

                            # We need to transform to 4326 before we can store it
                            # as an offdb raster field
                            orthophoto_4326_path = os.path.realpath(self.assets_path("odm_orthophoto", "odm_orthophoto_4326.tif"))
                            self.orthophoto = orthophoto.transform(4326, 'GTiff', orthophoto_4326_path)

                            logger.info("Imported orthophoto {} for {}".format(orthophoto_4326_path, self))

                        # Remove old odm_texturing.zip archive (if any)
                        textured_model_archive = self.assets_path(self.get_textured_model_filename())
                        if os.path.exists(textured_model_archive):
                            os.remove(textured_model_archive)

                        self.save()
                    else:
                        # FAILED, CANCELED
                        self.save()
                else:
                    # Still waiting...
                    self.save()

    except ProcessingError as e:
        self.set_failure(str(e))
    except (ConnectionRefusedError, ConnectionError) as e:
        logger.warning("{} cannot communicate with processing node: {}".format(self, str(e)))
    except ProcessingTimeout as e:
        logger.warning("{} timed out with error: {}. We'll try reprocessing at the next tick.".format(self, str(e)))