def get_redefine_map(recognition_provider_id):
    """Fetch the redefine-labels map of a recognition provider.

    Reads the document whose id is ``recognition_provider_id`` from the
    COLLECTION_REDEFINE_MAPS collection and returns its redefine-map field.

    Args:
        recognition_provider_id: Id of the recognition provider; it is used
            as the document id inside COLLECTION_REDEFINE_MAPS.

    Returns:
        The value stored under the redefine-map field of that document.
    """
    redefine_maps = initialize_db().collection(
        database_schema.COLLECTION_REDEFINE_MAPS)
    provider_snapshot = redefine_maps.document(recognition_provider_id).get()
    provider_fields = provider_snapshot.to_dict()
    return provider_fields[
        database_schema.COLLECTION_REDEFINE_MAPS_FIELD_REDEFINE_MAP]
def get_provider_id_from_run_id(run_id):
    """Look up the provider id recorded on a pipeline run.

    Args:
        run_id: Id of the pipeline run; it is used as the document id inside
            COLLECTION_PIPELINE_RUNS.

    Returns:
        The value stored under the provider-id field of that run's document.
    """
    runs_collection = initialize_db().collection(
        database_schema.COLLECTION_PIPELINE_RUNS)
    run_snapshot = runs_collection.document(run_id).get()
    run_fields = run_snapshot.to_dict()
    provider_id_field = (
        database_schema.COLLECTION_PIPELINE_RUNS_FIELD_PROVIDER_ID)
    return run_fields[provider_id_field]
def update_pipelinerun_doc_to_invisible(pipeline_run_id):
    """Mark a pipeline run's document as invisible.

    Sets the visibility field of the run's document in the pipeline runs
    collection to ``VisibilityType.INVISIBLE.value``. Per the original note,
    this is done after the run's labels were removed.

    Args:
        pipeline_run_id: Id of the pipeline run; it is used as the document
            id inside COLLECTION_PIPELINE_RUNS.
    """
    runs_collection = initialize_db().collection(
        database_schema.COLLECTION_PIPELINE_RUNS)
    run_ref = runs_collection.document(pipeline_run_id)
    visibility_field = (
        database_schema.COLLECTION_PIPELINE_RUNS_FIELD_PROVIDER_VISIBILITY)
    invisible_value = data_types.VisibilityType.INVISIBLE.value
    run_ref.update({visibility_field: invisible_value})
def update_pipelinerun_doc_visibility(image_provider_id, visibility):
    """Set the visibility field of a document in the pipeline runs collection.

    Args:
        image_provider_id: Despite its name, this value is used as the
            document id inside COLLECTION_PIPELINE_RUNS.
        visibility: The visibility to store; its ``.value`` attribute is what
            gets written (presumably a data_types.VisibilityType member such
            as VISIBLE / INVISIBLE - confirm against callers).
    """
    runs_collection = initialize_db().collection(
        database_schema.COLLECTION_PIPELINE_RUNS)
    target_ref = runs_collection.document(image_provider_id)
    visibility_field = (
        database_schema.COLLECTION_PIPELINE_RUNS_FIELD_PROVIDER_VISIBILITY)
    target_ref.update({visibility_field: visibility.value})
def get_provider_keys(provider_id):
    """Read an image provider's keys from the database.

    Args:
        provider_id: String id of the provider; it is used as the document
            id inside COLLECTION_IMAGE_PROVIDERS.

    Returns:
        The value stored under the provider-keys field of that document,
        described by the original author as a dict holding the provider's
        keys, e.g. {'secretKey': 'X', 'apiKey': 'Y'}.
    """
    providers_collection = firestore_database.initialize_db().collection(
        database_schema.COLLECTION_IMAGE_PROVIDERS)
    provider_snapshot = providers_collection.document(provider_id).get()
    provider_fields = provider_snapshot.to_dict()
    keys_field = (
        database_schema.COLLECTION_IMAGE_PROVIDERS_FIELD_PROVIDER_KEYS)
    return provider_fields[keys_field]
def get_latest_runs(status):
    """Return a limited list of the most recent pipeline runs with a status.

    The runs are filtered by ``status``, ordered by start date from newest
    to oldest, and capped at ``_NUM_OF_LATEST_RUNS`` results.

    Args:
        status: The status the returned pipeline runs must have.

    Returns:
        List of pipeline run documents, each one presented as a dict.
    """
    runs_collection = initialize_db().collection(
        database_schema.COLLECTION_PIPELINE_RUNS)
    with_status = runs_collection.where(
        database_schema.COLLECTION_PIPELINE_RUNS_FIELD_STATUS, u'==', status)
    newest_first = with_status.order_by(
        database_schema.COLLECTION_PIPELINE_RUNS_FIELD_START_DATE,
        direction=firestore.Query.DESCENDING)
    run_snapshots = newest_first.limit(_NUM_OF_LATEST_RUNS).stream()
    return [snapshot.to_dict() for snapshot in run_snapshots]
def _max_visibility(self, parent_image_id):
    """Find the max visibility among an image's pipeline-run sub-documents.

    Queries the pipeline-runs collection group for documents whose parent
    image id equals ``parent_image_id`` and whose visibility field equals 1.

    Args:
        parent_image_id: The image doc id.

    Returns:
        data_types.VisibilityType.VISIBLE if at least one such document
        exists, otherwise data_types.VisibilityType.INVISIBLE.
    """
    # NOTE(review): presumably checks that exactly one of the two attributes
    # is set - confirm against utils.validate_one_arg.
    utils.validate_one_arg(self.image_provider, self.pipeline_run)
    run_docs = initialize_db().collection_group(
        database_schema.COLLECTION_IMAGES_SUBCOLLECTION_PIPELINE_RUNS)
    of_this_image = run_docs.where(
        database_schema.
        COLLECTION_IMAGES_SUBCOLLECTION_PIPELINE_RUNS_FIELD_PARENT_IMAGE_ID,
        u'==', parent_image_id)
    visible_only = of_this_image.where(
        database_schema.
        COLLECTION_IMAGES_SUBCOLLECTION_PIPELINE_RUNS_FIELD_VISIBILITY,
        u'==', 1)  # 1 == VISIBLE
    has_visible_doc = len(visible_only.get()) != 0
    if has_visible_doc:
        return data_types.VisibilityType.VISIBLE
    return data_types.VisibilityType.INVISIBLE
def setup(self):
    """Create the database client and keep it on the instance as ``db``."""
    # pylint: disable=attribute-defined-outside-init
    database_client = initialize_db()
    self.db = database_client
def setup(self):
    """Create the database client and store it as ``database_firebase``."""
    database_client = firestore_database.initialize_db()
    self.database_firebase = database_client
def setup(self):
    """Obtain a client from ``initialize_db`` and store it in ``self.db``."""
    database_client = initialize_db()
    self.db = database_client
class IngestionRemovalPipelineInterface(ABC, apache_beam.DoFn):
    """Interface containing functions for all of the removal pipeline stages.

    Subclasses implement the two abstract stages; the concrete methods below
    are shared between them.
    """
    # Database client shared through the class; note that it is created when
    # the class body is executed, not when an instance is constructed.
    db = initialize_db()

    @abstractmethod
    def get_batched_dataset_and_delete_from_database(self, num_of_batch,
                                                     remove_by_arg):
        """Query and delete the pipeline-run documents to be removed.

        Queries the Firestore database for pipeline runs ingested by the
        given image provider or pipeline run, and deletes the documents from
        the database.

        Args:
            num_of_batch: The lower limit for querying the database by the
                random field.
            remove_by_arg: Either the pipeline run id or the image provider
                id.

        Yields:
            A tuple containing a parent image id and the image provider /
            pipeline run id.
        """

    @abstractmethod
    def update_arrays_in_image_docs(self, element, remove_by_arg):
        """Update the ingested arrays of an image doc after a removal.

        Queries whether any other pipeline run / provider still exists in
        the subcollection and updates the ingested providers array and the
        ingested pipeline runs array accordingly.

        Args:
            element: A tuple with two items: the parent image id and a list
                of providers / pipeline runs,
                e.g. ['parent_id', ['run1', 'run2']].
            remove_by_arg: Either the pipeline run id or the image provider
                id.

        Returns:
            parent_image_id: The id of the image doc in the image collection.
        """

    def update_provider_and_pipeline_arrays(self, query_provider,
                                            query_pipeline_run,
                                            parent_image_ref,
                                            image_provider=None,
                                            pipeline_run=None):
        """Decide which ingested arrays need updating and update them.

        The two checks are independent, so the ingested providers array, the
        ingested runs array, neither, or both may be updated.

        Args:
            query_provider: Query for all documents from the pipeline-run
                subcollection with ``image_provider`` as provider.
            query_pipeline_run: Query for all documents from the pipeline-run
                subcollection with ``pipeline_run`` as pipeline run.
            parent_image_ref: A reference to the image doc in the image
                collection.
            image_provider: The image provider we remove the images by.
            pipeline_run: The pipeline run we remove the images by.
        """
        # No remaining sub-document for the provider / run means it no longer
        # belongs in the corresponding array of the parent image doc.
        if not query_provider.get():
            self._remove_provider_from_array(parent_image_ref, image_provider)
        if not query_pipeline_run.get():
            self._remove_pipeline_from_array(parent_image_ref, pipeline_run)

    @staticmethod
    def _remove_value_from_array_field(parent_image_ref, array_field, value):
        """Remove ``value`` from the list stored in ``array_field``.

        Reads the image doc, removes the first occurrence of ``value`` from
        the list and writes the list back. When ``value`` is not in the list
        there is nothing to change, so no write is issued.

        Args:
            parent_image_ref: A reference to the image doc in the image
                collection.
            array_field: Name of the document field holding the list.
            value: The entry to remove from that list.
        """
        image_doc_dict = parent_image_ref.get().to_dict()
        stored_array = image_doc_dict[array_field]
        if value not in stored_array:
            return
        stored_array.remove(value)
        parent_image_ref.update({array_field: stored_array})

    def _remove_provider_from_array(self, parent_image_ref, image_provider):
        """Remove ``image_provider`` from the ingested providers array.

        Args:
            parent_image_ref: A reference to the image doc in the image
                collection.
            image_provider: The image provider we remove the images by.
        """
        self._remove_value_from_array_field(
            parent_image_ref,
            database_schema.COLLECTION_IMAGES_FIELD_INGESTED_PROVIDERS,
            image_provider)

    def _remove_pipeline_from_array(self, parent_image_ref, pipeline_run):
        """Remove ``pipeline_run`` from the ingested pipeline runs array.

        Args:
            parent_image_ref: A reference to the image doc in the image
                collection.
            pipeline_run: The pipeline run we remove the images by.
        """
        self._remove_value_from_array_field(
            parent_image_ref,
            database_schema.COLLECTION_IMAGES_FIELD_INGESTED_RUNS,
            pipeline_run)

    def remove_image_doc_if_necessary(self, parent_image_id):
        """Delete the image doc once nothing references it any more.

        The image doc is removed only when both the ingested pipeline runs
        array and the ingested providers array are empty. This method is a
        generator: nothing runs until it is iterated, and the delete happens
        only after every point key has been consumed.

        Args:
            parent_image_id: The id of the image doc in the image collection.

        Yields:
            ``(point_key, 1)`` for every label of the image and every
            precision from constants.MIN_PRECISION to constants.MAX_PRECISION
            inclusive, so the keys can be removed from COLLECTION_HEATMAP.
        """
        parent_image_ref = self.db.collection(
            database_schema.COLLECTION_IMAGES).document(parent_image_id)
        image_doc_dict = parent_image_ref.get().to_dict()
        providers_array = image_doc_dict[
            database_schema.COLLECTION_IMAGES_FIELD_INGESTED_PROVIDERS]
        pipeline_runs_array = image_doc_dict[
            database_schema.COLLECTION_IMAGES_FIELD_INGESTED_RUNS]
        # Something still references the image, so it has to stay.
        if pipeline_runs_array or providers_array:
            return
        if database_schema.COLLECTION_IMAGES_FIELD_LABELS in image_doc_dict:
            # Need to remove all point keys from COLLECTION_HEATMAP.
            for label in image_doc_dict[
                    database_schema.COLLECTION_IMAGES_FIELD_LABELS]:
                for precision in range(constants.MIN_PRECISION,
                                       constants.MAX_PRECISION + 1):
                    point_key = get_point_key(
                        precision, label,
                        image_doc_dict[
                            database_schema.COLLECTION_IMAGES_FIELD_HASHMAP])
                    yield (point_key, 1)
        # Delete the image doc itself from the database.
        parent_image_ref.delete()