def RegisterArtifact(self,
                     artifact_rdfvalue,
                     source="datastore",
                     overwrite_if_exists=False,
                     overwrite_system_artifacts=False):
    """Adds an artifact to the in-memory registry.

    Args:
      artifact_rdfvalue: The artifact to register. Its `loaded_from` and
        `error_message` attributes are overwritten by this call.
      source: A string describing where the artifact came from. It is stored
        in the `loaded_from` attribute of the artifact.
      overwrite_if_exists: Whether an already registered artifact of the same
        name may be replaced.
      overwrite_system_artifacts: Whether an already registered artifact whose
        `loaded_from` does not start with "datastore:" may be replaced.

    Raises:
      rdf_artifacts.ArtifactDefinitionError: If an artifact of the same name
        is already registered and the flags above do not permit replacing it.
    """
    # NOTE(review): the default `source` value ("datastore") does not carry
    # the "datastore:" prefix that the overwrite check below looks for, so an
    # artifact registered with the default source is later treated like a
    # system artifact. Confirm that this is intended.
    name = artifact_rdfvalue.name

    if name in self._artifacts:
        if not overwrite_if_exists:
            raise rdf_artifacts.ArtifactDefinitionError(
                name,
                "artifact already exists and `overwrite_if_exists` is unset")

        if not overwrite_system_artifacts:
            existing = self._artifacts[name]
            from_datastore = existing.loaded_from.startswith("datastore:")
            if not from_datastore:
                # The registered artifact was not uploaded to the datastore
                # but came from a file, so it must not be replaced.
                raise rdf_artifacts.ArtifactDefinitionError(
                    name, "system artifact cannot be overwritten")

    # Remember the origin of the artifact, it is handy when debugging.
    artifact_rdfvalue.loaded_from = source
    # Errors recorded for a previous registration no longer apply.
    artifact_rdfvalue.error_message = None

    self._artifacts[artifact_rdfvalue.name] = artifact_rdfvalue
def ArtifactsFromYaml(self, yaml_content):
    """Parses YAML (or JSON) content into a list of artifacts.

    Every document found in `yaml_content` is expected to be a mapping from
    artifact field names to values. Content made of a single document that is
    itself a list is treated as a list of artifact definitions.

    Args:
      yaml_content: YAML (or JSON) content with artifact definitions.

    Returns:
      A list of `rdf_artifacts.Artifact` objects, in definition order.

    Raises:
      rdf_artifacts.ArtifactDefinitionError: If a definition is not a mapping
        or cannot be converted into an artifact.
    """
    raw_list = list(yaml.safe_load_all(yaml_content))

    # TODO(hanuszczak): I am very sceptical about that "doing the right thing"
    # below. What are the real use cases?

    # Try to do the right thing with json/yaml formatted as a list.
    if len(raw_list) == 1 and isinstance(raw_list[0], list):
        raw_list = raw_list[0]

    # Convert json into artifact and validate.
    valid_artifacts = []
    for artifact_dict in raw_list:
        # An empty document is loaded as `None` and a scalar document as a
        # plain value. Neither can be expanded with `**`, and neither has the
        # `get` method used for error reporting below, so such documents are
        # rejected explicitly instead of failing with an unrelated
        # `AttributeError` raised from within the error handler.
        if not isinstance(artifact_dict, dict):
            cause = TypeError("expected a mapping, got `%s`" %
                              type(artifact_dict).__name__)
            raise rdf_artifacts.ArtifactDefinitionError(
                None, "invalid definition", cause=cause)

        # In this case we are feeding parameters directly from potentially
        # untrusted yaml/json to our RDFValue class. However, safe_load
        # ensures these are all primitive types as long as there is no other
        # deserialization involved, and we are passing these into protobuf
        # primitive types.
        try:
            artifact_value = rdf_artifacts.Artifact(**artifact_dict)
        except (TypeError, AttributeError, type_info.TypeValueError) as e:
            name = artifact_dict.get("name")
            raise rdf_artifacts.ArtifactDefinitionError(
                name, "invalid definition", cause=e)

        valid_artifacts.append(artifact_value)

    return valid_artifacts
def ArtifactsFromYaml(self, yaml_content):
    """Parses YAML (or JSON) content into a list of artifacts.

    Every document found in `yaml_content` is expected to be a mapping from
    artifact field names to values. Content made of a single document that is
    itself a list is treated as a list of artifact definitions. Fields listed
    in `DEPRECATED_ARTIFACT_FIELDS` are dropped before conversion.

    Args:
      yaml_content: YAML (or JSON) content with artifact definitions.

    Returns:
      A list of `rdf_artifacts.Artifact` objects, in definition order.

    Raises:
      rdf_artifacts.ArtifactDefinitionError: If a definition is not a mapping
        or cannot be converted into an artifact.
    """
    raw_list = yaml.ParseMany(yaml_content)

    # TODO(hanuszczak): I am very sceptical about that "doing the right thing"
    # below. What are the real use cases?

    # Try to do the right thing with json/yaml formatted as a list.
    if (isinstance(raw_list, list) and len(raw_list) == 1 and
            isinstance(raw_list[0], list)):
        raw_list = raw_list[0]

    # Convert json into artifact and validate.
    valid_artifacts = []
    for artifact_dict in raw_list:
        # Anything that is not a mapping (e.g. an empty or a scalar document)
        # supports neither the `pop(field, None)` calls nor the `**` expansion
        # below. Such documents are rejected explicitly so that the caller
        # gets a definition error rather than an unrelated `AttributeError`
        # or `TypeError` raised outside of the error handling.
        if not isinstance(artifact_dict, dict):
            cause = TypeError("expected a mapping, got `%s`" %
                              type(artifact_dict).__name__)
            raise rdf_artifacts.ArtifactDefinitionError(
                None, "invalid definition", cause=cause)

        # Old artifacts might still use deprecated fields, so we have to
        # ignore such. Here, we simply delete keys from the dictionary as
        # otherwise the RDF value constructor would raise on unknown fields.
        for field in DEPRECATED_ARTIFACT_FIELDS:
            artifact_dict.pop(field, None)

        # In this case we are feeding parameters directly from potentially
        # untrusted yaml/json to our RDFValue class. However, safe_load
        # ensures these are all primitive types as long as there is no other
        # deserialization involved, and we are passing these into protobuf
        # primitive types.
        try:
            artifact_value = rdf_artifacts.Artifact(**artifact_dict)
        except (TypeError, AttributeError, type_info.TypeValueError) as e:
            name = artifact_dict.get("name")
            raise rdf_artifacts.ArtifactDefinitionError(
                name, "invalid definition", cause=e)

        valid_artifacts.append(artifact_value)

    return valid_artifacts
def _LoadArtifactsFromDatastore(self):
    """Registers and validates the artifacts kept in the data store.

    Artifacts are read either from the relational database or from the
    artifact collections of the configured datastore sources, and each one is
    registered with the "datastore:" source. Artifacts that fail validation
    are logged and get their `error_message` attribute set.

    Raises:
      rdf_artifacts.ArtifactDefinitionError: If registering an artifact fails
        for a reason other than shadowing a system artifact, or if any
        artifact shadowed a system artifact (in which case the offending
        artifacts are deleted from the data store first).
    """
    # TODO(hanuszczak): Why do we have to remove anything? If some artifact
    # tries to shadow system artifact shouldn't we just ignore them and
    # perhaps issue some warning instead? The datastore being loaded should be
    # read-only during upload.

    if data_store.RelationalDBEnabled():
        artifact_list = data_store.REL_DB.ReadAllArtifacts()
    else:
        artifact_list = []
        for artifact_coll_urn in self._sources.GetDatastores():
            artifact_list.extend(ArtifactCollection(artifact_coll_urn))

    loaded_artifacts = []
    # Names of artifacts that shadow system artifacts and therefore need to be
    # deleted from the data store.
    to_delete = []

    for artifact_value in artifact_list:
        try:
            self.RegisterArtifact(
                artifact_value, source="datastore:", overwrite_if_exists=True)
        except rdf_artifacts.ArtifactDefinitionError as error:
            # TODO(hanuszczak): String matching on exception message is
            # rarely a good idea. Instead this should be refactored to some
            # exception class and then handled separately.
            if "system artifact" not in str(error):
                raise
            to_delete.append(artifact_value.name)
        else:
            loaded_artifacts.append(artifact_value)

    if to_delete:
        DeleteArtifactsFromDatastore(to_delete, reload_artifacts=False)
        self._dirty = True

        # TODO(hanuszczak): This is connected to the previous TODO comment.
        # Why do we throw exception at this point? Why do we delete something
        # and then abort the whole upload procedure by throwing an exception?
        detail = "system artifacts were shadowed and had to be deleted"
        raise rdf_artifacts.ArtifactDefinitionError(to_delete, detail)

    # Once all artifacts are loaded we can validate. Whenever an artifact
    # fails, it is dropped from the list and another full pass is made over
    # the remaining ones, until a pass completes without any failure.
    while True:
        any_failed = False

        for artifact_obj in list(loaded_artifacts):
            try:
                Validate(artifact_obj)
            except rdf_artifacts.ArtifactDefinitionError as error:
                logging.error("Artifact %s did not validate: %s",
                              artifact_obj.name, error)
                artifact_obj.error_message = utils.SmartStr(error)
                loaded_artifacts.remove(artifact_obj)
                any_failed = True

        if not any_failed:
            break