def _add_name(self, ref, source, rewrite=False):
    r"""
    Register `source` as a provider of the semantic reference `ref` in the catalog_names index.

    A source is not allowed to provide multiple semantic references.  A ref must be accepted by ref_regex
    (historically documented here as ([A-Za-z0-9_]+(\.[A-Za-z0-9_])*); ref_regex itself is authoritative).

    For every name the source is already registered to, the following cases are checked in order:
     - the source is this archive's own source and it is registered under the default local_ref: the registration
       is moved to `ref` (merged with any sources already registered to `ref`) and the method returns
     - the source is already registered to `ref`: nothing to do, the method returns
     - the source is registered to a different name and rewrite is True: the old registration is removed
     - the source is registered to a different name and rewrite is False: SourceAlreadyKnown is raised
    If none of the cases returned or raised, the source is added under `ref`.

    :param ref: semantic reference to register the source under
    :param source: the data source being named.  A source of None is never treated as already registered.
    :param rewrite: [False] if True, if SourceAlreadyKnown, re-assign the source to the new ref.  This may result
     in the archive's ref changing, and should only be used when an authoritative source-ref pair is supplied
     (e.g. a JSON file that was loaded into the archive)
    :return: None
    :raises InvalidSemanticReference: if ref does not match ref_regex
    :raises SourceAlreadyKnown: if source is already registered to a different name and rewrite is False
    """
    if not ref_regex.match(ref):
        raise InvalidSemanticReference('%s' % ref)

    if source is not None:
        for k, s in self._catalog_names.items():
            if source not in s:
                continue
            if source == self.source and k == local_ref(self.source):
                # if we're trying to add our own source and ref to the name index, and the source is currently
                # registered to the default local_ref, then we override it.  The sets are merged rather than
                # assigned so that sources already registered to `ref` are not silently dropped.
                # Mutating the dict here is safe only because we return without advancing the iterator.
                moved = self._catalog_names.pop(k)
                self._catalog_names[ref] |= moved
                return
            if k == ref:
                return
            if not rewrite:
                raise SourceAlreadyKnown('Source %s already registered to name %s (vs: %s)' % (source, k, ref))
            # only the set is mutated, not the dict, so iteration can safely continue
            s.remove(source)
            print('%s: <source removed>' % k)

    print('%s: %s' % (ref, source))
    self._catalog_names[ref].add(source)
    # an archive created without a source adopts the authoritative one supplied for its own ref
    if ref == self.ref and self.source is None and rewrite:
        self._source = source
def ref(self):
    """
    The semantic reference under which this object is known.

    Resolution order:
     1. self._origin, when it is not None
     2. the first name in self._catalog_names whose set of sources contains self.source
     3. the default local_ref computed from self.source
    :return: the resolved semantic reference
    """
    origin = self._origin
    if origin is not None:
        return origin

    for name, sources in self._catalog_names.items():
        if self.source in sources:
            return name

    # the source is not registered under any name
    return local_ref(self.source)
def test_rewrite_name(self):
    """
    Check which reference wins when serialized content is loaded into an archive.

    Background: if we create an archive from an existing file, but without specifying the ref, then the
    EntityStore will convert that file to a local ref and use it.  But if the file contains a ref specification,
    we want that one to win, so it is used instead.  The test therefore expects the local ref before loading and
    the ref from the loaded content afterwards.
    :return:
    """
    archive = self.ar

    # before loading: the default local ref derived from the working file
    self.assertEqual(archive.ref, local_ref(WORKING_FILE))

    archive.load_from_dict(archive_json)

    # after loading: the ref specified by the loaded content
    self.assertEqual(archive.ref, test_ref)
def new(cls, source, ds_type, **kwargs):
    """
    Alternate constructor: wrap a newly created LcResource in an instance of cls.

    The resource's reference is computed from the source by local_ref, using the 'config' prefix.
    :param source: given to local_ref and passed to LcResource as its second positional argument
    :param ds_type: passed to LcResource as its third positional argument
    :param kwargs: forwarded unchanged to LcResource
    :return: cls(resource)
    """
    resource = LcResource(local_ref(source, prefix='config'), source, ds_type, **kwargs)
    return cls(resource)
def __init__(self, source, ref=None, quiet=True, upstream=None, static=False, dataReference=None, ns_uuid=None,
             no_validate=None, **kwargs):
    """
    Create an EntityStore: a provenance structure for a collection of entities.

    Source
    ------
    Ostensibly, an EntityStore has a single source from which entities are collected.  The source is a resolvable
    URI that indicates a data resource from which data describing the entities can be extracted.  The exact manner
    of extracting data from resources is subclass-dependent.

    UUIDs
    -----
    Internally, all entities are stored with UUID keys.  If the external references do not contain UUIDs, it is
    recommended to derive a UUID3 using an archive-specific, stable namespace ID.  The class-level
    _ns_uuid_required attribute governs this option:
     - True: an ns_uuid argument must be provided when the class is instantiated.  This is consistent with a use
       case in which it is desirable to have predictable, fixed UUIDs (i.e. to interface with a data system that
       requires stable UUIDs)
     - False: a random ns_uuid is generated, and used to create a UUID anytime an entity is given a non-UUID
       external_ref
     - None: UUID3 are not used and any supplied ns_uuid argument is ignored.  external_refs must always be UUIDs.
    There is still some refactoring to be done, to try to eliminate the need for externally visible UUIDs anywhere.

    Semantic reference
    ------------------
    An archive has a single semantic reference that describes the data context from which its native entities
    were gathered.  The reference is given using dot-separated hierarchical terms in order of decreasing semantic
    significance from left to right.  The leftmost specifier should describe the maintainer of the resource (which
    defaults to 'local' when a reference argument is not provided), followed by arbitrarily more precise
    specifications.  Some examples are:
        local.lcia.traci.2.1.spreadsheet
        ecoinvent.3.2.undefined

    The purpose for the source / reference distinction is that in principle many different sources can all provide
    the same semantic content: for instance, ecoinvent can be accessed from the website or from a file on the
    user's computer.  In principle, if the semantic reference for two archives is the same, the archives should
    contain excerpts of the same data, even if drawn from different sources.

    Entity links
    ------------
    An entity is uniquely identified by its link property, which is made from concatenating the semantic origin
    and a stable reference known as an 'external_ref', as 'origin/external_ref'.  The first slash is the delimiter
    between origin and reference.  Examples:
        elcd.3.2/processes/00043bd2-4563-4d73-8df8-b84b5d8902fc
        uslci.ecospold/Acetic acid, at plant

    Note that the inclusion of embedded whitespace, commas, and other characters indicate that these semantic
    references are not proper URIs.

    It is hoped that the user community will help develop and maintain a consistent and easily interpreted
    namespace for semantic references.  If this is done, it should be possible to identify any published entity
    with a concise reference.

    Origins
    -------
    When an entity is first added to an archive, it is assigned that archive's *reference* as its origin,
    following the expectation that data about the same reference from different sources is the same data.

    When an entity with a different origin is added to an archive, it is good practice to add a mapping from that
    origin to its source in the receiving archive's "catalog_names" dictionary.  However, since the entity itself
    does not know its archive's source, this cannot be done automatically.

    :param source: physical data source-- where the information is being drawn from
    :param ref: optional semantic reference for the data source.  gets added to catalog_names.  When omitted,
     dataReference is used if given, otherwise a local ref is derived from the source.
    :param quiet: [True] stored as-is; controls whether a message is printed every time a new entity is
     added / deleted / modified
    :param upstream: if not None, passed to set_upstream
    :param static: [False] whether archive is expected to be unchanging.
    :param dataReference: alternative to ref; only consulted when ref is None
    :param ns_uuid: required to store entities by common name.  Used to generate uuid3 from string inputs.
    :param no_validate: if True, skip validation on entity add
    :param kwargs: any other information that should be serialized with the archive
    """
    self._source = source

    # an explicit ref takes precedence, then dataReference, then the default local ref for the source
    if ref is None:
        ref = local_ref(source) if dataReference is None else dataReference

    # known entities, indexed by uuid
    self._entities = {}

    # whether to print out a message every time a new entity is added / deleted / modified
    self._quiet = quiet

    # the caller's kwargs dict is kept and extended with further serialization entries below
    self._serialize_dict = kwargs

    self._counter = defaultdict(int)
    self._ents_by_type = defaultdict(set)
    self._upstream = None
    self._no_validate = no_validate

    self._loaded = False
    self._static = static
    self._descendant = False

    self._ns_uuid = self._set_ns_uuid(ns_uuid)

    if upstream is not None:
        self.set_upstream(upstream)

    # this is a place to map semantic references to data sources
    self._catalog_names = defaultdict(set)
    self._add_name(ref, source)
    self._serialize_dict['dataReference'] = ref

    if self._ns_uuid is not None:
        self._serialize_dict['ns_uuid'] = str(self._ns_uuid)