Exemplo n.º 1
0
 def fmtInfo(self) -> MultiDict:
     """Return the format-info MultiDict, building it on first access.

     Each value obtained from ``self.get_values('logset:logFormatInfo')`` is
     split at its first '='; the text before it is used as the key and the
     text after it as the value. The result is stored in ``self._fmtInfo``
     and reused on later calls.
     """
     if self._fmtInfo is not None:
         return self._fmtInfo
     # assign before filling, so the cache attribute is set even while iterating
     self._fmtInfo = MultiDict()
     for entry in self.get_values('logset:logFormatInfo'):
         name, _, value = entry.partition('=')
         self._fmtInfo.add(name, value)
     return self._fmtInfo
Exemplo n.º 2
0
 def instance(cls, uri):
     """ given the uri of a node, instantiate an object of this class based
         on the properties of that node in the graph

         `uri` may be a URIRef or anything graph.geturi() can convert to one.
         Predicates that have an entry in cls.getters are stored under that
         getter key; any other predicate is stored under its own uri.
     """
     if not isinstance(uri, URIRef):
         uri = graph.geturi(uri)
     mdict = MultiDict(uri=[uri])
     # map the full uri of each known predicate back to its key in cls.getters
     preds_by_uri = {graph.geturi(p): p for p in cls.getters}
     # NOTE(review): this used to read `self.graph.predicate_objects(uri)`, but
     # there is no `self` in scope here (only `cls`), so it raised NameError.
     # The module-level `graph` is used instead, as for geturi() above --
     # confirm that it exposes predicate_objects().
     for pred_uri, obj in graph.predicate_objects(uri):
         # a property we're not prepared for is just included under its uri
         mdict.add(preds_by_uri.get(pred_uri, pred_uri), obj)
     return cls(properties=mdict)
Exemplo n.º 3
0
    def __init__(self, properties: MultiDict = None, **kwargs) -> None:
        """Initialise the node from a MultiDict of properties plus keyword extras.

        `properties` is passed to the MultiDict constructor and every keyword
        argument is then added to the result. The special keys 'uri' and
        'namespace', when present, are moved out of the properties and into
        the `_uri` / `_namespace` attributes.
        """
        # uri, namespace and label start unset; getting them lazily is common
        # enough to build into the base class
        self._uri: Optional[str] = None
        self._namespace: Optional[str] = None
        self._label: Optional[str] = None

        # eg "dct:title": set(Literal("my title"))
        self.properties = MultiDict(properties)
        for name, value in kwargs.items():
            self.properties.add(name, value)

        # 'uri' and 'namespace' are kept as attributes, not as properties
        for special, attr in (('uri', '_uri'), ('namespace', '_namespace')):
            if special in self.properties:
                setattr(self, attr, self.properties.one(special))
                self.properties.remove(special)

        # every attribute we have a getter for should have an entry in
        # properties, even if it is empty
        for predicate in self.getters:
            if predicate not in self.properties:
                self.properties.add(predicate)

        # when adding a node to the graph and recursing into its properties,
        # this flag is the mechanism for bypassing nodes already in the graph
        self._in_graph = False
Exemplo n.º 4
0
    def __init__(self, mods : dict, ties : set, name : str = ""):
        """Build the operations and values described by `mods` and `ties`.

        `mods` maps a module name to its opcode; one Operation is created per
        entry. `ties` is iterated as (src_name, dst_name, dst_port) triples
        whose names must be keys of `mods`. The results are stored as
        frozensets in `_operations` and `_values`.
        """
        super().__init__(name)

        # one Operation per module, keyed by module name
        ops_by_name = {
            mod_name: Operation(mod_name, opcode)
            for mod_name, opcode in mods.items()
        }

        # source operation -> (destination operation, destination port)
        links = MultiDict()
        for src_name, dst_name, dst_port in ties:
            target = (ops_by_name[dst_name], dst_port)
            links[ops_by_name[src_name]] = target

        # one Value per source, paired with what the MultiDict holds for it
        values = {Value(source, links[source]) for source in links}

        self._operations = frozenset(ops_by_name.values())
        self._values = frozenset(values)
Exemplo n.º 5
0
def factory(node: str):
    """Instantiate the class registered in `classes` under `node`.

    The new object is given a single property, 'uri', holding the class's
    own `rdf_node`.
    """
    node_cls = classes[node]
    return node_cls(properties=MultiDict(uri=[node_cls.rdf_node]))
Exemplo n.º 6
0
    def known(cls, filters: Dict[str, str] = None):
        """ generator-constructor over known nodes of a given type

        Builds a SPARQL SELECT over ?uri plus one variable per predicate in
        cls.getters. A predicate named in `filters` becomes a mandatory triple
        pattern with the given value; every other predicate is wrapped in
        OPTIONAL. Rows are ordered by ?uri, and consecutive rows that share a
        uri are merged into one MultiDict before an instance is constructed.

        Args:
            filters: predicate -> required value. Identifier values are
                wrapped in < >; anything else (eg the string
                'ddict:someThing') is inserted into the query as it is.
                None, the default, means no filtering.

        Yields:
            cls(properties=mdict) for each distinct uri in the query results.
        """
        # TODO instead of making every property OPTIONAL, add a class var
        # `required` listing the required predicates, emit those as plain
        # triple patterns and only the remaining getters inside OPTIONAL,
        # keeping "ORDER BY ?uri" and the merge loop below to collect the full
        # set of properties for each uri. Since we don't actually enforce that
        # certain properties are required, everything is optional for now.
        if filters is None:
            # avoid a mutable default argument shared between calls
            filters = {}

        def as_str(value):
            # uris/rdflib identifiers need < > around them to appear in a
            # query; a string like 'ddict:someThing' should stay as it is
            if isinstance(value, Identifier):
                return "<{0}>".format(str(value))
            return value

        logging.debug("filters has: {0}".format(filters))
        preds = list(cls.getters.keys())  # impose an order
        qvars = ['?v{:d}'.format(i) for i in range(len(preds))]

        parts = ["SELECT ?uri {0} WHERE {{ ".format(' '.join(qvars))]
        if cls.rdf_superclass is None:
            parts.append("?uri a {0} . ".format(cls.rdf_class))
        else:
            parts.append("?uri a ?type . ")
            parts.append(
                "?type rdfs:subClassOf* {0} . ".format(cls.rdf_superclass))
        for pred, var in zip(preds, qvars):
            if pred in filters:
                parts.append(
                    "?uri {0} {1} . ".format(pred, as_str(filters[pred])))
            else:
                parts.append(
                    "OPTIONAL {{ ?uri {0} {1} . }} ".format(pred, var))
        parts.append("} ORDER BY ?uri ")
        query = "".join(parts)
        logging.debug("query is: {0}".format(query))

        curr_uri = None
        mdict = None
        for row in graph.query(query):
            logging.debug("found {0}".format(str(row)))
            next_uri = row[0]  # the uri
            if next_uri != curr_uri:
                # a new uri starts: emit the node collected so far, if any
                if curr_uri is not None:
                    logging.debug("making a {0} with props {1}".format(
                        cls.__name__, str(mdict)))
                    yield cls(properties=mdict)
                mdict = MultiDict(uri=[next_uri])
                curr_uri = next_uri
            # add each var to mdict
            for key, val in zip(preds, row[1:]):
                mdict.add(key, val)
        if mdict is not None:
            logging.debug("making a {0} with props {1}".format(
                cls.__name__, str(mdict)))
            yield cls(properties=mdict)  # the last one
        else:
            logging.debug("no nodes of type {0} found".format(cls.__name__))
Exemplo n.º 7
0
    def catalog(self, candidates: Set[FileInfo],
                context: Context) -> Set[FileInfo]:
        """Add a ConcreteLog to the graph for each candidate file this claims.

        A candidate is selected when its filename matches any regex in
        context['filepatterns']; the selection is then reduced using
        self.right_mime_type(). For each remaining file a ConcreteLog is
        built from properties taken from `context` plus the file's label and
        download URL, and added to the graph.

        Reads these context keys: 'filepatterns', 'logFormatType',
        'formatinfo', 'logseries_uri', 'dcat:accessURL', 'subjects',
        'namespace' and 'logset'. Pushes 'handler_args' onto the context for
        the duration of the call and pops it before returning.

        Returns:
            The members of `candidates` that are not in the final selection.
        """
        filepatterns = context['filepatterns']
        matching = set()
        for regex in filepatterns:
            logging.info("filepattern: {0}: {1}".format(regex, regex.pattern))
            logging.info("{0:d} candidates".format(len(candidates)))
            logging.info("candidates: {0}".format(candidates))
            # materialise the matches once: str() of a lazy filter object only
            # shows its repr, so the old log line never listed the files
            matched = {c for c in candidates if regex.match(c.filename)}
            logging.info("after filtering: {0}".format(matched))
            matching |= matched

        # filter by MIMEtype:
        # NOTE(review): `mediatypes` is never used below; the lookup is kept
        # only because it may raise for a missing key -- confirm and remove.
        mediatypes = self.properties['dcat:mediaType']
        # NOTE(review): this drops the files for which right_mime_type() is
        # truthy, which looks inverted for a "right mime type" test -- confirm
        # the helper's meaning before changing it.
        matching -= {f for f in matching if self.right_mime_type(f)}

        # args for the LogFormatType that will handle the actual file/source:
        handler_args = {
            'rdf_class': context['logFormatType'],
            'fmtinfo': context['formatinfo']
        }
        context.push(handler_args=handler_args)

        # everything the ConcreteLog infers, we can just pass as properties:
        common_properties = MultiDict()
        common_properties.add('logset:isInstanceOf', context['logseries_uri'])
        common_properties.add('dcat:accessURL', context['dcat:accessURL'])
        common_properties.add('logset:subject', *context['subjects'])
        common_properties.add('namespace', context['namespace'])
        logging.info("logset for concretelog has: {0}".format(
            context['logset']))

        # in most cases we are looking file-by-file .. FilePerTimepoint
        # can override this default behavior
        for f in matching:
            properties = MultiDict(common_properties)
            properties.add('rdfs:label', Literal(f.filename))
            relpath = (f.relpath + os.sep + f.filename).lstrip(os.sep)
            properties.add('dcat:downloadURL', Literal(relpath))
            logging.info(
                "properties for concretelog has: {0}".format(properties))

            # the dict pushed onto the context above is updated in place
            handler_args['target_url'] = os.sep.join(f)  # full local path
            log = ConcreteLog(properties=properties)
            logging.info("adding log to graph: {0}".format(log))
            try:
                log.add_to_graph(context)
            except UnsupportedLogFormatHandler as err:
                # logging.warn is a deprecated alias of logging.warning
                logging.warning("logformat {0} not implemented".format(err))

            properties.remove('rdfs:label')
            properties.remove('dcat:downloadURL')
        context.pop(('handler_args', ))

        return candidates - matching
Exemplo n.º 8
0
 def __init__(self, properties: MultiDict = None) -> None:
     """Initialise from `properties`, with the derived attributes unset.

     When `properties` is omitted (or None) a new, empty MultiDict is
     created for this call, instead of one default instance built at
     definition time and shared by every call.
     """
     if properties is None:
         properties = MultiDict()
     super().__init__(properties)
     # these start as None and are expected to be filled in later
     self._fmtInfo = None  # MultiDict()
     self._filePatterns = None
     self._logFormatType = None