Exemplo n.º 1
0
 def identify_rdf_parser(self):
     """Pick the RDF parser matching ``self.rdf_format``.

     Returns:
         A parser object for 'ttl', 'nt', 'nq' and 'rdf'; ``None`` for
         'sparql' and 'sitemap'.

     Raises:
         NameError: for 'n3' and for any format not listed above.
     """
     fmt = self.rdf_format

     # These two formats have no parser object.
     if fmt in ('sparql', 'sitemap'):
         return None

     if fmt == 'n3':
         raise NameError(
             "n3 serialization is not supported, please convert to nt")

     if fmt == 'ttl':
         # Turtle input is reported as an error and then handed to the
         # N-Triples parser anyway.
         logger.error(
             "Turtle is not supported by LODStats, should be converted to ntriples!"
         )
         return RDF.NTriplesParser()

     if fmt == 'nt':
         return RDF.NTriplesParser()
     if fmt == 'nq':
         return RDF.Parser(name='nquads')
     if fmt == 'rdf':
         return RDF.Parser(name="rdfxml")

     raise NameError("unsupported format")
Exemplo n.º 2
0
 def parse(self, fin):
     """Stream the RDF found at ``fin`` into ``self.write``.

     A parser named by ``self.parser`` is created, ``fin`` is parsed as a
     statement stream, and each statement's subject, predicate and object
     are passed to ``self.write`` together with the constant ``1``.
     """
     log.debug('Reading RDF from %s' % fin)
     # Function-scope import: RDF is only loaded when parse() is called.
     import RDF
     statements = RDF.Parser(name=self.parser).parse_as_stream(fin)
     for statement in statements:
         self.write(statement.subject, statement.predicate,
                    statement.object, 1)
Exemplo n.º 3
0
def main(specloc="file:index.rdf"):
    """Print the specification HTML generated from the RDF at ``specloc``.

    The RDF is parsed into a model, class and property lists are extracted
    with ``specInformation``, an index and a detailed term listing are
    rendered, and the result is substituted into the template read from
    ``../0.1/template.html`` together with the raw RDF text.

    Args:
        specloc: Location of the RDF specification; passed both to the RDF
            parser and to ``urllib.urlopen``.
    """
    m = RDF.Model()
    p = RDF.Parser()
    p.parse_into_model(m, specloc)

    classlist, proplist = specInformation(m)

    # Build HTML list of terms.
    azlist = buildazlist(classlist, proplist)

    # Generate Term HTML
    termlist = "<h3>Classes and Properties (full detail)</h3>"
    termlist += "<div class='termdetails'>"
    termlist += docTerms('Class', classlist, m)
    termlist += docTerms('Property', proplist, m)
    termlist += "</div>"

    # Generate RDF from original namespace.
    u = urllib.urlopen(specloc)
    try:
        rdfdata = u.read()
    finally:
        u.close()
    # str.replace returns a new string; the result must be kept, otherwise
    # the XML declaration is never removed from the embedded RDF.
    rdfdata = rdfdata.replace("""<?xml version="1.0"?>""", "")

    # The template uses python-style % escapes; it must contain exactly
    # three "%s" placeholders (index, term details, raw RDF).
    with open("../0.1/template.html", "r") as f:
        template = f.read()
    print(template % (azlist.encode("utf-8"), termlist.encode("utf-8"),
                      rdfdata))
Exemplo n.º 4
0
 def init_stream_from_string(self,
                             string_rdf,
                             base_uri,
                             parser_name="rdfxml"):
     """Return a statement stream parsed from the RDF text ``string_rdf``.

     Args:
         string_rdf: RDF content held in a string.
         base_uri: Base URI handed to the parser alongside the content.
         parser_name: Name of the parser to create (default "rdfxml").
     """
     return RDF.Parser(name=parser_name).parse_string_as_stream(
         string_rdf, base_uri)
Exemplo n.º 5
0
    def posts(self, *args):
        """Return this user's posts, optionally restricted to some tags.

        Args:
            *args: Arbitrary values; only ``Tag`` instances are used, every
                other argument is ignored.

        Returns:
            A list of ``RSSTagPost`` objects whose ``user`` attribute is set
            to ``self``, or an empty list when fetching or parsing fails.
        """
        alltags = Set()
        for arg in args:
            if isinstance(arg, Tag):
                alltags.add(arg)

        url = "http://del.icio.us/rss/" + self.user
        if alltags:
            url += "/" + "+".join([str(tag) for tag in alltags])

        model = RDF.Model()
        parser = RDF.Parser()
        try:
            parser.parse_string_into_model(model, get_url_contents(url),
                                           RDF.Uri("http://foo"))
            # Every resource typed as an RSS 1.0 item is one post.
            item_pattern = RDF.Statement(
                None,
                RDF.Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                RDF.Uri("http://purl.org/rss/1.0/item"))
            posts = [
                RSSTagPost(model, p.subject)
                for p in model.find_statements(item_pattern)
            ]
            for post in posts:
                post.user = self
            return posts
        except Exception:
            # Best effort: any fetch/parse failure yields no posts. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return []
Exemplo n.º 6
0
    def parse_handle_to_model(self, rooted=False, storage=None,
                              parse_format='turtle', context=None, **kwargs):
        '''Parse self.handle into RDF model self.model.

        Args:
            rooted: Stored on ``self.rooted``.
            storage: Storage backing a newly created model; when None a
                storage from ``new_storage()`` is used. Only consulted when
                ``self.model`` is None.
            parse_format: Parser name given to ``RDF.Parser``.
            context: Forwarded to ``self.parse_model``.
            **kwargs: ``base_uri`` is honoured when present; otherwise a
                ``file://`` URI is built from the handle's absolute path.

        Returns:
            Whatever ``self.parse_model(model, context=context)`` returns.

        Raises:
            CDAOError: if the RDF model could not be created.
            Exception: if the parser could not be created.
        '''

        if storage is None:
            # store RDF model in memory for now
            storage = new_storage()

        if self.model is None:
            self.model = RDF.Model(storage)
            if self.model is None:
                raise CDAOError("new RDF.model failed")
        model = self.model

        self.rooted = rooted

        parser = RDF.Parser(name=parse_format)
        if parser is None:
            # The message previously referenced an undefined name
            # (mime_type), which would have raised NameError instead.
            raise Exception('Failed to create RDF.Parser for format %s' % parse_format)

        if 'base_uri' in kwargs:
            base_uri = kwargs['base_uri']
        else:
            base_uri = RDF.Uri(string="file://" + os.path.abspath(self.handle.name))

        statements = parser.parse_string_as_stream(self.handle.read(), base_uri)
        for s in statements:
            model.append(s)

        return self.parse_model(model, context=context)
Exemplo n.º 7
0
    def posts(self,*args):
        """Return the posts carrying this tag.

        Args:
            *args: Arbitrary values. ``Tag`` instances are collected as
                additional tags; the last ``User`` instance, if any,
                restricts the feed to that user. Other values are ignored.

        Returns:
            A list of ``RSSTagPost`` objects (with ``user`` set when a user
            was given), or an empty list when fetching or parsing fails.
        """
        alltags = Set()
        extratags = ""
        user = None

        for arg in args:
            if isinstance(arg, Tag):
                alltags.add(arg)
            if isinstance(arg, User):
                user = arg

        if alltags:
            extratags = "+" + "+".join([str(tag) for tag in alltags])

        if user is not None:
            url = "http://del.icio.us/rss/" + str(user) + "/" + self.name + extratags
        else:
            # NOTE(review): extratags is not appended in this branch, so
            # extra Tag arguments are ignored when no user is given --
            # confirm whether that is intended.
            url = "http://del.icio.us/rss/tag/" + self.name

        model = RDF.Model()
        parser = RDF.Parser()
        try:
            parser.parse_string_into_model(model, get_url_contents(url), RDF.Uri("http://foo"))
            # Every resource typed as an RSS 1.0 item is one post.
            item_pattern = RDF.Statement(
                None,
                RDF.Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                RDF.Uri("http://purl.org/rss/1.0/item"))
            posts = [RSSTagPost(model, p.subject, self)
                     for p in model.find_statements(item_pattern)]
            if user is not None:
                for post in posts:
                    post.user = user
            return posts
        except Exception:
            # Best effort: any fetch/parse failure yields no posts. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return []
Exemplo n.º 8
0
def posts():
    """Return an ``RSSTagPost`` for every item in the del.icio.us RSS feed.

    The feed at http://del.icio.us/rss/ is fetched with
    ``get_url_contents``, parsed into a fresh model, and every subject
    typed as an RSS 1.0 item is wrapped in an ``RSSTagPost``.
    """
    feed_model = RDF.Model()
    RDF.Parser().parse_string_into_model(
        feed_model,
        get_url_contents("http://del.icio.us/rss/"),
        RDF.Uri("http://foo"))

    item_pattern = RDF.Statement(
        None,
        RDF.Uri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
        RDF.Uri("http://purl.org/rss/1.0/item"))

    found = []
    for match in feed_model.find_statements(item_pattern):
        found.append(RSSTagPost(feed_model, match.subject))
    return found
Exemplo n.º 9
0
def make_query(rdf, query):
    """Run a SPARQL ``query`` against the RDF located at ``rdf``.

    The module-level ``PREFIX`` text is placed in front of ``query`` before
    execution. Returns the result of executing the query on the model.
    """
    graph = RDF.Model()
    RDF.Parser().parse_into_model(graph, rdf)
    query_text = "\n    %s\n    %s" % (PREFIX, query)
    return RDF.Query(query_text, query_language="sparql").execute(graph)
Exemplo n.º 10
0
def modeltest():
    """Parse NIF-Molecule.ttl into an RDF model, then open an IPython shell.

    Manual test helper: the local names (``ms``, ``m``, ``p1``, ``p``) are
    what is available for inspection once ``embed()`` starts.
    """
    from IPython import embed
    # this hardlocks
    # NOTE(review): the comment above does not say which statement hangs --
    # confirm before relying on this helper.
    ms = RDF.MemoryStorage('test')
    m = RDF.Model(ms)
    # Expand "~" so the ontology path is absolute before building a URI.
    p1 = Path('~/git/NIF-Ontology/ttl/NIF-Molecule.ttl').expanduser()
    p = RDF.Parser(name='turtle')
    p.parse_into_model(m, p1.as_uri())
    embed()
Exemplo n.º 11
0
    def specgen(self, mode="spec"):
        """Parse the ontology at ``self.specloc`` into an RDF model.

        Args:
            mode: Not used by the code visible here.

        Raises:
            Usage: if reading the ontology fails with an IOError.
        """

        m = RDF.Model()
        p = RDF.Parser()
        try:
            p.parse_into_model(m, self.specloc)
        except IOError as e:
            # "except X as e" is valid on Python 2.6+ and Python 3, unlike
            # the comma form.
            raise Usage("Error reading from ontology: %s" % str(e))
Exemplo n.º 12
0
 def _initOntology(self):
     """Load ``dbpedia_3.9.owl`` into a memory-storage RDF model.

     The OWL file is looked up in the directory returned by
     ``self._getCurrentDir()`` and parsed as RDF/XML with the base URI
     "http://example.org/". Returns the populated model.
     """
     # A HashStorage ('dbpedia', hash-type='bdb') was tried here previously;
     # memory storage is what is actually used.
     ontology_model = RDF.Model(RDF.MemoryStorage())
     xml_parser = RDF.Parser(name="rdfxml")
     owl_file = os.path.join(self._getCurrentDir(), 'dbpedia_3.9.owl')
     xml_parser.parse_into_model(ontology_model, 'file://' + owl_file,
                                 "http://example.org/")
     return ontology_model
Exemplo n.º 13
0
def get_parser(url, format=None):
    """Return the RDF parser for ``format``, deriving it from ``url`` if unset.

    Args:
        url: Resource location; only used (via ``get_format``) when
            ``format`` is None.
        format: One of 'ttl', 'nt', 'n3', 'nq', 'rdf', 'sparql', 'sitemap'.

    Returns:
        A parser object, or ``None`` for 'sparql' and 'sitemap'.

    Raises:
        NameError: if the format is not one of the values above.
    """
    if format is None:
        format = get_format(url)

    # These two formats have no parser object.
    if format in ('sparql', 'sitemap'):
        return None

    if format == 'ttl':
        return RDF.TurtleParser()
    # FIXME (kept from the original): N-Triples parsing probably won't do
    # for n3 input.
    if format in ('nt', 'n3'):
        return RDF.NTriplesParser()
    if format == 'nq':
        return RDF.Parser(name='nquads')
    if format == 'rdf':
        return RDF.Parser(name="rdfxml")

    raise NameError("unsupported format")
Exemplo n.º 14
0
 def make_query(self, rdf, query):
     """Run a SPARQL ``query`` against the RDF located at ``rdf``.

     ``self.rdf_prefix`` is placed in front of ``query`` before execution.
     Returns the result of executing the query on the parsed model.
     """
     graph = RDF.Model()
     RDF.Parser().parse_into_model(graph, rdf)
     query_text = "\n     %s\n     %s" % (self.rdf_prefix, query)
     return RDF.Query(query_text, query_language="sparql").execute(graph)
Exemplo n.º 15
0
    def __init__(self, globalities=None, localities=None):
        """Create the parser and initialise empty bookkeeping state.

        Args:
            globalities: Mapping stored on ``self.globalities``; a fresh
                dict is used when omitted.
            localities: Mapping stored on ``self.localities``; a fresh dict
                is used when omitted.

        Raises:
            Exception: if the 'raptor' parser could not be created.
        """
        self.parser = RDF.Parser('raptor')
        if self.parser is None:
            raise Exception("Failed to create RDF.Parser raptor")

        # Mutable default arguments are created once and shared by every
        # call, so each instance gets its own dict instead.
        self.localities = {} if localities is None else localities
        self.globalities = {} if globalities is None else globalities
        self.questions = {}
        self.answers = {}
        self.root = None
Exemplo n.º 16
0
 def _rdfxml_to_ntriples(self, data):
     """Parse the RDF read from ``data`` into a fresh redland model.

     Args:
         data: Object with a ``read()`` method returning the RDF text.

     Raises:
         TripleStoreError: if redland reports a parse error.
     """
     # Ntriples syntax is not supported by allegro graph
     # as a result format for SPARQL Construct Queries
     # HACK workaround using redland
     import RDF
     model = RDF.Model()
     parser = RDF.Parser()
     try:
         parser.parse_string_into_model(model, data.read(), '-')
     except RDF.RedlandError as err:
         # "except X as err" is valid on Python 2.6+ and Python 3.
         raise TripleStoreError(err)
Exemplo n.º 17
0
 def bootstrap(self, filename):
     """Load the Turtle file ``filename`` into ``self.model``.

     Namespaces seen by the parser are registered via
     ``self.addNamespaces``.

     Returns:
         ``self``, so calls can be chained.

     Raises:
         RDF.RedlandError: if the parser reports a falsy status.
     """
     # The with-block closes the file even when reading fails, and avoids
     # shadowing the builtin name "file".
     with open(filename, "r") as handle:
         turtle_text = handle.read()

     parser = RDF.Parser(name="turtle")
     status = parser.parse_string_into_model(
         self.model, turtle_text, "http://example.com/bootstrap")
     if not status:
         raise RDF.RedlandError("Error parsing bootstrapping file.")

     self.addNamespaces(parser.namespaces_seen())
     return self
Exemplo n.º 18
0
 def _ntriples_to_turtle(self, data):
     """Parse the N-Triples read from ``data`` into a fresh redland model.

     Args:
         data: Object with a ``read()`` method returning N-Triples text.

     Raises:
         TripleStoreError: if redland reports a parse error.
     """
     # Turtle syntax is not supported by allegro graph
     # HACK workaround using redland
     import RDF
     model = RDF.Model()
     parser = RDF.Parser('ntriples')
     # Normalise surrounding whitespace so the input ends in exactly one
     # newline.
     data = data.read().strip() + '\n'
     try:
         parser.parse_string_into_model(model, data, '-')
     except RDF.RedlandError as err:
         # "except X as err" is valid on Python 2.6+ and Python 3.
         raise TripleStoreError(err)
Exemplo n.º 19
0
    def _parse(self, file, format, base_uri=None):
        """Read ``file`` fully, close it, and start parsing it as a stream.

        Args:
            file: Open file-like object; it is always closed by this method.
            format: 'turtle' selects ``RDF.TurtleParser``; any other value
                is passed to ``RDF.Parser``.
            base_uri: Base URI for the parser; unicode values are encoded
                to UTF-8 first.

        Raises:
            TripleStoreError: if redland reports a parse error.
        """
        if format == 'turtle':
            parser = RDF.TurtleParser()
        else:
            parser = RDF.Parser(format)

        if isinstance(base_uri, unicode):
            base_uri = base_uri.encode('utf8')
        # Close the handle even when read() raises.
        try:
            data = file.read()
        finally:
            file.close()
        try:
            stream = parser.parse_string_as_stream(data, base_uri)
        except RDF.RedlandError as err:
            # "except X as err" is valid on Python 2.6+ and Python 3.
            raise TripleStoreError(err)
Exemplo n.º 20
0
def specgen(specloc, template, instances=False, mode="spec"):
    """Parse the ontology at ``specloc`` into an RDF model.

    On an IOError the error is printed and ``usage()`` is called.

    Args:
        specloc: Location of the ontology to parse.
        template: Not used by the code visible here.
        instances: Not used by the code visible here.
        mode: Not used by the code visible here.
    """

    global spec_url
    global spec_ns
    global ns_list

    m = RDF.Model()
    p = RDF.Parser()
    try:
        p.parse_into_model(m, specloc)
    except IOError as e:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print("Error reading from ontology: %s" % str(e))
        usage()
Exemplo n.º 21
0
def getAggregatedIdentifiers(identifier):
    """Retrieves and parses the resource map with the specified identifier.

    The resource map is fetched from the DataONE coordinating node and
    queried for ``ore:aggregates`` statements. A parse failure is reported
    on stdout and the (possibly empty) model is queried anyway.

    Args:
        identifier: Non-empty ``str`` identifying the resource map.

    Returns: List(str)
        List of identifiers.

    Raises:
        Exception: if ``identifier`` is not a non-empty ``str``.
    """

    if not isinstance(identifier, str) or len(identifier) < 1:
        raise Exception("Bad identifier string passed to method.")

    model = RDF.Model()
    parser = RDF.Parser(name="rdfxml")

    base_url = "https://cn.dataone.org/cn/v1/object/"
    query_url = base_url + urllib.quote_plus(identifier)

    try:
        parser.parse_into_model(model, query_url)
    except RDF.RedlandError as e:
        print("Exception: Failed to parse RDF/XML at `%s`: %s" % (query_url, e))

    query = """
    SELECT ?s ?o
    WHERE {
        ?s <http://www.openarchives.org/ore/terms/aggregates> ?o
    }
    """

    q = RDF.Query(query)

    identifiers = []

    for result in q.execute(model):
        if 'o' not in result:
            continue

        object_node = result['o']
        if not object_node.is_resource():
            continue

        # Use a separate name so the ``identifier`` argument is not rebound
        # inside the loop.
        extracted = extractIdentifierFromFullURL(str(object_node))
        if extracted is not None:
            identifiers.append(extracted)

    return identifiers
Exemplo n.º 22
0
    def load_rdf(self):
        """Reload ``self.model`` from ``self.filename`` if the file changed.

        Nothing happens when a model is already loaded and the file's
        modification time is not newer than ``self.modelMtime``.

        Raises:
            ValueError: if the Turtle file cannot be parsed.
        """
        mtime = os.path.getmtime(self.filename)
        if self.model is not None and mtime <= self.modelMtime:
            return

        log.info("loading rdf from %r" % self.filename)
        model = RDF.Model(RDF.MemoryStorage())
        u = RDF.Uri("file:%s" % self.filename)
        try:
            for s in RDF.Parser('turtle').parse_as_stream(u):
                model.add_statement(s)
        except Exception as e:
            # e.__class__.__module__ is "RDF", not the real module!
            if e.__class__.__name__ != "RedlandError":
                raise
            raise ValueError("Error parsing %s: %s" % (u, e))

        # Commit only after a complete parse, so a failed load neither
        # leaves a partial model behind nor marks the file as up to date.
        self.model = model
        self.modelMtime = mtime
Exemplo n.º 23
0
    def _ntriples_to_turtle(self, data):
        """Convert N-Triples read from ``data`` into Turtle text.

        Args:
            data: Object with a ``read()`` method returning N-Triples text.

        Returns:
            A ``StringIO`` holding the Turtle serialization, with the
            prefixes from ``self._nsmap`` registered on the serializer.

        Raises:
            TripleStoreError: if redland reports a parse error.
        """
        # Allegro graph does not support Turtle output, so redland is used
        # as a workaround for the conversion.
        import RDF
        graph = RDF.Model()
        nt_parser = RDF.Parser('ntriples')
        # Normalise surrounding whitespace so the input ends in exactly one
        # newline.
        ntriples = data.read().strip() + '\n'
        try:
            nt_parser.parse_string_into_model(graph, ntriples, '-')
        except RDF.RedlandError as err:
            raise TripleStoreError(err)

        turtle_writer = RDF.Serializer(name='turtle')
        for prefix, namespace in self._nsmap.items():
            turtle_writer.set_namespace(prefix, namespace)
        turtle_text = turtle_writer.serialize_model_to_string(graph)
        return StringIO(turtle_text)
Exemplo n.º 24
0
def main(specloc, template, mode="spec"):
    """Fill ``template`` with documentation generated from ``specloc``.

    Args:
        specloc: Location of the RDF specification.
        template: Template text. Every literal "%" must be escaped as "%%"
            and it must contain exactly two "%s" placeholders (term index,
            then term details). A leading "#format ..." line and the
            "$VersionInfo$" marker are substituted before formatting.
        mode: "spec" for the full A-Z index, "list" for a simple list.

    Returns:
        The rendered template with a regeneration timestamp comment
        appended.

    Raises:
        ValueError: if ``mode`` is neither "spec" nor "list".
    """
    # Reject unknown modes up front; previously they left ``azlist`` unbound
    # and failed much later with an UnboundLocalError.
    if mode not in ("spec", "list"):
        raise ValueError("unsupported mode: %r" % (mode,))

    m = RDF.Model()
    p = RDF.Parser()
    p.parse_into_model(m, specloc)

    classlist, proplist = specInformation(m)

    if mode == "spec":
        # Build HTML list of terms.
        azlist = buildazlist(classlist, proplist)
    else:
        # Build simple <ul> list of terms.
        azlist = build_simple_list(classlist, proplist)

    # Generate Term HTML
    termlist = docTerms('Class', classlist, m)
    termlist += docTerms('Property', proplist, m)

    # Generate RDF from original namespace.
    # NOTE(review): rdfdata is computed but no longer substituted into the
    # template; the fetch is kept so behavior is unchanged -- confirm
    # whether it can be removed.
    u = urllib.urlopen(specloc)
    try:
        rdfdata = u.read()
    finally:
        u.close()
    rdfdata = re.sub(r"(<\?xml version.*\?>)", "", rdfdata)
    rdfdata = re.sub(r"(<!DOCTYPE[^]]*]>)", "", rdfdata)

    template = re.sub(r"^#format \w*\n", "", template)
    template = re.sub(r"\$VersionInfo\$",
                      owlVersionInfo(m).encode("utf-8"), template)

    # NOTE: This works with the assumption that all "%" in the template are
    #       escaped to "%%" and it contains the same number of "%s" as the
    #       number of parameters in % ( ...parameters here... )
    template = template % (azlist.encode("utf-8"), termlist.encode("utf-8"))
    template += "<!-- specification regenerated at " + time.strftime(
        '%X %x %Z') + " -->"

    return template
Exemplo n.º 25
0
def main():
    """Import the N-Triples dump named by ``sys.argv[1]`` into a graph DB.

    Each triple becomes a relationship created through ``createNodes``;
    progress is written to stdout every 100 triples. The database is shut
    down even if the import fails part-way.
    """
    # Python 2 idiom: force UTF-8 as the default string encoding.
    reload(sys)
    sys.setdefaultencoding("UTF8")
    if not checkArgs():
        print("Usage: python dbpediaNeo4j.py /full/path/filename.nt")
        sys.exit(1)

    # create dbpedia-graph.db
    db, index = createDB()
    # Kept as a float so the percentage below uses true division.
    counter = 0.0
    try:
        # Line count of the dump, used as the progress denominator.
        file_lines = int(
            subprocess.check_output(['wc', '-l', sys.argv[1]]).split()[0])

        # RDF parses dbpedia ntriples dump
        parser = RDF.Parser("ntriples")
        stream = parser.parse_as_stream("file://" + sys.argv[1])
        print("")
        startTime = datetime.now()
        # start parsing
        for triple in stream:
            # extract nodes and relationship: the last path segment of each
            # term is used as its name
            a = str(triple.subject).split('/')[-1]
            r = str(triple.predicate).split('/')[-1]
            b = str(triple.object).split('/')[-1]
            createNodes(db, index, a, b, r)
            counter += 1
            # print updated percentage
            if (counter % 100) == 0:
                perc = (counter / file_lines) * 100
                sys.stdout.write("\rProgress: %d%%" % perc)
                sys.stdout.flush()
    finally:
        # Shutdown db, also when the import raised.
        db.shutdown()
    endTime = datetime.now()
    # total_seconds() covers the whole duration; timedelta.seconds wraps
    # around after 24 hours.
    elapsed = (endTime - startTime).total_seconds()
    print("\nFinished - %d relationships imported in %d seconds" %
          (counter, elapsed))
    print("Move %s/dbpedia-graph.db to your Neo4j data directory ;-)" %
          os.getcwd())
Exemplo n.º 26
0
    def read(self, path, fmt='', base_uri=None):
        """Parse the RDF file at ``path`` into ``self._model``.

        Gzipped input is first unpacked to a temporary file, which is
        removed again afterwards -- also when unpacking or parsing fails.

        Args:
            path: File to read.
            fmt: Parser name; guessed from ``path`` when empty.
            base_uri: Base URI forwarded to the parser.
        """
        if fmt == '':
            fmt = self._guess_fmt(path)

        tmp = None
        try:
            if self._gzipped(path):
                tmp = self._mktemp()
                self._gunzip(path, tmp)
                path = tmp

            parser = RDF.Parser(name=fmt)
            logger.info('reading "%s"...' % path)
            parser.parse_into_model(self._model,
                                    'file://' + os.path.abspath(path),
                                    base_uri=base_uri)
            logger.info('done.')
        finally:
            # Guard with exists() so a failed gunzip does not mask its own
            # error with an OSError from unlink.
            if tmp is not None and os.path.exists(tmp):
                os.unlink(tmp)
Exemplo n.º 27
0
def init_model(*filenames):
    """Build an RDF model from on-disk files.

    Args:
        *filenames: Local file paths (not URIs). Any value containing ":/"
            is rejected through ``die_unless``.

    Returns:
        A model with every file parsed into it.

    Raises:
        RuntimeError: if the storage or the model could not be created.
    """
    for filename in filenames:  # filenames, not URIs
        die_unless(
            ':/' not in filename,
            "You passed in something that looks like a URI; blowing up")

    storage = RDF.Storage(
        storage_name="hashes",
        name="test",
        options_string="new='yes',hash-type='memory',dir='.'")
    if storage is None:
        # String exceptions are not valid; raising one yields a TypeError
        # instead of the intended message.
        raise RuntimeError("new RDF.Storage failed")

    model = RDF.Model(storage)
    if model is None:
        raise RuntimeError("new RDF.Model failed")

    parser = RDF.Parser('raptor')
    for filename in filenames:
        filename_uri = RDF.Uri(string="file:" + filename)
        parser.parse_into_model(model, filename_uri)
    return model
Exemplo n.º 28
0
 def parse(self, source, sink, **args):
     """Parse the resource behind ``source`` and add its triples to ``sink``.

     Args:
         source: Input source; it is closed immediately and only its public
             id is used as the URI to parse.
         sink: Object whose ``add`` method receives each converted triple.
         **args: Accepted but not used.
     """
     source.close()
     file_uri = source.getPublicId()
     parser = RDF.Parser(name=self.format)
     # A plain loop avoids building a throwaway list of ``add`` results.
     for statement in parser.parse_as_stream(file_uri):
         sink.add(statement_to_triple(statement))
Exemplo n.º 29
0
def main():
    """Time rdflib's built-in parsers against the librdf-backed plugins.

    Parses one Turtle file and one RDF/XML file with each parser, prints
    the elapsed times and their differences, times a direct ``RDF.Parser``
    stream conversion, and finally opens an IPython shell in which the
    local variables of this function can be inspected.
    """
    from IPython import embed
    # The two bare string literals below are recorded timing results from
    # earlier runs; they are expression statements with no effect.
    """ Python 3.6.6
    ibttl 2.605194091796875
    ttl 3.8316309452056885
    diff lt - ttl -1.2264368534088135
    librdfxml 31.267616748809814
    rdfxml 58.25124502182007
    diff lr - rl -26.983628273010254
    simple time 17.405116319656372
    """
    """ Python 3.5.3 (pypy3)
    libttl 2.387338638305664
    ttl 1.3430471420288086
    diff lt - ttl 1.0442914962768555
    librdfxml 24.70371127128601
    rdfxml 17.85916304588318
    diff lr - rl 6.844548225402832
    simple time 18.32300615310669
    """

    # well I guess that answers that question ...
    # librdf much faster for cpython, not for pypy3

    from time import time
    # Register the parser classes from module 'librdflib' under the format
    # names used in the parse() calls below.
    rdflib.plugin.register('librdfxml', rdflib.parser.Parser, 'librdflib',
                           'libRdfxmlParser')
    rdflib.plugin.register('libttl', rdflib.parser.Parser, 'librdflib',
                           'libTurtleParser')

    # Turtle: plugin parser first, then rdflib's own 'turtle' parser.
    p1 = Path('~/git/NIF-Ontology/ttl/NIF-Molecule.ttl').expanduser()
    start = time()
    graph = rdflib.Graph().parse(p1.as_posix(), format='libttl')
    stop = time()
    lttime = stop - start
    print('libttl', lttime)
    #serialize(graph)

    start = time()
    graph = rdflib.Graph().parse(p1.as_posix(), format='turtle')
    stop = time()
    ttltime = stop - start
    print('ttl', ttltime)
    print('diff lt - ttl', lttime - ttltime)

    # RDF/XML: plugin parser first, then rdflib's own 'xml' parser.
    p2 = Path('~/git/NIF-Ontology/ttl/external/uberon.owl').expanduser()
    start = time()
    graph2 = rdflib.Graph().parse(p2.as_posix(), format='librdfxml')
    stop = time()
    lrtime = stop - start
    print('librdfxml', lrtime)
    if True:
        start = time()
        graph2 = rdflib.Graph().parse(p2.as_posix(), format='xml')
        stop = time()
        rltime = stop - start
        print('rdfxml', rltime)
        print('diff lr - rl', lrtime - rltime)

    if True:
        # Direct RDF.Parser run without rdflib. The timer starts after
        # parse_as_stream() is called, so only consuming the stream and
        # converting the statements is measured.
        file_uri = p2.as_uri()
        parser = RDF.Parser(name='rdfxml')
        stream = parser.parse_as_stream(file_uri)
        start = time()
        # t = list(stream)
        t = tuple(statement_to_tuple(statement) for statement in stream)
        stop = time()
        stime = stop - start
        print('simple time', stime)

    # Drop into an interactive shell with the locals above in scope.
    embed()
Exemplo n.º 30
0
 def _get_statement_stream(self):
     """Return a statement stream for the RDF at ``self._path_to_rdf``.

     The parser is selected by name from ``self._format``.
     """
     rdf_parser = RDF.Parser(name=self._format)
     statement_stream = rdf_parser.parse_as_stream(self._path_to_rdf)
     return statement_stream