示例#1
0
def doCommand():
    """Command line RDF/N3 crawler
        
 crawl <uriref>

options:
 
See http://www.w3.org/2000/10/swap/doc/cwm  for more documentation.
"""
    global agenda
    global already
    uriref = sys.argv[1]
    uri = join(base(), uriref)
    r = symbol(uri)
    diag.setVerbosity(0)
    print "@prefix : <http://www.w3.org/2000/10/swap/util/semweb#>."
    print "# Generated by crawl.py ", cvsRevision[1:-1]
    agenda = [r]
    while agenda != []:
        r = agenda[0]
        agenda = agenda[1:]
        already.append(r)
        crawl(r)
    print "# ", len(already), "attempts,", successes, "successes."
示例#2
0
文件: crawl.py 项目: AwelEshetu/cwm
def doCommand():
    """Command line RDF/N3 crawler
        
 crawl <uriref>

options:
 
See http://www.w3.org/2000/10/swap/doc/cwm  for more documentation.
"""
    global agenda
    global already
    uriref = sys.argv[1]
    uri = join(base(), uriref)
    r = symbol(uri)
    diag.setVerbosity(0)
    print "@prefix : <http://www.w3.org/2000/10/swap/util/semweb#>."
    print "# Generated by crawl.py ", cvsRevision[1:-1]
    agenda = [r]
    while agenda != []:
	r = agenda[0]
	agenda = agenda[1:]
	already.append(r)
	crawl(r)
    print "# ", len(already), "attempts,", successes, "successes."
示例#3
0
#
from xml.dom.minidom import parse, Text
from diag import verbosity, setVerbosity, progress
import thing, llyn

# Module-level formula; do() creates its blank nodes and symbols in it.
kb = thing.formula()
# do() reports progress when verbosity() exceeds 20 and 70, so 99 turns on
# both kinds of message.
setVerbosity(99)


def do(ele, level=0):
    """Process one DOM node, reporting progress along the way.

    ele   -- the node to process (presumably from an xml.dom.minidom
             tree, given the Text import -- confirm against the caller)
    level -- only used here to indent the progress output, two spaces
             per level

    Text nodes are ignored and give None.  For a "dict" element a new
    blank node is created in the module-level formula kb and the child
    nodes are scanned in order: Text children are skipped, and a "key"
    child sets the current predicate to a symbol named after the key.
    """
    if isinstance(ele, Text):
        # Text nodes are not converted; just mention them when very verbose.
        if verbosity() > 70: progress("Ignoring text '%s'" % ele.nodeValue)
        return None
    ln = ele.localName
    if verbosity() > 20: progress("  " * level, ln)
    if ln == "dict":
        # NOTE(review): me presumably stands for the dict itself -- confirm.
        me = kb.newBlankNode()
        n = len(ele.childNodes)
        i = 0
        pred = None  # symbol made from the most recent "key" child, if any
        while i < n:
            e = ele.childNodes[i]
            if isinstance(e, Text):
                if verbosity() > 70:
                    progress("Ignoring text '%s'" % e.nodeValue)
                i = i + 1
                continue
            if e.localName == "key":
                # The key's name is the data of its first child; an empty
                # name falls back to "nullProp".
                property = e.firstChild.data
                if not property: property = "nullProp"
                pred = kb.newSymbol(property)
示例#4
0
"""

import urllib2

# eliminate this dependency; use mnot's HtmlDom instead
# http://www.mnot.net/python/HtmlDom.py
# <AaronSw> it lets me do: d = fetch(url); print xml.xpath.Evaluate("//*[@class='rss:item']/text()", d)
import libxml2  # http://xmlsoft.org/python.html , DebianPackage:libxml2-python2.1 won't work because llyn.py uses 2.2isms

# http://www.w3.org/2000/10/swap/
from myStore import Namespace, load, symbol, literal, formula, bind
import myStore
import uripath, toXML  # http://www.w3.org/2000/10/swap/
from RDFSink import SYMBOL, LITERAL, FORMULA
import diag
# Set the diag module's verbosity to 0 as soon as this module is loaded
# (presumably the quietest level -- confirm against diag).
diag.setVerbosity(0)


def DC(ln):
    """Return the full URI for local name *ln* in the Dublin Core
    elements 1.1 namespace."""
    namespace = 'http://purl.org/dc/elements/1.1/'
    return namespace + ln


def RDFS(ln):
    """Return the full URI for local name *ln* in the RDF Schema
    namespace."""
    namespace = 'http://www.w3.org/2000/01/rdf-schema#'
    return namespace + ln


class Crawler:
    def __init__(self, fmla, here):
        """Create a crawler bound to the formula *fmla*.

        fmla -- kept as self._fmla
        here -- accepted but not used by this constructor
        """
        self._fmla = fmla

    def crawlFrom(self, addr, prefix, max):
示例#5
0
"""

import urllib2

# eliminate this dependency; use mnot's HtmlDom instead
# http://www.mnot.net/python/HtmlDom.py
# <AaronSw> it lets me do: d = fetch(url); print xml.xpath.Evaluate("//*[@class='rss:item']/text()", d)
import libxml2 # http://xmlsoft.org/python.html , DebianPackage:libxml2-python2.1 won't work because llyn.py uses 2.2isms

# http://www.w3.org/2000/10/swap/
from myStore import Namespace, load, symbol, literal, formula, bind
import myStore
import uripath, toXML # http://www.w3.org/2000/10/swap/
from RDFSink import SYMBOL, LITERAL, FORMULA
import diag
# Set the diag module's verbosity to 0 at import time
# (presumably the quietest level -- confirm against diag).
diag.setVerbosity(0)

def DC(ln):
    """Build the URI of the Dublin Core elements 1.1 term whose local
    name is *ln*."""
    prefix = 'http://purl.org/dc/elements/1.1/'
    return prefix + ln

def RDFS(ln):
    """Build the URI of the RDF Schema term whose local name is *ln*."""
    prefix = 'http://www.w3.org/2000/01/rdf-schema#'
    return prefix + ln


class Crawler:
    def __init__(self, fmla, here):
        """Set up the crawler with the formula it works on.

        fmla -- stored as self._fmla (crawlFrom reads it back from there)
        here -- accepted but not used by this constructor
        """
        self._fmla = fmla

    def crawlFrom(self, addr, prefix, max):
        fmla = self._fmla
示例#6
0
#
from xml.dom.minidom import parse, Text
from diag import verbosity, setVerbosity, progress
import thing, llyn

# Module-level formula in which do() creates blank nodes and symbols.
kb=thing.formula()
# do() emits progress messages above verbosity 20 and 70; 99 enables both.
setVerbosity(99)


def do(ele, level=0):
    if isinstance(ele, Text):
	if verbosity() > 70: progress("Ignoring text '%s'" % ele.nodeValue)
	return None
    ln = ele.localName
    if verbosity() > 20: progress("  "*level, ln)
    if ln == "dict":
	me = kb.newBlankNode()
	n = len(ele.childNodes)
	i = 0
	pred = None
	while i<n:
	    e = ele.childNodes[i]
	    if isinstance(e, Text):
		if verbosity() > 70: progress("Ignoring text '%s'" % e.nodeValue)
		i = i + 1
		continue
	    if e.localName == "key":
		property = e.firstChild.data
		if not property: property = "nullProp"
		pred = kb.newSymbol(property)
	    else: