def parse(uri, start, followers, null):
    """Fetch the document at *uri*, tokenize it and run the Earley parser.

    Parameters:
        uri       -- location handed to webAccess.urlopenForRDF.
        start     -- start symbol; passed to Earley and used to index the
                     productions of the last parse result.
        followers -- passed through to the Earley constructor unchanged.
        null      -- passed through to the Earley constructor unchanged.

    Returns a 3-tuple:
        (productions[start] of the last item returned by parser.parse,
         seconds spent constructing the lexer,
         the time.time() value taken right after constructing the lexer)

    NOTE(review): the third element is a raw timestamp, not a duration.
    The original also took a timestamp just before parsing and never used
    it, so something like "k2 - k1" may have been intended -- confirm with
    the callers before changing the returned value.
    """
    k0 = time.time()
    lexer = sparql_tokens.Lexer()
    k1 = time.time()

    ip = webAccess.urlopenForRDF(uri, None)
    lexer.input(ip)
    parser = Earley(start, followers, null)

    # Single-argument call form: prints the same text on Python 2 and 3.
    print('ready to parse\n\n')

    # The parser pulls tokens by calling lexer.token itself.
    results = parser.parse(lexer.token)
    return (results[-1].productions[start], k1 - k0, k1)
def parse(uri, start, followers, null):
    """Fetch the document at *uri*, tokenize it and run the Earley parser.

    Parameters:
        uri       -- location handed to webAccess.urlopenForRDF.
        start     -- start symbol; passed to Earley and used to index the
                     productions of the last parse result.
        followers -- passed through to the Earley constructor unchanged.
        null      -- passed through to the Earley constructor unchanged.

    Returns a 3-tuple:
        (productions[start] of the last item returned by parser.parse,
         seconds spent constructing the lexer,
         the time.time() value taken right after constructing the lexer)

    NOTE(review): the third element is a raw timestamp, not a duration.
    The original also took a timestamp just before parsing and never used
    it, so something like "k2 - k1" may have been intended -- confirm with
    the callers before changing the returned value.
    """
    k0 = time.time()
    lexer = sparql_tokens.Lexer()
    k1 = time.time()

    ip = webAccess.urlopenForRDF(uri, None)
    lexer.input(ip)
    parser = Earley(start, followers, null)

    # Single-argument call form: prints the same text on Python 2 and 3.
    print('ready to parse\n\n')

    # The parser pulls tokens by calling lexer.token itself.
    results = parser.parse(lexer.token)
    return (results[-1].productions[start], k1 - k0, k1)
def main():
    """Command-line entry point: check a grammar, then optionally parse a file.

    Options (read from sys.argv):
        -h, --help        print usage and exit.
        -v, --verbose N   integer level; stored in the global ``verbose`` and
                          in diag.chatty_flag.
        -a, --as URI      production to parse as (required).
        -p, --parse URI   document to parse once the grammar has been checked.
        -g, --grammar URI grammar to load; defaults to the --as URI with its
                          fragment identifier removed.
        -y, --yacc URI    file to which yaccConvert output is written.

    Always leaves via sys.exit: status 2 for bad options or a missing --as,
    -2 when errors were collected while processing productions, 0 otherwise.

    Rebinds the module globals already, agenda, errors, verbose and g.
    """
    global already, agenda, errors
    global verbose
    global g

    parseAs = None
    grammarFile = None
    parseFile = None
    yaccFile = None
    verbose = 0

    try:
        opts, _args = getopt.getopt(
            sys.argv[1:], "ha:v:p:g:y:",
            ["help", "as=", "verbose=", "parse=", "grammar=", "yacc="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-v", "--verbose"):
            verbose = int(a)
            diag.chatty_flag = int(a)
        elif o in ("-a", "--as"):
            parseAs = uripath.join(uripath.base(), a)
        elif o in ("-p", "--parse"):
            parseFile = uripath.join(uripath.base(), a)
        elif o in ("-g", "--grammar"):
            grammarFile = uripath.join(uripath.base(), a)
        elif o in ("-y", "--yacc"):
            yaccFile = uripath.join(uripath.base(), a)[5:]  # strip off file:

    if not parseAs:
        usage()
        sys.exit(2)
    parseAs = uripath.join(uripath.base(), parseAs)
    if not grammarFile:
        grammarFile = parseAs.split("#")[0]  # strip off fragid
    else:
        grammarFile = uripath.join(uripath.base(), grammarFile)

    # The Grammar formula
    progress("Loading " + grammarFile)
    start = clock()
    g = load(grammarFile)
    # The "+ 1" is kept from the original; it also keeps the divisor below
    # from being zero when loading is very fast.
    taken = clock() - start + 1
    progress("Loaded %i statements in %fs, ie %f/s." %
             (len(g), taken, len(g) / taken))

    document = g.newSymbol(parseAs)

    already = []
    agenda = []
    errors = []
    doProduction(document)
    # Work list: presumably doProduction adds to the global agenda and
    # errors lists as it goes -- verify against its definition.
    while agenda:
        x = agenda[0]
        agenda = agenda[1:]
        already.append(x)
        doProduction(x)

    if errors:
        progress("###### FAILED with %i errors." % len(errors))
        for s in errors:
            progress("\t%s" % s)
        # sys.exit rather than the site-provided exit(), which is not
        # available under "python -S" or in frozen programs.
        sys.exit(-2)
    else:
        progress("Ok for predictive parsing")

    if verbose:
        progress("Literal terminals: %s" % literalTerminals.keys())
        progress("Token regular expressions:")
        for r in tokenRegexps:
            progress("\t%s matches %s" % (r, tokenRegexps[r].pattern))

    if yaccFile:
        yacc = open(yaccFile, "w")
        try:
            yaccConvert(yacc, document, tokenRegexps)
        finally:
            # Close the output file even if the conversion fails.
            yacc.close()

    if parseFile is None:
        sys.exit(0)

    ip = webAccess.urlopenForRDF(parseFile, None)
    lexer = sparql_tokens.Lexer()
    lexer.input(ip)

    sink = g.newFormula()
    keywords = [k.value() for k in g.each(pred=BNF.keywords, subj=document)]

    p = PredictiveParser(sink=sink, top=document, branchTable=branchTable,
                         tokenSet=tokenSet, keywords=keywords)
    p.verb = 1
    # Single-argument call form: prints the same text on Python 2 and 3.
    print(p.parse(lexer.token))
    progress("Parsed <%s> OK" % parseFile)
    sys.exit(0)  # didn't crash
def main():
    """Command-line entry point: check a grammar, then optionally parse a file.

    Options (read from sys.argv):
        -h, --help        print usage and exit.
        -v, --verbose N   integer level; stored in the global ``verbose`` and
                          in diag.chatty_flag, and later used as the
                          parser's ``verb`` setting.
        -a, --as URI      production to parse as (required).
        -p, --parse URI   document to parse once the grammar has been checked.
        -g, --grammar URI grammar to load; defaults to the --as URI with its
                          fragment identifier removed.
        -y, --yacc URI    file to which yaccConvert output is written.

    Always leaves via sys.exit: status 2 for bad options or a missing --as,
    -2 when errors were collected while processing productions, 0 otherwise.

    Rebinds the module globals already, agenda, errors, verbose and g.
    """
    global already, agenda, errors
    global verbose
    global g

    parseAs = None
    grammarFile = None
    parseFile = None
    yaccFile = None
    verbose = 0

    try:
        opts, _args = getopt.getopt(
            sys.argv[1:], "ha:v:p:g:y:",
            ["help", "as=", "verbose=", "parse=", "grammar=", "yacc="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-v", "--verbose"):
            verbose = int(a)
            diag.chatty_flag = int(a)
        elif o in ("-a", "--as"):
            parseAs = uripath.join(uripath.base(), a)
        elif o in ("-p", "--parse"):
            parseFile = uripath.join(uripath.base(), a)
        elif o in ("-g", "--grammar"):
            grammarFile = uripath.join(uripath.base(), a)
        elif o in ("-y", "--yacc"):
            yaccFile = uripath.join(uripath.base(), a)[5:]  # strip off file:

    if not parseAs:
        usage()
        sys.exit(2)
    parseAs = uripath.join(uripath.base(), parseAs)
    if not grammarFile:
        grammarFile = parseAs.split("#")[0]  # strip off fragid
    else:
        grammarFile = uripath.join(uripath.base(), grammarFile)

    # The Grammar formula
    progress("Loading " + grammarFile)
    start = clock()
    g = load(grammarFile)
    # The "+ 1" is kept from the original; it also keeps the divisor below
    # from being zero when loading is very fast.
    taken = clock() - start + 1
    progress("Loaded %i statements in %fs, ie %f/s." %
             (len(g), taken, len(g) / taken))

    document = g.newSymbol(parseAs)

    already = []
    agenda = []
    errors = []
    doProduction(document)
    # Work list: presumably doProduction adds to the global agenda and
    # errors lists as it goes -- verify against its definition.
    while agenda:
        x = agenda[0]
        agenda = agenda[1:]
        already.append(x)
        doProduction(x)

    if errors:
        progress("###### FAILED with %i errors." % len(errors))
        for s in errors:
            progress("\t%s" % s)
        # sys.exit rather than the site-provided exit(), which is not
        # available under "python -S" or in frozen programs.
        sys.exit(-2)
    else:
        progress("Ok for predictive parsing")

    if verbose:
        progress("Literal terminals: %s" % literalTerminals.keys())
        progress("Token regular expressions:")
        for r in tokenRegexps:
            progress("\t%s matches %s" % (r, tokenRegexps[r].pattern))

    if yaccFile:
        yacc = open(yaccFile, "w")
        try:
            yaccConvert(yacc, document, tokenRegexps)
        finally:
            # Close the output file even if the conversion fails.
            yacc.close()

    if parseFile is None:
        sys.exit(0)

    ip = webAccess.urlopenForRDF(parseFile, None)
    # Named "text" so the builtin str is not shadowed inside this function.
    text = ip.read().decode('utf_8')

    sink = g.newFormula()
    keywords = [k.value() for k in g.each(pred=BNF.keywords, subj=document)]

    p = PredictiveParser(sink=sink, top=document, branchTable=branchTable,
                         tokenRegexps=tokenRegexps, keywords=keywords)
    p.verb = verbose
    start = clock()
    p.parse(text)
    taken = clock() - start + 1
    progress("Loaded %i chars in %fs, ie %f/s." %
             (len(text), taken, len(text) / taken))
    progress("Parsed <%s> OK" % parseFile)
    sys.exit(0)  # didn't crash
print "No server root URL path given (--webroot)", webroot = "/archive" print "Assuming " + webroot if (uri == None): print "No URI given\n\n" + __doc__ sys.exit(2) if (uri[:7] != 'http://' and uri[:7] != 'htts://'): print "URI is not http[s]:// URI \n" + __doc__ sys.exit(2) #try: if filename != None: datafile = open(filename, "r") else: datafile = urlopenForRDF(uri) headers = datafile.headers print "Headers: " + ` headers ` #except: # print "Cannot access file \n" + filename + "\n" # sys.exit(3); data = datafile.read() print "Data length: ", len(data) if data.find("rdf") < 0: print "File does not look like a data file:\n" + data[:100] + "\n" sys.exit(3) path = root + '/' + uri[7:]