def read_schemas(loadExtensions=False):
    """Read/parse/ingest schemas from data/*.rdfa. Also data/*examples.txt

    Loads, in this order:
      1. every ``data/*.rdfa`` file into the "core" layer;
      2. when ``loadExtensions`` is true, every ``data/ext/*/*.rdfa`` file,
         each into a layer named after its directory under ``data/ext/``;
      3. every ``data/*examples.txt`` file;
      4. the usage counts in ``data/2015-04-vocab_counts.txt`` (if present).

    The work is skipped when the module-level ``schemasInitialized`` flag is
    already set, unless ``DYNALOAD`` is true, in which case everything is
    reloaded on every call.

    Args:
        loadExtensions: when true, also scan and parse the extension files.
    """
    import glob
    import re

    global schemasInitialized
    if schemasInitialized and not DYNALOAD:
        return

    log.info("(re)loading core and annotations.")
    core_paths = [full_path(f) for f in glob.glob("data/*.rdfa")]
    parser = parsers.MakeParserOfType('rdfa', None)
    parser.parse(core_paths, "core")

    if loadExtensions:
        log.info("(re)scanning for extensions.")
        extfiles = glob.glob("data/ext/*/*.rdfa")
        log.info("Extensions found: %s ." % " , ".join(extfiles))
        # Raw string: the previous "\/.*" literal relied on an invalid
        # escape sequence. Matches the first '/' and everything after it.
        fnstrip_re = re.compile(r"/.*")
        for ext in extfiles:
            ext_file_path = full_path(ext)
            # 'data/ext/<extid>/<file>.rdfa' -> '<extid>'
            extid = fnstrip_re.sub('', ext.replace('data/ext/', ''))
            log.info("Preparing to parse extension data: %s as '%s'" % (ext_file_path, extid))
            parser = parsers.MakeParserOfType('rdfa', None)
            all_layers[extid] = "1"
            # put schema triples in a layer
            extitems = parser.parse([ext_file_path], layer=extid)
            for x in extitems:
                if x is not None:
                    log.debug("%s:%s" % (extid, str(x.id)))
            # e.g. see 'data/ext/bib/bibdemo.rdfa'

    example_contents = [read_file(f) for f in glob.glob("data/*examples.txt")]
    parser = parsers.ParseExampleFile(None)
    parser.parse(example_contents)

    # 'usage_path' rather than 'file', which shadows a builtin name.
    for usage_path in glob.glob("data/2015-04-vocab_counts.txt"):
        usage_data = read_file(usage_path)
        parser = parsers.UsageFileParser(None)
        parser.parse(usage_data)

    schemasInitialized = True
def read_extensions(extensions):
    """Parse the RDFa and example files of the named extensions, once.

    For each name in ``extensions`` this collects ``data/ext/<name>/*.rdfa``
    and ``data/ext/<name>/*examples.txt``. Every RDFa file is parsed into a
    layer named after its directory under ``data/ext/``, that layer is
    registered in ``all_layers``, and the parsed items are passed to
    ``setHomeValues``. The collected example files are then handed to
    ``read_examples``.

    Nothing is done when the module-level ``extensionsLoaded`` flag is
    already set: a 2nd load will throw up errors and duplicate terms.

    Args:
        extensions: iterable of extension directory names.
    """
    import glob
    import re

    global extensionsLoaded
    if extensionsLoaded:
        return

    log.info("(re)scanning for extensions.")
    extfiles = []
    expfiles = []
    for i in extensions:
        extfiles.extend(glob.glob("data/ext/%s/*.rdfa" % i))
        expfiles.extend(glob.glob("data/ext/%s/*examples.txt" % i))

    log.info("Extensions found: %s ." % " , ".join(extfiles))
    # Raw string: the previous "\/.*" literal relied on an invalid escape
    # sequence. Matches the first '/' and everything after it.
    fnstrip_re = re.compile(r"/.*")
    for ext in extfiles:
        ext_file_path = full_path(ext)
        # 'data/ext/<extid>/<file>.rdfa' -> '<extid>'
        extid = fnstrip_re.sub('', ext.replace('data/ext/', ''))
        log.info("Preparing to parse extension data: %s as '%s'" % (ext_file_path, extid))
        parser = parsers.MakeParserOfType('rdfa', None)
        all_layers[extid] = "1"
        # put schema triples in a layer
        extitems = parser.parse([ext_file_path], layer=extid)
        setHomeValues(extitems, extid, False)

    read_examples(expfiles)
    extensionsLoaded = True
def read_schemas(loadExtensions=False):
    """Ingest the core schemas, their examples and the usage counts.

    Parses every ``data/*.rdfa`` file into the "core" layer and passes the
    parsed items to ``setHomeValues`` so that core terms without a home get
    the default one. Then hands ``data/*examples.txt`` to ``read_examples``
    and feeds ``data/2015-04-vocab_counts.txt`` (if present) to a usage-file
    parser.

    The load is skipped once ``schemasInitialized`` is set, unless
    ``DYNALOAD`` is true. ``loadExtensions`` is accepted but not used here.
    """
    import os.path
    import glob
    import re

    global schemasInitialized
    if schemasInitialized and not DYNALOAD:
        return

    log.info("(re)loading core and annotations.")
    core_paths = [full_path(name) for name in glob.glob("data/*.rdfa")]
    rdfa_parser = parsers.MakeParserOfType('rdfa', None)
    core_items = rdfa_parser.parse(core_paths, "core")

    # set default home for those in core that do not have one
    setHomeValues(core_items, "core", True)

    read_examples(glob.glob("data/*examples.txt"))

    for counts_path in glob.glob("data/2015-04-vocab_counts.txt"):
        counts_text = read_file(counts_path)
        usage_parser = parsers.UsageFileParser(None)
        usage_parser.parse(counts_text)

    schemasInitialized = True
def read_schemas():
    """Read/parse/ingest schemas from data/*.rdfa. Also data/*examples.txt

    Runs at most once: the module-level ``schemasInitialized`` flag is set
    after the first successful load and later calls return immediately.
    """
    import os.path
    import glob

    global schemasInitialized
    if schemasInitialized:
        return

    # Schema definitions: all RDFa files, parsed together in one call.
    rdfa_paths = [full_path(name) for name in glob.glob("data/*.rdfa")]
    rdfa_parser = parsers.MakeParserOfType('rdfa', None)
    rdfa_parser.parse(rdfa_paths)

    # Examples: file contents are read up front, then parsed as one batch.
    example_texts = [read_file(name) for name in glob.glob("data/*examples.txt")]
    example_parser = parsers.ParseExampleFile(None)
    example_parser.parse(example_texts)

    schemasInitialized = True