Пример #1
0
def read_schemas(loadExtensions=False):
    """Read/parse/ingest schemas from data/*.rdfa. Also data/*examples.txt.

    Does nothing unless ``schemasInitialized`` is false or ``DYNALOAD`` is
    true.  A load performs, in order:

    1. parse every ``data/*.rdfa`` file into the "core" layer;
    2. if ``loadExtensions`` is true, parse each ``data/ext/*/*.rdfa`` file
       into a layer named after the first path component below
       ``data/ext/`` and register that name in ``all_layers``;
    3. parse the contents of every ``data/*examples.txt`` file;
    4. parse ``data/2015-04-vocab_counts.txt`` when that file exists.

    ``schemasInitialized`` is set to True at the end of a load.

    Args:
        loadExtensions: when true, also scan for and parse extension schemas.
    """
    import glob
    import re

    global schemasInitialized
    if schemasInitialized and not DYNALOAD:
        return

    log.info("(re)loading core and annotations.")
    file_paths = [full_path(f) for f in glob.glob("data/*.rdfa")]
    parser = parsers.MakeParserOfType('rdfa', None)
    parser.parse(file_paths, "core")

    if loadExtensions:
        log.info("(re)scanning for extensions.")
        extfiles = glob.glob("data/ext/*/*.rdfa")
        log.info("Extensions found: %s ." % " , ".join(extfiles))
        # Raw string: "\/" in a plain literal is an invalid escape sequence.
        fnstrip_re = re.compile(r"/.*")
        for ext in extfiles:
            ext_file_path = full_path(ext)
            # 'data/ext/bib/bibdemo.rdfa' -> 'bib': drop the prefix, then
            # everything from the first remaining '/' onwards.
            extid = fnstrip_re.sub('', ext.replace('data/ext/', ''))
            log.info("Preparing to parse extension data: %s as '%s'" % (ext_file_path, extid))
            # A fresh parser is created for every extension file.
            parser = parsers.MakeParserOfType('rdfa', None)
            all_layers[extid] = "1"
            extitems = parser.parse([ext_file_path], layer=extid)  # put schema triples in a layer
            for x in extitems:
                if x is not None:
                    log.debug("%s:%s" % (extid, str(x.id)))

    example_contents = [read_file(f) for f in glob.glob("data/*examples.txt")]
    parser = parsers.ParseExampleFile(None)
    parser.parse(example_contents)

    # glob() on a literal path yields an empty list when the file is absent,
    # so the usage data is simply skipped in that case.
    for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
        usage_data = read_file(usage_file)
        parser = parsers.UsageFileParser(None)
        parser.parse(usage_data)

    schemasInitialized = True
Пример #2
0
def read_extensions(extensions):
    """Parse the schema and example files of the named extensions, once.

    For every name in ``extensions`` the files ``data/ext/<name>/*.rdfa``
    and ``data/ext/<name>/*examples.txt`` are collected.  Each ``.rdfa``
    file is parsed into a layer named after the first path component below
    ``data/ext/``, that name is registered in ``all_layers``, and the parsed
    items are passed to ``setHomeValues``.  The collected example files are
    then handed to ``read_examples``.

    The work is skipped when ``extensionsLoaded`` is already true; the flag
    is set to True on every call.

    Args:
        extensions: iterable of extension names, each interpolated into the
            glob patterns above.
    """
    import glob
    import re

    global extensionsLoaded
    if not extensionsLoaded:  # 2nd load will throw up errors and duplicate terms
        log.info("(re)scanning for extensions.")
        extfiles = []
        expfiles = []
        for name in extensions:
            extfiles += glob.glob("data/ext/%s/*.rdfa" % name)
            expfiles += glob.glob("data/ext/%s/*examples.txt" % name)

        log.info("Extensions found: %s ." % " , ".join(extfiles))
        # Raw string: "\/" in a plain literal is an invalid escape sequence.
        fnstrip_re = re.compile(r"/.*")
        for ext in extfiles:
            ext_file_path = full_path(ext)
            # Drop the 'data/ext/' prefix, then everything from the first
            # remaining '/' onwards, leaving the extension's directory name.
            extid = fnstrip_re.sub('', ext.replace('data/ext/', ''))
            log.info("Preparing to parse extension data: %s as '%s'" %
                     (ext_file_path, extid))
            # A fresh parser is created for every extension file.
            parser = parsers.MakeParserOfType('rdfa', None)
            all_layers[extid] = "1"
            extitems = parser.parse([ext_file_path], layer=extid)  # put schema triples in a layer
            # NOTE(review): presumably assigns the extension as "home" of the
            # parsed terms -- confirm against setHomeValues.
            setHomeValues(extitems, extid, False)

        read_examples(expfiles)

    extensionsLoaded = True
Пример #3
0
def read_schemas(loadExtensions=False):
    """Load core schemas, examples and usage counts from the data directory.

    When ``schemasInitialized`` is false or ``DYNALOAD`` is true, this parses
    every ``data/*.rdfa`` file into the "core" layer, passes the parsed items
    to ``setHomeValues``, hands ``data/*examples.txt`` to ``read_examples``
    and parses ``data/2015-04-vocab_counts.txt`` if it exists.
    ``schemasInitialized`` is set to True on every call.

    Args:
        loadExtensions: accepted but not used by this function.
    """
    import os.path
    import glob
    import re

    global schemasInitialized
    must_load = not schemasInitialized or DYNALOAD
    if must_load:
        log.info("(re)loading core and annotations.")
        rdfa_paths = [full_path(name) for name in glob.glob("data/*.rdfa")]
        core_parser = parsers.MakeParserOfType('rdfa', None)
        core_items = core_parser.parse(rdfa_paths, "core")

        # Core terms that do not have a home yet get a default one.
        setHomeValues(core_items, "core", True)

        read_examples(glob.glob("data/*examples.txt"))

        # The literal pattern matches at most one file; nothing happens if
        # it is missing.
        for counts_path in glob.glob("data/2015-04-vocab_counts.txt"):
            usage_text = read_file(counts_path)
            parsers.UsageFileParser(None).parse(usage_text)

    schemasInitialized = True
Пример #4
0
def read_schemas():
    """Read/parse/ingest schemas from data/*.rdfa. Also data/*examples.txt.

    Returns immediately when ``schemasInitialized`` is already true.
    Otherwise every ``data/*.rdfa`` file is parsed with an 'rdfa' parser,
    the contents of every ``data/*examples.txt`` file are parsed with
    ``parsers.ParseExampleFile``, and ``schemasInitialized`` is set to True.
    """
    import os.path
    import glob

    global schemasInitialized
    if schemasInitialized:
        return

    rdfa_paths = [full_path(name) for name in glob.glob("data/*.rdfa")]
    parsers.MakeParserOfType('rdfa', None).parse(rdfa_paths)

    # The example parser is given file contents, not file names.
    example_texts = [read_file(name) for name in glob.glob("data/*examples.txt")]
    parsers.ParseExampleFile(None).parse(example_texts)

    schemasInitialized = True