Exemplo n.º 1
0
def read_schemas(loadExtensions=False):
    """Load the core schemas, the example files and the usage counts.

    The work is done only when the module-level ``schemasInitialized`` flag
    is not yet set, or when ``DYNALOAD`` is set; on return the flag is True
    in either case. ``loadExtensions`` is accepted but not used by this
    function.
    """
    import os.path
    import glob
    import re

    global schemasInitialized
    if (not schemasInitialized or DYNALOAD):
        log.info("(re)loading core and annotations.")

        # Parse every RDFa file under data/ into the "core" layer.
        rdfa_paths = [full_path(name) for name in glob.glob("data/*.rdfa")]
        rdfa_parser = parsers.MakeParserOfType('rdfa', None)
        core_items = rdfa_parser.parse(rdfa_paths, "core")

        # Give core items that lack a home the default one.
        setHomeValues(core_items, "core", True)

        read_examples(glob.glob("data/*examples.txt"))

        # Feed each usage-count file to its own parser instance.
        for counts_file in glob.glob("data/2015-04-vocab_counts.txt"):
            counts_text = read_file(counts_file)
            parsers.UsageFileParser(None).parse(counts_text)

    schemasInitialized = True
Exemplo n.º 2
0
def read_schemas(loadExtensions=False):
    """Load the core schema graph and the vocabulary usage counts.

    Collects ``data/*.rdfa`` plus every ``data/*.jsonld`` file that has no
    ``.rdfa`` counterpart, passes their full paths to
    ``apirdflib.load_graph`` as the 'core' graph, then parses the usage
    counts file. Both phases log how long they took.

    The load runs when the schemas had not been initialized before this call,
    or when ``DYNALOAD`` is set. ``loadExtensions`` is accepted but not used
    by this function.
    """
    load_start = datetime.datetime.now()

    global schemasInitialized
    # Capture the previous state BEFORE raising the flag: testing the flag
    # after assigning True made "not schemasInitialized" always False, so the
    # initial load was skipped whenever DYNALOAD was off.
    # NOTE(review): the flag is still raised before loading, as before -
    # presumably so code running during the load sees it set; confirm.
    needs_initial_load = not schemasInitialized
    schemasInitialized = True
    if needs_initial_load or DYNALOAD:
        log.debug("[%s] (re)loading core and annotations." %
                  getInstanceId(short=True))
        files = glob.glob("data/*.rdfa")
        for jf in glob.glob("data/*.jsonld"):
            # Swap the 7-character ".jsonld" suffix for ".rdfa".
            rdfequiv = jf[:-7] + ".rdfa"
            if rdfequiv not in files:  # Only add .jsonld files if no equivalent .rdfa
                files.append(jf)
        file_paths = [full_path(f) for f in files]
        apirdflib.load_graph('core', file_paths)
        log.info("[%s] Loaded core graphs in %s" %
                 (getInstanceId(short=True),
                  (datetime.datetime.now() - load_start)))

        # Restart the timer so the usage-data phase is measured on its own.
        load_start = datetime.datetime.now()

        for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
            usage_data = read_file(usage_file)
            parser = parsers.UsageFileParser(None)
            parser.parse(usage_data)
        log.debug("[%s]Loaded usage data in %s" %
                  (getInstanceId(short=True),
                   (datetime.datetime.now() - load_start)))

    schemasInitialized = True
Exemplo n.º 3
0
def read_schemas(loadExtensions=False):
    """Load the core schema graph, the example files and the usage counts.

    Collects ``data/*.rdfa`` plus every ``data/*.jsonld`` file that has no
    ``.rdfa`` counterpart, passes their full paths to
    ``apirdflib.load_graph`` as the 'core' graph, then reads
    ``data/*examples.txt`` for 'core' and parses the usage counts file.

    The load runs when the schemas had not been initialized before this call,
    or when ``DYNALOAD`` is set. ``loadExtensions`` is accepted but not used
    by this function.
    """
    import glob

    global schemasInitialized
    # Capture the previous state BEFORE raising the flag: testing the flag
    # after assigning True made "not schemasInitialized" always False, so the
    # initial load was skipped whenever DYNALOAD was off.
    # NOTE(review): the flag is still raised before loading, as before -
    # presumably so code running during the load sees it set; confirm.
    needs_initial_load = not schemasInitialized
    schemasInitialized = True
    if needs_initial_load or DYNALOAD:
        log.info("(re)loading core and annotations.")
        files = glob.glob("data/*.rdfa")
        for jf in glob.glob("data/*.jsonld"):
            # Swap the 7-character ".jsonld" suffix for ".rdfa".
            rdfequiv = jf[:-7] + ".rdfa"
            if rdfequiv not in files:  # Only add .jsonld files if no equivalent .rdfa
                files.append(jf)
        file_paths = [full_path(f) for f in files]
        apirdflib.load_graph('core', file_paths)

        read_examples(glob.glob("data/*examples.txt"), 'core')

        for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
            usage_data = read_file(usage_file)
            parser = parsers.UsageFileParser(None)
            parser.parse(usage_data)

    schemasInitialized = True
Exemplo n.º 4
0
def read_schemas(loadExtensions=False):
    """Load core schemas, optional extensions, examples and usage counts.

    Runs only when the module-level ``schemasInitialized`` flag is not set,
    or when ``DYNALOAD`` is set; the flag is set to True once a load
    completes.

    Args:
        loadExtensions: when true, also parse every
            ``data/ext/<extid>/*.rdfa`` file into a layer named after its
            ``<extid>`` directory and register that id in ``all_layers``.
    """
    import glob
    import re

    global schemasInitialized
    if (not schemasInitialized or DYNALOAD):
        log.info("(re)loading core and annotations.")
        file_paths = [full_path(f) for f in glob.glob("data/*.rdfa")]
        parser = parsers.MakeParserOfType('rdfa', None)
        items = parser.parse(file_paths, "core")

        if loadExtensions:
            log.info("(re)scanning for extensions.")
            extfiles = glob.glob("data/ext/*/*.rdfa")
            log.info("Extensions found: %s ." % " , ".join(extfiles))
            # Raw string: the previous "\/.*" literal relied on an invalid
            # escape sequence. Matches everything from the first "/" onwards.
            fnstrip_re = re.compile(r"/.*")
            for ext in extfiles:
                ext_file_path = full_path(ext)
                # "data/ext/bib/bibdemo.rdfa" -> "bib/bibdemo.rdfa" -> "bib"
                extid = ext.replace('data/ext/', '')
                extid = re.sub(fnstrip_re, '', extid)
                log.info("Preparing to parse extension data: %s as '%s'" % (ext_file_path, extid))
                parser = parsers.MakeParserOfType('rdfa', None)
                all_layers[extid] = "1"
                # Put the extension's schema triples in their own layer.
                extitems = parser.parse([ext_file_path], layer=extid)
                for x in extitems:
                    if x is not None:
                        log.debug("%s:%s" % (extid, str(x.id)))

        # All example files are read first, then parsed in a single call.
        example_contents = [read_file(f)
                            for f in glob.glob("data/*examples.txt")]
        parser = parsers.ParseExampleFile(None)
        parser.parse(example_contents)

        for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
            usage_data = read_file(usage_file)
            parser = parsers.UsageFileParser(None)
            parser.parse(usage_data)
        schemasInitialized = True