Exemplo n.º 1
0
def read_schemas(loadExtensions=False):
    """Load the core schema graphs and the vocabulary usage counts.

    Gathers every ``data/*.rdfa`` file plus each ``data/*.jsonld`` file that
    has no ``.rdfa`` file with the same base name, resolves them through
    ``full_path`` and passes the list to ``apirdflib.load_graph`` under the
    name ``'core'``. Afterwards every file matching
    ``data/2015-04-vocab_counts.txt`` is read and fed to
    ``parsers.UsageFileParser``.

    The load runs on the first call, and on every call while ``DYNALOAD`` is
    truthy. This variant does not read any ``*examples.txt`` files.

    Args:
        loadExtensions: Accepted for interface compatibility; unused here.
    """
    global schemasInitialized

    load_start = datetime.datetime.now()

    # Capture the previous state *before* raising the flag. The flag used to
    # be set to True and then tested on the very next line, which made
    # ``not schemasInitialized`` always False, so nothing was loaded unless
    # DYNALOAD was set. ``globals().get`` keeps a missing module-level
    # definition from raising NameError (the old code never read it unset).
    already_initialized = globals().get("schemasInitialized", False)
    schemasInitialized = True
    if already_initialized and not DYNALOAD:
        return

    log.debug("[%s] (re)loading core and annotations." %
              getInstanceId(short=True))
    files = glob.glob("data/*.rdfa")
    jfiles = glob.glob("data/*.jsonld")
    rdfa_files = set(files)
    # Only add .jsonld files if no equivalent .rdfa ([:-7] strips ".jsonld").
    files.extend(jf for jf in jfiles
                 if jf[:-7] + ".rdfa" not in rdfa_files)
    apirdflib.load_graph('core', [full_path(f) for f in files])
    log.info("[%s] Loaded core graphs in %s" %
             (getInstanceId(short=True),
              (datetime.datetime.now() - load_start)))

    # Restart the clock so the usage data gets its own timing.
    load_start = datetime.datetime.now()

    for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
        usage_data = read_file(usage_file)
        parser = parsers.UsageFileParser(None)
        parser.parse(usage_data)
    log.debug("[%s]Loaded usage data in %s" %
              (getInstanceId(short=True),
               (datetime.datetime.now() - load_start)))
Exemplo n.º 2
0
def read_extensions(extensions):
    """Load the graph of each named extension, at most once.

    For every name in ``extensions`` the name is registered in
    ``all_layers``, the ``data/ext/<name>/*.rdfa`` files are collected
    together with any ``*.jsonld`` file lacking an ``.rdfa`` twin, and the
    resolved paths are handed to ``apirdflib.load_graph`` under that name.
    Nothing is loaded when ``extensionsLoaded`` is already set, but the
    elapsed-time message is logged either way.

    Args:
        extensions: Iterable of extension names (sub-directories of
            ``data/ext``).
    """
    global extensionsLoaded
    started_at = datetime.datetime.now()

    if not extensionsLoaded:  # a 2nd load would throw up errors and duplicate terms
        log.info("[%s] extensions %s " %
                 (getInstanceId(short=True), extensions))
        for layer in extensions:
            all_layers[layer] = "1"
            sources = glob.glob("data/ext/%s/*.rdfa" % layer)
            jsonld_candidates = glob.glob("data/ext/%s/*.jsonld" % layer)
            # Only add .jsonld files if no equivalent .rdfa
            sources += [candidate for candidate in jsonld_candidates
                        if (candidate[:-7] + ".rdfa") not in sources]

            apirdflib.load_graph(layer, [full_path(src) for src in sources])

    instance_id = getInstanceId(short=True)
    elapsed = datetime.datetime.now() - started_at
    log.info("[%s]Loaded extension graphs in %s" % (instance_id, elapsed))
    extensionsLoaded = True
Exemplo n.º 3
0
def read_schemas(loadExtensions=False):
    """Load the core schema graphs, the core examples and the usage counts.

    Gathers every ``data/*.rdfa`` file plus each ``data/*.jsonld`` file that
    has no ``.rdfa`` file with the same base name, resolves them through
    ``full_path`` and passes the list to ``apirdflib.load_graph`` under the
    name ``'core'``. Then passes the ``data/*examples.txt`` files to
    ``read_examples`` for the ``'core'`` layer, and feeds every file matching
    ``data/2015-04-vocab_counts.txt`` to ``parsers.UsageFileParser``.

    The load runs on the first call, and on every call while ``DYNALOAD`` is
    truthy.

    Args:
        loadExtensions: Accepted for interface compatibility; unused here.
    """
    import glob

    global schemasInitialized

    # Capture the previous state *before* raising the flag. The flag used to
    # be set to True and then tested on the very next line, which made
    # ``not schemasInitialized`` always False, so nothing was loaded unless
    # DYNALOAD was set. ``globals().get`` keeps a missing module-level
    # definition from raising NameError (the old code never read it unset).
    already_initialized = globals().get("schemasInitialized", False)
    schemasInitialized = True
    if already_initialized and not DYNALOAD:
        return

    log.info("(re)loading core and annotations.")
    files = glob.glob("data/*.rdfa")
    jfiles = glob.glob("data/*.jsonld")
    rdfa_files = set(files)
    # Only add .jsonld files if no equivalent .rdfa ([:-7] strips ".jsonld").
    files.extend(jf for jf in jfiles
                 if jf[:-7] + ".rdfa" not in rdfa_files)
    apirdflib.load_graph('core', [full_path(f) for f in files])

    read_examples(glob.glob("data/*examples.txt"), 'core')

    for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
        usage_data = read_file(usage_file)
        parser = parsers.UsageFileParser(None)
        parser.parse(usage_data)
Exemplo n.º 4
0
Arquivo: api.py Projeto: dbs/schemaorg
def read_schemas(loadExtensions=False):
    """Load the core schema graphs and the vocabulary usage counts.

    Gathers every ``data/*.rdfa`` file plus each ``data/*.jsonld`` file that
    has no ``.rdfa`` file with the same base name, resolves them through
    ``full_path`` and passes the list to ``apirdflib.load_graph`` under the
    name ``'core'``. Afterwards every file matching
    ``data/2015-04-vocab_counts.txt`` is read and fed to
    ``parsers.UsageFileParser``.

    The load runs on the first call, and on every call while ``DYNALOAD`` is
    truthy. This variant does not read any ``*examples.txt`` files.

    Args:
        loadExtensions: Accepted for interface compatibility; unused here.
    """
    global schemasInitialized

    load_start = datetime.datetime.now()

    # Capture the previous state *before* raising the flag. The flag used to
    # be set to True and then tested on the very next line, which made
    # ``not schemasInitialized`` always False, so nothing was loaded unless
    # DYNALOAD was set. ``globals().get`` keeps a missing module-level
    # definition from raising NameError (the old code never read it unset).
    already_initialized = globals().get("schemasInitialized", False)
    schemasInitialized = True
    if already_initialized and not DYNALOAD:
        return

    log.debug("[%s] (re)loading core and annotations." % getInstanceId(short=True))
    files = glob.glob("data/*.rdfa")
    jfiles = glob.glob("data/*.jsonld")
    rdfa_files = set(files)
    # Only add .jsonld files if no equivalent .rdfa ([:-7] strips ".jsonld").
    files.extend(jf for jf in jfiles if jf[:-7] + ".rdfa" not in rdfa_files)
    apirdflib.load_graph('core', [full_path(f) for f in files])
    log.info("[%s] Loaded core graphs in %s" % (getInstanceId(short=True), (datetime.datetime.now() - load_start)))

    # Restart the clock so the usage data gets its own timing.
    load_start = datetime.datetime.now()

    for usage_file in glob.glob("data/2015-04-vocab_counts.txt"):
        usage_data = read_file(usage_file)
        parser = parsers.UsageFileParser(None)
        parser.parse(usage_data)
    log.debug("[%s]Loaded usage data in %s" % (getInstanceId(short=True), (datetime.datetime.now() - load_start)))
Exemplo n.º 5
0
def read_extensions(extensions):
    """Load the graph and examples of each named extension, at most once.

    For every name in ``extensions`` the name is registered in
    ``all_layers``, the ``data/ext/<name>/*.rdfa`` files are collected
    together with any ``*.jsonld`` file lacking an ``.rdfa`` twin, and the
    resolved paths are handed to ``apirdflib.load_graph`` under that name.
    The extension's ``*examples.txt`` files are then passed to
    ``read_examples``. Nothing happens when ``extensionsLoaded`` is already
    set.

    Args:
        extensions: Iterable of extension names (sub-directories of
            ``data/ext``).
    """
    import glob

    global extensionsLoaded

    if not extensionsLoaded:  # 2nd load will throw up errors and duplicate terms
        # Wrap in a 1-tuple so a tuple of names is formatted as one value
        # instead of being unpacked as several format arguments.
        log.info("(re)scanning for extensions %s " % (extensions,))
        # Collected across all extensions; previously only the last
        # extension's files reached the summary log below.
        found_files = []
        for ext in extensions:
            all_layers[ext] = "1"
            ext_files = glob.glob("data/ext/%s/*.rdfa" % ext)
            jext_files = glob.glob("data/ext/%s/*.jsonld" % ext)
            rdfa_files = set(ext_files)
            # Only add .jsonld files if no equivalent .rdfa
            # ([:-7] strips ".jsonld").
            ext_files.extend(jf for jf in jext_files
                             if jf[:-7] + ".rdfa" not in rdfa_files)

            apirdflib.load_graph(ext, [full_path(f) for f in ext_files])
            read_examples(glob.glob("data/ext/%s/*examples.txt" % ext), ext)
            found_files.extend(ext_files)

        log.info("Extensions found: %s ." % " , ".join(found_files))

    extensionsLoaded = True
Exemplo n.º 6
0
Arquivo: api.py Projeto: dbs/schemaorg
def read_extensions(extensions):
    """Load the graph of each named extension, at most once.

    Each name in ``extensions`` is recorded in ``all_layers``; its
    ``data/ext/<name>/*.rdfa`` files, plus any ``*.jsonld`` file without an
    ``.rdfa`` counterpart, are resolved with ``full_path`` and given to
    ``apirdflib.load_graph`` under that name. The loading is skipped when
    ``extensionsLoaded`` is already set; the elapsed-time message is logged
    in both cases.

    Args:
        extensions: Iterable of extension names (sub-directories of
            ``data/ext``).
    """
    global extensionsLoaded
    timer_start = datetime.datetime.now()

    if not extensionsLoaded:  # a 2nd load would throw up errors and duplicate terms
        log.info("[%s] extensions %s " % (getInstanceId(short=True), extensions))
        for ext_name in extensions:
            all_layers[ext_name] = "1"
            graph_files = glob.glob("data/ext/%s/*.rdfa" % ext_name)
            for jsonld_file in glob.glob("data/ext/%s/*.jsonld" % ext_name):
                # Only add .jsonld files if no equivalent .rdfa
                if jsonld_file[:-7] + ".rdfa" in graph_files:
                    continue
                graph_files.append(jsonld_file)

            resolved = []
            for graph_file in graph_files:
                resolved.append(full_path(graph_file))
            apirdflib.load_graph(ext_name, resolved)

    instance_id = getInstanceId(short=True)
    duration = datetime.datetime.now() - timer_start
    log.info("[%s]Loaded extension graphs in %s" % (instance_id, duration))
    extensionsLoaded = True
Exemplo n.º 7
0
def read_extensions(extensions):
    """Load the graph and examples of each named extension, at most once.

    For every name in ``extensions`` the name is registered in
    ``all_layers``, the ``data/ext/<name>/*.rdfa`` files are collected
    together with any ``*.jsonld`` file lacking an ``.rdfa`` twin, and the
    resolved paths are handed to ``apirdflib.load_graph`` under that name.
    The extension's ``*examples.txt`` files are then passed to
    ``read_examples``. Nothing happens when ``extensionsLoaded`` is already
    set.

    Args:
        extensions: Iterable of extension names (sub-directories of
            ``data/ext``).
    """
    import glob

    global extensionsLoaded

    if not extensionsLoaded:  # 2nd load will throw up errors and duplicate terms
        # Wrap in a 1-tuple so a tuple of names is formatted as one value
        # instead of being unpacked as several format arguments.
        log.info("(re)scanning for extensions %s " % (extensions,))
        # Collected across all extensions; previously only the last
        # extension's files reached the summary log below.
        found_files = []
        for ext in extensions:
            all_layers[ext] = "1"
            ext_files = glob.glob("data/ext/%s/*.rdfa" % ext)
            jext_files = glob.glob("data/ext/%s/*.jsonld" % ext)
            rdfa_files = set(ext_files)
            # Only add .jsonld files if no equivalent .rdfa
            # ([:-7] strips ".jsonld").
            ext_files.extend(jf for jf in jext_files
                             if jf[:-7] + ".rdfa" not in rdfa_files)

            apirdflib.load_graph(ext, [full_path(f) for f in ext_files])
            read_examples(glob.glob("data/ext/%s/*examples.txt" % ext), ext)
            found_files.extend(ext_files)

        log.info("Extensions found: %s ." % " , ".join(found_files))

    extensionsLoaded = True