Exemplo n.º 1
0
class Session:
    """Configuration-driven session for loading SPARQL results into ElasticSearch.

    The session reads a YAML configuration file, opens an ElasticSearch
    client and prepares a URL opener that asks for SPARQL JSON results.
    The configuration keys read by this class are ``sparql`` (with ``uri``),
    ``elasticsearch``, ``prefixes``, ``common_properties`` and ``indexes``
    (a mapping of index name -> document type -> type configuration).
    """

    # Text fields that are indexed with the edge-n-gram "autocomplete"
    # analyzer and searched with the "standard" analyzer.
    _AUTOCOMPLETE_FIELDS = ("label", "title", "Synonym", "brand_name",
                            "Definition")

    def __init__(self, args, loadOnly):
        """Load the configuration and set up the clients.

        Args:
            args: Path of the YAML configuration file to open.
            loadOnly: When true, ``run`` leaves existing indexes in place
                instead of deleting and re-creating them.
        """
        with open(args) as f:
            # safe_load only builds plain Python objects; a bare yaml.load()
            # without a Loader can construct arbitrary objects and is
            # rejected by recent PyYAML releases.
            self.conf = yaml.safe_load(f)
        self.loadOnly = loadOnly
        es_hosts = self.conf.get("elasticsearch")
        print("###")
        print("# SPARQL endpoint: " + self.conf["sparql"]["uri"])
        print("# ElasticSearch: %s" % es_hosts)
        print("###")
        self.es = Elasticsearch(es_hosts)

        self.urlOpener = FancyURLopener()
        # Prefer SPARQL JSON results, fall back to generic JSON.
        self.urlOpener.addheader(
            "Accept", "application/sparql-results+json, application/json;q=0.1")

    def uri_to_qname(self, uri):
        """Return ``uri`` shortened to ``prefix:rest`` using the configured prefixes.

        The first configured prefix whose namespace URI starts ``uri`` wins;
        if none matches, ``uri`` is returned unchanged.
        """
        for p, u in self.conf.get("prefixes", {}).items():
            if uri.startswith(u):
                return uri.replace(u, p + ":", 1)
        return uri

    def sparql_prefixes(self):
        """Return the configured prefixes as newline-separated SPARQL PREFIX lines."""
        return "\n".join(
            "PREFIX %s: <%s>" % (p, u)
            for p, u in self.conf.get("prefixes", {}).items())

    def _index_settings(self):
        """Build the settings/mappings body used when creating an index."""
        field_mapping = {
            "type": "string",
            "analyzer": "autocomplete",
            "search_analyzer": "standard"
        }
        return {
            "settings": {
                "number_of_shards": 1,
                "analysis": {
                    "filter": {
                        "autocomplete_filter": {
                            "type": "edge_ngram",
                            "min_gram": 3,
                            "max_gram": 20
                        }
                    },
                    "analyzer": {
                        "autocomplete": {
                            "type": "custom",
                            "tokenizer": "standard",
                            "filter": ["lowercase", "autocomplete_filter"]
                        }
                    }
                }
            },
            "mappings": {
                "compound": {
                    "properties": {
                        # Each field gets its own copy of the mapping dict.
                        name: dict(field_mapping)
                        for name in self._AUTOCOMPLETE_FIELDS
                    }
                }
            }
        }

    def _recreate_index(self, index):
        """Delete ``index`` if it exists, then create it with the autocomplete settings."""
        try:
            # ignore=404 asks the client not to fail when the index is absent.
            res = self.es.indices.delete(index=index, ignore=404)
            print("Delete index " + index + ", response : '%s'" % (res,))
            res = self.es.indices.create(index=index,
                                         body=self._index_settings())
            print("Create index " + index + ", response: '%s'" % (res,))
        except NotFoundError:
            # Deliberately best-effort: a missing index is not an error here.
            pass

    def run(self):
        """Load every configured index/type into ElasticSearch.

        Unless ``loadOnly`` is set, each index is first deleted and
        re-created. Documents are then loaded for every document type of the
        index; with ``loadOnly`` the existing index is reused as-is.
        """
        for index in self.conf["indexes"]:
            if not self.loadOnly:
                self._recreate_index(index)
            # Loading happens regardless of loadOnly; only the index
            # re-creation above is skipped by that flag.
            for doc_type in self.conf["indexes"][index]:
                print("index " + index)
                indexer = Indexer(self, index, doc_type)
                ## TODO: Store mapping for JSON-LD
                indexer.load()

    def dryrun(self):
        """Call ``sparql()`` on an Indexer for every index/type without loading."""
        for index in self.conf["indexes"]:
            for doc_type in self.conf["indexes"][index]:
                print("## index/type:", index, doc_type)
                indexer = Indexer(self, index, doc_type)
                # below should print the sparql
                indexer.sparql()

    def check(self):
        """Validate the configuration; raises ``Exception`` on the first problem."""
        self.check_prefixes()
        self.check_required_properties()

    def expand_qname(self, p):
        """Expand a ``prefix:rest`` qualified name to a full URI.

        Raises:
            Exception: If ``p`` contains no ``:`` or its prefix is not
                present in the configured ``prefixes``.
        """
        if ":" not in p:
            raise Exception("Invalid property, no prefix: " + p)
        prefix, rest = p.split(":", 1)
        prefixes = self.conf.get("prefixes", {})
        if prefix not in prefixes:
            raise Exception("Unknown prefix: " + prefix)
        return prefixes[prefix] + rest

    def check_property(self, p):
        """Validate a single property definition.

        A string is treated as a qualified name and must expand with a known
        prefix. Anything else is assumed to be dict-based and must have
        non-empty ``sparql``, ``variable`` and ``jsonld`` entries.

        Raises:
            Exception: If the property is invalid.
        """
        if isinstance(p, str):
            urlparse(self.expand_qname(p))
            return
        ## Assume it is dict-based - check they are all non-empty
        for key in ("sparql", "variable", "jsonld"):
            if not p.get(key):
                raise Exception("'%s' missing for %s" % (key, p))

    def check_required_properties(self):
        """Ensure every index+type has at least one required triple.

        A type is satisfied by a required non-string common property (which
        covers every index), by a ``type`` entry, or by at least one of its
        own properties being required according to ``is_property_required``.

        Raises:
            Exception: If an index/type has none of the above.
        """
        for p in self.conf.get("common_properties", []):
            if not isinstance(p, str) and is_property_required(p):
                return  # Great! required for every index

        # if not, we'll need to check each index+type
        for index, index_conf in self.conf["indexes"].items():
            for doc_type, type_conf in index_conf.items():
                if "type" in type_conf:
                    continue  # OK
                # any() is needed here: a bare filter(...) object is always
                # truthy in Python 3, which would silently accept everything.
                if any(is_property_required(p)
                       for p in type_conf.get("properties", [])):
                    continue  # OK
                raise Exception(
                    "No type: or property with required:true for %s %s" %
                    (index, doc_type))

    def check_prefixes(self):
        """Check prefix URIs and every property/type that relies on them.

        Prefix URIs not ending in ``/`` or ``#`` only produce a warning on
        stderr. Invalid common properties, type names or per-type properties
        raise ``Exception`` (via ``check_property`` / ``expand_qname``).
        """
        for uri in self.conf.get("prefixes", {}).values():
            if not uri.endswith(("#", "/")):
                # This should catch prefix definitions not ending with / #
                print("WARNING: Prefix doesn't end with / or #: %s" % uri,
                      file=sys.stderr)
        for p in self.conf.get("common_properties", []):
            self.check_property(p)
        for index, index_conf in self.conf["indexes"].items():
            for doc_type, type_conf in index_conf.items():
                if "type" in type_conf:
                    urlparse(self.expand_qname(type_conf["type"]))
                for p in type_conf.get("properties", []):
                    self.check_property(p)
Exemplo n.º 2
0
def get_month_data(month, cookie, token):
    """Request one month of data from ``APIURL`` and return the body as text.

    Args:
        month: Value sent as the ``date`` form field.
        cookie: Value sent verbatim in the ``Cookie`` request header.
        token: Value sent as the ``_token`` form field.

    Returns:
        The response body decoded as UTF-8.
    """
    # NOTE(review): the values are interpolated without URL-encoding, so a
    # month or token containing '&', '=', '+' or '%' would corrupt the form
    # body - confirm callers only pass URL-safe values.
    params = 'date={}&_token={}'.format(month, token)
    opener = FancyURLopener()
    opener.addheader('Cookie', cookie)
    stream = opener.open(APIURL, params)
    try:
        return stream.read().decode('utf-8')
    finally:
        # Release the connection even when reading or decoding fails.
        stream.close()