Exemplo n.º 1
0
 def flush(docs):
     """Post the buffered docs to the Solr update handler, then empty the buffer.

     ``self`` and ``index`` are not parameters; they are resolved from the
     enclosing scope, which lies outside this excerpt.
     """
     doc_count = len(docs)
     print('Flushing {} movies'.format(doc_count))

     update_url = '{}/{}/update?commitWithin=1500'.format(self.solr_base_ep, index)
     resp = requests.post(update_url, json=docs)
     resp_msg(msg="Done", resp=resp)

     # Emptied in place: the very list object the caller passed in is cleared.
     docs.clear()
Exemplo n.º 2
0
 def create_index(self, index):
     """Create the Elasticsearch index ``index`` from its local settings file.

     Reads ``<self.configs_dir>/<index>_settings.json`` and sends the parsed
     JSON as the body of the index-creation request.
     """
     cfg_json_path = os.path.join(self.configs_dir, "%s_settings.json" % index)
     # JSON text is UTF-8; do not depend on the platform's default encoding.
     with open(cfg_json_path, encoding="utf-8") as src:
         settings = json.load(src)

     # The file is closed before the network round-trip instead of being
     # held open across it.
     resp = self.es.indices.create(index, body=settings)
     resp_msg(msg="Created index {}".format(index), resp=ElasticResp(resp))
Exemplo n.º 3
0
 def create_featureset(self, index, name, ftr_config):
     """Validate a feature set and PUT it to the feature store of ``index``.

     Args:
         index: Solr core whose ``schema/feature-store`` endpoint is used.
         name: feature store name; passed to ``validate_featureset`` and used
             in the status message (it is not part of the request URL).
         ftr_config: feature definitions, sent as the JSON request body.
     """
     self.validate_featureset(name, ftr_config)

     # The template has exactly two placeholders; the previous third argument
     # (name) was silently ignored by str.format and only misled the reader.
     store_url = '{}/{}/schema/feature-store'.format(self.solr_base_ep, index)
     resp = requests.put(store_url, json=ftr_config)
     resp_msg(msg='Created {} feature store under {}:'.format(name, index),
              resp=resp)
Exemplo n.º 4
0
    def submit_model(self, featureset, index, model_name, solr_model):
        """Replace the model ``model_name`` in the Solr model store of ``index``.

        Issues a DELETE for ``<model-store>/<model_name>`` and then PUTs
        ``solr_model`` to the model store. ``featureset`` is accepted but not
        used by this implementation.
        """
        store_url = '{}/{}/schema/model-store'.format(self.solr_base_ep, index)

        delete_resp = requests.delete('{}/{}'.format(store_url, model_name))
        resp_msg(msg='Deleted Model {}'.format(model_name), resp=delete_resp)

        create_resp = requests.put(store_url, json=solr_model)
        resp_msg(msg='Created Model {}'.format(model_name), resp=create_resp)
Exemplo n.º 5
0
    def model_query(self, index, model, model_params, query):
        """Search ``index`` with ``query`` and rescore the hits with an ``sltr`` model.

        The rescore window and the result size are both fixed at 1000.
        Returns the ``_source`` of every hit, in response order.
        """
        sltr_clause = {
            "params": model_params,
            "model": model
        }
        body = {
            "query": query,
            "rescore": {
                "window_size": 1000,
                "query": {"rescore_query": {"sltr": sltr_clause}}
            },
            "size": 1000
        }

        result = self.es.search(index, body=body)
        resp_msg(msg="Searching {} - {}".format(index, str(query)[:20]), resp=SearchResp(result))

        # Keep only the document bodies so the shape matches the Solr client.
        return [hit['_source'] for hit in result['hits']['hits']]
Exemplo n.º 6
0
 def create_index(self, index, settings):
     """Create a Solr core named ``index`` from the 'tmdb' configset.

     ``settings`` is accepted but not used by this implementation.
     """
     create_args = dict(action='CREATE', name=index, configSet='tmdb')
     cores_url = '{}/admin/cores?'.format(self.solr_base_ep)

     resp = requests.get(cores_url, params=create_args)
     resp_msg(msg="Created index {}".format(index), resp=resp)
Exemplo n.º 7
0
 def reset_ltr(self, index):
     """Drop and re-create the default LTR feature store at ``self.elastic_ep``.

     ``index`` is accepted but not used by this implementation.
     """
     resp = requests.delete(self.elastic_ep)
     # throw=False: presumably lets a failed delete (e.g. nothing to remove
     # yet) pass without raising -- confirm against resp_msg.
     resp_msg(msg="Removed Default LTR feature store",
              resp=resp,
              throw=False)

     resp = requests.put(self.elastic_ep)
     resp_msg(msg="Initialize Default LTR feature store",
              resp=resp)
Exemplo n.º 8
0
    def submit_model(self, featureset, index, model_name, model_payload):
        """Replace the model ``model_name``, creating it from ``featureset``.

        Deletes ``<elastic_ep>/_model/<model_name>`` (only the status code is
        printed), then POSTs ``model_payload`` to the feature set's
        ``_createmodel`` endpoint. ``index`` is accepted but not used here.
        """
        ltr_ep = self.elastic_ep
        delete_url = '{}{}'.format("{}/_model/".format(ltr_ep), model_name)
        create_url = "{}/_featureset/{}/_createmodel".format(ltr_ep, featureset)

        delete_resp = requests.delete(delete_url)
        print('Delete model {}: {}'.format(model_name, delete_resp.status_code))

        create_resp = requests.post(create_url, json=model_payload)
        resp_msg(msg="Created Model {}".format(model_name), resp=create_resp)
Exemplo n.º 9
0
    def reset_ltr(self):
        """Delete a fixed list of models, then feature stores, from the 'tmdb' core.

        Models are removed before the feature stores.
        """
        base = self.solr_base_ep

        for model in ('classic', 'genre', 'latest', 'title', 'title_fuzzy'):
            model_url = '{}/tmdb/schema/model-store/{}'.format(base, model)
            resp = requests.delete(model_url)
            resp_msg(msg='Deleted {} model'.format(model), resp=resp)

        for store in ('_DEFAULT', 'genre', 'release', 'title', 'title_fuzzy'):
            store_url = '{}/tmdb/schema/feature-store/{}'.format(base, store)
            resp = requests.delete(store_url)
            resp_msg(msg='Deleted {} Featurestore'.format(store), resp=resp)
Exemplo n.º 10
0
    def reset_ltr(self, index):
        """Delete every model, then every feature store, registered for ``index``.

        The names come from ``self.get_models`` and ``self.get_feature_stores``;
        the feature stores are only listed after all models have been deleted.
        """
        for model in self.get_models(index):
            model_url = '{}/{}/schema/model-store/{}'.format(self.solr_base_ep, index, model)
            resp = requests.delete(model_url)
            resp_msg(msg='Deleted {} model'.format(model), resp=resp)

        for store in self.get_feature_stores(index):
            store_url = '{}/{}/schema/feature-store/{}'.format(self.solr_base_ep, index, store)
            resp = requests.delete(store_url)
            resp_msg(msg='Deleted {} Featurestore'.format(store), resp=resp)
Exemplo n.º 11
0
    def delete_index(self, index):
        """Unload the Solr core ``index``, asking Solr to delete its index, data and instance dirs.

        The result is reported with ``throw=False`` -- presumably so a missing
        core does not raise; confirm against resp_msg.
        """
        unload_args = dict(
            action='UNLOAD',
            core=index,
            deleteIndex='true',
            deleteDataDir='true',
            deleteInstanceDir='true',
        )
        cores_url = '{}/admin/cores?'.format(self.solr_base_ep)

        resp = requests.get(cores_url, params=unload_args)
        resp_msg(msg="Deleted index {}".format(index), resp=resp, throw=False)
Exemplo n.º 12
0
 def create_index(self, index):
     """Create a Solr core named ``index`` from the configset of the same name.

     Presumes the 'index' directory under docker/solr/ is linked into the
     Docker container's configsets (i.e. docker/solr/tmdb/ for 'tmdb').
     """
     create_args = dict(action='CREATE', name=index, configSet=index)
     cores_url = '{}/admin/cores?'.format(self.solr_base_ep)

     resp = requests.get(cores_url, params=create_args)
     resp_msg(msg="Created index {}".format(index), resp=resp)
Exemplo n.º 13
0
    def model_query(self, index, model, model_params, query):
        """Run ``query`` on the Solr core ``index``, re-ranked by the LTR model ``model``.

        Returns the ``response.docs`` list of the JSON reply (up to 10000
        rows are requested). ``model_params`` is accepted but not used by
        this implementation.
        """
        select_url = '{}/{}/select?'.format(self.solr_base_ep, index)
        rerank = '{{!ltr model={}}}'.format(model)
        form = {'q': query, 'rq': rerank, 'rows': 10000}

        http_resp = requests.post(select_url, data=form)
        resp_msg(msg='Search keywords - {}'.format(query), resp=http_resp)
        return http_resp.json()['response']['docs']
Exemplo n.º 14
0
    def log_query(self, index, featureset, ids, params=None):
        """Search ``index`` and return documents with their logged LTR feature values.

        Args:
            index: index to search.
            featureset: feature set named in the ``sltr`` filter.
            ids: document ``_id`` values to restrict the search to, or
                ``None`` for no restriction.
            params: parameters passed to the ``sltr`` query; defaults to an
                empty dict.

        Returns:
            The ``_source`` of each hit, each with an added ``ltr_features``
            list holding one value per logged feature (0.0 where the log
            entry carries no ``value``).
        """
        # None instead of a mutable {} default shared between calls.
        sltr_params = {} if params is None else params

        # Named ``body`` so the request no longer shadows the ``params`` argument.
        body = {
            "query": {
                "bool": {
                    "filter": [
                        {
                            "sltr": {
                                "_name": "logged_features",
                                "featureset": featureset,
                                "params": sltr_params
                            }
                        }
                    ]
                }
            },
            "ext": {
                "ltr_log": {
                    "log_specs": {
                        "name": "ltr_features",
                        "named_query": "logged_features"
                    }
                }
            },
            "size": 1000
        }

        terms_query = [
            {
                "terms": {
                    "_id": ids
                }
            }
        ]

        if ids is not None:
            body["query"]["bool"]["must"] = terms_query

        resp = self.es.search(index, body=body)
        resp_msg(msg="Searching {} - {}".format(index, str(terms_query)[:20]), resp=SearchResp(resp))

        matches = []
        for hit in resp['hits']['hits']:
            source = hit['_source']
            logged = hit['fields']['_ltrlog'][0]['ltr_features']
            # A log entry without 'value' counts as 0.0.
            source['ltr_features'] = [feature.get('value', 0.0) for feature in logged]
            matches.append(source)

        return matches
Exemplo n.º 15
0
    def query(self, index, query):
        """Run ``query`` against ``index`` and return the hits' ``_source`` dicts.

        Each returned dict gets the hit's ``_score`` copied into it under the
        ``_score`` key.
        """
        result = self.es.search(index, body=query)
        resp_msg(msg="Searching {} - {}".format(index, str(query)[:20]), resp=SearchResp(result))

        # Flatten hits to plain documents so ES and Solr results look alike.
        docs = []
        for hit in result['hits']['hits']:
            source = hit['_source']
            source['_score'] = hit['_score']
            docs.append(source)

        return docs
Exemplo n.º 16
0
    def index_documents(self, index, movie_source):
        """Bulk-index every movie from ``movie_source`` into ``index``.

        Each movie is sent as type "movie" with its ``id`` field as the
        document ``_id``; actions are produced lazily and sent in chunks of 100.
        """
        actions = (
            {"_index": index,
             "_type": "movie",
             "_id": movie['id'],
             "_source": movie}
            for movie in movie_source
        )

        resp = elasticsearch.helpers.bulk(self.es, actions, chunk_size=100)
        resp_msg(msg="Streaming Bulk index DONE {}".format(index), resp=BulkResp(resp))
Exemplo n.º 17
0
    def query(self, index, query):
        """POST ``query`` to the select handler of ``index`` and return the matching docs.

        Docs that carry a ``score`` field also get it copied to ``_score``.
        """
        select_url = '{}/{}/select?'.format(self.solr_base_ep, index)

        http_resp = requests.post(select_url, data=query)
        resp_msg(msg='Query {}...'.format(str(query)[:10]), resp=http_resp)
        docs = http_resp.json()['response']['docs']

        # Mirror the score under the key the Elasticsearch client uses.
        for doc in docs:
            if 'score' in doc:
                doc['_score'] = doc['score']

        return docs
Exemplo n.º 18
0
    def feature_set(self, index, name):
        """Fetch the feature store ``name`` of ``index`` from Solr.

        Returns a ``(mapping, raw_features)`` tuple: ``mapping`` is a list of
        ``{'name': <feature name>}`` dicts in store order, ``raw_features`` is
        the ``features`` list exactly as returned by Solr.
        """
        store_url = '{}/{}/schema/feature-store/{}'.format(
            self.solr_base_ep, index, name)
        resp = requests.get(store_url)
        resp_msg(msg='Feature Set {}...'.format(name), resp=resp)

        raw_features = resp.json()['features']
        mapping = [{'name': feature['name']} for feature in raw_features]

        return mapping, raw_features
Exemplo n.º 19
0
    def index_documents(self, index, doc_type, doc_src):
        """Bulk-index every document from ``doc_src`` into ``index`` as ``doc_type``.

        Each document must carry an ``id`` field, used as its ``_id``; a
        document without one raises ValueError while the stream is consumed.
        Actions are sent in chunks of 100.
        """

        def actions(docs):
            for doc in docs:
                if 'id' not in doc:
                    raise ValueError("Expecting docs to have field 'id' that uniquely identifies document")
                yield {"_index": index,
                       "_type": doc_type,
                       "_id": doc['id'],
                       "_source": doc}

        resp = elasticsearch.helpers.bulk(self.es, actions(doc_src), chunk_size=100)
        resp_msg(msg="Streaming Bulk index DONE {}".format(index), resp=BulkResp(resp))
Exemplo n.º 20
0
    def feature_set(self, index, name):
        """Fetch the feature set ``name`` from the LTR endpoint.

        Returns a ``(mapping, raw_features)`` tuple: ``mapping`` is a list of
        ``{'name': <feature name>}`` dicts, ``raw_features`` is the stored
        ``features`` list. Raises RuntimeError when the reply's ``found`` flag
        is falsy. ``index`` is accepted but not used by this implementation.
        """
        resp = requests.get('{}/_featureset/{}'.format(self.elastic_ep, name))

        payload = resp.json()
        if not payload['found']:
            raise RuntimeError("Unable to find {}".format(name))

        resp_msg(msg="Fetched FeatureSet {}".format(name), resp=resp)

        raw_features = payload['_source']['featureset']['features']
        mapping = [{'name': feature['name']} for feature in raw_features]

        return mapping, raw_features
Exemplo n.º 21
0
    def submit_model(self, featureset, index, model_name, model_payload):
        """Replace the model ``model_name`` with a ranklib model built on ``featureset``.

        Deletes ``<elastic_ep>/_model/<model_name>`` (only the status code is
        printed), then POSTs ``model_payload`` wrapped as a ``model/ranklib``
        definition to the feature set's ``_createmodel`` endpoint. ``index``
        is accepted but not used by this implementation.
        """
        ltr_ep = self.elastic_ep
        delete_url = '{}{}'.format("{}/_model/".format(ltr_ep), model_name)
        create_url = "{}/_featureset/{}/_createmodel".format(ltr_ep, featureset)

        delete_resp = requests.delete(delete_url)
        print('Delete model {}: {}'.format(model_name, delete_resp.status_code))

        ranklib_model = {
            'type': 'model/ranklib',
            'definition': model_payload
        }
        body = {'model': {'name': model_name, 'model': ranklib_model}}

        create_resp = requests.post(create_url, json=body)
        resp_msg(msg="Created Model {}".format(model_name), resp=create_resp)
Exemplo n.º 22
0
    def submit_ranklib_model(self, featureset, index, model_name,
                             model_payload):
        """Submit a Ranklib model, converting it to Solr representation.

        Args:
            featureset: feature store the model was trained against.
            index: Solr core holding the feature and model stores.
            model_name: name to register the model under.
            model_payload: Ranklib model definition, passed to ``convert``.
        """
        # Kept from the original flow: this request and its resp_msg report on
        # the feature store before any conversion work starts.
        resp = requests.get('{}/{}/schema/feature-store/{}'.format(
            self.solr_base_ep, index, featureset))
        resp_msg(msg='Submit Model {} Ftr Set {}'.format(
            model_name, featureset),
                 resp=resp)

        # The index -> feature-name dict previously built from this response
        # was never read; the mapping that is actually used comes from
        # feature_set().
        feature_mapping, _ = self.feature_set(index, featureset)

        solr_model = convert(model_payload, model_name, featureset,
                             feature_mapping)
        self.submit_model(featureset, index, model_name, solr_model)
Exemplo n.º 23
0
    def log_query(self, index, featureset, ids, options=None, id_field='id'):
        """Fetch documents from Solr together with their logged feature values.

        Args:
            index: Solr core to query.
            featureset: feature store named in the ``[features]`` transformer.
            ids: ids to restrict the search to, or ``None`` to match every
                document (``*:*``).
            options: optional mapping rendered as ``efi.<key>="<value>"``
                arguments of the ``[features]`` transformer.
            id_field: name of the field holding the document id.

        Returns:
            The ``response.docs`` list (at most 1000 rows are requested), each
            doc with an added ``ltr_features`` list of floats parsed from its
            ``[features]`` value.
        """
        # None instead of a mutable {} default shared between calls.
        options = {} if options is None else options
        efi_str = ' '.join(
            'efi.{}="{}"'.format(key, val) for key, val in options.items())

        if ids is None:
            query = "*:*"
        else:
            # str() so numeric ids can be joined as well as string ids.
            id_list = ','.join(str(doc_id) for doc_id in ids)
            query = "{{!terms f={}}}{}".format(id_field, id_list)
            print(query)

        params = {
            'fl': '{},[features store={} {}]'.format(id_field, featureset,
                                                     efi_str),
            'q': query,
            'rows': 1000,
            'wt': 'json'
        }
        resp = requests.post('{}/{}/select'.format(self.solr_base_ep, index),
                             data=params)
        resp_msg(msg='Searching {}'.format(index), resp=resp)
        docs = resp.json()['response']['docs']

        def parse_features(features):
            """Turn 'name=value,name=value,...' into the list of float values."""
            if not features:
                # An empty string has no 'name=value' pair to split.
                return []
            # rsplit: the value is whatever follows the last '='.
            return [float(feature.rsplit('=', 1)[1])
                    for feature in features.split(',')]

        # Clean up features to consistent format
        for doc in docs:
            doc['ltr_features'] = parse_features(doc['[features]'])

        return docs
Exemplo n.º 24
0
    def submit_model(self, featureset, model_name, model_payload, index='tmdb'):
        """Convert a model to Solr representation and replace it in the model store.

        Args:
            featureset: feature store the model was trained against.
            model_name: name to register the model under.
            model_payload: model definition, passed to ``convert``.
            index: Solr core holding the feature and model stores; defaults
                to 'tmdb', the core this method was previously hard-wired to.
        """
        # Kept from the original flow: this request and its resp_msg report on
        # the feature store before any conversion work starts.
        resp = requests.get('{}/{}/schema/feature-store/{}'.format(
            self.solr_base_ep, index, featureset))
        resp_msg(msg='Submit Model {} Ftr Set {}'.format(
            model_name, featureset),
                 resp=resp)

        # The index -> feature-name dict previously built from this response
        # was never read; the mapping that is actually used comes from
        # feature_set().
        feature_mapping, _ = self.feature_set(index, featureset)

        solr_model = convert(model_payload, model_name, featureset,
                             feature_mapping)

        url = '{}/{}/schema/model-store'.format(self.solr_base_ep, index)
        resp = requests.delete('{}/{}'.format(url, model_name))
        resp_msg(msg='Deleted Model {}'.format(model_name), resp=resp)

        resp = requests.put(url, json=solr_model)
        resp_msg(msg='Created Model {}'.format(model_name), resp=resp)
Exemplo n.º 25
0
 def create_featureset(self, index, name, ftr_config):
     """POST ``ftr_config`` to ``<elastic_ep>/_featureset/<name>``.

     ``index`` is accepted but not used by this implementation.
     """
     featureset_url = '{}/_featureset/{}'.format(self.elastic_ep, name)
     resp = requests.post(featureset_url, json=ftr_config)
     resp_msg(msg="Create {} feature set".format(name), resp=resp)
Exemplo n.º 26
0
 def create_index(self, index, settings):
     """Create the Elasticsearch index ``index`` with ``settings`` as the request body."""
     created = self.es.indices.create(index, body=settings)
     status_text = "Created index {}".format(index)
     resp_msg(msg=status_text, resp=ElasticResp(created))
Exemplo n.º 27
0
 def commit():
     """Issue a commit request to the Solr update handler.

     ``self`` and ``index`` are resolved from the enclosing scope, which lies
     outside this excerpt.
     """
     commit_url = '{}/{}/update?commit=true'.format(self.solr_base_ep, index)
     resp = requests.get(commit_url)
     resp_msg(msg="Committed index {}".format(index), resp=resp)
Exemplo n.º 28
0
 def create_index(self, index):
     """Create the Elasticsearch index ``index`` from its local settings file.

     Reads ``docker/elasticsearch/<index>_settings.json`` (relative to the
     current working directory) and sends the parsed JSON as the body of the
     index-creation request.
     """
     settings_path = "docker/elasticsearch/%s_settings.json" % index
     # JSON text is UTF-8; do not depend on the platform's default encoding.
     with open(settings_path, encoding="utf-8") as src:
         settings = json.load(src)

     # The file is closed before the network round-trip instead of being
     # held open across it.
     resp = self.es.indices.create(index, body=settings)
     resp_msg(msg="Created index {}".format(index), resp=ElasticResp(resp))
Exemplo n.º 29
0
 def delete_index(self, index):
     """Delete the Elasticsearch index ``index``.

     The client is called with ``ignore=[400, 404]`` and the result is
     reported with ``throw=False`` -- presumably so a missing index is not an
     error; confirm against resp_msg.
     """
     ignored_statuses = [400, 404]
     deleted = self.es.indices.delete(index=index, ignore=ignored_statuses)
     status_text = "Deleted index {}".format(index)
     resp_msg(msg=status_text, resp=ElasticResp(deleted), throw=False)
Exemplo n.º 30
0
 def flush(docs):
     """Post ``docs`` as JSON to the Solr update handler through ``self.solr``.

     ``self`` and ``index`` are resolved from the enclosing scope, which lies
     outside this excerpt. The list is not cleared here.
     """
     update_url = '{}/{}/update'.format(self.solr_base_ep, index)
     resp = self.solr.post(update_url, json=docs)
     sent_text = "{} Docs Sent".format(len(docs))
     resp_msg(msg=sent_text, resp=resp)