示例#1
0
 def _get_ids_worker_by_taxid(args):
     """Return the ``_id`` of every document whose ``taxid`` term matches.

     ``args`` is one ``(es_kwargs, taxid, step)`` tuple: ``es_kwargs`` is
     expanded into the ``ESIndexer`` constructor, ``taxid`` is the term value
     searched for and ``step`` becomes the query ``size``.  (Presumably the
     arguments are packed so the function can be used with a map-style
     worker pool -- confirm against the caller.)

     The trailing assertion checks that the number of ids collected equals
     the ``total`` reported by the result set.
     """
     from utils.es import ESIndexer
     from pyes import TermQuery

     es_kwargs, taxid, step = args

     term_query = TermQuery()
     term_query.add('taxid', taxid)
     term_query.fields = []  # no stored fields requested; only ``_id`` is read
     term_query.size = step

     indexer = ESIndexer(**es_kwargs)
     hits = indexer.conn.search(term_query)

     doc_ids = []
     for hit in hits:
         doc_ids.append(hit['_id'])
     assert len(doc_ids) == hits.total
     return doc_ids
示例#2
0
文件: indexing.py 项目: dbuxton/pyes
 def testGetCountBySearch(self):
     """Counting by a term query on ``name`` = "joe" must report one hit."""
     index = "test-index"
     doc_type = "test-type"
     self.conn.index({"name": "Joe Tester"}, index, doc_type, 1)
     self.conn.index({"name": "Bill Baloney"}, index, doc_type, 2)
     # Refresh before counting so the two documents above are searchable.
     self.conn.refresh([index])

     joe_query = TermQuery("name", "joe")
     count_response = self.conn.count(joe_query, indexes=[index])
     self.assertResultContains(count_response, {'count': 1})
示例#3
0
    def find_term(self, name, value, size=10, index=None):
        """Run a term query for ``name`` == ``value`` and return the search result.

        At most ``size`` hits are requested.  The search targets ``index``
        when given, otherwise ``self.index``.  Returns ``None`` without
        searching when ``self.connection`` is falsy.
        """
        if self.connection:
            term = TermQuery(name, value)
            return self.connection.search(query=Search(term, size=size),
                                          indices=index or self.index)
        return None
示例#4
0
    def test_TermQuery(self):
        """Each ``TermQuery`` variant below must match exactly one document.

        The optional third positional argument is passed both as an int and
        as a string (presumably the boost -- confirm against ``TermQuery``).
        """
        term_args = (
            ("name", "joe"),
            ("name", "joe", 3),
            ("name", "joe", "3"),
            ("value", 43.32),
        )
        for args in term_args:
            query = TermQuery(*args)
            hits = self.conn.search(query=query, indices=["test-index"])
            self.assertEquals(hits.total, 1)
示例#5
0
 def test_TermQuery(self):
     """Term query on ``parsedtext`` = "joe": two hits and a fixed max score."""
     joe_query = TermQuery("parsedtext", "joe")
     raw_response = self.conn.search(query=joe_query, indexes="test-index")
     self.dump(raw_response)

     # Wrap the raw response so totals and scores are exposed as attributes.
     wrapped = ResultSet(raw_response)
     self.assertEquals(wrapped.total, 2)
     self.assertEquals(wrapped.max_score, 0.15342641000000001)
示例#6
0
    def _get_query_type(self, column, lookup_type, db_type, value):
        if db_type == "unicode":
            if (lookup_type == "exact" or lookup_type == "iexact"):
                q = TermQuery(column, value)
                return q
            if (lookup_type == "startswith" or lookup_type == "istartswith"):
                return RegexTermFilter(column, value)
            if (lookup_type == "endswith" or lookup_type == "iendswith"):
                return RegexTermFilter(column, value)
            if (lookup_type == "contains" or lookup_type == "icontains"):
                return RegexTermFilter(column, value)
            if (lookup_type == "regex" or lookup_type == "iregex"):
                return RegexTermFilter(column, value)

        if db_type == "datetime" or db_type == "date":
            if (lookup_type == "exact" or lookup_type == "iexact"):
                return TermFilter(column, value)

        #TermFilter, TermsFilter
        if lookup_type in ["gt", "gte", "lt", "lte", "range", "year"]:
            value['field'] = column
            return RangeQuery(ESRange(**value))
        if lookup_type == "in":
            #            terms = [TermQuery(column, val) for val in value]
            #            if len(terms) == 1:
            #                return terms[0]
            #            return BoolQuery(should=terms)
            return TermsFilter(field=column, values=value)
        raise NotImplemented
示例#7
0
    def test_TermQuery(self):
        """Each ``TermQuery`` variant below must report exactly one hit.

        The optional third positional argument is passed both as an int and
        as a string (presumably the boost -- confirm against ``TermQuery``).
        """
        term_args = (
            ("name", "joe"),
            ("name", "joe", 3),
            ("name", "joe", "3"),
            ("value", 43.32),
        )
        for args in term_args:
            query = TermQuery(*args)
            response = self.conn.search(query=query, indexes=["test-index"])
            self.assertEquals(response['hits']['total'], 1)
示例#8
0
def find_BID_in_SBN(bid, es_server="localhost:9200"):
    """Look up ``bid`` in the ``iccu`` index.

    The identifier is first converted with ``to_iccu_bid`` and then matched
    against the ``codiceIdentificativo`` field on the Elasticsearch server
    at ``es_server``.

    Returns the list of matching hits, or ``None`` when nothing matches.
    """
    iccu_bid = to_iccu_bid(bid)
    term = TermQuery('codiceIdentificativo', iccu_bid)
    connection = ES(server=es_server)
    matches = list(connection.search(query=term, indices="iccu"))
    return matches if matches else None
示例#9
0
 def testGetCountBySearch(self):
     """Counting by a term query on ``name`` = "joe" must report one hit."""
     sample_docs = (
         (1, {"name": "Joe Tester"}),
         (2, {"name": "Bill Baloney"}),
     )
     for doc_id, body in sample_docs:
         self.conn.index(body, self.index_name, self.document_type, doc_id)
     # Refresh before counting so the documents above are searchable.
     self.conn.refresh(self.index_name)

     joe_query = TermQuery("name", "joe")
     count_response = self.conn.count(joe_query, indices=self.index_name)
     self.assertResultContains(count_response, {'count': 1})
示例#10
0
    def post_verify_changes(self, changes):
        """Print a consistency report for an applied change set.

        ``changes`` is read for the keys ``'timestamp'``, ``'add'``,
        ``'update'`` and ``'delete'``.  Four checks are printed in order:

        1. when anything was added/updated, the first timestamp bucket from
           ``get_timestamp_stats()`` matches ``changes['timestamp']``
           (asserted) and its count equals ``len(add) + len(update)``;
        2. when anything was deleted, none of the deleted ids can still be
           fetched;
        3. the per-timestamp counts add up to the total document count;
        4. the ids carrying the new timestamp are exactly the added ids plus
           the ``_id`` of every updated doc.

        Mismatches are reported on stdout only; nothing is returned.
        """
        backend = GeneDocESBackend(self)
        expected_ts = changes['timestamp']
        stats = self.get_timestamp_stats()

        if changes['add'] or changes['update']:
            print('Verifying "add" and "update"...', end='')
            newest = stats[0]
            assert newest[0] == expected_ts, "{} != {}".format(newest[0], expected_ts)
            indexed_cnt = newest[1]
            changed_cnt = len(changes['add']) + len(changes['update'])
            if indexed_cnt == changed_cnt:
                outcome = '...{}=={}...OK'
            else:
                outcome = '...{}!={}...ERROR!!!'
            print(outcome.format(indexed_cnt, changed_cnt))

        if changes['delete']:
            print('Verifying "delete"...', end='')
            fetched = backend.mget_from_ids(changes['delete'])
            # Count the truthy entries, i.e. deleted ids that still resolve.
            survivors = sum(1 for doc in fetched if doc)
            if survivors == 0:
                print('...{}==0...OK'.format(survivors))
            else:
                print('...{}!=0...ERROR!!!'.format(survivors))

        print("Verifying all docs have timestamp...", end='')
        stamped_total = sum(row[1] for row in stats)
        index_total = self.count()['count']
        if stamped_total == index_total:
            print('{}=={}...OK'.format(stamped_total, index_total))
        else:
            print('ERROR!!!\n\t Should be "{}", but get "{}"'.format(index_total, stamped_total))

        print("Verifying all new docs have updated timestamp...")
        epoch_secs = time.mktime(expected_ts.utctimetuple())
        # convert to utc timestamp, here 8 hour difference is hard-coded (PST)
        epoch_secs = epoch_secs - 8 * 3600
        epoch_ms = int(epoch_secs * 1000)

        ts_query = TermQuery()
        ts_query.add('_timestamp', epoch_ms)
        feeder = self.doc_feeder(query=ts_query, fields=[], step=10000)

        updated_ids = [doc['_id'] for doc in changes['update']]
        expected_ids = sorted(changes['add'] + updated_ids)
        found_ids = sorted([doc['_id'] for doc in feeder])
        if expected_ids == found_ids:
            print("{}=={}...OK".format(len(expected_ids), len(found_ids)))
        else:
            print('ERROR!!!\n\t Should be "{}", but get "{}"'.format(len(expected_ids), len(found_ids)))
示例#11
0
 def test_TermQuery(self):
     """Term query on ``uuid`` = "1" returns one hit exposing attachment fields."""
     wanted_fields = [
         'attachment', 'attachment.author', 'attachment.title',
         'attachment.date'
     ]
     search = TermQuery("uuid", "1").search(fields=wanted_fields)
     resultset = self.conn.search(query=search, indices=self.index_name)
     self.assertEquals(resultset.total, 1)

     first_hit = resultset.hits[0]
     self.assertEquals(first_hit['fields']['attachment.author'],
                       u'Tika Developers')
示例#12
0
 def all_as_schedule(self):
     """
     Build the current schedule from the service documents in Elastic.

     Every task of every document of ``_type`` "service" becomes one
     ``self.Entry``.  The result maps each entry's ``name`` to the entry;
     a later entry with the same name replaces an earlier one.
     """
     self.logger.debug("ControlPlaneScheduler: Fetching database schedule")
     services = self._elastic.search(TermQuery("_type", "service"))
     built = (
         self.Entry(svc_model=service, task_model=task)
         for service in services
         # A service whose ``Tasks`` is falsy contributes no entries.
         for task in service.Tasks or ()
     )
     return {entry.name: entry for entry in built}
示例#13
0
 def test_TermQuery(self):
     """Term query on ``uuid`` = "1" returns one hit exposing attachment fields."""
     wanted_fields = [
         'attachment', 'attachment.author', 'attachment.title',
         'attachment.date'
     ]
     search = TermQuery("uuid", "1").search(fields=wanted_fields)
     response = self.conn.search(query=search, indexes=["test-index"])
     hits = response['hits']
     self.assertEquals(hits['total'], 1)

     first_hit = hits['hits'][0]
     self.assertEquals(first_hit['fields']['attachment.author'],
                       u'Tika Developers')
示例#14
0
    def test_es_sample_data(self):
        """Index the ``contacts_es`` sample docs and query them back.

        The index named by the ES client is dropped and recreated, the
        ``Cnt`` mapping is installed, and every sample doc is indexed under
        its own ``_c`` type and ``_id``.  Two term queries (on ``prefix``
        and on ``oOn``) are then run and every returned hit is checked.
        """
        print("\TestESPyes.test_es_sample_data")
        print("""LOAD SAMPLE DOCS:\n""")

        resp = self.load_sample('contacts_es')
        assert resp['status'] == 200
        sample_docs = resp['response']['docs']

        es = self.es
        index_name = es.__dict__['index_name']
        es.delete_index_if_exists(index_name)
        es.create_index(index_name)
        es.put_mapping('Cnt', {'properties': models.esCnt}, [index_name])

        for doc in sample_docs:
            body = {
                "dNam": doc['dNam'],
                "oOn": doc['oOn'],
                "prefix": doc['prefix'],
                "parsedtext": doc['dNam'],
            }
            es.index(body, index_name, doc['_c'], doc['_id'].__str__())

        es.default_indices = [index_name]
        # A fixed pause is used here instead of es.refresh(index_name).
        time.sleep(1)

        for hit in es.search(query=TermQuery("prefix", "dr")):
            assert hit.prefix == 'Dr'

        created_on = datetime.datetime(2012, 10, 8, 13, 44, 33, 851000)
        for hit in es.search(query=TermQuery("oOn", created_on)):
            assert hit.dNam == 'Einstein, Mr Larry Wayne'
    def test_delete(self):
        '''
        Test if records are deleted from the remote elastic server
        '''
        with Transaction().start(DB_NAME, USER, context=CONTEXT):
            backlog = self.IndexBacklog

            def pending():
                # Number of records currently waiting in the index backlog.
                return len(backlog.search([]))

            self.create_defaults()
            users = self.create_users()
            self.assertEqual(pending(), 2)
            backlog.update_index()
            self.assertEqual(pending(), 0)

            time.sleep(2)  # wait for changes to reach search server
            conn = backlog._get_es_connection()

            def user_hits():
                # Fresh search for the test user on every call.
                return conn.search(query=TermQuery('rec_name', 'testuser'))

            self.assertEqual(len(user_hits()), 1)

            # Deleting the users queues backlog entries again; flushing
            # them must remove the document from the search server.
            self.User.delete(users)
            self.assertEqual(pending(), 2)
            backlog.update_index()
            time.sleep(2)  # wait for changes to reach search server
            self.assertEqual(len(user_hits()), 0)
示例#16
0
 def test_force(self):
     """Queue three docs in bulk mode, flush with ``force_bulk`` and search.

     The flush must report three items, and a term query for "bill" on
     ``name`` must then match two documents.
     """
     self.conn.raise_on_bulk_item_failure = False

     # (name, parsedtext, uuid); the 1-based position doubles as the doc id.
     people = (
         ("Joe Tester", "Joe Testere nice guy", "11111"),
         ("Bill Baloney", "Bill Testere nice guy", "22222"),
         ("Bill Clinton", "Bill is not \n             nice guy", "33333"),
     )
     for position, (name, parsedtext, uuid) in enumerate(people, 1):
         body = {
             "name": name,
             "parsedtext": parsedtext,
             "uuid": uuid,
             "position": position
         }
         self.conn.index(body, self.index_name, self.document_type,
                         position, bulk=True)

     bulk_result = self.conn.force_bulk()
     self.assertEquals(len(bulk_result['items']), 3)
     self.conn.refresh(self.index_name)

     bill_query = TermQuery("name", "bill")
     resultset = self.conn.search(query=bill_query, indices=self.index_name)
     self.assertEquals(resultset.total, 2)
示例#17
0
    def test_initial(self):
        """Post one ``Usr`` doc through the generic controller, then search.

        Only the post response is asserted (status 200, one insert); the
        search result is fetched but not checked.
        """
        es = self.es
        es_index_name = es.__dict__['index_name']
        generic = controllers.Generic(self.db, es)

        new_user = {
            "uNam": "jkutz",
            "fNam": "Josh",
            "lNam": "Kutz",
            "gen": 'm',
            "emails": [{"email": "*****@*****.**"}],
        }
        rs = generic.post(class_name='Usr', docs=[new_user])
        assert rs['status'] == 200 and rs['response']['total_inserted'] == 1
        # Not used further; the lookup still fails loudly if the response
        # lacks the expected nesting.
        doc = rs['response']['docs'][0]['doc']

        # time.sleep(1)
        es.refresh(es_index_name)
        josh_query = TermQuery("dNam", "josh")
        results = es.search(query=josh_query)

        # NOTE(review): unused local; looks like a leftover debugger anchor.
        x = 0
示例#18
0
def _es_doc_exists(conn, alias, doc_id):
    """Return True when a search through ``alias`` finds ``_id`` == ``doc_id``.

    A ``SearchPhaseExecutionException`` raised by the search is treated as
    "not found".
    """
    try:
        return len(conn.search(query=TermQuery("_id", doc_id),
                               indices=[alias])) > 0
    except SearchPhaseExecutionException:
        return False


def _build_concept_doc(prefix, concept, identifier, source_doc):
    """Return an indexable copy of ``source_doc``; the input is not modified.

    The copy gains an ``identifier`` key and a ``prov_es_json`` envelope
    holding ``prefix`` plus an untouched copy of the source document under
    ``[concept][identifier]``.  A dict-valued ``prov:type`` is collapsed to
    its ``'$'`` entry (empty string when that entry is absent).
    """
    doc = copy.deepcopy(source_doc)
    pristine = copy.deepcopy(source_doc)
    doc['identifier'] = identifier
    doc['prov_es_json'] = {'prefix': prefix}
    doc['prov_es_json'].setdefault(concept, {})[identifier] = pristine
    if 'prov:type' in doc and isinstance(doc['prov:type'], dict):
        doc['prov:type'] = doc['prov:type'].get('$', '')
    return doc


def import_prov(conn, index, alias, prov_es_json):
    """Index PROV-ES concepts into ElasticSearch.

    Args:
        conn: ES connection used for both the existence searches and the
            indexing calls.
        index: Name of the index documents are written to.
        alias: Index/alias searched to decide whether an ``_id`` already
            exists.
        prov_es_json: PROV-ES document; must contain a ``'prefix'`` entry.
            It is passed to ``fix_hadMember_ids`` first.

    Every concept other than ``'prefix'`` is walked.  Ids already found via
    ``alias`` are not indexed again.  For ``'bundle'``, each member document
    is indexed under its own concept type and the bundle document itself is
    indexed with, per concept, the list of member ids.  The documents held
    in ``prov_es_json`` are copied before being decorated for indexing.
    """
    # fix hadMember ids
    fix_hadMember_ids(prov_es_json)

    prefix = prov_es_json['prefix']
    for concept in prov_es_json:
        if concept == 'prefix':
            continue

        if concept == 'bundle':
            for bundle_id in prov_es_json['bundle']:
                if _es_doc_exists(conn, alias, bundle_id):
                    continue
                bundle_prov = copy.deepcopy(prov_es_json['bundle'][bundle_id])
                bundle_prov['prefix'] = prefix
                bundle_doc = {
                    'identifier': bundle_id,
                    'prov_es_json': bundle_prov,
                }
                for b_concept in bundle_prov:
                    if b_concept == 'prefix':
                        continue
                    bundle_doc[b_concept] = []
                    for i in bundle_prov[b_concept]:
                        if not _es_doc_exists(conn, alias, i):
                            doc = _build_concept_doc(
                                prefix, b_concept, i,
                                bundle_prov[b_concept][i])
                            conn.index(doc, index, b_concept, i)
                        # The member id is recorded on the bundle whether or
                        # not the member itself had to be indexed.
                        bundle_doc[b_concept].append(i)
                conn.index(bundle_doc, index, 'bundle', bundle_id)
            continue

        for i in prov_es_json[concept]:
            if _es_doc_exists(conn, alias, i):
                continue
            docs = prov_es_json[concept][i]
            if not isinstance(docs, list):
                docs = [docs]
            for source_doc in docs:
                doc = _build_concept_doc(prefix, concept, i, source_doc)
                conn.index(doc, index, concept, i)
示例#19
0
def check_index_version():
    """Search the ``dtc`` doc type of ``e_index`` for the term ``dtc`` = "dtc".

    Nothing is returned.
    """
    dtc_query = TermQuery('dtc', 'dtc')
    resultset = conn.search(query=dtc_query, indices=e_index, doc_types="dtc")
    # NOTE(review): ``total`` is read and discarded -- presumably only to
    # force the search to execute (and fail if the index is unusable);
    # confirm whether the value was meant to be returned.
    resultset.total
示例#20
0
def get_pubs():
    """Return ``immo`` docs with ``expired`` = "true" that have no "like" opinion.

    The term query on ``expired`` is filtered by the negation of a
    has-child filter matching ``opinion`` children whose ``opinion`` term
    is "like".
    """
    expired = TermQuery('expired', 'true')
    liked = HasChildFilter('opinion', TermQuery('opinion', 'like'))
    not_liked = NotFilter(liked)
    filtered = FilteredQuery(expired, not_liked)
    return conn.search(query=filtered, indices=e_index, doc_types="immo")
示例#21
0
 def test_Update(self):
     """Reindex by a term query for "joe"; the response must report two hits."""
     from pprint import pprint

     joe_query = TermQuery("name", "joe")
     response = self.conn.reindex(query=joe_query, indexes=["test-index"])
     pprint(response)  # dump the raw response for inspection
     self.assertEquals(response['hits']['total'], 2)
示例#22
0
 def test_TermQuery(self):
     """The single "joe" hit must expose ``inserted`` as a ``datetime``."""
     joe_query = TermQuery("name", "joe")
     resultset = self.conn.search(query=joe_query, indices=self.index_name)
     self.assertEquals(resultset.total, 1)

     first_hit = resultset[0]
     self.assertEquals(first_hit.inserted,
                       datetime(2010, 10, 22, 12, 12, 12))
示例#23
0
 def test_TermQuery(self):
     """The single "joe" hit must carry ``inserted`` as a ``datetime`` in ``_source``."""
     joe_query = TermQuery("name", "joe")
     response = self.conn.search(query=joe_query, indexes=["test-index"])
     hits = response['hits']
     self.assertEquals(hits['total'], 1)

     first_source = hits['hits'][0]['_source']
     self.assertEquals(first_source['inserted'],
                       datetime(2010, 10, 22, 12, 12, 12))
示例#24
0
def _first_sensor(instr):
    """Return ``"eos:<value>"`` for the first non-empty sensor field, else None.

    Fields are tried in fixed priority order; a field counts as empty when
    it is missing from ``instr`` or when ``EMPTY`` matches its value.
    """
    for key in ('Instrument Technology', 'Instrument Type', 'Subtype',
                'Type', 'Class'):
        if key in instr and not EMPTY.search(instr[key]):
            return "eos:%s" % instr[key]
    return None


def _index_if_missing(conn, alias, index, doc_type, doc_id, doc):
    """Index ``doc`` as ``doc_id`` unless a search via ``alias`` finds that ``_id``."""
    if len(conn.search(query=TermQuery("_id", doc_id), indices=[alias])) > 0:
        return
    conn.index(doc, index, doc_type, doc_id)


def import_instruments(instrs, es_url, index, alias):
    """Create JSON ES docs and import.

    Args:
        instrs: Iterable of instrument records (mappings).  Each must have
            ``'Instrument Name Short'``; the sensor fields and
            ``'Instrument Agencies'`` are optional.
        es_url: Address handed to the ``ES`` client constructor.
        index: Index written to; it is created when it does not exist.
        alias: Index/alias searched to skip ids that are already present.

    For every record an ``entity`` document identified by
    ``eos:<short name>`` is indexed.  The first time an agency is seen in
    this call, an ``agent`` document for it is indexed as well.
    """
    prefix = {
        "bibo": "http://purl.org/ontology/bibo/",
        "dcterms": "http://purl.org/dc/terms/",
        "eos": "http://nasa.gov/eos.owl#",
        "gcis": "http://data.globalchange.gov/gcis.owl#",
        "hysds": "http://hysds.jpl.nasa.gov/hysds/0.1#",
        "info": "http://info-uri.info/",
        "xlink": "http://www.w3.org/1999/xlink"
    }

    conn = ES(es_url)
    if not conn.indices.exists_index(index):
        conn.indices.create_index(index)

    # track agencies/organizations
    orgs = {}

    for instr in instrs:
        identifier = "eos:%s" % instr['Instrument Name Short']
        sensor = _first_sensor(instr)
        # No platform is derived from the instrument record here.
        platform = None

        if 'Instrument Agencies' in instr and not EMPTY.search(
                instr['Instrument Agencies']):
            org = "eos:%s" % instr['Instrument Agencies']
            if org not in orgs:
                orgs[org] = {
                    "prov_es_json": {
                        "prefix": prefix,
                        "agent": {
                            org: {
                                "prov:type": {
                                    "type": "prov:QualifiedName",
                                    "$": "prov:Organization",
                                },
                            },
                        },
                    },
                    "identifier": org,
                    "prov:type": "prov:Organization",
                }
                _index_if_missing(conn, alias, index, 'agent', org, orgs[org])
        else:
            org = None

        doc = {
            "prov_es_json": {
                "prefix": prefix,
                "entity": {
                    identifier: {
                        "gcis:hasSensor": sensor,
                        "gcis:inPlatform": platform,
                        "prov:type": "eos:instrument",
                        "gcis:hasGoverningOrganization": org,
                    },
                },
            },
            "gcis:hasSensor": sensor,
            "gcis:inPlatform": platform,
            "prov:type": "eos:instrument",
            "gcis:hasGoverningOrganization": org,
            "identifier": identifier,
        }
        _index_if_missing(conn, alias, index, 'entity', identifier, doc)
示例#25
0
 def test_TermQuery(self):
     """A term query for "bill" on ``name`` must report two hits."""
     bill_query = TermQuery("name", "bill")
     response = self.conn.search(query=bill_query, indexes=["test-index"])
     hit_count = response['hits']['total']
     self.assertEquals(hit_count, 2)
示例#26
0
文件: test_bulk.py 项目: diN0bot/pyes
 def test_TermQuery(self):
     """A term query for "bill" on ``name`` must match two documents."""
     bill_query = TermQuery("name", "bill")
     matches = self.conn.search(query=bill_query, indices=self.index_name)
     self.assertEquals(matches.total, 2)