Пример #1
0
 def get(self, loop=None):
     """Return the shared Elasticsearch client, creating it on first use.

     The client is built from the ``connection_settings`` mapping found
     under the ``elasticsearch`` key of ``app_settings`` and cached on
     ``self._conn``. Later calls return the cached client and ignore
     ``loop``.

     :param loop: event loop forwarded to the ``Elasticsearch`` constructor
         when the client is first created.
     :returns: the cached ``Elasticsearch`` instance.
     """
     if self._conn is None:
         es_settings = app_settings.get("elasticsearch", {})
         # Fall back to an empty mapping: unpacking ``None`` with ``**``
         # raises TypeError when no connection settings are configured.
         connection_settings = es_settings.get("connection_settings") or {}
         self._conn = Elasticsearch(loop=loop, **connection_settings)
     return self._conn
Пример #2
0
 async def setSingleCache(self, cacheModel: EsCacheModel) -> bool:
     """
     Write a single cache entry to Elasticsearch.

     cacheModel: entry to store; ``getDict()`` supplies the document body
         and ``getId()`` the document id.
     Returns: True when the document was indexed, False when any exception
         was raised (the traceback is printed).
     """
     try:
         node = {
             'host': config[ELK.ELK_HOST.value],
             'port': config[ELK.ELK_PORT.value],
             "timeout": 360000
         }
         async with Elasticsearch([node]) as client:
             await client.index(index=self.esindex,
                                body=cacheModel.getDict(),
                                doc_type=self.estype,
                                id=cacheModel.getId())
     except Exception:
         traceback.print_exc()
         return False
     else:
         return True
Пример #3
0
async def add_elk(token,
                  date_item,
                  index=config.ELK_INDEX,
                  doc_type='dataitem'):
    """Index ``date_item`` into Elasticsearch, stamped with the current UTC time.

    ``date_item`` is updated in place with an ``@timestamp`` key before it
    is sent. Failures are not propagated: the traceback is printed and the
    error is reported through ``log``.

    :param token: passed through to ``log`` when indexing fails.
    :param date_item: mapping to index; mutated to add ``@timestamp``.
    :param index: target index name.
    :param doc_type: document type used for the index call.
    """
    try:
        # The trailing "Z" marks the stamp as UTC, so read a UTC clock
        # instead of shifting local time by a fixed eight hours.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        date_item.update({
            '@timestamp': now_utc.strftime('%Y-%m-%dT%H:%M:%S.000Z')
        })
        node = {
            'host': config.ELK_HOST,
            'port': config.ELK_PORT,
            'http_auth': (config[ELK.USR.value], config[ELK.PWD.value]),
            "timeout": 360000
        }
        async with Elasticsearch([node]) as es_client:
            await es_client.index(index=index,
                                  body=date_item,
                                  doc_type=doc_type)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that task
        # cancellation and interpreter shutdown are not swallowed.
        import traceback
        print(traceback.format_exc())
        await log(token,
                  status_code=diy.DIY_STATUS.value,
                  message='存入elk报错',
                  iselk=False)
Пример #4
0
async def search(request):
    """Scan the crawler index for documents matching the ``q`` query parameter.

    Reads ``q``, ``limit`` and ``offset`` from ``request.query``. When
    ``q`` is empty the scan runs with an empty query body.

    :param request: incoming request exposing a ``query`` mapping.
    :returns: a JSON response with ``total_hits``, ``count`` and
        ``results``, or ``{'response': 'wrong query'}`` when ``limit`` or
        ``offset`` is not an integer.
    """
    q = request.query.get('q')
    try:
        limit = int(request.query.get('limit', 0))
        offset = int(request.query.get('offset', 0))
    except (TypeError, ValueError):
        return json_response({'response': 'wrong query'})

    body = {}
    if q:
        body['query'] = {'match': {'text': q}}

    # Open the client only after validation and as a context manager so
    # it is closed on every path.
    async with Elasticsearch() as es:
        async with Scan(
                es,
                index=index_name,
                doc_type='crawler',
                query=body,
        ) as scan_res:
            res_source, count = await format_search(scan_res, limit, offset)

    return json_response({
        'total_hits': count,
        'count': len(res_source),
        'results': res_source
    })
Пример #5
0
 async def getAllCache(self, classStr: str, phoneStr: str, dataType: FLAG,
                       monthRange: List[str]) -> List[EsCacheModel]:
     """
     Fetch every cache entry that matches the given criteria.
     classStr: class name, matched against ``apiClass``
     phoneStr: phone number, matched against ``phoneNo``
     dataType: cache data type; its ``value`` is matched against ``dataType``
     monthRange: list of months; an entry matches if it has any of them
     Returns: list(EsCacheModel)
     """
     caches = []
     month_clause = ' OR '.join([f"month:{month}" for month in monthRange])
     query = (f'apiClass:{classStr} AND phoneNo:{phoneStr} '
              f'AND dataType:{dataType.value} AND ({month_clause})')
     node = {
         'host': config[ELK.ELK_HOST.value],
         'port': config[ELK.ELK_PORT.value],
         "timeout": 360000
     }
     async with Elasticsearch([node]) as client:
         response = await client.search(index=self.esindex,
                                        doc_type=self.estype,
                                        q=query)
     wrapped = ExObject(response)
     for hit in wrapped["?hits"]["?hits"]:
         entry = EsCacheModel()
         entry.apiClass = hit["?_source"]["?apiClass"].ToString()
         entry.month = hit["?_source"]["?month"].ToString()
         entry.phoneNo = hit["?_source"]["?phoneNo"].ToString()
         entry.dataType = hit["?_source"]["?dataType"].ToString()
         entry.page = hit["?_source"]["?page"].ToOriginal()
         entry.maxPages = hit["?_source"]["?maxPages"].ToOriginal()
         entry.updateTime = hit["?_source"]["?updateTime"].ToOriginal()
         entry.items = hit["?_source"]["?items"].ToOriginal()
         caches.append(entry)
     return caches
Пример #6
0
async def aioes_request(request):
    """Search ``site.docs.python.org`` for pages whose content matches ``q``.

    Query parameters: ``q`` (required), ``limit`` (default 10) and
    ``offset`` (default 0). Non-integer ``limit``/``offset`` values fall
    back to their defaults.

    :param request: incoming request; parameters are read from
        ``request.rel_url.query``.
    :returns: a ``web.Response`` whose text is a JSON object with
        ``request``, ``limit``, ``offset`` and ``urls``; an empty list when
        ``q`` is missing.
    """
    query = request.rel_url.query
    if 'q' not in query:
        # NOTE(review): this path returns a bare list, not a web.Response;
        # kept as-is for callers - confirm that is intended.
        return []
    q = query['q']

    try:
        limit = int(query['limit']) if 'limit' in query else 10
    except ValueError:
        limit = 10
    try:
        offset = int(query['offset']) if 'offset' in query else 0
    except ValueError:
        offset = 0

    # Create the client only once the request is known to be valid, and
    # close it even when the search raises.
    es = Elasticsearch(data['elasticsearch']['hosts'])
    try:
        res = await es.search(index='site.docs.python.org',
                              body={
                                  'query': {
                                      'match': {
                                          'content': q
                                      }
                                  },
                                  'size': limit,
                                  'from': offset
                              })
    finally:
        await es.close()

    urls = [r['_source']['url'] for r in res['hits']['hits']]
    return web.Response(text=json.dumps({
        'request': q,
        'limit': limit,
        'offset': offset,
        'urls': urls
    }))
Пример #7
0
    async def main(self):
        """Feed links from ``self.links`` to a pool of ten workers.

        Initializes the index, queues ``self.start_url`` and pushes links
        to the pool until the queue is found empty on two checks 0.2
        seconds apart. ``self.sleep_time`` is awaited before every push
        after the first.
        """
        async with Elasticsearch([{'host': 'localhost', 'port': 9200}]) as es:
            await self.initialize_index(es)
            await self.links.put(self.start_url)

            async with aiohttp.ClientSession() as session:
                crawler_pool = asyncpool.AsyncPool(
                    self.loop,
                    num_workers=10,
                    name="CrawlerPool",
                    logger=logging.getLogger("CrawlerPool"),
                    worker_co=self.worker)
                async with crawler_pool as pool:
                    first_link = await self.links.get()
                    await pool.push(first_link, es, session)

                    while True:
                        if self.links.empty():
                            # Give the workers a moment to queue new links
                            # before deciding the crawl is finished.
                            await asyncio.sleep(0.2)
                            if self.links.empty():
                                break

                        next_link = await self.links.get()
                        await asyncio.sleep(self.sleep_time)
                        await pool.push(link=next_link, es=es, session=session)
    async def test_get_apps_stats_with_data(self):
        """
        Load some data points into Elasticsearch and compute the
        aggregated CPU and RAM usage.
        """
        fixture_path = (
            "agents/ead07ffb-5a61-42c9-9386-21b680597e6c-S0/app_stats.json")
        app_stats_datapoints = get_fixture(fixture_path)
        app = MesosApp(id="infra/asgard/api")

        await self._load_app_stats_into_storage(self.INDEX_NAME, self.utc_now,
                                                app_stats_datapoints)

        backend = MarathonAppsBackend()
        user = User(**USER_WITH_MULTIPLE_ACCOUNTS_DICT)
        account = Account(**ACCOUNT_DEV_DICT)
        async with Elasticsearch([settings.STATS_API_URL]) as es:
            raw = await es.search(index=self.INDEX_NAME)
            hits = raw["hits"]["hits"]
            cpu_pcts = [hit["_source"]["cpu_pct"] for hit in hits]
            mem_pcts = [hit["_source"]["mem_pct"] for hit in hits]
            # Check that the stored data points are visible before the
            # backend is queried.
            self.assertEqual(5, len(cpu_pcts))
            self.assertEqual(5, len(mem_pcts))

        expected = AppStats(cpu_pct="0.25", ram_pct="15.05", cpu_thr_pct="1.00")
        app_stats = await backend.get_app_stats(app, user, account)
        self.assertEqual(expected, app_stats)
Пример #9
0
async def test_send_get_body_as_get(es_server, auto_close, loop):
    """``mget`` on a client built with default settings returns both documents."""
    node = {'host': es_server['host'], 'port': es_server['port']}
    client = auto_close(Elasticsearch([node],
                                      http_auth=es_server['auth'],
                                      loop=loop))
    doc_ids = ('1', '2')
    for doc_id in doc_ids:
        await client.create('test', 'type', doc_id, {'val': doc_id})

    response = await client.mget(
        {"docs": [{"_id": "1"}, {"_id": "2"}]},
        index='test', doc_type='type')

    expected_docs = [
        {'_id': doc_id,
         '_index': 'test',
         '_source': {'val': doc_id},
         '_type': 'type',
         '_version': 1,
         'found': True}
        for doc_id in doc_ids
    ]
    assert response == {'docs': expected_docs}
Пример #10
0
async def test_send_get_body_as_source_none_params(es_server,
                                                   auto_close, loop):
    """A raw ``_mget`` GET with a body and no params works with ``send_get_body_as='source'``."""
    node = {'host': es_server['host'], 'port': es_server['port']}
    client = auto_close(Elasticsearch([node],
                                      send_get_body_as='source',
                                      http_auth=es_server['auth'],
                                      loop=loop))
    doc_ids = ('1', '2')
    for doc_id in doc_ids:
        await client.create('test', 'type', doc_id, {'val': doc_id})

    response = await client.transport.perform_request(
        'GET', 'test/type/_mget',
        body={"docs": [{"_id": "1"}, {"_id": "2"}]})

    expected_docs = [
        {'_id': doc_id,
         '_index': 'test',
         '_source': {'val': doc_id},
         '_type': 'type',
         '_version': 1,
         'found': True}
        for doc_id in doc_ids
    ]
    assert response == {'docs': expected_docs}
Пример #11
0
    async def search(self, request):
        """Search products by name on behalf of an authorized user.

        The JSON request body must contain ``vk_id``, ``app_token``,
        ``access_token`` and ``search``. An exact ``terms`` query on
        ``name`` runs first; when it finds nothing, a ``match`` query on
        the same search text is tried.

        :param request: incoming request with a JSON body.
        :returns: a JSON response: ``jsoner(status=400)`` for a malformed
            body, ``jsoner(status=403)`` when authorization fails, a 404
            payload when the lookup raises, otherwise
            ``jsoner(status=200, searchlist=...)``.
        """
        try:
            payload = await request.json()
            vk_id = payload['vk_id']
            token = payload['app_token']
            access_token = payload['access_token']
            search_querry = str(payload['search'])
        except Exception:
            # Malformed JSON or a missing field: reject as a bad request.
            return web.json_response(jsoner(status=400))

        correct = await self.auth_.authorize(user_id=vk_id,
                                             access_token=access_token,
                                             sign=token)
        if correct.decode() == 'False':
            return web.json_response(jsoner(status=403))

        products = []
        try:
            async with Elasticsearch(elastic_settings) as es:
                res = await es.search(
                    index='obi', doc_type='product',
                    body={"query": {"terms": {"name": [search_querry]}}})
                if not res['hits']['hits']:
                    # Fall back to full-text matching on the caller's
                    # search text rather than a hard-coded placeholder.
                    res = await es.search(
                        index='obi', doc_type='product',
                        body={"query": {"match": {"name": search_querry}}})
                for prod in res['hits']['hits']:
                    products.append({"product_id": prod['_id'],
                                     **prod['_source']})
        except Exception:
            return web.json_response({'status':'404','text':'NOT FOUND'})

        return web.json_response(jsoner(status=200, searchlist=products))
Пример #12
0
    async def es_range(self, index, tp, *keys, call=None, **query):
        """Scan ``index`` and either hand each document to ``call`` or collect fields.

        :param index: index to scan; also used for the progress total.
        :param tp: document type passed to ``Scan``.
        :param keys: colon-separated paths (``"a:b:c"``) resolved with
            successive ``.get`` calls on each document. Resolution stops at
            the first falsy value, which is stored as the result.
        :param call: optional callable invoked with each document; when
            given, no fields are collected.
        :param query: keyword arguments forwarded as the scan query.
        :returns: a list of ``{key: value}`` dicts, one per document (empty
            when ``call`` is given).
        """
        async with Elasticsearch([self.host]) as es:
            async with Scan(
                    es,
                    index=index,
                    doc_type=tp,
                    query=query,
            ) as scan:

                res = []
                count = (await es.count(index=index))['count']
                progressbar = tqdm(desc="scan all elasticsearch", total=count)
                seen = 0
                try:
                    async for doc in scan:
                        seen += 1
                        # Advance in batches of 1000 documents; ``update``
                        # takes the increment, not a fraction of the total.
                        if seen % 1000 == 0:
                            progressbar.update(1000)
                        if call:
                            call(doc)
                        else:
                            row = {}
                            for key in keys:
                                value = doc
                                for part in key.split(':'):
                                    value = value.get(part)
                                    if not value:
                                        break
                                row[key] = value
                            res.append(row)
                    # Account for the final partial batch.
                    progressbar.update(seen % 1000)
                finally:
                    # Close the bar even when the scan or ``call`` raises.
                    progressbar.close()
                return res
Пример #13
0
async def cleanup_es(es_host, prefix=""):
    """Delete every alias and index whose name starts with ``prefix``.

    Names beginning with ``.`` are skipped. Authorization errors are
    ignored for both aliases and indices; not-found errors are ignored
    while deleting aliases.

    :param es_host: unused; the connection is built from
        ``app_settings["elasticsearch"]["connection_settings"]``.
    :param prefix: name prefix to match; the default empty string matches
        every name.
    """
    conn = Elasticsearch(**app_settings["elasticsearch"]["connection_settings"])
    try:
        for alias in (await conn.cat.aliases()).splitlines():
            name, index = alias.split()[:2]
            if name[0] == "." or index[0] == ".":
                # ignore indexes that start with .
                continue
            if name.startswith(prefix):
                try:
                    await conn.indices.delete_alias(index, name)
                    await conn.indices.delete(index)
                except (
                    elasticsearch.exceptions.AuthorizationException,
                    elasticsearch.exceptions.NotFoundError,
                ):
                    pass
        for index in (await conn.cat.indices()).splitlines():
            _, _, index_name = index.split()[:3]
            if index_name[0] == ".":
                # ignore indexes that start with .
                continue
            if index_name.startswith(prefix):
                try:
                    await conn.indices.delete(index_name)
                except elasticsearch.exceptions.AuthorizationException:
                    pass
    finally:
        # Release the client's connections on success and on failure.
        await conn.close()
def test_elastic_default_loop(auto_close, loop):
    """A client created without ``loop`` uses the loop set as current."""
    asyncio.set_event_loop(loop)
    client = Elasticsearch()
    auto_close(client)
    assert client.loop is loop
Пример #15
0
 async def connect_elasticsearch():
     """Create a client for localhost:9200, print whether it answers a ping, and return it."""
     client = Elasticsearch([{'host': 'localhost', 'port': 9200}])
     reachable = await client.ping()
     print('Yay Connect' if reachable else 'Awww it could not connect!')
     return client
Пример #16
0
def es(loop):
    """Yield an Elasticsearch client after wiping all templates and indices.

    Generator, presumably used as a test fixture (no decorator is visible
    here): issues ``DELETE /_template/*`` and ``DELETE /_all``, yields the
    client, and closes it afterwards.

    :param loop: event loop the client is bound to and on which the setup
        and teardown coroutines are run.
    """
    client = Elasticsearch(loop=loop)

    async def _wipe():
        # Gather inside a coroutine so the running loop is picked up
        # implicitly; ``asyncio.gather(loop=...)`` was removed in 3.10.
        await asyncio.gather(
            client.transport.perform_request('DELETE', '/_template/*'),
            client.transport.perform_request('DELETE', '/_all'),
        )

    try:
        # The wipe sits inside the ``try`` so a failed wipe still closes
        # the client.
        loop.run_until_complete(_wipe())
        yield client
    finally:
        loop.run_until_complete(client.close())
Пример #17
0
 async def main(self, number):
     """Run ``number`` concurrent ``get_links`` workers starting from ``self.host``.

     Every worker receives the shared queue, the shared HTTP session and
     its own Elasticsearch client.

     :param number: how many worker tasks to start.
     """
     q = asyncio.Queue()
     q.put_nowait(self.host)
     es_clients = []
     try:
         async with aiohttp.ClientSession() as client:
             tasks = []
             for _ in range(number):
                 _es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
                 es_clients.append(_es)
                 tasks.append(
                     asyncio.create_task(self.get_links(q, client, _es)))
             await asyncio.gather(*tasks)
     finally:
         # Close every per-worker client so connections are not leaked.
         # NOTE(review): assumes get_links does not already close its
         # client - confirm.
         for _es in es_clients:
             await _es.close()
Пример #18
0
async def search(q, limit=None, offset=0):
    """Return the ``link`` of each ``crawler_links`` document whose ``body`` matches ``q``.

    ``limit`` and ``offset`` are passed to the search as ``size`` and
    ``from_``.
    """
    async with Elasticsearch() as client:
        match_query = {"query": {"match": {"body": q}}}
        response = await client.search(size=limit,
                                       from_=offset,
                                       index="_all",
                                       doc_type="crawler_links",
                                       body=match_query)
        links = []
        for hit in response["hits"]["hits"]:
            links.append(hit["_source"]["link"])
        return links
Пример #19
0
async def test_bulk_raise_exception(loop):
    """``bulk`` with a single delete action is expected to raise ``TransportError``."""
    asyncio.set_event_loop(loop)
    client = Elasticsearch()
    delete_action = {
        '_op_type': 'delete',
        '_index': 'test_aioes',
        '_type': 'type_3',
        '_id': "999",
    }
    with pytest.raises(TransportError):
        _ok, _failed = await bulk(client, [delete_action], stats_only=True)
Пример #20
0
async def search(conn, body, dtm):
    """Look up a document and return ``body`` as JSON, or ``"error"`` on failure.

    NOTE(review): the lookup passes no document id and its result is only
    checked for a ``_source`` key, never returned, so this looks
    unfinished - confirm the intended query before relying on it.

    :param conn: unused.
    :param body: JSON-serializable value echoed back on success.
    :param dtm: unused.
    :returns: ``json.dumps(body)`` on success, ``json.dumps('error')`` when
        anything raises.
    """
    try:
        # The context manager closes the client on every path.
        async with Elasticsearch() as es:
            res = await es.get(index="test-index", doc_type='tweet')
            # Keep the failure mode for a response without '_source'.
            res['_source']
        return json.dumps(body)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so cancellation is
        # not reported as a search error.
        return json.dumps('error')
Пример #21
0
 async def myfun(self, quantity):
     """Run ``quantity`` concurrent ``crawling`` workers starting at docs.python.org.

     All workers share one queue, one HTTP session and one Elasticsearch
     client; both are closed once every worker has finished.

     :param quantity: how many worker tasks to start.
     """
     q = asyncio.Queue()
     q.put_nowait('https://docs.python.org/')
     async with aiohttp.ClientSession() as session:
         # Context manager so the client is closed when the workers end,
         # including when one of them raises.
         async with Elasticsearch([{'host': 'localhost', 'port': 9200}]) as es:
             tasks = [
                 asyncio.create_task(self.crawling(q, session, es))
                 for _ in range(quantity)
             ]
             await asyncio.gather(*tasks)
Пример #22
0
    def __init__(self, max_rps):
        """Create the work queues, the semaphore and the Elasticsearch client.

        ``max_rps`` is stored as ``max_tasks`` and is also the initial
        value of the semaphore. ``session`` starts as ``None``.
        """
        self.max_tasks = max_rps
        # Three independent queues, created in attribute order.
        self.q_url, self.q_text, self.q_rps = (
            asyncio.Queue() for _ in range(3))
        self.seen_urls = set()

        self.sem = asyncio.Semaphore(max_rps)

        self.session = None
        self.es = Elasticsearch()
Пример #23
0
async def test_mark_live_not_dead(auto_close, es_server, loop):
    """After ``mark_live`` the connection is absent from the pool's ``dead_count``."""
    hosts = [
        {'host': es_server['host'], 'port': es_server['port']},
        {'host': 'unknown_host', 'port': 9200},
    ]
    es = auto_close(Elasticsearch(hosts=hosts,
                                  http_auth=es_server['auth'],
                                  loop=loop))
    connection = await es.transport.get_connection()
    connection_pool = es.transport.connection_pool
    connection_pool.mark_live(connection)
    assert connection not in connection_pool.dead_count
Пример #24
0
async def cleanup_es(es_host, prefix=''):
    """Delete every alias and index on ``es_host`` whose name starts with ``prefix``.

    Aliases are handled first: a matching alias is removed together with
    the index it points to. Remaining indices are then matched by name.

    :param es_host: host passed to the ``Elasticsearch`` constructor.
    :param prefix: name prefix to match; the default empty string matches
        every name.
    """
    conn = Elasticsearch(hosts=[es_host])
    try:
        for alias in (await conn.cat.aliases()).splitlines():
            name, index = alias.split()[:2]
            if name.startswith(prefix):
                await conn.indices.delete_alias(index, name)
                await conn.indices.delete(index)
        for index in (await conn.cat.indices()).splitlines():
            _, _, index_name = index.split()[:3]
            if index_name.startswith(prefix):
                await conn.indices.delete(index_name)
    finally:
        # Release the client's connections on success and on failure.
        await conn.close()
Пример #25
0
 async def myfun(self, quantity, domain, glub):
     """Run ``quantity`` concurrent ``crawling`` workers starting at ``domain``.

     All workers share one queue, one HTTP session and one Elasticsearch
     client. ``self.waitress()`` is awaited after the workers finish,
     while the session and client are still open.

     :param quantity: how many worker tasks to start.
     :param domain: first item put on the queue.
     :param glub: passed through unchanged to every ``crawling`` call.
     """
     q = asyncio.Queue()
     q.put_nowait(domain)
     async with aiohttp.ClientSession() as session:
         # Context manager so the client is closed when the work ends,
         # including when a worker raises.
         async with Elasticsearch([{'host': 'localhost', 'port': 9200}]) as es:
             tasks = [
                 asyncio.create_task(self.crawling(q, session, es, glub))
                 for _ in range(quantity)
             ]

             await asyncio.gather(*tasks)
             await self.waitress()
 async def make_record_to_es(self, link, soup):
     """Store the text of a crawled page in ``crawler_links``, keyed by its link.

     ``self.LINKS_COUNTER`` is incremented only when a new document is
     created; a link that is already indexed is skipped silently.

     :param link: page URL, used as the document id and ``link`` field.
     :param soup: parsed page; ``get_text()`` supplies the ``body`` field.
     """
     doc = {"link": link, "body": soup.get_text()}
     # No client-level option is passed for conflicts: an existing document
     # surfaces as ConflictError and is handled below. The context manager
     # closes the client, so no explicit close() is needed.
     async with Elasticsearch() as es:
         try:
             await es.create(
                 index="crawler_links", doc_type="crawler_links", id=link, body=doc
             )
         except elastic_exceptions.ConflictError:
             # Already indexed: leave the counter untouched.
             pass
         else:
             self.LINKS_COUNTER += 1
Пример #27
0
 async def clear_index(self):
     """Delete this object's index, asking for the password first when one is set.

     The index name is the part before the first ``|`` in the second
     element of ``self.status()``. When ``self['passwd']`` is truthy the
     user is prompted; a wrong answer is logged and nothing is deleted.

     Returns the result of ``indices.delete``, or ``None`` when the
     password check fails.
     """
     session_name = self.name
     target_index = self.status()[1].split("|")[0]
     async with Elasticsearch(self.host.split(",")) as es:
         # The return value is not used; the call itself is kept.
         self.load_session(session_name)
         expected = self['passwd']
         if expected and getpass("passwd :") != expected:
             logging.info(
                 "error passwd to delete all data in index: %s" % target_index)
             return
         return await es.indices.delete(target_index)
Пример #28
0
async def test_mark_dead_removed_connection(auto_close, es_server, loop):
    """``mark_dead`` can be called twice on the same connection."""
    hosts = [
        {'host': es_server['host'], 'port': es_server['port']},
        {'host': 'unknown_host', 'port': 9200},
    ]
    es = auto_close(Elasticsearch(hosts=hosts,
                                  http_auth=es_server['auth'],
                                  loop=loop))
    connection = await es.transport.get_connection()
    connection_pool = es.transport.connection_pool
    for _ in range(2):
        # The second pass checks that marking an already-dead connection
        # still succeeds.
        connection_pool.mark_dead(connection)
        assert connection in connection_pool.dead_count
Пример #29
0
async def save_to_es(id, hand, data, loop):
    """Store ``data`` as document ``id`` in Elasticsearch unless a filter rejects it.

    :param id: document id passed to ``create``.
    :param hand: option mapping: ``es_host`` (comma-separated hosts,
        default ``localhost:9200``), ``es_index`` (default ``es-main``),
        ``es_type`` (default ``es-doc``), ``es_filter`` (optional) and
        ``type`` (``'json'`` enables JSON handling).
    :param data: payload. With ``type == 'json'`` it is JSON text and the
        filter is a JSON object mapping a field to a value or list of
        values to reject. Otherwise the filter is a regular expression
        matched against ``data`` - presumably bytes, since the pattern is
        UTF-8 encoded first; TODO confirm.
    :param loop: unused.
    :returns: the response of ``create``, or ``None`` when filtered out.
    """
    host = hand.get('es_host', 'localhost:9200')
    index = hand.get('es_index', 'es-main')
    doc_type = hand.get('es_type', 'es-doc')
    filter_spec = hand.get('es_filter')
    is_json = hand.get('type') == 'json'
    if is_json:
        data = json.loads(data)

    if filter_spec:
        if is_json:
            # Reject when any filtered field equals the banned value, or
            # is a member of the banned list.
            rejected = any(
                (data.get(field) in banned) if isinstance(banned, list)
                else (data.get(field) == banned)
                for field, banned in json.loads(filter_spec).items()
            )
            shown = data
        else:
            rejected = re.search(filter_spec.encode('utf-8'), data) is not None
            shown = data[:100]
        if rejected:
            # Format in a single step so a '%' inside the data cannot be
            # misread as a format directive.
            logging.info(
                colored("Filter:  {} from data: {}".format(id, shown),
                        'yellow',
                        attrs=['bold']))
            return

    # Parse raw text only: a payload already decoded above is a dict, and
    # json.loads on a dict raises TypeError.
    if isinstance(data, (str, bytes, bytearray)):
        try:
            data = json.loads(data)
        except json.JSONDecodeError:
            pass
    async with Elasticsearch(host.split(",")) as es:
        return await es.create(index, doc_type, id, data)
Пример #30
0
 async def just_bulk(self, datas):
     async with Elasticsearch([i for i in self.host.split(",")]) as es:
         d = []
         with tqdm(total=len(datas)) as pbar:
             for i, v in enumerate(datas):
                 if i > 0 and i % 1024 == 0:
                     await es.bulk(d)
                     pbar.update(1024)
                     if isinstance(v, list):
                         d = v
                     else:
                         d = [v]
                 else:
                     if isinstance(v, list):
                         d += v
                     else:
                         d.append(v)