Example #1
              'a') as file_obj:
        for key in new_title_hash.keys():
            title_parts = key.split("\n")
            title_str = ""
            for i in title_parts:
                title_str = title_str + i + " "

            file_obj.write(title_str[:-1] + "|" + str(new_title_hash[key]) +
                           "\n")

    print(len(new_title_hash), "new article(s) added")


new_title_hash = {}
title_hash = {}
es = Elasticsearch(['http://localhost:9200'])
res = es.search(index="arxiv_feed", doc_type="feed", size=2000)
res = res['hits']['hits']

for entry in res:
    # Replace any newline characters embedded in the title (as delivered in the feed) with spaces
    entry_id = entry['_id']
    entry = entry['_source']
    print(entry_id)
    print(entry.keys())
    title_parts = entry['title'].split("\n")
    title_str = ""

    for i in title_parts:
        title_str = title_str + i + " "
    title_str = title_str[:-1]

Example #2
    print('Benchmark file: {0}'.format(args.csv_file))
    print('ElasticSearch URL: {0}'.format(args.url))
    return args


def build_index_name(index_prefix: str, d: datetime.date) -> str:
    today = d.strftime('%Y-%m-%d')
    index = '{0}-{1}'.format(index_prefix, today)
    print('Index: {0}'.format(index))
    return index


if __name__ == "__main__":

    cmd_args = parse_args(sys.argv[1:])
    index_name = build_index_name(cmd_args.index_prefix,
                                  datetime.utcnow().date())

    es = Elasticsearch(
        hosts=[cmd_args.url],
        http_auth=(cmd_args.username,
                   cmd_args.password) if cmd_args.username else None,
        use_ssl=cmd_args.use_ssl,
        verify_certs=cmd_args.verify_certs)

    processed = process_data(
        cmd_args.csv_file,
        index=index_name,
        process_func=lambda actions: helpers.bulk(es, actions))
    print('Indexed {0} benchmarks'.format(processed))
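
The script above delegates the CSV parsing to a process_data helper that is not shown in the snippet. A minimal sketch of what such a helper could look like, assuming the CSV has a header row, each row becomes one document in the target index, and actions are flushed in chunks (the field handling and chunk size are illustrative, not the original implementation):

import csv


def process_data(csv_file, index, process_func, chunk_size=500):
    """Read benchmark rows from a CSV file and hand them to process_func as bulk actions."""
    actions, total = [], 0
    with open(csv_file, newline='') as handle:
        for row in csv.DictReader(handle):
            # one bulk "index" action per CSV row
            actions.append({'_index': index, '_source': row})
            if len(actions) >= chunk_size:
                process_func(actions)
                total += len(actions)
                actions = []
    if actions:
        process_func(actions)
        total += len(actions)
    return total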
Example #3
#!/usr/bin/python
# -*- coding: utf-8 -*-
from elasticsearch import Elasticsearch, client
from FMStats import confManager as cm
import json

# Configuration
conf = cm.ConfManager()
es = Elasticsearch([conf.elasticAddres()])
ec = client.IndicesClient(es)


def checkIndex(esIndex):
    if ec.exists(index=esIndex):
        return True
    else:
        return False

def createIndex(esIndex, esMapping):
    mapping = open(esMapping, 'r')
    mapping = mapping.read()
    ec.create(index=esIndex, body=mapping)

def mappingInit():
    if checkIndex(conf.artistIndex()) == False:
        createIndex(conf.artistIndex(), conf.artistMapping())
    if checkIndex(conf.radioIndex()) == False:
        createIndex(conf.radioIndex(), conf.radioMapping())
    return True  # All indices initialized.

Example #4
#!/usr/bin/env python3

import time

from elasticsearch import Elasticsearch

client = Elasticsearch()
ready = False

try:
    # poll until the cluster answers ping, sleeping briefly between attempts
    while not ready:
        ready = client.ping()
        if not ready:
            time.sleep(1)
except KeyboardInterrupt:
    pass
Example #5
#!/usr/bin/env python
'''
Licensed to Elasticsearch B.V under one or more agreements.
Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
See the LICENSE file in the project root for more information
'''

from elasticsearch import Elasticsearch

es = Elasticsearch()

print("bb143628fd04070683eeeadc9406d9cc - L:11")
# tag::bb143628fd04070683eeeadc9406d9cc[]
response = es.index(index='twitter',
                    id=1,
                    body={
                        'user': '******',
                        'post_date': '2009-11-15T14:12:12',
                        'message': 'trying out Elasticsearch',
                    })
# end::bb143628fd04070683eeeadc9406d9cc[]
print("---------------------------------------")
print(response)
print("---------------------------------------")

print("804a97ff4d0613e6568e4efb19c52021 - L:77")
print("TODO")

print("d718b63cf1b6591a1d59a0cf4fd995eb - L:121")
# tag::d718b63cf1b6591a1d59a0cf4fd995eb[]
response = es.index(
Example #6
File: es.py Project: ghukill/combine
    def copy_es_index(source_index=None,
                      target_index=None,
                      create_target_index=True,
                      refresh=True,
                      wait_for_completion=True,
                      add_copied_from=None):
        '''
        Method to duplicate one ES index to another

        Args:
            create_target_index (boolean): If True, check for target and create
            source_index (str): Source ES index to copy from
            target_index (str): Target ES index to copy to

        Returns:
            (dict): results of reindex via elasticsearch client reindex request
        '''

        # get ES handle
        es_handle_temp = Elasticsearch(hosts=[settings.ES_HOST])

        # put/confirm combine es index templates
        template_body = {
            'template': '*',
            'settings': {
                'number_of_shards': 1,
                'number_of_replicas': 0,
                'refresh_interval': -1
            },
            'mappings': {
                'record': {
                    'date_detection': False,
                    'properties': {
                        'combine_db_id': {
                            'type': 'integer'
                        }
                    }
                }
            }
        }
        es_handle_temp.indices.put_template('combine_template',
                                            body=json.dumps(template_body))

        # if creating target index check if target index exists
        if create_target_index and not es_handle_temp.indices.exists(
                target_index):
            es_handle_temp.indices.create(target_index)

        # prepare reindex query
        dupe_dict = {
            'source': {
                'index': source_index,
                'query': {}
            },
            'dest': {
                'index': target_index
            }
        }

        # if add_copied_from, include in reindexed document
        if add_copied_from:
            dupe_dict['script'] = {
                'inline': 'ctx._source.source_job_id = %s' % add_copied_from,
                'lang': 'painless'
            }

        # reindex using elasticsearch client
        reindex = es_handle_temp.reindex(
            body=dupe_dict,
            wait_for_completion=wait_for_completion,
            refresh=refresh)
        return reindex
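
A hypothetical call to the method above (the index names and job id below are made up for illustration); with add_copied_from set, every reindexed document gets a source_job_id field via the painless script:

# copy index j42 into j43 and stamp each copied record with its source job id
results = copy_es_index(source_index='j42',
                        target_index='j43',
                        create_target_index=True,
                        add_copied_from=42)
print(results)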
Example #7
from datetime import datetime
from elasticsearch import Elasticsearch

es = Elasticsearch()
# es = Elasticsearch([{'host': 'd.es.dataapi.rea-asia.com', 'port': 9200}])

doc = {
    'author': 'Kamal',
    "searched_keyword": {
        "search_keyword": "Sunday spk",
        "matched_places": {
            "Sunday spk …": 90,
            "Sunday spkksjff": 89,
            "XXXXXXXXX": 80
        }
    },
    'timestamp': datetime.now(),
}
res = es.index(index="keyword", doc_type='search_submit', id=1, body=doc)

string_matching = {
    'searchkeyword': 'midvalley',
    'text': 'most relevant search keywords according to db',
    'timestamp': datetime.now(),
    'matched_placekeywords': {
        1: 'mid valley city',
        2: 'mid valley gardens',
        3: 'mid valley gardens'
    }
}
res = es.index(index="midvalley",

Example #8
import ast
import math
import re
import time

from tqdm import tqdm

from index.query_processing import get_query_words, get_query_vectors
from index.merge_index import get_vocab_size
from index.create_inverted_docs import create_index
from index.constants import *
from index.proximity_search import *
from index.merge_index import read_data
from elasticsearch import Elasticsearch

ES = Elasticsearch()


def generate_result_matrix(results_dic, query_no, doc_no, score_query):
    if query_no not in results_dic.keys():
        results_dic[query_no] = {doc_no: score_query}
    else:
        if doc_no not in results_dic[query_no]:
            results_dic[query_no][doc_no] = score_query
    return results_dic


def write_result(result_dic, filename):
    with open(filename, 'w') as f:
        for query_no in result_dic.keys():
            rank = 1
Example #9
from laserembeddings import Laser
from bert_serving.client import BertClient
from elasticsearch import Elasticsearch
import sys

__author__ = "Bijin Benny"
__email__ = "*****@*****.**"
__license__ = "MIT"
__version__ = "1.0"

#Client connection to local BERT server
bc = BertClient(ip='localhost', output_fmt='list')

#Instance of the LASER language model
laser = Laser()

#Elasticsearch DB client
client = Elasticsearch(hosts="http://*****:*****@localhost:9200/")
"""
createScript function creates custom database queries based on the search type. 
The search type includes basic TF-IDF term search, LASER vector and 
BERT vector similarity searches. The function returns a unique query 
for each of the scenarios.
Arguments :
query        : Text form of the query
search type  : Type of search, i.e term, laser or bert
query_vector : Vector form of the query for cosine similarity
"""


def createScript(query, search_type, query_vector):
    if (search_type == 'term'):
        return {
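
The snippet is cut off before the query bodies are returned. A minimal sketch of how the three branches might look, assuming the documents store text in a text field and dense vectors in laser_vector / bert_vector fields, and that vector search uses the script_score query with cosineSimilarity (Elasticsearch 7.3+ dense_vector support); none of these names come from the original code:

def createScript(query, search_type, query_vector):
    if search_type == 'term':
        # plain BM25 / TF-IDF match on the text field
        return {'query': {'match': {'text': query}}}
    # vector similarity search over a dense_vector field
    field = 'laser_vector' if search_type == 'laser' else 'bert_vector'
    return {
        'query': {
            'script_score': {
                'query': {'match_all': {}},
                'script': {
                    'source': "cosineSimilarity(params.query_vector, '%s') + 1.0" % field,
                    'params': {'query_vector': query_vector}
                }
            }
        }
    }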
Example #10
def create_app(config_class=Config):
    app = Flask(__name__)
    app.config.from_object(config_class)

    db.init_app(app)
    migrate.init_app(app, db)
    login.init_app(app)
    mail.init_app(app)
    bootstrap.init_app(app)
    moment.init_app(app)
    babel.init_app(app)
    app.elasticsearch = Elasticsearch([app.config['ELASTICSEARCH_URL']]) \
        if app.config['ELASTICSEARCH_URL'] else None

    app.redis = Redis.from_url(app.config['REDIS_URL'])
    app.task_queue = rq.Queue('microblog-tasks', connection=app.redis)

    from app.errors import bp as errors_bp
    app.register_blueprint(errors_bp)

    from app.auth import bp as auth_bp
    app.register_blueprint(auth_bp, url_prefix='/auth')

    from app.main import bp as main_bp
    app.register_blueprint(main_bp)

    if not app.debug and not app.testing:
        if app.config['MAIL_SERVER']:
            auth = None
            if app.config['MAIL_USERNAME'] or app.config['MAIL_PASSWORD']:
                auth = (app.config['MAIL_USERNAME'],
                        app.config['MAIL_PASSWORD'])
            secure = None
            if app.config['MAIL_USE_TLS']:
                secure = ()
            mail_handler = SMTPHandler(
                mailhost=(app.config['MAIL_SERVER'], app.config['MAIL_PORT']),
                fromaddr='no-reply@' + app.config['MAIL_SERVER'],
                toaddrs=app.config['ADMINS'],
                subject='Microblog Failure',
                credentials=auth,
                secure=secure)
            mail_handler.setLevel(logging.ERROR)
            app.logger.addHandler(mail_handler)

        if app.config['LOG_TO_STDOUT']:
            stream_handler = logging.StreamHandler()
            stream_handler.setLevel(logging.INFO)
            app.logger.addHandler(stream_handler)
        else:
            if not os.path.exists('logs'):
                os.mkdir('logs')
            file_handler = RotatingFileHandler('logs/microblog.log',
                                               maxBytes=10240,
                                               backupCount=10)
            file_handler.setFormatter(
                logging.Formatter('%(asctime)s %(levelname)s: %(message)s '
                                  '[in %(pathname)s:%(lineno)d]'))
            file_handler.setLevel(logging.INFO)
            app.logger.addHandler(file_handler)

        app.logger.setLevel(logging.INFO)
        app.logger.info('Microblog startup')

    return app
Example #11
                "receipts_root_hash": "11111111111111111111111111111111",
                "pub_key": "GZsJqUVM3QHVANAb2U9TGGoawjn6Tn2Wipzdeuzy1CcYjfFxuq"
            },
            "hash": "FGxUBNkjAzQQ6GYcMbzWvmUHovuvcXiUv8P34XkyryVV"
        },
        "node_id": "d4b2fc83-976a-4eef-a342-6dc87f87afe8"
    }

    return doc


from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Connect to Elasticsearch by IP address and port (default: 9200)
es = Elasticsearch("http://13.209.67.143:9200/")  # change to match your environment
es.info()


# An index is a body of data managed as its own independent set of files
def makeIndex(es, index_name):
    """Create the index from scratch (delete it first if it already exists)."""
    print(f"make index for {index_name}")

    if es.indices.exists(index=index_name):
        es.indices.delete(index=index_name)
    print(es.indices.create(index=index_name))


def genBulkEntry(newdoc, indexName, indexType):
    #body = json.dumps(doc)
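
genBulkEntry is truncated above. A sketch of how such a generator is typically written and fed to helpers.bulk (already imported at the top of this example); the sample documents and the assumption that newdoc is a list of plain dicts are illustrative:

def genBulkEntry(newdoc, indexName, indexType):
    """Yield one bulk action per document in newdoc."""
    for doc in newdoc:
        yield {
            '_index': indexName,
            '_type': indexType,  # only meaningful on Elasticsearch 6.x and older
            '_source': doc,
        }


# index a couple of sample documents with a single bulk request
sample_docs = [{'title': 'hello'}, {'title': 'world'}]
helpers.bulk(es, genBulkEntry(sample_docs, 'sample_index', '_doc'))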
Example #12
import sys
from elasticsearch import Elasticsearch
from utils.kafkahelper import KafkaConnection

PORT = 9200
INDEX_NAME = "data"
host = "localhost:%s" % PORT
es = Elasticsearch([host])


def get_raw_data(query):
    items = []
    offset = 0
    limit = 100
    # Elasticsearch returns only 10 hits by default, so page through results with size/from_
    while True:
        response = es.search(index=INDEX_NAME,
                             q=query,
                             size=limit,
                             from_=offset)
        data = response['hits']['hits']
        if len(data) == 0:
            break
        items += data
        offset += limit
    return items


def push_data(data):
    conn = KafkaConnection()
    for item in data:
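
get_raw_data above pages with size/from_, which Elasticsearch caps at 10,000 hits by default. For larger result sets the scan helper (a wrapper around the scroll API) is usually a better fit; a minimal sketch using the same client and index as the snippet:

from elasticsearch import helpers


def get_raw_data_scan(query):
    """Stream every matching hit via the scroll API instead of size/from_ paging."""
    return list(helpers.scan(es, index=INDEX_NAME, q=query, size=100))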
Example #13
 def __init__(self, hosts, port, timeout, **kwargs):
     self.conn = Elasticsearch(hosts=hosts, port=port, **kwargs)
     self.timeout = timeout
Example #14
from faker import Factory
from datetime import datetime
from elasticsearch import Elasticsearch
import json

esDomainEndpoint = "http://search-endpoint:80"
es = Elasticsearch(esDomainEndpoint)


def create_names(fake):
    for x in range(100):
        genUname = fake.slug()
        genName = fake.name()
        genJob = fake.job()
        genCountry = fake.country()
        genText = fake.text()
        genProfile = fake.profile()

        go = es.index(index="profiles",
                      doc_type="users",
                      id=genUname,
                      body={
                          "name": genName,
                          "job": genJob,
                          "country": genCountry,
                          "notes": genText,
                          "profile_details": genProfile,
                          "timestamp": datetime.now()
                      })

        print(json.dumps(go))

Example #15
def api_search(request):

    query = request.GET.get("term", None)
    if query is None:
        return HttpResponse("[]")

    # For user experiment, run search version 1 or 2, 2 being more feature
    # rich and having parsed filters. See atlas-data#32
    search_version = int(request.GET.get("search_var", 0))

    # Parse search query
    query, query_type, kwargs = parse_search(
        query, strip_keywords=(search_version != 1))

    # Resolve any synonyms. feasibility -> pie_scatter etc.
    if "app_name" in kwargs:
        given_app_name = kwargs["app_name"][0]
        kwargs["app_name"] = [
            APP_NAME_SYNONYMS.get(given_app_name, given_app_name)
        ]

    # Viz params are not an elasticsearch filter so pop that off
    viz_params = kwargs.pop("viz_params", None)

    # Prepare elasticsearch filters
    if search_version == 2 or search_version == 0:
        filters = prepare_filters(kwargs)
    else:
        filters = {}

    es_query = {"query": {"filtered": {}}, "size": 8}

    # Add filters to the query if they were given. Filters are ANDed.
    if len(filters) > 0:
        es_filters = [{
            "terms": {
                k: [x.lower() for x in v]
            }
        } for k, v in filters.items()]
        es_filters = {"bool": {"must": es_filters}}
        es_query["query"]["filtered"]["filter"] = es_filters

    # Add fuzzy search for query string if any non-filter query string remains
    # after taking out the filters
    if query.strip() != "":
        es_query["query"]["filtered"]["query"] = {
            "fuzzy_like_this": {
                "like_text": query,
                "fields": ["title"],
                "max_query_terms": 15,
                "prefix_length": 3
            }
        }

    # Do the query
    es = Elasticsearch()
    result = es.search(index="questions", body=es_query)

    # Format the results in a way that complies with the OpenSearch standard's
    # suggestion extension
    labels = []
    urls = []
    for x in result['hits']['hits']:
        data = x['_source']

        # Regenerate title and url so we can add stuff into it dynamically,
        # like the year being searched for, or forcing an app.
        years = kwargs.get('years', None)

        # Possible apps this title could be visualized as
        app_names = data['app_name']

        # If the app the user requested is possible, use that. Otherwise, use
        # the first one as default. App names in the elasticsearch index are
        # sorted in a certain way for this to make sense so check out the
        # indexer script
        requested_app_name = filters.get("app_name", [None])[0]
        if requested_app_name in app_names:
            app_name = requested_app_name
        else:
            app_name = app_names[0]

        if years and len(years) == 2:
            if app_name in ["map", "tree_map"]:
                # If multiple years are specified and we can do a stacked
                # graph, do a stacked graph instead of a treemap or map
                app_name = "stacked"
            elif app_name in ["product_space", "pie_scatter"]:
                # Some apps can never have multiple years so just use the first
                # one specified
                years = [years[0]]

        # If no years specified, use default years
        if years is None:
            if app_name == "stacked":
                years = [1995, 2012]
            else:
                years = [2012]

        # You can't show a product space based on imports so ignore those
        if app_name == "product_space" and data["trade_flow"] == "import":
            continue

        title = get_title(api_name=data['api_name'],
                          app_name=app_name,
                          country_names=data.get('country_names', None),
                          trade_flow=data['trade_flow'],
                          years=years,
                          product_name=data.get('product_name', None))
        url = params_to_url(api_name=data['api_name'],
                            app_name=app_name,
                            country_codes=data.get('country_codes', None),
                            trade_flow=data['trade_flow'],
                            years=years,
                            product_code=data.get('product_code', None))

        if viz_params:
            if app_name == "pie_scatter":
                url += "?queryActivated=True"
                url += "&yaxis=%s" % viz_params[0]

        labels.append(title)
        urls.append(settings.HTTP_HOST + url)

    return HttpResponse(json.dumps([query, labels, [], urls]))
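
The filtered query and fuzzy_like_this used above are legacy constructs that were dropped in later Elasticsearch releases (5.0 and 2.0 respectively). On a current cluster the same intent is roughly a bool query with filter clauses plus a fuzzy match; a hedged sketch of the equivalent body, not the original project's code:

es_query = {
    "size": 8,
    "query": {
        "bool": {
            # exact filters, ANDed together
            "filter": [{"terms": {k: [x.lower() for x in v]}}
                       for k, v in filters.items()],
            # fuzzy free-text search replacing fuzzy_like_this
            "must": {"match": {"title": {"query": query, "fuzziness": "AUTO"}}},
        }
    },
}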
Example #16
from flask import Blueprint
from elasticsearch import Elasticsearch

from api.models import db, models
from api.core import create_response, KEYWORDS, get_database_url
from api.auth import auth

_elasticsearch = Blueprint("_elasticsearch", __name__)

_es_url = get_database_url()["elasticsearch"]

_es = None

if "https" in _es_url:
    import certifi

    _es = Elasticsearch(get_database_url()["elasticsearch"],
                        use_ssl=True,
                        ca_certs=certifi.where())
else:
    _es = Elasticsearch(get_database_url()["elasticsearch"])


def _generate_body(query):
    return {
        "query": {
            "multi_match": {
                "query": query,
                "type": "bool_prefix",
                "fields": ["name", "name._2gram", "name._3gram"],
            }
        }
    }
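
_generate_body targets the subfields that a search_as_you_type mapping produces (name, name._2gram, name._3gram). A minimal usage sketch, assuming an index called "members" whose hits carry a name field; both names are assumptions for illustration:

def autocomplete(prefix, index="members", size=5):
    """Return candidate names for a typed prefix using the query body above."""
    response = _es.search(index=index, body=_generate_body(prefix), size=size)
    return [hit["_source"]["name"] for hit in response["hits"]["hits"]]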
Example #17
 def setup_method(self, method):
     elastic = Elasticsearch(transport_class=DummyTransport)
     self.client = LtrClient(elastic)
Example #18
import argparse
import logging
import os

from elasticsearch import Elasticsearch

parser = argparse.ArgumentParser(description="configures elastic")
parser.add_argument(
    "--task", "-t", default="setup", choices=["setup", "delete"]
)
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

es_user = os.getenv("ELASTIC_USER")
if es_user is not None:
    es_pass = os.getenv("ELASTIC_PASS")
    es = Elasticsearch([os.getenv("ELASTICSEARCH_URL", "elasticsearch:9200")],
                       http_auth=(es_user, es_pass))
else:
    es = Elasticsearch([os.getenv("ELASTICSEARCH_URL", "elasticsearch:9200")])
settings = {
    "settings": {
        "analysis": {
            "filter": {
                "brazilian_stop": {
                    "type": "stop",
                    "stopwords":  "_brazilian_"
                },
                "brazilian_keywords": {
                    "type": "keyword_marker",
                    "keywords":   []
                },
                "brazilian_stemmer": {
Example #19
File: es.py Project: ghukill/combine
    def index_job_to_es_spark(spark, job, records_df, field_mapper_config):
        '''
        Method to index records dataframe into ES

        Args:
            spark (pyspark.sql.session.SparkSession): spark instance from static job methods
            job (core.models.Job): Job for records
            records_df (pyspark.sql.DataFrame): records as pyspark DataFrame
            field_mapper_config (dict): XML2kvp field mapper configurations

        Returns:
            None
                - indexes records to ES
        '''

        # init logging support
        spark.sparkContext.setLogLevel('INFO')
        log4jLogger = spark.sparkContext._jvm.org.apache.log4j
        logger = log4jLogger.LogManager.getLogger(__name__)

        # get index mapper
        index_mapper_handle = globals()['XML2kvpMapper']

        # create rdd from index mapper
        def es_mapper_pt_udf(pt):

            # init mapper once per partition
            mapper = index_mapper_handle(
                field_mapper_config=field_mapper_config)

            for row in pt:
                yield mapper.map_record(record_string=row.document,
                                        db_id=row._id.oid,
                                        combine_id=row.combine_id,
                                        record_id=row.record_id,
                                        publish_set_id=job.publish_set_id,
                                        fingerprint=row.fingerprint)

        logger.info('###ES 1 -- mapping records')
        mapped_records_rdd = records_df.rdd.mapPartitions(es_mapper_pt_udf)

        # attempt to write index mapping failures to DB
        # filter out failures
        logger.info('###ES 2 -- filtering failures')
        failures_rdd = mapped_records_rdd.filter(lambda row: row[0] == 'fail')

        # if failures, write
        if not failures_rdd.isEmpty():
            logger.info('###ES 3 -- writing indexing failures')

            failures_df = failures_rdd.map(
                lambda row: Row(db_id=row[1]['db_id'],
                                record_id=row[1]['record_id'],
                                mapping_error=row[1]['mapping_error'])).toDF()

            # add job_id as column
            failures_df = failures_df.withColumn('job_id', lit(job.id))

            # write mapping failures to DB
            failures_df.select(['db_id', 'record_id', 'job_id', 'mapping_error']) \
                .write.format("com.mongodb.spark.sql.DefaultSource") \
                .mode("append") \
                .option("uri", "mongodb://127.0.0.1") \
                .option("database", "combine") \
                .option("collection", "index_mapping_failure").save()

        # retrieve successes to index
        logger.info('###ES 4 -- filtering successes')
        to_index_rdd = mapped_records_rdd.filter(
            lambda row: row[0] == 'success')

        # create index in advance
        index_name = 'j%s' % job.id
        es_handle_temp = Elasticsearch(hosts=[settings.ES_HOST])
        if not es_handle_temp.indices.exists(index_name):
            # put combine es index templates
            template_body = {
                'template': '*',
                'settings': {
                    'number_of_shards': 1,
                    'number_of_replicas': 0,
                    'refresh_interval': -1
                },
                'mappings': {
                    'record': {
                        "dynamic_templates": [{
                            "strings": {
                                "match_mapping_type": "string",
                                "mapping": {
                                    "type": "text",
                                    "fields": {
                                        "keyword": {
                                            "type": "keyword"
                                        }
                                    }
                                }
                            }
                        }],
                        'date_detection':
                        False,
                        'properties': {
                            'combine_db_id': {
                                'type': 'integer'
                            }
                        }
                    }
                }
            }
            es_handle_temp.indices.put_template('combine_template',
                                                body=json.dumps(template_body))

            # create index
            es_handle_temp.indices.create(index_name)

        # index to ES
        logger.info('###ES 5 -- writing to ES')
        to_index_rdd.saveAsNewAPIHadoopFile(
            path='-',
            outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
            keyClass="org.apache.hadoop.io.NullWritable",
            valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
            conf={
                "es.resource": "%s/record" % index_name,
                "es.nodes": "%s:9200" % settings.ES_HOST,
                "es.mapping.exclude": "temp_id,__class__",
                "es.mapping.id": "temp_id",
            })

        # refresh index
        es_handle_temp.indices.refresh(index_name)

        # return
        return to_index_rdd
Example #20
import json
from datetime import datetime

import redis
from django.shortcuts import render
from django.views.generic.base import View
from django.http import HttpResponse
from search.models import JobboleEsModel, ZhihuAnswerEsModel, ZhihuQuestionEsModel, LagouEsModel
from elasticsearch import Elasticsearch

# elasticsearch
client = Elasticsearch(hosts=['localhost'])
# redis
redis_cli = redis.StrictRedis()


class IndexView(View):
    """首页"""
    def get(self, request):
        # get the top search keywords by score from Redis
        hot_keywords = redis_cli.zrevrangebyscore('search_keywords_set',
                                                  '+inf',
                                                  '-inf',
                                                  start=0,
                                                  num=5)
        return render(request, 'index.html', {'hot_keywords': hot_keywords})


class SuggestView(View):
    """生成搜索建议并返回ajax响应"""
    def get(self, request):
Example #21
def es_builder(hosts=None, port=9200):
    if hosts is None:
        hosts = '127.0.0.1'
    logger = logging.getLogger('elasticsearch')
    logger.setLevel(logging.WARNING)
    return Elasticsearch(hosts=hosts, port=port, timeout=60, request_timeout=60)
Example #22
# companies with headquarters in Trento
# --------------------------------------------------

from elasticsearch import Elasticsearch, helpers
import certifi
import csv
import json
import globalvariable as gv

gv.init()
http = gv.http
index = gv.index_atk
path = gv.path_write_tnurls

es = Elasticsearch([http],
                   use_ssl=True,
                   verify_certs=True,
                   ca_certs=certifi.where())


def main():

    urls = url_scan()

    with open(path, "wb") as asd:
        writer = csv.writer(asd)
        for line in urls:
            writer.writerow(line)


def url_scan():
    url_list = []

Example #23
 def process_item(self, item, spider):
     es = Elasticsearch(self.es_hosts)
     es.index(index=self.index_name, doc_type=self.index_type, body=json.dumps(dict(item), ensure_ascii=False, default=json_serial).encode("utf-8"))
     # es.index(index=self.index_name, doc_type=self.index_type, pipeline=self.ingest_pipeline, body=json.dumps(dict(item), ensure_ascii=False, default=json_serial).encode("utf-8"))
     return item
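
process_item above constructs a new Elasticsearch client for every scraped item. In a Scrapy pipeline the client is normally created once in open_spider and reused; a sketch under the same attribute and helper names as the snippet (the pipeline class name is hypothetical):

class ElasticsearchPipeline(object):
    def open_spider(self, spider):
        # build the client once per crawl instead of once per item
        self.es = Elasticsearch(self.es_hosts)

    def process_item(self, item, spider):
        body = json.dumps(dict(item), ensure_ascii=False,
                          default=json_serial).encode("utf-8")
        self.es.index(index=self.index_name, doc_type=self.index_type, body=body)
        return item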
Example #24
#!/usr/bin/env python
#encoding: utf-8

import time
import datetime

import json
import csv
import random

from elasticsearch import Elasticsearch
from ruman.time_utils import *

#config
weibo_es = Elasticsearch('219.224.134.216:9201', timeout=1000)
INDEX_SENCE = 'social_sensing_task'
TYPE_SENCE = 'rumor-media'
TYPE_FLOAT_TEXT = "text"
ES_INDEX_CAL_LIST = 'rumor_calculated_list'
WEBOUSER_INDEX = 'weibo_user'


def get_user(uid):
    uid = int(uid)
    query_body = {"size": 10, "query": {"match": {"uid": uid}}}

    res = weibo_es.search(index=WEBOUSER_INDEX,
                          doc_type='user',
                          body=query_body,
                          request_timeout=100)
    hits = res['hits']['hits']
Example #25
File: test4.py Project: simi0815/ss
 def __init__(self, address=MONGO_ADDRESS, port=MONGO_PORT):
     self.es = Elasticsearch(
         ['192.168.1.230:9200'],
         request_timeout=30000,
     )
Example #26
	def __init__(self, input_file,es_ip,es_port,index_name):
		self.input_file = input_file
		self.tree = self.__importXML()
		self.root = self.tree.getroot()
		self.es = Elasticsearch([{'host':es_ip,'port':es_port}])
		self.index_name = index_name
Example #27
@app.errorhandler(InvalidUsage)
def handle_invalid_usage(error):
    response = jsonify(error.to_dict())
    response.status_code = error.status_code
    return response


def load_model():
    """Load feature extractor model"""
    extractor = Extractor()
    return extractor


extractor = load_model()
es = Elasticsearch(hosts='localhost:9200')


@app.route("/hello", methods=['GET'])
def hello():
    return "Hello, world!"


@app.route("/extract_fea", methods=['GET', 'POST'])
def extract_fea():
    imgStr = request.values.get('img')
    if imgStr is None:
        raise InvalidUsage('parameter "img" is missing', status_code=410)
    try:
        img = read_img_blob(imgStr)
    except:
Example #28
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from elasticsearch import helpers
from elasticsearch import Elasticsearch
import datetime
import hashlib
import sys, os
reload(sys)
sys.setdefaultencoding('utf-8')

es = Elasticsearch([
    "http://192.168.241.47:9201", "http://192.168.241.46:9200",
    "192.168.241.50:9201"
],
                   sniffer_timeout=False)


class Saver(object):
    def __init__(self, cache_size=1000):
        self.actions = []
        self._cache_size = cache_size

    def pass_data(self, dic, es=es, my_index="relation2", my_type="relation2"):
        # read data
        """
        load to es
        :param dic:
        :param es:
        :param my_index:
        :param my_type:
        :return:
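
pass_data is truncated above. A sketch of how the cache-and-flush pattern suggested by self.actions and self._cache_size could be completed, using the hashlib and helpers imports already present in this example; deriving the document _id from a content hash is an assumption, not the original logic:

    def pass_data(self, dic, es=es, my_index="relation2", my_type="relation2"):
        """Buffer one document as a bulk action and flush once the cache is full."""
        action = {
            "_index": my_index,
            "_type": my_type,
            # stable id derived from the document content (illustrative choice)
            "_id": hashlib.md5(str(sorted(dic.items())).encode("utf-8")).hexdigest(),
            "_source": dic,
        }
        self.actions.append(action)
        if len(self.actions) >= self._cache_size:
            helpers.bulk(es, self.actions)
            self.actions = []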
Example #29
def create_app(config_class=Config):
    app = Flask("Flask Application for Nemo")
    app.config.from_object(config_class)
    if app.config['ELASTICSEARCH_URL']:
        if app.config['ES_CLIENT_CERT'] or app.config['ES_CLIENT_KEY']:
            app.elasticsearch = Elasticsearch(
                app.config['ELASTICSEARCH_URL'],
                use_ssl=True,
                verify_certs=True,
                client_cert=app.config['ES_CLIENT_CERT'],
                client_key=app.config['ES_CLIENT_KEY'])
        else:
            app.elasticsearch = Elasticsearch(app.config['ELASTICSEARCH_URL'])
    else:
        app.elasticsearch = None

    app.IIIFserver = app.config['IIIF_SERVER']\
        if app.config['IIIF_SERVER'] else None

    if app.config['IIIF_MAPPING']:
        app.IIIFmapping = app.config['IIIF_MAPPING']
        with open('{}/Mapping.json'.format(app.config['IIIF_MAPPING']),
                  "r") as f:
            app.picture_file = load(f)
        for key, value in app.picture_file.items():
            if type(value) == dict:
                if 'manifest' in value.keys():
                    app.IIIFviewer = True
                    continue
                else:
                    app.IIIFviewer = False
                    app.picture_file = ""
                    break
    else:
        app.IIIFviewer = False
        app.picture_file = ""

    db.init_app(app)
    migrate.init_app(app, db)
    login.init_app(app)
    mail.init_app(app)
    bootstrap.init_app(app)
    babel.init_app(app)
    sess.init_app(app)
    app.redis = Redis.from_url(app.config['REDIS_URL'])

    if not app.debug and not app.testing:
        if not os.path.exists('logs'):
            os.mkdir('logs')
        file_handler = RotatingFileHandler('logs/formulae-nemo.log',
                                           maxBytes=10240,
                                           backupCount=10)
        file_handler.setFormatter(
            logging.Formatter(
                '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'
            ))
        file_handler.setLevel(logging.INFO)
        app.logger.addHandler(file_handler)
        app.logger.setLevel(logging.INFO)
        app.logger.info('Formulae-Nemo startup')

    from .auth import bp as auth_bp
    app.register_blueprint(auth_bp, url_prefix="/auth")
    from .search import bp as search_bp
    app.register_blueprint(search_bp, url_prefix="/search")
    if app.IIIFviewer is False:
        app.logger.warning(_l('Der Viewer konnte nicht gestartet werden.'))
    else:
        from .viewer import bp as viewer_bp
        viewer_bp.static_folder = app.config['IIIF_MAPPING']
        app.register_blueprint(viewer_bp, url_prefix="/viewer")

    return app
Example #30
# WARNING: This file contains cron jobs for Elasticsearch. Use pure Python for any operations here;
# objects requiring the Flask app context may not work properly.

from elasticsearch import helpers, Elasticsearch
from app.views.redis_store import redis_store
from config import Config
from app.views.celery_ import celery
import psycopg2

es_store = Elasticsearch([Config.ELASTICSEARCH_HOST])
conn = psycopg2.connect(Config.SQLALCHEMY_DATABASE_URI)


@celery.task(name='rebuild.events.elasticsearch')
def cron_rebuild_events_elasticsearch():
    """
    Re-inserts all eligible events into elasticsearch
    Also clears event_index and event_delete redis sets
    :return:
    """
    conn = psycopg2.connect(Config.SQLALCHEMY_DATABASE_URI)
    cur = conn.cursor()
    cur.execute(
        "SELECT id, name, description, searchable_location_name, organizer_name, organizer_description FROM events WHERE state = 'published' and deleted_at is NULL ;"
    )
    events = cur.fetchall()
    event_data = ({
        '_type': 'event',
        '_index': 'events',
        '_id': event_[0],
        'name': event_[1],