示例#1
0
 def test_servers(self):
     """Check how ES normalizes the server specifications it is given.

     Covers bare ``host:port`` strings (port 9200 is expected to map to
     http, port 9500 to thrift), URL strings with an explicit scheme, and
     a list mixing strings with a ``(scheme, host, port)`` tuple.
     """
     # ``assertEqual`` is used instead of the deprecated ``assertEquals``
     # alias, which no longer exists in recent unittest versions.
     es = ES("127.0.0.1:9200")
     self.assertEqual(es.servers, [("http", "127.0.0.1", 9200)])
     es = ES("127.0.0.1:9500")
     self.assertEqual(es.servers, [("thrift", "127.0.0.1", 9500)])
     es = ES("http://127.0.0.1:9400")
     self.assertEqual(es.servers, [("http", "127.0.0.1", 9400)])
     es = ES("thrift://127.0.0.1:9100")
     self.assertEqual(es.servers, [("thrift", "127.0.0.1", 9100)])
     es = ES([
         "thrift://127.0.0.1:9100", "127.0.0.1:9200",
         ("thrift", "127.0.0.1", 9000)
     ])
     # Sort before comparing so the check does not depend on the order in
     # which ES stores the servers.
     self.assertEqual(sorted(es.servers), [("http", "127.0.0.1", 9200),
                                           ("thrift", "127.0.0.1", 9000),
                                           ("thrift", "127.0.0.1", 9100)])
示例#2
0
 def __init__(self, server, settings=None):
     """Connect to ``server`` and decide which index settings to use.

     ``server`` is passed straight to ``ES``. A truthy ``settings`` value
     is stored as given; otherwise a default configuration is stored that
     defines a custom ``ngram_analyzer`` (keyword tokenizer followed by
     the ``lowercase`` and ``filter_ngram`` filters) and an ``nGram``
     filter producing grams of length 1 to 30.
     """
     ngram_analyzer = {
         'tokenizer': 'keyword',
         'filter': ['lowercase', 'filter_ngram'],
         'type': 'custom'
     }
     ngram_filter = {
         'type': 'nGram',
         'max_gram': 30,
         'min_gram': 1
     }
     fallback_settings = {
         'index': {
             'analysis': {
                 'analyzer': {'ngram_analyzer': ngram_analyzer},
                 'filter': {'filter_ngram': ngram_filter}
             }
         }
     }

     self.conn = ES(server)
     self.indices = {}
     # Any falsy value (None, empty dict, ...) selects the defaults.
     self.settings = settings or fallback_settings
示例#3
0
def connect_to_db():
    """Return an ES connection built from ``settings.ES_SETTINGS``.

    Reads the required ``'ES_HOSTS'`` entry and the optional ``'TIMEOUT'``
    entry (defaulting to 60.0) and passes them to ``ES``.

    Raises:
        KeyError: if ``'ES_HOSTS'`` is missing from the settings.
    """
    es_settings = settings.ES_SETTINGS
    # The index name was previously read here but never used, so only the
    # values actually needed for the connection are looked up.
    return ES(es_settings['ES_HOSTS'],
              timeout=es_settings.get('TIMEOUT', 60.0))
示例#4
0
 def test_servers(self):
     """Check the URLs ES derives from each kind of server specification.

     Single specs (bare ``host:port`` strings and ``(scheme, host, port)``
     tuples) are checked one by one, then a mixed list is checked after
     sorting the resulting servers.
     """
     def urls_of(server_list):
         # Render every server entry as its URL string.
         return [entry.geturl() for entry in server_list]

     single_spec_cases = [
         ("127.0.0.1:9200", ["http://127.0.0.1:9200"]),
         ("127.0.0.1:9500", ["thrift://127.0.0.1:9500"]),
         (("http", "127.0.0.1", 9400), ["http://127.0.0.1:9400"]),
         (("thrift", "127.0.0.1", 9100), ["thrift://127.0.0.1:9100"]),
     ]
     for spec, expected_urls in single_spec_cases:
         client = ES(spec)
         self.assertEqual(urls_of(client.servers), expected_urls)

     mixed_client = ES([
         "http://127.0.0.1:9100",
         "127.0.0.1:9200",
         ("thrift", "127.0.0.1", 9000),
         "127.0.0.1:9500",
     ])
     ordered_servers = sorted(mixed_client.servers)
     self.assertEqual(urls_of(ordered_servers), [
         "http://127.0.0.1:9100", "http://127.0.0.1:9200",
         "thrift://127.0.0.1:9000", "thrift://127.0.0.1:9500"
     ])
示例#5
0
    def initialize(self):
        """Open the ElasticSearch connection for this stream.

        Builds the server string from ``self.host`` and ``self.port``
        (either may be unset), creates the ``ES`` connection with a copy of
        ``self.elasticsearch_args`` as keyword arguments, and points the
        connection's default indices and types at ``self.index`` and
        ``self.document_type``.
        """
        # Work on a copy so the stored arguments are never mutated.
        args = self.elasticsearch_args.copy()
        server = self.host if self.host else ""
        if self.port:
            # The port may be configured as an int; convert it explicitly
            # so the concatenation cannot raise TypeError.
            server += ":" + str(self.port)

        self.connection = ES(server, **args)
        self.connection.default_indices = self.index
        self.connection.default_types = self.document_type
示例#6
0
    def setUp(self):
        """Configure a dedicated index for the unit tests.

        Installs test ES settings on ``query.settings``, loads the "real"
        index settings from SOURCE/elasticsearch/settings.json into
        ``self.index_settings``, and creates the index through
        ``self.createIndex``.
        """
        self.es_settings = {'ES_HOSTS': ['localhost:9200'],
                            'INDEX': "unittest-binarypig",
                            'FACET_SIZE': 999999}
        query.settings.ES_SETTINGS = self.es_settings

        index_template_fn = os.path.join(settings.SOURCE_ROOT,
                                         'elasticsearch',
                                         'settings.json')
        # Use a context manager so the template file is always closed
        # (the ``file`` builtin used before also does not exist on Python 3).
        with open(index_template_fn) as index_template:
            self.index_settings = json.load(index_template)
        conn = ES(self.es_settings['ES_HOSTS'])
        self.createIndex(conn)
示例#7
0
    def initialize(self):
        """Open the ElasticSearch connection and prepare index and mapping.

        Steps, in order:

        * build the server string from ``self.host`` / ``self.port`` and
          connect, passing a copy of ``self.elasticsearch_args`` (minus the
          ``create`` and ``replace`` flags) to ``ES``;
        * if ``create`` is set, create ``self.index`` (an already existing
          index is tolerated);
        * if ``replace`` is set and the index was not just created, delete
          and recreate it;
        * if ``self.truncate`` is set, delete the document type's mapping;
        * finally make sure a mapping for ``self.document_type`` exists,
          installing ``self._get_mapping()`` when it is missing.
        """
        from pyes.es import ES
        # TypeMissingException is caught below, so it is imported here
        # together with the other exception this method handles.
        from pyes.exceptions import (IndexAlreadyExistsException,
                                     TypeMissingException)

        # Work on a copy so popping the flags does not mutate the stored args.
        args = self.elasticsearch_args.copy()
        server = self.host if self.host else ""
        if self.port:
            # The port may be configured as an int; convert it explicitly
            # so the concatenation cannot raise TypeError.
            server += ":" + str(self.port)

        # These two flags steer this method and are not ES() arguments.
        create = args.pop("create", False)
        replace = args.pop("replace", False)

        self.connection = ES(server, **args)
        self.connection.default_indices = self.index
        self.connection.default_types = self.document_type

        created = False
        if create:
            try:
                self.connection.create_index(self.index)
                self.connection.refresh(self.index)
                created = True
            except IndexAlreadyExistsException:
                # An existing index is fine; ``replace`` may still reset it.
                pass

        if replace and not created:
            self.connection.delete_index_if_exists(self.index)
            # NOTE(review): this refresh targets the index that was just
            # deleted - confirm that it cannot fail before the re-create.
            self.connection.refresh(self.index)
            self.connection.create_index(self.index)
            self.connection.refresh(self.index)

        if self.truncate:
            self.connection.delete_mapping(self.index, self.document_type)
            self.connection.refresh(self.index)

        # Check the mapping and install ours when the type is unknown.
        try:
            self.connection.get_mapping(self.document_type, self.index)
        except TypeMissingException:
            self.connection.put_mapping(self.document_type,
                                        self._get_mapping(), self.index)
示例#8
0
    odict = obj.as_dict()
    if isinstance(obj, (mappings.DocumentObjectField, mappings.ObjectField,
                        mappings.NestedObject)):
        properties = odict.pop("properties", [])
        doc_count += 1
        kwargs = ["name=%r" % obj.name,
                  "type=%r" % odict.pop("type")] +\
                 ["%s=%r" % (k, odict[k]) for k in sorted(odict.keys())]
        result.append("doc%d=" % doc_count +
                      str(type(obj)).split(".")[-1].strip("'>") + "(" +
                      ', '.join(kwargs) + ")")
        for k in sorted(obj.properties.keys()):
            result.extend(mappings_to_code(obj.properties[k], doc_count))
    else:
        kwargs = ["name=%r" % obj.name,
                  "type=%r" % odict.pop("type"),
                  "store=%r" % obj.store,
                  "index=%r" % odict.pop("index")] +\
                 ["%s=%r" % (k, odict[k]) for k in sorted(odict.keys())]
        result.append("doc%d.add_property(" % doc_count +\
                      str(type(obj)).split(".")[-1].strip("'>") + "(" +\
                      ', '.join(kwargs) + "))")

    return result


if __name__ == '__main__':
    # Script entry point: fetch the "twitter" doctype of the "twitter" index
    # from the node at 192.168.1.1:9200 and print the generated code lines.
    connection = ES("192.168.1.1:9200")
    twitter_doctype = connection.mappings.get_doctype("twitter", "twitter")
    generated_lines = mappings_to_code(twitter_doctype)
    print("\n".join(generated_lines))
示例#9
0
def get_conn(*args, **kwargs):
    """Return an ES connection to the HTTP server at 127.0.0.1:9200.

    Extra positional and keyword arguments are forwarded to ``ES`` after
    the server tuple.
    """
    local_http_server = ("http", "127.0.0.1", 9200)
    return ES(local_http_server, *args, **kwargs)
示例#10
0
文件: run.py 项目: mlys/gsapi
from utils import myyaml

app = Flask(__name__)

# Choose the ElasticSearch configuration: Config.ES by default, or
# Config.ES_TEST when the current request URL contains '/test/'.
escfg = Config.ES
# Wanna make sure test db is used if /test/ in url
try:
    if '/test/' in request.url:
        Config.TESTING = True
        escfg = Config.ES_TEST
except Exception:
    # Best effort: fall back to the default configuration when the URL
    # cannot be inspected. Only Exception is caught so KeyboardInterrupt
    # and SystemExit are no longer swallowed.
    # NOTE(review): at import time there is presumably no active request,
    # so this lookup may always fail - confirm the test switch ever fires.
    pass

# Create the ElasticSearch client and attach it to the app, remembering
# the configured index name on the client instance.
es = ES(("http", escfg['host'], escfg['port']))
es.__dict__['index_name'] = escfg['name']
app.es = es

app.config.from_object(Config)

# MongoDB handle, exposed on the app and bound to it.
mongo = PyMongo()
app.mongo = mongo
mongo.init_app(app)

# add regex for routing
app.url_map.converters['regex'] = RegexConverter

##################### GET SEARCH
@app.route( '/es', methods=['GET'])
def es():
示例#11
0
import sys

#sys.path.insert(0, "../")

#from pyes import ES
from pyes.es import ES
from datetime import datetime
import shelve
# Connect to the node listening on 127.0.0.1:9500.
conn = ES('127.0.0.1:9500')
#conn = ES('192.168.2.50:9200')
# Best-effort cleanup: start from a clean "test-index". A failed delete is
# ignored on purpose, but only Exception is caught so that
# KeyboardInterrupt and SystemExit still propagate.
try:
    conn.delete_index("test-index")
except Exception:
    pass

# Sample data set stored in a shelve file.
dataset = shelve.open("samples.shelve")

mapping = {
    u'description': {
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'true',
        'type': u'string',
        "term_vector": "with_positions_offsets"
    },
    u'name': {
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'true',
        'type': u'string',
        "term_vector": "with_positions_offsets"
Created on May 25, 2013

@author: yapianyu
'''
from bson.objectid import ObjectId
from difflib import SequenceMatcher
from pyes.es import ES
from pyes.query import MultiMatchQuery, Search
from pymongo.connection import Connection
import collections
import datetime

# Local directories holding the MovieLens 100k and 10M data sets.
ml_100k_folder = '/home/yapianyu/Desktop/movielens/ml-100k/'
ml_10m_folder = '/home/yapianyu/Desktop/movielens/ml-10M100K/'
# The 'right-channel' database on the MongoDB server at 127.0.0.1:27017.
mongodb = Connection('127.0.0.1', 27017)['right-channel']
# ES client addressed by the ('http', '127.0.0.1', 9200) server tuple.
elasticsearch = ES(('http', '127.0.0.1', 9200))


def count_movie_num_each_year():
    """Print the number of movies per year, newest year first.

    Reads ``movies.dat`` from ``ml_10m_folder``. Each line is split on
    ``'::'`` and the year is taken from the four characters before the
    last character of the second field (presumably a title ending in
    ``(YYYY)`` - confirm against the data set). One line per year is
    printed as ``<year> <TAB><count>``.

    Raises:
        IOError/OSError: if the data file cannot be opened.
        ValueError: if the extracted year is not an integer.
        IndexError: if a line has no second ``'::'`` field.
    """
    # The context manager guarantees the data file is closed again.
    with open(ml_10m_folder + 'movies.dat') as movies_file:
        movie_num = collections.Counter(
            int(line.split('::')[1][-5:-1]) for line in movies_file)

    # Years are unique keys, so a reversed sort of the items orders the
    # output by descending year.
    for year, num in sorted(movie_num.items(), reverse=True):
        # Single-argument print works on Python 2 and 3 and reproduces the
        # output of the former ``print year, '\t', num`` statement.
        print('%s \t%s' % (year, num))
示例#13
0
def _get_conn(*args, **kwargs):
    """Return an ES connection to ``settings.es_hosts``.

    Extra positional and keyword arguments are forwarded to ``ES``. The
    connection's default indices are set to ``settings.es_index`` before
    it is returned.
    """
    connection = ES(settings.es_hosts, *args, **kwargs)
    connection.default_indices = settings.es_index
    return connection
示例#14
0
 def tearDown(self):
     """Remove the index again once a test has finished."""
     es_hosts = self.es_settings['ES_HOSTS']
     # A fresh connection is opened just for the cleanup.
     self.deleteIndex(ES(es_hosts))
示例#15
0
'''
Created on Jan 13, 2013

@author: Fang Jiaguo
'''
from pyes.es import ES
from pymongo.connection import Connection
import json

# Load the project settings; the context manager makes sure the file handle
# is closed instead of being left to the garbage collector.
with open('settings.json', 'r') as settings_file:
    settings = json.load(settings_file)

# MongoDB database selected by the 'mongodb' section of the settings.
mongodb = Connection(settings['mongodb']['host'],
                     settings['mongodb']['port'])[settings['mongodb']['db']]
# ES client for the HTTP endpoint in the 'elasticsearch' section.
elasticsearch = ES(('http', settings['elasticsearch']['host'],
                    settings['elasticsearch']['port']))