def test_servers(self):
    """Verify server-address parsing for strings, URLs and mixed lists.

    Uses assertEqual: assertEquals is a deprecated alias that was removed
    in Python 3.12.
    """
    # A bare host:port string picks the transport from the well-known port:
    # 9200 -> http, 9500 -> thrift.
    es = ES("127.0.0.1:9200")
    self.assertEqual(es.servers, [("http", "127.0.0.1", 9200)])
    es = ES("127.0.0.1:9500")
    self.assertEqual(es.servers, [("thrift", "127.0.0.1", 9500)])
    # An explicit URL scheme overrides the port-based default.
    es = ES("http://127.0.0.1:9400")
    self.assertEqual(es.servers, [("http", "127.0.0.1", 9400)])
    es = ES("thrift://127.0.0.1:9100")
    self.assertEqual(es.servers, [("thrift", "127.0.0.1", 9100)])
    # Heterogeneous lists (URL, host:port, (scheme, host, port) tuple) are
    # normalised member by member.
    es = ES([
        "thrift://127.0.0.1:9100",
        "127.0.0.1:9200",
        ("thrift", "127.0.0.1", 9000),
    ])
    self.assertEqual(
        sorted(es.servers),
        [("http", "127.0.0.1", 9200),
         ("thrift", "127.0.0.1", 9000),
         ("thrift", "127.0.0.1", 9100)])
def __init__(self, server, settings=None):
    """Connect to ElasticSearch and record the index settings to use.

    When *settings* is not given, fall back to a default analysis
    configuration providing a 1..30-character nGram analyzer over a
    keyword tokenizer.
    """
    self.conn = ES(server)
    self.indices = {}
    default_settings = {
        'index': {
            'analysis': {
                'analyzer': {
                    'ngram_analyzer': {
                        'tokenizer': 'keyword',
                        'filter': ['lowercase', 'filter_ngram'],
                        'type': 'custom',
                    },
                },
                'filter': {
                    'filter_ngram': {
                        'type': 'nGram',
                        'max_gram': 30,
                        'min_gram': 1,
                    },
                },
            },
        },
    }
    self.settings = settings if settings else default_settings
def connect_to_db():
    """Create an ES connection configured from ``settings.ES_SETTINGS``.

    Reads the host list and an optional ``TIMEOUT`` (default 60.0 seconds).
    The original also read ``INDEX`` into an unused local; that dead code
    is removed here.
    """
    eshosts = settings.ES_SETTINGS['ES_HOSTS']
    timeout = settings.ES_SETTINGS.get('TIMEOUT', 60.0)
    return ES(eshosts, timeout=timeout)
def test_servers(self):
    """Check URL normalisation of server specs passed to ES."""

    def geturls(servers):
        # Each parsed server exposes geturl() (urlparse-style result).
        return [srv.geturl() for srv in servers]

    # host:port strings infer the transport from the well-known port.
    es = ES("127.0.0.1:9200")
    self.assertEqual(geturls(es.servers), ["http://127.0.0.1:9200"])
    es = ES("127.0.0.1:9500")
    self.assertEqual(geturls(es.servers), ["thrift://127.0.0.1:9500"])
    # (scheme, host, port) tuples are taken literally.
    es = ES(("http", "127.0.0.1", 9400))
    self.assertEqual(geturls(es.servers), ["http://127.0.0.1:9400"])
    es = ES(("thrift", "127.0.0.1", 9100))
    self.assertEqual(geturls(es.servers), ["thrift://127.0.0.1:9100"])
    # Mixed lists of URLs, host:port strings and tuples all normalise.
    es = ES([
        "http://127.0.0.1:9100",
        "127.0.0.1:9200",
        ("thrift", "127.0.0.1", 9000),
        "127.0.0.1:9500",
    ])
    self.assertEqual(geturls(sorted(es.servers)), [
        "http://127.0.0.1:9100",
        "http://127.0.0.1:9200",
        "thrift://127.0.0.1:9000",
        "thrift://127.0.0.1:9500",
    ])
def initialize(self):
    """Initialize ElasticSearch source stream: open the connection and
    set the default index and document type."""
    args = self.elasticsearch_args.copy()
    # Assemble "host[:port]"; either part may be absent.
    # NOTE(review): assumes self.port is a string when set — confirm.
    server = self.host if self.host else ""
    if self.port:
        server += ":" + self.port
    self.connection = ES(server, **args)
    self.connection.default_indices = self.index
    self.connection.default_types = self.document_type
def setUp(self):
    """reads the "real" elasticsearch settings from
    SOURCE/elasticsearch/settings.json and uses it to configure an index
    for the unittests"""
    self.es_settings = {'ES_HOSTS': ['localhost:9200'],
                        'INDEX': "unittest-binarypig",
                        'FACET_SIZE': 999999}
    query.settings.ES_SETTINGS = self.es_settings
    index_template_fn = os.path.join(settings.SOURCE_ROOT,
                                     'elasticsearch', 'settings.json')
    # file() is a Python-2-only builtin and the handle was never closed;
    # a context-managed open() works on both Python 2 and 3.
    with open(index_template_fn) as fp:
        self.index_settings = json.load(fp)
    conn = ES(self.es_settings['ES_HOSTS'])
    self.createIndex(conn)
def initialize(self):
    """
    Initialize the ElasticSearch target stream: connect, optionally
    (re)create the index, optionally truncate the document type, and
    ensure a mapping exists for the document type.
    """
    from pyes.es import ES
    # BUG FIX: TypeMissingException is caught below but the original only
    # imported IndexAlreadyExistsException, so a genuinely missing mapping
    # raised NameError instead of being handled.
    from pyes.exceptions import (IndexAlreadyExistsException,
                                 TypeMissingException)

    args = self.elasticsearch_args.copy()
    # Assemble "host[:port]"; either part may be absent.
    server = ""
    if self.host:
        server = self.host
    if self.port:
        server += ":" + self.port

    create = args.pop("create", False)
    replace = args.pop("replace", False)

    self.connection = ES(server, **args)
    self.connection.default_indices = self.index
    self.connection.default_types = self.document_type

    created = False
    if create:
        try:
            self.connection.create_index(self.index)
            self.connection.refresh(self.index)
            created = True
        except IndexAlreadyExistsException:
            # Index already present — fall through to replace/truncate.
            pass

    if replace and not created:
        # Drop and re-create so the stream starts from an empty index.
        self.connection.delete_index_if_exists(self.index)
        self.connection.refresh(self.index)
        self.connection.create_index(self.index)
        self.connection.refresh(self.index)

    if self.truncate:
        # Removing the mapping deletes all documents of this type.
        self.connection.delete_mapping(self.index, self.document_type)
        self.connection.refresh(self.index)

    # Ensure a mapping exists; create one only when it is missing.
    try:
        self.connection.get_mapping(self.document_type, self.index)
    except TypeMissingException:
        self.connection.put_mapping(self.document_type,
                                    self._get_mapping(),
                                    self.index)
# NOTE(review): this chunk starts mid-function — the `def mappings_to_code(...)`
# header is outside the visible source; only the tail of its body appears here.
odict = obj.as_dict()
if isinstance(obj, (mappings.DocumentObjectField, mappings.ObjectField, mappings.NestedObject)):
    # Container field: emit a "docN=<Type>(...)" constructor line, then
    # recurse into each child property in sorted order.
    properties = odict.pop("properties", [])
    doc_count += 1
    kwargs = ["name=%r" % obj.name, "type=%r" % odict.pop("type")] +\
             ["%s=%r" % (k, odict[k]) for k in sorted(odict.keys())]
    # str(type(obj)).split(".")[-1].strip("'>") extracts the bare class name.
    result.append("doc%d=" % doc_count + str(type(obj)).split(".")[-1].strip("'>") + "(" + ', '.join(kwargs) + ")")
    for k in sorted(obj.properties.keys()):
        result.extend(mappings_to_code(obj.properties[k], doc_count))
else:
    # Leaf field: emit a "docN.add_property(<Type>(...))" call.
    kwargs = ["name=%r" % obj.name, "type=%r" % odict.pop("type"),
              "store=%r" % obj.store, "index=%r" % odict.pop("index")] +\
             ["%s=%r" % (k, odict[k]) for k in sorted(odict.keys())]
    result.append("doc%d.add_property(" % doc_count +\
                  str(type(obj)).split(".")[-1].strip("'>") + "(" +\
                  ', '.join(kwargs) + "))")
return result

if __name__ == '__main__':
    # Example: dump the "twitter" doctype mapping as pyes construction code.
    es = ES("192.168.1.1:9200")
    res = mappings_to_code(es.mappings.get_doctype("twitter", "twitter"))
    print("\n".join(res))
def get_conn(*args, **kwargs):
    """Return an ES connection to the local HTTP endpoint, forwarding extras."""
    endpoint = ("http", "127.0.0.1", 9200)
    return ES(endpoint, *args, **kwargs)
from utils import myyaml

app = Flask(__name__)

# ElasticSearch configuration from the Config object.
escfg = Config.ES
# Wanna make sure test db is used if /test/ in url.
# NOTE(review): `request.url` is read at module import time, outside any
# request context, so this raises and the bare except silently keeps the
# non-test config — confirm whether this ever selects ES_TEST as intended.
try:
    if '/test/' in request.url:
        Config.TESTING = True
        escfg = Config.ES_TEST
except:  # NOTE(review): bare except also hides unrelated errors
    pass

# ElasticSearch connection, stashed on the app for handlers to use.
es = ES(("http", escfg['host'], escfg['port']))
es.__dict__['index_name'] = escfg['name']
app.es = es
app.config.from_object(Config)

mongo = PyMongo()
app.mongo = mongo
mongo.init_app(app)

# add regex for routing
app.url_map.converters['regex'] = RegexConverter

##################### GET SEARCH
# NOTE(review): this view function shadows the module-level `es` connection;
# its body continues beyond the visible source.
@app.route( '/es', methods=['GET'])
def es():
import sys #sys.path.insert(0, "../") #from pyes import ES from pyes.es import ES from datetime import datetime import shelve conn = ES('127.0.0.1:9500') #conn = ES('192.168.2.50:9200') try: conn.delete_index("test-index") except: pass dataset = shelve.open("samples.shelve") mapping = { u'description': { 'boost': 1.0, 'index': 'analyzed', 'store': 'true', 'type': u'string', "term_vector": "with_positions_offsets" }, u'name': { 'boost': 1.0, 'index': 'analyzed', 'store': 'true', 'type': u'string', "term_vector": "with_positions_offsets"
Created on May 25, 2013 @author: yapianyu ''' from bson.objectid import ObjectId from difflib import SequenceMatcher from pyes.es import ES from pyes.query import MultiMatchQuery, Search from pymongo.connection import Connection import collections import datetime ml_100k_folder = '/home/yapianyu/Desktop/movielens/ml-100k/' ml_10m_folder = '/home/yapianyu/Desktop/movielens/ml-10M100K/' mongodb = Connection('127.0.0.1', 27017)['right-channel'] elasticsearch = ES(('http', '127.0.0.1', 9200)) def count_movie_num_each_year(): movie_num = {} f = open(ml_10m_folder + 'movies.dat') for line in f: year = int(line.split('::')[1][-5:-1]) if year in movie_num: movie_num[year] += 1 else: movie_num[year] = 1 d = collections.OrderedDict(sorted(movie_num.items(), key=lambda t: -t[0])) for year, num in d.items(): print year, '\t', num
def _get_conn(*args, **kwargs):
    """Open an ES connection to the configured hosts with the default index set."""
    conn = ES(settings.es_hosts, *args, **kwargs)
    conn.default_indices = settings.es_index
    return conn
def tearDown(self):
    """delete the index"""
    conn = ES(self.es_settings['ES_HOSTS'])
    # Remove the index created by setUp so tests stay isolated.
    self.deleteIndex(conn)
'''
Created on Jan 13, 2013

@author: Fang Jiaguo
'''
from pyes.es import ES
from pymongo.connection import Connection
import json

# Load connection settings once at import time. The original passed a bare
# open() into json.load and leaked the file handle.
with open('settings.json', 'r') as fp:
    settings = json.load(fp)

mongodb = Connection(settings['mongodb']['host'],
                     settings['mongodb']['port'])[settings['mongodb']['db']]
elasticsearch = ES(('http',
                    settings['elasticsearch']['host'],
                    settings['elasticsearch']['port']))